LLVM  16.0.0git
X86AsmParser.cpp
Go to the documentation of this file.
1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
11 #include "MCTargetDesc/X86MCExpr.h"
15 #include "X86AsmParserCommon.h"
16 #include "X86Operand.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/SmallString.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/StringSwitch.h"
21 #include "llvm/ADT/Twine.h"
22 #include "llvm/MC/MCContext.h"
23 #include "llvm/MC/MCExpr.h"
24 #include "llvm/MC/MCInst.h"
25 #include "llvm/MC/MCInstrInfo.h"
30 #include "llvm/MC/MCRegisterInfo.h"
31 #include "llvm/MC/MCSection.h"
32 #include "llvm/MC/MCStreamer.h"
34 #include "llvm/MC/MCSymbol.h"
35 #include "llvm/MC/TargetRegistry.h"
37 #include "llvm/Support/Compiler.h"
38 #include "llvm/Support/SourceMgr.h"
40 #include <algorithm>
41 #include <memory>
42 
43 using namespace llvm;
44 
46  "x86-experimental-lvi-inline-asm-hardening",
47  cl::desc("Harden inline assembly code that may be vulnerable to Load Value"
48  " Injection (LVI). This feature is experimental."), cl::Hidden);
49 
50 static bool checkScale(unsigned Scale, StringRef &ErrMsg) {
51  if (Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
52  ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
53  return true;
54  }
55  return false;
56 }
57 
58 namespace {
59 
// Binding strength of each infix-calculator operator, indexed by
// InfixCalculatorTok (declared in X86AsmParser below) — the enumerator order
// and this table must stay in sync.  Larger values bind tighter; IC_IMM,
// IC_REGISTER and IC_OR share the lowest value because operands and the
// weakest operator never displace anything off the operator stack.
static const char OpPrecedence[] = {
  0, // IC_OR
  1, // IC_XOR
  2, // IC_AND
  4, // IC_LSHIFT
  4, // IC_RSHIFT
  5, // IC_PLUS
  5, // IC_MINUS
  6, // IC_MULTIPLY
  6, // IC_DIVIDE
  6, // IC_MOD
  7, // IC_NOT
  8, // IC_NEG
  9, // IC_RPAREN
  10, // IC_LPAREN
  0, // IC_IMM
  0, // IC_REGISTER
  3, // IC_EQ
  3, // IC_NE
  3, // IC_LT
  3, // IC_LE
  3, // IC_GT
  3 // IC_GE
};
84 
85 class X86AsmParser : public MCTargetAsmParser {
86  ParseInstructionInfo *InstInfo;
87  bool Code16GCC;
88  unsigned ForcedDataPrefix = 0;
89 
90  enum VEXEncoding {
91  VEXEncoding_Default,
92  VEXEncoding_VEX,
93  VEXEncoding_VEX2,
94  VEXEncoding_VEX3,
95  VEXEncoding_EVEX,
96  };
97 
98  VEXEncoding ForcedVEXEncoding = VEXEncoding_Default;
99 
100  enum DispEncoding {
101  DispEncoding_Default,
102  DispEncoding_Disp8,
103  DispEncoding_Disp32,
104  };
105 
106  DispEncoding ForcedDispEncoding = DispEncoding_Default;
107 
108 private:
109  SMLoc consumeToken() {
110  MCAsmParser &Parser = getParser();
111  SMLoc Result = Parser.getTok().getLoc();
112  Parser.Lex();
113  return Result;
114  }
115 
116  X86TargetStreamer &getTargetStreamer() {
117  assert(getParser().getStreamer().getTargetStreamer() &&
118  "do not have a target streamer");
119  MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
120  return static_cast<X86TargetStreamer &>(TS);
121  }
122 
  // Run the table-generated instruction matcher over Operands.
  // In Code16GCC mode (".code16gcc") source is written with 32-bit semantics
  // but assembled for 16-bit mode, so the matcher is temporarily switched to
  // 32-bit around the call and restored afterwards.
  // Returns the matcher's result code; on failure ErrorInfo/MissingFeatures
  // describe the offending operand or missing subtarget features.
  unsigned MatchInstruction(const OperandVector &Operands, MCInst &Inst,
                            uint64_t &ErrorInfo, FeatureBitset &MissingFeatures,
                            bool matchingInlineAsm, unsigned VariantID = 0) {
    // In Code16GCC mode, match as 32-bit.
    if (Code16GCC)
      SwitchMode(X86::Is32Bit);
    unsigned rv = MatchInstructionImpl(Operands, Inst, ErrorInfo,
                                       MissingFeatures, matchingInlineAsm,
                                       VariantID);
    if (Code16GCC)
      SwitchMode(X86::Is16Bit);
    return rv;
  }
136 
  // Token kinds consumed by InfixCalculator: operators, parentheses and the
  // two operand kinds (immediate, register).
  // NOTE: the enumerator order is an index into the file-level OpPrecedence
  // table — keep the two in sync.
  enum InfixCalculatorTok {
    IC_OR = 0,
    IC_XOR,
    IC_AND,
    IC_LSHIFT,
    IC_RSHIFT,
    IC_PLUS,
    IC_MINUS,
    IC_MULTIPLY,
    IC_DIVIDE,
    IC_MOD,
    IC_NOT,
    IC_NEG,
    IC_RPAREN,
    IC_LPAREN,
    IC_IMM,
    IC_REGISTER,
    IC_EQ,
    IC_NE,
    IC_LT,
    IC_LE,
    IC_GT,
    IC_GE
  };
161 
  // Intel-syntax operators applicable to an inline-asm identifier
  // (LENGTH/SIZE/TYPE), classified by IdentifyIntelInlineAsmOperator.
  enum IntelOperatorKind {
    IOK_INVALID = 0,
    IOK_LENGTH,
    IOK_SIZE,
    IOK_TYPE,
  };
168 
  // MASM-syntax operators on identifiers (LENGTHOF/SIZEOF/TYPE), classified
  // by IdentifyMasmOperator.
  enum MasmOperatorKind {
    MOK_INVALID = 0,
    MOK_LENGTHOF,
    MOK_SIZEOF,
    MOK_TYPE,
  };
175 
  // Evaluates the arithmetic part of an Intel-syntax expression.  Operands
  // and operators are fed in infix order; pushOperator() performs a
  // shunting-yard conversion onto PostfixStack, and execute() evaluates the
  // resulting postfix sequence.  Comparison operators yield MASM-style
  // results (-1 for true, 0 for false).
  class InfixCalculator {
    typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
    SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
    SmallVector<ICToken, 4> PostfixStack;

    // IC_NEG and IC_NOT are the only unary operators; all others are binary.
    bool isUnaryOperator(InfixCalculatorTok Op) const {
      return Op == IC_NEG || Op == IC_NOT;
    }

  public:
    // Pop the most recent operand value; returns -1 when the top of the
    // postfix stack is not an operand (the caller uses this as a scale,
    // where -1 is rejected by checkScale).
    int64_t popOperand() {
      assert (!PostfixStack.empty() && "Poped an empty stack!");
      ICToken Op = PostfixStack.pop_back_val();
      if (!(Op.first == IC_IMM || Op.first == IC_REGISTER))
        return -1; // The invalid Scale value will be caught later by checkScale
      return Op.second;
    }
    // Push an operand (immediate or register) with its value.
    void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
      assert ((Op == IC_IMM || Op == IC_REGISTER) &&
              "Unexpected operand!");
      PostfixStack.push_back(std::make_pair(Op, Val));
    }

    // Discard the most recently pushed operator (used when a
    // 'Scale * Register' pair is folded away by the caller).
    void popOperator() { InfixOperatorStack.pop_back(); }
    // Shunting-yard step: flush operators of greater-or-equal precedence
    // from the operator stack to the postfix stack, then push Op.
    void pushOperator(InfixCalculatorTok Op) {
      // Push the new operator if the stack is empty.
      if (InfixOperatorStack.empty()) {
        InfixOperatorStack.push_back(Op);
        return;
      }

      // Push the new operator if it has a higher precedence than the operator
      // on the top of the stack or the operator on the top of the stack is a
      // left parentheses.
      unsigned Idx = InfixOperatorStack.size() - 1;
      InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
      if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
        InfixOperatorStack.push_back(Op);
        return;
      }

      // The operator on the top of the stack has higher precedence than the
      // new operator.
      unsigned ParenCount = 0;
      while (true) {
        // Nothing to process.
        if (InfixOperatorStack.empty())
          break;

        Idx = InfixOperatorStack.size() - 1;
        StackOp = InfixOperatorStack[Idx];
        if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
          break;

        // If we have an even parentheses count and we see a left parentheses,
        // then stop processing.
        if (!ParenCount && StackOp == IC_LPAREN)
          break;

        if (StackOp == IC_RPAREN) {
          ++ParenCount;
          InfixOperatorStack.pop_back();
        } else if (StackOp == IC_LPAREN) {
          --ParenCount;
          InfixOperatorStack.pop_back();
        } else {
          InfixOperatorStack.pop_back();
          PostfixStack.push_back(std::make_pair(StackOp, 0));
        }
      }
      // Push the new operator.
      InfixOperatorStack.push_back(Op);
    }

    // Evaluate the accumulated postfix expression and return its value.
    // Mixed register/immediate operands and division/modulo by zero are
    // guarded only by asserts; callers are expected to have validated the
    // expression while feeding it in.
    int64_t execute() {
      // Push any remaining operators onto the postfix stack.
      while (!InfixOperatorStack.empty()) {
        InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
        if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
          PostfixStack.push_back(std::make_pair(StackOp, 0));
      }

      if (PostfixStack.empty())
        return 0;

      // Standard postfix evaluation: operands are pushed, operators pop one
      // (unary) or two (binary) operands and push the IC_IMM result.
      SmallVector<ICToken, 16> OperandStack;
      for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
        ICToken Op = PostfixStack[i];
        if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
          OperandStack.push_back(Op);
        } else if (isUnaryOperator(Op.first)) {
          assert (OperandStack.size() > 0 && "Too few operands.");
          ICToken Operand = OperandStack.pop_back_val();
          assert (Operand.first == IC_IMM &&
                  "Unary operation with a register!");
          switch (Op.first) {
          default:
            report_fatal_error("Unexpected operator!");
            break;
          case IC_NEG:
            OperandStack.push_back(std::make_pair(IC_IMM, -Operand.second));
            break;
          case IC_NOT:
            OperandStack.push_back(std::make_pair(IC_IMM, ~Operand.second));
            break;
          }
        } else {
          assert (OperandStack.size() > 1 && "Too few operands.");
          int64_t Val;
          ICToken Op2 = OperandStack.pop_back_val();
          ICToken Op1 = OperandStack.pop_back_val();
          switch (Op.first) {
          default:
            report_fatal_error("Unexpected operator!");
            break;
          case IC_PLUS:
            Val = Op1.second + Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_MINUS:
            Val = Op1.second - Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_MULTIPLY:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Multiply operation with an immediate and a register!");
            Val = Op1.second * Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_DIVIDE:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Divide operation with an immediate and a register!");
            assert (Op2.second != 0 && "Division by zero!");
            Val = Op1.second / Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_MOD:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Modulo operation with an immediate and a register!");
            Val = Op1.second % Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_OR:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Or operation with an immediate and a register!");
            Val = Op1.second | Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_XOR:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Xor operation with an immediate and a register!");
            Val = Op1.second ^ Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_AND:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "And operation with an immediate and a register!");
            Val = Op1.second & Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_LSHIFT:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Left shift operation with an immediate and a register!");
            Val = Op1.second << Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_RSHIFT:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Right shift operation with an immediate and a register!");
            Val = Op1.second >> Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          // Comparisons produce MASM truth values: -1 (all bits set) for
          // true, 0 for false.
          case IC_EQ:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Equals operation with an immediate and a register!");
            Val = (Op1.second == Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_NE:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Not-equals operation with an immediate and a register!");
            Val = (Op1.second != Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_LT:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Less-than operation with an immediate and a register!");
            Val = (Op1.second < Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_LE:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Less-than-or-equal operation with an immediate and a "
                   "register!");
            Val = (Op1.second <= Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_GT:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Greater-than operation with an immediate and a register!");
            Val = (Op1.second > Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_GE:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Greater-than-or-equal operation with an immediate and a "
                   "register!");
            Val = (Op1.second >= Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          }
        }
      }
      assert (OperandStack.size() == 1 && "Expected a single result.");
      return OperandStack.pop_back_val().second;
    }
  };
393 
  // States of the Intel-syntax expression state machine: IES_INIT is the
  // start state, IES_ERROR is the sticky failure state, and each remaining
  // state names the kind of token most recently accepted.
  enum IntelExprState {
    IES_INIT,
    IES_OR,
    IES_XOR,
    IES_AND,
    IES_EQ,
    IES_NE,
    IES_LT,
    IES_LE,
    IES_GT,
    IES_GE,
    IES_LSHIFT,
    IES_RSHIFT,
    IES_PLUS,
    IES_MINUS,
    IES_OFFSET,
    IES_CAST,
    IES_NOT,
    IES_MULTIPLY,
    IES_DIVIDE,
    IES_MOD,
    IES_LBRAC,
    IES_RBRAC,
    IES_LPAREN,
    IES_RPAREN,
    IES_REGISTER,
    IES_INTEGER,
    IES_IDENTIFIER,
    IES_ERROR
  };
424 
425  class IntelExprStateMachine {
426  IntelExprState State = IES_INIT, PrevState = IES_ERROR;
427  unsigned BaseReg = 0, IndexReg = 0, TmpReg = 0, Scale = 0;
428  int64_t Imm = 0;
429  const MCExpr *Sym = nullptr;
430  StringRef SymName;
431  InfixCalculator IC;
433  short BracCount = 0;
434  bool MemExpr = false;
435  bool OffsetOperator = false;
436  bool AttachToOperandIdx = false;
437  bool IsPIC = false;
438  SMLoc OffsetOperatorLoc;
439  AsmTypeInfo CurType;
440 
441  bool setSymRef(const MCExpr *Val, StringRef ID, StringRef &ErrMsg) {
442  if (Sym) {
443  ErrMsg = "cannot use more than one symbol in memory operand";
444  return true;
445  }
446  Sym = Val;
447  SymName = ID;
448  return false;
449  }
450 
451  public:
452  IntelExprStateMachine() = default;
453 
454  void addImm(int64_t imm) { Imm += imm; }
455  short getBracCount() const { return BracCount; }
456  bool isMemExpr() const { return MemExpr; }
457  bool isOffsetOperator() const { return OffsetOperator; }
458  SMLoc getOffsetLoc() const { return OffsetOperatorLoc; }
459  unsigned getBaseReg() const { return BaseReg; }
460  unsigned getIndexReg() const { return IndexReg; }
461  unsigned getScale() const { return Scale; }
462  const MCExpr *getSym() const { return Sym; }
463  StringRef getSymName() const { return SymName; }
464  StringRef getType() const { return CurType.Name; }
465  unsigned getSize() const { return CurType.Size; }
466  unsigned getElementSize() const { return CurType.ElementSize; }
467  unsigned getLength() const { return CurType.Length; }
468  int64_t getImm() { return Imm + IC.execute(); }
469  bool isValidEndState() const {
470  return State == IES_RBRAC || State == IES_INTEGER;
471  }
472 
473  // Is the intel expression appended after an operand index.
474  // [OperandIdx][Intel Expression]
475  // This is neccessary for checking if it is an independent
476  // intel expression at back end when parse inline asm.
477  void setAppendAfterOperand() { AttachToOperandIdx = true; }
478 
479  bool isPIC() const { return IsPIC; }
480  void setPIC() { IsPIC = true; }
481 
482  bool hadError() const { return State == IES_ERROR; }
483  const InlineAsmIdentifierInfo &getIdentifierInfo() const { return Info; }
484 
485  bool regsUseUpError(StringRef &ErrMsg) {
486  // This case mostly happen in inline asm, e.g. Arr[BaseReg + IndexReg]
487  // can not intruduce additional register in inline asm in PIC model.
488  if (IsPIC && AttachToOperandIdx)
489  ErrMsg = "Don't use 2 or more regs for mem offset in PIC model!";
490  else
491  ErrMsg = "BaseReg/IndexReg already set!";
492  return true;
493  }
494 
495  void onOr() {
496  IntelExprState CurrState = State;
497  switch (State) {
498  default:
499  State = IES_ERROR;
500  break;
501  case IES_INTEGER:
502  case IES_RPAREN:
503  case IES_REGISTER:
504  State = IES_OR;
505  IC.pushOperator(IC_OR);
506  break;
507  }
508  PrevState = CurrState;
509  }
510  void onXor() {
511  IntelExprState CurrState = State;
512  switch (State) {
513  default:
514  State = IES_ERROR;
515  break;
516  case IES_INTEGER:
517  case IES_RPAREN:
518  case IES_REGISTER:
519  State = IES_XOR;
520  IC.pushOperator(IC_XOR);
521  break;
522  }
523  PrevState = CurrState;
524  }
525  void onAnd() {
526  IntelExprState CurrState = State;
527  switch (State) {
528  default:
529  State = IES_ERROR;
530  break;
531  case IES_INTEGER:
532  case IES_RPAREN:
533  case IES_REGISTER:
534  State = IES_AND;
535  IC.pushOperator(IC_AND);
536  break;
537  }
538  PrevState = CurrState;
539  }
540  void onEq() {
541  IntelExprState CurrState = State;
542  switch (State) {
543  default:
544  State = IES_ERROR;
545  break;
546  case IES_INTEGER:
547  case IES_RPAREN:
548  case IES_REGISTER:
549  State = IES_EQ;
550  IC.pushOperator(IC_EQ);
551  break;
552  }
553  PrevState = CurrState;
554  }
555  void onNE() {
556  IntelExprState CurrState = State;
557  switch (State) {
558  default:
559  State = IES_ERROR;
560  break;
561  case IES_INTEGER:
562  case IES_RPAREN:
563  case IES_REGISTER:
564  State = IES_NE;
565  IC.pushOperator(IC_NE);
566  break;
567  }
568  PrevState = CurrState;
569  }
570  void onLT() {
571  IntelExprState CurrState = State;
572  switch (State) {
573  default:
574  State = IES_ERROR;
575  break;
576  case IES_INTEGER:
577  case IES_RPAREN:
578  case IES_REGISTER:
579  State = IES_LT;
580  IC.pushOperator(IC_LT);
581  break;
582  }
583  PrevState = CurrState;
584  }
585  void onLE() {
586  IntelExprState CurrState = State;
587  switch (State) {
588  default:
589  State = IES_ERROR;
590  break;
591  case IES_INTEGER:
592  case IES_RPAREN:
593  case IES_REGISTER:
594  State = IES_LE;
595  IC.pushOperator(IC_LE);
596  break;
597  }
598  PrevState = CurrState;
599  }
600  void onGT() {
601  IntelExprState CurrState = State;
602  switch (State) {
603  default:
604  State = IES_ERROR;
605  break;
606  case IES_INTEGER:
607  case IES_RPAREN:
608  case IES_REGISTER:
609  State = IES_GT;
610  IC.pushOperator(IC_GT);
611  break;
612  }
613  PrevState = CurrState;
614  }
615  void onGE() {
616  IntelExprState CurrState = State;
617  switch (State) {
618  default:
619  State = IES_ERROR;
620  break;
621  case IES_INTEGER:
622  case IES_RPAREN:
623  case IES_REGISTER:
624  State = IES_GE;
625  IC.pushOperator(IC_GE);
626  break;
627  }
628  PrevState = CurrState;
629  }
630  void onLShift() {
631  IntelExprState CurrState = State;
632  switch (State) {
633  default:
634  State = IES_ERROR;
635  break;
636  case IES_INTEGER:
637  case IES_RPAREN:
638  case IES_REGISTER:
639  State = IES_LSHIFT;
640  IC.pushOperator(IC_LSHIFT);
641  break;
642  }
643  PrevState = CurrState;
644  }
645  void onRShift() {
646  IntelExprState CurrState = State;
647  switch (State) {
648  default:
649  State = IES_ERROR;
650  break;
651  case IES_INTEGER:
652  case IES_RPAREN:
653  case IES_REGISTER:
654  State = IES_RSHIFT;
655  IC.pushOperator(IC_RSHIFT);
656  break;
657  }
658  PrevState = CurrState;
659  }
660  bool onPlus(StringRef &ErrMsg) {
661  IntelExprState CurrState = State;
662  switch (State) {
663  default:
664  State = IES_ERROR;
665  break;
666  case IES_INTEGER:
667  case IES_RPAREN:
668  case IES_REGISTER:
669  case IES_OFFSET:
670  State = IES_PLUS;
671  IC.pushOperator(IC_PLUS);
672  if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
673  // If we already have a BaseReg, then assume this is the IndexReg with
674  // no explicit scale.
675  if (!BaseReg) {
676  BaseReg = TmpReg;
677  } else {
678  if (IndexReg)
679  return regsUseUpError(ErrMsg);
680  IndexReg = TmpReg;
681  Scale = 0;
682  }
683  }
684  break;
685  }
686  PrevState = CurrState;
687  return false;
688  }
689  bool onMinus(StringRef &ErrMsg) {
690  IntelExprState CurrState = State;
691  switch (State) {
692  default:
693  State = IES_ERROR;
694  break;
695  case IES_OR:
696  case IES_XOR:
697  case IES_AND:
698  case IES_EQ:
699  case IES_NE:
700  case IES_LT:
701  case IES_LE:
702  case IES_GT:
703  case IES_GE:
704  case IES_LSHIFT:
705  case IES_RSHIFT:
706  case IES_PLUS:
707  case IES_NOT:
708  case IES_MULTIPLY:
709  case IES_DIVIDE:
710  case IES_MOD:
711  case IES_LPAREN:
712  case IES_RPAREN:
713  case IES_LBRAC:
714  case IES_RBRAC:
715  case IES_INTEGER:
716  case IES_REGISTER:
717  case IES_INIT:
718  case IES_OFFSET:
719  State = IES_MINUS;
720  // push minus operator if it is not a negate operator
721  if (CurrState == IES_REGISTER || CurrState == IES_RPAREN ||
722  CurrState == IES_INTEGER || CurrState == IES_RBRAC ||
723  CurrState == IES_OFFSET)
724  IC.pushOperator(IC_MINUS);
725  else if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
726  // We have negate operator for Scale: it's illegal
727  ErrMsg = "Scale can't be negative";
728  return true;
729  } else
730  IC.pushOperator(IC_NEG);
731  if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
732  // If we already have a BaseReg, then assume this is the IndexReg with
733  // no explicit scale.
734  if (!BaseReg) {
735  BaseReg = TmpReg;
736  } else {
737  if (IndexReg)
738  return regsUseUpError(ErrMsg);
739  IndexReg = TmpReg;
740  Scale = 0;
741  }
742  }
743  break;
744  }
745  PrevState = CurrState;
746  return false;
747  }
748  void onNot() {
749  IntelExprState CurrState = State;
750  switch (State) {
751  default:
752  State = IES_ERROR;
753  break;
754  case IES_OR:
755  case IES_XOR:
756  case IES_AND:
757  case IES_EQ:
758  case IES_NE:
759  case IES_LT:
760  case IES_LE:
761  case IES_GT:
762  case IES_GE:
763  case IES_LSHIFT:
764  case IES_RSHIFT:
765  case IES_PLUS:
766  case IES_MINUS:
767  case IES_NOT:
768  case IES_MULTIPLY:
769  case IES_DIVIDE:
770  case IES_MOD:
771  case IES_LPAREN:
772  case IES_LBRAC:
773  case IES_INIT:
774  State = IES_NOT;
775  IC.pushOperator(IC_NOT);
776  break;
777  }
778  PrevState = CurrState;
779  }
780  bool onRegister(unsigned Reg, StringRef &ErrMsg) {
781  IntelExprState CurrState = State;
782  switch (State) {
783  default:
784  State = IES_ERROR;
785  break;
786  case IES_PLUS:
787  case IES_LPAREN:
788  case IES_LBRAC:
789  State = IES_REGISTER;
790  TmpReg = Reg;
791  IC.pushOperand(IC_REGISTER);
792  break;
793  case IES_MULTIPLY:
794  // Index Register - Scale * Register
795  if (PrevState == IES_INTEGER) {
796  if (IndexReg)
797  return regsUseUpError(ErrMsg);
798  State = IES_REGISTER;
799  IndexReg = Reg;
800  // Get the scale and replace the 'Scale * Register' with '0'.
801  Scale = IC.popOperand();
802  if (checkScale(Scale, ErrMsg))
803  return true;
804  IC.pushOperand(IC_IMM);
805  IC.popOperator();
806  } else {
807  State = IES_ERROR;
808  }
809  break;
810  }
811  PrevState = CurrState;
812  return false;
813  }
814  bool onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName,
815  const InlineAsmIdentifierInfo &IDInfo,
816  const AsmTypeInfo &Type, bool ParsingMSInlineAsm,
817  StringRef &ErrMsg) {
818  // InlineAsm: Treat an enum value as an integer
819  if (ParsingMSInlineAsm)
821  return onInteger(IDInfo.Enum.EnumVal, ErrMsg);
822  // Treat a symbolic constant like an integer
823  if (auto *CE = dyn_cast<MCConstantExpr>(SymRef))
824  return onInteger(CE->getValue(), ErrMsg);
825  PrevState = State;
826  switch (State) {
827  default:
828  State = IES_ERROR;
829  break;
830  case IES_CAST:
831  case IES_PLUS:
832  case IES_MINUS:
833  case IES_NOT:
834  case IES_INIT:
835  case IES_LBRAC:
836  case IES_LPAREN:
837  if (setSymRef(SymRef, SymRefName, ErrMsg))
838  return true;
839  MemExpr = true;
840  State = IES_INTEGER;
841  IC.pushOperand(IC_IMM);
842  if (ParsingMSInlineAsm)
843  Info = IDInfo;
844  setTypeInfo(Type);
845  break;
846  }
847  return false;
848  }
849  bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
850  IntelExprState CurrState = State;
851  switch (State) {
852  default:
853  State = IES_ERROR;
854  break;
855  case IES_PLUS:
856  case IES_MINUS:
857  case IES_NOT:
858  case IES_OR:
859  case IES_XOR:
860  case IES_AND:
861  case IES_EQ:
862  case IES_NE:
863  case IES_LT:
864  case IES_LE:
865  case IES_GT:
866  case IES_GE:
867  case IES_LSHIFT:
868  case IES_RSHIFT:
869  case IES_DIVIDE:
870  case IES_MOD:
871  case IES_MULTIPLY:
872  case IES_LPAREN:
873  case IES_INIT:
874  case IES_LBRAC:
875  State = IES_INTEGER;
876  if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
877  // Index Register - Register * Scale
878  if (IndexReg)
879  return regsUseUpError(ErrMsg);
880  IndexReg = TmpReg;
881  Scale = TmpInt;
882  if (checkScale(Scale, ErrMsg))
883  return true;
884  // Get the scale and replace the 'Register * Scale' with '0'.
885  IC.popOperator();
886  } else {
887  IC.pushOperand(IC_IMM, TmpInt);
888  }
889  break;
890  }
891  PrevState = CurrState;
892  return false;
893  }
894  void onStar() {
895  PrevState = State;
896  switch (State) {
897  default:
898  State = IES_ERROR;
899  break;
900  case IES_INTEGER:
901  case IES_REGISTER:
902  case IES_RPAREN:
903  State = IES_MULTIPLY;
904  IC.pushOperator(IC_MULTIPLY);
905  break;
906  }
907  }
908  void onDivide() {
909  PrevState = State;
910  switch (State) {
911  default:
912  State = IES_ERROR;
913  break;
914  case IES_INTEGER:
915  case IES_RPAREN:
916  State = IES_DIVIDE;
917  IC.pushOperator(IC_DIVIDE);
918  break;
919  }
920  }
921  void onMod() {
922  PrevState = State;
923  switch (State) {
924  default:
925  State = IES_ERROR;
926  break;
927  case IES_INTEGER:
928  case IES_RPAREN:
929  State = IES_MOD;
930  IC.pushOperator(IC_MOD);
931  break;
932  }
933  }
934  bool onLBrac() {
935  if (BracCount)
936  return true;
937  PrevState = State;
938  switch (State) {
939  default:
940  State = IES_ERROR;
941  break;
942  case IES_RBRAC:
943  case IES_INTEGER:
944  case IES_RPAREN:
945  State = IES_PLUS;
946  IC.pushOperator(IC_PLUS);
947  CurType.Length = 1;
948  CurType.Size = CurType.ElementSize;
949  break;
950  case IES_INIT:
951  case IES_CAST:
952  assert(!BracCount && "BracCount should be zero on parsing's start");
953  State = IES_LBRAC;
954  break;
955  }
956  MemExpr = true;
957  BracCount++;
958  return false;
959  }
960  bool onRBrac(StringRef &ErrMsg) {
961  IntelExprState CurrState = State;
962  switch (State) {
963  default:
964  State = IES_ERROR;
965  break;
966  case IES_INTEGER:
967  case IES_OFFSET:
968  case IES_REGISTER:
969  case IES_RPAREN:
970  if (BracCount-- != 1) {
971  ErrMsg = "unexpected bracket encountered";
972  return true;
973  }
974  State = IES_RBRAC;
975  if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
976  // If we already have a BaseReg, then assume this is the IndexReg with
977  // no explicit scale.
978  if (!BaseReg) {
979  BaseReg = TmpReg;
980  } else {
981  if (IndexReg)
982  return regsUseUpError(ErrMsg);
983  IndexReg = TmpReg;
984  Scale = 0;
985  }
986  }
987  break;
988  }
989  PrevState = CurrState;
990  return false;
991  }
992  void onLParen() {
993  IntelExprState CurrState = State;
994  switch (State) {
995  default:
996  State = IES_ERROR;
997  break;
998  case IES_PLUS:
999  case IES_MINUS:
1000  case IES_NOT:
1001  case IES_OR:
1002  case IES_XOR:
1003  case IES_AND:
1004  case IES_EQ:
1005  case IES_NE:
1006  case IES_LT:
1007  case IES_LE:
1008  case IES_GT:
1009  case IES_GE:
1010  case IES_LSHIFT:
1011  case IES_RSHIFT:
1012  case IES_MULTIPLY:
1013  case IES_DIVIDE:
1014  case IES_MOD:
1015  case IES_LPAREN:
1016  case IES_INIT:
1017  case IES_LBRAC:
1018  State = IES_LPAREN;
1019  IC.pushOperator(IC_LPAREN);
1020  break;
1021  }
1022  PrevState = CurrState;
1023  }
1024  void onRParen() {
1025  PrevState = State;
1026  switch (State) {
1027  default:
1028  State = IES_ERROR;
1029  break;
1030  case IES_INTEGER:
1031  case IES_OFFSET:
1032  case IES_REGISTER:
1033  case IES_RBRAC:
1034  case IES_RPAREN:
1035  State = IES_RPAREN;
1036  IC.pushOperator(IC_RPAREN);
1037  break;
1038  }
1039  }
1040  bool onOffset(const MCExpr *Val, SMLoc OffsetLoc, StringRef ID,
1041  const InlineAsmIdentifierInfo &IDInfo,
1042  bool ParsingMSInlineAsm, StringRef &ErrMsg) {
1043  PrevState = State;
1044  switch (State) {
1045  default:
1046  ErrMsg = "unexpected offset operator expression";
1047  return true;
1048  case IES_PLUS:
1049  case IES_INIT:
1050  case IES_LBRAC:
1051  if (setSymRef(Val, ID, ErrMsg))
1052  return true;
1053  OffsetOperator = true;
1054  OffsetOperatorLoc = OffsetLoc;
1055  State = IES_OFFSET;
1056  // As we cannot yet resolve the actual value (offset), we retain
1057  // the requested semantics by pushing a '0' to the operands stack
1058  IC.pushOperand(IC_IMM);
1059  if (ParsingMSInlineAsm) {
1060  Info = IDInfo;
1061  }
1062  break;
1063  }
1064  return false;
1065  }
1066  void onCast(AsmTypeInfo Info) {
1067  PrevState = State;
1068  switch (State) {
1069  default:
1070  State = IES_ERROR;
1071  break;
1072  case IES_LPAREN:
1073  setTypeInfo(Info);
1074  State = IES_CAST;
1075  break;
1076  }
1077  }
1078  void setTypeInfo(AsmTypeInfo Type) { CurType = Type; }
1079  };
1080 
1081  bool Error(SMLoc L, const Twine &Msg, SMRange Range = std::nullopt,
1082  bool MatchingInlineAsm = false) {
1083  MCAsmParser &Parser = getParser();
1084  if (MatchingInlineAsm) {
1085  if (!getLexer().isAtStartOfStatement())
1086  Parser.eatToEndOfStatement();
1087  return false;
1088  }
1089  return Parser.Error(L, Msg, Range);
1090  }
1091 
1092  bool MatchRegisterByName(unsigned &RegNo, StringRef RegName, SMLoc StartLoc,
1093  SMLoc EndLoc);
1094  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1095  bool RestoreOnFailure);
1096 
1097  std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
1098  std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
1099  bool IsSIReg(unsigned Reg);
1100  unsigned GetSIDIForRegClass(unsigned RegClassID, unsigned Reg, bool IsSIReg);
1101  void
1102  AddDefaultSrcDestOperands(OperandVector &Operands,
1103  std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
1104  std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst);
1105  bool VerifyAndAdjustOperands(OperandVector &OrigOperands,
1106  OperandVector &FinalOperands);
1107  bool parseOperand(OperandVector &Operands, StringRef Name);
1108  bool parseATTOperand(OperandVector &Operands);
1109  bool parseIntelOperand(OperandVector &Operands, StringRef Name);
1110  bool ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID,
1112  bool ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End);
1113  unsigned IdentifyIntelInlineAsmOperator(StringRef Name);
1114  unsigned ParseIntelInlineAsmOperator(unsigned OpKind);
1115  unsigned IdentifyMasmOperator(StringRef Name);
1116  bool ParseMasmOperator(unsigned OpKind, int64_t &Val);
1117  bool ParseRoundingModeOp(SMLoc Start, OperandVector &Operands);
1118  bool ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM,
1119  bool &ParseError, SMLoc &End);
1120  bool ParseMasmNamedOperator(StringRef Name, IntelExprStateMachine &SM,
1121  bool &ParseError, SMLoc &End);
1122  void RewriteIntelExpression(IntelExprStateMachine &SM, SMLoc Start,
1123  SMLoc End);
1124  bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
1125  bool ParseIntelInlineAsmIdentifier(const MCExpr *&Val, StringRef &Identifier,
1127  bool IsUnevaluatedOperand, SMLoc &End,
1128  bool IsParsingOffsetOperator = false);
1129  void tryParseOperandIdx(AsmToken::TokenKind PrevTK,
1130  IntelExprStateMachine &SM);
1131 
1132  bool ParseMemOperand(unsigned SegReg, const MCExpr *Disp, SMLoc StartLoc,
1133  SMLoc EndLoc, OperandVector &Operands);
1134 
1135  X86::CondCode ParseConditionCode(StringRef CCode);
1136 
1137  bool ParseIntelMemoryOperandSize(unsigned &Size);
1138  bool CreateMemForMSInlineAsm(unsigned SegReg, const MCExpr *Disp,
1139  unsigned BaseReg, unsigned IndexReg,
1140  unsigned Scale, SMLoc Start, SMLoc End,
1141  unsigned Size, StringRef Identifier,
1144 
1145  bool parseDirectiveArch();
1146  bool parseDirectiveNops(SMLoc L);
1147  bool parseDirectiveEven(SMLoc L);
1148  bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
1149 
1150  /// CodeView FPO data directives.
1151  bool parseDirectiveFPOProc(SMLoc L);
1152  bool parseDirectiveFPOSetFrame(SMLoc L);
1153  bool parseDirectiveFPOPushReg(SMLoc L);
1154  bool parseDirectiveFPOStackAlloc(SMLoc L);
1155  bool parseDirectiveFPOStackAlign(SMLoc L);
1156  bool parseDirectiveFPOEndPrologue(SMLoc L);
1157  bool parseDirectiveFPOEndProc(SMLoc L);
1158 
1159  /// SEH directives.
1160  bool parseSEHRegisterNumber(unsigned RegClassID, unsigned &RegNo);
1161  bool parseDirectiveSEHPushReg(SMLoc);
1162  bool parseDirectiveSEHSetFrame(SMLoc);
1163  bool parseDirectiveSEHSaveReg(SMLoc);
1164  bool parseDirectiveSEHSaveXMM(SMLoc);
1165  bool parseDirectiveSEHPushFrame(SMLoc);
1166 
1167  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1168 
1169  bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
1170  bool processInstruction(MCInst &Inst, const OperandVector &Ops);
1171 
1172  // Load Value Injection (LVI) Mitigations for machine code
1173  void emitWarningForSpecialLVIInstruction(SMLoc Loc);
1174  void applyLVICFIMitigation(MCInst &Inst, MCStreamer &Out);
1175  void applyLVILoadHardeningMitigation(MCInst &Inst, MCStreamer &Out);
1176 
1177  /// Wrapper around MCStreamer::emitInstruction(). Possibly adds
1178  /// instrumentation around Inst.
1179  void emitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
1180 
1181  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1184  bool MatchingInlineAsm) override;
1185 
1186  void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
1187  MCStreamer &Out, bool MatchingInlineAsm);
1188 
1189  bool ErrorMissingFeature(SMLoc IDLoc, const FeatureBitset &MissingFeatures,
1190  bool MatchingInlineAsm);
1191 
1192  bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
1195  bool MatchingInlineAsm);
1196 
1197  bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
1200  bool MatchingInlineAsm);
1201 
1202  bool OmitRegisterFromClobberLists(unsigned RegNo) override;
1203 
1204  /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
1205  /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
1206  /// return false if no parsing errors occurred, true otherwise.
1207  bool HandleAVX512Operand(OperandVector &Operands);
1208 
1209  bool ParseZ(std::unique_ptr<X86Operand> &Z, const SMLoc &StartLoc);
1210 
1211  bool is64BitMode() const {
1212  // FIXME: Can tablegen auto-generate this?
1213  return getSTI().getFeatureBits()[X86::Is64Bit];
1214  }
1215  bool is32BitMode() const {
1216  // FIXME: Can tablegen auto-generate this?
1217  return getSTI().getFeatureBits()[X86::Is32Bit];
1218  }
1219  bool is16BitMode() const {
1220  // FIXME: Can tablegen auto-generate this?
1221  return getSTI().getFeatureBits()[X86::Is16Bit];
1222  }
  /// Switch the parser to exactly one of 16/32/64-bit mode.
  /// \p mode must be one of X86::Is16Bit, X86::Is32Bit or X86::Is64Bit.
  void SwitchMode(unsigned mode) {
    // Obtain a writable copy of the subtarget info to mutate its features.
    MCSubtargetInfo &STI = copySTI();
    FeatureBitset AllModes({X86::Is64Bit, X86::Is32Bit, X86::Is16Bit});
    // The mode bit(s) currently set; exactly one is expected.
    FeatureBitset OldMode = STI.getFeatureBits() & AllModes;
    // OldMode.flip(mode) combines "old mode off" and "new mode on" into one
    // mask so a single ToggleFeature call performs the whole switch, then the
    // available-feature mask is recomputed from the resulting feature bits.
    FeatureBitset FB = ComputeAvailableFeatures(
      STI.ToggleFeature(OldMode.flip(mode)));
    setAvailableFeatures(FB);

    // Postcondition: the requested mode is now the only mode bit set.
    assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes));
  }
1233 
1234  unsigned getPointerWidth() {
1235  if (is16BitMode()) return 16;
1236  if (is32BitMode()) return 32;
1237  if (is64BitMode()) return 64;
1238  llvm_unreachable("invalid mode");
1239  }
1240 
1241  bool isParsingIntelSyntax() {
1242  return getParser().getAssemblerDialect();
1243  }
1244 
1245  /// @name Auto-generated Matcher Functions
1246  /// {
1247 
1248 #define GET_ASSEMBLER_HEADER
1249 #include "X86GenAsmMatcher.inc"
1250 
1251  /// }
1252 
1253 public:
1254  enum X86MatchResultTy {
1255  Match_Unsupported = FIRST_TARGET_MATCH_RESULT_TY,
1256 #define GET_OPERAND_DIAGNOSTIC_TYPES
1257 #include "X86GenAsmMatcher.inc"
1258  };
1259 
  /// Construct an X86 asm parser for the given subtarget, instruction info
  /// and target options.
  X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
               const MCInstrInfo &mii, const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, sti, mii), InstInfo(nullptr),
        Code16GCC(false) {

    // On x86 ".word" emits 16 bits, so alias it to ".2byte".
    Parser.addAliasForDirective(".word", ".2byte");

    // Initialize the set of available features.
    setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
  }
1270 
1271  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1272  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1273  SMLoc &EndLoc) override;
1274 
1275  bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1276 
1277  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1278  SMLoc NameLoc, OperandVector &Operands) override;
1279 
1280  bool ParseDirective(AsmToken DirectiveID) override;
1281 };
1282 } // end anonymous namespace
1283 
1284 /// @name Auto-generated Match Functions
1285 /// {
1286 
1287 static unsigned MatchRegisterName(StringRef Name);
1288 
1289 /// }
1290 
1291 static bool CheckBaseRegAndIndexRegAndScale(unsigned BaseReg, unsigned IndexReg,
1292  unsigned Scale, bool Is64BitMode,
1293  StringRef &ErrMsg) {
1294  // If we have both a base register and an index register make sure they are
1295  // both 64-bit or 32-bit registers.
1296  // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
1297 
1298  if (BaseReg != 0 &&
1299  !(BaseReg == X86::RIP || BaseReg == X86::EIP ||
1300  X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) ||
1301  X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) ||
1302  X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg))) {
1303  ErrMsg = "invalid base+index expression";
1304  return true;
1305  }
1306 
1307  if (IndexReg != 0 &&
1308  !(IndexReg == X86::EIZ || IndexReg == X86::RIZ ||
1309  X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1310  X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
1311  X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
1312  X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) ||
1313  X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) ||
1314  X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg))) {
1315  ErrMsg = "invalid base+index expression";
1316  return true;
1317  }
1318 
1319  if (((BaseReg == X86::RIP || BaseReg == X86::EIP) && IndexReg != 0) ||
1320  IndexReg == X86::EIP || IndexReg == X86::RIP ||
1321  IndexReg == X86::ESP || IndexReg == X86::RSP) {
1322  ErrMsg = "invalid base+index expression";
1323  return true;
1324  }
1325 
1326  // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
1327  // and then only in non-64-bit modes.
1328  if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1329  (Is64BitMode || (BaseReg != X86::BX && BaseReg != X86::BP &&
1330  BaseReg != X86::SI && BaseReg != X86::DI))) {
1331  ErrMsg = "invalid 16-bit base register";
1332  return true;
1333  }
1334 
1335  if (BaseReg == 0 &&
1336  X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
1337  ErrMsg = "16-bit memory operand may not include only index register";
1338  return true;
1339  }
1340 
1341  if (BaseReg != 0 && IndexReg != 0) {
1342  if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
1343  (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1344  X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
1345  IndexReg == X86::EIZ)) {
1346  ErrMsg = "base register is 64-bit, but index register is not";
1347  return true;
1348  }
1349  if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
1350  (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1351  X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
1352  IndexReg == X86::RIZ)) {
1353  ErrMsg = "base register is 32-bit, but index register is not";
1354  return true;
1355  }
1356  if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
1357  if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
1358  X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
1359  ErrMsg = "base register is 16-bit, but index register is not";
1360  return true;
1361  }
1362  if ((BaseReg != X86::BX && BaseReg != X86::BP) ||
1363  (IndexReg != X86::SI && IndexReg != X86::DI)) {
1364  ErrMsg = "invalid 16-bit base/index register combination";
1365  return true;
1366  }
1367  }
1368  }
1369 
1370  // RIP/EIP-relative addressing is only supported in 64-bit mode.
1371  if (!Is64BitMode && BaseReg != 0 &&
1372  (BaseReg == X86::RIP || BaseReg == X86::EIP)) {
1373  ErrMsg = "IP-relative addressing requires 64-bit mode";
1374  return true;
1375  }
1376 
1377  return checkScale(Scale, ErrMsg);
1378 }
1379 
1380 bool X86AsmParser::MatchRegisterByName(unsigned &RegNo, StringRef RegName,
1381  SMLoc StartLoc, SMLoc EndLoc) {
1382  // If we encounter a %, ignore it. This code handles registers with and
1383  // without the prefix, unprefixed registers can occur in cfi directives.
1384  RegName.consume_front("%");
1385 
1386  RegNo = MatchRegisterName(RegName);
1387 
1388  // If the match failed, try the register name as lowercase.
1389  if (RegNo == 0)
1390  RegNo = MatchRegisterName(RegName.lower());
1391 
1392  // The "flags" and "mxcsr" registers cannot be referenced directly.
1393  // Treat it as an identifier instead.
1394  if (isParsingMSInlineAsm() && isParsingIntelSyntax() &&
1395  (RegNo == X86::EFLAGS || RegNo == X86::MXCSR))
1396  RegNo = 0;
1397 
1398  if (!is64BitMode()) {
1399  // FIXME: This should be done using Requires<Not64BitMode> and
1400  // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
1401  // checked.
1402  if (RegNo == X86::RIZ || RegNo == X86::RIP ||
1403  X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
1405  X86II::isX86_64ExtendedReg(RegNo)) {
1406  return Error(StartLoc,
1407  "register %" + RegName + " is only available in 64-bit mode",
1408  SMRange(StartLoc, EndLoc));
1409  }
1410  }
1411 
1412  // If this is "db[0-15]", match it as an alias
1413  // for dr[0-15].
1414  if (RegNo == 0 && RegName.startswith("db")) {
1415  if (RegName.size() == 3) {
1416  switch (RegName[2]) {
1417  case '0':
1418  RegNo = X86::DR0;
1419  break;
1420  case '1':
1421  RegNo = X86::DR1;
1422  break;
1423  case '2':
1424  RegNo = X86::DR2;
1425  break;
1426  case '3':
1427  RegNo = X86::DR3;
1428  break;
1429  case '4':
1430  RegNo = X86::DR4;
1431  break;
1432  case '5':
1433  RegNo = X86::DR5;
1434  break;
1435  case '6':
1436  RegNo = X86::DR6;
1437  break;
1438  case '7':
1439  RegNo = X86::DR7;
1440  break;
1441  case '8':
1442  RegNo = X86::DR8;
1443  break;
1444  case '9':
1445  RegNo = X86::DR9;
1446  break;
1447  }
1448  } else if (RegName.size() == 4 && RegName[2] == '1') {
1449  switch (RegName[3]) {
1450  case '0':
1451  RegNo = X86::DR10;
1452  break;
1453  case '1':
1454  RegNo = X86::DR11;
1455  break;
1456  case '2':
1457  RegNo = X86::DR12;
1458  break;
1459  case '3':
1460  RegNo = X86::DR13;
1461  break;
1462  case '4':
1463  RegNo = X86::DR14;
1464  break;
1465  case '5':
1466  RegNo = X86::DR15;
1467  break;
1468  }
1469  }
1470  }
1471 
1472  if (RegNo == 0) {
1473  if (isParsingIntelSyntax())
1474  return true;
1475  return Error(StartLoc, "invalid register name", SMRange(StartLoc, EndLoc));
1476  }
1477  return false;
1478 }
1479 
/// Parse a register reference, with or without a '%' prefix (AT&T syntax
/// only uses the prefix; unprefixed names occur in cfi directives).
/// When \p RestoreOnFailure is set, every consumed token is pushed back to
/// the lexer on failure so the caller can retry the input differently.
bool X86AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                 SMLoc &EndLoc, bool RestoreOnFailure) {
  MCAsmParser &Parser = getParser();
  MCAsmLexer &Lexer = getLexer();
  RegNo = 0;

  // Record each consumed token so OnFailure can un-lex them in reverse order.
  SmallVector<AsmToken, 5> Tokens;
  auto OnFailure = [RestoreOnFailure, &Lexer, &Tokens]() {
    if (RestoreOnFailure) {
      while (!Tokens.empty()) {
        Lexer.UnLex(Tokens.pop_back_val());
      }
    }
  };

  const AsmToken &PercentTok = Parser.getTok();
  StartLoc = PercentTok.getLoc();

  // If we encounter a %, ignore it. This code handles registers with and
  // without the prefix, unprefixed registers can occur in cfi directives.
  if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent)) {
    Tokens.push_back(PercentTok);
    Parser.Lex(); // Eat percent token.
  }

  const AsmToken &Tok = Parser.getTok();
  EndLoc = Tok.getEndLoc();

  if (Tok.isNot(AsmToken::Identifier)) {
    OnFailure();
    // In Intel syntax a failed register parse is not a hard error: the
    // caller may reinterpret the token as an identifier.
    if (isParsingIntelSyntax()) return true;
    return Error(StartLoc, "invalid register name",
                 SMRange(StartLoc, EndLoc));
  }

  if (MatchRegisterByName(RegNo, Tok.getString(), StartLoc, EndLoc)) {
    OnFailure();
    return true;
  }

  // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
  if (RegNo == X86::ST0) {
    Tokens.push_back(Tok);
    Parser.Lex(); // Eat 'st'

    // Check to see if we have '(4)' after %st.
    if (Lexer.isNot(AsmToken::LParen))
      return false;
    // Lex the paren.
    Tokens.push_back(Parser.getTok());
    Parser.Lex();

    const AsmToken &IntTok = Parser.getTok();
    if (IntTok.isNot(AsmToken::Integer)) {
      OnFailure();
      return Error(IntTok.getLoc(), "expected stack index");
    }
    // Map the literal stack index 0-7 onto ST0-ST7.
    switch (IntTok.getIntVal()) {
    case 0: RegNo = X86::ST0; break;
    case 1: RegNo = X86::ST1; break;
    case 2: RegNo = X86::ST2; break;
    case 3: RegNo = X86::ST3; break;
    case 4: RegNo = X86::ST4; break;
    case 5: RegNo = X86::ST5; break;
    case 6: RegNo = X86::ST6; break;
    case 7: RegNo = X86::ST7; break;
    default:
      OnFailure();
      return Error(IntTok.getLoc(), "invalid stack index");
    }

    // Lex IntTok
    Tokens.push_back(IntTok);
    Parser.Lex();
    if (Lexer.isNot(AsmToken::RParen)) {
      OnFailure();
      return Error(Parser.getTok().getLoc(), "expected ')'");
    }

    EndLoc = Parser.getTok().getEndLoc();
    Parser.Lex(); // Eat ')'
    return false;
  }

  EndLoc = Parser.getTok().getEndLoc();

  if (RegNo == 0) {
    OnFailure();
    if (isParsingIntelSyntax()) return true;
    return Error(StartLoc, "invalid register name",
                 SMRange(StartLoc, EndLoc));
  }

  Parser.Lex(); // Eat identifier token.
  return false;
}
1576 
1577 bool X86AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1578  SMLoc &EndLoc) {
1579  return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
1580 }
1581 
1582 OperandMatchResultTy X86AsmParser::tryParseRegister(unsigned &RegNo,
1583  SMLoc &StartLoc,
1584  SMLoc &EndLoc) {
1585  bool Result =
1586  ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
1587  bool PendingErrors = getParser().hasPendingError();
1588  getParser().clearPendingErrors();
1589  if (PendingErrors)
1590  return MatchOperand_ParseFail;
1591  if (Result)
1592  return MatchOperand_NoMatch;
1593  return MatchOperand_Success;
1594 }
1595 
1596 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
1597  bool Parse32 = is32BitMode() || Code16GCC;
1598  unsigned Basereg = is64BitMode() ? X86::RSI : (Parse32 ? X86::ESI : X86::SI);
1599  const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1600  return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1601  /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
1602  Loc, Loc, 0);
1603 }
1604 
1605 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
1606  bool Parse32 = is32BitMode() || Code16GCC;
1607  unsigned Basereg = is64BitMode() ? X86::RDI : (Parse32 ? X86::EDI : X86::DI);
1608  const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1609  return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1610  /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
1611  Loc, Loc, 0);
1612 }
1613 
1614 bool X86AsmParser::IsSIReg(unsigned Reg) {
1615  switch (Reg) {
1616  default: llvm_unreachable("Only (R|E)SI and (R|E)DI are expected!");
1617  case X86::RSI:
1618  case X86::ESI:
1619  case X86::SI:
1620  return true;
1621  case X86::RDI:
1622  case X86::EDI:
1623  case X86::DI:
1624  return false;
1625  }
1626 }
1627 
1628 unsigned X86AsmParser::GetSIDIForRegClass(unsigned RegClassID, unsigned Reg,
1629  bool IsSIReg) {
1630  switch (RegClassID) {
1631  default: llvm_unreachable("Unexpected register class");
1632  case X86::GR64RegClassID:
1633  return IsSIReg ? X86::RSI : X86::RDI;
1634  case X86::GR32RegClassID:
1635  return IsSIReg ? X86::ESI : X86::EDI;
1636  case X86::GR16RegClassID:
1637  return IsSIReg ? X86::SI : X86::DI;
1638  }
1639 }
1640 
1641 void X86AsmParser::AddDefaultSrcDestOperands(
1642  OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
1643  std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst) {
1644  if (isParsingIntelSyntax()) {
1645  Operands.push_back(std::move(Dst));
1646  Operands.push_back(std::move(Src));
1647  }
1648  else {
1649  Operands.push_back(std::move(Src));
1650  Operands.push_back(std::move(Dst));
1651  }
1652 }
1653 
1654 bool X86AsmParser::VerifyAndAdjustOperands(OperandVector &OrigOperands,
1655  OperandVector &FinalOperands) {
1656 
1657  if (OrigOperands.size() > 1) {
1658  // Check if sizes match, OrigOperands also contains the instruction name
1659  assert(OrigOperands.size() == FinalOperands.size() + 1 &&
1660  "Operand size mismatch");
1661 
1663  // Verify types match
1664  int RegClassID = -1;
1665  for (unsigned int i = 0; i < FinalOperands.size(); ++i) {
1666  X86Operand &OrigOp = static_cast<X86Operand &>(*OrigOperands[i + 1]);
1667  X86Operand &FinalOp = static_cast<X86Operand &>(*FinalOperands[i]);
1668 
1669  if (FinalOp.isReg() &&
1670  (!OrigOp.isReg() || FinalOp.getReg() != OrigOp.getReg()))
1671  // Return false and let a normal complaint about bogus operands happen
1672  return false;
1673 
1674  if (FinalOp.isMem()) {
1675 
1676  if (!OrigOp.isMem())
1677  // Return false and let a normal complaint about bogus operands happen
1678  return false;
1679 
1680  unsigned OrigReg = OrigOp.Mem.BaseReg;
1681  unsigned FinalReg = FinalOp.Mem.BaseReg;
1682 
1683  // If we've already encounterd a register class, make sure all register
1684  // bases are of the same register class
1685  if (RegClassID != -1 &&
1686  !X86MCRegisterClasses[RegClassID].contains(OrigReg)) {
1687  return Error(OrigOp.getStartLoc(),
1688  "mismatching source and destination index registers");
1689  }
1690 
1691  if (X86MCRegisterClasses[X86::GR64RegClassID].contains(OrigReg))
1692  RegClassID = X86::GR64RegClassID;
1693  else if (X86MCRegisterClasses[X86::GR32RegClassID].contains(OrigReg))
1694  RegClassID = X86::GR32RegClassID;
1695  else if (X86MCRegisterClasses[X86::GR16RegClassID].contains(OrigReg))
1696  RegClassID = X86::GR16RegClassID;
1697  else
1698  // Unexpected register class type
1699  // Return false and let a normal complaint about bogus operands happen
1700  return false;
1701 
1702  bool IsSI = IsSIReg(FinalReg);
1703  FinalReg = GetSIDIForRegClass(RegClassID, FinalReg, IsSI);
1704 
1705  if (FinalReg != OrigReg) {
1706  std::string RegName = IsSI ? "ES:(R|E)SI" : "ES:(R|E)DI";
1707  Warnings.push_back(std::make_pair(
1708  OrigOp.getStartLoc(),
1709  "memory operand is only for determining the size, " + RegName +
1710  " will be used for the location"));
1711  }
1712 
1713  FinalOp.Mem.Size = OrigOp.Mem.Size;
1714  FinalOp.Mem.SegReg = OrigOp.Mem.SegReg;
1715  FinalOp.Mem.BaseReg = FinalReg;
1716  }
1717  }
1718 
1719  // Produce warnings only if all the operands passed the adjustment - prevent
1720  // legal cases like "movsd (%rax), %xmm0" mistakenly produce warnings
1721  for (auto &WarningMsg : Warnings) {
1722  Warning(WarningMsg.first, WarningMsg.second);
1723  }
1724 
1725  // Remove old operands
1726  for (unsigned int i = 0; i < FinalOperands.size(); ++i)
1727  OrigOperands.pop_back();
1728  }
1729  // OrigOperands.append(FinalOperands.begin(), FinalOperands.end());
1730  for (unsigned int i = 0; i < FinalOperands.size(); ++i)
1731  OrigOperands.push_back(std::move(FinalOperands[i]));
1732 
1733  return false;
1734 }
1735 
1736 bool X86AsmParser::parseOperand(OperandVector &Operands, StringRef Name) {
1737  if (isParsingIntelSyntax())
1738  return parseIntelOperand(Operands, Name);
1739 
1740  return parseATTOperand(Operands);
1741 }
1742 
1743 bool X86AsmParser::CreateMemForMSInlineAsm(
1744  unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
1745  unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
1747  // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
1748  // some other label reference.
1750  // Insert an explicit size if the user didn't have one.
1751  if (!Size) {
1752  Size = getPointerWidth();
1753  InstInfo->AsmRewrites->emplace_back(AOK_SizeDirective, Start,
1754  /*Len=*/0, Size);
1755  }
1756  // Create an absolute memory reference in order to match against
1757  // instructions taking a PC relative operand.
1758  Operands.push_back(X86Operand::CreateMem(getPointerWidth(), Disp, Start,
1759  End, Size, Identifier,
1760  Info.Label.Decl));
1761  return false;
1762  }
1763  // We either have a direct symbol reference, or an offset from a symbol. The
1764  // parser always puts the symbol on the LHS, so look there for size
1765  // calculation purposes.
1766  unsigned FrontendSize = 0;
1767  void *Decl = nullptr;
1768  bool IsGlobalLV = false;
1769  if (Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
1770  // Size is in terms of bits in this context.
1771  FrontendSize = Info.Var.Type * 8;
1772  Decl = Info.Var.Decl;
1773  IsGlobalLV = Info.Var.IsGlobalLV;
1774  }
1775  // It is widely common for MS InlineAsm to use a global variable and one/two
1776  // registers in a mmory expression, and though unaccessible via rip/eip.
1777  if (IsGlobalLV && (BaseReg || IndexReg)) {
1778  Operands.push_back(X86Operand::CreateMem(getPointerWidth(), Disp, Start,
1779  End, Size, Identifier, Decl, 0,
1780  BaseReg && IndexReg));
1781  return false;
1782  }
1783  // Otherwise, we set the base register to a non-zero value
1784  // if we don't know the actual value at this time. This is necessary to
1785  // get the matching correct in some cases.
1786  BaseReg = BaseReg ? BaseReg : 1;
1787  Operands.push_back(X86Operand::CreateMem(
1788  getPointerWidth(), SegReg, Disp, BaseReg, IndexReg, Scale, Start, End,
1789  Size,
1790  /*DefaultBaseReg=*/X86::RIP, Identifier, Decl, FrontendSize));
1791  return false;
1792 }
1793 
// Some binary bitwise operators have a named synonym.
// Query a candidate string for being such a named operator
// and if so - invoke the appropriate handler.
1797 bool X86AsmParser::ParseIntelNamedOperator(StringRef Name,
1798  IntelExprStateMachine &SM,
1799  bool &ParseError, SMLoc &End) {
1800  // A named operator should be either lower or upper case, but not a mix...
1801  // except in MASM, which uses full case-insensitivity.
1802  if (Name.compare(Name.lower()) && Name.compare(Name.upper()) &&
1803  !getParser().isParsingMasm())
1804  return false;
1805  if (Name.equals_insensitive("not")) {
1806  SM.onNot();
1807  } else if (Name.equals_insensitive("or")) {
1808  SM.onOr();
1809  } else if (Name.equals_insensitive("shl")) {
1810  SM.onLShift();
1811  } else if (Name.equals_insensitive("shr")) {
1812  SM.onRShift();
1813  } else if (Name.equals_insensitive("xor")) {
1814  SM.onXor();
1815  } else if (Name.equals_insensitive("and")) {
1816  SM.onAnd();
1817  } else if (Name.equals_insensitive("mod")) {
1818  SM.onMod();
1819  } else if (Name.equals_insensitive("offset")) {
1820  SMLoc OffsetLoc = getTok().getLoc();
1821  const MCExpr *Val = nullptr;
1822  StringRef ID;
1824  ParseError = ParseIntelOffsetOperator(Val, ID, Info, End);
1825  if (ParseError)
1826  return true;
1827  StringRef ErrMsg;
1828  ParseError =
1829  SM.onOffset(Val, OffsetLoc, ID, Info, isParsingMSInlineAsm(), ErrMsg);
1830  if (ParseError)
1831  return Error(SMLoc::getFromPointer(Name.data()), ErrMsg);
1832  } else {
1833  return false;
1834  }
1835  if (!Name.equals_insensitive("offset"))
1836  End = consumeToken();
1837  return true;
1838 }
1839 bool X86AsmParser::ParseMasmNamedOperator(StringRef Name,
1840  IntelExprStateMachine &SM,
1841  bool &ParseError, SMLoc &End) {
1842  if (Name.equals_insensitive("eq")) {
1843  SM.onEq();
1844  } else if (Name.equals_insensitive("ne")) {
1845  SM.onNE();
1846  } else if (Name.equals_insensitive("lt")) {
1847  SM.onLT();
1848  } else if (Name.equals_insensitive("le")) {
1849  SM.onLE();
1850  } else if (Name.equals_insensitive("gt")) {
1851  SM.onGT();
1852  } else if (Name.equals_insensitive("ge")) {
1853  SM.onGE();
1854  } else {
1855  return false;
1856  }
1857  End = consumeToken();
1858  return true;
1859 }
1860 
// Check whether the current Intel expression is appended after an operand,
// as in: [Operand][Intel Expression]
1863 void X86AsmParser::tryParseOperandIdx(AsmToken::TokenKind PrevTK,
1864  IntelExprStateMachine &SM) {
1865  if (PrevTK != AsmToken::RBrac)
1866  return;
1867 
1868  SM.setAppendAfterOperand();
1869 }
1870 
1871 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1872  MCAsmParser &Parser = getParser();
1873  StringRef ErrMsg;
1874 
1876 
1877  if (getContext().getObjectFileInfo()->isPositionIndependent())
1878  SM.setPIC();
1879 
1880  bool Done = false;
1881  while (!Done) {
1882  // Get a fresh reference on each loop iteration in case the previous
1883  // iteration moved the token storage during UnLex().
1884  const AsmToken &Tok = Parser.getTok();
1885 
1886  bool UpdateLocLex = true;
1887  AsmToken::TokenKind TK = getLexer().getKind();
1888 
1889  switch (TK) {
1890  default:
1891  if ((Done = SM.isValidEndState()))
1892  break;
1893  return Error(Tok.getLoc(), "unknown token in expression");
1894  case AsmToken::Error:
1895  return Error(getLexer().getErrLoc(), getLexer().getErr());
1896  break;
1898  Done = true;
1899  break;
1900  case AsmToken::Real:
1901  // DotOperator: [ebx].0
1902  UpdateLocLex = false;
1903  if (ParseIntelDotOperator(SM, End))
1904  return true;
1905  break;
1906  case AsmToken::Dot:
1907  if (!Parser.isParsingMasm()) {
1908  if ((Done = SM.isValidEndState()))
1909  break;
1910  return Error(Tok.getLoc(), "unknown token in expression");
1911  }
1912  // MASM allows spaces around the dot operator (e.g., "var . x")
1913  Lex();
1914  UpdateLocLex = false;
1915  if (ParseIntelDotOperator(SM, End))
1916  return true;
1917  break;
1918  case AsmToken::Dollar:
1919  if (!Parser.isParsingMasm()) {
1920  if ((Done = SM.isValidEndState()))
1921  break;
1922  return Error(Tok.getLoc(), "unknown token in expression");
1923  }
1924  [[fallthrough]];
1925  case AsmToken::String: {
1926  if (Parser.isParsingMasm()) {
1927  // MASM parsers handle strings in expressions as constants.
1928  SMLoc ValueLoc = Tok.getLoc();
1929  int64_t Res;
1930  const MCExpr *Val;
1931  if (Parser.parsePrimaryExpr(Val, End, nullptr))
1932  return true;
1933  UpdateLocLex = false;
1934  if (!Val->evaluateAsAbsolute(Res, getStreamer().getAssemblerPtr()))
1935  return Error(ValueLoc, "expected absolute value");
1936  if (SM.onInteger(Res, ErrMsg))
1937  return Error(ValueLoc, ErrMsg);
1938  break;
1939  }
1940  [[fallthrough]];
1941  }
1942  case AsmToken::At:
1943  case AsmToken::Identifier: {
1944  SMLoc IdentLoc = Tok.getLoc();
1945  StringRef Identifier = Tok.getString();
1946  UpdateLocLex = false;
1947  if (Parser.isParsingMasm()) {
1948  size_t DotOffset = Identifier.find_first_of('.');
1949  if (DotOffset != StringRef::npos) {
1950  consumeToken();
1951  StringRef LHS = Identifier.slice(0, DotOffset);
1952  StringRef Dot = Identifier.slice(DotOffset, DotOffset + 1);
1953  StringRef RHS = Identifier.slice(DotOffset + 1, StringRef::npos);
1954  if (!RHS.empty()) {
1955  getLexer().UnLex(AsmToken(AsmToken::Identifier, RHS));
1956  }
1957  getLexer().UnLex(AsmToken(AsmToken::Dot, Dot));
1958  if (!LHS.empty()) {
1959  getLexer().UnLex(AsmToken(AsmToken::Identifier, LHS));
1960  }
1961  break;
1962  }
1963  }
1964  // (MASM only) <TYPE> PTR operator
1965  if (Parser.isParsingMasm()) {
1966  const AsmToken &NextTok = getLexer().peekTok();
1967  if (NextTok.is(AsmToken::Identifier) &&
1968  NextTok.getIdentifier().equals_insensitive("ptr")) {
1969  AsmTypeInfo Info;
1970  if (Parser.lookUpType(Identifier, Info))
1971  return Error(Tok.getLoc(), "unknown type");
1972  SM.onCast(Info);
1973  // Eat type and PTR.
1974  consumeToken();
1975  End = consumeToken();
1976  break;
1977  }
1978  }
1979  // Register, or (MASM only) <register>.<field>
1980  unsigned Reg;
1981  if (Tok.is(AsmToken::Identifier)) {
1982  if (!ParseRegister(Reg, IdentLoc, End, /*RestoreOnFailure=*/true)) {
1983  if (SM.onRegister(Reg, ErrMsg))
1984  return Error(IdentLoc, ErrMsg);
1985  break;
1986  }
1987  if (Parser.isParsingMasm()) {
1988  const std::pair<StringRef, StringRef> IDField =
1989  Tok.getString().split('.');
1990  const StringRef ID = IDField.first, Field = IDField.second;
1991  SMLoc IDEndLoc = SMLoc::getFromPointer(ID.data() + ID.size());
1992  if (!Field.empty() &&
1993  !MatchRegisterByName(Reg, ID, IdentLoc, IDEndLoc)) {
1994  if (SM.onRegister(Reg, ErrMsg))
1995  return Error(IdentLoc, ErrMsg);
1996 
1998  SMLoc FieldStartLoc = SMLoc::getFromPointer(Field.data());
1999  if (Parser.lookUpField(Field, Info))
2000  return Error(FieldStartLoc, "unknown offset");
2001  else if (SM.onPlus(ErrMsg))
2002  return Error(getTok().getLoc(), ErrMsg);
2003  else if (SM.onInteger(Info.Offset, ErrMsg))
2004  return Error(IdentLoc, ErrMsg);
2005  SM.setTypeInfo(Info.Type);
2006 
2007  End = consumeToken();
2008  break;
2009  }
2010  }
2011  }
2012  // Operator synonymous ("not", "or" etc.)
2013  bool ParseError = false;
2014  if (ParseIntelNamedOperator(Identifier, SM, ParseError, End)) {
2015  if (ParseError)
2016  return true;
2017  break;
2018  }
2019  if (Parser.isParsingMasm() &&
2020  ParseMasmNamedOperator(Identifier, SM, ParseError, End)) {
2021  if (ParseError)
2022  return true;
2023  break;
2024  }
2025  // Symbol reference, when parsing assembly content
2027  AsmFieldInfo FieldInfo;
2028  const MCExpr *Val;
2029  if (isParsingMSInlineAsm() || Parser.isParsingMasm()) {
2030  // MS Dot Operator expression
2031  if (Identifier.count('.') &&
2032  (PrevTK == AsmToken::RBrac || PrevTK == AsmToken::RParen)) {
2033  if (ParseIntelDotOperator(SM, End))
2034  return true;
2035  break;
2036  }
2037  }
2038  if (isParsingMSInlineAsm()) {
2039  // MS InlineAsm operators (TYPE/LENGTH/SIZE)
2040  if (unsigned OpKind = IdentifyIntelInlineAsmOperator(Identifier)) {
2041  if (int64_t Val = ParseIntelInlineAsmOperator(OpKind)) {
2042  if (SM.onInteger(Val, ErrMsg))
2043  return Error(IdentLoc, ErrMsg);
2044  } else {
2045  return true;
2046  }
2047  break;
2048  }
2049  // MS InlineAsm identifier
2050  // Call parseIdentifier() to combine @ with the identifier behind it.
2051  if (TK == AsmToken::At && Parser.parseIdentifier(Identifier))
2052  return Error(IdentLoc, "expected identifier");
2053  if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info, false, End))
2054  return true;
2055  else if (SM.onIdentifierExpr(Val, Identifier, Info, FieldInfo.Type,
2056  true, ErrMsg))
2057  return Error(IdentLoc, ErrMsg);
2058  break;
2059  }
2060  if (Parser.isParsingMasm()) {
2061  if (unsigned OpKind = IdentifyMasmOperator(Identifier)) {
2062  int64_t Val;
2063  if (ParseMasmOperator(OpKind, Val))
2064  return true;
2065  if (SM.onInteger(Val, ErrMsg))
2066  return Error(IdentLoc, ErrMsg);
2067  break;
2068  }
2069  if (!getParser().lookUpType(Identifier, FieldInfo.Type)) {
2070  // Field offset immediate; <TYPE>.<field specification>
2071  Lex(); // eat type
2072  bool EndDot = parseOptionalToken(AsmToken::Dot);
2073  while (EndDot || (getTok().is(AsmToken::Identifier) &&
2074  getTok().getString().startswith("."))) {
2075  getParser().parseIdentifier(Identifier);
2076  if (!EndDot)
2077  Identifier.consume_front(".");
2078  EndDot = Identifier.consume_back(".");
2079  if (getParser().lookUpField(FieldInfo.Type.Name, Identifier,
2080  FieldInfo)) {
2081  SMLoc IDEnd =
2082  SMLoc::getFromPointer(Identifier.data() + Identifier.size());
2083  return Error(IdentLoc, "Unable to lookup field reference!",
2084  SMRange(IdentLoc, IDEnd));
2085  }
2086  if (!EndDot)
2087  EndDot = parseOptionalToken(AsmToken::Dot);
2088  }
2089  if (SM.onInteger(FieldInfo.Offset, ErrMsg))
2090  return Error(IdentLoc, ErrMsg);
2091  break;
2092  }
2093  }
2094  if (getParser().parsePrimaryExpr(Val, End, &FieldInfo.Type)) {
2095  return Error(Tok.getLoc(), "Unexpected identifier!");
2096  } else if (SM.onIdentifierExpr(Val, Identifier, Info, FieldInfo.Type,
2097  false, ErrMsg)) {
2098  return Error(IdentLoc, ErrMsg);
2099  }
2100  break;
2101  }
2102  case AsmToken::Integer: {
2103  // Look for 'b' or 'f' following an Integer as a directional label
2104  SMLoc Loc = getTok().getLoc();
2105  int64_t IntVal = getTok().getIntVal();
2106  End = consumeToken();
2107  UpdateLocLex = false;
2108  if (getLexer().getKind() == AsmToken::Identifier) {
2109  StringRef IDVal = getTok().getString();
2110  if (IDVal == "f" || IDVal == "b") {
2111  MCSymbol *Sym =
2112  getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b");
2114  const MCExpr *Val =
2115  MCSymbolRefExpr::create(Sym, Variant, getContext());
2116  if (IDVal == "b" && Sym->isUndefined())
2117  return Error(Loc, "invalid reference to undefined symbol");
2118  StringRef Identifier = Sym->getName();
2120  AsmTypeInfo Type;
2121  if (SM.onIdentifierExpr(Val, Identifier, Info, Type,
2122  isParsingMSInlineAsm(), ErrMsg))
2123  return Error(Loc, ErrMsg);
2124  End = consumeToken();
2125  } else {
2126  if (SM.onInteger(IntVal, ErrMsg))
2127  return Error(Loc, ErrMsg);
2128  }
2129  } else {
2130  if (SM.onInteger(IntVal, ErrMsg))
2131  return Error(Loc, ErrMsg);
2132  }
2133  break;
2134  }
2135  case AsmToken::Plus:
2136  if (SM.onPlus(ErrMsg))
2137  return Error(getTok().getLoc(), ErrMsg);
2138  break;
2139  case AsmToken::Minus:
2140  if (SM.onMinus(ErrMsg))
2141  return Error(getTok().getLoc(), ErrMsg);
2142  break;
2143  case AsmToken::Tilde: SM.onNot(); break;
2144  case AsmToken::Star: SM.onStar(); break;
2145  case AsmToken::Slash: SM.onDivide(); break;
2146  case AsmToken::Percent: SM.onMod(); break;
2147  case AsmToken::Pipe: SM.onOr(); break;
2148  case AsmToken::Caret: SM.onXor(); break;
2149  case AsmToken::Amp: SM.onAnd(); break;
2150  case AsmToken::LessLess:
2151  SM.onLShift(); break;
2153  SM.onRShift(); break;
2154  case AsmToken::LBrac:
2155  if (SM.onLBrac())
2156  return Error(Tok.getLoc(), "unexpected bracket encountered");
2157  tryParseOperandIdx(PrevTK, SM);
2158  break;
2159  case AsmToken::RBrac:
2160  if (SM.onRBrac(ErrMsg)) {
2161  return Error(Tok.getLoc(), ErrMsg);
2162  }
2163  break;
2164  case AsmToken::LParen: SM.onLParen(); break;
2165  case AsmToken::RParen: SM.onRParen(); break;
2166  }
2167  if (SM.hadError())
2168  return Error(Tok.getLoc(), "unknown token in expression");
2169 
2170  if (!Done && UpdateLocLex)
2171  End = consumeToken();
2172 
2173  PrevTK = TK;
2174  }
2175  return false;
2176 }
2177 
2178 void X86AsmParser::RewriteIntelExpression(IntelExprStateMachine &SM,
2179  SMLoc Start, SMLoc End) {
2180  SMLoc Loc = Start;
2181  unsigned ExprLen = End.getPointer() - Start.getPointer();
2182  // Skip everything before a symbol displacement (if we have one)
2183  if (SM.getSym() && !SM.isOffsetOperator()) {
2184  StringRef SymName = SM.getSymName();
2185  if (unsigned Len = SymName.data() - Start.getPointer())
2186  InstInfo->AsmRewrites->emplace_back(AOK_Skip, Start, Len);
2187  Loc = SMLoc::getFromPointer(SymName.data() + SymName.size());
2188  ExprLen = End.getPointer() - (SymName.data() + SymName.size());
2189  // If we have only a symbol than there's no need for complex rewrite,
2190  // simply skip everything after it
2191  if (!(SM.getBaseReg() || SM.getIndexReg() || SM.getImm())) {
2192  if (ExprLen)
2193  InstInfo->AsmRewrites->emplace_back(AOK_Skip, Loc, ExprLen);
2194  return;
2195  }
2196  }
2197  // Build an Intel Expression rewrite
2198  StringRef BaseRegStr;
2199  StringRef IndexRegStr;
2200  StringRef OffsetNameStr;
2201  if (SM.getBaseReg())
2202  BaseRegStr = X86IntelInstPrinter::getRegisterName(SM.getBaseReg());
2203  if (SM.getIndexReg())
2204  IndexRegStr = X86IntelInstPrinter::getRegisterName(SM.getIndexReg());
2205  if (SM.isOffsetOperator())
2206  OffsetNameStr = SM.getSymName();
2207  // Emit it
2208  IntelExpr Expr(BaseRegStr, IndexRegStr, SM.getScale(), OffsetNameStr,
2209  SM.getImm(), SM.isMemExpr());
2210  InstInfo->AsmRewrites->emplace_back(Loc, ExprLen, Expr);
2211 }
2212 
2213 // Inline assembly may use variable names with namespace alias qualifiers.
2214 bool X86AsmParser::ParseIntelInlineAsmIdentifier(
2215  const MCExpr *&Val, StringRef &Identifier, InlineAsmIdentifierInfo &Info,
2216  bool IsUnevaluatedOperand, SMLoc &End, bool IsParsingOffsetOperator) {
2217  MCAsmParser &Parser = getParser();
2218  assert(isParsingMSInlineAsm() && "Expected to be parsing inline assembly.");
2219  Val = nullptr;
2220 
2221  StringRef LineBuf(Identifier.data());
2222  SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
2223 
2224  const AsmToken &Tok = Parser.getTok();
2225  SMLoc Loc = Tok.getLoc();
2226 
2227  // Advance the token stream until the end of the current token is
2228  // after the end of what the frontend claimed.
2229  const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
2230  do {
2231  End = Tok.getEndLoc();
2232  getLexer().Lex();
2233  } while (End.getPointer() < EndPtr);
2234  Identifier = LineBuf;
2235 
2236  // The frontend should end parsing on an assembler token boundary, unless it
2237  // failed parsing.
2238  assert((End.getPointer() == EndPtr ||
2240  "frontend claimed part of a token?");
2241 
2242  // If the identifier lookup was unsuccessful, assume that we are dealing with
2243  // a label.
2245  StringRef InternalName =
2246  SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(),
2247  Loc, false);
2248  assert(InternalName.size() && "We should have an internal name here.");
2249  // Push a rewrite for replacing the identifier name with the internal name,
2250  // unless we are parsing the operand of an offset operator
2251  if (!IsParsingOffsetOperator)
2252  InstInfo->AsmRewrites->emplace_back(AOK_Label, Loc, Identifier.size(),
2253  InternalName);
2254  else
2255  Identifier = InternalName;
2256  } else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
2257  return false;
2258  // Create the symbol reference.
2259  MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
2261  Val = MCSymbolRefExpr::create(Sym, Variant, getParser().getContext());
2262  return false;
2263 }
2264 
2265 //ParseRoundingModeOp - Parse AVX-512 rounding mode operand
2266 bool X86AsmParser::ParseRoundingModeOp(SMLoc Start, OperandVector &Operands) {
2267  MCAsmParser &Parser = getParser();
2268  const AsmToken &Tok = Parser.getTok();
2269  // Eat "{" and mark the current place.
2270  const SMLoc consumedToken = consumeToken();
2271  if (Tok.isNot(AsmToken::Identifier))
2272  return Error(Tok.getLoc(), "Expected an identifier after {");
2273  if (Tok.getIdentifier().startswith("r")){
2274  int rndMode = StringSwitch<int>(Tok.getIdentifier())
2279  .Default(-1);
2280  if (-1 == rndMode)
2281  return Error(Tok.getLoc(), "Invalid rounding mode.");
2282  Parser.Lex(); // Eat "r*" of r*-sae
2283  if (!getLexer().is(AsmToken::Minus))
2284  return Error(Tok.getLoc(), "Expected - at this point");
2285  Parser.Lex(); // Eat "-"
2286  Parser.Lex(); // Eat the sae
2287  if (!getLexer().is(AsmToken::RCurly))
2288  return Error(Tok.getLoc(), "Expected } at this point");
2289  SMLoc End = Tok.getEndLoc();
2290  Parser.Lex(); // Eat "}"
2291  const MCExpr *RndModeOp =
2292  MCConstantExpr::create(rndMode, Parser.getContext());
2293  Operands.push_back(X86Operand::CreateImm(RndModeOp, Start, End));
2294  return false;
2295  }
2296  if(Tok.getIdentifier().equals("sae")){
2297  Parser.Lex(); // Eat the sae
2298  if (!getLexer().is(AsmToken::RCurly))
2299  return Error(Tok.getLoc(), "Expected } at this point");
2300  Parser.Lex(); // Eat "}"
2301  Operands.push_back(X86Operand::CreateToken("{sae}", consumedToken));
2302  return false;
2303  }
2304  return Error(Tok.getLoc(), "unknown token in expression");
2305 }
2306 
2307 /// Parse the '.' operator.
2308 bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM,
2309  SMLoc &End) {
2310  const AsmToken &Tok = getTok();
2312 
2313  // Drop the optional '.'.
2314  StringRef DotDispStr = Tok.getString();
2315  if (DotDispStr.startswith("."))
2316  DotDispStr = DotDispStr.drop_front(1);
2317  StringRef TrailingDot;
2318 
2319  // .Imm gets lexed as a real.
2320  if (Tok.is(AsmToken::Real)) {
2321  APInt DotDisp;
2322  DotDispStr.getAsInteger(10, DotDisp);
2323  Info.Offset = DotDisp.getZExtValue();
2324  } else if ((isParsingMSInlineAsm() || getParser().isParsingMasm()) &&
2325  Tok.is(AsmToken::Identifier)) {
2326  if (DotDispStr.endswith(".")) {
2327  TrailingDot = DotDispStr.substr(DotDispStr.size() - 1);
2328  DotDispStr = DotDispStr.drop_back(1);
2329  }
2330  const std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
2331  const StringRef Base = BaseMember.first, Member = BaseMember.second;
2332  if (getParser().lookUpField(SM.getType(), DotDispStr, Info) &&
2333  getParser().lookUpField(SM.getSymName(), DotDispStr, Info) &&
2334  getParser().lookUpField(DotDispStr, Info) &&
2335  (!SemaCallback ||
2336  SemaCallback->LookupInlineAsmField(Base, Member, Info.Offset)))
2337  return Error(Tok.getLoc(), "Unable to lookup field reference!");
2338  } else {
2339  return Error(Tok.getLoc(), "Unexpected token type!");
2340  }
2341 
2342  // Eat the DotExpression and update End
2343  End = SMLoc::getFromPointer(DotDispStr.data());
2344  const char *DotExprEndLoc = DotDispStr.data() + DotDispStr.size();
2345  while (Tok.getLoc().getPointer() < DotExprEndLoc)
2346  Lex();
2347  if (!TrailingDot.empty())
2348  getLexer().UnLex(AsmToken(AsmToken::Dot, TrailingDot));
2349  SM.addImm(Info.Offset);
2350  SM.setTypeInfo(Info.Type);
2351  return false;
2352 }
2353 
2354 /// Parse the 'offset' operator.
2355 /// This operator is used to specify the location of a given operand
2356 bool X86AsmParser::ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID,
2358  SMLoc &End) {
2359  // Eat offset, mark start of identifier.
2360  SMLoc Start = Lex().getLoc();
2361  ID = getTok().getString();
2362  if (!isParsingMSInlineAsm()) {
2363  if ((getTok().isNot(AsmToken::Identifier) &&
2364  getTok().isNot(AsmToken::String)) ||
2365  getParser().parsePrimaryExpr(Val, End, nullptr))
2366  return Error(Start, "unexpected token!");
2367  } else if (ParseIntelInlineAsmIdentifier(Val, ID, Info, false, End, true)) {
2368  return Error(Start, "unable to lookup expression");
2369  } else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal)) {
2370  return Error(Start, "offset operator cannot yet handle constants");
2371  }
2372  return false;
2373 }
2374 
2375 // Query a candidate string for being an Intel assembly operator
2376 // Report back its kind, or IOK_INVALID if does not evaluated as a known one
2377 unsigned X86AsmParser::IdentifyIntelInlineAsmOperator(StringRef Name) {
2378  return StringSwitch<unsigned>(Name)
2379  .Cases("TYPE","type",IOK_TYPE)
2380  .Cases("SIZE","size",IOK_SIZE)
2381  .Cases("LENGTH","length",IOK_LENGTH)
2382  .Default(IOK_INVALID);
2383 }
2384 
2385 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
2386 /// returns the number of elements in an array. It returns the value 1 for
2387 /// non-array variables. The SIZE operator returns the size of a C or C++
2388 /// variable. A variable's size is the product of its LENGTH and TYPE. The
2389 /// TYPE operator returns the size of a C or C++ type or variable. If the
2390 /// variable is an array, TYPE returns the size of a single element.
2391 unsigned X86AsmParser::ParseIntelInlineAsmOperator(unsigned OpKind) {
2392  MCAsmParser &Parser = getParser();
2393  const AsmToken &Tok = Parser.getTok();
2394  Parser.Lex(); // Eat operator.
2395 
2396  const MCExpr *Val = nullptr;
2398  SMLoc Start = Tok.getLoc(), End;
2399  StringRef Identifier = Tok.getString();
2400  if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info,
2401  /*IsUnevaluatedOperand=*/true, End))
2402  return 0;
2403 
2404  if (!Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
2405  Error(Start, "unable to lookup expression");
2406  return 0;
2407  }
2408 
2409  unsigned CVal = 0;
2410  switch(OpKind) {
2411  default: llvm_unreachable("Unexpected operand kind!");
2412  case IOK_LENGTH: CVal = Info.Var.Length; break;
2413  case IOK_SIZE: CVal = Info.Var.Size; break;
2414  case IOK_TYPE: CVal = Info.Var.Type; break;
2415  }
2416 
2417  return CVal;
2418 }
2419 
2420 // Query a candidate string for being an Intel assembly operator
2421 // Report back its kind, or IOK_INVALID if does not evaluated as a known one
2422 unsigned X86AsmParser::IdentifyMasmOperator(StringRef Name) {
2423  return StringSwitch<unsigned>(Name.lower())
2424  .Case("type", MOK_TYPE)
2425  .Cases("size", "sizeof", MOK_SIZEOF)
2426  .Cases("length", "lengthof", MOK_LENGTHOF)
2427  .Default(MOK_INVALID);
2428 }
2429 
2430 /// Parse the 'LENGTHOF', 'SIZEOF', and 'TYPE' operators. The LENGTHOF operator
2431 /// returns the number of elements in an array. It returns the value 1 for
2432 /// non-array variables. The SIZEOF operator returns the size of a type or
2433 /// variable in bytes. A variable's size is the product of its LENGTH and TYPE.
2434 /// The TYPE operator returns the size of a variable. If the variable is an
2435 /// array, TYPE returns the size of a single element.
bool X86AsmParser::ParseMasmOperator(unsigned OpKind, int64_t &Val) {
  MCAsmParser &Parser = getParser();
  SMLoc OpLoc = Parser.getTok().getLoc();
  Parser.Lex(); // Eat operator.

  Val = 0;
  if (OpKind == MOK_SIZEOF || OpKind == MOK_TYPE) {
    // Check for SIZEOF(<type>) and TYPE(<type>).
    // Peek past an optional '(' so the type name can be inspected without
    // consuming anything until we know the lookup succeeds.
    bool InParens = Parser.getTok().is(AsmToken::LParen);
    const AsmToken &IDTok = InParens ? getLexer().peekTok() : Parser.getTok();
    AsmTypeInfo Type;
    if (IDTok.is(AsmToken::Identifier) &&
        !Parser.lookUpType(IDTok.getIdentifier(), Type)) {
      Val = Type.Size;

      // Eat tokens.
      if (InParens)
        parseToken(AsmToken::LParen);
      parseToken(AsmToken::Identifier);
      if (InParens)
        parseToken(AsmToken::RParen);
    }
  }

  // Not a bare type name: evaluate a full Intel expression and derive the
  // requested quantity from the expression's type information.
  if (!Val) {
    IntelExprStateMachine SM;
    SMLoc End, Start = Parser.getTok().getLoc();
    if (ParseIntelExpression(SM, End))
      return true;

    switch (OpKind) {
    default:
      llvm_unreachable("Unexpected operand kind!");
    case MOK_SIZEOF:
      Val = SM.getSize();
      break;
    case MOK_LENGTHOF:
      Val = SM.getLength();
      break;
    case MOK_TYPE:
      Val = SM.getElementSize();
      break;
    }

    // A zero result means the expression carried no type information.
    if (!Val)
      return Error(OpLoc, "expression has unknown type", SMRange(Start, End));
  }

  return false;
}
2486 
2487 bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size) {
2488  Size = StringSwitch<unsigned>(getTok().getString())
2489  .Cases("BYTE", "byte", 8)
2490  .Cases("WORD", "word", 16)
2491  .Cases("DWORD", "dword", 32)
2492  .Cases("FLOAT", "float", 32)
2493  .Cases("LONG", "long", 32)
2494  .Cases("FWORD", "fword", 48)
2495  .Cases("DOUBLE", "double", 64)
2496  .Cases("QWORD", "qword", 64)
2497  .Cases("MMWORD","mmword", 64)
2498  .Cases("XWORD", "xword", 80)
2499  .Cases("TBYTE", "tbyte", 80)
2500  .Cases("XMMWORD", "xmmword", 128)
2501  .Cases("YMMWORD", "ymmword", 256)
2502  .Cases("ZMMWORD", "zmmword", 512)
2503  .Default(0);
2504  if (Size) {
2505  const AsmToken &Tok = Lex(); // Eat operand size (e.g., byte, word).
2506  if (!(Tok.getString().equals("PTR") || Tok.getString().equals("ptr")))
2507  return Error(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
2508  Lex(); // Eat ptr.
2509  }
2510  return false;
2511 }
2512 
// Parse a single Intel-syntax operand (register, immediate, or memory) and
// append the resulting X86Operand(s) to Operands. Name is the mnemonic being
// parsed; it is consulted for MASM branch-target heuristics. Returns true on
// error.
bool X86AsmParser::parseIntelOperand(OperandVector &Operands, StringRef Name) {
  MCAsmParser &Parser = getParser();
  const AsmToken &Tok = Parser.getTok();
  SMLoc Start, End;

  // Parse optional Size directive.
  unsigned Size;
  if (ParseIntelMemoryOperandSize(Size))
    return true;
  // A nonzero Size means an explicit "<size> ptr" prefix was consumed.
  bool PtrInOperand = bool(Size);

  Start = Tok.getLoc();

  // Rounding mode operand.
  if (getLexer().is(AsmToken::LCurly))
    return ParseRoundingModeOp(Start, Operands);

  // Register operand.
  unsigned RegNo = 0;
  if (Tok.is(AsmToken::Identifier) && !ParseRegister(RegNo, Start, End)) {
    if (RegNo == X86::RIP)
      return Error(Start, "rip can only be used as a base register");
    // A Register followed by ':' is considered a segment override
    if (Tok.isNot(AsmToken::Colon)) {
      if (PtrInOperand)
        return Error(Start, "expected memory operand after 'ptr', "
                            "found register operand instead");
      Operands.push_back(X86Operand::CreateReg(RegNo, Start, End));
      return false;
    }
    // An alleged segment override. check if we have a valid segment register
    if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
      return Error(Start, "invalid segment register");
    // Eat ':' and update Start location
    Start = Lex().getLoc();
  }

  // Immediates and Memory
  IntelExprStateMachine SM;
  if (ParseIntelExpression(SM, End))
    return true;

  if (isParsingMSInlineAsm())
    RewriteIntelExpression(SM, Start, Tok.getLoc());

  // Fold the symbolic and numeric displacements into a single expression.
  int64_t Imm = SM.getImm();
  const MCExpr *Disp = SM.getSym();
  const MCExpr *ImmDisp = MCConstantExpr::create(Imm, getContext());
  if (Disp && Imm)
    Disp = MCBinaryExpr::createAdd(Disp, ImmDisp, getContext());
  if (!Disp)
    Disp = ImmDisp;

  // RegNo != 0 specifies a valid segment register,
  // and we are parsing a segment override
  if (!SM.isMemExpr() && !RegNo) {
    if (isParsingMSInlineAsm() && SM.isOffsetOperator()) {
      const InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
      if (Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
        // Disp includes the address of a variable; make sure this is recorded
        // for later handling.
        Operands.push_back(X86Operand::CreateImm(Disp, Start, End,
                                                 SM.getSymName(), Info.Var.Decl,
                                                 Info.Var.IsGlobalLV));
        return false;
      }
    }

    // Plain immediate operand.
    Operands.push_back(X86Operand::CreateImm(Disp, Start, End));
    return false;
  }

  StringRef ErrMsg;
  unsigned BaseReg = SM.getBaseReg();
  unsigned IndexReg = SM.getIndexReg();
  // With an explicit index, a RIP base is implied and dropped.
  if (IndexReg && BaseReg == X86::RIP)
    BaseReg = 0;
  unsigned Scale = SM.getScale();
  // Without a "<size> ptr" prefix, infer the width from the element type
  // (getElementSize() is in bytes; << 3 converts to bits).
  if (!PtrInOperand)
    Size = SM.getElementSize() << 3;

  // ESP/RSP cannot be an index; with no explicit scale, swap so the stack
  // pointer becomes the base instead.
  if (Scale == 0 && BaseReg != X86::ESP && BaseReg != X86::RSP &&
      (IndexReg == X86::ESP || IndexReg == X86::RSP))
    std::swap(BaseReg, IndexReg);

  // If BaseReg is a vector register and IndexReg is not, swap them unless
  // Scale was specified in which case it would be an error.
  if (Scale == 0 &&
      !(X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg)) &&
      (X86MCRegisterClasses[X86::VR128XRegClassID].contains(BaseReg) ||
       X86MCRegisterClasses[X86::VR256XRegClassID].contains(BaseReg) ||
       X86MCRegisterClasses[X86::VR512RegClassID].contains(BaseReg)))
    std::swap(BaseReg, IndexReg);

  if (Scale != 0 &&
      X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg))
    return Error(Start, "16-bit addresses cannot have a scale");

  // If there was no explicit scale specified, change it to 1.
  if (Scale == 0)
    Scale = 1;

  // If this is a 16-bit addressing mode with the base and index in the wrong
  // order, swap them so CheckBaseRegAndIndexRegAndScale doesn't fail. It is
  // shared with att syntax where order matters.
  if ((BaseReg == X86::SI || BaseReg == X86::DI) &&
      (IndexReg == X86::BX || IndexReg == X86::BP))
    std::swap(BaseReg, IndexReg);

  if ((BaseReg || IndexReg) &&
      CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
                                      ErrMsg))
    return Error(Start, ErrMsg);
  if (isParsingMSInlineAsm())
    return CreateMemForMSInlineAsm(RegNo, Disp, BaseReg, IndexReg, Scale, Start,
                                   End, Size, SM.getSymName(),
                                   SM.getIdentifierInfo(), Operands);

  // When parsing x64 MS-style assembly, all non-absolute references to a named
  // variable default to RIP-relative.
  unsigned DefaultBaseReg = X86::NoRegister;
  bool MaybeDirectBranchDest = true;

  if (Parser.isParsingMasm()) {
    bool IsUnconditionalBranch =
        Name.equals_insensitive("jmp") || Name.equals_insensitive("call");
    if (is64BitMode() && SM.getElementSize() > 0) {
      DefaultBaseReg = X86::RIP;
    }
    if (IsUnconditionalBranch) {
      if (PtrInOperand) {
        // "jmp qword ptr [x]" is an indirect branch, never a direct target.
        MaybeDirectBranchDest = false;
        if (is64BitMode())
          DefaultBaseReg = X86::RIP;
      } else if (!BaseReg && !IndexReg && Disp &&
                 Disp->getKind() == MCExpr::SymbolRef) {
        if (is64BitMode()) {
          if (SM.getSize() == 8) {
            MaybeDirectBranchDest = false;
            DefaultBaseReg = X86::RIP;
          }
        } else {
          if (SM.getSize() == 4 || SM.getSize() == 2)
            MaybeDirectBranchDest = false;
        }
      }
    }
  }

  // Emit a full base/index/scale memory operand when any register part is
  // present; otherwise emit an absolute-displacement memory operand.
  if ((BaseReg || IndexReg || RegNo || DefaultBaseReg != X86::NoRegister))
    Operands.push_back(X86Operand::CreateMem(
        getPointerWidth(), RegNo, Disp, BaseReg, IndexReg, Scale, Start, End,
        Size, DefaultBaseReg, /*SymName=*/StringRef(), /*OpDecl=*/nullptr,
        /*FrontendSize=*/0, /*UseUpRegs=*/false, MaybeDirectBranchDest));
  else
    Operands.push_back(X86Operand::CreateMem(
        getPointerWidth(), Disp, Start, End, Size, /*SymName=*/StringRef(),
        /*OpDecl=*/nullptr, /*FrontendSize=*/0, /*UseUpRegs=*/false,
        MaybeDirectBranchDest));
  return false;
}
2676 
// Parse a single AT&T-syntax operand (immediate, rounding-mode marker,
// register, or memory reference) and append it to Operands. Returns true on
// error.
bool X86AsmParser::parseATTOperand(OperandVector &Operands) {
  MCAsmParser &Parser = getParser();
  switch (getLexer().getKind()) {
  case AsmToken::Dollar: {
    // $42 or $ID -> immediate.
    SMLoc Start = Parser.getTok().getLoc(), End;
    Parser.Lex();
    const MCExpr *Val;
    // This is an immediate, so we should not parse a register. Do a precheck
    // for '%' to supersede intra-register parse errors.
    SMLoc L = Parser.getTok().getLoc();
    if (check(getLexer().is(AsmToken::Percent), L,
              "expected immediate expression") ||
        getParser().parseExpression(Val, End) ||
        check(isa<X86MCExpr>(Val), L, "expected immediate expression"))
      return true;
    Operands.push_back(X86Operand::CreateImm(Val, Start, End));
    return false;
  }
  case AsmToken::LCurly: {
    // "{rn-sae}" etc. -> AVX-512 rounding-mode operand.
    SMLoc Start = Parser.getTok().getLoc();
    return ParseRoundingModeOp(Start, Operands);
  }
  default: {
    // This a memory operand or a register. We have some parsing complications
    // as a '(' may be part of an immediate expression or the addressing mode
    // block. This is complicated by the fact that an assembler-level variable
    // may refer either to a register or an immediate expression.

    SMLoc Loc = Parser.getTok().getLoc(), EndLoc;
    const MCExpr *Expr = nullptr;
    unsigned Reg = 0;
    if (getLexer().isNot(AsmToken::LParen)) {
      // No '(' so this is either a displacement expression or a register.
      if (Parser.parseExpression(Expr, EndLoc))
        return true;
      if (auto *RE = dyn_cast<X86MCExpr>(Expr)) {
        // Segment Register. Reset Expr and copy value to register.
        Expr = nullptr;
        Reg = RE->getRegNo();

        // Check the register.
        if (Reg == X86::EIZ || Reg == X86::RIZ)
          return Error(
              Loc, "%eiz and %riz can only be used as index registers",
              SMRange(Loc, EndLoc));
        if (Reg == X86::RIP)
          return Error(Loc, "%rip can only be used as a base register",
                       SMRange(Loc, EndLoc));
        // Registers that are not segment prefixes are returned immediately.
        if (!Parser.parseOptionalToken(AsmToken::Colon)) {
          Operands.push_back(X86Operand::CreateReg(Reg, Loc, EndLoc));
          return false;
        }
        if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(Reg))
          return Error(Loc, "invalid segment register");
        // Accept a '*' absolute memory reference after the segment. Place it
        // before the full memory operand.
        if (getLexer().is(AsmToken::Star))
          Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
      }
    }
    // This is a Memory operand.
    return ParseMemOperand(Reg, Expr, Loc, EndLoc, Operands);
  }
  }
}
2744 
2745 // X86::COND_INVALID if not a recognized condition code or alternate mnemonic,
2746 // otherwise the EFLAGS Condition Code enumerator.
2747 X86::CondCode X86AsmParser::ParseConditionCode(StringRef CC) {
2749  .Case("o", X86::COND_O) // Overflow
2750  .Case("no", X86::COND_NO) // No Overflow
2751  .Cases("b", "nae", X86::COND_B) // Below/Neither Above nor Equal
2752  .Cases("ae", "nb", X86::COND_AE) // Above or Equal/Not Below
2753  .Cases("e", "z", X86::COND_E) // Equal/Zero
2754  .Cases("ne", "nz", X86::COND_NE) // Not Equal/Not Zero
2755  .Cases("be", "na", X86::COND_BE) // Below or Equal/Not Above
2756  .Cases("a", "nbe", X86::COND_A) // Above/Neither Below nor Equal
2757  .Case("s", X86::COND_S) // Sign
2758  .Case("ns", X86::COND_NS) // No Sign
2759  .Cases("p", "pe", X86::COND_P) // Parity/Parity Even
2760  .Cases("np", "po", X86::COND_NP) // No Parity/Parity Odd
2761  .Cases("l", "nge", X86::COND_L) // Less/Neither Greater nor Equal
2762  .Cases("ge", "nl", X86::COND_GE) // Greater or Equal/Not Less
2763  .Cases("le", "ng", X86::COND_LE) // Less or Equal/Not Greater
2764  .Cases("g", "nle", X86::COND_G) // Greater/Neither Less nor Equal
2766 }
2767 
2768 // true on failure, false otherwise
2769 // If no {z} mark was found - Parser doesn't advance
2770 bool X86AsmParser::ParseZ(std::unique_ptr<X86Operand> &Z,
2771  const SMLoc &StartLoc) {
2772  MCAsmParser &Parser = getParser();
2773  // Assuming we are just pass the '{' mark, quering the next token
2774  // Searched for {z}, but none was found. Return false, as no parsing error was
2775  // encountered
2776  if (!(getLexer().is(AsmToken::Identifier) &&
2777  (getLexer().getTok().getIdentifier() == "z")))
2778  return false;
2779  Parser.Lex(); // Eat z
2780  // Query and eat the '}' mark
2781  if (!getLexer().is(AsmToken::RCurly))
2782  return Error(getLexer().getLoc(), "Expected } at this point");
2783  Parser.Lex(); // Eat '}'
2784  // Assign Z with the {z} mark operand
2785  Z = X86Operand::CreateToken("{z}", StartLoc);
2786  return false;
2787 }
2788 
2789 // true on failure, false otherwise
// Parse the optional AVX-512 operand decorators that may follow an operand:
// a memory-broadcast marker {1to<NUM>}, or an op-mask register {%k<NUM>}
// optionally combined with a zeroing marker {z}. Appends the corresponding
// token/register operands. Returns true on failure, false otherwise.
bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands) {
  MCAsmParser &Parser = getParser();
  if (getLexer().is(AsmToken::LCurly)) {
    // Eat "{" and mark the current place.
    const SMLoc consumedToken = consumeToken();
    // Distinguish {1to<NUM>} from {%k<NUM>}.
    if(getLexer().is(AsmToken::Integer)) {
      // Parse memory broadcasting ({1to<NUM>}).
      if (getLexer().getTok().getIntVal() != 1)
        return TokError("Expected 1to<NUM> at this point");
      StringRef Prefix = getLexer().getTok().getString();
      Parser.Lex(); // Eat first token of 1to8
      if (!getLexer().is(AsmToken::Identifier))
        return TokError("Expected 1to<NUM> at this point");
      // Recognize only reasonable suffixes.
      // "1to8" lexes as integer "1" + identifier "to8"; rejoin the pieces
      // before matching against the known broadcast widths.
      SmallVector<char, 5> BroadcastVector;
      StringRef BroadcastString = (Prefix + getLexer().getTok().getIdentifier())
                                      .toStringRef(BroadcastVector);
      if (!BroadcastString.startswith("1to"))
        return TokError("Expected 1to<NUM> at this point");
      const char *BroadcastPrimitive =
          StringSwitch<const char *>(BroadcastString)
              .Case("1to2", "{1to2}")
              .Case("1to4", "{1to4}")
              .Case("1to8", "{1to8}")
              .Case("1to16", "{1to16}")
              .Case("1to32", "{1to32}")
              .Default(nullptr);
      if (!BroadcastPrimitive)
        return TokError("Invalid memory broadcast primitive.");
      Parser.Lex(); // Eat trailing token of 1toN
      if (!getLexer().is(AsmToken::RCurly))
        return TokError("Expected } at this point");
      Parser.Lex();  // Eat "}"
      Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
                                                 consumedToken));
      // No AVX512 specific primitives can pass
      // after memory broadcasting, so return.
      return false;
    } else {
      // Parse either {k}{z}, {z}{k}, {k} or {z}.
      // The last one has no meaning on its own, but GCC accepts it.
      // At this point we have just consumed a '{' mark.
      std::unique_ptr<X86Operand> Z;
      if (ParseZ(Z, consumedToken))
        return true;
      // Reaching here means that parsing of the alleged '{z}' mark yielded
      // no errors.
      // Query for the need of further parsing for a {%k<NUM>} mark
      if (!Z || getLexer().is(AsmToken::LCurly)) {
        SMLoc StartLoc = Z ? consumeToken() : consumedToken;
        // Parse an op-mask register mark ({%k<NUM>}), which is now to be
        // expected
        unsigned RegNo;
        SMLoc RegLoc;
        if (!ParseRegister(RegNo, RegLoc, StartLoc) &&
            X86MCRegisterClasses[X86::VK1RegClassID].contains(RegNo)) {
          // k0 is the implicit "no masking" register and cannot be written.
          if (RegNo == X86::K0)
            return Error(RegLoc, "Register k0 can't be used as write mask");
          if (!getLexer().is(AsmToken::RCurly))
            return Error(getLexer().getLoc(), "Expected } at this point");
          Operands.push_back(X86Operand::CreateToken("{", StartLoc));
          Operands.push_back(
              X86Operand::CreateReg(RegNo, StartLoc, StartLoc));
          Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
        } else
          return Error(getLexer().getLoc(),
                       "Expected an op-mask register at this point");
        // {%k<NUM>} mark is found, inquire for {z}
        if (getLexer().is(AsmToken::LCurly) && !Z) {
          // Either a parsing error occurred, or the expected {z} mark was
          // not found - report an error.
          if (ParseZ(Z, consumeToken()) || !Z)
            return Error(getLexer().getLoc(),
                         "Expected a {z} mark at this point");

        }
        // '{z}' on its own is meaningless, hence should be ignored.
        // On the contrary - had it been accompanied by a K register,
        // allow it.
        if (Z)
          Operands.push_back(std::move(Z));
      }
    }
  }
  return false;
}
2877 
/// ParseMemOperand: 'seg : disp(basereg, indexreg, scale)'. The '%ds:' prefix
/// has already been parsed if present. disp may be provided as well.
///
/// \param SegReg   Segment register already parsed by the caller, or 0 if none.
/// \param Disp     Displacement expression parsed so far, or null if none yet.
/// \param StartLoc Location where the operand began (used for diagnostics and
///                 the operand's source range).
/// \param EndLoc   Updated as sub-expressions are parsed.
/// \returns true on error (a diagnostic has been emitted); false on success,
///          in which case the parsed operand has been appended to Operands.
bool X86AsmParser::ParseMemOperand(unsigned SegReg, const MCExpr *Disp,
                                   SMLoc StartLoc, SMLoc EndLoc,
                                   // NOTE(review): the HTML extraction dropped
                                   // original line 2882 here (the
                                   // 'OperandVector &Operands) {' line per
                                   // upstream LLVM) -- confirm before use.
  MCAsmParser &Parser = getParser();
  SMLoc Loc;
  // Based on the initial passed values, we may be in any of these cases, we are
  // in one of these cases (with current position (*)):

  //   1. seg : * disp  (base-index-scale-expr)
  //   2. seg : *(disp)  (base-index-scale-expr)
  //   3. seg :       *(base-index-scale-expr)
  //   4. disp  *(base-index-scale-expr)
  //   5. *(disp)  (base-index-scale-expr)
  //   6.  *(base-index-scale-expr)
  //   7. disp *
  //   8. *(disp)

  // If we do not have an displacement yet, check if we're in cases 4 or 6 by
  // checking if the first object after the parenthesis is a register (or an
  // identifier referring to a register) and parse the displacement or default
  // to 0 as appropriate.
  //
  // Lookahead-only predicate: peeks (without consuming) past a '(' to decide
  // whether what follows is a base/index register list, i.e. whether we are
  // already at the memory part of the operand.
  auto isAtMemOperand = [this]() {
    if (this->getLexer().isNot(AsmToken::LParen))
      return false;
    AsmToken Buf[2];
    StringRef Id;
    auto TokCount = this->getLexer().peekTokens(Buf, true);
    if (TokCount == 0)
      return false;
    switch (Buf[0].getKind()) {
    case AsmToken::Percent:
    case AsmToken::Comma:
      return true;
    // These lower cases are doing a peekIdentifier.
    case AsmToken::At:
    case AsmToken::Dollar:
      // Only treat '@id'/'$id' as one identifier when the two tokens are
      // adjacent in the source buffer (no whitespace between them).
      if ((TokCount > 1) &&
          (Buf[1].is(AsmToken::Identifier) || Buf[1].is(AsmToken::String)) &&
          (Buf[0].getLoc().getPointer() + 1 == Buf[1].getLoc().getPointer()))
        Id = StringRef(Buf[0].getLoc().getPointer(),
                       Buf[1].getIdentifier().size() + 1);
      break;
    case AsmToken::Identifier:
    case AsmToken::String:
      Id = Buf[0].getIdentifier();
      break;
    default:
      return false;
    }
    // We have an ID. Check if it is bound to a register.
    if (!Id.empty()) {
      MCSymbol *Sym = this->getContext().getOrCreateSymbol(Id);
      if (Sym->isVariable()) {
        auto V = Sym->getVariableValue(/*SetUsed*/ false);
        return isa<X86MCExpr>(V);
      }
    }
    return false;
  };

  if (!Disp) {
    // Parse immediate if we're not at a mem operand yet.
    if (!isAtMemOperand()) {
      if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(Disp, EndLoc))
        return true;
      assert(!isa<X86MCExpr>(Disp) && "Expected non-register here.");
    } else {
      // Disp is implicitly zero if we haven't parsed it yet.
      Disp = MCConstantExpr::create(0, Parser.getContext());
    }
  }

  // We are now either at the end of the operand or at the '(' at the start of a
  // base-index-scale-expr.

  if (!parseOptionalToken(AsmToken::LParen)) {
    // Displacement-only operand (cases 7/8 above).
    if (SegReg == 0)
      Operands.push_back(
          X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc));
    else
      Operands.push_back(X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
                                               0, 0, 1, StartLoc, EndLoc));
    return false;
  }

  // If we reached here, then eat the '(' and Process
  // the rest of the memory operand.
  unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
  SMLoc BaseLoc = getLexer().getLoc();
  const MCExpr *E;
  StringRef ErrMsg;

  // Parse BaseReg if one is provided.
  if (getLexer().isNot(AsmToken::Comma) && getLexer().isNot(AsmToken::RParen)) {
    if (Parser.parseExpression(E, EndLoc) ||
        check(!isa<X86MCExpr>(E), BaseLoc, "expected register here"))
      return true;

    // Check the register.
    BaseReg = cast<X86MCExpr>(E)->getRegNo();
    if (BaseReg == X86::EIZ || BaseReg == X86::RIZ)
      return Error(BaseLoc, "eiz and riz can only be used as index registers",
                   SMRange(BaseLoc, EndLoc));
  }

  if (parseOptionalToken(AsmToken::Comma)) {
    // Following the comma we should have either an index register, or a scale
    // value. We don't support the later form, but we want to parse it
    // correctly.
    //
    // Even though it would be completely consistent to support syntax like
    // "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
    if (getLexer().isNot(AsmToken::RParen)) {
      if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(E, EndLoc))
        return true;

      if (!isa<X86MCExpr>(E)) {
        // We've parsed an unexpected Scale Value instead of an index
        // register. Interpret it as an absolute.
        int64_t ScaleVal;
        if (!E->evaluateAsAbsolute(ScaleVal, getStreamer().getAssemblerPtr()))
          return Error(Loc, "expected absolute expression");
        if (ScaleVal != 1)
          Warning(Loc, "scale factor without index register is ignored");
        Scale = 1;
      } else { // IndexReg Found.
        IndexReg = cast<X86MCExpr>(E)->getRegNo();

        if (BaseReg == X86::RIP)
          return Error(Loc,
                       "%rip as base register can not have an index register");
        if (IndexReg == X86::RIP)
          return Error(Loc, "%rip is not allowed as an index register");

        if (parseOptionalToken(AsmToken::Comma)) {
          // Parse the scale amount:
          //  ::= ',' [scale-expression]

          // A scale amount without an index is ignored.
          if (getLexer().isNot(AsmToken::RParen)) {
            int64_t ScaleVal;
            if (Parser.parseTokenLoc(Loc) ||
                Parser.parseAbsoluteExpression(ScaleVal))
              return Error(Loc, "expected scale expression");
            Scale = (unsigned)ScaleVal;
            // Validate the scale amount.
            if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
                Scale != 1)
              return Error(Loc, "scale factor in 16-bit address must be 1");
            if (checkScale(Scale, ErrMsg))
              return Error(Loc, ErrMsg);
          }
        }
      }
    }
  }

  // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
  if (parseToken(AsmToken::RParen, "unexpected token in memory operand"))
    return true;

  // This is to support otherwise illegal operand (%dx) found in various
  // unofficial manuals examples (e.g. "out[s]?[bwl]? %al, (%dx)") and must now
  // be supported. Mark such DX variants separately fix only in special cases.
  if (BaseReg == X86::DX && IndexReg == 0 && Scale == 1 && SegReg == 0 &&
      isa<MCConstantExpr>(Disp) &&
      cast<MCConstantExpr>(Disp)->getValue() == 0) {
    Operands.push_back(X86Operand::CreateDXReg(BaseLoc, BaseLoc));
    return false;
  }

  if (CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
                                      ErrMsg))
    return Error(BaseLoc, ErrMsg);

  // Emit the full seg:disp(base,index,scale) form only when some register is
  // actually present; otherwise fall back to the plain-displacement form.
  if (SegReg || BaseReg || IndexReg)
    Operands.push_back(X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
                                             BaseReg, IndexReg, Scale, StartLoc,
                                             EndLoc));
  else
    Operands.push_back(
        X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc));
  return false;
}
3064 
3065 // Parse either a standard primary expression or a register.
3066 bool X86AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
3067  MCAsmParser &Parser = getParser();
3068  // See if this is a register first.
3069  if (getTok().is(AsmToken::Percent) ||
3070  (isParsingIntelSyntax() && getTok().is(AsmToken::Identifier) &&
3071  MatchRegisterName(Parser.getTok().getString()))) {
3072  SMLoc StartLoc = Parser.getTok().getLoc();
3073  unsigned RegNo;
3074  if (ParseRegister(RegNo, StartLoc, EndLoc))
3075  return true;
3076  Res = X86MCExpr::create(RegNo, Parser.getContext());
3077  return false;
3078  }
3079  return Parser.parsePrimaryExpr(Res, EndLoc, nullptr);
3080 }
3081 
/// Parse one X86 instruction statement: pseudo/real prefixes, the (possibly
/// rewritten) mnemonic, and its operand list, appending everything to
/// \p Operands for the match step.
///
/// \param Info    Per-instruction parse info; AsmRewrites are recorded here.
/// \param Name    The mnemonic token already lexed by the generic parser.
/// \param NameLoc Source location of \p Name.
/// \param Operands Output operand list (token, registers, memory, immediates).
/// \returns true on error (diagnostic emitted), false on success.
bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                                    SMLoc NameLoc, OperandVector &Operands) {
  MCAsmParser &Parser = getParser();
  InstInfo = &Info;

  // Reset the forced VEX encoding.
  ForcedVEXEncoding = VEXEncoding_Default;
  ForcedDispEncoding = DispEncoding_Default;

  // Parse pseudo prefixes.
  // These are {vex}/{vex2}/{vex3}/{evex}/{disp8}/{disp32} markers that force a
  // particular encoding; they precede the real mnemonic.
  while (true) {
    if (Name == "{") {
      if (getLexer().isNot(AsmToken::Identifier))
        return Error(Parser.getTok().getLoc(), "Unexpected token after '{'");
      std::string Prefix = Parser.getTok().getString().lower();
      Parser.Lex(); // Eat identifier.
      if (getLexer().isNot(AsmToken::RCurly))
        return Error(Parser.getTok().getLoc(), "Expected '}'");
      Parser.Lex(); // Eat curly.

      if (Prefix == "vex")
        ForcedVEXEncoding = VEXEncoding_VEX;
      else if (Prefix == "vex2")
        ForcedVEXEncoding = VEXEncoding_VEX2;
      else if (Prefix == "vex3")
        ForcedVEXEncoding = VEXEncoding_VEX3;
      else if (Prefix == "evex")
        ForcedVEXEncoding = VEXEncoding_EVEX;
      else if (Prefix == "disp8")
        ForcedDispEncoding = DispEncoding_Disp8;
      else if (Prefix == "disp32")
        ForcedDispEncoding = DispEncoding_Disp32;
      else
        return Error(NameLoc, "unknown prefix");

      NameLoc = Parser.getTok().getLoc();
      if (getLexer().is(AsmToken::LCurly)) {
        // Another pseudo prefix follows; loop again.
        Parser.Lex();
        Name = "{";
      } else {
        if (getLexer().isNot(AsmToken::Identifier))
          return Error(Parser.getTok().getLoc(), "Expected identifier");
        // FIXME: The mnemonic won't match correctly if its not in lower case.
        Name = Parser.getTok().getString();
        Parser.Lex();
      }
      continue;
    }
    // Parse MASM style pseudo prefixes.
    if (isParsingMSInlineAsm()) {
      if (Name.equals_insensitive("vex"))
        ForcedVEXEncoding = VEXEncoding_VEX;
      else if (Name.equals_insensitive("vex2"))
        ForcedVEXEncoding = VEXEncoding_VEX2;
      else if (Name.equals_insensitive("vex3"))
        ForcedVEXEncoding = VEXEncoding_VEX3;
      else if (Name.equals_insensitive("evex"))
        ForcedVEXEncoding = VEXEncoding_EVEX;

      if (ForcedVEXEncoding != VEXEncoding_Default) {
        if (getLexer().isNot(AsmToken::Identifier))
          return Error(Parser.getTok().getLoc(), "Expected identifier");
        // FIXME: The mnemonic won't match correctly if its not in lower case.
        Name = Parser.getTok().getString();
        NameLoc = Parser.getTok().getLoc();
        Parser.Lex();
      }
    }
    break;
  }

  // Support the suffix syntax for overriding displacement size as well.
  if (Name.consume_back(".d32")) {
    ForcedDispEncoding = DispEncoding_Disp32;
  } else if (Name.consume_back(".d8")) {
    ForcedDispEncoding = DispEncoding_Disp8;
  }

  // PatchedName is the (possibly rewritten) mnemonic used for table lookup;
  // Name keeps the user-visible spelling.
  StringRef PatchedName = Name;

  // Hack to skip "short" following Jcc.
  if (isParsingIntelSyntax() &&
      (PatchedName == "jmp" || PatchedName == "jc" || PatchedName == "jnc" ||
       PatchedName == "jcxz" || PatchedName == "jecxz" ||
       (PatchedName.startswith("j") &&
        ParseConditionCode(PatchedName.substr(1)) != X86::COND_INVALID))) {
    StringRef NextTok = Parser.getTok().getString();
    if (Parser.isParsingMasm() ? NextTok.equals_insensitive("short")
                               : NextTok == "short") {
      SMLoc NameEndLoc =
          NameLoc.getFromPointer(NameLoc.getPointer() + Name.size());
      // Eat the short keyword.
      Parser.Lex();
      // MS and GAS ignore the short keyword; they both determine the jmp type
      // based on the distance of the label. (NASM does emit different code with
      // and without "short," though.)
      InstInfo->AsmRewrites->emplace_back(AOK_Skip, NameEndLoc,
                                          NextTok.size() + 1);
    }
  }

  // FIXME: Hack to recognize setneb as setne.
  if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
      PatchedName != "setb" && PatchedName != "setnb")
    PatchedName = PatchedName.substr(0, Name.size()-1);

  // Comparison predicate extracted from a pseudo-mnemonic (e.g. cmpeqps);
  // ~0U means "none".
  unsigned ComparisonPredicate = ~0U;

  // FIXME: Hack to recognize cmp<comparison code>{sh,ss,sd,ph,ps,pd}.
  if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
      (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
       PatchedName.endswith("sh") || PatchedName.endswith("ph") ||
       PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
    bool IsVCMP = PatchedName[0] == 'v';
    unsigned CCIdx = IsVCMP ? 4 : 3;
    unsigned CC = StringSwitch<unsigned>(
      PatchedName.slice(CCIdx, PatchedName.size() - 2))
      .Case("eq", 0x00)
      .Case("eq_oq", 0x00)
      .Case("lt", 0x01)
      .Case("lt_os", 0x01)
      .Case("le", 0x02)
      .Case("le_os", 0x02)
      .Case("unord", 0x03)
      .Case("unord_q", 0x03)
      .Case("neq", 0x04)
      .Case("neq_uq", 0x04)
      .Case("nlt", 0x05)
      .Case("nlt_us", 0x05)
      .Case("nle", 0x06)
      .Case("nle_us", 0x06)
      .Case("ord", 0x07)
      .Case("ord_q", 0x07)
      /* AVX only from here */
      .Case("eq_uq", 0x08)
      .Case("nge", 0x09)
      .Case("nge_us", 0x09)
      .Case("ngt", 0x0A)
      .Case("ngt_us", 0x0A)
      .Case("false", 0x0B)
      .Case("false_oq", 0x0B)
      .Case("neq_oq", 0x0C)
      .Case("ge", 0x0D)
      .Case("ge_os", 0x0D)
      .Case("gt", 0x0E)
      .Case("gt_os", 0x0E)
      .Case("true", 0x0F)
      .Case("true_uq", 0x0F)
      .Case("eq_os", 0x10)
      .Case("lt_oq", 0x11)
      .Case("le_oq", 0x12)
      .Case("unord_s", 0x13)
      .Case("neq_us", 0x14)
      .Case("nlt_uq", 0x15)
      .Case("nle_uq", 0x16)
      .Case("ord_s", 0x17)
      .Case("eq_us", 0x18)
      .Case("nge_uq", 0x19)
      .Case("ngt_uq", 0x1A)
      .Case("false_os", 0x1B)
      .Case("neq_os", 0x1C)
      .Case("ge_oq", 0x1D)
      .Case("gt_oq", 0x1E)
      .Case("true_us", 0x1F)
      .Default(~0U);
    // Predicates >= 8 are AVX-only; 'h' suffixes (fp16) are VEX/EVEX-only.
    if (CC != ~0U && (IsVCMP || CC < 8) &&
        (IsVCMP || PatchedName.back() != 'h')) {
      if (PatchedName.endswith("ss"))
        PatchedName = IsVCMP ? "vcmpss" : "cmpss";
      else if (PatchedName.endswith("sd"))
        PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
      else if (PatchedName.endswith("ps"))
        PatchedName = IsVCMP ? "vcmpps" : "cmpps";
      else if (PatchedName.endswith("pd"))
        PatchedName = IsVCMP ? "vcmppd" : "cmppd";
      else if (PatchedName.endswith("sh"))
        PatchedName = "vcmpsh";
      else if (PatchedName.endswith("ph"))
        PatchedName = "vcmpph";
      else
        llvm_unreachable("Unexpected suffix!");

      ComparisonPredicate = CC;
    }
  }

  // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
  if (PatchedName.startswith("vpcmp") &&
      (PatchedName.back() == 'b' || PatchedName.back() == 'w' ||
       PatchedName.back() == 'd' || PatchedName.back() == 'q')) {
    unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1;
    unsigned CC = StringSwitch<unsigned>(
      PatchedName.slice(5, PatchedName.size() - SuffixSize))
      .Case("eq",    0x0) // Only allowed on unsigned. Checked below.
      .Case("lt",    0x1)
      .Case("le",    0x2)
      //.Case("false", 0x3) // Not a documented alias.
      .Case("neq",   0x4)
      .Case("nlt",   0x5)
      .Case("nle",   0x6)
      //.Case("true",  0x7) // Not a documented alias.
      .Default(~0U);
    if (CC != ~0U && (CC != 0 || SuffixSize == 2)) {
      switch (PatchedName.back()) {
      default: llvm_unreachable("Unexpected character!");
      case 'b': PatchedName = SuffixSize == 2 ? "vpcmpub" : "vpcmpb"; break;
      case 'w': PatchedName = SuffixSize == 2 ? "vpcmpuw" : "vpcmpw"; break;
      case 'd': PatchedName = SuffixSize == 2 ? "vpcmpud" : "vpcmpd"; break;
      case 'q': PatchedName = SuffixSize == 2 ? "vpcmpuq" : "vpcmpq"; break;
      }
      // Set up the immediate to push into the operands later.
      ComparisonPredicate = CC;
    }
  }

  // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
  if (PatchedName.startswith("vpcom") &&
      (PatchedName.back() == 'b' || PatchedName.back() == 'w' ||
       PatchedName.back() == 'd' || PatchedName.back() == 'q')) {
    unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1;
    unsigned CC = StringSwitch<unsigned>(
      PatchedName.slice(5, PatchedName.size() - SuffixSize))
      .Case("lt",    0x0)
      .Case("le",    0x1)
      .Case("gt",    0x2)
      .Case("ge",    0x3)
      .Case("eq",    0x4)
      .Case("neq",   0x5)
      .Case("false", 0x6)
      .Case("true",  0x7)
      .Default(~0U);
    if (CC != ~0U) {
      switch (PatchedName.back()) {
      default: llvm_unreachable("Unexpected character!");
      case 'b': PatchedName = SuffixSize == 2 ? "vpcomub" : "vpcomb"; break;
      case 'w': PatchedName = SuffixSize == 2 ? "vpcomuw" : "vpcomw"; break;
      case 'd': PatchedName = SuffixSize == 2 ? "vpcomud" : "vpcomd"; break;
      case 'q': PatchedName = SuffixSize == 2 ? "vpcomuq" : "vpcomq"; break;
      }
      // Set up the immediate to push into the operands later.
      ComparisonPredicate = CC;
    }
  }


  // Determine whether this is an instruction prefix.
  // FIXME:
  // Enhance prefixes integrity robustness. for example, following forms
  // are currently tolerated:
  // repz repnz <insn>    ; GAS errors for the use of two similar prefixes
  // lock addq %rax, %rbx ; Destination operand must be of memory type
  // xacquire <insn>      ; xacquire must be accompanied by 'lock'
  bool IsPrefix =
      StringSwitch<bool>(Name)
          .Cases("cs", "ds", "es", "fs", "gs", "ss", true)
          .Cases("rex64", "data32", "data16", "addr32", "addr16", true)
          .Cases("xacquire", "xrelease", true)
          .Cases("acquire", "release", isParsingIntelSyntax())
          .Default(false);

  auto isLockRepeatNtPrefix = [](StringRef N) {
    return StringSwitch<bool>(N)
        .Cases("lock", "rep", "repe", "repz", "repne", "repnz", "notrack", true)
        .Default(false);
  };

  bool CurlyAsEndOfStatement = false;

  unsigned Flags = X86::IP_NO_PREFIX;
  while (isLockRepeatNtPrefix(Name.lower())) {
    unsigned Prefix =
        // NOTE(review): the HTML extraction dropped original line 3353 here;
        // per upstream LLVM it constructs 'StringSwitch<unsigned>(Name)' --
        // confirm against upstream before relying on this.
            .Cases("lock", "lock", X86::IP_HAS_LOCK)
            .Cases("rep", "repe", "repz", X86::IP_HAS_REPEAT)
            .Cases("repne", "repnz", X86::IP_HAS_REPEAT_NE)
            .Cases("notrack", "notrack", X86::IP_HAS_NOTRACK)
            .Default(X86::IP_NO_PREFIX); // Invalid prefix (impossible)
    Flags |= Prefix;
    if (getLexer().is(AsmToken::EndOfStatement)) {
      // We don't have real instr with the given prefix
      // let's use the prefix as the instr.
      // TODO: there could be several prefixes one after another
      // NOTE(review): the HTML extraction dropped original line 3364 here
      // (upstream pushes the prefix token itself as the instruction) --
      // confirm against upstream before relying on this.
      break;
    }
    // FIXME: The mnemonic won't match correctly if its not in lower case.
    Name = Parser.getTok().getString();
    Parser.Lex(); // eat the prefix
    // Hack: we could have something like "rep # some comment" or
    //    "lock; cmpxchg16b $1" or "lock\0A\09incl" or "lock/incl"
    while (Name.startswith(";") || Name.startswith("\n") ||
           Name.startswith("#") || Name.startswith("\t") ||
           Name.startswith("/")) {
      // FIXME: The mnemonic won't match correctly if its not in lower case.
      Name = Parser.getTok().getString();
      Parser.Lex(); // go to next prefix or instr
    }
  }

  if (Flags)
    PatchedName = Name;

  // Hacks to handle 'data16' and 'data32'
  if (PatchedName == "data16" && is16BitMode()) {
    return Error(NameLoc, "redundant data16 prefix");
  }
  if (PatchedName == "data32") {
    if (is32BitMode())
      return Error(NameLoc, "redundant data32 prefix");
    if (is64BitMode())
      return Error(NameLoc, "'data32' is not supported in 64-bit mode");
    // Hack to 'data16' for the table lookup.
    PatchedName = "data16";

    if (getLexer().isNot(AsmToken::EndOfStatement)) {
      StringRef Next = Parser.getTok().getString();
      getLexer().Lex();
      // data32 effectively changes the instruction suffix.
      // TODO Generalize.
      if (Next == "callw")
        Next = "calll";
      if (Next == "ljmpw")
        Next = "ljmpl";

      Name = Next;
      PatchedName = Name;
      ForcedDataPrefix = X86::Is32Bit;
      IsPrefix = false;
    }
  }

  Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));

  // Push the immediate if we extracted one from the mnemonic.
  // AT&T syntax: the predicate immediate is the FIRST operand.
  if (ComparisonPredicate != ~0U && !isParsingIntelSyntax()) {
    const MCExpr *ImmOp = MCConstantExpr::create(ComparisonPredicate,
                                                 getParser().getContext());
    Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
  }

  // This does the actual operand parsing.  Don't parse any more if we have a
  // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
  // just want to parse the "lock" as the first instruction and the "incl" as
  // the next one.
  if (getLexer().isNot(AsmToken::EndOfStatement) && !IsPrefix) {
    // Parse '*' modifier.
    if (getLexer().is(AsmToken::Star))
      Operands.push_back(X86Operand::CreateToken("*", consumeToken()));

    // Read the operands.
    while (true) {
      if (parseOperand(Operands, Name))
        return true;
      if (HandleAVX512Operand(Operands))
        return true;

      // check for comma and eat it
      if (getLexer().is(AsmToken::Comma))
        Parser.Lex();
      else
        break;
    }

    // In MS inline asm curly braces mark the beginning/end of a block,
    // therefore they should be interepreted as end of statement
    CurlyAsEndOfStatement =
        isParsingIntelSyntax() && isParsingMSInlineAsm() &&
        (getLexer().is(AsmToken::LCurly) || getLexer().is(AsmToken::RCurly));
    if (getLexer().isNot(AsmToken::EndOfStatement) && !CurlyAsEndOfStatement)
      return TokError("unexpected token in argument list");
  }

  // Push the immediate if we extracted one from the mnemonic.
  // Intel syntax: the predicate immediate is the LAST operand.
  if (ComparisonPredicate != ~0U && isParsingIntelSyntax()) {
    const MCExpr *ImmOp = MCConstantExpr::create(ComparisonPredicate,
                                                 getParser().getContext());
    Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
  }

  // Consume the EndOfStatement or the prefix separator Slash
  if (getLexer().is(AsmToken::EndOfStatement) ||
      (IsPrefix && getLexer().is(AsmToken::Slash)))
    Parser.Lex();
  else if (CurlyAsEndOfStatement)
    // Add an actual EndOfStatement before the curly brace
    Info.AsmRewrites->emplace_back(AOK_EndOfStatement,
                                   getLexer().getTok().getLoc(), 0);

  // This is for gas compatibility and cannot be done in td.
  // Adding "p" for some floating point with no argument.
  // For example: fsub --> fsubp
  bool IsFp =
      Name == "fsub" || Name == "fdiv" || Name == "fsubr" || Name == "fdivr";
  if (IsFp && Operands.size() == 1) {
    const char *Repl = StringSwitch<const char *>(Name)
                           .Case("fsub", "fsubp")
                           .Case("fdiv", "fdivp")
                           .Case("fsubr", "fsubrp")
                           .Case("fdivr", "fdivrp");
    static_cast<X86Operand &>(*Operands[0]).setTokenValue(Repl);
  }

  if ((Name == "mov" || Name == "movw" || Name == "movl") &&
      (Operands.size() == 3)) {
    X86Operand &Op1 = (X86Operand &)*Operands[1];
    X86Operand &Op2 = (X86Operand &)*Operands[2];
    SMLoc Loc = Op1.getEndLoc();
    // Moving a 32 or 16 bit value into a segment register has the same
    // behavior. Modify such instructions to always take shorter form.
    if (Op1.isReg() && Op2.isReg() &&
        X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(
            Op2.getReg()) &&
        (X86MCRegisterClasses[X86::GR16RegClassID].contains(Op1.getReg()) ||
         X86MCRegisterClasses[X86::GR32RegClassID].contains(Op1.getReg()))) {
      // Change instruction name to match new instruction.
      if (Name != "mov" && Name[3] == (is16BitMode() ? 'l' : 'w')) {
        Name = is16BitMode() ? "movw" : "movl";
        Operands[0] = X86Operand::CreateToken(Name, NameLoc);
      }
      // Select the correct equivalent 16-/32-bit source register.
      unsigned Reg =
          getX86SubSuperRegisterOrZero(Op1.getReg(), is16BitMode() ? 16 : 32);
      Operands[1] = X86Operand::CreateReg(Reg, Loc, Loc);
    }
  }

  // This is a terrible hack to handle "out[s]?[bwl]? %al, (%dx)" ->
  // "outb %al, %dx".  Out doesn't take a memory form, but this is a widely
  // documented form in various unofficial manuals, so a lot of code uses it.
  if ((Name == "outb" || Name == "outsb" || Name == "outw" || Name == "outsw" ||
       Name == "outl" || Name == "outsl" || Name == "out" || Name == "outs") &&
      Operands.size() == 3) {
    X86Operand &Op = (X86Operand &)*Operands.back();
    if (Op.isDXReg())
      Operands.back() = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
                                              Op.getEndLoc());
  }
  // Same hack for "in[s]?[bwl]? (%dx), %al" -> "inb %dx, %al".
  if ((Name == "inb" || Name == "insb" || Name == "inw" || Name == "insw" ||
       Name == "inl" || Name == "insl" || Name == "in" || Name == "ins") &&
      Operands.size() == 3) {
    X86Operand &Op = (X86Operand &)*Operands[1];
    if (Op.isDXReg())
      Operands[1] = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
                                          Op.getEndLoc());
  }

  // NOTE(review): the HTML extraction dropped original line 3529 here; per
  // upstream LLVM it declares the scratch operand list
  // 'SmallVector<std::unique_ptr<MCParsedAsmOperand>, 2> TmpOperands;' used by
  // the string-instruction fixups below -- confirm against upstream.
  bool HadVerifyError = false;

  // Append default arguments to "ins[bwld]"
  if (Name.startswith("ins") &&
      (Operands.size() == 1 || Operands.size() == 3) &&
      (Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd" ||
       Name == "ins")) {

    AddDefaultSrcDestOperands(TmpOperands,
                              X86Operand::CreateReg(X86::DX, NameLoc, NameLoc),
                              DefaultMemDIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Append default arguments to "outs[bwld]"
  if (Name.startswith("outs") &&
      (Operands.size() == 1 || Operands.size() == 3) &&
      (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
       Name == "outsd" || Name == "outs")) {
    AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
                              X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
  // values of $SIREG according to the mode. It would be nice if this
  // could be achieved with InstAlias in the tables.
  if (Name.startswith("lods") &&
      (Operands.size() == 1 || Operands.size() == 2) &&
      (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
       Name == "lodsl" || Name == "lodsd" || Name == "lodsq")) {
    TmpOperands.push_back(DefaultMemSIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
  // values of $DIREG according to the mode. It would be nice if this
  // could be achieved with InstAlias in the tables.
  if (Name.startswith("stos") &&
      (Operands.size() == 1 || Operands.size() == 2) &&
      (Name == "stos" || Name == "stosb" || Name == "stosw" ||
       Name == "stosl" || Name == "stosd" || Name == "stosq")) {
    TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
  // values of $DIREG according to the mode. It would be nice if this
  // could be achieved with InstAlias in the tables.
  if (Name.startswith("scas") &&
      (Operands.size() == 1 || Operands.size() == 2) &&
      (Name == "scas" || Name == "scasb" || Name == "scasw" ||
       Name == "scasl" || Name == "scasd" || Name == "scasq")) {
    TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Add default SI and DI operands to "cmps[bwlq]".
  if (Name.startswith("cmps") &&
      (Operands.size() == 1 || Operands.size() == 3) &&
      (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
       Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
    AddDefaultSrcDestOperands(TmpOperands, DefaultMemDIOperand(NameLoc),
                              DefaultMemSIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Add default SI and DI operands to "movs[bwlq]".
  if (((Name.startswith("movs") &&
        (Name == "movs" || Name == "movsb" || Name == "movsw" ||
         Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
       (Name.startswith("smov") &&
        (Name == "smov" || Name == "smovb" || Name == "smovw" ||
         Name == "smovl" || Name == "smovd" || Name == "smovq"))) &&
      (Operands.size() == 1 || Operands.size() == 3)) {
    if (Name == "movsd" && Operands.size() == 1 && !isParsingIntelSyntax())
      Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
    AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
                              DefaultMemDIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Check if we encountered an error for one the string insturctions
  if (HadVerifyError) {
    return HadVerifyError;
  }

  // Transforms "xlat mem8" into "xlatb"
  if ((Name == "xlat" || Name == "xlatb") && Operands.size() == 2) {
    X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
    if (Op1.isMem8()) {
      Warning(Op1.getStartLoc(), "memory operand is only for determining the "
                                 "size, (R|E)BX will be used for the location");
      Operands.pop_back();
      static_cast<X86Operand &>(*Operands[0]).setTokenValue("xlatb");
    }
  }

  if (Flags)
    Operands.push_back(X86Operand::CreatePrefix(Flags, NameLoc, NameLoc));
  return false;
}
3632 
3633 bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
3634  const MCRegisterInfo *MRI = getContext().getRegisterInfo();
3635 
3636  switch (Inst.getOpcode()) {
3637  default: return false;
3638  case X86::JMP_1:
3639  // {disp32} forces a larger displacement as if the instruction was relaxed.
3640  // NOTE: 16-bit mode uses 16-bit displacement even though it says {disp32}.
3641  // This matches GNU assembler.
3642  if (ForcedDispEncoding == DispEncoding_Disp32) {
3643  Inst.setOpcode(is16BitMode() ? X86::JMP_2 : X86::JMP_4);
3644  return true;
3645  }
3646 
3647  return false;
3648  case X86::JCC_1:
3649  // {disp32} forces a larger displacement as if the instruction was relaxed.
3650  // NOTE: 16-bit mode uses 16-bit displacement even though it says {disp32}.
3651  // This matches GNU assembler.
3652  if (ForcedDispEncoding == DispEncoding_Disp32) {
3653  Inst.setOpcode(is16BitMode() ? X86::JCC_2 : X86::JCC_4);
3654  return true;
3655  }
3656 
3657  return false;
3658  case X86::VMOVZPQILo2PQIrr:
3659  case X86::VMOVAPDrr:
3660  case X86::VMOVAPDYrr:
3661  case X86::VMOVAPSrr:
3662  case X86::VMOVAPSYrr:
3663  case X86::VMOVDQArr:
3664  case X86::VMOVDQAYrr:
3665  case X86::VMOVDQUrr:
3666  case X86::VMOVDQUYrr:
3667  case X86::VMOVUPDrr:
3668  case X86::VMOVUPDYrr:
3669  case X86::VMOVUPSrr:
3670  case X86::VMOVUPSYrr: {
3671  // We can get a smaller encoding by using VEX.R instead of VEX.B if one of
3672  // the registers is extended, but other isn't.
3673  if (ForcedVEXEncoding == VEXEncoding_VEX3 ||
3674  MRI->getEncodingValue(Inst.getOperand(0).getReg()) >= 8 ||
3675  MRI->getEncodingValue(Inst.getOperand(1).getReg()) < 8)
3676  return false;
3677 
3678  unsigned NewOpc;
3679  switch (Inst.getOpcode()) {
3680  default: llvm_unreachable("Invalid opcode");
3681  case X86::VMOVZPQILo2PQIrr: NewOpc = X86::VMOVPQI2QIrr; break;
3682  case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
3683  case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
3684  case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
3685  case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
3686  case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
3687  case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
3688  case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
3689  case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
3690  case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
3691  case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
3692  case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
3693  case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
3694  }
3695  Inst.setOpcode(NewOpc);
3696  return true;
3697  }
3698  case X86::VMOVSDrr:
3699  case X86::VMOVSSrr: {
3700  // We can get a smaller encoding by using VEX.R instead of VEX.B if one of
3701  // the registers is extended, but other isn't.
3702  if (ForcedVEXEncoding == VEXEncoding_VEX3 ||
3703  MRI->getEncodingValue(Inst.getOperand(0).getReg()) >= 8 ||
3704  MRI->getEncodingValue(Inst.getOperand(2).getReg()) < 8)
3705  return false;
3706 
3707  unsigned NewOpc;
3708  switch (Inst.getOpcode()) {
3709  default: llvm_unreachable("Invalid opcode");
3710  case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
3711  case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
3712  }
3713  Inst.setOpcode(NewOpc);
3714  return true;
3715  }
3716  case X86::RCR8ri: case X86::RCR16ri: case X86::RCR32ri: case X86::RCR64ri:
3717  case X86::RCL8ri: case X86::RCL16ri: case X86::RCL32ri: case X86::RCL64ri:
3718  case X86::ROR8ri: case X86::ROR16ri: case X86::ROR32ri: case X86::ROR64ri:
3719  case X86::ROL8ri: case X86::ROL16ri: case X86::ROL32ri: case X86::ROL64ri:
3720  case X86::SAR8ri: case X86::SAR16ri: case X86::SAR32ri: case X86::SAR64ri:
3721  case X86::SHR8ri: case X86::SHR16ri: case X86::SHR32ri: case X86::SHR64ri:
3722  case X86::SHL8ri: case X86::SHL16ri: case X86::SHL32ri: case X86::SHL64ri: {
3723  // Optimize s{hr,ar,hl} $1, <op> to "shift <op>". Similar for rotate.
3724  // FIXME: It would be great if we could just do this with an InstAlias.
3725  if (!Inst.getOperand(2).isImm() || Inst.getOperand(2).getImm() != 1)
3726  return false;
3727 
3728  unsigned NewOpc;
3729  switch (Inst.getOpcode()) {
3730  default: llvm_unreachable("Invalid opcode");
3731  case X86::RCR8ri: NewOpc = X86::RCR8r1; break;
3732  case X86::RCR16ri: NewOpc = X86::RCR16r1; break;
3733  case X86::RCR32ri: NewOpc = X86::RCR32r1; break;
3734  case X86::RCR64ri: NewOpc = X86::RCR64r1; break;
3735  case X86::RCL8ri: NewOpc = X86::RCL8r1; break;
3736  case X86::RCL16ri: NewOpc = X86::RCL16r1; break;
3737  case X86::RCL32ri: NewOpc = X86::RCL32r1; break;
3738  case X86::RCL64ri: NewOpc = X86::RCL64r1; break;
3739  case X86::ROR8ri: NewOpc = X86::ROR8r1; break;
3740  case X86::ROR16ri: NewOpc = X86::ROR16r1; break;
3741  case X86::ROR32ri: NewOpc = X86::ROR32r1; break;
3742  case X86::ROR64ri: NewOpc = X86::ROR64r1; break;
3743  case X86::ROL8ri: NewOpc = X86::ROL8r1; break;
3744  case X86::ROL16ri: NewOpc = X86::ROL16r1; break;
3745  case X86::ROL32ri: NewOpc = X86::ROL32r1; break;
3746  case X86::ROL64ri: NewOpc = X86::ROL64r1; break;
3747  case X86::SAR8ri: NewOpc = X86::SAR8r1; break;
3748  case X86::SAR16ri: NewOpc = X86::SAR16r1; break;
3749  case X86::SAR32ri: NewOpc = X86::SAR32r1; break;
3750  case X86::SAR64ri: NewOpc = X86::SAR64r1; break;
3751  case X86::SHR8ri: NewOpc = X86::SHR8r1; break;
3752  case X86::SHR16ri: NewOpc = X86::SHR16r1; break;
3753  case X86::SHR32ri: NewOpc = X86::SHR32r1; break;
3754  case X86::SHR64ri: NewOpc = X86::SHR64r1; break;
3755  case X86::SHL8ri: NewOpc = X86::SHL8r1; break;
3756  case X86::SHL16ri: NewOpc = X86::SHL16r1; break;
3757  case X86::SHL32ri: NewOpc = X86::SHL32r1; break;
3758  case X86::SHL64ri: NewOpc = X86::SHL64r1; break;
3759  }
3760 
3761  MCInst TmpInst;
3762  TmpInst.setOpcode(NewOpc);
3763  TmpInst.addOperand(Inst.getOperand(0));
3764  TmpInst.addOperand(Inst.getOperand(1));
3765  Inst = TmpInst;
3766  return true;
3767  }
3768  case X86::RCR8mi: case X86::RCR16mi: case X86::RCR32mi: case X86::RCR64mi:
3769  case X86::RCL8mi: case X86::RCL16mi: case X86::RCL32mi: case X86::RCL64mi:
3770  case X86::ROR8mi: case X86::ROR16mi: case X86::ROR32mi: case X86::ROR64mi:
3771  case X86::ROL8mi: case X86::ROL16mi: case X86::ROL32mi: case X86::ROL64mi:
3772  case X86::SAR8mi: case X86::SAR16mi: case X86::SAR32mi: case X86::SAR64mi:
3773  case X86::SHR8mi: case X86::SHR16mi: case X86::SHR32mi: case X86::SHR64mi:
3774  case X86::SHL8mi: case X86::SHL16mi: case X86::SHL32mi: case X86::SHL64mi: {
3775  // Optimize s{hr,ar,hl} $1, <op> to "shift <op>". Similar for rotate.
3776  // FIXME: It would be great if we could just do this with an InstAlias.
3777  if (!Inst.getOperand(X86::AddrNumOperands).isImm() ||
3778  Inst.getOperand(X86::AddrNumOperands).getImm() != 1)
3779  return false;
3780 
3781  unsigned NewOpc;
3782  switch (Inst.getOpcode()) {
3783  default: llvm_unreachable("Invalid opcode");
3784  case X86::RCR8mi: NewOpc = X86::RCR8m1; break;
3785  case X86::RCR16mi: NewOpc = X86::RCR16m1; break;
3786  case X86::RCR32mi: NewOpc = X86::RCR32m1; break;
3787  case X86::RCR64mi: NewOpc = X86::RCR64m1; break;
3788  case X86::RCL8mi: NewOpc = X86::RCL8m1; break;
3789  case X86::RCL16mi: NewOpc = X86::RCL16m1; break;
3790  case X86::RCL32mi: NewOpc = X86::RCL32m1; break;
3791  case X86::RCL64mi: NewOpc = X86::RCL64m1; break;
3792  case X86::ROR8mi: NewOpc = X86::ROR8m1; break;
3793  case X86::ROR16mi: NewOpc = X86::ROR16m1; break;
3794  case X86::ROR32mi: NewOpc = X86::ROR32m1; break;
3795  case X86::ROR64mi: NewOpc = X86::ROR64m1; break;
3796  case X86::ROL8mi: NewOpc = X86::ROL8m1; break;
3797  case X86::ROL16mi: NewOpc = X86::ROL16m1; break;
3798  case X86::ROL32mi: NewOpc = X86::ROL32m1; break;
3799  case X86::ROL64mi: NewOpc = X86::ROL64m1; break;
3800  case X86::SAR8mi: NewOpc = X86::SAR8m1; break;
3801  case X86::SAR16mi: NewOpc = X86::SAR16m1; break;
3802  case X86::SAR32mi: NewOpc = X86::SAR32m1; break;
3803  case X86::SAR64mi: NewOpc = X86::SAR64m1; break;
3804  case X86::SHR8mi: NewOpc = X86::SHR8m1; break;
3805  case X86::SHR16mi: NewOpc = X86::SHR16m1; break;
3806  case X86::SHR32mi: NewOpc = X86::SHR32m1; break;
3807  case X86::SHR64mi: NewOpc = X86::SHR64m1; break;
3808  case X86::SHL8mi: NewOpc = X86::SHL8m1; break;
3809  case X86::SHL16mi: NewOpc = X86::SHL16m1; break;
3810  case X86::SHL32mi: NewOpc = X86::SHL32m1; break;
3811  case X86::SHL64mi: NewOpc = X86::SHL64m1; break;
3812  }
3813 
3814  MCInst TmpInst;
3815  TmpInst.setOpcode(NewOpc);
3816  for (int i = 0; i != X86::AddrNumOperands; ++i)
3817  TmpInst.addOperand(Inst.getOperand(i));
3818  Inst = TmpInst;
3819  return true;
3820  }
3821  case X86::INT: {
3822  // Transforms "int $3" into "int3" as a size optimization. We can't write an
3823  // instalias with an immediate operand yet.
3824  if (!Inst.getOperand(0).isImm() || Inst.getOperand(0).getImm() != 3)
3825  return false;
3826 
3827  MCInst TmpInst;
3828  TmpInst.setOpcode(X86::INT3);
3829  Inst = TmpInst;
3830  return true;
3831  }
3832  }
3833 }
3834 
3835 bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
3836  using namespace X86;
3837  const MCRegisterInfo *MRI = getContext().getRegisterInfo();
3838  unsigned Opcode = Inst.getOpcode();
3839  uint64_t TSFlags = MII.get(Opcode).TSFlags;
3840  if (isVFCMADDCPH(Opcode) || isVFCMADDCSH(Opcode) || isVFMADDCPH(Opcode) ||
3841  isVFMADDCSH(Opcode)) {
3842  unsigned Dest = Inst.getOperand(0).getReg();
3843  for (unsigned i = 2; i < Inst.getNumOperands(); i++)
3844  if (Inst.getOperand(i).isReg() && Dest == Inst.getOperand(i).getReg())
3845  return Warning(Ops[0]->getStartLoc(), "Destination register should be "
3846  "distinct from source registers");
3847  } else if (isVFCMULCPH(Opcode) || isVFCMULCSH(Opcode) || isVFMULCPH(Opcode) ||
3848  isVFMULCSH(Opcode)) {
3849  unsigned Dest = Inst.getOperand(0).getReg();
3850  // The mask variants have different operand list. Scan from the third
3851  // operand to avoid emitting incorrect warning.
3852  // VFMULCPHZrr Dest, Src1, Src2
3853  // VFMULCPHZrrk Dest, Dest, Mask, Src1, Src2
3854  // VFMULCPHZrrkz Dest, Mask, Src1, Src2
3855  for (unsigned i = TSFlags & X86II::EVEX_K ? 2 : 1;
3856  i < Inst.getNumOperands(); i++)
3857  if (Inst.getOperand(i).isReg() && Dest == Inst.getOperand(i).getReg())
3858  return Warning(Ops[0]->getStartLoc(), "Destination register should be "
3859  "distinct from source registers");
3860  } else if (isV4FMADDPS(Opcode) || isV4FMADDSS(Opcode) ||
3861  isV4FNMADDPS(Opcode) || isV4FNMADDSS(Opcode) ||
3862  isVP4DPWSSDS(Opcode) || isVP4DPWSSD(Opcode)) {
3863  unsigned Src2 = Inst.getOperand(Inst.getNumOperands() -
3865  unsigned Src2Enc = MRI->getEncodingValue(Src2);
3866  if (Src2Enc % 4 != 0) {
3868  unsigned GroupStart = (Src2Enc / 4) * 4;
3869  unsigned GroupEnd = GroupStart + 3;
3870  return Warning(Ops[0]->getStartLoc(),
3871  "source register '" + RegName + "' implicitly denotes '" +
3872  RegName.take_front(3) + Twine(GroupStart) + "' to '" +
3873  RegName.take_front(3) + Twine(GroupEnd) +
3874  "' source group");
3875  }
3876  } else if (isVGATHERDPD(Opcode) || isVGATHERDPS(Opcode) ||
3877  isVGATHERQPD(Opcode) || isVGATHERQPS(Opcode) ||
3878  isVPGATHERDD(Opcode) || isVPGATHERDQ(Opcode) ||
3879  isVPGATHERQD(Opcode) || isVPGATHERQQ(Opcode)) {
3880  bool HasEVEX = (TSFlags & X86II::EncodingMask) == X86II::EVEX;
3881  if (HasEVEX) {
3882  unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
3883  unsigned Index = MRI->getEncodingValue(
3884  Inst.getOperand(4 + X86::AddrIndexReg).getReg());
3885  if (Dest == Index)
3886  return Warning(Ops[0]->getStartLoc(), "index and destination registers "
3887  "should be distinct");
3888  } else {
3889  unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
3890  unsigned Mask = MRI->getEncodingValue(Inst.getOperand(1).getReg());
3891  unsigned Index = MRI->getEncodingValue(
3892  Inst.getOperand(3 + X86::AddrIndexReg).getReg());
3893  if (Dest == Mask || Dest == Index || Mask == Index)
3894  return Warning(Ops[0]->getStartLoc(), "mask, index, and destination "
3895  "registers should be distinct");
3896  }
3897  }
3898 
3899  // Check that we aren't mixing AH/BH/CH/DH with REX prefix. We only need to
3900  // check this with the legacy encoding, VEX/EVEX/XOP don't use REX.
3901  if ((TSFlags & X86II::EncodingMask) == 0) {
3902  MCPhysReg HReg = X86::NoRegister;
3903  bool UsesRex = TSFlags & X86II::REX_W;
3904  unsigned NumOps = Inst.getNumOperands();
3905  for (unsigned i = 0; i != NumOps; ++i) {
3906  const MCOperand &MO = Inst.getOperand(i);
3907  if (!MO.isReg())
3908  continue;
3909  unsigned Reg = MO.getReg();
3910  if (Reg == X86::AH || Reg == X86::BH || Reg == X86::CH || Reg == X86::DH)
3911  HReg = Reg;
3914  UsesRex = true;
3915  }
3916 
3917  if (UsesRex && HReg != X86::NoRegister) {
3919  return Error(Ops[0]->getStartLoc(),
3920  "can't encode '" + RegName + "' in an instruction requiring "
3921  "REX prefix");
3922  }
3923  }
3924 
3925  return false;
3926 }
3927 
// Maps a subtarget-feature bit index to its printable name. Declared `static`,
// so the definition must appear later in this translation unit — presumably
// the tablegen-generated matcher include; TODO(review) confirm.
static const char *getSubtargetFeatureName(uint64_t Val);
3929 
3930 void X86AsmParser::emitWarningForSpecialLVIInstruction(SMLoc Loc) {
3931  Warning(Loc, "Instruction may be vulnerable to LVI and "
3932  "requires manual mitigation");
3933  Note(SMLoc(), "See https://software.intel.com/"
3934  "security-software-guidance/insights/"
3935  "deep-dive-load-value-injection#specialinstructions"
3936  " for more information");
3937 }
3938 
3939 /// RET instructions and also instructions that indirect calls/jumps from memory
3940 /// combine a load and a branch within a single instruction. To mitigate these
3941 /// instructions against LVI, they must be decomposed into separate load and
3942 /// branch instructions, with an LFENCE in between. For more details, see:
3943 /// - X86LoadValueInjectionRetHardening.cpp
3944 /// - X86LoadValueInjectionIndirectThunks.cpp
3945 /// - https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection
3946 ///
/// Emits the mitigation sequence (or a warning) directly to the streamer;
/// this function returns no value.
3948 void X86AsmParser::applyLVICFIMitigation(MCInst &Inst, MCStreamer &Out) {
3949  // Information on control-flow instructions that require manual mitigation can
3950  // be found here:
3951  // https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
3952  switch (Inst.getOpcode()) {
3953  case X86::RET16:
3954  case X86::RET32:
3955  case X86::RET64:
3956  case X86::RETI16:
3957  case X86::RETI32:
3958  case X86::RETI64: {
3959  MCInst ShlInst, FenceInst;
3960  bool Parse32 = is32BitMode() || Code16GCC;
3961  unsigned Basereg =
3962  is64BitMode() ? X86::RSP : (Parse32 ? X86::ESP : X86::SP);
3963  const MCExpr *Disp = MCConstantExpr::create(0, getContext());
3964  auto ShlMemOp = X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
3965  /*BaseReg=*/Basereg, /*IndexReg=*/0,
3966  /*Scale=*/1, SMLoc{}, SMLoc{}, 0);
3967  ShlInst.setOpcode(X86::SHL64mi);
3968  ShlMemOp->addMemOperands(ShlInst, 5);
3969  ShlInst.addOperand(MCOperand::createImm(0));
3970  FenceInst.setOpcode(X86::LFENCE);
3971  Out.emitInstruction(ShlInst, getSTI());
3972  Out.emitInstruction(FenceInst, getSTI());
3973  return;
3974  }
3975  case X86::JMP16m:
3976  case X86::JMP32m:
3977  case X86::JMP64m:
3978  case X86::CALL16m:
3979  case X86::CALL32m:
3980  case X86::CALL64m:
3981  emitWarningForSpecialLVIInstruction(Inst.getLoc());
3982  return;
3983  }
3984 }
3985 
/// To mitigate LVI, every instruction that performs a load can be followed by
/// an LFENCE instruction to squash any potential mis-speculation. There are
/// some instructions that require additional considerations, and may require
/// manual mitigation. For more details, see:
/// https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection
///
/// Emits any mitigation (an LFENCE) or warning directly to the streamer; this
/// function returns no value.
3993 void X86AsmParser::applyLVILoadHardeningMitigation(MCInst &Inst,
3994  MCStreamer &Out) {
3995  auto Opcode = Inst.getOpcode();
3996  auto Flags = Inst.getFlags();
3997  if ((Flags & X86::IP_HAS_REPEAT) || (Flags & X86::IP_HAS_REPEAT_NE)) {
3998  // Information on REP string instructions that require manual mitigation can
3999  // be found here:
4000  // https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
4001  switch (Opcode) {
4002  case X86::CMPSB:
4003  case X86::CMPSW:
4004  case X86::CMPSL:
4005  case X86::CMPSQ:
4006  case X86::SCASB:
4007  case X86::SCASW:
4008  case X86::SCASL:
4009  case X86::SCASQ:
4010  emitWarningForSpecialLVIInstruction(Inst.getLoc());
4011  return;
4012  }
4013  } else if (Opcode == X86::REP_PREFIX || Opcode == X86::REPNE_PREFIX) {
4014  // If a REP instruction is found on its own line, it may or may not be
4015  // followed by a vulnerable instruction. Emit a warning just in case.
4016  emitWarningForSpecialLVIInstruction(Inst.getLoc());
4017  return;
4018  }
4019 
4020  const MCInstrDesc &MCID = MII.get(Inst.getOpcode());
4021 
4022  // Can't mitigate after terminators or calls. A control flow change may have
4023  // already occurred.
4024  if (MCID.isTerminator() || MCID.isCall())
4025  return;
4026 
4027  // LFENCE has the mayLoad property, don't double fence.
4028  if (MCID.mayLoad() && Inst.getOpcode() != X86::LFENCE) {
4029  MCInst FenceInst;
4030  FenceInst.setOpcode(X86::LFENCE);
4031  Out.emitInstruction(FenceInst, getSTI());
4032  }
4033 }
4034 
4035 void X86AsmParser::emitInstruction(MCInst &Inst, OperandVector &Operands,
4036  MCStreamer &Out) {
4037  if (LVIInlineAsmHardening &&
4038  getSTI().getFeatureBits()[X86::FeatureLVIControlFlowIntegrity])
4039  applyLVICFIMitigation(Inst, Out);
4040 
4041  Out.emitInstruction(Inst, getSTI());
4042 
4043  if (LVIInlineAsmHardening &&
4044  getSTI().getFeatureBits()[X86::FeatureLVILoadHardening])
4045  applyLVILoadHardeningMitigation(Inst, Out);
4046 }
4047 
4048 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4050  MCStreamer &Out, uint64_t &ErrorInfo,
4051  bool MatchingInlineAsm) {
4052  if (isParsingIntelSyntax())
4053  return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
4054  MatchingInlineAsm);
4055  return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
4056  MatchingInlineAsm);
4057 }
4058 
4059 void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
4061  bool MatchingInlineAsm) {
4062  // FIXME: This should be replaced with a real .td file alias mechanism.
4063  // Also, MatchInstructionImpl should actually *do* the EmitInstruction
4064  // call.
4065  const char *Repl = StringSwitch<const char *>(Op.getToken())
4066  .Case("finit", "fninit")
4067  .Case("fsave", "fnsave")
4068  .Case("fstcw", "fnstcw")
4069  .Case("fstcww", "fnstcw")
4070  .Case("fstenv", "fnstenv")
4071  .Case("fstsw", "fnstsw")
4072  .Case("fstsww", "fnstsw")
4073  .Case("fclex", "fnclex")
4074  .Default(nullptr);
4075  if (Repl) {
4076  MCInst Inst;
4077  Inst.setOpcode(X86::WAIT);
4078  Inst.setLoc(IDLoc);
4079  if (!MatchingInlineAsm)
4080  emitInstruction(Inst, Operands, Out);
4081  Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
4082  }
4083 }
4084 
4085 bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc,
4086  const FeatureBitset &MissingFeatures,
4087  bool MatchingInlineAsm) {
4088  assert(MissingFeatures.any() && "Unknown missing feature!");
4091  OS << "instruction requires:";
4092  for (unsigned i = 0, e = MissingFeatures.size(); i != e; ++i) {
4093  if (MissingFeatures[i])
4094  OS << ' ' << getSubtargetFeatureName(i);
4095  }
4096  return Error(IDLoc, OS.str(), SMRange(), MatchingInlineAsm);
4097 }
4098 
4100  unsigned Result = 0;
4101  X86Operand &Prefix = static_cast<X86Operand &>(*Operands.back());
4102  if (Prefix.isPrefix()) {
4103  Result = Prefix.getPrefix();
4104  Operands.pop_back();
4105  }
4106  return Result;
4107 }
4108 
4109 unsigned X86AsmParser::checkTargetMatchPredicate(MCInst &Inst) {
4110  unsigned Opc = Inst.getOpcode();
4111  const MCInstrDesc &MCID = MII.get(Opc);
4112 
4113  if (ForcedVEXEncoding == VEXEncoding_EVEX &&
4115  return Match_Unsupported;
4116 
4117  if ((ForcedVEXEncoding == VEXEncoding_VEX ||
4118  ForcedVEXEncoding == VEXEncoding_VEX2 ||
4119  ForcedVEXEncoding == VEXEncoding_VEX3) &&
4121  return Match_Unsupported;
4122 
4123  // These instructions are only available with {vex}, {vex2} or {vex3} prefix
4124  if (MCID.TSFlags & X86II::ExplicitVEXPrefix &&
4125  (ForcedVEXEncoding != VEXEncoding_VEX &&
4126  ForcedVEXEncoding != VEXEncoding_VEX2 &&
4127  ForcedVEXEncoding != VEXEncoding_VEX3))
4128  return Match_Unsupported;
4129 
4130  return Match_Success;
4131 }
4132 
4133 bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
4135  MCStreamer &Out,
4137  bool MatchingInlineAsm) {
4138  assert(!Operands.empty() && "Unexpect empty operand list!");
4139  assert((*Operands[0]).isToken() && "Leading operand should always be a mnemonic!");
4140  SMRange EmptyRange = std::nullopt;
4141 
4142  // First, handle aliases that expand to multiple instructions.
4143  MatchFPUWaitAlias(IDLoc, static_cast<X86Operand &>(*Operands[0]), Operands,
4144  Out, MatchingInlineAsm);
4145  X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
4146  unsigned Prefixes = getPrefixes(Operands);
4147 
4148  MCInst Inst;
4149 
4150  // If VEX/EVEX encoding is forced, we need to pass the USE_* flag to the
4151  // encoder and printer.
4152  if (ForcedVEXEncoding == VEXEncoding_VEX)
4153  Prefixes |= X86::IP_USE_VEX;
4154  else if (ForcedVEXEncoding == VEXEncoding_VEX2)
4155  Prefixes |= X86::IP_USE_VEX2;
4156  else if (ForcedVEXEncoding == VEXEncoding_VEX3)
4157  Prefixes |= X86::IP_USE_VEX3;
4158  else if (ForcedVEXEncoding == VEXEncoding_EVEX)
4159  Prefixes |= X86::IP_USE_EVEX;
4160 
4161  // Set encoded flags for {disp8} and {disp32}.
4162  if (ForcedDispEncoding == DispEncoding_Disp8)
4163  Prefixes |= X86::IP_USE_DISP8;
4164  else if (ForcedDispEncoding == DispEncoding_Disp32)
4165  Prefixes |= X86::IP_USE_DISP32;
4166 
4167  if (Prefixes)
4168  Inst.setFlags(Prefixes);
4169 
4170  // In 16-bit mode, if data32 is specified, temporarily switch to 32-bit mode
4171  // when matching the instruction.
4172  if (ForcedDataPrefix == X86::Is32Bit)
4173  SwitchMode(X86::Is32Bit);
4174  // First, try a direct match.
4175  FeatureBitset MissingFeatures;
4176  unsigned OriginalError = MatchInstruction(Operands, Inst, ErrorInfo,
4177  MissingFeatures, MatchingInlineAsm,
4178  isParsingIntelSyntax());
4179  if (ForcedDataPrefix == X86::Is32Bit) {
4180  SwitchMode(X86::Is16Bit);
4181  ForcedDataPrefix = 0;
4182  }
4183  switch (OriginalError) {
4184  default: llvm_unreachable("Unexpected match result!");
4185  case Match_Success:
4186  if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
4187  return true;
4188  // Some instructions need post-processing to, for example, tweak which
4189  // encoding is selected. Loop on it while changes happen so the
4190  // individual transformations can chain off each other.
4191  if (!MatchingInlineAsm)
4192  while (processInstruction(Inst, Operands))
4193  ;
4194 
4195  Inst.setLoc(IDLoc);
4196  if (!MatchingInlineAsm)
4197  emitInstruction(Inst, Operands, Out);
4198  Opcode = Inst.getOpcode();
4199  return false;
4200  case Match_InvalidImmUnsignedi4: {
4201  SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc();
4202  if (ErrorLoc == SMLoc())
4203  ErrorLoc = IDLoc;
4204  return Error(ErrorLoc, "immediate must be an integer in range [0, 15]",
4205  EmptyRange, MatchingInlineAsm);
4206  }
4207  case Match_MissingFeature:
4208  return ErrorMissingFeature(IDLoc, MissingFeatures, MatchingInlineAsm);
4209  case Match_InvalidOperand:
4210  case Match_MnemonicFail:
4211  case Match_Unsupported:
4212  break;
4213  }
4214  if (Op.getToken().empty()) {
4215  Error(IDLoc, "instruction must have size higher than 0", EmptyRange,
4216  MatchingInlineAsm);
4217  return true;
4218  }
4219 
4220  // FIXME: Ideally, we would only attempt suffix matches for things which are
4221  // valid prefixes, and we could just infer the right unambiguous
4222  // type. However, that requires substantially more matcher support than the
4223  // following hack.
4224 
4225  // Change the operand to point to a temporary token.
4226  StringRef Base = Op.getToken();
4227  SmallString<16> Tmp;
4228  Tmp += Base;
4229  Tmp += ' ';
4230  Op.setTokenValue(Tmp);
4231 
4232  // If this instruction starts with an 'f', then it is a floating point stack
4233  // instruction. These come in up to three forms for 32-bit, 64-bit, and
4234  // 80-bit floating point, which use the suffixes s,l,t respectively.
4235  //
4236  // Otherwise, we assume that this may be an integer instruction, which comes
4237  // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
4238  const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
4239  // MemSize corresponding to Suffixes. { 8, 16, 32, 64 } { 32, 64, 80, 0 }
4240  const char *MemSize = Base[0] != 'f' ? "\x08\x10\x20\x40" : "\x20\x40\x50\0";
4241 
4242  // Check for the various suffix matches.
4243  uint64_t ErrorInfoIgnore;
4244  FeatureBitset ErrorInfoMissingFeatures; // Init suppresses compiler warnings.
4245  unsigned Match[4];
4246 
4247  // Some instruction like VPMULDQ is NOT the variant of VPMULD but a new one.
4248  // So we should make sure the suffix matcher only works for memory variant
4249  // that has the same size with the suffix.
4250  // FIXME: This flag is a workaround for legacy instructions that didn't
4251  // declare non suffix variant assembly.
4252  bool HasVectorReg = false;
4253  X86Operand *MemOp = nullptr;
4254  for (const auto &Op : Operands) {
4255  X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
4256  if (X86Op->isVectorReg())
4257  HasVectorReg = true;
4258  else if (X86Op->isMem()) {
4259  MemOp = X86Op;
4260  assert(MemOp->Mem.Size == 0 && "Memory size always 0 under ATT syntax");
4261  // Have we found an unqualified memory operand,
4262  // break. IA allows only one memory operand.
4263  break;
4264  }
4265  }
4266 
4267  for (unsigned I = 0, E = std::size(Match); I != E; ++I) {
4268  Tmp.back() = Suffixes[I];
4269  if (MemOp && HasVectorReg)
4270  MemOp->Mem.Size = MemSize[I];
4271  Match[I] = Match_MnemonicFail;
4272  if (MemOp || !HasVectorReg) {
4273  Match[I] =
4274  MatchInstruction(Operands, Inst, ErrorInfoIgnore, MissingFeatures,
4275  MatchingInlineAsm, isParsingIntelSyntax());
4276  // If this returned as a missing feature failure, remember that.
4277  if (Match[I] == Match_MissingFeature)
4278  ErrorInfoMissingFeatures = MissingFeatures;
4279  }
4280  }
4281 
4282  // Restore the old token.
4283  Op.setTokenValue(Base);
4284 
4285  // If exactly one matched, then we treat that as a successful match (and the
4286  // instruction will already have been filled in correctly, since the failing
4287  // matches won't have modified it).
4288  unsigned NumSuccessfulMatches = llvm::count(Match, Match_Success);
4289  if (NumSuccessfulMatches == 1) {
4290  if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
4291  return true;
4292  // Some instructions need post-processing to, for example, tweak which
4293  // encoding is selected. Loop on it while changes happen so the
4294  // individual transformations can chain off each other.
4295  if (!MatchingInlineAsm)
4296  while (processInstruction(Inst, Operands))
4297  ;
4298 
4299  Inst.setLoc(IDLoc);
4300  if (!MatchingInlineAsm)
4301  emitInstruction(Inst, Operands, Out);
4302  Opcode = Inst.getOpcode();
4303  return false;
4304  }
4305 
4306  // Otherwise, the match failed, try to produce a decent error message.
4307 
4308  // If we had multiple suffix matches, then identify this as an ambiguous
4309  // match.
4310  if (NumSuccessfulMatches > 1) {
4311  char MatchChars[4];
4312  unsigned NumMatches = 0;
4313  for (unsigned I = 0, E = std::size(Match); I != E; ++I)
4314  if (Match[I] == Match_Success)
4315  MatchChars[NumMatches++] = Suffixes[I];
4316 
4319  OS << "ambiguous instructions require an explicit suffix (could be ";
4320  for (unsigned i = 0; i != NumMatches; ++i) {
4321  if (i != 0)
4322  OS << ", ";
4323  if (i + 1 == NumMatches)
4324  OS << "or ";
4325  OS << "'" << Base << MatchChars[i] << "'";
4326  }
4327  OS << ")";
4328  Error(IDLoc, OS.str(), EmptyRange, MatchingInlineAsm);
4329  return true;
4330  }
4331 
4332  // Okay, we know that none of the variants matched successfully.
4333 
4334  // If all of the instructions reported an invalid mnemonic, then the original
4335  // mnemonic was invalid.
4336  if (llvm::count(Match, Match_MnemonicFail) == 4) {
4337  if (OriginalError == Match_MnemonicFail)
4338  return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
4339  Op.getLocRange(), MatchingInlineAsm);
4340 
4341  if (OriginalError == Match_Unsupported)
4342  return Error(IDLoc, "unsupported instruction", EmptyRange,
4343  MatchingInlineAsm);
4344 
4345  assert(OriginalError == Match_InvalidOperand && "Unexpected error");
4346  // Recover location info for the operand if we know which was the problem.
4347  if (ErrorInfo != ~0ULL) {
4348  if (ErrorInfo >= Operands.size())
4349  return Error(IDLoc, "too few operands for instruction", EmptyRange,
4350  MatchingInlineAsm);
4351 
4352  X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
4353  if (Operand.getStartLoc().isValid()) {
4354  SMRange OperandRange = Operand.getLocRange();
4355  return Error(Operand.getStartLoc(), "invalid operand for instruction",
4356  OperandRange, MatchingInlineAsm);
4357  }
4358  }
4359 
4360  return Error(IDLoc, "invalid operand for instruction", EmptyRange,
4361  MatchingInlineAsm);
4362  }
4363 
4364  // If one instruction matched as unsupported, report this as unsupported.
4365  if (llvm::count(Match, Match_Unsupported) == 1) {
4366  return Error(IDLoc, "unsupported instruction", EmptyRange,
4367  MatchingInlineAsm);
4368  }
4369 
4370  // If one instruction matched with a missing feature, report this as a
4371  // missing feature.
4372  if (llvm::count(Match, Match_MissingFeature) == 1) {
4373  ErrorInfo = Match_MissingFeature;
4374  return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures,
4375  MatchingInlineAsm);
4376  }
4377 
4378  // If one instruction matched with an invalid operand, report this as an
4379  // operand failure.
4380  if (llvm::count(Match, Match_InvalidOperand) == 1) {
4381  return Error(IDLoc, "invalid operand for instruction", EmptyRange,
4382  MatchingInlineAsm);
4383  }
4384 
4385  // If all of these were an outright failure, report it in a useless way.
4386  Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
4387  EmptyRange, MatchingInlineAsm);
4388  return true;
4389 }
4390 
4391 bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
4393  MCStreamer &Out,
4395  bool MatchingInlineAsm) {
4396  assert(!Operands.empty() && "Unexpect empty operand list!");
4397  assert((*Operands[0]).isToken() && "Leading operand should always be a mnemonic!");
4398  StringRef Mnemonic = (static_cast<X86Operand &>(*Operands[0])).getToken();
4399  SMRange EmptyRange = std::nullopt;
4400  StringRef Base = (static_cast<X86Operand &>(*Operands[0])).getToken();
4401  unsigned Prefixes = getPrefixes(Operands);
4402 
4403  // First, handle aliases that expand to multiple instructions.
4404  MatchFPUWaitAlias(IDLoc, static_cast<X86Operand &>(*Operands[0]), Operands, Out, MatchingInlineAsm);
4405  X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
4406 
4407  MCInst Inst;
4408 
4409  // If VEX/EVEX encoding is forced, we need to pass the USE_* flag to the
4410  // encoder and printer.
4411  if (ForcedVEXEncoding == VEXEncoding_VEX)
4412  Prefixes |= X86::IP_USE_VEX;
4413  else if (ForcedVEXEncoding == VEXEncoding_VEX2)
4414  Prefixes |= X86::IP_USE_VEX2;
4415  else if (ForcedVEXEncoding == VEXEncoding_VEX3)
4416  Prefixes |= X86::IP_USE_VEX3;
4417  else if (ForcedVEXEncoding == VEXEncoding_EVEX)
4418  Prefixes |= X86::IP_USE_EVEX;
4419 
4420  // Set encoded flags for {disp8} and {disp32}.
4421  if (ForcedDispEncoding == DispEncoding_Disp8)
4422  Prefixes |= X86::IP_USE_DISP8;
4423  else if (ForcedDispEncoding == DispEncoding_Disp32)
4424  Prefixes |= X86::IP_USE_DISP32;
4425 
4426  if (Prefixes)
4427  Inst.setFlags(Prefixes);
4428 
4429  // Find one unsized memory operand, if present.
4430  X86Operand *UnsizedMemOp = nullptr;
4431  for (const auto &Op : Operands) {
4432  X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
4433  if (X86Op->isMemUnsized()) {
4434  UnsizedMemOp = X86Op;
4435  // Have we found an unqualified memory operand,
4436  // break. IA allows only one memory operand.
4437  break;
4438  }
4439  }
4440 
4441  // Allow some instructions to have implicitly pointer-sized operands. This is
4442  // compatible with gas.
4443  if (UnsizedMemOp) {
4444  static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
4445  for (const char *Instr : PtrSizedInstrs) {
4446  if (Mnemonic == Instr) {
4447  UnsizedMemOp->Mem.Size = getPointerWidth();
4448  break;
4449  }
4450  }
4451  }
4452 
4454  FeatureBitset ErrorInfoMissingFeatures;
4455  FeatureBitset MissingFeatures;
4456 
4457  // If unsized push has immediate operand we should default the default pointer
4458  // size for the size.
4459  if (Mnemonic == "push" && Operands.size() == 2) {
4460  auto *X86Op = static_cast<X86Operand *>(Operands[1].get());
4461  if (X86Op->isImm()) {
4462  // If it's not a constant fall through and let remainder take care of it.
4463  const auto *CE = dyn_cast<MCConstantExpr>(X86Op->getImm());
4464  unsigned Size = getPointerWidth();
4465  if (CE &&
4466  (isIntN(Size, CE->getValue()) || isUIntN(Size, CE->getValue()))) {
4467  SmallString<16> Tmp;
4468  Tmp += Base;
4469  Tmp += (is64BitMode())
4470  ? "q"
4471  : (is32BitMode()) ? "l" : (is16BitMode()) ? "w" : " ";
4472  Op.setTokenValue(Tmp);
4473  // Do match in ATT mode to allow explicit suffix usage.
4474  Match.push_back(MatchInstruction(Operands, Inst, ErrorInfo,
4475  MissingFeatures, MatchingInlineAsm,
4476  false /*isParsingIntelSyntax()*/));
4477  Op.setTokenValue(Base);
4478  }
4479  }
4480  }
4481 
4482  // If an unsized memory operand is present, try to match with each memory
4483  // operand size. In Intel assembly, the size is not part of the instruction
4484  // mnemonic.
4485  if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
4486  static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
4487  for (unsigned Size : MopSizes) {
4488  UnsizedMemOp->Mem.Size = Size;
4489  uint64_t ErrorInfoIgnore;
4490  unsigned LastOpcode = Inst.getOpcode();
4491  unsigned M = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
4492  MissingFeatures, MatchingInlineAsm,
4493  isParsingIntelSyntax());
4494  if (Match.empty() || LastOpcode != Inst.getOpcode())
4495  Match.push_back(M);
4496 
4497  // If this returned as a missing feature failure, remember that.
4498  if (Match.back() == Match_MissingFeature)
4499  ErrorInfoMissingFeatures = MissingFeatures;
4500  }
4501 
4502  // Restore the size of the unsized memory operand if we modified it.
4503  UnsizedMemOp->Mem.Size = 0;
4504  }
4505 
4506  // If we haven't matched anything yet, this is not a basic integer or FPU
4507  // operation. There shouldn't be any ambiguity in our mnemonic table, so try
4508  // matching with the unsized operand.
4509  if (Match.empty()) {
4510  Match.push_back(MatchInstruction(
4511  Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm,
4512  isParsingIntelSyntax()));
4513  // If this returned as a missing feature failure, remember that.
4514  if (Match.back() == Match_MissingFeature)
4515  ErrorInfoMissingFeatures = MissingFeatures;
4516  }
4517 
4518  // Restore the size of the unsized memory operand if we modified it.
4519  if (UnsizedMemOp)
4520  UnsizedMemOp->Mem.Size = 0;
4521 
4522  // If it's a bad mnemonic, all results will be the same.
4523  if (Match.back() == Match_MnemonicFail) {
4524  return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
4525  Op.getLocRange(), MatchingInlineAsm);
4526  }
4527 
4528  unsigned NumSuccessfulMatches = llvm::count(Match, Match_Success);
4529 
4530  // If matching was ambiguous and we had size information from the frontend,
4531  // try again with that. This handles cases like "movxz eax, m8/m16".
4532  if (UnsizedMemOp && NumSuccessfulMatches > 1 &&
4533  UnsizedMemOp->getMemFrontendSize()) {
4534  UnsizedMemOp->Mem.Size = UnsizedMemOp->getMemFrontendSize();
4535  unsigned M = MatchInstruction(
4536  Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm,
4537  isParsingIntelSyntax());
4538  if (M == Match_Success)
4539  NumSuccessfulMatches = 1;
4540 
4541  // Add a rewrite that encodes the size information we used from the
4542  // frontend.
4543  InstInfo->AsmRewrites->emplace_back(
4544  AOK_SizeDirective, UnsizedMemOp->getStartLoc(),
4545  /*Len=*/0, UnsizedMemOp->getMemFrontendSize());
4546  }
4547 
4548  // If exactly one matched, then we treat that as a successful match (and the
4549  // instruction will already have been filled in correctly, since the failing
4550  // matches won't have modified it).
4551  if (NumSuccessfulMatches == 1) {
4552  if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
4553  return true;
4554  // Some instructions need post-processing to, for example, tweak which
4555  // encoding is selected. Loop on it while changes happen so the individual
4556  // transformations can chain off each other.
4557  if (!MatchingInlineAsm)
4558  while (processInstruction(Inst, Operands))
4559  ;
4560  Inst.setLoc(IDLoc);
4561  if (!MatchingInlineAsm)
4562  emitInstruction(Inst, Operands, Out);
4563  Opcode = Inst.getOpcode();
4564  return false;
4565  } else if (NumSuccessfulMatches > 1) {
4566  assert(UnsizedMemOp &&
4567  "multiple matches only possible with unsized memory operands");
4568  return Error(UnsizedMemOp->getStartLoc(),
4569  "ambiguous operand size for instruction '" + Mnemonic + "\'",
4570  UnsizedMemOp->getLocRange());
4571  }
4572 
4573  // If one instruction matched as unsupported, report this as unsupported.
4574  if (llvm::count(Match, Match_Unsupported) == 1) {
4575  return Error(IDLoc, "unsupported instruction", EmptyRange,
4576  MatchingInlineAsm);
4577  }
4578 
4579  // If one instruction matched with a missing feature, report this as a
4580  // missing feature.
4581  if (llvm::count(Match, Match_MissingFeature) == 1) {
4582  ErrorInfo = Match_MissingFeature;
4583  return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures,
4584  MatchingInlineAsm);
4585  }
4586 
4587  // If one instruction matched with an invalid operand, report this as an
4588  // operand failure.
4589  if (llvm::count(Match, Match_InvalidOperand) == 1) {
4590  return Error(IDLoc, "invalid operand for instruction", EmptyRange,
4591  MatchingInlineAsm);
4592  }
4593 
4594  if (llvm::count(Match, Match_InvalidImmUnsignedi4) == 1) {
4595  SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc();
4596  if (ErrorLoc == SMLoc())
4597  ErrorLoc = IDLoc;
4598  return Error(ErrorLoc, "immediate must be an integer in range [0, 15]",
4599  EmptyRange, MatchingInlineAsm);
4600  }
4601 
4602  // If all of these were an outright failure, report it in a useless way.
4603  return Error(IDLoc, "unknown instruction mnemonic", EmptyRange,
4604  MatchingInlineAsm);
4605 }
4606 
4607 bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
4608  return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
4609 }
4610 
/// Parse X86-specific assembler directives: .code*/.arch, the AT&T/Intel
/// syntax switches, .nops, .even, the CodeView FPO (.cv_fpo_*) directives and
/// the Win64 SEH (.seh_*) directives. Recognized directives are dispatched to
/// their dedicated handlers; returning true at the end leaves an unrecognized
/// directive to the generic parser.
bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
  MCAsmParser &Parser = getParser();
  StringRef IDVal = DirectiveID.getIdentifier();
  if (IDVal.startswith(".arch"))
    return parseDirectiveArch();
  if (IDVal.startswith(".code"))
    return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
  else if (IDVal.startswith(".att_syntax")) {
    // Optional "prefix"/"noprefix" argument: only the (default) '%'-prefixed
    // register form is supported in AT&T syntax.
    if (getLexer().isNot(AsmToken::EndOfStatement)) {
      if (Parser.getTok().getString() == "prefix")
        Parser.Lex();
      else if (Parser.getTok().getString() == "noprefix")
        return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
                                           "supported: registers must have a "
                                           "'%' prefix in .att_syntax");
    }
    getParser().setAssemblerDialect(0); // dialect 0 == AT&T
    return false;
  } else if (IDVal.startswith(".intel_syntax")) {
    getParser().setAssemblerDialect(1); // dialect 1 == Intel
    // Optional "noprefix"/"prefix" argument: only the (default) unprefixed
    // register form is supported in Intel syntax.
    if (getLexer().isNot(AsmToken::EndOfStatement)) {
      if (Parser.getTok().getString() == "noprefix")
        Parser.Lex();
      else if (Parser.getTok().getString() == "prefix")
        return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
                                           "supported: registers must not have "
                                           "a '%' prefix in .intel_syntax");
    }
    return false;
  } else if (IDVal == ".nops")
    return parseDirectiveNops(DirectiveID.getLoc());
  else if (IDVal == ".even")
    return parseDirectiveEven(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_proc")
    return parseDirectiveFPOProc(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_setframe")
    return parseDirectiveFPOSetFrame(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_pushreg")
    return parseDirectiveFPOPushReg(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_stackalloc")
    return parseDirectiveFPOStackAlloc(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_stackalign")
    return parseDirectiveFPOStackAlign(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_endprologue")
    return parseDirectiveFPOEndPrologue(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_endproc")
    return parseDirectiveFPOEndProc(DirectiveID.getLoc());
  // Each SEH directive also has a case-insensitive MASM spelling without the
  // "seh_" prefix, accepted only when parsing MASM.
  else if (IDVal == ".seh_pushreg" ||
           (Parser.isParsingMasm() && IDVal.equals_insensitive(".pushreg")))
    return parseDirectiveSEHPushReg(DirectiveID.getLoc());
  else if (IDVal == ".seh_setframe" ||
           (Parser.isParsingMasm() && IDVal.equals_insensitive(".setframe")))
    return parseDirectiveSEHSetFrame(DirectiveID.getLoc());
  else if (IDVal == ".seh_savereg" ||
           (Parser.isParsingMasm() && IDVal.equals_insensitive(".savereg")))
    return parseDirectiveSEHSaveReg(DirectiveID.getLoc());
  else if (IDVal == ".seh_savexmm" ||
           (Parser.isParsingMasm() && IDVal.equals_insensitive(".savexmm128")))
    return parseDirectiveSEHSaveXMM(DirectiveID.getLoc());
  else if (IDVal == ".seh_pushframe" ||
           (Parser.isParsingMasm() && IDVal.equals_insensitive(".pushframe")))
    return parseDirectiveSEHPushFrame(DirectiveID.getLoc());

  // Not an X86-specific directive.
  return true;
}
4676 
4677 bool X86AsmParser::parseDirectiveArch() {
4678  // Ignore .arch for now.
4679  getParser().parseStringToEndOfStatement();
4680  return false;
4681 }
4682 
4683 /// parseDirectiveNops
4684 /// ::= .nops size[, control]
4685 bool X86AsmParser::parseDirectiveNops(SMLoc L) {
4686  int64_t NumBytes = 0, Control = 0;
4687  SMLoc NumBytesLoc, ControlLoc;
4688  const MCSubtargetInfo& STI = getSTI();
4689  NumBytesLoc = getTok().getLoc();
4690  if (getParser().checkForValidSection() ||
4691  getParser().parseAbsoluteExpression(NumBytes))
4692  return true;
4693 
4694  if (parseOptionalToken(AsmToken::Comma)) {
4695  ControlLoc = getTok().getLoc();
4696  if (getParser().parseAbsoluteExpression(Control))
4697  return true;
4698  }
4699  if (getParser().parseEOL())
4700  return true;
4701 
4702  if (NumBytes <= 0) {
4703  Error(NumBytesLoc, "'.nops' directive with non-positive size");
4704  return false;
4705  }
4706 
4707  if (Control < 0) {
4708  Error(ControlLoc, "'.nops' directive with negative NOP size");
4709  return false;
4710  }
4711 
4712  /// Emit nops
4713  getParser().getStreamer().emitNops(NumBytes, Control, L, STI);
4714 
4715  return false;
4716 }
4717 
4718 /// parseDirectiveEven
4719 /// ::= .even
4720 bool X86AsmParser::parseDirectiveEven(SMLoc L) {
4721  if (parseEOL())
4722  return false;
4723 
4724  const MCSection *Section = getStreamer().getCurrentSectionOnly();
4725  if (!Section) {
4726  getStreamer().initSections(false, getSTI());
4727  Section = getStreamer().getCurrentSectionOnly();
4728  }
4729  if (Section->useCodeAlign())
4730  getStreamer().emitCodeAlignment(Align(2), &getSTI(), 0);
4731  else
4732  getStreamer().emitValueToAlignment(Align(2), 0, 1, 0);
4733  return false;
4734 }
4735 
4736 /// ParseDirectiveCode
4737 /// ::= .code16 | .code32 | .code64
4738 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
4739  MCAsmParser &Parser = getParser();
4740  Code16GCC = false;
4741  if (IDVal == ".code16") {
4742  Parser.Lex();
4743  if (!is16BitMode()) {
4744  SwitchMode(X86::Is16Bit);
4745  getParser().getStreamer().emitAssemblerFlag(MCAF_Code16);
4746  }
4747  } else if (IDVal == ".code16gcc") {
4748  // .code16gcc parses as if in 32-bit mode, but emits code in 16-bit mode.
4749  Parser.Lex();
4750  Code16GCC = true;
4751  if (!is16BitMode()) {
4752  SwitchMode(X86::Is16Bit);
4753  getParser().getStreamer().emitAssemblerFlag(MCAF_Code16);
4754  }
4755  } else if (IDVal == ".code32") {
4756  Parser.Lex();
4757  if (!is32BitMode()) {
4758  SwitchMode(X86::Is32Bit);
4759  getParser().getStreamer().emitAssemblerFlag(MCAF_Code32);
4760  }
4761  } else if (IDVal == ".code64") {
4762  Parser.Lex();
4763  if (!is64BitMode()) {
4764  SwitchMode(X86::Is64Bit);
4765  getParser().getStreamer().emitAssemblerFlag(MCAF_Code64);
4766  }
4767  } else {
4768  Error(L, "unknown directive " + IDVal);
4769  return false;
4770  }
4771 
4772  return false;
4773 }
4774 
4775 // .cv_fpo_proc foo
4776 bool X86AsmParser::parseDirectiveFPOProc(SMLoc L) {
4777  MCAsmParser &Parser = getParser();
4778  StringRef ProcName;
4779  int64_t ParamsSize;
4780  if (Parser.parseIdentifier(ProcName))
4781  return Parser.TokError("expected symbol name");
4782  if (Parser.parseIntToken(ParamsSize, "expected parameter byte count"))
4783  return true;
4784  if (!isUIntN(32, ParamsSize))
4785  return Parser.TokError("parameters size out of range");
4786  if (parseEOL())
4787  return true;
4788  MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName);
4789  return getTargetStreamer().emitFPOProc(ProcSym, ParamsSize, L);
4790 }
4791 
4792 // .cv_fpo_setframe ebp
4793 bool X86AsmParser::parseDirectiveFPOSetFrame(SMLoc L) {
4794  unsigned Reg;
4795  SMLoc DummyLoc;
4796  if (ParseRegister(Reg, DummyLoc, DummyLoc) || parseEOL())
4797  return true;
4798  return getTargetStreamer().emitFPOSetFrame(Reg, L);
4799 }
4800 
4801 // .cv_fpo_pushreg ebx
4802 bool X86AsmParser::parseDirectiveFPOPushReg(SMLoc L) {
4803  unsigned Reg;
4804  SMLoc DummyLoc;
4805  if (ParseRegister(Reg, DummyLoc, DummyLoc) || parseEOL())
4806  return true;
4807  return getTargetStreamer().emitFPOPushReg(Reg, L);
4808 }
4809 
4810 // .cv_fpo_stackalloc 20
4811 bool X86AsmParser::parseDirectiveFPOStackAlloc(SMLoc L) {
4812  MCAsmParser &Parser = getParser();
4813  int64_t Offset;
4814  if (Parser.parseIntToken(Offset, "expected offset") || parseEOL())
4815  return true;
4816  return getTargetStreamer().emitFPOStackAlloc(Offset, L);
4817 }
4818 
4819 // .cv_fpo_stackalign 8
4820 bool X86AsmParser::parseDirectiveFPOStackAlign(SMLoc L) {
4821  MCAsmParser &Parser = getParser();
4822  int64_t Offset;
4823  if (Parser.parseIntToken(Offset, "expected offset") || parseEOL())
4824  return true;
4825  return getTargetStreamer().emitFPOStackAlign(Offset, L);
4826 }
4827 
4828 // .cv_fpo_endprologue
4829 bool X86AsmParser::parseDirectiveFPOEndPrologue(SMLoc L) {
4830  MCAsmParser &Parser = getParser();
4831  if (Parser.parseEOL())
4832  return true;
4833  return getTargetStreamer().emitFPOEndPrologue(L);
4834 }
4835 
4836 // .cv_fpo_endproc
4837 bool X86AsmParser::parseDirectiveFPOEndProc(SMLoc L) {
4838  MCAsmParser &Parser = getParser();
4839  if (Parser.parseEOL())
4840  return true;
4841  return getTargetStreamer().emitFPOEndProc(L);
4842 }
4843 
4844 bool X86AsmParser::parseSEHRegisterNumber(unsigned RegClassID,
4845  unsigned &RegNo) {
4846  SMLoc startLoc = getLexer().getLoc();
4847  const MCRegisterInfo *MRI = getContext().getRegisterInfo();
4848 
4849  // Try parsing the argument as a register first.
4850  if (getLexer().getTok().isNot(AsmToken::Integer)) {
4851  SMLoc endLoc;
4852  if (ParseRegister(RegNo, startLoc, endLoc))
4853  return true;
4854 
4855  if (!X86MCRegisterClasses[RegClassID].contains(RegNo)) {
4856  return Error(startLoc,
4857  "register is not supported for use with this directive");
4858  }
4859  } else {
4860  // Otherwise, an integer number matching the encoding of the desired
4861  // register may appear.
4862  int64_t EncodedReg;
4863  if (getParser().parseAbsoluteExpression(EncodedReg))
4864  return true;
4865 
4866  // The SEH register number is the same as the encoding register number. Map
4867  // from the encoding back to the LLVM register number.
4868  RegNo = 0;
4869  for (MCPhysReg Reg : X86MCRegisterClasses[RegClassID]) {
4870  if (MRI->getEncodingValue(Reg) == EncodedReg) {
4871  RegNo = Reg;
4872  break;
4873  }
4874  }
4875  if (RegNo == 0) {
4876  return Error(startLoc,
4877  "incorrect register number for use with this directive");
4878  }
4879  }
4880 
4881  return false;
4882 }
4883 
4884 bool X86AsmParser::parseDirectiveSEHPushReg(SMLoc Loc) {
4885  unsigned Reg = 0;
4886  if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg))
4887  return true;
4888 
4889  if (getLexer().isNot(AsmToken::EndOfStatement))
4890  return TokError("expected end of directive");
4891 
4892  getParser().Lex();
4893  getStreamer().emitWinCFIPushReg(Reg, Loc);
4894  return false;
4895 }
4896 
4897 bool X86AsmParser::parseDirectiveSEHSetFrame(SMLoc Loc) {
4898  unsigned Reg = 0;
4899  int64_t Off;
4900  if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg))
4901  return true;
4902  if (getLexer().isNot(AsmToken::Comma))
4903  return TokError("you must specify a stack pointer offset");
4904 
4905  getParser().Lex();
4906  if (getParser().parseAbsoluteExpression(Off))
4907  return true;
4908 
4909  if (getLexer().isNot(AsmToken::EndOfStatement))
4910  return TokError("expected end of directive");
4911 
4912  getParser().Lex();
4913  getStreamer().emitWinCFISetFrame(Reg, Off, Loc);
4914  return false;
4915 }
4916 
4917 bool X86AsmParser::parseDirectiveSEHSaveReg(SMLoc Loc) {
4918  unsigned Reg = 0;
4919  int64_t Off;
4920  if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg))
4921  return true;
4922  if (getLexer().isNot(AsmToken::Comma))
4923  return TokError("you must specify an offset on the stack");
4924 
4925  getParser().Lex();
4926  if (getParser().parseAbsoluteExpression(Off))
4927  return true;
4928 
4929  if (getLexer().isNot(AsmToken::EndOfStatement))
4930  return TokError("expected end of directive");
4931 
4932  getParser().Lex();
4933  getStreamer().emitWinCFISaveReg(Reg, Off, Loc);
4934  return false;
4935 }
4936 
4937 bool X86AsmParser::parseDirectiveSEHSaveXMM(SMLoc Loc) {
4938  unsigned Reg = 0;
4939  int64_t Off;
4940  if (parseSEHRegisterNumber(X86::VR128XRegClassID, Reg))
4941  return true;
4942  if (getLexer().isNot(AsmToken::Comma))
4943  return TokError("you must specify an offset on the stack");
4944 
4945  getParser().Lex();
4946  if (getParser().parseAbsoluteExpression(Off))
4947  return true;
4948 
4949  if (getLexer().isNot(AsmToken::EndOfStatement))
4950  return TokError("expected end of directive");
4951 
4952  getParser().Lex();
4953  getStreamer().emitWinCFISaveXMM(Reg, Off, Loc);
4954  return false;
4955 }
4956 
4957 bool X86AsmParser::parseDirectiveSEHPushFrame(SMLoc Loc) {
4958  bool Code = false;
4959  StringRef CodeID;
4960  if (getLexer().is(AsmToken::At)) {
4961  SMLoc startLoc = getLexer().getLoc();
4962  getParser().Lex();
4963  if (!getParser().parseIdentifier(CodeID)) {
4964  if (CodeID != "code")
4965  return Error(startLoc, "expected @code");
4966  Code = true;
4967  }
4968  }
4969 
4970  if (getLexer().isNot(AsmToken::EndOfStatement))
4971  return TokError("expected end of directive");
4972 
4973  getParser().Lex();
4974  getStreamer().emitWinCFIPushFrame(Code, Loc);
4975  return false;
4976 }
4977 
4978 // Force static initialization.
4982 }
4983 
4984 #define GET_REGISTER_MATCHER
4985 #define GET_MATCHER_IMPLEMENTATION
4986 #define GET_SUBTARGET_FEATURE_NAME
4987 #include "X86GenAsmMatcher.inc"
llvm::Check::Size
@ Size
Definition: FileCheck.h:77
llvm::StringSwitch::Case
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
llvm::MCTargetStreamer::getStreamer
MCStreamer & getStreamer()
Definition: MCStreamer.h:102
checkScale
static bool checkScale(unsigned Scale, StringRef &ErrMsg)
Definition: X86AsmParser.cpp:50
i
i
Definition: README.txt:29
llvm::MCAsmParser
Generic assembler parser interface, for use by target specific assembly parsers.
Definition: MCAsmParser.h:124
llvm::AsmFieldInfo::Offset
unsigned Offset
Definition: MCAsmParser.h:105
is
should just be implemented with a CLZ instruction Since there are other e that share this it would be best to implement this in a target independent as zero is the default value for the binary encoder e add r0 add r5 Register operands should be distinct That is
Definition: README.txt:725
getSubtargetFeatureName
static const char * getSubtargetFeatureName(uint64_t Val)
llvm::X86Operand::getEndLoc
SMLoc getEndLoc() const override
getEndLoc - Get the location of the last token of this operand.
Definition: X86Operand.h:101
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::X86Operand::CreateMem
static std::unique_ptr< X86Operand > CreateMem(unsigned ModeSize, const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc, unsigned Size=0, StringRef SymName=StringRef(), void *OpDecl=nullptr, unsigned FrontendSize=0, bool UseUpRegs=false, bool MaybeDirectBranchDest=true)
Create an absolute memory operand.
Definition: X86Operand.h:682
llvm::MCSymbol
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::AsmToken::is
bool is(TokenKind K) const
Definition: MCAsmMacro.h:82
llvm::X86Operand::Mem
struct MemOp Mem
Definition: X86Operand.h:86
llvm::MCAsmLexer
Generic assembler lexer interface, for use by target specific assembly lexers.
Definition: MCAsmLexer.h:37
llvm::MCOperand::isReg
bool isReg() const
Definition: MCInst.h:61
llvm::cl::Prefix
@ Prefix
Definition: CommandLine.h:160
llvm::pdb::PDB_DataKind::Member
@ Member
llvm::X86::TO_POS_INF
@ TO_POS_INF
Definition: X86BaseInfo.h:49
llvm::MCOperand::createImm
static MCOperand createImm(int64_t Val)
Definition: MCInst.h:141
llvm::AsmToken::LBrac
@ LBrac
Definition: MCAsmMacro.h:48
Note
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles Note
Definition: README.txt:239
llvm::AsmToken::Dot
@ Dot
Definition: MCAsmMacro.h:49
llvm::getTheX86_64Target
Target & getTheX86_64Target()
Definition: X86TargetInfo.cpp:17
llvm::X86II::REX_W
@ REX_W
Definition: X86BaseInfo.h:843
llvm::AsmToken::EndOfStatement
@ EndOfStatement
Definition: MCAsmMacro.h:42
llvm::X86Operand::isMem8
bool isMem8() const
Definition: X86Operand.h:309
llvm::MCAsmParser::parseEOL
bool parseEOL()
Definition: MCAsmParser.cpp:49
llvm::AsmToken::getIntVal
int64_t getIntVal() const
Definition: MCAsmMacro.h:115
llvm::MCConstantExpr::create
static const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition: MCExpr.cpp:194
llvm::X86II::ExplicitVEXPrefix
@ ExplicitVEXPrefix
Definition: X86BaseInfo.h:975
contains
return AArch64::GPR64RegClass contains(Reg)
llvm::StringRef::npos
static constexpr size_t npos
Definition: StringRef.h:52
llvm::X86::COND_BE
@ COND_BE
Definition: X86BaseInfo.h:87
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
llvm::tgtok::Code
@ Code
Definition: TGLexer.h:50
MCParsedAsmOperand.h
llvm::MCAsmParser::parseIdentifier
virtual bool parseIdentifier(StringRef &Res)=0
Parse an identifier or string (as a quoted identifier) and set Res to the identifier contents.
llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:462
CH
AMD64 Optimization Manual has some nice information about optimizing integer multiplication by a constant How much of it applies to Intel s X86 implementation There are definite trade offs to xmm0 cvttss2siq rdx jb L3 subss xmm0 rax cvttss2siq rdx xorq rdx rax ret instead of xmm1 cvttss2siq rcx movaps xmm2 subss xmm2 cvttss2siq rax rdx xorq rax ucomiss xmm0 cmovb rax ret Seems like the jb branch has high likelihood of being taken It would have saved a few instructions It s not possible to reference CH
Definition: README-X86-64.txt:44
llvm::X86::COND_P
@ COND_P
Definition: X86BaseInfo.h:91
llvm::X86::COND_GE
@ COND_GE
Definition: X86BaseInfo.h:94
llvm::MCSymbol::isUndefined
bool isUndefined(bool SetUsed=true) const
isUndefined - Check if this symbol undefined (i.e., implicitly defined).
Definition: MCSymbol.h:252
llvm::X86::AddrNumOperands
@ AddrNumOperands
AddrNumOperands - Total number of operands in a memory reference.
Definition: X86BaseInfo.h:41
llvm::pdb::PDB_BuiltinType::Variant
@ Variant
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
llvm::MemOp
Definition: TargetLowering.h:111
llvm::X86::COND_L
@ COND_L
Definition: X86BaseInfo.h:93
llvm::X86::IP_HAS_LOCK
@ IP_HAS_LOCK
Definition: X86BaseInfo.h:62
startswith
static bool startswith(StringRef Magic, const char(&S)[N])
Definition: Magic.cpp:28
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::X86::IP_USE_DISP8
@ IP_USE_DISP8
Definition: X86BaseInfo.h:68
llvm::X86::AddrIndexReg
@ AddrIndexReg
Definition: X86BaseInfo.h:34
llvm::AsmToken::Integer
@ Integer
Definition: MCAsmMacro.h:32
llvm::MCAsmParser::parseOptionalToken
bool parseOptionalToken(AsmToken::TokenKind T)
Attempt to parse and consume token, returning true on success.
Definition: MCAsmParser.cpp:80
llvm::AsmTypeInfo::Name
StringRef Name
Definition: MCAsmParser.h:97
llvm::StringRef::substr
StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:564
llvm::FenceInst
An instruction for ordering other memory operations.
Definition: Instructions.h:433
llvm::X86Operand::isMem
bool isMem() const override
isMem - Is this a memory operand?
Definition: X86Operand.h:305
llvm::FeatureBitset
Container class for subtarget features.
Definition: SubtargetFeature.h:40
llvm::InlineAsmIdentifierInfo
Definition: MCAsmParser.h:37
STLExtras.h
llvm::DiagnosticPredicateTy::Match
@ Match
llvm::MCInst
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:184
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
llvm::X86::CondCode
CondCode
Definition: X86BaseInfo.h:80
llvm::InlineAsmIdentifierInfo::IK_EnumVal
@ IK_EnumVal
Definition: MCAsmParser.h:41
llvm::X86::COND_INVALID
@ COND_INVALID
Definition: X86BaseInfo.h:107
llvm::getX86SubSuperRegisterOrZero
MCRegister getX86SubSuperRegisterOrZero(MCRegister, unsigned, bool High=false)
Returns the sub or super register of a specific X86 register.
Definition: X86MCTargetDesc.cpp:744
getSym
static ModuleSymbolTable::Symbol getSym(DataRefImpl &Symb)
Definition: IRObjectFile.cpp:37
MCAsmParser.h
llvm::X86::COND_S
@ COND_S
Definition: X86BaseInfo.h:89
llvm::MCInst::getNumOperands
unsigned getNumOperands() const
Definition: MCInst.h:208
MCTargetAsmParser.h
llvm::X86::IP_HAS_NOTRACK
@ IP_HAS_NOTRACK
Definition: X86BaseInfo.h:63
llvm::X86Operand::isReg
bool isReg() const override
isReg - Is this a register operand?
Definition: X86Operand.h:478
llvm::MCAsmParser::parseAbsoluteExpression
virtual bool parseAbsoluteExpression(int64_t &Res)=0
Parse an expression which must evaluate to an absolute value.
llvm::AsmToken
Target independent representation for an assembler token.
Definition: MCAsmMacro.h:21
llvm::X86::COND_O
@ COND_O
Definition: X86BaseInfo.h:81
llvm::X86Operand::CreatePrefix
static std::unique_ptr< X86Operand > CreatePrefix(unsigned Prefixes, SMLoc StartLoc, SMLoc EndLoc)
Definition: X86Operand.h:660
llvm::MCInst::setOpcode
void setOpcode(unsigned Op)
Definition: MCInst.h:197
llvm::dwarf::toStringRef
StringRef toStringRef(const std::optional< DWARFFormValue > &V, StringRef Default={})
Take an optional DWARFFormValue and try to extract a string value from it.
Definition: DWARFFormValue.h:193
llvm::AsmToken::Minus
@ Minus
Definition: MCAsmMacro.h:45
llvm::ARMBuildAttrs::Section
@ Section
Legacy Tags.
Definition: ARMBuildAttributes.h:82
llvm::AsmToken::LParen
@ LParen
Definition: MCAsmMacro.h:48
llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
CommandLine.h
llvm::MCAsmParser::Lex
virtual const AsmToken & Lex()=0
Get the next AsmToken in the stream, possibly handling file inclusion first.
llvm::MCStreamer
Streaming machine code generation interface.
Definition: MCStreamer.h:213
LHS
Value * LHS
Definition: X86PartialReduction.cpp:75
x3
In x86 we generate this spiffy xmm0 xmm0 ret in x86 we generate this which could be xmm1 movss xmm1 xmm0 ret In sse4 we could use insertps to make both better Here s another testcase that could use x3
Definition: README-SSE.txt:547
llvm::AsmToken::Dollar
@ Dollar
Definition: MCAsmMacro.h:49
llvm::MCAsmParser::TokError
bool TokError(const Twine &Msg, SMRange Range=std::nullopt)
Report an error at the current lexer location.
Definition: MCAsmParser.cpp:97
llvm::MCInstrDesc::TSFlags
uint64_t TSFlags
Definition: MCInstrDesc.h:205
llvm::MCAsmParser::parseExpression
virtual bool parseExpression(const MCExpr *&Res, SMLoc &EndLoc)=0
Parse an arbitrary expression.
llvm::RegisterMCAsmParser
RegisterMCAsmParser - Helper template for registering a target specific assembly parser,...
Definition: TargetRegistry.h:1399
llvm::MatchOperand_Success
@ MatchOperand_Success
Definition: MCTargetAsmParser.h:127
llvm::SMLoc
Represents a location in source code.
Definition: SMLoc.h:23
llvm::StringRef::startswith
bool startswith(StringRef Prefix) const
Definition: StringRef.h:260
SmallString.h
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
getPrefixes
static unsigned getPrefixes(OperandVector &Operands)
Definition: X86AsmParser.cpp:4099
llvm::AsmToken::GreaterGreater
@ GreaterGreater
Definition: MCAsmMacro.h:54
llvm::AsmTypeInfo::Size
unsigned Size
Definition: MCAsmParser.h:98
Twine.h
llvm::MCAsmParser::parseIntToken
bool parseIntToken(int64_t &V, const Twine &ErrMsg)
Definition: MCAsmParser.cpp:72
llvm::X86::IP_USE_EVEX
@ IP_USE_EVEX
Definition: X86BaseInfo.h:67
MCContext.h
Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
llvm::AsmTypeInfo
Definition: MCAsmParser.h:96
SI
@ SI
Definition: SIInstrInfo.cpp:7966
MCInstrInfo.h
llvm::MCOperand::getImm
int64_t getImm() const
Definition: MCInst.h:80
MCSymbol.h
llvm::StringRef::getAsInteger
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:474
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::X86II::isX86_64ExtendedReg
bool isX86_64ExtendedReg(unsigned RegNo)
Definition: X86BaseInfo.h:1191
llvm::MCAF_Code16
@ MCAF_Code16
.code16 (X86) / .code 16 (ARM)
Definition: MCDirectives.h:55
MCInst.h
false
Definition: StackSlotColoring.cpp:141
llvm::MCInstrDesc::isTerminator
bool isTerminator() const
Returns true if this instruction part of the terminator for a basic block.
Definition: MCInstrDesc.h:298
check
#define check(cond)
llvm::N86::ESP
@ ESP
Definition: X86MCTargetDesc.h:51
llvm::MCSymbol::getVariableValue
const MCExpr * getVariableValue(bool SetUsed=true) const
getVariableValue - Get the value for variable symbols.
Definition: MCSymbol.h:298
llvm::dwarf::Index
Index
Definition: Dwarf.h:472
llvm::MCInstrDesc
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:197
llvm::isUIntN
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:422
MCSubtargetInfo.h
llvm::MCSubtargetInfo::getFeatureBits
const FeatureBitset & getFeatureBits() const
Definition: MCSubtargetInfo.h:112
llvm::AsmToken::Star
@ Star
Definition: MCAsmMacro.h:49
llvm::MCAsmParser::getContext
virtual MCContext & getContext()=0
llvm::ParseInstructionInfo::AsmRewrites
SmallVectorImpl< AsmRewrite > * AsmRewrites
Definition: MCTargetAsmParser.h:119
llvm::MCAsmParser::isParsingMasm
virtual bool isParsingMasm() const
Definition: MCAsmParser.h:188
llvm::X86::COND_A
@ COND_A
Definition: X86BaseInfo.h:88
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:145
llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1486
Options
const char LLVMTargetMachineRef LLVMPassBuilderOptionsRef Options
Definition: PassBuilderBindings.cpp:48
llvm::X86Operand::isImm
bool isImm() const override
isImm - Is this an immediate operand?
Definition: X86Operand.h:224
llvm::MCAsmParser::addAliasForDirective
virtual void addAliasForDirective(StringRef Directive, StringRef Alias)=0
llvm::MCInst::setFlags
void setFlags(unsigned F)
Definition: MCInst.h:200
llvm::N86::ESI
@ ESI
Definition: X86MCTargetDesc.h:51
llvm::StringRef::data
const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
llvm::AsmToken::getKind
TokenKind getKind() const
Definition: MCAsmMacro.h:81
llvm::MCTargetStreamer
Target specific streamer interface.
Definition: MCStreamer.h:94
llvm::InlineAsmIdentifierInfo::IK_Label
@ IK_Label
Definition: MCAsmParser.h:40
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::X86::TO_NEG_INF
@ TO_NEG_INF
Definition: X86BaseInfo.h:48
llvm::MCSymbol::getName
StringRef getName() const
getName - Get the symbol name.
Definition: MCSymbol.h:198
X86Operand.h
Align
uint64_t Align
Definition: ELFObjHandler.cpp:82
llvm::MCExpr::getKind
ExprKind getKind() const
Definition: MCExpr.h:81
isNot
static bool isNot(const MachineRegisterInfo &MRI, const MachineInstr &MI)
Definition: AMDGPULegalizerInfo.cpp:3196
llvm::AsmToken::Percent
@ Percent
Definition: MCAsmMacro.h:52
llvm::isIntN
bool isIntN(unsigned N, int64_t x)
Checks if an signed integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:427
llvm::X86::IP_USE_DISP32
@ IP_USE_DISP32
Definition: X86BaseInfo.h:69
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
SourceMgr.h
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
llvm::MCAsmParser::parsePrimaryExpr
virtual bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc, AsmTypeInfo *TypeInfo)=0
Parse a primary expression.
llvm::X86Operand::getLocRange
SMRange getLocRange() const
getLocRange - Get the range between the first and last token of this operand.
Definition: X86Operand.h:105
llvm::MCInst::addOperand
void addOperand(const MCOperand Op)
Definition: MCInst.h:210
llvm::SmallString
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
llvm::StringRef::slice
StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition: StringRef.h:676
llvm::StringRef::equals
bool equals(StringRef RHS) const
equals - Check for string equality, this is more efficient than compare() when the relative ordering ...
Definition: StringRef.h:164
Operands
mir Rename Register Operands
Definition: MIRNamerPass.cpp:74
llvm::MCSubtargetInfo::ToggleFeature
FeatureBitset ToggleFeature(uint64_t FB)
Toggle a feature and return the re-computed feature bits.
Definition: MCSubtargetInfo.cpp:241
llvm::AsmToken::Error
@ Error
Definition: MCAsmMacro.h:25
llvm::StringRef::empty
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:134
X86MCExpr.h
TSFlags
uint64_t TSFlags
Definition: RISCVInsertVSETVLI.cpp:595
llvm::X86::COND_NO
@ COND_NO
Definition: X86BaseInfo.h:82
llvm::MCInstrDesc::mayLoad
bool mayLoad() const
Return true if this instruction could possibly read memory.
Definition: MCInstrDesc.h:435
llvm::cl::opt< bool >
llvm::AsmTypeInfo::ElementSize
unsigned ElementSize
Definition: MCAsmParser.h:99
llvm::getTheX86_32Target
Target & getTheX86_32Target()
Definition: X86TargetInfo.cpp:13
llvm::X86II::EVEX
@ EVEX
Definition: X86BaseInfo.h:922
x2
gcc mainline compiles it x2(%rip)
MCAsmLexer.h
llvm::MCOperand::isImm
bool isImm() const
Definition: MCInst.h:62
X86MCTargetDesc.h
llvm::AMDGPU::Hwreg::Offset
Offset
Definition: SIDefines.h:419
llvm::X86::COND_AE
@ COND_AE
Definition: X86BaseInfo.h:84
llvm::ParseInstructionInfo
Definition: MCTargetAsmParser.h:118
llvm::MCSymbolRefExpr::VariantKind
VariantKind
Definition: MCExpr.h:194
llvm::X86II::EVEX_K
@ EVEX_K
Definition: X86BaseInfo.h:947
llvm::N86::EDI
@ EDI
Definition: X86MCTargetDesc.h:51
llvm::count
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1895
llvm::InlineAsmIdentifierInfo::IK_Invalid
@ IK_Invalid
Definition: MCAsmParser.h:39
llvm::X86::IP_HAS_REPEAT
@ IP_HAS_REPEAT
Definition: X86BaseInfo.h:61
llvm::SMLoc::isValid
bool isValid() const
Definition: SMLoc.h:29
llvm::IntelExpr
Definition: MCTargetAsmParser.h:64
uint64_t
llvm::X86Operand::isVectorReg
bool isVectorReg() const
Definition: X86Operand.h:494
llvm::X86Operand
X86Operand - Instances of this class represent a parsed X86 machine instruction.
Definition: X86Operand.h:31
llvm::MCInstrDesc::isCall
bool isCall() const
Return true if the instruction is a call.
Definition: MCInstrDesc.h:285
LLVM_EXTERNAL_VISIBILITY
#define LLVM_EXTERNAL_VISIBILITY
Definition: Compiler.h:127
AH
AMD64 Optimization Manual has some nice information about optimizing integer multiplication by a constant How much of it applies to Intel s X86 implementation There are definite trade offs to xmm0 cvttss2siq rdx jb L3 subss xmm0 rax cvttss2siq rdx xorq rdx rax ret instead of xmm1 cvttss2siq rcx movaps xmm2 subss xmm2 cvttss2siq rax rdx xorq rax ucomiss xmm0 cmovb rax ret Seems like the jb branch has high likelihood of being taken It would have saved a few instructions It s not possible to reference AH
Definition: README-X86-64.txt:44
llvm::AsmToken::At
@ At
Definition: MCAsmMacro.h:54
LLVMInitializeX86AsmParser
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86AsmParser()
Definition: X86AsmParser.cpp:4979
X86TargetStreamer.h
llvm::X86Operand::CreateImm
static std::unique_ptr< X86Operand > CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc, StringRef SymName=StringRef(), void *OpDecl=nullptr, bool GlobalRef=true)
Definition: X86Operand.h:666
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::InlineAsmIdentifierInfo::Enum
EnumIdentifier Enum
Definition: MCAsmParser.h:62
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:53
llvm::MCStreamer::emitInstruction
virtual void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI)
Emit the given Instruction into the current section.
Definition: MCStreamer.cpp:1094
llvm::AsmToken::Colon
@ Colon
Definition: MCAsmMacro.h:43
llvm::MatchOperand_ParseFail
@ MatchOperand_ParseFail
Definition: MCTargetAsmParser.h:129
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::MCPhysReg
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:21
MCRegisterInfo.h
size
i< reg-> size
Definition: README.txt:166
llvm::StringRef::lower
std::string lower() const
Definition: StringRef.cpp:112
llvm::AsmFieldInfo
Definition: MCAsmParser.h:103
llvm::X86MCExpr::create
static const X86MCExpr * create(int64_t RegNo, MCContext &Ctx)
Definition: X86MCExpr.h:37
llvm::X86::IP_NO_PREFIX
@ IP_NO_PREFIX
Definition: X86BaseInfo.h:57
llvm::InlineAsmIdentifierInfo::IK_Var
@ IK_Var
Definition: MCAsmParser.h:42
llvm::AsmToken::Slash
@ Slash
Definition: MCAsmMacro.h:46
llvm::MCSymbol::isVariable
bool isVariable() const
isVariable - Check if this is a variable symbol.
Definition: MCSymbol.h:293