LLVM 17.0.0git
X86AsmParser.cpp
Go to the documentation of this file.
1//===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
15#include "X86AsmParserCommon.h"
16#include "X86Operand.h"
17#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/Twine.h"
22#include "llvm/MC/MCContext.h"
23#include "llvm/MC/MCExpr.h"
24#include "llvm/MC/MCInst.h"
25#include "llvm/MC/MCInstrInfo.h"
31#include "llvm/MC/MCSection.h"
32#include "llvm/MC/MCStreamer.h"
34#include "llvm/MC/MCSymbol.h"
40#include <algorithm>
41#include <memory>
42
43using namespace llvm;
44
46 "x86-experimental-lvi-inline-asm-hardening",
47 cl::desc("Harden inline assembly code that may be vulnerable to Load Value"
48 " Injection (LVI). This feature is experimental."), cl::Hidden);
49
50static bool checkScale(unsigned Scale, StringRef &ErrMsg) {
51 if (Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
52 ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
53 return true;
54 }
55 return false;
56}
57
58namespace {
59
// Binding strength of each infix-calculator token. The table is indexed by the
// token's enumerator value, so the entries must stay in enumerator order; a
// larger number means the operator binds more tightly.
static const char OpPrecedence[] = {
    /* IC_OR       */ 0,
    /* IC_XOR      */ 1,
    /* IC_AND      */ 2,
    /* IC_LSHIFT   */ 4,
    /* IC_RSHIFT   */ 4,
    /* IC_PLUS     */ 5,
    /* IC_MINUS    */ 5,
    /* IC_MULTIPLY */ 6,
    /* IC_DIVIDE   */ 6,
    /* IC_MOD      */ 6,
    /* IC_NOT      */ 7,
    /* IC_NEG      */ 8,
    /* IC_RPAREN   */ 9,
    /* IC_LPAREN   */ 10,
    /* IC_IMM      */ 0,
    /* IC_REGISTER */ 0,
    /* IC_EQ       */ 3,
    /* IC_NE       */ 3,
    /* IC_LT       */ 3,
    /* IC_LE       */ 3,
    /* IC_GT       */ 3,
    /* IC_GE       */ 3,
};
84
85class X86AsmParser : public MCTargetAsmParser {
86 ParseInstructionInfo *InstInfo;
87 bool Code16GCC;
88 unsigned ForcedDataPrefix = 0;
89
90 enum VEXEncoding {
91 VEXEncoding_Default,
92 VEXEncoding_VEX,
93 VEXEncoding_VEX2,
94 VEXEncoding_VEX3,
95 VEXEncoding_EVEX,
96 };
97
98 VEXEncoding ForcedVEXEncoding = VEXEncoding_Default;
99
100 enum DispEncoding {
101 DispEncoding_Default,
102 DispEncoding_Disp8,
103 DispEncoding_Disp32,
104 };
105
106 DispEncoding ForcedDispEncoding = DispEncoding_Default;
107
108private:
109 SMLoc consumeToken() {
110 MCAsmParser &Parser = getParser();
111 SMLoc Result = Parser.getTok().getLoc();
112 Parser.Lex();
113 return Result;
114 }
115
116 X86TargetStreamer &getTargetStreamer() {
117 assert(getParser().getStreamer().getTargetStreamer() &&
118 "do not have a target streamer");
120 return static_cast<X86TargetStreamer &>(TS);
121 }
122
123 unsigned MatchInstruction(const OperandVector &Operands, MCInst &Inst,
124 uint64_t &ErrorInfo, FeatureBitset &MissingFeatures,
125 bool matchingInlineAsm, unsigned VariantID = 0) {
126 // In Code16GCC mode, match as 32-bit.
127 if (Code16GCC)
128 SwitchMode(X86::Is32Bit);
129 unsigned rv = MatchInstructionImpl(Operands, Inst, ErrorInfo,
130 MissingFeatures, matchingInlineAsm,
131 VariantID);
132 if (Code16GCC)
133 SwitchMode(X86::Is16Bit);
134 return rv;
135 }
136
/// Tokens understood by InfixCalculator. The enumerator values index the
/// OpPrecedence table, so the order here must not change independently of it.
enum InfixCalculatorTok {
  // Bitwise operators.
  IC_OR = 0,
  IC_XOR,
  IC_AND,
  IC_LSHIFT,
  IC_RSHIFT,
  // Arithmetic operators.
  IC_PLUS,
  IC_MINUS,
  IC_MULTIPLY,
  IC_DIVIDE,
  IC_MOD,
  // Unary operators.
  IC_NOT,
  IC_NEG,
  // Grouping.
  IC_RPAREN,
  IC_LPAREN,
  // Operands.
  IC_IMM,
  IC_REGISTER,
  // Comparisons.
  IC_EQ,
  IC_NE,
  IC_LT,
  IC_LE,
  IC_GT,
  IC_GE
};
161
/// Kinds of Intel inline-asm operators; IOK_INVALID (zero) marks "not an
/// operator".
enum IntelOperatorKind {
  IOK_INVALID = 0,
  IOK_LENGTH, // 1
  IOK_SIZE,   // 2
  IOK_TYPE,   // 3
};
168
/// Kinds of MASM operators; MOK_INVALID (zero) marks "not an operator".
enum MasmOperatorKind {
  MOK_INVALID = 0,
  MOK_LENGTHOF, // 1
  MOK_SIZEOF,   // 2
  MOK_TYPE,     // 3
};
175
176 class InfixCalculator {
177 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
178 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
179 SmallVector<ICToken, 4> PostfixStack;
180
181 bool isUnaryOperator(InfixCalculatorTok Op) const {
182 return Op == IC_NEG || Op == IC_NOT;
183 }
184
185 public:
186 int64_t popOperand() {
187 assert (!PostfixStack.empty() && "Poped an empty stack!");
188 ICToken Op = PostfixStack.pop_back_val();
189 if (!(Op.first == IC_IMM || Op.first == IC_REGISTER))
190 return -1; // The invalid Scale value will be caught later by checkScale
191 return Op.second;
192 }
193 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
194 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
195 "Unexpected operand!");
196 PostfixStack.push_back(std::make_pair(Op, Val));
197 }
198
199 void popOperator() { InfixOperatorStack.pop_back(); }
200 void pushOperator(InfixCalculatorTok Op) {
201 // Push the new operator if the stack is empty.
202 if (InfixOperatorStack.empty()) {
203 InfixOperatorStack.push_back(Op);
204 return;
205 }
206
207 // Push the new operator if it has a higher precedence than the operator
208 // on the top of the stack or the operator on the top of the stack is a
209 // left parentheses.
210 unsigned Idx = InfixOperatorStack.size() - 1;
211 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
212 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
213 InfixOperatorStack.push_back(Op);
214 return;
215 }
216
217 // The operator on the top of the stack has higher precedence than the
218 // new operator.
219 unsigned ParenCount = 0;
220 while (true) {
221 // Nothing to process.
222 if (InfixOperatorStack.empty())
223 break;
224
225 Idx = InfixOperatorStack.size() - 1;
226 StackOp = InfixOperatorStack[Idx];
227 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
228 break;
229
230 // If we have an even parentheses count and we see a left parentheses,
231 // then stop processing.
232 if (!ParenCount && StackOp == IC_LPAREN)
233 break;
234
235 if (StackOp == IC_RPAREN) {
236 ++ParenCount;
237 InfixOperatorStack.pop_back();
238 } else if (StackOp == IC_LPAREN) {
239 --ParenCount;
240 InfixOperatorStack.pop_back();
241 } else {
242 InfixOperatorStack.pop_back();
243 PostfixStack.push_back(std::make_pair(StackOp, 0));
244 }
245 }
246 // Push the new operator.
247 InfixOperatorStack.push_back(Op);
248 }
249
250 int64_t execute() {
251 // Push any remaining operators onto the postfix stack.
252 while (!InfixOperatorStack.empty()) {
253 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
254 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
255 PostfixStack.push_back(std::make_pair(StackOp, 0));
256 }
257
258 if (PostfixStack.empty())
259 return 0;
260
261 SmallVector<ICToken, 16> OperandStack;
262 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
263 ICToken Op = PostfixStack[i];
264 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
265 OperandStack.push_back(Op);
266 } else if (isUnaryOperator(Op.first)) {
267 assert (OperandStack.size() > 0 && "Too few operands.");
268 ICToken Operand = OperandStack.pop_back_val();
269 assert (Operand.first == IC_IMM &&
270 "Unary operation with a register!");
271 switch (Op.first) {
272 default:
273 report_fatal_error("Unexpected operator!");
274 break;
275 case IC_NEG:
276 OperandStack.push_back(std::make_pair(IC_IMM, -Operand.second));
277 break;
278 case IC_NOT:
279 OperandStack.push_back(std::make_pair(IC_IMM, ~Operand.second));
280 break;
281 }
282 } else {
283 assert (OperandStack.size() > 1 && "Too few operands.");
284 int64_t Val;
285 ICToken Op2 = OperandStack.pop_back_val();
286 ICToken Op1 = OperandStack.pop_back_val();
287 switch (Op.first) {
288 default:
289 report_fatal_error("Unexpected operator!");
290 break;
291 case IC_PLUS:
292 Val = Op1.second + Op2.second;
293 OperandStack.push_back(std::make_pair(IC_IMM, Val));
294 break;
295 case IC_MINUS:
296 Val = Op1.second - Op2.second;
297 OperandStack.push_back(std::make_pair(IC_IMM, Val));
298 break;
299 case IC_MULTIPLY:
300 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
301 "Multiply operation with an immediate and a register!");
302 Val = Op1.second * Op2.second;
303 OperandStack.push_back(std::make_pair(IC_IMM, Val));
304 break;
305 case IC_DIVIDE:
306 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
307 "Divide operation with an immediate and a register!");
308 assert (Op2.second != 0 && "Division by zero!");
309 Val = Op1.second / Op2.second;
310 OperandStack.push_back(std::make_pair(IC_IMM, Val));
311 break;
312 case IC_MOD:
313 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
314 "Modulo operation with an immediate and a register!");
315 Val = Op1.second % Op2.second;
316 OperandStack.push_back(std::make_pair(IC_IMM, Val));
317 break;
318 case IC_OR:
319 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
320 "Or operation with an immediate and a register!");
321 Val = Op1.second | Op2.second;
322 OperandStack.push_back(std::make_pair(IC_IMM, Val));
323 break;
324 case IC_XOR:
325 assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
326 "Xor operation with an immediate and a register!");
327 Val = Op1.second ^ Op2.second;
328 OperandStack.push_back(std::make_pair(IC_IMM, Val));
329 break;
330 case IC_AND:
331 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
332 "And operation with an immediate and a register!");
333 Val = Op1.second & Op2.second;
334 OperandStack.push_back(std::make_pair(IC_IMM, Val));
335 break;
336 case IC_LSHIFT:
337 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
338 "Left shift operation with an immediate and a register!");
339 Val = Op1.second << Op2.second;
340 OperandStack.push_back(std::make_pair(IC_IMM, Val));
341 break;
342 case IC_RSHIFT:
343 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
344 "Right shift operation with an immediate and a register!");
345 Val = Op1.second >> Op2.second;
346 OperandStack.push_back(std::make_pair(IC_IMM, Val));
347 break;
348 case IC_EQ:
349 assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
350 "Equals operation with an immediate and a register!");
351 Val = (Op1.second == Op2.second) ? -1 : 0;
352 OperandStack.push_back(std::make_pair(IC_IMM, Val));
353 break;
354 case IC_NE:
355 assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
356 "Not-equals operation with an immediate and a register!");
357 Val = (Op1.second != Op2.second) ? -1 : 0;
358 OperandStack.push_back(std::make_pair(IC_IMM, Val));
359 break;
360 case IC_LT:
361 assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
362 "Less-than operation with an immediate and a register!");
363 Val = (Op1.second < Op2.second) ? -1 : 0;
364 OperandStack.push_back(std::make_pair(IC_IMM, Val));
365 break;
366 case IC_LE:
367 assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
368 "Less-than-or-equal operation with an immediate and a "
369 "register!");
370 Val = (Op1.second <= Op2.second) ? -1 : 0;
371 OperandStack.push_back(std::make_pair(IC_IMM, Val));
372 break;
373 case IC_GT:
374 assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
375 "Greater-than operation with an immediate and a register!");
376 Val = (Op1.second > Op2.second) ? -1 : 0;
377 OperandStack.push_back(std::make_pair(IC_IMM, Val));
378 break;
379 case IC_GE:
380 assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
381 "Greater-than-or-equal operation with an immediate and a "
382 "register!");
383 Val = (Op1.second >= Op2.second) ? -1 : 0;
384 OperandStack.push_back(std::make_pair(IC_IMM, Val));
385 break;
386 }
387 }
388 }
389 assert (OperandStack.size() == 1 && "Expected a single result.");
390 return OperandStack.pop_back_val().second;
391 }
392 };
393
/// States of IntelExprStateMachine. Most states are named after the token
/// that was consumed last; IES_INIT is the start state and IES_ERROR marks a
/// rejected token sequence.
enum IntelExprState {
  IES_INIT,
  // Bitwise operators.
  IES_OR,
  IES_XOR,
  IES_AND,
  // Comparisons.
  IES_EQ,
  IES_NE,
  IES_LT,
  IES_LE,
  IES_GT,
  IES_GE,
  // Shifts.
  IES_LSHIFT,
  IES_RSHIFT,
  // Additive operators and prefix forms.
  IES_PLUS,
  IES_MINUS,
  IES_OFFSET,
  IES_CAST,
  IES_NOT,
  // Multiplicative operators.
  IES_MULTIPLY,
  IES_DIVIDE,
  IES_MOD,
  // Brackets and parentheses.
  IES_LBRAC,
  IES_RBRAC,
  IES_LPAREN,
  IES_RPAREN,
  // Operands.
  IES_REGISTER,
  IES_INTEGER,
  IES_IDENTIFIER,
  IES_ERROR
};
424
425 class IntelExprStateMachine {
426 IntelExprState State = IES_INIT, PrevState = IES_ERROR;
427 unsigned BaseReg = 0, IndexReg = 0, TmpReg = 0, Scale = 0;
428 int64_t Imm = 0;
429 const MCExpr *Sym = nullptr;
430 StringRef SymName;
431 InfixCalculator IC;
433 short BracCount = 0;
434 bool MemExpr = false;
435 bool OffsetOperator = false;
436 bool AttachToOperandIdx = false;
437 bool IsPIC = false;
438 SMLoc OffsetOperatorLoc;
439 AsmTypeInfo CurType;
440
441 bool setSymRef(const MCExpr *Val, StringRef ID, StringRef &ErrMsg) {
442 if (Sym) {
443 ErrMsg = "cannot use more than one symbol in memory operand";
444 return true;
445 }
446 Sym = Val;
447 SymName = ID;
448 return false;
449 }
450
451 public:
452 IntelExprStateMachine() = default;
453
454 void addImm(int64_t imm) { Imm += imm; }
455 short getBracCount() const { return BracCount; }
456 bool isMemExpr() const { return MemExpr; }
457 bool isOffsetOperator() const { return OffsetOperator; }
458 SMLoc getOffsetLoc() const { return OffsetOperatorLoc; }
459 unsigned getBaseReg() const { return BaseReg; }
460 unsigned getIndexReg() const { return IndexReg; }
461 unsigned getScale() const { return Scale; }
462 const MCExpr *getSym() const { return Sym; }
463 StringRef getSymName() const { return SymName; }
464 StringRef getType() const { return CurType.Name; }
465 unsigned getSize() const { return CurType.Size; }
466 unsigned getElementSize() const { return CurType.ElementSize; }
467 unsigned getLength() const { return CurType.Length; }
468 int64_t getImm() { return Imm + IC.execute(); }
469 bool isValidEndState() const {
470 return State == IES_RBRAC || State == IES_INTEGER;
471 }
472
473 // Is the intel expression appended after an operand index.
474 // [OperandIdx][Intel Expression]
475 // This is neccessary for checking if it is an independent
476 // intel expression at back end when parse inline asm.
477 void setAppendAfterOperand() { AttachToOperandIdx = true; }
478
479 bool isPIC() const { return IsPIC; }
480 void setPIC() { IsPIC = true; }
481
482 bool hadError() const { return State == IES_ERROR; }
483 const InlineAsmIdentifierInfo &getIdentifierInfo() const { return Info; }
484
485 bool regsUseUpError(StringRef &ErrMsg) {
486 // This case mostly happen in inline asm, e.g. Arr[BaseReg + IndexReg]
487 // can not intruduce additional register in inline asm in PIC model.
488 if (IsPIC && AttachToOperandIdx)
489 ErrMsg = "Don't use 2 or more regs for mem offset in PIC model!";
490 else
491 ErrMsg = "BaseReg/IndexReg already set!";
492 return true;
493 }
494
495 void onOr() {
496 IntelExprState CurrState = State;
497 switch (State) {
498 default:
499 State = IES_ERROR;
500 break;
501 case IES_INTEGER:
502 case IES_RPAREN:
503 case IES_REGISTER:
504 State = IES_OR;
505 IC.pushOperator(IC_OR);
506 break;
507 }
508 PrevState = CurrState;
509 }
510 void onXor() {
511 IntelExprState CurrState = State;
512 switch (State) {
513 default:
514 State = IES_ERROR;
515 break;
516 case IES_INTEGER:
517 case IES_RPAREN:
518 case IES_REGISTER:
519 State = IES_XOR;
520 IC.pushOperator(IC_XOR);
521 break;
522 }
523 PrevState = CurrState;
524 }
525 void onAnd() {
526 IntelExprState CurrState = State;
527 switch (State) {
528 default:
529 State = IES_ERROR;
530 break;
531 case IES_INTEGER:
532 case IES_RPAREN:
533 case IES_REGISTER:
534 State = IES_AND;
535 IC.pushOperator(IC_AND);
536 break;
537 }
538 PrevState = CurrState;
539 }
540 void onEq() {
541 IntelExprState CurrState = State;
542 switch (State) {
543 default:
544 State = IES_ERROR;
545 break;
546 case IES_INTEGER:
547 case IES_RPAREN:
548 case IES_REGISTER:
549 State = IES_EQ;
550 IC.pushOperator(IC_EQ);
551 break;
552 }
553 PrevState = CurrState;
554 }
555 void onNE() {
556 IntelExprState CurrState = State;
557 switch (State) {
558 default:
559 State = IES_ERROR;
560 break;
561 case IES_INTEGER:
562 case IES_RPAREN:
563 case IES_REGISTER:
564 State = IES_NE;
565 IC.pushOperator(IC_NE);
566 break;
567 }
568 PrevState = CurrState;
569 }
570 void onLT() {
571 IntelExprState CurrState = State;
572 switch (State) {
573 default:
574 State = IES_ERROR;
575 break;
576 case IES_INTEGER:
577 case IES_RPAREN:
578 case IES_REGISTER:
579 State = IES_LT;
580 IC.pushOperator(IC_LT);
581 break;
582 }
583 PrevState = CurrState;
584 }
585 void onLE() {
586 IntelExprState CurrState = State;
587 switch (State) {
588 default:
589 State = IES_ERROR;
590 break;
591 case IES_INTEGER:
592 case IES_RPAREN:
593 case IES_REGISTER:
594 State = IES_LE;
595 IC.pushOperator(IC_LE);
596 break;
597 }
598 PrevState = CurrState;
599 }
600 void onGT() {
601 IntelExprState CurrState = State;
602 switch (State) {
603 default:
604 State = IES_ERROR;
605 break;
606 case IES_INTEGER:
607 case IES_RPAREN:
608 case IES_REGISTER:
609 State = IES_GT;
610 IC.pushOperator(IC_GT);
611 break;
612 }
613 PrevState = CurrState;
614 }
615 void onGE() {
616 IntelExprState CurrState = State;
617 switch (State) {
618 default:
619 State = IES_ERROR;
620 break;
621 case IES_INTEGER:
622 case IES_RPAREN:
623 case IES_REGISTER:
624 State = IES_GE;
625 IC.pushOperator(IC_GE);
626 break;
627 }
628 PrevState = CurrState;
629 }
630 void onLShift() {
631 IntelExprState CurrState = State;
632 switch (State) {
633 default:
634 State = IES_ERROR;
635 break;
636 case IES_INTEGER:
637 case IES_RPAREN:
638 case IES_REGISTER:
639 State = IES_LSHIFT;
640 IC.pushOperator(IC_LSHIFT);
641 break;
642 }
643 PrevState = CurrState;
644 }
645 void onRShift() {
646 IntelExprState CurrState = State;
647 switch (State) {
648 default:
649 State = IES_ERROR;
650 break;
651 case IES_INTEGER:
652 case IES_RPAREN:
653 case IES_REGISTER:
654 State = IES_RSHIFT;
655 IC.pushOperator(IC_RSHIFT);
656 break;
657 }
658 PrevState = CurrState;
659 }
660 bool onPlus(StringRef &ErrMsg) {
661 IntelExprState CurrState = State;
662 switch (State) {
663 default:
664 State = IES_ERROR;
665 break;
666 case IES_INTEGER:
667 case IES_RPAREN:
668 case IES_REGISTER:
669 case IES_OFFSET:
670 State = IES_PLUS;
671 IC.pushOperator(IC_PLUS);
672 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
673 // If we already have a BaseReg, then assume this is the IndexReg with
674 // no explicit scale.
675 if (!BaseReg) {
676 BaseReg = TmpReg;
677 } else {
678 if (IndexReg)
679 return regsUseUpError(ErrMsg);
680 IndexReg = TmpReg;
681 Scale = 0;
682 }
683 }
684 break;
685 }
686 PrevState = CurrState;
687 return false;
688 }
689 bool onMinus(StringRef &ErrMsg) {
690 IntelExprState CurrState = State;
691 switch (State) {
692 default:
693 State = IES_ERROR;
694 break;
695 case IES_OR:
696 case IES_XOR:
697 case IES_AND:
698 case IES_EQ:
699 case IES_NE:
700 case IES_LT:
701 case IES_LE:
702 case IES_GT:
703 case IES_GE:
704 case IES_LSHIFT:
705 case IES_RSHIFT:
706 case IES_PLUS:
707 case IES_NOT:
708 case IES_MULTIPLY:
709 case IES_DIVIDE:
710 case IES_MOD:
711 case IES_LPAREN:
712 case IES_RPAREN:
713 case IES_LBRAC:
714 case IES_RBRAC:
715 case IES_INTEGER:
716 case IES_REGISTER:
717 case IES_INIT:
718 case IES_OFFSET:
719 State = IES_MINUS;
720 // push minus operator if it is not a negate operator
721 if (CurrState == IES_REGISTER || CurrState == IES_RPAREN ||
722 CurrState == IES_INTEGER || CurrState == IES_RBRAC ||
723 CurrState == IES_OFFSET)
724 IC.pushOperator(IC_MINUS);
725 else if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
726 // We have negate operator for Scale: it's illegal
727 ErrMsg = "Scale can't be negative";
728 return true;
729 } else
730 IC.pushOperator(IC_NEG);
731 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
732 // If we already have a BaseReg, then assume this is the IndexReg with
733 // no explicit scale.
734 if (!BaseReg) {
735 BaseReg = TmpReg;
736 } else {
737 if (IndexReg)
738 return regsUseUpError(ErrMsg);
739 IndexReg = TmpReg;
740 Scale = 0;
741 }
742 }
743 break;
744 }
745 PrevState = CurrState;
746 return false;
747 }
748 void onNot() {
749 IntelExprState CurrState = State;
750 switch (State) {
751 default:
752 State = IES_ERROR;
753 break;
754 case IES_OR:
755 case IES_XOR:
756 case IES_AND:
757 case IES_EQ:
758 case IES_NE:
759 case IES_LT:
760 case IES_LE:
761 case IES_GT:
762 case IES_GE:
763 case IES_LSHIFT:
764 case IES_RSHIFT:
765 case IES_PLUS:
766 case IES_MINUS:
767 case IES_NOT:
768 case IES_MULTIPLY:
769 case IES_DIVIDE:
770 case IES_MOD:
771 case IES_LPAREN:
772 case IES_LBRAC:
773 case IES_INIT:
774 State = IES_NOT;
775 IC.pushOperator(IC_NOT);
776 break;
777 }
778 PrevState = CurrState;
779 }
780 bool onRegister(unsigned Reg, StringRef &ErrMsg) {
781 IntelExprState CurrState = State;
782 switch (State) {
783 default:
784 State = IES_ERROR;
785 break;
786 case IES_PLUS:
787 case IES_LPAREN:
788 case IES_LBRAC:
789 State = IES_REGISTER;
790 TmpReg = Reg;
791 IC.pushOperand(IC_REGISTER);
792 break;
793 case IES_MULTIPLY:
794 // Index Register - Scale * Register
795 if (PrevState == IES_INTEGER) {
796 if (IndexReg)
797 return regsUseUpError(ErrMsg);
798 State = IES_REGISTER;
799 IndexReg = Reg;
800 // Get the scale and replace the 'Scale * Register' with '0'.
801 Scale = IC.popOperand();
802 if (checkScale(Scale, ErrMsg))
803 return true;
804 IC.pushOperand(IC_IMM);
805 IC.popOperator();
806 } else {
807 State = IES_ERROR;
808 }
809 break;
810 }
811 PrevState = CurrState;
812 return false;
813 }
814 bool onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName,
815 const InlineAsmIdentifierInfo &IDInfo,
816 const AsmTypeInfo &Type, bool ParsingMSInlineAsm,
817 StringRef &ErrMsg) {
818 // InlineAsm: Treat an enum value as an integer
819 if (ParsingMSInlineAsm)
821 return onInteger(IDInfo.Enum.EnumVal, ErrMsg);
822 // Treat a symbolic constant like an integer
823 if (auto *CE = dyn_cast<MCConstantExpr>(SymRef))
824 return onInteger(CE->getValue(), ErrMsg);
825 PrevState = State;
826 switch (State) {
827 default:
828 State = IES_ERROR;
829 break;
830 case IES_CAST:
831 case IES_PLUS:
832 case IES_MINUS:
833 case IES_NOT:
834 case IES_INIT:
835 case IES_LBRAC:
836 case IES_LPAREN:
837 if (setSymRef(SymRef, SymRefName, ErrMsg))
838 return true;
839 MemExpr = true;
840 State = IES_INTEGER;
841 IC.pushOperand(IC_IMM);
842 if (ParsingMSInlineAsm)
843 Info = IDInfo;
844 setTypeInfo(Type);
845 break;
846 }
847 return false;
848 }
849 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
850 IntelExprState CurrState = State;
851 switch (State) {
852 default:
853 State = IES_ERROR;
854 break;
855 case IES_PLUS:
856 case IES_MINUS:
857 case IES_NOT:
858 case IES_OR:
859 case IES_XOR:
860 case IES_AND:
861 case IES_EQ:
862 case IES_NE:
863 case IES_LT:
864 case IES_LE:
865 case IES_GT:
866 case IES_GE:
867 case IES_LSHIFT:
868 case IES_RSHIFT:
869 case IES_DIVIDE:
870 case IES_MOD:
871 case IES_MULTIPLY:
872 case IES_LPAREN:
873 case IES_INIT:
874 case IES_LBRAC:
875 State = IES_INTEGER;
876 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
877 // Index Register - Register * Scale
878 if (IndexReg)
879 return regsUseUpError(ErrMsg);
880 IndexReg = TmpReg;
881 Scale = TmpInt;
882 if (checkScale(Scale, ErrMsg))
883 return true;
884 // Get the scale and replace the 'Register * Scale' with '0'.
885 IC.popOperator();
886 } else {
887 IC.pushOperand(IC_IMM, TmpInt);
888 }
889 break;
890 }
891 PrevState = CurrState;
892 return false;
893 }
894 void onStar() {
895 PrevState = State;
896 switch (State) {
897 default:
898 State = IES_ERROR;
899 break;
900 case IES_INTEGER:
901 case IES_REGISTER:
902 case IES_RPAREN:
903 State = IES_MULTIPLY;
904 IC.pushOperator(IC_MULTIPLY);
905 break;
906 }
907 }
908 void onDivide() {
909 PrevState = State;
910 switch (State) {
911 default:
912 State = IES_ERROR;
913 break;
914 case IES_INTEGER:
915 case IES_RPAREN:
916 State = IES_DIVIDE;
917 IC.pushOperator(IC_DIVIDE);
918 break;
919 }
920 }
921 void onMod() {
922 PrevState = State;
923 switch (State) {
924 default:
925 State = IES_ERROR;
926 break;
927 case IES_INTEGER:
928 case IES_RPAREN:
929 State = IES_MOD;
930 IC.pushOperator(IC_MOD);
931 break;
932 }
933 }
934 bool onLBrac() {
935 if (BracCount)
936 return true;
937 PrevState = State;
938 switch (State) {
939 default:
940 State = IES_ERROR;
941 break;
942 case IES_RBRAC:
943 case IES_INTEGER:
944 case IES_RPAREN:
945 State = IES_PLUS;
946 IC.pushOperator(IC_PLUS);
947 CurType.Length = 1;
948 CurType.Size = CurType.ElementSize;
949 break;
950 case IES_INIT:
951 case IES_CAST:
952 assert(!BracCount && "BracCount should be zero on parsing's start");
953 State = IES_LBRAC;
954 break;
955 }
956 MemExpr = true;
957 BracCount++;
958 return false;
959 }
960 bool onRBrac(StringRef &ErrMsg) {
961 IntelExprState CurrState = State;
962 switch (State) {
963 default:
964 State = IES_ERROR;
965 break;
966 case IES_INTEGER:
967 case IES_OFFSET:
968 case IES_REGISTER:
969 case IES_RPAREN:
970 if (BracCount-- != 1) {
971 ErrMsg = "unexpected bracket encountered";
972 return true;
973 }
974 State = IES_RBRAC;
975 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
976 // If we already have a BaseReg, then assume this is the IndexReg with
977 // no explicit scale.
978 if (!BaseReg) {
979 BaseReg = TmpReg;
980 } else {
981 if (IndexReg)
982 return regsUseUpError(ErrMsg);
983 IndexReg = TmpReg;
984 Scale = 0;
985 }
986 }
987 break;
988 }
989 PrevState = CurrState;
990 return false;
991 }
992 void onLParen() {
993 IntelExprState CurrState = State;
994 switch (State) {
995 default:
996 State = IES_ERROR;
997 break;
998 case IES_PLUS:
999 case IES_MINUS:
1000 case IES_NOT:
1001 case IES_OR:
1002 case IES_XOR:
1003 case IES_AND:
1004 case IES_EQ:
1005 case IES_NE:
1006 case IES_LT:
1007 case IES_LE:
1008 case IES_GT:
1009 case IES_GE:
1010 case IES_LSHIFT:
1011 case IES_RSHIFT:
1012 case IES_MULTIPLY:
1013 case IES_DIVIDE:
1014 case IES_MOD:
1015 case IES_LPAREN:
1016 case IES_INIT:
1017 case IES_LBRAC:
1018 State = IES_LPAREN;
1019 IC.pushOperator(IC_LPAREN);
1020 break;
1021 }
1022 PrevState = CurrState;
1023 }
1024 void onRParen() {
1025 PrevState = State;
1026 switch (State) {
1027 default:
1028 State = IES_ERROR;
1029 break;
1030 case IES_INTEGER:
1031 case IES_OFFSET:
1032 case IES_REGISTER:
1033 case IES_RBRAC:
1034 case IES_RPAREN:
1035 State = IES_RPAREN;
1036 IC.pushOperator(IC_RPAREN);
1037 break;
1038 }
1039 }
1040 bool onOffset(const MCExpr *Val, SMLoc OffsetLoc, StringRef ID,
1041 const InlineAsmIdentifierInfo &IDInfo,
1042 bool ParsingMSInlineAsm, StringRef &ErrMsg) {
1043 PrevState = State;
1044 switch (State) {
1045 default:
1046 ErrMsg = "unexpected offset operator expression";
1047 return true;
1048 case IES_PLUS:
1049 case IES_INIT:
1050 case IES_LBRAC:
1051 if (setSymRef(Val, ID, ErrMsg))
1052 return true;
1053 OffsetOperator = true;
1054 OffsetOperatorLoc = OffsetLoc;
1055 State = IES_OFFSET;
1056 // As we cannot yet resolve the actual value (offset), we retain
1057 // the requested semantics by pushing a '0' to the operands stack
1058 IC.pushOperand(IC_IMM);
1059 if (ParsingMSInlineAsm) {
1060 Info = IDInfo;
1061 }
1062 break;
1063 }
1064 return false;
1065 }
1066 void onCast(AsmTypeInfo Info) {
1067 PrevState = State;
1068 switch (State) {
1069 default:
1070 State = IES_ERROR;
1071 break;
1072 case IES_LPAREN:
1073 setTypeInfo(Info);
1074 State = IES_CAST;
1075 break;
1076 }
1077 }
1078 void setTypeInfo(AsmTypeInfo Type) { CurType = Type; }
1079 };
1080
1081 bool Error(SMLoc L, const Twine &Msg, SMRange Range = std::nullopt,
1082 bool MatchingInlineAsm = false) {
1083 MCAsmParser &Parser = getParser();
1084 if (MatchingInlineAsm) {
1085 if (!getLexer().isAtStartOfStatement())
1086 Parser.eatToEndOfStatement();
1087 return false;
1088 }
1089 return Parser.Error(L, Msg, Range);
1090 }
1091
1092 bool MatchRegisterByName(MCRegister &RegNo, StringRef RegName, SMLoc StartLoc,
1093 SMLoc EndLoc);
1094 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1095 bool RestoreOnFailure);
1096
1097 std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
1098 std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
1099 bool IsSIReg(unsigned Reg);
1100 unsigned GetSIDIForRegClass(unsigned RegClassID, unsigned Reg, bool IsSIReg);
1101 void
1102 AddDefaultSrcDestOperands(OperandVector &Operands,
1103 std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
1104 std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst);
1105 bool VerifyAndAdjustOperands(OperandVector &OrigOperands,
1106 OperandVector &FinalOperands);
1107 bool parseOperand(OperandVector &Operands, StringRef Name);
1108 bool parseATTOperand(OperandVector &Operands);
1109 bool parseIntelOperand(OperandVector &Operands, StringRef Name);
1110 bool ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID,
1111 InlineAsmIdentifierInfo &Info, SMLoc &End);
1112 bool ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End);
1113 unsigned IdentifyIntelInlineAsmOperator(StringRef Name);
1114 unsigned ParseIntelInlineAsmOperator(unsigned OpKind);
1115 unsigned IdentifyMasmOperator(StringRef Name);
1116 bool ParseMasmOperator(unsigned OpKind, int64_t &Val);
1117 bool ParseRoundingModeOp(SMLoc Start, OperandVector &Operands);
1118 bool ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM,
1119 bool &ParseError, SMLoc &End);
1120 bool ParseMasmNamedOperator(StringRef Name, IntelExprStateMachine &SM,
1121 bool &ParseError, SMLoc &End);
1122 void RewriteIntelExpression(IntelExprStateMachine &SM, SMLoc Start,
1123 SMLoc End);
1124 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
1125 bool ParseIntelInlineAsmIdentifier(const MCExpr *&Val, StringRef &Identifier,
1127 bool IsUnevaluatedOperand, SMLoc &End,
1128 bool IsParsingOffsetOperator = false);
1129 void tryParseOperandIdx(AsmToken::TokenKind PrevTK,
1130 IntelExprStateMachine &SM);
1131
1132 bool ParseMemOperand(unsigned SegReg, const MCExpr *Disp, SMLoc StartLoc,
1133 SMLoc EndLoc, OperandVector &Operands);
1134
1135 X86::CondCode ParseConditionCode(StringRef CCode);
1136
1137 bool ParseIntelMemoryOperandSize(unsigned &Size);
1138 bool CreateMemForMSInlineAsm(unsigned SegReg, const MCExpr *Disp,
1139 unsigned BaseReg, unsigned IndexReg,
1140 unsigned Scale, SMLoc Start, SMLoc End,
1141 unsigned Size, StringRef Identifier,
1142 const InlineAsmIdentifierInfo &Info,
1144
1145 bool parseDirectiveArch();
1146 bool parseDirectiveNops(SMLoc L);
1147 bool parseDirectiveEven(SMLoc L);
1148 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
1149
1150 /// CodeView FPO data directives.
1151 bool parseDirectiveFPOProc(SMLoc L);
1152 bool parseDirectiveFPOSetFrame(SMLoc L);
1153 bool parseDirectiveFPOPushReg(SMLoc L);
1154 bool parseDirectiveFPOStackAlloc(SMLoc L);
1155 bool parseDirectiveFPOStackAlign(SMLoc L);
1156 bool parseDirectiveFPOEndPrologue(SMLoc L);
1157 bool parseDirectiveFPOEndProc(SMLoc L);
1158
1159 /// SEH directives.
1160 bool parseSEHRegisterNumber(unsigned RegClassID, MCRegister &RegNo);
1161 bool parseDirectiveSEHPushReg(SMLoc);
1162 bool parseDirectiveSEHSetFrame(SMLoc);
1163 bool parseDirectiveSEHSaveReg(SMLoc);
1164 bool parseDirectiveSEHSaveXMM(SMLoc);
1165 bool parseDirectiveSEHPushFrame(SMLoc);
1166
1167 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1168
1169 bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
1170 bool processInstruction(MCInst &Inst, const OperandVector &Ops);
1171
1172 // Load Value Injection (LVI) Mitigations for machine code
1173 void emitWarningForSpecialLVIInstruction(SMLoc Loc);
1174 void applyLVICFIMitigation(MCInst &Inst, MCStreamer &Out);
1175 void applyLVILoadHardeningMitigation(MCInst &Inst, MCStreamer &Out);
1176
1177 /// Wrapper around MCStreamer::emitInstruction(). Possibly adds
1178 /// instrumentation around Inst.
1179 void emitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
1180
1181 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1184 bool MatchingInlineAsm) override;
1185
1186 void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
1187 MCStreamer &Out, bool MatchingInlineAsm);
1188
1189 bool ErrorMissingFeature(SMLoc IDLoc, const FeatureBitset &MissingFeatures,
1190 bool MatchingInlineAsm);
1191
1192 bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
1195 bool MatchingInlineAsm);
1196
1197 bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
1200 bool MatchingInlineAsm);
1201
1202 bool OmitRegisterFromClobberLists(unsigned RegNo) override;
1203
1204 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
1205 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
1206 /// return false if no parsing errors occurred, true otherwise.
1207 bool HandleAVX512Operand(OperandVector &Operands);
1208
1209 bool ParseZ(std::unique_ptr<X86Operand> &Z, const SMLoc &StartLoc);
1210
1211 bool is64BitMode() const {
1212 // FIXME: Can tablegen auto-generate this?
1213 return getSTI().hasFeature(X86::Is64Bit);
1214 }
1215 bool is32BitMode() const {
1216 // FIXME: Can tablegen auto-generate this?
1217 return getSTI().hasFeature(X86::Is32Bit);
1218 }
1219 bool is16BitMode() const {
1220 // FIXME: Can tablegen auto-generate this?
1221 return getSTI().hasFeature(X86::Is16Bit);
1222 }
1223 void SwitchMode(unsigned mode) {
1224 MCSubtargetInfo &STI = copySTI();
1225 FeatureBitset AllModes({X86::Is64Bit, X86::Is32Bit, X86::Is16Bit});
1226 FeatureBitset OldMode = STI.getFeatureBits() & AllModes;
1227 FeatureBitset FB = ComputeAvailableFeatures(
1228 STI.ToggleFeature(OldMode.flip(mode)));
1230
1231 assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes));
1232 }
1233
1234 unsigned getPointerWidth() {
1235 if (is16BitMode()) return 16;
1236 if (is32BitMode()) return 32;
1237 if (is64BitMode()) return 64;
1238 llvm_unreachable("invalid mode");
1239 }
1240
1241 bool isParsingIntelSyntax() {
1242 return getParser().getAssemblerDialect();
1243 }
1244
1245 /// @name Auto-generated Matcher Functions
1246 /// {
1247
1248#define GET_ASSEMBLER_HEADER
1249#include "X86GenAsmMatcher.inc"
1250
1251 /// }
1252
1253public:
1254 enum X86MatchResultTy {
1255 Match_Unsupported = FIRST_TARGET_MATCH_RESULT_TY,
1256#define GET_OPERAND_DIAGNOSTIC_TYPES
1257#include "X86GenAsmMatcher.inc"
1258 };
1259
1260 X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
1261 const MCInstrInfo &mii, const MCTargetOptions &Options)
1262 : MCTargetAsmParser(Options, sti, mii), InstInfo(nullptr),
1263 Code16GCC(false) {
1264
1265 Parser.addAliasForDirective(".word", ".2byte");
1266
1267 // Initialize the set of available features.
1268 setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
1269 }
1270
1271 bool parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
1272 SMLoc &EndLoc) override;
1274 SMLoc &EndLoc) override;
1275
1276 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1277
1279 SMLoc NameLoc, OperandVector &Operands) override;
1280
1281 bool ParseDirective(AsmToken DirectiveID) override;
1282};
1283} // end anonymous namespace
1284
1285/// @name Auto-generated Match Functions
1286/// {
1287
1289
1290/// }
1291
1292static bool CheckBaseRegAndIndexRegAndScale(unsigned BaseReg, unsigned IndexReg,
1293 unsigned Scale, bool Is64BitMode,
1294 StringRef &ErrMsg) {
1295 // If we have both a base register and an index register make sure they are
1296 // both 64-bit or 32-bit registers.
1297 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
1298
1299 if (BaseReg != 0 &&
1300 !(BaseReg == X86::RIP || BaseReg == X86::EIP ||
1301 X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) ||
1302 X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) ||
1303 X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg))) {
1304 ErrMsg = "invalid base+index expression";
1305 return true;
1306 }
1307
1308 if (IndexReg != 0 &&
1309 !(IndexReg == X86::EIZ || IndexReg == X86::RIZ ||
1310 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1311 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
1312 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
1313 X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) ||
1314 X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) ||
1315 X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg))) {
1316 ErrMsg = "invalid base+index expression";
1317 return true;
1318 }
1319
1320 if (((BaseReg == X86::RIP || BaseReg == X86::EIP) && IndexReg != 0) ||
1321 IndexReg == X86::EIP || IndexReg == X86::RIP ||
1322 IndexReg == X86::ESP || IndexReg == X86::RSP) {
1323 ErrMsg = "invalid base+index expression";
1324 return true;
1325 }
1326
1327 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
1328 // and then only in non-64-bit modes.
1329 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1330 (Is64BitMode || (BaseReg != X86::BX && BaseReg != X86::BP &&
1331 BaseReg != X86::SI && BaseReg != X86::DI))) {
1332 ErrMsg = "invalid 16-bit base register";
1333 return true;
1334 }
1335
1336 if (BaseReg == 0 &&
1337 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
1338 ErrMsg = "16-bit memory operand may not include only index register";
1339 return true;
1340 }
1341
1342 if (BaseReg != 0 && IndexReg != 0) {
1343 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
1344 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1345 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
1346 IndexReg == X86::EIZ)) {
1347 ErrMsg = "base register is 64-bit, but index register is not";
1348 return true;
1349 }
1350 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
1351 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1352 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
1353 IndexReg == X86::RIZ)) {
1354 ErrMsg = "base register is 32-bit, but index register is not";
1355 return true;
1356 }
1357 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
1358 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
1359 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
1360 ErrMsg = "base register is 16-bit, but index register is not";
1361 return true;
1362 }
1363 if ((BaseReg != X86::BX && BaseReg != X86::BP) ||
1364 (IndexReg != X86::SI && IndexReg != X86::DI)) {
1365 ErrMsg = "invalid 16-bit base/index register combination";
1366 return true;
1367 }
1368 }
1369 }
1370
1371 // RIP/EIP-relative addressing is only supported in 64-bit mode.
1372 if (!Is64BitMode && BaseReg != 0 &&
1373 (BaseReg == X86::RIP || BaseReg == X86::EIP)) {
1374 ErrMsg = "IP-relative addressing requires 64-bit mode";
1375 return true;
1376 }
1377
1378 return checkScale(Scale, ErrMsg);
1379}
1380
1381bool X86AsmParser::MatchRegisterByName(MCRegister &RegNo, StringRef RegName,
1382 SMLoc StartLoc, SMLoc EndLoc) {
1383 // If we encounter a %, ignore it. This code handles registers with and
1384 // without the prefix, unprefixed registers can occur in cfi directives.
1385 RegName.consume_front("%");
1386
1387 RegNo = MatchRegisterName(RegName);
1388
1389 // If the match failed, try the register name as lowercase.
1390 if (RegNo == 0)
1391 RegNo = MatchRegisterName(RegName.lower());
1392
1393 // The "flags" and "mxcsr" registers cannot be referenced directly.
1394 // Treat it as an identifier instead.
1395 if (isParsingMSInlineAsm() && isParsingIntelSyntax() &&
1396 (RegNo == X86::EFLAGS || RegNo == X86::MXCSR))
1397 RegNo = 0;
1398
1399 if (!is64BitMode()) {
1400 // FIXME: This should be done using Requires<Not64BitMode> and
1401 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
1402 // checked.
1403 if (RegNo == X86::RIZ || RegNo == X86::RIP ||
1404 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
1407 return Error(StartLoc,
1408 "register %" + RegName + " is only available in 64-bit mode",
1409 SMRange(StartLoc, EndLoc));
1410 }
1411 }
1412
1413 // If this is "db[0-15]", match it as an alias
1414 // for dr[0-15].
1415 if (RegNo == 0 && RegName.startswith("db")) {
1416 if (RegName.size() == 3) {
1417 switch (RegName[2]) {
1418 case '0':
1419 RegNo = X86::DR0;
1420 break;
1421 case '1':
1422 RegNo = X86::DR1;
1423 break;
1424 case '2':
1425 RegNo = X86::DR2;
1426 break;
1427 case '3':
1428 RegNo = X86::DR3;
1429 break;
1430 case '4':
1431 RegNo = X86::DR4;
1432 break;
1433 case '5':
1434 RegNo = X86::DR5;
1435 break;
1436 case '6':
1437 RegNo = X86::DR6;
1438 break;
1439 case '7':
1440 RegNo = X86::DR7;
1441 break;
1442 case '8':
1443 RegNo = X86::DR8;
1444 break;
1445 case '9':
1446 RegNo = X86::DR9;
1447 break;
1448 }
1449 } else if (RegName.size() == 4 && RegName[2] == '1') {
1450 switch (RegName[3]) {
1451 case '0':
1452 RegNo = X86::DR10;
1453 break;
1454 case '1':
1455 RegNo = X86::DR11;
1456 break;
1457 case '2':
1458 RegNo = X86::DR12;
1459 break;
1460 case '3':
1461 RegNo = X86::DR13;
1462 break;
1463 case '4':
1464 RegNo = X86::DR14;
1465 break;
1466 case '5':
1467 RegNo = X86::DR15;
1468 break;
1469 }
1470 }
1471 }
1472
1473 if (RegNo == 0) {
1474 if (isParsingIntelSyntax())
1475 return true;
1476 return Error(StartLoc, "invalid register name", SMRange(StartLoc, EndLoc));
1477 }
1478 return false;
1479}
1480
1481bool X86AsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
1482 SMLoc &EndLoc, bool RestoreOnFailure) {
1483 MCAsmParser &Parser = getParser();
1484 MCAsmLexer &Lexer = getLexer();
1485 RegNo = 0;
1486
1488 auto OnFailure = [RestoreOnFailure, &Lexer, &Tokens]() {
1489 if (RestoreOnFailure) {
1490 while (!Tokens.empty()) {
1491 Lexer.UnLex(Tokens.pop_back_val());
1492 }
1493 }
1494 };
1495
1496 const AsmToken &PercentTok = Parser.getTok();
1497 StartLoc = PercentTok.getLoc();
1498
1499 // If we encounter a %, ignore it. This code handles registers with and
1500 // without the prefix, unprefixed registers can occur in cfi directives.
1501 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent)) {
1502 Tokens.push_back(PercentTok);
1503 Parser.Lex(); // Eat percent token.
1504 }
1505
1506 const AsmToken &Tok = Parser.getTok();
1507 EndLoc = Tok.getEndLoc();
1508
1509 if (Tok.isNot(AsmToken::Identifier)) {
1510 OnFailure();
1511 if (isParsingIntelSyntax()) return true;
1512 return Error(StartLoc, "invalid register name",
1513 SMRange(StartLoc, EndLoc));
1514 }
1515
1516 if (MatchRegisterByName(RegNo, Tok.getString(), StartLoc, EndLoc)) {
1517 OnFailure();
1518 return true;
1519 }
1520
1521 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
1522 if (RegNo == X86::ST0) {
1523 Tokens.push_back(Tok);
1524 Parser.Lex(); // Eat 'st'
1525
1526 // Check to see if we have '(4)' after %st.
1527 if (Lexer.isNot(AsmToken::LParen))
1528 return false;
1529 // Lex the paren.
1530 Tokens.push_back(Parser.getTok());
1531 Parser.Lex();
1532
1533 const AsmToken &IntTok = Parser.getTok();
1534 if (IntTok.isNot(AsmToken::Integer)) {
1535 OnFailure();
1536 return Error(IntTok.getLoc(), "expected stack index");
1537 }
1538 switch (IntTok.getIntVal()) {
1539 case 0: RegNo = X86::ST0; break;
1540 case 1: RegNo = X86::ST1; break;
1541 case 2: RegNo = X86::ST2; break;
1542 case 3: RegNo = X86::ST3; break;
1543 case 4: RegNo = X86::ST4; break;
1544 case 5: RegNo = X86::ST5; break;
1545 case 6: RegNo = X86::ST6; break;
1546 case 7: RegNo = X86::ST7; break;
1547 default:
1548 OnFailure();
1549 return Error(IntTok.getLoc(), "invalid stack index");
1550 }
1551
1552 // Lex IntTok
1553 Tokens.push_back(IntTok);
1554 Parser.Lex();
1555 if (Lexer.isNot(AsmToken::RParen)) {
1556 OnFailure();
1557 return Error(Parser.getTok().getLoc(), "expected ')'");
1558 }
1559
1560 EndLoc = Parser.getTok().getEndLoc();
1561 Parser.Lex(); // Eat ')'
1562 return false;
1563 }
1564
1565 EndLoc = Parser.getTok().getEndLoc();
1566
1567 if (RegNo == 0) {
1568 OnFailure();
1569 if (isParsingIntelSyntax()) return true;
1570 return Error(StartLoc, "invalid register name",
1571 SMRange(StartLoc, EndLoc));
1572 }
1573
1574 Parser.Lex(); // Eat identifier token.
1575 return false;
1576}
1577
1578bool X86AsmParser::parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
1579 SMLoc &EndLoc) {
1580 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
1581}
1582
1583OperandMatchResultTy X86AsmParser::tryParseRegister(MCRegister &RegNo,
1584 SMLoc &StartLoc,
1585 SMLoc &EndLoc) {
1586 bool Result =
1587 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
1588 bool PendingErrors = getParser().hasPendingError();
1589 getParser().clearPendingErrors();
1590 if (PendingErrors)
1592 if (Result)
1593 return MatchOperand_NoMatch;
1594 return MatchOperand_Success;
1595}
1596
1597std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
1598 bool Parse32 = is32BitMode() || Code16GCC;
1599 unsigned Basereg = is64BitMode() ? X86::RSI : (Parse32 ? X86::ESI : X86::SI);
1600 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1601 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1602 /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
1603 Loc, Loc, 0);
1604}
1605
1606std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
1607 bool Parse32 = is32BitMode() || Code16GCC;
1608 unsigned Basereg = is64BitMode() ? X86::RDI : (Parse32 ? X86::EDI : X86::DI);
1609 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1610 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1611 /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
1612 Loc, Loc, 0);
1613}
1614
1615bool X86AsmParser::IsSIReg(unsigned Reg) {
1616 switch (Reg) {
1617 default: llvm_unreachable("Only (R|E)SI and (R|E)DI are expected!");
1618 case X86::RSI:
1619 case X86::ESI:
1620 case X86::SI:
1621 return true;
1622 case X86::RDI:
1623 case X86::EDI:
1624 case X86::DI:
1625 return false;
1626 }
1627}
1628
1629unsigned X86AsmParser::GetSIDIForRegClass(unsigned RegClassID, unsigned Reg,
1630 bool IsSIReg) {
1631 switch (RegClassID) {
1632 default: llvm_unreachable("Unexpected register class");
1633 case X86::GR64RegClassID:
1634 return IsSIReg ? X86::RSI : X86::RDI;
1635 case X86::GR32RegClassID:
1636 return IsSIReg ? X86::ESI : X86::EDI;
1637 case X86::GR16RegClassID:
1638 return IsSIReg ? X86::SI : X86::DI;
1639 }
1640}
1641
1642void X86AsmParser::AddDefaultSrcDestOperands(
1643 OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
1644 std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst) {
1645 if (isParsingIntelSyntax()) {
1646 Operands.push_back(std::move(Dst));
1647 Operands.push_back(std::move(Src));
1648 }
1649 else {
1650 Operands.push_back(std::move(Src));
1651 Operands.push_back(std::move(Dst));
1652 }
1653}
1654
1655bool X86AsmParser::VerifyAndAdjustOperands(OperandVector &OrigOperands,
1656 OperandVector &FinalOperands) {
1657
1658 if (OrigOperands.size() > 1) {
1659 // Check if sizes match, OrigOperands also contains the instruction name
1660 assert(OrigOperands.size() == FinalOperands.size() + 1 &&
1661 "Operand size mismatch");
1662
1664 // Verify types match
1665 int RegClassID = -1;
1666 for (unsigned int i = 0; i < FinalOperands.size(); ++i) {
1667 X86Operand &OrigOp = static_cast<X86Operand &>(*OrigOperands[i + 1]);
1668 X86Operand &FinalOp = static_cast<X86Operand &>(*FinalOperands[i]);
1669
1670 if (FinalOp.isReg() &&
1671 (!OrigOp.isReg() || FinalOp.getReg() != OrigOp.getReg()))
1672 // Return false and let a normal complaint about bogus operands happen
1673 return false;
1674
1675 if (FinalOp.isMem()) {
1676
1677 if (!OrigOp.isMem())
1678 // Return false and let a normal complaint about bogus operands happen
1679 return false;
1680
1681 unsigned OrigReg = OrigOp.Mem.BaseReg;
1682 unsigned FinalReg = FinalOp.Mem.BaseReg;
1683
1684 // If we've already encounterd a register class, make sure all register
1685 // bases are of the same register class
1686 if (RegClassID != -1 &&
1687 !X86MCRegisterClasses[RegClassID].contains(OrigReg)) {
1688 return Error(OrigOp.getStartLoc(),
1689 "mismatching source and destination index registers");
1690 }
1691
1692 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(OrigReg))
1693 RegClassID = X86::GR64RegClassID;
1694 else if (X86MCRegisterClasses[X86::GR32RegClassID].contains(OrigReg))
1695 RegClassID = X86::GR32RegClassID;
1696 else if (X86MCRegisterClasses[X86::GR16RegClassID].contains(OrigReg))
1697 RegClassID = X86::GR16RegClassID;
1698 else
1699 // Unexpected register class type
1700 // Return false and let a normal complaint about bogus operands happen
1701 return false;
1702
1703 bool IsSI = IsSIReg(FinalReg);
1704 FinalReg = GetSIDIForRegClass(RegClassID, FinalReg, IsSI);
1705
1706 if (FinalReg != OrigReg) {
1707 std::string RegName = IsSI ? "ES:(R|E)SI" : "ES:(R|E)DI";
1708 Warnings.push_back(std::make_pair(
1709 OrigOp.getStartLoc(),
1710 "memory operand is only for determining the size, " + RegName +
1711 " will be used for the location"));
1712 }
1713
1714 FinalOp.Mem.Size = OrigOp.Mem.Size;
1715 FinalOp.Mem.SegReg = OrigOp.Mem.SegReg;
1716 FinalOp.Mem.BaseReg = FinalReg;
1717 }
1718 }
1719
1720 // Produce warnings only if all the operands passed the adjustment - prevent
1721 // legal cases like "movsd (%rax), %xmm0" mistakenly produce warnings
1722 for (auto &WarningMsg : Warnings) {
1723 Warning(WarningMsg.first, WarningMsg.second);
1724 }
1725
1726 // Remove old operands
1727 for (unsigned int i = 0; i < FinalOperands.size(); ++i)
1728 OrigOperands.pop_back();
1729 }
1730 // OrigOperands.append(FinalOperands.begin(), FinalOperands.end());
1731 for (unsigned int i = 0; i < FinalOperands.size(); ++i)
1732 OrigOperands.push_back(std::move(FinalOperands[i]));
1733
1734 return false;
1735}
1736
1737bool X86AsmParser::parseOperand(OperandVector &Operands, StringRef Name) {
1738 if (isParsingIntelSyntax())
1739 return parseIntelOperand(Operands, Name);
1740
1741 return parseATTOperand(Operands);
1742}
1743
1744bool X86AsmParser::CreateMemForMSInlineAsm(
1745 unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
1746 unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
1748 // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
1749 // some other label reference.
1751 // Insert an explicit size if the user didn't have one.
1752 if (!Size) {
1753 Size = getPointerWidth();
1754 InstInfo->AsmRewrites->emplace_back(AOK_SizeDirective, Start,
1755 /*Len=*/0, Size);
1756 }
1757 // Create an absolute memory reference in order to match against
1758 // instructions taking a PC relative operand.
1759 Operands.push_back(X86Operand::CreateMem(getPointerWidth(), Disp, Start,
1760 End, Size, Identifier,
1761 Info.Label.Decl));
1762 return false;
1763 }
1764 // We either have a direct symbol reference, or an offset from a symbol. The
1765 // parser always puts the symbol on the LHS, so look there for size
1766 // calculation purposes.
1767 unsigned FrontendSize = 0;
1768 void *Decl = nullptr;
1769 bool IsGlobalLV = false;
1771 // Size is in terms of bits in this context.
1772 FrontendSize = Info.Var.Type * 8;
1773 Decl = Info.Var.Decl;
1774 IsGlobalLV = Info.Var.IsGlobalLV;
1775 }
1776 // It is widely common for MS InlineAsm to use a global variable and one/two
1777 // registers in a mmory expression, and though unaccessible via rip/eip.
1778 if (IsGlobalLV && (BaseReg || IndexReg)) {
1779 Operands.push_back(X86Operand::CreateMem(getPointerWidth(), Disp, Start,
1780 End, Size, Identifier, Decl, 0,
1781 BaseReg && IndexReg));
1782 return false;
1783 }
1784 // Otherwise, we set the base register to a non-zero value
1785 // if we don't know the actual value at this time. This is necessary to
1786 // get the matching correct in some cases.
1787 BaseReg = BaseReg ? BaseReg : 1;
1789 getPointerWidth(), SegReg, Disp, BaseReg, IndexReg, Scale, Start, End,
1790 Size,
1791 /*DefaultBaseReg=*/X86::RIP, Identifier, Decl, FrontendSize));
1792 return false;
1793}
1794
1795// Some binary bitwise operators have a named synonymous
1796// Query a candidate string for being such a named operator
1797// and if so - invoke the appropriate handler
1798bool X86AsmParser::ParseIntelNamedOperator(StringRef Name,
1799 IntelExprStateMachine &SM,
1800 bool &ParseError, SMLoc &End) {
1801 // A named operator should be either lower or upper case, but not a mix...
1802 // except in MASM, which uses full case-insensitivity.
1803 if (Name.compare(Name.lower()) && Name.compare(Name.upper()) &&
1804 !getParser().isParsingMasm())
1805 return false;
1806 if (Name.equals_insensitive("not")) {
1807 SM.onNot();
1808 } else if (Name.equals_insensitive("or")) {
1809 SM.onOr();
1810 } else if (Name.equals_insensitive("shl")) {
1811 SM.onLShift();
1812 } else if (Name.equals_insensitive("shr")) {
1813 SM.onRShift();
1814 } else if (Name.equals_insensitive("xor")) {
1815 SM.onXor();
1816 } else if (Name.equals_insensitive("and")) {
1817 SM.onAnd();
1818 } else if (Name.equals_insensitive("mod")) {
1819 SM.onMod();
1820 } else if (Name.equals_insensitive("offset")) {
1821 SMLoc OffsetLoc = getTok().getLoc();
1822 const MCExpr *Val = nullptr;
1823 StringRef ID;
1825 ParseError = ParseIntelOffsetOperator(Val, ID, Info, End);
1826 if (ParseError)
1827 return true;
1828 StringRef ErrMsg;
1829 ParseError =
1830 SM.onOffset(Val, OffsetLoc, ID, Info, isParsingMSInlineAsm(), ErrMsg);
1831 if (ParseError)
1832 return Error(SMLoc::getFromPointer(Name.data()), ErrMsg);
1833 } else {
1834 return false;
1835 }
1836 if (!Name.equals_insensitive("offset"))
1837 End = consumeToken();
1838 return true;
1839}
1840bool X86AsmParser::ParseMasmNamedOperator(StringRef Name,
1841 IntelExprStateMachine &SM,
1842 bool &ParseError, SMLoc &End) {
1843 if (Name.equals_insensitive("eq")) {
1844 SM.onEq();
1845 } else if (Name.equals_insensitive("ne")) {
1846 SM.onNE();
1847 } else if (Name.equals_insensitive("lt")) {
1848 SM.onLT();
1849 } else if (Name.equals_insensitive("le")) {
1850 SM.onLE();
1851 } else if (Name.equals_insensitive("gt")) {
1852 SM.onGT();
1853 } else if (Name.equals_insensitive("ge")) {
1854 SM.onGE();
1855 } else {
1856 return false;
1857 }
1858 End = consumeToken();
1859 return true;
1860}
1861
1862// Check if current intel expression append after an operand.
1863// Like: [Operand][Intel Expression]
1864void X86AsmParser::tryParseOperandIdx(AsmToken::TokenKind PrevTK,
1865 IntelExprStateMachine &SM) {
1866 if (PrevTK != AsmToken::RBrac)
1867 return;
1868
1869 SM.setAppendAfterOperand();
1870}
1871
1872bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1873 MCAsmParser &Parser = getParser();
1874 StringRef ErrMsg;
1875
1877
1878 if (getContext().getObjectFileInfo()->isPositionIndependent())
1879 SM.setPIC();
1880
1881 bool Done = false;
1882 while (!Done) {
1883 // Get a fresh reference on each loop iteration in case the previous
1884 // iteration moved the token storage during UnLex().
1885 const AsmToken &Tok = Parser.getTok();
1886
1887 bool UpdateLocLex = true;
1888 AsmToken::TokenKind TK = getLexer().getKind();
1889
1890 switch (TK) {
1891 default:
1892 if ((Done = SM.isValidEndState()))
1893 break;
1894 return Error(Tok.getLoc(), "unknown token in expression");
1895 case AsmToken::Error:
1896 return Error(getLexer().getErrLoc(), getLexer().getErr());
1897 break;
1899 Done = true;
1900 break;
1901 case AsmToken::Real:
1902 // DotOperator: [ebx].0
1903 UpdateLocLex = false;
1904 if (ParseIntelDotOperator(SM, End))
1905 return true;
1906 break;
1907 case AsmToken::Dot:
1908 if (!Parser.isParsingMasm()) {
1909 if ((Done = SM.isValidEndState()))
1910 break;
1911 return Error(Tok.getLoc(), "unknown token in expression");
1912 }
1913 // MASM allows spaces around the dot operator (e.g., "var . x")
1914 Lex();
1915 UpdateLocLex = false;
1916 if (ParseIntelDotOperator(SM, End))
1917 return true;
1918 break;
1919 case AsmToken::Dollar:
1920 if (!Parser.isParsingMasm()) {
1921 if ((Done = SM.isValidEndState()))
1922 break;
1923 return Error(Tok.getLoc(), "unknown token in expression");
1924 }
1925 [[fallthrough]];
1926 case AsmToken::String: {
1927 if (Parser.isParsingMasm()) {
1928 // MASM parsers handle strings in expressions as constants.
1929 SMLoc ValueLoc = Tok.getLoc();
1930 int64_t Res;
1931 const MCExpr *Val;
1932 if (Parser.parsePrimaryExpr(Val, End, nullptr))
1933 return true;
1934 UpdateLocLex = false;
1935 if (!Val->evaluateAsAbsolute(Res, getStreamer().getAssemblerPtr()))
1936 return Error(ValueLoc, "expected absolute value");
1937 if (SM.onInteger(Res, ErrMsg))
1938 return Error(ValueLoc, ErrMsg);
1939 break;
1940 }
1941 [[fallthrough]];
1942 }
1943 case AsmToken::At:
1944 case AsmToken::Identifier: {
1945 SMLoc IdentLoc = Tok.getLoc();
1947 UpdateLocLex = false;
1948 if (Parser.isParsingMasm()) {
1949 size_t DotOffset = Identifier.find_first_of('.');
1950 if (DotOffset != StringRef::npos) {
1951 consumeToken();
1952 StringRef LHS = Identifier.slice(0, DotOffset);
1953 StringRef Dot = Identifier.slice(DotOffset, DotOffset + 1);
1954 StringRef RHS = Identifier.slice(DotOffset + 1, StringRef::npos);
1955 if (!RHS.empty()) {
1956 getLexer().UnLex(AsmToken(AsmToken::Identifier, RHS));
1957 }
1958 getLexer().UnLex(AsmToken(AsmToken::Dot, Dot));
1959 if (!LHS.empty()) {
1960 getLexer().UnLex(AsmToken(AsmToken::Identifier, LHS));
1961 }
1962 break;
1963 }
1964 }
1965 // (MASM only) <TYPE> PTR operator
1966 if (Parser.isParsingMasm()) {
1967 const AsmToken &NextTok = getLexer().peekTok();
1968 if (NextTok.is(AsmToken::Identifier) &&
1969 NextTok.getIdentifier().equals_insensitive("ptr")) {
1971 if (Parser.lookUpType(Identifier, Info))
1972 return Error(Tok.getLoc(), "unknown type");
1973 SM.onCast(Info);
1974 // Eat type and PTR.
1975 consumeToken();
1976 End = consumeToken();
1977 break;
1978 }
1979 }
1980 // Register, or (MASM only) <register>.<field>
1982 if (Tok.is(AsmToken::Identifier)) {
1983 if (!ParseRegister(Reg, IdentLoc, End, /*RestoreOnFailure=*/true)) {
1984 if (SM.onRegister(Reg, ErrMsg))
1985 return Error(IdentLoc, ErrMsg);
1986 break;
1987 }
1988 if (Parser.isParsingMasm()) {
1989 const std::pair<StringRef, StringRef> IDField =
1990 Tok.getString().split('.');
1991 const StringRef ID = IDField.first, Field = IDField.second;
1992 SMLoc IDEndLoc = SMLoc::getFromPointer(ID.data() + ID.size());
1993 if (!Field.empty() &&
1994 !MatchRegisterByName(Reg, ID, IdentLoc, IDEndLoc)) {
1995 if (SM.onRegister(Reg, ErrMsg))
1996 return Error(IdentLoc, ErrMsg);
1997
1999 SMLoc FieldStartLoc = SMLoc::getFromPointer(Field.data());
2000 if (Parser.lookUpField(Field, Info))
2001 return Error(FieldStartLoc, "unknown offset");
2002 else if (SM.onPlus(ErrMsg))
2003 return Error(getTok().getLoc(), ErrMsg);
2004 else if (SM.onInteger(Info.Offset, ErrMsg))
2005 return Error(IdentLoc, ErrMsg);
2006 SM.setTypeInfo(Info.Type);
2007
2008 End = consumeToken();
2009 break;
2010 }
2011 }
2012 }
2013 // Operator synonymous ("not", "or" etc.)
2014 bool ParseError = false;
2015 if (ParseIntelNamedOperator(Identifier, SM, ParseError, End)) {
2016 if (ParseError)
2017 return true;
2018 break;
2019 }
2020 if (Parser.isParsingMasm() &&
2021 ParseMasmNamedOperator(Identifier, SM, ParseError, End)) {
2022 if (ParseError)
2023 return true;
2024 break;
2025 }
2026 // Symbol reference, when parsing assembly content
2028 AsmFieldInfo FieldInfo;
2029 const MCExpr *Val;
2030 if (isParsingMSInlineAsm() || Parser.isParsingMasm()) {
2031 // MS Dot Operator expression
2032 if (Identifier.count('.') &&
2033 (PrevTK == AsmToken::RBrac || PrevTK == AsmToken::RParen)) {
2034 if (ParseIntelDotOperator(SM, End))
2035 return true;
2036 break;
2037 }
2038 }
2039 if (isParsingMSInlineAsm()) {
2040 // MS InlineAsm operators (TYPE/LENGTH/SIZE)
2041 if (unsigned OpKind = IdentifyIntelInlineAsmOperator(Identifier)) {
2042 if (int64_t Val = ParseIntelInlineAsmOperator(OpKind)) {
2043 if (SM.onInteger(Val, ErrMsg))
2044 return Error(IdentLoc, ErrMsg);
2045 } else {
2046 return true;
2047 }
2048 break;
2049 }
2050 // MS InlineAsm identifier
2051 // Call parseIdentifier() to combine @ with the identifier behind it.
2052 if (TK == AsmToken::At && Parser.parseIdentifier(Identifier))
2053 return Error(IdentLoc, "expected identifier");
2054 if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info, false, End))
2055 return true;
2056 else if (SM.onIdentifierExpr(Val, Identifier, Info, FieldInfo.Type,
2057 true, ErrMsg))
2058 return Error(IdentLoc, ErrMsg);
2059 break;
2060 }
2061 if (Parser.isParsingMasm()) {
2062 if (unsigned OpKind = IdentifyMasmOperator(Identifier)) {
2063 int64_t Val;
2064 if (ParseMasmOperator(OpKind, Val))
2065 return true;
2066 if (SM.onInteger(Val, ErrMsg))
2067 return Error(IdentLoc, ErrMsg);
2068 break;
2069 }
2070 if (!getParser().lookUpType(Identifier, FieldInfo.Type)) {
2071 // Field offset immediate; <TYPE>.<field specification>
2072 Lex(); // eat type
2073 bool EndDot = parseOptionalToken(AsmToken::Dot);
2074 while (EndDot || (getTok().is(AsmToken::Identifier) &&
2075 getTok().getString().startswith("."))) {
2076 getParser().parseIdentifier(Identifier);
2077 if (!EndDot)
2078 Identifier.consume_front(".");
2079 EndDot = Identifier.consume_back(".");
2080 if (getParser().lookUpField(FieldInfo.Type.Name, Identifier,
2081 FieldInfo)) {
2082 SMLoc IDEnd =
2084 return Error(IdentLoc, "Unable to lookup field reference!",
2085 SMRange(IdentLoc, IDEnd));
2086 }
2087 if (!EndDot)
2088 EndDot = parseOptionalToken(AsmToken::Dot);
2089 }
2090 if (SM.onInteger(FieldInfo.Offset, ErrMsg))
2091 return Error(IdentLoc, ErrMsg);
2092 break;
2093 }
2094 }
2095 if (getParser().parsePrimaryExpr(Val, End, &FieldInfo.Type)) {
2096 return Error(Tok.getLoc(), "Unexpected identifier!");
2097 } else if (SM.onIdentifierExpr(Val, Identifier, Info, FieldInfo.Type,
2098 false, ErrMsg)) {
2099 return Error(IdentLoc, ErrMsg);
2100 }
2101 break;
2102 }
2103 case AsmToken::Integer: {
2104 // Look for 'b' or 'f' following an Integer as a directional label
2105 SMLoc Loc = getTok().getLoc();
2106 int64_t IntVal = getTok().getIntVal();
2107 End = consumeToken();
2108 UpdateLocLex = false;
2109 if (getLexer().getKind() == AsmToken::Identifier) {
2110 StringRef IDVal = getTok().getString();
2111 if (IDVal == "f" || IDVal == "b") {
2112 MCSymbol *Sym =
2113 getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b");
2115 const MCExpr *Val =
2116 MCSymbolRefExpr::create(Sym, Variant, getContext());
2117 if (IDVal == "b" && Sym->isUndefined())
2118 return Error(Loc, "invalid reference to undefined symbol");
2119 StringRef Identifier = Sym->getName();
2122 if (SM.onIdentifierExpr(Val, Identifier, Info, Type,
2123 isParsingMSInlineAsm(), ErrMsg))
2124 return Error(Loc, ErrMsg);
2125 End = consumeToken();
2126 } else {
2127 if (SM.onInteger(IntVal, ErrMsg))
2128 return Error(Loc, ErrMsg);
2129 }
2130 } else {
2131 if (SM.onInteger(IntVal, ErrMsg))
2132 return Error(Loc, ErrMsg);
2133 }
2134 break;
2135 }
2136 case AsmToken::Plus:
2137 if (SM.onPlus(ErrMsg))
2138 return Error(getTok().getLoc(), ErrMsg);
2139 break;
2140 case AsmToken::Minus:
2141 if (SM.onMinus(ErrMsg))
2142 return Error(getTok().getLoc(), ErrMsg);
2143 break;
2144 case AsmToken::Tilde: SM.onNot(); break;
2145 case AsmToken::Star: SM.onStar(); break;
2146 case AsmToken::Slash: SM.onDivide(); break;
2147 case AsmToken::Percent: SM.onMod(); break;
2148 case AsmToken::Pipe: SM.onOr(); break;
2149 case AsmToken::Caret: SM.onXor(); break;
2150 case AsmToken::Amp: SM.onAnd(); break;
2151 case AsmToken::LessLess:
2152 SM.onLShift(); break;
2154 SM.onRShift(); break;
2155 case AsmToken::LBrac:
2156 if (SM.onLBrac())
2157 return Error(Tok.getLoc(), "unexpected bracket encountered");
2158 tryParseOperandIdx(PrevTK, SM);
2159 break;
2160 case AsmToken::RBrac:
2161 if (SM.onRBrac(ErrMsg)) {
2162 return Error(Tok.getLoc(), ErrMsg);
2163 }
2164 break;
2165 case AsmToken::LParen: SM.onLParen(); break;
2166 case AsmToken::RParen: SM.onRParen(); break;
2167 }
2168 if (SM.hadError())
2169 return Error(Tok.getLoc(), "unknown token in expression");
2170
2171 if (!Done && UpdateLocLex)
2172 End = consumeToken();
2173
2174 PrevTK = TK;
2175 }
2176 return false;
2177}
2178
2179void X86AsmParser::RewriteIntelExpression(IntelExprStateMachine &SM,
2180 SMLoc Start, SMLoc End) {
2181 SMLoc Loc = Start;
2182 unsigned ExprLen = End.getPointer() - Start.getPointer();
2183 // Skip everything before a symbol displacement (if we have one)
2184 if (SM.getSym() && !SM.isOffsetOperator()) {
2185 StringRef SymName = SM.getSymName();
2186 if (unsigned Len = SymName.data() - Start.getPointer())
2187 InstInfo->AsmRewrites->emplace_back(AOK_Skip, Start, Len);
2188 Loc = SMLoc::getFromPointer(SymName.data() + SymName.size());
2189 ExprLen = End.getPointer() - (SymName.data() + SymName.size());
2190 // If we have only a symbol than there's no need for complex rewrite,
2191 // simply skip everything after it
2192 if (!(SM.getBaseReg() || SM.getIndexReg() || SM.getImm())) {
2193 if (ExprLen)
2194 InstInfo->AsmRewrites->emplace_back(AOK_Skip, Loc, ExprLen);
2195 return;
2196 }
2197 }
2198 // Build an Intel Expression rewrite
2199 StringRef BaseRegStr;
2200 StringRef IndexRegStr;
2201 StringRef OffsetNameStr;
2202 if (SM.getBaseReg())
2203 BaseRegStr = X86IntelInstPrinter::getRegisterName(SM.getBaseReg());
2204 if (SM.getIndexReg())
2205 IndexRegStr = X86IntelInstPrinter::getRegisterName(SM.getIndexReg());
2206 if (SM.isOffsetOperator())
2207 OffsetNameStr = SM.getSymName();
2208 // Emit it
2209 IntelExpr Expr(BaseRegStr, IndexRegStr, SM.getScale(), OffsetNameStr,
2210 SM.getImm(), SM.isMemExpr());
2211 InstInfo->AsmRewrites->emplace_back(Loc, ExprLen, Expr);
2212}
2213
2214// Inline assembly may use variable names with namespace alias qualifiers.
2215bool X86AsmParser::ParseIntelInlineAsmIdentifier(
2216 const MCExpr *&Val, StringRef &Identifier, InlineAsmIdentifierInfo &Info,
2217 bool IsUnevaluatedOperand, SMLoc &End, bool IsParsingOffsetOperator) {
2218 MCAsmParser &Parser = getParser();
2219 assert(isParsingMSInlineAsm() && "Expected to be parsing inline assembly.");
2220 Val = nullptr;
2221
2222 StringRef LineBuf(Identifier.data());
2223 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
2224
2225 const AsmToken &Tok = Parser.getTok();
2226 SMLoc Loc = Tok.getLoc();
2227
2228 // Advance the token stream until the end of the current token is
2229 // after the end of what the frontend claimed.
2230 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
2231 do {
2232 End = Tok.getEndLoc();
2233 getLexer().Lex();
2234 } while (End.getPointer() < EndPtr);
2235 Identifier = LineBuf;
2236
2237 // The frontend should end parsing on an assembler token boundary, unless it
2238 // failed parsing.
2239 assert((End.getPointer() == EndPtr ||
2241 "frontend claimed part of a token?");
2242
2243 // If the identifier lookup was unsuccessful, assume that we are dealing with
2244 // a label.
2246 StringRef InternalName =
2247 SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(),
2248 Loc, false);
2249 assert(InternalName.size() && "We should have an internal name here.");
2250 // Push a rewrite for replacing the identifier name with the internal name,
2251 // unless we are parsing the operand of an offset operator
2252 if (!IsParsingOffsetOperator)
2253 InstInfo->AsmRewrites->emplace_back(AOK_Label, Loc, Identifier.size(),
2254 InternalName);
2255 else
2256 Identifier = InternalName;
2257 } else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
2258 return false;
2259 // Create the symbol reference.
2260 MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
2262 Val = MCSymbolRefExpr::create(Sym, Variant, getParser().getContext());
2263 return false;
2264}
2265
2266//ParseRoundingModeOp - Parse AVX-512 rounding mode operand
2267bool X86AsmParser::ParseRoundingModeOp(SMLoc Start, OperandVector &Operands) {
2268 MCAsmParser &Parser = getParser();
2269 const AsmToken &Tok = Parser.getTok();
2270 // Eat "{" and mark the current place.
2271 const SMLoc consumedToken = consumeToken();
2272 if (Tok.isNot(AsmToken::Identifier))
2273 return Error(Tok.getLoc(), "Expected an identifier after {");
2274 if (Tok.getIdentifier().startswith("r")){
2275 int rndMode = StringSwitch<int>(Tok.getIdentifier())
2276 .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT)
2277 .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF)
2278 .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF)
2279 .Case("rz", X86::STATIC_ROUNDING::TO_ZERO)
2280 .Default(-1);
2281 if (-1 == rndMode)
2282 return Error(Tok.getLoc(), "Invalid rounding mode.");
2283 Parser.Lex(); // Eat "r*" of r*-sae
2284 if (!getLexer().is(AsmToken::Minus))
2285 return Error(Tok.getLoc(), "Expected - at this point");
2286 Parser.Lex(); // Eat "-"
2287 Parser.Lex(); // Eat the sae
2288 if (!getLexer().is(AsmToken::RCurly))
2289 return Error(Tok.getLoc(), "Expected } at this point");
2290 SMLoc End = Tok.getEndLoc();
2291 Parser.Lex(); // Eat "}"
2292 const MCExpr *RndModeOp =
2293 MCConstantExpr::create(rndMode, Parser.getContext());
2294 Operands.push_back(X86Operand::CreateImm(RndModeOp, Start, End));
2295 return false;
2296 }
2297 if(Tok.getIdentifier().equals("sae")){
2298 Parser.Lex(); // Eat the sae
2299 if (!getLexer().is(AsmToken::RCurly))
2300 return Error(Tok.getLoc(), "Expected } at this point");
2301 Parser.Lex(); // Eat "}"
2302 Operands.push_back(X86Operand::CreateToken("{sae}", consumedToken));
2303 return false;
2304 }
2305 return Error(Tok.getLoc(), "unknown token in expression");
2306}
2307
2308/// Parse the '.' operator.
2309bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM,
2310 SMLoc &End) {
2311 const AsmToken &Tok = getTok();
2313
2314 // Drop the optional '.'.
2315 StringRef DotDispStr = Tok.getString();
2316 if (DotDispStr.startswith("."))
2317 DotDispStr = DotDispStr.drop_front(1);
2318 StringRef TrailingDot;
2319
2320 // .Imm gets lexed as a real.
2321 if (Tok.is(AsmToken::Real)) {
2322 APInt DotDisp;
2323 DotDispStr.getAsInteger(10, DotDisp);
2324 Info.Offset = DotDisp.getZExtValue();
2325 } else if ((isParsingMSInlineAsm() || getParser().isParsingMasm()) &&
2326 Tok.is(AsmToken::Identifier)) {
2327 if (DotDispStr.endswith(".")) {
2328 TrailingDot = DotDispStr.substr(DotDispStr.size() - 1);
2329 DotDispStr = DotDispStr.drop_back(1);
2330 }
2331 const std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
2332 const StringRef Base = BaseMember.first, Member = BaseMember.second;
2333 if (getParser().lookUpField(SM.getType(), DotDispStr, Info) &&
2334 getParser().lookUpField(SM.getSymName(), DotDispStr, Info) &&
2335 getParser().lookUpField(DotDispStr, Info) &&
2336 (!SemaCallback ||
2337 SemaCallback->LookupInlineAsmField(Base, Member, Info.Offset)))
2338 return Error(Tok.getLoc(), "Unable to lookup field reference!");
2339 } else {
2340 return Error(Tok.getLoc(), "Unexpected token type!");
2341 }
2342
2343 // Eat the DotExpression and update End
2344 End = SMLoc::getFromPointer(DotDispStr.data());
2345 const char *DotExprEndLoc = DotDispStr.data() + DotDispStr.size();
2346 while (Tok.getLoc().getPointer() < DotExprEndLoc)
2347 Lex();
2348 if (!TrailingDot.empty())
2349 getLexer().UnLex(AsmToken(AsmToken::Dot, TrailingDot));
2350 SM.addImm(Info.Offset);
2351 SM.setTypeInfo(Info.Type);
2352 return false;
2353}
2354
2355/// Parse the 'offset' operator.
2356/// This operator is used to specify the location of a given operand
2357bool X86AsmParser::ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID,
2359 SMLoc &End) {
2360 // Eat offset, mark start of identifier.
2361 SMLoc Start = Lex().getLoc();
2362 ID = getTok().getString();
2363 if (!isParsingMSInlineAsm()) {
2364 if ((getTok().isNot(AsmToken::Identifier) &&
2365 getTok().isNot(AsmToken::String)) ||
2366 getParser().parsePrimaryExpr(Val, End, nullptr))
2367 return Error(Start, "unexpected token!");
2368 } else if (ParseIntelInlineAsmIdentifier(Val, ID, Info, false, End, true)) {
2369 return Error(Start, "unable to lookup expression");
2370 } else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal)) {
2371 return Error(Start, "offset operator cannot yet handle constants");
2372 }
2373 return false;
2374}
2375
2376// Query a candidate string for being an Intel assembly operator
2377// Report back its kind, or IOK_INVALID if does not evaluated as a known one
2378unsigned X86AsmParser::IdentifyIntelInlineAsmOperator(StringRef Name) {
2380 .Cases("TYPE","type",IOK_TYPE)
2381 .Cases("SIZE","size",IOK_SIZE)
2382 .Cases("LENGTH","length",IOK_LENGTH)
2383 .Default(IOK_INVALID);
2384}
2385
2386/// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
2387/// returns the number of elements in an array. It returns the value 1 for
2388/// non-array variables. The SIZE operator returns the size of a C or C++
2389/// variable. A variable's size is the product of its LENGTH and TYPE. The
2390/// TYPE operator returns the size of a C or C++ type or variable. If the
2391/// variable is an array, TYPE returns the size of a single element.
2392unsigned X86AsmParser::ParseIntelInlineAsmOperator(unsigned OpKind) {
2393 MCAsmParser &Parser = getParser();
2394 const AsmToken &Tok = Parser.getTok();
2395 Parser.Lex(); // Eat operator.
2396
2397 const MCExpr *Val = nullptr;
2399 SMLoc Start = Tok.getLoc(), End;
2401 if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info,
2402 /*IsUnevaluatedOperand=*/true, End))
2403 return 0;
2404
2406 Error(Start, "unable to lookup expression");
2407 return 0;
2408 }
2409
2410 unsigned CVal = 0;
2411 switch(OpKind) {
2412 default: llvm_unreachable("Unexpected operand kind!");
2413 case IOK_LENGTH: CVal = Info.Var.Length; break;
2414 case IOK_SIZE: CVal = Info.Var.Size; break;
2415 case IOK_TYPE: CVal = Info.Var.Type; break;
2416 }
2417
2418 return CVal;
2419}
2420
2421// Query a candidate string for being an Intel assembly operator
2422// Report back its kind, or IOK_INVALID if does not evaluated as a known one
2423unsigned X86AsmParser::IdentifyMasmOperator(StringRef Name) {
2424 return StringSwitch<unsigned>(Name.lower())
2425 .Case("type", MOK_TYPE)
2426 .Cases("size", "sizeof", MOK_SIZEOF)
2427 .Cases("length", "lengthof", MOK_LENGTHOF)
2428 .Default(MOK_INVALID);
2429}
2430
2431/// Parse the 'LENGTHOF', 'SIZEOF', and 'TYPE' operators. The LENGTHOF operator
2432/// returns the number of elements in an array. It returns the value 1 for
2433/// non-array variables. The SIZEOF operator returns the size of a type or
2434/// variable in bytes. A variable's size is the product of its LENGTH and TYPE.
2435/// The TYPE operator returns the size of a variable. If the variable is an
2436/// array, TYPE returns the size of a single element.
2437bool X86AsmParser::ParseMasmOperator(unsigned OpKind, int64_t &Val) {
2438 MCAsmParser &Parser = getParser();
2439 SMLoc OpLoc = Parser.getTok().getLoc();
2440 Parser.Lex(); // Eat operator.
2441
2442 Val = 0;
2443 if (OpKind == MOK_SIZEOF || OpKind == MOK_TYPE) {
2444 // Check for SIZEOF(<type>) and TYPE(<type>).
2445 bool InParens = Parser.getTok().is(AsmToken::LParen);
2446 const AsmToken &IDTok = InParens ? getLexer().peekTok() : Parser.getTok();
2448 if (IDTok.is(AsmToken::Identifier) &&
2449 !Parser.lookUpType(IDTok.getIdentifier(), Type)) {
2450 Val = Type.Size;
2451
2452 // Eat tokens.
2453 if (InParens)
2454 parseToken(AsmToken::LParen);
2455 parseToken(AsmToken::Identifier);
2456 if (InParens)
2457 parseToken(AsmToken::RParen);
2458 }
2459 }
2460
2461 if (!Val) {
2462 IntelExprStateMachine SM;
2463 SMLoc End, Start = Parser.getTok().getLoc();
2464 if (ParseIntelExpression(SM, End))
2465 return true;
2466
2467 switch (OpKind) {
2468 default:
2469 llvm_unreachable("Unexpected operand kind!");
2470 case MOK_SIZEOF:
2471 Val = SM.getSize();
2472 break;
2473 case MOK_LENGTHOF:
2474 Val = SM.getLength();
2475 break;
2476 case MOK_TYPE:
2477 Val = SM.getElementSize();
2478 break;
2479 }
2480
2481 if (!Val)
2482 return Error(OpLoc, "expression has unknown type", SMRange(Start, End));
2483 }
2484
2485 return false;
2486}
2487
2488bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size) {
2489 Size = StringSwitch<unsigned>(getTok().getString())
2490 .Cases("BYTE", "byte", 8)
2491 .Cases("WORD", "word", 16)
2492 .Cases("DWORD", "dword", 32)
2493 .Cases("FLOAT", "float", 32)
2494 .Cases("LONG", "long", 32)
2495 .Cases("FWORD", "fword", 48)
2496 .Cases("DOUBLE", "double", 64)
2497 .Cases("QWORD", "qword", 64)
2498 .Cases("MMWORD","mmword", 64)
2499 .Cases("XWORD", "xword", 80)
2500 .Cases("TBYTE", "tbyte", 80)
2501 .Cases("XMMWORD", "xmmword", 128)
2502 .Cases("YMMWORD", "ymmword", 256)
2503 .Cases("ZMMWORD", "zmmword", 512)
2504 .Default(0);
2505 if (Size) {
2506 const AsmToken &Tok = Lex(); // Eat operand size (e.g., byte, word).
2507 if (!(Tok.getString().equals("PTR") || Tok.getString().equals("ptr")))
2508 return Error(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
2509 Lex(); // Eat ptr.
2510 }
2511 return false;
2512}
2513
2514bool X86AsmParser::parseIntelOperand(OperandVector &Operands, StringRef Name) {
2515 MCAsmParser &Parser = getParser();
2516 const AsmToken &Tok = Parser.getTok();
2517 SMLoc Start, End;
2518
2519 // Parse optional Size directive.
2520 unsigned Size;
2521 if (ParseIntelMemoryOperandSize(Size))
2522 return true;
2523 bool PtrInOperand = bool(Size);
2524
2525 Start = Tok.getLoc();
2526
2527 // Rounding mode operand.
2528 if (getLexer().is(AsmToken::LCurly))
2529 return ParseRoundingModeOp(Start, Operands);
2530
2531 // Register operand.
2532 MCRegister RegNo;
2533 if (Tok.is(AsmToken::Identifier) && !parseRegister(RegNo, Start, End)) {
2534 if (RegNo == X86::RIP)
2535 return Error(Start, "rip can only be used as a base register");
2536 // A Register followed by ':' is considered a segment override
2537 if (Tok.isNot(AsmToken::Colon)) {
2538 if (PtrInOperand)
2539 return Error(Start, "expected memory operand after 'ptr', "
2540 "found register operand instead");
2541 Operands.push_back(X86Operand::CreateReg(RegNo, Start, End));
2542 return false;
2543 }
2544 // An alleged segment override. check if we have a valid segment register
2545 if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
2546 return Error(Start, "invalid segment register");
2547 // Eat ':' and update Start location
2548 Start = Lex().getLoc();
2549 }
2550
2551 // Immediates and Memory
2552 IntelExprStateMachine SM;
2553 if (ParseIntelExpression(SM, End))
2554 return true;
2555
2556 if (isParsingMSInlineAsm())
2557 RewriteIntelExpression(SM, Start, Tok.getLoc());
2558
2559 int64_t Imm = SM.getImm();
2560 const MCExpr *Disp = SM.getSym();
2561 const MCExpr *ImmDisp = MCConstantExpr::create(Imm, getContext());
2562 if (Disp && Imm)
2563 Disp = MCBinaryExpr::createAdd(Disp, ImmDisp, getContext());
2564 if (!Disp)
2565 Disp = ImmDisp;
2566
2567 // RegNo != 0 specifies a valid segment register,
2568 // and we are parsing a segment override
2569 if (!SM.isMemExpr() && !RegNo) {
2570 if (isParsingMSInlineAsm() && SM.isOffsetOperator()) {
2571 const InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
2573 // Disp includes the address of a variable; make sure this is recorded
2574 // for later handling.
2575 Operands.push_back(X86Operand::CreateImm(Disp, Start, End,
2576 SM.getSymName(), Info.Var.Decl,
2577 Info.Var.IsGlobalLV));
2578 return false;
2579 }
2580 }
2581
2582 Operands.push_back(X86Operand::CreateImm(Disp, Start, End));
2583 return false;
2584 }
2585
2586 StringRef ErrMsg;
2587 unsigned BaseReg = SM.getBaseReg();
2588 unsigned IndexReg = SM.getIndexReg();
2589 if (IndexReg && BaseReg == X86::RIP)
2590 BaseReg = 0;
2591 unsigned Scale = SM.getScale();
2592 if (!PtrInOperand)
2593 Size = SM.getElementSize() << 3;
2594
2595 if (Scale == 0 && BaseReg != X86::ESP && BaseReg != X86::RSP &&
2596 (IndexReg == X86::ESP || IndexReg == X86::RSP))
2597 std::swap(BaseReg, IndexReg);
2598
2599 // If BaseReg is a vector register and IndexReg is not, swap them unless
2600 // Scale was specified in which case it would be an error.
2601 if (Scale == 0 &&
2602 !(X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) ||
2603 X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) ||
2604 X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg)) &&
2605 (X86MCRegisterClasses[X86::VR128XRegClassID].contains(BaseReg) ||
2606 X86MCRegisterClasses[X86::VR256XRegClassID].contains(BaseReg) ||
2607 X86MCRegisterClasses[X86::VR512RegClassID].contains(BaseReg)))
2608 std::swap(BaseReg, IndexReg);
2609
2610 if (Scale != 0 &&
2611 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg))
2612 return Error(Start, "16-bit addresses cannot have a scale");
2613
2614 // If there was no explicit scale specified, change it to 1.
2615 if (Scale == 0)
2616 Scale = 1;
2617
2618 // If this is a 16-bit addressing mode with the base and index in the wrong
2619 // order, swap them so CheckBaseRegAndIndexRegAndScale doesn't fail. It is
2620 // shared with att syntax where order matters.
2621 if ((BaseReg == X86::SI || BaseReg == X86::DI) &&
2622 (IndexReg == X86::BX || IndexReg == X86::BP))
2623 std::swap(BaseReg, IndexReg);
2624
2625 if ((BaseReg || IndexReg) &&
2626 CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
2627 ErrMsg))
2628 return Error(Start, ErrMsg);
2629 if (isParsingMSInlineAsm())
2630 return CreateMemForMSInlineAsm(RegNo, Disp, BaseReg, IndexReg, Scale, Start,
2631 End, Size, SM.getSymName(),
2632 SM.getIdentifierInfo(), Operands);
2633
2634 // When parsing x64 MS-style assembly, all non-absolute references to a named
2635 // variable default to RIP-relative.
2636 unsigned DefaultBaseReg = X86::NoRegister;
2637 bool MaybeDirectBranchDest = true;
2638
2639 if (Parser.isParsingMasm()) {
2640 bool IsUnconditionalBranch =
2641 Name.equals_insensitive("jmp") || Name.equals_insensitive("call");
2642 if (is64BitMode() && SM.getElementSize() > 0) {
2643 DefaultBaseReg = X86::RIP;
2644 }
2645 if (IsUnconditionalBranch) {
2646 if (PtrInOperand) {
2647 MaybeDirectBranchDest = false;
2648 if (is64BitMode())
2649 DefaultBaseReg = X86::RIP;
2650 } else if (!BaseReg && !IndexReg && Disp &&
2651 Disp->getKind() == MCExpr::SymbolRef) {
2652 if (is64BitMode()) {
2653 if (SM.getSize() == 8) {
2654 MaybeDirectBranchDest = false;
2655 DefaultBaseReg = X86::RIP;
2656 }
2657 } else {
2658 if (SM.getSize() == 4 || SM.getSize() == 2)
2659 MaybeDirectBranchDest = false;
2660 }
2661 }
2662 }
2663 }
2664
2665 if ((BaseReg || IndexReg || RegNo || DefaultBaseReg != X86::NoRegister))
2667 getPointerWidth(), RegNo, Disp, BaseReg, IndexReg, Scale, Start, End,
2668 Size, DefaultBaseReg, /*SymName=*/StringRef(), /*OpDecl=*/nullptr,
2669 /*FrontendSize=*/0, /*UseUpRegs=*/false, MaybeDirectBranchDest));
2670 else
2672 getPointerWidth(), Disp, Start, End, Size, /*SymName=*/StringRef(),
2673 /*OpDecl=*/nullptr, /*FrontendSize=*/0, /*UseUpRegs=*/false,
2674 MaybeDirectBranchDest));
2675 return false;
2676}
2677
2678bool X86AsmParser::parseATTOperand(OperandVector &Operands) {
2679 MCAsmParser &Parser = getParser();
2680 switch (getLexer().getKind()) {
2681 case AsmToken::Dollar: {
2682 // $42 or $ID -> immediate.
2683 SMLoc Start = Parser.getTok().getLoc(), End;
2684 Parser.Lex();
2685 const MCExpr *Val;
2686 // This is an immediate, so we should not parse a register. Do a precheck
2687 // for '%' to supercede intra-register parse errors.
2688 SMLoc L = Parser.getTok().getLoc();
2689 if (check(getLexer().is(AsmToken::Percent), L,
2690 "expected immediate expression") ||
2691 getParser().parseExpression(Val, End) ||
2692 check(isa<X86MCExpr>(Val), L, "expected immediate expression"))
2693 return true;
2694 Operands.push_back(X86Operand::CreateImm(Val, Start, End));
2695 return false;
2696 }
2697 case AsmToken::LCurly: {
2698 SMLoc Start = Parser.getTok().getLoc();
2699 return ParseRoundingModeOp(Start, Operands);
2700 }
2701 default: {
2702 // This a memory operand or a register. We have some parsing complications
2703 // as a '(' may be part of an immediate expression or the addressing mode
2704 // block. This is complicated by the fact that an assembler-level variable
2705 // may refer either to a register or an immediate expression.
2706
2707 SMLoc Loc = Parser.getTok().getLoc(), EndLoc;
2708 const MCExpr *Expr = nullptr;
2709 unsigned Reg = 0;
2710 if (getLexer().isNot(AsmToken::LParen)) {
2711 // No '(' so this is either a displacement expression or a register.
2712 if (Parser.parseExpression(Expr, EndLoc))
2713 return true;
2714 if (auto *RE = dyn_cast<X86MCExpr>(Expr)) {
2715 // Segment Register. Reset Expr and copy value to register.
2716 Expr = nullptr;
2717 Reg = RE->getRegNo();
2718
2719 // Check the register.
2720 if (Reg == X86::EIZ || Reg == X86::RIZ)
2721 return Error(
2722 Loc, "%eiz and %riz can only be used as index registers",
2723 SMRange(Loc, EndLoc));
2724 if (Reg == X86::RIP)
2725 return Error(Loc, "%rip can only be used as a base register",
2726 SMRange(Loc, EndLoc));
2727 // Return register that are not segment prefixes immediately.
2728 if (!Parser.parseOptionalToken(AsmToken::Colon)) {
2729 Operands.push_back(X86Operand::CreateReg(Reg, Loc, EndLoc));
2730 return false;
2731 }
2732 if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(Reg))
2733 return Error(Loc, "invalid segment register");
2734 // Accept a '*' absolute memory reference after the segment. Place it
2735 // before the full memory operand.
2736 if (getLexer().is(AsmToken::Star))
2737 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
2738 }
2739 }
2740 // This is a Memory operand.
2741 return ParseMemOperand(Reg, Expr, Loc, EndLoc, Operands);
2742 }
2743 }
2744}
2745
2746// X86::COND_INVALID if not a recognized condition code or alternate mnemonic,
2747// otherwise the EFLAGS Condition Code enumerator.
2748X86::CondCode X86AsmParser::ParseConditionCode(StringRef CC) {
2750 .Case("o", X86::COND_O) // Overflow
2751 .Case("no", X86::COND_NO) // No Overflow
2752 .Cases("b", "nae", X86::COND_B) // Below/Neither Above nor Equal
2753 .Cases("ae", "nb", X86::COND_AE) // Above or Equal/Not Below
2754 .Cases("e", "z", X86::COND_E) // Equal/Zero
2755 .Cases("ne", "nz", X86::COND_NE) // Not Equal/Not Zero
2756 .Cases("be", "na", X86::COND_BE) // Below or Equal/Not Above
2757 .Cases("a", "nbe", X86::COND_A) // Above/Neither Below nor Equal
2758 .Case("s", X86::COND_S) // Sign
2759 .Case("ns", X86::COND_NS) // No Sign
2760 .Cases("p", "pe", X86::COND_P) // Parity/Parity Even
2761 .Cases("np", "po", X86::COND_NP) // No Parity/Parity Odd
2762 .Cases("l", "nge", X86::COND_L) // Less/Neither Greater nor Equal
2763 .Cases("ge", "nl", X86::COND_GE) // Greater or Equal/Not Less
2764 .Cases("le", "ng", X86::COND_LE) // Less or Equal/Not Greater
2765 .Cases("g", "nle", X86::COND_G) // Greater/Neither Less nor Equal
2767}
2768
2769// true on failure, false otherwise
2770// If no {z} mark was found - Parser doesn't advance
2771bool X86AsmParser::ParseZ(std::unique_ptr<X86Operand> &Z,
2772 const SMLoc &StartLoc) {
2773 MCAsmParser &Parser = getParser();
2774 // Assuming we are just pass the '{' mark, quering the next token
2775 // Searched for {z}, but none was found. Return false, as no parsing error was
2776 // encountered
2777 if (!(getLexer().is(AsmToken::Identifier) &&
2778 (getLexer().getTok().getIdentifier() == "z")))
2779 return false;
2780 Parser.Lex(); // Eat z
2781 // Query and eat the '}' mark
2782 if (!getLexer().is(AsmToken::RCurly))
2783 return Error(getLexer().getLoc(), "Expected } at this point");
2784 Parser.Lex(); // Eat '}'
2785 // Assign Z with the {z} mark operand
2786 Z = X86Operand::CreateToken("{z}", StartLoc);
2787 return false;
2788}
2789
2790// true on failure, false otherwise
2791bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands) {
2792 MCAsmParser &Parser = getParser();
2793 if (getLexer().is(AsmToken::LCurly)) {
2794 // Eat "{" and mark the current place.
2795 const SMLoc consumedToken = consumeToken();
2796 // Distinguish {1to<NUM>} from {%k<NUM>}.
2797 if(getLexer().is(AsmToken::Integer)) {
2798 // Parse memory broadcasting ({1to<NUM>}).
2799 if (getLexer().getTok().getIntVal() != 1)
2800 return TokError("Expected 1to<NUM> at this point");
2801 StringRef Prefix = getLexer().getTok().getString();
2802 Parser.Lex(); // Eat first token of 1to8
2803 if (!getLexer().is(AsmToken::Identifier))
2804 return TokError("Expected 1to<NUM> at this point");
2805 // Recognize only reasonable suffixes.
2806 SmallVector<char, 5> BroadcastVector;
2807 StringRef BroadcastString = (Prefix + getLexer().getTok().getIdentifier())
2808 .toStringRef(BroadcastVector);
2809 if (!BroadcastString.startswith("1to"))
2810 return TokError("Expected 1to<NUM> at this point");
2811 const char *BroadcastPrimitive =
2812 StringSwitch<const char *>(BroadcastString)
2813 .Case("1to2", "{1to2}")
2814 .Case("1to4", "{1to4}")
2815 .Case("1to8", "{1to8}")
2816 .Case("1to16", "{1to16}")
2817 .Case("1to32", "{1to32}")
2818 .Default(nullptr);
2819 if (!BroadcastPrimitive)
2820 return TokError("Invalid memory broadcast primitive.");
2821 Parser.Lex(); // Eat trailing token of 1toN
2822 if (!getLexer().is(AsmToken::RCurly))
2823 return TokError("Expected } at this point");
2824 Parser.Lex(); // Eat "}"
2825 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
2826 consumedToken));
2827 // No AVX512 specific primitives can pass
2828 // after memory broadcasting, so return.
2829 return false;
2830 } else {
2831 // Parse either {k}{z}, {z}{k}, {k} or {z}
2832 // last one have no meaning, but GCC accepts it
2833 // Currently, we're just pass a '{' mark
2834 std::unique_ptr<X86Operand> Z;
2835 if (ParseZ(Z, consumedToken))
2836 return true;
2837 // Reaching here means that parsing of the allegadly '{z}' mark yielded
2838 // no errors.
2839 // Query for the need of further parsing for a {%k<NUM>} mark
2840 if (!Z || getLexer().is(AsmToken::LCurly)) {
2841 SMLoc StartLoc = Z ? consumeToken() : consumedToken;
2842 // Parse an op-mask register mark ({%k<NUM>}), which is now to be
2843 // expected
2844 MCRegister RegNo;
2845 SMLoc RegLoc;
2846 if (!parseRegister(RegNo, RegLoc, StartLoc) &&
2847 X86MCRegisterClasses[X86::VK1RegClassID].contains(RegNo)) {
2848 if (RegNo == X86::K0)
2849 return Error(RegLoc, "Register k0 can't be used as write mask");
2850 if (!getLexer().is(AsmToken::RCurly))
2851 return Error(getLexer().getLoc(), "Expected } at this point");
2852 Operands.push_back(X86Operand::CreateToken("{", StartLoc));
2853 Operands.push_back(
2854 X86Operand::CreateReg(RegNo, StartLoc, StartLoc));
2855 Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
2856 } else
2857 return Error(getLexer().getLoc(),
2858 "Expected an op-mask register at this point");
2859 // {%k<NUM>} mark is found, inquire for {z}
2860 if (getLexer().is(AsmToken::LCurly) && !Z) {
2861 // Have we've found a parsing error, or found no (expected) {z} mark
2862 // - report an error
2863 if (ParseZ(Z, consumeToken()) || !Z)
2864 return Error(getLexer().getLoc(),
2865 "Expected a {z} mark at this point");
2866
2867 }
2868 // '{z}' on its own is meaningless, hence should be ignored.
2869 // on the contrary - have it been accompanied by a K register,
2870 // allow it.
2871 if (Z)
2872 Operands.push_back(std::move(Z));
2873 }
2874 }
2875 }
2876 return false;
2877}
2878
2879/// ParseMemOperand: 'seg : disp(basereg, indexreg, scale)'. The '%ds:' prefix
2880/// has already been parsed if present. disp may be provided as well.
2881bool X86AsmParser::ParseMemOperand(unsigned SegReg, const MCExpr *Disp,
2882 SMLoc StartLoc, SMLoc EndLoc,
2884 MCAsmParser &Parser = getParser();
2885 SMLoc Loc;
2886 // Based on the initial passed values, we may be in any of these cases, we are
2887 // in one of these cases (with current position (*)):
2888
2889 // 1. seg : * disp (base-index-scale-expr)
2890 // 2. seg : *(disp) (base-index-scale-expr)
2891 // 3. seg : *(base-index-scale-expr)
2892 // 4. disp *(base-index-scale-expr)
2893 // 5. *(disp) (base-index-scale-expr)
2894 // 6. *(base-index-scale-expr)
2895 // 7. disp *
2896 // 8. *(disp)
2897
2898 // If we do not have an displacement yet, check if we're in cases 4 or 6 by
2899 // checking if the first object after the parenthesis is a register (or an
2900 // identifier referring to a register) and parse the displacement or default
2901 // to 0 as appropriate.
2902 auto isAtMemOperand = [this]() {
2903 if (this->getLexer().isNot(AsmToken::LParen))
2904 return false;
2905 AsmToken Buf[2];
2906 StringRef Id;
2907 auto TokCount = this->getLexer().peekTokens(Buf, true);
2908 if (TokCount == 0)
2909 return false;
2910 switch (Buf[0].getKind()) {
2911 case AsmToken::Percent:
2912 case AsmToken::Comma:
2913 return true;
2914 // These lower cases are doing a peekIdentifier.
2915 case AsmToken::At:
2916 case AsmToken::Dollar:
2917 if ((TokCount > 1) &&
2918 (Buf[1].is(AsmToken::Identifier) || Buf[1].is(AsmToken::String)) &&
2919 (Buf[0].getLoc().getPointer() + 1 == Buf[1].getLoc().getPointer()))
2920 Id = StringRef(Buf[0].getLoc().getPointer(),
2921 Buf[1].getIdentifier().size() + 1);
2922 break;
2924 case AsmToken::String:
2925 Id = Buf[0].getIdentifier();
2926 break;
2927 default:
2928 return false;
2929 }
2930 // We have an ID. Check if it is bound to a register.
2931 if (!Id.empty()) {
2932 MCSymbol *Sym = this->getContext().getOrCreateSymbol(Id);
2933 if (Sym->isVariable()) {
2934 auto V = Sym->getVariableValue(/*SetUsed*/ false);
2935 return isa<X86MCExpr>(V);
2936 }
2937 }
2938 return false;
2939 };
2940
2941 if (!Disp) {
2942 // Parse immediate if we're not at a mem operand yet.
2943 if (!isAtMemOperand()) {
2944 if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(Disp, EndLoc))
2945 return true;
2946 assert(!isa<X86MCExpr>(Disp) && "Expected non-register here.");
2947 } else {
2948 // Disp is implicitly zero if we haven't parsed it yet.
2949 Disp = MCConstantExpr::create(0, Parser.getContext());
2950 }
2951 }
2952
2953 // We are now either at the end of the operand or at the '(' at the start of a
2954 // base-index-scale-expr.
2955
2956 if (!parseOptionalToken(AsmToken::LParen)) {
2957 if (SegReg == 0)
2958 Operands.push_back(
2959 X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc));
2960 else
2961 Operands.push_back(X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
2962 0, 0, 1, StartLoc, EndLoc));
2963 return false;
2964 }
2965
2966 // If we reached here, then eat the '(' and Process
2967 // the rest of the memory operand.
2968 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
2969 SMLoc BaseLoc = getLexer().getLoc();
2970 const MCExpr *E;
2971 StringRef ErrMsg;
2972
2973 // Parse BaseReg if one is provided.
2974 if (getLexer().isNot(AsmToken::Comma) && getLexer().isNot(AsmToken::RParen)) {
2975 if (Parser.parseExpression(E, EndLoc) ||
2976 check(!isa<X86MCExpr>(E), BaseLoc, "expected register here"))
2977 return true;
2978
2979 // Check the register.
2980 BaseReg = cast<X86MCExpr>(E)->getRegNo();
2981 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ)
2982 return Error(BaseLoc, "eiz and riz can only be used as index registers",
2983 SMRange(BaseLoc, EndLoc));
2984 }
2985
2986 if (parseOptionalToken(AsmToken::Comma)) {
2987 // Following the comma we should have either an index register, or a scale
2988 // value. We don't support the later form, but we want to parse it
2989 // correctly.
2990 //
2991 // Even though it would be completely consistent to support syntax like
2992 // "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
2993 if (getLexer().isNot(AsmToken::RParen)) {
2994 if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(E, EndLoc))
2995 return true;
2996
2997 if (!isa<X86MCExpr>(E)) {
2998 // We've parsed an unexpected Scale Value instead of an index
2999 // register. Interpret it as an absolute.
3000 int64_t ScaleVal;
3001 if (!E->evaluateAsAbsolute(ScaleVal, getStreamer().getAssemblerPtr()))
3002 return Error(Loc, "expected absolute expression");
3003 if (ScaleVal != 1)
3004 Warning(Loc, "scale factor without index register is ignored");
3005 Scale = 1;
3006 } else { // IndexReg Found.
3007 IndexReg = cast<X86MCExpr>(E)->getRegNo();
3008
3009 if (BaseReg == X86::RIP)
3010 return Error(Loc,
3011 "%rip as base register can not have an index register");
3012 if (IndexReg == X86::RIP)
3013 return Error(Loc, "%rip is not allowed as an index register");
3014
3015 if (parseOptionalToken(AsmToken::Comma)) {
3016 // Parse the scale amount:
3017 // ::= ',' [scale-expression]
3018
3019 // A scale amount without an index is ignored.
3020 if (getLexer().isNot(AsmToken::RParen)) {
3021 int64_t ScaleVal;
3022 if (Parser.parseTokenLoc(Loc) ||
3023 Parser.parseAbsoluteExpression(ScaleVal))
3024 return Error(Loc, "expected scale expression");
3025 Scale = (unsigned)ScaleVal;
3026 // Validate the scale amount.
3027 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
3028 Scale != 1)
3029 return Error(Loc, "scale factor in 16-bit address must be 1");
3030 if (checkScale(Scale, ErrMsg))
3031 return Error(Loc, ErrMsg);
3032 }
3033 }
3034 }
3035 }
3036 }
3037
3038 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
3039 if (parseToken(AsmToken::RParen, "unexpected token in memory operand"))
3040 return true;
3041
3042 // This is to support the otherwise illegal operand (%dx) found in examples in
3043 // various unofficial manuals (e.g. "out[s]?[bwl]? %al, (%dx)"), which must now
3044 // be supported. Mark such DX variants separately and fix them only in special cases.
3045 if (BaseReg == X86::DX && IndexReg == 0 && Scale == 1 && SegReg == 0 &&
3046 isa<MCConstantExpr>(Disp) &&
3047 cast<MCConstantExpr>(Disp)->getValue() == 0) {
3048 Operands.push_back(X86Operand::CreateDXReg(BaseLoc, BaseLoc));
3049 return false;
3050 }
3051
3052 if (CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
3053 ErrMsg))
3054 return Error(BaseLoc, ErrMsg);
3055
3056 if (SegReg || BaseReg || IndexReg)
3057 Operands.push_back(X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
3058 BaseReg, IndexReg, Scale, StartLoc,
3059 EndLoc));
3060 else
3061 Operands.push_back(
3062 X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc));
3063 return false;
3064}
3065
3066// Parse either a standard primary expression or a register.
3067bool X86AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
3068 MCAsmParser &Parser = getParser();
3069 // See if this is a register first.
3070 if (getTok().is(AsmToken::Percent) ||
3071 (isParsingIntelSyntax() && getTok().is(AsmToken::Identifier) &&
3072 MatchRegisterName(Parser.getTok().getString()))) {
3073 SMLoc StartLoc = Parser.getTok().getLoc();
3074 MCRegister RegNo;
3075 if (parseRegister(RegNo, StartLoc, EndLoc))
3076 return true;
3077 Res = X86MCExpr::create(RegNo, Parser.getContext());
3078 return false;
3079 }
3080 return Parser.parsePrimaryExpr(Res, EndLoc, nullptr);
3081}
3082
3083bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
3084 SMLoc NameLoc, OperandVector &Operands) {
3085 MCAsmParser &Parser = getParser();
3086 InstInfo = &Info;
3087
3088 // Reset the forced VEX encoding.
3089 ForcedVEXEncoding = VEXEncoding_Default;
3090 ForcedDispEncoding = DispEncoding_Default;
3091
3092 // Parse pseudo prefixes.
3093 while (true) {
3094 if (Name == "{") {
3095 if (getLexer().isNot(AsmToken::Identifier))
3096 return Error(Parser.getTok().getLoc(), "Unexpected token after '{'");
3097 std::string Prefix = Parser.getTok().getString().lower();
3098 Parser.Lex(); // Eat identifier.
3099 if (getLexer().isNot(AsmToken::RCurly))
3100 return Error(Parser.getTok().getLoc(), "Expected '}'");
3101 Parser.Lex(); // Eat curly.
3102
3103 if (Prefix == "vex")
3104 ForcedVEXEncoding = VEXEncoding_VEX;
3105 else if (Prefix == "vex2")
3106 ForcedVEXEncoding = VEXEncoding_VEX2;
3107 else if (Prefix == "vex3")
3108 ForcedVEXEncoding = VEXEncoding_VEX3;
3109 else if (Prefix == "evex")
3110 ForcedVEXEncoding = VEXEncoding_EVEX;
3111 else if (Prefix == "disp8")
3112 ForcedDispEncoding = DispEncoding_Disp8;
3113 else if (Prefix == "disp32")
3114 ForcedDispEncoding = DispEncoding_Disp32;
3115 else
3116 return Error(NameLoc, "unknown prefix");
3117
3118 NameLoc = Parser.getTok().getLoc();
3119 if (getLexer().is(AsmToken::LCurly)) {
3120 Parser.Lex();
3121 Name = "{";
3122 } else {
3123 if (getLexer().isNot(AsmToken::Identifier))
3124 return Error(Parser.getTok().getLoc(), "Expected identifier");
3125 // FIXME: The mnemonic won't match correctly if its not in lower case.
3126 Name = Parser.getTok().getString();
3127 Parser.Lex();
3128 }
3129 continue;
3130 }
3131 // Parse MASM style pseudo prefixes.
3132 if (isParsingMSInlineAsm()) {
3133 if (Name.equals_insensitive("vex"))
3134 ForcedVEXEncoding = VEXEncoding_VEX;
3135 else if (Name.equals_insensitive("vex2"))
3136 ForcedVEXEncoding = VEXEncoding_VEX2;
3137 else if (Name.equals_insensitive("vex3"))
3138 ForcedVEXEncoding = VEXEncoding_VEX3;
3139 else if (Name.equals_insensitive("evex"))
3140 ForcedVEXEncoding = VEXEncoding_EVEX;
3141
3142 if (ForcedVEXEncoding != VEXEncoding_Default) {
3143 if (getLexer().isNot(AsmToken::Identifier))
3144 return Error(Parser.getTok().getLoc(), "Expected identifier");
3145 // FIXME: The mnemonic won't match correctly if its not in lower case.
3146 Name = Parser.getTok().getString();
3147 NameLoc = Parser.getTok().getLoc();
3148 Parser.Lex();
3149 }
3150 }
3151 break;
3152 }
3153
3154 // Support the suffix syntax for overriding displacement size as well.
3155 if (Name.consume_back(".d32")) {
3156 ForcedDispEncoding = DispEncoding_Disp32;
3157 } else if (Name.consume_back(".d8")) {
3158 ForcedDispEncoding = DispEncoding_Disp8;
3159 }
3160
3161 StringRef PatchedName = Name;
3162
3163 // Hack to skip "short" following Jcc.
3164 if (isParsingIntelSyntax() &&
3165 (PatchedName == "jmp" || PatchedName == "jc" || PatchedName == "jnc" ||
3166 PatchedName == "jcxz" || PatchedName == "jecxz" ||
3167 (PatchedName.startswith("j") &&
3168 ParseConditionCode(PatchedName.substr(1)) != X86::COND_INVALID))) {
3169 StringRef NextTok = Parser.getTok().getString();
3170 if (Parser.isParsingMasm() ? NextTok.equals_insensitive("short")
3171 : NextTok == "short") {
3172 SMLoc NameEndLoc =
3173 NameLoc.getFromPointer(NameLoc.getPointer() + Name.size());
3174 // Eat the short keyword.
3175 Parser.Lex();
3176 // MS and GAS ignore the short keyword; they both determine the jmp type
3177 // based on the distance of the label. (NASM does emit different code with
3178 // and without "short," though.)
3179 InstInfo->AsmRewrites->emplace_back(AOK_Skip, NameEndLoc,
3180 NextTok.size() + 1);
3181 }
3182 }
3183
3184 // FIXME: Hack to recognize setneb as setne.
3185 if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
3186 PatchedName != "setb" && PatchedName != "setnb")
3187 PatchedName = PatchedName.substr(0, Name.size()-1);
3188
3189 unsigned ComparisonPredicate = ~0U;
3190
3191 // FIXME: Hack to recognize cmp<comparison code>{sh,ss,sd,ph,ps,pd}.
3192 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
3193 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
3194 PatchedName.endswith("sh") || PatchedName.endswith("ph") ||
3195 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
3196 bool IsVCMP = PatchedName[0] == 'v';
3197 unsigned CCIdx = IsVCMP ? 4 : 3;
3198 unsigned CC = StringSwitch<unsigned>(
3199 PatchedName.slice(CCIdx, PatchedName.size() - 2))
3200 .Case("eq", 0x00)
3201 .Case("eq_oq", 0x00)
3202 .Case("lt", 0x01)
3203 .Case("lt_os", 0x01)
3204 .Case("le", 0x02)
3205 .Case("le_os", 0x02)
3206 .Case("unord", 0x03)
3207 .Case("unord_q", 0x03)
3208 .Case("neq", 0x04)
3209 .Case("neq_uq", 0x04)
3210 .Case("nlt", 0x05)
3211 .Case("nlt_us", 0x05)
3212 .Case("nle", 0x06)
3213 .Case("nle_us", 0x06)
3214 .Case("ord", 0x07)
3215 .Case("ord_q", 0x07)
3216 /* AVX only from here */
3217 .Case("eq_uq", 0x08)
3218 .Case("nge", 0x09)
3219 .Case("nge_us", 0x09)
3220 .Case("ngt", 0x0A)
3221 .Case("ngt_us", 0x0A)
3222 .Case("false", 0x0B)
3223 .Case("false_oq", 0x0B)
3224 .Case("neq_oq", 0x0C)
3225 .Case("ge", 0x0D)
3226 .Case("ge_os", 0x0D)
3227 .Case("gt", 0x0E)
3228 .Case("gt_os", 0x0E)
3229 .Case("true", 0x0F)
3230 .Case("true_uq", 0x0F)
3231 .Case("eq_os", 0x10)
3232 .Case("lt_oq", 0x11)
3233 .Case("le_oq", 0x12)
3234 .Case("unord_s", 0x13)
3235 .Case("neq_us", 0x14)
3236 .Case("nlt_uq", 0x15)
3237 .Case("nle_uq", 0x16)
3238 .Case("ord_s", 0x17)
3239 .Case("eq_us", 0x18)
3240 .Case("nge_uq", 0x19)
3241 .Case("ngt_uq", 0x1A)
3242 .Case("false_os", 0x1B)
3243 .Case("neq_os", 0x1C)
3244 .Case("ge_oq", 0x1D)
3245 .Case("gt_oq", 0x1E)
3246 .Case("true_us", 0x1F)
3247 .Default(~0U);
3248 if (CC != ~0U && (IsVCMP || CC < 8) &&
3249 (IsVCMP || PatchedName.back() != 'h')) {
3250 if (PatchedName.endswith("ss"))
3251 PatchedName = IsVCMP ? "vcmpss" : "cmpss";
3252 else if (PatchedName.endswith("sd"))
3253 PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
3254 else if (PatchedName.endswith("ps"))
3255 PatchedName = IsVCMP ? "vcmpps" : "cmpps";
3256 else if (PatchedName.endswith("pd"))
3257 PatchedName = IsVCMP ? "vcmppd" : "cmppd";
3258 else if (PatchedName.endswith("sh"))
3259 PatchedName = "vcmpsh";
3260 else if (PatchedName.endswith("ph"))
3261 PatchedName = "vcmpph";
3262 else
3263 llvm_unreachable("Unexpected suffix!");
3264
3265 ComparisonPredicate = CC;
3266 }
3267 }
3268
3269 // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
3270 if (PatchedName.startswith("vpcmp") &&
3271 (PatchedName.back() == 'b' || PatchedName.back() == 'w' ||
3272 PatchedName.back() == 'd' || PatchedName.back() == 'q')) {
3273 unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1;
3274 unsigned CC = StringSwitch<unsigned>(
3275 PatchedName.slice(5, PatchedName.size() - SuffixSize))
3276 .Case("eq", 0x0) // Only allowed on unsigned. Checked below.
3277 .Case("lt", 0x1)
3278 .Case("le", 0x2)
3279 //.Case("false", 0x3) // Not a documented alias.
3280 .Case("neq", 0x4)
3281 .Case("nlt", 0x5)
3282 .Case("nle", 0x6)
3283 //.Case("true", 0x7) // Not a documented alias.
3284 .Default(~0U);
3285 if (CC != ~0U && (CC != 0 || SuffixSize == 2)) {
3286 switch (PatchedName.back()) {
3287 default: llvm_unreachable("Unexpected character!");
3288 case 'b': PatchedName = SuffixSize == 2 ? "vpcmpub" : "vpcmpb"; break;
3289 case 'w': PatchedName = SuffixSize == 2 ? "vpcmpuw" : "vpcmpw"; break;
3290 case 'd': PatchedName = SuffixSize == 2 ? "vpcmpud" : "vpcmpd"; break;
3291 case 'q': PatchedName = SuffixSize == 2 ? "vpcmpuq" : "vpcmpq"; break;
3292 }
3293 // Set up the immediate to push into the operands later.
3294 ComparisonPredicate = CC;
3295 }
3296 }
3297
3298 // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
3299 if (PatchedName.startswith("vpcom") &&
3300 (PatchedName.back() == 'b' || PatchedName.back() == 'w' ||
3301 PatchedName.back() == 'd' || PatchedName.back() == 'q')) {
3302 unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1;
3303 unsigned CC = StringSwitch<unsigned>(
3304 PatchedName.slice(5, PatchedName.size() - SuffixSize))
3305 .Case("lt", 0x0)
3306 .Case("le", 0x1)
3307 .Case("gt", 0x2)
3308 .Case("ge", 0x3)
3309 .Case("eq", 0x4)
3310 .Case("neq", 0x5)
3311 .Case("false", 0x6)
3312 .Case("true", 0x7)
3313 .Default(~0U);
3314 if (CC != ~0U) {
3315 switch (PatchedName.back()) {
3316 default: llvm_unreachable("Unexpected character!");
3317 case 'b': PatchedName = SuffixSize == 2 ? "vpcomub" : "vpcomb"; break;
3318 case 'w': PatchedName = SuffixSize == 2 ? "vpcomuw" : "vpcomw"; break;
3319 case 'd': PatchedName = SuffixSize == 2 ? "vpcomud" : "vpcomd"; break;
3320 case 'q': PatchedName = SuffixSize == 2 ? "vpcomuq" : "vpcomq"; break;
3321 }
3322 // Set up the immediate to push into the operands later.
3323 ComparisonPredicate = CC;
3324 }
3325 }
3326
3327
3328 // Determine whether this is an instruction prefix.
3329 // FIXME:
3330 // Enhance prefixes integrity robustness. for example, following forms
3331 // are currently tolerated:
3332 // repz repnz <insn> ; GAS errors for the use of two similar prefixes
3333 // lock addq %rax, %rbx ; Destination operand must be of memory type
3334 // xacquire <insn> ; xacquire must be accompanied by 'lock'
3335 bool IsPrefix =
3337 .Cases("cs", "ds", "es", "fs", "gs", "ss", true)
3338 .Cases("rex64", "data32", "data16", "addr32", "addr16", true)
3339 .Cases("xacquire", "xrelease", true)
3340 .Cases("acquire", "release", isParsingIntelSyntax())
3341 .Default(false);
3342
3343 auto isLockRepeatNtPrefix = [](StringRef N) {
3344 return StringSwitch<bool>(N)
3345 .Cases("lock", "rep", "repe", "repz", "repne", "repnz", "notrack", true)
3346 .Default(false);
3347 };
3348
3349 bool CurlyAsEndOfStatement = false;
3350
3351 unsigned Flags = X86::IP_NO_PREFIX;
3352 while (isLockRepeatNtPrefix(Name.lower())) {
3353 unsigned Prefix =
3355 .Cases("lock", "lock", X86::IP_HAS_LOCK)
3356 .Cases("rep", "repe", "repz", X86::IP_HAS_REPEAT)
3357 .Cases("repne", "repnz", X86::IP_HAS_REPEAT_NE)
3358 .Cases("notrack", "notrack", X86::IP_HAS_NOTRACK)
3359 .Default(X86::IP_NO_PREFIX); // Invalid prefix (impossible)
3360 Flags |= Prefix;
3361 if (getLexer().is(AsmToken::EndOfStatement)) {
3362 // We don't have real instr with the given prefix
3363 // let's use the prefix as the instr.
3364 // TODO: there could be several prefixes one after another
3366 break;
3367 }
3368 // FIXME: The mnemonic won't match correctly if its not in lower case.
3369 Name = Parser.getTok().getString();
3370 Parser.Lex(); // eat the prefix
3371 // Hack: we could have something like "rep # some comment" or
3372 // "lock; cmpxchg16b $1" or "lock\0A\09incl" or "lock/incl"
3373 while (Name.startswith(";") || Name.startswith("\n") ||
3374 Name.startswith("#") || Name.startswith("\t") ||
3375 Name.startswith("/")) {
3376 // FIXME: The mnemonic won't match correctly if its not in lower case.
3377 Name = Parser.getTok().getString();
3378 Parser.Lex(); // go to next prefix or instr
3379 }
3380 }
3381
3382 if (Flags)
3383 PatchedName = Name;
3384
3385 // Hacks to handle 'data16' and 'data32'
3386 if (PatchedName == "data16" && is16BitMode()) {
3387 return Error(NameLoc, "redundant data16 prefix");
3388 }
3389 if (PatchedName == "data32") {
3390 if (is32BitMode())
3391 return Error(NameLoc, "redundant data32 prefix");
3392 if (is64BitMode())
3393 return Error(NameLoc, "'data32' is not supported in 64-bit mode");
3394 // Hack to 'data16' for the table lookup.
3395 PatchedName = "data16";
3396
3397 if (getLexer().isNot(AsmToken::EndOfStatement)) {
3398 StringRef Next = Parser.getTok().getString();
3399 getLexer().Lex();
3400 // data32 effectively changes the instruction suffix.
3401 // TODO Generalize.
3402 if (Next == "callw")
3403 Next = "calll";
3404 if (Next == "ljmpw")
3405 Next = "ljmpl";
3406
3407 Name = Next;
3408 PatchedName = Name;
3409 ForcedDataPrefix = X86::Is32Bit;
3410 IsPrefix = false;
3411 }
3412 }
3413
3414 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
3415
3416 // Push the immediate if we extracted one from the mnemonic.
3417 if (ComparisonPredicate != ~0U && !isParsingIntelSyntax()) {
3418 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonPredicate,
3419 getParser().getContext());
3420 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
3421 }
3422
3423 // This does the actual operand parsing. Don't parse any more if we have a
3424 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
3425 // just want to parse the "lock" as the first instruction and the "incl" as
3426 // the next one.
3427 if (getLexer().isNot(AsmToken::EndOfStatement) && !IsPrefix) {
3428 // Parse '*' modifier.
3429 if (getLexer().is(AsmToken::Star))
3430 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
3431
3432 // Read the operands.
3433 while (true) {
3434 if (parseOperand(Operands, Name))
3435 return true;
3436 if (HandleAVX512Operand(Operands))
3437 return true;
3438
3439 // check for comma and eat it
3440 if (getLexer().is(AsmToken::Comma))
3441 Parser.Lex();
3442 else
3443 break;
3444 }
3445
3446 // In MS inline asm curly braces mark the beginning/end of a block,
3447 // therefore they should be interepreted as end of statement
3448 CurlyAsEndOfStatement =
3449 isParsingIntelSyntax() && isParsingMSInlineAsm() &&
3450 (getLexer().is(AsmToken::LCurly) || getLexer().is(AsmToken::RCurly));
3451 if (getLexer().isNot(AsmToken::EndOfStatement) && !CurlyAsEndOfStatement)
3452 return TokError("unexpected token in argument list");
3453 }
3454
3455 // Push the immediate if we extracted one from the mnemonic.
3456 if (ComparisonPredicate != ~0U && isParsingIntelSyntax()) {
3457 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonPredicate,
3458 getParser().getContext());
3459 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
3460 }
3461
3462 // Consume the EndOfStatement or the prefix separator Slash
3463 if (getLexer().is(AsmToken::EndOfStatement) ||
3464 (IsPrefix && getLexer().is(AsmToken::Slash)))
3465 Parser.Lex();
3466 else if (CurlyAsEndOfStatement)
3467 // Add an actual EndOfStatement before the curly brace
3468 Info.AsmRewrites->emplace_back(AOK_EndOfStatement,
3469 getLexer().getTok().getLoc(), 0);
3470
3471 // This is for gas compatibility and cannot be done in td.
3472 // Adding "p" for some floating point with no argument.
3473 // For example: fsub --> fsubp
3474 bool IsFp =
3475 Name == "fsub" || Name == "fdiv" || Name == "fsubr" || Name == "fdivr";
3476 if (IsFp && Operands.size() == 1) {
3477 const char *Repl = StringSwitch<const char *>(Name)
3478 .Case("fsub", "fsubp")
3479 .Case("fdiv", "fdivp")
3480 .Case("fsubr", "fsubrp")
3481 .Case("fdivr", "fdivrp");
3482 static_cast<X86Operand &>(*Operands[0]).setTokenValue(Repl);
3483 }
3484
3485 if ((Name == "mov" || Name == "movw" || Name == "movl") &&
3486 (Operands.size() == 3)) {
3487 X86Operand &Op1 = (X86Operand &)*Operands[1];
3488 X86Operand &Op2 = (X86Operand &)*Operands[2];
3489 SMLoc Loc = Op1.getEndLoc();
3490 // Moving a 32 or 16 bit value into a segment register has the same
3491 // behavior. Modify such instructions to always take shorter form.
3492 if (Op1.isReg() && Op2.isReg() &&
3493 X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(
3494 Op2.getReg()) &&
3495 (X86MCRegisterClasses[X86::GR16RegClassID].contains(Op1.getReg()) ||
3496 X86MCRegisterClasses[X86::GR32RegClassID].contains(Op1.getReg()))) {
3497 // Change instruction name to match new instruction.
3498 if (Name != "mov" && Name[3] == (is16BitMode() ? 'l' : 'w')) {
3499 Name = is16BitMode() ? "movw" : "movl";
3500 Operands[0] = X86Operand::CreateToken(Name, NameLoc);
3501 }
3502 // Select the correct equivalent 16-/32-bit source register.
3503 MCRegister Reg =
3504 getX86SubSuperRegister(Op1.getReg(), is16BitMode() ? 16 : 32);
3505 Operands[1] = X86Operand::CreateReg(Reg, Loc, Loc);
3506 }
3507 }
3508
3509 // This is a terrible hack to handle "out[s]?[bwl]? %al, (%dx)" ->
3510 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
3511 // documented form in various unofficial manuals, so a lot of code uses it.
3512 if ((Name == "outb" || Name == "outsb" || Name == "outw" || Name == "outsw" ||
3513 Name == "outl" || Name == "outsl" || Name == "out" || Name == "outs") &&
3514 Operands.size() == 3) {
3515 X86Operand &Op = (X86Operand &)*Operands.back();
3516 if (Op.isDXReg())
3517 Operands.back() = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
3518 Op.getEndLoc());
3519 }
3520 // Same hack for "in[s]?[bwl]? (%dx), %al" -> "inb %dx, %al".
3521 if ((Name == "inb" || Name == "insb" || Name == "inw" || Name == "insw" ||
3522 Name == "inl" || Name == "insl" || Name == "in" || Name == "ins") &&
3523 Operands.size() == 3) {
3524 X86Operand &Op = (X86Operand &)*Operands[1];
3525 if (Op.isDXReg())
3526 Operands[1] = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
3527 Op.getEndLoc());
3528 }
3529
3531 bool HadVerifyError = false;
3532
3533 // Append default arguments to "ins[bwld]"
3534 if (Name.startswith("ins") &&
3535 (Operands.size() == 1 || Operands.size() == 3) &&
3536 (Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd" ||
3537 Name == "ins")) {
3538
3539 AddDefaultSrcDestOperands(TmpOperands,
3540 X86Operand::CreateReg(X86::DX, NameLoc, NameLoc),
3541 DefaultMemDIOperand(NameLoc));
3542 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3543 }
3544
3545 // Append default arguments to "outs[bwld]"
3546 if (Name.startswith("outs") &&
3547 (Operands.size() == 1 || Operands.size() == 3) &&
3548 (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
3549 Name == "outsd" || Name == "outs")) {
3550 AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
3551 X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
3552 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3553 }
3554
3555 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
3556 // values of $SIREG according to the mode. It would be nice if this
3557 // could be achieved with InstAlias in the tables.
3558 if (Name.startswith("lods") &&
3559 (Operands.size() == 1 || Operands.size() == 2) &&
3560 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
3561 Name == "lodsl" || Name == "lodsd" || Name == "lodsq")) {
3562 TmpOperands.push_back(DefaultMemSIOperand(NameLoc));
3563 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3564 }
3565
3566 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
3567 // values of $DIREG according to the mode. It would be nice if this
3568 // could be achieved with InstAlias in the tables.
3569 if (Name.startswith("stos") &&
3570 (Operands.size() == 1 || Operands.size() == 2) &&
3571 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
3572 Name == "stosl" || Name == "stosd" || Name == "stosq")) {
3573 TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
3574 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3575 }
3576
3577 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
3578 // values of $DIREG according to the mode. It would be nice if this
3579 // could be achieved with InstAlias in the tables.
3580 if (Name.startswith("scas") &&
3581 (Operands.size() == 1 || Operands.size() == 2) &&
3582 (Name == "scas" || Name == "scasb" || Name == "scasw" ||
3583 Name == "scasl" || Name == "scasd" || Name == "scasq")) {
3584 TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
3585 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3586 }
3587
3588 // Add default SI and DI operands to "cmps[bwlq]".
3589 if (Name.startswith("cmps") &&
3590 (Operands.size() == 1 || Operands.size() == 3) &&
3591 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
3592 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
3593 AddDefaultSrcDestOperands(TmpOperands, DefaultMemDIOperand(NameLoc),
3594 DefaultMemSIOperand(NameLoc));
3595 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3596 }
3597
3598 // Add default SI and DI operands to "movs[bwlq]".
3599 if (((Name.startswith("movs") &&
3600 (Name == "movs" || Name == "movsb" || Name == "movsw" ||
3601 Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
3602 (Name.startswith("smov") &&
3603 (Name == "smov" || Name == "smovb" || Name == "smovw" ||
3604 Name == "smovl" || Name == "smovd" || Name == "smovq"))) &&
3605 (Operands.size() == 1 || Operands.size() == 3)) {
3606 if (Name == "movsd" && Operands.size() == 1 && !isParsingIntelSyntax())
3607 Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
3608 AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
3609 DefaultMemDIOperand(NameLoc));
3610 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3611 }
3612
3613 // Check if we encountered an error for one the string insturctions
3614 if (HadVerifyError) {
3615 return HadVerifyError;
3616 }
3617
3618 // Transforms "xlat mem8" into "xlatb"
3619 if ((Name == "xlat" || Name == "xlatb") && Operands.size() == 2) {
3620 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
3621 if (Op1.isMem8()) {
3622 Warning(Op1.getStartLoc(), "memory operand is only for determining the "
3623 "size, (R|E)BX will be used for the location");
3624 Operands.pop_back();
3625 static_cast<X86Operand &>(*Operands[0]).setTokenValue("xlatb");
3626 }
3627 }
3628
3629 if (Flags)
3630 Operands.push_back(X86Operand::CreatePrefix(Flags, NameLoc, NameLoc));
3631 return false;
3632}
3633
3634bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
3635 const MCRegisterInfo *MRI = getContext().getRegisterInfo();
3636
3637 switch (Inst.getOpcode()) {
3638 default: return false;
3639 case X86::JMP_1:
3640 // {disp32} forces a larger displacement as if the instruction was relaxed.
3641 // NOTE: 16-bit mode uses 16-bit displacement even though it says {disp32}.
3642 // This matches GNU assembler.
3643 if (ForcedDispEncoding == DispEncoding_Disp32) {
3644 Inst.setOpcode(is16BitMode() ? X86::JMP_2 : X86::JMP_4);
3645 return true;
3646 }
3647
3648 return false;
3649 case X86::JCC_1:
3650 // {disp32} forces a larger displacement as if the instruction was relaxed.
3651 // NOTE: 16-bit mode uses 16-bit displacement even though it says {disp32}.
3652 // This matches GNU assembler.
3653 if (ForcedDispEncoding == DispEncoding_Disp32) {
3654 Inst.setOpcode(is16BitMode() ? X86::JCC_2 : X86::JCC_4);
3655 return true;
3656 }
3657
3658 return false;
3659 case X86::VMOVZPQILo2PQIrr:
3660 case X86::VMOVAPDrr:
3661 case X86::VMOVAPDYrr:
3662 case X86::VMOVAPSrr:
3663 case X86::VMOVAPSYrr:
3664 case X86::VMOVDQArr:
3665 case X86::VMOVDQAYrr:
3666 case X86::VMOVDQUrr:
3667 case X86::VMOVDQUYrr:
3668 case X86::VMOVUPDrr:
3669 case X86::VMOVUPDYrr:
3670 case X86::VMOVUPSrr:
3671 case X86::VMOVUPSYrr: {
3672 // We can get a smaller encoding by using VEX.R instead of VEX.B if one of
3673 // the registers is extended, but other isn't.
3674 if (ForcedVEXEncoding == VEXEncoding_VEX3 ||
3675 MRI->getEncodingValue(Inst.getOperand(0).getReg()) >= 8 ||
3676 MRI->getEncodingValue(Inst.getOperand(1).getReg()) < 8)
3677 return false;
3678
3679 unsigned NewOpc;
3680 switch (Inst.getOpcode()) {
3681 default: llvm_unreachable("Invalid opcode");
3682 case X86::VMOVZPQILo2PQIrr: NewOpc = X86::VMOVPQI2QIrr; break;
3683 case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
3684 case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
3685 case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
3686 case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
3687 case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
3688 case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
3689 case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
3690 case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
3691 case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
3692 case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
3693 case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
3694 case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
3695 }
3696 Inst.setOpcode(NewOpc);
3697 return true;
3698 }
3699 case X86::VMOVSDrr:
3700 case X86::VMOVSSrr: {
3701 // We can get a smaller encoding by using VEX.R instead of VEX.B if one of
3702 // the registers is extended, but other isn't.
3703 if (ForcedVEXEncoding == VEXEncoding_VEX3 ||
3704 MRI->getEncodingValue(Inst.getOperand(0).getReg()) >= 8 ||
3705 MRI->getEncodingValue(Inst.getOperand(2).getReg()) < 8)
3706 return false;
3707
3708 unsigned NewOpc;
3709 switch (Inst.getOpcode()) {
3710 default: llvm_unreachable("Invalid opcode");
3711 case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
3712 case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
3713 }
3714 Inst.setOpcode(NewOpc);
3715 return true;
3716 }
3717 case X86::RCR8ri: case X86::RCR16ri: case X86::RCR32ri: case X86::RCR64ri:
3718 case X86::RCL8ri: case X86::RCL16ri: case X86::RCL32ri: case X86::RCL64ri:
3719 case X86::ROR8ri: case X86::ROR16ri: case X86::ROR32ri: case X86::ROR64ri:
3720 case X86::ROL8ri: case X86::ROL16ri: case X86::ROL32ri: case X86::ROL64ri:
3721 case X86::SAR8ri: case X86::SAR16ri: case X86::SAR32ri: case X86::SAR64ri:
3722 case X86::SHR8ri: case X86::SHR16ri: case X86::SHR32ri: case X86::SHR64ri:
3723 case X86::SHL8ri: case X86::SHL16ri: case X86::SHL32ri: case X86::SHL64ri: {
3724 // Optimize s{hr,ar,hl} $1, <op> to "shift <op>". Similar for rotate.
3725 // FIXME: It would be great if we could just do this with an InstAlias.
3726 if (!Inst.getOperand(2).isImm() || Inst.getOperand(2).getImm() != 1)
3727 return false;
3728
3729 unsigned NewOpc;
3730 switch (Inst.getOpcode()) {
3731 default: llvm_unreachable("Invalid opcode");
3732 case X86::RCR8ri: NewOpc = X86::RCR8r1; break;
3733 case X86::RCR16ri: NewOpc = X86::RCR16r1; break;
3734 case X86::RCR32ri: NewOpc = X86::RCR32r1; break;
3735 case X86::RCR64ri: NewOpc = X86::RCR64r1; break;
3736 case X86::RCL8ri: NewOpc = X86::RCL8r1; break;
3737 case X86::RCL16ri: NewOpc = X86::RCL16r1; break;
3738 case X86::RCL32ri: NewOpc = X86::RCL32r1; break;
3739 case X86::RCL64ri: NewOpc = X86::RCL64r1; break;
3740 case X86::ROR8ri: NewOpc = X86::ROR8r1; break;
3741 case X86::ROR16ri: NewOpc = X86::ROR16r1; break;
3742 case X86::ROR32ri: NewOpc = X86::ROR32r1; break;
3743 case X86::ROR64ri: NewOpc = X86::ROR64r1; break;
3744 case X86::ROL8ri: NewOpc = X86::ROL8r1; break;
3745 case X86::ROL16ri: NewOpc = X86::ROL16r1; break;
3746 case X86::ROL32ri: NewOpc = X86::ROL32r1; break;
3747 case X86::ROL64ri: NewOpc = X86::ROL64r1; break;
3748 case X86::SAR8ri: NewOpc = X86::SAR8r1; break;
3749 case X86::SAR16ri: NewOpc = X86::SAR16r1; break;
3750 case X86::SAR32ri: NewOpc = X86::SAR32r1; break;
3751 case X86::SAR64ri: NewOpc = X86::SAR64r1; break;
3752 case X86::SHR8ri: NewOpc = X86::SHR8r1; break;
3753 case X86::SHR16ri: NewOpc = X86::SHR16r1; break;
3754 case X86::SHR32ri: NewOpc = X86::SHR32r1; break;
3755 case X86::SHR64ri: NewOpc = X86::SHR64r1; break;
3756 case X86::SHL8ri: NewOpc = X86::SHL8r1; break;
3757 case X86::SHL16ri: NewOpc = X86::SHL16r1; break;
3758 case X86::SHL32ri: NewOpc = X86::SHL32r1; break;
3759 case X86::SHL64ri: NewOpc = X86::SHL64r1; break;
3760 }
3761
3762 MCInst TmpInst;
3763 TmpInst.setOpcode(NewOpc);
3764 TmpInst.addOperand(Inst.getOperand(0));
3765 TmpInst.addOperand(Inst.getOperand(1));
3766 Inst = TmpInst;
3767 return true;
3768 }
3769 case X86::RCR8mi: case X86::RCR16mi: case X86::RCR32mi: case X86::RCR64mi:
3770 case X86::RCL8mi: case X86::RCL16mi: case X86::RCL32mi: case X86::RCL64mi:
3771 case X86::ROR8mi: case X86::ROR16mi: case X86::ROR32mi: case X86::ROR64mi:
3772 case X86::ROL8mi: case X86::ROL16mi: case X86::ROL32mi: case X86::ROL64mi:
3773 case X86::SAR8mi: case X86::SAR16mi: case X86::SAR32mi: case X86::SAR64mi:
3774 case X86::SHR8mi: case X86::SHR16mi: case X86::SHR32mi: case X86::SHR64mi:
3775 case X86::SHL8mi: case X86::SHL16mi: case X86::SHL32mi: case X86::SHL64mi: {
3776 // Optimize s{hr,ar,hl} $1, <op> to "shift <op>". Similar for rotate.
3777 // FIXME: It would be great if we could just do this with an InstAlias.
3778 if (!Inst.getOperand(X86::AddrNumOperands).isImm() ||
3780 return false;
3781
3782 unsigned NewOpc;
3783 switch (Inst.getOpcode()) {
3784 default: llvm_unreachable("Invalid opcode");
3785 case X86::RCR8mi: NewOpc = X86::RCR8m1; break;
3786 case X86::RCR16mi: NewOpc = X86::RCR16m1; break;
3787 case X86::RCR32mi: NewOpc = X86::RCR32m1; break;
3788 case X86::RCR64mi: NewOpc = X86::RCR64m1; break;
3789 case X86::RCL8mi: NewOpc = X86::RCL8m1; break;
3790 case X86::RCL16mi: NewOpc = X86::RCL16m1; break;
3791 case X86::RCL32mi: NewOpc = X86::RCL32m1; break;
3792 case X86::RCL64mi: NewOpc = X86::RCL64m1; break;
3793 case X86::ROR8mi: NewOpc = X86::ROR8m1; break;
3794 case X86::ROR16mi: NewOpc = X86::ROR16m1; break;
3795 case X86::ROR32mi: NewOpc = X86::ROR32m1; break;
3796 case X86::ROR64mi: NewOpc = X86::ROR64m1; break;
3797 case X86::ROL8mi: NewOpc = X86::ROL8m1; break;
3798 case X86::ROL16mi: NewOpc = X86::ROL16m1; break;
3799 case X86::ROL32mi: NewOpc = X86::ROL32m1; break;
3800 case X86::ROL64mi: NewOpc = X86::ROL64m1; break;
3801 case X86::SAR8mi: NewOpc = X86::SAR8m1; break;
3802 case X86::SAR16mi: NewOpc = X86::SAR16m1; break;
3803 case X86::SAR32mi: NewOpc = X86::SAR32m1; break;
3804 case X86::SAR64mi: NewOpc = X86::SAR64m1; break;
3805 case X86::SHR8mi: NewOpc = X86::SHR8m1; break;
3806 case X86::SHR16mi: NewOpc = X86::SHR16m1; break;
3807 case X86::SHR32mi: NewOpc = X86::SHR32m1; break;
3808 case X86::SHR64mi: NewOpc = X86::SHR64m1; break;
3809 case X86::SHL8mi: NewOpc = X86::SHL8m1; break;
3810 case X86::SHL16mi: NewOpc = X86::SHL16m1; break;
3811 case X86::SHL32mi: NewOpc = X86::SHL32m1; break;
3812 case X86::SHL64mi: NewOpc = X86::SHL64m1; break;
3813 }
3814
3815 MCInst TmpInst;
3816 TmpInst.setOpcode(NewOpc);
3817 for (int i = 0; i != X86::AddrNumOperands; ++i)
3818 TmpInst.addOperand(Inst.getOperand(i));
3819 Inst = TmpInst;
3820 return true;
3821 }
3822 case X86::INT: {
3823 // Transforms "int $3" into "int3" as a size optimization. We can't write an
3824 // instalias with an immediate operand yet.
3825 if (!Inst.getOperand(0).isImm() || Inst.getOperand(0).getImm() != 3)
3826 return false;
3827
3828 MCInst TmpInst;
3829 TmpInst.setOpcode(X86::INT3);
3830 Inst = TmpInst;
3831 return true;
3832 }
3833 }
3834}
3835
/// Run target-specific sanity checks on an already matched instruction.
///
/// Checks performed, in order:
///  - VF[C]MADDC{PH,SH}: warn when the destination register also appears as a
///    register operand at index 2 or later.
///  - VF[C]MULC{PH,SH}: same warning; the scan starts at operand 2 when
///    TSFlags has X86II::EVEX_K set and at operand 1 otherwise.
///  - V4F[N]MADD{PS,SS} / VP4DPWSSD[S]: warn when the encoding value of the
///    Src2 register is not a multiple of 4.
///  - gathers: warn when the destination/index (EVEX) or the
///    destination/mask/index (non-EVEX) register encodings coincide.
///  - instructions whose TSFlags encoding field is 0: report an error when
///    AH/BH/CH/DH is used while UsesRex is set.
///  - PREFETCHIT0/PREFETCHIT1: warn unless the base register is RIP.
///
/// \param Inst the instruction to inspect.
/// \param Ops  the parsed operands; only the start location of Ops[0] is
///             used, as the location of every diagnostic.
/// \returns false when no diagnostic was produced; otherwise the value
///          returned by Warning()/Error() for the first diagnostic.
3836bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
3837 using namespace X86;
3838 const MCRegisterInfo *MRI = getContext().getRegisterInfo();
3839 unsigned Opcode = Inst.getOpcode();
3840 uint64_t TSFlags = MII.get(Opcode).TSFlags;
3841 if (isVFCMADDCPH(Opcode) || isVFCMADDCSH(Opcode) || isVFMADDCPH(Opcode) ||
3842 isVFMADDCSH(Opcode)) {
3843 unsigned Dest = Inst.getOperand(0).getReg();
3844 for (unsigned i = 2; i < Inst.getNumOperands(); i++)
3845 if (Inst.getOperand(i).isReg() && Dest == Inst.getOperand(i).getReg())
3846 return Warning(Ops[0]->getStartLoc(), "Destination register should be "
3847 "distinct from source registers");
3848 } else if (isVFCMULCPH(Opcode) || isVFCMULCSH(Opcode) || isVFMULCPH(Opcode) ||
3849 isVFMULCSH(Opcode)) {
3850 unsigned Dest = Inst.getOperand(0).getReg();
3851 // The mask variants have different operand list. Scan from the third
3852 // operand to avoid emitting incorrect warning.
3853 // VFMULCPHZrr Dest, Src1, Src2
3854 // VFMULCPHZrrk Dest, Dest, Mask, Src1, Src2
3855 // VFMULCPHZrrkz Dest, Mask, Src1, Src2
3856 for (unsigned i = TSFlags & X86II::EVEX_K ? 2 : 1;
3857 i < Inst.getNumOperands(); i++)
3858 if (Inst.getOperand(i).isReg() && Dest == Inst.getOperand(i).getReg())
3859 return Warning(Ops[0]->getStartLoc(), "Destination register should be "
3860 "distinct from source registers");
3861 } else if (isV4FMADDPS(Opcode) || isV4FNMADDPS(Opcode) ||
3862 isV4FNMADDPS(Opcode) || isV4FNMADDSS(Opcode) ||
3863 isVP4DPWSSDS(Opcode) || isVP4DPWSSD(Opcode)) {
 // NOTE(review): the remainder of the operand-index expression below and the
 // declaration of RegName are missing from this listing - restore them from
 // the upstream file.
3864 unsigned Src2 = Inst.getOperand(Inst.getNumOperands() -
3866 unsigned Src2Enc = MRI->getEncodingValue(Src2);
3867 if (Src2Enc % 4 != 0) {
 // Round the encoding down to a multiple of four to name the first register
 // of the group, and add three to name the last.
3869 unsigned GroupStart = (Src2Enc / 4) * 4;
3870 unsigned GroupEnd = GroupStart + 3;
3871 return Warning(Ops[0]->getStartLoc(),
3872 "source register '" + RegName + "' implicitly denotes '" +
3873 RegName.take_front(3) + Twine(GroupStart) + "' to '" +
3874 RegName.take_front(3) + Twine(GroupEnd) +
3875 "' source group");
3876 }
3877 } else if (isVGATHERDPD(Opcode) || isVGATHERDPS(Opcode) ||
3878 isVGATHERQPD(Opcode) || isVGATHERQPS(Opcode) ||
3879 isVPGATHERDD(Opcode) || isVPGATHERDQ(Opcode) ||
3880 isVPGATHERQD(Opcode) || isVPGATHERQQ(Opcode)) {
3881 bool HasEVEX = (TSFlags & X86II::EncodingMask) == X86II::EVEX;
3882 if (HasEVEX) {
 // EVEX form: only the destination and index encodings are compared; the
 // memory operand is taken to start at operand 4.
3883 unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
3884 unsigned Index = MRI->getEncodingValue(
3885 Inst.getOperand(4 + X86::AddrIndexReg).getReg());
3886 if (Dest == Index)
3887 return Warning(Ops[0]->getStartLoc(), "index and destination registers "
3888 "should be distinct");
3889 } else {
 // Non-EVEX form: operand 1 is read as the mask register and the memory
 // operand is taken to start at operand 3.
3890 unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
3891 unsigned Mask = MRI->getEncodingValue(Inst.getOperand(1).getReg());
3892 unsigned Index = MRI->getEncodingValue(
3893 Inst.getOperand(3 + X86::AddrIndexReg).getReg());
3894 if (Dest == Mask || Dest == Index || Mask == Index)
3895 return Warning(Ops[0]->getStartLoc(), "mask, index, and destination "
3896 "registers should be distinct");
3897 }
3898 }
3899
3900 // Check that we aren't mixing AH/BH/CH/DH with REX prefix. We only need to
3901 // check this with the legacy encoding, VEX/EVEX/XOP don't use REX.
3902 if ((TSFlags & X86II::EncodingMask) == 0) {
3903 MCPhysReg HReg = X86::NoRegister;
3904 bool UsesRex = TSFlags & X86II::REX_W;
3905 unsigned NumOps = Inst.getNumOperands();
3906 for (unsigned i = 0; i != NumOps; ++i) {
3907 const MCOperand &MO = Inst.getOperand(i);
3908 if (!MO.isReg())
3909 continue;
3910 unsigned Reg = MO.getReg();
3911 if (Reg == X86::AH || Reg == X86::BH || Reg == X86::CH || Reg == X86::DH)
3912 HReg = Reg;
 // NOTE(review): the condition guarding the assignment below is missing from
 // this listing; as shown, UsesRex would be set for every register operand.
3915 UsesRex = true;
3916 }
3917
3918 if (UsesRex && HReg != X86::NoRegister) {
 // NOTE(review): the declaration of RegName for this scope is missing from
 // this listing.
3920 return Error(Ops[0]->getStartLoc(),
3921 "can't encode '" + RegName + "' in an instruction requiring "
3922 "REX prefix");
3923 }
3924 }
3925
3926 if ((Opcode == X86::PREFETCHIT0 || Opcode == X86::PREFETCHIT1)) {
3927 const MCOperand &MO = Inst.getOperand(X86::AddrBaseReg);
3928 if (!MO.isReg() || MO.getReg() != X86::RIP)
3929 return Warning(
3930 Ops[0]->getStartLoc(),
3931 Twine((Inst.getOpcode() == X86::PREFETCHIT0 ? "'prefetchit0'"
3932 : "'prefetchit1'")) +
3933 " only supports RIP-relative address");
3934 }
3935 return false;
3936}
3937
// Forward declaration: ErrorMissingFeature() below calls this with a
// feature-bit index and streams the returned C string into its diagnostic.
// The definition is not visible in this part of the file.
3938static const char *getSubtargetFeatureName(uint64_t Val);
3939
3940void X86AsmParser::emitWarningForSpecialLVIInstruction(SMLoc Loc) {
3941 Warning(Loc, "Instruction may be vulnerable to LVI and "
3942 "requires manual mitigation");
3943 Note(SMLoc(), "See https://software.intel.com/"
3944 "security-software-guidance/insights/"
3945 "deep-dive-load-value-injection#specialinstructions"
3946 " for more information");
3947}
3948
3949/// RET instructions and also instructions that indirect calls/jumps from memory
3950/// combine a load and a branch within a single instruction. To mitigate these
3951/// instructions against LVI, they must be decomposed into separate load and
3952/// branch instructions, with an LFENCE in between. For more details, see:
3953/// - X86LoadValueInjectionRetHardening.cpp
3954/// - X86LoadValueInjectionIndirectThunks.cpp
3955/// - https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection
3956///
3957/// Returns `true` if a mitigation was applied or warning was emitted.
3958void X86AsmParser::applyLVICFIMitigation(MCInst &Inst, MCStreamer &Out) {
3959 // Information on control-flow instructions that require manual mitigation can
3960 // be found here:
3961 // https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
3962 switch (Inst.getOpcode()) {
3963 case X86::RET16:
3964 case X86::RET32:
3965 case X86::RET64:
3966 case X86::RETI16:
3967 case X86::RETI32:
3968 case X86::RETI64: {
3969 MCInst ShlInst, FenceInst;
3970 bool Parse32 = is32BitMode() || Code16GCC;
3971 unsigned Basereg =
3972 is64BitMode() ? X86::RSP : (Parse32 ? X86::ESP : X86::SP);
3973 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
3974 auto ShlMemOp = X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
3975 /*BaseReg=*/Basereg, /*IndexReg=*/0,
3976 /*Scale=*/1, SMLoc{}, SMLoc{}, 0);
3977 ShlInst.setOpcode(X86::SHL64mi);
3978 ShlMemOp->addMemOperands(ShlInst, 5);
3979 ShlInst.addOperand(MCOperand::createImm(0));
3980 FenceInst.setOpcode(X86::LFENCE);
3981 Out.emitInstruction(ShlInst, getSTI());
3982 Out.emitInstruction(FenceInst, getSTI());
3983 return;
3984 }
3985 case X86::JMP16m:
3986 case X86::JMP32m:
3987 case X86::JMP64m:
3988 case X86::CALL16m:
3989 case X86::CALL32m:
3990 case X86::CALL64m:
3991 emitWarningForSpecialLVIInstruction(Inst.getLoc());
3992 return;
3993 }
3994}
3995
3996/// To mitigate LVI, every instruction that performs a load can be followed by
3997/// an LFENCE instruction to squash any potential mis-speculation. There are
3998/// some instructions that require additional considerations, and may requre
3999/// manual mitigation. For more details, see:
4000/// https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection
4001///
4002/// Returns `true` if a mitigation was applied or warning was emitted.
4003void X86AsmParser::applyLVILoadHardeningMitigation(MCInst &Inst,
4004 MCStreamer &Out) {
4005 auto Opcode = Inst.getOpcode();
4006 auto Flags = Inst.getFlags();
4007 if ((Flags & X86::IP_HAS_REPEAT) || (Flags & X86::IP_HAS_REPEAT_NE)) {
4008 // Information on REP string instructions that require manual mitigation can
4009 // be found here:
4010 // https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
4011 switch (Opcode) {
4012 case X86::CMPSB:
4013 case X86::CMPSW:
4014 case X86::CMPSL:
4015 case X86::CMPSQ:
4016 case X86::SCASB:
4017 case X86::SCASW:
4018 case X86::SCASL:
4019 case X86::SCASQ:
4020 emitWarningForSpecialLVIInstruction(Inst.getLoc());
4021 return;
4022 }
4023 } else if (Opcode == X86::REP_PREFIX || Opcode == X86::REPNE_PREFIX) {
4024 // If a REP instruction is found on its own line, it may or may not be
4025 // followed by a vulnerable instruction. Emit a warning just in case.
4026 emitWarningForSpecialLVIInstruction(Inst.getLoc());
4027 return;
4028 }
4029
4030 const MCInstrDesc &MCID = MII.get(Inst.getOpcode());
4031
4032 // Can't mitigate after terminators or calls. A control flow change may have
4033 // already occurred.
4034 if (MCID.isTerminator() || MCID.isCall())
4035 return;
4036
4037 // LFENCE has the mayLoad property, don't double fence.
4038 if (MCID.mayLoad() && Inst.getOpcode() != X86::LFENCE) {
4040 FenceInst.setOpcode(X86::LFENCE);
4041 Out.emitInstruction(FenceInst, getSTI());
4042 }
4043}
4044
/// Emit \p Inst to \p Out, wrapped by the optional LVI mitigations.
///
/// applyLVICFIMitigation() is called before the instruction is emitted and
/// applyLVILoadHardeningMitigation() after it; each call is guarded by a
/// condition that includes the corresponding subtarget feature
/// (FeatureLVIControlFlowIntegrity / FeatureLVILoadHardening).
///
/// \param Inst     the instruction to emit.
/// \param Operands the parsed operands; not referenced in the visible body.
/// \param Out      the streamer receiving the instruction and any fences.
4045void X86AsmParser::emitInstruction(MCInst &Inst, OperandVector &Operands,
4046 MCStreamer &Out) {
 // NOTE(review): the line that opens this condition ("if (" and its first
 // operand) is missing from this listing - restore it from the upstream file.
4048 getSTI().hasFeature(X86::FeatureLVIControlFlowIntegrity))
4049 applyLVICFIMitigation(Inst, Out);
4050
4051 Out.emitInstruction(Inst, getSTI());
4052
 // NOTE(review): the opening line of this condition is missing as well.
4054 getSTI().hasFeature(X86::FeatureLVILoadHardening))
4055 applyLVILoadHardeningMitigation(Inst, Out);
4056}
4057
/// Dispatch to the syntax-specific matcher: MatchAndEmitIntelInstruction()
/// when isParsingIntelSyntax() is true, MatchAndEmitATTInstruction()
/// otherwise. All arguments are forwarded unchanged and the callee's result
/// is returned.
// NOTE(review): the parameter lines between IDLoc/Opcode and
// MatchingInlineAsm (the body uses Operands, Out and ErrorInfo) are missing
// from this listing.
4058bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4061 bool MatchingInlineAsm) {
4062 if (isParsingIntelSyntax())
4063 return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
4064 MatchingInlineAsm);
4065 return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
4066 MatchingInlineAsm);
4067}
4068
/// Expand the waiting form of an FPU control mnemonic into WAIT plus the
/// corresponding non-waiting mnemonic.
///
/// When the token of \p Op is one of finit, fsave, fstcw, fstcww, fstenv,
/// fstsw, fstsww or fclex, a WAIT instruction located at \p IDLoc is emitted
/// (unless \p MatchingInlineAsm is set) and Operands[0] is replaced by a
/// token holding the "fn..." replacement mnemonic. For any other token
/// nothing happens.
// NOTE(review): the parameter line between Op and MatchingInlineAsm (the body
// uses Operands and Out) is missing from this listing.
4069void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
4071 bool MatchingInlineAsm) {
4072 // FIXME: This should be replaced with a real .td file alias mechanism.
4073 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
4074 // call.
4075 const char *Repl = StringSwitch<const char *>(Op.getToken())
4076 .Case("finit", "fninit")
4077 .Case("fsave", "fnsave")
4078 .Case("fstcw", "fnstcw")
4079 .Case("fstcww", "fnstcw")
4080 .Case("fstenv", "fnstenv")
4081 .Case("fstsw", "fnstsw")
4082 .Case("fstsww", "fnstsw")
4083 .Case("fclex", "fnclex")
4084 .Default(nullptr);
4085 if (Repl) {
4086 MCInst Inst;
4087 Inst.setOpcode(X86::WAIT);
4088 Inst.setLoc(IDLoc);
4089 if (!MatchingInlineAsm)
4090 emitInstruction(Inst, Operands, Out);
 // The mnemonic operand is replaced even when nothing was emitted.
4091 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
4092 }
4093}
4094
/// Report an "instruction requires:" error listing every feature whose bit is
/// set in \p MissingFeatures.
///
/// \param IDLoc             location the error is attached to.
/// \param MissingFeatures   set of missing features; asserted to be
///                          non-empty.
/// \param MatchingInlineAsm forwarded to Error().
/// \returns the value returned by Error().
4095bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc,
4096 const FeatureBitset &MissingFeatures,
4097 bool MatchingInlineAsm) {
4098 assert(MissingFeatures.any() && "Unknown missing feature!");
 // NOTE(review): the declaration of the stream OS (and of whatever buffer
 // backs it) is missing from this listing.
4101 OS << "instruction requires:";
4102 for (unsigned i = 0, e = MissingFeatures.size(); i != e; ++i) {
4103 if (MissingFeatures[i])
4104 OS << ' ' << getSubtargetFeatureName(i);
4105 }
4106 return Error(IDLoc, OS.str(), SMRange(), MatchingInlineAsm);
4107}
4108
// NOTE(review): the signature line of this function is missing from this
// listing. Call sites in this file invoke it as `getPrefixes(Operands)` and
// store the result in an `unsigned`.
//
// If the last entry of Operands is a prefix operand, remove it from the list
// and return its prefix value; otherwise leave Operands untouched and
// return 0. Operands.back() is read unconditionally, so the list must not be
// empty.
4110 unsigned Result = 0;
4111 X86Operand &Prefix = static_cast<X86Operand &>(*Operands.back());
4112 if (Prefix.isPrefix()) {
4113 Result = Prefix.getPrefix();
4114 Operands.pop_back();
4115 }
4116 return Result;
4117}
4118
/// Reject candidate instructions that conflict with the encoding forced by
/// the user.
///
/// \param Inst the candidate instruction; only its opcode is used, to look up
///             the instruction description.
/// \returns Match_Unsupported when one of the forced-encoding checks fails,
///          Match_Success otherwise.
4119unsigned X86AsmParser::checkTargetMatchPredicate(MCInst &Inst) {
4120 unsigned Opc = Inst.getOpcode();
4121 const MCInstrDesc &MCID = MII.get(Opc);
4122
 // NOTE(review): the second half of this condition is missing from this
 // listing.
4123 if (ForcedVEXEncoding == VEXEncoding_EVEX &&
4125 return Match_Unsupported;
4126
 // NOTE(review): the last operand of this condition is missing from this
 // listing.
4127 if ((ForcedVEXEncoding == VEXEncoding_VEX ||
4128 ForcedVEXEncoding == VEXEncoding_VEX2 ||
4129 ForcedVEXEncoding == VEXEncoding_VEX3) &&
4131 return Match_Unsupported;
4132
4133 // These instructions are only available with {vex}, {vex2} or {vex3} prefix
4134 if (MCID.TSFlags & X86II::ExplicitVEXPrefix &&
4135 (ForcedVEXEncoding != VEXEncoding_VEX &&
4136 ForcedVEXEncoding != VEXEncoding_VEX2 &&
4137 ForcedVEXEncoding != VEXEncoding_VEX3))
4138 return Match_Unsupported;
4139
4140 return Match_Success;
4141}
4142
/// Match the parsed operands as an AT&T-syntax instruction and, on success,
/// emit it.
///
/// A direct match is tried first. If that fails with an invalid-operand,
/// mnemonic or unsupported result, the mnemonic is retried with each size
/// suffix appended ("bwlq", or "slt" when the mnemonic starts with 'f'). The
/// instruction is accepted only when exactly one suffixed form matches;
/// otherwise a diagnostic is chosen from the recorded match results.
///
/// \param IDLoc             location set on the emitted instruction and used
///                          for most diagnostics.
/// \param Opcode            set to the matched opcode on success.
/// \param Out               streamer the instruction is emitted to.
/// \param MatchingInlineAsm when true, validation, post-processing and
///                          emission are all skipped.
/// \returns false on success; otherwise true or the value returned by
///          Error()/ErrorMissingFeature().
// NOTE(review): the parameter lines declaring Operands and ErrorInfo are
// missing from this listing.
4143bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
4145 MCStreamer &Out,
4147 bool MatchingInlineAsm) {
4148 assert(!Operands.empty() && "Unexpect empty operand list!");
4149 assert((*Operands[0]).isToken() && "Leading operand should always be a mnemonic!");
4150 SMRange EmptyRange = std::nullopt;
4151
4152 // First, handle aliases that expand to multiple instructions.
4153 MatchFPUWaitAlias(IDLoc, static_cast<X86Operand &>(*Operands[0]), Operands,
4154 Out, MatchingInlineAsm);
 // Op is bound after the alias step because that step may replace
 // Operands[0].
4155 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
4156 unsigned Prefixes = getPrefixes(Operands);
4157
4158 MCInst Inst;
4159
4160 // If VEX/EVEX encoding is forced, we need to pass the USE_* flag to the
4161 // encoder and printer.
4162 if (ForcedVEXEncoding == VEXEncoding_VEX)
4163 Prefixes |= X86::IP_USE_VEX;
4164 else if (ForcedVEXEncoding == VEXEncoding_VEX2)
4165 Prefixes |= X86::IP_USE_VEX2;
4166 else if (ForcedVEXEncoding == VEXEncoding_VEX3)
4167 Prefixes |= X86::IP_USE_VEX3;
4168 else if (ForcedVEXEncoding == VEXEncoding_EVEX)
4169 Prefixes |= X86::IP_USE_EVEX;
4170
4171 // Set encoded flags for {disp8} and {disp32}.
4172 if (ForcedDispEncoding == DispEncoding_Disp8)
4173 Prefixes |= X86::IP_USE_DISP8;
4174 else if (ForcedDispEncoding == DispEncoding_Disp32)
4175 Prefixes |= X86::IP_USE_DISP32;
4176
4177 if (Prefixes)
4178 Inst.setFlags(Prefixes);
4179
4180 // In 16-bit mode, if data32 is specified, temporarily switch to 32-bit mode
4181 // when matching the instruction.
4182 if (ForcedDataPrefix == X86::Is32Bit)
4183 SwitchMode(X86::Is32Bit);
4184 // First, try a direct match.
4185 FeatureBitset MissingFeatures;
4186 unsigned OriginalError = MatchInstruction(Operands, Inst, ErrorInfo,
4187 MissingFeatures, MatchingInlineAsm,
4188 isParsingIntelSyntax());
 // Undo the temporary mode switch and clear the forced prefix so that it
 // applies to this one instruction only.
4189 if (ForcedDataPrefix == X86::Is32Bit) {
4190 SwitchMode(X86::Is16Bit);
4191 ForcedDataPrefix = 0;
4192 }
4193 switch (OriginalError) {
4194 default: llvm_unreachable("Unexpected match result!");
4195 case Match_Success:
4196 if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
4197 return true;
4198 // Some instructions need post-processing to, for example, tweak which
4199 // encoding is selected. Loop on it while changes happen so the
4200 // individual transformations can chain off each other.
4201 if (!MatchingInlineAsm)
4202 while (processInstruction(Inst, Operands))
4203 ;
4204
4205 Inst.setLoc(IDLoc);
4206 if (!MatchingInlineAsm)
4207 emitInstruction(Inst, Operands, Out);
4208 Opcode = Inst.getOpcode();
4209 return false;
4210 case Match_InvalidImmUnsignedi4: {
4211 SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc();
4212 if (ErrorLoc == SMLoc())
4213 ErrorLoc = IDLoc;
4214 return Error(ErrorLoc, "immediate must be an integer in range [0, 15]",
4215 EmptyRange, MatchingInlineAsm);
4216 }
4217 case Match_MissingFeature:
4218 return ErrorMissingFeature(IDLoc, MissingFeatures, MatchingInlineAsm);
 // These three results fall through to the suffix-matching fallback below.
4219 case Match_InvalidOperand:
4220 case Match_MnemonicFail:
4221 case Match_Unsupported:
4222 break;
4223 }
4224 if (Op.getToken().empty()) {
4225 Error(IDLoc, "instruction must have size higher than 0", EmptyRange,
4226 MatchingInlineAsm);
4227 return true;
4228 }
4229
4230 // FIXME: Ideally, we would only attempt suffix matches for things which are
4231 // valid prefixes, and we could just infer the right unambiguous
4232 // type. However, that requires substantially more matcher support than the
4233 // following hack.
4234
4235 // Change the operand to point to a temporary token.
4236 StringRef Base = Op.getToken();
4237 SmallString<16> Tmp;
4238 Tmp += Base;
 // Placeholder character; it is overwritten with each candidate suffix in
 // the loop below.
4239 Tmp += ' ';
4240 Op.setTokenValue(Tmp);
4241
4242 // If this instruction starts with an 'f', then it is a floating point stack
4243 // instruction. These come in up to three forms for 32-bit, 64-bit, and
4244 // 80-bit floating point, which use the suffixes s,l,t respectively.
4245 //
4246 // Otherwise, we assume that this may be an integer instruction, which comes
4247 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
4248 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
4249 // MemSize corresponding to Suffixes. { 8, 16, 32, 64 } { 32, 64, 80, 0 }
4250 const char *MemSize = Base[0] != 'f' ? "\x08\x10\x20\x40" : "\x20\x40\x50\0";
4251
4252 // Check for the various suffix matches.
4253 uint64_t ErrorInfoIgnore;
4254 FeatureBitset ErrorInfoMissingFeatures; // Init suppresses compiler warnings.
4255 unsigned Match[4];
4256
4257 // Some instruction like VPMULDQ is NOT the variant of VPMULD but a new one.
4258 // So we should make sure the suffix matcher only works for memory variant
4259 // that has the same size with the suffix.
4260 // FIXME: This flag is a workaround for legacy instructions that didn't
4261 // declare non suffix variant assembly.
4262 bool HasVectorReg = false;
4263 X86Operand *MemOp = nullptr;
4264 for (const auto &Op : Operands) {
4265 X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
4266 if (X86Op->isVectorReg())
4267 HasVectorReg = true;
4268 else if (X86Op->isMem()) {
4269 MemOp = X86Op;
4270 assert(MemOp->Mem.Size == 0 && "Memory size always 0 under ATT syntax");
4271 // Have we found an unqualified memory operand,
4272 // break. IA allows only one memory operand.
4273 break;
4274 }
4275 }
4276
 // Try every suffix. A candidate is skipped (left as Match_MnemonicFail) when
 // a vector register is present without a memory operand.
4277 for (unsigned I = 0, E = std::size(Match); I != E; ++I) {
4278 Tmp.back() = Suffixes[I];
4279 if (MemOp && HasVectorReg)
4280 MemOp->Mem.Size = MemSize[I];
4281 Match[I] = Match_MnemonicFail;
4282 if (MemOp || !HasVectorReg) {
4283 Match[I] =
4284 MatchInstruction(Operands, Inst, ErrorInfoIgnore, MissingFeatures,
4285 MatchingInlineAsm, isParsingIntelSyntax());
4286 // If this returned as a missing feature failure, remember that.
4287 if (Match[I] == Match_MissingFeature)
4288 ErrorInfoMissingFeatures = MissingFeatures;
4289 }
4290 }
4291
4292 // Restore the old token.
4293 Op.setTokenValue(Base);
4294
4295 // If exactly one matched, then we treat that as a successful match (and the
4296 // instruction will already have been filled in correctly, since the failing
4297 // matches won't have modified it).
4298 unsigned NumSuccessfulMatches = llvm::count(Match, Match_Success);
4299 if (NumSuccessfulMatches == 1) {
4300 if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
4301 return true;
4302 // Some instructions need post-processing to, for example, tweak which
4303 // encoding is selected. Loop on it while changes happen so the
4304 // individual transformations can chain off each other.
4305 if (!MatchingInlineAsm)
4306 while (processInstruction(Inst, Operands))
4307 ;
4308
4309 Inst.setLoc(IDLoc);
4310 if (!MatchingInlineAsm)
4311 emitInstruction(Inst, Operands, Out);
4312 Opcode = Inst.getOpcode();
4313 return false;
4314 }
4315
4316 // Otherwise, the match failed, try to produce a decent error message.
4317
4318 // If we had multiple suffix matches, then identify this as an ambiguous
4319 // match.
4320 if (NumSuccessfulMatches > 1) {
4321 char MatchChars[4];
4322 unsigned NumMatches = 0;
4323 for (unsigned I = 0, E = std::size(Match); I != E; ++I)
4324 if (Match[I] == Match_Success)
4325 MatchChars[NumMatches++] = Suffixes[I];
4326
 // NOTE(review): the declaration of the stream OS (and of whatever buffer
 // backs it) is missing from this listing.
4329 OS << "ambiguous instructions require an explicit suffix (could be ";
4330 for (unsigned i = 0; i != NumMatches; ++i) {
4331 if (i != 0)
4332 OS << ", ";
4333 if (i + 1 == NumMatches)
4334 OS << "or ";
4335 OS << "'" << Base << MatchChars[i] << "'";
4336 }
4337 OS << ")";
4338 Error(IDLoc, OS.str(), EmptyRange, MatchingInlineAsm);
4339 return true;
4340 }
4341
4342 // Okay, we know that none of the variants matched successfully.
4343
4344 // If all of the instructions reported an invalid mnemonic, then the original
4345 // mnemonic was invalid.
4346 if (llvm::count(Match, Match_MnemonicFail) == 4) {
4347 if (OriginalError == Match_MnemonicFail)
4348 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
4349 Op.getLocRange(), MatchingInlineAsm);
4350
4351 if (OriginalError == Match_Unsupported)
4352 return Error(IDLoc, "unsupported instruction", EmptyRange,
4353 MatchingInlineAsm);
4354
4355 assert(OriginalError == Match_InvalidOperand && "Unexpected error");
4356 // Recover location info for the operand if we know which was the problem.
4357 if (ErrorInfo != ~0ULL) {
4358 if (ErrorInfo >= Operands.size())
4359 return Error(IDLoc, "too few operands for instruction", EmptyRange,
4360 MatchingInlineAsm);
4361
4362 X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
4363 if (Operand.getStartLoc().isValid()) {
4364 SMRange OperandRange = Operand.getLocRange();
4365 return Error(Operand.getStartLoc(), "invalid operand for instruction",
4366 OperandRange, MatchingInlineAsm);
4367 }
4368 }
4369
4370 return Error(IDLoc, "invalid operand for instruction", EmptyRange,
4371 MatchingInlineAsm);
4372 }
4373
4374 // If one instruction matched as unsupported, report this as unsupported.
4375 if (llvm::count(Match, Match_Unsupported) == 1) {
4376 return Error(IDLoc, "unsupported instruction", EmptyRange,
4377 MatchingInlineAsm);
4378 }
4379
4380 // If one instruction matched with a missing feature, report this as a
4381 // missing feature.
4382 if (llvm::count(Match, Match_MissingFeature) == 1) {
4383 ErrorInfo = Match_MissingFeature;
4384 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures,
4385 MatchingInlineAsm);
4386 }
4387
4388 // If one instruction matched with an invalid operand, report this as an
4389 // operand failure.
4390 if (llvm::count(Match, Match_InvalidOperand) == 1) {
4391 return Error(IDLoc, "invalid operand for instruction", EmptyRange,
4392 MatchingInlineAsm);
4393 }
4394
4395 // If all of these were an outright failure, report it in a useless way.
4396 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
4397 EmptyRange, MatchingInlineAsm);
4398 return true;
4399}
4400
/// Match the parsed operands as an Intel-syntax instruction and, on success,
/// emit it.
///
/// Match attempts, in order:
///  1. For "push" with a single constant immediate that fits the pointer
///     width, an AT&T-mode match with a mode-dependent size suffix appended
///     to the mnemonic.
///  2. If an operand is still an unsized memory operand, one match for each
///     of the sizes 8, 16, 32, 64, 80, 128, 256 and 512.
///  3. If nothing has been recorded so far, one match with the operands as
///     they are.
/// When more than one attempt succeeded and the unsized memory operand has a
/// frontend size, the match is retried with that size and an
/// AOK_SizeDirective rewrite is recorded.
///
/// \param IDLoc             location set on the emitted instruction and used
///                          for most diagnostics.
/// \param Opcode            set to the matched opcode on success.
/// \param Out               streamer the instruction is emitted to.
/// \param MatchingInlineAsm when true, validation, post-processing and
///                          emission are all skipped.
/// \returns false on success; otherwise true or the value returned by
///          Error()/ErrorMissingFeature().
// NOTE(review): the parameter lines declaring Operands and ErrorInfo are
// missing from this listing.
4401bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
4403 MCStreamer &Out,
4405 bool MatchingInlineAsm) {
4406 assert(!Operands.empty() && "Unexpect empty operand list!");
4407 assert((*Operands[0]).isToken() && "Leading operand should always be a mnemonic!");
 // Mnemonic and Base are both read before the FPU-wait alias step below, so
 // they hold the mnemonic token as it was on entry.
4408 StringRef Mnemonic = (static_cast<X86Operand &>(*Operands[0])).getToken();
4409 SMRange EmptyRange = std::nullopt;
4410 StringRef Base = (static_cast<X86Operand &>(*Operands[0])).getToken();
4411 unsigned Prefixes = getPrefixes(Operands);
4412
4413 // First, handle aliases that expand to multiple instructions.
4414 MatchFPUWaitAlias(IDLoc, static_cast<X86Operand &>(*Operands[0]), Operands, Out, MatchingInlineAsm);
4415 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
4416
4417 MCInst Inst;
4418
4419 // If VEX/EVEX encoding is forced, we need to pass the USE_* flag to the
4420 // encoder and printer.
4421 if (ForcedVEXEncoding == VEXEncoding_VEX)
4422 Prefixes |= X86::IP_USE_VEX;
4423 else if (ForcedVEXEncoding == VEXEncoding_VEX2)
4424 Prefixes |= X86::IP_USE_VEX2;
4425 else if (ForcedVEXEncoding == VEXEncoding_VEX3)
4426 Prefixes |= X86::IP_USE_VEX3;
4427 else if (ForcedVEXEncoding == VEXEncoding_EVEX)
4428 Prefixes |= X86::IP_USE_EVEX;
4429
4430 // Set encoded flags for {disp8} and {disp32}.
4431 if (ForcedDispEncoding == DispEncoding_Disp8)
4432 Prefixes |= X86::IP_USE_DISP8;
4433 else if (ForcedDispEncoding == DispEncoding_Disp32)
4434 Prefixes |= X86::IP_USE_DISP32;
4435
4436 if (Prefixes)
4437 Inst.setFlags(Prefixes);
4438
4439 // Find one unsized memory operand, if present.
4440 X86Operand *UnsizedMemOp = nullptr;
4441 for (const auto &Op : Operands) {
4442 X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
4443 if (X86Op->isMemUnsized()) {
4444 UnsizedMemOp = X86Op;
4445 // Have we found an unqualified memory operand,
4446 // break. IA allows only one memory operand.
4447 break;
4448 }
4449 }
4450
4451 // Allow some instructions to have implicitly pointer-sized operands. This is
4452 // compatible with gas.
4453 if (UnsizedMemOp) {
4454 static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
4455 for (const char *Instr : PtrSizedInstrs) {
4456 if (Mnemonic == Instr) {
4457 UnsizedMemOp->Mem.Size = getPointerWidth();
4458 break;
4459 }
4460 }
4461 }
4462
 // NOTE(review): the declaration of the container Match, which collects the
 // result of every match attempt below, is missing from this listing.
4464 FeatureBitset ErrorInfoMissingFeatures;
4465 FeatureBitset MissingFeatures;
4466
4467 // If unsized push has immediate operand we should default the default pointer
4468 // size for the size.
4469 if (Mnemonic == "push" && Operands.size() == 2) {
4470 auto *X86Op = static_cast<X86Operand *>(Operands[1].get());
4471 if (X86Op->isImm()) {
4472 // If it's not a constant fall through and let remainder take care of it.
4473 const auto *CE = dyn_cast<MCConstantExpr>(X86Op->getImm());
4474 unsigned Size = getPointerWidth();
4475 if (CE &&
4476 (isIntN(Size, CE->getValue()) || isUIntN(Size, CE->getValue()))) {
4477 SmallString<16> Tmp;
4478 Tmp += Base;
4479 Tmp += (is64BitMode())
4480 ? "q"
4481 : (is32BitMode()) ? "l" : (is16BitMode()) ? "w" : " ";
 // The token temporarily refers to Tmp; it is restored to Base before Tmp
 // goes out of scope.
4482 Op.setTokenValue(Tmp);
4483 // Do match in ATT mode to allow explicit suffix usage.
4484 Match.push_back(MatchInstruction(Operands, Inst, ErrorInfo,
4485 MissingFeatures, MatchingInlineAsm,
4486 false /*isParsingIntelSyntax()*/));
4487 Op.setTokenValue(Base);
4488 }
4489 }
4490 }
4491
4492 // If an unsized memory operand is present, try to match with each memory
4493 // operand size. In Intel assembly, the size is not part of the instruction
4494 // mnemonic.
4495 if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
4496 static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
4497 for (unsigned Size : MopSizes) {
4498 UnsizedMemOp->Mem.Size = Size;
4499 uint64_t ErrorInfoIgnore;
4500 unsigned LastOpcode = Inst.getOpcode();
4501 unsigned M = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
4502 MissingFeatures, MatchingInlineAsm,
4503 isParsingIntelSyntax());
 // A result is recorded only for the first attempt or when the attempt
 // changed the opcode stored in Inst.
4504 if (Match.empty() || LastOpcode != Inst.getOpcode())
4505 Match.push_back(M);
4506
4507 // If this returned as a missing feature failure, remember that.
4508 if (Match.back() == Match_MissingFeature)
4509 ErrorInfoMissingFeatures = MissingFeatures;
4510 }
4511
4512 // Restore the size of the unsized memory operand if we modified it.
4513 UnsizedMemOp->Mem.Size = 0;
4514 }
4515
4516 // If we haven't matched anything yet, this is not a basic integer or FPU
4517 // operation. There shouldn't be any ambiguity in our mnemonic table, so try
4518 // matching with the unsized operand.
4519 if (Match.empty()) {
4520 Match.push_back(MatchInstruction(
4521 Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm,
4522 isParsingIntelSyntax()));
4523 // If this returned as a missing feature failure, remember that.
4524 if (Match.back() == Match_MissingFeature)
4525 ErrorInfoMissingFeatures = MissingFeatures;
4526 }
4527
4528 // Restore the size of the unsized memory operand if we modified it.
4529 if (UnsizedMemOp)
4530 UnsizedMemOp->Mem.Size = 0;
4531
4532 // If it's a bad mnemonic, all results will be the same.
4533 if (Match.back() == Match_MnemonicFail) {
4534 return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
4535 Op.getLocRange(), MatchingInlineAsm);
4536 }
4537
4538 unsigned NumSuccessfulMatches = llvm::count(Match, Match_Success);
4539
4540 // If matching was ambiguous and we had size information from the frontend,
4541 // try again with that. This handles cases like "movxz eax, m8/m16".
4542 if (UnsizedMemOp && NumSuccessfulMatches > 1 &&
4543 UnsizedMemOp->getMemFrontendSize()) {
4544 UnsizedMemOp->Mem.Size = UnsizedMemOp->getMemFrontendSize();
4545 unsigned M = MatchInstruction(
4546 Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm,
4547 isParsingIntelSyntax());
4548 if (M == Match_Success)
4549 NumSuccessfulMatches = 1;
4550
4551 // Add a rewrite that encodes the size information we used from the
4552 // frontend.
4553 InstInfo->AsmRewrites->emplace_back(
4554 AOK_SizeDirective, UnsizedMemOp->getStartLoc(),
4555 /*Len=*/0, UnsizedMemOp->getMemFrontendSize());
4556 }
4557
4558 // If exactly one matched, then we treat that as a successful match (and the
4559 // instruction will already have been filled in correctly, since the failing
4560 // matches won't have modified it).
4561 if (NumSuccessfulMatches == 1) {
4562 if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
4563 return true;
4564 // Some instructions need post-processing to, for example, tweak which
4565 // encoding is selected. Loop on it while changes happen so the individual
4566 // transformations can chain off each other.
4567 if (!MatchingInlineAsm)
4568 while (processInstruction(Inst, Operands))
4569 ;
4570 Inst.setLoc(IDLoc);
4571 if (!MatchingInlineAsm)
4572 emitInstruction(Inst, Operands, Out);
4573 Opcode = Inst.getOpcode();
4574 return false;
4575 } else if (NumSuccessfulMatches > 1) {
4576 assert(UnsizedMemOp &&
4577 "multiple matches only possible with unsized memory operands");
 // NOTE(review): unlike the other Error() calls in this function, this one
 // does not pass MatchingInlineAsm - confirm that this is intended.
4578 return Error(UnsizedMemOp->getStartLoc(),
4579 "ambiguous operand size for instruction '" + Mnemonic + "\'",
4580 UnsizedMemOp->getLocRange());
4581 }
4582
4583 // If one instruction matched as unsupported, report this as unsupported.
4584 if (llvm::count(Match, Match_Unsupported) == 1) {
4585 return Error(IDLoc, "unsupported instruction", EmptyRange,
4586 MatchingInlineAsm);
4587 }
4588
4589 // If one instruction matched with a missing feature, report this as a
4590 // missing feature.
4591 if (llvm::count(Match, Match_MissingFeature) == 1) {
4592 ErrorInfo = Match_MissingFeature;
4593 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures,
4594 MatchingInlineAsm);
4595 }
4596
4597 // If one instruction matched with an invalid operand, report this as an
4598 // operand failure.
4599 if (llvm::count(Match, Match_InvalidOperand) == 1) {
4600 return Error(IDLoc, "invalid operand for instruction", EmptyRange,
4601 MatchingInlineAsm);
4602 }
4603
4604 if (llvm::count(Match, Match_InvalidImmUnsignedi4) == 1) {
4605 SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc();
4606 if (ErrorLoc == SMLoc())
4607 ErrorLoc = IDLoc;
4608 return Error(ErrorLoc, "immediate must be an integer in range [0, 15]",
4609 EmptyRange, MatchingInlineAsm);
4610 }
4611
4612 // If all of these were an outright failure, report it in a useless way.
4613 return Error(IDLoc, "unknown instruction mnemonic", EmptyRange,
4614 MatchingInlineAsm);
4615}
4616
4617bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
4618 return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
4619}
4620
4621bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
4622 MCAsmParser &Parser = getParser();
4623 StringRef IDVal = DirectiveID.getIdentifier();
4624 if (IDVal.startswith(".arch"))
4625 return parseDirectiveArch();
4626 if (IDVal.startswith(".code"))
4627 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
4628 else if (IDVal.startswith(".att_syntax")) {
4629 if (getLexer().isNot(AsmToken::EndOfStatement)) {
4630 if (Parser.getTok().getString() == "prefix")
4631 Parser.Lex();
4632 else if (Parser.getTok().getString() == "noprefix")
4633 return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
4634 "supported: registers must have a "
4635 "'%' prefix in .att_syntax");
4636 }
4637 getParser().setAssemblerDialect(0);
4638 return false;
4639 } else if (IDVal.startswith(".intel_syntax")) {
4640 getParser().setAssemblerDialect(1);
4641 if (getLexer().isNot(AsmToken::EndOfStatement)) {
4642 if (Parser.getTok().getString() == "noprefix")
4643 Parser.Lex();
4644 else if (Parser.getTok().getString() == "prefix")
4645 return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
4646 "supported: registers must not have "
4647 "a '%' prefix in .intel_syntax");
4648 }
4649 return false;
4650 } else if (IDVal == ".nops")
4651 return parseDirectiveNops(DirectiveID.getLoc());
4652 else if (IDVal == ".even")
4653 return parseDirectiveEven(DirectiveID.getLoc());
4654 else if (IDVal == ".cv_fpo_proc")
4655 return parseDirectiveFPOProc(DirectiveID.getLoc());
4656 else if (IDVal == ".cv_fpo_setframe")
4657 return parseDirectiveFPOSetFrame(DirectiveID.getLoc());
4658 else if (IDVal == ".cv_fpo_pushreg")
4659 return parseDirectiveFPOPushReg(DirectiveID.getLoc());
4660 else if (IDVal == ".cv_fpo_stackalloc")
4661 return parseDirectiveFPOStackAlloc(DirectiveID.getLoc());
4662 else if (IDVal == ".cv_fpo_stackalign")
4663 return parseDirectiveFPOStackAlign(DirectiveID.getLoc());
4664 else if (IDVal == ".cv_fpo_endprologue")
4665 return parseDirectiveFPOEndPrologue(DirectiveID.getLoc());
4666 else if (IDVal == ".cv_fpo_endproc")
4667 return parseDirectiveFPOEndProc(DirectiveID.getLoc());
4668 else if (IDVal == ".seh_pushreg" ||
4669 (Parser.isParsingMasm() && IDVal.equals_insensitive(".pushreg")))
4670 return parseDirectiveSEHPushReg(DirectiveID.getLoc());
4671 else if (IDVal == ".seh_setframe" ||
4672 (Parser.isParsingMasm() && IDVal.equals_insensitive(".setframe")))
4673 return parseDirectiveSEHSetFrame(DirectiveID.getLoc());
4674 else if (IDVal == ".seh_savereg" ||
4675 (Parser.isParsingMasm() && IDVal.equals_insensitive(".savereg")))
4676 return parseDirectiveSEHSaveReg(DirectiveID.getLoc());
4677 else if (IDVal == ".seh_savexmm" ||
4678 (Parser.isParsingMasm() && IDVal.equals_insensitive(".savexmm128")))
4679 return parseDirectiveSEHSaveXMM(DirectiveID.getLoc());
4680 else if (IDVal == ".seh_pushframe" ||
4681 (Parser.isParsingMasm() && IDVal.equals_insensitive(".pushframe")))
4682 return parseDirectiveSEHPushFrame(DirectiveID.getLoc());
4683
4684 return true;
4685}
4686
4687bool X86AsmParser::parseDirectiveArch() {
4688 // Ignore .arch for now.
4689 getParser().parseStringToEndOfStatement();
4690 return false;
4691}
4692
4693/// parseDirectiveNops
4694/// ::= .nops size[, control]
4695bool X86AsmParser::parseDirectiveNops(SMLoc L) {
4696 int64_t NumBytes = 0, Control = 0;
4697 SMLoc NumBytesLoc, ControlLoc;
4698 const MCSubtargetInfo& STI = getSTI();
4699 NumBytesLoc = getTok().getLoc();
4700 if (getParser().checkForValidSection() ||
4701 getParser().parseAbsoluteExpression(NumBytes))
4702 return true;
4703
4704 if (parseOptionalToken(AsmToken::Comma)) {
4705 ControlLoc = getTok().getLoc();
4706 if (getParser().parseAbsoluteExpression(Control))
4707 return true;
4708 }
4709 if (getParser().parseEOL())
4710 return true;
4711
4712 if (NumBytes <= 0) {
4713 Error(NumBytesLoc, "'.nops' directive with non-positive size");
4714 return false;
4715 }
4716
4717 if (Control < 0) {
4718 Error(ControlLoc, "'.nops' directive with negative NOP size");
4719 return false;
4720 }
4721
4722 /// Emit nops
4723 getParser().getStreamer().emitNops(NumBytes, Control, L, STI);
4724
4725 return false;
4726}
4727
4728/// parseDirectiveEven
4729/// ::= .even
4730bool X86AsmParser::parseDirectiveEven(SMLoc L) {
4731 if (parseEOL())
4732 return false;
4733
4734 const MCSection *Section = getStreamer().getCurrentSectionOnly();
4735 if (!Section) {
4736 getStreamer().initSections(false, getSTI());
4737 Section = getStreamer().getCurrentSectionOnly();
4738 }
4739 if (Section->useCodeAlign())
4740 getStreamer().emitCodeAlignment(Align(2), &getSTI(), 0);
4741 else
4742 getStreamer().emitValueToAlignment(Align(2), 0, 1, 0);
4743 return false;
4744}
4745
4746/// ParseDirectiveCode
4747/// ::= .code16 | .code32 | .code64
4748bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
4749 MCAsmParser &Parser = getParser();
4750 Code16GCC = false;
4751 if (IDVal == ".code16") {
4752 Parser.Lex();
4753 if (!is16BitMode()) {
4754 SwitchMode(X86::Is16Bit);
4755 getParser().getStreamer().emitAssemblerFlag(MCAF_Code16);
4756 }
4757 } else if (IDVal == ".code16gcc") {
4758 // .code16gcc parses as if in 32-bit mode, but emits code in 16-bit mode.
4759 Parser.Lex();
4760 Code16GCC = true;
4761 if (!is16BitMode()) {
4762 SwitchMode(X86::Is16Bit);
4763 getParser().getStreamer().emitAssemblerFlag(MCAF_Code16);
4764 }
4765 } else if (IDVal == ".code32") {
4766 Parser.Lex();
4767 if (!is32BitMode()) {
4768 SwitchMode(X86::Is32Bit);
4769 getParser().getStreamer().emitAssemblerFlag(MCAF_Code32);
4770 }
4771 } else if (IDVal == ".code64") {
4772 Parser.Lex();
4773 if (!is64BitMode()) {
4774 SwitchMode(X86::Is64Bit);
4775 getParser().getStreamer().emitAssemblerFlag(MCAF_Code64);
4776 }
4777 } else {
4778 Error(L, "unknown directive " + IDVal);
4779 return false;
4780 }
4781
4782 return false;
4783}
4784
4785// .cv_fpo_proc foo
4786bool X86AsmParser::parseDirectiveFPOProc(SMLoc L) {
4787 MCAsmParser &Parser = getParser();
4788 StringRef ProcName;
4789 int64_t ParamsSize;
4790 if (Parser.parseIdentifier(ProcName))
4791 return Parser.TokError("expected symbol name");
4792 if (Parser.parseIntToken(ParamsSize, "expected parameter byte count"))
4793 return true;
4794 if (!isUIntN(32, ParamsSize))
4795 return Parser.TokError("parameters size out of range");
4796 if (parseEOL())
4797 return true;
4798 MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName);
4799 return getTargetStreamer().emitFPOProc(ProcSym, ParamsSize, L);
4800}
4801
4802// .cv_fpo_setframe ebp
4803bool X86AsmParser::parseDirectiveFPOSetFrame(SMLoc L) {
4805 SMLoc DummyLoc;
4806 if (parseRegister(Reg, DummyLoc, DummyLoc) || parseEOL())
4807 return true;
4808 return getTargetStreamer().emitFPOSetFrame(Reg, L);
4809}
4810
4811// .cv_fpo_pushreg ebx
4812bool X86AsmParser::parseDirectiveFPOPushReg(SMLoc L) {
4814 SMLoc DummyLoc;
4815 if (parseRegister(Reg, DummyLoc, DummyLoc) || parseEOL())
4816 return true;
4817 return getTargetStreamer().emitFPOPushReg(Reg, L);
4818}
4819
4820// .cv_fpo_stackalloc 20
4821bool X86AsmParser::parseDirectiveFPOStackAlloc(SMLoc L) {
4822 MCAsmParser &Parser = getParser();
4823 int64_t Offset;
4824 if (Parser.parseIntToken(Offset, "expected offset") || parseEOL())
4825 return true;
4826 return getTargetStreamer().emitFPOStackAlloc(Offset, L);
4827}
4828
4829// .cv_fpo_stackalign 8
4830bool X86AsmParser::parseDirectiveFPOStackAlign(SMLoc L) {
4831 MCAsmParser &Parser = getParser();
4832 int64_t Offset;
4833 if (Parser.parseIntToken(Offset, "expected offset") || parseEOL())
4834 return true;
4835 return getTargetStreamer().emitFPOStackAlign(Offset, L);
4836}
4837
4838// .cv_fpo_endprologue
4839bool X86AsmParser::parseDirectiveFPOEndPrologue(SMLoc L) {
4840 MCAsmParser &Parser = getParser();
4841 if (Parser.parseEOL())
4842 return true;
4843 return getTargetStreamer().emitFPOEndPrologue(L);
4844}
4845
4846// .cv_fpo_endproc
4847bool X86AsmParser::parseDirectiveFPOEndProc(SMLoc L) {
4848 MCAsmParser &Parser = getParser();
4849 if (Parser.parseEOL())
4850 return true;
4851 return getTargetStreamer().emitFPOEndProc(L);
4852}
4853
4854bool X86AsmParser::parseSEHRegisterNumber(unsigned RegClassID,
4855 MCRegister &RegNo) {
4856 SMLoc startLoc = getLexer().getLoc();
4857 const MCRegisterInfo *MRI = getContext().getRegisterInfo();
4858
4859 // Try parsing the argument as a register first.
4860 if (getLexer().getTok().isNot(AsmToken::Integer)) {
4861 SMLoc endLoc;
4862 if (parseRegister(RegNo, startLoc, endLoc))
4863 return true;
4864
4865 if (!X86MCRegisterClasses[RegClassID].contains(RegNo)) {
4866 return Error(startLoc,
4867 "register is not supported for use with this directive");
4868 }
4869 } else {
4870 // Otherwise, an integer number matching the encoding of the desired
4871 // register may appear.
4872 int64_t EncodedReg;
4873 if (getParser().parseAbsoluteExpression(EncodedReg))
4874 return true;
4875
4876 // The SEH register number is the same as the encoding register number. Map
4877 // from the encoding back to the LLVM register number.
4878 RegNo = 0;
4879 for (MCPhysReg Reg : X86MCRegisterClasses[RegClassID]) {
4880 if (MRI->getEncodingValue(Reg) == EncodedReg) {
4881 RegNo = Reg;
4882 break;
4883 }
4884 }
4885 if (RegNo == 0) {
4886 return Error(startLoc,
4887 "incorrect register number for use with this directive");
4888 }
4889 }
4890
4891 return false;
4892}
4893
4894bool X86AsmParser::parseDirectiveSEHPushReg(SMLoc Loc) {
4896 if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg))
4897 return true;
4898
4899 if (getLexer().isNot(AsmToken::EndOfStatement))
4900 return TokError("expected end of directive");
4901
4902 getParser().Lex();
4903 getStreamer().emitWinCFIPushReg(Reg, Loc);
4904 return false;
4905}
4906
4907bool X86AsmParser::parseDirectiveSEHSetFrame(SMLoc Loc) {
4909 int64_t Off;
4910 if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg))
4911 return true;
4912 if (getLexer().isNot(AsmToken::Comma))
4913 return TokError("you must specify a stack pointer offset");
4914
4915 getParser().Lex();
4916 if (getParser().parseAbsoluteExpression(Off))
4917 return true;
4918
4919 if (getLexer().isNot(AsmToken::EndOfStatement))
4920 return TokError("expected end of directive");
4921
4922 getParser().Lex();
4923 getStreamer().emitWinCFISetFrame(Reg, Off, Loc);
4924 return false;
4925}
4926
4927bool X86AsmParser::parseDirectiveSEHSaveReg(SMLoc Loc) {
4929 int64_t Off;
4930 if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg))
4931 return true;
4932 if (getLexer().isNot(AsmToken::Comma))
4933 return TokError("you must specify an offset on the stack");
4934
4935 getParser().Lex();
4936 if (getParser().parseAbsoluteExpression(Off))
4937 return true;
4938
4939 if (getLexer().isNot(AsmToken::EndOfStatement))
4940 return TokError("expected end of directive");
4941
4942 getParser().Lex();
4943 getStreamer().emitWinCFISaveReg(Reg, Off, Loc);
4944 return false;
4945}
4946
4947bool X86AsmParser::parseDirectiveSEHSaveXMM(SMLoc Loc) {
4949 int64_t Off;
4950 if (parseSEHRegisterNumber(X86::VR128XRegClassID, Reg))
4951 return true;
4952 if (getLexer().isNot(AsmToken::Comma))
4953 return TokError("you must specify an offset on the stack");
4954
4955 getParser().Lex();
4956 if (getParser().parseAbsoluteExpression(Off))
4957 return true;
4958
4959 if (getLexer().isNot(AsmToken::EndOfStatement))
4960 return TokError("expected end of directive");
4961
4962 getParser().Lex();
4963 getStreamer().emitWinCFISaveXMM(Reg, Off, Loc);
4964 return false;
4965}
4966
4967bool X86AsmParser::parseDirectiveSEHPushFrame(SMLoc Loc) {
4968 bool Code = false;
4969 StringRef CodeID;
4970 if (getLexer().is(AsmToken::At)) {
4971 SMLoc startLoc = getLexer().getLoc();
4972 getParser().Lex();
4973 if (!getParser().parseIdentifier(CodeID)) {
4974 if (CodeID != "code")
4975 return Error(startLoc, "expected @code");
4976 Code = true;
4977 }
4978 }
4979
4980 if (getLexer().isNot(AsmToken::EndOfStatement))
4981 return TokError("expected end of directive");
4982
4983 getParser().Lex();
4984 getStreamer().emitWinCFIPushFrame(Code, Loc);
4985 return false;
4986}
4987
4988// Force static initialization.
4992}
4993
4994#define GET_REGISTER_MATCHER
4995#define GET_MATCHER_IMPLEMENTATION
4996#define GET_SUBTARGET_FEATURE_NAME
4997#include "X86GenAsmMatcher.inc"
unsigned const MachineRegisterInfo * MRI
static bool isNot(const MachineRegisterInfo &MRI, const MachineInstr &MI)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
#define LLVM_EXTERNAL_VISIBILITY
Definition: Compiler.h:127
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
std::string Name
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
#define check(cond)
amode Optimize addressing mode
static ModuleSymbolTable::Symbol getSym(DataRefImpl &Symb)
#define RegName(no)
static LVOptions Options
Definition: LVOptions.cpp:25
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
static bool IsVCMP(unsigned Opcode)
static bool startswith(StringRef Magic, const char(&S)[N])
Definition: Magic.cpp:28
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
uint64_t TSFlags
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
This file defines the SmallString class.
This file defines the SmallVector class.
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:40
@ Flags
Definition: TextStubV5.cpp:93
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:467
static cl::opt< bool > LVIInlineAsmHardening("x86-experimental-lvi-inline-asm-hardening", cl::desc("Harden inline assembly code that may be vulnerable to Load Value" " Injection (LVI). This feature is experimental."), cl::Hidden)
static bool checkScale(unsigned Scale, StringRef &ErrMsg)
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86AsmParser()
static unsigned getPrefixes(OperandVector &Operands)
static const char * getSubtargetFeatureName(uint64_t Val)
static unsigned MatchRegisterName(StringRef Name)
static bool CheckBaseRegAndIndexRegAndScale(unsigned BaseReg, unsigned IndexReg, unsigned Scale, bool Is64BitMode, StringRef &ErrMsg)
}
Value * RHS
Value * LHS
static unsigned getSize(unsigned Kind)
Class for arbitrary precision integers.
Definition: APInt.h:75
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1494
Target independent representation for an assembler token.
Definition: MCAsmMacro.h:21
SMLoc getLoc() const
Definition: MCAsmLexer.cpp:26
int64_t getIntVal() const
Definition: MCAsmMacro.h:115
bool isNot(TokenKind K) const
Definition: MCAsmMacro.h:83
StringRef getString() const
Get the string for the current token, this includes all characters (for example, the quotes on string...
Definition: MCAsmMacro.h:110
bool is(TokenKind K) const
Definition: MCAsmMacro.h:82
TokenKind getKind() const
Definition: MCAsmMacro.h:81
SMLoc getEndLoc() const
Definition: MCAsmLexer.cpp:30