LLVM 22.0.0git
X86AsmParser.cpp
Go to the documentation of this file.
1//===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
17#include "X86Operand.h"
18#include "X86RegisterInfo.h"
19#include "llvm-c/Visibility.h"
20#include "llvm/ADT/STLExtras.h"
23#include "llvm/ADT/StringRef.h"
25#include "llvm/ADT/Twine.h"
26#include "llvm/MC/MCContext.h"
27#include "llvm/MC/MCExpr.h"
28#include "llvm/MC/MCInst.h"
29#include "llvm/MC/MCInstrInfo.h"
34#include "llvm/MC/MCRegister.h"
36#include "llvm/MC/MCSection.h"
37#include "llvm/MC/MCStreamer.h"
39#include "llvm/MC/MCSymbol.h"
45#include <algorithm>
46#include <cstdint>
47#include <memory>
48
49using namespace llvm;
50
52 "x86-experimental-lvi-inline-asm-hardening",
53 cl::desc("Harden inline assembly code that may be vulnerable to Load Value"
54 " Injection (LVI). This feature is experimental."), cl::Hidden);
55
56static bool checkScale(unsigned Scale, StringRef &ErrMsg) {
57 if (Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
58 ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
59 return true;
60 }
61 return false;
62}
63
64namespace {
65
66// Including the generated SSE2AVX compression tables.
67#define GET_X86_SSE2AVX_TABLE
68#include "X86GenInstrMapping.inc"
69
// Operator precedence table for InfixCalculator, indexed by
// InfixCalculatorTok. The entry order must stay in sync with that enum.
// Larger values bind more tightly.
static const char OpPrecedence[] = {
    0, // IC_OR
    1, // IC_XOR
    2, // IC_AND
    4, // IC_LSHIFT
    4, // IC_RSHIFT
    5, // IC_PLUS
    5, // IC_MINUS
    6, // IC_MULTIPLY
    6, // IC_DIVIDE
    6, // IC_MOD
    7, // IC_NOT
    8, // IC_NEG
    9, // IC_RPAREN
    10, // IC_LPAREN
    0, // IC_IMM
    0, // IC_REGISTER
    3, // IC_EQ
    3, // IC_NE
    3, // IC_LT
    3, // IC_LE
    3, // IC_GT
    3 // IC_GE
};
94
95class X86AsmParser : public MCTargetAsmParser {
96 ParseInstructionInfo *InstInfo;
97 bool Code16GCC;
98 unsigned ForcedDataPrefix = 0;
99
100 enum OpcodePrefix {
101 OpcodePrefix_Default,
102 OpcodePrefix_REX,
103 OpcodePrefix_REX2,
104 OpcodePrefix_VEX,
105 OpcodePrefix_VEX2,
106 OpcodePrefix_VEX3,
107 OpcodePrefix_EVEX,
108 };
109
110 OpcodePrefix ForcedOpcodePrefix = OpcodePrefix_Default;
111
112 enum DispEncoding {
113 DispEncoding_Default,
114 DispEncoding_Disp8,
115 DispEncoding_Disp32,
116 };
117
118 DispEncoding ForcedDispEncoding = DispEncoding_Default;
119
120 // Does this instruction use apx extended register?
121 bool UseApxExtendedReg = false;
122 // Is this instruction explicitly required not to update flags?
123 bool ForcedNoFlag = false;
124
125private:
126 SMLoc consumeToken() {
127 MCAsmParser &Parser = getParser();
128 SMLoc Result = Parser.getTok().getLoc();
129 Parser.Lex();
130 return Result;
131 }
132
133 bool tokenIsStartOfStatement(AsmToken::TokenKind Token) override {
134 return Token == AsmToken::LCurly;
135 }
136
  /// Return the streamer's target streamer downcast to X86TargetStreamer.
  /// Asserts that a target streamer is actually attached.
  X86TargetStreamer &getTargetStreamer() {
    assert(getParser().getStreamer().getTargetStreamer() &&
           "do not have a target streamer");
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<X86TargetStreamer &>(TS);
  }
143
  /// Run the generated instruction matcher. In Code16GCC mode the subtarget
  /// is temporarily switched to 32-bit mode around the match and switched
  /// back to 16-bit mode afterwards, so matching itself is done as 32-bit.
  unsigned MatchInstruction(const OperandVector &Operands, MCInst &Inst,
                            uint64_t &ErrorInfo, FeatureBitset &MissingFeatures,
                            bool matchingInlineAsm, unsigned VariantID = 0) {
    // In Code16GCC mode, match as 32-bit.
    if (Code16GCC)
      SwitchMode(X86::Is32Bit);
    unsigned rv = MatchInstructionImpl(Operands, Inst, ErrorInfo,
                                       MissingFeatures, matchingInlineAsm,
                                       VariantID);
    if (Code16GCC)
      SwitchMode(X86::Is16Bit);
    return rv;
  }
157
  // Token kinds for the infix calculator. The order must stay in sync with
  // the OpPrecedence table above, which is indexed by these values.
  enum InfixCalculatorTok {
    IC_OR = 0,
    IC_XOR,
    IC_AND,
    IC_LSHIFT,
    IC_RSHIFT,
    IC_PLUS,
    IC_MINUS,
    IC_MULTIPLY,
    IC_DIVIDE,
    IC_MOD,
    IC_NOT,
    IC_NEG,
    IC_RPAREN,
    IC_LPAREN,
    IC_IMM,
    IC_REGISTER,
    IC_EQ,
    IC_NE,
    IC_LT,
    IC_LE,
    IC_GT,
    IC_GE
  };
182
  // Intel-syntax inline-asm operators (LENGTH/SIZE/TYPE).
  enum IntelOperatorKind {
    IOK_INVALID = 0,
    IOK_LENGTH,
    IOK_SIZE,
    IOK_TYPE,
  };
189
  // MASM operators (LENGTHOF/SIZEOF/TYPE).
  enum MasmOperatorKind {
    MOK_INVALID = 0,
    MOK_LENGTHOF,
    MOK_SIZEOF,
    MOK_TYPE,
  };
196
197 class InfixCalculator {
198 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
199 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
200 SmallVector<ICToken, 4> PostfixStack;
201
202 bool isUnaryOperator(InfixCalculatorTok Op) const {
203 return Op == IC_NEG || Op == IC_NOT;
204 }
205
206 public:
207 int64_t popOperand() {
208 assert (!PostfixStack.empty() && "Poped an empty stack!");
209 ICToken Op = PostfixStack.pop_back_val();
210 if (!(Op.first == IC_IMM || Op.first == IC_REGISTER))
211 return -1; // The invalid Scale value will be caught later by checkScale
212 return Op.second;
213 }
214 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
215 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
216 "Unexpected operand!");
217 PostfixStack.push_back(std::make_pair(Op, Val));
218 }
219
220 void popOperator() { InfixOperatorStack.pop_back(); }
221 void pushOperator(InfixCalculatorTok Op) {
222 // Push the new operator if the stack is empty.
223 if (InfixOperatorStack.empty()) {
224 InfixOperatorStack.push_back(Op);
225 return;
226 }
227
228 // Push the new operator if it has a higher precedence than the operator
229 // on the top of the stack or the operator on the top of the stack is a
230 // left parentheses.
231 unsigned Idx = InfixOperatorStack.size() - 1;
232 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
233 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
234 InfixOperatorStack.push_back(Op);
235 return;
236 }
237
238 // The operator on the top of the stack has higher precedence than the
239 // new operator.
240 unsigned ParenCount = 0;
241 while (true) {
242 // Nothing to process.
243 if (InfixOperatorStack.empty())
244 break;
245
246 Idx = InfixOperatorStack.size() - 1;
247 StackOp = InfixOperatorStack[Idx];
248 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
249 break;
250
251 // If we have an even parentheses count and we see a left parentheses,
252 // then stop processing.
253 if (!ParenCount && StackOp == IC_LPAREN)
254 break;
255
256 if (StackOp == IC_RPAREN) {
257 ++ParenCount;
258 InfixOperatorStack.pop_back();
259 } else if (StackOp == IC_LPAREN) {
260 --ParenCount;
261 InfixOperatorStack.pop_back();
262 } else {
263 InfixOperatorStack.pop_back();
264 PostfixStack.push_back(std::make_pair(StackOp, 0));
265 }
266 }
267 // Push the new operator.
268 InfixOperatorStack.push_back(Op);
269 }
270
271 int64_t execute() {
272 // Push any remaining operators onto the postfix stack.
273 while (!InfixOperatorStack.empty()) {
274 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
275 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
276 PostfixStack.push_back(std::make_pair(StackOp, 0));
277 }
278
279 if (PostfixStack.empty())
280 return 0;
281
282 SmallVector<ICToken, 16> OperandStack;
283 for (const ICToken &Op : PostfixStack) {
284 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
285 OperandStack.push_back(Op);
286 } else if (isUnaryOperator(Op.first)) {
287 assert (OperandStack.size() > 0 && "Too few operands.");
288 ICToken Operand = OperandStack.pop_back_val();
289 assert (Operand.first == IC_IMM &&
290 "Unary operation with a register!");
291 switch (Op.first) {
292 default:
293 report_fatal_error("Unexpected operator!");
294 break;
295 case IC_NEG:
296 OperandStack.push_back(std::make_pair(IC_IMM, -Operand.second));
297 break;
298 case IC_NOT:
299 OperandStack.push_back(std::make_pair(IC_IMM, ~Operand.second));
300 break;
301 }
302 } else {
303 assert (OperandStack.size() > 1 && "Too few operands.");
304 int64_t Val;
305 ICToken Op2 = OperandStack.pop_back_val();
306 ICToken Op1 = OperandStack.pop_back_val();
307 switch (Op.first) {
308 default:
309 report_fatal_error("Unexpected operator!");
310 break;
311 case IC_PLUS:
312 Val = Op1.second + Op2.second;
313 OperandStack.push_back(std::make_pair(IC_IMM, Val));
314 break;
315 case IC_MINUS:
316 Val = Op1.second - Op2.second;
317 OperandStack.push_back(std::make_pair(IC_IMM, Val));
318 break;
319 case IC_MULTIPLY:
320 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
321 "Multiply operation with an immediate and a register!");
322 Val = Op1.second * Op2.second;
323 OperandStack.push_back(std::make_pair(IC_IMM, Val));
324 break;
325 case IC_DIVIDE:
326 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
327 "Divide operation with an immediate and a register!");
328 assert (Op2.second != 0 && "Division by zero!");
329 Val = Op1.second / Op2.second;
330 OperandStack.push_back(std::make_pair(IC_IMM, Val));
331 break;
332 case IC_MOD:
333 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
334 "Modulo operation with an immediate and a register!");
335 Val = Op1.second % Op2.second;
336 OperandStack.push_back(std::make_pair(IC_IMM, Val));
337 break;
338 case IC_OR:
339 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
340 "Or operation with an immediate and a register!");
341 Val = Op1.second | Op2.second;
342 OperandStack.push_back(std::make_pair(IC_IMM, Val));
343 break;
344 case IC_XOR:
345 assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
346 "Xor operation with an immediate and a register!");
347 Val = Op1.second ^ Op2.second;
348 OperandStack.push_back(std::make_pair(IC_IMM, Val));
349 break;
350 case IC_AND:
351 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
352 "And operation with an immediate and a register!");
353 Val = Op1.second & Op2.second;
354 OperandStack.push_back(std::make_pair(IC_IMM, Val));
355 break;
356 case IC_LSHIFT:
357 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
358 "Left shift operation with an immediate and a register!");
359 Val = Op1.second << Op2.second;
360 OperandStack.push_back(std::make_pair(IC_IMM, Val));
361 break;
362 case IC_RSHIFT:
363 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
364 "Right shift operation with an immediate and a register!");
365 Val = Op1.second >> Op2.second;
366 OperandStack.push_back(std::make_pair(IC_IMM, Val));
367 break;
368 case IC_EQ:
369 assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
370 "Equals operation with an immediate and a register!");
371 Val = (Op1.second == Op2.second) ? -1 : 0;
372 OperandStack.push_back(std::make_pair(IC_IMM, Val));
373 break;
374 case IC_NE:
375 assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
376 "Not-equals operation with an immediate and a register!");
377 Val = (Op1.second != Op2.second) ? -1 : 0;
378 OperandStack.push_back(std::make_pair(IC_IMM, Val));
379 break;
380 case IC_LT:
381 assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
382 "Less-than operation with an immediate and a register!");
383 Val = (Op1.second < Op2.second) ? -1 : 0;
384 OperandStack.push_back(std::make_pair(IC_IMM, Val));
385 break;
386 case IC_LE:
387 assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
388 "Less-than-or-equal operation with an immediate and a "
389 "register!");
390 Val = (Op1.second <= Op2.second) ? -1 : 0;
391 OperandStack.push_back(std::make_pair(IC_IMM, Val));
392 break;
393 case IC_GT:
394 assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
395 "Greater-than operation with an immediate and a register!");
396 Val = (Op1.second > Op2.second) ? -1 : 0;
397 OperandStack.push_back(std::make_pair(IC_IMM, Val));
398 break;
399 case IC_GE:
400 assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
401 "Greater-than-or-equal operation with an immediate and a "
402 "register!");
403 Val = (Op1.second >= Op2.second) ? -1 : 0;
404 OperandStack.push_back(std::make_pair(IC_IMM, Val));
405 break;
406 }
407 }
408 }
409 assert (OperandStack.size() == 1 && "Expected a single result.");
410 return OperandStack.pop_back_val().second;
411 }
412 };
413
  // States of the IntelExprStateMachine below. Each on*() handler records the
  // token just seen; IES_ERROR is the sink state for invalid sequences.
  enum IntelExprState {
    IES_INIT,
    IES_OR,
    IES_XOR,
    IES_AND,
    IES_EQ,
    IES_NE,
    IES_LT,
    IES_LE,
    IES_GT,
    IES_GE,
    IES_LSHIFT,
    IES_RSHIFT,
    IES_PLUS,
    IES_MINUS,
    IES_OFFSET,
    IES_CAST,
    IES_NOT,
    IES_MULTIPLY,
    IES_DIVIDE,
    IES_MOD,
    IES_LBRAC,
    IES_RBRAC,
    IES_LPAREN,
    IES_RPAREN,
    IES_REGISTER,
    IES_INTEGER,
    IES_ERROR
  };
443
444 class IntelExprStateMachine {
445 IntelExprState State = IES_INIT, PrevState = IES_ERROR;
446 MCRegister BaseReg, IndexReg, TmpReg;
447 unsigned Scale = 0;
448 int64_t Imm = 0;
449 const MCExpr *Sym = nullptr;
450 StringRef SymName;
451 InfixCalculator IC;
452 InlineAsmIdentifierInfo Info;
453 short BracCount = 0;
454 bool MemExpr = false;
455 bool BracketUsed = false;
456 bool OffsetOperator = false;
457 bool AttachToOperandIdx = false;
458 bool IsPIC = false;
459 SMLoc OffsetOperatorLoc;
460 AsmTypeInfo CurType;
461
462 bool setSymRef(const MCExpr *Val, StringRef ID, StringRef &ErrMsg) {
463 if (Sym) {
464 ErrMsg = "cannot use more than one symbol in memory operand";
465 return true;
466 }
467 Sym = Val;
468 SymName = ID;
469 return false;
470 }
471
472 public:
    IntelExprStateMachine() = default;

    // Accumulate an extra displacement added on top of the calculator result.
    void addImm(int64_t imm) { Imm += imm; }
    short getBracCount() const { return BracCount; }
    bool isMemExpr() const { return MemExpr; }
    bool isBracketUsed() const { return BracketUsed; }
    bool isOffsetOperator() const { return OffsetOperator; }
    SMLoc getOffsetLoc() const { return OffsetOperatorLoc; }
    MCRegister getBaseReg() const { return BaseReg; }
    MCRegister getIndexReg() const { return IndexReg; }
    unsigned getScale() const { return Scale; }
    const MCExpr *getSym() const { return Sym; }
    StringRef getSymName() const { return SymName; }
    StringRef getType() const { return CurType.Name; }
    unsigned getSize() const { return CurType.Size; }
    unsigned getElementSize() const { return CurType.ElementSize; }
    unsigned getLength() const { return CurType.Length; }
    // Total immediate: accumulated displacement plus the evaluated expression.
    int64_t getImm() { return Imm + IC.execute(); }
    // States in which the expression may legally end.
    bool isValidEndState() const {
      return State == IES_RBRAC || State == IES_RPAREN ||
             State == IES_INTEGER || State == IES_REGISTER ||
             State == IES_OFFSET;
    }
496
    // Is the Intel expression appended after an operand index?
    // [OperandIdx][Intel Expression]
    // This is necessary for checking whether it is an independent
    // Intel expression at the back end when parsing inline asm.
    void setAppendAfterOperand() { AttachToOperandIdx = true; }

    bool isPIC() const { return IsPIC; }
    void setPIC() { IsPIC = true; }

    // True once any handler has driven the machine into the error state.
    bool hadError() const { return State == IES_ERROR; }
    const InlineAsmIdentifierInfo &getIdentifierInfo() const { return Info; }
508
    // Diagnose use of more registers than a memory operand can hold. Always
    // returns true so callers can write 'return regsUseUpError(ErrMsg);'.
    bool regsUseUpError(StringRef &ErrMsg) {
      // This case mostly happens in inline asm, e.g. Arr[BaseReg + IndexReg]:
      // we cannot introduce an additional register in inline asm in PIC model.
      if (IsPIC && AttachToOperandIdx)
        ErrMsg = "Don't use 2 or more regs for mem offset in PIC model!";
      else
        ErrMsg = "BaseReg/IndexReg already set!";
      return true;
    }
518
519 void onOr() {
520 IntelExprState CurrState = State;
521 switch (State) {
522 default:
523 State = IES_ERROR;
524 break;
525 case IES_INTEGER:
526 case IES_RPAREN:
527 case IES_REGISTER:
528 State = IES_OR;
529 IC.pushOperator(IC_OR);
530 break;
531 }
532 PrevState = CurrState;
533 }
534 void onXor() {
535 IntelExprState CurrState = State;
536 switch (State) {
537 default:
538 State = IES_ERROR;
539 break;
540 case IES_INTEGER:
541 case IES_RPAREN:
542 case IES_REGISTER:
543 State = IES_XOR;
544 IC.pushOperator(IC_XOR);
545 break;
546 }
547 PrevState = CurrState;
548 }
549 void onAnd() {
550 IntelExprState CurrState = State;
551 switch (State) {
552 default:
553 State = IES_ERROR;
554 break;
555 case IES_INTEGER:
556 case IES_RPAREN:
557 case IES_REGISTER:
558 State = IES_AND;
559 IC.pushOperator(IC_AND);
560 break;
561 }
562 PrevState = CurrState;
563 }
564 void onEq() {
565 IntelExprState CurrState = State;
566 switch (State) {
567 default:
568 State = IES_ERROR;
569 break;
570 case IES_INTEGER:
571 case IES_RPAREN:
572 case IES_REGISTER:
573 State = IES_EQ;
574 IC.pushOperator(IC_EQ);
575 break;
576 }
577 PrevState = CurrState;
578 }
579 void onNE() {
580 IntelExprState CurrState = State;
581 switch (State) {
582 default:
583 State = IES_ERROR;
584 break;
585 case IES_INTEGER:
586 case IES_RPAREN:
587 case IES_REGISTER:
588 State = IES_NE;
589 IC.pushOperator(IC_NE);
590 break;
591 }
592 PrevState = CurrState;
593 }
594 void onLT() {
595 IntelExprState CurrState = State;
596 switch (State) {
597 default:
598 State = IES_ERROR;
599 break;
600 case IES_INTEGER:
601 case IES_RPAREN:
602 case IES_REGISTER:
603 State = IES_LT;
604 IC.pushOperator(IC_LT);
605 break;
606 }
607 PrevState = CurrState;
608 }
609 void onLE() {
610 IntelExprState CurrState = State;
611 switch (State) {
612 default:
613 State = IES_ERROR;
614 break;
615 case IES_INTEGER:
616 case IES_RPAREN:
617 case IES_REGISTER:
618 State = IES_LE;
619 IC.pushOperator(IC_LE);
620 break;
621 }
622 PrevState = CurrState;
623 }
624 void onGT() {
625 IntelExprState CurrState = State;
626 switch (State) {
627 default:
628 State = IES_ERROR;
629 break;
630 case IES_INTEGER:
631 case IES_RPAREN:
632 case IES_REGISTER:
633 State = IES_GT;
634 IC.pushOperator(IC_GT);
635 break;
636 }
637 PrevState = CurrState;
638 }
639 void onGE() {
640 IntelExprState CurrState = State;
641 switch (State) {
642 default:
643 State = IES_ERROR;
644 break;
645 case IES_INTEGER:
646 case IES_RPAREN:
647 case IES_REGISTER:
648 State = IES_GE;
649 IC.pushOperator(IC_GE);
650 break;
651 }
652 PrevState = CurrState;
653 }
654 void onLShift() {
655 IntelExprState CurrState = State;
656 switch (State) {
657 default:
658 State = IES_ERROR;
659 break;
660 case IES_INTEGER:
661 case IES_RPAREN:
662 case IES_REGISTER:
663 State = IES_LSHIFT;
664 IC.pushOperator(IC_LSHIFT);
665 break;
666 }
667 PrevState = CurrState;
668 }
669 void onRShift() {
670 IntelExprState CurrState = State;
671 switch (State) {
672 default:
673 State = IES_ERROR;
674 break;
675 case IES_INTEGER:
676 case IES_RPAREN:
677 case IES_REGISTER:
678 State = IES_RSHIFT;
679 IC.pushOperator(IC_RSHIFT);
680 break;
681 }
682 PrevState = CurrState;
683 }
    // Handle a binary '+'. If the preceding term was a bare register (not a
    // scaled 'Scale*Reg' product), commit it as the base register, or as the
    // index register with no explicit scale when a base is already set.
    bool onPlus(StringRef &ErrMsg) {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
      case IES_OFFSET:
        State = IES_PLUS;
        IC.pushOperator(IC_PLUS);
        if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
          // If we already have a BaseReg, then assume this is the IndexReg with
          // no explicit scale.
          if (!BaseReg) {
            BaseReg = TmpReg;
          } else {
            if (IndexReg)
              return regsUseUpError(ErrMsg);
            IndexReg = TmpReg;
            Scale = 0;
          }
        }
        break;
      }
      PrevState = CurrState;
      return false;
    }
    // Handle '-', which may be either binary subtraction or unary negation
    // depending on what preceded it. A '-' directly before a scale (as in
    // 'Reg * -Scale') is rejected. As in onPlus, a preceding bare register
    // is committed to BaseReg/IndexReg here.
    bool onMinus(StringRef &ErrMsg) {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_OR:
      case IES_XOR:
      case IES_AND:
      case IES_EQ:
      case IES_NE:
      case IES_LT:
      case IES_LE:
      case IES_GT:
      case IES_GE:
      case IES_LSHIFT:
      case IES_RSHIFT:
      case IES_PLUS:
      case IES_NOT:
      case IES_MULTIPLY:
      case IES_DIVIDE:
      case IES_MOD:
      case IES_LPAREN:
      case IES_RPAREN:
      case IES_LBRAC:
      case IES_RBRAC:
      case IES_INTEGER:
      case IES_REGISTER:
      case IES_INIT:
      case IES_OFFSET:
        State = IES_MINUS;
        // push minus operator if it is not a negate operator
        if (CurrState == IES_REGISTER || CurrState == IES_RPAREN ||
            CurrState == IES_INTEGER || CurrState == IES_RBRAC ||
            CurrState == IES_OFFSET)
          IC.pushOperator(IC_MINUS);
        else if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
          // We have negate operator for Scale: it's illegal
          ErrMsg = "Scale can't be negative";
          return true;
        } else
          IC.pushOperator(IC_NEG);
        if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
          // If we already have a BaseReg, then assume this is the IndexReg with
          // no explicit scale.
          if (!BaseReg) {
            BaseReg = TmpReg;
          } else {
            if (IndexReg)
              return regsUseUpError(ErrMsg);
            IndexReg = TmpReg;
            Scale = 0;
          }
        }
        break;
      }
      PrevState = CurrState;
      return false;
    }
    // Handle a unary bitwise NOT; legal only where a new term may begin.
    void onNot() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_OR:
      case IES_XOR:
      case IES_AND:
      case IES_EQ:
      case IES_NE:
      case IES_LT:
      case IES_LE:
      case IES_GT:
      case IES_GE:
      case IES_LSHIFT:
      case IES_RSHIFT:
      case IES_PLUS:
      case IES_MINUS:
      case IES_NOT:
      case IES_MULTIPLY:
      case IES_DIVIDE:
      case IES_MOD:
      case IES_LPAREN:
      case IES_LBRAC:
      case IES_INIT:
        State = IES_NOT;
        IC.pushOperator(IC_NOT);
        break;
      }
      PrevState = CurrState;
    }
    // Handle a register reference. A register after '*' preceded by an
    // integer forms a 'Scale * Register' index pair; the scale operand is
    // popped from the calculator, validated, and the product is replaced by
    // a 0 immediate so the remaining expression still evaluates.
    bool onRegister(MCRegister Reg, StringRef &ErrMsg) {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_PLUS:
      case IES_LPAREN:
      case IES_LBRAC:
        State = IES_REGISTER;
        TmpReg = Reg;
        IC.pushOperand(IC_REGISTER);
        break;
      case IES_MULTIPLY:
        // Index Register - Scale * Register
        if (PrevState == IES_INTEGER) {
          if (IndexReg)
            return regsUseUpError(ErrMsg);
          State = IES_REGISTER;
          IndexReg = Reg;
          // Get the scale and replace the 'Scale * Register' with '0'.
          Scale = IC.popOperand();
          if (checkScale(Scale, ErrMsg))
            return true;
          IC.pushOperand(IC_IMM);
          IC.popOperator();
        } else {
          State = IES_ERROR;
        }
        break;
      }
      PrevState = CurrState;
      return false;
    }
838 bool onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName,
839 const InlineAsmIdentifierInfo &IDInfo,
840 const AsmTypeInfo &Type, bool ParsingMSInlineAsm,
841 StringRef &ErrMsg) {
842 // InlineAsm: Treat an enum value as an integer
843 if (ParsingMSInlineAsm)
845 return onInteger(IDInfo.Enum.EnumVal, ErrMsg);
846 // Treat a symbolic constant like an integer
847 if (auto *CE = dyn_cast<MCConstantExpr>(SymRef))
848 return onInteger(CE->getValue(), ErrMsg);
849 PrevState = State;
850 switch (State) {
851 default:
852 State = IES_ERROR;
853 break;
854 case IES_CAST:
855 case IES_PLUS:
856 case IES_MINUS:
857 case IES_NOT:
858 case IES_INIT:
859 case IES_LBRAC:
860 case IES_LPAREN:
861 if (setSymRef(SymRef, SymRefName, ErrMsg))
862 return true;
863 MemExpr = true;
864 State = IES_INTEGER;
865 IC.pushOperand(IC_IMM);
866 if (ParsingMSInlineAsm)
867 Info = IDInfo;
868 setTypeInfo(Type);
869 break;
870 }
871 return false;
872 }
    // Handle an integer literal. An integer after '*' preceded by a register
    // is the scale of a 'Register * Scale' index pair; otherwise it is a
    // plain immediate operand.
    bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_PLUS:
      case IES_MINUS:
      case IES_NOT:
      case IES_OR:
      case IES_XOR:
      case IES_AND:
      case IES_EQ:
      case IES_NE:
      case IES_LT:
      case IES_LE:
      case IES_GT:
      case IES_GE:
      case IES_LSHIFT:
      case IES_RSHIFT:
      case IES_DIVIDE:
      case IES_MOD:
      case IES_MULTIPLY:
      case IES_LPAREN:
      case IES_INIT:
      case IES_LBRAC:
        State = IES_INTEGER;
        if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
          // Index Register - Register * Scale
          if (IndexReg)
            return regsUseUpError(ErrMsg);
          IndexReg = TmpReg;
          Scale = TmpInt;
          if (checkScale(Scale, ErrMsg))
            return true;
          // Get the scale and replace the 'Register * Scale' with '0'.
          IC.popOperator();
        } else {
          IC.pushOperand(IC_IMM, TmpInt);
        }
        break;
      }
      PrevState = CurrState;
      return false;
    }
918 void onStar() {
919 PrevState = State;
920 switch (State) {
921 default:
922 State = IES_ERROR;
923 break;
924 case IES_INTEGER:
925 case IES_REGISTER:
926 case IES_RPAREN:
927 State = IES_MULTIPLY;
928 IC.pushOperator(IC_MULTIPLY);
929 break;
930 }
931 }
932 void onDivide() {
933 PrevState = State;
934 switch (State) {
935 default:
936 State = IES_ERROR;
937 break;
938 case IES_INTEGER:
939 case IES_RPAREN:
940 State = IES_DIVIDE;
941 IC.pushOperator(IC_DIVIDE);
942 break;
943 }
944 }
945 void onMod() {
946 PrevState = State;
947 switch (State) {
948 default:
949 State = IES_ERROR;
950 break;
951 case IES_INTEGER:
952 case IES_RPAREN:
953 State = IES_MOD;
954 IC.pushOperator(IC_MOD);
955 break;
956 }
957 }
    // Handle '['. Nested brackets are rejected (returns true without an
    // ErrMsg; the caller produces the diagnostic). A '[' after a complete
    // term acts like an implicit '+' joining displacement and bracketed part.
    bool onLBrac() {
      if (BracCount)
        return true;
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_RBRAC:
      case IES_INTEGER:
      case IES_RPAREN:
        State = IES_PLUS;
        IC.pushOperator(IC_PLUS);
        // A bracketed expression after a term denotes a single element.
        CurType.Length = 1;
        CurType.Size = CurType.ElementSize;
        break;
      case IES_INIT:
      case IES_CAST:
        assert(!BracCount && "BracCount should be zero on parsing's start");
        State = IES_LBRAC;
        break;
      }
      MemExpr = true;
      BracketUsed = true;
      BracCount++;
      return false;
    }
    // Handle ']'. Must close exactly one open bracket. A bare register seen
    // just before the ']' is committed to BaseReg/IndexReg here (same logic
    // as in onPlus).
    bool onRBrac(StringRef &ErrMsg) {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_OFFSET:
      case IES_REGISTER:
      case IES_RPAREN:
        if (BracCount-- != 1) {
          ErrMsg = "unexpected bracket encountered";
          return true;
        }
        State = IES_RBRAC;
        if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
          // If we already have a BaseReg, then assume this is the IndexReg with
          // no explicit scale.
          if (!BaseReg) {
            BaseReg = TmpReg;
          } else {
            if (IndexReg)
              return regsUseUpError(ErrMsg);
            IndexReg = TmpReg;
            Scale = 0;
          }
        }
        break;
      }
      PrevState = CurrState;
      return false;
    }
    // Handle '(': legal wherever a new term may begin.
    void onLParen() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_PLUS:
      case IES_MINUS:
      case IES_NOT:
      case IES_OR:
      case IES_XOR:
      case IES_AND:
      case IES_EQ:
      case IES_NE:
      case IES_LT:
      case IES_LE:
      case IES_GT:
      case IES_GE:
      case IES_LSHIFT:
      case IES_RSHIFT:
      case IES_MULTIPLY:
      case IES_DIVIDE:
      case IES_MOD:
      case IES_LPAREN:
      case IES_INIT:
      case IES_LBRAC:
        State = IES_LPAREN;
        IC.pushOperator(IC_LPAREN);
        break;
      }
      PrevState = CurrState;
    }
    // Handle ')'. A bare register seen just before the ')' is committed to
    // BaseReg/IndexReg here, mirroring onPlus/onRBrac.
    bool onRParen(StringRef &ErrMsg) {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_OFFSET:
      case IES_REGISTER:
      case IES_RBRAC:
      case IES_RPAREN:
        State = IES_RPAREN;
        // In the case of a multiply, onRegister has already set IndexReg
        // directly, with appropriate scale.
        // Otherwise if we just saw a register it has only been stored in
        // TmpReg, so we need to store it into the state machine.
        if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
          // If we already have a BaseReg, then assume this is the IndexReg with
          // no explicit scale.
          if (!BaseReg) {
            BaseReg = TmpReg;
          } else {
            if (IndexReg)
              return regsUseUpError(ErrMsg);
            IndexReg = TmpReg;
            Scale = 0;
          }
        }
        IC.pushOperator(IC_RPAREN);
        break;
      }
      PrevState = CurrState;
      return false;
    }
    // Handle the OFFSET operator applied to symbol Val. The symbol is
    // recorded and a placeholder 0 is pushed since its value cannot be
    // resolved at parse time.
    bool onOffset(const MCExpr *Val, SMLoc OffsetLoc, StringRef ID,
                  const InlineAsmIdentifierInfo &IDInfo,
                  bool ParsingMSInlineAsm, StringRef &ErrMsg) {
      PrevState = State;
      switch (State) {
      default:
        ErrMsg = "unexpected offset operator expression";
        return true;
      case IES_PLUS:
      case IES_INIT:
      case IES_LBRAC:
        if (setSymRef(Val, ID, ErrMsg))
          return true;
        OffsetOperator = true;
        OffsetOperatorLoc = OffsetLoc;
        State = IES_OFFSET;
        // As we cannot yet resolve the actual value (offset), we retain
        // the requested semantics by pushing a '0' to the operands stack
        IC.pushOperand(IC_IMM);
        if (ParsingMSInlineAsm) {
          Info = IDInfo;
        }
        break;
      }
      return false;
    }
1109 void onCast(AsmTypeInfo Info) {
1110 PrevState = State;
1111 switch (State) {
1112 default:
1113 State = IES_ERROR;
1114 break;
1115 case IES_LPAREN:
1116 setTypeInfo(Info);
1117 State = IES_CAST;
1118 break;
1119 }
1120 }
1121 void setTypeInfo(AsmTypeInfo Type) { CurType = Type; }
1122 };
1123
1124 bool Error(SMLoc L, const Twine &Msg, SMRange Range = std::nullopt,
1125 bool MatchingInlineAsm = false) {
1126 MCAsmParser &Parser = getParser();
1127 if (MatchingInlineAsm) {
1128 return false;
1129 }
1130 return Parser.Error(L, Msg, Range);
1131 }
1132
1133 bool MatchRegisterByName(MCRegister &RegNo, StringRef RegName, SMLoc StartLoc,
1134 SMLoc EndLoc);
1135 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1136 bool RestoreOnFailure);
1137
1138 std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
1139 std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
1140 bool IsSIReg(MCRegister Reg);
1141 MCRegister GetSIDIForRegClass(unsigned RegClassID, bool IsSIReg);
1142 void
1143 AddDefaultSrcDestOperands(OperandVector &Operands,
1144 std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
1145 std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst);
1146 bool VerifyAndAdjustOperands(OperandVector &OrigOperands,
1147 OperandVector &FinalOperands);
1148 bool parseOperand(OperandVector &Operands, StringRef Name);
1149 bool parseATTOperand(OperandVector &Operands);
1150 bool parseIntelOperand(OperandVector &Operands, StringRef Name);
1151 bool ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID,
1152 InlineAsmIdentifierInfo &Info, SMLoc &End);
1153 bool ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End);
1154 unsigned IdentifyIntelInlineAsmOperator(StringRef Name);
1155 unsigned ParseIntelInlineAsmOperator(unsigned OpKind);
1156 unsigned IdentifyMasmOperator(StringRef Name);
1157 bool ParseMasmOperator(unsigned OpKind, int64_t &Val);
1158 bool ParseRoundingModeOp(SMLoc Start, OperandVector &Operands);
1159 bool parseCFlagsOp(OperandVector &Operands);
1160 bool ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM,
1161 bool &ParseError, SMLoc &End);
1162 bool ParseMasmNamedOperator(StringRef Name, IntelExprStateMachine &SM,
1163 bool &ParseError, SMLoc &End);
1164 void RewriteIntelExpression(IntelExprStateMachine &SM, SMLoc Start,
1165 SMLoc End);
1166 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
1167 bool ParseIntelInlineAsmIdentifier(const MCExpr *&Val, StringRef &Identifier,
1168 InlineAsmIdentifierInfo &Info,
1169 bool IsUnevaluatedOperand, SMLoc &End,
1170 bool IsParsingOffsetOperator = false);
1171 void tryParseOperandIdx(AsmToken::TokenKind PrevTK,
1172 IntelExprStateMachine &SM);
1173
1174 bool ParseMemOperand(MCRegister SegReg, const MCExpr *Disp, SMLoc StartLoc,
1175 SMLoc EndLoc, OperandVector &Operands);
1176
1177 X86::CondCode ParseConditionCode(StringRef CCode);
1178
1179 bool ParseIntelMemoryOperandSize(unsigned &Size, StringRef *SizeStr);
1180 bool CreateMemForMSInlineAsm(MCRegister SegReg, const MCExpr *Disp,
1181 MCRegister BaseReg, MCRegister IndexReg,
1182 unsigned Scale, bool NonAbsMem, SMLoc Start,
1183 SMLoc End, unsigned Size, StringRef Identifier,
1184 const InlineAsmIdentifierInfo &Info,
1186
1187 bool parseDirectiveArch();
1188 bool parseDirectiveNops(SMLoc L);
1189 bool parseDirectiveEven(SMLoc L);
1190 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
1191
1192 /// CodeView FPO data directives.
1193 bool parseDirectiveFPOProc(SMLoc L);
1194 bool parseDirectiveFPOSetFrame(SMLoc L);
1195 bool parseDirectiveFPOPushReg(SMLoc L);
1196 bool parseDirectiveFPOStackAlloc(SMLoc L);
1197 bool parseDirectiveFPOStackAlign(SMLoc L);
1198 bool parseDirectiveFPOEndPrologue(SMLoc L);
1199 bool parseDirectiveFPOEndProc(SMLoc L);
1200
1201 /// SEH directives.
1202 bool parseSEHRegisterNumber(unsigned RegClassID, MCRegister &RegNo);
1203 bool parseDirectiveSEHPushReg(SMLoc);
1204 bool parseDirectiveSEHSetFrame(SMLoc);
1205 bool parseDirectiveSEHSaveReg(SMLoc);
1206 bool parseDirectiveSEHSaveXMM(SMLoc);
1207 bool parseDirectiveSEHPushFrame(SMLoc);
1208
1209 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1210
1211 bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
1212 bool processInstruction(MCInst &Inst, const OperandVector &Ops);
1213
1214 // Load Value Injection (LVI) Mitigations for machine code
1215 void emitWarningForSpecialLVIInstruction(SMLoc Loc);
1216 void applyLVICFIMitigation(MCInst &Inst, MCStreamer &Out);
1217 void applyLVILoadHardeningMitigation(MCInst &Inst, MCStreamer &Out);
1218
1219 /// Wrapper around MCStreamer::emitInstruction(). Possibly adds
1220 /// instrumentation around Inst.
1221 void emitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
1222
1223 bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1224 OperandVector &Operands, MCStreamer &Out,
1225 uint64_t &ErrorInfo,
1226 bool MatchingInlineAsm) override;
1227
1228 void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
1229 MCStreamer &Out, bool MatchingInlineAsm);
1230
1231 bool ErrorMissingFeature(SMLoc IDLoc, const FeatureBitset &MissingFeatures,
1232 bool MatchingInlineAsm);
1233
1234 bool matchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode, MCInst &Inst,
1235 OperandVector &Operands, MCStreamer &Out,
1236 uint64_t &ErrorInfo, bool MatchingInlineAsm);
1237
1238 bool matchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode, MCInst &Inst,
1239 OperandVector &Operands, MCStreamer &Out,
1240 uint64_t &ErrorInfo,
1241 bool MatchingInlineAsm);
1242
1243 bool omitRegisterFromClobberLists(MCRegister Reg) override;
1244
1245 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
1246 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
1247 /// return false if no parsing errors occurred, true otherwise.
1248 bool HandleAVX512Operand(OperandVector &Operands);
1249
1250 bool ParseZ(std::unique_ptr<X86Operand> &Z, SMLoc StartLoc);
1251
  /// True when the subtarget feature bits say we are assembling 64-bit code.
  bool is64BitMode() const {
    // FIXME: Can tablegen auto-generate this?
    return getSTI().hasFeature(X86::Is64Bit);
  }
  /// True when the subtarget feature bits say we are assembling 32-bit code.
  bool is32BitMode() const {
    // FIXME: Can tablegen auto-generate this?
    return getSTI().hasFeature(X86::Is32Bit);
  }
  /// True when the subtarget feature bits say we are assembling 16-bit code.
  bool is16BitMode() const {
    // FIXME: Can tablegen auto-generate this?
    return getSTI().hasFeature(X86::Is16Bit);
  }
  /// Switch the (mutually exclusive) 16/32/64-bit mode feature bits so that
  /// exactly \p mode is set, then recompute the available-features mask.
  void SwitchMode(unsigned mode) {
    // Work on a writable copy of the subtarget info.
    MCSubtargetInfo &STI = copySTI();
    FeatureBitset AllModes({X86::Is64Bit, X86::Is32Bit, X86::Is16Bit});
    FeatureBitset OldMode = STI.getFeatureBits() & AllModes;
    // OldMode.flip(mode) is the set of mode bits that must change: the
    // previously-active mode turns off and the requested mode turns on.
    FeatureBitset FB = ComputeAvailableFeatures(
      STI.ToggleFeature(OldMode.flip(mode)));
    setAvailableFeatures(FB);

    // Exactly the requested mode bit must be set afterwards.
    assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes));
  }
1274
1275 unsigned getPointerWidth() {
1276 if (is16BitMode()) return 16;
1277 if (is32BitMode()) return 32;
1278 if (is64BitMode()) return 64;
1279 llvm_unreachable("invalid mode");
1280 }
1281
  /// A nonzero assembler dialect selects Intel syntax; 0 is AT&T.
  bool isParsingIntelSyntax() {
    return getParser().getAssemblerDialect();
  }
1285
1286 /// @name Auto-generated Matcher Functions
1287 /// {
1288
1289#define GET_ASSEMBLER_HEADER
1290#include "X86GenAsmMatcher.inc"
1291
1292 /// }
1293
1294public:
1295 enum X86MatchResultTy {
1296 Match_Unsupported = FIRST_TARGET_MATCH_RESULT_TY,
1297#define GET_OPERAND_DIAGNOSTIC_TYPES
1298#include "X86GenAsmMatcher.inc"
1299 };
1300
  /// Construct the X86 asm parser and seed the available-features mask from
  /// the subtarget's feature bits.
  X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
               const MCInstrInfo &mii, const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, sti, mii), InstInfo(nullptr),
        Code16GCC(false) {

    // On x86, ".word" emits 2 bytes.
    Parser.addAliasForDirective(".word", ".2byte");

    // Initialize the set of available features.
    setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
  }
1311
1312 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1313 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1314 SMLoc &EndLoc) override;
1315
1316 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1317
1318 bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
1319 SMLoc NameLoc, OperandVector &Operands) override;
1320
1321 bool ParseDirective(AsmToken DirectiveID) override;
1322};
1323} // end anonymous namespace
1324
1325#define GET_REGISTER_MATCHER
1326#define GET_SUBTARGET_FEATURE_NAME
1327#include "X86GenAsmMatcher.inc"
1328
1330 MCRegister IndexReg, unsigned Scale,
1331 bool Is64BitMode,
1332 StringRef &ErrMsg) {
1333 // If we have both a base register and an index register make sure they are
1334 // both 64-bit or 32-bit registers.
1335 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
1336
1337 if (BaseReg &&
1338 !(BaseReg == X86::RIP || BaseReg == X86::EIP ||
1339 X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) ||
1340 X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) ||
1341 X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg))) {
1342 ErrMsg = "invalid base+index expression";
1343 return true;
1344 }
1345
1346 if (IndexReg &&
1347 !(IndexReg == X86::EIZ || IndexReg == X86::RIZ ||
1348 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1349 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
1350 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
1351 X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) ||
1352 X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) ||
1353 X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg))) {
1354 ErrMsg = "invalid base+index expression";
1355 return true;
1356 }
1357
1358 if (((BaseReg == X86::RIP || BaseReg == X86::EIP) && IndexReg) ||
1359 IndexReg == X86::EIP || IndexReg == X86::RIP || IndexReg == X86::ESP ||
1360 IndexReg == X86::RSP) {
1361 ErrMsg = "invalid base+index expression";
1362 return true;
1363 }
1364
1365 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
1366 // and then only in non-64-bit modes.
1367 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1368 (Is64BitMode || (BaseReg != X86::BX && BaseReg != X86::BP &&
1369 BaseReg != X86::SI && BaseReg != X86::DI))) {
1370 ErrMsg = "invalid 16-bit base register";
1371 return true;
1372 }
1373
1374 if (!BaseReg &&
1375 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
1376 ErrMsg = "16-bit memory operand may not include only index register";
1377 return true;
1378 }
1379
1380 if (BaseReg && IndexReg) {
1381 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
1382 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1383 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
1384 IndexReg == X86::EIZ)) {
1385 ErrMsg = "base register is 64-bit, but index register is not";
1386 return true;
1387 }
1388 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
1389 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1390 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
1391 IndexReg == X86::RIZ)) {
1392 ErrMsg = "base register is 32-bit, but index register is not";
1393 return true;
1394 }
1395 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
1396 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
1397 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
1398 ErrMsg = "base register is 16-bit, but index register is not";
1399 return true;
1400 }
1401 if ((BaseReg != X86::BX && BaseReg != X86::BP) ||
1402 (IndexReg != X86::SI && IndexReg != X86::DI)) {
1403 ErrMsg = "invalid 16-bit base/index register combination";
1404 return true;
1405 }
1406 }
1407 }
1408
1409 // RIP/EIP-relative addressing is only supported in 64-bit mode.
1410 if (!Is64BitMode && (BaseReg == X86::RIP || BaseReg == X86::EIP)) {
1411 ErrMsg = "IP-relative addressing requires 64-bit mode";
1412 return true;
1413 }
1414
1415 return checkScale(Scale, ErrMsg);
1416}
1417
1418bool X86AsmParser::MatchRegisterByName(MCRegister &RegNo, StringRef RegName,
1419 SMLoc StartLoc, SMLoc EndLoc) {
1420 // If we encounter a %, ignore it. This code handles registers with and
1421 // without the prefix, unprefixed registers can occur in cfi directives.
1422 RegName.consume_front("%");
1423
1424 RegNo = MatchRegisterName(RegName);
1425
1426 // If the match failed, try the register name as lowercase.
1427 if (!RegNo)
1428 RegNo = MatchRegisterName(RegName.lower());
1429
1430 // The "flags" and "mxcsr" registers cannot be referenced directly.
1431 // Treat it as an identifier instead.
1432 if (isParsingMSInlineAsm() && isParsingIntelSyntax() &&
1433 (RegNo == X86::EFLAGS || RegNo == X86::MXCSR))
1434 RegNo = MCRegister();
1435
1436 if (!is64BitMode()) {
1437 // FIXME: This should be done using Requires<Not64BitMode> and
1438 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
1439 // checked.
1440 if (RegNo == X86::RIZ || RegNo == X86::RIP ||
1441 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
1444 return Error(StartLoc,
1445 "register %" + RegName + " is only available in 64-bit mode",
1446 SMRange(StartLoc, EndLoc));
1447 }
1448 }
1449
1450 if (X86II::isApxExtendedReg(RegNo))
1451 UseApxExtendedReg = true;
1452
1453 // If this is "db[0-15]", match it as an alias
1454 // for dr[0-15].
1455 if (!RegNo && RegName.starts_with("db")) {
1456 if (RegName.size() == 3) {
1457 switch (RegName[2]) {
1458 case '0':
1459 RegNo = X86::DR0;
1460 break;
1461 case '1':
1462 RegNo = X86::DR1;
1463 break;
1464 case '2':
1465 RegNo = X86::DR2;
1466 break;
1467 case '3':
1468 RegNo = X86::DR3;
1469 break;
1470 case '4':
1471 RegNo = X86::DR4;
1472 break;
1473 case '5':
1474 RegNo = X86::DR5;
1475 break;
1476 case '6':
1477 RegNo = X86::DR6;
1478 break;
1479 case '7':
1480 RegNo = X86::DR7;
1481 break;
1482 case '8':
1483 RegNo = X86::DR8;
1484 break;
1485 case '9':
1486 RegNo = X86::DR9;
1487 break;
1488 }
1489 } else if (RegName.size() == 4 && RegName[2] == '1') {
1490 switch (RegName[3]) {
1491 case '0':
1492 RegNo = X86::DR10;
1493 break;
1494 case '1':
1495 RegNo = X86::DR11;
1496 break;
1497 case '2':
1498 RegNo = X86::DR12;
1499 break;
1500 case '3':
1501 RegNo = X86::DR13;
1502 break;
1503 case '4':
1504 RegNo = X86::DR14;
1505 break;
1506 case '5':
1507 RegNo = X86::DR15;
1508 break;
1509 }
1510 }
1511 }
1512
1513 if (!RegNo) {
1514 if (isParsingIntelSyntax())
1515 return true;
1516 return Error(StartLoc, "invalid register name", SMRange(StartLoc, EndLoc));
1517 }
1518 return false;
1519}
1520
1521bool X86AsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
1522 SMLoc &EndLoc, bool RestoreOnFailure) {
1523 MCAsmParser &Parser = getParser();
1524 AsmLexer &Lexer = getLexer();
1525 RegNo = MCRegister();
1526
1528 auto OnFailure = [RestoreOnFailure, &Lexer, &Tokens]() {
1529 if (RestoreOnFailure) {
1530 while (!Tokens.empty()) {
1531 Lexer.UnLex(Tokens.pop_back_val());
1532 }
1533 }
1534 };
1535
1536 const AsmToken &PercentTok = Parser.getTok();
1537 StartLoc = PercentTok.getLoc();
1538
1539 // If we encounter a %, ignore it. This code handles registers with and
1540 // without the prefix, unprefixed registers can occur in cfi directives.
1541 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent)) {
1542 Tokens.push_back(PercentTok);
1543 Parser.Lex(); // Eat percent token.
1544 }
1545
1546 const AsmToken &Tok = Parser.getTok();
1547 EndLoc = Tok.getEndLoc();
1548
1549 if (Tok.isNot(AsmToken::Identifier)) {
1550 OnFailure();
1551 if (isParsingIntelSyntax()) return true;
1552 return Error(StartLoc, "invalid register name",
1553 SMRange(StartLoc, EndLoc));
1554 }
1555
1556 if (MatchRegisterByName(RegNo, Tok.getString(), StartLoc, EndLoc)) {
1557 OnFailure();
1558 return true;
1559 }
1560
1561 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
1562 if (RegNo == X86::ST0) {
1563 Tokens.push_back(Tok);
1564 Parser.Lex(); // Eat 'st'
1565
1566 // Check to see if we have '(4)' after %st.
1567 if (Lexer.isNot(AsmToken::LParen))
1568 return false;
1569 // Lex the paren.
1570 Tokens.push_back(Parser.getTok());
1571 Parser.Lex();
1572
1573 const AsmToken &IntTok = Parser.getTok();
1574 if (IntTok.isNot(AsmToken::Integer)) {
1575 OnFailure();
1576 return Error(IntTok.getLoc(), "expected stack index");
1577 }
1578 switch (IntTok.getIntVal()) {
1579 case 0: RegNo = X86::ST0; break;
1580 case 1: RegNo = X86::ST1; break;
1581 case 2: RegNo = X86::ST2; break;
1582 case 3: RegNo = X86::ST3; break;
1583 case 4: RegNo = X86::ST4; break;
1584 case 5: RegNo = X86::ST5; break;
1585 case 6: RegNo = X86::ST6; break;
1586 case 7: RegNo = X86::ST7; break;
1587 default:
1588 OnFailure();
1589 return Error(IntTok.getLoc(), "invalid stack index");
1590 }
1591
1592 // Lex IntTok
1593 Tokens.push_back(IntTok);
1594 Parser.Lex();
1595 if (Lexer.isNot(AsmToken::RParen)) {
1596 OnFailure();
1597 return Error(Parser.getTok().getLoc(), "expected ')'");
1598 }
1599
1600 EndLoc = Parser.getTok().getEndLoc();
1601 Parser.Lex(); // Eat ')'
1602 return false;
1603 }
1604
1605 EndLoc = Parser.getTok().getEndLoc();
1606
1607 if (!RegNo) {
1608 OnFailure();
1609 if (isParsingIntelSyntax()) return true;
1610 return Error(StartLoc, "invalid register name",
1611 SMRange(StartLoc, EndLoc));
1612 }
1613
1614 Parser.Lex(); // Eat identifier token.
1615 return false;
1616}
1617
// Public entry point: parse a register, emitting diagnostics on failure
// (no token-stream restoration).
bool X86AsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
                                 SMLoc &EndLoc) {
  return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
}
1622
1623ParseStatus X86AsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1624 SMLoc &EndLoc) {
1625 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
1626 bool PendingErrors = getParser().hasPendingError();
1627 getParser().clearPendingErrors();
1628 if (PendingErrors)
1629 return ParseStatus::Failure;
1630 if (Result)
1631 return ParseStatus::NoMatch;
1632 return ParseStatus::Success;
1633}
1634
1635std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
1636 bool Parse32 = is32BitMode() || Code16GCC;
1637 MCRegister Basereg =
1638 is64BitMode() ? X86::RSI : (Parse32 ? X86::ESI : X86::SI);
1639 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1640 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1641 /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
1642 Loc, Loc, 0);
1643}
1644
1645std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
1646 bool Parse32 = is32BitMode() || Code16GCC;
1647 MCRegister Basereg =
1648 is64BitMode() ? X86::RDI : (Parse32 ? X86::EDI : X86::DI);
1649 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1650 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1651 /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
1652 Loc, Loc, 0);
1653}
1654
1655bool X86AsmParser::IsSIReg(MCRegister Reg) {
1656 switch (Reg.id()) {
1657 default: llvm_unreachable("Only (R|E)SI and (R|E)DI are expected!");
1658 case X86::RSI:
1659 case X86::ESI:
1660 case X86::SI:
1661 return true;
1662 case X86::RDI:
1663 case X86::EDI:
1664 case X86::DI:
1665 return false;
1666 }
1667}
1668
1669MCRegister X86AsmParser::GetSIDIForRegClass(unsigned RegClassID, bool IsSIReg) {
1670 switch (RegClassID) {
1671 default: llvm_unreachable("Unexpected register class");
1672 case X86::GR64RegClassID:
1673 return IsSIReg ? X86::RSI : X86::RDI;
1674 case X86::GR32RegClassID:
1675 return IsSIReg ? X86::ESI : X86::EDI;
1676 case X86::GR16RegClassID:
1677 return IsSIReg ? X86::SI : X86::DI;
1678 }
1679}
1680
1681void X86AsmParser::AddDefaultSrcDestOperands(
1682 OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
1683 std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst) {
1684 if (isParsingIntelSyntax()) {
1685 Operands.push_back(std::move(Dst));
1686 Operands.push_back(std::move(Src));
1687 }
1688 else {
1689 Operands.push_back(std::move(Src));
1690 Operands.push_back(std::move(Dst));
1691 }
1692}
1693
1694bool X86AsmParser::VerifyAndAdjustOperands(OperandVector &OrigOperands,
1695 OperandVector &FinalOperands) {
1696
1697 if (OrigOperands.size() > 1) {
1698 // Check if sizes match, OrigOperands also contains the instruction name
1699 assert(OrigOperands.size() == FinalOperands.size() + 1 &&
1700 "Operand size mismatch");
1701
1703 // Verify types match
1704 int RegClassID = -1;
1705 for (unsigned int i = 0; i < FinalOperands.size(); ++i) {
1706 X86Operand &OrigOp = static_cast<X86Operand &>(*OrigOperands[i + 1]);
1707 X86Operand &FinalOp = static_cast<X86Operand &>(*FinalOperands[i]);
1708
1709 if (FinalOp.isReg() &&
1710 (!OrigOp.isReg() || FinalOp.getReg() != OrigOp.getReg()))
1711 // Return false and let a normal complaint about bogus operands happen
1712 return false;
1713
1714 if (FinalOp.isMem()) {
1715
1716 if (!OrigOp.isMem())
1717 // Return false and let a normal complaint about bogus operands happen
1718 return false;
1719
1720 MCRegister OrigReg = OrigOp.Mem.BaseReg;
1721 MCRegister FinalReg = FinalOp.Mem.BaseReg;
1722
1723 // If we've already encounterd a register class, make sure all register
1724 // bases are of the same register class
1725 if (RegClassID != -1 &&
1726 !X86MCRegisterClasses[RegClassID].contains(OrigReg)) {
1727 return Error(OrigOp.getStartLoc(),
1728 "mismatching source and destination index registers");
1729 }
1730
1731 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(OrigReg))
1732 RegClassID = X86::GR64RegClassID;
1733 else if (X86MCRegisterClasses[X86::GR32RegClassID].contains(OrigReg))
1734 RegClassID = X86::GR32RegClassID;
1735 else if (X86MCRegisterClasses[X86::GR16RegClassID].contains(OrigReg))
1736 RegClassID = X86::GR16RegClassID;
1737 else
1738 // Unexpected register class type
1739 // Return false and let a normal complaint about bogus operands happen
1740 return false;
1741
1742 bool IsSI = IsSIReg(FinalReg);
1743 FinalReg = GetSIDIForRegClass(RegClassID, IsSI);
1744
1745 if (FinalReg != OrigReg) {
1746 std::string RegName = IsSI ? "ES:(R|E)SI" : "ES:(R|E)DI";
1747 Warnings.push_back(std::make_pair(
1748 OrigOp.getStartLoc(),
1749 "memory operand is only for determining the size, " + RegName +
1750 " will be used for the location"));
1751 }
1752
1753 FinalOp.Mem.Size = OrigOp.Mem.Size;
1754 FinalOp.Mem.SegReg = OrigOp.Mem.SegReg;
1755 FinalOp.Mem.BaseReg = FinalReg;
1756 }
1757 }
1758
1759 // Produce warnings only if all the operands passed the adjustment - prevent
1760 // legal cases like "movsd (%rax), %xmm0" mistakenly produce warnings
1761 for (auto &WarningMsg : Warnings) {
1762 Warning(WarningMsg.first, WarningMsg.second);
1763 }
1764
1765 // Remove old operands
1766 for (unsigned int i = 0; i < FinalOperands.size(); ++i)
1767 OrigOperands.pop_back();
1768 }
1769 // OrigOperands.append(FinalOperands.begin(), FinalOperands.end());
1770 for (auto &Op : FinalOperands)
1771 OrigOperands.push_back(std::move(Op));
1772
1773 return false;
1774}
1775
1776bool X86AsmParser::parseOperand(OperandVector &Operands, StringRef Name) {
1777 if (isParsingIntelSyntax())
1778 return parseIntelOperand(Operands, Name);
1779
1780 return parseATTOperand(Operands);
1781}
1782
1783bool X86AsmParser::CreateMemForMSInlineAsm(
1784 MCRegister SegReg, const MCExpr *Disp, MCRegister BaseReg,
1785 MCRegister IndexReg, unsigned Scale, bool NonAbsMem, SMLoc Start, SMLoc End,
1786 unsigned Size, StringRef Identifier, const InlineAsmIdentifierInfo &Info,
1788 // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
1789 // some other label reference.
1791 // Create an absolute memory reference in order to match against
1792 // instructions taking a PC relative operand.
1793 Operands.push_back(X86Operand::CreateMem(getPointerWidth(), Disp, Start,
1794 End, Size, Identifier,
1795 Info.Label.Decl));
1796 return false;
1797 }
1798 // We either have a direct symbol reference, or an offset from a symbol. The
1799 // parser always puts the symbol on the LHS, so look there for size
1800 // calculation purposes.
1801 unsigned FrontendSize = 0;
1802 void *Decl = nullptr;
1803 bool IsGlobalLV = false;
1805 // Size is in terms of bits in this context.
1806 FrontendSize = Info.Var.Type * 8;
1807 Decl = Info.Var.Decl;
1808 IsGlobalLV = Info.Var.IsGlobalLV;
1809 }
1810 // It is widely common for MS InlineAsm to use a global variable and one/two
1811 // registers in a mmory expression, and though unaccessible via rip/eip.
1812 if (IsGlobalLV) {
1813 if (BaseReg || IndexReg) {
1814 Operands.push_back(X86Operand::CreateMem(getPointerWidth(), Disp, Start,
1815 End, Size, Identifier, Decl, 0,
1816 BaseReg && IndexReg));
1817 return false;
1818 }
1819 if (NonAbsMem)
1820 BaseReg = 1; // Make isAbsMem() false
1821 }
1823 getPointerWidth(), SegReg, Disp, BaseReg, IndexReg, Scale, Start, End,
1824 Size,
1825 /*DefaultBaseReg=*/X86::RIP, Identifier, Decl, FrontendSize));
1826 return false;
1827}
1828
1829// Some binary bitwise operators have a named synonymous
1830// Query a candidate string for being such a named operator
1831// and if so - invoke the appropriate handler
// Recognize a textual operator name (NOT/OR/SHL/... or OFFSET) and feed it to
// the expression state machine. Returns true if Name was handled (with
// ParseError set on failure), false if Name is not a named operator.
bool X86AsmParser::ParseIntelNamedOperator(StringRef Name,
                                           IntelExprStateMachine &SM,
                                           bool &ParseError, SMLoc &End) {
  // A named operator should be either lower or upper case, but not a mix...
  // except in MASM, which uses full case-insensitivity.
  if (Name != Name.lower() && Name != Name.upper() &&
      !getParser().isParsingMasm())
    return false;
  if (Name.equals_insensitive("not")) {
    SM.onNot();
  } else if (Name.equals_insensitive("or")) {
    SM.onOr();
  } else if (Name.equals_insensitive("shl")) {
    SM.onLShift();
  } else if (Name.equals_insensitive("shr")) {
    SM.onRShift();
  } else if (Name.equals_insensitive("xor")) {
    SM.onXor();
  } else if (Name.equals_insensitive("and")) {
    SM.onAnd();
  } else if (Name.equals_insensitive("mod")) {
    SM.onMod();
  } else if (Name.equals_insensitive("offset")) {
    // OFFSET takes an operand of its own: parse it here and feed the result
    // to the state machine.
    SMLoc OffsetLoc = getTok().getLoc();
    const MCExpr *Val = nullptr;
    StringRef ID;
    InlineAsmIdentifierInfo Info;
    ParseError = ParseIntelOffsetOperator(Val, ID, Info, End);
    if (ParseError)
      return true;
    StringRef ErrMsg;
    ParseError =
        SM.onOffset(Val, OffsetLoc, ID, Info, isParsingMSInlineAsm(), ErrMsg);
    if (ParseError)
      return Error(SMLoc::getFromPointer(Name.data()), ErrMsg);
  } else {
    return false;
  }
  // OFFSET already consumed its tokens above; eat the operator token for the
  // simple cases.
  if (!Name.equals_insensitive("offset"))
    End = consumeToken();
  return true;
}
1874bool X86AsmParser::ParseMasmNamedOperator(StringRef Name,
1875 IntelExprStateMachine &SM,
1876 bool &ParseError, SMLoc &End) {
1877 if (Name.equals_insensitive("eq")) {
1878 SM.onEq();
1879 } else if (Name.equals_insensitive("ne")) {
1880 SM.onNE();
1881 } else if (Name.equals_insensitive("lt")) {
1882 SM.onLT();
1883 } else if (Name.equals_insensitive("le")) {
1884 SM.onLE();
1885 } else if (Name.equals_insensitive("gt")) {
1886 SM.onGT();
1887 } else if (Name.equals_insensitive("ge")) {
1888 SM.onGE();
1889 } else {
1890 return false;
1891 }
1892 End = consumeToken();
1893 return true;
1894}
1895
1896// Check if current intel expression append after an operand.
1897// Like: [Operand][Intel Expression]
1898void X86AsmParser::tryParseOperandIdx(AsmToken::TokenKind PrevTK,
1899 IntelExprStateMachine &SM) {
1900 if (PrevTK != AsmToken::RBrac)
1901 return;
1902
1903 SM.setAppendAfterOperand();
1904}
1905
1906bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1907 MCAsmParser &Parser = getParser();
1908 StringRef ErrMsg;
1909
1911
1912 if (getContext().getObjectFileInfo()->isPositionIndependent())
1913 SM.setPIC();
1914
1915 bool Done = false;
1916 while (!Done) {
1917 // Get a fresh reference on each loop iteration in case the previous
1918 // iteration moved the token storage during UnLex().
1919 const AsmToken &Tok = Parser.getTok();
1920
1921 bool UpdateLocLex = true;
1922 AsmToken::TokenKind TK = getLexer().getKind();
1923
1924 switch (TK) {
1925 default:
1926 if ((Done = SM.isValidEndState()))
1927 break;
1928 return Error(Tok.getLoc(), "unknown token in expression");
1929 case AsmToken::Error:
1930 return Error(getLexer().getErrLoc(), getLexer().getErr());
1931 break;
1932 case AsmToken::Real:
1933 // DotOperator: [ebx].0
1934 UpdateLocLex = false;
1935 if (ParseIntelDotOperator(SM, End))
1936 return true;
1937 break;
1938 case AsmToken::Dot:
1939 if (!Parser.isParsingMasm()) {
1940 if ((Done = SM.isValidEndState()))
1941 break;
1942 return Error(Tok.getLoc(), "unknown token in expression");
1943 }
1944 // MASM allows spaces around the dot operator (e.g., "var . x")
1945 Lex();
1946 UpdateLocLex = false;
1947 if (ParseIntelDotOperator(SM, End))
1948 return true;
1949 break;
1950 case AsmToken::Dollar:
1951 if (!Parser.isParsingMasm()) {
1952 if ((Done = SM.isValidEndState()))
1953 break;
1954 return Error(Tok.getLoc(), "unknown token in expression");
1955 }
1956 [[fallthrough]];
1957 case AsmToken::String: {
1958 if (Parser.isParsingMasm()) {
1959 // MASM parsers handle strings in expressions as constants.
1960 SMLoc ValueLoc = Tok.getLoc();
1961 int64_t Res;
1962 const MCExpr *Val;
1963 if (Parser.parsePrimaryExpr(Val, End, nullptr))
1964 return true;
1965 UpdateLocLex = false;
1966 if (!Val->evaluateAsAbsolute(Res, getStreamer().getAssemblerPtr()))
1967 return Error(ValueLoc, "expected absolute value");
1968 if (SM.onInteger(Res, ErrMsg))
1969 return Error(ValueLoc, ErrMsg);
1970 break;
1971 }
1972 [[fallthrough]];
1973 }
1974 case AsmToken::At:
1975 case AsmToken::Identifier: {
1976 SMLoc IdentLoc = Tok.getLoc();
1977 StringRef Identifier = Tok.getString();
1978 UpdateLocLex = false;
1979 if (Parser.isParsingMasm()) {
1980 size_t DotOffset = Identifier.find_first_of('.');
1981 if (DotOffset != StringRef::npos) {
1982 consumeToken();
1983 StringRef LHS = Identifier.slice(0, DotOffset);
1984 StringRef Dot = Identifier.substr(DotOffset, 1);
1985 StringRef RHS = Identifier.substr(DotOffset + 1);
1986 if (!RHS.empty()) {
1987 getLexer().UnLex(AsmToken(AsmToken::Identifier, RHS));
1988 }
1989 getLexer().UnLex(AsmToken(AsmToken::Dot, Dot));
1990 if (!LHS.empty()) {
1991 getLexer().UnLex(AsmToken(AsmToken::Identifier, LHS));
1992 }
1993 break;
1994 }
1995 }
1996 // (MASM only) <TYPE> PTR operator
1997 if (Parser.isParsingMasm()) {
1998 const AsmToken &NextTok = getLexer().peekTok();
1999 if (NextTok.is(AsmToken::Identifier) &&
2000 NextTok.getIdentifier().equals_insensitive("ptr")) {
2001 AsmTypeInfo Info;
2002 if (Parser.lookUpType(Identifier, Info))
2003 return Error(Tok.getLoc(), "unknown type");
2004 SM.onCast(Info);
2005 // Eat type and PTR.
2006 consumeToken();
2007 End = consumeToken();
2008 break;
2009 }
2010 }
2011 // Register, or (MASM only) <register>.<field>
2012 MCRegister Reg;
2013 if (Tok.is(AsmToken::Identifier)) {
2014 if (!ParseRegister(Reg, IdentLoc, End, /*RestoreOnFailure=*/true)) {
2015 if (SM.onRegister(Reg, ErrMsg))
2016 return Error(IdentLoc, ErrMsg);
2017 break;
2018 }
2019 if (Parser.isParsingMasm()) {
2020 const std::pair<StringRef, StringRef> IDField =
2021 Tok.getString().split('.');
2022 const StringRef ID = IDField.first, Field = IDField.second;
2023 SMLoc IDEndLoc = SMLoc::getFromPointer(ID.data() + ID.size());
2024 if (!Field.empty() &&
2025 !MatchRegisterByName(Reg, ID, IdentLoc, IDEndLoc)) {
2026 if (SM.onRegister(Reg, ErrMsg))
2027 return Error(IdentLoc, ErrMsg);
2028
2029 AsmFieldInfo Info;
2030 SMLoc FieldStartLoc = SMLoc::getFromPointer(Field.data());
2031 if (Parser.lookUpField(Field, Info))
2032 return Error(FieldStartLoc, "unknown offset");
2033 else if (SM.onPlus(ErrMsg))
2034 return Error(getTok().getLoc(), ErrMsg);
2035 else if (SM.onInteger(Info.Offset, ErrMsg))
2036 return Error(IdentLoc, ErrMsg);
2037 SM.setTypeInfo(Info.Type);
2038
2039 End = consumeToken();
2040 break;
2041 }
2042 }
2043 }
2044 // Operator synonymous ("not", "or" etc.)
2045 bool ParseError = false;
2046 if (ParseIntelNamedOperator(Identifier, SM, ParseError, End)) {
2047 if (ParseError)
2048 return true;
2049 break;
2050 }
2051 if (Parser.isParsingMasm() &&
2052 ParseMasmNamedOperator(Identifier, SM, ParseError, End)) {
2053 if (ParseError)
2054 return true;
2055 break;
2056 }
2057 // Symbol reference, when parsing assembly content
2058 InlineAsmIdentifierInfo Info;
2059 AsmFieldInfo FieldInfo;
2060 const MCExpr *Val;
2061 if (isParsingMSInlineAsm() || Parser.isParsingMasm()) {
2062 // MS Dot Operator expression
2063 if (Identifier.contains('.') &&
2064 (PrevTK == AsmToken::RBrac || PrevTK == AsmToken::RParen)) {
2065 if (ParseIntelDotOperator(SM, End))
2066 return true;
2067 break;
2068 }
2069 }
2070 if (isParsingMSInlineAsm()) {
2071 // MS InlineAsm operators (TYPE/LENGTH/SIZE)
2072 if (unsigned OpKind = IdentifyIntelInlineAsmOperator(Identifier)) {
2073 if (int64_t Val = ParseIntelInlineAsmOperator(OpKind)) {
2074 if (SM.onInteger(Val, ErrMsg))
2075 return Error(IdentLoc, ErrMsg);
2076 } else {
2077 return true;
2078 }
2079 break;
2080 }
2081 // MS InlineAsm identifier
2082 // Call parseIdentifier() to combine @ with the identifier behind it.
2083 if (TK == AsmToken::At && Parser.parseIdentifier(Identifier))
2084 return Error(IdentLoc, "expected identifier");
2085 if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info, false, End))
2086 return true;
2087 else if (SM.onIdentifierExpr(Val, Identifier, Info, FieldInfo.Type,
2088 true, ErrMsg))
2089 return Error(IdentLoc, ErrMsg);
2090 break;
2091 }
2092 if (Parser.isParsingMasm()) {
2093 if (unsigned OpKind = IdentifyMasmOperator(Identifier)) {
2094 int64_t Val;
2095 if (ParseMasmOperator(OpKind, Val))
2096 return true;
2097 if (SM.onInteger(Val, ErrMsg))
2098 return Error(IdentLoc, ErrMsg);
2099 break;
2100 }
2101 if (!getParser().lookUpType(Identifier, FieldInfo.Type)) {
2102 // Field offset immediate; <TYPE>.<field specification>
2103 Lex(); // eat type
2104 bool EndDot = parseOptionalToken(AsmToken::Dot);
2105 while (EndDot || (getTok().is(AsmToken::Identifier) &&
2106 getTok().getString().starts_with("."))) {
2107 getParser().parseIdentifier(Identifier);
2108 if (!EndDot)
2109 Identifier.consume_front(".");
2110 EndDot = Identifier.consume_back(".");
2111 if (getParser().lookUpField(FieldInfo.Type.Name, Identifier,
2112 FieldInfo)) {
2113 SMLoc IDEnd =
2115 return Error(IdentLoc, "Unable to lookup field reference!",
2116 SMRange(IdentLoc, IDEnd));
2117 }
2118 if (!EndDot)
2119 EndDot = parseOptionalToken(AsmToken::Dot);
2120 }
2121 if (SM.onInteger(FieldInfo.Offset, ErrMsg))
2122 return Error(IdentLoc, ErrMsg);
2123 break;
2124 }
2125 }
2126 if (getParser().parsePrimaryExpr(Val, End, &FieldInfo.Type)) {
2127 return Error(Tok.getLoc(), "Unexpected identifier!");
2128 } else if (SM.onIdentifierExpr(Val, Identifier, Info, FieldInfo.Type,
2129 false, ErrMsg)) {
2130 return Error(IdentLoc, ErrMsg);
2131 }
2132 break;
2133 }
2134 case AsmToken::Integer: {
2135 // Look for 'b' or 'f' following an Integer as a directional label
2136 SMLoc Loc = getTok().getLoc();
2137 int64_t IntVal = getTok().getIntVal();
2138 End = consumeToken();
2139 UpdateLocLex = false;
2140 if (getLexer().getKind() == AsmToken::Identifier) {
2141 StringRef IDVal = getTok().getString();
2142 if (IDVal == "f" || IDVal == "b") {
2143 MCSymbol *Sym =
2144 getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b");
2145 auto Variant = X86::S_None;
2146 const MCExpr *Val =
2147 MCSymbolRefExpr::create(Sym, Variant, getContext());
2148 if (IDVal == "b" && Sym->isUndefined())
2149 return Error(Loc, "invalid reference to undefined symbol");
2150 StringRef Identifier = Sym->getName();
2151 InlineAsmIdentifierInfo Info;
2152 AsmTypeInfo Type;
2153 if (SM.onIdentifierExpr(Val, Identifier, Info, Type,
2154 isParsingMSInlineAsm(), ErrMsg))
2155 return Error(Loc, ErrMsg);
2156 End = consumeToken();
2157 } else {
2158 if (SM.onInteger(IntVal, ErrMsg))
2159 return Error(Loc, ErrMsg);
2160 }
2161 } else {
2162 if (SM.onInteger(IntVal, ErrMsg))
2163 return Error(Loc, ErrMsg);
2164 }
2165 break;
2166 }
2167 case AsmToken::Plus:
2168 if (SM.onPlus(ErrMsg))
2169 return Error(getTok().getLoc(), ErrMsg);
2170 break;
2171 case AsmToken::Minus:
2172 if (SM.onMinus(ErrMsg))
2173 return Error(getTok().getLoc(), ErrMsg);
2174 break;
2175 case AsmToken::Tilde: SM.onNot(); break;
2176 case AsmToken::Star: SM.onStar(); break;
2177 case AsmToken::Slash: SM.onDivide(); break;
2178 case AsmToken::Percent: SM.onMod(); break;
2179 case AsmToken::Pipe: SM.onOr(); break;
2180 case AsmToken::Caret: SM.onXor(); break;
2181 case AsmToken::Amp: SM.onAnd(); break;
2182 case AsmToken::LessLess:
2183 SM.onLShift(); break;
2185 SM.onRShift(); break;
2186 case AsmToken::LBrac:
2187 if (SM.onLBrac())
2188 return Error(Tok.getLoc(), "unexpected bracket encountered");
2189 tryParseOperandIdx(PrevTK, SM);
2190 break;
2191 case AsmToken::RBrac:
2192 if (SM.onRBrac(ErrMsg)) {
2193 return Error(Tok.getLoc(), ErrMsg);
2194 }
2195 break;
2196 case AsmToken::LParen: SM.onLParen(); break;
2197 case AsmToken::RParen:
2198 if (SM.onRParen(ErrMsg)) {
2199 return Error(Tok.getLoc(), ErrMsg);
2200 }
2201 break;
2202 }
2203 if (SM.hadError())
2204 return Error(Tok.getLoc(), "unknown token in expression");
2205
2206 if (!Done && UpdateLocLex)
2207 End = consumeToken();
2208
2209 PrevTK = TK;
2210 }
2211 return false;
2212}
2213
// Rewrite the textual form of a parsed MS inline-asm Intel expression into
// the canonical form recorded as AsmRewrite entries: any source text before
// the symbol displacement is skipped, and the remainder is replaced by an
// IntelExpr built from the state machine's base/index/scale/imm components.
void X86AsmParser::RewriteIntelExpression(IntelExprStateMachine &SM,
                                          SMLoc Start, SMLoc End) {
  SMLoc Loc = Start;
  unsigned ExprLen = End.getPointer() - Start.getPointer();
  // Skip everything before a symbol displacement (if we have one)
  if (SM.getSym() && !SM.isOffsetOperator()) {
    StringRef SymName = SM.getSymName();
    // Len is the distance from the expression start to the symbol name;
    // nonzero means there is leading text to skip.
    if (unsigned Len = SymName.data() - Start.getPointer())
      InstInfo->AsmRewrites->emplace_back(AOK_Skip, Start, Len);
    // Continue the rewrite just past the symbol name.
    Loc = SMLoc::getFromPointer(SymName.data() + SymName.size());
    ExprLen = End.getPointer() - (SymName.data() + SymName.size());
    // If we have only a symbol then there's no need for a complex rewrite;
    // simply skip everything after it.
    if (!(SM.getBaseReg() || SM.getIndexReg() || SM.getImm())) {
      if (ExprLen)
        InstInfo->AsmRewrites->emplace_back(AOK_Skip, Loc, ExprLen);
      return;
    }
  }
  // Build an Intel Expression rewrite
  StringRef BaseRegStr;
  StringRef IndexRegStr;
  StringRef OffsetNameStr;
  if (SM.getBaseReg())
    BaseRegStr = X86IntelInstPrinter::getRegisterName(SM.getBaseReg());
  if (SM.getIndexReg())
    IndexRegStr = X86IntelInstPrinter::getRegisterName(SM.getIndexReg());
  if (SM.isOffsetOperator())
    OffsetNameStr = SM.getSymName();
  // Emit it
  IntelExpr Expr(BaseRegStr, IndexRegStr, SM.getScale(), OffsetNameStr,
                 SM.getImm(), SM.isMemExpr());
  InstInfo->AsmRewrites->emplace_back(Loc, ExprLen, Expr);
}
2248
2249// Inline assembly may use variable names with namespace alias qualifiers.
2250bool X86AsmParser::ParseIntelInlineAsmIdentifier(
2251 const MCExpr *&Val, StringRef &Identifier, InlineAsmIdentifierInfo &Info,
2252 bool IsUnevaluatedOperand, SMLoc &End, bool IsParsingOffsetOperator) {
2253 MCAsmParser &Parser = getParser();
2254 assert(isParsingMSInlineAsm() && "Expected to be parsing inline assembly.");
2255 Val = nullptr;
2256
2257 StringRef LineBuf(Identifier.data());
2258 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
2259
2260 const AsmToken &Tok = Parser.getTok();
2261 SMLoc Loc = Tok.getLoc();
2262
2263 // Advance the token stream until the end of the current token is
2264 // after the end of what the frontend claimed.
2265 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
2266 do {
2267 End = Tok.getEndLoc();
2268 getLexer().Lex();
2269 } while (End.getPointer() < EndPtr);
2270 Identifier = LineBuf;
2271
2272 // The frontend should end parsing on an assembler token boundary, unless it
2273 // failed parsing.
2274 assert((End.getPointer() == EndPtr ||
2276 "frontend claimed part of a token?");
2277
2278 // If the identifier lookup was unsuccessful, assume that we are dealing with
2279 // a label.
2281 StringRef InternalName =
2282 SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(),
2283 Loc, false);
2284 assert(InternalName.size() && "We should have an internal name here.");
2285 // Push a rewrite for replacing the identifier name with the internal name,
2286 // unless we are parsing the operand of an offset operator
2287 if (!IsParsingOffsetOperator)
2288 InstInfo->AsmRewrites->emplace_back(AOK_Label, Loc, Identifier.size(),
2289 InternalName);
2290 else
2291 Identifier = InternalName;
2292 } else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
2293 return false;
2294 // Create the symbol reference.
2295 MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
2296 auto Variant = X86::S_None;
2297 Val = MCSymbolRefExpr::create(Sym, Variant, getParser().getContext());
2298 return false;
2299}
2300
2301//ParseRoundingModeOp - Parse AVX-512 rounding mode operand
2302bool X86AsmParser::ParseRoundingModeOp(SMLoc Start, OperandVector &Operands) {
2303 MCAsmParser &Parser = getParser();
2304 const AsmToken &Tok = Parser.getTok();
2305 // Eat "{" and mark the current place.
2306 const SMLoc consumedToken = consumeToken();
2307 if (Tok.isNot(AsmToken::Identifier))
2308 return Error(Tok.getLoc(), "Expected an identifier after {");
2309 if (Tok.getIdentifier().starts_with("r")) {
2310 int rndMode = StringSwitch<int>(Tok.getIdentifier())
2311 .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT)
2312 .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF)
2313 .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF)
2314 .Case("rz", X86::STATIC_ROUNDING::TO_ZERO)
2315 .Default(-1);
2316 if (-1 == rndMode)
2317 return Error(Tok.getLoc(), "Invalid rounding mode.");
2318 Parser.Lex(); // Eat "r*" of r*-sae
2319 if (!getLexer().is(AsmToken::Minus))
2320 return Error(Tok.getLoc(), "Expected - at this point");
2321 Parser.Lex(); // Eat "-"
2322 Parser.Lex(); // Eat the sae
2323 if (!getLexer().is(AsmToken::RCurly))
2324 return Error(Tok.getLoc(), "Expected } at this point");
2325 SMLoc End = Tok.getEndLoc();
2326 Parser.Lex(); // Eat "}"
2327 const MCExpr *RndModeOp =
2328 MCConstantExpr::create(rndMode, Parser.getContext());
2329 Operands.push_back(X86Operand::CreateImm(RndModeOp, Start, End));
2330 return false;
2331 }
2332 if (Tok.getIdentifier() == "sae") {
2333 Parser.Lex(); // Eat the sae
2334 if (!getLexer().is(AsmToken::RCurly))
2335 return Error(Tok.getLoc(), "Expected } at this point");
2336 Parser.Lex(); // Eat "}"
2337 Operands.push_back(X86Operand::CreateToken("{sae}", consumedToken));
2338 return false;
2339 }
2340 return Error(Tok.getLoc(), "unknown token in expression");
2341}
2342
2343/// Parse condtional flags for CCMP/CTEST, e.g {dfv=of,sf,zf,cf} right after
2344/// mnemonic.
2345bool X86AsmParser::parseCFlagsOp(OperandVector &Operands) {
2346 MCAsmParser &Parser = getParser();
2347 AsmToken Tok = Parser.getTok();
2348 const SMLoc Start = Tok.getLoc();
2349 if (!Tok.is(AsmToken::LCurly))
2350 return Error(Tok.getLoc(), "Expected { at this point");
2351 Parser.Lex(); // Eat "{"
2352 Tok = Parser.getTok();
2353 if (Tok.getIdentifier().lower() != "dfv")
2354 return Error(Tok.getLoc(), "Expected dfv at this point");
2355 Parser.Lex(); // Eat "dfv"
2356 Tok = Parser.getTok();
2357 if (!Tok.is(AsmToken::Equal))
2358 return Error(Tok.getLoc(), "Expected = at this point");
2359 Parser.Lex(); // Eat "="
2360
2361 Tok = Parser.getTok();
2362 SMLoc End;
2363 if (Tok.is(AsmToken::RCurly)) {
2364 End = Tok.getEndLoc();
2366 MCConstantExpr::create(0, Parser.getContext()), Start, End));
2367 Parser.Lex(); // Eat "}"
2368 return false;
2369 }
2370 unsigned CFlags = 0;
2371 for (unsigned I = 0; I < 4; ++I) {
2372 Tok = Parser.getTok();
2373 unsigned CFlag = StringSwitch<unsigned>(Tok.getIdentifier().lower())
2374 .Case("of", 0x8)
2375 .Case("sf", 0x4)
2376 .Case("zf", 0x2)
2377 .Case("cf", 0x1)
2378 .Default(~0U);
2379 if (CFlag == ~0U)
2380 return Error(Tok.getLoc(), "Invalid conditional flags");
2381
2382 if (CFlags & CFlag)
2383 return Error(Tok.getLoc(), "Duplicated conditional flag");
2384 CFlags |= CFlag;
2385
2386 Parser.Lex(); // Eat one conditional flag
2387 Tok = Parser.getTok();
2388 if (Tok.is(AsmToken::RCurly)) {
2389 End = Tok.getEndLoc();
2391 MCConstantExpr::create(CFlags, Parser.getContext()), Start, End));
2392 Parser.Lex(); // Eat "}"
2393 return false;
2394 } else if (I == 3) {
2395 return Error(Tok.getLoc(), "Expected } at this point");
2396 } else if (Tok.isNot(AsmToken::Comma)) {
2397 return Error(Tok.getLoc(), "Expected } or , at this point");
2398 }
2399 Parser.Lex(); // Eat ","
2400 }
2401 llvm_unreachable("Unexpected control flow");
2402}
2403
2404/// Parse the '.' operator.
2405bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM,
2406 SMLoc &End) {
2407 const AsmToken &Tok = getTok();
2408 AsmFieldInfo Info;
2409
2410 // Drop the optional '.'.
2411 StringRef DotDispStr = Tok.getString();
2412 DotDispStr.consume_front(".");
2413 bool TrailingDot = false;
2414
2415 // .Imm gets lexed as a real.
2416 if (Tok.is(AsmToken::Real)) {
2417 APInt DotDisp;
2418 if (DotDispStr.getAsInteger(10, DotDisp))
2419 return Error(Tok.getLoc(), "Unexpected offset");
2420 Info.Offset = DotDisp.getZExtValue();
2421 } else if ((isParsingMSInlineAsm() || getParser().isParsingMasm()) &&
2422 Tok.is(AsmToken::Identifier)) {
2423 TrailingDot = DotDispStr.consume_back(".");
2424 const std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
2425 const StringRef Base = BaseMember.first, Member = BaseMember.second;
2426 if (getParser().lookUpField(SM.getType(), DotDispStr, Info) &&
2427 getParser().lookUpField(SM.getSymName(), DotDispStr, Info) &&
2428 getParser().lookUpField(DotDispStr, Info) &&
2429 (!SemaCallback ||
2430 SemaCallback->LookupInlineAsmField(Base, Member, Info.Offset)))
2431 return Error(Tok.getLoc(), "Unable to lookup field reference!");
2432 } else {
2433 return Error(Tok.getLoc(), "Unexpected token type!");
2434 }
2435
2436 // Eat the DotExpression and update End
2437 End = SMLoc::getFromPointer(DotDispStr.data());
2438 const char *DotExprEndLoc = DotDispStr.data() + DotDispStr.size();
2439 while (Tok.getLoc().getPointer() < DotExprEndLoc)
2440 Lex();
2441 if (TrailingDot)
2442 getLexer().UnLex(AsmToken(AsmToken::Dot, "."));
2443 SM.addImm(Info.Offset);
2444 SM.setTypeInfo(Info.Type);
2445 return false;
2446}
2447
2448/// Parse the 'offset' operator.
2449/// This operator is used to specify the location of a given operand
2450bool X86AsmParser::ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID,
2451 InlineAsmIdentifierInfo &Info,
2452 SMLoc &End) {
2453 // Eat offset, mark start of identifier.
2454 SMLoc Start = Lex().getLoc();
2455 ID = getTok().getString();
2456 if (!isParsingMSInlineAsm()) {
2457 if ((getTok().isNot(AsmToken::Identifier) &&
2458 getTok().isNot(AsmToken::String)) ||
2459 getParser().parsePrimaryExpr(Val, End, nullptr))
2460 return Error(Start, "unexpected token!");
2461 } else if (ParseIntelInlineAsmIdentifier(Val, ID, Info, false, End, true)) {
2462 return Error(Start, "unable to lookup expression");
2463 } else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal)) {
2464 return Error(Start, "offset operator cannot yet handle constants");
2465 }
2466 return false;
2467}
2468
2469// Query a candidate string for being an Intel assembly operator
2470// Report back its kind, or IOK_INVALID if does not evaluated as a known one
2471unsigned X86AsmParser::IdentifyIntelInlineAsmOperator(StringRef Name) {
2472 return StringSwitch<unsigned>(Name)
2473 .Cases("TYPE","type",IOK_TYPE)
2474 .Cases("SIZE","size",IOK_SIZE)
2475 .Cases("LENGTH","length",IOK_LENGTH)
2476 .Default(IOK_INVALID);
2477}
2478
2479/// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
2480/// returns the number of elements in an array. It returns the value 1 for
2481/// non-array variables. The SIZE operator returns the size of a C or C++
2482/// variable. A variable's size is the product of its LENGTH and TYPE. The
2483/// TYPE operator returns the size of a C or C++ type or variable. If the
2484/// variable is an array, TYPE returns the size of a single element.
2485unsigned X86AsmParser::ParseIntelInlineAsmOperator(unsigned OpKind) {
2486 MCAsmParser &Parser = getParser();
2487 const AsmToken &Tok = Parser.getTok();
2488 Parser.Lex(); // Eat operator.
2489
2490 const MCExpr *Val = nullptr;
2491 InlineAsmIdentifierInfo Info;
2492 SMLoc Start = Tok.getLoc(), End;
2493 StringRef Identifier = Tok.getString();
2494 if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info,
2495 /*IsUnevaluatedOperand=*/true, End))
2496 return 0;
2497
2499 Error(Start, "unable to lookup expression");
2500 return 0;
2501 }
2502
2503 unsigned CVal = 0;
2504 switch(OpKind) {
2505 default: llvm_unreachable("Unexpected operand kind!");
2506 case IOK_LENGTH: CVal = Info.Var.Length; break;
2507 case IOK_SIZE: CVal = Info.Var.Size; break;
2508 case IOK_TYPE: CVal = Info.Var.Type; break;
2509 }
2510
2511 return CVal;
2512}
2513
2514// Query a candidate string for being an Intel assembly operator
2515// Report back its kind, or IOK_INVALID if does not evaluated as a known one
2516unsigned X86AsmParser::IdentifyMasmOperator(StringRef Name) {
2517 return StringSwitch<unsigned>(Name.lower())
2518 .Case("type", MOK_TYPE)
2519 .Cases("size", "sizeof", MOK_SIZEOF)
2520 .Cases("length", "lengthof", MOK_LENGTHOF)
2521 .Default(MOK_INVALID);
2522}
2523
2524/// Parse the 'LENGTHOF', 'SIZEOF', and 'TYPE' operators. The LENGTHOF operator
2525/// returns the number of elements in an array. It returns the value 1 for
2526/// non-array variables. The SIZEOF operator returns the size of a type or
2527/// variable in bytes. A variable's size is the product of its LENGTH and TYPE.
2528/// The TYPE operator returns the size of a variable. If the variable is an
2529/// array, TYPE returns the size of a single element.
2530bool X86AsmParser::ParseMasmOperator(unsigned OpKind, int64_t &Val) {
2531 MCAsmParser &Parser = getParser();
2532 SMLoc OpLoc = Parser.getTok().getLoc();
2533 Parser.Lex(); // Eat operator.
2534
2535 Val = 0;
2536 if (OpKind == MOK_SIZEOF || OpKind == MOK_TYPE) {
2537 // Check for SIZEOF(<type>) and TYPE(<type>).
2538 bool InParens = Parser.getTok().is(AsmToken::LParen);
2539 const AsmToken &IDTok = InParens ? getLexer().peekTok() : Parser.getTok();
2540 AsmTypeInfo Type;
2541 if (IDTok.is(AsmToken::Identifier) &&
2542 !Parser.lookUpType(IDTok.getIdentifier(), Type)) {
2543 Val = Type.Size;
2544
2545 // Eat tokens.
2546 if (InParens)
2547 parseToken(AsmToken::LParen);
2548 parseToken(AsmToken::Identifier);
2549 if (InParens)
2550 parseToken(AsmToken::RParen);
2551 }
2552 }
2553
2554 if (!Val) {
2555 IntelExprStateMachine SM;
2556 SMLoc End, Start = Parser.getTok().getLoc();
2557 if (ParseIntelExpression(SM, End))
2558 return true;
2559
2560 switch (OpKind) {
2561 default:
2562 llvm_unreachable("Unexpected operand kind!");
2563 case MOK_SIZEOF:
2564 Val = SM.getSize();
2565 break;
2566 case MOK_LENGTHOF:
2567 Val = SM.getLength();
2568 break;
2569 case MOK_TYPE:
2570 Val = SM.getElementSize();
2571 break;
2572 }
2573
2574 if (!Val)
2575 return Error(OpLoc, "expression has unknown type", SMRange(Start, End));
2576 }
2577
2578 return false;
2579}
2580
2581bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size,
2582 StringRef *SizeStr) {
2583 Size = StringSwitch<unsigned>(getTok().getString())
2584 .Cases("BYTE", "byte", 8)
2585 .Cases("WORD", "word", 16)
2586 .Cases("DWORD", "dword", 32)
2587 .Cases("FLOAT", "float", 32)
2588 .Cases("LONG", "long", 32)
2589 .Cases("FWORD", "fword", 48)
2590 .Cases("DOUBLE", "double", 64)
2591 .Cases("QWORD", "qword", 64)
2592 .Cases("MMWORD","mmword", 64)
2593 .Cases("XWORD", "xword", 80)
2594 .Cases("TBYTE", "tbyte", 80)
2595 .Cases("XMMWORD", "xmmword", 128)
2596 .Cases("YMMWORD", "ymmword", 256)
2597 .Cases("ZMMWORD", "zmmword", 512)
2598 .Default(0);
2599 if (Size) {
2600 if (SizeStr)
2601 *SizeStr = getTok().getString();
2602 const AsmToken &Tok = Lex(); // Eat operand size (e.g., byte, word).
2603 if (!(Tok.getString() == "PTR" || Tok.getString() == "ptr"))
2604 return Error(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
2605 Lex(); // Eat ptr.
2606 }
2607 return false;
2608}
2609
2611 if (X86MCRegisterClasses[X86::GR8RegClassID].contains(RegNo))
2612 return 8;
2613 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(RegNo))
2614 return 16;
2615 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(RegNo))
2616 return 32;
2617 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo))
2618 return 64;
2619 // Unknown register size
2620 return 0;
2621}
2622
2623bool X86AsmParser::parseIntelOperand(OperandVector &Operands, StringRef Name) {
2624 MCAsmParser &Parser = getParser();
2625 const AsmToken &Tok = Parser.getTok();
2626 SMLoc Start, End;
2627
2628 // Parse optional Size directive.
2629 unsigned Size;
2630 StringRef SizeStr;
2631 if (ParseIntelMemoryOperandSize(Size, &SizeStr))
2632 return true;
2633 bool PtrInOperand = bool(Size);
2634
2635 Start = Tok.getLoc();
2636
2637 // Rounding mode operand.
2638 if (getLexer().is(AsmToken::LCurly))
2639 return ParseRoundingModeOp(Start, Operands);
2640
2641 // Register operand.
2642 MCRegister RegNo;
2643 if (Tok.is(AsmToken::Identifier) && !parseRegister(RegNo, Start, End)) {
2644 if (RegNo == X86::RIP)
2645 return Error(Start, "rip can only be used as a base register");
2646 // A Register followed by ':' is considered a segment override
2647 if (Tok.isNot(AsmToken::Colon)) {
2648 if (PtrInOperand) {
2649 if (!Parser.isParsingMasm())
2650 return Error(Start, "expected memory operand after 'ptr', "
2651 "found register operand instead");
2652
2653 // If we are parsing MASM, we are allowed to cast registers to their own
2654 // sizes, but not to other types.
2655 uint16_t RegSize =
2656 RegSizeInBits(*getContext().getRegisterInfo(), RegNo);
2657 if (RegSize == 0)
2658 return Error(
2659 Start,
2660 "cannot cast register '" +
2661 StringRef(getContext().getRegisterInfo()->getName(RegNo)) +
2662 "'; its size is not easily defined.");
2663 if (RegSize != Size)
2664 return Error(
2665 Start,
2666 std::to_string(RegSize) + "-bit register '" +
2667 StringRef(getContext().getRegisterInfo()->getName(RegNo)) +
2668 "' cannot be used as a " + std::to_string(Size) + "-bit " +
2669 SizeStr.upper());
2670 }
2671 Operands.push_back(X86Operand::CreateReg(RegNo, Start, End));
2672 return false;
2673 }
2674 // An alleged segment override. check if we have a valid segment register
2675 if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
2676 return Error(Start, "invalid segment register");
2677 // Eat ':' and update Start location
2678 Start = Lex().getLoc();
2679 }
2680
2681 // Immediates and Memory
2682 IntelExprStateMachine SM;
2683 if (ParseIntelExpression(SM, End))
2684 return true;
2685
2686 if (isParsingMSInlineAsm())
2687 RewriteIntelExpression(SM, Start, Tok.getLoc());
2688
2689 int64_t Imm = SM.getImm();
2690 const MCExpr *Disp = SM.getSym();
2691 const MCExpr *ImmDisp = MCConstantExpr::create(Imm, getContext());
2692 if (Disp && Imm)
2693 Disp = MCBinaryExpr::createAdd(Disp, ImmDisp, getContext());
2694 if (!Disp)
2695 Disp = ImmDisp;
2696
2697 // RegNo != 0 specifies a valid segment register,
2698 // and we are parsing a segment override
2699 if (!SM.isMemExpr() && !RegNo) {
2700 if (isParsingMSInlineAsm() && SM.isOffsetOperator()) {
2701 const InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
2703 // Disp includes the address of a variable; make sure this is recorded
2704 // for later handling.
2705 Operands.push_back(X86Operand::CreateImm(Disp, Start, End,
2706 SM.getSymName(), Info.Var.Decl,
2707 Info.Var.IsGlobalLV));
2708 return false;
2709 }
2710 }
2711
2712 Operands.push_back(X86Operand::CreateImm(Disp, Start, End));
2713 return false;
2714 }
2715
2716 StringRef ErrMsg;
2717 MCRegister BaseReg = SM.getBaseReg();
2718 MCRegister IndexReg = SM.getIndexReg();
2719 if (IndexReg && BaseReg == X86::RIP)
2720 BaseReg = MCRegister();
2721 unsigned Scale = SM.getScale();
2722 if (!PtrInOperand)
2723 Size = SM.getElementSize() << 3;
2724
2725 if (Scale == 0 && BaseReg != X86::ESP && BaseReg != X86::RSP &&
2726 (IndexReg == X86::ESP || IndexReg == X86::RSP))
2727 std::swap(BaseReg, IndexReg);
2728
2729 // If BaseReg is a vector register and IndexReg is not, swap them unless
2730 // Scale was specified in which case it would be an error.
2731 if (Scale == 0 &&
2732 !(X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) ||
2733 X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) ||
2734 X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg)) &&
2735 (X86MCRegisterClasses[X86::VR128XRegClassID].contains(BaseReg) ||
2736 X86MCRegisterClasses[X86::VR256XRegClassID].contains(BaseReg) ||
2737 X86MCRegisterClasses[X86::VR512RegClassID].contains(BaseReg)))
2738 std::swap(BaseReg, IndexReg);
2739
2740 if (Scale != 0 &&
2741 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg))
2742 return Error(Start, "16-bit addresses cannot have a scale");
2743
2744 // If there was no explicit scale specified, change it to 1.
2745 if (Scale == 0)
2746 Scale = 1;
2747
2748 // If this is a 16-bit addressing mode with the base and index in the wrong
2749 // order, swap them so CheckBaseRegAndIndexRegAndScale doesn't fail. It is
2750 // shared with att syntax where order matters.
2751 if ((BaseReg == X86::SI || BaseReg == X86::DI) &&
2752 (IndexReg == X86::BX || IndexReg == X86::BP))
2753 std::swap(BaseReg, IndexReg);
2754
2755 if ((BaseReg || IndexReg) &&
2756 CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
2757 ErrMsg))
2758 return Error(Start, ErrMsg);
2759 bool IsUnconditionalBranch =
2760 Name.equals_insensitive("jmp") || Name.equals_insensitive("call");
2761 if (isParsingMSInlineAsm())
2762 return CreateMemForMSInlineAsm(RegNo, Disp, BaseReg, IndexReg, Scale,
2763 IsUnconditionalBranch && is64BitMode(),
2764 Start, End, Size, SM.getSymName(),
2765 SM.getIdentifierInfo(), Operands);
2766
2767 // When parsing x64 MS-style assembly, all non-absolute references to a named
2768 // variable default to RIP-relative.
2769 MCRegister DefaultBaseReg;
2770 bool MaybeDirectBranchDest = true;
2771
2772 if (Parser.isParsingMasm()) {
2773 if (is64BitMode() &&
2774 ((PtrInOperand && !IndexReg) || SM.getElementSize() > 0)) {
2775 DefaultBaseReg = X86::RIP;
2776 }
2777 if (IsUnconditionalBranch) {
2778 if (PtrInOperand) {
2779 MaybeDirectBranchDest = false;
2780 if (is64BitMode())
2781 DefaultBaseReg = X86::RIP;
2782 } else if (!BaseReg && !IndexReg && Disp &&
2783 Disp->getKind() == MCExpr::SymbolRef) {
2784 if (is64BitMode()) {
2785 if (SM.getSize() == 8) {
2786 MaybeDirectBranchDest = false;
2787 DefaultBaseReg = X86::RIP;
2788 }
2789 } else {
2790 if (SM.getSize() == 4 || SM.getSize() == 2)
2791 MaybeDirectBranchDest = false;
2792 }
2793 }
2794 }
2795 } else if (IsUnconditionalBranch) {
2796 // Treat `call [offset fn_ref]` (or `jmp`) syntax as an error.
2797 if (!PtrInOperand && SM.isOffsetOperator())
2798 return Error(
2799 Start, "`OFFSET` operator cannot be used in an unconditional branch");
2800 if (PtrInOperand || SM.isBracketUsed())
2801 MaybeDirectBranchDest = false;
2802 }
2803
2804 if ((BaseReg || IndexReg || RegNo || DefaultBaseReg))
2806 getPointerWidth(), RegNo, Disp, BaseReg, IndexReg, Scale, Start, End,
2807 Size, DefaultBaseReg, /*SymName=*/StringRef(), /*OpDecl=*/nullptr,
2808 /*FrontendSize=*/0, /*UseUpRegs=*/false, MaybeDirectBranchDest));
2809 else
2811 getPointerWidth(), Disp, Start, End, Size, /*SymName=*/StringRef(),
2812 /*OpDecl=*/nullptr, /*FrontendSize=*/0, /*UseUpRegs=*/false,
2813 MaybeDirectBranchDest));
2814 return false;
2815}
2816
2817bool X86AsmParser::parseATTOperand(OperandVector &Operands) {
2818 MCAsmParser &Parser = getParser();
2819 switch (getLexer().getKind()) {
2820 case AsmToken::Dollar: {
2821 // $42 or $ID -> immediate.
2822 SMLoc Start = Parser.getTok().getLoc(), End;
2823 Parser.Lex();
2824 const MCExpr *Val;
2825 // This is an immediate, so we should not parse a register. Do a precheck
2826 // for '%' to supercede intra-register parse errors.
2827 SMLoc L = Parser.getTok().getLoc();
2828 if (check(getLexer().is(AsmToken::Percent), L,
2829 "expected immediate expression") ||
2830 getParser().parseExpression(Val, End) ||
2831 check(isa<X86MCExpr>(Val), L, "expected immediate expression"))
2832 return true;
2833 Operands.push_back(X86Operand::CreateImm(Val, Start, End));
2834 return false;
2835 }
2836 case AsmToken::LCurly: {
2837 SMLoc Start = Parser.getTok().getLoc();
2838 return ParseRoundingModeOp(Start, Operands);
2839 }
2840 default: {
2841 // This a memory operand or a register. We have some parsing complications
2842 // as a '(' may be part of an immediate expression or the addressing mode
2843 // block. This is complicated by the fact that an assembler-level variable
2844 // may refer either to a register or an immediate expression.
2845
2846 SMLoc Loc = Parser.getTok().getLoc(), EndLoc;
2847 const MCExpr *Expr = nullptr;
2848 MCRegister Reg;
2849 if (getLexer().isNot(AsmToken::LParen)) {
2850 // No '(' so this is either a displacement expression or a register.
2851 if (Parser.parseExpression(Expr, EndLoc))
2852 return true;
2853 if (auto *RE = dyn_cast<X86MCExpr>(Expr)) {
2854 // Segment Register. Reset Expr and copy value to register.
2855 Expr = nullptr;
2856 Reg = RE->getReg();
2857
2858 // Check the register.
2859 if (Reg == X86::EIZ || Reg == X86::RIZ)
2860 return Error(
2861 Loc, "%eiz and %riz can only be used as index registers",
2862 SMRange(Loc, EndLoc));
2863 if (Reg == X86::RIP)
2864 return Error(Loc, "%rip can only be used as a base register",
2865 SMRange(Loc, EndLoc));
2866 // Return register that are not segment prefixes immediately.
2867 if (!Parser.parseOptionalToken(AsmToken::Colon)) {
2868 Operands.push_back(X86Operand::CreateReg(Reg, Loc, EndLoc));
2869 return false;
2870 }
2871 if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(Reg))
2872 return Error(Loc, "invalid segment register");
2873 // Accept a '*' absolute memory reference after the segment. Place it
2874 // before the full memory operand.
2875 if (getLexer().is(AsmToken::Star))
2876 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
2877 }
2878 }
2879 // This is a Memory operand.
2880 return ParseMemOperand(Reg, Expr, Loc, EndLoc, Operands);
2881 }
2882 }
2883}
2884
2885// X86::COND_INVALID if not a recognized condition code or alternate mnemonic,
2886// otherwise the EFLAGS Condition Code enumerator.
2887X86::CondCode X86AsmParser::ParseConditionCode(StringRef CC) {
2888 return StringSwitch<X86::CondCode>(CC)
2889 .Case("o", X86::COND_O) // Overflow
2890 .Case("no", X86::COND_NO) // No Overflow
2891 .Cases("b", "nae", X86::COND_B) // Below/Neither Above nor Equal
2892 .Cases("ae", "nb", X86::COND_AE) // Above or Equal/Not Below
2893 .Cases("e", "z", X86::COND_E) // Equal/Zero
2894 .Cases("ne", "nz", X86::COND_NE) // Not Equal/Not Zero
2895 .Cases("be", "na", X86::COND_BE) // Below or Equal/Not Above
2896 .Cases("a", "nbe", X86::COND_A) // Above/Neither Below nor Equal
2897 .Case("s", X86::COND_S) // Sign
2898 .Case("ns", X86::COND_NS) // No Sign
2899 .Cases("p", "pe", X86::COND_P) // Parity/Parity Even
2900 .Cases("np", "po", X86::COND_NP) // No Parity/Parity Odd
2901 .Cases("l", "nge", X86::COND_L) // Less/Neither Greater nor Equal
2902 .Cases("ge", "nl", X86::COND_GE) // Greater or Equal/Not Less
2903 .Cases("le", "ng", X86::COND_LE) // Less or Equal/Not Greater
2904 .Cases("g", "nle", X86::COND_G) // Greater/Neither Less nor Equal
2905 .Default(X86::COND_INVALID);
2906}
2907
2908// true on failure, false otherwise
2909// If no {z} mark was found - Parser doesn't advance
2910bool X86AsmParser::ParseZ(std::unique_ptr<X86Operand> &Z, SMLoc StartLoc) {
2911 MCAsmParser &Parser = getParser();
2912 // Assuming we are just pass the '{' mark, quering the next token
2913 // Searched for {z}, but none was found. Return false, as no parsing error was
2914 // encountered
2915 if (!(getLexer().is(AsmToken::Identifier) &&
2916 (getLexer().getTok().getIdentifier() == "z")))
2917 return false;
2918 Parser.Lex(); // Eat z
2919 // Query and eat the '}' mark
2920 if (!getLexer().is(AsmToken::RCurly))
2921 return Error(getLexer().getLoc(), "Expected } at this point");
2922 Parser.Lex(); // Eat '}'
2923 // Assign Z with the {z} mark operand
2924 Z = X86Operand::CreateToken("{z}", StartLoc);
2925 return false;
2926}
2927
// Parse trailing AVX-512 operand decorations after a '{': either a memory
// broadcast ({1to<NUM>}) or a write-mask / zeroing combination ({k}{z},
// {z}{k}, {k} or {z}). Returns true on failure, false otherwise.
bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands) {
  MCAsmParser &Parser = getParser();
  if (getLexer().is(AsmToken::LCurly)) {
    // Eat "{" and mark the current place.
    const SMLoc consumedToken = consumeToken();
    // Distinguish {1to<NUM>} from {%k<NUM>}: a broadcast starts with an
    // integer token, a mask with a register.
    if (getLexer().is(AsmToken::Integer)) {
      // Parse memory broadcasting ({1to<NUM>}).
      if (getLexer().getTok().getIntVal() != 1)
        return TokError("Expected 1to<NUM> at this point");
      StringRef Prefix = getLexer().getTok().getString();
      Parser.Lex(); // Eat the first token of 1to8 (the "1").
      if (!getLexer().is(AsmToken::Identifier))
        return TokError("Expected 1to<NUM> at this point");
      // Recognize only reasonable suffixes.
      SmallVector<char, 5> BroadcastVector;
      // Re-assemble the full "1to<NUM>" text from the two lexed tokens.
      StringRef BroadcastString = (Prefix + getLexer().getTok().getIdentifier())
                                      .toStringRef(BroadcastVector);
      if (!BroadcastString.starts_with("1to"))
        return TokError("Expected 1to<NUM> at this point");
      const char *BroadcastPrimitive =
          StringSwitch<const char *>(BroadcastString)
              .Case("1to2", "{1to2}")
              .Case("1to4", "{1to4}")
              .Case("1to8", "{1to8}")
              .Case("1to16", "{1to16}")
              .Case("1to32", "{1to32}")
              .Default(nullptr);
      if (!BroadcastPrimitive)
        return TokError("Invalid memory broadcast primitive.");
      Parser.Lex(); // Eat the trailing token of 1toN.
      if (!getLexer().is(AsmToken::RCurly))
        return TokError("Expected } at this point");
      Parser.Lex(); // Eat "}"
      Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
                                                 consumedToken));
      // No AVX512 specific primitives can pass
      // after memory broadcasting, so return.
      return false;
    } else {
      // Parse either {k}{z}, {z}{k}, {k} or {z}.
      // The last one has no effect, but GCC accepts it so we do too.
      // At this point we have just consumed a '{'.
      std::unique_ptr<X86Operand> Z;
      if (ParseZ(Z, consumedToken))
        return true;
      // Reaching here means that parsing the alleged '{z}' mark yielded
      // no errors.
      // Check whether a {%k<NUM>} mark still needs to be parsed: either no
      // {z} was found, or another '{' follows it.
      if (!Z || getLexer().is(AsmToken::LCurly)) {
        SMLoc StartLoc = Z ? consumeToken() : consumedToken;
        // Parse an op-mask register mark ({%k<NUM>}), which is now to be
        // expected
        MCRegister RegNo;
        SMLoc RegLoc;
        if (!parseRegister(RegNo, RegLoc, StartLoc) &&
            X86MCRegisterClasses[X86::VK1RegClassID].contains(RegNo)) {
          // k0 is the implicit "no masking" register and cannot be written.
          if (RegNo == X86::K0)
            return Error(RegLoc, "Register k0 can't be used as write mask");
          if (!getLexer().is(AsmToken::RCurly))
            return Error(getLexer().getLoc(), "Expected } at this point");
          Operands.push_back(X86Operand::CreateToken("{", StartLoc));
          Operands.push_back(
              X86Operand::CreateReg(RegNo, StartLoc, StartLoc));
          Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
        } else
          return Error(getLexer().getLoc(),
                       "Expected an op-mask register at this point");
        // {%k<NUM>} mark is found, inquire for {z}
        if (getLexer().is(AsmToken::LCurly) && !Z) {
          // If parsing fails or no (expected) {z} mark is found,
          // report an error.
          if (ParseZ(Z, consumeToken()) || !Z)
            return Error(getLexer().getLoc(),
                         "Expected a {z} mark at this point");

        }
        // '{z}' on its own is meaningless, hence should be ignored.
        // on the contrary - have it been accompanied by a K register,
        // allow it.
        if (Z)
          Operands.push_back(std::move(Z));
      }
    }
  }
  return false;
}
3016
3017/// ParseMemOperand: 'seg : disp(basereg, indexreg, scale)'. The '%ds:' prefix
3018/// has already been parsed if present. disp may be provided as well.
3019bool X86AsmParser::ParseMemOperand(MCRegister SegReg, const MCExpr *Disp,
3020 SMLoc StartLoc, SMLoc EndLoc,
3022 MCAsmParser &Parser = getParser();
3023 SMLoc Loc;
3024 // Based on the initial passed values, we may be in any of these cases, we are
3025 // in one of these cases (with current position (*)):
3026
3027 // 1. seg : * disp (base-index-scale-expr)
3028 // 2. seg : *(disp) (base-index-scale-expr)
3029 // 3. seg : *(base-index-scale-expr)
3030 // 4. disp *(base-index-scale-expr)
3031 // 5. *(disp) (base-index-scale-expr)
3032 // 6. *(base-index-scale-expr)
3033 // 7. disp *
3034 // 8. *(disp)
3035
3036 // If we do not have an displacement yet, check if we're in cases 4 or 6 by
3037 // checking if the first object after the parenthesis is a register (or an
3038 // identifier referring to a register) and parse the displacement or default
3039 // to 0 as appropriate.
3040 auto isAtMemOperand = [this]() {
3041 if (this->getLexer().isNot(AsmToken::LParen))
3042 return false;
3043 AsmToken Buf[2];
3044 StringRef Id;
3045 auto TokCount = this->getLexer().peekTokens(Buf, true);
3046 if (TokCount == 0)
3047 return false;
3048 switch (Buf[0].getKind()) {
3049 case AsmToken::Percent:
3050 case AsmToken::Comma:
3051 return true;
3052 // These lower cases are doing a peekIdentifier.
3053 case AsmToken::At:
3054 case AsmToken::Dollar:
3055 if ((TokCount > 1) &&
3056 (Buf[1].is(AsmToken::Identifier) || Buf[1].is(AsmToken::String)) &&
3057 (Buf[0].getLoc().getPointer() + 1 == Buf[1].getLoc().getPointer()))
3058 Id = StringRef(Buf[0].getLoc().getPointer(),
3059 Buf[1].getIdentifier().size() + 1);
3060 break;
3062 case AsmToken::String:
3063 Id = Buf[0].getIdentifier();
3064 break;
3065 default:
3066 return false;
3067 }
3068 // We have an ID. Check if it is bound to a register.
3069 if (!Id.empty()) {
3070 MCSymbol *Sym = this->getContext().getOrCreateSymbol(Id);
3071 if (Sym->isVariable()) {
3072 auto V = Sym->getVariableValue();
3073 return isa<X86MCExpr>(V);
3074 }
3075 }
3076 return false;
3077 };
3078
3079 if (!Disp) {
3080 // Parse immediate if we're not at a mem operand yet.
3081 if (!isAtMemOperand()) {
3082 if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(Disp, EndLoc))
3083 return true;
3084 assert(!isa<X86MCExpr>(Disp) && "Expected non-register here.");
3085 } else {
3086 // Disp is implicitly zero if we haven't parsed it yet.
3087 Disp = MCConstantExpr::create(0, Parser.getContext());
3088 }
3089 }
3090
3091 // We are now either at the end of the operand or at the '(' at the start of a
3092 // base-index-scale-expr.
3093
3094 if (!parseOptionalToken(AsmToken::LParen)) {
3095 if (!SegReg)
3096 Operands.push_back(
3097 X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc));
3098 else
3099 Operands.push_back(X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
3100 0, 0, 1, StartLoc, EndLoc));
3101 return false;
3102 }
3103
3104 // If we reached here, then eat the '(' and Process
3105 // the rest of the memory operand.
3106 MCRegister BaseReg, IndexReg;
3107 unsigned Scale = 1;
3108 SMLoc BaseLoc = getLexer().getLoc();
3109 const MCExpr *E;
3110 StringRef ErrMsg;
3111
3112 // Parse BaseReg if one is provided.
3113 if (getLexer().isNot(AsmToken::Comma) && getLexer().isNot(AsmToken::RParen)) {
3114 if (Parser.parseExpression(E, EndLoc) ||
3115 check(!isa<X86MCExpr>(E), BaseLoc, "expected register here"))
3116 return true;
3117
3118 // Check the register.
3119 BaseReg = cast<X86MCExpr>(E)->getReg();
3120 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ)
3121 return Error(BaseLoc, "eiz and riz can only be used as index registers",
3122 SMRange(BaseLoc, EndLoc));
3123 }
3124
3125 if (parseOptionalToken(AsmToken::Comma)) {
3126 // Following the comma we should have either an index register, or a scale
3127 // value. We don't support the later form, but we want to parse it
3128 // correctly.
3129 //
3130 // Even though it would be completely consistent to support syntax like
3131 // "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
3132 if (getLexer().isNot(AsmToken::RParen)) {
3133 if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(E, EndLoc))
3134 return true;
3135
3136 if (!isa<X86MCExpr>(E)) {
3137 // We've parsed an unexpected Scale Value instead of an index
3138 // register. Interpret it as an absolute.
3139 int64_t ScaleVal;
3140 if (!E->evaluateAsAbsolute(ScaleVal, getStreamer().getAssemblerPtr()))
3141 return Error(Loc, "expected absolute expression");
3142 if (ScaleVal != 1)
3143 Warning(Loc, "scale factor without index register is ignored");
3144 Scale = 1;
3145 } else { // IndexReg Found.
3146 IndexReg = cast<X86MCExpr>(E)->getReg();
3147
3148 if (BaseReg == X86::RIP)
3149 return Error(Loc,
3150 "%rip as base register can not have an index register");
3151 if (IndexReg == X86::RIP)
3152 return Error(Loc, "%rip is not allowed as an index register");
3153
3154 if (parseOptionalToken(AsmToken::Comma)) {
3155 // Parse the scale amount:
3156 // ::= ',' [scale-expression]
3157
3158 // A scale amount without an index is ignored.
3159 if (getLexer().isNot(AsmToken::RParen)) {
3160 int64_t ScaleVal;
3161 if (Parser.parseTokenLoc(Loc) ||
3162 Parser.parseAbsoluteExpression(ScaleVal))
3163 return Error(Loc, "expected scale expression");
3164 Scale = (unsigned)ScaleVal;
3165 // Validate the scale amount.
3166 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
3167 Scale != 1)
3168 return Error(Loc, "scale factor in 16-bit address must be 1");
3169 if (checkScale(Scale, ErrMsg))
3170 return Error(Loc, ErrMsg);
3171 }
3172 }
3173 }
3174 }
3175 }
3176
3177 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
3178 if (parseToken(AsmToken::RParen, "unexpected token in memory operand"))
3179 return true;
3180
3181 // This is to support otherwise illegal operand (%dx) found in various
3182 // unofficial manuals examples (e.g. "out[s]?[bwl]? %al, (%dx)") and must now
3183 // be supported. Mark such DX variants separately fix only in special cases.
3184 if (BaseReg == X86::DX && !IndexReg && Scale == 1 && !SegReg &&
3185 isa<MCConstantExpr>(Disp) &&
3186 cast<MCConstantExpr>(Disp)->getValue() == 0) {
3187 Operands.push_back(X86Operand::CreateDXReg(BaseLoc, BaseLoc));
3188 return false;
3189 }
3190
3191 if (CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
3192 ErrMsg))
3193 return Error(BaseLoc, ErrMsg);
3194
3195 // If the displacement is a constant, check overflows. For 64-bit addressing,
3196 // gas requires isInt<32> and otherwise reports an error. For others, gas
3197 // reports a warning and allows a wider range. E.g. gas allows
3198 // [-0xffffffff,0xffffffff] for 32-bit addressing (e.g. Linux kernel uses
3199 // `leal -__PAGE_OFFSET(%ecx),%esp` where __PAGE_OFFSET is 0xc0000000).
3200 if (BaseReg || IndexReg) {
3201 if (auto CE = dyn_cast<MCConstantExpr>(Disp)) {
3202 auto Imm = CE->getValue();
3203 bool Is64 = X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) ||
3204 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg);
3205 bool Is16 = X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg);
3206 if (Is64) {
3207 if (!isInt<32>(Imm))
3208 return Error(BaseLoc, "displacement " + Twine(Imm) +
3209 " is not within [-2147483648, 2147483647]");
3210 } else if (!Is16) {
3211 if (!isUInt<32>(Imm < 0 ? -uint64_t(Imm) : uint64_t(Imm))) {
3212 Warning(BaseLoc, "displacement " + Twine(Imm) +
3213 " shortened to 32-bit signed " +
3214 Twine(static_cast<int32_t>(Imm)));
3215 }
3216 } else if (!isUInt<16>(Imm < 0 ? -uint64_t(Imm) : uint64_t(Imm))) {
3217 Warning(BaseLoc, "displacement " + Twine(Imm) +
3218 " shortened to 16-bit signed " +
3219 Twine(static_cast<int16_t>(Imm)));
3220 }
3221 }
3222 }
3223
3224 if (SegReg || BaseReg || IndexReg)
3225 Operands.push_back(X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
3226 BaseReg, IndexReg, Scale, StartLoc,
3227 EndLoc));
3228 else
3229 Operands.push_back(
3230 X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc));
3231 return false;
3232}
3233
3234// Parse either a standard primary expression or a register.
3235bool X86AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
3236 MCAsmParser &Parser = getParser();
3237 // See if this is a register first.
3238 if (getTok().is(AsmToken::Percent) ||
3239 (isParsingIntelSyntax() && getTok().is(AsmToken::Identifier) &&
3240 MatchRegisterName(Parser.getTok().getString()))) {
3241 SMLoc StartLoc = Parser.getTok().getLoc();
3242 MCRegister RegNo;
3243 if (parseRegister(RegNo, StartLoc, EndLoc))
3244 return true;
3245 Res = X86MCExpr::create(RegNo, Parser.getContext());
3246 return false;
3247 }
3248 return Parser.parsePrimaryExpr(Res, EndLoc, nullptr);
3249}
3250
3251bool X86AsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name,
3252 SMLoc NameLoc, OperandVector &Operands) {
3253 MCAsmParser &Parser = getParser();
3254 InstInfo = &Info;
3255
3256 // Reset the forced VEX encoding.
3257 ForcedOpcodePrefix = OpcodePrefix_Default;
3258 ForcedDispEncoding = DispEncoding_Default;
3259 UseApxExtendedReg = false;
3260 ForcedNoFlag = false;
3261
3262 // Parse pseudo prefixes.
3263 while (true) {
3264 if (Name == "{") {
3265 if (getLexer().isNot(AsmToken::Identifier))
3266 return Error(Parser.getTok().getLoc(), "Unexpected token after '{'");
3267 std::string Prefix = Parser.getTok().getString().lower();
3268 Parser.Lex(); // Eat identifier.
3269 if (getLexer().isNot(AsmToken::RCurly))
3270 return Error(Parser.getTok().getLoc(), "Expected '}'");
3271 Parser.Lex(); // Eat curly.
3272
3273 if (Prefix == "rex")
3274 ForcedOpcodePrefix = OpcodePrefix_REX;
3275 else if (Prefix == "rex2")
3276 ForcedOpcodePrefix = OpcodePrefix_REX2;
3277 else if (Prefix == "vex")
3278 ForcedOpcodePrefix = OpcodePrefix_VEX;
3279 else if (Prefix == "vex2")
3280 ForcedOpcodePrefix = OpcodePrefix_VEX2;
3281 else if (Prefix == "vex3")
3282 ForcedOpcodePrefix = OpcodePrefix_VEX3;
3283 else if (Prefix == "evex")
3284 ForcedOpcodePrefix = OpcodePrefix_EVEX;
3285 else if (Prefix == "disp8")
3286 ForcedDispEncoding = DispEncoding_Disp8;
3287 else if (Prefix == "disp32")
3288 ForcedDispEncoding = DispEncoding_Disp32;
3289 else if (Prefix == "nf")
3290 ForcedNoFlag = true;
3291 else
3292 return Error(NameLoc, "unknown prefix");
3293
3294 NameLoc = Parser.getTok().getLoc();
3295 if (getLexer().is(AsmToken::LCurly)) {
3296 Parser.Lex();
3297 Name = "{";
3298 } else {
3299 if (getLexer().isNot(AsmToken::Identifier))
3300 return Error(Parser.getTok().getLoc(), "Expected identifier");
3301 // FIXME: The mnemonic won't match correctly if its not in lower case.
3302 Name = Parser.getTok().getString();
3303 Parser.Lex();
3304 }
3305 continue;
3306 }
3307 // Parse MASM style pseudo prefixes.
3308 if (isParsingMSInlineAsm()) {
3309 if (Name.equals_insensitive("vex"))
3310 ForcedOpcodePrefix = OpcodePrefix_VEX;
3311 else if (Name.equals_insensitive("vex2"))
3312 ForcedOpcodePrefix = OpcodePrefix_VEX2;
3313 else if (Name.equals_insensitive("vex3"))
3314 ForcedOpcodePrefix = OpcodePrefix_VEX3;
3315 else if (Name.equals_insensitive("evex"))
3316 ForcedOpcodePrefix = OpcodePrefix_EVEX;
3317
3318 if (ForcedOpcodePrefix != OpcodePrefix_Default) {
3319 if (getLexer().isNot(AsmToken::Identifier))
3320 return Error(Parser.getTok().getLoc(), "Expected identifier");
3321 // FIXME: The mnemonic won't match correctly if its not in lower case.
3322 Name = Parser.getTok().getString();
3323 NameLoc = Parser.getTok().getLoc();
3324 Parser.Lex();
3325 }
3326 }
3327 break;
3328 }
3329
3330 // Support the suffix syntax for overriding displacement size as well.
3331 if (Name.consume_back(".d32")) {
3332 ForcedDispEncoding = DispEncoding_Disp32;
3333 } else if (Name.consume_back(".d8")) {
3334 ForcedDispEncoding = DispEncoding_Disp8;
3335 }
3336
3337 StringRef PatchedName = Name;
3338
3339 // Hack to skip "short" following Jcc.
3340 if (isParsingIntelSyntax() &&
3341 (PatchedName == "jmp" || PatchedName == "jc" || PatchedName == "jnc" ||
3342 PatchedName == "jcxz" || PatchedName == "jecxz" ||
3343 (PatchedName.starts_with("j") &&
3344 ParseConditionCode(PatchedName.substr(1)) != X86::COND_INVALID))) {
3345 StringRef NextTok = Parser.getTok().getString();
3346 if (Parser.isParsingMasm() ? NextTok.equals_insensitive("short")
3347 : NextTok == "short") {
3348 SMLoc NameEndLoc =
3349 NameLoc.getFromPointer(NameLoc.getPointer() + Name.size());
3350 // Eat the short keyword.
3351 Parser.Lex();
3352 // MS and GAS ignore the short keyword; they both determine the jmp type
3353 // based on the distance of the label. (NASM does emit different code with
3354 // and without "short," though.)
3355 InstInfo->AsmRewrites->emplace_back(AOK_Skip, NameEndLoc,
3356 NextTok.size() + 1);
3357 }
3358 }
3359
3360 // FIXME: Hack to recognize setneb as setne.
3361 if (PatchedName.starts_with("set") && PatchedName.ends_with("b") &&
3362 PatchedName != "setzub" && PatchedName != "setzunb" &&
3363 PatchedName != "setb" && PatchedName != "setnb")
3364 PatchedName = PatchedName.substr(0, Name.size()-1);
3365
3366 unsigned ComparisonPredicate = ~0U;
3367
3368 // FIXME: Hack to recognize cmp<comparison code>{sh,ss,sd,ph,ps,pd}.
3369 if ((PatchedName.starts_with("cmp") || PatchedName.starts_with("vcmp")) &&
3370 (PatchedName.ends_with("ss") || PatchedName.ends_with("sd") ||
3371 PatchedName.ends_with("sh") || PatchedName.ends_with("ph") ||
3372 PatchedName.ends_with("bf16") || PatchedName.ends_with("ps") ||
3373 PatchedName.ends_with("pd"))) {
3374 bool IsVCMP = PatchedName[0] == 'v';
3375 unsigned CCIdx = IsVCMP ? 4 : 3;
3376 unsigned suffixLength = PatchedName.ends_with("bf16") ? 5 : 2;
3377 unsigned CC = StringSwitch<unsigned>(
3378 PatchedName.slice(CCIdx, PatchedName.size() - suffixLength))
3379 .Case("eq", 0x00)
3380 .Case("eq_oq", 0x00)
3381 .Case("lt", 0x01)
3382 .Case("lt_os", 0x01)
3383 .Case("le", 0x02)
3384 .Case("le_os", 0x02)
3385 .Case("unord", 0x03)
3386 .Case("unord_q", 0x03)
3387 .Case("neq", 0x04)
3388 .Case("neq_uq", 0x04)
3389 .Case("nlt", 0x05)
3390 .Case("nlt_us", 0x05)
3391 .Case("nle", 0x06)
3392 .Case("nle_us", 0x06)
3393 .Case("ord", 0x07)
3394 .Case("ord_q", 0x07)
3395 /* AVX only from here */
3396 .Case("eq_uq", 0x08)
3397 .Case("nge", 0x09)
3398 .Case("nge_us", 0x09)
3399 .Case("ngt", 0x0A)
3400 .Case("ngt_us", 0x0A)
3401 .Case("false", 0x0B)
3402 .Case("false_oq", 0x0B)
3403 .Case("neq_oq", 0x0C)
3404 .Case("ge", 0x0D)
3405 .Case("ge_os", 0x0D)
3406 .Case("gt", 0x0E)
3407 .Case("gt_os", 0x0E)
3408 .Case("true", 0x0F)
3409 .Case("true_uq", 0x0F)
3410 .Case("eq_os", 0x10)
3411 .Case("lt_oq", 0x11)
3412 .Case("le_oq", 0x12)
3413 .Case("unord_s", 0x13)
3414 .Case("neq_us", 0x14)
3415 .Case("nlt_uq", 0x15)
3416 .Case("nle_uq", 0x16)
3417 .Case("ord_s", 0x17)
3418 .Case("eq_us", 0x18)
3419 .Case("nge_uq", 0x19)
3420 .Case("ngt_uq", 0x1A)
3421 .Case("false_os", 0x1B)
3422 .Case("neq_os", 0x1C)
3423 .Case("ge_oq", 0x1D)
3424 .Case("gt_oq", 0x1E)
3425 .Case("true_us", 0x1F)
3426 .Default(~0U);
3427 if (CC != ~0U && (IsVCMP || CC < 8) &&
3428 (IsVCMP || PatchedName.back() != 'h')) {
3429 if (PatchedName.ends_with("ss"))
3430 PatchedName = IsVCMP ? "vcmpss" : "cmpss";
3431 else if (PatchedName.ends_with("sd"))
3432 PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
3433 else if (PatchedName.ends_with("ps"))
3434 PatchedName = IsVCMP ? "vcmpps" : "cmpps";
3435 else if (PatchedName.ends_with("pd"))
3436 PatchedName = IsVCMP ? "vcmppd" : "cmppd";
3437 else if (PatchedName.ends_with("sh"))
3438 PatchedName = "vcmpsh";
3439 else if (PatchedName.ends_with("ph"))
3440 PatchedName = "vcmpph";
3441 else if (PatchedName.ends_with("bf16"))
3442 PatchedName = "vcmpbf16";
3443 else
3444 llvm_unreachable("Unexpected suffix!");
3445
3446 ComparisonPredicate = CC;
3447 }
3448 }
3449
3450 // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
3451 if (PatchedName.starts_with("vpcmp") &&
3452 (PatchedName.back() == 'b' || PatchedName.back() == 'w' ||
3453 PatchedName.back() == 'd' || PatchedName.back() == 'q')) {
3454 unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1;
3455 unsigned CC = StringSwitch<unsigned>(
3456 PatchedName.slice(5, PatchedName.size() - SuffixSize))
3457 .Case("eq", 0x0) // Only allowed on unsigned. Checked below.
3458 .Case("lt", 0x1)
3459 .Case("le", 0x2)
3460 //.Case("false", 0x3) // Not a documented alias.
3461 .Case("neq", 0x4)
3462 .Case("nlt", 0x5)
3463 .Case("nle", 0x6)
3464 //.Case("true", 0x7) // Not a documented alias.
3465 .Default(~0U);
3466 if (CC != ~0U && (CC != 0 || SuffixSize == 2)) {
3467 switch (PatchedName.back()) {
3468 default: llvm_unreachable("Unexpected character!");
3469 case 'b': PatchedName = SuffixSize == 2 ? "vpcmpub" : "vpcmpb"; break;
3470 case 'w': PatchedName = SuffixSize == 2 ? "vpcmpuw" : "vpcmpw"; break;
3471 case 'd': PatchedName = SuffixSize == 2 ? "vpcmpud" : "vpcmpd"; break;
3472 case 'q': PatchedName = SuffixSize == 2 ? "vpcmpuq" : "vpcmpq"; break;
3473 }
3474 // Set up the immediate to push into the operands later.
3475 ComparisonPredicate = CC;
3476 }
3477 }
3478
3479 // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
3480 if (PatchedName.starts_with("vpcom") &&
3481 (PatchedName.back() == 'b' || PatchedName.back() == 'w' ||
3482 PatchedName.back() == 'd' || PatchedName.back() == 'q')) {
3483 unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1;
3484 unsigned CC = StringSwitch<unsigned>(
3485 PatchedName.slice(5, PatchedName.size() - SuffixSize))
3486 .Case("lt", 0x0)
3487 .Case("le", 0x1)
3488 .Case("gt", 0x2)
3489 .Case("ge", 0x3)
3490 .Case("eq", 0x4)
3491 .Case("neq", 0x5)
3492 .Case("false", 0x6)
3493 .Case("true", 0x7)
3494 .Default(~0U);
3495 if (CC != ~0U) {
3496 switch (PatchedName.back()) {
3497 default: llvm_unreachable("Unexpected character!");
3498 case 'b': PatchedName = SuffixSize == 2 ? "vpcomub" : "vpcomb"; break;
3499 case 'w': PatchedName = SuffixSize == 2 ? "vpcomuw" : "vpcomw"; break;
3500 case 'd': PatchedName = SuffixSize == 2 ? "vpcomud" : "vpcomd"; break;
3501 case 'q': PatchedName = SuffixSize == 2 ? "vpcomuq" : "vpcomq"; break;
3502 }
3503 // Set up the immediate to push into the operands later.
3504 ComparisonPredicate = CC;
3505 }
3506 }
3507
3508 // Determine whether this is an instruction prefix.
3509 // FIXME:
3510 // Enhance prefixes integrity robustness. for example, following forms
3511 // are currently tolerated:
3512 // repz repnz <insn> ; GAS errors for the use of two similar prefixes
3513 // lock addq %rax, %rbx ; Destination operand must be of memory type
3514 // xacquire <insn> ; xacquire must be accompanied by 'lock'
3515 bool IsPrefix =
3516 StringSwitch<bool>(Name)
3517 .Cases("cs", "ds", "es", "fs", "gs", "ss", true)
3518 .Cases("rex64", "data32", "data16", "addr32", "addr16", true)
3519 .Cases("xacquire", "xrelease", true)
3520 .Cases("acquire", "release", isParsingIntelSyntax())
3521 .Default(false);
3522
3523 auto isLockRepeatNtPrefix = [](StringRef N) {
3524 return StringSwitch<bool>(N)
3525 .Cases("lock", "rep", "repe", "repz", "repne", "repnz", "notrack", true)
3526 .Default(false);
3527 };
3528
3529 bool CurlyAsEndOfStatement = false;
3530
3531 unsigned Flags = X86::IP_NO_PREFIX;
3532 while (isLockRepeatNtPrefix(Name.lower())) {
3533 unsigned Prefix =
3534 StringSwitch<unsigned>(Name)
3535 .Cases("lock", "lock", X86::IP_HAS_LOCK)
3536 .Cases("rep", "repe", "repz", X86::IP_HAS_REPEAT)
3537 .Cases("repne", "repnz", X86::IP_HAS_REPEAT_NE)
3538 .Cases("notrack", "notrack", X86::IP_HAS_NOTRACK)
3539 .Default(X86::IP_NO_PREFIX); // Invalid prefix (impossible)
3540 Flags |= Prefix;
3541 if (getLexer().is(AsmToken::EndOfStatement)) {
3542 // We don't have real instr with the given prefix
3543 // let's use the prefix as the instr.
3544 // TODO: there could be several prefixes one after another
3546 break;
3547 }
3548 // FIXME: The mnemonic won't match correctly if its not in lower case.
3549 Name = Parser.getTok().getString();
3550 Parser.Lex(); // eat the prefix
3551 // Hack: we could have something like "rep # some comment" or
3552 // "lock; cmpxchg16b $1" or "lock\0A\09incl" or "lock/incl"
3553 while (Name.starts_with(";") || Name.starts_with("\n") ||
3554 Name.starts_with("#") || Name.starts_with("\t") ||
3555 Name.starts_with("/")) {
3556 // FIXME: The mnemonic won't match correctly if its not in lower case.
3557 Name = Parser.getTok().getString();
3558 Parser.Lex(); // go to next prefix or instr
3559 }
3560 }
3561
3562 if (Flags)
3563 PatchedName = Name;
3564
3565 // Hacks to handle 'data16' and 'data32'
3566 if (PatchedName == "data16" && is16BitMode()) {
3567 return Error(NameLoc, "redundant data16 prefix");
3568 }
3569 if (PatchedName == "data32") {
3570 if (is32BitMode())
3571 return Error(NameLoc, "redundant data32 prefix");
3572 if (is64BitMode())
3573 return Error(NameLoc, "'data32' is not supported in 64-bit mode");
3574 // Hack to 'data16' for the table lookup.
3575 PatchedName = "data16";
3576
3577 if (getLexer().isNot(AsmToken::EndOfStatement)) {
3578 StringRef Next = Parser.getTok().getString();
3579 getLexer().Lex();
3580 // data32 effectively changes the instruction suffix.
3581 // TODO Generalize.
3582 if (Next == "callw")
3583 Next = "calll";
3584 if (Next == "ljmpw")
3585 Next = "ljmpl";
3586
3587 Name = Next;
3588 PatchedName = Name;
3589 ForcedDataPrefix = X86::Is32Bit;
3590 IsPrefix = false;
3591 }
3592 }
3593
3594 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
3595
3596 // Push the immediate if we extracted one from the mnemonic.
3597 if (ComparisonPredicate != ~0U && !isParsingIntelSyntax()) {
3598 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonPredicate,
3599 getParser().getContext());
3600 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
3601 }
3602
3603 // Parse condtional flags after mnemonic.
3604 if ((Name.starts_with("ccmp") || Name.starts_with("ctest")) &&
3605 parseCFlagsOp(Operands))
3606 return true;
3607
3608 // This does the actual operand parsing. Don't parse any more if we have a
3609 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
3610 // just want to parse the "lock" as the first instruction and the "incl" as
3611 // the next one.
3612 if (getLexer().isNot(AsmToken::EndOfStatement) && !IsPrefix) {
3613 // Parse '*' modifier.
3614 if (getLexer().is(AsmToken::Star))
3615 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
3616
3617 // Read the operands.
3618 while (true) {
3619 if (parseOperand(Operands, Name))
3620 return true;
3621 if (HandleAVX512Operand(Operands))
3622 return true;
3623
3624 // check for comma and eat it
3625 if (getLexer().is(AsmToken::Comma))
3626 Parser.Lex();
3627 else
3628 break;
3629 }
3630
3631 // In MS inline asm curly braces mark the beginning/end of a block,
3632 // therefore they should be interepreted as end of statement
3633 CurlyAsEndOfStatement =
3634 isParsingIntelSyntax() && isParsingMSInlineAsm() &&
3635 (getLexer().is(AsmToken::LCurly) || getLexer().is(AsmToken::RCurly));
3636 if (getLexer().isNot(AsmToken::EndOfStatement) && !CurlyAsEndOfStatement)
3637 return TokError("unexpected token in argument list");
3638 }
3639
3640 // Push the immediate if we extracted one from the mnemonic.
3641 if (ComparisonPredicate != ~0U && isParsingIntelSyntax()) {
3642 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonPredicate,
3643 getParser().getContext());
3644 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
3645 }
3646
3647 // Consume the EndOfStatement or the prefix separator Slash
3648 if (getLexer().is(AsmToken::EndOfStatement) ||
3649 (IsPrefix && getLexer().is(AsmToken::Slash)))
3650 Parser.Lex();
3651 else if (CurlyAsEndOfStatement)
3652 // Add an actual EndOfStatement before the curly brace
3653 Info.AsmRewrites->emplace_back(AOK_EndOfStatement,
3654 getLexer().getTok().getLoc(), 0);
3655
3656 // This is for gas compatibility and cannot be done in td.
3657 // Adding "p" for some floating point with no argument.
3658 // For example: fsub --> fsubp
3659 bool IsFp =
3660 Name == "fsub" || Name == "fdiv" || Name == "fsubr" || Name == "fdivr";
3661 if (IsFp && Operands.size() == 1) {
3662 const char *Repl = StringSwitch<const char *>(Name)
3663 .Case("fsub", "fsubp")
3664 .Case("fdiv", "fdivp")
3665 .Case("fsubr", "fsubrp")
3666 .Case("fdivr", "fdivrp");
3667 static_cast<X86Operand &>(*Operands[0]).setTokenValue(Repl);
3668 }
3669
3670 if ((Name == "mov" || Name == "movw" || Name == "movl") &&
3671 (Operands.size() == 3)) {
3672 X86Operand &Op1 = (X86Operand &)*Operands[1];
3673 X86Operand &Op2 = (X86Operand &)*Operands[2];
3674 SMLoc Loc = Op1.getEndLoc();
3675 // Moving a 32 or 16 bit value into a segment register has the same
3676 // behavior. Modify such instructions to always take shorter form.
3677 if (Op1.isReg() && Op2.isReg() &&
3678 X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(
3679 Op2.getReg()) &&
3680 (X86MCRegisterClasses[X86::GR16RegClassID].contains(Op1.getReg()) ||
3681 X86MCRegisterClasses[X86::GR32RegClassID].contains(Op1.getReg()))) {
3682 // Change instruction name to match new instruction.
3683 if (Name != "mov" && Name[3] == (is16BitMode() ? 'l' : 'w')) {
3684 Name = is16BitMode() ? "movw" : "movl";
3685 Operands[0] = X86Operand::CreateToken(Name, NameLoc);
3686 }
3687 // Select the correct equivalent 16-/32-bit source register.
3688 MCRegister Reg =
3689 getX86SubSuperRegister(Op1.getReg(), is16BitMode() ? 16 : 32);
3690 Operands[1] = X86Operand::CreateReg(Reg, Loc, Loc);
3691 }
3692 }
3693
3694 // This is a terrible hack to handle "out[s]?[bwl]? %al, (%dx)" ->
3695 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
3696 // documented form in various unofficial manuals, so a lot of code uses it.
3697 if ((Name == "outb" || Name == "outsb" || Name == "outw" || Name == "outsw" ||
3698 Name == "outl" || Name == "outsl" || Name == "out" || Name == "outs") &&
3699 Operands.size() == 3) {
3700 X86Operand &Op = (X86Operand &)*Operands.back();
3701 if (Op.isDXReg())
3702 Operands.back() = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
3703 Op.getEndLoc());
3704 }
3705 // Same hack for "in[s]?[bwl]? (%dx), %al" -> "inb %dx, %al".
3706 if ((Name == "inb" || Name == "insb" || Name == "inw" || Name == "insw" ||
3707 Name == "inl" || Name == "insl" || Name == "in" || Name == "ins") &&
3708 Operands.size() == 3) {
3709 X86Operand &Op = (X86Operand &)*Operands[1];
3710 if (Op.isDXReg())
3711 Operands[1] = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
3712 Op.getEndLoc());
3713 }
3714
3716 bool HadVerifyError = false;
3717
3718 // Append default arguments to "ins[bwld]"
3719 if (Name.starts_with("ins") &&
3720 (Operands.size() == 1 || Operands.size() == 3) &&
3721 (Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd" ||
3722 Name == "ins")) {
3723
3724 AddDefaultSrcDestOperands(TmpOperands,
3725 X86Operand::CreateReg(X86::DX, NameLoc, NameLoc),
3726 DefaultMemDIOperand(NameLoc));
3727 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3728 }
3729
3730 // Append default arguments to "outs[bwld]"
3731 if (Name.starts_with("outs") &&
3732 (Operands.size() == 1 || Operands.size() == 3) &&
3733 (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
3734 Name == "outsd" || Name == "outs")) {
3735 AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
3736 X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
3737 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3738 }
3739
3740 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
3741 // values of $SIREG according to the mode. It would be nice if this
3742 // could be achieved with InstAlias in the tables.
3743 if (Name.starts_with("lods") &&
3744 (Operands.size() == 1 || Operands.size() == 2) &&
3745 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
3746 Name == "lodsl" || Name == "lodsd" || Name == "lodsq")) {
3747 TmpOperands.push_back(DefaultMemSIOperand(NameLoc));
3748 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3749 }
3750
3751 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
3752 // values of $DIREG according to the mode. It would be nice if this
3753 // could be achieved with InstAlias in the tables.
3754 if (Name.starts_with("stos") &&
3755 (Operands.size() == 1 || Operands.size() == 2) &&
3756 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
3757 Name == "stosl" || Name == "stosd" || Name == "stosq")) {
3758 TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
3759 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3760 }
3761
3762 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
3763 // values of $DIREG according to the mode. It would be nice if this
3764 // could be achieved with InstAlias in the tables.
3765 if (Name.starts_with("scas") &&
3766 (Operands.size() == 1 || Operands.size() == 2) &&
3767 (Name == "scas" || Name == "scasb" || Name == "scasw" ||
3768 Name == "scasl" || Name == "scasd" || Name == "scasq")) {
3769 TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
3770 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3771 }
3772
3773 // Add default SI and DI operands to "cmps[bwlq]".
3774 if (Name.starts_with("cmps") &&
3775 (Operands.size() == 1 || Operands.size() == 3) &&
3776 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
3777 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
3778 AddDefaultSrcDestOperands(TmpOperands, DefaultMemDIOperand(NameLoc),
3779 DefaultMemSIOperand(NameLoc));
3780 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3781 }
3782
3783 // Add default SI and DI operands to "movs[bwlq]".
3784 if (((Name.starts_with("movs") &&
3785 (Name == "movs" || Name == "movsb" || Name == "movsw" ||
3786 Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
3787 (Name.starts_with("smov") &&
3788 (Name == "smov" || Name == "smovb" || Name == "smovw" ||
3789 Name == "smovl" || Name == "smovd" || Name == "smovq"))) &&
3790 (Operands.size() == 1 || Operands.size() == 3)) {
3791 if (Name == "movsd" && Operands.size() == 1 && !isParsingIntelSyntax())
3792 Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
3793 AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
3794 DefaultMemDIOperand(NameLoc));
3795 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3796 }
3797
3798 // Check if we encountered an error for one the string insturctions
3799 if (HadVerifyError) {
3800 return HadVerifyError;
3801 }
3802
3803 // Transforms "xlat mem8" into "xlatb"
3804 if ((Name == "xlat" || Name == "xlatb") && Operands.size() == 2) {
3805 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
3806 if (Op1.isMem8()) {
3807 Warning(Op1.getStartLoc(), "memory operand is only for determining the "
3808 "size, (R|E)BX will be used for the location");
3809 Operands.pop_back();
3810 static_cast<X86Operand &>(*Operands[0]).setTokenValue("xlatb");
3811 }
3812 }
3813
3814 if (Flags)
3815 Operands.push_back(X86Operand::CreatePrefix(Flags, NameLoc, NameLoc));
3816 return false;
3817}
3818
3819static bool convertSSEToAVX(MCInst &Inst) {
3820 ArrayRef<X86TableEntry> Table{X86SSE2AVXTable};
3821 unsigned Opcode = Inst.getOpcode();
3822 const auto I = llvm::lower_bound(Table, Opcode);
3823 if (I == Table.end() || I->OldOpc != Opcode)
3824 return false;
3825
3826 Inst.setOpcode(I->NewOpc);
3827 // AVX variant of BLENDVPD/BLENDVPS/PBLENDVB instructions has more
3828 // operand compare to SSE variant, which is added below
3829 if (X86::isBLENDVPD(Opcode) || X86::isBLENDVPS(Opcode) ||
3830 X86::isPBLENDVB(Opcode))
3831 Inst.addOperand(Inst.getOperand(2));
3832
3833 return true;
3834}
3835
// Post-process a successfully matched instruction, e.g. to pick a shorter
// encoding or an equivalent alias. Returns true when Inst was modified; the
// caller loops on this until it returns false so transformations can chain.
bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
  // Optional SSE -> AVX upgrade via the generated mapping table.
  if (MCOptions.X86Sse2Avx && convertSSEToAVX(Inst))
    return true;

  // Prefer the shorter 2-byte VEX prefix unless the user forced {vex3}.
  if (ForcedOpcodePrefix != OpcodePrefix_VEX3 &&
      X86::optimizeInstFromVEX3ToVEX2(Inst, MII.get(Inst.getOpcode())))
    return true;

    // NOTE(review): the condition guarding this return appears to be missing
    // from this excerpt (source looks truncated here) -- confirm against the
    // full file before reasoning about the surrounding control flow.
    return true;

  // Helper for the FROM_TO table below: when {evex} was written on a
  // CMP/TEST, switch to the conditional-compare/test opcode and drop the
  // explicit EVEX flag (the new opcode implies the encoding).
  auto replaceWithCCMPCTEST = [&](unsigned Opcode) -> bool {
    if (ForcedOpcodePrefix == OpcodePrefix_EVEX) {
      Inst.setFlags(~(X86::IP_USE_EVEX)&Inst.getFlags());
      Inst.setOpcode(Opcode);
      // NOTE(review): one or two lines are missing from this excerpt at this
      // point -- confirm against the full file.
      return true;
    }
    return false;
  };

  switch (Inst.getOpcode()) {
  default: return false;
  case X86::JMP_1:
    // {disp32} forces a larger displacement as if the instruction was relaxed.
    // NOTE: 16-bit mode uses 16-bit displacement even though it says {disp32}.
    // This matches GNU assembler.
    if (ForcedDispEncoding == DispEncoding_Disp32) {
      Inst.setOpcode(is16BitMode() ? X86::JMP_2 : X86::JMP_4);
      return true;
    }

    return false;
  case X86::JCC_1:
    // {disp32} forces a larger displacement as if the instruction was relaxed.
    // NOTE: 16-bit mode uses 16-bit displacement even though it says {disp32}.
    // This matches GNU assembler.
    if (ForcedDispEncoding == DispEncoding_Disp32) {
      Inst.setOpcode(is16BitMode() ? X86::JCC_2 : X86::JCC_4);
      return true;
    }

    return false;
  case X86::INT: {
    // Transforms "int $3" into "int3" as a size optimization.
    // We can't write this as an InstAlias.
    if (!Inst.getOperand(0).isImm() || Inst.getOperand(0).getImm() != 3)
      return false;
    Inst.clear();
    Inst.setOpcode(X86::INT3);
    return true;
  }
  // `{evex} cmp <>, <>` is alias of `ccmpt {dfv=} <>, <>`, and
  // `{evex} test <>, <>` is alias of `ctest {dfv=} <>, <>`
#define FROM_TO(FROM, TO)                                                      \
  case X86::FROM:                                                              \
    return replaceWithCCMPCTEST(X86::TO);
    FROM_TO(CMP64rr, CCMP64rr)
    FROM_TO(CMP64mi32, CCMP64mi32)
    FROM_TO(CMP64mi8, CCMP64mi8)
    FROM_TO(CMP64mr, CCMP64mr)
    FROM_TO(CMP64ri32, CCMP64ri32)
    FROM_TO(CMP64ri8, CCMP64ri8)
    FROM_TO(CMP64rm, CCMP64rm)

    FROM_TO(CMP32rr, CCMP32rr)
    FROM_TO(CMP32mi, CCMP32mi)
    FROM_TO(CMP32mi8, CCMP32mi8)
    FROM_TO(CMP32mr, CCMP32mr)
    FROM_TO(CMP32ri, CCMP32ri)
    FROM_TO(CMP32ri8, CCMP32ri8)
    FROM_TO(CMP32rm, CCMP32rm)

    FROM_TO(CMP16rr, CCMP16rr)
    FROM_TO(CMP16mi, CCMP16mi)
    FROM_TO(CMP16mi8, CCMP16mi8)
    FROM_TO(CMP16mr, CCMP16mr)
    FROM_TO(CMP16ri, CCMP16ri)
    FROM_TO(CMP16ri8, CCMP16ri8)
    FROM_TO(CMP16rm, CCMP16rm)

    FROM_TO(CMP8rr, CCMP8rr)
    FROM_TO(CMP8mi, CCMP8mi)
    FROM_TO(CMP8mr, CCMP8mr)
    FROM_TO(CMP8ri, CCMP8ri)
    FROM_TO(CMP8rm, CCMP8rm)

    FROM_TO(TEST64rr, CTEST64rr)
    FROM_TO(TEST64mi32, CTEST64mi32)
    FROM_TO(TEST64mr, CTEST64mr)
    FROM_TO(TEST64ri32, CTEST64ri32)

    FROM_TO(TEST32rr, CTEST32rr)
    FROM_TO(TEST32mi, CTEST32mi)
    FROM_TO(TEST32mr, CTEST32mr)
    FROM_TO(TEST32ri, CTEST32ri)

    FROM_TO(TEST16rr, CTEST16rr)
    FROM_TO(TEST16mi, CTEST16mi)
    FROM_TO(TEST16mr, CTEST16mr)
    FROM_TO(TEST16ri, CTEST16ri)

    FROM_TO(TEST8rr, CTEST8rr)
    FROM_TO(TEST8mi, CTEST8mi)
    FROM_TO(TEST8mr, CTEST8mr)
    FROM_TO(TEST8ri, CTEST8ri)
#undef FROM_TO
  }
}
3946
// Post-match semantic checks the table-driven matcher cannot express:
// register-distinctness constraints (complex FMA, 4FMA groups, gathers, AMX
// tiles) and legality of the high-byte registers AH/BH/CH/DH under encodings
// that imply an extended prefix. Returns the Error()/Warning() result for the
// first violated constraint, false otherwise.
bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
  using namespace X86;
  const MCRegisterInfo *MRI = getContext().getRegisterInfo();
  unsigned Opcode = Inst.getOpcode();
  uint64_t TSFlags = MII.get(Opcode).TSFlags;
  if (isVFCMADDCPH(Opcode) || isVFCMADDCSH(Opcode) || isVFMADDCPH(Opcode) ||
      isVFMADDCSH(Opcode)) {
    // Complex FMA: the destination must not overlap any source register.
    MCRegister Dest = Inst.getOperand(0).getReg();
    for (unsigned i = 2; i < Inst.getNumOperands(); i++)
      if (Inst.getOperand(i).isReg() && Dest == Inst.getOperand(i).getReg())
        return Warning(Ops[0]->getStartLoc(), "Destination register should be "
                                              "distinct from source registers");
  } else if (isVFCMULCPH(Opcode) || isVFCMULCSH(Opcode) || isVFMULCPH(Opcode) ||
             isVFMULCSH(Opcode)) {
    MCRegister Dest = Inst.getOperand(0).getReg();
    // The mask variants have different operand list. Scan from the third
    // operand to avoid emitting incorrect warning.
    //     VFMULCPHZrr   Dest, Src1, Src2
    //     VFMULCPHZrrk  Dest, Dest, Mask, Src1, Src2
    //     VFMULCPHZrrkz Dest, Mask, Src1, Src2
    for (unsigned i = ((TSFlags & X86II::EVEX_K) ? 2 : 1);
         i < Inst.getNumOperands(); i++)
      if (Inst.getOperand(i).isReg() && Dest == Inst.getOperand(i).getReg())
        return Warning(Ops[0]->getStartLoc(), "Destination register should be "
                                              "distinct from source registers");
  } else if (isV4FMADDPS(Opcode) || isV4FMADDSS(Opcode) ||
             isV4FNMADDPS(Opcode) || isV4FNMADDSS(Opcode) ||
             isVP4DPWSSDS(Opcode) || isVP4DPWSSD(Opcode)) {
    // 4-register-block instructions: warn when the named source register is
    // not the first of its aligned group of four (encoding % 4 == 0).
    MCRegister Src2 =
        // NOTE(review): the operand-index expression is missing from this
        // excerpt (source looks truncated here) -- confirm against the full
        // file.
            .getReg();
    unsigned Src2Enc = MRI->getEncodingValue(Src2);
    if (Src2Enc % 4 != 0) {
      // NOTE(review): the declaration of `RegName` (printable name of Src2)
      // is missing from this excerpt.
      unsigned GroupStart = (Src2Enc / 4) * 4;
      unsigned GroupEnd = GroupStart + 3;
      return Warning(Ops[0]->getStartLoc(),
                     "source register '" + RegName + "' implicitly denotes '" +
                         RegName.take_front(3) + Twine(GroupStart) + "' to '" +
                         RegName.take_front(3) + Twine(GroupEnd) +
                         "' source group");
    }
  } else if (isVGATHERDPD(Opcode) || isVGATHERDPS(Opcode) ||
             isVGATHERQPD(Opcode) || isVGATHERQPS(Opcode) ||
             isVPGATHERDD(Opcode) || isVPGATHERDQ(Opcode) ||
             isVPGATHERQD(Opcode) || isVPGATHERQQ(Opcode)) {
    bool HasEVEX = (TSFlags & X86II::EncodingMask) == X86II::EVEX;
    if (HasEVEX) {
      // EVEX gathers mask through a k-register, so only the destination and
      // the index vector can clash.
      unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
      unsigned Index = MRI->getEncodingValue(
          Inst.getOperand(4 + X86::AddrIndexReg).getReg());
      if (Dest == Index)
        return Warning(Ops[0]->getStartLoc(), "index and destination registers "
                                              "should be distinct");
    } else {
      // VEX gathers carry the mask as a vector register operand; all three
      // vector registers must be pairwise distinct.
      unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
      unsigned Mask = MRI->getEncodingValue(Inst.getOperand(1).getReg());
      unsigned Index = MRI->getEncodingValue(
          Inst.getOperand(3 + X86::AddrIndexReg).getReg());
      if (Dest == Mask || Dest == Index || Mask == Index)
        return Warning(Ops[0]->getStartLoc(), "mask, index, and destination "
                                              "registers should be distinct");
    }
  } else if (isTCMMIMFP16PS(Opcode) || isTCMMRLFP16PS(Opcode) ||
             isTDPBF16PS(Opcode) || isTDPFP16PS(Opcode) || isTDPBSSD(Opcode) ||
             isTDPBSUD(Opcode) || isTDPBUSD(Opcode) || isTDPBUUD(Opcode)) {
    // AMX dot-product style instructions: all three tile registers must be
    // pairwise distinct, and violation is a hard error.
    MCRegister SrcDest = Inst.getOperand(0).getReg();
    MCRegister Src1 = Inst.getOperand(2).getReg();
    MCRegister Src2 = Inst.getOperand(3).getReg();
    if (SrcDest == Src1 || SrcDest == Src2 || Src1 == Src2)
      return Error(Ops[0]->getStartLoc(), "all tmm registers must be distinct");
  }

  // High 8-bit regs (AH/BH/CH/DH) are incompatible with encodings that imply
  // extended prefixes:
  // * Legacy path that would emit a REX (e.g. uses r8..r15 or sil/dil/bpl/spl)
  // * EVEX
  // * REX2
  // VEX/XOP don't use REX; they are excluded from the legacy check.
  const unsigned Enc = TSFlags & X86II::EncodingMask;
  if (Enc != X86II::VEX && Enc != X86II::XOP) {
    MCRegister HReg;
    bool UsesRex = TSFlags & X86II::REX_W;
    unsigned NumOps = Inst.getNumOperands();
    for (unsigned i = 0; i != NumOps; ++i) {
      const MCOperand &MO = Inst.getOperand(i);
      if (!MO.isReg())
        continue;
      MCRegister Reg = MO.getReg();
      if (Reg == X86::AH || Reg == X86::BH || Reg == X86::CH || Reg == X86::DH)
        HReg = Reg;
      // NOTE(review): the condition guarding this assignment (registers that
      // force a REX prefix) is missing from this excerpt -- confirm against
      // the full file.
        UsesRex = true;
    }

    if (HReg &&
        (Enc == X86II::EVEX || ForcedOpcodePrefix == OpcodePrefix_REX2 ||
         ForcedOpcodePrefix == OpcodePrefix_REX || UsesRex)) {
      // NOTE(review): the declaration of `RegName` is missing from this
      // excerpt.
      return Error(Ops[0]->getStartLoc(),
                   "can't encode '" + RegName.str() +
                       "' in an instruction requiring EVEX/REX2/REX prefix");
    }
  }

  // PREFETCHIT0/1 are only meaningful with a RIP-relative address; anything
  // else is accepted but diagnosed.
  if ((Opcode == X86::PREFETCHIT0 || Opcode == X86::PREFETCHIT1)) {
    const MCOperand &MO = Inst.getOperand(X86::AddrBaseReg);
    if (!MO.isReg() || MO.getReg() != X86::RIP)
      return Warning(
          Ops[0]->getStartLoc(),
          Twine((Inst.getOpcode() == X86::PREFETCHIT0 ? "'prefetchit0'"
                                                      : "'prefetchit1'")) +
              " only supports RIP-relative address");
  }
  return false;
}
4064
4065void X86AsmParser::emitWarningForSpecialLVIInstruction(SMLoc Loc) {
4066 Warning(Loc, "Instruction may be vulnerable to LVI and "
4067 "requires manual mitigation");
4068 Note(SMLoc(), "See https://software.intel.com/"
4069 "security-software-guidance/insights/"
4070 "deep-dive-load-value-injection#specialinstructions"
4071 " for more information");
4072}
4073
/// RET instructions and also instructions that indirect calls/jumps from memory
/// combine a load and a branch within a single instruction. To mitigate these
/// instructions against LVI, they must be decomposed into separate load and
/// branch instructions, with an LFENCE in between. For more details, see:
/// - X86LoadValueInjectionRetHardening.cpp
/// - X86LoadValueInjectionIndirectThunks.cpp
/// - https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection
///
/// RET forms are mitigated inline here (extra instructions emitted before the
/// RET); memory-indirect JMP/CALL only get a warning. (The function returns
/// void -- a previous comment claiming a bool result was stale.)
void X86AsmParser::applyLVICFIMitigation(MCInst &Inst, MCStreamer &Out) {
  // Information on control-flow instructions that require manual mitigation can
  // be found here:
  // https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
  switch (Inst.getOpcode()) {
  case X86::RET16:
  case X86::RET32:
  case X86::RET64:
  case X86::RETI16:
  case X86::RETI32:
  case X86::RETI64: {
    MCInst ShlInst, FenceInst;
    bool Parse32 = is32BitMode() || Code16GCC;
    // Pick the stack pointer register matching the current mode.
    MCRegister Basereg =
        is64BitMode() ? X86::RSP : (Parse32 ? X86::ESP : X86::SP);
    const MCExpr *Disp = MCConstantExpr::create(0, getContext());
    auto ShlMemOp = X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
                                          /*BaseReg=*/Basereg, /*IndexReg=*/0,
                                          /*Scale=*/1, SMLoc{}, SMLoc{}, 0);
    // Emit `shl $0, (SP)` (value-preserving) followed by an LFENCE before the
    // RET; see the ret-hardening pass referenced above for the rationale.
    ShlInst.setOpcode(X86::SHL64mi);
    ShlMemOp->addMemOperands(ShlInst, 5);
    ShlInst.addOperand(MCOperand::createImm(0));
    FenceInst.setOpcode(X86::LFENCE);
    Out.emitInstruction(ShlInst, getSTI());
    Out.emitInstruction(FenceInst, getSTI());
    return;
  }
  case X86::JMP16m:
  case X86::JMP32m:
  case X86::JMP64m:
  case X86::CALL16m:
  case X86::CALL32m:
  case X86::CALL64m:
    // Memory-indirect branches can't be decomposed here; warn only.
    emitWarningForSpecialLVIInstruction(Inst.getLoc());
    return;
  }
}
4120
/// To mitigate LVI, every instruction that performs a load can be followed by
/// an LFENCE instruction to squash any potential mis-speculation. There are
/// some instructions that require additional considerations, and may require
/// manual mitigation. For more details, see:
/// https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection
///
/// Ordinary loads get an LFENCE emitted after them; REP-prefixed string
/// instructions and bare REP prefixes only get a warning. (The function
/// returns void -- a previous comment claiming a bool result was stale.)
void X86AsmParser::applyLVILoadHardeningMitigation(MCInst &Inst,
                                                   MCStreamer &Out) {
  auto Opcode = Inst.getOpcode();
  auto Flags = Inst.getFlags();
  if ((Flags & X86::IP_HAS_REPEAT) || (Flags & X86::IP_HAS_REPEAT_NE)) {
    // Information on REP string instructions that require manual mitigation can
    // be found here:
    // https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
    switch (Opcode) {
    case X86::CMPSB:
    case X86::CMPSW:
    case X86::CMPSL:
    case X86::CMPSQ:
    case X86::SCASB:
    case X86::SCASW:
    case X86::SCASL:
    case X86::SCASQ:
      emitWarningForSpecialLVIInstruction(Inst.getLoc());
      return;
    }
  } else if (Opcode == X86::REP_PREFIX || Opcode == X86::REPNE_PREFIX) {
    // If a REP instruction is found on its own line, it may or may not be
    // followed by a vulnerable instruction. Emit a warning just in case.
    emitWarningForSpecialLVIInstruction(Inst.getLoc());
    return;
  }

  const MCInstrDesc &MCID = MII.get(Inst.getOpcode());

  // Can't mitigate after terminators or calls. A control flow change may have
  // already occurred.
  if (MCID.isTerminator() || MCID.isCall())
    return;

  // LFENCE has the mayLoad property, don't double fence.
  if (MCID.mayLoad() && Inst.getOpcode() != X86::LFENCE) {
    MCInst FenceInst;
    FenceInst.setOpcode(X86::LFENCE);
    Out.emitInstruction(FenceInst, getSTI());
  }
}
4169
// Emit a fully-processed instruction, applying the LVI mitigations around it
// when the corresponding subtarget features are enabled.
void X86AsmParser::emitInstruction(MCInst &Inst, OperandVector &Operands,
                                   MCStreamer &Out) {
  // NOTE(review): the first line of this condition (presumably the LVI
  // inline-asm hardening flag check) is missing from this excerpt -- confirm
  // against the full file.
      getSTI().hasFeature(X86::FeatureLVIControlFlowIntegrity))
    applyLVICFIMitigation(Inst, Out);

  Out.emitInstruction(Inst, getSTI());

  // NOTE(review): the first line of this condition is missing from this
  // excerpt as well.
      getSTI().hasFeature(X86::FeatureLVILoadHardening))
    applyLVILoadHardeningMitigation(Inst, Out);
}
4182
  // NOTE(review): the signature line of this function is missing from this
  // excerpt; the call site in matchAndEmitInstruction names it
  // `getPrefixes(Operands)`. The body pops a trailing prefix operand, if
  // present, and returns its prefix encoding (0 when the last operand is not
  // a prefix).
  unsigned Result = 0;
  X86Operand &Prefix = static_cast<X86Operand &>(*Operands.back());
  if (Prefix.isPrefix()) {
    Result = Prefix.getPrefix();
    // The prefix has been consumed; remove it from the operand list.
    Operands.pop_back();
  }
  return Result;
}
4192
// Top-level match-and-emit entry point: expands FPU wait aliases, folds a
// trailing prefix operand and any user-forced prefix/displacement choices
// into the instruction flags, then dispatches to the AT&T or Intel matcher.
bool X86AsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                           // NOTE(review): a parameter line
                                           // (presumably `OperandVector
                                           // &Operands,`) is missing from
                                           // this excerpt.
                                           MCStreamer &Out, uint64_t &ErrorInfo,
                                           bool MatchingInlineAsm) {
  assert(!Operands.empty() && "Unexpect empty operand list!");
  assert((*Operands[0]).isToken() && "Leading operand should always be a mnemonic!");

  // First, handle aliases that expand to multiple instructions.
  MatchFPUWaitAlias(IDLoc, static_cast<X86Operand &>(*Operands[0]), Operands,
                    Out, MatchingInlineAsm);
  unsigned Prefixes = getPrefixes(Operands);

  MCInst Inst;

  // If REX/REX2/VEX/EVEX encoding is forced, we need to pass the USE_* flag to
  // the encoder and printer.
  if (ForcedOpcodePrefix == OpcodePrefix_REX)
    Prefixes |= X86::IP_USE_REX;
  else if (ForcedOpcodePrefix == OpcodePrefix_REX2)
    Prefixes |= X86::IP_USE_REX2;
  else if (ForcedOpcodePrefix == OpcodePrefix_VEX)
    Prefixes |= X86::IP_USE_VEX;
  else if (ForcedOpcodePrefix == OpcodePrefix_VEX2)
    Prefixes |= X86::IP_USE_VEX2;
  else if (ForcedOpcodePrefix == OpcodePrefix_VEX3)
    Prefixes |= X86::IP_USE_VEX3;
  else if (ForcedOpcodePrefix == OpcodePrefix_EVEX)
    Prefixes |= X86::IP_USE_EVEX;

  // Set encoded flags for {disp8} and {disp32}.
  if (ForcedDispEncoding == DispEncoding_Disp8)
    Prefixes |= X86::IP_USE_DISP8;
  else if (ForcedDispEncoding == DispEncoding_Disp32)
    Prefixes |= X86::IP_USE_DISP32;

  if (Prefixes)
    Inst.setFlags(Prefixes);

  return isParsingIntelSyntax()
             ? matchAndEmitIntelInstruction(IDLoc, Opcode, Inst, Operands, Out,
                                            ErrorInfo, MatchingInlineAsm)
             : matchAndEmitATTInstruction(IDLoc, Opcode, Inst, Operands, Out,
                                          ErrorInfo, MatchingInlineAsm);
}
4237
4238void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
4239 OperandVector &Operands, MCStreamer &Out,
4240 bool MatchingInlineAsm) {
4241 // FIXME: This should be replaced with a real .td file alias mechanism.
4242 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
4243 // call.
4244 const char *Repl = StringSwitch<const char *>(Op.getToken())
4245 .Case("finit", "fninit")
4246 .Case("fsave", "fnsave")
4247 .Case("fstcw", "fnstcw")
4248 .Case("fstcww", "fnstcw")
4249 .Case("fstenv", "fnstenv")
4250 .Case("fstsw", "fnstsw")
4251 .Case("fstsww", "fnstsw")
4252 .Case("fclex", "fnclex")
4253 .Default(nullptr);
4254 if (Repl) {
4255 MCInst Inst;
4256 Inst.setOpcode(X86::WAIT);
4257 Inst.setLoc(IDLoc);
4258 if (!MatchingInlineAsm)
4259 emitInstruction(Inst, Operands, Out);
4260 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
4261 }
4262}
4263
4264bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc,
4265 const FeatureBitset &MissingFeatures,
4266 bool MatchingInlineAsm) {
4267 assert(MissingFeatures.any() && "Unknown missing feature!");
4268 SmallString<126> Msg;
4269 raw_svector_ostream OS(Msg);
4270 OS << "instruction requires:";
4271 for (unsigned i = 0, e = MissingFeatures.size(); i != e; ++i) {
4272 if (MissingFeatures[i])
4273 OS << ' ' << getSubtargetFeatureName(i);
4274 }
4275 return Error(IDLoc, OS.str(), SMRange(), MatchingInlineAsm);
4276}
4277
// Target hook run after an opcode match: reject matches that conflict with a
// user-forced prefix or flag ({rex}, {rex2}, {vex}/{vex2}/{vex3}, {evex},
// {nf}, APX extended registers).
unsigned X86AsmParser::checkTargetMatchPredicate(MCInst &Inst) {
  unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &MCID = MII.get(Opc);
  uint64_t TSFlags = MCID.TSFlags;

  // APX extended registers are only usable by instructions that can encode
  // them.
  if (UseApxExtendedReg && !X86II::canUseApxExtendedReg(MCID))
    return Match_Unsupported;
  // {nf} usage must agree with the instruction's EVEX_NF flag; CFCMOVcc is
  // exempt from this check.
  if (ForcedNoFlag == !(TSFlags & X86II::EVEX_NF) && !X86::isCFCMOVCC(Opc))
    return Match_Unsupported;

  switch (ForcedOpcodePrefix) {
  case OpcodePrefix_Default:
    break;
  case OpcodePrefix_REX:
  case OpcodePrefix_REX2:
    // REX/REX2 only apply to legacy-encoded instructions (no VEX/EVEX/XOP).
    if (TSFlags & X86II::EncodingMask)
      return Match_Unsupported;
    break;
  case OpcodePrefix_VEX:
  case OpcodePrefix_VEX2:
  case OpcodePrefix_VEX3:
    if ((TSFlags & X86II::EncodingMask) != X86II::VEX)
      return Match_Unsupported;
    break;
  case OpcodePrefix_EVEX:
    // In 64-bit mode, {evex} on CMP/TEST is allowed through because
    // processInstruction rewrites those to their CCMP/CTEST aliases.
    if (is64BitMode() && (TSFlags & X86II::EncodingMask) != X86II::EVEX &&
        !X86::isCMP(Opc) && !X86::isTEST(Opc))
      return Match_Unsupported;
    if (!is64BitMode() && (TSFlags & X86II::EncodingMask) != X86II::EVEX)
      return Match_Unsupported;
    break;
  }

  // NOTE(review): the first half of this condition is missing from this
  // excerpt (source looks truncated here) -- confirm against the full file.
      (ForcedOpcodePrefix != OpcodePrefix_VEX &&
       ForcedOpcodePrefix != OpcodePrefix_VEX2 &&
       ForcedOpcodePrefix != OpcodePrefix_VEX3))
    return Match_Unsupported;

  return Match_Success;
}
4319
// AT&T-syntax match driver. Strategy: try a direct match of the mnemonic as
// written; if that fails, retry with each size suffix appended (b/w/l/q for
// integer ops, s/l/t for x87 ops starting with 'f') and accept when exactly
// one suffixed form matches. Otherwise synthesize the most specific
// diagnostic possible from the collected match results.
bool X86AsmParser::matchAndEmitATTInstruction(
    SMLoc IDLoc, unsigned &Opcode, MCInst &Inst, OperandVector &Operands,
    MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) {
  X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
  SMRange EmptyRange = std::nullopt;
  // In 16-bit mode, if data32 is specified, temporarily switch to 32-bit mode
  // when matching the instruction.
  if (ForcedDataPrefix == X86::Is32Bit)
    SwitchMode(X86::Is32Bit);
  // First, try a direct match.
  FeatureBitset MissingFeatures;
  unsigned OriginalError = MatchInstruction(Operands, Inst, ErrorInfo,
                                            MissingFeatures, MatchingInlineAsm,
                                            isParsingIntelSyntax());
  if (ForcedDataPrefix == X86::Is32Bit) {
    SwitchMode(X86::Is16Bit);
    ForcedDataPrefix = 0;
  }
  switch (OriginalError) {
  default: llvm_unreachable("Unexpected match result!");
  case Match_Success:
    if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
      return true;
    // Some instructions need post-processing to, for example, tweak which
    // encoding is selected. Loop on it while changes happen so the
    // individual transformations can chain off each other.
    if (!MatchingInlineAsm)
      while (processInstruction(Inst, Operands))
        ;

    Inst.setLoc(IDLoc);
    if (!MatchingInlineAsm)
      emitInstruction(Inst, Operands, Out);
    Opcode = Inst.getOpcode();
    return false;
  case Match_InvalidImmUnsignedi4: {
    SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc();
    if (ErrorLoc == SMLoc())
      ErrorLoc = IDLoc;
    return Error(ErrorLoc, "immediate must be an integer in range [0, 15]",
                 EmptyRange, MatchingInlineAsm);
  }
  case Match_MissingFeature:
    return ErrorMissingFeature(IDLoc, MissingFeatures, MatchingInlineAsm);
  case Match_InvalidOperand:
  case Match_MnemonicFail:
  case Match_Unsupported:
    // Fall through to the suffix-retry path below.
    break;
  }
  if (Op.getToken().empty()) {
    Error(IDLoc, "instruction must have size higher than 0", EmptyRange,
          MatchingInlineAsm);
    return true;
  }

  // FIXME: Ideally, we would only attempt suffix matches for things which are
  // valid prefixes, and we could just infer the right unambiguous
  // type. However, that requires substantially more matcher support than the
  // following hack.

  // Change the operand to point to a temporary token.
  StringRef Base = Op.getToken();
  SmallString<16> Tmp;
  Tmp += Base;
  Tmp += ' ';
  Op.setTokenValue(Tmp);

  // If this instruction starts with an 'f', then it is a floating point stack
  // instruction. These come in up to three forms for 32-bit, 64-bit, and
  // 80-bit floating point, which use the suffixes s,l,t respectively.
  //
  // Otherwise, we assume that this may be an integer instruction, which comes
  // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
  const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
  // MemSize corresponding to Suffixes. { 8, 16, 32, 64 } { 32, 64, 80, 0 }
  const char *MemSize = Base[0] != 'f' ? "\x08\x10\x20\x40" : "\x20\x40\x50\0";

  // Check for the various suffix matches.
  uint64_t ErrorInfoIgnore;
  FeatureBitset ErrorInfoMissingFeatures; // Init suppresses compiler warnings.
  unsigned Match[4];

  // Some instruction like VPMULDQ is NOT the variant of VPMULD but a new one.
  // So we should make sure the suffix matcher only works for memory variant
  // that has the same size with the suffix.
  // FIXME: This flag is a workaround for legacy instructions that didn't
  // declare non suffix variant assembly.
  bool HasVectorReg = false;
  X86Operand *MemOp = nullptr;
  for (const auto &Op : Operands) {
    X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
    if (X86Op->isVectorReg())
      HasVectorReg = true;
    else if (X86Op->isMem()) {
      MemOp = X86Op;
      assert(MemOp->Mem.Size == 0 && "Memory size always 0 under ATT syntax");
      // Have we found an unqualified memory operand,
      // break. IA allows only one memory operand.
      break;
    }
  }

  // Retry the match once per candidate suffix, temporarily rewriting the
  // mnemonic's last character each time.
  for (unsigned I = 0, E = std::size(Match); I != E; ++I) {
    Tmp.back() = Suffixes[I];
    if (MemOp && HasVectorReg)
      MemOp->Mem.Size = MemSize[I];
    Match[I] = Match_MnemonicFail;
    if (MemOp || !HasVectorReg) {
      Match[I] =
          MatchInstruction(Operands, Inst, ErrorInfoIgnore, MissingFeatures,
                           MatchingInlineAsm, isParsingIntelSyntax());
      // If this returned as a missing feature failure, remember that.
      if (Match[I] == Match_MissingFeature)
        ErrorInfoMissingFeatures = MissingFeatures;
    }
  }

  // Restore the old token.
  Op.setTokenValue(Base);

  // If exactly one matched, then we treat that as a successful match (and the
  // instruction will already have been filled in correctly, since the failing
  // matches won't have modified it).
  unsigned NumSuccessfulMatches = llvm::count(Match, Match_Success);
  if (NumSuccessfulMatches == 1) {
    if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
      return true;
    // Some instructions need post-processing to, for example, tweak which
    // encoding is selected. Loop on it while changes happen so the
    // individual transformations can chain off each other.
    if (!MatchingInlineAsm)
      while (processInstruction(Inst, Operands))
        ;

    Inst.setLoc(IDLoc);
    if (!MatchingInlineAsm)
      emitInstruction(Inst, Operands, Out);
    Opcode = Inst.getOpcode();
    return false;
  }

  // Otherwise, the match failed, try to produce a decent error message.

  // If we had multiple suffix matches, then identify this as an ambiguous
  // match.
  if (NumSuccessfulMatches > 1) {
    char MatchChars[4];
    unsigned NumMatches = 0;
    for (unsigned I = 0, E = std::size(Match); I != E; ++I)
      if (Match[I] == Match_Success)
        MatchChars[NumMatches++] = Suffixes[I];

    SmallString<126> Msg;
    raw_svector_ostream OS(Msg);
    OS << "ambiguous instructions require an explicit suffix (could be ";
    for (unsigned i = 0; i != NumMatches; ++i) {
      if (i != 0)
        OS << ", ";
      if (i + 1 == NumMatches)
        OS << "or ";
      OS << "'" << Base << MatchChars[i] << "'";
    }
    OS << ")";
    Error(IDLoc, OS.str(), EmptyRange, MatchingInlineAsm);
    return true;
  }

  // Okay, we know that none of the variants matched successfully.

  // If all of the instructions reported an invalid mnemonic, then the original
  // mnemonic was invalid.
  if (llvm::count(Match, Match_MnemonicFail) == 4) {
    if (OriginalError == Match_MnemonicFail)
      return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
                   Op.getLocRange(), MatchingInlineAsm);

    if (OriginalError == Match_Unsupported)
      return Error(IDLoc, "unsupported instruction", EmptyRange,
                   MatchingInlineAsm);

    assert(OriginalError == Match_InvalidOperand && "Unexpected error");
    // Recover location info for the operand if we know which was the problem.
    if (ErrorInfo != ~0ULL) {
      if (ErrorInfo >= Operands.size())
        return Error(IDLoc, "too few operands for instruction", EmptyRange,
                     MatchingInlineAsm);

      X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
      if (Operand.getStartLoc().isValid()) {
        SMRange OperandRange = Operand.getLocRange();
        return Error(Operand.getStartLoc(), "invalid operand for instruction",
                     OperandRange, MatchingInlineAsm);
      }
    }

    return Error(IDLoc, "invalid operand for instruction", EmptyRange,
                 MatchingInlineAsm);
  }

  // If one instruction matched as unsupported, report this as unsupported.
  if (llvm::count(Match, Match_Unsupported) == 1) {
    return Error(IDLoc, "unsupported instruction", EmptyRange,
                 MatchingInlineAsm);
  }

  // If one instruction matched with a missing feature, report this as a
  // missing feature.
  if (llvm::count(Match, Match_MissingFeature) == 1) {
    ErrorInfo = Match_MissingFeature;
    return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures,
                               MatchingInlineAsm);
  }

  // If one instruction matched with an invalid operand, report this as an
  // operand failure.
  if (llvm::count(Match, Match_InvalidOperand) == 1) {
    return Error(IDLoc, "invalid operand for instruction", EmptyRange,
                 MatchingInlineAsm);
  }

  // If all of these were an outright failure, report it in a useless way.
  Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
        EmptyRange, MatchingInlineAsm);
  return true;
}
4545
4546bool X86AsmParser::matchAndEmitIntelInstruction(
4547 SMLoc IDLoc, unsigned &Opcode, MCInst &Inst, OperandVector &Operands,
4548 MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) {
4549 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
4550 SMRange EmptyRange = std::nullopt;
4551 // Find one unsized memory operand, if present.
4552 X86Operand *UnsizedMemOp = nullptr;
4553 for (const auto &Op : Operands) {
4554 X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
4555 if (X86Op->isMemUnsized()) {
4556 UnsizedMemOp = X86Op;
4557 // Have we found an unqualified memory operand,
4558 // break. IA allows only one memory operand.
4559 break;
4560 }
4561 }
4562
4563 // Allow some instructions to have implicitly pointer-sized operands. This is
4564 // compatible with gas.
4565 StringRef Mnemonic = (static_cast<X86Operand &>(*Operands[0])).getToken();
4566 if (UnsizedMemOp) {
4567 static const char *const PtrSizedInstrs[] = {"call", "jmp", "push", "pop"};
4568 for (const char *Instr : PtrSizedInstrs) {
4569 if (Mnemonic == Instr) {
4570 UnsizedMemOp->Mem.Size = getPointerWidth();
4571 break;
4572 }
4573 }
4574 }
4575
4576 SmallVector<unsigned, 8> Match;
4577 FeatureBitset ErrorInfoMissingFeatures;
4578 FeatureBitset MissingFeatures;
4579 StringRef Base = (static_cast<X86Operand &>(*Operands[0])).getToken();
4580
4581 // If unsized push has immediate operand we should default the default pointer
4582 // size for the size.
4583 if (Mnemonic == "push" && Operands.size() == 2) {
4584 auto *X86Op = static_cast<X86Operand *>(Operands[1].get());
4585 if (X86Op->isImm()) {
4586 // If it's not a constant fall through and let remainder take care of it.
4587 const auto *CE = dyn_cast<MCConstantExpr>(X86Op->getImm());
4588 unsigned Size = getPointerWidth();
4589 if (CE &&
4590 (isIntN(Size, CE->getValue()) || isUIntN(Size, CE->getValue()))) {
4591 SmallString<16> Tmp;
4592 Tmp += Base;
4593 Tmp += (is64BitMode())
4594 ? "q"
4595 : (is32BitMode()) ? "l" : (is16BitMode()) ? "w" : " ";
4596 Op.setTokenValue(Tmp);
4597 // Do match in ATT mode to allow explicit suffix usage.
4598 Match.push_back(MatchInstruction(Operands, Inst, ErrorInfo,
4599 MissingFeatures, MatchingInlineAsm,
4600 false /*isParsingIntelSyntax()*/));
4601 Op.setTokenValue(Base);
4602 }
4603 }
4604 }
4605
4606 // If an unsized memory operand is present, try to match with each memory
4607 // operand size. In Intel assembly, the size is not part of the instruction
4608 // mnemonic.
4609 if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
4610 static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
4611 for (unsigned Size : MopSizes) {
4612 UnsizedMemOp->Mem.Size = Size;
4613 uint64_t ErrorInfoIgnore;
4614 unsigned LastOpcode = Inst.getOpcode();
4615 unsigned M = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
4616 MissingFeatures, MatchingInlineAsm,
4617 isParsingIntelSyntax());
4618 if (Match.empty() || LastOpcode != Inst.getOpcode())
4619 Match.push_back(M);
4620
4621 // If this returned as a missing feature failure, remember that.
4622 if (Match.back() == Match_MissingFeature)
4623 ErrorInfoMissingFeatures = MissingFeatures;
4624 }
4625
4626 // Restore the size of the unsized memory operand if we modified it.
4627 UnsizedMemOp->Mem.Size = 0;
4628 }
4629
4630 // If we haven't matched anything yet, this is not a basic integer or FPU
4631 // operation. There shouldn't be any ambiguity in our mnemonic table, so try
4632 // matching with the unsized operand.
4633 if (Match.empty()) {
4634 Match.push_back(MatchInstruction(
4635 Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm,
4636 isParsingIntelSyntax()));
4637 // If this returned as a missing feature failure, remember that.
4638 if (Match.back() == Match_MissingFeature)
4639 ErrorInfoMissingFeatures = MissingFeatures;
4640 }
4641
4642 // Restore the size of the unsized memory operand if we modified it.
4643 if (UnsizedMemOp)
4644 UnsizedMemOp->Mem.Size = 0;
4645
4646 // If it's a bad mnemonic, all results will be the same.
4647 if (Match.back() == Match_MnemonicFail) {
4648 return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
4649 Op.getLocRange(), MatchingInlineAsm);
4650 }
4651
4652 unsigned NumSuccessfulMatches = llvm::count(Match, Match_Success);
4653
4654 // If matching was ambiguous and we had size information from the frontend,
4655 // try again with that. This handles cases like "movxz eax, m8/m16".
4656 if (UnsizedMemOp && NumSuccessfulMatches > 1 &&
4657 UnsizedMemOp->getMemFrontendSize()) {
4658 UnsizedMemOp->Mem.Size = UnsizedMemOp->getMemFrontendSize();
4659 unsigned M = MatchInstruction(
4660 Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm,
4661 isParsingIntelSyntax());
4662 if (M == Match_Success)
4663 NumSuccessfulMatches = 1;
4664
4665 // Add a rewrite that encodes the size information we used from the
4666 // frontend.
4667 InstInfo->AsmRewrites->emplace_back(
4668 AOK_SizeDirective, UnsizedMemOp->getStartLoc(),
4669 /*Len=*/0, UnsizedMemOp->getMemFrontendSize());
4670 }
4671
4672 // If exactly one matched, then we treat that as a successful match (and the
4673 // instruction will already have been filled in correctly, since the failing
4674 // matches won't have modified it).
4675 if (NumSuccessfulMatches == 1) {
4676 if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
4677 return true;
4678 // Some instructions need post-processing to, for example, tweak which
4679 // encoding is selected. Loop on it while changes happen so the individual
4680 // transformations can chain off each other.
4681 if (!MatchingInlineAsm)
4682 while (processInstruction(Inst, Operands))
4683 ;
4684 Inst.setLoc(IDLoc);
4685 if (!MatchingInlineAsm)
4686 emitInstruction(Inst, Operands, Out);
4687 Opcode = Inst.getOpcode();
4688 return false;
4689 } else if (NumSuccessfulMatches > 1) {
4690 assert(UnsizedMemOp &&
4691 "multiple matches only possible with unsized memory operands");
4692 return Error(UnsizedMemOp->getStartLoc(),
4693 "ambiguous operand size for instruction '" + Mnemonic + "\'",
4694 UnsizedMemOp->getLocRange());
4695 }
4696
4697 // If one instruction matched as unsupported, report this as unsupported.
4698 if (llvm::count(Match, Match_Unsupported) == 1) {
4699 return Error(IDLoc, "unsupported instruction", EmptyRange,
4700 MatchingInlineAsm);
4701 }
4702
4703 // If one instruction matched with a missing feature, report this as a
4704 // missing feature.
4705 if (llvm::count(Match, Match_MissingFeature) == 1) {
4706 ErrorInfo = Match_MissingFeature;
4707 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures,
4708 MatchingInlineAsm);
4709 }
4710
4711 // If one instruction matched with an invalid operand, report this as an
4712 // operand failure.
4713 if (llvm::count(Match, Match_InvalidOperand) == 1) {
4714 return Error(IDLoc, "invalid operand for instruction", EmptyRange,
4715 MatchingInlineAsm);
4716 }
4717
4718 if (llvm::count(Match, Match_InvalidImmUnsignedi4) == 1) {
4719 SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc();
4720 if (ErrorLoc == SMLoc())
4721 ErrorLoc = IDLoc;
4722 return Error(ErrorLoc, "immediate must be an integer in range [0, 15]",
4723 EmptyRange, MatchingInlineAsm);
4724 }
4725
4726 // If all of these were an outright failure, report it in a useless way.
4727 return Error(IDLoc, "unknown instruction mnemonic", EmptyRange,
4728 MatchingInlineAsm);
4729}
4730
4731bool X86AsmParser::omitRegisterFromClobberLists(MCRegister Reg) {
4732 return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(Reg);
4733}
4734
4735bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
4736 MCAsmParser &Parser = getParser();
4737 StringRef IDVal = DirectiveID.getIdentifier();
4738 if (IDVal.starts_with(".arch"))
4739 return parseDirectiveArch();
4740 if (IDVal.starts_with(".code"))
4741 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
4742 else if (IDVal.starts_with(".att_syntax")) {
4743 if (getLexer().isNot(AsmToken::EndOfStatement)) {
4744 if (Parser.getTok().getString() == "prefix")
4745 Parser.Lex();
4746 else if (Parser.getTok().getString() == "noprefix")
4747 return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
4748 "supported: registers must have a "
4749 "'%' prefix in .att_syntax");
4750 }
4751 getParser().setAssemblerDialect(0);
4752 return false;
4753 } else if (IDVal.starts_with(".intel_syntax")) {
4754 getParser().setAssemblerDialect(1);
4755 if (getLexer().isNot(AsmToken::EndOfStatement)) {
4756 if (Parser.getTok().getString() == "noprefix")
4757 Parser.Lex();
4758 else if (Parser.getTok().getString() == "prefix")
4759 return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
4760 "supported: registers must not have "
4761 "a '%' prefix in .intel_syntax");
4762 }
4763 return false;
4764 } else if (IDVal == ".nops")
4765 return parseDirectiveNops(DirectiveID.getLoc());
4766 else if (IDVal == ".even")
4767 return parseDirectiveEven(DirectiveID.getLoc());
4768 else if (IDVal == ".cv_fpo_proc")
4769 return parseDirectiveFPOProc(DirectiveID.getLoc());
4770 else if (IDVal == ".cv_fpo_setframe")
4771 return parseDirectiveFPOSetFrame(DirectiveID.getLoc());
4772 else if (IDVal == ".cv_fpo_pushreg")
4773 return parseDirectiveFPOPushReg(DirectiveID.getLoc());
4774 else if (IDVal == ".cv_fpo_stackalloc")
4775 return parseDirectiveFPOStackAlloc(DirectiveID.getLoc());
4776 else if (IDVal == ".cv_fpo_stackalign")
4777 return parseDirectiveFPOStackAlign(DirectiveID.getLoc());
4778 else if (IDVal == ".cv_fpo_endprologue")
4779 return parseDirectiveFPOEndPrologue(DirectiveID.getLoc());
4780 else if (IDVal == ".cv_fpo_endproc")
4781 return parseDirectiveFPOEndProc(DirectiveID.getLoc());
4782 else if (IDVal == ".seh_pushreg" ||
4783 (Parser.isParsingMasm() && IDVal.equals_insensitive(".pushreg")))
4784 return parseDirectiveSEHPushReg(DirectiveID.getLoc());
4785 else if (IDVal == ".seh_setframe" ||
4786 (Parser.isParsingMasm() && IDVal.equals_insensitive(".setframe")))
4787 return parseDirectiveSEHSetFrame(DirectiveID.getLoc());
4788 else if (IDVal == ".seh_savereg" ||
4789 (Parser.isParsingMasm() && IDVal.equals_insensitive(".savereg")))
4790 return parseDirectiveSEHSaveReg(DirectiveID.getLoc());
4791 else if (IDVal == ".seh_savexmm" ||
4792 (Parser.isParsingMasm() && IDVal.equals_insensitive(".savexmm128")))
4793 return parseDirectiveSEHSaveXMM(DirectiveID.getLoc());
4794 else if (IDVal == ".seh_pushframe" ||
4795 (Parser.isParsingMasm() && IDVal.equals_insensitive(".pushframe")))
4796 return parseDirectiveSEHPushFrame(DirectiveID.getLoc());
4797
4798 return true;
4799}
4800
4801bool X86AsmParser::parseDirectiveArch() {
4802 // Ignore .arch for now.
4803 getParser().parseStringToEndOfStatement();
4804 return false;
4805}
4806
4807/// parseDirectiveNops
4808/// ::= .nops size[, control]
4809bool X86AsmParser::parseDirectiveNops(SMLoc L) {
4810 int64_t NumBytes = 0, Control = 0;
4811 SMLoc NumBytesLoc, ControlLoc;
4812 const MCSubtargetInfo& STI = getSTI();
4813 NumBytesLoc = getTok().getLoc();
4814 if (getParser().checkForValidSection() ||
4815 getParser().parseAbsoluteExpression(NumBytes))
4816 return true;
4817
4818 if (parseOptionalToken(AsmToken::Comma)) {
4819 ControlLoc = getTok().getLoc();
4820 if (getParser().parseAbsoluteExpression(Control))
4821 return true;
4822 }
4823 if (getParser().parseEOL())
4824 return true;
4825
4826 if (NumBytes <= 0) {
4827 Error(NumBytesLoc, "'.nops' directive with non-positive size");
4828 return false;
4829 }
4830
4831 if (Control < 0) {
4832 Error(ControlLoc, "'.nops' directive with negative NOP size");
4833 return false;
4834 }
4835
4836 /// Emit nops
4837 getParser().getStreamer().emitNops(NumBytes, Control, L, STI);
4838
4839 return false;
4840}
4841
4842/// parseDirectiveEven
4843/// ::= .even
4844bool X86AsmParser::parseDirectiveEven(SMLoc L) {
4845 if (parseEOL())
4846 return false;
4847
4848 const MCSection *Section = getStreamer().getCurrentSectionOnly();
4849 if (!Section) {
4850 getStreamer().initSections(false, getSTI());
4851 Section = getStreamer().getCurrentSectionOnly();
4852 }
4853 if (getContext().getAsmInfo()->useCodeAlign(*Section))
4854 getStreamer().emitCodeAlignment(Align(2), &getSTI(), 0);
4855 else
4856 getStreamer().emitValueToAlignment(Align(2), 0, 1, 0);
4857 return false;
4858}
4859
4860/// ParseDirectiveCode
4861/// ::= .code16 | .code32 | .code64
4862bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
4863 MCAsmParser &Parser = getParser();
4864 Code16GCC = false;
4865 if (IDVal == ".code16") {
4866 Parser.Lex();
4867 if (!is16BitMode()) {
4868 SwitchMode(X86::Is16Bit);
4869 getTargetStreamer().emitCode16();
4870 }
4871 } else if (IDVal == ".code16gcc") {
4872 // .code16gcc parses as if in 32-bit mode, but emits code in 16-bit mode.
4873 Parser.Lex();
4874 Code16GCC = true;
4875 if (!is16BitMode()) {
4876 SwitchMode(X86::Is16Bit);
4877 getTargetStreamer().emitCode16();
4878 }
4879 } else if (IDVal == ".code32") {
4880 Parser.Lex();
4881 if (!is32BitMode()) {
4882 SwitchMode(X86::Is32Bit);
4883 getTargetStreamer().emitCode32();
4884 }
4885 } else if (IDVal == ".code64") {
4886 Parser.Lex();
4887 if (!is64BitMode()) {
4888 SwitchMode(X86::Is64Bit);
4889 getTargetStreamer().emitCode64();
4890 }
4891 } else {
4892 Error(L, "unknown directive " + IDVal);
4893 return false;
4894 }
4895
4896 return false;
4897}
4898
4899// .cv_fpo_proc foo
4900bool X86AsmParser::parseDirectiveFPOProc(SMLoc L) {
4901 MCAsmParser &Parser = getParser();
4902 StringRef ProcName;
4903 int64_t ParamsSize;
4904 if (Parser.parseIdentifier(ProcName))
4905 return Parser.TokError("expected symbol name");
4906 if (Parser.parseIntToken(ParamsSize, "expected parameter byte count"))
4907 return true;
4908 if (!isUIntN(32, ParamsSize))
4909 return Parser.TokError("parameters size out of range");
4910 if (parseEOL())
4911 return true;
4912 MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName);
4913 return getTargetStreamer().emitFPOProc(ProcSym, ParamsSize, L);
4914}
4915
4916// .cv_fpo_setframe ebp
4917bool X86AsmParser::parseDirectiveFPOSetFrame(SMLoc L) {
4918 MCRegister Reg;
4919 SMLoc DummyLoc;
4920 if (parseRegister(Reg, DummyLoc, DummyLoc) || parseEOL())
4921 return true;
4922 return getTargetStreamer().emitFPOSetFrame(Reg, L);
4923}
4924
4925// .cv_fpo_pushreg ebx
4926bool X86AsmParser::parseDirectiveFPOPushReg(SMLoc L) {
4927 MCRegister Reg;
4928 SMLoc DummyLoc;
4929 if (parseRegister(Reg, DummyLoc, DummyLoc) || parseEOL())
4930 return true;
4931 return getTargetStreamer().emitFPOPushReg(Reg, L);
4932}
4933
4934// .cv_fpo_stackalloc 20
4935bool X86AsmParser::parseDirectiveFPOStackAlloc(SMLoc L) {
4936 MCAsmParser &Parser = getParser();
4937 int64_t Offset;
4938 if (Parser.parseIntToken(Offset, "expected offset") || parseEOL())
4939 return true;
4940 return getTargetStreamer().emitFPOStackAlloc(Offset, L);
4941}
4942
4943// .cv_fpo_stackalign 8
4944bool X86AsmParser::parseDirectiveFPOStackAlign(SMLoc L) {
4945 MCAsmParser &Parser = getParser();
4946 int64_t Offset;
4947 if (Parser.parseIntToken(Offset, "expected offset") || parseEOL())
4948 return true;
4949 return getTargetStreamer().emitFPOStackAlign(Offset, L);
4950}
4951
4952// .cv_fpo_endprologue
4953bool X86AsmParser::parseDirectiveFPOEndPrologue(SMLoc L) {
4954 MCAsmParser &Parser = getParser();
4955 if (Parser.parseEOL())
4956 return true;
4957 return getTargetStreamer().emitFPOEndPrologue(L);
4958}
4959
4960// .cv_fpo_endproc
4961bool X86AsmParser::parseDirectiveFPOEndProc(SMLoc L) {
4962 MCAsmParser &Parser = getParser();
4963 if (Parser.parseEOL())
4964 return true;
4965 return getTargetStreamer().emitFPOEndProc(L);
4966}
4967
4968bool X86AsmParser::parseSEHRegisterNumber(unsigned RegClassID,
4969 MCRegister &RegNo) {
4970 SMLoc startLoc = getLexer().getLoc();
4971 const MCRegisterInfo *MRI = getContext().getRegisterInfo();
4972
4973 // Try parsing the argument as a register first.
4974 if (getLexer().getTok().isNot(AsmToken::Integer)) {
4975 SMLoc endLoc;
4976 if (parseRegister(RegNo, startLoc, endLoc))
4977 return true;
4978
4979 if (!X86MCRegisterClasses[RegClassID].contains(RegNo)) {
4980 return Error(startLoc,
4981 "register is not supported for use with this directive");
4982 }
4983 } else {
4984 // Otherwise, an integer number matching the encoding of the desired
4985 // register may appear.
4986 int64_t EncodedReg;
4987 if (getParser().parseAbsoluteExpression(EncodedReg))
4988 return true;
4989
4990 // The SEH register number is the same as the encoding register number. Map
4991 // from the encoding back to the LLVM register number.
4992 RegNo = MCRegister();
4993 for (MCPhysReg Reg : X86MCRegisterClasses[RegClassID]) {
4994 if (MRI->getEncodingValue(Reg) == EncodedReg) {
4995 RegNo = Reg;
4996 break;
4997 }
4998 }
4999 if (!RegNo) {
5000 return Error(startLoc,
5001 "incorrect register number for use with this directive");
5002 }
5003 }
5004
5005 return false;
5006}
5007
5008bool X86AsmParser::parseDirectiveSEHPushReg(SMLoc Loc) {
5009 MCRegister Reg;
5010 if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg))
5011 return true;
5012
5013 if (getLexer().isNot(AsmToken::EndOfStatement))
5014 return TokError("expected end of directive");
5015
5016 getParser().Lex();
5017 getStreamer().emitWinCFIPushReg(Reg, Loc);
5018 return false;
5019}
5020
5021bool X86AsmParser::parseDirectiveSEHSetFrame(SMLoc Loc) {
5022 MCRegister Reg;
5023 int64_t Off;
5024 if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg))
5025 return true;
5026 if (getLexer().isNot(AsmToken::Comma))
5027 return TokError("you must specify a stack pointer offset");
5028
5029 getParser().Lex();
5030 if (getParser().parseAbsoluteExpression(Off))
5031 return true;
5032
5033 if (getLexer().isNot(AsmToken::EndOfStatement))
5034 return TokError("expected end of directive");
5035
5036 getParser().Lex();
5037 getStreamer().emitWinCFISetFrame(Reg, Off, Loc);
5038 return false;
5039}
5040
5041bool X86AsmParser::parseDirectiveSEHSaveReg(SMLoc Loc) {
5042 MCRegister Reg;
5043 int64_t Off;
5044 if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg))
5045 return true;
5046 if (getLexer().isNot(AsmToken::Comma))
5047 return TokError("you must specify an offset on the stack");
5048
5049 getParser().Lex();
5050 if (getParser().parseAbsoluteExpression(Off))
5051 return true;
5052
5053 if (getLexer().isNot(AsmToken::EndOfStatement))
5054 return TokError("expected end of directive");
5055
5056 getParser().Lex();
5057 getStreamer().emitWinCFISaveReg(Reg, Off, Loc);
5058 return false;
5059}
5060
5061bool X86AsmParser::parseDirectiveSEHSaveXMM(SMLoc Loc) {
5062 MCRegister Reg;
5063 int64_t Off;
5064 if (parseSEHRegisterNumber(X86::VR128XRegClassID, Reg))
5065 return true;
5066 if (getLexer().isNot(AsmToken::Comma))
5067 return TokError("you must specify an offset on the stack");
5068
5069 getParser().Lex();
5070 if (getParser().parseAbsoluteExpression(Off))
5071 return true;
5072
5073 if (getLexer().isNot(AsmToken::EndOfStatement))
5074 return TokError("expected end of directive");
5075
5076 getParser().Lex();
5077 getStreamer().emitWinCFISaveXMM(Reg, Off, Loc);
5078 return false;
5079}
5080
5081bool X86AsmParser::parseDirectiveSEHPushFrame(SMLoc Loc) {
5082 bool Code = false;
5083 StringRef CodeID;
5084 if (getLexer().is(AsmToken::At)) {
5085 SMLoc startLoc = getLexer().getLoc();
5086 getParser().Lex();
5087 if (!getParser().parseIdentifier(CodeID)) {
5088 if (CodeID != "code")
5089 return Error(startLoc, "expected @code");
5090 Code = true;
5091 }
5092 }
5093
5094 if (getLexer().isNot(AsmToken::EndOfStatement))
5095 return TokError("expected end of directive");
5096
5097 getParser().Lex();
5098 getStreamer().emitWinCFIPushFrame(Code, Loc);
5099 return false;
5100}
5101
5102// Force static initialization.
5107
5108#define GET_MATCHER_IMPLEMENTATION
5109#include "X86GenAsmMatcher.inc"
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static const char * getSubtargetFeatureName(uint64_t Val)
unsigned RegSize
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool isNot(const MachineRegisterInfo &MRI, const MachineInstr &MI)
Function Alias Analysis false
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
Definition CSEInfo.cpp:27
amode Optimize addressing mode
static ModuleSymbolTable::Symbol getSym(DataRefImpl &Symb)
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
#define I(x, y, z)
Definition MD5.cpp:58
mir Rename Register Operands
static bool IsVCMP(unsigned Opcode)
Register Reg
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
OptimizedStructLayoutField Field
static StringRef getName(Value *V)
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:480
This file defines the SmallString class.
This file defines the SmallVector class.
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
DEMANGLE_NAMESPACE_BEGIN bool starts_with(std::string_view self, char C) noexcept
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
#define LLVM_C_ABI
LLVM_C_ABI is the export/visibility macro used to mark symbols declared in llvm-c as exported when bu...
Definition Visibility.h:40
static cl::opt< bool > LVIInlineAsmHardening("x86-experimental-lvi-inline-asm-hardening", cl::desc("Harden inline assembly code that may be vulnerable to Load Value" " Injection (LVI). This feature is experimental."), cl::Hidden)
static bool checkScale(unsigned Scale, StringRef &ErrMsg)
LLVM_C_ABI void LLVMInitializeX86AsmParser()
static bool convertSSEToAVX(MCInst &Inst)
static unsigned getPrefixes(OperandVector &Operands)
static bool CheckBaseRegAndIndexRegAndScale(MCRegister BaseReg, MCRegister IndexReg, unsigned Scale, bool Is64BitMode, StringRef &ErrMsg)
#define FROM_TO(FROM, TO)
uint16_t RegSizeInBits(const MCRegisterInfo &MRI, MCRegister RegNo)
Value * RHS
Value * LHS
static unsigned getSize(unsigned Kind)
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1540
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
iterator end() const
Definition ArrayRef.h:136
void UnLex(AsmToken const &Token)
Definition AsmLexer.h:106
bool isNot(AsmToken::TokenKind K) const
Check if the current token has kind K.
Definition AsmLexer.h:150
LLVM_ABI SMLoc getLoc() const
Definition AsmLexer.cpp:32
int64_t getIntVal() const
Definition MCAsmMacro.h:108
bool isNot(TokenKind K) const
Definition MCAsmMacro.h:76
StringRef getString() const
Get the string for the current token, this includes all characters (for example, the quotes on string...
Definition MCAsmMacro.h:103
bool is(TokenKind K) const
Definition MCAsmMacro.h:75
TokenKind getKind() const
Definition MCAsmMacro.h:74
LLVM_ABI SMLoc getEndLoc() const
Definition AsmLexer.cpp:34
StringRef getIdentifier() const
Get the identifier string for the current token, which should be an identifier or a string.
Definition MCAsmMacro.h:92
constexpr size_t size() const
bool parseIntToken(int64_t &V, const Twine &ErrMsg="expected integer")
MCContext & getContext()
virtual bool parseExpression(const MCExpr *&Res, SMLoc &EndLoc)=0
Parse an arbitrary expression.
const AsmToken & getTok() const
Get the current AsmToken from the stream.
virtual bool isParsingMasm() const
virtual bool parseIdentifier(StringRef &Res)=0
Parse an identifier or string (as a quoted identifier) and set Res to the identifier contents.
bool parseOptionalToken(AsmToken::TokenKind T)
Attempt to parse and consume token, returning true on success.
virtual bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc, AsmTypeInfo *TypeInfo=nullptr)=0
Parse a primary expression.
virtual const AsmToken & Lex()=0
Get the next AsmToken in the stream, possibly handling file inclusion first.
virtual void addAliasForDirective(StringRef Directive, StringRef Alias)=0
virtual bool lookUpType(StringRef Name, AsmTypeInfo &Info) const
bool TokError(const Twine &Msg, SMRange Range=std::nullopt)
Report an error at the current lexer location.
virtual bool parseAbsoluteExpression(int64_t &Res)=0
Parse an expression which must evaluate to an absolute value.
virtual bool lookUpField(StringRef Name, AsmFieldInfo &Info) const
bool parseTokenLoc(SMLoc &Loc)
bool Error(SMLoc L, const Twine &Msg, SMRange Range=std::nullopt)
Return an error at the location L, with the message Msg.
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:343
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition MCExpr.cpp:212
@ SymbolRef
References to labels and assigned expressions.
Definition MCExpr.h:43
ExprKind getKind() const
Definition MCExpr.h:85
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
unsigned getNumOperands() const
Definition MCInst.h:212
SMLoc getLoc() const
Definition MCInst.h:208
unsigned getFlags() const
Definition MCInst.h:205
void setLoc(SMLoc loc)
Definition MCInst.h:207
unsigned getOpcode() const
Definition MCInst.h:202
void setFlags(unsigned F)
Definition MCInst.h:204
void addOperand(const MCOperand Op)
Definition MCInst.h:215
void setOpcode(unsigned Op)
Definition MCInst.h:201
void clear()
Definition MCInst.h:223
const MCOperand & getOperand(unsigned i) const
Definition MCInst.h:210
bool mayLoad() const
Return true if this instruction could possibly read memory.
bool isCall() const
Return true if the instruction is a call.
bool isTerminator() const
Returns true if this instruction part of the terminator for a basic block.
int64_t getImm() const
Definition MCInst.h:84
static MCOperand createImm(int64_t Val)
Definition MCInst.h:145
bool isImm() const
Definition MCInst.h:66
bool isReg() const
Definition MCInst.h:65
MCRegister getReg() const
Returns the register number.
Definition MCInst.h:73
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:33
virtual void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI)
Emit the given Instruction into the current section.
const FeatureBitset & getFeatureBits() const
FeatureBitset ToggleFeature(uint64_t FB)
Toggle a feature and return the re-computed feature bits.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
bool isUndefined() const
isUndefined - Check if this symbol undefined (i.e., implicitly defined).
Definition MCSymbol.h:243
StringRef getName() const
getName - Get the symbol name.
Definition MCSymbol.h:188
bool isVariable() const
isVariable - Check if this is a variable symbol.
Definition MCSymbol.h:267
const MCExpr * getVariableValue() const
Get the expression of the variable symbol.
Definition MCSymbol.h:270
MCTargetAsmParser - Generic interface to target specific assembly parsers.
static constexpr StatusTy Failure
static constexpr StatusTy Success
static constexpr StatusTy NoMatch
constexpr unsigned id() const
Definition Register.h:95
Represents a location in source code.
Definition SMLoc.h:23
static SMLoc getFromPointer(const char *Ptr)
Definition SMLoc.h:36
constexpr const char * getPointer() const
Definition SMLoc.h:34
constexpr bool isValid() const
Definition SMLoc.h:29
void push_back(const T &Elt)
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:702
bool consume_back(StringRef Suffix)
Returns true if this StringRef has the given suffix and removes that suffix.
Definition StringRef.h:657
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition StringRef.h:472
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:573
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:261
LLVM_ABI std::string upper() const
Convert the given ASCII string to uppercase.
char back() const
back - Get the last character in the string.
Definition StringRef.h:155
StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition StringRef.h:686
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:140
bool consume_front(StringRef Prefix)
Returns true if this StringRef has the given prefix and removes that prefix.
Definition StringRef.h:637
LLVM_ABI std::string lower() const
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
Definition StringRef.h:273
static constexpr size_t npos
Definition StringRef.h:57
StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Definition StringRef.h:618
bool equals_insensitive(StringRef RHS) const
Check for string equality, ignoring case.
Definition StringRef.h:172
static const char * getRegisterName(MCRegister Reg)
static const X86MCExpr * create(MCRegister Reg, MCContext &Ctx)
Definition X86MCExpr.h:34
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
std::variant< std::monostate, Loc::Single, Loc::Multi, Loc::MMI, Loc::EntryValue > Variant
Alias for the std::variant specialization base class of DbgVariable.
Definition DwarfDebug.h:189
@ CE
Windows NT (Windows on ARM)
Definition MCAsmInfo.h:48
@ X86
Windows x64, Windows Itanium (IA-64)
Definition MCAsmInfo.h:50
bool isX86_64NonExtLowByteReg(MCRegister Reg)
@ EVEX
EVEX - Specifies that this instruction use EVEX form which provides syntax support up to 32 512-bit r...
@ VEX
VEX - encoding using 0xC4/0xC5.
@ XOP
XOP - Opcode prefix used by XOP instructions.
@ ExplicitVEXPrefix
For instructions that use VEX encoding only when {vex}, {vex2} or {vex3} is present.
bool canUseApxExtendedReg(const MCInstrDesc &Desc)
bool isX86_64ExtendedReg(MCRegister Reg)
bool isApxExtendedReg(MCRegister Reg)
void emitInstruction(MCObjectStreamer &, const MCInst &Inst, const MCSubtargetInfo &STI)
@ AddrNumOperands
Definition X86BaseInfo.h:36
bool optimizeShiftRotateWithImmediateOne(MCInst &MI)
bool optimizeInstFromVEX3ToVEX2(MCInst &MI, const MCInstrDesc &Desc)
@ IP_HAS_REPEAT_NE
Definition X86BaseInfo.h:55
NodeAddr< CodeNode * > Code
Definition RDFGraph.h:388
Context & getContext() const
Definition BasicBlock.h:99
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
Definition SFrame.h:77
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:477
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1657
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:174
LLVM_ABI std::pair< StringRef, StringRef > getToken(StringRef Source, StringRef Delimiters=" \t\n\v\f\r")
getToken - This function extracts one token from source, ignoring any leading characters that appear ...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:644
@ Done
Definition Threading.h:60
@ AOK_EndOfStatement
@ AOK_SizeDirective
MCRegister getX86SubSuperRegister(MCRegister Reg, unsigned Size, bool High=false)
Target & getTheX86_32Target()
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:252
SmallVectorImpl< std::unique_ptr< MCParsedAsmOperand > > OperandVector
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:198
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition STLExtras.h:1974
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:1934
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:560
constexpr bool isIntN(unsigned N, int64_t x)
Checks if an signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:257
Target & getTheX86_64Target()
StringRef toStringRef(bool B)
Construct a string ref from a boolean.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
bool isKind(IdKind kind) const
Definition MCAsmParser.h:66
SmallVectorImpl< AsmRewrite > * AsmRewrites
RegisterMCAsmParser - Helper template for registering a target specific assembly parser,...
X86Operand - Instances of this class represent a parsed X86 machine instruction.
Definition X86Operand.h:31
SMLoc getStartLoc() const override
getStartLoc - Get the location of the first token of this operand.
Definition X86Operand.h:98
bool isImm() const override
isImm - Is this an immediate operand?
Definition X86Operand.h:223
static std::unique_ptr< X86Operand > CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc, StringRef SymName=StringRef(), void *OpDecl=nullptr, bool GlobalRef=true)
Definition X86Operand.h:729
static std::unique_ptr< X86Operand > CreatePrefix(unsigned Prefixes, SMLoc StartLoc, SMLoc EndLoc)
Definition X86Operand.h:723
static std::unique_ptr< X86Operand > CreateDXReg(SMLoc StartLoc, SMLoc EndLoc)
Definition X86Operand.h:718
static std::unique_ptr< X86Operand > CreateReg(MCRegister Reg, SMLoc StartLoc, SMLoc EndLoc, bool AddressOf=false, SMLoc OffsetOfLoc=SMLoc(), StringRef SymName=StringRef(), void *OpDecl=nullptr)
Definition X86Operand.h:705
SMRange getLocRange() const
getLocRange - Get the range between the first and last token of this operand.
Definition X86Operand.h:105
SMLoc getEndLoc() const override
getEndLoc - Get the location of the last token of this operand.
Definition X86Operand.h:101
bool isReg() const override
isReg - Is this a register operand?
Definition X86Operand.h:510
bool isMem() const override
isMem - Is this a memory operand?
Definition X86Operand.h:304
static std::unique_ptr< X86Operand > CreateMem(unsigned ModeSize, const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc, unsigned Size=0, StringRef SymName=StringRef(), void *OpDecl=nullptr, unsigned FrontendSize=0, bool UseUpRegs=false, bool MaybeDirectBranchDest=true)
Create an absolute memory operand.
Definition X86Operand.h:745
struct MemOp Mem
Definition X86Operand.h:86
bool isVectorReg() const
Definition X86Operand.h:526
static std::unique_ptr< X86Operand > CreateToken(StringRef Str, SMLoc Loc)
Definition X86Operand.h:696
bool isMemUnsized() const
Definition X86Operand.h:305
const MCExpr * getImm() const
Definition X86Operand.h:179
unsigned getMemFrontendSize() const
Definition X86Operand.h:212
bool isMem8() const
Definition X86Operand.h:308
MCRegister getReg() const override
Definition X86Operand.h:169