LLVM 19.0.0git
X86AsmParser.cpp
Go to the documentation of this file.
1//===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
16#include "X86AsmParserCommon.h"
17#include "X86Operand.h"
18#include "llvm/ADT/STLExtras.h"
22#include "llvm/ADT/Twine.h"
23#include "llvm/MC/MCContext.h"
24#include "llvm/MC/MCExpr.h"
25#include "llvm/MC/MCInst.h"
26#include "llvm/MC/MCInstrInfo.h"
32#include "llvm/MC/MCSection.h"
33#include "llvm/MC/MCStreamer.h"
35#include "llvm/MC/MCSymbol.h"
41#include <algorithm>
42#include <memory>
43
44using namespace llvm;
45
47 "x86-experimental-lvi-inline-asm-hardening",
48 cl::desc("Harden inline assembly code that may be vulnerable to Load Value"
49 " Injection (LVI). This feature is experimental."), cl::Hidden);
50
51static bool checkScale(unsigned Scale, StringRef &ErrMsg) {
52 if (Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
53 ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
54 return true;
55 }
56 return false;
57}
58
59namespace {
60
// Binding strength for each infix operator, indexed by InfixCalculatorTok.
// The entry order MUST stay in sync with that enum. A higher value binds
// tighter; IC_RPAREN/IC_LPAREN carry sentinel precedences used by the
// shunting-yard logic in InfixCalculator::pushOperator(), and operand
// tokens (IC_IMM/IC_REGISTER) are never compared.
static const char OpPrecedence[] = {
    0,  // IC_OR
    1,  // IC_XOR
    2,  // IC_AND
    4,  // IC_LSHIFT
    4,  // IC_RSHIFT
    5,  // IC_PLUS
    5,  // IC_MINUS
    6,  // IC_MULTIPLY
    6,  // IC_DIVIDE
    6,  // IC_MOD
    7,  // IC_NOT
    8,  // IC_NEG
    9,  // IC_RPAREN
    10, // IC_LPAREN
    0,  // IC_IMM
    0,  // IC_REGISTER
    3,  // IC_EQ
    3,  // IC_NE
    3,  // IC_LT
    3,  // IC_LE
    3,  // IC_GT
    3   // IC_GE
};
85
// Target assembly parser for X86 (both AT&T and Intel syntax).
class X86AsmParser : public MCTargetAsmParser {
  ParseInstructionInfo *InstInfo;
  bool Code16GCC; // true when parsing under .code16gcc (16-bit mode, 32-bit semantics)
  unsigned ForcedDataPrefix = 0;

  // Encoding prefix explicitly requested for the current instruction
  // (e.g. via {vex}/{evex}/{rex} pseudo-prefixes).
  enum OpcodePrefix {
    OpcodePrefix_Default,
    OpcodePrefix_REX,
    OpcodePrefix_REX2,
    OpcodePrefix_VEX,
    OpcodePrefix_VEX2,
    OpcodePrefix_VEX3,
    OpcodePrefix_EVEX,
  };

  OpcodePrefix ForcedOpcodePrefix = OpcodePrefix_Default;

  // Displacement size explicitly requested for the current instruction
  // (e.g. via {disp8}/{disp32} pseudo-prefixes).
  enum DispEncoding {
    DispEncoding_Default,
    DispEncoding_Disp8,
    DispEncoding_Disp32,
  };

  DispEncoding ForcedDispEncoding = DispEncoding_Default;

  // Does this instruction use apx extended register?
  bool UseApxExtendedReg = false;
  // Is this instruction explicitly required not to update flags?
  bool ForcedNoFlag = false;

private:
117 SMLoc consumeToken() {
118 MCAsmParser &Parser = getParser();
119 SMLoc Result = Parser.getTok().getLoc();
120 Parser.Lex();
121 return Result;
122 }
123
124 X86TargetStreamer &getTargetStreamer() {
125 assert(getParser().getStreamer().getTargetStreamer() &&
126 "do not have a target streamer");
128 return static_cast<X86TargetStreamer &>(TS);
129 }
130
131 unsigned MatchInstruction(const OperandVector &Operands, MCInst &Inst,
132 uint64_t &ErrorInfo, FeatureBitset &MissingFeatures,
133 bool matchingInlineAsm, unsigned VariantID = 0) {
134 // In Code16GCC mode, match as 32-bit.
135 if (Code16GCC)
136 SwitchMode(X86::Is32Bit);
137 unsigned rv = MatchInstructionImpl(Operands, Inst, ErrorInfo,
138 MissingFeatures, matchingInlineAsm,
139 VariantID);
140 if (Code16GCC)
141 SwitchMode(X86::Is16Bit);
142 return rv;
143 }
144
  // Token kinds understood by InfixCalculator. The enumerator order MUST
  // match the OpPrecedence[] table, which is indexed by these values.
  enum InfixCalculatorTok {
    IC_OR = 0,
    IC_XOR,
    IC_AND,
    IC_LSHIFT,
    IC_RSHIFT,
    IC_PLUS,
    IC_MINUS,
    IC_MULTIPLY,
    IC_DIVIDE,
    IC_MOD,
    IC_NOT,
    IC_NEG,
    IC_RPAREN,
    IC_LPAREN,
    IC_IMM,      // immediate operand
    IC_REGISTER, // register operand
    IC_EQ,
    IC_NE,
    IC_LT,
    IC_LE,
    IC_GT,
    IC_GE
  };
169
  // Intel-syntax inline-asm operators (LENGTH/SIZE/TYPE ...).
  enum IntelOperatorKind {
    IOK_INVALID = 0,
    IOK_LENGTH,
    IOK_SIZE,
    IOK_TYPE,
  };
176
  // MASM-dialect operators (LENGTHOF/SIZEOF/TYPE ...).
  enum MasmOperatorKind {
    MOK_INVALID = 0,
    MOK_LENGTHOF,
    MOK_SIZEOF,
    MOK_TYPE,
  };
183
  // Evaluates Intel-syntax arithmetic expressions over 64-bit integers.
  // Operands and operators are fed in infix order; pushOperator() performs a
  // shunting-yard conversion into PostfixStack, and execute() folds the
  // resulting postfix (RPN) sequence into a single value.
  class InfixCalculator {
    typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
    SmallVector<InfixCalculatorTok, 4> InfixOperatorStack; // pending operators
    SmallVector<ICToken, 4> PostfixStack;                  // RPN output queue

    // NEG and NOT are the only unary operators handled here.
    bool isUnaryOperator(InfixCalculatorTok Op) const {
      return Op == IC_NEG || Op == IC_NOT;
    }

  public:
    // Pop the most recently pushed operand value. Returns -1 (an invalid
    // scale, rejected later by checkScale) when the top of the postfix
    // stack is not an operand token.
    int64_t popOperand() {
      assert (!PostfixStack.empty() && "Poped an empty stack!");
      ICToken Op = PostfixStack.pop_back_val();
      if (!(Op.first == IC_IMM || Op.first == IC_REGISTER))
        return -1; // The invalid Scale value will be caught later by checkScale
      return Op.second;
    }
    // Push an operand (immediate or register) onto the postfix stack.
    void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
      assert ((Op == IC_IMM || Op == IC_REGISTER) &&
              "Unexpected operand!");
      PostfixStack.push_back(std::make_pair(Op, Val));
    }

    // Discard the most recently pushed operator.
    void popOperator() { InfixOperatorStack.pop_back(); }
    // Shunting-yard step: emit higher-or-equal-precedence operators from the
    // operator stack to the postfix stack, then push the new operator.
    void pushOperator(InfixCalculatorTok Op) {
      // Push the new operator if the stack is empty.
      if (InfixOperatorStack.empty()) {
        InfixOperatorStack.push_back(Op);
        return;
      }

      // Push the new operator if it has a higher precedence than the operator
      // on the top of the stack or the operator on the top of the stack is a
      // left parentheses.
      unsigned Idx = InfixOperatorStack.size() - 1;
      InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
      if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
        InfixOperatorStack.push_back(Op);
        return;
      }

      // The operator on the top of the stack has higher precedence than the
      // new operator.
      unsigned ParenCount = 0;
      while (true) {
        // Nothing to process.
        if (InfixOperatorStack.empty())
          break;

        Idx = InfixOperatorStack.size() - 1;
        StackOp = InfixOperatorStack[Idx];
        if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
          break;

        // If we have an even parentheses count and we see a left parentheses,
        // then stop processing.
        if (!ParenCount && StackOp == IC_LPAREN)
          break;

        if (StackOp == IC_RPAREN) {
          ++ParenCount;
          InfixOperatorStack.pop_back();
        } else if (StackOp == IC_LPAREN) {
          --ParenCount;
          InfixOperatorStack.pop_back();
        } else {
          InfixOperatorStack.pop_back();
          PostfixStack.push_back(std::make_pair(StackOp, 0));
        }
      }
      // Push the new operator.
      InfixOperatorStack.push_back(Op);
    }

    // Flush remaining operators and evaluate the postfix sequence.
    // Comparison operators yield MASM-style truth values (-1 true, 0 false).
    int64_t execute() {
      // Push any remaining operators onto the postfix stack.
      while (!InfixOperatorStack.empty()) {
        InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
        if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
          PostfixStack.push_back(std::make_pair(StackOp, 0));
      }

      if (PostfixStack.empty())
        return 0;

      SmallVector<ICToken, 16> OperandStack;
      for (const ICToken &Op : PostfixStack) {
        if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
          OperandStack.push_back(Op);
        } else if (isUnaryOperator(Op.first)) {
          assert (OperandStack.size() > 0 && "Too few operands.");
          ICToken Operand = OperandStack.pop_back_val();
          assert (Operand.first == IC_IMM &&
                  "Unary operation with a register!");
          switch (Op.first) {
          default:
            report_fatal_error("Unexpected operator!");
            break;
          case IC_NEG:
            OperandStack.push_back(std::make_pair(IC_IMM, -Operand.second));
            break;
          case IC_NOT:
            OperandStack.push_back(std::make_pair(IC_IMM, ~Operand.second));
            break;
          }
        } else {
          assert (OperandStack.size() > 1 && "Too few operands.");
          int64_t Val;
          ICToken Op2 = OperandStack.pop_back_val();
          ICToken Op1 = OperandStack.pop_back_val();
          switch (Op.first) {
          default:
            report_fatal_error("Unexpected operator!");
            break;
          case IC_PLUS:
            Val = Op1.second + Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_MINUS:
            Val = Op1.second - Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_MULTIPLY:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Multiply operation with an immediate and a register!");
            Val = Op1.second * Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_DIVIDE:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Divide operation with an immediate and a register!");
            assert (Op2.second != 0 && "Division by zero!");
            Val = Op1.second / Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_MOD:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Modulo operation with an immediate and a register!");
            Val = Op1.second % Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_OR:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Or operation with an immediate and a register!");
            Val = Op1.second | Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_XOR:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
              "Xor operation with an immediate and a register!");
            Val = Op1.second ^ Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_AND:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "And operation with an immediate and a register!");
            Val = Op1.second & Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_LSHIFT:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Left shift operation with an immediate and a register!");
            Val = Op1.second << Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_RSHIFT:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Right shift operation with an immediate and a register!");
            Val = Op1.second >> Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_EQ:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Equals operation with an immediate and a register!");
            Val = (Op1.second == Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_NE:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Not-equals operation with an immediate and a register!");
            Val = (Op1.second != Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_LT:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Less-than operation with an immediate and a register!");
            Val = (Op1.second < Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_LE:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Less-than-or-equal operation with an immediate and a "
                   "register!");
            Val = (Op1.second <= Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_GT:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Greater-than operation with an immediate and a register!");
            Val = (Op1.second > Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_GE:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Greater-than-or-equal operation with an immediate and a "
                   "register!");
            Val = (Op1.second >= Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          }
        }
      }
      assert (OperandStack.size() == 1 && "Expected a single result.");
      return OperandStack.pop_back_val().second;
    }
  };
400
  // States of IntelExprStateMachine: roughly, the kind of the token most
  // recently accepted. IES_ERROR is the sink state for invalid input.
  enum IntelExprState {
    IES_INIT,
    IES_OR,
    IES_XOR,
    IES_AND,
    IES_EQ,
    IES_NE,
    IES_LT,
    IES_LE,
    IES_GT,
    IES_GE,
    IES_LSHIFT,
    IES_RSHIFT,
    IES_PLUS,
    IES_MINUS,
    IES_OFFSET,
    IES_CAST,
    IES_NOT,
    IES_MULTIPLY,
    IES_DIVIDE,
    IES_MOD,
    IES_LBRAC,
    IES_RBRAC,
    IES_LPAREN,
    IES_RPAREN,
    IES_REGISTER,
    IES_INTEGER,
    IES_ERROR
  };
430
431 class IntelExprStateMachine {
432 IntelExprState State = IES_INIT, PrevState = IES_ERROR;
433 unsigned BaseReg = 0, IndexReg = 0, TmpReg = 0, Scale = 0;
434 int64_t Imm = 0;
435 const MCExpr *Sym = nullptr;
436 StringRef SymName;
437 InfixCalculator IC;
439 short BracCount = 0;
440 bool MemExpr = false;
441 bool BracketUsed = false;
442 bool OffsetOperator = false;
443 bool AttachToOperandIdx = false;
444 bool IsPIC = false;
445 SMLoc OffsetOperatorLoc;
446 AsmTypeInfo CurType;
447
448 bool setSymRef(const MCExpr *Val, StringRef ID, StringRef &ErrMsg) {
449 if (Sym) {
450 ErrMsg = "cannot use more than one symbol in memory operand";
451 return true;
452 }
453 Sym = Val;
454 SymName = ID;
455 return false;
456 }
457
458 public:
459 IntelExprStateMachine() = default;
460
461 void addImm(int64_t imm) { Imm += imm; }
462 short getBracCount() const { return BracCount; }
463 bool isMemExpr() const { return MemExpr; }
464 bool isBracketUsed() const { return BracketUsed; }
465 bool isOffsetOperator() const { return OffsetOperator; }
466 SMLoc getOffsetLoc() const { return OffsetOperatorLoc; }
467 unsigned getBaseReg() const { return BaseReg; }
468 unsigned getIndexReg() const { return IndexReg; }
469 unsigned getScale() const { return Scale; }
470 const MCExpr *getSym() const { return Sym; }
471 StringRef getSymName() const { return SymName; }
472 StringRef getType() const { return CurType.Name; }
473 unsigned getSize() const { return CurType.Size; }
474 unsigned getElementSize() const { return CurType.ElementSize; }
475 unsigned getLength() const { return CurType.Length; }
476 int64_t getImm() { return Imm + IC.execute(); }
477 bool isValidEndState() const {
478 return State == IES_RBRAC || State == IES_INTEGER;
479 }
480
481 // Is the intel expression appended after an operand index.
482 // [OperandIdx][Intel Expression]
483 // This is neccessary for checking if it is an independent
484 // intel expression at back end when parse inline asm.
485 void setAppendAfterOperand() { AttachToOperandIdx = true; }
486
487 bool isPIC() const { return IsPIC; }
488 void setPIC() { IsPIC = true; }
489
490 bool hadError() const { return State == IES_ERROR; }
491 const InlineAsmIdentifierInfo &getIdentifierInfo() const { return Info; }
492
493 bool regsUseUpError(StringRef &ErrMsg) {
494 // This case mostly happen in inline asm, e.g. Arr[BaseReg + IndexReg]
495 // can not intruduce additional register in inline asm in PIC model.
496 if (IsPIC && AttachToOperandIdx)
497 ErrMsg = "Don't use 2 or more regs for mem offset in PIC model!";
498 else
499 ErrMsg = "BaseReg/IndexReg already set!";
500 return true;
501 }
502
503 void onOr() {
504 IntelExprState CurrState = State;
505 switch (State) {
506 default:
507 State = IES_ERROR;
508 break;
509 case IES_INTEGER:
510 case IES_RPAREN:
511 case IES_REGISTER:
512 State = IES_OR;
513 IC.pushOperator(IC_OR);
514 break;
515 }
516 PrevState = CurrState;
517 }
518 void onXor() {
519 IntelExprState CurrState = State;
520 switch (State) {
521 default:
522 State = IES_ERROR;
523 break;
524 case IES_INTEGER:
525 case IES_RPAREN:
526 case IES_REGISTER:
527 State = IES_XOR;
528 IC.pushOperator(IC_XOR);
529 break;
530 }
531 PrevState = CurrState;
532 }
533 void onAnd() {
534 IntelExprState CurrState = State;
535 switch (State) {
536 default:
537 State = IES_ERROR;
538 break;
539 case IES_INTEGER:
540 case IES_RPAREN:
541 case IES_REGISTER:
542 State = IES_AND;
543 IC.pushOperator(IC_AND);
544 break;
545 }
546 PrevState = CurrState;
547 }
548 void onEq() {
549 IntelExprState CurrState = State;
550 switch (State) {
551 default:
552 State = IES_ERROR;
553 break;
554 case IES_INTEGER:
555 case IES_RPAREN:
556 case IES_REGISTER:
557 State = IES_EQ;
558 IC.pushOperator(IC_EQ);
559 break;
560 }
561 PrevState = CurrState;
562 }
563 void onNE() {
564 IntelExprState CurrState = State;
565 switch (State) {
566 default:
567 State = IES_ERROR;
568 break;
569 case IES_INTEGER:
570 case IES_RPAREN:
571 case IES_REGISTER:
572 State = IES_NE;
573 IC.pushOperator(IC_NE);
574 break;
575 }
576 PrevState = CurrState;
577 }
578 void onLT() {
579 IntelExprState CurrState = State;
580 switch (State) {
581 default:
582 State = IES_ERROR;
583 break;
584 case IES_INTEGER:
585 case IES_RPAREN:
586 case IES_REGISTER:
587 State = IES_LT;
588 IC.pushOperator(IC_LT);
589 break;
590 }
591 PrevState = CurrState;
592 }
593 void onLE() {
594 IntelExprState CurrState = State;
595 switch (State) {
596 default:
597 State = IES_ERROR;
598 break;
599 case IES_INTEGER:
600 case IES_RPAREN:
601 case IES_REGISTER:
602 State = IES_LE;
603 IC.pushOperator(IC_LE);
604 break;
605 }
606 PrevState = CurrState;
607 }
608 void onGT() {
609 IntelExprState CurrState = State;
610 switch (State) {
611 default:
612 State = IES_ERROR;
613 break;
614 case IES_INTEGER:
615 case IES_RPAREN:
616 case IES_REGISTER:
617 State = IES_GT;
618 IC.pushOperator(IC_GT);
619 break;
620 }
621 PrevState = CurrState;
622 }
623 void onGE() {
624 IntelExprState CurrState = State;
625 switch (State) {
626 default:
627 State = IES_ERROR;
628 break;
629 case IES_INTEGER:
630 case IES_RPAREN:
631 case IES_REGISTER:
632 State = IES_GE;
633 IC.pushOperator(IC_GE);
634 break;
635 }
636 PrevState = CurrState;
637 }
638 void onLShift() {
639 IntelExprState CurrState = State;
640 switch (State) {
641 default:
642 State = IES_ERROR;
643 break;
644 case IES_INTEGER:
645 case IES_RPAREN:
646 case IES_REGISTER:
647 State = IES_LSHIFT;
648 IC.pushOperator(IC_LSHIFT);
649 break;
650 }
651 PrevState = CurrState;
652 }
653 void onRShift() {
654 IntelExprState CurrState = State;
655 switch (State) {
656 default:
657 State = IES_ERROR;
658 break;
659 case IES_INTEGER:
660 case IES_RPAREN:
661 case IES_REGISTER:
662 State = IES_RSHIFT;
663 IC.pushOperator(IC_RSHIFT);
664 break;
665 }
666 PrevState = CurrState;
667 }
668 bool onPlus(StringRef &ErrMsg) {
669 IntelExprState CurrState = State;
670 switch (State) {
671 default:
672 State = IES_ERROR;
673 break;
674 case IES_INTEGER:
675 case IES_RPAREN:
676 case IES_REGISTER:
677 case IES_OFFSET:
678 State = IES_PLUS;
679 IC.pushOperator(IC_PLUS);
680 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
681 // If we already have a BaseReg, then assume this is the IndexReg with
682 // no explicit scale.
683 if (!BaseReg) {
684 BaseReg = TmpReg;
685 } else {
686 if (IndexReg)
687 return regsUseUpError(ErrMsg);
688 IndexReg = TmpReg;
689 Scale = 0;
690 }
691 }
692 break;
693 }
694 PrevState = CurrState;
695 return false;
696 }
697 bool onMinus(StringRef &ErrMsg) {
698 IntelExprState CurrState = State;
699 switch (State) {
700 default:
701 State = IES_ERROR;
702 break;
703 case IES_OR:
704 case IES_XOR:
705 case IES_AND:
706 case IES_EQ:
707 case IES_NE:
708 case IES_LT:
709 case IES_LE:
710 case IES_GT:
711 case IES_GE:
712 case IES_LSHIFT:
713 case IES_RSHIFT:
714 case IES_PLUS:
715 case IES_NOT:
716 case IES_MULTIPLY:
717 case IES_DIVIDE:
718 case IES_MOD:
719 case IES_LPAREN:
720 case IES_RPAREN:
721 case IES_LBRAC:
722 case IES_RBRAC:
723 case IES_INTEGER:
724 case IES_REGISTER:
725 case IES_INIT:
726 case IES_OFFSET:
727 State = IES_MINUS;
728 // push minus operator if it is not a negate operator
729 if (CurrState == IES_REGISTER || CurrState == IES_RPAREN ||
730 CurrState == IES_INTEGER || CurrState == IES_RBRAC ||
731 CurrState == IES_OFFSET)
732 IC.pushOperator(IC_MINUS);
733 else if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
734 // We have negate operator for Scale: it's illegal
735 ErrMsg = "Scale can't be negative";
736 return true;
737 } else
738 IC.pushOperator(IC_NEG);
739 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
740 // If we already have a BaseReg, then assume this is the IndexReg with
741 // no explicit scale.
742 if (!BaseReg) {
743 BaseReg = TmpReg;
744 } else {
745 if (IndexReg)
746 return regsUseUpError(ErrMsg);
747 IndexReg = TmpReg;
748 Scale = 0;
749 }
750 }
751 break;
752 }
753 PrevState = CurrState;
754 return false;
755 }
756 void onNot() {
757 IntelExprState CurrState = State;
758 switch (State) {
759 default:
760 State = IES_ERROR;
761 break;
762 case IES_OR:
763 case IES_XOR:
764 case IES_AND:
765 case IES_EQ:
766 case IES_NE:
767 case IES_LT:
768 case IES_LE:
769 case IES_GT:
770 case IES_GE:
771 case IES_LSHIFT:
772 case IES_RSHIFT:
773 case IES_PLUS:
774 case IES_MINUS:
775 case IES_NOT:
776 case IES_MULTIPLY:
777 case IES_DIVIDE:
778 case IES_MOD:
779 case IES_LPAREN:
780 case IES_LBRAC:
781 case IES_INIT:
782 State = IES_NOT;
783 IC.pushOperator(IC_NOT);
784 break;
785 }
786 PrevState = CurrState;
787 }
788 bool onRegister(unsigned Reg, StringRef &ErrMsg) {
789 IntelExprState CurrState = State;
790 switch (State) {
791 default:
792 State = IES_ERROR;
793 break;
794 case IES_PLUS:
795 case IES_LPAREN:
796 case IES_LBRAC:
797 State = IES_REGISTER;
798 TmpReg = Reg;
799 IC.pushOperand(IC_REGISTER);
800 break;
801 case IES_MULTIPLY:
802 // Index Register - Scale * Register
803 if (PrevState == IES_INTEGER) {
804 if (IndexReg)
805 return regsUseUpError(ErrMsg);
806 State = IES_REGISTER;
807 IndexReg = Reg;
808 // Get the scale and replace the 'Scale * Register' with '0'.
809 Scale = IC.popOperand();
810 if (checkScale(Scale, ErrMsg))
811 return true;
812 IC.pushOperand(IC_IMM);
813 IC.popOperator();
814 } else {
815 State = IES_ERROR;
816 }
817 break;
818 }
819 PrevState = CurrState;
820 return false;
821 }
822 bool onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName,
823 const InlineAsmIdentifierInfo &IDInfo,
824 const AsmTypeInfo &Type, bool ParsingMSInlineAsm,
825 StringRef &ErrMsg) {
826 // InlineAsm: Treat an enum value as an integer
827 if (ParsingMSInlineAsm)
829 return onInteger(IDInfo.Enum.EnumVal, ErrMsg);
830 // Treat a symbolic constant like an integer
831 if (auto *CE = dyn_cast<MCConstantExpr>(SymRef))
832 return onInteger(CE->getValue(), ErrMsg);
833 PrevState = State;
834 switch (State) {
835 default:
836 State = IES_ERROR;
837 break;
838 case IES_CAST:
839 case IES_PLUS:
840 case IES_MINUS:
841 case IES_NOT:
842 case IES_INIT:
843 case IES_LBRAC:
844 case IES_LPAREN:
845 if (setSymRef(SymRef, SymRefName, ErrMsg))
846 return true;
847 MemExpr = true;
848 State = IES_INTEGER;
849 IC.pushOperand(IC_IMM);
850 if (ParsingMSInlineAsm)
851 Info = IDInfo;
852 setTypeInfo(Type);
853 break;
854 }
855 return false;
856 }
857 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
858 IntelExprState CurrState = State;
859 switch (State) {
860 default:
861 State = IES_ERROR;
862 break;
863 case IES_PLUS:
864 case IES_MINUS:
865 case IES_NOT:
866 case IES_OR:
867 case IES_XOR:
868 case IES_AND:
869 case IES_EQ:
870 case IES_NE:
871 case IES_LT:
872 case IES_LE:
873 case IES_GT:
874 case IES_GE:
875 case IES_LSHIFT:
876 case IES_RSHIFT:
877 case IES_DIVIDE:
878 case IES_MOD:
879 case IES_MULTIPLY:
880 case IES_LPAREN:
881 case IES_INIT:
882 case IES_LBRAC:
883 State = IES_INTEGER;
884 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
885 // Index Register - Register * Scale
886 if (IndexReg)
887 return regsUseUpError(ErrMsg);
888 IndexReg = TmpReg;
889 Scale = TmpInt;
890 if (checkScale(Scale, ErrMsg))
891 return true;
892 // Get the scale and replace the 'Register * Scale' with '0'.
893 IC.popOperator();
894 } else {
895 IC.pushOperand(IC_IMM, TmpInt);
896 }
897 break;
898 }
899 PrevState = CurrState;
900 return false;
901 }
902 void onStar() {
903 PrevState = State;
904 switch (State) {
905 default:
906 State = IES_ERROR;
907 break;
908 case IES_INTEGER:
909 case IES_REGISTER:
910 case IES_RPAREN:
911 State = IES_MULTIPLY;
912 IC.pushOperator(IC_MULTIPLY);
913 break;
914 }
915 }
916 void onDivide() {
917 PrevState = State;
918 switch (State) {
919 default:
920 State = IES_ERROR;
921 break;
922 case IES_INTEGER:
923 case IES_RPAREN:
924 State = IES_DIVIDE;
925 IC.pushOperator(IC_DIVIDE);
926 break;
927 }
928 }
929 void onMod() {
930 PrevState = State;
931 switch (State) {
932 default:
933 State = IES_ERROR;
934 break;
935 case IES_INTEGER:
936 case IES_RPAREN:
937 State = IES_MOD;
938 IC.pushOperator(IC_MOD);
939 break;
940 }
941 }
942 bool onLBrac() {
943 if (BracCount)
944 return true;
945 PrevState = State;
946 switch (State) {
947 default:
948 State = IES_ERROR;
949 break;
950 case IES_RBRAC:
951 case IES_INTEGER:
952 case IES_RPAREN:
953 State = IES_PLUS;
954 IC.pushOperator(IC_PLUS);
955 CurType.Length = 1;
956 CurType.Size = CurType.ElementSize;
957 break;
958 case IES_INIT:
959 case IES_CAST:
960 assert(!BracCount && "BracCount should be zero on parsing's start");
961 State = IES_LBRAC;
962 break;
963 }
964 MemExpr = true;
965 BracketUsed = true;
966 BracCount++;
967 return false;
968 }
969 bool onRBrac(StringRef &ErrMsg) {
970 IntelExprState CurrState = State;
971 switch (State) {
972 default:
973 State = IES_ERROR;
974 break;
975 case IES_INTEGER:
976 case IES_OFFSET:
977 case IES_REGISTER:
978 case IES_RPAREN:
979 if (BracCount-- != 1) {
980 ErrMsg = "unexpected bracket encountered";
981 return true;
982 }
983 State = IES_RBRAC;
984 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
985 // If we already have a BaseReg, then assume this is the IndexReg with
986 // no explicit scale.
987 if (!BaseReg) {
988 BaseReg = TmpReg;
989 } else {
990 if (IndexReg)
991 return regsUseUpError(ErrMsg);
992 IndexReg = TmpReg;
993 Scale = 0;
994 }
995 }
996 break;
997 }
998 PrevState = CurrState;
999 return false;
1000 }
1001 void onLParen() {
1002 IntelExprState CurrState = State;
1003 switch (State) {
1004 default:
1005 State = IES_ERROR;
1006 break;
1007 case IES_PLUS:
1008 case IES_MINUS:
1009 case IES_NOT:
1010 case IES_OR:
1011 case IES_XOR:
1012 case IES_AND:
1013 case IES_EQ:
1014 case IES_NE:
1015 case IES_LT:
1016 case IES_LE:
1017 case IES_GT:
1018 case IES_GE:
1019 case IES_LSHIFT:
1020 case IES_RSHIFT:
1021 case IES_MULTIPLY:
1022 case IES_DIVIDE:
1023 case IES_MOD:
1024 case IES_LPAREN:
1025 case IES_INIT:
1026 case IES_LBRAC:
1027 State = IES_LPAREN;
1028 IC.pushOperator(IC_LPAREN);
1029 break;
1030 }
1031 PrevState = CurrState;
1032 }
1033 void onRParen() {
1034 PrevState = State;
1035 switch (State) {
1036 default:
1037 State = IES_ERROR;
1038 break;
1039 case IES_INTEGER:
1040 case IES_OFFSET:
1041 case IES_REGISTER:
1042 case IES_RBRAC:
1043 case IES_RPAREN:
1044 State = IES_RPAREN;
1045 IC.pushOperator(IC_RPAREN);
1046 break;
1047 }
1048 }
1049 bool onOffset(const MCExpr *Val, SMLoc OffsetLoc, StringRef ID,
1050 const InlineAsmIdentifierInfo &IDInfo,
1051 bool ParsingMSInlineAsm, StringRef &ErrMsg) {
1052 PrevState = State;
1053 switch (State) {
1054 default:
1055 ErrMsg = "unexpected offset operator expression";
1056 return true;
1057 case IES_PLUS:
1058 case IES_INIT:
1059 case IES_LBRAC:
1060 if (setSymRef(Val, ID, ErrMsg))
1061 return true;
1062 OffsetOperator = true;
1063 OffsetOperatorLoc = OffsetLoc;
1064 State = IES_OFFSET;
1065 // As we cannot yet resolve the actual value (offset), we retain
1066 // the requested semantics by pushing a '0' to the operands stack
1067 IC.pushOperand(IC_IMM);
1068 if (ParsingMSInlineAsm) {
1069 Info = IDInfo;
1070 }
1071 break;
1072 }
1073 return false;
1074 }
1075 void onCast(AsmTypeInfo Info) {
1076 PrevState = State;
1077 switch (State) {
1078 default:
1079 State = IES_ERROR;
1080 break;
1081 case IES_LPAREN:
1082 setTypeInfo(Info);
1083 State = IES_CAST;
1084 break;
1085 }
1086 }
1087 void setTypeInfo(AsmTypeInfo Type) { CurType = Type; }
1088 };
1089
1090 bool Error(SMLoc L, const Twine &Msg, SMRange Range = std::nullopt,
1091 bool MatchingInlineAsm = false) {
1092 MCAsmParser &Parser = getParser();
1093 if (MatchingInlineAsm) {
1094 if (!getLexer().isAtStartOfStatement())
1095 Parser.eatToEndOfStatement();
1096 return false;
1097 }
1098 return Parser.Error(L, Msg, Range);
1099 }
1100
1101 bool MatchRegisterByName(MCRegister &RegNo, StringRef RegName, SMLoc StartLoc,
1102 SMLoc EndLoc);
1103 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1104 bool RestoreOnFailure);
1105
1106 std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
1107 std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
1108 bool IsSIReg(unsigned Reg);
1109 unsigned GetSIDIForRegClass(unsigned RegClassID, unsigned Reg, bool IsSIReg);
1110 void
1111 AddDefaultSrcDestOperands(OperandVector &Operands,
1112 std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
1113 std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst);
1114 bool VerifyAndAdjustOperands(OperandVector &OrigOperands,
1115 OperandVector &FinalOperands);
1116 bool parseOperand(OperandVector &Operands, StringRef Name);
1117 bool parseATTOperand(OperandVector &Operands);
1118 bool parseIntelOperand(OperandVector &Operands, StringRef Name);
1119 bool ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID,
1121 bool ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End);
1122 unsigned IdentifyIntelInlineAsmOperator(StringRef Name);
1123 unsigned ParseIntelInlineAsmOperator(unsigned OpKind);
1124 unsigned IdentifyMasmOperator(StringRef Name);
1125 bool ParseMasmOperator(unsigned OpKind, int64_t &Val);
1126 bool ParseRoundingModeOp(SMLoc Start, OperandVector &Operands);
1127 bool parseCFlagsOp(OperandVector &Operands);
1128 bool ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM,
1129 bool &ParseError, SMLoc &End);
1130 bool ParseMasmNamedOperator(StringRef Name, IntelExprStateMachine &SM,
1131 bool &ParseError, SMLoc &End);
1132 void RewriteIntelExpression(IntelExprStateMachine &SM, SMLoc Start,
1133 SMLoc End);
1134 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
1135 bool ParseIntelInlineAsmIdentifier(const MCExpr *&Val, StringRef &Identifier,
1137 bool IsUnevaluatedOperand, SMLoc &End,
1138 bool IsParsingOffsetOperator = false);
1139 void tryParseOperandIdx(AsmToken::TokenKind PrevTK,
1140 IntelExprStateMachine &SM);
1141
1142 bool ParseMemOperand(unsigned SegReg, const MCExpr *Disp, SMLoc StartLoc,
1143 SMLoc EndLoc, OperandVector &Operands);
1144
1145 X86::CondCode ParseConditionCode(StringRef CCode);
1146
1147 bool ParseIntelMemoryOperandSize(unsigned &Size);
1148 bool CreateMemForMSInlineAsm(unsigned SegReg, const MCExpr *Disp,
1149 unsigned BaseReg, unsigned IndexReg,
1150 unsigned Scale, bool NonAbsMem, SMLoc Start,
1151 SMLoc End, unsigned Size, StringRef Identifier,
1152 const InlineAsmIdentifierInfo &Info,
1154
1155 bool parseDirectiveArch();
1156 bool parseDirectiveNops(SMLoc L);
1157 bool parseDirectiveEven(SMLoc L);
1158 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
1159
1160 /// CodeView FPO data directives.
1161 bool parseDirectiveFPOProc(SMLoc L);
1162 bool parseDirectiveFPOSetFrame(SMLoc L);
1163 bool parseDirectiveFPOPushReg(SMLoc L);
1164 bool parseDirectiveFPOStackAlloc(SMLoc L);
1165 bool parseDirectiveFPOStackAlign(SMLoc L);
1166 bool parseDirectiveFPOEndPrologue(SMLoc L);
1167 bool parseDirectiveFPOEndProc(SMLoc L);
1168
1169 /// SEH directives.
1170 bool parseSEHRegisterNumber(unsigned RegClassID, MCRegister &RegNo);
1171 bool parseDirectiveSEHPushReg(SMLoc);
1172 bool parseDirectiveSEHSetFrame(SMLoc);
1173 bool parseDirectiveSEHSaveReg(SMLoc);
1174 bool parseDirectiveSEHSaveXMM(SMLoc);
1175 bool parseDirectiveSEHPushFrame(SMLoc);
1176
1177 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1178
1179 bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
1180 bool processInstruction(MCInst &Inst, const OperandVector &Ops);
1181
1182 // Load Value Injection (LVI) Mitigations for machine code
1183 void emitWarningForSpecialLVIInstruction(SMLoc Loc);
1184 void applyLVICFIMitigation(MCInst &Inst, MCStreamer &Out);
1185 void applyLVILoadHardeningMitigation(MCInst &Inst, MCStreamer &Out);
1186
1187 /// Wrapper around MCStreamer::emitInstruction(). Possibly adds
1188 /// instrumentation around Inst.
1189 void emitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
1190
1191 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1194 bool MatchingInlineAsm) override;
1195
1196 void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
1197 MCStreamer &Out, bool MatchingInlineAsm);
1198
1199 bool ErrorMissingFeature(SMLoc IDLoc, const FeatureBitset &MissingFeatures,
1200 bool MatchingInlineAsm);
1201
1202 bool matchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode, MCInst &Inst,
1204 uint64_t &ErrorInfo, bool MatchingInlineAsm);
1205
1206 bool matchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode, MCInst &Inst,
1209 bool MatchingInlineAsm);
1210
1211 bool OmitRegisterFromClobberLists(unsigned RegNo) override;
1212
1213 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
1214 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
1215 /// return false if no parsing errors occurred, true otherwise.
1216 bool HandleAVX512Operand(OperandVector &Operands);
1217
1218 bool ParseZ(std::unique_ptr<X86Operand> &Z, const SMLoc &StartLoc);
1219
1220 bool is64BitMode() const {
1221 // FIXME: Can tablegen auto-generate this?
1222 return getSTI().hasFeature(X86::Is64Bit);
1223 }
1224 bool is32BitMode() const {
1225 // FIXME: Can tablegen auto-generate this?
1226 return getSTI().hasFeature(X86::Is32Bit);
1227 }
1228 bool is16BitMode() const {
1229 // FIXME: Can tablegen auto-generate this?
1230 return getSTI().hasFeature(X86::Is16Bit);
1231 }
1232 void SwitchMode(unsigned mode) {
1233 MCSubtargetInfo &STI = copySTI();
1234 FeatureBitset AllModes({X86::Is64Bit, X86::Is32Bit, X86::Is16Bit});
1235 FeatureBitset OldMode = STI.getFeatureBits() & AllModes;
1236 FeatureBitset FB = ComputeAvailableFeatures(
1237 STI.ToggleFeature(OldMode.flip(mode)));
1239
1240 assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes));
1241 }
1242
1243 unsigned getPointerWidth() {
1244 if (is16BitMode()) return 16;
1245 if (is32BitMode()) return 32;
1246 if (is64BitMode()) return 64;
1247 llvm_unreachable("invalid mode");
1248 }
1249
1250 bool isParsingIntelSyntax() {
1251 return getParser().getAssemblerDialect();
1252 }
1253
1254 /// @name Auto-generated Matcher Functions
1255 /// {
1256
1257#define GET_ASSEMBLER_HEADER
1258#include "X86GenAsmMatcher.inc"
1259
1260 /// }
1261
1262public:
1263 enum X86MatchResultTy {
1264 Match_Unsupported = FIRST_TARGET_MATCH_RESULT_TY,
1265#define GET_OPERAND_DIAGNOSTIC_TYPES
1266#include "X86GenAsmMatcher.inc"
1267 };
1268
1269 X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
1270 const MCInstrInfo &mii, const MCTargetOptions &Options)
1271 : MCTargetAsmParser(Options, sti, mii), InstInfo(nullptr),
1272 Code16GCC(false) {
1273
1274 Parser.addAliasForDirective(".word", ".2byte");
1275
1276 // Initialize the set of available features.
1277 setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
1278 }
1279
1280 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1282 SMLoc &EndLoc) override;
1283
1284 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1285
1287 SMLoc NameLoc, OperandVector &Operands) override;
1288
1289 bool ParseDirective(AsmToken DirectiveID) override;
1290};
1291} // end anonymous namespace
1292
1293#define GET_REGISTER_MATCHER
1294#define GET_SUBTARGET_FEATURE_NAME
1295#include "X86GenAsmMatcher.inc"
1296
1297static bool CheckBaseRegAndIndexRegAndScale(unsigned BaseReg, unsigned IndexReg,
1298 unsigned Scale, bool Is64BitMode,
1299 StringRef &ErrMsg) {
1300 // If we have both a base register and an index register make sure they are
1301 // both 64-bit or 32-bit registers.
1302 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
1303
1304 if (BaseReg != 0 &&
1305 !(BaseReg == X86::RIP || BaseReg == X86::EIP ||
1306 X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) ||
1307 X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) ||
1308 X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg))) {
1309 ErrMsg = "invalid base+index expression";
1310 return true;
1311 }
1312
1313 if (IndexReg != 0 &&
1314 !(IndexReg == X86::EIZ || IndexReg == X86::RIZ ||
1315 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1316 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
1317 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
1318 X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) ||
1319 X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) ||
1320 X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg))) {
1321 ErrMsg = "invalid base+index expression";
1322 return true;
1323 }
1324
1325 if (((BaseReg == X86::RIP || BaseReg == X86::EIP) && IndexReg != 0) ||
1326 IndexReg == X86::EIP || IndexReg == X86::RIP ||
1327 IndexReg == X86::ESP || IndexReg == X86::RSP) {
1328 ErrMsg = "invalid base+index expression";
1329 return true;
1330 }
1331
1332 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
1333 // and then only in non-64-bit modes.
1334 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1335 (Is64BitMode || (BaseReg != X86::BX && BaseReg != X86::BP &&
1336 BaseReg != X86::SI && BaseReg != X86::DI))) {
1337 ErrMsg = "invalid 16-bit base register";
1338 return true;
1339 }
1340
1341 if (BaseReg == 0 &&
1342 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
1343 ErrMsg = "16-bit memory operand may not include only index register";
1344 return true;
1345 }
1346
1347 if (BaseReg != 0 && IndexReg != 0) {
1348 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
1349 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1350 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
1351 IndexReg == X86::EIZ)) {
1352 ErrMsg = "base register is 64-bit, but index register is not";
1353 return true;
1354 }
1355 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
1356 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1357 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
1358 IndexReg == X86::RIZ)) {
1359 ErrMsg = "base register is 32-bit, but index register is not";
1360 return true;
1361 }
1362 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
1363 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
1364 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
1365 ErrMsg = "base register is 16-bit, but index register is not";
1366 return true;
1367 }
1368 if ((BaseReg != X86::BX && BaseReg != X86::BP) ||
1369 (IndexReg != X86::SI && IndexReg != X86::DI)) {
1370 ErrMsg = "invalid 16-bit base/index register combination";
1371 return true;
1372 }
1373 }
1374 }
1375
1376 // RIP/EIP-relative addressing is only supported in 64-bit mode.
1377 if (!Is64BitMode && BaseReg != 0 &&
1378 (BaseReg == X86::RIP || BaseReg == X86::EIP)) {
1379 ErrMsg = "IP-relative addressing requires 64-bit mode";
1380 return true;
1381 }
1382
1383 return checkScale(Scale, ErrMsg);
1384}
1385
// Resolve RegName (with or without a leading '%') to an X86 register number.
// Returns true on failure; in AT&T syntax a diagnostic is emitted as well,
// while in Intel syntax the caller may reinterpret the name as an identifier.
bool X86AsmParser::MatchRegisterByName(MCRegister &RegNo, StringRef RegName,
                                       SMLoc StartLoc, SMLoc EndLoc) {
  // If we encounter a %, ignore it. This code handles registers with and
  // without the prefix, unprefixed registers can occur in cfi directives.
  RegName.consume_front("%");

  RegNo = MatchRegisterName(RegName);

  // If the match failed, try the register name as lowercase.
  if (RegNo == 0)
    RegNo = MatchRegisterName(RegName.lower());

  // The "flags" and "mxcsr" registers cannot be referenced directly.
  // Treat it as an identifier instead.
  if (isParsingMSInlineAsm() && isParsingIntelSyntax() &&
      (RegNo == X86::EFLAGS || RegNo == X86::MXCSR))
    RegNo = 0;

  if (!is64BitMode()) {
    // FIXME: This should be done using Requires<Not64BitMode> and
    // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
    // checked.
    // Reject registers that only exist in 64-bit mode.
    if (RegNo == X86::RIZ || RegNo == X86::RIP ||
        X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
      return Error(StartLoc,
                   "register %" + RegName + " is only available in 64-bit mode",
                   SMRange(StartLoc, EndLoc));
    }
  }

  // Track APX (R16-R31) usage so later matching can diagnose it per-feature.
  if (X86II::isApxExtendedReg(RegNo))
    UseApxExtendedReg = true;

  // If this is "db[0-15]", match it as an alias
  // for dr[0-15].
  if (RegNo == 0 && RegName.starts_with("db")) {
    if (RegName.size() == 3) {
      switch (RegName[2]) {
      case '0':
        RegNo = X86::DR0;
        break;
      case '1':
        RegNo = X86::DR1;
        break;
      case '2':
        RegNo = X86::DR2;
        break;
      case '3':
        RegNo = X86::DR3;
        break;
      case '4':
        RegNo = X86::DR4;
        break;
      case '5':
        RegNo = X86::DR5;
        break;
      case '6':
        RegNo = X86::DR6;
        break;
      case '7':
        RegNo = X86::DR7;
        break;
      case '8':
        RegNo = X86::DR8;
        break;
      case '9':
        RegNo = X86::DR9;
        break;
      }
    } else if (RegName.size() == 4 && RegName[2] == '1') {
      // "db1x" forms: db10 .. db15.
      switch (RegName[3]) {
      case '0':
        RegNo = X86::DR10;
        break;
      case '1':
        RegNo = X86::DR11;
        break;
      case '2':
        RegNo = X86::DR12;
        break;
      case '3':
        RegNo = X86::DR13;
        break;
      case '4':
        RegNo = X86::DR14;
        break;
      case '5':
        RegNo = X86::DR15;
        break;
      }
    }
  }

  if (RegNo == 0) {
    // Intel syntax: silently fail so the name can be treated as a symbol.
    if (isParsingIntelSyntax())
      return true;
    return Error(StartLoc, "invalid register name", SMRange(StartLoc, EndLoc));
  }
  return false;
}
1488
// Parse a register operand starting at the current token, including the
// multi-token "%st(N)" form.  When RestoreOnFailure is set, every token
// consumed is recorded and un-lexed again on failure so the caller can
// retry a different interpretation.
bool X86AsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
                                 SMLoc &EndLoc, bool RestoreOnFailure) {
  MCAsmParser &Parser = getParser();
  MCAsmLexer &Lexer = getLexer();
  RegNo = 0;

  // Push back all recorded tokens (in reverse) if we are in restore mode.
  auto OnFailure = [RestoreOnFailure, &Lexer, &Tokens]() {
    if (RestoreOnFailure) {
      while (!Tokens.empty()) {
        Lexer.UnLex(Tokens.pop_back_val());
      }
    }
  };

  const AsmToken &PercentTok = Parser.getTok();
  StartLoc = PercentTok.getLoc();

  // If we encounter a %, ignore it. This code handles registers with and
  // without the prefix, unprefixed registers can occur in cfi directives.
  if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent)) {
    Tokens.push_back(PercentTok);
    Parser.Lex(); // Eat percent token.
  }

  const AsmToken &Tok = Parser.getTok();
  EndLoc = Tok.getEndLoc();

  if (Tok.isNot(AsmToken::Identifier)) {
    OnFailure();
    // Intel syntax: fail silently; the token may be valid as something else.
    if (isParsingIntelSyntax()) return true;
    return Error(StartLoc, "invalid register name",
                 SMRange(StartLoc, EndLoc));
  }

  if (MatchRegisterByName(RegNo, Tok.getString(), StartLoc, EndLoc)) {
    OnFailure();
    return true;
  }

  // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
  if (RegNo == X86::ST0) {
    Tokens.push_back(Tok);
    Parser.Lex(); // Eat 'st'

    // Check to see if we have '(4)' after %st.
    if (Lexer.isNot(AsmToken::LParen))
      return false;
    // Lex the paren.
    Tokens.push_back(Parser.getTok());
    Parser.Lex();

    const AsmToken &IntTok = Parser.getTok();
    if (IntTok.isNot(AsmToken::Integer)) {
      OnFailure();
      return Error(IntTok.getLoc(), "expected stack index");
    }
    // Only st(0) .. st(7) exist.
    switch (IntTok.getIntVal()) {
    case 0: RegNo = X86::ST0; break;
    case 1: RegNo = X86::ST1; break;
    case 2: RegNo = X86::ST2; break;
    case 3: RegNo = X86::ST3; break;
    case 4: RegNo = X86::ST4; break;
    case 5: RegNo = X86::ST5; break;
    case 6: RegNo = X86::ST6; break;
    case 7: RegNo = X86::ST7; break;
    default:
      OnFailure();
      return Error(IntTok.getLoc(), "invalid stack index");
    }

    // Lex IntTok
    Tokens.push_back(IntTok);
    Parser.Lex();
    if (Lexer.isNot(AsmToken::RParen)) {
      OnFailure();
      return Error(Parser.getTok().getLoc(), "expected ')'");
    }

    EndLoc = Parser.getTok().getEndLoc();
    Parser.Lex(); // Eat ')'
    return false;
  }

  EndLoc = Parser.getTok().getEndLoc();

  if (RegNo == 0) {
    OnFailure();
    if (isParsingIntelSyntax()) return true;
    return Error(StartLoc, "invalid register name",
                 SMRange(StartLoc, EndLoc));
  }

  Parser.Lex(); // Eat identifier token.
  return false;
}
1585
1586bool X86AsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
1587 SMLoc &EndLoc) {
1588 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
1589}
1590
1591ParseStatus X86AsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1592 SMLoc &EndLoc) {
1593 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
1594 bool PendingErrors = getParser().hasPendingError();
1595 getParser().clearPendingErrors();
1596 if (PendingErrors)
1597 return ParseStatus::Failure;
1598 if (Result)
1599 return ParseStatus::NoMatch;
1600 return ParseStatus::Success;
1601}
1602
1603std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
1604 bool Parse32 = is32BitMode() || Code16GCC;
1605 unsigned Basereg = is64BitMode() ? X86::RSI : (Parse32 ? X86::ESI : X86::SI);
1606 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1607 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1608 /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
1609 Loc, Loc, 0);
1610}
1611
1612std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
1613 bool Parse32 = is32BitMode() || Code16GCC;
1614 unsigned Basereg = is64BitMode() ? X86::RDI : (Parse32 ? X86::EDI : X86::DI);
1615 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1616 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1617 /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
1618 Loc, Loc, 0);
1619}
1620
1621bool X86AsmParser::IsSIReg(unsigned Reg) {
1622 switch (Reg) {
1623 default: llvm_unreachable("Only (R|E)SI and (R|E)DI are expected!");
1624 case X86::RSI:
1625 case X86::ESI:
1626 case X86::SI:
1627 return true;
1628 case X86::RDI:
1629 case X86::EDI:
1630 case X86::DI:
1631 return false;
1632 }
1633}
1634
1635unsigned X86AsmParser::GetSIDIForRegClass(unsigned RegClassID, unsigned Reg,
1636 bool IsSIReg) {
1637 switch (RegClassID) {
1638 default: llvm_unreachable("Unexpected register class");
1639 case X86::GR64RegClassID:
1640 return IsSIReg ? X86::RSI : X86::RDI;
1641 case X86::GR32RegClassID:
1642 return IsSIReg ? X86::ESI : X86::EDI;
1643 case X86::GR16RegClassID:
1644 return IsSIReg ? X86::SI : X86::DI;
1645 }
1646}
1647
1648void X86AsmParser::AddDefaultSrcDestOperands(
1649 OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
1650 std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst) {
1651 if (isParsingIntelSyntax()) {
1652 Operands.push_back(std::move(Dst));
1653 Operands.push_back(std::move(Src));
1654 }
1655 else {
1656 Operands.push_back(std::move(Src));
1657 Operands.push_back(std::move(Dst));
1658 }
1659}
1660
// Reconcile the operands the user wrote (OrigOperands) with the canonical
// implicit operands of a string instruction (FinalOperands): verify register
// operands match, force memory bases onto the canonical (R|E)SI/(R|E)DI of
// the user's register width, and replace the old operands with the adjusted
// ones.  Returns true only when an error was emitted.
bool X86AsmParser::VerifyAndAdjustOperands(OperandVector &OrigOperands,
                                           OperandVector &FinalOperands) {

  if (OrigOperands.size() > 1) {
    // Check if sizes match, OrigOperands also contains the instruction name
    assert(OrigOperands.size() == FinalOperands.size() + 1 &&
           "Operand size mismatch");

    // Verify types match
    int RegClassID = -1;
    for (unsigned int i = 0; i < FinalOperands.size(); ++i) {
      X86Operand &OrigOp = static_cast<X86Operand &>(*OrigOperands[i + 1]);
      X86Operand &FinalOp = static_cast<X86Operand &>(*FinalOperands[i]);

      if (FinalOp.isReg() &&
          (!OrigOp.isReg() || FinalOp.getReg() != OrigOp.getReg()))
        // Return false and let a normal complaint about bogus operands happen
        return false;

      if (FinalOp.isMem()) {

        if (!OrigOp.isMem())
          // Return false and let a normal complaint about bogus operands happen
          return false;

        unsigned OrigReg = OrigOp.Mem.BaseReg;
        unsigned FinalReg = FinalOp.Mem.BaseReg;

        // If we've already encountered a register class, make sure all
        // register bases are of the same register class
        if (RegClassID != -1 &&
            !X86MCRegisterClasses[RegClassID].contains(OrigReg)) {
          return Error(OrigOp.getStartLoc(),
                       "mismatching source and destination index registers");
        }

        if (X86MCRegisterClasses[X86::GR64RegClassID].contains(OrigReg))
          RegClassID = X86::GR64RegClassID;
        else if (X86MCRegisterClasses[X86::GR32RegClassID].contains(OrigReg))
          RegClassID = X86::GR32RegClassID;
        else if (X86MCRegisterClasses[X86::GR16RegClassID].contains(OrigReg))
          RegClassID = X86::GR16RegClassID;
        else
          // Unexpected register class type
          // Return false and let a normal complaint about bogus operands happen
          return false;

        // Normalize the user's base register onto the canonical SI/DI of the
        // width they used.
        bool IsSI = IsSIReg(FinalReg);
        FinalReg = GetSIDIForRegClass(RegClassID, FinalReg, IsSI);

        if (FinalReg != OrigReg) {
          std::string RegName = IsSI ? "ES:(R|E)SI" : "ES:(R|E)DI";
          Warnings.push_back(std::make_pair(
              OrigOp.getStartLoc(),
              "memory operand is only for determining the size, " + RegName +
                  " will be used for the location"));
        }

        FinalOp.Mem.Size = OrigOp.Mem.Size;
        FinalOp.Mem.SegReg = OrigOp.Mem.SegReg;
        FinalOp.Mem.BaseReg = FinalReg;
      }
    }

    // Produce warnings only if all the operands passed the adjustment - prevent
    // legal cases like "movsd (%rax), %xmm0" mistakenly produce warnings
    for (auto &WarningMsg : Warnings) {
      Warning(WarningMsg.first, WarningMsg.second);
    }

    // Remove old operands
    for (unsigned int i = 0; i < FinalOperands.size(); ++i)
      OrigOperands.pop_back();
  }
  // OrigOperands.append(FinalOperands.begin(), FinalOperands.end());
  for (auto &Op : FinalOperands)
    OrigOperands.push_back(std::move(Op));

  return false;
}
1742
1743bool X86AsmParser::parseOperand(OperandVector &Operands, StringRef Name) {
1744 if (isParsingIntelSyntax())
1745 return parseIntelOperand(Operands, Name);
1746
1747 return parseATTOperand(Operands);
1748}
1749
// Build a memory operand for an MS inline-asm identifier (Info) and append
// it to Operands.  Labels become absolute references; global lvalues keep
// their frontend size so the inline-asm rewriter can size accesses.
// Always returns false (no error paths here).
bool X86AsmParser::CreateMemForMSInlineAsm(unsigned SegReg, const MCExpr *Disp,
                                           unsigned BaseReg, unsigned IndexReg,
                                           unsigned Scale, bool NonAbsMem,
                                           SMLoc Start, SMLoc End,
                                           unsigned Size, StringRef Identifier,
                                           const InlineAsmIdentifierInfo &Info,
  // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
  // some other label reference.
    // Create an absolute memory reference in order to match against
    // instructions taking a PC relative operand.
    Operands.push_back(X86Operand::CreateMem(getPointerWidth(), Disp, Start,
                                             End, Size, Identifier,
                                             Info.Label.Decl));
    return false;
  }
  // We either have a direct symbol reference, or an offset from a symbol.  The
  // parser always puts the symbol on the LHS, so look there for size
  // calculation purposes.
  unsigned FrontendSize = 0;
  void *Decl = nullptr;
  bool IsGlobalLV = false;
    // Size is in terms of bits in this context.
    FrontendSize = Info.Var.Type * 8;
    Decl = Info.Var.Decl;
    IsGlobalLV = Info.Var.IsGlobalLV;
  }
  // MS inline asm commonly combines a global variable with one or two
  // registers in a memory expression, even though the result is not
  // addressable via rip/eip.
  if (IsGlobalLV) {
    if (BaseReg || IndexReg) {
      Operands.push_back(X86Operand::CreateMem(getPointerWidth(), Disp, Start,
                                               End, Size, Identifier, Decl, 0,
                                               BaseReg && IndexReg));
      return false;
    }
    if (NonAbsMem)
      BaseReg = 1; // Make isAbsMem() false
  }
      getPointerWidth(), SegReg, Disp, BaseReg, IndexReg, Scale, Start, End,
      Size,
      /*DefaultBaseReg=*/X86::RIP, Identifier, Decl, FrontendSize));
  return false;
}
1797
// Some binary bitwise operators have named synonyms (e.g. "and", "shl").
// Query a candidate string for being such a named operator
// and if so - invoke the appropriate handler.
// Returns true if Name was handled; ParseError reports a failure within
// a handled operator (currently only OFFSET can fail).
bool X86AsmParser::ParseIntelNamedOperator(StringRef Name,
                                           IntelExprStateMachine &SM,
                                           bool &ParseError, SMLoc &End) {
  // A named operator should be either lower or upper case, but not a mix...
  // except in MASM, which uses full case-insensitivity.
  if (Name != Name.lower() && Name != Name.upper() &&
      !getParser().isParsingMasm())
    return false;
  if (Name.equals_insensitive("not")) {
    SM.onNot();
  } else if (Name.equals_insensitive("or")) {
    SM.onOr();
  } else if (Name.equals_insensitive("shl")) {
    SM.onLShift();
  } else if (Name.equals_insensitive("shr")) {
    SM.onRShift();
  } else if (Name.equals_insensitive("xor")) {
    SM.onXor();
  } else if (Name.equals_insensitive("and")) {
    SM.onAnd();
  } else if (Name.equals_insensitive("mod")) {
    SM.onMod();
  } else if (Name.equals_insensitive("offset")) {
    SMLoc OffsetLoc = getTok().getLoc();
    const MCExpr *Val = nullptr;
    StringRef ID;
    ParseError = ParseIntelOffsetOperator(Val, ID, Info, End);
    if (ParseError)
      return true;
    StringRef ErrMsg;
    ParseError =
        SM.onOffset(Val, OffsetLoc, ID, Info, isParsingMSInlineAsm(), ErrMsg);
    if (ParseError)
      return Error(SMLoc::getFromPointer(Name.data()), ErrMsg);
  } else {
    return false;
  }
  // OFFSET consumed its own tokens above; every other operator still needs
  // its keyword token eaten here.
  if (!Name.equals_insensitive("offset"))
    End = consumeToken();
  return true;
}
1843bool X86AsmParser::ParseMasmNamedOperator(StringRef Name,
1844 IntelExprStateMachine &SM,
1845 bool &ParseError, SMLoc &End) {
1846 if (Name.equals_insensitive("eq")) {
1847 SM.onEq();
1848 } else if (Name.equals_insensitive("ne")) {
1849 SM.onNE();
1850 } else if (Name.equals_insensitive("lt")) {
1851 SM.onLT();
1852 } else if (Name.equals_insensitive("le")) {
1853 SM.onLE();
1854 } else if (Name.equals_insensitive("gt")) {
1855 SM.onGT();
1856 } else if (Name.equals_insensitive("ge")) {
1857 SM.onGE();
1858 } else {
1859 return false;
1860 }
1861 End = consumeToken();
1862 return true;
1863}
1864
1865// Check if current intel expression append after an operand.
1866// Like: [Operand][Intel Expression]
1867void X86AsmParser::tryParseOperandIdx(AsmToken::TokenKind PrevTK,
1868 IntelExprStateMachine &SM) {
1869 if (PrevTK != AsmToken::RBrac)
1870 return;
1871
1872 SM.setAppendAfterOperand();
1873}
1874
// Drive the Intel-syntax expression state machine (SM) over the token
// stream until a valid end state is reached.  Handles registers, integers,
// directional labels, named operators, MASM dot/PTR/field constructs, and
// MS inline-asm identifiers.  End receives the location just past the
// expression; returns true after emitting a diagnostic on error.
bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
  MCAsmParser &Parser = getParser();
  StringRef ErrMsg;


  if (getContext().getObjectFileInfo()->isPositionIndependent())
    SM.setPIC();

  bool Done = false;
  while (!Done) {
    // Get a fresh reference on each loop iteration in case the previous
    // iteration moved the token storage during UnLex().
    const AsmToken &Tok = Parser.getTok();

    bool UpdateLocLex = true;
    AsmToken::TokenKind TK = getLexer().getKind();

    switch (TK) {
    default:
      if ((Done = SM.isValidEndState()))
        break;
      return Error(Tok.getLoc(), "unknown token in expression");
    case AsmToken::Error:
      return Error(getLexer().getErrLoc(), getLexer().getErr());
      break;
      Done = true;
      break;
    case AsmToken::Real:
      // DotOperator: [ebx].0
      UpdateLocLex = false;
      if (ParseIntelDotOperator(SM, End))
        return true;
      break;
    case AsmToken::Dot:
      if (!Parser.isParsingMasm()) {
        if ((Done = SM.isValidEndState()))
          break;
        return Error(Tok.getLoc(), "unknown token in expression");
      }
      // MASM allows spaces around the dot operator (e.g., "var . x")
      Lex();
      UpdateLocLex = false;
      if (ParseIntelDotOperator(SM, End))
        return true;
      break;
    case AsmToken::Dollar:
      if (!Parser.isParsingMasm()) {
        if ((Done = SM.isValidEndState()))
          break;
        return Error(Tok.getLoc(), "unknown token in expression");
      }
      [[fallthrough]];
    case AsmToken::String: {
      if (Parser.isParsingMasm()) {
        // MASM parsers handle strings in expressions as constants.
        SMLoc ValueLoc = Tok.getLoc();
        int64_t Res;
        const MCExpr *Val;
        if (Parser.parsePrimaryExpr(Val, End, nullptr))
          return true;
        UpdateLocLex = false;
        if (!Val->evaluateAsAbsolute(Res, getStreamer().getAssemblerPtr()))
          return Error(ValueLoc, "expected absolute value");
        if (SM.onInteger(Res, ErrMsg))
          return Error(ValueLoc, ErrMsg);
        break;
      }
      [[fallthrough]];
    }
    case AsmToken::At:
    case AsmToken::Identifier: {
      SMLoc IdentLoc = Tok.getLoc();
      UpdateLocLex = false;
      if (Parser.isParsingMasm()) {
        // Split "a.b" back into separate tokens so the dot operator path
        // below can process the pieces individually.
        size_t DotOffset = Identifier.find_first_of('.');
        if (DotOffset != StringRef::npos) {
          consumeToken();
          StringRef LHS = Identifier.slice(0, DotOffset);
          StringRef Dot = Identifier.slice(DotOffset, DotOffset + 1);
          StringRef RHS = Identifier.slice(DotOffset + 1, StringRef::npos);
          if (!RHS.empty()) {
            getLexer().UnLex(AsmToken(AsmToken::Identifier, RHS));
          }
          getLexer().UnLex(AsmToken(AsmToken::Dot, Dot));
          if (!LHS.empty()) {
            getLexer().UnLex(AsmToken(AsmToken::Identifier, LHS));
          }
          break;
        }
      }
      // (MASM only) <TYPE> PTR operator
      if (Parser.isParsingMasm()) {
        const AsmToken &NextTok = getLexer().peekTok();
        if (NextTok.is(AsmToken::Identifier) &&
            NextTok.getIdentifier().equals_insensitive("ptr")) {
          if (Parser.lookUpType(Identifier, Info))
            return Error(Tok.getLoc(), "unknown type");
          SM.onCast(Info);
          // Eat type and PTR.
          consumeToken();
          End = consumeToken();
          break;
        }
      }
      // Register, or (MASM only) <register>.<field>
      if (Tok.is(AsmToken::Identifier)) {
        if (!ParseRegister(Reg, IdentLoc, End, /*RestoreOnFailure=*/true)) {
          if (SM.onRegister(Reg, ErrMsg))
            return Error(IdentLoc, ErrMsg);
          break;
        }
        if (Parser.isParsingMasm()) {
          const std::pair<StringRef, StringRef> IDField =
              Tok.getString().split('.');
          const StringRef ID = IDField.first, Field = IDField.second;
          SMLoc IDEndLoc = SMLoc::getFromPointer(ID.data() + ID.size());
          if (!Field.empty() &&
              !MatchRegisterByName(Reg, ID, IdentLoc, IDEndLoc)) {
            if (SM.onRegister(Reg, ErrMsg))
              return Error(IdentLoc, ErrMsg);

            SMLoc FieldStartLoc = SMLoc::getFromPointer(Field.data());
            if (Parser.lookUpField(Field, Info))
              return Error(FieldStartLoc, "unknown offset");
            else if (SM.onPlus(ErrMsg))
              return Error(getTok().getLoc(), ErrMsg);
            else if (SM.onInteger(Info.Offset, ErrMsg))
              return Error(IdentLoc, ErrMsg);
            SM.setTypeInfo(Info.Type);

            End = consumeToken();
            break;
          }
        }
      }
      // Operator synonyms ("not", "or" etc.)
      bool ParseError = false;
      if (ParseIntelNamedOperator(Identifier, SM, ParseError, End)) {
        if (ParseError)
          return true;
        break;
      }
      if (Parser.isParsingMasm() &&
          ParseMasmNamedOperator(Identifier, SM, ParseError, End)) {
        if (ParseError)
          return true;
        break;
      }
      // Symbol reference, when parsing assembly content
      AsmFieldInfo FieldInfo;
      const MCExpr *Val;
      if (isParsingMSInlineAsm() || Parser.isParsingMasm()) {
        // MS Dot Operator expression
        if (Identifier.count('.') &&
            (PrevTK == AsmToken::RBrac || PrevTK == AsmToken::RParen)) {
          if (ParseIntelDotOperator(SM, End))
            return true;
          break;
        }
      }
      if (isParsingMSInlineAsm()) {
        // MS InlineAsm operators (TYPE/LENGTH/SIZE)
        if (unsigned OpKind = IdentifyIntelInlineAsmOperator(Identifier)) {
          if (int64_t Val = ParseIntelInlineAsmOperator(OpKind)) {
            if (SM.onInteger(Val, ErrMsg))
              return Error(IdentLoc, ErrMsg);
          } else {
            return true;
          }
          break;
        }
        // MS InlineAsm identifier
        // Call parseIdentifier() to combine @ with the identifier behind it.
        if (TK == AsmToken::At && Parser.parseIdentifier(Identifier))
          return Error(IdentLoc, "expected identifier");
        if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info, false, End))
          return true;
        else if (SM.onIdentifierExpr(Val, Identifier, Info, FieldInfo.Type,
                                     true, ErrMsg))
          return Error(IdentLoc, ErrMsg);
        break;
      }
      if (Parser.isParsingMasm()) {
        if (unsigned OpKind = IdentifyMasmOperator(Identifier)) {
          int64_t Val;
          if (ParseMasmOperator(OpKind, Val))
            return true;
          if (SM.onInteger(Val, ErrMsg))
            return Error(IdentLoc, ErrMsg);
          break;
        }
        if (!getParser().lookUpType(Identifier, FieldInfo.Type)) {
          // Field offset immediate; <TYPE>.<field specification>
          Lex(); // eat type
          bool EndDot = parseOptionalToken(AsmToken::Dot);
          while (EndDot || (getTok().is(AsmToken::Identifier) &&
                            getTok().getString().starts_with("."))) {
            getParser().parseIdentifier(Identifier);
            if (!EndDot)
              Identifier.consume_front(".");
            EndDot = Identifier.consume_back(".");
            if (getParser().lookUpField(FieldInfo.Type.Name, Identifier,
                                        FieldInfo)) {
              SMLoc IDEnd =
                return Error(IdentLoc, "Unable to lookup field reference!",
                             SMRange(IdentLoc, IDEnd));
            }
            if (!EndDot)
              EndDot = parseOptionalToken(AsmToken::Dot);
          }
          if (SM.onInteger(FieldInfo.Offset, ErrMsg))
            return Error(IdentLoc, ErrMsg);
          break;
        }
      }
      if (getParser().parsePrimaryExpr(Val, End, &FieldInfo.Type)) {
        return Error(Tok.getLoc(), "Unexpected identifier!");
      } else if (SM.onIdentifierExpr(Val, Identifier, Info, FieldInfo.Type,
                                     false, ErrMsg)) {
        return Error(IdentLoc, ErrMsg);
      }
      break;
    }
    case AsmToken::Integer: {
      // Look for 'b' or 'f' following an Integer as a directional label
      SMLoc Loc = getTok().getLoc();
      int64_t IntVal = getTok().getIntVal();
      End = consumeToken();
      UpdateLocLex = false;
      if (getLexer().getKind() == AsmToken::Identifier) {
        StringRef IDVal = getTok().getString();
        if (IDVal == "f" || IDVal == "b") {
          MCSymbol *Sym =
              getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b");
          const MCExpr *Val =
              MCSymbolRefExpr::create(Sym, Variant, getContext());
          // Backward references must name an already-defined label.
          if (IDVal == "b" && Sym->isUndefined())
            return Error(Loc, "invalid reference to undefined symbol");
          StringRef Identifier = Sym->getName();
          if (SM.onIdentifierExpr(Val, Identifier, Info, Type,
                                  isParsingMSInlineAsm(), ErrMsg))
            return Error(Loc, ErrMsg);
          End = consumeToken();
        } else {
          if (SM.onInteger(IntVal, ErrMsg))
            return Error(Loc, ErrMsg);
        }
      } else {
        if (SM.onInteger(IntVal, ErrMsg))
          return Error(Loc, ErrMsg);
      }
      break;
    }
    case AsmToken::Plus:
      if (SM.onPlus(ErrMsg))
        return Error(getTok().getLoc(), ErrMsg);
      break;
    case AsmToken::Minus:
      if (SM.onMinus(ErrMsg))
        return Error(getTok().getLoc(), ErrMsg);
      break;
    case AsmToken::Tilde: SM.onNot(); break;
    case AsmToken::Star: SM.onStar(); break;
    case AsmToken::Slash: SM.onDivide(); break;
    case AsmToken::Percent: SM.onMod(); break;
    case AsmToken::Pipe: SM.onOr(); break;
    case AsmToken::Caret: SM.onXor(); break;
    case AsmToken::Amp: SM.onAnd(); break;
    case AsmToken::LessLess:
      SM.onLShift(); break;
      SM.onRShift(); break;
    case AsmToken::LBrac:
      if (SM.onLBrac())
        return Error(Tok.getLoc(), "unexpected bracket encountered");
      tryParseOperandIdx(PrevTK, SM);
      break;
    case AsmToken::RBrac:
      if (SM.onRBrac(ErrMsg)) {
        return Error(Tok.getLoc(), ErrMsg);
      }
      break;
    case AsmToken::LParen: SM.onLParen(); break;
    case AsmToken::RParen: SM.onRParen(); break;
    }
    if (SM.hadError())
      return Error(Tok.getLoc(), "unknown token in expression");

    if (!Done && UpdateLocLex)
      End = consumeToken();

    PrevTK = TK;
  }
  return false;
}
2181
2182void X86AsmParser::RewriteIntelExpression(IntelExprStateMachine &SM,
2183 SMLoc Start, SMLoc End) {
2184 SMLoc Loc = Start;
2185 unsigned ExprLen = End.getPointer() - Start.getPointer();
2186 // Skip everything before a symbol displacement (if we have one)
2187 if (SM.getSym() && !SM.isOffsetOperator()) {
2188 StringRef SymName = SM.getSymName();
2189 if (unsigned Len = SymName.data() - Start.getPointer())
2190 InstInfo->AsmRewrites->emplace_back(AOK_Skip, Start, Len);
2191 Loc = SMLoc::getFromPointer(SymName.data() + SymName.size());
2192 ExprLen = End.getPointer() - (SymName.data() + SymName.size());
2193 // If we have only a symbol than there's no need for complex rewrite,
2194 // simply skip everything after it
2195 if (!(SM.getBaseReg() || SM.getIndexReg() || SM.getImm())) {
2196 if (ExprLen)
2197 InstInfo->AsmRewrites->emplace_back(AOK_Skip, Loc, ExprLen);
2198 return;
2199 }
2200 }
2201 // Build an Intel Expression rewrite
2202 StringRef BaseRegStr;
2203 StringRef IndexRegStr;
2204 StringRef OffsetNameStr;
2205 if (SM.getBaseReg())
2206 BaseRegStr = X86IntelInstPrinter::getRegisterName(SM.getBaseReg());
2207 if (SM.getIndexReg())
2208 IndexRegStr = X86IntelInstPrinter::getRegisterName(SM.getIndexReg());
2209 if (SM.isOffsetOperator())
2210 OffsetNameStr = SM.getSymName();
2211 // Emit it
2212 IntelExpr Expr(BaseRegStr, IndexRegStr, SM.getScale(), OffsetNameStr,
2213 SM.getImm(), SM.isMemExpr());
2214 InstInfo->AsmRewrites->emplace_back(Loc, ExprLen, Expr);
2215}
2216
2217// Inline assembly may use variable names with namespace alias qualifiers.
2218bool X86AsmParser::ParseIntelInlineAsmIdentifier(
2219 const MCExpr *&Val, StringRef &Identifier, InlineAsmIdentifierInfo &Info,
2220 bool IsUnevaluatedOperand, SMLoc &End, bool IsParsingOffsetOperator) {
2221 MCAsmParser &Parser = getParser();
2222 assert(isParsingMSInlineAsm() && "Expected to be parsing inline assembly.");
2223 Val = nullptr;
2224
2225 StringRef LineBuf(Identifier.data());
2226 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
2227
2228 const AsmToken &Tok = Parser.getTok();
2229 SMLoc Loc = Tok.getLoc();
2230
2231 // Advance the token stream until the end of the current token is
2232 // after the end of what the frontend claimed.
2233 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
2234 do {
2235 End = Tok.getEndLoc();
2236 getLexer().Lex();
2237 } while (End.getPointer() < EndPtr);
2238 Identifier = LineBuf;
2239
2240 // The frontend should end parsing on an assembler token boundary, unless it
2241 // failed parsing.
2242 assert((End.getPointer() == EndPtr ||
2244 "frontend claimed part of a token?");
2245
2246 // If the identifier lookup was unsuccessful, assume that we are dealing with
2247 // a label.
2249 StringRef InternalName =
2250 SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(),
2251 Loc, false);
2252 assert(InternalName.size() && "We should have an internal name here.");
2253 // Push a rewrite for replacing the identifier name with the internal name,
2254 // unless we are parsing the operand of an offset operator
2255 if (!IsParsingOffsetOperator)
2256 InstInfo->AsmRewrites->emplace_back(AOK_Label, Loc, Identifier.size(),
2257 InternalName);
2258 else
2259 Identifier = InternalName;
2260 } else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
2261 return false;
2262 // Create the symbol reference.
2263 MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
2265 Val = MCSymbolRefExpr::create(Sym, Variant, getParser().getContext());
2266 return false;
2267}
2268
// ParseRoundingModeOp - Parse AVX-512 rounding mode operand.
2270bool X86AsmParser::ParseRoundingModeOp(SMLoc Start, OperandVector &Operands) {
2271 MCAsmParser &Parser = getParser();
2272 const AsmToken &Tok = Parser.getTok();
2273 // Eat "{" and mark the current place.
2274 const SMLoc consumedToken = consumeToken();
2275 if (Tok.isNot(AsmToken::Identifier))
2276 return Error(Tok.getLoc(), "Expected an identifier after {");
2277 if (Tok.getIdentifier().starts_with("r")) {
2278 int rndMode = StringSwitch<int>(Tok.getIdentifier())
2279 .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT)
2280 .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF)
2281 .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF)
2282 .Case("rz", X86::STATIC_ROUNDING::TO_ZERO)
2283 .Default(-1);
2284 if (-1 == rndMode)
2285 return Error(Tok.getLoc(), "Invalid rounding mode.");
2286 Parser.Lex(); // Eat "r*" of r*-sae
2287 if (!getLexer().is(AsmToken::Minus))
2288 return Error(Tok.getLoc(), "Expected - at this point");
2289 Parser.Lex(); // Eat "-"
2290 Parser.Lex(); // Eat the sae
2291 if (!getLexer().is(AsmToken::RCurly))
2292 return Error(Tok.getLoc(), "Expected } at this point");
2293 SMLoc End = Tok.getEndLoc();
2294 Parser.Lex(); // Eat "}"
2295 const MCExpr *RndModeOp =
2296 MCConstantExpr::create(rndMode, Parser.getContext());
2297 Operands.push_back(X86Operand::CreateImm(RndModeOp, Start, End));
2298 return false;
2299 }
2300 if (Tok.getIdentifier() == "sae") {
2301 Parser.Lex(); // Eat the sae
2302 if (!getLexer().is(AsmToken::RCurly))
2303 return Error(Tok.getLoc(), "Expected } at this point");
2304 Parser.Lex(); // Eat "}"
2305 Operands.push_back(X86Operand::CreateToken("{sae}", consumedToken));
2306 return false;
2307 }
2308 return Error(Tok.getLoc(), "unknown token in expression");
2309}
2310
/// Parse conditional flags for CCMP/CTEST, e.g. {dfv=of,sf,zf,cf} right after
/// mnemonic.
2313bool X86AsmParser::parseCFlagsOp(OperandVector &Operands) {
2314 MCAsmParser &Parser = getParser();
2315 AsmToken Tok = Parser.getTok();
2316 const SMLoc Start = Tok.getLoc();
2317 if (!Tok.is(AsmToken::LCurly))
2318 return Error(Tok.getLoc(), "Expected { at this point");
2319 Parser.Lex(); // Eat "{"
2320 Tok = Parser.getTok();
2321 if (Tok.getIdentifier().lower() != "dfv")
2322 return Error(Tok.getLoc(), "Expected dfv at this point");
2323 Parser.Lex(); // Eat "dfv"
2324 Tok = Parser.getTok();
2325 if (!Tok.is(AsmToken::Equal))
2326 return Error(Tok.getLoc(), "Expected = at this point");
2327 Parser.Lex(); // Eat "="
2328
2329 Tok = Parser.getTok();
2330 SMLoc End;
2331 if (Tok.is(AsmToken::RCurly)) {
2332 End = Tok.getEndLoc();
2334 MCConstantExpr::create(0, Parser.getContext()), Start, End));
2335 Parser.Lex(); // Eat "}"
2336 return false;
2337 }
2338 unsigned CFlags = 0;
2339 for (unsigned I = 0; I < 4; ++I) {
2340 Tok = Parser.getTok();
2341 unsigned CFlag = StringSwitch<unsigned>(Tok.getIdentifier().lower())
2342 .Case("of", 0x8)
2343 .Case("sf", 0x4)
2344 .Case("zf", 0x2)
2345 .Case("cf", 0x1)
2346 .Default(~0U);
2347 if (CFlag == ~0U)
2348 return Error(Tok.getLoc(), "Invalid conditional flags");
2349
2350 if (CFlags & CFlag)
2351 return Error(Tok.getLoc(), "Duplicated conditional flag");
2352 CFlags |= CFlag;
2353
2354 Parser.Lex(); // Eat one conditional flag
2355 Tok = Parser.getTok();
2356 if (Tok.is(AsmToken::RCurly)) {
2357 End = Tok.getEndLoc();
2359 MCConstantExpr::create(CFlags, Parser.getContext()), Start, End));
2360 Parser.Lex(); // Eat "}"
2361 return false;
2362 } else if (I == 3) {
2363 return Error(Tok.getLoc(), "Expected } at this point");
2364 } else if (Tok.isNot(AsmToken::Comma)) {
2365 return Error(Tok.getLoc(), "Expected } or , at this point");
2366 }
2367 Parser.Lex(); // Eat ","
2368 }
2369 llvm_unreachable("Unexpected control flow");
2370}
2371
2372/// Parse the '.' operator.
2373bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM,
2374 SMLoc &End) {
2375 const AsmToken &Tok = getTok();
2377
2378 // Drop the optional '.'.
2379 StringRef DotDispStr = Tok.getString();
2380 DotDispStr.consume_front(".");
2381 StringRef TrailingDot;
2382
2383 // .Imm gets lexed as a real.
2384 if (Tok.is(AsmToken::Real)) {
2385 APInt DotDisp;
2386 if (DotDispStr.getAsInteger(10, DotDisp))
2387 return Error(Tok.getLoc(), "Unexpected offset");
2388 Info.Offset = DotDisp.getZExtValue();
2389 } else if ((isParsingMSInlineAsm() || getParser().isParsingMasm()) &&
2390 Tok.is(AsmToken::Identifier)) {
2391 if (DotDispStr.ends_with(".")) {
2392 TrailingDot = DotDispStr.substr(DotDispStr.size() - 1);
2393 DotDispStr = DotDispStr.drop_back(1);
2394 }
2395 const std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
2396 const StringRef Base = BaseMember.first, Member = BaseMember.second;
2397 if (getParser().lookUpField(SM.getType(), DotDispStr, Info) &&
2398 getParser().lookUpField(SM.getSymName(), DotDispStr, Info) &&
2399 getParser().lookUpField(DotDispStr, Info) &&
2400 (!SemaCallback ||
2401 SemaCallback->LookupInlineAsmField(Base, Member, Info.Offset)))
2402 return Error(Tok.getLoc(), "Unable to lookup field reference!");
2403 } else {
2404 return Error(Tok.getLoc(), "Unexpected token type!");
2405 }
2406
2407 // Eat the DotExpression and update End
2408 End = SMLoc::getFromPointer(DotDispStr.data());
2409 const char *DotExprEndLoc = DotDispStr.data() + DotDispStr.size();
2410 while (Tok.getLoc().getPointer() < DotExprEndLoc)
2411 Lex();
2412 if (!TrailingDot.empty())
2413 getLexer().UnLex(AsmToken(AsmToken::Dot, TrailingDot));
2414 SM.addImm(Info.Offset);
2415 SM.setTypeInfo(Info.Type);
2416 return false;
2417}
2418
2419/// Parse the 'offset' operator.
2420/// This operator is used to specify the location of a given operand
2421bool X86AsmParser::ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID,
2423 SMLoc &End) {
2424 // Eat offset, mark start of identifier.
2425 SMLoc Start = Lex().getLoc();
2426 ID = getTok().getString();
2427 if (!isParsingMSInlineAsm()) {
2428 if ((getTok().isNot(AsmToken::Identifier) &&
2429 getTok().isNot(AsmToken::String)) ||
2430 getParser().parsePrimaryExpr(Val, End, nullptr))
2431 return Error(Start, "unexpected token!");
2432 } else if (ParseIntelInlineAsmIdentifier(Val, ID, Info, false, End, true)) {
2433 return Error(Start, "unable to lookup expression");
2434 } else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal)) {
2435 return Error(Start, "offset operator cannot yet handle constants");
2436 }
2437 return false;
2438}
2439
// Query a candidate string for being an Intel assembly operator.
// Report back its kind, or IOK_INVALID if it does not evaluate to a known one.
2442unsigned X86AsmParser::IdentifyIntelInlineAsmOperator(StringRef Name) {
2444 .Cases("TYPE","type",IOK_TYPE)
2445 .Cases("SIZE","size",IOK_SIZE)
2446 .Cases("LENGTH","length",IOK_LENGTH)
2447 .Default(IOK_INVALID);
2448}
2449
2450/// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
2451/// returns the number of elements in an array. It returns the value 1 for
2452/// non-array variables. The SIZE operator returns the size of a C or C++
2453/// variable. A variable's size is the product of its LENGTH and TYPE. The
2454/// TYPE operator returns the size of a C or C++ type or variable. If the
2455/// variable is an array, TYPE returns the size of a single element.
2456unsigned X86AsmParser::ParseIntelInlineAsmOperator(unsigned OpKind) {
2457 MCAsmParser &Parser = getParser();
2458 const AsmToken &Tok = Parser.getTok();
2459 Parser.Lex(); // Eat operator.
2460
2461 const MCExpr *Val = nullptr;
2463 SMLoc Start = Tok.getLoc(), End;
2465 if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info,
2466 /*IsUnevaluatedOperand=*/true, End))
2467 return 0;
2468
2470 Error(Start, "unable to lookup expression");
2471 return 0;
2472 }
2473
2474 unsigned CVal = 0;
2475 switch(OpKind) {
2476 default: llvm_unreachable("Unexpected operand kind!");
2477 case IOK_LENGTH: CVal = Info.Var.Length; break;
2478 case IOK_SIZE: CVal = Info.Var.Size; break;
2479 case IOK_TYPE: CVal = Info.Var.Type; break;
2480 }
2481
2482 return CVal;
2483}
2484
// Query a candidate string for being a MASM assembly operator.
// Report back its kind, or MOK_INVALID if it does not evaluate to a known one.
2487unsigned X86AsmParser::IdentifyMasmOperator(StringRef Name) {
2488 return StringSwitch<unsigned>(Name.lower())
2489 .Case("type", MOK_TYPE)
2490 .Cases("size", "sizeof", MOK_SIZEOF)
2491 .Cases("length", "lengthof", MOK_LENGTHOF)
2492 .Default(MOK_INVALID);
2493}
2494
2495/// Parse the 'LENGTHOF', 'SIZEOF', and 'TYPE' operators. The LENGTHOF operator
2496/// returns the number of elements in an array. It returns the value 1 for
2497/// non-array variables. The SIZEOF operator returns the size of a type or
2498/// variable in bytes. A variable's size is the product of its LENGTH and TYPE.
2499/// The TYPE operator returns the size of a variable. If the variable is an
2500/// array, TYPE returns the size of a single element.
2501bool X86AsmParser::ParseMasmOperator(unsigned OpKind, int64_t &Val) {
2502 MCAsmParser &Parser = getParser();
2503 SMLoc OpLoc = Parser.getTok().getLoc();
2504 Parser.Lex(); // Eat operator.
2505
2506 Val = 0;
2507 if (OpKind == MOK_SIZEOF || OpKind == MOK_TYPE) {
2508 // Check for SIZEOF(<type>) and TYPE(<type>).
2509 bool InParens = Parser.getTok().is(AsmToken::LParen);
2510 const AsmToken &IDTok = InParens ? getLexer().peekTok() : Parser.getTok();
2512 if (IDTok.is(AsmToken::Identifier) &&
2513 !Parser.lookUpType(IDTok.getIdentifier(), Type)) {
2514 Val = Type.Size;
2515
2516 // Eat tokens.
2517 if (InParens)
2518 parseToken(AsmToken::LParen);
2519 parseToken(AsmToken::Identifier);
2520 if (InParens)
2521 parseToken(AsmToken::RParen);
2522 }
2523 }
2524
2525 if (!Val) {
2526 IntelExprStateMachine SM;
2527 SMLoc End, Start = Parser.getTok().getLoc();
2528 if (ParseIntelExpression(SM, End))
2529 return true;
2530
2531 switch (OpKind) {
2532 default:
2533 llvm_unreachable("Unexpected operand kind!");
2534 case MOK_SIZEOF:
2535 Val = SM.getSize();
2536 break;
2537 case MOK_LENGTHOF:
2538 Val = SM.getLength();
2539 break;
2540 case MOK_TYPE:
2541 Val = SM.getElementSize();
2542 break;
2543 }
2544
2545 if (!Val)
2546 return Error(OpLoc, "expression has unknown type", SMRange(Start, End));
2547 }
2548
2549 return false;
2550}
2551
2552bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size) {
2553 Size = StringSwitch<unsigned>(getTok().getString())
2554 .Cases("BYTE", "byte", 8)
2555 .Cases("WORD", "word", 16)
2556 .Cases("DWORD", "dword", 32)
2557 .Cases("FLOAT", "float", 32)
2558 .Cases("LONG", "long", 32)
2559 .Cases("FWORD", "fword", 48)
2560 .Cases("DOUBLE", "double", 64)
2561 .Cases("QWORD", "qword", 64)
2562 .Cases("MMWORD","mmword", 64)
2563 .Cases("XWORD", "xword", 80)
2564 .Cases("TBYTE", "tbyte", 80)
2565 .Cases("XMMWORD", "xmmword", 128)
2566 .Cases("YMMWORD", "ymmword", 256)
2567 .Cases("ZMMWORD", "zmmword", 512)
2568 .Default(0);
2569 if (Size) {
2570 const AsmToken &Tok = Lex(); // Eat operand size (e.g., byte, word).
2571 if (!(Tok.getString() == "PTR" || Tok.getString() == "ptr"))
2572 return Error(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
2573 Lex(); // Eat ptr.
2574 }
2575 return false;
2576}
2577
2578bool X86AsmParser::parseIntelOperand(OperandVector &Operands, StringRef Name) {
2579 MCAsmParser &Parser = getParser();
2580 const AsmToken &Tok = Parser.getTok();
2581 SMLoc Start, End;
2582
2583 // Parse optional Size directive.
2584 unsigned Size;
2585 if (ParseIntelMemoryOperandSize(Size))
2586 return true;
2587 bool PtrInOperand = bool(Size);
2588
2589 Start = Tok.getLoc();
2590
2591 // Rounding mode operand.
2592 if (getLexer().is(AsmToken::LCurly))
2593 return ParseRoundingModeOp(Start, Operands);
2594
2595 // Register operand.
2596 MCRegister RegNo;
2597 if (Tok.is(AsmToken::Identifier) && !parseRegister(RegNo, Start, End)) {
2598 if (RegNo == X86::RIP)
2599 return Error(Start, "rip can only be used as a base register");
2600 // A Register followed by ':' is considered a segment override
2601 if (Tok.isNot(AsmToken::Colon)) {
2602 if (PtrInOperand)
2603 return Error(Start, "expected memory operand after 'ptr', "
2604 "found register operand instead");
2605 Operands.push_back(X86Operand::CreateReg(RegNo, Start, End));
2606 return false;
2607 }
2608 // An alleged segment override. check if we have a valid segment register
2609 if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
2610 return Error(Start, "invalid segment register");
2611 // Eat ':' and update Start location
2612 Start = Lex().getLoc();
2613 }
2614
2615 // Immediates and Memory
2616 IntelExprStateMachine SM;
2617 if (ParseIntelExpression(SM, End))
2618 return true;
2619
2620 if (isParsingMSInlineAsm())
2621 RewriteIntelExpression(SM, Start, Tok.getLoc());
2622
2623 int64_t Imm = SM.getImm();
2624 const MCExpr *Disp = SM.getSym();
2625 const MCExpr *ImmDisp = MCConstantExpr::create(Imm, getContext());
2626 if (Disp && Imm)
2627 Disp = MCBinaryExpr::createAdd(Disp, ImmDisp, getContext());
2628 if (!Disp)
2629 Disp = ImmDisp;
2630
2631 // RegNo != 0 specifies a valid segment register,
2632 // and we are parsing a segment override
2633 if (!SM.isMemExpr() && !RegNo) {
2634 if (isParsingMSInlineAsm() && SM.isOffsetOperator()) {
2635 const InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
2637 // Disp includes the address of a variable; make sure this is recorded
2638 // for later handling.
2639 Operands.push_back(X86Operand::CreateImm(Disp, Start, End,
2640 SM.getSymName(), Info.Var.Decl,
2641 Info.Var.IsGlobalLV));
2642 return false;
2643 }
2644 }
2645
2646 Operands.push_back(X86Operand::CreateImm(Disp, Start, End));
2647 return false;
2648 }
2649
2650 StringRef ErrMsg;
2651 unsigned BaseReg = SM.getBaseReg();
2652 unsigned IndexReg = SM.getIndexReg();
2653 if (IndexReg && BaseReg == X86::RIP)
2654 BaseReg = 0;
2655 unsigned Scale = SM.getScale();
2656 if (!PtrInOperand)
2657 Size = SM.getElementSize() << 3;
2658
2659 if (Scale == 0 && BaseReg != X86::ESP && BaseReg != X86::RSP &&
2660 (IndexReg == X86::ESP || IndexReg == X86::RSP))
2661 std::swap(BaseReg, IndexReg);
2662
2663 // If BaseReg is a vector register and IndexReg is not, swap them unless
2664 // Scale was specified in which case it would be an error.
2665 if (Scale == 0 &&
2666 !(X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) ||
2667 X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) ||
2668 X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg)) &&
2669 (X86MCRegisterClasses[X86::VR128XRegClassID].contains(BaseReg) ||
2670 X86MCRegisterClasses[X86::VR256XRegClassID].contains(BaseReg) ||
2671 X86MCRegisterClasses[X86::VR512RegClassID].contains(BaseReg)))
2672 std::swap(BaseReg, IndexReg);
2673
2674 if (Scale != 0 &&
2675 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg))
2676 return Error(Start, "16-bit addresses cannot have a scale");
2677
2678 // If there was no explicit scale specified, change it to 1.
2679 if (Scale == 0)
2680 Scale = 1;
2681
2682 // If this is a 16-bit addressing mode with the base and index in the wrong
2683 // order, swap them so CheckBaseRegAndIndexRegAndScale doesn't fail. It is
2684 // shared with att syntax where order matters.
2685 if ((BaseReg == X86::SI || BaseReg == X86::DI) &&
2686 (IndexReg == X86::BX || IndexReg == X86::BP))
2687 std::swap(BaseReg, IndexReg);
2688
2689 if ((BaseReg || IndexReg) &&
2690 CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
2691 ErrMsg))
2692 return Error(Start, ErrMsg);
2693 bool IsUnconditionalBranch =
2694 Name.equals_insensitive("jmp") || Name.equals_insensitive("call");
2695 if (isParsingMSInlineAsm())
2696 return CreateMemForMSInlineAsm(RegNo, Disp, BaseReg, IndexReg, Scale,
2697 IsUnconditionalBranch && is64BitMode(),
2698 Start, End, Size, SM.getSymName(),
2699 SM.getIdentifierInfo(), Operands);
2700
2701 // When parsing x64 MS-style assembly, all non-absolute references to a named
2702 // variable default to RIP-relative.
2703 unsigned DefaultBaseReg = X86::NoRegister;
2704 bool MaybeDirectBranchDest = true;
2705
2706 if (Parser.isParsingMasm()) {
2707 if (is64BitMode() && SM.getElementSize() > 0) {
2708 DefaultBaseReg = X86::RIP;
2709 }
2710 if (IsUnconditionalBranch) {
2711 if (PtrInOperand) {
2712 MaybeDirectBranchDest = false;
2713 if (is64BitMode())
2714 DefaultBaseReg = X86::RIP;
2715 } else if (!BaseReg && !IndexReg && Disp &&
2716 Disp->getKind() == MCExpr::SymbolRef) {
2717 if (is64BitMode()) {
2718 if (SM.getSize() == 8) {
2719 MaybeDirectBranchDest = false;
2720 DefaultBaseReg = X86::RIP;
2721 }
2722 } else {
2723 if (SM.getSize() == 4 || SM.getSize() == 2)
2724 MaybeDirectBranchDest = false;
2725 }
2726 }
2727 }
2728 } else if (IsUnconditionalBranch) {
2729 // Treat `call [offset fn_ref]` (or `jmp`) syntax as an error.
2730 if (!PtrInOperand && SM.isOffsetOperator())
2731 return Error(
2732 Start, "`OFFSET` operator cannot be used in an unconditional branch");
2733 if (PtrInOperand || SM.isBracketUsed())
2734 MaybeDirectBranchDest = false;
2735 }
2736
2737 if ((BaseReg || IndexReg || RegNo || DefaultBaseReg != X86::NoRegister))
2739 getPointerWidth(), RegNo, Disp, BaseReg, IndexReg, Scale, Start, End,
2740 Size, DefaultBaseReg, /*SymName=*/StringRef(), /*OpDecl=*/nullptr,
2741 /*FrontendSize=*/0, /*UseUpRegs=*/false, MaybeDirectBranchDest));
2742 else
2744 getPointerWidth(), Disp, Start, End, Size, /*SymName=*/StringRef(),
2745 /*OpDecl=*/nullptr, /*FrontendSize=*/0, /*UseUpRegs=*/false,
2746 MaybeDirectBranchDest));
2747 return false;
2748}
2749
2750bool X86AsmParser::parseATTOperand(OperandVector &Operands) {
2751 MCAsmParser &Parser = getParser();
2752 switch (getLexer().getKind()) {
2753 case AsmToken::Dollar: {
2754 // $42 or $ID -> immediate.
2755 SMLoc Start = Parser.getTok().getLoc(), End;
2756 Parser.Lex();
2757 const MCExpr *Val;
2758 // This is an immediate, so we should not parse a register. Do a precheck
2759 // for '%' to supercede intra-register parse errors.
2760 SMLoc L = Parser.getTok().getLoc();
2761 if (check(getLexer().is(AsmToken::Percent), L,
2762 "expected immediate expression") ||
2763 getParser().parseExpression(Val, End) ||
2764 check(isa<X86MCExpr>(Val), L, "expected immediate expression"))
2765 return true;
2766 Operands.push_back(X86Operand::CreateImm(Val, Start, End));
2767 return false;
2768 }
2769 case AsmToken::LCurly: {
2770 SMLoc Start = Parser.getTok().getLoc();
2771 return ParseRoundingModeOp(Start, Operands);
2772 }
2773 default: {
2774 // This a memory operand or a register. We have some parsing complications
2775 // as a '(' may be part of an immediate expression or the addressing mode
2776 // block. This is complicated by the fact that an assembler-level variable
2777 // may refer either to a register or an immediate expression.
2778
2779 SMLoc Loc = Parser.getTok().getLoc(), EndLoc;
2780 const MCExpr *Expr = nullptr;
2781 unsigned Reg = 0;
2782 if (getLexer().isNot(AsmToken::LParen)) {
2783 // No '(' so this is either a displacement expression or a register.
2784 if (Parser.parseExpression(Expr, EndLoc))
2785 return true;
2786 if (auto *RE = dyn_cast<X86MCExpr>(Expr)) {
2787 // Segment Register. Reset Expr and copy value to register.
2788 Expr = nullptr;
2789 Reg = RE->getRegNo();
2790
2791 // Check the register.
2792 if (Reg == X86::EIZ || Reg == X86::RIZ)
2793 return Error(
2794 Loc, "%eiz and %riz can only be used as index registers",
2795 SMRange(Loc, EndLoc));
2796 if (Reg == X86::RIP)
2797 return Error(Loc, "%rip can only be used as a base register",
2798 SMRange(Loc, EndLoc));
2799 // Return register that are not segment prefixes immediately.
2800 if (!Parser.parseOptionalToken(AsmToken::Colon)) {
2801 Operands.push_back(X86Operand::CreateReg(Reg, Loc, EndLoc));
2802 return false;
2803 }
2804 if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(Reg))
2805 return Error(Loc, "invalid segment register");
2806 // Accept a '*' absolute memory reference after the segment. Place it
2807 // before the full memory operand.
2808 if (getLexer().is(AsmToken::Star))
2809 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
2810 }
2811 }
2812 // This is a Memory operand.
2813 return ParseMemOperand(Reg, Expr, Loc, EndLoc, Operands);
2814 }
2815 }
2816}
2817
2818// X86::COND_INVALID if not a recognized condition code or alternate mnemonic,
2819// otherwise the EFLAGS Condition Code enumerator.
2820X86::CondCode X86AsmParser::ParseConditionCode(StringRef CC) {
2822 .Case("o", X86::COND_O) // Overflow
2823 .Case("no", X86::COND_NO) // No Overflow
2824 .Cases("b", "nae", X86::COND_B) // Below/Neither Above nor Equal
2825 .Cases("ae", "nb", X86::COND_AE) // Above or Equal/Not Below
2826 .Cases("e", "z", X86::COND_E) // Equal/Zero
2827 .Cases("ne", "nz", X86::COND_NE) // Not Equal/Not Zero
2828 .Cases("be", "na", X86::COND_BE) // Below or Equal/Not Above
2829 .Cases("a", "nbe", X86::COND_A) // Above/Neither Below nor Equal
2830 .Case("s", X86::COND_S) // Sign
2831 .Case("ns", X86::COND_NS) // No Sign
2832 .Cases("p", "pe", X86::COND_P) // Parity/Parity Even
2833 .Cases("np", "po", X86::COND_NP) // No Parity/Parity Odd
2834 .Cases("l", "nge", X86::COND_L) // Less/Neither Greater nor Equal
2835 .Cases("ge", "nl", X86::COND_GE) // Greater or Equal/Not Less
2836 .Cases("le", "ng", X86::COND_LE) // Less or Equal/Not Greater
2837 .Cases("g", "nle", X86::COND_G) // Greater/Neither Less nor Equal
2839}
2840
2841// true on failure, false otherwise
2842// If no {z} mark was found - Parser doesn't advance
2843bool X86AsmParser::ParseZ(std::unique_ptr<X86Operand> &Z,
2844 const SMLoc &StartLoc) {
2845 MCAsmParser &Parser = getParser();
2846 // Assuming we are just pass the '{' mark, quering the next token
2847 // Searched for {z}, but none was found. Return false, as no parsing error was
2848 // encountered
2849 if (!(getLexer().is(AsmToken::Identifier) &&
2850 (getLexer().getTok().getIdentifier() == "z")))
2851 return false;
2852 Parser.Lex(); // Eat z
2853 // Query and eat the '}' mark
2854 if (!getLexer().is(AsmToken::RCurly))
2855 return Error(getLexer().getLoc(), "Expected } at this point");
2856 Parser.Lex(); // Eat '}'
2857 // Assign Z with the {z} mark operand
2858 Z = X86Operand::CreateToken("{z}", StartLoc);
2859 return false;
2860}
2861
2862// true on failure, false otherwise
2863bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands) {
2864 MCAsmParser &Parser = getParser();
2865 if (getLexer().is(AsmToken::LCurly)) {
2866 // Eat "{" and mark the current place.
2867 const SMLoc consumedToken = consumeToken();
2868 // Distinguish {1to<NUM>} from {%k<NUM>}.
2869 if(getLexer().is(AsmToken::Integer)) {
2870 // Parse memory broadcasting ({1to<NUM>}).
2871 if (getLexer().getTok().getIntVal() != 1)
2872 return TokError("Expected 1to<NUM> at this point");
2873 StringRef Prefix = getLexer().getTok().getString();
2874 Parser.Lex(); // Eat first token of 1to8
2875 if (!getLexer().is(AsmToken::Identifier))
2876 return TokError("Expected 1to<NUM> at this point");
2877 // Recognize only reasonable suffixes.
2878 SmallVector<char, 5> BroadcastVector;
2879 StringRef BroadcastString = (Prefix + getLexer().getTok().getIdentifier())
2880 .toStringRef(BroadcastVector);
2881 if (!BroadcastString.starts_with("1to"))
2882 return TokError("Expected 1to<NUM> at this point");
2883 const char *BroadcastPrimitive =
2884 StringSwitch<const char *>(BroadcastString)
2885 .Case("1to2", "{1to2}")
2886 .Case("1to4", "{1to4}")
2887 .Case("1to8", "{1to8}")
2888 .Case("1to16", "{1to16}")
2889 .Case("1to32", "{1to32}")
2890 .Default(nullptr);
2891 if (!BroadcastPrimitive)
2892 return TokError("Invalid memory broadcast primitive.");
2893 Parser.Lex(); // Eat trailing token of 1toN
2894 if (!getLexer().is(AsmToken::RCurly))
2895 return TokError("Expected } at this point");
2896 Parser.Lex(); // Eat "}"
2897 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
2898 consumedToken));
2899 // No AVX512 specific primitives can pass
2900 // after memory broadcasting, so return.
2901 return false;
2902 } else {
2903 // Parse either {k}{z}, {z}{k}, {k} or {z}
2904 // last one have no meaning, but GCC accepts it
2905 // Currently, we're just pass a '{' mark
2906 std::unique_ptr<X86Operand> Z;
2907 if (ParseZ(Z, consumedToken))
2908 return true;
2909 // Reaching here means that parsing of the allegadly '{z}' mark yielded
2910 // no errors.
2911 // Query for the need of further parsing for a {%k<NUM>} mark
2912 if (!Z || getLexer().is(AsmToken::LCurly)) {
2913 SMLoc StartLoc = Z ? consumeToken() : consumedToken;
2914 // Parse an op-mask register mark ({%k<NUM>}), which is now to be
2915 // expected
2916 MCRegister RegNo;
2917 SMLoc RegLoc;
2918 if (!parseRegister(RegNo, RegLoc, StartLoc) &&
2919 X86MCRegisterClasses[X86::VK1RegClassID].contains(RegNo)) {
2920 if (RegNo == X86::K0)
2921 return Error(RegLoc, "Register k0 can't be used as write mask");
2922 if (!getLexer().is(AsmToken::RCurly))
2923 return Error(getLexer().getLoc(), "Expected } at this point");
2924 Operands.push_back(X86Operand::CreateToken("{", StartLoc));
2925 Operands.push_back(
2926 X86Operand::CreateReg(RegNo, StartLoc, StartLoc));
2927 Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
2928 } else
2929 return Error(getLexer().getLoc(),
2930 "Expected an op-mask register at this point");
2931 // {%k<NUM>} mark is found, inquire for {z}
2932 if (getLexer().is(AsmToken::LCurly) && !Z) {
2933 // Have we've found a parsing error, or found no (expected) {z} mark
2934 // - report an error
2935 if (ParseZ(Z, consumeToken()) || !Z)
2936 return Error(getLexer().getLoc(),
2937 "Expected a {z} mark at this point");
2938
2939 }
2940 // '{z}' on its own is meaningless, hence should be ignored.
2941 // on the contrary - have it been accompanied by a K register,
2942 // allow it.
2943 if (Z)
2944 Operands.push_back(std::move(Z));
2945 }
2946 }
2947 }
2948 return false;
2949}
2950
2951/// ParseMemOperand: 'seg : disp(basereg, indexreg, scale)'. The '%ds:' prefix
2952/// has already been parsed if present. disp may be provided as well.
2953bool X86AsmParser::ParseMemOperand(unsigned SegReg, const MCExpr *Disp,
2954 SMLoc StartLoc, SMLoc EndLoc,
2956 MCAsmParser &Parser = getParser();
2957 SMLoc Loc;
2958 // Based on the initial passed values, we may be in any of these cases, we are
2959 // in one of these cases (with current position (*)):
2960
2961 // 1. seg : * disp (base-index-scale-expr)
2962 // 2. seg : *(disp) (base-index-scale-expr)
2963 // 3. seg : *(base-index-scale-expr)
2964 // 4. disp *(base-index-scale-expr)
2965 // 5. *(disp) (base-index-scale-expr)
2966 // 6. *(base-index-scale-expr)
2967 // 7. disp *
2968 // 8. *(disp)
2969
2970 // If we do not have an displacement yet, check if we're in cases 4 or 6 by
2971 // checking if the first object after the parenthesis is a register (or an
2972 // identifier referring to a register) and parse the displacement or default
2973 // to 0 as appropriate.
2974 auto isAtMemOperand = [this]() {
2975 if (this->getLexer().isNot(AsmToken::LParen))
2976 return false;
2977 AsmToken Buf[2];
2978 StringRef Id;
2979 auto TokCount = this->getLexer().peekTokens(Buf, true);
2980 if (TokCount == 0)
2981 return false;
2982 switch (Buf[0].getKind()) {
2983 case AsmToken::Percent:
2984 case AsmToken::Comma:
2985 return true;
2986 // These lower cases are doing a peekIdentifier.
2987 case AsmToken::At:
2988 case AsmToken::Dollar:
2989 if ((TokCount > 1) &&
2990 (Buf[1].is(AsmToken::Identifier) || Buf[1].is(AsmToken::String)) &&
2991 (Buf[0].getLoc().getPointer() + 1 == Buf[1].getLoc().getPointer()))
2992 Id = StringRef(Buf[0].getLoc().getPointer(),
2993 Buf[1].getIdentifier().size() + 1);
2994 break;
2996 case AsmToken::String:
2997 Id = Buf[0].getIdentifier();
2998 break;
2999 default:
3000 return false;
3001 }
3002 // We have an ID. Check if it is bound to a register.
3003 if (!Id.empty()) {
3004 MCSymbol *Sym = this->getContext().getOrCreateSymbol(Id);
3005 if (Sym->isVariable()) {
3006 auto V = Sym->getVariableValue(/*SetUsed*/ false);
3007 return isa<X86MCExpr>(V);
3008 }
3009 }
3010 return false;
3011 };
3012
3013 if (!Disp) {
3014 // Parse immediate if we're not at a mem operand yet.
3015 if (!isAtMemOperand()) {
3016 if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(Disp, EndLoc))
3017 return true;
3018 assert(!isa<X86MCExpr>(Disp) && "Expected non-register here.");
3019 } else {
3020 // Disp is implicitly zero if we haven't parsed it yet.
3021 Disp = MCConstantExpr::create(0, Parser.getContext());
3022 }
3023 }
3024
3025 // We are now either at the end of the operand or at the '(' at the start of a
3026 // base-index-scale-expr.
3027
3028 if (!parseOptionalToken(AsmToken::LParen)) {
3029 if (SegReg == 0)
3030 Operands.push_back(
3031 X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc));
3032 else
3033 Operands.push_back(X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
3034 0, 0, 1, StartLoc, EndLoc));
3035 return false;
3036 }
3037
3038 // If we reached here, then eat the '(' and Process
3039 // the rest of the memory operand.
3040 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
3041 SMLoc BaseLoc = getLexer().getLoc();
3042 const MCExpr *E;
3043 StringRef ErrMsg;
3044
3045 // Parse BaseReg if one is provided.
3046 if (getLexer().isNot(AsmToken::Comma) && getLexer().isNot(AsmToken::RParen)) {
3047 if (Parser.parseExpression(E, EndLoc) ||
3048 check(!isa<X86MCExpr>(E), BaseLoc, "expected register here"))
3049 return true;
3050
3051 // Check the register.
3052 BaseReg = cast<X86MCExpr>(E)->getRegNo();
3053 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ)
3054 return Error(BaseLoc, "eiz and riz can only be used as index registers",
3055 SMRange(BaseLoc, EndLoc));
3056 }
3057
3058 if (parseOptionalToken(AsmToken::Comma)) {
3059 // Following the comma we should have either an index register, or a scale
3060 // value. We don't support the later form, but we want to parse it
3061 // correctly.
3062 //
3063 // Even though it would be completely consistent to support syntax like
3064 // "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
3065 if (getLexer().isNot(AsmToken::RParen)) {
3066 if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(E, EndLoc))
3067 return true;
3068
3069 if (!isa<X86MCExpr>(E)) {
3070 // We've parsed an unexpected Scale Value instead of an index
3071 // register. Interpret it as an absolute.
3072 int64_t ScaleVal;
3073 if (!E->evaluateAsAbsolute(ScaleVal, getStreamer().getAssemblerPtr()))
3074 return Error(Loc, "expected absolute expression");
3075 if (ScaleVal != 1)
3076 Warning(Loc, "scale factor without index register is ignored");
3077 Scale = 1;
3078 } else { // IndexReg Found.
3079 IndexReg = cast<X86MCExpr>(E)->getRegNo();
3080
3081 if (BaseReg == X86::RIP)
3082 return Error(Loc,
3083 "%rip as base register can not have an index register");
3084 if (IndexReg == X86::RIP)
3085 return Error(Loc, "%rip is not allowed as an index register");
3086
3087 if (parseOptionalToken(AsmToken::Comma)) {
3088 // Parse the scale amount:
3089 // ::= ',' [scale-expression]
3090
3091 // A scale amount without an index is ignored.
3092 if (getLexer().isNot(AsmToken::RParen)) {
3093 int64_t ScaleVal;
3094 if (Parser.parseTokenLoc(Loc) ||
3095 Parser.parseAbsoluteExpression(ScaleVal))
3096 return Error(Loc, "expected scale expression");
3097 Scale = (unsigned)ScaleVal;
3098 // Validate the scale amount.
3099 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
3100 Scale != 1)
3101 return Error(Loc, "scale factor in 16-bit address must be 1");
3102 if (checkScale(Scale, ErrMsg))
3103 return Error(Loc, ErrMsg);
3104 }
3105 }
3106 }
3107 }
3108 }
3109
3110 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
3111 if (parseToken(AsmToken::RParen, "unexpected token in memory operand"))
3112 return true;
3113
3114 // This is to support otherwise illegal operand (%dx) found in various
3115 // unofficial manuals examples (e.g. "out[s]?[bwl]? %al, (%dx)") and must now
3116 // be supported. Mark such DX variants separately fix only in special cases.
3117 if (BaseReg == X86::DX && IndexReg == 0 && Scale == 1 && SegReg == 0 &&
3118 isa<MCConstantExpr>(Disp) &&
3119 cast<MCConstantExpr>(Disp)->getValue() == 0) {
3120 Operands.push_back(X86Operand::CreateDXReg(BaseLoc, BaseLoc));
3121 return false;
3122 }
3123
3124 if (CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
3125 ErrMsg))
3126 return Error(BaseLoc, ErrMsg);
3127
3128 // If the displacement is a constant, check overflows. For 64-bit addressing,
3129 // gas requires isInt<32> and otherwise reports an error. For others, gas
3130 // reports a warning and allows a wider range. E.g. gas allows
3131 // [-0xffffffff,0xffffffff] for 32-bit addressing (e.g. Linux kernel uses
3132 // `leal -__PAGE_OFFSET(%ecx),%esp` where __PAGE_OFFSET is 0xc0000000).
3133 if (BaseReg || IndexReg) {
3134 if (auto CE = dyn_cast<MCConstantExpr>(Disp)) {
3135 auto Imm = CE->getValue();
3136 bool Is64 = X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) ||
3137 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg);
3138 bool Is16 = X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg);
3139 if (Is64) {
3140 if (!isInt<32>(Imm))
3141 return Error(BaseLoc, "displacement " + Twine(Imm) +
3142 " is not within [-2147483648, 2147483647]");
3143 } else if (!Is16) {
3144 if (!isUInt<32>(Imm < 0 ? -uint64_t(Imm) : uint64_t(Imm))) {
3145 Warning(BaseLoc, "displacement " + Twine(Imm) +
3146 " shortened to 32-bit signed " +
3147 Twine(static_cast<int32_t>(Imm)));
3148 }
3149 } else if (!isUInt<16>(Imm < 0 ? -uint64_t(Imm) : uint64_t(Imm))) {
3150 Warning(BaseLoc, "displacement " + Twine(Imm) +
3151 " shortened to 16-bit signed " +
3152 Twine(static_cast<int16_t>(Imm)));
3153 }
3154 }
3155 }
3156
3157 if (SegReg || BaseReg || IndexReg)
3158 Operands.push_back(X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
3159 BaseReg, IndexReg, Scale, StartLoc,
3160 EndLoc));
3161 else
3162 Operands.push_back(
3163 X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc));
3164 return false;
3165}
3166
3167// Parse either a standard primary expression or a register.
3168bool X86AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
3169 MCAsmParser &Parser = getParser();
3170 // See if this is a register first.
3171 if (getTok().is(AsmToken::Percent) ||
3172 (isParsingIntelSyntax() && getTok().is(AsmToken::Identifier) &&
3173 MatchRegisterName(Parser.getTok().getString()))) {
3174 SMLoc StartLoc = Parser.getTok().getLoc();
3175 MCRegister RegNo;
3176 if (parseRegister(RegNo, StartLoc, EndLoc))
3177 return true;
3178 Res = X86MCExpr::create(RegNo, Parser.getContext());
3179 return false;
3180 }
3181 return Parser.parsePrimaryExpr(Res, EndLoc, nullptr);
3182}
3183
// Parse one instruction statement. The (possibly pseudo-prefixed) mnemonic has
// already been lexed into `Name`; this routine consumes the remainder of the
// statement and fills `Operands`. Returns true on error (after reporting it).
//
// Beyond plain operand parsing this routine:
//  * handles {rex}/{rex2}/{vex}/{vex2}/{vex3}/{evex}/{disp8}/{disp32}/{nf}
//    pseudo prefixes and the ".d8"/".d32" mnemonic suffixes,
//  * folds comparison-code mnemonics (cmpXXss, vpcmpXXd, vpcomXXb, ...) into
//    the base mnemonic plus an immediate predicate operand,
//  * recognizes bare prefix mnemonics (lock/rep/cs/...) and gas-compatible
//    string-instruction shorthands (ins/outs/lods/stos/scas/cmps/movs),
//  * applies several binutils-compatibility rewrites (fsub -> fsubp, segment
//    moves, in/out with (%dx), "xlat mem8" -> "xlatb").
3184bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
3185 SMLoc NameLoc, OperandVector &Operands) {
3186 MCAsmParser &Parser = getParser();
3187 InstInfo = &Info;
3188
3189 // Reset the forced VEX encoding.
3190 ForcedOpcodePrefix = OpcodePrefix_Default;
3191 ForcedDispEncoding = DispEncoding_Default;
3192 UseApxExtendedReg = false;
3193 ForcedNoFlag = false;
3194
3195 // Parse pseudo prefixes.
3196 while (true) {
3197 if (Name == "{") {
3198 if (getLexer().isNot(AsmToken::Identifier))
3199 return Error(Parser.getTok().getLoc(), "Unexpected token after '{'");
3200 std::string Prefix = Parser.getTok().getString().lower();
3201 Parser.Lex(); // Eat identifier.
3202 if (getLexer().isNot(AsmToken::RCurly))
3203 return Error(Parser.getTok().getLoc(), "Expected '}'");
3204 Parser.Lex(); // Eat curly.
3205
3206 if (Prefix == "rex")
3207 ForcedOpcodePrefix = OpcodePrefix_REX;
3208 else if (Prefix == "rex2")
3209 ForcedOpcodePrefix = OpcodePrefix_REX2;
3210 else if (Prefix == "vex")
3211 ForcedOpcodePrefix = OpcodePrefix_VEX;
3212 else if (Prefix == "vex2")
3213 ForcedOpcodePrefix = OpcodePrefix_VEX2;
3214 else if (Prefix == "vex3")
3215 ForcedOpcodePrefix = OpcodePrefix_VEX3;
3216 else if (Prefix == "evex")
3217 ForcedOpcodePrefix = OpcodePrefix_EVEX;
3218 else if (Prefix == "disp8")
3219 ForcedDispEncoding = DispEncoding_Disp8;
3220 else if (Prefix == "disp32")
3221 ForcedDispEncoding = DispEncoding_Disp32;
3222 else if (Prefix == "nf")
3223 ForcedNoFlag = true;
3224 else
3225 return Error(NameLoc, "unknown prefix");
3226
  // After the pseudo prefix, re-read the real mnemonic (or loop again on
  // another '{' pseudo prefix).
3227 NameLoc = Parser.getTok().getLoc();
3228 if (getLexer().is(AsmToken::LCurly)) {
3229 Parser.Lex();
3230 Name = "{";
3231 } else {
3232 if (getLexer().isNot(AsmToken::Identifier))
3233 return Error(Parser.getTok().getLoc(), "Expected identifier");
3234 // FIXME: The mnemonic won't match correctly if it's not in lower case.
3235 Name = Parser.getTok().getString();
3236 Parser.Lex();
3237 }
3238 continue;
3239 }
3240 // Parse MASM style pseudo prefixes.
3241 if (isParsingMSInlineAsm()) {
3242 if (Name.equals_insensitive("vex"))
3243 ForcedOpcodePrefix = OpcodePrefix_VEX;
3244 else if (Name.equals_insensitive("vex2"))
3245 ForcedOpcodePrefix = OpcodePrefix_VEX2;
3246 else if (Name.equals_insensitive("vex3"))
3247 ForcedOpcodePrefix = OpcodePrefix_VEX3;
3248 else if (Name.equals_insensitive("evex"))
3249 ForcedOpcodePrefix = OpcodePrefix_EVEX;
3250
3251 if (ForcedOpcodePrefix != OpcodePrefix_Default) {
3252 if (getLexer().isNot(AsmToken::Identifier))
3253 return Error(Parser.getTok().getLoc(), "Expected identifier");
3254 // FIXME: The mnemonic won't match correctly if it's not in lower case.
3255 Name = Parser.getTok().getString();
3256 NameLoc = Parser.getTok().getLoc();
3257 Parser.Lex();
3258 }
3259 }
3260 break;
3261 }
3262
3263 // Support the suffix syntax for overriding displacement size as well.
3264 if (Name.consume_back(".d32")) {
3265 ForcedDispEncoding = DispEncoding_Disp32;
3266 } else if (Name.consume_back(".d8")) {
3267 ForcedDispEncoding = DispEncoding_Disp8;
3268 }
3269
3270 StringRef PatchedName = Name;
3271
3272 // Hack to skip "short" following Jcc.
3273 if (isParsingIntelSyntax() &&
3274 (PatchedName == "jmp" || PatchedName == "jc" || PatchedName == "jnc" ||
3275 PatchedName == "jcxz" || PatchedName == "jecxz" ||
3276 (PatchedName.starts_with("j") &&
3277 ParseConditionCode(PatchedName.substr(1)) != X86::COND_INVALID))) {
3278 StringRef NextTok = Parser.getTok().getString();
3279 if (Parser.isParsingMasm() ? NextTok.equals_insensitive("short")
3280 : NextTok == "short") {
3281 SMLoc NameEndLoc =
3282 NameLoc.getFromPointer(NameLoc.getPointer() + Name.size());
3283 // Eat the short keyword.
3284 Parser.Lex();
3285 // MS and GAS ignore the short keyword; they both determine the jmp type
3286 // based on the distance of the label. (NASM does emit different code with
3287 // and without "short," though.)
3288 InstInfo->AsmRewrites->emplace_back(AOK_Skip, NameEndLoc,
3289 NextTok.size() + 1);
3290 }
3291 }
3292
3293 // FIXME: Hack to recognize setneb as setne.
  // Drop the trailing 'b' (e.g. "setneb" -> "setne") except where the 'b'
  // is part of the real mnemonic (setb/setnb/setzub/setzunb).
3294 if (PatchedName.starts_with("set") && PatchedName.ends_with("b") &&
3295 PatchedName != "setzub" && PatchedName != "setzunb" &&
3296 PatchedName != "setb" && PatchedName != "setnb")
3297 PatchedName = PatchedName.substr(0, Name.size()-1);
3298
  // ~0U is the sentinel for "no comparison predicate extracted from the
  // mnemonic"; a valid predicate is pushed as an immediate operand later.
3299 unsigned ComparisonPredicate = ~0U;
3300
3301 // FIXME: Hack to recognize cmp<comparison code>{sh,ss,sd,ph,ps,pd}.
3302 if ((PatchedName.starts_with("cmp") || PatchedName.starts_with("vcmp")) &&
3303 (PatchedName.ends_with("ss") || PatchedName.ends_with("sd") ||
3304 PatchedName.ends_with("sh") || PatchedName.ends_with("ph") ||
3305 PatchedName.ends_with("ps") || PatchedName.ends_with("pd"))) {
3306 bool IsVCMP = PatchedName[0] == 'v';
3307 unsigned CCIdx = IsVCMP ? 4 : 3;
3308 unsigned CC = StringSwitch<unsigned>(
3309 PatchedName.slice(CCIdx, PatchedName.size() - 2))
3310 .Case("eq", 0x00)
3311 .Case("eq_oq", 0x00)
3312 .Case("lt", 0x01)
3313 .Case("lt_os", 0x01)
3314 .Case("le", 0x02)
3315 .Case("le_os", 0x02)
3316 .Case("unord", 0x03)
3317 .Case("unord_q", 0x03)
3318 .Case("neq", 0x04)
3319 .Case("neq_uq", 0x04)
3320 .Case("nlt", 0x05)
3321 .Case("nlt_us", 0x05)
3322 .Case("nle", 0x06)
3323 .Case("nle_us", 0x06)
3324 .Case("ord", 0x07)
3325 .Case("ord_q", 0x07)
3326 /* AVX only from here */
3327 .Case("eq_uq", 0x08)
3328 .Case("nge", 0x09)
3329 .Case("nge_us", 0x09)
3330 .Case("ngt", 0x0A)
3331 .Case("ngt_us", 0x0A)
3332 .Case("false", 0x0B)
3333 .Case("false_oq", 0x0B)
3334 .Case("neq_oq", 0x0C)
3335 .Case("ge", 0x0D)
3336 .Case("ge_os", 0x0D)
3337 .Case("gt", 0x0E)
3338 .Case("gt_os", 0x0E)
3339 .Case("true", 0x0F)
3340 .Case("true_uq", 0x0F)
3341 .Case("eq_os", 0x10)
3342 .Case("lt_oq", 0x11)
3343 .Case("le_oq", 0x12)
3344 .Case("unord_s", 0x13)
3345 .Case("neq_us", 0x14)
3346 .Case("nlt_uq", 0x15)
3347 .Case("nle_uq", 0x16)
3348 .Case("ord_s", 0x17)
3349 .Case("eq_us", 0x18)
3350 .Case("nge_uq", 0x19)
3351 .Case("ngt_uq", 0x1A)
3352 .Case("false_os", 0x1B)
3353 .Case("neq_os", 0x1C)
3354 .Case("ge_oq", 0x1D)
3355 .Case("gt_oq", 0x1E)
3356 .Case("true_us", 0x1F)
3357 .Default(~0U);
  // Non-VEX cmp only supports predicates 0-7, and the FP16 forms (sh/ph)
  // only exist with a 'v' prefix.
3358 if (CC != ~0U && (IsVCMP || CC < 8) &&
3359 (IsVCMP || PatchedName.back() != 'h')) {
3360 if (PatchedName.ends_with("ss"))
3361 PatchedName = IsVCMP ? "vcmpss" : "cmpss";
3362 else if (PatchedName.ends_with("sd"))
3363 PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
3364 else if (PatchedName.ends_with("ps"))
3365 PatchedName = IsVCMP ? "vcmpps" : "cmpps";
3366 else if (PatchedName.ends_with("pd"))
3367 PatchedName = IsVCMP ? "vcmppd" : "cmppd";
3368 else if (PatchedName.ends_with("sh"))
3369 PatchedName = "vcmpsh";
3370 else if (PatchedName.ends_with("ph"))
3371 PatchedName = "vcmpph";
3372 else
3373 llvm_unreachable("Unexpected suffix!");
3374
3375 ComparisonPredicate = CC;
3376 }
3377 }
3378
3379 // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
3380 if (PatchedName.starts_with("vpcmp") &&
3381 (PatchedName.back() == 'b' || PatchedName.back() == 'w' ||
3382 PatchedName.back() == 'd' || PatchedName.back() == 'q')) {
3383 unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1;
3384 unsigned CC = StringSwitch<unsigned>(
3385 PatchedName.slice(5, PatchedName.size() - SuffixSize))
3386 .Case("eq", 0x0) // Only allowed on unsigned. Checked below.
3387 .Case("lt", 0x1)
3388 .Case("le", 0x2)
3389 //.Case("false", 0x3) // Not a documented alias.
3390 .Case("neq", 0x4)
3391 .Case("nlt", 0x5)
3392 .Case("nle", 0x6)
3393 //.Case("true", 0x7) // Not a documented alias.
3394 .Default(~0U);
3395 if (CC != ~0U && (CC != 0 || SuffixSize == 2)) {
3396 switch (PatchedName.back()) {
3397 default: llvm_unreachable("Unexpected character!");
3398 case 'b': PatchedName = SuffixSize == 2 ? "vpcmpub" : "vpcmpb"; break;
3399 case 'w': PatchedName = SuffixSize == 2 ? "vpcmpuw" : "vpcmpw"; break;
3400 case 'd': PatchedName = SuffixSize == 2 ? "vpcmpud" : "vpcmpd"; break;
3401 case 'q': PatchedName = SuffixSize == 2 ? "vpcmpuq" : "vpcmpq"; break;
3402 }
3403 // Set up the immediate to push into the operands later.
3404 ComparisonPredicate = CC;
3405 }
3406 }
3407
3408 // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
3409 if (PatchedName.starts_with("vpcom") &&
3410 (PatchedName.back() == 'b' || PatchedName.back() == 'w' ||
3411 PatchedName.back() == 'd' || PatchedName.back() == 'q')) {
3412 unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1;
3413 unsigned CC = StringSwitch<unsigned>(
3414 PatchedName.slice(5, PatchedName.size() - SuffixSize))
3415 .Case("lt", 0x0)
3416 .Case("le", 0x1)
3417 .Case("gt", 0x2)
3418 .Case("ge", 0x3)
3419 .Case("eq", 0x4)
3420 .Case("neq", 0x5)
3421 .Case("false", 0x6)
3422 .Case("true", 0x7)
3423 .Default(~0U);
3424 if (CC != ~0U) {
3425 switch (PatchedName.back()) {
3426 default: llvm_unreachable("Unexpected character!");
3427 case 'b': PatchedName = SuffixSize == 2 ? "vpcomub" : "vpcomb"; break;
3428 case 'w': PatchedName = SuffixSize == 2 ? "vpcomuw" : "vpcomw"; break;
3429 case 'd': PatchedName = SuffixSize == 2 ? "vpcomud" : "vpcomd"; break;
3430 case 'q': PatchedName = SuffixSize == 2 ? "vpcomuq" : "vpcomq"; break;
3431 }
3432 // Set up the immediate to push into the operands later.
3433 ComparisonPredicate = CC;
3434 }
3435 }
3436
3437 // Determine whether this is an instruction prefix.
3438 // FIXME:
3439 // Enhance prefixes integrity robustness. for example, following forms
3440 // are currently tolerated:
3441 // repz repnz <insn> ; GAS errors for the use of two similar prefixes
3442 // lock addq %rax, %rbx ; Destination operand must be of memory type
3443 // xacquire <insn> ; xacquire must be accompanied by 'lock'
3444 bool IsPrefix =
  // NOTE(review): the StringSwitch head of this chain (original line 3445,
  // presumably `StringSwitch<bool>(Name)`) is missing from this extraction --
  // confirm against upstream before building.
3446 .Cases("cs", "ds", "es", "fs", "gs", "ss", true)
3447 .Cases("rex64", "data32", "data16", "addr32", "addr16", true)
3448 .Cases("xacquire", "xrelease", true)
3449 .Cases("acquire", "release", isParsingIntelSyntax())
3450 .Default(false);
3451
3452 auto isLockRepeatNtPrefix = [](StringRef N) {
3453 return StringSwitch<bool>(N)
3454 .Cases("lock", "rep", "repe", "repz", "repne", "repnz", "notrack", true)
3455 .Default(false);
3456 };
3457
3458 bool CurlyAsEndOfStatement = false;
3459
3460 unsigned Flags = X86::IP_NO_PREFIX;
3461 while (isLockRepeatNtPrefix(Name.lower())) {
3462 unsigned Prefix =
  // NOTE(review): the StringSwitch head of this chain (original line 3463,
  // presumably `StringSwitch<unsigned>(Name)`) is missing from this
  // extraction -- confirm against upstream.
3464 .Cases("lock", "lock", X86::IP_HAS_LOCK)
3465 .Cases("rep", "repe", "repz", X86::IP_HAS_REPEAT)
3466 .Cases("repne", "repnz", X86::IP_HAS_REPEAT_NE)
3467 .Cases("notrack", "notrack", X86::IP_HAS_NOTRACK)
3468 .Default(X86::IP_NO_PREFIX); // Invalid prefix (impossible)
3469 Flags |= Prefix;
3470 if (getLexer().is(AsmToken::EndOfStatement)) {
3471 // We don't have real instr with the given prefix
3472 // let's use the prefix as the instr.
3473 // TODO: there could be several prefixes one after another
  // NOTE(review): a statement is missing here (original line 3474) -- it
  // looks like the prefix token should be pushed onto Operands; confirm
  // against upstream.
3475 break;
3476 }
3477 // FIXME: The mnemonic won't match correctly if it's not in lower case.
3478 Name = Parser.getTok().getString();
3479 Parser.Lex(); // eat the prefix
3480 // Hack: we could have something like "rep # some comment" or
3481 // "lock; cmpxchg16b $1" or "lock\0A\09incl" or "lock/incl"
3482 while (Name.starts_with(";") || Name.starts_with("\n") ||
3483 Name.starts_with("#") || Name.starts_with("\t") ||
3484 Name.starts_with("/")) {
3485 // FIXME: The mnemonic won't match correctly if it's not in lower case.
3486 Name = Parser.getTok().getString();
3487 Parser.Lex(); // go to next prefix or instr
3488 }
3489 }
3490
3491 if (Flags)
3492 PatchedName = Name;
3493
3494 // Hacks to handle 'data16' and 'data32'
3495 if (PatchedName == "data16" && is16BitMode()) {
3496 return Error(NameLoc, "redundant data16 prefix");
3497 }
3498 if (PatchedName == "data32") {
3499 if (is32BitMode())
3500 return Error(NameLoc, "redundant data32 prefix");
3501 if (is64BitMode())
3502 return Error(NameLoc, "'data32' is not supported in 64-bit mode");
3503 // Hack to 'data16' for the table lookup.
3504 PatchedName = "data16";
3505
3506 if (getLexer().isNot(AsmToken::EndOfStatement)) {
3507 StringRef Next = Parser.getTok().getString();
3508 getLexer().Lex();
3509 // data32 effectively changes the instruction suffix.
3510 // TODO Generalize.
3511 if (Next == "callw")
3512 Next = "calll";
3513 if (Next == "ljmpw")
3514 Next = "ljmpl";
3515
3516 Name = Next;
3517 PatchedName = Name;
3518 ForcedDataPrefix = X86::Is32Bit;
3519 IsPrefix = false;
3520 }
3521 }
3522
3523 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
3524
3525 // Push the immediate if we extracted one from the mnemonic.
  // In AT&T syntax the predicate immediate comes first; in Intel syntax it is
  // pushed after the other operands (see below).
3526 if (ComparisonPredicate != ~0U && !isParsingIntelSyntax()) {
3527 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonPredicate,
3528 getParser().getContext());
3529 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
3530 }
3531
3532 // Parse conditional flags after mnemonic.
3533 if ((Name.starts_with("ccmp") || Name.starts_with("ctest")) &&
3534 parseCFlagsOp(Operands))
3535 return true;
3536
3537 // This does the actual operand parsing. Don't parse any more if we have a
3538 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
3539 // just want to parse the "lock" as the first instruction and the "incl" as
3540 // the next one.
3541 if (getLexer().isNot(AsmToken::EndOfStatement) && !IsPrefix) {
3542 // Parse '*' modifier.
3543 if (getLexer().is(AsmToken::Star))
3544 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
3545
3546 // Read the operands.
3547 while (true) {
3548 if (parseOperand(Operands, Name))
3549 return true;
3550 if (HandleAVX512Operand(Operands))
3551 return true;
3552
3553 // check for comma and eat it
3554 if (getLexer().is(AsmToken::Comma))
3555 Parser.Lex();
3556 else
3557 break;
3558 }
3559
3560 // In MS inline asm curly braces mark the beginning/end of a block,
3561 // therefore they should be interpreted as end of statement
3562 CurlyAsEndOfStatement =
3563 isParsingIntelSyntax() && isParsingMSInlineAsm() &&
3564 (getLexer().is(AsmToken::LCurly) || getLexer().is(AsmToken::RCurly));
3565 if (getLexer().isNot(AsmToken::EndOfStatement) && !CurlyAsEndOfStatement)
3566 return TokError("unexpected token in argument list");
3567 }
3568
3569 // Push the immediate if we extracted one from the mnemonic.
3570 if (ComparisonPredicate != ~0U && isParsingIntelSyntax()) {
3571 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonPredicate,
3572 getParser().getContext());
3573 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
3574 }
3575
3576 // Consume the EndOfStatement or the prefix separator Slash
3577 if (getLexer().is(AsmToken::EndOfStatement) ||
3578 (IsPrefix && getLexer().is(AsmToken::Slash)))
3579 Parser.Lex();
3580 else if (CurlyAsEndOfStatement)
3581 // Add an actual EndOfStatement before the curly brace
3582 Info.AsmRewrites->emplace_back(AOK_EndOfStatement,
3583 getLexer().getTok().getLoc(), 0);
3584
3585 // This is for gas compatibility and cannot be done in td.
3586 // Adding "p" for some floating point with no argument.
3587 // For example: fsub --> fsubp
3588 bool IsFp =
3589 Name == "fsub" || Name == "fdiv" || Name == "fsubr" || Name == "fdivr";
3590 if (IsFp && Operands.size() == 1) {
3591 const char *Repl = StringSwitch<const char *>(Name)
3592 .Case("fsub", "fsubp")
3593 .Case("fdiv", "fdivp")
3594 .Case("fsubr", "fsubrp")
3595 .Case("fdivr", "fdivrp");
3596 static_cast<X86Operand &>(*Operands[0]).setTokenValue(Repl);
3597 }
3598
3599 if ((Name == "mov" || Name == "movw" || Name == "movl") &&
3600 (Operands.size() == 3)) {
3601 X86Operand &Op1 = (X86Operand &)*Operands[1];
3602 X86Operand &Op2 = (X86Operand &)*Operands[2];
3603 SMLoc Loc = Op1.getEndLoc();
3604 // Moving a 32 or 16 bit value into a segment register has the same
3605 // behavior. Modify such instructions to always take shorter form.
3606 if (Op1.isReg() && Op2.isReg() &&
3607 X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(
3608 Op2.getReg()) &&
3609 (X86MCRegisterClasses[X86::GR16RegClassID].contains(Op1.getReg()) ||
3610 X86MCRegisterClasses[X86::GR32RegClassID].contains(Op1.getReg()))) {
3611 // Change instruction name to match new instruction.
3612 if (Name != "mov" && Name[3] == (is16BitMode() ? 'l' : 'w')) {
3613 Name = is16BitMode() ? "movw" : "movl";
3614 Operands[0] = X86Operand::CreateToken(Name, NameLoc);
3615 }
3616 // Select the correct equivalent 16-/32-bit source register.
3617 MCRegister Reg =
3618 getX86SubSuperRegister(Op1.getReg(), is16BitMode() ? 16 : 32);
3619 Operands[1] = X86Operand::CreateReg(Reg, Loc, Loc);
3620 }
3621 }
3622
3623 // This is a terrible hack to handle "out[s]?[bwl]? %al, (%dx)" ->
3624 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
3625 // documented form in various unofficial manuals, so a lot of code uses it.
3626 if ((Name == "outb" || Name == "outsb" || Name == "outw" || Name == "outsw" ||
3627 Name == "outl" || Name == "outsl" || Name == "out" || Name == "outs") &&
3628 Operands.size() == 3) {
3629 X86Operand &Op = (X86Operand &)*Operands.back();
3630 if (Op.isDXReg())
3631 Operands.back() = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
3632 Op.getEndLoc());
3633 }
3634 // Same hack for "in[s]?[bwl]? (%dx), %al" -> "inb %dx, %al".
3635 if ((Name == "inb" || Name == "insb" || Name == "inw" || Name == "insw" ||
3636 Name == "inl" || Name == "insl" || Name == "in" || Name == "ins") &&
3637 Operands.size() == 3) {
3638 X86Operand &Op = (X86Operand &)*Operands[1];
3639 if (Op.isDXReg())
3640 Operands[1] = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
3641 Op.getEndLoc());
3642 }
3643
  // NOTE(review): a declaration is missing here (original line 3644) -- the
  // `TmpOperands` container used by all the string-instruction fixups below,
  // presumably `SmallVector<std::unique_ptr<X86Operand>, 2> TmpOperands;`.
  // Confirm against upstream.
3645 bool HadVerifyError = false;
3646
3647 // Append default arguments to "ins[bwld]"
3648 if (Name.starts_with("ins") &&
3649 (Operands.size() == 1 || Operands.size() == 3) &&
3650 (Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd" ||
3651 Name == "ins")) {
3652
3653 AddDefaultSrcDestOperands(TmpOperands,
3654 X86Operand::CreateReg(X86::DX, NameLoc, NameLoc),
3655 DefaultMemDIOperand(NameLoc));
3656 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3657 }
3658
3659 // Append default arguments to "outs[bwld]"
3660 if (Name.starts_with("outs") &&
3661 (Operands.size() == 1 || Operands.size() == 3) &&
3662 (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
3663 Name == "outsd" || Name == "outs")) {
3664 AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
3665 X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
3666 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3667 }
3668
3669 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
3670 // values of $SIREG according to the mode. It would be nice if this
3671 // could be achieved with InstAlias in the tables.
3672 if (Name.starts_with("lods") &&
3673 (Operands.size() == 1 || Operands.size() == 2) &&
3674 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
3675 Name == "lodsl" || Name == "lodsd" || Name == "lodsq")) {
3676 TmpOperands.push_back(DefaultMemSIOperand(NameLoc));
3677 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3678 }
3679
3680 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
3681 // values of $DIREG according to the mode. It would be nice if this
3682 // could be achieved with InstAlias in the tables.
3683 if (Name.starts_with("stos") &&
3684 (Operands.size() == 1 || Operands.size() == 2) &&
3685 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
3686 Name == "stosl" || Name == "stosd" || Name == "stosq")) {
3687 TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
3688 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3689 }
3690
3691 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
3692 // values of $DIREG according to the mode. It would be nice if this
3693 // could be achieved with InstAlias in the tables.
3694 if (Name.starts_with("scas") &&
3695 (Operands.size() == 1 || Operands.size() == 2) &&
3696 (Name == "scas" || Name == "scasb" || Name == "scasw" ||
3697 Name == "scasl" || Name == "scasd" || Name == "scasq")) {
3698 TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
3699 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3700 }
3701
3702 // Add default SI and DI operands to "cmps[bwlq]".
3703 if (Name.starts_with("cmps") &&
3704 (Operands.size() == 1 || Operands.size() == 3) &&
3705 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
3706 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
3707 AddDefaultSrcDestOperands(TmpOperands, DefaultMemDIOperand(NameLoc),
3708 DefaultMemSIOperand(NameLoc));
3709 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3710 }
3711
3712 // Add default SI and DI operands to "movs[bwlq]".
3713 if (((Name.starts_with("movs") &&
3714 (Name == "movs" || Name == "movsb" || Name == "movsw" ||
3715 Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
3716 (Name.starts_with("smov") &&
3717 (Name == "smov" || Name == "smovb" || Name == "smovw" ||
3718 Name == "smovl" || Name == "smovd" || Name == "smovq"))) &&
3719 (Operands.size() == 1 || Operands.size() == 3)) {
  // Bare "movsd" in AT&T syntax means the string move, not the SSE2
  // scalar-double instruction; rewrite it to "movsl".
3720 if (Name == "movsd" && Operands.size() == 1 && !isParsingIntelSyntax())
3721 Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
3722 AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
3723 DefaultMemDIOperand(NameLoc));
3724 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3725 }
3726
3727 // Check if we encountered an error for one of the string instructions
3728 if (HadVerifyError) {
3729 return HadVerifyError;
3730 }
3731
3732 // Transforms "xlat mem8" into "xlatb"
3733 if ((Name == "xlat" || Name == "xlatb") && Operands.size() == 2) {
3734 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
3735 if (Op1.isMem8()) {
3736 Warning(Op1.getStartLoc(), "memory operand is only for determining the "
3737 "size, (R|E)BX will be used for the location");
3738 Operands.pop_back();
3739 static_cast<X86Operand &>(*Operands[0]).setTokenValue("xlatb");
3740 }
3741 }
3742
3743 if (Flags)
3744 Operands.push_back(X86Operand::CreatePrefix(Flags, NameLoc, NameLoc));
3745 return false;
3746}
3747
// Post-match hook: apply encoding/size optimizations to a freshly matched
// MCInst. Returns true when the instruction was modified (signalling the
// caller that the instruction changed), false to leave it as matched.
3748bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
  // Shrink 3-byte VEX encodings to 2-byte where legal, unless the user
  // explicitly forced {vex3}.
3749 if (ForcedOpcodePrefix != OpcodePrefix_VEX3 &&
3750 X86::optimizeInstFromVEX3ToVEX2(Inst, MII.get(Inst.getOpcode())))
3751 return true;
3752
  // NOTE(review): the condition guarding this return (original line 3753,
  // another X86::optimize* call) is missing from this extraction -- confirm
  // against upstream LLVM before building.
3754 return true;
3755
3756 switch (Inst.getOpcode()) {
3757 default: return false;
3758 case X86::JMP_1:
3759 // {disp32} forces a larger displacement as if the instruction was relaxed.
3760 // NOTE: 16-bit mode uses 16-bit displacement even though it says {disp32}.
3761 // This matches GNU assembler.
3762 if (ForcedDispEncoding == DispEncoding_Disp32) {
3763 Inst.setOpcode(is16BitMode() ? X86::JMP_2 : X86::JMP_4);
3764 return true;
3765 }
3766
3767 return false;
3768 case X86::JCC_1:
3769 // {disp32} forces a larger displacement as if the instruction was relaxed.
3770 // NOTE: 16-bit mode uses 16-bit displacement even though it says {disp32}.
3771 // This matches GNU assembler.
3772 if (ForcedDispEncoding == DispEncoding_Disp32) {
3773 Inst.setOpcode(is16BitMode() ? X86::JCC_2 : X86::JCC_4);
3774 return true;
3775 }
3776
3777 return false;
3778 case X86::INT: {
3779 // Transforms "int $3" into "int3" as a size optimization.
3780 // We can't write this as an InstAlias.
3781 if (!Inst.getOperand(0).isImm() || Inst.getOperand(0).getImm() != 3)
3782 return false;
3783 Inst.clear();
3784 Inst.setOpcode(X86::INT3);
3785 return true;
3786 }
3787 }
3788}
3789
3790bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
3791 using namespace X86;
3792 const MCRegisterInfo *MRI = getContext().getRegisterInfo();
3793 unsigned Opcode = Inst.getOpcode();
3794 uint64_t TSFlags = MII.get(Opcode).TSFlags;
3795 if (isVFCMADDCPH(Opcode) || isVFCMADDCSH(Opcode) || isVFMADDCPH(Opcode) ||
3796 isVFMADDCSH(Opcode)) {
3797 unsigned Dest = Inst.getOperand(0).getReg();
3798 for (unsigned i = 2; i < Inst.getNumOperands(); i++)
3799 if (Inst.getOperand(i).isReg() && Dest == Inst.getOperand(i).getReg())
3800 return Warning(Ops[0]->getStartLoc(), "Destination register should be "
3801 "distinct from source registers");
3802 } else if (isVFCMULCPH(Opcode) || isVFCMULCSH(Opcode) || isVFMULCPH(Opcode) ||
3803 isVFMULCSH(Opcode)) {
3804 unsigned Dest = Inst.getOperand(0).getReg();
3805 // The mask variants have different operand list. Scan from the third
3806 // operand to avoid emitting incorrect warning.
3807 // VFMULCPHZrr Dest, Src1, Src2
3808 // VFMULCPHZrrk Dest, Dest, Mask, Src1, Src2
3809 // VFMULCPHZrrkz Dest, Mask, Src1, Src2
3810 for (unsigned i = ((TSFlags & X86II::EVEX_K) ? 2 : 1);
3811 i < Inst.getNumOperands(); i++)
3812 if (Inst.getOperand(i).isReg() && Dest == Inst.getOperand(i).getReg())
3813 return Warning(Ops[0]->getStartLoc(), "Destination register should be "
3814 "distinct from source registers");
3815 } else if (isV4FMADDPS(Opcode) || isV4FMADDSS(Opcode) ||
3816 isV4FNMADDPS(Opcode) || isV4FNMADDSS(Opcode) ||
3817 isVP4DPWSSDS(Opcode) || isVP4DPWSSD(Opcode)) {
3818 unsigned Src2 = Inst.getOperand(Inst.getNumOperands() -
3820 unsigned Src2Enc = MRI->getEncodingValue(Src2);
3821 if (Src2Enc % 4 != 0) {
3823 unsigned GroupStart = (Src2Enc / 4) * 4;
3824 unsigned GroupEnd = GroupStart + 3;
3825 return Warning(Ops[0]->getStartLoc(),
3826 "source register '" + RegName + "' implicitly denotes '" +
3827 RegName.take_front(3) + Twine(GroupStart) + "' to '" +
3828 RegName.take_front(3) + Twine(GroupEnd) +
3829 "' source group");
3830 }
3831 } else if (isVGATHERDPD(Opcode) || isVGATHERDPS(Opcode) ||
3832 isVGATHERQPD(Opcode) || isVGATHERQPS(Opcode) ||
3833 isVPGATHERDD(Opcode) || isVPGATHERDQ(Opcode) ||
3834 isVPGATHERQD(Opcode) || isVPGATHERQQ(Opcode)) {
3835 bool HasEVEX = (TSFlags & X86II::EncodingMask) == X86II::EVEX;
3836 if (HasEVEX) {
3837 unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
3838 unsigned Index = MRI->getEncodingValue(
3839 Inst.getOperand(4 + X86::AddrIndexReg).getReg());
3840 if (Dest == Index)
3841 return Warning(Ops[0]->getStartLoc(), "index and destination registers "
3842 "should be distinct");
3843 } else {
3844 unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
3845 unsigned Mask = MRI->getEncodingValue(Inst.getOperand(1).getReg());
3846 unsigned Index = MRI->getEncodingValue(
3847 Inst.getOperand(3 + X86::AddrIndexReg).getReg());
3848 if (Dest == Mask || Dest == Index || Mask == Index)
3849 return Warning(Ops[0]->getStartLoc(), "mask, index, and destination "
3850 "registers should be distinct");
3851 }
3852 }
3853
3854 // Check that we aren't mixing AH/BH/CH/DH with REX prefix. We only need to
3855 // check this with the legacy encoding, VEX/EVEX/XOP don't use REX.
3856 if ((TSFlags & X86II::EncodingMask) == 0) {
3857 MCPhysReg HReg = X86::NoRegister;
3858 bool UsesRex = TSFlags & X86II::REX_W;
3859 unsigned NumOps = Inst.getNumOperands();
3860 for (unsigned i = 0; i != NumOps; ++i) {
3861 const MCOperand &MO = Inst.getOperand(i);
3862 if (!MO.isReg())
3863 continue;
3864 unsigned Reg = MO.getReg();
3865 if (Reg == X86::AH || Reg == X86::BH || Reg == X86::CH || Reg == X86::DH)
3866 HReg = Reg;
3869 UsesRex = true;
3870 }
3871
3872 if (UsesRex && HReg != X86::NoRegister) {
3874 return Error(Ops[0]->getStartLoc(),
3875 "can't encode '" + RegName + "' in an instruction requiring "
3876 "REX prefix");
3877 }
3878 }
3879
3880 if ((Opcode == X86::PREFETCHIT0 || Opcode == X86::PREFETCHIT1)) {
3881 const MCOperand &MO = Inst.getOperand(X86::AddrBaseReg);
3882 if (!MO.isReg() || MO.getReg() != X86::RIP)
3883 return Warning(
3884 Ops[0]->getStartLoc(),
3885 Twine((Inst.getOpcode() == X86::PREFETCHIT0 ? "'prefetchit0'"
3886 : "'prefetchit1'")) +
3887 " only supports RIP-relative address");
3888 }
3889 return false;
3890}
3891
3892void X86AsmParser::emitWarningForSpecialLVIInstruction(SMLoc Loc) {
3893 Warning(Loc, "Instruction may be vulnerable to LVI and "
3894 "requires manual mitigation");
3895 Note(SMLoc(), "See https://software.intel.com/"
3896 "security-software-guidance/insights/"
3897 "deep-dive-load-value-injection#specialinstructions"
3898 " for more information");
3899}
3900
/// RET instructions and also instructions that indirect calls/jumps from memory
/// combine a load and a branch within a single instruction. To mitigate these
/// instructions against LVI, they must be decomposed into separate load and
/// branch instructions, with an LFENCE in between. For more details, see:
/// - X86LoadValueInjectionRetHardening.cpp
/// - X86LoadValueInjectionIndirectThunks.cpp
/// - https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection
///
/// For RET variants this emits a hardening sequence (SHL of the return slot
/// followed by LFENCE) ahead of the instruction; for memory-indirect
/// JMP/CALL it only emits a warning, since those require manual mitigation.
void X86AsmParser::applyLVICFIMitigation(MCInst &Inst, MCStreamer &Out) {
  // Information on control-flow instructions that require manual mitigation can
  // be found here:
  // https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
  switch (Inst.getOpcode()) {
  case X86::RET16:
  case X86::RET32:
  case X86::RET64:
  case X86::RETI16:
  case X86::RETI32:
  case X86::RETI64: {
    // Emit `shl $0, (%sp-reg)` + `lfence` before the RET. The caller
    // (emitInstruction) still emits the RET itself afterwards.
    MCInst ShlInst, FenceInst;
    bool Parse32 = is32BitMode() || Code16GCC;
    // Pick the stack pointer matching the current parsing mode.
    unsigned Basereg =
        is64BitMode() ? X86::RSP : (Parse32 ? X86::ESP : X86::SP);
    const MCExpr *Disp = MCConstantExpr::create(0, getContext());
    auto ShlMemOp = X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
                                          /*BaseReg=*/Basereg, /*IndexReg=*/0,
                                          /*Scale=*/1, SMLoc{}, SMLoc{}, 0);
    ShlInst.setOpcode(X86::SHL64mi);
    ShlMemOp->addMemOperands(ShlInst, 5);
    ShlInst.addOperand(MCOperand::createImm(0));
    FenceInst.setOpcode(X86::LFENCE);
    Out.emitInstruction(ShlInst, getSTI());
    Out.emitInstruction(FenceInst, getSTI());
    return;
  }
  case X86::JMP16m:
  case X86::JMP32m:
  case X86::JMP64m:
  case X86::CALL16m:
  case X86::CALL32m:
  case X86::CALL64m:
    // Memory-indirect branches can't be decomposed here; warn instead.
    emitWarningForSpecialLVIInstruction(Inst.getLoc());
    return;
  }
}
3947
3948/// To mitigate LVI, every instruction that performs a load can be followed by
3949/// an LFENCE instruction to squash any potential mis-speculation. There are
3950/// some instructions that require additional considerations, and may requre
3951/// manual mitigation. For more details, see:
3952/// https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection
3953///
3954/// Returns `true` if a mitigation was applied or warning was emitted.
3955void X86AsmParser::applyLVILoadHardeningMitigation(MCInst &Inst,
3956 MCStreamer &Out) {
3957 auto Opcode = Inst.getOpcode();
3958 auto Flags = Inst.getFlags();
3959 if ((Flags & X86::IP_HAS_REPEAT) || (Flags & X86::IP_HAS_REPEAT_NE)) {
3960 // Information on REP string instructions that require manual mitigation can
3961 // be found here:
3962 // https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
3963 switch (Opcode) {
3964 case X86::CMPSB:
3965 case X86::CMPSW:
3966 case X86::CMPSL:
3967 case X86::CMPSQ:
3968 case X86::SCASB:
3969 case X86::SCASW:
3970 case X86::SCASL:
3971 case X86::SCASQ:
3972 emitWarningForSpecialLVIInstruction(Inst.getLoc());
3973 return;
3974 }
3975 } else if (Opcode == X86::REP_PREFIX || Opcode == X86::REPNE_PREFIX) {
3976 // If a REP instruction is found on its own line, it may or may not be
3977 // followed by a vulnerable instruction. Emit a warning just in case.
3978 emitWarningForSpecialLVIInstruction(Inst.getLoc());
3979 return;
3980 }
3981
3982 const MCInstrDesc &MCID = MII.get(Inst.getOpcode());
3983
3984 // Can't mitigate after terminators or calls. A control flow change may have
3985 // already occurred.
3986 if (MCID.isTerminator() || MCID.isCall())
3987 return;
3988
3989 // LFENCE has the mayLoad property, don't double fence.
3990 if (MCID.mayLoad() && Inst.getOpcode() != X86::LFENCE) {
3992 FenceInst.setOpcode(X86::LFENCE);
3993 Out.emitInstruction(FenceInst, getSTI());
3994 }
3995}
3996
3997void X86AsmParser::emitInstruction(MCInst &Inst, OperandVector &Operands,
3998 MCStreamer &Out) {
4000 getSTI().hasFeature(X86::FeatureLVIControlFlowIntegrity))
4001 applyLVICFIMitigation(Inst, Out);
4002
4003 Out.emitInstruction(Inst, getSTI());
4004
4006 getSTI().hasFeature(X86::FeatureLVILoadHardening))
4007 applyLVILoadHardeningMitigation(Inst, Out);
4008}
4009
4011 unsigned Result = 0;
4012 X86Operand &Prefix = static_cast<X86Operand &>(*Operands.back());
4013 if (Prefix.isPrefix()) {
4014 Result = Prefix.getPrefix();
4015 Operands.pop_back();
4016 }
4017 return Result;
4018}
4019
4020bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4023 bool MatchingInlineAsm) {
4024 assert(!Operands.empty() && "Unexpect empty operand list!");
4025 assert((*Operands[0]).isToken() && "Leading operand should always be a mnemonic!");
4026
4027 // First, handle aliases that expand to multiple instructions.
4028 MatchFPUWaitAlias(IDLoc, static_cast<X86Operand &>(*Operands[0]), Operands,
4029 Out, MatchingInlineAsm);
4030 unsigned Prefixes = getPrefixes(Operands);
4031
4032 MCInst Inst;
4033
4034 // If REX/REX2/VEX/EVEX encoding is forced, we need to pass the USE_* flag to
4035 // the encoder and printer.
4036 if (ForcedOpcodePrefix == OpcodePrefix_REX)
4037 Prefixes |= X86::IP_USE_REX;
4038 else if (ForcedOpcodePrefix == OpcodePrefix_REX2)
4039 Prefixes |= X86::IP_USE_REX2;
4040 else if (ForcedOpcodePrefix == OpcodePrefix_VEX)
4041 Prefixes |= X86::IP_USE_VEX;
4042 else if (ForcedOpcodePrefix == OpcodePrefix_VEX2)
4043 Prefixes |= X86::IP_USE_VEX2;
4044 else if (ForcedOpcodePrefix == OpcodePrefix_VEX3)
4045 Prefixes |= X86::IP_USE_VEX3;
4046 else if (ForcedOpcodePrefix == OpcodePrefix_EVEX)
4047 Prefixes |= X86::IP_USE_EVEX;
4048
4049 // Set encoded flags for {disp8} and {disp32}.
4050 if (ForcedDispEncoding == DispEncoding_Disp8)
4051 Prefixes |= X86::IP_USE_DISP8;
4052 else if (ForcedDispEncoding == DispEncoding_Disp32)
4053 Prefixes |= X86::IP_USE_DISP32;
4054
4055 if (Prefixes)
4056 Inst.setFlags(Prefixes);
4057
4058 return isParsingIntelSyntax()
4059 ? matchAndEmitIntelInstruction(IDLoc, Opcode, Inst, Operands, Out,
4060 ErrorInfo, MatchingInlineAsm)
4061 : matchAndEmitATTInstruction(IDLoc, Opcode, Inst, Operands, Out,
4062 ErrorInfo, MatchingInlineAsm);
4063}
4064
4065void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
4067 bool MatchingInlineAsm) {
4068 // FIXME: This should be replaced with a real .td file alias mechanism.
4069 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
4070 // call.
4071 const char *Repl = StringSwitch<const char *>(Op.getToken())
4072 .Case("finit", "fninit")
4073 .Case("fsave", "fnsave")
4074 .Case("fstcw", "fnstcw")
4075 .Case("fstcww", "fnstcw")
4076 .Case("fstenv", "fnstenv")
4077 .Case("fstsw", "fnstsw")
4078 .Case("fstsww", "fnstsw")
4079 .Case("fclex", "fnclex")
4080 .Default(nullptr);
4081 if (Repl) {
4082 MCInst Inst;
4083 Inst.setOpcode(X86::WAIT);
4084 Inst.setLoc(IDLoc);
4085 if (!MatchingInlineAsm)
4086 emitInstruction(Inst, Operands, Out);
4087 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
4088 }
4089}
4090
4091bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc,
4092 const FeatureBitset &MissingFeatures,
4093 bool MatchingInlineAsm) {
4094 assert(MissingFeatures.any() && "Unknown missing feature!");
4095 SmallString<126> Msg;
4097 OS << "instruction requires:";
4098 for (unsigned i = 0, e = MissingFeatures.size(); i != e; ++i) {
4099 if (MissingFeatures[i])
4100 OS << ' ' << getSubtargetFeatureName(i);
4101 }
4102 return Error(IDLoc, OS.str(), SMRange(), MatchingInlineAsm);
4103}
4104
4105unsigned X86AsmParser::checkTargetMatchPredicate(MCInst &Inst) {
4106 unsigned Opc = Inst.getOpcode();
4107 const MCInstrDesc &MCID = MII.get(Opc);
4108 uint64_t TSFlags = MCID.TSFlags;
4109
4110 if (UseApxExtendedReg && !X86II::canUseApxExtendedReg(MCID))
4111 return Match_Unsupported;
4112 if (ForcedNoFlag == !(TSFlags & X86II::EVEX_NF) && !X86::isCFCMOVCC(Opc))
4113 return Match_Unsupported;
4114
4115 switch (ForcedOpcodePrefix) {
4116 case OpcodePrefix_Default:
4117 break;
4118 case OpcodePrefix_REX:
4119 case OpcodePrefix_REX2:
4120 if (TSFlags & X86II::EncodingMask)
4121 return Match_Unsupported;
4122 break;
4123 case OpcodePrefix_VEX:
4124 case OpcodePrefix_VEX2:
4125 case OpcodePrefix_VEX3:
4126 if ((TSFlags & X86II::EncodingMask) != X86II::VEX)
4127 return Match_Unsupported;
4128 break;
4129 case OpcodePrefix_EVEX:
4130 if ((TSFlags & X86II::EncodingMask) != X86II::EVEX)
4131 return Match_Unsupported;
4132 break;
4133 }
4134
4136 (ForcedOpcodePrefix != OpcodePrefix_VEX &&
4137 ForcedOpcodePrefix != OpcodePrefix_VEX2 &&
4138 ForcedOpcodePrefix != OpcodePrefix_VEX3))
4139 return Match_Unsupported;
4140
4141 return Match_Success;
4142}
4143
4144bool X86AsmParser::matchAndEmitATTInstruction(
4145 SMLoc IDLoc, unsigned &Opcode, MCInst &Inst, OperandVector &Operands,
4146 MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) {
4147 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
4148 SMRange EmptyRange = std::nullopt;
4149 // In 16-bit mode, if data32 is specified, temporarily switch to 32-bit mode
4150 // when matching the instruction.
4151 if (ForcedDataPrefix == X86::Is32Bit)
4152 SwitchMode(X86::Is32Bit);
4153 // First, try a direct match.
4154 FeatureBitset MissingFeatures;
4155 unsigned OriginalError = MatchInstruction(Operands, Inst, ErrorInfo,
4156 MissingFeatures, MatchingInlineAsm,
4157 isParsingIntelSyntax());
4158 if (ForcedDataPrefix == X86::Is32Bit) {
4159 SwitchMode(X86::Is16Bit);
4160 ForcedDataPrefix = 0;
4161 }
4162 switch (OriginalError) {
4163 default: llvm_unreachable("Unexpected match result!");
4164 case Match_Success:
4165 if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
4166 return true;
4167 // Some instructions need post-processing to, for example, tweak which
4168 // encoding is selected. Loop on it while changes happen so the
4169 // individual transformations can chain off each other.
4170 if (!MatchingInlineAsm)
4171 while (processInstruction(Inst, Operands))
4172 ;
4173
4174 Inst.setLoc(IDLoc);
4175 if (!MatchingInlineAsm)
4176 emitInstruction(Inst, Operands, Out);
4177 Opcode = Inst.getOpcode();
4178 return false;
4179 case Match_InvalidImmUnsignedi4: {
4180 SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc();
4181 if (ErrorLoc == SMLoc())
4182 ErrorLoc = IDLoc;
4183 return Error(ErrorLoc, "immediate must be an integer in range [0, 15]",
4184 EmptyRange, MatchingInlineAsm);
4185 }
4186 case Match_MissingFeature:
4187 return ErrorMissingFeature(IDLoc, MissingFeatures, MatchingInlineAsm);
4188 case Match_InvalidOperand:
4189 case Match_MnemonicFail:
4190 case Match_Unsupported:
4191 break;
4192 }
4193 if (Op.getToken().empty()) {
4194 Error(IDLoc, "instruction must have size higher than 0", EmptyRange,
4195 MatchingInlineAsm);
4196 return true;
4197 }
4198
4199 // FIXME: Ideally, we would only attempt suffix matches for things which are
4200 // valid prefixes, and we could just infer the right unambiguous
4201 // type. However, that requires substantially more matcher support than the
4202 // following hack.
4203
4204 // Change the operand to point to a temporary token.
4205 StringRef Base = Op.getToken();
4206 SmallString<16> Tmp;
4207 Tmp += Base;
4208 Tmp += ' ';
4209 Op.setTokenValue(Tmp);
4210
4211 // If this instruction starts with an 'f', then it is a floating point stack
4212 // instruction. These come in up to three forms for 32-bit, 64-bit, and
4213 // 80-bit floating point, which use the suffixes s,l,t respectively.
4214 //
4215 // Otherwise, we assume that this may be an integer instruction, which comes
4216 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
4217 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
4218 // MemSize corresponding to Suffixes. { 8, 16, 32, 64 } { 32, 64, 80, 0 }
4219 const char *MemSize = Base[0] != 'f' ? "\x08\x10\x20\x40" : "\x20\x40\x50\0";
4220
4221 // Check for the various suffix matches.
4222 uint64_t ErrorInfoIgnore;
4223 FeatureBitset ErrorInfoMissingFeatures; // Init suppresses compiler warnings.
4224 unsigned Match[4];
4225
4226 // Some instruction like VPMULDQ is NOT the variant of VPMULD but a new one.
4227 // So we should make sure the suffix matcher only works for memory variant
4228 // that has the same size with the suffix.
4229 // FIXME: This flag is a workaround for legacy instructions that didn't
4230 // declare non suffix variant assembly.
4231 bool HasVectorReg = false;
4232 X86Operand *MemOp = nullptr;
4233 for (const auto &Op : Operands) {
4234 X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
4235 if (X86Op->isVectorReg())
4236 HasVectorReg = true;
4237 else if (X86Op->isMem()) {
4238 MemOp = X86Op;
4239 assert(MemOp->Mem.Size == 0 && "Memory size always 0 under ATT syntax");
4240 // Have we found an unqualified memory operand,
4241 // break. IA allows only one memory operand.
4242 break;
4243 }
4244 }
4245
4246 for (unsigned I = 0, E = std::size(Match); I != E; ++I) {
4247 Tmp.back() = Suffixes[I];
4248 if (MemOp && HasVectorReg)
4249 MemOp->Mem.Size = MemSize[I];
4250 Match[I] = Match_MnemonicFail;
4251 if (MemOp || !HasVectorReg) {
4252 Match[I] =
4253 MatchInstruction(Operands, Inst, ErrorInfoIgnore, MissingFeatures,
4254 MatchingInlineAsm, isParsingIntelSyntax());
4255 // If this returned as a missing feature failure, remember that.
4256 if (Match[I] == Match_MissingFeature)
4257 ErrorInfoMissingFeatures = MissingFeatures;
4258 }
4259 }
4260
4261 // Restore the old token.
4262 Op.setTokenValue(Base);
4263
4264 // If exactly one matched, then we treat that as a successful match (and the
4265 // instruction will already have been filled in correctly, since the failing
4266 // matches won't have modified it).
4267 unsigned NumSuccessfulMatches = llvm::count(Match, Match_Success);
4268 if (NumSuccessfulMatches == 1) {
4269 if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
4270 return true;
4271 // Some instructions need post-processing to, for example, tweak which
4272 // encoding is selected. Loop on it while changes happen so the
4273 // individual transformations can chain off each other.
4274 if (!MatchingInlineAsm)
4275 while (processInstruction(Inst, Operands))
4276 ;
4277
4278 Inst.setLoc(IDLoc);
4279 if (!MatchingInlineAsm)
4280 emitInstruction(Inst, Operands, Out);
4281 Opcode = Inst.getOpcode();
4282 return false;
4283 }
4284
4285 // Otherwise, the match failed, try to produce a decent error message.
4286
4287 // If we had multiple suffix matches, then identify this as an ambiguous
4288 // match.
4289 if (NumSuccessfulMatches > 1) {
4290 char MatchChars[4];
4291 unsigned NumMatches = 0;
4292 for (unsigned I = 0, E = std::size(Match); I != E; ++I)
4293 if (Match[I] == Match_Success)
4294 MatchChars[NumMatches++] = Suffixes[I];
4295
4296 SmallString<126> Msg;
4298 OS << "ambiguous instructions require an explicit suffix (could be ";
4299 for (unsigned i = 0; i != NumMatches; ++i) {
4300 if (i != 0)
4301 OS << ", ";
4302 if (i + 1 == NumMatches)
4303 OS << "or ";
4304 OS << "'" << Base << MatchChars[i] << "'";
4305 }
4306 OS << ")";
4307 Error(IDLoc, OS.str(), EmptyRange, MatchingInlineAsm);
4308 return true;
4309 }
4310
4311 // Okay, we know that none of the variants matched successfully.
4312
4313 // If all of the instructions reported an invalid mnemonic, then the original
4314 // mnemonic was invalid.
4315 if (llvm::count(Match, Match_MnemonicFail) == 4) {
4316 if (OriginalError == Match_MnemonicFail)
4317 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
4318 Op.getLocRange(), MatchingInlineAsm);
4319
4320 if (OriginalError == Match_Unsupported)
4321 return Error(IDLoc, "unsupported instruction", EmptyRange,
4322 MatchingInlineAsm);
4323
4324 assert(OriginalError == Match_InvalidOperand && "Unexpected error");
4325 // Recover location info for the operand if we know which was the problem.
4326 if (ErrorInfo != ~0ULL) {
4327 if (ErrorInfo >= Operands.size())
4328 return Error(IDLoc, "too few operands for instruction", EmptyRange,
4329 MatchingInlineAsm);
4330
4331 X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
4332 if (Operand.getStartLoc().isValid()) {
4333 SMRange OperandRange = Operand.getLocRange();
4334 return Error(Operand.getStartLoc(), "invalid operand for instruction",
4335 OperandRange, MatchingInlineAsm);
4336 }
4337 }
4338
4339 return Error(IDLoc, "invalid operand for instruction", EmptyRange,
4340 MatchingInlineAsm);
4341 }
4342
4343 // If one instruction matched as unsupported, report this as unsupported.
4344 if (llvm::count(Match, Match_Unsupported) == 1) {
4345 return Error(IDLoc, "unsupported instruction", EmptyRange,
4346 MatchingInlineAsm);
4347 }
4348
4349 // If one instruction matched with a missing feature, report this as a
4350 // missing feature.
4351 if (llvm::count(Match, Match_MissingFeature) == 1) {
4352 ErrorInfo = Match_MissingFeature;
4353 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures,
4354 MatchingInlineAsm);
4355 }
4356
4357 // If one instruction matched with an invalid operand, report this as an
4358 // operand failure.
4359 if (llvm::count(Match, Match_InvalidOperand) == 1) {
4360 return Error(IDLoc, "invalid operand for instruction", EmptyRange,
4361 MatchingInlineAsm);
4362 }
4363
4364 // If all of these were an outright failure, report it in a useless way.
4365 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
4366 EmptyRange, MatchingInlineAsm);
4367 return true;
4368}
4369
4370bool X86AsmParser::matchAndEmitIntelInstruction(
4371 SMLoc IDLoc, unsigned &Opcode, MCInst &Inst, OperandVector &Operands,
4372 MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) {
4373 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
4374 SMRange EmptyRange = std::nullopt;
4375 // Find one unsized memory operand, if present.
4376 X86Operand *UnsizedMemOp = nullptr;
4377 for (const auto &Op : Operands) {
4378 X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
4379 if (X86Op->isMemUnsized()) {
4380 UnsizedMemOp = X86Op;
4381 // Have we found an unqualified memory operand,
4382 // break. IA allows only one memory operand.
4383 break;
4384 }
4385 }
4386
4387 // Allow some instructions to have implicitly pointer-sized operands. This is
4388 // compatible with gas.
4389 StringRef Mnemonic = (static_cast<X86Operand &>(*Operands[0])).getToken();
4390 if (UnsizedMemOp) {
4391 static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
4392 for (const char *Instr : PtrSizedInstrs) {
4393 if (Mnemonic == Instr) {
4394 UnsizedMemOp->Mem.Size = getPointerWidth();
4395 break;
4396 }
4397 }
4398 }
4399
4401 FeatureBitset ErrorInfoMissingFeatures;
4402 FeatureBitset MissingFeatures;
4403 StringRef Base = (static_cast<X86Operand &>(*Operands[0])).getToken();
4404
4405 // If unsized push has immediate operand we should default the default pointer
4406 // size for the size.
4407 if (Mnemonic == "push" && Operands.size() == 2) {
4408 auto *X86Op = static_cast<X86Operand *>(Operands[1].get());
4409 if (X86Op->isImm()) {
4410 // If it's not a constant fall through and let remainder take care of it.
4411 const auto *CE = dyn_cast<MCConstantExpr>(X86Op->getImm());
4412 unsigned Size = getPointerWidth();
4413 if (CE &&
4414 (isIntN(Size, CE->getValue()) || isUIntN(Size, CE->getValue()))) {
4415 SmallString<16> Tmp;
4416 Tmp += Base;
4417 Tmp += (is64BitMode())
4418 ? "q"
4419 : (is32BitMode()) ? "l" : (is16BitMode()) ? "w" : " ";
4420 Op.setTokenValue(Tmp);
4421 // Do match in ATT mode to allow explicit suffix usage.
4422 Match.push_back(MatchInstruction(Operands, Inst, ErrorInfo,
4423 MissingFeatures, MatchingInlineAsm,
4424 false /*isParsingIntelSyntax()*/));
4425 Op.setTokenValue(Base);
4426 }
4427 }
4428 }
4429
4430 // If an unsized memory operand is present, try to match with each memory
4431 // operand size. In Intel assembly, the size is not part of the instruction
4432 // mnemonic.
4433 if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
4434 static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
4435 for (unsigned Size : MopSizes) {
4436 UnsizedMemOp->Mem.Size = Size;
4437 uint64_t ErrorInfoIgnore;
4438 unsigned LastOpcode = Inst.getOpcode();
4439 unsigned M = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
4440 MissingFeatures, MatchingInlineAsm,
4441 isParsingIntelSyntax());
4442 if (Match.empty() || LastOpcode != Inst.getOpcode())
4443 Match.push_back(M);
4444
4445 // If this returned as a missing feature failure, remember that.
4446 if (Match.back() == Match_MissingFeature)
4447 ErrorInfoMissingFeatures = MissingFeatures;
4448 }
4449
4450 // Restore the size of the unsized memory operand if we modified it.
4451 UnsizedMemOp->Mem.Size = 0;
4452 }
4453
4454 // If we haven't matched anything yet, this is not a basic integer or FPU
4455 // operation. There shouldn't be any ambiguity in our mnemonic table, so try
4456 // matching with the unsized operand.
4457 if (Match.empty()) {
4458 Match.push_back(MatchInstruction(
4459 Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm,
4460 isParsingIntelSyntax()));
4461 // If this returned as a missing feature failure, remember that.
4462 if (Match.back() == Match_MissingFeature)
4463 ErrorInfoMissingFeatures = MissingFeatures;
4464 }
4465
4466 // Restore the size of the unsized memory operand if we modified it.
4467 if (UnsizedMemOp)
4468 UnsizedMemOp->Mem.Size = 0;
4469
4470 // If it's a bad mnemonic, all results will be the same.
4471 if (Match.back() == Match_MnemonicFail) {
4472 return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
4473 Op.getLocRange(), MatchingInlineAsm);
4474 }
4475
4476 unsigned NumSuccessfulMatches = llvm::count(Match, Match_Success);
4477
4478 // If matching was ambiguous and we had size information from the frontend,
4479 // try again with that. This handles cases like "movxz eax, m8/m16".
4480 if (UnsizedMemOp && NumSuccessfulMatches > 1 &&
4481 UnsizedMemOp->getMemFrontendSize()) {
4482 UnsizedMemOp->Mem.Size = UnsizedMemOp->getMemFrontendSize();
4483 unsigned M = MatchInstruction(
4484 Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm,
4485 isParsingIntelSyntax());
4486 if (M == Match_Success)
4487 NumSuccessfulMatches = 1;
4488
4489 // Add a rewrite that encodes the size information we used from the
4490 // frontend.
4491 InstInfo->AsmRewrites->emplace_back(
4492 AOK_SizeDirective, UnsizedMemOp->getStartLoc(),
4493 /*Len=*/0, UnsizedMemOp->getMemFrontendSize());
4494 }
4495
4496 // If exactly one matched, then we treat that as a successful match (and the
4497 // instruction will already have been filled in correctly, since the failing
4498 // matches won't have modified it).
4499 if (NumSuccessfulMatches == 1) {
4500 if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
4501 return true;
4502 // Some instructions need post-processing to, for example, tweak which
4503 // encoding is selected. Loop on it while changes happen so the individual
4504 // transformations can chain off each other.
4505 if (!MatchingInlineAsm)
4506 while (processInstruction(Inst, Operands))
4507 ;
4508 Inst.setLoc(IDLoc);
4509 if (!MatchingInlineAsm)
4510 emitInstruction(Inst, Operands, Out);
4511 Opcode = Inst.getOpcode();
4512 return false;
4513 } else if (NumSuccessfulMatches > 1) {
4514 assert(UnsizedMemOp &&
4515 "multiple matches only possible with unsized memory operands");
4516 return Error(UnsizedMemOp->getStartLoc(),
4517 "ambiguous operand size for instruction '" + Mnemonic + "\'",
4518 UnsizedMemOp->getLocRange());
4519 }
4520
4521 // If one instruction matched as unsupported, report this as unsupported.
4522 if (llvm::count(Match, Match_Unsupported) == 1) {
4523 return Error(IDLoc, "unsupported instruction", EmptyRange,
4524 MatchingInlineAsm);
4525 }
4526
4527 // If one instruction matched with a missing feature, report this as a
4528 // missing feature.
4529 if (llvm::count(Match, Match_MissingFeature) == 1) {
4530 ErrorInfo = Match_MissingFeature;
4531 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures,
4532 MatchingInlineAsm);
4533 }
4534
4535 // If one instruction matched with an invalid operand, report this as an
4536 // operand failure.
4537 if (llvm::count(Match, Match_InvalidOperand) == 1) {
4538 return Error(IDLoc, "invalid operand for instruction", EmptyRange,
4539 MatchingInlineAsm);
4540 }
4541
4542 if (llvm::count(Match, Match_InvalidImmUnsignedi4) == 1) {
4543 SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc();
4544 if (ErrorLoc == SMLoc())
4545 ErrorLoc = IDLoc;
4546 return Error(ErrorLoc, "immediate must be an integer in range [0, 15]",
4547 EmptyRange, MatchingInlineAsm);
4548 }
4549
4550 // If all of these were an outright failure, report it in a useless way.
4551 return Error(IDLoc, "unknown instruction mnemonic", EmptyRange,
4552 MatchingInlineAsm);
4553}
4554
4555bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
4556 return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
4557}
4558
// Dispatches X86-specific assembler directives (.arch/.code*/syntax
// switches, .nops, .even, CodeView FPO, and SEH unwind directives).
// Returns false when the directive was recognized and handled (or a
// diagnostic was emitted); returns true for unrecognized directives so the
// generic parser can try them.
bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
  MCAsmParser &Parser = getParser();
  StringRef IDVal = DirectiveID.getIdentifier();
  if (IDVal.starts_with(".arch"))
    return parseDirectiveArch();
  if (IDVal.starts_with(".code"))
    return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
  else if (IDVal.starts_with(".att_syntax")) {
    // Optional "prefix"/"noprefix" argument; only the '%'-prefixed register
    // form is supported in AT&T syntax.
    if (getLexer().isNot(AsmToken::EndOfStatement)) {
      if (Parser.getTok().getString() == "prefix")
        Parser.Lex();
      else if (Parser.getTok().getString() == "noprefix")
        return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
                                           "supported: registers must have a "
                                           "'%' prefix in .att_syntax");
    }
    getParser().setAssemblerDialect(0);
    return false;
  } else if (IDVal.starts_with(".intel_syntax")) {
    getParser().setAssemblerDialect(1);
    // Mirror image of the .att_syntax case: Intel syntax registers must not
    // carry a '%' prefix.
    if (getLexer().isNot(AsmToken::EndOfStatement)) {
      if (Parser.getTok().getString() == "noprefix")
        Parser.Lex();
      else if (Parser.getTok().getString() == "prefix")
        return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
                                           "supported: registers must not have "
                                           "a '%' prefix in .intel_syntax");
    }
    return false;
  } else if (IDVal == ".nops")
    return parseDirectiveNops(DirectiveID.getLoc());
  else if (IDVal == ".even")
    return parseDirectiveEven(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_proc")
    return parseDirectiveFPOProc(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_setframe")
    return parseDirectiveFPOSetFrame(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_pushreg")
    return parseDirectiveFPOPushReg(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_stackalloc")
    return parseDirectiveFPOStackAlloc(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_stackalign")
    return parseDirectiveFPOStackAlign(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_endprologue")
    return parseDirectiveFPOEndPrologue(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_endproc")
    return parseDirectiveFPOEndProc(DirectiveID.getLoc());
  // SEH directives also accept MASM-style case-insensitive spellings when
  // parsing MASM input.
  else if (IDVal == ".seh_pushreg" ||
           (Parser.isParsingMasm() && IDVal.equals_insensitive(".pushreg")))
    return parseDirectiveSEHPushReg(DirectiveID.getLoc());
  else if (IDVal == ".seh_setframe" ||
           (Parser.isParsingMasm() && IDVal.equals_insensitive(".setframe")))
    return parseDirectiveSEHSetFrame(DirectiveID.getLoc());
  else if (IDVal == ".seh_savereg" ||
           (Parser.isParsingMasm() && IDVal.equals_insensitive(".savereg")))
    return parseDirectiveSEHSaveReg(DirectiveID.getLoc());
  else if (IDVal == ".seh_savexmm" ||
           (Parser.isParsingMasm() && IDVal.equals_insensitive(".savexmm128")))
    return parseDirectiveSEHSaveXMM(DirectiveID.getLoc());
  else if (IDVal == ".seh_pushframe" ||
           (Parser.isParsingMasm() && IDVal.equals_insensitive(".pushframe")))
    return parseDirectiveSEHPushFrame(DirectiveID.getLoc());

  return true;
}
4624
4625bool X86AsmParser::parseDirectiveArch() {
4626 // Ignore .arch for now.
4627 getParser().parseStringToEndOfStatement();
4628 return false;
4629}
4630
4631/// parseDirectiveNops
4632/// ::= .nops size[, control]
/// parseDirectiveNops
/// ::= .nops size[, control]
// Parses the total byte count (required) and an optional maximum-NOP-size
// control value, validates both, and asks the streamer to emit the padding.
// Returns true only on a parse failure; semantic errors emit a diagnostic
// and return false so the parser keeps going.
bool X86AsmParser::parseDirectiveNops(SMLoc L) {
  int64_t NumBytes = 0, Control = 0;
  SMLoc NumBytesLoc, ControlLoc;
  const MCSubtargetInfo& STI = getSTI();
  NumBytesLoc = getTok().getLoc();
  if (getParser().checkForValidSection() ||
      getParser().parseAbsoluteExpression(NumBytes))
    return true;

  // Second operand is optional; only parse it after a comma.
  if (parseOptionalToken(AsmToken::Comma)) {
    ControlLoc = getTok().getLoc();
    if (getParser().parseAbsoluteExpression(Control))
      return true;
  }
  if (getParser().parseEOL())
    return true;

  // Diagnostic already recorded via Error(); return false to keep parsing.
  if (NumBytes <= 0) {
    Error(NumBytesLoc, "'.nops' directive with non-positive size");
    return false;
  }

  if (Control < 0) {
    Error(ControlLoc, "'.nops' directive with negative NOP size");
    return false;
  }

  /// Emit nops
  getParser().getStreamer().emitNops(NumBytes, Control, L, STI);

  return false;
}
4665
4666/// parseDirectiveEven
4667/// ::= .even
4668bool X86AsmParser::parseDirectiveEven(SMLoc L) {
4669 if (parseEOL())
4670 return false;
4671
4672 const MCSection *Section = getStreamer().getCurrentSectionOnly();
4673 if (!Section) {
4674 getStreamer().initSections(false, getSTI());
4675 Section = getStreamer().getCurrentSectionOnly();
4676 }
4677 if (Section->useCodeAlign())
4678 getStreamer().emitCodeAlignment(Align(2), &getSTI(), 0);
4679 else
4680 getStreamer().emitValueToAlignment(Align(2), 0, 1, 0);
4681 return false;
4682}
4683
4684/// ParseDirectiveCode
4685/// ::= .code16 | .code32 | .code64
4686bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
4687 MCAsmParser &Parser = getParser();
4688 Code16GCC = false;
4689 if (IDVal == ".code16") {
4690 Parser.Lex();
4691 if (!is16BitMode()) {
4692 SwitchMode(X86::Is16Bit);
4693 getParser().getStreamer().emitAssemblerFlag(MCAF_Code16);
4694 }
4695 } else if (IDVal == ".code16gcc") {
4696 // .code16gcc parses as if in 32-bit mode, but emits code in 16-bit mode.
4697 Parser.Lex();
4698 Code16GCC = true;
4699 if (!is16BitMode()) {
4700 SwitchMode(X86::Is16Bit);
4701 getParser().getStreamer().emitAssemblerFlag(MCAF_Code16);
4702 }
4703 } else if (IDVal == ".code32") {
4704 Parser.Lex();
4705 if (!is32BitMode()) {
4706 SwitchMode(X86::Is32Bit);
4707 getParser().getStreamer().emitAssemblerFlag(MCAF_Code32);
4708 }
4709 } else if (IDVal == ".code64") {
4710 Parser.Lex();
4711 if (!is64BitMode()) {
4712 SwitchMode(X86::Is64Bit);
4713 getParser().getStreamer().emitAssemblerFlag(MCAF_Code64);
4714 }
4715 } else {
4716 Error(L, "unknown directive " + IDVal);
4717 return false;
4718 }
4719
4720 return false;
4721}
4722
4723// .cv_fpo_proc foo
4724bool X86AsmParser::parseDirectiveFPOProc(SMLoc L) {
4725 MCAsmParser &Parser = getParser();
4726 StringRef ProcName;
4727 int64_t ParamsSize;
4728 if (Parser.parseIdentifier(ProcName))
4729 return Parser.TokError("expected symbol name");
4730 if (Parser.parseIntToken(ParamsSize, "expected parameter byte count"))
4731 return true;
4732 if (!isUIntN(32, ParamsSize))
4733 return Parser.TokError("parameters size out of range");
4734 if (parseEOL())
4735 return true;
4736 MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName);
4737 return getTargetStreamer().emitFPOProc(ProcSym, ParamsSize, L);
4738}
4739
4740// .cv_fpo_setframe ebp
4741bool X86AsmParser::parseDirectiveFPOSetFrame(SMLoc L) {
4743 SMLoc DummyLoc;
4744 if (parseRegister(Reg, DummyLoc, DummyLoc) || parseEOL())
4745 return true;
4746 return getTargetStreamer().emitFPOSetFrame(Reg, L);
4747}
4748
4749// .cv_fpo_pushreg ebx
4750bool X86AsmParser::parseDirectiveFPOPushReg(SMLoc L) {
4752 SMLoc DummyLoc;
4753 if (parseRegister(Reg, DummyLoc, DummyLoc) || parseEOL())
4754 return true;
4755 return getTargetStreamer().emitFPOPushReg(Reg, L);
4756}
4757
4758// .cv_fpo_stackalloc 20
4759bool X86AsmParser::parseDirectiveFPOStackAlloc(SMLoc L) {
4760 MCAsmParser &Parser = getParser();
4761 int64_t Offset;
4762 if (Parser.parseIntToken(Offset, "expected offset") || parseEOL())
4763 return true;
4764 return getTargetStreamer().emitFPOStackAlloc(Offset, L);
4765}
4766
4767// .cv_fpo_stackalign 8
4768bool X86AsmParser::parseDirectiveFPOStackAlign(SMLoc L) {
4769 MCAsmParser &Parser = getParser();
4770 int64_t Offset;
4771 if (Parser.parseIntToken(Offset, "expected offset") || parseEOL())
4772 return true;
4773 return getTargetStreamer().emitFPOStackAlign(Offset, L);
4774}
4775
4776// .cv_fpo_endprologue
4777bool X86AsmParser::parseDirectiveFPOEndPrologue(SMLoc L) {
4778 MCAsmParser &Parser = getParser();
4779 if (Parser.parseEOL())
4780 return true;
4781 return getTargetStreamer().emitFPOEndPrologue(L);
4782}
4783
4784// .cv_fpo_endproc
4785bool X86AsmParser::parseDirectiveFPOEndProc(SMLoc L) {
4786 MCAsmParser &Parser = getParser();
4787 if (Parser.parseEOL())
4788 return true;
4789 return getTargetStreamer().emitFPOEndProc(L);
4790}
4791
4792bool X86AsmParser::parseSEHRegisterNumber(unsigned RegClassID,
4793 MCRegister &RegNo) {
4794 SMLoc startLoc = getLexer().getLoc();
4795 const MCRegisterInfo *MRI = getContext().getRegisterInfo();
4796
4797 // Try parsing the argument as a register first.
4798 if (getLexer().getTok().isNot(AsmToken::Integer)) {
4799 SMLoc endLoc;
4800 if (parseRegister(RegNo, startLoc, endLoc))
4801 return true;
4802
4803 if (!X86MCRegisterClasses[RegClassID].contains(RegNo)) {
4804 return Error(startLoc,
4805 "register is not supported for use with this directive");
4806 }
4807 } else {
4808 // Otherwise, an integer number matching the encoding of the desired
4809 // register may appear.
4810 int64_t EncodedReg;
4811 if (getParser().parseAbsoluteExpression(EncodedReg))
4812 return true;
4813
4814 // The SEH register number is the same as the encoding register number. Map
4815 // from the encoding back to the LLVM register number.
4816 RegNo = 0;
4817 for (MCPhysReg Reg : X86MCRegisterClasses[RegClassID]) {
4818 if (MRI->getEncodingValue(Reg) == EncodedReg) {
4819 RegNo = Reg;
4820 break;
4821 }
4822 }
4823 if (RegNo == 0) {
4824 return Error(startLoc,
4825 "incorrect register number for use with this directive");
4826 }
4827 }
4828
4829 return false;
4830}
4831
4832bool X86AsmParser::parseDirectiveSEHPushReg(SMLoc Loc) {
4834 if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg))
4835 return true;
4836
4837 if (getLexer().isNot(AsmToken::EndOfStatement))
4838 return TokError("expected end of directive");
4839
4840 getParser().Lex();
4841 getStreamer().emitWinCFIPushReg(Reg, Loc);
4842 return false;
4843}
4844
4845bool X86AsmParser::parseDirectiveSEHSetFrame(SMLoc Loc) {
4847 int64_t Off;
4848 if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg))
4849 return true;
4850 if (getLexer().isNot(AsmToken::Comma))
4851 return TokError("you must specify a stack pointer offset");
4852
4853 getParser().Lex();
4854 if (getParser().parseAbsoluteExpression(Off))
4855 return true;
4856
4857 if (getLexer().isNot(AsmToken::EndOfStatement))
4858 return TokError("expected end of directive");
4859
4860 getParser().Lex();
4861 getStreamer().emitWinCFISetFrame(Reg, Off, Loc);
4862 return false;
4863}
4864
4865bool X86AsmParser::parseDirectiveSEHSaveReg(SMLoc Loc) {
4867 int64_t Off;
4868 if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg))
4869 return true;
4870 if (getLexer().isNot(AsmToken::Comma))
4871 return TokError("you must specify an offset on the stack");
4872
4873 getParser().Lex();
4874 if (getParser().parseAbsoluteExpression(Off))
4875 return true;
4876
4877 if (getLexer().isNot(AsmToken::EndOfStatement))
4878 return TokError("expected end of directive");
4879
4880 getParser().Lex();
4881 getStreamer().emitWinCFISaveReg(Reg, Off, Loc);
4882 return false;
4883}
4884
4885bool X86AsmParser::parseDirectiveSEHSaveXMM(SMLoc Loc) {
4887 int64_t Off;
4888 if (parseSEHRegisterNumber(X86::VR128XRegClassID, Reg))
4889 return true;
4890 if (getLexer().isNot(AsmToken::Comma))
4891 return TokError("you must specify an offset on the stack");
4892
4893 getParser().Lex();
4894 if (getParser().parseAbsoluteExpression(Off))
4895 return true;
4896
4897 if (getLexer().isNot(AsmToken::EndOfStatement))
4898 return TokError("expected end of directive");
4899
4900 getParser().Lex();
4901 getStreamer().emitWinCFISaveXMM(Reg, Off, Loc);
4902 return false;
4903}
4904
4905bool X86AsmParser::parseDirectiveSEHPushFrame(SMLoc Loc) {
4906 bool Code = false;
4907 StringRef CodeID;
4908 if (getLexer().is(AsmToken::At)) {
4909 SMLoc startLoc = getLexer().getLoc();
4910 getParser().Lex();
4911 if (!getParser().parseIdentifier(CodeID)) {
4912 if (CodeID != "code")
4913 return Error(startLoc, "expected @code");
4914 Code = true;
4915 }
4916 }
4917
4918 if (getLexer().isNot(AsmToken::EndOfStatement))
4919 return TokError("expected end of directive");
4920
4921 getParser().Lex();
4922 getStreamer().emitWinCFIPushFrame(Code, Loc);
4923 return false;
4924}
4925
4926// Force static initialization.
4930}
4931
4932#define GET_MATCHER_IMPLEMENTATION
4933#include "X86GenAsmMatcher.inc"
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static const char * getSubtargetFeatureName(uint64_t Val)
static bool isNot(const MachineRegisterInfo &MRI, const MachineInstr &MI)
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
#define LLVM_EXTERNAL_VISIBILITY
Definition: Compiler.h:135
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
std::string Name
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
Symbol * Sym
Definition: ELF_riscv.cpp:479
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
#define check(cond)
amode Optimize addressing mode
static ModuleSymbolTable::Symbol getSym(DataRefImpl &Symb)
#define RegName(no)
static LVOptions Options
Definition: LVOptions.cpp:25
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
static bool IsVCMP(unsigned Opcode)
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
This file defines the SmallString class.
This file defines the SmallVector class.
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
DEMANGLE_NAMESPACE_BEGIN bool starts_with(std::string_view self, char C) noexcept
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:40
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
static cl::opt< bool > LVIInlineAsmHardening("x86-experimental-lvi-inline-asm-hardening", cl::desc("Harden inline assembly code that may be vulnerable to Load Value" " Injection (LVI). This feature is experimental."), cl::Hidden)
static bool checkScale(unsigned Scale, StringRef &ErrMsg)
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86AsmParser()
static unsigned getPrefixes(OperandVector &Operands)
static bool CheckBaseRegAndIndexRegAndScale(unsigned BaseReg, unsigned IndexReg, unsigned Scale, bool Is64BitMode, StringRef &ErrMsg)
Value * RHS
Value * LHS
static unsigned getSize(unsigned Kind)
Class for arbitrary precision integers.
Definition: APInt.h:77
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1499
Target independent representation for an assembler token.
Definition: MCAsmMacro.h:21
SMLoc getLoc() const
Definition: MCAsmLexer.cpp:26
int64_t getIntVal() const
Definition: MCAsmMacro.h:115
bool isNot(TokenKind K) const
Definition: MCAsmMacro.h:83
StringRef getString() const
Get the string for the current token, this includes all characters (for example, the quotes on string...
Definition: MCAsmMacro.h:110
bool is(TokenKind K) const
Definition: MCAsmMacro.h:82
TokenKind getKind() const
Definition: MCAsmMacro.h:81
SMLoc getEndLoc() const
Definition: MCAsmLexer.cpp:30