LLVM 17.0.0git
MasmParser.cpp
Go to the documentation of this file.
1//===- MasmParser.cpp - Parser for Assembly Files -------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This class implements the parser for assembly files.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/ADT/APFloat.h"
14#include "llvm/ADT/APInt.h"
15#include "llvm/ADT/ArrayRef.h"
16#include "llvm/ADT/BitVector.h"
17#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/StringMap.h"
22#include "llvm/ADT/StringRef.h"
24#include "llvm/ADT/Twine.h"
27#include "llvm/MC/MCAsmInfo.h"
28#include "llvm/MC/MCCodeView.h"
29#include "llvm/MC/MCContext.h"
31#include "llvm/MC/MCDwarf.h"
32#include "llvm/MC/MCExpr.h"
34#include "llvm/MC/MCInstrDesc.h"
35#include "llvm/MC/MCInstrInfo.h"
44#include "llvm/MC/MCSection.h"
45#include "llvm/MC/MCStreamer.h"
47#include "llvm/MC/MCSymbol.h"
52#include "llvm/Support/Format.h"
53#include "llvm/Support/MD5.h"
56#include "llvm/Support/Path.h"
57#include "llvm/Support/SMLoc.h"
60#include <algorithm>
61#include <cassert>
62#include <climits>
63#include <cstddef>
64#include <cstdint>
65#include <ctime>
66#include <deque>
67#include <memory>
68#include <optional>
69#include <sstream>
70#include <string>
71#include <tuple>
72#include <utility>
73#include <vector>
74
75using namespace llvm;
76
77namespace {
78
79/// Helper types for tracking macro definitions.
80typedef std::vector<AsmToken> MCAsmMacroArgument;
81typedef std::vector<MCAsmMacroArgument> MCAsmMacroArguments;
82
83/// Helper class for storing information about an active macro instantiation.
84struct MacroInstantiation {
85 /// The location of the instantiation.
86 SMLoc InstantiationLoc;
87
88 /// The buffer where parsing should resume upon instantiation completion.
89 unsigned ExitBuffer;
90
91 /// The location where parsing should resume upon instantiation completion.
92 SMLoc ExitLoc;
93
94 /// The depth of TheCondStack at the start of the instantiation.
95 size_t CondStackDepth;
96};
97
/// Per-statement parse state: the opcode of the last parsed instruction, an
/// inline-assembly error flag, an optional macro exit value, and a pointer to
/// a list of AsmRewrite entries.
98struct ParseStatementInfo {
99 /// The parsed operands from the last parsed statement.
 // NOTE(review): the member declaration this comment describes is not
 // visible in this listing.
101
102 /// The opcode from the last parsed instruction.
103 unsigned Opcode = ~0U;
104
105 /// Was there an error parsing the inline assembly?
106 bool ParseError = false;
107
108 /// The value associated with a macro exit.
109 std::optional<std::string> ExitValue;
110
 /// List of AsmRewrite entries supplied through the constructor; null unless
 /// one was passed in. This struct stores the pointer only.
111 SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr;
112
 /// Default construction is disabled; a rewrite list pointer must be given.
113 ParseStatementInfo() = delete;
 /// Stores \p rewrites in AsmRewrites; all other members keep their
 /// in-class defaults.
114 ParseStatementInfo(SmallVectorImpl<AsmRewrite> *rewrites)
115 : AsmRewrites(rewrites) {}
116};
117
/// Discriminates the kinds of field a struct can contain. Each kind has its
/// own initializer representation.
enum FieldType {
  /// Initializer is an integer expression, stored as an MCExpr.
  FT_INTEGRAL,
  /// Initializer is a real number, stored as an APInt.
  FT_REAL,
  /// Initializer is a struct initializer, stored recursively.
  FT_STRUCT
};
123
124struct FieldInfo;
/// Layout description of a struct or union: its fields plus the running
/// offset/alignment state that addField() maintains.
// NOTE(review): the constructor defined later initializes a member called
// `Name`; its declaration is not visible in this listing.
125struct StructInfo {
 /// When set, addField() does not advance NextOffset.
127 bool IsUnion = false;
 // NOTE(review): meaning not determinable from the visible code.
128 bool Initializable = true;
 /// Cap applied to each field's alignment: addField() aligns a field to
 /// min(Alignment, FieldAlignmentSize).
129 unsigned Alignment = 0;
 /// Largest FieldAlignmentSize passed to addField() so far.
130 unsigned AlignmentSize = 0;
 /// Offset from which addField() aligns the next field.
131 unsigned NextOffset = 0;
 // NOTE(review): not updated by addField(); presumably the total size in
 // bytes, maintained by callers - confirm.
132 unsigned Size = 0;
 /// Fields in declaration order.
133 std::vector<FieldInfo> Fields;
 /// Lower-cased field name -> index into Fields; unnamed fields are absent.
134 StringMap<size_t> FieldsByName;
135
 /// Appends a new field of kind \p FT and returns a reference to it.
136 FieldInfo &addField(StringRef FieldName, FieldType FT,
137 unsigned FieldAlignmentSize);
138
139 StructInfo() = default;
140 StructInfo(StringRef StructName, bool Union, unsigned AlignmentValue);
141};
142
143// FIXME: This should probably use a class hierarchy, raw pointers between the
144// objects, and dynamic type resolution instead of a union. On the other hand,
145// ownership then becomes much more complicated; the obvious thing would be to
146// use BumpPtrAllocator, but the lack of a destructor makes that messy.
147
148struct StructInitializer;
/// Payload of an integral field: a list of MCExpr values.
// NOTE(review): the constructors assign to a member called `Values`; its
// declaration is not visible in this listing.
149struct IntFieldInfo {
151
152 IntFieldInfo() = default;
 /// Copies \p V into Values.
153 IntFieldInfo(const SmallVector<const MCExpr *, 1> &V) { Values = V; }
 /// Moves \p V into Values.
154 IntFieldInfo(SmallVector<const MCExpr *, 1> &&V) { Values = std::move(V); }
155};
156struct RealFieldInfo {
157 SmallVector<APInt, 1> AsIntValues;
158
159 RealFieldInfo() = default;
160 RealFieldInfo(const SmallVector<APInt, 1> &V) { AsIntValues = V; }
161 RealFieldInfo(SmallVector<APInt, 1> &&V) { AsIntValues = std::move(V); }
162};
163struct StructFieldInfo {
164 std::vector<StructInitializer> Initializers;
165 StructInfo Structure;
166
167 StructFieldInfo() = default;
168 StructFieldInfo(std::vector<StructInitializer> V, StructInfo S);
169};
170
171class FieldInitializer {
172public:
173 FieldType FT;
174 union {
175 IntFieldInfo IntInfo;
176 RealFieldInfo RealInfo;
177 StructFieldInfo StructInfo;
178 };
179
180 ~FieldInitializer();
181 FieldInitializer(FieldType FT);
182
183 FieldInitializer(SmallVector<const MCExpr *, 1> &&Values);
184 FieldInitializer(SmallVector<APInt, 1> &&AsIntValues);
185 FieldInitializer(std::vector<StructInitializer> &&Initializers,
186 struct StructInfo Structure);
187
188 FieldInitializer(const FieldInitializer &Initializer);
189 FieldInitializer(FieldInitializer &&Initializer);
190
191 FieldInitializer &operator=(const FieldInitializer &Initializer);
192 FieldInitializer &operator=(FieldInitializer &&Initializer);
193};
194
195struct StructInitializer {
196 std::vector<FieldInitializer> FieldInitializers;
197};
198
199struct FieldInfo {
200 // Offset of the field within the containing STRUCT.
201 unsigned Offset = 0;
202
203 // Total size of the field (= LengthOf * Type).
204 unsigned SizeOf = 0;
205
206 // Number of elements in the field (1 if scalar, >1 if an array).
207 unsigned LengthOf = 0;
208
209 // Size of a single entry in this field, in bytes ("type" in MASM standards).
210 unsigned Type = 0;
211
212 FieldInitializer Contents;
213
214 FieldInfo(FieldType FT) : Contents(FT) {}
215};
216
217StructFieldInfo::StructFieldInfo(std::vector<StructInitializer> V,
218 StructInfo S) {
219 Initializers = std::move(V);
220 Structure = S;
221}
222
223StructInfo::StructInfo(StringRef StructName, bool Union,
224 unsigned AlignmentValue)
225 : Name(StructName), IsUnion(Union), Alignment(AlignmentValue) {}
226
227FieldInfo &StructInfo::addField(StringRef FieldName, FieldType FT,
228 unsigned FieldAlignmentSize) {
229 if (!FieldName.empty())
230 FieldsByName[FieldName.lower()] = Fields.size();
231 Fields.emplace_back(FT);
232 FieldInfo &Field = Fields.back();
233 Field.Offset =
234 llvm::alignTo(NextOffset, std::min(Alignment, FieldAlignmentSize));
235 if (!IsUnion) {
236 NextOffset = std::max(NextOffset, Field.Offset);
237 }
238 AlignmentSize = std::max(AlignmentSize, FieldAlignmentSize);
239 return Field;
240}
241
242FieldInitializer::~FieldInitializer() {
243 switch (FT) {
244 case FT_INTEGRAL:
245 IntInfo.~IntFieldInfo();
246 break;
247 case FT_REAL:
248 RealInfo.~RealFieldInfo();
249 break;
250 case FT_STRUCT:
251 StructInfo.~StructFieldInfo();
252 break;
253 }
254}
255
256FieldInitializer::FieldInitializer(FieldType FT) : FT(FT) {
257 switch (FT) {
258 case FT_INTEGRAL:
259 new (&IntInfo) IntFieldInfo();
260 break;
261 case FT_REAL:
262 new (&RealInfo) RealFieldInfo();
263 break;
264 case FT_STRUCT:
265 new (&StructInfo) StructFieldInfo();
266 break;
267 }
268}
269
270FieldInitializer::FieldInitializer(SmallVector<const MCExpr *, 1> &&Values)
271 : FT(FT_INTEGRAL) {
272 new (&IntInfo) IntFieldInfo(std::move(Values));
273}
274
275FieldInitializer::FieldInitializer(SmallVector<APInt, 1> &&AsIntValues)
276 : FT(FT_REAL) {
277 new (&RealInfo) RealFieldInfo(std::move(AsIntValues));
278}
279
280FieldInitializer::FieldInitializer(
281 std::vector<StructInitializer> &&Initializers, struct StructInfo Structure)
282 : FT(FT_STRUCT) {
283 new (&StructInfo) StructFieldInfo(std::move(Initializers), Structure);
284}
285
286FieldInitializer::FieldInitializer(const FieldInitializer &Initializer)
287 : FT(Initializer.FT) {
288 switch (FT) {
289 case FT_INTEGRAL:
290 new (&IntInfo) IntFieldInfo(Initializer.IntInfo);
291 break;
292 case FT_REAL:
293 new (&RealInfo) RealFieldInfo(Initializer.RealInfo);
294 break;
295 case FT_STRUCT:
296 new (&StructInfo) StructFieldInfo(Initializer.StructInfo);
297 break;
298 }
299}
300
301FieldInitializer::FieldInitializer(FieldInitializer &&Initializer)
302 : FT(Initializer.FT) {
303 switch (FT) {
304 case FT_INTEGRAL:
305 new (&IntInfo) IntFieldInfo(Initializer.IntInfo);
306 break;
307 case FT_REAL:
308 new (&RealInfo) RealFieldInfo(Initializer.RealInfo);
309 break;
310 case FT_STRUCT:
311 new (&StructInfo) StructFieldInfo(Initializer.StructInfo);
312 break;
313 }
314}
315
316FieldInitializer &
317FieldInitializer::operator=(const FieldInitializer &Initializer) {
318 if (FT != Initializer.FT) {
319 switch (FT) {
320 case FT_INTEGRAL:
321 IntInfo.~IntFieldInfo();
322 break;
323 case FT_REAL:
324 RealInfo.~RealFieldInfo();
325 break;
326 case FT_STRUCT:
327 StructInfo.~StructFieldInfo();
328 break;
329 }
330 }
331 FT = Initializer.FT;
332 switch (FT) {
333 case FT_INTEGRAL:
334 IntInfo = Initializer.IntInfo;
335 break;
336 case FT_REAL:
337 RealInfo = Initializer.RealInfo;
338 break;
339 case FT_STRUCT:
340 StructInfo = Initializer.StructInfo;
341 break;
342 }
343 return *this;
344}
345
346FieldInitializer &FieldInitializer::operator=(FieldInitializer &&Initializer) {
347 if (FT != Initializer.FT) {
348 switch (FT) {
349 case FT_INTEGRAL:
350 IntInfo.~IntFieldInfo();
351 break;
352 case FT_REAL:
353 RealInfo.~RealFieldInfo();
354 break;
355 case FT_STRUCT:
356 StructInfo.~StructFieldInfo();
357 break;
358 }
359 }
360 FT = Initializer.FT;
361 switch (FT) {
362 case FT_INTEGRAL:
363 IntInfo = Initializer.IntInfo;
364 break;
365 case FT_REAL:
366 RealInfo = Initializer.RealInfo;
367 break;
368 case FT_STRUCT:
369 StructInfo = Initializer.StructInfo;
370 break;
371 }
372 return *this;
373}
374
375/// The concrete assembly parser instance.
376// Note that this is a full MCAsmParser, not an MCAsmParserExtension!
377// It's a peer of AsmParser, not of COFFAsmParser, WasmAsmParser, etc.
378class MasmParser : public MCAsmParser {
379private:
380 AsmLexer Lexer;
381 MCContext &Ctx;
382 MCStreamer &Out;
383 const MCAsmInfo &MAI;
385 SourceMgr::DiagHandlerTy SavedDiagHandler;
386 void *SavedDiagContext;
387 std::unique_ptr<MCAsmParserExtension> PlatformParser;
388
389 /// This is the current buffer index we're lexing from as managed by the
390 /// SourceMgr object.
391 unsigned CurBuffer;
392
393 /// time of assembly
394 struct tm TM;
395
396 BitVector EndStatementAtEOFStack;
397
398 AsmCond TheCondState;
399 std::vector<AsmCond> TheCondStack;
400
401 /// maps directive names to handler methods in parser
402 /// extensions. Extensions register themselves in this map by calling
403 /// addDirectiveHandler.
404 StringMap<ExtensionDirectiveHandler> ExtensionDirectiveMap;
405
406 /// maps assembly-time variable names to variables.
/// Record kept for one assembly-time variable.
// NOTE(review): one member line of this struct is not visible in this
// listing.
407 struct Variable {
 /// Redefinition policies a variable can have.
408 enum RedefinableKind { NOT_REDEFINABLE, WARN_ON_REDEFINITION, REDEFINABLE };
409
 /// Redefinition policy of this variable; REDEFINABLE by default.
411 RedefinableKind Redefinable = REDEFINABLE;
 // NOTE(review): presumably set when the variable holds text (TextValue)
 // rather than a numeric value - confirm against the users of this struct.
412 bool IsText = false;
 /// Text associated with the variable; empty by default.
413 std::string TextValue;
414 };
415 StringMap<Variable> Variables;
416
417 /// Stack of active struct definitions.
418 SmallVector<StructInfo, 1> StructInProgress;
419
420 /// Maps struct tags to struct definitions.
421 StringMap<StructInfo> Structs;
422
423 /// Maps data location names to types.
424 StringMap<AsmTypeInfo> KnownType;
425
426 /// Stack of active macro instantiations.
427 std::vector<MacroInstantiation*> ActiveMacros;
428
429 /// List of bodies of anonymous macros.
430 std::deque<MCAsmMacro> MacroLikeBodies;
431
432 /// Keeps track of how many .macro's have been instantiated.
433 unsigned NumOfMacroInstantiations;
434
435 /// The values from the last parsed cpp hash file line comment if any.
/// Values recorded from a cpp hash file line comment: a line number, a
/// source location and a buffer id.
// NOTE(review): one member line of this struct is not visible in this
// listing.
436 struct CppHashInfoTy {
 /// Line number; the constructor sets it to 0.
438 int64_t LineNumber;
 /// Source location; default-constructed by the constructor.
439 SMLoc Loc;
 /// Buffer id; the constructor sets it to 0.
440 unsigned Buf;
441 CppHashInfoTy() : LineNumber(0), Buf(0) {}
442 };
443 CppHashInfoTy CppHashInfo;
444
445 /// The filename from the first cpp hash file line comment, if any.
446 StringRef FirstCppHashFilename;
447
448 /// List of forward directional labels for diagnosis at the end.
450
451 /// AssemblerDialect. ~0U means unset value and use value provided by MAI.
452 /// Defaults to 1U, meaning Intel.
453 unsigned AssemblerDialect = 1U;
454
455 /// is Darwin compatibility enabled?
456 bool IsDarwin = false;
457
458 /// Are we parsing ms-style inline assembly?
459 bool ParsingMSInlineAsm = false;
460
461 /// Did we already inform the user about inconsistent MD5 usage?
462 bool ReportedInconsistentMD5 = false;
463
464 // Current <...> expression depth.
465 unsigned AngleBracketDepth = 0U;
466
467 // Number of locals defined.
468 uint16_t LocalCounter = 0;
469
470public:
471 MasmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
472 const MCAsmInfo &MAI, struct tm TM, unsigned CB = 0);
473 MasmParser(const MasmParser &) = delete;
474 MasmParser &operator=(const MasmParser &) = delete;
475 ~MasmParser() override;
476
477 bool Run(bool NoInitialTextSection, bool NoFinalize = false) override;
478
480 ExtensionDirectiveHandler Handler) override {
481 ExtensionDirectiveMap[Directive] = Handler;
482 if (!DirectiveKindMap.contains(Directive)) {
483 DirectiveKindMap[Directive] = DK_HANDLER_DIRECTIVE;
484 }
485 }
486
487 void addAliasForDirective(StringRef Directive, StringRef Alias) override {
488 DirectiveKindMap[Directive] = DirectiveKindMap[Alias];
489 }
490
491 /// @name MCAsmParser Interface
492 /// {
493
494 SourceMgr &getSourceManager() override { return SrcMgr; }
495 MCAsmLexer &getLexer() override { return Lexer; }
496 MCContext &getContext() override { return Ctx; }
497 MCStreamer &getStreamer() override { return Out; }
498
499 CodeViewContext &getCVContext() { return Ctx.getCVContext(); }
500
501 unsigned getAssemblerDialect() override {
502 if (AssemblerDialect == ~0U)
503 return MAI.getAssemblerDialect();
504 else
505 return AssemblerDialect;
506 }
507 void setAssemblerDialect(unsigned i) override {
508 AssemblerDialect = i;
509 }
510
511 void Note(SMLoc L, const Twine &Msg, SMRange Range = std::nullopt) override;
512 bool Warning(SMLoc L, const Twine &Msg,
513 SMRange Range = std::nullopt) override;
514 bool printError(SMLoc L, const Twine &Msg,
515 SMRange Range = std::nullopt) override;
516
517 enum ExpandKind { ExpandMacros, DoNotExpandMacros };
518 const AsmToken &Lex(ExpandKind ExpandNextToken);
519 const AsmToken &Lex() override { return Lex(ExpandMacros); }
520
521 void setParsingMSInlineAsm(bool V) override {
522 ParsingMSInlineAsm = V;
523 // When parsing MS inline asm, we must lex 0b1101 and 0ABCH as binary and
524 // hex integer literals.
525 Lexer.setLexMasmIntegers(V);
526 }
527 bool isParsingMSInlineAsm() override { return ParsingMSInlineAsm; }
528
529 bool isParsingMasm() const override { return true; }
530
531 bool defineMacro(StringRef Name, StringRef Value) override;
532
533 bool lookUpField(StringRef Name, AsmFieldInfo &Info) const override;
534 bool lookUpField(StringRef Base, StringRef Member,
535 AsmFieldInfo &Info) const override;
536
537 bool lookUpType(StringRef Name, AsmTypeInfo &Info) const override;
538
539 bool parseMSInlineAsm(std::string &AsmString, unsigned &NumOutputs,
540 unsigned &NumInputs,
541 SmallVectorImpl<std::pair<void *, bool>> &OpDecls,
542 SmallVectorImpl<std::string> &Constraints,
544 const MCInstrInfo *MII, const MCInstPrinter *IP,
545 MCAsmParserSemaCallback &SI) override;
546
547 bool parseExpression(const MCExpr *&Res);
548 bool parseExpression(const MCExpr *&Res, SMLoc &EndLoc) override;
549 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
550 AsmTypeInfo *TypeInfo) override;
551 bool parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) override;
552 bool parseParenExprOfDepth(unsigned ParenDepth, const MCExpr *&Res,
553 SMLoc &EndLoc) override;
554 bool parseAbsoluteExpression(int64_t &Res) override;
555
556 /// Parse a floating point expression using the float \p Semantics
557 /// and set \p Res to the value.
558 bool parseRealValue(const fltSemantics &Semantics, APInt &Res);
559
560 /// Parse an identifier or string (as a quoted identifier)
561 /// and set \p Res to the identifier contents.
562 enum IdentifierPositionKind { StandardPosition, StartOfStatement };
563 bool parseIdentifier(StringRef &Res, IdentifierPositionKind Position);
564 bool parseIdentifier(StringRef &Res) override {
565 return parseIdentifier(Res, StandardPosition);
566 }
567 void eatToEndOfStatement() override;
568
569 bool checkForValidSection() override;
570
571 /// }
572
573private:
574 bool expandMacros();
575 const AsmToken peekTok(bool ShouldSkipSpace = true);
576
577 bool parseStatement(ParseStatementInfo &Info,
579 bool parseCurlyBlockScope(SmallVectorImpl<AsmRewrite>& AsmStrRewrites);
580 bool parseCppHashLineFilenameComment(SMLoc L);
581
582 bool expandMacro(raw_svector_ostream &OS, StringRef Body,
585 const std::vector<std::string> &Locals, SMLoc L);
586
587 /// Are we inside a macro instantiation?
588 bool isInsideMacroInstantiation() {return !ActiveMacros.empty();}
589
590 /// Handle entry to macro instantiation.
591 ///
592 /// \param M The macro.
593 /// \param NameLoc Instantiation location.
594 bool handleMacroEntry(
595 const MCAsmMacro *M, SMLoc NameLoc,
597
598 /// Handle invocation of macro function.
599 ///
600 /// \param M The macro.
601 /// \param NameLoc Invocation location.
602 bool handleMacroInvocation(const MCAsmMacro *M, SMLoc NameLoc);
603
604 /// Handle exit from macro instantiation.
605 void handleMacroExit();
606
607 /// Extract AsmTokens for a macro argument.
608 bool
609 parseMacroArgument(const MCAsmMacroParameter *MP, MCAsmMacroArgument &MA,
611
612 /// Parse all macro arguments for a given macro.
613 bool
614 parseMacroArguments(const MCAsmMacro *M, MCAsmMacroArguments &A,
616
617 void printMacroInstantiations();
618
619 bool expandStatement(SMLoc Loc);
620
/// Forwards a diagnostic of kind \p Kind with text \p Msg at \p Loc to
/// SrcMgr.PrintMessage.
// NOTE(review): `Ranges` is not declared in the visible lines; the line that
// presumably builds it from \p Range is missing from this listing.
621 void printMessage(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Msg,
622 SMRange Range = std::nullopt) const {
624 SrcMgr.PrintMessage(Loc, Kind, Msg, Ranges);
625 }
626 static void DiagHandler(const SMDiagnostic &Diag, void *Context);
627
628 bool lookUpField(const StructInfo &Structure, StringRef Member,
629 AsmFieldInfo &Info) const;
630
631 /// Should we emit DWARF describing this assembler source? (Returns false if
632 /// the source has .file directives, which means we don't want to generate
633 /// info describing the assembler source itself.)
634 bool enabledGenDwarfForAssembly();
635
636 /// Enter the specified file. This returns true on failure.
637 bool enterIncludeFile(const std::string &Filename);
638
639 /// Reset the current lexer position to that given by \p Loc. The
640 /// current token is not set; clients should ensure Lex() is called
641 /// subsequently.
642 ///
643 /// \param InBuffer If not 0, should be the known buffer id that contains the
644 /// location.
645 void jumpToLoc(SMLoc Loc, unsigned InBuffer = 0,
646 bool EndStatementAtEOF = true);
647
648 /// Parse up to a token of kind \p EndTok and return the contents from the
649 /// current token up to (but not including) this token; the current token on
650 /// exit will be either this kind or EOF. Reads through instantiated macro
651 /// functions and text macros.
652 SmallVector<StringRef, 1> parseStringRefsTo(AsmToken::TokenKind EndTok);
653 std::string parseStringTo(AsmToken::TokenKind EndTok);
654
655 /// Parse up to the end of statement and return the contents from the current
656 /// token until the end of the statement; the current token on exit will be
657 /// either the EndOfStatement or EOF.
659
660 bool parseTextItem(std::string &Data);
661
662 unsigned getBinOpPrecedence(AsmToken::TokenKind K,
664
665 bool parseBinOpRHS(unsigned Precedence, const MCExpr *&Res, SMLoc &EndLoc);
666 bool parseParenExpr(const MCExpr *&Res, SMLoc &EndLoc);
667 bool parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc);
668
669 bool parseRegisterOrRegisterNumber(int64_t &Register, SMLoc DirectiveLoc);
670
671 bool parseCVFunctionId(int64_t &FunctionId, StringRef DirectiveName);
672 bool parseCVFileId(int64_t &FileId, StringRef DirectiveName);
673
674 // Generic (target and platform independent) directive parsing.
/// Kinds of directive recognised by this parser. Enumerators are numbered
/// implicitly from 0 in declaration order, so the order must not change.
enum DirectiveKind {
  DK_NO_DIRECTIVE, // Placeholder
  DK_HANDLER_DIRECTIVE,

  // "=", "equ", "textequ"
  DK_ASSIGN, DK_EQU, DK_TEXTEQU,

  // ".ascii", ".asciz", ".string"
  DK_ASCII, DK_ASCIZ, DK_STRING,

  // "byte", "word", ...
  DK_BYTE, DK_SBYTE, DK_WORD, DK_SWORD, DK_DWORD, DK_SDWORD, DK_FWORD,
  DK_QWORD, DK_SQWORD,
  DK_DB, DK_DD, DK_DF, DK_DQ, DK_DW,

  // "real4", "real8", "real10"
  DK_REAL4, DK_REAL8, DK_REAL10,

  // "align", "even", "org"
  DK_ALIGN, DK_EVEN, DK_ORG,

  DK_ENDR,
  DK_EXTERN, DK_PUBLIC, DK_COMM, DK_COMMENT, DK_INCLUDE,

  // Macro-like directives
  DK_REPEAT, DK_WHILE, DK_FOR, DK_FORC,

  // "if" family
  DK_IF, DK_IFE, DK_IFB, DK_IFNB, DK_IFDEF, DK_IFNDEF, DK_IFDIF, DK_IFDIFI,
  DK_IFIDN, DK_IFIDNI,

  // "elseif" family, "else", "endif"
  DK_ELSEIF, DK_ELSEIFE, DK_ELSEIFB, DK_ELSEIFNB, DK_ELSEIFDEF,
  DK_ELSEIFNDEF, DK_ELSEIFDIF, DK_ELSEIFDIFI, DK_ELSEIFIDN, DK_ELSEIFIDNI,
  DK_ELSE, DK_ENDIF,

  // ".file", ".line", ".loc", ".stabs"
  DK_FILE, DK_LINE, DK_LOC, DK_STABS,

  // ".cv_*" directives
  DK_CV_FILE, DK_CV_FUNC_ID, DK_CV_INLINE_SITE_ID, DK_CV_LOC,
  DK_CV_LINETABLE, DK_CV_INLINE_LINETABLE, DK_CV_DEF_RANGE,
  DK_CV_STRINGTABLE, DK_CV_STRING, DK_CV_FILECHECKSUMS,
  DK_CV_FILECHECKSUM_OFFSET, DK_CV_FPO_DATA,

  // .cfi directives
  DK_CFI_SECTIONS, DK_CFI_STARTPROC, DK_CFI_ENDPROC, DK_CFI_DEF_CFA,
  DK_CFI_DEF_CFA_OFFSET, DK_CFI_ADJUST_CFA_OFFSET, DK_CFI_DEF_CFA_REGISTER,
  DK_CFI_OFFSET, DK_CFI_REL_OFFSET, DK_CFI_PERSONALITY, DK_CFI_LSDA,
  DK_CFI_REMEMBER_STATE, DK_CFI_RESTORE_STATE, DK_CFI_SAME_VALUE,
  DK_CFI_RESTORE, DK_CFI_ESCAPE, DK_CFI_RETURN_COLUMN, DK_CFI_SIGNAL_FRAME,
  DK_CFI_UNDEFINED, DK_CFI_REGISTER, DK_CFI_WINDOW_SAVE, DK_CFI_B_KEY_FRAME,

  // macro directives
  DK_MACRO, DK_EXITM, DK_ENDM, DK_PURGE,

  // ".err" family
  DK_ERR, DK_ERRB, DK_ERRNB, DK_ERRDEF, DK_ERRNDEF, DK_ERRDIF, DK_ERRDIFI,
  DK_ERRIDN, DK_ERRIDNI, DK_ERRE, DK_ERRNZ,

  // "echo"
  DK_ECHO,

  DK_STRUCT, DK_UNION, DK_ENDS, DK_END,

  DK_PUSHFRAME, DK_PUSHREG, DK_SAVEREG, DK_SAVEXMM128, DK_SETFRAME,

  // ".radix"
  DK_RADIX,
};
801
802 /// Maps directive name --> DirectiveKind enum, for directives parsed by this
803 /// class.
804 StringMap<DirectiveKind> DirectiveKindMap;
805
806 bool isMacroLikeDirective();
807
808 // Codeview def_range type parsing.
/// Codeview def_range kinds. The values are spelled out explicitly and match
/// the implicit numbering used before.
enum CVDefRangeType {
  CVDR_DEFRANGE = 0, // Placeholder
  CVDR_DEFRANGE_REGISTER = 1,
  CVDR_DEFRANGE_FRAMEPOINTER_REL = 2,
  CVDR_DEFRANGE_SUBFIELD_REGISTER = 3,
  CVDR_DEFRANGE_REGISTER_REL = 4
};
816
817 /// Maps Codeview def_range types --> CVDefRangeType enum, for Codeview
818 /// def_range types parsed by this class.
819 StringMap<CVDefRangeType> CVDefRangeTypeMap;
820
// Built-in symbols handled by this class. Enumerators are numbered
// implicitly from 0 in declaration order, so the order must not change.
enum BuiltinSymbol {
  BI_NO_SYMBOL, // Placeholder

  BI_DATE, BI_TIME, BI_VERSION,
  BI_FILECUR, BI_FILENAME, BI_LINE,
  BI_CURSEG, BI_CPU, BI_INTERFACE,
  BI_CODE, BI_DATA, BI_FARDATA,
  BI_WORDSIZE, BI_CODESIZE, BI_DATASIZE,
  BI_MODEL, BI_STACK,
};
842
843 /// Maps builtin name --> BuiltinSymbol enum, for builtins handled by this
844 /// class.
845 StringMap<BuiltinSymbol> BuiltinSymbolMap;
846
847 const MCExpr *evaluateBuiltinValue(BuiltinSymbol Symbol, SMLoc StartLoc);
848
849 std::optional<std::string> evaluateBuiltinTextMacro(BuiltinSymbol Symbol,
850 SMLoc StartLoc);
851
852 // ".ascii", ".asciz", ".string"
853 bool parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated);
854
855 // "byte", "word", ...
856 bool emitIntValue(const MCExpr *Value, unsigned Size);
857 bool parseScalarInitializer(unsigned Size,
859 unsigned StringPadLength = 0);
860 bool parseScalarInstList(
861 unsigned Size, SmallVectorImpl<const MCExpr *> &Values,
863 bool emitIntegralValues(unsigned Size, unsigned *Count = nullptr);
864 bool addIntegralField(StringRef Name, unsigned Size);
865 bool parseDirectiveValue(StringRef IDVal, unsigned Size);
866 bool parseDirectiveNamedValue(StringRef TypeName, unsigned Size,
867 StringRef Name, SMLoc NameLoc);
868
869 // "real4", "real8", "real10"
870 bool emitRealValues(const fltSemantics &Semantics, unsigned *Count = nullptr);
871 bool addRealField(StringRef Name, const fltSemantics &Semantics, size_t Size);
872 bool parseDirectiveRealValue(StringRef IDVal, const fltSemantics &Semantics,
873 size_t Size);
874 bool parseRealInstList(
875 const fltSemantics &Semantics, SmallVectorImpl<APInt> &Values,
877 bool parseDirectiveNamedRealValue(StringRef TypeName,
878 const fltSemantics &Semantics,
879 unsigned Size, StringRef Name,
880 SMLoc NameLoc);
881
882 bool parseOptionalAngleBracketOpen();
883 bool parseAngleBracketClose(const Twine &Msg = "expected '>'");
884
885 bool parseFieldInitializer(const FieldInfo &Field,
886 FieldInitializer &Initializer);
887 bool parseFieldInitializer(const FieldInfo &Field,
888 const IntFieldInfo &Contents,
889 FieldInitializer &Initializer);
890 bool parseFieldInitializer(const FieldInfo &Field,
891 const RealFieldInfo &Contents,
892 FieldInitializer &Initializer);
893 bool parseFieldInitializer(const FieldInfo &Field,
894 const StructFieldInfo &Contents,
895 FieldInitializer &Initializer);
896
897 bool parseStructInitializer(const StructInfo &Structure,
898 StructInitializer &Initializer);
899 bool parseStructInstList(
900 const StructInfo &Structure, std::vector<StructInitializer> &Initializers,
902
903 bool emitFieldValue(const FieldInfo &Field);
904 bool emitFieldValue(const FieldInfo &Field, const IntFieldInfo &Contents);
905 bool emitFieldValue(const FieldInfo &Field, const RealFieldInfo &Contents);
906 bool emitFieldValue(const FieldInfo &Field, const StructFieldInfo &Contents);
907
908 bool emitFieldInitializer(const FieldInfo &Field,
909 const FieldInitializer &Initializer);
910 bool emitFieldInitializer(const FieldInfo &Field,
911 const IntFieldInfo &Contents,
912 const IntFieldInfo &Initializer);
913 bool emitFieldInitializer(const FieldInfo &Field,
914 const RealFieldInfo &Contents,
915 const RealFieldInfo &Initializer);
916 bool emitFieldInitializer(const FieldInfo &Field,
917 const StructFieldInfo &Contents,
918 const StructFieldInfo &Initializer);
919
920 bool emitStructInitializer(const StructInfo &Structure,
921 const StructInitializer &Initializer);
922
923 // User-defined types (structs, unions):
924 bool emitStructValues(const StructInfo &Structure, unsigned *Count = nullptr);
925 bool addStructField(StringRef Name, const StructInfo &Structure);
926 bool parseDirectiveStructValue(const StructInfo &Structure,
927 StringRef Directive, SMLoc DirLoc);
928 bool parseDirectiveNamedStructValue(const StructInfo &Structure,
929 StringRef Directive, SMLoc DirLoc,
931
932 // "=", "equ", "textequ"
933 bool parseDirectiveEquate(StringRef IDVal, StringRef Name,
934 DirectiveKind DirKind, SMLoc NameLoc);
935
936 bool parseDirectiveOrg(); // "org"
937
938 bool emitAlignTo(int64_t Alignment);
939 bool parseDirectiveAlign(); // "align"
940 bool parseDirectiveEven(); // "even"
941
942 // ".file", ".line", ".loc", ".stabs"
943 bool parseDirectiveFile(SMLoc DirectiveLoc);
944 bool parseDirectiveLine();
945 bool parseDirectiveLoc();
946 bool parseDirectiveStabs();
947
948 // ".cv_file", ".cv_func_id", ".cv_inline_site_id", ".cv_loc", ".cv_linetable",
949 // ".cv_inline_linetable", ".cv_def_range", ".cv_string"
950 bool parseDirectiveCVFile();
951 bool parseDirectiveCVFuncId();
952 bool parseDirectiveCVInlineSiteId();
953 bool parseDirectiveCVLoc();
954 bool parseDirectiveCVLinetable();
955 bool parseDirectiveCVInlineLinetable();
956 bool parseDirectiveCVDefRange();
957 bool parseDirectiveCVString();
958 bool parseDirectiveCVStringTable();
959 bool parseDirectiveCVFileChecksums();
960 bool parseDirectiveCVFileChecksumOffset();
961 bool parseDirectiveCVFPOData();
962
963 // .cfi directives
964 bool parseDirectiveCFIRegister(SMLoc DirectiveLoc);
965 bool parseDirectiveCFIWindowSave();
966 bool parseDirectiveCFISections();
967 bool parseDirectiveCFIStartProc();
968 bool parseDirectiveCFIEndProc();
969 bool parseDirectiveCFIDefCfaOffset();
970 bool parseDirectiveCFIDefCfa(SMLoc DirectiveLoc);
971 bool parseDirectiveCFIAdjustCfaOffset();
972 bool parseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc);
973 bool parseDirectiveCFIOffset(SMLoc DirectiveLoc);
974 bool parseDirectiveCFIRelOffset(SMLoc DirectiveLoc);
975 bool parseDirectiveCFIPersonalityOrLsda(bool IsPersonality);
976 bool parseDirectiveCFIRememberState();
977 bool parseDirectiveCFIRestoreState();
978 bool parseDirectiveCFISameValue(SMLoc DirectiveLoc);
979 bool parseDirectiveCFIRestore(SMLoc DirectiveLoc);
980 bool parseDirectiveCFIEscape();
981 bool parseDirectiveCFIReturnColumn(SMLoc DirectiveLoc);
982 bool parseDirectiveCFISignalFrame();
983 bool parseDirectiveCFIUndefined(SMLoc DirectiveLoc);
984
985 // macro directives
986 bool parseDirectivePurgeMacro(SMLoc DirectiveLoc);
987 bool parseDirectiveExitMacro(SMLoc DirectiveLoc, StringRef Directive,
988 std::string &Value);
989 bool parseDirectiveEndMacro(StringRef Directive);
990 bool parseDirectiveMacro(StringRef Name, SMLoc NameLoc);
991
992 bool parseDirectiveStruct(StringRef Directive, DirectiveKind DirKind,
993 StringRef Name, SMLoc NameLoc);
994 bool parseDirectiveNestedStruct(StringRef Directive, DirectiveKind DirKind);
995 bool parseDirectiveEnds(StringRef Name, SMLoc NameLoc);
996 bool parseDirectiveNestedEnds();
997
998 bool parseDirectiveExtern();
999
1000 /// Parse a directive like ".globl" which accepts a single symbol (which
1001 /// should be a label or an external).
1002 bool parseDirectiveSymbolAttribute(MCSymbolAttr Attr);
1003
1004 bool parseDirectiveComm(bool IsLocal); // ".comm" and ".lcomm"
1005
1006 bool parseDirectiveComment(SMLoc DirectiveLoc); // "comment"
1007
1008 bool parseDirectiveInclude(); // "include"
1009
1010 // "if" or "ife"
1011 bool parseDirectiveIf(SMLoc DirectiveLoc, DirectiveKind DirKind);
1012 // "ifb" or "ifnb", depending on ExpectBlank.
1013 bool parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank);
1014 // "ifidn", "ifdif", "ifidni", or "ifdifi", depending on ExpectEqual and
1015 // CaseInsensitive.
1016 bool parseDirectiveIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
1017 bool CaseInsensitive);
1018 // "ifdef" or "ifndef", depending on expect_defined
1019 bool parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined);
1020 // "elseif" or "elseife"
1021 bool parseDirectiveElseIf(SMLoc DirectiveLoc, DirectiveKind DirKind);
1022 // "elseifb" or "elseifnb", depending on ExpectBlank.
1023 bool parseDirectiveElseIfb(SMLoc DirectiveLoc, bool ExpectBlank);
1024 // "elseifdef" or "elseifndef", depending on expect_defined
1025 bool parseDirectiveElseIfdef(SMLoc DirectiveLoc, bool expect_defined);
1026 // "elseifidn", "elseifdif", "elseifidni", or "elseifdifi", depending on
1027 // ExpectEqual and CaseInsensitive.
1028 bool parseDirectiveElseIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
1029 bool CaseInsensitive);
1030 bool parseDirectiveElse(SMLoc DirectiveLoc); // "else"
1031 bool parseDirectiveEndIf(SMLoc DirectiveLoc); // "endif"
1032 bool parseEscapedString(std::string &Data) override;
1033 bool parseAngleBracketString(std::string &Data) override;
1034
1035 // Macro-like directives
1036 MCAsmMacro *parseMacroLikeBody(SMLoc DirectiveLoc);
1037 void instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
1039 void instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
1040 SMLoc ExitLoc, raw_svector_ostream &OS);
1041 bool parseDirectiveRepeat(SMLoc DirectiveLoc, StringRef Directive);
1042 bool parseDirectiveFor(SMLoc DirectiveLoc, StringRef Directive);
1043 bool parseDirectiveForc(SMLoc DirectiveLoc, StringRef Directive);
1044 bool parseDirectiveWhile(SMLoc DirectiveLoc);
1045
1046 // "_emit" or "__emit"
1047 bool parseDirectiveMSEmit(SMLoc DirectiveLoc, ParseStatementInfo &Info,
1048 size_t Len);
1049
1050 // "align"
1051 bool parseDirectiveMSAlign(SMLoc DirectiveLoc, ParseStatementInfo &Info);
1052
1053 // "end"
1054 bool parseDirectiveEnd(SMLoc DirectiveLoc);
1055
1056 // ".err"
1057 bool parseDirectiveError(SMLoc DirectiveLoc);
1058 // ".errb" or ".errnb", depending on ExpectBlank.
1059 bool parseDirectiveErrorIfb(SMLoc DirectiveLoc, bool ExpectBlank);
1060 // ".errdef" or ".errndef", depending on ExpectDefined.
1061 bool parseDirectiveErrorIfdef(SMLoc DirectiveLoc, bool ExpectDefined);
1062 // ".erridn", ".errdif", ".erridni", or ".errdifi", depending on ExpectEqual
1063 // and CaseInsensitive.
1064 bool parseDirectiveErrorIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
1065 bool CaseInsensitive);
1066 // ".erre" or ".errnz", depending on ExpectZero.
1067 bool parseDirectiveErrorIfe(SMLoc DirectiveLoc, bool ExpectZero);
1068
1069 // ".radix"
1070 bool parseDirectiveRadix(SMLoc DirectiveLoc);
1071
1072 // "echo"
1073 bool parseDirectiveEcho(SMLoc DirectiveLoc);
1074
1075 void initializeDirectiveKindMap();
1076 void initializeCVDefRangeTypeMap();
1077 void initializeBuiltinSymbolMap();
1078};
1079
1080} // end anonymous namespace
1081
1082namespace llvm {
1083
1085
1087
1088} // end namespace llvm
1089
1091
/// Construct a MASM parser reading from \p SM.
///
/// The lexer starts on buffer \p CB, or on the source manager's main file when
/// \p CB is 0. The source manager's current diagnostic handler and context are
/// saved so the destructor can put them back. A platform parser is created
/// from the context's object file type; anything other than COFF is a fatal
/// error.
1092MasmParser::MasmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
1093 const MCAsmInfo &MAI, struct tm TM, unsigned CB)
1094 : Lexer(MAI), Ctx(Ctx), Out(Out), MAI(MAI), SrcMgr(SM),
1095 CurBuffer(CB ? CB : SM.getMainFileID()), TM(TM) {
1096 HadError = false;
1097 // Save the old handler.
1098 SavedDiagHandler = SrcMgr.getDiagHandler();
1099 SavedDiagContext = SrcMgr.getDiagContext();
1100 // Set our own handler which calls the saved handler.
// NOTE(review): no statement installing a handler is visible after the comment
// above in this listing -- confirm against the upstream file.
1102 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
// The top-level buffer ends its final statement at EOF.
1103 EndStatementAtEOFStack.push_back(true);
1104
1105 // Initialize the platform / file format parser.
1106 switch (Ctx.getObjectFileType()) {
1107 case MCContext::IsCOFF:
1108 PlatformParser.reset(createCOFFMasmParser());
1109 break;
1110 default:
1111 report_fatal_error("llvm-ml currently supports only COFF output.");
1112 break;
1113 }
1114
// Set up this parser's own tables; the platform parser is initialized between
// the directive-kind map and the remaining maps.
1115 initializeDirectiveKindMap();
1116 PlatformParser->Initialize(*this);
1117 initializeCVDefRangeTypeMap();
1118 initializeBuiltinSymbolMap();
1119
1120 NumOfMacroInstantiations = 0;
1121}
1122
1123MasmParser::~MasmParser() {
1124 assert((HadError || ActiveMacros.empty()) &&
1125 "Unexpected active macro instantiation!");
1126
1127 // Restore the saved diagnostics handler and context for use during
1128 // finalization.
1129 SrcMgr.setDiagHandler(SavedDiagHandler, SavedDiagContext);
1130}
1131
1132void MasmParser::printMacroInstantiations() {
1133 // Print the active macro instantiation stack.
1134 for (std::vector<MacroInstantiation *>::const_reverse_iterator
1135 it = ActiveMacros.rbegin(),
1136 ie = ActiveMacros.rend();
1137 it != ie; ++it)
1138 printMessage((*it)->InstantiationLoc, SourceMgr::DK_Note,
1139 "while in macro instantiation");
1140}
1141
1142void MasmParser::Note(SMLoc L, const Twine &Msg, SMRange Range) {
1143 printPendingErrors();
1144 printMessage(L, SourceMgr::DK_Note, Msg, Range);
1145 printMacroInstantiations();
1146}
1147
1148bool MasmParser::Warning(SMLoc L, const Twine &Msg, SMRange Range) {
1149 if (getTargetParser().getTargetOptions().MCNoWarn)
1150 return false;
1151 if (getTargetParser().getTargetOptions().MCFatalWarnings)
1152 return Error(L, Msg, Range);
1153 printMessage(L, SourceMgr::DK_Warning, Msg, Range);
1154 printMacroInstantiations();
1155 return false;
1156}
1157
1158bool MasmParser::printError(SMLoc L, const Twine &Msg, SMRange Range) {
1159 HadError = true;
1160 printMessage(L, SourceMgr::DK_Error, Msg, Range);
1161 printMacroInstantiations();
1162 return true;
1163}
1164
1165bool MasmParser::enterIncludeFile(const std::string &Filename) {
1166 std::string IncludedFile;
1167 unsigned NewBuf =
1168 SrcMgr.AddIncludeFile(Filename, Lexer.getLoc(), IncludedFile);
1169 if (!NewBuf)
1170 return true;
1171
1172 CurBuffer = NewBuf;
1173 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
1174 EndStatementAtEOFStack.push_back(true);
1175 return false;
1176}
1177
1178void MasmParser::jumpToLoc(SMLoc Loc, unsigned InBuffer,
1179 bool EndStatementAtEOF) {
1180 CurBuffer = InBuffer ? InBuffer : SrcMgr.FindBufferContainingLoc(Loc);
1181 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(),
1182 Loc.getPointer(), EndStatementAtEOF);
1183}
1184
1185bool MasmParser::expandMacros() {
1186 const AsmToken &Tok = getTok();
1187 const std::string IDLower = Tok.getIdentifier().lower();
1188
1189 const llvm::MCAsmMacro *M = getContext().lookupMacro(IDLower);
1190 if (M && M->IsFunction && peekTok().is(AsmToken::LParen)) {
1191 // This is a macro function invocation; expand it in place.
1192 const SMLoc MacroLoc = Tok.getLoc();
1193 const StringRef MacroId = Tok.getIdentifier();
1194 Lexer.Lex();
1195 if (handleMacroInvocation(M, MacroLoc)) {
1196 Lexer.UnLex(AsmToken(AsmToken::Error, MacroId));
1197 Lexer.Lex();
1198 }
1199 return false;
1200 }
1201
1202 std::optional<std::string> ExpandedValue;
1203 auto BuiltinIt = BuiltinSymbolMap.find(IDLower);
1204 if (BuiltinIt != BuiltinSymbolMap.end()) {
1205 ExpandedValue =
1206 evaluateBuiltinTextMacro(BuiltinIt->getValue(), Tok.getLoc());
1207 } else {
1208 auto VarIt = Variables.find(IDLower);
1209 if (VarIt != Variables.end() && VarIt->getValue().IsText) {
1210 ExpandedValue = VarIt->getValue().TextValue;
1211 }
1212 }
1213
1214 if (!ExpandedValue)
1215 return true;
1216 std::unique_ptr<MemoryBuffer> Instantiation =
1217 MemoryBuffer::getMemBufferCopy(*ExpandedValue, "<instantiation>");
1218
1219 // Jump to the macro instantiation and prime the lexer.
1220 CurBuffer =
1221 SrcMgr.AddNewSourceBuffer(std::move(Instantiation), Tok.getEndLoc());
1222 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(), nullptr,
1223 /*EndStatementAtEOF=*/false);
1224 EndStatementAtEOFStack.push_back(false);
1225 Lexer.Lex();
1226 return false;
1227}
1228
/// Advance to the next token and return it.
///
/// Before advancing, a pending lexer error is reported and a comment carried
/// by an end-of-statement token is forwarded to the streamer. After advancing,
/// the function optionally expands macros, skips comment tokens, skips
/// backslash/end-of-statement line continuations, and at EOF returns to the
/// parent include location when there is one.
///
/// \param ExpandNextToken when equal to ExpandMacros, identifier tokens are
/// passed to expandMacros().
/// \returns the token the lexer ends up on.
1229const AsmToken &MasmParser::Lex(ExpandKind ExpandNextToken) {
1230 if (Lexer.getTok().is(AsmToken::Error))
1231 Error(Lexer.getErrLoc(), Lexer.getErr());
1232
1233 // if it's a end of statement with a comment in it
1234 if (getTok().is(AsmToken::EndOfStatement)) {
1235 // if this is a line comment output it.
// A token text that is non-empty and does not begin with '\n' or '\r' is
// treated as a comment here.
1236 if (!getTok().getString().empty() && getTok().getString().front() != '\n' &&
1237 getTok().getString().front() != '\r' && MAI.preserveAsmComments())
1238 Out.addExplicitComment(Twine(getTok().getString()));
1239 }
1240
1241 const AsmToken *tok = &Lexer.Lex();
1242 bool StartOfStatement = Lexer.isAtStartOfStatement();
1243
// Keep expanding while the current token is an identifier; expandMacros()
// returns true once there is nothing to expand, which ends the loop.
1244 while (ExpandNextToken == ExpandMacros && tok->is(AsmToken::Identifier)) {
1245 if (StartOfStatement) {
// Peek one token ahead without consuming it.
1246 AsmToken NextTok;
1247 MutableArrayRef<AsmToken> Buf(NextTok);
1248 size_t ReadCount = Lexer.peekTokens(Buf);
1249 if (ReadCount && NextTok.is(AsmToken::Identifier) &&
1250 (NextTok.getString().equals_insensitive("equ") ||
1251 NextTok.getString().equals_insensitive("textequ"))) {
1252 // This looks like an EQU or TEXTEQU directive; don't expand the
1253 // identifier, allowing for redefinitions.
1254 break;
1255 }
1256 }
1257 if (expandMacros())
1258 break;
1259 }
1260
1261 // Parse comments here to be deferred until end of next statement.
1262 while (tok->is(AsmToken::Comment)) {
1263 if (MAI.preserveAsmComments())
1264 Out.addExplicitComment(Twine(tok->getString()));
1265 tok = &Lexer.Lex();
1266 }
1267
1268 // Recognize and bypass line continuations.
1269 while (tok->is(AsmToken::BackSlash) &&
1270 peekTok().is(AsmToken::EndOfStatement)) {
1271 // Eat both the backslash and the end of statement.
1272 Lexer.Lex();
1273 tok = &Lexer.Lex();
1274 }
1275
1276 if (tok->is(AsmToken::Eof)) {
1277 // If this is the end of an included file, pop the parent file off the
1278 // include stack.
1279 SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1280 if (ParentIncludeLoc != SMLoc()) {
1281 EndStatementAtEOFStack.pop_back();
1282 jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
// Continue lexing from the parent location.
1283 return Lex();
1284 }
// No parent include: the stack is expected to be empty after this pop.
1285 EndStatementAtEOFStack.pop_back();
1286 assert(EndStatementAtEOFStack.empty());
1287 }
1288
1289 return *tok;
1290}
1291
/// Return a copy of the token after the current one without consuming it.
///
/// When the lexer yields no token, the function switches to the parent include
/// location (if there is one) and retries there.
///
/// \param ShouldSkipSpace forwarded to Lexer.peekTokens().
1292const AsmToken MasmParser::peekTok(bool ShouldSkipSpace) {
1293 AsmToken Tok;
1294
// NOTE(review): the declaration of `Buf` is not visible in this listing;
// presumably it wraps `Tok` so that peekTokens() fills it in -- confirm
// against the upstream file.
1296 size_t ReadCount = Lexer.peekTokens(Buf, ShouldSkipSpace);
1297
1298 if (ReadCount == 0) {
1299 // If this is the end of an included file, pop the parent file off the
1300 // include stack.
1301 SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1302 if (ParentIncludeLoc != SMLoc()) {
1303 EndStatementAtEOFStack.pop_back();
1304 jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1305 return peekTok(ShouldSkipSpace);
1306 }
1307 EndStatementAtEOFStack.pop_back();
1308 assert(EndStatementAtEOFStack.empty());
1309 }
1310
// NOTE(review): if ReadCount was 0 and there was no parent include, control
// reaches this assertion with ReadCount still 0.
1311 assert(ReadCount == 1);
1312 return Tok;
1313}
1314
1315bool MasmParser::enabledGenDwarfForAssembly() {
1316 // Check whether the user specified -g.
1317 if (!getContext().getGenDwarfForAssembly())
1318 return false;
1319 // If we haven't encountered any .file directives (which would imply that
1320 // the assembler source was produced with debug info already) then emit one
1321 // describing the assembler source file itself.
1322 if (getContext().getGenDwarfFileNumber() == 0) {
1323 // Use the first #line directive for this, if any. It's preprocessed, so
1324 // there is no checksum, and of course no source directive.
1325 if (!FirstCppHashFilename.empty())
1326 getContext().setMCLineTableRootFile(
1327 /*CUID=*/0, getContext().getCompilationDir(), FirstCppHashFilename,
1328 /*Cksum=*/std::nullopt, /*Source=*/std::nullopt);
1329 const MCDwarfFile &RootFile =
1330 getContext().getMCDwarfLineTable(/*CUID=*/0).getRootFile();
1331 getContext().setGenDwarfFileNumber(getStreamer().emitDwarfFileDirective(
1332 /*CUID=*/0, getContext().getCompilationDir(), RootFile.Name,
1333 RootFile.Checksum, RootFile.Source));
1334 }
1335 return true;
1336}
1337
/// Parse the whole input, statement by statement, then run end-of-file checks.
///
/// \param NoInitialTextSection when false, Out.initSections() is called before
/// any parsing.
/// \param NoFinalize when true, the undefined-symbol checks and the final
/// Out.finish() call are skipped.
/// \returns true if HadError is set or the context reports an error.
1338bool MasmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
1339 // Create the initial section, if requested.
1340 if (!NoInitialTextSection)
1341 Out.initSections(false, getTargetParser().getSTI());
1342
1343 // Prime the lexer.
1344 Lex();
1345
1346 HadError = false;
// Snapshot the conditional-assembly state so unbalanced conditionals can be
// detected after the main loop.
1347 AsmCond StartingCondState = TheCondState;
1348 SmallVector<AsmRewrite, 4> AsmStrRewrites;
1349
1350 // If we are generating dwarf for assembly source files save the initial text
1351 // section. (Don't use enabledGenDwarfForAssembly() here, as we aren't
1352 // emitting any actual debug info yet and haven't had a chance to parse any
1353 // embedded .file directives.)
1354 if (getContext().getGenDwarfForAssembly()) {
1355 MCSection *Sec = getStreamer().getCurrentSectionOnly();
1356 if (!Sec->getBeginSymbol()) {
1357 MCSymbol *SectionStartSym = getContext().createTempSymbol();
1358 getStreamer().emitLabel(SectionStartSym);
1359 Sec->setBeginSymbol(SectionStartSym);
1360 }
1361 bool InsertResult = getContext().addGenDwarfSection(Sec);
1362 assert(InsertResult && ".text section should not have debug info yet");
1363 (void)InsertResult;
1364 }
1365
1366 getTargetParser().onBeginOfFile();
1367
1368 // While we have input, parse each statement.
// The loop also continues at Eof while the current buffer has a parent
// include location.
1369 while (Lexer.isNot(AsmToken::Eof) ||
1370 SrcMgr.getParentIncludeLoc(CurBuffer) != SMLoc()) {
1371 // Skip through the EOF at the end of an inclusion.
1372 if (Lexer.is(AsmToken::Eof))
1373 Lex();
1374
1375 ParseStatementInfo Info(&AsmStrRewrites);
1376 bool Parsed = parseStatement(Info, nullptr);
1377
1378 // If we have a Lexer Error we are on an Error Token. Load in Lexer Error
1379 // for printing ErrMsg via Lex() only if no (presumably better) parser error
1380 // exists.
1381 if (Parsed && !hasPendingError() && Lexer.getTok().is(AsmToken::Error)) {
1382 Lex();
1383 }
1384
1385 // parseStatement returned true so may need to emit an error.
1386 printPendingErrors();
1387
1388 // Skipping to the next line if needed.
1389 if (Parsed && !getLexer().isAtStartOfStatement())
1390 eatToEndOfStatement();
1391 }
1392
1393 getTargetParser().onEndOfFile();
1394 printPendingErrors();
1395
1396 // All errors should have been emitted.
1397 assert(!hasPendingError() && "unexpected error from parseStatement");
1398
1399 getTargetParser().flushPendingInstructions(getStreamer());
1400
// The conditional state must match the snapshot taken before parsing.
1401 if (TheCondState.TheCond != StartingCondState.TheCond ||
1402 TheCondState.Ignore != StartingCondState.Ignore)
1403 printError(getTok().getLoc(), "unmatched .ifs or .elses");
1404 // Check to see there are no empty DwarfFile slots.
1405 const auto &LineTables = getContext().getMCDwarfLineTables();
1406 if (!LineTables.empty()) {
1407 unsigned Index = 0;
// Only the first line table is inspected; index 0 is exempt from the check.
1408 for (const auto &File : LineTables.begin()->second.getMCDwarfFiles()) {
1409 if (File.Name.empty() && Index != 0)
1410 printError(getTok().getLoc(), "unassigned file number: " +
1411 Twine(Index) +
1412 " for .file directives");
1413 ++Index;
1414 }
1415 }
1416
1417 // Check to see that all assembler local symbols were actually defined.
1418 // Targets that don't do subsections via symbols may not want this, though,
1419 // so conservatively exclude them. Only do this if we're finalizing, though,
1420 // as otherwise we won't necessarily have seen everything yet.
1421 if (!NoFinalize) {
1422 if (MAI.hasSubsectionsViaSymbols()) {
1423 for (const auto &TableEntry : getContext().getSymbols()) {
1424 MCSymbol *Sym = TableEntry.getValue();
1425 // Variable symbols may not be marked as defined, so check those
1426 // explicitly. If we know it's a variable, we have a definition for
1427 // the purposes of this check.
1428 if (Sym->isTemporary() && !Sym->isVariable() && !Sym->isDefined())
1429 // FIXME: We would really like to refer back to where the symbol was
1430 // first referenced for a source location. We need to add something
1431 // to track that. Currently, we just point to the end of the file.
1432 printError(getTok().getLoc(), "assembler local symbol '" +
1433 Sym->getName() + "' not defined");
1434 }
1435 }
1436
1437 // Temporary symbols like the ones for directional jumps don't go in the
1438 // symbol table. They also need to be diagnosed in all (final) cases.
1439 for (std::tuple<SMLoc, CppHashInfoTy, MCSymbol *> &LocSym : DirLabels) {
1440 if (std::get<2>(LocSym)->isUndefined()) {
1441 // Reset the state of any "# line file" directives we've seen to the
1442 // context as it was at the diagnostic site.
1443 CppHashInfo = std::get<1>(LocSym);
1444 printError(std::get<0>(LocSym), "directional label undefined");
1445 }
1446 }
1447 }
1448
1449 // Finalize the output stream if there are no errors and if the client wants
1450 // us to.
1451 if (!HadError && !NoFinalize)
1452 Out.finish(Lexer.getLoc());
1453
1454 return HadError || getContext().hadError();
1455}
1456
1457bool MasmParser::checkForValidSection() {
1458 if (!ParsingMSInlineAsm && !getStreamer().getCurrentSectionOnly()) {
1459 Out.initSections(false, getTargetParser().getSTI());
1460 return Error(getTok().getLoc(),
1461 "expected section directive before assembly directive");
1462 }
1463 return false;
1464}
1465
1466/// Throw away the rest of the line for testing purposes.
1467void MasmParser::eatToEndOfStatement() {
1468 while (Lexer.isNot(AsmToken::EndOfStatement)) {
1469 if (Lexer.is(AsmToken::Eof)) {
1470 SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1471 if (ParentIncludeLoc == SMLoc()) {
1472 break;
1473 }
1474
1475 EndStatementAtEOFStack.pop_back();
1476 jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1477 }
1478
1479 Lexer.Lex();
1480 }
1481
1482 // Eat EOL.
1483 if (Lexer.is(AsmToken::EndOfStatement))
1484 Lexer.Lex();
1485}
1486
// Collect the source text from the current token up to (not including) the
// first token of kind EndTok, as one fragment per buffer visited.
// When an included buffer ends before EndTok is found, the fragment gathered
// so far is stored and collection restarts in the parent buffer.
// NOTE(review): the return type line and the declaration of `Refs` are not
// visible in this listing; parseStringTo() stores the result in a
// SmallVector<StringRef, 1>.
1488MasmParser::parseStringRefsTo(AsmToken::TokenKind EndTok) {
1490 const char *Start = getTok().getLoc().getPointer();
1491 while (Lexer.isNot(EndTok)) {
1492 if (Lexer.is(AsmToken::Eof)) {
1493 SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1494 if (ParentIncludeLoc == SMLoc()) {
// End of the outermost buffer: stop without having found EndTok.
1495 break;
1496 }
// Close the fragment that lies in the buffer being left.
1497 Refs.emplace_back(Start, getTok().getLoc().getPointer() - Start);
1498
1499 EndStatementAtEOFStack.pop_back();
1500 jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1501 Lexer.Lex();
// Start a new fragment at the first token lexed in the parent buffer.
1502 Start = getTok().getLoc().getPointer();
1503 } else {
1504 Lexer.Lex();
1505 }
1506 }
// The final fragment ends where the current token begins.
1507 Refs.emplace_back(Start, getTok().getLoc().getPointer() - Start);
1508 return Refs;
1509}
1510
1511std::string MasmParser::parseStringTo(AsmToken::TokenKind EndTok) {
1512 SmallVector<StringRef, 1> Refs = parseStringRefsTo(EndTok);
1513 std::string Str;
1514 for (StringRef S : Refs) {
1515 Str.append(S.str());
1516 }
1517 return Str;
1518}
1519
1520StringRef MasmParser::parseStringToEndOfStatement() {
1521 const char *Start = getTok().getLoc().getPointer();
1522
1523 while (Lexer.isNot(AsmToken::EndOfStatement) && Lexer.isNot(AsmToken::Eof))
1524 Lexer.Lex();
1525
1526 const char *End = getTok().getLoc().getPointer();
1527 return StringRef(Start, End - Start);
1528}
1529
1530/// Parse a paren expression and return it.
1531/// NOTE: This assumes the leading '(' has already been consumed.
1532///
1533/// parenexpr ::= expr)
1534///
1535bool MasmParser::parseParenExpr(const MCExpr *&Res, SMLoc &EndLoc) {
1536 if (parseExpression(Res))
1537 return true;
1538 EndLoc = Lexer.getTok().getEndLoc();
1539 return parseRParen();
1540}
1541
1542/// Parse a bracket expression and return it.
1543/// NOTE: This assumes the leading '[' has already been consumed.
1544///
1545/// bracketexpr ::= expr]
1546///
1547bool MasmParser::parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc) {
1548 if (parseExpression(Res))
1549 return true;
1550 EndLoc = getTok().getEndLoc();
1551 if (parseToken(AsmToken::RBrac, "expected ']' in brackets expression"))
1552 return true;
1553 return false;
1554}
1555
1556/// Parse a primary expression and return it.
1557/// primaryexpr ::= (parenexpr
1558/// primaryexpr ::= symbol
1559/// primaryexpr ::= number
1560/// primaryexpr ::= '.'
1561/// primaryexpr ::= ~,+,-,'not' primaryexpr
1562/// primaryexpr ::= string
1563/// (a string is interpreted as a 64-bit number in big-endian base-256)
///
/// \param Res receives the parsed expression.
/// \param EndLoc set by most branches to the end of the consumed text.
/// \param TypeInfo optional; when non-null, the symbol-reference branch stores
/// the type information it found there.
/// \returns true on error.
///
/// NOTE(review): several lines of this function are missing from this listing
/// (for example the declarations of `Identifier`, `SymbolName`, `Variant` and
/// `Info`, and the case labels ahead of the final target-operator branch).
/// Comments below describe only what the visible lines do.
1564bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
1565 AsmTypeInfo *TypeInfo) {
1566 SMLoc FirstTokenLoc = getLexer().getLoc();
1567 AsmToken::TokenKind FirstTokenKind = Lexer.getKind();
1568 switch (FirstTokenKind) {
1569 default:
1570 return TokError("unknown token in expression");
1571 // If we have an error assume that we've already handled it.
1572 case AsmToken::Error:
1573 return true;
// '!' applies logical NOT to the primary expression that follows it.
1574 case AsmToken::Exclaim:
1575 Lex(); // Eat the operator.
1576 if (parsePrimaryExpr(Res, EndLoc, nullptr))
1577 return true;
1578 Res = MCUnaryExpr::createLNot(Res, getContext(), FirstTokenLoc);
1579 return false;
1580 case AsmToken::Dollar:
1581 case AsmToken::At:
1582 case AsmToken::Identifier: {
1584 if (parseIdentifier(Identifier)) {
1585 // We may have failed but $ may be a valid token.
1586 if (getTok().is(AsmToken::Dollar)) {
1587 if (Lexer.getMAI().getDollarIsPC()) {
1588 Lex();
1589 // This is a '$' reference, which references the current PC. Emit a
1590 // temporary label to the streamer and refer to it.
1591 MCSymbol *Sym = Ctx.createTempSymbol();
1592 Out.emitLabel(Sym);
1594 getContext());
1595 EndLoc = FirstTokenLoc;
1596 return false;
1597 }
1598 return Error(FirstTokenLoc, "invalid token in expression");
1599 }
1600 }
1601 // Parse named bitwise negation.
1602 if (Identifier.equals_insensitive("not")) {
1603 if (parsePrimaryExpr(Res, EndLoc, nullptr))
1604 return true;
1605 Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc);
1606 return false;
1607 }
1608 // Parse directional local label references.
1609 if (Identifier.equals_insensitive("@b") ||
1610 Identifier.equals_insensitive("@f")) {
1611 bool Before = Identifier.equals_insensitive("@b");
1612 MCSymbol *Sym = getContext().getDirectionalLocalSymbol(0, Before);
// A backward reference needs a label that has already been defined.
1613 if (Before && Sym->isUndefined())
1614 return Error(FirstTokenLoc, "Expected @@ label before @B reference");
1615 Res = MCSymbolRefExpr::create(Sym, getContext());
1616 return false;
1617 }
1618 // Parse symbol variant.
1619 std::pair<StringRef, StringRef> Split;
1620 if (!MAI.useParensForSymbolVariant()) {
// NOTE(review): the visible labels for this case are Dollar, At and
// Identifier, so this comparison against String looks like it cannot be true
// here -- confirm.
1621 if (FirstTokenKind == AsmToken::String) {
1622 if (Lexer.is(AsmToken::At)) {
1623 Lex(); // eat @
1624 SMLoc AtLoc = getLexer().getLoc();
1625 StringRef VName;
1626 if (parseIdentifier(VName))
1627 return Error(AtLoc, "expected symbol variant after '@'");
1628
1629 Split = std::make_pair(Identifier, VName);
1630 }
1631 } else {
1632 Split = Identifier.split('@');
1633 }
1634 } else if (Lexer.is(AsmToken::LParen)) {
1635 Lex(); // eat '('.
1636 StringRef VName;
// The result of this parseIdentifier() call is not checked.
1637 parseIdentifier(VName);
1638 // eat ')'.
1639 if (parseToken(AsmToken::RParen,
1640 "unexpected token in variant, expected ')'"))
1641 return true;
1642 Split = std::make_pair(Identifier, VName);
1643 }
1644
1645 EndLoc = SMLoc::getFromPointer(Identifier.end());
1646
1647 // This is a symbol reference.
1649 if (SymbolName.empty())
1650 return Error(getLexer().getLoc(), "expected a symbol reference");
1651
1653
1654 // Look up the symbol variant if used.
1655 if (!Split.second.empty()) {
1657 if (Variant != MCSymbolRefExpr::VK_Invalid) {
1658 SymbolName = Split.first;
1659 } else if (MAI.doesAllowAtInName() && !MAI.useParensForSymbolVariant()) {
1661 } else {
1662 return Error(SMLoc::getFromPointer(Split.second.begin()),
1663 "invalid variant '" + Split.second + "'");
1664 }
1665 }
1666
1667 // Find the field offset if used.
1669 Split = SymbolName.split('.');
1670 if (Split.second.empty()) {
1671 } else {
1672 SymbolName = Split.first;
// If the lookup on the whole remainder returns true, retry with the remainder
// split once more at its first '.'; the result of the retry is not checked.
1673 if (lookUpField(SymbolName, Split.second, Info)) {
1674 std::pair<StringRef, StringRef> BaseMember = Split.second.split('.');
1675 StringRef Base = BaseMember.first, Member = BaseMember.second;
1676 lookUpField(Base, Member, Info);
1677 } else if (Structs.count(SymbolName.lower())) {
1678 // This is actually a reference to a field offset.
1679 Res = MCConstantExpr::create(Info.Offset, getContext());
1680 return false;
1681 }
1682 }
1683
1684 MCSymbol *Sym = getContext().getInlineAsmLabel(SymbolName);
1685 if (!Sym) {
1686 // If this is a built-in numeric value, treat it as a constant.
1687 auto BuiltinIt = BuiltinSymbolMap.find(SymbolName.lower());
1688 const BuiltinSymbol Symbol = (BuiltinIt == BuiltinSymbolMap.end())
1689 ? BI_NO_SYMBOL
1690 : BuiltinIt->getValue();
1691 if (Symbol != BI_NO_SYMBOL) {
1692 const MCExpr *Value = evaluateBuiltinValue(Symbol, FirstTokenLoc);
// A null result falls through to the ordinary symbol path below.
1693 if (Value) {
1694 Res = Value;
1695 return false;
1696 }
1697 }
1698
1699 // Variables use case-insensitive symbol names; if this is a variable, we
1700 // find the symbol using its canonical name.
1701 auto VarIt = Variables.find(SymbolName.lower());
1702 if (VarIt != Variables.end())
1703 SymbolName = VarIt->second.Name;
1704 Sym = getContext().getOrCreateSymbol(SymbolName);
1705 }
1706
1707 // If this is an absolute variable reference, substitute it now to preserve
1708 // semantics in the face of reassignment.
1709 if (Sym->isVariable()) {
1710 auto V = Sym->getVariableValue(/*SetUsed=*/false);
1711 bool DoInline = isa<MCConstantExpr>(V) && !Variant;
// A target expression decides for itself whether it is inlined.
1712 if (auto TV = dyn_cast<MCTargetExpr>(V))
1713 DoInline = TV->inlineAssignedExpr();
1714 if (DoInline) {
1715 if (Variant)
1716 return Error(EndLoc, "unexpected modifier on variable reference");
1717 Res = Sym->getVariableValue(/*SetUsed=*/false);
1718 return false;
1719 }
1720 }
1721
1722 // Otherwise create a symbol ref.
1723 const MCExpr *SymRef =
1724 MCSymbolRefExpr::create(Sym, Variant, getContext(), FirstTokenLoc);
// A non-zero field offset is added to the symbol reference.
1725 if (Info.Offset) {
1727 MCBinaryExpr::Add, SymRef,
1728 MCConstantExpr::create(Info.Offset, getContext()), getContext());
1729 } else {
1730 Res = SymRef;
1731 }
1732 if (TypeInfo) {
// With no type name from the field lookup, fall back to the KnownType entry
// for the lower-cased identifier, if there is one.
1733 if (Info.Type.Name.empty()) {
1734 auto TypeIt = KnownType.find(Identifier.lower());
1735 if (TypeIt != KnownType.end()) {
1736 Info.Type = TypeIt->second;
1737 }
1738 }
1739
1740 *TypeInfo = Info.Type;
1741 }
1742 return false;
1743 }
1744 case AsmToken::BigNum:
1745 return TokError("literal value out of range for directive");
1746 case AsmToken::Integer: {
1747 int64_t IntVal = getTok().getIntVal();
1748 Res = MCConstantExpr::create(IntVal, getContext());
1749 EndLoc = Lexer.getTok().getEndLoc();
1750 Lex(); // Eat token.
1751 return false;
1752 }
1753 case AsmToken::String: {
1754 // MASM strings (used as constants) are interpreted as big-endian base-256.
1755 SMLoc ValueLoc = getTok().getLoc();
1756 std::string Value;
1757 if (parseEscapedString(Value))
1758 return true;
// More than 8 bytes cannot fit in the 64-bit accumulator below.
1759 if (Value.size() > 8)
1760 return Error(ValueLoc, "literal value out of range");
1761 uint64_t IntValue = 0;
1762 for (const unsigned char CharVal : Value)
1763 IntValue = (IntValue << 8) | CharVal;
1764 Res = MCConstantExpr::create(IntValue, getContext());
1765 return false;
1766 }
1767 case AsmToken::Real: {
// The real is parsed as an IEEE double and its bit pattern becomes the
// constant's integer value.
1768 APFloat RealVal(APFloat::IEEEdouble(), getTok().getString());
1769 uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
1770 Res = MCConstantExpr::create(IntVal, getContext());
1771 EndLoc = Lexer.getTok().getEndLoc();
1772 Lex(); // Eat token.
1773 return false;
1774 }
1775 case AsmToken::Dot: {
1776 // This is a '.' reference, which references the current PC. Emit a
1777 // temporary label to the streamer and refer to it.
1778 MCSymbol *Sym = Ctx.createTempSymbol();
1779 Out.emitLabel(Sym);
1780 Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
1781 EndLoc = Lexer.getTok().getEndLoc();
1782 Lex(); // Eat identifier.
1783 return false;
1784 }
1785 case AsmToken::LParen:
1786 Lex(); // Eat the '('.
1787 return parseParenExpr(Res, EndLoc);
1788 case AsmToken::LBrac:
1789 if (!PlatformParser->HasBracketExpressions())
1790 return TokError("brackets expression not supported on this target");
1791 Lex(); // Eat the '['.
1792 return parseBracketExpr(Res, EndLoc);
1793 case AsmToken::Minus:
1794 Lex(); // Eat the operator.
1795 if (parsePrimaryExpr(Res, EndLoc, nullptr))
1796 return true;
1797 Res = MCUnaryExpr::createMinus(Res, getContext(), FirstTokenLoc);
1798 return false;
1799 case AsmToken::Plus:
1800 Lex(); // Eat the operator.
1801 if (parsePrimaryExpr(Res, EndLoc, nullptr))
1802 return true;
1803 Res = MCUnaryExpr::createPlus(Res, getContext(), FirstTokenLoc);
1804 return false;
1805 case AsmToken::Tilde:
1806 Lex(); // Eat the operator.
1807 if (parsePrimaryExpr(Res, EndLoc, nullptr))
1808 return true;
1809 Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc);
1810 return false;
1811 // MIPS unary expression operators. The lexer won't generate these tokens if
1812 // MCAsmInfo::HasMipsExpressions is false for the target.
// NOTE(review): the case labels for this branch are not visible in this
// listing.
1837 Lex(); // Eat the operator.
1838 if (Lexer.isNot(AsmToken::LParen))
1839 return TokError("expected '(' after operator");
1840 Lex(); // Eat the operator.
1841 if (parseExpression(Res, EndLoc))
1842 return true;
1843 if (parseRParen())
1844 return true;
// A null result from the target parser is reported as failure.
1845 Res = getTargetParser().createTargetUnaryExpr(Res, FirstTokenKind, Ctx);
1846 return !Res;
1847 }
1848}
1849
1850bool MasmParser::parseExpression(const MCExpr *&Res) {
1851 SMLoc EndLoc;
1852 return parseExpression(Res, EndLoc);
1853}
1854
1855/// This function checks if the next token is <string> type or arithmetic.
1856/// string that begin with character '<' must end with character '>'.
1857/// otherwise it is arithmetics.
1858/// If the function returns a 'true' value,
1859/// the End argument will be filled with the last location pointed to the '>'
1860/// character.
1861static bool isAngleBracketString(SMLoc &StrLoc, SMLoc &EndLoc) {
1862 assert((StrLoc.getPointer() != nullptr) &&
1863 "Argument to the function cannot be a NULL value");
1864 const char *CharPtr = StrLoc.getPointer();
1865 while ((*CharPtr != '>') && (*CharPtr != '\n') && (*CharPtr != '\r') &&
1866 (*CharPtr != '\0')) {
1867 if (*CharPtr == '!')
1868 CharPtr++;
1869 CharPtr++;
1870 }
1871 if (*CharPtr == '>') {
1872 EndLoc = StrLoc.getFromPointer(CharPtr + 1);
1873 return true;
1874 }
1875 return false;
1876}
1877
1878/// creating a string without the escape characters '!'.
1879static std::string angleBracketString(StringRef BracketContents) {
1880 std::string Res;
1881 for (size_t Pos = 0; Pos < BracketContents.size(); Pos++) {
1882 if (BracketContents[Pos] == '!')
1883 Pos++;
1884 Res += BracketContents[Pos];
1885 }
1886 return Res;
1887}
1888
1889/// Parse an expression and return it.
1890///
1891/// expr ::= expr &&,|| expr -> lowest.
1892/// expr ::= expr |,^,&,! expr
1893/// expr ::= expr ==,!=,<>,<,<=,>,>= expr
1894/// expr ::= expr <<,>> expr
1895/// expr ::= expr +,- expr
1896/// expr ::= expr *,/,% expr -> highest.
1897/// expr ::= primaryexpr
1898///
1899bool MasmParser::parseExpression(const MCExpr *&Res, SMLoc &EndLoc) {
1900 // Parse the expression.
1901 Res = nullptr;
1902 if (getTargetParser().parsePrimaryExpr(Res, EndLoc) ||
1903 parseBinOpRHS(1, Res, EndLoc))
1904 return true;
1905
1906 // Try to constant fold it up front, if possible. Do not exploit
1907 // assembler here.
1908 int64_t Value;
1909 if (Res->evaluateAsAbsolute(Value))
1910 Res = MCConstantExpr::create(Value, getContext());
1911
1912 return false;
1913}
1914
1915bool MasmParser::parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) {
1916 Res = nullptr;
1917 return parseParenExpr(Res, EndLoc) || parseBinOpRHS(1, Res, EndLoc);
1918}
1919
1920bool MasmParser::parseParenExprOfDepth(unsigned ParenDepth, const MCExpr *&Res,
1921 SMLoc &EndLoc) {
1922 if (parseParenExpr(Res, EndLoc))
1923 return true;
1924
1925 for (; ParenDepth > 0; --ParenDepth) {
1926 if (parseBinOpRHS(1, Res, EndLoc))
1927 return true;
1928
1929 // We don't Lex() the last RParen.
1930 // This is the same behavior as parseParenExpression().
1931 if (ParenDepth - 1 > 0) {
1932 EndLoc = getTok().getEndLoc();
1933 if (parseRParen())
1934 return true;
1935 }
1936 }
1937 return false;
1938}
1939
1940bool MasmParser::parseAbsoluteExpression(int64_t &Res) {
1941 const MCExpr *Expr;
1942
1943 SMLoc StartLoc = Lexer.getLoc();
1944 if (parseExpression(Expr))
1945 return true;
1946
1947 if (!Expr->evaluateAsAbsolute(Res, getStreamer().getAssemblerPtr()))
1948 return Error(StartLoc, "expected absolute expression");
1949
1950 return false;
1951}
1952
1955 bool ShouldUseLogicalShr,
1956 bool EndExpressionAtGreater) {
// Map token kind K to a binary opcode (stored in Kind) and return its
// precedence; a larger value binds tighter, and 0 means "not a binary
// operator".
// NOTE(review): the first line(s) of this signature and several case labels
// are missing from this listing (e.g. the labels ahead of the EQ, NE, LTE,
// GTE and shift-right branches) -- confirm against the upstream file.
1957 switch (K) {
1958 default:
1959 return 0; // not a binop.
1960
1961 // Lowest Precedence: &&, ||
1962 case AsmToken::AmpAmp:
1963 Kind = MCBinaryExpr::LAnd;
1964 return 2;
1965 case AsmToken::PipePipe:
1966 Kind = MCBinaryExpr::LOr;
1967 return 1;
1968
1969 // Low Precedence: ==, !=, <>, <, <=, >, >=
1971 Kind = MCBinaryExpr::EQ;
1972 return 3;
1975 Kind = MCBinaryExpr::NE;
1976 return 3;
1977 case AsmToken::Less:
1978 Kind = MCBinaryExpr::LT;
1979 return 3;
1981 Kind = MCBinaryExpr::LTE;
1982 return 3;
1983 case AsmToken::Greater:
// When requested, '>' ends the expression instead of acting as an operator.
1984 if (EndExpressionAtGreater)
1985 return 0;
1986 Kind = MCBinaryExpr::GT;
1987 return 3;
1989 Kind = MCBinaryExpr::GTE;
1990 return 3;
1991
1992 // Low Intermediate Precedence: +, -
1993 case AsmToken::Plus:
1994 Kind = MCBinaryExpr::Add;
1995 return 4;
1996 case AsmToken::Minus:
1997 Kind = MCBinaryExpr::Sub;
1998 return 4;
1999
2000 // High Intermediate Precedence: |, &, ^
2001 case AsmToken::Pipe:
2002 Kind = MCBinaryExpr::Or;
2003 return 5;
2004 case AsmToken::Caret:
2005 Kind = MCBinaryExpr::Xor;
2006 return 5;
2007 case AsmToken::Amp:
2008 Kind = MCBinaryExpr::And;
2009 return 5;
2010
2011 // Highest Precedence: *, /, %, <<, >>
2012 case AsmToken::Star:
2013 Kind = MCBinaryExpr::Mul;
2014 return 6;
2015 case AsmToken::Slash:
2016 Kind = MCBinaryExpr::Div;
2017 return 6;
2018 case AsmToken::Percent:
2019 Kind = MCBinaryExpr::Mod;
2020 return 6;
2021 case AsmToken::LessLess:
2022 Kind = MCBinaryExpr::Shl;
2023 return 6;
2025 if (EndExpressionAtGreater)
2026 return 0;
// The shift-right flavour is chosen by the caller-supplied flag.
2027 Kind = ShouldUseLogicalShr ? MCBinaryExpr::LShr : MCBinaryExpr::AShr;
2028 return 6;
2029 }
2030}
2031
2032unsigned MasmParser::getBinOpPrecedence(AsmToken::TokenKind K,
2033 MCBinaryExpr::Opcode &Kind) {
2034 bool ShouldUseLogicalShr = MAI.shouldUseLogicalShr();
2035 return getGNUBinOpPrecedence(K, Kind, ShouldUseLogicalShr,
2036 AngleBracketDepth > 0);
2037}
2038
2039/// Parse all binary operators with precedence >= 'Precedence'.
2040/// Res contains the LHS of the expression on input.
// On each iteration the parsed operator and its right-hand side are folded
// into Res, so Res holds the combined expression when the function returns.
// Returns true on error (a failed primary-expression parse or a failed
// recursive call) and false once the next token's precedence is lower than
// 'Precedence'. EndLoc is passed through to the primary-expression parser.
//
// NOTE(review): several source lines of this definition are absent from this
// listing (the declarations of TokKind, Kind and Dummy, and most of the
// identifier-to-operator mapping) -- confirm against the real source.
2041bool MasmParser::parseBinOpRHS(unsigned Precedence, const MCExpr *&Res,
2042 SMLoc &EndLoc) {
// Captured once on entry; every binary expression created below uses it.
2043 SMLoc StartLoc = Lexer.getLoc();
2044 while (true) {
// Identifier spellings of operators (e.g. "and", "xor") are mapped to the
// corresponding operator token kinds; anything else keeps TokKind.
2046 if (Lexer.getKind() == AsmToken::Identifier) {
2048 .CaseLower("and", AsmToken::Amp)
2051 .CaseLower("xor", AsmToken::Caret)
2060 .Default(TokKind);
2061 }
2063 unsigned TokPrec = getBinOpPrecedence(TokKind, Kind);
2064
2065 // If the next token is lower precedence than we are allowed to eat, return
2066 // successfully with what we ate already.
2067 if (TokPrec < Precedence)
2068 return false;
2069
// Consume the operator token.
2070 Lex();
2071
2072 // Eat the next primary expression.
2073 const MCExpr *RHS;
2074 if (getTargetParser().parsePrimaryExpr(RHS, EndLoc))
2075 return true;
2076
2077 // If BinOp binds less tightly with RHS than the operator after RHS, let
2078 // the pending operator take RHS as its LHS.
// NOTE(review): the look-ahead below uses Lexer.getKind() directly, so it does
// not go through the identifier mapping applied at the top of the loop.
2080 unsigned NextTokPrec = getBinOpPrecedence(Lexer.getKind(), Dummy);
2081 if (TokPrec < NextTokPrec && parseBinOpRHS(TokPrec + 1, RHS, EndLoc))
2082 return true;
2083
2084 // Merge LHS and RHS according to operator.
2085 Res = MCBinaryExpr::create(Kind, Res, RHS, getContext(), StartLoc);
2086 }
2087}
2088
// Parses a single statement from the token stream.
//
// Visible flow, in order:
//   1. skip leading spaces; an empty statement is consumed and returns false;
//   2. a leading '%' triggers expandStatement();
//   3. read the leading identifier (also accepting '.' and ".<number>");
//   4. dispatch conditional-assembly directives, which run even while
//      TheCondState.Ignore is set;
//   5. if ignoring, discard the rest of the statement;
//   6. "identifier ':'" labels;
//   7. macro invocations;
//   8. prefix directives (extension handlers, target parser, generic);
//   9. data of a user-defined struct type;
//  10. infix directives, where the name comes first and the directive second;
//  11. MS inline-asm specials, and finally a target instruction.
//
// Returns true on error and false otherwise, following the parser convention
// mentioned in the body ("return true = error").
//
// NOTE(review): a few source lines of this definition are absent from this
// listing (for example the second parameter line; `SI`, used below, is
// presumably declared there) -- confirm against the real source.
2089/// ParseStatement:
2090/// ::= % statement
2091/// ::= EndOfStatement
2092/// ::= Label* Directive ...Operands... EndOfStatement
2093/// ::= Label* Identifier OperandList* EndOfStatement
2094bool MasmParser::parseStatement(ParseStatementInfo &Info,
2096 assert(!hasPendingError() && "parseStatement started with pending error");
2097 // Eat initial spaces and comments.
2098 while (Lexer.is(AsmToken::Space))
2099 Lex();
2100 if (Lexer.is(AsmToken::EndOfStatement)) {
2101 // If this is a line comment we can drop it safely.
2102 if (getTok().getString().empty() || getTok().getString().front() == '\r' ||
2103 getTok().getString().front() == '\n')
2104 Out.addBlankLine();
2105 Lex();
2106 return false;
2107 }
2108
2109 // If preceded by an expansion operator, first expand all text macros and
2110 // macro functions.
2111 if (getTok().is(AsmToken::Percent)) {
2112 SMLoc ExpansionLoc = getTok().getLoc();
2113 if (parseToken(AsmToken::Percent) || expandStatement(ExpansionLoc))
2114 return true;
2115 }
2116
2117 // Statements always start with an identifier, unless we're dealing with a
2118 // processor directive (.386, .686, etc.) that lexes as a real.
2119 AsmToken ID = getTok();
2120 SMLoc IDLoc = ID.getLoc();
2121 StringRef IDVal;
2122 if (Lexer.is(AsmToken::HashDirective))
2123 return parseCppHashLineFilenameComment(IDLoc);
2124 if (Lexer.is(AsmToken::Dot)) {
2125 // Treat '.' as a valid identifier in this context.
2126 Lex();
2127 IDVal = ".";
2128 } else if (Lexer.is(AsmToken::Real)) {
2129 // Treat ".<number>" as a valid identifier in this context.
2130 IDVal = getTok().getString();
2131 Lex(); // always eat a token
2132 if (!IDVal.startswith("."))
2133 return Error(IDLoc, "unexpected token at start of statement");
2134 } else if (parseIdentifier(IDVal, StartOfStatement)) {
// While ignoring (inactive conditional block) a non-identifier start is not
// an error; continue with an empty name.
2135 if (!TheCondState.Ignore) {
2136 Lex(); // always eat a token
2137 return Error(IDLoc, "unexpected token at start of statement");
2138 }
2139 IDVal = "";
2140 }
2141
2142 // Handle conditional assembly here before checking for skipping. We
2143 // have to do this so that .endif isn't skipped in a ".if 0" block for
2144 // example.
// Directive names are looked up by their lower-cased spelling.
2146 DirectiveKindMap.find(IDVal.lower());
2147 DirectiveKind DirKind = (DirKindIt == DirectiveKindMap.end())
2148 ? DK_NO_DIRECTIVE
2149 : DirKindIt->getValue();
2150 switch (DirKind) {
2151 default:
2152 break;
2153 case DK_IF:
2154 case DK_IFE:
2155 return parseDirectiveIf(IDLoc, DirKind);
2156 case DK_IFB:
2157 return parseDirectiveIfb(IDLoc, true);
2158 case DK_IFNB:
2159 return parseDirectiveIfb(IDLoc, false);
2160 case DK_IFDEF:
2161 return parseDirectiveIfdef(IDLoc, true);
2162 case DK_IFNDEF:
2163 return parseDirectiveIfdef(IDLoc, false);
2164 case DK_IFDIF:
2165 return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/false,
2166 /*CaseInsensitive=*/false);
2167 case DK_IFDIFI:
2168 return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/false,
2169 /*CaseInsensitive=*/true);
2170 case DK_IFIDN:
2171 return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/true,
2172 /*CaseInsensitive=*/false);
2173 case DK_IFIDNI:
2174 return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/true,
2175 /*CaseInsensitive=*/true);
2176 case DK_ELSEIF:
2177 case DK_ELSEIFE:
2178 return parseDirectiveElseIf(IDLoc, DirKind);
2179 case DK_ELSEIFB:
2180 return parseDirectiveElseIfb(IDLoc, true);
2181 case DK_ELSEIFNB:
2182 return parseDirectiveElseIfb(IDLoc, false);
2183 case DK_ELSEIFDEF:
2184 return parseDirectiveElseIfdef(IDLoc, true);
2185 case DK_ELSEIFNDEF:
2186 return parseDirectiveElseIfdef(IDLoc, false);
2187 case DK_ELSEIFDIF:
2188 return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/false,
2189 /*CaseInsensitive=*/false);
2190 case DK_ELSEIFDIFI:
2191 return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/false,
2192 /*CaseInsensitive=*/true);
2193 case DK_ELSEIFIDN:
2194 return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/true,
2195 /*CaseInsensitive=*/false);
2196 case DK_ELSEIFIDNI:
2197 return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/true,
2198 /*CaseInsensitive=*/true);
2199 case DK_ELSE:
2200 return parseDirectiveElse(IDLoc);
2201 case DK_ENDIF:
2202 return parseDirectiveEndIf(IDLoc);
2203 }
2204
2205 // Ignore the statement if in the middle of inactive conditional
2206 // (e.g. ".if 0").
2207 if (TheCondState.Ignore) {
2208 eatToEndOfStatement();
2209 return false;
2210 }
2211
2212 // FIXME: Recurse on local labels?
2213
2214 // Check for a label.
2215 // ::= identifier ':'
2216 // ::= number ':'
2217 if (Lexer.is(AsmToken::Colon) && getTargetParser().isLabel(ID)) {
2218 if (checkForValidSection())
2219 return true;
2220
2221 // identifier ':' -> Label.
2222 Lex();
2223
2224 // Diagnose attempt to use '.' as a label.
2225 if (IDVal == ".")
2226 return Error(IDLoc, "invalid use of pseudo-symbol '.' as a label");
2227
2228 // Diagnose attempt to use a variable as a label.
2229 //
2230 // FIXME: Diagnostics. Note the location of the definition as a label.
2231 // FIXME: This doesn't diagnose assignment to a symbol which has been
2232 // implicitly marked as external.
2233 MCSymbol *Sym;
// In MS inline asm with a semantic callback, replace the label with the
// callback's internal name and record the rewrite.
2234 if (ParsingMSInlineAsm && SI) {
2235 StringRef RewrittenLabel =
2236 SI->LookupInlineAsmLabel(IDVal, getSourceManager(), IDLoc, true);
2237 assert(!RewrittenLabel.empty() &&
2238 "We should have an internal name here.");
2239 Info.AsmRewrites->emplace_back(AOK_Label, IDLoc, IDVal.size(),
2240 RewrittenLabel);
2241 IDVal = RewrittenLabel;
2242 }
2243 // Handle directional local labels
2244 if (IDVal == "@@") {
2245 Sym = Ctx.createDirectionalLocalSymbol(0);
2246 } else {
2247 Sym = getContext().getOrCreateSymbol(IDVal);
2248 }
2249
2250 // End of Labels should be treated as end of line for lexing
2251 // purposes but that information is not available to the Lexer who
2252 // does not understand Labels. This may cause us to see a Hash
2253 // here instead of a preprocessor line comment.
2254 if (getTok().is(AsmToken::Hash)) {
2255 std::string CommentStr = parseStringTo(AsmToken::EndOfStatement);
2256 Lexer.Lex();
2257 Lexer.UnLex(AsmToken(AsmToken::EndOfStatement, CommentStr));
2258 }
2259
2260 // Consume any end of statement token, if present, to avoid spurious
2261 // addBlankLine() calls.
2262 if (getTok().is(AsmToken::EndOfStatement)) {
2263 Lex();
2264 }
2265
2266 getTargetParser().doBeforeLabelEmit(Sym, IDLoc);
2267
2268 // Emit the label.
2269 if (!getTargetParser().isParsingMSInlineAsm())
2270 Out.emitLabel(Sym, IDLoc);
2271
2272 // If we are generating dwarf for assembly source files then gather the
2273 // info to make a dwarf label entry for this label if needed.
2274 if (enabledGenDwarfForAssembly())
2275 MCGenDwarfLabelEntry::Make(Sym, &getStreamer(), getSourceManager(),
2276 IDLoc);
2277
2278 getTargetParser().onLabelParsed(Sym);
2279
2280 return false;
2281 }
2282
2283 // If macros are enabled, check to see if this is a macro instantiation.
2284 if (const MCAsmMacro *M = getContext().lookupMacro(IDVal.lower())) {
2285 return handleMacroEntry(M, IDLoc);
2286 }
2287
2288 // Otherwise, we have a normal instruction or directive.
2289
2290 if (DirKind != DK_NO_DIRECTIVE) {
2291 // There are several entities interested in parsing directives:
2292 //
2293 // 1. Asm parser extensions. For example, platform-specific parsers
2294 // (like the ELF parser) register themselves as extensions.
2295 // 2. The target-specific assembly parser. Some directives are target
2296 // specific or may potentially behave differently on certain targets.
2297 // 3. The generic directive parser implemented by this class. These are
2298 // all the directives that behave in a target and platform independent
2299 // manner, or at least have a default behavior that's shared between
2300 // all targets and platforms.
2301
2302 getTargetParser().flushPendingInstructions(getStreamer());
2303
2304 // Special-case handling of structure-end directives at higher priority,
2305 // since ENDS is overloaded as a segment-end directive.
2306 if (IDVal.equals_insensitive("ends") && StructInProgress.size() > 1 &&
2307 getTok().is(AsmToken::EndOfStatement)) {
2308 return parseDirectiveNestedEnds();
2309 }
2310
2311 // First, check the extension directive map to see if any extension has
2312 // registered itself to parse this directive.
2313 std::pair<MCAsmParserExtension *, DirectiveHandler> Handler =
2314 ExtensionDirectiveMap.lookup(IDVal.lower());
2315 if (Handler.first)
2316 return (*Handler.second)(Handler.first, IDVal, IDLoc);
2317
2318 // Next, let the target-specific assembly parser try.
2319 SMLoc StartTokLoc = getTok().getLoc();
2320 bool TPDirectiveReturn =
2321 ID.is(AsmToken::Identifier) && getTargetParser().ParseDirective(ID);
2322
2323 if (hasPendingError())
2324 return true;
2325 // Currently the return value should be true if we are
2326 // uninterested but as this is at odds with the standard parsing
2327 // convention (return true = error) we have instances of a parsed
2328 // directive that fails returning true as an error. Catch these
2329 // cases as best as possible errors here.
2330 if (TPDirectiveReturn && StartTokLoc != getTok().getLoc())
2331 return true;
2332 // Return if we did some parsing or believe we succeeded.
2333 if (!TPDirectiveReturn || StartTokLoc != getTok().getLoc())
2334 return false;
2335
2336 // Finally, if no one else is interested in this directive, it must be
2337 // generic and familiar to this class.
2338 switch (DirKind) {
2339 default:
2340 break;
2341 case DK_ASCII:
2342 return parseDirectiveAscii(IDVal, false);
2343 case DK_ASCIZ:
2344 case DK_STRING:
2345 return parseDirectiveAscii(IDVal, true);
2346 case DK_BYTE:
2347 case DK_SBYTE:
2348 case DK_DB:
2349 return parseDirectiveValue(IDVal, 1);
2350 case DK_WORD:
2351 case DK_SWORD:
2352 case DK_DW:
2353 return parseDirectiveValue(IDVal, 2);
2354 case DK_DWORD:
2355 case DK_SDWORD:
2356 case DK_DD:
2357 return parseDirectiveValue(IDVal, 4);
2358 case DK_FWORD:
2359 case DK_DF:
2360 return parseDirectiveValue(IDVal, 6);
2361 case DK_QWORD:
2362 case DK_SQWORD:
2363 case DK_DQ:
2364 return parseDirectiveValue(IDVal, 8);
2365 case DK_REAL4:
2366 return parseDirectiveRealValue(IDVal, APFloat::IEEEsingle(), 4);
2367 case DK_REAL8:
2368 return parseDirectiveRealValue(IDVal, APFloat::IEEEdouble(), 8);
2369 case DK_REAL10:
2370 return parseDirectiveRealValue(IDVal, APFloat::x87DoubleExtended(), 10);
2371 case DK_STRUCT:
2372 case DK_UNION:
2373 return parseDirectiveNestedStruct(IDVal, DirKind);
2374 case DK_ENDS:
2375 return parseDirectiveNestedEnds();
2376 case DK_ALIGN:
2377 return parseDirectiveAlign();
2378 case DK_EVEN:
2379 return parseDirectiveEven();
2380 case DK_ORG:
2381 return parseDirectiveOrg();
2382 case DK_EXTERN:
2383 return parseDirectiveExtern();
2384 case DK_PUBLIC:
2385 return parseDirectiveSymbolAttribute(MCSA_Global);
2386 case DK_COMM:
2387 return parseDirectiveComm(/*IsLocal=*/false);
2388 case DK_COMMENT:
2389 return parseDirectiveComment(IDLoc);
2390 case DK_INCLUDE:
2391 return parseDirectiveInclude();
2392 case DK_REPEAT:
2393 return parseDirectiveRepeat(IDLoc, IDVal);
2394 case DK_WHILE:
2395 return parseDirectiveWhile(IDLoc);
2396 case DK_FOR:
2397 return parseDirectiveFor(IDLoc, IDVal);
2398 case DK_FORC:
2399 return parseDirectiveForc(IDLoc, IDVal);
2400 case DK_FILE:
2401 return parseDirectiveFile(IDLoc);
2402 case DK_LINE:
2403 return parseDirectiveLine();
2404 case DK_LOC:
2405 return parseDirectiveLoc();
2406 case DK_STABS:
2407 return parseDirectiveStabs();
2408 case DK_CV_FILE:
2409 return parseDirectiveCVFile();
2410 case DK_CV_FUNC_ID:
2411 return parseDirectiveCVFuncId();
2412 case DK_CV_INLINE_SITE_ID:
2413 return parseDirectiveCVInlineSiteId();
2414 case DK_CV_LOC:
2415 return parseDirectiveCVLoc();
2416 case DK_CV_LINETABLE:
2417 return parseDirectiveCVLinetable();
2418 case DK_CV_INLINE_LINETABLE:
2419 return parseDirectiveCVInlineLinetable();
2420 case DK_CV_DEF_RANGE:
2421 return parseDirectiveCVDefRange();
2422 case DK_CV_STRING:
2423 return parseDirectiveCVString();
2424 case DK_CV_STRINGTABLE:
2425 return parseDirectiveCVStringTable();
2426 case DK_CV_FILECHECKSUMS:
2427 return parseDirectiveCVFileChecksums();
2428 case DK_CV_FILECHECKSUM_OFFSET:
2429 return parseDirectiveCVFileChecksumOffset();
2430 case DK_CV_FPO_DATA:
2431 return parseDirectiveCVFPOData();
2432 case DK_CFI_SECTIONS:
2433 return parseDirectiveCFISections();
2434 case DK_CFI_STARTPROC:
2435 return parseDirectiveCFIStartProc();
2436 case DK_CFI_ENDPROC:
2437 return parseDirectiveCFIEndProc();
2438 case DK_CFI_DEF_CFA:
2439 return parseDirectiveCFIDefCfa(IDLoc);
2440 case DK_CFI_DEF_CFA_OFFSET:
2441 return parseDirectiveCFIDefCfaOffset();
2442 case DK_CFI_ADJUST_CFA_OFFSET:
2443 return parseDirectiveCFIAdjustCfaOffset();
2444 case DK_CFI_DEF_CFA_REGISTER:
2445 return parseDirectiveCFIDefCfaRegister(IDLoc);
2446 case DK_CFI_OFFSET:
2447 return parseDirectiveCFIOffset(IDLoc);
2448 case DK_CFI_REL_OFFSET:
2449 return parseDirectiveCFIRelOffset(IDLoc);
2450 case DK_CFI_PERSONALITY:
2451 return parseDirectiveCFIPersonalityOrLsda(true);
2452 case DK_CFI_LSDA:
2453 return parseDirectiveCFIPersonalityOrLsda(false);
2454 case DK_CFI_REMEMBER_STATE:
2455 return parseDirectiveCFIRememberState();
2456 case DK_CFI_RESTORE_STATE:
2457 return parseDirectiveCFIRestoreState();
2458 case DK_CFI_SAME_VALUE:
2459 return parseDirectiveCFISameValue(IDLoc);
2460 case DK_CFI_RESTORE:
2461 return parseDirectiveCFIRestore(IDLoc);
2462 case DK_CFI_ESCAPE:
2463 return parseDirectiveCFIEscape();
2464 case DK_CFI_RETURN_COLUMN:
2465 return parseDirectiveCFIReturnColumn(IDLoc);
2466 case DK_CFI_SIGNAL_FRAME:
2467 return parseDirectiveCFISignalFrame();
2468 case DK_CFI_UNDEFINED:
2469 return parseDirectiveCFIUndefined(IDLoc);
2470 case DK_CFI_REGISTER:
2471 return parseDirectiveCFIRegister(IDLoc);
2472 case DK_CFI_WINDOW_SAVE:
2473 return parseDirectiveCFIWindowSave();
2474 case DK_EXITM:
// Give Info.ExitValue an (empty) value first; it is dereferenced in the call.
2475 Info.ExitValue = "";
2476 return parseDirectiveExitMacro(IDLoc, IDVal, *Info.ExitValue);
2477 case DK_ENDM:
2478 Info.ExitValue = "";
2479 return parseDirectiveEndMacro(IDVal);
2480 case DK_PURGE:
2481 return parseDirectivePurgeMacro(IDLoc);
2482 case DK_END:
2483 return parseDirectiveEnd(IDLoc);
2484 case DK_ERR:
2485 return parseDirectiveError(IDLoc);
2486 case DK_ERRB:
2487 return parseDirectiveErrorIfb(IDLoc, true);
2488 case DK_ERRNB:
2489 return parseDirectiveErrorIfb(IDLoc, false);
2490 case DK_ERRDEF:
2491 return parseDirectiveErrorIfdef(IDLoc, true);
2492 case DK_ERRNDEF:
2493 return parseDirectiveErrorIfdef(IDLoc, false);
2494 case DK_ERRDIF:
2495 return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/false,
2496 /*CaseInsensitive=*/false);
2497 case DK_ERRDIFI:
2498 return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/false,
2499 /*CaseInsensitive=*/true);
2500 case DK_ERRIDN:
2501 return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/true,
2502 /*CaseInsensitive=*/false);
2503 case DK_ERRIDNI:
2504 return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/true,
2505 /*CaseInsensitive=*/true);
2506 case DK_ERRE:
2507 return parseDirectiveErrorIfe(IDLoc, true);
2508 case DK_ERRNZ:
2509 return parseDirectiveErrorIfe(IDLoc, false);
2510 case DK_RADIX:
2511 return parseDirectiveRadix(IDLoc);
2512 case DK_ECHO:
2513 return parseDirectiveEcho(IDLoc);
2514 }
2515
2516 return Error(IDLoc, "unknown directive");
2517 }
2518
2519 // We also check if this is allocating memory with user-defined type.
2520 auto IDIt = Structs.find(IDVal.lower());
2521 if (IDIt != Structs.end())
2522 return parseDirectiveStructValue(/*Structure=*/IDIt->getValue(), IDVal,
2523 IDLoc);
2524
2525 // Non-conditional Microsoft directives sometimes follow their first argument.
// From here on IDVal is treated as the name and the *next* token as the
// candidate directive.
2526 const AsmToken nextTok = getTok();
2527 const StringRef nextVal = nextTok.getString();
2528 const SMLoc nextLoc = nextTok.getLoc();
2529
2530 const AsmToken afterNextTok = peekTok();
2531
2532 // There are several entities interested in parsing infix directives:
2533 //
2534 // 1. Asm parser extensions. For example, platform-specific parsers
2535 // (like the ELF parser) register themselves as extensions.
2536 // 2. The generic directive parser implemented by this class. These are
2537 // all the directives that behave in a target and platform independent
2538 // manner, or at least have a default behavior that's shared between
2539 // all targets and platforms.
2540
2541 getTargetParser().flushPendingInstructions(getStreamer());
2542
2543 // Special-case handling of structure-end directives at higher priority, since
2544 // ENDS is overloaded as a segment-end directive.
2545 if (nextVal.equals_insensitive("ends") && StructInProgress.size() == 1) {
2546 Lex();
2547 return parseDirectiveEnds(IDVal, IDLoc);
2548 }
2549
2550 // First, check the extension directive map to see if any extension has
2551 // registered itself to parse this directive.
2552 std::pair<MCAsmParserExtension *, DirectiveHandler> Handler =
2553 ExtensionDirectiveMap.lookup(nextVal.lower());
2554 if (Handler.first) {
// Consume the directive token and push the leading identifier back so the
// extension handler sees it as the next token.
2555 Lex();
2556 Lexer.UnLex(ID);
2557 return (*Handler.second)(Handler.first, nextVal, nextLoc);
2558 }
2559
2560 // If no one else is interested in this directive, it must be
2561 // generic and familiar to this class.
2562 DirKindIt = DirectiveKindMap.find(nextVal.lower());
2563 DirKind = (DirKindIt == DirectiveKindMap.end())
2564 ? DK_NO_DIRECTIVE
2565 : DirKindIt->getValue();
2566 switch (DirKind) {
2567 default:
2568 break;
2569 case DK_ASSIGN:
2570 case DK_EQU:
2571 case DK_TEXTEQU:
2572 Lex();
2573 return parseDirectiveEquate(nextVal, IDVal, DirKind, IDLoc);
2574 case DK_BYTE:
2575 if (afterNextTok.is(AsmToken::Identifier) &&
2576 afterNextTok.getString().equals_insensitive("ptr")) {
2577 // Size directive; part of an instruction.
2578 break;
2579 }
2580 [[fallthrough]];
2581 case DK_SBYTE:
2582 case DK_DB:
2583 Lex();
2584 return parseDirectiveNamedValue(nextVal, 1, IDVal, IDLoc);
2585 case DK_WORD:
2586 if (afterNextTok.is(AsmToken::Identifier) &&
2587 afterNextTok.getString().equals_insensitive("ptr")) {
2588 // Size directive; part of an instruction.
2589 break;
2590 }
2591 [[fallthrough]];
2592 case DK_SWORD:
2593 case DK_DW:
2594 Lex();
2595 return parseDirectiveNamedValue(nextVal, 2, IDVal, IDLoc);
2596 case DK_DWORD:
2597 if (afterNextTok.is(AsmToken::Identifier) &&
2598 afterNextTok.getString().equals_insensitive("ptr")) {
2599 // Size directive; part of an instruction.
2600 break;
2601 }
2602 [[fallthrough]];
2603 case DK_SDWORD:
2604 case DK_DD:
2605 Lex();
2606 return parseDirectiveNamedValue(nextVal, 4, IDVal, IDLoc);
2607 case DK_FWORD:
2608 if (afterNextTok.is(AsmToken::Identifier) &&
2609 afterNextTok.getString().equals_insensitive("ptr")) {
2610 // Size directive; part of an instruction.
2611 break;
2612 }
2613 [[fallthrough]];
2614 case DK_DF:
2615 Lex();
2616 return parseDirectiveNamedValue(nextVal, 6, IDVal, IDLoc);
2617 case DK_QWORD:
2618 if (afterNextTok.is(AsmToken::Identifier) &&
2619 afterNextTok.getString().equals_insensitive("ptr")) {
2620 // Size directive; part of an instruction.
2621 break;
2622 }
2623 [[fallthrough]];
2624 case DK_SQWORD:
2625 case DK_DQ:
2626 Lex();
2627 return parseDirectiveNamedValue(nextVal, 8, IDVal, IDLoc);
2628 case DK_REAL4:
2629 Lex();
2630 return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEsingle(), 4,
2631 IDVal, IDLoc);
2632 case DK_REAL8:
2633 Lex();
2634 return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEdouble(), 8,
2635 IDVal, IDLoc);
2636 case DK_REAL10:
2637 Lex();
2638 return parseDirectiveNamedRealValue(nextVal, APFloat::x87DoubleExtended(),
2639 10, IDVal, IDLoc);
2640 case DK_STRUCT:
2641 case DK_UNION:
2642 Lex();
2643 return parseDirectiveStruct(nextVal, DirKind, IDVal, IDLoc);
2644 case DK_ENDS:
2645 Lex();
2646 return parseDirectiveEnds(IDVal, IDLoc);
2647 case DK_MACRO:
2648 Lex();
2649 return parseDirectiveMacro(IDVal, IDLoc);
2650 }
2651
2652 // Finally, we check if this is allocating a variable with user-defined type.
2653 auto NextIt = Structs.find(nextVal.lower());
2654 if (NextIt != Structs.end()) {
2655 Lex();
2656 return parseDirectiveNamedStructValue(/*Structure=*/NextIt->getValue(),
2657 nextVal, nextLoc, IDVal);
2658 }
2659
2660 // __asm _emit or __asm __emit
2661 if (ParsingMSInlineAsm && (IDVal == "_emit" || IDVal == "__emit" ||
2662 IDVal == "_EMIT" || IDVal == "__EMIT"))
2663 return parseDirectiveMSEmit(IDLoc, Info, IDVal.size());
2664
2665 // __asm align
2666 if (ParsingMSInlineAsm && (IDVal == "align" || IDVal == "ALIGN"))
2667 return parseDirectiveMSAlign(IDLoc, Info);
2668
// __asm even: recorded as a rewrite only; unlike the cases above, parsing
// continues below. The literal 4 is presumably the length of the keyword.
2669 if (ParsingMSInlineAsm && (IDVal == "even" || IDVal == "EVEN"))
2670 Info.AsmRewrites->emplace_back(AOK_EVEN, IDLoc, 4);
2671 if (checkForValidSection())
2672 return true;
2673
2674 // Canonicalize the opcode to lower case.
2675 std::string OpcodeStr = IDVal.lower();
2676 ParseInstructionInfo IInfo(Info.AsmRewrites);
2677 bool ParseHadError = getTargetParser().ParseInstruction(IInfo, OpcodeStr, ID,
2678 Info.ParsedOperands);
2679 Info.ParseError = ParseHadError;
2680
2681 // Dump the parsed representation, if requested.
2682 if (getShowParsedOperands()) {
2683 SmallString<256> Str;
2685 OS << "parsed instruction: [";
2686 for (unsigned i = 0; i != Info.ParsedOperands.size(); ++i) {
2687 if (i != 0)
2688 OS << ", ";
2689 Info.ParsedOperands[i]->print(OS);
2690 }
2691 OS << "]";
2692
2693 printMessage(IDLoc, SourceMgr::DK_Note, OS.str());
2694 }
2695
2696 // Fail even if ParseInstruction erroneously returns false.
2697 if (hasPendingError() || ParseHadError)
2698 return true;
2699
2700 // If we are generating dwarf for the current section then generate a .loc
2701 // directive for the instruction.
// NOTE(review): !ParseHadError is always true here, because the early return
// just above already handled the error case.
2702 if (!ParseHadError && enabledGenDwarfForAssembly() &&
2703 getContext().getGenDwarfSectionSyms().count(
2704 getStreamer().getCurrentSectionOnly())) {
2705 unsigned Line;
// Inside a macro expansion, attribute the line to the instantiation site of
// the first entry in ActiveMacros.
2706 if (ActiveMacros.empty())
2707 Line = SrcMgr.FindLineNumber(IDLoc, CurBuffer);
2708 else
2709 Line = SrcMgr.FindLineNumber(ActiveMacros.front()->InstantiationLoc,
2710 ActiveMacros.front()->ExitBuffer);
2711
2712 // If we previously parsed a cpp hash file line comment then make sure the
2713 // current Dwarf File is for the CppHashFilename if not then emit the
2714 // Dwarf File table for it and adjust the line number for the .loc.
2715 if (!CppHashInfo.Filename.empty()) {
2716 unsigned FileNumber = getStreamer().emitDwarfFileDirective(
2717 0, StringRef(), CppHashInfo.Filename);
2718 getContext().setGenDwarfFileNumber(FileNumber);
2719
2720 unsigned CppHashLocLineNo =
2721 SrcMgr.FindLineNumber(CppHashInfo.Loc, CppHashInfo.Buf);
2722 Line = CppHashInfo.LineNumber - 1 + (Line - CppHashLocLineNo);
2723 }
2724
2725 getStreamer().emitDwarfLocDirective(
2726 getContext().getGenDwarfFileNumber(), Line, 0,
2728 StringRef());
2729 }
2730
2731 // If parsing succeeded, match the instruction.
// NOTE(review): this condition is likewise always true at this point.
2732 if (!ParseHadError) {
2734 if (getTargetParser().MatchAndEmitInstruction(
2735 IDLoc, Info.Opcode, Info.ParsedOperands, Out, ErrorInfo,
2736 getTargetParser().isParsingMSInlineAsm()))
2737 return true;
2738 }
2739 return false;
2740}
2741
2742// Parse and erase curly braces marking block start/end.
2743bool MasmParser::parseCurlyBlockScope(
2744 SmallVectorImpl<AsmRewrite> &AsmStrRewrites) {
2745 // Identify curly brace marking block start/end.
2746 if (Lexer.isNot(AsmToken::LCurly) && Lexer.isNot(AsmToken::RCurly))
2747 return false;
2748
2749 SMLoc StartLoc = Lexer.getLoc();
2750 Lex(); // Eat the brace.
2751 if (Lexer.is(AsmToken::EndOfStatement))
2752 Lex(); // Eat EndOfStatement following the brace.
2753
2754 // Erase the block start/end brace from the output asm string.
2755 AsmStrRewrites.emplace_back(AOK_Skip, StartLoc, Lexer.getLoc().getPointer() -
2756 StartLoc.getPointer());
2757 return true;
2758}
2759
2760/// parseCppHashLineFilenameComment as this:
2761/// ::= # number "filename"
2762bool MasmParser::parseCppHashLineFilenameComment(SMLoc L) {
2763 Lex(); // Eat the hash token.
2764 // Lexer only ever emits HashDirective if it fully formed if it's
2765 // done the checking already so this is an internal error.
2766 assert(getTok().is(AsmToken::Integer) &&
2767 "Lexing Cpp line comment: Expected Integer");
2768 int64_t LineNumber = getTok().getIntVal();
2769 Lex();
2770 assert(getTok().is(AsmToken::String) &&
2771 "Lexing Cpp line comment: Expected String");
2772 StringRef Filename = getTok().getString();
2773 Lex();
2774
2775 // Get rid of the enclosing quotes.
2776 Filename = Filename.substr(1, Filename.size() - 2);
2777
2778 // Save the SMLoc, Filename and LineNumber for later use by diagnostics
2779 // and possibly DWARF file info.
2780 CppHashInfo.Loc = L;
2781 CppHashInfo.Filename = Filename;
2782 CppHashInfo.LineNumber = LineNumber;
2783 CppHashInfo.Buf = CurBuffer;
2784 if (FirstCppHashFilename.empty())
2785 FirstCppHashFilename = Filename;
2786 return false;
2787}
2788
2789/// will use the last parsed cpp hash line filename comment
2790/// for the Filename and LineNo if any in the diagnostic.
2791void MasmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) {
2792 const MasmParser *Parser = static_cast<const MasmParser *>(Context);
2793 raw_ostream &OS = errs();
2794
2795 const SourceMgr &DiagSrcMgr = *Diag.getSourceMgr();
2796 SMLoc DiagLoc = Diag.getLoc();
2797 unsigned DiagBuf = DiagSrcMgr.FindBufferContainingLoc(DiagLoc);
2798 unsigned CppHashBuf =
2799 Parser->SrcMgr.FindBufferContainingLoc(Parser->CppHashInfo.Loc);
2800
2801 // Like SourceMgr::printMessage() we need to print the include stack if any
2802 // before printing the message.
2803 unsigned DiagCurBuffer = DiagSrcMgr.FindBufferContainingLoc(DiagLoc);
2804 if (!Parser->SavedDiagHandler && DiagCurBuffer &&
2805 DiagCurBuffer != DiagSrcMgr.getMainFileID()) {
2806 SMLoc ParentIncludeLoc = DiagSrcMgr.getParentIncludeLoc(DiagCurBuffer);
2807 DiagSrcMgr.PrintIncludeStack(ParentIncludeLoc, OS);
2808 }
2809
2810 // If we have not parsed a cpp hash line filename comment or the source
2811 // manager changed or buffer changed (like in a nested include) then just
2812 // print the normal diagnostic using its Filename and LineNo.
2813 if (!Parser->CppHashInfo.LineNumber || &DiagSrcMgr != &Parser->SrcMgr ||
2814 DiagBuf != CppHashBuf) {
2815 if (Parser->SavedDiagHandler)
2816 Parser->SavedDiagHandler(Diag, Parser->SavedDiagContext);
2817 else
2818 Diag.print(nullptr, OS);
2819 return;
2820 }
2821
2822 // Use the CppHashFilename and calculate a line number based on the
2823 // CppHashInfo.Loc and CppHashInfo.LineNumber relative to this Diag's SMLoc
2824 // for the diagnostic.
2825 const std::string &Filename = std::string(Parser->CppHashInfo.Filename);
2826
2827 int DiagLocLineNo = DiagSrcMgr.FindLineNumber(DiagLoc, DiagBuf);
2828 int CppHashLocLineNo =
2829 Parser->SrcMgr.FindLineNumber(Parser->CppHashInfo.Loc, CppHashBuf);
2830 int LineNo =
2831 Parser->CppHashInfo.LineNumber - 1 + (DiagLocLineNo - CppHashLocLineNo);
2832
2833 SMDiagnostic NewDiag(*Diag.getSourceMgr(), Diag.getLoc(), Filename, LineNo,
2834 Diag.getColumnNo(), Diag.getKind(), Diag.getMessage(),
2835 Diag.getLineContents(), Diag.getRanges());
2836
2837 if (Parser->SavedDiagHandler)
2838 Parser->SavedDiagHandler(NewDiag, Parser->SavedDiagContext);
2839 else
2840 NewDiag.print(nullptr, OS);
2841}
2842
2843// This is similar to the IsIdentifierChar function in AsmLexer.cpp, but does
2844// not accept '.'.
2845static bool isMacroParameterChar(char C) {
2846 return isAlnum(C) || C == '_' || C == '$' || C == '@' || C == '?';
2847}
2848
// Expands a macro body into OS.
//
// Body is scanned left to right. Each candidate name (a run of macro-parameter
// characters, optionally introduced by '&') is compared case-insensitively
// against Parameters: on a match the tokens of the corresponding argument in
// A are written out; otherwise the name is replaced by its generated local
// symbol if it is one of Locals, or copied through unchanged. All other text
// is copied verbatim.
//
// Returns the result of Error() when the argument count does not match the
// parameter count, and false otherwise.
//
// NOTE(review): two signature lines (declaring Parameters and A) and the
// declaration of LocalName are absent from this listing -- confirm against
// the real source.
2849bool MasmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
2852 const std::vector<std::string> &Locals, SMLoc L) {
2853 unsigned NParameters = Parameters.size();
2854 if (NParameters != A.size())
2855 return Error(L, "Wrong number of arguments");
// Give every local a fresh name of the form "??" followed by LocalCounter in
// upper-case hex, zero-padded to 4 digits; LocalCounter advances per local.
// NOTE(review): presumably LocalName is a stream writing into Name (its
// declaration is missing here) -- confirm.
2856 StringMap<std::string> LocalSymbols;
2857 std::string Name;
2858 Name.reserve(6);
2859 for (StringRef Local : Locals) {
2861 LocalName << "??"
2862 << format_hex_no_prefix(LocalCounter++, 4, /*Upper=*/true);
2863 LocalSymbols.insert({Local, LocalName.str()});
2864 Name.clear();
2865 }
2866
// Holds the active quote character while scanning inside a quoted string;
// this state persists across iterations of the outer loop.
2867 std::optional<char> CurrentQuote;
2868 while (!Body.empty()) {
2869 // Scan for the next substitution.
2870 std::size_t End = Body.size(), Pos = 0;
2871 std::size_t IdentifierPos = End;
2872 for (; Pos != End; ++Pos) {
2873 // Find the next possible macro parameter, including preceding a '&'
2874 // inside quotes.
2875 if (Body[Pos] == '&')
2876 break;
2877 if (isMacroParameterChar(Body[Pos])) {
// Outside quotes any name character starts a candidate; inside quotes only
// remember where the current run of name characters began.
2878 if (!CurrentQuote)
2879 break;
2880 if (IdentifierPos == End)
2881 IdentifierPos = Pos;
2882 } else {
2883 IdentifierPos = End;
2884 }
2885
2886 // Track quotation status
2887 if (!CurrentQuote) {
2888 if (Body[Pos] == '\'' || Body[Pos] == '"')
2889 CurrentQuote = Body[Pos];
2890 } else if (Body[Pos] == CurrentQuote) {
2891 if (Pos + 1 != End && Body[Pos + 1] == CurrentQuote) {
2892 // Escaped quote, and quotes aren't identifier chars; skip
2893 ++Pos;
2894 continue;
2895 } else {
2896 CurrentQuote.reset();
2897 }
2898 }
2899 }
2900 if (IdentifierPos != End) {
2901 // We've recognized an identifier before an apostrophe inside quotes;
2902 // check once to see if we can expand it.
// NOTE(review): the scan loop above stops at '&', not at an apostrophe, so
// "apostrophe" here presumably means ampersand -- confirm.
2903 Pos = IdentifierPos;
2904 IdentifierPos = End;
2905 }
2906
2907 // Add the prefix.
2908 OS << Body.slice(0, Pos);
2909
2910 // Check if we reached the end.
2911 if (Pos == End)
2912 break;
2913
// A leading '&' is stepped over so that [Pos, I) covers only the name.
2914 unsigned I = Pos;
2915 bool InitialAmpersand = (Body[I] == '&');
2916 if (InitialAmpersand) {
2917 ++I;
2918 ++Pos;
2919 }
2920 while (I < End && isMacroParameterChar(Body[I]))
2921 ++I;
2922
2923 const char *Begin = Body.data() + Pos;
2924 StringRef Argument(Begin, I - Pos);
2925 const std::string ArgumentLower = Argument.lower();
2926 unsigned Index = 0;
2927
// Linear, case-insensitive search for a parameter with this name.
2928 for (; Index < NParameters; ++Index)
2929 if (Parameters[Index].Name.equals_insensitive(ArgumentLower))
2930 break;
2931
2932 if (Index == NParameters) {
// Not a parameter: re-emit the '&' that was stepped over, then the local
// symbol replacement or the original text.
// NOTE(review): LocalSymbols is keyed by the entries of Locals as given but
// looked up with the lower-cased name; this assumes Locals entries are
// already lower-case -- confirm against the caller.
2933 if (InitialAmpersand)
2934 OS << '&';
2935 auto it = LocalSymbols.find(ArgumentLower);
2936 if (it != LocalSymbols.end())
2937 OS << it->second;
2938 else
2939 OS << Argument;
2940 Pos = I;
2941 } else {
2942 for (const AsmToken &Token : A[Index]) {
2943 // In MASM, you can write '%expr'.
2944 // The prefix '%' evaluates the expression 'expr'
2945 // and uses the result as a string (e.g. replace %(1+2) with the
2946 // string "3").
2947 // Here, we identify the integer token which is the result of the
2948 // absolute expression evaluation and replace it with its string
2949 // representation.
2950 if (Token.getString().front() == '%' && Token.is(AsmToken::Integer))
2951 // Emit an integer value to the buffer.
2952 OS << Token.getIntVal();
2953 else
2954 OS << Token.getString();
2955 }
2956
// Skip the parameter name and a single '&' directly following it, if any.
2957 Pos += Argument.size();
2958 if (Pos < End && Body[Pos] == '&') {
2959 ++Pos;
2960 }
2961 }
2962 // Update the scan point.
2963 Body = Body.substr(Pos);
2964 }
2965
2966 return false;
2967}
2968
2970 switch (kind) {
2971 default:
2972 return false;
2973 case AsmToken::Plus:
2974 case AsmToken::Minus:
2975 case AsmToken::Tilde:
2976 case AsmToken::Slash:
2977 case AsmToken::Star:
2978 case AsmToken::Dot:
2979 case AsmToken::Equal:
2981 case AsmToken::Pipe:
2982 case AsmToken::PipePipe:
2983 case AsmToken::Caret:
2984 case AsmToken::Amp:
2985 case AsmToken::AmpAmp:
2986 case AsmToken::Exclaim:
2988 case AsmToken::Less:
2990 case AsmToken::LessLess:
2992 case AsmToken::Greater:
2995 return true;
2996 }
2997}
2998
2999namespace {
3000
3001class AsmLexerSkipSpaceRAII {
3002public:
3003 AsmLexerSkipSpaceRAII(AsmLexer &Lexer, bool SkipSpace) : Lexer(Lexer) {
3004 Lexer.setSkipSpace(SkipSpace);
3005 }
3006
3007 ~AsmLexerSkipSpaceRAII() {
3008 Lexer.setSkipSpace(true);
3009 }
3010
3011private:
3012 AsmLexer &Lexer;
3013};
3014
3015} // end anonymous namespace
3016
3017bool MasmParser::parseMacroArgument(const MCAsmMacroParameter *MP,
3018 MCAsmMacroArgument &MA,
3019 AsmToken::TokenKind EndTok) {
3020 if (MP && MP->Vararg) {
3021 if (Lexer.isNot(EndTok)) {
3022 SmallVector<StringRef, 1> Str = parseStringRefsTo(EndTok);
3023 for (StringRef S : Str) {
3024 MA.emplace_back(AsmToken::String, S);
3025 }
3026 }
3027 return false;
3028 }
3029
3030 SMLoc StrLoc = Lexer.getLoc(), EndLoc;
3031 if (Lexer.is(AsmToken::Less) && isAngleBracketString(StrLoc, EndLoc)) {
3032 const char *StrChar = StrLoc.getPointer() + 1;
3033 const char *EndChar = EndLoc.getPointer() - 1;
3034 jumpToLoc(EndLoc, CurBuffer, EndStatementAtEOFStack.back());
3035 /// Eat from '<' to '>'.
3036 Lex();
3037 MA.emplace_back(AsmToken::String, StringRef(StrChar, EndChar - StrChar));
3038 return false;
3039 }
3040
3041 unsigned ParenLevel = 0;
3042
3043 // Darwin doesn't use spaces to delmit arguments.
3044 AsmLexerSkipSpaceRAII ScopedSkipSpace(Lexer, IsDarwin);
3045
3046 bool SpaceEaten;
3047
3048 while (true) {
3049 SpaceEaten = false;
3050 if (Lexer.is(AsmToken::Eof) || Lexer.is(AsmToken::Equal))
3051 return TokError("unexpected token");
3052
3053 if (ParenLevel == 0) {
3054 if (Lexer.is(AsmToken::Comma))
3055 break;
3056
3057 if (Lexer.is(AsmToken::Space)) {
3058 SpaceEaten = true;
3059 Lex(); // Eat spaces.
3060 }
3061
3062 // Spaces can delimit parameters, but could also be part an expression.
3063 // If the token after a space is an operator, add the token and the next
3064 // one into this argument
3065 if (!IsDarwin) {
3066 if (isOperator(Lexer.getKind()) && Lexer.isNot(EndTok)) {
3067 MA.push_back(getTok());
3068 Lex();
3069
3070 // Whitespace after an operator can be ignored.
3071 if (Lexer.is(AsmToken::Space))
3072 Lex();
3073
3074 continue;
3075 }
3076 }
3077 if (SpaceEaten)
3078 break;
3079 }
3080
3081 // handleMacroEntry relies on not advancing the lexer here
3082 // to be able to fill in the remaining default parameter values
3083 if (Lexer.is(EndTok) && (EndTok != AsmToken::RParen || ParenLevel == 0))
3084 break;
3085
3086 // Adjust the current parentheses level.
3087 if (Lexer.is(AsmToken::LParen))
3088 ++ParenLevel;
3089 else if (Lexer.is(AsmToken::RParen) && ParenLevel)
3090 --ParenLevel;
3091
3092 // Append the token to the current argument list.
3093 MA.push_back(getTok());
3094 Lex();
3095 }
3096
3097 if (ParenLevel != 0)
3098 return TokError("unbalanced parentheses in argument");
3099
3100 if (MA.empty() && MP) {
3101 if (MP->Required) {
3102 return TokError("missing value for required parameter '" + MP->Name +
3103 "'");
3104 } else {
3105 MA = MP->Value;
3106 }
3107 }
3108 return false;
3109}
3110
3111// Parse the macro instantiation arguments.
3112bool MasmParser::parseMacroArguments(const MCAsmMacro *M,
3113 MCAsmMacroArguments &A,
3114 AsmToken::TokenKind EndTok) {
3115 const unsigned NParameters = M ? M->Parameters.size() : 0;
3116 bool NamedParametersFound = false;
3117 SmallVector<SMLoc, 4> FALocs;
3118
3119 A.resize(NParameters);
3120 FALocs.resize(NParameters);
3121
3122 // Parse two kinds of macro invocations:
3123 // - macros defined without any parameters accept an arbitrary number of them
3124 // - macros defined with parameters accept at most that many of them
3125 for (unsigned Parameter = 0; !NParameters || Parameter < NParameters;
3126 ++Parameter) {
3127 SMLoc IDLoc = Lexer.getLoc();
3129
3130 if (Lexer.is(AsmToken::Identifier) && peekTok().is(AsmToken::Equal)) {
3131 if (parseIdentifier(FA.Name))
3132 return Error(IDLoc, "invalid argument identifier for formal argument");
3133
3134 if (Lexer.isNot(AsmToken::Equal))
3135 return TokError("expected '=' after formal parameter identifier");
3136
3137 Lex();
3138
3139 NamedParametersFound = true;
3140 }
3141
3142 if (NamedParametersFound && FA.Name.empty())
3143 return Error(IDLoc, "cannot mix positional and keyword arguments");
3144
3145 unsigned PI = Parameter;
3146 if (!FA.Name.empty()) {
3147 assert(M && "expected macro to be defined");
3148 unsigned FAI = 0;
3149 for (FAI = 0; FAI < NParameters; ++FAI)
3150 if (M->Parameters[FAI].Name == FA.Name)
3151 break;
3152
3153 if (FAI >= NParameters) {
3154 return Error(IDLoc, "parameter named '" + FA.Name +
3155 "' does not exist for macro '" + M->Name + "'");
3156 }
3157 PI = FAI;
3158 }
3159 const MCAsmMacroParameter *MP = nullptr;
3160 if (M && PI < NParameters)
3161 MP = &M->Parameters[PI];
3162
3163 SMLoc StrLoc = Lexer.getLoc();
3164 SMLoc EndLoc;
3165 if (Lexer.is(AsmToken::Percent)) {
3166 const MCExpr *AbsoluteExp;
3167 int64_t Value;
3168 /// Eat '%'.
3169 Lex();
3170 if (parseExpression(AbsoluteExp, EndLoc))
3171 return false;
3172 if (!AbsoluteExp->evaluateAsAbsolute(Value,
3173 getStreamer().getAssemblerPtr()))
3174 return Error(StrLoc, "expected absolute expression");
3175 const char *StrChar = StrLoc.getPointer();
3176 const char *EndChar = EndLoc.getPointer();
3177 AsmToken newToken(AsmToken::Integer,
3178 StringRef(StrChar, EndChar - StrChar), Value);
3179 FA.Value.push_back(newToken);
3180 } else if (parseMacroArgument(MP, FA.Value, EndTok)) {
3181 if (M)
3182 return addErrorSuffix(" in '" + M->Name + "' macro");
3183 else
3184 return true;
3185 }
3186
3187 if (!FA.Value.empty()) {
3188 if (A.size() <= PI)
3189 A.resize(PI + 1);
3190 A[PI] = FA.Value;
3191
3192 if (FALocs.size() <= PI)
3193 FALocs.resize(PI + 1);
3194
3195 FALocs[PI] = Lexer.getLoc();
3196 }
3197
3198 // At the end of the statement, fill in remaining arguments that have
3199 // default values. If there aren't any, then the next argument is
3200 // required but missing
3201 if (Lexer.is(EndTok)) {
3202 bool Failure = false;
3203 for (unsigned FAI = 0; FAI < NParameters; ++FAI) {
3204 if (A[FAI].empty()) {
3205 if (M->Parameters[FAI].Required) {
3206 Error(FALocs[FAI].isValid() ? FALocs[FAI] : Lexer.getLoc(),
3207 "missing value for required parameter "
3208 "'" +
3209 M->Parameters[FAI].Name + "' in macro '" + M->Name + "'");
3210 Failure = true;
3211 }
3212
3213 if (!M->Parameters[FAI].Value.empty())
3214 A[FAI] = M->Parameters[FAI].Value;
3215 }
3216 }
3217 return Failure;
3218 }
3219
3220 if (Lexer.is(AsmToken::Comma))
3221 Lex();
3222 }
3223
3224 return TokError("too many positional arguments");
3225}
3226
3227bool MasmParser::handleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc,
3228 AsmToken::TokenKind ArgumentEndTok) {
3229 // Arbitrarily limit macro nesting depth (default matches 'as'). We can
3230 // eliminate this, although we should protect against infinite loops.
3231 unsigned MaxNestingDepth = AsmMacroMaxNestingDepth;
3232 if (ActiveMacros.size() == MaxNestingDepth) {
3233 std::ostringstream MaxNestingDepthError;
3234 MaxNestingDepthError << "macros cannot be nested more than "
3235 << MaxNestingDepth << " levels deep."
3236 << " Use -asm-macro-max-nesting-depth to increase "
3237 "this limit.";
3238 return TokError(MaxNestingDepthError.str());
3239 }
3240
3241 MCAsmMacroArguments A;
3242 if (parseMacroArguments(M, A, ArgumentEndTok))
3243 return true;
3244
3245 // Macro instantiation is lexical, unfortunately. We construct a new buffer
3246 // to hold the macro body with substitutions.
3247 SmallString<256> Buf;
3248 StringRef Body = M->Body;
3250
3251 if (expandMacro(OS, Body, M->Parameters, A, M->Locals, getTok().getLoc()))
3252 return true;
3253
3254 // We include the endm in the buffer as our cue to exit the macro
3255 // instantiation.
3256 OS << "endm\n";
3257
3258 std::unique_ptr<MemoryBuffer> Instantiation =
3259 MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>");
3260
3261 // Create the macro instantiation object and add to the current macro
3262 // instantiation stack.
3263 MacroInstantiation *MI = new MacroInstantiation{
3264 NameLoc, CurBuffer, getTok().getLoc(), TheCondStack.size()};
3265 ActiveMacros.push_back(MI);
3266
3267 ++NumOfMacroInstantiations;
3268
3269 // Jump to the macro instantiation and prime the lexer.
3270 CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Instantiation), SMLoc());
3271 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
3272 EndStatementAtEOFStack.push_back(true);
3273 Lex();
3274
3275 return false;
3276}
3277
3278void MasmParser::handleMacroExit() {
3279 // Jump to the token we should return to, and consume it.
3280 EndStatementAtEOFStack.pop_back();
3281 jumpToLoc(ActiveMacros.back()->ExitLoc, ActiveMacros.back()->ExitBuffer,
3282 EndStatementAtEOFStack.back());
3283 Lex();
3284
3285 // Pop the instantiation entry.
3286 delete ActiveMacros.back();
3287 ActiveMacros.pop_back();
3288}
3289
3290bool MasmParser::handleMacroInvocation(const MCAsmMacro *M, SMLoc NameLoc) {
3291 if (!M->IsFunction)
3292 return Error(NameLoc, "cannot invoke macro procedure as function");
3293
3294 if (parseToken(AsmToken::LParen, "invoking macro function '" + M->Name +
3295 "' requires arguments in parentheses") ||
3296 handleMacroEntry(M, NameLoc, AsmToken::RParen))
3297 return true;
3298
3299 // Parse all statements in the macro, retrieving the exit value when it ends.
3300 std::string ExitValue;
3301 SmallVector<AsmRewrite, 4> AsmStrRewrites;
3302 while (Lexer.isNot(AsmToken::Eof)) {
3303 ParseStatementInfo Info(&AsmStrRewrites);
3304 bool Parsed = parseStatement(Info, nullptr);
3305
3306 if (!Parsed && Info.ExitValue) {
3307 ExitValue = std::move(*Info.ExitValue);
3308 break;
3309 }
3310
3311 // If we have a Lexer Error we are on an Error Token. Load in Lexer Error
3312 // for printing ErrMsg via Lex() only if no (presumably better) parser error
3313 // exists.
3314 if (Parsed && !hasPendingError() && Lexer.getTok().is(AsmToken::Error)) {
3315 Lex();
3316 }
3317
3318 // parseStatement returned true so may need to emit an error.
3319 printPendingErrors();
3320
3321 // Skipping to the next line if needed.
3322 if (Parsed && !getLexer().isAtStartOfStatement())
3323 eatToEndOfStatement();
3324 }
3325
3326 // Consume the right-parenthesis on the other side of the arguments.
3327 if (parseRParen())
3328 return true;
3329
3330 // Exit values may require lexing, unfortunately. We construct a new buffer to
3331 // hold the exit value.
3332 std::unique_ptr<MemoryBuffer> MacroValue =
3333 MemoryBuffer::getMemBufferCopy(ExitValue, "<macro-value>");
3334
3335 // Jump from this location to the instantiated exit value, and prime the
3336 // lexer.
3337 CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(MacroValue), Lexer.getLoc());
3338 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(), nullptr,
3339 /*EndStatementAtEOF=*/false);
3340 EndStatementAtEOFStack.push_back(false);
3341 Lex();
3342
3343 return false;
3344}
3345
3346/// parseIdentifier:
3347/// ::= identifier
3348/// ::= string
3349bool MasmParser::parseIdentifier(StringRef &Res,
3350 IdentifierPositionKind Position) {
3351 // The assembler has relaxed rules for accepting identifiers, in particular we
3352 // allow things like '.globl $foo' and '.def @feat.00', which would normally
3353 // be separate tokens. At this level, we have already lexed so we cannot
3354 // (currently) handle this as a context dependent token, instead we detect
3355 // adjacent tokens and return the combined identifier.
3356 if (Lexer.is(AsmToken::Dollar) || Lexer.is(AsmToken::At)) {
3357 SMLoc PrefixLoc = getLexer().getLoc();
3358
3359 // Consume the prefix character, and check for a following identifier.
3360
3361 AsmToken nextTok = peekTok(false);
3362
3363 if (nextTok.isNot(AsmToken::Identifier))
3364 return true;
3365
3366 // We have a '$' or '@' followed by an identifier, make sure they are adjacent.
3367 if (PrefixLoc.getPointer() + 1 != nextTok.getLoc().getPointer())
3368 return true;
3369
3370 // eat $ or @
3371 Lexer.Lex(); // Lexer's Lex guarantees consecutive token.
3372 // Construct the joined identifier and consume the token.
3373 Res =
3374 StringRef(PrefixLoc.getPointer(), getTok().getIdentifier().size() + 1);
3375 Lex(); // Parser Lex to maintain invariants.
3376 return false;
3377 }
3378
3379 if (Lexer.isNot(AsmToken::Identifier) && Lexer.isNot(AsmToken::String))
3380 return true;
3381
3382 Res = getTok().getIdentifier();
3383
3384 // Consume the identifier token - but if parsing certain directives, avoid
3385 // lexical expansion of the next token.
3386 ExpandKind ExpandNextToken = ExpandMacros;
3387 if (Position == StartOfStatement &&
3389 .CaseLower("echo", true)
3390 .CasesLower("ifdef", "ifndef", "elseifdef", "elseifndef", true)
3391 .Default(false)) {
3392 ExpandNextToken = DoNotExpandMacros;
3393 }
3394 Lex(ExpandNextToken);
3395
3396 return false;
3397}
3398
3399/// parseDirectiveEquate:
3400/// ::= name "=" expression
3401/// | name "equ" expression (not redefinable)
3402/// | name "equ" text-list
3403/// | name "textequ" text-list (redefinability unspecified)
3404bool MasmParser::parseDirectiveEquate(StringRef IDVal, StringRef Name,
3405 DirectiveKind DirKind, SMLoc NameLoc) {
3406 auto BuiltinIt = BuiltinSymbolMap.find(Name.lower());
3407 if (BuiltinIt != BuiltinSymbolMap.end())
3408 return Error(NameLoc, "cannot redefine a built-in symbol");
3409
3410 Variable &Var = Variables[Name.lower()];
3411 if (Var.Name.empty()) {
3412 Var.Name = Name;
3413 }
3414
3415 SMLoc StartLoc = Lexer.getLoc();
3416 if (DirKind == DK_EQU || DirKind == DK_TEXTEQU) {
3417 // "equ" and "textequ" both allow text expressions.
3418 std::string Value;
3419 std::string TextItem;
3420 if (!parseTextItem(TextItem)) {
3421 Value += TextItem;
3422
3423 // Accept a text-list, not just one text-item.
3424 auto parseItem = [&]() -> bool {
3425 if (parseTextItem(TextItem))
3426 return TokError("expected text item");
3427 Value += TextItem;
3428 return false;
3429 };
3430 if (parseOptionalToken(AsmToken::Comma) && parseMany(parseItem))
3431 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3432
3433 if (!Var.IsText || Var.TextValue != Value) {
3434 switch (Var.Redefinable) {
3435 case Variable::NOT_REDEFINABLE:
3436 return Error(getTok().getLoc(), "invalid variable redefinition");
3437 case Variable::WARN_ON_REDEFINITION:
3438 if (Warning(NameLoc, "redefining '" + Name +
3439 "', already defined on the command line")) {
3440 return true;
3441 }
3442 break;
3443 default:
3444 break;
3445 }
3446 }
3447 Var.IsText = true;
3448 Var.TextValue = Value;
3449 Var.Redefinable = Variable::REDEFINABLE;
3450
3451 return false;
3452 }
3453 }
3454 if (DirKind == DK_TEXTEQU)
3455 return TokError("expected <text> in '" + Twine(IDVal) + "' directive");
3456
3457 // Parse as expression assignment.
3458 const MCExpr *Expr;
3459 SMLoc EndLoc;
3460 if (parseExpression(Expr, EndLoc))
3461 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3462 StringRef ExprAsString = StringRef(
3463 StartLoc.getPointer(), EndLoc.getPointer() - StartLoc.getPointer());
3464
3465 int64_t Value;
3466 if (!Expr->evaluateAsAbsolute(Value, getStreamer().getAssemblerPtr())) {
3467 if (DirKind == DK_ASSIGN)
3468 return Error(
3469 StartLoc,
3470 "expected absolute expression; not all symbols have known values",
3471 {StartLoc, EndLoc});
3472
3473 // Not an absolute expression; define as a text replacement.
3474 if (!Var.IsText || Var.TextValue != ExprAsString) {
3475 switch (Var.Redefinable) {
3476 case Variable::NOT_REDEFINABLE:
3477 return Error(getTok().getLoc(), "invalid variable redefinition");
3478 case Variable::WARN_ON_REDEFINITION:
3479 if (Warning(NameLoc, "redefining '" + Name +
3480 "', already defined on the command line")) {
3481 return true;
3482 }
3483 break;
3484 default:
3485 break;
3486 }
3487 }
3488
3489 Var.IsText = true;
3490 Var.TextValue = ExprAsString.str();
3491 Var.Redefinable = Variable::REDEFINABLE;
3492
3493 return false;
3494 }
3495
3496 MCSymbol *Sym = getContext().getOrCreateSymbol(Var.Name);
3497
3498 const MCConstantExpr *PrevValue =
3499 Sym->isVariable() ? dyn_cast_or_null<MCConstantExpr>(
3500 Sym->getVariableValue(/*SetUsed=*/false))
3501 : nullptr;
3502 if (Var.IsText || !PrevValue || PrevValue->getValue() != Value) {
3503 switch (Var.Redefinable) {
3504 case Variable::NOT_REDEFINABLE:
3505 return Error(getTok().getLoc(), "invalid variable redefinition");
3506 case Variable::WARN_ON_REDEFINITION:
3507 if (Warning(NameLoc, "redefining '" + Name +
3508 "', already defined on the command line")) {
3509 return true;
3510 }
3511 break;
3512 default:
3513 break;
3514 }
3515 }
3516
3517 Var.IsText = false;
3518 Var.TextValue.clear();
3519 Var.Redefinable = (DirKind == DK_ASSIGN) ? Variable::REDEFINABLE
3520 : Variable::NOT_REDEFINABLE;
3521
3522 Sym->setRedefinable(Var.Redefinable != Variable::NOT_REDEFINABLE);
3523 Sym->setVariableValue(Expr);
3524 Sym->setExternal(false);
3525
3526 return false;
3527}
3528
3529bool MasmParser::parseEscapedString(std::string &Data) {
3530 if (check(getTok().isNot(AsmToken::String), "expected string"))
3531 return true;
3532
3533 Data = "";
3534 char Quote = getTok().getString().front();
3535 StringRef Str = getTok().getStringContents();
3536 Data.reserve(Str.size());
3537 for (size_t i = 0, e = Str.size(); i != e; ++i) {
3538 Data.push_back(Str[i]);
3539 if (Str[i] == Quote) {
3540 // MASM treats doubled delimiting quotes as an escaped delimiting quote.
3541 // If we're escaping the string's trailing delimiter, we're definitely
3542 // missing a quotation mark.
3543 if (i + 1 == Str.size())
3544 return Error(getTok().getLoc(), "missing quotation mark in string");
3545 if (Str[i + 1] == Quote)
3546 ++i;
3547 }
3548 }
3549
3550 Lex();
3551 return false;
3552}
3553
3554bool MasmParser::parseAngleBracketString(std::string &Data) {
3555 SMLoc EndLoc, StartLoc = getTok().getLoc();
3556 if (isAngleBracketString(StartLoc, EndLoc)) {
3557 const char *StartChar = StartLoc.getPointer() + 1;
3558 const char *EndChar = EndLoc.getPointer() - 1;
3559 jumpToLoc(EndLoc, CurBuffer, EndStatementAtEOFStack.back());
3560 // Eat from '<' to '>'.
3561 Lex();
3562
3563 Data = angleBracketString(StringRef(StartChar, EndChar - StartChar));
3564 return false;
3565 }
3566 return true;
3567}
3568
3569/// textItem ::= textLiteral | textMacroID | % constExpr
3570bool MasmParser::parseTextItem(std::string &Data) {
3571 switch (getTok().getKind()) {
3572 default:
3573 return true;
3574 case AsmToken::Percent: {
3575 int64_t Res;
3576 if (parseToken(AsmToken::Percent) || parseAbsoluteExpression(Res))
3577 return true;
3578 Data = std::to_string(Res);
3579 return false;
3580 }
3581 case AsmToken::Less:
3583 case AsmToken::LessLess:
3585 return parseAngleBracketString(Data);
3586 case AsmToken::Identifier: {
3587 // This must be a text macro; we need to expand it accordingly.
3588 StringRef ID;
3589 SMLoc StartLoc = getTok().getLoc();
3590 if (parseIdentifier(ID))
3591 return true;
3592 Data = ID.str();
3593
3594 bool Expanded = false;
3595 while (true) {
3596 // Try to resolve as a built-in text macro
3597 auto BuiltinIt = BuiltinSymbolMap.find(ID.lower());
3598 if (BuiltinIt != BuiltinSymbolMap.end()) {
3599 std::optional<std::string> BuiltinText =
3600 evaluateBuiltinTextMacro(BuiltinIt->getValue(), StartLoc);
3601 if (!BuiltinText) {
3602 // Not a text macro; break without substituting
3603 break;
3604 }
3605 Data = std::move(*BuiltinText);
3606 ID = StringRef(Data);
3607 Expanded = true;
3608 continue;
3609 }
3610
3611 // Try to resolve as a variable text macro
3612 auto VarIt = Variables.find(ID.lower());
3613 if (VarIt != Variables.end()) {
3614 const Variable &Var = VarIt->getValue();
3615 if (!Var.IsText) {
3616 // Not a text macro; break without substituting
3617 break;
3618 }
3619 Data = Var.TextValue;
3620 ID = StringRef(Data);
3621 Expanded = true;
3622 continue;
3623 }
3624
3625 break;
3626 }
3627
3628 if (!Expanded) {
3629 // Not a text macro; not usable in TextItem context. Since we haven't used
3630 // the token, put it back for better error recovery.
3631 getLexer().UnLex(AsmToken(AsmToken::Identifier, ID));
3632 return true;
3633 }
3634 return false;
3635 }
3636 }
3637 llvm_unreachable("unhandled token kind");
3638}
3639
3640/// parseDirectiveAscii:
3641/// ::= ( .ascii | .asciz | .string ) [ "string" ( , "string" )* ]
3642bool MasmParser::parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) {
3643 auto parseOp = [&]() -> bool {
3644 std::string Data;
3645 if (checkForValidSection() || parseEscapedString(Data))
3646 return true;
3647 getStreamer().emitBytes(Data);
3648 if (ZeroTerminated)
3649 getStreamer().emitBytes(StringRef("\0", 1));
3650 return false;
3651 };
3652
3653 if (parseMany(parseOp))
3654 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3655 return false;
3656}
3657
3658bool MasmParser::emitIntValue(const MCExpr *Value, unsigned Size) {
3659 // Special case constant expressions to match code generator.
3660 if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
3661 assert(Size <= 8 && "Invalid size");
3662 int64_t IntValue = MCE->getValue();
3663 if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
3664 return Error(MCE->getLoc(), "out of range literal value");
3665 getStreamer().emitIntValue(IntValue, Size);
3666 } else {
3667 const MCSymbolRefExpr *MSE = dyn_cast<MCSymbolRefExpr>(Value);
3668 if (MSE && MSE->getSymbol().getName() == "?") {
3669 // ? initializer; treat as 0.
3670 getStreamer().emitIntValue(0, Size);
3671 } else {
3672 getStreamer().emitValue(Value, Size, Value->getLoc());
3673 }
3674 }
3675 return false;
3676}
3677
3678bool MasmParser::parseScalarInitializer(unsigned Size,
3680 unsigned StringPadLength) {
3681 if (Size == 1 && getTok().is(AsmToken::String)) {
3682 std::string Value;
3683 if (parseEscapedString(Value))
3684 return true;
3685 // Treat each character as an initializer.
3686 for (const unsigned char CharVal : Value)
3687 Values.push_back(MCConstantExpr::create(CharVal, getContext()));
3688
3689 // Pad the string with spaces to the specified length.
3690 for (size_t i = Value.size(); i < StringPadLength; ++i)
3691 Values.push_back(MCConstantExpr::create(' ', getContext()));
3692 } else {
3693 const MCExpr *Value;
3694 if (parseExpression(Value))
3695 return true;
3696 if (getTok().is(AsmToken::Identifier) &&
3697 getTok().getString().equals_insensitive("dup")) {
3698 Lex(); // Eat 'dup'.
3699 const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
3700 if (!MCE)
3701 return Error(Value->getLoc(),
3702 "cannot repeat value a non-constant number of times");
3703 const int64_t Repetitions = MCE->getValue();
3704 if (Repetitions < 0)
3705 return Error(Value->getLoc(),
3706 "cannot repeat value a negative number of times");
3707
3708 SmallVector<const MCExpr *, 1> DuplicatedValues;
3709 if (parseToken(AsmToken::LParen,
3710 "parentheses required for 'dup' contents") ||
3711 parseScalarInstList(Size, DuplicatedValues) || parseRParen())
3712 return true;
3713
3714 for (int i = 0; i < Repetitions; ++i)
3715 Values.append(DuplicatedValues.begin(), DuplicatedValues.end());
3716 } else {
3717 Values.push_back(Value);
3718 }
3719 }
3720 return false;
3721}
3722
3723bool MasmParser::parseScalarInstList(unsigned Size,
3725 const AsmToken::TokenKind EndToken) {
3726 while (getTok().isNot(EndToken) &&
3727 (EndToken != AsmToken::Greater ||
3728 getTok().isNot(AsmToken::GreaterGreater))) {
3729 parseScalarInitializer(Size, Values);
3730
3731 // If we see a comma, continue, and allow line continuation.
3732 if (!parseOptionalToken(AsmToken::Comma))
3733 break;
3734 parseOptionalToken(AsmToken::EndOfStatement);
3735 }
3736 return false;
3737}
3738
3739bool MasmParser::emitIntegralValues(unsigned Size, unsigned *Count) {
3741 if (checkForValidSection() || parseScalarInstList(Size, Values))
3742 return true;
3743
3744 for (const auto *Value : Values) {
3745 emitIntValue(Value, Size);
3746 }
3747 if (Count)
3748 *Count = Values.size();
3749 return false;
3750}
3751
3752// Add a field to the current structure.
3753bool MasmParser::addIntegralField(StringRef Name, unsigned Size) {
3754 StructInfo &Struct = StructInProgress.back();
3755 FieldInfo &Field = Struct.addField(Name, FT_INTEGRAL, Size);
3756 IntFieldInfo &IntInfo = Field.Contents.IntInfo;
3757
3758 Field.Type = Size;
3759
3760 if (parseScalarInstList(Size, IntInfo.Values))
3761 return true;
3762
3763 Field.SizeOf = Field.Type * IntInfo.Values.size();
3764 Field.LengthOf = IntInfo.Values.size();
3765 const unsigned FieldEnd = Field.Offset + Field.SizeOf;
3766 if (!Struct.IsUnion) {
3767 Struct.NextOffset = FieldEnd;
3768 }
3769 Struct.Size = std::max(Struct.Size, FieldEnd);
3770 return false;
3771}
3772
3773/// parseDirectiveValue
3774/// ::= (byte | word | ... ) [ expression (, expression)* ]
3775bool MasmParser::parseDirectiveValue(StringRef IDVal, unsigned Size) {
3776 if (StructInProgress.empty()) {
3777 // Initialize data value.
3778 if (emitIntegralValues(Size))
3779 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3780 } else if (addIntegralField("", Size)) {
3781 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3782 }
3783
3784 return false;
3785}
3786
3787/// parseDirectiveNamedValue
3788/// ::= name (byte | word | ... ) [ expression (, expression)* ]
3789bool MasmParser::parseDirectiveNamedValue(StringRef TypeName, unsigned Size,
3790 StringRef Name, SMLoc NameLoc) {
3791 if (StructInProgress.empty()) {
3792 // Initialize named data value.
3793 MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
3794 getStreamer().emitLabel(Sym);
3795 unsigned Count;
3796 if (emitIntegralValues(Size, &Count))
3797 return addErrorSuffix(" in '" + Twine(TypeName) + "' directive");
3798
3800 Type.Name = TypeName;
3801 Type.Size = Size * Count;
3802 Type.ElementSize = Size;
3803 Type.Length = Count;
3804 KnownType[Name.lower()] = Type;
3805 } else if (addIntegralField(Name, Size)) {
3806 return addErrorSuffix(" in '" + Twine(TypeName) + "' directive");
3807 }
3808
3809 return false;
3810}
3811
3812static bool parseHexOcta(MasmParser &Asm, uint64_t &hi, uint64_t &lo) {
3813 if (Asm.getTok().isNot(AsmToken::Integer) &&
3814 Asm.getTok().isNot(AsmToken::BigNum))
3815 return Asm.TokError("unknown token in expression");
3816 SMLoc ExprLoc = Asm.getTok().getLoc();
3817 APInt IntValue = Asm.getTok().getAPIntVal();
3818 Asm.Lex();
3819 if (!IntValue.isIntN(128))
3820 return Asm.Error(ExprLoc, "out of range literal value");
3821 if (!IntValue.isIntN(64)) {
3822 hi = IntValue.getHiBits(IntValue.getBitWidth() - 64).getZExtValue();
3823 lo = IntValue.getLoBits(64).getZExtValue();
3824 } else {
3825 hi = 0;
3826 lo = IntValue.getZExtValue();
3827 }
3828 return false;
3829}
3830
3831bool MasmParser::parseRealValue(const fltSemantics &Semantics, APInt &Res) {
3832 // We don't truly support arithmetic on floating point expressions, so we
3833 // have to manually parse unary prefixes.
3834 bool IsNeg = false;
3835 SMLoc SignLoc;
3836 if (getLexer().is(AsmToken::Minus)) {
3837 SignLoc = getLexer().getLoc();
3838 Lexer.Lex();
3839 IsNeg = true;
3840 } else if (getLexer().is(AsmToken::Plus)) {
3841 SignLoc = getLexer().getLoc();
3842 Lexer.Lex();
3843 }
3844
3845 if (Lexer.is(AsmToken::Error))
3846 return TokError(Lexer.getErr());
3847 if (Lexer.isNot(AsmToken::Integer) && Lexer.isNot(AsmToken::Real) &&
3849 return TokError("unexpected token in directive");
3850
3851 // Convert to an APFloat.
3852 APFloat Value(Semantics);
3853 StringRef IDVal = getTok().getString();
3854 if (getLexer().is(AsmToken::Identifier)) {
3855 if (IDVal.equals_insensitive("infinity") || IDVal.equals_insensitive("inf"))
3856 Value = APFloat::getInf(Semantics);
3857 else if (IDVal.equals_insensitive("nan"))
3858 Value = APFloat::getNaN(Semantics, false, ~0);
3859 else if (IDVal.equals_insensitive("?"))
3860 Value = APFloat::getZero(Semantics);
3861 else
3862 return TokError("invalid floating point literal");
3863 } else if (IDVal.consume_back("r") || IDVal.consume_back("R")) {
3864 // MASM hexadecimal floating-point literal; no APFloat conversion needed.
3865 // To match ML64.exe, ignore the initial sign.
3866 unsigned SizeInBits = Value.getSizeInBits(Semantics);
3867 if (SizeInBits != (IDVal.size() << 2))
3868 return TokError("invalid floating point literal");
3869
3870 // Consume the numeric token.
3871 Lex();
3872
3873 Res = APInt(SizeInBits, IDVal, 16);
3874 if (SignLoc.isValid())
3875 return Warning(SignLoc, "MASM-style hex floats ignore explicit sign");
3876 return false;
3877 } else if (errorToBool(
3878 Value.convertFromString(IDVal, APFloat::rmNearestTiesToEven)
3879 .takeError())) {
3880 return TokError("invalid floating point literal");
3881 }
3882 if (IsNeg)
3883 Value.changeSign();
3884
3885 // Consume the numeric token.
3886 Lex();
3887
3888 Res = Value.bitcastToAPInt();
3889
3890 return false;
3891}
3892
3893bool MasmParser::parseRealInstList(const fltSemantics &Semantics,
3894 SmallVectorImpl<APInt> &ValuesAsInt,
3895 const AsmToken::TokenKind EndToken) {
3896 while (getTok().isNot(EndToken) ||
3897 (EndToken == AsmToken::Greater &&
3898 getTok().isNot(AsmToken::GreaterGreater))) {
3899 const AsmToken NextTok = peekTok();
3900 if (NextTok.is(AsmToken::Identifier) &&
3901 NextTok.getString().equals_insensitive("dup")) {
3902 const MCExpr *Value;
3903 if (parseExpression(Value) || parseToken(AsmToken::Identifier))
3904 return true;
3905 const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
3906 if (!MCE)
3907 return Error(Value->getLoc(),
3908 "cannot repeat value a non-constant number of times");
3909 const int64_t Repetitions = MCE->getValue();
3910 if (Repetitions < 0)
3911 return Error(Value->getLoc(),
3912 "cannot repeat value a negative number of times");
3913
3914 SmallVector<APInt, 1> DuplicatedValues;
3915 if (parseToken(AsmToken::LParen,
3916 "parentheses required for 'dup' contents") ||
3917 parseRealInstList(Semantics, DuplicatedValues) || parseRParen())
3918 return true;
3919
3920 for (int i = 0; i < Repetitions; ++i)
3921 ValuesAsInt.append(DuplicatedValues.begin(), DuplicatedValues.end());
3922 } else {
3923 APInt AsInt;
3924 if (parseRealValue(Semantics, AsInt))
3925 return true;
3926 ValuesAsInt.push_back(AsInt);
3927 }
3928
3929 // Continue if we see a comma. (Also, allow line continuation.)
3930 if (!parseOptionalToken(AsmToken::Comma))
3931 break;
3932 parseOptionalToken(AsmToken::EndOfStatement);
3933 }
3934
3935 return false;
3936}
3937
3938// Initialize real data values.
3939bool MasmParser::emitRealValues(const fltSemantics &Semantics,
3940 unsigned *Count) {
3941 if (checkForValidSection())
3942 return true;
3943
3944 SmallVector<APInt, 1> ValuesAsInt;
3945 if (parseRealInstList(Semantics, ValuesAsInt))
3946 return true;
3947
3948 for (const APInt &AsInt : ValuesAsInt) {
3949 getStreamer().emitIntValue(AsInt);
3950 }
3951 if (Count)
3952 *Count = ValuesAsInt.size();
3953 return false;
3954}
3955
3956// Add a real field to the current struct.
3957bool MasmParser::addRealField(StringRef Name, const fltSemantics &Semantics,
3958 size_t Size) {
3959 StructInfo &Struct = StructInProgress.back();
3960 FieldInfo &Field = Struct.addField(Name, FT_REAL, Size);
3961 RealFieldInfo &RealInfo = Field.Contents.RealInfo;
3962
3963 Field.SizeOf = 0;
3964
3965 if (parseRealInstList(Semantics, RealInfo.AsIntValues))
3966 return true;
3967
3968 Field.Type = RealInfo.AsIntValues.back().getBitWidth() / 8;
3969 Field.LengthOf = RealInfo.AsIntValues.size();
3970 Field.SizeOf = Field.Type * Field.LengthOf;
3971
3972 const unsigned FieldEnd = Field.Offset + Field.SizeOf;
3973 if (!Struct.IsUnion) {
3974 Struct.NextOffset = FieldEnd;
3975 }
3976 Struct.Size = std::max(Struct.Size, FieldEnd);
3977 return false;
3978}
3979
3980/// parseDirectiveRealValue
3981/// ::= (real4 | real8 | real10) [ expression (, expression)* ]
3982bool MasmParser::parseDirectiveRealValue(StringRef IDVal,
3983 const fltSemantics &Semantics,
3984 size_t Size) {
3985 if (StructInProgress.empty()) {
3986 // Initialize data value.
3987 if (emitRealValues(Semantics))
3988 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3989 } else if (addRealField("", Semantics, Size)) {
3990 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3991 }
3992 return false;
3993}
3994
3995/// parseDirectiveNamedRealValue
3996/// ::= name (real4 | real8 | real10) [ expression (, expression)* ]
3997bool MasmParser::parseDirectiveNamedRealValue(StringRef TypeName,
3998 const fltSemantics &Semantics,
3999 unsigned Size, StringRef Name,
4000 SMLoc NameLoc) {
4001 if (StructInProgress.empty()) {
4002 // Initialize named data value.
4003 MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
4004 getStreamer().emitLabel(Sym);
4005 unsigned Count;
4006 if (emitRealValues(Semantics, &Count))
4007 return addErrorSuffix(" in '" + TypeName + "' directive");
4008
4010 Type.Name = TypeName;
4011 Type.Size = Size * Count;
4012 Type.ElementSize = Size;
4013 Type.Length = Count;
4014 KnownType[Name.lower()] = Type;
4015 } else if (addRealField(Name, Semantics, Size)) {
4016 return addErrorSuffix(" in '" + TypeName + "' directive");
4017 }
4018 return false;
4019}
4020
4021bool MasmParser::parseOptionalAngleBracketOpen() {
4022 const AsmToken Tok = getTok();
4023 if (parseOptionalToken(AsmToken::LessLess)) {
4024 AngleBracketDepth++;
4025 Lexer.UnLex(AsmToken(AsmToken::Less, Tok.getString().substr(1)));
4026 return true;
4027 } else if (parseOptionalToken(AsmToken::LessGreater)) {
4028 AngleBracketDepth++;
4030 return true;
4031 } else if (parseOptionalToken(AsmToken::Less)) {
4032 AngleBracketDepth++;
4033 return true;
4034 }
4035
4036 return false;
4037}
4038
4039bool MasmParser::parseAngleBracketClose(const Twine &Msg) {
4040 const AsmToken Tok = getTok();
4041 if (parseOptionalToken(AsmToken::GreaterGreater)) {
4043 } else if (parseToken(AsmToken::Greater, Msg)) {
4044 return true;
4045 }
4046 AngleBracketDepth--;
4047 return false;
4048}
4049
4050bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
4051 const IntFieldInfo &Contents,
4052 FieldInitializer &Initializer) {
4053 SMLoc Loc = getTok().getLoc();
4054
4056 if (parseOptionalToken(AsmToken::LCurly)) {
4057 if (Field.LengthOf == 1 && Field.Type > 1)
4058 return Error(Loc, "Cannot initialize scalar field with array value");
4059 if (parseScalarInstList(Field.Type, Values, AsmToken::RCurly) ||
4060 parseToken(AsmToken::RCurly))
4061 return true;
4062 } else if (parseOptionalAngleBracketOpen()) {
4063 if (Field.LengthOf == 1 && Field.Type > 1)
4064 return Error(Loc, "Cannot initialize scalar field with array value");
4065 if (parseScalarInstList(Field.Type, Values, AsmToken::Greater) ||
4066 parseAngleBracketClose())
4067 return true;
4068 } else if (Field.LengthOf > 1 && Field.Type > 1) {
4069 return Error(Loc, "Cannot initialize array field with scalar value");
4070 } else if (parseScalarInitializer(Field.Type, Values,
4071 /*StringPadLength=*/Field.LengthOf)) {
4072 return true;
4073 }
4074
4075 if (Values.size() > Field.LengthOf) {
4076 return Error(Loc, "Initializer too long for field; expected at most " +
4077 std::to_string(Field.LengthOf) + " elements, got " +
4078 std::to_string(Values.size()));
4079 }
4080 // Default-initialize all remaining values.
4081 Values.append(Contents.Values.begin() + Values.size(), Contents.Values.end());
4082
4083 Initializer = FieldInitializer(std::move(Values));
4084 return false;
4085}
4086
4087bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
4088 const RealFieldInfo &Contents,
4089 FieldInitializer &Initializer) {
4090 const fltSemantics *Semantics;
4091 switch (Field.Type) {
4092 case 4:
4093 Semantics = &APFloat::IEEEsingle();
4094 break;
4095 case 8:
4096 Semantics = &APFloat::IEEEdouble();
4097 break;
4098 case 10:
4099 Semantics = &APFloat::x87DoubleExtended();
4100 break;
4101 default:
4102 llvm_unreachable("unknown real field type");
4103 }
4104
4105 SMLoc Loc = getTok().getLoc();
4106
4107 SmallVector<APInt, 1> AsIntValues;
4108 if (parseOptionalToken(AsmToken::LCurly)) {
4109 if (Field.LengthOf == 1)
4110 return Error(Loc, "Cannot initialize scalar field with array value");
4111 if (parseRealInstList(*Semantics, AsIntValues, AsmToken::RCurly) ||
4112 parseToken(AsmToken::RCurly))
4113 return true;
4114 } else if (parseOptionalAngleBracketOpen()) {
4115 if (Field.LengthOf == 1)
4116 return Error(Loc, "Cannot initialize scalar field with array value");
4117 if (parseRealInstList(*Semantics, AsIntValues, AsmToken::Greater) ||
4118 parseAngleBracketClose())
4119 return true;
4120 } else if (Field.LengthOf > 1) {
4121 return Error(Loc, "Cannot initialize array field with scalar value");
4122 } else {
4123 AsIntValues.emplace_back();
4124 if (parseRealValue(*Semantics, AsIntValues.back()))
4125 return true;
4126 }
4127
4128 if (AsIntValues.size() > Field.LengthOf) {
4129 return Error(Loc, "Initializer too long for field; expected at most " +
4130 std::to_string(Field.LengthOf) + " elements, got " +
4131 std::to_string(AsIntValues.size()));
4132 }
4133 // Default-initialize all remaining values.
4134 AsIntValues.append(Contents.AsIntValues.begin() + AsIntValues.size(),
4135 Contents.AsIntValues.end());
4136
4137 Initializer = FieldInitializer(std::move(AsIntValues));
4138 return false;
4139}
4140
4141bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
4142 const StructFieldInfo &Contents,
4143 FieldInitializer &Initializer) {
4144 SMLoc Loc = getTok().getLoc();
4145
4146 std::vector<StructInitializer> Initializers;
4147 if (Field.LengthOf > 1) {
4148 if (parseOptionalToken(AsmToken::LCurly)) {
4149 if (parseStructInstList(Contents.Structure, Initializers,
4151 parseToken(AsmToken::RCurly))
4152 return true;
4153 } else if (parseOptionalAngleBracketOpen()) {
4154 if (parseStructInstList(Contents.Structure, Initializers,
4156 parseAngleBracketClose())
4157 return true;
4158 } else {
4159 return Error(Loc, "Cannot initialize array field with scalar value");
4160 }
4161 } else {
4162 Initializers.emplace_back();
4163 if (parseStructInitializer(Contents.Structure, Initializers.back()))
4164 return true;
4165 }
4166
4167 if (Initializers.size() > Field.LengthOf) {
4168 return Error(Loc, "Initializer too long for field; expected at most " +
4169 std::to_string(Field.LengthOf) + " elements, got " +
4170 std::to_string(Initializers.size()));
4171 }
4172 // Default-initialize all remaining values.
4173 Initializers.insert(Initializers.end(),
4174 Contents.Initializers.begin() + Initializers.size(),
4175 Contents.Initializers.end());
4176
4177 Initializer = FieldInitializer(std::move(Initializers), Contents.Structure);
4178 return false;
4179}
4180
4181bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
4182 FieldInitializer &Initializer) {
4183 switch (Field.Contents.FT) {
4184 case FT_INTEGRAL:
4185 return parseFieldInitializer(Field, Field.Contents.IntInfo, Initializer);
4186 case FT_REAL:
4187 return parseFieldInitializer(Field, Field.Contents.RealInfo, Initializer);
4188 case FT_STRUCT:
4189 return parseFieldInitializer(Field, Field.Contents.StructInfo, Initializer);
4190 }
4191 llvm_unreachable("Unhandled FieldType enum");
4192}
4193
4194bool MasmParser::parseStructInitializer(const StructInfo &Structure,
4195 StructInitializer &Initializer) {
4196 const AsmToken FirstToken = getTok();
4197
4198 std::optional<AsmToken::TokenKind> EndToken;
4199 if (parseOptionalToken(AsmToken::LCurly)) {
4200 EndToken = AsmToken::RCurly;
4201 } else if (parseOptionalAngleBracketOpen()) {
4202 EndToken = AsmToken::Greater;
4203 AngleBracketDepth++;
4204 } else if (FirstToken.is(AsmToken::Identifier) &&
4205 FirstToken.getString() == "?") {
4206 // ? initializer; leave EndToken uninitialized to treat as empty.
4207 if (parseToken(AsmToken::Identifier))
4208 return true;
4209 } else {
4210 return Error(FirstToken.getLoc(), "Expected struct initializer");
4211 }
4212
4213 auto &FieldInitializers = Initializer.FieldInitializers;
4214 size_t FieldIndex = 0;
4215 if (EndToken) {
4216 // Initialize all fields with given initializers.
4217 while (getTok().isNot(*EndToken) && FieldIndex < Structure.Fields.size()) {
4218 const FieldInfo &Field = Structure.Fields[FieldIndex++];
4219 if (parseOptionalToken(AsmToken::Comma)) {
4220 // Empty initializer; use the default and continue. (Also, allow line
4221 // continuation.)
4222 FieldInitializers.push_back(Field.Contents);
4223 parseOptionalToken(AsmToken::EndOfStatement);
4224 continue;
4225 }
4226 FieldInitializers.emplace_back(Field.Contents.FT);
4227 if (parseFieldInitializer(Field, FieldInitializers.back()))
4228 return true;
4229
4230 // Continue if we see a comma. (Also, allow line continuation.)
4231 SMLoc CommaLoc = getTok().getLoc();
4232 if (!parseOptionalToken(AsmToken::Comma))
4233 break;
4234 if (FieldIndex == Structure.Fields.size())
4235 return Error(CommaLoc, "'" + Structure.Name +
4236 "' initializer initializes too many fields");
4237 parseOptionalToken(AsmToken::EndOfStatement);
4238 }
4239 }
4240 // Default-initialize all remaining fields.
4241 for (const FieldInfo &Field : llvm::drop_begin(Structure.Fields, FieldIndex))
4242 FieldInitializers.push_back(Field.Contents);
4243
4244 if (EndToken) {
4245 if (*EndToken == AsmToken::Greater)
4246 return parseAngleBracketClose();
4247
4248 return parseToken(*EndToken);
4249 }
4250
4251 return false;
4252}
4253
4254bool MasmParser::parseStructInstList(
4255 const StructInfo &Structure, std::vector<StructInitializer> &Initializers,
4256 const AsmToken::TokenKind EndToken) {
4257 while (getTok().isNot(EndToken) ||
4258 (EndToken == AsmToken::Greater &&
4259 getTok().isNot(AsmToken::GreaterGreater))) {
4260 const AsmToken NextTok = peekTok();
4261 if (NextTok.is(AsmToken::Identifier) &&
4262 NextTok.getString().equals_insensitive("dup")) {
4263 const MCExpr *Value;
4264 if (parseExpression(Value) || parseToken(AsmToken::Identifier))
4265 return true;
4266 const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
4267 if (!MCE)
4268 return Error(Value->getLoc(),
4269 "cannot repeat value a non-constant number of times");
4270 const int64_t Repetitions = MCE->getValue();
4271 if (Repetitions < 0)
4272 return Error(Value->getLoc(),
4273 "cannot repeat value a negative number of times");
4274
4275 std::vector<StructInitializer> DuplicatedValues;
4276 if (parseToken(AsmToken::LParen,
4277 "parentheses required for 'dup' contents") ||
4278 parseStructInstList(Structure, DuplicatedValues) || parseRParen())
4279 return true;
4280
4281 for (int i = 0; i < Repetitions; ++i)
4282 llvm::append_range(Initializers, DuplicatedValues);
4283 } else {
4284 Initializers.emplace_back();
4285 if (parseStructInitializer(Structure, Initializers.back()))
4286 return true;
4287 }
4288
4289 // Continue if we see a comma. (Also, allow line continuation.)
4290 if (!parseOptionalToken(AsmToken::Comma))
4291 break;
4292 parseOptionalToken(AsmToken::EndOfStatement);
4293 }
4294
4295 return false;
4296}
4297
4298bool MasmParser::emitFieldValue(const FieldInfo &Field,
4299 const IntFieldInfo &Contents) {
4300 // Default-initialize all values.
4301 for (const MCExpr *Value : Contents.Values) {
4302 if (emitIntValue(Value, Field.Type))
4303 return true;
4304 }
4305 return false;
4306}
4307
4308bool MasmParser::emitFieldValue(const FieldInfo &Field,
4309 const RealFieldInfo &Contents) {
4310 for (const APInt &AsInt : Contents.AsIntValues) {
4311 getStreamer().emitIntValue(AsInt.getLimitedValue(),
4312 AsInt.getBitWidth() / 8);
4313 }
4314 return false;
4315}
4316
4317bool MasmParser::emitFieldValue(const FieldInfo &Field,
4318 const StructFieldInfo &Contents) {
4319 for (const auto &Initializer : Contents.Initializers) {
4320 size_t Index = 0, Offset = 0;
4321 for (const auto &SubField : Contents.Structure.Fields) {
4322 getStreamer().emitZeros(SubField.Offset - Offset);
4323 Offset = SubField.Offset + SubField.SizeOf;
4324 emitFieldInitializer(SubField, Initializer.FieldInitializers[Index++]);
4325 }
4326 }
4327 return false;
4328}
4329
4330bool MasmParser::emitFieldValue(const FieldInfo &Field) {
4331 switch (Field.Contents.FT) {
4332 case FT_INTEGRAL:
4333 return emitFieldValue(Field, Field.Contents.IntInfo);
4334 case FT_REAL:
4335 return emitFieldValue(Field, Field.Contents.RealInfo);
4336 case FT_STRUCT:
4337 return emitFieldValue(Field, Field.Contents.StructInfo);
4338 }
4339 llvm_unreachable("Unhandled FieldType enum");
4340}
4341
4342bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4343 const IntFieldInfo &Contents,
4344 const IntFieldInfo &Initializer) {
4345 for (const auto &Value : Initializer.Values) {
4346 if (emitIntValue(Value, Field.Type))
4347 return true;
4348 }
4349 // Default-initialize all remaining values.
4350 for (const auto &Value :
4351 llvm::drop_begin(Contents.Values, Initializer.Values.size())) {
4352 if (emitIntValue(Value, Field.Type))
4353 return true;
4354 }
4355 return false;
4356}
4357
4358bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4359 const RealFieldInfo &Contents,
4360 const RealFieldInfo &Initializer) {
4361 for (const auto &AsInt : Initializer.AsIntValues) {
4362 getStreamer().emitIntValue(AsInt.getLimitedValue(),
4363 AsInt.getBitWidth() / 8);
4364 }
4365 // Default-initialize all remaining values.
4366 for (const auto &AsInt :
4367 llvm::drop_begin(Contents.AsIntValues, Initializer.AsIntValues.size())) {
4368 getStreamer().emitIntValue(AsInt.getLimitedValue(),
4369 AsInt.getBitWidth() / 8);
4370 }
4371 return false;
4372}
4373
4374bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4375 const StructFieldInfo &Contents,
4376 const StructFieldInfo &Initializer) {
4377 for (const auto &Init : Initializer.Initializers) {
4378 if (emitStructInitializer(Contents.Structure, Init))
4379 return true;
4380 }
4381 // Default-initialize all remaining values.
4382 for (const auto &Init : llvm::drop_begin(Contents.Initializers,
4383 Initializer.Initializers.size())) {
4384 if (emitStructInitializer(Contents.Structure, Init))
4385 return true;
4386 }
4387 return false;
4388}
4389
4390bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4391 const FieldInitializer &Initializer) {
4392 switch (Field.Contents.FT) {
4393 case FT_INTEGRAL:
4394 return emitFieldInitializer(Field, Field.Contents.IntInfo,
4395 Initializer.IntInfo);
4396 case FT_REAL:
4397 return emitFieldInitializer(Field, Field.Contents.RealInfo,
4398 Initializer.RealInfo);
4399 case FT_STRUCT:
4400 return emitFieldInitializer(Field, Field.Contents.StructInfo,
4401 Initializer.StructInfo);
4402 }
4403 llvm_unreachable("Unhandled FieldType enum");
4404}
4405
4406bool MasmParser::emitStructInitializer(const StructInfo &Structure,
4407 const StructInitializer &Initializer) {
4408 if (!Structure.Initializable)
4409 return Error(getLexer().getLoc(),
4410 "cannot initialize a value of type '" + Structure.Name +
4411 "'; 'org' was used in the type's declaration");
4412 size_t Index = 0, Offset = 0;
4413 for (const auto &Init : Initializer.FieldInitializers) {
4414 const auto &Field = Structure.Fields[Index++];
4415 getStreamer().emitZeros(Field.Offset - Offset);
4416 Offset = Field.Offset + Field.SizeOf;
4417 if (emitFieldInitializer(Field, Init))
4418 return true;
4419 }
4420 // Default-initialize all remaining fields.
4421 for (const auto &Field : llvm::drop_begin(
4422 Structure.Fields, Initializer.FieldInitializers.size())) {
4423 getStreamer().emitZeros(Field.Offset - Offset);
4424 Offset = Field.Offset + Field.SizeOf;
4425 if (emitFieldValue(Field))
4426 return true;
4427 }
4428 // Add final padding.
4429 if (Offset != Structure.Size)
4430 getStreamer().emitZeros(Structure.Size - Offset);
4431 return false;
4432}
4433
4434// Set data values from initializers.
4435bool MasmParser::emitStructValues(const StructInfo &Structure,
4436 unsigned *Count) {
4437 std::vector<StructInitializer> Initializers;
4438 if (parseStructInstList(Structure, Initializers))
4439 return true;
4440
4441 for (const auto &Initializer : Initializers) {
4442 if (emitStructInitializer(Structure, Initializer))
4443 return true;
4444 }
4445
4446 if (Count)
4447 *Count = Initializers.size();
4448 return false;
4449}
4450
4451// Declare a field in the current struct.
4452bool MasmParser::addStructField(StringRef Name, const StructInfo &Structure) {
4453 StructInfo &OwningStruct = StructInProgress.back();
4454 FieldInfo &Field =
4455 OwningStruct.addField(Name, FT_STRUCT, Structure.AlignmentSize);
4456 StructFieldInfo &StructInfo = Field.Contents.StructInfo;
4457
4458 StructInfo.Structure = Structure;
4459 Field.Type = Structure.Size;
4460
4461 if (parseStructInstList(Structure, StructInfo.Initializers))
4462 return true;
4463
4464 Field.LengthOf = StructInfo.Initializers.size();
4465 Field.SizeOf = Field.Type * Field.LengthOf;
4466
4467 const unsigned FieldEnd = Field.Offset + Field.SizeOf;
4468 if (!OwningStruct.IsUnion) {
4469 OwningStruct.NextOffset = FieldEnd;
4470 }
4471 OwningStruct.Size = std::max(OwningStruct.Size, FieldEnd);
4472
4473 return false;
4474}
4475
4476/// parseDirectiveStructValue
4477/// ::= struct-id (<struct-initializer> | {struct-initializer})
4478/// [, (<struct-initializer> | {struct-initializer})]*
4479bool MasmParser::parseDirectiveStructValue(const StructInfo &Structure,
4480 StringRef Directive, SMLoc DirLoc) {
4481 if (StructInProgress.empty()) {
4482 if (emitStructValues(Structure))
4483 return true;
4484 } else if (addStructField("", Structure)) {
4485 return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4486 }
4487
4488 return false;
4489}
4490
4491/// parseDirectiveNamedValue
4492/// ::= name (byte | word | ... ) [ expression (, expression)* ]
4493bool MasmParser::parseDirectiveNamedStructValue(const StructInfo &Structure,
4495 SMLoc DirLoc, StringRef Name) {
4496 if (StructInProgress.empty()) {
4497 // Initialize named data value.
4498 MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
4499 getStreamer().emitLabel(Sym);
4500 unsigned Count;
4501 if (emitStructValues(Structure, &Count))
4502 return true;
4504 Type.Name = Structure.Name;
4505 Type.Size = Structure.Size * Count;
4506 Type.ElementSize = Structure.Size;
4507 Type.Length = Count;
4508 KnownType[Name.lower()] = Type;
4509 } else if (addStructField(Name, Structure)) {
4510 return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4511 }
4512
4513 return false;
4514}
4515
4516/// parseDirectiveStruct
4517/// ::= <name> (STRUC | STRUCT | UNION) [fieldAlign] [, NONUNIQUE]
4518/// (dataDir | generalDir | offsetDir | nestedStruct)+
4519/// <name> ENDS
4520////// dataDir = data declaration
4521////// offsetDir = EVEN, ORG, ALIGN
4522bool MasmParser::parseDirectiveStruct(StringRef Directive,
4523 DirectiveKind DirKind, StringRef Name,
4524 SMLoc NameLoc) {
4525 // We ignore NONUNIQUE; we do not support OPTION M510 or OPTION OLDSTRUCTS
4526 // anyway, so all field accesses must be qualified.
4527 AsmToken NextTok = getTok();
4528 int64_t AlignmentValue = 1;
4529 if (NextTok.isNot(AsmToken::Comma) &&
4531 parseAbsoluteExpression(AlignmentValue)) {
4532 return addErrorSuffix(" in alignment value for '" + Twine(Directive) +
4533 "' directive");
4534 }
4535 if (!isPowerOf2_64(AlignmentValue)) {
4536 return Error(NextTok.getLoc(), "alignment must be a power of two; was " +
4537 std::to_string(AlignmentValue));
4538 }
4539
4541 SMLoc QualifierLoc;
4542 if (parseOptionalToken(AsmToken::Comma)) {
4543 QualifierLoc = getTok().getLoc();
4544 if (parseIdentifier(Qualifier))
4545 return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4546 if (!Qualifier.equals_insensitive("nonunique"))
4547 return Error(QualifierLoc, "Unrecognized qualifier for '" +
4548 Twine(Directive) +
4549 "' directive; expected none or NONUNIQUE");
4550 }
4551
4552 if (parseToken(AsmToken::EndOfStatement))
4553 return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4554
4555 StructInProgress.emplace_back(Name, DirKind == DK_UNION, AlignmentValue);
4556 return false;
4557}
4558
4559/// parseDirectiveNestedStruct
4560/// ::= (STRUC | STRUCT | UNION) [name]
4561/// (dataDir | generalDir | offsetDir | nestedStruct)+
4562/// ENDS
4563bool MasmParser::parseDirectiveNestedStruct(StringRef Directive,
4564 DirectiveKind DirKind) {
4565 if (StructInProgress.empty())
4566 return TokError("missing name in top-level '" + Twine(Directive) +
4567 "' directive");
4568
4570 if (getTok().is(AsmToken::Identifier)) {
4571 Name = getTok().getIdentifier();
4572 parseToken(AsmToken::Identifier);
4573 }
4574 if (parseToken(AsmToken::EndOfStatement))
4575 return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4576
4577 // Reserve space to ensure Alignment doesn't get invalidated when
4578 // StructInProgress grows.
4579 StructInProgress.reserve(StructInProgress.size() + 1);
4580 StructInProgress.emplace_back(Name, DirKind == DK_UNION,
4581 StructInProgress.back().Alignment);
4582 return false;
4583}
4584
4585bool MasmParser::parseDirectiveEnds(StringRef Name, SMLoc NameLoc) {
4586 if (StructInProgress.empty())
4587 return Error(NameLoc, "ENDS directive without matching STRUC/STRUCT/UNION");
4588 if (StructInProgress.size() > 1)
4589 return Error(NameLoc, "unexpected name in nested ENDS directive");
4590 if (StructInProgress.back().Name.compare_insensitive(Name))
4591 return Error(NameLoc, "mismatched name in ENDS directive; expected '" +
4592 StructInProgress.back().Name + "'");
4593 StructInfo Structure = StructInProgress.pop_back_val();
4594 // Pad to make the structure's size divisible by the smaller of its alignment
4595 // and the size of its largest field.
4596 Structure.Size = llvm::alignTo(
4597 Structure.Size, std::min(Structure.Alignment, Structure.AlignmentSize));
4598 Structs[Name.lower()] = Structure;
4599
4600 if (parseToken(AsmToken::EndOfStatement))
4601 return addErrorSuffix(" in ENDS directive");
4602
4603 return false;
4604}
4605
4606bool MasmParser::parseDirectiveNestedEnds() {
4607 if (StructInProgress.empty())
4608 return TokError("ENDS directive without matching STRUC/STRUCT/UNION");
4609 if (StructInProgress.size() == 1)
4610 return TokError("missing name in top-level ENDS directive");
4611
4612 if (parseToken(AsmToken::EndOfStatement))
4613 return addErrorSuffix(" in nested ENDS directive");
4614
4615 StructInfo Structure = StructInProgress.pop_back_val();
4616 // Pad to make the structure's size divisible by its alignment.
4617 Structure.Size = llvm::alignTo(Structure.Size, Structure.Alignment);
4618
4619 StructInfo &ParentStruct = StructInProgress.back();
4620 if (Structure.Name.empty()) {
4621 // Anonymous substructures' fields are addressed as if they belong to the
4622 // parent structure - so we transfer them to the parent here.
4623 const size_t OldFields = ParentStruct.Fields.size();
4624 ParentStruct.Fields.insert(
4625 ParentStruct.Fields.end(),
4626 std::make_move_iterator(Structure.Fields.begin()),
4627 std::make_move_iterator(Structure.Fields.end()));
4628 for (const auto &FieldByName : Structure.FieldsByName) {
4629 ParentStruct.FieldsByName[FieldByName.getKey()] =
4630 FieldByName.getValue() + OldFields;
4631 }
4632
4633 unsigned FirstFieldOffset = 0;
4634 if (!Structure.Fields.empty() && !ParentStruct.IsUnion) {
4635 FirstFieldOffset = llvm::alignTo(
4636 ParentStruct.NextOffset,
4637 std::min(ParentStruct.Alignment, Structure.AlignmentSize));
4638 }
4639
4640 if (ParentStruct.IsUnion) {
4641 ParentStruct.Size = std::max(ParentStruct.Size, Structure.Size);
4642 } else {
4643 for (auto &Field : llvm::drop_begin(ParentStruct.Fields, OldFields))
4644 Field.Offset += FirstFieldOffset;
4645
4646 const unsigned StructureEnd = FirstFieldOffset + Structure.Size;
4647 if (!ParentStruct.IsUnion) {
4648 ParentStruct.NextOffset = StructureEnd;
4649 }
4650 ParentStruct.Size = std::max(ParentStruct.Size, StructureEnd);
4651 }
4652 } else {
4653 FieldInfo &Field = ParentStruct.addField(Structure.Name, FT_STRUCT,
4654 Structure.AlignmentSize);
4655 StructFieldInfo &StructInfo = Field.Contents.StructInfo;
4656 Field.Type = Structure.Size;
4657 Field.LengthOf = 1;
4658 Field.SizeOf = Structure.Size;
4659
4660 const unsigned StructureEnd = Field.Offset + Field.SizeOf;
4661 if (!ParentStruct.IsUnion) {
4662 ParentStruct.NextOffset = StructureEnd;
4663 }
4664 ParentStruct.Size = std::max(ParentStruct.Size, StructureEnd);
4665
4666 StructInfo.Structure = Structure;
4667 StructInfo.Initializers.emplace_back();
4668 auto &FieldInitializers = StructInfo.Initializers.back().FieldInitializers;
4669 for (const auto &SubField : Structure.Fields) {
4670 FieldInitializers.push_back(SubField.Contents);
4671 }
4672 }
4673
4674 return false;
4675}
4676
4677/// parseDirectiveOrg
4678/// ::= org expression
4679bool MasmParser::parseDirectiveOrg() {
4680 const MCExpr *Offset;
4681 SMLoc OffsetLoc = Lexer.getLoc();
4682 if (checkForValidSection() || parseExpression(Offset))
4683 return true;
4684 if (parseToken(AsmToken::EndOfStatement))
4685 return addErrorSuffix(" in 'org' directive");
4686
4687 if (StructInProgress.empty()) {
4688 // Not in a struct; change the offset for the next instruction or data
4689 if (checkForValidSection())
4690 return addErrorSuffix(" in 'org' directive");
4691
4692 getStreamer().emitValueToOffset(Offset, 0, OffsetLoc);
4693 } else {
4694 // Offset the next field of this struct
4695 StructInfo &Structure = StructInProgress.back();
4696 int64_t OffsetRes;
4697 if (!Offset->evaluateAsAbsolute(OffsetRes, getStreamer().getAssemblerPtr()))
4698 return Error(OffsetLoc,
4699 "expected absolute expression in 'org' directive");
4700 if (OffsetRes < 0)
4701 return Error(
4702 OffsetLoc,
4703 "expected non-negative value in struct's 'org' directive; was " +
4704 std::to_string(OffsetRes));
4705 Structure.NextOffset = static_cast<unsigned>(OffsetRes);
4706
4707 // ORG-affected structures cannot be initialized
4708 Structure.Initializable = false;
4709 }
4710
4711 return false;
4712}
4713
4714bool MasmParser::emitAlignTo(int64_t Alignment) {
4715 if (StructInProgress.empty()) {
4716 // Not in a struct; align the next instruction or data
4717 if (checkForValidSection())
4718 return true;
4719
4720 // Check whether we should use optimal code alignment for this align
4721 // directive.
4722 const MCSection *Section = getStreamer().getCurrentSectionOnly();
4723 assert(Section && "must have section to emit alignment");
4724 if (Section->useCodeAlign()) {
4725 getStreamer().emitCodeAlignment(Align(Alignment),
4726 &getTargetParser().getSTI(),
4727 /*MaxBytesToEmit=*/0);
4728 } else {
4729 // FIXME: Target specific behavior about how the "extra" bytes are filled.
4730 getStreamer().emitValueToAlignment(Align(Alignment), /*Value=*/0,
4731 /*ValueSize=*/1,
4732 /*MaxBytesToEmit=*/0);
4733 }
4734 } else {
4735 // Align the next field of this struct
4736 StructInfo &Structure = StructInProgress.back();
4737 Structure.NextOffset = llvm::alignTo(Structure.NextOffset, Alignment);
4738 }
4739
4740 return false;
4741}
4742
4743/// parseDirectiveAlign
4744/// ::= align expression
4745bool MasmParser::parseDirectiveAlign() {
4746 SMLoc AlignmentLoc = getLexer().getLoc();
4747 int64_t Alignment;
4748
4749 // Ignore empty 'align' directives.
4750 if (getTok().is(AsmToken::EndOfStatement)) {
4751 return Warning(AlignmentLoc,
4752 "align directive with no operand is ignored") &&
4753 parseToken(AsmToken::EndOfStatement);
4754 }
4755 if (parseAbsoluteExpression(Alignment) ||
4756 parseToken(AsmToken::EndOfStatement))
4757 return addErrorSuffix(" in align directive");
4758
4759 // Always emit an alignment here even if we throw an error.
4760 bool ReturnVal = false;
4761
4762 // Reject alignments that aren't either a power of two or zero, for ML.exe
4763 // compatibility. Alignment of zero is silently rounded up to one.
4764 if (Alignment == 0)
4765 Alignment = 1;
4766 if (!isPowerOf2_64(Alignment))
4767 ReturnVal |= Error(AlignmentLoc, "alignment must be a power of 2; was " +
4768 std::to_string(Alignment));
4769
4770 if (emitAlignTo(Alignment))
4771 ReturnVal |= addErrorSuffix(" in align directive");
4772
4773 return ReturnVal;
4774}
4775
4776/// parseDirectiveEven
4777/// ::= even
4778bool MasmParser::parseDirectiveEven() {
4779 if (parseToken(AsmToken::EndOfStatement) || emitAlignTo(2))
4780 return addErrorSuffix(" in even directive");
4781
4782 return false;
4783}
4784
4785/// parseDirectiveFile
4786/// ::= .file filename
4787/// ::= .file number [directory] filename [md5 checksum] [source source-text]
4788bool MasmParser::parseDirectiveFile(SMLoc DirectiveLoc) {
4789 // FIXME: I'm not sure what this is.
4790 int64_t FileNumber = -1;
4791 if (getLexer().is(AsmToken::Integer)) {
4792 FileNumber = getTok().getIntVal();
4793 Lex();
4794
4795 if (FileNumber < 0)
4796 return TokError("negative file number");
4797 }
4798
4799 std::string Path;
4800
4801 // Usually the directory and filename together, otherwise just the directory.
4802 // Allow the strings to have escaped octal character sequence.
4803 if (check(getTok().isNot(AsmToken::String),
4804 "unexpected token in '.file' directive") ||
4805 parseEscapedString(Path))
4806 return true;
4807
4808 StringRef Directory;
4810 std::string FilenameData;
4811 if (getLexer().is(AsmToken::String)) {
4812 if (check(FileNumber == -1,
4813 "explicit path specified, but no file number") ||
4814 parseEscapedString(FilenameData))
4815 return true;
4816 Filename = FilenameData;
4817 Directory = Path;
4818 } else {
4819 Filename = Path;
4820 }
4821
4822 uint64_t MD5Hi, MD5Lo;
4823 bool HasMD5 = false;
4824
4825 std::optional<StringRef> Source;
4826 bool HasSource = false;
4827 std::string SourceString;
4828
4829 while (!parseOptionalToken(AsmToken::EndOfStatement)) {
4831 if (check(getTok().isNot(AsmToken::Identifier),
4832 "unexpected token in '.file' directive") ||
4833 parseIdentifier(Keyword))
4834 return true;
4835 if (Keyword == "md5") {
4836 HasMD5 = true;
4837 if (check(FileNumber == -1,
4838 "MD5 checksum specified, but no file number") ||
4839 parseHexOcta(*this, MD5Hi, MD5Lo))
4840 return true;
4841 } else if (Keyword == "source") {
4842 HasSource = true;
4843 if (check(FileNumber == -1,
4844 "source specified, but no file number") ||
4845 check(getTok().isNot(AsmToken::String),
4846 "unexpected token in '.file' directive") ||
4847 parseEscapedString(SourceString))
4848 return true;
4849 } else {
4850 return TokError("unexpected token in '.file' directive");
4851 }
4852 }
4853
4854 if (FileNumber == -1) {
4855 // Ignore the directive if there is no number and the target doesn't support
4856 // numberless .file directives. This allows some portability of assembler
4857 // between different object file formats.
4858 if (getContext().getAsmInfo()->hasSingleParameterDotFile())
4859 getStreamer().emitFileDirective(Filename);
4860 } else {
4861 // In case there is a -g option as well as debug info from directive .file,
4862 // we turn off the -g option, directly use the existing debug info instead.
4863 // Throw away any implicit file table for the assembler source.
4864 if (Ctx.getGenDwarfForAssembly()) {
4866 Ctx.setGenDwarfForAssembly(false);
4867 }
4868
4869 std::optional<MD5::MD5Result> CKMem;
4870 if (HasMD5) {
4871 MD5::MD5Result Sum;
4872 for (unsigned i = 0; i != 8; ++i) {
4873 Sum[i] = uint8_t(MD5Hi >> ((7 - i) * 8));
4874 Sum[i + 8] = uint8_t(MD5Lo >> ((7 - i) * 8));
4875 }
4876 CKMem = Sum;
4877 }
4878 if (HasSource) {
4879 char *SourceBuf = static_cast<char *>(Ctx.allocate(SourceString.size()));
4880 memcpy(SourceBuf, SourceString.data(), SourceString.size());
4881 Source = StringRef(SourceBuf, SourceString.size());
4882 }
4883 if (FileNumber == 0) {
4884 if (Ctx.getDwarfVersion() < 5)
4885 return Warning(DirectiveLoc, "file 0 not supported prior to DWARF-5");
4886 getStreamer().emitDwarfFile0Directive(Directory, Filename, CKMem, Source);
4887 } else {
4888 Expected<unsigned> FileNumOrErr = getStreamer().tryEmitDwarfFileDirective(
4889 FileNumber, Directory, Filename, CKMem, Source);
4890 if (!FileNumOrErr)
4891 return Error(DirectiveLoc, toString(FileNumOrErr.takeError()));
4892 }
4893 // Alert the user if there are some .file directives with MD5 and some not.
4894 // But only do that once.
4895 if (!ReportedInconsistentMD5 && !Ctx.isDwarfMD5UsageConsistent(0)) {
4896 ReportedInconsistentMD5 = true;
4897 return Warning(DirectiveLoc, "inconsistent use of MD5 checksums");
4898 }
4899 }
4900
4901 return false;
4902}
4903
4904/// parseDirectiveLine
4905/// ::= .line [number]
4906bool MasmParser::parseDirectiveLine() {
4907 int64_t LineNumber;
4908 if (getLexer().is(AsmToken::Integer)) {
4909 if (parseIntToken(LineNumber, "unexpected token in '.line' directive"))
4910 return true;
4911 (void)LineNumber;
4912 // FIXME: Do something with the .line.
4913 }
4914 if (parseEOL())
4915 return true;
4916
4917 return false;
4918}
4919
4920/// parseDirectiveLoc
4921/// ::= .loc FileNumber [LineNumber] [ColumnPos] [basic_block] [prologue_end]
4922/// [epilogue_begin] [is_stmt VALUE] [isa VALUE]
4923/// The first number is a file number, must have been previously assigned with
4924/// a .file directive, the second number is the line number and optionally the
4925/// third number is a column position (zero if not specified). The remaining
4926/// optional items are .loc sub-directives.
4927bool MasmParser::parseDirectiveLoc() {
4928 int64_t FileNumber = 0, LineNumber = 0;
4929 SMLoc Loc = getTok().getLoc();
4930 if (parseIntToken(FileNumber, "unexpected token in '.loc' directive") ||
4931 check(FileNumber < 1 && Ctx.getDwarfVersion() < 5, Loc,
4932 "file number less than one in '.loc' directive") ||
4933 check(!getContext().isValidDwarfFileNumber(FileNumber), Loc,
4934 "unassigned file number in '.loc' directive"))
4935 return true;
4936
4937 // optional
4938 if (getLexer().is(AsmToken::Integer)) {
4939 LineNumber = getTok().getIntVal();
4940 if (LineNumber < 0)
4941 return TokError("line number less than zero in '.loc' directive");
4942 Lex();
4943 }
4944
4945 int64_t ColumnPos = 0;
4946 if (getLexer().is(AsmToken::Integer)) {
4947 ColumnPos = getTok().getIntVal();
4948 if (ColumnPos < 0)
4949 return TokError("column position less than zero in '.loc' directive");
4950 Lex();
4951 }
4952
4953 auto PrevFlags = getContext().getCurrentDwarfLoc().getFlags();
4954 unsigned Flags = PrevFlags & DWARF2_FLAG_IS_STMT;
4955 unsigned Isa = 0;
4956 int64_t Discriminator = 0;
4957
4958 auto parseLocOp = [&]() -> bool {
4960 SMLoc Loc = getTok().getLoc();
4961 if (parseIdentifier(Name))
4962 return TokError("unexpected token in '.loc' directive");
4963
4964 if (Name == "basic_block")
4966 else if (Name == "prologue_end")
4968 else if (Name == "epilogue_begin")
4970 else if (Name == "is_stmt") {
4971 Loc = getTok().getLoc();
4972 const MCExpr *Value;
4973 if (parseExpression(Value))
4974 return true;
4975 // The expression must be the constant 0 or 1.
4976 if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
4977 int Value = MCE->getValue();
4978 if (Value == 0)
4979 Flags &= ~DWARF2_FLAG_IS_STMT;
4980 else if (Value == 1)
4982 else
4983 return Error(Loc, "is_stmt value not 0 or 1");
4984 } else {
4985 return Error(Loc, "is_stmt value not the constant value of 0 or 1");
4986 }
4987 } else if (Name == "isa") {
4988 Loc = getTok().getLoc();
4989 const MCExpr *Value;
4990 if (parseExpression(Value))
4991 return true;
4992 // The expression must be a constant greater or equal to 0.
4993 if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
4994 int Value = MCE->getValue();
4995 if (Value < 0)
4996 return Error(Loc, "isa number less than zero");
4997 Isa = Value;
4998 } else {
4999 return Error(Loc, "isa number not a constant value");
5000 }
5001 } else if (Name == "discriminator") {
5002 if (parseAbsoluteExpression(Discriminator))
5003 return true;
5004 } else {
5005 return Error(Loc, "unknown sub-directive in '.loc' directive");
5006 }
5007 return false;
5008 };
5009
5010 if (parseMany(parseLocOp, false /*hasComma*/))
5011 return true;
5012
5013 getStreamer().emitDwarfLocDirective(FileNumber, LineNumber, ColumnPos, Flags,
5014 Isa, Discriminator, StringRef());
5015
5016 return false;
5017}
5018
5019/// parseDirectiveStabs
5020/// ::= .stabs string, number, number, number
5021bool MasmParser::parseDirectiveStabs() {
5022 return TokError("unsupported directive '.stabs'");
5023}
5024
5025/// parseDirectiveCVFile
5026/// ::= .cv_file number filename [checksum] [checksumkind]
5027bool MasmParser::parseDirectiveCVFile() {
5028 SMLoc FileNumberLoc = getTok().getLoc();
5029 int64_t FileNumber;
5030 std::string Filename;
5031 std::string Checksum;
5032 int64_t ChecksumKind = 0;
5033
5034 if (parseIntToken(FileNumber,
5035 "expected file number in '.cv_file' directive") ||
5036 check(FileNumber < 1, FileNumberLoc, "file number less than one") ||
5037 check(getTok().isNot(AsmToken::String),
5038 "unexpected token in '.cv_file' directive") ||
5039 parseEscapedString(Filename))
5040 return true;
5041 if (!parseOptionalToken(AsmToken::EndOfStatement)) {
5042 if (check(getTok().isNot(AsmToken::String),
5043 "unexpected token in '.cv_file' directive") ||
5044 parseEscapedString(Checksum) ||
5045 parseIntToken(ChecksumKind,
5046 "expected checksum kind in '.cv_file' directive") ||
5047 parseEOL())
5048 return true;
5049 }
5050
5051 Checksum = fromHex(Checksum);
5052 void *CKMem = Ctx.allocate(Checksum.size(), 1);
5053 memcpy(CKMem, Checksum.data(), Checksum.size());
5054 ArrayRef<uint8_t> ChecksumAsBytes(reinterpret_cast<const uint8_t *>(CKMem),
5055 Checksum.size());
5056
5057 if (!getStreamer().emitCVFileDirective(FileNumber, Filename, ChecksumAsBytes,
5058 static_cast<uint8_t>(ChecksumKind)))
5059 return Error(FileNumberLoc, "file number already allocated");
5060
5061 return false;
5062}
5063
5064bool MasmParser::parseCVFunctionId(int64_t &FunctionId,
5065 StringRef DirectiveName) {
5066 SMLoc Loc;
5067 return parseTokenLoc(Loc) ||
5068 parseIntToken(FunctionId, "expected function id in '" + DirectiveName +
5069 "' directive") ||
5070 check(FunctionId < 0 || FunctionId >= UINT_MAX, Loc,
5071 "expected function id within range [0, UINT_MAX)");
5072}
5073
5074bool MasmParser::parseCVFileId(int64_t &FileNumber, StringRef DirectiveName) {
5075 SMLoc Loc;
5076 return parseTokenLoc(Loc) ||
5077 parseIntToken(FileNumber, "expected integer in '" + DirectiveName +
5078 "' directive") ||
5079 check(FileNumber < 1, Loc, "file number less than one in '" +
5080 DirectiveName + "' directive") ||
5081 check(!getCVContext().isValidFileNumber(FileNumber), Loc,
5082 "unassigned file number in '" + DirectiveName + "' directive");
5083}
5084
5085/// parseDirectiveCVFuncId
5086/// ::= .cv_func_id FunctionId
5087///
5088/// Introduces a function ID that can be used with .cv_loc.
5089bool MasmParser::parseDirectiveCVFuncId() {
5090 SMLoc FunctionIdLoc = getTok().getLoc();
5091 int64_t FunctionId;
5092
5093 if (parseCVFunctionId(FunctionId, ".cv_func_id") || parseEOL())
5094 return true;
5095
5096 if (!getStreamer().emitCVFuncIdDirective(FunctionId))
5097 return Error(FunctionIdLoc, "function id already allocated");
5098
5099 return false;
5100}
5101
5102/// parseDirectiveCVInlineSiteId
5103/// ::= .cv_inline_site_id FunctionId
5104/// "within" IAFunc
5105/// "inlined_at" IAFile IALine [IACol]
5106///
5107/// Introduces a function ID that can be used with .cv_loc. Includes "inlined
5108/// at" source location information for use in the line table of the caller,
5109/// whether the caller is a real function or another inlined call site.
5110bool MasmParser::parseDirectiveCVInlineSiteId() {
5111 SMLoc FunctionIdLoc = getTok().getLoc();
5112 int64_t FunctionId;
5113 int64_t IAFunc;
5114 int64_t IAFile;
5115 int64_t IALine;
5116 int64_t IACol = 0;
5117
5118 // FunctionId
5119 if (parseCVFunctionId(FunctionId, ".cv_inline_site_id"))
5120 return true;
5121
5122 // "within"
5123 if (check((getLexer().isNot(AsmToken::Identifier) ||
5124 getTok().getIdentifier() != "within"),
5125 "expected 'within' identifier in '.cv_inline_site_id' directive"))
5126 return true;
5127 Lex();
5128
5129 // IAFunc
5130 if (parseCVFunctionId(IAFunc, ".cv_inline_site_id"))
5131 return true;
5132
5133 // "inlined_at"
5134 if (check((getLexer().isNot(AsmToken::Identifier) ||
5135 getTok().getIdentifier() != "inlined_at"),
5136 "expected 'inlined_at' identifier in '.cv_inline_site_id' "
5137 "directive") )
5138 return true;
5139 Lex();
5140
5141 // IAFile IALine
5142 if (parseCVFileId(IAFile, ".cv_inline_site_id") ||
5143 parseIntToken(IALine, "expected line number after 'inlined_at'"))
5144 return true;
5145
5146 // [IACol]
5147 if (getLexer().is(AsmToken::Integer)) {
5148 IACol = getTok().getIntVal();
5149 Lex();
5150 }
5151
5152 if (parseEOL())
5153 return true;
5154
5155 if (!getStreamer().emitCVInlineSiteIdDirective(FunctionId, IAFunc, IAFile,
5156 IALine, IACol, FunctionIdLoc))
5157 return Error(FunctionIdLoc, "function id already allocated");
5158
5159 return false;
5160}
5161
5162/// parseDirectiveCVLoc
5163/// ::= .cv_loc FunctionId FileNumber [LineNumber] [ColumnPos] [prologue_end]
5164/// [is_stmt VALUE]
5165/// The first number is a file number, must have been previously assigned with
5166/// a .file directive, the second number is the line number and optionally the
5167/// third number is a column position (zero if not specified). The remaining
5168/// optional items are .loc sub-directives.
5169bool MasmParser::parseDirectiveCVLoc() {
5170 SMLoc DirectiveLoc = getTok().getLoc();
5171 int64_t FunctionId, FileNumber;
5172 if (parseCVFunctionId(FunctionId, ".cv_loc") ||
5173 parseCVFileId(FileNumber, ".cv_loc"))
5174 return true;
5175
5176 int64_t LineNumber = 0;
5177 if (getLexer().is(AsmToken::Integer)) {
5178 LineNumber = getTok().getIntVal();
5179 if (LineNumber < 0)
5180 return TokError("line number less than zero in '.cv_loc' directive");
5181 Lex();
5182 }
5183
5184 int64_t ColumnPos = 0;
5185 if (getLexer().is(AsmToken::Integer)) {
5186 ColumnPos = getTok().getIntVal();
5187 if (ColumnPos < 0)
5188 return TokError("column position less than zero in '.cv_loc' directive");
5189 Lex();
5190 }
5191
5192 bool PrologueEnd = false;
5193 uint64_t IsStmt = 0;
5194
5195 auto parseOp = [&]() -> bool {
5197 SMLoc Loc = getTok().getLoc();
5198 if (parseIdentifier(Name))
5199 return TokError("unexpected token in '.cv_loc' directive");
5200 if (Name == "prologue_end")
5201 PrologueEnd = true;
5202 else if (Name == "is_stmt") {
5203 Loc = getTok().getLoc();
5204 const MCExpr *Value;
5205 if (parseExpression(Value))
5206 return true;
5207 // The expression must be the constant 0 or 1.
5208 IsStmt = ~0ULL;
5209 if (const auto *MCE = dyn_cast<MCConstantExpr>(Value))
5210 IsStmt = MCE->getValue();
5211
5212 if (IsStmt > 1)
5213 return Error(Loc, "is_stmt value not 0 or 1");
5214 } else {
5215 return Error(Loc, "unknown sub-directive in '.cv_loc' directive");
5216 }
5217 return false;
5218 };
5219
5220 if (parseMany(parseOp, false /*hasComma*/))
5221 return true;
5222
5223 getStreamer().emitCVLocDirective(FunctionId, FileNumber, LineNumber,
5224 ColumnPos, PrologueEnd, IsStmt, StringRef(),
5225 DirectiveLoc);
5226 return false;
5227}
5228
5229/// parseDirectiveCVLinetable
5230/// ::= .cv_linetable FunctionId, FnStart, FnEnd
5231bool MasmParser::parseDirectiveCVLinetable() {
5232 int64_t FunctionId;
5233 StringRef FnStartName, FnEndName;
5234 SMLoc Loc = getTok().getLoc();
5235 if (parseCVFunctionId(FunctionId, ".cv_linetable") ||
5236 parseToken(AsmToken::Comma,
5237 "unexpected token in '.cv_linetable' directive") ||
5238 parseTokenLoc(Loc) || check(parseIdentifier(FnStartName), Loc,
5239 "expected identifier in directive") ||
5240 parseToken(AsmToken::Comma,
5241 "unexpected token in '.cv_linetable' directive") ||
5242 parseTokenLoc(Loc) || check(parseIdentifier(FnEndName), Loc,
5243 "expected identifier in directive"))
5244 return true;
5245
5246 MCSymbol *FnStartSym = getContext().getOrCreateSymbol(FnStartName);
5247 MCSymbol *FnEndSym = getContext().getOrCreateSymbol(FnEndName);
5248
5249 getStreamer().emitCVLinetableDirective(FunctionId, FnStartSym, FnEndSym);
5250 return false;
5251}
5252
5253/// parseDirectiveCVInlineLinetable
5254/// ::= .cv_inline_linetable PrimaryFunctionId FileId LineNum FnStart FnEnd
5255bool MasmParser::parseDirectiveCVInlineLinetable() {
5256 int64_t PrimaryFunctionId, SourceFileId, SourceLineNum;
5257 StringRef FnStartName, FnEndName;
5258 SMLoc Loc = getTok().getLoc();
5259 if (parseCVFunctionId(PrimaryFunctionId, ".cv_inline_linetable") ||
5260 parseTokenLoc(Loc) ||
5261 parseIntToken(
5262 SourceFileId,
5263 "expected SourceField in '.cv_inline_linetable' directive") ||
5264 check(SourceFileId <= 0, Loc,
5265 "File id less than zero in '.cv_inline_linetable' directive") ||
5266 parseTokenLoc(Loc) ||
5267 parseIntToken(
5268 SourceLineNum,
5269 "expected SourceLineNum in '.cv_inline_linetable' directive") ||
5270 check(SourceLineNum < 0, Loc,
5271 "Line number less than zero in '.cv_inline_linetable' directive") ||
5272 parseTokenLoc(Loc) || check(parseIdentifier(FnStartName), Loc,
5273 "expected identifier in directive") ||
5274 parseTokenLoc(Loc) || check(parseIdentifier(FnEndName), Loc,
5275 "expected identifier in directive"))
5276 return true;
5277
5278 if (parseEOL())
5279 return true;
5280
5281 MCSymbol *FnStartSym = getContext().getOrCreateSymbol(FnStartName);
5282 MCSymbol *FnEndSym = getContext().getOrCreateSymbol(FnEndName);
5283 getStreamer().emitCVInlineLinetableDirective(PrimaryFunctionId, SourceFileId,
5284 SourceLineNum, FnStartSym,
5285 FnEndSym);
5286 return false;
5287}
5288
5289void MasmParser::initializeCVDefRangeTypeMap() {
5290 CVDefRangeTypeMap["reg"] = CVDR_DEFRANGE_REGISTER;
5291 CVDefRangeTypeMap["frame_ptr_rel"] = CVDR_DEFRANGE_FRAMEPOINTER_REL;
5292 CVDefRangeTypeMap["subfield_reg"] = CVDR_DEFRANGE_SUBFIELD_REGISTER;
5293 CVDefRangeTypeMap["reg_rel"] = CVDR_DEFRANGE_REGISTER_REL;
5294}
5295
5296/// parseDirectiveCVDefRange
5297/// ::= .cv_def_range RangeStart RangeEnd (GapStart GapEnd)*, bytes*
5298bool MasmParser::parseDirectiveCVDefRange() {
5299 SMLoc Loc;
5300 std::vector<std::pair<const MCSymbol *, const MCSymbol *>> Ranges;
5301 while (getLexer().is(AsmToken::Identifier)) {
5302 Loc = getLexer().getLoc();
5303 StringRef GapStartName;
5304 if (parseIdentifier(GapStartName))
5305 return Error(Loc, "expected identifier in directive");
5306 MCSymbol *GapStartSym = getContext().getOrCreateSymbol(GapStartName);
5307
5308 Loc = getLexer().getLoc();
5309 StringRef GapEndName;
5310 if (parseIdentifier(GapEndName))
5311 return Error(Loc, "expected identifier in directive");
5312 MCSymbol *GapEndSym = getContext().getOrCreateSymbol(GapEndName);
5313
5314 Ranges.push_back({GapStartSym, GapEndSym});
5315 }
5316
5317 StringRef CVDefRangeTypeStr;
5318 if (parseToken(
5320 "expected comma before def_range type in .cv_def_range directive") ||
5321 parseIdentifier(CVDefRangeTypeStr))
5322 return Error(Loc, "expected def_range type in directive");
5323
5325 CVDefRangeTypeMap.find(CVDefRangeTypeStr);
5326 CVDefRangeType CVDRType = (CVTypeIt == CVDefRangeTypeMap.end())
5327 ? CVDR_DEFRANGE
5328 : CVTypeIt->getValue();
5329 switch (CVDRType) {
5330 case CVDR_DEFRANGE_REGISTER: {
5331 int64_t DRRegister;
5332 if (parseToken(AsmToken::Comma, "expected comma before register number in "
5333 ".cv_def_range directive") ||
5334 parseAbsoluteExpression(DRRegister))
5335 return Error(Loc, "expected register number");
5336
5338 DRHdr.Register = DRRegister;
5339 DRHdr.MayHaveNoName = 0;
5340 getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5341 break;
5342 }
5343 case CVDR_DEFRANGE_FRAMEPOINTER_REL: {
5344 int64_t DROffset;
5345 if (parseToken(AsmToken::Comma,
5346 "expected comma before offset in .cv_def_range directive") ||
5347 parseAbsoluteExpression(DROffset))
5348 return Error(Loc, "expected offset value");
5349
5351 DRHdr.Offset = DROffset;
5352 getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5353 break;
5354 }
5355 case CVDR_DEFRANGE_SUBFIELD_REGISTER: {
5356 int64_t DRRegister;
5357 int64_t DROffsetInParent;
5358 if (parseToken(AsmToken::Comma, "expected comma before register number in "
5359 ".cv_def_range directive") ||
5360 parseAbsoluteExpression(DRRegister))
5361 return Error(Loc, "expected register number");
5362 if (parseToken(AsmToken::Comma,
5363 "expected comma before offset in .cv_def_range directive") ||
5364 parseAbsoluteExpression(DROffsetInParent))
5365 return Error(Loc, "expected offset value");
5366
5368 DRHdr.Register = DRRegister;
5369 DRHdr.MayHaveNoName = 0;
5370 DRHdr.OffsetInParent = DROffsetInParent;
5371 getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5372 break;
5373 }
5374 case CVDR_DEFRANGE_REGISTER_REL: {
5375 int64_t DRRegister;
5376 int64_t DRFlags;
5377 int64_t DRBasePointerOffset;
5378 if (parseToken(AsmToken::Comma, "expected comma before register number in "
5379 ".cv_def_range directive") ||
5380 parseAbsoluteExpression(DRRegister))
5381 return Error(Loc, "expected register value");
5382 if (parseToken(
5384 "expected comma before flag value in .cv_def_range directive") ||
5385 parseAbsoluteExpression(DRFlags))
5386 return Error(Loc, "expected flag value");
5387 if (parseToken(AsmToken::Comma, "expected comma before base pointer offset "
5388 "in .cv_def_range directive") ||
5389 parseAbsoluteExpression(DRBasePointerOffset))
5390 return Error(Loc, "expected base pointer offset value");
5391
5393 DRHdr.Register = DRRegister;
5394 DRHdr.Flags = DRFlags;
5395 DRHdr.BasePointerOffset = DRBasePointerOffset;
5396 getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5397 break;
5398 }
5399 default:
5400 return Error(Loc, "unexpected def_range type in .cv_def_range directive");
5401 }
5402 return true;
5403}
5404
5405/// parseDirectiveCVString
5406/// ::= .cv_stringtable "string"
5407bool MasmParser::parseDirectiveCVString() {
5408 std::string Data;
5409 if (checkForValidSection() || parseEscapedString(Data))
5410 return addErrorSuffix(" in '.cv_string' directive");
5411
5412 // Put the string in the table and emit the offset.
5413 std::pair<StringRef, unsigned> Insertion =
5414 getCVContext().addToStringTable(Data);
5415 getStreamer().emitIntValue(Insertion.second, 4);
5416 return false;
5417}
5418
5419/// parseDirectiveCVStringTable
5420/// ::= .cv_stringtable
5421bool MasmParser::parseDirectiveCVStringTable() {
5422 getStreamer().emitCVStringTableDirective();
5423 return false;
5424}
5425
5426/// parseDirectiveCVFileChecksums
5427/// ::= .cv_filechecksums
5428bool MasmParser::parseDirectiveCVFileChecksums() {
5429 getStreamer().emitCVFileChecksumsDirective();
5430 return false;
5431}
5432
5433/// parseDirectiveCVFileChecksumOffset
5434/// ::= .cv_filechecksumoffset fileno
5435bool MasmParser::parseDirectiveCVFileChecksumOffset() {
5436 int64_t FileNo;
5437 if (parseIntToken(FileNo, "expected identifier in directive"))
5438 return true;
5439 if (parseEOL())
5440 return true;
5441 getStreamer().emitCVFileChecksumOffsetDirective(FileNo);
5442 return false;
5443}
5444
5445/// parseDirectiveCVFPOData
5446/// ::= .cv_fpo_data procsym
5447bool MasmParser::parseDirectiveCVFPOData() {
5448 SMLoc DirLoc = getLexer().getLoc();
5449 StringRef ProcName;
5450 if (parseIdentifier(ProcName))
5451 return TokError("expected symbol name");
5452 if (parseEOL("unexpected tokens"))
5453 return addErrorSuffix(" in '.cv_fpo_data' directive");
5454 MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName);
5455 getStreamer().emitCVFPOData(ProcSym, DirLoc);
5456 return false;
5457}
5458
5459/// parseDirectiveCFISections
5460/// ::= .cfi_sections section [, section]
5461