LLVM  15.0.0git
MasmParser.cpp
Go to the documentation of this file.
1 //===- MasmParser.cpp - Parser for Assembly Files -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This class implements the parser for assembly files.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/ADT/APFloat.h"
14 #include "llvm/ADT/APInt.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/BitVector.h"
17 #include "llvm/ADT/None.h"
18 #include "llvm/ADT/Optional.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallString.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/StringExtras.h"
23 #include "llvm/ADT/StringMap.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/ADT/StringSwitch.h"
26 #include "llvm/ADT/Twine.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCCodeView.h"
31 #include "llvm/MC/MCContext.h"
32 #include "llvm/MC/MCDirectives.h"
33 #include "llvm/MC/MCDwarf.h"
34 #include "llvm/MC/MCExpr.h"
35 #include "llvm/MC/MCInstPrinter.h"
36 #include "llvm/MC/MCInstrDesc.h"
37 #include "llvm/MC/MCInstrInfo.h"
45 #include "llvm/MC/MCRegisterInfo.h"
46 #include "llvm/MC/MCSection.h"
47 #include "llvm/MC/MCStreamer.h"
49 #include "llvm/MC/MCSymbol.h"
51 #include "llvm/Support/Casting.h"
54 #include "llvm/Support/Format.h"
55 #include "llvm/Support/MD5.h"
58 #include "llvm/Support/Path.h"
59 #include "llvm/Support/SMLoc.h"
60 #include "llvm/Support/SourceMgr.h"
62 #include <algorithm>
63 #include <cassert>
64 #include <climits>
65 #include <cstddef>
66 #include <cstdint>
67 #include <ctime>
68 #include <deque>
69 #include <memory>
70 #include <sstream>
71 #include <string>
72 #include <tuple>
73 #include <utility>
74 #include <vector>
75 
76 using namespace llvm;
77 
79 
80 namespace {
81 
82 /// Helper types for tracking macro definitions.
83 typedef std::vector<AsmToken> MCAsmMacroArgument;
84 typedef std::vector<MCAsmMacroArgument> MCAsmMacroArguments;
85 
86 /// Helper class for storing information about an active macro instantiation.
87 struct MacroInstantiation {
88  /// The location of the instantiation.
89  SMLoc InstantiationLoc;
90 
91  /// The buffer where parsing should resume upon instantiation completion.
92  unsigned ExitBuffer;
93 
94  /// The location where parsing should resume upon instantiation completion.
95  SMLoc ExitLoc;
96 
97  /// The depth of TheCondStack at the start of the instantiation.
98  size_t CondStackDepth;
99 };
100 
101 struct ParseStatementInfo {
102  /// The parsed operands from the last parsed statement.
104 
105  /// The opcode from the last parsed instruction.
106  unsigned Opcode = ~0U;
107 
108  /// Was there an error parsing the inline assembly?
109  bool ParseError = false;
110 
111  /// The value associated with a macro exit.
112  Optional<std::string> ExitValue;
113 
114  SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr;
115 
116  ParseStatementInfo() = delete;
117  ParseStatementInfo(SmallVectorImpl<AsmRewrite> *rewrites)
118  : AsmRewrites(rewrites) {}
119 };
120 
/// Kind of initializer a field carries.
enum FieldType {
  /// Initializer: integer expression, stored as an MCExpr.
  FT_INTEGRAL,
  /// Initializer: real number, stored as an APInt.
  FT_REAL,
  /// Initializer: struct initializer, stored recursively.
  FT_STRUCT
};
126 
127 struct FieldInfo;
128 struct StructInfo {
129  StringRef Name;
130  bool IsUnion = false;
131  bool Initializable = true;
132  unsigned Alignment = 0;
133  unsigned AlignmentSize = 0;
134  unsigned NextOffset = 0;
135  unsigned Size = 0;
136  std::vector<FieldInfo> Fields;
137  StringMap<size_t> FieldsByName;
138 
139  FieldInfo &addField(StringRef FieldName, FieldType FT,
140  unsigned FieldAlignmentSize);
141 
142  StructInfo() = default;
143 
144  StructInfo(StringRef StructName, bool Union, unsigned AlignmentValue)
145  : Name(StructName), IsUnion(Union), Alignment(AlignmentValue) {}
146 };
147 
148 // FIXME: This should probably use a class hierarchy, raw pointers between the
149 // objects, and dynamic type resolution instead of a union. On the other hand,
150 // ownership then becomes much more complicated; the obvious thing would be to
151 // use BumpPtrAllocator, but the lack of a destructor makes that messy.
152 
153 struct StructInitializer;
154 struct IntFieldInfo {
156 
157  IntFieldInfo() = default;
158  IntFieldInfo(const SmallVector<const MCExpr *, 1> &V) { Values = V; }
159  IntFieldInfo(SmallVector<const MCExpr *, 1> &&V) { Values = V; }
160 };
161 struct RealFieldInfo {
162  SmallVector<APInt, 1> AsIntValues;
163 
164  RealFieldInfo() = default;
165  RealFieldInfo(const SmallVector<APInt, 1> &V) { AsIntValues = V; }
166  RealFieldInfo(SmallVector<APInt, 1> &&V) { AsIntValues = V; }
167 };
168 struct StructFieldInfo {
169  std::vector<StructInitializer> Initializers;
170  StructInfo Structure;
171 
172  StructFieldInfo() = default;
173  StructFieldInfo(const std::vector<StructInitializer> &V, StructInfo S) {
174  Initializers = V;
175  Structure = S;
176  }
177  StructFieldInfo(std::vector<StructInitializer> &&V, StructInfo S) {
178  Initializers = V;
179  Structure = S;
180  }
181 };
182 
183 class FieldInitializer {
184 public:
185  FieldType FT;
186  union {
187  IntFieldInfo IntInfo;
188  RealFieldInfo RealInfo;
189  StructFieldInfo StructInfo;
190  };
191 
192  ~FieldInitializer() {
193  switch (FT) {
194  case FT_INTEGRAL:
195  IntInfo.~IntFieldInfo();
196  break;
197  case FT_REAL:
198  RealInfo.~RealFieldInfo();
199  break;
200  case FT_STRUCT:
201  StructInfo.~StructFieldInfo();
202  break;
203  }
204  }
205 
206  FieldInitializer(FieldType FT) : FT(FT) {
207  switch (FT) {
208  case FT_INTEGRAL:
209  new (&IntInfo) IntFieldInfo();
210  break;
211  case FT_REAL:
212  new (&RealInfo) RealFieldInfo();
213  break;
214  case FT_STRUCT:
215  new (&StructInfo) StructFieldInfo();
216  break;
217  }
218  }
219 
220  FieldInitializer(SmallVector<const MCExpr *, 1> &&Values) : FT(FT_INTEGRAL) {
221  new (&IntInfo) IntFieldInfo(Values);
222  }
223 
224  FieldInitializer(SmallVector<APInt, 1> &&AsIntValues) : FT(FT_REAL) {
225  new (&RealInfo) RealFieldInfo(AsIntValues);
226  }
227 
228  FieldInitializer(std::vector<StructInitializer> &&Initializers,
229  struct StructInfo Structure)
230  : FT(FT_STRUCT) {
231  new (&StructInfo) StructFieldInfo(Initializers, Structure);
232  }
233 
234  FieldInitializer(const FieldInitializer &Initializer) : FT(Initializer.FT) {
235  switch (FT) {
236  case FT_INTEGRAL:
237  new (&IntInfo) IntFieldInfo(Initializer.IntInfo);
238  break;
239  case FT_REAL:
240  new (&RealInfo) RealFieldInfo(Initializer.RealInfo);
241  break;
242  case FT_STRUCT:
243  new (&StructInfo) StructFieldInfo(Initializer.StructInfo);
244  break;
245  }
246  }
247 
248  FieldInitializer(FieldInitializer &&Initializer) : FT(Initializer.FT) {
249  switch (FT) {
250  case FT_INTEGRAL:
251  new (&IntInfo) IntFieldInfo(Initializer.IntInfo);
252  break;
253  case FT_REAL:
254  new (&RealInfo) RealFieldInfo(Initializer.RealInfo);
255  break;
256  case FT_STRUCT:
257  new (&StructInfo) StructFieldInfo(Initializer.StructInfo);
258  break;
259  }
260  }
261 
262  FieldInitializer &operator=(const FieldInitializer &Initializer) {
263  if (FT != Initializer.FT) {
264  switch (FT) {
265  case FT_INTEGRAL:
266  IntInfo.~IntFieldInfo();
267  break;
268  case FT_REAL:
269  RealInfo.~RealFieldInfo();
270  break;
271  case FT_STRUCT:
272  StructInfo.~StructFieldInfo();
273  break;
274  }
275  }
276  FT = Initializer.FT;
277  switch (FT) {
278  case FT_INTEGRAL:
279  IntInfo = Initializer.IntInfo;
280  break;
281  case FT_REAL:
282  RealInfo = Initializer.RealInfo;
283  break;
284  case FT_STRUCT:
285  StructInfo = Initializer.StructInfo;
286  break;
287  }
288  return *this;
289  }
290 
291  FieldInitializer &operator=(FieldInitializer &&Initializer) {
292  if (FT != Initializer.FT) {
293  switch (FT) {
294  case FT_INTEGRAL:
295  IntInfo.~IntFieldInfo();
296  break;
297  case FT_REAL:
298  RealInfo.~RealFieldInfo();
299  break;
300  case FT_STRUCT:
301  StructInfo.~StructFieldInfo();
302  break;
303  }
304  }
305  FT = Initializer.FT;
306  switch (FT) {
307  case FT_INTEGRAL:
308  IntInfo = Initializer.IntInfo;
309  break;
310  case FT_REAL:
311  RealInfo = Initializer.RealInfo;
312  break;
313  case FT_STRUCT:
314  StructInfo = Initializer.StructInfo;
315  break;
316  }
317  return *this;
318  }
319 };
320 
321 struct StructInitializer {
322  std::vector<FieldInitializer> FieldInitializers;
323 };
324 
325 struct FieldInfo {
326  // Offset of the field within the containing STRUCT.
327  unsigned Offset = 0;
328 
329  // Total size of the field (= LengthOf * Type).
330  unsigned SizeOf = 0;
331 
332  // Number of elements in the field (1 if scalar, >1 if an array).
333  unsigned LengthOf = 0;
334 
335  // Size of a single entry in this field, in bytes ("type" in MASM standards).
336  unsigned Type = 0;
337 
338  FieldInitializer Contents;
339 
340  FieldInfo(FieldType FT) : Contents(FT) {}
341 };
342 
343 FieldInfo &StructInfo::addField(StringRef FieldName, FieldType FT,
344  unsigned FieldAlignmentSize) {
345  if (!FieldName.empty())
346  FieldsByName[FieldName.lower()] = Fields.size();
347  Fields.emplace_back(FT);
348  FieldInfo &Field = Fields.back();
349  Field.Offset =
350  llvm::alignTo(NextOffset, std::min(Alignment, FieldAlignmentSize));
351  if (!IsUnion) {
352  NextOffset = std::max(NextOffset, Field.Offset);
353  }
354  AlignmentSize = std::max(AlignmentSize, FieldAlignmentSize);
355  return Field;
356 }
357 
358 /// The concrete assembly parser instance.
359 // Note that this is a full MCAsmParser, not an MCAsmParserExtension!
360 // It's a peer of AsmParser, not of COFFAsmParser, WasmAsmParser, etc.
361 class MasmParser : public MCAsmParser {
362 private:
363  AsmLexer Lexer;
364  MCContext &Ctx;
365  MCStreamer &Out;
366  const MCAsmInfo &MAI;
367  SourceMgr &SrcMgr;
368  SourceMgr::DiagHandlerTy SavedDiagHandler;
369  void *SavedDiagContext;
370  std::unique_ptr<MCAsmParserExtension> PlatformParser;
371 
372  /// This is the current buffer index we're lexing from as managed by the
373  /// SourceMgr object.
374  unsigned CurBuffer;
375 
376  /// time of assembly
377  struct tm TM;
378 
379  BitVector EndStatementAtEOFStack;
380 
381  AsmCond TheCondState;
382  std::vector<AsmCond> TheCondStack;
383 
384  /// maps directive names to handler methods in parser
385  /// extensions. Extensions register themselves in this map by calling
386  /// addDirectiveHandler.
387  StringMap<ExtensionDirectiveHandler> ExtensionDirectiveMap;
388 
389  /// maps assembly-time variable names to variables.
390  struct Variable {
391  enum RedefinableKind { NOT_REDEFINABLE, WARN_ON_REDEFINITION, REDEFINABLE };
392 
393  StringRef Name;
394  RedefinableKind Redefinable = REDEFINABLE;
395  bool IsText = false;
396  std::string TextValue;
397  };
398  StringMap<Variable> Variables;
399 
400  /// Stack of active struct definitions.
401  SmallVector<StructInfo, 1> StructInProgress;
402 
403  /// Maps struct tags to struct definitions.
404  StringMap<StructInfo> Structs;
405 
406  /// Maps data location names to types.
407  StringMap<AsmTypeInfo> KnownType;
408 
409  /// Stack of active macro instantiations.
410  std::vector<MacroInstantiation*> ActiveMacros;
411 
412  /// List of bodies of anonymous macros.
413  std::deque<MCAsmMacro> MacroLikeBodies;
414 
415  /// Keeps track of how many .macro's have been instantiated.
416  unsigned NumOfMacroInstantiations;
417 
418  /// The values from the last parsed cpp hash file line comment if any.
419  struct CppHashInfoTy {
420  StringRef Filename;
421  int64_t LineNumber;
422  SMLoc Loc;
423  unsigned Buf;
424  CppHashInfoTy() : LineNumber(0), Buf(0) {}
425  };
426  CppHashInfoTy CppHashInfo;
427 
428  /// The filename from the first cpp hash file line comment, if any.
429  StringRef FirstCppHashFilename;
430 
431  /// List of forward directional labels for diagnosis at the end.
433 
434  /// AssemblerDialect. ~0U means unset value and use value provided by MAI.
435  /// Defaults to 1U, meaning Intel.
436  unsigned AssemblerDialect = 1U;
437 
438  /// is Darwin compatibility enabled?
439  bool IsDarwin = false;
440 
441  /// Are we parsing ms-style inline assembly?
442  bool ParsingMSInlineAsm = false;
443 
444  /// Did we already inform the user about inconsistent MD5 usage?
445  bool ReportedInconsistentMD5 = false;
446 
447  // Current <...> expression depth.
448  unsigned AngleBracketDepth = 0U;
449 
450  // Number of locals defined.
451  uint16_t LocalCounter = 0;
452 
453 public:
454  MasmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
455  const MCAsmInfo &MAI, struct tm TM, unsigned CB = 0);
456  MasmParser(const MasmParser &) = delete;
457  MasmParser &operator=(const MasmParser &) = delete;
458  ~MasmParser() override;
459 
460  bool Run(bool NoInitialTextSection, bool NoFinalize = false) override;
461 
462  void addDirectiveHandler(StringRef Directive,
463  ExtensionDirectiveHandler Handler) override {
464  ExtensionDirectiveMap[Directive] = Handler;
465  if (DirectiveKindMap.find(Directive) == DirectiveKindMap.end()) {
466  DirectiveKindMap[Directive] = DK_HANDLER_DIRECTIVE;
467  }
468  }
469 
470  void addAliasForDirective(StringRef Directive, StringRef Alias) override {
471  DirectiveKindMap[Directive] = DirectiveKindMap[Alias];
472  }
473 
474  /// @name MCAsmParser Interface
475  /// {
476 
477  SourceMgr &getSourceManager() override { return SrcMgr; }
478  MCAsmLexer &getLexer() override { return Lexer; }
479  MCContext &getContext() override { return Ctx; }
480  MCStreamer &getStreamer() override { return Out; }
481 
482  CodeViewContext &getCVContext() { return Ctx.getCVContext(); }
483 
484  unsigned getAssemblerDialect() override {
485  if (AssemblerDialect == ~0U)
486  return MAI.getAssemblerDialect();
487  else
488  return AssemblerDialect;
489  }
490  void setAssemblerDialect(unsigned i) override {
491  AssemblerDialect = i;
492  }
493 
494  void Note(SMLoc L, const Twine &Msg, SMRange Range = None) override;
495  bool Warning(SMLoc L, const Twine &Msg, SMRange Range = None) override;
496  bool printError(SMLoc L, const Twine &Msg, SMRange Range = None) override;
497 
498  enum ExpandKind { ExpandMacros, DoNotExpandMacros };
499  const AsmToken &Lex(ExpandKind ExpandNextToken);
500  const AsmToken &Lex() override { return Lex(ExpandMacros); }
501 
502  void setParsingMSInlineAsm(bool V) override {
503  ParsingMSInlineAsm = V;
504  // When parsing MS inline asm, we must lex 0b1101 and 0ABCH as binary and
505  // hex integer literals.
506  Lexer.setLexMasmIntegers(V);
507  }
508  bool isParsingMSInlineAsm() override { return ParsingMSInlineAsm; }
509 
510  bool isParsingMasm() const override { return true; }
511 
512  bool defineMacro(StringRef Name, StringRef Value) override;
513 
514  bool lookUpField(StringRef Name, AsmFieldInfo &Info) const override;
515  bool lookUpField(StringRef Base, StringRef Member,
516  AsmFieldInfo &Info) const override;
517 
518  bool lookUpType(StringRef Name, AsmTypeInfo &Info) const override;
519 
520  bool parseMSInlineAsm(std::string &AsmString, unsigned &NumOutputs,
521  unsigned &NumInputs,
522  SmallVectorImpl<std::pair<void *, bool>> &OpDecls,
523  SmallVectorImpl<std::string> &Constraints,
525  const MCInstrInfo *MII, const MCInstPrinter *IP,
526  MCAsmParserSemaCallback &SI) override;
527 
528  bool parseExpression(const MCExpr *&Res);
529  bool parseExpression(const MCExpr *&Res, SMLoc &EndLoc) override;
530  bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
531  AsmTypeInfo *TypeInfo) override;
532  bool parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) override;
533  bool parseParenExprOfDepth(unsigned ParenDepth, const MCExpr *&Res,
534  SMLoc &EndLoc) override;
535  bool parseAbsoluteExpression(int64_t &Res) override;
536 
537  /// Parse a floating point expression using the float \p Semantics
538  /// and set \p Res to the value.
539  bool parseRealValue(const fltSemantics &Semantics, APInt &Res);
540 
541  /// Parse an identifier or string (as a quoted identifier)
542  /// and set \p Res to the identifier contents.
543  enum IdentifierPositionKind { StandardPosition, StartOfStatement };
544  bool parseIdentifier(StringRef &Res, IdentifierPositionKind Position);
545  bool parseIdentifier(StringRef &Res) override {
546  return parseIdentifier(Res, StandardPosition);
547  }
548  void eatToEndOfStatement() override;
549 
550  bool checkForValidSection() override;
551 
552  /// }
553 
554 private:
555  bool expandMacros();
556  const AsmToken peekTok(bool ShouldSkipSpace = true);
557 
558  bool parseStatement(ParseStatementInfo &Info,
560  bool parseCurlyBlockScope(SmallVectorImpl<AsmRewrite>& AsmStrRewrites);
561  bool parseCppHashLineFilenameComment(SMLoc L);
562 
563  bool expandMacro(raw_svector_ostream &OS, StringRef Body,
566  const std::vector<std::string> &Locals, SMLoc L);
567 
568  /// Are we inside a macro instantiation?
569  bool isInsideMacroInstantiation() {return !ActiveMacros.empty();}
570 
571  /// Handle entry to macro instantiation.
572  ///
573  /// \param M The macro.
574  /// \param NameLoc Instantiation location.
575  bool handleMacroEntry(
576  const MCAsmMacro *M, SMLoc NameLoc,
578 
579  /// Handle invocation of macro function.
580  ///
581  /// \param M The macro.
582  /// \param NameLoc Invocation location.
583  bool handleMacroInvocation(const MCAsmMacro *M, SMLoc NameLoc);
584 
585  /// Handle exit from macro instantiation.
586  void handleMacroExit();
587 
588  /// Extract AsmTokens for a macro argument.
589  bool
590  parseMacroArgument(const MCAsmMacroParameter *MP, MCAsmMacroArgument &MA,
592 
593  /// Parse all macro arguments for a given macro.
594  bool
595  parseMacroArguments(const MCAsmMacro *M, MCAsmMacroArguments &A,
597 
598  void printMacroInstantiations();
599 
600  bool expandStatement(SMLoc Loc);
601 
602  void printMessage(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Msg,
603  SMRange Range = None) const {
604  ArrayRef<SMRange> Ranges(Range);
605  SrcMgr.PrintMessage(Loc, Kind, Msg, Ranges);
606  }
607  static void DiagHandler(const SMDiagnostic &Diag, void *Context);
608 
609  bool lookUpField(const StructInfo &Structure, StringRef Member,
610  AsmFieldInfo &Info) const;
611 
612  /// Should we emit DWARF describing this assembler source? (Returns false if
613  /// the source has .file directives, which means we don't want to generate
614  /// info describing the assembler source itself.)
615  bool enabledGenDwarfForAssembly();
616 
617  /// Enter the specified file. This returns true on failure.
618  bool enterIncludeFile(const std::string &Filename);
619 
620  /// Reset the current lexer position to that given by \p Loc. The
621  /// current token is not set; clients should ensure Lex() is called
622  /// subsequently.
623  ///
624  /// \param InBuffer If not 0, should be the known buffer id that contains the
625  /// location.
626  void jumpToLoc(SMLoc Loc, unsigned InBuffer = 0,
627  bool EndStatementAtEOF = true);
628 
629  /// Parse up to a token of kind \p EndTok and return the contents from the
630  /// current token up to (but not including) this token; the current token on
631  /// exit will be either this kind or EOF. Reads through instantiated macro
632  /// functions and text macros.
633  SmallVector<StringRef, 1> parseStringRefsTo(AsmToken::TokenKind EndTok);
634  std::string parseStringTo(AsmToken::TokenKind EndTok);
635 
636  /// Parse up to the end of statement and return the contents from the current
637  /// token until the end of the statement; the current token on exit will be
638  /// either the EndOfStatement or EOF.
639  StringRef parseStringToEndOfStatement() override;
640 
641  bool parseTextItem(std::string &Data);
642 
643  unsigned getBinOpPrecedence(AsmToken::TokenKind K,
645 
646  bool parseBinOpRHS(unsigned Precedence, const MCExpr *&Res, SMLoc &EndLoc);
647  bool parseParenExpr(const MCExpr *&Res, SMLoc &EndLoc);
648  bool parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc);
649 
650  bool parseRegisterOrRegisterNumber(int64_t &Register, SMLoc DirectiveLoc);
651 
652  bool parseCVFunctionId(int64_t &FunctionId, StringRef DirectiveName);
653  bool parseCVFileId(int64_t &FileId, StringRef DirectiveName);
654 
655  // Generic (target and platform independent) directive parsing.
656  enum DirectiveKind {
657  DK_NO_DIRECTIVE, // Placeholder
658  DK_HANDLER_DIRECTIVE,
659  DK_ASSIGN,
660  DK_EQU,
661  DK_TEXTEQU,
662  DK_ASCII,
663  DK_ASCIZ,
664  DK_STRING,
665  DK_BYTE,
666  DK_SBYTE,
667  DK_WORD,
668  DK_SWORD,
669  DK_DWORD,
670  DK_SDWORD,
671  DK_FWORD,
672  DK_QWORD,
673  DK_SQWORD,
674  DK_DB,
675  DK_DD,
676  DK_DF,
677  DK_DQ,
678  DK_DW,
679  DK_REAL4,
680  DK_REAL8,
681  DK_REAL10,
682  DK_ALIGN,
683  DK_EVEN,
684  DK_ORG,
685  DK_ENDR,
686  DK_EXTERN,
687  DK_PUBLIC,
688  DK_COMM,
689  DK_COMMENT,
690  DK_INCLUDE,
691  DK_REPEAT,
692  DK_WHILE,
693  DK_FOR,
694  DK_FORC,
695  DK_IF,
696  DK_IFE,
697  DK_IFB,
698  DK_IFNB,
699  DK_IFDEF,
700  DK_IFNDEF,
701  DK_IFDIF,
702  DK_IFDIFI,
703  DK_IFIDN,
704  DK_IFIDNI,
705  DK_ELSEIF,
706  DK_ELSEIFE,
707  DK_ELSEIFB,
708  DK_ELSEIFNB,
709  DK_ELSEIFDEF,
710  DK_ELSEIFNDEF,
711  DK_ELSEIFDIF,
712  DK_ELSEIFDIFI,
713  DK_ELSEIFIDN,
714  DK_ELSEIFIDNI,
715  DK_ELSE,
716  DK_ENDIF,
717  DK_FILE,
718  DK_LINE,
719  DK_LOC,
720  DK_STABS,
721  DK_CV_FILE,
722  DK_CV_FUNC_ID,
723  DK_CV_INLINE_SITE_ID,
724  DK_CV_LOC,
725  DK_CV_LINETABLE,
726  DK_CV_INLINE_LINETABLE,
727  DK_CV_DEF_RANGE,
728  DK_CV_STRINGTABLE,
729  DK_CV_STRING,
730  DK_CV_FILECHECKSUMS,
731  DK_CV_FILECHECKSUM_OFFSET,
732  DK_CV_FPO_DATA,
733  DK_CFI_SECTIONS,
734  DK_CFI_STARTPROC,
735  DK_CFI_ENDPROC,
736  DK_CFI_DEF_CFA,
737  DK_CFI_DEF_CFA_OFFSET,
738  DK_CFI_ADJUST_CFA_OFFSET,
739  DK_CFI_DEF_CFA_REGISTER,
740  DK_CFI_OFFSET,
741  DK_CFI_REL_OFFSET,
742  DK_CFI_PERSONALITY,
743  DK_CFI_LSDA,
744  DK_CFI_REMEMBER_STATE,
745  DK_CFI_RESTORE_STATE,
746  DK_CFI_SAME_VALUE,
747  DK_CFI_RESTORE,
748  DK_CFI_ESCAPE,
749  DK_CFI_RETURN_COLUMN,
750  DK_CFI_SIGNAL_FRAME,
751  DK_CFI_UNDEFINED,
752  DK_CFI_REGISTER,
753  DK_CFI_WINDOW_SAVE,
754  DK_CFI_B_KEY_FRAME,
755  DK_MACRO,
756  DK_EXITM,
757  DK_ENDM,
758  DK_PURGE,
759  DK_ERR,
760  DK_ERRB,
761  DK_ERRNB,
762  DK_ERRDEF,
763  DK_ERRNDEF,
764  DK_ERRDIF,
765  DK_ERRDIFI,
766  DK_ERRIDN,
767  DK_ERRIDNI,
768  DK_ERRE,
769  DK_ERRNZ,
770  DK_ECHO,
771  DK_STRUCT,
772  DK_UNION,
773  DK_ENDS,
774  DK_END,
775  DK_PUSHFRAME,
776  DK_PUSHREG,
777  DK_SAVEREG,
778  DK_SAVEXMM128,
779  DK_SETFRAME,
780  DK_RADIX,
781  };
782 
783  /// Maps directive name --> DirectiveKind enum, for directives parsed by this
784  /// class.
785  StringMap<DirectiveKind> DirectiveKindMap;
786 
787  bool isMacroLikeDirective();
788 
789  // Codeview def_range type parsing.
790  enum CVDefRangeType {
791  CVDR_DEFRANGE = 0, // Placeholder
792  CVDR_DEFRANGE_REGISTER,
793  CVDR_DEFRANGE_FRAMEPOINTER_REL,
794  CVDR_DEFRANGE_SUBFIELD_REGISTER,
795  CVDR_DEFRANGE_REGISTER_REL
796  };
797 
798  /// Maps Codeview def_range types --> CVDefRangeType enum, for Codeview
799  /// def_range types parsed by this class.
800  StringMap<CVDefRangeType> CVDefRangeTypeMap;
801 
802  // Builtin symbols handled by this class.
803  enum BuiltinSymbol {
804  BI_NO_SYMBOL, // Placeholder
805  BI_DATE,
806  BI_TIME,
807  BI_VERSION,
808  BI_FILECUR,
809  BI_FILENAME,
810  BI_LINE,
811  BI_CURSEG,
812  BI_CPU,
813  BI_INTERFACE,
814  BI_CODE,
815  BI_DATA,
816  BI_FARDATA,
817  BI_WORDSIZE,
818  BI_CODESIZE,
819  BI_DATASIZE,
820  BI_MODEL,
821  BI_STACK,
822  };
823 
824  /// Maps builtin name --> BuiltinSymbol enum, for builtins handled by this
825  /// class.
826  StringMap<BuiltinSymbol> BuiltinSymbolMap;
827 
828  const MCExpr *evaluateBuiltinValue(BuiltinSymbol Symbol, SMLoc StartLoc);
829 
830  llvm::Optional<std::string> evaluateBuiltinTextMacro(BuiltinSymbol Symbol,
831  SMLoc StartLoc);
832 
833  // ".ascii", ".asciz", ".string"
834  bool parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated);
835 
836  // "byte", "word", ...
837  bool emitIntValue(const MCExpr *Value, unsigned Size);
838  bool parseScalarInitializer(unsigned Size,
840  unsigned StringPadLength = 0);
841  bool parseScalarInstList(
842  unsigned Size, SmallVectorImpl<const MCExpr *> &Values,
844  bool emitIntegralValues(unsigned Size, unsigned *Count = nullptr);
845  bool addIntegralField(StringRef Name, unsigned Size);
846  bool parseDirectiveValue(StringRef IDVal, unsigned Size);
847  bool parseDirectiveNamedValue(StringRef TypeName, unsigned Size,
848  StringRef Name, SMLoc NameLoc);
849 
850  // "real4", "real8", "real10"
851  bool emitRealValues(const fltSemantics &Semantics, unsigned *Count = nullptr);
852  bool addRealField(StringRef Name, const fltSemantics &Semantics, size_t Size);
853  bool parseDirectiveRealValue(StringRef IDVal, const fltSemantics &Semantics,
854  size_t Size);
855  bool parseRealInstList(
856  const fltSemantics &Semantics, SmallVectorImpl<APInt> &Values,
858  bool parseDirectiveNamedRealValue(StringRef TypeName,
859  const fltSemantics &Semantics,
860  unsigned Size, StringRef Name,
861  SMLoc NameLoc);
862 
863  bool parseOptionalAngleBracketOpen();
864  bool parseAngleBracketClose(const Twine &Msg = "expected '>'");
865 
866  bool parseFieldInitializer(const FieldInfo &Field,
867  FieldInitializer &Initializer);
868  bool parseFieldInitializer(const FieldInfo &Field,
869  const IntFieldInfo &Contents,
870  FieldInitializer &Initializer);
871  bool parseFieldInitializer(const FieldInfo &Field,
872  const RealFieldInfo &Contents,
873  FieldInitializer &Initializer);
874  bool parseFieldInitializer(const FieldInfo &Field,
875  const StructFieldInfo &Contents,
876  FieldInitializer &Initializer);
877 
878  bool parseStructInitializer(const StructInfo &Structure,
879  StructInitializer &Initializer);
880  bool parseStructInstList(
881  const StructInfo &Structure, std::vector<StructInitializer> &Initializers,
883 
884  bool emitFieldValue(const FieldInfo &Field);
885  bool emitFieldValue(const FieldInfo &Field, const IntFieldInfo &Contents);
886  bool emitFieldValue(const FieldInfo &Field, const RealFieldInfo &Contents);
887  bool emitFieldValue(const FieldInfo &Field, const StructFieldInfo &Contents);
888 
889  bool emitFieldInitializer(const FieldInfo &Field,
890  const FieldInitializer &Initializer);
891  bool emitFieldInitializer(const FieldInfo &Field,
892  const IntFieldInfo &Contents,
893  const IntFieldInfo &Initializer);
894  bool emitFieldInitializer(const FieldInfo &Field,
895  const RealFieldInfo &Contents,
896  const RealFieldInfo &Initializer);
897  bool emitFieldInitializer(const FieldInfo &Field,
898  const StructFieldInfo &Contents,
899  const StructFieldInfo &Initializer);
900 
901  bool emitStructInitializer(const StructInfo &Structure,
902  const StructInitializer &Initializer);
903 
904  // User-defined types (structs, unions):
905  bool emitStructValues(const StructInfo &Structure, unsigned *Count = nullptr);
906  bool addStructField(StringRef Name, const StructInfo &Structure);
907  bool parseDirectiveStructValue(const StructInfo &Structure,
908  StringRef Directive, SMLoc DirLoc);
909  bool parseDirectiveNamedStructValue(const StructInfo &Structure,
910  StringRef Directive, SMLoc DirLoc,
911  StringRef Name);
912 
913  // "=", "equ", "textequ"
914  bool parseDirectiveEquate(StringRef IDVal, StringRef Name,
915  DirectiveKind DirKind, SMLoc NameLoc);
916 
917  bool parseDirectiveOrg(); // "org"
918 
919  bool emitAlignTo(int64_t Alignment);
920  bool parseDirectiveAlign(); // "align"
921  bool parseDirectiveEven(); // "even"
922 
923  // ".file", ".line", ".loc", ".stabs"
924  bool parseDirectiveFile(SMLoc DirectiveLoc);
925  bool parseDirectiveLine();
926  bool parseDirectiveLoc();
927  bool parseDirectiveStabs();
928 
929  // ".cv_file", ".cv_func_id", ".cv_inline_site_id", ".cv_loc", ".cv_linetable",
930  // ".cv_inline_linetable", ".cv_def_range", ".cv_string"
931  bool parseDirectiveCVFile();
932  bool parseDirectiveCVFuncId();
933  bool parseDirectiveCVInlineSiteId();
934  bool parseDirectiveCVLoc();
935  bool parseDirectiveCVLinetable();
936  bool parseDirectiveCVInlineLinetable();
937  bool parseDirectiveCVDefRange();
938  bool parseDirectiveCVString();
939  bool parseDirectiveCVStringTable();
940  bool parseDirectiveCVFileChecksums();
941  bool parseDirectiveCVFileChecksumOffset();
942  bool parseDirectiveCVFPOData();
943 
944  // .cfi directives
945  bool parseDirectiveCFIRegister(SMLoc DirectiveLoc);
946  bool parseDirectiveCFIWindowSave();
947  bool parseDirectiveCFISections();
948  bool parseDirectiveCFIStartProc();
949  bool parseDirectiveCFIEndProc();
950  bool parseDirectiveCFIDefCfaOffset();
951  bool parseDirectiveCFIDefCfa(SMLoc DirectiveLoc);
952  bool parseDirectiveCFIAdjustCfaOffset();
953  bool parseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc);
954  bool parseDirectiveCFIOffset(SMLoc DirectiveLoc);
955  bool parseDirectiveCFIRelOffset(SMLoc DirectiveLoc);
956  bool parseDirectiveCFIPersonalityOrLsda(bool IsPersonality);
957  bool parseDirectiveCFIRememberState();
958  bool parseDirectiveCFIRestoreState();
959  bool parseDirectiveCFISameValue(SMLoc DirectiveLoc);
960  bool parseDirectiveCFIRestore(SMLoc DirectiveLoc);
961  bool parseDirectiveCFIEscape();
962  bool parseDirectiveCFIReturnColumn(SMLoc DirectiveLoc);
963  bool parseDirectiveCFISignalFrame();
964  bool parseDirectiveCFIUndefined(SMLoc DirectiveLoc);
965 
966  // macro directives
967  bool parseDirectivePurgeMacro(SMLoc DirectiveLoc);
968  bool parseDirectiveExitMacro(SMLoc DirectiveLoc, StringRef Directive,
969  std::string &Value);
970  bool parseDirectiveEndMacro(StringRef Directive);
971  bool parseDirectiveMacro(StringRef Name, SMLoc NameLoc);
972 
973  bool parseDirectiveStruct(StringRef Directive, DirectiveKind DirKind,
974  StringRef Name, SMLoc NameLoc);
975  bool parseDirectiveNestedStruct(StringRef Directive, DirectiveKind DirKind);
976  bool parseDirectiveEnds(StringRef Name, SMLoc NameLoc);
977  bool parseDirectiveNestedEnds();
978 
979  bool parseDirectiveExtern();
980 
981  /// Parse a directive like ".globl" which accepts a single symbol (which
982  /// should be a label or an external).
983  bool parseDirectiveSymbolAttribute(MCSymbolAttr Attr);
984 
985  bool parseDirectiveComm(bool IsLocal); // ".comm" and ".lcomm"
986 
987  bool parseDirectiveComment(SMLoc DirectiveLoc); // "comment"
988 
989  bool parseDirectiveInclude(); // "include"
990 
991  // "if" or "ife"
992  bool parseDirectiveIf(SMLoc DirectiveLoc, DirectiveKind DirKind);
993  // "ifb" or "ifnb", depending on ExpectBlank.
994  bool parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank);
995  // "ifidn", "ifdif", "ifidni", or "ifdifi", depending on ExpectEqual and
996  // CaseInsensitive.
997  bool parseDirectiveIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
998  bool CaseInsensitive);
999  // "ifdef" or "ifndef", depending on expect_defined
1000  bool parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined);
1001  // "elseif" or "elseife"
1002  bool parseDirectiveElseIf(SMLoc DirectiveLoc, DirectiveKind DirKind);
1003  // "elseifb" or "elseifnb", depending on ExpectBlank.
1004  bool parseDirectiveElseIfb(SMLoc DirectiveLoc, bool ExpectBlank);
1005  // ".elseifdef" or ".elseifndef", depending on expect_defined
1006  bool parseDirectiveElseIfdef(SMLoc DirectiveLoc, bool expect_defined);
1007  // "elseifidn", "elseifdif", "elseifidni", or "elseifdifi", depending on
1008  // ExpectEqual and CaseInsensitive.
1009  bool parseDirectiveElseIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
1010  bool CaseInsensitive);
1011  bool parseDirectiveElse(SMLoc DirectiveLoc); // "else"
1012  bool parseDirectiveEndIf(SMLoc DirectiveLoc); // "endif"
1013  bool parseEscapedString(std::string &Data) override;
1014  bool parseAngleBracketString(std::string &Data) override;
1015 
1016  // Macro-like directives
1017  MCAsmMacro *parseMacroLikeBody(SMLoc DirectiveLoc);
1018  void instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
1019  raw_svector_ostream &OS);
1020  void instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
1021  SMLoc ExitLoc, raw_svector_ostream &OS);
1022  bool parseDirectiveRepeat(SMLoc DirectiveLoc, StringRef Directive);
1023  bool parseDirectiveFor(SMLoc DirectiveLoc, StringRef Directive);
1024  bool parseDirectiveForc(SMLoc DirectiveLoc, StringRef Directive);
1025  bool parseDirectiveWhile(SMLoc DirectiveLoc);
1026 
1027  // "_emit" or "__emit"
1028  bool parseDirectiveMSEmit(SMLoc DirectiveLoc, ParseStatementInfo &Info,
1029  size_t Len);
1030 
1031  // "align"
1032  bool parseDirectiveMSAlign(SMLoc DirectiveLoc, ParseStatementInfo &Info);
1033 
1034  // "end"
1035  bool parseDirectiveEnd(SMLoc DirectiveLoc);
1036 
1037  // ".err"
1038  bool parseDirectiveError(SMLoc DirectiveLoc);
1039  // ".errb" or ".errnb", depending on ExpectBlank.
1040  bool parseDirectiveErrorIfb(SMLoc DirectiveLoc, bool ExpectBlank);
1041  // ".errdef" or ".errndef", depending on ExpectDefined.
1042  bool parseDirectiveErrorIfdef(SMLoc DirectiveLoc, bool ExpectDefined);
1043  // ".erridn", ".errdif", ".erridni", or ".errdifi", depending on ExpectEqual
1044  // and CaseInsensitive.
1045  bool parseDirectiveErrorIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
1046  bool CaseInsensitive);
1047  // ".erre" or ".errnz", depending on ExpectZero.
1048  bool parseDirectiveErrorIfe(SMLoc DirectiveLoc, bool ExpectZero);
1049 
1050  // ".radix"
1051  bool parseDirectiveRadix(SMLoc DirectiveLoc);
1052 
1053  // "echo"
1054  bool parseDirectiveEcho(SMLoc DirectiveLoc);
1055 
1056  void initializeDirectiveKindMap();
1057  void initializeCVDefRangeTypeMap();
1058  void initializeBuiltinSymbolMap();
1059 };
1060 
1061 } // end anonymous namespace
1062 
1063 namespace llvm {
1064 
1066 
1067 } // end namespace llvm
1068 
// NOTE(review): presumably the address space assumed when none is given
// explicitly; no use is visible in this part of the file -- confirm.
1069 enum { DEFAULT_ADDRSPACE = 0 };
1070 
1071 MasmParser::MasmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
1072  const MCAsmInfo &MAI, struct tm TM, unsigned CB)
1073  : Lexer(MAI), Ctx(Ctx), Out(Out), MAI(MAI), SrcMgr(SM),
1074  CurBuffer(CB ? CB : SM.getMainFileID()), TM(TM) {
1075  HadError = false;
1076  // Save the old handler.
1077  SavedDiagHandler = SrcMgr.getDiagHandler();
1078  SavedDiagContext = SrcMgr.getDiagContext();
1079  // Set our own handler which calls the saved handler.
1081  Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
1082  EndStatementAtEOFStack.push_back(true);
1083 
1084  // Initialize the platform / file format parser.
1085  switch (Ctx.getObjectFileType()) {
1086  case MCContext::IsCOFF:
1087  PlatformParser.reset(createCOFFMasmParser());
1088  break;
1089  default:
1090  report_fatal_error("llvm-ml currently supports only COFF output.");
1091  break;
1092  }
1093 
1094  initializeDirectiveKindMap();
1095  PlatformParser->Initialize(*this);
1096  initializeCVDefRangeTypeMap();
1097  initializeBuiltinSymbolMap();
1098 
1099  NumOfMacroInstantiations = 0;
1100 }
1101 
1102 MasmParser::~MasmParser() {
1103  assert((HadError || ActiveMacros.empty()) &&
1104  "Unexpected active macro instantiation!");
1105 
1106  // Restore the saved diagnostics handler and context for use during
1107  // finalization.
1108  SrcMgr.setDiagHandler(SavedDiagHandler, SavedDiagContext);
1109 }
1110 
1111 void MasmParser::printMacroInstantiations() {
1112  // Print the active macro instantiation stack.
1113  for (std::vector<MacroInstantiation *>::const_reverse_iterator
1114  it = ActiveMacros.rbegin(),
1115  ie = ActiveMacros.rend();
1116  it != ie; ++it)
1117  printMessage((*it)->InstantiationLoc, SourceMgr::DK_Note,
1118  "while in macro instantiation");
1119 }
1120 
1121 void MasmParser::Note(SMLoc L, const Twine &Msg, SMRange Range) {
1122  printPendingErrors();
1123  printMessage(L, SourceMgr::DK_Note, Msg, Range);
1124  printMacroInstantiations();
1125 }
1126 
1127 bool MasmParser::Warning(SMLoc L, const Twine &Msg, SMRange Range) {
1128  if (getTargetParser().getTargetOptions().MCNoWarn)
1129  return false;
1130  if (getTargetParser().getTargetOptions().MCFatalWarnings)
1131  return Error(L, Msg, Range);
1132  printMessage(L, SourceMgr::DK_Warning, Msg, Range);
1133  printMacroInstantiations();
1134  return false;
1135 }
1136 
1137 bool MasmParser::printError(SMLoc L, const Twine &Msg, SMRange Range) {
1138  HadError = true;
1139  printMessage(L, SourceMgr::DK_Error, Msg, Range);
1140  printMacroInstantiations();
1141  return true;
1142 }
1143 
1144 bool MasmParser::enterIncludeFile(const std::string &Filename) {
1145  std::string IncludedFile;
1146  unsigned NewBuf =
1147  SrcMgr.AddIncludeFile(Filename, Lexer.getLoc(), IncludedFile);
1148  if (!NewBuf)
1149  return true;
1150 
1151  CurBuffer = NewBuf;
1152  Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
1153  EndStatementAtEOFStack.push_back(true);
1154  return false;
1155 }
1156 
1157 void MasmParser::jumpToLoc(SMLoc Loc, unsigned InBuffer,
1158  bool EndStatementAtEOF) {
1159  CurBuffer = InBuffer ? InBuffer : SrcMgr.FindBufferContainingLoc(Loc);
1160  Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(),
1161  Loc.getPointer(), EndStatementAtEOF);
1162 }
1163 
1164 bool MasmParser::expandMacros() {
1165  const AsmToken &Tok = getTok();
1166  const std::string IDLower = Tok.getIdentifier().lower();
1167 
1168  const llvm::MCAsmMacro *M = getContext().lookupMacro(IDLower);
1169  if (M && M->IsFunction && peekTok().is(AsmToken::LParen)) {
1170  // This is a macro function invocation; expand it in place.
1171  const SMLoc MacroLoc = Tok.getLoc();
1172  const StringRef MacroId = Tok.getIdentifier();
1173  Lexer.Lex();
1174  if (handleMacroInvocation(M, MacroLoc)) {
1175  Lexer.UnLex(AsmToken(AsmToken::Error, MacroId));
1176  Lexer.Lex();
1177  }
1178  return false;
1179  }
1180 
1181  llvm::Optional<std::string> ExpandedValue;
1182  auto BuiltinIt = BuiltinSymbolMap.find(IDLower);
1183  if (BuiltinIt != BuiltinSymbolMap.end()) {
1184  ExpandedValue =
1185  evaluateBuiltinTextMacro(BuiltinIt->getValue(), Tok.getLoc());
1186  } else {
1187  auto VarIt = Variables.find(IDLower);
1188  if (VarIt != Variables.end() && VarIt->getValue().IsText) {
1189  ExpandedValue = VarIt->getValue().TextValue;
1190  }
1191  }
1192 
1193  if (!ExpandedValue.hasValue())
1194  return true;
1195  std::unique_ptr<MemoryBuffer> Instantiation =
1196  MemoryBuffer::getMemBufferCopy(*ExpandedValue, "<instantiation>");
1197 
1198  // Jump to the macro instantiation and prime the lexer.
1199  CurBuffer =
1200  SrcMgr.AddNewSourceBuffer(std::move(Instantiation), Tok.getEndLoc());
1201  Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(), nullptr,
1202  /*EndStatementAtEOF=*/false);
1203  EndStatementAtEOFStack.push_back(false);
1204  Lexer.Lex();
1205  return false;
1206 }
1207 
1208 const AsmToken &MasmParser::Lex(ExpandKind ExpandNextToken) {
1209  if (Lexer.getTok().is(AsmToken::Error))
1210  Error(Lexer.getErrLoc(), Lexer.getErr());
1211 
1212  // if it's a end of statement with a comment in it
1213  if (getTok().is(AsmToken::EndOfStatement)) {
1214  // if this is a line comment output it.
1215  if (!getTok().getString().empty() && getTok().getString().front() != '\n' &&
1216  getTok().getString().front() != '\r' && MAI.preserveAsmComments())
1217  Out.addExplicitComment(Twine(getTok().getString()));
1218  }
1219 
1220  const AsmToken *tok = &Lexer.Lex();
1221  bool StartOfStatement = Lexer.isAtStartOfStatement();
1222 
1223  while (ExpandNextToken == ExpandMacros && tok->is(AsmToken::Identifier)) {
1224  if (StartOfStatement) {
1225  AsmToken NextTok;
1226  MutableArrayRef<AsmToken> Buf(NextTok);
1227  size_t ReadCount = Lexer.peekTokens(Buf);
1228  if (ReadCount && NextTok.is(AsmToken::Identifier) &&
1229  (NextTok.getString().equals_insensitive("equ") ||
1230  NextTok.getString().equals_insensitive("textequ"))) {
1231  // This looks like an EQU or TEXTEQU directive; don't expand the
1232  // identifier, allowing for redefinitions.
1233  break;
1234  }
1235  }
1236  if (expandMacros())
1237  break;
1238  }
1239 
1240  // Parse comments here to be deferred until end of next statement.
1241  while (tok->is(AsmToken::Comment)) {
1242  if (MAI.preserveAsmComments())
1243  Out.addExplicitComment(Twine(tok->getString()));
1244  tok = &Lexer.Lex();
1245  }
1246 
1247  // Recognize and bypass line continuations.
1248  while (tok->is(AsmToken::BackSlash) &&
1249  peekTok().is(AsmToken::EndOfStatement)) {
1250  // Eat both the backslash and the end of statement.
1251  Lexer.Lex();
1252  tok = &Lexer.Lex();
1253  }
1254 
1255  if (tok->is(AsmToken::Eof)) {
1256  // If this is the end of an included file, pop the parent file off the
1257  // include stack.
1258  SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1259  if (ParentIncludeLoc != SMLoc()) {
1260  EndStatementAtEOFStack.pop_back();
1261  jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1262  return Lex();
1263  }
1264  EndStatementAtEOFStack.pop_back();
1265  assert(EndStatementAtEOFStack.empty());
1266  }
1267 
1268  return *tok;
1269 }
1270 
1271 const AsmToken MasmParser::peekTok(bool ShouldSkipSpace) {
1272  AsmToken Tok;
1273 
1274  MutableArrayRef<AsmToken> Buf(Tok);
1275  size_t ReadCount = Lexer.peekTokens(Buf, ShouldSkipSpace);
1276 
1277  if (ReadCount == 0) {
1278  // If this is the end of an included file, pop the parent file off the
1279  // include stack.
1280  SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1281  if (ParentIncludeLoc != SMLoc()) {
1282  EndStatementAtEOFStack.pop_back();
1283  jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1284  return peekTok(ShouldSkipSpace);
1285  }
1286  EndStatementAtEOFStack.pop_back();
1287  assert(EndStatementAtEOFStack.empty());
1288  }
1289 
1290  assert(ReadCount == 1);
1291  return Tok;
1292 }
1293 
1294 bool MasmParser::enabledGenDwarfForAssembly() {
1295  // Check whether the user specified -g.
1296  if (!getContext().getGenDwarfForAssembly())
1297  return false;
1298  // If we haven't encountered any .file directives (which would imply that
1299  // the assembler source was produced with debug info already) then emit one
1300  // describing the assembler source file itself.
1301  if (getContext().getGenDwarfFileNumber() == 0) {
1302  // Use the first #line directive for this, if any. It's preprocessed, so
1303  // there is no checksum, and of course no source directive.
1304  if (!FirstCppHashFilename.empty())
1305  getContext().setMCLineTableRootFile(/*CUID=*/0,
1306  getContext().getCompilationDir(),
1307  FirstCppHashFilename,
1308  /*Cksum=*/None, /*Source=*/None);
1309  const MCDwarfFile &RootFile =
1310  getContext().getMCDwarfLineTable(/*CUID=*/0).getRootFile();
1311  getContext().setGenDwarfFileNumber(getStreamer().emitDwarfFileDirective(
1312  /*CUID=*/0, getContext().getCompilationDir(), RootFile.Name,
1313  RootFile.Checksum, RootFile.Source));
1314  }
1315  return true;
1316 }
1317 
1318 bool MasmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
1319  // Create the initial section, if requested.
1320  if (!NoInitialTextSection)
1321  Out.initSections(false, getTargetParser().getSTI());
1322 
1323  // Prime the lexer.
1324  Lex();
1325 
1326  HadError = false;
1327  AsmCond StartingCondState = TheCondState;
1328  SmallVector<AsmRewrite, 4> AsmStrRewrites;
1329 
1330  // If we are generating dwarf for assembly source files save the initial text
1331  // section. (Don't use enabledGenDwarfForAssembly() here, as we aren't
1332  // emitting any actual debug info yet and haven't had a chance to parse any
1333  // embedded .file directives.)
1334  if (getContext().getGenDwarfForAssembly()) {
1335  MCSection *Sec = getStreamer().getCurrentSectionOnly();
1336  if (!Sec->getBeginSymbol()) {
1337  MCSymbol *SectionStartSym = getContext().createTempSymbol();
1338  getStreamer().emitLabel(SectionStartSym);
1339  Sec->setBeginSymbol(SectionStartSym);
1340  }
1341  bool InsertResult = getContext().addGenDwarfSection(Sec);
1342  assert(InsertResult && ".text section should not have debug info yet");
1343  (void)InsertResult;
1344  }
1345 
1346  getTargetParser().onBeginOfFile();
1347 
1348  // While we have input, parse each statement.
1349  while (Lexer.isNot(AsmToken::Eof) ||
1350  SrcMgr.getParentIncludeLoc(CurBuffer) != SMLoc()) {
1351  // Skip through the EOF at the end of an inclusion.
1352  if (Lexer.is(AsmToken::Eof))
1353  Lex();
1354 
1355  ParseStatementInfo Info(&AsmStrRewrites);
1356  bool Parsed = parseStatement(Info, nullptr);
1357 
1358  // If we have a Lexer Error we are on an Error Token. Load in Lexer Error
1359  // for printing ErrMsg via Lex() only if no (presumably better) parser error
1360  // exists.
1361  if (Parsed && !hasPendingError() && Lexer.getTok().is(AsmToken::Error)) {
1362  Lex();
1363  }
1364 
1365  // parseStatement returned true so may need to emit an error.
1366  printPendingErrors();
1367 
1368  // Skipping to the next line if needed.
1369  if (Parsed && !getLexer().isAtStartOfStatement())
1370  eatToEndOfStatement();
1371  }
1372 
1373  getTargetParser().onEndOfFile();
1374  printPendingErrors();
1375 
1376  // All errors should have been emitted.
1377  assert(!hasPendingError() && "unexpected error from parseStatement");
1378 
1379  getTargetParser().flushPendingInstructions(getStreamer());
1380 
1381  if (TheCondState.TheCond != StartingCondState.TheCond ||
1382  TheCondState.Ignore != StartingCondState.Ignore)
1383  printError(getTok().getLoc(), "unmatched .ifs or .elses");
1384  // Check to see there are no empty DwarfFile slots.
1385  const auto &LineTables = getContext().getMCDwarfLineTables();
1386  if (!LineTables.empty()) {
1387  unsigned Index = 0;
1388  for (const auto &File : LineTables.begin()->second.getMCDwarfFiles()) {
1389  if (File.Name.empty() && Index != 0)
1390  printError(getTok().getLoc(), "unassigned file number: " +
1391  Twine(Index) +
1392  " for .file directives");
1393  ++Index;
1394  }
1395  }
1396 
1397  // Check to see that all assembler local symbols were actually defined.
1398  // Targets that don't do subsections via symbols may not want this, though,
1399  // so conservatively exclude them. Only do this if we're finalizing, though,
1400  // as otherwise we won't necessarilly have seen everything yet.
1401  if (!NoFinalize) {
1402  if (MAI.hasSubsectionsViaSymbols()) {
1403  for (const auto &TableEntry : getContext().getSymbols()) {
1404  MCSymbol *Sym = TableEntry.getValue();
1405  // Variable symbols may not be marked as defined, so check those
1406  // explicitly. If we know it's a variable, we have a definition for
1407  // the purposes of this check.
1408  if (Sym->isTemporary() && !Sym->isVariable() && !Sym->isDefined())
1409  // FIXME: We would really like to refer back to where the symbol was
1410  // first referenced for a source location. We need to add something
1411  // to track that. Currently, we just point to the end of the file.
1412  printError(getTok().getLoc(), "assembler local symbol '" +
1413  Sym->getName() + "' not defined");
1414  }
1415  }
1416 
1417  // Temporary symbols like the ones for directional jumps don't go in the
1418  // symbol table. They also need to be diagnosed in all (final) cases.
1419  for (std::tuple<SMLoc, CppHashInfoTy, MCSymbol *> &LocSym : DirLabels) {
1420  if (std::get<2>(LocSym)->isUndefined()) {
1421  // Reset the state of any "# line file" directives we've seen to the
1422  // context as it was at the diagnostic site.
1423  CppHashInfo = std::get<1>(LocSym);
1424  printError(std::get<0>(LocSym), "directional label undefined");
1425  }
1426  }
1427  }
1428 
1429  // Finalize the output stream if there are no errors and if the client wants
1430  // us to.
1431  if (!HadError && !NoFinalize)
1432  Out.Finish(Lexer.getLoc());
1433 
1434  return HadError || getContext().hadError();
1435 }
1436 
1437 bool MasmParser::checkForValidSection() {
1438  if (!ParsingMSInlineAsm && !getStreamer().getCurrentSectionOnly()) {
1439  Out.initSections(false, getTargetParser().getSTI());
1440  return Error(getTok().getLoc(),
1441  "expected section directive before assembly directive");
1442  }
1443  return false;
1444 }
1445 
1446 /// Throw away the rest of the line for testing purposes.
1447 void MasmParser::eatToEndOfStatement() {
1448  while (Lexer.isNot(AsmToken::EndOfStatement)) {
1449  if (Lexer.is(AsmToken::Eof)) {
1450  SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1451  if (ParentIncludeLoc == SMLoc()) {
1452  break;
1453  }
1454 
1455  EndStatementAtEOFStack.pop_back();
1456  jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1457  }
1458 
1459  Lexer.Lex();
1460  }
1461 
1462  // Eat EOL.
1463  if (Lexer.is(AsmToken::EndOfStatement))
1464  Lexer.Lex();
1465 }
1466 
1468 MasmParser::parseStringRefsTo(AsmToken::TokenKind EndTok) {
1470  const char *Start = getTok().getLoc().getPointer();
1471  while (Lexer.isNot(EndTok)) {
1472  if (Lexer.is(AsmToken::Eof)) {
1473  SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1474  if (ParentIncludeLoc == SMLoc()) {
1475  break;
1476  }
1477  Refs.emplace_back(Start, getTok().getLoc().getPointer() - Start);
1478 
1479  EndStatementAtEOFStack.pop_back();
1480  jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1481  Lexer.Lex();
1482  Start = getTok().getLoc().getPointer();
1483  } else {
1484  Lexer.Lex();
1485  }
1486  }
1487  Refs.emplace_back(Start, getTok().getLoc().getPointer() - Start);
1488  return Refs;
1489 }
1490 
1491 std::string MasmParser::parseStringTo(AsmToken::TokenKind EndTok) {
1492  SmallVector<StringRef, 1> Refs = parseStringRefsTo(EndTok);
1493  std::string Str;
1494  for (StringRef S : Refs) {
1495  Str.append(S.str());
1496  }
1497  return Str;
1498 }
1499 
1500 StringRef MasmParser::parseStringToEndOfStatement() {
1501  const char *Start = getTok().getLoc().getPointer();
1502 
1503  while (Lexer.isNot(AsmToken::EndOfStatement) && Lexer.isNot(AsmToken::Eof))
1504  Lexer.Lex();
1505 
1506  const char *End = getTok().getLoc().getPointer();
1507  return StringRef(Start, End - Start);
1508 }
1509 
1510 /// Parse a paren expression and return it.
1511 /// NOTE: This assumes the leading '(' has already been consumed.
1512 ///
1513 /// parenexpr ::= expr)
1514 ///
1515 bool MasmParser::parseParenExpr(const MCExpr *&Res, SMLoc &EndLoc) {
1516  if (parseExpression(Res))
1517  return true;
1518  EndLoc = Lexer.getTok().getEndLoc();
1519  return parseRParen();
1520 }
1521 
1522 /// Parse a bracket expression and return it.
1523 /// NOTE: This assumes the leading '[' has already been consumed.
1524 ///
1525 /// bracketexpr ::= expr]
1526 ///
1527 bool MasmParser::parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc) {
1528  if (parseExpression(Res))
1529  return true;
1530  EndLoc = getTok().getEndLoc();
1531  if (parseToken(AsmToken::RBrac, "expected ']' in brackets expression"))
1532  return true;
1533  return false;
1534 }
1535 
1536 /// Parse a primary expression and return it.
1537 /// primaryexpr ::= (parenexpr
1538 /// primaryexpr ::= symbol
1539 /// primaryexpr ::= number
1540 /// primaryexpr ::= '.'
1541 /// primaryexpr ::= ~,+,-,'not' primaryexpr
1542 /// primaryexpr ::= string
1543 /// (a string is interpreted as a 64-bit number in big-endian base-256)
1544 bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
1545  AsmTypeInfo *TypeInfo) {
1546  SMLoc FirstTokenLoc = getLexer().getLoc();
1547  AsmToken::TokenKind FirstTokenKind = Lexer.getKind();
1548  switch (FirstTokenKind) {
1549  default:
1550  return TokError("unknown token in expression");
1551  // If we have an error assume that we've already handled it.
1552  case AsmToken::Error:
1553  return true;
1554  case AsmToken::Exclaim:
1555  Lex(); // Eat the operator.
1556  if (parsePrimaryExpr(Res, EndLoc, nullptr))
1557  return true;
1558  Res = MCUnaryExpr::createLNot(Res, getContext(), FirstTokenLoc);
1559  return false;
1560  case AsmToken::Dollar:
1561  case AsmToken::At:
1562  case AsmToken::Identifier: {
1564  if (parseIdentifier(Identifier)) {
1565  // We may have failed but $ may be a valid token.
1566  if (getTok().is(AsmToken::Dollar)) {
1567  if (Lexer.getMAI().getDollarIsPC()) {
1568  Lex();
1569  // This is a '$' reference, which references the current PC. Emit a
1570  // temporary label to the streamer and refer to it.
1571  MCSymbol *Sym = Ctx.createTempSymbol();
1572  Out.emitLabel(Sym);
1573  Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None,
1574  getContext());
1575  EndLoc = FirstTokenLoc;
1576  return false;
1577  }
1578  return Error(FirstTokenLoc, "invalid token in expression");
1579  }
1580  }
1581  // Parse named bitwise negation.
1582  if (Identifier.equals_insensitive("not")) {
1583  if (parsePrimaryExpr(Res, EndLoc, nullptr))
1584  return true;
1585  Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc);
1586  return false;
1587  }
1588  // Parse symbol variant.
1589  std::pair<StringRef, StringRef> Split;
1590  if (!MAI.useParensForSymbolVariant()) {
1591  if (FirstTokenKind == AsmToken::String) {
1592  if (Lexer.is(AsmToken::At)) {
1593  Lex(); // eat @
1594  SMLoc AtLoc = getLexer().getLoc();
1595  StringRef VName;
1596  if (parseIdentifier(VName))
1597  return Error(AtLoc, "expected symbol variant after '@'");
1598 
1599  Split = std::make_pair(Identifier, VName);
1600  }
1601  } else {
1602  Split = Identifier.split('@');
1603  }
1604  } else if (Lexer.is(AsmToken::LParen)) {
1605  Lex(); // eat '('.
1606  StringRef VName;
1607  parseIdentifier(VName);
1608  // eat ')'.
1609  if (parseToken(AsmToken::RParen,
1610  "unexpected token in variant, expected ')'"))
1611  return true;
1612  Split = std::make_pair(Identifier, VName);
1613  }
1614 
1615  EndLoc = SMLoc::getFromPointer(Identifier.end());
1616 
1617  // This is a symbol reference.
1619  if (SymbolName.empty())
1620  return Error(getLexer().getLoc(), "expected a symbol reference");
1621 
1622  MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1623 
1624  // Look up the symbol variant if used.
1625  if (!Split.second.empty()) {
1626  Variant = MCSymbolRefExpr::getVariantKindForName(Split.second);
1627  if (Variant != MCSymbolRefExpr::VK_Invalid) {
1628  SymbolName = Split.first;
1629  } else if (MAI.doesAllowAtInName() && !MAI.useParensForSymbolVariant()) {
1630  Variant = MCSymbolRefExpr::VK_None;
1631  } else {
1632  return Error(SMLoc::getFromPointer(Split.second.begin()),
1633  "invalid variant '" + Split.second + "'");
1634  }
1635  }
1636 
1637  // Find the field offset if used.
1639  Split = SymbolName.split('.');
1640  if (Split.second.empty()) {
1641  } else {
1642  SymbolName = Split.first;
1643  if (lookUpField(SymbolName, Split.second, Info)) {
1644  std::pair<StringRef, StringRef> BaseMember = Split.second.split('.');
1645  StringRef Base = BaseMember.first, Member = BaseMember.second;
1646  lookUpField(Base, Member, Info);
1647  } else if (Structs.count(SymbolName.lower())) {
1648  // This is actually a reference to a field offset.
1649  Res = MCConstantExpr::create(Info.Offset, getContext());
1650  return false;
1651  }
1652  }
1653 
1654  MCSymbol *Sym = getContext().getInlineAsmLabel(SymbolName);
1655  if (!Sym) {
1656  // If this is a built-in numeric value, treat it as a constant.
1657  auto BuiltinIt = BuiltinSymbolMap.find(SymbolName.lower());
1658  const BuiltinSymbol Symbol = (BuiltinIt == BuiltinSymbolMap.end())
1659  ? BI_NO_SYMBOL
1660  : BuiltinIt->getValue();
1661  if (Symbol != BI_NO_SYMBOL) {
1662  const MCExpr *Value = evaluateBuiltinValue(Symbol, FirstTokenLoc);
1663  if (Value) {
1664  Res = Value;
1665  return false;
1666  }
1667  }
1668 
1669  // Variables use case-insensitive symbol names; if this is a variable, we
1670  // find the symbol using its canonical name.
1671  auto VarIt = Variables.find(SymbolName.lower());
1672  if (VarIt != Variables.end())
1673  SymbolName = VarIt->second.Name;
1674  Sym = getContext().getOrCreateSymbol(SymbolName);
1675  }
1676 
1677  // If this is an absolute variable reference, substitute it now to preserve
1678  // semantics in the face of reassignment.
1679  if (Sym->isVariable()) {
1680  auto V = Sym->getVariableValue(/*SetUsed=*/false);
1681  bool DoInline = isa<MCConstantExpr>(V) && !Variant;
1682  if (auto TV = dyn_cast<MCTargetExpr>(V))
1683  DoInline = TV->inlineAssignedExpr();
1684  if (DoInline) {
1685  if (Variant)
1686  return Error(EndLoc, "unexpected modifier on variable reference");
1687  Res = Sym->getVariableValue(/*SetUsed=*/false);
1688  return false;
1689  }
1690  }
1691 
1692  // Otherwise create a symbol ref.
1693  const MCExpr *SymRef =
1694  MCSymbolRefExpr::create(Sym, Variant, getContext(), FirstTokenLoc);
1695  if (Info.Offset) {
1696  Res = MCBinaryExpr::create(
1697  MCBinaryExpr::Add, SymRef,
1698  MCConstantExpr::create(Info.Offset, getContext()), getContext());
1699  } else {
1700  Res = SymRef;
1701  }
1702  if (TypeInfo) {
1703  if (Info.Type.Name.empty()) {
1704  auto TypeIt = KnownType.find(Identifier.lower());
1705  if (TypeIt != KnownType.end()) {
1706  Info.Type = TypeIt->second;
1707  }
1708  }
1709 
1710  *TypeInfo = Info.Type;
1711  }
1712  return false;
1713  }
1714  case AsmToken::BigNum:
1715  return TokError("literal value out of range for directive");
1716  case AsmToken::Integer: {
1717  SMLoc Loc = getTok().getLoc();
1718  int64_t IntVal = getTok().getIntVal();
1719  Res = MCConstantExpr::create(IntVal, getContext());
1720  EndLoc = Lexer.getTok().getEndLoc();
1721  Lex(); // Eat token.
1722  // Look for 'b' or 'f' following an Integer as a directional label.
1723  if (Lexer.getKind() == AsmToken::Identifier) {
1724  StringRef IDVal = getTok().getString();
1725  // Look up the symbol variant if used.
1726  std::pair<StringRef, StringRef> Split = IDVal.split('@');
1727  MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1728  if (Split.first.size() != IDVal.size()) {
1729  Variant = MCSymbolRefExpr::getVariantKindForName(Split.second);
1730  if (Variant == MCSymbolRefExpr::VK_Invalid)
1731  return TokError("invalid variant '" + Split.second + "'");
1732  IDVal = Split.first;
1733  }
1734  if (IDVal == "f" || IDVal == "b") {
1735  MCSymbol *Sym =
1736  Ctx.getDirectionalLocalSymbol(IntVal, IDVal == "b");
1737  Res = MCSymbolRefExpr::create(Sym, Variant, getContext());
1738  if (IDVal == "b" && Sym->isUndefined())
1739  return Error(Loc, "directional label undefined");
1740  DirLabels.push_back(std::make_tuple(Loc, CppHashInfo, Sym));
1741  EndLoc = Lexer.getTok().getEndLoc();
1742  Lex(); // Eat identifier.
1743  }
1744  }
1745  return false;
1746  }
1747  case AsmToken::String: {
1748  // MASM strings (used as constants) are interpreted as big-endian base-256.
1749  SMLoc ValueLoc = getTok().getLoc();
1750  std::string Value;
1751  if (parseEscapedString(Value))
1752  return true;
1753  if (Value.size() > 8)
1754  return Error(ValueLoc, "literal value out of range");
1755  uint64_t IntValue = 0;
1756  for (const unsigned char CharVal : Value)
1757  IntValue = (IntValue << 8) | CharVal;
1758  Res = MCConstantExpr::create(IntValue, getContext());
1759  return false;
1760  }
1761  case AsmToken::Real: {
1762  APFloat RealVal(APFloat::IEEEdouble(), getTok().getString());
1763  uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
1764  Res = MCConstantExpr::create(IntVal, getContext());
1765  EndLoc = Lexer.getTok().getEndLoc();
1766  Lex(); // Eat token.
1767  return false;
1768  }
1769  case AsmToken::Dot: {
1770  // This is a '.' reference, which references the current PC. Emit a
1771  // temporary label to the streamer and refer to it.
1772  MCSymbol *Sym = Ctx.createTempSymbol();
1773  Out.emitLabel(Sym);
1774  Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
1775  EndLoc = Lexer.getTok().getEndLoc();
1776  Lex(); // Eat identifier.
1777  return false;
1778  }
1779  case AsmToken::LParen:
1780  Lex(); // Eat the '('.
1781  return parseParenExpr(Res, EndLoc);
1782  case AsmToken::LBrac:
1783  if (!PlatformParser->HasBracketExpressions())
1784  return TokError("brackets expression not supported on this target");
1785  Lex(); // Eat the '['.
1786  return parseBracketExpr(Res, EndLoc);
1787  case AsmToken::Minus:
1788  Lex(); // Eat the operator.
1789  if (parsePrimaryExpr(Res, EndLoc, nullptr))
1790  return true;
1791  Res = MCUnaryExpr::createMinus(Res, getContext(), FirstTokenLoc);
1792  return false;
1793  case AsmToken::Plus:
1794  Lex(); // Eat the operator.
1795  if (parsePrimaryExpr(Res, EndLoc, nullptr))
1796  return true;
1797  Res = MCUnaryExpr::createPlus(Res, getContext(), FirstTokenLoc);
1798  return false;
1799  case AsmToken::Tilde:
1800  Lex(); // Eat the operator.
1801  if (parsePrimaryExpr(Res, EndLoc, nullptr))
1802  return true;
1803  Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc);
1804  return false;
1805  // MIPS unary expression operators. The lexer won't generate these tokens if
1806  // MCAsmInfo::HasMipsExpressions is false for the target.
1807  case AsmToken::PercentCall16:
1808  case AsmToken::PercentCall_Hi:
1809  case AsmToken::PercentCall_Lo:
1810  case AsmToken::PercentDtprel_Hi:
1811  case AsmToken::PercentDtprel_Lo:
1812  case AsmToken::PercentGot:
1813  case AsmToken::PercentGot_Disp:
1814  case AsmToken::PercentGot_Hi:
1815  case AsmToken::PercentGot_Lo:
1816  case AsmToken::PercentGot_Ofst:
1817  case AsmToken::PercentGot_Page:
1818  case AsmToken::PercentGottprel:
1819  case AsmToken::PercentGp_Rel:
1820  case AsmToken::PercentHi:
1821  case AsmToken::PercentHigher:
1822  case AsmToken::PercentHighest:
1823  case AsmToken::PercentLo:
1824  case AsmToken::PercentNeg:
1825  case AsmToken::PercentPcrel_Hi:
1826  case AsmToken::PercentPcrel_Lo:
1827  case AsmToken::PercentTlsgd:
1828  case AsmToken::PercentTlsldm:
1829  case AsmToken::PercentTprel_Hi:
1830  case AsmToken::PercentTprel_Lo:
1831  Lex(); // Eat the operator.
1832  if (Lexer.isNot(AsmToken::LParen))
1833  return TokError("expected '(' after operator");
1834  Lex(); // Eat the operator.
1835  if (parseExpression(Res, EndLoc))
1836  return true;
1837  if (parseRParen())
1838  return true;
1839  Res = getTargetParser().createTargetUnaryExpr(Res, FirstTokenKind, Ctx);
1840  return !Res;
1841  }
1842 }
1843 
1844 bool MasmParser::parseExpression(const MCExpr *&Res) {
1845  SMLoc EndLoc;
1846  return parseExpression(Res, EndLoc);
1847 }
1848 
1849 /// This function checks if the next token is <string> type or arithmetic.
1850 /// string that begin with character '<' must end with character '>'.
1851 /// otherwise it is arithmetics.
1852 /// If the function returns a 'true' value,
1853 /// the End argument will be filled with the last location pointed to the '>'
1854 /// character.
1855 static bool isAngleBracketString(SMLoc &StrLoc, SMLoc &EndLoc) {
1856  assert((StrLoc.getPointer() != nullptr) &&
1857  "Argument to the function cannot be a NULL value");
1858  const char *CharPtr = StrLoc.getPointer();
1859  while ((*CharPtr != '>') && (*CharPtr != '\n') && (*CharPtr != '\r') &&
1860  (*CharPtr != '\0')) {
1861  if (*CharPtr == '!')
1862  CharPtr++;
1863  CharPtr++;
1864  }
1865  if (*CharPtr == '>') {
1866  EndLoc = StrLoc.getFromPointer(CharPtr + 1);
1867  return true;
1868  }
1869  return false;
1870 }
1871 
1872 /// creating a string without the escape characters '!'.
1873 static std::string angleBracketString(StringRef BracketContents) {
1874  std::string Res;
1875  for (size_t Pos = 0; Pos < BracketContents.size(); Pos++) {
1876  if (BracketContents[Pos] == '!')
1877  Pos++;
1878  Res += BracketContents[Pos];
1879  }
1880  return Res;
1881 }
1882 
1883 /// Parse an expression and return it.
1884 ///
1885 /// expr ::= expr &&,|| expr -> lowest.
1886 /// expr ::= expr |,^,&,! expr
1887 /// expr ::= expr ==,!=,<>,<,<=,>,>= expr
1888 /// expr ::= expr <<,>> expr
1889 /// expr ::= expr +,- expr
1890 /// expr ::= expr *,/,% expr -> highest.
1891 /// expr ::= primaryexpr
1892 ///
1893 bool MasmParser::parseExpression(const MCExpr *&Res, SMLoc &EndLoc) {
1894  // Parse the expression.
1895  Res = nullptr;
1896  if (getTargetParser().parsePrimaryExpr(Res, EndLoc) ||
1897  parseBinOpRHS(1, Res, EndLoc))
1898  return true;
1899 
1900  // Try to constant fold it up front, if possible. Do not exploit
1901  // assembler here.
1902  int64_t Value;
1903  if (Res->evaluateAsAbsolute(Value))
1904  Res = MCConstantExpr::create(Value, getContext());
1905 
1906  return false;
1907 }
1908 
1909 bool MasmParser::parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) {
1910  Res = nullptr;
1911  return parseParenExpr(Res, EndLoc) || parseBinOpRHS(1, Res, EndLoc);
1912 }
1913 
1914 bool MasmParser::parseParenExprOfDepth(unsigned ParenDepth, const MCExpr *&Res,
1915  SMLoc &EndLoc) {
1916  if (parseParenExpr(Res, EndLoc))
1917  return true;
1918 
1919  for (; ParenDepth > 0; --ParenDepth) {
1920  if (parseBinOpRHS(1, Res, EndLoc))
1921  return true;
1922 
1923  // We don't Lex() the last RParen.
1924  // This is the same behavior as parseParenExpression().
1925  if (ParenDepth - 1 > 0) {
1926  EndLoc = getTok().getEndLoc();
1927  if (parseRParen())
1928  return true;
1929  }
1930  }
1931  return false;
1932 }
1933 
1934 bool MasmParser::parseAbsoluteExpression(int64_t &Res) {
1935  const MCExpr *Expr;
1936 
1937  SMLoc StartLoc = Lexer.getLoc();
1938  if (parseExpression(Expr))
1939  return true;
1940 
1941  if (!Expr->evaluateAsAbsolute(Res, getStreamer().getAssemblerPtr()))
1942  return Error(StartLoc, "expected absolute expression");
1943 
1944  return false;
1945 }
1946 
1949  bool ShouldUseLogicalShr,
1950  bool EndExpressionAtGreater) {
1951  switch (K) {
1952  default:
1953  return 0; // not a binop.
1954 
1955  // Lowest Precedence: &&, ||
1956  case AsmToken::AmpAmp:
1957  Kind = MCBinaryExpr::LAnd;
1958  return 2;
1959  case AsmToken::PipePipe:
1960  Kind = MCBinaryExpr::LOr;
1961  return 1;
1962 
1963  // Low Precedence: ==, !=, <>, <, <=, >, >=
1964  case AsmToken::EqualEqual:
1966  return 3;
1967  case AsmToken::ExclaimEqual:
1968  case AsmToken::LessGreater:
1970  return 3;
1971  case AsmToken::Less:
1973  return 3;
1974  case AsmToken::LessEqual:
1975  Kind = MCBinaryExpr::LTE;
1976  return 3;
1977  case AsmToken::Greater:
1978  if (EndExpressionAtGreater)
1979  return 0;
1981  return 3;
1982  case AsmToken::GreaterEqual:
1983  Kind = MCBinaryExpr::GTE;
1984  return 3;
1985 
1986  // Low Intermediate Precedence: +, -
1987  case AsmToken::Plus:
1989  return 4;
1990  case AsmToken::Minus:
1991  Kind = MCBinaryExpr::Sub;
1992  return 4;
1993 
1994  // High Intermediate Precedence: |, &, ^
1995  case AsmToken::Pipe:
1996  Kind = MCBinaryExpr::Or;
1997  return 5;
1998  case AsmToken::Caret:
1999  Kind = MCBinaryExpr::Xor;
2000  return 5;
2001  case AsmToken::Amp:
2002  Kind = MCBinaryExpr::And;
2003  return 5;
2004 
2005  // Highest Precedence: *, /, %, <<, >>
2006  case AsmToken::Star:
2008  return 6;
2009  case AsmToken::Slash:
2010  Kind = MCBinaryExpr::Div;
2011  return 6;
2012  case AsmToken::Percent:
2014  return 6;
2015  case AsmToken::LessLess:
2016  Kind = MCBinaryExpr::Shl;
2017  return 6;
2018  case AsmToken::GreaterGreater:
2019  if (EndExpressionAtGreater)
2020  return 0;
2021  Kind = ShouldUseLogicalShr ? MCBinaryExpr::LShr : MCBinaryExpr::AShr;
2022  return 6;
2023  }
2024 }
2025 
2026 unsigned MasmParser::getBinOpPrecedence(AsmToken::TokenKind K,
2028  bool ShouldUseLogicalShr = MAI.shouldUseLogicalShr();
2029  return getGNUBinOpPrecedence(K, Kind, ShouldUseLogicalShr,
2030  AngleBracketDepth > 0);
2031 }
2032 
2033 /// Parse all binary operators with precedence >= 'Precedence'.
2034 /// Res contains the LHS of the expression on input.
2035 bool MasmParser::parseBinOpRHS(unsigned Precedence, const MCExpr *&Res,
2036  SMLoc &EndLoc) {
2037  SMLoc StartLoc = Lexer.getLoc();
2038  while (true) {
2040  if (Lexer.getKind() == AsmToken::Identifier) {
2042  .CaseLower("and", AsmToken::Amp)
2043  .CaseLower("not", AsmToken::Exclaim)
2044  .CaseLower("or", AsmToken::Pipe)
2046  .CaseLower("ne", AsmToken::ExclaimEqual)
2047  .CaseLower("lt", AsmToken::Less)
2048  .CaseLower("le", AsmToken::LessEqual)
2049  .CaseLower("gt", AsmToken::Greater)
2050  .CaseLower("ge", AsmToken::GreaterEqual)
2051  .Default(TokKind);
2052  }
2054  unsigned TokPrec = getBinOpPrecedence(TokKind, Kind);
2055 
2056  // If the next token is lower precedence than we are allowed to eat, return
2057  // successfully with what we ate already.
2058  if (TokPrec < Precedence)
2059  return false;
2060 
2061  Lex();
2062 
2063  // Eat the next primary expression.
2064  const MCExpr *RHS;
2065  if (getTargetParser().parsePrimaryExpr(RHS, EndLoc))
2066  return true;
2067 
2068  // If BinOp binds less tightly with RHS than the operator after RHS, let
2069  // the pending operator take RHS as its LHS.
2071  unsigned NextTokPrec = getBinOpPrecedence(Lexer.getKind(), Dummy);
2072  if (TokPrec < NextTokPrec && parseBinOpRHS(TokPrec + 1, RHS, EndLoc))
2073  return true;
2074 
2075  // Merge LHS and RHS according to operator.
2076  Res = MCBinaryExpr::create(Kind, Res, RHS, getContext(), StartLoc);
2077  }
2078 }
2079 
2080 /// ParseStatement:
2081 /// ::= % statement
2082 /// ::= EndOfStatement
2083 /// ::= Label* Directive ...Operands... EndOfStatement
2084 /// ::= Label* Identifier OperandList* EndOfStatement
/// Parse a single statement from the token stream.
///
/// Processing order, as implemented below:
///   1. blank / comment-only lines and the '%' expansion operator;
///   2. cpp hash line markers;
///   3. conditional-assembly directives (handled even while
///      TheCondState.Ignore is set, so nested conditionals stay balanced);
///   4. labels ("identifier ':'");
///   5. macro instantiations;
///   6. prefix directives (extension handlers, then the target parser, then
///      the generic directives of this class);
///   7. data definitions using a user-defined structure type;
///   8. infix directives, where the directive follows its first argument;
///   9. MS inline-asm special cases, and finally a target instruction.
///
/// \param Info receives the parsed operands, opcode, parse-error flag, asm
///             rewrites and macro exit value produced while handling the
///             statement.
/// NOTE(review): the line declaring the second parameter (used below as `SI`
/// for inline-asm label lookup) and the opening brace is not present in this
/// listing - confirm against the real source.
/// \returns true on error, false otherwise.
2085 bool MasmParser::parseStatement(ParseStatementInfo &Info,
2087  assert(!hasPendingError() && "parseStatement started with pending error");
2088  // Eat initial spaces and comments.
2089  while (Lexer.is(AsmToken::Space))
2090  Lex();
2091  if (Lexer.is(AsmToken::EndOfStatement)) {
2092  // If this is a line comment we can drop it safely.
2093  if (getTok().getString().empty() || getTok().getString().front() == '\r' ||
2094  getTok().getString().front() == '\n')
2095  Out.AddBlankLine();
2096  Lex();
2097  return false;
2098  }
2099 
2100  // If preceded by an expansion operator, first expand all text macros and
2101  // macro functions.
2102  if (getTok().is(AsmToken::Percent)) {
2103  SMLoc ExpansionLoc = getTok().getLoc();
2104  if (parseToken(AsmToken::Percent) || expandStatement(ExpansionLoc))
2105  return true;
2106  }
2107 
2108  // Statements always start with an identifier, unless we're dealing with a
2109  // processor directive (.386, .686, etc.) that lexes as a real.
2110  AsmToken ID = getTok();
2111  SMLoc IDLoc = ID.getLoc();
2112  StringRef IDVal;
  // -1 means "not a numeric local label"; it is tested again when the label
  // symbol is created further down.
2113  int64_t LocalLabelVal = -1;
2114  if (Lexer.is(AsmToken::HashDirective))
2115  return parseCppHashLineFilenameComment(IDLoc);
2116  // Allow an integer followed by a ':' as a directional local label.
2117  if (Lexer.is(AsmToken::Integer)) {
2118  LocalLabelVal = getTok().getIntVal();
2119  if (LocalLabelVal < 0) {
2120  if (!TheCondState.Ignore) {
2121  Lex(); // always eat a token
2122  return Error(IDLoc, "unexpected token at start of statement");
2123  }
2124  IDVal = "";
2125  } else {
2126  IDVal = getTok().getString();
2127  Lex(); // Consume the integer token to be used as an identifier token.
2128  if (Lexer.getKind() != AsmToken::Colon) {
2129  if (!TheCondState.Ignore) {
2130  Lex(); // always eat a token
2131  return Error(IDLoc, "unexpected token at start of statement");
2132  }
2133  }
2134  }
2135  } else if (Lexer.is(AsmToken::Dot)) {
2136  // Treat '.' as a valid identifier in this context.
2137  Lex();
2138  IDVal = ".";
2139  } else if (Lexer.is(AsmToken::LCurly)) {
2140  // Treat '{' as a valid identifier in this context.
2141  Lex();
2142  IDVal = "{";
2143 
2144  } else if (Lexer.is(AsmToken::RCurly)) {
2145  // Treat '}' as a valid identifier in this context.
2146  Lex();
2147  IDVal = "}";
2148  } else if (Lexer.is(AsmToken::Star) &&
2149  getTargetParser().starIsStartOfStatement()) {
2150  // Accept '*' as a valid start of statement.
2151  Lex();
2152  IDVal = "*";
2153  } else if (Lexer.is(AsmToken::Real)) {
2154  // Treat ".<number>" as a valid identifier in this context.
2155  IDVal = getTok().getString();
2156  Lex(); // always eat a token
2157  if (!IDVal.startswith("."))
2158  return Error(IDLoc, "unexpected token at start of statement");
2159  } else if (parseIdentifier(IDVal, StartOfStatement)) {
2160  if (!TheCondState.Ignore) {
2161  Lex(); // always eat a token
2162  return Error(IDLoc, "unexpected token at start of statement");
2163  }
2164  IDVal = "";
2165  }
2166 
2167  // Handle conditional assembly here before checking for skipping. We
2168  // have to do this so that .endif isn't skipped in a ".if 0" block for
2169  // example.
  // NOTE(review): the line that declares `DirKindIt` (the left-hand side of
  // this initializer) is missing from this listing. The lookup key is the
  // lower-cased identifier.
2171  DirectiveKindMap.find(IDVal.lower());
2172  DirectiveKind DirKind = (DirKindIt == DirectiveKindMap.end())
2173  ? DK_NO_DIRECTIVE
2174  : DirKindIt->getValue();
2175  switch (DirKind) {
2176  default:
2177  break;
2178  case DK_IF:
2179  case DK_IFE:
2180  return parseDirectiveIf(IDLoc, DirKind);
2181  case DK_IFB:
2182  return parseDirectiveIfb(IDLoc, true);
2183  case DK_IFNB:
2184  return parseDirectiveIfb(IDLoc, false);
2185  case DK_IFDEF:
2186  return parseDirectiveIfdef(IDLoc, true);
2187  case DK_IFNDEF:
2188  return parseDirectiveIfdef(IDLoc, false);
2189  case DK_IFDIF:
2190  return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/false,
2191  /*CaseInsensitive=*/false);
2192  case DK_IFDIFI:
2193  return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/false,
2194  /*CaseInsensitive=*/true);
2195  case DK_IFIDN:
2196  return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/true,
2197  /*CaseInsensitive=*/false);
2198  case DK_IFIDNI:
2199  return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/true,
2200  /*CaseInsensitive=*/true);
2201  case DK_ELSEIF:
2202  case DK_ELSEIFE:
2203  return parseDirectiveElseIf(IDLoc, DirKind);
2204  case DK_ELSEIFB:
2205  return parseDirectiveElseIfb(IDLoc, true);
2206  case DK_ELSEIFNB:
2207  return parseDirectiveElseIfb(IDLoc, false);
2208  case DK_ELSEIFDEF:
2209  return parseDirectiveElseIfdef(IDLoc, true);
2210  case DK_ELSEIFNDEF:
2211  return parseDirectiveElseIfdef(IDLoc, false);
2212  case DK_ELSEIFDIF:
2213  return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/false,
2214  /*CaseInsensitive=*/false);
2215  case DK_ELSEIFDIFI:
2216  return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/false,
2217  /*CaseInsensitive=*/true);
2218  case DK_ELSEIFIDN:
2219  return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/true,
2220  /*CaseInsensitive=*/false);
2221  case DK_ELSEIFIDNI:
2222  return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/true,
2223  /*CaseInsensitive=*/true);
2224  case DK_ELSE:
2225  return parseDirectiveElse(IDLoc);
2226  case DK_ENDIF:
2227  return parseDirectiveEndIf(IDLoc);
2228  }
2229 
2230  // Ignore the statement if in the middle of inactive conditional
2231  // (e.g. ".if 0").
2232  if (TheCondState.Ignore) {
2233  eatToEndOfStatement();
2234  return false;
2235  }
2236 
2237  // FIXME: Recurse on local labels?
2238 
2239  // See what kind of statement we have.
2240  switch (Lexer.getKind()) {
2241  case AsmToken::Colon: {
  // The target parser may veto treating this identifier as a label; in that
  // case fall out of the switch and parse it as an ordinary statement.
2242  if (!getTargetParser().isLabel(ID))
2243  break;
2244  if (checkForValidSection())
2245  return true;
2246 
2247  // identifier ':' -> Label.
2248  Lex();
2249 
2250  // Diagnose attempt to use '.' as a label.
2251  if (IDVal == ".")
2252  return Error(IDLoc, "invalid use of pseudo-symbol '.' as a label");
2253 
2254  // Diagnose attempt to use a variable as a label.
2255  //
2256  // FIXME: Diagnostics. Note the location of the definition as a label.
2257  // FIXME: This doesn't diagnose assignment to a symbol which has been
2258  // implicitly marked as external.
2259  MCSymbol *Sym;
2260  if (LocalLabelVal == -1) {
  // For MS inline asm, swap the user-visible label for the name returned by
  // the callback and record the rewrite.
2261  if (ParsingMSInlineAsm && SI) {
2262  StringRef RewrittenLabel =
2263  SI->LookupInlineAsmLabel(IDVal, getSourceManager(), IDLoc, true);
2264  assert(!RewrittenLabel.empty() &&
2265  "We should have an internal name here.");
2266  Info.AsmRewrites->emplace_back(AOK_Label, IDLoc, IDVal.size(),
2267  RewrittenLabel);
2268  IDVal = RewrittenLabel;
2269  }
2270  Sym = getContext().getOrCreateSymbol(IDVal);
2271  } else
2272  Sym = Ctx.createDirectionalLocalSymbol(LocalLabelVal);
2273  // End of Labels should be treated as end of line for lexing
2274  // purposes but that information is not available to the Lexer who
2275  // does not understand Labels. This may cause us to see a Hash
2276  // here instead of a preprocessor line comment.
2277  if (getTok().is(AsmToken::Hash)) {
2278  std::string CommentStr = parseStringTo(AsmToken::EndOfStatement);
2279  Lexer.Lex();
2280  Lexer.UnLex(AsmToken(AsmToken::EndOfStatement, CommentStr));
2281  }
2282 
2283  // Consume any end of statement token, if present, to avoid spurious
2284  // AddBlankLine calls().
2285  if (getTok().is(AsmToken::EndOfStatement)) {
2286  Lex();
2287  }
2288 
2289  getTargetParser().doBeforeLabelEmit(Sym);
2290 
2291  // Emit the label.
2292  if (!getTargetParser().isParsingMSInlineAsm())
2293  Out.emitLabel(Sym, IDLoc);
2294 
2295  // If we are generating dwarf for assembly source files then gather the
2296  // info to make a dwarf label entry for this label if needed.
2297  if (enabledGenDwarfForAssembly())
2298  MCGenDwarfLabelEntry::Make(Sym, &getStreamer(), getSourceManager(),
2299  IDLoc);
2300 
2301  getTargetParser().onLabelParsed(Sym);
2302 
2303  return false;
2304  }
2305 
2306  default: // Normal instruction or directive.
2307  break;
2308  }
2309 
2310  // If macros are enabled, check to see if this is a macro instantiation.
2311  if (const MCAsmMacro *M = getContext().lookupMacro(IDVal.lower())) {
2312  return handleMacroEntry(M, IDLoc);
2313  }
2314 
2315  // Otherwise, we have a normal instruction or directive.
2316 
2317  if (DirKind != DK_NO_DIRECTIVE) {
2318  // There are several entities interested in parsing directives:
2319  //
2320  // 1. Asm parser extensions. For example, platform-specific parsers
2321  // (like the ELF parser) register themselves as extensions.
2322  // 2. The target-specific assembly parser. Some directives are target
2323  // specific or may potentially behave differently on certain targets.
2324  // 3. The generic directive parser implemented by this class. These are
2325  // all the directives that behave in a target and platform independent
2326  // manner, or at least have a default behavior that's shared between
2327  // all targets and platforms.
2328 
2329  getTargetParser().flushPendingInstructions(getStreamer());
2330 
2331  // Special-case handling of structure-end directives at higher priority,
2332  // since ENDS is overloaded as a segment-end directive.
2333  if (IDVal.equals_insensitive("ends") && StructInProgress.size() > 1 &&
2334  getTok().is(AsmToken::EndOfStatement)) {
2335  return parseDirectiveNestedEnds();
2336  }
2337 
2338  // First, check the extension directive map to see if any extension has
2339  // registered itself to parse this directive.
2340  std::pair<MCAsmParserExtension *, DirectiveHandler> Handler =
2341  ExtensionDirectiveMap.lookup(IDVal.lower());
2342  if (Handler.first)
2343  return (*Handler.second)(Handler.first, IDVal, IDLoc);
2344 
2345  // Next, let the target-specific assembly parser try.
  // The token location is recorded first so that "did the target parser
  // consume anything?" can be answered afterwards.
2346  SMLoc StartTokLoc = getTok().getLoc();
2347  bool TPDirectiveReturn =
2348  ID.is(AsmToken::Identifier) && getTargetParser().ParseDirective(ID);
2349 
2350  if (hasPendingError())
2351  return true;
2352  // Currently the return value should be true if we are
2353  // uninterested but as this is at odds with the standard parsing
2354  // convention (return true = error) we have instances of a parsed
2355  // directive that fails returning true as an error. Catch these
2356  // cases as best as possible errors here.
2357  if (TPDirectiveReturn && StartTokLoc != getTok().getLoc())
2358  return true;
2359  // Return if we did some parsing or believe we succeeded.
2360  if (!TPDirectiveReturn || StartTokLoc != getTok().getLoc())
2361  return false;
2362 
2363  // Finally, if no one else is interested in this directive, it must be
2364  // generic and familiar to this class.
2365  switch (DirKind) {
2366  default:
2367  break;
2368  case DK_ASCII:
2369  return parseDirectiveAscii(IDVal, false);
2370  case DK_ASCIZ:
2371  case DK_STRING:
2372  return parseDirectiveAscii(IDVal, true);
2373  case DK_BYTE:
2374  case DK_SBYTE:
2375  case DK_DB:
2376  return parseDirectiveValue(IDVal, 1);
2377  case DK_WORD:
2378  case DK_SWORD:
2379  case DK_DW:
2380  return parseDirectiveValue(IDVal, 2);
2381  case DK_DWORD:
2382  case DK_SDWORD:
2383  case DK_DD:
2384  return parseDirectiveValue(IDVal, 4);
2385  case DK_FWORD:
2386  case DK_DF:
2387  return parseDirectiveValue(IDVal, 6);
2388  case DK_QWORD:
2389  case DK_SQWORD:
2390  case DK_DQ:
2391  return parseDirectiveValue(IDVal, 8);
2392  case DK_REAL4:
2393  return parseDirectiveRealValue(IDVal, APFloat::IEEEsingle(), 4);
2394  case DK_REAL8:
2395  return parseDirectiveRealValue(IDVal, APFloat::IEEEdouble(), 8);
2396  case DK_REAL10:
2397  return parseDirectiveRealValue(IDVal, APFloat::x87DoubleExtended(), 10);
2398  case DK_STRUCT:
2399  case DK_UNION:
2400  return parseDirectiveNestedStruct(IDVal, DirKind);
2401  case DK_ENDS:
2402  return parseDirectiveNestedEnds();
2403  case DK_ALIGN:
2404  return parseDirectiveAlign();
2405  case DK_EVEN:
2406  return parseDirectiveEven();
2407  case DK_ORG:
2408  return parseDirectiveOrg();
2409  case DK_EXTERN:
2410  return parseDirectiveExtern();
2411  case DK_PUBLIC:
2412  return parseDirectiveSymbolAttribute(MCSA_Global);
2413  case DK_COMM:
2414  return parseDirectiveComm(/*IsLocal=*/false);
2415  case DK_COMMENT:
2416  return parseDirectiveComment(IDLoc);
2417  case DK_INCLUDE:
2418  return parseDirectiveInclude();
2419  case DK_REPEAT:
2420  return parseDirectiveRepeat(IDLoc, IDVal);
2421  case DK_WHILE:
2422  return parseDirectiveWhile(IDLoc);
2423  case DK_FOR:
2424  return parseDirectiveFor(IDLoc, IDVal);
2425  case DK_FORC:
2426  return parseDirectiveForc(IDLoc, IDVal);
2427  case DK_FILE:
2428  return parseDirectiveFile(IDLoc);
2429  case DK_LINE:
2430  return parseDirectiveLine();
2431  case DK_LOC:
2432  return parseDirectiveLoc();
2433  case DK_STABS:
2434  return parseDirectiveStabs();
2435  case DK_CV_FILE:
2436  return parseDirectiveCVFile();
2437  case DK_CV_FUNC_ID:
2438  return parseDirectiveCVFuncId();
2439  case DK_CV_INLINE_SITE_ID:
2440  return parseDirectiveCVInlineSiteId();
2441  case DK_CV_LOC:
2442  return parseDirectiveCVLoc();
2443  case DK_CV_LINETABLE:
2444  return parseDirectiveCVLinetable();
2445  case DK_CV_INLINE_LINETABLE:
2446  return parseDirectiveCVInlineLinetable();
2447  case DK_CV_DEF_RANGE:
2448  return parseDirectiveCVDefRange();
2449  case DK_CV_STRING:
2450  return parseDirectiveCVString();
2451  case DK_CV_STRINGTABLE:
2452  return parseDirectiveCVStringTable();
2453  case DK_CV_FILECHECKSUMS:
2454  return parseDirectiveCVFileChecksums();
2455  case DK_CV_FILECHECKSUM_OFFSET:
2456  return parseDirectiveCVFileChecksumOffset();
2457  case DK_CV_FPO_DATA:
2458  return parseDirectiveCVFPOData();
2459  case DK_CFI_SECTIONS:
2460  return parseDirectiveCFISections();
2461  case DK_CFI_STARTPROC:
2462  return parseDirectiveCFIStartProc();
2463  case DK_CFI_ENDPROC:
2464  return parseDirectiveCFIEndProc();
2465  case DK_CFI_DEF_CFA:
2466  return parseDirectiveCFIDefCfa(IDLoc);
2467  case DK_CFI_DEF_CFA_OFFSET:
2468  return parseDirectiveCFIDefCfaOffset();
2469  case DK_CFI_ADJUST_CFA_OFFSET:
2470  return parseDirectiveCFIAdjustCfaOffset();
2471  case DK_CFI_DEF_CFA_REGISTER:
2472  return parseDirectiveCFIDefCfaRegister(IDLoc);
2473  case DK_CFI_OFFSET:
2474  return parseDirectiveCFIOffset(IDLoc);
2475  case DK_CFI_REL_OFFSET:
2476  return parseDirectiveCFIRelOffset(IDLoc);
2477  case DK_CFI_PERSONALITY:
2478  return parseDirectiveCFIPersonalityOrLsda(true);
2479  case DK_CFI_LSDA:
2480  return parseDirectiveCFIPersonalityOrLsda(false);
2481  case DK_CFI_REMEMBER_STATE:
2482  return parseDirectiveCFIRememberState();
2483  case DK_CFI_RESTORE_STATE:
2484  return parseDirectiveCFIRestoreState();
2485  case DK_CFI_SAME_VALUE:
2486  return parseDirectiveCFISameValue(IDLoc);
2487  case DK_CFI_RESTORE:
2488  return parseDirectiveCFIRestore(IDLoc);
2489  case DK_CFI_ESCAPE:
2490  return parseDirectiveCFIEscape();
2491  case DK_CFI_RETURN_COLUMN:
2492  return parseDirectiveCFIReturnColumn(IDLoc);
2493  case DK_CFI_SIGNAL_FRAME:
2494  return parseDirectiveCFISignalFrame();
2495  case DK_CFI_UNDEFINED:
2496  return parseDirectiveCFIUndefined(IDLoc);
2497  case DK_CFI_REGISTER:
2498  return parseDirectiveCFIRegister(IDLoc);
2499  case DK_CFI_WINDOW_SAVE:
2500  return parseDirectiveCFIWindowSave();
2501  case DK_EXITM:
  // Give Info.ExitValue an (empty) value before the handler fills it in.
2502  Info.ExitValue = "";
2503  return parseDirectiveExitMacro(IDLoc, IDVal, *Info.ExitValue);
2504  case DK_ENDM:
2505  Info.ExitValue = "";
2506  return parseDirectiveEndMacro(IDVal);
2507  case DK_PURGE:
2508  return parseDirectivePurgeMacro(IDLoc);
2509  case DK_END:
2510  return parseDirectiveEnd(IDLoc);
2511  case DK_ERR:
2512  return parseDirectiveError(IDLoc);
2513  case DK_ERRB:
2514  return parseDirectiveErrorIfb(IDLoc, true);
2515  case DK_ERRNB:
2516  return parseDirectiveErrorIfb(IDLoc, false);
2517  case DK_ERRDEF:
2518  return parseDirectiveErrorIfdef(IDLoc, true);
2519  case DK_ERRNDEF:
2520  return parseDirectiveErrorIfdef(IDLoc, false);
2521  case DK_ERRDIF:
2522  return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/false,
2523  /*CaseInsensitive=*/false);
2524  case DK_ERRDIFI:
2525  return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/false,
2526  /*CaseInsensitive=*/true);
2527  case DK_ERRIDN:
2528  return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/true,
2529  /*CaseInsensitive=*/false);
2530  case DK_ERRIDNI:
2531  return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/true,
2532  /*CaseInsensitive=*/true);
2533  case DK_ERRE:
2534  return parseDirectiveErrorIfe(IDLoc, true);
2535  case DK_ERRNZ:
2536  return parseDirectiveErrorIfe(IDLoc, false);
2537  case DK_RADIX:
2538  return parseDirectiveRadix(IDLoc);
2539  case DK_ECHO:
2540  return parseDirectiveEcho(IDLoc);
2541  }
2542 
2543  return Error(IDLoc, "unknown directive");
2544  }
2545 
2546  // We also check if this is allocating memory with user-defined type.
2547  auto IDIt = Structs.find(IDVal.lower());
2548  if (IDIt != Structs.end())
2549  return parseDirectiveStructValue(/*Structure=*/IDIt->getValue(), IDVal,
2550  IDLoc);
2551 
2552  // Non-conditional Microsoft directives sometimes follow their first argument.
2553  const AsmToken nextTok = getTok();
2554  const StringRef nextVal = nextTok.getString();
2555  const SMLoc nextLoc = nextTok.getLoc();
2556 
  // One token of look-ahead past the candidate directive; used below to tell
  // "<size> ptr" operands apart from data definitions.
2557  const AsmToken afterNextTok = peekTok();
2558 
2559  // There are several entities interested in parsing infix directives:
2560  //
2561  // 1. Asm parser extensions. For example, platform-specific parsers
2562  // (like the ELF parser) register themselves as extensions.
2563  // 2. The generic directive parser implemented by this class. These are
2564  // all the directives that behave in a target and platform independent
2565  // manner, or at least have a default behavior that's shared between
2566  // all targets and platforms.
2567 
2568  getTargetParser().flushPendingInstructions(getStreamer());
2569 
2570  // Special-case handling of structure-end directives at higher priority, since
2571  // ENDS is overloaded as a segment-end directive.
2572  if (nextVal.equals_insensitive("ends") && StructInProgress.size() == 1) {
2573  Lex();
2574  return parseDirectiveEnds(IDVal, IDLoc);
2575  }
2576 
2577  // First, check the extension directive map to see if any extension has
2578  // registered itself to parse this directive.
2579  std::pair<MCAsmParserExtension *, DirectiveHandler> Handler =
2580  ExtensionDirectiveMap.lookup(nextVal.lower());
2581  if (Handler.first) {
  // Consume the directive token, then push the leading identifier back onto
  // the lexer before invoking the extension handler.
2582  Lex();
2583  Lexer.UnLex(ID);
2584  return (*Handler.second)(Handler.first, nextVal, nextLoc);
2585  }
2586 
2587  // If no one else is interested in this directive, it must be
2588  // generic and familiar to this class.
2589  DirKindIt = DirectiveKindMap.find(nextVal.lower());
2590  DirKind = (DirKindIt == DirectiveKindMap.end())
2591  ? DK_NO_DIRECTIVE
2592  : DirKindIt->getValue();
2593  switch (DirKind) {
2594  default:
2595  break;
2596  case DK_ASSIGN:
2597  case DK_EQU:
2598  case DK_TEXTEQU:
2599  Lex();
2600  return parseDirectiveEquate(nextVal, IDVal, DirKind, IDLoc);
2601  case DK_BYTE:
2602  if (afterNextTok.is(AsmToken::Identifier) &&
2603  afterNextTok.getString().equals_insensitive("ptr")) {
2604  // Size directive; part of an instruction.
2605  break;
2606  }
  // NOTE(review): when the look-ahead is not "ptr", control continues into
  // the data-definition cases below. One line is missing from this listing
  // here - presumably an explicit fallthrough marker; confirm. The same
  // pattern repeats for WORD, DWORD, FWORD and QWORD.
2608  case DK_SBYTE:
2609  case DK_DB:
2610  Lex();
2611  return parseDirectiveNamedValue(nextVal, 1, IDVal, IDLoc);
2612  case DK_WORD:
2613  if (afterNextTok.is(AsmToken::Identifier) &&
2614  afterNextTok.getString().equals_insensitive("ptr")) {
2615  // Size directive; part of an instruction.
2616  break;
2617  }
2619  case DK_SWORD:
2620  case DK_DW:
2621  Lex();
2622  return parseDirectiveNamedValue(nextVal, 2, IDVal, IDLoc);
2623  case DK_DWORD:
2624  if (afterNextTok.is(AsmToken::Identifier) &&
2625  afterNextTok.getString().equals_insensitive("ptr")) {
2626  // Size directive; part of an instruction.
2627  break;
2628  }
2630  case DK_SDWORD:
2631  case DK_DD:
2632  Lex();
2633  return parseDirectiveNamedValue(nextVal, 4, IDVal, IDLoc);
2634  case DK_FWORD:
2635  if (afterNextTok.is(AsmToken::Identifier) &&
2636  afterNextTok.getString().equals_insensitive("ptr")) {
2637  // Size directive; part of an instruction.
2638  break;
2639  }
2641  case DK_DF:
2642  Lex();
2643  return parseDirectiveNamedValue(nextVal, 6, IDVal, IDLoc);
2644  case DK_QWORD:
2645  if (afterNextTok.is(AsmToken::Identifier) &&
2646  afterNextTok.getString().equals_insensitive("ptr")) {
2647  // Size directive; part of an instruction.
2648  break;
2649  }
2651  case DK_SQWORD:
2652  case DK_DQ:
2653  Lex();
2654  return parseDirectiveNamedValue(nextVal, 8, IDVal, IDLoc);
2655  case DK_REAL4:
2656  Lex();
2657  return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEsingle(), 4,
2658  IDVal, IDLoc);
2659  case DK_REAL8:
2660  Lex();
2661  return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEdouble(), 8,
2662  IDVal, IDLoc);
2663  case DK_REAL10:
2664  Lex();
2665  return parseDirectiveNamedRealValue(nextVal, APFloat::x87DoubleExtended(),
2666  10, IDVal, IDLoc);
2667  case DK_STRUCT:
2668  case DK_UNION:
2669  Lex();
2670  return parseDirectiveStruct(nextVal, DirKind, IDVal, IDLoc);
2671  case DK_ENDS:
2672  Lex();
2673  return parseDirectiveEnds(IDVal, IDLoc);
2674  case DK_MACRO:
2675  Lex();
2676  return parseDirectiveMacro(IDVal, IDLoc);
2677  }
2678 
2679  // Finally, we check if this is allocating a variable with user-defined type.
2680  auto NextIt = Structs.find(nextVal.lower());
2681  if (NextIt != Structs.end()) {
2682  Lex();
2683  return parseDirectiveNamedStructValue(/*Structure=*/NextIt->getValue(),
2684  nextVal, nextLoc, IDVal);
2685  }
2686 
2687  // __asm _emit or __asm __emit
2688  if (ParsingMSInlineAsm && (IDVal == "_emit" || IDVal == "__emit" ||
2689  IDVal == "_EMIT" || IDVal == "__EMIT"))
2690  return parseDirectiveMSEmit(IDLoc, Info, IDVal.size());
2691 
2692  // __asm align
2693  if (ParsingMSInlineAsm && (IDVal == "align" || IDVal == "ALIGN"))
2694  return parseDirectiveMSAlign(IDLoc, Info);
2695 
  // __asm even: only a rewrite is recorded; parsing continues below.
  // NOTE(review): the literal 4 is presumably the length of "even" - confirm.
2696  if (ParsingMSInlineAsm && (IDVal == "even" || IDVal == "EVEN"))
2697  Info.AsmRewrites->emplace_back(AOK_EVEN, IDLoc, 4);
2698  if (checkForValidSection())
2699  return true;
2700 
2701  // Canonicalize the opcode to lower case.
2702  std::string OpcodeStr = IDVal.lower();
2703  ParseInstructionInfo IInfo(Info.AsmRewrites);
2704  bool ParseHadError = getTargetParser().ParseInstruction(IInfo, OpcodeStr, ID,
2705  Info.ParsedOperands);
2706  Info.ParseError = ParseHadError;
2707 
2708  // Dump the parsed representation, if requested.
2709  if (getShowParsedOperands()) {
2710  SmallString<256> Str;
2711  raw_svector_ostream OS(Str);
2712  OS << "parsed instruction: [";
2713  for (unsigned i = 0; i != Info.ParsedOperands.size(); ++i) {
2714  if (i != 0)
2715  OS << ", ";
2716  Info.ParsedOperands[i]->print(OS);
2717  }
2718  OS << "]";
2719 
2720  printMessage(IDLoc, SourceMgr::DK_Note, OS.str());
2721  }
2722 
2723  // Fail even if ParseInstruction erroneously returns false.
2724  if (hasPendingError() || ParseHadError)
2725  return true;
2726 
2727  // If we are generating dwarf for the current section then generate a .loc
2728  // directive for the instruction.
2729  if (!ParseHadError && enabledGenDwarfForAssembly() &&
2730  getContext().getGenDwarfSectionSyms().count(
2731  getStreamer().getCurrentSectionOnly())) {
2732  unsigned Line;
  // Inside a macro expansion the line is taken from the first entry of
  // ActiveMacros (its instantiation location) rather than from IDLoc.
2733  if (ActiveMacros.empty())
2734  Line = SrcMgr.FindLineNumber(IDLoc, CurBuffer);
2735  else
2736  Line = SrcMgr.FindLineNumber(ActiveMacros.front()->InstantiationLoc,
2737  ActiveMacros.front()->ExitBuffer);
2738 
2739  // If we previously parsed a cpp hash file line comment then make sure the
2740  // current Dwarf File is for the CppHashFilename if not then emit the
2741  // Dwarf File table for it and adjust the line number for the .loc.
2742  if (!CppHashInfo.Filename.empty()) {
2743  unsigned FileNumber = getStreamer().emitDwarfFileDirective(
2744  0, StringRef(), CppHashInfo.Filename);
2745  getContext().setGenDwarfFileNumber(FileNumber);
2746 
2747  unsigned CppHashLocLineNo =
2748  SrcMgr.FindLineNumber(CppHashInfo.Loc, CppHashInfo.Buf);
  // Rebase: distance from the hash marker's own line, added to the line
  // number the marker declared (minus one).
2749  Line = CppHashInfo.LineNumber - 1 + (Line - CppHashLocLineNo);
2750  }
2751 
  // NOTE(review): one argument line of this call is missing from this
  // listing (between `Line, 0,` and `StringRef()`) - confirm.
2752  getStreamer().emitDwarfLocDirective(
2753  getContext().getGenDwarfFileNumber(), Line, 0,
2755  StringRef());
2756  }
2757 
2758  // If parsing succeeded, match the instruction.
2759  if (!ParseHadError) {
  // NOTE(review): the declaration of `ErrorInfo`, passed below, is missing
  // from this listing - confirm.
2761  if (getTargetParser().MatchAndEmitInstruction(
2762  IDLoc, Info.Opcode, Info.ParsedOperands, Out, ErrorInfo,
2763  getTargetParser().isParsingMSInlineAsm()))
2764  return true;
2765  }
2766  return false;
2767 }
2768 
2769 // Parse and erase curly braces marking block start/end.
2770 bool MasmParser::parseCurlyBlockScope(
2771  SmallVectorImpl<AsmRewrite> &AsmStrRewrites) {
2772  // Identify curly brace marking block start/end.
2773  if (Lexer.isNot(AsmToken::LCurly) && Lexer.isNot(AsmToken::RCurly))
2774  return false;
2775 
2776  SMLoc StartLoc = Lexer.getLoc();
2777  Lex(); // Eat the brace.
2778  if (Lexer.is(AsmToken::EndOfStatement))
2779  Lex(); // Eat EndOfStatement following the brace.
2780 
2781  // Erase the block start/end brace from the output asm string.
2782  AsmStrRewrites.emplace_back(AOK_Skip, StartLoc, Lexer.getLoc().getPointer() -
2783  StartLoc.getPointer());
2784  return true;
2785 }
2786 
2787 /// parseCppHashLineFilenameComment as this:
2788 /// ::= # number "filename"
2789 bool MasmParser::parseCppHashLineFilenameComment(SMLoc L) {
2790  Lex(); // Eat the hash token.
2791  // Lexer only ever emits HashDirective if it fully formed if it's
2792  // done the checking already so this is an internal error.
2793  assert(getTok().is(AsmToken::Integer) &&
2794  "Lexing Cpp line comment: Expected Integer");
2795  int64_t LineNumber = getTok().getIntVal();
2796  Lex();
2797  assert(getTok().is(AsmToken::String) &&
2798  "Lexing Cpp line comment: Expected String");
2799  StringRef Filename = getTok().getString();
2800  Lex();
2801 
2802  // Get rid of the enclosing quotes.
2803  Filename = Filename.substr(1, Filename.size() - 2);
2804 
2805  // Save the SMLoc, Filename and LineNumber for later use by diagnostics
2806  // and possibly DWARF file info.
2807  CppHashInfo.Loc = L;
2808  CppHashInfo.Filename = Filename;
2809  CppHashInfo.LineNumber = LineNumber;
2810  CppHashInfo.Buf = CurBuffer;
2811  if (FirstCppHashFilename.empty())
2812  FirstCppHashFilename = Filename;
2813  return false;
2814 }
2815 
2816 /// will use the last parsed cpp hash line filename comment
2817 /// for the Filename and LineNo if any in the diagnostic.
2818 void MasmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) {
2819  const MasmParser *Parser = static_cast<const MasmParser *>(Context);
2820  raw_ostream &OS = errs();
2821 
2822  const SourceMgr &DiagSrcMgr = *Diag.getSourceMgr();
2823  SMLoc DiagLoc = Diag.getLoc();
2824  unsigned DiagBuf = DiagSrcMgr.FindBufferContainingLoc(DiagLoc);
2825  unsigned CppHashBuf =
2826  Parser->SrcMgr.FindBufferContainingLoc(Parser->CppHashInfo.Loc);
2827 
2828  // Like SourceMgr::printMessage() we need to print the include stack if any
2829  // before printing the message.
2830  unsigned DiagCurBuffer = DiagSrcMgr.FindBufferContainingLoc(DiagLoc);
2831  if (!Parser->SavedDiagHandler && DiagCurBuffer &&
2832  DiagCurBuffer != DiagSrcMgr.getMainFileID()) {
2833  SMLoc ParentIncludeLoc = DiagSrcMgr.getParentIncludeLoc(DiagCurBuffer);
2834  DiagSrcMgr.PrintIncludeStack(ParentIncludeLoc, OS);
2835  }
2836 
2837  // If we have not parsed a cpp hash line filename comment or the source
2838  // manager changed or buffer changed (like in a nested include) then just
2839  // print the normal diagnostic using its Filename and LineNo.
2840  if (!Parser->CppHashInfo.LineNumber || &DiagSrcMgr != &Parser->SrcMgr ||
2841  DiagBuf != CppHashBuf) {
2842  if (Parser->SavedDiagHandler)
2843  Parser->SavedDiagHandler(Diag, Parser->SavedDiagContext);
2844  else
2845  Diag.print(nullptr, OS);
2846  return;
2847  }
2848 
2849  // Use the CppHashFilename and calculate a line number based on the
2850  // CppHashInfo.Loc and CppHashInfo.LineNumber relative to this Diag's SMLoc
2851  // for the diagnostic.
2852  const std::string &Filename = std::string(Parser->CppHashInfo.Filename);
2853 
2854  int DiagLocLineNo = DiagSrcMgr.FindLineNumber(DiagLoc, DiagBuf);
2855  int CppHashLocLineNo =
2856  Parser->SrcMgr.FindLineNumber(Parser->CppHashInfo.Loc, CppHashBuf);
2857  int LineNo =
2858  Parser->CppHashInfo.LineNumber - 1 + (DiagLocLineNo - CppHashLocLineNo);
2859 
2860  SMDiagnostic NewDiag(*Diag.getSourceMgr(), Diag.getLoc(), Filename, LineNo,
2861  Diag.getColumnNo(), Diag.getKind(), Diag.getMessage(),
2862  Diag.getLineContents(), Diag.getRanges());
2863 
2864  if (Parser->SavedDiagHandler)
2865  Parser->SavedDiagHandler(NewDiag, Parser->SavedDiagContext);
2866  else
2867  NewDiag.print(nullptr, OS);
2868 }
2869 
2870 // This is similar to the IsIdentifierChar function in AsmLexer.cpp, but does
2871 // not accept '.'.
2872 static bool isMacroParameterChar(char C) {
2873  return isAlnum(C) || C == '_' || C == '$' || C == '@' || C == '?';
2874 }
2875 
2876 bool MasmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
2877  ArrayRef<MCAsmMacroParameter> Parameters,
2879  const std::vector<std::string> &Locals, SMLoc L) {
2880  unsigned NParameters = Parameters.size();
2881  if (NParameters != A.size())
2882  return Error(L, "Wrong number of arguments");
2883  StringMap<std::string> LocalSymbols;
2884  std::string Name;
2885  Name.reserve(6);
2886  for (StringRef Local : Locals) {
2888  LocalName << "??"
2889  << format_hex_no_prefix(LocalCounter++, 4, /*Upper=*/true);
2890  LocalSymbols.insert({Local, LocalName.str()});
2891  Name.clear();
2892  }
2893 
2894  Optional<char> CurrentQuote;
2895  while (!Body.empty()) {
2896  // Scan for the next substitution.
2897  std::size_t End = Body.size(), Pos = 0;
2898  std::size_t IdentifierPos = End;
2899  for (; Pos != End; ++Pos) {
2900  // Find the next possible macro parameter, including preceding a '&'
2901  // inside quotes.
2902  if (Body[Pos] == '&')
2903  break;
2904  if (isMacroParameterChar(Body[Pos])) {
2905  if (!CurrentQuote.hasValue())
2906  break;
2907  if (IdentifierPos == End)
2908  IdentifierPos = Pos;
2909  } else {
2910  IdentifierPos = End;
2911  }
2912 
2913  // Track quotation status
2914  if (!CurrentQuote.hasValue()) {
2915  if (Body[Pos] == '\'' || Body[Pos] == '"')
2916  CurrentQuote = Body[Pos];
2917  } else if (Body[Pos] == CurrentQuote) {
2918  if (Pos + 1 != End && Body[Pos + 1] == CurrentQuote) {
2919  // Escaped quote, and quotes aren't identifier chars; skip
2920  ++Pos;
2921  continue;
2922  } else {
2923  CurrentQuote.reset();
2924  }
2925  }
2926  }
2927  if (IdentifierPos != End) {
2928  // We've recognized an identifier before an apostrophe inside quotes;
2929  // check once to see if we can expand it.
2930  Pos = IdentifierPos;
2931  IdentifierPos = End;
2932  }
2933 
2934  // Add the prefix.
2935  OS << Body.slice(0, Pos);
2936 
2937  // Check if we reached the end.
2938  if (Pos == End)
2939  break;
2940 
2941  unsigned I = Pos;
2942  bool InitialAmpersand = (Body[I] == '&');
2943  if (InitialAmpersand) {
2944  ++I;
2945  ++Pos;
2946  }
2947  while (I < End && isMacroParameterChar(Body[I]))
2948  ++I;
2949 
2950  const char *Begin = Body.data() + Pos;
2951  StringRef Argument(Begin, I - Pos);
2952  const std::string ArgumentLower = Argument.lower();
2953  unsigned Index = 0;
2954 
2955  for (; Index < NParameters; ++Index)
2956  if (Parameters[Index].Name.equals_insensitive(ArgumentLower))
2957  break;
2958 
2959  if (Index == NParameters) {
2960  if (InitialAmpersand)
2961  OS << '&';
2962  auto it = LocalSymbols.find(ArgumentLower);
2963  if (it != LocalSymbols.end())
2964  OS << it->second;
2965  else
2966  OS << Argument;
2967  Pos = I;
2968  } else {
2969  for (const AsmToken &Token : A[Index]) {
2970  // In MASM, you can write '%expr'.
2971  // The prefix '%' evaluates the expression 'expr'
2972  // and uses the result as a string (e.g. replace %(1+2) with the
2973  // string "3").
2974  // Here, we identify the integer token which is the result of the
2975  // absolute expression evaluation and replace it with its string
2976  // representation.
2977  if (Token.getString().front() == '%' && Token.is(AsmToken::Integer))
2978  // Emit an integer value to the buffer.
2979  OS << Token.getIntVal();
2980  else
2981  OS << Token.getString();
2982  }
2983 
2984  Pos += Argument.size();
2985  if (Pos < End && Body[Pos] == '&') {
2986  ++Pos;
2987  }
2988  }
2989  // Update the scan point.
2990  Body = Body.substr(Pos);
2991  }
2992 
2993  return false;
2994 }
2995 
2996 static bool isOperator(AsmToken::TokenKind kind) {
2997  switch (kind) {
2998  default:
2999  return false;
3000  case AsmToken::Plus:
3001  case AsmToken::Minus:
3002  case AsmToken::Tilde:
3003  case AsmToken::Slash:
3004  case AsmToken::Star:
3005  case AsmToken::Dot:
3006  case AsmToken::Equal:
3007  case AsmToken::EqualEqual:
3008  case AsmToken::Pipe:
3009  case AsmToken::PipePipe:
3010  case AsmToken::Caret:
3011  case AsmToken::Amp:
3012  case AsmToken::AmpAmp:
3013  case AsmToken::Exclaim:
3014  case AsmToken::ExclaimEqual:
3015  case AsmToken::Less:
3016  case AsmToken::LessEqual:
3017  case AsmToken::LessLess:
3018  case AsmToken::LessGreater:
3019  case AsmToken::Greater:
3020  case AsmToken::GreaterEqual:
3021  case AsmToken::GreaterGreater:
3022  return true;
3023  }
3024 }
3025 
3026 namespace {
3027 
3028 class AsmLexerSkipSpaceRAII {
3029 public:
3030  AsmLexerSkipSpaceRAII(AsmLexer &Lexer, bool SkipSpace) : Lexer(Lexer) {
3031  Lexer.setSkipSpace(SkipSpace);
3032  }
3033 
3034  ~AsmLexerSkipSpaceRAII() {
3035  Lexer.setSkipSpace(true);
3036  }
3037 
3038 private:
3039  AsmLexer &Lexer;
3040 };
3041 
3042 } // end anonymous namespace
3043 
3044 bool MasmParser::parseMacroArgument(const MCAsmMacroParameter *MP,
3045  MCAsmMacroArgument &MA,
3046  AsmToken::TokenKind EndTok) {
3047  if (MP && MP->Vararg) {
3048  if (Lexer.isNot(EndTok)) {
3049  SmallVector<StringRef, 1> Str = parseStringRefsTo(EndTok);
3050  for (StringRef S : Str) {
3051  MA.emplace_back(AsmToken::String, S);
3052  }
3053  }
3054  return false;
3055  }
3056 
3057  SMLoc StrLoc = Lexer.getLoc(), EndLoc;
3058  if (Lexer.is(AsmToken::Less) && isAngleBracketString(StrLoc, EndLoc)) {
3059  const char *StrChar = StrLoc.getPointer() + 1;
3060  const char *EndChar = EndLoc.getPointer() - 1;
3061  jumpToLoc(EndLoc, CurBuffer, EndStatementAtEOFStack.back());
3062  /// Eat from '<' to '>'.
3063  Lex();
3064  MA.emplace_back(AsmToken::String, StringRef(StrChar, EndChar - StrChar));
3065  return false;
3066  }
3067 
3068  unsigned ParenLevel = 0;
3069 
3070  // Darwin doesn't use spaces to delmit arguments.
3071  AsmLexerSkipSpaceRAII ScopedSkipSpace(Lexer, IsDarwin);
3072 
3073  bool SpaceEaten;
3074 
3075  while (true) {
3076  SpaceEaten = false;
3077  if (Lexer.is(AsmToken::Eof) || Lexer.is(AsmToken::Equal))
3078  return TokError("unexpected token");
3079 
3080  if (ParenLevel == 0) {
3081  if (Lexer.is(AsmToken::Comma))
3082  break;
3083 
3084  if (Lexer.is(AsmToken::Space)) {
3085  SpaceEaten = true;
3086  Lex(); // Eat spaces.
3087  }
3088 
3089  // Spaces can delimit parameters, but could also be part an expression.
3090  // If the token after a space is an operator, add the token and the next
3091  // one into this argument
3092  if (!IsDarwin) {
3093  if (isOperator(Lexer.getKind()) && Lexer.isNot(EndTok)) {
3094  MA.push_back(getTok());
3095  Lex();
3096 
3097  // Whitespace after an operator can be ignored.
3098  if (Lexer.is(AsmToken::Space))
3099  Lex();
3100 
3101  continue;
3102  }
3103  }
3104  if (SpaceEaten)
3105  break;
3106  }
3107 
3108  // handleMacroEntry relies on not advancing the lexer here
3109  // to be able to fill in the remaining default parameter values
3110  if (Lexer.is(EndTok) && (EndTok != AsmToken::RParen || ParenLevel == 0))
3111  break;
3112 
3113  // Adjust the current parentheses level.
3114  if (Lexer.is(AsmToken::LParen))
3115  ++ParenLevel;
3116  else if (Lexer.is(AsmToken::RParen) && ParenLevel)
3117  --ParenLevel;
3118 
3119  // Append the token to the current argument list.
3120  MA.push_back(getTok());
3121  Lex();
3122  }
3123 
3124  if (ParenLevel != 0)
3125  return TokError("unbalanced parentheses in argument");
3126 
3127  if (MA.empty() && MP) {
3128  if (MP->Required) {
3129  return TokError("missing value for required parameter '" + MP->Name +
3130  "'");
3131  } else {
3132  MA = MP->Value;
3133  }
3134  }
3135  return false;
3136 }
3137 
3138 // Parse the macro instantiation arguments.
3139 bool MasmParser::parseMacroArguments(const MCAsmMacro *M,
3140  MCAsmMacroArguments &A,
3141  AsmToken::TokenKind EndTok) {
3142  const unsigned NParameters = M ? M->Parameters.size() : 0;
3143  bool NamedParametersFound = false;
3144  SmallVector<SMLoc, 4> FALocs;
3145 
3146  A.resize(NParameters);
3147  FALocs.resize(NParameters);
3148 
3149  // Parse two kinds of macro invocations:
3150  // - macros defined without any parameters accept an arbitrary number of them
3151  // - macros defined with parameters accept at most that many of them
3152  for (unsigned Parameter = 0; !NParameters || Parameter < NParameters;
3153  ++Parameter) {
3154  SMLoc IDLoc = Lexer.getLoc();
3156 
3157  if (Lexer.is(AsmToken::Identifier) && peekTok().is(AsmToken::Equal)) {
3158  if (parseIdentifier(FA.Name))
3159  return Error(IDLoc, "invalid argument identifier for formal argument");
3160 
3161  if (Lexer.isNot(AsmToken::Equal))
3162  return TokError("expected '=' after formal parameter identifier");
3163 
3164  Lex();
3165 
3166  NamedParametersFound = true;
3167  }
3168 
3169  if (NamedParametersFound && FA.Name.empty())
3170  return Error(IDLoc, "cannot mix positional and keyword arguments");
3171 
3172  unsigned PI = Parameter;
3173  if (!FA.Name.empty()) {
3174  assert(M && "expected macro to be defined");
3175  unsigned FAI = 0;
3176  for (FAI = 0; FAI < NParameters; ++FAI)
3177  if (M->Parameters[FAI].Name == FA.Name)
3178  break;
3179 
3180  if (FAI >= NParameters) {
3181  return Error(IDLoc, "parameter named '" + FA.Name +
3182  "' does not exist for macro '" + M->Name + "'");
3183  }
3184  PI = FAI;
3185  }
3186  const MCAsmMacroParameter *MP = nullptr;
3187  if (M && PI < NParameters)
3188  MP = &M->Parameters[PI];
3189 
3190  SMLoc StrLoc = Lexer.getLoc();
3191  SMLoc EndLoc;
3192  if (Lexer.is(AsmToken::Percent)) {
3193  const MCExpr *AbsoluteExp;
3194  int64_t Value;
3195  /// Eat '%'.
3196  Lex();
3197  if (parseExpression(AbsoluteExp, EndLoc))
3198  return false;
3199  if (!AbsoluteExp->evaluateAsAbsolute(Value,
3200  getStreamer().getAssemblerPtr()))
3201  return Error(StrLoc, "expected absolute expression");
3202  const char *StrChar = StrLoc.getPointer();
3203  const char *EndChar = EndLoc.getPointer();
3204  AsmToken newToken(AsmToken::Integer,
3205  StringRef(StrChar, EndChar - StrChar), Value);
3206  FA.Value.push_back(newToken);
3207  } else if (parseMacroArgument(MP, FA.Value, EndTok)) {
3208  if (M)
3209  return addErrorSuffix(" in '" + M->Name + "' macro");
3210  else
3211  return true;
3212  }
3213 
3214  if (!FA.Value.empty()) {
3215  if (A.size() <= PI)
3216  A.resize(PI + 1);
3217  A[PI] = FA.Value;
3218 
3219  if (FALocs.size() <= PI)
3220  FALocs.resize(PI + 1);
3221 
3222  FALocs[PI] = Lexer.getLoc();
3223  }
3224 
3225  // At the end of the statement, fill in remaining arguments that have
3226  // default values. If there aren't any, then the next argument is
3227  // required but missing
3228  if (Lexer.is(EndTok)) {
3229  bool Failure = false;
3230  for (unsigned FAI = 0; FAI < NParameters; ++FAI) {
3231  if (A[FAI].empty()) {
3232  if (M->Parameters[FAI].Required) {
3233  Error(FALocs[FAI].isValid() ? FALocs[FAI] : Lexer.getLoc(),
3234  "missing value for required parameter "
3235  "'" +
3236  M->Parameters[FAI].Name + "' in macro '" + M->Name + "'");
3237  Failure = true;
3238  }
3239 
3240  if (!M->Parameters[FAI].Value.empty())
3241  A[FAI] = M->Parameters[FAI].Value;
3242  }
3243  }
3244  return Failure;
3245  }
3246 
3247  if (Lexer.is(AsmToken::Comma))
3248  Lex();
3249  }
3250 
3251  return TokError("too many positional arguments");
3252 }
3253 
3254 bool MasmParser::handleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc,
3255  AsmToken::TokenKind ArgumentEndTok) {
3256  // Arbitrarily limit macro nesting depth (default matches 'as'). We can
3257  // eliminate this, although we should protect against infinite loops.
3258  unsigned MaxNestingDepth = AsmMacroMaxNestingDepth;
3259  if (ActiveMacros.size() == MaxNestingDepth) {
3260  std::ostringstream MaxNestingDepthError;
3261  MaxNestingDepthError << "macros cannot be nested more than "
3262  << MaxNestingDepth << " levels deep."
3263  << " Use -asm-macro-max-nesting-depth to increase "
3264  "this limit.";
3265  return TokError(MaxNestingDepthError.str());
3266  }
3267 
3268  MCAsmMacroArguments A;
3269  if (parseMacroArguments(M, A, ArgumentEndTok))
3270  return true;
3271 
3272  // Macro instantiation is lexical, unfortunately. We construct a new buffer
3273  // to hold the macro body with substitutions.
3274  SmallString<256> Buf;
3275  StringRef Body = M->Body;
3276  raw_svector_ostream OS(Buf);
3277 
3278  if (expandMacro(OS, Body, M->Parameters, A, M->Locals, getTok().getLoc()))
3279  return true;
3280 
3281  // We include the endm in the buffer as our cue to exit the macro
3282  // instantiation.
3283  OS << "endm\n";
3284 
3285  std::unique_ptr<MemoryBuffer> Instantiation =
3286  MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>");
3287 
3288  // Create the macro instantiation object and add to the current macro
3289  // instantiation stack.
3290  MacroInstantiation *MI = new MacroInstantiation{
3291  NameLoc, CurBuffer, getTok().getLoc(), TheCondStack.size()};
3292  ActiveMacros.push_back(MI);
3293 
3294  ++NumOfMacroInstantiations;
3295 
3296  // Jump to the macro instantiation and prime the lexer.
3297  CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Instantiation), SMLoc());
3298  Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
3299  EndStatementAtEOFStack.push_back(true);
3300  Lex();
3301 
3302  return false;
3303 }
3304 
3305 void MasmParser::handleMacroExit() {
3306  // Jump to the token we should return to, and consume it.
3307  EndStatementAtEOFStack.pop_back();
3308  jumpToLoc(ActiveMacros.back()->ExitLoc, ActiveMacros.back()->ExitBuffer,
3309  EndStatementAtEOFStack.back());
3310  Lex();
3311 
3312  // Pop the instantiation entry.
3313  delete ActiveMacros.back();
3314  ActiveMacros.pop_back();
3315 }
3316 
/// Invoke macro function \p M: parse its parenthesized arguments, run the
/// statements of its instantiation until one yields an exit value, then lex
/// that exit value from a new "<macro-value>" buffer.
///
/// \param M       macro to invoke; rejected unless M->IsFunction is set.
/// \param NameLoc location used for the "not a function" diagnostic and
///        forwarded to handleMacroEntry.
/// \returns true on error, false once the lexer is primed on the exit value.
3317 bool MasmParser::handleMacroInvocation(const MCAsmMacro *M, SMLoc NameLoc) {
3318  if (!M->IsFunction)
3319  return Error(NameLoc, "cannot invoke macro procedure as function");
3320 
// Require '(' and then enter the macro with ')' as the argument terminator.
3321  if (parseToken(AsmToken::LParen, "invoking macro function '" + M->Name +
3322  "' requires arguments in parentheses") ||
3323  handleMacroEntry(M, NameLoc, AsmToken::RParen))
3324  return true;
3325 
3326  // Parse all statements in the macro, retrieving the exit value when it ends.
3327  std::string ExitValue;
3328  SmallVector<AsmRewrite, 4> AsmStrRewrites;
3329  while (Lexer.isNot(AsmToken::Eof)) {
3330  ParseStatementInfo Info(&AsmStrRewrites);
// Holds parseStatement's result; the code below treats true as the failure
// case and false as success.
3331  bool Parsed = parseStatement(Info, nullptr);
3332 
// A successful statement that produced an exit value ends the macro body.
3333  if (!Parsed && Info.ExitValue.hasValue()) {
3334  ExitValue = std::move(*Info.ExitValue);
3335  break;
3336  }
3337 
3338  // If we have a Lexer Error we are on an Error Token. Load in Lexer Error
3339  // for printing ErrMsg via Lex() only if no (presumably better) parser error
3340  // exists.
3341  if (Parsed && !hasPendingError() && Lexer.getTok().is(AsmToken::Error)) {
3342  Lex();
3343  }
3344 
3345  // parseStatement returned true so may need to emit an error.
3346  printPendingErrors();
3347 
3348  // Skipping to the next line if needed.
3349  if (Parsed && !getLexer().isAtStartOfStatement())
3350  eatToEndOfStatement();
3351  }
3352 
3353  // Consume the right-parenthesis on the other side of the arguments.
3354  if (parseRParen())
3355  return true;
3356 
3357  // Exit values may require lexing, unfortunately. We construct a new buffer to
3358  // hold the exit value.
3359  std::unique_ptr<MemoryBuffer> MacroValue =
3360  MemoryBuffer::getMemBufferCopy(ExitValue, "<macro-value>");
3361 
3362  // Jump from this location to the instantiated exit value, and prime the
3363  // lexer.
3364  CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(MacroValue), Lexer.getLoc());
3365  Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(), nullptr,
3366  /*EndStatementAtEOF=*/false);
// Mirror the setBuffer flag above on the stack: reaching the end of this
// buffer does not end the statement.
3367  EndStatementAtEOFStack.push_back(false);
3368  Lex();
3369 
3370  return false;
3371 }
3372 
3373 /// parseIdentifier:
3374 /// ::= identifier
3375 /// ::= string
///
/// Also accepts a '$' or '@' immediately followed by an identifier and returns
/// the two as one combined identifier.
///
/// \param Res      receives the identifier text on success.
/// \param Position when StartOfStatement and the identifier is one of echo,
///        ifdef, ifndef, elseifdef or elseifndef (matched case-insensitively),
///        the following token is lexed without macro expansion.
/// \returns true if no identifier could be parsed (nothing is consumed in
///          that case), false on success.
3376 bool MasmParser::parseIdentifier(StringRef &Res,
3377  IdentifierPositionKind Position) {
3378  // The assembler has relaxed rules for accepting identifiers, in particular we
3379  // allow things like '.globl $foo' and '.def @feat.00', which would normally
3380  // be separate tokens. At this level, we have already lexed so we cannot
3381  // (currently) handle this as a context dependent token, instead we detect
3382  // adjacent tokens and return the combined identifier.
3383  if (Lexer.is(AsmToken::Dollar) || Lexer.is(AsmToken::At)) {
3384  SMLoc PrefixLoc = getLexer().getLoc();
3385 
3386  // Consume the prefix character, and check for a following identifier.
3387 
// Only peek here; nothing is consumed until both checks below pass.
3388  AsmToken nextTok = peekTok(false);
3389 
3390  if (nextTok.isNot(AsmToken::Identifier))
3391  return true;
3392 
3393  // We have a '$' or '@' followed by an identifier, make sure they are adjacent.
3394  if (PrefixLoc.getPointer() + 1 != nextTok.getLoc().getPointer())
3395  return true;
3396 
3397  // eat $ or @
3398  Lexer.Lex(); // Lexer's Lex guarantees consecutive token.
3399  // Construct the joined identifier and consume the token.
// The result starts at the prefix character and is one character longer than
// the identifier that follows it.
3400  Res =
3401  StringRef(PrefixLoc.getPointer(), getTok().getIdentifier().size() + 1);
3402  Lex(); // Parser Lex to maintain invariants.
3403  return false;
3404  }
3405 
3406  if (Lexer.isNot(AsmToken::Identifier) && Lexer.isNot(AsmToken::String))
3407  return true;
3408 
3409  Res = getTok().getIdentifier();
3410 
3411  // Consume the identifier token - but if parsing certain directives, avoid
3412  // lexical expansion of the next token.
3413  ExpandKind ExpandNextToken = ExpandMacros;
3414  if (Position == StartOfStatement &&
3415  StringSwitch<bool>(Res)
3416  .CaseLower("echo", true)
3417  .CasesLower("ifdef", "ifndef", "elseifdef", "elseifndef", true)
3418  .Default(false)) {
3419  ExpandNextToken = DoNotExpandMacros;
3420  }
3421  Lex(ExpandNextToken);
3422 
3423  return false;
3424 }
3425 
3426 /// parseDirectiveEquate:
3427 /// ::= name "=" expression
3428 /// | name "equ" expression (not redefinable)
3429 /// | name "equ" text-list
3430 /// | name "textequ" text-list (redefinability unspecified)
3431 bool MasmParser::parseDirectiveEquate(StringRef IDVal, StringRef Name,
3432  DirectiveKind DirKind, SMLoc NameLoc) {
3433  auto BuiltinIt = BuiltinSymbolMap.find(Name.lower());
3434  if (BuiltinIt != BuiltinSymbolMap.end())
3435  return Error(NameLoc, "cannot redefine a built-in symbol");
3436 
3437  Variable &Var = Variables[Name.lower()];
3438  if (Var.Name.empty()) {
3439  Var.Name = Name;
3440  }
3441 
3442  SMLoc StartLoc = Lexer.getLoc();
3443  if (DirKind == DK_EQU || DirKind == DK_TEXTEQU) {
3444  // "equ" and "textequ" both allow text expressions.
3445  std::string Value;
3446  std::string TextItem;
3447  if (!parseTextItem(TextItem)) {
3448  Value += TextItem;
3449 
3450  // Accept a text-list, not just one text-item.
3451  auto parseItem = [&]() -> bool {
3452  if (parseTextItem(TextItem))
3453  return TokError("expected text item");
3454  Value += TextItem;
3455  return false;
3456  };
3457  if (parseOptionalToken(AsmToken::Comma) && parseMany(parseItem))
3458  return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3459 
3460  if (!Var.IsText || Var.TextValue != Value) {
3461  switch (Var.Redefinable) {
3462  case Variable::NOT_REDEFINABLE:
3463  return Error(getTok().getLoc(), "invalid variable redefinition");
3464  case Variable::WARN_ON_REDEFINITION:
3465  if (Warning(NameLoc, "redefining '" + Name +
3466  "', already defined on the command line")) {
3467  return true;
3468  }
3469  break;
3470  default:
3471  break;
3472  }
3473  }
3474  Var.IsText = true;
3475  Var.TextValue = Value;
3476  Var.Redefinable = Variable::REDEFINABLE;
3477 
3478  return false;
3479  }
3480  }
3481  if (DirKind == DK_TEXTEQU)
3482  return TokError("expected <text> in '" + Twine(IDVal) + "' directive");
3483 
3484  // Parse as expression assignment.
3485  const MCExpr *Expr;
3486  SMLoc EndLoc;
3487  if (parseExpression(Expr, EndLoc))
3488  return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3489  StringRef ExprAsString = StringRef(
3490  StartLoc.getPointer(), EndLoc.getPointer() - StartLoc.getPointer());
3491 
3492  int64_t Value;
3493  if (!Expr->evaluateAsAbsolute(Value, getStreamer().getAssemblerPtr())) {
3494  if (DirKind == DK_ASSIGN)
3495  return Error(
3496  StartLoc,
3497  "expected absolute expression; not all symbols have known values",
3498  {StartLoc, EndLoc});
3499 
3500  // Not an absolute expression; define as a text replacement.
3501  if (!Var.IsText || Var.TextValue != ExprAsString) {
3502  switch (Var.Redefinable) {
3503  case Variable::NOT_REDEFINABLE:
3504  return Error(getTok().getLoc(), "invalid variable redefinition");
3505  case Variable::WARN_ON_REDEFINITION:
3506  if (Warning(NameLoc, "redefining '" + Name +
3507  "', already defined on the command line")) {
3508  return true;
3509  }
3510  break;
3511  default:
3512  break;
3513  }
3514  }
3515 
3516  Var.IsText = true;
3517  Var.TextValue = ExprAsString.str();
3518  Var.Redefinable = Variable::REDEFINABLE;
3519 
3520  return false;
3521  }
3522 
3523  MCSymbol *Sym = getContext().getOrCreateSymbol(Var.Name);
3524 
3525  const MCConstantExpr *PrevValue =
3526  Sym->isVariable() ? dyn_cast_or_null<MCConstantExpr>(
3527  Sym->getVariableValue(/*SetUsed=*/false))
3528  : nullptr;
3529  if (Var.IsText || !PrevValue || PrevValue->getValue() != Value) {
3530  switch (Var.Redefinable) {
3531  case Variable::NOT_REDEFINABLE:
3532  return Error(getTok().getLoc(), "invalid variable redefinition");
3533  case Variable::WARN_ON_REDEFINITION:
3534  if (Warning(NameLoc, "redefining '" + Name +
3535  "', already defined on the command line")) {
3536  return true;
3537  }
3538  break;
3539  default:
3540  break;
3541  }
3542  }
3543 
3544  Var.IsText = false;
3545  Var.TextValue.clear();
3546  Var.Redefinable = (DirKind == DK_ASSIGN) ? Variable::REDEFINABLE
3547  : Variable::NOT_REDEFINABLE;
3548 
3549  Sym->setRedefinable(Var.Redefinable != Variable::NOT_REDEFINABLE);
3550  Sym->setVariableValue(Expr);
3551  Sym->setExternal(false);
3552 
3553  return false;
3554 }
3555 
3556 bool MasmParser::parseEscapedString(std::string &Data) {
3557  if (check(getTok().isNot(AsmToken::String), "expected string"))
3558  return true;
3559 
3560  Data = "";
3561  char Quote = getTok().getString().front();
3562  StringRef Str = getTok().getStringContents();
3563  Data.reserve(Str.size());
3564  for (size_t i = 0, e = Str.size(); i != e; ++i) {
3565  Data.push_back(Str[i]);
3566  if (Str[i] == Quote) {
3567  // MASM treats doubled delimiting quotes as an escaped delimiting quote.
3568  // If we're escaping the string's trailing delimiter, we're definitely
3569  // missing a quotation mark.
3570  if (i + 1 == Str.size())
3571  return Error(getTok().getLoc(), "missing quotation mark in string");
3572  if (Str[i + 1] == Quote)
3573  ++i;
3574  }
3575  }
3576 
3577  Lex();
3578  return false;
3579 }
3580 
3581 bool MasmParser::parseAngleBracketString(std::string &Data) {
3582  SMLoc EndLoc, StartLoc = getTok().getLoc();
3583  if (isAngleBracketString(StartLoc, EndLoc)) {
3584  const char *StartChar = StartLoc.getPointer() + 1;
3585  const char *EndChar = EndLoc.getPointer() - 1;
3586  jumpToLoc(EndLoc, CurBuffer, EndStatementAtEOFStack.back());
3587  // Eat from '<' to '>'.
3588  Lex();
3589 
3590  Data = angleBracketString(StringRef(StartChar, EndChar - StartChar));
3591  return false;
3592  }
3593  return true;
3594 }
3595 
3596 /// textItem ::= textLiteral | textMacroID | % constExpr
///
/// \param Data receives the item's text on success. In the identifier case it
///        is also overwritten (with the identifier's spelling) when the
///        function fails.
/// \returns true if the current token does not form a text item, false on
///          success.
3597 bool MasmParser::parseTextItem(std::string &Data) {
3598  switch (getTok().getKind()) {
3599  default:
3600  return true;
3601  case AsmToken::Percent: {
// % constExpr: evaluate an absolute expression and use its decimal spelling.
3602  int64_t Res;
3603  if (parseToken(AsmToken::Percent) || parseAbsoluteExpression(Res))
3604  return true;
3605  Data = std::to_string(Res);
3606  return false;
3607  }
// Every token kind listed here is handed to parseAngleBracketString, which
// decides whether an angle-bracket string really starts at this position.
3608  case AsmToken::Less:
3609  case AsmToken::LessEqual:
3610  case AsmToken::LessLess:
3611  case AsmToken::LessGreater:
3612  return parseAngleBracketString(Data);
3613  case AsmToken::Identifier: {
3614  // This must be a text macro; we need to expand it accordingly.
3615  StringRef ID;
3616  SMLoc StartLoc = getTok().getLoc();
3617  if (parseIdentifier(ID))
3618  return true;
3619  Data = ID.str();
3620 
// Resolve repeatedly: after each substitution ID is re-pointed at Data, so the
// replacement text is itself looked up as a text macro name.
// NOTE(review): nothing visible here bounds this loop; a text macro whose
// value resolves back to its own name would presumably loop forever - confirm.
3621  bool Expanded = false;
3622  while (true) {
3623  // Try to resolve as a built-in text macro
3624  auto BuiltinIt = BuiltinSymbolMap.find(ID.lower());
3625  if (BuiltinIt != BuiltinSymbolMap.end()) {
3626  llvm::Optional<std::string> BuiltinText =
3627  evaluateBuiltinTextMacro(BuiltinIt->getValue(), StartLoc);
3628  if (!BuiltinText.hasValue()) {
3629  // Not a text macro; break without substituting
3630  break;
3631  }
3632  Data = std::move(*BuiltinText);
3633  ID = StringRef(Data);
3634  Expanded = true;
3635  continue;
3636  }
3637 
3638  // Try to resolve as a variable text macro
3639  auto VarIt = Variables.find(ID.lower());
3640  if (VarIt != Variables.end()) {
3641  const Variable &Var = VarIt->getValue();
3642  if (!Var.IsText) {
3643  // Not a text macro; break without substituting
3644  break;
3645  }
3646  Data = Var.TextValue;
3647  ID = StringRef(Data);
3648  Expanded = true;
3649  continue;
3650  }
3651 
3652  break;
3653  }
3654 
3655  if (!Expanded) {
3656  // Not a text macro; not usable in TextItem context. Since we haven't used
3657  // the token, put it back for better error recovery.
// ID still refers to the identifier as parsed, since it is only re-pointed
// when a substitution happens.
3658  getLexer().UnLex(AsmToken(AsmToken::Identifier, ID));
3659  return true;
3660  }
3661  return false;
3662  }
3663  }
3664  llvm_unreachable("unhandled token kind");
3665 }
3666 
3667 /// parseDirectiveAscii:
3668 /// ::= ( .ascii | .asciz | .string ) [ "string" ( , "string" )* ]
3669 bool MasmParser::parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) {
3670  auto parseOp = [&]() -> bool {
3671  std::string Data;
3672  if (checkForValidSection() || parseEscapedString(Data))
3673  return true;
3674  getStreamer().emitBytes(Data);
3675  if (ZeroTerminated)
3676  getStreamer().emitBytes(StringRef("\0", 1));
3677  return false;
3678  };
3679 
3680  if (parseMany(parseOp))
3681  return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3682  return false;
3683 }
3684 
3685 bool MasmParser::emitIntValue(const MCExpr *Value, unsigned Size) {
3686  // Special case constant expressions to match code generator.
3687  if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
3688  assert(Size <= 8 && "Invalid size");
3689  int64_t IntValue = MCE->getValue();
3690  if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
3691  return Error(MCE->getLoc(), "out of range literal value");
3692  getStreamer().emitIntValue(IntValue, Size);
3693  } else {
3694  const MCSymbolRefExpr *MSE = dyn_cast<MCSymbolRefExpr>(Value);
3695  if (MSE && MSE->getSymbol().getName() == "?") {
3696  // ? initializer; treat as 0.
3697  getStreamer().emitIntValue(0, Size);
3698  } else {
3699  getStreamer().emitValue(Value, Size, Value->getLoc());
3700  }
3701  }
3702  return false;
3703 }
3704 
3705 bool MasmParser::parseScalarInitializer(unsigned Size,
3707  unsigned StringPadLength) {
3708  if (Size == 1 && getTok().is(AsmToken::String)) {
3709  std::string Value;
3710  if (parseEscapedString(Value))
3711  return true;
3712  // Treat each character as an initializer.
3713  for (const unsigned char CharVal : Value)
3714  Values.push_back(MCConstantExpr::create(CharVal, getContext()));
3715 
3716  // Pad the string with spaces to the specified length.
3717  for (size_t i = Value.size(); i < StringPadLength; ++i)
3718  Values.push_back(MCConstantExpr::create(' ', getContext()));
3719  } else {
3720  const MCExpr *Value;
3721  if (parseExpression(Value))
3722  return true;
3723  if (getTok().is(AsmToken::Identifier) &&
3724  getTok().getString().equals_insensitive("dup")) {
3725  Lex(); // Eat 'dup'.
3726  const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
3727  if (!MCE)
3728  return Error(Value->getLoc(),
3729  "cannot repeat value a non-constant number of times");
3730  const int64_t Repetitions = MCE->getValue();
3731  if (Repetitions < 0)
3732  return Error(Value->getLoc(),
3733  "cannot repeat value a negative number of times");
3734 
3735  SmallVector<const MCExpr *, 1> DuplicatedValues;
3736  if (parseToken(AsmToken::LParen,
3737  "parentheses required for 'dup' contents") ||
3738  parseScalarInstList(Size, DuplicatedValues) || parseRParen())
3739  return true;
3740 
3741  for (int i = 0; i < Repetitions; ++i)
3742  Values.append(DuplicatedValues.begin(), DuplicatedValues.end());
3743  } else {
3744  Values.push_back(Value);
3745  }
3746  }
3747  return false;
3748 }
3749 
3750 bool MasmParser::parseScalarInstList(unsigned Size,
3752  const AsmToken::TokenKind EndToken) {
3753  while (getTok().isNot(EndToken) &&
3754  (EndToken != AsmToken::Greater ||
3755  getTok().isNot(AsmToken::GreaterGreater))) {
3756  parseScalarInitializer(Size, Values);
3757 
3758  // If we see a comma, continue, and allow line continuation.
3759  if (!parseOptionalToken(AsmToken::Comma))
3760  break;
3761  parseOptionalToken(AsmToken::EndOfStatement);
3762  }
3763  return false;
3764 }
3765 
3766 bool MasmParser::emitIntegralValues(unsigned Size, unsigned *Count) {
3768  if (checkForValidSection() || parseScalarInstList(Size, Values))
3769  return true;
3770 
3771  for (auto Value : Values) {
3772  emitIntValue(Value, Size);
3773  }
3774  if (Count)
3775  *Count = Values.size();
3776  return false;
3777 }
3778 
3779 // Add a field to the current structure.
3780 bool MasmParser::addIntegralField(StringRef Name, unsigned Size) {
3781  StructInfo &Struct = StructInProgress.back();
3782  FieldInfo &Field = Struct.addField(Name, FT_INTEGRAL, Size);
3783  IntFieldInfo &IntInfo = Field.Contents.IntInfo;
3784 
3785  Field.Type = Size;
3786 
3787  if (parseScalarInstList(Size, IntInfo.Values))
3788  return true;
3789 
3790  Field.SizeOf = Field.Type * IntInfo.Values.size();
3791  Field.LengthOf = IntInfo.Values.size();
3792  const unsigned FieldEnd = Field.Offset + Field.SizeOf;
3793  if (!Struct.IsUnion) {
3794  Struct.NextOffset = FieldEnd;
3795  }
3796  Struct.Size = std::max(Struct.Size, FieldEnd);
3797  return false;
3798 }
3799 
3800 /// parseDirectiveValue
3801 /// ::= (byte | word | ... ) [ expression (, expression)* ]
3802 bool MasmParser::parseDirectiveValue(StringRef IDVal, unsigned Size) {
3803  if (StructInProgress.empty()) {
3804  // Initialize data value.
3805  if (emitIntegralValues(Size))
3806  return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3807  } else if (addIntegralField("", Size)) {
3808  return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3809  }
3810 
3811  return false;
3812 }
3813 
3814 /// parseDirectiveNamedValue
3815 /// ::= name (byte | word | ... ) [ expression (, expression)* ]
3816 bool MasmParser::parseDirectiveNamedValue(StringRef TypeName, unsigned Size,
3817  StringRef Name, SMLoc NameLoc) {
3818  if (StructInProgress.empty()) {
3819  // Initialize named data value.
3820  MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
3821  getStreamer().emitLabel(Sym);
3822  unsigned Count;
3823  if (emitIntegralValues(Size, &Count))
3824  return addErrorSuffix(" in '" + Twine(TypeName) + "' directive");
3825 
3826  AsmTypeInfo Type;
3827  Type.Name = TypeName;
3828  Type.Size = Size * Count;
3829  Type.ElementSize = Size;
3830  Type.Length = Count;
3831  KnownType[Name.lower()] = Type;
3832  } else if (addIntegralField(Name, Size)) {
3833  return addErrorSuffix(" in '" + Twine(TypeName) + "' directive");
3834  }
3835 
3836  return false;
3837 }
3838 
3839 static bool parseHexOcta(MasmParser &Asm, uint64_t &hi, uint64_t &lo) {
3840  if (Asm.getTok().isNot(AsmToken::Integer) &&
3841  Asm.getTok().isNot(AsmToken::BigNum))
3842  return Asm.TokError("unknown token in expression");
3843  SMLoc ExprLoc = Asm.getTok().getLoc();
3844  APInt IntValue = Asm.getTok().getAPIntVal();
3845  Asm.Lex();
3846  if (!IntValue.isIntN(128))
3847  return Asm.Error(ExprLoc, "out of range literal value");
3848  if (!IntValue.isIntN(64)) {
3849  hi = IntValue.getHiBits(IntValue.getBitWidth() - 64).getZExtValue();
3850  lo = IntValue.getLoBits(64).getZExtValue();
3851  } else {
3852  hi = 0;
3853  lo = IntValue.getZExtValue();
3854  }
3855  return false;
3856 }
3857 
3858 bool MasmParser::parseRealValue(const fltSemantics &Semantics, APInt &Res) {
3859  // We don't truly support arithmetic on floating point expressions, so we
3860  // have to manually parse unary prefixes.
3861  bool IsNeg = false;
3862  SMLoc SignLoc;
3863  if (getLexer().is(AsmToken::Minus)) {
3864  SignLoc = getLexer().getLoc();
3865  Lexer.Lex();
3866  IsNeg = true;
3867  } else if (getLexer().is(AsmToken::Plus)) {
3868  SignLoc = getLexer().getLoc();
3869  Lexer.Lex();
3870  }
3871 
3872  if (Lexer.is(AsmToken::Error))
3873  return TokError(Lexer.getErr());
3874  if (Lexer.isNot(AsmToken::Integer) && Lexer.isNot(AsmToken::Real) &&
3875  Lexer.isNot(AsmToken::Identifier))
3876  return TokError("unexpected token in directive");
3877 
3878  // Convert to an APFloat.
3879  APFloat Value(Semantics);
3880  StringRef IDVal = getTok().getString();
3881  if (getLexer().is(AsmToken::Identifier)) {
3882  if (IDVal.equals_insensitive("infinity") || IDVal.equals_insensitive("inf"))
3883  Value = APFloat::getInf(Semantics);
3884  else if (IDVal.equals_insensitive("nan"))
3885  Value = APFloat::getNaN(Semantics, false, ~0);
3886  else if (IDVal.equals_insensitive("?"))
3887  Value = APFloat::getZero(Semantics);
3888  else
3889  return TokError("invalid floating point literal");
3890  } else if (IDVal.consume_back("r") || IDVal.consume_back("R")) {
3891  // MASM hexadecimal floating-point literal; no APFloat conversion needed.
3892  // To match ML64.exe, ignore the initial sign.
3893  unsigned SizeInBits = Value.getSizeInBits(Semantics);
3894  if (SizeInBits != (IDVal.size() << 2))
3895  return TokError("invalid floating point literal");
3896 
3897  // Consume the numeric token.
3898  Lex();
3899 
3900  Res = APInt(SizeInBits, IDVal, 16);
3901  if (SignLoc.isValid())
3902  return Warning(SignLoc, "MASM-style hex floats ignore explicit sign");
3903  return false;
3904  } else if (errorToBool(
3905  Value.convertFromString(IDVal, APFloat::rmNearestTiesToEven)
3906  .takeError())) {
3907  return TokError("invalid floating point literal");
3908  }
3909  if (IsNeg)
3910  Value.changeSign();
3911 
3912  // Consume the numeric token.
3913  Lex();
3914 
3915  Res = Value.bitcastToAPInt();
3916 
3917  return false;
3918 }
3919 
3920 bool MasmParser::parseRealInstList(const fltSemantics &Semantics,
3921  SmallVectorImpl<APInt> &ValuesAsInt,
3922  const AsmToken::TokenKind EndToken) {
3923  while (getTok().isNot(EndToken) ||
3924  (EndToken == AsmToken::Greater &&
3925  getTok().isNot(AsmToken::GreaterGreater))) {
3926  const AsmToken NextTok = peekTok();
3927  if (NextTok.is(AsmToken::Identifier) &&
3928  NextTok.getString().equals_insensitive("dup")) {
3929  const MCExpr *Value;
3930  if (parseExpression(Value) || parseToken(AsmToken::Identifier))
3931  return true;
3932  const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
3933  if (!MCE)
3934  return Error(Value->getLoc(),
3935  "cannot repeat value a non-constant number of times");
3936  const int64_t Repetitions = MCE->getValue();
3937  if (Repetitions < 0)
3938  return Error(Value->getLoc(),
3939  "cannot repeat value a negative number of times");
3940 
3941  SmallVector<APInt, 1> DuplicatedValues;
3942  if (parseToken(AsmToken::LParen,
3943  "parentheses required for 'dup' contents") ||
3944  parseRealInstList(Semantics, DuplicatedValues) || parseRParen())
3945  return true;
3946 
3947  for (int i = 0; i < Repetitions; ++i)
3948  ValuesAsInt.append(DuplicatedValues.begin(), DuplicatedValues.end());
3949  } else {
3950  APInt AsInt;
3951  if (parseRealValue(Semantics, AsInt))
3952  return true;
3953  ValuesAsInt.push_back(AsInt);
3954  }
3955 
3956  // Continue if we see a comma. (Also, allow line continuation.)
3957  if (!parseOptionalToken(AsmToken::Comma))
3958  break;
3959  parseOptionalToken(AsmToken::EndOfStatement);
3960  }
3961 
3962  return false;
3963 }
3964 
3965 // Initialize real data values.
3966 bool MasmParser::emitRealValues(const fltSemantics &Semantics,
3967  unsigned *Count) {
3968  if (checkForValidSection())
3969  return true;
3970 
3971  SmallVector<APInt, 1> ValuesAsInt;
3972  if (parseRealInstList(Semantics, ValuesAsInt))
3973  return true;
3974 
3975  for (const APInt &AsInt : ValuesAsInt) {
3976  getStreamer().emitIntValue(AsInt);
3977  }
3978  if (Count)
3979  *Count = ValuesAsInt.size();
3980  return false;
3981 }
3982 
3983 // Add a real field to the current struct.
3984 bool MasmParser::addRealField(StringRef Name, const fltSemantics &Semantics,
3985  size_t Size) {
3986  StructInfo &Struct = StructInProgress.back();
3987  FieldInfo &Field = Struct.addField(Name, FT_REAL, Size);
3988  RealFieldInfo &RealInfo = Field.Contents.RealInfo;
3989 
3990  Field.SizeOf = 0;
3991 
3992  if (parseRealInstList(Semantics, RealInfo.AsIntValues))
3993  return true;
3994 
3995  Field.Type = RealInfo.AsIntValues.back().getBitWidth() / 8;
3996  Field.LengthOf = RealInfo.AsIntValues.size();
3997  Field.SizeOf = Field.Type * Field.LengthOf;
3998 
3999  const unsigned FieldEnd = Field.Offset + Field.SizeOf;
4000  if (!Struct.IsUnion) {
4001  Struct.NextOffset = FieldEnd;
4002  }
4003  Struct.Size = std::max(Struct.Size, FieldEnd);
4004  return false;
4005 }
4006 
4007 /// parseDirectiveRealValue
4008 /// ::= (real4 | real8 | real10) [ expression (, expression)* ]
4009 bool MasmParser::parseDirectiveRealValue(StringRef IDVal,
4010  const fltSemantics &Semantics,
4011  size_t Size) {
4012  if (StructInProgress.empty()) {
4013  // Initialize data value.
4014  if (emitRealValues(Semantics))
4015  return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
4016  } else if (addRealField("", Semantics, Size)) {
4017  return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
4018  }
4019  return false;
4020 }
4021 
4022 /// parseDirectiveNamedRealValue
4023 /// ::= name (real4 | real8 | real10) [ expression (, expression)* ]
4024 bool MasmParser::parseDirectiveNamedRealValue(StringRef TypeName,
4025  const fltSemantics &Semantics,
4026  unsigned Size, StringRef Name,
4027  SMLoc NameLoc) {
4028  if (StructInProgress.empty()) {
4029  // Initialize named data value.
4030  MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
4031  getStreamer().emitLabel(Sym);
4032  unsigned Count;
4033  if (emitRealValues(Semantics, &Count))
4034  return addErrorSuffix(" in '" + TypeName + "' directive");
4035 
4036  AsmTypeInfo Type;
4037  Type.Name = TypeName;
4038  Type.Size = Size * Count;
4039  Type.ElementSize = Size;
4040  Type.Length = Count;
4041  KnownType[Name.lower()] = Type;
4042  } else if (addRealField(Name, Semantics, Size)) {
4043  return addErrorSuffix(" in '" + TypeName + "' directive");
4044  }
4045  return false;
4046 }
4047 
4048 bool MasmParser::parseOptionalAngleBracketOpen() {
4049  const AsmToken Tok = getTok();
4050  if (parseOptionalToken(AsmToken::LessLess)) {
4051  AngleBracketDepth++;
4052  Lexer.UnLex(AsmToken(AsmToken::Less, Tok.getString().substr(1)));
4053  return true;
4054  } else if (parseOptionalToken(AsmToken::LessGreater)) {
4055  AngleBracketDepth++;
4056  Lexer.UnLex(AsmToken(AsmToken::Greater, Tok.getString().substr(1)));
4057  return true;
4058  } else if (parseOptionalToken(AsmToken::Less)) {
4059  AngleBracketDepth++;
4060  return true;
4061  }
4062 
4063  return false;
4064 }
4065 
4066 bool MasmParser::parseAngleBracketClose(const Twine &Msg) {
4067  const AsmToken Tok = getTok();
4068  if (parseOptionalToken(AsmToken::GreaterGreater)) {
4069  Lexer.UnLex(AsmToken(AsmToken::Greater, Tok.getString().substr(1)));
4070  } else if (parseToken(AsmToken::Greater, Msg)) {
4071  return true;
4072  }
4073  AngleBracketDepth--;
4074  return false;
4075 }
4076 
4077 bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
4078  const IntFieldInfo &Contents,
4079  FieldInitializer &Initializer) {
4080  SMLoc Loc = getTok().getLoc();
4081 
4083  if (parseOptionalToken(AsmToken::LCurly)) {
4084  if (Field.LengthOf == 1 && Field.Type > 1)
4085  return Error(Loc, "Cannot initialize scalar field with array value");
4086  if (parseScalarInstList(Field.Type, Values, AsmToken::RCurly) ||
4087  parseToken(AsmToken::RCurly))
4088  return true;
4089  } else if (parseOptionalAngleBracketOpen()) {
4090  if (Field.LengthOf == 1 && Field.Type > 1)
4091  return Error(Loc, "Cannot initialize scalar field with array value");
4092  if (parseScalarInstList(Field.Type, Values, AsmToken::Greater) ||
4093  parseAngleBracketClose())
4094  return true;
4095  } else if (Field.LengthOf > 1 && Field.Type > 1) {
4096  return Error(Loc, "Cannot initialize array field with scalar value");
4097  } else if (parseScalarInitializer(Field.Type, Values,
4098  /*StringPadLength=*/Field.LengthOf)) {
4099  return true;
4100  }
4101 
4102  if (Values.size() > Field.LengthOf) {
4103  return Error(Loc, "Initializer too long for field; expected at most " +
4104  std::to_string(Field.LengthOf) + " elements, got " +
4105  std::to_string(Values.size()));
4106  }
4107  // Default-initialize all remaining values.
4108  Values.append(Contents.Values.begin() + Values.size(), Contents.Values.end());
4109 
4110  Initializer = FieldInitializer(std::move(Values));
4111  return false;
4112 }
4113 
4114 bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
4115  const RealFieldInfo &Contents,
4116  FieldInitializer &Initializer) {
4117  const fltSemantics *Semantics;
4118  switch (Field.Type) {
4119  case 4:
4120  Semantics = &APFloat::IEEEsingle();
4121  break;
4122  case 8:
4123  Semantics = &APFloat::IEEEdouble();
4124  break;
4125  case 10:
4126  Semantics = &APFloat::x87DoubleExtended();
4127  break;
4128  default:
4129  llvm_unreachable("unknown real field type");
4130  }
4131 
4132  SMLoc Loc = getTok().getLoc();
4133 
4134  SmallVector<APInt, 1> AsIntValues;
4135  if (parseOptionalToken(AsmToken::LCurly)) {
4136  if (Field.LengthOf == 1)
4137  return Error(Loc, "Cannot initialize scalar field with array value");
4138  if (parseRealInstList(*Semantics, AsIntValues, AsmToken::RCurly) ||
4139  parseToken(AsmToken::RCurly))
4140  return true;
4141  } else if (parseOptionalAngleBracketOpen()) {
4142  if (Field.LengthOf == 1)
4143  return Error(Loc, "Cannot initialize scalar field with array value");
4144  if (parseRealInstList(*Semantics, AsIntValues, AsmToken::Greater) ||
4145  parseAngleBracketClose())
4146  return true;
4147  } else if (Field.LengthOf > 1) {
4148  return Error(Loc, "Cannot initialize array field with scalar value");
4149  } else {
4150  AsIntValues.emplace_back();
4151  if (parseRealValue(*Semantics, AsIntValues.back()))
4152  return true;
4153  }
4154 
4155  if (AsIntValues.size() > Field.LengthOf) {
4156  return Error(Loc, "Initializer too long for field; expected at most " +
4157  std::to_string(Field.LengthOf) + " elements, got " +
4158  std::to_string(AsIntValues.size()));
4159  }
4160  // Default-initialize all remaining values.
4161  AsIntValues.append(Contents.AsIntValues.begin() + AsIntValues.size(),
4162  Contents.AsIntValues.end());
4163 
4164  Initializer = FieldInitializer(std::move(AsIntValues));
4165  return false;
4166 }
4167 
4168 bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
4169  const StructFieldInfo &Contents,
4170  FieldInitializer &Initializer) {
4171  SMLoc Loc = getTok().getLoc();
4172 
4173  std::vector<StructInitializer> Initializers;
4174  if (Field.LengthOf > 1) {
4175  if (parseOptionalToken(AsmToken::LCurly)) {
4176  if (parseStructInstList(Contents.Structure, Initializers,
4177  AsmToken::RCurly) ||
4178  parseToken(AsmToken::RCurly))
4179  return true;
4180  } else if (parseOptionalAngleBracketOpen()) {
4181  if (parseStructInstList(Contents.Structure, Initializers,
4182  AsmToken::Greater) ||
4183  parseAngleBracketClose())
4184  return true;
4185  } else {
4186  return Error(Loc, "Cannot initialize array field with scalar value");
4187  }
4188  } else {
4189  Initializers.emplace_back();
4190  if (parseStructInitializer(Contents.Structure, Initializers.back()))
4191  return true;
4192  }
4193 
4194  if (Initializers.size() > Field.LengthOf) {
4195  return Error(Loc, "Initializer too long for field; expected at most " +
4196  std::to_string(Field.LengthOf) + " elements, got " +
4197  std::to_string(Initializers.size()));
4198  }
4199  // Default-initialize all remaining values.
4200  Initializers.insert(Initializers.end(),
4201  Contents.Initializers.begin() + Initializers.size(),
4202  Contents.Initializers.end());
4203 
4204  Initializer = FieldInitializer(std::move(Initializers), Contents.Structure);
4205  return false;
4206 }
4207 
4208 bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
4209  FieldInitializer &Initializer) {
4210  switch (Field.Contents.FT) {
4211  case FT_INTEGRAL:
4212  return parseFieldInitializer(Field, Field.Contents.IntInfo, Initializer);
4213  case FT_REAL:
4214  return parseFieldInitializer(Field, Field.Contents.RealInfo, Initializer);
4215  case FT_STRUCT:
4216  return parseFieldInitializer(Field, Field.Contents.StructInfo, Initializer);
4217  }
4218  llvm_unreachable("Unhandled FieldType enum");
4219 }
4220 
4221 bool MasmParser::parseStructInitializer(const StructInfo &Structure,
4222  StructInitializer &Initializer) {
4223  const AsmToken FirstToken = getTok();
4224 
4226  if (parseOptionalToken(AsmToken::LCurly)) {
4227  EndToken = AsmToken::RCurly;
4228  } else if (parseOptionalAngleBracketOpen()) {
4229  EndToken = AsmToken::Greater;
4230  AngleBracketDepth++;
4231  } else if (FirstToken.is(AsmToken::Identifier) &&
4232  FirstToken.getString() == "?") {
4233  // ? initializer; leave EndToken uninitialized to treat as empty.
4234  if (parseToken(AsmToken::Identifier))
4235  return true;
4236  } else {
4237  return Error(FirstToken.getLoc(), "Expected struct initializer");
4238  }
4239 
4240  auto &FieldInitializers = Initializer.FieldInitializers;
4241  size_t FieldIndex = 0;
4242  if (EndToken.hasValue()) {
4243  // Initialize all fields with given initializers.
4244  while (getTok().isNot(EndToken.getValue()) &&
4245  FieldIndex < Structure.Fields.size()) {
4246  const FieldInfo &Field = Structure.Fields[FieldIndex++];
4247  if (parseOptionalToken(AsmToken::Comma)) {
4248  // Empty initializer; use the default and continue. (Also, allow line
4249  // continuation.)
4250  FieldInitializers.push_back(Field.Contents);
4251  parseOptionalToken(AsmToken::EndOfStatement);
4252  continue;
4253  }
4254  FieldInitializers.emplace_back(Field.Contents.FT);
4255  if (parseFieldInitializer(Field, FieldInitializers.back()))
4256  return true;
4257 
4258  // Continue if we see a comma. (Also, allow line continuation.)
4259  SMLoc CommaLoc = getTok().getLoc();
4260  if (!parseOptionalToken(AsmToken::Comma))
4261  break;
4262  if (FieldIndex == Structure.Fields.size())
4263  return Error(CommaLoc, "'" + Structure.Name +
4264  "' initializer initializes too many fields");
4265  parseOptionalToken(AsmToken::EndOfStatement);
4266  }
4267  }
4268  // Default-initialize all remaining fields.
4269  for (auto It = Structure.Fields.begin() + FieldIndex;
4270  It != Structure.Fields.end(); ++It) {
4271  const FieldInfo &Field = *It;
4272  FieldInitializers.push_back(Field.Contents);
4273  }
4274 
4275  if (EndToken.hasValue()) {
4276  if (EndToken.getValue() == AsmToken::Greater)
4277  return parseAngleBracketClose();
4278 
4279  return parseToken(EndToken.getValue());
4280  }
4281 
4282  return false;
4283 }
4284 
4285 bool MasmParser::parseStructInstList(
4286  const StructInfo &Structure, std::vector<StructInitializer> &Initializers,
4287  const AsmToken::TokenKind EndToken) {
4288  while (getTok().isNot(EndToken) ||
4289  (EndToken == AsmToken::Greater &&
4290  getTok().isNot(AsmToken::GreaterGreater))) {
4291  const AsmToken NextTok = peekTok();
4292  if (NextTok.is(AsmToken::Identifier) &&
4293  NextTok.getString().equals_insensitive("dup")) {
4294  const MCExpr *Value;
4295  if (parseExpression(Value) || parseToken(AsmToken::Identifier))
4296  return true;
4297  const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
4298  if (!MCE)
4299  return Error(Value->getLoc(),
4300  "cannot repeat value a non-constant number of times");
4301  const int64_t Repetitions = MCE->getValue();
4302  if (Repetitions < 0)
4303  return Error(Value->getLoc(),
4304  "cannot repeat value a negative number of times");
4305 
4306  std::vector<StructInitializer> DuplicatedValues;
4307  if (parseToken(AsmToken::LParen,
4308  "parentheses required for 'dup' contents") ||
4309  parseStructInstList(Structure, DuplicatedValues) || parseRParen())
4310  return true;
4311 
4312  for (int i = 0; i < Repetitions; ++i)
4313  llvm::append_range(Initializers, DuplicatedValues);
4314  } else {
4315  Initializers.emplace_back();
4316  if (parseStructInitializer(Structure, Initializers.back()))
4317  return true;
4318  }
4319 
4320  // Continue if we see a comma. (Also, allow line continuation.)
4321  if (!parseOptionalToken(AsmToken::Comma))
4322  break;
4323  parseOptionalToken(AsmToken::EndOfStatement);
4324  }
4325 
4326  return false;
4327 }
4328 
4329 bool MasmParser::emitFieldValue(const FieldInfo &Field,
4330  const IntFieldInfo &Contents) {
4331  // Default-initialize all values.
4332  for (const MCExpr *Value : Contents.Values) {
4333  if (emitIntValue(Value, Field.Type))
4334  return true;
4335  }
4336  return false;
4337 }
4338 
4339 bool MasmParser::emitFieldValue(const FieldInfo &Field,
4340  const RealFieldInfo &Contents) {
4341  for (const APInt &AsInt : Contents.AsIntValues) {
4342  getStreamer().emitIntValue(AsInt.getLimitedValue(),
4343  AsInt.getBitWidth() / 8);
4344  }
4345  return false;
4346 }
4347 
4348 bool MasmParser::emitFieldValue(const FieldInfo &Field,
4349  const StructFieldInfo &Contents) {
4350  for (const auto &Initializer : Contents.Initializers) {
4351  size_t Index = 0, Offset = 0;
4352  for (const auto &SubField : Contents.Structure.Fields) {
4353  getStreamer().emitZeros(SubField.Offset - Offset);
4354  Offset = SubField.Offset + SubField.SizeOf;
4355  emitFieldInitializer(SubField, Initializer.FieldInitializers[Index++]);
4356  }
4357  }
4358  return false;
4359 }
4360 
4361 bool MasmParser::emitFieldValue(const FieldInfo &Field) {
4362  switch (Field.Contents.FT) {
4363  case FT_INTEGRAL:
4364  return emitFieldValue(Field, Field.Contents.IntInfo);
4365  case FT_REAL:
4366  return emitFieldValue(Field, Field.Contents.RealInfo);
4367  case FT_STRUCT:
4368  return emitFieldValue(Field, Field.Contents.StructInfo);
4369  }
4370  llvm_unreachable("Unhandled FieldType enum");
4371 }
4372 
4373 bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4374  const IntFieldInfo &Contents,
4375  const IntFieldInfo &Initializer) {
4376  for (const auto &Value : Initializer.Values) {
4377  if (emitIntValue(Value, Field.Type))
4378  return true;
4379  }
4380  // Default-initialize all remaining values.
4381  for (auto it = Contents.Values.begin() + Initializer.Values.size();
4382  it != Contents.Values.end(); ++it) {
4383  const auto &Value = *it;
4384  if (emitIntValue(Value, Field.Type))
4385  return true;
4386  }
4387  return false;
4388 }
4389 
4390 bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4391  const RealFieldInfo &Contents,
4392  const RealFieldInfo &Initializer) {
4393  for (const auto &AsInt : Initializer.AsIntValues) {
4394  getStreamer().emitIntValue(AsInt.getLimitedValue(),
4395  AsInt.getBitWidth() / 8);
4396  }
4397  // Default-initialize all remaining values.
4398  for (auto It = Contents.AsIntValues.begin() + Initializer.AsIntValues.size();
4399  It != Contents.AsIntValues.end(); ++It) {
4400  const auto &AsInt = *It;
4401  getStreamer().emitIntValue(AsInt.getLimitedValue(),
4402  AsInt.getBitWidth() / 8);
4403  }
4404  return false;
4405 }
4406 
4407 bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4408  const StructFieldInfo &Contents,
4409  const StructFieldInfo &Initializer) {
4410  for (const auto &Init : Initializer.Initializers) {
4411  if (emitStructInitializer(Contents.Structure, Init))
4412  return true;
4413  }
4414  // Default-initialize all remaining values.
4415  for (auto It =
4416  Contents.Initializers.begin() + Initializer.Initializers.size();
4417  It != Contents.Initializers.end(); ++It) {
4418  const auto &Init = *It;
4419  if (emitStructInitializer(Contents.Structure, Init))
4420  return true;
4421  }
4422  return false;
4423 }
4424 
4425 bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4426  const FieldInitializer &Initializer) {
4427  switch (Field.Contents.FT) {
4428  case FT_INTEGRAL:
4429  return emitFieldInitializer(Field, Field.Contents.IntInfo,
4430  Initializer.IntInfo);
4431  case FT_REAL:
4432  return emitFieldInitializer(Field, Field.Contents.RealInfo,
4433  Initializer.RealInfo);
4434  case FT_STRUCT:
4435  return emitFieldInitializer(Field, Field.Contents.StructInfo,
4436  Initializer.StructInfo);
4437  }
4438  llvm_unreachable("Unhandled FieldType enum");
4439 }
4440 
4441 bool MasmParser::emitStructInitializer(const StructInfo &Structure,
4442  const StructInitializer &Initializer) {
4443  if (!Structure.Initializable)
4444  return Error(getLexer().getLoc(),
4445  "cannot initialize a value of type '" + Structure.Name +
4446  "'; 'org' was used in the type's declaration");
4447  size_t Index = 0, Offset = 0;
4448  for (const auto &Init : Initializer.FieldInitializers) {
4449  const auto &Field = Structure.Fields[Index++];
4450  getStreamer().emitZeros(Field.Offset - Offset);
4451  Offset = Field.Offset + Field.SizeOf;
4452  if (emitFieldInitializer(Field, Init))
4453  return true;
4454  }
4455  // Default-initialize all remaining fields.
4456  for (auto It =
4457  Structure.Fields.begin() + Initializer.FieldInitializers.size();
4458  It != Structure.Fields.end(); ++It) {
4459  const auto &Field = *It;
4460  getStreamer().emitZeros(Field.Offset - Offset);
4461  Offset = Field.Offset + Field.SizeOf;
4462  if (emitFieldValue(Field))
4463  return true;
4464  }
4465  // Add final padding.
4466  if (Offset != Structure.Size)
4467  getStreamer().emitZeros(Structure.Size - Offset);
4468  return false;
4469 }
4470 
4471 // Set data values from initializers.
4472 bool MasmParser::emitStructValues(const StructInfo &Structure,
4473  unsigned *Count) {
4474  std::vector<StructInitializer> Initializers;
4475  if (parseStructInstList(Structure, Initializers))
4476  return true;
4477 
4478  for (const auto &Initializer : Initializers) {
4479  if (emitStructInitializer(Structure, Initializer))
4480  return true;
4481  }
4482 
4483  if (Count)
4484  *Count = Initializers.size();
4485  return false;
4486 }
4487 
4488 // Declare a field in the current struct.
4489 bool MasmParser::addStructField(StringRef Name, const StructInfo &Structure) {
4490  StructInfo &OwningStruct = StructInProgress.back();
4491  FieldInfo &Field =
4492  OwningStruct.addField(Name, FT_STRUCT, Structure.AlignmentSize);
4493  StructFieldInfo &StructInfo = Field.Contents.StructInfo;
4494 
4495  StructInfo.Structure = Structure;
4496  Field.Type = Structure.Size;
4497 
4498  if (parseStructInstList(Structure, StructInfo.Initializers))
4499  return true;
4500 
4501  Field.LengthOf = StructInfo.Initializers.size();
4502  Field.SizeOf = Field.Type * Field.LengthOf;
4503 
4504  const unsigned FieldEnd = Field.Offset + Field.SizeOf;
4505  if (!OwningStruct.IsUnion) {
4506  OwningStruct.NextOffset = FieldEnd;
4507  }
4508  OwningStruct.Size = std::max(OwningStruct.Size, FieldEnd);
4509 
4510  return false;
4511 }
4512 
4513 /// parseDirectiveStructValue
4514 /// ::= struct-id (<struct-initializer> | {struct-initializer})
4515 /// [, (<struct-initializer> | {struct-initializer})]*
4516 bool MasmParser::parseDirectiveStructValue(const StructInfo &Structure,
4517  StringRef Directive, SMLoc DirLoc) {
4518  if (StructInProgress.empty()) {
4519  if (emitStructValues(Structure))
4520  return true;
4521  } else if (addStructField("", Structure)) {
4522  return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4523  }
4524 
4525  return false;
4526 }
4527 
4528 /// parseDirectiveNamedValue
4529 /// ::= name (byte | word | ... ) [ expression (, expression)* ]
4530 bool MasmParser::parseDirectiveNamedStructValue(const StructInfo &Structure,
4532  SMLoc DirLoc, StringRef Name) {
4533  if (StructInProgress.empty()) {
4534  // Initialize named data value.
4535  MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
4536  getStreamer().emitLabel(Sym);
4537  unsigned Count;
4538  if (emitStructValues(Structure, &Count))
4539  return true;
4540  AsmTypeInfo Type;
4541  Type.Name = Structure.Name;
4542  Type.Size = Structure.Size * Count;
4543  Type.ElementSize = Structure.Size;
4544  Type.Length = Count;
4545  KnownType[Name.lower()] = Type;
4546  } else if (addStructField(Name, Structure)) {
4547  return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4548  }
4549 
4550  return false;
4551 }
4552 
4553 /// parseDirectiveStruct
4554 /// ::= <name> (STRUC | STRUCT | UNION) [fieldAlign] [, NONUNIQUE]
4555 /// (dataDir | generalDir | offsetDir | nestedStruct)+
4556 /// <name> ENDS
4557 ////// dataDir = data declaration
4558 ////// offsetDir = EVEN, ORG, ALIGN
4559 bool MasmParser::parseDirectiveStruct(StringRef Directive,
4560  DirectiveKind DirKind, StringRef Name,
4561  SMLoc NameLoc) {
4562  // We ignore NONUNIQUE; we do not support OPTION M510 or OPTION OLDSTRUCTS
4563  // anyway, so all field accesses must be qualified.
4564  AsmToken NextTok = getTok();
4565  int64_t AlignmentValue = 1;
4566  if (NextTok.isNot(AsmToken::Comma) &&
4567  NextTok.isNot(AsmToken::EndOfStatement) &&
4568  parseAbsoluteExpression(AlignmentValue)) {
4569  return addErrorSuffix(" in alignment value for '" + Twine(Directive) +
4570  "' directive");
4571  }
4572  if (!isPowerOf2_64(AlignmentValue)) {
4573  return Error(NextTok.getLoc(), "alignment must be a power of two; was " +
4574  std::to_string(AlignmentValue));
4575  }
4576 
4577  StringRef Qualifier;
4578  SMLoc QualifierLoc;
4579  if (parseOptionalToken(AsmToken::Comma)) {
4580  QualifierLoc = getTok().getLoc();
4581  if (parseIdentifier(Qualifier))
4582  return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4583  if (!Qualifier.equals_insensitive("nonunique"))
4584  return Error(QualifierLoc, "Unrecognized qualifier for '" +
4585  Twine(Directive) +
4586  "' directive; expected none or NONUNIQUE");
4587  }
4588 
4589  if (parseToken(AsmToken::EndOfStatement))
4590  return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4591 
4592  StructInProgress.emplace_back(Name, DirKind == DK_UNION, AlignmentValue);
4593  return false;
4594 }
4595 
4596 /// parseDirectiveNestedStruct
4597 /// ::= (STRUC | STRUCT | UNION) [name]
4598 /// (dataDir | generalDir | offsetDir | nestedStruct)+
4599 /// ENDS
4600 bool MasmParser::parseDirectiveNestedStruct(StringRef Directive,
4601  DirectiveKind DirKind) {
4602  if (StructInProgress.empty())
4603  return TokError("missing name in top-level '" + Twine(Directive) +
4604  "' directive");
4605 
4606  StringRef Name;
4607  if (getTok().is(AsmToken::Identifier)) {
4608  Name = getTok().getIdentifier();
4609  parseToken(AsmToken::Identifier);
4610  }
4611  if (parseToken(AsmToken::EndOfStatement))
4612  return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4613 
4614  // Reserve space to ensure Alignment doesn't get invalidated when
4615  // StructInProgress grows.
4616  StructInProgress.reserve(StructInProgress.size() + 1);
4617  StructInProgress.emplace_back(Name, DirKind == DK_UNION,
4618  StructInProgress.back().Alignment);
4619  return false;
4620 }
4621 
4622 bool MasmParser::parseDirectiveEnds(StringRef Name, SMLoc NameLoc) {
4623  if (StructInProgress.empty())
4624  return Error(NameLoc, "ENDS directive without matching STRUC/STRUCT/UNION");
4625  if (StructInProgress.size() > 1)
4626  return Error(NameLoc, "unexpected name in nested ENDS directive");
4627  if (StructInProgress.back().Name.compare_insensitive(Name))
4628  return Error(NameLoc, "mismatched name in ENDS directive; expected '" +
4629  StructInProgress.back().Name + "'");
4630  StructInfo Structure = StructInProgress.pop_back_val();
4631  // Pad to make the structure's size divisible by the smaller of its alignment
4632  // and the size of its largest field.
4633  Structure.Size = llvm::alignTo(
4634  Structure.Size, std::min(Structure.Alignment, Structure.AlignmentSize));
4635  Structs[Name.lower()] = Structure;
4636 
4637  if (parseToken(AsmToken::EndOfStatement))
4638  return addErrorSuffix(" in ENDS directive");
4639 
4640  return false;
4641 }
4642 
4643 bool MasmParser::parseDirectiveNestedEnds() {
4644  if (StructInProgress.empty())
4645  return TokError("ENDS directive without matching STRUC/STRUCT/UNION");
4646  if (StructInProgress.size() == 1)
4647  return TokError("missing name in top-level ENDS directive");
4648 
4649  if (parseToken(AsmToken::EndOfStatement))
4650  return addErrorSuffix(" in nested ENDS directive");
4651 
4652  StructInfo Structure = StructInProgress.pop_back_val();
4653  // Pad to make the structure's size divisible by its alignment.
4654  Structure.Size = llvm::alignTo(Structure.Size, Structure.Alignment);
4655 
4656  StructInfo &ParentStruct = StructInProgress.back();
4657  if (Structure.Name.empty()) {
4658  // Anonymous substructures' fields are addressed as if they belong to the
4659  // parent structure - so we transfer them to the parent here.
4660  const size_t OldFields = ParentStruct.Fields.size();
4661  ParentStruct.Fields.insert(
4662  ParentStruct.Fields.end(),
4663  std::make_move_iterator(Structure.Fields.begin()),
4664  std::make_move_iterator(Structure.Fields.end()));
4665  for (const auto &FieldByName : Structure.FieldsByName) {
4666  ParentStruct.FieldsByName[FieldByName.getKey()] =
4667  FieldByName.getValue() + OldFields;
4668  }
4669 
4670  unsigned FirstFieldOffset = 0;
4671  if (!Structure.Fields.empty() && !ParentStruct.IsUnion) {
4672  FirstFieldOffset = llvm::alignTo(
4673  ParentStruct.NextOffset,
4674  std::min(ParentStruct.Alignment, Structure.AlignmentSize));
4675  }
4676 
4677  if (ParentStruct.IsUnion) {
4678  ParentStruct.Size = std::max(ParentStruct.Size, Structure.Size);
4679  } else {
4680  for (auto FieldIter = ParentStruct.Fields.begin() + OldFields;
4681  FieldIter != ParentStruct.Fields.end(); ++FieldIter) {
4682  FieldIter->Offset += FirstFieldOffset;
4683  }
4684 
4685  const unsigned StructureEnd = FirstFieldOffset + Structure.Size;
4686  if (!ParentStruct.IsUnion) {
4687  ParentStruct.NextOffset = StructureEnd;
4688  }
4689  ParentStruct.Size = std::max(ParentStruct.Size, StructureEnd);
4690  }
4691  } else {
4692  FieldInfo &Field = ParentStruct.addField(Structure.Name, FT_STRUCT,
4693  Structure.AlignmentSize);
4694  StructFieldInfo &StructInfo = Field.Contents.StructInfo;
4695  Field.Type = Structure.Size;
4696  Field.LengthOf = 1;
4697  Field.SizeOf = Structure.Size;
4698 
4699  const unsigned StructureEnd = Field.Offset + Field.SizeOf;
4700  if (!ParentStruct.IsUnion) {
4701  ParentStruct.NextOffset = StructureEnd;
4702  }
4703  ParentStruct.Size = std::max(ParentStruct.Size, StructureEnd);
4704 
4705  StructInfo.Structure = Structure;
4706  StructInfo.Initializers.emplace_back();
4707  auto &FieldInitializers = StructInfo.Initializers.back().FieldInitializers;
4708  for (const auto &SubField : Structure.Fields) {
4709  FieldInitializers.push_back(SubField.Contents);
4710  }
4711  }
4712 
4713  return false;
4714 }
4715 
4716 /// parseDirectiveOrg
4717 /// ::= org expression
4718 bool MasmParser::parseDirectiveOrg() {
4719  const MCExpr *Offset;
4720  SMLoc OffsetLoc = Lexer.getLoc();
4721  if (checkForValidSection() || parseExpression(Offset))
4722  return true;
4723  if (parseToken(AsmToken::EndOfStatement))
4724  return addErrorSuffix(" in 'org' directive");
4725 
4726  if (StructInProgress.empty()) {
4727  // Not in a struct; change the offset for the next instruction or data
4728  if (checkForValidSection())
4729  return addErrorSuffix(" in 'org' directive");
4730 
4731  getStreamer().emitValueToOffset(Offset, 0, OffsetLoc);
4732  } else {
4733  // Offset the next field of this struct
4734  StructInfo &Structure = StructInProgress.back();
4735  int64_t OffsetRes;
4736  if (!Offset->evaluateAsAbsolute(OffsetRes, getStreamer().getAssemblerPtr()))
4737  return Error(OffsetLoc,
4738  "expected absolute expression in 'org' directive");
4739  if (OffsetRes < 0)
4740  return Error(
4741  OffsetLoc,
4742  "expected non-negative value in struct's 'org' directive; was " +
4743  std::to_string(OffsetRes));
4744  Structure.NextOffset = static_cast<unsigned>(OffsetRes);
4745 
4746  // ORG-affected structures cannot be initialized
4747  Structure.Initializable = false;
4748  }
4749 
4750  return false;
4751 }
4752 
4753 bool MasmParser::emitAlignTo(int64_t Alignment) {
4754  if (StructInProgress.empty()) {
4755  // Not in a struct; align the next instruction or data
4756  if (checkForValidSection())
4757  return true;
4758 
4759  // Check whether we should use optimal code alignment for this align
4760  // directive.
4761  const MCSection *Section = getStreamer().getCurrentSectionOnly();
4762  assert(Section && "must have section to emit alignment");
4763  if (Section->useCodeAlign()) {
4764  getStreamer().emitCodeAlignment(Alignment, &getTargetParser().getSTI(),
4765  /*MaxBytesToEmit=*/0);
4766  } else {
4767  // FIXME: Target specific behavior about how the "extra" bytes are filled.
4768  getStreamer().emitValueToAlignment(Alignment, /*Value=*/0,
4769  /*ValueSize=*/1,
4770  /*MaxBytesToEmit=*/0);
4771  }
4772  } else {
4773  // Align the next field of this struct
4774  StructInfo &Structure = StructInProgress.back();
4775  Structure.NextOffset = llvm::alignTo(Structure.NextOffset, Alignment);
4776  }
4777 
4778  return false;
4779 }
4780 
4781 /// parseDirectiveAlign
4782 /// ::= align expression
4783 bool MasmParser::parseDirectiveAlign() {
4784  SMLoc AlignmentLoc = getLexer().getLoc();
4785  int64_t Alignment;
4786 
4787  // Ignore empty 'align' directives.
4788  if (getTok().is(AsmToken::EndOfStatement)) {
4789  return Warning(AlignmentLoc,
4790  "align directive with no operand is ignored") &&
4791  parseToken(AsmToken::EndOfStatement);
4792  }
4793  if (parseAbsoluteExpression(Alignment) ||
4794  parseToken(AsmToken::EndOfStatement))
4795  return addErrorSuffix(" in align directive");
4796 
4797  // Always emit an alignment here even if we throw an error.
4798  bool ReturnVal = false;
4799 
4800  // Reject alignments that aren't either a power of two or zero, for ML.exe
4801  // compatibility. Alignment of zero is silently rounded up to one.
4802  if (Alignment == 0)
4803  Alignment = 1;
4804  if (!isPowerOf2_64(Alignment))
4805  ReturnVal |= Error(AlignmentLoc, "alignment must be a power of 2; was " +
4806  std::to_string(Alignment));
4807 
4808  if (emitAlignTo(Alignment))
4809  ReturnVal |= addErrorSuffix(" in align directive");
4810 
4811  return ReturnVal;
4812 }
4813 
4814 /// parseDirectiveEven
4815 /// ::= even
4816 bool MasmParser::parseDirectiveEven() {
4817  if (parseToken(AsmToken::EndOfStatement) || emitAlignTo(2))
4818  return addErrorSuffix(" in even directive");
4819 
4820  return false;
4821 }
4822 
4823 /// parseDirectiveFile
4824 /// ::= .file filename
4825 /// ::= .file number [directory] filename [md5 checksum] [source source-text]
4826 bool MasmParser::parseDirectiveFile(SMLoc DirectiveLoc) {
4827  // FIXME: I'm not sure what this is.
4828  int64_t FileNumber = -1;
4829  if (getLexer().is(AsmToken::Integer)) {
4830  FileNumber = getTok().getIntVal();
4831  Lex();
4832 
4833  if (FileNumber < 0)
4834  return TokError("negative file number");
4835  }
4836 
4837  std::string Path;
4838 
4839  // Usually the directory and filename together, otherwise just the directory.
4840  // Allow the strings to have escaped octal character sequence.
4841  if (check(getTok().isNot(AsmToken::String),
4842  "unexpected token in '.file' directive") ||
4843  parseEscapedString(Path))
4844  return true;
4845 
4846  StringRef Directory;
4847  StringRef Filename;
4848  std::string FilenameData;
4849  if (getLexer().is(AsmToken::String)) {
4850  if (check(FileNumber == -1,
4851  "explicit path specified, but no file number") ||
4852  parseEscapedString(FilenameData))
4853  return true;
4854  Filename = FilenameData;
4855  Directory = Path;
4856  } else {
4857  Filename = Path;
4858  }
4859 
4860  uint64_t MD5Hi, MD5Lo;
4861  bool HasMD5 = false;
4862 
4864  bool HasSource = false;
4865  std::string SourceString;
4866 
4867  while (!parseOptionalToken(AsmToken::EndOfStatement)) {
4869  if (check(getTok().isNot(AsmToken::Identifier),
4870  "unexpected token in '.file' directive") ||
4871  parseIdentifier(Keyword))
4872  return true;
4873  if (Keyword == "md5") {
4874  HasMD5 = true;
4875  if (check(FileNumber == -1,
4876  "MD5 checksum specified, but no file number") ||
4877  parseHexOcta(*this, MD5Hi, MD5Lo))
4878  return true;
4879  } else if (Keyword == "source") {
4880  HasSource = true;
4881  if (check(FileNumber == -1,
4882  "source specified, but no file number") ||
4883  check(getTok().isNot(AsmToken::String),
4884  "unexpected token in '.file' directive") ||
4885  parseEscapedString(SourceString))
4886  return true;
4887  } else {
4888  return TokError("unexpected token in '.file' directive");
4889  }
4890  }
4891 
4892  if (FileNumber == -1) {
4893  // Ignore the directive if there is no number and the target doesn't support
4894  // numberless .file directives. This allows some portability of assembler
4895  // between different object file formats.
4896  if (getContext().getAsmInfo()->hasSingleParameterDotFile())
4897  getStreamer().emitFileDirective(Filename);
4898  } else {
4899  // In case there is a -g option as well as debug info from directive .file,
4900  // we turn off the -g option, directly use the existing debug info instead.
4901  // Throw away any implicit file table for the assembler source.
4902  if (Ctx.getGenDwarfForAssembly()) {
4904  Ctx.setGenDwarfForAssembly(false);
4905  }
4906 
4908  if (HasMD5) {
4909  MD5::MD5Result Sum;
4910  for (unsigned i = 0; i != 8; ++i) {
4911  Sum[i] = uint8_t(MD5Hi >> ((7 - i) * 8));
4912  Sum[i + 8] = uint8_t(MD5Lo >> ((7 - i) * 8));
4913  }
4914  CKMem = Sum;
4915  }
4916  if (HasSource) {
4917  char *SourceBuf = static_cast<char *>(Ctx.allocate(SourceString.size()));
4918  memcpy(SourceBuf, SourceString.data(), SourceString.size());
4919  Source = StringRef(SourceBuf, SourceString.size());
4920  }
4921  if (FileNumber == 0) {
4922  if (Ctx.getDwarfVersion() < 5)
4923  return Warning(DirectiveLoc, "file 0 not supported prior to DWARF-5");
4924  getStreamer().emitDwarfFile0Directive(Directory, Filename, CKMem, Source);
4925  } else {
4926  Expected<unsigned> FileNumOrErr = getStreamer().tryEmitDwarfFileDirective(
4927  FileNumber, Directory, Filename, CKMem, Source);
4928  if (!FileNumOrErr)
4929  return Error(DirectiveLoc, toString(FileNumOrErr.takeError()));
4930  }
4931  // Alert the user if there are some .file directives with MD5 and some not.
4932  // But only do that once.
4933  if (!ReportedInconsistentMD5 && !Ctx.isDwarfMD5UsageConsistent(0)) {
4934  ReportedInconsistentMD5 = true;
4935  return Warning(DirectiveLoc, "inconsistent use of MD5 checksums");
4936  }
4937  }
4938 
4939  return false;
4940 }
4941 
4942 /// parseDirectiveLine
4943 /// ::= .line [number]
4944 bool MasmParser::parseDirectiveLine() {
4945  int64_t LineNumber;
4946  if (getLexer().is(AsmToken::Integer)) {
4947  if (parseIntToken(LineNumber, "unexpected token in '.line' directive"))
4948  return true;
4949  (void)LineNumber;
4950  // FIXME: Do something with the .line.
4951  }
4952  if (parseToken(AsmToken::EndOfStatement,
4953  "unexpected token in '.line' directive"))
4954  return true;
4955 
4956  return false;
4957 }
4958 
4959 /// parseDirectiveLoc
4960 /// ::= .loc FileNumber [LineNumber] [ColumnPos] [basic_block] [prologue_end]
4961 /// [epilogue_begin] [is_stmt VALUE] [isa VALUE]
4962 /// The first number is a file number, must have been previously assigned with
4963 /// a .file directive, the second number is the line number and optionally the
4964 /// third number is a column position (zero if not specified). The remaining
4965 /// optional items are .loc sub-directives.
4966 bool MasmParser::parseDirectiveLoc() {
4967  int64_t FileNumber = 0, LineNumber = 0;
4968  SMLoc Loc = getTok().getLoc();
4969  if (parseIntToken(FileNumber, "unexpected token in '.loc' directive") ||
4970  check(FileNumber < 1 && Ctx.getDwarfVersion() < 5, Loc,
4971  "file number less than one in '.loc' directive") ||
4972  check(!getContext().isValidDwarfFileNumber(FileNumber), Loc,
4973  "unassigned file number in '.loc' directive"))
4974  return true;
4975 
4976  // optional
4977  if (getLexer().is(AsmToken::Integer)) {
4978  LineNumber = getTok().getIntVal();
4979  if (LineNumber < 0)
4980  return TokError("line number less than zero in '.loc' directive");
4981  Lex();
4982  }
4983 
4984  int64_t ColumnPos = 0;
4985  if (getLexer().is(AsmToken::Integer)) {
4986  ColumnPos = getTok().getIntVal();
4987  if (ColumnPos < 0)
4988  return TokError("column position less than zero in '.loc' directive");
4989  Lex();
4990  }
4991 
4992  auto PrevFlags = getContext().getCurrentDwarfLoc().getFlags();
4993  unsigned Flags = PrevFlags & DWARF2_FLAG_IS_STMT;
4994  unsigned Isa = 0;
4995  int64_t Discriminator = 0;
4996 
4997  auto parseLocOp = [&]() -> bool {
4998  StringRef Name;
4999  SMLoc Loc = getTok().getLoc();
5000  if (parseIdentifier(Name))
5001  return TokError("unexpected token in '.loc' directive");
5002 
5003  if (Name == "basic_block")
5004  Flags |= DWARF2_FLAG_BASIC_BLOCK;
5005  else if (Name == "prologue_end")
5006  Flags |= DWARF2_FLAG_PROLOGUE_END;
5007  else if (Name == "epilogue_begin")
5008  Flags |= DWARF2_FLAG_EPILOGUE_BEGIN;
5009  else if (Name == "is_stmt") {
5010  Loc = getTok().getLoc();
5011  const MCExpr *Value;
5012  if (parseExpression(Value))
5013  return true;
5014  // The expression must be the constant 0 or 1.
5015  if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
5016  int Value = MCE->getValue();
5017  if (Value == 0)
5018  Flags &= ~DWARF2_FLAG_IS_STMT;
5019  else if (Value == 1)
5020  Flags |= DWARF2_FLAG_IS_STMT;
5021  else
5022  return Error(Loc, "is_stmt value not 0 or 1");
5023  } else {
5024  return Error(Loc, "is_stmt value not the constant value of 0 or 1");
5025  }
5026  } else if (Name == "isa") {
5027  Loc = getTok().getLoc();
5028  const MCExpr *Value;
5029  if (parseExpression(Value))
5030  return true;
5031  // The expression must be a constant greater or equal to 0.
5032  if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
5033  int Value = MCE->getValue();
5034  if (Value < 0)
5035  return Error(Loc, "isa number less than zero");
5036  Isa = Value;
5037  } else {
5038  return Error(Loc, "isa number not a constant value");
5039  }
5040  } else if (Name == "discriminator") {
5041  if (parseAbsoluteExpression(Discriminator))
5042  return true;
5043  } else {
5044  return Error(Loc, "unknown sub-directive in '.loc' directive");
5045  }
5046  return false;
5047  };
5048 
5049  if (parseMany(parseLocOp, false /*hasComma*/))
5050  return true;
5051 
5052  getStreamer().emitDwarfLocDirective(FileNumber, LineNumber, ColumnPos, Flags,
5053  Isa, Discriminator, StringRef());
5054 
5055  return false;
5056 }
5057 
5058 /// parseDirectiveStabs
5059 /// ::= .stabs string, number, number, number
5060 bool MasmParser::parseDirectiveStabs() {
5061  return TokError("unsupported directive '.stabs'");
5062 }
5063 
5064 /// parseDirectiveCVFile
5065 /// ::= .cv_file number filename [checksum] [checksumkind]
5066 bool MasmParser::parseDirectiveCVFile() {
5067  SMLoc FileNumberLoc = getTok().getLoc();
5068  int64_t FileNumber;
5069  std::string Filename;
5070  std::string Checksum;
5071  int64_t ChecksumKind = 0;
5072 
5073  if (parseIntToken(FileNumber,
5074  "expected file number in '.cv_file' directive") ||
5075  check(FileNumber < 1, FileNumberLoc, "file number less than one") ||
5076  check(getTok().isNot(AsmToken::String),
5077  "unexpected token in '.cv_file' directive") ||
5078  parseEscapedString(Filename))
5079  return true;
5080  if (!parseOptionalToken(AsmToken::EndOfStatement)) {
5081  if (check(getTok().isNot(AsmToken::String),
5082  "unexpected token in '.cv_file' directive") ||
5083  parseEscapedString(Checksum) ||
5084  parseIntToken(ChecksumKind,
5085  "expected checksum kind in '.cv_file' directive") ||
5086  parseToken(AsmToken::EndOfStatement,
5087  "unexpected token in '.cv_file' directive"))
5088  return true;
5089  }
5090 
5091  Checksum = fromHex(Checksum);
5092  void *CKMem = Ctx.allocate(Checksum.size(), 1);
5093  memcpy(CKMem, Checksum.data(), Checksum.size());
5094  ArrayRef<uint8_t> ChecksumAsBytes(reinterpret_cast<const uint8_t *>(CKMem),
5095  Checksum.size());
5096 
5097  if (!getStreamer().EmitCVFileDirective(FileNumber, Filename, ChecksumAsBytes,
5098  static_cast<uint8_t>(ChecksumKind)))
5099  return Error(FileNumberLoc, "file number already allocated");
5100 
5101  return false;
5102 }
5103 
5104 bool MasmParser::parseCVFunctionId(int64_t &FunctionId,
5105  StringRef DirectiveName) {
5106  SMLoc Loc;
5107  return parseTokenLoc(Loc) ||
5108  parseIntToken(FunctionId, "expected function id in '" + DirectiveName +
5109  "' directive") ||
5110  check(FunctionId < 0 || FunctionId >= UINT_MAX, Loc,
5111  "expected function id within range [0, UINT_MAX)");
5112 }
5113 
5114 bool MasmParser::parseCVFileId(int64_t &FileNumber, StringRef DirectiveName) {
5115  SMLoc Loc;
5116  return parseTokenLoc(Loc) ||
5117  parseIntToken(FileNumber, "expected integer in '" + DirectiveName +
5118  "' directive") ||
5119  check(FileNumber < 1, Loc, "file number less than one in '" +
5120  DirectiveName + "' directive") ||
5121  check(!getCVContext().isValidFileNumber(FileNumber), Loc,
5122  "unassigned file number in '" + DirectiveName + "' directive");
5123 }
5124 
5125 /// parseDirectiveCVFuncId
5126 /// ::= .cv_func_id FunctionId
5127 ///
5128 /// Introduces a function ID that can be used with .cv_loc.
5129 bool MasmParser::parseDirectiveCVFuncId() {
5130  SMLoc FunctionIdLoc = getTok().getLoc();
5131  int64_t FunctionId;
5132 
5133  if (parseCVFunctionId(FunctionId, ".cv_func_id") ||
5134  parseToken(AsmToken::EndOfStatement,
5135  "unexpected token in '.cv_func_id' directive"))
5136  return true;
5137 
5138  if (!getStreamer().EmitCVFuncIdDirective(FunctionId))
5139  return Error(FunctionIdLoc, "function id already allocated");
5140 
5141  return false;
5142 }
5143 
5144 /// parseDirectiveCVInlineSiteId
5145 /// ::= .cv_inline_site_id FunctionId
5146 /// "within" IAFunc
5147 /// "inlined_at" IAFile IALine [IACol]
5148 ///
5149 /// Introduces a function ID that can be used with .cv_loc. Includes "inlined
5150 /// at" source location information for use in the line table of the caller,
5151 /// whether the caller is a real function or another inlined call site.
5152 bool MasmParser::parseDirectiveCVInlineSiteId() {
5153  SMLoc FunctionIdLoc = getTok().getLoc();
5154  int64_t FunctionId;
5155  int64_t IAFunc;
5156  int64_t IAFile;
5157  int64_t IALine;
5158  int64_t IACol = 0;
5159 
5160  // FunctionId
5161  if (parseCVFunctionId(FunctionId, ".cv_inline_site_id"))
5162  return true;
5163 
5164  // "within"
5165  if (check((getLexer().isNot(AsmToken::Identifier) ||
5166  getTok().getIdentifier() != "within"),
5167  "expected 'within' identifier in '.cv_inline_site_id' directive"))
5168  return true;
5169  Lex();
5170 
5171  // IAFunc
5172  if (parseCVFunctionId(IAFunc, ".cv_inline_site_id"))
5173  return true;
5174 
5175  // "inlined_at"
5176  if (check((getLexer().isNot(AsmToken::Identifier) ||
5177  getTok().getIdentifier() != "inlined_at"),
5178  "expected 'inlined_at' identifier in '.cv_inline_site_id' "
5179  "directive") )
5180  return true;
5181  Lex();
5182 
5183  // IAFile IALine
5184  if (parseCVFileId(IAFile, ".cv_inline_site_id") ||
5185  parseIntToken(IALine, "expected line number after 'inlined_at'"))
5186  return true;
5187 
5188  // [IACol]
5189  if (getLexer().is(AsmToken::Integer)) {
5190  IACol = getTok().getIntVal();
5191  Lex();
5192  }
5193 
5194  if (parseToken(AsmToken::EndOfStatement,
5195  "unexpected token in '.cv_inline_site_id' directive"))
5196  return true;
5197 
5198  if (!getStreamer().EmitCVInlineSiteIdDirective(FunctionId, IAFunc, IAFile,
5199  IALine, IACol, FunctionIdLoc))
5200  return Error(FunctionIdLoc, "function id already allocated");
5201 
5202  return false;
5203 }
5204 
5205 /// parseDirectiveCVLoc
5206 /// ::= .cv_loc FunctionId FileNumber [LineNumber] [ColumnPos] [prologue_end]
5207 /// [is_stmt VALUE]
5208 /// The first number is a file number, must have been previously assigned with
5209 /// a .file directive, the second number is the line number and optionally the
5210 /// third number is a column position (zero if not specified). The remaining
5211 /// optional items are .loc sub-directives.
5212 bool MasmParser::parseDirectiveCVLoc() {
5213  SMLoc DirectiveLoc = getTok().getLoc();
5214  int64_t FunctionId, FileNumber;
5215  if (parseCVFunctionId(FunctionId, ".cv_loc") ||
5216  parseCVFileId(FileNumber, ".cv_loc"))
5217  return true;
5218 
5219  int64_t LineNumber = 0;
5220  if (getLexer().is(AsmToken::Integer)) {
5221  LineNumber = getTok().getIntVal();
5222  if (LineNumber < 0)
5223  return TokError("line number less than zero in '.cv_loc' directive");
5224  Lex();
5225  }
5226 
5227  int64_t ColumnPos = 0;
5228  if (getLexer().is(AsmToken::Integer)) {
5229  ColumnPos = getTok().getIntVal();
5230  if (ColumnPos < 0)
5231  return TokError("column position less than zero in '.cv_loc' directive");
5232  Lex();
5233  }
5234 
5235  bool PrologueEnd = false;
5236  uint64_t IsStmt = 0;
5237 
5238  auto parseOp = [&]() -> bool {
5239  StringRef Name;
5240  SMLoc Loc = getTok().getLoc();
5241  if (parseIdentifier(Name))
5242  return TokError("unexpected token in '.cv_loc' directive");
5243  if (Name == "prologue_end")
5244  PrologueEnd = true;
5245  else if (Name == "is_stmt") {
5246  Loc = getTok().getLoc();
5247  const MCExpr *Value;
5248  if (parseExpression(Value))
5249  return true;
5250  // The expression must be the constant 0 or 1.
5251  IsStmt = ~0ULL;
5252  if (const auto *MCE = dyn_cast<MCConstantExpr>(Value))
5253  IsStmt = MCE->getValue();
5254 
5255  if (IsStmt > 1)
5256  return Error(Loc, "is_stmt value not 0 or 1");
5257  } else {
5258  return Error(Loc, "unknown sub-directive in '.cv_loc' directive");
5259  }
5260  return false;
5261  };
5262 
5263  if (parseMany(parseOp, false /*hasComma*/))
5264  return true;
5265 
5266  getStreamer().emitCVLocDirective(FunctionId, FileNumber, LineNumber,
5267  ColumnPos, PrologueEnd, IsStmt, StringRef(),
5268  DirectiveLoc);
5269  return false;
5270 }
5271 
5272 /// parseDirectiveCVLinetable
5273 /// ::= .cv_linetable FunctionId, FnStart, FnEnd
5274 bool MasmParser::parseDirectiveCVLinetable() {
5275  int64_t FunctionId;
5276  StringRef FnStartName, FnEndName;
5277  SMLoc Loc = getTok().getLoc();
5278  if (parseCVFunctionId(FunctionId, ".cv_linetable") ||
5279  parseToken(AsmToken::Comma,
5280  "unexpected token in '.cv_linetable' directive") ||
5281  parseTokenLoc(Loc) || check(parseIdentifier(FnStartName), Loc,
5282  "expected identifier in directive") ||
5283  parseToken(AsmToken::Comma,
5284  "unexpected token in '.cv_linetable' directive") ||
5285  parseTokenLoc(Loc) || check(parseIdentifier(FnEndName), Loc,
5286  "expected identifier in directive"))
5287  return true;
5288 
5289  MCSymbol *FnStartSym = getContext().getOrCreateSymbol(FnStartName);
5290  MCSymbol *FnEndSym = getContext().getOrCreateSymbol(FnEndName);
5291 
5292  getStreamer().emitCVLinetableDirective(FunctionId, FnStartSym, FnEndSym);
5293  return false;
5294 }
5295 
5296 /// parseDirectiveCVInlineLinetable
5297 /// ::= .cv_inline_linetable PrimaryFunctionId FileId LineNum FnStart FnEnd
5298 bool MasmParser::parseDirectiveCVInlineLinetable() {
5299  int64_t PrimaryFunctionId, SourceFileId, SourceLineNum;
5300  StringRef FnStartName, FnEndName;
5301  SMLoc Loc = getTok().getLoc();
5302  if (parseCVFunctionId(PrimaryFunctionId, ".cv_inline_linetable") ||
5303  parseTokenLoc(Loc) ||
5304  parseIntToken(
5305  SourceFileId,
5306  "expected SourceField in '.cv_inline_linetable' directive") ||
5307  check(SourceFileId <= 0, Loc,
5308  "File id less than zero in '.cv_inline_linetable' directive") ||
5309  parseTokenLoc(Loc) ||
5310  parseIntToken(
5311  SourceLineNum,
5312  "expected SourceLineNum in '.cv_inline_linetable' directive") ||
5313  check(SourceLineNum < 0, Loc,
5314  "Line number less than zero in '.cv_inline_linetable' directive") ||
5315  parseTokenLoc(Loc) || check(parseIdentifier(FnStartName), Loc,
5316  "expected identifier in directive") ||
5317  parseTokenLoc(Loc) || check(parseIdentifier(FnEndName), Loc,
5318  "expected identifier in directive"))
5319  return true;
5320 
5321  if (parseToken(AsmToken::EndOfStatement, "Expected End of Statement"))
5322  return true;
5323 
5324  MCSymbol *FnStartSym = getContext().getOrCreateSymbol(FnStartName);
5325  MCSymbol *FnEndSym = getContext().getOrCreateSymbol(FnEndName);
5326  getStreamer().emitCVInlineLinetableDirective(PrimaryFunctionId, SourceFileId,
5327  SourceLineNum, FnStartSym,
5328  FnEndSym);
5329  return false;
5330 }
5331 
5332 void MasmParser::initializeCVDefRangeTypeMap() {
5333  CVDefRangeTypeMap["reg"] = CVDR_DEFRANGE_REGISTER;
5334  CVDefRangeTypeMap["frame_ptr_rel"] = CVDR_DEFRANGE_FRAMEPOINTER_REL;
5335  CVDefRangeTypeMap["subfield_reg"] = CVDR_DEFRANGE_SUBFIELD_REGISTER;
5336  CVDefRangeTypeMap["reg_rel"] = CVDR_DEFRANGE_REGISTER_REL;
5337 }
5338 
5339 /// parseDirectiveCVDefRange
5340 /// ::= .cv_def_range RangeStart RangeEnd (GapStart GapEnd)*, bytes*
5341 bool MasmParser::parseDirectiveCVDefRange() {
5342  SMLoc Loc;
5343  std::vector<std::pair<const MCSymbol *, const MCSymbol *>> Ranges;
5344  while (getLexer().is(AsmToken::Identifier)) {
5345  Loc = getLexer().getLoc();
5346  StringRef GapStartName;
5347  if (parseIdentifier(GapStartName))
5348  return Error(Loc, "expected identifier in directive");
5349  MCSymbol *GapStartSym = getContext().getOrCreateSymbol(GapStartName);
5350 
5351  Loc = getLexer().getLoc();
5352  StringRef GapEndName;
5353  if (parseIdentifier(GapEndName))
5354  return Error(Loc, "expected identifier in directive");
5355  MCSymbol *GapEndSym = getContext().getOrCreateSymbol(GapEndName);
5356 
5357  Ranges.push_back({GapStartSym, GapEndSym});
5358  }
5359 
5360  StringRef CVDefRangeTypeStr;
5361  if (parseToken(
5363  "expected comma before def_range type in .cv_def_range directive") ||
5364  parseIdentifier(CVDefRangeTypeStr))
5365  return Error(Loc, "expected def_range type in directive");
5366 
5368  CVDefRangeTypeMap.find(CVDefRangeTypeStr);
5369  CVDefRangeType CVDRType = (CVTypeIt == CVDefRangeTypeMap.end())
5370  ? CVDR_DEFRANGE
5371  : CVTypeIt->getValue();
5372  switch (CVDRType) {
5373  case CVDR_DEFRANGE_REGISTER: {
5374  int64_t DRRegister;
5375  if (parseToken(AsmToken::Comma, "expected comma before register number in "
5376  ".cv_def_range directive") ||
5377  parseAbsoluteExpression(DRRegister))
5378  return Error(Loc, "expected register number");
5379 
5381  DRHdr.Register = DRRegister;
5382  DRHdr.MayHaveNoName = 0;
5383  getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5384  break;
5385  }
5386  case CVDR_DEFRANGE_FRAMEPOINTER_REL: {
5387  int64_t DROffset;
5388  if (parseToken(AsmToken::Comma,
5389  "expected comma before offset in .cv_def_range directive") ||
5390  parseAbsoluteExpression(DROffset))
5391  return Error(Loc, "expected offset value");
5392 
5394  DRHdr.Offset = DROffset;
5395  getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5396  break;
5397  }
5398  case CVDR_DEFRANGE_SUBFIELD_REGISTER: {
5399  int64_t DRRegister;
5400  int64_t DROffsetInParent;
5401  if (parseToken(AsmToken::Comma, "expected comma before register number in "
5402  ".cv_def_range directive") ||
5403  parseAbsoluteExpression(DRRegister))
5404  return Error(Loc, "expected register number");
5405  if (parseToken(AsmToken::Comma,
5406  "expected comma before offset in .cv_def_range directive") ||
5407  parseAbsoluteExpression(DROffsetInParent))
5408  return Error(Loc, "expected offset value");
5409 
5411  DRHdr.Register = DRRegister;
5412  DRHdr.MayHaveNoName = 0;
5413  DRHdr.OffsetInParent = DROffsetInParent;
5414  getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5415  break;
5416  }
5417  case CVDR_DEFRANGE_REGISTER_REL: {
5418  int64_t DRRegister;
5419  int64_t DRFlags;
5420  int64_t DRBasePointerOffset;
5421  if (parseToken(AsmToken::Comma, "expected comma before register number in "
5422  ".cv_def_range directive") ||
5423  parseAbsoluteExpression(DRRegister))
5424  return Error(Loc, "expected register value");
5425  if (parseToken(
5427  "expected comma before flag value in .cv_def_range directive") ||
5428  parseAbsoluteExpression(DRFlags))
5429  return Error(Loc, "expected flag value");
5430  if (parseToken(AsmToken::Comma, "expected comma before base pointer offset "
5431  "in .cv_def_range directive") ||
5432  parseAbsoluteExpression(DRBasePointerOffset))
5433  return Error(Loc, "expected base pointer offset value");
5434 
5436  DRHdr.Register = DRRegister;
5437  DRHdr.Flags = DRFlags;
5438  DRHdr.BasePointerOffset = DRBasePointerOffset;
5439  getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5440  break;
5441  }
5442  default:
5443  return Error(Loc, "unexpected def_range type in .cv_def_range directive");
5444  }
5445  return true;
5446 }
5447 
5448 /// parseDirectiveCVString
5449 /// ::= .cv_stringtable "string"
5450 bool MasmParser::parseDirectiveCVString() {
5451  std::string Data;
5452  if (checkForValidSection() || parseEscapedString(Data))
5453  return addErrorSuffix(" in '.cv_string' directive");
5454 
5455  // Put the string in the table and emit the offset.
5456  std::pair<StringRef, unsigned> Insertion =
5457  getCVContext().addToStringTable(Data);
5458  getStreamer().emitIntValue(Insertion.second, 4);
5459  return false;
5460 }
5461 
5462 /// parseDirectiveCVStringTable
5463 /// ::= .cv_stringtable
5464 bool MasmParser::parseDirectiveCVStringTable() {
5465  getStreamer().emitCVStringTableDirective();
5466  return false;
5467 }
5468 
5469 /// parseDirectiveCVFileChecksums
5470 /// ::= .cv_filechecksums
5471 bool MasmParser::parseDirectiveCVFileChecksums() {
5472  getStreamer().emitCVFileChecksumsDirective();
5473  return false;
5474 }
5475 
5476 /// parseDirectiveCVFileChecksumOffset
5477 /// ::= .cv_filechecksumoffset fileno
5478 bool MasmParser::parseDirectiveCVFileChecksumOffset() {
5479  int64_t FileNo;
5480  if (parseIntToken(FileNo, "expected identifier in directive"))
5481  return true;
5482  if (parseToken(AsmToken::EndOfStatement, "Expected End of Statement"))
5483  return true;
5484  getStreamer().emitCVFileChecksumOffsetDirective(FileNo);
5485  return false;
5486 }
5487 
5488 /// parseDirectiveCVFPOData
5489 /// ::= .cv_fpo_data procsym
5490 bool MasmParser::parseDirectiveCVFPOData() {
5491  SMLoc DirLoc = getLexer().getLoc();
5492  StringRef ProcName;
5493  if (parseIdentifier(ProcName))
5494  return TokError("expected symbol name");
5495  if (parseEOL("unexpected tokens"))
5496  return addErrorSuffix(" in '.cv_fpo_data' directive");
5497  MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName);
5498  getStreamer().EmitCVFPOData(ProcSym, DirLoc);
5499  return false;
5500 }
5501 
5502 /// parseDirectiveCFISections
5503 /// ::= .cfi_sections section [, section]
5504 bool MasmParser::parseDirectiveCFISections() {
5505  StringRef Name;
5506  bool EH = false;
5507  bool Debug = false;
5508 
5509  if (parseIdentifier(Name))
5510  return TokError("Expected an identifier");
5511 
5512  if (Name == ".eh_frame")
5513  EH = true;
5514  else if (Name == ".debug_frame")
5515  Debug = true;
5516 
5517  if (getLexer().is(AsmToken::Comma)) {
5518  Lex();
5519 
5520  if (parseIdentifier(Name))
5521  return TokError("Expected an identifier");
5522 
5523  if (Name == ".eh_frame")
5524  EH = true;
5525  else if (Name == ".debug_frame")
5526  Debug = true;
5527  }
5528 
5529  getStreamer().emitCFISections(EH, Debug);
5530  return false;
5531 }
5532 
5533 /// parseDirectiveCFIStartProc
5534 /// ::= .cfi_startproc [simple]
5535 bool MasmParser::parseDirectiveCFIStartProc() {
5536  StringRef Simple;
5537  if (!parseOptionalToken(AsmToken::EndOfStatement)) {
5538  if (check(parseIdentifier(Simple) || Simple != "simple",
5539  "unexpected token") ||
5540  parseToken(AsmToken::EndOfStatement))
5541  return addErrorSuffix(" in '.cfi_startproc' directive");
5542  }
5543 
5544  // TODO(kristina): Deal with a corner case of incorrect diagnostic context
5545  // being produced if this directive is emitted as part of preprocessor macro
5546  // expansion which can *ONLY* happen if Clang's cc1as is the API consumer.
5547  // Tools like llvm-mc on the other hand are not affected by it, and report
5548  // correct context information.
5549  getStreamer().emitCFIStartProc(!Simple.empty(), Lexer.getLoc());
5550  return false;
5551 }
5552 
5553 /// parseDirectiveCFIEndProc
5554 /// ::= .cfi_endproc
5555 bool MasmParser::parseDirectiveCFIEndProc() {
5556  getStreamer().emitCFIEndProc();
5557  return false;
5558 }
5559 
5560 /// parse register name or number.
5561 bool MasmParser::parseRegisterOrRegisterNumber(int64_t &Register,
5562  SMLoc DirectiveLoc) {
5563  unsigned RegNo;
5564 
5565  if (getLexer().isNot(AsmToken::Integer)) {
5566  if (getTargetParser().ParseRegister(RegNo, DirectiveLoc, DirectiveLoc))
5567  return true;
5568  Register = getContext().getRegisterInfo()->getDwarfRegNum(RegNo, true);
5569  } else
5570  return parseAbsoluteExpression(Register);
5571 
5572  return false;
5573 }
5574 
5575 /// parseDirectiveCFIDefCfa
5576 /// ::= .cfi_def_cfa register, offset
5577 bool MasmParser::parseDirectiveCFIDefCfa(SMLoc DirectiveLoc) {
5578  int64_t Register = 0, Offset = 0;
5579  if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) ||
5580  parseToken(AsmToken::Comma, "unexpected token in directive") ||
5581  parseAbsoluteExpression(Offset))
5582  return true;
5583 
5584  getStreamer().emitCFIDefCfa(Register, Offset);
5585  return false;
5586 }
5587 
5588 /// parseDirectiveCFIDefCfaOffset
5589 /// ::= .cfi_def_cfa_offset offset
5590 bool MasmParser::parseDirectiveCFIDefCfaOffset() {
5591  int64_t Offset = 0;
5592  if (parseAbsoluteExpression(Offset))
5593  return true;
5594 
5595  getStreamer().emitCFIDefCfaOffset(Offset);
5596  return false;
5597 }
5598 
5599 /// parseDirectiveCFIRegister
5600 /// ::= .cfi_register register, register
5601 bool MasmParser::parseDirectiveCFIRegister(SMLoc DirectiveLoc) {
5602  int64_t Register1 = 0, Register2 = 0;
5603  if (parseRegisterOrRegisterNumber(Register1, DirectiveLoc) ||
5604  parseToken(AsmToken::Comma, "unexpected token in directive") ||
5605  parseRegisterOrRegisterNumber(Register2, DirectiveLoc))
5606  return true;
5607 
5608  getStreamer().emitCFIRegister(Register1, Register2);
5609  return false;
5610 }
5611 
5612 /// parseDirectiveCFIWindowSave
5613 /// ::= .cfi_window_save
5614 bool MasmParser::parseDirectiveCFIWindowSave() {
5615  getStreamer().emitCFIWindowSave();
5616  return false;
5617 }
5618 
5619 /// parseDirectiveCFIAdjustCfaOffset
5620 /// ::= .cfi_adjust_cfa_offset adjustment
5621 bool MasmParser::parseDirectiveCFIAdjustCfaOffset() {
5622  int64_t Adjustment = 0;
5623  if (parseAbsoluteExpression(Adjustment))
5624  return true;
5625 
5626  getStreamer().emitCFIAdjustCfaOffset(Adjustment);
5627  return false;
5628 }
5629 
5630 /// parseDirectiveCFIDefCfaRegister
5631 /// ::= .cfi_def_cfa_register register
5632 bool MasmParser::parseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc) {
5633  int64_t Register = 0;
5634  if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5635  return true;
5636 
5637  getStreamer().emitCFIDefCfaRegister(Register);
5638  return false;
5639 }
5640 
5641 /// parseDirectiveCFIOffset
5642 /// ::= .cfi_offset register, offset
5643 bool MasmParser::parseDirectiveCFIOffset(SMLoc DirectiveLoc) {
5644  int64_t Register = 0;
5645  int64_t Offset = 0;
5646 
5647  if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) ||
5648  parseToken(AsmToken::Comma, "unexpected token in directive") ||
5649  parseAbsoluteExpression(Offset))
5650  return true;
5651 
5652  getStreamer().emitCFIOffset(Register, Offset);
5653  return false;
5654 }
5655 
5656 /// parseDirectiveCFIRelOffset
5657 /// ::= .cfi_rel_offset register, offset
5658 bool MasmParser::parseDirectiveCFIRelOffset(SMLoc DirectiveLoc) {
5659  int64_t Register = 0, Offset = 0;
5660 
5661  if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) ||
5662  parseToken(AsmToken::Comma, "unexpected token in directive") ||
5663  parseAbsoluteExpression(Offset))
5664  return true;
5665 
5666  getStreamer().emitCFIRelOffset(Register, Offset);
5667  return false;
5668 }
5669 
5670 static bool isValidEncoding(int64_t Encoding) {
5671  if (Encoding & ~0xff)
5672  return false;
5673 
5674  if (Encoding == dwarf::DW_EH_PE_omit)
5675  return true;
5676 
5677  const unsigned Format = Encoding & 0xf;
5682  return false;
5683 
5684  const unsigned Application = Encoding & 0x70;
5685  if (Application != dwarf::DW_EH_PE_absptr &&
5686  Application != dwarf::DW_EH_PE_pcrel)
5687  return false;
5688 
5689  return true;
5690 }
5691 
5692 /// parseDirectiveCFIPersonalityOrLsda
5693 /// IsPersonality true for cfi_personality, false for cfi_lsda
5694 /// ::= .cfi_personality encoding, [symbol_name]
5695 /// ::= .cfi_lsda encoding, [symbol_name]
5696 bool MasmParser::parseDirectiveCFIPersonalityOrLsda(bool IsPersonality) {
5697  int64_t Encoding = 0;
5698  if (parseAbsoluteExpression(Encoding))
5699  return true;
5700  if (Encoding == dwarf::DW_EH_PE_omit)
5701  return false;
5702 
5703  StringRef Name;
5704  if (check(!isValidEncoding(Encoding), "unsupported encoding.") ||
5705  parseToken(AsmToken::Comma, "unexpected token in directive") ||
5706  check(parseIdentifier(Name), "expected identifier in directive"))
5707  return true;
5708 
5709  MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
5710 
5711  if (IsPersonality)
5712  getStreamer().emitCFIPersonality(Sym, Encoding);
5713  else
5714  getStreamer().emitCFILsda(Sym, Encoding);
5715  return false;
5716 }
5717 
5718 /// parseDirectiveCFIRememberState
5719 /// ::= .cfi_remember_state
5720 bool MasmParser::parseDirectiveCFIRememberState() {
5721  getStreamer().emitCFIRememberState();
5722  return false;
5723 }
5724 
5725 /// parseDirectiveCFIRestoreState
5726 /// ::= .cfi_remember_state
5727 bool MasmParser::parseDirectiveCFIRestoreState() {
5728  getStreamer().emitCFIRestoreState();
5729  return false;
5730 }
5731 
5732 /// parseDirectiveCFISameValue
5733 /// ::= .cfi_same_value register
5734 bool MasmParser::parseDirectiveCFISameValue(SMLoc DirectiveLoc) {
5735  int64_t Register = 0;
5736 
5737  if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5738  return true;
5739 
5740  getStreamer().emitCFISameValue(Register);
5741  return false;
5742 }
5743 
5744 /// parseDirectiveCFIRestore
5745 /// ::= .cfi_restore register
5746 bool MasmParser::parseDirectiveCFIRestore(SMLoc DirectiveLoc) {
5747  int64_t Register = 0;
5748  if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5749  return true;
5750 
5751  getStreamer().emitCFIRestore(Register);
5752  return false;
5753 }
5754 
5755 /// parseDirectiveCFIEscape
5756 /// ::= .cfi_escape expression[,...]
5757 bool MasmParser::parseDirectiveCFIEscape() {
5758  std::string Values;
5759  int64_t CurrValue;
5760  if (parseAbsoluteExpression(CurrValue))
5761  return true;
5762 
5763  Values.push_back((uint8_t)CurrValue);
5764 
5765  while (getLexer().is(AsmToken::Comma)) {
5766  Lex();
5767 
5768  if (parseAbsoluteExpression(CurrValue))
5769  return true;
5770 
5771  Values.push_back((uint8_t)CurrValue);
5772  }
5773 
5774  getStreamer().emitCFIEscape(Values);
5775  return false;
5776 }
5777 
5778 /// parseDirectiveCFIReturnColumn
5779 /// ::= .cfi_return_column register
5780 bool MasmParser::parseDirectiveCFIReturnColumn(SMLoc DirectiveLoc) {
5781  int64_t Register = 0;
5782  if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5783  return true;
5784  getStreamer().emitCFIReturnColumn(Register);
5785  return false;
5786 }
5787 
5788 /// parseDirectiveCFISignalFrame
5789 /// ::= .cfi_signal_frame
5790 bool MasmParser::parseDirectiveCFISignalFrame() {
5791  if (parseToken(AsmToken::EndOfStatement,
5792  "unexpected token in '.cfi_signal_frame'"))
5793  return true;
5794 
5795  getStreamer().emitCFISignalFrame();
5796  return false;
5797 }
5798 
5799 /// parseDirectiveCFIUndefined
5800 /// ::= .cfi_undefined register
5801 bool MasmParser::parseDirectiveCFIUndefined(SMLoc DirectiveLoc) {
5802  int64_t Register = 0;
5803 
5804  if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5805  return true;
5806 
5807  getStreamer().emitCFIUndefined(Register);
5808  return false;
5809 }
5810 
5811 /// parseDirectiveMacro
5812 /// ::= name macro [parameters]
5813 /// ["LOCAL" identifiers]
5814 /// parameters ::= parameter [, parameter]*
5815 /// parameter ::= name ":" qualifier
5816 /// qualifier ::= "req" | "vararg" | "=" macro_argument
5817 bool MasmParser::parseDirectiveMacro(StringRef Name, SMLoc NameLoc) {
5818  MCAsmMacroParameters Parameters;
5819  while (getLexer().isNot(AsmToken::EndOfStatement)) {
5820  if (!Parameters.empty() && Parameters.back().Vararg)
5821  return Error(Lexer.getLoc(),
5822  "Vararg parameter '" + Parameters.back().Name +
5823  "' should be last in the list of parameters");
5824 
5825  MCAsmMacroParameter Parameter;
5826  if (parseIdentifier(Parameter.Name))
5827  return TokError("expected identifier in 'macro' directive");
5828 
5829  // Emit an error if two (or more) named parameters share the same name.
5830  for (const MCAsmMacroParameter& CurrParam : Parameters)
5831  if (CurrParam.Name.equals_insensitive(Parameter.Name))
5832  return TokError("macro '" + Name + "' has multiple parameters"
5833  " named '" + Parameter.Name + "'");
5834 
5835  if (Lexer.is(AsmToken::Colon)) {
5836  Lex(); // consume ':'
5837 
5838  if (parseOptionalToken(AsmToken::Equal)) {
5839  // Default value
5840  SMLoc ParamLoc;
5841 
5842  ParamLoc = Lexer.getLoc();
5843  if (parseMacroArgument(