LLVM  16.0.0git
MasmParser.cpp
Go to the documentation of this file.
1 //===- MasmParser.cpp - Parser for Assembly Files -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This class implements the parser for assembly files.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/ADT/APFloat.h"
14 #include "llvm/ADT/APInt.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/BitVector.h"
17 #include "llvm/ADT/None.h"
18 #include "llvm/ADT/Optional.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallString.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/StringExtras.h"
23 #include "llvm/ADT/StringMap.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/ADT/StringSwitch.h"
26 #include "llvm/ADT/Twine.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCCodeView.h"
31 #include "llvm/MC/MCContext.h"
32 #include "llvm/MC/MCDirectives.h"
33 #include "llvm/MC/MCDwarf.h"
34 #include "llvm/MC/MCExpr.h"
35 #include "llvm/MC/MCInstPrinter.h"
36 #include "llvm/MC/MCInstrDesc.h"
37 #include "llvm/MC/MCInstrInfo.h"
45 #include "llvm/MC/MCRegisterInfo.h"
46 #include "llvm/MC/MCSection.h"
47 #include "llvm/MC/MCStreamer.h"
49 #include "llvm/MC/MCSymbol.h"
51 #include "llvm/Support/Casting.h"
54 #include "llvm/Support/Format.h"
55 #include "llvm/Support/MD5.h"
58 #include "llvm/Support/Path.h"
59 #include "llvm/Support/SMLoc.h"
60 #include "llvm/Support/SourceMgr.h"
62 #include <algorithm>
63 #include <cassert>
64 #include <climits>
65 #include <cstddef>
66 #include <cstdint>
67 #include <ctime>
68 #include <deque>
69 #include <memory>
70 #include <optional>
71 #include <sstream>
72 #include <string>
73 #include <tuple>
74 #include <utility>
75 #include <vector>
76 
77 using namespace llvm;
78 
79 namespace {
80 
81 /// Helper types for tracking macro definitions.
82 typedef std::vector<AsmToken> MCAsmMacroArgument;
83 typedef std::vector<MCAsmMacroArgument> MCAsmMacroArguments;
84 
85 /// Helper class for storing information about an active macro instantiation.
86 struct MacroInstantiation {
87  /// The location of the instantiation.
88  SMLoc InstantiationLoc;
89 
90  /// The buffer where parsing should resume upon instantiation completion.
91  unsigned ExitBuffer;
92 
93  /// The location where parsing should resume upon instantiation completion.
94  SMLoc ExitLoc;
95 
96  /// The depth of TheCondStack at the start of the instantiation.
97  size_t CondStackDepth;
98 };
99 
/// Per-statement parse state: the opcode of the last parsed instruction, an
/// error flag, an optional macro exit value, and a pointer to a list of
/// rewrites. It cannot be default-constructed; the rewrite list pointer must
/// be passed in.
100 struct ParseStatementInfo {
101  /// The parsed operands from the last parsed statement.
 // NOTE(review): no member declaration follows the comment above in this
 // view of the file -- confirm against the original definition.
103 
104  /// The opcode from the last parsed instruction.
 /// Defaults to ~0U.
105  unsigned Opcode = ~0U;
106 
107  /// Was there an error parsing the inline assembly?
108  bool ParseError = false;
109 
110  /// The value associated with a macro exit.
 /// Empty unless a value has been stored.
111  std::optional<std::string> ExitValue;
112 
 /// Rewrite list supplied to the constructor; stored as a raw pointer.
113  SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr;
114 
115  ParseStatementInfo() = delete;
 /// Stores \p rewrites in AsmRewrites; all other members keep their defaults.
116  ParseStatementInfo(SmallVectorImpl<AsmRewrite> *rewrites)
117  : AsmRewrites(rewrites) {}
118 };
119 
/// Kind of contents a struct field holds. FieldInitializer stores one of
/// these values as the tag that says which member of its union is in use.
120 enum FieldType {
121  FT_INTEGRAL, // Initializer: integer expression, stored as an MCExpr.
122  FT_REAL, // Initializer: real number, stored as an APInt.
123  FT_STRUCT // Initializer: struct initializer, stored recursively.
124 };
125 
126 struct FieldInfo;
/// Description of a STRUCT or UNION: its name, layout bookkeeping, and its
/// fields (looked up by position or by lower-cased name).
127 struct StructInfo {
 // Name given to the constructor.
128  StringRef Name;
 // When set, addField() does not advance NextOffset.
129  bool IsUnion = false;
130  bool Initializable = true;
 // Cap on field alignment: addField() aligns each field to
 // min(Alignment, FieldAlignmentSize).
131  unsigned Alignment = 0;
 // Largest FieldAlignmentSize passed to addField() so far.
132  unsigned AlignmentSize = 0;
 // Offset from which addField() aligns the next field.
133  unsigned NextOffset = 0;
134  unsigned Size = 0;
 // Fields in the order they were added.
135  std::vector<FieldInfo> Fields;
 // Maps a lower-cased field name to its index in Fields; fields added with
 // an empty name have no entry.
136  StringMap<size_t> FieldsByName;
137 
 // Appends a field of kind FT, assigns its aligned offset, and returns it.
138  FieldInfo &addField(StringRef FieldName, FieldType FT,
139  unsigned FieldAlignmentSize);
140 
141  StructInfo() = default;
142  StructInfo(StringRef StructName, bool Union, unsigned AlignmentValue);
143 };
144 
145 // FIXME: This should probably use a class hierarchy, raw pointers between the
146 // objects, and dynamic type resolution instead of a union. On the other hand,
147 // ownership then becomes much more complicated; the obvious thing would be to
148 // use BumpPtrAllocator, but the lack of a destructor makes that messy.
149 
150 struct StructInitializer;
151 struct IntFieldInfo {
153 
154  IntFieldInfo() = default;
155  IntFieldInfo(const SmallVector<const MCExpr *, 1> &V) { Values = V; }
156  IntFieldInfo(SmallVector<const MCExpr *, 1> &&V) { Values = V; }
157 };
158 struct RealFieldInfo {
159  SmallVector<APInt, 1> AsIntValues;
160 
161  RealFieldInfo() = default;
162  RealFieldInfo(const SmallVector<APInt, 1> &V) { AsIntValues = V; }
163  RealFieldInfo(SmallVector<APInt, 1> &&V) { AsIntValues = V; }
164 };
/// Contents of a struct-typed field: a list of struct initializers together
/// with a copy of the description of the struct type.
165 struct StructFieldInfo {
166  std::vector<StructInitializer> Initializers;
167  StructInfo Structure;
168 
169  StructFieldInfo() = default;
 // Stores V as Initializers and S as Structure (defined out of line).
170  StructFieldInfo(std::vector<StructInitializer> V, StructInfo S);
171 };
172 
/// Initializer for a single field, stored as a hand-managed union of the three
/// per-kind content types. FT records which union member is in use; the
/// constructors, destructor and assignment operators construct and destroy
/// that member explicitly, since the union members have non-trivial types.
173 class FieldInitializer {
174 public:
 // Tag selecting the union member below that currently holds a value.
175  FieldType FT;
176  union {
177  IntFieldInfo IntInfo;
178  RealFieldInfo RealInfo;
179  StructFieldInfo StructInfo;
180  };
181 
 // Destroys whichever union member FT selects.
182  ~FieldInitializer();
 // Default-constructs the union member selected by FT.
183  FieldInitializer(FieldType FT);
184 
 // Each of these sets FT to the matching kind and constructs that member from
 // the given contents.
185  FieldInitializer(SmallVector<const MCExpr *, 1> &&Values);
186  FieldInitializer(SmallVector<APInt, 1> &&AsIntValues);
187  FieldInitializer(std::vector<StructInitializer> &&Initializers,
188  struct StructInfo Structure);
189 
190  FieldInitializer(const FieldInitializer &Initializer);
191  FieldInitializer(FieldInitializer &&Initializer);
192 
193  FieldInitializer &operator=(const FieldInitializer &Initializer);
194  FieldInitializer &operator=(FieldInitializer &&Initializer);
195 };
196 
/// Initializer for a struct value, held as a list of field initializers.
197 struct StructInitializer {
198  std::vector<FieldInitializer> FieldInitializers;
199 };
200 
/// Layout and default contents of one field of a STRUCT.
201 struct FieldInfo {
202  // Offset of the field within the containing STRUCT.
203  unsigned Offset = 0;
204 
205  // Total size of the field (= LengthOf * Type).
206  unsigned SizeOf = 0;
207 
208  // Number of elements in the field (1 if scalar, >1 if an array).
209  unsigned LengthOf = 0;
210 
211  // Size of a single entry in this field, in bytes ("type" in MASM standards).
212  unsigned Type = 0;
213 
 // The field's contents; its kind is fixed by the FieldType given to the
 // constructor.
214  FieldInitializer Contents;
215 
 // Creates a field whose Contents is default-constructed for kind FT; the
 // numeric members start at zero.
216  FieldInfo(FieldType FT) : Contents(FT) {}
217 };
218 
219 StructFieldInfo::StructFieldInfo(std::vector<StructInitializer> V,
220  StructInfo S) {
221  Initializers = std::move(V);
222  Structure = S;
223 }
224 
225 StructInfo::StructInfo(StringRef StructName, bool Union,
226  unsigned AlignmentValue)
227  : Name(StructName), IsUnion(Union), Alignment(AlignmentValue) {}
228 
229 FieldInfo &StructInfo::addField(StringRef FieldName, FieldType FT,
230  unsigned FieldAlignmentSize) {
231  if (!FieldName.empty())
232  FieldsByName[FieldName.lower()] = Fields.size();
233  Fields.emplace_back(FT);
234  FieldInfo &Field = Fields.back();
235  Field.Offset =
236  llvm::alignTo(NextOffset, std::min(Alignment, FieldAlignmentSize));
237  if (!IsUnion) {
238  NextOffset = std::max(NextOffset, Field.Offset);
239  }
240  AlignmentSize = std::max(AlignmentSize, FieldAlignmentSize);
241  return Field;
242 }
243 
/// Destroys the union member selected by FT. The union's members have
/// non-trivial types, so the destructor of the member in use is invoked
/// explicitly.
244 FieldInitializer::~FieldInitializer() {
245  switch (FT) {
246  case FT_INTEGRAL:
247  IntInfo.~IntFieldInfo();
248  break;
249  case FT_REAL:
250  RealInfo.~RealFieldInfo();
251  break;
252  case FT_STRUCT:
253  StructInfo.~StructFieldInfo();
254  break;
255  }
256 }
257 
/// Creates an initializer of kind \p FT with default-constructed contents.
/// Placement new is used to start the lifetime of the selected union member.
258 FieldInitializer::FieldInitializer(FieldType FT) : FT(FT) {
259  switch (FT) {
260  case FT_INTEGRAL:
261  new (&IntInfo) IntFieldInfo();
262  break;
263  case FT_REAL:
264  new (&RealInfo) RealFieldInfo();
265  break;
266  case FT_STRUCT:
267  new (&StructInfo) StructFieldInfo();
268  break;
269  }
270 }
271 
272 FieldInitializer::FieldInitializer(SmallVector<const MCExpr *, 1> &&Values)
273  : FT(FT_INTEGRAL) {
274  new (&IntInfo) IntFieldInfo(Values);
275 }
276 
277 FieldInitializer::FieldInitializer(SmallVector<APInt, 1> &&AsIntValues)
278  : FT(FT_REAL) {
279  new (&RealInfo) RealFieldInfo(AsIntValues);
280 }
281 
282 FieldInitializer::FieldInitializer(
283  std::vector<StructInitializer> &&Initializers, struct StructInfo Structure)
284  : FT(FT_STRUCT) {
285  new (&StructInfo) StructFieldInfo(std::move(Initializers), Structure);
286 }
287 
/// Copy constructor: adopts the source's kind and copy-constructs the matching
/// union member in place.
288 FieldInitializer::FieldInitializer(const FieldInitializer &Initializer)
289  : FT(Initializer.FT) {
290  switch (FT) {
291  case FT_INTEGRAL:
292  new (&IntInfo) IntFieldInfo(Initializer.IntInfo);
293  break;
294  case FT_REAL:
295  new (&RealInfo) RealFieldInfo(Initializer.RealInfo);
296  break;
297  case FT_STRUCT:
298  new (&StructInfo) StructFieldInfo(Initializer.StructInfo);
299  break;
300  }
301 }
302 
303 FieldInitializer::FieldInitializer(FieldInitializer &&Initializer)
304  : FT(Initializer.FT) {
305  switch (FT) {
306  case FT_INTEGRAL:
307  new (&IntInfo) IntFieldInfo(Initializer.IntInfo);
308  break;
309  case FT_REAL:
310  new (&RealInfo) RealFieldInfo(Initializer.RealInfo);
311  break;
312  case FT_STRUCT:
313  new (&StructInfo) StructFieldInfo(Initializer.StructInfo);
314  break;
315  }
316 }
317 
318 FieldInitializer &
319 FieldInitializer::operator=(const FieldInitializer &Initializer) {
320  if (FT != Initializer.FT) {
321  switch (FT) {
322  case FT_INTEGRAL:
323  IntInfo.~IntFieldInfo();
324  break;
325  case FT_REAL:
326  RealInfo.~RealFieldInfo();
327  break;
328  case FT_STRUCT:
329  StructInfo.~StructFieldInfo();
330  break;
331  }
332  }
333  FT = Initializer.FT;
334  switch (FT) {
335  case FT_INTEGRAL:
336  IntInfo = Initializer.IntInfo;
337  break;
338  case FT_REAL:
339  RealInfo = Initializer.RealInfo;
340  break;
341  case FT_STRUCT:
342  StructInfo = Initializer.StructInfo;
343  break;
344  }
345  return *this;
346 }
347 
348 FieldInitializer &FieldInitializer::operator=(FieldInitializer &&Initializer) {
349  if (FT != Initializer.FT) {
350  switch (FT) {
351  case FT_INTEGRAL:
352  IntInfo.~IntFieldInfo();
353  break;
354  case FT_REAL:
355  RealInfo.~RealFieldInfo();
356  break;
357  case FT_STRUCT:
358  StructInfo.~StructFieldInfo();
359  break;
360  }
361  }
362  FT = Initializer.FT;
363  switch (FT) {
364  case FT_INTEGRAL:
365  IntInfo = Initializer.IntInfo;
366  break;
367  case FT_REAL:
368  RealInfo = Initializer.RealInfo;
369  break;
370  case FT_STRUCT:
371  StructInfo = Initializer.StructInfo;
372  break;
373  }
374  return *this;
375 }
376 
377 /// The concrete assembly parser instance.
378 // Note that this is a full MCAsmParser, not an MCAsmParserExtension!
379 // It's a peer of AsmParser, not of COFFAsmParser, WasmAsmParser, etc.
380 class MasmParser : public MCAsmParser {
381 private:
382  AsmLexer Lexer;
383  MCContext &Ctx;
384  MCStreamer &Out;
385  const MCAsmInfo &MAI;
386  SourceMgr &SrcMgr;
387  SourceMgr::DiagHandlerTy SavedDiagHandler;
388  void *SavedDiagContext;
389  std::unique_ptr<MCAsmParserExtension> PlatformParser;
390 
391  /// This is the current buffer index we're lexing from as managed by the
392  /// SourceMgr object.
393  unsigned CurBuffer;
394 
395  /// time of assembly
396  struct tm TM;
397 
398  BitVector EndStatementAtEOFStack;
399 
400  AsmCond TheCondState;
401  std::vector<AsmCond> TheCondStack;
402 
403  /// maps directive names to handler methods in parser
404  /// extensions. Extensions register themselves in this map by calling
405  /// addDirectiveHandler.
406  StringMap<ExtensionDirectiveHandler> ExtensionDirectiveMap;
407 
408  /// maps assembly-time variable names to variables.
 /// An assembly-time variable: its name, whether it may be redefined, and,
 /// for text variables, its text value.
409  struct Variable {
 // How a redefinition of the variable is to be treated.
410  enum RedefinableKind { NOT_REDEFINABLE, WARN_ON_REDEFINITION, REDEFINABLE };
411 
412  StringRef Name;
 // Variables are redefinable unless stated otherwise.
413  RedefinableKind Redefinable = REDEFINABLE;
 // NOTE(review): presumably TextValue is only meaningful when IsText is
 // set -- confirm against the users of this struct.
414  bool IsText = false;
415  std::string TextValue;
416  };
417  StringMap<Variable> Variables;
418 
419  /// Stack of active struct definitions.
420  SmallVector<StructInfo, 1> StructInProgress;
421 
422  /// Maps struct tags to struct definitions.
423  StringMap<StructInfo> Structs;
424 
425  /// Maps data location names to types.
426  StringMap<AsmTypeInfo> KnownType;
427 
428  /// Stack of active macro instantiations.
429  std::vector<MacroInstantiation*> ActiveMacros;
430 
431  /// List of bodies of anonymous macros.
432  std::deque<MCAsmMacro> MacroLikeBodies;
433 
434  /// Keeps track of how many .macro's have been instantiated.
435  unsigned NumOfMacroInstantiations;
436 
437  /// The values from the last parsed cpp hash file line comment if any.
438  struct CppHashInfoTy {
440  int64_t LineNumber;
441  SMLoc Loc;
442  unsigned Buf;
443  CppHashInfoTy() : LineNumber(0), Buf(0) {}
444  };
445  CppHashInfoTy CppHashInfo;
446 
447  /// The filename from the first cpp hash file line comment, if any.
448  StringRef FirstCppHashFilename;
449 
450  /// List of forward directional labels for diagnosis at the end.
452 
453  /// AssemblerDialect. ~0U means unset value and use value provided by MAI.
454  /// Defaults to 1U, meaning Intel.
455  unsigned AssemblerDialect = 1U;
456 
457  /// is Darwin compatibility enabled?
458  bool IsDarwin = false;
459 
460  /// Are we parsing ms-style inline assembly?
461  bool ParsingMSInlineAsm = false;
462 
463  /// Did we already inform the user about inconsistent MD5 usage?
464  bool ReportedInconsistentMD5 = false;
465 
466  // Current <...> expression depth.
467  unsigned AngleBracketDepth = 0U;
468 
469  // Number of locals defined.
470  uint16_t LocalCounter = 0;
471 
472 public:
473  MasmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
474  const MCAsmInfo &MAI, struct tm TM, unsigned CB = 0);
475  MasmParser(const MasmParser &) = delete;
476  MasmParser &operator=(const MasmParser &) = delete;
477  ~MasmParser() override;
478 
479  bool Run(bool NoInitialTextSection, bool NoFinalize = false) override;
480 
482  ExtensionDirectiveHandler Handler) override {
483  ExtensionDirectiveMap[Directive] = Handler;
484  if (DirectiveKindMap.find(Directive) == DirectiveKindMap.end()) {
485  DirectiveKindMap[Directive] = DK_HANDLER_DIRECTIVE;
486  }
487  }
488 
489  void addAliasForDirective(StringRef Directive, StringRef Alias) override {
490  DirectiveKindMap[Directive] = DirectiveKindMap[Alias];
491  }
492 
493  /// @name MCAsmParser Interface
494  /// {
495 
496  SourceMgr &getSourceManager() override { return SrcMgr; }
497  MCAsmLexer &getLexer() override { return Lexer; }
498  MCContext &getContext() override { return Ctx; }
499  MCStreamer &getStreamer() override { return Out; }
500 
501  CodeViewContext &getCVContext() { return Ctx.getCVContext(); }
502 
503  unsigned getAssemblerDialect() override {
504  if (AssemblerDialect == ~0U)
505  return MAI.getAssemblerDialect();
506  else
507  return AssemblerDialect;
508  }
509  void setAssemblerDialect(unsigned i) override {
510  AssemblerDialect = i;
511  }
512 
513  void Note(SMLoc L, const Twine &Msg, SMRange Range = None) override;
514  bool Warning(SMLoc L, const Twine &Msg, SMRange Range = None) override;
515  bool printError(SMLoc L, const Twine &Msg, SMRange Range = None) override;
516 
517  enum ExpandKind { ExpandMacros, DoNotExpandMacros };
518  const AsmToken &Lex(ExpandKind ExpandNextToken);
519  const AsmToken &Lex() override { return Lex(ExpandMacros); }
520 
521  void setParsingMSInlineAsm(bool V) override {
522  ParsingMSInlineAsm = V;
523  // When parsing MS inline asm, we must lex 0b1101 and 0ABCH as binary and
524  // hex integer literals.
525  Lexer.setLexMasmIntegers(V);
526  }
527  bool isParsingMSInlineAsm() override { return ParsingMSInlineAsm; }
528 
529  bool isParsingMasm() const override { return true; }
530 
531  bool defineMacro(StringRef Name, StringRef Value) override;
532 
533  bool lookUpField(StringRef Name, AsmFieldInfo &Info) const override;
534  bool lookUpField(StringRef Base, StringRef Member,
535  AsmFieldInfo &Info) const override;
536 
537  bool lookUpType(StringRef Name, AsmTypeInfo &Info) const override;
538 
539  bool parseMSInlineAsm(std::string &AsmString, unsigned &NumOutputs,
540  unsigned &NumInputs,
541  SmallVectorImpl<std::pair<void *, bool>> &OpDecls,
542  SmallVectorImpl<std::string> &Constraints,
544  const MCInstrInfo *MII, const MCInstPrinter *IP,
545  MCAsmParserSemaCallback &SI) override;
546 
547  bool parseExpression(const MCExpr *&Res);
548  bool parseExpression(const MCExpr *&Res, SMLoc &EndLoc) override;
549  bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
550  AsmTypeInfo *TypeInfo) override;
551  bool parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) override;
552  bool parseParenExprOfDepth(unsigned ParenDepth, const MCExpr *&Res,
553  SMLoc &EndLoc) override;
554  bool parseAbsoluteExpression(int64_t &Res) override;
555 
556  /// Parse a floating point expression using the float \p Semantics
557  /// and set \p Res to the value.
558  bool parseRealValue(const fltSemantics &Semantics, APInt &Res);
559 
560  /// Parse an identifier or string (as a quoted identifier)
561  /// and set \p Res to the identifier contents.
562  enum IdentifierPositionKind { StandardPosition, StartOfStatement };
563  bool parseIdentifier(StringRef &Res, IdentifierPositionKind Position);
564  bool parseIdentifier(StringRef &Res) override {
565  return parseIdentifier(Res, StandardPosition);
566  }
567  void eatToEndOfStatement() override;
568 
569  bool checkForValidSection() override;
570 
571  /// }
572 
573 private:
574  bool expandMacros();
575  const AsmToken peekTok(bool ShouldSkipSpace = true);
576 
577  bool parseStatement(ParseStatementInfo &Info,
579  bool parseCurlyBlockScope(SmallVectorImpl<AsmRewrite>& AsmStrRewrites);
580  bool parseCppHashLineFilenameComment(SMLoc L);
581 
582  bool expandMacro(raw_svector_ostream &OS, StringRef Body,
585  const std::vector<std::string> &Locals, SMLoc L);
586 
587  /// Are we inside a macro instantiation?
588  bool isInsideMacroInstantiation() {return !ActiveMacros.empty();}
589 
590  /// Handle entry to macro instantiation.
591  ///
592  /// \param M The macro.
593  /// \param NameLoc Instantiation location.
594  bool handleMacroEntry(
595  const MCAsmMacro *M, SMLoc NameLoc,
596  AsmToken::TokenKind ArgumentEndTok = AsmToken::EndOfStatement);
597 
598  /// Handle invocation of macro function.
599  ///
600  /// \param M The macro.
601  /// \param NameLoc Invocation location.
602  bool handleMacroInvocation(const MCAsmMacro *M, SMLoc NameLoc);
603 
604  /// Handle exit from macro instantiation.
605  void handleMacroExit();
606 
607  /// Extract AsmTokens for a macro argument.
608  bool
609  parseMacroArgument(const MCAsmMacroParameter *MP, MCAsmMacroArgument &MA,
610  AsmToken::TokenKind EndTok = AsmToken::EndOfStatement);
611 
612  /// Parse all macro arguments for a given macro.
613  bool
614  parseMacroArguments(const MCAsmMacro *M, MCAsmMacroArguments &A,
615  AsmToken::TokenKind EndTok = AsmToken::EndOfStatement);
616 
617  void printMacroInstantiations();
618 
619  bool expandStatement(SMLoc Loc);
620 
621  void printMessage(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Msg,
622  SMRange Range = None) const {
623  ArrayRef<SMRange> Ranges(Range);
624  SrcMgr.PrintMessage(Loc, Kind, Msg, Ranges);
625  }
626  static void DiagHandler(const SMDiagnostic &Diag, void *Context);
627 
628  bool lookUpField(const StructInfo &Structure, StringRef Member,
629  AsmFieldInfo &Info) const;
630 
631  /// Should we emit DWARF describing this assembler source? (Returns false if
632  /// the source has .file directives, which means we don't want to generate
633  /// info describing the assembler source itself.)
634  bool enabledGenDwarfForAssembly();
635 
636  /// Enter the specified file. This returns true on failure.
637  bool enterIncludeFile(const std::string &Filename);
638 
639  /// Reset the current lexer position to that given by \p Loc. The
640  /// current token is not set; clients should ensure Lex() is called
641  /// subsequently.
642  ///
643  /// \param InBuffer If not 0, should be the known buffer id that contains the
644  /// location.
645  void jumpToLoc(SMLoc Loc, unsigned InBuffer = 0,
646  bool EndStatementAtEOF = true);
647 
648  /// Parse up to a token of kind \p EndTok and return the contents from the
649  /// current token up to (but not including) this token; the current token on
650  /// exit will be either this kind or EOF. Reads through instantiated macro
651  /// functions and text macros.
652  SmallVector<StringRef, 1> parseStringRefsTo(AsmToken::TokenKind EndTok);
653  std::string parseStringTo(AsmToken::TokenKind EndTok);
654 
655  /// Parse up to the end of statement and return the contents from the current
656  /// token until the end of the statement; the current token on exit will be
657  /// either the EndOfStatement or EOF.
659 
660  bool parseTextItem(std::string &Data);
661 
662  unsigned getBinOpPrecedence(AsmToken::TokenKind K,
664 
665  bool parseBinOpRHS(unsigned Precedence, const MCExpr *&Res, SMLoc &EndLoc);
666  bool parseParenExpr(const MCExpr *&Res, SMLoc &EndLoc);
667  bool parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc);
668 
669  bool parseRegisterOrRegisterNumber(int64_t &Register, SMLoc DirectiveLoc);
670 
671  bool parseCVFunctionId(int64_t &FunctionId, StringRef DirectiveName);
672  bool parseCVFileId(int64_t &FileId, StringRef DirectiveName);
673 
674  // Generic (target and platform independent) directive parsing.
675  enum DirectiveKind {
676  DK_NO_DIRECTIVE, // Placeholder
677  DK_HANDLER_DIRECTIVE,
678  DK_ASSIGN,
679  DK_EQU,
680  DK_TEXTEQU,
681  DK_ASCII,
682  DK_ASCIZ,
683  DK_STRING,
684  DK_BYTE,
685  DK_SBYTE,
686  DK_WORD,
687  DK_SWORD,
688  DK_DWORD,
689  DK_SDWORD,
690  DK_FWORD,
691  DK_QWORD,
692  DK_SQWORD,
693  DK_DB,
694  DK_DD,
695  DK_DF,
696  DK_DQ,
697  DK_DW,
698  DK_REAL4,
699  DK_REAL8,
700  DK_REAL10,
701  DK_ALIGN,
702  DK_EVEN,
703  DK_ORG,
704  DK_ENDR,
705  DK_EXTERN,
706  DK_PUBLIC,
707  DK_COMM,
708  DK_COMMENT,
709  DK_INCLUDE,
710  DK_REPEAT,
711  DK_WHILE,
712  DK_FOR,
713  DK_FORC,
714  DK_IF,
715  DK_IFE,
716  DK_IFB,
717  DK_IFNB,
718  DK_IFDEF,
719  DK_IFNDEF,
720  DK_IFDIF,
721  DK_IFDIFI,
722  DK_IFIDN,
723  DK_IFIDNI,
724  DK_ELSEIF,
725  DK_ELSEIFE,
726  DK_ELSEIFB,
727  DK_ELSEIFNB,
728  DK_ELSEIFDEF,
729  DK_ELSEIFNDEF,
730  DK_ELSEIFDIF,
731  DK_ELSEIFDIFI,
732  DK_ELSEIFIDN,
733  DK_ELSEIFIDNI,
734  DK_ELSE,
735  DK_ENDIF,
736  DK_FILE,
737  DK_LINE,
738  DK_LOC,
739  DK_STABS,
740  DK_CV_FILE,
741  DK_CV_FUNC_ID,
742  DK_CV_INLINE_SITE_ID,
743  DK_CV_LOC,
744  DK_CV_LINETABLE,
745  DK_CV_INLINE_LINETABLE,
746  DK_CV_DEF_RANGE,
747  DK_CV_STRINGTABLE,
748  DK_CV_STRING,
749  DK_CV_FILECHECKSUMS,
750  DK_CV_FILECHECKSUM_OFFSET,
751  DK_CV_FPO_DATA,
752  DK_CFI_SECTIONS,
753  DK_CFI_STARTPROC,
754  DK_CFI_ENDPROC,
755  DK_CFI_DEF_CFA,
756  DK_CFI_DEF_CFA_OFFSET,
757  DK_CFI_ADJUST_CFA_OFFSET,
758  DK_CFI_DEF_CFA_REGISTER,
759  DK_CFI_OFFSET,
760  DK_CFI_REL_OFFSET,
761  DK_CFI_PERSONALITY,
762  DK_CFI_LSDA,
763  DK_CFI_REMEMBER_STATE,
764  DK_CFI_RESTORE_STATE,
765  DK_CFI_SAME_VALUE,
766  DK_CFI_RESTORE,
767  DK_CFI_ESCAPE,
768  DK_CFI_RETURN_COLUMN,
769  DK_CFI_SIGNAL_FRAME,
770  DK_CFI_UNDEFINED,
771  DK_CFI_REGISTER,
772  DK_CFI_WINDOW_SAVE,
773  DK_CFI_B_KEY_FRAME,
774  DK_MACRO,
775  DK_EXITM,
776  DK_ENDM,
777  DK_PURGE,
778  DK_ERR,
779  DK_ERRB,
780  DK_ERRNB,
781  DK_ERRDEF,
782  DK_ERRNDEF,
783  DK_ERRDIF,
784  DK_ERRDIFI,
785  DK_ERRIDN,
786  DK_ERRIDNI,
787  DK_ERRE,
788  DK_ERRNZ,
789  DK_ECHO,
790  DK_STRUCT,
791  DK_UNION,
792  DK_ENDS,
793  DK_END,
794  DK_PUSHFRAME,
795  DK_PUSHREG,
796  DK_SAVEREG,
797  DK_SAVEXMM128,
798  DK_SETFRAME,
799  DK_RADIX,
800  };
801 
802  /// Maps directive name --> DirectiveKind enum, for directives parsed by this
803  /// class.
804  StringMap<DirectiveKind> DirectiveKindMap;
805 
806  bool isMacroLikeDirective();
807 
808  // Codeview def_range type parsing.
809  enum CVDefRangeType {
810  CVDR_DEFRANGE = 0, // Placeholder
811  CVDR_DEFRANGE_REGISTER,
812  CVDR_DEFRANGE_FRAMEPOINTER_REL,
813  CVDR_DEFRANGE_SUBFIELD_REGISTER,
814  CVDR_DEFRANGE_REGISTER_REL
815  };
816 
817  /// Maps Codeview def_range types --> CVDefRangeType enum, for Codeview
818  /// def_range types parsed by this class.
819  StringMap<CVDefRangeType> CVDefRangeTypeMap;
820 
821  // Builtin symbols handled by this class.
822  enum BuiltinSymbol {
823  BI_NO_SYMBOL, // Placeholder
824  BI_DATE,
825  BI_TIME,
826  BI_VERSION,
827  BI_FILECUR,
828  BI_FILENAME,
829  BI_LINE,
830  BI_CURSEG,
831  BI_CPU,
832  BI_INTERFACE,
833  BI_CODE,
834  BI_DATA,
835  BI_FARDATA,
836  BI_WORDSIZE,
837  BI_CODESIZE,
838  BI_DATASIZE,
839  BI_MODEL,
840  BI_STACK,
841  };
842 
843  /// Maps builtin name --> BuiltinSymbol enum, for builtins handled by this
844  /// class.
845  StringMap<BuiltinSymbol> BuiltinSymbolMap;
846 
847  const MCExpr *evaluateBuiltinValue(BuiltinSymbol Symbol, SMLoc StartLoc);
848 
849  llvm::Optional<std::string> evaluateBuiltinTextMacro(BuiltinSymbol Symbol,
850  SMLoc StartLoc);
851 
852  // ".ascii", ".asciz", ".string"
853  bool parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated);
854 
855  // "byte", "word", ...
856  bool emitIntValue(const MCExpr *Value, unsigned Size);
857  bool parseScalarInitializer(unsigned Size,
859  unsigned StringPadLength = 0);
860  bool parseScalarInstList(
861  unsigned Size, SmallVectorImpl<const MCExpr *> &Values,
862  const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement);
863  bool emitIntegralValues(unsigned Size, unsigned *Count = nullptr);
864  bool addIntegralField(StringRef Name, unsigned Size);
865  bool parseDirectiveValue(StringRef IDVal, unsigned Size);
866  bool parseDirectiveNamedValue(StringRef TypeName, unsigned Size,
867  StringRef Name, SMLoc NameLoc);
868 
869  // "real4", "real8", "real10"
870  bool emitRealValues(const fltSemantics &Semantics, unsigned *Count = nullptr);
871  bool addRealField(StringRef Name, const fltSemantics &Semantics, size_t Size);
872  bool parseDirectiveRealValue(StringRef IDVal, const fltSemantics &Semantics,
873  size_t Size);
874  bool parseRealInstList(
875  const fltSemantics &Semantics, SmallVectorImpl<APInt> &Values,
876  const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement);
877  bool parseDirectiveNamedRealValue(StringRef TypeName,
878  const fltSemantics &Semantics,
879  unsigned Size, StringRef Name,
880  SMLoc NameLoc);
881 
882  bool parseOptionalAngleBracketOpen();
883  bool parseAngleBracketClose(const Twine &Msg = "expected '>'");
884 
885  bool parseFieldInitializer(const FieldInfo &Field,
886  FieldInitializer &Initializer);
887  bool parseFieldInitializer(const FieldInfo &Field,
888  const IntFieldInfo &Contents,
889  FieldInitializer &Initializer);
890  bool parseFieldInitializer(const FieldInfo &Field,
891  const RealFieldInfo &Contents,
892  FieldInitializer &Initializer);
893  bool parseFieldInitializer(const FieldInfo &Field,
894  const StructFieldInfo &Contents,
895  FieldInitializer &Initializer);
896 
897  bool parseStructInitializer(const StructInfo &Structure,
898  StructInitializer &Initializer);
899  bool parseStructInstList(
900  const StructInfo &Structure, std::vector<StructInitializer> &Initializers,
901  const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement);
902 
903  bool emitFieldValue(const FieldInfo &Field);
904  bool emitFieldValue(const FieldInfo &Field, const IntFieldInfo &Contents);
905  bool emitFieldValue(const FieldInfo &Field, const RealFieldInfo &Contents);
906  bool emitFieldValue(const FieldInfo &Field, const StructFieldInfo &Contents);
907 
908  bool emitFieldInitializer(const FieldInfo &Field,
909  const FieldInitializer &Initializer);
910  bool emitFieldInitializer(const FieldInfo &Field,
911  const IntFieldInfo &Contents,
912  const IntFieldInfo &Initializer);
913  bool emitFieldInitializer(const FieldInfo &Field,
914  const RealFieldInfo &Contents,
915  const RealFieldInfo &Initializer);
916  bool emitFieldInitializer(const FieldInfo &Field,
917  const StructFieldInfo &Contents,
918  const StructFieldInfo &Initializer);
919 
920  bool emitStructInitializer(const StructInfo &Structure,
921  const StructInitializer &Initializer);
922 
923  // User-defined types (structs, unions):
924  bool emitStructValues(const StructInfo &Structure, unsigned *Count = nullptr);
925  bool addStructField(StringRef Name, const StructInfo &Structure);
926  bool parseDirectiveStructValue(const StructInfo &Structure,
927  StringRef Directive, SMLoc DirLoc);
928  bool parseDirectiveNamedStructValue(const StructInfo &Structure,
929  StringRef Directive, SMLoc DirLoc,
930  StringRef Name);
931 
932  // "=", "equ", "textequ"
933  bool parseDirectiveEquate(StringRef IDVal, StringRef Name,
934  DirectiveKind DirKind, SMLoc NameLoc);
935 
936  bool parseDirectiveOrg(); // "org"
937 
938  bool emitAlignTo(int64_t Alignment);
939  bool parseDirectiveAlign(); // "align"
940  bool parseDirectiveEven(); // "even"
941 
942  // ".file", ".line", ".loc", ".stabs"
943  bool parseDirectiveFile(SMLoc DirectiveLoc);
944  bool parseDirectiveLine();
945  bool parseDirectiveLoc();
946  bool parseDirectiveStabs();
947 
948  // ".cv_file", ".cv_func_id", ".cv_inline_site_id", ".cv_loc", ".cv_linetable",
949  // ".cv_inline_linetable", ".cv_def_range", ".cv_string"
950  bool parseDirectiveCVFile();
951  bool parseDirectiveCVFuncId();
952  bool parseDirectiveCVInlineSiteId();
953  bool parseDirectiveCVLoc();
954  bool parseDirectiveCVLinetable();
955  bool parseDirectiveCVInlineLinetable();
956  bool parseDirectiveCVDefRange();
957  bool parseDirectiveCVString();
958  bool parseDirectiveCVStringTable();
959  bool parseDirectiveCVFileChecksums();
960  bool parseDirectiveCVFileChecksumOffset();
961  bool parseDirectiveCVFPOData();
962 
963  // .cfi directives
964  bool parseDirectiveCFIRegister(SMLoc DirectiveLoc);
965  bool parseDirectiveCFIWindowSave();
966  bool parseDirectiveCFISections();
967  bool parseDirectiveCFIStartProc();
968  bool parseDirectiveCFIEndProc();
969  bool parseDirectiveCFIDefCfaOffset();
970  bool parseDirectiveCFIDefCfa(SMLoc DirectiveLoc);
971  bool parseDirectiveCFIAdjustCfaOffset();
972  bool parseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc);
973  bool parseDirectiveCFIOffset(SMLoc DirectiveLoc);
974  bool parseDirectiveCFIRelOffset(SMLoc DirectiveLoc);
975  bool parseDirectiveCFIPersonalityOrLsda(bool IsPersonality);
976  bool parseDirectiveCFIRememberState();
977  bool parseDirectiveCFIRestoreState();
978  bool parseDirectiveCFISameValue(SMLoc DirectiveLoc);
979  bool parseDirectiveCFIRestore(SMLoc DirectiveLoc);
980  bool parseDirectiveCFIEscape();
981  bool parseDirectiveCFIReturnColumn(SMLoc DirectiveLoc);
982  bool parseDirectiveCFISignalFrame();
983  bool parseDirectiveCFIUndefined(SMLoc DirectiveLoc);
984 
985  // macro directives
986  bool parseDirectivePurgeMacro(SMLoc DirectiveLoc);
987  bool parseDirectiveExitMacro(SMLoc DirectiveLoc, StringRef Directive,
988  std::string &Value);
989  bool parseDirectiveEndMacro(StringRef Directive);
990  bool parseDirectiveMacro(StringRef Name, SMLoc NameLoc);
991 
992  bool parseDirectiveStruct(StringRef Directive, DirectiveKind DirKind,
993  StringRef Name, SMLoc NameLoc);
994  bool parseDirectiveNestedStruct(StringRef Directive, DirectiveKind DirKind);
995  bool parseDirectiveEnds(StringRef Name, SMLoc NameLoc);
996  bool parseDirectiveNestedEnds();
997 
998  bool parseDirectiveExtern();
999 
1000  /// Parse a directive like ".globl" which accepts a single symbol (which
1001  /// should be a label or an external).
1002  bool parseDirectiveSymbolAttribute(MCSymbolAttr Attr);
1003 
1004  bool parseDirectiveComm(bool IsLocal); // ".comm" and ".lcomm"
1005 
1006  bool parseDirectiveComment(SMLoc DirectiveLoc); // "comment"
1007 
1008  bool parseDirectiveInclude(); // "include"
1009 
1010  // "if" or "ife"
1011  bool parseDirectiveIf(SMLoc DirectiveLoc, DirectiveKind DirKind);
1012  // "ifb" or "ifnb", depending on ExpectBlank.
1013  bool parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank);
1014  // "ifidn", "ifdif", "ifidni", or "ifdifi", depending on ExpectEqual and
1015  // CaseInsensitive.
1016  bool parseDirectiveIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
1017  bool CaseInsensitive);
1018  // "ifdef" or "ifndef", depending on expect_defined
1019  bool parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined);
1020  // "elseif" or "elseife"
1021  bool parseDirectiveElseIf(SMLoc DirectiveLoc, DirectiveKind DirKind);
1022  // "elseifb" or "elseifnb", depending on ExpectBlank.
1023  bool parseDirectiveElseIfb(SMLoc DirectiveLoc, bool ExpectBlank);
1024  // "elseifdef" or "elseifndef", depending on expect_defined
1025  bool parseDirectiveElseIfdef(SMLoc DirectiveLoc, bool expect_defined);
1026  // "elseifidn", "elseifdif", "elseifidni", or "elseifdifi", depending on
1027  // ExpectEqual and CaseInsensitive.
1028  bool parseDirectiveElseIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
1029  bool CaseInsensitive);
1030  bool parseDirectiveElse(SMLoc DirectiveLoc); // "else"
1031  bool parseDirectiveEndIf(SMLoc DirectiveLoc); // "endif"
1032  bool parseEscapedString(std::string &Data) override;
1033  bool parseAngleBracketString(std::string &Data) override;
1034 
1035  // Macro-like directives
1036  MCAsmMacro *parseMacroLikeBody(SMLoc DirectiveLoc);
1037  void instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
1038  raw_svector_ostream &OS);
1039  void instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
1040  SMLoc ExitLoc, raw_svector_ostream &OS);
1041  bool parseDirectiveRepeat(SMLoc DirectiveLoc, StringRef Directive);
1042  bool parseDirectiveFor(SMLoc DirectiveLoc, StringRef Directive);
1043  bool parseDirectiveForc(SMLoc DirectiveLoc, StringRef Directive);
1044  bool parseDirectiveWhile(SMLoc DirectiveLoc);
1045 
1046  // "_emit" or "__emit"
1047  bool parseDirectiveMSEmit(SMLoc DirectiveLoc, ParseStatementInfo &Info,
1048  size_t Len);
1049 
1050  // "align"
1051  bool parseDirectiveMSAlign(SMLoc DirectiveLoc, ParseStatementInfo &Info);
1052 
1053  // "end"
1054  bool parseDirectiveEnd(SMLoc DirectiveLoc);
1055 
1056  // ".err"
1057  bool parseDirectiveError(SMLoc DirectiveLoc);
1058  // ".errb" or ".errnb", depending on ExpectBlank.
1059  bool parseDirectiveErrorIfb(SMLoc DirectiveLoc, bool ExpectBlank);
1060  // ".errdef" or ".errndef", depending on ExpectDefined.
1061  bool parseDirectiveErrorIfdef(SMLoc DirectiveLoc, bool ExpectDefined);
1062  // ".erridn", ".errdif", ".erridni", or ".errdifi", depending on ExpectEqual
1063  // and CaseInsensitive.
1064  bool parseDirectiveErrorIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
1065  bool CaseInsensitive);
1066  // ".erre" or ".errnz", depending on ExpectZero.
1067  bool parseDirectiveErrorIfe(SMLoc DirectiveLoc, bool ExpectZero);
1068 
1069  // ".radix"
1070  bool parseDirectiveRadix(SMLoc DirectiveLoc);
1071 
1072  // "echo"
1073  bool parseDirectiveEcho(SMLoc DirectiveLoc);
1074 
1075  void initializeDirectiveKindMap();
1076  void initializeCVDefRangeTypeMap();
1077  void initializeBuiltinSymbolMap();
1078 };
1079 
1080 } // end anonymous namespace
1081 
1082 namespace llvm {
1083 
1085 
1087 
1088 } // end namespace llvm
1089 
// File-scope named constant with value 0. It is not referenced in the visible
// part of this file; presumably it names the default address space (confirm
// against its users).
enum { DEFAULT_ADDRSPACE = 0 };
1091 
1092 MasmParser::MasmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
1093  const MCAsmInfo &MAI, struct tm TM, unsigned CB)
1094  : Lexer(MAI), Ctx(Ctx), Out(Out), MAI(MAI), SrcMgr(SM),
1095  CurBuffer(CB ? CB : SM.getMainFileID()), TM(TM) {
1096  HadError = false;
1097  // Save the old handler.
1098  SavedDiagHandler = SrcMgr.getDiagHandler();
1099  SavedDiagContext = SrcMgr.getDiagContext();
1100  // Set our own handler which calls the saved handler.
1102  Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
1103  EndStatementAtEOFStack.push_back(true);
1104 
1105  // Initialize the platform / file format parser.
1106  switch (Ctx.getObjectFileType()) {
1107  case MCContext::IsCOFF:
1108  PlatformParser.reset(createCOFFMasmParser());
1109  break;
1110  default:
1111  report_fatal_error("llvm-ml currently supports only COFF output.");
1112  break;
1113  }
1114 
1115  initializeDirectiveKindMap();
1116  PlatformParser->Initialize(*this);
1117  initializeCVDefRangeTypeMap();
1118  initializeBuiltinSymbolMap();
1119 
1120  NumOfMacroInstantiations = 0;
1121 }
1122 
1123 MasmParser::~MasmParser() {
1124  assert((HadError || ActiveMacros.empty()) &&
1125  "Unexpected active macro instantiation!");
1126 
1127  // Restore the saved diagnostics handler and context for use during
1128  // finalization.
1129  SrcMgr.setDiagHandler(SavedDiagHandler, SavedDiagContext);
1130 }
1131 
1132 void MasmParser::printMacroInstantiations() {
1133  // Print the active macro instantiation stack.
1134  for (std::vector<MacroInstantiation *>::const_reverse_iterator
1135  it = ActiveMacros.rbegin(),
1136  ie = ActiveMacros.rend();
1137  it != ie; ++it)
1138  printMessage((*it)->InstantiationLoc, SourceMgr::DK_Note,
1139  "while in macro instantiation");
1140 }
1141 
1142 void MasmParser::Note(SMLoc L, const Twine &Msg, SMRange Range) {
1143  printPendingErrors();
1144  printMessage(L, SourceMgr::DK_Note, Msg, Range);
1145  printMacroInstantiations();
1146 }
1147 
1148 bool MasmParser::Warning(SMLoc L, const Twine &Msg, SMRange Range) {
1149  if (getTargetParser().getTargetOptions().MCNoWarn)
1150  return false;
1151  if (getTargetParser().getTargetOptions().MCFatalWarnings)
1152  return Error(L, Msg, Range);
1153  printMessage(L, SourceMgr::DK_Warning, Msg, Range);
1154  printMacroInstantiations();
1155  return false;
1156 }
1157 
1158 bool MasmParser::printError(SMLoc L, const Twine &Msg, SMRange Range) {
1159  HadError = true;
1160  printMessage(L, SourceMgr::DK_Error, Msg, Range);
1161  printMacroInstantiations();
1162  return true;
1163 }
1164 
1165 bool MasmParser::enterIncludeFile(const std::string &Filename) {
1166  std::string IncludedFile;
1167  unsigned NewBuf =
1168  SrcMgr.AddIncludeFile(Filename, Lexer.getLoc(), IncludedFile);
1169  if (!NewBuf)
1170  return true;
1171 
1172  CurBuffer = NewBuf;
1173  Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
1174  EndStatementAtEOFStack.push_back(true);
1175  return false;
1176 }
1177 
1178 void MasmParser::jumpToLoc(SMLoc Loc, unsigned InBuffer,
1179  bool EndStatementAtEOF) {
1180  CurBuffer = InBuffer ? InBuffer : SrcMgr.FindBufferContainingLoc(Loc);
1181  Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(),
1182  Loc.getPointer(), EndStatementAtEOF);
1183 }
1184 
1185 bool MasmParser::expandMacros() {
1186  const AsmToken &Tok = getTok();
1187  const std::string IDLower = Tok.getIdentifier().lower();
1188 
1189  const llvm::MCAsmMacro *M = getContext().lookupMacro(IDLower);
1190  if (M && M->IsFunction && peekTok().is(AsmToken::LParen)) {
1191  // This is a macro function invocation; expand it in place.
1192  const SMLoc MacroLoc = Tok.getLoc();
1193  const StringRef MacroId = Tok.getIdentifier();
1194  Lexer.Lex();
1195  if (handleMacroInvocation(M, MacroLoc)) {
1196  Lexer.UnLex(AsmToken(AsmToken::Error, MacroId));
1197  Lexer.Lex();
1198  }
1199  return false;
1200  }
1201 
1202  llvm::Optional<std::string> ExpandedValue;
1203  auto BuiltinIt = BuiltinSymbolMap.find(IDLower);
1204  if (BuiltinIt != BuiltinSymbolMap.end()) {
1205  ExpandedValue =
1206  evaluateBuiltinTextMacro(BuiltinIt->getValue(), Tok.getLoc());
1207  } else {
1208  auto VarIt = Variables.find(IDLower);
1209  if (VarIt != Variables.end() && VarIt->getValue().IsText) {
1210  ExpandedValue = VarIt->getValue().TextValue;
1211  }
1212  }
1213 
1214  if (!ExpandedValue)
1215  return true;
1216  std::unique_ptr<MemoryBuffer> Instantiation =
1217  MemoryBuffer::getMemBufferCopy(*ExpandedValue, "<instantiation>");
1218 
1219  // Jump to the macro instantiation and prime the lexer.
1220  CurBuffer =
1221  SrcMgr.AddNewSourceBuffer(std::move(Instantiation), Tok.getEndLoc());
1222  Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(), nullptr,
1223  /*EndStatementAtEOF=*/false);
1224  EndStatementAtEOFStack.push_back(false);
1225  Lexer.Lex();
1226  return false;
1227 }
1228 
1229 const AsmToken &MasmParser::Lex(ExpandKind ExpandNextToken) {
1230  if (Lexer.getTok().is(AsmToken::Error))
1231  Error(Lexer.getErrLoc(), Lexer.getErr());
1232 
1233  // if it's a end of statement with a comment in it
1234  if (getTok().is(AsmToken::EndOfStatement)) {
1235  // if this is a line comment output it.
1236  if (!getTok().getString().empty() && getTok().getString().front() != '\n' &&
1237  getTok().getString().front() != '\r' && MAI.preserveAsmComments())
1238  Out.addExplicitComment(Twine(getTok().getString()));
1239  }
1240 
1241  const AsmToken *tok = &Lexer.Lex();
1242  bool StartOfStatement = Lexer.isAtStartOfStatement();
1243 
1244  while (ExpandNextToken == ExpandMacros && tok->is(AsmToken::Identifier)) {
1245  if (StartOfStatement) {
1246  AsmToken NextTok;
1247  MutableArrayRef<AsmToken> Buf(NextTok);
1248  size_t ReadCount = Lexer.peekTokens(Buf);
1249  if (ReadCount && NextTok.is(AsmToken::Identifier) &&
1250  (NextTok.getString().equals_insensitive("equ") ||
1251  NextTok.getString().equals_insensitive("textequ"))) {
1252  // This looks like an EQU or TEXTEQU directive; don't expand the
1253  // identifier, allowing for redefinitions.
1254  break;
1255  }
1256  }
1257  if (expandMacros())
1258  break;
1259  }
1260 
1261  // Parse comments here to be deferred until end of next statement.
1262  while (tok->is(AsmToken::Comment)) {
1263  if (MAI.preserveAsmComments())
1264  Out.addExplicitComment(Twine(tok->getString()));
1265  tok = &Lexer.Lex();
1266  }
1267 
1268  // Recognize and bypass line continuations.
1269  while (tok->is(AsmToken::BackSlash) &&
1270  peekTok().is(AsmToken::EndOfStatement)) {
1271  // Eat both the backslash and the end of statement.
1272  Lexer.Lex();
1273  tok = &Lexer.Lex();
1274  }
1275 
1276  if (tok->is(AsmToken::Eof)) {
1277  // If this is the end of an included file, pop the parent file off the
1278  // include stack.
1279  SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1280  if (ParentIncludeLoc != SMLoc()) {
1281  EndStatementAtEOFStack.pop_back();
1282  jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1283  return Lex();
1284  }
1285  EndStatementAtEOFStack.pop_back();
1286  assert(EndStatementAtEOFStack.empty());
1287  }
1288 
1289  return *tok;
1290 }
1291 
1292 const AsmToken MasmParser::peekTok(bool ShouldSkipSpace) {
1293  AsmToken Tok;
1294 
1295  MutableArrayRef<AsmToken> Buf(Tok);
1296  size_t ReadCount = Lexer.peekTokens(Buf, ShouldSkipSpace);
1297 
1298  if (ReadCount == 0) {
1299  // If this is the end of an included file, pop the parent file off the
1300  // include stack.
1301  SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1302  if (ParentIncludeLoc != SMLoc()) {
1303  EndStatementAtEOFStack.pop_back();
1304  jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1305  return peekTok(ShouldSkipSpace);
1306  }
1307  EndStatementAtEOFStack.pop_back();
1308  assert(EndStatementAtEOFStack.empty());
1309  }
1310 
1311  assert(ReadCount == 1);
1312  return Tok;
1313 }
1314 
1315 bool MasmParser::enabledGenDwarfForAssembly() {
1316  // Check whether the user specified -g.
1317  if (!getContext().getGenDwarfForAssembly())
1318  return false;
1319  // If we haven't encountered any .file directives (which would imply that
1320  // the assembler source was produced with debug info already) then emit one
1321  // describing the assembler source file itself.
1322  if (getContext().getGenDwarfFileNumber() == 0) {
1323  // Use the first #line directive for this, if any. It's preprocessed, so
1324  // there is no checksum, and of course no source directive.
1325  if (!FirstCppHashFilename.empty())
1326  getContext().setMCLineTableRootFile(/*CUID=*/0,
1327  getContext().getCompilationDir(),
1328  FirstCppHashFilename,
1329  /*Cksum=*/None, /*Source=*/None);
1330  const MCDwarfFile &RootFile =
1331  getContext().getMCDwarfLineTable(/*CUID=*/0).getRootFile();
1332  getContext().setGenDwarfFileNumber(getStreamer().emitDwarfFileDirective(
1333  /*CUID=*/0, getContext().getCompilationDir(), RootFile.Name,
1334  RootFile.Checksum, RootFile.Source));
1335  }
1336  return true;
1337 }
1338 
1339 bool MasmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
1340  // Create the initial section, if requested.
1341  if (!NoInitialTextSection)
1342  Out.initSections(false, getTargetParser().getSTI());
1343 
1344  // Prime the lexer.
1345  Lex();
1346 
1347  HadError = false;
1348  AsmCond StartingCondState = TheCondState;
1349  SmallVector<AsmRewrite, 4> AsmStrRewrites;
1350 
1351  // If we are generating dwarf for assembly source files save the initial text
1352  // section. (Don't use enabledGenDwarfForAssembly() here, as we aren't
1353  // emitting any actual debug info yet and haven't had a chance to parse any
1354  // embedded .file directives.)
1355  if (getContext().getGenDwarfForAssembly()) {
1356  MCSection *Sec = getStreamer().getCurrentSectionOnly();
1357  if (!Sec->getBeginSymbol()) {
1358  MCSymbol *SectionStartSym = getContext().createTempSymbol();
1359  getStreamer().emitLabel(SectionStartSym);
1360  Sec->setBeginSymbol(SectionStartSym);
1361  }
1362  bool InsertResult = getContext().addGenDwarfSection(Sec);
1363  assert(InsertResult && ".text section should not have debug info yet");
1364  (void)InsertResult;
1365  }
1366 
1367  getTargetParser().onBeginOfFile();
1368 
1369  // While we have input, parse each statement.
1370  while (Lexer.isNot(AsmToken::Eof) ||
1371  SrcMgr.getParentIncludeLoc(CurBuffer) != SMLoc()) {
1372  // Skip through the EOF at the end of an inclusion.
1373  if (Lexer.is(AsmToken::Eof))
1374  Lex();
1375 
1376  ParseStatementInfo Info(&AsmStrRewrites);
1377  bool Parsed = parseStatement(Info, nullptr);
1378 
1379  // If we have a Lexer Error we are on an Error Token. Load in Lexer Error
1380  // for printing ErrMsg via Lex() only if no (presumably better) parser error
1381  // exists.
1382  if (Parsed && !hasPendingError() && Lexer.getTok().is(AsmToken::Error)) {
1383  Lex();
1384  }
1385 
1386  // parseStatement returned true so may need to emit an error.
1387  printPendingErrors();
1388 
1389  // Skipping to the next line if needed.
1390  if (Parsed && !getLexer().isAtStartOfStatement())
1391  eatToEndOfStatement();
1392  }
1393 
1394  getTargetParser().onEndOfFile();
1395  printPendingErrors();
1396 
1397  // All errors should have been emitted.
1398  assert(!hasPendingError() && "unexpected error from parseStatement");
1399 
1400  getTargetParser().flushPendingInstructions(getStreamer());
1401 
1402  if (TheCondState.TheCond != StartingCondState.TheCond ||
1403  TheCondState.Ignore != StartingCondState.Ignore)
1404  printError(getTok().getLoc(), "unmatched .ifs or .elses");
1405  // Check to see there are no empty DwarfFile slots.
1406  const auto &LineTables = getContext().getMCDwarfLineTables();
1407  if (!LineTables.empty()) {
1408  unsigned Index = 0;
1409  for (const auto &File : LineTables.begin()->second.getMCDwarfFiles()) {
1410  if (File.Name.empty() && Index != 0)
1411  printError(getTok().getLoc(), "unassigned file number: " +
1412  Twine(Index) +
1413  " for .file directives");
1414  ++Index;
1415  }
1416  }
1417 
1418  // Check to see that all assembler local symbols were actually defined.
1419  // Targets that don't do subsections via symbols may not want this, though,
1420  // so conservatively exclude them. Only do this if we're finalizing, though,
1421  // as otherwise we won't necessarilly have seen everything yet.
1422  if (!NoFinalize) {
1423  if (MAI.hasSubsectionsViaSymbols()) {
1424  for (const auto &TableEntry : getContext().getSymbols()) {
1425  MCSymbol *Sym = TableEntry.getValue();
1426  // Variable symbols may not be marked as defined, so check those
1427  // explicitly. If we know it's a variable, we have a definition for
1428  // the purposes of this check.
1429  if (Sym->isTemporary() && !Sym->isVariable() && !Sym->isDefined())
1430  // FIXME: We would really like to refer back to where the symbol was
1431  // first referenced for a source location. We need to add something
1432  // to track that. Currently, we just point to the end of the file.
1433  printError(getTok().getLoc(), "assembler local symbol '" +
1434  Sym->getName() + "' not defined");
1435  }
1436  }
1437 
1438  // Temporary symbols like the ones for directional jumps don't go in the
1439  // symbol table. They also need to be diagnosed in all (final) cases.
1440  for (std::tuple<SMLoc, CppHashInfoTy, MCSymbol *> &LocSym : DirLabels) {
1441  if (std::get<2>(LocSym)->isUndefined()) {
1442  // Reset the state of any "# line file" directives we've seen to the
1443  // context as it was at the diagnostic site.
1444  CppHashInfo = std::get<1>(LocSym);
1445  printError(std::get<0>(LocSym), "directional label undefined");
1446  }
1447  }
1448  }
1449 
1450  // Finalize the output stream if there are no errors and if the client wants
1451  // us to.
1452  if (!HadError && !NoFinalize)
1453  Out.finish(Lexer.getLoc());
1454 
1455  return HadError || getContext().hadError();
1456 }
1457 
1458 bool MasmParser::checkForValidSection() {
1459  if (!ParsingMSInlineAsm && !getStreamer().getCurrentSectionOnly()) {
1460  Out.initSections(false, getTargetParser().getSTI());
1461  return Error(getTok().getLoc(),
1462  "expected section directive before assembly directive");
1463  }
1464  return false;
1465 }
1466 
1467 /// Throw away the rest of the line for testing purposes.
1468 void MasmParser::eatToEndOfStatement() {
1469  while (Lexer.isNot(AsmToken::EndOfStatement)) {
1470  if (Lexer.is(AsmToken::Eof)) {
1471  SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1472  if (ParentIncludeLoc == SMLoc()) {
1473  break;
1474  }
1475 
1476  EndStatementAtEOFStack.pop_back();
1477  jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1478  }
1479 
1480  Lexer.Lex();
1481  }
1482 
1483  // Eat EOL.
1484  if (Lexer.is(AsmToken::EndOfStatement))
1485  Lexer.Lex();
1486 }
1487 
1489 MasmParser::parseStringRefsTo(AsmToken::TokenKind EndTok) {
1491  const char *Start = getTok().getLoc().getPointer();
1492  while (Lexer.isNot(EndTok)) {
1493  if (Lexer.is(AsmToken::Eof)) {
1494  SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1495  if (ParentIncludeLoc == SMLoc()) {
1496  break;
1497  }
1498  Refs.emplace_back(Start, getTok().getLoc().getPointer() - Start);
1499 
1500  EndStatementAtEOFStack.pop_back();
1501  jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1502  Lexer.Lex();
1503  Start = getTok().getLoc().getPointer();
1504  } else {
1505  Lexer.Lex();
1506  }
1507  }
1508  Refs.emplace_back(Start, getTok().getLoc().getPointer() - Start);
1509  return Refs;
1510 }
1511 
1512 std::string MasmParser::parseStringTo(AsmToken::TokenKind EndTok) {
1513  SmallVector<StringRef, 1> Refs = parseStringRefsTo(EndTok);
1514  std::string Str;
1515  for (StringRef S : Refs) {
1516  Str.append(S.str());
1517  }
1518  return Str;
1519 }
1520 
1521 StringRef MasmParser::parseStringToEndOfStatement() {
1522  const char *Start = getTok().getLoc().getPointer();
1523 
1524  while (Lexer.isNot(AsmToken::EndOfStatement) && Lexer.isNot(AsmToken::Eof))
1525  Lexer.Lex();
1526 
1527  const char *End = getTok().getLoc().getPointer();
1528  return StringRef(Start, End - Start);
1529 }
1530 
1531 /// Parse a paren expression and return it.
1532 /// NOTE: This assumes the leading '(' has already been consumed.
1533 ///
1534 /// parenexpr ::= expr)
1535 ///
1536 bool MasmParser::parseParenExpr(const MCExpr *&Res, SMLoc &EndLoc) {
1537  if (parseExpression(Res))
1538  return true;
1539  EndLoc = Lexer.getTok().getEndLoc();
1540  return parseRParen();
1541 }
1542 
1543 /// Parse a bracket expression and return it.
1544 /// NOTE: This assumes the leading '[' has already been consumed.
1545 ///
1546 /// bracketexpr ::= expr]
1547 ///
1548 bool MasmParser::parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc) {
1549  if (parseExpression(Res))
1550  return true;
1551  EndLoc = getTok().getEndLoc();
1552  if (parseToken(AsmToken::RBrac, "expected ']' in brackets expression"))
1553  return true;
1554  return false;
1555 }
1556 
1557 /// Parse a primary expression and return it.
1558 /// primaryexpr ::= (parenexpr
1559 /// primaryexpr ::= symbol
1560 /// primaryexpr ::= number
1561 /// primaryexpr ::= '.'
1562 /// primaryexpr ::= ~,+,-,'not' primaryexpr
1563 /// primaryexpr ::= string
1564 /// (a string is interpreted as a 64-bit number in big-endian base-256)
1565 bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
1566  AsmTypeInfo *TypeInfo) {
1567  SMLoc FirstTokenLoc = getLexer().getLoc();
1568  AsmToken::TokenKind FirstTokenKind = Lexer.getKind();
1569  switch (FirstTokenKind) {
1570  default:
1571  return TokError("unknown token in expression");
1572  // If we have an error assume that we've already handled it.
1573  case AsmToken::Error:
1574  return true;
1575  case AsmToken::Exclaim:
1576  Lex(); // Eat the operator.
1577  if (parsePrimaryExpr(Res, EndLoc, nullptr))
1578  return true;
1579  Res = MCUnaryExpr::createLNot(Res, getContext(), FirstTokenLoc);
1580  return false;
1581  case AsmToken::Dollar:
1582  case AsmToken::At:
1583  case AsmToken::Identifier: {
1585  if (parseIdentifier(Identifier)) {
1586  // We may have failed but $ may be a valid token.
1587  if (getTok().is(AsmToken::Dollar)) {
1588  if (Lexer.getMAI().getDollarIsPC()) {
1589  Lex();
1590  // This is a '$' reference, which references the current PC. Emit a
1591  // temporary label to the streamer and refer to it.
1592  MCSymbol *Sym = Ctx.createTempSymbol();
1593  Out.emitLabel(Sym);
1594  Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None,
1595  getContext());
1596  EndLoc = FirstTokenLoc;
1597  return false;
1598  }
1599  return Error(FirstTokenLoc, "invalid token in expression");
1600  }
1601  }
1602  // Parse named bitwise negation.
1603  if (Identifier.equals_insensitive("not")) {
1604  if (parsePrimaryExpr(Res, EndLoc, nullptr))
1605  return true;
1606  Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc);
1607  return false;
1608  }
1609  // Parse directional local label references.
1610  if (Identifier.equals_insensitive("@b") ||
1611  Identifier.equals_insensitive("@f")) {
1612  bool Before = Identifier.equals_insensitive("@b");
1613  MCSymbol *Sym = getContext().getDirectionalLocalSymbol(0, Before);
1614  if (Before && Sym->isUndefined())
1615  return Error(FirstTokenLoc, "Expected @@ label before @B reference");
1616  Res = MCSymbolRefExpr::create(Sym, getContext());
1617  return false;
1618  }
1619  // Parse symbol variant.
1620  std::pair<StringRef, StringRef> Split;
1621  if (!MAI.useParensForSymbolVariant()) {
1622  if (FirstTokenKind == AsmToken::String) {
1623  if (Lexer.is(AsmToken::At)) {
1624  Lex(); // eat @
1625  SMLoc AtLoc = getLexer().getLoc();
1626  StringRef VName;
1627  if (parseIdentifier(VName))
1628  return Error(AtLoc, "expected symbol variant after '@'");
1629 
1630  Split = std::make_pair(Identifier, VName);
1631  }
1632  } else {
1633  Split = Identifier.split('@');
1634  }
1635  } else if (Lexer.is(AsmToken::LParen)) {
1636  Lex(); // eat '('.
1637  StringRef VName;
1638  parseIdentifier(VName);
1639  // eat ')'.
1640  if (parseToken(AsmToken::RParen,
1641  "unexpected token in variant, expected ')'"))
1642  return true;
1643  Split = std::make_pair(Identifier, VName);
1644  }
1645 
1646  EndLoc = SMLoc::getFromPointer(Identifier.end());
1647 
1648  // This is a symbol reference.
1650  if (SymbolName.empty())
1651  return Error(getLexer().getLoc(), "expected a symbol reference");
1652 
1653  MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1654 
1655  // Look up the symbol variant if used.
1656  if (!Split.second.empty()) {
1657  Variant = MCSymbolRefExpr::getVariantKindForName(Split.second);
1658  if (Variant != MCSymbolRefExpr::VK_Invalid) {
1659  SymbolName = Split.first;
1660  } else if (MAI.doesAllowAtInName() && !MAI.useParensForSymbolVariant()) {
1661  Variant = MCSymbolRefExpr::VK_None;
1662  } else {
1663  return Error(SMLoc::getFromPointer(Split.second.begin()),
1664  "invalid variant '" + Split.second + "'");
1665  }
1666  }
1667 
1668  // Find the field offset if used.
1670  Split = SymbolName.split('.');
1671  if (Split.second.empty()) {
1672  } else {
1673  SymbolName = Split.first;
1674  if (lookUpField(SymbolName, Split.second, Info)) {
1675  std::pair<StringRef, StringRef> BaseMember = Split.second.split('.');
1676  StringRef Base = BaseMember.first, Member = BaseMember.second;
1677  lookUpField(Base, Member, Info);
1678  } else if (Structs.count(SymbolName.lower())) {
1679  // This is actually a reference to a field offset.
1680  Res = MCConstantExpr::create(Info.Offset, getContext());
1681  return false;
1682  }
1683  }
1684 
1685  MCSymbol *Sym = getContext().getInlineAsmLabel(SymbolName);
1686  if (!Sym) {
1687  // If this is a built-in numeric value, treat it as a constant.
1688  auto BuiltinIt = BuiltinSymbolMap.find(SymbolName.lower());
1689  const BuiltinSymbol Symbol = (BuiltinIt == BuiltinSymbolMap.end())
1690  ? BI_NO_SYMBOL
1691  : BuiltinIt->getValue();
1692  if (Symbol != BI_NO_SYMBOL) {
1693  const MCExpr *Value = evaluateBuiltinValue(Symbol, FirstTokenLoc);
1694  if (Value) {
1695  Res = Value;
1696  return false;
1697  }
1698  }
1699 
1700  // Variables use case-insensitive symbol names; if this is a variable, we
1701  // find the symbol using its canonical name.
1702  auto VarIt = Variables.find(SymbolName.lower());
1703  if (VarIt != Variables.end())
1704  SymbolName = VarIt->second.Name;
1705  Sym = getContext().getOrCreateSymbol(SymbolName);
1706  }
1707 
1708  // If this is an absolute variable reference, substitute it now to preserve
1709  // semantics in the face of reassignment.
1710  if (Sym->isVariable()) {
1711  auto V = Sym->getVariableValue(/*SetUsed=*/false);
1712  bool DoInline = isa<MCConstantExpr>(V) && !Variant;
1713  if (auto TV = dyn_cast<MCTargetExpr>(V))
1714  DoInline = TV->inlineAssignedExpr();
1715  if (DoInline) {
1716  if (Variant)
1717  return Error(EndLoc, "unexpected modifier on variable reference");
1718  Res = Sym->getVariableValue(/*SetUsed=*/false);
1719  return false;
1720  }
1721  }
1722 
1723  // Otherwise create a symbol ref.
1724  const MCExpr *SymRef =
1725  MCSymbolRefExpr::create(Sym, Variant, getContext(), FirstTokenLoc);
1726  if (Info.Offset) {
1727  Res = MCBinaryExpr::create(
1728  MCBinaryExpr::Add, SymRef,
1729  MCConstantExpr::create(Info.Offset, getContext()), getContext());
1730  } else {
1731  Res = SymRef;
1732  }
1733  if (TypeInfo) {
1734  if (Info.Type.Name.empty()) {
1735  auto TypeIt = KnownType.find(Identifier.lower());
1736  if (TypeIt != KnownType.end()) {
1737  Info.Type = TypeIt->second;
1738  }
1739  }
1740 
1741  *TypeInfo = Info.Type;
1742  }
1743  return false;
1744  }
1745  case AsmToken::BigNum:
1746  return TokError("literal value out of range for directive");
1747  case AsmToken::Integer: {
1748  int64_t IntVal = getTok().getIntVal();
1749  Res = MCConstantExpr::create(IntVal, getContext());
1750  EndLoc = Lexer.getTok().getEndLoc();
1751  Lex(); // Eat token.
1752  return false;
1753  }
1754  case AsmToken::String: {
1755  // MASM strings (used as constants) are interpreted as big-endian base-256.
1756  SMLoc ValueLoc = getTok().getLoc();
1757  std::string Value;
1758  if (parseEscapedString(Value))
1759  return true;
1760  if (Value.size() > 8)
1761  return Error(ValueLoc, "literal value out of range");
1762  uint64_t IntValue = 0;
1763  for (const unsigned char CharVal : Value)
1764  IntValue = (IntValue << 8) | CharVal;
1765  Res = MCConstantExpr::create(IntValue, getContext());
1766  return false;
1767  }
1768  case AsmToken::Real: {
1769  APFloat RealVal(APFloat::IEEEdouble(), getTok().getString());
1770  uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
1771  Res = MCConstantExpr::create(IntVal, getContext());
1772  EndLoc = Lexer.getTok().getEndLoc();
1773  Lex(); // Eat token.
1774  return false;
1775  }
1776  case AsmToken::Dot: {
1777  // This is a '.' reference, which references the current PC. Emit a
1778  // temporary label to the streamer and refer to it.
1779  MCSymbol *Sym = Ctx.createTempSymbol();
1780  Out.emitLabel(Sym);
1781  Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
1782  EndLoc = Lexer.getTok().getEndLoc();
1783  Lex(); // Eat identifier.
1784  return false;
1785  }
1786  case AsmToken::LParen:
1787  Lex(); // Eat the '('.
1788  return parseParenExpr(Res, EndLoc);
1789  case AsmToken::LBrac:
1790  if (!PlatformParser->HasBracketExpressions())
1791  return TokError("brackets expression not supported on this target");
1792  Lex(); // Eat the '['.
1793  return parseBracketExpr(Res, EndLoc);
1794  case AsmToken::Minus:
1795  Lex(); // Eat the operator.
1796  if (parsePrimaryExpr(Res, EndLoc, nullptr))
1797  return true;
1798  Res = MCUnaryExpr::createMinus(Res, getContext(), FirstTokenLoc);
1799  return false;
1800  case AsmToken::Plus:
1801  Lex(); // Eat the operator.
1802  if (parsePrimaryExpr(Res, EndLoc, nullptr))
1803  return true;
1804  Res = MCUnaryExpr::createPlus(Res, getContext(), FirstTokenLoc);
1805  return false;
1806  case AsmToken::Tilde:
1807  Lex(); // Eat the operator.
1808  if (parsePrimaryExpr(Res, EndLoc, nullptr))
1809  return true;
1810  Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc);
1811  return false;
1812  // MIPS unary expression operators. The lexer won't generate these tokens if
1813  // MCAsmInfo::HasMipsExpressions is false for the target.
1814  case AsmToken::PercentCall16:
1815  case AsmToken::PercentCall_Hi:
1816  case AsmToken::PercentCall_Lo:
1817  case AsmToken::PercentDtprel_Hi:
1818  case AsmToken::PercentDtprel_Lo:
1819  case AsmToken::PercentGot:
1820  case AsmToken::PercentGot_Disp:
1821  case AsmToken::PercentGot_Hi:
1822  case AsmToken::PercentGot_Lo:
1823  case AsmToken::PercentGot_Ofst:
1824  case AsmToken::PercentGot_Page:
1825  case AsmToken::PercentGottprel:
1826  case AsmToken::PercentGp_Rel:
1827  case AsmToken::PercentHi:
1828  case AsmToken::PercentHigher:
1829  case AsmToken::PercentHighest:
1830  case AsmToken::PercentLo:
1831  case AsmToken::PercentNeg:
1832  case AsmToken::PercentPcrel_Hi:
1833  case AsmToken::PercentPcrel_Lo:
1834  case AsmToken::PercentTlsgd:
1835  case AsmToken::PercentTlsldm:
1836  case AsmToken::PercentTprel_Hi:
1837  case AsmToken::PercentTprel_Lo:
1838  Lex(); // Eat the operator.
1839  if (Lexer.isNot(AsmToken::LParen))
1840  return TokError("expected '(' after operator");
1841  Lex(); // Eat the operator.
1842  if (parseExpression(Res, EndLoc))
1843  return true;
1844  if (parseRParen())
1845  return true;
1846  Res = getTargetParser().createTargetUnaryExpr(Res, FirstTokenKind, Ctx);
1847  return !Res;
1848  }
1849 }
1850 
1851 bool MasmParser::parseExpression(const MCExpr *&Res) {
1852  SMLoc EndLoc;
1853  return parseExpression(Res, EndLoc);
1854 }
1855 
1856 /// This function checks if the next token is <string> type or arithmetic.
1857 /// string that begin with character '<' must end with character '>'.
1858 /// otherwise it is arithmetics.
1859 /// If the function returns a 'true' value,
1860 /// the End argument will be filled with the last location pointed to the '>'
1861 /// character.
1862 static bool isAngleBracketString(SMLoc &StrLoc, SMLoc &EndLoc) {
1863  assert((StrLoc.getPointer() != nullptr) &&
1864  "Argument to the function cannot be a NULL value");
1865  const char *CharPtr = StrLoc.getPointer();
1866  while ((*CharPtr != '>') && (*CharPtr != '\n') && (*CharPtr != '\r') &&
1867  (*CharPtr != '\0')) {
1868  if (*CharPtr == '!')
1869  CharPtr++;
1870  CharPtr++;
1871  }
1872  if (*CharPtr == '>') {
1873  EndLoc = StrLoc.getFromPointer(CharPtr + 1);
1874  return true;
1875  }
1876  return false;
1877 }
1878 
1879 /// creating a string without the escape characters '!'.
1880 static std::string angleBracketString(StringRef BracketContents) {
1881  std::string Res;
1882  for (size_t Pos = 0; Pos < BracketContents.size(); Pos++) {
1883  if (BracketContents[Pos] == '!')
1884  Pos++;
1885  Res += BracketContents[Pos];
1886  }
1887  return Res;
1888 }
1889 
1890 /// Parse an expression and return it.
1891 ///
1892 /// expr ::= expr &&,|| expr -> lowest.
1893 /// expr ::= expr |,^,&,! expr
1894 /// expr ::= expr ==,!=,<>,<,<=,>,>= expr
1895 /// expr ::= expr <<,>> expr
1896 /// expr ::= expr +,- expr
1897 /// expr ::= expr *,/,% expr -> highest.
1898 /// expr ::= primaryexpr
1899 ///
1900 bool MasmParser::parseExpression(const MCExpr *&Res, SMLoc &EndLoc) {
1901  // Parse the expression.
1902  Res = nullptr;
1903  if (getTargetParser().parsePrimaryExpr(Res, EndLoc) ||
1904  parseBinOpRHS(1, Res, EndLoc))
1905  return true;
1906 
1907  // Try to constant fold it up front, if possible. Do not exploit
1908  // assembler here.
1909  int64_t Value;
1910  if (Res->evaluateAsAbsolute(Value))
1911  Res = MCConstantExpr::create(Value, getContext());
1912 
1913  return false;
1914 }
1915 
1916 bool MasmParser::parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) {
1917  Res = nullptr;
1918  return parseParenExpr(Res, EndLoc) || parseBinOpRHS(1, Res, EndLoc);
1919 }
1920 
1921 bool MasmParser::parseParenExprOfDepth(unsigned ParenDepth, const MCExpr *&Res,
1922  SMLoc &EndLoc) {
1923  if (parseParenExpr(Res, EndLoc))
1924  return true;
1925 
1926  for (; ParenDepth > 0; --ParenDepth) {
1927  if (parseBinOpRHS(1, Res, EndLoc))
1928  return true;
1929 
1930  // We don't Lex() the last RParen.
1931  // This is the same behavior as parseParenExpression().
1932  if (ParenDepth - 1 > 0) {
1933  EndLoc = getTok().getEndLoc();
1934  if (parseRParen())
1935  return true;
1936  }
1937  }
1938  return false;
1939 }
1940 
1941 bool MasmParser::parseAbsoluteExpression(int64_t &Res) {
1942  const MCExpr *Expr;
1943 
1944  SMLoc StartLoc = Lexer.getLoc();
1945  if (parseExpression(Expr))
1946  return true;
1947 
1948  if (!Expr->evaluateAsAbsolute(Res, getStreamer().getAssemblerPtr()))
1949  return Error(StartLoc, "expected absolute expression");
1950 
1951  return false;
1952 }
1953 
1956  bool ShouldUseLogicalShr,
1957  bool EndExpressionAtGreater) {
1958  switch (K) {
1959  default:
1960  return 0; // not a binop.
1961 
1962  // Lowest Precedence: &&, ||
1963  case AsmToken::AmpAmp:
1964  Kind = MCBinaryExpr::LAnd;
1965  return 2;
1966  case AsmToken::PipePipe:
1967  Kind = MCBinaryExpr::LOr;
1968  return 1;
1969 
1970  // Low Precedence: ==, !=, <>, <, <=, >, >=
1971  case AsmToken::EqualEqual:
1973  return 3;
1974  case AsmToken::ExclaimEqual:
1975  case AsmToken::LessGreater:
1977  return 3;
1978  case AsmToken::Less:
1980  return 3;
1981  case AsmToken::LessEqual:
1982  Kind = MCBinaryExpr::LTE;
1983  return 3;
1984  case AsmToken::Greater:
1985  if (EndExpressionAtGreater)
1986  return 0;
1988  return 3;
1989  case AsmToken::GreaterEqual:
1990  Kind = MCBinaryExpr::GTE;
1991  return 3;
1992 
1993  // Low Intermediate Precedence: +, -
1994  case AsmToken::Plus:
1996  return 4;
1997  case AsmToken::Minus:
1998  Kind = MCBinaryExpr::Sub;
1999  return 4;
2000 
2001  // High Intermediate Precedence: |, &, ^
2002  case AsmToken::Pipe:
2003  Kind = MCBinaryExpr::Or;
2004  return 5;
2005  case AsmToken::Caret:
2006  Kind = MCBinaryExpr::Xor;
2007  return 5;
2008  case AsmToken::Amp:
2009  Kind = MCBinaryExpr::And;
2010  return 5;
2011 
2012  // Highest Precedence: *, /, %, <<, >>
2013  case AsmToken::Star:
2015  return 6;
2016  case AsmToken::Slash:
2017  Kind = MCBinaryExpr::Div;
2018  return 6;
2019  case AsmToken::Percent:
2021  return 6;
2022  case AsmToken::LessLess:
2023  Kind = MCBinaryExpr::Shl;
2024  return 6;
2025  case AsmToken::GreaterGreater:
2026  if (EndExpressionAtGreater)
2027  return 0;
2028  Kind = ShouldUseLogicalShr ? MCBinaryExpr::LShr : MCBinaryExpr::AShr;
2029  return 6;
2030  }
2031 }
2032 
2033 unsigned MasmParser::getBinOpPrecedence(AsmToken::TokenKind K,
2035  bool ShouldUseLogicalShr = MAI.shouldUseLogicalShr();
2036  return getGNUBinOpPrecedence(K, Kind, ShouldUseLogicalShr,
2037  AngleBracketDepth > 0);
2038 }
2039 
2040 /// Parse all binary operators with precedence >= 'Precedence'.
2041 /// Res contains the LHS of the expression on input.
2042 bool MasmParser::parseBinOpRHS(unsigned Precedence, const MCExpr *&Res,
2043  SMLoc &EndLoc) {
2044  SMLoc StartLoc = Lexer.getLoc();
2045  while (true) {
2047  if (Lexer.getKind() == AsmToken::Identifier) {
2049  .CaseLower("and", AsmToken::Amp)
2050  .CaseLower("not", AsmToken::Exclaim)
2051  .CaseLower("or", AsmToken::Pipe)
2052  .CaseLower("xor", AsmToken::Caret)
2053  .CaseLower("shl", AsmToken::LessLess)
2054  .CaseLower("shr", AsmToken::GreaterGreater)
2056  .CaseLower("ne", AsmToken::ExclaimEqual)
2057  .CaseLower("lt", AsmToken::Less)
2058  .CaseLower("le", AsmToken::LessEqual)
2059  .CaseLower("gt", AsmToken::Greater)
2060  .CaseLower("ge", AsmToken::GreaterEqual)
2061  .Default(TokKind);
2062  }
2064  unsigned TokPrec = getBinOpPrecedence(TokKind, Kind);
2065 
2066  // If the next token is lower precedence than we are allowed to eat, return
2067  // successfully with what we ate already.
2068  if (TokPrec < Precedence)
2069  return false;
2070 
2071  Lex();
2072 
2073  // Eat the next primary expression.
2074  const MCExpr *RHS;
2075  if (getTargetParser().parsePrimaryExpr(RHS, EndLoc))
2076  return true;
2077 
2078  // If BinOp binds less tightly with RHS than the operator after RHS, let
2079  // the pending operator take RHS as its LHS.
2081  unsigned NextTokPrec = getBinOpPrecedence(Lexer.getKind(), Dummy);
2082  if (TokPrec < NextTokPrec && parseBinOpRHS(TokPrec + 1, RHS, EndLoc))
2083  return true;
2084 
2085  // Merge LHS and RHS according to operator.
2086  Res = MCBinaryExpr::create(Kind, Res, RHS, getContext(), StartLoc);
2087  }
2088 }
2089 
2090 /// ParseStatement:
2091 /// ::= % statement
2092 /// ::= EndOfStatement
2093 /// ::= Label* Directive ...Operands... EndOfStatement
2094 /// ::= Label* Identifier OperandList* EndOfStatement
///
/// Parses one statement from the token stream. Returns true on error and
/// false otherwise. The steps, in order:
///   1. Skip leading Space tokens. An EndOfStatement token ends the statement
///      at once (a blank line is reported to the streamer when the token text
///      is empty or starts with a line break).
///   2. If the statement starts with '%', run expandStatement() first.
///   3. Read the leading identifier, or one of '.', '{', '}', '*' or a
///      ".<number>" token that is accepted in its place.
///   4. Dispatch conditional-assembly directives, then drop the statement if
///      TheCondState.Ignore is set.
///   5. Handle "identifier ':'" labels.
///   6. Handle macro instantiations.
///   7. Handle directives named by the first token, then struct-typed data,
///      then directives named by the second token (DK_ASSIGN, DK_EQU, ...).
///   8. Otherwise parse the statement as an instruction, optionally emit a
///      DWARF .loc for it, and match/emit it through the target parser.
2095 bool MasmParser::parseStatement(ParseStatementInfo &Info,
// NOTE(review): this listing jumps from line 2095 to 2097. The rest of the
// parameter list and the opening brace are not visible; the body below uses a
// pointer named `SI`, which is presumably declared there -- confirm against
// the real source.
2097  assert(!hasPendingError() && "parseStatement started with pending error");
2098  // Eat initial spaces and comments.
2099  while (Lexer.is(AsmToken::Space))
2100  Lex();
2101  if (Lexer.is(AsmToken::EndOfStatement)) {
2102  // If this is a line comment we can drop it safely.
2103  if (getTok().getString().empty() || getTok().getString().front() == '\r' ||
2104  getTok().getString().front() == '\n')
2105  Out.addBlankLine();
2106  Lex();
2107  return false;
2108  }
2109 
2110  // If preceded by an expansion operator, first expand all text macros and
2111  // macro functions.
2112  if (getTok().is(AsmToken::Percent)) {
2113  SMLoc ExpansionLoc = getTok().getLoc();
2114  if (parseToken(AsmToken::Percent) || expandStatement(ExpansionLoc))
2115  return true;
2116  }
2117 
2118  // Statements always start with an identifier, unless we're dealing with a
2119  // processor directive (.386, .686, etc.) that lexes as a real.
// ID keeps a copy of the first token; it is consulted again below for label
// checks, target directive parsing and instruction parsing.
2120  AsmToken ID = getTok();
2121  SMLoc IDLoc = ID.getLoc();
2122  StringRef IDVal;
2123  if (Lexer.is(AsmToken::HashDirective))
2124  return parseCppHashLineFilenameComment(IDLoc);
2125  if (Lexer.is(AsmToken::Dot)) {
2126  // Treat '.' as a valid identifier in this context.
2127  Lex();
2128  IDVal = ".";
2129  } else if (Lexer.is(AsmToken::LCurly)) {
2130  // Treat '{' as a valid identifier in this context.
2131  Lex();
2132  IDVal = "{";
2133 
2134  } else if (Lexer.is(AsmToken::RCurly)) {
2135  // Treat '}' as a valid identifier in this context.
2136  Lex();
2137  IDVal = "}";
2138  } else if (Lexer.is(AsmToken::Star) &&
2139  getTargetParser().starIsStartOfStatement()) {
2140  // Accept '*' as a valid start of statement.
2141  Lex();
2142  IDVal = "*";
2143  } else if (Lexer.is(AsmToken::Real)) {
2144  // Treat ".<number>" as a valid identifier in this context.
2145  IDVal = getTok().getString();
2146  Lex(); // always eat a token
2147  if (!IDVal.startswith("."))
2148  return Error(IDLoc, "unexpected token at start of statement");
2149  } else if (parseIdentifier(IDVal, StartOfStatement)) {
// No identifier could be read. This is only an error while assembling; inside
// an ignored conditional block the statement continues with an empty IDVal.
2150  if (!TheCondState.Ignore) {
2151  Lex(); // always eat a token
2152  return Error(IDLoc, "unexpected token at start of statement");
2153  }
2154  IDVal = "";
2155  }
2156 
2157  // Handle conditional assembly here before checking for skipping. We
2158  // have to do this so that .endif isn't skipped in a ".if 0" block for
2159  // example.
// NOTE(review): this listing jumps from line 2159 to 2161. The start of the
// statement that declares `DirKindIt` (assigned from the find() call below)
// is not visible -- confirm against the real source.
2161  DirectiveKindMap.find(IDVal.lower());
2162  DirectiveKind DirKind = (DirKindIt == DirectiveKindMap.end())
2163  ? DK_NO_DIRECTIVE
2164  : DirKindIt->getValue();
2165  switch (DirKind) {
2166  default:
2167  break;
2168  case DK_IF:
2169  case DK_IFE:
2170  return parseDirectiveIf(IDLoc, DirKind);
2171  case DK_IFB:
2172  return parseDirectiveIfb(IDLoc, true);
2173  case DK_IFNB:
2174  return parseDirectiveIfb(IDLoc, false);
2175  case DK_IFDEF:
2176  return parseDirectiveIfdef(IDLoc, true);
2177  case DK_IFNDEF:
2178  return parseDirectiveIfdef(IDLoc, false);
2179  case DK_IFDIF:
2180  return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/false,
2181  /*CaseInsensitive=*/false);
2182  case DK_IFDIFI:
2183  return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/false,
2184  /*CaseInsensitive=*/true);
2185  case DK_IFIDN:
2186  return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/true,
2187  /*CaseInsensitive=*/false);
2188  case DK_IFIDNI:
2189  return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/true,
2190  /*CaseInsensitive=*/true);
2191  case DK_ELSEIF:
2192  case DK_ELSEIFE:
2193  return parseDirectiveElseIf(IDLoc, DirKind);
2194  case DK_ELSEIFB:
2195  return parseDirectiveElseIfb(IDLoc, true);
2196  case DK_ELSEIFNB:
2197  return parseDirectiveElseIfb(IDLoc, false);
2198  case DK_ELSEIFDEF:
2199  return parseDirectiveElseIfdef(IDLoc, true);
2200  case DK_ELSEIFNDEF:
2201  return parseDirectiveElseIfdef(IDLoc, false);
2202  case DK_ELSEIFDIF:
2203  return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/false,
2204  /*CaseInsensitive=*/false);
2205  case DK_ELSEIFDIFI:
2206  return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/false,
2207  /*CaseInsensitive=*/true);
2208  case DK_ELSEIFIDN:
2209  return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/true,
2210  /*CaseInsensitive=*/false);
2211  case DK_ELSEIFIDNI:
2212  return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/true,
2213  /*CaseInsensitive=*/true);
2214  case DK_ELSE:
2215  return parseDirectiveElse(IDLoc);
2216  case DK_ENDIF:
2217  return parseDirectiveEndIf(IDLoc);
2218  }
2219 
2220  // Ignore the statement if in the middle of inactive conditional
2221  // (e.g. ".if 0").
2222  if (TheCondState.Ignore) {
2223  eatToEndOfStatement();
2224  return false;
2225  }
2226 
2227  // FIXME: Recurse on local labels?
2228 
2229  // See what kind of statement we have.
2230  switch (Lexer.getKind()) {
2231  case AsmToken::Colon: {
// The target parser may veto treating this identifier as a label, in which
// case the statement falls through to the macro/directive/instruction paths.
2232  if (!getTargetParser().isLabel(ID))
2233  break;
2234  if (checkForValidSection())
2235  return true;
2236 
2237  // identifier ':' -> Label.
2238  Lex();
2239 
2240  // Diagnose attempt to use '.' as a label.
2241  if (IDVal == ".")
2242  return Error(IDLoc, "invalid use of pseudo-symbol '.' as a label");
2243 
2244  // Diagnose attempt to use a variable as a label.
2245  //
2246  // FIXME: Diagnostics. Note the location of the definition as a label.
2247  // FIXME: This doesn't diagnose assignment to a symbol which has been
2248  // implicitly marked as external.
2249  MCSymbol *Sym;
// For MS inline asm, the label is renamed to the name supplied by the
// callback and the rename is recorded as a rewrite of the source text.
2250  if (ParsingMSInlineAsm && SI) {
2251  StringRef RewrittenLabel =
2252  SI->LookupInlineAsmLabel(IDVal, getSourceManager(), IDLoc, true);
2253  assert(!RewrittenLabel.empty() &&
2254  "We should have an internal name here.");
2255  Info.AsmRewrites->emplace_back(AOK_Label, IDLoc, IDVal.size(),
2256  RewrittenLabel);
2257  IDVal = RewrittenLabel;
2258  }
2259  // Handle directional local labels
2260  if (IDVal == "@@") {
2261  Sym = Ctx.createDirectionalLocalSymbol(0);
2262  } else {
2263  Sym = getContext().getOrCreateSymbol(IDVal);
2264  }
2265 
2266  // End of Labels should be treated as end of line for lexing
2267  // purposes but that information is not available to the Lexer who
2268  // does not understand Labels. This may cause us to see a Hash
2269  // here instead of a preprocessor line comment.
2270  if (getTok().is(AsmToken::Hash)) {
2271  std::string CommentStr = parseStringTo(AsmToken::EndOfStatement);
2272  Lexer.Lex();
2273  Lexer.UnLex(AsmToken(AsmToken::EndOfStatement, CommentStr));
2274  }
2275 
2276  // Consume any end of statement token, if present, to avoid spurious
2277  // addBlankLine calls().
2278  if (getTok().is(AsmToken::EndOfStatement)) {
2279  Lex();
2280  }
2281 
2282  getTargetParser().doBeforeLabelEmit(Sym, IDLoc);
2283 
2284  // Emit the label.
2285  if (!getTargetParser().isParsingMSInlineAsm())
2286  Out.emitLabel(Sym, IDLoc);
2287 
2288  // If we are generating dwarf for assembly source files then gather the
2289  // info to make a dwarf label entry for this label if needed.
2290  if (enabledGenDwarfForAssembly())
2291  MCGenDwarfLabelEntry::Make(Sym, &getStreamer(), getSourceManager(),
2292  IDLoc);
2293 
2294  getTargetParser().onLabelParsed(Sym);
2295 
2296  return false;
2297  }
2298 
2299  default: // Normal instruction or directive.
2300  break;
2301  }
2302 
2303  // If macros are enabled, check to see if this is a macro instantiation.
2304  if (const MCAsmMacro *M = getContext().lookupMacro(IDVal.lower())) {
2305  return handleMacroEntry(M, IDLoc);
2306  }
2307 
2308  // Otherwise, we have a normal instruction or directive.
2309 
2310  if (DirKind != DK_NO_DIRECTIVE) {
2311  // There are several entities interested in parsing directives:
2312  //
2313  // 1. Asm parser extensions. For example, platform-specific parsers
2314  // (like the ELF parser) register themselves as extensions.
2315  // 2. The target-specific assembly parser. Some directives are target
2316  // specific or may potentially behave differently on certain targets.
2317  // 3. The generic directive parser implemented by this class. These are
2318  // all the directives that behave in a target and platform independent
2319  // manner, or at least have a default behavior that's shared between
2320  // all targets and platforms.
2321 
2322  getTargetParser().flushPendingInstructions(getStreamer());
2323 
2324  // Special-case handling of structure-end directives at higher priority,
2325  // since ENDS is overloaded as a segment-end directive.
2326  if (IDVal.equals_insensitive("ends") && StructInProgress.size() > 1 &&
2327  getTok().is(AsmToken::EndOfStatement)) {
2328  return parseDirectiveNestedEnds();
2329  }
2330 
2331  // First, check the extension directive map to see if any extension has
2332  // registered itself to parse this directive.
2333  std::pair<MCAsmParserExtension *, DirectiveHandler> Handler =
2334  ExtensionDirectiveMap.lookup(IDVal.lower());
2335  if (Handler.first)
2336  return (*Handler.second)(Handler.first, IDVal, IDLoc);
2337 
2338  // Next, let the target-specific assembly parser try.
// StartTokLoc is compared with the current token location afterwards to tell
// whether the target parser consumed any tokens.
2339  SMLoc StartTokLoc = getTok().getLoc();
2340  bool TPDirectiveReturn =
2341  ID.is(AsmToken::Identifier) && getTargetParser().ParseDirective(ID);
2342 
2343  if (hasPendingError())
2344  return true;
2345  // Currently the return value should be true if we are
2346  // uninterested but as this is at odds with the standard parsing
2347  // convention (return true = error) we have instances of a parsed
2348  // directive that fails returning true as an error. Catch these
2349  // cases as best as possible errors here.
2350  if (TPDirectiveReturn && StartTokLoc != getTok().getLoc())
2351  return true;
2352  // Return if we did some parsing or believe we succeeded.
2353  if (!TPDirectiveReturn || StartTokLoc != getTok().getLoc())
2354  return false;
2355 
2356  // Finally, if no one else is interested in this directive, it must be
2357  // generic and familiar to this class.
2358  switch (DirKind) {
2359  default:
2360  break;
2361  case DK_ASCII:
2362  return parseDirectiveAscii(IDVal, false);
2363  case DK_ASCIZ:
2364  case DK_STRING:
2365  return parseDirectiveAscii(IDVal, true);
2366  case DK_BYTE:
2367  case DK_SBYTE:
2368  case DK_DB:
2369  return parseDirectiveValue(IDVal, 1);
2370  case DK_WORD:
2371  case DK_SWORD:
2372  case DK_DW:
2373  return parseDirectiveValue(IDVal, 2);
2374  case DK_DWORD:
2375  case DK_SDWORD:
2376  case DK_DD:
2377  return parseDirectiveValue(IDVal, 4);
2378  case DK_FWORD:
2379  case DK_DF:
2380  return parseDirectiveValue(IDVal, 6);
2381  case DK_QWORD:
2382  case DK_SQWORD:
2383  case DK_DQ:
2384  return parseDirectiveValue(IDVal, 8);
2385  case DK_REAL4:
2386  return parseDirectiveRealValue(IDVal, APFloat::IEEEsingle(), 4);
2387  case DK_REAL8:
2388  return parseDirectiveRealValue(IDVal, APFloat::IEEEdouble(), 8);
2389  case DK_REAL10:
2390  return parseDirectiveRealValue(IDVal, APFloat::x87DoubleExtended(), 10);
2391  case DK_STRUCT:
2392  case DK_UNION:
2393  return parseDirectiveNestedStruct(IDVal, DirKind);
2394  case DK_ENDS:
2395  return parseDirectiveNestedEnds();
2396  case DK_ALIGN:
2397  return parseDirectiveAlign();
2398  case DK_EVEN:
2399  return parseDirectiveEven();
2400  case DK_ORG:
2401  return parseDirectiveOrg();
2402  case DK_EXTERN:
2403  return parseDirectiveExtern();
2404  case DK_PUBLIC:
2405  return parseDirectiveSymbolAttribute(MCSA_Global);
2406  case DK_COMM:
2407  return parseDirectiveComm(/*IsLocal=*/false);
2408  case DK_COMMENT:
2409  return parseDirectiveComment(IDLoc);
2410  case DK_INCLUDE:
2411  return parseDirectiveInclude();
2412  case DK_REPEAT:
2413  return parseDirectiveRepeat(IDLoc, IDVal);
2414  case DK_WHILE:
2415  return parseDirectiveWhile(IDLoc);
2416  case DK_FOR:
2417  return parseDirectiveFor(IDLoc, IDVal);
2418  case DK_FORC:
2419  return parseDirectiveForc(IDLoc, IDVal);
2420  case DK_FILE:
2421  return parseDirectiveFile(IDLoc);
2422  case DK_LINE:
2423  return parseDirectiveLine();
2424  case DK_LOC:
2425  return parseDirectiveLoc();
2426  case DK_STABS:
2427  return parseDirectiveStabs();
2428  case DK_CV_FILE:
2429  return parseDirectiveCVFile();
2430  case DK_CV_FUNC_ID:
2431  return parseDirectiveCVFuncId();
2432  case DK_CV_INLINE_SITE_ID:
2433  return parseDirectiveCVInlineSiteId();
2434  case DK_CV_LOC:
2435  return parseDirectiveCVLoc();
2436  case DK_CV_LINETABLE:
2437  return parseDirectiveCVLinetable();
2438  case DK_CV_INLINE_LINETABLE:
2439  return parseDirectiveCVInlineLinetable();
2440  case DK_CV_DEF_RANGE:
2441  return parseDirectiveCVDefRange();
2442  case DK_CV_STRING:
2443  return parseDirectiveCVString();
2444  case DK_CV_STRINGTABLE:
2445  return parseDirectiveCVStringTable();
2446  case DK_CV_FILECHECKSUMS:
2447  return parseDirectiveCVFileChecksums();
2448  case DK_CV_FILECHECKSUM_OFFSET:
2449  return parseDirectiveCVFileChecksumOffset();
2450  case DK_CV_FPO_DATA:
2451  return parseDirectiveCVFPOData();
2452  case DK_CFI_SECTIONS:
2453  return parseDirectiveCFISections();
2454  case DK_CFI_STARTPROC:
2455  return parseDirectiveCFIStartProc();
2456  case DK_CFI_ENDPROC:
2457  return parseDirectiveCFIEndProc();
2458  case DK_CFI_DEF_CFA:
2459  return parseDirectiveCFIDefCfa(IDLoc);
2460  case DK_CFI_DEF_CFA_OFFSET:
2461  return parseDirectiveCFIDefCfaOffset();
2462  case DK_CFI_ADJUST_CFA_OFFSET:
2463  return parseDirectiveCFIAdjustCfaOffset();
2464  case DK_CFI_DEF_CFA_REGISTER:
2465  return parseDirectiveCFIDefCfaRegister(IDLoc);
2466  case DK_CFI_OFFSET:
2467  return parseDirectiveCFIOffset(IDLoc);
2468  case DK_CFI_REL_OFFSET:
2469  return parseDirectiveCFIRelOffset(IDLoc);
2470  case DK_CFI_PERSONALITY:
2471  return parseDirectiveCFIPersonalityOrLsda(true);
2472  case DK_CFI_LSDA:
2473  return parseDirectiveCFIPersonalityOrLsda(false);
2474  case DK_CFI_REMEMBER_STATE:
2475  return parseDirectiveCFIRememberState();
2476  case DK_CFI_RESTORE_STATE:
2477  return parseDirectiveCFIRestoreState();
2478  case DK_CFI_SAME_VALUE:
2479  return parseDirectiveCFISameValue(IDLoc);
2480  case DK_CFI_RESTORE:
2481  return parseDirectiveCFIRestore(IDLoc);
2482  case DK_CFI_ESCAPE:
2483  return parseDirectiveCFIEscape();
2484  case DK_CFI_RETURN_COLUMN:
2485  return parseDirectiveCFIReturnColumn(IDLoc);
2486  case DK_CFI_SIGNAL_FRAME:
2487  return parseDirectiveCFISignalFrame();
2488  case DK_CFI_UNDEFINED:
2489  return parseDirectiveCFIUndefined(IDLoc);
2490  case DK_CFI_REGISTER:
2491  return parseDirectiveCFIRegister(IDLoc);
2492  case DK_CFI_WINDOW_SAVE:
2493  return parseDirectiveCFIWindowSave();
2494  case DK_EXITM:
2495  Info.ExitValue = "";
2496  return parseDirectiveExitMacro(IDLoc, IDVal, *Info.ExitValue);
2497  case DK_ENDM:
2498  Info.ExitValue = "";
2499  return parseDirectiveEndMacro(IDVal);
2500  case DK_PURGE:
2501  return parseDirectivePurgeMacro(IDLoc);
2502  case DK_END:
2503  return parseDirectiveEnd(IDLoc);
2504  case DK_ERR:
2505  return parseDirectiveError(IDLoc);
2506  case DK_ERRB:
2507  return parseDirectiveErrorIfb(IDLoc, true);
2508  case DK_ERRNB:
2509  return parseDirectiveErrorIfb(IDLoc, false);
2510  case DK_ERRDEF:
2511  return parseDirectiveErrorIfdef(IDLoc, true);
2512  case DK_ERRNDEF:
2513  return parseDirectiveErrorIfdef(IDLoc, false);
2514  case DK_ERRDIF:
2515  return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/false,
2516  /*CaseInsensitive=*/false);
2517  case DK_ERRDIFI:
2518  return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/false,
2519  /*CaseInsensitive=*/true);
2520  case DK_ERRIDN:
2521  return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/true,
2522  /*CaseInsensitive=*/false);
2523  case DK_ERRIDNI:
2524  return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/true,
2525  /*CaseInsensitive=*/true);
2526  case DK_ERRE:
2527  return parseDirectiveErrorIfe(IDLoc, true);
2528  case DK_ERRNZ:
2529  return parseDirectiveErrorIfe(IDLoc, false);
2530  case DK_RADIX:
2531  return parseDirectiveRadix(IDLoc);
2532  case DK_ECHO:
2533  return parseDirectiveEcho(IDLoc);
2534  }
2535 
2536  return Error(IDLoc, "unknown directive");
2537  }
2538 
2539  // We also check if this is allocating memory with user-defined type.
2540  auto IDIt = Structs.find(IDVal.lower());
2541  if (IDIt != Structs.end())
2542  return parseDirectiveStructValue(/*Structure=*/IDIt->getValue(), IDVal,
2543  IDLoc);
2544 
2545  // Non-conditional Microsoft directives sometimes follow their first argument.
2546  const AsmToken nextTok = getTok();
2547  const StringRef nextVal = nextTok.getString();
2548  const SMLoc nextLoc = nextTok.getLoc();
2549 
// One more token of lookahead; the size-directive cases below use it to
// detect a following "ptr" and leave such statements to instruction parsing.
2550  const AsmToken afterNextTok = peekTok();
2551 
2552  // There are several entities interested in parsing infix directives:
2553  //
2554  // 1. Asm parser extensions. For example, platform-specific parsers
2555  // (like the ELF parser) register themselves as extensions.
2556  // 2. The generic directive parser implemented by this class. These are
2557  // all the directives that behave in a target and platform independent
2558  // manner, or at least have a default behavior that's shared between
2559  // all targets and platforms.
2560 
2561  getTargetParser().flushPendingInstructions(getStreamer());
2562 
2563  // Special-case handling of structure-end directives at higher priority, since
2564  // ENDS is overloaded as a segment-end directive.
2565  if (nextVal.equals_insensitive("ends") && StructInProgress.size() == 1) {
2566  Lex();
2567  return parseDirectiveEnds(IDVal, IDLoc);
2568  }
2569 
2570  // First, check the extension directive map to see if any extension has
2571  // registered itself to parse this directive.
2572  std::pair<MCAsmParserExtension *, DirectiveHandler> Handler =
2573  ExtensionDirectiveMap.lookup(nextVal.lower());
2574  if (Handler.first) {
// The directive token is consumed and the leading identifier is pushed back
// onto the lexer -- presumably so the handler can read it as its first
// operand; confirm against the handlers.
2575  Lex();
2576  Lexer.UnLex(ID);
2577  return (*Handler.second)(Handler.first, nextVal, nextLoc);
2578  }
2579 
2580  // If no one else is interested in this directive, it must be
2581  // generic and familiar to this class.
2582  DirKindIt = DirectiveKindMap.find(nextVal.lower());
2583  DirKind = (DirKindIt == DirectiveKindMap.end())
2584  ? DK_NO_DIRECTIVE
2585  : DirKindIt->getValue();
2586  switch (DirKind) {
2587  default:
2588  break;
2589  case DK_ASSIGN:
2590  case DK_EQU:
2591  case DK_TEXTEQU:
2592  Lex();
2593  return parseDirectiveEquate(nextVal, IDVal, DirKind, IDLoc);
2594  case DK_BYTE:
2595  if (afterNextTok.is(AsmToken::Identifier) &&
2596  afterNextTok.getString().equals_insensitive("ptr")) {
2597  // Size directive; part of an instruction.
2598  break;
2599  }
2600  [[fallthrough]];
2601  case DK_SBYTE:
2602  case DK_DB:
2603  Lex();
2604  return parseDirectiveNamedValue(nextVal, 1, IDVal, IDLoc);
2605  case DK_WORD:
2606  if (afterNextTok.is(AsmToken::Identifier) &&
2607  afterNextTok.getString().equals_insensitive("ptr")) {
2608  // Size directive; part of an instruction.
2609  break;
2610  }
2611  [[fallthrough]];
2612  case DK_SWORD:
2613  case DK_DW:
2614  Lex();
2615  return parseDirectiveNamedValue(nextVal, 2, IDVal, IDLoc);
2616  case DK_DWORD:
2617  if (afterNextTok.is(AsmToken::Identifier) &&
2618  afterNextTok.getString().equals_insensitive("ptr")) {
2619  // Size directive; part of an instruction.
2620  break;
2621  }
2622  [[fallthrough]];
2623  case DK_SDWORD:
2624  case DK_DD:
2625  Lex();
2626  return parseDirectiveNamedValue(nextVal, 4, IDVal, IDLoc);
2627  case DK_FWORD:
2628  if (afterNextTok.is(AsmToken::Identifier) &&
2629  afterNextTok.getString().equals_insensitive("ptr")) {
2630  // Size directive; part of an instruction.
2631  break;
2632  }
2633  [[fallthrough]];
2634  case DK_DF:
2635  Lex();
2636  return parseDirectiveNamedValue(nextVal, 6, IDVal, IDLoc);
2637  case DK_QWORD:
2638  if (afterNextTok.is(AsmToken::Identifier) &&
2639  afterNextTok.getString().equals_insensitive("ptr")) {
2640  // Size directive; part of an instruction.
2641  break;
2642  }
2643  [[fallthrough]];
2644  case DK_SQWORD:
2645  case DK_DQ:
2646  Lex();
2647  return parseDirectiveNamedValue(nextVal, 8, IDVal, IDLoc);
2648  case DK_REAL4:
2649  Lex();
2650  return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEsingle(), 4,
2651  IDVal, IDLoc);
2652  case DK_REAL8:
2653  Lex();
2654  return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEdouble(), 8,
2655  IDVal, IDLoc);
2656  case DK_REAL10:
2657  Lex();
2658  return parseDirectiveNamedRealValue(nextVal, APFloat::x87DoubleExtended(),
2659  10, IDVal, IDLoc);
2660  case DK_STRUCT:
2661  case DK_UNION:
2662  Lex();
2663  return parseDirectiveStruct(nextVal, DirKind, IDVal, IDLoc);
2664  case DK_ENDS:
2665  Lex();
2666  return parseDirectiveEnds(IDVal, IDLoc);
2667  case DK_MACRO:
2668  Lex();
2669  return parseDirectiveMacro(IDVal, IDLoc);
2670  }
2671 
2672  // Finally, we check if this is allocating a variable with user-defined type.
2673  auto NextIt = Structs.find(nextVal.lower());
2674  if (NextIt != Structs.end()) {
2675  Lex();
2676  return parseDirectiveNamedStructValue(/*Structure=*/NextIt->getValue(),
2677  nextVal, nextLoc, IDVal);
2678  }
2679 
2680  // __asm _emit or __asm __emit
2681  if (ParsingMSInlineAsm && (IDVal == "_emit" || IDVal == "__emit" ||
2682  IDVal == "_EMIT" || IDVal == "__EMIT"))
2683  return parseDirectiveMSEmit(IDLoc, Info, IDVal.size());
2684 
2685  // __asm align
2686  if (ParsingMSInlineAsm && (IDVal == "align" || IDVal == "ALIGN"))
2687  return parseDirectiveMSAlign(IDLoc, Info);
2688 
// __asm even: only a rewrite is recorded; unlike the two cases above there is
// no return, so control continues into the instruction path below.
2689  if (ParsingMSInlineAsm && (IDVal == "even" || IDVal == "EVEN"))
2690  Info.AsmRewrites->emplace_back(AOK_EVEN, IDLoc, 4);
2691  if (checkForValidSection())
2692  return true;
2693 
2694  // Canonicalize the opcode to lower case.
2695  std::string OpcodeStr = IDVal.lower();
2696  ParseInstructionInfo IInfo(Info.AsmRewrites);
2697  bool ParseHadError = getTargetParser().ParseInstruction(IInfo, OpcodeStr, ID,
2698  Info.ParsedOperands);
2699  Info.ParseError = ParseHadError;
2700 
2701  // Dump the parsed representation, if requested.
2702  if (getShowParsedOperands()) {
2703  SmallString<256> Str;
2704  raw_svector_ostream OS(Str);
2705  OS << "parsed instruction: [";
2706  for (unsigned i = 0; i != Info.ParsedOperands.size(); ++i) {
2707  if (i != 0)
2708  OS << ", ";
2709  Info.ParsedOperands[i]->print(OS);
2710  }
2711  OS << "]";
2712 
2713  printMessage(IDLoc, SourceMgr::DK_Note, OS.str());
2714  }
2715 
2716  // Fail even if ParseInstruction erroneously returns false.
2717  if (hasPendingError() || ParseHadError)
2718  return true;
2719 
// From here on ParseHadError is always false because of the early return
// above, so the `!ParseHadError` tests below are redundant.
2720  // If we are generating dwarf for the current section then generate a .loc
2721  // directive for the instruction.
2722  if (!ParseHadError && enabledGenDwarfForAssembly() &&
2723  getContext().getGenDwarfSectionSyms().count(
2724  getStreamer().getCurrentSectionOnly())) {
2725  unsigned Line;
// Outside macro expansion the line is that of the statement itself; inside,
// it is taken from the instantiation location of ActiveMacros.front().
2726  if (ActiveMacros.empty())
2727  Line = SrcMgr.FindLineNumber(IDLoc, CurBuffer);
2728  else
2729  Line = SrcMgr.FindLineNumber(ActiveMacros.front()->InstantiationLoc,
2730  ActiveMacros.front()->ExitBuffer);
2731 
2732  // If we previously parsed a cpp hash file line comment then make sure the
2733  // current Dwarf File is for the CppHashFilename if not then emit the
2734  // Dwarf File table for it and adjust the line number for the .loc.
2735  if (!CppHashInfo.Filename.empty()) {
2736  unsigned FileNumber = getStreamer().emitDwarfFileDirective(
2737  0, StringRef(), CppHashInfo.Filename);
2738  getContext().setGenDwarfFileNumber(FileNumber);
2739 
2740  unsigned CppHashLocLineNo =
2741  SrcMgr.FindLineNumber(CppHashInfo.Loc, CppHashInfo.Buf);
2742  Line = CppHashInfo.LineNumber - 1 + (Line - CppHashLocLineNo);
2743  }
2744 
2745  getStreamer().emitDwarfLocDirective(
2746  getContext().getGenDwarfFileNumber(), Line, 0,
// NOTE(review): this listing jumps from line 2746 to 2748; one line of
// arguments to emitDwarfLocDirective() is not visible here -- confirm against
// the real source.
2748  StringRef());
2749  }
2750 
2751  // If parsing succeeded, match the instruction.
2752  if (!ParseHadError) {
// NOTE(review): this listing jumps from line 2752 to 2754; the declaration of
// `ErrorInfo`, which is passed to MatchAndEmitInstruction() below, is not
// visible here -- confirm against the real source.
2754  if (getTargetParser().MatchAndEmitInstruction(
2755  IDLoc, Info.Opcode, Info.ParsedOperands, Out, ErrorInfo,
2756  getTargetParser().isParsingMSInlineAsm()))
2757  return true;
2758  }
2759  return false;
2760 }
2761 
2762 // Parse and erase curly braces marking block start/end.
2763 bool MasmParser::parseCurlyBlockScope(
2764  SmallVectorImpl<AsmRewrite> &AsmStrRewrites) {
2765  // Identify curly brace marking block start/end.
2766  if (Lexer.isNot(AsmToken::LCurly) && Lexer.isNot(AsmToken::RCurly))
2767  return false;
2768 
2769  SMLoc StartLoc = Lexer.getLoc();
2770  Lex(); // Eat the brace.
2771  if (Lexer.is(AsmToken::EndOfStatement))
2772  Lex(); // Eat EndOfStatement following the brace.
2773 
2774  // Erase the block start/end brace from the output asm string.
2775  AsmStrRewrites.emplace_back(AOK_Skip, StartLoc, Lexer.getLoc().getPointer() -
2776  StartLoc.getPointer());
2777  return true;
2778 }
2779 
2780 /// parseCppHashLineFilenameComment as this:
2781 /// ::= # number "filename"
2782 bool MasmParser::parseCppHashLineFilenameComment(SMLoc L) {
2783  Lex(); // Eat the hash token.
2784  // Lexer only ever emits HashDirective if it fully formed if it's
2785  // done the checking already so this is an internal error.
2786  assert(getTok().is(AsmToken::Integer) &&
2787  "Lexing Cpp line comment: Expected Integer");
2788  int64_t LineNumber = getTok().getIntVal();
2789  Lex();
2790  assert(getTok().is(AsmToken::String) &&
2791  "Lexing Cpp line comment: Expected String");
2792  StringRef Filename = getTok().getString();
2793  Lex();
2794 
2795  // Get rid of the enclosing quotes.
2796  Filename = Filename.substr(1, Filename.size() - 2);
2797 
2798  // Save the SMLoc, Filename and LineNumber for later use by diagnostics
2799  // and possibly DWARF file info.
2800  CppHashInfo.Loc = L;
2801  CppHashInfo.Filename = Filename;
2802  CppHashInfo.LineNumber = LineNumber;
2803  CppHashInfo.Buf = CurBuffer;
2804  if (FirstCppHashFilename.empty())
2805  FirstCppHashFilename = Filename;
2806  return false;
2807 }
2808 
2809 /// will use the last parsed cpp hash line filename comment
2810 /// for the Filename and LineNo if any in the diagnostic.
2811 void MasmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) {
2812  const MasmParser *Parser = static_cast<const MasmParser *>(Context);
2813  raw_ostream &OS = errs();
2814 
2815  const SourceMgr &DiagSrcMgr = *Diag.getSourceMgr();
2816  SMLoc DiagLoc = Diag.getLoc();
2817  unsigned DiagBuf = DiagSrcMgr.FindBufferContainingLoc(DiagLoc);
2818  unsigned CppHashBuf =
2819  Parser->SrcMgr.FindBufferContainingLoc(Parser->CppHashInfo.Loc);
2820 
2821  // Like SourceMgr::printMessage() we need to print the include stack if any
2822  // before printing the message.
2823  unsigned DiagCurBuffer = DiagSrcMgr.FindBufferContainingLoc(DiagLoc);
2824  if (!Parser->SavedDiagHandler && DiagCurBuffer &&
2825  DiagCurBuffer != DiagSrcMgr.getMainFileID()) {
2826  SMLoc ParentIncludeLoc = DiagSrcMgr.getParentIncludeLoc(DiagCurBuffer);
2827  DiagSrcMgr.PrintIncludeStack(ParentIncludeLoc, OS);
2828  }
2829 
2830  // If we have not parsed a cpp hash line filename comment or the source
2831  // manager changed or buffer changed (like in a nested include) then just
2832  // print the normal diagnostic using its Filename and LineNo.
2833  if (!Parser->CppHashInfo.LineNumber || &DiagSrcMgr != &Parser->SrcMgr ||
2834  DiagBuf != CppHashBuf) {
2835  if (Parser->SavedDiagHandler)
2836  Parser->SavedDiagHandler(Diag, Parser->SavedDiagContext);
2837  else
2838  Diag.print(nullptr, OS);
2839  return;
2840  }
2841 
2842  // Use the CppHashFilename and calculate a line number based on the
2843  // CppHashInfo.Loc and CppHashInfo.LineNumber relative to this Diag's SMLoc
2844  // for the diagnostic.
2845  const std::string &Filename = std::string(Parser->CppHashInfo.Filename);
2846 
2847  int DiagLocLineNo = DiagSrcMgr.FindLineNumber(DiagLoc, DiagBuf);
2848  int CppHashLocLineNo =
2849  Parser->SrcMgr.FindLineNumber(Parser->CppHashInfo.Loc, CppHashBuf);
2850  int LineNo =
2851  Parser->CppHashInfo.LineNumber - 1 + (DiagLocLineNo - CppHashLocLineNo);
2852 
2853  SMDiagnostic NewDiag(*Diag.getSourceMgr(), Diag.getLoc(), Filename, LineNo,
2854  Diag.getColumnNo(), Diag.getKind(), Diag.getMessage(),
2855  Diag.getLineContents(), Diag.getRanges());
2856 
2857  if (Parser->SavedDiagHandler)
2858  Parser->SavedDiagHandler(NewDiag, Parser->SavedDiagContext);
2859  else
2860  NewDiag.print(nullptr, OS);
2861 }
2862 
2863 // This is similar to the IsIdentifierChar function in AsmLexer.cpp, but does
2864 // not accept '.'.
2865 static bool isMacroParameterChar(char C) {
2866  return isAlnum(C) || C == '_' || C == '$' || C == '@' || C == '?';
2867 }
2868 
2869 bool MasmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
2870  ArrayRef<MCAsmMacroParameter> Parameters,
2872  const std::vector<std::string> &Locals, SMLoc L) {
2873  unsigned NParameters = Parameters.size();
2874  if (NParameters != A.size())
2875  return Error(L, "Wrong number of arguments");
2876  StringMap<std::string> LocalSymbols;
2877  std::string Name;
2878  Name.reserve(6);
2879  for (StringRef Local : Locals) {
2881  LocalName << "??"
2882  << format_hex_no_prefix(LocalCounter++, 4, /*Upper=*/true);
2883  LocalSymbols.insert({Local, LocalName.str()});
2884  Name.clear();
2885  }
2886 
2887  std::optional<char> CurrentQuote;
2888  while (!Body.empty()) {
2889  // Scan for the next substitution.
2890  std::size_t End = Body.size(), Pos = 0;
2891  std::size_t IdentifierPos = End;
2892  for (; Pos != End; ++Pos) {
2893  // Find the next possible macro parameter, including preceding a '&'
2894  // inside quotes.
2895  if (Body[Pos] == '&')
2896  break;
2897  if (isMacroParameterChar(Body[Pos])) {
2898  if (!CurrentQuote)
2899  break;
2900  if (IdentifierPos == End)
2901  IdentifierPos = Pos;
2902  } else {
2903  IdentifierPos = End;
2904  }
2905 
2906  // Track quotation status
2907  if (!CurrentQuote) {
2908  if (Body[Pos] == '\'' || Body[Pos] == '"')
2909  CurrentQuote = Body[Pos];
2910  } else if (Body[Pos] == CurrentQuote) {
2911  if (Pos + 1 != End && Body[Pos + 1] == CurrentQuote) {
2912  // Escaped quote, and quotes aren't identifier chars; skip
2913  ++Pos;
2914  continue;
2915  } else {
2916  CurrentQuote.reset();
2917  }
2918  }
2919  }
2920  if (IdentifierPos != End) {
2921  // We've recognized an identifier before an apostrophe inside quotes;
2922  // check once to see if we can expand it.
2923  Pos = IdentifierPos;
2924  IdentifierPos = End;
2925  }
2926 
2927  // Add the prefix.
2928  OS << Body.slice(0, Pos);
2929 
2930  // Check if we reached the end.
2931  if (Pos == End)
2932  break;
2933 
2934  unsigned I = Pos;
2935  bool InitialAmpersand = (Body[I] == '&');
2936  if (InitialAmpersand) {
2937  ++I;
2938  ++Pos;
2939  }
2940  while (I < End && isMacroParameterChar(Body[I]))
2941  ++I;
2942 
2943  const char *Begin = Body.data() + Pos;
2944  StringRef Argument(Begin, I - Pos);
2945  const std::string ArgumentLower = Argument.lower();
2946  unsigned Index = 0;
2947 
2948  for (; Index < NParameters; ++Index)
2949  if (Parameters[Index].Name.equals_insensitive(ArgumentLower))
2950  break;
2951 
2952  if (Index == NParameters) {
2953  if (InitialAmpersand)
2954  OS << '&';
2955  auto it = LocalSymbols.find(ArgumentLower);
2956  if (it != LocalSymbols.end())
2957  OS << it->second;
2958  else
2959  OS << Argument;
2960  Pos = I;
2961  } else {
2962  for (const AsmToken &Token : A[Index]) {
2963  // In MASM, you can write '%expr'.
2964  // The prefix '%' evaluates the expression 'expr'
2965  // and uses the result as a string (e.g. replace %(1+2) with the
2966  // string "3").
2967  // Here, we identify the integer token which is the result of the
2968  // absolute expression evaluation and replace it with its string
2969  // representation.
2970  if (Token.getString().front() == '%' && Token.is(AsmToken::Integer))
2971  // Emit an integer value to the buffer.
2972  OS << Token.getIntVal();
2973  else
2974  OS << Token.getString();
2975  }
2976 
2977  Pos += Argument.size();
2978  if (Pos < End && Body[Pos] == '&') {
2979  ++Pos;
2980  }
2981  }
2982  // Update the scan point.
2983  Body = Body.substr(Pos);
2984  }
2985 
2986  return false;
2987 }
2988 
2989 static bool isOperator(AsmToken::TokenKind kind) {
2990  switch (kind) {
2991  default:
2992  return false;
2993  case AsmToken::Plus:
2994  case AsmToken::Minus:
2995  case AsmToken::Tilde:
2996  case AsmToken::Slash:
2997  case AsmToken::Star:
2998  case AsmToken::Dot:
2999  case AsmToken::Equal:
3000  case AsmToken::EqualEqual:
3001  case AsmToken::Pipe:
3002  case AsmToken::PipePipe:
3003  case AsmToken::Caret:
3004  case AsmToken::Amp:
3005  case AsmToken::AmpAmp:
3006  case AsmToken::Exclaim:
3007  case AsmToken::ExclaimEqual:
3008  case AsmToken::Less:
3009  case AsmToken::LessEqual:
3010  case AsmToken::LessLess:
3011  case AsmToken::LessGreater:
3012  case AsmToken::Greater:
3013  case AsmToken::GreaterEqual:
3014  case AsmToken::GreaterGreater:
3015  return true;
3016  }
3017 }
3018 
3019 namespace {
3020 
3021 class AsmLexerSkipSpaceRAII {
3022 public:
3023  AsmLexerSkipSpaceRAII(AsmLexer &Lexer, bool SkipSpace) : Lexer(Lexer) {
3024  Lexer.setSkipSpace(SkipSpace);
3025  }
3026 
3027  ~AsmLexerSkipSpaceRAII() {
3028  Lexer.setSkipSpace(true);
3029  }
3030 
3031 private:
3032  AsmLexer &Lexer;
3033 };
3034 
3035 } // end anonymous namespace
3036 
3037 bool MasmParser::parseMacroArgument(const MCAsmMacroParameter *MP,
3038  MCAsmMacroArgument &MA,
3039  AsmToken::TokenKind EndTok) {
3040  if (MP && MP->Vararg) {
3041  if (Lexer.isNot(EndTok)) {
3042  SmallVector<StringRef, 1> Str = parseStringRefsTo(EndTok);
3043  for (StringRef S : Str) {
3044  MA.emplace_back(AsmToken::String, S);
3045  }
3046  }
3047  return false;
3048  }
3049 
3050  SMLoc StrLoc = Lexer.getLoc(), EndLoc;
3051  if (Lexer.is(AsmToken::Less) && isAngleBracketString(StrLoc, EndLoc)) {
3052  const char *StrChar = StrLoc.getPointer() + 1;
3053  const char *EndChar = EndLoc.getPointer() - 1;
3054  jumpToLoc(EndLoc, CurBuffer, EndStatementAtEOFStack.back());
3055  /// Eat from '<' to '>'.
3056  Lex();
3057  MA.emplace_back(AsmToken::String, StringRef(StrChar, EndChar - StrChar));
3058  return false;
3059  }
3060 
3061  unsigned ParenLevel = 0;
3062 
3063  // Darwin doesn't use spaces to delmit arguments.
3064  AsmLexerSkipSpaceRAII ScopedSkipSpace(Lexer, IsDarwin);
3065 
3066  bool SpaceEaten;
3067 
3068  while (true) {
3069  SpaceEaten = false;
3070  if (Lexer.is(AsmToken::Eof) || Lexer.is(AsmToken::Equal))
3071  return TokError("unexpected token");
3072 
3073  if (ParenLevel == 0) {
3074  if (Lexer.is(AsmToken::Comma))
3075  break;
3076 
3077  if (Lexer.is(AsmToken::Space)) {
3078  SpaceEaten = true;
3079  Lex(); // Eat spaces.
3080  }
3081 
3082  // Spaces can delimit parameters, but could also be part an expression.
3083  // If the token after a space is an operator, add the token and the next
3084  // one into this argument
3085  if (!IsDarwin) {
3086  if (isOperator(Lexer.getKind()) && Lexer.isNot(EndTok)) {
3087  MA.push_back(getTok());
3088  Lex();
3089 
3090  // Whitespace after an operator can be ignored.
3091  if (Lexer.is(AsmToken::Space))
3092  Lex();
3093 
3094  continue;
3095  }
3096  }
3097  if (SpaceEaten)
3098  break;
3099  }
3100 
3101  // handleMacroEntry relies on not advancing the lexer here
3102  // to be able to fill in the remaining default parameter values
3103  if (Lexer.is(EndTok) && (EndTok != AsmToken::RParen || ParenLevel == 0))
3104  break;
3105 
3106  // Adjust the current parentheses level.
3107  if (Lexer.is(AsmToken::LParen))
3108  ++ParenLevel;
3109  else if (Lexer.is(AsmToken::RParen) && ParenLevel)
3110  --ParenLevel;
3111 
3112  // Append the token to the current argument list.
3113  MA.push_back(getTok());
3114  Lex();
3115  }
3116 
3117  if (ParenLevel != 0)
3118  return TokError("unbalanced parentheses in argument");
3119 
3120  if (MA.empty() && MP) {
3121  if (MP->Required) {
3122  return TokError("missing value for required parameter '" + MP->Name +
3123  "'");
3124  } else {
3125  MA = MP->Value;
3126  }
3127  }
3128  return false;
3129 }
3130 
3131 // Parse the macro instantiation arguments.
3132 bool MasmParser::parseMacroArguments(const MCAsmMacro *M,
3133  MCAsmMacroArguments &A,
3134  AsmToken::TokenKind EndTok) {
3135  const unsigned NParameters = M ? M->Parameters.size() : 0;
3136  bool NamedParametersFound = false;
3137  SmallVector<SMLoc, 4> FALocs;
3138 
3139  A.resize(NParameters);
3140  FALocs.resize(NParameters);
3141 
3142  // Parse two kinds of macro invocations:
3143  // - macros defined without any parameters accept an arbitrary number of them
3144  // - macros defined with parameters accept at most that many of them
3145  for (unsigned Parameter = 0; !NParameters || Parameter < NParameters;
3146  ++Parameter) {
3147  SMLoc IDLoc = Lexer.getLoc();
3149 
3150  if (Lexer.is(AsmToken::Identifier) && peekTok().is(AsmToken::Equal)) {
3151  if (parseIdentifier(FA.Name))
3152  return Error(IDLoc, "invalid argument identifier for formal argument");
3153 
3154  if (Lexer.isNot(AsmToken::Equal))
3155  return TokError("expected '=' after formal parameter identifier");
3156 
3157  Lex();
3158 
3159  NamedParametersFound = true;
3160  }
3161 
3162  if (NamedParametersFound && FA.Name.empty())
3163  return Error(IDLoc, "cannot mix positional and keyword arguments");
3164 
3165  unsigned PI = Parameter;
3166  if (!FA.Name.empty()) {
3167  assert(M && "expected macro to be defined");
3168  unsigned FAI = 0;
3169  for (FAI = 0; FAI < NParameters; ++FAI)
3170  if (M->Parameters[FAI].Name == FA.Name)
3171  break;
3172 
3173  if (FAI >= NParameters) {
3174  return Error(IDLoc, "parameter named '" + FA.Name +
3175  "' does not exist for macro '" + M->Name + "'");
3176  }
3177  PI = FAI;
3178  }
3179  const MCAsmMacroParameter *MP = nullptr;
3180  if (M && PI < NParameters)
3181  MP = &M->Parameters[PI];
3182 
3183  SMLoc StrLoc = Lexer.getLoc();
3184  SMLoc EndLoc;
3185  if (Lexer.is(AsmToken::Percent)) {
3186  const MCExpr *AbsoluteExp;
3187  int64_t Value;
3188  /// Eat '%'.
3189  Lex();
3190  if (parseExpression(AbsoluteExp, EndLoc))
3191  return false;
3192  if (!AbsoluteExp->evaluateAsAbsolute(Value,
3193  getStreamer().getAssemblerPtr()))
3194  return Error(StrLoc, "expected absolute expression");
3195  const char *StrChar = StrLoc.getPointer();
3196  const char *EndChar = EndLoc.getPointer();
3197  AsmToken newToken(AsmToken::Integer,
3198  StringRef(StrChar, EndChar - StrChar), Value);
3199  FA.Value.push_back(newToken);
3200  } else if (parseMacroArgument(MP, FA.Value, EndTok)) {
3201  if (M)
3202  return addErrorSuffix(" in '" + M->Name + "' macro");
3203  else
3204  return true;
3205  }
3206 
3207  if (!FA.Value.empty()) {
3208  if (A.size() <= PI)
3209  A.resize(PI + 1);
3210  A[PI] = FA.Value;
3211 
3212  if (FALocs.size() <= PI)
3213  FALocs.resize(PI + 1);
3214 
3215  FALocs[PI] = Lexer.getLoc();
3216  }
3217 
3218  // At the end of the statement, fill in remaining arguments that have
3219  // default values. If there aren't any, then the next argument is
3220  // required but missing
3221  if (Lexer.is(EndTok)) {
3222  bool Failure = false;
3223  for (unsigned FAI = 0; FAI < NParameters; ++FAI) {
3224  if (A[FAI].empty()) {
3225  if (M->Parameters[FAI].Required) {
3226  Error(FALocs[FAI].isValid() ? FALocs[FAI] : Lexer.getLoc(),
3227  "missing value for required parameter "
3228  "'" +
3229  M->Parameters[FAI].Name + "' in macro '" + M->Name + "'");
3230  Failure = true;
3231  }
3232 
3233  if (!M->Parameters[FAI].Value.empty())
3234  A[FAI] = M->Parameters[FAI].Value;
3235  }
3236  }
3237  return Failure;
3238  }
3239 
3240  if (Lexer.is(AsmToken::Comma))
3241  Lex();
3242  }
3243 
3244  return TokError("too many positional arguments");
3245 }
3246 
3247 bool MasmParser::handleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc,
3248  AsmToken::TokenKind ArgumentEndTok) {
3249  // Arbitrarily limit macro nesting depth (default matches 'as'). We can
3250  // eliminate this, although we should protect against infinite loops.
3251  unsigned MaxNestingDepth = AsmMacroMaxNestingDepth;
3252  if (ActiveMacros.size() == MaxNestingDepth) {
3253  std::ostringstream MaxNestingDepthError;
3254  MaxNestingDepthError << "macros cannot be nested more than "
3255  << MaxNestingDepth << " levels deep."
3256  << " Use -asm-macro-max-nesting-depth to increase "
3257  "this limit.";
3258  return TokError(MaxNestingDepthError.str());
3259  }
3260 
3261  MCAsmMacroArguments A;
3262  if (parseMacroArguments(M, A, ArgumentEndTok))
3263  return true;
3264 
3265  // Macro instantiation is lexical, unfortunately. We construct a new buffer
3266  // to hold the macro body with substitutions.
3267  SmallString<256> Buf;
3268  StringRef Body = M->Body;
3269  raw_svector_ostream OS(Buf);
3270 
3271  if (expandMacro(OS, Body, M->Parameters, A, M->Locals, getTok().getLoc()))
3272  return true;
3273 
3274  // We include the endm in the buffer as our cue to exit the macro
3275  // instantiation.
3276  OS << "endm\n";
3277 
3278  std::unique_ptr<MemoryBuffer> Instantiation =
3279  MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>");
3280 
3281  // Create the macro instantiation object and add to the current macro
3282  // instantiation stack.
3283  MacroInstantiation *MI = new MacroInstantiation{
3284  NameLoc, CurBuffer, getTok().getLoc(), TheCondStack.size()};
3285  ActiveMacros.push_back(MI);
3286 
3287  ++NumOfMacroInstantiations;
3288 
3289  // Jump to the macro instantiation and prime the lexer.
3290  CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Instantiation), SMLoc());
3291  Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
3292  EndStatementAtEOFStack.push_back(true);
3293  Lex();
3294 
3295  return false;
3296 }
3297 
3298 void MasmParser::handleMacroExit() {
3299  // Jump to the token we should return to, and consume it.
3300  EndStatementAtEOFStack.pop_back();
3301  jumpToLoc(ActiveMacros.back()->ExitLoc, ActiveMacros.back()->ExitBuffer,
3302  EndStatementAtEOFStack.back());
3303  Lex();
3304 
3305  // Pop the instantiation entry.
3306  delete ActiveMacros.back();
3307  ActiveMacros.pop_back();
3308 }
3309 
3310 bool MasmParser::handleMacroInvocation(const MCAsmMacro *M, SMLoc NameLoc) {
3311  if (!M->IsFunction)
3312  return Error(NameLoc, "cannot invoke macro procedure as function");
3313 
3314  if (parseToken(AsmToken::LParen, "invoking macro function '" + M->Name +
3315  "' requires arguments in parentheses") ||
3316  handleMacroEntry(M, NameLoc, AsmToken::RParen))
3317  return true;
3318 
3319  // Parse all statements in the macro, retrieving the exit value when it ends.
3320  std::string ExitValue;
3321  SmallVector<AsmRewrite, 4> AsmStrRewrites;
3322  while (Lexer.isNot(AsmToken::Eof)) {
3323  ParseStatementInfo Info(&AsmStrRewrites);
3324  bool Parsed = parseStatement(Info, nullptr);
3325 
3326  if (!Parsed && Info.ExitValue) {
3327  ExitValue = std::move(*Info.ExitValue);
3328  break;
3329  }
3330 
3331  // If we have a Lexer Error we are on an Error Token. Load in Lexer Error
3332  // for printing ErrMsg via Lex() only if no (presumably better) parser error
3333  // exists.
3334  if (Parsed && !hasPendingError() && Lexer.getTok().is(AsmToken::Error)) {
3335  Lex();
3336  }
3337 
3338  // parseStatement returned true so may need to emit an error.
3339  printPendingErrors();
3340 
3341  // Skipping to the next line if needed.
3342  if (Parsed && !getLexer().isAtStartOfStatement())
3343  eatToEndOfStatement();
3344  }
3345 
3346  // Consume the right-parenthesis on the other side of the arguments.
3347  if (parseRParen())
3348  return true;
3349 
3350  // Exit values may require lexing, unfortunately. We construct a new buffer to
3351  // hold the exit value.
3352  std::unique_ptr<MemoryBuffer> MacroValue =
3353  MemoryBuffer::getMemBufferCopy(ExitValue, "<macro-value>");
3354 
3355  // Jump from this location to the instantiated exit value, and prime the
3356  // lexer.
3357  CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(MacroValue), Lexer.getLoc());
3358  Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(), nullptr,
3359  /*EndStatementAtEOF=*/false);
3360  EndStatementAtEOFStack.push_back(false);
3361  Lex();
3362 
3363  return false;
3364 }
3365 
3366 /// parseIdentifier:
3367 /// ::= identifier
3368 /// ::= string
3369 bool MasmParser::parseIdentifier(StringRef &Res,
3370  IdentifierPositionKind Position) {
3371  // The assembler has relaxed rules for accepting identifiers, in particular we
3372  // allow things like '.globl $foo' and '.def @feat.00', which would normally
3373  // be separate tokens. At this level, we have already lexed so we cannot
3374  // (currently) handle this as a context dependent token, instead we detect
3375  // adjacent tokens and return the combined identifier.
3376  if (Lexer.is(AsmToken::Dollar) || Lexer.is(AsmToken::At)) {
3377  SMLoc PrefixLoc = getLexer().getLoc();
3378 
3379  // Consume the prefix character, and check for a following identifier.
3380 
3381  AsmToken nextTok = peekTok(false);
3382 
3383  if (nextTok.isNot(AsmToken::Identifier))
3384  return true;
3385 
3386  // We have a '$' or '@' followed by an identifier, make sure they are adjacent.
3387  if (PrefixLoc.getPointer() + 1 != nextTok.getLoc().getPointer())
3388  return true;
3389 
3390  // eat $ or @
3391  Lexer.Lex(); // Lexer's Lex guarantees consecutive token.
3392  // Construct the joined identifier and consume the token.
3393  Res =
3394  StringRef(PrefixLoc.getPointer(), getTok().getIdentifier().size() + 1);
3395  Lex(); // Parser Lex to maintain invariants.
3396  return false;
3397  }
3398 
3399  if (Lexer.isNot(AsmToken::Identifier) && Lexer.isNot(AsmToken::String))
3400  return true;
3401 
3402  Res = getTok().getIdentifier();
3403 
3404  // Consume the identifier token - but if parsing certain directives, avoid
3405  // lexical expansion of the next token.
3406  ExpandKind ExpandNextToken = ExpandMacros;
3407  if (Position == StartOfStatement &&
3408  StringSwitch<bool>(Res)
3409  .CaseLower("echo", true)
3410  .CasesLower("ifdef", "ifndef", "elseifdef", "elseifndef", true)
3411  .Default(false)) {
3412  ExpandNextToken = DoNotExpandMacros;
3413  }
3414  Lex(ExpandNextToken);
3415 
3416  return false;
3417 }
3418 
3419 /// parseDirectiveEquate:
3420 /// ::= name "=" expression
3421 /// | name "equ" expression (not redefinable)
3422 /// | name "equ" text-list
3423 /// | name "textequ" text-list (redefinability unspecified)
3424 bool MasmParser::parseDirectiveEquate(StringRef IDVal, StringRef Name,
3425  DirectiveKind DirKind, SMLoc NameLoc) {
3426  auto BuiltinIt = BuiltinSymbolMap.find(Name.lower());
3427  if (BuiltinIt != BuiltinSymbolMap.end())
3428  return Error(NameLoc, "cannot redefine a built-in symbol");
3429 
3430  Variable &Var = Variables[Name.lower()];
3431  if (Var.Name.empty()) {
3432  Var.Name = Name;
3433  }
3434 
3435  SMLoc StartLoc = Lexer.getLoc();
3436  if (DirKind == DK_EQU || DirKind == DK_TEXTEQU) {
3437  // "equ" and "textequ" both allow text expressions.
3438  std::string Value;
3439  std::string TextItem;
3440  if (!parseTextItem(TextItem)) {
3441  Value += TextItem;
3442 
3443  // Accept a text-list, not just one text-item.
3444  auto parseItem = [&]() -> bool {
3445  if (parseTextItem(TextItem))
3446  return TokError("expected text item");
3447  Value += TextItem;
3448  return false;
3449  };
3450  if (parseOptionalToken(AsmToken::Comma) && parseMany(parseItem))
3451  return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3452 
3453  if (!Var.IsText || Var.TextValue != Value) {
3454  switch (Var.Redefinable) {
3455  case Variable::NOT_REDEFINABLE:
3456  return Error(getTok().getLoc(), "invalid variable redefinition");
3457  case Variable::WARN_ON_REDEFINITION:
3458  if (Warning(NameLoc, "redefining '" + Name +
3459  "', already defined on the command line")) {
3460  return true;
3461  }
3462  break;
3463  default:
3464  break;
3465  }
3466  }
3467  Var.IsText = true;
3468  Var.TextValue = Value;
3469  Var.Redefinable = Variable::REDEFINABLE;
3470 
3471  return false;
3472  }
3473  }
3474  if (DirKind == DK_TEXTEQU)
3475  return TokError("expected <text> in '" + Twine(IDVal) + "' directive");
3476 
3477  // Parse as expression assignment.
3478  const MCExpr *Expr;
3479  SMLoc EndLoc;
3480  if (parseExpression(Expr, EndLoc))
3481  return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3482  StringRef ExprAsString = StringRef(
3483  StartLoc.getPointer(), EndLoc.getPointer() - StartLoc.getPointer());
3484 
3485  int64_t Value;
3486  if (!Expr->evaluateAsAbsolute(Value, getStreamer().getAssemblerPtr())) {
3487  if (DirKind == DK_ASSIGN)
3488  return Error(
3489  StartLoc,
3490  "expected absolute expression; not all symbols have known values",
3491  {StartLoc, EndLoc});
3492 
3493  // Not an absolute expression; define as a text replacement.
3494  if (!Var.IsText || Var.TextValue != ExprAsString) {
3495  switch (Var.Redefinable) {
3496  case Variable::NOT_REDEFINABLE:
3497  return Error(getTok().getLoc(), "invalid variable redefinition");
3498  case Variable::WARN_ON_REDEFINITION:
3499  if (Warning(NameLoc, "redefining '" + Name +
3500  "', already defined on the command line")) {
3501  return true;
3502  }
3503  break;
3504  default:
3505  break;
3506  }
3507  }
3508 
3509  Var.IsText = true;
3510  Var.TextValue = ExprAsString.str();
3511  Var.Redefinable = Variable::REDEFINABLE;
3512 
3513  return false;
3514  }
3515 
3516  MCSymbol *Sym = getContext().getOrCreateSymbol(Var.Name);
3517 
3518  const MCConstantExpr *PrevValue =
3519  Sym->isVariable() ? dyn_cast_or_null<MCConstantExpr>(
3520  Sym->getVariableValue(/*SetUsed=*/false))
3521  : nullptr;
3522  if (Var.IsText || !PrevValue || PrevValue->getValue() != Value) {
3523  switch (Var.Redefinable) {
3524  case Variable::NOT_REDEFINABLE:
3525  return Error(getTok().getLoc(), "invalid variable redefinition");
3526  case Variable::WARN_ON_REDEFINITION:
3527  if (Warning(NameLoc, "redefining '" + Name +
3528  "', already defined on the command line")) {
3529  return true;
3530  }
3531  break;
3532  default:
3533  break;
3534  }
3535  }
3536 
3537  Var.IsText = false;
3538  Var.TextValue.clear();
3539  Var.Redefinable = (DirKind == DK_ASSIGN) ? Variable::REDEFINABLE
3540  : Variable::NOT_REDEFINABLE;
3541 
3542  Sym->setRedefinable(Var.Redefinable != Variable::NOT_REDEFINABLE);
3543  Sym->setVariableValue(Expr);
3544  Sym->setExternal(false);
3545 
3546  return false;
3547 }
3548 
3549 bool MasmParser::parseEscapedString(std::string &Data) {
3550  if (check(getTok().isNot(AsmToken::String), "expected string"))
3551  return true;
3552 
3553  Data = "";
3554  char Quote = getTok().getString().front();
3555  StringRef Str = getTok().getStringContents();
3556  Data.reserve(Str.size());
3557  for (size_t i = 0, e = Str.size(); i != e; ++i) {
3558  Data.push_back(Str[i]);
3559  if (Str[i] == Quote) {
3560  // MASM treats doubled delimiting quotes as an escaped delimiting quote.
3561  // If we're escaping the string's trailing delimiter, we're definitely
3562  // missing a quotation mark.
3563  if (i + 1 == Str.size())
3564  return Error(getTok().getLoc(), "missing quotation mark in string");
3565  if (Str[i + 1] == Quote)
3566  ++i;
3567  }
3568  }
3569 
3570  Lex();
3571  return false;
3572 }
3573 
3574 bool MasmParser::parseAngleBracketString(std::string &Data) {
3575  SMLoc EndLoc, StartLoc = getTok().getLoc();
3576  if (isAngleBracketString(StartLoc, EndLoc)) {
3577  const char *StartChar = StartLoc.getPointer() + 1;
3578  const char *EndChar = EndLoc.getPointer() - 1;
3579  jumpToLoc(EndLoc, CurBuffer, EndStatementAtEOFStack.back());
3580  // Eat from '<' to '>'.
3581  Lex();
3582 
3583  Data = angleBracketString(StringRef(StartChar, EndChar - StartChar));
3584  return false;
3585  }
3586  return true;
3587 }
3588 
3589 /// textItem ::= textLiteral | textMacroID | % constExpr
///
/// Parses one text item starting at the current token and stores its text in
/// \p Data. Returns true if the current token does not start a usable text
/// item or a nested parse fails; returns false on success.
3590 bool MasmParser::parseTextItem(std::string &Data) {
3591  switch (getTok().getKind()) {
3592  default:
  // Any other token kind cannot start a text item.
3593  return true;
3594  case AsmToken::Percent: {
  // '%' followed by an absolute expression: the value is stored as decimal
  // text.
3595  int64_t Res;
3596  if (parseToken(AsmToken::Percent) || parseAbsoluteExpression(Res))
3597  return true;
3598  Data = std::to_string(Res);
3599  return false;
3600  }
  // Every token kind that begins with '<' is handed to the angle-bracket
  // string parser.
3601  case AsmToken::Less:
3602  case AsmToken::LessEqual:
3603  case AsmToken::LessLess:
3604  case AsmToken::LessGreater:
3605  return parseAngleBracketString(Data);
3606  case AsmToken::Identifier: {
3607  // This must be a text macro; we need to expand it accordingly.
3608  StringRef ID;
3609  SMLoc StartLoc = getTok().getLoc();
3610  if (parseIdentifier(ID))
3611  return true;
  // Data is assigned here even if the identifier later turns out not to be a
  // text macro and the function returns true.
3612  Data = ID.str();
3613
  // Expand repeatedly: after each substitution ID is re-pointed at Data, so a
  // text macro whose value names another text macro is expanded in turn.
  // NOTE(review): nothing visible here bounds the loop if a text macro's
  // value resolves back to itself - confirm this is prevented elsewhere.
3614  bool Expanded = false;
3615  while (true) {
3616  // Try to resolve as a built-in text macro
3617  auto BuiltinIt = BuiltinSymbolMap.find(ID.lower());
3618  if (BuiltinIt != BuiltinSymbolMap.end()) {
3619  llvm::Optional<std::string> BuiltinText =
3620  evaluateBuiltinTextMacro(BuiltinIt->getValue(), StartLoc);
3621  if (!BuiltinText) {
3622  // Not a text macro; break without substituting
3623  break;
3624  }
3625  Data = std::move(*BuiltinText);
3626  ID = StringRef(Data);
3627  Expanded = true;
3628  continue;
3629  }
3630
3631  // Try to resolve as a variable text macro
3632  auto VarIt = Variables.find(ID.lower());
3633  if (VarIt != Variables.end()) {
3634  const Variable &Var = VarIt->getValue();
3635  if (!Var.IsText) {
3636  // Not a text macro; break without substituting
3637  break;
3638  }
3639  Data = Var.TextValue;
3640  ID = StringRef(Data);
3641  Expanded = true;
3642  continue;
3643  }
3644
  // Neither a built-in nor a variable: stop expanding.
3645  break;
3646  }
3647
3648  if (!Expanded) {
3649  // Not a text macro; not usable in TextItem context. Since we haven't used
3650  // the token, put it back for better error recovery.
3651  getLexer().UnLex(AsmToken(AsmToken::Identifier, ID));
3652  return true;
3653  }
3654  return false;
3655  }
3656  }
  // Every case above returns, so control never reaches this point.
3657  llvm_unreachable("unhandled token kind");
3658 }
3659 
3660 /// parseDirectiveAscii:
3661 /// ::= ( .ascii | .asciz | .string ) [ "string" ( , "string" )* ]
3662 bool MasmParser::parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) {
3663  auto parseOp = [&]() -> bool {
3664  std::string Data;
3665  if (checkForValidSection() || parseEscapedString(Data))
3666  return true;
3667  getStreamer().emitBytes(Data);
3668  if (ZeroTerminated)
3669  getStreamer().emitBytes(StringRef("\0", 1));
3670  return false;
3671  };
3672 
3673  if (parseMany(parseOp))
3674  return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3675  return false;
3676 }
3677 
3678 bool MasmParser::emitIntValue(const MCExpr *Value, unsigned Size) {
3679  // Special case constant expressions to match code generator.
3680  if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
3681  assert(Size <= 8 && "Invalid size");
3682  int64_t IntValue = MCE->getValue();
3683  if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
3684  return Error(MCE->getLoc(), "out of range literal value");
3685  getStreamer().emitIntValue(IntValue, Size);
3686  } else {
3687  const MCSymbolRefExpr *MSE = dyn_cast<MCSymbolRefExpr>(Value);
3688  if (MSE && MSE->getSymbol().getName() == "?") {
3689  // ? initializer; treat as 0.
3690  getStreamer().emitIntValue(0, Size);
3691  } else {
3692  getStreamer().emitValue(Value, Size, Value->getLoc());
3693  }
3694  }
3695  return false;
3696 }
3697 
3698 bool MasmParser::parseScalarInitializer(unsigned Size,
3700  unsigned StringPadLength) {
3701  if (Size == 1 && getTok().is(AsmToken::String)) {
3702  std::string Value;
3703  if (parseEscapedString(Value))
3704  return true;
3705  // Treat each character as an initializer.
3706  for (const unsigned char CharVal : Value)
3707  Values.push_back(MCConstantExpr::create(CharVal, getContext()));
3708 
3709  // Pad the string with spaces to the specified length.
3710  for (size_t i = Value.size(); i < StringPadLength; ++i)
3711  Values.push_back(MCConstantExpr::create(' ', getContext()));
3712  } else {
3713  const MCExpr *Value;
3714  if (parseExpression(Value))
3715  return true;
3716  if (getTok().is(AsmToken::Identifier) &&
3717  getTok().getString().equals_insensitive("dup")) {
3718  Lex(); // Eat 'dup'.
3719  const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
3720  if (!MCE)
3721  return Error(Value->getLoc(),
3722  "cannot repeat value a non-constant number of times");
3723  const int64_t Repetitions = MCE->getValue();
3724  if (Repetitions < 0)
3725  return Error(Value->getLoc(),
3726  "cannot repeat value a negative number of times");
3727 
3728  SmallVector<const MCExpr *, 1> DuplicatedValues;
3729  if (parseToken(AsmToken::LParen,
3730  "parentheses required for 'dup' contents") ||
3731  parseScalarInstList(Size, DuplicatedValues) || parseRParen())
3732  return true;
3733 
3734  for (int i = 0; i < Repetitions; ++i)
3735  Values.append(DuplicatedValues.begin(), DuplicatedValues.end());
3736  } else {
3737  Values.push_back(Value);
3738  }
3739  }
3740  return false;
3741 }
3742 
3743 bool MasmParser::parseScalarInstList(unsigned Size,
3745  const AsmToken::TokenKind EndToken) {
3746  while (getTok().isNot(EndToken) &&
3747  (EndToken != AsmToken::Greater ||
3748  getTok().isNot(AsmToken::GreaterGreater))) {
3749  parseScalarInitializer(Size, Values);
3750 
3751  // If we see a comma, continue, and allow line continuation.
3752  if (!parseOptionalToken(AsmToken::Comma))
3753  break;
3754  parseOptionalToken(AsmToken::EndOfStatement);
3755  }
3756  return false;
3757 }
3758 
3759 bool MasmParser::emitIntegralValues(unsigned Size, unsigned *Count) {
3761  if (checkForValidSection() || parseScalarInstList(Size, Values))
3762  return true;
3763 
3764  for (const auto *Value : Values) {
3765  emitIntValue(Value, Size);
3766  }
3767  if (Count)
3768  *Count = Values.size();
3769  return false;
3770 }
3771 
3772 // Add a field to the current structure.
3773 bool MasmParser::addIntegralField(StringRef Name, unsigned Size) {
3774  StructInfo &Struct = StructInProgress.back();
3775  FieldInfo &Field = Struct.addField(Name, FT_INTEGRAL, Size);
3776  IntFieldInfo &IntInfo = Field.Contents.IntInfo;
3777 
3778  Field.Type = Size;
3779 
3780  if (parseScalarInstList(Size, IntInfo.Values))
3781  return true;
3782 
3783  Field.SizeOf = Field.Type * IntInfo.Values.size();
3784  Field.LengthOf = IntInfo.Values.size();
3785  const unsigned FieldEnd = Field.Offset + Field.SizeOf;
3786  if (!Struct.IsUnion) {
3787  Struct.NextOffset = FieldEnd;
3788  }
3789  Struct.Size = std::max(Struct.Size, FieldEnd);
3790  return false;
3791 }
3792 
3793 /// parseDirectiveValue
3794 /// ::= (byte | word | ... ) [ expression (, expression)* ]
3795 bool MasmParser::parseDirectiveValue(StringRef IDVal, unsigned Size) {
3796  if (StructInProgress.empty()) {
3797  // Initialize data value.
3798  if (emitIntegralValues(Size))
3799  return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3800  } else if (addIntegralField("", Size)) {
3801  return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3802  }
3803 
3804  return false;
3805 }
3806 
3807 /// parseDirectiveNamedValue
3808 /// ::= name (byte | word | ... ) [ expression (, expression)* ]
3809 bool MasmParser::parseDirectiveNamedValue(StringRef TypeName, unsigned Size,
3810  StringRef Name, SMLoc NameLoc) {
3811  if (StructInProgress.empty()) {
3812  // Initialize named data value.
3813  MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
3814  getStreamer().emitLabel(Sym);
3815  unsigned Count;
3816  if (emitIntegralValues(Size, &Count))
3817  return addErrorSuffix(" in '" + Twine(TypeName) + "' directive");
3818 
3819  AsmTypeInfo Type;
3820  Type.Name = TypeName;
3821  Type.Size = Size * Count;
3822  Type.ElementSize = Size;
3823  Type.Length = Count;
3824  KnownType[Name.lower()] = Type;
3825  } else if (addIntegralField(Name, Size)) {
3826  return addErrorSuffix(" in '" + Twine(TypeName) + "' directive");
3827  }
3828 
3829  return false;
3830 }
3831 
3832 static bool parseHexOcta(MasmParser &Asm, uint64_t &hi, uint64_t &lo) {
3833  if (Asm.getTok().isNot(AsmToken::Integer) &&
3834  Asm.getTok().isNot(AsmToken::BigNum))
3835  return Asm.TokError("unknown token in expression");
3836  SMLoc ExprLoc = Asm.getTok().getLoc();
3837  APInt IntValue = Asm.getTok().getAPIntVal();
3838  Asm.Lex();
3839  if (!IntValue.isIntN(128))
3840  return Asm.Error(ExprLoc, "out of range literal value");
3841  if (!IntValue.isIntN(64)) {
3842  hi = IntValue.getHiBits(IntValue.getBitWidth() - 64).getZExtValue();
3843  lo = IntValue.getLoBits(64).getZExtValue();
3844  } else {
3845  hi = 0;
3846  lo = IntValue.getZExtValue();
3847  }
3848  return false;
3849 }
3850 
3851 bool MasmParser::parseRealValue(const fltSemantics &Semantics, APInt &Res) {
3852  // We don't truly support arithmetic on floating point expressions, so we
3853  // have to manually parse unary prefixes.
3854  bool IsNeg = false;
3855  SMLoc SignLoc;
3856  if (getLexer().is(AsmToken::Minus)) {
3857  SignLoc = getLexer().getLoc();
3858  Lexer.Lex();
3859  IsNeg = true;
3860  } else if (getLexer().is(AsmToken::Plus)) {
3861  SignLoc = getLexer().getLoc();
3862  Lexer.Lex();
3863  }
3864 
3865  if (Lexer.is(AsmToken::Error))
3866  return TokError(Lexer.getErr());
3867  if (Lexer.isNot(AsmToken::Integer) && Lexer.isNot(AsmToken::Real) &&
3868  Lexer.isNot(AsmToken::Identifier))
3869  return TokError("unexpected token in directive");
3870 
3871  // Convert to an APFloat.
3872  APFloat Value(Semantics);
3873  StringRef IDVal = getTok().getString();
3874  if (getLexer().is(AsmToken::Identifier)) {
3875  if (IDVal.equals_insensitive("infinity") || IDVal.equals_insensitive("inf"))
3876  Value = APFloat::getInf(Semantics);
3877  else if (IDVal.equals_insensitive("nan"))
3878  Value = APFloat::getNaN(Semantics, false, ~0);
3879  else if (IDVal.equals_insensitive("?"))
3880  Value = APFloat::getZero(Semantics);
3881  else
3882  return TokError("invalid floating point literal");
3883  } else if (IDVal.consume_back("r") || IDVal.consume_back("R")) {
3884  // MASM hexadecimal floating-point literal; no APFloat conversion needed.
3885  // To match ML64.exe, ignore the initial sign.
3886  unsigned SizeInBits = Value.getSizeInBits(Semantics);
3887  if (SizeInBits != (IDVal.size() << 2))
3888  return TokError("invalid floating point literal");
3889 
3890  // Consume the numeric token.
3891  Lex();
3892 
3893  Res = APInt(SizeInBits, IDVal, 16);
3894  if (SignLoc.isValid())
3895  return Warning(SignLoc, "MASM-style hex floats ignore explicit sign");
3896  return false;
3897  } else if (errorToBool(
3898  Value.convertFromString(IDVal, APFloat::rmNearestTiesToEven)
3899  .takeError())) {
3900  return TokError("invalid floating point literal");
3901  }
3902  if (IsNeg)
3903  Value.changeSign();
3904 
3905  // Consume the numeric token.
3906  Lex();
3907 
3908  Res = Value.bitcastToAPInt();
3909 
3910  return false;
3911 }
3912 
3913 bool MasmParser::parseRealInstList(const fltSemantics &Semantics,
3914  SmallVectorImpl<APInt> &ValuesAsInt,
3915  const AsmToken::TokenKind EndToken) {
3916  while (getTok().isNot(EndToken) ||
3917  (EndToken == AsmToken::Greater &&
3918  getTok().isNot(AsmToken::GreaterGreater))) {
3919  const AsmToken NextTok = peekTok();
3920  if (NextTok.is(AsmToken::Identifier) &&
3921  NextTok.getString().equals_insensitive("dup")) {
3922  const MCExpr *Value;
3923  if (parseExpression(Value) || parseToken(AsmToken::Identifier))
3924  return true;
3925  const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
3926  if (!MCE)
3927  return Error(Value->getLoc(),
3928  "cannot repeat value a non-constant number of times");
3929  const int64_t Repetitions = MCE->getValue();
3930  if (Repetitions < 0)
3931  return Error(Value->getLoc(),
3932  "cannot repeat value a negative number of times");
3933 
3934  SmallVector<APInt, 1> DuplicatedValues;
3935  if (parseToken(AsmToken::LParen,
3936  "parentheses required for 'dup' contents") ||
3937  parseRealInstList(Semantics, DuplicatedValues) || parseRParen())
3938  return true;
3939 
3940  for (int i = 0; i < Repetitions; ++i)
3941  ValuesAsInt.append(DuplicatedValues.begin(), DuplicatedValues.end());
3942  } else {
3943  APInt AsInt;
3944  if (parseRealValue(Semantics, AsInt))
3945  return true;
3946  ValuesAsInt.push_back(AsInt);
3947  }
3948 
3949  // Continue if we see a comma. (Also, allow line continuation.)
3950  if (!parseOptionalToken(AsmToken::Comma))
3951  break;
3952  parseOptionalToken(AsmToken::EndOfStatement);
3953  }
3954 
3955  return false;
3956 }
3957 
3958 // Initialize real data values.
3959 bool MasmParser::emitRealValues(const fltSemantics &Semantics,
3960  unsigned *Count) {
3961  if (checkForValidSection())
3962  return true;
3963 
3964  SmallVector<APInt, 1> ValuesAsInt;
3965  if (parseRealInstList(Semantics, ValuesAsInt))
3966  return true;
3967 
3968  for (const APInt &AsInt : ValuesAsInt) {
3969  getStreamer().emitIntValue(AsInt);
3970  }
3971  if (Count)
3972  *Count = ValuesAsInt.size();
3973  return false;
3974 }
3975 
3976 // Add a real field to the current struct.
3977 bool MasmParser::addRealField(StringRef Name, const fltSemantics &Semantics,
3978  size_t Size) {
3979  StructInfo &Struct = StructInProgress.back();
3980  FieldInfo &Field = Struct.addField(Name, FT_REAL, Size);
3981  RealFieldInfo &RealInfo = Field.Contents.RealInfo;
3982 
3983  Field.SizeOf = 0;
3984 
3985  if (parseRealInstList(Semantics, RealInfo.AsIntValues))
3986  return true;
3987 
3988  Field.Type = RealInfo.AsIntValues.back().getBitWidth() / 8;
3989  Field.LengthOf = RealInfo.AsIntValues.size();
3990  Field.SizeOf = Field.Type * Field.LengthOf;
3991 
3992  const unsigned FieldEnd = Field.Offset + Field.SizeOf;
3993  if (!Struct.IsUnion) {
3994  Struct.NextOffset = FieldEnd;
3995  }
3996  Struct.Size = std::max(Struct.Size, FieldEnd);
3997  return false;
3998 }
3999 
4000 /// parseDirectiveRealValue
4001 /// ::= (real4 | real8 | real10) [ expression (, expression)* ]
4002 bool MasmParser::parseDirectiveRealValue(StringRef IDVal,
4003  const fltSemantics &Semantics,
4004  size_t Size) {
4005  if (StructInProgress.empty()) {
4006  // Initialize data value.
4007  if (emitRealValues(Semantics))
4008  return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
4009  } else if (addRealField("", Semantics, Size)) {
4010  return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
4011  }
4012  return false;
4013 }
4014 
4015 /// parseDirectiveNamedRealValue
4016 /// ::= name (real4 | real8 | real10) [ expression (, expression)* ]
4017 bool MasmParser::parseDirectiveNamedRealValue(StringRef TypeName,
4018  const fltSemantics &Semantics,
4019  unsigned Size, StringRef Name,
4020  SMLoc NameLoc) {
4021  if (StructInProgress.empty()) {
4022  // Initialize named data value.
4023  MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
4024  getStreamer().emitLabel(Sym);
4025  unsigned Count;
4026  if (emitRealValues(Semantics, &Count))
4027  return addErrorSuffix(" in '" + TypeName + "' directive");
4028 
4029  AsmTypeInfo Type;
4030  Type.Name = TypeName;
4031  Type.Size = Size * Count;
4032  Type.ElementSize = Size;
4033  Type.Length = Count;
4034  KnownType[Name.lower()] = Type;
4035  } else if (addRealField(Name, Semantics, Size)) {
4036  return addErrorSuffix(" in '" + TypeName + "' directive");
4037  }
4038  return false;
4039 }
4040 
4041 bool MasmParser::parseOptionalAngleBracketOpen() {
4042  const AsmToken Tok = getTok();
4043  if (parseOptionalToken(AsmToken::LessLess)) {
4044  AngleBracketDepth++;
4045  Lexer.UnLex(AsmToken(AsmToken::Less, Tok.getString().substr(1)));
4046  return true;
4047  } else if (parseOptionalToken(AsmToken::LessGreater)) {
4048  AngleBracketDepth++;
4049  Lexer.UnLex(AsmToken(AsmToken::Greater, Tok.getString().substr(1)));
4050  return true;
4051  } else if (parseOptionalToken(AsmToken::Less)) {
4052  AngleBracketDepth++;
4053  return true;
4054  }
4055 
4056  return false;
4057 }
4058 
4059 bool MasmParser::parseAngleBracketClose(const Twine &Msg) {
4060  const AsmToken Tok = getTok();
4061  if (parseOptionalToken(AsmToken::GreaterGreater)) {
4062  Lexer.UnLex(AsmToken(AsmToken::Greater, Tok.getString().substr(1)));
4063  } else if (parseToken(AsmToken::Greater, Msg)) {
4064  return true;
4065  }
4066  AngleBracketDepth--;
4067  return false;
4068 }
4069 
4070 bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
4071  const IntFieldInfo &Contents,
4072  FieldInitializer &Initializer) {
4073  SMLoc Loc = getTok().getLoc();
4074 
4076  if (parseOptionalToken(AsmToken::LCurly)) {
4077  if (Field.LengthOf == 1 && Field.Type > 1)
4078  return Error(Loc, "Cannot initialize scalar field with array value");
4079  if (parseScalarInstList(Field.Type, Values, AsmToken::RCurly) ||
4080  parseToken(AsmToken::RCurly))
4081  return true;
4082  } else if (parseOptionalAngleBracketOpen()) {
4083  if (Field.LengthOf == 1 && Field.Type > 1)
4084  return Error(Loc, "Cannot initialize scalar field with array value");
4085  if (parseScalarInstList(Field.Type, Values, AsmToken::Greater) ||
4086  parseAngleBracketClose())
4087  return true;
4088  } else if (Field.LengthOf > 1 && Field.Type > 1) {
4089  return Error(Loc, "Cannot initialize array field with scalar value");
4090  } else if (parseScalarInitializer(Field.Type, Values,
4091  /*StringPadLength=*/Field.LengthOf)) {
4092  return true;
4093  }
4094 
4095  if (Values.size() > Field.LengthOf) {
4096  return Error(Loc, "Initializer too long for field; expected at most " +
4097  std::to_string(Field.LengthOf) + " elements, got " +
4098  std::to_string(Values.size()));
4099  }
4100  // Default-initialize all remaining values.
4101  Values.append(Contents.Values.begin() + Values.size(), Contents.Values.end());
4102 
4103  Initializer = FieldInitializer(std::move(Values));
4104  return false;
4105 }
4106 
4107 bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
4108  const RealFieldInfo &Contents,
4109  FieldInitializer &Initializer) {
4110  const fltSemantics *Semantics;
4111  switch (Field.Type) {
4112  case 4:
4113  Semantics = &APFloat::IEEEsingle();
4114  break;
4115  case 8:
4116  Semantics = &APFloat::IEEEdouble();
4117  break;
4118  case 10:
4119  Semantics = &APFloat::x87DoubleExtended();
4120  break;
4121  default:
4122  llvm_unreachable("unknown real field type");
4123  }
4124 
4125  SMLoc Loc = getTok().getLoc();
4126 
4127  SmallVector<APInt, 1> AsIntValues;
4128  if (parseOptionalToken(AsmToken::LCurly)) {
4129  if (Field.LengthOf == 1)
4130  return Error(Loc, "Cannot initialize scalar field with array value");
4131  if (parseRealInstList(*Semantics, AsIntValues, AsmToken::RCurly) ||
4132  parseToken(AsmToken::RCurly))
4133  return true;
4134  } else if (parseOptionalAngleBracketOpen()) {
4135  if (Field.LengthOf == 1)
4136  return Error(Loc, "Cannot initialize scalar field with array value");
4137  if (parseRealInstList(*Semantics, AsIntValues, AsmToken::Greater) ||
4138  parseAngleBracketClose())
4139  return true;
4140  } else if (Field.LengthOf > 1) {
4141  return Error(Loc, "Cannot initialize array field with scalar value");
4142  } else {
4143  AsIntValues.emplace_back();
4144  if (parseRealValue(*Semantics, AsIntValues.back()))
4145  return true;
4146  }
4147 
4148  if (AsIntValues.size() > Field.LengthOf) {
4149  return Error(Loc, "Initializer too long for field; expected at most " +
4150  std::to_string(Field.LengthOf) + " elements, got " +
4151  std::to_string(AsIntValues.size()));
4152  }
4153  // Default-initialize all remaining values.
4154  AsIntValues.append(Contents.AsIntValues.begin() + AsIntValues.size(),
4155  Contents.AsIntValues.end());
4156 
4157  Initializer = FieldInitializer(std::move(AsIntValues));
4158  return false;
4159 }
4160 
4161 bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
4162  const StructFieldInfo &Contents,
4163  FieldInitializer &Initializer) {
4164  SMLoc Loc = getTok().getLoc();
4165 
4166  std::vector<StructInitializer> Initializers;
4167  if (Field.LengthOf > 1) {
4168  if (parseOptionalToken(AsmToken::LCurly)) {
4169  if (parseStructInstList(Contents.Structure, Initializers,
4170  AsmToken::RCurly) ||
4171  parseToken(AsmToken::RCurly))
4172  return true;
4173  } else if (parseOptionalAngleBracketOpen()) {
4174  if (parseStructInstList(Contents.Structure, Initializers,
4175  AsmToken::Greater) ||
4176  parseAngleBracketClose())
4177  return true;
4178  } else {
4179  return Error(Loc, "Cannot initialize array field with scalar value");
4180  }
4181  } else {
4182  Initializers.emplace_back();
4183  if (parseStructInitializer(Contents.Structure, Initializers.back()))
4184  return true;
4185  }
4186 
4187  if (Initializers.size() > Field.LengthOf) {
4188  return Error(Loc, "Initializer too long for field; expected at most " +
4189  std::to_string(Field.LengthOf) + " elements, got " +
4190  std::to_string(Initializers.size()));
4191  }
4192  // Default-initialize all remaining values.
4193  Initializers.insert(Initializers.end(),
4194  Contents.Initializers.begin() + Initializers.size(),
4195  Contents.Initializers.end());
4196 
4197  Initializer = FieldInitializer(std::move(Initializers), Contents.Structure);
4198  return false;
4199 }
4200 
4201 bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
4202  FieldInitializer &Initializer) {
4203  switch (Field.Contents.FT) {
4204  case FT_INTEGRAL:
4205  return parseFieldInitializer(Field, Field.Contents.IntInfo, Initializer);
4206  case FT_REAL:
4207  return parseFieldInitializer(Field, Field.Contents.RealInfo, Initializer);
4208  case FT_STRUCT:
4209  return parseFieldInitializer(Field, Field.Contents.StructInfo, Initializer);
4210  }
4211  llvm_unreachable("Unhandled FieldType enum");
4212 }
4213 
4214 bool MasmParser::parseStructInitializer(const StructInfo &Structure,
4215  StructInitializer &Initializer) {
4216  const AsmToken FirstToken = getTok();
4217 
4218  std::optional<AsmToken::TokenKind> EndToken;
4219  if (parseOptionalToken(AsmToken::LCurly)) {
4220  EndToken = AsmToken::RCurly;
4221  } else if (parseOptionalAngleBracketOpen()) {
4222  EndToken = AsmToken::Greater;
4223  AngleBracketDepth++;
4224  } else if (FirstToken.is(AsmToken::Identifier) &&
4225  FirstToken.getString() == "?") {
4226  // ? initializer; leave EndToken uninitialized to treat as empty.
4227  if (parseToken(AsmToken::Identifier))
4228  return true;
4229  } else {
4230  return Error(FirstToken.getLoc(), "Expected struct initializer");
4231  }
4232 
4233  auto &FieldInitializers = Initializer.FieldInitializers;
4234  size_t FieldIndex = 0;
4235  if (EndToken) {
4236  // Initialize all fields with given initializers.
4237  while (getTok().isNot(EndToken.value()) &&
4238  FieldIndex < Structure.Fields.size()) {
4239  const FieldInfo &Field = Structure.Fields[FieldIndex++];
4240  if (parseOptionalToken(AsmToken::Comma)) {
4241  // Empty initializer; use the default and continue. (Also, allow line
4242  // continuation.)
4243  FieldInitializers.push_back(Field.Contents);
4244  parseOptionalToken(AsmToken::EndOfStatement);
4245  continue;
4246  }
4247  FieldInitializers.emplace_back(Field.Contents.FT);
4248  if (parseFieldInitializer(Field, FieldInitializers.back()))
4249  return true;
4250 
4251  // Continue if we see a comma. (Also, allow line continuation.)
4252  SMLoc CommaLoc = getTok().getLoc();
4253  if (!parseOptionalToken(AsmToken::Comma))
4254  break;
4255  if (FieldIndex == Structure.Fields.size())
4256  return Error(CommaLoc, "'" + Structure.Name +
4257  "' initializer initializes too many fields");
4258  parseOptionalToken(AsmToken::EndOfStatement);
4259  }
4260  }
4261  // Default-initialize all remaining fields.
4262  for (const FieldInfo &Field : llvm::drop_begin(Structure.Fields, FieldIndex))
4263  FieldInitializers.push_back(Field.Contents);
4264 
4265  if (EndToken) {
4266  if (EndToken.value() == AsmToken::Greater)
4267  return parseAngleBracketClose();
4268 
4269  return parseToken(EndToken.value());
4270  }
4271 
4272  return false;
4273 }
4274 
4275 bool MasmParser::parseStructInstList(
4276  const StructInfo &Structure, std::vector<StructInitializer> &Initializers,
4277  const AsmToken::TokenKind EndToken) {
4278  while (getTok().isNot(EndToken) ||
4279  (EndToken == AsmToken::Greater &&
4280  getTok().isNot(AsmToken::GreaterGreater))) {
4281  const AsmToken NextTok = peekTok();
4282  if (NextTok.is(AsmToken::Identifier) &&
4283  NextTok.getString().equals_insensitive("dup")) {
4284  const MCExpr *Value;
4285  if (parseExpression(Value) || parseToken(AsmToken::Identifier))
4286  return true;
4287  const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
4288  if (!MCE)
4289  return Error(Value->getLoc(),
4290  "cannot repeat value a non-constant number of times");
4291  const int64_t Repetitions = MCE->getValue();
4292  if (Repetitions < 0)
4293  return Error(Value->getLoc(),
4294  "cannot repeat value a negative number of times");
4295 
4296  std::vector<StructInitializer> DuplicatedValues;
4297  if (parseToken(AsmToken::LParen,
4298  "parentheses required for 'dup' contents") ||
4299  parseStructInstList(Structure, DuplicatedValues) || parseRParen())
4300  return true;
4301 
4302  for (int i = 0; i < Repetitions; ++i)
4303  llvm::append_range(Initializers, DuplicatedValues);
4304  } else {
4305  Initializers.emplace_back();
4306  if (parseStructInitializer(Structure, Initializers.back()))
4307  return true;
4308  }
4309 
4310  // Continue if we see a comma. (Also, allow line continuation.)
4311  if (!parseOptionalToken(AsmToken::Comma))
4312  break;
4313  parseOptionalToken(AsmToken::EndOfStatement);
4314  }
4315 
4316  return false;
4317 }
4318 
4319 bool MasmParser::emitFieldValue(const FieldInfo &Field,
4320  const IntFieldInfo &Contents) {
4321  // Default-initialize all values.
4322  for (const MCExpr *Value : Contents.Values) {
4323  if (emitIntValue(Value, Field.Type))
4324  return true;
4325  }
4326  return false;
4327 }
4328 
4329 bool MasmParser::emitFieldValue(const FieldInfo &Field,
4330  const RealFieldInfo &Contents) {
4331  for (const APInt &AsInt : Contents.AsIntValues) {
4332  getStreamer().emitIntValue(AsInt.getLimitedValue(),
4333  AsInt.getBitWidth() / 8);
4334  }
4335  return false;
4336 }
4337 
4338 bool MasmParser::emitFieldValue(const FieldInfo &Field,
4339  const StructFieldInfo &Contents) {
4340  for (const auto &Initializer : Contents.Initializers) {
4341  size_t Index = 0, Offset = 0;
4342  for (const auto &SubField : Contents.Structure.Fields) {
4343  getStreamer().emitZeros(SubField.Offset - Offset);
4344  Offset = SubField.Offset + SubField.SizeOf;
4345  emitFieldInitializer(SubField, Initializer.FieldInitializers[Index++]);
4346  }
4347  }
4348  return false;
4349 }
4350 
4351 bool MasmParser::emitFieldValue(const FieldInfo &Field) {
4352  switch (Field.Contents.FT) {
4353  case FT_INTEGRAL:
4354  return emitFieldValue(Field, Field.Contents.IntInfo);
4355  case FT_REAL:
4356  return emitFieldValue(Field, Field.Contents.RealInfo);
4357  case FT_STRUCT:
4358  return emitFieldValue(Field, Field.Contents.StructInfo);
4359  }
4360  llvm_unreachable("Unhandled FieldType enum");
4361 }
4362 
4363 bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4364  const IntFieldInfo &Contents,
4365  const IntFieldInfo &Initializer) {
4366  for (const auto &Value : Initializer.Values) {
4367  if (emitIntValue(Value, Field.Type))
4368  return true;
4369  }
4370  // Default-initialize all remaining values.
4371  for (const auto &Value :
4372  llvm::drop_begin(Contents.Values, Initializer.Values.size())) {
4373  if (emitIntValue(Value, Field.Type))
4374  return true;
4375  }
4376  return false;
4377 }
4378 
4379 bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4380  const RealFieldInfo &Contents,
4381  const RealFieldInfo &Initializer) {
4382  for (const auto &AsInt : Initializer.AsIntValues) {
4383  getStreamer().emitIntValue(AsInt.getLimitedValue(),
4384  AsInt.getBitWidth() / 8);
4385  }
4386  // Default-initialize all remaining values.
4387  for (const auto &AsInt :
4388  llvm::drop_begin(Contents.AsIntValues, Initializer.AsIntValues.size())) {
4389  getStreamer().emitIntValue(AsInt.getLimitedValue(),
4390  AsInt.getBitWidth() / 8);
4391  }
4392  return false;
4393 }
4394 
4395 bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4396  const StructFieldInfo &Contents,
4397  const StructFieldInfo &Initializer) {
4398  for (const auto &Init : Initializer.Initializers) {
4399  if (emitStructInitializer(Contents.Structure, Init))
4400  return true;
4401  }
4402  // Default-initialize all remaining values.
4403  for (const auto &Init : llvm::drop_begin(Contents.Initializers,
4404  Initializer.Initializers.size())) {
4405  if (emitStructInitializer(Contents.Structure, Init))
4406  return true;
4407  }
4408  return false;
4409 }
4410 
4411 bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4412  const FieldInitializer &Initializer) {
4413  switch (Field.Contents.FT) {
4414  case FT_INTEGRAL:
4415  return emitFieldInitializer(Field, Field.Contents.IntInfo,
4416  Initializer.IntInfo);
4417  case FT_REAL:
4418  return emitFieldInitializer(Field, Field.Contents.RealInfo,
4419  Initializer.RealInfo);
4420  case FT_STRUCT:
4421  return emitFieldInitializer(Field, Field.Contents.StructInfo,
4422  Initializer.StructInfo);
4423  }
4424  llvm_unreachable("Unhandled FieldType enum");
4425 }
4426 
4427 bool MasmParser::emitStructInitializer(const StructInfo &Structure,
4428  const StructInitializer &Initializer) {
4429  if (!Structure.Initializable)
4430  return Error(getLexer().getLoc(),
4431  "cannot initialize a value of type '" + Structure.Name +
4432  "'; 'org' was used in the type's declaration");
4433  size_t Index = 0, Offset = 0;
4434  for (const auto &Init : Initializer.FieldInitializers) {
4435  const auto &Field = Structure.Fields[Index++];
4436  getStreamer().emitZeros(Field.Offset - Offset);
4437  Offset = Field.Offset + Field.SizeOf;
4438  if (emitFieldInitializer(Field, Init))
4439  return true;
4440  }
4441  // Default-initialize all remaining fields.
4442  for (const auto &Field : llvm::drop_begin(
4443  Structure.Fields, Initializer.FieldInitializers.size())) {
4444  getStreamer().emitZeros(Field.Offset - Offset);
4445  Offset = Field.Offset + Field.SizeOf;
4446  if (emitFieldValue(Field))
4447  return true;
4448  }
4449  // Add final padding.
4450  if (Offset != Structure.Size)
4451  getStreamer().emitZeros(Structure.Size - Offset);
4452  return false;
4453 }
4454 
4455 // Set data values from initializers.
4456 bool MasmParser::emitStructValues(const StructInfo &Structure,
4457  unsigned *Count) {
4458  std::vector<StructInitializer> Initializers;
4459  if (parseStructInstList(Structure, Initializers))
4460  return true;
4461 
4462  for (const auto &Initializer : Initializers) {
4463  if (emitStructInitializer(Structure, Initializer))
4464  return true;
4465  }
4466 
4467  if (Count)
4468  *Count = Initializers.size();
4469  return false;
4470 }
4471 
4472 // Declare a field in the current struct.
4473 bool MasmParser::addStructField(StringRef Name, const StructInfo &Structure) {
4474  StructInfo &OwningStruct = StructInProgress.back();
4475  FieldInfo &Field =
4476  OwningStruct.addField(Name, FT_STRUCT, Structure.AlignmentSize);
4477  StructFieldInfo &StructInfo = Field.Contents.StructInfo;
4478 
4479  StructInfo.Structure = Structure;
4480  Field.Type = Structure.Size;
4481 
4482  if (parseStructInstList(Structure, StructInfo.Initializers))
4483  return true;
4484 
4485  Field.LengthOf = StructInfo.Initializers.size();
4486  Field.SizeOf = Field.Type * Field.LengthOf;
4487 
4488  const unsigned FieldEnd = Field.Offset + Field.SizeOf;
4489  if (!OwningStruct.IsUnion) {
4490  OwningStruct.NextOffset = FieldEnd;
4491  }
4492  OwningStruct.Size = std::max(OwningStruct.Size, FieldEnd);
4493 
4494  return false;
4495 }
4496 
4497 /// parseDirectiveStructValue
4498 /// ::= struct-id (<struct-initializer> | {struct-initializer})
4499 /// [, (<struct-initializer> | {struct-initializer})]*
4500 bool MasmParser::parseDirectiveStructValue(const StructInfo &Structure,
4501  StringRef Directive, SMLoc DirLoc) {
4502  if (StructInProgress.empty()) {
4503  if (emitStructValues(Structure))
4504  return true;
4505  } else if (addStructField("", Structure)) {
4506  return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4507  }
4508 
4509  return false;
4510 }
4511 
4512 /// parseDirectiveNamedValue
4513 /// ::= name (byte | word | ... ) [ expression (, expression)* ]
4514 bool MasmParser::parseDirectiveNamedStructValue(const StructInfo &Structure,
4516  SMLoc DirLoc, StringRef Name) {
4517  if (StructInProgress.empty()) {
4518  // Initialize named data value.
4519  MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
4520  getStreamer().emitLabel(Sym);
4521  unsigned Count;
4522  if (emitStructValues(Structure, &Count))
4523  return true;
4524  AsmTypeInfo Type;
4525  Type.Name = Structure.Name;
4526  Type.Size = Structure.Size * Count;
4527  Type.ElementSize = Structure.Size;
4528  Type.Length = Count;
4529  KnownType[Name.lower()] = Type;
4530  } else if (addStructField(Name, Structure)) {
4531  return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4532  }
4533 
4534  return false;
4535 }
4536 
4537 /// parseDirectiveStruct
4538 /// ::= <name> (STRUC | STRUCT | UNION) [fieldAlign] [, NONUNIQUE]
4539 /// (dataDir | generalDir | offsetDir | nestedStruct)+
4540 /// <name> ENDS
4541 ////// dataDir = data declaration
4542 ////// offsetDir = EVEN, ORG, ALIGN
4543 bool MasmParser::parseDirectiveStruct(StringRef Directive,
4544  DirectiveKind DirKind, StringRef Name,
4545  SMLoc NameLoc) {
4546  // We ignore NONUNIQUE; we do not support OPTION M510 or OPTION OLDSTRUCTS
4547  // anyway, so all field accesses must be qualified.
4548  AsmToken NextTok = getTok();
4549  int64_t AlignmentValue = 1;
4550  if (NextTok.isNot(AsmToken::Comma) &&
4551  NextTok.isNot(AsmToken::EndOfStatement) &&
4552  parseAbsoluteExpression(AlignmentValue)) {
4553  return addErrorSuffix(" in alignment value for '" + Twine(Directive) +
4554  "' directive");
4555  }
4556  if (!isPowerOf2_64(AlignmentValue)) {
4557  return Error(NextTok.getLoc(), "alignment must be a power of two; was " +
4558  std::to_string(AlignmentValue));
4559  }
4560 
4562  SMLoc QualifierLoc;
4563  if (parseOptionalToken(AsmToken::Comma)) {
4564  QualifierLoc = getTok().getLoc();
4565  if (parseIdentifier(Qualifier))
4566  return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4567  if (!Qualifier.equals_insensitive("nonunique"))
4568  return Error(QualifierLoc, "Unrecognized qualifier for '" +
4569  Twine(Directive) +
4570  "' directive; expected none or NONUNIQUE");
4571  }
4572 
4573  if (parseToken(AsmToken::EndOfStatement))
4574  return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4575 
4576  StructInProgress.emplace_back(Name, DirKind == DK_UNION, AlignmentValue);
4577  return false;
4578 }
4579 
4580 /// parseDirectiveNestedStruct
4581 /// ::= (STRUC | STRUCT | UNION) [name]
4582 /// (dataDir | generalDir | offsetDir | nestedStruct)+
4583 /// ENDS
4584 bool MasmParser::parseDirectiveNestedStruct(StringRef Directive,
4585  DirectiveKind DirKind) {
4586  if (StructInProgress.empty())
4587  return TokError("missing name in top-level '" + Twine(Directive) +
4588  "' directive");
4589 
4590  StringRef Name;
4591  if (getTok().is(AsmToken::Identifier)) {
4592  Name = getTok().getIdentifier();
4593  parseToken(AsmToken::Identifier);
4594  }
4595  if (parseToken(AsmToken::EndOfStatement))
4596  return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4597 
4598  // Reserve space to ensure Alignment doesn't get invalidated when
4599  // StructInProgress grows.
4600  StructInProgress.reserve(StructInProgress.size() + 1);
4601  StructInProgress.emplace_back(Name, DirKind == DK_UNION,
4602  StructInProgress.back().Alignment);
4603  return false;
4604 }
4605 
4606 bool MasmParser::parseDirectiveEnds(StringRef Name, SMLoc NameLoc) {
4607  if (StructInProgress.empty())
4608  return Error(NameLoc, "ENDS directive without matching STRUC/STRUCT/UNION");
4609  if (StructInProgress.size() > 1)
4610  return Error(NameLoc, "unexpected name in nested ENDS directive");
4611  if (StructInProgress.back().Name.compare_insensitive(Name))
4612  return Error(NameLoc, "mismatched name in ENDS directive; expected '" +
4613  StructInProgress.back().Name + "'");
4614  StructInfo Structure = StructInProgress.pop_back_val();
4615  // Pad to make the structure's size divisible by the smaller of its alignment
4616  // and the size of its largest field.
4617  Structure.Size = llvm::alignTo(
4618  Structure.Size, std::min(Structure.Alignment, Structure.AlignmentSize));
4619  Structs[Name.lower()] = Structure;
4620 
4621  if (parseToken(AsmToken::EndOfStatement))
4622  return addErrorSuffix(" in ENDS directive");
4623 
4624  return false;
4625 }
4626 
4627 bool MasmParser::parseDirectiveNestedEnds() {
4628  if (StructInProgress.empty())
4629  return TokError("ENDS directive without matching STRUC/STRUCT/UNION");
4630  if (StructInProgress.size() == 1)
4631  return TokError("missing name in top-level ENDS directive");
4632 
4633  if (parseToken(AsmToken::EndOfStatement))
4634  return addErrorSuffix(" in nested ENDS directive");
4635 
4636  StructInfo Structure = StructInProgress.pop_back_val();
4637  // Pad to make the structure's size divisible by its alignment.
4638  Structure.Size = llvm::alignTo(Structure.Size, Structure.Alignment);
4639 
4640  StructInfo &ParentStruct = StructInProgress.back();
4641  if (Structure.Name.empty()) {
4642  // Anonymous substructures' fields are addressed as if they belong to the
4643  // parent structure - so we transfer them to the parent here.
4644  const size_t OldFields = ParentStruct.Fields.size();
4645  ParentStruct.Fields.insert(
4646  ParentStruct.Fields.end(),
4647  std::make_move_iterator(Structure.Fields.begin()),
4648  std::make_move_iterator(Structure.Fields.end()));
4649  for (const auto &FieldByName : Structure.FieldsByName) {
4650  ParentStruct.FieldsByName[FieldByName.getKey()] =
4651  FieldByName.getValue() + OldFields;
4652  }
4653 
4654  unsigned FirstFieldOffset = 0;
4655  if (!Structure.Fields.empty() && !ParentStruct.IsUnion) {
4656  FirstFieldOffset = llvm::alignTo(
4657  ParentStruct.NextOffset,
4658  std::min(ParentStruct.Alignment, Structure.AlignmentSize));
4659  }
4660 
4661  if (ParentStruct.IsUnion) {
4662  ParentStruct.Size = std::max(ParentStruct.Size, Structure.Size);
4663  } else {
4664  for (auto &Field : llvm::drop_begin(ParentStruct.Fields, OldFields))
4665  Field.Offset += FirstFieldOffset;
4666 
4667  const unsigned StructureEnd = FirstFieldOffset + Structure.Size;
4668  if (!ParentStruct.IsUnion) {
4669  ParentStruct.NextOffset = StructureEnd;
4670  }
4671  ParentStruct.Size = std::max(ParentStruct.Size, StructureEnd);
4672  }
4673  } else {
4674  FieldInfo &Field = ParentStruct.addField(Structure.Name, FT_STRUCT,
4675  Structure.AlignmentSize);
4676  StructFieldInfo &StructInfo = Field.Contents.StructInfo;
4677  Field.Type = Structure.Size;
4678  Field.LengthOf = 1;
4679  Field.SizeOf = Structure.Size;
4680 
4681  const unsigned StructureEnd = Field.Offset + Field.SizeOf;
4682  if (!ParentStruct.IsUnion) {
4683  ParentStruct.NextOffset = StructureEnd;
4684  }
4685  ParentStruct.Size = std::max(ParentStruct.Size, StructureEnd);
4686 
4687  StructInfo.Structure = Structure;
4688  StructInfo.Initializers.emplace_back();
4689  auto &FieldInitializers = StructInfo.Initializers.back().FieldInitializers;
4690  for (const auto &SubField : Structure.Fields) {
4691  FieldInitializers.push_back(SubField.Contents);
4692  }
4693  }
4694 
4695  return false;
4696 }
4697 
4698 /// parseDirectiveOrg
4699 /// ::= org expression
4700 bool MasmParser::parseDirectiveOrg() {
4701  const MCExpr *Offset;
4702  SMLoc OffsetLoc = Lexer.getLoc();
4703  if (checkForValidSection() || parseExpression(Offset))
4704  return true;
4705  if (parseToken(AsmToken::EndOfStatement))
4706  return addErrorSuffix(" in 'org' directive");
4707 
4708  if (StructInProgress.empty()) {
4709  // Not in a struct; change the offset for the next instruction or data
4710  if (checkForValidSection())
4711  return addErrorSuffix(" in 'org' directive");
4712 
4713  getStreamer().emitValueToOffset(Offset, 0, OffsetLoc);
4714  } else {
4715  // Offset the next field of this struct
4716  StructInfo &Structure = StructInProgress.back();
4717  int64_t OffsetRes;
4718  if (!Offset->evaluateAsAbsolute(OffsetRes, getStreamer().getAssemblerPtr()))
4719  return Error(OffsetLoc,
4720  "expected absolute expression in 'org' directive");
4721  if (OffsetRes < 0)
4722  return Error(
4723  OffsetLoc,
4724  "expected non-negative value in struct's 'org' directive; was " +
4725  std::to_string(OffsetRes));
4726  Structure.NextOffset = static_cast<unsigned>(OffsetRes);
4727 
4728  // ORG-affected structures cannot be initialized
4729  Structure.Initializable = false;
4730  }
4731 
4732  return false;
4733 }
4734 
4735 bool MasmParser::emitAlignTo(int64_t Alignment) {
4736  if (StructInProgress.empty()) {
4737  // Not in a struct; align the next instruction or data
4738  if (checkForValidSection())
4739  return true;
4740 
4741  // Check whether we should use optimal code alignment for this align
4742  // directive.
4743  const MCSection *Section = getStreamer().getCurrentSectionOnly();
4744  assert(Section && "must have section to emit alignment");
4745  if (Section->useCodeAlign()) {
4746  getStreamer().emitCodeAlignment(Align(Alignment),
4747  &getTargetParser().getSTI(),
4748  /*MaxBytesToEmit=*/0);
4749  } else {
4750  // FIXME: Target specific behavior about how the "extra" bytes are filled.
4751  getStreamer().emitValueToAlignment(Align(Alignment), /*Value=*/0,
4752  /*ValueSize=*/1,
4753  /*MaxBytesToEmit=*/0);
4754  }
4755  } else {
4756  // Align the next field of this struct
4757  StructInfo &Structure = StructInProgress.back();
4758  Structure.NextOffset = llvm::alignTo(Structure.NextOffset, Alignment);
4759  }
4760 
4761  return false;
4762 }
4763 
4764 /// parseDirectiveAlign
4765 /// ::= align expression
4766 bool MasmParser::parseDirectiveAlign() {
4767  SMLoc AlignmentLoc = getLexer().getLoc();
4768  int64_t Alignment;
4769 
4770  // Ignore empty 'align' directives.
4771  if (getTok().is(AsmToken::EndOfStatement)) {
4772  return Warning(AlignmentLoc,
4773  "align directive with no operand is ignored") &&
4774  parseToken(AsmToken::EndOfStatement);
4775  }
4776  if (parseAbsoluteExpression(Alignment) ||
4777  parseToken(AsmToken::EndOfStatement))
4778  return addErrorSuffix(" in align directive");
4779 
4780  // Always emit an alignment here even if we throw an error.
4781  bool ReturnVal = false;
4782 
4783  // Reject alignments that aren't either a power of two or zero, for ML.exe
4784  // compatibility. Alignment of zero is silently rounded up to one.
4785  if (Alignment == 0)
4786  Alignment = 1;
4787  if (!isPowerOf2_64(Alignment))
4788  ReturnVal |= Error(AlignmentLoc, "alignment must be a power of 2; was " +
4789  std::to_string(Alignment));
4790 
4791  if (emitAlignTo(Alignment))
4792  ReturnVal |= addErrorSuffix(" in align directive");
4793 
4794  return ReturnVal;
4795 }
4796 
4797 /// parseDirectiveEven
4798 /// ::= even
4799 bool MasmParser::parseDirectiveEven() {
4800  if (parseToken(AsmToken::EndOfStatement) || emitAlignTo(2))
4801  return addErrorSuffix(" in even directive");
4802 
4803  return false;
4804 }
4805 
4806 /// parseDirectiveFile
4807 /// ::= .file filename
4808 /// ::= .file number [directory] filename [md5 checksum] [source source-text]
///
/// Parses a '.file' directive. Without a leading integer, the single file
/// name is forwarded with emitFileDirective(), and only when the MCAsmInfo
/// reports hasSingleParameterDotFile(). With a file number, a DWARF file
/// entry is emitted, optionally carrying an MD5 checksum and source text.
/// Returns true on failure; the two Warning() exits return whatever Warning()
/// returns.
///
/// NOTE(review): the embedded listing numbers skip 4830, 4846, 4851, 4886 and
/// 4890, so this view lacks some lines (the declarations of Filename, Source,
/// Keyword and CKMem are not visible). Confirm against the complete file
/// before changing the code.
4809 bool MasmParser::parseDirectiveFile(SMLoc DirectiveLoc) {
4810  // FIXME: I'm not sure what this is.
 // -1 is the "no file number given" marker tested throughout this function.
4811  int64_t FileNumber = -1;
4812  if (getLexer().is(AsmToken::Integer)) {
4813  FileNumber = getTok().getIntVal();
4814  Lex();
4815 
4816  if (FileNumber < 0)
4817  return TokError("negative file number");
4818  }
4819 
4820  std::string Path;
4821 
4822  // Usually the directory and filename together, otherwise just the directory.
4823  // Allow the strings to have escaped octal character sequence.
4824  if (check(getTok().isNot(AsmToken::String),
4825  "unexpected token in '.file' directive") ||
4826  parseEscapedString(Path))
4827  return true;
4828 
4829  StringRef Directory;
4831  std::string FilenameData;
 // A second string means the first one was the directory; that form is only
 // accepted together with an explicit file number.
4832  if (getLexer().is(AsmToken::String)) {
4833  if (check(FileNumber == -1,
4834  "explicit path specified, but no file number") ||
4835  parseEscapedString(FilenameData))
4836  return true;
4837  Filename = FilenameData;
4838  Directory = Path;
4839  } else {
4840  Filename = Path;
4841  }
4842 
4843  uint64_t MD5Hi, MD5Lo;
4844  bool HasMD5 = false;
4845 
4847  bool HasSource = false;
4848  std::string SourceString;
4849 
 // Trailing "md5 <value>" / "source <string>" options, repeated until the end
 // of the statement. Both require an explicit file number.
4850  while (!parseOptionalToken(AsmToken::EndOfStatement)) {
4852  if (check(getTok().isNot(AsmToken::Identifier),
4853  "unexpected token in '.file' directive") ||
4854  parseIdentifier(Keyword))
4855  return true;
4856  if (Keyword == "md5") {
4857  HasMD5 = true;
4858  if (check(FileNumber == -1,
4859  "MD5 checksum specified, but no file number") ||
4860  parseHexOcta(*this, MD5Hi, MD5Lo))
4861  return true;
4862  } else if (Keyword == "source") {
4863  HasSource = true;
4864  if (check(FileNumber == -1,
4865  "source specified, but no file number") ||
4866  check(getTok().isNot(AsmToken::String),
4867  "unexpected token in '.file' directive") ||
4868  parseEscapedString(SourceString))
4869  return true;
4870  } else {
4871  return TokError("unexpected token in '.file' directive");
4872  }
4873  }
4874 
4875  if (FileNumber == -1) {
4876  // Ignore the directive if there is no number and the target doesn't support
4877  // numberless .file directives. This allows some portability of assembler
4878  // between different object file formats.
4879  if (getContext().getAsmInfo()->hasSingleParameterDotFile())
4880  getStreamer().emitFileDirective(Filename);
4881  } else {
4882  // In case there is a -g option as well as debug info from directive .file,
4883  // we turn off the -g option, directly use the existing debug info instead.
4884  // Throw away any implicit file table for the assembler source.
4885  if (Ctx.getGenDwarfForAssembly()) {
4887  Ctx.setGenDwarfForAssembly(false);
4888  }
4889 
4891  if (HasMD5) {
 // Spread the two 64-bit halves over the 16 checksum bytes, most
 // significant byte first: MD5Hi fills bytes 0-7, MD5Lo fills bytes 8-15.
4892  MD5::MD5Result Sum;
4893  for (unsigned i = 0; i != 8; ++i) {
4894  Sum[i] = uint8_t(MD5Hi >> ((7 - i) * 8));
4895  Sum[i + 8] = uint8_t(MD5Lo >> ((7 - i) * 8));
4896  }
4897  CKMem = Sum;
4898  }
4899  if (HasSource) {
 // Copy the source text into memory obtained from Ctx so that Source does
 // not refer to the local SourceString.
4900  char *SourceBuf = static_cast<char *>(Ctx.allocate(SourceString.size()));
4901  memcpy(SourceBuf, SourceString.data(), SourceString.size());
4902  Source = StringRef(SourceBuf, SourceString.size());
4903  }
4904  if (FileNumber == 0) {
 // File number 0 is only emitted when the DWARF version is at least 5;
 // otherwise the directive ends here with a warning.
4905  if (Ctx.getDwarfVersion() < 5)
4906  return Warning(DirectiveLoc, "file 0 not supported prior to DWARF-5");
4907  getStreamer().emitDwarfFile0Directive(Directory, Filename, CKMem, Source);
4908  } else {
4909  Expected<unsigned> FileNumOrErr = getStreamer().tryEmitDwarfFileDirective(
4910  FileNumber, Directory, Filename, CKMem, Source);
4911  if (!FileNumOrErr)
4912  return Error(DirectiveLoc, toString(FileNumOrErr.takeError()));
4913  }
4914  // Alert the user if there are some .file directives with MD5 and some not.
4915  // But only do that once.
4916  if (!ReportedInconsistentMD5 && !Ctx.isDwarfMD5UsageConsistent(0)) {
4917  ReportedInconsistentMD5 = true;
4918  return Warning(DirectiveLoc, "inconsistent use of MD5 checksums");
4919  }
4920  }
4921 
4922  return false;
4923 }
4924 
4925 /// parseDirectiveLine
4926 /// ::= .line [number]
4927 bool MasmParser::parseDirectiveLine() {
4928  int64_t LineNumber;
4929  if (getLexer().is(AsmToken::Integer)) {
4930  if (parseIntToken(LineNumber, "unexpected token in '.line' directive"))
4931  return true;
4932  (void)LineNumber;
4933  // FIXME: Do something with the .line.
4934  }
4935  if (parseEOL())
4936  return true;
4937 
4938  return false;
4939 }
4940 
4941 /// parseDirectiveLoc
4942 /// ::= .loc FileNumber [LineNumber] [ColumnPos] [basic_block] [prologue_end]
4943 /// [epilogue_begin] [is_stmt VALUE] [isa VALUE]
4944 /// The first number is a file number, must have been previously assigned with
4945 /// a .file directive, the second number is the line number and optionally the
4946 /// third number is a column position (zero if not specified). The remaining
4947 /// optional items are .loc sub-directives.
///
/// A "discriminator VALUE" sub-directive is also accepted by the code below.
/// Returns true on failure, false otherwise.
///
/// NOTE(review): the embedded listing numbers skip 4986, 4988, 4990, 5000 and
/// 5002, so the statements executed for basic_block, prologue_end,
/// epilogue_begin and for is_stmt 0/1 are not visible in this view. Confirm
/// against the complete file before changing the code.
4948 bool MasmParser::parseDirectiveLoc() {
4949  int64_t FileNumber = 0, LineNumber = 0;
4950  SMLoc Loc = getTok().getLoc();
 // File number 0 is only accepted when the DWARF version is 5 or later, and
 // the number must already be known to the context.
4951  if (parseIntToken(FileNumber, "unexpected token in '.loc' directive") ||
4952  check(FileNumber < 1 && Ctx.getDwarfVersion() < 5, Loc,
4953  "file number less than one in '.loc' directive") ||
4954  check(!getContext().isValidDwarfFileNumber(FileNumber), Loc,
4955  "unassigned file number in '.loc' directive"))
4956  return true;
4957 
4958  // optional
4959  if (getLexer().is(AsmToken::Integer)) {
4960  LineNumber = getTok().getIntVal();
4961  if (LineNumber < 0)
4962  return TokError("line number less than zero in '.loc' directive");
4963  Lex();
4964  }
4965 
4966  int64_t ColumnPos = 0;
4967  if (getLexer().is(AsmToken::Integer)) {
4968  ColumnPos = getTok().getIntVal();
4969  if (ColumnPos < 0)
4970  return TokError("column position less than zero in '.loc' directive");
4971  Lex();
4972  }
4973 
 // Start from the current location's flags, keeping only the is_stmt bit.
4974  auto PrevFlags = getContext().getCurrentDwarfLoc().getFlags();
4975  unsigned Flags = PrevFlags & DWARF2_FLAG_IS_STMT;
4976  unsigned Isa = 0;
4977  int64_t Discriminator = 0;
4978 
 // Parses one sub-directive; invoked repeatedly through parseMany() below.
4979  auto parseLocOp = [&]() -> bool {
4980  StringRef Name;
4981  SMLoc Loc = getTok().getLoc();
4982  if (parseIdentifier(Name))
4983  return TokError("unexpected token in '.loc' directive");
4984 
4985  if (Name == "basic_block")
4987  else if (Name == "prologue_end")
4989  else if (Name == "epilogue_begin")
4991  else if (Name == "is_stmt") {
4992  Loc = getTok().getLoc();
4993  const MCExpr *Value;
4994  if (parseExpression(Value))
4995  return true;
4996  // The expression must be the constant 0 or 1.
4997  if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
 // NOTE(review): the constant is stored in an int here; if getValue()
 // returns a wider type, large values are narrowed before the comparison.
4998  int Value = MCE->getValue();
4999  if (Value == 0)
5001  else if (Value == 1)
5003  else
5004  return Error(Loc, "is_stmt value not 0 or 1");
5005  } else {
5006  return Error(Loc, "is_stmt value not the constant value of 0 or 1");
5007  }
5008  } else if (Name == "isa") {
5009  Loc = getTok().getLoc();
5010  const MCExpr *Value;
5011  if (parseExpression(Value))
5012  return true;
5013  // The expression must be a constant greater or equal to 0.
5014  if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
5015  int Value = MCE->getValue();
5016  if (Value < 0)
5017  return Error(Loc, "isa number less than zero");
5018  Isa = Value;
5019  } else {
5020  return Error(Loc, "isa number not a constant value");
5021  }
5022  } else if (Name == "discriminator") {
5023  if (parseAbsoluteExpression(Discriminator))
5024  return true;
5025  } else {
5026  return Error(Loc, "unknown sub-directive in '.loc' directive");
5027  }
5028  return false;
5029  };
5030 
5031  if (parseMany(parseLocOp, false /*hasComma*/))
5032  return true;
5033 
5034  getStreamer().emitDwarfLocDirective(FileNumber, LineNumber, ColumnPos, Flags,
5035  Isa, Discriminator, StringRef());
5036 
5037  return false;
5038 }
5039 
5040 /// parseDirectiveStabs
5041 /// ::= .stabs string, number, number, number
5042 bool MasmParser::parseDirectiveStabs() {
5043  return TokError("unsupported directive '.stabs'");
5044 }
5045 
5046 /// parseDirectiveCVFile
5047 /// ::= .cv_file number filename [checksum] [checksumkind]
5048 bool MasmParser::parseDirectiveCVFile() {
5049  SMLoc FileNumberLoc = getTok().getLoc();
5050  int64_t FileNumber;
5051  std::string Filename;
5052  std::string Checksum;
5053  int64_t ChecksumKind = 0;
5054 
5055  if (parseIntToken(FileNumber,
5056  "expected file number in '.cv_file' directive") ||
5057  check(FileNumber < 1, FileNumberLoc, "file number less than one") ||
5058  check(getTok().isNot(AsmToken::String),
5059  "unexpected token in '.cv_file' directive") ||
5060  parseEscapedString(Filename))
5061  return true;
5062  if (!parseOptionalToken(AsmToken::EndOfStatement)) {
5063  if (check(getTok().isNot(AsmToken::String),
5064  "unexpected token in '.cv_file' directive") ||
5065  parseEscapedString(Checksum) ||
5066  parseIntToken(ChecksumKind,
5067  "expected checksum kind in '.cv_file' directive") ||
5068  parseEOL())
5069  return true;
5070  }
5071 
5072  Checksum = fromHex(Checksum);
5073  void *CKMem = Ctx.allocate(Checksum.size(), 1);
5074  memcpy(CKMem, Checksum.data(), Checksum.size());
5075  ArrayRef<uint8_t> ChecksumAsBytes(reinterpret_cast<const uint8_t *>(CKMem),
5076  Checksum.size());
5077 
5078  if (!getStreamer().emitCVFileDirective(FileNumber, Filename, ChecksumAsBytes,
5079  static_cast<uint8_t>(ChecksumKind)))
5080  return Error(FileNumberLoc, "file number already allocated");
5081 
5082  return false;
5083 }
5084 
5085 bool MasmParser::parseCVFunctionId(int64_t &FunctionId,
5086  StringRef DirectiveName) {
5087  SMLoc Loc;
5088  return parseTokenLoc(Loc) ||
5089  parseIntToken(FunctionId, "expected function id in '" + DirectiveName +
5090  "' directive") ||
5091  check(FunctionId < 0 || FunctionId >= UINT_MAX, Loc,
5092  "expected function id within range [0, UINT_MAX)");
5093 }
5094 
5095 bool MasmParser::parseCVFileId(int64_t &FileNumber, StringRef DirectiveName) {
5096  SMLoc Loc;
5097  return parseTokenLoc(Loc) ||
5098  parseIntToken(FileNumber, "expected integer in '" + DirectiveName +
5099  "' directive") ||
5100  check(FileNumber < 1, Loc, "file number less than one in '" +
5101  DirectiveName + "' directive") ||
5102  check(!getCVContext().isValidFileNumber(FileNumber), Loc,
5103  "unassigned file number in '" + DirectiveName + "' directive");
5104 }
5105 
5106 /// parseDirectiveCVFuncId
5107 /// ::= .cv_func_id FunctionId
5108 ///
5109 /// Introduces a function ID that can be used with .cv_loc.
5110 bool MasmParser::parseDirectiveCVFuncId() {
5111  SMLoc FunctionIdLoc = getTok().getLoc();
5112  int64_t FunctionId;
5113 
5114  if (parseCVFunctionId(FunctionId, ".cv_func_id") || parseEOL())
5115  return true;
5116 
5117  if (!getStreamer().emitCVFuncIdDirective(FunctionId))
5118  return Error(FunctionIdLoc, "function id already allocated");
5119 
5120  return false;
5121 }
5122 
5123 /// parseDirectiveCVInlineSiteId
5124 /// ::= .cv_inline_site_id FunctionId
5125 /// "within" IAFunc
5126 /// "inlined_at" IAFile IALine [IACol]
5127 ///
5128 /// Introduces a function ID that can be used with .cv_loc. Includes "inlined
5129 /// at" source location information for use in the line table of the caller,
5130 /// whether the caller is a real function or another inlined call site.
5131 bool MasmParser::parseDirectiveCVInlineSiteId() {
5132  SMLoc FunctionIdLoc = getTok().getLoc();
5133  int64_t FunctionId;
5134  int64_t IAFunc;
5135  int64_t IAFile;
5136  int64_t IALine;
5137  int64_t IACol = 0;
5138 
5139  // FunctionId
5140  if (parseCVFunctionId(FunctionId, ".cv_inline_site_id"))
5141  return true;
5142 
5143  // "within"
5144  if (check((getLexer().isNot(AsmToken::Identifier) ||
5145  getTok().getIdentifier() != "within"),
5146  "expected 'within' identifier in '.cv_inline_site_id' directive"))
5147  return true;
5148  Lex();
5149 
5150  // IAFunc
5151  if (parseCVFunctionId(IAFunc, ".cv_inline_site_id"))
5152  return true;
5153 
5154  // "inlined_at"
5155  if (check((getLexer().isNot(AsmToken::Identifier) ||
5156  getTok().getIdentifier() != "inlined_at"),
5157  "expected 'inlined_at' identifier in '.cv_inline_site_id' "
5158  "directive") )
5159  return true;
5160  Lex();
5161 
5162  // IAFile IALine
5163  if (parseCVFileId(IAFile, ".cv_inline_site_id") ||
5164  parseIntToken(IALine, "expected line number after 'inlined_at'"))
5165  return true;
5166 
5167  // [IACol]
5168  if (getLexer().is(AsmToken::Integer)) {
5169  IACol = getTok().getIntVal();
5170  Lex();
5171  }
5172 
5173  if (parseEOL())
5174  return true;
5175 
5176  if (!getStreamer().emitCVInlineSiteIdDirective(FunctionId, IAFunc, IAFile,
5177  IALine, IACol, FunctionIdLoc))
5178  return Error(FunctionIdLoc, "function id already allocated");
5179 
5180  return false;
5181 }
5182 
5183 /// parseDirectiveCVLoc
5184 /// ::= .cv_loc FunctionId FileNumber [LineNumber] [ColumnPos] [prologue_end]
5185 /// [is_stmt VALUE]
5186 /// The first number is a file number, must have been previously assigned with
5187 /// a .file directive, the second number is the line number and optionally the
5188 /// third number is a column position (zero if not specified). The remaining
5189 /// optional items are .loc sub-directives.
5190 bool MasmParser::parseDirectiveCVLoc() {
5191  SMLoc DirectiveLoc = getTok().getLoc();
5192  int64_t FunctionId, FileNumber;
5193  if (parseCVFunctionId(FunctionId, ".cv_loc") ||
5194  parseCVFileId(FileNumber, ".cv_loc"))
5195  return true;
5196 
5197  int64_t LineNumber = 0;
5198  if (getLexer().is(AsmToken::Integer)) {
5199  LineNumber = getTok().getIntVal();
5200  if (LineNumber < 0)
5201  return TokError("line number less than zero in '.cv_loc' directive");
5202  Lex();
5203  }
5204 
5205  int64_t ColumnPos = 0;
5206  if (getLexer().is(AsmToken::Integer)) {
5207  ColumnPos = getTok().getIntVal();
5208  if (ColumnPos < 0)
5209  return TokError("column position less than zero in '.cv_loc' directive");
5210  Lex();
5211  }
5212 
5213  bool PrologueEnd = false;
5214  uint64_t IsStmt = 0;
5215 
5216  auto parseOp = [&]() -> bool {
5217  StringRef Name;
5218  SMLoc Loc = getTok().getLoc();
5219  if (parseIdentifier(Name))
5220  return TokError("unexpected token in '.cv_loc' directive");
5221  if (Name == "prologue_end")
5222  PrologueEnd = true;
5223  else if (Name == "is_stmt") {
5224  Loc = getTok().getLoc();
5225  const MCExpr *Value;
5226  if (parseExpression(Value))
5227  return true;
5228  // The expression must be the constant 0 or 1.
5229  IsStmt = ~0ULL;
5230  if (const auto *MCE = dyn_cast<MCConstantExpr>(Value))
5231  IsStmt = MCE->getValue();
5232 
5233  if (IsStmt > 1)
5234  return Error(Loc, "is_stmt value not 0 or 1");
5235  } else {
5236  return Error(Loc, "unknown sub-directive in '.cv_loc' directive");
5237  }
5238  return false;
5239  };
5240 
5241  if (parseMany(parseOp, false /*hasComma*/))
5242  return true;
5243 
5244  getStreamer().emitCVLocDirective(FunctionId, FileNumber, LineNumber,
5245  ColumnPos, PrologueEnd, IsStmt, StringRef(),
5246  DirectiveLoc);
5247  return false;
5248 }
5249 
5250 /// parseDirectiveCVLinetable
5251 /// ::= .cv_linetable FunctionId, FnStart, FnEnd
5252 bool MasmParser::parseDirectiveCVLinetable() {
5253  int64_t FunctionId;
5254  StringRef FnStartName, FnEndName;
5255  SMLoc Loc = getTok().getLoc();
5256  if (parseCVFunctionId(FunctionId, ".cv_linetable") ||
5257  parseToken(AsmToken::Comma,
5258  "unexpected token in '.cv_linetable' directive") ||
5259  parseTokenLoc(Loc) || check(parseIdentifier(FnStartName), Loc,
5260  "expected identifier in directive") ||
5261  parseToken(AsmToken::Comma,
5262  "unexpected token in '.cv_linetable' directive") ||
5263  parseTokenLoc(Loc) || check(parseIdentifier(FnEndName), Loc,
5264  "expected identifier in directive"))
5265  return true;
5266 
5267  MCSymbol *FnStartSym = getContext().getOrCreateSymbol(FnStartName);
5268  MCSymbol *FnEndSym = getContext().getOrCreateSymbol(FnEndName);
5269 
5270  getStreamer().emitCVLinetableDirective(FunctionId, FnStartSym, FnEndSym);
5271  return false;
5272 }
5273 
5274 /// parseDirectiveCVInlineLinetable
5275 /// ::= .cv_inline_linetable PrimaryFunctionId FileId LineNum FnStart FnEnd
5276 bool MasmParser::parseDirectiveCVInlineLinetable() {
5277  int64_t PrimaryFunctionId, SourceFileId, SourceLineNum;
5278  StringRef FnStartName, FnEndName;
5279  SMLoc Loc = getTok().getLoc();
5280  if (parseCVFunctionId(PrimaryFunctionId, ".cv_inline_linetable") ||
5281  parseTokenLoc(Loc) ||
5282  parseIntToken(
5283  SourceFileId,
5284  "expected SourceField in '.cv_inline_linetable' directive") ||
5285  check(SourceFileId <= 0, Loc,
5286  "File id less than zero in '.cv_inline_linetable' directive") ||
5287  parseTokenLoc(Loc) ||
5288  parseIntToken(
5289  SourceLineNum,
5290  "expected SourceLineNum in '.cv_inline_linetable' directive") ||
5291  check(SourceLineNum < 0, Loc,
5292  "Line number less than zero in '.cv_inline_linetable' directive") ||
5293  parseTokenLoc(Loc) || check(parseIdentifier(FnStartName), Loc,
5294  "expected identifier in directive") ||
5295  parseTokenLoc(Loc) || check(parseIdentifier(FnEndName), Loc,
5296  "expected identifier in directive"))
5297  return true;
5298 
5299  if (parseEOL())
5300  return true;
5301 
5302  MCSymbol *FnStartSym = getContext().getOrCreateSymbol(FnStartName);
5303  MCSymbol *FnEndSym = getContext().getOrCreateSymbol(FnEndName);
5304  getStreamer().emitCVInlineLinetableDirective(PrimaryFunctionId, SourceFileId,
5305  SourceLineNum, FnStartSym,
5306  FnEndSym);
5307  return false;
5308 }
5309 
5310 void MasmParser::initializeCVDefRangeTypeMap() {
5311  CVDefRangeTypeMap["reg"] = CVDR_DEFRANGE_REGISTER;
5312  CVDefRangeTypeMap["frame_ptr_rel"] = CVDR_DEFRANGE_FRAMEPOINTER_REL;
5313  CVDefRangeTypeMap["subfield_reg"] = CVDR_DEFRANGE_SUBFIELD_REGISTER;
5314  CVDefRangeTypeMap["reg_rel"] = CVDR_DEFRANGE_REGISTER_REL;
5315 }
5316 
5317 /// parseDirectiveCVDefRange
5318 /// ::= .cv_def_range RangeStart RangeEnd (GapStart GapEnd)*, bytes*
///
/// Collects pairs of symbol names into Ranges, then reads a def_range type
/// keyword (looked up in CVDefRangeTypeMap) and the comma-separated numeric
/// operands that type requires, and forwards the filled-in header to
/// emitCVDefRangeDirective().
///
/// NOTE(review): the embedded listing numbers skip 5340, 5345, 5358, 5371,
/// 5388, 5404 and 5413, so the first argument of two parseToken() calls, the
/// declared type of CVTypeIt and the declarations of the DRHdr headers are
/// not visible in this view. Confirm against the complete file before
/// changing the code.
5319 bool MasmParser::parseDirectiveCVDefRange() {
5320  SMLoc Loc;
5321  std::vector<std::pair<const MCSymbol *, const MCSymbol *>> Ranges;
 // Each iteration consumes two identifiers and records them as one pair.
5322  while (getLexer().is(AsmToken::Identifier)) {
5323  Loc = getLexer().getLoc();
5324  StringRef GapStartName;
5325  if (parseIdentifier(GapStartName))
5326  return Error(Loc, "expected identifier in directive");
5327  MCSymbol *GapStartSym = getContext().getOrCreateSymbol(GapStartName);
5328 
5329  Loc = getLexer().getLoc();
5330  StringRef GapEndName;
5331  if (parseIdentifier(GapEndName))
5332  return Error(Loc, "expected identifier in directive");
5333  MCSymbol *GapEndSym = getContext().getOrCreateSymbol(GapEndName);
5334 
5335  Ranges.push_back({GapStartSym, GapEndSym});
5336  }
5337 
 // From here on Loc is no longer updated: later diagnostics reuse the
 // location of the last symbol parsed above, or a default-constructed SMLoc
 // when the loop body never ran.
5338  StringRef CVDefRangeTypeStr;
5339  if (parseToken(
5341  "expected comma before def_range type in .cv_def_range directive") ||
5342  parseIdentifier(CVDefRangeTypeStr))
5343  return Error(Loc, "expected def_range type in directive");
5344 
5346  CVDefRangeTypeMap.find(CVDefRangeTypeStr);
 // Keywords missing from the map become CVDR_DEFRANGE, which has no case
 // below and therefore ends up in the default branch.
5347  CVDefRangeType CVDRType = (CVTypeIt == CVDefRangeTypeMap.end())
5348  ? CVDR_DEFRANGE
5349  : CVTypeIt->getValue();
5350  switch (CVDRType) {
5351  case CVDR_DEFRANGE_REGISTER: {
5352  int64_t DRRegister;
5353  if (parseToken(AsmToken::Comma, "expected comma before register number in "
5354  ".cv_def_range directive") ||
5355  parseAbsoluteExpression(DRRegister))
5356  return Error(Loc, "expected register number");
5357 
5359  DRHdr.Register = DRRegister;
5360  DRHdr.MayHaveNoName = 0;
5361  getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5362  break;
5363  }
5364  case CVDR_DEFRANGE_FRAMEPOINTER_REL: {
5365  int64_t DROffset;
5366  if (parseToken(AsmToken::Comma,
5367  "expected comma before offset in .cv_def_range directive") ||
5368  parseAbsoluteExpression(DROffset))
5369  return Error(Loc, "expected offset value");
5370 
5372  DRHdr.Offset = DROffset;
5373  getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5374  break;
5375  }
5376  case CVDR_DEFRANGE_SUBFIELD_REGISTER: {
5377  int64_t DRRegister;
5378  int64_t DROffsetInParent;
5379  if (parseToken(AsmToken::Comma, "expected comma before register number in "
5380  ".cv_def_range directive") ||
5381  parseAbsoluteExpression(DRRegister))
5382  return Error(Loc, "expected register number");
5383  if (parseToken(AsmToken::Comma,
5384  "expected comma before offset in .cv_def_range directive") ||
5385  parseAbsoluteExpression(DROffsetInParent))
5386  return Error(Loc, "expected offset value");
5387 
5389  DRHdr.Register = DRRegister;
5390  DRHdr.MayHaveNoName = 0;
5391  DRHdr.OffsetInParent = DROffsetInParent;
5392  getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5393  break;
5394  }
5395  case CVDR_DEFRANGE_REGISTER_REL: {
5396  int64_t DRRegister;
5397  int64_t DRFlags;
5398  int64_t DRBasePointerOffset;
5399  if (parseToken(AsmToken::Comma, "expected comma before register number in "
5400  ".cv_def_range directive") ||
5401  parseAbsoluteExpression(DRRegister))
5402  return Error(Loc, "expected register value");
5403  if (parseToken(
5405  "expected comma before flag value in .cv_def_range directive") ||
5406  parseAbsoluteExpression(DRFlags))
5407  return Error(Loc, "expected flag value");
5408  if (parseToken(AsmToken::Comma, "expected comma before base pointer offset "
5409  "in .cv_def_range directive") ||
5410  parseAbsoluteExpression(DRBasePointerOffset))
5411  return Error(Loc, "expected base pointer offset value");
5412 
5414  DRHdr.Register = DRRegister;
5415  DRHdr.Flags = DRFlags;
5416  DRHdr.BasePointerOffset = DRBasePointerOffset;
5417  getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5418  break;
5419  }
5420  default:
5421  return Error(Loc, "unexpected def_range type in .cv_def_range directive");
5422  }
 // NOTE(review): every successful case breaks out of the switch and reaches
 // this `return true`, whereas the other directive parsers in this file
 // return false on success. Confirm whether callers rely on this value
 // before changing it.
5423  return true;
5424 }
5425 
5426 /// parseDirectiveCVString
5427 /// ::= .cv_stringtable "string"
5428 bool MasmParser::parseDirectiveCVString() {
5429  std::string Data;
5430  if (checkForValidSection() || parseEscapedString(Data))
5431  return addErrorSuffix(" in '.cv_string' directive");
5432 
5433  // Put the string in the table and emit the offset.
5434  std::pair<StringRef, unsigned> Insertion =
5435  getCVContext().addToStringTable(Data);
5436  getStreamer().emitIntValue(Insertion.second, 4);
5437  return false;
5438 }
5439 
5440 /// parseDirectiveCVStringTable
5441 /// ::= .cv_stringtable
5442 bool MasmParser::parseDirectiveCVStringTable() {
5443  getStreamer().emitCVStringTableDirective();
5444  return false;
5445 }
5446 
5447 /// parseDirectiveCVFileChecksums
5448 /// ::= .cv_filechecksums
5449 bool MasmParser::parseDirectiveCVFileChecksums() {
5450  getStreamer().emitCVFileChecksumsDirective();
5451  return false;
5452 }
5453 
5454 /// parseDirectiveCVFileChecksumOffset
5455 /// ::= .cv_filechecksumoffset fileno
5456 bool MasmParser::parseDirectiveCVFileChecksumOffset() {
5457  int64_t FileNo;
5458  if (parseIntToken(FileNo, "expected identifier in directive"))
5459  return true;
5460  if (parseEOL())
5461  return true;
5462  getStreamer().emitCVFileChecksumOffsetDirective(FileNo);
5463  return false;
5464 }
5465 
5466 /// parseDirectiveCVFPOData
5467 /// ::= .cv_fpo_data procsym
5468 bool MasmParser::parseDirectiveCVFPOData() {
5469  SMLoc DirLoc = getLexer().getLoc();
5470  StringRef ProcName;
5471  if (parseIdentifier(ProcName))
5472  return TokError("expected symbol name");
5473  if (parseEOL("unexpected tokens"))
5474  return addErrorSuffix(" in '.cv_fpo_data' directive");
5475  MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName);
5476  getStreamer().emitCVFPOData(ProcSym, DirLoc);
5477  return false;
5478 }
5479 
5480 /// parseDirectiveCFISections
5481 /// ::= .cfi_sections section [, section]
5482 bool MasmParser::parseDirectiveCFISections() {
5483  StringRef Name;
5484  bool EH = false;
5485  bool Debug = false;
5486 
5487  if (parseIdentifier(Name))
5488  return TokError("Expected an identifier");
5489 
5490  if (Name == ".eh_frame")
5491  EH = true;
5492  else if (Name == ".debug_frame")
5493  Debug = true;
5494 
5495  if (getLexer().is(AsmToken::Comma)) {
5496  Lex();
5497 
5498  if (parseIdentifier(Name))
5499  return TokError("Expected an identifier");
5500 
5501  if (Name == ".eh_frame")
5502  EH = true;
5503  else if (Name == ".debug_frame")
5504  Debug = true;
5505  }
5506 
5507  getStreamer().emitCFISections(EH, Debug);
5508  return false;
5509 }
5510 
5511 /// parseDirectiveCFIStartProc
5512 /// ::= .cfi_startproc [simple]
5513 bool MasmParser::parseDirectiveCFIStartProc() {
5514  StringRef Simple;
5515  if (!parseOptionalToken(AsmToken::EndOfStatement)) {
5516  if (check(parseIdentifier(Simple) || Simple != "simple",
5517  "unexpected token") ||
5518  parseToken(AsmToken::EndOfStatement))
5519  return addErrorSuffix(" in '.cfi_startproc' directive");
5520  }
5521 
5522  // TODO(kristina): Deal with a corner case of incorrect diagnostic context
5523  // being produced if this directive is emitted as part of preprocessor macro
5524  // expansion which can *ONLY* happen if Clang's cc1as is the API consumer.
5525  // Tools like llvm-mc on the other hand are not affected by it, and report
5526  // correct context information.
5527  getStreamer().emitCFIStartProc(!Simple.empty(), Lexer.getLoc());
5528  return false;
5529 }
5530 
5531 /// parseDirectiveCFIEndProc
5532 /// ::= .cfi_endproc
5533 bool MasmParser::parseDirectiveCFIEndProc() {
5534  getStreamer().emitCFIEndProc();
5535  return false;
5536 }
5537 
5538 /// parse register name or number.
5539 bool MasmParser::parseRegisterOrRegisterNumber(int64_t &Register,
5540  SMLoc DirectiveLoc) {
5541  unsigned RegNo;
5542 
5543  if (getLexer().isNot(AsmToken::Integer)) {
5544  if (getTargetParser().ParseRegister(RegNo, DirectiveLoc, DirectiveLoc))
5545  return true;
5546  Register = getContext().getRegisterInfo()->getDwarfRegNum(RegNo, true);
5547  } else
5548  return parseAbsoluteExpression(Register);
5549 
5550  return false;
5551 }
5552 
5553 /// parseDirectiveCFIDefCfa
5554 /// ::= .cfi_def_cfa register, offset
5555 bool MasmParser::parseDirectiveCFIDefCfa(SMLoc DirectiveLoc) {
5556  int64_t Register = 0, Offset = 0;
5557  if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) ||
5558  parseToken(AsmToken::Comma, "unexpected token in directive") ||
5559  parseAbsoluteExpression(Offset))
5560  return true;
5561 
5562  getStreamer().emitCFIDefCfa(Register, Offset);
5563  return false;
5564 }
5565 
5566 /// parseDirectiveCFIDefCfaOffset
5567 /// ::= .cfi_def_cfa_offset offset
5568 bool MasmParser::parseDirectiveCFIDefCfaOffset() {
5569  int64_t Offset = 0;
5570  if (parseAbsoluteExpression(Offset))
5571  return true;
5572 
5573  getStreamer().emitCFIDefCfaOffset(Offset);
5574  return false;
5575 }
5576 
5577 /// parseDirectiveCFIRegister
5578 /// ::= .cfi_register register, register
5579 bool MasmParser::parseDirectiveCFIRegister(SMLoc DirectiveLoc) {
5580  int64_t Register1 = 0, Register2 = 0;
5581  if (parseRegisterOrRegisterNumber(Register1, DirectiveLoc) ||
5582  parseToken(AsmToken::Comma, "unexpected token in directive") ||
5583  parseRegisterOrRegisterNumber(Register2, DirectiveLoc))
5584  return true;
5585 
5586  getStreamer().emitCFIRegister(Register1, Register2);
5587  return false;
5588 }
5589 
5590 /// parseDirectiveCFIWindowSave
5591 /// ::= .cfi_window_save
5592 bool MasmParser::parseDirectiveCFIWindowSave() {
5593  getStreamer().emitCFIWindowSave();
5594  return false;
5595 }
5596 
5597 /// parseDirectiveCFIAdjustCfaOffset
5598 /// ::= .cfi_adjust_cfa_offset adjustment
5599 bool MasmParser::parseDirectiveCFIAdjustCfaOffset() {
5600  int64_t Adjustment = 0;
5601  if (parseAbsoluteExpression(Adjustment))
5602  return true;
5603 
5604  getStreamer().emitCFIAdjustCfaOffset(Adjustment);
5605  return false;
5606 }
5607 
5608 /// parseDirectiveCFIDefCfaRegister
5609 /// ::= .cfi_def_cfa_register register
5610 bool MasmParser::parseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc) {
5611  int64_t Register = 0;
5612  if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5613  return true;
5614 
5615  getStreamer().emitCFIDefCfaRegister(Register);
5616  return false;
5617 }
5618 
5619 /// parseDirectiveCFIOffset
5620 /// ::= .cfi_offset register, offset
5621 bool MasmParser::parseDirectiveCFIOffset(SMLoc DirectiveLoc) {
5622  int64_t Register = 0;
5623  int64_t Offset = 0;
5624 
5625  if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) ||
5626  parseToken(AsmToken::Comma, "unexpected token in directive") ||
5627  parseAbsoluteExpression(Offset))
5628  return true;
5629 
5630  getStreamer().emitCFIOffset(Register, Offset);
5631  return false;
5632 }
5633 
5634 /// parseDirectiveCFIRelOffset
5635 /// ::= .cfi_rel_offset register, offset
5636 bool MasmParser::parseDirectiveCFIRelOffset(SMLoc DirectiveLoc) {
5637  int64_t Register = 0, Offset = 0;
5638 
5639  if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) ||
5640  parseToken(AsmToken::Comma, "unexpected token in directive") ||
5641  parseAbsoluteExpression(Offset))
5642  return true;
5643 
5644  getStreamer().emitCFIRelOffset(Register, Offset);
5645  return false;
5646 }
5647 
5648 static bool isValidEncoding(int64_t Encoding) {
5649  if (Encoding & ~0xff)
5650  return false;
5651 
5652  if (Encoding == dwarf::DW_EH_PE_omit)
5653  return true;
5654 
5655  const unsigned Format = Encoding & 0xf;
5660  return false;
5661 
5662  const unsigned Application = Encoding & 0x70;
5663  if (Application != dwarf::DW_EH_PE_absptr &&
5664  Application != dwarf::DW_EH_PE_pcrel)
5665  return false;
5666 
5667  return true;
5668 }
5669 
5670 /// parseDirectiveCFIPersonalityOrLsda
5671 /// IsPersonality true for cfi_personality, false for cfi_lsda
5672 /// ::= .cfi_personality encoding, [symbol_name]
5673 /// ::= .cfi_lsda encoding, [symbol_name]
5674 bool MasmParser::parseDirectiveCFIPersonalityOrLsda(bool IsPersonality) {
5675  int64_t Encoding = 0;
5676  if (parseAbsoluteExpression(Encoding))
5677  return true;
5678  if (Encoding == dwarf::DW_EH_PE_omit)
5679  return false;
5680 
5681  StringRef Name;
5682  if (check(!isValidEncoding(Encoding), "unsupported encoding.") ||
5683  parseToken(AsmToken::Comma, "unexpected token in directive") ||
5684  check(parseIdentifier(Name), "expected identifier in directive"))
5685  return true;
5686 
5687  MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
5688 
5689  if (IsPersonality)
5690  getStreamer().emitCFIPersonality(Sym, Encoding);
5691  else
5692  getStreamer().emitCFILsda(Sym, Encoding);
5693  return false;
5694 }
5695 
5696 /// parseDirectiveCFIRememberState
5697 /// ::= .cfi_remember_state
5698 bool MasmParser::parseDirectiveCFIRememberState() {
5699  getStreamer().emitCFIRememberState();
5700  return false;
5701 }
5702 
5703 /// parseDirectiveCFIRestoreState
5704 /// ::= .cfi_remember_state
5705 bool MasmParser::parseDirectiveCFIRestoreState() {
5706  getStreamer().emitCFIRestoreState();
5707  return false;
5708 }
5709 
5710 /// parseDirectiveCFISameValue
5711 /// ::= .cfi_same_value register
5712 bool MasmParser::parseDirectiveCFISameValue(SMLoc DirectiveLoc) {
5713  int64_t Register = 0;
5714 
5715  if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5716  return true;
5717 
5718  getStreamer().emitCFISameValue(Register);
5719  return false;
5720 }
5721 
5722 /// parseDirectiveCFIRestore
5723 /// ::= .cfi_restore register
5724 bool MasmParser::parseDirectiveCFIRestore(SMLoc DirectiveLoc) {
5725  int64_t Register = 0;
5726  if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5727  return true;
5728 
5729  getStreamer().emitCFIRestore(Register);
5730  return false;
5731 }
5732 
5733 /// parseDirectiveCFIEscape
5734 /// ::= .cfi_escape expression[,...]
5735 bool MasmParser::parseDirectiveCFIEscape() {
5736  std::string Values;
5737  int64_t CurrValue;
5738  if (parseAbsoluteExpression(CurrValue))
5739  return true;
5740 
5741  Values.push_back((uint8_t)CurrValue);
5742 
5743  while (getLexer().is(AsmToken::Comma)) {
5744  Lex();
5745 
5746  if (parseAbsoluteExpression(CurrValue))
5747  return true;
5748 
5749  Values.push_back((uint8_t)CurrValue);
5750  }
5751 
5752  getStreamer().emitCFIEscape(Values);
5753  return false;
5754 }
5755 
5756 /// parseDirectiveCFIReturnColumn
5757 /// ::= .cfi_return_column register
5758 bool MasmParser::parseDirectiveCFIReturnColumn(SMLoc DirectiveLoc) {
5759  int64_t Register = 0;
5760  if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5761  return true;
5762  getStreamer().emitCFIReturnColumn(Register);
5763  return false;
5764 }
5765 
5766 /// parseDirectiveCFISignalFrame
5767 /// ::= .cfi_signal_frame
5768 bool MasmParser::parseDirectiveCFISignalFrame() {
5769  if (parseEOL())
5770  return true;
5771 
5772  getStreamer().emitCFISignalFrame();
5773  return false;
5774 }
5775 
5776 /// parseDirectiveCFIUndefined
5777 /// ::= .cfi_undefined register
5778 bool MasmParser::parseDirectiveCFIUndefined(SMLoc DirectiveLoc) {
5779  int64_t Register = 0;
5780 
5781  if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5782  return true;
5783 
5784  getStreamer().emitCFIUndefined(Register);
5785  return false;
5786 }
5787 
5788 /// parseDirectiveMacro
5789 /// ::= name macro [parameters]
5790 /// ["LOCAL" identifiers]
5791 /// parameters ::= parameter [, parameter]*
5792 /// parameter ::= name ":" qualifier
5793 /// qualifier ::= "req" | "vararg" | "=" macro_argument
5794 bool MasmParser::parseDirectiveMacro(StringRef Name, SMLoc NameLoc) {
5795  MCAsmMacroParameters Parameters;
5796  while (getLexer().isNot(AsmToken::EndOfStatement)) {
5797  if (!Parameters.empty() && Parameters.back().Vararg)
5798  return Error(Lexer.getLoc(),
5799  "Vararg parameter '" + Parameters.back().Name +
5800  "' should be last in the list of parameters");
5801 
5802  MCAsmMacroParameter Parameter;
5803  if (parseIdentifier(Parameter.Name))
5804  return TokError("expected identifier in 'macro' directive");
5805 
5806  // Emit an error if two (or more) named parameters share the same name.
5807  for (const MCAsmMacroParameter& CurrParam : Parameters)
5808  if (CurrParam.Name.equals_insensitive(Parameter.Name))
5809  return TokError("macro '" + Name + "' has multiple parameters"
5810  " named '" + Parameter.Name + "'");
5811 
5812  if (Lexer.is(AsmToken::Colon)) {
5813  Lex(); // consume ':'
5814 
5815  if (parseOptionalToken(AsmToken::Equal)) {
5816  // Default value
5817  SMLoc ParamLoc;
5818 
5819  ParamLoc = Lexer.getLoc();
5820  if (parseMacroArgument(nullptr, Parameter.Value))
5821  return true;