LLVM  15.0.0git
WasmAsmParser.cpp
Go to the documentation of this file.
//===- WasmAsmParser.cpp - Wasm Assembly Parser -----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// --
//
// Note, this is for wasm, the binary format (analogous to ELF), not wasm,
// the instruction set (analogous to x86), for which parsing code lives in
// WebAssemblyAsmParser.
//
// This file contains processing for generic directives implemented using
// MCTargetStreamer; the ones that depend on WebAssemblyTargetStreamer are in
// WebAssemblyAsmParser.
//
//===----------------------------------------------------------------------===//
18 
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/Wasm.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCSectionWasm.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbolWasm.h"
#include "llvm/Support/Casting.h"

using namespace llvm;
31 
32 namespace {
33 
34 class WasmAsmParser : public MCAsmParserExtension {
35  MCAsmParser *Parser = nullptr;
36  MCAsmLexer *Lexer = nullptr;
37 
38  template<bool (WasmAsmParser::*HandlerMethod)(StringRef, SMLoc)>
39  void addDirectiveHandler(StringRef Directive) {
40  MCAsmParser::ExtensionDirectiveHandler Handler = std::make_pair(
41  this, HandleDirective<WasmAsmParser, HandlerMethod>);
42 
43  getParser().addDirectiveHandler(Directive, Handler);
44  }
45 
46 public:
47  WasmAsmParser() { BracketExpressionsSupported = true; }
48 
49  void Initialize(MCAsmParser &P) override {
50  Parser = &P;
51  Lexer = &Parser->getLexer();
52  // Call the base implementation.
53  this->MCAsmParserExtension::Initialize(*Parser);
54 
55  addDirectiveHandler<&WasmAsmParser::parseSectionDirectiveText>(".text");
56  addDirectiveHandler<&WasmAsmParser::parseSectionDirectiveData>(".data");
57  addDirectiveHandler<&WasmAsmParser::parseSectionDirective>(".section");
58  addDirectiveHandler<&WasmAsmParser::parseDirectiveSize>(".size");
59  addDirectiveHandler<&WasmAsmParser::parseDirectiveType>(".type");
60  addDirectiveHandler<&WasmAsmParser::ParseDirectiveIdent>(".ident");
61  addDirectiveHandler<
62  &WasmAsmParser::ParseDirectiveSymbolAttribute>(".weak");
63  addDirectiveHandler<
64  &WasmAsmParser::ParseDirectiveSymbolAttribute>(".local");
65  addDirectiveHandler<
66  &WasmAsmParser::ParseDirectiveSymbolAttribute>(".internal");
67  addDirectiveHandler<
68  &WasmAsmParser::ParseDirectiveSymbolAttribute>(".hidden");
69  }
70 
71  bool error(const StringRef &Msg, const AsmToken &Tok) {
72  return Parser->Error(Tok.getLoc(), Msg + Tok.getString());
73  }
74 
75  bool isNext(AsmToken::TokenKind Kind) {
76  auto Ok = Lexer->is(Kind);
77  if (Ok)
78  Lex();
79  return Ok;
80  }
81 
82  bool expect(AsmToken::TokenKind Kind, const char *KindName) {
83  if (!isNext(Kind))
84  return error(std::string("Expected ") + KindName + ", instead got: ",
85  Lexer->getTok());
86  return false;
87  }
88 
89  bool parseSectionDirectiveText(StringRef, SMLoc) {
90  // FIXME: .text currently no-op.
91  return false;
92  }
93 
94  bool parseSectionDirectiveData(StringRef, SMLoc) {
95  auto *S = getContext().getObjectFileInfo()->getDataSection();
96  getStreamer().SwitchSection(S);
97  return false;
98  }
99 
100  uint32_t parseSectionFlags(StringRef FlagStr, bool &Passive, bool &Group) {
101  uint32_t flags = 0;
102  for (char C : FlagStr) {
103  switch (C) {
104  case 'p':
105  Passive = true;
106  break;
107  case 'G':
108  Group = true;
109  break;
110  case 'T':
111  flags |= wasm::WASM_SEG_FLAG_TLS;
112  break;
113  case 'S':
115  break;
116  default:
117  return -1U;
118  }
119  }
120  return flags;
121  }
122 
123  bool parseGroup(StringRef &GroupName) {
124  if (Lexer->isNot(AsmToken::Comma))
125  return TokError("expected group name");
126  Lex();
127  if (Lexer->is(AsmToken::Integer)) {
128  GroupName = getTok().getString();
129  Lex();
130  } else if (Parser->parseIdentifier(GroupName)) {
131  return TokError("invalid group name");
132  }
133  if (Lexer->is(AsmToken::Comma)) {
134  Lex();
136  if (Parser->parseIdentifier(Linkage))
137  return TokError("invalid linkage");
138  if (Linkage != "comdat")
139  return TokError("Linkage must be 'comdat'");
140  }
141  return false;
142  }
143 
144  bool parseSectionDirective(StringRef, SMLoc loc) {
145  StringRef Name;
146  if (Parser->parseIdentifier(Name))
147  return TokError("expected identifier in directive");
148 
149  if (expect(AsmToken::Comma, ","))
150  return true;
151 
152  if (Lexer->isNot(AsmToken::String))
153  return error("expected string in directive, instead got: ", Lexer->getTok());
154 
156  .StartsWith(".data", SectionKind::getData())
157  .StartsWith(".tdata", SectionKind::getThreadData())
158  .StartsWith(".tbss", SectionKind::getThreadBSS())
159  .StartsWith(".rodata", SectionKind::getReadOnly())
160  .StartsWith(".text", SectionKind::getText())
161  .StartsWith(".custom_section", SectionKind::getMetadata())
162  .StartsWith(".bss", SectionKind::getBSS())
163  // See use of .init_array in WasmObjectWriter and
164  // TargetLoweringObjectFileWasm
165  .StartsWith(".init_array", SectionKind::getData())
166  .StartsWith(".debug_", SectionKind::getMetadata())
168 
169  // Update section flags if present in this .section directive
170  bool Passive = false;
171  bool Group = false;
172  uint32_t Flags =
173  parseSectionFlags(getTok().getStringContents(), Passive, Group);
174  if (Flags == -1U)
175  return TokError("unknown flag");
176 
177  Lex();
178 
179  if (expect(AsmToken::Comma, ",") || expect(AsmToken::At, "@"))
180  return true;
181 
182  StringRef GroupName;
183  if (Group && parseGroup(GroupName))
184  return true;
185 
186  if (expect(AsmToken::EndOfStatement, "eol"))
187  return true;
188 
189  // TODO: Parse UniqueID
190  MCSectionWasm *WS = getContext().getWasmSection(
191  Name, Kind.getValue(), Flags, GroupName, MCContext::GenericSectionID);
192 
193  if (WS->getSegmentFlags() != Flags)
194  Parser->Error(loc, "changed section flags for " + Name +
195  ", expected: 0x" +
196  utohexstr(WS->getSegmentFlags()));
197 
198  if (Passive) {
199  if (!WS->isWasmData())
200  return Parser->Error(loc, "Only data sections can be passive");
201  WS->setPassive();
202  }
203 
204  getStreamer().SwitchSection(WS);
205  return false;
206  }
207 
208  // TODO: This function is almost the same as ELFAsmParser::ParseDirectiveSize
209  // so maybe could be shared somehow.
210  bool parseDirectiveSize(StringRef, SMLoc) {
211  StringRef Name;
212  if (Parser->parseIdentifier(Name))
213  return TokError("expected identifier in directive");
214  auto Sym = getContext().getOrCreateSymbol(Name);
215  if (expect(AsmToken::Comma, ","))
216  return true;
217  const MCExpr *Expr;
218  if (Parser->parseExpression(Expr))
219  return true;
220  if (expect(AsmToken::EndOfStatement, "eol"))
221  return true;
222  // This is done automatically by the assembler for functions currently,
223  // so this is only currently needed for data sections:
224  getStreamer().emitELFSize(Sym, Expr);
225  return false;
226  }
227 
228  bool parseDirectiveType(StringRef, SMLoc) {
229  // This could be the start of a function, check if followed by
230  // "label,@function"
231  if (!Lexer->is(AsmToken::Identifier))
232  return error("Expected label after .type directive, got: ",
233  Lexer->getTok());
234  auto WasmSym = cast<MCSymbolWasm>(
235  getStreamer().getContext().getOrCreateSymbol(
236  Lexer->getTok().getString()));
237  Lex();
238  if (!(isNext(AsmToken::Comma) && isNext(AsmToken::At) &&
239  Lexer->is(AsmToken::Identifier)))
240  return error("Expected label,@type declaration, got: ", Lexer->getTok());
241  auto TypeName = Lexer->getTok().getString();
242  if (TypeName == "function") {
243  WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
244  auto *Current =
245  cast<MCSectionWasm>(getStreamer().getCurrentSection().first);
246  if (Current->getGroup())
247  WasmSym->setComdat(true);
248  } else if (TypeName == "global")
249  WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
250  else if (TypeName == "object")
251  WasmSym->setType(wasm::WASM_SYMBOL_TYPE_DATA);
252  else
253  return error("Unknown WASM symbol type: ", Lexer->getTok());
254  Lex();
255  return expect(AsmToken::EndOfStatement, "EOL");
256  }
257 
258  // FIXME: Shared with ELF.
259  /// ParseDirectiveIdent
260  /// ::= .ident string
261  bool ParseDirectiveIdent(StringRef, SMLoc) {
262  if (getLexer().isNot(AsmToken::String))
263  return TokError("unexpected token in '.ident' directive");
264  StringRef Data = getTok().getIdentifier();
265  Lex();
266  if (getLexer().isNot(AsmToken::EndOfStatement))
267  return TokError("unexpected token in '.ident' directive");
268  Lex();
269  getStreamer().emitIdent(Data);
270  return false;
271  }
272 
273  // FIXME: Shared with ELF.
274  /// ParseDirectiveSymbolAttribute
275  /// ::= { ".local", ".weak", ... } [ identifier ( , identifier )* ]
276  bool ParseDirectiveSymbolAttribute(StringRef Directive, SMLoc) {
278  .Case(".weak", MCSA_Weak)
279  .Case(".local", MCSA_Local)
280  .Case(".hidden", MCSA_Hidden)
281  .Case(".internal", MCSA_Internal)
282  .Case(".protected", MCSA_Protected)
284  assert(Attr != MCSA_Invalid && "unexpected symbol attribute directive!");
285  if (getLexer().isNot(AsmToken::EndOfStatement)) {
286  while (true) {
287  StringRef Name;
288  if (getParser().parseIdentifier(Name))
289  return TokError("expected identifier in directive");
290  MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
291  getStreamer().emitSymbolAttribute(Sym, Attr);
292  if (getLexer().is(AsmToken::EndOfStatement))
293  break;
294  if (getLexer().isNot(AsmToken::Comma))
295  return TokError("unexpected token in directive");
296  Lex();
297  }
298  }
299  Lex();
300  return false;
301  }
302 };
303 
304 } // end anonymous namespace
305 
306 namespace llvm {
307 
309  return new WasmAsmParser;
310 }
311 
312 } // end namespace llvm
llvm::StringSwitch::Case
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
llvm::MCAsmParser
Generic assembler parser interface, for use by target specific assembly parsers.
Definition: MCAsmParser.h:124
is
should just be implemented with a CLZ instruction Since there are other e that share this it would be best to implement this in a target independent as zero is the default value for the binary encoder e add r0 add r5 Register operands should be distinct That is
Definition: README.txt:725
llvm::MCAsmParser::Error
bool Error(SMLoc L, const Twine &Msg, SMRange Range=None)
Return an error at the location L, with the message Msg.
Definition: MCAsmParser.cpp:99
llvm::MCContext::GenericSectionID
@ GenericSectionID
Pass this value as the UniqueID during section creation to get the generic section with the given nam...
Definition: MCContext.h:539
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::MCSymbol
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
llvm::SectionKind::getMetadata
static SectionKind getMetadata()
Definition: SectionKind.h:188
llvm::MCAsmLexer
Generic assembler lexer interface, for use by target specific assembly lexers.
Definition: MCAsmLexer.h:37
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::StringSwitch::Default
LLVM_NODISCARD R Default(T Value)
Definition: StringSwitch.h:183
llvm::AsmToken::EndOfStatement
@ EndOfStatement
Definition: MCAsmMacro.h:42
llvm::MCSA_Invalid
@ MCSA_Invalid
Not a valid directive.
Definition: MCDirectives.h:19
llvm::MCAsmParser::parseIdentifier
virtual bool parseIdentifier(StringRef &Res)=0
Parse an identifier or string (as a quoted identifier) and set Res to the identifier contents.
llvm::wasm::WASM_SYMBOL_TYPE_GLOBAL
@ WASM_SYMBOL_TYPE_GLOBAL
Definition: Wasm.h:384
llvm::MCAsmLexer::getTok
const AsmToken & getTok() const
Get the current (last) lexed token.
Definition: MCAsmLexer.h:106
MCObjectFileInfo.h
llvm::AsmToken::Integer
@ Integer
Definition: MCAsmMacro.h:32
error
#define error(X)
Definition: SymbolRecordMapping.cpp:14
llvm::MCSectionWasm::getSegmentFlags
unsigned getSegmentFlags() const
Definition: MCSectionWasm.h:59
llvm::MCAsmLexer::is
bool is(AsmToken::TokenKind K) const
Check if the current token has kind K.
Definition: MCAsmLexer.h:141
MCAsmParser.h
llvm::Data
@ Data
Definition: SIMachineScheduler.h:55
llvm::AsmToken
Target independent representation for an assembler token.
Definition: MCAsmMacro.h:21
llvm::MCSymbolAttr
MCSymbolAttr
Definition: MCDirectives.h:18
llvm::MCSA_Local
@ MCSA_Local
.local (ELF)
Definition: MCDirectives.h:38
parseSectionFlags
static unsigned parseSectionFlags(const Triple &TT, StringRef flagsStr, bool *UseLastGroup)
Definition: ELFAsmParser.cpp:285
llvm::SectionKind::getReadOnly
static SectionKind getReadOnly()
Definition: SectionKind.h:192
llvm::MCAsmParser::parseExpression
virtual bool parseExpression(const MCExpr *&Res, SMLoc &EndLoc)=0
Parse an arbitrary expression.
llvm::SectionKind::getThreadBSS
static SectionKind getThreadBSS()
Definition: SectionKind.h:206
llvm::SMLoc
Represents a location in source code.
Definition: SMLoc.h:23
llvm::MCSectionWasm
This represents a section on wasm.
Definition: MCSectionWasm.h:26
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
MCSymbolWasm.h
llvm::MCSA_Protected
@ MCSA_Protected
.protected (ELF)
Definition: MCDirectives.h:43
MCContext.h
llvm::createWasmAsmParser
MCAsmParserExtension * createWasmAsmParser()
Definition: WasmAsmParser.cpp:308
llvm::SectionKind::getBSS
static SectionKind getBSS()
Definition: SectionKind.h:209
llvm::MCAsmParser::ExtensionDirectiveHandler
std::pair< MCAsmParserExtension *, DirectiveHandler > ExtensionDirectiveHandler
Definition: MCAsmParser.h:128
llvm::MCSectionWasm::isWasmData
bool isWasmData() const
Definition: MCSectionWasm.h:67
llvm::wasm::WASM_SYMBOL_TYPE_FUNCTION
@ WASM_SYMBOL_TYPE_FUNCTION
Definition: Wasm.h:382
llvm::MCSA_Hidden
@ MCSA_Hidden
.hidden (ELF)
Definition: MCDirectives.h:33
llvm::SectionKind::getText
static SectionKind getText()
Definition: SectionKind.h:190
isNot
static bool isNot(const MachineRegisterInfo &MRI, const MachineInstr &MI)
Definition: AMDGPULegalizerInfo.cpp:2886
llvm::lltok::Kind
Kind
Definition: LLToken.h:18
llvm::wasm::WASM_SEG_FLAG_TLS
@ WASM_SEG_FLAG_TLS
Definition: Wasm.h:392
MCAsmLexer.h
llvm::AsmToken::At
@ At
Definition: MCAsmMacro.h:54
llvm::MCAsmParserExtension::Initialize
virtual void Initialize(MCAsmParser &Parser)
Initialize the extension for parsing using the given Parser.
Definition: MCAsmParserExtension.cpp:21
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::SectionKind::getThreadData
static SectionKind getThreadData()
Definition: SectionKind.h:207
llvm::AsmToken::Comma
@ Comma
Definition: MCAsmMacro.h:49
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::MCAsmParserExtension
Generic interface for extending the MCAsmParser, which is implemented by target and object file assem...
Definition: MCAsmParserExtension.h:24
uint32_t
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::MCSectionWasm::setPassive
void setPassive(bool V=true)
Definition: MCSectionWasm.h:85
llvm::AsmToken::getString
StringRef getString() const
Get the string for the current token, this includes all characters (for example, the quotes on string...
Definition: MCAsmMacro.h:110
llvm::MCSA_Internal
@ MCSA_Internal
.internal (ELF)
Definition: MCDirectives.h:36
llvm::AMDGPU::SendMsg::Msg
const CustomOperand< const MCSubtargetInfo & > Msg[]
Definition: AMDGPUAsmUtils.cpp:39
llvm::SectionKind::getData
static SectionKind getData()
Definition: SectionKind.h:213
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:50
llvm::AsmToken::Identifier
@ Identifier
Definition: MCAsmMacro.h:28
MCAsmParserExtension.h
llvm::AMDGPU::HSAMD::Kernel::Arg::Key::TypeName
constexpr char TypeName[]
Key for Kernel::Arg::Metadata::mTypeName.
Definition: AMDGPUMetadata.h:175
llvm::MCAsmLexer::isNot
bool isNot(AsmToken::TokenKind K) const
Check if the current token has kind K.
Definition: MCAsmLexer.h:144
Casting.h
llvm::TargetStackID::Default
@ Default
Definition: TargetFrameLowering.h:28
Wasm.h
llvm::MCSA_Weak
@ MCSA_Weak
.weak
Definition: MCDirectives.h:45
llvm::AsmToken::TokenKind
TokenKind
Definition: MCAsmMacro.h:23
llvm::MCAsmParser::getLexer
virtual MCAsmLexer & getLexer()=0
MCStreamer.h
llvm::StringSwitch
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
llvm::AsmToken::String
@ String
Definition: MCAsmMacro.h:29
llvm::AsmToken::getLoc
SMLoc getLoc() const
Definition: MCAsmLexer.cpp:26
llvm::wasm::WASM_SEG_FLAG_STRINGS
@ WASM_SEG_FLAG_STRINGS
Definition: Wasm.h:391
llvm::Directive
Definition: DirectiveEmitter.h:100
llvm::MCExpr
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
llvm::wasm::WASM_SYMBOL_TYPE_DATA
@ WASM_SYMBOL_TYPE_DATA
Definition: Wasm.h:383
MCSectionWasm.h