LLVM  17.0.0git
WasmAsmParser.cpp
Go to the documentation of this file.
1 //===- WasmAsmParser.cpp - Wasm Assembly Parser -----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 // --
8 //
9 // Note, this is for wasm, the binary format (analogous to ELF), not wasm,
10 // the instruction set (analogous to x86), for which parsing code lives in
11 // WebAssemblyAsmParser.
12 //
13 // This file contains processing for generic directives implemented using
14 // MCTargetStreamer, the ones that depend on WebAssemblyTargetStreamer are in
15 // WebAssemblyAsmParser.
16 //
17 //===----------------------------------------------------------------------===//
18 
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/Wasm.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCSectionWasm.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbolWasm.h"
#include "llvm/Support/Casting.h"
#include <optional>
30 
31 using namespace llvm;
32 
33 namespace {
34 
35 class WasmAsmParser : public MCAsmParserExtension {
36  MCAsmParser *Parser = nullptr;
37  MCAsmLexer *Lexer = nullptr;
38 
39  template<bool (WasmAsmParser::*HandlerMethod)(StringRef, SMLoc)>
40  void addDirectiveHandler(StringRef Directive) {
41  MCAsmParser::ExtensionDirectiveHandler Handler = std::make_pair(
42  this, HandleDirective<WasmAsmParser, HandlerMethod>);
43 
44  getParser().addDirectiveHandler(Directive, Handler);
45  }
46 
47 public:
48  WasmAsmParser() { BracketExpressionsSupported = true; }
49 
50  void Initialize(MCAsmParser &P) override {
51  Parser = &P;
52  Lexer = &Parser->getLexer();
53  // Call the base implementation.
54  this->MCAsmParserExtension::Initialize(*Parser);
55 
56  addDirectiveHandler<&WasmAsmParser::parseSectionDirectiveText>(".text");
57  addDirectiveHandler<&WasmAsmParser::parseSectionDirectiveData>(".data");
58  addDirectiveHandler<&WasmAsmParser::parseSectionDirective>(".section");
59  addDirectiveHandler<&WasmAsmParser::parseDirectiveSize>(".size");
60  addDirectiveHandler<&WasmAsmParser::parseDirectiveType>(".type");
61  addDirectiveHandler<&WasmAsmParser::ParseDirectiveIdent>(".ident");
62  addDirectiveHandler<
63  &WasmAsmParser::ParseDirectiveSymbolAttribute>(".weak");
64  addDirectiveHandler<
65  &WasmAsmParser::ParseDirectiveSymbolAttribute>(".local");
66  addDirectiveHandler<
67  &WasmAsmParser::ParseDirectiveSymbolAttribute>(".internal");
68  addDirectiveHandler<
69  &WasmAsmParser::ParseDirectiveSymbolAttribute>(".hidden");
70  }
71 
72  bool error(const StringRef &Msg, const AsmToken &Tok) {
73  return Parser->Error(Tok.getLoc(), Msg + Tok.getString());
74  }
75 
76  bool isNext(AsmToken::TokenKind Kind) {
77  auto Ok = Lexer->is(Kind);
78  if (Ok)
79  Lex();
80  return Ok;
81  }
82 
83  bool expect(AsmToken::TokenKind Kind, const char *KindName) {
84  if (!isNext(Kind))
85  return error(std::string("Expected ") + KindName + ", instead got: ",
86  Lexer->getTok());
87  return false;
88  }
89 
90  bool parseSectionDirectiveText(StringRef, SMLoc) {
91  // FIXME: .text currently no-op.
92  return false;
93  }
94 
95  bool parseSectionDirectiveData(StringRef, SMLoc) {
96  auto *S = getContext().getObjectFileInfo()->getDataSection();
97  getStreamer().switchSection(S);
98  return false;
99  }
100 
101  uint32_t parseSectionFlags(StringRef FlagStr, bool &Passive, bool &Group) {
102  uint32_t flags = 0;
103  for (char C : FlagStr) {
104  switch (C) {
105  case 'p':
106  Passive = true;
107  break;
108  case 'G':
109  Group = true;
110  break;
111  case 'T':
112  flags |= wasm::WASM_SEG_FLAG_TLS;
113  break;
114  case 'S':
116  break;
117  default:
118  return -1U;
119  }
120  }
121  return flags;
122  }
123 
124  bool parseGroup(StringRef &GroupName) {
125  if (Lexer->isNot(AsmToken::Comma))
126  return TokError("expected group name");
127  Lex();
128  if (Lexer->is(AsmToken::Integer)) {
129  GroupName = getTok().getString();
130  Lex();
131  } else if (Parser->parseIdentifier(GroupName)) {
132  return TokError("invalid group name");
133  }
134  if (Lexer->is(AsmToken::Comma)) {
135  Lex();
137  if (Parser->parseIdentifier(Linkage))
138  return TokError("invalid linkage");
139  if (Linkage != "comdat")
140  return TokError("Linkage must be 'comdat'");
141  }
142  return false;
143  }
144 
145  bool parseSectionDirective(StringRef, SMLoc loc) {
146  StringRef Name;
147  if (Parser->parseIdentifier(Name))
148  return TokError("expected identifier in directive");
149 
150  if (expect(AsmToken::Comma, ","))
151  return true;
152 
153  if (Lexer->isNot(AsmToken::String))
154  return error("expected string in directive, instead got: ", Lexer->getTok());
155 
157  .StartsWith(".data", SectionKind::getData())
158  .StartsWith(".tdata", SectionKind::getThreadData())
159  .StartsWith(".tbss", SectionKind::getThreadBSS())
160  .StartsWith(".rodata", SectionKind::getReadOnly())
161  .StartsWith(".text", SectionKind::getText())
162  .StartsWith(".custom_section", SectionKind::getMetadata())
163  .StartsWith(".bss", SectionKind::getBSS())
164  // See use of .init_array in WasmObjectWriter and
165  // TargetLoweringObjectFileWasm
166  .StartsWith(".init_array", SectionKind::getData())
167  .StartsWith(".debug_", SectionKind::getMetadata())
169 
170  // Update section flags if present in this .section directive
171  bool Passive = false;
172  bool Group = false;
173  uint32_t Flags =
174  parseSectionFlags(getTok().getStringContents(), Passive, Group);
175  if (Flags == -1U)
176  return TokError("unknown flag");
177 
178  Lex();
179 
180  if (expect(AsmToken::Comma, ",") || expect(AsmToken::At, "@"))
181  return true;
182 
183  StringRef GroupName;
184  if (Group && parseGroup(GroupName))
185  return true;
186 
187  if (expect(AsmToken::EndOfStatement, "eol"))
188  return true;
189 
190  // TODO: Parse UniqueID
191  MCSectionWasm *WS = getContext().getWasmSection(
192  Name, *Kind, Flags, GroupName, MCContext::GenericSectionID);
193 
194  if (WS->getSegmentFlags() != Flags)
195  Parser->Error(loc, "changed section flags for " + Name +
196  ", expected: 0x" +
197  utohexstr(WS->getSegmentFlags()));
198 
199  if (Passive) {
200  if (!WS->isWasmData())
201  return Parser->Error(loc, "Only data sections can be passive");
202  WS->setPassive();
203  }
204 
205  getStreamer().switchSection(WS);
206  return false;
207  }
208 
209  // TODO: This function is almost the same as ELFAsmParser::ParseDirectiveSize
210  // so maybe could be shared somehow.
211  bool parseDirectiveSize(StringRef, SMLoc Loc) {
212  StringRef Name;
213  if (Parser->parseIdentifier(Name))
214  return TokError("expected identifier in directive");
215  auto Sym = getContext().getOrCreateSymbol(Name);
216  if (expect(AsmToken::Comma, ","))
217  return true;
218  const MCExpr *Expr;
219  if (Parser->parseExpression(Expr))
220  return true;
221  if (expect(AsmToken::EndOfStatement, "eol"))
222  return true;
223  auto WasmSym = cast<MCSymbolWasm>(Sym);
224  if (WasmSym->isFunction()) {
225  // Ignore .size directives for function symbols. They get their size
226  // set automatically based on their content.
227  Warning(Loc, ".size directive ignored for function symbols");
228  } else {
229  getStreamer().emitELFSize(Sym, Expr);
230  }
231  return false;
232  }
233 
234  bool parseDirectiveType(StringRef, SMLoc) {
235  // This could be the start of a function, check if followed by
236  // "label,@function"
237  if (!Lexer->is(AsmToken::Identifier))
238  return error("Expected label after .type directive, got: ",
239  Lexer->getTok());
240  auto WasmSym = cast<MCSymbolWasm>(
241  getStreamer().getContext().getOrCreateSymbol(
242  Lexer->getTok().getString()));
243  Lex();
244  if (!(isNext(AsmToken::Comma) && isNext(AsmToken::At) &&
245  Lexer->is(AsmToken::Identifier)))
246  return error("Expected label,@type declaration, got: ", Lexer->getTok());
247  auto TypeName = Lexer->getTok().getString();
248  if (TypeName == "function") {
249  WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
250  auto *Current =
251  cast<MCSectionWasm>(getStreamer().getCurrentSection().first);
252  if (Current->getGroup())
253  WasmSym->setComdat(true);
254  } else if (TypeName == "global")
255  WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
256  else if (TypeName == "object")
257  WasmSym->setType(wasm::WASM_SYMBOL_TYPE_DATA);
258  else
259  return error("Unknown WASM symbol type: ", Lexer->getTok());
260  Lex();
261  return expect(AsmToken::EndOfStatement, "EOL");
262  }
263 
264  // FIXME: Shared with ELF.
265  /// ParseDirectiveIdent
266  /// ::= .ident string
267  bool ParseDirectiveIdent(StringRef, SMLoc) {
268  if (getLexer().isNot(AsmToken::String))
269  return TokError("unexpected token in '.ident' directive");
270  StringRef Data = getTok().getIdentifier();
271  Lex();
272  if (getLexer().isNot(AsmToken::EndOfStatement))
273  return TokError("unexpected token in '.ident' directive");
274  Lex();
275  getStreamer().emitIdent(Data);
276  return false;
277  }
278 
279  // FIXME: Shared with ELF.
280  /// ParseDirectiveSymbolAttribute
281  /// ::= { ".local", ".weak", ... } [ identifier ( , identifier )* ]
282  bool ParseDirectiveSymbolAttribute(StringRef Directive, SMLoc) {
284  .Case(".weak", MCSA_Weak)
285  .Case(".local", MCSA_Local)
286  .Case(".hidden", MCSA_Hidden)
287  .Case(".internal", MCSA_Internal)
288  .Case(".protected", MCSA_Protected)
290  assert(Attr != MCSA_Invalid && "unexpected symbol attribute directive!");
291  if (getLexer().isNot(AsmToken::EndOfStatement)) {
292  while (true) {
293  StringRef Name;
294  if (getParser().parseIdentifier(Name))
295  return TokError("expected identifier in directive");
296  MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
297  getStreamer().emitSymbolAttribute(Sym, Attr);
298  if (getLexer().is(AsmToken::EndOfStatement))
299  break;
300  if (getLexer().isNot(AsmToken::Comma))
301  return TokError("unexpected token in directive");
302  Lex();
303  }
304  }
305  Lex();
306  return false;
307  }
308 };
309 
310 } // end anonymous namespace
311 
312 namespace llvm {
313 
315  return new WasmAsmParser;
316 }
317 
318 } // end namespace llvm
llvm::StringSwitch::Case
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
llvm::MCAsmParser
Generic assembler parser interface, for use by target specific assembly parsers.
Definition: MCAsmParser.h:123
is
should just be implemented with a CLZ instruction Since there are other e that share this it would be best to implement this in a target independent as zero is the default value for the binary encoder e add r0 add r5 Register operands should be distinct That is
Definition: README.txt:725
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::MCSymbol
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
llvm::SectionKind::getMetadata
static SectionKind getMetadata()
Definition: SectionKind.h:188
llvm::MCAsmLexer
Generic assembler lexer interface, for use by target specific assembly lexers.
Definition: MCAsmLexer.h:37
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::AsmToken::EndOfStatement
@ EndOfStatement
Definition: MCAsmMacro.h:42
llvm::MCSA_Invalid
@ MCSA_Invalid
Not a valid directive.
Definition: MCDirectives.h:19
llvm::MCAsmParser::parseIdentifier
virtual bool parseIdentifier(StringRef &Res)=0
Parse an identifier or string (as a quoted identifier) and set Res to the identifier contents.
llvm::wasm::WASM_SYMBOL_TYPE_GLOBAL
@ WASM_SYMBOL_TYPE_GLOBAL
Definition: Wasm.h:385
llvm::MCAsmLexer::getTok
const AsmToken & getTok() const
Get the current (last) lexed token.
Definition: MCAsmLexer.h:106
MCObjectFileInfo.h
llvm::AsmToken::Integer
@ Integer
Definition: MCAsmMacro.h:32
error
#define error(X)
Definition: SymbolRecordMapping.cpp:14
llvm::MCSectionWasm::getSegmentFlags
unsigned getSegmentFlags() const
Definition: MCSectionWasm.h:59
llvm::MCAsmLexer::is
bool is(AsmToken::TokenKind K) const
Check if the current token has kind K.
Definition: MCAsmLexer.h:141
MCAsmParser.h
llvm::Data
@ Data
Definition: SIMachineScheduler.h:55
llvm::AsmToken
Target independent representation for an assembler token.
Definition: MCAsmMacro.h:21
llvm::MCSymbolAttr
MCSymbolAttr
Definition: MCDirectives.h:18
llvm::MCSA_Local
@ MCSA_Local
.local (ELF)
Definition: MCDirectives.h:38
parseSectionFlags
static unsigned parseSectionFlags(const Triple &TT, StringRef flagsStr, bool *UseLastGroup)
Definition: ELFAsmParser.cpp:285
llvm::SectionKind::getReadOnly
static SectionKind getReadOnly()
Definition: SectionKind.h:192
llvm::MCAsmParser::parseExpression
virtual bool parseExpression(const MCExpr *&Res, SMLoc &EndLoc)=0
Parse an arbitrary expression.
llvm::SectionKind::getThreadBSS
static SectionKind getThreadBSS()
Definition: SectionKind.h:206
llvm::SMLoc
Represents a location in source code.
Definition: SMLoc.h:23
llvm::MCSectionWasm
This represents a section on wasm.
Definition: MCSectionWasm.h:26
llvm::MCContext::GenericSectionID
@ GenericSectionID
Pass this value as the UniqueID during section creation to get the generic section with the given nam...
Definition: MCContext.h:546
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
MCSymbolWasm.h
llvm::MCSA_Protected
@ MCSA_Protected
.protected (ELF)
Definition: MCDirectives.h:43
MCContext.h
llvm::createWasmAsmParser
MCAsmParserExtension * createWasmAsmParser()
Definition: WasmAsmParser.cpp:314
llvm::SectionKind::getBSS
static SectionKind getBSS()
Definition: SectionKind.h:209
llvm::MCAsmParser::ExtensionDirectiveHandler
std::pair< MCAsmParserExtension *, DirectiveHandler > ExtensionDirectiveHandler
Definition: MCAsmParser.h:127
llvm::MCSectionWasm::isWasmData
bool isWasmData() const
Definition: MCSectionWasm.h:67
llvm::wasm::WASM_SYMBOL_TYPE_FUNCTION
@ WASM_SYMBOL_TYPE_FUNCTION
Definition: Wasm.h:383
llvm::MCSA_Hidden
@ MCSA_Hidden
.hidden (ELF)
Definition: MCDirectives.h:33
llvm::SectionKind::getText
static SectionKind getText()
Definition: SectionKind.h:190
isNot
static bool isNot(const MachineRegisterInfo &MRI, const MachineInstr &MI)
Definition: AMDGPULegalizerInfo.cpp:3198
llvm::lltok::Kind
Kind
Definition: LLToken.h:18
llvm::wasm::WASM_SEG_FLAG_TLS
@ WASM_SEG_FLAG_TLS
Definition: Wasm.h:393
MCAsmLexer.h
llvm::AsmToken::At
@ At
Definition: MCAsmMacro.h:54
llvm::MCAsmParserExtension::Initialize
virtual void Initialize(MCAsmParser &Parser)
Initialize the extension for parsing using the given Parser.
Definition: MCAsmParserExtension.cpp:21
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::SectionKind::getThreadData
static SectionKind getThreadData()
Definition: SectionKind.h:207
llvm::AsmToken::Comma
@ Comma
Definition: MCAsmMacro.h:49
llvm::MCAsmParser::Error
bool Error(SMLoc L, const Twine &Msg, SMRange Range=std::nullopt)
Return an error at the location L, with the message Msg.
Definition: MCAsmParser.cpp:101
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
llvm::MCAsmParserExtension
Generic interface for extending the MCAsmParser, which is implemented by target and object file assem...
Definition: MCAsmParserExtension.h:24
uint32_t
llvm::objcopy::SymbolFlag::Warning
@ Warning
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::MCSectionWasm::setPassive
void setPassive(bool V=true)
Definition: MCSectionWasm.h:85
llvm::AsmToken::getString
StringRef getString() const
Get the string for the current token, this includes all characters (for example, the quotes on string...
Definition: MCAsmMacro.h:110
llvm::MCSA_Internal
@ MCSA_Internal
.internal (ELF)
Definition: MCDirectives.h:36
llvm::AMDGPU::SendMsg::Msg
const CustomOperand< const MCSubtargetInfo & > Msg[]
Definition: AMDGPUAsmUtils.cpp:39
llvm::SectionKind::getData
static SectionKind getData()
Definition: SectionKind.h:213
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:50
llvm::AsmToken::Identifier
@ Identifier
Definition: MCAsmMacro.h:28
MCAsmParserExtension.h
llvm::AMDGPU::HSAMD::Kernel::Arg::Key::TypeName
constexpr char TypeName[]
Key for Kernel::Arg::Metadata::mTypeName.
Definition: AMDGPUMetadata.h:175
llvm::MCAsmLexer::isNot
bool isNot(AsmToken::TokenKind K) const
Check if the current token has kind K.
Definition: MCAsmLexer.h:144
Casting.h
llvm::TargetStackID::Default
@ Default
Definition: TargetFrameLowering.h:28
Wasm.h
llvm::MCSA_Weak
@ MCSA_Weak
.weak
Definition: MCDirectives.h:45
llvm::AsmToken::TokenKind
TokenKind
Definition: MCAsmMacro.h:23
llvm::MCAsmParser::getLexer
virtual MCAsmLexer & getLexer()=0
MCStreamer.h
llvm::StringSwitch::Default
R Default(T Value)
Definition: StringSwitch.h:182
llvm::StringSwitch
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
llvm::omp::RTLDependInfoFields::Flags
@ Flags
llvm::AsmToken::String
@ String
Definition: MCAsmMacro.h:29
llvm::AsmToken::getLoc
SMLoc getLoc() const
Definition: MCAsmLexer.cpp:26
llvm::wasm::WASM_SEG_FLAG_STRINGS
@ WASM_SEG_FLAG_STRINGS
Definition: Wasm.h:392
llvm::Directive
Definition: DirectiveEmitter.h:100
llvm::MCExpr
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
llvm::wasm::WASM_SYMBOL_TYPE_DATA
@ WASM_SYMBOL_TYPE_DATA
Definition: Wasm.h:384
MCSectionWasm.h