LLVM 22.0.0git
AsmLexer.h
Go to the documentation of this file.
1//===- AsmLexer.h - Lexer for Assembly Files --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file declares the lexer for assembly files.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_MC_MCPARSER_ASMLEXER_H
14#define LLVM_MC_MCPARSER_ASMLEXER_H
15
16#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/StringRef.h"
19#include "llvm/MC/MCAsmMacro.h"
21#include <cassert>
22#include <cstddef>
23#include <string>
24
25namespace llvm {
26
27class MCAsmInfo;
28
29/// A callback class which is notified of each comment in an assembly file as
30/// it is lexed.
// NOTE(review): the class-head line (listing line 31) is missing from this
// rendering; the destructor below shows the class is named AsmCommentConsumer.
32public:
33 virtual ~AsmCommentConsumer() = default;
34
35 /// Callback function for when a comment is lexed. \p Loc is the start of the
36 /// comment text (excluding the comment-start marker). \p CommentText is the
37 /// text of the comment, excluding the comment start and end markers, and the
38 /// newline for single-line comments. Pure virtual: implementers override it.
39 virtual void HandleComment(SMLoc Loc, StringRef CommentText) = 0;
40};
41
/// Lexer for assembly files. Tokens are consumed with Lex(), pushed back with
/// UnLex(), and inspected ahead of time with peekTok()/peekTokens().
42class AsmLexer {
43 /// The pending tokens; the front element is the current token (see getTok()).
 // NOTE(review): the declaration of CurTok (listing line 44) is missing from
 // this rendering; Lex(), UnLex() and getTok() below all operate on it.
45
 // NOTE(review): presumably the read position within CurBuf, the buffer being
 // lexed -- confirm in AsmLexer.cpp.
46 const char *CurPtr = nullptr;
47 StringRef CurBuf;
48
49 /// The location and description of the current error.
50 SMLoc ErrLoc;
51 std::string Err;
52
53 const MCAsmInfo &MAI;
54
55 bool IsAtStartOfLine = true;
 /// True when the token most recently consumed by Lex() was an EndOfStatement.
56 bool JustConsumedEOL = true;
57 bool IsPeeking = false;
58 bool EndStatementAtEOF = true;
59
 /// Pointer that getLoc() reports as the current source location.
60 const char *TokStart = nullptr;
61 bool SkipSpace = true;
62 bool AllowAtInIdentifier = false;
63 bool AllowHashInIdentifier = false;
64 bool IsAtStartOfStatement = true;
65 bool LexMasmHexFloats = false;
66 bool LexMasmIntegers = false;
67 bool LexMasmStrings = false;
68 bool LexMotorolaIntegers = false;
69 bool UseMasmDefaultRadix = false;
 /// Radix returned by getMasmDefaultRadix(); starts out as 10.
70 unsigned DefaultRadix = 10;
71 bool LexHLASMIntegers = false;
72 bool LexHLASMStrings = false;
 /// Consumer installed through setCommentConsumer(); null by default.
73 AsmCommentConsumer *CommentConsumer = nullptr;
74
 /// Lex a single token. As noted in Lex(), this may queue additional tokens
 /// via UnLex but always returns the first one.
75 LLVM_ABI AsmToken LexToken();
76
 /// Record the location and message of an error; they are exposed through
 /// getErrLoc() and getErr().
77 void SetError(SMLoc errLoc, const std::string &err) {
78 ErrLoc = errLoc;
79 Err = err;
80 }
81
82public:
83 LLVM_ABI AsmLexer(const MCAsmInfo &MAI);
 // Copying is disabled: both the copy constructor and copy assignment are
 // deleted.
84 AsmLexer(const AsmLexer &) = delete;
85 AsmLexer &operator=(const AsmLexer &) = delete;
86
87 /// Consume the next token from the input stream and return it.
88 ///
89 /// The lexer will continuously return the end-of-file token once the end of
90 /// the main input file has been reached.
91 const AsmToken &Lex() {
92 assert(!CurTok.empty());
93 // Record whether the token being consumed is an EndOfStatement.
94 JustConsumedEOL = CurTok.front().getKind() == AsmToken::EndOfStatement;
95 CurTok.erase(CurTok.begin());
96 // LexToken may generate multiple tokens via UnLex but will always return
97 // the first one. Place the returned value at the head of the CurTok vector.
98 if (CurTok.empty()) {
99 AsmToken T = LexToken();
100 CurTok.insert(CurTok.begin(), T);
101 }
102 return CurTok.front();
103 }
104
 /// Push \p Token back onto the front of the pending tokens, making it the
 /// current token.
105 void UnLex(AsmToken const &Token) {
106 CurTok.insert(CurTok.begin(), Token);
107 }
108
 /// Return whether the token consumed by the most recent Lex() call was an
 /// EndOfStatement (true before the first Lex() call).
109 bool justConsumedEOL() { return JustConsumedEOL; }
110
112
113 /// Get the current source location.
114 SMLoc getLoc() const { return SMLoc::getFromPointer(TokStart); }
115
116 /// Get the current (last) lexed token.
117 const AsmToken &getTok() const { return CurTok[0]; }
118
119 /// Look ahead at the next token to be lexed.
120 const AsmToken peekTok(bool ShouldSkipSpace = true) {
121 AsmToken Tok;
122
 // NOTE(review): the declaration of Buf (listing line 123) is missing from
 // this rendering -- presumably a one-element view over Tok; confirm against
 // the real header.
124 size_t ReadCount = peekTokens(Buf, ShouldSkipSpace);
125
 // Exactly one token is expected; the void cast keeps ReadCount "used" when
 // asserts are compiled out.
126 assert(ReadCount == 1);
127 (void)ReadCount;
128
129 return Tok;
130 }
131
132 /// Look ahead an arbitrary number of tokens.
 // NOTE(review): the first line of this declaration (listing line 133) is
 // missing from this rendering; only its trailing parameter is shown.
134 bool ShouldSkipSpace = true);
135
136 /// Get the current error location.
137 SMLoc getErrLoc() { return ErrLoc; }
138
139 /// Get the current error string.
140 const std::string &getErr() { return Err; }
141
142 /// Get the kind of current token.
 // NOTE(review): the declaration this comment documents (listing line 143) is
 // missing from this rendering.
144
145 /// Check if the current token has kind \p K.
146 bool is(AsmToken::TokenKind K) const { return getTok().is(K); }
147
148 /// Check if the current token does not have kind \p K.
149 bool isNot(AsmToken::TokenKind K) const { return getTok().isNot(K); }
150
151 /// Set whether spaces should be ignored by the lexer.
152 void setSkipSpace(bool val) { SkipSpace = val; }
153
 /// Accessors for the AllowAtInIdentifier flag.
 // NOTE(review): presumably controls whether '@' may appear in identifiers --
 // confirm in AsmLexer.cpp.
154 bool getAllowAtInIdentifier() { return AllowAtInIdentifier; }
155 void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; }
156
 /// Set the AllowHashInIdentifier flag.
 // NOTE(review): presumably controls whether '#' may appear in identifiers --
 // confirm in AsmLexer.cpp.
157 void setAllowHashInIdentifier(bool V) { AllowHashInIdentifier = V; }
158
 /// Store \p CommentConsumer as this lexer's comment consumer.
159 void setCommentConsumer(AsmCommentConsumer *CommentConsumer) {
160 this->CommentConsumer = CommentConsumer;
161 }
162
163 /// Set whether to lex masm-style binary (e.g., 0b1101) and radix-specified
164 /// literals (e.g., 0ABCh [hex], 576t [decimal], 77o [octal], 1101y [binary]).
165 void setLexMasmIntegers(bool V) { LexMasmIntegers = V; }
166
167 /// Set whether to use masm-style default-radix integer literals. If disabled,
168 /// assume decimal unless prefixed (e.g., 0x2c [hex], 077 [octal]).
169 void useMasmDefaultRadix(bool V) { UseMasmDefaultRadix = V; }
170
 /// Accessors for DefaultRadix, which starts out as 10.
171 unsigned getMasmDefaultRadix() const { return DefaultRadix; }
172 void setMasmDefaultRadix(unsigned Radix) { DefaultRadix = Radix; }
173
174 /// Set whether to lex masm-style hex float literals, such as 3f800000r.
175 void setLexMasmHexFloats(bool V) { LexMasmHexFloats = V; }
176
177 /// Set whether to lex masm-style string literals, such as 'Can''t find file'
178 /// and "This ""value"" not found".
179 void setLexMasmStrings(bool V) { LexMasmStrings = V; }
180
181 /// Set whether to lex Motorola-style integer literals, such as $deadbeef or
182 /// %01010110.
183 void setLexMotorolaIntegers(bool V) { LexMotorolaIntegers = V; }
184
185 /// Set whether to lex HLASM-flavour integers. For now this is only [0-9]*
186 void setLexHLASMIntegers(bool V) { LexHLASMIntegers = V; }
187
188 /// Set whether to "lex" HLASM-flavour character and string literals. For now,
189 /// setting this option to true will disable lexing for character and string
190 /// literals.
191 void setLexHLASMStrings(bool V) { LexHLASMStrings = V; }
192
 // NOTE(review): defined out of line; presumably points the lexer at a new
 // input buffer. The roles of ptr and EndStatementAtEOF are not visible here --
 // confirm in AsmLexer.cpp.
193 LLVM_ABI void setBuffer(StringRef Buf, const char *ptr = nullptr,
194 bool EndStatementAtEOF = true);
195
 /// Return the MCAsmInfo reference held by this lexer.
196 const MCAsmInfo &getMAI() const { return MAI; }
197
198private:
 // Lexing helpers; all are declared here and defined out of line.
199 bool isAtStartOfComment(const char *Ptr);
200 bool isAtStatementSeparator(const char *Ptr);
201 [[nodiscard]] int getNextChar();
202 int peekNextChar();
203 AsmToken ReturnError(const char *Loc, const std::string &Msg);
204
205 AsmToken LexIdentifier();
206 AsmToken LexSlash();
207 AsmToken LexLineComment();
208 AsmToken LexDigit();
209 AsmToken LexSingleQuote();
210 AsmToken LexQuote();
211 AsmToken LexFloatLiteral();
212 AsmToken LexHexFloatLiteral(bool NoIntDigits);
213
214 StringRef LexUntilEndOfLine();
215};
216
217} // end namespace llvm
218
219#endif // LLVM_MC_MCPARSER_ASMLEXER_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define LLVM_ABI
Definition Compiler.h:213
#define T
This file defines the SmallVector class.
A callback class which is notified of each comment in an assembly file as it is lexed.
Definition AsmLexer.h:31
virtual ~AsmCommentConsumer()=default
virtual void HandleComment(SMLoc Loc, StringRef CommentText)=0
Callback function for when a comment is lexed.
void setLexHLASMStrings(bool V)
Set whether to "lex" HLASM-flavour character and string literals.
Definition AsmLexer.h:191
void setLexMasmIntegers(bool V)
Set whether to lex masm-style binary (e.g., 0b1101) and radix-specified literals (e.g., 0ABCh [hex], 576t [decimal], 77o [octal], 1101y [binary]).
Definition AsmLexer.h:165
SMLoc getLoc() const
Get the current source location.
Definition AsmLexer.h:114
void setLexMasmStrings(bool V)
Set whether to lex masm-style string literals, such as 'Can''t find file' and "This ""value"" not found".
Definition AsmLexer.h:179
LLVM_ABI AsmLexer(const MCAsmInfo &MAI)
Definition AsmLexer.cpp:110
const AsmToken peekTok(bool ShouldSkipSpace=true)
Look ahead at the next token to be lexed.
Definition AsmLexer.h:120
bool getAllowAtInIdentifier()
Definition AsmLexer.h:154
void UnLex(AsmToken const &Token)
Definition AsmLexer.h:105
void setMasmDefaultRadix(unsigned Radix)
Definition AsmLexer.h:172
AsmToken::TokenKind getKind() const
Get the kind of current token.
Definition AsmLexer.h:143
void setLexMasmHexFloats(bool V)
Set whether to lex masm-style hex float literals, such as 3f800000r.
Definition AsmLexer.h:175
const MCAsmInfo & getMAI() const
Definition AsmLexer.h:196
const AsmToken & getTok() const
Get the current (last) lexed token.
Definition AsmLexer.h:117
bool is(AsmToken::TokenKind K) const
Check if the current token has kind K.
Definition AsmLexer.h:146
void setLexMotorolaIntegers(bool V)
Set whether to lex Motorola-style integer literals, such as $deadbeef or %01010110.
Definition AsmLexer.h:183
SMLoc getErrLoc()
Get the current error location.
Definition AsmLexer.h:137
bool justConsumedEOL()
Definition AsmLexer.h:109
AsmLexer(const AsmLexer &)=delete
const std::string & getErr()
Get the current error string.
Definition AsmLexer.h:140
const AsmToken & Lex()
Consume the next token from the input stream and return it.
Definition AsmLexer.h:91
void setSkipSpace(bool val)
Set whether spaces should be ignored by the lexer.
Definition AsmLexer.h:152
void setAllowAtInIdentifier(bool v)
Definition AsmLexer.h:155
LLVM_ABI StringRef LexUntilEndOfStatement()
Definition AsmLexer.cpp:743
AsmLexer & operator=(const AsmLexer &)=delete
LLVM_ABI void setBuffer(StringRef Buf, const char *ptr=nullptr, bool EndStatementAtEOF=true)
Definition AsmLexer.cpp:120
unsigned getMasmDefaultRadix() const
Definition AsmLexer.h:171
void useMasmDefaultRadix(bool V)
Set whether to use masm-style default-radix integer literals.
Definition AsmLexer.h:169
void setLexHLASMIntegers(bool V)
Set whether to lex HLASM-flavour integers. For now this is only [0-9]*.
Definition AsmLexer.h:186
bool isNot(AsmToken::TokenKind K) const
Check if the current token does not have kind K.
Definition AsmLexer.h:149
LLVM_ABI size_t peekTokens(MutableArrayRef< AsmToken > Buf, bool ShouldSkipSpace=true)
Look ahead an arbitrary number of tokens.
Definition AsmLexer.cpp:763
void setCommentConsumer(AsmCommentConsumer *CommentConsumer)
Definition AsmLexer.h:159
void setAllowHashInIdentifier(bool V)
Definition AsmLexer.h:157
Target independent representation for an assembler token.
Definition MCAsmMacro.h:22
bool isNot(TokenKind K) const
Definition MCAsmMacro.h:76
bool is(TokenKind K) const
Definition MCAsmMacro.h:75
TokenKind getKind() const
Definition MCAsmMacro.h:74
This class is intended to be used as a base class for asm properties and features specific to the tar...
Definition MCAsmInfo.h:64
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:298
Represents a location in source code.
Definition SMLoc.h:22
static SMLoc getFromPointer(const char *Ptr)
Definition SMLoc.h:35
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
This is an optimization pass for GlobalISel generic memory operations.