LLVM  14.0.0git
TGLexer.h
Go to the documentation of this file.
1 //===- TGLexer.h - Lexer for TableGen Files ---------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This class represents the Lexer for tablegen files.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef LLVM_LIB_TABLEGEN_TGLEXER_H
14 #define LLVM_LIB_TABLEGEN_TGLEXER_H
15 
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/ADT/StringSet.h"
18 #include "llvm/Support/DataTypes.h"
19 #include "llvm/Support/SMLoc.h"
20 #include <cassert>
21 #include <memory>
22 #include <set>
23 #include <string>
24 #include <vector>
25 
26 namespace llvm {
27 template <typename T> class ArrayRef;
28 class SourceMgr;
29 class Twine;
30 
31 namespace tgtok {
32  enum TokKind {
33  // Markers
35 
36  // Tokens with no info.
37  minus, plus, // - +
38  l_square, r_square, // [ ]
39  l_brace, r_brace, // { }
40  l_paren, r_paren, // ( )
41  less, greater, // < >
42  colon, semi, // : ;
43  comma, dot, // , .
44  equal, question, // = ?
45  paste, // #
46  dotdotdot, // ...
47 
48  // Reserved keywords. ('ElseKW' is named to distinguish it from the
49  // existing 'Else' that means the preprocessor #else.)
53 
54  // Bang operators.
59 
60  // Boolean literals.
62 
63  // Integer value.
65 
66  // Binary constant. Note that these are sized according to the number of
67  // bits given.
69 
70  // String valued tokens.
72 
73  // Preprocessing tokens for internal usage by the lexer.
74  // They are never returned as a result of Lex().
76  };
77 }
78 
79 /// TGLexer - TableGen Lexer class.
80 class TGLexer {
81  SourceMgr &SrcMgr;
82 
83  const char *CurPtr = nullptr;
84  StringRef CurBuf;
85 
86  // Information about the current token.
87  const char *TokStart = nullptr;
89  std::string CurStrVal; // This is valid for Id, StrVal, VarName, CodeFragment
90  int64_t CurIntVal = 0; // This is valid for IntVal.
91 
92  /// CurBuffer - This is the current buffer index we're lexing from as managed
93  /// by the SourceMgr object.
94  unsigned CurBuffer = 0;
95 
96 public:
97  typedef std::set<std::string> DependenciesSetTy;
98 
99 private:
100  /// Dependencies - This is the list of all included files.
101  DependenciesSetTy Dependencies;
102 
103 public:
104  TGLexer(SourceMgr &SrcMgr, ArrayRef<std::string> Macros);
105 
107  return CurCode = LexToken(CurPtr == CurBuf.begin());
108  }
109 
111  return Dependencies;
112  }
113 
114  tgtok::TokKind getCode() const { return CurCode; }
115 
116  const std::string &getCurStrVal() const {
117  assert((CurCode == tgtok::Id || CurCode == tgtok::StrVal ||
118  CurCode == tgtok::VarName || CurCode == tgtok::CodeFragment) &&
119  "This token doesn't have a string value");
120  return CurStrVal;
121  }
122  int64_t getCurIntVal() const {
123  assert(CurCode == tgtok::IntVal && "This token isn't an integer");
124  return CurIntVal;
125  }
126  std::pair<int64_t, unsigned> getCurBinaryIntVal() const {
127  assert(CurCode == tgtok::BinaryIntVal &&
128  "This token isn't a binary integer");
129  return std::make_pair(CurIntVal, (CurPtr - TokStart)-2);
130  }
131 
132  SMLoc getLoc() const;
133 
134 private:
135  /// LexToken - Read the next token and return its code.
136  tgtok::TokKind LexToken(bool FileOrLineStart = false);
137 
138  tgtok::TokKind ReturnError(SMLoc Loc, const Twine &Msg);
139  tgtok::TokKind ReturnError(const char *Loc, const Twine &Msg);
140 
141  int getNextChar();
142  int peekNextChar(int Index) const;
143  void SkipBCPLComment();
144  bool SkipCComment();
145  tgtok::TokKind LexIdentifier();
146  bool LexInclude();
147  tgtok::TokKind LexString();
148  tgtok::TokKind LexVarName();
149  tgtok::TokKind LexNumber();
150  tgtok::TokKind LexBracket();
151  tgtok::TokKind LexExclaim();
152 
153  // Process EOF encountered in LexToken().
154  // If EOF is met in an include file, then the method will update
155  // CurPtr, CurBuf and preprocessing include stack, and return true.
156  // If EOF is met in the top-level file, then the method will
157  // update and check the preprocessing include stack, and return false.
158  bool processEOF();
159 
160  // *** Structures and methods for preprocessing support ***
161 
162  // A set of macro names that are defined either via command line or
163  // by using:
164  // #define NAME
165  StringSet<> DefinedMacros;
166 
167  // Each of #ifdef and #else directives has a descriptor associated
168  // with it.
169  //
170  // An ordered list of preprocessing controls defined by #ifdef/#else
171  // directives that are in effect currently is called preprocessing
172  // control stack. It is represented as a vector of PreprocessorControlDesc's.
173  //
174  // The control stack is updated according to the following rules:
175  //
176  // For each #ifdef we add an element to the control stack.
177  // For each #else we replace the top element with a descriptor
178  // with an inverted IsDefined value.
179  // For each #endif we pop the top element from the control stack.
180  //
181  // When CurPtr reaches the current buffer's end, the control stack
182  // must be empty, i.e. #ifdef and the corresponding #endif
183  // must be located in the same file.
184  struct PreprocessorControlDesc {
185  // Either tgtok::Ifdef or tgtok::Else.
187 
188  // True, if the condition for this directive is true, false - otherwise.
189  // Examples:
190  // #ifdef NAME : true, if NAME is defined, false - otherwise.
191  // ...
192  // #else : false, if NAME is defined, true - otherwise.
193  bool IsDefined;
194 
195  // Pointer into CurBuf to the beginning of the preprocessing directive
196  // word, e.g.:
197  // #ifdef NAME
198  // ^ - SrcPos
199  SMLoc SrcPos;
200  };
201 
202  // We want to disallow code like this:
203  // file1.td:
204  // #define NAME
205  // #ifdef NAME
206  // include "file2.td"
207  // EOF
208  // file2.td:
209  // #endif
210  // EOF
211  //
212  // To do this, we clear the preprocessing control stack on entry
213  // to each included file. PrepIncludeStack is used to store
214  // preprocessing control stacks for the current file and all its
215  // parent files. The back() element is the preprocessing control
216  // stack for the current file.
217  std::vector<std::unique_ptr<std::vector<PreprocessorControlDesc>>>
218  PrepIncludeStack;
219 
220  // Validate that the current preprocessing control stack is empty,
221  // since we are about to exit a file, and pop the include stack.
222  //
223  // If IncludeStackMustBeEmpty is true, the include stack must be empty
224  // after the popping, otherwise, the include stack must not be empty
225  // after the popping. Basically, the include stack must be empty
226  // only if we exit the "top-level" file (i.e. finish lexing).
227  //
228  // The method returns false, if the current preprocessing control stack
229  // is not empty (e.g. there is an unterminated #ifdef/#else),
230  // true - otherwise.
231  bool prepExitInclude(bool IncludeStackMustBeEmpty);
232 
233  // Look ahead for a preprocessing directive starting from CurPtr. The caller
234  // must only call this method, if *(CurPtr - 1) is '#'. If the method matches
235  // a preprocessing directive word followed by a whitespace, then it returns
236  // one of the internal token kinds, i.e. Ifdef, Ifndef, Else, Endif, Define.
237  //
238  // CurPtr is not adjusted by this method.
239  tgtok::TokKind prepIsDirective() const;
240 
241  // Given a preprocessing token kind, adjusts CurPtr to the end
242  // of the preprocessing directive word. Returns true, unless
243  // an unsupported token kind is passed in.
244  //
245  // We use look-ahead prepIsDirective() and prepEatPreprocessorDirective()
246  // to avoid adjusting CurPtr before we are sure that '#' is followed
247  // by a preprocessing directive. If it is not, then we fall back to
248  // tgtok::paste interpretation of '#'.
249  bool prepEatPreprocessorDirective(tgtok::TokKind Kind);
250 
251  // The main "exit" point from the token parsing to preprocessor.
252  //
253  // The method is called for CurPtr, when prepIsDirective() recognizes
254  // a directive. The first parameter matches the result of prepIsDirective(),
255  // denoting the actual preprocessor directive to be processed.
256  //
257  // If the preprocessing directive disables the tokens processing, e.g.:
258  // #ifdef NAME // NAME is undefined
259  // then lexPreprocessor() enters the lines-skipping mode.
260  // In this mode, it does not parse any tokens, because the code under
261  // the #ifdef may not even be a correct tablegen code. The preprocessor
262  // looks for lines containing other preprocessing directives, which
263  // may be prepended with whitespaces and C-style comments. If the line
264  // does not contain a preprocessing directive, it is skipped completely.
265  // Otherwise, the preprocessing directive is processed by recursively
266  // calling lexPreprocessor(). The processing of the encountered
267  // preprocessing directives includes updating preprocessing control stack
268  // and adding new macros into DefinedMacros set.
269  //
270  // The second parameter controls whether lexPreprocessor() is called from
271  // LexToken() (true) or recursively from lexPreprocessor() (false).
272  //
273  // If ReturnNextLiveToken is true, the method returns the next
274  // LEX token following the current directive or following the end
275  // of the disabled preprocessing region corresponding to this directive.
276  // If ReturnNextLiveToken is false, the method returns the first parameter,
277  // unless there were errors encountered in the disabled preprocessing
278  // region - in this case, it returns tgtok::Error.
279  tgtok::TokKind lexPreprocessor(tgtok::TokKind Kind,
280  bool ReturnNextLiveToken = true);
281 
282  // Worker method for lexPreprocessor() to skip lines after some
283  // preprocessing directive up to the buffer end or to the directive
284  // that re-enables token processing. The method returns true
285  // upon processing the next directive that re-enables tokens
286  // processing. False is returned if an error was encountered.
287  //
288  // Note that prepSkipRegion() calls lexPreprocessor() to process
289  // encountered preprocessing directives. In this case, the second
290  // parameter to lexPreprocessor() is set to false. Being passed
291  // false ReturnNextLiveToken, lexPreprocessor() must never call
292  // prepSkipRegion(). We assert this by passing ReturnNextLiveToken
293  // to prepSkipRegion() and checking that it is never set to false.
294  bool prepSkipRegion(bool MustNeverBeFalse);
295 
296  // Lex name of the macro after either #ifdef or #define. We could have used
297  // LexIdentifier(), but it has special handling of "include" word, which
298  // could result in awkward diagnostic errors. Consider:
299  // ----
300  // #ifdef include
301  // class ...
302  // ----
303  // LexIdentifier() will engage LexInclude(), which will complain about
304  // missing file with name "class". Instead, prepLexMacroName() will treat
305  // "include" as a normal macro name.
306  //
307  // On entry, CurPtr points to the end of a preprocessing directive word.
308  // The method allows for whitespaces between the preprocessing directive
309  // and the macro name. The allowed whitespaces are ' ' and '\t'.
310  //
311  // If the first non-whitespace symbol after the preprocessing directive
312  // is a valid start symbol for an identifier (i.e. [a-zA-Z_]), then
313  // the method updates TokStart to the position of the first non-whitespace
314  // symbol, sets CurPtr to the position of the macro name's last symbol,
315  // and returns a string reference to the macro name. Otherwise,
316  // TokStart is set to the first non-whitespace symbol after the preprocessing
317  // directive, and the method returns an empty string reference.
318  //
319  // In all cases, TokStart may be used to point to the word following
320  // the preprocessing directive.
321  StringRef prepLexMacroName();
322 
323  // Skip any whitespaces starting from CurPtr. The method is used
324  // only in the lines-skipping mode to find the first non-whitespace
325  // symbol after or at CurPtr. Allowed whitespaces are ' ', '\t', '\n'
326  // and '\r'. The method skips C-style comments as well, because
327  // it is used to find the beginning of the preprocessing directive.
328  // If we do not handle C-style comments the following code would
329  // result in incorrect detection of a preprocessing directive:
330  // /*
331  // #ifdef NAME
332  // */
333  // As long as we skip C-style comments, the following code is correctly
334  // recognized as a preprocessing directive:
335  // /* first line comment
336  // second line comment */ #ifdef NAME
337  //
338  // The method returns true upon reaching the first non-whitespace symbol
339  // or EOF, CurPtr is set to point to this symbol. The method returns false,
340  // if an error occurred during skipping of a C-style comment.
341  bool prepSkipLineBegin();
342 
343  // Skip any whitespaces or comments after a preprocessing directive.
344  // The method returns true upon reaching either end of the line
345  // or end of the file. If there is a multiline C-style comment
346  // after the preprocessing directive, the method skips
347  // the comment, so the final CurPtr may point to one of the next lines.
348  // The method returns false, if an error occurred during skipping
349  // C- or C++-style comment, or a non-whitespace symbol appears
350  // after the preprocessing directive.
351  //
352  // The method may be called both during lines-skipping and tokens
353  // processing. It actually verifies that only whitespaces and/or
354  // comments follow a preprocessing directive.
355  //
356  // After the execution of this method, CurPtr points either to new line
357  // symbol, buffer end or non-whitespace symbol following the preprocessing
358  // directive.
359  bool prepSkipDirectiveEnd();
360 
361  // Skip all symbols to the end of the line/file.
362  // The method adjusts CurPtr, so that it points to either new line
363  // symbol in the current line or the buffer end.
364  void prepSkipToLineEnd();
365 
366  // Return true, if the current preprocessor control stack is such that
367  // we should allow lexer to process the next token, false - otherwise.
368  //
369  // In particular, the method returns true, if all the #ifdef/#else
370  // controls on the stack have their IsDefined member set to true.
371  bool prepIsProcessingEnabled();
372 
373  // Report an error, if we reach EOF with non-empty preprocessing control
374  // stack. This means there is no matching #endif for the previous
375  // #ifdef/#else.
376  void prepReportPreprocessorStackError();
377 };
378 
379 } // end namespace llvm
380 
381 #endif
llvm::tgtok::XFilter
@ XFilter
Definition: TGLexer.h:57
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
llvm::tgtok::XForEach
@ XForEach
Definition: TGLexer.h:57
StringRef.h
llvm::tgtok::Field
@ Field
Definition: TGLexer.h:51
llvm::tgtok::Code
@ Code
Definition: TGLexer.h:50
llvm::tgtok::paste
@ paste
Definition: TGLexer.h:45
llvm::tgtok::XXOR
@ XXOR
Definition: TGLexer.h:55
llvm::tgtok::l_brace
@ l_brace
Definition: TGLexer.h:39
llvm::tgtok::XStrConcat
@ XStrConcat
Definition: TGLexer.h:56
llvm::tgtok::l_paren
@ l_paren
Definition: TGLexer.h:40
llvm::tgtok::VarName
@ VarName
Definition: TGLexer.h:71
llvm::tgtok::Defm
@ Defm
Definition: TGLexer.h:50
llvm::tgtok::comma
@ comma
Definition: TGLexer.h:43
llvm::tgtok::Bits
@ Bits
Definition: TGLexer.h:50
llvm::tgtok::XGe
@ XGe
Definition: TGLexer.h:58
llvm::tgtok::XSUB
@ XSUB
Definition: TGLexer.h:55
llvm::tgtok::l_square
@ l_square
Definition: TGLexer.h:38
llvm::tgtok::FalseVal
@ FalseVal
Definition: TGLexer.h:61
llvm::tgtok::XHead
@ XHead
Definition: TGLexer.h:57
llvm::tgtok::Else
@ Else
Definition: TGLexer.h:75
llvm::tgtok::BinaryIntVal
@ BinaryIntVal
Definition: TGLexer.h:68
llvm::tgtok::TrueKW
@ TrueKW
Definition: TGLexer.h:52
llvm::TGLexer::TGLexer
TGLexer(SourceMgr &SrcMgr, ArrayRef< std::string > Macros)
Definition: TGLexer.cpp:47
llvm::tgtok::plus
@ plus
Definition: TGLexer.h:37
llvm::TGLexer::Lex
tgtok::TokKind Lex()
Definition: TGLexer.h:106
llvm::tgtok::FalseKW
@ FalseKW
Definition: TGLexer.h:51
llvm::tgtok::XMUL
@ XMUL
Definition: TGLexer.h:55
llvm::TGLexer::getDependencies
const DependenciesSetTy & getDependencies() const
Definition: TGLexer.h:110
llvm::tgtok::Dag
@ Dag
Definition: TGLexer.h:50
llvm::tgtok::XFind
@ XFind
Definition: TGLexer.h:56
llvm::tgtok::Then
@ Then
Definition: TGLexer.h:52
llvm::tgtok::question
@ question
Definition: TGLexer.h:44
llvm::tgtok::XOR
@ XOR
Definition: TGLexer.h:55
llvm::SMLoc
Represents a location in source code.
Definition: SMLoc.h:23
llvm::TGLexer
TGLexer - TableGen Lexer class.
Definition: TGLexer.h:80
llvm::tgtok::Assert
@ Assert
Definition: TGLexer.h:50
llvm::TGLexer::getCurBinaryIntVal
std::pair< int64_t, unsigned > getCurBinaryIntVal() const
Definition: TGLexer.h:126
llvm::tgtok::XIsA
@ XIsA
Definition: TGLexer.h:58
llvm::tgtok::XCast
@ XCast
Definition: TGLexer.h:56
llvm::tgtok::XListConcat
@ XListConcat
Definition: TGLexer.h:56
SMLoc.h
llvm::tgtok::If
@ If
Definition: TGLexer.h:51
llvm::tgtok::equal
@ equal
Definition: TGLexer.h:44
llvm::tgtok::XNOT
@ XNOT
Definition: TGLexer.h:55
llvm::lltok::Kind
Kind
Definition: LLToken.h:18
llvm::tgtok::XGt
@ XGt
Definition: TGLexer.h:58
llvm::tgtok::XSRA
@ XSRA
Definition: TGLexer.h:55
llvm::tgtok::Foreach
@ Foreach
Definition: TGLexer.h:51
llvm::tgtok::XInterleave
@ XInterleave
Definition: TGLexer.h:56
llvm::tgtok::In
@ In
Definition: TGLexer.h:51
llvm::tgtok::Defset
@ Defset
Definition: TGLexer.h:50
llvm::tgtok::XIf
@ XIf
Definition: TGLexer.h:57
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
llvm::tgtok::XEq
@ XEq
Definition: TGLexer.h:58
llvm::TGLexer::getCode
tgtok::TokKind getCode() const
Definition: TGLexer.h:114
llvm::tgtok::Int
@ Int
Definition: TGLexer.h:51
llvm::tgtok::ElseKW
@ ElseKW
Definition: TGLexer.h:50
llvm::tgtok::r_square
@ r_square
Definition: TGLexer.h:38
llvm::tgtok::Defvar
@ Defvar
Definition: TGLexer.h:50
llvm::tgtok::XSubstr
@ XSubstr
Definition: TGLexer.h:56
llvm::tgtok::Include
@ Include
Definition: TGLexer.h:51
llvm::tgtok::r_paren
@ r_paren
Definition: TGLexer.h:40
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::tgtok::XConcat
@ XConcat
Definition: TGLexer.h:55
llvm::tgtok::XLe
@ XLe
Definition: TGLexer.h:58
llvm::StringSet
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition: StringSet.h:22
llvm::tgtok::XCond
@ XCond
Definition: TGLexer.h:58
llvm::ArrayRef< std::string >
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::tgtok::Endif
@ Endif
Definition: TGLexer.h:75
llvm::tgtok::String
@ String
Definition: TGLexer.h:52
llvm::tgtok::XADD
@ XADD
Definition: TGLexer.h:55
llvm::SourceMgr
This owns the files read by a parser, handles include stacks, and handles diagnostic wrangling.
Definition: SourceMgr.h:31
llvm::tgtok::XSRL
@ XSRL
Definition: TGLexer.h:55
llvm::tgtok::XGetDagOp
@ XGetDagOp
Definition: TGLexer.h:58
llvm::tgtok::Ifdef
@ Ifdef
Definition: TGLexer.h:75
llvm::tgtok::Ifndef
@ Ifndef
Definition: TGLexer.h:75
llvm::tgtok::StrVal
@ StrVal
Definition: TGLexer.h:71
StringSet.h
llvm::tgtok::dot
@ dot
Definition: TGLexer.h:43
llvm::tgtok::IntVal
@ IntVal
Definition: TGLexer.h:64
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
llvm::tgtok::r_brace
@ r_brace
Definition: TGLexer.h:39
llvm::tgtok::Eof
@ Eof
Definition: TGLexer.h:34
llvm::TGLexer::getCurIntVal
int64_t getCurIntVal() const
Definition: TGLexer.h:122
llvm::tgtok::XAND
@ XAND
Definition: TGLexer.h:55
llvm::tgtok::Error
@ Error
Definition: TGLexer.h:34
llvm::tgtok::XListSplat
@ XListSplat
Definition: TGLexer.h:56
llvm::tgtok::XSHL
@ XSHL
Definition: TGLexer.h:55
llvm::tgtok::XFoldl
@ XFoldl
Definition: TGLexer.h:57
llvm::tgtok::XSubst
@ XSubst
Definition: TGLexer.h:57
llvm::tgtok::Class
@ Class
Definition: TGLexer.h:50
llvm::TGLexer::getCurStrVal
const std::string & getCurStrVal() const
Definition: TGLexer.h:116
llvm::tgtok::XDag
@ XDag
Definition: TGLexer.h:58
llvm::tgtok::greater
@ greater
Definition: TGLexer.h:41
llvm::tgtok::CodeFragment
@ CodeFragment
Definition: TGLexer.h:71
llvm::tgtok::XSize
@ XSize
Definition: TGLexer.h:57
llvm::tgtok::XNe
@ XNe
Definition: TGLexer.h:58
llvm::tgtok::less
@ less
Definition: TGLexer.h:41
llvm::tgtok::MultiClass
@ MultiClass
Definition: TGLexer.h:51
llvm::tgtok::TokKind
TokKind
Definition: TGLexer.h:32
llvm::tgtok::Bit
@ Bit
Definition: TGLexer.h:50
llvm::tgtok::XSetDagOp
@ XSetDagOp
Definition: TGLexer.h:58
DataTypes.h
llvm::TGLexer::DependenciesSetTy
std::set< std::string > DependenciesSetTy
Definition: TGLexer.h:97
llvm::TGLexer::getLoc
SMLoc getLoc() const
Definition: TGLexer.cpp:64
llvm::tgtok::List
@ List
Definition: TGLexer.h:51
llvm::tgtok::XTail
@ XTail
Definition: TGLexer.h:57
llvm::tgtok::semi
@ semi
Definition: TGLexer.h:42
llvm::StringRef::begin
iterator begin() const
Definition: StringRef.h:128
llvm::tgtok::TrueVal
@ TrueVal
Definition: TGLexer.h:61
llvm::tgtok::minus
@ minus
Definition: TGLexer.h:37
llvm::tgtok::XLt
@ XLt
Definition: TGLexer.h:58
llvm::tgtok::Define
@ Define
Definition: TGLexer.h:75
llvm::tgtok::colon
@ colon
Definition: TGLexer.h:42
llvm::tgtok::dotdotdot
@ dotdotdot
Definition: TGLexer.h:46
llvm::tgtok::Id
@ Id
Definition: TGLexer.h:71
llvm::tgtok::XEmpty
@ XEmpty
Definition: TGLexer.h:57
llvm::tgtok::Let
@ Let
Definition: TGLexer.h:51