clang  3.9.0
Token.h
Go to the documentation of this file.
1 //===--- Token.h - Token interface ------------------------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the Token interface.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_CLANG_LEX_TOKEN_H
15 #define LLVM_CLANG_LEX_TOKEN_H
16 
18 #include "clang/Basic/TokenKinds.h"
19 #include "llvm/ADT/StringRef.h"
20 #include <cassert>
21 
22 namespace clang {
23 
24 class IdentifierInfo;
25 
26 /// Token - This structure provides full information about a lexed token.
27 /// It is not intended to be space efficient; it is intended to return as much
28 /// information as possible about each returned token. This is expected to be
29 /// compressed into a smaller form if memory footprint is important.
30 ///
31 /// The parser can create a special "annotation token" representing a stream of
32 /// tokens that were parsed and semantically resolved, e.g.: "foo::MyClass<int>"
33 /// can be represented by a single typename annotation token that carries
34 /// information about the SourceRange of the tokens and the type object.
35 class Token {
36  /// The location of the token. This is actually a SourceLocation.
37  unsigned Loc;
38 
39  // Conceptually these next two fields could be in a union. However, this
40  // causes gcc 4.2 to pessimize LexTokenInternal, a very performance critical
41  // routine. Keeping as separate members with casts until a more beautiful fix
42  // presents itself.
43 
44  /// UintData - This holds either the length of the token text, when
45  /// a normal token, or the end of the SourceRange when an annotation
46  /// token.
47  unsigned UintData;
48 
49  /// PtrData - This is a union of four different pointer types, which depends
50  /// on what type of token this is:
51  /// Identifiers, keywords, etc:
52  /// This is an IdentifierInfo*, which contains the uniqued identifier
53  /// spelling.
54  /// Literals: isLiteral() returns true.
55  /// This is a pointer to the start of the token in a text buffer, which
56  /// may be dirty (have trigraphs / escaped newlines).
57  /// Annotations (resolved type names, C++ scopes, etc): isAnnotation().
58  /// This is a pointer to sema-specific data for the annotation token.
59  /// Eof:
60  /// This is a pointer to a Decl.
61  /// Other:
62  /// This is null.
63  void *PtrData;
64 
65  /// Kind - The actual flavor of token this is.
67 
68  /// Flags - Bits we track about this token, members of the TokenFlags enum.
69  unsigned short Flags;
70 
71 public:
72  // Various flags set per token:
73  enum TokenFlags {
74  StartOfLine = 0x01, // At start of line or only after whitespace
75  // (considering the line after macro expansion).
76  LeadingSpace = 0x02, // Whitespace exists before this token (considering
77  // whitespace after macro expansion).
78  DisableExpand = 0x04, // This identifier may never be macro expanded.
79  NeedsCleaning = 0x08, // Contained an escaped newline or trigraph.
80  LeadingEmptyMacro = 0x10, // Empty macro exists before this token.
81  HasUDSuffix = 0x20, // This string or character literal has a ud-suffix.
82  HasUCN = 0x40, // This identifier contains a UCN.
83  IgnoredComma = 0x80, // This comma is not a macro argument separator (MS).
84  StringifiedInMacro = 0x100, // This string or character literal is formed by
85  // macro stringizing or charizing operator.
86  CommaAfterElided = 0x200, // The comma following this token was elided (MS).
87  };
88 
89  tok::TokenKind getKind() const { return Kind; }
90  void setKind(tok::TokenKind K) { Kind = K; }
91 
92  /// is/isNot - Predicates to check if this token is a specific kind, as in
93  /// "if (Tok.is(tok::l_brace)) {...}".
94  bool is(tok::TokenKind K) const { return Kind == K; }
95  bool isNot(tok::TokenKind K) const { return Kind != K; }
96  bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const {
97  return is(K1) || is(K2);
98  }
99  template <typename... Ts>
100  bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, Ts... Ks) const {
101  return is(K1) || isOneOf(K2, Ks...);
102  }
103 
104  /// \brief Return true if this is a raw identifier (when lexing
105  /// in raw mode) or a non-keyword identifier (when lexing in non-raw mode).
106  bool isAnyIdentifier() const {
107  return tok::isAnyIdentifier(getKind());
108  }
109 
110  /// \brief Return true if this is a "literal", like a numeric
111  /// constant, string, etc.
112  bool isLiteral() const {
113  return tok::isLiteral(getKind());
114  }
115 
116  /// \brief Return true if this is any of tok::annot_* kind tokens.
117  bool isAnnotation() const {
118  return tok::isAnnotation(getKind());
119  }
120 
121  /// \brief Return a source location identifier for the specified
122  /// offset in the current file.
125  }
126  unsigned getLength() const {
127  assert(!isAnnotation() && "Annotation tokens have no length field");
128  return UintData;
129  }
130 
132  void setLength(unsigned Len) {
133  assert(!isAnnotation() && "Annotation tokens have no length field");
134  UintData = Len;
135  }
136 
138  assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token");
139  return SourceLocation::getFromRawEncoding(UintData ? UintData : Loc);
140  }
142  assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token");
143  UintData = L.getRawEncoding();
144  }
145 
148  }
149 
151  return isAnnotation() ? getAnnotationEndLoc()
153  }
154 
155  /// \brief SourceRange of the group of tokens that this annotation token
156  /// represents.
159  }
161  setLocation(R.getBegin());
163  }
164 
165  const char *getName() const { return tok::getTokenName(Kind); }
166 
167  /// \brief Reset every field of the token (not just the flags) to cleared.
168  void startToken() {
169  Kind = tok::unknown;
170  Flags = 0;
171  PtrData = nullptr;
172  UintData = 0;
173  Loc = SourceLocation().getRawEncoding();
174  }
175 
177  assert(isNot(tok::raw_identifier) &&
178  "getIdentifierInfo() on a tok::raw_identifier token!");
179  assert(!isAnnotation() &&
180  "getIdentifierInfo() on an annotation token!");
181  if (isLiteral()) return nullptr;
182  if (is(tok::eof)) return nullptr;
183  return (IdentifierInfo*) PtrData;
184  }
186  PtrData = (void*) II;
187  }
188 
189  const void *getEofData() const {
190  assert(is(tok::eof));
191  return reinterpret_cast<const void *>(PtrData);
192  }
193  void setEofData(const void *D) {
194  assert(is(tok::eof));
195  assert(!PtrData);
196  PtrData = const_cast<void *>(D);
197  }
198 
199  /// getRawIdentifier - For a raw identifier token (i.e., an identifier
200  /// lexed in raw mode), returns a reference to the text substring in the
201  /// buffer if known.
202  StringRef getRawIdentifier() const {
203  assert(is(tok::raw_identifier));
204  return StringRef(reinterpret_cast<const char *>(PtrData), getLength());
205  }
206  void setRawIdentifierData(const char *Ptr) {
207  assert(is(tok::raw_identifier));
208  PtrData = const_cast<char*>(Ptr);
209  }
210 
211  /// getLiteralData - For a literal token (numeric constant, string, etc), this
212  /// returns a pointer to the start of it in the text buffer if known, null
213  /// otherwise.
214  const char *getLiteralData() const {
215  assert(isLiteral() && "Cannot get literal data of non-literal");
216  return reinterpret_cast<const char*>(PtrData);
217  }
218  void setLiteralData(const char *Ptr) {
219  assert(isLiteral() && "Cannot set literal data of non-literal");
220  PtrData = const_cast<char*>(Ptr);
221  }
222 
223  void *getAnnotationValue() const {
224  assert(isAnnotation() && "Used AnnotVal on non-annotation token");
225  return PtrData;
226  }
227  void setAnnotationValue(void *val) {
228  assert(isAnnotation() && "Used AnnotVal on non-annotation token");
229  PtrData = val;
230  }
231 
232  /// \brief Set the specified flag.
233  void setFlag(TokenFlags Flag) {
234  Flags |= Flag;
235  }
236 
237  /// \brief Get the specified flag.
238  bool getFlag(TokenFlags Flag) const {
239  return (Flags & Flag) != 0;
240  }
241 
242  /// \brief Unset the specified flag.
243  void clearFlag(TokenFlags Flag) {
244  Flags &= ~Flag;
245  }
246 
247  /// \brief Return the internal representation of the flags.
248  ///
249  /// This is only intended for low-level operations such as writing tokens to
250  /// disk.
251  unsigned getFlags() const {
252  return Flags;
253  }
254 
255  /// \brief Set a flag to either true or false.
256  void setFlagValue(TokenFlags Flag, bool Val) {
257  if (Val)
258  setFlag(Flag);
259  else
260  clearFlag(Flag);
261  }
262 
263  /// isAtStartOfLine - Return true if this token is at the start of a line.
264  ///
265  bool isAtStartOfLine() const { return getFlag(StartOfLine); }
266 
267  /// \brief Return true if this token has whitespace before it.
268  ///
269  bool hasLeadingSpace() const { return getFlag(LeadingSpace); }
270 
271  /// \brief Return true if this identifier token should never
272  /// be expanded in the future, due to C99 6.10.3.4p2.
273  bool isExpandDisabled() const { return getFlag(DisableExpand); }
274 
275  /// \brief Return true if we have an ObjC keyword identifier.
276  bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const;
277 
278  /// \brief Return the ObjC keyword kind.
280 
281  /// \brief Return true if this token has trigraphs or escaped newlines in it.
282  bool needsCleaning() const { return getFlag(NeedsCleaning); }
283 
284  /// \brief Return true if this token has an empty macro before it.
285  ///
287 
288  /// \brief Return true if this token is a string or character literal which
289  /// has a ud-suffix.
290  bool hasUDSuffix() const { return getFlag(HasUDSuffix); }
291 
292  /// Returns true if this token contains a universal character name.
293  bool hasUCN() const { return getFlag(HasUCN); }
294 
295  /// Returns true if this token was formed by a macro stringizing or charizing
296  /// operator.
298 
299  /// Returns true if the comma after this token was elided.
300  bool commaAfterElided() const { return getFlag(CommaAfterElided); }
301 };
302 
303 /// \brief Information about the conditional stack (\#if directives)
304 /// currently active.
306  /// \brief Location where the conditional started.
308 
309  /// \brief True if this was contained in a skipping directive, e.g.,
310  /// in a "\#if 0" block.
312 
313  /// \brief True if we have emitted tokens already, and now we're in
314  /// an \#else block or something. Only useful in Skipping blocks.
316 
317  /// \brief True if we've seen a \#else in this block. If so,
318  /// \#elif/\#else directives are not allowed.
319  bool FoundElse;
320 };
321 
322 } // end namespace clang
323 
324 namespace llvm {
/// Explicit specialization of isPodLike for clang::Token whose `value`
/// member is the constant true.
// NOTE(review): presumably this lets LLVM's ADT containers copy Token objects
// bitwise; confirm against the primary isPodLike template, which is declared
// outside this file.
325  template <>
326  struct isPodLike<clang::Token> { static const bool value = true; };
327 } // end namespace llvm
328 
329 #endif // LLVM_CLANG_LEX_TOKEN_H
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
Definition: Token.h:265
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
Definition: Lexer.cpp:43
SourceLocation getEnd() const
void setFlagValue(TokenFlags Flag, bool Val)
Set a flag to either true or false.
Definition: Token.h:256
const char * getName() const
Definition: Token.h:165
bool hasLeadingSpace() const
Return true if this token has whitespace before it.
Definition: Token.h:269
bool hasUCN() const
Returns true if this token contains a universal character name.
Definition: Token.h:293
void setFlag(TokenFlags Flag)
Set the specified flag.
Definition: Token.h:233
unsigned getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it...
bool needsCleaning() const
Return true if this token has trigraphs or escaped newlines in it.
Definition: Token.h:282
bool isAnyIdentifier() const
Return true if this is a raw identifier (when lexing in raw mode) or a non-keyword identifier (when l...
Definition: Token.h:106
One of these records is kept for each identifier that is lexed.
bool stringifiedInMacro() const
Returns true if this token was formed by a macro stringizing or charizing operator.
Definition: Token.h:297
void setRawIdentifierData(const char *Ptr)
Definition: Token.h:206
static SourceLocation getFromRawEncoding(unsigned Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
Token - This structure provides full information about a lexed token.
Definition: Token.h:35
void setKind(tok::TokenKind K)
Definition: Token.h:90
SourceLocation getLocWithOffset(int Offset) const
Return a source location with the specified offset from this SourceLocation.
tok::TokenKind getKind() const
Definition: Token.h:89
bool FoundNonSkip
True if we have emitted tokens already, and now we're in an #else block or something.
Definition: Token.h:315
void * getAnnotationValue() const
Definition: Token.h:223
StringRef getRawIdentifier() const
getRawIdentifier - For a raw identifier token (i.e., an identifier lexed in raw mode), returns a reference to the text substring in the buffer if known.
Definition: Token.h:202
const void * getEofData() const
Definition: Token.h:189
void setAnnotationRange(SourceRange R)
Definition: Token.h:160
SourceRange getAnnotationRange() const
SourceRange of the group of tokens that this annotation token represents.
Definition: Token.h:157
void setAnnotationValue(void *val)
Definition: Token.h:227
void setEofData(const void *D)
Definition: Token.h:193
bool hasUDSuffix() const
Return true if this token is a string or character literal which has a ud-suffix. ...
Definition: Token.h:290
FormatToken * Token
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file. ...
Definition: Token.h:123
bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, Ts...Ks) const
Definition: Token.h:100
bool isNot(tok::TokenKind K) const
Definition: Token.h:95
Information about the conditional stack (#if directives) currently active.
Definition: Token.h:305
SourceLocation getAnnotationEndLoc() const
Definition: Token.h:137
ObjCKeywordKind
Provides a namespace for Objective-C keywords which start with an '@'.
Definition: TokenKinds.h:41
const char * getLiteralData() const
getLiteralData - For a literal token (numeric constant, string, etc), this returns a pointer to the s...
Definition: Token.h:214
Kind
bool WasSkipping
True if this was contained in a skipping directive, e.g., in a "\#if 0" block.
Definition: Token.h:311
Encodes a location in the source.
void setLength(unsigned Len)
Definition: Token.h:132
bool isAnnotation(TokenKind K)
Return true if this is any of tok::annot_* kinds.
Definition: TokenKinds.h:95
SourceLocation getEndLoc() const
Definition: Token.h:150
void setAnnotationEndLoc(SourceLocation L)
Definition: Token.h:141
bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const
Return true if we have an ObjC keyword identifier.
Definition: Lexer.cpp:36
bool getFlag(TokenFlags Flag) const
Get the specified flag.
Definition: Token.h:238
void setIdentifierInfo(IdentifierInfo *II)
Definition: Token.h:185
bool isLiteral(TokenKind K)
Return true if this is a "literal" kind, like a numeric constant, string, etc.
Definition: TokenKinds.h:87
SourceLocation getLastLoc() const
Definition: Token.h:146
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:25
SourceLocation getBegin() const
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {...
Definition: Token.h:94
bool commaAfterElided() const
Returns true if the comma after this token was elided.
Definition: Token.h:300
SourceLocation IfLoc
Location where the conditional started.
Definition: Token.h:307
unsigned getFlags() const
Return the internal representation of the flags.
Definition: Token.h:251
void setLiteralData(const char *Ptr)
Definition: Token.h:218
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Definition: Token.h:112
bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const
Definition: Token.h:96
bool hasLeadingEmptyMacro() const
Return true if this token has an empty macro before it.
Definition: Token.h:286
Defines the clang::TokenKind enum and support functions.
Defines the clang::SourceLocation class and associated facilities.
const char * getTokenName(TokenKind Kind) LLVM_READNONE
Determines the name of a token as used within the front end.
Definition: TokenKinds.cpp:25
unsigned getLength() const
Definition: Token.h:126
void setLocation(SourceLocation L)
Definition: Token.h:131
A trivial tuple used to represent a source range.
void clearFlag(TokenFlags Flag)
Unset the specified flag.
Definition: Token.h:243
bool FoundElse
True if we've seen a #else in this block.
Definition: Token.h:319
bool isAnnotation() const
Return true if this is any of tok::annot_* kind tokens.
Definition: Token.h:117
bool isExpandDisabled() const
Return true if this identifier token should never be expanded in the future, due to C99 6...
Definition: Token.h:273
bool isAnyIdentifier(TokenKind K)
Return true if this is a raw identifier or an identifier kind.
Definition: TokenKinds.h:73
void startToken()
Reset every field of the token (not just the flags) to cleared.
Definition: Token.h:168
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:176