14 #ifndef LLVM_CLANG_LEX_LEXER_H
15 #define LLVM_CLANG_LEX_LEXER_H
19 #include "llvm/ADT/SmallVector.h"
24 class DiagnosticsEngine;
27 class DiagnosticBuilder;
47 void anchor()
override;
51 const char *BufferStart;
52 const char *BufferEnd;
68 unsigned char ExtendedTokenMode;
77 const char *BufferPtr;
83 bool IsAtPhysicalStartOfLine;
87 bool HasLeadingEmptyMacro;
/// Copy assignment from another Lexer is explicitly deleted, so a Lexer
/// cannot be assigned from another Lexer.
93 void operator=(
const Lexer &) =
delete;
96 void InitLexer(
const char *BufStart,
const char *BufPtr,
const char *BufEnd);
109 const char *BufStart,
const char *BufPtr,
const char *BufEnd);
/// IndirectLex - Override that does nothing but forward to Lex(Result);
/// the token is written into \p Result by that call.
148 void IndirectLex(
Token &
Result)
override { Lex(Result); }
159 return BufferPtr == BufferEnd;
167 return ExtendedTokenMode > 1;
174 "Can only retain whitespace in raw mode or -traditional-cpp");
175 ExtendedTokenMode = Val ? 2 : 0;
181 return ExtendedTokenMode > 0;
189 "Can't play with comment retention state when retaining whitespace");
190 ExtendedTokenMode = Mode ? 1 : 0;
203 return StringRef(BufferStart, BufferEnd - BufferStart);
/// Stringify - Convert the specified string into a C string by escaping
/// '\' and " characters. This does not add surrounding ""'s to the string.
///
/// \param Str the text to escape.
/// \param Charify defaults to false. NOTE(review): its effect is not visible
///        from this declaration; presumably it selects character-literal
///        style escaping -- confirm against the definition.
/// \returns the escaped text as a new std::string.
231 static std::string
Stringify(StringRef Str,
bool Charify =
false);
251 bool *Invalid =
nullptr);
261 bool *Invalid =
nullptr);
275 bool *invalid =
nullptr);
290 bool IgnoreWhiteSpace =
false);
400 bool *Invalid =
nullptr);
450 unsigned MaxLines = 0);
461 bool SkipTrailingWhitespaceAndNewLine);
472 if (isObviouslySimpleCharacter(Ptr[0])) {
478 return getCharAndSizeSlowNoWarn(Ptr, Size, LangOpts);
488 bool LexTokenInternal(
Token &
Result,
bool TokAtPhysicalStartOfLine);
490 bool CheckUnicodeWhitespace(
Token &
Result, uint32_t
C,
const char *CurPtr);
495 bool LexUnicode(
Token &
Result, uint32_t
C,
const char *CurPtr);
502 void FormTokenWithChars(
Token &
Result,
const char *TokEnd,
504 unsigned TokLen = TokEnd-BufferPtr;
514 unsigned isNextPPTokenLParen();
538 static bool isObviouslySimpleCharacter(
char C) {
539 return C !=
'?' && C !=
'\\';
546 inline char getAndAdvanceChar(
const char *&Ptr,
Token &Tok) {
549 if (isObviouslySimpleCharacter(Ptr[0]))
return *Ptr++;
552 char C = getCharAndSizeSlow(Ptr, Size, &Tok);
561 const char *ConsumeChar(
const char *Ptr,
unsigned Size,
Token &Tok) {
569 getCharAndSizeSlow(Ptr, Size, &Tok);
577 inline char getCharAndSize(
const char *Ptr,
unsigned &Size) {
580 if (isObviouslySimpleCharacter(Ptr[0])) {
586 return getCharAndSizeSlow(Ptr, Size);
591 char getCharAndSizeSlow(
const char *Ptr,
unsigned &Size,
592 Token *Tok =
nullptr);
597 static unsigned getEscapedNewLineSize(
const char *
P);
602 static const char *SkipEscapedNewLines(
const char *
P);
606 static char getCharAndSizeSlowNoWarn(
const char *Ptr,
unsigned &Size,
607 const LangOptions &LangOpts);
612 void SkipBytes(
unsigned Bytes,
bool StartOfLine);
614 void PropagateLineStartLeadingSpaceInfo(
Token &Result);
616 const char *LexUDSuffix(
Token &Result,
const char *CurPtr,
617 bool IsStringLiteral);
620 bool LexIdentifier (
Token &Result,
const char *CurPtr);
621 bool LexNumericConstant (
Token &Result,
const char *CurPtr);
622 bool LexStringLiteral (
Token &Result,
const char *CurPtr,
624 bool LexRawStringLiteral (
Token &Result,
const char *CurPtr,
626 bool LexAngledStringLiteral(
Token &Result,
const char *CurPtr);
627 bool LexCharConstant (
Token &Result,
const char *CurPtr,
629 bool LexEndOfFile (
Token &Result,
const char *CurPtr);
630 bool SkipWhitespace (
Token &Result,
const char *CurPtr,
631 bool &TokAtPhysicalStartOfLine);
632 bool SkipLineComment (
Token &Result,
const char *CurPtr,
633 bool &TokAtPhysicalStartOfLine);
634 bool SkipBlockComment (
Token &Result,
const char *CurPtr,
635 bool &TokAtPhysicalStartOfLine);
636 bool SaveLineComment (
Token &Result,
const char *CurPtr);
638 bool IsStartOfConflictMarker(
const char *CurPtr);
639 bool HandleEndOfConflictMarker(
const char *CurPtr);
641 bool isCodeCompletionPoint(
const char *CurPtr)
const;
/// Move the current buffer pointer (BufferPtr) to the end of the buffer
/// (BufferEnd), so that no further characters remain to be lexed.
642 void cutOffLexing() { BufferPtr = BufferEnd; }
644 bool isHexaLiteral(
const char *Start,
const LangOptions &LangOpts);
659 uint32_t tryReadUCN(
const char *&CurPtr,
const char *SlashLoc,
Token *Tok);
672 bool tryConsumeIdentifierUCN(
const char *&CurPtr,
unsigned Size,
680 bool tryConsumeIdentifierUTF8Char(
const char *&CurPtr);
static unsigned getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid=nullptr)
getSpelling - This method is used to get the spelling of a token into a preallocated buffer...
SourceLocation getEnd() const
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens...
static std::pair< unsigned, bool > ComputePreamble(StringRef Buffer, const LangOptions &LangOpts, unsigned MaxLines=0)
Compute the preamble of the given file.
bool LexFromRawLexer(Token &Result)
LexFromRawLexer - Lex a token from a designated raw lexer (one with no associated preprocessor object...
std::unique_ptr< llvm::MemoryBuffer > Buffer
StringRef getBuffer() const
Gets source code buffer.
static char getCharAndSizeNoWarn(const char *Ptr, unsigned &Size, const LangOptions &LangOpts)
getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever emit a warning.
static bool isAtStartOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroBegin=nullptr)
Returns true if the given MacroID location points at the first token of the macro expansion...
ConflictMarkerKind
ConflictMarkerKind - Kinds of conflict marker which the lexer might be recovering from...
static Lexer * Create_PragmaLexer(SourceLocation SpellingLoc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLen, Preprocessor &PP)
Create_PragmaLexer: Lexer constructor - Create a new lexer object for _Pragma expansion.
static CharSourceRange getAsCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
bool isPragmaLexer() const
isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.
Token - This structure provides full information about a lexed token.
void setKind(tok::TokenKind K)
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
void resetExtendedTokenMode()
Sets the extended token mode back to its initial value, according to the language options and preproc...
A Perforce-style conflict marker, initiated by 4 ">"s, separated by 4 "="s, and terminated by 4 "<"s...
SourceLocation getSourceLocation() override
getSourceLocation - Return a source location for the next character in the current file...
SourceLocation getLocWithOffset(int Offset) const
Return a source location with the specified offset from this SourceLocation.
static bool getRawToken(SourceLocation Loc, Token &Result, const SourceManager &SM, const LangOptions &LangOpts, bool IgnoreWhiteSpace=false)
Relex the token at the specified location.
const FileID FID
The SourceManager FileID corresponding to the file being lexed.
static SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart, unsigned Character, const SourceManager &SM, const LangOptions &LangOpts)
AdvanceToTokenCharacter - If the current SourceLocation specifies a location at the start of a token...
A little helper class used to produce diagnostics.
static StringRef getSourceText(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts, bool *Invalid=nullptr)
Returns a string for the source that the range encompasses.
static bool isAtEndOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroEnd=nullptr)
Returns true if the given MacroID location points at the last token of the macro expansion.
Defines the clang::LangOptions interface.
bool LexingRawMode
True if in raw mode.
static SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset, const SourceManager &SM, const LangOptions &LangOpts)
Computes the source location just past the end of the token at this source location.
Represents a character-granular source range.
static unsigned MeasureTokenLength(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
MeasureTokenLength - Relex the token at the specified location and return its length in bytes in the ...
static SourceLocation findLocationAfterToken(SourceLocation loc, tok::TokenKind TKind, const SourceManager &SM, const LangOptions &LangOpts, bool SkipTrailingWhitespaceAndNewLine)
Checks that the given token is the first token that occurs after the given location (this excludes co...
The result type of a method or function.
static CharSourceRange getCharRange(SourceRange R)
bool isTokenRange() const
Return true if the end of this range specifies the start of the last token.
Encodes a location in the source.
void setLength(unsigned Len)
DiagnosticBuilder Diag(const char *Loc, unsigned DiagID) const
Diag - Forwarding function for diagnostics.
static SourceLocation GetBeginningOfToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Given a location anywhere in a source buffer, find the location that corresponds to the beginning of...
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
SourceLocation getBegin() const
static StringRef getImmediateMacroName(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
static StringRef getImmediateMacroNameForDiagnostics(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
SourceRange getAsRange() const
bool inKeepCommentMode() const
inKeepCommentMode - Return true if the lexer should return comments as tokens.
static CharSourceRange makeFileCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Accepts a range and returns a character range with file locations.
static CharSourceRange getAsCharRange(SourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Given a token range, produce a corresponding CharSourceRange that is not a token range.
static bool isIdentifierBodyChar(char c, const LangOptions &LangOpts)
Returns true if the given character could appear in an identifier.
const LangOptions & getLangOpts() const
getLangOpts - Return the language features currently enabled.
void ReadToEndOfLine(SmallVectorImpl< char > *Result=nullptr)
ReadToEndOfLine - Read the rest of the current preprocessor line as an uninterpreted string...
Not within a conflict marker.
void SetCommentRetentionState(bool Mode)
SetCommentRetentionState - Change the comment retention mode of the lexer to the specified mode...
bool isKeepWhitespaceMode() const
isKeepWhitespaceMode - Return true if the lexer should return tokens for every character in the file...
void setLocation(SourceLocation L)
A normal or diff3 conflict marker, initiated by at least 7 "<"s, separated by at least 7 "="s or "|"s...
A trivial tuple used to represent a source range.
Defines the PreprocessorLexer interface.
void SetKeepWhitespaceMode(bool Val)
SetKeepWhitespaceMode - This method lets clients enable or disable whitespace retention mode...
This class handles loading and caching of source files into memory.
const char * getBufferLocation() const
Return the current location in the buffer.
SourceLocation getFileLoc() const
getFileLoc - Return the File Location for the file we are lexing out of.
static std::string Stringify(StringRef Str, bool Charify=false)
Stringify - Convert the specified string into a C string by escaping '\' and " characters. This does not add surrounding ""'s to the string.
Engages in a tight little dance with the lexer to efficiently preprocess tokens.