22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/StringExtras.h"
24 #include "llvm/ADT/StringSwitch.h"
25 #include "llvm/Support/Compiler.h"
26 #include "llvm/Support/ConvertUTF.h"
27 #include "llvm/Support/MemoryBuffer.h"
29 using namespace clang;
38 return II->getObjCKeywordID() == objcKey;
// Out-of-line definition of Lexer::anchor(). The body is empty, so calling it
// has no effect.
// NOTE(review): presumably this is the LLVM "virtual method anchor" idiom,
// defined out of line so the class's vtable is emitted in this translation
// unit only. The class declaration is not visible here -- confirm.
53 void Lexer::anchor() { }
55 void Lexer::InitLexer(
const char *BufStart,
const char *BufPtr,
57 BufferStart = BufStart;
61 assert(BufEnd[0] == 0 &&
62 "We assume that the input buffer has a null character at the end"
63 " to simplify lexing!");
68 if (BufferStart == BufferPtr) {
70 StringRef Buf(BufferStart, BufferEnd - BufferStart);
71 size_t BOMLength = llvm::StringSwitch<size_t>(Buf)
72 .StartsWith(
"\xEF\xBB\xBF", 3)
76 BufferPtr += BOMLength;
79 Is_PragmaLexer =
false;
80 CurrentConflictMarkerState =
CMK_None;
83 IsAtStartOfLine =
true;
84 IsAtPhysicalStartOfLine =
true;
86 HasLeadingSpace =
false;
87 HasLeadingEmptyMacro =
false;
102 ExtendedTokenMode = 0;
111 FileLoc(PP.getSourceManager().getLocForStartOfFile(FID)),
112 LangOpts(PP.getLangOpts()) {
114 InitLexer(InputFile->getBufferStart(), InputFile->getBufferStart(),
115 InputFile->getBufferEnd());
121 assert(
PP &&
"Cannot reset token mode without a preprocessor");
122 if (LangOpts.TraditionalCPP)
132 const char *BufStart,
const char *BufPtr,
const char *BufEnd)
133 : FileLoc(fileloc), LangOpts(langOpts) {
135 InitLexer(BufStart, BufPtr, BufEnd);
144 Lexer::Lexer(
FileID FID,
const llvm::MemoryBuffer *FromFile,
146 :
Lexer(SM.getLocForStartOfFile(FID), langOpts, FromFile->getBufferStart(),
147 FromFile->getBufferStart(), FromFile->getBufferEnd()) {}
172 const llvm::MemoryBuffer *InputFile = SM.
getBuffer(SpellingFID);
173 Lexer *L =
new Lexer(SpellingFID, InputFile, PP);
180 L->BufferPtr = StrData;
181 L->BufferEnd = StrData+TokLen;
182 assert(L->BufferEnd[0] == 0 &&
"Buffer is not nul terminated!");
188 ExpansionLocEnd, TokLen);
195 L->Is_PragmaLexer =
true;
204 char Quote = Charify ?
'\'' :
'"';
205 for (
unsigned i = 0, e = Result.size(); i != e; ++i) {
206 if (Result[i] ==
'\\' || Result[i] == Quote) {
207 Result.insert(Result.begin()+i,
'\\');
217 for (
unsigned i = 0, e = Str.size(); i != e; ++i) {
218 if (Str[i] ==
'\\' || Str[i] ==
'"') {
219 Str.insert(Str.begin()+i,
'\\');
233 assert(Tok.
needsCleaning() &&
"getSpellingSlow called on simple token");
236 const char *BufEnd = BufPtr + Tok.
getLength();
240 while (BufPtr < BufEnd) {
245 if (Spelling[Length - 1] ==
'"')
253 Spelling[Length - 2] ==
'R' && Spelling[Length - 1] ==
'"') {
256 const char *RawEnd = BufEnd;
257 do --RawEnd;
while (*RawEnd !=
'"');
258 size_t RawLength = RawEnd - BufPtr + 1;
261 memcpy(Spelling + Length, BufPtr, RawLength);
269 while (BufPtr < BufEnd) {
276 "NeedsCleaning flag set on token that didn't need cleaning!");
294 bool invalidTemp =
false;
295 StringRef file = SM.
getBufferData(locInfo.first, &invalidTemp);
297 if (invalid) *invalid =
true;
301 const char *tokenBegin = file.data() + locInfo.second;
305 file.begin(), tokenBegin, file.end());
307 lexer.LexFromRawLexer(token);
309 unsigned length = token.getLength();
312 if (!token.needsCleaning())
313 return StringRef(tokenBegin, length);
316 buffer.resize(length);
317 buffer.resize(
getSpellingSlow(token, tokenBegin, options, buffer.data()));
318 return StringRef(buffer.data(), buffer.size());
328 assert((
int)Tok.
getLength() >= 0 &&
"Token character range is bogus!");
330 bool CharDataInvalid =
false;
334 *Invalid = CharDataInvalid;
336 return std::string();
340 return std::string(TokStart, TokStart + Tok.
getLength());
344 Result.resize(
getSpellingSlow(Tok, TokStart, LangOpts, &*Result.begin()));
361 assert((
int)Tok.
getLength() >= 0 &&
"Token character range is bogus!");
363 const char *TokStart =
nullptr;
365 if (Tok.
is(tok::raw_identifier))
370 Buffer = II->getNameStart();
371 return II->getLength();
381 bool CharDataInvalid =
false;
384 *Invalid = CharDataInvalid;
385 if (CharDataInvalid) {
398 return getSpellingSlow(Tok, TokStart, LangOpts, const_cast<char*>(Buffer));
420 bool IgnoreWhiteSpace) {
431 bool Invalid =
false;
436 const char *StrData = Buffer.data()+LocInfo.second;
443 Buffer.begin(), StrData, Buffer.end());
445 TheLexer.LexFromRawLexer(Result);
454 if (LocInfo.first.isInvalid())
457 bool Invalid =
false;
464 const char *BufStart = Buffer.data();
465 if (LocInfo.second >= Buffer.size())
468 const char *StrData = BufStart+LocInfo.second;
469 if (StrData[0] ==
'\n' || StrData[0] ==
'\r')
472 const char *LexStart = StrData;
473 while (LexStart != BufStart) {
474 if (LexStart[0] ==
'\n' || LexStart[0] ==
'\r') {
484 Lexer TheLexer(LexerStartLoc, LangOpts, BufStart, LexStart, Buffer.end());
490 TheLexer.LexFromRawLexer(TheTok);
492 if (TheLexer.getBufferLocation() > StrData) {
496 if (TheLexer.getBufferLocation() - TheTok.
getLength() <= StrData)
521 std::pair<FileID, unsigned> BeginFileLocInfo
523 assert(FileLocInfo.first == BeginFileLocInfo.first &&
524 FileLocInfo.second >= BeginFileLocInfo.second);
543 const unsigned StartOffset = 1;
545 Lexer TheLexer(FileLoc, LangOpts, Buffer.begin(), Buffer.begin(),
552 bool InPreprocessorDirective =
false;
555 unsigned IfCount = 0;
558 unsigned MaxLineOffset = 0;
560 const char *CurPtr = Buffer.begin();
561 unsigned CurLine = 0;
562 while (CurPtr != Buffer.end()) {
566 if (CurLine == MaxLines)
570 if (CurPtr != Buffer.end())
571 MaxLineOffset = CurPtr - Buffer.begin();
575 TheLexer.LexFromRawLexer(TheTok);
577 if (InPreprocessorDirective) {
590 InPreprocessorDirective =
false;
599 if (MaxLineOffset && TokOffset >= MaxLineOffset)
604 if (TheTok.
getKind() == tok::comment) {
612 Token HashTok = TheTok;
613 InPreprocessorDirective =
true;
619 TheLexer.LexFromRawLexer(TheTok);
623 = llvm::StringSwitch<PreambleDirectiveKind>(Keyword)
624 .Case(
"include", PDK_Skipped)
625 .Case(
"__include_macros", PDK_Skipped)
626 .Case(
"define", PDK_Skipped)
627 .Case(
"undef", PDK_Skipped)
628 .Case(
"line", PDK_Skipped)
629 .Case(
"error", PDK_Skipped)
630 .Case(
"pragma", PDK_Skipped)
631 .Case(
"import", PDK_Skipped)
632 .Case(
"include_next", PDK_Skipped)
633 .Case(
"warning", PDK_Skipped)
634 .Case(
"ident", PDK_Skipped)
635 .Case(
"sccs", PDK_Skipped)
636 .Case(
"assert", PDK_Skipped)
637 .Case(
"unassert", PDK_Skipped)
638 .Case(
"if", PDK_StartIf)
639 .Case(
"ifdef", PDK_StartIf)
640 .Case(
"ifndef", PDK_StartIf)
641 .Case(
"elif", PDK_Skipped)
642 .Case(
"else", PDK_Skipped)
643 .Case(
"endif", PDK_EndIf)
644 .Default(PDK_Unknown);
652 IfStartTok = HashTok;
674 InPreprocessorDirective =
false;
687 else if (ActiveCommentLoc.
isValid())
688 End = ActiveCommentLoc;
707 bool Invalid =
false;
711 if (Invalid || (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr)))
714 unsigned PhysOffset = 0;
719 while (Lexer::isObviouslySimpleCharacter(*TokPtr)) {
729 for (; CharNo; --CharNo) {
740 if (!Lexer::isObviouslySimpleCharacter(*TokPtr))
741 PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr;
793 if (expansionLoc.isFileID()) {
796 *MacroBegin = expansionLoc;
824 *MacroEnd = expansionLoc;
898 bool Invalid =
false;
928 if (Invalid) *Invalid =
true;
934 if (beginInfo.first.isInvalid()) {
935 if (Invalid) *Invalid =
true;
941 beginInfo.second > EndOffs) {
942 if (Invalid) *Invalid =
true;
947 bool invalidTemp =
false;
948 StringRef file = SM.
getBufferData(beginInfo.first, &invalidTemp);
950 if (Invalid) *Invalid =
true;
954 if (Invalid) *Invalid =
false;
955 return file.substr(beginInfo.second, EndOffs - beginInfo.second);
961 assert(Loc.
isMacroID() &&
"Only reasonble to call this on macros");
1001 StringRef ExpansionBuffer = SM.
getBufferData(ExpansionInfo.first);
1002 return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
1007 assert(Loc.
isMacroID() &&
"Only reasonble to call this on macros");
1026 StringRef ExpansionBuffer = SM.
getBufferData(ExpansionInfo.first);
1027 return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
1047 unsigned CharNo,
unsigned TokLen) {
1048 assert(FileLoc.
isMacroID() &&
"Must be a macro expansion");
1062 std::pair<SourceLocation,SourceLocation> II =
1063 SM.getImmediateExpansionRange(FileLoc);
1065 return SM.createExpansionLoc(SpellingLoc, II.first, II.second, TokLen);
1071 unsigned TokLen)
const {
1072 assert(Loc >= BufferStart && Loc <= BufferEnd &&
1073 "Location out of range for this buffer!");
1077 unsigned CharNo = Loc-BufferStart;
1083 assert(
PP &&
"This doesn't work on raw lexers");
1102 case '=':
return '#';
1103 case ')':
return ']';
1104 case '(':
return '[';
1105 case '!':
return '|';
1106 case '\'':
return '^';
1107 case '>':
return '}';
1108 case '/':
return '\\';
1109 case '<':
return '{';
1110 case '-':
return '~';
1120 if (!Res || !L)
return Res;
1124 L->
Diag(CP-2, diag::trigraph_ignored);
1129 L->
Diag(CP-2, diag::trigraph_converted) << StringRef(&Res, 1);
1136 unsigned Lexer::getEscapedNewLineSize(
const char *Ptr) {
1141 if (Ptr[Size-1] !=
'\n' && Ptr[Size-1] !=
'\r')
1145 if ((Ptr[Size] ==
'\r' || Ptr[Size] ==
'\n') &&
1146 Ptr[Size-1] != Ptr[Size])
1159 const char *Lexer::SkipEscapedNewLines(
const char *
P) {
1161 const char *AfterEscape;
1164 }
else if (*P ==
'?') {
1166 if (P[1] !=
'?' || P[2] !=
'/')
1173 unsigned NewLineSize = Lexer::getEscapedNewLineSize(AfterEscape);
1174 if (NewLineSize == 0)
return P;
1175 P = AfterEscape+NewLineSize;
1187 bool SkipTrailingWhitespaceAndNewLine) {
1198 bool InvalidTemp =
false;
1199 StringRef File = SM.
getBufferData(LocInfo.first, &InvalidTemp);
1203 const char *TokenBegin = File.data() + LocInfo.second;
1207 TokenBegin, File.end());
1210 lexer.LexFromRawLexer(Tok);
1211 if (Tok.isNot(TKind))
1216 unsigned NumWhitespaceChars = 0;
1217 if (SkipTrailingWhitespaceAndNewLine) {
1220 unsigned char C = *TokenEnd;
1223 NumWhitespaceChars++;
1227 if (C ==
'\n' || C ==
'\r') {
1230 NumWhitespaceChars++;
1231 if ((C ==
'\n' || C ==
'\r') && C != PrevC)
1232 NumWhitespaceChars++;
1255 char Lexer::getCharAndSizeSlow(
const char *Ptr,
unsigned &Size,
1258 if (Ptr[0] ==
'\\') {
1267 if (
unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) {
1273 Diag(Ptr, diag::backslash_newline_space);
1276 Size += EscapedNewLineSize;
1277 Ptr += EscapedNewLineSize;
1282 if (*Ptr ==
'\n' || *Ptr ==
'\r' || *Ptr ==
'\0')
1286 return getCharAndSizeSlow(Ptr, Size, Tok);
1294 if (Ptr[0] ==
'?' && Ptr[1] ==
'?') {
1303 if (C ==
'\\')
goto Slash;
1320 char Lexer::getCharAndSizeSlowNoWarn(
const char *Ptr,
unsigned &Size,
1323 if (Ptr[0] ==
'\\') {
1331 if (
unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) {
1333 Size += EscapedNewLineSize;
1334 Ptr += EscapedNewLineSize;
1339 if (*Ptr ==
'\n' || *Ptr ==
'\r' || *Ptr ==
'\0')
1343 return getCharAndSizeSlowNoWarn(Ptr, Size, LangOpts);
1351 if (LangOpts.Trigraphs && Ptr[0] ==
'?' && Ptr[1] ==
'?') {
1357 if (C ==
'\\')
goto Slash;
1372 void Lexer::SkipBytes(
unsigned Bytes,
bool StartOfLine) {
1374 if (BufferPtr > BufferEnd)
1375 BufferPtr = BufferEnd;
1379 IsAtStartOfLine = StartOfLine;
1380 IsAtPhysicalStartOfLine = StartOfLine;
1384 if (LangOpts.AsmPreprocessor) {
1386 }
else if (LangOpts.CPlusPlus11 || LangOpts.C11) {
1387 static const llvm::sys::UnicodeCharSet C11AllowedIDChars(
1389 return C11AllowedIDChars.contains(C);
1390 }
else if (LangOpts.CPlusPlus) {
1391 static const llvm::sys::UnicodeCharSet CXX03AllowedIDChars(
1393 return CXX03AllowedIDChars.contains(C);
1395 static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
1397 return C99AllowedIDChars.contains(C);
1403 if (LangOpts.AsmPreprocessor) {
1405 }
else if (LangOpts.CPlusPlus11 || LangOpts.C11) {
1406 static const llvm::sys::UnicodeCharSet C11DisallowedInitialIDChars(
1408 return !C11DisallowedInitialIDChars.contains(C);
1409 }
else if (LangOpts.CPlusPlus) {
1412 static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
1414 return !C99DisallowedInitialIDChars.contains(C);
1429 CannotAppearInIdentifier = 0,
1430 CannotStartIdentifier
1433 static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
1435 static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
1437 if (!C99AllowedIDChars.contains(C)) {
1440 << CannotAppearInIdentifier;
1441 }
else if (IsFirst && C99DisallowedInitialIDChars.contains(C)) {
1444 << CannotStartIdentifier;
1450 static const llvm::sys::UnicodeCharSet CXX03AllowedIDChars(
1452 if (!CXX03AllowedIDChars.contains(C)) {
1453 Diags.
Report(Range.
getBegin(), diag::warn_cxx98_compat_unicode_id)
1459 bool Lexer::tryConsumeIdentifierUCN(
const char *&CurPtr,
unsigned Size,
1461 const char *UCNPtr = CurPtr + Size;
1462 uint32_t CodePoint = tryReadUCN(UCNPtr, CurPtr,
nullptr);
1472 if ((UCNPtr - CurPtr == 6 && CurPtr[1] ==
'u') ||
1473 (UCNPtr - CurPtr == 10 && CurPtr[1] ==
'U'))
1476 while (CurPtr != UCNPtr)
1477 (void)getAndAdvanceChar(CurPtr, Result);
1481 bool Lexer::tryConsumeIdentifierUTF8Char(
const char *&CurPtr) {
1482 const char *UnicodePtr = CurPtr;
1484 ConversionResult Result =
1485 llvm::convertUTF8Sequence((
const UTF8 **)&UnicodePtr,
1486 (
const UTF8 *)BufferEnd,
1489 if (Result != conversionOK ||
1498 CurPtr = UnicodePtr;
1502 bool Lexer::LexIdentifier(
Token &Result,
const char *CurPtr) {
1505 unsigned char C = *CurPtr++;
1516 if (
isASCII(C) && C !=
'\\' && C !=
'?' &&
1517 (C !=
'$' || !LangOpts.DollarIdents)) {
1519 const char *IdStart = BufferPtr;
1520 FormTokenWithChars(Result, CurPtr, tok::raw_identifier);
1542 C = getCharAndSize(CurPtr, Size);
1546 if (!LangOpts.DollarIdents)
goto FinishIdentifier;
1550 Diag(CurPtr, diag::ext_dollar_in_identifier);
1551 CurPtr = ConsumeChar(CurPtr, Size, Result);
1552 C = getCharAndSize(CurPtr, Size);
1555 }
else if (C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result)) {
1556 C = getCharAndSize(CurPtr, Size);
1558 }
else if (!
isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr)) {
1559 C = getCharAndSize(CurPtr, Size);
1562 goto FinishIdentifier;
1566 CurPtr = ConsumeChar(CurPtr, Size, Result);
1568 C = getCharAndSize(CurPtr, Size);
1570 CurPtr = ConsumeChar(CurPtr, Size, Result);
1571 C = getCharAndSize(CurPtr, Size);
1578 bool Lexer::isHexaLiteral(
const char *Start,
const LangOptions &LangOpts) {
1584 return (C2 ==
'x' || C2 ==
'X');
1590 bool Lexer::LexNumericConstant(
Token &Result,
const char *CurPtr) {
1592 char C = getCharAndSize(CurPtr, Size);
1595 CurPtr = ConsumeChar(CurPtr, Size, Result);
1597 C = getCharAndSize(CurPtr, Size);
1601 if ((C ==
'-' || C ==
'+') && (PrevCh ==
'E' || PrevCh ==
'e')) {
1604 if (!LangOpts.MicrosoftExt || !isHexaLiteral(BufferPtr, LangOpts))
1605 return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
1609 if ((C ==
'-' || C ==
'+') && (PrevCh ==
'P' || PrevCh ==
'p')) {
1613 bool IsHexFloat =
true;
1614 if (!LangOpts.C99) {
1615 if (!isHexaLiteral(BufferPtr, LangOpts))
1618 std::find(BufferPtr, CurPtr,
'_') != CurPtr)
1622 return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
1631 Diag(CurPtr, diag::warn_cxx11_compat_digit_separator);
1632 CurPtr = ConsumeChar(CurPtr, Size, Result);
1633 CurPtr = ConsumeChar(CurPtr, NextSize, Result);
1634 return LexNumericConstant(Result, CurPtr);
1639 if (C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result))
1640 return LexNumericConstant(Result, CurPtr);
1641 if (!
isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr))
1642 return LexNumericConstant(Result, CurPtr);
1645 const char *TokStart = BufferPtr;
1646 FormTokenWithChars(Result, CurPtr, tok::numeric_constant);
1653 const char *Lexer::LexUDSuffix(
Token &Result,
const char *CurPtr,
1654 bool IsStringLiteral) {
1659 char C = getCharAndSize(CurPtr, Size);
1660 bool Consumed =
false;
1663 if (C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result))
1665 else if (!
isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr))
1674 C ==
'_' ? diag::warn_cxx11_compat_user_defined_literal
1675 : diag::warn_cxx11_compat_reserved_user_defined_literal)
1686 bool IsUDSuffix =
false;
1693 const unsigned MaxStandardSuffixLength = 3;
1694 char Buffer[MaxStandardSuffixLength] = { C };
1695 unsigned Consumed = Size;
1703 IsUDSuffix = (Chars == 1 && Buffer[0] ==
's') ||
1709 if (Chars == MaxStandardSuffixLength)
1713 Buffer[Chars++] =
Next;
1714 Consumed += NextSize;
1721 ? diag::ext_ms_reserved_user_defined_literal
1722 : diag::ext_reserved_user_defined_literal)
1727 CurPtr = ConsumeChar(CurPtr, Size, Result);
1732 C = getCharAndSize(CurPtr, Size);
1734 else if (C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result)) {}
1735 else if (!
isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr)) {}
1744 bool Lexer::LexStringLiteral(
Token &Result,
const char *CurPtr,
1747 const char *NulCharacter =
nullptr;
1750 (Kind == tok::utf8_string_literal ||
1751 Kind == tok::utf16_string_literal ||
1752 Kind == tok::utf32_string_literal))
1754 ? diag::warn_cxx98_compat_unicode_literal
1755 : diag::warn_c99_compat_unicode_literal);
1757 char C = getAndAdvanceChar(CurPtr, Result);
1762 C = getAndAdvanceChar(CurPtr, Result);
1764 if (C ==
'\n' || C ==
'\r' ||
1765 (C == 0 && CurPtr-1 == BufferEnd)) {
1767 Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 1;
1768 FormTokenWithChars(Result, CurPtr-1, tok::unknown);
1773 if (isCodeCompletionPoint(CurPtr-1)) {
1775 FormTokenWithChars(Result, CurPtr-1, tok::unknown);
1780 NulCharacter = CurPtr-1;
1782 C = getAndAdvanceChar(CurPtr, Result);
1787 CurPtr = LexUDSuffix(Result, CurPtr,
true);
1791 Diag(NulCharacter, diag::null_in_char_or_string) << 1;
1794 const char *TokStart = BufferPtr;
1795 FormTokenWithChars(Result, CurPtr, Kind);
1802 bool Lexer::LexRawStringLiteral(
Token &Result,
const char *CurPtr,
1810 Diag(BufferPtr, diag::warn_cxx98_compat_raw_string_literal);
1812 unsigned PrefixLen = 0;
1818 if (CurPtr[PrefixLen] !=
'(') {
1820 const char *PrefixEnd = &CurPtr[PrefixLen];
1821 if (PrefixLen == 16) {
1822 Diag(PrefixEnd, diag::err_raw_delim_too_long);
1824 Diag(PrefixEnd, diag::err_invalid_char_raw_delim)
1825 << StringRef(PrefixEnd, 1);
1837 if (C == 0 && CurPtr-1 == BufferEnd) {
1843 FormTokenWithChars(Result, CurPtr, tok::unknown);
1848 const char *Prefix = CurPtr;
1849 CurPtr += PrefixLen + 1;
1856 if (strncmp(CurPtr, Prefix, PrefixLen) == 0 && CurPtr[PrefixLen] ==
'"') {
1857 CurPtr += PrefixLen + 1;
1860 }
else if (C == 0 && CurPtr-1 == BufferEnd) {
1862 Diag(BufferPtr, diag::err_unterminated_raw_string)
1863 << StringRef(Prefix, PrefixLen);
1864 FormTokenWithChars(Result, CurPtr-1, tok::unknown);
1871 CurPtr = LexUDSuffix(Result, CurPtr,
true);
1874 const char *TokStart = BufferPtr;
1875 FormTokenWithChars(Result, CurPtr, Kind);
1882 bool Lexer::LexAngledStringLiteral(
Token &Result,
const char *CurPtr) {
1884 const char *NulCharacter =
nullptr;
1885 const char *AfterLessPos = CurPtr;
1886 char C = getAndAdvanceChar(CurPtr, Result);
1889 if (C ==
'\\' && CurPtr < BufferEnd) {
1891 getAndAdvanceChar(CurPtr, Result);
1892 }
else if (C ==
'\n' || C ==
'\r' ||
1893 (C == 0 && (CurPtr-1 == BufferEnd ||
1894 isCodeCompletionPoint(CurPtr-1)))) {
1897 FormTokenWithChars(Result, AfterLessPos, tok::less);
1899 }
else if (C == 0) {
1900 NulCharacter = CurPtr-1;
1902 C = getAndAdvanceChar(CurPtr, Result);
1907 Diag(NulCharacter, diag::null_in_char_or_string) << 1;
1910 const char *TokStart = BufferPtr;
1911 FormTokenWithChars(Result, CurPtr, tok::angle_string_literal);
1919 bool Lexer::LexCharConstant(
Token &Result,
const char *CurPtr,
1922 const char *NulCharacter =
nullptr;
1925 if (Kind == tok::utf16_char_constant || Kind == tok::utf32_char_constant)
1927 ? diag::warn_cxx98_compat_unicode_literal
1928 : diag::warn_c99_compat_unicode_literal);
1929 else if (Kind == tok::utf8_char_constant)
1930 Diag(BufferPtr, diag::warn_cxx14_compat_u8_character_literal);
1933 char C = getAndAdvanceChar(CurPtr, Result);
1936 Diag(BufferPtr, diag::ext_empty_character);
1937 FormTokenWithChars(Result, CurPtr, tok::unknown);
1944 C = getAndAdvanceChar(CurPtr, Result);
1946 if (C ==
'\n' || C ==
'\r' ||
1947 (C == 0 && CurPtr-1 == BufferEnd)) {
1949 Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 0;
1950 FormTokenWithChars(Result, CurPtr-1, tok::unknown);
1955 if (isCodeCompletionPoint(CurPtr-1)) {
1957 FormTokenWithChars(Result, CurPtr-1, tok::unknown);
1962 NulCharacter = CurPtr-1;
1964 C = getAndAdvanceChar(CurPtr, Result);
1969 CurPtr = LexUDSuffix(Result, CurPtr,
false);
1973 Diag(NulCharacter, diag::null_in_char_or_string) << 0;
1976 const char *TokStart = BufferPtr;
1977 FormTokenWithChars(Result, CurPtr, Kind);
1987 bool Lexer::SkipWhitespace(
Token &Result,
const char *CurPtr,
1988 bool &TokAtPhysicalStartOfLine) {
1992 unsigned char Char = *CurPtr;
2017 FormTokenWithChars(Result, CurPtr, tok::unknown);
2019 IsAtStartOfLine =
true;
2020 IsAtPhysicalStartOfLine =
true;
2027 char PrevChar = CurPtr[-1];
2033 TokAtPhysicalStartOfLine =
true;
2046 bool Lexer::SkipLineComment(
Token &Result,
const char *CurPtr,
2047 bool &TokAtPhysicalStartOfLine) {
2051 Diag(BufferPtr, diag::ext_line_comment);
2055 LangOpts.LineComment =
true;
2066 C !=
'\n' && C !=
'\r')
2069 const char *NextLine = CurPtr;
2072 const char *EscapePtr = CurPtr-1;
2073 bool HasSpace =
false;
2079 if (*EscapePtr ==
'\\')
2081 else if (EscapePtr[0] ==
'/' && EscapePtr[-1] ==
'?' &&
2082 EscapePtr[-2] ==
'?')
2083 CurPtr = EscapePtr-2;
2089 Diag(EscapePtr, diag::backslash_newline_space);
2096 const char *OldPtr = CurPtr;
2099 C = getAndAdvanceChar(CurPtr, Result);
2104 if (C != 0 && CurPtr == OldPtr+1) {
2112 if (CurPtr != OldPtr+1 && C !=
'/' && CurPtr[0] !=
'/') {
2113 for (; OldPtr != CurPtr; ++OldPtr)
2114 if (OldPtr[0] ==
'\n' || OldPtr[0] ==
'\r') {
2118 const char *ForwardPtr = CurPtr;
2121 if (ForwardPtr[0] ==
'/' && ForwardPtr[1] ==
'/')
2126 Diag(OldPtr-1, diag::ext_multi_line_line_comment);
2131 if (CurPtr == BufferEnd+1) {
2136 if (C ==
'\0' && isCodeCompletionPoint(CurPtr-1)) {
2142 }
while (C !=
'\n' && C !=
'\r');
2155 return SaveLineComment(Result, CurPtr);
2173 TokAtPhysicalStartOfLine =
true;
2182 bool Lexer::SaveLineComment(
Token &Result,
const char *CurPtr) {
2185 FormTokenWithChars(Result, CurPtr, tok::comment);
2197 assert(Spelling[0] ==
'/' && Spelling[1] ==
'/' &&
"Not line comment?");
2212 assert(CurPtr[0] ==
'\n' || CurPtr[0] ==
'\r');
2218 if (CurPtr[0] ==
'\n' || CurPtr[0] ==
'\r') {
2220 if (CurPtr[0] == CurPtr[1])
2228 bool HasSpace =
false;
2235 if (*CurPtr ==
'\\') {
2236 if (CurPtr[-1] !=
'*')
return false;
2239 if (CurPtr[0] !=
'/' || CurPtr[-1] !=
'?' || CurPtr[-2] !=
'?' ||
2250 L->
Diag(CurPtr, diag::trigraph_ignored_block_comment);
2254 L->
Diag(CurPtr, diag::trigraph_ends_block_comment);
2259 L->
Diag(CurPtr, diag::escaped_newline_block_comment_end);
2263 L->
Diag(CurPtr, diag::backslash_newline_space);
2284 bool Lexer::SkipBlockComment(
Token &Result,
const char *CurPtr,
2285 bool &TokAtPhysicalStartOfLine) {
2295 unsigned char C = getCharAndSize(CurPtr, CharSize);
2297 if (C == 0 && CurPtr == BufferEnd+1) {
2299 Diag(BufferPtr, diag::err_unterminated_block_comment);
2305 FormTokenWithChars(Result, CurPtr, tok::unknown);
2321 if (CurPtr + 24 < BufferEnd &&
2326 while (C !=
'/' && ((
intptr_t)CurPtr & 0x0F) != 0)
2329 if (C ==
'/')
goto FoundSlash;
2333 while (CurPtr+16 <= BufferEnd) {
2340 CurPtr += llvm::countTrailingZeros<unsigned>(cmp) + 1;
2346 __vector
unsigned char Slashes = {
2347 '/',
'/',
'/',
'/',
'/',
'/',
'/',
'/',
2348 '/',
'/',
'/',
'/',
'/',
'/',
'/',
'/'
2350 while (CurPtr+16 <= BufferEnd &&
2351 !
vec_any_eq(*(
const vector
unsigned char*)CurPtr, Slashes))
2355 while (CurPtr[0] !=
'/' &&
2359 CurPtr+4 < BufferEnd) {
2369 while (C !=
'/' && C !=
'\0')
2374 if (CurPtr[-2] ==
'*')
2377 if ((CurPtr[-2] ==
'\n' || CurPtr[-2] ==
'\r')) {
2384 if (CurPtr[0] ==
'*' && CurPtr[1] !=
'/') {
2389 Diag(CurPtr-1, diag::warn_nested_block_comment);
2391 }
else if (C == 0 && CurPtr == BufferEnd+1) {
2393 Diag(BufferPtr, diag::err_unterminated_block_comment);
2402 FormTokenWithChars(Result, CurPtr, tok::unknown);
2408 }
else if (C ==
'\0' && isCodeCompletionPoint(CurPtr-1)) {
2427 FormTokenWithChars(Result, CurPtr, tok::comment);
2436 SkipWhitespace(Result, CurPtr+1, TokAtPhysicalStartOfLine);
2454 "Must be in a preprocessing directive!");
2458 const char *CurPtr = BufferPtr;
2460 char Char = getAndAdvanceChar(CurPtr, Tmp);
2464 Result->push_back(Char);
2468 if (CurPtr-1 != BufferEnd) {
2469 if (isCodeCompletionPoint(CurPtr-1)) {
2477 Result->push_back(Char);
2484 assert(CurPtr[-1] == Char &&
"Trigraphs for newline?");
2485 BufferPtr = CurPtr-1;
2489 if (Tmp.
is(tok::code_completion)) {
2494 assert(Tmp.
is(tok::eod) &&
"Unexpected token!");
2506 bool Lexer::LexEndOfFile(
Token &Result,
const char *CurPtr) {
2514 FormTokenWithChars(Result, CurPtr, tok::eod);
2526 BufferPtr = BufferEnd;
2527 FormTokenWithChars(Result, BufferEnd,
tok::eof);
2537 diag::err_pp_unterminated_conditional);
2543 if (CurPtr != BufferStart && (CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')) {
2548 if (LangOpts.CPlusPlus11) {
2552 if (!Diags.
isIgnored(diag::warn_cxx98_compat_no_newline_eof, EndLoc)) {
2553 DiagID = diag::warn_cxx98_compat_no_newline_eof;
2555 DiagID = diag::warn_no_newline_eof;
2558 DiagID = diag::ext_no_newline_eof;
2561 Diag(BufferEnd, DiagID)
2575 unsigned Lexer::isNextPPTokenLParen() {
2576 assert(!
LexingRawMode &&
"How can we expand a macro from a skipping buffer?");
2584 const char *TmpBufferPtr = BufferPtr;
2586 bool atStartOfLine = IsAtStartOfLine;
2587 bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
2588 bool leadingSpace = HasLeadingSpace;
2594 BufferPtr = TmpBufferPtr;
2596 HasLeadingSpace = leadingSpace;
2597 IsAtStartOfLine = atStartOfLine;
2598 IsAtPhysicalStartOfLine = atPhysicalStartOfLine;
2605 return Tok.
is(tok::l_paren);
2611 const char *Terminator = CMK ==
CMK_Perforce ?
"<<<<\n" :
">>>>>>>";
2613 auto RestOfBuffer = StringRef(CurPtr, BufferEnd - CurPtr).substr(TermLen);
2614 size_t Pos = RestOfBuffer.find(Terminator);
2615 while (Pos != StringRef::npos) {
2618 (RestOfBuffer[Pos - 1] !=
'\r' && RestOfBuffer[Pos - 1] !=
'\n')) {
2619 RestOfBuffer = RestOfBuffer.substr(Pos+TermLen);
2620 Pos = RestOfBuffer.find(Terminator);
2623 return RestOfBuffer.data()+Pos;
2632 bool Lexer::IsStartOfConflictMarker(
const char *CurPtr) {
2634 if (CurPtr != BufferStart &&
2635 CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')
2639 if (!StringRef(CurPtr, BufferEnd - CurPtr).startswith(
"<<<<<<<") &&
2640 !StringRef(CurPtr, BufferEnd - CurPtr).startswith(
">>>> "))
2655 Diag(CurPtr, diag::err_conflict_marker);
2656 CurrentConflictMarkerState =
Kind;
2660 while (*CurPtr !=
'\r' && *CurPtr !=
'\n') {
2661 assert(CurPtr != BufferEnd &&
"Didn't find end of line");
2677 bool Lexer::HandleEndOfConflictMarker(
const char *CurPtr) {
2679 if (CurPtr != BufferStart &&
2680 CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')
2689 for (
unsigned i = 1; i != 4; ++i)
2690 if (CurPtr[i] != CurPtr[0])
2697 CurrentConflictMarkerState)) {
2701 while (CurPtr != BufferEnd && *CurPtr !=
'\r' && *CurPtr !=
'\n')
2707 CurrentConflictMarkerState =
CMK_None;
2714 bool Lexer::isCodeCompletionPoint(
const char *CurPtr)
const {
2723 uint32_t Lexer::tryReadUCN(
const char *&StartPtr,
const char *SlashLoc,
2726 char Kind = getCharAndSize(StartPtr, CharSize);
2728 unsigned NumHexDigits;
2731 else if (Kind ==
'U')
2736 if (!LangOpts.CPlusPlus && !LangOpts.C99) {
2738 Diag(SlashLoc, diag::warn_ucn_not_valid_in_c89);
2742 const char *CurPtr = StartPtr + CharSize;
2743 const char *KindLoc = &CurPtr[-1];
2745 uint32_t CodePoint = 0;
2746 for (
unsigned i = 0; i < NumHexDigits; ++i) {
2747 char C = getCharAndSize(CurPtr, CharSize);
2749 unsigned Value = llvm::hexDigitValue(C);
2753 Diag(BufferPtr, diag::warn_ucn_escape_no_digits)
2754 << StringRef(KindLoc, 1);
2756 Diag(BufferPtr, diag::warn_ucn_escape_incomplete);
2759 if (i == 4 && NumHexDigits == 8) {
2761 Diag(KindLoc, diag::note_ucn_four_not_eight)
2778 if (CurPtr - StartPtr == (
ptrdiff_t)NumHexDigits + 2)
2781 while (StartPtr != CurPtr)
2782 (void)getAndAdvanceChar(StartPtr, *Result);
2788 if (LangOpts.AsmPreprocessor)
2802 if (CodePoint < 0xA0) {
2803 if (CodePoint == 0x24 || CodePoint == 0x40 || CodePoint == 0x60)
2809 if (CodePoint < 0x20 || CodePoint >= 0x7F)
2810 Diag(BufferPtr, diag::err_ucn_control_character);
2812 char C =
static_cast<char>(CodePoint);
2813 Diag(BufferPtr, diag::err_ucn_escape_basic_scs) << StringRef(&C, 1);
2819 }
else if (CodePoint >= 0xD800 && CodePoint <= 0xDFFF) {
2824 if (LangOpts.CPlusPlus && !LangOpts.CPlusPlus11)
2825 Diag(BufferPtr, diag::warn_ucn_escape_surrogate);
2827 Diag(BufferPtr, diag::err_ucn_escape_invalid);
2835 bool Lexer::CheckUnicodeWhitespace(
Token &Result, uint32_t C,
2836 const char *CurPtr) {
2837 static const llvm::sys::UnicodeCharSet UnicodeWhitespaceChars(
2840 UnicodeWhitespaceChars.contains(C)) {
2841 Diag(BufferPtr, diag::ext_unicode_whitespace)
2850 bool Lexer::LexUnicode(
Token &Result, uint32_t C,
const char *CurPtr) {
2860 return LexIdentifier(Result, CurPtr);
2875 Diag(BufferPtr, diag::err_non_ascii)
2885 FormTokenWithChars(Result, CurPtr, tok::unknown);
2889 void Lexer::PropagateLineStartLeadingSpaceInfo(
Token &Result) {
2896 bool Lexer::Lex(
Token &Result) {
2901 if (IsAtStartOfLine) {
2903 IsAtStartOfLine =
false;
2906 if (HasLeadingSpace) {
2908 HasLeadingSpace =
false;
2911 if (HasLeadingEmptyMacro) {
2913 HasLeadingEmptyMacro =
false;
2916 bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
2917 IsAtPhysicalStartOfLine =
false;
2920 bool returnedToken = LexTokenInternal(Result, atPhysicalStartOfLine);
2922 assert((returnedToken || !isRawLex) &&
"Raw lex must succeed");
2923 return returnedToken;
2931 bool Lexer::LexTokenInternal(
Token &Result,
bool TokAtPhysicalStartOfLine) {
2938 const char *CurPtr = BufferPtr;
2941 if ((*CurPtr ==
' ') || (*CurPtr ==
'\t')) {
2943 while ((*CurPtr ==
' ') || (*CurPtr ==
'\t'))
2950 FormTokenWithChars(Result, CurPtr, tok::unknown);
2959 unsigned SizeTmp, SizeTmp2;
2962 char Char = getAndAdvanceChar(CurPtr, Result);
2968 if (CurPtr-1 == BufferEnd)
2969 return LexEndOfFile(Result, CurPtr-1);
2972 if (isCodeCompletionPoint(CurPtr-1)) {
2975 FormTokenWithChars(Result, CurPtr, tok::code_completion);
2980 Diag(CurPtr-1, diag::null_in_file);
2982 if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
2991 if (LangOpts.MicrosoftExt) {
2993 Diag(CurPtr-1, diag::ext_ctrl_z_eof_microsoft);
2994 return LexEndOfFile(Result, CurPtr-1);
2998 Kind = tok::unknown;
3014 IsAtStartOfLine =
true;
3015 IsAtPhysicalStartOfLine =
true;
3024 if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
3034 SkipHorizontalWhitespace:
3036 if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
3045 LangOpts.LineComment &&
3046 (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP)) {
3047 if (SkipLineComment(Result, CurPtr+2, TokAtPhysicalStartOfLine))
3049 goto SkipIgnoredUnits;
3051 if (SkipBlockComment(Result, CurPtr+2, TokAtPhysicalStartOfLine))
3053 goto SkipIgnoredUnits;
3055 goto SkipHorizontalWhitespace;
3063 case '0':
case '1':
case '2':
case '3':
case '4':
3064 case '5':
case '6':
case '7':
case '8':
case '9':
3067 return LexNumericConstant(Result, CurPtr);
3073 if (LangOpts.CPlusPlus11 || LangOpts.C11) {
3074 Char = getCharAndSize(CurPtr, SizeTmp);
3078 return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3079 tok::utf16_string_literal);
3083 return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3084 tok::utf16_char_constant);
3087 if (Char ==
'R' && LangOpts.CPlusPlus11 &&
3088 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3089 return LexRawStringLiteral(Result,
3090 ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3092 tok::utf16_string_literal);
3095 char Char2 = getCharAndSize(CurPtr + SizeTmp, SizeTmp2);
3099 return LexStringLiteral(Result,
3100 ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3102 tok::utf8_string_literal);
3103 if (Char2 ==
'\'' && LangOpts.CPlusPlus1z)
3104 return LexCharConstant(
3105 Result, ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3107 tok::utf8_char_constant);
3109 if (Char2 ==
'R' && LangOpts.CPlusPlus11) {
3111 char Char3 = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
3114 return LexRawStringLiteral(Result,
3115 ConsumeChar(ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3118 tok::utf8_string_literal);
3125 return LexIdentifier(Result, CurPtr);
3131 if (LangOpts.CPlusPlus11 || LangOpts.C11) {
3132 Char = getCharAndSize(CurPtr, SizeTmp);
3136 return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3137 tok::utf32_string_literal);
3141 return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3142 tok::utf32_char_constant);
3145 if (Char ==
'R' && LangOpts.CPlusPlus11 &&
3146 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3147 return LexRawStringLiteral(Result,
3148 ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3150 tok::utf32_string_literal);
3154 return LexIdentifier(Result, CurPtr);
3160 if (LangOpts.CPlusPlus11) {
3161 Char = getCharAndSize(CurPtr, SizeTmp);
3164 return LexRawStringLiteral(Result,
3165 ConsumeChar(CurPtr, SizeTmp, Result),
3166 tok::string_literal);
3170 return LexIdentifier(Result, CurPtr);
3175 Char = getCharAndSize(CurPtr, SizeTmp);
3179 return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3180 tok::wide_string_literal);
3183 if (LangOpts.CPlusPlus11 && Char ==
'R' &&
3184 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3185 return LexRawStringLiteral(Result,
3186 ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3188 tok::wide_string_literal);
3192 return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3193 tok::wide_char_constant);
3197 case 'A':
case 'B':
case 'C':
case 'D':
case 'E':
case 'F':
case 'G':
3198 case 'H':
case 'I':
case 'J':
case 'K':
case 'M':
case 'N':
3199 case 'O':
case 'P':
case 'Q':
case 'S':
case 'T':
3200 case 'V':
case 'W':
case 'X':
case 'Y':
case 'Z':
3201 case 'a':
case 'b':
case 'c':
case 'd':
case 'e':
case 'f':
case 'g':
3202 case 'h':
case 'i':
case 'j':
case 'k':
case 'l':
case 'm':
case 'n':
3203 case 'o':
case 'p':
case 'q':
case 'r':
case 's':
case 't':
3204 case 'v':
case 'w':
case 'x':
case 'y':
case 'z':
3208 return LexIdentifier(Result, CurPtr);
3211 if (LangOpts.DollarIdents) {
3213 Diag(CurPtr-1, diag::ext_dollar_in_identifier);
3216 return LexIdentifier(Result, CurPtr);
3219 Kind = tok::unknown;
3226 return LexCharConstant(Result, CurPtr, tok::char_constant);
3232 return LexStringLiteral(Result, CurPtr, tok::string_literal);
3236 Kind = tok::question;
3239 Kind = tok::l_square;
3242 Kind = tok::r_square;
3245 Kind = tok::l_paren;
3248 Kind = tok::r_paren;
3251 Kind = tok::l_brace;
3254 Kind = tok::r_brace;
3257 Char = getCharAndSize(CurPtr, SizeTmp);
3258 if (Char >=
'0' && Char <=
'9') {
3262 return LexNumericConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result));
3263 }
else if (LangOpts.CPlusPlus && Char ==
'*') {
3264 Kind = tok::periodstar;
3266 }
else if (Char ==
'.' &&
3267 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
'.') {
3268 Kind = tok::ellipsis;
3269 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3276 Char = getCharAndSize(CurPtr, SizeTmp);
3279 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3280 }
else if (Char ==
'=') {
3281 Kind = tok::ampequal;
3282 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3288 if (getCharAndSize(CurPtr, SizeTmp) ==
'=') {
3289 Kind = tok::starequal;
3290 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3296 Char = getCharAndSize(CurPtr, SizeTmp);
3298 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3299 Kind = tok::plusplus;
3300 }
else if (Char ==
'=') {
3301 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3302 Kind = tok::plusequal;
3308 Char = getCharAndSize(CurPtr, SizeTmp);
3310 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3311 Kind = tok::minusminus;
3312 }
else if (Char ==
'>' && LangOpts.CPlusPlus &&
3313 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
'*') {
3314 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3316 Kind = tok::arrowstar;
3317 }
else if (Char ==
'>') {
3318 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3320 }
else if (Char ==
'=') {
3321 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3322 Kind = tok::minusequal;
3331 if (getCharAndSize(CurPtr, SizeTmp) ==
'=') {
3332 Kind = tok::exclaimequal;
3333 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3335 Kind = tok::exclaim;
3340 Char = getCharAndSize(CurPtr, SizeTmp);
3350 bool TreatAsComment = LangOpts.LineComment &&
3351 (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP);
3352 if (!TreatAsComment)
3354 TreatAsComment = getCharAndSize(CurPtr+SizeTmp, SizeTmp2) !=
'*';
3356 if (TreatAsComment) {
3357 if (SkipLineComment(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3358 TokAtPhysicalStartOfLine))
3364 goto SkipIgnoredUnits;
3369 if (SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3370 TokAtPhysicalStartOfLine))
3379 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3380 Kind = tok::slashequal;
3386 Char = getCharAndSize(CurPtr, SizeTmp);
3388 Kind = tok::percentequal;
3389 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3390 }
else if (LangOpts.Digraphs && Char ==
'>') {
3391 Kind = tok::r_brace;
3392 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3393 }
else if (LangOpts.Digraphs && Char ==
':') {
3394 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3395 Char = getCharAndSize(CurPtr, SizeTmp);
3396 if (Char ==
'%' && getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
':') {
3397 Kind = tok::hashhash;
3398 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3400 }
else if (Char ==
'@' && LangOpts.MicrosoftExt) {
3401 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3403 Diag(BufferPtr, diag::ext_charize_microsoft);
3410 if (TokAtPhysicalStartOfLine && !
LexingRawMode && !Is_PragmaLexer)
3411 goto HandleDirective;
3416 Kind = tok::percent;
3420 Char = getCharAndSize(CurPtr, SizeTmp);
3422 return LexAngledStringLiteral(Result, CurPtr);
3423 }
else if (Char ==
'<') {
3424 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
3426 Kind = tok::lesslessequal;
3427 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3429 }
else if (After ==
'<' && IsStartOfConflictMarker(CurPtr-1)) {
3433 }
else if (After ==
'<' && HandleEndOfConflictMarker(CurPtr-1)) {
3437 }
else if (LangOpts.CUDA && After ==
'<') {
3438 Kind = tok::lesslessless;
3439 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3442 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3443 Kind = tok::lessless;
3445 }
else if (Char ==
'=') {
3446 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3447 Kind = tok::lessequal;
3448 }
else if (LangOpts.Digraphs && Char ==
':') {
3449 if (LangOpts.CPlusPlus11 &&
3450 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
':') {
3457 char After = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
3458 if (After !=
':' && After !=
'>') {
3461 Diag(BufferPtr, diag::warn_cxx98_compat_less_colon_colon);
3466 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3467 Kind = tok::l_square;
3468 }
else if (LangOpts.Digraphs && Char ==
'%') {
3469 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3470 Kind = tok::l_brace;
3476 Char = getCharAndSize(CurPtr, SizeTmp);
3478 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3479 Kind = tok::greaterequal;
3480 }
else if (Char ==
'>') {
3481 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
3483 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3485 Kind = tok::greatergreaterequal;
3486 }
else if (After ==
'>' && IsStartOfConflictMarker(CurPtr-1)) {
3490 }
else if (After ==
'>' && HandleEndOfConflictMarker(CurPtr-1)) {
3493 }
else if (LangOpts.CUDA && After ==
'>') {
3494 Kind = tok::greatergreatergreater;
3495 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3498 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3499 Kind = tok::greatergreater;
3503 Kind = tok::greater;
3507 Char = getCharAndSize(CurPtr, SizeTmp);
3509 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3510 Kind = tok::caretequal;
3511 }
else if (LangOpts.OpenCL && Char ==
'^') {
3512 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3513 Kind = tok::caretcaret;
3519 Char = getCharAndSize(CurPtr, SizeTmp);
3521 Kind = tok::pipeequal;
3522 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3523 }
else if (Char ==
'|') {
3525 if (CurPtr[1] ==
'|' && HandleEndOfConflictMarker(CurPtr-1))
3527 Kind = tok::pipepipe;
3528 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3534 Char = getCharAndSize(CurPtr, SizeTmp);
3535 if (LangOpts.Digraphs && Char ==
'>') {
3536 Kind = tok::r_square;
3537 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3538 }
else if (LangOpts.CPlusPlus && Char ==
':') {
3539 Kind = tok::coloncolon;
3540 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3549 Char = getCharAndSize(CurPtr, SizeTmp);
3552 if (CurPtr[1] ==
'=' && HandleEndOfConflictMarker(CurPtr-1))
3555 Kind = tok::equalequal;
3556 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3565 Char = getCharAndSize(CurPtr, SizeTmp);
3567 Kind = tok::hashhash;
3568 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3569 }
else if (Char ==
'@' && LangOpts.MicrosoftExt) {
3572 Diag(BufferPtr, diag::ext_charize_microsoft);
3573 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3579 if (TokAtPhysicalStartOfLine && !
LexingRawMode && !Is_PragmaLexer)
3580 goto HandleDirective;
3588 if (CurPtr[-1] ==
'@' && LangOpts.ObjC1)
3591 Kind = tok::unknown;
3596 if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) {
3597 if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
3598 if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
3606 return LexUnicode(Result, CodePoint, CurPtr);
3609 Kind = tok::unknown;
3614 Kind = tok::unknown;
3623 ConversionResult Status =
3624 llvm::convertUTF8Sequence((
const UTF8 **)&CurPtr,
3625 (
const UTF8 *)BufferEnd,
3628 if (Status == conversionOK) {
3629 if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
3630 if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
3637 return LexUnicode(Result, CodePoint, CurPtr);
3643 Kind = tok::unknown;
3650 Diag(CurPtr, diag::err_invalid_utf8);
3652 BufferPtr = CurPtr+1;
3664 FormTokenWithChars(Result, CurPtr, Kind);
3670 FormTokenWithChars(Result, CurPtr, tok::hash);
3675 assert(Result.
is(
tok::eof) &&
"Preprocessor did not set tok:eof");
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
SourceManager & getSourceManager() const
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
bool isMacroArgExpansion(SourceLocation Loc, SourceLocation *StartLoc=nullptr) const
Tests whether the given source location represents a macro argument's expansion into the function-lik...
static unsigned getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid=nullptr)
getSpelling - This method is used to get the spelling of a token into a preallocated buffer...
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens...
This is a discriminated union of FileInfo and ExpansionInfo.
SourceLocation getBegin() const
static std::pair< unsigned, bool > ComputePreamble(StringRef Buffer, const LangOptions &LangOpts, unsigned MaxLines=0)
Compute the preamble of the given file.
void setFlagValue(TokenFlags Flag, bool Val)
Set a flag to either true or false.
static const llvm::sys::UnicodeCharRange C11AllowedIDCharRanges[]
SourceLocation getImmediateSpellingLoc(SourceLocation Loc) const
Given a SourceLocation object, return the spelling location referenced by the ID. ...
static LLVM_READONLY bool isWhitespace(unsigned char c)
Return true if this character is horizontal or vertical ASCII whitespace: ' ', '\t', '\f', '\v', '\n', '\r'.
void setBegin(SourceLocation b)
SourceLocation getSpellingLoc(SourceLocation Loc) const
Given a SourceLocation object, return the spelling location referenced by the ID. ...
static __inline__ int __ATTRS_o_ai vec_any_eq(vector signed char __a, vector signed char __b)
Defines the SourceManager interface.
const SrcMgr::SLocEntry & getSLocEntry(FileID FID, bool *Invalid=nullptr) const
const char * getCharacterData(SourceLocation SL, bool *Invalid=nullptr) const
Return a pointer to the start of the specified location in the appropriate spelling MemoryBuffer...
static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts)
llvm::MemoryBuffer * getBuffer(FileID FID, SourceLocation Loc, bool *Invalid=nullptr) const
Return the buffer for the specified FileID.
bool hasLeadingSpace() const
Return true if this token has whitespace before it.
Each ExpansionInfo encodes the expansion location - where the token was ultimately expanded...
const ExpansionInfo & getExpansion() const
std::unique_ptr< llvm::MemoryBuffer > Buffer
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
bool hasUCN() const
Returns true if this token contains a universal character name.
void setFlag(TokenFlags Flag)
Set the specified flag.
unsigned getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it...
bool needsCleaning() const
Return true if this token has trigraphs or escaped newlines in it.
static char getCharAndSizeNoWarn(const char *Ptr, unsigned &Size, const LangOptions &LangOpts)
getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever emit a warning.
static bool isAtStartOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroBegin=nullptr)
Returns true if the given MacroID location points at the first token of the macro expansion...
static LLVM_READNONE bool isASCII(char c)
Returns true if this is an ASCII character.
bool isStringLiteral(TokenKind K)
Return true if this is a C or C++ string-literal (or C++11 user-defined-string-literal) token...
ConflictMarkerKind
ConflictMarkerKind - Kinds of conflict marker which the lexer might be recovering from...
static LLVM_ATTRIBUTE_NOINLINE SourceLocation GetMappedTokenLoc(Preprocessor &PP, SourceLocation FileLoc, unsigned CharNo, unsigned TokLen)
GetMappedTokenLoc - If lexing out of a 'mapped buffer', where we pretend the lexer buffer was all exp...
Like System, but searched after the system directories.
static Lexer * Create_PragmaLexer(SourceLocation SpellingLoc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLen, Preprocessor &PP)
Create_PragmaLexer: Lexer constructor - Create a new lexer object for _Pragma expansion.
StringRef getSpelling(SourceLocation loc, SmallVectorImpl< char > &buffer, bool *invalid=nullptr) const
Return the 'spelling' of the token at the given location; does not go up to the spelling location or ...
static LLVM_READONLY bool isPreprocessingNumberBody(unsigned char c)
Return true if this is the body character of a C preprocessing number, which is [a-zA-Z0-9_.].
One of these records is kept for each identifier that is lexed.
bool ParsingPreprocessorDirective
True when parsing #XXX; turns '\n' into a tok::eod token.
StringRef getBufferData(FileID FID, bool *Invalid=nullptr) const
Return a StringRef to the source buffer data for the specified FileID.
void setRawIdentifierData(const char *Ptr)
bool isPragmaLexer() const
isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.
static SourceLocation getFromRawEncoding(unsigned Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
static LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)
Returns true if this character is horizontal ASCII whitespace: ' ', '\t', '\f', '\v'.
SmallVector< PPConditionalInfo, 4 > ConditionalStack
Information about the set of #if/#ifdef/#ifndef blocks we are currently in.
Token - This structure provides full information about a lexed token.
void setKind(tok::TokenKind K)
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
void resetExtendedTokenMode()
Sets the extended token mode back to its initial value, according to the language options and preproc...
A Perforce-style conflict marker, initiated by 4 ">"s, separated by 4 "="s, and terminated by 4 "<"s...
SourceLocation getSourceLocation() override
getSourceLocation - Return a source location for the next character in the current file...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi8(char __b)
Initializes all values in a 128-bit vector of [16 x i8] with the specified 8-bit value.
static SourceLocation getBeginningOfFileToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
SourceLocation getLocWithOffset(int Offset) const
Return a source location with the specified offset from this SourceLocation.
bool getCommentRetentionState() const
static bool getRawToken(SourceLocation Loc, Token &Result, const SourceManager &SM, const LangOptions &LangOpts, bool IgnoreWhiteSpace=false)
Relex the token at the specified location.
void HandleDirective(Token &Result)
Callback invoked when the lexer sees a # token at the start of a line.
Concrete class used by the front-end to report problems and issues.
static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix)
Determine whether a suffix is a valid ud-suffix.
bool hadModuleLoaderFatalFailure() const
static LLVM_READONLY bool isRawStringDelimBody(unsigned char c)
Return true if this is the body character of a C++ raw string delimiter.
SourceLocation getCodeCompletionFileLoc() const
Returns the start location of the file of code-completion point.
tok::TokenKind getKind() const
__INTPTR_TYPE__ intptr_t
A signed integer type with the property that any valid pointer to void can be converted to this type...
const FileID FID
The SourceManager FileID corresponding to the file being lexed.
static SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart, unsigned Character, const SourceManager &SM, const LangOptions &LangOpts)
AdvanceToTokenCharacter - If the current SourceLocation specifies a location at the start of a token...
DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const
Forwarding function for diagnostics.
static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, Lexer *L)
isEndOfBlockCommentWithEscapedNewLine - Return true if the specified newline character (either \n or \r) ...
StringRef getRawIdentifier() const
getRawIdentifier - For a raw identifier token (i.e., an identifier lexed in raw mode), returns a reference to the text substring in the buffer if known.
static CharSourceRange makeCharRange(Lexer &L, const char *Begin, const char *End)
A little helper class used to produce diagnostics.
const FileEntry * getFileEntryForID(FileID FID) const
Returns the FileEntry record for the provided FileID.
bool ParsingFilename
True after #include; turns <xx> into a tok::angle_string_literal token.
FileID getFileID(SourceLocation SpellingLoc) const
Return the FileID for a SourceLocation.
static const llvm::sys::UnicodeCharRange C11DisallowedInitialIDCharRanges[]
static StringRef getSourceText(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts, bool *Invalid=nullptr)
Returns a string for the source that the range encompasses.
bool isInFileID(SourceLocation Loc, FileID FID, unsigned *RelativeOffset=nullptr) const
Given a specific FileID, returns true if Loc is inside that FileID chunk and sets relative offset (of...
static bool isAtEndOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroEnd=nullptr)
Returns true if the given MacroID location points at the last token of the macro expansion.
bool LexingRawMode
True if in raw mode.
static SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset, const SourceManager &SM, const LangOptions &LangOpts)
Computes the source location just past the end of the token at this source location.
Represents a character-granular source range.
SourceLocation getEnd() const
static unsigned MeasureTokenLength(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
MeasureTokenLength - Relex the token at the specified location and return its length in bytes in the ...
static SourceLocation findLocationAfterToken(SourceLocation loc, tok::TokenKind TKind, const SourceManager &SM, const LangOptions &LangOpts, bool SkipTrailingWhitespaceAndNewLine)
Checks that the given token is the first token that occurs after the given location (this excludes co...
Defines the clang::Preprocessor interface.
MultipleIncludeOpt MIOpt
A state machine that detects the #ifndef-wrapping a file idiom for the multiple-include optimization...
void setEnd(SourceLocation e)
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file. ...
bool HandleEndOfFile(Token &Result, bool isEndOfMacro=false)
Callback invoked when the lexer hits the end of the current file.
SourceLocation createExpansionLoc(SourceLocation Loc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLength, int LoadedID=0, unsigned LoadedOffset=0)
Return a new SourceLocation that encodes the fact that a token from SpellingLoc should actually be re...
SourceLocation getCodeCompletionLoc() const
Returns the location of the code-completion point.
The result type of a method or function.
float __ovld __cnfn length(float p)
Return the length of vector p, i.e., $\sqrt{p.x^2 + p.y^2 + \cdots}$
ObjCKeywordKind
Provides a namespace for Objective-C keywords which start with an '@'.
static CharSourceRange getCharRange(SourceRange R)
const char * getLiteralData() const
getLiteralData - For a literal token (numeric constant, string, etc), this returns a pointer to the s...
bool isHandleIdentifierCase() const
Return true if the Preprocessor::HandleIdentifier must be called on a token of this identifier...
bool isTokenRange() const
Return true if the end of this range specifies the start of the last token.
Encodes a location in the source.
bool isValid() const
Return true if this is a valid SourceLocation object.
bool isAtEndOfImmediateMacroExpansion(SourceLocation Loc, SourceLocation *MacroEnd=nullptr) const
Returns true if the given MacroID location points at the character end of the immediate macro expansi...
static void maybeDiagnoseIDCharCompat(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range, bool IsFirst)
bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const
Return true if we have an ObjC keyword identifier.
void setIdentifierInfo(IdentifierInfo *II)
DiagnosticBuilder Diag(const char *Loc, unsigned DiagID) const
Diag - Forwarding function for diagnostics.
static const llvm::sys::UnicodeCharRange C99DisallowedInitialIDCharRanges[]
static SourceLocation GetBeginningOfToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Given a location any where in a source buffer, find the location that corresponds to the beginning of...
static CharSourceRange makeRangeFromFileLocs(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {...
static const llvm::sys::UnicodeCharRange C99AllowedIDCharRanges[]
bool isIgnored(unsigned DiagID, SourceLocation Loc) const
Determine whether the diagnostic is known to be ignored.
DiagnosticsEngine & getDiagnostics() const
__PTRDIFF_TYPE__ ptrdiff_t
A signed integer type that is the result of subtracting two pointers.
static StringRef getImmediateMacroName(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
static StringRef getImmediateMacroNameForDiagnostics(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
static const llvm::sys::UnicodeCharRange UnicodeWhitespaceCharRanges[]
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
bool inKeepCommentMode() const
inKeepCommentMode - Return true if the lexer should return comments as tokens.
static CharSourceRange makeFileCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Accepts a range and returns a character range with file locations.
static size_t getSpellingSlow(const Token &Tok, const char *BufPtr, const LangOptions &LangOpts, char *Spelling)
Slow case of getSpelling.
bool isAtStartOfImmediateMacroExpansion(SourceLocation Loc, SourceLocation *MacroBegin=nullptr) const
Returns true if the given MacroID location points at the beginning of the immediate macro expansion...
static FixItHint CreateRemoval(CharSourceRange RemoveRange)
Create a code modification hint that removes the given source range.
std::pair< SourceLocation, SourceLocation > getImmediateExpansionRange(SourceLocation Loc) const
Return the start/end of the expansion information for an expansion location.
SourceLocation getSourceLocation(const char *Loc, unsigned TokLen=1) const
getSourceLocation - Return a source location identifier for the specified offset in the current file...
void CodeCompleteNaturalLanguage()
Hook used by the lexer to invoke the "natural language" code completion point.
detail::InMemoryDirectory::const_iterator E
SourceLocation getExpansionLocStart() const
void setLiteralData(const char *Ptr)
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
bool isMacroArgExpansion() const
static const llvm::sys::UnicodeCharRange CXX03AllowedIDCharRanges[]
bool HandleIdentifier(Token &Identifier)
Callback invoked when the lexer reads an identifier and has filled in the token's IdentifierInfo member...
void CreateString(StringRef Str, Token &Tok, SourceLocation ExpansionLocStart=SourceLocation(), SourceLocation ExpansionLocEnd=SourceLocation())
Plop the specified string into a scratch buffer and set the specified token's location and length to ...
tok::ObjCKeywordKind getObjCKeywordID() const
Return the Objective-C keyword ID for this identifier.
static bool isAllowedInitiallyIDChar(uint32_t C, const LangOptions &LangOpts)
bool hasLeadingEmptyMacro() const
Return true if this token has an empty macro before it.
bool isCodeCompletionEnabled() const
Determine if we are performing code completion.
static FixItHint CreateInsertion(SourceLocation InsertionLoc, StringRef Code, bool BeforePreviousInsertions=false)
Create a code modification hint that inserts the given code string at a specific location.
static char GetTrigraphCharForLetter(char Letter)
GetTrigraphCharForLetter - Given a character that occurs after a ?? pair, return the decoded trigraph...
static bool isIdentifierBodyChar(char c, const LangOptions &LangOpts)
Returns true if the given character could appear in an identifier.
static LLVM_READONLY bool isIdentifierBody(unsigned char c, bool AllowDollar=false)
Returns true if this is a body character of a C identifier, which is [a-zA-Z0-9_].
bool HandleComment(Token &Token, SourceRange Comment)
const LangOptions & getLangOpts() const
getLangOpts - Return the language features currently enabled.
void ReadToEndOfLine(SmallVectorImpl< char > *Result=nullptr)
ReadToEndOfLine - Read the rest of the current preprocessor line as an uninterpreted string...
Not within a conflict marker.
static LLVM_READONLY bool isVerticalWhitespace(unsigned char c)
Returns true if this character is vertical ASCII whitespace: '\n', '\r'.
bool isLexingRawMode() const
Return true if this lexer is in raw mode or not.
static char DecodeTrigraphChar(const char *CP, Lexer *L)
DecodeTrigraphChar - If the specified character is a legal trigraph when prefixed with ...
static const char * FindConflictEnd(const char *CurPtr, const char *BufferEnd, ConflictMarkerKind CMK)
Find the end of a version control conflict marker.
static FixItHint CreateReplacement(CharSourceRange RemoveRange, StringRef Code)
Create a code modification hint that replaces the given source range with the given code string...
void SetCommentRetentionState(bool Mode)
SetCommentRetentionState - Change the comment retention mode of the lexer to the specified mode...
static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a)
IdentifierInfo * LookUpIdentifierInfo(Token &Identifier) const
Given a tok::raw_identifier token, look up the identifier information for the token and install it in...
unsigned getLength() const
SourceLocation getLocForStartOfFile(FileID FID) const
Return the source location corresponding to the first byte of the specified file. ...
bool isKeepWhitespaceMode() const
isKeepWhitespaceMode - Return true if the lexer should return tokens for every character in the file...
bool isPreprocessedOutput() const
Returns true if the preprocessor is responsible for generating output, false if it is producing token...
static LLVM_READONLY bool isIdentifierHead(unsigned char c, bool AllowDollar=false)
Returns true if this is a valid first character of a C identifier, which is [a-zA-Z_].
A normal or diff3 conflict marker, initiated by at least 7 "<"s, separated by at least 7 "="s or "|"s...
A trivial tuple used to represent a source range.
void clearFlag(TokenFlags Flag)
Unset the specified flag.
SourceLocation getExpansionLoc(SourceLocation Loc) const
Given a SourceLocation object Loc, return the expansion location referenced by the ID...
std::pair< FileID, unsigned > getDecomposedLoc(SourceLocation Loc) const
Decompose the specified location into a raw FileID + Offset pair.
void SetKeepWhitespaceMode(bool Val)
SetKeepWhitespaceMode - This method lets clients enable or disable whitespace retention mode...
This class handles loading and caching of source files into memory.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi8(__m128i __a, __m128i __b)
Compares each of the corresponding 8-bit values of the 128-bit integer vectors for equality...
void startToken()
Reset all flags to cleared.
static std::string Stringify(StringRef Str, bool Charify=false)
Stringify - Convert the specified string into a C string by escaping '\' and " characters. This does not add surrounding ""'s to the string.
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
SourceLocation getSpellingLoc() const
IdentifierInfo * getIdentifierInfo() const