LLVM  12.0.0git
TGLexer.cpp
Go to the documentation of this file.
1 //===- TGLexer.cpp - Lexer for TableGen -----------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Implement the Lexer for TableGen.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "TGLexer.h"
14 #include "llvm/ADT/ArrayRef.h"
15 #include "llvm/ADT/StringSwitch.h"
16 #include "llvm/ADT/Twine.h"
17 #include "llvm/Config/config.h" // for strtoull()/strtoll() define
18 #include "llvm/Support/Compiler.h"
20 #include "llvm/Support/SourceMgr.h"
21 #include "llvm/TableGen/Error.h"
22 #include <algorithm>
23 #include <cctype>
24 #include <cerrno>
25 #include <cstdint>
26 #include <cstdio>
27 #include <cstdlib>
28 #include <cstring>
29 
30 using namespace llvm;
31 
32 namespace {
33 // A list of supported preprocessing directives with their
34 // internal token kinds and names.
35 struct {
37  const char *Word;
38 } PreprocessorDirs[] = {
39  { tgtok::Ifdef, "ifdef" },
40  { tgtok::Ifndef, "ifndef" },
41  { tgtok::Else, "else" },
42  { tgtok::Endif, "endif" },
43  { tgtok::Define, "define" }
44 };
45 } // end anonymous namespace
46 
48  CurBuffer = SrcMgr.getMainFileID();
49  CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
50  CurPtr = CurBuf.begin();
51  TokStart = nullptr;
52 
53  // Pretend that we enter the "top-level" include file.
54  PrepIncludeStack.push_back(
55  std::make_unique<std::vector<PreprocessorControlDesc>>());
56 
57  // Put all macros defined in the command line into the DefinedMacros set.
58  std::for_each(Macros.begin(), Macros.end(),
59  [this](const std::string &MacroName) {
60  DefinedMacros.insert(MacroName);
61  });
62 }
63 
65  return SMLoc::getFromPointer(TokStart);
66 }
67 
68 /// ReturnError - Set the error to the specified string at the specified
69 /// location. This is defined to always return tgtok::Error.
70 tgtok::TokKind TGLexer::ReturnError(SMLoc Loc, const Twine &Msg) {
71  PrintError(Loc, Msg);
72  return tgtok::Error;
73 }
74 
75 tgtok::TokKind TGLexer::ReturnError(const char *Loc, const Twine &Msg) {
76  return ReturnError(SMLoc::getFromPointer(Loc), Msg);
77 }
78 
79 bool TGLexer::processEOF() {
80  SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
81  if (ParentIncludeLoc != SMLoc()) {
82  // If prepExitInclude() detects a problem with the preprocessing
83  // control stack, it will return false. Pretend that we reached
84  // the final EOF and stop lexing more tokens by returning false
85  // to LexToken().
86  if (!prepExitInclude(false))
87  return false;
88 
89  CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc);
90  CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
91  CurPtr = ParentIncludeLoc.getPointer();
92  // Make sure TokStart points into the parent file's buffer.
93  // LexToken() assigns to it before calling getNextChar(),
94  // so it is pointing into the included file now.
95  TokStart = CurPtr;
96  return true;
97  }
98 
99  // Pretend that we exit the "top-level" include file.
100  // Note that in case of an error (e.g. control stack imbalance)
101  // the routine will issue a fatal error.
102  prepExitInclude(true);
103  return false;
104 }
105 
106 int TGLexer::getNextChar() {
107  char CurChar = *CurPtr++;
108  switch (CurChar) {
109  default:
110  return (unsigned char)CurChar;
111  case 0: {
112  // A nul character in the stream is either the end of the current buffer or
113  // a random nul in the file. Disambiguate that here.
114  if (CurPtr-1 != CurBuf.end())
115  return 0; // Just whitespace.
116 
117  // Otherwise, return end of file.
118  --CurPtr; // Another call to lex will return EOF again.
119  return EOF;
120  }
121  case '\n':
122  case '\r':
123  // Handle the newline character by ignoring it and incrementing the line
124  // count. However, be careful about 'dos style' files with \n\r in them.
125  // Only treat a \n\r or \r\n as a single line.
126  if ((*CurPtr == '\n' || (*CurPtr == '\r')) &&
127  *CurPtr != CurChar)
128  ++CurPtr; // Eat the two char newline sequence.
129  return '\n';
130  }
131 }
132 
133 int TGLexer::peekNextChar(int Index) const {
134  return *(CurPtr + Index);
135 }
136 
137 tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) {
138  TokStart = CurPtr;
139  // This always consumes at least one character.
140  int CurChar = getNextChar();
141 
142  switch (CurChar) {
143  default:
144  // Handle letters: [a-zA-Z_]
145  if (isalpha(CurChar) || CurChar == '_')
146  return LexIdentifier();
147 
148  // Unknown character, emit an error.
149  return ReturnError(TokStart, "Unexpected character");
150  case EOF:
151  // Lex next token, if we just left an include file.
152  // Note that leaving an include file means that the next
153  // symbol is located at the end of 'include "..."'
154  // construct, so LexToken() is called with default
155  // false parameter.
156  if (processEOF())
157  return LexToken();
158 
159  // Return EOF denoting the end of lexing.
160  return tgtok::Eof;
161 
162  case ':': return tgtok::colon;
163  case ';': return tgtok::semi;
164  case '.': return tgtok::period;
165  case ',': return tgtok::comma;
166  case '<': return tgtok::less;
167  case '>': return tgtok::greater;
168  case ']': return tgtok::r_square;
169  case '{': return tgtok::l_brace;
170  case '}': return tgtok::r_brace;
171  case '(': return tgtok::l_paren;
172  case ')': return tgtok::r_paren;
173  case '=': return tgtok::equal;
174  case '?': return tgtok::question;
175  case '#':
176  if (FileOrLineStart) {
177  tgtok::TokKind Kind = prepIsDirective();
178  if (Kind != tgtok::Error)
179  return lexPreprocessor(Kind);
180  }
181 
182  return tgtok::paste;
183 
184  case '\r':
185  PrintFatalError("getNextChar() must never return '\r'");
186  return tgtok::Error;
187 
188  case 0:
189  case ' ':
190  case '\t':
191  // Ignore whitespace.
192  return LexToken(FileOrLineStart);
193  case '\n':
194  // Ignore whitespace, and identify the new line.
195  return LexToken(true);
196  case '/':
197  // If this is the start of a // comment, skip until the end of the line or
198  // the end of the buffer.
199  if (*CurPtr == '/')
200  SkipBCPLComment();
201  else if (*CurPtr == '*') {
202  if (SkipCComment())
203  return tgtok::Error;
204  } else // Otherwise, this is an error.
205  return ReturnError(TokStart, "Unexpected character");
206  return LexToken(FileOrLineStart);
207  case '-': case '+':
208  case '0': case '1': case '2': case '3': case '4': case '5': case '6':
209  case '7': case '8': case '9': {
210  int NextChar = 0;
211  if (isdigit(CurChar)) {
212  // Allow identifiers to start with a number if it is followed by
213  // an identifier. This can happen with paste operations like
214  // foo#8i.
215  int i = 0;
216  do {
217  NextChar = peekNextChar(i++);
218  } while (isdigit(NextChar));
219 
220  if (NextChar == 'x' || NextChar == 'b') {
221  // If this is [0-9]b[01] or [0-9]x[0-9A-fa-f] this is most
222  // likely a number.
223  int NextNextChar = peekNextChar(i);
224  switch (NextNextChar) {
225  default:
226  break;
227  case '0': case '1':
228  if (NextChar == 'b')
229  return LexNumber();
231  case '2': case '3': case '4': case '5':
232  case '6': case '7': case '8': case '9':
233  case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
234  case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
235  if (NextChar == 'x')
236  return LexNumber();
237  break;
238  }
239  }
240  }
241 
242  if (isalpha(NextChar) || NextChar == '_')
243  return LexIdentifier();
244 
245  return LexNumber();
246  }
247  case '"': return LexString();
248  case '$': return LexVarName();
249  case '[': return LexBracket();
250  case '!': return LexExclaim();
251  }
252 }
253 
254 /// LexString - Lex "[^"]*"
255 tgtok::TokKind TGLexer::LexString() {
256  const char *StrStart = CurPtr;
257 
258  CurStrVal = "";
259 
260  while (*CurPtr != '"') {
261  // If we hit the end of the buffer, report an error.
262  if (*CurPtr == 0 && CurPtr == CurBuf.end())
263  return ReturnError(StrStart, "End of file in string literal");
264 
265  if (*CurPtr == '\n' || *CurPtr == '\r')
266  return ReturnError(StrStart, "End of line in string literal");
267 
268  if (*CurPtr != '\\') {
269  CurStrVal += *CurPtr++;
270  continue;
271  }
272 
273  ++CurPtr;
274 
275  switch (*CurPtr) {
276  case '\\': case '\'': case '"':
277  // These turn into their literal character.
278  CurStrVal += *CurPtr++;
279  break;
280  case 't':
281  CurStrVal += '\t';
282  ++CurPtr;
283  break;
284  case 'n':
285  CurStrVal += '\n';
286  ++CurPtr;
287  break;
288 
289  case '\n':
290  case '\r':
291  return ReturnError(CurPtr, "escaped newlines not supported in tblgen");
292 
293  // If we hit the end of the buffer, report an error.
294  case '\0':
295  if (CurPtr == CurBuf.end())
296  return ReturnError(StrStart, "End of file in string literal");
298  default:
299  return ReturnError(CurPtr, "invalid escape in string literal");
300  }
301  }
302 
303  ++CurPtr;
304  return tgtok::StrVal;
305 }
306 
307 tgtok::TokKind TGLexer::LexVarName() {
308  if (!isalpha(CurPtr[0]) && CurPtr[0] != '_')
309  return ReturnError(TokStart, "Invalid variable name");
310 
311  // Otherwise, we're ok, consume the rest of the characters.
312  const char *VarNameStart = CurPtr++;
313 
314  while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_')
315  ++CurPtr;
316 
317  CurStrVal.assign(VarNameStart, CurPtr);
318  return tgtok::VarName;
319 }
320 
321 tgtok::TokKind TGLexer::LexIdentifier() {
322  // The first letter is [a-zA-Z_].
323  const char *IdentStart = TokStart;
324 
325  // Match the rest of the identifier regex: [0-9a-zA-Z_]*
326  while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_')
327  ++CurPtr;
328 
329  // Check to see if this identifier is a keyword.
330  StringRef Str(IdentStart, CurPtr-IdentStart);
331 
332  if (Str == "include") {
333  if (LexInclude()) return tgtok::Error;
334  return Lex();
335  }
336 
338  .Case("int", tgtok::Int)
339  .Case("bit", tgtok::Bit)
340  .Case("bits", tgtok::Bits)
341  .Case("string", tgtok::String)
342  .Case("list", tgtok::List)
343  .Case("code", tgtok::Code)
344  .Case("dag", tgtok::Dag)
345  .Case("class", tgtok::Class)
346  .Case("def", tgtok::Def)
347  .Case("foreach", tgtok::Foreach)
348  .Case("defm", tgtok::Defm)
349  .Case("defset", tgtok::Defset)
350  .Case("multiclass", tgtok::MultiClass)
351  .Case("field", tgtok::Field)
352  .Case("let", tgtok::Let)
353  .Case("in", tgtok::In)
354  .Case("defvar", tgtok::Defvar)
355  .Case("if", tgtok::If)
356  .Case("then", tgtok::Then)
357  .Case("else", tgtok::ElseKW)
358  .Default(tgtok::Id);
359 
360  if (Kind == tgtok::Id)
361  CurStrVal.assign(Str.begin(), Str.end());
362  return Kind;
363 }
364 
365 /// LexInclude - We just read the "include" token. Get the string token that
366 /// comes next and enter the include.
367 bool TGLexer::LexInclude() {
368  // The token after the include must be a string.
369  tgtok::TokKind Tok = LexToken();
370  if (Tok == tgtok::Error) return true;
371  if (Tok != tgtok::StrVal) {
372  PrintError(getLoc(), "Expected filename after include");
373  return true;
374  }
375 
376  // Get the string.
377  std::string Filename = CurStrVal;
378  std::string IncludedFile;
379 
380  CurBuffer = SrcMgr.AddIncludeFile(Filename, SMLoc::getFromPointer(CurPtr),
381  IncludedFile);
382  if (!CurBuffer) {
383  PrintError(getLoc(), "Could not find include file '" + Filename + "'");
384  return true;
385  }
386 
387  Dependencies.insert(IncludedFile);
388  // Save the line number and lex buffer of the includer.
389  CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
390  CurPtr = CurBuf.begin();
391 
392  PrepIncludeStack.push_back(
393  std::make_unique<std::vector<PreprocessorControlDesc>>());
394  return false;
395 }
396 
397 void TGLexer::SkipBCPLComment() {
398  ++CurPtr; // skip the second slash.
399  while (true) {
400  switch (*CurPtr) {
401  case '\n':
402  case '\r':
403  return; // Newline is end of comment.
404  case 0:
405  // If this is the end of the buffer, end the comment.
406  if (CurPtr == CurBuf.end())
407  return;
408  break;
409  }
410  // Otherwise, skip the character.
411  ++CurPtr;
412  }
413 }
414 
415 /// SkipCComment - This skips C-style /**/ comments. The only difference from C
416 /// is that we allow nesting.
417 bool TGLexer::SkipCComment() {
418  ++CurPtr; // skip the star.
419  unsigned CommentDepth = 1;
420 
421  while (true) {
422  int CurChar = getNextChar();
423  switch (CurChar) {
424  case EOF:
425  PrintError(TokStart, "Unterminated comment!");
426  return true;
427  case '*':
428  // End of the comment?
429  if (CurPtr[0] != '/') break;
430 
431  ++CurPtr; // End the */.
432  if (--CommentDepth == 0)
433  return false;
434  break;
435  case '/':
436  // Start of a nested comment?
437  if (CurPtr[0] != '*') break;
438  ++CurPtr;
439  ++CommentDepth;
440  break;
441  }
442  }
443 }
444 
445 /// LexNumber - Lex:
446 /// [-+]?[0-9]+
447 /// 0x[0-9a-fA-F]+
448 /// 0b[01]+
449 tgtok::TokKind TGLexer::LexNumber() {
450  if (CurPtr[-1] == '0') {
451  if (CurPtr[0] == 'x') {
452  ++CurPtr;
453  const char *NumStart = CurPtr;
454  while (isxdigit(CurPtr[0]))
455  ++CurPtr;
456 
457  // Requires at least one hex digit.
458  if (CurPtr == NumStart)
459  return ReturnError(TokStart, "Invalid hexadecimal number");
460 
461  errno = 0;
462  CurIntVal = strtoll(NumStart, nullptr, 16);
463  if (errno == EINVAL)
464  return ReturnError(TokStart, "Invalid hexadecimal number");
465  if (errno == ERANGE) {
466  errno = 0;
467  CurIntVal = (int64_t)strtoull(NumStart, nullptr, 16);
468  if (errno == EINVAL)
469  return ReturnError(TokStart, "Invalid hexadecimal number");
470  if (errno == ERANGE)
471  return ReturnError(TokStart, "Hexadecimal number out of range");
472  }
473  return tgtok::IntVal;
474  } else if (CurPtr[0] == 'b') {
475  ++CurPtr;
476  const char *NumStart = CurPtr;
477  while (CurPtr[0] == '0' || CurPtr[0] == '1')
478  ++CurPtr;
479 
480  // Requires at least one binary digit.
481  if (CurPtr == NumStart)
482  return ReturnError(CurPtr-2, "Invalid binary number");
483  CurIntVal = strtoll(NumStart, nullptr, 2);
484  return tgtok::BinaryIntVal;
485  }
486  }
487 
488  // Check for a sign without a digit.
489  if (!isdigit(CurPtr[0])) {
490  if (CurPtr[-1] == '-')
491  return tgtok::minus;
492  else if (CurPtr[-1] == '+')
493  return tgtok::plus;
494  }
495 
496  while (isdigit(CurPtr[0]))
497  ++CurPtr;
498  CurIntVal = strtoll(TokStart, nullptr, 10);
499  return tgtok::IntVal;
500 }
501 
502 /// LexBracket - We just read '['. If this is a code block, return it,
503 /// otherwise return the bracket. Match: '[' and '[{ ( [^}]+ | }[^]] )* }]'
504 tgtok::TokKind TGLexer::LexBracket() {
505  if (CurPtr[0] != '{')
506  return tgtok::l_square;
507  ++CurPtr;
508  const char *CodeStart = CurPtr;
509  while (true) {
510  int Char = getNextChar();
511  if (Char == EOF) break;
512 
513  if (Char != '}') continue;
514 
515  Char = getNextChar();
516  if (Char == EOF) break;
517  if (Char == ']') {
518  CurStrVal.assign(CodeStart, CurPtr-2);
519  return tgtok::CodeFragment;
520  }
521  }
522 
523  return ReturnError(CodeStart-2, "Unterminated Code Block");
524 }
525 
526 /// LexExclaim - Lex '!' and '![a-zA-Z]+'.
527 tgtok::TokKind TGLexer::LexExclaim() {
528  if (!isalpha(*CurPtr))
529  return ReturnError(CurPtr - 1, "Invalid \"!operator\"");
530 
531  const char *Start = CurPtr++;
532  while (isalpha(*CurPtr))
533  ++CurPtr;
534 
535  // Check to see which operator this is.
537  StringSwitch<tgtok::TokKind>(StringRef(Start, CurPtr - Start))
538  .Case("eq", tgtok::XEq)
539  .Case("ne", tgtok::XNe)
540  .Case("le", tgtok::XLe)
541  .Case("lt", tgtok::XLt)
542  .Case("ge", tgtok::XGe)
543  .Case("gt", tgtok::XGt)
544  .Case("if", tgtok::XIf)
545  .Case("cond", tgtok::XCond)
546  .Case("isa", tgtok::XIsA)
547  .Case("head", tgtok::XHead)
548  .Case("tail", tgtok::XTail)
549  .Case("size", tgtok::XSize)
550  .Case("con", tgtok::XConcat)
551  .Case("dag", tgtok::XDag)
552  .Case("add", tgtok::XADD)
553  .Case("mul", tgtok::XMUL)
554  .Case("and", tgtok::XAND)
555  .Case("or", tgtok::XOR)
556  .Case("shl", tgtok::XSHL)
557  .Case("sra", tgtok::XSRA)
558  .Case("srl", tgtok::XSRL)
559  .Case("cast", tgtok::XCast)
560  .Case("empty", tgtok::XEmpty)
561  .Case("subst", tgtok::XSubst)
562  .Case("foldl", tgtok::XFoldl)
563  .Case("foreach", tgtok::XForEach)
564  .Case("listconcat", tgtok::XListConcat)
565  .Case("listsplat", tgtok::XListSplat)
566  .Case("strconcat", tgtok::XStrConcat)
567  .Case("setop", tgtok::XSetOp)
568  .Case("getop", tgtok::XGetOp)
570 
571  return Kind != tgtok::Error ? Kind : ReturnError(Start-1, "Unknown operator");
572 }
573 
574 bool TGLexer::prepExitInclude(bool IncludeStackMustBeEmpty) {
575  // Report an error, if preprocessor control stack for the current
576  // file is not empty.
577  if (!PrepIncludeStack.back()->empty()) {
578  prepReportPreprocessorStackError();
579 
580  return false;
581  }
582 
583  // Pop the preprocessing controls from the include stack.
584  if (PrepIncludeStack.empty()) {
585  PrintFatalError("Preprocessor include stack is empty");
586  }
587 
588  PrepIncludeStack.pop_back();
589 
590  if (IncludeStackMustBeEmpty) {
591  if (!PrepIncludeStack.empty())
592  PrintFatalError("Preprocessor include stack is not empty");
593  } else {
594  if (PrepIncludeStack.empty())
595  PrintFatalError("Preprocessor include stack is empty");
596  }
597 
598  return true;
599 }
600 
601 tgtok::TokKind TGLexer::prepIsDirective() const {
602  for (unsigned ID = 0; ID < llvm::array_lengthof(PreprocessorDirs); ++ID) {
603  int NextChar = *CurPtr;
604  bool Match = true;
605  unsigned I = 0;
606  for (; I < strlen(PreprocessorDirs[ID].Word); ++I) {
607  if (NextChar != PreprocessorDirs[ID].Word[I]) {
608  Match = false;
609  break;
610  }
611 
612  NextChar = peekNextChar(I + 1);
613  }
614 
615  // Check for whitespace after the directive. If there is no whitespace,
616  // then we do not recognize it as a preprocessing directive.
617  if (Match) {
618  tgtok::TokKind Kind = PreprocessorDirs[ID].Kind;
619 
620  // New line and EOF may follow only #else/#endif. It will be reported
621  // as an error for #ifdef/#define after the call to prepLexMacroName().
622  if (NextChar == ' ' || NextChar == '\t' || NextChar == EOF ||
623  NextChar == '\n' ||
624  // It looks like TableGen does not support '\r' as the actual
625  // carriage return, e.g. getNextChar() treats a single '\r'
626  // as '\n'. So we do the same here.
627  NextChar == '\r')
628  return Kind;
629 
630  // Allow comments after some directives, e.g.:
631  // #else// OR #else/**/
632  // #endif// OR #endif/**/
633  //
634  // Note that we do allow comments after #ifdef/#define here, e.g.
635  // #ifdef/**/ AND #ifdef//
636  // #define/**/ AND #define//
637  //
638  // These cases will be reported as incorrect after calling
639  // prepLexMacroName(). We could have supported C-style comments
640  // after #ifdef/#define, but this would complicate the code
641  // for little benefit.
642  if (NextChar == '/') {
643  NextChar = peekNextChar(I + 1);
644 
645  if (NextChar == '*' || NextChar == '/')
646  return Kind;
647 
648  // Pretend that we do not recognize the directive.
649  }
650  }
651  }
652 
653  return tgtok::Error;
654 }
655 
656 bool TGLexer::prepEatPreprocessorDirective(tgtok::TokKind Kind) {
657  TokStart = CurPtr;
658 
659  for (unsigned ID = 0; ID < llvm::array_lengthof(PreprocessorDirs); ++ID)
660  if (PreprocessorDirs[ID].Kind == Kind) {
661  // Advance CurPtr to the end of the preprocessing word.
662  CurPtr += strlen(PreprocessorDirs[ID].Word);
663  return true;
664  }
665 
666  PrintFatalError("Unsupported preprocessing token in "
667  "prepEatPreprocessorDirective()");
668  return false;
669 }
670 
671 tgtok::TokKind TGLexer::lexPreprocessor(
672  tgtok::TokKind Kind, bool ReturnNextLiveToken) {
673 
674  // We must be looking at a preprocessing directive. Eat it!
675  if (!prepEatPreprocessorDirective(Kind))
676  PrintFatalError("lexPreprocessor() called for unknown "
677  "preprocessor directive");
678 
679  if (Kind == tgtok::Ifdef || Kind == tgtok::Ifndef) {
680  StringRef MacroName = prepLexMacroName();
681  StringRef IfTokName = Kind == tgtok::Ifdef ? "#ifdef" : "#ifndef";
682  if (MacroName.empty())
683  return ReturnError(TokStart, "Expected macro name after " + IfTokName);
684 
685  bool MacroIsDefined = DefinedMacros.count(MacroName) != 0;
686 
687  // Canonicalize ifndef to ifdef equivalent
688  if (Kind == tgtok::Ifndef) {
689  MacroIsDefined = !MacroIsDefined;
690  Kind = tgtok::Ifdef;
691  }
692 
693  // Regardless of whether we are processing tokens or not,
694  // we put the #ifdef control on stack.
695  PrepIncludeStack.back()->push_back(
696  {Kind, MacroIsDefined, SMLoc::getFromPointer(TokStart)});
697 
698  if (!prepSkipDirectiveEnd())
699  return ReturnError(CurPtr, "Only comments are supported after " +
700  IfTokName + " NAME");
701 
702  // If we were not processing tokens before this #ifdef,
703  // then just return back to the lines skipping code.
704  if (!ReturnNextLiveToken)
705  return Kind;
706 
707  // If we were processing tokens before this #ifdef,
708  // and the macro is defined, then just return the next token.
709  if (MacroIsDefined)
710  return LexToken();
711 
712  // We were processing tokens before this #ifdef, and the macro
713  // is not defined, so we have to start skipping the lines.
714  // If the skipping is successful, it will return the token following
715  // either #else or #endif corresponding to this #ifdef.
716  if (prepSkipRegion(ReturnNextLiveToken))
717  return LexToken();
718 
719  return tgtok::Error;
720  } else if (Kind == tgtok::Else) {
721  // Check if this #else is correct before calling prepSkipDirectiveEnd(),
722  // which will move CurPtr away from the beginning of #else.
723  if (PrepIncludeStack.back()->empty())
724  return ReturnError(TokStart, "#else without #ifdef or #ifndef");
725 
726  PreprocessorControlDesc IfdefEntry = PrepIncludeStack.back()->back();
727 
728  if (IfdefEntry.Kind != tgtok::Ifdef) {
729  PrintError(TokStart, "double #else");
730  return ReturnError(IfdefEntry.SrcPos, "Previous #else is here");
731  }
732 
733  // Replace the corresponding #ifdef's control with its negation
734  // on the control stack.
735  PrepIncludeStack.back()->pop_back();
736  PrepIncludeStack.back()->push_back(
737  {Kind, !IfdefEntry.IsDefined, SMLoc::getFromPointer(TokStart)});
738 
739  if (!prepSkipDirectiveEnd())
740  return ReturnError(CurPtr, "Only comments are supported after #else");
741 
742  // If we were processing tokens before this #else,
743  // we have to start skipping lines until the matching #endif.
744  if (ReturnNextLiveToken) {
745  if (prepSkipRegion(ReturnNextLiveToken))
746  return LexToken();
747 
748  return tgtok::Error;
749  }
750 
751  // Return to the lines skipping code.
752  return Kind;
753  } else if (Kind == tgtok::Endif) {
754  // Check if this #endif is correct before calling prepSkipDirectiveEnd(),
755  // which will move CurPtr away from the beginning of #endif.
756  if (PrepIncludeStack.back()->empty())
757  return ReturnError(TokStart, "#endif without #ifdef");
758 
759  auto &IfdefOrElseEntry = PrepIncludeStack.back()->back();
760 
761  if (IfdefOrElseEntry.Kind != tgtok::Ifdef &&
762  IfdefOrElseEntry.Kind != tgtok::Else) {
763  PrintFatalError("Invalid preprocessor control on the stack");
764  return tgtok::Error;
765  }
766 
767  if (!prepSkipDirectiveEnd())
768  return ReturnError(CurPtr, "Only comments are supported after #endif");
769 
770  PrepIncludeStack.back()->pop_back();
771 
772  // If we were processing tokens before this #endif, then
773  // we should continue it.
774  if (ReturnNextLiveToken) {
775  return LexToken();
776  }
777 
778  // Return to the lines skipping code.
779  return Kind;
780  } else if (Kind == tgtok::Define) {
781  StringRef MacroName = prepLexMacroName();
782  if (MacroName.empty())
783  return ReturnError(TokStart, "Expected macro name after #define");
784 
785  if (!DefinedMacros.insert(MacroName).second)
787  "Duplicate definition of macro: " + Twine(MacroName));
788 
789  if (!prepSkipDirectiveEnd())
790  return ReturnError(CurPtr,
791  "Only comments are supported after #define NAME");
792 
793  if (!ReturnNextLiveToken) {
794  PrintFatalError("#define must be ignored during the lines skipping");
795  return tgtok::Error;
796  }
797 
798  return LexToken();
799  }
800 
801  PrintFatalError("Preprocessing directive is not supported");
802  return tgtok::Error;
803 }
804 
805 bool TGLexer::prepSkipRegion(bool MustNeverBeFalse) {
806  if (!MustNeverBeFalse)
807  PrintFatalError("Invalid recursion.");
808 
809  do {
810  // Skip all symbols to the line end.
811  prepSkipToLineEnd();
812 
813  // Find the first non-whitespace symbol in the next line(s).
814  if (!prepSkipLineBegin())
815  return false;
816 
817  // If the first non-blank/comment symbol on the line is '#',
818  // it may be a start of preprocessing directive.
819  //
820  // If it is not '#' just go to the next line.
821  if (*CurPtr == '#')
822  ++CurPtr;
823  else
824  continue;
825 
826  tgtok::TokKind Kind = prepIsDirective();
827 
828  // If we did not find a preprocessing directive or it is #define,
829  // then just skip to the next line. We do not have to do anything
830  // for #define in the line-skipping mode.
831  if (Kind == tgtok::Error || Kind == tgtok::Define)
832  continue;
833 
834  tgtok::TokKind ProcessedKind = lexPreprocessor(Kind, false);
835 
836  // If lexPreprocessor() encountered an error during lexing this
837  // preprocessor idiom, then return false to the calling lexPreprocessor().
838  // This will force tgtok::Error to be returned to the tokens processing.
839  if (ProcessedKind == tgtok::Error)
840  return false;
841 
842  if (Kind != ProcessedKind)
843  PrintFatalError("prepIsDirective() and lexPreprocessor() "
844  "returned different token kinds");
845 
846  // If this preprocessing directive enables tokens processing,
847  // then return to the lexPreprocessor() and get to the next token.
848  // We can move from line-skipping mode to processing tokens only
849  // due to #else or #endif.
850  if (prepIsProcessingEnabled()) {
851  if (Kind != tgtok::Else && Kind != tgtok::Endif) {
852  PrintFatalError("Tokens processing was enabled by an unexpected "
853  "preprocessing directive");
854  return false;
855  }
856 
857  return true;
858  }
859  } while (CurPtr != CurBuf.end());
860 
861  // We have reached the end of the file, but never left the lines-skipping
862  // mode. This means there is no matching #endif.
863  prepReportPreprocessorStackError();
864  return false;
865 }
866 
867 StringRef TGLexer::prepLexMacroName() {
868  // Skip whitespaces between the preprocessing directive and the macro name.
869  while (*CurPtr == ' ' || *CurPtr == '\t')
870  ++CurPtr;
871 
872  TokStart = CurPtr;
873  // Macro names start with [a-zA-Z_].
874  if (*CurPtr != '_' && !isalpha(*CurPtr))
875  return "";
876 
877  // Match the rest of the identifier regex: [0-9a-zA-Z_]*
878  while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_')
879  ++CurPtr;
880 
881  return StringRef(TokStart, CurPtr - TokStart);
882 }
883 
884 bool TGLexer::prepSkipLineBegin() {
885  while (CurPtr != CurBuf.end()) {
886  switch (*CurPtr) {
887  case ' ':
888  case '\t':
889  case '\n':
890  case '\r':
891  break;
892 
893  case '/': {
894  int NextChar = peekNextChar(1);
895  if (NextChar == '*') {
896  // Skip C-style comment.
897  // Note that we do not care about skipping the C++-style comments.
898  // If the line contains "//", it may not contain any processable
899  // preprocessing directive. Just return CurPtr pointing to
900  // the first '/' in this case. We also do not care about
901  // incorrect symbols after the first '/' - we are in lines-skipping
902  // mode, so incorrect code is allowed to some extent.
903 
904  // Set TokStart to the beginning of the comment to enable proper
905  // diagnostic printing in case of error in SkipCComment().
906  TokStart = CurPtr;
907 
908  // CurPtr must point to '*' before call to SkipCComment().
909  ++CurPtr;
910  if (SkipCComment())
911  return false;
912  } else {
913  // CurPtr points to the non-whitespace '/'.
914  return true;
915  }
916 
917  // We must not increment CurPtr after the comment was lexed.
918  continue;
919  }
920 
921  default:
922  return true;
923  }
924 
925  ++CurPtr;
926  }
927 
928  // We have reached the end of the file. Return to the lines skipping
929  // code, and allow it to handle the EOF as needed.
930  return true;
931 }
932 
933 bool TGLexer::prepSkipDirectiveEnd() {
934  while (CurPtr != CurBuf.end()) {
935  switch (*CurPtr) {
936  case ' ':
937  case '\t':
938  break;
939 
940  case '\n':
941  case '\r':
942  return true;
943 
944  case '/': {
945  int NextChar = peekNextChar(1);
946  if (NextChar == '/') {
947  // Skip C++-style comment.
948  // We may just return true now, but let's skip to the line/buffer end
949  // to simplify the method specification.
950  ++CurPtr;
951  SkipBCPLComment();
952  } else if (NextChar == '*') {
953  // When we are skipping C-style comment at the end of a preprocessing
954  // directive, we can skip several lines. If any meaningful TD token
955  // follows the end of the C-style comment on the same line, it will
956  // be considered as an invalid usage of TD token.
957  // For example, we want to forbid usages like this one:
958  // #define MACRO class Class {}
959  // But with C-style comments we also disallow the following:
960  // #define MACRO /* This macro is used
961  // to ... */ class Class {}
962  // One can argue that this should be allowed, but it does not seem
963  // to be worth of the complication. Moreover, this matches
964  // the C preprocessor behavior.
965 
966  // Set TokStart to the beginning of the comment to enable proper
967  // diagnostic printer in case of error in SkipCComment().
968  TokStart = CurPtr;
969  ++CurPtr;
970  if (SkipCComment())
971  return false;
972  } else {
973  TokStart = CurPtr;
974  PrintError(CurPtr, "Unexpected character");
975  return false;
976  }
977 
978  // We must not increment CurPtr after the comment was lexed.
979  continue;
980  }
981 
982  default:
983  // Do not allow any non-whitespaces after the directive.
984  TokStart = CurPtr;
985  return false;
986  }
987 
988  ++CurPtr;
989  }
990 
991  return true;
992 }
993 
994 void TGLexer::prepSkipToLineEnd() {
995  while (*CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end())
996  ++CurPtr;
997 }
998 
999 bool TGLexer::prepIsProcessingEnabled() {
1000  for (auto I = PrepIncludeStack.back()->rbegin(),
1001  E = PrepIncludeStack.back()->rend();
1002  I != E; ++I) {
1003  if (!I->IsDefined)
1004  return false;
1005  }
1006 
1007  return true;
1008 }
1009 
1010 void TGLexer::prepReportPreprocessorStackError() {
1011  if (PrepIncludeStack.back()->empty())
1012  PrintFatalError("prepReportPreprocessorStackError() called with "
1013  "empty control stack");
1014 
1015  auto &PrepControl = PrepIncludeStack.back()->back();
1016  PrintError(CurBuf.end(), "Reached EOF without matching #endif");
1017  PrintError(PrepControl.SrcPos, "The latest preprocessor control is here");
1018 
1019  TokStart = CurPtr;
1020 }
SMLoc getLoc() const
Definition: TGLexer.cpp:64
unsigned FindBufferContainingLoc(SMLoc Loc) const
Return the ID of the buffer containing the specified location.
Definition: SourceMgr.cpp:61
StringRef getBuffer() const
Definition: MemoryBuffer.h:71
This class represents lattice values for constants.
Definition: AllocatorList.h:23
iterator begin() const
Definition: ArrayRef.h:144
SourceMgr SrcMgr
Definition: Error.cpp:23
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:67
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
support::ulittle32_t Word
Definition: IRSymtab.h:51
LLVM_NODISCARD R Default(T Value)
Definition: StringSwitch.h:181
LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:156
const MemoryBuffer * getMemoryBuffer(unsigned i) const
Definition: SourceMgr.h:122
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
unsigned getMainFileID() const
Definition: SourceMgr.h:129
const char * getPointer() const
Definition: SMLoc.h:34
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:42
size_type count(StringRef Key) const
count - Return 1 if the element is in the map, 0 otherwise.
Definition: StringMap.h:244
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
TGLexer(SourceMgr &SrcMgr, ArrayRef< std::string > Macros)
Definition: TGLexer.cpp:47
void PrintWarning(ArrayRef< SMLoc > WarningLoc, const Twine &Msg)
Definition: Error.cpp:48
This owns the files read by a parser, handles include stacks, and handles diagnostic wrangling...
Definition: SourceMgr.h:31
tgtok::TokKind Lex()
Definition: TGLexer.h:100
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition: StringSet.h:33
constexpr size_t array_lengthof(T(&)[N])
Find the length of an array.
Definition: STLExtras.h:1335
iterator end() const
Definition: ArrayRef.h:145
LLVM_ATTRIBUTE_NORETURN void PrintFatalError(const Twine &Msg)
Definition: Error.cpp:68
iterator begin() const
Definition: StringRef.h:131
static SMLoc getFromPointer(const char *Ptr)
Definition: SMLoc.h:36
unsigned AddIncludeFile(const std::string &Filename, SMLoc IncludeLoc, std::string &IncludedFile)
Search for a file with the specified name in the current directory or in one of the IncludeDirs...
Definition: SourceMgr.cpp:40
SMLoc getParentIncludeLoc(unsigned i) const
Definition: SourceMgr.h:134
#define I(x, y, z)
Definition: MD5.cpp:59
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:280
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
UnaryPredicate for_each(R &&Range, UnaryPredicate P)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1484
Represents a location in source code.
Definition: SMLoc.h:23
iterator end() const
Definition: StringRef.h:133
void PrintError(ArrayRef< SMLoc > ErrorLoc, const Twine &Msg)
Definition: Error.cpp:58