clang  3.9.0
TokenLexer.cpp
Go to the documentation of this file.
1 //===--- TokenLexer.cpp - Lex from a token stream -------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the TokenLexer interface.
11 //
12 //===----------------------------------------------------------------------===//
13 
#include "clang/Lex/TokenLexer.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Lex/LexDiagnostic.h"
#include "clang/Lex/MacroArgs.h"
#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/Preprocessor.h"
#include "llvm/ADT/SmallString.h"
21 
22 using namespace clang;
23 
24 /// Create a TokenLexer for the specified macro with the specified actual
25 /// arguments. Note that this ctor takes ownership of the ActualArgs pointer.
27  MacroArgs *Actuals) {
28  // If the client is reusing a TokenLexer, make sure to free any memory
29  // associated with it.
30  destroy();
31 
32  Macro = MI;
33  ActualArgs = Actuals;
34  CurToken = 0;
35 
36  ExpandLocStart = Tok.getLocation();
37  ExpandLocEnd = ELEnd;
38  AtStartOfLine = Tok.isAtStartOfLine();
39  HasLeadingSpace = Tok.hasLeadingSpace();
40  NextTokGetsSpace = false;
41  Tokens = &*Macro->tokens_begin();
42  OwnsTokens = false;
43  DisableMacroExpansion = false;
44  NumTokens = Macro->tokens_end()-Macro->tokens_begin();
45  MacroExpansionStart = SourceLocation();
46 
48  MacroStartSLocOffset = SM.getNextLocalOffset();
49 
50  if (NumTokens > 0) {
51  assert(Tokens[0].getLocation().isValid());
52  assert((Tokens[0].getLocation().isFileID() || Tokens[0].is(tok::comment)) &&
53  "Macro defined in macro?");
54  assert(ExpandLocStart.isValid());
55 
56  // Reserve a source location entry chunk for the length of the macro
57  // definition. Tokens that get lexed directly from the definition will
58  // have their locations pointing inside this chunk. This is to avoid
59  // creating separate source location entries for each token.
60  MacroDefStart = SM.getExpansionLoc(Tokens[0].getLocation());
61  MacroDefLength = Macro->getDefinitionLength(SM);
62  MacroExpansionStart = SM.createExpansionLoc(MacroDefStart,
63  ExpandLocStart,
64  ExpandLocEnd,
65  MacroDefLength);
66  }
67 
68  // If this is a function-like macro, expand the arguments and change
69  // Tokens to point to the expanded tokens.
70  if (Macro->isFunctionLike() && Macro->getNumArgs())
71  ExpandFunctionArguments();
72 
73  // Mark the macro as currently disabled, so that it is not recursively
74  // expanded. The macro must be disabled only after argument pre-expansion of
75  // function-like macro arguments occurs.
76  Macro->DisableMacro();
77 }
78 
79 /// Create a TokenLexer for the specified token stream. This does not
80 /// take ownership of the specified token vector.
81 void TokenLexer::Init(const Token *TokArray, unsigned NumToks,
82  bool disableMacroExpansion, bool ownsTokens) {
83  // If the client is reusing a TokenLexer, make sure to free any memory
84  // associated with it.
85  destroy();
86 
87  Macro = nullptr;
88  ActualArgs = nullptr;
89  Tokens = TokArray;
90  OwnsTokens = ownsTokens;
91  DisableMacroExpansion = disableMacroExpansion;
92  NumTokens = NumToks;
93  CurToken = 0;
94  ExpandLocStart = ExpandLocEnd = SourceLocation();
95  AtStartOfLine = false;
96  HasLeadingSpace = false;
97  NextTokGetsSpace = false;
98  MacroExpansionStart = SourceLocation();
99 
100  // Set HasLeadingSpace/AtStartOfLine so that the first token will be
101  // returned unmodified.
102  if (NumToks != 0) {
103  AtStartOfLine = TokArray[0].isAtStartOfLine();
104  HasLeadingSpace = TokArray[0].hasLeadingSpace();
105  }
106 }
107 
108 void TokenLexer::destroy() {
109  // If this was a function-like macro that actually uses its arguments, delete
110  // the expanded tokens.
111  if (OwnsTokens) {
112  delete [] Tokens;
113  Tokens = nullptr;
114  OwnsTokens = false;
115  }
116 
117  // TokenLexer owns its formal arguments.
118  if (ActualArgs) ActualArgs->destroy(PP);
119 }
120 
121 bool TokenLexer::MaybeRemoveCommaBeforeVaArgs(
122  SmallVectorImpl<Token> &ResultToks, bool HasPasteOperator, MacroInfo *Macro,
123  unsigned MacroArgNo, Preprocessor &PP) {
124  // Is the macro argument __VA_ARGS__?
125  if (!Macro->isVariadic() || MacroArgNo != Macro->getNumArgs()-1)
126  return false;
127 
128  // In Microsoft-compatibility mode, a comma is removed in the expansion
129  // of " ... , __VA_ARGS__ " if __VA_ARGS__ is empty. This extension is
130  // not supported by gcc.
131  if (!HasPasteOperator && !PP.getLangOpts().MSVCCompat)
132  return false;
133 
134  // GCC removes the comma in the expansion of " ... , ## __VA_ARGS__ " if
135  // __VA_ARGS__ is empty, but not in strict C99 mode where there are no
136  // named arguments, where it remains. In all other modes, including C99
137  // with GNU extensions, it is removed regardless of named arguments.
138  // Microsoft also appears to support this extension, unofficially.
139  if (PP.getLangOpts().C99 && !PP.getLangOpts().GNUMode
140  && Macro->getNumArgs() < 2)
141  return false;
142 
143  // Is a comma available to be removed?
144  if (ResultToks.empty() || !ResultToks.back().is(tok::comma))
145  return false;
146 
147  // Issue an extension diagnostic for the paste operator.
148  if (HasPasteOperator)
149  PP.Diag(ResultToks.back().getLocation(), diag::ext_paste_comma);
150 
151  // Remove the comma.
152  ResultToks.pop_back();
153 
154  if (!ResultToks.empty()) {
155  // If the comma was right after another paste (e.g. "X##,##__VA_ARGS__"),
156  // then removal of the comma should produce a placemarker token (in C99
157  // terms) which we model by popping off the previous ##, giving us a plain
158  // "X" when __VA_ARGS__ is empty.
159  if (ResultToks.back().is(tok::hashhash))
160  ResultToks.pop_back();
161 
162  // Remember that this comma was elided.
163  ResultToks.back().setFlag(Token::CommaAfterElided);
164  }
165 
166  // Never add a space, even if the comma, ##, or arg had a space.
167  NextTokGetsSpace = false;
168  return true;
169 }
170 
171 /// Expand the arguments of a function-like macro so that we can quickly
172 /// return preexpanded tokens from Tokens.
173 void TokenLexer::ExpandFunctionArguments() {
174  SmallVector<Token, 128> ResultToks;
175 
176  // Loop through 'Tokens', expanding them into ResultToks. Keep
177  // track of whether we change anything. If not, no need to keep them. If so,
178  // we install the newly expanded sequence as the new 'Tokens' list.
179  bool MadeChange = false;
180 
181  for (unsigned i = 0, e = NumTokens; i != e; ++i) {
182  // If we found the stringify operator, get the argument stringified. The
183  // preprocessor already verified that the following token is a macro name
184  // when the #define was parsed.
185  const Token &CurTok = Tokens[i];
186  if (i != 0 && !Tokens[i-1].is(tok::hashhash) && CurTok.hasLeadingSpace())
187  NextTokGetsSpace = true;
188 
189  if (CurTok.isOneOf(tok::hash, tok::hashat)) {
190  int ArgNo = Macro->getArgumentNum(Tokens[i+1].getIdentifierInfo());
191  assert(ArgNo != -1 && "Token following # is not an argument?");
192 
193  SourceLocation ExpansionLocStart =
194  getExpansionLocForMacroDefLoc(CurTok.getLocation());
195  SourceLocation ExpansionLocEnd =
196  getExpansionLocForMacroDefLoc(Tokens[i+1].getLocation());
197 
198  Token Res;
199  if (CurTok.is(tok::hash)) // Stringify
200  Res = ActualArgs->getStringifiedArgument(ArgNo, PP,
201  ExpansionLocStart,
202  ExpansionLocEnd);
203  else {
204  // 'charify': don't bother caching these.
205  Res = MacroArgs::StringifyArgument(ActualArgs->getUnexpArgument(ArgNo),
206  PP, true,
207  ExpansionLocStart,
208  ExpansionLocEnd);
209  }
211 
212  // The stringified/charified string leading space flag gets set to match
213  // the #/#@ operator.
214  if (NextTokGetsSpace)
216 
217  ResultToks.push_back(Res);
218  MadeChange = true;
219  ++i; // Skip arg name.
220  NextTokGetsSpace = false;
221  continue;
222  }
223 
224  // Find out if there is a paste (##) operator before or after the token.
225  bool NonEmptyPasteBefore =
226  !ResultToks.empty() && ResultToks.back().is(tok::hashhash);
227  bool PasteBefore = i != 0 && Tokens[i-1].is(tok::hashhash);
228  bool PasteAfter = i+1 != e && Tokens[i+1].is(tok::hashhash);
229  assert(!NonEmptyPasteBefore || PasteBefore);
230 
231  // Otherwise, if this is not an argument token, just add the token to the
232  // output buffer.
233  IdentifierInfo *II = CurTok.getIdentifierInfo();
234  int ArgNo = II ? Macro->getArgumentNum(II) : -1;
235  if (ArgNo == -1) {
236  // This isn't an argument, just add it.
237  ResultToks.push_back(CurTok);
238 
239  if (NextTokGetsSpace) {
240  ResultToks.back().setFlag(Token::LeadingSpace);
241  NextTokGetsSpace = false;
242  } else if (PasteBefore && !NonEmptyPasteBefore)
243  ResultToks.back().clearFlag(Token::LeadingSpace);
244 
245  continue;
246  }
247 
248  // An argument is expanded somehow, the result is different than the
249  // input.
250  MadeChange = true;
251 
252  // Otherwise, this is a use of the argument.
253 
254  // In Microsoft mode, remove the comma before __VA_ARGS__ to ensure there
255  // are no trailing commas if __VA_ARGS__ is empty.
256  if (!PasteBefore && ActualArgs->isVarargsElidedUse() &&
257  MaybeRemoveCommaBeforeVaArgs(ResultToks,
258  /*HasPasteOperator=*/false,
259  Macro, ArgNo, PP))
260  continue;
261 
262  // If it is not the LHS/RHS of a ## operator, we must pre-expand the
263  // argument and substitute the expanded tokens into the result. This is
264  // C99 6.10.3.1p1.
265  if (!PasteBefore && !PasteAfter) {
266  const Token *ResultArgToks;
267 
268  // Only preexpand the argument if it could possibly need it. This
269  // avoids some work in common cases.
270  const Token *ArgTok = ActualArgs->getUnexpArgument(ArgNo);
271  if (ActualArgs->ArgNeedsPreexpansion(ArgTok, PP))
272  ResultArgToks = &ActualArgs->getPreExpArgument(ArgNo, Macro, PP)[0];
273  else
274  ResultArgToks = ArgTok; // Use non-preexpanded tokens.
275 
276  // If the arg token expanded into anything, append it.
277  if (ResultArgToks->isNot(tok::eof)) {
278  unsigned FirstResult = ResultToks.size();
279  unsigned NumToks = MacroArgs::getArgLength(ResultArgToks);
280  ResultToks.append(ResultArgToks, ResultArgToks+NumToks);
281 
282  // In Microsoft-compatibility mode, we follow MSVC's preprocessing
283  // behavior by not considering single commas from nested macro
284  // expansions as argument separators. Set a flag on the token so we can
285  // test for this later when the macro expansion is processed.
286  if (PP.getLangOpts().MSVCCompat && NumToks == 1 &&
287  ResultToks.back().is(tok::comma))
288  ResultToks.back().setFlag(Token::IgnoredComma);
289 
290  // If the '##' came from expanding an argument, turn it into 'unknown'
291  // to avoid pasting.
292  for (unsigned i = FirstResult, e = ResultToks.size(); i != e; ++i) {
293  Token &Tok = ResultToks[i];
294  if (Tok.is(tok::hashhash))
295  Tok.setKind(tok::unknown);
296  }
297 
298  if(ExpandLocStart.isValid()) {
299  updateLocForMacroArgTokens(CurTok.getLocation(),
300  ResultToks.begin()+FirstResult,
301  ResultToks.end());
302  }
303 
304  // If any tokens were substituted from the argument, the whitespace
305  // before the first token should match the whitespace of the arg
306  // identifier.
307  ResultToks[FirstResult].setFlagValue(Token::LeadingSpace,
308  NextTokGetsSpace);
309  ResultToks[FirstResult].setFlagValue(Token::StartOfLine, false);
310  NextTokGetsSpace = false;
311  }
312  continue;
313  }
314 
315  // Okay, we have a token that is either the LHS or RHS of a paste (##)
316  // argument. It gets substituted as its non-pre-expanded tokens.
317  const Token *ArgToks = ActualArgs->getUnexpArgument(ArgNo);
318  unsigned NumToks = MacroArgs::getArgLength(ArgToks);
319  if (NumToks) { // Not an empty argument?
320  // If this is the GNU ", ## __VA_ARGS__" extension, and we just learned
321  // that __VA_ARGS__ expands to multiple tokens, avoid a pasting error when
322  // the expander trys to paste ',' with the first token of the __VA_ARGS__
323  // expansion.
324  if (NonEmptyPasteBefore && ResultToks.size() >= 2 &&
325  ResultToks[ResultToks.size()-2].is(tok::comma) &&
326  (unsigned)ArgNo == Macro->getNumArgs()-1 &&
327  Macro->isVariadic()) {
328  // Remove the paste operator, report use of the extension.
329  PP.Diag(ResultToks.pop_back_val().getLocation(), diag::ext_paste_comma);
330  }
331 
332  ResultToks.append(ArgToks, ArgToks+NumToks);
333 
334  // If the '##' came from expanding an argument, turn it into 'unknown'
335  // to avoid pasting.
336  for (unsigned i = ResultToks.size() - NumToks, e = ResultToks.size();
337  i != e; ++i) {
338  Token &Tok = ResultToks[i];
339  if (Tok.is(tok::hashhash))
340  Tok.setKind(tok::unknown);
341  }
342 
343  if (ExpandLocStart.isValid()) {
344  updateLocForMacroArgTokens(CurTok.getLocation(),
345  ResultToks.end()-NumToks, ResultToks.end());
346  }
347 
348  // If this token (the macro argument) was supposed to get leading
349  // whitespace, transfer this information onto the first token of the
350  // expansion.
351  //
352  // Do not do this if the paste operator occurs before the macro argument,
353  // as in "A ## MACROARG". In valid code, the first token will get
354  // smooshed onto the preceding one anyway (forming AMACROARG). In
355  // assembler-with-cpp mode, invalid pastes are allowed through: in this
356  // case, we do not want the extra whitespace to be added. For example,
357  // we want ". ## foo" -> ".foo" not ". foo".
358  if (NextTokGetsSpace)
359  ResultToks[ResultToks.size()-NumToks].setFlag(Token::LeadingSpace);
360 
361  NextTokGetsSpace = false;
362  continue;
363  }
364 
365  // If an empty argument is on the LHS or RHS of a paste, the standard (C99
366  // 6.10.3.3p2,3) calls for a bunch of placemarker stuff to occur. We
367  // implement this by eating ## operators when a LHS or RHS expands to
368  // empty.
369  if (PasteAfter) {
370  // Discard the argument token and skip (don't copy to the expansion
371  // buffer) the paste operator after it.
372  ++i;
373  continue;
374  }
375 
376  // If this is on the RHS of a paste operator, we've already copied the
377  // paste operator to the ResultToks list, unless the LHS was empty too.
378  // Remove it.
379  assert(PasteBefore);
380  if (NonEmptyPasteBefore) {
381  assert(ResultToks.back().is(tok::hashhash));
382  ResultToks.pop_back();
383  }
384 
385  // If this is the __VA_ARGS__ token, and if the argument wasn't provided,
386  // and if the macro had at least one real argument, and if the token before
387  // the ## was a comma, remove the comma. This is a GCC extension which is
388  // disabled when using -std=c99.
389  if (ActualArgs->isVarargsElidedUse())
390  MaybeRemoveCommaBeforeVaArgs(ResultToks,
391  /*HasPasteOperator=*/true,
392  Macro, ArgNo, PP);
393  }
394 
395  // If anything changed, install this as the new Tokens list.
396  if (MadeChange) {
397  assert(!OwnsTokens && "This would leak if we already own the token list");
398  // This is deleted in the dtor.
399  NumTokens = ResultToks.size();
400  // The tokens will be added to Preprocessor's cache and will be removed
401  // when this TokenLexer finishes lexing them.
402  Tokens = PP.cacheMacroExpandedTokens(this, ResultToks);
403 
404  // The preprocessor cache of macro expanded tokens owns these tokens,not us.
405  OwnsTokens = false;
406  }
407 }
408 
409 /// \brief Checks if two tokens form wide string literal.
410 static bool isWideStringLiteralFromMacro(const Token &FirstTok,
411  const Token &SecondTok) {
412  return FirstTok.is(tok::identifier) &&
413  FirstTok.getIdentifierInfo()->isStr("L") && SecondTok.isLiteral() &&
414  SecondTok.stringifiedInMacro();
415 }
416 
417 /// Lex - Lex and return a token from this macro stream.
418 ///
419 bool TokenLexer::Lex(Token &Tok) {
420  // Lexing off the end of the macro, pop this macro off the expansion stack.
421  if (isAtEnd()) {
422  // If this is a macro (not a token stream), mark the macro enabled now
423  // that it is no longer being expanded.
424  if (Macro) Macro->EnableMacro();
425 
426  Tok.startToken();
427  Tok.setFlagValue(Token::StartOfLine , AtStartOfLine);
428  Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace || NextTokGetsSpace);
429  if (CurToken == 0)
431  return PP.HandleEndOfTokenLexer(Tok);
432  }
433 
435 
436  // If this is the first token of the expanded result, we inherit spacing
437  // properties later.
438  bool isFirstToken = CurToken == 0;
439 
440  // Get the next token to return.
441  Tok = Tokens[CurToken++];
442 
443  bool TokenIsFromPaste = false;
444 
445  // If this token is followed by a token paste (##) operator, paste the tokens!
446  // Note that ## is a normal token when not expanding a macro.
447  if (!isAtEnd() && Macro &&
448  (Tokens[CurToken].is(tok::hashhash) ||
449  // Special processing of L#x macros in -fms-compatibility mode.
450  // Microsoft compiler is able to form a wide string literal from
451  // 'L#macro_arg' construct in a function-like macro.
452  (PP.getLangOpts().MSVCCompat &&
453  isWideStringLiteralFromMacro(Tok, Tokens[CurToken])))) {
454  // When handling the microsoft /##/ extension, the final token is
455  // returned by PasteTokens, not the pasted token.
456  if (PasteTokens(Tok))
457  return true;
458 
459  TokenIsFromPaste = true;
460  }
461 
462  // The token's current location indicate where the token was lexed from. We
463  // need this information to compute the spelling of the token, but any
464  // diagnostics for the expanded token should appear as if they came from
465  // ExpansionLoc. Pull this information together into a new SourceLocation
466  // that captures all of this.
467  if (ExpandLocStart.isValid() && // Don't do this for token streams.
468  // Check that the token's location was not already set properly.
469  SM.isBeforeInSLocAddrSpace(Tok.getLocation(), MacroStartSLocOffset)) {
470  SourceLocation instLoc;
471  if (Tok.is(tok::comment)) {
472  instLoc = SM.createExpansionLoc(Tok.getLocation(),
473  ExpandLocStart,
474  ExpandLocEnd,
475  Tok.getLength());
476  } else {
477  instLoc = getExpansionLocForMacroDefLoc(Tok.getLocation());
478  }
479 
480  Tok.setLocation(instLoc);
481  }
482 
483  // If this is the first token, set the lexical properties of the token to
484  // match the lexical properties of the macro identifier.
485  if (isFirstToken) {
486  Tok.setFlagValue(Token::StartOfLine , AtStartOfLine);
487  Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace);
488  } else {
489  // If this is not the first token, we may still need to pass through
490  // leading whitespace if we've expanded a macro.
491  if (AtStartOfLine) Tok.setFlag(Token::StartOfLine);
492  if (HasLeadingSpace) Tok.setFlag(Token::LeadingSpace);
493  }
494  AtStartOfLine = false;
495  HasLeadingSpace = false;
496 
497  // Handle recursive expansion!
498  if (!Tok.isAnnotation() && Tok.getIdentifierInfo() != nullptr) {
499  // Change the kind of this identifier to the appropriate token kind, e.g.
500  // turning "for" into a keyword.
501  IdentifierInfo *II = Tok.getIdentifierInfo();
502  Tok.setKind(II->getTokenID());
503 
504  // If this identifier was poisoned and from a paste, emit an error. This
505  // won't be handled by Preprocessor::HandleIdentifier because this is coming
506  // from a macro expansion.
507  if (II->isPoisoned() && TokenIsFromPaste) {
508  PP.HandlePoisonedIdentifier(Tok);
509  }
510 
511  if (!DisableMacroExpansion && II->isHandleIdentifierCase())
512  return PP.HandleIdentifier(Tok);
513  }
514 
515  // Otherwise, return a normal token.
516  return true;
517 }
518 
519 /// PasteTokens - Tok is the LHS of a ## operator, and CurToken is the ##
520 /// operator. Read the ## and RHS, and paste the LHS/RHS together. If there
521 /// are more ## after it, chomp them iteratively. Return the result as Tok.
522 /// If this returns true, the caller should immediately return the token.
523 bool TokenLexer::PasteTokens(Token &Tok) {
524  // MSVC: If previous token was pasted, this must be a recovery from an invalid
525  // paste operation. Ignore spaces before this token to mimic MSVC output.
526  // Required for generating valid UUID strings in some MS headers.
527  if (PP.getLangOpts().MicrosoftExt && (CurToken >= 2) &&
528  Tokens[CurToken - 2].is(tok::hashhash))
530 
532  const char *ResultTokStrPtr = nullptr;
533  SourceLocation StartLoc = Tok.getLocation();
534  SourceLocation PasteOpLoc;
535  do {
536  // Consume the ## operator if any.
537  PasteOpLoc = Tokens[CurToken].getLocation();
538  if (Tokens[CurToken].is(tok::hashhash))
539  ++CurToken;
540  assert(!isAtEnd() && "No token on the RHS of a paste operator!");
541 
542  // Get the RHS token.
543  const Token &RHS = Tokens[CurToken];
544 
545  // Allocate space for the result token. This is guaranteed to be enough for
546  // the two tokens.
547  Buffer.resize(Tok.getLength() + RHS.getLength());
548 
549  // Get the spelling of the LHS token in Buffer.
550  const char *BufPtr = &Buffer[0];
551  bool Invalid = false;
552  unsigned LHSLen = PP.getSpelling(Tok, BufPtr, &Invalid);
553  if (BufPtr != &Buffer[0]) // Really, we want the chars in Buffer!
554  memcpy(&Buffer[0], BufPtr, LHSLen);
555  if (Invalid)
556  return true;
557 
558  BufPtr = Buffer.data() + LHSLen;
559  unsigned RHSLen = PP.getSpelling(RHS, BufPtr, &Invalid);
560  if (Invalid)
561  return true;
562  if (RHSLen && BufPtr != &Buffer[LHSLen])
563  // Really, we want the chars in Buffer!
564  memcpy(&Buffer[LHSLen], BufPtr, RHSLen);
565 
566  // Trim excess space.
567  Buffer.resize(LHSLen+RHSLen);
568 
569  // Plop the pasted result (including the trailing newline and null) into a
570  // scratch buffer where we can lex it.
571  Token ResultTokTmp;
572  ResultTokTmp.startToken();
573 
574  // Claim that the tmp token is a string_literal so that we can get the
575  // character pointer back from CreateString in getLiteralData().
576  ResultTokTmp.setKind(tok::string_literal);
577  PP.CreateString(Buffer, ResultTokTmp);
578  SourceLocation ResultTokLoc = ResultTokTmp.getLocation();
579  ResultTokStrPtr = ResultTokTmp.getLiteralData();
580 
581  // Lex the resultant pasted token into Result.
582  Token Result;
583 
584  if (Tok.isAnyIdentifier() && RHS.isAnyIdentifier()) {
585  // Common paste case: identifier+identifier = identifier. Avoid creating
586  // a lexer and other overhead.
587  PP.IncrementPasteCounter(true);
588  Result.startToken();
589  Result.setKind(tok::raw_identifier);
590  Result.setRawIdentifierData(ResultTokStrPtr);
591  Result.setLocation(ResultTokLoc);
592  Result.setLength(LHSLen+RHSLen);
593  } else {
594  PP.IncrementPasteCounter(false);
595 
596  assert(ResultTokLoc.isFileID() &&
597  "Should be a raw location into scratch buffer");
598  SourceManager &SourceMgr = PP.getSourceManager();
599  FileID LocFileID = SourceMgr.getFileID(ResultTokLoc);
600 
601  bool Invalid = false;
602  const char *ScratchBufStart
603  = SourceMgr.getBufferData(LocFileID, &Invalid).data();
604  if (Invalid)
605  return false;
606 
607  // Make a lexer to lex this string from. Lex just this one token.
608  // Make a lexer object so that we lex and expand the paste result.
609  Lexer TL(SourceMgr.getLocForStartOfFile(LocFileID),
610  PP.getLangOpts(), ScratchBufStart,
611  ResultTokStrPtr, ResultTokStrPtr+LHSLen+RHSLen);
612 
613  // Lex a token in raw mode. This way it won't look up identifiers
614  // automatically, lexing off the end will return an eof token, and
615  // warnings are disabled. This returns true if the result token is the
616  // entire buffer.
617  bool isInvalid = !TL.LexFromRawLexer(Result);
618 
619  // If we got an EOF token, we didn't form even ONE token. For example, we
620  // did "/ ## /" to get "//".
621  isInvalid |= Result.is(tok::eof);
622 
623  // If pasting the two tokens didn't form a full new token, this is an
624  // error. This occurs with "x ## +" and other stuff. Return with Tok
625  // unmodified and with RHS as the next token to lex.
626  if (isInvalid) {
627  // Explicitly convert the token location to have proper expansion
628  // information so that the user knows where it came from.
630  SourceLocation Loc =
631  SM.createExpansionLoc(PasteOpLoc, ExpandLocStart, ExpandLocEnd, 2);
632 
633  // Test for the Microsoft extension of /##/ turning into // here on the
634  // error path.
635  if (PP.getLangOpts().MicrosoftExt && Tok.is(tok::slash) &&
636  RHS.is(tok::slash)) {
637  HandleMicrosoftCommentPaste(Tok, Loc);
638  return true;
639  }
640 
641  // Do not emit the error when preprocessing assembler code.
642  if (!PP.getLangOpts().AsmPreprocessor) {
643  // If we're in microsoft extensions mode, downgrade this from a hard
644  // error to an extension that defaults to an error. This allows
645  // disabling it.
646  PP.Diag(Loc, PP.getLangOpts().MicrosoftExt ? diag::ext_pp_bad_paste_ms
647  : diag::err_pp_bad_paste)
648  << Buffer;
649  }
650 
651  // An error has occurred so exit loop.
652  break;
653  }
654 
655  // Turn ## into 'unknown' to avoid # ## # from looking like a paste
656  // operator.
657  if (Result.is(tok::hashhash))
658  Result.setKind(tok::unknown);
659  }
660 
661  // Transfer properties of the LHS over the Result.
664 
665  // Finally, replace LHS with the result, consume the RHS, and iterate.
666  ++CurToken;
667  Tok = Result;
668  } while (!isAtEnd() && Tokens[CurToken].is(tok::hashhash));
669 
670  SourceLocation EndLoc = Tokens[CurToken - 1].getLocation();
671 
672  // The token's current location indicate where the token was lexed from. We
673  // need this information to compute the spelling of the token, but any
674  // diagnostics for the expanded token should appear as if the token was
675  // expanded from the full ## expression. Pull this information together into
676  // a new SourceLocation that captures all of this.
677  SourceManager &SM = PP.getSourceManager();
678  if (StartLoc.isFileID())
679  StartLoc = getExpansionLocForMacroDefLoc(StartLoc);
680  if (EndLoc.isFileID())
681  EndLoc = getExpansionLocForMacroDefLoc(EndLoc);
682  FileID MacroFID = SM.getFileID(MacroExpansionStart);
683  while (SM.getFileID(StartLoc) != MacroFID)
684  StartLoc = SM.getImmediateExpansionRange(StartLoc).first;
685  while (SM.getFileID(EndLoc) != MacroFID)
686  EndLoc = SM.getImmediateExpansionRange(EndLoc).second;
687 
688  Tok.setLocation(SM.createExpansionLoc(Tok.getLocation(), StartLoc, EndLoc,
689  Tok.getLength()));
690 
691  // Now that we got the result token, it will be subject to expansion. Since
692  // token pasting re-lexes the result token in raw mode, identifier information
693  // isn't looked up. As such, if the result is an identifier, look up id info.
694  if (Tok.is(tok::raw_identifier)) {
695  // Look up the identifier info for the token. We disabled identifier lookup
696  // by saying we're skipping contents, so we need to do this manually.
697  PP.LookUpIdentifierInfo(Tok);
698  }
699  return false;
700 }
701 
702 /// isNextTokenLParen - If the next token lexed will pop this macro off the
703 /// expansion stack, return 2. If the next unexpanded token is a '(', return
704 /// 1, otherwise return 0.
706  // Out of tokens?
707  if (isAtEnd())
708  return 2;
709  return Tokens[CurToken].is(tok::l_paren);
710 }
711 
712 /// isParsingPreprocessorDirective - Return true if we are in the middle of a
713 /// preprocessor directive.
715  return Tokens[NumTokens-1].is(tok::eod) && !isAtEnd();
716 }
717 
718 /// HandleMicrosoftCommentPaste - In microsoft compatibility mode, /##/ pastes
719 /// together to form a comment that comments out everything in the current
720 /// macro, other active macros, and anything left on the current physical
721 /// source line of the expanded buffer. Handle this by returning the
722 /// first token on the next line.
723 void TokenLexer::HandleMicrosoftCommentPaste(Token &Tok, SourceLocation OpLoc) {
724  PP.Diag(OpLoc, diag::ext_comment_paste_microsoft);
725 
726  // We 'comment out' the rest of this macro by just ignoring the rest of the
727  // tokens that have not been lexed yet, if any.
728 
729  // Since this must be a macro, mark the macro enabled now that it is no longer
730  // being expanded.
731  assert(Macro && "Token streams can't paste comments");
732  Macro->EnableMacro();
733 
735 }
736 
737 /// \brief If \arg loc is a file ID and points inside the current macro
738 /// definition, returns the appropriate source location pointing at the
739 /// macro expansion source location entry, otherwise it returns an invalid
740 /// SourceLocation.
742 TokenLexer::getExpansionLocForMacroDefLoc(SourceLocation loc) const {
743  assert(ExpandLocStart.isValid() && MacroExpansionStart.isValid() &&
744  "Not appropriate for token streams");
745  assert(loc.isValid() && loc.isFileID());
746 
747  SourceManager &SM = PP.getSourceManager();
748  assert(SM.isInSLocAddrSpace(loc, MacroDefStart, MacroDefLength) &&
749  "Expected loc to come from the macro definition");
750 
751  unsigned relativeOffset = 0;
752  SM.isInSLocAddrSpace(loc, MacroDefStart, MacroDefLength, &relativeOffset);
753  return MacroExpansionStart.getLocWithOffset(relativeOffset);
754 }
755 
756 /// \brief Finds the tokens that are consecutive (from the same FileID)
757 /// creates a single SLocEntry, and assigns SourceLocations to each token that
758 /// point to that SLocEntry. e.g for
759 /// assert(foo == bar);
760 /// There will be a single SLocEntry for the "foo == bar" chunk and locations
761 /// for the 'foo', '==', 'bar' tokens will point inside that chunk.
762 ///
763 /// \arg begin_tokens will be updated to a position past all the found
764 /// consecutive tokens.
766  SourceLocation InstLoc,
767  Token *&begin_tokens,
768  Token * end_tokens) {
769  assert(begin_tokens < end_tokens);
770 
771  SourceLocation FirstLoc = begin_tokens->getLocation();
772  SourceLocation CurLoc = FirstLoc;
773 
774  // Compare the source location offset of tokens and group together tokens that
775  // are close, even if their locations point to different FileIDs. e.g.
776  //
777  // |bar | foo | cake | (3 tokens from 3 consecutive FileIDs)
778  // ^ ^
779  // |bar foo cake| (one SLocEntry chunk for all tokens)
780  //
781  // we can perform this "merge" since the token's spelling location depends
782  // on the relative offset.
783 
784  Token *NextTok = begin_tokens + 1;
785  for (; NextTok < end_tokens; ++NextTok) {
786  SourceLocation NextLoc = NextTok->getLocation();
787  if (CurLoc.isFileID() != NextLoc.isFileID())
788  break; // Token from different kind of FileID.
789 
790  int RelOffs;
791  if (!SM.isInSameSLocAddrSpace(CurLoc, NextLoc, &RelOffs))
792  break; // Token from different local/loaded location.
793  // Check that token is not before the previous token or more than 50
794  // "characters" away.
795  if (RelOffs < 0 || RelOffs > 50)
796  break;
797 
798  if (CurLoc.isMacroID() && !SM.isWrittenInSameFile(CurLoc, NextLoc))
799  break; // Token from a different macro.
800 
801  CurLoc = NextLoc;
802  }
803 
804  // For the consecutive tokens, find the length of the SLocEntry to contain
805  // all of them.
806  Token &LastConsecutiveTok = *(NextTok-1);
807  int LastRelOffs = 0;
808  SM.isInSameSLocAddrSpace(FirstLoc, LastConsecutiveTok.getLocation(),
809  &LastRelOffs);
810  unsigned FullLength = LastRelOffs + LastConsecutiveTok.getLength();
811 
812  // Create a macro expansion SLocEntry that will "contain" all of the tokens.
813  SourceLocation Expansion =
814  SM.createMacroArgExpansionLoc(FirstLoc, InstLoc,FullLength);
815 
816  // Change the location of the tokens from the spelling location to the new
817  // expanded location.
818  for (; begin_tokens < NextTok; ++begin_tokens) {
819  Token &Tok = *begin_tokens;
820  int RelOffs = 0;
821  SM.isInSameSLocAddrSpace(FirstLoc, Tok.getLocation(), &RelOffs);
822  Tok.setLocation(Expansion.getLocWithOffset(RelOffs));
823  }
824 }
825 
826 /// \brief Creates SLocEntries and updates the locations of macro argument
827 /// tokens to their new expanded locations.
828 ///
829 /// \param ArgIdDefLoc the location of the macro argument id inside the macro
830 /// definition.
831 /// \param Tokens the macro argument tokens to update.
832 void TokenLexer::updateLocForMacroArgTokens(SourceLocation ArgIdSpellLoc,
833  Token *begin_tokens,
834  Token *end_tokens) {
835  SourceManager &SM = PP.getSourceManager();
836 
837  SourceLocation InstLoc =
838  getExpansionLocForMacroDefLoc(ArgIdSpellLoc);
839 
840  while (begin_tokens < end_tokens) {
841  // If there's only one token just create a SLocEntry for it.
842  if (end_tokens - begin_tokens == 1) {
843  Token &Tok = *begin_tokens;
845  InstLoc,
846  Tok.getLength()));
847  return;
848  }
849 
850  updateConsecutiveMacroArgTokens(SM, InstLoc, begin_tokens, end_tokens);
851  }
852 }
853 
854 void TokenLexer::PropagateLineStartLeadingSpaceInfo(Token &Result) {
855  AtStartOfLine = Result.isAtStartOfLine();
856  HasLeadingSpace = Result.hasLeadingSpace();
857 }
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
Definition: Token.h:265
SourceManager & getSourceManager() const
Definition: Preprocessor.h:694
bool isPoisoned() const
Return true if this token has been poisoned.
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens...
Definition: Lexer.h:46
void DisableMacro()
Definition: MacroInfo.h:262
void Init(Token &Tok, SourceLocation ILEnd, MacroInfo *MI, MacroArgs *ActualArgs)
Init - Initialize this TokenLexer to expand from the specified macro with the specified argument info...
Definition: TokenLexer.cpp:26
bool isMacroID() const
void setFlagValue(TokenFlags Flag, bool Val)
Set a flag to either true or false.
Definition: Token.h:256
unsigned isNextTokenLParen() const
isNextTokenLParen - If the next token lexed will pop this macro off the expansion stack...
Definition: TokenLexer.cpp:705
Defines the SourceManager interface.
unsigned getNextLocalOffset() const
static void updateConsecutiveMacroArgTokens(SourceManager &SM, SourceLocation InstLoc, Token *&begin_tokens, Token *end_tokens)
Finds the tokens that are consecutive (from the same FileID), creates a single SLocEntry, and assigns SourceLocations to each token that point to that SLocEntry.
Definition: TokenLexer.cpp:765
bool isParsingPreprocessorDirective() const
isParsingPreprocessorDirective - Return true if we are in the middle of a preprocessor directive...
Definition: TokenLexer.cpp:714
Defines the clang::MacroInfo and clang::MacroDirective classes.
bool hasLeadingSpace() const
Return true if this token has whitespace before it.
Definition: Token.h:269
std::unique_ptr< llvm::MemoryBuffer > Buffer
void setFlag(TokenFlags Flag)
Set the specified flag.
Definition: Token.h:233
bool isVarargsElidedUse() const
isVarargsElidedUse - Return true if this is a C99 style varargs macro invocation and there was no arg...
Definition: MacroArgs.h:107
bool isAnyIdentifier() const
Return true if this is a raw identifier (when lexing in raw mode) or a non-keyword identifier (when l...
Definition: Token.h:106
StringRef getSpelling(SourceLocation loc, SmallVectorImpl< char > &buffer, bool *invalid=nullptr) const
Return the 'spelling' of the token at the given location; does not go up to the spelling location or ...
bool HandleEndOfTokenLexer(Token &Result)
Callback invoked when the current TokenLexer hits the end of its token stream.
One of these records is kept for each identifier that is lexed.
bool stringifiedInMacro() const
Returns true if this token is formed by macro by stringizing or charizing operator.
Definition: Token.h:297
void setRawIdentifierData(const char *Ptr)
Definition: Token.h:206
bool isFileID() const
const LangOptions & getLangOpts() const
Definition: Preprocessor.h:690
Token - This structure provides full information about a lexed token.
Definition: Token.h:35
void setKind(tok::TokenKind K)
Definition: Token.h:90
bool Lex(Token &Tok)
Lex - Lex and return a token from this macro stream.
Definition: TokenLexer.cpp:419
SourceLocation getLocWithOffset(int Offset) const
Return a source location with the specified offset from this SourceLocation.
tok::TokenKind getTokenID() const
If this is a source-language token (e.g. 'for'), this API can be used to cause the lexer to map identifiers to source-language tokens.
const std::vector< Token > & getPreExpArgument(unsigned Arg, const MacroInfo *MI, Preprocessor &PP)
getPreExpArgument - Return the pre-expanded form of the specified argument.
Definition: MacroArgs.cpp:147
void destroy(Preprocessor &PP)
destroy - Destroy and deallocate the memory for this object.
Definition: MacroArgs.cpp:73
const Token * getUnexpArgument(unsigned Arg) const
getUnexpArgument - Return a pointer to the first token of the unexpanded token list for the specified...
Definition: MacroArgs.cpp:113
tokens_iterator tokens_begin() const
Definition: MacroInfo.h:239
bool isVariadic() const
Definition: MacroInfo.h:204
DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const
Forwarding function for diagnostics.
const Token & getStringifiedArgument(unsigned ArgNo, Preprocessor &PP, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd)
getStringifiedArgument - Compute, cache, and return the specified argument that has been 'stringified...
Definition: MacroArgs.cpp:297
static bool isWideStringLiteralFromMacro(const Token &FirstTok, const Token &SecondTok)
Checks if two tokens form wide string literal.
Definition: TokenLexer.cpp:410
FileID getFileID(SourceLocation SpellingLoc) const
Return the FileID for a SourceLocation.
static Token StringifyArgument(const Token *ArgToks, Preprocessor &PP, bool Charify, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd)
StringifyArgument - Implement C99 6.10.3.2p2, converting a sequence of tokens into the literal string...
Definition: MacroArgs.cpp:194
bool isWrittenInSameFile(SourceLocation Loc1, SourceLocation Loc2) const
Returns true if the spelling locations for both SourceLocations are part of the same file buffer...
MacroArgs - An instance of this class captures information about the formal arguments specified to a ...
Definition: MacroArgs.h:29
unsigned getNumArgs() const
Definition: MacroInfo.h:179
SourceLocation createMacroArgExpansionLoc(SourceLocation Loc, SourceLocation ExpansionLoc, unsigned TokLength)
Return a new SourceLocation that encodes the fact that a token from SpellingLoc should actually be re...
Defines the clang::Preprocessor interface.
int getArgumentNum(const IdentifierInfo *Arg) const
Return the argument number of the specified identifier, or -1 if the identifier is not a formal argum...
Definition: MacroInfo.h:186
void HandleMicrosoftCommentPaste(Token &Tok)
When the macro expander pastes together a comment (/##/) in Microsoft mode, this method handles updat...
void IncrementPasteCounter(bool isFast)
Increment the counters for the number of token paste operations performed.
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file. ...
Definition: Token.h:123
unsigned getDefinitionLength(SourceManager &SM) const
Get length in characters of the macro definition.
Definition: MacroInfo.h:129
bool isNot(tok::TokenKind K) const
Definition: Token.h:95
SourceLocation createExpansionLoc(SourceLocation Loc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLength, int LoadedID=0, unsigned LoadedOffset=0)
Return a new SourceLocation that encodes the fact that a token from SpellingLoc should actually be re...
The result type of a method or function.
bool isBeforeInSLocAddrSpace(SourceLocation LHS, SourceLocation RHS) const
Determines the order of 2 source locations in the "source location address space".
const SourceManager & SM
Definition: Format.cpp:1184
const char * getLiteralData() const
getLiteralData - For a literal token (numeric constant, string, etc), this returns a pointer to the s...
Definition: Token.h:214
bool isHandleIdentifierCase() const
Return true if the Preprocessor::HandleIdentifier must be called on a token of this identifier...
Encodes a location in the source.
void setLength(unsigned Len)
Definition: Token.h:132
bool isValid() const
Return true if this is a valid SourceLocation object.
bool ArgNeedsPreexpansion(const Token *ArgTok, Preprocessor &PP) const
ArgNeedsPreexpansion - If we can prove that the argument won't be affected by pre-expansion, return false.
Definition: MacroArgs.cpp:131
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {...
Definition: Token.h:94
bool isStr(const char(&Str)[StrLen]) const
Return true if this is the identifier for the specified string.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
static bool isInvalid(LocType Loc, bool *Invalid)
bool isInSLocAddrSpace(SourceLocation Loc, SourceLocation Start, unsigned Length, unsigned *RelativeOffset=nullptr) const
Returns true if Loc is inside the [Start, +Length) chunk of the source location address space...
std::pair< SourceLocation, SourceLocation > getImmediateExpansionRange(SourceLocation Loc) const
Return the start/end of the expansion information for an expansion location.
static unsigned getArgLength(const Token *ArgPtr)
getArgLength - Given a pointer to an expanded or unexpanded argument, return the number of tokens...
Definition: MacroArgs.cpp:103
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Definition: Token.h:112
bool isFunctionLike() const
Definition: MacroInfo.h:196
Encapsulates the data about a macro definition (e.g. its tokens).
Definition: MacroInfo.h:34
bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const
Definition: Token.h:96
bool isInSameSLocAddrSpace(SourceLocation LHS, SourceLocation RHS, int *RelativeOffset) const
Return true if both LHS and RHS are in the local source location address space or the loaded one...
bool HandleIdentifier(Token &Identifier)
Callback invoked when the lexer reads an identifier and has filled in the tokens IdentifierInfo membe...
void CreateString(StringRef Str, Token &Tok, SourceLocation ExpansionLocStart=SourceLocation(), SourceLocation ExpansionLocEnd=SourceLocation())
Plop the specified string into a scratch buffer and set the specified token's location and length to ...
tokens_iterator tokens_end() const
Definition: MacroInfo.h:240
void HandlePoisonedIdentifier(Token &Tok)
Display reason for poisoned identifier.
IdentifierInfo * LookUpIdentifierInfo(Token &Identifier) const
Given a tok::raw_identifier token, look up the identifier information for the token and install it in...
unsigned getLength() const
Definition: Token.h:126
void setLocation(SourceLocation L)
Definition: Token.h:131
void EnableMacro()
Definition: MacroInfo.h:257
void clearFlag(TokenFlags Flag)
Unset the specified flag.
Definition: Token.h:243
SourceLocation getExpansionLoc(SourceLocation Loc) const
Given a SourceLocation object Loc, return the expansion location referenced by the ID...
bool isAnnotation() const
Return true if this is any of tok::annot_* kind tokens.
Definition: Token.h:117
This class handles loading and caching of source files into memory.
void startToken()
Reset all flags to cleared.
Definition: Token.h:168
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
Definition: Preprocessor.h:97
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:176