clang  3.9.0
UnwrappedLineParser.cpp
Go to the documentation of this file.
1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file contains the implementation of the UnwrappedLineParser,
12 /// which turns a stream of tokens into UnwrappedLines.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "UnwrappedLineParser.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #define DEBUG_TYPE "format-parser"
22 
23 namespace clang {
24 namespace format {
25 
27 public:
28  virtual ~FormatTokenSource() {}
29  virtual FormatToken *getNextToken() = 0;
30 
31  virtual unsigned getPosition() = 0;
32  virtual FormatToken *setPosition(unsigned Position) = 0;
33 };
34 
35 namespace {
36 
37 class ScopedDeclarationState {
38 public:
39  ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
40  bool MustBeDeclaration)
41  : Line(Line), Stack(Stack) {
42  Line.MustBeDeclaration = MustBeDeclaration;
43  Stack.push_back(MustBeDeclaration);
44  }
45  ~ScopedDeclarationState() {
46  Stack.pop_back();
47  if (!Stack.empty())
48  Line.MustBeDeclaration = Stack.back();
49  else
50  Line.MustBeDeclaration = true;
51  }
52 
53 private:
54  UnwrappedLine &Line;
55  std::vector<bool> &Stack;
56 };
57 
58 class ScopedMacroState : public FormatTokenSource {
59 public:
60  ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
61  FormatToken *&ResetToken)
62  : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
63  PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
64  Token(nullptr) {
65  TokenSource = this;
66  Line.Level = 0;
67  Line.InPPDirective = true;
68  }
69 
70  ~ScopedMacroState() override {
71  TokenSource = PreviousTokenSource;
72  ResetToken = Token;
73  Line.InPPDirective = false;
74  Line.Level = PreviousLineLevel;
75  }
76 
77  FormatToken *getNextToken() override {
78  // The \c UnwrappedLineParser guards against this by never calling
79  // \c getNextToken() after it has encountered the first eof token.
80  assert(!eof());
81  Token = PreviousTokenSource->getNextToken();
82  if (eof())
83  return getFakeEOF();
84  return Token;
85  }
86 
87  unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
88 
89  FormatToken *setPosition(unsigned Position) override {
90  Token = PreviousTokenSource->setPosition(Position);
91  return Token;
92  }
93 
94 private:
95  bool eof() { return Token && Token->HasUnescapedNewline; }
96 
97  FormatToken *getFakeEOF() {
98  static bool EOFInitialized = false;
99  static FormatToken FormatTok;
100  if (!EOFInitialized) {
101  FormatTok.Tok.startToken();
102  FormatTok.Tok.setKind(tok::eof);
103  EOFInitialized = true;
104  }
105  return &FormatTok;
106  }
107 
108  UnwrappedLine &Line;
109  FormatTokenSource *&TokenSource;
110  FormatToken *&ResetToken;
112  FormatTokenSource *PreviousTokenSource;
113 
114  FormatToken *Token;
115 };
116 
117 } // end anonymous namespace
118 
120 public:
122  bool SwitchToPreprocessorLines = false)
123  : Parser(Parser), OriginalLines(Parser.CurrentLines) {
124  if (SwitchToPreprocessorLines)
125  Parser.CurrentLines = &Parser.PreprocessorDirectives;
126  else if (!Parser.Line->Tokens.empty())
127  Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
128  PreBlockLine = std::move(Parser.Line);
129  Parser.Line = llvm::make_unique<UnwrappedLine>();
130  Parser.Line->Level = PreBlockLine->Level;
131  Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
132  }
133 
135  if (!Parser.Line->Tokens.empty()) {
136  Parser.addUnwrappedLine();
137  }
138  assert(Parser.Line->Tokens.empty());
139  Parser.Line = std::move(PreBlockLine);
140  if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
141  Parser.MustBreakBeforeNextToken = true;
142  Parser.CurrentLines = OriginalLines;
143  }
144 
145 private:
147 
148  std::unique_ptr<UnwrappedLine> PreBlockLine;
149  SmallVectorImpl<UnwrappedLine> *OriginalLines;
150 };
151 
// NOTE(review): the listing's own numbering skips 152, 154 and 157, so the
// class header, the first line of the constructor signature (which must
// declare `Parser`) and one line just before the addUnwrappedLine() call are
// not visible here -- confirm against the upstream file before relying on the
// exact control flow.
//
// Visible behaviour: the constructor remembers the incoming LineLevel, calls
// Parser->addUnwrappedLine(), and increments LineLevel when
// Style.BraceWrapping.IndentBraces is set. The destructor writes the
// remembered value back to LineLevel.
153 public:
155  const FormatStyle &Style, unsigned &LineLevel)
156  : LineLevel(LineLevel), OldLineLevel(LineLevel) {
158  Parser->addUnwrappedLine();
159  if (Style.BraceWrapping.IndentBraces)
160  ++LineLevel;
161  }
162  ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
163 
164 private:
// Reference to the level being adjusted, and its value at construction time.
165  unsigned &LineLevel;
166  unsigned OldLineLevel;
167 };
168 
169 namespace {
170 
171 class IndexedTokenSource : public FormatTokenSource {
172 public:
173  IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
174  : Tokens(Tokens), Position(-1) {}
175 
176  FormatToken *getNextToken() override {
177  ++Position;
178  return Tokens[Position];
179  }
180 
181  unsigned getPosition() override {
182  assert(Position >= 0);
183  return Position;
184  }
185 
186  FormatToken *setPosition(unsigned P) override {
187  Position = P;
188  return Tokens[Position];
189  }
190 
191  void reset() { Position = -1; }
192 
193 private:
194  ArrayRef<FormatToken *> Tokens;
195  int Position;
196 };
197 
198 } // end anonymous namespace
199 
// NOTE(review): only the member-initialiser list is visible; the listing's own
// numbering skips 200-203, where the constructor signature would be.
// The initial values for Line, MustBreakBeforeNextToken, CurrentLines and
// PPBranchLevel are the same ones reset() assigns. Tokens (the active token
// source) starts out null, while AllTokens is initialised from the `Tokens`
// argument.
204  : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
205  CurrentLines(&Lines), Style(Style), Keywords(Keywords), Tokens(nullptr),
206  Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {}
207 
208 void UnwrappedLineParser::reset() {
209  PPBranchLevel = -1;
210  Line.reset(new UnwrappedLine);
211  CommentsBeforeNextToken.clear();
212  FormatTok = nullptr;
213  MustBreakBeforeNextToken = false;
214  PreprocessorDirectives.clear();
215  CurrentLines = &Lines;
216  DeclarationScopeStack.clear();
217  PPStack.clear();
218 }
219 
// NOTE(review): the function header is not visible (the listing's own
// numbering skips 220); what follows is the body only.
//
// Each iteration of the outer loop resets the parser, parses the whole token
// array from the start, forwards every resulting line to the callback, and
// then advances the preprocessor branch indices. The loop ends once
// PPLevelBranchIndex is empty.
221  IndexedTokenSource TokenSource(AllTokens);
222  do {
223  DEBUG(llvm::dbgs() << "----\n");
224  reset();
225  Tokens = &TokenSource;
226  TokenSource.reset();
227 
228  readToken();
229  parseFile();
230  // Create line with eof token.
231  pushToken(FormatTok);
232  addUnwrappedLine();
233 
// Hand all lines of this pass to the callback, then discard them.
234  for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
235  E = Lines.end();
236  I != E; ++I) {
237  Callback.consumeUnwrappedLine(*I);
238  }
239  Callback.finishRun();
240  Lines.clear();
// Drop trailing levels whose index cannot be advanced any further
// (index + 1 >= count), keeping both vectors the same length.
241  while (!PPLevelBranchIndex.empty() &&
242  PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
243  PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
244  PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
245  }
// Select the next branch at the deepest remaining level for the next pass.
246  if (!PPLevelBranchIndex.empty()) {
247  ++PPLevelBranchIndex.back();
248  assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
249  assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
250  }
251  } while (!PPLevelBranchIndex.empty());
252 }
253 
254 void UnwrappedLineParser::parseFile() {
255  // The top-level context in a file always has declarations, except for pre-
256  // processor directives and JavaScript files.
257  bool MustBeDeclaration =
258  !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
259  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
260  MustBeDeclaration);
261  parseLevel(/*HasOpeningBrace=*/false);
262  // Make sure to format the remaining tokens.
263  flushComments(true);
264  addUnwrappedLine();
265 }
266 
267 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
268  bool SwitchLabelEncountered = false;
269  do {
270  tok::TokenKind kind = FormatTok->Tok.getKind();
271  if (FormatTok->Type == TT_MacroBlockBegin) {
272  kind = tok::l_brace;
273  } else if (FormatTok->Type == TT_MacroBlockEnd) {
274  kind = tok::r_brace;
275  }
276 
277  switch (kind) {
278  case tok::comment:
279  nextToken();
280  addUnwrappedLine();
281  break;
282  case tok::l_brace:
283  // FIXME: Add parameter whether this can happen - if this happens, we must
284  // be in a non-declaration context.
285  if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
286  continue;
287  parseBlock(/*MustBeDeclaration=*/false);
288  addUnwrappedLine();
289  break;
290  case tok::r_brace:
291  if (HasOpeningBrace)
292  return;
293  nextToken();
294  addUnwrappedLine();
295  break;
296  case tok::kw_default:
297  case tok::kw_case:
298  if (!SwitchLabelEncountered &&
299  (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
300  ++Line->Level;
301  SwitchLabelEncountered = true;
302  parseStructuralElement();
303  break;
304  default:
305  parseStructuralElement();
306  break;
307  }
308  } while (!eof());
309 }
310 
// Looks ahead from the current '{' token and assigns a BlockKind
// (BK_BracedInit or BK_Block) to each brace pair it can classify, mostly
// based on the token that follows the matching '}'. The token position is
// restored before returning, so the look-ahead consumes nothing.
//
// ExpectClassBody: when true, a ';' after the '}' that closes the outermost
// brace is not taken as evidence for a braced init list.
311 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
312  // We'll parse forward through the tokens until we hit
313  // a closing brace or eof - note that getNextToken() will
314  // parse macros, so this will magically work inside macro
315  // definitions, too.
316  unsigned StoredPosition = Tokens->getPosition();
317  FormatToken *Tok = FormatTok;
318  const FormatToken *PrevTok = getPreviousToken();
319  // Keep a stack of positions of lbrace tokens. We will
320  // update information about whether an lbrace starts a
321  // braced init list or a different block during the loop.
322  SmallVector<FormatToken *, 8> LBraceStack;
323  assert(Tok->Tok.is(tok::l_brace));
324  do {
325  // Get next non-comment token.
326  FormatToken *NextTok;
// NOTE(review): ReadTokens is incremented below but not read anywhere in the
// visible part of this function.
327  unsigned ReadTokens = 0;
328  do {
329  NextTok = Tokens->getNextToken();
330  ++ReadTokens;
331  } while (NextTok->is(tok::comment));
332 
333  switch (Tok->Tok.getKind()) {
334  case tok::l_brace:
335  if (Style.Language == FormatStyle::LK_JavaScript && PrevTok &&
336  PrevTok->is(tok::colon))
337  // In TypeScript's TypeMemberLists, there can be semicolons between the
338  // individual members.
339  Tok->BlockKind = BK_BracedInit;
340  else
341  Tok->BlockKind = BK_Unknown;
342  LBraceStack.push_back(Tok);
343  break;
344  case tok::r_brace:
345  if (LBraceStack.empty())
346  break;
// Only classify pairs whose opening brace is still undecided.
347  if (LBraceStack.back()->BlockKind == BK_Unknown) {
348  bool ProbablyBracedList = false;
349  if (Style.Language == FormatStyle::LK_Proto) {
350  ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
351  } else {
352  // Using OriginalColumn to distinguish between ObjC methods and
353  // binary operators is a bit hacky.
354  bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
355  NextTok->OriginalColumn == 0;
356 
357  // If there is a comma, semicolon or right paren after the closing
358  // brace, we assume this is a braced initializer list. Note that
359  // regardless how we mark inner braces here, we will overwrite the
360  // BlockKind later if we parse a braced list (where all blocks
361  // inside are by default braced lists), or when we explicitly detect
362  // blocks (for example while parsing lambdas).
363  //
364  // We exclude + and - as they can be ObjC visibility modifiers.
365  ProbablyBracedList =
// NOTE(review): the listing's own numbering skips 366, so the first operand
// of this expression is not visible and the parentheses on the next line do
// not balance as shown -- confirm against the upstream file.
367  NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in)) ||
368  NextTok->isOneOf(tok::comma, tok::period, tok::colon,
369  tok::r_paren, tok::r_square, tok::l_brace,
370  tok::l_square, tok::l_paren, tok::ellipsis) ||
371  (NextTok->is(tok::semi) &&
372  (!ExpectClassBody || LBraceStack.size() != 1)) ||
373  (NextTok->isBinaryOperator() && !NextIsObjCMethod);
374  }
// Record the verdict on both the closing and the matching opening brace.
375  if (ProbablyBracedList) {
376  Tok->BlockKind = BK_BracedInit;
377  LBraceStack.back()->BlockKind = BK_BracedInit;
378  } else {
379  Tok->BlockKind = BK_Block;
380  LBraceStack.back()->BlockKind = BK_Block;
381  }
382  }
383  LBraceStack.pop_back();
384  break;
// Any of these tokens inside a still-undecided brace marks it as a block.
385  case tok::at:
386  case tok::semi:
387  case tok::kw_if:
388  case tok::kw_while:
389  case tok::kw_for:
390  case tok::kw_switch:
391  case tok::kw_try:
392  case tok::kw___try:
393  if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
394  LBraceStack.back()->BlockKind = BK_Block;
395  break;
396  default:
397  break;
398  }
399  PrevTok = Tok;
400  Tok = NextTok;
401  } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
402 
403  // Assume other blocks for all unclosed opening braces.
404  for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
405  if (LBraceStack[i]->BlockKind == BK_Unknown)
406  LBraceStack[i]->BlockKind = BK_Block;
407  }
408 
// Rewind the token source to where the look-ahead started.
409  FormatTok = Tokens->setPosition(StoredPosition);
410 }
411 
412 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
413  bool MunchSemi) {
414  assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
415  "'{' or macro block token expected");
416  const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
417  FormatTok->BlockKind = BK_Block;
418 
419  unsigned InitialLevel = Line->Level;
420  nextToken();
421 
422  if (MacroBlock && FormatTok->is(tok::l_paren))
423  parseParens();
424 
425  addUnwrappedLine();
426 
427  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
428  MustBeDeclaration);
429  if (AddLevel)
430  ++Line->Level;
431  parseLevel(/*HasOpeningBrace=*/true);
432 
433  if (eof())
434  return;
435 
436  if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
437  : !FormatTok->is(tok::r_brace)) {
438  Line->Level = InitialLevel;
439  FormatTok->BlockKind = BK_Block;
440  return;
441  }
442 
443  nextToken(); // Munch the closing brace.
444 
445  if (MacroBlock && FormatTok->is(tok::l_paren))
446  parseParens();
447 
448  if (MunchSemi && FormatTok->Tok.is(tok::semi))
449  nextToken();
450  Line->Level = InitialLevel;
451 }
452 
453 static bool isGoogScope(const UnwrappedLine &Line) {
454  // FIXME: Closure-library specific stuff should not be hard-coded but be
455  // configurable.
456  if (Line.Tokens.size() < 4)
457  return false;
458  auto I = Line.Tokens.begin();
459  if (I->Tok->TokenText != "goog")
460  return false;
461  ++I;
462  if (I->Tok->isNot(tok::period))
463  return false;
464  ++I;
465  if (I->Tok->TokenText != "scope")
466  return false;
467  ++I;
468  return I->Tok->is(tok::l_paren);
469 }
470 
// NOTE(review): the first line of this function's signature is not visible
// (the listing's own numbering skips 471); it must declare the `Style` value
// used below.
//
// Returns the Style.BraceWrapping flag that corresponds to the keyword
// InitialToken is (namespace, class, union or struct), and false for any
// other token.
472  const FormatToken &InitialToken) {
473  if (InitialToken.is(tok::kw_namespace))
474  return Style.BraceWrapping.AfterNamespace;
475  if (InitialToken.is(tok::kw_class))
476  return Style.BraceWrapping.AfterClass;
477  if (InitialToken.is(tok::kw_union))
478  return Style.BraceWrapping.AfterUnion;
479  if (InitialToken.is(tok::kw_struct))
480  return Style.BraceWrapping.AfterStruct;
481  return false;
482 }
483 
// Parses the block starting at the current token with a temporary line state
// (ScopedLineState) and a non-declaration context, then consumes the token
// the nested parse stopped at.
484 void UnwrappedLineParser::parseChildBlock() {
485  FormatTok->BlockKind = BK_Block;
486  nextToken();
487  {
// NOTE(review): the initialiser of GoogScope is not visible (the listing's
// own numbering skips 489) -- confirm against the upstream file.
488  bool GoogScope =
490  ScopedLineState LineState(*this);
491  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
492  /*MustBeDeclaration=*/false);
// Contents are parsed one level deeper unless GoogScope is set; the same
// amount is subtracted again after the nested parse.
493  Line->Level += GoogScope ? 0 : 1;
494  parseLevel(/*HasOpeningBrace=*/true);
495  flushComments(isOnNewLine(*FormatTok));
496  Line->Level -= GoogScope ? 0 : 1;
497  }
498  nextToken();
499 }
500 
501 void UnwrappedLineParser::parsePPDirective() {
502  assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
503  ScopedMacroState MacroState(*Line, Tokens, FormatTok);
504  nextToken();
505 
506  if (!FormatTok->Tok.getIdentifierInfo()) {
507  parsePPUnknown();
508  return;
509  }
510 
511  switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
512  case tok::pp_define:
513  parsePPDefine();
514  return;
515  case tok::pp_if:
516  parsePPIf(/*IfDef=*/false);
517  break;
518  case tok::pp_ifdef:
519  case tok::pp_ifndef:
520  parsePPIf(/*IfDef=*/true);
521  break;
522  case tok::pp_else:
523  parsePPElse();
524  break;
525  case tok::pp_elif:
526  parsePPElIf();
527  break;
528  case tok::pp_endif:
529  parsePPEndIf();
530  break;
531  default:
532  parsePPUnknown();
533  break;
534  }
535 }
536 
537 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
538  if (Unreachable || (!PPStack.empty() && PPStack.back() == PP_Unreachable))
539  PPStack.push_back(PP_Unreachable);
540  else
541  PPStack.push_back(PP_Conditional);
542 }
543 
544 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
545  ++PPBranchLevel;
546  assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
547  if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
548  PPLevelBranchIndex.push_back(0);
549  PPLevelBranchCount.push_back(0);
550  }
551  PPChainBranchIndex.push(0);
552  bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
553  conditionalCompilationCondition(Unreachable || Skip);
554 }
555 
556 void UnwrappedLineParser::conditionalCompilationAlternative() {
557  if (!PPStack.empty())
558  PPStack.pop_back();
559  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
560  if (!PPChainBranchIndex.empty())
561  ++PPChainBranchIndex.top();
562  conditionalCompilationCondition(
563  PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
564  PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
565 }
566 
567 void UnwrappedLineParser::conditionalCompilationEnd() {
568  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
569  if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
570  if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
571  PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
572  }
573  }
574  // Guard against #endif's without #if.
575  if (PPBranchLevel > 0)
576  --PPBranchLevel;
577  if (!PPChainBranchIndex.empty())
578  PPChainBranchIndex.pop();
579  if (!PPStack.empty())
580  PPStack.pop_back();
581 }
582 
583 void UnwrappedLineParser::parsePPIf(bool IfDef) {
584  nextToken();
585  bool IsLiteralFalse = (FormatTok->Tok.isLiteral() &&
586  FormatTok->Tok.getLiteralData() != nullptr &&
587  StringRef(FormatTok->Tok.getLiteralData(),
588  FormatTok->Tok.getLength()) == "0") ||
589  FormatTok->Tok.is(tok::kw_false);
590  conditionalCompilationStart(!IfDef && IsLiteralFalse);
591  parsePPUnknown();
592 }
593 
594 void UnwrappedLineParser::parsePPElse() {
595  conditionalCompilationAlternative();
596  parsePPUnknown();
597 }
598 
599 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
600 
601 void UnwrappedLineParser::parsePPEndIf() {
602  conditionalCompilationEnd();
603  parsePPUnknown();
604 }
605 
606 void UnwrappedLineParser::parsePPDefine() {
607  nextToken();
608 
609  if (FormatTok->Tok.getKind() != tok::identifier) {
610  parsePPUnknown();
611  return;
612  }
613  nextToken();
614  if (FormatTok->Tok.getKind() == tok::l_paren &&
615  FormatTok->WhitespaceRange.getBegin() ==
616  FormatTok->WhitespaceRange.getEnd()) {
617  parseParens();
618  }
619  addUnwrappedLine();
620  Line->Level = 1;
621 
622  // Errors during a preprocessor directive can only affect the layout of the
623  // preprocessor directive, and thus we ignore them. An alternative approach
624  // would be to use the same approach we use on the file level (no
625  // re-indentation if there was a structural error) within the macro
626  // definition.
627  parseFile();
628 }
629 
630 void UnwrappedLineParser::parsePPUnknown() {
631  do {
632  nextToken();
633  } while (!eof());
634  addUnwrappedLine();
635 }
636 
637 // Here we blacklist certain tokens that are not usually the first token in an
638 // unwrapped line. This is used in attempt to distinguish macro calls without
639 // trailing semicolons from other constructs split to several lines.
640 static bool tokenCanStartNewLine(const clang::Token &Tok) {
641  // Semicolon can be a null-statement, l_square can be a start of a macro or
642  // a C++11 attribute, but this doesn't seem to be common.
643  return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
644  Tok.isNot(tok::l_square) &&
645  // Tokens that can only be used as binary operators and a part of
646  // overloaded operator names.
647  Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
648  Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
649  Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
650  Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
651  Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
652  Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
653  Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
654  Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
655  Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
656  Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
657  Tok.isNot(tok::lesslessequal) &&
658  // Colon is used in labels, base class lists, initializer lists,
659  // range-based for loops, ternary operator, but should never be the
660  // first token in an unwrapped line.
661  Tok.isNot(tok::colon) &&
662  // 'noexcept' is a trailing annotation.
663  Tok.isNot(tok::kw_noexcept);
664 }
665 
// NOTE(review): the first line of this function's signature is not visible
// (the listing's own numbering skips 666); it must declare the `Keywords`
// value used below. The name mustBeJSIdent is taken from the call sites in
// this file.
//
// Returns true when FormatTok is an identifier token that either has no
// IdentifierInfo or is none of the keywords listed below.
667  const FormatToken *FormatTok) {
668  // FIXME: This returns true for C/C++ keywords like 'struct'.
669  return FormatTok->is(tok::identifier) &&
670  (FormatTok->Tok.getIdentifierInfo() == nullptr ||
671  !FormatTok->isOneOf(Keywords.kw_in, Keywords.kw_of, Keywords.kw_as,
672  Keywords.kw_async, Keywords.kw_await,
673  Keywords.kw_yield, Keywords.kw_finally,
674  Keywords.kw_function, Keywords.kw_import,
675  Keywords.kw_is, Keywords.kw_let, Keywords.kw_var,
676  Keywords.kw_abstract, Keywords.kw_extends,
677  Keywords.kw_implements, Keywords.kw_instanceof,
678  Keywords.kw_interface, Keywords.kw_throws));
679 }
680 
// NOTE(review): the first line of this function's signature is not visible
// (the listing's own numbering skips 681); the name mustBeJSIdentOrValue is
// taken from the call sites in this file.
//
// Returns true when FormatTok is a literal token or satisfies mustBeJSIdent.
682  const FormatToken *FormatTok) {
683  return FormatTok->Tok.isLiteral() || mustBeJSIdent(Keywords, FormatTok);
684 }
685 
686 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
687 // when encountered after a value (see mustBeJSIdentOrValue).
// NOTE(review): the first line of this function's signature is not visible
// (the listing's own numbering skips 688); it must declare the `Keywords`
// value used below.
689  const FormatToken *FormatTok) {
690  return FormatTok->isOneOf(
691  tok::kw_return, Keywords.kw_yield,
692  // conditionals
693  tok::kw_if, tok::kw_else,
694  // loops
695  tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
696  // switch/case
697  tok::kw_switch, tok::kw_case,
698  // exceptions
699  tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
700  // declaration
701  tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
702  Keywords.kw_async, Keywords.kw_function,
703  // import/export
704  Keywords.kw_import, tok::kw_export);
705 }
706 
707 // readTokenWithJavaScriptASI reads the next token and terminates the current
708 // line if JavaScript Automatic Semicolon Insertion must
709 // happen between the current token and the next token.
710 //
711 // This method is conservative - it cannot cover all edge cases of JavaScript,
712 // but only aims to correctly handle certain well known cases. It *must not*
713 // return true in speculative cases.
714 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
715  FormatToken *Previous = FormatTok;
716  readToken();
717  FormatToken *Next = FormatTok;
718 
719  bool IsOnSameLine =
720  CommentsBeforeNextToken.empty()
721  ? Next->NewlinesBefore == 0
722  : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
723  if (IsOnSameLine)
724  return;
725 
726  bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
727  if (PreviousMustBeValue && Line && Line->Tokens.size() > 1) {
728  // If the token before the previous one is an '@', the previous token is an
729  // annotation and can precede another identifier/value.
730  const FormatToken *PrePrevious = std::prev(Line->Tokens.end(), 2)->Tok;
731  if (PrePrevious->is(tok::at))
732  return;
733  }
734  if (Next->is(tok::exclaim) && PreviousMustBeValue)
735  addUnwrappedLine();
736  bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
737  if (NextMustBeValue && (PreviousMustBeValue ||
738  Previous->isOneOf(tok::r_square, tok::r_paren,
739  tok::plusplus, tok::minusminus)))
740  addUnwrappedLine();
741  if (PreviousMustBeValue && isJSDeclOrStmt(Keywords, Next))
742  addUnwrappedLine();
743 }
744 
745 void UnwrappedLineParser::parseStructuralElement() {
746  assert(!FormatTok->is(tok::l_brace));
747  if (Style.Language == FormatStyle::LK_TableGen &&
748  FormatTok->is(tok::pp_include)) {
749  nextToken();
750  if (FormatTok->is(tok::string_literal))
751  nextToken();
752  addUnwrappedLine();
753  return;
754  }
755  switch (FormatTok->Tok.getKind()) {
756  case tok::at:
757  nextToken();
758  if (FormatTok->Tok.is(tok::l_brace)) {
759  parseBracedList();
760  break;
761  }
762  switch (FormatTok->Tok.getObjCKeywordID()) {
763  case tok::objc_public:
764  case tok::objc_protected:
765  case tok::objc_package:
766  case tok::objc_private:
767  return parseAccessSpecifier();
768  case tok::objc_interface:
769  case tok::objc_implementation:
770  return parseObjCInterfaceOrImplementation();
771  case tok::objc_protocol:
772  return parseObjCProtocol();
773  case tok::objc_end:
774  return; // Handled by the caller.
775  case tok::objc_optional:
776  case tok::objc_required:
777  nextToken();
778  addUnwrappedLine();
779  return;
780  case tok::objc_autoreleasepool:
781  nextToken();
782  if (FormatTok->Tok.is(tok::l_brace)) {
784  addUnwrappedLine();
785  parseBlock(/*MustBeDeclaration=*/false);
786  }
787  addUnwrappedLine();
788  return;
789  case tok::objc_try:
790  // This branch isn't strictly necessary (the kw_try case below would
791  // do this too after the tok::at is parsed above). But be explicit.
792  parseTryCatch();
793  return;
794  default:
795  break;
796  }
797  break;
798  case tok::kw_asm:
799  nextToken();
800  if (FormatTok->is(tok::l_brace)) {
801  FormatTok->Type = TT_InlineASMBrace;
802  nextToken();
803  while (FormatTok && FormatTok->isNot(tok::eof)) {
804  if (FormatTok->is(tok::r_brace)) {
805  FormatTok->Type = TT_InlineASMBrace;
806  nextToken();
807  addUnwrappedLine();
808  break;
809  }
810  FormatTok->Finalized = true;
811  nextToken();
812  }
813  }
814  break;
815  case tok::kw_namespace:
816  parseNamespace();
817  return;
818  case tok::kw_inline:
819  nextToken();
820  if (FormatTok->Tok.is(tok::kw_namespace)) {
821  parseNamespace();
822  return;
823  }
824  break;
825  case tok::kw_public:
826  case tok::kw_protected:
827  case tok::kw_private:
828  if (Style.Language == FormatStyle::LK_Java ||
830  nextToken();
831  else
832  parseAccessSpecifier();
833  return;
834  case tok::kw_if:
835  parseIfThenElse();
836  return;
837  case tok::kw_for:
838  case tok::kw_while:
839  parseForOrWhileLoop();
840  return;
841  case tok::kw_do:
842  parseDoWhile();
843  return;
844  case tok::kw_switch:
845  parseSwitch();
846  return;
847  case tok::kw_default:
848  nextToken();
849  parseLabel();
850  return;
851  case tok::kw_case:
852  parseCaseLabel();
853  return;
854  case tok::kw_try:
855  case tok::kw___try:
856  parseTryCatch();
857  return;
858  case tok::kw_extern:
859  nextToken();
860  if (FormatTok->Tok.is(tok::string_literal)) {
861  nextToken();
862  if (FormatTok->Tok.is(tok::l_brace)) {
863  parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
864  addUnwrappedLine();
865  return;
866  }
867  }
868  break;
869  case tok::kw_export:
870  if (Style.Language == FormatStyle::LK_JavaScript) {
871  parseJavaScriptEs6ImportExport();
872  return;
873  }
874  break;
875  case tok::identifier:
876  if (FormatTok->is(TT_ForEachMacro)) {
877  parseForOrWhileLoop();
878  return;
879  }
880  if (FormatTok->is(TT_MacroBlockBegin)) {
881  parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
882  /*MunchSemi=*/false);
883  return;
884  }
885  if (FormatTok->is(Keywords.kw_import)) {
886  if (Style.Language == FormatStyle::LK_JavaScript) {
887  parseJavaScriptEs6ImportExport();
888  return;
889  }
890  if (Style.Language == FormatStyle::LK_Proto) {
891  nextToken();
892  if (FormatTok->is(tok::kw_public))
893  nextToken();
894  if (!FormatTok->is(tok::string_literal))
895  return;
896  nextToken();
897  if (FormatTok->is(tok::semi))
898  nextToken();
899  addUnwrappedLine();
900  return;
901  }
902  }
903  if (FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
904  Keywords.kw_slots, Keywords.kw_qslots)) {
905  nextToken();
906  if (FormatTok->is(tok::colon)) {
907  nextToken();
908  addUnwrappedLine();
909  }
910  return;
911  }
912  // In all other cases, parse the declaration.
913  break;
914  default:
915  break;
916  }
917  do {
918  const FormatToken *Previous = getPreviousToken();
919  switch (FormatTok->Tok.getKind()) {
920  case tok::at:
921  nextToken();
922  if (FormatTok->Tok.is(tok::l_brace))
923  parseBracedList();
924  break;
925  case tok::kw_enum:
926  // Ignore if this is part of "template <enum ...".
927  if (Previous && Previous->is(tok::less)) {
928  nextToken();
929  break;
930  }
931 
932  // parseEnum falls through and does not yet add an unwrapped line as an
933  // enum definition can start a structural element.
934  if (!parseEnum())
935  break;
936  // This only applies for C++.
937  if (Style.Language != FormatStyle::LK_Cpp) {
938  addUnwrappedLine();
939  return;
940  }
941  break;
942  case tok::kw_typedef:
943  nextToken();
944  if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
945  Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
946  parseEnum();
947  break;
948  case tok::kw_struct:
949  case tok::kw_union:
950  case tok::kw_class:
951  // parseRecord falls through and does not yet add an unwrapped line as a
952  // record declaration or definition can start a structural element.
953  parseRecord();
954  // This does not apply for Java and JavaScript.
955  if (Style.Language == FormatStyle::LK_Java ||
957  if (FormatTok->is(tok::semi))
958  nextToken();
959  addUnwrappedLine();
960  return;
961  }
962  break;
963  case tok::period:
964  nextToken();
965  // In Java, classes have an implicit static member "class".
966  if (Style.Language == FormatStyle::LK_Java && FormatTok &&
967  FormatTok->is(tok::kw_class))
968  nextToken();
969  if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
970  FormatTok->Tok.getIdentifierInfo())
971  // JavaScript only has pseudo keywords, all keywords are allowed to
972  // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
973  nextToken();
974  break;
975  case tok::semi:
976  nextToken();
977  addUnwrappedLine();
978  return;
979  case tok::r_brace:
980  addUnwrappedLine();
981  return;
982  case tok::l_paren:
983  parseParens();
984  break;
985  case tok::kw_operator:
986  nextToken();
987  if (FormatTok->isBinaryOperator())
988  nextToken();
989  break;
990  case tok::caret:
991  nextToken();
992  if (FormatTok->Tok.isAnyIdentifier() ||
993  FormatTok->isSimpleTypeSpecifier())
994  nextToken();
995  if (FormatTok->is(tok::l_paren))
996  parseParens();
997  if (FormatTok->is(tok::l_brace))
998  parseChildBlock();
999  break;
1000  case tok::l_brace:
1001  if (!tryToParseBracedList()) {
1002  // A block outside of parentheses must be the last part of a
1003  // structural element.
1004  // FIXME: Figure out cases where this is not true, and add projections
1005  // for them (the one we know is missing are lambdas).
1006  if (Style.BraceWrapping.AfterFunction)
1007  addUnwrappedLine();
1008  FormatTok->Type = TT_FunctionLBrace;
1009  parseBlock(/*MustBeDeclaration=*/false);
1010  addUnwrappedLine();
1011  return;
1012  }
1013  // Otherwise this was a braced init list, and the structural
1014  // element continues.
1015  break;
1016  case tok::kw_try:
1017  // We arrive here when parsing function-try blocks.
1018  parseTryCatch();
1019  return;
1020  case tok::identifier: {
1021  if (FormatTok->is(TT_MacroBlockEnd)) {
1022  addUnwrappedLine();
1023  return;
1024  }
1025 
1026  // Parse function literal unless 'function' is the first token in a line
1027  // in which case this should be treated as a free-standing function.
1028  if (Style.Language == FormatStyle::LK_JavaScript &&
1029  (FormatTok->is(Keywords.kw_function) ||
1030  FormatTok->startsSequence(Keywords.kw_async,
1031  Keywords.kw_function)) &&
1032  Line->Tokens.size() > 0) {
1033  tryToParseJSFunction();
1034  break;
1035  }
1036  if ((Style.Language == FormatStyle::LK_JavaScript ||
1037  Style.Language == FormatStyle::LK_Java) &&
1038  FormatTok->is(Keywords.kw_interface)) {
1039  if (Style.Language == FormatStyle::LK_JavaScript) {
1040  // In JavaScript/TypeScript, "interface" can be used as a standalone
1041  // identifier, e.g. in `var interface = 1;`. If "interface" is
1042  // followed by another identifier, it is very like to be an actual
1043  // interface declaration.
1044  unsigned StoredPosition = Tokens->getPosition();
1045  FormatToken *Next = Tokens->getNextToken();
1046  FormatTok = Tokens->setPosition(StoredPosition);
1047  if (Next && !mustBeJSIdent(Keywords, Next)) {
1048  nextToken();
1049  break;
1050  }
1051  }
1052  parseRecord();
1053  addUnwrappedLine();
1054  return;
1055  }
1056 
1057  // See if the following token should start a new unwrapped line.
1058  StringRef Text = FormatTok->TokenText;
1059  nextToken();
1060  if (Line->Tokens.size() == 1 &&
1061  // JS doesn't have macros, and within classes colons indicate fields,
1062  // not labels.
1064  if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
1065  Line->Tokens.begin()->Tok->MustBreakBefore = true;
1066  parseLabel();
1067  return;
1068  }
1069  // Recognize function-like macro usages without trailing semicolon as
1070  // well as free-standing macros like Q_OBJECT.
1071  bool FunctionLike = FormatTok->is(tok::l_paren);
1072  if (FunctionLike)
1073  parseParens();
1074 
1075  bool FollowedByNewline =
1076  CommentsBeforeNextToken.empty()
1077  ? FormatTok->NewlinesBefore > 0
1078  : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1079 
1080  if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1081  tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
1082  addUnwrappedLine();
1083  return;
1084  }
1085  }
1086  break;
1087  }
1088  case tok::equal:
1089  // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
1090  // TT_JsFatArrow. The always start an expression or a child block if
1091  // followed by a curly.
1092  if (FormatTok->is(TT_JsFatArrow)) {
1093  nextToken();
1094  if (FormatTok->is(tok::l_brace))
1095  parseChildBlock();
1096  break;
1097  }
1098 
1099  nextToken();
1100  if (FormatTok->Tok.is(tok::l_brace)) {
1101  parseBracedList();
1102  }
1103  break;
1104  case tok::l_square:
1105  parseSquare();
1106  break;
1107  case tok::kw_new:
1108  parseNew();
1109  break;
1110  default:
1111  nextToken();
1112  break;
1113  }
1114  } while (!eof());
1115 }
1116 
1117 bool UnwrappedLineParser::tryToParseLambda() {
1118  if (Style.Language != FormatStyle::LK_Cpp) {
1119  nextToken();
1120  return false;
1121  }
1122  const FormatToken* Previous = getPreviousToken();
1123  if (Previous &&
1124  (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1125  tok::kw_delete) ||
1126  Previous->closesScope() || Previous->isSimpleTypeSpecifier())) {
1127  nextToken();
1128  return false;
1129  }
1130  assert(FormatTok->is(tok::l_square));
1131  FormatToken &LSquare = *FormatTok;
1132  if (!tryToParseLambdaIntroducer())
1133  return false;
1134 
1135  while (FormatTok->isNot(tok::l_brace)) {
1136  if (FormatTok->isSimpleTypeSpecifier()) {
1137  nextToken();
1138  continue;
1139  }
1140  switch (FormatTok->Tok.getKind()) {
1141  case tok::l_brace:
1142  break;
1143  case tok::l_paren:
1144  parseParens();
1145  break;
1146  case tok::amp:
1147  case tok::star:
1148  case tok::kw_const:
1149  case tok::comma:
1150  case tok::less:
1151  case tok::greater:
1152  case tok::identifier:
1153  case tok::numeric_constant:
1154  case tok::coloncolon:
1155  case tok::kw_mutable:
1156  nextToken();
1157  break;
1158  case tok::arrow:
1159  FormatTok->Type = TT_LambdaArrow;
1160  nextToken();
1161  break;
1162  default:
1163  return true;
1164  }
1165  }
1166  LSquare.Type = TT_LambdaLSquare;
1167  parseChildBlock();
1168  return true;
1169 }
1170 
1171 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1172  nextToken();
1173  if (FormatTok->is(tok::equal)) {
1174  nextToken();
1175  if (FormatTok->is(tok::r_square)) {
1176  nextToken();
1177  return true;
1178  }
1179  if (FormatTok->isNot(tok::comma))
1180  return false;
1181  nextToken();
1182  } else if (FormatTok->is(tok::amp)) {
1183  nextToken();
1184  if (FormatTok->is(tok::r_square)) {
1185  nextToken();
1186  return true;
1187  }
1188  if (!FormatTok->isOneOf(tok::comma, tok::identifier)) {
1189  return false;
1190  }
1191  if (FormatTok->is(tok::comma))
1192  nextToken();
1193  } else if (FormatTok->is(tok::r_square)) {
1194  nextToken();
1195  return true;
1196  }
1197  do {
1198  if (FormatTok->is(tok::amp))
1199  nextToken();
1200  if (!FormatTok->isOneOf(tok::identifier, tok::kw_this))
1201  return false;
1202  nextToken();
1203  if (FormatTok->is(tok::ellipsis))
1204  nextToken();
1205  if (FormatTok->is(tok::comma)) {
1206  nextToken();
1207  } else if (FormatTok->is(tok::r_square)) {
1208  nextToken();
1209  return true;
1210  } else {
1211  return false;
1212  }
1213  } while (!eof());
1214  return false;
1215 }
1216 
1217 void UnwrappedLineParser::tryToParseJSFunction() {
1218  assert(FormatTok->is(Keywords.kw_function) ||
1219  FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1220  if (FormatTok->is(Keywords.kw_async))
1221  nextToken();
1222  // Consume "function".
1223  nextToken();
1224 
1225  // Consume * (generator function).
1226  if (FormatTok->is(tok::star))
1227  nextToken();
1228 
1229  // Consume function name.
1230  if (FormatTok->is(tok::identifier))
1231  nextToken();
1232 
1233  if (FormatTok->isNot(tok::l_paren))
1234  return;
1235 
1236  // Parse formal parameter list.
1237  parseParens();
1238 
1239  if (FormatTok->is(tok::colon)) {
1240  // Parse a type definition.
1241  nextToken();
1242 
1243  // Eat the type declaration. For braced inline object types, balance braces,
1244  // otherwise just parse until finding an l_brace for the function body.
1245  if (FormatTok->is(tok::l_brace))
1246  tryToParseBracedList();
1247  else
1248  while (FormatTok->isNot(tok::l_brace) && !eof())
1249  nextToken();
1250  }
1251 
1252  parseChildBlock();
1253 }
1254 
1255 bool UnwrappedLineParser::tryToParseBracedList() {
1256  if (FormatTok->BlockKind == BK_Unknown)
1257  calculateBraceTypes();
1258  assert(FormatTok->BlockKind != BK_Unknown);
1259  if (FormatTok->BlockKind == BK_Block)
1260  return false;
1261  parseBracedList();
1262  return true;
1263 }
1264 
1265 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) {
1266  bool HasError = false;
1267  nextToken();
1268 
1269  // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1270  // replace this by using parseAssigmentExpression() inside.
1271  do {
1272  if (Style.Language == FormatStyle::LK_JavaScript) {
1273  if (FormatTok->is(Keywords.kw_function) ||
1274  FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1275  tryToParseJSFunction();
1276  continue;
1277  }
1278  if (FormatTok->is(TT_JsFatArrow)) {
1279  nextToken();
1280  // Fat arrows can be followed by simple expressions or by child blocks
1281  // in curly braces.
1282  if (FormatTok->is(tok::l_brace)) {
1283  parseChildBlock();
1284  continue;
1285  }
1286  }
1287  }
1288  switch (FormatTok->Tok.getKind()) {
1289  case tok::caret:
1290  nextToken();
1291  if (FormatTok->is(tok::l_brace)) {
1292  parseChildBlock();
1293  }
1294  break;
1295  case tok::l_square:
1296  tryToParseLambda();
1297  break;
1298  case tok::l_brace:
1299  // Assume there are no blocks inside a braced init list apart
1300  // from the ones we explicitly parse out (like lambdas).
1301  FormatTok->BlockKind = BK_BracedInit;
1302  parseBracedList();
1303  break;
1304  case tok::l_paren:
1305  parseParens();
1306  // JavaScript can just have free standing methods and getters/setters in
1307  // object literals. Detect them by a "{" following ")".
1308  if (Style.Language == FormatStyle::LK_JavaScript) {
1309  if (FormatTok->is(tok::l_brace))
1310  parseChildBlock();
1311  break;
1312  }
1313  break;
1314  case tok::r_brace:
1315  nextToken();
1316  return !HasError;
1317  case tok::semi:
1318  // JavaScript (or more precisely TypeScript) can have semicolons in braced
1319  // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1320  // used for error recovery if we have otherwise determined that this is
1321  // a braced list.
1322  if (Style.Language == FormatStyle::LK_JavaScript) {
1323  nextToken();
1324  break;
1325  }
1326  HasError = true;
1327  if (!ContinueOnSemicolons)
1328  return !HasError;
1329  nextToken();
1330  break;
1331  case tok::comma:
1332  nextToken();
1333  break;
1334  default:
1335  nextToken();
1336  break;
1337  }
1338  } while (!eof());
1339  return false;
1340 }
1341 
1342 void UnwrappedLineParser::parseParens() {
1343  assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1344  nextToken();
1345  do {
1346  switch (FormatTok->Tok.getKind()) {
1347  case tok::l_paren:
1348  parseParens();
1349  if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1350  parseChildBlock();
1351  break;
1352  case tok::r_paren:
1353  nextToken();
1354  return;
1355  case tok::r_brace:
1356  // A "}" inside parenthesis is an error if there wasn't a matching "{".
1357  return;
1358  case tok::l_square:
1359  tryToParseLambda();
1360  break;
1361  case tok::l_brace:
1362  if (!tryToParseBracedList())
1363  parseChildBlock();
1364  break;
1365  case tok::at:
1366  nextToken();
1367  if (FormatTok->Tok.is(tok::l_brace))
1368  parseBracedList();
1369  break;
1370  case tok::identifier:
1371  if (Style.Language == FormatStyle::LK_JavaScript &&
1372  (FormatTok->is(Keywords.kw_function) ||
1373  FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1374  tryToParseJSFunction();
1375  else
1376  nextToken();
1377  break;
1378  default:
1379  nextToken();
1380  break;
1381  }
1382  } while (!eof());
1383 }
1384 
1385 void UnwrappedLineParser::parseSquare() {
1386  assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1387  if (tryToParseLambda())
1388  return;
1389  do {
1390  switch (FormatTok->Tok.getKind()) {
1391  case tok::l_paren:
1392  parseParens();
1393  break;
1394  case tok::r_square:
1395  nextToken();
1396  return;
1397  case tok::r_brace:
1398  // A "}" inside parenthesis is an error if there wasn't a matching "{".
1399  return;
1400  case tok::l_square:
1401  parseSquare();
1402  break;
1403  case tok::l_brace: {
1404  if (!tryToParseBracedList())
1405  parseChildBlock();
1406  break;
1407  }
1408  case tok::at:
1409  nextToken();
1410  if (FormatTok->Tok.is(tok::l_brace))
1411  parseBracedList();
1412  break;
1413  default:
1414  nextToken();
1415  break;
1416  }
1417  } while (!eof());
1418 }
1419 
1420 void UnwrappedLineParser::parseIfThenElse() {
1421  assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1422  nextToken();
1423  if (FormatTok->Tok.is(tok::l_paren))
1424  parseParens();
1425  bool NeedsUnwrappedLine = false;
1426  if (FormatTok->Tok.is(tok::l_brace)) {
1427  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1428  parseBlock(/*MustBeDeclaration=*/false);
1429  if (Style.BraceWrapping.BeforeElse)
1430  addUnwrappedLine();
1431  else
1432  NeedsUnwrappedLine = true;
1433  } else {
1434  addUnwrappedLine();
1435  ++Line->Level;
1436  parseStructuralElement();
1437  --Line->Level;
1438  }
1439  if (FormatTok->Tok.is(tok::kw_else)) {
1440  nextToken();
1441  if (FormatTok->Tok.is(tok::l_brace)) {
1442  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1443  parseBlock(/*MustBeDeclaration=*/false);
1444  addUnwrappedLine();
1445  } else if (FormatTok->Tok.is(tok::kw_if)) {
1446  parseIfThenElse();
1447  } else {
1448  addUnwrappedLine();
1449  ++Line->Level;
1450  parseStructuralElement();
1451  if (FormatTok->is(tok::eof))
1452  addUnwrappedLine();
1453  --Line->Level;
1454  }
1455  } else if (NeedsUnwrappedLine) {
1456  addUnwrappedLine();
1457  }
1458 }
1459 
1460 void UnwrappedLineParser::parseTryCatch() {
1461  assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1462  nextToken();
1463  bool NeedsUnwrappedLine = false;
1464  if (FormatTok->is(tok::colon)) {
1465  // We are in a function try block, what comes is an initializer list.
1466  nextToken();
1467  while (FormatTok->is(tok::identifier)) {
1468  nextToken();
1469  if (FormatTok->is(tok::l_paren))
1470  parseParens();
1471  if (FormatTok->is(tok::comma))
1472  nextToken();
1473  }
1474  }
1475  // Parse try with resource.
1476  if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1477  parseParens();
1478  }
1479  if (FormatTok->is(tok::l_brace)) {
1480  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1481  parseBlock(/*MustBeDeclaration=*/false);
1482  if (Style.BraceWrapping.BeforeCatch) {
1483  addUnwrappedLine();
1484  } else {
1485  NeedsUnwrappedLine = true;
1486  }
1487  } else if (!FormatTok->is(tok::kw_catch)) {
1488  // The C++ standard requires a compound-statement after a try.
1489  // If there's none, we try to assume there's a structuralElement
1490  // and try to continue.
1491  addUnwrappedLine();
1492  ++Line->Level;
1493  parseStructuralElement();
1494  --Line->Level;
1495  }
1496  while (1) {
1497  if (FormatTok->is(tok::at))
1498  nextToken();
1499  if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1500  tok::kw___finally) ||
1501  ((Style.Language == FormatStyle::LK_Java ||
1503  FormatTok->is(Keywords.kw_finally)) ||
1504  (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1505  FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1506  break;
1507  nextToken();
1508  while (FormatTok->isNot(tok::l_brace)) {
1509  if (FormatTok->is(tok::l_paren)) {
1510  parseParens();
1511  continue;
1512  }
1513  if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1514  return;
1515  nextToken();
1516  }
1517  NeedsUnwrappedLine = false;
1518  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1519  parseBlock(/*MustBeDeclaration=*/false);
1520  if (Style.BraceWrapping.BeforeCatch)
1521  addUnwrappedLine();
1522  else
1523  NeedsUnwrappedLine = true;
1524  }
1525  if (NeedsUnwrappedLine)
1526  addUnwrappedLine();
1527 }
1528 
1529 void UnwrappedLineParser::parseNamespace() {
1530  assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
1531 
1532  const FormatToken &InitialToken = *FormatTok;
1533  nextToken();
1534  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon))
1535  nextToken();
1536  if (FormatTok->Tok.is(tok::l_brace)) {
1537  if (ShouldBreakBeforeBrace(Style, InitialToken))
1538  addUnwrappedLine();
1539 
1540  bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1542  DeclarationScopeStack.size() > 1);
1543  parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1544  // Munch the semicolon after a namespace. This is more common than one would
1545  // think. Puttin the semicolon into its own line is very ugly.
1546  if (FormatTok->Tok.is(tok::semi))
1547  nextToken();
1548  addUnwrappedLine();
1549  }
1550  // FIXME: Add error handling.
1551 }
1552 
1553 void UnwrappedLineParser::parseNew() {
1554  assert(FormatTok->is(tok::kw_new) && "'new' expected");
1555  nextToken();
1556  if (Style.Language != FormatStyle::LK_Java)
1557  return;
1558 
1559  // In Java, we can parse everything up to the parens, which aren't optional.
1560  do {
1561  // There should not be a ;, { or } before the new's open paren.
1562  if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1563  return;
1564 
1565  // Consume the parens.
1566  if (FormatTok->is(tok::l_paren)) {
1567  parseParens();
1568 
1569  // If there is a class body of an anonymous class, consume that as child.
1570  if (FormatTok->is(tok::l_brace))
1571  parseChildBlock();
1572  return;
1573  }
1574  nextToken();
1575  } while (!eof());
1576 }
1577 
1578 void UnwrappedLineParser::parseForOrWhileLoop() {
1579  assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
1580  "'for', 'while' or foreach macro expected");
1581  nextToken();
1582  if (FormatTok->Tok.is(tok::l_paren))
1583  parseParens();
1584  if (FormatTok->Tok.is(tok::l_brace)) {
1585  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1586  parseBlock(/*MustBeDeclaration=*/false);
1587  addUnwrappedLine();
1588  } else {
1589  addUnwrappedLine();
1590  ++Line->Level;
1591  parseStructuralElement();
1592  --Line->Level;
1593  }
1594 }
1595 
1596 void UnwrappedLineParser::parseDoWhile() {
1597  assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1598  nextToken();
1599  if (FormatTok->Tok.is(tok::l_brace)) {
1600  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1601  parseBlock(/*MustBeDeclaration=*/false);
1602  if (Style.BraceWrapping.IndentBraces)
1603  addUnwrappedLine();
1604  } else {
1605  addUnwrappedLine();
1606  ++Line->Level;
1607  parseStructuralElement();
1608  --Line->Level;
1609  }
1610 
1611  // FIXME: Add error handling.
1612  if (!FormatTok->Tok.is(tok::kw_while)) {
1613  addUnwrappedLine();
1614  return;
1615  }
1616 
1617  nextToken();
1618  parseStructuralElement();
1619 }
1620 
1621 void UnwrappedLineParser::parseLabel() {
1622  nextToken();
1623  unsigned OldLineLevel = Line->Level;
1624  if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1625  --Line->Level;
1626  if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1627  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1628  parseBlock(/*MustBeDeclaration=*/false);
1629  if (FormatTok->Tok.is(tok::kw_break)) {
1631  addUnwrappedLine();
1632  parseStructuralElement();
1633  }
1634  addUnwrappedLine();
1635  } else {
1636  if (FormatTok->is(tok::semi))
1637  nextToken();
1638  addUnwrappedLine();
1639  }
1640  Line->Level = OldLineLevel;
1641  if (FormatTok->isNot(tok::l_brace)) {
1642  parseStructuralElement();
1643  addUnwrappedLine();
1644  }
1645 }
1646 
1647 void UnwrappedLineParser::parseCaseLabel() {
1648  assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1649  // FIXME: fix handling of complex expressions here.
1650  do {
1651  nextToken();
1652  } while (!eof() && !FormatTok->Tok.is(tok::colon));
1653  parseLabel();
1654 }
1655 
1656 void UnwrappedLineParser::parseSwitch() {
1657  assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
1658  nextToken();
1659  if (FormatTok->Tok.is(tok::l_paren))
1660  parseParens();
1661  if (FormatTok->Tok.is(tok::l_brace)) {
1662  CompoundStatementIndenter Indenter(this, Style, Line->Level);
1663  parseBlock(/*MustBeDeclaration=*/false);
1664  addUnwrappedLine();
1665  } else {
1666  addUnwrappedLine();
1667  ++Line->Level;
1668  parseStructuralElement();
1669  --Line->Level;
1670  }
1671 }
1672 
1673 void UnwrappedLineParser::parseAccessSpecifier() {
1674  nextToken();
1675  // Understand Qt's slots.
1676  if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
1677  nextToken();
1678  // Otherwise, we don't know what it is, and we'd better keep the next token.
1679  if (FormatTok->Tok.is(tok::colon))
1680  nextToken();
1681  addUnwrappedLine();
1682 }
1683 
1684 bool UnwrappedLineParser::parseEnum() {
1685  // Won't be 'enum' for NS_ENUMs.
1686  if (FormatTok->Tok.is(tok::kw_enum))
1687  nextToken();
1688 
1689  // In TypeScript, "enum" can also be used as property name, e.g. in interface
1690  // declarations. An "enum" keyword followed by a colon would be a syntax
1691  // error and thus assume it is just an identifier.
1692  if (Style.Language == FormatStyle::LK_JavaScript &&
1693  FormatTok->isOneOf(tok::colon, tok::question))
1694  return false;
1695 
1696  // Eat up enum class ...
1697  if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
1698  nextToken();
1699 
1700  while (FormatTok->Tok.getIdentifierInfo() ||
1701  FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
1702  tok::greater, tok::comma, tok::question)) {
1703  nextToken();
1704  // We can have macros or attributes in between 'enum' and the enum name.
1705  if (FormatTok->is(tok::l_paren))
1706  parseParens();
1707  if (FormatTok->is(tok::identifier)) {
1708  nextToken();
1709  // If there are two identifiers in a row, this is likely an elaborate
1710  // return type. In Java, this can be "implements", etc.
1711  if (Style.Language == FormatStyle::LK_Cpp &&
1712  FormatTok->is(tok::identifier))
1713  return false;
1714  }
1715  }
1716 
1717  // Just a declaration or something is wrong.
1718  if (FormatTok->isNot(tok::l_brace))
1719  return true;
1720  FormatTok->BlockKind = BK_Block;
1721 
1722  if (Style.Language == FormatStyle::LK_Java) {
1723  // Java enums are different.
1724  parseJavaEnumBody();
1725  return true;
1726  }
1727  if (Style.Language == FormatStyle::LK_Proto) {
1728  parseBlock(/*MustBeDeclaration=*/true);
1729  return true;
1730  }
1731 
1732  // Parse enum body.
1733  bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
1734  if (HasError) {
1735  if (FormatTok->is(tok::semi))
1736  nextToken();
1737  addUnwrappedLine();
1738  }
1739  return true;
1740 
1741  // There is no addUnwrappedLine() here so that we fall through to parsing a
1742  // structural element afterwards. Thus, in "enum A {} n, m;",
1743  // "} n, m;" will end up in one unwrapped line.
1744 }
1745 
1746 void UnwrappedLineParser::parseJavaEnumBody() {
1747  // Determine whether the enum is simple, i.e. does not have a semicolon or
1748  // constants with class bodies. Simple enums can be formatted like braced
1749  // lists, contracted to a single line, etc.
1750  unsigned StoredPosition = Tokens->getPosition();
1751  bool IsSimple = true;
1752  FormatToken *Tok = Tokens->getNextToken();
1753  while (Tok) {
1754  if (Tok->is(tok::r_brace))
1755  break;
1756  if (Tok->isOneOf(tok::l_brace, tok::semi)) {
1757  IsSimple = false;
1758  break;
1759  }
1760  // FIXME: This will also mark enums with braces in the arguments to enum
1761  // constants as "not simple". This is probably fine in practice, though.
1762  Tok = Tokens->getNextToken();
1763  }
1764  FormatTok = Tokens->setPosition(StoredPosition);
1765 
1766  if (IsSimple) {
1767  parseBracedList();
1768  addUnwrappedLine();
1769  return;
1770  }
1771 
1772  // Parse the body of a more complex enum.
1773  // First add a line for everything up to the "{".
1774  nextToken();
1775  addUnwrappedLine();
1776  ++Line->Level;
1777 
1778  // Parse the enum constants.
1779  while (FormatTok) {
1780  if (FormatTok->is(tok::l_brace)) {
1781  // Parse the constant's class body.
1782  parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1783  /*MunchSemi=*/false);
1784  } else if (FormatTok->is(tok::l_paren)) {
1785  parseParens();
1786  } else if (FormatTok->is(tok::comma)) {
1787  nextToken();
1788  addUnwrappedLine();
1789  } else if (FormatTok->is(tok::semi)) {
1790  nextToken();
1791  addUnwrappedLine();
1792  break;
1793  } else if (FormatTok->is(tok::r_brace)) {
1794  addUnwrappedLine();
1795  break;
1796  } else {
1797  nextToken();
1798  }
1799  }
1800 
1801  // Parse the class body after the enum's ";" if any.
1802  parseLevel(/*HasOpeningBrace=*/true);
1803  nextToken();
1804  --Line->Level;
1805  addUnwrappedLine();
1806 }
1807 
1808 void UnwrappedLineParser::parseRecord() {
1809  const FormatToken &InitialToken = *FormatTok;
1810  nextToken();
1811 
1812  // The actual identifier can be a nested name specifier, and in macros
1813  // it is often token-pasted.
1814  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
1815  tok::kw___attribute, tok::kw___declspec,
1816  tok::kw_alignas) ||
1817  ((Style.Language == FormatStyle::LK_Java ||
1819  FormatTok->isOneOf(tok::period, tok::comma))) {
1820  bool IsNonMacroIdentifier =
1821  FormatTok->is(tok::identifier) &&
1822  FormatTok->TokenText != FormatTok->TokenText.upper();
1823  nextToken();
1824  // We can have macros or attributes in between 'class' and the class name.
1825  if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
1826  parseParens();
1827  }
1828 
1829  // Note that parsing away template declarations here leads to incorrectly
1830  // accepting function declarations as record declarations.
1831  // In general, we cannot solve this problem. Consider:
1832  // class A<int> B() {}
1833  // which can be a function definition or a class definition when B() is a
1834  // macro. If we find enough real-world cases where this is a problem, we
1835  // can parse for the 'template' keyword in the beginning of the statement,
1836  // and thus rule out the record production in case there is no template
1837  // (this would still leave us with an ambiguity between template function
1838  // and class declarations).
1839  if (FormatTok->isOneOf(tok::colon, tok::less)) {
1840  while (!eof()) {
1841  if (FormatTok->is(tok::l_brace)) {
1842  calculateBraceTypes(/*ExpectClassBody=*/true);
1843  if (!tryToParseBracedList())
1844  break;
1845  }
1846  if (FormatTok->Tok.is(tok::semi))
1847  return;
1848  nextToken();
1849  }
1850  }
1851  if (FormatTok->Tok.is(tok::l_brace)) {
1852  if (ShouldBreakBeforeBrace(Style, InitialToken))
1853  addUnwrappedLine();
1854 
1855  parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1856  /*MunchSemi=*/false);
1857  }
1858  // There is no addUnwrappedLine() here so that we fall through to parsing a
1859  // structural element afterwards. Thus, in "class A {} n, m;",
1860  // "} n, m;" will end up in one unwrapped line.
1861 }
1862 
1863 void UnwrappedLineParser::parseObjCProtocolList() {
1864  assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
1865  do
1866  nextToken();
1867  while (!eof() && FormatTok->Tok.isNot(tok::greater));
1868  nextToken(); // Skip '>'.
1869 }
1870 
1871 void UnwrappedLineParser::parseObjCUntilAtEnd() {
1872  do {
1873  if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
1874  nextToken();
1875  addUnwrappedLine();
1876  break;
1877  }
1878  if (FormatTok->is(tok::l_brace)) {
1879  parseBlock(/*MustBeDeclaration=*/false);
1880  // In ObjC interfaces, nothing should be following the "}".
1881  addUnwrappedLine();
1882  } else if (FormatTok->is(tok::r_brace)) {
1883  // Ignore stray "}". parseStructuralElement doesn't consume them.
1884  nextToken();
1885  addUnwrappedLine();
1886  } else {
1887  parseStructuralElement();
1888  }
1889  } while (!eof());
1890 }
1891 
1892 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
1893  nextToken();
1894  nextToken(); // interface name
1895 
1896  // @interface can be followed by either a base class, or a category.
1897  if (FormatTok->Tok.is(tok::colon)) {
1898  nextToken();
1899  nextToken(); // base class name
1900  } else if (FormatTok->Tok.is(tok::l_paren))
1901  // Skip category, if present.
1902  parseParens();
1903 
1904  if (FormatTok->Tok.is(tok::less))
1905  parseObjCProtocolList();
1906 
1907  if (FormatTok->Tok.is(tok::l_brace)) {
1909  addUnwrappedLine();
1910  parseBlock(/*MustBeDeclaration=*/true);
1911  }
1912 
1913  // With instance variables, this puts '}' on its own line. Without instance
1914  // variables, this ends the @interface line.
1915  addUnwrappedLine();
1916 
1917  parseObjCUntilAtEnd();
1918 }
1919 
1920 void UnwrappedLineParser::parseObjCProtocol() {
1921  nextToken();
1922  nextToken(); // protocol name
1923 
1924  if (FormatTok->Tok.is(tok::less))
1925  parseObjCProtocolList();
1926 
1927  // Check for protocol declaration.
1928  if (FormatTok->Tok.is(tok::semi)) {
1929  nextToken();
1930  return addUnwrappedLine();
1931  }
1932 
1933  addUnwrappedLine();
1934  parseObjCUntilAtEnd();
1935 }
1936 
1937 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
1938  bool IsImport = FormatTok->is(Keywords.kw_import);
1939  assert(IsImport || FormatTok->is(tok::kw_export));
1940  nextToken();
1941 
1942  // Consume the "default" in "export default class/function".
1943  if (FormatTok->is(tok::kw_default))
1944  nextToken();
1945 
1946  // Consume "async function", "function" and "default function", so that these
1947  // get parsed as free-standing JS functions, i.e. do not require a trailing
1948  // semicolon.
1949  if (FormatTok->is(Keywords.kw_async))
1950  nextToken();
1951  if (FormatTok->is(Keywords.kw_function)) {
1952  nextToken();
1953  return;
1954  }
1955 
1956  // For imports, `export *`, `export {...}`, consume the rest of the line up
1957  // to the terminating `;`. For everything else, just return and continue
1958  // parsing the structural element, i.e. the declaration or expression for
1959  // `export default`.
1960  if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
1961  !FormatTok->isStringLiteral())
1962  return;
1963 
1964  while (!eof() && FormatTok->isNot(tok::semi)) {
1965  if (FormatTok->is(tok::l_brace)) {
1966  FormatTok->BlockKind = BK_Block;
1967  parseBracedList();
1968  } else {
1969  nextToken();
1970  }
1971  }
1972 }
1973 
1974 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
1975  StringRef Prefix = "") {
1976  llvm::dbgs() << Prefix << "Line(" << Line.Level << ")"
1977  << (Line.InPPDirective ? " MACRO" : "") << ": ";
1978  for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
1979  E = Line.Tokens.end();
1980  I != E; ++I) {
1981  llvm::dbgs() << I->Tok->Tok.getName() << "[" << I->Tok->Type << "] ";
1982  }
1983  for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
1984  E = Line.Tokens.end();
1985  I != E; ++I) {
1986  const UnwrappedLineNode &Node = *I;
1988  I = Node.Children.begin(),
1989  E = Node.Children.end();
1990  I != E; ++I) {
1991  printDebugInfo(*I, "\nChild: ");
1992  }
1993  }
1994  llvm::dbgs() << "\n";
1995 }
1996 
1997 void UnwrappedLineParser::addUnwrappedLine() {
1998  if (Line->Tokens.empty())
1999  return;
2000  DEBUG({
2001  if (CurrentLines == &Lines)
2002  printDebugInfo(*Line);
2003  });
2004  CurrentLines->push_back(std::move(*Line));
2005  Line->Tokens.clear();
2006  if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2007  CurrentLines->append(
2008  std::make_move_iterator(PreprocessorDirectives.begin()),
2009  std::make_move_iterator(PreprocessorDirectives.end()));
2010  PreprocessorDirectives.clear();
2011  }
2012 }
2013 
2014 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
2015 
2016 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
2017  return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
2018  FormatTok.NewlinesBefore > 0;
2019 }
2020 
2021 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
2022  bool JustComments = Line->Tokens.empty();
2023  for (SmallVectorImpl<FormatToken *>::const_iterator
2024  I = CommentsBeforeNextToken.begin(),
2025  E = CommentsBeforeNextToken.end();
2026  I != E; ++I) {
2027  if (isOnNewLine(**I) && JustComments)
2028  addUnwrappedLine();
2029  pushToken(*I);
2030  }
2031  if (NewlineBeforeNext && JustComments)
2032  addUnwrappedLine();
2033  CommentsBeforeNextToken.clear();
2034 }
2035 
2036 void UnwrappedLineParser::nextToken() {
2037  if (eof())
2038  return;
2039  flushComments(isOnNewLine(*FormatTok));
2040  pushToken(FormatTok);
2041  if (Style.Language != FormatStyle::LK_JavaScript)
2042  readToken();
2043  else
2044  readTokenWithJavaScriptASI();
2045 }
2046 
2047 const FormatToken *UnwrappedLineParser::getPreviousToken() {
2048  // FIXME: This is a dirty way to access the previous token. Find a better
2049  // solution.
2050  if (!Line || Line->Tokens.empty())
2051  return nullptr;
2052  return Line->Tokens.back().Tok;
2053 }
2054 
2055 void UnwrappedLineParser::readToken() {
2056  bool CommentsInCurrentLine = true;
2057  do {
2058  FormatTok = Tokens->getNextToken();
2059  assert(FormatTok);
2060  while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
2061  (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
2062  // If there is an unfinished unwrapped line, we flush the preprocessor
2063  // directives only after that unwrapped line was finished later.
2064  bool SwitchToPreprocessorLines = !Line->Tokens.empty();
2065  ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
2066  // Comments stored before the preprocessor directive need to be output
2067  // before the preprocessor directive, at the same level as the
2068  // preprocessor directive, as we consider them to apply to the directive.
2069  flushComments(isOnNewLine(*FormatTok));
2070  parsePPDirective();
2071  }
2072  while (FormatTok->Type == TT_ConflictStart ||
2073  FormatTok->Type == TT_ConflictEnd ||
2074  FormatTok->Type == TT_ConflictAlternative) {
2075  if (FormatTok->Type == TT_ConflictStart) {
2076  conditionalCompilationStart(/*Unreachable=*/false);
2077  } else if (FormatTok->Type == TT_ConflictAlternative) {
2078  conditionalCompilationAlternative();
2079  } else if (FormatTok->Type == TT_ConflictEnd) {
2080  conditionalCompilationEnd();
2081  }
2082  FormatTok = Tokens->getNextToken();
2083  FormatTok->MustBreakBefore = true;
2084  }
2085 
2086  if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) &&
2087  !Line->InPPDirective) {
2088  continue;
2089  }
2090 
2091  if (!FormatTok->Tok.is(tok::comment))
2092  return;
2093  if (isOnNewLine(*FormatTok) || FormatTok->IsFirst) {
2094  CommentsInCurrentLine = false;
2095  }
2096  if (CommentsInCurrentLine) {
2097  pushToken(FormatTok);
2098  } else {
2099  CommentsBeforeNextToken.push_back(FormatTok);
2100  }
2101  } while (!eof());
2102 }
2103 
2104 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
2105  Line->Tokens.push_back(UnwrappedLineNode(Tok));
2106  if (MustBreakBeforeNextToken) {
2107  Line->Tokens.back().Tok->MustBreakBefore = true;
2108  MustBreakBeforeNextToken = false;
2109  }
2110 }
2111 
2112 } // end namespace format
2113 } // end namespace clang
int Position
static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
SmallVector< UnwrappedLine, 0 > Children
bool AfterUnion
Wrap union definitions.
Definition: Format.h:267
Indent in all namespaces.
Definition: Format.h:494
Token Tok
The Token.
Definition: FormatToken.h:117
Should be used for Protocol Buffers (https://developers.google.com/protocol-buffers/).
Definition: Format.h:470
bool IndentCaseLabels
Indent case labels one level from the switch statement.
Definition: Format.h:425
static LLVM_ATTRIBUTE_UNUSED void printDebugInfo(const UnwrappedLine &Line, StringRef Prefix="")
StringRef P
bool isOneOf(A K1, B K2) const
Definition: FormatToken.h:291
Parser - This implements a parser for the C family of languages.
Definition: Parse/Parser.h:57
static bool mustBeJSIdent(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
Should be used for C, C++, ObjectiveC, ObjectiveC++.
Definition: Format.h:463
UnwrappedLineParser(const FormatStyle &Style, const AdditionalKeywords &Keywords, ArrayRef< FormatToken * > Tokens, UnwrappedLineConsumer &Callback)
FormatToken *& ResetToken
virtual void consumeUnwrappedLine(const UnwrappedLine &Line)=0
unsigned Level
The indent level of the UnwrappedLine.
bool AfterObjCDeclaration
Wrap ObjC definitions (@autoreleasepool, interfaces, ..).
Definition: Format.h:263
bool IndentBraces
Indent the wrapped braces themselves.
Definition: Format.h:273
Should be used for Java.
Definition: Format.h:465
static bool tokenCanStartNewLine(const clang::Token &Tok)
Token - This structure provides full information about a lexed token.
Definition: Token.h:35
NamespaceIndentationKind NamespaceIndentation
The indentation used for namespaces.
Definition: Format.h:498
static bool isGoogScope(const UnwrappedLine &Line)
virtual FormatToken * getNextToken()=0
An unwrapped line is a sequence of Token, that we would like to put on a single line if there was no ...
detail::InMemoryDirectory::const_iterator I
CompoundStatementIndenter(UnwrappedLineParser *Parser, const FormatStyle &Style, unsigned &LineLevel)
const FormatStyle & Style
Definition: Format.cpp:1311
ScopedLineState(UnwrappedLineParser &Parser, bool SwitchToPreprocessorLines=false)
std::vector< bool > & Stack
FormatTokenSource *& TokenSource
std::list< UnwrappedLineNode > Tokens
The Tokens comprising this UnwrappedLine.
Should be used for JavaScript.
Definition: Format.h:467
ContinuationIndenter * Indenter
MatchFinder::MatchCallback * Callback
StateNode * Previous
bool InPPDirective
Whether this UnwrappedLine is part of a preprocessor directive.
BraceWrappingFlags BraceWrapping
Control of individual brace wrapping cases.
Definition: Format.h:280
bool AfterFunction
Wrap function definitions.
Definition: Format.h:259
A wrapper around a Token storing information about the whitespace characters preceding it...
Definition: FormatToken.h:113
FormatToken * Token
bool isNot(tok::TokenKind K) const
Definition: Token.h:95
#define false
Definition: stdbool.h:33
SmallVectorImpl< AnnotatedLine * >::const_iterator Next
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
Definition: FormatToken.h:569
ArrayRef< FormatToken * > Tokens
This file contains the declaration of the UnwrappedLineParser, which turns a stream of tokens into Un...
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:25
static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
virtual FormatToken * setPosition(unsigned Position)=0
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:46
static bool ShouldBreakBeforeBrace(const FormatStyle &Style, const FormatToken &InitialToken)
Indent only in inner namespaces (nested in other namespaces).
Definition: Format.h:492
LanguageKind Language
Language, this format style is targeted at.
Definition: Format.h:476
ast_type_traits::DynTypedNode Node
if(T->getSizeExpr()) TRY_TO(TraverseStmt(T-> getSizeExpr()))
UnwrappedLine & Line
const AdditionalKeywords & Keywords
Should be used for TableGen code.
Definition: Format.h:472
detail::InMemoryDirectory::const_iterator E
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Definition: Token.h:112
bool AfterStruct
Wrap struct definitions.
Definition: Format.h:265
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:282
virtual unsigned getPosition()=0
bool AfterControlStatement
Wrap control statements (if/for/while/switch/..).
Definition: Format.h:255
unsigned kind
All of the diagnostics that can be emitted by the frontend.
Definition: DiagnosticIDs.h:43
unsigned PreviousLineLevel
bool MustBreakBefore
Whether there must be a line break before this token.
Definition: FormatToken.h:155
FormatTokenSource * PreviousTokenSource
bool AfterClass
Wrap class definitions.
Definition: Format.h:253
StringRef Text
Definition: Format.cpp:1195
bool AfterNamespace
Wrap namespace definitions.
Definition: Format.h:261
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:176