clang  3.9.0
UnwrappedLineFormatter.cpp
Go to the documentation of this file.
1 //===--- UnwrappedLineFormatter.cpp - Format C++ code ---------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "UnwrappedLineFormatter.h"
11 #include "WhitespaceManager.h"
12 #include "llvm/Support/Debug.h"
13 
14 #define DEBUG_TYPE "format-formatter"
15 
16 namespace clang {
17 namespace format {
18 
19 namespace {
20 
21 bool startsExternCBlock(const AnnotatedLine &Line) {
22  const FormatToken *Next = Line.First->getNextNonComment();
23  const FormatToken *NextNext = Next ? Next->getNextNonComment() : nullptr;
24  return Line.startsWith(tok::kw_extern) && Next && Next->isStringLiteral() &&
25  NextNext && NextNext->is(tok::l_brace);
26 }
27 
28 /// \brief Tracks the indent level of \c AnnotatedLines across levels.
29 ///
30 /// \c nextLine must be called for each \c AnnotatedLine, after which \c
31 /// getIndent() will return the indent for the last line \c nextLine was called
32 /// with.
33 /// If the line is not formatted (and thus the indent does not change), calling
34 /// \c adjustToUnmodifiedLine after the call to \c nextLine will cause
35 /// subsequent lines on the same level to be indented at the same level as the
36 /// given line.
37 class LevelIndentTracker {
38 public:
39  LevelIndentTracker(const FormatStyle &Style,
40  const AdditionalKeywords &Keywords, unsigned StartLevel,
41  int AdditionalIndent)
42  : Style(Style), Keywords(Keywords), AdditionalIndent(AdditionalIndent) {
43  for (unsigned i = 0; i != StartLevel; ++i)
44  IndentForLevel.push_back(Style.IndentWidth * i + AdditionalIndent);
45  }
46 
47  /// \brief Returns the indent for the current line.
48  unsigned getIndent() const { return Indent; }
49 
50  /// \brief Update the indent state given that \p Line is going to be formatted
51  /// next.
52  void nextLine(const AnnotatedLine &Line) {
53  Offset = getIndentOffset(*Line.First);
54  // Update the indent level cache size so that we can rely on it
55  // having the right size in adjustToUnmodifiedline.
56  while (IndentForLevel.size() <= Line.Level)
57  IndentForLevel.push_back(-1);
58  if (Line.InPPDirective) {
59  Indent = Line.Level * Style.IndentWidth + AdditionalIndent;
60  } else {
61  IndentForLevel.resize(Line.Level + 1);
62  Indent = getIndent(IndentForLevel, Line.Level);
63  }
64  if (static_cast<int>(Indent) + Offset >= 0)
65  Indent += Offset;
66  }
67 
68  /// \brief Update the level indent to adapt to the given \p Line.
69  ///
70  /// When a line is not formatted, we move the subsequent lines on the same
71  /// level to the same indent.
72  /// Note that \c nextLine must have been called before this method.
73  void adjustToUnmodifiedLine(const AnnotatedLine &Line) {
74  unsigned LevelIndent = Line.First->OriginalColumn;
75  if (static_cast<int>(LevelIndent) - Offset >= 0)
76  LevelIndent -= Offset;
77  if ((!Line.First->is(tok::comment) || IndentForLevel[Line.Level] == -1) &&
78  !Line.InPPDirective)
79  IndentForLevel[Line.Level] = LevelIndent;
80  }
81 
82 private:
83  /// \brief Get the offset of the line relatively to the level.
84  ///
85  /// For example, 'public:' labels in classes are offset by 1 or 2
86  /// characters to the left from their level.
87  int getIndentOffset(const FormatToken &RootToken) {
88  if (Style.Language == FormatStyle::LK_Java ||
89  Style.Language == FormatStyle::LK_JavaScript)
90  return 0;
91  if (RootToken.isAccessSpecifier(false) ||
92  RootToken.isObjCAccessSpecifier() ||
93  (RootToken.isOneOf(Keywords.kw_signals, Keywords.kw_qsignals) &&
94  RootToken.Next && RootToken.Next->is(tok::colon)))
95  return Style.AccessModifierOffset;
96  return 0;
97  }
98 
99  /// \brief Get the indent of \p Level from \p IndentForLevel.
100  ///
101  /// \p IndentForLevel must contain the indent for the level \c l
102  /// at \p IndentForLevel[l], or a value < 0 if the indent for
103  /// that level is unknown.
104  unsigned getIndent(ArrayRef<int> IndentForLevel, unsigned Level) {
105  if (IndentForLevel[Level] != -1)
106  return IndentForLevel[Level];
107  if (Level == 0)
108  return 0;
109  return getIndent(IndentForLevel, Level - 1) + Style.IndentWidth;
110  }
111 
112  const FormatStyle &Style;
113  const AdditionalKeywords &Keywords;
114  const unsigned AdditionalIndent;
115 
116  /// \brief The indent in characters for each level.
117  std::vector<int> IndentForLevel;
118 
119  /// \brief Offset of the current line relative to the indent level.
120  ///
121  /// For example, the 'public' keywords is often indented with a negative
122  /// offset.
123  int Offset = 0;
124 
125  /// \brief The current line's indent.
126  unsigned Indent = 0;
127 };
128 
129 class LineJoiner {
130 public:
131  LineJoiner(const FormatStyle &Style, const AdditionalKeywords &Keywords,
132  const SmallVectorImpl<AnnotatedLine *> &Lines)
133  : Style(Style), Keywords(Keywords), End(Lines.end()),
134  Next(Lines.begin()) {}
135 
136  /// \brief Returns the next line, merging multiple lines into one if possible.
137  const AnnotatedLine *getNextMergedLine(bool DryRun,
138  LevelIndentTracker &IndentTracker) {
139  if (Next == End)
140  return nullptr;
141  const AnnotatedLine *Current = *Next;
142  IndentTracker.nextLine(*Current);
143  unsigned MergedLines =
144  tryFitMultipleLinesInOne(IndentTracker.getIndent(), Next, End);
145  if (MergedLines > 0 && Style.ColumnLimit == 0)
146  // Disallow line merging if there is a break at the start of one of the
147  // input lines.
148  for (unsigned i = 0; i < MergedLines; ++i)
149  if (Next[i + 1]->First->NewlinesBefore > 0)
150  MergedLines = 0;
151  if (!DryRun)
152  for (unsigned i = 0; i < MergedLines; ++i)
153  join(*Next[i], *Next[i + 1]);
154  Next = Next + MergedLines + 1;
155  return Current;
156  }
157 
158 private:
159  /// \brief Calculates how many lines can be merged into 1 starting at \p I.
160  unsigned
161  tryFitMultipleLinesInOne(unsigned Indent,
162  SmallVectorImpl<AnnotatedLine *>::const_iterator I,
163  SmallVectorImpl<AnnotatedLine *>::const_iterator E) {
164  // Can't join the last line with anything.
165  if (I + 1 == E)
166  return 0;
167  // We can never merge stuff if there are trailing line comments.
168  const AnnotatedLine *TheLine = *I;
169  if (TheLine->Last->is(TT_LineComment))
170  return 0;
171  if (I[1]->Type == LT_Invalid || I[1]->First->MustBreakBefore)
172  return 0;
173  if (TheLine->InPPDirective &&
174  (!I[1]->InPPDirective || I[1]->First->HasUnescapedNewline))
175  return 0;
176 
177  if (Style.ColumnLimit > 0 && Indent > Style.ColumnLimit)
178  return 0;
179 
180  unsigned Limit =
181  Style.ColumnLimit == 0 ? UINT_MAX : Style.ColumnLimit - Indent;
182  // If we already exceed the column limit, we set 'Limit' to 0. The different
183  // tryMerge..() functions can then decide whether to still do merging.
184  Limit = TheLine->Last->TotalLength > Limit
185  ? 0
186  : Limit - TheLine->Last->TotalLength;
187 
188  // FIXME: TheLine->Level != 0 might or might not be the right check to do.
189  // If necessary, change to something smarter.
190  bool MergeShortFunctions =
191  Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_All ||
192  (Style.AllowShortFunctionsOnASingleLine >= FormatStyle::SFS_Empty &&
193  I[1]->First->is(tok::r_brace)) ||
194  (Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Inline &&
195  TheLine->Level != 0);
196 
197  if (TheLine->Last->is(TT_FunctionLBrace) &&
198  TheLine->First != TheLine->Last) {
199  return MergeShortFunctions ? tryMergeSimpleBlock(I, E, Limit) : 0;
200  }
201  if (TheLine->Last->is(tok::l_brace)) {
202  return !Style.BraceWrapping.AfterFunction
203  ? tryMergeSimpleBlock(I, E, Limit)
204  : 0;
205  }
206  if (I[1]->First->is(TT_FunctionLBrace) &&
207  Style.BraceWrapping.AfterFunction) {
208  if (I[1]->Last->is(TT_LineComment))
209  return 0;
210 
211  // Check for Limit <= 2 to account for the " {".
212  if (Limit <= 2 || (Style.ColumnLimit == 0 && containsMustBreak(TheLine)))
213  return 0;
214  Limit -= 2;
215 
216  unsigned MergedLines = 0;
217  if (MergeShortFunctions) {
218  MergedLines = tryMergeSimpleBlock(I + 1, E, Limit);
219  // If we managed to merge the block, count the function header, which is
220  // on a separate line.
221  if (MergedLines > 0)
222  ++MergedLines;
223  }
224  return MergedLines;
225  }
226  if (TheLine->First->is(tok::kw_if)) {
227  return Style.AllowShortIfStatementsOnASingleLine
228  ? tryMergeSimpleControlStatement(I, E, Limit)
229  : 0;
230  }
231  if (TheLine->First->isOneOf(tok::kw_for, tok::kw_while)) {
232  return Style.AllowShortLoopsOnASingleLine
233  ? tryMergeSimpleControlStatement(I, E, Limit)
234  : 0;
235  }
236  if (TheLine->First->isOneOf(tok::kw_case, tok::kw_default)) {
237  return Style.AllowShortCaseLabelsOnASingleLine
238  ? tryMergeShortCaseLabels(I, E, Limit)
239  : 0;
240  }
241  if (TheLine->InPPDirective &&
242  (TheLine->First->HasUnescapedNewline || TheLine->First->IsFirst)) {
243  return tryMergeSimplePPDirective(I, E, Limit);
244  }
245  return 0;
246  }
247 
248  unsigned
249  tryMergeSimplePPDirective(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
250  SmallVectorImpl<AnnotatedLine *>::const_iterator E,
251  unsigned Limit) {
252  if (Limit == 0)
253  return 0;
254  if (I + 2 != E && I[2]->InPPDirective && !I[2]->First->HasUnescapedNewline)
255  return 0;
256  if (1 + I[1]->Last->TotalLength > Limit)
257  return 0;
258  return 1;
259  }
260 
261  unsigned tryMergeSimpleControlStatement(
262  SmallVectorImpl<AnnotatedLine *>::const_iterator I,
263  SmallVectorImpl<AnnotatedLine *>::const_iterator E, unsigned Limit) {
264  if (Limit == 0)
265  return 0;
266  if (Style.BraceWrapping.AfterControlStatement &&
267  (I[1]->First->is(tok::l_brace) && !Style.AllowShortBlocksOnASingleLine))
268  return 0;
269  if (I[1]->InPPDirective != (*I)->InPPDirective ||
270  (I[1]->InPPDirective && I[1]->First->HasUnescapedNewline))
271  return 0;
272  Limit = limitConsideringMacros(I + 1, E, Limit);
273  AnnotatedLine &Line = **I;
274  if (Line.Last->isNot(tok::r_paren))
275  return 0;
276  if (1 + I[1]->Last->TotalLength > Limit)
277  return 0;
278  if (I[1]->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for, tok::kw_while,
279  TT_LineComment))
280  return 0;
281  // Only inline simple if's (no nested if or else).
282  if (I + 2 != E && Line.startsWith(tok::kw_if) &&
283  I[2]->First->is(tok::kw_else))
284  return 0;
285  return 1;
286  }
287 
288  unsigned
289  tryMergeShortCaseLabels(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
290  SmallVectorImpl<AnnotatedLine *>::const_iterator E,
291  unsigned Limit) {
292  if (Limit == 0 || I + 1 == E ||
293  I[1]->First->isOneOf(tok::kw_case, tok::kw_default))
294  return 0;
295  unsigned NumStmts = 0;
296  unsigned Length = 0;
297  bool InPPDirective = I[0]->InPPDirective;
298  for (; NumStmts < 3; ++NumStmts) {
299  if (I + 1 + NumStmts == E)
300  break;
301  const AnnotatedLine *Line = I[1 + NumStmts];
302  if (Line->InPPDirective != InPPDirective)
303  break;
304  if (Line->First->isOneOf(tok::kw_case, tok::kw_default, tok::r_brace))
305  break;
306  if (Line->First->isOneOf(tok::kw_if, tok::kw_for, tok::kw_switch,
307  tok::kw_while, tok::comment) ||
308  Line->Last->is(tok::comment))
309  return 0;
310  Length += I[1 + NumStmts]->Last->TotalLength + 1; // 1 for the space.
311  }
312  if (NumStmts == 0 || NumStmts == 3 || Length > Limit)
313  return 0;
314  return NumStmts;
315  }
316 
317  unsigned
318  tryMergeSimpleBlock(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
319  SmallVectorImpl<AnnotatedLine *>::const_iterator E,
320  unsigned Limit) {
321  AnnotatedLine &Line = **I;
322 
323  // Don't merge ObjC @ keywords and methods.
324  // FIXME: If an option to allow short exception handling clauses on a single
325  // line is added, change this to not return for @try and friends.
326  if (Style.Language != FormatStyle::LK_Java &&
327  Line.First->isOneOf(tok::at, tok::minus, tok::plus))
328  return 0;
329 
330  // Check that the current line allows merging. This depends on whether we
331  // are in a control flow statements as well as several style flags.
332  if (Line.First->isOneOf(tok::kw_else, tok::kw_case) ||
333  (Line.First->Next && Line.First->Next->is(tok::kw_else)))
334  return 0;
335  if (Line.First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::kw_try,
336  tok::kw___try, tok::kw_catch, tok::kw___finally,
337  tok::kw_for, tok::r_brace, Keywords.kw___except)) {
338  if (!Style.AllowShortBlocksOnASingleLine)
339  return 0;
340  if (!Style.AllowShortIfStatementsOnASingleLine &&
341  Line.startsWith(tok::kw_if))
342  return 0;
343  if (!Style.AllowShortLoopsOnASingleLine &&
344  Line.First->isOneOf(tok::kw_while, tok::kw_do, tok::kw_for))
345  return 0;
346  // FIXME: Consider an option to allow short exception handling clauses on
347  // a single line.
348  // FIXME: This isn't covered by tests.
349  // FIXME: For catch, __except, __finally the first token on the line
350  // is '}', so this isn't correct here.
351  if (Line.First->isOneOf(tok::kw_try, tok::kw___try, tok::kw_catch,
352  Keywords.kw___except, tok::kw___finally))
353  return 0;
354  }
355 
356  FormatToken *Tok = I[1]->First;
357  if (Tok->is(tok::r_brace) && !Tok->MustBreakBefore &&
358  (Tok->getNextNonComment() == nullptr ||
359  Tok->getNextNonComment()->is(tok::semi))) {
360  // We merge empty blocks even if the line exceeds the column limit.
361  Tok->SpacesRequiredBefore = 0;
362  Tok->CanBreakBefore = true;
363  return 1;
364  } else if (Limit != 0 && !Line.startsWith(tok::kw_namespace) &&
365  !startsExternCBlock(Line)) {
366  // We don't merge short records.
367  if (Line.First->isOneOf(tok::kw_class, tok::kw_union, tok::kw_struct,
368  Keywords.kw_interface))
369  return 0;
370 
371  // Check that we still have three lines and they fit into the limit.
372  if (I + 2 == E || I[2]->Type == LT_Invalid)
373  return 0;
374  Limit = limitConsideringMacros(I + 2, E, Limit);
375 
376  if (!nextTwoLinesFitInto(I, Limit))
377  return 0;
378 
379  // Second, check that the next line does not contain any braces - if it
380  // does, readability declines when putting it into a single line.
381  if (I[1]->Last->is(TT_LineComment))
382  return 0;
383  do {
384  if (Tok->is(tok::l_brace) && Tok->BlockKind != BK_BracedInit)
385  return 0;
386  Tok = Tok->Next;
387  } while (Tok);
388 
389  // Last, check that the third line starts with a closing brace.
390  Tok = I[2]->First;
391  if (Tok->isNot(tok::r_brace))
392  return 0;
393 
394  // Don't merge "if (a) { .. } else {".
395  if (Tok->Next && Tok->Next->is(tok::kw_else))
396  return 0;
397 
398  return 2;
399  }
400  return 0;
401  }
402 
403  /// Returns the modified column limit for \p I if it is inside a macro and
404  /// needs a trailing '\'.
405  unsigned
406  limitConsideringMacros(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
407  SmallVectorImpl<AnnotatedLine *>::const_iterator E,
408  unsigned Limit) {
409  if (I[0]->InPPDirective && I + 1 != E &&
410  !I[1]->First->HasUnescapedNewline && !I[1]->First->is(tok::eof)) {
411  return Limit < 2 ? 0 : Limit - 2;
412  }
413  return Limit;
414  }
415 
416  bool nextTwoLinesFitInto(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
417  unsigned Limit) {
418  if (I[1]->First->MustBreakBefore || I[2]->First->MustBreakBefore)
419  return false;
420  return 1 + I[1]->Last->TotalLength + 1 + I[2]->Last->TotalLength <= Limit;
421  }
422 
423  bool containsMustBreak(const AnnotatedLine *Line) {
424  for (const FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) {
425  if (Tok->MustBreakBefore)
426  return true;
427  }
428  return false;
429  }
430 
431  void join(AnnotatedLine &A, const AnnotatedLine &B) {
432  assert(!A.Last->Next);
433  assert(!B.First->Previous);
434  if (B.Affected)
435  A.Affected = true;
436  A.Last->Next = B.First;
437  B.First->Previous = A.Last;
438  B.First->CanBreakBefore = true;
439  unsigned LengthA = A.Last->TotalLength + B.First->SpacesRequiredBefore;
440  for (FormatToken *Tok = B.First; Tok; Tok = Tok->Next) {
441  Tok->TotalLength += LengthA;
442  A.Last = Tok;
443  }
444  }
445 
446  const FormatStyle &Style;
447  const AdditionalKeywords &Keywords;
448  const SmallVectorImpl<AnnotatedLine *>::const_iterator End;
449 
450  SmallVectorImpl<AnnotatedLine *>::const_iterator Next;
451 };
452 
453 static void markFinalized(FormatToken *Tok) {
454  for (; Tok; Tok = Tok->Next) {
455  Tok->Finalized = true;
456  for (AnnotatedLine *Child : Tok->Children)
457  markFinalized(Child->First);
458  }
459 }
460 
461 #ifndef NDEBUG
462 static void printLineState(const LineState &State) {
463  llvm::dbgs() << "State: ";
464  for (const ParenState &P : State.Stack) {
465  llvm::dbgs() << P.Indent << "|" << P.LastSpace << "|" << P.NestedBlockIndent
466  << " ";
467  }
468  llvm::dbgs() << State.NextToken->TokenText << "\n";
469 }
470 #endif
471 
472 /// \brief Base class for classes that format one \c AnnotatedLine.
473 class LineFormatter {
474 public:
475  LineFormatter(ContinuationIndenter *Indenter, WhitespaceManager *Whitespaces,
476  const FormatStyle &Style,
477  UnwrappedLineFormatter *BlockFormatter)
478  : Indenter(Indenter), Whitespaces(Whitespaces), Style(Style),
479  BlockFormatter(BlockFormatter) {}
480  virtual ~LineFormatter() {}
481 
482  /// \brief Formats an \c AnnotatedLine and returns the penalty.
483  ///
484  /// If \p DryRun is \c false, directly applies the changes.
485  virtual unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,
486  bool DryRun) = 0;
487 
488 protected:
489  /// \brief If the \p State's next token is an r_brace closing a nested block,
490  /// format the nested block before it.
491  ///
492  /// Returns \c true if all children could be placed successfully and adapts
493  /// \p Penalty as well as \p State. If \p DryRun is false, also directly
494  /// creates changes using \c Whitespaces.
495  ///
496  /// The crucial idea here is that children always get formatted upon
497  /// encountering the closing brace right after the nested block. Now, if we
498  /// are currently trying to keep the "}" on the same line (i.e. \p NewLine is
499  /// \c false), the entire block has to be kept on the same line (which is only
500  /// possible if it fits on the line, only contains a single statement, etc.
501  ///
502  /// If \p NewLine is true, we format the nested block on separate lines, i.e.
503  /// break after the "{", format all lines with correct indentation and the put
504  /// the closing "}" on yet another new line.
505  ///
506  /// This enables us to keep the simple structure of the
507  /// \c UnwrappedLineFormatter, where we only have two options for each token:
508  /// break or don't break.
509  bool formatChildren(LineState &State, bool NewLine, bool DryRun,
510  unsigned &Penalty) {
511  const FormatToken *LBrace = State.NextToken->getPreviousNonComment();
512  FormatToken &Previous = *State.NextToken->Previous;
513  if (!LBrace || LBrace->isNot(tok::l_brace) ||
514  LBrace->BlockKind != BK_Block || Previous.Children.size() == 0)
515  // The previous token does not open a block. Nothing to do. We don't
516  // assert so that we can simply call this function for all tokens.
517  return true;
518 
519  if (NewLine) {
520  int AdditionalIndent = State.Stack.back().Indent -
521  Previous.Children[0]->Level * Style.IndentWidth;
522 
523  Penalty +=
524  BlockFormatter->format(Previous.Children, DryRun, AdditionalIndent,
525  /*FixBadIndentation=*/true);
526  return true;
527  }
528 
529  if (Previous.Children[0]->First->MustBreakBefore)
530  return false;
531 
532  // Cannot merge multiple statements into a single line.
533  if (Previous.Children.size() > 1)
534  return false;
535 
536  // Cannot merge into one line if this line ends on a comment.
537  if (Previous.is(tok::comment))
538  return false;
539 
540  // We can't put the closing "}" on a line with a trailing comment.
541  if (Previous.Children[0]->Last->isTrailingComment())
542  return false;
543 
544  // If the child line exceeds the column limit, we wouldn't want to merge it.
545  // We add +2 for the trailing " }".
546  if (Style.ColumnLimit > 0 &&
547  Previous.Children[0]->Last->TotalLength + State.Column + 2 >
548  Style.ColumnLimit)
549  return false;
550 
551  if (!DryRun) {
552  Whitespaces->replaceWhitespace(
553  *Previous.Children[0]->First,
554  /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1,
555  /*StartOfTokenColumn=*/State.Column, State.Line->InPPDirective);
556  }
557  Penalty += formatLine(*Previous.Children[0], State.Column + 1, DryRun);
558 
559  State.Column += 1 + Previous.Children[0]->Last->TotalLength;
560  return true;
561  }
562 
563  ContinuationIndenter *Indenter;
564 
565 private:
566  WhitespaceManager *Whitespaces;
567  const FormatStyle &Style;
568  UnwrappedLineFormatter *BlockFormatter;
569 };
570 
571 /// \brief Formatter that keeps the existing line breaks.
572 class NoColumnLimitLineFormatter : public LineFormatter {
573 public:
574  NoColumnLimitLineFormatter(ContinuationIndenter *Indenter,
575  WhitespaceManager *Whitespaces,
576  const FormatStyle &Style,
577  UnwrappedLineFormatter *BlockFormatter)
578  : LineFormatter(Indenter, Whitespaces, Style, BlockFormatter) {}
579 
580  /// \brief Formats the line, simply keeping all of the input's line breaking
581  /// decisions.
582  unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,
583  bool DryRun) override {
584  assert(!DryRun);
585  LineState State =
586  Indenter->getInitialState(FirstIndent, &Line, /*DryRun=*/false);
587  while (State.NextToken) {
588  bool Newline =
589  Indenter->mustBreak(State) ||
590  (Indenter->canBreak(State) && State.NextToken->NewlinesBefore > 0);
591  unsigned Penalty = 0;
592  formatChildren(State, Newline, /*DryRun=*/false, Penalty);
593  Indenter->addTokenToState(State, Newline, /*DryRun=*/false);
594  }
595  return 0;
596  }
597 };
598 
599 /// \brief Formatter that puts all tokens into a single line without breaks.
600 class NoLineBreakFormatter : public LineFormatter {
601 public:
602  NoLineBreakFormatter(ContinuationIndenter *Indenter,
603  WhitespaceManager *Whitespaces, const FormatStyle &Style,
604  UnwrappedLineFormatter *BlockFormatter)
605  : LineFormatter(Indenter, Whitespaces, Style, BlockFormatter) {}
606 
607  /// \brief Puts all tokens into a single line.
608  unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,
609  bool DryRun) override {
610  unsigned Penalty = 0;
611  LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun);
612  while (State.NextToken) {
613  formatChildren(State, /*Newline=*/false, DryRun, Penalty);
614  Indenter->addTokenToState(State, /*Newline=*/false, DryRun);
615  }
616  return Penalty;
617  }
618 };
619 
620 /// \brief Finds the best way to break lines.
621 class OptimizingLineFormatter : public LineFormatter {
622 public:
623  OptimizingLineFormatter(ContinuationIndenter *Indenter,
624  WhitespaceManager *Whitespaces,
625  const FormatStyle &Style,
626  UnwrappedLineFormatter *BlockFormatter)
627  : LineFormatter(Indenter, Whitespaces, Style, BlockFormatter) {}
628 
629  /// \brief Formats the line by finding the best line breaks with line lengths
630  /// below the column limit.
631  unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,
632  bool DryRun) override {
633  LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun);
634 
635  // If the ObjC method declaration does not fit on a line, we should format
636  // it with one arg per line.
637  if (State.Line->Type == LT_ObjCMethodDecl)
638  State.Stack.back().BreakBeforeParameter = true;
639 
640  // Find best solution in solution space.
641  return analyzeSolutionSpace(State, DryRun);
642  }
643 
644 private:
645  struct CompareLineStatePointers {
646  bool operator()(LineState *obj1, LineState *obj2) const {
647  return *obj1 < *obj2;
648  }
649  };
650 
651  /// \brief A pair of <penalty, count> that is used to prioritize the BFS on.
652  ///
653  /// In case of equal penalties, we want to prefer states that were inserted
654  /// first. During state generation we make sure that we insert states first
655  /// that break the line as late as possible.
656  typedef std::pair<unsigned, unsigned> OrderedPenalty;
657 
658  /// \brief An edge in the solution space from \c Previous->State to \c State,
659  /// inserting a newline dependent on the \c NewLine.
660  struct StateNode {
661  StateNode(const LineState &State, bool NewLine, StateNode *Previous)
662  : State(State), NewLine(NewLine), Previous(Previous) {}
663  LineState State;
664  bool NewLine;
665  StateNode *Previous;
666  };
667 
668  /// \brief An item in the prioritized BFS search queue. The \c StateNode's
669  /// \c State has the given \c OrderedPenalty.
670  typedef std::pair<OrderedPenalty, StateNode *> QueueItem;
671 
672  /// \brief The BFS queue type.
673  typedef std::priority_queue<QueueItem, std::vector<QueueItem>,
674  std::greater<QueueItem>> QueueType;
675 
676  /// \brief Analyze the entire solution space starting from \p InitialState.
677  ///
678  /// This implements a variant of Dijkstra's algorithm on the graph that spans
679  /// the solution space (\c LineStates are the nodes). The algorithm tries to
680  /// find the shortest path (the one with lowest penalty) from \p InitialState
681  /// to a state where all tokens are placed. Returns the penalty.
682  ///
683  /// If \p DryRun is \c false, directly applies the changes.
684  unsigned analyzeSolutionSpace(LineState &InitialState, bool DryRun) {
685  std::set<LineState *, CompareLineStatePointers> Seen;
686 
687  // Increasing count of \c StateNode items we have created. This is used to
688  // create a deterministic order independent of the container.
689  unsigned Count = 0;
690  QueueType Queue;
691 
692  // Insert start element into queue.
693  StateNode *Node =
694  new (Allocator.Allocate()) StateNode(InitialState, false, nullptr);
695  Queue.push(QueueItem(OrderedPenalty(0, Count), Node));
696  ++Count;
697 
698  unsigned Penalty = 0;
699 
700  // While not empty, take first element and follow edges.
701  while (!Queue.empty()) {
702  Penalty = Queue.top().first.first;
703  StateNode *Node = Queue.top().second;
704  if (!Node->State.NextToken) {
705  DEBUG(llvm::dbgs() << "\n---\nPenalty for line: " << Penalty << "\n");
706  break;
707  }
708  Queue.pop();
709 
710  // Cut off the analysis of certain solutions if the analysis gets too
711  // complex. See description of IgnoreStackForComparison.
712  if (Count > 50000)
713  Node->State.IgnoreStackForComparison = true;
714 
715  if (!Seen.insert(&Node->State).second)
716  // State already examined with lower penalty.
717  continue;
718 
719  FormatDecision LastFormat = Node->State.NextToken->Decision;
720  if (LastFormat == FD_Unformatted || LastFormat == FD_Continue)
721  addNextStateToQueue(Penalty, Node, /*NewLine=*/false, &Count, &Queue);
722  if (LastFormat == FD_Unformatted || LastFormat == FD_Break)
723  addNextStateToQueue(Penalty, Node, /*NewLine=*/true, &Count, &Queue);
724  }
725 
726  if (Queue.empty()) {
727  // We were unable to find a solution, do nothing.
728  // FIXME: Add diagnostic?
729  DEBUG(llvm::dbgs() << "Could not find a solution.\n");
730  return 0;
731  }
732 
733  // Reconstruct the solution.
734  if (!DryRun)
735  reconstructPath(InitialState, Queue.top().second);
736 
737  DEBUG(llvm::dbgs() << "Total number of analyzed states: " << Count << "\n");
738  DEBUG(llvm::dbgs() << "---\n");
739 
740  return Penalty;
741  }
742 
743  /// \brief Add the following state to the analysis queue \c Queue.
744  ///
745  /// Assume the current state is \p PreviousNode and has been reached with a
746  /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true.
747  void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode,
748  bool NewLine, unsigned *Count, QueueType *Queue) {
749  if (NewLine && !Indenter->canBreak(PreviousNode->State))
750  return;
751  if (!NewLine && Indenter->mustBreak(PreviousNode->State))
752  return;
753 
754  StateNode *Node = new (Allocator.Allocate())
755  StateNode(PreviousNode->State, NewLine, PreviousNode);
756  if (!formatChildren(Node->State, NewLine, /*DryRun=*/true, Penalty))
757  return;
758 
759  Penalty += Indenter->addTokenToState(Node->State, NewLine, true);
760 
761  Queue->push(QueueItem(OrderedPenalty(Penalty, *Count), Node));
762  ++(*Count);
763  }
764 
765  /// \brief Applies the best formatting by reconstructing the path in the
766  /// solution space that leads to \c Best.
767  void reconstructPath(LineState &State, StateNode *Best) {
768  std::deque<StateNode *> Path;
769  // We do not need a break before the initial token.
770  while (Best->Previous) {
771  Path.push_front(Best);
772  Best = Best->Previous;
773  }
774  for (std::deque<StateNode *>::iterator I = Path.begin(), E = Path.end();
775  I != E; ++I) {
776  unsigned Penalty = 0;
777  formatChildren(State, (*I)->NewLine, /*DryRun=*/false, Penalty);
778  Penalty += Indenter->addTokenToState(State, (*I)->NewLine, false);
779 
780  DEBUG({
781  printLineState((*I)->Previous->State);
782  if ((*I)->NewLine) {
783  llvm::dbgs() << "Penalty for placing "
784  << (*I)->Previous->State.NextToken->Tok.getName() << ": "
785  << Penalty << "\n";
786  }
787  });
788  }
789  }
790 
791  llvm::SpecificBumpPtrAllocator<StateNode> Allocator;
792 };
793 
794 } // anonymous namespace
795 
796 unsigned
798  bool DryRun, int AdditionalIndent,
799  bool FixBadIndentation) {
800  LineJoiner Joiner(Style, Keywords, Lines);
801 
802  // Try to look up already computed penalty in DryRun-mode.
803  std::pair<const SmallVectorImpl<AnnotatedLine *> *, unsigned> CacheKey(
804  &Lines, AdditionalIndent);
805  auto CacheIt = PenaltyCache.find(CacheKey);
806  if (DryRun && CacheIt != PenaltyCache.end())
807  return CacheIt->second;
808 
809  assert(!Lines.empty());
810  unsigned Penalty = 0;
811  LevelIndentTracker IndentTracker(Style, Keywords, Lines[0]->Level,
812  AdditionalIndent);
813  const AnnotatedLine *PreviousLine = nullptr;
814  const AnnotatedLine *NextLine = nullptr;
815 
816  // The minimum level of consecutive lines that have been formatted.
817  unsigned RangeMinLevel = UINT_MAX;
818 
819  for (const AnnotatedLine *Line =
820  Joiner.getNextMergedLine(DryRun, IndentTracker);
821  Line; Line = NextLine) {
822  const AnnotatedLine &TheLine = *Line;
823  unsigned Indent = IndentTracker.getIndent();
824 
825  // We continue formatting unchanged lines to adjust their indent, e.g. if a
826  // scope was added. However, we need to carefully stop doing this when we
827  // exit the scope of affected lines to prevent indenting a the entire
828  // remaining file if it currently missing a closing brace.
829  bool ContinueFormatting =
830  TheLine.Level > RangeMinLevel ||
831  (TheLine.Level == RangeMinLevel && !TheLine.startsWith(tok::r_brace));
832 
833  bool FixIndentation = (FixBadIndentation || ContinueFormatting) &&
834  Indent != TheLine.First->OriginalColumn;
835  bool ShouldFormat = TheLine.Affected || FixIndentation;
836  // We cannot format this line; if the reason is that the line had a
837  // parsing error, remember that.
838  if (ShouldFormat && TheLine.Type == LT_Invalid && IncompleteFormat)
839  *IncompleteFormat = true;
840 
841  if (ShouldFormat && TheLine.Type != LT_Invalid) {
842  if (!DryRun)
843  formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level, Indent,
844  TheLine.InPPDirective);
845 
846  NextLine = Joiner.getNextMergedLine(DryRun, IndentTracker);
847  unsigned ColumnLimit = getColumnLimit(TheLine.InPPDirective, NextLine);
848  bool FitsIntoOneLine =
849  TheLine.Last->TotalLength + Indent <= ColumnLimit ||
850  (TheLine.Type == LT_ImportStatement &&
851  (Style.Language != FormatStyle::LK_JavaScript ||
852  !Style.JavaScriptWrapImports));
853 
854  if (Style.ColumnLimit == 0)
855  NoColumnLimitLineFormatter(Indenter, Whitespaces, Style, this)
856  .formatLine(TheLine, Indent, DryRun);
857  else if (FitsIntoOneLine)
858  Penalty += NoLineBreakFormatter(Indenter, Whitespaces, Style, this)
859  .formatLine(TheLine, Indent, DryRun);
860  else
861  Penalty += OptimizingLineFormatter(Indenter, Whitespaces, Style, this)
862  .formatLine(TheLine, Indent, DryRun);
863  RangeMinLevel = std::min(RangeMinLevel, TheLine.Level);
864  } else {
865  // If no token in the current line is affected, we still need to format
866  // affected children.
867  if (TheLine.ChildrenAffected)
868  for (const FormatToken *Tok = TheLine.First; Tok; Tok = Tok->Next)
869  if (!Tok->Children.empty())
870  format(Tok->Children, DryRun);
871 
872  // Adapt following lines on the current indent level to the same level
873  // unless the current \c AnnotatedLine is not at the beginning of a line.
874  bool StartsNewLine =
875  TheLine.First->NewlinesBefore > 0 || TheLine.First->IsFirst;
876  if (StartsNewLine)
877  IndentTracker.adjustToUnmodifiedLine(TheLine);
878  if (!DryRun) {
879  bool ReformatLeadingWhitespace =
880  StartsNewLine && ((PreviousLine && PreviousLine->Affected) ||
881  TheLine.LeadingEmptyLinesAffected);
882  // Format the first token.
883  if (ReformatLeadingWhitespace)
884  formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level,
885  TheLine.First->OriginalColumn,
886  TheLine.InPPDirective);
887  else
888  Whitespaces->addUntouchableToken(*TheLine.First,
889  TheLine.InPPDirective);
890 
891  // Notify the WhitespaceManager about the unchanged whitespace.
892  for (FormatToken *Tok = TheLine.First->Next; Tok; Tok = Tok->Next)
893  Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective);
894  }
895  NextLine = Joiner.getNextMergedLine(DryRun, IndentTracker);
896  RangeMinLevel = UINT_MAX;
897  }
898  if (!DryRun)
899  markFinalized(TheLine.First);
900  PreviousLine = &TheLine;
901  }
902  PenaltyCache[CacheKey] = Penalty;
903  return Penalty;
904 }
905 
906 void UnwrappedLineFormatter::formatFirstToken(FormatToken &RootToken,
907  const AnnotatedLine *PreviousLine,
908  unsigned IndentLevel,
909  unsigned Indent,
910  bool InPPDirective) {
911  if (RootToken.is(tok::eof)) {
912  unsigned Newlines = std::min(RootToken.NewlinesBefore, 1u);
913  Whitespaces->replaceWhitespace(RootToken, Newlines, /*IndentLevel=*/0,
914  /*Spaces=*/0, /*TargetColumn=*/0);
915  return;
916  }
917  unsigned Newlines =
918  std::min(RootToken.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1);
919  // Remove empty lines before "}" where applicable.
920  if (RootToken.is(tok::r_brace) &&
921  (!RootToken.Next ||
922  (RootToken.Next->is(tok::semi) && !RootToken.Next->Next)))
923  Newlines = std::min(Newlines, 1u);
924  if (Newlines == 0 && !RootToken.IsFirst)
925  Newlines = 1;
926  if (RootToken.IsFirst && !RootToken.HasUnescapedNewline)
927  Newlines = 0;
928 
929  // Remove empty lines after "{".
930  if (!Style.KeepEmptyLinesAtTheStartOfBlocks && PreviousLine &&
931  PreviousLine->Last->is(tok::l_brace) &&
932  PreviousLine->First->isNot(tok::kw_namespace) &&
933  !startsExternCBlock(*PreviousLine))
934  Newlines = 1;
935 
936  // Insert extra new line before access specifiers.
937  if (PreviousLine && PreviousLine->Last->isOneOf(tok::semi, tok::r_brace) &&
938  RootToken.isAccessSpecifier() && RootToken.NewlinesBefore == 1)
939  ++Newlines;
940 
941  // Remove empty lines after access specifiers.
942  if (PreviousLine && PreviousLine->First->isAccessSpecifier() &&
943  (!PreviousLine->InPPDirective || !RootToken.HasUnescapedNewline))
944  Newlines = std::min(1u, Newlines);
945 
946  Whitespaces->replaceWhitespace(RootToken, Newlines, IndentLevel, Indent,
947  Indent, InPPDirective &&
948  !RootToken.HasUnescapedNewline);
949 }
950 
951 unsigned
952 UnwrappedLineFormatter::getColumnLimit(bool InPPDirective,
953  const AnnotatedLine *NextLine) const {
954  // In preprocessor directives reserve two chars for trailing " \" if the
955  // next line continues the preprocessor directive.
956  bool ContinuesPPDirective =
957  InPPDirective &&
958  // If there is no next line, this is likely a child line and the parent
959  // continues the preprocessor directive.
960  (!NextLine ||
961  (NextLine->InPPDirective &&
962  // If there is an unescaped newline between this line and the next, the
963  // next line starts a new preprocessor directive.
964  !NextLine->First->HasUnescapedNewline));
965  return Style.ColumnLimit - (ContinuesPPDirective ? 2 : 0);
966 }
967 
968 } // namespace format
969 } // namespace clang
unsigned Length
bool isAccessSpecifier(bool ColonRequired=true) const
Definition: FormatToken.h:320
std::vector< int > IndentForLevel
The indent in characters for each level.
unsigned OriginalColumn
The original 0-based column of this token, including expanded tabs.
Definition: FormatToken.h:208
StringRef P
bool isOneOf(A K1, B K2) const
Definition: FormatToken.h:291
bool LeadingEmptyLinesAffected
True if the leading empty lines of this line intersect with one of the input ranges.
bool IsFirst
Indicates that this is the first token of the file.
Definition: FormatToken.h:149
bool isNot(T Kind) const
Definition: FormatToken.h:298
unsigned TotalLength
The total length of the unwrapped line up to and including this token.
Definition: FormatToken.h:204
unsigned NewlinesBefore
The number of newlines immediately before the Token.
Definition: FormatToken.h:123
FormatToken * Next
The next token in the unwrapped line.
Definition: FormatToken.h:268
LineState State
Should be used for Java.
Definition: Format.h:465
const FormatStyle & Style
uint32_t Offset
Definition: CacheTokens.cpp:44
bool ChildrenAffected
True if a one of this line's children intersects with an input range.
int Offset
Offset of the current line relative to the indent level.
WhitespaceManager * Whitespaces
UnwrappedLineFormatter * BlockFormatter
#define UINT_MAX
Definition: limits.h:72
detail::InMemoryDirectory::const_iterator I
const SmallVectorImpl< AnnotatedLine * >::const_iterator End
WhitespaceManager class manages whitespace around tokens and their replacements.
llvm::SpecificBumpPtrAllocator< StateNode > Allocator
Only merge empty functions.
Definition: Format.h:137
Should be used for JavaScript.
Definition: Format.h:467
ContinuationIndenter * Indenter
StateNode * Previous
A wrapper around a Token storing information about the whitespace characters preceding it...
Definition: FormatToken.h:113
char __ovld __cnfn min(char x, char y)
Returns y if y < x, otherwise it returns x.
Implements a combinartorial exploration of all the different linebreaks unwrapped lines can be format...
bool startsWith(Ts...Tokens) const
true if this line starts with the given tokens in order, ignoring comments.
bool Affected
True if this line should be formatted, i.e.
SmallVectorImpl< AnnotatedLine * >::const_iterator Next
const TemplateArgument * iterator
Definition: Type.h:4233
AnnotatedLine & Line
unsigned format(const SmallVectorImpl< AnnotatedLine * > &Lines, bool DryRun=false, int AdditionalIndent=0, bool FixBadIndentation=false)
Format the current block and return the penalty.
ast_type_traits::DynTypedNode Node
if(T->getSizeExpr()) TRY_TO(TraverseStmt(T-> getSizeExpr()))
detail::InMemoryDirectory::const_iterator E
Merge all functions fitting on a single line.
Definition: Format.h:141
const AdditionalKeywords & Keywords
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:282
Only merge functions defined inside a class. Implies "empty".
Definition: Format.h:139
FormatToken * Current
const unsigned AdditionalIndent
bool HasUnescapedNewline
Whether there is at least one unescaped newline before the Token.
Definition: FormatToken.h:127
unsigned Indent
The current line's indent.