clang  3.9.0
ContinuationIndenter.h
Go to the documentation of this file.
1 //===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements an indenter that manages the indentation of
12 /// continuations.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
17 #define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
18 
19 #include "Encoding.h"
20 #include "FormatToken.h"
21 #include "clang/Format/Format.h"
22 #include "llvm/Support/Regex.h"
23 
24 namespace clang {
25 class SourceManager;
26 
27 namespace format {
28 
29 class AnnotatedLine;
30 struct FormatToken;
31 struct LineState;
32 struct ParenState;
33 class WhitespaceManager;
34 
36 public:
37  /// \brief Constructs a \c ContinuationIndenter to format \p Line starting in
38  /// column \p FirstIndent.
39  ContinuationIndenter(const FormatStyle &Style,
40  const AdditionalKeywords &Keywords,
41  const SourceManager &SourceMgr,
42  WhitespaceManager &Whitespaces,
43  encoding::Encoding Encoding,
44  bool BinPackInconclusiveFunctions);
45 
46  /// \brief Get the initial state, i.e. the state after placing \p Line's
47  /// first token at \p FirstIndent.
48  LineState getInitialState(unsigned FirstIndent, const AnnotatedLine *Line,
49  bool DryRun);
50 
51  // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
52  // better home.
53  /// \brief Returns \c true, if a line break after \p State is allowed.
54  bool canBreak(const LineState &State);
55 
56  /// \brief Returns \c true, if a line break after \p State is mandatory.
57  bool mustBreak(const LineState &State);
58 
59  /// \brief Appends the next token to \p State and updates information
60  /// necessary for indentation.
61  ///
62  /// Puts the token on the current line if \p Newline is \c false and adds a
63  /// line break and necessary indentation otherwise.
64  ///
65  /// If \p DryRun is \c false, also creates and stores the required
66  /// \c Replacement.
67  unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
68  unsigned ExtraSpaces = 0);
69 
70  /// \brief Get the column limit for this line. This is the style's column
71  /// limit, potentially reduced for preprocessor definitions.
72  unsigned getColumnLimit(const LineState &State) const;
73 
74 private:
75  /// \brief Mark the next token as consumed in \p State and modify its stacks
76  /// accordingly.
77  unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
78 
79  /// \brief Update 'State' according to the next token's fake left parentheses.
80  void moveStatePastFakeLParens(LineState &State, bool Newline);
81  /// \brief Update 'State' according to the next token's fake r_parens.
82  void moveStatePastFakeRParens(LineState &State);
83 
84  /// \brief Update 'State' according to the next token being one of "(<{[".
85  void moveStatePastScopeOpener(LineState &State, bool Newline);
86  /// \brief Update 'State' according to the next token being one of ")>}]".
87  void moveStatePastScopeCloser(LineState &State);
88  /// \brief Update 'State' with the next token opening a nested block.
89  void moveStateToNewBlock(LineState &State);
90 
91  /// \brief If the current token sticks out over the end of the line, break
92  /// it if possible.
93  ///
94  /// \returns An extra penalty if a token was broken, otherwise 0.
95  ///
96  /// The returned penalty will cover the cost of the additional line breaks and
97  /// column limit violation in all lines except for the last one. The penalty
98  /// for the column limit violation in the last line (and in single line
99  /// tokens) is handled in \c addNextStateToQueue.
100  unsigned breakProtrudingToken(const FormatToken &Current, LineState &State,
101  bool DryRun);
102 
103  /// \brief Appends the next token to \p State and updates information
104  /// necessary for indentation.
105  ///
106  /// Puts the token on the current line.
107  ///
108  /// If \p DryRun is \c false, also creates and stores the required
109  /// \c Replacement.
110  void addTokenOnCurrentLine(LineState &State, bool DryRun,
111  unsigned ExtraSpaces);
112 
113  /// \brief Appends the next token to \p State and updates information
114  /// necessary for indentation.
115  ///
116  /// Adds a line break and necessary indentation.
117  ///
118  /// If \p DryRun is \c false, also creates and stores the required
119  /// \c Replacement.
120  unsigned addTokenOnNewLine(LineState &State, bool DryRun);
121 
122  /// \brief Calculate the new column for a line wrap before the next token.
123  unsigned getNewLineColumn(const LineState &State);
124 
125  /// \brief Adds a multiline token to the \p State.
126  ///
127  /// \returns Extra penalty for the first line of the literal: last line is
128  /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
129  /// matter, as we don't change them.
130  unsigned addMultilineToken(const FormatToken &Current, LineState &State);
131 
132  /// \brief Returns \c true if the next token starts a multiline string
133  /// literal.
134  ///
135  /// This includes implicitly concatenated strings, strings that will be broken
136  /// by clang-format and string literals with escaped newlines.
137  bool nextIsMultilineString(const LineState &State);
138 
139  FormatStyle Style;
140  const AdditionalKeywords &Keywords;
141  const SourceManager &SourceMgr;
142  WhitespaceManager &Whitespaces;
143  encoding::Encoding Encoding;
144  bool BinPackInconclusiveFunctions;
145  llvm::Regex CommentPragmasRegex;
146 };
147 
/// \brief Formatting properties that apply to one parenthesis level.
struct ParenState {
  /// \brief Creates the state for a new parenthesis level.
  ///
  /// \p Indent also seeds \c NestedBlockIndent. All flags that are not passed
  /// in start out \c false, except \c LastOperatorWrapped and \c AlignColons,
  /// which start out \c true.
  ParenState(unsigned Indent, unsigned IndentLevel, unsigned LastSpace,
             bool AvoidBinPacking, bool NoLineBreak)
      : Indent(Indent), IndentLevel(IndentLevel), LastSpace(LastSpace),
        NestedBlockIndent(Indent), BreakBeforeClosingBrace(false),
        AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false),
        NoLineBreak(NoLineBreak), LastOperatorWrapped(true),
        ContainsLineBreak(false), ContainsUnwrappedBuilder(false),
        AlignColons(true), ObjCSelectorNameFound(false),
        HasMultipleNestedBlocks(false), NestedBlockInlined(false) {}

  /// \brief The position to which a specific parenthesis level needs to be
  /// indented.
  unsigned Indent;

  /// \brief The number of indentation levels of the block.
  unsigned IndentLevel;

  /// \brief The position of the last space on each level.
  ///
  /// Used e.g. to break like:
  /// functionCall(Parameter, otherCall(
  ///                             OtherParameter));
  unsigned LastSpace;

  /// \brief If a block relative to this parenthesis level gets wrapped, indent
  /// it this much.
  unsigned NestedBlockIndent;

  /// \brief The position the first "<<" operator encountered on each level.
  ///
  /// Used to align "<<" operators. 0 if no such operator has been encountered
  /// on a level.
  unsigned FirstLessLess = 0;

  /// \brief The column of a \c ? in a conditional expression;
  unsigned QuestionColumn = 0;

  /// \brief The position of the colon in an ObjC method declaration/call.
  unsigned ColonPos = 0;

  /// \brief The start of the most recent function in a builder-type call.
  unsigned StartOfFunctionCall = 0;

  /// \brief Contains the start of array subscript expressions, so that they
  /// can be aligned.
  unsigned StartOfArraySubscripts = 0;

  /// \brief If a nested name specifier was broken over multiple lines, this
  /// contains the start column of the second line. Otherwise 0.
  unsigned NestedNameSpecifierContinuation = 0;

  /// \brief If a call expression was broken over multiple lines, this
  /// contains the start column of the second line. Otherwise 0.
  unsigned CallContinuation = 0;

  /// \brief The column of the first variable name in a variable declaration.
  ///
  /// Used to align further variables if necessary.
  unsigned VariablePos = 0;

  /// \brief Whether a newline needs to be inserted before the block's closing
  /// brace.
  ///
  /// We only want to insert a newline before the closing brace if there also
  /// was a newline after the beginning left brace.
  bool BreakBeforeClosingBrace : 1;

  /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple
  /// lines, in this context.
  bool AvoidBinPacking : 1;

  /// \brief Break after the next comma (or all the commas in this context if
  /// \c AvoidBinPacking is \c true).
  bool BreakBeforeParameter : 1;

  /// \brief Line breaking in this context would break a formatting rule.
  bool NoLineBreak : 1;

  /// \brief True if the last binary operator on this level was wrapped to the
  /// next line.
  bool LastOperatorWrapped : 1;

  /// \brief \c true if this \c ParenState already contains a line-break.
  ///
  /// The first line break in a certain \c ParenState causes extra penalty so
  /// that clang-format prefers similar breaks, i.e. breaks in the same
  /// parenthesis.
  bool ContainsLineBreak : 1;

  /// \brief \c true if this \c ParenState contains multiple segments of a
  /// builder-type call on one line.
  bool ContainsUnwrappedBuilder : 1;

  /// \brief \c true if the colons of the current ObjC method expression should
  /// be aligned.
  ///
  /// Not considered for memoization as it will always have the same value at
  /// the same token.
  bool AlignColons : 1;

  /// \brief \c true if at least one selector name was found in the current
  /// ObjC method expression.
  ///
  /// Not considered for memoization as it will always have the same value at
  /// the same token.
  bool ObjCSelectorNameFound : 1;

  /// \brief \c true if there are multiple nested blocks inside these parens.
  ///
  /// Not considered for memoization as it will always have the same value at
  /// the same token.
  bool HasMultipleNestedBlocks : 1;

  /// \brief The start of a nested block (e.g. lambda introducer in C++ or
  /// "function" in JavaScript) is not wrapped to a new line.
  bool NestedBlockInlined : 1;

  /// \brief Ordering over the fields that take part in memoization.
  ///
  /// Fields are compared one after another; the first one that differs decides
  /// the result. For boolean flags the state whose flag is \c true orders
  /// first. \c IndentLevel, \c NestedNameSpecifierContinuation,
  /// \c BreakBeforeClosingBrace aside, the fields documented as "not
  /// considered for memoization" are deliberately left out.
  bool operator<(const ParenState &Other) const {
    if (Indent != Other.Indent)
      return Indent < Other.Indent;
    if (LastSpace != Other.LastSpace)
      return LastSpace < Other.LastSpace;
    if (NestedBlockIndent != Other.NestedBlockIndent)
      return NestedBlockIndent < Other.NestedBlockIndent;
    if (FirstLessLess != Other.FirstLessLess)
      return FirstLessLess < Other.FirstLessLess;
    if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
      return BreakBeforeClosingBrace;
    if (QuestionColumn != Other.QuestionColumn)
      return QuestionColumn < Other.QuestionColumn;
    if (AvoidBinPacking != Other.AvoidBinPacking)
      return AvoidBinPacking;
    if (BreakBeforeParameter != Other.BreakBeforeParameter)
      return BreakBeforeParameter;
    if (NoLineBreak != Other.NoLineBreak)
      return NoLineBreak;
    if (LastOperatorWrapped != Other.LastOperatorWrapped)
      return LastOperatorWrapped;
    if (ColonPos != Other.ColonPos)
      return ColonPos < Other.ColonPos;
    if (StartOfFunctionCall != Other.StartOfFunctionCall)
      return StartOfFunctionCall < Other.StartOfFunctionCall;
    if (StartOfArraySubscripts != Other.StartOfArraySubscripts)
      return StartOfArraySubscripts < Other.StartOfArraySubscripts;
    if (CallContinuation != Other.CallContinuation)
      return CallContinuation < Other.CallContinuation;
    if (VariablePos != Other.VariablePos)
      return VariablePos < Other.VariablePos;
    if (ContainsLineBreak != Other.ContainsLineBreak)
      return ContainsLineBreak;
    if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder)
      return ContainsUnwrappedBuilder;
    if (NestedBlockInlined != Other.NestedBlockInlined)
      return NestedBlockInlined;
    return false;
  }
};
306 
307 /// \brief The current state when indenting a unwrapped line.
308 ///
309 /// As the indenting tries different combinations this is copied by value.
310 struct LineState {
311  /// \brief The number of used columns in the current line.
312  unsigned Column;
313 
314  /// \brief The token that needs to be next formatted.
316 
317  /// \brief \c true if this line contains a continued for-loop section.
319 
320  /// \brief The \c NestingLevel at the start of this line.
322 
323  /// \brief The lowest \c NestingLevel on the current line.
325 
326  /// \brief The start column of the string literal, if we're in a string
327  /// literal sequence, 0 otherwise.
329 
330  /// \brief A stack keeping track of properties applying to parenthesis
331  /// levels.
332  std::vector<ParenState> Stack;
333 
334  /// \brief Ignore the stack of \c ParenStates for state comparison.
335  ///
336  /// In long and deeply nested unwrapped lines, the current algorithm can
337  /// be insufficient for finding the best formatting with a reasonable amount
338  /// of time and memory. Setting this flag will effectively lead to the
339  /// algorithm not analyzing some combinations. However, these combinations
340  /// rarely contain the optimal solution: In short, accepting a higher
341  /// penalty early would need to lead to different values in the \c
342  /// ParenState stack (in an otherwise identical state) and these different
343  /// values would need to lead to a significant amount of avoided penalty
344  /// later.
345  ///
346  /// FIXME: Come up with a better algorithm instead.
348 
349  /// \brief The indent of the first token.
350  unsigned FirstIndent;
351 
352  /// \brief The line that is being formatted.
353  ///
354  /// Does not need to be considered for memoization because it doesn't change.
356 
357  /// \brief Comparison operator to be able to used \c LineState in \c map.
358  bool operator<(const LineState &Other) const {
359  if (NextToken != Other.NextToken)
360  return NextToken < Other.NextToken;
361  if (Column != Other.Column)
362  return Column < Other.Column;
366  if (StartOfLineLevel != Other.StartOfLineLevel)
367  return StartOfLineLevel < Other.StartOfLineLevel;
369  return LowestLevelOnLine < Other.LowestLevelOnLine;
373  return false;
374  return Stack < Other.Stack;
375  }
376 };
377 
378 } // end namespace format
379 } // end namespace clang
380 
381 #endif
unsigned LowestLevelOnLine
The lowest NestingLevel on the current line.
bool ContainsLineBreak
true if this ParenState already contains a line-break.
unsigned VariablePos
The column of the first variable name in a variable declaration.
bool BreakBeforeClosingBrace
Whether a newline needs to be inserted before the block's closing brace.
unsigned CallContinuation
If a call expression was broken over multiple lines, this contains the start column of the second lin...
ParenState(unsigned Indent, unsigned IndentLevel, unsigned LastSpace, bool AvoidBinPacking, bool NoLineBreak)
unsigned IndentLevel
The number of indentation levels of the block.
LineState State
Contains functions for text encoding manipulation.
bool AlignColons
true if the colons of the current ObjC method expression should be aligned.
unsigned Column
The number of used columns in the current line.
Manages the whitespaces around tokens and their replacements.
unsigned Indent
The position to which a specific parenthesis level needs to be indented.
bool HasMultipleNestedBlocks
true if there are multiple nested blocks inside these parens.
const AnnotatedLine * Line
The line that is being formatted.
bool operator<(const ParenState &Other) const
bool LineContainsContinuedForLoopSection
true if this line contains a continued for-loop section.
bool LastOperatorWrapped
True if the last binary operator on this level was wrapped to the next line.
bool BreakBeforeParameter
Break after the next comma (or all the commas in this context if AvoidBinPacking is true)...
bool ObjCSelectorNameFound
true if at least one selector name was found in the current ObjC method expression.
The current state when indenting an unwrapped line.
unsigned QuestionColumn
The column of a ? in a conditional expression;.
unsigned StartOfArraySubscripts
Contains the start of array subscript expressions, so that they can be aligned.
A wrapper around a Token storing information about the whitespace characters preceding it...
Definition: FormatToken.h:113
unsigned NestedNameSpecifierContinuation
If a nested name specifier was broken over multiple lines, this contains the start column of the seco...
std::vector< ParenState > Stack
A stack keeping track of properties applying to parenthesis levels.
bool NoLineBreak
Line breaking in this context would break a formatting rule.
#define false
Definition: stdbool.h:33
AnnotatedLine & Line
Various functions to configurably format source code.
unsigned LastSpace
The position of the last space on each level.
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
Definition: FormatToken.h:569
bool IgnoreStackForComparison
Ignore the stack of ParenStates for state comparison.
unsigned addTokenToState(LineState &State, bool Newline, bool DryRun, unsigned ExtraSpaces=0)
Appends the next token to State and updates information necessary for indentation.
unsigned getColumnLimit(const LineState &State) const
Get the column limit for this line.
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:46
bool operator<(const LineState &Other) const
Comparison operator to be able to use LineState in map.
unsigned FirstIndent
The indent of the first token.
unsigned ColonPos
The position of the colon in an ObjC method declaration/call.
bool canBreak(const LineState &State)
Returns true, if a line break after State is allowed.
bool AvoidBinPacking
Avoid bin packing, i.e.
bool mustBreak(const LineState &State)
Returns true, if a line break after State is mandatory.
ContinuationIndenter(const FormatStyle &Style, const AdditionalKeywords &Keywords, const SourceManager &SourceMgr, WhitespaceManager &Whitespaces, encoding::Encoding Encoding, bool BinPackInconclusiveFunctions)
Constructs a ContinuationIndenter to format Line starting in column FirstIndent.
LineState getInitialState(unsigned FirstIndent, const AnnotatedLine *Line, bool DryRun)
Get the initial state, i.e.
bool ContainsUnwrappedBuilder
true if this ParenState contains multiple segments of a builder-type call on one line.
FormatToken * Current
unsigned NestedBlockIndent
If a block relative to this parenthesis level gets wrapped, indent it this much.
unsigned FirstLessLess
The position the first "<<" operator encountered on each level.
unsigned StartOfLineLevel
The NestingLevel at the start of this line.
This file contains the declaration of the FormatToken, a wrapper around Token with additional informa...
unsigned StartOfStringLiteral
The start column of the string literal, if we're in a string literal sequence, 0 otherwise.
FormatToken * NextToken
The token that needs to be next formatted.
#define true
Definition: stdbool.h:32
unsigned StartOfFunctionCall
The start of the most recent function in a builder-type call.
This class handles loading and caching of source files into memory.