clang  3.9.0
FormatToken.cpp
Go to the documentation of this file.
1 //===--- FormatToken.cpp - Format C++ code --------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements specific functions of \c FormatTokens and their
12 /// roles.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "ContinuationIndenter.h"
17 #include "FormatToken.h"
18 #include "clang/Format/Format.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/Support/Debug.h"
21 #include <climits>
22 
23 namespace clang {
24 namespace format {
25 
27  static const char *const TokNames[] = {
28 #define TYPE(X) #X,
30 #undef TYPE
31  nullptr
32  };
33 
34  if (Type < NUM_TOKEN_TYPES)
35  return TokNames[Type];
36  llvm_unreachable("unknown TokenType");
37  return nullptr;
38 }
39 
40 // FIXME: This is copy&pasted from Sema. Put it in a common place and remove
41 // duplication.
43  switch (Tok.getKind()) {
44  case tok::kw_short:
45  case tok::kw_long:
46  case tok::kw___int64:
47  case tok::kw___int128:
48  case tok::kw_signed:
49  case tok::kw_unsigned:
50  case tok::kw_void:
51  case tok::kw_char:
52  case tok::kw_int:
53  case tok::kw_half:
54  case tok::kw_float:
55  case tok::kw_double:
56  case tok::kw___float128:
57  case tok::kw_wchar_t:
58  case tok::kw_bool:
59  case tok::kw___underlying_type:
60  case tok::annot_typename:
61  case tok::kw_char16_t:
62  case tok::kw_char32_t:
63  case tok::kw_typeof:
64  case tok::kw_decltype:
65  return true;
66  default:
67  return false;
68  }
69 }
70 
72 
74 
77  bool DryRun) {
78  if (State.NextToken == nullptr || !State.NextToken->Previous)
79  return 0;
80 
81  // Ensure that we start on the opening brace.
82  const FormatToken *LBrace =
84  if (!LBrace || !LBrace->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||
85  LBrace->BlockKind == BK_Block || LBrace->Type == TT_DictLiteral ||
86  LBrace->Next->Type == TT_DesignatedInitializerPeriod)
87  return 0;
88 
89  // Calculate the number of code points we have to format this list. As the
90  // first token is already placed, we have to subtract it.
91  unsigned RemainingCodePoints =
93 
94  // Find the best ColumnFormat, i.e. the best number of columns to use.
95  const ColumnFormat *Format = getColumnFormat(RemainingCodePoints);
96  // If no ColumnFormat can be used, the braced list would generally be
97  // bin-packed. Add a severe penalty to this so that column layouts are
98  // preferred if possible.
99  if (!Format)
100  return 10000;
101 
102  // Format the entire list.
103  unsigned Penalty = 0;
104  unsigned Column = 0;
105  unsigned Item = 0;
106  while (State.NextToken != LBrace->MatchingParen) {
107  bool NewLine = false;
108  unsigned ExtraSpaces = 0;
109 
110  // If the previous token was one of our commas, we are now on the next item.
111  if (Item < Commas.size() && State.NextToken->Previous == Commas[Item]) {
112  if (!State.NextToken->isTrailingComment()) {
113  ExtraSpaces += Format->ColumnSizes[Column] - ItemLengths[Item];
114  ++Column;
115  }
116  ++Item;
117  }
118 
119  if (Column == Format->Columns || State.NextToken->MustBreakBefore) {
120  Column = 0;
121  NewLine = true;
122  }
123 
124  // Place token using the continuation indenter and store the penalty.
125  Penalty += Indenter->addTokenToState(State, NewLine, DryRun, ExtraSpaces);
126  }
127  return Penalty;
128 }
129 
132  bool DryRun) {
133  if (HasNestedBracedList)
134  State.Stack.back().AvoidBinPacking = true;
135  return 0;
136 }
137 
138 // Returns the length in code points of the token sequence from Begin to End
139 // (both included), assuming that the entire sequence is put on a single line.
140 static unsigned CodePointsBetween(const FormatToken *Begin,
141  const FormatToken *End) {
142  assert(End->TotalLength >= Begin->TotalLength); // End may not precede Begin.
143  return End->TotalLength - Begin->TotalLength + Begin->ColumnWidth; // The difference excludes Begin itself, so add its width back.
144 }
145 
147  // FIXME: At some point we might want to do this for other lists, too.
148  if (!Token->MatchingParen ||
149  !Token->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare))
150  return;
151 
152  // In C++11 braced list style, we should not format in columns unless they
153  // have many items (20 or more) or we allow bin-packing of function call
154  // arguments.
156  Commas.size() < 19)
157  return;
158 
159  // Limit column layout for JavaScript array initializers to 20 or more items
160  // for now to introduce it carefully. We can become more aggressive if this is
161  // necessary.
162  if (Token->is(TT_ArrayInitializerLSquare) && Commas.size() < 19)
163  return;
164 
165  // Column format doesn't really make sense if we don't align after brackets.
167  return;
168 
169  FormatToken *ItemBegin = Token->Next;
170  while (ItemBegin->isTrailingComment())
171  ItemBegin = ItemBegin->Next;
172  SmallVector<bool, 8> MustBreakBeforeItem;
173 
174  // The lengths of an item if it is put at the end of the line. This includes
175  // trailing comments which are otherwise ignored for column alignment.
176  SmallVector<unsigned, 8> EndOfLineItemLength;
177 
178  bool HasSeparatingComment = false;
179  for (unsigned i = 0, e = Commas.size() + 1; i != e; ++i) {
180  // Skip comments on their own line.
181  while (ItemBegin->HasUnescapedNewline && ItemBegin->isTrailingComment()) {
182  ItemBegin = ItemBegin->Next;
183  HasSeparatingComment = i > 0;
184  }
185 
186  MustBreakBeforeItem.push_back(ItemBegin->MustBreakBefore);
187  if (ItemBegin->is(tok::l_brace))
188  HasNestedBracedList = true;
189  const FormatToken *ItemEnd = nullptr;
190  if (i == Commas.size()) {
191  ItemEnd = Token->MatchingParen;
192  const FormatToken *NonCommentEnd = ItemEnd->getPreviousNonComment();
193  ItemLengths.push_back(CodePointsBetween(ItemBegin, NonCommentEnd));
195  !ItemEnd->Previous->isTrailingComment()) {
196  // In Cpp11 braced list style, the } and possibly other subsequent
197  // tokens will need to stay on a line with the last element.
198  while (ItemEnd->Next && !ItemEnd->Next->CanBreakBefore)
199  ItemEnd = ItemEnd->Next;
200  } else {
201  // In other braced lists styles, the "}" can be wrapped to the new line.
202  ItemEnd = Token->MatchingParen->Previous;
203  }
204  } else {
205  ItemEnd = Commas[i];
206  // The comma is counted as part of the item when calculating the length.
207  ItemLengths.push_back(CodePointsBetween(ItemBegin, ItemEnd));
208 
209  // Consume trailing comments so they are included in EndOfLineItemLength.
210  if (ItemEnd->Next && !ItemEnd->Next->HasUnescapedNewline &&
211  ItemEnd->Next->isTrailingComment())
212  ItemEnd = ItemEnd->Next;
213  }
214  EndOfLineItemLength.push_back(CodePointsBetween(ItemBegin, ItemEnd));
215  // If there is a trailing comma in the list, the next item will start at the
216  // closing brace. Don't create an extra item for this.
217  if (ItemEnd->getNextNonComment() == Token->MatchingParen)
218  break;
219  ItemBegin = ItemEnd->Next;
220  }
221 
222  // Don't use column layout for lists with few elements and in presence of
223  // separating comments.
224  if (Commas.size() < 5 || HasSeparatingComment)
225  return;
226 
227  if (Token->NestingLevel != 0 && Token->is(tok::l_brace) && Commas.size() < 19)
228  return;
229 
230  // We can never place more than ColumnLimit / 3 items in a row (because of the
231  // spaces and the comma).
232  unsigned MaxItems = Style.ColumnLimit / 3;
233  std::vector<unsigned> MinSizeInColumn;
234  MinSizeInColumn.reserve(MaxItems);
235  for (unsigned Columns = 1; Columns <= MaxItems; ++Columns) {
236  ColumnFormat Format;
237  Format.Columns = Columns;
238  Format.ColumnSizes.resize(Columns);
239  MinSizeInColumn.assign(Columns, UINT_MAX);
240  Format.LineCount = 1;
241  bool HasRowWithSufficientColumns = false;
242  unsigned Column = 0;
243  for (unsigned i = 0, e = ItemLengths.size(); i != e; ++i) {
244  assert(i < MustBreakBeforeItem.size());
245  if (MustBreakBeforeItem[i] || Column == Columns) {
246  ++Format.LineCount;
247  Column = 0;
248  }
249  if (Column == Columns - 1)
250  HasRowWithSufficientColumns = true;
251  unsigned Length =
252  (Column == Columns - 1) ? EndOfLineItemLength[i] : ItemLengths[i];
253  Format.ColumnSizes[Column] = std::max(Format.ColumnSizes[Column], Length);
254  MinSizeInColumn[Column] = std::min(MinSizeInColumn[Column], Length);
255  ++Column;
256  }
257  // If all rows are terminated early (e.g. by trailing comments), we don't
258  // need to look further.
259  if (!HasRowWithSufficientColumns)
260  break;
261  Format.TotalWidth = Columns - 1; // Width of the N-1 spaces.
262 
263  for (unsigned i = 0; i < Columns; ++i)
264  Format.TotalWidth += Format.ColumnSizes[i];
265 
266  // Don't use this Format, if the difference between the longest and shortest
267  // element in a column exceeds a threshold to avoid excessive spaces.
268  if ([&] {
269  for (unsigned i = 0; i < Columns - 1; ++i)
270  if (Format.ColumnSizes[i] - MinSizeInColumn[i] > 10)
271  return true;
272  return false;
273  }())
274  continue;
275 
276  // Ignore layouts that are bound to violate the column limit.
277  if (Format.TotalWidth > Style.ColumnLimit)
278  continue;
279 
280  Formats.push_back(Format);
281  }
282 }
283 
// Picks the precomputed column layout to use when RemainingCharacters code
// points are available for the list. Returns a pointer into Formats, or
// nullptr if no stored layout has a TotalWidth that fits.
284 const CommaSeparatedList::ColumnFormat *
285 CommaSeparatedList::getColumnFormat(unsigned RemainingCharacters) const {
286  const ColumnFormat *BestFormat = nullptr;
// Formats is walked back to front, i.e. in the reverse of the order in which
// the layouts were appended.
288  I = Formats.rbegin(),
289  E = Formats.rend();
290  I != E; ++I) {
291  if (I->TotalWidth <= RemainingCharacters) {
// Stop at the first fitting layout that would need more lines than the best
// one found so far; otherwise the later-visited fitting layout wins.
292  if (BestFormat && I->LineCount > BestFormat->LineCount)
293  break;
294  BestFormat = &*I;
295  }
296  }
297  return BestFormat;
298 }
299 
300 } // namespace format
301 } // namespace clang
unsigned Length
unsigned NestingLevel
The nesting level of this token, i.e.
Definition: FormatToken.h:221
Token Tok
The Token.
Definition: FormatToken.h:117
bool isOneOf(A K1, B K2) const
Definition: FormatToken.h:291
FormatToken * getPreviousNonComment() const
Returns the previous token ignoring comments.
Definition: FormatToken.h:413
The base class of the type hierarchy.
Definition: Type.h:1281
unsigned TotalLength
The total length of the unwrapped line up to and including this token.
Definition: FormatToken.h:204
FormatToken * Next
The next token in the unwrapped line.
Definition: FormatToken.h:268
static const char *const TokNames[]
Definition: TokenKinds.cpp:18
LineState State
bool CanBreakBefore
true if it is allowed to break before this token.
Definition: FormatToken.h:175
unsigned Column
The number of used columns in the current line.
FormatToken * Previous
The previous token in the unwrapped line.
Definition: FormatToken.h:265
const FormatToken * getNextNonComment() const
Returns the next token ignoring comments.
Definition: FormatToken.h:421
Token - This structure provides full information about a lexed token.
Definition: Token.h:35
unsigned formatAfterToken(LineState &State, ContinuationIndenter *Indenter, bool DryRun) override
Same as formatFromToken, but assumes that the first token has already been set thereby deciding on th...
Definition: FormatToken.cpp:75
bool BinPackArguments
If false, a function call's arguments will either be all on the same line or will have one line each...
Definition: Format.h:203
tok::TokenKind getKind() const
Definition: Token.h:89
#define UINT_MAX
Definition: limits.h:72
detail::InMemoryDirectory::const_iterator I
virtual void precomputeFormattingInfos(const FormatToken *Token)
After the TokenAnnotator has finished annotating all the tokens, this function precomputes required i...
Definition: FormatToken.cpp:73
unsigned ColumnLimit
The column limit.
Definition: Format.h:300
BracketAlignmentStyle AlignAfterOpenBracket
If true, horizontally aligns arguments after an open bracket.
Definition: Format.h:77
The current state when indenting an unwrapped line.
const SmallVectorImpl< AnnotatedLine * >::const_iterator End
ContinuationIndenter * Indenter
A wrapper around a Token storing information about the whitespace characters preceding it...
Definition: FormatToken.h:113
char __ovld __cnfn min(char x, char y)
Returns y if y < x, otherwise it returns x.
Don't align, instead use ContinuationIndentWidth, e.g.
Definition: Format.h:63
std::vector< ParenState > Stack
A stack keeping track of properties applying to parenthesis levels.
const char * getTokenTypeName(TokenType Type)
Determines the name of a token type.
Definition: FormatToken.cpp:26
bool isTrailingComment() const
Definition: FormatToken.h:374
static unsigned CodePointsBetween(const FormatToken *Begin, const FormatToken *End)
Various functions to configurably format source code.
unsigned addTokenToState(LineState &State, bool Newline, bool DryRun, unsigned ExtraSpaces=0)
Appends the next token to State and updates information necessary for indentation.
if(T->getSizeExpr()) TRY_TO(TraverseStmt(T-> getSizeExpr()))
unsigned ColumnWidth
The width of the non-whitespace parts of the token (or its first line for multi-line tokens) in colum...
Definition: FormatToken.h:139
void precomputeFormattingInfos(const FormatToken *Token) override
After the TokenAnnotator has finished annotating all the tokens, this function precomputes required i...
bool Cpp11BracedListStyle
If true, format braced lists as best suited for C++11 braced lists.
Definition: Format.h:330
detail::InMemoryDirectory::const_iterator E
for(auto typeArg:T->getTypeArgsAsWritten())
unsigned formatFromToken(LineState &State, ContinuationIndenter *Indenter, bool DryRun) override
Apply the special formatting that the given role demands.
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:282
char __ovld __cnfn max(char x, char y)
Returns y if x < y, otherwise it returns x.
FormatToken * MatchingParen
If this is a bracket, this points to the matching one.
Definition: FormatToken.h:262
const FormatStyle & Style
Definition: FormatToken.h:512
bool MustBreakBefore
Whether there must be a line break before this token.
Definition: FormatToken.h:155
This file contains the declaration of the FormatToken, a wrapper around Token with additional informa...
FormatToken * NextToken
The token that needs to be next formatted.
This file implements an indenter that manages the indentation of continuations.
bool HasUnescapedNewline
Whether there is at least one unescaped newline before the Token.
Definition: FormatToken.h:127
BraceBlockKind BlockKind
Contains the kind of block if this token is a brace.
Definition: FormatToken.h:167
bool isSimpleTypeSpecifier() const
Determine whether the token is a simple-type-specifier.
Definition: FormatToken.cpp:42
#define LIST_TOKEN_TYPES
Definition: FormatToken.h:28