clang  6.0.0svn
FormatToken.cpp
Go to the documentation of this file.
1 //===--- FormatToken.cpp - Format C++ code --------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements specific functions of \c FormatTokens and their
12 /// roles.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "FormatToken.h"
17 #include "ContinuationIndenter.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/Support/Debug.h"
20 #include <climits>
21 
22 namespace clang {
23 namespace format {
24 
26  static const char *const TokNames[] = {
27 #define TYPE(X) #X,
29 #undef TYPE
30  nullptr};
31 
32  if (Type < NUM_TOKEN_TYPES)
33  return TokNames[Type];
34  llvm_unreachable("unknown TokenType");
35  return nullptr;
36 }
37 
38 // FIXME: This is copy&pasted from Sema. Put it in a common place and remove
39 // duplication.
41  switch (Tok.getKind()) {
42  case tok::kw_short:
43  case tok::kw_long:
44  case tok::kw___int64:
45  case tok::kw___int128:
46  case tok::kw_signed:
47  case tok::kw_unsigned:
48  case tok::kw_void:
49  case tok::kw_char:
50  case tok::kw_int:
51  case tok::kw_half:
52  case tok::kw_float:
53  case tok::kw_double:
54  case tok::kw__Float16:
55  case tok::kw___float128:
56  case tok::kw_wchar_t:
57  case tok::kw_bool:
58  case tok::kw___underlying_type:
59  case tok::annot_typename:
60  case tok::kw_char16_t:
61  case tok::kw_char32_t:
62  case tok::kw_typeof:
63  case tok::kw_decltype:
64  return true;
65  default:
66  return false;
67  }
68 }
69 
71 
73 
76  bool DryRun) {
77  if (State.NextToken == nullptr || !State.NextToken->Previous)
78  return 0;
79 
80  if (Formats.size() == 1)
81  return 0; // Handled by formatFromToken
82 
83  // Ensure that we start on the opening brace.
84  const FormatToken *LBrace =
86  if (!LBrace || !LBrace->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||
87  LBrace->BlockKind == BK_Block || LBrace->Type == TT_DictLiteral ||
88  LBrace->Next->Type == TT_DesignatedInitializerPeriod)
89  return 0;
90 
91  // Calculate the number of code points we have to format this list. As the
92  // first token is already placed, we have to subtract it.
93  unsigned RemainingCodePoints =
95 
96  // Find the best ColumnFormat, i.e. the best number of columns to use.
97  const ColumnFormat *Format = getColumnFormat(RemainingCodePoints);
98 
99  // If no ColumnFormat can be used, the braced list would generally be
100  // bin-packed. Add a severe penalty to this so that column layouts are
101  // preferred if possible.
102  if (!Format)
103  return 10000;
104 
105  // Format the entire list.
106  unsigned Penalty = 0;
107  unsigned Column = 0;
108  unsigned Item = 0;
109  while (State.NextToken != LBrace->MatchingParen) {
110  bool NewLine = false;
111  unsigned ExtraSpaces = 0;
112 
113  // If the previous token was one of our commas, we are now on the next item.
114  if (Item < Commas.size() && State.NextToken->Previous == Commas[Item]) {
115  if (!State.NextToken->isTrailingComment()) {
116  ExtraSpaces += Format->ColumnSizes[Column] - ItemLengths[Item];
117  ++Column;
118  }
119  ++Item;
120  }
121 
122  if (Column == Format->Columns || State.NextToken->MustBreakBefore) {
123  Column = 0;
124  NewLine = true;
125  }
126 
127  // Place token using the continuation indenter and store the penalty.
128  Penalty += Indenter->addTokenToState(State, NewLine, DryRun, ExtraSpaces);
129  }
130  return Penalty;
131 }
132 
135  bool DryRun) {
136  // Formatting with 1 Column isn't really a column layout, so we don't need the
137  // special logic here. We can just avoid bin packing any of the parameters.
138  if (Formats.size() == 1 || HasNestedBracedList)
139  State.Stack.back().AvoidBinPacking = true;
140  return 0;
141 }
142 
143 // Returns the length in code points between Begin and End (both included),
144 // assuming that the entire sequence is put on a single line.
// TotalLength is measured up to and including the token it belongs to, so the
// difference below leaves Begin itself out; adding Begin->ColumnWidth puts it
// back in. End must not precede Begin in the line (checked by the assert).
145 static unsigned CodePointsBetween(const FormatToken *Begin,
146  const FormatToken *End) {
147  assert(End->TotalLength >= Begin->TotalLength);
148  return End->TotalLength - Begin->TotalLength + Begin->ColumnWidth;
149 }
150 
152  // FIXME: At some point we might want to do this for other lists, too.
153  if (!Token->MatchingParen ||
154  !Token->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare))
155  return;
156 
157  // In C++11 braced list style, we should not format in columns unless they
158  // have many items (20 or more) or we allow bin-packing of function call
159  // arguments.
161  Commas.size() < 19)
162  return;
163 
164  // Limit column layout for JavaScript array initializers to 20 or more items
165  // for now to introduce it carefully. We can become more aggressive if this
166 // is necessary.
167  if (Token->is(TT_ArrayInitializerLSquare) && Commas.size() < 19)
168  return;
169 
170  // Column format doesn't really make sense if we don't align after brackets.
172  return;
173 
174  FormatToken *ItemBegin = Token->Next;
175  while (ItemBegin->isTrailingComment())
176  ItemBegin = ItemBegin->Next;
177  SmallVector<bool, 8> MustBreakBeforeItem;
178 
179  // The lengths of an item if it is put at the end of the line. This includes
180  // trailing comments which are otherwise ignored for column alignment.
181  SmallVector<unsigned, 8> EndOfLineItemLength;
182 
183  bool HasSeparatingComment = false;
184  for (unsigned i = 0, e = Commas.size() + 1; i != e; ++i) {
185  // Skip comments on their own line.
186  while (ItemBegin->HasUnescapedNewline && ItemBegin->isTrailingComment()) {
187  ItemBegin = ItemBegin->Next;
188  HasSeparatingComment = i > 0;
189  }
190 
191  MustBreakBeforeItem.push_back(ItemBegin->MustBreakBefore);
192  if (ItemBegin->is(tok::l_brace))
193  HasNestedBracedList = true;
194  const FormatToken *ItemEnd = nullptr;
195  if (i == Commas.size()) {
196  ItemEnd = Token->MatchingParen;
197  const FormatToken *NonCommentEnd = ItemEnd->getPreviousNonComment();
198  ItemLengths.push_back(CodePointsBetween(ItemBegin, NonCommentEnd));
200  !ItemEnd->Previous->isTrailingComment()) {
201  // In Cpp11 braced list style, the } and possibly other subsequent
202  // tokens will need to stay on a line with the last element.
203  while (ItemEnd->Next && !ItemEnd->Next->CanBreakBefore)
204  ItemEnd = ItemEnd->Next;
205  } else {
206  // In other braced lists styles, the "}" can be wrapped to the new line.
207  ItemEnd = Token->MatchingParen->Previous;
208  }
209  } else {
210  ItemEnd = Commas[i];
211  // The comma is counted as part of the item when calculating the length.
212  ItemLengths.push_back(CodePointsBetween(ItemBegin, ItemEnd));
213 
214  // Consume trailing comments so they are included in EndOfLineItemLength.
215  if (ItemEnd->Next && !ItemEnd->Next->HasUnescapedNewline &&
216  ItemEnd->Next->isTrailingComment())
217  ItemEnd = ItemEnd->Next;
218  }
219  EndOfLineItemLength.push_back(CodePointsBetween(ItemBegin, ItemEnd));
220  // If there is a trailing comma in the list, the next item will start at the
221  // closing brace. Don't create an extra item for this.
222  if (ItemEnd->getNextNonComment() == Token->MatchingParen)
223  break;
224  ItemBegin = ItemEnd->Next;
225  }
226 
227  // Don't use column layout for lists with few elements and in presence of
228  // separating comments.
229  if (Commas.size() < 5 || HasSeparatingComment)
230  return;
231 
232  if (Token->NestingLevel != 0 && Token->is(tok::l_brace) && Commas.size() < 19)
233  return;
234 
235  // We can never place more than ColumnLimit / 3 items in a row (because of the
236  // spaces and the comma).
237  unsigned MaxItems = Style.ColumnLimit / 3;
238  std::vector<unsigned> MinSizeInColumn;
239  MinSizeInColumn.reserve(MaxItems);
240  for (unsigned Columns = 1; Columns <= MaxItems; ++Columns) {
241  ColumnFormat Format;
242  Format.Columns = Columns;
243  Format.ColumnSizes.resize(Columns);
244  MinSizeInColumn.assign(Columns, UINT_MAX);
245  Format.LineCount = 1;
246  bool HasRowWithSufficientColumns = false;
247  unsigned Column = 0;
248  for (unsigned i = 0, e = ItemLengths.size(); i != e; ++i) {
249  assert(i < MustBreakBeforeItem.size());
250  if (MustBreakBeforeItem[i] || Column == Columns) {
251  ++Format.LineCount;
252  Column = 0;
253  }
254  if (Column == Columns - 1)
255  HasRowWithSufficientColumns = true;
256  unsigned Length =
257  (Column == Columns - 1) ? EndOfLineItemLength[i] : ItemLengths[i];
258  Format.ColumnSizes[Column] = std::max(Format.ColumnSizes[Column], Length);
259  MinSizeInColumn[Column] = std::min(MinSizeInColumn[Column], Length);
260  ++Column;
261  }
262  // If all rows are terminated early (e.g. by trailing comments), we don't
263  // need to look further.
264  if (!HasRowWithSufficientColumns)
265  break;
266  Format.TotalWidth = Columns - 1; // Width of the N-1 spaces.
267 
268  for (unsigned i = 0; i < Columns; ++i)
269  Format.TotalWidth += Format.ColumnSizes[i];
270 
271  // Don't use this Format, if the difference between the longest and shortest
272  // element in a column exceeds a threshold to avoid excessive spaces.
273  if ([&] {
274  for (unsigned i = 0; i < Columns - 1; ++i)
275  if (Format.ColumnSizes[i] - MinSizeInColumn[i] > 10)
276  return true;
277  return false;
278  }())
279  continue;
280 
281  // Ignore layouts that are bound to violate the column limit.
282  if (Format.TotalWidth > Style.ColumnLimit && Columns > 1)
283  continue;
284 
285  Formats.push_back(Format);
286  }
287 }
288 
289 const CommaSeparatedList::ColumnFormat *
290 CommaSeparatedList::getColumnFormat(unsigned RemainingCharacters) const {
291  const ColumnFormat *BestFormat = nullptr;
293  I = Formats.rbegin(),
294  E = Formats.rend();
295  I != E; ++I) {
296  if (I->TotalWidth <= RemainingCharacters || I->Columns == 1) {
297  if (BestFormat && I->LineCount > BestFormat->LineCount)
298  break;
299  BestFormat = &*I;
300  }
301  }
302  return BestFormat;
303 }
304 
305 } // namespace format
306 } // namespace clang
if(T->getSizeExpr()) TRY_TO(TraverseStmt(T -> getSizeExpr()))
unsigned NestingLevel
The nesting level of this token, i.e.
Definition: FormatToken.h:228
Token Tok
The Token.
Definition: FormatToken.h:124
The base class of the type hierarchy.
Definition: Type.h:1353
const FormatToken * getNextNonComment() const
Returns the next token ignoring comments.
Definition: FormatToken.h:456
unsigned TotalLength
The total length of the unwrapped line up to and including this token.
Definition: FormatToken.h:211
FormatToken * Next
The next token in the unwrapped line.
Definition: FormatToken.h:283
tok::TokenKind getKind() const
Definition: Token.h:90
static const char *const TokNames[]
Definition: TokenKinds.cpp:18
LineState State
bool CanBreakBefore
true if it is allowed to break before this token.
Definition: FormatToken.h:182
unsigned Column
The number of used columns in the current line.
FormatToken * Previous
The previous token in the unwrapped line.
Definition: FormatToken.h:280
Token - This structure provides full information about a lexed token.
Definition: Token.h:35
for(const auto &A :T->param_types())
unsigned formatAfterToken(LineState &State, ContinuationIndenter *Indenter, bool DryRun) override
Same as formatFromToken, but assumes that the first token has already been set thereby deciding on th...
Definition: FormatToken.cpp:74
bool BinPackArguments
If false, a function call's arguments will either be all on the same line or will have one line each...
Definition: Format.h:377
#define UINT_MAX
Definition: limits.h:72
virtual void precomputeFormattingInfos(const FormatToken *Token)
After the TokenAnnotator has finished annotating all the tokens, this function precomputes required i...
Definition: FormatToken.cpp:72
unsigned ColumnLimit
The column limit.
Definition: Format.h:840
FormatToken * getPreviousNonComment() const
Returns the previous token ignoring comments.
Definition: FormatToken.h:448
BracketAlignmentStyle AlignAfterOpenBracket
If true, horizontally aligns arguments after an open bracket.
Definition: Format.h:77
bool isOneOf(A K1, B K2) const
Definition: FormatToken.h:306
The current state when indenting an unwrapped line.
SourceLocation End
ContinuationIndenter * Indenter
SourceLocation Begin
A wrapper around a Token storing information about the whitespace characters preceding it...
Definition: FormatToken.h:120
char __ovld __cnfn min(char x, char y)
Returns y if y < x, otherwise it returns x.
bool isTrailingComment() const
Definition: FormatToken.h:394
Don't align, instead use ContinuationIndentWidth, e.g.
Definition: Format.h:63
std::vector< ParenState > Stack
A stack keeping track of properties applying to parenthesis levels.
const char * getTokenTypeName(TokenType Type)
Determines the name of a token type.
Definition: FormatToken.cpp:25
static unsigned CodePointsBetween(const FormatToken *Begin, const FormatToken *End)
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:297
unsigned addTokenToState(LineState &State, bool Newline, bool DryRun, unsigned ExtraSpaces=0)
Appends the next token to State and updates information necessary for indentation.
bool isSimpleTypeSpecifier() const
Determine whether the token is a simple-type-specifier.
Definition: FormatToken.cpp:40
Dataflow Directional Tag Classes.
unsigned ColumnWidth
The width of the non-whitespace parts of the token (or its first line for multi-line tokens) in colum...
Definition: FormatToken.h:146
void precomputeFormattingInfos(const FormatToken *Token) override
After the TokenAnnotator has finished annotating all the tokens, this function precomputes required i...
bool Cpp11BracedListStyle
If true, format braced lists as best suited for C++11 braced lists.
Definition: Format.h:937
unsigned formatFromToken(LineState &State, ContinuationIndenter *Indenter, bool DryRun) override
Apply the special formatting that the given role demands.
char __ovld __cnfn max(char x, char y)
Returns y if x < y, otherwise it returns x.
FormatToken * MatchingParen
If this is a bracket, this points to the matching one.
Definition: FormatToken.h:277
bool MustBreakBefore
Whether there must be a line break before this token.
Definition: FormatToken.h:162
This file contains the declaration of the FormatToken, a wrapper around Token with additional informa...
FormatToken * NextToken
The token that needs to be next formatted.
This file implements an indenter that manages the indentation of continuations.
bool HasUnescapedNewline
Whether there is at least one unescaped newline before the Token.
Definition: FormatToken.h:134
BraceBlockKind BlockKind
Contains the kind of block if this token is a brace.
Definition: FormatToken.h:174
const FormatStyle & Style
#define LIST_TOKEN_TYPES
Definition: FormatToken.h:29