clang  7.0.0svn
FormatToken.cpp
Go to the documentation of this file.
1 //===--- FormatToken.cpp - Format C++ code --------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// This file implements specific functions of \c FormatTokens and their
12 /// roles.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "FormatToken.h"
17 #include "ContinuationIndenter.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/Support/Debug.h"
20 #include <climits>
21 
22 namespace clang {
23 namespace format {
24 
26  static const char *const TokNames[] = {
27 #define TYPE(X) #X,
29 #undef TYPE
30  nullptr};
31 
32  if (Type < NUM_TOKEN_TYPES)
33  return TokNames[Type];
34  llvm_unreachable("unknown TokenType");
35  return nullptr;
36 }
37 
38 // FIXME: This is copy&pasted from Sema. Put it in a common place and remove
39 // duplication.
41  switch (Tok.getKind()) {
42  case tok::kw_short:
43  case tok::kw_long:
44  case tok::kw___int64:
45  case tok::kw___int128:
46  case tok::kw_signed:
47  case tok::kw_unsigned:
48  case tok::kw_void:
49  case tok::kw_char:
50  case tok::kw_int:
51  case tok::kw_half:
52  case tok::kw_float:
53  case tok::kw_double:
54  case tok::kw__Float16:
55  case tok::kw___float128:
56  case tok::kw_wchar_t:
57  case tok::kw_bool:
58  case tok::kw___underlying_type:
59  case tok::annot_typename:
60  case tok::kw_char8_t:
61  case tok::kw_char16_t:
62  case tok::kw_char32_t:
63  case tok::kw_typeof:
64  case tok::kw_decltype:
65  return true;
66  default:
67  return false;
68  }
69 }
70 
72 
74 
77  bool DryRun) {
78  if (State.NextToken == nullptr || !State.NextToken->Previous)
79  return 0;
80 
81  if (Formats.size() == 1)
82  return 0; // Handled by formatFromToken
83 
84  // Ensure that we start on the opening brace.
85  const FormatToken *LBrace =
87  if (!LBrace || !LBrace->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||
88  LBrace->BlockKind == BK_Block || LBrace->Type == TT_DictLiteral ||
89  LBrace->Next->Type == TT_DesignatedInitializerPeriod)
90  return 0;
91 
92  // Calculate the number of code points we have to format this list. As the
93  // first token is already placed, we have to subtract it.
94  unsigned RemainingCodePoints =
96 
97  // Find the best ColumnFormat, i.e. the best number of columns to use.
98  const ColumnFormat *Format = getColumnFormat(RemainingCodePoints);
99 
100  // If no ColumnFormat can be used, the braced list would generally be
101  // bin-packed. Add a severe penalty to this so that column layouts are
102  // preferred if possible.
103  if (!Format)
104  return 10000;
105 
106  // Format the entire list.
107  unsigned Penalty = 0;
108  unsigned Column = 0;
109  unsigned Item = 0;
110  while (State.NextToken != LBrace->MatchingParen) {
111  bool NewLine = false;
112  unsigned ExtraSpaces = 0;
113 
114  // If the previous token was one of our commas, we are now on the next item.
115  if (Item < Commas.size() && State.NextToken->Previous == Commas[Item]) {
116  if (!State.NextToken->isTrailingComment()) {
117  ExtraSpaces += Format->ColumnSizes[Column] - ItemLengths[Item];
118  ++Column;
119  }
120  ++Item;
121  }
122 
123  if (Column == Format->Columns || State.NextToken->MustBreakBefore) {
124  Column = 0;
125  NewLine = true;
126  }
127 
128  // Place token using the continuation indenter and store the penalty.
129  Penalty += Indenter->addTokenToState(State, NewLine, DryRun, ExtraSpaces);
130  }
131  return Penalty;
132 }
133 
136  bool DryRun) {
137  // Formatting with 1 Column isn't really a column layout, so we don't need the
138  // special logic here. We can just avoid bin packing any of the parameters.
139  if (Formats.size() == 1 || HasNestedBracedList)
140  State.Stack.back().AvoidBinPacking = true;
141  return 0;
142 }
143 
144 // Returns the lengths in code points between Begin and End (both included),
145 // assuming that the entire sequence is put on a single line.
146 static unsigned CodePointsBetween(const FormatToken *Begin,
147  const FormatToken *End) {
148  assert(End->TotalLength >= Begin->TotalLength);
149  return End->TotalLength - Begin->TotalLength + Begin->ColumnWidth;
150 }
151 
153  // FIXME: At some point we might want to do this for other lists, too.
154  if (!Token->MatchingParen ||
155  !Token->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare))
156  return;
157 
158  // In C++11 braced list style, we should not format in columns unless they
159  // have many items (20 or more) or we allow bin-packing of function call
160  // arguments.
162  Commas.size() < 19)
163  return;
164 
165  // Limit column layout for JavaScript array initializers to 20 or more items
166  // for now to introduce it carefully. We can become more aggressive if this
167  // is necessary.
168  if (Token->is(TT_ArrayInitializerLSquare) && Commas.size() < 19)
169  return;
170 
171  // Column format doesn't really make sense if we don't align after brackets.
173  return;
174 
175  FormatToken *ItemBegin = Token->Next;
176  while (ItemBegin->isTrailingComment())
177  ItemBegin = ItemBegin->Next;
178  SmallVector<bool, 8> MustBreakBeforeItem;
179 
180  // The lengths of an item if it is put at the end of the line. This includes
181  // trailing comments which are otherwise ignored for column alignment.
182  SmallVector<unsigned, 8> EndOfLineItemLength;
183 
184  bool HasSeparatingComment = false;
185  for (unsigned i = 0, e = Commas.size() + 1; i != e; ++i) {
186  // Skip comments on their own line.
187  while (ItemBegin->HasUnescapedNewline && ItemBegin->isTrailingComment()) {
188  ItemBegin = ItemBegin->Next;
189  HasSeparatingComment = i > 0;
190  }
191 
192  MustBreakBeforeItem.push_back(ItemBegin->MustBreakBefore);
193  if (ItemBegin->is(tok::l_brace))
194  HasNestedBracedList = true;
195  const FormatToken *ItemEnd = nullptr;
196  if (i == Commas.size()) {
197  ItemEnd = Token->MatchingParen;
198  const FormatToken *NonCommentEnd = ItemEnd->getPreviousNonComment();
199  ItemLengths.push_back(CodePointsBetween(ItemBegin, NonCommentEnd));
201  !ItemEnd->Previous->isTrailingComment()) {
202  // In Cpp11 braced list style, the } and possibly other subsequent
203  // tokens will need to stay on a line with the last element.
204  while (ItemEnd->Next && !ItemEnd->Next->CanBreakBefore)
205  ItemEnd = ItemEnd->Next;
206  } else {
207  // In other braced lists styles, the "}" can be wrapped to the new line.
208  ItemEnd = Token->MatchingParen->Previous;
209  }
210  } else {
211  ItemEnd = Commas[i];
212  // The comma is counted as part of the item when calculating the length.
213  ItemLengths.push_back(CodePointsBetween(ItemBegin, ItemEnd));
214 
215  // Consume trailing comments so they are included in EndOfLineItemLength.
216  if (ItemEnd->Next && !ItemEnd->Next->HasUnescapedNewline &&
217  ItemEnd->Next->isTrailingComment())
218  ItemEnd = ItemEnd->Next;
219  }
220  EndOfLineItemLength.push_back(CodePointsBetween(ItemBegin, ItemEnd));
221  // If there is a trailing comma in the list, the next item will start at the
222  // closing brace. Don't create an extra item for this.
223  if (ItemEnd->getNextNonComment() == Token->MatchingParen)
224  break;
225  ItemBegin = ItemEnd->Next;
226  }
227 
228  // Don't use column layout for lists with few elements and in presence of
229  // separating comments.
230  if (Commas.size() < 5 || HasSeparatingComment)
231  return;
232 
233  if (Token->NestingLevel != 0 && Token->is(tok::l_brace) && Commas.size() < 19)
234  return;
235 
236  // We can never place more than ColumnLimit / 3 items in a row (because of the
237  // spaces and the comma).
238  unsigned MaxItems = Style.ColumnLimit / 3;
239  std::vector<unsigned> MinSizeInColumn;
240  MinSizeInColumn.reserve(MaxItems);
241  for (unsigned Columns = 1; Columns <= MaxItems; ++Columns) {
242  ColumnFormat Format;
243  Format.Columns = Columns;
244  Format.ColumnSizes.resize(Columns);
245  MinSizeInColumn.assign(Columns, UINT_MAX);
246  Format.LineCount = 1;
247  bool HasRowWithSufficientColumns = false;
248  unsigned Column = 0;
249  for (unsigned i = 0, e = ItemLengths.size(); i != e; ++i) {
250  assert(i < MustBreakBeforeItem.size());
251  if (MustBreakBeforeItem[i] || Column == Columns) {
252  ++Format.LineCount;
253  Column = 0;
254  }
255  if (Column == Columns - 1)
256  HasRowWithSufficientColumns = true;
257  unsigned Length =
258  (Column == Columns - 1) ? EndOfLineItemLength[i] : ItemLengths[i];
259  Format.ColumnSizes[Column] = std::max(Format.ColumnSizes[Column], Length);
260  MinSizeInColumn[Column] = std::min(MinSizeInColumn[Column], Length);
261  ++Column;
262  }
263  // If all rows are terminated early (e.g. by trailing comments), we don't
264  // need to look further.
265  if (!HasRowWithSufficientColumns)
266  break;
267  Format.TotalWidth = Columns - 1; // Width of the N-1 spaces.
268 
269  for (unsigned i = 0; i < Columns; ++i)
270  Format.TotalWidth += Format.ColumnSizes[i];
271 
272  // Don't use this Format, if the difference between the longest and shortest
273  // element in a column exceeds a threshold to avoid excessive spaces.
274  if ([&] {
275  for (unsigned i = 0; i < Columns - 1; ++i)
276  if (Format.ColumnSizes[i] - MinSizeInColumn[i] > 10)
277  return true;
278  return false;
279  }())
280  continue;
281 
282  // Ignore layouts that are bound to violate the column limit.
283  if (Format.TotalWidth > Style.ColumnLimit && Columns > 1)
284  continue;
285 
286  Formats.push_back(Format);
287  }
288 }
289 
290 const CommaSeparatedList::ColumnFormat *
291 CommaSeparatedList::getColumnFormat(unsigned RemainingCharacters) const {
292  const ColumnFormat *BestFormat = nullptr;
294  I = Formats.rbegin(),
295  E = Formats.rend();
296  I != E; ++I) {
297  if (I->TotalWidth <= RemainingCharacters || I->Columns == 1) {
298  if (BestFormat && I->LineCount > BestFormat->LineCount)
299  break;
300  BestFormat = &*I;
301  }
302  }
303  return BestFormat;
304 }
305 
306 } // namespace format
307 } // namespace clang
if(T->getSizeExpr()) TRY_TO(TraverseStmt(T -> getSizeExpr()))
unsigned NestingLevel
The nesting level of this token, i.e.
Definition: FormatToken.h:231
Token Tok
The Token.
Definition: FormatToken.h:127
The base class of the type hierarchy.
Definition: Type.h:1428
const FormatToken * getNextNonComment() const
Returns the next token ignoring comments.
Definition: FormatToken.h:476
unsigned TotalLength
The total length of the unwrapped line up to and including this token.
Definition: FormatToken.h:214
FormatToken * Next
The next token in the unwrapped line.
Definition: FormatToken.h:296
tok::TokenKind getKind() const
Definition: Token.h:90
static const char *const TokNames[]
Definition: TokenKinds.cpp:18
LineState State
bool CanBreakBefore
true if it is allowed to break before this token.
Definition: FormatToken.h:185
unsigned Column
The number of used columns in the current line.
FormatToken * Previous
The previous token in the unwrapped line.
Definition: FormatToken.h:293
Token - This structure provides full information about a lexed token.
Definition: Token.h:35
unsigned formatAfterToken(LineState &State, ContinuationIndenter *Indenter, bool DryRun) override
Same as formatFromToken, but assumes that the first token has already been set thereby deciding on th...
Definition: FormatToken.cpp:75
bool BinPackArguments
If false, a function call's arguments will either be all on the same line or will have one line each...
Definition: Format.h:409
#define UINT_MAX
Definition: limits.h:72
virtual void precomputeFormattingInfos(const FormatToken *Token)
After the TokenAnnotator has finished annotating all the tokens, this function precomputes required i...
Definition: FormatToken.cpp:73
unsigned ColumnLimit
The column limit.
Definition: Format.h:885
FormatToken * getPreviousNonComment() const
Returns the previous token ignoring comments.
Definition: FormatToken.h:468
BracketAlignmentStyle AlignAfterOpenBracket
If true, horizontally aligns arguments after an open bracket.
Definition: Format.h:79
bool isOneOf(A K1, B K2) const
Definition: FormatToken.h:319
The current state when indenting an unwrapped line.
for(unsigned I=0, E=TL.getNumArgs();I !=E;++I)
SourceLocation End
ContinuationIndenter * Indenter
SourceLocation Begin
A wrapper around a Token storing information about the whitespace characters preceding it...
Definition: FormatToken.h:123
bool isTrailingComment() const
Definition: FormatToken.h:413
Don't align, instead use ContinuationIndentWidth, e.g.
Definition: Format.h:65
std::vector< ParenState > Stack
A stack keeping track of properties applying to parenthesis levels.
const char * getTokenTypeName(TokenType Type)
Determines the name of a token type.
Definition: FormatToken.cpp:25
static unsigned CodePointsBetween(const FormatToken *Begin, const FormatToken *End)
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:310
unsigned addTokenToState(LineState &State, bool Newline, bool DryRun, unsigned ExtraSpaces=0)
Appends the next token to State and updates information necessary for indentation.
bool isSimpleTypeSpecifier() const
Determine whether the token is a simple-type-specifier.
Definition: FormatToken.cpp:40
Dataflow Directional Tag Classes.
unsigned ColumnWidth
The width of the non-whitespace parts of the token (or its first line for multi-line tokens) in colum...
Definition: FormatToken.h:149
void precomputeFormattingInfos(const FormatToken *Token) override
After the TokenAnnotator has finished annotating all the tokens, this function precomputes required i...
bool Cpp11BracedListStyle
If true, format braced lists as best suited for C++11 braced lists.
Definition: Format.h:1003
unsigned formatFromToken(LineState &State, ContinuationIndenter *Indenter, bool DryRun) override
Apply the special formatting that the given role demands.
FormatToken * MatchingParen
If this is a bracket, this points to the matching one.
Definition: FormatToken.h:290
bool MustBreakBefore
Whether there must be a line break before this token.
Definition: FormatToken.h:165
__DEVICE__ int max(int __a, int __b)
This file contains the declaration of the FormatToken, a wrapper around Token with additional informa...
__DEVICE__ int min(int __a, int __b)
FormatToken * NextToken
The token that needs to be next formatted.
This file implements an indenter that manages the indentation of continuations.
bool HasUnescapedNewline
Whether there is at least one unescaped newline before the Token.
Definition: FormatToken.h:137
BraceBlockKind BlockKind
Contains the kind of block if this token is a brace.
Definition: FormatToken.h:177
const FormatStyle & Style
#define LIST_TOKEN_TYPES
Definition: FormatToken.h:29