clang  16.0.0git
FormatToken.cpp
Go to the documentation of this file.
1 //===--- FormatToken.cpp - Format C++ code --------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements specific functions of \c FormatTokens and their
11 /// roles.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "FormatToken.h"
16 #include "ContinuationIndenter.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/Support/Debug.h"
19 #include <climits>
20 
21 namespace clang {
22 namespace format {
23 
25  static const char *const TokNames[] = {
26 #define TYPE(X) #X,
28 #undef TYPE
29  nullptr};
30 
31  if (Type < NUM_TOKEN_TYPES)
32  return TokNames[Type];
33  llvm_unreachable("unknown TokenType");
34  return nullptr;
35 }
36 
37 // FIXME: This is copy&pasted from Sema. Put it in a common place and remove
38 // duplication.
40  switch (Tok.getKind()) {
41  case tok::kw_short:
42  case tok::kw_long:
43  case tok::kw___int64:
44  case tok::kw___int128:
45  case tok::kw_signed:
46  case tok::kw_unsigned:
47  case tok::kw_void:
48  case tok::kw_char:
49  case tok::kw_int:
50  case tok::kw_half:
51  case tok::kw_float:
52  case tok::kw_double:
53  case tok::kw___bf16:
54  case tok::kw__Float16:
55  case tok::kw___float128:
56  case tok::kw___ibm128:
57  case tok::kw_wchar_t:
58  case tok::kw_bool:
59 #define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) case tok::kw___##Trait:
60 #include "clang/Basic/TransformTypeTraits.def"
61  case tok::annot_typename:
62  case tok::kw_char8_t:
63  case tok::kw_char16_t:
64  case tok::kw_char32_t:
65  case tok::kw_typeof:
66  case tok::kw_decltype:
67  case tok::kw__Atomic:
68  return true;
69  default:
70  return false;
71  }
72 }
73 
75  return isSimpleTypeSpecifier() || Tok.isOneOf(tok::kw_auto, tok::identifier);
76 }
77 
79  // C# Does not indent object initialisers as continuations.
80  if (is(tok::l_brace) && getBlockKind() == BK_BracedInit && Style.isCSharp())
81  return true;
82  if (is(TT_TemplateString) && opensScope())
83  return true;
84  return is(TT_ArrayInitializerLSquare) || is(TT_ProtoExtensionLSquare) ||
85  (is(tok::l_brace) &&
86  (getBlockKind() == BK_Block || is(TT_DictLiteral) ||
87  (!Style.Cpp11BracedListStyle && NestingLevel == 0))) ||
88  (is(tok::less) && (Style.Language == FormatStyle::LK_Proto ||
89  Style.Language == FormatStyle::LK_TextProto));
90 }
91 
93 
95 
98  bool DryRun) {
99  if (State.NextToken == nullptr || !State.NextToken->Previous)
100  return 0;
101 
102  if (Formats.size() == 1)
103  return 0; // Handled by formatFromToken
104 
105  // Ensure that we start on the opening brace.
106  const FormatToken *LBrace =
107  State.NextToken->Previous->getPreviousNonComment();
108  if (!LBrace || !LBrace->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||
109  LBrace->is(BK_Block) || LBrace->is(TT_DictLiteral) ||
110  LBrace->Next->is(TT_DesignatedInitializerPeriod)) {
111  return 0;
112  }
113 
114  // Calculate the number of code points we have to format this list. As the
115  // first token is already placed, we have to subtract it.
116  unsigned RemainingCodePoints =
117  Style.ColumnLimit - State.Column + State.NextToken->Previous->ColumnWidth;
118 
119  // Find the best ColumnFormat, i.e. the best number of columns to use.
120  const ColumnFormat *Format = getColumnFormat(RemainingCodePoints);
121 
122  // If no ColumnFormat can be used, the braced list would generally be
123  // bin-packed. Add a severe penalty to this so that column layouts are
124  // preferred if possible.
125  if (!Format)
126  return 10000;
127 
128  // Format the entire list.
129  unsigned Penalty = 0;
130  unsigned Column = 0;
131  unsigned Item = 0;
132  while (State.NextToken != LBrace->MatchingParen) {
133  bool NewLine = false;
134  unsigned ExtraSpaces = 0;
135 
136  // If the previous token was one of our commas, we are now on the next item.
137  if (Item < Commas.size() && State.NextToken->Previous == Commas[Item]) {
138  if (!State.NextToken->isTrailingComment()) {
139  ExtraSpaces += Format->ColumnSizes[Column] - ItemLengths[Item];
140  ++Column;
141  }
142  ++Item;
143  }
144 
145  if (Column == Format->Columns || State.NextToken->MustBreakBefore) {
146  Column = 0;
147  NewLine = true;
148  }
149 
150  // Place token using the continuation indenter and store the penalty.
151  Penalty += Indenter->addTokenToState(State, NewLine, DryRun, ExtraSpaces);
152  }
153  return Penalty;
154 }
155 
158  bool DryRun) {
159  // Formatting with 1 Column isn't really a column layout, so we don't need the
160  // special logic here. We can just avoid bin packing any of the parameters.
161  if (Formats.size() == 1 || HasNestedBracedList)
162  State.Stack.back().AvoidBinPacking = true;
163  return 0;
164 }
165 
166 // Returns the length in code points of the token range from Begin to End (both
167 // included), assuming that the entire sequence is put on a single line.
//
// The result is derived from the tokens' TotalLength and ColumnWidth fields:
// the TotalLength difference between End and Begin, plus Begin's own width.
// NOTE(review): this presumes TotalLength is a running total that already
// includes the token itself -- confirm against FormatToken.h.
168 static unsigned CodePointsBetween(const FormatToken *Begin,
169  const FormatToken *End) {
// Begin must not have a larger TotalLength than End; otherwise the
// subtraction below would not yield a meaningful length.
170  assert(End->TotalLength >= Begin->TotalLength);
// The difference alone leaves out Begin, so its ColumnWidth is added back to
// make the range inclusive on both ends.
171  return End->TotalLength - Begin->TotalLength + Begin->ColumnWidth;
172 }
173 
175  // FIXME: At some point we might want to do this for other lists, too.
176  if (!Token->MatchingParen ||
177  !Token->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare)) {
178  return;
179  }
180 
181  // In C++11 braced list style, we should not format in columns unless they
182  // have many items (20 or more) or we allow bin-packing of function call
183  // arguments.
185  Commas.size() < 19) {
186  return;
187  }
188 
189  // Limit column layout for JavaScript array initializers to 20 or more items
190  // for now to introduce it carefully. We can become more aggressive if this
191  // becomes necessary.
192  if (Token->is(TT_ArrayInitializerLSquare) && Commas.size() < 19)
193  return;
194 
195  // Column format doesn't really make sense if we don't align after brackets.
197  return;
198 
199  FormatToken *ItemBegin = Token->Next;
200  while (ItemBegin->isTrailingComment())
201  ItemBegin = ItemBegin->Next;
202  SmallVector<bool, 8> MustBreakBeforeItem;
203 
204  // The lengths of an item if it is put at the end of the line. This includes
205  // trailing comments which are otherwise ignored for column alignment.
206  SmallVector<unsigned, 8> EndOfLineItemLength;
207  MustBreakBeforeItem.reserve(Commas.size() + 1);
208  EndOfLineItemLength.reserve(Commas.size() + 1);
209  ItemLengths.reserve(Commas.size() + 1);
210 
211  bool HasSeparatingComment = false;
212  for (unsigned i = 0, e = Commas.size() + 1; i != e; ++i) {
213  assert(ItemBegin);
214  // Skip comments on their own line.
215  while (ItemBegin->HasUnescapedNewline && ItemBegin->isTrailingComment()) {
216  ItemBegin = ItemBegin->Next;
217  HasSeparatingComment = i > 0;
218  }
219 
220  MustBreakBeforeItem.push_back(ItemBegin->MustBreakBefore);
221  if (ItemBegin->is(tok::l_brace))
222  HasNestedBracedList = true;
223  const FormatToken *ItemEnd = nullptr;
224  if (i == Commas.size()) {
225  ItemEnd = Token->MatchingParen;
226  const FormatToken *NonCommentEnd = ItemEnd->getPreviousNonComment();
227  ItemLengths.push_back(CodePointsBetween(ItemBegin, NonCommentEnd));
229  !ItemEnd->Previous->isTrailingComment()) {
230  // In Cpp11 braced list style, the } and possibly other subsequent
231  // tokens will need to stay on a line with the last element.
232  while (ItemEnd->Next && !ItemEnd->Next->CanBreakBefore)
233  ItemEnd = ItemEnd->Next;
234  } else {
235  // In other braced lists styles, the "}" can be wrapped to the new line.
236  ItemEnd = Token->MatchingParen->Previous;
237  }
238  } else {
239  ItemEnd = Commas[i];
240  // The comma is counted as part of the item when calculating the length.
241  ItemLengths.push_back(CodePointsBetween(ItemBegin, ItemEnd));
242 
243  // Consume trailing comments so they are included in EndOfLineItemLength.
244  if (ItemEnd->Next && !ItemEnd->Next->HasUnescapedNewline &&
245  ItemEnd->Next->isTrailingComment()) {
246  ItemEnd = ItemEnd->Next;
247  }
248  }
249  EndOfLineItemLength.push_back(CodePointsBetween(ItemBegin, ItemEnd));
250  // If there is a trailing comma in the list, the next item will start at the
251  // closing brace. Don't create an extra item for this.
252  if (ItemEnd->getNextNonComment() == Token->MatchingParen)
253  break;
254  ItemBegin = ItemEnd->Next;
255  }
256 
257  // Don't use column layout for lists with few elements and in presence of
258  // separating comments.
259  if (Commas.size() < 5 || HasSeparatingComment)
260  return;
261 
262  if (Token->NestingLevel != 0 && Token->is(tok::l_brace) && Commas.size() < 19)
263  return;
264 
265  // We can never place more than ColumnLimit / 3 items in a row (because of the
266  // spaces and the comma).
267  unsigned MaxItems = Style.ColumnLimit / 3;
268  SmallVector<unsigned> MinSizeInColumn;
269  MinSizeInColumn.reserve(MaxItems);
270  for (unsigned Columns = 1; Columns <= MaxItems; ++Columns) {
271  ColumnFormat Format;
272  Format.Columns = Columns;
273  Format.ColumnSizes.resize(Columns);
274  MinSizeInColumn.assign(Columns, UINT_MAX);
275  Format.LineCount = 1;
276  bool HasRowWithSufficientColumns = false;
277  unsigned Column = 0;
278  for (unsigned i = 0, e = ItemLengths.size(); i != e; ++i) {
279  assert(i < MustBreakBeforeItem.size());
280  if (MustBreakBeforeItem[i] || Column == Columns) {
281  ++Format.LineCount;
282  Column = 0;
283  }
284  if (Column == Columns - 1)
285  HasRowWithSufficientColumns = true;
286  unsigned Length =
287  (Column == Columns - 1) ? EndOfLineItemLength[i] : ItemLengths[i];
288  Format.ColumnSizes[Column] = std::max(Format.ColumnSizes[Column], Length);
289  MinSizeInColumn[Column] = std::min(MinSizeInColumn[Column], Length);
290  ++Column;
291  }
292  // If all rows are terminated early (e.g. by trailing comments), we don't
293  // need to look further.
294  if (!HasRowWithSufficientColumns)
295  break;
296  Format.TotalWidth = Columns - 1; // Width of the N-1 spaces.
297 
298  for (unsigned i = 0; i < Columns; ++i)
299  Format.TotalWidth += Format.ColumnSizes[i];
300 
301  // Don't use this Format, if the difference between the longest and shortest
302  // element in a column exceeds a threshold to avoid excessive spaces.
303  if ([&] {
304  for (unsigned i = 0; i < Columns - 1; ++i)
305  if (Format.ColumnSizes[i] - MinSizeInColumn[i] > 10)
306  return true;
307  return false;
308  }()) {
309  continue;
310  }
311 
312  // Ignore layouts that are bound to violate the column limit.
313  if (Format.TotalWidth > Style.ColumnLimit && Columns > 1)
314  continue;
315 
316  Formats.push_back(Format);
317  }
318 }
319 
// Selects the precomputed ColumnFormat to use when RemainingCharacters code
// points are available for the list.
//
// A format is a candidate if its TotalWidth fits into RemainingCharacters, or
// if it has exactly one column (a single-column format is always accepted,
// regardless of its width). Returns nullptr if no entry of Formats is a
// candidate.
320 const CommaSeparatedList::ColumnFormat *
321 CommaSeparatedList::getColumnFormat(unsigned RemainingCharacters) const {
322  const ColumnFormat *BestFormat = nullptr;
// Formats is walked back to front. precomputeFormattingInfos appends entries
// in increasing column count, so this visits the widest layouts first.
323  for (const ColumnFormat &Format : llvm::reverse(Formats)) {
324  if (Format.TotalWidth <= RemainingCharacters || Format.Columns == 1) {
// Stop as soon as a candidate would need more lines than the best one found
// so far; the current best is kept in that case.
325  if (BestFormat && Format.LineCount > BestFormat->LineCount)
326  break;
// Otherwise the later candidate (same or fewer lines) replaces the current
// best.
327  BestFormat = &Format;
328  }
329  }
330  return BestFormat;
331 }
332 
333 } // namespace format
334 } // namespace clang
clang::format::FormatToken::getNextNonComment
const FormatToken * getNextNonComment() const
Returns the next token ignoring comments.
Definition: FormatToken.h:736
clang::format::FormatToken::MustBreakBefore
unsigned MustBreakBefore
Whether there must be a line break before this token.
Definition: FormatToken.h:283
max
__DEVICE__ int max(int __a, int __b)
Definition: __clang_cuda_math.h:196
NewLine
bool NewLine
Definition: UnwrappedLineFormatter.cpp:1148
llvm::SmallVector< bool, 8 >
clang::if
if(T->getSizeExpr()) TRY_TO(TraverseStmt(T -> getSizeExpr()))
Definition: RecursiveASTVisitor.h:1081
clang::format::TokenRole::~TokenRole
virtual ~TokenRole()
Definition: FormatToken.cpp:92
clang::format::FormatStyle
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:54
clang::format::FormatToken
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:240
clang::Token
Token - This structure provides full information about a lexed token.
Definition: Token.h:35
clang::format::BK_BracedInit
@ BK_BracedInit
Definition: FormatToken.h:166
End
SourceLocation End
Definition: USRLocFinder.cpp:167
clang::Type
The base class of the type hierarchy.
Definition: Type.h:1565
clang::format::CommaSeparatedList::formatFromToken
unsigned formatFromToken(LineState &State, ContinuationIndenter *Indenter, bool DryRun) override
Apply the special formatting that the given role demands.
Definition: FormatToken.cpp:156
clang::format::FormatToken::CanBreakBefore
unsigned CanBreakBefore
true if it is allowed to break before this token.
Definition: FormatToken.h:289
clang::format::FormatStyle::Cpp11BracedListStyle
bool Cpp11BracedListStyle
If true, format braced lists as best suited for C++11 braced lists.
Definition: Format.h:1977
clang::format::FormatToken::NestingLevel
unsigned NestingLevel
The nesting level of this token, i.e.
Definition: FormatToken.h:439
LIST_TOKEN_TYPES
#define LIST_TOKEN_TYPES
Definition: FormatToken.h:28
clang::format::FormatToken::Previous
FormatToken * Previous
The previous token in the unwrapped line.
Definition: FormatToken.h:485
clang::format::LineState
The current state when indenting an unwrapped line.
Definition: ContinuationIndenter.h:415
min
__DEVICE__ int min(int __a, int __b)
Definition: __clang_cuda_math.h:197
clang::format::FormatToken::Tok
Token Tok
The Token.
Definition: FormatToken.h:252
clang::format::FormatStyle::LK_TextProto
@ LK_TextProto
Should be used for Protocol Buffer messages in text format (https://developers.google....
Definition: Format.h:2602
clang::Token::getKind
tok::TokenKind getKind() const
Definition: Token.h:93
clang::format::FormatStyle::LK_Proto
@ LK_Proto
Should be used for Protocol Buffers (https://developers.google.com/protocol-buffers/).
Definition: Format.h:2597
clang::format::FormatStyle::BinPackArguments
bool BinPackArguments
If false, a function call's arguments will either be all on the same line or will have one line each.
Definition: Format.h:900
clang::Token::is
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {....
Definition: Token.h:98
clang::format::FormatToken::isTrailingComment
bool isTrailingComment() const
Definition: FormatToken.h:661
clang::format::TokenRole::Style
const FormatStyle & Style
Definition: FormatToken.h:854
clang::format::FormatStyle::ColumnLimit
unsigned ColumnLimit
The column limit.
Definition: Format.h:1859
clang::format::BK_Block
@ BK_Block
Definition: FormatToken.h:166
clang::format::CommaSeparatedList::precomputeFormattingInfos
void precomputeFormattingInfos(const FormatToken *Token) override
After the TokenAnnotator has finished annotating all the tokens, this function precomputes required i...
Definition: FormatToken.cpp:174
clang::format::FormatToken::getPreviousNonComment
FormatToken * getPreviousNonComment() const
Returns the previous token ignoring comments.
Definition: FormatToken.h:728
clang::format::getTokenTypeName
const char * getTokenTypeName(TokenType Type)
Determines the name of a token type.
Definition: FormatToken.cpp:24
TokNames
static const char *const TokNames[]
Definition: TokenKinds.cpp:17
clang::format::NUM_TOKEN_TYPES
@ NUM_TOKEN_TYPES
Definition: FormatToken.h:159
UINT_MAX
#define UINT_MAX
Definition: limits.h:56
clang::format::CommaSeparatedList::formatAfterToken
unsigned formatAfterToken(LineState &State, ContinuationIndenter *Indenter, bool DryRun) override
Same as formatFromToken, but assumes that the first token has already been set thereby deciding on th...
Definition: FormatToken.cpp:96
clang::format::CodePointsBetween
static unsigned CodePointsBetween(const FormatToken *Begin, const FormatToken *End)
Definition: FormatToken.cpp:168
Begin
SourceLocation Begin
Definition: USRLocFinder.cpp:165
clang::format::FormatToken::getBlockKind
BraceBlockKind getBlockKind() const
Definition: FormatToken.h:323
clang::format::FormatToken::is
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:525
clang::format::FormatStyle::AlignAfterOpenBracket
BracketAlignmentStyle AlignAfterOpenBracket
If true, horizontally aligns arguments after an open bracket.
Definition: Format.h:105
State
LineState State
Definition: UnwrappedLineFormatter.cpp:1147
clang::format::FormatToken::isOneOf
bool isOneOf(A K1, B K2) const
Definition: FormatToken.h:537
clang::format::ContinuationIndenter
Definition: ContinuationIndenter.h:50
clang
Definition: CalledOnceCheck.h:17
clang::format::FormatToken::isTypeOrIdentifier
bool isTypeOrIdentifier() const
Definition: FormatToken.cpp:74
clang::format::FormatToken::isSimpleTypeSpecifier
bool isSimpleTypeSpecifier() const
Determine whether the token is a simple-type-specifier.
Definition: FormatToken.cpp:39
clang::format::FormatToken::opensBlockOrBlockTypeList
bool opensBlockOrBlockTypeList(const FormatStyle &Style) const
Returns true if this token starts a block-type list, i.e.
Definition: FormatToken.cpp:78
clang::format::TokenRole::precomputeFormattingInfos
virtual void precomputeFormattingInfos(const FormatToken *Token)
After the TokenAnnotator has finished annotating all the tokens, this function precomputes required i...
Definition: FormatToken.cpp:94
clang::Token::isOneOf
bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const
Definition: Token.h:100
FormatToken.h
clang::format::FormatStyle::BAS_DontAlign
@ BAS_DontAlign
Don't align, instead use ContinuationIndentWidth, e.g.
Definition: Format.h:77
ContinuationIndenter.h
Indenter
ContinuationIndenter * Indenter
Definition: UnwrappedLineFormatter.cpp:1044
clang::format::FormatToken::Next
FormatToken * Next
The next token in the unwrapped line.
Definition: FormatToken.h:488
clang::format::FormatToken::HasUnescapedNewline
unsigned HasUnescapedNewline
Whether there is at least one unescaped newline before the Token.
Definition: FormatToken.h:271
clang::format::TokenType
TokenType
Determines the semantic type of a syntactic token, e.g.
Definition: FormatToken.h:155
clang::format::FormatToken::MatchingParen
FormatToken * MatchingParen
If this is a bracket, this points to the matching one.
Definition: FormatToken.h:482
clang::format::FormatToken::opensScope
bool opensScope() const
Returns whether Tok is ([{ or an opening < of a template or in protos.
Definition: FormatToken.h:614