clang 17.0.0git
UnwrappedLineFormatter.cpp
Go to the documentation of this file.
1//===--- UnwrappedLineFormatter.cpp - Format C++ code ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "FormatToken.h"
12#include "WhitespaceManager.h"
13#include "llvm/Support/Debug.h"
14#include <queue>
15
16#define DEBUG_TYPE "format-formatter"
17
18namespace clang {
19namespace format {
20
21namespace {
22
23bool startsExternCBlock(const AnnotatedLine &Line) {
24 const FormatToken *Next = Line.First->getNextNonComment();
25 const FormatToken *NextNext = Next ? Next->getNextNonComment() : nullptr;
26 return Line.startsWith(tok::kw_extern) && Next && Next->isStringLiteral() &&
27 NextNext && NextNext->is(tok::l_brace);
28}
29
30bool isRecordLBrace(const FormatToken &Tok) {
31 return Tok.isOneOf(TT_ClassLBrace, TT_EnumLBrace, TT_RecordLBrace,
32 TT_StructLBrace, TT_UnionLBrace);
33}
34
35/// Tracks the indent level of \c AnnotatedLines across levels.
36///
37/// \c nextLine must be called for each \c AnnotatedLine, after which \c
38/// getIndent() will return the indent for the last line \c nextLine was called
39/// with.
40/// If the line is not formatted (and thus the indent does not change), calling
41/// \c adjustToUnmodifiedLine after the call to \c nextLine will cause
42/// subsequent lines on the same level to be indented at the same level as the
43/// given line.
44class LevelIndentTracker {
45public:
46 LevelIndentTracker(const FormatStyle &Style,
47 const AdditionalKeywords &Keywords, unsigned StartLevel,
48 int AdditionalIndent)
49 : Style(Style), Keywords(Keywords), AdditionalIndent(AdditionalIndent) {
50 for (unsigned i = 0; i != StartLevel; ++i)
51 IndentForLevel.push_back(Style.IndentWidth * i + AdditionalIndent);
52 }
53
54 /// Returns the indent for the current line.
55 unsigned getIndent() const { return Indent; }
56
57 /// Update the indent state given that \p Line is going to be formatted
58 /// next.
59 void nextLine(const AnnotatedLine &Line) {
60 Offset = getIndentOffset(*Line.First);
61 // Update the indent level cache size so that we can rely on it
62 // having the right size in adjustToUnmodifiedline.
63 if (Line.Level >= IndentForLevel.size())
64 IndentForLevel.resize(Line.Level + 1, -1);
65 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None &&
66 (Line.InPPDirective ||
67 (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
68 Line.Type == LT_CommentAbovePPDirective))) {
69 unsigned PPIndentWidth =
70 (Style.PPIndentWidth >= 0) ? Style.PPIndentWidth : Style.IndentWidth;
71 Indent = Line.InMacroBody
72 ? Line.PPLevel * PPIndentWidth +
73 (Line.Level - Line.PPLevel) * Style.IndentWidth
74 : Line.Level * PPIndentWidth;
75 Indent += AdditionalIndent;
76 } else {
77 Indent = getIndent(Line.Level);
78 }
79 if (static_cast<int>(Indent) + Offset >= 0)
80 Indent += Offset;
81 if (Line.IsContinuation)
82 Indent = Line.Level * Style.IndentWidth + Style.ContinuationIndentWidth;
83 }
84
85 /// Update the level indent to adapt to the given \p Line.
86 ///
87 /// When a line is not formatted, we move the subsequent lines on the same
88 /// level to the same indent.
89 /// Note that \c nextLine must have been called before this method.
90 void adjustToUnmodifiedLine(const AnnotatedLine &Line) {
91 unsigned LevelIndent = Line.First->OriginalColumn;
92 if (static_cast<int>(LevelIndent) - Offset >= 0)
93 LevelIndent -= Offset;
94 assert(Line.Level < IndentForLevel.size());
95 if ((!Line.First->is(tok::comment) || IndentForLevel[Line.Level] == -1) &&
96 !Line.InPPDirective) {
97 IndentForLevel[Line.Level] = LevelIndent;
98 }
99 }
100
101private:
102 /// Get the offset of the line relatively to the level.
103 ///
104 /// For example, 'public:' labels in classes are offset by 1 or 2
105 /// characters to the left from their level.
106 int getIndentOffset(const FormatToken &RootToken) {
107 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
108 Style.isCSharp()) {
109 return 0;
110 }
111
112 auto IsAccessModifier = [this, &RootToken]() {
113 if (RootToken.isAccessSpecifier(Style.isCpp())) {
114 return true;
115 } else if (RootToken.isObjCAccessSpecifier()) {
116 return true;
117 }
118 // Handle Qt signals.
119 else if ((RootToken.isOneOf(Keywords.kw_signals, Keywords.kw_qsignals) &&
120 RootToken.Next && RootToken.Next->is(tok::colon))) {
121 return true;
122 } else if (RootToken.Next &&
123 RootToken.Next->isOneOf(Keywords.kw_slots,
124 Keywords.kw_qslots) &&
125 RootToken.Next->Next && RootToken.Next->Next->is(tok::colon)) {
126 return true;
127 }
128 // Handle malformed access specifier e.g. 'private' without trailing ':'.
129 else if (!RootToken.Next && RootToken.isAccessSpecifier(false)) {
130 return true;
131 }
132 return false;
133 };
134
135 if (IsAccessModifier()) {
136 // The AccessModifierOffset may be overridden by IndentAccessModifiers,
137 // in which case we take a negative value of the IndentWidth to simulate
138 // the upper indent level.
139 return Style.IndentAccessModifiers ? -Style.IndentWidth
140 : Style.AccessModifierOffset;
141 }
142 return 0;
143 }
144
145 /// Get the indent of \p Level from \p IndentForLevel.
146 ///
147 /// \p IndentForLevel must contain the indent for the level \c l
148 /// at \p IndentForLevel[l], or a value < 0 if the indent for
149 /// that level is unknown.
150 unsigned getIndent(unsigned Level) const {
151 if (IndentForLevel[Level] != -1)
152 return IndentForLevel[Level];
153 if (Level == 0)
154 return 0;
155 return getIndent(Level - 1) + Style.IndentWidth;
156 }
157
158 const FormatStyle &Style;
159 const AdditionalKeywords &Keywords;
160 const unsigned AdditionalIndent;
161
162 /// The indent in characters for each level.
163 SmallVector<int> IndentForLevel;
164
165 /// Offset of the current line relative to the indent level.
166 ///
167 /// For example, the 'public' keywords is often indented with a negative
168 /// offset.
169 int Offset = 0;
170
171 /// The current line's indent.
172 unsigned Indent = 0;
173};
174
175const FormatToken *getMatchingNamespaceToken(
176 const AnnotatedLine *Line,
177 const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
178 if (!Line->startsWith(tok::r_brace))
179 return nullptr;
180 size_t StartLineIndex = Line->MatchingOpeningBlockLineIndex;
181 if (StartLineIndex == UnwrappedLine::kInvalidIndex)
182 return nullptr;
183 assert(StartLineIndex < AnnotatedLines.size());
184 return AnnotatedLines[StartLineIndex]->First->getNamespaceToken();
185}
186
187StringRef getNamespaceTokenText(const AnnotatedLine *Line) {
188 const FormatToken *NamespaceToken = Line->First->getNamespaceToken();
189 return NamespaceToken ? NamespaceToken->TokenText : StringRef();
190}
191
192StringRef getMatchingNamespaceTokenText(
193 const AnnotatedLine *Line,
194 const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
195 const FormatToken *NamespaceToken =
196 getMatchingNamespaceToken(Line, AnnotatedLines);
197 return NamespaceToken ? NamespaceToken->TokenText : StringRef();
198}
199
200class LineJoiner {
201public:
202 LineJoiner(const FormatStyle &Style, const AdditionalKeywords &Keywords,
203 const SmallVectorImpl<AnnotatedLine *> &Lines)
204 : Style(Style), Keywords(Keywords), End(Lines.end()), Next(Lines.begin()),
205 AnnotatedLines(Lines) {}
206
207 /// Returns the next line, merging multiple lines into one if possible.
208 const AnnotatedLine *getNextMergedLine(bool DryRun,
209 LevelIndentTracker &IndentTracker) {
210 if (Next == End)
211 return nullptr;
212 const AnnotatedLine *Current = *Next;
213 IndentTracker.nextLine(*Current);
214 unsigned MergedLines = tryFitMultipleLinesInOne(IndentTracker, Next, End);
215 if (MergedLines > 0 && Style.ColumnLimit == 0) {
216 // Disallow line merging if there is a break at the start of one of the
217 // input lines.
218 for (unsigned i = 0; i < MergedLines; ++i)
219 if (Next[i + 1]->First->NewlinesBefore > 0)
220 MergedLines = 0;
221 }
222 if (!DryRun)
223 for (unsigned i = 0; i < MergedLines; ++i)
224 join(*Next[0], *Next[i + 1]);
225 Next = Next + MergedLines + 1;
226 return Current;
227 }
228
229private:
230 /// Calculates how many lines can be merged into 1 starting at \p I.
231 unsigned
232 tryFitMultipleLinesInOne(LevelIndentTracker &IndentTracker,
233 SmallVectorImpl<AnnotatedLine *>::const_iterator I,
234 SmallVectorImpl<AnnotatedLine *>::const_iterator E) {
235 const unsigned Indent = IndentTracker.getIndent();
236
237 // Can't join the last line with anything.
238 if (I + 1 == E)
239 return 0;
240 // We can never merge stuff if there are trailing line comments.
241 const AnnotatedLine *TheLine = *I;
242 if (TheLine->Last->is(TT_LineComment))
243 return 0;
244 const auto &NextLine = *I[1];
245 if (NextLine.Type == LT_Invalid || NextLine.First->MustBreakBefore)
246 return 0;
247 if (TheLine->InPPDirective &&
248 (!NextLine.InPPDirective || NextLine.First->HasUnescapedNewline)) {
249 return 0;
250 }
251
252 if (Style.ColumnLimit > 0 && Indent > Style.ColumnLimit)
253 return 0;
254
255 unsigned Limit =
256 Style.ColumnLimit == 0 ? UINT_MAX : Style.ColumnLimit - Indent;
257 // If we already exceed the column limit, we set 'Limit' to 0. The different
258 // tryMerge..() functions can then decide whether to still do merging.
259 Limit = TheLine->Last->TotalLength > Limit
260 ? 0
261 : Limit - TheLine->Last->TotalLength;
262
263 if (TheLine->Last->is(TT_FunctionLBrace) &&
264 TheLine->First == TheLine->Last &&
265 !Style.BraceWrapping.SplitEmptyFunction &&
266 NextLine.First->is(tok::r_brace)) {
267 return tryMergeSimpleBlock(I, E, Limit);
268 }
269
270 const auto *PreviousLine = I != AnnotatedLines.begin() ? I[-1] : nullptr;
271 // Handle empty record blocks where the brace has already been wrapped.
272 if (PreviousLine && TheLine->Last->is(tok::l_brace) &&
273 TheLine->First == TheLine->Last) {
274 bool EmptyBlock = NextLine.First->is(tok::r_brace);
275
276 const FormatToken *Tok = PreviousLine->First;
277 if (Tok && Tok->is(tok::comment))
278 Tok = Tok->getNextNonComment();
279
280 if (Tok && Tok->getNamespaceToken()) {
281 return !Style.BraceWrapping.SplitEmptyNamespace && EmptyBlock
282 ? tryMergeSimpleBlock(I, E, Limit)
283 : 0;
284 }
285
286 if (Tok && Tok->is(tok::kw_typedef))
287 Tok = Tok->getNextNonComment();
288 if (Tok && Tok->isOneOf(tok::kw_class, tok::kw_struct, tok::kw_union,
289 tok::kw_extern, Keywords.kw_interface)) {
290 return !Style.BraceWrapping.SplitEmptyRecord && EmptyBlock
291 ? tryMergeSimpleBlock(I, E, Limit)
292 : 0;
293 }
294
295 if (Tok && Tok->is(tok::kw_template) &&
296 Style.BraceWrapping.SplitEmptyRecord && EmptyBlock) {
297 return 0;
298 }
299 }
300
301 auto ShouldMergeShortFunctions = [this, &I, &NextLine, PreviousLine,
302 TheLine]() {
303 if (Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_All)
304 return true;
305 if (Style.AllowShortFunctionsOnASingleLine >= FormatStyle::SFS_Empty &&
306 NextLine.First->is(tok::r_brace)) {
307 return true;
308 }
309
310 if (Style.AllowShortFunctionsOnASingleLine &
312 // Just checking TheLine->Level != 0 is not enough, because it
313 // provokes treating functions inside indented namespaces as short.
314 if (Style.isJavaScript() && TheLine->Last->is(TT_FunctionLBrace))
315 return true;
316
317 if (TheLine->Level != 0) {
318 if (!PreviousLine)
319 return false;
320
321 // TODO: Use IndentTracker to avoid loop?
322 // Find the last line with lower level.
323 const AnnotatedLine *Line = nullptr;
324 for (auto J = I - 1; J >= AnnotatedLines.begin(); --J) {
325 assert(*J);
326 if (!(*J)->InPPDirective && !(*J)->isComment() &&
327 (*J)->Level < TheLine->Level) {
328 Line = *J;
329 break;
330 }
331 }
332
333 if (!Line)
334 return false;
335
336 // Check if the found line starts a record.
337 const FormatToken *LastNonComment = Line->Last;
338 assert(LastNonComment);
339 if (LastNonComment->is(tok::comment)) {
340 LastNonComment = LastNonComment->getPreviousNonComment();
341 // There must be another token (usually `{`), because we chose a
342 // non-PPDirective and non-comment line that has a smaller level.
343 assert(LastNonComment);
344 }
345 return isRecordLBrace(*LastNonComment);
346 }
347 }
348
349 return false;
350 };
351
352 bool MergeShortFunctions = ShouldMergeShortFunctions();
353
354 const FormatToken *FirstNonComment = TheLine->First;
355 if (FirstNonComment->is(tok::comment)) {
356 FirstNonComment = FirstNonComment->getNextNonComment();
357 if (!FirstNonComment)
358 return 0;
359 }
360 // FIXME: There are probably cases where we should use FirstNonComment
361 // instead of TheLine->First.
362
363 if (Style.CompactNamespaces) {
364 if (const auto *NSToken = TheLine->First->getNamespaceToken()) {
365 int J = 1;
366 assert(TheLine->MatchingClosingBlockLineIndex > 0);
367 for (auto ClosingLineIndex = TheLine->MatchingClosingBlockLineIndex - 1;
368 I + J != E && NSToken->TokenText == getNamespaceTokenText(I[J]) &&
369 ClosingLineIndex == I[J]->MatchingClosingBlockLineIndex &&
370 I[J]->Last->TotalLength < Limit;
371 ++J, --ClosingLineIndex) {
372 Limit -= I[J]->Last->TotalLength;
373
374 // Reduce indent level for bodies of namespaces which were compacted,
375 // but only if their content was indented in the first place.
376 auto *ClosingLine = AnnotatedLines.begin() + ClosingLineIndex + 1;
377 auto OutdentBy = I[J]->Level - TheLine->Level;
378 for (auto *CompactedLine = I + J; CompactedLine <= ClosingLine;
379 ++CompactedLine) {
380 if (!(*CompactedLine)->InPPDirective)
381 (*CompactedLine)->Level -= OutdentBy;
382 }
383 }
384 return J - 1;
385 }
386
387 if (auto nsToken = getMatchingNamespaceToken(TheLine, AnnotatedLines)) {
388 int i = 0;
389 unsigned openingLine = TheLine->MatchingOpeningBlockLineIndex - 1;
390 for (; I + 1 + i != E &&
391 nsToken->TokenText ==
392 getMatchingNamespaceTokenText(I[i + 1], AnnotatedLines) &&
393 openingLine == I[i + 1]->MatchingOpeningBlockLineIndex;
394 i++, --openingLine) {
395 // No space between consecutive braces.
396 I[i + 1]->First->SpacesRequiredBefore = !I[i]->Last->is(tok::r_brace);
397
398 // Indent like the outer-most namespace.
399 IndentTracker.nextLine(*I[i + 1]);
400 }
401 return i;
402 }
403 }
404
405 // Try to merge a function block with left brace unwrapped.
406 if (TheLine->Last->is(TT_FunctionLBrace) && TheLine->First != TheLine->Last)
407 return MergeShortFunctions ? tryMergeSimpleBlock(I, E, Limit) : 0;
408 // Try to merge a control statement block with left brace unwrapped.
409 if (TheLine->Last->is(tok::l_brace) && FirstNonComment != TheLine->Last &&
410 FirstNonComment->isOneOf(tok::kw_if, tok::kw_while, tok::kw_for,
411 TT_ForEachMacro)) {
412 return Style.AllowShortBlocksOnASingleLine != FormatStyle::SBS_Never
413 ? tryMergeSimpleBlock(I, E, Limit)
414 : 0;
415 }
416 // Try to merge a control statement block with left brace wrapped.
417 if (NextLine.First->is(tok::l_brace)) {
418 if ((TheLine->First->isOneOf(tok::kw_if, tok::kw_else, tok::kw_while,
419 tok::kw_for, tok::kw_switch, tok::kw_try,
420 tok::kw_do, TT_ForEachMacro) ||
421 (TheLine->First->is(tok::r_brace) && TheLine->First->Next &&
422 TheLine->First->Next->isOneOf(tok::kw_else, tok::kw_catch))) &&
423 Style.BraceWrapping.AfterControlStatement ==
425 // If possible, merge the next line's wrapped left brace with the
426 // current line. Otherwise, leave it on the next line, as this is a
427 // multi-line control statement.
428 return (Style.ColumnLimit == 0 || TheLine->Level * Style.IndentWidth +
429 TheLine->Last->TotalLength <=
430 Style.ColumnLimit)
431 ? 1
432 : 0;
433 }
434 if (TheLine->First->isOneOf(tok::kw_if, tok::kw_else, tok::kw_while,
435 tok::kw_for, TT_ForEachMacro)) {
436 return (Style.BraceWrapping.AfterControlStatement ==
438 ? tryMergeSimpleBlock(I, E, Limit)
439 : 0;
440 }
441 if (TheLine->First->isOneOf(tok::kw_else, tok::kw_catch) &&
442 Style.BraceWrapping.AfterControlStatement ==
444 // This case if different from the upper BWACS_MultiLine processing
445 // in that a preceding r_brace is not on the same line as else/catch
446 // most likely because of BeforeElse/BeforeCatch set to true.
447 // If the line length doesn't fit ColumnLimit, leave l_brace on the
448 // next line to respect the BWACS_MultiLine.
449 return (Style.ColumnLimit == 0 ||
450 TheLine->Last->TotalLength <= Style.ColumnLimit)
451 ? 1
452 : 0;
453 }
454 }
455 if (PreviousLine && TheLine->First->is(tok::l_brace)) {
456 switch (PreviousLine->First->Tok.getKind()) {
457 case tok::at:
458 // Don't merge block with left brace wrapped after ObjC special blocks.
459 if (PreviousLine->First->Next) {
461 PreviousLine->First->Next->Tok.getObjCKeywordID();
462 if (kwId == tok::objc_autoreleasepool ||
463 kwId == tok::objc_synchronized) {
464 return 0;
465 }
466 }
467 break;
468
469 case tok::kw_case:
470 case tok::kw_default:
471 // Don't merge block with left brace wrapped after case labels.
472 return 0;
473
474 default:
475 break;
476 }
477 }
478
479 // Don't merge an empty template class or struct if SplitEmptyRecords
480 // is defined.
481 if (PreviousLine && Style.BraceWrapping.SplitEmptyRecord &&
482 TheLine->Last->is(tok::l_brace) && PreviousLine->Last) {
483 const FormatToken *Previous = PreviousLine->Last;
484 if (Previous) {
485 if (Previous->is(tok::comment))
486 Previous = Previous->getPreviousNonComment();
487 if (Previous) {
488 if (Previous->is(tok::greater) && !PreviousLine->InPPDirective)
489 return 0;
490 if (Previous->is(tok::identifier)) {
491 const FormatToken *PreviousPrevious =
492 Previous->getPreviousNonComment();
493 if (PreviousPrevious &&
494 PreviousPrevious->isOneOf(tok::kw_class, tok::kw_struct)) {
495 return 0;
496 }
497 }
498 }
499 }
500 }
501
502 if (TheLine->Last->is(tok::l_brace)) {
503 bool ShouldMerge = false;
504 // Try to merge records.
505 if (TheLine->Last->is(TT_EnumLBrace)) {
506 ShouldMerge = Style.AllowShortEnumsOnASingleLine;
507 } else if (TheLine->Last->isOneOf(TT_ClassLBrace, TT_StructLBrace)) {
508 // NOTE: We use AfterClass (whereas AfterStruct exists) for both classes
509 // and structs, but it seems that wrapping is still handled correctly
510 // elsewhere.
511 ShouldMerge = !Style.BraceWrapping.AfterClass ||
512 (NextLine.First->is(tok::r_brace) &&
513 !Style.BraceWrapping.SplitEmptyRecord);
514 } else if (TheLine->InPPDirective ||
515 !TheLine->First->isOneOf(tok::kw_class, tok::kw_enum,
516 tok::kw_struct)) {
517 // Try to merge a block with left brace unwrapped that wasn't yet
518 // covered.
519 ShouldMerge = !Style.BraceWrapping.AfterFunction ||
520 (NextLine.First->is(tok::r_brace) &&
521 !Style.BraceWrapping.SplitEmptyFunction);
522 }
523 return ShouldMerge ? tryMergeSimpleBlock(I, E, Limit) : 0;
524 }
525
526 // Try to merge a function block with left brace wrapped.
527 if (NextLine.First->is(TT_FunctionLBrace) &&
528 Style.BraceWrapping.AfterFunction) {
529 if (NextLine.Last->is(TT_LineComment))
530 return 0;
531
532 // Check for Limit <= 2 to account for the " {".
533 if (Limit <= 2 || (Style.ColumnLimit == 0 && containsMustBreak(TheLine)))
534 return 0;
535 Limit -= 2;
536
537 unsigned MergedLines = 0;
538 if (MergeShortFunctions ||
539 (Style.AllowShortFunctionsOnASingleLine >= FormatStyle::SFS_Empty &&
540 NextLine.First == NextLine.Last && I + 2 != E &&
541 I[2]->First->is(tok::r_brace))) {
542 MergedLines = tryMergeSimpleBlock(I + 1, E, Limit);
543 // If we managed to merge the block, count the function header, which is
544 // on a separate line.
545 if (MergedLines > 0)
546 ++MergedLines;
547 }
548 return MergedLines;
549 }
550 auto IsElseLine = [&TheLine]() -> bool {
551 const FormatToken *First = TheLine->First;
552 if (First->is(tok::kw_else))
553 return true;
554
555 return First->is(tok::r_brace) && First->Next &&
556 First->Next->is(tok::kw_else);
557 };
558 if (TheLine->First->is(tok::kw_if) ||
559 (IsElseLine() && (Style.AllowShortIfStatementsOnASingleLine ==
561 return Style.AllowShortIfStatementsOnASingleLine
562 ? tryMergeSimpleControlStatement(I, E, Limit)
563 : 0;
564 }
565 if (TheLine->First->isOneOf(tok::kw_for, tok::kw_while, tok::kw_do,
566 TT_ForEachMacro)) {
567 return Style.AllowShortLoopsOnASingleLine
568 ? tryMergeSimpleControlStatement(I, E, Limit)
569 : 0;
570 }
571 if (TheLine->First->isOneOf(tok::kw_case, tok::kw_default)) {
572 return Style.AllowShortCaseLabelsOnASingleLine
573 ? tryMergeShortCaseLabels(I, E, Limit)
574 : 0;
575 }
576 if (TheLine->InPPDirective &&
577 (TheLine->First->HasUnescapedNewline || TheLine->First->IsFirst)) {
578 return tryMergeSimplePPDirective(I, E, Limit);
579 }
580 return 0;
581 }
582
583 unsigned
584 tryMergeSimplePPDirective(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
585 SmallVectorImpl<AnnotatedLine *>::const_iterator E,
586 unsigned Limit) {
587 if (Limit == 0)
588 return 0;
589 if (I + 2 != E && I[2]->InPPDirective && !I[2]->First->HasUnescapedNewline)
590 return 0;
591 if (1 + I[1]->Last->TotalLength > Limit)
592 return 0;
593 return 1;
594 }
595
596 unsigned tryMergeSimpleControlStatement(
597 SmallVectorImpl<AnnotatedLine *>::const_iterator I,
598 SmallVectorImpl<AnnotatedLine *>::const_iterator E, unsigned Limit) {
599 if (Limit == 0)
600 return 0;
601 if (Style.BraceWrapping.AfterControlStatement ==
603 I[1]->First->is(tok::l_brace) &&
604 Style.AllowShortBlocksOnASingleLine == FormatStyle::SBS_Never) {
605 return 0;
606 }
607 if (I[1]->InPPDirective != (*I)->InPPDirective ||
608 (I[1]->InPPDirective && I[1]->First->HasUnescapedNewline)) {
609 return 0;
610 }
611 Limit = limitConsideringMacros(I + 1, E, Limit);
612 AnnotatedLine &Line = **I;
613 if (!Line.First->is(tok::kw_do) && !Line.First->is(tok::kw_else) &&
614 !Line.Last->is(tok::kw_else) && Line.Last->isNot(tok::r_paren)) {
615 return 0;
616 }
617 // Only merge `do while` if `do` is the only statement on the line.
618 if (Line.First->is(tok::kw_do) && !Line.Last->is(tok::kw_do))
619 return 0;
620 if (1 + I[1]->Last->TotalLength > Limit)
621 return 0;
622 // Don't merge with loops, ifs, a single semicolon or a line comment.
623 if (I[1]->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for, tok::kw_while,
624 TT_ForEachMacro, TT_LineComment)) {
625 return 0;
626 }
627 // Only inline simple if's (no nested if or else), unless specified
628 if (Style.AllowShortIfStatementsOnASingleLine ==
630 if (I + 2 != E && Line.startsWith(tok::kw_if) &&
631 I[2]->First->is(tok::kw_else)) {
632 return 0;
633 }
634 }
635 return 1;
636 }
637
638 unsigned
639 tryMergeShortCaseLabels(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
640 SmallVectorImpl<AnnotatedLine *>::const_iterator E,
641 unsigned Limit) {
642 if (Limit == 0 || I + 1 == E ||
643 I[1]->First->isOneOf(tok::kw_case, tok::kw_default)) {
644 return 0;
645 }
646 if (I[0]->Last->is(tok::l_brace) || I[1]->First->is(tok::l_brace))
647 return 0;
648 unsigned NumStmts = 0;
649 unsigned Length = 0;
650 bool EndsWithComment = false;
651 bool InPPDirective = I[0]->InPPDirective;
652 bool InMacroBody = I[0]->InMacroBody;
653 const unsigned Level = I[0]->Level;
654 for (; NumStmts < 3; ++NumStmts) {
655 if (I + 1 + NumStmts == E)
656 break;
657 const AnnotatedLine *Line = I[1 + NumStmts];
658 if (Line->InPPDirective != InPPDirective)
659 break;
660 if (Line->InMacroBody != InMacroBody)
661 break;
662 if (Line->First->isOneOf(tok::kw_case, tok::kw_default, tok::r_brace))
663 break;
664 if (Line->First->isOneOf(tok::kw_if, tok::kw_for, tok::kw_switch,
665 tok::kw_while) ||
666 EndsWithComment) {
667 return 0;
668 }
669 if (Line->First->is(tok::comment)) {
670 if (Level != Line->Level)
671 return 0;
672 SmallVectorImpl<AnnotatedLine *>::const_iterator J = I + 2 + NumStmts;
673 for (; J != E; ++J) {
674 Line = *J;
675 if (Line->InPPDirective != InPPDirective)
676 break;
677 if (Line->First->isOneOf(tok::kw_case, tok::kw_default, tok::r_brace))
678 break;
679 if (Line->First->isNot(tok::comment) || Level != Line->Level)
680 return 0;
681 }
682 break;
683 }
684 if (Line->Last->is(tok::comment))
685 EndsWithComment = true;
686 Length += I[1 + NumStmts]->Last->TotalLength + 1; // 1 for the space.
687 }
688 if (NumStmts == 0 || NumStmts == 3 || Length > Limit)
689 return 0;
690 return NumStmts;
691 }
692
693 unsigned
694 tryMergeSimpleBlock(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
695 SmallVectorImpl<AnnotatedLine *>::const_iterator E,
696 unsigned Limit) {
697 // Don't merge with a preprocessor directive.
698 if (I[1]->Type == LT_PreprocessorDirective)
699 return 0;
700
701 AnnotatedLine &Line = **I;
702
703 // Don't merge ObjC @ keywords and methods.
704 // FIXME: If an option to allow short exception handling clauses on a single
705 // line is added, change this to not return for @try and friends.
706 if (Style.Language != FormatStyle::LK_Java &&
707 Line.First->isOneOf(tok::at, tok::minus, tok::plus)) {
708 return 0;
709 }
710
711 // Check that the current line allows merging. This depends on whether we
712 // are in a control flow statements as well as several style flags.
713 if (Line.First->is(tok::kw_case) ||
714 (Line.First->Next && Line.First->Next->is(tok::kw_else))) {
715 return 0;
716 }
717 // default: in switch statement
718 if (Line.First->is(tok::kw_default)) {
719 const FormatToken *Tok = Line.First->getNextNonComment();
720 if (Tok && Tok->is(tok::colon))
721 return 0;
722 }
723
724 auto IsCtrlStmt = [](const auto &Line) {
725 return Line.First->isOneOf(tok::kw_if, tok::kw_else, tok::kw_while,
726 tok::kw_do, tok::kw_for, TT_ForEachMacro);
727 };
728
729 const bool IsSplitBlock =
730 Style.AllowShortBlocksOnASingleLine == FormatStyle::SBS_Never ||
731 (Style.AllowShortBlocksOnASingleLine == FormatStyle::SBS_Empty &&
732 I[1]->First->isNot(tok::r_brace));
733
734 if (IsCtrlStmt(Line) ||
735 Line.First->isOneOf(tok::kw_try, tok::kw___try, tok::kw_catch,
736 tok::kw___finally, tok::r_brace,
737 Keywords.kw___except)) {
738 if (IsSplitBlock)
739 return 0;
740 // Don't merge when we can't except the case when
741 // the control statement block is empty
742 if (!Style.AllowShortIfStatementsOnASingleLine &&
743 Line.First->isOneOf(tok::kw_if, tok::kw_else) &&
744 !Style.BraceWrapping.AfterControlStatement &&
745 !I[1]->First->is(tok::r_brace)) {
746 return 0;
747 }
748 if (!Style.AllowShortIfStatementsOnASingleLine &&
749 Line.First->isOneOf(tok::kw_if, tok::kw_else) &&
750 Style.BraceWrapping.AfterControlStatement ==
752 I + 2 != E && !I[2]->First->is(tok::r_brace)) {
753 return 0;
754 }
755 if (!Style.AllowShortLoopsOnASingleLine &&
756 Line.First->isOneOf(tok::kw_while, tok::kw_do, tok::kw_for,
757 TT_ForEachMacro) &&
758 !Style.BraceWrapping.AfterControlStatement &&
759 !I[1]->First->is(tok::r_brace)) {
760 return 0;
761 }
762 if (!Style.AllowShortLoopsOnASingleLine &&
763 Line.First->isOneOf(tok::kw_while, tok::kw_do, tok::kw_for,
764 TT_ForEachMacro) &&
765 Style.BraceWrapping.AfterControlStatement ==
767 I + 2 != E && !I[2]->First->is(tok::r_brace)) {
768 return 0;
769 }
770 // FIXME: Consider an option to allow short exception handling clauses on
771 // a single line.
772 // FIXME: This isn't covered by tests.
773 // FIXME: For catch, __except, __finally the first token on the line
774 // is '}', so this isn't correct here.
775 if (Line.First->isOneOf(tok::kw_try, tok::kw___try, tok::kw_catch,
776 Keywords.kw___except, tok::kw___finally)) {
777 return 0;
778 }
779 }
780
781 if (Line.Last->is(tok::l_brace)) {
782 if (IsSplitBlock && Line.First == Line.Last &&
783 I > AnnotatedLines.begin() &&
784 (I[-1]->endsWith(tok::kw_else) || IsCtrlStmt(*I[-1]))) {
785 return 0;
786 }
787 FormatToken *Tok = I[1]->First;
788 auto ShouldMerge = [Tok]() {
789 if (Tok->isNot(tok::r_brace) || Tok->MustBreakBefore)
790 return false;
791 const FormatToken *Next = Tok->getNextNonComment();
792 return !Next || Next->is(tok::semi);
793 };
794
795 if (ShouldMerge()) {
796 // We merge empty blocks even if the line exceeds the column limit.
797 Tok->SpacesRequiredBefore = Style.SpaceInEmptyBlock ? 1 : 0;
798 Tok->CanBreakBefore = true;
799 return 1;
800 } else if (Limit != 0 && !Line.startsWithNamespace() &&
801 !startsExternCBlock(Line)) {
802 // We don't merge short records.
803 if (isRecordLBrace(*Line.Last))
804 return 0;
805
806 // Check that we still have three lines and they fit into the limit.
807 if (I + 2 == E || I[2]->Type == LT_Invalid)
808 return 0;
809 Limit = limitConsideringMacros(I + 2, E, Limit);
810
811 if (!nextTwoLinesFitInto(I, Limit))
812 return 0;
813
814 // Second, check that the next line does not contain any braces - if it
815 // does, readability declines when putting it into a single line.
816 if (I[1]->Last->is(TT_LineComment))
817 return 0;
818 do {
819 if (Tok->is(tok::l_brace) && Tok->isNot(BK_BracedInit))
820 return 0;
821 Tok = Tok->Next;
822 } while (Tok);
823
824 // Last, check that the third line starts with a closing brace.
825 Tok = I[2]->First;
826 if (Tok->isNot(tok::r_brace))
827 return 0;
828
829 // Don't merge "if (a) { .. } else {".
830 if (Tok->Next && Tok->Next->is(tok::kw_else))
831 return 0;
832
833 // Don't merge a trailing multi-line control statement block like:
834 // } else if (foo &&
835 // bar)
836 // { <-- current Line
837 // baz();
838 // }
839 if (Line.First == Line.Last && Line.First->isNot(TT_FunctionLBrace) &&
840 Style.BraceWrapping.AfterControlStatement ==
842 return 0;
843 }
844
845 return 2;
846 }
847 } else if (I[1]->First->is(tok::l_brace)) {
848 if (I[1]->Last->is(TT_LineComment))
849 return 0;
850
851 // Check for Limit <= 2 to account for the " {".
852 if (Limit <= 2 || (Style.ColumnLimit == 0 && containsMustBreak(*I)))
853 return 0;
854 Limit -= 2;
855 unsigned MergedLines = 0;
856 if (Style.AllowShortBlocksOnASingleLine != FormatStyle::SBS_Never ||
857 (I[1]->First == I[1]->Last && I + 2 != E &&
858 I[2]->First->is(tok::r_brace))) {
859 MergedLines = tryMergeSimpleBlock(I + 1, E, Limit);
860 // If we managed to merge the block, count the statement header, which
861 // is on a separate line.
862 if (MergedLines > 0)
863 ++MergedLines;
864 }
865 return MergedLines;
866 }
867 return 0;
868 }
869
870 /// Returns the modified column limit for \p I if it is inside a macro and
871 /// needs a trailing '\'.
872 unsigned
873 limitConsideringMacros(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
874 SmallVectorImpl<AnnotatedLine *>::const_iterator E,
875 unsigned Limit) {
876 if (I[0]->InPPDirective && I + 1 != E &&
877 !I[1]->First->HasUnescapedNewline && !I[1]->First->is(tok::eof)) {
878 return Limit < 2 ? 0 : Limit - 2;
879 }
880 return Limit;
881 }
882
883 bool nextTwoLinesFitInto(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
884 unsigned Limit) {
885 if (I[1]->First->MustBreakBefore || I[2]->First->MustBreakBefore)
886 return false;
887 return 1 + I[1]->Last->TotalLength + 1 + I[2]->Last->TotalLength <= Limit;
888 }
889
890 bool containsMustBreak(const AnnotatedLine *Line) {
891 assert(Line->First);
892 // Ignore the first token, because in this situation, it applies more to the
893 // last token of the previous line.
894 for (const FormatToken *Tok = Line->First->Next; Tok; Tok = Tok->Next)
895 if (Tok->MustBreakBefore)
896 return true;
897 return false;
898 }
899
900 void join(AnnotatedLine &A, const AnnotatedLine &B) {
901 assert(!A.Last->Next);
902 assert(!B.First->Previous);
903 if (B.Affected)
904 A.Affected = true;
905 A.Last->Next = B.First;
906 B.First->Previous = A.Last;
907 B.First->CanBreakBefore = true;
908 unsigned LengthA = A.Last->TotalLength + B.First->SpacesRequiredBefore;
909 for (FormatToken *Tok = B.First; Tok; Tok = Tok->Next) {
910 Tok->TotalLength += LengthA;
911 A.Last = Tok;
912 }
913 }
914
915 const FormatStyle &Style;
916 const AdditionalKeywords &Keywords;
917 const SmallVectorImpl<AnnotatedLine *>::const_iterator End;
918
919 SmallVectorImpl<AnnotatedLine *>::const_iterator Next;
920 const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines;
921};
922
923static void markFinalized(FormatToken *Tok) {
924 for (; Tok; Tok = Tok->Next) {
925 if (Tok->MacroCtx && Tok->MacroCtx->Role == MR_ExpandedArg) {
926 // In the first pass we format all macro arguments in the expanded token
927 // stream. Instead of finalizing the macro arguments, we mark that they
928 // will be modified as unexpanded arguments (as part of the macro call
929 // formatting) in the next pass.
930 Tok->MacroCtx->Role = MR_UnexpandedArg;
931 // Reset whether spaces are required before this token, as that is context
932 // dependent, and that context may change when formatting the macro call.
933 // For example, given M(x) -> 2 * x, and the macro call M(var),
934 // the token 'var' will have SpacesRequiredBefore = 1 after being
935 // formatted as part of the expanded macro, but SpacesRequiredBefore = 0
936 // for its position within the macro call.
937 Tok->SpacesRequiredBefore = 0;
938 } else {
939 Tok->Finalized = true;
940 }
941 }
942}
943
944#ifndef NDEBUG
945static void printLineState(const LineState &State) {
946 llvm::dbgs() << "State: ";
947 for (const ParenState &P : State.Stack) {
948 llvm::dbgs() << (P.Tok ? P.Tok->TokenText : "F") << "|" << P.Indent << "|"
949 << P.LastSpace << "|" << P.NestedBlockIndent << " ";
950 }
951 llvm::dbgs() << State.NextToken->TokenText << "\n";
952}
953#endif
954
955/// Base class for classes that format one \c AnnotatedLine.
956class LineFormatter {
957public:
958 LineFormatter(ContinuationIndenter *Indenter, WhitespaceManager *Whitespaces,
959 const FormatStyle &Style,
960 UnwrappedLineFormatter *BlockFormatter)
961 : Indenter(Indenter), Whitespaces(Whitespaces), Style(Style),
962 BlockFormatter(BlockFormatter) {}
963 virtual ~LineFormatter() {}
964
965 /// Formats an \c AnnotatedLine and returns the penalty.
966 ///
967 /// If \p DryRun is \c false, directly applies the changes.
968 virtual unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,
969 unsigned FirstStartColumn, bool DryRun) = 0;
970
971protected:
972 /// If the \p State's next token is an r_brace closing a nested block,
973 /// format the nested block before it.
974 ///
975 /// Returns \c true if all children could be placed successfully and adapts
976 /// \p Penalty as well as \p State. If \p DryRun is false, also directly
977 /// creates changes using \c Whitespaces.
978 ///
979 /// The crucial idea here is that children always get formatted upon
980 /// encountering the closing brace right after the nested block. Now, if we
981 /// are currently trying to keep the "}" on the same line (i.e. \p NewLine is
982 /// \c false), the entire block has to be kept on the same line (which is only
983 /// possible if it fits on the line, only contains a single statement, etc.
984 ///
985 /// If \p NewLine is true, we format the nested block on separate lines, i.e.
986 /// break after the "{", format all lines with correct indentation and the put
987 /// the closing "}" on yet another new line.
988 ///
989 /// This enables us to keep the simple structure of the
990 /// \c UnwrappedLineFormatter, where we only have two options for each token:
991 /// break or don't break.
992 bool formatChildren(LineState &State, bool NewLine, bool DryRun,
993 unsigned &Penalty) {
994 const FormatToken *LBrace = State.NextToken->getPreviousNonComment();
995 bool HasLBrace = LBrace && LBrace->is(tok::l_brace) && LBrace->is(BK_Block);
996 FormatToken &Previous = *State.NextToken->Previous;
997 if (Previous.Children.size() == 0 || (!HasLBrace && !LBrace->MacroParent)) {
998 // The previous token does not open a block. Nothing to do. We don't
999 // assert so that we can simply call this function for all tokens.
1000 return true;
1001 }
1002
1003 if (NewLine || Previous.MacroParent) {
1004 const ParenState &P = State.Stack.back();
1005
1006 int AdditionalIndent =
1007 P.Indent - Previous.Children[0]->Level * Style.IndentWidth;
1008 Penalty +=
1009 BlockFormatter->format(Previous.Children, DryRun, AdditionalIndent,
1010 /*FixBadIndentation=*/true);
1011 return true;
1012 }
1013
1014 if (Previous.Children[0]->First->MustBreakBefore)
1015 return false;
1016
1017 // Cannot merge into one line if this line ends on a comment.
1018 if (Previous.is(tok::comment))
1019 return false;
1020
1021 // Cannot merge multiple statements into a single line.
1022 if (Previous.Children.size() > 1)
1023 return false;
1024
1025 const AnnotatedLine *Child = Previous.Children[0];
1026 // We can't put the closing "}" on a line with a trailing comment.
1027 if (Child->Last->isTrailingComment())
1028 return false;
1029
1030 // If the child line exceeds the column limit, we wouldn't want to merge it.
1031 // We add +2 for the trailing " }".
1032 if (Style.ColumnLimit > 0 &&
1033 Child->Last->TotalLength + State.Column + 2 > Style.ColumnLimit) {
1034 return false;
1035 }
1036
1037 if (!DryRun) {
1038 Whitespaces->replaceWhitespace(
1039 *Child->First, /*Newlines=*/0, /*Spaces=*/1,
1040 /*StartOfTokenColumn=*/State.Column, /*IsAligned=*/false,
1041 State.Line->InPPDirective);
1042 }
1043 Penalty +=
1044 formatLine(*Child, State.Column + 1, /*FirstStartColumn=*/0, DryRun);
1045
1046 State.Column += 1 + Child->Last->TotalLength;
1047 return true;
1048 }
1049
1050 ContinuationIndenter *Indenter;
1051
1052private:
1053 WhitespaceManager *Whitespaces;
1054 const FormatStyle &Style;
1055 UnwrappedLineFormatter *BlockFormatter;
1056};
1057
1058/// Formatter that keeps the existing line breaks.
1059class NoColumnLimitLineFormatter : public LineFormatter {
1060public:
1061 NoColumnLimitLineFormatter(ContinuationIndenter *Indenter,
1062 WhitespaceManager *Whitespaces,
1063 const FormatStyle &Style,
1064 UnwrappedLineFormatter *BlockFormatter)
1065 : LineFormatter(Indenter, Whitespaces, Style, BlockFormatter) {}
1066
1067 /// Formats the line, simply keeping all of the input's line breaking
1068 /// decisions.
1069 unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,
1070 unsigned FirstStartColumn, bool DryRun) override {
1071 assert(!DryRun);
1072 LineState State = Indenter->getInitialState(FirstIndent, FirstStartColumn,
1073 &Line, /*DryRun=*/false);
1074 while (State.NextToken) {
1075 bool Newline =
1076 Indenter->mustBreak(State) ||
1077 (Indenter->canBreak(State) && State.NextToken->NewlinesBefore > 0);
1078 unsigned Penalty = 0;
1079 formatChildren(State, Newline, /*DryRun=*/false, Penalty);
1080 Indenter->addTokenToState(State, Newline, /*DryRun=*/false);
1081 }
1082 return 0;
1083 }
1084};
1085
1086/// Formatter that puts all tokens into a single line without breaks.
1087class NoLineBreakFormatter : public LineFormatter {
1088public:
1089 NoLineBreakFormatter(ContinuationIndenter *Indenter,
1090 WhitespaceManager *Whitespaces, const FormatStyle &Style,
1091 UnwrappedLineFormatter *BlockFormatter)
1092 : LineFormatter(Indenter, Whitespaces, Style, BlockFormatter) {}
1093
1094 /// Puts all tokens into a single line.
1095 unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,
1096 unsigned FirstStartColumn, bool DryRun) override {
1097 unsigned Penalty = 0;
1098 LineState State =
1099 Indenter->getInitialState(FirstIndent, FirstStartColumn, &Line, DryRun);
1100 while (State.NextToken) {
1101 formatChildren(State, /*NewLine=*/false, DryRun, Penalty);
1102 Indenter->addTokenToState(
1103 State, /*Newline=*/State.NextToken->MustBreakBefore, DryRun);
1104 }
1105 return Penalty;
1106 }
1107};
1108
1109/// Finds the best way to break lines.
1110class OptimizingLineFormatter : public LineFormatter {
1111public:
1112 OptimizingLineFormatter(ContinuationIndenter *Indenter,
1113 WhitespaceManager *Whitespaces,
1114 const FormatStyle &Style,
1115 UnwrappedLineFormatter *BlockFormatter)
1116 : LineFormatter(Indenter, Whitespaces, Style, BlockFormatter) {}
1117
1118 /// Formats the line by finding the best line breaks with line lengths
1119 /// below the column limit.
1120 unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,
1121 unsigned FirstStartColumn, bool DryRun) override {
1122 LineState State =
1123 Indenter->getInitialState(FirstIndent, FirstStartColumn, &Line, DryRun);
1124
1125 // If the ObjC method declaration does not fit on a line, we should format
1126 // it with one arg per line.
1127 if (State.Line->Type == LT_ObjCMethodDecl)
1128 State.Stack.back().BreakBeforeParameter = true;
1129
1130 // Find best solution in solution space.
1131 return analyzeSolutionSpace(State, DryRun);
1132 }
1133
1134private:
1135 struct CompareLineStatePointers {
1136 bool operator()(LineState *obj1, LineState *obj2) const {
1137 return *obj1 < *obj2;
1138 }
1139 };
1140
1141 /// A pair of <penalty, count> that is used to prioritize the BFS on.
1142 ///
1143 /// In case of equal penalties, we want to prefer states that were inserted
1144 /// first. During state generation we make sure that we insert states first
1145 /// that break the line as late as possible.
1146 typedef std::pair<unsigned, unsigned> OrderedPenalty;
1147
1148 /// An edge in the solution space from \c Previous->State to \c State,
1149 /// inserting a newline dependent on the \c NewLine.
1150 struct StateNode {
1151 StateNode(const LineState &State, bool NewLine, StateNode *Previous)
1152 : State(State), NewLine(NewLine), Previous(Previous) {}
1153 LineState State;
1155 StateNode *Previous;
1156 };
1157
1158 /// An item in the prioritized BFS search queue. The \c StateNode's
1159 /// \c State has the given \c OrderedPenalty.
1160 typedef std::pair<OrderedPenalty, StateNode *> QueueItem;
1161
1162 /// The BFS queue type.
1163 typedef std::priority_queue<QueueItem, SmallVector<QueueItem>,
1164 std::greater<QueueItem>>
1165 QueueType;
1166
1167 /// Analyze the entire solution space starting from \p InitialState.
1168 ///
1169 /// This implements a variant of Dijkstra's algorithm on the graph that spans
1170 /// the solution space (\c LineStates are the nodes). The algorithm tries to
1171 /// find the shortest path (the one with lowest penalty) from \p InitialState
1172 /// to a state where all tokens are placed. Returns the penalty.
1173 ///
1174 /// If \p DryRun is \c false, directly applies the changes.
1175 unsigned analyzeSolutionSpace(LineState &InitialState, bool DryRun) {
1176 std::set<LineState *, CompareLineStatePointers> Seen;
1177
1178 // Increasing count of \c StateNode items we have created. This is used to
1179 // create a deterministic order independent of the container.
1180 unsigned Count = 0;
1181 QueueType Queue;
1182
1183 // Insert start element into queue.
1184 StateNode *RootNode =
1185 new (Allocator.Allocate()) StateNode(InitialState, false, nullptr);
1186 Queue.push(QueueItem(OrderedPenalty(0, Count), RootNode));
1187 ++Count;
1188
1189 unsigned Penalty = 0;
1190
1191 // While not empty, take first element and follow edges.
1192 while (!Queue.empty()) {
1193 // Quit if we still haven't found a solution by now.
1194 if (Count > 25000000)
1195 return 0;
1196
1197 Penalty = Queue.top().first.first;
1198 StateNode *Node = Queue.top().second;
1199 if (!Node->State.NextToken) {
1200 LLVM_DEBUG(llvm::dbgs()
1201 << "\n---\nPenalty for line: " << Penalty << "\n");
1202 break;
1203 }
1204 Queue.pop();
1205
1206 // Cut off the analysis of certain solutions if the analysis gets too
1207 // complex. See description of IgnoreStackForComparison.
1208 if (Count > 50000)
1209 Node->State.IgnoreStackForComparison = true;
1210
1211 if (!Seen.insert(&Node->State).second) {
1212 // State already examined with lower penalty.
1213 continue;
1214 }
1215
1216 FormatDecision LastFormat = Node->State.NextToken->getDecision();
1217 if (LastFormat == FD_Unformatted || LastFormat == FD_Continue)
1218 addNextStateToQueue(Penalty, Node, /*NewLine=*/false, &Count, &Queue);
1219 if (LastFormat == FD_Unformatted || LastFormat == FD_Break)
1220 addNextStateToQueue(Penalty, Node, /*NewLine=*/true, &Count, &Queue);
1221 }
1222
1223 if (Queue.empty()) {
1224 // We were unable to find a solution, do nothing.
1225 // FIXME: Add diagnostic?
1226 LLVM_DEBUG(llvm::dbgs() << "Could not find a solution.\n");
1227 return 0;
1228 }
1229
1230 // Reconstruct the solution.
1231 if (!DryRun)
1232 reconstructPath(InitialState, Queue.top().second);
1233
1234 LLVM_DEBUG(llvm::dbgs()
1235 << "Total number of analyzed states: " << Count << "\n");
1236 LLVM_DEBUG(llvm::dbgs() << "---\n");
1237
1238 return Penalty;
1239 }
1240
1241 /// Add the following state to the analysis queue \c Queue.
1242 ///
1243 /// Assume the current state is \p PreviousNode and has been reached with a
1244 /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true.
1245 void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode,
1246 bool NewLine, unsigned *Count, QueueType *Queue) {
1247 if (NewLine && !Indenter->canBreak(PreviousNode->State))
1248 return;
1249 if (!NewLine && Indenter->mustBreak(PreviousNode->State))
1250 return;
1251
1252 StateNode *Node = new (Allocator.Allocate())
1253 StateNode(PreviousNode->State, NewLine, PreviousNode);
1254 if (!formatChildren(Node->State, NewLine, /*DryRun=*/true, Penalty))
1255 return;
1256
1257 Penalty += Indenter->addTokenToState(Node->State, NewLine, true);
1258
1259 Queue->push(QueueItem(OrderedPenalty(Penalty, *Count), Node));
1260 ++(*Count);
1261 }
1262
1263 /// Applies the best formatting by reconstructing the path in the
1264 /// solution space that leads to \c Best.
1265 void reconstructPath(LineState &State, StateNode *Best) {
1267 // We do not need a break before the initial token.
1268 while (Best->Previous) {
1269 Path.push_back(Best);
1270 Best = Best->Previous;
1271 }
1272 for (const auto &Node : llvm::reverse(Path)) {
1273 unsigned Penalty = 0;
1274 formatChildren(State, Node->NewLine, /*DryRun=*/false, Penalty);
1275 Penalty += Indenter->addTokenToState(State, Node->NewLine, false);
1276
1277 LLVM_DEBUG({
1278 printLineState(Node->Previous->State);
1279 if (Node->NewLine) {
1280 llvm::dbgs() << "Penalty for placing "
1281 << Node->Previous->State.NextToken->Tok.getName()
1282 << " on a new line: " << Penalty << "\n";
1283 }
1284 });
1285 }
1286 }
1287
1288 llvm::SpecificBumpPtrAllocator<StateNode> Allocator;
1289};
1290
1291} // anonymous namespace
1292
1294 const SmallVectorImpl<AnnotatedLine *> &Lines, bool DryRun,
1295 int AdditionalIndent, bool FixBadIndentation, unsigned FirstStartColumn,
1296 unsigned NextStartColumn, unsigned LastStartColumn) {
1297 LineJoiner Joiner(Style, Keywords, Lines);
1298
1299 // Try to look up already computed penalty in DryRun-mode.
1300 std::pair<const SmallVectorImpl<AnnotatedLine *> *, unsigned> CacheKey(
1301 &Lines, AdditionalIndent);
1302 auto CacheIt = PenaltyCache.find(CacheKey);
1303 if (DryRun && CacheIt != PenaltyCache.end())
1304 return CacheIt->second;
1305
1306 assert(!Lines.empty());
1307 unsigned Penalty = 0;
1308 LevelIndentTracker IndentTracker(Style, Keywords, Lines[0]->Level,
1309 AdditionalIndent);
1310 const AnnotatedLine *PrevPrevLine = nullptr;
1311 const AnnotatedLine *PreviousLine = nullptr;
1312 const AnnotatedLine *NextLine = nullptr;
1313
1314 // The minimum level of consecutive lines that have been formatted.
1315 unsigned RangeMinLevel = UINT_MAX;
1316
1317 bool FirstLine = true;
1318 for (const AnnotatedLine *Line =
1319 Joiner.getNextMergedLine(DryRun, IndentTracker);
1320 Line; PrevPrevLine = PreviousLine, PreviousLine = Line, Line = NextLine,
1321 FirstLine = false) {
1322 assert(Line->First);
1323 const AnnotatedLine &TheLine = *Line;
1324 unsigned Indent = IndentTracker.getIndent();
1325
1326 // We continue formatting unchanged lines to adjust their indent, e.g. if a
1327 // scope was added. However, we need to carefully stop doing this when we
1328 // exit the scope of affected lines to prevent indenting the entire
1329 // remaining file if it currently missing a closing brace.
1330 bool PreviousRBrace =
1331 PreviousLine && PreviousLine->startsWith(tok::r_brace);
1332 bool ContinueFormatting =
1333 TheLine.Level > RangeMinLevel ||
1334 (TheLine.Level == RangeMinLevel && !PreviousRBrace &&
1335 !TheLine.startsWith(tok::r_brace));
1336
1337 bool FixIndentation = (FixBadIndentation || ContinueFormatting) &&
1338 Indent != TheLine.First->OriginalColumn;
1339 bool ShouldFormat = TheLine.Affected || FixIndentation;
1340 // We cannot format this line; if the reason is that the line had a
1341 // parsing error, remember that.
1342 if (ShouldFormat && TheLine.Type == LT_Invalid && Status) {
1343 Status->FormatComplete = false;
1344 Status->Line =
1345 SourceMgr.getSpellingLineNumber(TheLine.First->Tok.getLocation());
1346 }
1347
1348 if (ShouldFormat && TheLine.Type != LT_Invalid) {
1349 if (!DryRun) {
1350 bool LastLine = TheLine.First->is(tok::eof);
1351 formatFirstToken(TheLine, PreviousLine, PrevPrevLine, Lines, Indent,
1352 LastLine ? LastStartColumn : NextStartColumn + Indent);
1353 }
1354
1355 NextLine = Joiner.getNextMergedLine(DryRun, IndentTracker);
1356 unsigned ColumnLimit = getColumnLimit(TheLine.InPPDirective, NextLine);
1357 bool FitsIntoOneLine =
1358 !TheLine.ContainsMacroCall &&
1359 (TheLine.Last->TotalLength + Indent <= ColumnLimit ||
1360 (TheLine.Type == LT_ImportStatement &&
1361 (!Style.isJavaScript() || !Style.JavaScriptWrapImports)) ||
1362 (Style.isCSharp() &&
1363 TheLine.InPPDirective)); // don't split #regions in C#
1364 if (Style.ColumnLimit == 0) {
1365 NoColumnLimitLineFormatter(Indenter, Whitespaces, Style, this)
1366 .formatLine(TheLine, NextStartColumn + Indent,
1367 FirstLine ? FirstStartColumn : 0, DryRun);
1368 } else if (FitsIntoOneLine) {
1369 Penalty += NoLineBreakFormatter(Indenter, Whitespaces, Style, this)
1370 .formatLine(TheLine, NextStartColumn + Indent,
1371 FirstLine ? FirstStartColumn : 0, DryRun);
1372 } else {
1373 Penalty += OptimizingLineFormatter(Indenter, Whitespaces, Style, this)
1374 .formatLine(TheLine, NextStartColumn + Indent,
1375 FirstLine ? FirstStartColumn : 0, DryRun);
1376 }
1377 RangeMinLevel = std::min(RangeMinLevel, TheLine.Level);
1378 } else {
1379 // If no token in the current line is affected, we still need to format
1380 // affected children.
1381 if (TheLine.ChildrenAffected) {
1382 for (const FormatToken *Tok = TheLine.First; Tok; Tok = Tok->Next)
1383 if (!Tok->Children.empty())
1384 format(Tok->Children, DryRun);
1385 }
1386
1387 // Adapt following lines on the current indent level to the same level
1388 // unless the current \c AnnotatedLine is not at the beginning of a line.
1389 bool StartsNewLine =
1390 TheLine.First->NewlinesBefore > 0 || TheLine.First->IsFirst;
1391 if (StartsNewLine)
1392 IndentTracker.adjustToUnmodifiedLine(TheLine);
1393 if (!DryRun) {
1394 bool ReformatLeadingWhitespace =
1395 StartsNewLine && ((PreviousLine && PreviousLine->Affected) ||
1397 // Format the first token.
1398 if (ReformatLeadingWhitespace) {
1399 formatFirstToken(TheLine, PreviousLine, PrevPrevLine, Lines,
1400 TheLine.First->OriginalColumn,
1401 TheLine.First->OriginalColumn);
1402 } else {
1403 Whitespaces->addUntouchableToken(*TheLine.First,
1404 TheLine.InPPDirective);
1405 }
1406
1407 // Notify the WhitespaceManager about the unchanged whitespace.
1408 for (FormatToken *Tok = TheLine.First->Next; Tok; Tok = Tok->Next)
1409 Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective);
1410 }
1411 NextLine = Joiner.getNextMergedLine(DryRun, IndentTracker);
1412 RangeMinLevel = UINT_MAX;
1413 }
1414 if (!DryRun)
1415 markFinalized(TheLine.First);
1416 }
1417 PenaltyCache[CacheKey] = Penalty;
1418 return Penalty;
1419}
1420
1421static auto newlinesBeforeLine(const AnnotatedLine &Line,
1422 const AnnotatedLine *PreviousLine,
1423 const AnnotatedLine *PrevPrevLine,
1425 const FormatStyle &Style) {
1426 const auto &RootToken = *Line.First;
1427 unsigned Newlines =
1428 std::min(RootToken.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1);
1429 // Remove empty lines before "}" where applicable.
1430 if (RootToken.is(tok::r_brace) &&
1431 (!RootToken.Next ||
1432 (RootToken.Next->is(tok::semi) && !RootToken.Next->Next)) &&
1433 // Do not remove empty lines before namespace closing "}".
1434 !getNamespaceToken(&Line, Lines)) {
1435 Newlines = std::min(Newlines, 1u);
1436 }
1437 // Remove empty lines at the start of nested blocks (lambdas/arrow functions)
1438 if (!PreviousLine && Line.Level > 0)
1439 Newlines = std::min(Newlines, 1u);
1440 if (Newlines == 0 && !RootToken.IsFirst)
1441 Newlines = 1;
1442 if (RootToken.IsFirst && !RootToken.HasUnescapedNewline)
1443 Newlines = 0;
1444
1445 // Remove empty lines after "{".
1446 if (!Style.KeepEmptyLinesAtTheStartOfBlocks && PreviousLine &&
1447 PreviousLine->Last->is(tok::l_brace) &&
1448 !PreviousLine->startsWithNamespace() &&
1449 !(PrevPrevLine && PrevPrevLine->startsWithNamespace() &&
1450 PreviousLine->startsWith(tok::l_brace)) &&
1451 !startsExternCBlock(*PreviousLine)) {
1452 Newlines = 1;
1453 }
1454
1455 // Insert or remove empty line before access specifiers.
1456 if (PreviousLine && RootToken.isAccessSpecifier()) {
1457 switch (Style.EmptyLineBeforeAccessModifier) {
1459 if (Newlines > 1)
1460 Newlines = 1;
1461 break;
1463 Newlines = std::max(RootToken.NewlinesBefore, 1u);
1464 break;
1466 if (PreviousLine->Last->isOneOf(tok::semi, tok::r_brace) && Newlines <= 1)
1467 Newlines = 2;
1468 if (PreviousLine->First->isAccessSpecifier())
1469 Newlines = 1; // Previous is an access modifier remove all new lines.
1470 break;
1472 const FormatToken *previousToken;
1473 if (PreviousLine->Last->is(tok::comment))
1474 previousToken = PreviousLine->Last->getPreviousNonComment();
1475 else
1476 previousToken = PreviousLine->Last;
1477 if ((!previousToken || !previousToken->is(tok::l_brace)) && Newlines <= 1)
1478 Newlines = 2;
1479 } break;
1480 }
1481 }
1482
1483 // Insert or remove empty line after access specifiers.
1484 if (PreviousLine && PreviousLine->First->isAccessSpecifier() &&
1485 (!PreviousLine->InPPDirective || !RootToken.HasUnescapedNewline)) {
1486 // EmptyLineBeforeAccessModifier is handling the case when two access
1487 // modifiers follow each other.
1488 if (!RootToken.isAccessSpecifier()) {
1489 switch (Style.EmptyLineAfterAccessModifier) {
1491 Newlines = 1;
1492 break;
1494 Newlines = std::max(Newlines, 1u);
1495 break;
1497 if (RootToken.is(tok::r_brace)) // Do not add at end of class.
1498 Newlines = 1u;
1499 else
1500 Newlines = std::max(Newlines, 2u);
1501 break;
1502 }
1503 }
1504 }
1505
1506 return Newlines;
1507}
1508
1509void UnwrappedLineFormatter::formatFirstToken(
1510 const AnnotatedLine &Line, const AnnotatedLine *PreviousLine,
1511 const AnnotatedLine *PrevPrevLine,
1512 const SmallVectorImpl<AnnotatedLine *> &Lines, unsigned Indent,
1513 unsigned NewlineIndent) {
1514 FormatToken &RootToken = *Line.First;
1515 if (RootToken.is(tok::eof)) {
1516 unsigned Newlines = std::min(RootToken.NewlinesBefore, 1u);
1517 unsigned TokenIndent = Newlines ? NewlineIndent : 0;
1518 Whitespaces->replaceWhitespace(RootToken, Newlines, TokenIndent,
1519 TokenIndent);
1520 return;
1521 }
1522
1523 const auto Newlines =
1524 RootToken.Finalized
1525 ? RootToken.NewlinesBefore
1526 : newlinesBeforeLine(Line, PreviousLine, PrevPrevLine, Lines, Style);
1527 if (Newlines)
1528 Indent = NewlineIndent;
1529
1530 // Preprocessor directives get indented before the hash only if specified. In
1531 // Javascript import statements are indented like normal statements.
1532 if (!Style.isJavaScript() &&
1533 Style.IndentPPDirectives != FormatStyle::PPDIS_BeforeHash &&
1534 (Line.Type == LT_PreprocessorDirective ||
1535 Line.Type == LT_ImportStatement)) {
1536 Indent = 0;
1537 }
1538
1539 Whitespaces->replaceWhitespace(RootToken, Newlines, Indent, Indent,
1540 /*IsAligned=*/false,
1541 Line.InPPDirective &&
1542 !RootToken.HasUnescapedNewline);
1543}
1544
1545unsigned
1546UnwrappedLineFormatter::getColumnLimit(bool InPPDirective,
1547 const AnnotatedLine *NextLine) const {
1548 // In preprocessor directives reserve two chars for trailing " \" if the
1549 // next line continues the preprocessor directive.
1550 bool ContinuesPPDirective =
1551 InPPDirective &&
1552 // If there is no next line, this is likely a child line and the parent
1553 // continues the preprocessor directive.
1554 (!NextLine ||
1555 (NextLine->InPPDirective &&
1556 // If there is an unescaped newline between this line and the next, the
1557 // next line starts a new preprocessor directive.
1558 !NextLine->First->HasUnescapedNewline));
1559 return Style.ColumnLimit - (ContinuesPPDirective ? 2 : 0);
1560}
1561
1562} // namespace format
1563} // namespace clang
MatchType Type
DynTypedNode Node
StringRef P
This file contains the declaration of the FormatToken, a wrapper around Token with additional informa...
unsigned Offset
Definition: Format.cpp:2798
This file declares NamespaceEndCommentsFixer, a TokenAnalyzer that fixes namespace end comments.
StateNode * Previous
ContinuationIndenter * Indenter
Implements a combinatorial exploration of all the different linebreaks unwrapped lines can be formatt...
WhitespaceManager class manages whitespace around tokens and their replacements.
unsigned getSpellingLineNumber(SourceLocation Loc, bool *Invalid=nullptr) const
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
Definition: Token.h:131
bool LeadingEmptyLinesAffected
True if the leading empty lines of this line intersect with one of the input ranges.
bool Affected
True if this line should be formatted, i.e.
bool ContainsMacroCall
True if this line contains a macro call for which an expansion exists.
bool ChildrenAffected
True if one of this line's children intersects with an input range.
bool startsWithNamespace() const
true if this line starts a namespace definition.
bool startsWith(Ts... Tokens) const
true if this line starts with the given tokens in order, ignoring comments.
unsigned format(const SmallVectorImpl< AnnotatedLine * > &Lines, bool DryRun=false, int AdditionalIndent=0, bool FixBadIndentation=false, unsigned FirstStartColumn=0, unsigned NextStartColumn=0, unsigned LastStartColumn=0)
Format the current block and return the penalty.
#define UINT_MAX
Definition: limits.h:60
@ MR_UnexpandedArg
The token is part of a macro argument that was previously formatted as expansion when formatting the ...
Definition: FormatToken.h:196
@ MR_ExpandedArg
The token was expanded from a macro argument when formatting the expanded token sequence.
Definition: FormatToken.h:193
const FormatToken * getNamespaceToken(const AnnotatedLine *Line, const SmallVectorImpl< AnnotatedLine * > &AnnotatedLines)
static auto newlinesBeforeLine(const AnnotatedLine &Line, const AnnotatedLine *PreviousLine, const AnnotatedLine *PrevPrevLine, const SmallVectorImpl< AnnotatedLine * > &Lines, const FormatStyle &Style)
StringRef getNamespaceTokenText(const AnnotatedLine *Line, const SmallVectorImpl< AnnotatedLine * > &AnnotatedLines)
@ LT_CommentAbovePPDirective
ObjCKeywordKind
Provides a namespace for Objective-C keywords which start with an '@'.
Definition: TokenKinds.h:41
if(T->getSizeExpr()) TRY_TO(TraverseStmt(T -> getSizeExpr()))
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:55
@ LK_Java
Should be used for Java.
Definition: Format.h:2736
@ ELBAMS_LogicalBlock
Add empty line only when access modifier starts a new logical block.
Definition: Format.h:2147
@ ELBAMS_Never
Remove all empty lines before access modifiers.
Definition: Format.h:2127
@ ELBAMS_Always
Always add empty line before access modifiers unless access modifier is at the start of struct or cla...
Definition: Format.h:2167
@ ELBAMS_Leave
Keep existing empty lines before access modifiers.
Definition: Format.h:2129
@ PPDIS_BeforeHash
Indents directives before the hash.
Definition: Format.h:2398
@ PPDIS_None
Does not indent any directives.
Definition: Format.h:2380
@ SBS_Empty
Only merge empty blocks.
Definition: Format.h:513
@ SBS_Never
Never merge blocks into a single line.
Definition: Format.h:505
@ SIS_WithoutElse
Put short ifs on the same line only if there is no else statement.
Definition: Format.h:642
@ SIS_AllIfsAndElse
Always put short ifs, else ifs and else statements on the same line.
Definition: Format.h:672
@ BWACS_Always
Always wrap braces after a control statement.
Definition: Format.h:1011
@ BWACS_MultiLine
Only wrap braces after a multi-line control statement.
Definition: Format.h:1001
@ SFS_All
Merge all functions fitting on a single line.
Definition: Format.h:600
@ SFS_Empty
Only merge empty functions.
Definition: Format.h:581
@ SFS_InlineOnly
Only merge functions defined inside a class.
Definition: Format.h:573
unsigned MaxEmptyLinesToKeep
The maximum number of consecutive empty lines to keep.
Definition: Format.h:2868
@ ELAAMS_Always
Always add empty line after access modifiers if there are none.
Definition: Format.h:2102
@ ELAAMS_Never
Remove all empty lines after access modifiers.
Definition: Format.h:2078
@ ELAAMS_Leave
Keep existing empty lines after access modifiers.
Definition: Format.h:2081
EmptyLineBeforeAccessModifierStyle EmptyLineBeforeAccessModifier
Defines in which cases to put empty line before access modifiers.
Definition: Format.h:2172
bool KeepEmptyLinesAtTheStartOfBlocks
If true, the empty line at the start of blocks is kept.
Definition: Format.h:2687
EmptyLineAfterAccessModifierStyle EmptyLineAfterAccessModifier
Defines when to put an empty line after access modifiers.
Definition: Format.h:2109
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:256
const FormatToken * getNamespaceToken() const
Return the actual namespace token, if this token starts a namespace block.
Definition: FormatToken.h:794
unsigned OriginalColumn
The original 0-based column of this token, including expanded tabs.
Definition: FormatToken.h:451
StringRef TokenText
The raw text of the token.
Definition: FormatToken.h:275
FormatToken * getNextNonComment() const
Returns the next token ignoring comments.
Definition: FormatToken.h:761
FormatToken * getPreviousNonComment() const
Returns the previous token ignoring comments.
Definition: FormatToken.h:753
FormatToken * Next
The next token in the unwrapped line.
Definition: FormatToken.h:513
unsigned NewlinesBefore
The number of newlines immediately before the Token.
Definition: FormatToken.h:416
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:550
unsigned TotalLength
The total length of the unwrapped line up to and including this token.
Definition: FormatToken.h:447
bool isOneOf(A K1, B K2) const
Definition: FormatToken.h:562
unsigned IsFirst
Indicates that this is the first token of the file.
Definition: FormatToken.h:294
bool isAccessSpecifier(bool ColonRequired=true) const
Definition: FormatToken.h:608
bool FormatComplete
A value of false means that any of the affected ranges were not formatted due to a non-recoverable sy...
Definition: Format.h:4595
unsigned Line
If FormatComplete is false, Line records a one-based original line number at which a syntax error mig...
Definition: Format.h:4600
static const size_t kInvalidIndex