clang 20.0.0git
ContinuationIndenter.cpp
Go to the documentation of this file.
1//===--- ContinuationIndenter.cpp - Format C++ code -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements the continuation indenter.
11///
12//===----------------------------------------------------------------------===//
13
15#include "BreakableToken.h"
16#include "FormatInternal.h"
17#include "FormatToken.h"
18#include "WhitespaceManager.h"
22#include "clang/Format/Format.h"
23#include "llvm/ADT/StringSet.h"
24#include "llvm/Support/Debug.h"
25#include <optional>
26
27#define DEBUG_TYPE "format-indenter"
28
29namespace clang {
30namespace format {
31
32// Returns true if a TT_SelectorName should be indented when wrapped,
33// false otherwise.
37}
38
39// Returns true if a binary operator following \p Tok should be unindented when
40// the style permits it.
41static bool shouldUnindentNextOperator(const FormatToken &Tok) {
// NOTE(review): the statement that defines `Previous` is not visible in this
// listing; presumably it is a token preceding Tok -- confirm against the full
// file. The result is true only when `Previous` is non-null and is either an
// operator with assignment precedence, the `return` keyword, or a token
// annotated as TT_RequiresClause.
43 return Previous && (Previous->getPrecedence() == prec::Assignment ||
44 Previous->isOneOf(tok::kw_return, TT_RequiresClause));
45}
46
47// Returns the length of everything up to the first possible line break after
48// the ), ], } or > matching \c Tok.
49static unsigned getLengthToMatchingParen(const FormatToken &Tok,
// NOTE(review): the remainder of the parameter list is not visible in this
// listing. The body below reads a paren stack named `Stack` (indexable, with
// size(), holding ParenState entries) that is presumably declared there.
51 // Normally whether or not a break before T is possible is calculated and
52 // stored in T.CanBreakBefore. Braces, array initializers and text proto
53 // messages like `key: < ... >` are an exception: a break is possible
54 // before a closing brace R if a break was inserted after the corresponding
55 // opening brace. The information about whether or not a break is needed
56 // before a closing brace R is stored in the ParenState field
57 // S.BreakBeforeClosingBrace where S is the state that R closes.
58 //
59 // In order to decide whether there can be a break before encountered right
60 // braces, this implementation iterates over the sequence of tokens and over
61 // the paren stack in lockstep, keeping track of the stack level which visited
62 // right braces correspond to in MatchingStackIndex.
63 //
64 // For example, consider:
65 // L. <- line number
66 // 1. {
67 // 2. {1},
68 // 3. {2},
69 // 4. {{3}}}
70 // ^ where we call this method with this token.
71 // The paren stack at this point contains 3 brace levels:
72 // 0. { at line 1, BreakBeforeClosingBrace: true
73 // 1. first { at line 4, BreakBeforeClosingBrace: false
74 // 2. second { at line 4, BreakBeforeClosingBrace: false,
75 // where there might be fake parens levels in-between these levels.
76 // The algorithm will start at the first } on line 4, which is the matching
77 // brace of the initial left brace and at level 2 of the stack. Then,
78 // examining BreakBeforeClosingBrace: false at level 2, it will continue to
79 // the second } on line 4, and will traverse the stack downwards until it
80 // finds the matching { on level 1. Then, examining BreakBeforeClosingBrace:
81 // false at level 1, it will continue to the third } on line 4 and will
82 // traverse the stack downwards until it finds the matching { on level 0.
83 // Then, examining BreakBeforeClosingBrace: true at level 0, the algorithm
84 // will stop and will use the second } on line 4 to determine the length to
85 // return, as in this example the range will include the tokens: {3}}
86 //
87 // The algorithm will only traverse the stack if it encounters braces, array
88 // initializer squares or text proto angle brackets.
// Without a matching closing token there is nothing to measure.
89 if (!Tok.MatchingParen)
90 return 0;
91 FormatToken *End = Tok.MatchingParen;
92 // Maintains a stack level corresponding to the current End token.
93 int MatchingStackIndex = Stack.size() - 1;
94 // Traverses the stack downwards, looking for the level to which LBrace
95 // corresponds. Returns either a pointer to the matching level or nullptr if
96 // LBrace is not found in the initial portion of the stack up to
97 // MatchingStackIndex.
// MatchingStackIndex is captured by reference, so every call resumes the
// search from the level where the previous call stopped.
98 auto FindParenState = [&](const FormatToken *LBrace) -> const ParenState * {
99 while (MatchingStackIndex >= 0 && Stack[MatchingStackIndex].Tok != LBrace)
100 --MatchingStackIndex;
101 return MatchingStackIndex >= 0 ? &Stack[MatchingStackIndex] : nullptr;
102 };
// Extend End forward for as long as no break is possible before End->Next.
103 for (; End->Next; End = End->Next) {
// A regular break opportunity before the next token ends the range.
104 if (End->Next->CanBreakBefore)
105 break;
// Only scope-closing tokens need the paren-stack check below.
106 if (!End->Next->closesScope())
107 continue;
// For closers of braces, array initializer squares and '<', a break is
// possible if the state of the matching opener requests one.
108 if (End->Next->MatchingParen &&
109 End->Next->MatchingParen->isOneOf(
110 tok::l_brace, TT_ArrayInitializerLSquare, tok::less)) {
111 const ParenState *State = FindParenState(End->Next->MatchingParen);
112 if (State && State->BreakBeforeClosingBrace)
113 break;
114 }
115 }
116 return End->TotalLength - Tok.TotalLength + 1;
117}
118
119static unsigned getLengthToNextOperator(const FormatToken &Tok) {
120 if (!Tok.NextOperator)
121 return 0;
122 return Tok.NextOperator->TotalLength - Tok.TotalLength;
123}
124
125// Returns \c true if \c Tok is the "." or "->" of a call and starts the next
126// segment of a builder type call.
128 return Tok.isMemberAccess() && Tok.Previous && Tok.Previous->closesScope();
129}
130
131// Returns \c true if \c Current starts a new parameter.
132static bool startsNextParameter(const FormatToken &Current,
133 const FormatStyle &Style) {
// Current.Previous is dereferenced unconditionally, so Current must have a
// preceding token.
134 const FormatToken &Previous = *Current.Previous;
// A constructor-initializer comma can itself start a parameter.
// NOTE(review): the rest of this condition is not visible in this listing.
135 if (Current.is(TT_CtorInitializerComma) &&
137 return true;
138 }
// In LK_Proto files a selector name starts a new parameter.
139 if (Style.Language == FormatStyle::LK_Proto && Current.is(TT_SelectorName))
140 return true;
// Otherwise a parameter starts right after a comma, unless Current is a
// trailing comment. The remaining conditions treat constructor-initializer
// and inheritance commas specially.
// NOTE(review): those conditions are only partially visible in this listing.
141 return Previous.is(tok::comma) && !Current.isTrailingComment() &&
142 ((Previous.isNot(TT_CtorInitializerComma) ||
145 (Previous.isNot(TT_InheritanceComma) ||
147}
148
// Decides whether the '<' token \p LessTok is treated as opening a proto
// message field (judging by the function name -- confirm against callers).
// Tokens other than tok::less never qualify. For LK_TextProto every '<'
// qualifies. Otherwise the '<' must be nested (NestingLevel > 0) or directly
// follow an '=' token.
// NOTE(review): the leading part of that second alternative is not visible
// in this listing.
149static bool opensProtoMessageField(const FormatToken &LessTok,
150 const FormatStyle &Style) {
151 if (LessTok.isNot(tok::less))
152 return false;
153 return Style.Language == FormatStyle::LK_TextProto ||
155 (LessTok.NestingLevel > 0 ||
156 (LessTok.Previous && LessTok.Previous->is(tok::equal))));
157}
158
159// Returns the delimiter of a raw string literal, or std::nullopt if TokenText
160// is not the text of a raw string literal. The delimiter could be the empty
161// string. For example, the delimiter of R"deli(cont)deli" is deli.
162static std::optional<StringRef> getRawStringDelimiter(StringRef TokenText) {
163 if (TokenText.size() < 5 // The smallest raw string possible is 'R"()"'.
164 || !TokenText.starts_with("R\"") || !TokenText.ends_with("\"")) {
165 return std::nullopt;
166 }
167
168 // A raw string starts with 'R"<delimiter>(' and delimiter is ascii and has
169 // size at most 16 by the standard, so the first '(' must be among the first
170 // 19 bytes.
171 size_t LParenPos = TokenText.substr(0, 19).find_first_of('(');
172 if (LParenPos == StringRef::npos)
173 return std::nullopt;
174 StringRef Delimiter = TokenText.substr(2, LParenPos - 2);
175
176 // Check that the string ends in ')Delimiter"'.
177 size_t RParenPos = TokenText.size() - Delimiter.size() - 2;
178 if (TokenText[RParenPos] != ')')
179 return std::nullopt;
180 if (!TokenText.substr(RParenPos + 1).starts_with(Delimiter))
181 return std::nullopt;
182 return Delimiter;
183}
184
185// Returns the canonical delimiter for \p Language, or the empty string if no
186// canonical delimiter is specified.
187static StringRef
190 for (const auto &Format : Style.RawStringFormats)
191 if (Format.Language == Language)
192 return StringRef(Format.CanonicalDelimiter);
193 return "";
194}
195
197 const FormatStyle &CodeStyle) {
198 for (const auto &RawStringFormat : CodeStyle.RawStringFormats) {
199 std::optional<FormatStyle> LanguageStyle =
200 CodeStyle.GetLanguageStyle(RawStringFormat.Language);
201 if (!LanguageStyle) {
202 FormatStyle PredefinedStyle;
203 if (!getPredefinedStyle(RawStringFormat.BasedOnStyle,
204 RawStringFormat.Language, &PredefinedStyle)) {
205 PredefinedStyle = getLLVMStyle();
206 PredefinedStyle.Language = RawStringFormat.Language;
207 }
208 LanguageStyle = PredefinedStyle;
209 }
210 LanguageStyle->ColumnLimit = CodeStyle.ColumnLimit;
211 for (StringRef Delimiter : RawStringFormat.Delimiters)
212 DelimiterStyle.insert({Delimiter, *LanguageStyle});
213 for (StringRef EnclosingFunction : RawStringFormat.EnclosingFunctions)
214 EnclosingFunctionStyle.insert({EnclosingFunction, *LanguageStyle});
215 }
216}
217
218std::optional<FormatStyle>
220 auto It = DelimiterStyle.find(Delimiter);
221 if (It == DelimiterStyle.end())
222 return std::nullopt;
223 return It->second;
224}
225
226std::optional<FormatStyle>
228 StringRef EnclosingFunction) const {
229 auto It = EnclosingFunctionStyle.find(EnclosingFunction);
230 if (It == EnclosingFunctionStyle.end())
231 return std::nullopt;
232 return It->second;
233}
234
236 const AdditionalKeywords &Keywords,
237 const SourceManager &SourceMgr,
238 WhitespaceManager &Whitespaces,
239 encoding::Encoding Encoding,
240 bool BinPackInconclusiveFunctions)
241 : Style(Style), Keywords(Keywords), SourceMgr(SourceMgr),
242 Whitespaces(Whitespaces), Encoding(Encoding),
243 BinPackInconclusiveFunctions(BinPackInconclusiveFunctions),
244 CommentPragmasRegex(Style.CommentPragmas), RawStringFormats(Style) {}
245
247 unsigned FirstStartColumn,
248 const AnnotatedLine *Line,
249 bool DryRun) {
250 LineState State;
251 State.FirstIndent = FirstIndent;
252 if (FirstStartColumn && Line->First->NewlinesBefore == 0)
253 State.Column = FirstStartColumn;
254 else
255 State.Column = FirstIndent;
256 // With preprocessor directive indentation, the line starts on column 0
257 // since it's indented after the hash, but FirstIndent is set to the
258 // preprocessor indent.
260 (Line->Type == LT_PreprocessorDirective ||
261 Line->Type == LT_ImportStatement)) {
262 State.Column = 0;
263 }
264 State.Line = Line;
265 State.NextToken = Line->First;
266 State.Stack.push_back(ParenState(/*Tok=*/nullptr, FirstIndent, FirstIndent,
267 /*AvoidBinPacking=*/false,
268 /*NoLineBreak=*/false));
269 State.NoContinuation = false;
270 State.StartOfStringLiteral = 0;
271 State.NoLineBreak = false;
272 State.StartOfLineLevel = 0;
273 State.LowestLevelOnLine = 0;
274 State.IgnoreStackForComparison = false;
275
276 if (Style.Language == FormatStyle::LK_TextProto) {
277 // We need this in order to deal with the bin packing of text fields at
278 // global scope.
279 auto &CurrentState = State.Stack.back();
280 CurrentState.AvoidBinPacking = true;
281 CurrentState.BreakBeforeParameter = true;
282 CurrentState.AlignColons = false;
283 }
284
285 // The first token has already been indented and thus consumed.
286 moveStateToNextToken(State, DryRun, /*Newline=*/false);
287 return State;
288}
289
291 const FormatToken &Current = *State.NextToken;
292 const FormatToken &Previous = *Current.Previous;
293 const auto &CurrentState = State.Stack.back();
294 assert(&Previous == Current.Previous);
295 if (!Current.CanBreakBefore && !(CurrentState.BreakBeforeClosingBrace &&
296 Current.closesBlockOrBlockTypeList(Style))) {
297 return false;
298 }
299 // The opening "{" of a braced list has to be on the same line as the first
300 // element if it is nested in another braced init list or function call.
301 if (!Current.MustBreakBefore && Previous.is(tok::l_brace) &&
302 Previous.isNot(TT_DictLiteral) && Previous.is(BK_BracedInit) &&
303 Previous.Previous &&
304 Previous.Previous->isOneOf(tok::l_brace, tok::l_paren, tok::comma)) {
305 return false;
306 }
307 // This prevents breaks like:
308 // ...
309 // SomeParameter, OtherParameter).DoSomething(
310 // ...
311 // As they hide "DoSomething" and are generally bad for readability.
312 if (Previous.opensScope() && Previous.isNot(tok::l_brace) &&
313 State.LowestLevelOnLine < State.StartOfLineLevel &&
314 State.LowestLevelOnLine < Current.NestingLevel) {
315 return false;
316 }
317 if (Current.isMemberAccess() && CurrentState.ContainsUnwrappedBuilder)
318 return false;
319
320 // Don't create a 'hanging' indent if there are multiple blocks in a single
321 // statement and we are aligning lambda blocks to their signatures.
322 if (Previous.is(tok::l_brace) && State.Stack.size() > 1 &&
323 State.Stack[State.Stack.size() - 2].NestedBlockInlined &&
324 State.Stack[State.Stack.size() - 2].HasMultipleNestedBlocks &&
326 return false;
327 }
328
329 // Don't break after very short return types (e.g. "void") as that is often
330 // unexpected.
331 if (Current.is(TT_FunctionDeclarationName)) {
333 State.Column < 6) {
334 return false;
335 }
336
338 assert(State.Column >= State.FirstIndent);
339 if (State.Column - State.FirstIndent < 6)
340 return false;
341 }
342 }
343
344 // If binary operators are moved to the next line (including commas for some
345 // styles of constructor initializers), that's always ok.
346 if (!Current.isOneOf(TT_BinaryOperator, tok::comma) &&
347 // Allow breaking opening brace of lambdas (when passed as function
348 // arguments) to a new line when BeforeLambdaBody brace wrapping is
349 // enabled.
351 Current.isNot(TT_LambdaLBrace)) &&
352 CurrentState.NoLineBreakInOperand) {
353 return false;
354 }
355
356 if (Previous.is(tok::l_square) && Previous.is(TT_ObjCMethodExpr))
357 return false;
358
359 if (Current.is(TT_ConditionalExpr) && Previous.is(tok::r_paren) &&
360 Previous.MatchingParen && Previous.MatchingParen->Previous &&
361 Previous.MatchingParen->Previous->MatchingParen &&
362 Previous.MatchingParen->Previous->MatchingParen->is(TT_LambdaLBrace)) {
363 // We have a lambda within a conditional expression, allow breaking here.
364 assert(Previous.MatchingParen->Previous->is(tok::r_brace));
365 return true;
366 }
367
368 return !State.NoLineBreak && !CurrentState.NoLineBreak;
369}
370
372 const FormatToken &Current = *State.NextToken;
373 const FormatToken &Previous = *Current.Previous;
374 const auto &CurrentState = State.Stack.back();
375 if (Style.BraceWrapping.BeforeLambdaBody && Current.CanBreakBefore &&
376 Current.is(TT_LambdaLBrace) && Previous.isNot(TT_LineComment)) {
377 auto LambdaBodyLength = getLengthToMatchingParen(Current, State.Stack);
378 return LambdaBodyLength > getColumnLimit(State);
379 }
380 if (Current.MustBreakBefore ||
381 (Current.is(TT_InlineASMColon) &&
384 Style.ColumnLimit > 0)))) {
385 return true;
386 }
387 if (CurrentState.BreakBeforeClosingBrace &&
388 (Current.closesBlockOrBlockTypeList(Style) ||
389 (Current.is(tok::r_brace) &&
390 Current.isBlockIndentedInitRBrace(Style)))) {
391 return true;
392 }
393 if (CurrentState.BreakBeforeClosingParen && Current.is(tok::r_paren))
394 return true;
395 if (Style.Language == FormatStyle::LK_ObjC &&
397 Current.ObjCSelectorNameParts > 1 &&
398 Current.startsSequence(TT_SelectorName, tok::colon, tok::caret)) {
399 return true;
400 }
401 // Avoid producing inconsistent states by requiring breaks where they are not
402 // permitted for C# generic type constraints.
403 if (CurrentState.IsCSharpGenericTypeConstraint &&
404 Previous.isNot(TT_CSharpGenericTypeConstraintComma)) {
405 return false;
406 }
407 if ((startsNextParameter(Current, Style) || Previous.is(tok::semi) ||
408 (Previous.is(TT_TemplateCloser) && Current.is(TT_StartOfName) &&
409 State.Line->First->isNot(TT_AttributeSquare) && Style.isCpp() &&
410 // FIXME: This is a temporary workaround for the case where clang-format
411 // sets BreakBeforeParameter to avoid bin packing and this creates a
412 // completely unnecessary line break after a template type that isn't
413 // line-wrapped.
414 (Previous.NestingLevel == 1 || Style.BinPackParameters)) ||
415 (Style.BreakBeforeTernaryOperators && Current.is(TT_ConditionalExpr) &&
416 Previous.isNot(tok::question)) ||
418 Previous.is(TT_ConditionalExpr))) &&
419 CurrentState.BreakBeforeParameter && !Current.isTrailingComment() &&
420 !Current.isOneOf(tok::r_paren, tok::r_brace)) {
421 return true;
422 }
423 if (CurrentState.IsChainedConditional &&
424 ((Style.BreakBeforeTernaryOperators && Current.is(TT_ConditionalExpr) &&
425 Current.is(tok::colon)) ||
426 (!Style.BreakBeforeTernaryOperators && Previous.is(TT_ConditionalExpr) &&
427 Previous.is(tok::colon)))) {
428 return true;
429 }
430 if (((Previous.is(TT_DictLiteral) && Previous.is(tok::l_brace)) ||
431 (Previous.is(TT_ArrayInitializerLSquare) &&
432 Previous.ParameterCount > 1) ||
434 Style.ColumnLimit > 0 &&
435 getLengthToMatchingParen(Previous, State.Stack) + State.Column - 1 >
436 getColumnLimit(State)) {
437 return true;
438 }
439
440 const FormatToken &BreakConstructorInitializersToken =
442 ? Previous
443 : Current;
444 if (BreakConstructorInitializersToken.is(TT_CtorInitializerColon) &&
445 (State.Column + State.Line->Last->TotalLength - Previous.TotalLength >
446 getColumnLimit(State) ||
447 CurrentState.BreakBeforeParameter) &&
448 (!Current.isTrailingComment() || Current.NewlinesBefore > 0) &&
451 Style.ColumnLimit != 0)) {
452 return true;
453 }
454
455 if (Current.is(TT_ObjCMethodExpr) && Previous.isNot(TT_SelectorName) &&
456 State.Line->startsWith(TT_ObjCMethodSpecifier)) {
457 return true;
458 }
459 if (Current.is(TT_SelectorName) && Previous.isNot(tok::at) &&
460 CurrentState.ObjCSelectorNameFound && CurrentState.BreakBeforeParameter &&
462 !Current.startsSequence(TT_SelectorName, tok::colon, tok::caret))) {
463 return true;
464 }
465
466 unsigned NewLineColumn = getNewLineColumn(State);
467 if (Current.isMemberAccess() && Style.ColumnLimit != 0 &&
468 State.Column + getLengthToNextOperator(Current) > Style.ColumnLimit &&
469 (State.Column > NewLineColumn ||
470 Current.NestingLevel < State.StartOfLineLevel)) {
471 return true;
472 }
473
474 if (startsSegmentOfBuilderTypeCall(Current) &&
475 (CurrentState.CallContinuation != 0 ||
476 CurrentState.BreakBeforeParameter) &&
477 // JavaScript is treated different here as there is a frequent pattern:
478 // SomeFunction(function() {
479 // ...
480 // }.bind(...));
481 // FIXME: We should find a more generic solution to this problem.
482 !(State.Column <= NewLineColumn && Style.isJavaScript()) &&
483 !(Previous.closesScopeAfterBlock() && State.Column <= NewLineColumn)) {
484 return true;
485 }
486
487 // If the template declaration spans multiple lines, force wrap before the
488 // function/class declaration.
489 if (Previous.ClosesTemplateDeclaration && CurrentState.BreakBeforeParameter &&
490 Current.CanBreakBefore) {
491 return true;
492 }
493
494 if (State.Line->First->isNot(tok::kw_enum) && State.Column <= NewLineColumn)
495 return false;
496
498 (NewLineColumn == State.FirstIndent + Style.ContinuationIndentWidth ||
499 Previous.is(tok::comma) || Current.NestingLevel < 2) &&
500 !Previous.isOneOf(tok::kw_return, tok::lessless, tok::at,
501 Keywords.kw_dollar) &&
502 !Previous.isOneOf(TT_InlineASMColon, TT_ConditionalExpr) &&
503 nextIsMultilineString(State)) {
504 return true;
505 }
506
507 // Using CanBreakBefore here and below takes care of the decision whether the
508 // current style uses wrapping before or after operators for the given
509 // operator.
510 if (Previous.is(TT_BinaryOperator) && Current.CanBreakBefore) {
511 const auto PreviousPrecedence = Previous.getPrecedence();
512 if (PreviousPrecedence != prec::Assignment &&
513 CurrentState.BreakBeforeParameter && !Current.isTrailingComment()) {
514 const bool LHSIsBinaryExpr =
515 Previous.Previous && Previous.Previous->EndsBinaryExpression;
516 if (LHSIsBinaryExpr)
517 return true;
518 // If we need to break somewhere inside the LHS of a binary expression, we
519 // should also break after the operator. Otherwise, the formatting would
520 // hide the operator precedence, e.g. in:
521 // if (aaaaaaaaaaaaaa ==
522 // bbbbbbbbbbbbbb && c) {..
523 // For comparisons, we only apply this rule, if the LHS is a binary
524 // expression itself as otherwise, the line breaks seem superfluous.
525 // We need special cases for ">>" which we have split into two ">" while
526 // lexing in order to make template parsing easier.
527 const bool IsComparison =
528 (PreviousPrecedence == prec::Relational ||
529 PreviousPrecedence == prec::Equality ||
530 PreviousPrecedence == prec::Spaceship) &&
531 Previous.Previous &&
532 Previous.Previous->isNot(TT_BinaryOperator); // For >>.
533 if (!IsComparison)
534 return true;
535 }
536 } else if (Current.is(TT_BinaryOperator) && Current.CanBreakBefore &&
537 CurrentState.BreakBeforeParameter) {
538 return true;
539 }
540
541 // Same as above, but for the first "<<" operator.
542 if (Current.is(tok::lessless) && Current.isNot(TT_OverloadedOperator) &&
543 CurrentState.BreakBeforeParameter && CurrentState.FirstLessLess == 0) {
544 return true;
545 }
546
547 if (Current.NestingLevel == 0 && !Current.isTrailingComment()) {
548 // Always break after "template <...>"(*) and leading annotations. This is
549 // only for cases where the entire line does not fit on a single line as a
550 // different LineFormatter would be used otherwise.
551 // *: Except when another option interferes with that, like concepts.
552 if (Previous.ClosesTemplateDeclaration) {
553 if (Current.is(tok::kw_concept)) {
554 switch (Style.BreakBeforeConceptDeclarations) {
556 break;
558 return true;
560 return false;
561 }
562 }
563 if (Current.is(TT_RequiresClause)) {
564 switch (Style.RequiresClausePosition) {
567 return false;
568 default:
569 return true;
570 }
571 }
574 Current.NewlinesBefore > 0);
575 }
576 if (Previous.is(TT_FunctionAnnotationRParen) &&
577 State.Line->Type != LT_PreprocessorDirective) {
578 return true;
579 }
580 if (Previous.is(TT_LeadingJavaAnnotation) && Current.isNot(tok::l_paren) &&
581 Current.isNot(TT_LeadingJavaAnnotation)) {
582 return true;
583 }
584 }
585
586 if (Style.isJavaScript() && Previous.is(tok::r_paren) &&
587 Previous.is(TT_JavaAnnotation)) {
588 // Break after the closing parenthesis of TypeScript decorators before
589 // functions, getters and setters.
590 static const llvm::StringSet<> BreakBeforeDecoratedTokens = {"get", "set",
591 "function"};
592 if (BreakBeforeDecoratedTokens.contains(Current.TokenText))
593 return true;
594 }
595
596 if (Current.is(TT_FunctionDeclarationName) &&
597 !State.Line->ReturnTypeWrapped &&
598 // Don't break before a C# function when no break after return type.
599 (!Style.isCSharp() ||
601 // Don't always break between a JavaScript `function` and the function
602 // name.
603 !Style.isJavaScript() && Previous.isNot(tok::kw_template) &&
604 CurrentState.BreakBeforeParameter) {
605 return true;
606 }
607
608 // The following could be precomputed as they do not depend on the state.
609 // However, as they should take effect only if the UnwrappedLine does not fit
610 // into the ColumnLimit, they are checked here in the ContinuationIndenter.
611 if (Style.ColumnLimit != 0 && Previous.is(BK_Block) &&
612 Previous.is(tok::l_brace) &&
613 !Current.isOneOf(tok::r_brace, tok::comment)) {
614 return true;
615 }
616
617 if (Current.is(tok::lessless) &&
618 ((Previous.is(tok::identifier) && Previous.TokenText == "endl") ||
619 (Previous.Tok.isLiteral() && (Previous.TokenText.ends_with("\\n\"") ||
620 Previous.TokenText == "\'\\n\'")))) {
621 return true;
622 }
623
624 if (Previous.is(TT_BlockComment) && Previous.IsMultiline)
625 return true;
626
627 if (State.NoContinuation)
628 return true;
629
630 return false;
631}
632
634 bool DryRun,
635 unsigned ExtraSpaces) {
636 const FormatToken &Current = *State.NextToken;
637 assert(State.NextToken->Previous);
638 const FormatToken &Previous = *State.NextToken->Previous;
639
640 assert(!State.Stack.empty());
641 State.NoContinuation = false;
642
643 if (Current.is(TT_ImplicitStringLiteral) &&
644 (!Previous.Tok.getIdentifierInfo() ||
645 Previous.Tok.getIdentifierInfo()->getPPKeywordID() ==
646 tok::pp_not_keyword)) {
647 unsigned EndColumn =
648 SourceMgr.getSpellingColumnNumber(Current.WhitespaceRange.getEnd());
649 if (Current.LastNewlineOffset != 0) {
650 // If there is a newline within this token, the final column will solely
651 // be determined by the current end column.
652 State.Column = EndColumn;
653 } else {
654 unsigned StartColumn =
655 SourceMgr.getSpellingColumnNumber(Current.WhitespaceRange.getBegin());
656 assert(EndColumn >= StartColumn);
657 State.Column += EndColumn - StartColumn;
658 }
659 moveStateToNextToken(State, DryRun, /*Newline=*/false);
660 return 0;
661 }
662
663 unsigned Penalty = 0;
664 if (Newline)
665 Penalty = addTokenOnNewLine(State, DryRun);
666 else
667 addTokenOnCurrentLine(State, DryRun, ExtraSpaces);
668
669 return moveStateToNextToken(State, DryRun, Newline) + Penalty;
670}
671
672void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
673 unsigned ExtraSpaces) {
674 FormatToken &Current = *State.NextToken;
675 assert(State.NextToken->Previous);
676 const FormatToken &Previous = *State.NextToken->Previous;
677 auto &CurrentState = State.Stack.back();
678
679 bool DisallowLineBreaksOnThisLine =
681 Style.isCpp() && [&Current] {
682 // Deal with lambda arguments in C++. The aim here is to ensure that we
683 // don't over-indent lambda function bodies when lambdas are passed as
684 // arguments to function calls. We do this by ensuring that either all
685 // arguments (including any lambdas) go on the same line as the function
686 // call, or we break before the first argument.
687 const auto *Prev = Current.Previous;
688 if (!Prev)
689 return false;
690 // For example, `/*Newline=*/false`.
691 if (Prev->is(TT_BlockComment) && Current.SpacesRequiredBefore == 0)
692 return false;
693 const auto *PrevNonComment = Current.getPreviousNonComment();
694 if (!PrevNonComment || PrevNonComment->isNot(tok::l_paren))
695 return false;
696 if (Current.isOneOf(tok::comment, tok::l_paren, TT_LambdaLSquare))
697 return false;
698 auto BlockParameterCount = PrevNonComment->BlockParameterCount;
699 if (BlockParameterCount == 0)
700 return false;
701
702 // Multiple lambdas in the same function call.
703 if (BlockParameterCount > 1)
704 return true;
705
706 // A lambda followed by another arg.
707 if (!PrevNonComment->Role)
708 return false;
709 auto Comma = PrevNonComment->Role->lastComma();
710 if (!Comma)
711 return false;
712 auto Next = Comma->getNextNonComment();
713 return Next &&
714 !Next->isOneOf(TT_LambdaLSquare, tok::l_brace, tok::caret);
715 }();
716
717 if (DisallowLineBreaksOnThisLine)
718 State.NoLineBreak = true;
719
720 if (Current.is(tok::equal) &&
721 (State.Line->First->is(tok::kw_for) || Current.NestingLevel == 0) &&
722 CurrentState.VariablePos == 0 &&
723 (!Previous.Previous ||
724 Previous.Previous->isNot(TT_DesignatedInitializerPeriod))) {
725 CurrentState.VariablePos = State.Column;
726 // Move over * and & if they are bound to the variable name.
727 const FormatToken *Tok = &Previous;
728 while (Tok && CurrentState.VariablePos >= Tok->ColumnWidth) {
729 CurrentState.VariablePos -= Tok->ColumnWidth;
730 if (Tok->SpacesRequiredBefore != 0)
731 break;
732 Tok = Tok->Previous;
733 }
734 if (Previous.PartOfMultiVariableDeclStmt)
735 CurrentState.LastSpace = CurrentState.VariablePos;
736 }
737
738 unsigned Spaces = Current.SpacesRequiredBefore + ExtraSpaces;
739
740 // Indent preprocessor directives after the hash if required.
741 int PPColumnCorrection = 0;
743 Previous.is(tok::hash) && State.FirstIndent > 0 &&
744 &Previous == State.Line->First &&
745 (State.Line->Type == LT_PreprocessorDirective ||
746 State.Line->Type == LT_ImportStatement)) {
747 Spaces += State.FirstIndent;
748
749 // For preprocessor indent with tabs, State.Column will be 1 because of the
750 // hash. This causes second-level indents onward to have an extra space
751 // after the tabs. We avoid this misalignment by subtracting 1 from the
752 // column value passed to replaceWhitespace().
753 if (Style.UseTab != FormatStyle::UT_Never)
754 PPColumnCorrection = -1;
755 }
756
757 if (!DryRun) {
758 Whitespaces.replaceWhitespace(Current, /*Newlines=*/0, Spaces,
759 State.Column + Spaces + PPColumnCorrection,
760 /*IsAligned=*/false, State.Line->InMacroBody);
761 }
762
763 // In "BreakBeforeInheritanceComma" mode, don't break within the inheritance
764 // declaration unless there is multiple inheritance.
766 Current.is(TT_InheritanceColon)) {
767 CurrentState.NoLineBreak = true;
768 }
770 Previous.is(TT_InheritanceColon)) {
771 CurrentState.NoLineBreak = true;
772 }
773
774 if (Current.is(TT_SelectorName) && !CurrentState.ObjCSelectorNameFound) {
775 unsigned MinIndent = std::max(
776 State.FirstIndent + Style.ContinuationIndentWidth, CurrentState.Indent);
777 unsigned FirstColonPos = State.Column + Spaces + Current.ColumnWidth;
778 if (Current.LongestObjCSelectorName == 0)
779 CurrentState.AlignColons = false;
780 else if (MinIndent + Current.LongestObjCSelectorName > FirstColonPos)
781 CurrentState.ColonPos = MinIndent + Current.LongestObjCSelectorName;
782 else
783 CurrentState.ColonPos = FirstColonPos;
784 }
785
786 // In "AlwaysBreak" or "BlockIndent" mode, enforce wrapping directly after the
787 // parenthesis by disallowing any further line breaks if there is no line
788 // break after the opening parenthesis. Don't break if it doesn't conserve
789 // columns.
790 auto IsOpeningBracket = [&](const FormatToken &Tok) {
791 auto IsStartOfBracedList = [&]() {
792 return Tok.is(tok::l_brace) && Tok.isNot(BK_Block) &&
794 };
795 if (!Tok.isOneOf(tok::l_paren, TT_TemplateOpener, tok::l_square) &&
796 !IsStartOfBracedList()) {
797 return false;
798 }
799 if (!Tok.Previous)
800 return true;
801 if (Tok.Previous->isIf())
803 return !Tok.Previous->isOneOf(TT_CastRParen, tok::kw_for, tok::kw_while,
804 tok::kw_switch);
805 };
806 auto IsFunctionCallParen = [](const FormatToken &Tok) {
807 return Tok.is(tok::l_paren) && Tok.ParameterCount > 0 && Tok.Previous &&
808 Tok.Previous->is(tok::identifier);
809 };
810 const auto IsInTemplateString = [this](const FormatToken &Tok) {
811 if (!Style.isJavaScript())
812 return false;
813 for (const auto *Prev = &Tok; Prev; Prev = Prev->Previous) {
814 if (Prev->is(TT_TemplateString) && Prev->opensScope())
815 return true;
816 if (Prev->is(TT_TemplateString) && Prev->closesScope())
817 break;
818 }
819 return false;
820 };
821 // Identifies simple (no expression) one-argument function calls.
822 const auto IsSimpleFunction = [&](const FormatToken &Tok) {
823 if (!Tok.FakeLParens.empty() && Tok.FakeLParens.back() > prec::Unknown)
824 return false;
825 const auto *Previous = Tok.Previous;
826 if (!Previous || (!Previous->isOneOf(TT_FunctionDeclarationLParen,
827 TT_LambdaDefinitionLParen) &&
828 !IsFunctionCallParen(*Previous))) {
829 return true;
830 }
831 if (IsOpeningBracket(Tok) || IsInTemplateString(Tok))
832 return true;
833 const auto *Next = Tok.Next;
834 return !Next || Next->isMemberAccess() ||
835 Next->is(TT_FunctionDeclarationLParen) || IsFunctionCallParen(*Next);
836 };
839 IsOpeningBracket(Previous) && State.Column > getNewLineColumn(State) &&
840 // Don't do this for simple (no expressions) one-argument function calls
841 // as that feels like needlessly wasting whitespace, e.g.:
842 //
843 // caaaaaaaaaaaall(
844 // caaaaaaaaaaaall(
845 // caaaaaaaaaaaall(
846 // caaaaaaaaaaaaaaaaaaaaaaall(aaaaaaaaaaaaaa, aaaaaaaaa))));
847 !IsSimpleFunction(Current)) {
848 CurrentState.NoLineBreak = true;
849 }
850
851 if (Previous.is(TT_TemplateString) && Previous.opensScope())
852 CurrentState.NoLineBreak = true;
853
854 // Align following lines within parentheses / brackets if configured.
855 // Note: This doesn't apply to macro expansion lines, which are MACRO( , , )
856 // with args as children of the '(' and ',' tokens. It does not make sense to
857 // align the commas with the opening paren.
859 !CurrentState.IsCSharpGenericTypeConstraint && Previous.opensScope() &&
860 Previous.isNot(TT_ObjCMethodExpr) && Previous.isNot(TT_RequiresClause) &&
861 Previous.isNot(TT_TableGenDAGArgOpener) &&
862 Previous.isNot(TT_TableGenDAGArgOpenerToBreak) &&
863 !(Current.MacroParent && Previous.MacroParent) &&
864 (Current.isNot(TT_LineComment) ||
865 Previous.isOneOf(BK_BracedInit, TT_VerilogMultiLineListLParen)) &&
866 !IsInTemplateString(Current)) {
867 CurrentState.Indent = State.Column + Spaces;
868 CurrentState.IsAligned = true;
869 }
870 if (CurrentState.AvoidBinPacking && startsNextParameter(Current, Style))
871 CurrentState.NoLineBreak = true;
872 if (startsSegmentOfBuilderTypeCall(Current) &&
873 State.Column > getNewLineColumn(State)) {
874 CurrentState.ContainsUnwrappedBuilder = true;
875 }
876
877 if (Current.is(TT_TrailingReturnArrow) &&
879 CurrentState.NoLineBreak = true;
880 }
881 if (Current.isMemberAccess() && Previous.is(tok::r_paren) &&
882 (Previous.MatchingParen &&
883 (Previous.TotalLength - Previous.MatchingParen->TotalLength > 10))) {
884 // If there is a function call with long parameters, break before trailing
885 // calls. This prevents things like:
886 // EXPECT_CALL(SomeLongParameter).Times(
887 // 2);
888 // We don't want to do this for short parameters as they can just be
889 // indexes.
890 CurrentState.NoLineBreak = true;
891 }
892
893 // Don't allow the RHS of an operator to be split over multiple lines unless
894 // there is a line-break right after the operator.
895 // Exclude relational operators, as there, it is always more desirable to
896 // have the LHS 'left' of the RHS.
897 const FormatToken *P = Current.getPreviousNonComment();
898 if (Current.isNot(tok::comment) && P &&
899 (P->isOneOf(TT_BinaryOperator, tok::comma) ||
900 (P->is(TT_ConditionalExpr) && P->is(tok::colon))) &&
901 !P->isOneOf(TT_OverloadedOperator, TT_CtorInitializerComma) &&
902 P->getPrecedence() != prec::Assignment &&
903 P->getPrecedence() != prec::Relational &&
904 P->getPrecedence() != prec::Spaceship) {
905 bool BreakBeforeOperator =
906 P->MustBreakBefore || P->is(tok::lessless) ||
907 (P->is(TT_BinaryOperator) &&
909 (P->is(TT_ConditionalExpr) && Style.BreakBeforeTernaryOperators);
910 // Don't do this if there are only two operands. In these cases, there is
911 // always a nice vertical separation between them and the extra line break
912 // does not help.
913 bool HasTwoOperands = P->OperatorIndex == 0 && !P->NextOperator &&
914 P->isNot(TT_ConditionalExpr);
915 if ((!BreakBeforeOperator &&
916 !(HasTwoOperands &&
918 (!CurrentState.LastOperatorWrapped && BreakBeforeOperator)) {
919 CurrentState.NoLineBreakInOperand = true;
920 }
921 }
922
923 State.Column += Spaces;
924 if (Current.isNot(tok::comment) && Previous.is(tok::l_paren) &&
925 Previous.Previous &&
926 (Previous.Previous->is(tok::kw_for) || Previous.Previous->isIf())) {
927 // Treat the condition inside an if as if it was a second function
928 // parameter, i.e. let nested calls have a continuation indent.
929 CurrentState.LastSpace = State.Column;
930 CurrentState.NestedBlockIndent = State.Column;
931 } else if (!Current.isOneOf(tok::comment, tok::caret) &&
932 ((Previous.is(tok::comma) &&
933 Previous.isNot(TT_OverloadedOperator)) ||
934 (Previous.is(tok::colon) && Previous.is(TT_ObjCMethodExpr)))) {
935 CurrentState.LastSpace = State.Column;
936 } else if (Previous.is(TT_CtorInitializerColon) &&
937 (!Current.isTrailingComment() || Current.NewlinesBefore > 0) &&
940 CurrentState.Indent = State.Column;
941 CurrentState.LastSpace = State.Column;
942 } else if (Previous.isOneOf(TT_ConditionalExpr, TT_CtorInitializerColon)) {
943 CurrentState.LastSpace = State.Column;
944 } else if (Previous.is(TT_BinaryOperator) &&
945 ((Previous.getPrecedence() != prec::Assignment &&
946 (Previous.isNot(tok::lessless) || Previous.OperatorIndex != 0 ||
947 Previous.NextOperator)) ||
948 Current.StartsBinaryExpression)) {
949 // Indent relative to the RHS of the expression unless this is a simple
950 // assignment without binary expression on the RHS.
952 CurrentState.LastSpace = State.Column;
953 } else if (Previous.is(TT_InheritanceColon)) {
954 CurrentState.Indent = State.Column;
955 CurrentState.LastSpace = State.Column;
956 } else if (Current.is(TT_CSharpGenericTypeConstraintColon)) {
957 CurrentState.ColonPos = State.Column;
958 } else if (Previous.opensScope()) {
959 // If a function has a trailing call, indent all parameters from the
960 // opening parenthesis. This avoids confusing indents like:
961 // OuterFunction(InnerFunctionCall( // break
962 // ParameterToInnerFunction)) // break
963 // .SecondInnerFunctionCall();
964 if (Previous.MatchingParen) {
965 const FormatToken *Next = Previous.MatchingParen->getNextNonComment();
966 if (Next && Next->isMemberAccess() && State.Stack.size() > 1 &&
967 State.Stack[State.Stack.size() - 2].CallContinuation == 0) {
968 CurrentState.LastSpace = State.Column;
969 }
970 }
971 }
972}
973
// Puts the pending token (State.NextToken) at the start of a new line and
// updates \p State to reflect that break.
//
// The new column is obtained from getNewLineColumn(). When \p DryRun is false
// the whitespace change is also recorded through Whitespaces.replaceWhitespace.
//
// Returns the extra penalty accumulated for this break: a fixed amount for the
// first break on the current stack level, the token's SplitPenalty, an optional
// PenaltyBreakFirstLessLess, and PenaltyIndentedWhitespace scaled by how far
// the new column lies to the right of State.FirstIndent.
//
// NOTE(review): this listing omits several source lines (1076, 1144, 1188,
// 1203, 1213, 1215-1216, 1219 and 1221), so a few conditions and expressions
// below appear without their first or last operand. Consult the complete file
// before changing any of those statements.
974unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State,
975 bool DryRun) {
976 FormatToken &Current = *State.NextToken;
977 assert(State.NextToken->Previous);
978 const FormatToken &Previous = *State.NextToken->Previous;
979 auto &CurrentState = State.Stack.back();
980
981 // Extra penalty that needs to be added because of the way certain line
982 // breaks are chosen.
983 unsigned Penalty = 0;
984
985 const FormatToken *PreviousNonComment = Current.getPreviousNonComment();
986 const FormatToken *NextNonComment = Previous.getNextNonComment();
987 if (!NextNonComment)
988 NextNonComment = &Current;
989 // The first line break on any NestingLevel causes an extra penalty in order
990 // to prefer similar line breaks.
991 if (!CurrentState.ContainsLineBreak)
992 Penalty += 15;
993 CurrentState.ContainsLineBreak = true;
994
995 Penalty += State.NextToken->SplitPenalty;
996
997 // Breaking before the first "<<" is generally not desirable if the LHS is
998 // short. Also always add the penalty if the LHS is split over multiple lines
999 // to avoid unnecessary line breaks that just work around this penalty.
1000 if (NextNonComment->is(tok::lessless) && CurrentState.FirstLessLess == 0 &&
1001 (State.Column <= Style.ColumnLimit / 3 ||
1002 CurrentState.BreakBeforeParameter)) {
1003 Penalty += Style.PenaltyBreakFirstLessLess;
1004 }
1005
     // From here on State.Column is the column at which Current starts on its
     // new line; the checks above still saw the column before the break.
1006 State.Column = getNewLineColumn(State);
1007
1008 // Add Penalty proportional to amount of whitespace away from FirstColumn
1009 // This tends to penalize several lines that are far-right indented,
1010 // and prefers a line-break prior to such a block, e.g:
1011 //
1012 // Constructor() :
1013 // member(value), looooooooooooooooong_member(
1014 // looooooooooong_call(param_1, param_2, param_3))
1015 // would then become
1016 // Constructor() :
1017 // member(value),
1018 // looooooooooooooooong_member(
1019 // looooooooooong_call(param_1, param_2, param_3))
1020 if (State.Column > State.FirstIndent) {
1021 Penalty +=
1022 Style.PenaltyIndentedWhitespace * (State.Column - State.FirstIndent);
1023 }
1024
1025 // Indent nested blocks relative to this column, unless in a very specific
1026 // JavaScript special case where:
1027 //
1028 // var loooooong_name =
1029 // function() {
1030 // // code
1031 // }
1032 //
1033 // is common and should be formatted like a free-standing function. The same
1034 // goes for wrapping before the lambda return type arrow.
1035 if (Current.isNot(TT_TrailingReturnArrow) &&
1036 (!Style.isJavaScript() || Current.NestingLevel != 0 ||
1037 !PreviousNonComment || PreviousNonComment->isNot(tok::equal) ||
1038 !Current.isOneOf(Keywords.kw_async, Keywords.kw_function))) {
1039 CurrentState.NestedBlockIndent = State.Column;
1040 }
1041
     // Record alignment anchors for what follows on this level: the column of
     // the first wrapped member access, or the colon position used to align
     // ObjC selector parts.
1042 if (NextNonComment->isMemberAccess()) {
1043 if (CurrentState.CallContinuation == 0)
1044 CurrentState.CallContinuation = State.Column;
1045 } else if (NextNonComment->is(TT_SelectorName)) {
1046 if (!CurrentState.ObjCSelectorNameFound) {
1047 if (NextNonComment->LongestObjCSelectorName == 0) {
1048 CurrentState.AlignColons = false;
1049 } else {
1050 CurrentState.ColonPos =
1051 (shouldIndentWrappedSelectorName(Style, State.Line->Type)
1052 ? std::max(CurrentState.Indent,
1053 State.FirstIndent + Style.ContinuationIndentWidth)
1054 : CurrentState.Indent) +
1055 std::max(NextNonComment->LongestObjCSelectorName,
1056 NextNonComment->ColumnWidth);
1057 }
1058 } else if (CurrentState.AlignColons &&
1059 CurrentState.ColonPos <= NextNonComment->ColumnWidth) {
1060 CurrentState.ColonPos = State.Column + NextNonComment->ColumnWidth;
1061 }
1062 } else if (PreviousNonComment && PreviousNonComment->is(tok::colon) &&
1063 PreviousNonComment->isOneOf(TT_ObjCMethodExpr, TT_DictLiteral)) {
1064 // FIXME: This is hacky, find a better way. The problem is that in an ObjC
1065 // method expression, the block should be aligned to the line starting it,
1066 // e.g.:
1067 // [aaaaaaaaaaaaaaa aaaaaaaaa: \\ break for some reason
1068 // ^(int *i) {
1069 // // ...
1070 // }];
1071 // Thus, we set LastSpace of the next higher NestingLevel, to which we move
1072 // when we consume all of the "}"'s FakeRParens at the "{".
1073 if (State.Stack.size() > 1) {
1074 State.Stack[State.Stack.size() - 2].LastSpace =
1075 std::max(CurrentState.LastSpace, CurrentState.Indent) +
1077 }
1078 }
1079
     // The following checks clear or set BreakBeforeParameter in sequence; a
     // later check overrides an earlier one when both apply.
1080 if ((PreviousNonComment &&
1081 PreviousNonComment->isOneOf(tok::comma, tok::semi) &&
1082 !CurrentState.AvoidBinPacking) ||
1083 Previous.is(TT_BinaryOperator)) {
1084 CurrentState.BreakBeforeParameter = false;
1085 }
1086 if (PreviousNonComment &&
1087 (PreviousNonComment->isOneOf(TT_TemplateCloser, TT_JavaAnnotation) ||
1088 PreviousNonComment->ClosesRequiresClause) &&
1089 Current.NestingLevel == 0) {
1090 CurrentState.BreakBeforeParameter = false;
1091 }
1092 if (NextNonComment->is(tok::question) ||
1093 (PreviousNonComment && PreviousNonComment->is(tok::question))) {
1094 CurrentState.BreakBeforeParameter = true;
1095 }
1096 if (Current.is(TT_BinaryOperator) && Current.CanBreakBefore)
1097 CurrentState.BreakBeforeParameter = false;
1098
     // Only a real (non-dry) run records the whitespace change. At least one
     // newline is written, and the number of newlines kept from the input is
     // capped by MaxEmptyLinesToKeep.
1099 if (!DryRun) {
1100 unsigned MaxEmptyLinesToKeep = Style.MaxEmptyLinesToKeep + 1;
1101 if (Current.is(tok::r_brace) && Current.MatchingParen &&
1102 // Only strip trailing empty lines for l_braces that have children, i.e.
1103 // for function expressions (lambdas, arrows, etc).
1104 !Current.MatchingParen->Children.empty()) {
1105 // lambdas and arrow functions are expressions, thus their r_brace is not
1106 // on its own line, and thus not covered by UnwrappedLineFormatter's logic
1107 // about removing empty lines on closing blocks. Special case them here.
1108 MaxEmptyLinesToKeep = 1;
1109 }
1110 unsigned Newlines =
1111 std::max(1u, std::min(Current.NewlinesBefore, MaxEmptyLinesToKeep));
1112 bool ContinuePPDirective =
1113 State.Line->InPPDirective && State.Line->Type != LT_ImportStatement;
1114 Whitespaces.replaceWhitespace(Current, Newlines, State.Column, State.Column,
1115 CurrentState.IsAligned, ContinuePPDirective);
1116 }
1117
1118 if (!Current.isTrailingComment())
1119 CurrentState.LastSpace = State.Column;
1120 if (Current.is(tok::lessless)) {
1121 // If we are breaking before a "<<", we always want to indent relative to
1122 // RHS. This is necessary only for "<<", as we special-case it and don't
1123 // always indent relative to the RHS.
1124 CurrentState.LastSpace += 3; // 3 -> width of "<< ".
1125 }
1126
     // The new line starts at Current's nesting level, so both per-line level
     // trackers are reset to it.
1127 State.StartOfLineLevel = Current.NestingLevel;
1128 State.LowestLevelOnLine = Current.NestingLevel;
1129
1130 // Any break on this level means that the parent level has been broken
1131 // and we need to avoid bin packing there.
1132 bool NestedBlockSpecialCase =
1133 (!Style.isCpp() && Current.is(tok::r_brace) && State.Stack.size() > 1 &&
1134 State.Stack[State.Stack.size() - 2].NestedBlockInlined) ||
1135 (Style.Language == FormatStyle::LK_ObjC && Current.is(tok::r_brace) &&
1136 State.Stack.size() > 1 && !Style.ObjCBreakBeforeNestedBlockParam);
1137 // Do not force parameter break for statements with requires expressions.
1138 NestedBlockSpecialCase =
1139 NestedBlockSpecialCase ||
1140 (Current.MatchingParen &&
1141 Current.MatchingParen->is(TT_RequiresExpressionLBrace));
1142 if (!NestedBlockSpecialCase) {
     // By default start at the level directly enclosing the current one
     // (second entry from the top of the stack).
1143 auto ParentLevelIt = std::next(State.Stack.rbegin());
1145 Current.MatchingParen && Current.MatchingParen->is(TT_LambdaLBrace)) {
1146 // If the first character on the new line is a lambda's closing brace, the
1147 // stack still contains that lambda's parenthesis. As such, we need to
1148 // recurse further down the stack than usual to find the parenthesis level
1149 // containing the lambda, which is where we want to set
1150 // BreakBeforeParameter.
1151 //
1152 // We specifically special case "OuterScope"-formatted lambdas here
1153 // because, when using that setting, breaking before the parameter
1154 // directly following the lambda is particularly unsightly. However, when
1155 // "OuterScope" is not set, the logic to find the parent parenthesis level
1156 // still appears to be sometimes incorrect. It has not been fixed yet
1157 // because it would lead to significant changes in existing behaviour.
1158 //
1159 // TODO: fix the non-"OuterScope" case too.
1160 auto FindCurrentLevel = [&](const auto &It) {
1161 return std::find_if(It, State.Stack.rend(), [](const auto &PState) {
1162 return PState.Tok != nullptr; // Ignore fake parens.
1163 });
1164 };
     // Advances the iterator by one unless it already is rend(), so the
     // search never steps past the bottom of the stack.
1165 auto MaybeIncrement = [&](const auto &It) {
1166 return It != State.Stack.rend() ? std::next(It) : It;
1167 };
1168 auto LambdaLevelIt = FindCurrentLevel(State.Stack.rbegin());
1169 auto LevelContainingLambdaIt =
1170 FindCurrentLevel(MaybeIncrement(LambdaLevelIt));
1171 ParentLevelIt = MaybeIncrement(LevelContainingLambdaIt);
1172 }
     // Flag every level from ParentLevelIt down to the bottom of the stack.
1173 for (auto I = ParentLevelIt, E = State.Stack.rend(); I != E; ++I)
1174 I->BreakBeforeParameter = true;
1175 }
1176
1177 if (PreviousNonComment &&
1178 !PreviousNonComment->isOneOf(tok::comma, tok::colon, tok::semi) &&
1179 ((PreviousNonComment->isNot(TT_TemplateCloser) &&
1180 !PreviousNonComment->ClosesRequiresClause) ||
1181 Current.NestingLevel != 0) &&
1182 !PreviousNonComment->isOneOf(
1183 TT_BinaryOperator, TT_FunctionAnnotationRParen, TT_JavaAnnotation,
1184 TT_LeadingJavaAnnotation) &&
1185 Current.isNot(TT_BinaryOperator) && !PreviousNonComment->opensScope() &&
1186 // We don't want to enforce line breaks for subsequent arguments just
1187 // because we have been forced to break before a lambda body.
1189 Current.isNot(TT_LambdaLBrace))) {
1190 CurrentState.BreakBeforeParameter = true;
1191 }
1192
1193 // If we break after { or the [ of an array initializer, we should also break
1194 // before the corresponding } or ].
1195 if (PreviousNonComment &&
1196 (PreviousNonComment->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||
1197 opensProtoMessageField(*PreviousNonComment, Style))) {
1198 CurrentState.BreakBeforeClosingBrace = true;
1199 }
1200
1201 if (PreviousNonComment && PreviousNonComment->is(tok::l_paren)) {
1202 CurrentState.BreakBeforeClosingParen =
1204 }
1205
1206 if (CurrentState.AvoidBinPacking) {
1207 // If we are breaking after '(', '{', '<', or this is the break after a ':'
1208 // to start a member initializer list in a constructor, this should not
1209 // be considered bin packing unless the relevant AllowAll option is false or
1210 // this is a dict/object literal.
1211 bool PreviousIsBreakingCtorInitializerColon =
1212 PreviousNonComment && PreviousNonComment->is(TT_CtorInitializerColon) &&
1214 bool AllowAllConstructorInitializersOnNextLine =
1217 if (!(Previous.isOneOf(tok::l_paren, tok::l_brace, TT_BinaryOperator) ||
1218 PreviousIsBreakingCtorInitializerColon) ||
1220 State.Line->MustBeDeclaration) ||
1222 !State.Line->MustBeDeclaration) ||
1223 (!AllowAllConstructorInitializersOnNextLine &&
1224 PreviousIsBreakingCtorInitializerColon) ||
1225 Previous.is(TT_DictLiteral)) {
1226 CurrentState.BreakBeforeParameter = true;
1227 }
1228
1229 // If we are breaking after a ':' to start a member initializer list,
1230 // and we allow all arguments on the next line, we should not break
1231 // before the next parameter.
1232 if (PreviousIsBreakingCtorInitializerColon &&
1233 AllowAllConstructorInitializersOnNextLine) {
1234 CurrentState.BreakBeforeParameter = false;
1235 }
1236 }
1237
1238 return Penalty;
1239}
1240
// Computes the column at which State.NextToken would start if it were placed
// on a new line. \p State is only read.
//
// Returns 0 when there is no next token or it has no predecessor. Otherwise the
// function is a cascade of special cases evaluated top to bottom: the first
// matching case returns, so the order of the checks is part of the behaviour.
// The final fallback is CurrentState.Indent, or that value plus
// Style.ContinuationIndentWidth when the indent still equals State.FirstIndent
// and the previous non-comment token is neither an r_brace nor a
// TT_CtorInitializerComma.
//
// NOTE(review): this listing omits several source lines (1257, 1277, 1286,
// 1332, 1382, 1396-1397, 1415, 1484 and 1488), so a few conditions and
// expressions below appear without their first or last operand. Consult the
// complete file before changing any of those statements.
1241unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) {
1242 if (!State.NextToken || !State.NextToken->Previous)
1243 return 0;
1244
1245 FormatToken &Current = *State.NextToken;
1246 const auto &CurrentState = State.Stack.back();
1247
1248 if (CurrentState.IsCSharpGenericTypeConstraint &&
1249 Current.isNot(TT_CSharpGenericTypeConstraint)) {
1250 return CurrentState.ColonPos + 2;
1251 }
1252
1253 const FormatToken &Previous = *Current.Previous;
1254 // If we are continuing an expression, we want to use the continuation indent.
1255 unsigned ContinuationIndent =
1256 std::max(CurrentState.LastSpace, CurrentState.Indent) +
1258 const FormatToken *PreviousNonComment = Current.getPreviousNonComment();
1259 const FormatToken *NextNonComment = Previous.getNextNonComment();
1260 if (!NextNonComment)
1261 NextNonComment = &Current;
1262
1263 // Java specific bits.
1264 if (Style.Language == FormatStyle::LK_Java &&
1265 Current.isOneOf(Keywords.kw_implements, Keywords.kw_extends)) {
1266 return std::max(CurrentState.LastSpace,
1267 CurrentState.Indent + Style.ContinuationIndentWidth);
1268 }
1269
1270 // Indentation of the statement following a Verilog case label is taken care
1271 // of in moveStateToNextToken.
1272 if (Style.isVerilog() && PreviousNonComment &&
1273 Keywords.isVerilogEndOfLabel(*PreviousNonComment)) {
1274 return State.FirstIndent;
1275 }
1276
1278 State.Line->First->is(tok::kw_enum)) {
1279 return (Style.IndentWidth * State.Line->First->IndentLevel) +
1280 Style.IndentWidth;
1281 }
1282
1283 if ((NextNonComment->is(tok::l_brace) && NextNonComment->is(BK_Block)) ||
1284 (Style.isVerilog() && Keywords.isVerilogBegin(*NextNonComment))) {
1285 if (Current.NestingLevel == 0 ||
1287 State.NextToken->is(TT_LambdaLBrace))) {
1288 return State.FirstIndent;
1289 }
1290 return CurrentState.Indent;
1291 }
1292 if (Current.is(TT_TrailingReturnArrow) &&
1293 Previous.isOneOf(tok::kw_noexcept, tok::kw_mutable, tok::kw_constexpr,
1294 tok::kw_consteval, tok::kw_static, TT_AttributeSquare)) {
1295 return ContinuationIndent;
1296 }
     // Closing '}' / ']' (and '>' for proto or TableGen styles): take the
     // column from the enclosing stack level, which is why the stack must hold
     // more than one entry.
1297 if ((Current.isOneOf(tok::r_brace, tok::r_square) ||
1298 (Current.is(tok::greater) && (Style.isProto() || Style.isTableGen()))) &&
1299 State.Stack.size() > 1) {
1300 if (Current.closesBlockOrBlockTypeList(Style))
1301 return State.Stack[State.Stack.size() - 2].NestedBlockIndent;
1302 if (Current.MatchingParen && Current.MatchingParen->is(BK_BracedInit))
1303 return State.Stack[State.Stack.size() - 2].LastSpace;
1304 return State.FirstIndent;
1305 }
1306 // Indent a closing parenthesis at the previous level if followed by a semi,
1307 // const, or opening brace. This allows indentations such as:
1308 // foo(
1309 // a,
1310 // );
1311 // int Foo::getter(
1312 // //
1313 // ) const {
1314 // return foo;
1315 // }
1316 // function foo(
1317 // a,
1318 // ) {
1319 // code(); //
1320 // }
1321 if (Current.is(tok::r_paren) && State.Stack.size() > 1 &&
1322 (!Current.Next ||
1323 Current.Next->isOneOf(tok::semi, tok::kw_const, tok::l_brace))) {
1324 return State.Stack[State.Stack.size() - 2].LastSpace;
1325 }
1326 // When DAGArg closer exists top of line, it should be aligned in the similar
1327 // way as function call above.
1328 if (Style.isTableGen() && Current.is(TT_TableGenDAGArgCloser) &&
1329 State.Stack.size() > 1) {
1330 return State.Stack[State.Stack.size() - 2].LastSpace;
1331 }
1333 (Current.is(tok::r_paren) ||
1334 (Current.is(tok::r_brace) && Current.MatchingParen &&
1335 Current.MatchingParen->is(BK_BracedInit))) &&
1336 State.Stack.size() > 1) {
1337 return State.Stack[State.Stack.size() - 2].LastSpace;
1338 }
     // NOTE(review): unlike the neighbouring checks, this one indexes
     // Stack[size() - 2] without testing State.Stack.size() > 1. Presumably a
     // scope-closing template string always has an enclosing level - confirm.
1339 if (NextNonComment->is(TT_TemplateString) && NextNonComment->closesScope())
1340 return State.Stack[State.Stack.size() - 2].LastSpace;
1341 // Field labels in a nested type should be aligned to the brace. For example
1342 // in ProtoBuf:
1343 // optional int32 b = 2 [(foo_options) = {aaaaaaaaaaaaaaaaaaa: 123,
1344 // bbbbbbbbbbbbbbbbbbbbbbbb:"baz"}];
1345 // For Verilog, a quote following a brace is treated as an identifier. And
1346 // Both braces and colons get annotated as TT_DictLiteral. So we have to
1347 // check.
1348 if (Current.is(tok::identifier) && Current.Next &&
1349 (!Style.isVerilog() || Current.Next->is(tok::colon)) &&
1350 (Current.Next->is(TT_DictLiteral) ||
1351 (Style.isProto() && Current.Next->isOneOf(tok::less, tok::l_brace)))) {
1352 return CurrentState.Indent;
1353 }
     // Continued string literals line up with the recorded start of the
     // literal; the ObjC variant is placed one column further left.
1354 if (NextNonComment->is(TT_ObjCStringLiteral) &&
1355 State.StartOfStringLiteral != 0) {
1356 return State.StartOfStringLiteral - 1;
1357 }
1358 if (NextNonComment->isStringLiteral() && State.StartOfStringLiteral != 0)
1359 return State.StartOfStringLiteral;
1360 if (NextNonComment->is(tok::lessless) && CurrentState.FirstLessLess != 0)
1361 return CurrentState.FirstLessLess;
1362 if (NextNonComment->isMemberAccess()) {
1363 if (CurrentState.CallContinuation == 0)
1364 return ContinuationIndent;
1365 return CurrentState.CallContinuation;
1366 }
1367 if (CurrentState.QuestionColumn != 0 &&
1368 ((NextNonComment->is(tok::colon) &&
1369 NextNonComment->is(TT_ConditionalExpr)) ||
1370 Previous.is(TT_ConditionalExpr))) {
1371 if (((NextNonComment->is(tok::colon) && NextNonComment->Next &&
1372 !NextNonComment->Next->FakeLParens.empty() &&
1373 NextNonComment->Next->FakeLParens.back() == prec::Conditional) ||
1374 (Previous.is(tok::colon) && !Current.FakeLParens.empty() &&
1375 Current.FakeLParens.back() == prec::Conditional)) &&
1376 !CurrentState.IsWrappedConditional) {
1377 // NOTE: we may tweak this slightly:
1378 // * not remove the 'lead' ContinuationIndentWidth
1379 // * always un-indent by the operator when
1380 // BreakBeforeTernaryOperators=true
1381 unsigned Indent = CurrentState.Indent;
1383 Indent -= Style.ContinuationIndentWidth;
1384 if (Style.BreakBeforeTernaryOperators && CurrentState.UnindentOperator)
1385 Indent -= 2;
1386 return Indent;
1387 }
1388 return CurrentState.QuestionColumn;
1389 }
1390 if (Previous.is(tok::comma) && CurrentState.VariablePos != 0)
1391 return CurrentState.VariablePos;
1392 if (Current.is(TT_RequiresClause)) {
1393 if (Style.IndentRequiresClause)
1394 return CurrentState.Indent + Style.IndentWidth;
1395 switch (Style.RequiresClausePosition) {
1398 return CurrentState.Indent;
1399 default:
1400 break;
1401 }
1402 }
1403 if (NextNonComment->isOneOf(TT_CtorInitializerColon, TT_InheritanceColon,
1404 TT_InheritanceComma)) {
1405 return State.FirstIndent + Style.ConstructorInitializerIndentWidth;
1406 }
1407 if ((PreviousNonComment &&
1408 (PreviousNonComment->ClosesTemplateDeclaration ||
1409 PreviousNonComment->ClosesRequiresClause ||
1410 (PreviousNonComment->is(TT_AttributeMacro) &&
1411 Current.isNot(tok::l_paren)) ||
1412 PreviousNonComment->isOneOf(
1413 TT_AttributeRParen, TT_AttributeSquare, TT_FunctionAnnotationRParen,
1414 TT_JavaAnnotation, TT_LeadingJavaAnnotation))) ||
1416 NextNonComment->isOneOf(tok::kw_operator, TT_FunctionDeclarationName))) {
1417 return std::max(CurrentState.LastSpace, CurrentState.Indent);
1418 }
1419 if (NextNonComment->is(TT_SelectorName)) {
1420 if (!CurrentState.ObjCSelectorNameFound) {
1421 unsigned MinIndent = CurrentState.Indent;
1422 if (shouldIndentWrappedSelectorName(Style, State.Line->Type)) {
1423 MinIndent = std::max(MinIndent,
1424 State.FirstIndent + Style.ContinuationIndentWidth);
1425 }
1426 // If LongestObjCSelectorName is 0, we are indenting the first
1427 // part of an ObjC selector (or a selector component which is
1428 // not colon-aligned due to block formatting).
1429 //
1430 // Otherwise, we are indenting a subsequent part of an ObjC
1431 // selector which should be colon-aligned to the longest
1432 // component of the ObjC selector.
1433 //
1434 // In either case, we want to respect Style.IndentWrappedFunctionNames.
1435 return MinIndent +
1436 std::max(NextNonComment->LongestObjCSelectorName,
1437 NextNonComment->ColumnWidth) -
1438 NextNonComment->ColumnWidth;
1439 }
1440 if (!CurrentState.AlignColons)
1441 return CurrentState.Indent;
     // Right-align the selector name so that it ends at ColonPos, provided
     // the name fits to the left of that column.
1442 if (CurrentState.ColonPos > NextNonComment->ColumnWidth)
1443 return CurrentState.ColonPos - NextNonComment->ColumnWidth;
1444 return CurrentState.Indent;
1445 }
1446 if (NextNonComment->is(tok::colon) && NextNonComment->is(TT_ObjCMethodExpr))
1447 return CurrentState.ColonPos;
1448 if (NextNonComment->is(TT_ArraySubscriptLSquare)) {
1449 if (CurrentState.StartOfArraySubscripts != 0) {
1450 return CurrentState.StartOfArraySubscripts;
1451 } else if (Style.isCSharp()) { // C# allows `["key"] = value` inside object
1452 // initializers.
1453 return CurrentState.Indent;
1454 }
1455 return ContinuationIndent;
1456 }
1457
1458 // OpenMP clauses want to get additional indentation when they are pushed onto
1459 // the next line.
1460 if (State.Line->InPragmaDirective) {
     // NOTE(review): First->Next is dereferenced without a null check;
     // presumably InPragmaDirective implies the line has at least two tokens
     // - confirm. Only the third token is null-checked below.
1461 FormatToken *PragmaType = State.Line->First->Next->Next;
1462 if (PragmaType && PragmaType->TokenText == "omp")
1463 return CurrentState.Indent + Style.ContinuationIndentWidth;
1464 }
1465
1466 // This ensures that we correctly format ObjC method calls without inputs,
1467 // i.e. where the last element isn't selector like: [callee method];
1468 if (NextNonComment->is(tok::identifier) && NextNonComment->FakeRParens == 0 &&
1469 NextNonComment->Next && NextNonComment->Next->is(TT_ObjCMethodExpr)) {
1470 return CurrentState.Indent;
1471 }
1472
1473 if (NextNonComment->isOneOf(TT_StartOfName, TT_PointerOrReference) ||
1474 Previous.isOneOf(tok::coloncolon, tok::equal, TT_JsTypeColon)) {
1475 return ContinuationIndent;
1476 }
1477 if (PreviousNonComment && PreviousNonComment->is(tok::colon) &&
1478 PreviousNonComment->isOneOf(TT_ObjCMethodExpr, TT_DictLiteral)) {
1479 return ContinuationIndent;
1480 }
1481 if (NextNonComment->is(TT_CtorInitializerComma))
1482 return CurrentState.Indent;
1483 if (PreviousNonComment && PreviousNonComment->is(TT_CtorInitializerColon) &&
1485 return CurrentState.Indent;
1486 }
1487 if (PreviousNonComment && PreviousNonComment->is(TT_InheritanceColon) &&
1489 return CurrentState.Indent;
1490 }
1491 if (Previous.is(tok::r_paren) &&
1492 Previous.isNot(TT_TableGenDAGArgOperatorToBreak) &&
1493 !Current.isBinaryOperator() &&
1494 !Current.isOneOf(tok::colon, tok::comment)) {
1495 return ContinuationIndent;
1496 }
1497 if (Current.is(TT_ProtoExtensionLSquare))
1498 return CurrentState.Indent;
     // With UnindentOperator set, shift a leading binary operator left by its
     // own length plus its required leading spaces. The second check applies
     // the same shift to a comment that directly precedes such an operator.
1499 if (Current.isBinaryOperator() && CurrentState.UnindentOperator) {
1500 return CurrentState.Indent - Current.Tok.getLength() -
1501 Current.SpacesRequiredBefore;
1502 }
1503 if (Current.is(tok::comment) && NextNonComment->isBinaryOperator() &&
1504 CurrentState.UnindentOperator) {
1505 return CurrentState.Indent - NextNonComment->Tok.getLength() -
1506 NextNonComment->SpacesRequiredBefore;
1507 }
1508 if (CurrentState.Indent == State.FirstIndent && PreviousNonComment &&
1509 !PreviousNonComment->isOneOf(tok::r_brace, TT_CtorInitializerComma)) {
1510 // Ensure that we fall back to the continuation indent width instead of
1511 // just flushing continuations left.
1512 return CurrentState.Indent + Style.ContinuationIndentWidth;
1513 }
1514 return CurrentState.Indent;
1515}
1516
1518 const FormatToken &Current,
1519 const FormatStyle &Style) {
     // NOTE(review): the first line of this signature (source line 1517) is
     // absent from this listing; judging by the call in moveStateToNextToken
     // this is presumably hasNestedBlockInlined(Previous, Current, Style),
     // with Previous passed as a pointer - confirm against the full file.
     //
     // Returns true when Previous is not an l_paren or when that paren has
     // more than one parameter. Previous is dereferenced unconditionally.
1520 if (Previous->isNot(tok::l_paren))
1521 return true;
1522 if (Previous->ParameterCount > 1)
1523 return true;
1524
1525 // Also a nested block if contains a lambda inside function with 1 parameter.
1526 return Style.BraceWrapping.BeforeLambdaBody && Current.is(TT_LambdaLSquare);
1527}
1528
1529unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
1530 bool DryRun, bool Newline) {
1531 assert(State.Stack.size());
1532 const FormatToken &Current = *State.NextToken;
1533 auto &CurrentState = State.Stack.back();
1534
1535 if (Current.is(TT_CSharpGenericTypeConstraint))
1536 CurrentState.IsCSharpGenericTypeConstraint = true;
1537 if (Current.isOneOf(tok::comma, TT_BinaryOperator))
1538 CurrentState.NoLineBreakInOperand = false;
1539 if (Current.isOneOf(TT_InheritanceColon, TT_CSharpGenericTypeConstraintColon))
1540 CurrentState.AvoidBinPacking = true;
1541 if (Current.is(tok::lessless) && Current.isNot(TT_OverloadedOperator)) {
1542 if (CurrentState.FirstLessLess == 0)
1543 CurrentState.FirstLessLess = State.Column;
1544 else
1545 CurrentState.LastOperatorWrapped = Newline;
1546 }
1547 if (Current.is(TT_BinaryOperator) && Current.isNot(tok::lessless))
1548 CurrentState.LastOperatorWrapped = Newline;
1549 if (Current.is(TT_ConditionalExpr) && Current.Previous &&
1550 Current.Previous->isNot(TT_ConditionalExpr)) {
1551 CurrentState.LastOperatorWrapped = Newline;
1552 }
1553 if (Current.is(TT_ArraySubscriptLSquare) &&
1554 CurrentState.StartOfArraySubscripts == 0) {
1555 CurrentState.StartOfArraySubscripts = State.Column;
1556 }
1557
1558 auto IsWrappedConditional = [](const FormatToken &Tok) {
1559 if (!(Tok.is(TT_ConditionalExpr) && Tok.is(tok::question)))
1560 return false;
1561 if (Tok.MustBreakBefore)
1562 return true;
1563
1564 const FormatToken *Next = Tok.getNextNonComment();
1565 return Next && Next->MustBreakBefore;
1566 };
1567 if (IsWrappedConditional(Current))
1568 CurrentState.IsWrappedConditional = true;
1569 if (Style.BreakBeforeTernaryOperators && Current.is(tok::question))
1570 CurrentState.QuestionColumn = State.Column;
1571 if (!Style.BreakBeforeTernaryOperators && Current.isNot(tok::colon)) {
1572 const FormatToken *Previous = Current.Previous;
1573 while (Previous && Previous->isTrailingComment())
1574 Previous = Previous->Previous;
1575 if (Previous && Previous->is(tok::question))
1576 CurrentState.QuestionColumn = State.Column;
1577 }
1578 if (!Current.opensScope() && !Current.closesScope() &&
1579 Current.isNot(TT_PointerOrReference)) {
1580 State.LowestLevelOnLine =
1581 std::min(State.LowestLevelOnLine, Current.NestingLevel);
1582 }
1583 if (Current.isMemberAccess())
1584 CurrentState.StartOfFunctionCall = !Current.NextOperator ? 0 : State.Column;
1585 if (Current.is(TT_SelectorName))
1586 CurrentState.ObjCSelectorNameFound = true;
1587 if (Current.is(TT_CtorInitializerColon) &&
1589 // Indent 2 from the column, so:
1590 // SomeClass::SomeClass()
1591 // : First(...), ...
1592 // Next(...)
1593 // ^ line up here.
1594 CurrentState.Indent = State.Column + (Style.BreakConstructorInitializers ==
1596 ? 0
1597 : 2);
1598 CurrentState.NestedBlockIndent = CurrentState.Indent;
1600 CurrentState.AvoidBinPacking = true;
1601 CurrentState.BreakBeforeParameter =
1602 Style.ColumnLimit > 0 &&
1605 } else {
1606 CurrentState.BreakBeforeParameter = false;
1607 }
1608 }
1609 if (Current.is(TT_CtorInitializerColon) &&
1611 CurrentState.Indent =
1612 State.FirstIndent + Style.ConstructorInitializerIndentWidth;
1613 CurrentState.NestedBlockIndent = CurrentState.Indent;
1615 CurrentState.AvoidBinPacking = true;
1616 else
1617 CurrentState.BreakBeforeParameter = false;
1618 }
1619 if (Current.is(TT_InheritanceColon)) {
1620 CurrentState.Indent =
1621 State.FirstIndent + Style.ConstructorInitializerIndentWidth;
1622 }
1623 if (Current.isOneOf(TT_BinaryOperator, TT_ConditionalExpr) && Newline)
1624 CurrentState.NestedBlockIndent = State.Column + Current.ColumnWidth + 1;
1625 if (Current.isOneOf(TT_LambdaLSquare, TT_TrailingReturnArrow))
1626 CurrentState.LastSpace = State.Column;
1627 if (Current.is(TT_RequiresExpression) &&
1629 CurrentState.NestedBlockIndent = State.Column;
1630 }
1631
1632 // Insert scopes created by fake parenthesis.
1633 const FormatToken *Previous = Current.getPreviousNonComment();
1634
1635 // Add special behavior to support a format commonly used for JavaScript
1636 // closures:
1637 // SomeFunction(function() {
1638 // foo();
1639 // bar();
1640 // }, a, b, c);
1641 if (Current.isNot(tok::comment) && !Current.ClosesRequiresClause &&
1642 Previous && Previous->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) &&
1643 Previous->isNot(TT_DictLiteral) && State.Stack.size() > 1 &&
1644 !CurrentState.HasMultipleNestedBlocks) {
1645 if (State.Stack[State.Stack.size() - 2].NestedBlockInlined && Newline)
1646 for (ParenState &PState : llvm::drop_end(State.Stack))
1647 PState.NoLineBreak = true;
1648 State.Stack[State.Stack.size() - 2].NestedBlockInlined = false;
1649 }
1650 if (Previous && (Previous->isOneOf(TT_BinaryOperator, TT_ConditionalExpr) ||
1651 (Previous->isOneOf(tok::l_paren, tok::comma, tok::colon) &&
1652 !Previous->isOneOf(TT_DictLiteral, TT_ObjCMethodExpr)))) {
1653 CurrentState.NestedBlockInlined =
1654 !Newline && hasNestedBlockInlined(Previous, Current, Style);
1655 }
1656
1657 moveStatePastFakeLParens(State, Newline);
1658 moveStatePastScopeCloser(State);
1659 // Do not use CurrentState here, since the two functions before may change the
1660 // Stack.
1661 bool AllowBreak = !State.Stack.back().NoLineBreak &&
1662 !State.Stack.back().NoLineBreakInOperand;
1663 moveStatePastScopeOpener(State, Newline);
1664 moveStatePastFakeRParens(State);
1665
1666 if (Current.is(TT_ObjCStringLiteral) && State.StartOfStringLiteral == 0)
1667 State.StartOfStringLiteral = State.Column + 1;
1668 if (Current.is(TT_CSharpStringLiteral) && State.StartOfStringLiteral == 0) {
1669 State.StartOfStringLiteral = State.Column + 1;
1670 } else if (Current.is(TT_TableGenMultiLineString) &&
1671 State.StartOfStringLiteral == 0) {
1672 State.StartOfStringLiteral = State.Column + 1;
1673 } else if (Current.isStringLiteral() && State.StartOfStringLiteral == 0) {
1674 State.StartOfStringLiteral = State.Column;
1675 } else if (!Current.isOneOf(tok::comment, tok::identifier, tok::hash) &&
1676 !Current.isStringLiteral()) {
1677 State.StartOfStringLiteral = 0;
1678 }
1679
1680 State.Column += Current.ColumnWidth;
1681 State.NextToken = State.NextToken->Next;
1682 // Verilog case labels are on the same unwrapped lines as the statements that
1683 // follow. TokenAnnotator identifies them and sets MustBreakBefore.
1684 // Indentation is taken care of here. A case label can only have 1 statement
1685 // in Verilog, so we don't have to worry about lines that follow.
1686 if (Style.isVerilog() && State.NextToken &&
1687 State.NextToken->MustBreakBefore &&
1688 Keywords.isVerilogEndOfLabel(Current)) {
1689 State.FirstIndent += Style.IndentWidth;
1690 CurrentState.Indent = State.FirstIndent;
1691 }
1692
1693 unsigned Penalty =
1694 handleEndOfLine(Current, State, DryRun, AllowBreak, Newline);
1695
1696 if (Current.Role)
1697 Current.Role->formatFromToken(State, this, DryRun);
1698 // If the previous has a special role, let it consume tokens as appropriate.
1699 // It is necessary to start at the previous token for the only implemented
1700 // role (comma separated list). That way, the decision whether or not to break
1701 // after the "{" is already done and both options are tried and evaluated.
1702 // FIXME: This is ugly, find a better way.
1703 if (Previous && Previous->Role)
1704 Penalty += Previous->Role->formatAfterToken(State, this, DryRun);
1705
1706 return Penalty;
1707}
1708
// Pushes one ParenState onto State.Stack for every entry of
// Current.FakeLParens, visiting the precedence levels in reverse order. Each
// new entry starts as a copy of the current top of the stack; its Tok,
// line-break, conditional, alignment and indentation fields are then adjusted
// before it is pushed. Does nothing when the token has no fake left
// parentheses.
// \p Newline presumably tells whether the current token starts a new line; it
// is only read here to suppress the operator un-indent.
// NOTE(review): this listing skips the embedded source lines 1725, 1747, 1751,
// 1766, 1776, 1779 and 1793, so several conditions below are incomplete as
// shown. Restore them from the original file before building.
1709void ContinuationIndenter::moveStatePastFakeLParens(LineState &State,
1710 bool Newline) {
1711 const FormatToken &Current = *State.NextToken;
1712 if (Current.FakeLParens.empty())
1713 return;
1714
1715 const FormatToken *Previous = Current.getPreviousNonComment();
1716
1717 // Don't add extra indentation for the first fake parenthesis after
1718 // 'return', assignments, opening <({[, or requires clauses. The indentation
1719 // for these cases is special cased.
1720 bool SkipFirstExtraIndent =
1721 Previous &&
1722 (Previous->opensScope() ||
1723 Previous->isOneOf(tok::semi, tok::kw_return, TT_RequiresClause) ||
1724 (Previous->getPrecedence() == prec::Assignment &&
1726 Previous->is(TT_ObjCMethodExpr));
1727 for (const auto &PrecedenceLevel : llvm::reverse(Current.FakeLParens)) {
1728 const auto &CurrentState = State.Stack.back();
1729 ParenState NewParenState = CurrentState;
1730 NewParenState.Tok = nullptr;
1731 NewParenState.ContainsLineBreak = false;
1732 NewParenState.LastOperatorWrapped = true;
1733 NewParenState.IsChainedConditional = false;
1734 NewParenState.IsWrappedConditional = false;
1735 NewParenState.UnindentOperator = false;
1736 NewParenState.NoLineBreak =
1737 NewParenState.NoLineBreak || CurrentState.NoLineBreakInOperand;
1738
1739 // Don't propagate AvoidBinPacking into subexpressions of arg/param lists.
1740 if (PrecedenceLevel > prec::Comma)
1741 NewParenState.AvoidBinPacking = false;
1742
1743 // Indent from 'LastSpace' unless these are fake parentheses encapsulating
1744 // a builder type call after 'return' or, if the alignment after opening
1745 // brackets is disabled.
1746 if (!Current.isTrailingComment() &&
1748 PrecedenceLevel < prec::Assignment) &&
1749 (!Previous || Previous->isNot(tok::kw_return) ||
1750 (Style.Language != FormatStyle::LK_Java && PrecedenceLevel > 0)) &&
1752 PrecedenceLevel > prec::Comma || Current.NestingLevel == 0) &&
1753 (!Style.isTableGen() ||
1754 (Previous && Previous->isOneOf(TT_TableGenDAGArgListComma,
1755 TT_TableGenDAGArgListCommaToBreak)))) {
1756 NewParenState.Indent = std::max(
1757 std::max(State.Column, NewParenState.Indent), CurrentState.LastSpace);
1758 }
1759
1760 // Special case for generic selection expressions, its comma-separated
1761 // expressions are not aligned to the opening paren like regular calls, but
1762 // rather continuation-indented relative to the _Generic keyword.
1763 if (Previous && Previous->endsSequence(tok::l_paren, tok::kw__Generic) &&
1764 State.Stack.size() > 1) {
1765 NewParenState.Indent = State.Stack[State.Stack.size() - 2].Indent +
1767 }
1768
1769 if ((shouldUnindentNextOperator(Current) ||
1770 (Previous &&
1771 (PrecedenceLevel == prec::Conditional &&
1772 Previous->is(tok::question) && Previous->is(TT_ConditionalExpr)))) &&
1773 !Newline) {
1774 // If BreakBeforeBinaryOperators is set, un-indent a bit to account for
1775 // the operator and keep the operands aligned.
1777 NewParenState.UnindentOperator = true;
1778 // Mark indentation as alignment if the expression is aligned.
1780 NewParenState.IsAligned = true;
1781 }
1782
1783 // Do not indent relative to the fake parentheses inserted for "." or "->".
1784 // This is a special case to make the following two statements consistent:
1785 // OuterFunction(InnerFunctionCall( // break
1786 // ParameterToInnerFunction));
1787 // OuterFunction(SomeObject.InnerFunctionCall( // break
1788 // ParameterToInnerFunction));
1789 if (PrecedenceLevel > prec::Unknown)
1790 NewParenState.LastSpace = std::max(NewParenState.LastSpace, State.Column);
1791 if (PrecedenceLevel != prec::Conditional &&
1792 Current.isNot(TT_UnaryOperator) &&
1794 NewParenState.StartOfFunctionCall = State.Column;
1795 }
1796
1797 // Indent conditional expressions, unless they are chained "else-if"
1798 // conditionals. Never indent expression where the 'operator' is ',', ';' or
1799 // an assignment (i.e. *I <= prec::Assignment) as those have different
1800 // indentation rules. Indent other expression, unless the indentation needs
1801 // to be skipped.
1802 if (PrecedenceLevel == prec::Conditional && Previous &&
1803 Previous->is(tok::colon) && Previous->is(TT_ConditionalExpr) &&
1804 &PrecedenceLevel == &Current.FakeLParens.back() &&
1805 !CurrentState.IsWrappedConditional) {
1806 NewParenState.IsChainedConditional = true;
1807 NewParenState.UnindentOperator = State.Stack.back().UnindentOperator;
1808 } else if (PrecedenceLevel == prec::Conditional ||
1809 (!SkipFirstExtraIndent && PrecedenceLevel > prec::Assignment &&
1810 !Current.isTrailingComment())) {
1811 NewParenState.Indent += Style.ContinuationIndentWidth;
1812 }
1813 if ((Previous && !Previous->opensScope()) || PrecedenceLevel != prec::Comma)
1814 NewParenState.BreakBeforeParameter = false;
1815 State.Stack.push_back(NewParenState);
1816 SkipFirstExtraIndent = false;
1817 }
1818}
1819
1820void ContinuationIndenter::moveStatePastFakeRParens(LineState &State) {
1821 for (unsigned i = 0, e = State.NextToken->FakeRParens; i != e; ++i) {
1822 unsigned VariablePos = State.Stack.back().VariablePos;
1823 if (State.Stack.size() == 1) {
1824 // Do not pop the last element.
1825 break;
1826 }
1827 State.Stack.pop_back();
1828 State.Stack.back().VariablePos = VariablePos;
1829 }
1830
1831 if (State.NextToken->ClosesRequiresClause && Style.IndentRequiresClause) {
1832 // Remove the indentation of the requires clauses (which is not in Indent,
1833 // but in LastSpace).
1834 State.Stack.back().LastSpace -= Style.IndentWidth;
1835 }
1836}
1837
// Pushes a new ParenState for the scope opened by the next token.
// Returns without touching the stack when the token does not open a scope, or
// when it is a '<' or '(' inside a C# generic type constraint. Tokens that are
// BK_Block and have a MatchingParen are delegated to moveStateToNewBlock().
// For everything else the indent, LastSpace, bin-packing, line-break and
// nested-block settings of the new scope are computed here and stored in the
// pushed ParenState.
// NOTE(review): this listing skips the embedded source lines 1871, 1889, 1893,
// 1920, 1922, 1937 and 1942, so several expressions below are incomplete as
// shown. Restore them from the original file before building.
1838void ContinuationIndenter::moveStatePastScopeOpener(LineState &State,
1839 bool Newline) {
1840 const FormatToken &Current = *State.NextToken;
1841 if (!Current.opensScope())
1842 return;
1843
1844 const auto &CurrentState = State.Stack.back();
1845
1846 // Don't allow '<' or '(' in C# generic type constraints to start new scopes.
1847 if (Current.isOneOf(tok::less, tok::l_paren) &&
1848 CurrentState.IsCSharpGenericTypeConstraint) {
1849 return;
1850 }
1851
1852 if (Current.MatchingParen && Current.is(BK_Block)) {
1853 moveStateToNewBlock(State, Newline);
1854 return;
1855 }
1856
1857 unsigned NewIndent;
1858 unsigned LastSpace = CurrentState.LastSpace;
1859 bool AvoidBinPacking;
1860 bool BreakBeforeParameter = false;
1861 unsigned NestedBlockIndent = std::max(CurrentState.StartOfFunctionCall,
1862 CurrentState.NestedBlockIndent);
1863 if (Current.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||
1864 opensProtoMessageField(Current, Style)) {
1865 if (Current.opensBlockOrBlockTypeList(Style)) {
1866 NewIndent = Style.IndentWidth +
1867 std::min(State.Column, CurrentState.NestedBlockIndent);
1868 } else if (Current.is(tok::l_brace)) {
1869 NewIndent =
1870 CurrentState.LastSpace + Style.BracedInitializerIndentWidth.value_or(
1872 } else {
1873 NewIndent = CurrentState.LastSpace + Style.ContinuationIndentWidth;
1874 }
1875 const FormatToken *NextNonComment = Current.getNextNonComment();
1876 bool EndsInComma = Current.MatchingParen &&
1877 Current.MatchingParen->Previous &&
1878 Current.MatchingParen->Previous->is(tok::comma);
1879 AvoidBinPacking = EndsInComma || Current.is(TT_DictLiteral) ||
1880 Style.isProto() || !Style.BinPackArguments ||
1881 (NextNonComment && NextNonComment->isOneOf(
1882 TT_DesignatedInitializerPeriod,
1883 TT_DesignatedInitializerLSquare));
1884 BreakBeforeParameter = EndsInComma;
1885 if (Current.ParameterCount > 1)
1886 NestedBlockIndent = std::max(NestedBlockIndent, State.Column + 1);
1887 } else {
1888 NewIndent =
1890 std::max(CurrentState.LastSpace, CurrentState.StartOfFunctionCall);
1891
1892 if (Style.isTableGen() && Current.is(TT_TableGenDAGArgOpenerToBreak) &&
1894 // For the case the next token is a TableGen DAGArg operator identifier
1895 // that is not marked to have a line break after it.
1896 // In this case the option DAS_BreakElements requires to align the
1897 // DAGArg elements to the operator.
1898 const FormatToken *Next = Current.Next;
1899 if (Next && Next->is(TT_TableGenDAGArgOperatorID))
1900 NewIndent = State.Column + Next->TokenText.size() + 2;
1901 }
1902
1903 // Ensure that different brackets force relative alignment, e.g.:
1904 // void SomeFunction(vector< // break
1905 // int> v);
1906 // FIXME: We likely want to do this for more combinations of brackets.
1907 if (Current.is(tok::less) && Current.ParentBracket == tok::l_paren) {
1908 NewIndent = std::max(NewIndent, CurrentState.Indent);
1909 LastSpace = std::max(LastSpace, CurrentState.Indent);
1910 }
1911
1912 bool EndsInComma =
1913 Current.MatchingParen &&
1914 Current.MatchingParen->getPreviousNonComment() &&
1915 Current.MatchingParen->getPreviousNonComment()->is(tok::comma);
1916
1917 // If ObjCBinPackProtocolList is unspecified, fall back to BinPackParameters
1918 // for backwards compatibility.
1919 bool ObjCBinPackProtocolList =
1921 Style.BinPackParameters) ||
1923
1924 bool BinPackDeclaration =
1925 (State.Line->Type != LT_ObjCDecl && Style.BinPackParameters) ||
1926 (State.Line->Type == LT_ObjCDecl && ObjCBinPackProtocolList);
1927
1928 bool GenericSelection =
1929 Current.getPreviousNonComment() &&
1930 Current.getPreviousNonComment()->is(tok::kw__Generic);
1931
1932 AvoidBinPacking =
1933 (CurrentState.IsCSharpGenericTypeConstraint) || GenericSelection ||
1934 (Style.isJavaScript() && EndsInComma) ||
1935 (State.Line->MustBeDeclaration && !BinPackDeclaration) ||
1936 (!State.Line->MustBeDeclaration && !Style.BinPackArguments) ||
1938 (Current.is(PPK_OnePerLine) ||
1939 (!BinPackInconclusiveFunctions && Current.is(PPK_Inconclusive))));
1940
1941 if (Current.is(TT_ObjCMethodExpr) && Current.MatchingParen &&
1943 if (Style.ColumnLimit) {
1944 // If this '[' opens an ObjC call, determine whether all parameters fit
1945 // into one line and put one per line if they don't.
1946 if (getLengthToMatchingParen(Current, State.Stack) + State.Column >
1947 getColumnLimit(State)) {
1948 BreakBeforeParameter = true;
1949 }
1950 } else {
1951 // For ColumnLimit = 0, we have to figure out whether there is or has to
1952 // be a line break within this call.
1953 for (const FormatToken *Tok = &Current;
1954 Tok && Tok != Current.MatchingParen; Tok = Tok->Next) {
1955 if (Tok->MustBreakBefore ||
1956 (Tok->CanBreakBefore && Tok->NewlinesBefore > 0)) {
1957 BreakBeforeParameter = true;
1958 break;
1959 }
1960 }
1961 }
1962 }
1963
1964 if (Style.isJavaScript() && EndsInComma)
1965 BreakBeforeParameter = true;
1966 }
1967 // Generally inherit NoLineBreak from the current scope to nested scope.
1968 // However, don't do this for non-empty nested blocks, dict literals and
1969 // array literals as these follow different indentation rules.
1970 bool NoLineBreak =
1971 Current.Children.empty() &&
1972 !Current.isOneOf(TT_DictLiteral, TT_ArrayInitializerLSquare) &&
1973 (CurrentState.NoLineBreak || CurrentState.NoLineBreakInOperand ||
1974 (Current.is(TT_TemplateOpener) &&
1975 CurrentState.ContainsUnwrappedBuilder));
1976 State.Stack.push_back(
1977 ParenState(&Current, NewIndent, LastSpace, AvoidBinPacking, NoLineBreak));
1978 auto &NewState = State.Stack.back();
1979 NewState.NestedBlockIndent = NestedBlockIndent;
1980 NewState.BreakBeforeParameter = BreakBeforeParameter;
1981 NewState.HasMultipleNestedBlocks = (Current.BlockParameterCount > 1);
1982
1983 if (Style.BraceWrapping.BeforeLambdaBody && Current.Next &&
1984 Current.is(tok::l_paren)) {
1985 // Search for any parameter that is a lambda.
1986 FormatToken const *next = Current.Next;
1987 while (next) {
1988 if (next->is(TT_LambdaLSquare)) {
1989 NewState.HasMultipleNestedBlocks = true;
1990 break;
1991 }
1992 next = next->Next;
1993 }
1994 }
1995
1996 NewState.IsInsideObjCArrayLiteral = Current.is(TT_ArrayInitializerLSquare) &&
1997 Current.Previous &&
1998 Current.Previous->is(tok::at);
1999}
2000
2001void ContinuationIndenter::moveStatePastScopeCloser(LineState &State) {
2002 const FormatToken &Current = *State.NextToken;
2003 if (!Current.closesScope())
2004 return;
2005
2006 // If we encounter a closing ), ], } or >, we can remove a level from our
2007 // stacks.
2008 if (State.Stack.size() > 1 &&
2009 (Current.isOneOf(tok::r_paren, tok::r_square, TT_TemplateString) ||
2010 (Current.is(tok::r_brace) && State.NextToken != State.Line->First) ||
2011 State.NextToken->is(TT_TemplateCloser) ||
2012 State.NextToken->is(TT_TableGenListCloser) ||
2013 (Current.is(tok::greater) && Current.is(TT_DictLiteral)))) {
2014 State.Stack.pop_back();
2015 }
2016
2017 auto &CurrentState = State.Stack.back();
2018
2019 // Reevaluate whether ObjC message arguments fit into one line.
2020 // If a receiver spans multiple lines, e.g.:
2021 // [[object block:^{
2022 // return 42;
2023 // }] a:42 b:42];
2024 // BreakBeforeParameter is calculated based on an incorrect assumption
2025 // (it is checked whether the whole expression fits into one line without
2026 // considering a line break inside a message receiver).
2027 // We check whether arguments fit after receiver scope closer (into the same
2028 // line).
2029 if (CurrentState.BreakBeforeParameter && Current.MatchingParen &&
2030 Current.MatchingParen->Previous) {
2031 const FormatToken &CurrentScopeOpener = *Current.MatchingParen->Previous;
2032 if (CurrentScopeOpener.is(TT_ObjCMethodExpr) &&
2033 CurrentScopeOpener.MatchingParen) {
2034 int NecessarySpaceInLine =
2035 getLengthToMatchingParen(CurrentScopeOpener, State.Stack) +
2036 CurrentScopeOpener.TotalLength - Current.TotalLength - 1;
2037 if (State.Column + Current.ColumnWidth + NecessarySpaceInLine <=
2038 Style.ColumnLimit) {
2039 CurrentState.BreakBeforeParameter = false;
2040 }
2041 }
2042 }
2043
2044 if (Current.is(tok::r_square)) {
2045 // If this ends the array subscript expr, reset the corresponding value.
2046 const FormatToken *NextNonComment = Current.getNextNonComment();
2047 if (NextNonComment && NextNonComment->isNot(tok::l_square))
2048 CurrentState.StartOfArraySubscripts = 0;
2049 }
2050}
2051
// Pushes a ParenState for the block opened by State.NextToken.
// The new level is indented relative to the current NestedBlockIndent by
// Style.ObjCBlockIndentWidth for a TT_ObjCBlockLBrace and by Style.IndentWidth
// otherwise. It is created with AvoidBinPacking set, inherits LastSpace and
// NestedBlockIndent from the enclosing level, and has BreakBeforeParameter
// forced to true.
// NOTE(review): this listing skips the embedded source line 2053, which holds
// the start of the first condition, so that 'if' is incomplete as shown.
// Restore it from the original file before building.
2052void ContinuationIndenter::moveStateToNewBlock(LineState &State, bool NewLine) {
2054 State.NextToken->is(TT_LambdaLBrace) &&
2055 !State.Line->MightBeFunctionDecl) {
2056 State.Stack.back().NestedBlockIndent = State.FirstIndent;
2057 }
2058 unsigned NestedBlockIndent = State.Stack.back().NestedBlockIndent;
2059 // ObjC block sometimes follow special indentation rules.
2060 unsigned NewIndent =
2061 NestedBlockIndent + (State.NextToken->is(TT_ObjCBlockLBrace)
2062 ? Style.ObjCBlockIndentWidth
2063 : Style.IndentWidth);
2064
2065 // Even when wrapping before lambda body, the left brace can still be added to
2066 // the same line. This occurs when checking whether the whole lambda body can
2067 // go on a single line. In this case we have to make sure there are no line
2068 // breaks in the body, otherwise we could just end up with a regular lambda
2069 // body without the brace wrapped.
2070 bool NoLineBreak = Style.BraceWrapping.BeforeLambdaBody && !NewLine &&
2071 State.NextToken->is(TT_LambdaLBrace);
2072
2073 State.Stack.push_back(ParenState(State.NextToken, NewIndent,
2074 State.Stack.back().LastSpace,
2075 /*AvoidBinPacking=*/true, NoLineBreak));
2076 State.Stack.back().NestedBlockIndent = NestedBlockIndent;
2077 State.Stack.back().BreakBeforeParameter = true;
2078}
2079
2080static unsigned getLastLineEndColumn(StringRef Text, unsigned StartColumn,
2081 unsigned TabWidth,
2082 encoding::Encoding Encoding) {
2083 size_t LastNewlinePos = Text.find_last_of("\n");
2084 if (LastNewlinePos == StringRef::npos) {
2085 return StartColumn +
2086 encoding::columnWidthWithTabs(Text, StartColumn, TabWidth, Encoding);
2087 } else {
2088 return encoding::columnWidthWithTabs(Text.substr(LastNewlinePos),
2089 /*StartColumn=*/0, TabWidth, Encoding);
2090 }
2091}
2092
2093unsigned ContinuationIndenter::reformatRawStringLiteral(
2094 const FormatToken &Current, LineState &State,
2095 const FormatStyle &RawStringStyle, bool DryRun, bool Newline) {
2096 unsigned StartColumn = State.Column - Current.ColumnWidth;
2097 StringRef OldDelimiter = *getRawStringDelimiter(Current.TokenText);
2098 StringRef NewDelimiter =
2099 getCanonicalRawStringDelimiter(Style, RawStringStyle.Language);
2100 if (NewDelimiter.empty())
2101 NewDelimiter = OldDelimiter;
2102 // The text of a raw string is between the leading 'R"delimiter(' and the
2103 // trailing 'delimiter)"'.
2104 unsigned OldPrefixSize = 3 + OldDelimiter.size();
2105 unsigned OldSuffixSize = 2 + OldDelimiter.size();
2106 // We create a virtual text environment which expects a null-terminated
2107 // string, so we cannot use StringRef.
2108 std::string RawText = std::string(
2109 Current.TokenText.substr(OldPrefixSize).drop_back(OldSuffixSize));
2110 if (NewDelimiter != OldDelimiter) {
2111 // Don't update to the canonical delimiter 'deli' if ')deli"' occurs in the
2112 // raw string.
2113 std::string CanonicalDelimiterSuffix = (")" + NewDelimiter + "\"").str();
2114 if (StringRef(RawText).contains(CanonicalDelimiterSuffix))
2115 NewDelimiter = OldDelimiter;
2116 }
2117
2118 unsigned NewPrefixSize = 3 + NewDelimiter.size();
2119 unsigned NewSuffixSize = 2 + NewDelimiter.size();
2120
2121 // The first start column is the column the raw text starts after formatting.
2122 unsigned FirstStartColumn = StartColumn + NewPrefixSize;
2123
2124 // The next start column is the intended indentation a line break inside
2125 // the raw string at level 0. It is determined by the following rules:
2126 // - if the content starts on newline, it is one level more than the current
2127 // indent, and
2128 // - if the content does not start on a newline, it is the first start
2129 // column.
2130 // These rules have the advantage that the formatted content both does not
2131 // violate the rectangle rule and visually flows within the surrounding
2132 // source.
2133 bool ContentStartsOnNewline = Current.TokenText[OldPrefixSize] == '\n';
2134 // If this token is the last parameter (checked by looking if it's followed by
2135 // `)` and is not on a newline, the base the indent off the line's nested
2136 // block indent. Otherwise, base the indent off the arguments indent, so we
2137 // can achieve:
2138 //
2139 // fffffffffff(1, 2, 3, R"pb(
2140 // key1: 1 #
2141 // key2: 2)pb");
2142 //
2143 // fffffffffff(1, 2, 3,
2144 // R"pb(
2145 // key1: 1 #
2146 // key2: 2
2147 // )pb");
2148 //
2149 // fffffffffff(1, 2, 3,
2150 // R"pb(
2151 // key1: 1 #
2152 // key2: 2
2153 // )pb",
2154 // 5);
2155 unsigned CurrentIndent =
2156 (!Newline && Current.Next && Current.Next->is(tok::r_paren))
2157 ? State.Stack.back().NestedBlockIndent
2158 : State.Stack.back().Indent;
2159 unsigned NextStartColumn = ContentStartsOnNewline
2160 ? CurrentIndent + Style.IndentWidth
2161 : FirstStartColumn;
2162
2163 // The last start column is the column the raw string suffix starts if it is
2164 // put on a newline.
2165 // The last start column is the intended indentation of the raw string postfix
2166 // if it is put on a newline. It is determined by the following rules:
2167 // - if the raw string prefix starts on a newline, it is the column where
2168 // that raw string prefix starts, and
2169 // - if the raw string prefix does not start on a newline, it is the current
2170 // indent.
2171 unsigned LastStartColumn =
2172 Current.NewlinesBefore ? FirstStartColumn - NewPrefixSize : CurrentIndent;
2173
2174 std::pair<tooling::Replacements, unsigned> Fixes = internal::reformat(
2175 RawStringStyle, RawText, {tooling::Range(0, RawText.size())},
2176 FirstStartColumn, NextStartColumn, LastStartColumn, "<stdin>",
2177 /*Status=*/nullptr);
2178
2179 auto NewCode = applyAllReplacements(RawText, Fixes.first);
2180 tooling::Replacements NoFixes;
2181 if (!NewCode)
2182 return addMultilineToken(Current, State);
2183 if (!DryRun) {
2184 if (NewDelimiter != OldDelimiter) {
2185 // In 'R"delimiter(...', the delimiter starts 2 characters after the start
2186 // of the token.
2187 SourceLocation PrefixDelimiterStart =
2188 Current.Tok.getLocation().getLocWithOffset(2);
2189 auto PrefixErr = Whitespaces.addReplacement(tooling::Replacement(
2190 SourceMgr, PrefixDelimiterStart, OldDelimiter.size(), NewDelimiter));
2191 if (PrefixErr) {
2192 llvm::errs()
2193 << "Failed to update the prefix delimiter of a raw string: "
2194 << llvm::toString(std::move(PrefixErr)) << "\n";
2195 }
2196 // In 'R"delimiter(...)delimiter"', the suffix delimiter starts at
2197 // position length - 1 - |delimiter|.
2198 SourceLocation SuffixDelimiterStart =
2199 Current.Tok.getLocation().getLocWithOffset(Current.TokenText.size() -
2200 1 - OldDelimiter.size());
2201 auto SuffixErr = Whitespaces.addReplacement(tooling::Replacement(
2202 SourceMgr, SuffixDelimiterStart, OldDelimiter.size(), NewDelimiter));
2203 if (SuffixErr) {
2204 llvm::errs()
2205 << "Failed to update the suffix delimiter of a raw string: "
2206 << llvm::toString(std::move(SuffixErr)) << "\n";
2207 }
2208 }
2209 SourceLocation OriginLoc =
2210 Current.Tok.getLocation().getLocWithOffset(OldPrefixSize);
2211 for (const tooling::Replacement &Fix : Fixes.first) {
2212 auto Err = Whitespaces.addReplacement(tooling::Replacement(
2213 SourceMgr, OriginLoc.getLocWithOffset(Fix.getOffset()),
2214 Fix.getLength(), Fix.getReplacementText()));
2215 if (Err) {
2216 llvm::errs() << "Failed to reformat raw string: "
2217 << llvm::toString(std::move(Err)) << "\n";
2218 }
2219 }
2220 }
2221 unsigned RawLastLineEndColumn = getLastLineEndColumn(
2222 *NewCode, FirstStartColumn, Style.TabWidth, Encoding);
2223 State.Column = RawLastLineEndColumn + NewSuffixSize;
2224 // Since we're updating the column to after the raw string literal here, we
2225 // have to manually add the penalty for the prefix R"delim( over the column
2226 // limit.
2227 unsigned PrefixExcessCharacters =
2228 StartColumn + NewPrefixSize > Style.ColumnLimit
2229 ? StartColumn + NewPrefixSize - Style.ColumnLimit
2230 : 0;
2231 bool IsMultiline =
2232 ContentStartsOnNewline || (NewCode->find('\n') != std::string::npos);
2233 if (IsMultiline) {
2234 // Break before further function parameters on all levels.
2235 for (ParenState &Paren : State.Stack)
2236 Paren.BreakBeforeParameter = true;
2237 }
2238 return Fixes.second + PrefixExcessCharacters * Style.PenaltyExcessCharacter;
2239}
2240
2241unsigned ContinuationIndenter::addMultilineToken(const FormatToken &Current,
2242 LineState &State) {
2243 // Break before further function parameters on all levels.
2244 for (ParenState &Paren : State.Stack)
2245 Paren.BreakBeforeParameter = true;
2246
2247 unsigned ColumnsUsed = State.Column;
2248 // We can only affect layout of the first and the last line, so the penalty
2249 // for all other lines is constant, and we ignore it.
2250 State.Column = Current.LastLineColumnWidth;
2251
2252 if (ColumnsUsed > getColumnLimit(State))
2253 return Style.PenaltyExcessCharacter * (ColumnsUsed - getColumnLimit(State));
2254 return 0;
2255}
2256
2257unsigned ContinuationIndenter::handleEndOfLine(const FormatToken &Current,
2258 LineState &State, bool DryRun,
2259 bool AllowBreak, bool Newline) {
2260 unsigned Penalty = 0;
2261 // Compute the raw string style to use in case this is a raw string literal
2262 // that can be reformatted.
2263 auto RawStringStyle = getRawStringStyle(Current, State);
2264 if (RawStringStyle && !Current.Finalized) {
2265 Penalty = reformatRawStringLiteral(Current, State, *RawStringStyle, DryRun,
2266 Newline);
2267 } else if (Current.IsMultiline && Current.isNot(TT_BlockComment)) {
2268 // Don't break multi-line tokens other than block comments and raw string
2269 // literals. Instead, just update the state.
2270 Penalty = addMultilineToken(Current, State);
2271 } else if (State.Line->Type != LT_ImportStatement) {
2272 // We generally don't break import statements.
2273 LineState OriginalState = State;
2274
2275 // Whether we force the reflowing algorithm to stay strictly within the
2276 // column limit.
2277 bool Strict = false;
2278 // Whether the first non-strict attempt at reflowing did intentionally
2279 // exceed the column limit.
2280 bool Exceeded = false;
2281 std::tie(Penalty, Exceeded) = breakProtrudingToken(
2282 Current, State, AllowBreak, /*DryRun=*/true, Strict);
2283 if (Exceeded) {
2284 // If non-strict reflowing exceeds the column limit, try whether strict
2285 // reflowing leads to an overall lower penalty.
2286 LineState StrictState = OriginalState;
2287 unsigned StrictPenalty =
2288 breakProtrudingToken(Current, StrictState, AllowBreak,
2289 /*DryRun=*/true, /*Strict=*/true)
2290 .first;
2291 Strict = StrictPenalty <= Penalty;
2292 if (Strict) {
2293 Penalty = StrictPenalty;
2294 State = StrictState;
2295 }
2296 }
2297 if (!DryRun) {
2298 // If we're not in dry-run mode, apply the changes with the decision on
2299 // strictness made above.
2300 breakProtrudingToken(Current, OriginalState, AllowBreak, /*DryRun=*/false,
2301 Strict);
2302 }
2303 }
2304 if (State.Column > getColumnLimit(State)) {
2305 unsigned ExcessCharacters = State.Column - getColumnLimit(State);
2306 Penalty += Style.PenaltyExcessCharacter * ExcessCharacters;
2307 }
2308 return Penalty;
2309}
2310
2311// Returns the enclosing function name of a token, or the empty string if not
2312// found.
2313static StringRef getEnclosingFunctionName(const FormatToken &Current) {
2314 // Look for: 'function(' or 'function<templates>(' before Current.
2315 auto Tok = Current.getPreviousNonComment();
2316 if (!Tok || Tok->isNot(tok::l_paren))
2317 return "";
2318 Tok = Tok->getPreviousNonComment();
2319 if (!Tok)
2320 return "";
2321 if (Tok->is(TT_TemplateCloser)) {
2322 Tok = Tok->MatchingParen;
2323 if (Tok)
2324 Tok = Tok->getPreviousNonComment();
2325 }
2326 if (!Tok || Tok->isNot(tok::identifier))
2327 return "";
2328 return Tok->TokenText;
2329}
2330
2331std::optional<FormatStyle>
2332ContinuationIndenter::getRawStringStyle(const FormatToken &Current,
2333 const LineState &State) {
2334 if (!Current.isStringLiteral())
2335 return std::nullopt;
2336 auto Delimiter = getRawStringDelimiter(Current.TokenText);
2337 if (!Delimiter)
2338 return std::nullopt;
2339 auto RawStringStyle = RawStringFormats.getDelimiterStyle(*Delimiter);
2340 if (!RawStringStyle && Delimiter->empty()) {
2341 RawStringStyle = RawStringFormats.getEnclosingFunctionStyle(
2342 getEnclosingFunctionName(Current));
2343 }
2344 if (!RawStringStyle)
2345 return std::nullopt;
2346 RawStringStyle->ColumnLimit = getColumnLimit(State);
2347 return RawStringStyle;
2348}
2349
2350std::unique_ptr<BreakableToken>
2351ContinuationIndenter::createBreakableToken(const FormatToken &Current,
2352 LineState &State, bool AllowBreak) {
2353 unsigned StartColumn = State.Column - Current.ColumnWidth;
2354 if (Current.isStringLiteral()) {
2355 // Strings in JSON cannot be broken. Breaking strings in JavaScript is
2356 // disabled for now.
2357 if (Style.isJson() || Style.isJavaScript() || !Style.BreakStringLiterals ||
2358 !AllowBreak) {
2359 return nullptr;
2360 }
2361
2362 // Don't break string literals inside preprocessor directives (except for
2363 // #define directives, as their contents are stored in separate lines and
2364 // are not affected by this check).
2365 // This way we avoid breaking code with line directives and unknown
2366 // preprocessor directives that contain long string literals.
2367 if (State.Line->Type == LT_PreprocessorDirective)
2368 return nullptr;
2369 // Exempts unterminated string literals from line breaking. The user will
2370 // likely want to terminate the string before any line breaking is done.
2371 if (Current.IsUnterminatedLiteral)
2372 return nullptr;
2373 // Don't break string literals inside Objective-C array literals (doing so
2374 // raises the warning -Wobjc-string-concatenation).
2375 if (State.Stack.back().IsInsideObjCArrayLiteral)
2376 return nullptr;
2377
2378 // The "DPI"/"DPI-C" in SystemVerilog direct programming interface
2379 // imports/exports cannot be split, e.g.
2380 // `import "DPI" function foo();`
2381 // FIXME: make this use same infra as C++ import checks
2382 if (Style.isVerilog() && Current.Previous &&
2383 Current.Previous->isOneOf(tok::kw_export, Keywords.kw_import)) {
2384 return nullptr;
2385 }
2386 StringRef Text = Current.TokenText;
2387
2388 // We need this to address the case where there is an unbreakable tail only
2389 // if certain other formatting decisions have been taken. The
2390 // UnbreakableTailLength of Current is an overapproximation in that case and
2391 // we need to be correct here.
2392 unsigned UnbreakableTailLength = (State.NextToken && canBreak(State))
2393 ? 0
2394 : Current.UnbreakableTailLength;
2395
2396 if (Style.isVerilog() || Style.Language == FormatStyle::LK_Java ||
2397 Style.isJavaScript() || Style.isCSharp()) {
2399 if (Style.isJavaScript() && Text.starts_with("'") &&
2400 Text.ends_with("'")) {
2402 } else if (Style.isCSharp() && Text.starts_with("@\"") &&
2403 Text.ends_with("\"")) {
2405 } else if (Text.starts_with("\"") && Text.ends_with("\"")) {
2407 } else {
2408 return nullptr;
2409 }
2410 return std::make_unique<BreakableStringLiteralUsingOperators>(
2411 Current, QuoteStyle,
2412 /*UnindentPlus=*/shouldUnindentNextOperator(Current), StartColumn,
2413 UnbreakableTailLength, State.Line->InPPDirective, Encoding, Style);
2414 }
2415
2416 StringRef Prefix;
2417 StringRef Postfix;
2418 // FIXME: Handle whitespace between '_T', '(', '"..."', and ')'.
2419 // FIXME: Store Prefix and Suffix (or PrefixLength and SuffixLength to
2420 // reduce the overhead) for each FormatToken, which is a string, so that we
2421 // don't run multiple checks here on the hot path.
2422 if ((Text.ends_with(Postfix = "\"") &&
2423 (Text.starts_with(Prefix = "@\"") || Text.starts_with(Prefix = "\"") ||
2424 Text.starts_with(Prefix = "u\"") ||
2425 Text.starts_with(Prefix = "U\"") ||
2426 Text.starts_with(Prefix = "u8\"") ||
2427 Text.starts_with(Prefix = "L\""))) ||
2428 (Text.starts_with(Prefix = "_T(\"") &&
2429 Text.ends_with(Postfix = "\")"))) {
2430 return std::make_unique<BreakableStringLiteral>(
2431 Current, StartColumn, Prefix, Postfix, UnbreakableTailLength,
2432 State.Line->InPPDirective, Encoding, Style);
2433 }
2434 } else if (Current.is(TT_BlockComment)) {
2435 if (!Style.ReflowComments ||
2436 // If a comment token switches formatting, like
2437 // /* clang-format on */, we don't want to break it further,
2438 // but we may still want to adjust its indentation.
2439 switchesFormatting(Current)) {
2440 return nullptr;
2441 }
2442 return std::make_unique<BreakableBlockComment>(
2443 Current, StartColumn, Current.OriginalColumn, !Current.Previous,
2444 State.Line->InPPDirective, Encoding, Style, Whitespaces.useCRLF());
2445 } else if (Current.is(TT_LineComment) &&
2446 (!Current.Previous ||
2447 Current.Previous->isNot(TT_ImplicitStringLiteral))) {
2448 bool RegularComments = [&]() {
2449 for (const FormatToken *T = &Current; T && T->is(TT_LineComment);
2450 T = T->Next) {
2451 if (!(T->TokenText.starts_with("//") || T->TokenText.starts_with("#")))
2452 return false;
2453 }
2454 return true;
2455 }();
2456 if (!Style.ReflowComments ||
2457 CommentPragmasRegex.match(Current.TokenText.substr(2)) ||
2458 switchesFormatting(Current) || !RegularComments) {
2459 return nullptr;
2460 }
2461 return std::make_unique<BreakableLineCommentSection>(
2462 Current, StartColumn, /*InPPDirective=*/false, Encoding, Style);
2463 }
2464 return nullptr;
2465}
2466
2467std::pair<unsigned, bool>
2468ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
2469 LineState &State, bool AllowBreak,
2470 bool DryRun, bool Strict) {
2471 std::unique_ptr<const BreakableToken> Token =
2472 createBreakableToken(Current, State, AllowBreak);
2473 if (!Token)
2474 return {0, false};
2475 assert(Token->getLineCount() > 0);
2476 unsigned ColumnLimit = getColumnLimit(State);
2477 if (Current.is(TT_LineComment)) {
2478 // We don't insert backslashes when breaking line comments.
2479 ColumnLimit = Style.ColumnLimit;
2480 }
2481 if (ColumnLimit == 0) {
2482 // To make the rest of the function easier set the column limit to the
2483 // maximum, if there should be no limit.
2484 ColumnLimit = std::numeric_limits<decltype(ColumnLimit)>::max();
2485 }
2486 if (Current.UnbreakableTailLength >= ColumnLimit)
2487 return {0, false};
2488 // ColumnWidth was already accounted into State.Column before calling
2489 // breakProtrudingToken.
2490 unsigned StartColumn = State.Column - Current.ColumnWidth;
2491 unsigned NewBreakPenalty = Current.isStringLiteral()
2492 ? Style.PenaltyBreakString
2493 : Style.PenaltyBreakComment;
2494 // Stores whether we intentionally decide to let a line exceed the column
2495 // limit.
2496 bool Exceeded = false;
2497 // Stores whether we introduce a break anywhere in the token.
2498 bool BreakInserted = Token->introducesBreakBeforeToken();
2499 // Store whether we inserted a new line break at the end of the previous
2500 // logical line.
2501 bool NewBreakBefore = false;
2502 // We use a conservative reflowing strategy. Reflow starts after a line is
2503 // broken or the corresponding whitespace compressed. Reflow ends as soon as a
2504 // line that doesn't get reflown with the previous line is reached.
2505 bool Reflow = false;
2506 // Keep track of where we are in the token:
2507 // Where we are in the content of the current logical line.
2508 unsigned TailOffset = 0;
2509 // The column number we're currently at.
2510 unsigned ContentStartColumn =
2511 Token->getContentStartColumn(0, /*Break=*/false);
2512 // The number of columns left in the current logical line after TailOffset.
2513 unsigned RemainingTokenColumns =
2514 Token->getRemainingLength(0, TailOffset, ContentStartColumn);
2515 // Adapt the start of the token, for example indent.
2516 if (!DryRun)
2517 Token->adaptStartOfLine(0, Whitespaces);
2518
2519 unsigned ContentIndent = 0;
2520 unsigned Penalty = 0;
2521 LLVM_DEBUG(llvm::dbgs() << "Breaking protruding token at column "
2522 << StartColumn << ".\n");
2523 for (unsigned LineIndex = 0, EndIndex = Token->getLineCount();
2524 LineIndex != EndIndex; ++LineIndex) {
2525 LLVM_DEBUG(llvm::dbgs()
2526 << " Line: " << LineIndex << " (Reflow: " << Reflow << ")\n");
2527 NewBreakBefore = false;
2528 // If we did reflow the previous line, we'll try reflowing again. Otherwise
2529 // we'll start reflowing if the current line is broken or whitespace is
2530 // compressed.
2531 bool TryReflow = Reflow;
2532 // Break the current token until we can fit the rest of the line.
2533 while (ContentStartColumn + RemainingTokenColumns > ColumnLimit) {
2534 LLVM_DEBUG(llvm::dbgs() << " Over limit, need: "
2535 << (ContentStartColumn + RemainingTokenColumns)
2536 << ", space: " << ColumnLimit
2537 << ", reflown prefix: " << ContentStartColumn
2538 << ", offset in line: " << TailOffset << "\n");
2539 // If the current token doesn't fit, find the latest possible split in the
2540 // current line so that breaking at it will be under the column limit.
2541 // FIXME: Use the earliest possible split while reflowing to correctly
2542 // compress whitespace within a line.
2544 Token->getSplit(LineIndex, TailOffset, ColumnLimit,
2545 ContentStartColumn, CommentPragmasRegex);
2546 if (Split.first == StringRef::npos) {
2547 // No break opportunity - update the penalty and continue with the next
2548 // logical line.
2549 if (LineIndex < EndIndex - 1) {
2550 // The last line's penalty is handled in addNextStateToQueue() or when
2551 // calling replaceWhitespaceAfterLastLine below.
2552 Penalty += Style.PenaltyExcessCharacter *
2553 (ContentStartColumn + RemainingTokenColumns - ColumnLimit);
2554 }
2555 LLVM_DEBUG(llvm::dbgs() << " No break opportunity.\n");
2556 break;
2557 }
2558 assert(Split.first != 0);
2559
2560 if (Token->supportsReflow()) {
2561 // Check whether the next natural split point after the current one can
2562 // still fit the line, either because we can compress away whitespace,
2563 // or because the penalty the excess characters introduce is lower than
2564 // the break penalty.
2565 // We only do this for tokens that support reflowing, and thus allow us
2566 // to change the whitespace arbitrarily (e.g. comments).
2567 // Other tokens, like string literals, can be broken on arbitrary
2568 // positions.
2569
2570 // First, compute the columns from TailOffset to the next possible split
2571 // position.
2572 // For example:
2573 // ColumnLimit: |
2574 // // Some text that breaks
2575 // ^ tail offset
2576 // ^-- split
2577 // ^-------- to split columns
2578 // ^--- next split
2579 // ^--------------- to next split columns
2580 unsigned ToSplitColumns = Token->getRangeLength(
2581 LineIndex, TailOffset, Split.first, ContentStartColumn);
2582 LLVM_DEBUG(llvm::dbgs() << " ToSplit: " << ToSplitColumns << "\n");
2583
2584 BreakableToken::Split NextSplit = Token->getSplit(
2585 LineIndex, TailOffset + Split.first + Split.second, ColumnLimit,
2586 ContentStartColumn + ToSplitColumns + 1, CommentPragmasRegex);
2587 // Compute the columns necessary to fit the next non-breakable sequence
2588 // into the current line.
2589 unsigned ToNextSplitColumns = 0;
2590 if (NextSplit.first == StringRef::npos) {
2591 ToNextSplitColumns = Token->getRemainingLength(LineIndex, TailOffset,
2592 ContentStartColumn);
2593 } else {
2594 ToNextSplitColumns = Token->getRangeLength(
2595 LineIndex, TailOffset,
2596 Split.first + Split.second + NextSplit.first, ContentStartColumn);
2597 }
2598 // Compress the whitespace between the break and the start of the next
2599 // unbreakable sequence.
2600 ToNextSplitColumns =
2601 Token->getLengthAfterCompression(ToNextSplitColumns, Split);
2602 LLVM_DEBUG(llvm::dbgs()
2603 << " ContentStartColumn: " << ContentStartColumn << "\n");
2604 LLVM_DEBUG(llvm::dbgs()
2605 << " ToNextSplit: " << ToNextSplitColumns << "\n");
2606 // If the whitespace compression makes us fit, continue on the current
2607 // line.
2608 bool ContinueOnLine =
2609 ContentStartColumn + ToNextSplitColumns <= ColumnLimit;
2610 unsigned ExcessCharactersPenalty = 0;
2611 if (!ContinueOnLine && !Strict) {
2612 // Similarly, if the excess characters' penalty is lower than the
2613 // penalty of introducing a new break, continue on the current line.
2614 ExcessCharactersPenalty =
2615 (ContentStartColumn + ToNextSplitColumns - ColumnLimit) *
2617 LLVM_DEBUG(llvm::dbgs()
2618 << " Penalty excess: " << ExcessCharactersPenalty
2619 << "\n break : " << NewBreakPenalty << "\n");
2620 if (ExcessCharactersPenalty < NewBreakPenalty) {
2621 Exceeded = true;
2622 ContinueOnLine = true;
2623 }
2624 }
2625 if (ContinueOnLine) {
2626 LLVM_DEBUG(llvm::dbgs() << " Continuing on line...\n");
2627 // The current line fits after compressing the whitespace - reflow
2628 // the next line into it if possible.
2629 TryReflow = true;
2630 if (!DryRun) {
2631 Token->compressWhitespace(LineIndex, TailOffset, Split,
2632 Whitespaces);
2633 }
2634 // When we continue on the same line, leave one space between content.
2635 ContentStartColumn += ToSplitColumns + 1;
2636 Penalty += ExcessCharactersPenalty;
2637 TailOffset += Split.first + Split.second;
2638 RemainingTokenColumns = Token->getRemainingLength(
2639 LineIndex, TailOffset, ContentStartColumn);
2640 continue;
2641 }
2642 }
2643 LLVM_DEBUG(llvm::dbgs() << " Breaking...\n");
2644 // Update the ContentIndent only if the current line was not reflown with
2645 // the previous line, since in that case the previous line should still
2646 // determine the ContentIndent. Also never intent the last line.
2647 if (!Reflow)
2648 ContentIndent = Token->getContentIndent(LineIndex);
2649 LLVM_DEBUG(llvm::dbgs()
2650 << " ContentIndent: " << ContentIndent << "\n");
2651 ContentStartColumn = ContentIndent + Token->getContentStartColumn(
2652 LineIndex, /*Break=*/true);
2653
2654 unsigned NewRemainingTokenColumns = Token->getRemainingLength(
2655 LineIndex, TailOffset + Split.first + Split.second,
2656 ContentStartColumn);
2657 if (NewRemainingTokenColumns == 0) {
2658 // No content to indent.
2659 ContentIndent = 0;
2660 ContentStartColumn =
2661 Token->getContentStartColumn(LineIndex, /*Break=*/true);
2662 NewRemainingTokenColumns = Token->getRemainingLength(
2663 LineIndex, TailOffset + Split.first + Split.second,
2664 ContentStartColumn);
2665 }
2666
2667 // When breaking before a tab character, it may be moved by a few columns,
2668 // but will still be expanded to the next tab stop, so we don't save any
2669 // columns.
2670 if (NewRemainingTokenColumns >= RemainingTokenColumns) {
2671 // FIXME: Do we need to adjust the penalty?
2672 break;
2673 }
2674
2675 LLVM_DEBUG(llvm::dbgs() << " Breaking at: " << TailOffset + Split.first
2676 << ", " << Split.second << "\n");
2677 if (!DryRun) {
2678 Token->insertBreak(LineIndex, TailOffset, Split, ContentIndent,
2679 Whitespaces);
2680 }
2681
2682 Penalty += NewBreakPenalty;
2683 TailOffset += Split.first + Split.second;
2684 RemainingTokenColumns = NewRemainingTokenColumns;
2685 BreakInserted = true;
2686 NewBreakBefore = true;
2687 }
2688 // In case there's another line, prepare the state for the start of the next
2689 // line.
2690 if (LineIndex + 1 != EndIndex) {
2691 unsigned NextLineIndex = LineIndex + 1;
2692 if (NewBreakBefore) {
2693 // After breaking a line, try to reflow the next line into the current
2694 // one once RemainingTokenColumns fits.
2695 TryReflow = true;
2696 }
2697 if (TryReflow) {
2698 // We decided that we want to try reflowing the next line into the
2699 // current one.
2700 // We will now adjust the state as if the reflow is successful (in
2701 // preparation for the next line), and see whether that works. If we
2702 // decide that we cannot reflow, we will later reset the state to the
2703 // start of the next line.
2704 Reflow = false;
2705 // As we did not continue breaking the line, RemainingTokenColumns is
2706 // known to fit after ContentStartColumn. Adapt ContentStartColumn to
2707 // the position at which we want to format the next line if we do
2708 // actually reflow.
2709 // When we reflow, we need to add a space between the end of the current
2710 // line and the next line's start column.
2711 ContentStartColumn += RemainingTokenColumns + 1;
2712 // Get the split that we need to reflow next logical line into the end
2713 // of the current one; the split will include any leading whitespace of
2714 // the next logical line.
2715 BreakableToken::Split SplitBeforeNext =
2716 Token->getReflowSplit(NextLineIndex, CommentPragmasRegex);
2717 LLVM_DEBUG(llvm::dbgs()
2718 << " Size of reflown text: " << ContentStartColumn
2719 << "\n Potential reflow split: ");
2720 if (SplitBeforeNext.first != StringRef::npos) {
2721 LLVM_DEBUG(llvm::dbgs() << SplitBeforeNext.first << ", "
2722 << SplitBeforeNext.second << "\n");
2723 TailOffset = SplitBeforeNext.first + SplitBeforeNext.second;
2724 // If the rest of the next line fits into the current line below the
2725 // column limit, we can safely reflow.
2726 RemainingTokenColumns = Token->getRemainingLength(
2727 NextLineIndex, TailOffset, ContentStartColumn);
2728 Reflow = true;
2729 if (ContentStartColumn + RemainingTokenColumns > ColumnLimit) {
2730 LLVM_DEBUG(llvm::dbgs()
2731 << " Over limit after reflow, need: "
2732 << (ContentStartColumn + RemainingTokenColumns)
2733 << ", space: " << ColumnLimit
2734 << ", reflown prefix: " << ContentStartColumn
2735 << ", offset in line: " << TailOffset << "\n");
2736 // If the whole next line does not fit, try to find a point in
2737 // the next line at which we can break so that attaching the part
2738 // of the next line to that break point onto the current line is
2739 // below the column limit.
2741 Token->getSplit(NextLineIndex, TailOffset, ColumnLimit,
2742 ContentStartColumn, CommentPragmasRegex);
2743 if (Split.first == StringRef::npos) {
2744 LLVM_DEBUG(llvm::dbgs() << " Did not find later break\n");
2745 Reflow = false;
2746 } else {
2747 // Check whether the first split point gets us below the column
2748 // limit. Note that we will execute this split below as part of
2749 // the normal token breaking and reflow logic within the line.
2750 unsigned ToSplitColumns = Token->getRangeLength(
2751 NextLineIndex, TailOffset, Split.first, ContentStartColumn);
2752 if (ContentStartColumn + ToSplitColumns > ColumnLimit) {
2753 LLVM_DEBUG(llvm::dbgs() << " Next split protrudes, need: "
2754 << (ContentStartColumn + ToSplitColumns)
2755 << ", space: " << ColumnLimit);
2756 unsigned ExcessCharactersPenalty =
2757 (ContentStartColumn + ToSplitColumns - ColumnLimit) *
2759 if (NewBreakPenalty < ExcessCharactersPenalty)
2760 Reflow = false;
2761 }
2762 }
2763 }
2764 } else {
2765 LLVM_DEBUG(llvm::dbgs() << "not found.\n");
2766 }
2767 }
2768 if (!Reflow) {
2769 // If we didn't reflow into the next line, the only space to consider is
2770 // the next logical line. Reset our state to match the start of the next
2771 // line.
2772 TailOffset = 0;
2773 ContentStartColumn =
2774 Token->getContentStartColumn(NextLineIndex, /*Break=*/false);
2775 RemainingTokenColumns = Token->getRemainingLength(
2776 NextLineIndex, TailOffset, ContentStartColumn);
2777 // Adapt the start of the token, for example indent.
2778 if (!DryRun)
2779 Token->adaptStartOfLine(NextLineIndex, Whitespaces);
2780 } else {
2781 // If we found a reflow split and have added a new break before the next
2782 // line, we are going to remove the line break at the start of the next
2783 // logical line. For example, here we'll add a new line break after
2784 // 'text', and subsequently delete the line break between 'that' and
2785 // 'reflows'.
2786 // // some text that
2787 // // reflows
2788 // ->
2789 // // some text
2790 // // that reflows
2791 // When adding the line break, we also added the penalty for it, so we
2792 // need to subtract that penalty again when we remove the line break due
2793 // to reflowing.
2794 if (NewBreakBefore) {
2795 assert(Penalty >= NewBreakPenalty);
2796 Penalty -= NewBreakPenalty;
2797 }
2798 if (!DryRun)
2799 Token->reflow(NextLineIndex, Whitespaces);
2800 }
2801 }
2802 }
2803
2804 BreakableToken::Split SplitAfterLastLine =
2805 Token->getSplitAfterLastLine(TailOffset);
2806 if (SplitAfterLastLine.first != StringRef::npos) {
2807 LLVM_DEBUG(llvm::dbgs() << "Replacing whitespace after last line.\n");
2808
2809 // We add the last line's penalty here, since that line is going to be split
2810 // now.
2811 Penalty += Style.PenaltyExcessCharacter *
2812 (ContentStartColumn + RemainingTokenColumns - ColumnLimit);
2813
2814 if (!DryRun) {
2815 Token->replaceWhitespaceAfterLastLine(TailOffset, SplitAfterLastLine,
2816 Whitespaces);
2817 }
2818 ContentStartColumn =
2819 Token->getContentStartColumn(Token->getLineCount() - 1, /*Break=*/true);
2820 RemainingTokenColumns = Token->getRemainingLength(
2821 Token->getLineCount() - 1,
2822 TailOffset + SplitAfterLastLine.first + SplitAfterLastLine.second,
2823 ContentStartColumn);
2824 }
2825
2826 State.Column = ContentStartColumn + RemainingTokenColumns -
2827 Current.UnbreakableTailLength;
2828
2829 if (BreakInserted) {
2830 if (!DryRun)
2831 Token->updateAfterBroken(Whitespaces);
2832
2833 // If we break the token inside a parameter list, we need to break before
2834 // the next parameter on all levels, so that the next parameter is clearly
2835 // visible. Line comments already introduce a break.
2836 if (Current.isNot(TT_LineComment))
2837 for (ParenState &Paren : State.Stack)
2838 Paren.BreakBeforeParameter = true;
2839
2840 if (Current.is(TT_BlockComment))
2841 State.NoContinuation = true;
2842
2843 State.Stack.back().LastSpace = StartColumn;
2844 }
2845
2846 Token->updateNextToken(State);
2847
2848 return {Penalty, Exceeded};
2849}
2850
2852 // In preprocessor directives reserve two chars for trailing " \".
2853 return Style.ColumnLimit - (State.Line->InPPDirective ? 2 : 0);
2854}
2855
2856bool ContinuationIndenter::nextIsMultilineString(const LineState &State) {
2857 const FormatToken &Current = *State.NextToken;
2858 if (!Current.isStringLiteral() || Current.is(TT_ImplicitStringLiteral))
2859 return false;
2860 // We never consider raw string literals "multiline" for the purpose of
2861 // AlwaysBreakBeforeMultilineStrings implementation as they are special-cased
2862 // (see TokenAnnotator::mustBreakBefore().
2863 if (Current.TokenText.starts_with("R\""))
2864 return false;
2865 if (Current.IsMultiline)
2866 return true;
2867 if (Current.getNextNonComment() &&
2868 Current.getNextNonComment()->isStringLiteral()) {
2869 return true; // Implicit concatenation.
2870 }
2871 if (Style.ColumnLimit != 0 && Style.BreakStringLiterals &&
2872 State.Column + Current.ColumnWidth + Current.UnbreakableTailLength >
2873 Style.ColumnLimit) {
2874 return true; // String will be split.
2875 }
2876 return false;
2877}
2878
2879} // namespace format
2880} // namespace clang
StringRef P
Declares BreakableToken, BreakableStringLiteral, BreakableComment, BreakableBlockComment and Breakabl...
Expr * E
This file implements an indenter that manages the indentation of continuations.
This file declares Format APIs to be used internally by the formatting library implementation.
This file contains the declaration of the FormatToken, a wrapper around Token with additional informa...
StringRef Text
Definition: Format.cpp:2990
Various functions to configurably format source code.
Defines and computes precedence levels for binary/ternary operators.
Defines the SourceManager interface.
unsigned LongestObjCSelectorName
Defines the clang::TokenKind enum and support functions.
StateNode * Previous
WhitespaceManager class manages whitespace around tokens and their replacements.
__DEVICE__ int max(int __a, int __b)
This class handles loading and caching of source files into memory.
unsigned getSpellingColumnNumber(SourceLocation Loc, bool *Invalid=nullptr) const
std::pair< StringRef::size_type, unsigned > Split
Contains starting character index and length of split.
bool canBreak(const LineState &State)
Returns true, if a line break after State is allowed.
unsigned addTokenToState(LineState &State, bool Newline, bool DryRun, unsigned ExtraSpaces=0)
Appends the next token to State and updates information necessary for indentation.
unsigned getColumnLimit(const LineState &State) const
Get the column limit for this line.
LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn, const AnnotatedLine *Line, bool DryRun)
Get the initial state, i.e.
ContinuationIndenter(const FormatStyle &Style, const AdditionalKeywords &Keywords, const SourceManager &SourceMgr, WhitespaceManager &Whitespaces, encoding::Encoding Encoding, bool BinPackInconclusiveFunctions)
Constructs a ContinuationIndenter to format Line starting in column FirstIndent.
bool mustBreak(const LineState &State)
Returns true, if a line break after State is mandatory.
Manages the whitespaces around tokens and their replacements.
llvm::Error addReplacement(const tooling::Replacement &Replacement)
void replaceWhitespace(FormatToken &Tok, unsigned Newlines, unsigned Spaces, unsigned StartOfTokenColumn, bool IsAligned=false, bool InPPDirective=false)
Replaces the whitespace in front of Tok.
unsigned columnWidthWithTabs(StringRef Text, unsigned StartColumn, unsigned TabWidth, Encoding Encoding)
Returns the number of columns required to display the Text, starting from the StartColumn on a termin...
Definition: Encoding.h:60
std::pair< tooling::Replacements, unsigned > reformat(const FormatStyle &Style, StringRef Code, ArrayRef< tooling::Range > Ranges, unsigned FirstStartColumn, unsigned NextStartColumn, unsigned LastStartColumn, StringRef FileName, FormattingAttemptStatus *Status)
Reformats the given Ranges in the code fragment Code.
Definition: Format.cpp:3622
static unsigned getLastLineEndColumn(StringRef Text, unsigned StartColumn, unsigned TabWidth, encoding::Encoding Encoding)
static bool shouldUnindentNextOperator(const FormatToken &Tok)
FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language=FormatStyle::LanguageKind::LK_Cpp)
Returns a format style complying with the LLVM coding standards: http://llvm.org/docs/CodingStandards...
Definition: Format.cpp:1412
bool switchesFormatting(const FormatToken &Token)
Checks if Token switches formatting, like /* clang-format off *‍/.
static bool hasNestedBlockInlined(const FormatToken *Previous, const FormatToken &Current, const FormatStyle &Style)
static bool startsSegmentOfBuilderTypeCall(const FormatToken &Tok)
static unsigned getLengthToNextOperator(const FormatToken &Tok)
static bool startsNextParameter(const FormatToken &Current, const FormatStyle &Style)
static unsigned getLengthToMatchingParen(const FormatToken &Tok, ArrayRef< ParenState > Stack)
static bool shouldIndentWrappedSelectorName(const FormatStyle &Style, LineType LineType)
bool getPredefinedStyle(StringRef Name, FormatStyle::LanguageKind Language, FormatStyle *Style)
Gets a predefined style for the specified language by name.
Definition: Format.cpp:1949
static std::optional< StringRef > getRawStringDelimiter(StringRef TokenText)
static StringRef getCanonicalRawStringDelimiter(const FormatStyle &Style, FormatStyle::LanguageKind Language)
static bool opensProtoMessageField(const FormatToken &LessTok, const FormatStyle &Style)
static StringRef getEnclosingFunctionName(const FormatToken &Current)
bool applyAllReplacements(const Replacements &Replaces, Rewriter &Rewrite)
Apply all replacements in Replaces to the Rewriter Rewrite.
The JSON file list parser is used to communicate input to InstallAPI.
if(T->getSizeExpr()) TRY_TO(TraverseStmt(const_cast< Expr * >(T -> getSizeExpr())))
Language
The language for the input, used to select and validate the language standard and possible actions.
Definition: LangStandard.h:23
const FunctionProtoType * T
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
Definition: FormatToken.h:1024
bool isVerilogBegin(const FormatToken &Tok) const
Returns whether Tok is a Verilog keyword that opens a block.
Definition: FormatToken.h:1850
bool isVerilogEndOfLabel(const FormatToken &Tok) const
Definition: FormatToken.h:1884
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:55
@ UT_Never
Never use tab.
Definition: Format.h:4936
unsigned ContinuationIndentWidth
Indent width for line continuations.
Definition: Format.h:2436
bool AlwaysBreakBeforeMultilineStrings
If true, always break before multiline string literals.
Definition: Format.h:1104
ReturnTypeBreakingStyle BreakAfterReturnType
The function declaration return type breaking style to use.
Definition: Format.h:1655
bool isTableGen() const
Definition: Format.h:3217
LanguageKind
Supported languages.
Definition: Format.h:3181
@ LK_Java
Should be used for Java.
Definition: Format.h:3189
@ LK_ObjC
Should be used for Objective-C, Objective-C++.
Definition: Format.h:3195
@ LK_Proto
Should be used for Protocol Buffers (https://developers.google.com/protocol-buffers/).
Definition: Format.h:3198
@ LK_TextProto
Should be used for Protocol Buffer messages in text format (https://developers.google....
Definition: Format.h:3203
bool Cpp11BracedListStyle
If true, format braced lists as best suited for C++11 braced lists.
Definition: Format.h:2459
BreakInheritanceListStyle BreakInheritanceList
The inheritance list style to use.
Definition: Format.h:2387
unsigned IndentWidth
The number of columns to use for indentation.
Definition: Format.h:2859
bool IndentRequiresClause
Indent the requires clause in a template.
Definition: Format.h:2845
std::vector< RawStringFormat > RawStringFormats
Defines hints for detecting supported languages code blocks in raw strings.
Definition: Format.h:3753
PPDirectiveIndentStyle IndentPPDirectives
The preprocessor directive indenting style to use.
Definition: Format.h:2823
BinaryOperatorStyle BreakBeforeBinaryOperators
The way to wrap binary operators.
Definition: Format.h:1729
@ BPS_Auto
Automatically determine parameter bin-packing behavior.
Definition: Format.h:1680
@ BPS_Always
Always bin-pack parameters.
Definition: Format.h:1682
@ BCIS_AfterColon
Break constructor initializers after the colon and commas.
Definition: Format.h:2257
@ BCIS_BeforeColon
Break constructor initializers before the colon and after the commas.
Definition: Format.h:2242
@ BCIS_BeforeComma
Break constructor initializers before the colon and commas, and align the commas with the colon.
Definition: Format.h:2250
BreakBeforeConceptDeclarationsStyle BreakBeforeConceptDeclarations
The concept declaration style to use.
Definition: Format.h:2188
BreakTemplateDeclarationsStyle BreakTemplateDeclarations
The template declaration breaking style to use.
Definition: Format.h:2391
@ BOS_None
Break after operators.
Definition: Format.h:1700
bool IndentWrappedFunctionNames
Indent if a function definition or declaration is wrapped after the type.
Definition: Format.h:2873
LanguageKind Language
Language, this format style is targeted at.
Definition: Format.h:3221
@ BAS_DontAlign
Don't align, instead use ContinuationIndentWidth, e.g.:
Definition: Format.h:78
@ BAS_AlwaysBreak
Always break after an open bracket, if the parameters don't fit on a single line, e....
Definition: Format.h:85
@ BAS_BlockIndent
Always break after an open bracket, if the parameters don't fit on a single line.
Definition: Format.h:99
@ BBIAS_Always
Always break before inline ASM colon.
Definition: Format.h:2212
@ BBIAS_OnlyMultiline
Break before inline ASM colon if the line length is longer than column limit.
Definition: Format.h:2205
unsigned TabWidth
The number of columns used for tab stops.
Definition: Format.h:4900
@ PPDIS_AfterHash
Indents directives after the hash.
Definition: Format.h:2809
@ LBI_OuterScope
For statements within block scope, align lambda body relative to the indentation level of the outer s...
Definition: Format.h:3165
@ LBI_Signature
Align lambda body relative to the lambda signature.
Definition: Format.h:3151
unsigned PenaltyBreakFirstLessLess
The penalty for breaking before the first <<.
Definition: Format.h:3554
unsigned ObjCBlockIndentWidth
The number of characters to use for indentation of ObjC blocks.
Definition: Format.h:3419
std::optional< FormatStyle > GetLanguageStyle(LanguageKind Language) const
Definition: Format.cpp:2117
std::optional< unsigned > BracedInitializerIndentWidth
The number of columns to use to indent the contents of braced init lists.
Definition: Format.h:1270
bool ExperimentalAutoDetectBinPacking
If true, clang-format detects whether function calls and definitions are formatted with one parameter...
Definition: Format.h:2605
bool ObjCBreakBeforeNestedBlockParam
Break parameters list into lines when there is nested block parameters in a function call.
Definition: Format.h:3443
OperandAlignmentStyle AlignOperands
If true, horizontally align operands of binary and ternary expressions.
Definition: Format.h:549
@ BTDS_No
Do not force break before declaration.
Definition: Format.h:1127
@ BTDS_Leave
Do not change the line breaking before the declaration.
Definition: Format.h:1117
BreakConstructorInitializersStyle BreakConstructorInitializers
The break constructor initializers style to use.
Definition: Format.h:2262
bool BreakStringLiterals
Allow breaking string literals when formatting.
Definition: Format.h:2329
@ PCIS_NextLineOnly
Put all constructor initializers on the next line if they fit.
Definition: Format.h:3533
@ PCIS_BinPack
Bin-pack constructor initializers.
Definition: Format.h:3493
@ PCIS_NextLine
Same as PCIS_CurrentLine except that if all constructor initializers do not fit on the current line,...
Definition: Format.h:3518
BraceBreakingStyle BreakBeforeBraces
The brace breaking style to use.
Definition: Format.h:2164
@ BILS_AfterColon
Break inheritance list after the colon and commas.
Definition: Format.h:2375
@ BILS_BeforeComma
Break inheritance list before the colon and commas, and align the commas with the colon.
Definition: Format.h:2367
bool isCSharp() const
Definition: Format.h:3210
unsigned PenaltyExcessCharacter
The penalty for each character outside of the column limit.
Definition: Format.h:3574
bool BinPackParameters
If false, a function declaration's or function definition's parameters will either all be on the same...
Definition: Format.h:1208
@ DAS_BreakElements
Break inside DAGArg after each list element but for the last.
Definition: Format.h:4883
unsigned ConstructorInitializerIndentWidth
The number of characters to use for indentation of constructor initializer lists as well as inheritance lists.
Definition: Format.h:2425
@ RCPS_OwnLine
Always put the requires clause on its own line.
Definition: Format.h:3927
@ RCPS_WithPreceding
Try to put the clause together with the preceding part of a declaration.
Definition: Format.h:3944
@ RCPS_SingleLine
Try to put everything in the same line if possible.
Definition: Format.h:3982
@ RCPS_WithFollowing
Try to put the requires clause together with the class or function declaration.
Definition: Format.h:3958
RequiresClausePositionStyle RequiresClausePosition
The position of the requires clause.
Definition: Format.h:3987
BreakBeforeInlineASMColonStyle BreakBeforeInlineASMColon
The inline ASM colon style to use.
Definition: Format.h:2217
@ BS_Whitesmiths
Like Allman but always indent braces and line up code with braces.
Definition: Format.h:2047
bool BinPackArguments
If false, a function call's arguments will either be all on the same line or will have one line each.
Definition: Format.h:1193
@ SFS_All
Merge all functions fitting on a single line.
Definition: Format.h:857
@ REI_Keyword
Align requires expression body relative to the requires keyword.
Definition: Format.h:4008
PackConstructorInitializersStyle PackConstructorInitializers
The pack constructor initializers style to use.
Definition: Format.h:3538
@ BBCDS_Allowed
Breaking between template declaration and concept is allowed.
Definition: Format.h:2176
@ BBCDS_Never
Keep the template declaration line together with concept.
Definition: Format.h:2172
@ BBCDS_Always
Always break before concept, putting it in the line after the template declaration.
Definition: Format.h:2183
bool AllowAllParametersOfDeclarationOnNextLine
This option is deprecated.
Definition: Format.h:676
BracketAlignmentStyle AlignAfterOpenBracket
Controls if and how arguments are horizontally aligned after an open bracket.
Definition: Format.h:107
bool isProto() const
Definition: Format.h:3214
bool ReflowComments
If true, clang-format will attempt to re-flow comments.
Definition: Format.h:3797
unsigned MaxEmptyLinesToKeep
The maximum number of consecutive empty lines to keep.
Definition: Format.h:3324
BinPackStyle ObjCBinPackProtocolList
Controls bin-packing Objective-C protocol conformance list items into as few lines as possible when t...
Definition: Format.h:3408
bool isVerilog() const
Definition: Format.h:3213
bool isJavaScript() const
Definition: Format.h:3212
DAGArgStyle TableGenBreakInsideDAGArg
The styles of the line break inside the DAGArg in TableGen.
Definition: Format.h:4896
LambdaBodyIndentationKind LambdaBodyIndentation
The indentation style of lambda bodies.
Definition: Format.h:3174
BraceWrappingFlags BraceWrapping
Control of individual brace wrapping cases.
Definition: Format.h:1551
unsigned PenaltyBreakString
The penalty for each line break introduced inside a string literal.
Definition: Format.h:3566
RequiresExpressionIndentationKind RequiresExpressionIndentation
The indentation used for requires expression bodies.
Definition: Format.h:4013
unsigned PenaltyIndentedWhitespace
Penalty for each character of whitespace indentation (counted relative to leading non-whitespace column).
Definition: Format.h:3579
ShortFunctionStyle AllowShortFunctionsOnASingleLine
Dependent on the value, int f() { return 0; } can be put on a single line.
Definition: Format.h:863
bool AllowAllArgumentsOnNextLine
If a function call or braced initializer list doesn't fit on a line, allow putting all arguments onto...
Definition: Format.h:653
unsigned PenaltyBreakComment
The penalty for each line break introduced inside a comment.
Definition: Format.h:3550
@ RTBS_ExceptShortType
Same as Automatic above, except that there is no break after short return types.
Definition: Format.h:1014
@ RTBS_None
This is deprecated. See Automatic below.
Definition: Format.h:991
UseTabStyle UseTab
The way to use tab characters in the resulting file.
Definition: Format.h:4952
@ OAS_AlignAfterOperator
Horizontally align operands of binary and ternary expressions.
Definition: Format.h:543
@ OAS_DontAlign
Do not align operands of binary and ternary expressions.
Definition: Format.h:517
bool BreakBeforeTernaryOperators
If true, ternary operators will be placed after line breaks.
Definition: Format.h:2232
unsigned ColumnLimit
The column limit.
Definition: Format.h:2337
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:292
unsigned NestingLevel
The nesting level of this token, i.e. the number of surrounding (), [], {} or <>.
Definition: FormatToken.h:515
bool isMemberAccess() const
Returns true if this is a "." or "->" accessing a member.
Definition: FormatToken.h:720
bool isNot(T Kind) const
Definition: FormatToken.h:623
StringRef TokenText
The raw text of the token.
Definition: FormatToken.h:312
FormatToken * getPreviousNonComment() const
Returns the previous token ignoring comments.
Definition: FormatToken.h:832
bool closesScope() const
Returns whether Tok is )]} or a closing > of a template or in protos.
Definition: FormatToken.h:710
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:604
unsigned TotalLength
The total length of the unwrapped line up to and including this token.
Definition: FormatToken.h:498
bool isTrailingComment() const
Definition: FormatToken.h:774
FormatToken * NextOperator
If this is an operator (or "."/"->") in a sequence of operators with the same precedence,...
Definition: FormatToken.h:555
FormatToken * MatchingParen
If this is a bracket, this points to the matching one.
Definition: FormatToken.h:558
FormatToken * Previous
The previous token in the unwrapped line.
Definition: FormatToken.h:561
The current state when indenting an unwrapped line.
llvm::StringMap< FormatStyle > EnclosingFunctionStyle
std::optional< FormatStyle > getDelimiterStyle(StringRef Delimiter) const
std::optional< FormatStyle > getEnclosingFunctionStyle(StringRef EnclosingFunction) const
RawStringFormatStyleManager(const FormatStyle &CodeStyle)
llvm::StringMap< FormatStyle > DelimiterStyle