clang 23.0.0git
ContinuationIndenter.h
Go to the documentation of this file.
1//===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements an indenter that manages the indentation of
11/// continuations.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
16#define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
17
18#include "Encoding.h"
19#include "FormatToken.h"
20
21namespace clang {
22class SourceManager;
23
24namespace format {
25
26class AnnotatedLine;
27class BreakableToken;
28struct FormatToken;
29struct LineState;
30struct ParenState;
33
35 llvm::StringMap<FormatStyle> DelimiterStyle;
36 llvm::StringMap<FormatStyle> EnclosingFunctionStyle;
37
38 RawStringFormatStyleManager(const FormatStyle &CodeStyle);
39
40 std::optional<FormatStyle> getDelimiterStyle(StringRef Delimiter) const;
41
42 std::optional<FormatStyle>
43 getEnclosingFunctionStyle(StringRef EnclosingFunction) const;
44};
45
46/// Represents the spaces at the start of a line, keeping track of what the
47/// spaces are for.
49 unsigned Total;
50
51 /// The column that the position of the start of the line is calculated
52 /// from. It can be more than Total.
53 unsigned IndentedFrom;
54
55 /// Add spaces for right-justifying the token. The IndentedFrom field does not
56 /// change.
57 ///
58 /// This example in Objective-C shows why the field should not change. The
59 /// token `xx` is right-justified with this method to align the `:`
60 /// symbols. The `:` symbols should remain aligned through the step that
61 /// aligns assignments. That step uses the IndentedFrom field to tell what
62 /// lines to move. Not changing the field in this method ensures that the 2
63 /// lines move together.
64 ///
65 /// [x //
66 /// xxxx:0
67 /// xx:0];
68 IndentationAndAlignment addPadding(unsigned Spaces) const;
69 /// Adding indentation is more common than padding. So the operator does that.
70 IndentationAndAlignment operator+(unsigned Spaces) const;
71 IndentationAndAlignment operator-(unsigned Spaces) const;
72 IndentationAndAlignment &operator+=(unsigned Spaces);
73
74 IndentationAndAlignment(unsigned Total, unsigned IndentedFrom);
75
76 IndentationAndAlignment(unsigned Spaces);
77
78 bool operator<(const IndentationAndAlignment &Other) const;
79};
80
82public:
83 /// Constructs a \c ContinuationIndenter to format \p Line starting in
84 /// column \p FirstIndent.
85 ContinuationIndenter(const FormatStyle &Style,
86 const AdditionalKeywords &Keywords,
87 const SourceManager &SourceMgr,
88 WhitespaceManager &Whitespaces,
89 encoding::Encoding Encoding,
90 bool BinPackInconclusiveFunctions);
91
92 /// Get the initial state, i.e. the state after placing \p Line's
93 /// first token at \p FirstIndent. When reformatting a fragment of code, as in
94 /// the case of formatting inside raw string literals, \p FirstStartColumn is
95 /// the column at which the state of the parent formatter is.
96 LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn,
97 const AnnotatedLine *Line, bool DryRun);
98
99 // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
100 // better home.
101 /// Returns \c true, if a line break after \p State is allowed.
102 bool canBreak(const LineState &State);
103
104 /// Returns \c true, if a line break after \p State is mandatory.
105 bool mustBreak(const LineState &State);
106
107 /// Appends the next token to \p State and updates information
108 /// necessary for indentation.
109 ///
110 /// Puts the token on the current line if \p Newline is \c false and adds a
111 /// line break and necessary indentation otherwise.
112 ///
113 /// If \p DryRun is \c false, also creates and stores the required
114 /// \c Replacement.
115 unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
116 unsigned ExtraSpaces = 0);
117
118 /// Get the column limit for this line. This is the style's column
119 /// limit, potentially reduced for preprocessor definitions.
120 unsigned getColumnLimit(const LineState &State) const;
121
122private:
123 /// Mark the next token as consumed in \p State and modify its stacks
124 /// accordingly.
125 unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
126
127 /// Update 'State' according to the next token's fake left parentheses.
128 void moveStatePastFakeLParens(LineState &State, bool Newline);
129 /// Update 'State' according to the next token's fake r_parens.
130 void moveStatePastFakeRParens(LineState &State);
131
132 /// Update 'State' according to the next token being one of "(<{[".
133 void moveStatePastScopeOpener(LineState &State, bool Newline);
134 /// Update 'State' according to the next token being one of ")>}]".
135 void moveStatePastScopeCloser(LineState &State);
136 /// Update 'State' with the next token opening a nested block.
137 void moveStateToNewBlock(LineState &State, bool NewLine);
138
139 /// Reformats a raw string literal.
140 ///
141 /// \returns An extra penalty induced by reformatting the token.
142 unsigned reformatRawStringLiteral(const FormatToken &Current,
143 LineState &State,
144 const FormatStyle &RawStringStyle,
145 bool DryRun, bool Newline);
146
147 /// If the current token is at the end of the current line, handle
148 /// the transition to the next line.
149 unsigned handleEndOfLine(const FormatToken &Current, LineState &State,
150 bool DryRun, bool AllowBreak, bool Newline);
151
152 /// If \p Current is a raw string that is configured to be reformatted,
153 /// return the style to be used.
154 std::optional<FormatStyle> getRawStringStyle(const FormatToken &Current,
155 const LineState &State);
156
157 /// If the current token sticks out over the end of the line, break
158 /// it if possible.
159 ///
160 /// \returns A pair (penalty, exceeded), where penalty is the extra penalty
161 /// when tokens are broken or lines exceed the column limit, and exceeded
162 /// indicates whether the algorithm purposefully left lines exceeding the
163 /// column limit.
164 ///
165 /// The returned penalty will cover the cost of the additional line breaks
166 /// and column limit violation in all lines except for the last one. The
167 /// penalty for the column limit violation in the last line (and in single
168 /// line tokens) is handled in \c addNextStateToQueue.
169 ///
170 /// \p Strict indicates whether reflowing is allowed to leave characters
171 /// protruding the column limit; if true, lines will be split strictly within
172 /// the column limit where possible; if false, words are allowed to protrude
173 /// over the column limit as long as the penalty is less than the penalty
174 /// of a break.
175 std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current,
176 LineState &State,
177 bool AllowBreak, bool DryRun,
178 bool Strict);
179
180 /// Returns the \c BreakableToken starting at \p Current, or nullptr
181 /// if the current token cannot be broken.
182 std::unique_ptr<BreakableToken>
183 createBreakableToken(const FormatToken &Current, LineState &State,
184 bool AllowBreak);
185
186 /// Appends the next token to \p State and updates information
187 /// necessary for indentation.
188 ///
189 /// Puts the token on the current line.
190 ///
191 /// If \p DryRun is \c false, also creates and stores the required
192 /// \c Replacement.
193 void addTokenOnCurrentLine(LineState &State, bool DryRun,
194 unsigned ExtraSpaces);
195
196 /// Appends the next token to \p State and updates information
197 /// necessary for indentation.
198 ///
199 /// Adds a line break and necessary indentation.
200 ///
201 /// If \p DryRun is \c false, also creates and stores the required
202 /// \c Replacement.
203 unsigned addTokenOnNewLine(LineState &State, bool DryRun);
204
205 /// Calculate the new column for a line wrap before the next token.
206 IndentationAndAlignment getNewLineColumn(const LineState &State);
207
208 /// Adds a multiline token to the \p State.
209 ///
210 /// \returns Extra penalty for the first line of the literal: last line is
211 /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
212 /// matter, as we don't change them.
213 unsigned addMultilineToken(const FormatToken &Current, LineState &State);
214
215 /// Returns \c true if the next token starts a multiline string
216 /// literal.
217 ///
218 /// This includes implicitly concatenated strings, strings that will be broken
219 /// by clang-format and string literals with escaped newlines.
220 bool nextIsMultilineString(const LineState &State);
221
222 FormatStyle Style;
223 const AdditionalKeywords &Keywords;
224 const SourceManager &SourceMgr;
225 WhitespaceManager &Whitespaces;
226 encoding::Encoding Encoding;
227 bool BinPackInconclusiveFunctions;
228 llvm::Regex CommentPragmasRegex;
229 const RawStringFormatStyleManager RawStringFormats;
230};
231
246
247 /// The token opening this parenthesis level, or nullptr if this level is
248 /// opened by fake parenthesis.
249 ///
250 /// Not considered for memoization as it will always have the same value at
251 /// the same token.
253
254 /// The position to which a specific parenthesis level needs to be
255 /// indented.
257
258 /// The token in one of the previous lines this state wants to align to.
260
261 /// The position of the last space on each level.
262 ///
263 /// Used e.g. to break like:
264 /// functionCall(Parameter, otherCall(
265 /// OtherParameter));
266 unsigned LastSpace;
267
268 /// If a block relative to this parenthesis level gets wrapped, indent
269 /// it this much.
271
272 /// The position of the first "<<" operator encountered on each level.
273 ///
274 /// Used to align "<<" operators. 0 if no such operator has been encountered
275 /// on a level.
276 unsigned FirstLessLess = 0;
277
278 /// The column of a \c ? in a conditional expression.
279 unsigned QuestionColumn = 0;
280
281 /// The position of the colon in an ObjC method declaration/call.
282 unsigned ColonPos = 0;
283
284 /// The start of the most recent function in a builder-type call.
286
287 /// Contains the start of array subscript expressions, so that they
288 /// can be aligned.
290
291 /// If a nested name specifier was broken over multiple lines, this
292 /// contains the start column of the second line. Otherwise 0.
294
295 /// If a call expression was broken over multiple lines, this
296 /// contains the start column of the second line. Otherwise 0.
297 unsigned CallContinuation = 0;
298
299 /// The column of the first variable name in a variable declaration.
300 ///
301 /// Used to align further variables if necessary.
302 unsigned VariablePos = 0;
303
304 /// The precedence. The outermost level and the levels corresponding to tokens
305 /// have prec::Unknown.
307
308 /// Whether a newline needs to be inserted before the block's closing
309 /// brace.
310 ///
311 /// We only want to insert a newline before the closing brace if there also
312 /// was a newline after the beginning left brace.
314
315 /// Whether a newline needs to be inserted before the block's closing
316 /// paren.
317 ///
318 /// We only want to insert a newline before the closing paren if there also
319 /// was a newline after the beginning left paren.
321
322 /// Whether a newline needs to be inserted before a closing angle `>`.
324
325 /// Avoid bin packing, i.e. multiple parameters/elements on multiple
326 /// lines, in this context.
328
329 /// Break after the next comma (or all the commas in this context if
330 /// \c AvoidBinPacking is \c true).
332
333 /// Line breaking in this context would break a formatting rule.
334 bool NoLineBreak : 1;
335
336 /// Same as \c NoLineBreak, but is restricted until the end of the
337 /// operand (including the next ",").
339
340 /// True if the last binary operator on this level was wrapped to the
341 /// next line.
343
344 /// \c true if this \c ParenState already contains a line-break.
345 ///
346 /// The first line break in a certain \c ParenState causes extra penalty so
347 /// that clang-format prefers similar breaks, i.e. breaks in the same
348 /// parenthesis.
350
351 /// \c true if this \c ParenState contains multiple segments of a
352 /// builder-type call on one line.
354
355 /// \c true if the colons of the current ObjC method expression should
356 /// be aligned.
357 ///
358 /// Not considered for memoization as it will always have the same value at
359 /// the same token.
360 bool AlignColons : 1;
361
362 /// \c true if at least one selector name was found in the current
363 /// ObjC method expression.
364 ///
365 /// Not considered for memoization as it will always have the same value at
366 /// the same token.
368
369 /// \c true if there are multiple nested blocks inside these parens.
370 ///
371 /// Not considered for memoization as it will always have the same value at
372 /// the same token.
374
375 /// The start of a nested block (e.g. lambda introducer in C++ or
376 /// "function" in JavaScript) is not wrapped to a new line.
378
379 /// \c true if the current \c ParenState represents an Objective-C
380 /// array literal.
382
384
385 /// true if the current \c ParenState represents the false branch of a chained
386 /// conditional expression (e.g. else-if)
388
389 /// true if the conditional was wrapped on the first operator (the question
390 /// mark).
392
393 /// Indicates the indent should be reduced by the length of the operator.
395
/// Lexicographic comparison: the fields are compared in the order listed below
/// and the first field that differs decides the result. For the boolean flags,
/// the state that has the flag set orders first.
396 bool operator<(const ParenState &Other) const {
397 if (Indent.Total != Other.Indent.Total)
398 return Indent.Total < Other.Indent.Total;
399 if (LastSpace != Other.LastSpace)
400 return LastSpace < Other.LastSpace;
401 if (NestedBlockIndent != Other.NestedBlockIndent)
402 return NestedBlockIndent < Other.NestedBlockIndent;
403 if (FirstLessLess != Other.FirstLessLess)
404 return FirstLessLess < Other.FirstLessLess;
405 if (AlignedTo != Other.AlignedTo)
406 return AlignedTo < Other.AlignedTo;
407 if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
408 return BreakBeforeClosingBrace;
409 if (BreakBeforeClosingParen != Other.BreakBeforeClosingParen)
410 return BreakBeforeClosingParen;
411 if (BreakBeforeClosingAngle != Other.BreakBeforeClosingAngle)
412 return BreakBeforeClosingAngle;
413 if (QuestionColumn != Other.QuestionColumn)
414 return QuestionColumn < Other.QuestionColumn;
415 if (AvoidBinPacking != Other.AvoidBinPacking)
416 return AvoidBinPacking;
417 if (BreakBeforeParameter != Other.BreakBeforeParameter)
418 return BreakBeforeParameter;
419 if (NoLineBreak != Other.NoLineBreak)
420 return NoLineBreak;
421 if (LastOperatorWrapped != Other.LastOperatorWrapped)
422 return LastOperatorWrapped;
423 if (ColonPos != Other.ColonPos)
424 return ColonPos < Other.ColonPos;
425 if (StartOfFunctionCall != Other.StartOfFunctionCall)
426 return StartOfFunctionCall < Other.StartOfFunctionCall;
427 if (StartOfArraySubscripts != Other.StartOfArraySubscripts)
428 return StartOfArraySubscripts < Other.StartOfArraySubscripts;
429 if (CallContinuation != Other.CallContinuation)
430 return CallContinuation < Other.CallContinuation;
431 if (VariablePos != Other.VariablePos)
432 return VariablePos < Other.VariablePos;
433 if (ContainsLineBreak != Other.ContainsLineBreak)
434 return ContainsLineBreak;
435 if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder)
436 return ContainsUnwrappedBuilder;
437 if (NestedBlockInlined != Other.NestedBlockInlined)
438 return NestedBlockInlined;
439 if (IsCSharpGenericTypeConstraint != Other.IsCSharpGenericTypeConstraint)
440 return IsCSharpGenericTypeConstraint;
441 if (IsChainedConditional != Other.IsChainedConditional)
442 return IsChainedConditional;
443 if (IsWrappedConditional != Other.IsWrappedConditional)
444 return IsWrappedConditional;
445 if (UnindentOperator != Other.UnindentOperator)
446 return UnindentOperator;
447 return Indent < Other.Indent;
448 }
449};
450
451/// The current state when indenting an unwrapped line.
452///
453/// As the indenting tries different combinations this is copied by value.
454struct LineState {
455 /// The number of used columns in the current line.
456 unsigned Column;
457
458 /// The token that needs to be next formatted.
459 FormatToken *NextToken;
460
461 /// \c true if \p NextToken should not continue this line.
462 bool NoContinuation;
463
464 /// The \c NestingLevel at the start of this line.
465 unsigned StartOfLineLevel;
466
467 /// The lowest \c NestingLevel on the current line.
468 unsigned LowestLevelOnLine;
469
470 /// The start column of the string literal, if we're in a string
471 /// literal sequence, 0 otherwise.
472 unsigned StartOfStringLiteral;
473
474 /// Disallow line breaks for this line.
475 bool NoLineBreak;
476
477 /// A stack keeping track of properties applying to parenthesis
478 /// levels.
479 SmallVector<ParenState> Stack;
480
481 /// Ignore the stack of \c ParenStates for state comparison.
482 ///
483 /// In long and deeply nested unwrapped lines, the current algorithm can
484 /// be insufficient for finding the best formatting with a reasonable amount
485 /// of time and memory. Setting this flag will effectively lead to the
486 /// algorithm not analyzing some combinations. However, these combinations
487 /// rarely contain the optimal solution: In short, accepting a higher
488 /// penalty early would need to lead to different values in the \c
489 /// ParenState stack (in an otherwise identical state) and these different
490 /// values would need to lead to a significant amount of avoided penalty
491 /// later.
492 ///
493 /// FIXME: Come up with a better algorithm instead.
494 bool IgnoreStackForComparison;
495
496 /// The indent of the first token.
497 unsigned FirstIndent;
498
499 /// The line that is being formatted.
500 ///
501 /// Does not need to be considered for memoization because it doesn't change.
502 const AnnotatedLine *Line;
503
504 /// Comparison operator to be able to use \c LineState in \c map.
505 bool operator<(const LineState &Other) const {
506 if (NextToken != Other.NextToken)
507 return NextToken < Other.NextToken;
508 if (Column != Other.Column)
509 return Column < Other.Column;
510 if (NoContinuation != Other.NoContinuation)
511 return NoContinuation;
512 if (StartOfLineLevel != Other.StartOfLineLevel)
513 return StartOfLineLevel < Other.StartOfLineLevel;
514 if (LowestLevelOnLine != Other.LowestLevelOnLine)
515 return LowestLevelOnLine < Other.LowestLevelOnLine;
516 if (StartOfStringLiteral != Other.StartOfStringLiteral)
517 return StartOfStringLiteral < Other.StartOfStringLiteral;
518 if (IgnoreStackForComparison || Other.IgnoreStackForComparison)
519 return false;
520 return Stack < Other.Stack;
521 }
522};
523
524} // end namespace format
525} // end namespace clang
526
527#endif
Contains functions for text encoding manipulation.
This file contains the declaration of the FormatToken, a wrapper around Token with additional informa...
This class handles loading and caching of source files into memory.
Base class for tokens / ranges of tokens that can allow breaking within the tokens - for example,...
bool canBreak(const LineState &State)
Returns true, if a line break after State is allowed.
unsigned addTokenToState(LineState &State, bool Newline, bool DryRun, unsigned ExtraSpaces=0)
Appends the next token to State and updates information necessary for indentation.
unsigned getColumnLimit(const LineState &State) const
Get the column limit for this line.
LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn, const AnnotatedLine *Line, bool DryRun)
Get the initial state, i.e.
ContinuationIndenter(const FormatStyle &Style, const AdditionalKeywords &Keywords, const SourceManager &SourceMgr, WhitespaceManager &Whitespaces, encoding::Encoding Encoding, bool BinPackInconclusiveFunctions)
Constructs a ContinuationIndenter to format Line starting in column FirstIndent.
bool mustBreak(const LineState &State)
Returns true, if a line break after State is mandatory.
Manages the whitespaces around tokens and their replacements.
The JSON file list parser is used to communicate input to InstallAPI.
nullptr
This class represents a compute construct, representing a 'Kind' of ‘parallel’, 'serial',...
#define false
Definition stdbool.h:26
#define true
Definition stdbool.h:25
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
A wrapper around a Token storing information about the whitespace characters preceding it.
Represents the spaces at the start of a line, keeping track of what the spaces are for.
IndentationAndAlignment operator+(unsigned Spaces) const
Adding indentation is more common than padding. So the operator does that.
IndentationAndAlignment(unsigned Total, unsigned IndentedFrom)
IndentationAndAlignment addPadding(unsigned Spaces) const
Add spaces for right-justifying the token.
IndentationAndAlignment operator-(unsigned Spaces) const
IndentationAndAlignment & operator+=(unsigned Spaces)
bool operator<(const IndentationAndAlignment &Other) const
unsigned IndentedFrom
The column that the position of the start of the line is calculated from.
The current state when indenting an unwrapped line.
unsigned StartOfStringLiteral
The start column of the string literal, if we're in a string literal sequence, 0 otherwise.
bool IgnoreStackForComparison
Ignore the stack of ParenStates for state comparison.
bool operator<(const LineState &Other) const
Comparison operator to be able to use LineState in map.
unsigned LowestLevelOnLine
The lowest NestingLevel on the current line.
const AnnotatedLine * Line
The line that is being formatted.
bool NoLineBreak
Disallow line breaks for this line.
unsigned Column
The number of used columns in the current line.
SmallVector< ParenState > Stack
A stack keeping track of properties applying to parenthesis levels.
bool NoContinuation
true if NextToken should not continue this line.
unsigned StartOfLineLevel
The NestingLevel at the start of this line.
unsigned FirstIndent
The indent of the first token.
FormatToken * NextToken
The token that needs to be next formatted.
bool AvoidBinPacking
Avoid bin packing, i.e.
unsigned NestedNameSpecifierContinuation
If a nested name specifier was broken over multiple lines, this contains the start column of the seco...
bool AlignColons
true if the colons of the current ObjC method expression should be aligned.
bool BreakBeforeClosingParen
Whether a newline needs to be inserted before the block's closing paren.
bool BreakBeforeParameter
Break after the next comma (or all the commas in this context if AvoidBinPacking is true).
const FormatToken * Tok
The token opening this parenthesis level, or nullptr if this level is opened by fake parenthesis.
bool BreakBeforeClosingBrace
Whether a newline needs to be inserted before the block's closing brace.
unsigned FirstLessLess
The position of the first "<<" operator encountered on each level.
bool LastOperatorWrapped
True if the last binary operator on this level was wrapped to the next line.
unsigned VariablePos
The column of the first variable name in a variable declaration.
unsigned LastSpace
The position of the last space on each level.
bool NoLineBreak
Line breaking in this context would break a formatting rule.
unsigned QuestionColumn
The column of a ? in a conditional expression.
unsigned StartOfFunctionCall
The start of the most recent function in a builder-type call.
bool ObjCSelectorNameFound
true if at least one selector name was found in the current ObjC method expression.
bool NoLineBreakInOperand
Same as NoLineBreak, but is restricted until the end of the operand (including the next ",...
bool operator<(const ParenState &Other) const
bool ContainsUnwrappedBuilder
true if this ParenState contains multiple segments of a builder-type call on one line.
unsigned ColonPos
The position of the colon in an ObjC method declaration/call.
bool UnindentOperator
Indicates the indent should be reduced by the length of the operator.
bool ContainsLineBreak
true if this ParenState already contains a line-break.
unsigned StartOfArraySubscripts
Contains the start of array subscript expressions, so that they can be aligned.
bool NestedBlockInlined
The start of a nested block (e.g.
ParenState(const FormatToken *Tok, IndentationAndAlignment Indent, unsigned LastSpace, bool AvoidBinPacking, bool NoLineBreak)
prec::Level Precedence
The precedence.
bool HasMultipleNestedBlocks
true if there are multiple nested blocks inside these parens.
bool IsChainedConditional
true if the current ParenState represents the false branch of a chained conditional expression (e....
bool BreakBeforeClosingAngle
Whether a newline needs to be inserted before a closing angle >.
unsigned CallContinuation
If a call expression was broken over multiple lines, this contains the start column of the second lin...
bool IsWrappedConditional
true if the conditional was wrapped on the first operator (the question mark)
IndentationAndAlignment Indent
The position to which a specific parenthesis level needs to be indented.
const FormatToken * AlignedTo
The token in one of the previous lines this state wants to align to.
unsigned NestedBlockIndent
If a block relative to this parenthesis level gets wrapped, indent it this much.
bool IsInsideObjCArrayLiteral
true if the current ParenState represents an Objective-C array literal.
llvm::StringMap< FormatStyle > EnclosingFunctionStyle
std::optional< FormatStyle > getDelimiterStyle(StringRef Delimiter) const
std::optional< FormatStyle > getEnclosingFunctionStyle(StringRef EnclosingFunction) const
RawStringFormatStyleManager(const FormatStyle &CodeStyle)
llvm::StringMap< FormatStyle > DelimiterStyle