clang 22.0.0git
ContinuationIndenter.h
Go to the documentation of this file.
1//===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements an indenter that manages the indentation of
11/// continuations.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
16#define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
17
18#include "Encoding.h"
19#include "FormatToken.h"
20
21namespace clang {
22class SourceManager;
23
24namespace format {
25
26class AnnotatedLine;
27class BreakableToken;
28struct FormatToken;
29struct LineState;
30struct ParenState;
33
35 llvm::StringMap<FormatStyle> DelimiterStyle;
36 llvm::StringMap<FormatStyle> EnclosingFunctionStyle;
37
38 RawStringFormatStyleManager(const FormatStyle &CodeStyle);
39
40 std::optional<FormatStyle> getDelimiterStyle(StringRef Delimiter) const;
41
42 std::optional<FormatStyle>
43 getEnclosingFunctionStyle(StringRef EnclosingFunction) const;
44};
45
46/// Represents the spaces at the start of a line, keeping track of what the
47/// spaces are for.
49 unsigned Total;
50
51 /// The column that the position of the start of the line is calculated
52 /// from. It can be more than Total.
53 unsigned IndentedFrom;
54
55 /// Add spaces for right-justifying the token. The IndentedFrom field does not
56 /// change.
57 ///
58 /// This example in Objective-C shows why the field should not change. The
59 /// token `xx` is right-justified with this method to align the `:`
60 /// symbols. The `:` symbols should remain aligned through the step that
61 /// aligns assignments. That step uses the IndentedFrom field to tell what
62 /// lines to move. Not changing the field in this method ensures that the 2
63 /// lines move together.
64 ///
65 /// [x //
66 /// xxxx:0
67 /// xx:0];
68 IndentationAndAlignment addPadding(unsigned Spaces) const;
69 /// Adding indentation is more common than padding. So the operator does that.
70 IndentationAndAlignment operator+(unsigned Spaces) const;
71 IndentationAndAlignment operator-(unsigned Spaces) const;
72 IndentationAndAlignment &operator+=(unsigned Spaces);
73
74 IndentationAndAlignment(unsigned Total, unsigned IndentedFrom);
75
76 IndentationAndAlignment(unsigned Spaces);
77
78 bool operator<(const IndentationAndAlignment &Other) const;
79};
80
82public:
83 /// Constructs a \c ContinuationIndenter to format \p Line starting in
84 /// column \p FirstIndent.
85 ContinuationIndenter(const FormatStyle &Style,
86 const AdditionalKeywords &Keywords,
87 const SourceManager &SourceMgr,
88 WhitespaceManager &Whitespaces,
89 encoding::Encoding Encoding,
90 bool BinPackInconclusiveFunctions);
91
92 /// Get the initial state, i.e. the state after placing \p Line's
93 /// first token at \p FirstIndent. When reformatting a fragment of code, as in
94 /// the case of formatting inside raw string literals, \p FirstStartColumn is
95 /// the column at which the state of the parent formatter is.
96 LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn,
97 const AnnotatedLine *Line, bool DryRun);
98
99 // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
100 // better home.
101 /// Returns \c true, if a line break after \p State is allowed.
102 bool canBreak(const LineState &State);
103
104 /// Returns \c true, if a line break after \p State is mandatory.
105 bool mustBreak(const LineState &State);
106
107 /// Appends the next token to \p State and updates information
108 /// necessary for indentation.
109 ///
110 /// Puts the token on the current line if \p Newline is \c false and adds a
111 /// line break and necessary indentation otherwise.
112 ///
113 /// If \p DryRun is \c false, also creates and stores the required
114 /// \c Replacement.
115 unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
116 unsigned ExtraSpaces = 0);
117
118 /// Get the column limit for this line. This is the style's column
119 /// limit, potentially reduced for preprocessor definitions.
120 unsigned getColumnLimit(const LineState &State) const;
121
122private:
123 /// Mark the next token as consumed in \p State and modify its stacks
124 /// accordingly.
125 unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
126
127 /// Update 'State' according to the next token's fake left parentheses.
128 void moveStatePastFakeLParens(LineState &State, bool Newline);
129 /// Update 'State' according to the next token's fake r_parens.
130 void moveStatePastFakeRParens(LineState &State);
131
132 /// Update 'State' according to the next token being one of "(<{[".
133 void moveStatePastScopeOpener(LineState &State, bool Newline);
134 /// Update 'State' according to the next token being one of ")>}]".
135 void moveStatePastScopeCloser(LineState &State);
136 /// Update 'State' with the next token opening a nested block.
137 void moveStateToNewBlock(LineState &State, bool NewLine);
138
139 /// Reformats a raw string literal.
140 ///
141 /// \returns An extra penalty induced by reformatting the token.
142 unsigned reformatRawStringLiteral(const FormatToken &Current,
143 LineState &State,
144 const FormatStyle &RawStringStyle,
145 bool DryRun, bool Newline);
146
147 /// If the current token is at the end of the current line, handle
148 /// the transition to the next line.
149 unsigned handleEndOfLine(const FormatToken &Current, LineState &State,
150 bool DryRun, bool AllowBreak, bool Newline);
151
152 /// If \p Current is a raw string that is configured to be reformatted,
153 /// return the style to be used.
154 std::optional<FormatStyle> getRawStringStyle(const FormatToken &Current,
155 const LineState &State);
156
157 /// If the current token sticks out over the end of the line, break
158 /// it if possible.
159 ///
160 /// \returns A pair (penalty, exceeded), where penalty is the extra penalty
161 /// when tokens are broken or lines exceed the column limit, and exceeded
162 /// indicates whether the algorithm purposefully left lines exceeding the
163 /// column limit.
164 ///
165 /// The returned penalty will cover the cost of the additional line breaks
166 /// and column limit violation in all lines except for the last one. The
167 /// penalty for the column limit violation in the last line (and in single
168 /// line tokens) is handled in \c addNextStateToQueue.
169 ///
170 /// \p Strict indicates whether reflowing is allowed to leave characters
171 /// protruding the column limit; if true, lines will be split strictly within
172 /// the column limit where possible; if false, words are allowed to protrude
173 /// over the column limit as long as the penalty is less than the penalty
174 /// of a break.
175 std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current,
176 LineState &State,
177 bool AllowBreak, bool DryRun,
178 bool Strict);
179
180 /// Returns the \c BreakableToken starting at \p Current, or nullptr
181 /// if the current token cannot be broken.
182 std::unique_ptr<BreakableToken>
183 createBreakableToken(const FormatToken &Current, LineState &State,
184 bool AllowBreak);
185
186 /// Appends the next token to \p State and updates information
187 /// necessary for indentation.
188 ///
189 /// Puts the token on the current line.
190 ///
191 /// If \p DryRun is \c false, also creates and stores the required
192 /// \c Replacement.
193 void addTokenOnCurrentLine(LineState &State, bool DryRun,
194 unsigned ExtraSpaces);
195
196 /// Appends the next token to \p State and updates information
197 /// necessary for indentation.
198 ///
199 /// Adds a line break and necessary indentation.
200 ///
201 /// If \p DryRun is \c false, also creates and stores the required
202 /// \c Replacement.
203 unsigned addTokenOnNewLine(LineState &State, bool DryRun);
204
205 /// Calculate the new column for a line wrap before the next token.
206 IndentationAndAlignment getNewLineColumn(const LineState &State);
207
208 /// Adds a multiline token to the \p State.
209 ///
210 /// \returns Extra penalty for the first line of the literal: last line is
211 /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
212 /// matter, as we don't change them.
213 unsigned addMultilineToken(const FormatToken &Current, LineState &State);
214
215 /// Returns \c true if the next token starts a multiline string
216 /// literal.
217 ///
218 /// This includes implicitly concatenated strings, strings that will be broken
219 /// by clang-format and string literals with escaped newlines.
220 bool nextIsMultilineString(const LineState &State);
221
222 FormatStyle Style;
223 const AdditionalKeywords &Keywords;
224 const SourceManager &SourceMgr;
225 WhitespaceManager &Whitespaces;
226 encoding::Encoding Encoding;
227 bool BinPackInconclusiveFunctions;
228 llvm::Regex CommentPragmasRegex;
229 const RawStringFormatStyleManager RawStringFormats;
230};
231
247
248 /// The token opening this parenthesis level, or nullptr if this level is
249 /// opened by fake parenthesis.
250 ///
251 /// Not considered for memoization as it will always have the same value at
252 /// the same token.
254
255 /// The position to which a specific parenthesis level needs to be
256 /// indented.
258
259 /// The position of the last space on each level.
260 ///
261 /// Used e.g. to break like:
262 /// functionCall(Parameter, otherCall(
263 /// OtherParameter));
264 unsigned LastSpace;
265
266 /// If a block relative to this parenthesis level gets wrapped, indent
267 /// it this much.
269
270 /// The position of the first "<<" operator encountered on each level.
271 ///
272 /// Used to align "<<" operators. 0 if no such operator has been encountered
273 /// on a level.
274 unsigned FirstLessLess = 0;
275
276 /// The column of a \c ? in a conditional expression.
277 unsigned QuestionColumn = 0;
278
279 /// The position of the colon in an ObjC method declaration/call.
280 unsigned ColonPos = 0;
281
282 /// The start of the most recent function in a builder-type call.
284
285 /// Contains the start of array subscript expressions, so that they
286 /// can be aligned.
288
289 /// If a nested name specifier was broken over multiple lines, this
290 /// contains the start column of the second line. Otherwise 0.
292
293 /// If a call expression was broken over multiple lines, this
294 /// contains the start column of the second line. Otherwise 0.
295 unsigned CallContinuation = 0;
296
297 /// The column of the first variable name in a variable declaration.
298 ///
299 /// Used to align further variables if necessary.
300 unsigned VariablePos = 0;
301
302 /// Whether this block's indentation is used for alignment.
303 bool IsAligned : 1;
304
305 /// Whether a newline needs to be inserted before the block's closing
306 /// brace.
307 ///
308 /// We only want to insert a newline before the closing brace if there also
309 /// was a newline after the beginning left brace.
311
312 /// Whether a newline needs to be inserted before the block's closing
313 /// paren.
314 ///
315 /// We only want to insert a newline before the closing paren if there also
316 /// was a newline after the beginning left paren.
318
319 /// Whether a newline needs to be inserted before a closing angle `>`.
321
322 /// Avoid bin packing, i.e. multiple parameters/elements on multiple
323 /// lines, in this context.
325
326 /// Break after the next comma (or all the commas in this context if
327 /// \c AvoidBinPacking is \c true).
329
330 /// Line breaking in this context would break a formatting rule.
331 bool NoLineBreak : 1;
332
333 /// Same as \c NoLineBreak, but is restricted until the end of the
334 /// operand (including the next ",").
336
337 /// True if the last binary operator on this level was wrapped to the
338 /// next line.
340
341 /// \c true if this \c ParenState already contains a line-break.
342 ///
343 /// The first line break in a certain \c ParenState causes extra penalty so
344 /// that clang-format prefers similar breaks, i.e. breaks in the same
345 /// parenthesis.
347
348 /// \c true if this \c ParenState contains multiple segments of a
349 /// builder-type call on one line.
351
352 /// \c true if the colons of the current ObjC method expression should
353 /// be aligned.
354 ///
355 /// Not considered for memoization as it will always have the same value at
356 /// the same token.
357 bool AlignColons : 1;
358
359 /// \c true if at least one selector name was found in the current
360 /// ObjC method expression.
361 ///
362 /// Not considered for memoization as it will always have the same value at
363 /// the same token.
365
366 /// \c true if there are multiple nested blocks inside these parens.
367 ///
368 /// Not considered for memoization as it will always have the same value at
369 /// the same token.
371
372 /// The start of a nested block (e.g. lambda introducer in C++ or
373 /// "function" in JavaScript) is not wrapped to a new line.
375
376 /// \c true if the current \c ParenState represents an Objective-C
377 /// array literal.
379
381
382 /// true if the current \c ParenState represents the false branch of a chained
383 /// conditional expression (e.g. else-if)
385
386 /// true if the conditional was wrapped on the first operator (the question
387 /// mark).
389
390 /// Indicates the indent should be reduced by the length of the operator.
392
393 bool operator<(const ParenState &Other) const { // Compares field by field; the first differing field decides.
394 if (Indent.Total != Other.Indent.Total)
395 return Indent.Total < Other.Indent.Total;
396 if (LastSpace != Other.LastSpace)
397 return LastSpace < Other.LastSpace;
398 if (NestedBlockIndent != Other.NestedBlockIndent)
399 return NestedBlockIndent < Other.NestedBlockIndent;
400 if (FirstLessLess != Other.FirstLessLess)
401 return FirstLessLess < Other.FirstLessLess;
402 if (IsAligned != Other.IsAligned)
403 return IsAligned;
404 if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
405 return BreakBeforeClosingBrace;
406 if (BreakBeforeClosingParen != Other.BreakBeforeClosingParen)
407 return BreakBeforeClosingParen;
408 if (BreakBeforeClosingAngle != Other.BreakBeforeClosingAngle)
409 return BreakBeforeClosingAngle;
410 if (QuestionColumn != Other.QuestionColumn)
411 return QuestionColumn < Other.QuestionColumn;
412 if (AvoidBinPacking != Other.AvoidBinPacking)
413 return AvoidBinPacking;
414 if (BreakBeforeParameter != Other.BreakBeforeParameter)
415 return BreakBeforeParameter;
416 if (NoLineBreak != Other.NoLineBreak)
417 return NoLineBreak;
418 if (LastOperatorWrapped != Other.LastOperatorWrapped)
419 return LastOperatorWrapped;
420 if (ColonPos != Other.ColonPos)
421 return ColonPos < Other.ColonPos;
422 if (StartOfFunctionCall != Other.StartOfFunctionCall)
423 return StartOfFunctionCall < Other.StartOfFunctionCall;
424 if (StartOfArraySubscripts != Other.StartOfArraySubscripts)
425 return StartOfArraySubscripts < Other.StartOfArraySubscripts;
426 if (CallContinuation != Other.CallContinuation)
427 return CallContinuation < Other.CallContinuation;
428 if (VariablePos != Other.VariablePos)
429 return VariablePos < Other.VariablePos;
430 if (ContainsLineBreak != Other.ContainsLineBreak)
431 return ContainsLineBreak;
432 if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder)
433 return ContainsUnwrappedBuilder;
434 if (NestedBlockInlined != Other.NestedBlockInlined)
435 return NestedBlockInlined;
436 if (IsCSharpGenericTypeConstraint != Other.IsCSharpGenericTypeConstraint)
437 return IsCSharpGenericTypeConstraint;
438 if (IsChainedConditional != Other.IsChainedConditional)
439 return IsChainedConditional;
440 if (IsWrappedConditional != Other.IsWrappedConditional)
441 return IsWrappedConditional;
442 if (UnindentOperator != Other.UnindentOperator)
443 return UnindentOperator;
444 return Indent < Other.Indent;
445 }
446};
447
448/// The current state when indenting an unwrapped line.
449///
450/// As the indenting tries different combinations this is copied by value.
451struct LineState {
452 /// The number of used columns in the current line.
453 unsigned Column;
454
455 /// The token that needs to be next formatted.
457
458 /// \c true if \p NextToken should not continue this line.
460
461 /// The \c NestingLevel at the start of this line.
463
464 /// The lowest \c NestingLevel on the current line.
466
467 /// The start column of the string literal, if we're in a string
468 /// literal sequence, 0 otherwise.
470
471 /// Disallow line breaks for this line.
473
474 /// A stack keeping track of properties applying to parenthesis
475 /// levels.
477
478 /// Ignore the stack of \c ParenStates for state comparison.
479 ///
480 /// In long and deeply nested unwrapped lines, the current algorithm can
481 /// be insufficient for finding the best formatting with a reasonable amount
482 /// of time and memory. Setting this flag will effectively lead to the
483 /// algorithm not analyzing some combinations. However, these combinations
484 /// rarely contain the optimal solution: In short, accepting a higher
485 /// penalty early would need to lead to different values in the \c
486 /// ParenState stack (in an otherwise identical state) and these different
487 /// values would need to lead to a significant amount of avoided penalty
488 /// later.
489 ///
490 /// FIXME: Come up with a better algorithm instead.
492
493 /// The indent of the first token.
494 unsigned FirstIndent;
495
496 /// The line that is being formatted.
497 ///
498 /// Does not need to be considered for memoization because it doesn't change.
500
501 /// Comparison operator to be able to use \c LineState in a \c map.
502 bool operator<(const LineState &Other) const {
503 if (NextToken != Other.NextToken)
504 return NextToken < Other.NextToken;
505 if (Column != Other.Column)
506 return Column < Other.Column;
507 if (NoContinuation != Other.NoContinuation)
508 return NoContinuation;
509 if (StartOfLineLevel != Other.StartOfLineLevel)
510 return StartOfLineLevel < Other.StartOfLineLevel;
511 if (LowestLevelOnLine != Other.LowestLevelOnLine)
512 return LowestLevelOnLine < Other.LowestLevelOnLine;
513 if (StartOfStringLiteral != Other.StartOfStringLiteral)
514 return StartOfStringLiteral < Other.StartOfStringLiteral;
515 if (IgnoreStackForComparison || Other.IgnoreStackForComparison)
516 return false; // Stacks are not compared when either state sets the flag.
517 return Stack < Other.Stack;
518 }
519};
520
521} // end namespace format
522} // end namespace clang
523
524#endif
Contains functions for text encoding manipulation.
This file contains the declaration of the FormatToken, a wrapper around Token with additional informa...
This class handles loading and caching of source files into memory.
Base class for tokens / ranges of tokens that can allow breaking within the tokens - for example,...
bool canBreak(const LineState &State)
Returns true, if a line break after State is allowed.
unsigned addTokenToState(LineState &State, bool Newline, bool DryRun, unsigned ExtraSpaces=0)
Appends the next token to State and updates information necessary for indentation.
unsigned getColumnLimit(const LineState &State) const
Get the column limit for this line.
LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn, const AnnotatedLine *Line, bool DryRun)
Get the initial state, i.e.
ContinuationIndenter(const FormatStyle &Style, const AdditionalKeywords &Keywords, const SourceManager &SourceMgr, WhitespaceManager &Whitespaces, encoding::Encoding Encoding, bool BinPackInconclusiveFunctions)
Constructs a ContinuationIndenter to format Line starting in column FirstIndent.
bool mustBreak(const LineState &State)
Returns true, if a line break after State is mandatory.
Manages the whitespaces around tokens and their replacements.
The JSON file list parser is used to communicate input to InstallAPI.
#define false
Definition stdbool.h:26
#define true
Definition stdbool.h:25
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
A wrapper around a Token storing information about the whitespace characters preceding it.
Represents the spaces at the start of a line, keeping track of what the spaces are for.
IndentationAndAlignment operator+(unsigned Spaces) const
Adding indentation is more common than padding. So the operator does that.
IndentationAndAlignment(unsigned Total, unsigned IndentedFrom)
IndentationAndAlignment addPadding(unsigned Spaces) const
Add spaces for right-justifying the token.
IndentationAndAlignment operator-(unsigned Spaces) const
IndentationAndAlignment & operator+=(unsigned Spaces)
bool operator<(const IndentationAndAlignment &Other) const
unsigned IndentedFrom
The column that the position of the start of the line is calculated from.
The current state when indenting an unwrapped line.
unsigned StartOfStringLiteral
The start column of the string literal, if we're in a string literal sequence, 0 otherwise.
bool IgnoreStackForComparison
Ignore the stack of ParenStates for state comparison.
bool operator<(const LineState &Other) const
Comparison operator to be able to use LineState in a map.
unsigned LowestLevelOnLine
The lowest NestingLevel on the current line.
const AnnotatedLine * Line
The line that is being formatted.
bool NoLineBreak
Disallow line breaks for this line.
unsigned Column
The number of used columns in the current line.
SmallVector< ParenState > Stack
A stack keeping track of properties applying to parenthesis levels.
bool NoContinuation
true if NextToken should not continue this line.
unsigned StartOfLineLevel
The NestingLevel at the start of this line.
unsigned FirstIndent
The indent of the first token.
FormatToken * NextToken
The token that needs to be next formatted.
bool AvoidBinPacking
Avoid bin packing, i.e.
unsigned NestedNameSpecifierContinuation
If a nested name specifier was broken over multiple lines, this contains the start column of the seco...
bool AlignColons
true if the colons of the current ObjC method expression should be aligned.
bool BreakBeforeClosingParen
Whether a newline needs to be inserted before the block's closing paren.
bool BreakBeforeParameter
Break after the next comma (or all the commas in this context if AvoidBinPacking is true).
const FormatToken * Tok
The token opening this parenthesis level, or nullptr if this level is opened by fake parenthesis.
bool BreakBeforeClosingBrace
Whether a newline needs to be inserted before the block's closing brace.
unsigned FirstLessLess
The position of the first "<<" operator encountered on each level.
bool LastOperatorWrapped
True if the last binary operator on this level was wrapped to the next line.
unsigned VariablePos
The column of the first variable name in a variable declaration.
unsigned LastSpace
The position of the last space on each level.
bool NoLineBreak
Line breaking in this context would break a formatting rule.
unsigned QuestionColumn
The column of a ? in a conditional expression.
unsigned StartOfFunctionCall
The start of the most recent function in a builder-type call.
bool ObjCSelectorNameFound
true if at least one selector name was found in the current ObjC method expression.
bool NoLineBreakInOperand
Same as NoLineBreak, but is restricted until the end of the operand (including the next ",...
bool operator<(const ParenState &Other) const
bool ContainsUnwrappedBuilder
true if this ParenState contains multiple segments of a builder-type call on one line.
unsigned ColonPos
The position of the colon in an ObjC method declaration/call.
bool UnindentOperator
Indicates the indent should be reduced by the length of the operator.
bool ContainsLineBreak
true if this ParenState already contains a line-break.
bool IsAligned
Whether this block's indentation is used for alignment.
unsigned StartOfArraySubscripts
Contains the start of array subscript expressions, so that they can be aligned.
bool NestedBlockInlined
The start of a nested block (e.g.
ParenState(const FormatToken *Tok, IndentationAndAlignment Indent, unsigned LastSpace, bool AvoidBinPacking, bool NoLineBreak)
bool HasMultipleNestedBlocks
true if there are multiple nested blocks inside these parens.
bool IsChainedConditional
true if the current ParenState represents the false branch of a chained conditional expression (e....
bool BreakBeforeClosingAngle
Whether a newline needs to be inserted before a closing angle >.
unsigned CallContinuation
If a call expression was broken over multiple lines, this contains the start column of the second lin...
bool IsWrappedConditional
true if the conditional was wrapped on the first operator (the question mark)
IndentationAndAlignment Indent
The position to which a specific parenthesis level needs to be indented.
unsigned NestedBlockIndent
If a block relative to this parenthesis level gets wrapped, indent it this much.
bool IsInsideObjCArrayLiteral
true if the current ParenState represents an Objective-C array literal.
llvm::StringMap< FormatStyle > EnclosingFunctionStyle
std::optional< FormatStyle > getDelimiterStyle(StringRef Delimiter) const
std::optional< FormatStyle > getEnclosingFunctionStyle(StringRef EnclosingFunction) const
RawStringFormatStyleManager(const FormatStyle &CodeStyle)
llvm::StringMap< FormatStyle > DelimiterStyle