clang 23.0.0git
ContinuationIndenter.h
Go to the documentation of this file.
1//===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements an indenter that manages the indentation of
11/// continuations.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
16#define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
17
18#include "Encoding.h"
19#include "FormatToken.h"
20
21namespace clang {
22class SourceManager;
23
24namespace format {
25
26class AnnotatedLine;
27class BreakableToken;
28struct FormatToken;
29struct LineState;
30struct ParenState;
33
35 llvm::StringMap<FormatStyle> DelimiterStyle;
36 llvm::StringMap<FormatStyle> EnclosingFunctionStyle;
37
38 RawStringFormatStyleManager(const FormatStyle &CodeStyle);
39
40 std::optional<FormatStyle> getDelimiterStyle(StringRef Delimiter) const;
41
42 std::optional<FormatStyle>
43 getEnclosingFunctionStyle(StringRef EnclosingFunction) const;
44};
45
46/// Represents the spaces at the start of a line, keeping track of what the
47/// spaces are for.
49 unsigned Total;
50
51 /// The column that the position of the start of the line is calculated
52 /// from. It can be more than Total.
53 unsigned IndentedFrom;
54
55 /// Add spaces for right-justifying the token. The IndentedFrom field does not
56 /// change.
57 ///
58 /// This example in Objective-C shows why the field should not change. The
59 /// token `xx` is right-justified with this method to align the `:`
60 /// symbols. The `:` symbols should remain aligned through the step that
61 /// aligns assignments. That step uses the IndentedFrom field to tell what
62 /// lines to move. Not changing the field in this method ensures that the 2
63 /// lines move together.
64 ///
65 /// [x //
66 /// xxxx:0
67 /// xx:0];
68 IndentationAndAlignment addPadding(unsigned Spaces) const;
69 /// Adding indentation is more common than padding. So the operator does that.
70 IndentationAndAlignment operator+(unsigned Spaces) const;
71 IndentationAndAlignment operator-(unsigned Spaces) const;
72 IndentationAndAlignment &operator+=(unsigned Spaces);
73
74 IndentationAndAlignment(unsigned Total, unsigned IndentedFrom);
75
76 IndentationAndAlignment(unsigned Spaces);
77
78 bool operator<(const IndentationAndAlignment &Other) const;
79};
80
82public:
83 /// Constructs a \c ContinuationIndenter to format \p Line starting in
84 /// column \p FirstIndent.
85 ContinuationIndenter(const FormatStyle &Style,
86 const AdditionalKeywords &Keywords,
87 const SourceManager &SourceMgr,
88 WhitespaceManager &Whitespaces,
89 encoding::Encoding Encoding,
90 bool BinPackInconclusiveFunctions);
91
92 /// Get the initial state, i.e. the state after placing \p Line's
93 /// first token at \p FirstIndent. When reformatting a fragment of code, as in
94 /// the case of formatting inside raw string literals, \p FirstStartColumn is
95 /// the column at which the state of the parent formatter is.
96 LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn,
97 const AnnotatedLine *Line, bool DryRun);
98
99 // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
100 // better home.
101 /// Returns \c true, if a line break after \p State is allowed.
102 bool canBreak(const LineState &State);
103
104 /// Returns \c true, if a line break after \p State is mandatory.
105 bool mustBreak(const LineState &State);
106
107 /// Appends the next token to \p State and updates information
108 /// necessary for indentation.
109 ///
110 /// Puts the token on the current line if \p Newline is \c false and adds a
111 /// line break and necessary indentation otherwise.
112 ///
113 /// If \p DryRun is \c false, also creates and stores the required
114 /// \c Replacement.
115 unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
116 unsigned ExtraSpaces = 0);
117
118 /// Get the column limit for this line. This is the style's column
119 /// limit, potentially reduced for preprocessor definitions.
120 unsigned getColumnLimit(const LineState &State) const;
121
122private:
123 /// Mark the next token as consumed in \p State and modify its stacks
124 /// accordingly.
125 unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
126
127 /// Update 'State' according to the next token's fake left parentheses.
128 void moveStatePastFakeLParens(LineState &State, bool Newline);
129 /// Update 'State' according to the next token's fake r_parens.
130 void moveStatePastFakeRParens(LineState &State);
131
132 /// Update 'State' according to the next token being one of "(<{[".
133 void moveStatePastScopeOpener(LineState &State, bool Newline);
134 /// Update 'State' according to the next token being one of ")>}]".
135 void moveStatePastScopeCloser(LineState &State);
136 /// Update 'State' with the next token opening a nested block.
137 void moveStateToNewBlock(LineState &State, bool NewLine);
138
139 /// Reformats a raw string literal.
140 ///
141 /// \returns An extra penalty induced by reformatting the token.
142 unsigned reformatRawStringLiteral(const FormatToken &Current,
143 LineState &State,
144 const FormatStyle &RawStringStyle,
145 bool DryRun, bool Newline);
146
147 /// If the current token is at the end of the current line, handle
148 /// the transition to the next line.
149 unsigned handleEndOfLine(const FormatToken &Current, LineState &State,
150 bool DryRun, bool AllowBreak, bool Newline);
151
152 /// If \p Current is a raw string that is configured to be reformatted,
153 /// return the style to be used.
154 std::optional<FormatStyle> getRawStringStyle(const FormatToken &Current,
155 const LineState &State);
156
157 /// If the current token sticks out over the end of the line, break
158 /// it if possible.
159 ///
160 /// \returns A pair (penalty, exceeded), where penalty is the extra penalty
161 /// when tokens are broken or lines exceed the column limit, and exceeded
162 /// indicates whether the algorithm purposefully left lines exceeding the
163 /// column limit.
164 ///
165 /// The returned penalty will cover the cost of the additional line breaks
166 /// and column limit violation in all lines except for the last one. The
167 /// penalty for the column limit violation in the last line (and in single
168 /// line tokens) is handled in \c addNextStateToQueue.
169 ///
170 /// \p Strict indicates whether reflowing is allowed to leave characters
171 /// protruding the column limit; if true, lines will be split strictly within
172 /// the column limit where possible; if false, words are allowed to protrude
173 /// over the column limit as long as the penalty is less than the penalty
174 /// of a break.
175 std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current,
176 LineState &State,
177 bool AllowBreak, bool DryRun,
178 bool Strict);
179
180 /// Returns the \c BreakableToken starting at \p Current, or nullptr
181 /// if the current token cannot be broken.
182 std::unique_ptr<BreakableToken>
183 createBreakableToken(const FormatToken &Current, LineState &State,
184 bool AllowBreak);
185
186 /// Appends the next token to \p State and updates information
187 /// necessary for indentation.
188 ///
189 /// Puts the token on the current line.
190 ///
191 /// If \p DryRun is \c false, also creates and stores the required
192 /// \c Replacement.
193 void addTokenOnCurrentLine(LineState &State, bool DryRun,
194 unsigned ExtraSpaces);
195
196 /// Appends the next token to \p State and updates information
197 /// necessary for indentation.
198 ///
199 /// Adds a line break and necessary indentation.
200 ///
201 /// If \p DryRun is \c false, also creates and stores the required
202 /// \c Replacement.
203 unsigned addTokenOnNewLine(LineState &State, bool DryRun);
204
205 /// Calculate the new column for a line wrap before the next token.
206 IndentationAndAlignment getNewLineColumn(const LineState &State);
207
208 /// Adds a multiline token to the \p State.
209 ///
210 /// \returns Extra penalty for the first line of the literal: last line is
211 /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
212 /// matter, as we don't change them.
213 unsigned addMultilineToken(const FormatToken &Current, LineState &State);
214
215 /// Returns \c true if the next token starts a multiline string
216 /// literal.
217 ///
218 /// This includes implicitly concatenated strings, strings that will be broken
219 /// by clang-format and string literals with escaped newlines.
220 bool nextIsMultilineString(const LineState &State);
221
222 FormatStyle Style;
223 const AdditionalKeywords &Keywords;
224 const SourceManager &SourceMgr;
225 WhitespaceManager &Whitespaces;
226 encoding::Encoding Encoding;
227 bool BinPackInconclusiveFunctions;
228 llvm::Regex CommentPragmasRegex;
229 const RawStringFormatStyleManager RawStringFormats;
230};
231
246
247 /// The token opening this parenthesis level, or nullptr if this level is
248 /// opened by fake parenthesis.
249 ///
250 /// Not considered for memoization as it will always have the same value at
251 /// the same token.
253
254 /// The position to which a specific parenthesis level needs to be
255 /// indented.
257
258 /// The token in one of the previous lines this state wants to align to.
260
261 /// The position of the last space on each level.
262 ///
263 /// Used e.g. to break like:
264 /// functionCall(Parameter, otherCall(
265 /// OtherParameter));
266 unsigned LastSpace;
267
268 /// If a block relative to this parenthesis level gets wrapped, indent
269 /// it this much.
271
272 /// The position of the first "<<" operator encountered on each level.
273 ///
274 /// Used to align "<<" operators. 0 if no such operator has been encountered
275 /// on a level.
276 unsigned FirstLessLess = 0;
277
278 /// The column of a \c ? in a conditional expression.
279 unsigned QuestionColumn = 0;
280
281 /// The position of the colon in an ObjC method declaration/call.
282 unsigned ColonPos = 0;
283
284 /// The start of the most recent function in a builder-type call.
286
287 /// Contains the start of array subscript expressions, so that they
288 /// can be aligned.
290
291 /// If a nested name specifier was broken over multiple lines, this
292 /// contains the start column of the second line. Otherwise 0.
294
295 /// If a call expression was broken over multiple lines, this
296 /// contains the start column of the second line. Otherwise 0.
297 unsigned CallContinuation = 0;
298
299 /// The column of the first variable name in a variable declaration.
300 ///
301 /// Used to align further variables if necessary.
302 unsigned VariablePos = 0;
303
304 /// Whether a newline needs to be inserted before the block's closing
305 /// brace.
306 ///
307 /// We only want to insert a newline before the closing brace if there also
308 /// was a newline after the beginning left brace.
310
311 /// Whether a newline needs to be inserted before the block's closing
312 /// paren.
313 ///
314 /// We only want to insert a newline before the closing paren if there also
315 /// was a newline after the beginning left paren.
317
318 /// Whether a newline needs to be inserted before a closing angle `>`.
320
321 /// Avoid bin packing, i.e. multiple parameters/elements on multiple
322 /// lines, in this context.
324
325 /// Break after the next comma (or all the commas in this context if
326 /// \c AvoidBinPacking is \c true).
328
329 /// Line breaking in this context would break a formatting rule.
330 bool NoLineBreak : 1;
331
332 /// Same as \c NoLineBreak, but is restricted until the end of the
333 /// operand (including the next ",").
335
336 /// True if the last binary operator on this level was wrapped to the
337 /// next line.
339
340 /// \c true if this \c ParenState already contains a line-break.
341 ///
342 /// The first line break in a certain \c ParenState causes extra penalty so
343 /// that clang-format prefers similar breaks, i.e. breaks in the same
344 /// parenthesis.
346
347 /// \c true if this \c ParenState contains multiple segments of a
348 /// builder-type call on one line.
350
351 /// \c true if the colons of the current ObjC method expression should
352 /// be aligned.
353 ///
354 /// Not considered for memoization as it will always have the same value at
355 /// the same token.
356 bool AlignColons : 1;
357
358 /// \c true if at least one selector name was found in the current
359 /// ObjC method expression.
360 ///
361 /// Not considered for memoization as it will always have the same value at
362 /// the same token.
364
365 /// \c true if there are multiple nested blocks inside these parens.
366 ///
367 /// Not considered for memoization as it will always have the same value at
368 /// the same token.
370
371 /// The start of a nested block (e.g. lambda introducer in C++ or
372 /// "function" in JavaScript) is not wrapped to a new line.
374
375 /// \c true if the current \c ParenState represents an Objective-C
376 /// array literal.
378
380
381 /// true if the current \c ParenState represents the false branch of a chained
382 /// conditional expression (e.g. else-if)
384
385 /// true if the conditional was wrapped on the first operator (the question
386 /// mark)
388
389 /// Indicates the indent should be reduced by the length of the operator.
391
392 bool operator<(const ParenState &Other) const {
393 if (Indent.Total != Other.Indent.Total)
394 return Indent.Total < Other.Indent.Total;
395 if (LastSpace != Other.LastSpace)
396 return LastSpace < Other.LastSpace;
397 if (NestedBlockIndent != Other.NestedBlockIndent)
398 return NestedBlockIndent < Other.NestedBlockIndent;
399 if (FirstLessLess != Other.FirstLessLess)
400 return FirstLessLess < Other.FirstLessLess;
401 if (AlignedTo != Other.AlignedTo)
402 return AlignedTo < Other.AlignedTo;
403 if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
405 if (BreakBeforeClosingParen != Other.BreakBeforeClosingParen)
407 if (BreakBeforeClosingAngle != Other.BreakBeforeClosingAngle)
409 if (QuestionColumn != Other.QuestionColumn)
410 return QuestionColumn < Other.QuestionColumn;
411 if (AvoidBinPacking != Other.AvoidBinPacking)
412 return AvoidBinPacking;
413 if (BreakBeforeParameter != Other.BreakBeforeParameter)
415 if (NoLineBreak != Other.NoLineBreak)
416 return NoLineBreak;
417 if (LastOperatorWrapped != Other.LastOperatorWrapped)
418 return LastOperatorWrapped;
419 if (ColonPos != Other.ColonPos)
420 return ColonPos < Other.ColonPos;
421 if (StartOfFunctionCall != Other.StartOfFunctionCall)
422 return StartOfFunctionCall < Other.StartOfFunctionCall;
423 if (StartOfArraySubscripts != Other.StartOfArraySubscripts)
424 return StartOfArraySubscripts < Other.StartOfArraySubscripts;
425 if (CallContinuation != Other.CallContinuation)
426 return CallContinuation < Other.CallContinuation;
427 if (VariablePos != Other.VariablePos)
428 return VariablePos < Other.VariablePos;
429 if (ContainsLineBreak != Other.ContainsLineBreak)
430 return ContainsLineBreak;
431 if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder)
433 if (NestedBlockInlined != Other.NestedBlockInlined)
434 return NestedBlockInlined;
435 if (IsCSharpGenericTypeConstraint != Other.IsCSharpGenericTypeConstraint)
437 if (IsChainedConditional != Other.IsChainedConditional)
439 if (IsWrappedConditional != Other.IsWrappedConditional)
441 if (UnindentOperator != Other.UnindentOperator)
442 return UnindentOperator;
443 return Indent < Other.Indent;
444 }
445};
446
447/// The current state when indenting an unwrapped line.
448///
449/// As the indenting tries different combinations this is copied by value.
450struct LineState {
451 /// The number of used columns in the current line.
452 unsigned Column;
453
454 /// The token that needs to be formatted next.
456
457 /// \c true if \p NextToken should not continue this line.
459
460 /// The \c NestingLevel at the start of this line.
462
463 /// The lowest \c NestingLevel on the current line.
465
466 /// The start column of the string literal, if we're in a string
467 /// literal sequence, 0 otherwise.
469
470 /// Disallow line breaks for this line.
472
473 /// A stack keeping track of properties applying to parenthesis
474 /// levels.
476
477 /// Ignore the stack of \c ParenStates for state comparison.
478 ///
479 /// In long and deeply nested unwrapped lines, the current algorithm can
480 /// be insufficient for finding the best formatting with a reasonable amount
481 /// of time and memory. Setting this flag will effectively lead to the
482 /// algorithm not analyzing some combinations. However, these combinations
483 /// rarely contain the optimal solution: In short, accepting a higher
484 /// penalty early would need to lead to different values in the \c
485 /// ParenState stack (in an otherwise identical state) and these different
486 /// values would need to lead to a significant amount of avoided penalty
487 /// later.
488 ///
489 /// FIXME: Come up with a better algorithm instead.
491
492 /// The indent of the first token.
493 unsigned FirstIndent;
494
495 /// The line that is being formatted.
496 ///
497 /// Does not need to be considered for memoization because it doesn't change.
499
500 /// Comparison operator to be able to use \c LineState in \c map.
501 bool operator<(const LineState &Other) const {
502 if (NextToken != Other.NextToken)
503 return NextToken < Other.NextToken;
504 if (Column != Other.Column)
505 return Column < Other.Column;
506 if (NoContinuation != Other.NoContinuation)
507 return NoContinuation;
508 if (StartOfLineLevel != Other.StartOfLineLevel)
509 return StartOfLineLevel < Other.StartOfLineLevel;
510 if (LowestLevelOnLine != Other.LowestLevelOnLine)
511 return LowestLevelOnLine < Other.LowestLevelOnLine;
512 if (StartOfStringLiteral != Other.StartOfStringLiteral)
513 return StartOfStringLiteral < Other.StartOfStringLiteral;
514 if (IgnoreStackForComparison || Other.IgnoreStackForComparison)
515 return false;
516 return Stack < Other.Stack;
517 }
518};
519
520} // end namespace format
521} // end namespace clang
522
523#endif
Contains functions for text encoding manipulation.
This file contains the declaration of the FormatToken, a wrapper around Token with additional informa...
This class handles loading and caching of source files into memory.
Base class for tokens / ranges of tokens that can allow breaking within the tokens - for example,...
bool canBreak(const LineState &State)
Returns true, if a line break after State is allowed.
unsigned addTokenToState(LineState &State, bool Newline, bool DryRun, unsigned ExtraSpaces=0)
Appends the next token to State and updates information necessary for indentation.
unsigned getColumnLimit(const LineState &State) const
Get the column limit for this line.
LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn, const AnnotatedLine *Line, bool DryRun)
Get the initial state, i.e.
ContinuationIndenter(const FormatStyle &Style, const AdditionalKeywords &Keywords, const SourceManager &SourceMgr, WhitespaceManager &Whitespaces, encoding::Encoding Encoding, bool BinPackInconclusiveFunctions)
Constructs a ContinuationIndenter to format Line starting in column FirstIndent.
bool mustBreak(const LineState &State)
Returns true, if a line break after State is mandatory.
Manages the whitespaces around tokens and their replacements.
The JSON file list parser is used to communicate input to InstallAPI.
nullptr
This class represents a compute construct, representing a 'Kind' of ‘parallel’, 'serial',...
#define false
Definition stdbool.h:26
#define true
Definition stdbool.h:25
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
A wrapper around a Token storing information about the whitespace characters preceding it.
Represents the spaces at the start of a line, keeping track of what the spaces are for.
IndentationAndAlignment operator+(unsigned Spaces) const
Adding indentation is more common than padding. So the operator does that.
IndentationAndAlignment(unsigned Total, unsigned IndentedFrom)
IndentationAndAlignment addPadding(unsigned Spaces) const
Add spaces for right-justifying the token.
IndentationAndAlignment operator-(unsigned Spaces) const
IndentationAndAlignment & operator+=(unsigned Spaces)
bool operator<(const IndentationAndAlignment &Other) const
unsigned IndentedFrom
The column that the position of the start of the line is calculated from.
The current state when indenting an unwrapped line.
unsigned StartOfStringLiteral
The start column of the string literal, if we're in a string literal sequence, 0 otherwise.
bool IgnoreStackForComparison
Ignore the stack of ParenStates for state comparison.
bool operator<(const LineState &Other) const
Comparison operator to be able to use LineState in map.
unsigned LowestLevelOnLine
The lowest NestingLevel on the current line.
const AnnotatedLine * Line
The line that is being formatted.
bool NoLineBreak
Disallow line breaks for this line.
unsigned Column
The number of used columns in the current line.
SmallVector< ParenState > Stack
A stack keeping track of properties applying to parenthesis levels.
bool NoContinuation
true if NextToken should not continue this line.
unsigned StartOfLineLevel
The NestingLevel at the start of this line.
unsigned FirstIndent
The indent of the first token.
FormatToken * NextToken
The token that needs to be formatted next.
bool AvoidBinPacking
Avoid bin packing, i.e.
unsigned NestedNameSpecifierContinuation
If a nested name specifier was broken over multiple lines, this contains the start column of the seco...
bool AlignColons
true if the colons of the current ObjC method expression should be aligned.
bool BreakBeforeClosingParen
Whether a newline needs to be inserted before the block's closing paren.
bool BreakBeforeParameter
Break after the next comma (or all the commas in this context if AvoidBinPacking is true).
const FormatToken * Tok
The token opening this parenthesis level, or nullptr if this level is opened by fake parenthesis.
bool BreakBeforeClosingBrace
Whether a newline needs to be inserted before the block's closing brace.
unsigned FirstLessLess
The position of the first "<<" operator encountered on each level.
bool LastOperatorWrapped
True if the last binary operator on this level was wrapped to the next line.
unsigned VariablePos
The column of the first variable name in a variable declaration.
unsigned LastSpace
The position of the last space on each level.
bool NoLineBreak
Line breaking in this context would break a formatting rule.
unsigned QuestionColumn
The column of a ? in a conditional expression.
unsigned StartOfFunctionCall
The start of the most recent function in a builder-type call.
bool ObjCSelectorNameFound
true if at least one selector name was found in the current ObjC method expression.
bool NoLineBreakInOperand
Same as NoLineBreak, but is restricted until the end of the operand (including the next ",...
bool operator<(const ParenState &Other) const
bool ContainsUnwrappedBuilder
true if this ParenState contains multiple segments of a builder-type call on one line.
unsigned ColonPos
The position of the colon in an ObjC method declaration/call.
bool UnindentOperator
Indicates the indent should be reduced by the length of the operator.
bool ContainsLineBreak
true if this ParenState already contains a line-break.
unsigned StartOfArraySubscripts
Contains the start of array subscript expressions, so that they can be aligned.
bool NestedBlockInlined
The start of a nested block (e.g.
ParenState(const FormatToken *Tok, IndentationAndAlignment Indent, unsigned LastSpace, bool AvoidBinPacking, bool NoLineBreak)
bool HasMultipleNestedBlocks
true if there are multiple nested blocks inside these parens.
bool IsChainedConditional
true if the current ParenState represents the false branch of a chained conditional expression (e....
bool BreakBeforeClosingAngle
Whether a newline needs to be inserted before a closing angle >.
unsigned CallContinuation
If a call expression was broken over multiple lines, this contains the start column of the second lin...
bool IsWrappedConditional
true if the conditional was wrapped on the first operator (the question mark)
IndentationAndAlignment Indent
The position to which a specific parenthesis level needs to be indented.
const FormatToken * AlignedTo
The token in one of the previous lines this state wants to align to.
unsigned NestedBlockIndent
If a block relative to this parenthesis level gets wrapped, indent it this much.
bool IsInsideObjCArrayLiteral
true if the current ParenState represents an Objective-C array literal.
llvm::StringMap< FormatStyle > EnclosingFunctionStyle
std::optional< FormatStyle > getDelimiterStyle(StringRef Delimiter) const
std::optional< FormatStyle > getEnclosingFunctionStyle(StringRef EnclosingFunction) const
RawStringFormatStyleManager(const FormatStyle &CodeStyle)
llvm::StringMap< FormatStyle > DelimiterStyle