clang 18.0.0git
ContinuationIndenter.h
Go to the documentation of this file.
1//===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements an indenter that manages the indentation of
11/// continuations.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
16#define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
17
18#include "Encoding.h"
19#include "FormatToken.h"
20#include "clang/Format/Format.h"
21#include "llvm/Support/Regex.h"
22#include <map>
23#include <optional>
24#include <tuple>
25
26namespace clang {
27class SourceManager;
28
29namespace format {
30
31class AnnotatedLine;
32class BreakableToken;
33struct FormatToken;
34struct LineState;
35struct ParenState;
36struct RawStringFormatStyleManager;
37class WhitespaceManager;
38
40 llvm::StringMap<FormatStyle> DelimiterStyle;
41 llvm::StringMap<FormatStyle> EnclosingFunctionStyle;
42
44
45 std::optional<FormatStyle> getDelimiterStyle(StringRef Delimiter) const;
46
47 std::optional<FormatStyle>
48 getEnclosingFunctionStyle(StringRef EnclosingFunction) const;
49};
50
52public:
53 /// Constructs a \c ContinuationIndenter to format \p Line starting in
54 /// column \p FirstIndent.
56 const AdditionalKeywords &Keywords,
57 const SourceManager &SourceMgr,
58 WhitespaceManager &Whitespaces,
59 encoding::Encoding Encoding,
60 bool BinPackInconclusiveFunctions);
61
62 /// Get the initial state, i.e. the state after placing \p Line's
63 /// first token at \p FirstIndent. When reformatting a fragment of code, as in
64 /// the case of formatting inside raw string literals, \p FirstStartColumn is
65 /// the column at which the state of the parent formatter is.
66 LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn,
67 const AnnotatedLine *Line, bool DryRun);
68
69 // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
70 // better home.
71 /// Returns \c true, if a line break after \p State is allowed.
72 bool canBreak(const LineState &State);
73
74 /// Returns \c true, if a line break after \p State is mandatory.
75 bool mustBreak(const LineState &State);
76
77 /// Appends the next token to \p State and updates information
78 /// necessary for indentation.
79 ///
80 /// Puts the token on the current line if \p Newline is \c false and adds a
81 /// line break and necessary indentation otherwise.
82 ///
83 /// If \p DryRun is \c false, also creates and stores the required
84 /// \c Replacement.
85 unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
86 unsigned ExtraSpaces = 0);
87
88 /// Get the column limit for this line. This is the style's column
89 /// limit, potentially reduced for preprocessor definitions.
90 unsigned getColumnLimit(const LineState &State) const;
91
92private:
93 /// Mark the next token as consumed in \p State and modify its stacks
94 /// accordingly.
95 unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
96
97 /// Update 'State' according to the next token's fake left parentheses.
98 void moveStatePastFakeLParens(LineState &State, bool Newline);
99 /// Update 'State' according to the next token's fake r_parens.
100 void moveStatePastFakeRParens(LineState &State);
101
102 /// Update 'State' according to the next token being one of "(<{[".
103 void moveStatePastScopeOpener(LineState &State, bool Newline);
104 /// Update 'State' according to the next token being one of ")>}]".
105 void moveStatePastScopeCloser(LineState &State);
106 /// Update 'State' with the next token opening a nested block.
107 void moveStateToNewBlock(LineState &State);
108
109 /// Reformats a raw string literal.
110 ///
111 /// \returns An extra penalty induced by reformatting the token.
112 unsigned reformatRawStringLiteral(const FormatToken &Current,
113 LineState &State,
114 const FormatStyle &RawStringStyle,
115 bool DryRun, bool Newline);
116
117 /// If the current token is at the end of the current line, handle
118 /// the transition to the next line.
119 unsigned handleEndOfLine(const FormatToken &Current, LineState &State,
120 bool DryRun, bool AllowBreak, bool Newline);
121
122 /// If \p Current is a raw string that is configured to be reformatted,
123 /// return the style to be used.
124 std::optional<FormatStyle> getRawStringStyle(const FormatToken &Current,
125 const LineState &State);
126
127 /// If the current token sticks out over the end of the line, break
128 /// it if possible.
129 ///
130 /// \returns A pair (penalty, exceeded), where penalty is the extra penalty
131 /// when tokens are broken or lines exceed the column limit, and exceeded
132 /// indicates whether the algorithm purposefully left lines exceeding the
133 /// column limit.
134 ///
135 /// The returned penalty will cover the cost of the additional line breaks
136 /// and column limit violation in all lines except for the last one. The
137 /// penalty for the column limit violation in the last line (and in single
138 /// line tokens) is handled in \c addNextStateToQueue.
139 ///
140 /// \p Strict indicates whether reflowing is allowed to leave characters
141 /// protruding the column limit; if true, lines will be split strictly within
142 /// the column limit where possible; if false, words are allowed to protrude
143 /// over the column limit as long as the penalty is less than the penalty
144 /// of a break.
145 std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current,
146 LineState &State,
147 bool AllowBreak, bool DryRun,
148 bool Strict);
149
150 /// Returns the \c BreakableToken starting at \p Current, or nullptr
151 /// if the current token cannot be broken.
152 std::unique_ptr<BreakableToken>
153 createBreakableToken(const FormatToken &Current, LineState &State,
154 bool AllowBreak);
155
156 /// Appends the next token to \p State and updates information
157 /// necessary for indentation.
158 ///
159 /// Puts the token on the current line.
160 ///
161 /// If \p DryRun is \c false, also creates and stores the required
162 /// \c Replacement.
163 void addTokenOnCurrentLine(LineState &State, bool DryRun,
164 unsigned ExtraSpaces);
165
166 /// Appends the next token to \p State and updates information
167 /// necessary for indentation.
168 ///
169 /// Adds a line break and necessary indentation.
170 ///
171 /// If \p DryRun is \c false, also creates and stores the required
172 /// \c Replacement.
173 unsigned addTokenOnNewLine(LineState &State, bool DryRun);
174
175 /// Calculate the new column for a line wrap before the next token.
176 unsigned getNewLineColumn(const LineState &State);
177
178 /// Adds a multiline token to the \p State.
179 ///
180 /// \returns Extra penalty for the first line of the literal: last line is
181 /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
182 /// matter, as we don't change them.
183 unsigned addMultilineToken(const FormatToken &Current, LineState &State);
184
185 /// Returns \c true if the next token starts a multiline string
186 /// literal.
187 ///
188 /// This includes implicitly concatenated strings, strings that will be broken
189 /// by clang-format and string literals with escaped newlines.
190 bool nextIsMultilineString(const LineState &State);
191
192 FormatStyle Style;
193 const AdditionalKeywords &Keywords;
194 const SourceManager &SourceMgr;
195 WhitespaceManager &Whitespaces;
196 encoding::Encoding Encoding;
197 bool BinPackInconclusiveFunctions;
198 llvm::Regex CommentPragmasRegex;
199 const RawStringFormatStyleManager RawStringFormats;
200};
201
203 ParenState(const FormatToken *Tok, unsigned Indent, unsigned LastSpace,
204 bool AvoidBinPacking, bool NoLineBreak)
216
217 /// \brief The token opening this parenthesis level, or nullptr if this level
218 /// is opened by fake parenthesis.
219 ///
220 /// Not considered for memoization as it will always have the same value at
221 /// the same token.
223
224 /// The position to which a specific parenthesis level needs to be
225 /// indented.
226 unsigned Indent;
227
228 /// The position of the last space on each level.
229 ///
230 /// Used e.g. to break like:
231 /// functionCall(Parameter, otherCall(
232 /// OtherParameter));
233 unsigned LastSpace;
234
235 /// If a block relative to this parenthesis level gets wrapped, indent
236 /// it this much.
238
239 /// The position the first "<<" operator encountered on each level.
240 ///
241 /// Used to align "<<" operators. 0 if no such operator has been encountered
242 /// on a level.
243 unsigned FirstLessLess = 0;
244
245 /// The column of a \c ? in a conditional expression;
246 unsigned QuestionColumn = 0;
247
248 /// The position of the colon in an ObjC method declaration/call.
249 unsigned ColonPos = 0;
250
251 /// The start of the most recent function in a builder-type call.
253
254 /// Contains the start of array subscript expressions, so that they
255 /// can be aligned.
257
258 /// If a nested name specifier was broken over multiple lines, this
259 /// contains the start column of the second line. Otherwise 0.
261
262 /// If a call expression was broken over multiple lines, this
263 /// contains the start column of the second line. Otherwise 0.
264 unsigned CallContinuation = 0;
265
266 /// The column of the first variable name in a variable declaration.
267 ///
268 /// Used to align further variables if necessary.
269 unsigned VariablePos = 0;
270
271 /// Whether this block's indentation is used for alignment.
272 bool IsAligned : 1;
273
274 /// Whether a newline needs to be inserted before the block's closing
275 /// brace.
276 ///
277 /// We only want to insert a newline before the closing brace if there also
278 /// was a newline after the beginning left brace.
280
281 /// Whether a newline needs to be inserted before the block's closing
282 /// paren.
283 ///
284 /// We only want to insert a newline before the closing paren if there also
285 /// was a newline after the beginning left paren.
287
288 /// Avoid bin packing, i.e. multiple parameters/elements on multiple
289 /// lines, in this context.
291
292 /// Break after the next comma (or all the commas in this context if
293 /// \c AvoidBinPacking is \c true).
295
296 /// Line breaking in this context would break a formatting rule.
297 bool NoLineBreak : 1;
298
299 /// Same as \c NoLineBreak, but is restricted until the end of the
300 /// operand (including the next ",").
302
303 /// True if the last binary operator on this level was wrapped to the
304 /// next line.
306
307 /// \c true if this \c ParenState already contains a line-break.
308 ///
309 /// The first line break in a certain \c ParenState causes extra penalty so
310 /// that clang-format prefers similar breaks, i.e. breaks in the same
311 /// parenthesis.
313
314 /// \c true if this \c ParenState contains multiple segments of a
315 /// builder-type call on one line.
317
318 /// \c true if the colons of the curren ObjC method expression should
319 /// be aligned.
320 ///
321 /// Not considered for memoization as it will always have the same value at
322 /// the same token.
323 bool AlignColons : 1;
324
325 /// \c true if at least one selector name was found in the current
326 /// ObjC method expression.
327 ///
328 /// Not considered for memoization as it will always have the same value at
329 /// the same token.
331
332 /// \c true if there are multiple nested blocks inside these parens.
333 ///
334 /// Not considered for memoization as it will always have the same value at
335 /// the same token.
337
338 /// The start of a nested block (e.g. lambda introducer in C++ or
339 /// "function" in JavaScript) is not wrapped to a new line.
341
342 /// \c true if the current \c ParenState represents an Objective-C
343 /// array literal.
345
347
348 /// \brief true if the current \c ParenState represents the false branch of
349 /// a chained conditional expression (e.g. else-if)
351
352 /// \brief true if there conditionnal was wrapped on the first operator (the
353 /// question mark)
355
356 /// \brief Indicates the indent should be reduced by the length of the
357 /// operator.
359
360 bool operator<(const ParenState &Other) const {
361 if (Indent != Other.Indent)
362 return Indent < Other.Indent;
363 if (LastSpace != Other.LastSpace)
364 return LastSpace < Other.LastSpace;
365 if (NestedBlockIndent != Other.NestedBlockIndent)
366 return NestedBlockIndent < Other.NestedBlockIndent;
367 if (FirstLessLess != Other.FirstLessLess)
368 return FirstLessLess < Other.FirstLessLess;
369 if (IsAligned != Other.IsAligned)
370 return IsAligned;
371 if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
373 if (BreakBeforeClosingParen != Other.BreakBeforeClosingParen)
375 if (QuestionColumn != Other.QuestionColumn)
376 return QuestionColumn < Other.QuestionColumn;
377 if (AvoidBinPacking != Other.AvoidBinPacking)
378 return AvoidBinPacking;
379 if (BreakBeforeParameter != Other.BreakBeforeParameter)
381 if (NoLineBreak != Other.NoLineBreak)
382 return NoLineBreak;
383 if (LastOperatorWrapped != Other.LastOperatorWrapped)
384 return LastOperatorWrapped;
385 if (ColonPos != Other.ColonPos)
386 return ColonPos < Other.ColonPos;
387 if (StartOfFunctionCall != Other.StartOfFunctionCall)
388 return StartOfFunctionCall < Other.StartOfFunctionCall;
389 if (StartOfArraySubscripts != Other.StartOfArraySubscripts)
390 return StartOfArraySubscripts < Other.StartOfArraySubscripts;
391 if (CallContinuation != Other.CallContinuation)
392 return CallContinuation < Other.CallContinuation;
393 if (VariablePos != Other.VariablePos)
394 return VariablePos < Other.VariablePos;
395 if (ContainsLineBreak != Other.ContainsLineBreak)
396 return ContainsLineBreak;
397 if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder)
399 if (NestedBlockInlined != Other.NestedBlockInlined)
400 return NestedBlockInlined;
401 if (IsCSharpGenericTypeConstraint != Other.IsCSharpGenericTypeConstraint)
403 if (IsChainedConditional != Other.IsChainedConditional)
405 if (IsWrappedConditional != Other.IsWrappedConditional)
407 if (UnindentOperator != Other.UnindentOperator)
408 return UnindentOperator;
409 return false;
410 }
411};
412
413/// The current state when indenting a unwrapped line.
414///
415/// As the indenting tries different combinations this is copied by value.
416struct LineState {
417 /// The number of used columns in the current line.
418 unsigned Column;
419
420 /// The token that needs to be next formatted.
422
423 /// \c true if \p NextToken should not continue this line.
425
426 /// The \c NestingLevel at the start of this line.
428
429 /// The lowest \c NestingLevel on the current line.
431
432 /// The start column of the string literal, if we're in a string
433 /// literal sequence, 0 otherwise.
435
436 /// Disallow line breaks for this line.
438
439 /// A stack keeping track of properties applying to parenthesis
440 /// levels.
442
443 /// Ignore the stack of \c ParenStates for state comparison.
444 ///
445 /// In long and deeply nested unwrapped lines, the current algorithm can
446 /// be insufficient for finding the best formatting with a reasonable amount
447 /// of time and memory. Setting this flag will effectively lead to the
448 /// algorithm not analyzing some combinations. However, these combinations
449 /// rarely contain the optimal solution: In short, accepting a higher
450 /// penalty early would need to lead to different values in the \c
451 /// ParenState stack (in an otherwise identical state) and these different
452 /// values would need to lead to a significant amount of avoided penalty
453 /// later.
454 ///
455 /// FIXME: Come up with a better algorithm instead.
457
458 /// The indent of the first token.
459 unsigned FirstIndent;
460
461 /// The line that is being formatted.
462 ///
463 /// Does not need to be considered for memoization because it doesn't change.
465
466 /// Comparison operator to be able to used \c LineState in \c map.
467 bool operator<(const LineState &Other) const {
468 if (NextToken != Other.NextToken)
469 return NextToken < Other.NextToken;
470 if (Column != Other.Column)
471 return Column < Other.Column;
472 if (NoContinuation != Other.NoContinuation)
473 return NoContinuation;
474 if (StartOfLineLevel != Other.StartOfLineLevel)
475 return StartOfLineLevel < Other.StartOfLineLevel;
476 if (LowestLevelOnLine != Other.LowestLevelOnLine)
477 return LowestLevelOnLine < Other.LowestLevelOnLine;
478 if (StartOfStringLiteral != Other.StartOfStringLiteral)
479 return StartOfStringLiteral < Other.StartOfStringLiteral;
480 if (IgnoreStackForComparison || Other.IgnoreStackForComparison)
481 return false;
482 return Stack < Other.Stack;
483 }
484};
485
486} // end namespace format
487} // end namespace clang
488
489#endif
Contains functions for text encoding manipulation.
This file contains the declaration of the FormatToken, a wrapper around Token with additional informa...
Various functions to configurably format source code.
This class handles loading and caching of source files into memory.
bool canBreak(const LineState &State)
Returns true, if a line break after State is allowed.
unsigned addTokenToState(LineState &State, bool Newline, bool DryRun, unsigned ExtraSpaces=0)
Appends the next token to State and updates information necessary for indentation.
unsigned getColumnLimit(const LineState &State) const
Get the column limit for this line.
LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn, const AnnotatedLine *Line, bool DryRun)
Get the initial state, i.e.
bool mustBreak(const LineState &State)
Returns true, if a line break after State is mandatory.
Manages the whitespaces around tokens and their replacements.
#define true
Definition: stdbool.h:21
#define false
Definition: stdbool.h:22
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
Definition: FormatToken.h:964
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:55
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:269
The current state when indenting a unwrapped line.
unsigned StartOfStringLiteral
The start column of the string literal, if we're in a string literal sequence, 0 otherwise.
bool IgnoreStackForComparison
Ignore the stack of ParenStates for state comparison.
bool operator<(const LineState &Other) const
Comparison operator to be able to used LineState in map.
unsigned LowestLevelOnLine
The lowest NestingLevel on the current line.
const AnnotatedLine * Line
The line that is being formatted.
bool NoLineBreak
Disallow line breaks for this line.
unsigned Column
The number of used columns in the current line.
SmallVector< ParenState > Stack
A stack keeping track of properties applying to parenthesis levels.
bool NoContinuation
true if NextToken should not continue this line.
unsigned StartOfLineLevel
The NestingLevel at the start of this line.
unsigned FirstIndent
The indent of the first token.
FormatToken * NextToken
The token that needs to be next formatted.
bool AvoidBinPacking
Avoid bin packing, i.e.
unsigned NestedNameSpecifierContinuation
If a nested name specifier was broken over multiple lines, this contains the start column of the seco...
bool AlignColons
true if the colons of the curren ObjC method expression should be aligned.
unsigned Indent
The position to which a specific parenthesis level needs to be indented.
bool BreakBeforeClosingParen
Whether a newline needs to be inserted before the block's closing paren.
ParenState(const FormatToken *Tok, unsigned Indent, unsigned LastSpace, bool AvoidBinPacking, bool NoLineBreak)
bool BreakBeforeParameter
Break after the next comma (or all the commas in this context if AvoidBinPacking is true).
const FormatToken * Tok
The token opening this parenthesis level, or nullptr if this level is opened by fake parenthesis.
bool BreakBeforeClosingBrace
Whether a newline needs to be inserted before the block's closing brace.
unsigned FirstLessLess
The position the first "<<" operator encountered on each level.
bool LastOperatorWrapped
True if the last binary operator on this level was wrapped to the next line.
unsigned VariablePos
The column of the first variable name in a variable declaration.
unsigned LastSpace
The position of the last space on each level.
bool NoLineBreak
Line breaking in this context would break a formatting rule.
unsigned QuestionColumn
The column of a ? in a conditional expression;.
unsigned StartOfFunctionCall
The start of the most recent function in a builder-type call.
bool ObjCSelectorNameFound
true if at least one selector name was found in the current ObjC method expression.
bool NoLineBreakInOperand
Same as NoLineBreak, but is restricted until the end of the operand (including the next ",...
bool operator<(const ParenState &Other) const
bool ContainsUnwrappedBuilder
true if this ParenState contains multiple segments of a builder-type call on one line.
unsigned ColonPos
The position of the colon in an ObjC method declaration/call.
bool UnindentOperator
Indicates the indent should be reduced by the length of the operator.
bool ContainsLineBreak
true if this ParenState already contains a line-break.
bool IsAligned
Whether this block's indentation is used for alignment.
unsigned StartOfArraySubscripts
Contains the start of array subscript expressions, so that they can be aligned.
bool NestedBlockInlined
The start of a nested block (e.g.
bool HasMultipleNestedBlocks
true if there are multiple nested blocks inside these parens.
bool IsChainedConditional
true if the current ParenState represents the false branch of a chained conditional expression (e....
unsigned CallContinuation
If a call expression was broken over multiple lines, this contains the start column of the second lin...
bool IsWrappedConditional
true if there conditionnal was wrapped on the first operator (the question mark)
unsigned NestedBlockIndent
If a block relative to this parenthesis level gets wrapped, indent it this much.
bool IsInsideObjCArrayLiteral
true if the current ParenState represents an Objective-C array literal.
llvm::StringMap< FormatStyle > EnclosingFunctionStyle
std::optional< FormatStyle > getDelimiterStyle(StringRef Delimiter) const
std::optional< FormatStyle > getEnclosingFunctionStyle(StringRef EnclosingFunction) const
llvm::StringMap< FormatStyle > DelimiterStyle