clang  11.0.0git
ContinuationIndenter.h
Go to the documentation of this file.
1 //===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements an indenter that manages the indentation of
11 /// continuations.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
16 #define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
17 
18 #include "Encoding.h"
19 #include "FormatToken.h"
20 #include "clang/Format/Format.h"
21 #include "llvm/Support/Regex.h"
22 #include <map>
23 #include <tuple>
24 
25 namespace clang {
26 class SourceManager;
27 
28 namespace format {
29 
30 class AnnotatedLine;
31 class BreakableToken;
32 struct FormatToken;
33 struct LineState;
34 struct ParenState;
35 struct RawStringFormatStyleManager;
36 class WhitespaceManager;
37 
39  llvm::StringMap<FormatStyle> DelimiterStyle;
40  llvm::StringMap<FormatStyle> EnclosingFunctionStyle;
41 
42  RawStringFormatStyleManager(const FormatStyle &CodeStyle);
43 
44  llvm::Optional<FormatStyle> getDelimiterStyle(StringRef Delimiter) const;
45 
47  getEnclosingFunctionStyle(StringRef EnclosingFunction) const;
48 };
49 
51 public:
52  /// Constructs a \c ContinuationIndenter to format \p Line starting in
53  /// column \p FirstIndent.
55  const AdditionalKeywords &Keywords,
56  const SourceManager &SourceMgr,
57  WhitespaceManager &Whitespaces,
59  bool BinPackInconclusiveFunctions);
60 
61  /// Get the initial state, i.e. the state after placing \p Line's
62  /// first token at \p FirstIndent. When reformatting a fragment of code, as in
63  /// the case of formatting inside raw string literals, \p FirstStartColumn is
64  /// the column at which the state of the parent formatter is.
65  LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn,
66  const AnnotatedLine *Line, bool DryRun);
67 
68  // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
69  // better home.
70  /// Returns \c true, if a line break after \p State is allowed.
71  bool canBreak(const LineState &State);
72 
73  /// Returns \c true, if a line break after \p State is mandatory.
74  bool mustBreak(const LineState &State);
75 
76  /// Appends the next token to \p State and updates information
77  /// necessary for indentation.
78  ///
79  /// Puts the token on the current line if \p Newline is \c false and adds a
80  /// line break and necessary indentation otherwise.
81  ///
82  /// If \p DryRun is \c false, also creates and stores the required
83  /// \c Replacement.
84  unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
85  unsigned ExtraSpaces = 0);
86 
87  /// Get the column limit for this line. This is the style's column
88  /// limit, potentially reduced for preprocessor definitions.
89  unsigned getColumnLimit(const LineState &State) const;
90 
91 private:
92  /// Mark the next token as consumed in \p State and modify its stacks
93  /// accordingly.
94  unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
95 
96  /// Update 'State' according to the next token's fake left parentheses.
97  void moveStatePastFakeLParens(LineState &State, bool Newline);
98  /// Update 'State' according to the next token's fake right parentheses.
99  void moveStatePastFakeRParens(LineState &State);
100 
101  /// Update 'State' according to the next token being one of "(<{[".
102  void moveStatePastScopeOpener(LineState &State, bool Newline);
103  /// Update 'State' according to the next token being one of ")>}]".
104  void moveStatePastScopeCloser(LineState &State);
105  /// Update 'State' with the next token opening a nested block.
106  void moveStateToNewBlock(LineState &State);
107 
108  /// Reformats a raw string literal.
109  ///
110  /// \returns An extra penalty induced by reformatting the token.
111  unsigned reformatRawStringLiteral(const FormatToken &Current,
112  LineState &State,
113  const FormatStyle &RawStringStyle,
114  bool DryRun, bool Newline);
115 
116  /// If the current token is at the end of the current line, handle
117  /// the transition to the next line.
118  unsigned handleEndOfLine(const FormatToken &Current, LineState &State,
119  bool DryRun, bool AllowBreak, bool Newline);
120 
121  /// If \p Current is a raw string that is configured to be reformatted,
122  /// return the style to be used.
123  llvm::Optional<FormatStyle> getRawStringStyle(const FormatToken &Current,
124  const LineState &State);
125 
126  /// If the current token sticks out over the end of the line, break
127  /// it if possible.
128  ///
129  /// \returns A pair (penalty, exceeded), where penalty is the extra penalty
130  /// when tokens are broken or lines exceed the column limit, and exceeded
131  /// indicates whether the algorithm purposefully left lines exceeding the
132  /// column limit.
133  ///
134  /// The returned penalty will cover the cost of the additional line breaks
135  /// and column limit violation in all lines except for the last one. The
136  /// penalty for the column limit violation in the last line (and in single
137  /// line tokens) is handled in \c addNextStateToQueue.
138  ///
139  /// \p Strict indicates whether reflowing is allowed to leave characters
140  /// protruding the column limit; if true, lines will be split strictly within
141  /// the column limit where possible; if false, words are allowed to protrude
142  /// over the column limit as long as the penalty is less than the penalty
143  /// of a break.
144  std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current,
145  LineState &State,
146  bool AllowBreak, bool DryRun,
147  bool Strict);
148 
149  /// Returns the \c BreakableToken starting at \p Current, or nullptr
150  /// if the current token cannot be broken.
151  std::unique_ptr<BreakableToken>
152  createBreakableToken(const FormatToken &Current, LineState &State,
153  bool AllowBreak);
154 
155  /// Appends the next token to \p State and updates information
156  /// necessary for indentation.
157  ///
158  /// Puts the token on the current line.
159  ///
160  /// If \p DryRun is \c false, also creates and stores the required
161  /// \c Replacement.
162  void addTokenOnCurrentLine(LineState &State, bool DryRun,
163  unsigned ExtraSpaces);
164 
165  /// Appends the next token to \p State and updates information
166  /// necessary for indentation.
167  ///
168  /// Adds a line break and necessary indentation.
169  ///
170  /// If \p DryRun is \c false, also creates and stores the required
171  /// \c Replacement.
172  unsigned addTokenOnNewLine(LineState &State, bool DryRun);
173 
174  /// Calculate the new column for a line wrap before the next token.
175  unsigned getNewLineColumn(const LineState &State);
176 
177  /// Adds a multiline token to the \p State.
178  ///
179  /// \returns Extra penalty for the first line of the literal: last line is
180  /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
181  /// matter, as we don't change them.
182  unsigned addMultilineToken(const FormatToken &Current, LineState &State);
183 
184  /// Returns \c true if the next token starts a multiline string
185  /// literal.
186  ///
187  /// This includes implicitly concatenated strings, strings that will be broken
188  /// by clang-format and string literals with escaped newlines.
189  bool nextIsMultilineString(const LineState &State);
190 
192  const AdditionalKeywords &Keywords;
193  const SourceManager &SourceMgr;
194  WhitespaceManager &Whitespaces;
196  bool BinPackInconclusiveFunctions;
197  llvm::Regex CommentPragmasRegex;
198  const RawStringFormatStyleManager RawStringFormats;
199 };
200 
201 struct ParenState {
202  ParenState(const FormatToken *Tok, unsigned Indent, unsigned LastSpace,
203  bool AvoidBinPacking, bool NoLineBreak)
     // Tok, Indent, LastSpace, AvoidBinPacking and NoLineBreak are copied from
     // the constructor arguments; NestedBlockIndent starts out equal to Indent.
     // LastOperatorWrapped and AlignColons start as true; every other flag in
     // this initializer list starts as false.
204  : Tok(Tok), Indent(Indent), LastSpace(LastSpace),
205  NestedBlockIndent(Indent), IsAligned(false),
206  BreakBeforeClosingBrace(false), AvoidBinPacking(AvoidBinPacking),
207  BreakBeforeParameter(false), NoLineBreak(NoLineBreak),
208  NoLineBreakInOperand(false), LastOperatorWrapped(true),
209  ContainsLineBreak(false), ContainsUnwrappedBuilder(false),
210  AlignColons(true), ObjCSelectorNameFound(false),
211  HasMultipleNestedBlocks(false), NestedBlockInlined(false),
212  IsInsideObjCArrayLiteral(false), IsCSharpGenericTypeConstraint(false),
213  IsChainedConditional(false), IsWrappedConditional(false),
214  UnindentOperator(false) {}
215 
216  /// \brief The token opening this parenthesis level, or nullptr if this level
217  /// is opened by fake parenthesis.
218  ///
219  /// Not considered for memoization as it will always have the same value at
220  /// the same token.
221  const FormatToken *Tok;
222 
223  /// The position to which a specific parenthesis level needs to be
224  /// indented.
225  unsigned Indent;
226 
227  /// The position of the last space on each level.
228  ///
229  /// Used e.g. to break like:
230  /// functionCall(Parameter, otherCall(
231  /// OtherParameter));
232  unsigned LastSpace;
233 
234  /// If a block relative to this parenthesis level gets wrapped, indent
235  /// it this much.
237 
238  /// The position of the first "<<" operator encountered on each level.
239  ///
240  /// Used to align "<<" operators. 0 if no such operator has been encountered
241  /// on a level.
242  unsigned FirstLessLess = 0;
243 
244  /// The column of a \c ? in a conditional expression.
245  unsigned QuestionColumn = 0;
246 
247  /// The position of the colon in an ObjC method declaration/call.
248  unsigned ColonPos = 0;
249 
250  /// The start of the most recent function in a builder-type call.
251  unsigned StartOfFunctionCall = 0;
252 
253  /// Contains the start of array subscript expressions, so that they
254  /// can be aligned.
255  unsigned StartOfArraySubscripts = 0;
256 
257  /// If a nested name specifier was broken over multiple lines, this
258  /// contains the start column of the second line. Otherwise 0.
259  unsigned NestedNameSpecifierContinuation = 0;
260 
261  /// If a call expression was broken over multiple lines, this
262  /// contains the start column of the second line. Otherwise 0.
263  unsigned CallContinuation = 0;
264 
265  /// The column of the first variable name in a variable declaration.
266  ///
267  /// Used to align further variables if necessary.
268  unsigned VariablePos = 0;
269 
270  /// Whether this block's indentation is used for alignment.
271  bool IsAligned : 1;
272 
273  /// Whether a newline needs to be inserted before the block's closing
274  /// brace.
275  ///
276  /// We only want to insert a newline before the closing brace if there also
277  /// was a newline after the beginning left brace.
279 
280  /// Avoid bin packing, i.e. multiple parameters/elements on multiple
281  /// lines, in this context.
282  bool AvoidBinPacking : 1;
283 
284  /// Break after the next comma (or all the commas in this context if
285  /// \c AvoidBinPacking is \c true).
287 
288  /// Line breaking in this context would break a formatting rule.
289  bool NoLineBreak : 1;
290 
291  /// Same as \c NoLineBreak, but is restricted until the end of the
292  /// operand (including the next ",").
294 
295  /// True if the last binary operator on this level was wrapped to the
296  /// next line.
298 
299  /// \c true if this \c ParenState already contains a line-break.
300  ///
301  /// The first line break in a certain \c ParenState causes extra penalty so
302  /// that clang-format prefers similar breaks, i.e. breaks in the same
303  /// parenthesis.
305 
306  /// \c true if this \c ParenState contains multiple segments of a
307  /// builder-type call on one line.
309 
310  /// \c true if the colons of the current ObjC method expression should
311  /// be aligned.
312  ///
313  /// Not considered for memoization as it will always have the same value at
314  /// the same token.
315  bool AlignColons : 1;
316 
317  /// \c true if at least one selector name was found in the current
318  /// ObjC method expression.
319  ///
320  /// Not considered for memoization as it will always have the same value at
321  /// the same token.
323 
324  /// \c true if there are multiple nested blocks inside these parens.
325  ///
326  /// Not considered for memoization as it will always have the same value at
327  /// the same token.
329 
330  /// The start of a nested block (e.g. lambda introducer in C++ or
331  /// "function" in JavaScript) is not wrapped to a new line.
333 
334  /// \c true if the current \c ParenState represents an Objective-C
335  /// array literal.
337 
339 
340  /// \brief true if the current \c ParenState represents the false branch of
341  /// a chained conditional expression (e.g. else-if)
343 
344  /// \brief true if the conditional was wrapped on the first operator (the
345  /// question mark)
347 
348  /// \brief Indicates the indent should be reduced by the length of the
349  /// operator.
351 
352  bool operator<(const ParenState &Other) const {
     // Orders two ParenStates by the first field, in the fixed sequence below,
     // whose value differs between them. Unsigned fields compare numerically.
     // For bool fields the state whose flag is set orders first (the branch
     // returns this object's own flag value). Returns false when every
     // compared field is equal.
     //
     // Tok and AlignColons are among the members that are not compared; their
     // declarations document them as not considered for memoization.
     // NOTE(review): NoLineBreakInOperand, NestedNameSpecifierContinuation and
     // IsInsideObjCArrayLiteral are not compared here either, and their
     // comments carry no such note -- confirm that this is intentional.
353  if (Indent != Other.Indent)
354  return Indent < Other.Indent;
355  if (LastSpace != Other.LastSpace)
356  return LastSpace < Other.LastSpace;
357  if (NestedBlockIndent != Other.NestedBlockIndent)
358  return NestedBlockIndent < Other.NestedBlockIndent;
359  if (FirstLessLess != Other.FirstLessLess)
360  return FirstLessLess < Other.FirstLessLess;
361  if (IsAligned != Other.IsAligned)
362  return IsAligned;
363  if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
364  return BreakBeforeClosingBrace;
365  if (QuestionColumn != Other.QuestionColumn)
366  return QuestionColumn < Other.QuestionColumn;
367  if (AvoidBinPacking != Other.AvoidBinPacking)
368  return AvoidBinPacking;
369  if (BreakBeforeParameter != Other.BreakBeforeParameter)
370  return BreakBeforeParameter;
371  if (NoLineBreak != Other.NoLineBreak)
372  return NoLineBreak;
373  if (LastOperatorWrapped != Other.LastOperatorWrapped)
374  return LastOperatorWrapped;
375  if (ColonPos != Other.ColonPos)
376  return ColonPos < Other.ColonPos;
377  if (StartOfFunctionCall != Other.StartOfFunctionCall)
378  return StartOfFunctionCall < Other.StartOfFunctionCall;
379  if (StartOfArraySubscripts != Other.StartOfArraySubscripts)
380  return StartOfArraySubscripts < Other.StartOfArraySubscripts;
381  if (CallContinuation != Other.CallContinuation)
382  return CallContinuation < Other.CallContinuation;
383  if (VariablePos != Other.VariablePos)
384  return VariablePos < Other.VariablePos;
385  if (ContainsLineBreak != Other.ContainsLineBreak)
386  return ContainsLineBreak;
387  if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder)
388  return ContainsUnwrappedBuilder;
389  if (NestedBlockInlined != Other.NestedBlockInlined)
390  return NestedBlockInlined;
391  if (IsCSharpGenericTypeConstraint != Other.IsCSharpGenericTypeConstraint)
392  return IsCSharpGenericTypeConstraint;
393  if (IsChainedConditional != Other.IsChainedConditional)
394  return IsChainedConditional;
395  if (IsWrappedConditional != Other.IsWrappedConditional)
396  return IsWrappedConditional;
397  if (UnindentOperator != Other.UnindentOperator)
398  return UnindentOperator;
     // All compared fields are equal, so neither state orders before the other.
399  return false;
400  }
401 };
402 
403 /// The current state when indenting an unwrapped line.
404 ///
405 /// As the indenting tries different combinations this is copied by value.
406 struct LineState {
407  /// The number of used columns in the current line.
408  unsigned Column;
409 
410  /// The token that needs to be formatted next.
412 
413  /// \c true if this line contains a continued for-loop section.
415 
416  /// \c true if \p NextToken should not continue this line.
418 
419  /// The \c NestingLevel at the start of this line.
421 
422  /// The lowest \c NestingLevel on the current line.
424 
425  /// The start column of the string literal, if we're in a string
426  /// literal sequence, 0 otherwise.
428 
429  /// A stack keeping track of properties applying to parenthesis
430  /// levels.
431  std::vector<ParenState> Stack;
432 
433  /// Ignore the stack of \c ParenStates for state comparison.
434  ///
435  /// In long and deeply nested unwrapped lines, the current algorithm can
436  /// be insufficient for finding the best formatting with a reasonable amount
437  /// of time and memory. Setting this flag will effectively lead to the
438  /// algorithm not analyzing some combinations. However, these combinations
439  /// rarely contain the optimal solution: In short, accepting a higher
440  /// penalty early would need to lead to different values in the \c
441  /// ParenState stack (in an otherwise identical state) and these different
442  /// values would need to lead to a significant amount of avoided penalty
443  /// later.
444  ///
445  /// FIXME: Come up with a better algorithm instead.
447 
448  /// The indent of the first token.
449  unsigned FirstIndent;
450 
451  /// The line that is being formatted.
452  ///
453  /// Does not need to be considered for memoization because it doesn't change.
455 
456  /// Comparison operator to be able to use \c LineState in \c map.
     ///
     /// The first field that differs, in the order checked below, decides the
     /// result. \c FirstIndent is not part of the comparison.
457  bool operator<(const LineState &Other) const {
458  if (NextToken != Other.NextToken)
459  return NextToken < Other.NextToken;
460  if (Column != Other.Column)
461  return Column < Other.Column;
     // NOTE(review): listing line 463 (the right-hand side of the comparison
     // below) is missing from this extract.
     // For the bool fields, the state whose flag is set orders first.
462  if (LineContainsContinuedForLoopSection !=
464  return LineContainsContinuedForLoopSection;
465  if (NoContinuation != Other.NoContinuation)
466  return NoContinuation;
467  if (StartOfLineLevel != Other.StartOfLineLevel)
468  return StartOfLineLevel < Other.StartOfLineLevel;
469  if (LowestLevelOnLine != Other.LowestLevelOnLine)
470  return LowestLevelOnLine < Other.LowestLevelOnLine;
471  if (StartOfStringLiteral != Other.StartOfStringLiteral)
472  return StartOfStringLiteral < Other.StartOfStringLiteral;
     // If either state asks to ignore the stack, the ParenState stacks are not
     // compared and neither state orders before the other.
473  if (IgnoreStackForComparison || Other.IgnoreStackForComparison)
474  return false;
     // Otherwise the std::vector<ParenState> stacks decide the ordering.
475  return Stack < Other.Stack;
476  }
477 };
478 
479 } // end namespace format
480 } // end namespace clang
481 
482 #endif
unsigned LowestLevelOnLine
The lowest NestingLevel on the current line.
bool ContainsLineBreak
true if this ParenState already contains a line-break.
unsigned VariablePos
The column of the first variable name in a variable declaration.
bool BreakBeforeClosingBrace
Whether a newline needs to be inserted before the block's closing brace.
unsigned CallContinuation
If a call expression was broken over multiple lines, this contains the start column of the second lin...
LineState State
Contains functions for text encoding manipulation.
bool AlignColons
true if the colons of the current ObjC method expression should be aligned.
unsigned Column
The number of used columns in the current line.
bool NoContinuation
true if NextToken should not continue this line.
Manages the whitespaces around tokens and their replacements.
const FormatToken * Tok
The token opening this parenthesis level, or nullptr if this level is opened by fake parenthesis...
unsigned Indent
The position to which a specific parenthesis level needs to be indented.
const FormatToken & Tok
bool HasMultipleNestedBlocks
true if there are multiple nested blocks inside these parens.
ParenState(const FormatToken *Tok, unsigned Indent, unsigned LastSpace, bool AvoidBinPacking, bool NoLineBreak)
const AnnotatedLine * Line
The line that is being formatted.
bool NoLineBreakInOperand
Same as NoLineBreak, but is restricted until the end of the operand (including the next "...
bool IsWrappedConditional
true if the conditional was wrapped on the first operator (the question mark)
bool LineContainsContinuedForLoopSection
true if this line contains a continued for-loop section.
bool NestedBlockInlined
The start of a nested block (e.g.
bool LastOperatorWrapped
True if the last binary operator on this level was wrapped to the next line.
llvm::Optional< FormatStyle > getEnclosingFunctionStyle(StringRef EnclosingFunction) const
bool BreakBeforeParameter
Break after the next comma (or all the commas in this context if AvoidBinPacking is true)...
bool ObjCSelectorNameFound
true if at least one selector name was found in the current ObjC method expression.
The current state when indenting an unwrapped line.
unsigned QuestionColumn
The column of a ? in a conditional expression.
const AnnotatedLine * Line
unsigned StartOfArraySubscripts
Contains the start of array subscript expressions, so that they can be aligned.
A wrapper around a Token storing information about the whitespace characters preceding it...
Definition: FormatToken.h:142
llvm::StringMap< FormatStyle > DelimiterStyle
std::vector< ParenState > Stack
A stack keeping track of properties applying to parenthesis levels.
bool NoLineBreak
Line breaking in this context would break a formatting rule.
#define false
Definition: stdbool.h:17
Various functions to configurably format source code.
bool IsAligned
Whether this block's indentation is used for alignment.
unsigned LastSpace
The position of the last space on each level.
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
Definition: FormatToken.h:710
bool IgnoreStackForComparison
Ignore the stack of ParenStates for state comparison.
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:54
RawStringFormatStyleManager(const FormatStyle &CodeStyle)
bool IsInsideObjCArrayLiteral
true if the current ParenState represents an Objective-C array literal.
Dataflow Directional Tag Classes.
llvm::Optional< FormatStyle > getDelimiterStyle(StringRef Delimiter) const
unsigned FirstIndent
The indent of the first token.
unsigned ColonPos
The position of the colon in an ObjC method declaration/call.
bool AvoidBinPacking
Avoid bin packing, i.e.
bool UnindentOperator
Indicates the indent should be reduced by the length of the operator.
bool IsChainedConditional
true if the current ParenState represents the false branch of a chained conditional expression (e...
bool ContainsUnwrappedBuilder
true if this ParenState contains multiple segments of a builder-type call on one line.
unsigned NestedBlockIndent
If a block relative to this parenthesis level gets wrapped, indent it this much.
unsigned FirstLessLess
The position of the first "<<" operator encountered on each level.
unsigned StartOfLineLevel
The NestingLevel at the start of this line.
This file contains the declaration of the FormatToken, a wrapper around Token with additional informa...
raw_ostream & Indent(raw_ostream &Out, const unsigned int Space, bool IsDot)
Definition: JsonSupport.h:20
unsigned StartOfStringLiteral
The start column of the string literal, if we're in a string literal sequence, 0 otherwise.
BreakableToken(const FormatToken &Tok, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
llvm::StringMap< FormatStyle > EnclosingFunctionStyle
FormatToken * NextToken
The token that needs to be next formatted.
bool operator<(const LineState &Other) const
Comparison operator to be able to use LineState in map.
#define true
Definition: stdbool.h:16
unsigned StartOfFunctionCall
The start of the most recent function in a builder-type call.
bool operator<(const ParenState &Other) const
This class handles loading and caching of source files into memory.
const encoding::Encoding Encoding
const FormatStyle & Style