clang  9.0.0svn
ContinuationIndenter.h
Go to the documentation of this file.
1 //===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements an indenter that manages the indentation of
11 /// continuations.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
16 #define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
17 
18 #include "Encoding.h"
19 #include "FormatToken.h"
20 #include "clang/Format/Format.h"
21 #include "llvm/Support/Regex.h"
22 #include <map>
23 #include <tuple>
24 
25 namespace clang {
26 class SourceManager;
27 
28 namespace format {
29 
30 class AnnotatedLine;
31 class BreakableToken;
32 struct FormatToken;
33 struct LineState;
34 struct ParenState;
35 struct RawStringFormatStyleManager;
36 class WhitespaceManager;
37 
39  llvm::StringMap<FormatStyle> DelimiterStyle;
40  llvm::StringMap<FormatStyle> EnclosingFunctionStyle;
41 
42  RawStringFormatStyleManager(const FormatStyle &CodeStyle);
43 
44  llvm::Optional<FormatStyle> getDelimiterStyle(StringRef Delimiter) const;
45 
47  getEnclosingFunctionStyle(StringRef EnclosingFunction) const;
48 };
49 
51 public:
52  /// Constructs a \c ContinuationIndenter to format \p Line starting in
53  /// column \p FirstIndent.
55  const AdditionalKeywords &Keywords,
56  const SourceManager &SourceMgr,
57  WhitespaceManager &Whitespaces,
59  bool BinPackInconclusiveFunctions);
60 
61  /// Get the initial state, i.e. the state after placing \p Line's
62  /// first token at \p FirstIndent. When reformatting a fragment of code, as in
63  /// the case of formatting inside raw string literals, \p FirstStartColumn is
64  /// the column at which the state of the parent formatter is.
65  LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn,
66  const AnnotatedLine *Line, bool DryRun);
67 
68  // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
69  // better home.
70  /// Returns \c true, if a line break after \p State is allowed.
71  bool canBreak(const LineState &State);
72 
73  /// Returns \c true, if a line break after \p State is mandatory.
74  bool mustBreak(const LineState &State);
75 
76  /// Appends the next token to \p State and updates information
77  /// necessary for indentation.
78  ///
79  /// Puts the token on the current line if \p Newline is \c false and adds a
80  /// line break and necessary indentation otherwise.
81  ///
82  /// If \p DryRun is \c false, also creates and stores the required
83  /// \c Replacement.
84  unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
85  unsigned ExtraSpaces = 0);
86 
87  /// Get the column limit for this line. This is the style's column
88  /// limit, potentially reduced for preprocessor definitions.
89  unsigned getColumnLimit(const LineState &State) const;
90 
91 private:
92  /// Mark the next token as consumed in \p State and modify its stacks
93  /// accordingly.
94  unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
95 
96  /// Update 'State' according to the next token's fake left parentheses.
97  void moveStatePastFakeLParens(LineState &State, bool Newline);
98  /// Update 'State' according to the next token's fake right parentheses.
99  void moveStatePastFakeRParens(LineState &State);
100 
101  /// Update 'State' according to the next token being one of "(<{[".
102  void moveStatePastScopeOpener(LineState &State, bool Newline);
103  /// Update 'State' according to the next token being one of ")>}]".
104  void moveStatePastScopeCloser(LineState &State);
105  /// Update 'State' with the next token opening a nested block.
106  void moveStateToNewBlock(LineState &State);
107 
108  /// Reformats a raw string literal.
109  ///
110  /// \returns An extra penalty induced by reformatting the token.
111  unsigned reformatRawStringLiteral(const FormatToken &Current,
112  LineState &State,
113  const FormatStyle &RawStringStyle,
114  bool DryRun);
115 
116  /// If the current token is at the end of the current line, handle
117  /// the transition to the next line.
118  unsigned handleEndOfLine(const FormatToken &Current, LineState &State,
119  bool DryRun, bool AllowBreak);
120 
121  /// If \p Current is a raw string that is configured to be reformatted,
122  /// return the style to be used.
123  llvm::Optional<FormatStyle> getRawStringStyle(const FormatToken &Current,
124  const LineState &State);
125 
126  /// If the current token sticks out over the end of the line, break
127  /// it if possible.
128  ///
129  /// \returns A pair (penalty, exceeded), where penalty is the extra penalty
130  /// when tokens are broken or lines exceed the column limit, and exceeded
131  /// indicates whether the algorithm purposefully left lines exceeding the
132  /// column limit.
133  ///
134  /// The returned penalty will cover the cost of the additional line breaks
135  /// and column limit violation in all lines except for the last one. The
136  /// penalty for the column limit violation in the last line (and in single
137  /// line tokens) is handled in \c addNextStateToQueue.
138  ///
139  /// \p Strict indicates whether reflowing is allowed to leave characters
140  /// protruding the column limit; if true, lines will be split strictly within
141  /// the column limit where possible; if false, words are allowed to protrude
142  /// over the column limit as long as the penalty is less than the penalty
143  /// of a break.
144  std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current,
145  LineState &State,
146  bool AllowBreak, bool DryRun,
147  bool Strict);
148 
149  /// Returns the \c BreakableToken starting at \p Current, or nullptr
150  /// if the current token cannot be broken.
151  std::unique_ptr<BreakableToken>
152  createBreakableToken(const FormatToken &Current, LineState &State,
153  bool AllowBreak);
154 
155  /// Appends the next token to \p State and updates information
156  /// necessary for indentation.
157  ///
158  /// Puts the token on the current line.
159  ///
160  /// If \p DryRun is \c false, also creates and stores the required
161  /// \c Replacement.
162  void addTokenOnCurrentLine(LineState &State, bool DryRun,
163  unsigned ExtraSpaces);
164 
165  /// Appends the next token to \p State and updates information
166  /// necessary for indentation.
167  ///
168  /// Adds a line break and necessary indentation.
169  ///
170  /// If \p DryRun is \c false, also creates and stores the required
171  /// \c Replacement.
172  unsigned addTokenOnNewLine(LineState &State, bool DryRun);
173 
174  /// Calculate the new column for a line wrap before the next token.
175  unsigned getNewLineColumn(const LineState &State);
176 
177  /// Adds a multiline token to the \p State.
178  ///
179  /// \returns Extra penalty for the first line of the literal: last line is
180  /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
181  /// matter, as we don't change them.
182  unsigned addMultilineToken(const FormatToken &Current, LineState &State);
183 
184  /// Returns \c true if the next token starts a multiline string
185  /// literal.
186  ///
187  /// This includes implicitly concatenated strings, strings that will be broken
188  /// by clang-format and string literals with escaped newlines.
189  bool nextIsMultilineString(const LineState &State);
190 
192  const AdditionalKeywords &Keywords;
193  const SourceManager &SourceMgr;
194  WhitespaceManager &Whitespaces;
196  bool BinPackInconclusiveFunctions;
197  llvm::Regex CommentPragmasRegex;
198  const RawStringFormatStyleManager RawStringFormats;
199 };
200 
201 struct ParenState {
202  ParenState(const FormatToken *Tok, unsigned Indent, unsigned LastSpace,
203  bool AvoidBinPacking, bool NoLineBreak)
204  : Tok(Tok), Indent(Indent), LastSpace(LastSpace),
205  NestedBlockIndent(Indent), BreakBeforeClosingBrace(false),
206  AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false),
207  NoLineBreak(NoLineBreak), NoLineBreakInOperand(false),
208  LastOperatorWrapped(true), ContainsLineBreak(false),
209  ContainsUnwrappedBuilder(false), AlignColons(true),
210  ObjCSelectorNameFound(false), HasMultipleNestedBlocks(false),
211  NestedBlockInlined(false), IsInsideObjCArrayLiteral(false) {}
212 
213  /// \brief The token opening this parenthesis level, or nullptr if this level
214  /// is opened by fake parenthesis.
215  ///
216  /// Not considered for memoization as it will always have the same value at
217  /// the same token.
218  const FormatToken *Tok;
219 
220  /// The position to which a specific parenthesis level needs to be
221  /// indented.
222  unsigned Indent;
223 
224  /// The position of the last space on each level.
225  ///
226  /// Used e.g. to break like:
227  /// functionCall(Parameter, otherCall(
228  /// OtherParameter));
229  unsigned LastSpace;
230 
231  /// If a block relative to this parenthesis level gets wrapped, indent
232  /// it this much.
234 
235  /// The position of the first "<<" operator encountered on each level.
236  ///
237  /// Used to align "<<" operators. 0 if no such operator has been encountered
238  /// on a level.
239  unsigned FirstLessLess = 0;
240 
241  /// The column of a \c ? in a conditional expression.
242  unsigned QuestionColumn = 0;
243 
244  /// The position of the colon in an ObjC method declaration/call.
245  unsigned ColonPos = 0;
246 
247  /// The start of the most recent function in a builder-type call.
248  unsigned StartOfFunctionCall = 0;
249 
250  /// Contains the start of array subscript expressions, so that they
251  /// can be aligned.
252  unsigned StartOfArraySubscripts = 0;
253 
254  /// If a nested name specifier was broken over multiple lines, this
255  /// contains the start column of the second line. Otherwise 0.
256  unsigned NestedNameSpecifierContinuation = 0;
257 
258  /// If a call expression was broken over multiple lines, this
259  /// contains the start column of the second line. Otherwise 0.
260  unsigned CallContinuation = 0;
261 
262  /// The column of the first variable name in a variable declaration.
263  ///
264  /// Used to align further variables if necessary.
265  unsigned VariablePos = 0;
266 
267  /// Whether a newline needs to be inserted before the block's closing
268  /// brace.
269  ///
270  /// We only want to insert a newline before the closing brace if there also
271  /// was a newline after the beginning left brace.
273 
274  /// Avoid bin packing, i.e. multiple parameters/elements on multiple
275  /// lines, in this context.
276  bool AvoidBinPacking : 1;
277 
278  /// Break after the next comma (or all the commas in this context if
279  /// \c AvoidBinPacking is \c true).
281 
282  /// Line breaking in this context would break a formatting rule.
283  bool NoLineBreak : 1;
284 
285  /// Same as \c NoLineBreak, but is restricted until the end of the
286  /// operand (including the next ",").
288 
289  /// True if the last binary operator on this level was wrapped to the
290  /// next line.
292 
293  /// \c true if this \c ParenState already contains a line-break.
294  ///
295  /// The first line break in a certain \c ParenState causes extra penalty so
296  /// that clang-format prefers similar breaks, i.e. breaks in the same
297  /// parenthesis.
299 
300  /// \c true if this \c ParenState contains multiple segments of a
301  /// builder-type call on one line.
303 
304  /// \c true if the colons of the current ObjC method expression should
305  /// be aligned.
306  ///
307  /// Not considered for memoization as it will always have the same value at
308  /// the same token.
309  bool AlignColons : 1;
310 
311  /// \c true if at least one selector name was found in the current
312  /// ObjC method expression.
313  ///
314  /// Not considered for memoization as it will always have the same value at
315  /// the same token.
317 
318  /// \c true if there are multiple nested blocks inside these parens.
319  ///
320  /// Not considered for memoization as it will always have the same value at
321  /// the same token.
323 
324  /// The start of a nested block (e.g. lambda introducer in C++ or
325  /// "function" in JavaScript) is not wrapped to a new line.
327 
328  /// \c true if the current \c ParenState represents an Objective-C
329  /// array literal.
331 
332  bool operator<(const ParenState &Other) const {
333  if (Indent != Other.Indent)
334  return Indent < Other.Indent;
335  if (LastSpace != Other.LastSpace)
336  return LastSpace < Other.LastSpace;
337  if (NestedBlockIndent != Other.NestedBlockIndent)
338  return NestedBlockIndent < Other.NestedBlockIndent;
339  if (FirstLessLess != Other.FirstLessLess)
340  return FirstLessLess < Other.FirstLessLess;
341  if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
342  return BreakBeforeClosingBrace;
343  if (QuestionColumn != Other.QuestionColumn)
344  return QuestionColumn < Other.QuestionColumn;
345  if (AvoidBinPacking != Other.AvoidBinPacking)
346  return AvoidBinPacking;
347  if (BreakBeforeParameter != Other.BreakBeforeParameter)
348  return BreakBeforeParameter;
349  if (NoLineBreak != Other.NoLineBreak)
350  return NoLineBreak;
351  if (LastOperatorWrapped != Other.LastOperatorWrapped)
352  return LastOperatorWrapped;
353  if (ColonPos != Other.ColonPos)
354  return ColonPos < Other.ColonPos;
355  if (StartOfFunctionCall != Other.StartOfFunctionCall)
356  return StartOfFunctionCall < Other.StartOfFunctionCall;
357  if (StartOfArraySubscripts != Other.StartOfArraySubscripts)
358  return StartOfArraySubscripts < Other.StartOfArraySubscripts;
359  if (CallContinuation != Other.CallContinuation)
360  return CallContinuation < Other.CallContinuation;
361  if (VariablePos != Other.VariablePos)
362  return VariablePos < Other.VariablePos;
363  if (ContainsLineBreak != Other.ContainsLineBreak)
364  return ContainsLineBreak;
365  if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder)
366  return ContainsUnwrappedBuilder;
367  if (NestedBlockInlined != Other.NestedBlockInlined)
368  return NestedBlockInlined;
369  return false;
370  }
371 };
372 
373 /// The current state when indenting an unwrapped line.
374 ///
375 /// As the indenting tries different combinations this is copied by value.
376 struct LineState {
377  /// The number of used columns in the current line.
378  unsigned Column;
379 
380  /// The token that needs to be next formatted.
382 
383  /// \c true if this line contains a continued for-loop section.
385 
386  /// \c true if \p NextToken should not continue this line.
388 
389  /// The \c NestingLevel at the start of this line.
391 
392  /// The lowest \c NestingLevel on the current line.
394 
395  /// The start column of the string literal, if we're in a string
396  /// literal sequence, 0 otherwise.
398 
399  /// A stack keeping track of properties applying to parenthesis
400  /// levels.
401  std::vector<ParenState> Stack;
402 
403  /// Ignore the stack of \c ParenStates for state comparison.
404  ///
405  /// In long and deeply nested unwrapped lines, the current algorithm can
406  /// be insufficient for finding the best formatting with a reasonable amount
407  /// of time and memory. Setting this flag will effectively lead to the
408  /// algorithm not analyzing some combinations. However, these combinations
409  /// rarely contain the optimal solution: In short, accepting a higher
410  /// penalty early would need to lead to different values in the \c
411  /// ParenState stack (in an otherwise identical state) and these different
412  /// values would need to lead to a significant amount of avoided penalty
413  /// later.
414  ///
415  /// FIXME: Come up with a better algorithm instead.
417 
418  /// The indent of the first token.
419  unsigned FirstIndent;
420 
421  /// The line that is being formatted.
422  ///
423  /// Does not need to be considered for memoization because it doesn't change.
425 
426  /// Comparison operator to be able to use \c LineState in \c map.
427  bool operator<(const LineState &Other) const {
428  if (NextToken != Other.NextToken)
429  return NextToken < Other.NextToken;
430  if (Column != Other.Column)
431  return Column < Other.Column;
432  if (LineContainsContinuedForLoopSection !=
434  return LineContainsContinuedForLoopSection;
435  if (NoContinuation != Other.NoContinuation)
436  return NoContinuation;
437  if (StartOfLineLevel != Other.StartOfLineLevel)
438  return StartOfLineLevel < Other.StartOfLineLevel;
439  if (LowestLevelOnLine != Other.LowestLevelOnLine)
440  return LowestLevelOnLine < Other.LowestLevelOnLine;
441  if (StartOfStringLiteral != Other.StartOfStringLiteral)
442  return StartOfStringLiteral < Other.StartOfStringLiteral;
443  if (IgnoreStackForComparison || Other.IgnoreStackForComparison)
444  return false;
445  return Stack < Other.Stack;
446  }
447 };
448 
449 } // end namespace format
450 } // end namespace clang
451 
452 #endif
unsigned LowestLevelOnLine
The lowest NestingLevel on the current line.
bool ContainsLineBreak
true if this ParenState already contains a line-break.
unsigned VariablePos
The column of the first variable name in a variable declaration.
bool BreakBeforeClosingBrace
Whether a newline needs to be inserted before the block's closing brace.
unsigned CallContinuation
If a call expression was broken over multiple lines, this contains the start column of the second line. Otherwise 0.
LineState State
Contains functions for text encoding manipulation.
bool AlignColons
true if the colons of the current ObjC method expression should be aligned.
unsigned Column
The number of used columns in the current line.
bool NoContinuation
true if NextToken should not continue this line.
Manages the whitespaces around tokens and their replacements.
const FormatToken * Tok
The token opening this parenthesis level, or nullptr if this level is opened by fake parenthesis.
unsigned Indent
The position to which a specific parenthesis level needs to be indented.
const FormatToken & Tok
bool HasMultipleNestedBlocks
true if there are multiple nested blocks inside these parens.
ParenState(const FormatToken *Tok, unsigned Indent, unsigned LastSpace, bool AvoidBinPacking, bool NoLineBreak)
const AnnotatedLine * Line
The line that is being formatted.
bool NoLineBreakInOperand
Same as NoLineBreak, but is restricted until the end of the operand (including the next ",").
bool LineContainsContinuedForLoopSection
true if this line contains a continued for-loop section.
bool NestedBlockInlined
The start of a nested block (e.g. lambda introducer in C++ or "function" in JavaScript) is not wrapped to a new line.
bool LastOperatorWrapped
True if the last binary operator on this level was wrapped to the next line.
llvm::Optional< FormatStyle > getEnclosingFunctionStyle(StringRef EnclosingFunction) const
bool BreakBeforeParameter
Break after the next comma (or all the commas in this context if AvoidBinPacking is true).
bool ObjCSelectorNameFound
true if at least one selector name was found in the current ObjC method expression.
The current state when indenting an unwrapped line.
unsigned QuestionColumn
The column of a ? in a conditional expression.
const AnnotatedLine * Line
unsigned StartOfArraySubscripts
Contains the start of array subscript expressions, so that they can be aligned.
A wrapper around a Token storing information about the whitespace characters preceding it...
Definition: FormatToken.h:126
llvm::StringMap< FormatStyle > DelimiterStyle
std::vector< ParenState > Stack
A stack keeping track of properties applying to parenthesis levels.
bool NoLineBreak
Line breaking in this context would break a formatting rule.
#define false
Definition: stdbool.h:33
Various functions to configurably format source code.
unsigned LastSpace
The position of the last space on each level.
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
Definition: FormatToken.h:669
bool IgnoreStackForComparison
Ignore the stack of ParenStates for state comparison.
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:49
RawStringFormatStyleManager(const FormatStyle &CodeStyle)
bool IsInsideObjCArrayLiteral
true if the current ParenState represents an Objective-C array literal.
Dataflow Directional Tag Classes.
llvm::Optional< FormatStyle > getDelimiterStyle(StringRef Delimiter) const
unsigned FirstIndent
The indent of the first token.
unsigned ColonPos
The position of the colon in an ObjC method declaration/call.
bool AvoidBinPacking
Avoid bin packing, i.e. multiple parameters/elements on multiple lines, in this context.
bool ContainsUnwrappedBuilder
true if this ParenState contains multiple segments of a builder-type call on one line.
unsigned NestedBlockIndent
If a block relative to this parenthesis level gets wrapped, indent it this much.
unsigned FirstLessLess
The position of the first "<<" operator encountered on each level.
unsigned StartOfLineLevel
The NestingLevel at the start of this line.
This file contains the declaration of the FormatToken, a wrapper around Token with additional informa...
unsigned StartOfStringLiteral
The start column of the string literal, if we're in a string literal sequence, 0 otherwise.
BreakableToken(const FormatToken &Tok, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
llvm::StringMap< FormatStyle > EnclosingFunctionStyle
FormatToken * NextToken
The token that needs to be next formatted.
bool operator<(const LineState &Other) const
Comparison operator to be able to use LineState in map.
#define true
Definition: stdbool.h:32
unsigned StartOfFunctionCall
The start of the most recent function in a builder-type call.
bool operator<(const ParenState &Other) const
This class handles loading and caching of source files into memory.
const encoding::Encoding Encoding
const FormatStyle & Style