clang  8.0.0svn
ContinuationIndenter.h
Go to the documentation of this file.
1 //===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// This file implements an indenter that manages the indentation of
12 /// continuations.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
17 #define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
18 
19 #include "Encoding.h"
20 #include "FormatToken.h"
21 #include "clang/Format/Format.h"
22 #include "llvm/Support/Regex.h"
23 #include <map>
24 #include <tuple>
25 
26 namespace clang {
27 class SourceManager;
28 
29 namespace format {
30 
31 class AnnotatedLine;
32 class BreakableToken;
33 struct FormatToken;
34 struct LineState;
35 struct ParenState;
36 struct RawStringFormatStyleManager;
37 class WhitespaceManager;
38 
40  llvm::StringMap<FormatStyle> DelimiterStyle;
41  llvm::StringMap<FormatStyle> EnclosingFunctionStyle;
42 
43  RawStringFormatStyleManager(const FormatStyle &CodeStyle);
44 
45  llvm::Optional<FormatStyle> getDelimiterStyle(StringRef Delimiter) const;
46 
48  getEnclosingFunctionStyle(StringRef EnclosingFunction) const;
49 };
50 
52 public:
53  /// Constructs a \c ContinuationIndenter to format \p Line starting in
54  /// column \p FirstIndent.
56  const AdditionalKeywords &Keywords,
57  const SourceManager &SourceMgr,
58  WhitespaceManager &Whitespaces,
60  bool BinPackInconclusiveFunctions);
61 
62  /// Get the initial state, i.e. the state after placing \p Line's
63  /// first token at \p FirstIndent. When reformatting a fragment of code, as in
64  /// the case of formatting inside raw string literals, \p FirstStartColumn is
65  /// the column at which the state of the parent formatter is.
66  LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn,
67  const AnnotatedLine *Line, bool DryRun);
68 
69  // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
70  // better home.
71  /// Returns \c true, if a line break after \p State is allowed.
72  bool canBreak(const LineState &State);
73 
74  /// Returns \c true, if a line break after \p State is mandatory.
75  bool mustBreak(const LineState &State);
76 
77  /// Appends the next token to \p State and updates information
78  /// necessary for indentation.
79  ///
80  /// Puts the token on the current line if \p Newline is \c false and adds a
81  /// line break and necessary indentation otherwise.
82  ///
83  /// If \p DryRun is \c false, also creates and stores the required
84  /// \c Replacement.
85  unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
86  unsigned ExtraSpaces = 0);
87 
88  /// Get the column limit for this line. This is the style's column
89  /// limit, potentially reduced for preprocessor definitions.
90  unsigned getColumnLimit(const LineState &State) const;
91 
92 private:
93  /// Mark the next token as consumed in \p State and modify its stacks
94  /// accordingly.
95  unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
96 
97  /// Update 'State' according to the next token's fake left parentheses.
98  void moveStatePastFakeLParens(LineState &State, bool Newline);
99  /// Update 'State' according to the next token's fake r_parens.
100  void moveStatePastFakeRParens(LineState &State);
101 
102  /// Update 'State' according to the next token being one of "(<{[".
103  void moveStatePastScopeOpener(LineState &State, bool Newline);
104  /// Update 'State' according to the next token being one of ")>}]".
105  void moveStatePastScopeCloser(LineState &State);
106  /// Update 'State' with the next token opening a nested block.
107  void moveStateToNewBlock(LineState &State);
108 
109  /// Reformats a raw string literal.
110  ///
111  /// \returns An extra penalty induced by reformatting the token.
112  unsigned reformatRawStringLiteral(const FormatToken &Current,
113  LineState &State,
114  const FormatStyle &RawStringStyle,
115  bool DryRun);
116 
117  /// If the current token is at the end of the current line, handle
118  /// the transition to the next line.
119  unsigned handleEndOfLine(const FormatToken &Current, LineState &State,
120  bool DryRun, bool AllowBreak);
121 
122  /// If \p Current is a raw string that is configured to be reformatted,
123  /// return the style to be used.
124  llvm::Optional<FormatStyle> getRawStringStyle(const FormatToken &Current,
125  const LineState &State);
126 
127  /// If the current token sticks out over the end of the line, break
128  /// it if possible.
129  ///
130  /// \returns A pair (penalty, exceeded), where penalty is the extra penalty
131  /// when tokens are broken or lines exceed the column limit, and exceeded
132  /// indicates whether the algorithm purposefully left lines exceeding the
133  /// column limit.
134  ///
135  /// The returned penalty will cover the cost of the additional line breaks
136  /// and column limit violation in all lines except for the last one. The
137  /// penalty for the column limit violation in the last line (and in single
138  /// line tokens) is handled in \c addNextStateToQueue.
139  ///
140  /// \p Strict indicates whether reflowing is allowed to leave characters
141  /// protruding the column limit; if true, lines will be split strictly within
142  /// the column limit where possible; if false, words are allowed to protrude
143  /// over the column limit as long as the penalty is less than the penalty
144  /// of a break.
145  std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current,
146  LineState &State,
147  bool AllowBreak, bool DryRun,
148  bool Strict);
149 
150  /// Returns the \c BreakableToken starting at \p Current, or nullptr
151  /// if the current token cannot be broken.
152  std::unique_ptr<BreakableToken>
153  createBreakableToken(const FormatToken &Current, LineState &State,
154  bool AllowBreak);
155 
156  /// Appends the next token to \p State and updates information
157  /// necessary for indentation.
158  ///
159  /// Puts the token on the current line.
160  ///
161  /// If \p DryRun is \c false, also creates and stores the required
162  /// \c Replacement.
163  void addTokenOnCurrentLine(LineState &State, bool DryRun,
164  unsigned ExtraSpaces);
165 
166  /// Appends the next token to \p State and updates information
167  /// necessary for indentation.
168  ///
169  /// Adds a line break and necessary indentation.
170  ///
171  /// If \p DryRun is \c false, also creates and stores the required
172  /// \c Replacement.
173  unsigned addTokenOnNewLine(LineState &State, bool DryRun);
174 
175  /// Calculate the new column for a line wrap before the next token.
176  unsigned getNewLineColumn(const LineState &State);
177 
178  /// Adds a multiline token to the \p State.
179  ///
180  /// \returns Extra penalty for the first line of the literal: last line is
181  /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
182  /// matter, as we don't change them.
183  unsigned addMultilineToken(const FormatToken &Current, LineState &State);
184 
185  /// Returns \c true if the next token starts a multiline string
186  /// literal.
187  ///
188  /// This includes implicitly concatenated strings, strings that will be broken
189  /// by clang-format and string literals with escaped newlines.
190  bool nextIsMultilineString(const LineState &State);
191 
193  const AdditionalKeywords &Keywords;
194  const SourceManager &SourceMgr;
195  WhitespaceManager &Whitespaces;
197  bool BinPackInconclusiveFunctions;
198  llvm::Regex CommentPragmasRegex;
199  const RawStringFormatStyleManager RawStringFormats;
200 };
201 
202 struct ParenState {
203  ParenState(const FormatToken *Tok, unsigned Indent, unsigned LastSpace,
204  bool AvoidBinPacking, bool NoLineBreak)
205  : Tok(Tok), Indent(Indent), LastSpace(LastSpace),
206  NestedBlockIndent(Indent), BreakBeforeClosingBrace(false),
207  AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false),
208  NoLineBreak(NoLineBreak), NoLineBreakInOperand(false),
209  LastOperatorWrapped(true), ContainsLineBreak(false),
210  ContainsUnwrappedBuilder(false), AlignColons(true),
211  ObjCSelectorNameFound(false), HasMultipleNestedBlocks(false),
212  NestedBlockInlined(false), IsInsideObjCArrayLiteral(false) {}
213 
214  /// \brief The token opening this parenthesis level, or nullptr if this level
215  /// is opened by fake parenthesis.
216  ///
217  /// Not considered for memoization as it will always have the same value at
218  /// the same token.
219  const FormatToken *Tok;
220 
221  /// The position to which a specific parenthesis level needs to be
222  /// indented.
223  unsigned Indent;
224 
225  /// The position of the last space on each level.
226  ///
227  /// Used e.g. to break like:
228  /// functionCall(Parameter, otherCall(
229  /// OtherParameter));
230  unsigned LastSpace;
231 
232  /// If a block relative to this parenthesis level gets wrapped, indent
233  /// it this much.
235 
236 /// The position of the first "<<" operator encountered on each level.
237  ///
238  /// Used to align "<<" operators. 0 if no such operator has been encountered
239  /// on a level.
240  unsigned FirstLessLess = 0;
241 
242 /// The column of a \c ? in a conditional expression.
243  unsigned QuestionColumn = 0;
244 
245  /// The position of the colon in an ObjC method declaration/call.
246  unsigned ColonPos = 0;
247 
248  /// The start of the most recent function in a builder-type call.
249  unsigned StartOfFunctionCall = 0;
250 
251  /// Contains the start of array subscript expressions, so that they
252  /// can be aligned.
253  unsigned StartOfArraySubscripts = 0;
254 
255  /// If a nested name specifier was broken over multiple lines, this
256  /// contains the start column of the second line. Otherwise 0.
257  unsigned NestedNameSpecifierContinuation = 0;
258 
259  /// If a call expression was broken over multiple lines, this
260  /// contains the start column of the second line. Otherwise 0.
261  unsigned CallContinuation = 0;
262 
263  /// The column of the first variable name in a variable declaration.
264  ///
265  /// Used to align further variables if necessary.
266  unsigned VariablePos = 0;
267 
268  /// Whether a newline needs to be inserted before the block's closing
269  /// brace.
270  ///
271  /// We only want to insert a newline before the closing brace if there also
272  /// was a newline after the beginning left brace.
274 
275  /// Avoid bin packing, i.e. multiple parameters/elements on multiple
276  /// lines, in this context.
277  bool AvoidBinPacking : 1;
278 
279  /// Break after the next comma (or all the commas in this context if
280  /// \c AvoidBinPacking is \c true).
282 
283  /// Line breaking in this context would break a formatting rule.
284  bool NoLineBreak : 1;
285 
286  /// Same as \c NoLineBreak, but is restricted until the end of the
287  /// operand (including the next ",").
289 
290  /// True if the last binary operator on this level was wrapped to the
291  /// next line.
293 
294  /// \c true if this \c ParenState already contains a line-break.
295  ///
296  /// The first line break in a certain \c ParenState causes extra penalty so
297  /// that clang-format prefers similar breaks, i.e. breaks in the same
298  /// parenthesis.
300 
301  /// \c true if this \c ParenState contains multiple segments of a
302  /// builder-type call on one line.
304 
305 /// \c true if the colons of the current ObjC method expression should
306  /// be aligned.
307  ///
308  /// Not considered for memoization as it will always have the same value at
309  /// the same token.
310  bool AlignColons : 1;
311 
312  /// \c true if at least one selector name was found in the current
313  /// ObjC method expression.
314  ///
315  /// Not considered for memoization as it will always have the same value at
316  /// the same token.
318 
319  /// \c true if there are multiple nested blocks inside these parens.
320  ///
321  /// Not considered for memoization as it will always have the same value at
322  /// the same token.
324 
325  /// The start of a nested block (e.g. lambda introducer in C++ or
326  /// "function" in JavaScript) is not wrapped to a new line.
328 
329  /// \c true if the current \c ParenState represents an Objective-C
330  /// array literal.
332 
/// Orders two \c ParenState objects by comparing a fixed sequence of fields
/// one after another; the first field that differs decides the result.
///
/// Unsigned fields compare by value. For the boolean flags, the state whose
/// flag is set is the one that orders first. Returns \c false when every
/// compared field is equal.
///
/// NOTE(review): \c NoLineBreakInOperand, \c NestedNameSpecifierContinuation
/// and \c IsInsideObjCArrayLiteral are not compared here, although their
/// comments do not mark them as exempt from memoization -- confirm that this
/// is intentional.
333 bool operator<(const ParenState &Other) const {
334 if (Indent != Other.Indent)
335 return Indent < Other.Indent;
336 if (LastSpace != Other.LastSpace)
337 return LastSpace < Other.LastSpace;
338 if (NestedBlockIndent != Other.NestedBlockIndent)
339 return NestedBlockIndent < Other.NestedBlockIndent;
340 if (FirstLessLess != Other.FirstLessLess)
341 return FirstLessLess < Other.FirstLessLess;
342 if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
343 return BreakBeforeClosingBrace;
344 if (QuestionColumn != Other.QuestionColumn)
345 return QuestionColumn < Other.QuestionColumn;
346 if (AvoidBinPacking != Other.AvoidBinPacking)
347 return AvoidBinPacking;
348 if (BreakBeforeParameter != Other.BreakBeforeParameter)
349 return BreakBeforeParameter;
350 if (NoLineBreak != Other.NoLineBreak)
351 return NoLineBreak;
352 if (LastOperatorWrapped != Other.LastOperatorWrapped)
353 return LastOperatorWrapped;
354 if (ColonPos != Other.ColonPos)
355 return ColonPos < Other.ColonPos;
356 if (StartOfFunctionCall != Other.StartOfFunctionCall)
357 return StartOfFunctionCall < Other.StartOfFunctionCall;
358 if (StartOfArraySubscripts != Other.StartOfArraySubscripts)
359 return StartOfArraySubscripts < Other.StartOfArraySubscripts;
360 if (CallContinuation != Other.CallContinuation)
361 return CallContinuation < Other.CallContinuation;
362 if (VariablePos != Other.VariablePos)
363 return VariablePos < Other.VariablePos;
364 if (ContainsLineBreak != Other.ContainsLineBreak)
365 return ContainsLineBreak;
366 if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder)
367 return ContainsUnwrappedBuilder;
368 if (NestedBlockInlined != Other.NestedBlockInlined)
369 return NestedBlockInlined;
// All compared fields are equal, so neither state orders before the other.
370 return false;
371 }
372 };
373 
374 /// The current state when indenting an unwrapped line.
375 ///
376 /// As the indenting tries different combinations this is copied by value.
377 struct LineState {
378  /// The number of used columns in the current line.
379  unsigned Column;
380 
381  /// The token that needs to be next formatted.
383 
384  /// \c true if this line contains a continued for-loop section.
386 
387  /// \c true if \p NextToken should not continue this line.
389 
390  /// The \c NestingLevel at the start of this line.
392 
393  /// The lowest \c NestingLevel on the current line.
395 
396  /// The start column of the string literal, if we're in a string
397  /// literal sequence, 0 otherwise.
399 
400  /// A stack keeping track of properties applying to parenthesis
401  /// levels.
402  std::vector<ParenState> Stack;
403 
404  /// Ignore the stack of \c ParenStates for state comparison.
405  ///
406  /// In long and deeply nested unwrapped lines, the current algorithm can
407  /// be insufficient for finding the best formatting with a reasonable amount
408  /// of time and memory. Setting this flag will effectively lead to the
409  /// algorithm not analyzing some combinations. However, these combinations
410  /// rarely contain the optimal solution: In short, accepting a higher
411  /// penalty early would need to lead to different values in the \c
412  /// ParenState stack (in an otherwise identical state) and these different
413  /// values would need to lead to a significant amount of avoided penalty
414  /// later.
415  ///
416  /// FIXME: Come up with a better algorithm instead.
418 
419  /// The indent of the first token.
420  unsigned FirstIndent;
421 
422  /// The line that is being formatted.
423  ///
424  /// Does not need to be considered for memoization because it doesn't change.
426 
427 /// Comparison operator to be able to use \c LineState in \c map.
428  bool operator<(const LineState &Other) const {
429  if (NextToken != Other.NextToken)
430  return NextToken < Other.NextToken;
431  if (Column != Other.Column)
432  return Column < Other.Column;
433  if (LineContainsContinuedForLoopSection !=
435  return LineContainsContinuedForLoopSection;
436  if (NoContinuation != Other.NoContinuation)
437  return NoContinuation;
438  if (StartOfLineLevel != Other.StartOfLineLevel)
439  return StartOfLineLevel < Other.StartOfLineLevel;
440  if (LowestLevelOnLine != Other.LowestLevelOnLine)
441  return LowestLevelOnLine < Other.LowestLevelOnLine;
442  if (StartOfStringLiteral != Other.StartOfStringLiteral)
443  return StartOfStringLiteral < Other.StartOfStringLiteral;
444  if (IgnoreStackForComparison || Other.IgnoreStackForComparison)
445  return false;
446  return Stack < Other.Stack;
447  }
448 };
449 
450 } // end namespace format
451 } // end namespace clang
452 
453 #endif
unsigned LowestLevelOnLine
The lowest NestingLevel on the current line.
bool ContainsLineBreak
true if this ParenState already contains a line-break.
unsigned VariablePos
The column of the first variable name in a variable declaration.
bool BreakBeforeClosingBrace
Whether a newline needs to be inserted before the block's closing brace.
unsigned CallContinuation
If a call expression was broken over multiple lines, this contains the start column of the second lin...
LineState State
Contains functions for text encoding manipulation.
bool AlignColons
true if the colons of the current ObjC method expression should be aligned.
unsigned Column
The number of used columns in the current line.
bool NoContinuation
true if NextToken should not continue this line.
Manages the whitespaces around tokens and their replacements.
const FormatToken * Tok
The token opening this parenthesis level, or nullptr if this level is opened by fake parenthesis...
unsigned Indent
The position to which a specific parenthesis level needs to be indented.
const FormatToken & Tok
bool HasMultipleNestedBlocks
true if there are multiple nested blocks inside these parens.
ParenState(const FormatToken *Tok, unsigned Indent, unsigned LastSpace, bool AvoidBinPacking, bool NoLineBreak)
const AnnotatedLine * Line
The line that is being formatted.
bool NoLineBreakInOperand
Same as NoLineBreak, but is restricted until the end of the operand (including the next "...
bool LineContainsContinuedForLoopSection
true if this line contains a continued for-loop section.
bool NestedBlockInlined
The start of a nested block (e.g.
bool LastOperatorWrapped
True if the last binary operator on this level was wrapped to the next line.
llvm::Optional< FormatStyle > getEnclosingFunctionStyle(StringRef EnclosingFunction) const
bool BreakBeforeParameter
Break after the next comma (or all the commas in this context if AvoidBinPacking is true)...
bool ObjCSelectorNameFound
true if at least one selector name was found in the current ObjC method expression.
The current state when indenting an unwrapped line.
unsigned QuestionColumn
The column of a ? in a conditional expression.
const AnnotatedLine * Line
unsigned StartOfArraySubscripts
Contains the start of array subscript expressions, so that they can be aligned.
A wrapper around a Token storing information about the whitespace characters preceding it...
Definition: FormatToken.h:123
llvm::StringMap< FormatStyle > DelimiterStyle
std::vector< ParenState > Stack
A stack keeping track of properties applying to parenthesis levels.
bool NoLineBreak
Line breaking in this context would break a formatting rule.
#define false
Definition: stdbool.h:33
Various functions to configurably format source code.
unsigned LastSpace
The position of the last space on each level.
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
Definition: FormatToken.h:663
bool IgnoreStackForComparison
Ignore the stack of ParenStates for state comparison.
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:48
RawStringFormatStyleManager(const FormatStyle &CodeStyle)
bool IsInsideObjCArrayLiteral
true if the current ParenState represents an Objective-C array literal.
Dataflow Directional Tag Classes.
llvm::Optional< FormatStyle > getDelimiterStyle(StringRef Delimiter) const
unsigned FirstIndent
The indent of the first token.
unsigned ColonPos
The position of the colon in an ObjC method declaration/call.
bool AvoidBinPacking
Avoid bin packing, i.e.
bool ContainsUnwrappedBuilder
true if this ParenState contains multiple segments of a builder-type call on one line.
unsigned NestedBlockIndent
If a block relative to this parenthesis level gets wrapped, indent it this much.
unsigned FirstLessLess
The position of the first "<<" operator encountered on each level.
unsigned StartOfLineLevel
The NestingLevel at the start of this line.
This file contains the declaration of the FormatToken, a wrapper around Token with additional informa...
unsigned StartOfStringLiteral
The start column of the string literal, if we're in a string literal sequence, 0 otherwise.
BreakableToken(const FormatToken &Tok, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
llvm::StringMap< FormatStyle > EnclosingFunctionStyle
FormatToken * NextToken
The token that needs to be next formatted.
bool operator<(const LineState &Other) const
Comparison operator to be able to use LineState in map.
#define true
Definition: stdbool.h:32
unsigned StartOfFunctionCall
The start of the most recent function in a builder-type call.
bool operator<(const ParenState &Other) const
This class handles loading and caching of source files into memory.
const encoding::Encoding Encoding
const FormatStyle & Style