clang  6.0.0svn
ContinuationIndenter.h
Go to the documentation of this file.
1 //===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements an indenter that manages the indentation of
12 /// continuations.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
17 #define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
18 
19 #include "Encoding.h"
20 #include "FormatToken.h"
21 #include "clang/Format/Format.h"
22 #include "llvm/Support/Regex.h"
23 #include <map>
24 #include <tuple>
25 
26 namespace clang {
27 class SourceManager;
28 
29 namespace format {
30 
31 class AnnotatedLine;
32 class BreakableToken;
33 struct FormatToken;
34 struct LineState;
35 struct ParenState;
36 struct RawStringFormatStyleManager;
37 class WhitespaceManager;
38 
40  llvm::StringMap<FormatStyle> DelimiterStyle;
41 
42  RawStringFormatStyleManager(const FormatStyle &CodeStyle);
43 
44  llvm::Optional<FormatStyle> get(StringRef Delimiter) const;
45 };
46 
48 public:
49  /// \brief Constructs a \c ContinuationIndenter to format \p Line starting in
50  /// column \p FirstIndent.
52  const AdditionalKeywords &Keywords,
53  const SourceManager &SourceMgr,
54  WhitespaceManager &Whitespaces,
56  bool BinPackInconclusiveFunctions);
57 
58  /// \brief Get the initial state, i.e. the state after placing \p Line's
59  /// first token at \p FirstIndent. When reformatting a fragment of code, as in
60  /// the case of formatting inside raw string literals, \p FirstStartColumn is
61  /// the column at which the state of the parent formatter is.
62  LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn,
63  const AnnotatedLine *Line, bool DryRun);
64 
65  // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
66  // better home.
67  /// \brief Returns \c true, if a line break after \p State is allowed.
68  bool canBreak(const LineState &State);
69 
70  /// \brief Returns \c true, if a line break after \p State is mandatory.
71  bool mustBreak(const LineState &State);
72 
73  /// \brief Appends the next token to \p State and updates information
74  /// necessary for indentation.
75  ///
76  /// Puts the token on the current line if \p Newline is \c false and adds a
77  /// line break and necessary indentation otherwise.
78  ///
79  /// If \p DryRun is \c false, also creates and stores the required
80  /// \c Replacement.
81  unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
82  unsigned ExtraSpaces = 0);
83 
84  /// \brief Get the column limit for this line. This is the style's column
85  /// limit, potentially reduced for preprocessor definitions.
86  unsigned getColumnLimit(const LineState &State) const;
87 
88 private:
89  /// \brief Mark the next token as consumed in \p State and modify its stacks
90  /// accordingly.
91  unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
92 
93  /// \brief Update 'State' according to the next token's fake left parentheses.
94  void moveStatePastFakeLParens(LineState &State, bool Newline);
95  /// \brief Update 'State' according to the next token's fake r_parens.
96  void moveStatePastFakeRParens(LineState &State);
97 
98  /// \brief Update 'State' according to the next token being one of "(<{[".
99  void moveStatePastScopeOpener(LineState &State, bool Newline);
100  /// \brief Update 'State' according to the next token being one of ")>}]".
101  void moveStatePastScopeCloser(LineState &State);
102  /// \brief Update 'State' with the next token opening a nested block.
103  void moveStateToNewBlock(LineState &State);
104 
105  /// \brief Reformats a raw string literal.
106  ///
107  /// \returns An extra penalty induced by reformatting the token.
108  unsigned reformatRawStringLiteral(const FormatToken &Current,
109  LineState &State,
110  const FormatStyle &RawStringStyle,
111  bool DryRun);
112 
113  /// \brief If the current token is at the end of the current line, handle
114  /// the transition to the next line.
115  unsigned handleEndOfLine(const FormatToken &Current, LineState &State,
116  bool DryRun, bool AllowBreak);
117 
118  /// \brief If \p Current is a raw string that is configured to be reformatted,
119  /// return the style to be used.
120  llvm::Optional<FormatStyle> getRawStringStyle(const FormatToken &Current,
121  const LineState &State);
122 
123  /// \brief If the current token sticks out over the end of the line, break
124  /// it if possible.
125  ///
126  /// \returns A pair (penalty, exceeded), where penalty is the extra penalty
127  /// when tokens are broken or lines exceed the column limit, and exceeded
128  /// indicates whether the algorithm purposefully left lines exceeding the
129  /// column limit.
130  ///
131  /// The returned penalty will cover the cost of the additional line breaks
132  /// and column limit violation in all lines except for the last one. The
133  /// penalty for the column limit violation in the last line (and in single
134  /// line tokens) is handled in \c addNextStateToQueue.
135  ///
136  /// \p Strict indicates whether reflowing is allowed to leave characters
137  /// protruding the column limit; if true, lines will be split strictly within
138  /// the column limit where possible; if false, words are allowed to protrude
139  /// over the column limit as long as the penalty is less than the penalty
140  /// of a break.
141  std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current,
142  LineState &State,
143  bool AllowBreak, bool DryRun,
144  bool Strict);
145 
146  /// \brief Returns the \c BreakableToken starting at \p Current, or nullptr
147  /// if the current token cannot be broken.
148  std::unique_ptr<BreakableToken>
149  createBreakableToken(const FormatToken &Current, LineState &State,
150  bool AllowBreak);
151 
152  /// \brief Appends the next token to \p State and updates information
153  /// necessary for indentation.
154  ///
155  /// Puts the token on the current line.
156  ///
157  /// If \p DryRun is \c false, also creates and stores the required
158  /// \c Replacement.
159  void addTokenOnCurrentLine(LineState &State, bool DryRun,
160  unsigned ExtraSpaces);
161 
162  /// \brief Appends the next token to \p State and updates information
163  /// necessary for indentation.
164  ///
165  /// Adds a line break and necessary indentation.
166  ///
167  /// If \p DryRun is \c false, also creates and stores the required
168  /// \c Replacement.
169  unsigned addTokenOnNewLine(LineState &State, bool DryRun);
170 
171  /// \brief Calculate the new column for a line wrap before the next token.
172  unsigned getNewLineColumn(const LineState &State);
173 
174  /// \brief Adds a multiline token to the \p State.
175  ///
176  /// \returns Extra penalty for the first line of the literal: last line is
177  /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
178  /// matter, as we don't change them.
179  unsigned addMultilineToken(const FormatToken &Current, LineState &State);
180 
181  /// \brief Returns \c true if the next token starts a multiline string
182  /// literal.
183  ///
184  /// This includes implicitly concatenated strings, strings that will be broken
185  /// by clang-format and string literals with escaped newlines.
186  bool nextIsMultilineString(const LineState &State);
187 
189  const AdditionalKeywords &Keywords;
190  const SourceManager &SourceMgr;
191  WhitespaceManager &Whitespaces;
193  bool BinPackInconclusiveFunctions;
194  llvm::Regex CommentPragmasRegex;
195  const RawStringFormatStyleManager RawStringFormats;
196 };
197 
198 struct ParenState {
199  ParenState(unsigned Indent, unsigned LastSpace, bool AvoidBinPacking,
200  bool NoLineBreak)
     // Stores the four arguments in the members of the same name and seeds
     // NestedBlockIndent with Indent. LastOperatorWrapped and AlignColons start
     // out true; every other flag in this list starts out false.
201  : Indent(Indent), LastSpace(LastSpace), NestedBlockIndent(Indent),
202  BreakBeforeClosingBrace(false), AvoidBinPacking(AvoidBinPacking),
203  BreakBeforeParameter(false), NoLineBreak(NoLineBreak),
204  NoLineBreakInOperand(false), LastOperatorWrapped(true),
205  ContainsLineBreak(false), ContainsUnwrappedBuilder(false),
206  AlignColons(true), ObjCSelectorNameFound(false),
207  HasMultipleNestedBlocks(false), NestedBlockInlined(false) {}
208 
209  /// \brief The position to which a specific parenthesis level needs to be
210  /// indented.
211  unsigned Indent;
212 
213  /// \brief The position of the last space on each level.
214  ///
215  /// Used e.g. to break like:
216  /// functionCall(Parameter, otherCall(
217  /// OtherParameter));
218  unsigned LastSpace;
219 
220  /// \brief If a block relative to this parenthesis level gets wrapped, indent
221  /// it this much.
223 
224  /// \brief The position of the first "<<" operator encountered on each level.
225  ///
226  /// Used to align "<<" operators. 0 if no such operator has been encountered
227  /// on a level.
228  unsigned FirstLessLess = 0;
229 
230  /// \brief The column of a \c ? in a conditional expression.
231  unsigned QuestionColumn = 0;
232 
233  /// \brief The position of the colon in an ObjC method declaration/call.
234  unsigned ColonPos = 0;
235 
236  /// \brief The start of the most recent function in a builder-type call.
237  unsigned StartOfFunctionCall = 0;
238 
239  /// \brief Contains the start of array subscript expressions, so that they
240  /// can be aligned.
241  unsigned StartOfArraySubscripts = 0;
242 
243  /// \brief If a nested name specifier was broken over multiple lines, this
244  /// contains the start column of the second line. Otherwise 0.
245  unsigned NestedNameSpecifierContinuation = 0;
246 
247  /// \brief If a call expression was broken over multiple lines, this
248  /// contains the start column of the second line. Otherwise 0.
249  unsigned CallContinuation = 0;
250 
251  /// \brief The column of the first variable name in a variable declaration.
252  ///
253  /// Used to align further variables if necessary.
254  unsigned VariablePos = 0;
255 
256  /// \brief Whether a newline needs to be inserted before the block's closing
257  /// brace.
258  ///
259  /// We only want to insert a newline before the closing brace if there also
260  /// was a newline after the beginning left brace.
262 
263  /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple
264  /// lines, in this context.
265  bool AvoidBinPacking : 1;
266 
267  /// \brief Break after the next comma (or all the commas in this context if
268  /// \c AvoidBinPacking is \c true).
270 
271  /// \brief Line breaking in this context would break a formatting rule.
272  bool NoLineBreak : 1;
273 
274  /// \brief Same as \c NoLineBreak, but is restricted until the end of the
275  /// operand (including the next ",").
277 
278  /// \brief True if the last binary operator on this level was wrapped to the
279  /// next line.
281 
282  /// \brief \c true if this \c ParenState already contains a line-break.
283  ///
284  /// The first line break in a certain \c ParenState causes extra penalty so
285  /// that clang-format prefers similar breaks, i.e. breaks in the same
286  /// parenthesis.
288 
289  /// \brief \c true if this \c ParenState contains multiple segments of a
290  /// builder-type call on one line.
292 
293  /// \brief \c true if the colons of the current ObjC method expression should
294  /// be aligned.
295  ///
296  /// Not considered for memoization as it will always have the same value at
297  /// the same token.
298  bool AlignColons : 1;
299 
300  /// \brief \c true if at least one selector name was found in the current
301  /// ObjC method expression.
302  ///
303  /// Not considered for memoization as it will always have the same value at
304  /// the same token.
306 
307  /// \brief \c true if there are multiple nested blocks inside these parens.
308  ///
309  /// Not considered for memoization as it will always have the same value at
310  /// the same token.
312 
313  /// \brief The start of a nested block (e.g. lambda introducer in C++ or
314  /// "function" in JavaScript) is not wrapped to a new line.
316 
317  bool operator<(const ParenState &Other) const {
     // Orders ParenStates by comparing one member at a time, in the order
     // listed below; the first member that differs decides the result.
     // Unsigned members order by value. For flag members, the state whose flag
     // is set orders first (the flag itself is returned when the two differ).
     // Returns false when every compared member is equal.
     //
     // AlignColons, ObjCSelectorNameFound and HasMultipleNestedBlocks are not
     // compared; their documentation states that they are not considered for
     // memoization.
     // NOTE(review): NestedNameSpecifierContinuation and NoLineBreakInOperand
     // are not compared here either, yet carry no such remark -- confirm that
     // leaving them out is intentional.
318  if (Indent != Other.Indent)
319  return Indent < Other.Indent;
320  if (LastSpace != Other.LastSpace)
321  return LastSpace < Other.LastSpace;
322  if (NestedBlockIndent != Other.NestedBlockIndent)
323  return NestedBlockIndent < Other.NestedBlockIndent;
324  if (FirstLessLess != Other.FirstLessLess)
325  return FirstLessLess < Other.FirstLessLess;
326  if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
327  return BreakBeforeClosingBrace;
328  if (QuestionColumn != Other.QuestionColumn)
329  return QuestionColumn < Other.QuestionColumn;
330  if (AvoidBinPacking != Other.AvoidBinPacking)
331  return AvoidBinPacking;
332  if (BreakBeforeParameter != Other.BreakBeforeParameter)
333  return BreakBeforeParameter;
334  if (NoLineBreak != Other.NoLineBreak)
335  return NoLineBreak;
336  if (LastOperatorWrapped != Other.LastOperatorWrapped)
337  return LastOperatorWrapped;
338  if (ColonPos != Other.ColonPos)
339  return ColonPos < Other.ColonPos;
340  if (StartOfFunctionCall != Other.StartOfFunctionCall)
341  return StartOfFunctionCall < Other.StartOfFunctionCall;
342  if (StartOfArraySubscripts != Other.StartOfArraySubscripts)
343  return StartOfArraySubscripts < Other.StartOfArraySubscripts;
344  if (CallContinuation != Other.CallContinuation)
345  return CallContinuation < Other.CallContinuation;
346  if (VariablePos != Other.VariablePos)
347  return VariablePos < Other.VariablePos;
348  if (ContainsLineBreak != Other.ContainsLineBreak)
349  return ContainsLineBreak;
350  if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder)
351  return ContainsUnwrappedBuilder;
352  if (NestedBlockInlined != Other.NestedBlockInlined)
353  return NestedBlockInlined;
354  return false;
355  }
356 };
357 
358 /// \brief The current state when indenting an unwrapped line.
359 ///
360 /// As the indenting tries different combinations this is copied by value.
361 struct LineState {
362  /// \brief The number of used columns in the current line.
363  unsigned Column;
364 
365  /// \brief The next token that needs to be formatted.
367 
368  /// \brief \c true if this line contains a continued for-loop section.
370 
371  /// \brief \c true if \p NextToken should not continue this line.
373 
374  /// \brief The \c NestingLevel at the start of this line.
376 
377  /// \brief The lowest \c NestingLevel on the current line.
379 
380  /// \brief The start column of the string literal, if we're in a string
381  /// literal sequence, 0 otherwise.
383 
384  /// \brief A stack keeping track of properties applying to parenthesis
385  /// levels.
386  std::vector<ParenState> Stack;
387 
388  /// \brief Ignore the stack of \c ParenStates for state comparison.
389  ///
390  /// In long and deeply nested unwrapped lines, the current algorithm can
391  /// be insufficient for finding the best formatting with a reasonable amount
392  /// of time and memory. Setting this flag will effectively lead to the
393  /// algorithm not analyzing some combinations. However, these combinations
394  /// rarely contain the optimal solution: In short, accepting a higher
395  /// penalty early would need to lead to different values in the \c
396  /// ParenState stack (in an otherwise identical state) and these different
397  /// values would need to lead to a significant amount of avoided penalty
398  /// later.
399  ///
400  /// FIXME: Come up with a better algorithm instead.
402 
403  /// \brief The indent of the first token.
404  unsigned FirstIndent;
405 
406  /// \brief The line that is being formatted.
407  ///
408  /// Does not need to be considered for memoization because it doesn't change.
410 
411  /// \brief Comparison operator to be able to use \c LineState in \c map.
     ///
     /// Members are compared one at a time in the order written below; the
     /// first one that differs decides the result. \c FirstIndent and \c Line
     /// are not compared.
412  bool operator<(const LineState &Other) const {
413  if (NextToken != Other.NextToken)
414  return NextToken < Other.NextToken;
415  if (Column != Other.Column)
416  return Column < Other.Column;
417  if (LineContainsContinuedForLoopSection !=
     // NOTE(review): the right-hand operand of this comparison (listing line
     // 418) is absent from this listing -- presumably
     // Other.LineContainsContinuedForLoopSection; restore it from upstream.
419  return LineContainsContinuedForLoopSection;
420  if (NoContinuation != Other.NoContinuation)
421  return NoContinuation;
422  if (StartOfLineLevel != Other.StartOfLineLevel)
423  return StartOfLineLevel < Other.StartOfLineLevel;
424  if (LowestLevelOnLine != Other.LowestLevelOnLine)
425  return LowestLevelOnLine < Other.LowestLevelOnLine;
426  if (StartOfStringLiteral != Other.StartOfStringLiteral)
427  return StartOfStringLiteral < Other.StartOfStringLiteral;
     // When either state has IgnoreStackForComparison set, the ParenState
     // stacks are not compared and neither state orders before the other.
428  if (IgnoreStackForComparison || Other.IgnoreStackForComparison)
429  return false;
430  return Stack < Other.Stack;
431  }
432 };
433 
434 } // end namespace format
435 } // end namespace clang
436 
437 #endif
unsigned LowestLevelOnLine
The lowest NestingLevel on the current line.
bool ContainsLineBreak
true if this ParenState already contains a line-break.
unsigned VariablePos
The column of the first variable name in a variable declaration.
bool BreakBeforeClosingBrace
Whether a newline needs to be inserted before the block's closing brace.
unsigned CallContinuation
If a call expression was broken over multiple lines, this contains the start column of the second lin...
LineState State
Contains functions for text encoding manipulation.
bool AlignColons
true if the colons of the current ObjC method expression should be aligned.
unsigned Column
The number of used columns in the current line.
bool NoContinuation
true if NextToken should not continue this line.
Manages the whitespaces around tokens and their replacements.
unsigned Indent
The position to which a specific parenthesis level needs to be indented.
bool HasMultipleNestedBlocks
true if there are multiple nested blocks inside these parens.
const AnnotatedLine * Line
The line that is being formatted.
bool NoLineBreakInOperand
Same as NoLineBreak, but is restricted until the end of the operand (including the next "...
bool LineContainsContinuedForLoopSection
true if this line contains a continued for-loop section.
bool LastOperatorWrapped
True if the last binary operator on this level was wrapped to the next line.
bool BreakBeforeParameter
Break after the next comma (or all the commas in this context if AvoidBinPacking is true)...
bool ObjCSelectorNameFound
true if at least one selector name was found in the current ObjC method expression.
The current state when indenting an unwrapped line.
unsigned QuestionColumn
The column of a ? in a conditional expression;.
const AnnotatedLine * Line
unsigned StartOfArraySubscripts
Contains the start of array subscript expressions, so that they can be aligned.
A wrapper around a Token storing information about the whitespace characters preceding it...
Definition: FormatToken.h:120
llvm::StringMap< FormatStyle > DelimiterStyle
std::vector< ParenState > Stack
A stack keeping track of properties applying to parenthesis levels.
bool NoLineBreak
Line breaking in this context would break a formatting rule.
#define false
Definition: stdbool.h:33
Various functions to configurably format source code.
unsigned LastSpace
The position of the last space on each level.
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
Definition: FormatToken.h:634
bool IgnoreStackForComparison
Ignore the stack of ParenStates for state comparison.
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:46
RawStringFormatStyleManager(const FormatStyle &CodeStyle)
Dataflow Directional Tag Classes.
unsigned FirstIndent
The indent of the first token.
unsigned ColonPos
The position of the colon in an ObjC method declaration/call.
bool AvoidBinPacking
Avoid bin packing, i.e.
bool ContainsUnwrappedBuilder
true if this ParenState contains multiple segments of a builder-type call on one line.
unsigned NestedBlockIndent
If a block relative to this parenthesis level gets wrapped, indent it this much.
unsigned FirstLessLess
The position the first "<<" operator encountered on each level.
unsigned StartOfLineLevel
The NestingLevel at the start of this line.
This file contains the declaration of the FormatToken, a wrapper around Token with additional informa...
ParenState(unsigned Indent, unsigned LastSpace, bool AvoidBinPacking, bool NoLineBreak)
unsigned StartOfStringLiteral
The start column of the string literal, if we're in a string literal sequence, 0 otherwise.
BreakableToken(const FormatToken &Tok, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
FormatToken * NextToken
The token that needs to be next formatted.
bool operator<(const LineState &Other) const
Comparison operator to be able to use LineState in map.
#define true
Definition: stdbool.h:32
unsigned StartOfFunctionCall
The start of the most recent function in a builder-type call.
bool operator<(const ParenState &Other) const
This class handles loading and caching of source files into memory.
const encoding::Encoding Encoding
const FormatStyle & Style