clang  14.0.0git
WhitespaceManager.h
Go to the documentation of this file.
1 //===--- WhitespaceManager.h - Format C++ code ------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// WhitespaceManager class manages whitespace around tokens and their
11 /// replacements.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H
16 #define LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H
17 
18 #include "TokenAnnotator.h"
20 #include "clang/Format/Format.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include <algorithm>
23 #include <string>
24 #include <tuple>
25 
26 namespace clang {
27 namespace format {
28 
29 /// Manages the whitespaces around tokens and their replacements.
30 ///
31 /// This includes special handling for certain constructs, e.g. the alignment of
32 /// trailing line comments.
33 ///
34 /// To guarantee correctness of alignment operations, the \c WhitespaceManager
35 /// must be informed about every token in the source file; for each token, there
36 /// must be exactly one call to either \c replaceWhitespace or
37 /// \c addUntouchableToken.
38 ///
39 /// There may be multiple calls to \c replaceWhitespaceInToken for a given
39 /// token.
41 public:
42  WhitespaceManager(const SourceManager &SourceMgr, const FormatStyle &Style,
43  bool UseCRLF)
44  : SourceMgr(SourceMgr), Style(Style), UseCRLF(UseCRLF) {}
45 
46  bool useCRLF() const { return UseCRLF; }
47 
48  /// Replaces the whitespace in front of \p Tok. Only call once for
49  /// each \c AnnotatedToken.
50  ///
51  /// \p StartOfTokenColumn is the column at which the token will start after
52  /// this replacement. It is needed for determining how \p Spaces is turned
53  /// into tabs and spaces for some format styles.
54  void replaceWhitespace(FormatToken &Tok, unsigned Newlines, unsigned Spaces,
55  unsigned StartOfTokenColumn, bool isAligned = false,
56  bool InPPDirective = false);
57 
58  /// Adds information about an unchangeable token's whitespace.
59  ///
60  /// Needs to be called for every token for which \c replaceWhitespace
61  /// was not called.
62  void addUntouchableToken(const FormatToken &Tok, bool InPPDirective);
63 
64  llvm::Error addReplacement(const tooling::Replacement &Replacement);
65 
66  /// Inserts or replaces whitespace in the middle of a token.
67  ///
68  /// Inserts \p PreviousPostfix, \p Newlines, \p Spaces and \p CurrentPrefix
69  /// (in this order) at \p Offset inside \p Tok, replacing \p ReplaceChars
70  /// characters.
71  ///
72  /// Note: \p Spaces can be negative to retain information about initial
73  /// relative column offset between a line of a block comment and the start of
74  /// the comment. This negative offset may be compensated by trailing comment
75  /// alignment here. In all other cases negative \p Spaces will be truncated to
76  /// 0.
77  ///
78  /// When \p InPPDirective is true, escaped newlines are inserted. \p Spaces is
79  /// used to align backslashes correctly.
80  void replaceWhitespaceInToken(const FormatToken &Tok, unsigned Offset,
81  unsigned ReplaceChars,
82  StringRef PreviousPostfix,
83  StringRef CurrentPrefix, bool InPPDirective,
84  unsigned Newlines, int Spaces);
85 
86  /// Returns all the \c Replacements created during formatting.
88 
89  /// Represents a change before a token, a break inside a token,
90  /// or the layout of an unchanged token (or whitespace within).
91  struct Change {
92  /// Functor to sort changes in original source order.
94  public:
95  IsBeforeInFile(const SourceManager &SourceMgr) : SourceMgr(SourceMgr) {}
96  bool operator()(const Change &C1, const Change &C2) const;
97 
98  private:
99  const SourceManager &SourceMgr;
100  };
101 
102  /// Creates a \c Change.
103  ///
104  /// The generated \c Change will replace the characters at
105  /// \p OriginalWhitespaceRange with a concatenation of
106  /// \p PreviousLinePostfix, \p NewlinesBefore line breaks, \p Spaces spaces
107  /// and \p CurrentLinePrefix.
108  ///
109  /// \p StartOfTokenColumn and \p InPPDirective will be used to lay out
110  /// trailing comments and escaped newlines.
113  unsigned StartOfTokenColumn, unsigned NewlinesBefore,
114  StringRef PreviousLinePostfix, StringRef CurrentLinePrefix,
115  bool IsAligned, bool ContinuesPPDirective, bool IsInsideToken);
116 
117  // The kind of the token whose whitespace this change replaces, or in which
118  // this change inserts whitespace.
119  // FIXME: Currently this is not set correctly for breaks inside comments, as
120  // the \c BreakableToken is still doing its own alignment.
121  const FormatToken *Tok;
122 
124  // Changes might be in the middle of a token, so we cannot just keep the
125  // FormatToken around to query its information.
128  unsigned NewlinesBefore;
131  bool IsAligned;
133 
134  // The number of spaces in front of the token or broken part of the token.
135  // This will be adapted when aligning tokens.
136  // Can be negative to retain information about the initial relative offset
137  // of the lines in a block comment. This is used when aligning trailing
138  // comments. Uncompensated negative offset is truncated to 0.
139  int Spaces;
140 
141  // If this change is inside of a token but not at the start of the token or
142  // directly after a newline.
144 
145  // \c IsTrailingComment, \c TokenLength, \c PreviousEndOfTokenColumn and
146  // \c EscapedNewlineColumn will be calculated in
147  // \c calculateLineBreakInformation.
149  unsigned TokenLength;
152 
153  // These fields are used to retain correct relative line indentation in a
154  // block comment when aligning trailing comments.
155  //
156  // If this Change represents a continuation of a block comment,
157  // \c StartOfBlockComment is a pointer to the first Change in the block
158  // comment. \c IndentationOffset is a relative column offset to this
159  // change, so that the correct column can be reconstructed at the end of
160  // the alignment process.
163 
164  // Depth of conditionals. Computed from tracking fake parenthesis, except
165  // it does not increase the indent for "chained" conditionals.
167 
168  // A combination of indent, nesting and conditionals levels, which are used
169  // in tandem to compute lexical scope, for the purposes of deciding
170  // when to stop consecutive alignment runs.
171  std::tuple<unsigned, unsigned, unsigned> indentAndNestingLevel() const {
172  return std::make_tuple(Tok->IndentLevel, Tok->NestingLevel,
174  }
175  };
176 
177 private:
/// Describes one cell of an array initializer that is being aligned.
///
/// \c Index and \c EndIndex are used as indexes into the list of changes when
/// the width of the cell is calculated. \c NextColumnElement links to another
/// cell of the same column, or is null at the end of that chain.
struct CellDescription {
  /// Index of the change at which this cell starts.
  unsigned Index = 0;
  /// Cell number of this cell (presumably its column within the row).
  unsigned Cell = 0;
  /// Index of the change at which this cell ends.
  unsigned EndIndex = 0;
  /// Whether this cell contains a split element.
  bool HasSplit = false;
  /// Next cell in the same column chain, or null if there is none.
  CellDescription *NextColumnElement = nullptr;

  /// Two descriptions are equal when \c Index, \c Cell and \c EndIndex match.
  /// \c HasSplit and \c NextColumnElement do not take part in the comparison.
  constexpr bool operator==(const CellDescription &Other) const {
    return Index == Other.Index && Cell == Other.Cell &&
           EndIndex == Other.EndIndex;
  }

  /// Negation of \c operator==.
  constexpr bool operator!=(const CellDescription &Other) const {
    return !(*this == Other);
  }
};
193 
194  struct CellDescriptions {
195  SmallVector<CellDescription> Cells;
196  unsigned CellCount = 0;
197  unsigned InitialSpaces = 0;
198  };
199 
200  /// Calculate \c IsTrailingComment, \c TokenLength for the last tokens
201  /// or token parts in a line and \c PreviousEndOfTokenColumn and
202  /// \c EscapedNewlineColumn for the first tokens or token parts in a line.
203  void calculateLineBreakInformation();
204 
205  /// \brief Align consecutive C/C++ preprocessor macros over all \c Changes.
206  void alignConsecutiveMacros();
207 
208  /// Align consecutive assignments over all \c Changes.
209  void alignConsecutiveAssignments();
210 
211  /// Align consecutive bitfields over all \c Changes.
212  void alignConsecutiveBitFields();
213 
214  /// Align consecutive declarations over all \c Changes.
215  void alignConsecutiveDeclarations();
216 
217  /// Align chained conditionals over all \c Changes.
218  void alignChainedConditionals();
219 
220  /// Align trailing comments over all \c Changes.
221  void alignTrailingComments();
222 
223  /// Align trailing comments from change \p Start to change \p End at
224  /// the specified \p Column.
225  void alignTrailingComments(unsigned Start, unsigned End, unsigned Column);
226 
227  /// Align escaped newlines over all \c Changes.
228  void alignEscapedNewlines();
229 
230  /// Align escaped newlines from change \p Start to change \p End at
231  /// the specified \p Column.
232  void alignEscapedNewlines(unsigned Start, unsigned End, unsigned Column);
233 
234  /// Align Array Initializers over all \c Changes.
235  void alignArrayInitializers();
236 
237  /// Align Array Initializers from change \p Start to change
238  /// \p End.
239  void alignArrayInitializers(unsigned Start, unsigned End);
240 
241  /// Align Array Initializers being careful to right justify the columns
242  /// as described by \p CellDescs.
243  void alignArrayInitializersRightJustified(CellDescriptions &&CellDescs);
244 
245  /// Align Array Initializers being careful to left justify the columns
246  /// as described by \p CellDescs.
247  void alignArrayInitializersLeftJustified(CellDescriptions &&CellDescs);
248 
249  /// Calculate the cell width between two indexes.
250  unsigned calculateCellWidth(unsigned Start, unsigned End,
251  bool WithSpaces = false) const;
252 
253  /// Get a set of fully specified CellDescriptions between \p Start and
254  /// \p End of the change list.
255  CellDescriptions getCells(unsigned Start, unsigned End);
256 
257  /// Does this \p Cell contain a split element?
258  static bool isSplitCell(const CellDescription &Cell);
259 
260  /// Get the width of the preceding cells from \p Start to \p End.
261  template <typename I>
262  auto getNetWidth(const I &Start, const I &End, unsigned InitialSpaces) const {
263  auto NetWidth = InitialSpaces;
264  for (auto PrevIter = Start; PrevIter != End; ++PrevIter) {
265  // If we broke the line the initial spaces are already
266  // accounted for.
267  if (Changes[PrevIter->Index].NewlinesBefore > 0)
268  NetWidth = 0;
269  NetWidth +=
270  calculateCellWidth(PrevIter->Index, PrevIter->EndIndex, true) + 1;
271  }
272  return NetWidth;
273  }
274 
275  /// Get the maximum width of a cell in a sequence of columns.
276  template <typename I>
277  unsigned getMaximumCellWidth(I CellIter, unsigned NetWidth) const {
278  unsigned CellWidth =
279  calculateCellWidth(CellIter->Index, CellIter->EndIndex, true);
280  if (Changes[CellIter->Index].NewlinesBefore == 0)
281  CellWidth += NetWidth;
282  for (const auto *Next = CellIter->NextColumnElement; Next != nullptr;
283  Next = Next->NextColumnElement) {
284  auto ThisWidth = calculateCellWidth(Next->Index, Next->EndIndex, true);
285  if (Changes[Next->Index].NewlinesBefore == 0)
286  ThisWidth += NetWidth;
287  CellWidth = std::max(CellWidth, ThisWidth);
288  }
289  return CellWidth;
290  }
291 
292  /// Get The maximum width of all columns to a given cell.
293  template <typename I>
294  unsigned getMaximumNetWidth(const I &CellStart, const I &CellStop,
295  unsigned InitialSpaces,
296  unsigned CellCount) const {
297  auto MaxNetWidth = getNetWidth(CellStart, CellStop, InitialSpaces);
298  auto RowCount = 1U;
299  auto Offset = std::distance(CellStart, CellStop);
300  for (const auto *Next = CellStop->NextColumnElement; Next != nullptr;
301  Next = Next->NextColumnElement) {
302  auto Start = (CellStart + RowCount * CellCount);
303  auto End = Start + Offset;
304  MaxNetWidth =
305  std::max(MaxNetWidth, getNetWidth(Start, End, InitialSpaces));
306  ++RowCount;
307  }
308  return MaxNetWidth;
309  }
310 
311  /// Align a split cell with a newline to the first element in the cell.
312  void alignToStartOfCell(unsigned Start, unsigned End);
313 
314  /// Link the Cell pointers in the list of Cells.
315  static CellDescriptions linkCells(CellDescriptions &&CellDesc);
316 
317  /// Fill \c Replaces with the replacements for all effective changes.
318  void generateChanges();
319 
320  /// Stores \p Text as the replacement for the whitespace in \p Range.
321  void storeReplacement(SourceRange Range, StringRef Text);
322  void appendNewlineText(std::string &Text, unsigned Newlines);
323  void appendEscapedNewlineText(std::string &Text, unsigned Newlines,
324  unsigned PreviousEndOfTokenColumn,
325  unsigned EscapedNewlineColumn);
326  void appendIndentText(std::string &Text, unsigned IndentLevel,
327  unsigned Spaces, unsigned WhitespaceStartColumn,
328  bool IsAligned);
329  unsigned appendTabIndent(std::string &Text, unsigned Spaces,
330  unsigned Indentation);
331 
332  SmallVector<Change, 16> Changes;
333  const SourceManager &SourceMgr;
334  tooling::Replacements Replaces;
335  const FormatStyle &Style;
336  bool UseCRLF;
337 };
338 
339 } // namespace format
340 } // namespace clang
341 
342 #endif
clang::operator!=
bool operator!=(CanQual< T > x, CanQual< U > y)
Definition: CanonicalType.h:207
clang::format::WhitespaceManager::Change::Spaces
int Spaces
Definition: WhitespaceManager.h:139
clang::format::WhitespaceManager::Change::ConditionalsLevel
int ConditionalsLevel
Definition: WhitespaceManager.h:166
max
__DEVICE__ int max(int __a, int __b)
Definition: __clang_cuda_math.h:196
clang::SourceRange
A trivial tuple used to represent a source range.
Definition: SourceLocation.h:212
string
string(SUBSTRING ${CMAKE_CURRENT_BINARY_DIR} 0 ${PATH_LIB_START} PATH_HEAD) string(SUBSTRING $
Definition: CMakeLists.txt:22
clang::format::WhitespaceManager::Change::StartOfTokenColumn
unsigned StartOfTokenColumn
Definition: WhitespaceManager.h:127
clang::format::WhitespaceManager::generateReplacements
const tooling::Replacements & generateReplacements()
Returns all the Replacements created during formatting.
Definition: WhitespaceManager.cpp:92
clang::format::WhitespaceManager::Change::IsBeforeInFile::IsBeforeInFile
IsBeforeInFile(const SourceManager &SourceMgr)
Definition: WhitespaceManager.h:95
clang::tooling::Replacements
Maintains a set of replacements that are conflict-free.
Definition: Replacement.h:209
clang::format::WhitespaceManager::Change::PreviousLinePostfix
std::string PreviousLinePostfix
Definition: WhitespaceManager.h:129
clang::format::WhitespaceManager::addUntouchableToken
void addUntouchableToken(const FormatToken &Tok, bool InPPDirective)
Adds information about an unchangeable token's whitespace.
Definition: WhitespaceManager.cpp:61
clang::format::WhitespaceManager::Change::CreateReplacement
bool CreateReplacement
Definition: WhitespaceManager.h:123
clang::format::FormatToken::IndentLevel
unsigned IndentLevel
The indent level of this token. Copied from the surrounding line.
Definition: FormatToken.h:387
clang::format::FormatStyle
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:59
clang::format::FormatToken
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:210
distance
float __ovld __cnfn distance(float p0, float p1)
Returns the distance between p0 and p1.
SourceManager.h
clang::format::WhitespaceManager::Change::Tok
const FormatToken * Tok
Definition: WhitespaceManager.h:121
Format.h
End
SourceLocation End
Definition: USRLocFinder.cpp:167
clang::format::WhitespaceManager::addReplacement
llvm::Error addReplacement(const tooling::Replacement &Replacement)
Definition: WhitespaceManager.cpp:73
clang::SourceManager
This class handles loading and caching of source files into memory.
Definition: SourceManager.h:626
clang::format::WhitespaceManager::Change::IndentationOffset
int IndentationOffset
Definition: WhitespaceManager.h:162
clang::format::WhitespaceManager::Change
Represents a change before a token, a break inside a token, or the layout of an unchanged token (or w...
Definition: WhitespaceManager.h:91
Offset
unsigned Offset
Definition: Format.cpp:2335
TokenAnnotator.h
U
clang::format::FormatToken::NestingLevel
unsigned NestingLevel
The nesting level of this token, i.e.
Definition: FormatToken.h:384
clang::format::WhitespaceManager::Change::OriginalWhitespaceRange
SourceRange OriginalWhitespaceRange
Definition: WhitespaceManager.h:126
clang::format::WhitespaceManager::Change::IsTrailingComment
bool IsTrailingComment
Definition: WhitespaceManager.h:148
clang::format::WhitespaceManager::Change::indentAndNestingLevel
std::tuple< unsigned, unsigned, unsigned > indentAndNestingLevel() const
Definition: WhitespaceManager.h:171
clang::format::WhitespaceManager::Change::IsInsideToken
bool IsInsideToken
Definition: WhitespaceManager.h:143
clang::format::WhitespaceManager
Manages the whitespaces around tokens and their replacements.
Definition: WhitespaceManager.h:40
clang::format::WhitespaceManager::Change::IsAligned
bool IsAligned
Definition: WhitespaceManager.h:131
clang::format::WhitespaceManager::Change::Change
Change(const FormatToken &Tok, bool CreateReplacement, SourceRange OriginalWhitespaceRange, int Spaces, unsigned StartOfTokenColumn, unsigned NewlinesBefore, StringRef PreviousLinePostfix, StringRef CurrentLinePrefix, bool IsAligned, bool ContinuesPPDirective, bool IsInsideToken)
Creates a Change.
Definition: WhitespaceManager.cpp:29
clang::format::WhitespaceManager::Change::NewlinesBefore
unsigned NewlinesBefore
Definition: WhitespaceManager.h:128
clang::format::WhitespaceManager::Change::TokenLength
unsigned TokenLength
Definition: WhitespaceManager.h:149
clang::format::WhitespaceManager::Change::ContinuesPPDirective
bool ContinuesPPDirective
Definition: WhitespaceManager.h:132
clang::format::WhitespaceManager::Change::CurrentLinePrefix
std::string CurrentLinePrefix
Definition: WhitespaceManager.h:130
clang::format::WhitespaceManager::WhitespaceManager
WhitespaceManager(const SourceManager &SourceMgr, const FormatStyle &Style, bool UseCRLF)
Definition: WhitespaceManager.h:42
clang::tooling::Replacement
A text replacement.
Definition: Replacement.h:83
clang::format::WhitespaceManager::Change::PreviousEndOfTokenColumn
unsigned PreviousEndOfTokenColumn
Definition: WhitespaceManager.h:150
clang::format::WhitespaceManager::useCRLF
bool useCRLF() const
Definition: WhitespaceManager.h:46
clang::format::WhitespaceManager::Change::StartOfBlockComment
const Change * StartOfBlockComment
Definition: WhitespaceManager.h:161
clang::format::WhitespaceManager::Change::IsBeforeInFile::operator()
bool operator()(const Change &C1, const Change &C2) const
Definition: WhitespaceManager.cpp:22
clang
Definition: CalledOnceCheck.h:17
Text
StringRef Text
Definition: Format.cpp:2334
clang::format::WhitespaceManager::Change::EscapedNewlineColumn
unsigned EscapedNewlineColumn
Definition: WhitespaceManager.h:151
clang::format::WhitespaceManager::Change::IsBeforeInFile
Functor to sort changes in original source order.
Definition: WhitespaceManager.h:93
clang::format::WhitespaceManager::replaceWhitespaceInToken
void replaceWhitespaceInToken(const FormatToken &Tok, unsigned Offset, unsigned ReplaceChars, StringRef PreviousPostfix, StringRef CurrentPrefix, bool InPPDirective, unsigned Newlines, int Spaces)
Inserts or replaces whitespace in the middle of a token.
Definition: WhitespaceManager.cpp:77
clang::operator==
bool operator==(const CallGraphNode::CallRecord &LHS, const CallGraphNode::CallRecord &RHS)
Definition: CallGraph.h:207
clang::format::WhitespaceManager::replaceWhitespace
void replaceWhitespace(FormatToken &Tok, unsigned Newlines, unsigned Spaces, unsigned StartOfTokenColumn, bool isAligned=false, bool InPPDirective=false)
Replaces the whitespace in front of Tok.
Definition: WhitespaceManager.cpp:48