clang 22.0.0git
WhitespaceManager.h
Go to the documentation of this file.
1//===--- WhitespaceManager.h - Format C++ code ------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// WhitespaceManager class manages whitespace around tokens and their
11/// replacements.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H
16#define LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H
17
18#include "TokenAnnotator.h"
20
21namespace clang {
22namespace format {
23
24/// Manages the whitespaces around tokens and their replacements.
25///
26/// This includes special handling for certain constructs, e.g. the alignment of
27/// trailing line comments.
28///
29/// To guarantee correctness of alignment operations, the \c WhitespaceManager
30/// must be informed about every token in the source file; for each token, there
31/// must be exactly one call to either \c replaceWhitespace or
32/// \c addUntouchableToken.
33///
34/// There may be multiple calls to \c replaceWhitespaceInToken for a given token.
36public:
37 WhitespaceManager(const SourceManager &SourceMgr, const FormatStyle &Style,
38 bool UseCRLF)
39 : SourceMgr(SourceMgr), Style(Style), UseCRLF(UseCRLF) {}
40
41 bool useCRLF() const { return UseCRLF; }
42
43 /// Infers whether the input is using CRLF.
44 static bool inputUsesCRLF(StringRef Text, bool DefaultToCRLF);
45
46 /// Replaces the whitespace in front of \p Tok. Only call once for
47 /// each \c AnnotatedToken.
48 ///
49 /// \p StartOfTokenColumn is the column at which the token will start after
50 /// this replacement. It is needed for determining how \p Spaces is turned
51 /// into tabs and spaces for some format styles.
52 void replaceWhitespace(FormatToken &Tok, unsigned Newlines, unsigned Spaces,
53 unsigned StartOfTokenColumn, bool IsAligned = false,
54 bool InPPDirective = false);
55
56 /// Adds information about an unchangeable token's whitespace.
57 ///
58 /// Needs to be called for every token for which \c replaceWhitespace
59 /// was not called.
60 void addUntouchableToken(const FormatToken &Tok, bool InPPDirective);
61
62 llvm::Error addReplacement(const tooling::Replacement &Replacement);
63
64 /// Inserts or replaces whitespace in the middle of a token.
65 ///
66 /// Inserts \p PreviousPostfix, \p Newlines, \p Spaces and \p CurrentPrefix
67 /// (in this order) at \p Offset inside \p Tok, replacing \p ReplaceChars
68 /// characters.
69 ///
70 /// Note: \p Spaces can be negative to retain information about initial
71 /// relative column offset between a line of a block comment and the start of
72 /// the comment. This negative offset may be compensated by trailing comment
73 /// alignment here. In all other cases negative \p Spaces will be truncated to
74 /// 0.
75 ///
76 /// When \p InPPDirective is true, escaped newlines are inserted. \p Spaces is
77 /// used to align backslashes correctly.
78 void replaceWhitespaceInToken(const FormatToken &Tok, unsigned Offset,
79 unsigned ReplaceChars,
80 StringRef PreviousPostfix,
81 StringRef CurrentPrefix, bool InPPDirective,
82 unsigned Newlines, int Spaces);
83
84 /// Returns all the \c Replacements created during formatting.
86
87 /// Represents a change before a token, a break inside a token,
88 /// or the layout of an unchanged token (or whitespace within).
89 struct Change {
90 /// Functor to sort changes in original source order.
92 public:
93 IsBeforeInFile(const SourceManager &SourceMgr) : SourceMgr(SourceMgr) {}
94 bool operator()(const Change &C1, const Change &C2) const;
95
96 private:
97 const SourceManager &SourceMgr;
98 };
99
100 /// Creates a \c Change.
101 ///
102 /// The generated \c Change will replace the characters at
103 /// \p OriginalWhitespaceRange with a concatenation of
104 /// \p PreviousLinePostfix, \p NewlinesBefore line breaks, \p Spaces spaces
105 /// and \p CurrentLinePrefix.
106 ///
107 /// \p StartOfTokenColumn and \p InPPDirective will be used to lay out
108 /// trailing comments and escaped newlines.
111 unsigned StartOfTokenColumn, unsigned NewlinesBefore,
112 StringRef PreviousLinePostfix, StringRef CurrentLinePrefix,
114
115 // The kind of the token whose whitespace this change replaces, or in which
116 // this change inserts whitespace.
117 // FIXME: Currently this is not set correctly for breaks inside comments, as
118 // the \c BreakableToken is still doing its own alignment.
120
122 // Changes might be in the middle of a token, so we cannot just keep the
123 // FormatToken around to query its information.
128 std::string CurrentLinePrefix;
131
132 // The number of spaces in front of the token or broken part of the token.
133 // This will be adapted when aligning tokens.
134 // Can be negative to retain information about the initial relative offset
135 // of the lines in a block comment. This is used when aligning trailing
136 // comments. Uncompensated negative offset is truncated to 0.
138
139 // If this change is inside of a token but not at the start of the token or
140 // directly after a newline.
142
143 // \c IsTrailingComment, \c TokenLength, \c PreviousEndOfTokenColumn and
144 // \c EscapedNewlineColumn will be calculated in
145 // \c calculateLineBreakInformation.
147 unsigned TokenLength;
150
151 // These fields are used to retain correct relative line indentation in a
152 // block comment when aligning trailing comments.
153 //
154 // If this Change represents a continuation of a block comment,
155 // \c StartOfBlockComment is pointer to the first Change in the block
156 // comment. \c IndentationOffset is a relative column offset to this
157 // change, so that the correct column can be reconstructed at the end of
158 // the alignment process.
161
162 // Depth of conditionals. Computed from tracking fake parenthesis, except
163 // it does not increase the indent for "chained" conditionals.
165
166 // A combination of indent, nesting and conditionals levels, which are used
167 // in tandem to compute lexical scope, for the purposes of deciding
168 // when to stop consecutive alignment runs.
169 std::tuple<unsigned, unsigned, unsigned> indentAndNestingLevel() const {
170 return std::make_tuple(Tok->IndentLevel, Tok->NestingLevel,
172 }
173 };
174
175private:
/// Describes a single cell tracked while aligning array initializers.
struct CellDescription {
  /// Position in \c Changes of this cell; the width helpers subscript
  /// \c Changes with it.
  unsigned Index = 0;
  /// NOTE(review): presumably the position of the cell within its row -
  /// confirm against \c getCells.
  unsigned Cell = 0;
  /// Passed together with \c Index as the end of the range given to
  /// \c calculateCellWidth.
  unsigned EndIndex = 0;
  /// NOTE(review): presumably set when the cell contains a split element
  /// (see \c isSplitCell) - confirm.
  bool HasSplit = false;
  /// Next cell in the chain the width helpers walk; null terminates the chain.
  CellDescription *NextColumnElement = nullptr;

  /// Two descriptions compare equal when \c Index, \c Cell and \c EndIndex
  /// match. \c HasSplit and \c NextColumnElement do not take part in the
  /// comparison.
  constexpr bool operator==(const CellDescription &Other) const {
    return Index == Other.Index && Cell == Other.Cell &&
           EndIndex == Other.EndIndex;
  }
  constexpr bool operator!=(const CellDescription &Other) const {
    return !(*this == Other);
  }
};
191
192 struct CellDescriptions {
193 SmallVector<CellDescription> Cells;
194 SmallVector<unsigned> CellCounts;
195 unsigned InitialSpaces = 0;
196
197 // Determine if every row in the array
198 // has the same number of columns.
199 bool isRectangular() const {
200 if (CellCounts.size() < 2)
201 return false;
202
203 for (auto NumberOfColumns : CellCounts)
204 if (NumberOfColumns != CellCounts[0])
205 return false;
206 return true;
207 }
208 };
209
210 /// Calculate \c IsTrailingComment, \c TokenLength for the last tokens
211 /// or token parts in a line and \c PreviousEndOfTokenColumn and
212 /// \c EscapedNewlineColumn for the first tokens or token parts in a line.
213 void calculateLineBreakInformation();
214
215 /// Align consecutive C/C++ preprocessor macros over all \c Changes.
216 void alignConsecutiveMacros();
217
218 /// Align consecutive assignments over all \c Changes.
219 void alignConsecutiveAssignments();
220
221 /// Align consecutive bitfields over all \c Changes.
222 void alignConsecutiveBitFields();
223
224 /// Align consecutive colon. For bitfields, TableGen DAGArgs and defintions.
225 void
226 alignConsecutiveColons(const FormatStyle::AlignConsecutiveStyle &AlignStyle,
228
229 /// Align consecutive declarations over all \c Changes.
230 void alignConsecutiveDeclarations();
231
232 /// Align chained conditionals over all \c Changes.
233 void alignChainedConditionals();
234
235 /// Align consecutive short case statements over all \c Changes.
236 void alignConsecutiveShortCaseStatements(bool IsExpr);
237
238 /// Align consecutive TableGen DAGArg colon over all \c Changes.
239 void alignConsecutiveTableGenBreakingDAGArgColons();
240
241 /// Align consecutive TableGen cond operator colon over all \c Changes.
242 void alignConsecutiveTableGenCondOperatorColons();
243
244 /// Align consecutive TableGen definitions over all \c Changes.
245 void alignConsecutiveTableGenDefinitions();
246
247 /// Align trailing comments over all \c Changes.
248 void alignTrailingComments();
249
250 /// Align trailing comments from change \p Start to change \p End at
251 /// the specified \p Column.
252 void alignTrailingComments(unsigned Start, unsigned End, unsigned Column);
253
254 /// Align escaped newlines over all \c Changes.
255 void alignEscapedNewlines();
256
257 /// Align escaped newlines from change \p Start to change \p End at
258 /// the specified \p Column.
259 void alignEscapedNewlines(unsigned Start, unsigned End, unsigned Column);
260
261 /// Align Array Initializers over all \c Changes.
262 void alignArrayInitializers();
263
264 /// Align Array Initializers over the changes from \p Start to
265 /// \p End of the change list.
266 void alignArrayInitializers(unsigned Start, unsigned End);
267
268 /// Align Array Initializers being careful to right justify the columns
269 /// as described by \p CellDescs.
270 void alignArrayInitializersRightJustified(CellDescriptions &&CellDescs);
271
272 /// Align Array Initializers being careful to left justify the columns
273 /// as described by \p CellDescs.
274 void alignArrayInitializersLeftJustified(CellDescriptions &&CellDescs);
275
276 /// Calculate the cell width between two indexes.
277 unsigned calculateCellWidth(unsigned Start, unsigned End,
278 bool WithSpaces = false) const;
279
280 /// Get a set of fully specified CellDescriptions between \p Start and
281 /// \p End of the change list.
282 CellDescriptions getCells(unsigned Start, unsigned End);
283
284 /// Does this \p Cell contain a split element?
285 static bool isSplitCell(const CellDescription &Cell);
286
287 /// Get the width of the preceding cells from \p Start to \p End.
288 template <typename I>
289 auto getNetWidth(const I &Start, const I &End, unsigned InitialSpaces) const {
290 auto NetWidth = InitialSpaces;
291 for (auto PrevIter = Start; PrevIter != End; ++PrevIter) {
292 // If we broke the line the initial spaces are already
293 // accounted for.
294 assert(PrevIter->Index < Changes.size());
295 if (Changes[PrevIter->Index].NewlinesBefore > 0)
296 NetWidth = 0;
297 NetWidth +=
298 calculateCellWidth(PrevIter->Index, PrevIter->EndIndex, true) + 1;
299 }
300 return NetWidth;
301 }
302
303 /// Get the maximum width of a cell in a sequence of columns.
304 template <typename I>
305 unsigned getMaximumCellWidth(I CellIter, unsigned NetWidth) const {
306 unsigned CellWidth =
307 calculateCellWidth(CellIter->Index, CellIter->EndIndex, true);
308 if (Changes[CellIter->Index].NewlinesBefore == 0)
309 CellWidth += NetWidth;
310 for (const auto *Next = CellIter->NextColumnElement; Next;
311 Next = Next->NextColumnElement) {
312 auto ThisWidth = calculateCellWidth(Next->Index, Next->EndIndex, true);
313 if (Changes[Next->Index].NewlinesBefore == 0)
314 ThisWidth += NetWidth;
315 CellWidth = std::max(CellWidth, ThisWidth);
316 }
317 return CellWidth;
318 }
319
320 /// Get The maximum width of all columns to a given cell.
321 template <typename I>
322 unsigned getMaximumNetWidth(const I &CellStart, const I &CellStop,
323 unsigned InitialSpaces, unsigned CellCount,
324 unsigned MaxRowCount) const {
325 auto MaxNetWidth = getNetWidth(CellStart, CellStop, InitialSpaces);
326 auto RowCount = 1U;
327 auto Offset = std::distance(CellStart, CellStop);
328 for (const auto *Next = CellStop->NextColumnElement; Next;
329 Next = Next->NextColumnElement) {
330 if (RowCount >= MaxRowCount)
331 break;
332 auto Start = (CellStart + RowCount * CellCount);
333 auto End = Start + Offset;
334 MaxNetWidth =
335 std::max(MaxNetWidth, getNetWidth(Start, End, InitialSpaces));
336 ++RowCount;
337 }
338 return MaxNetWidth;
339 }
340
341 /// Align a split cell with a newline to the first element in the cell.
342 void alignToStartOfCell(unsigned Start, unsigned End);
343
344 /// Link the Cell pointers in the list of Cells.
345 static CellDescriptions linkCells(CellDescriptions &&CellDesc);
346
347 /// Fill \c Replaces with the replacements for all effective changes.
348 void generateChanges();
349
350 /// Stores \p Text as the replacement for the whitespace in \p Range.
351 void storeReplacement(SourceRange Range, StringRef Text);
352 void appendNewlineText(std::string &Text, const Change &C);
353 void appendEscapedNewlineText(std::string &Text, unsigned Newlines,
354 unsigned PreviousEndOfTokenColumn,
355 unsigned EscapedNewlineColumn);
356 void appendIndentText(std::string &Text, unsigned IndentLevel,
357 unsigned Spaces, unsigned WhitespaceStartColumn,
358 bool IsAligned);
359 unsigned appendTabIndent(std::string &Text, unsigned Spaces,
360 unsigned Indentation);
361
362 SmallVector<Change, 16> Changes;
363 const SourceManager &SourceMgr;
364 tooling::Replacements Replaces;
365 const FormatStyle &Style;
366 bool UseCRLF;
367};
368
369} // namespace format
370} // namespace clang
371
372#endif
int Newlines
The number of newlines immediately before the Token after formatting.
Token Tok
The Token.
unsigned NewlinesBefore
The number of newlines immediately before the Token.
unsigned IndentLevel
The indent level of this token. Copied from the surrounding line.
FormatToken * Next
The next token in the unwrapped line.
Defines the SourceManager interface.
This file implements a token annotator, i.e.
This class handles loading and caching of source files into memory.
A trivial tuple used to represent a source range.
bool operator()(const Change &C1, const Change &C2) const
void replaceWhitespaceInToken(const FormatToken &Tok, unsigned Offset, unsigned ReplaceChars, StringRef PreviousPostfix, StringRef CurrentPrefix, bool InPPDirective, unsigned Newlines, int Spaces)
Inserts or replaces whitespace in the middle of a token.
void addUntouchableToken(const FormatToken &Tok, bool InPPDirective)
Adds information about an unchangeable token's whitespace.
static bool inputUsesCRLF(StringRef Text, bool DefaultToCRLF)
Infers whether the input is using CRLF.
llvm::Error addReplacement(const tooling::Replacement &Replacement)
const tooling::Replacements & generateReplacements()
Returns all the Replacements created during formatting.
void replaceWhitespace(FormatToken &Tok, unsigned Newlines, unsigned Spaces, unsigned StartOfTokenColumn, bool IsAligned=false, bool InPPDirective=false)
Replaces the whitespace in front of Tok.
WhitespaceManager(const SourceManager &SourceMgr, const FormatStyle &Style, bool UseCRLF)
A text replacement.
Definition Replacement.h:83
Maintains a set of replacements that are conflict-free.
TokenType
Determines the semantic type of a syntactic token, e.g.
The JSON file list parser is used to communicate input to InstallAPI.
bool operator==(const CallGraphNode::CallRecord &LHS, const CallGraphNode::CallRecord &RHS)
Definition CallGraph.h:204
@ Type
The name was classified as a type.
Definition Sema.h:562
bool operator!=(CanQual< T > x, CanQual< U > y)
@ Other
Other implicit parameter.
Definition Decl.h:1745
A wrapper around a Token storing information about the whitespace characters preceding it.
Represents a change before a token, a break inside a token, or the layout of an unchanged token (or w...
Change(const FormatToken &Tok, bool CreateReplacement, SourceRange OriginalWhitespaceRange, int Spaces, unsigned StartOfTokenColumn, unsigned NewlinesBefore, StringRef PreviousLinePostfix, StringRef CurrentLinePrefix, bool IsAligned, bool ContinuesPPDirective, bool IsInsideToken)
Creates a Change.
std::tuple< unsigned, unsigned, unsigned > indentAndNestingLevel() const