clang 23.0.0git
WhitespaceManager.h
Go to the documentation of this file.
1//===--- WhitespaceManager.h - Format C++ code ------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// WhitespaceManager class manages whitespace around tokens and their
11/// replacements.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H
16#define LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H
17
18#include "TokenAnnotator.h"
20
21namespace clang {
22namespace format {
23
24/// Manages the whitespaces around tokens and their replacements.
25///
26/// This includes special handling for certain constructs, e.g. the alignment of
27/// trailing line comments.
28///
29/// To guarantee correctness of alignment operations, the \c WhitespaceManager
30/// must be informed about every token in the source file; for each token, there
31/// must be exactly one call to either \c replaceWhitespace or
32/// \c addUntouchableToken.
33///
34/// There may be multiple calls to \c replaceWhitespaceInToken for a given token.
36public:
/// Constructs a manager that keeps \p SourceMgr and \p Style by reference and
/// records \p UseCRLF. The constructor body itself performs no other work.
37 WhitespaceManager(const SourceManager &SourceMgr, const FormatStyle &Style,
38 bool UseCRLF)
39 : SourceMgr(SourceMgr), Style(Style), UseCRLF(UseCRLF) {}
40
/// Returns the \c UseCRLF flag that was supplied to the constructor.
41 bool useCRLF() const { return UseCRLF; }
42
43 /// Infers whether the input is using CRLF.
44 static bool inputUsesCRLF(StringRef Text, bool DefaultToCRLF);
45
46 /// Replaces the whitespace in front of \p Tok. Only call once for
47 /// each \c FormatToken.
48 ///
49 /// \p StartOfTokenColumn is the column at which the token will start after
50 /// this replacement. It is needed for determining how \p Spaces is turned
51 /// into tabs and spaces for some format styles.
52 ///
53 /// \p IndentedFromColumn is only used when the replacement starts a new
54 /// line. It should be the column that the position of the line is derived
55 /// from. It is used for determining what lines the alignment process should
56 /// move.
57 void replaceWhitespace(FormatToken &Tok, unsigned Newlines, unsigned Spaces,
58 unsigned StartOfTokenColumn,
59 const FormatToken *AlignedTo = nullptr,
60 bool InPPDirective = false,
61 unsigned IndentedFromColumn = 0);
62
63 /// Adds information about an unchangeable token's whitespace.
64 ///
65 /// Needs to be called for every token for which \c replaceWhitespace
66 /// was not called.
67 void addUntouchableToken(const FormatToken &Tok, bool InPPDirective);
68
69 llvm::Error addReplacement(const tooling::Replacement &Replacement);
70
71 /// Inserts or replaces whitespace in the middle of a token.
72 ///
73 /// Inserts \p PreviousPostfix, \p Newlines, \p Spaces and \p CurrentPrefix
74 /// (in this order) at \p Offset inside \p Tok, replacing \p ReplaceChars
75 /// characters.
76 ///
77 /// Note: \p Spaces can be negative to retain information about initial
78 /// relative column offset between a line of a block comment and the start of
79 /// the comment. This negative offset may be compensated by trailing comment
80 /// alignment here. In all other cases negative \p Spaces will be truncated to
81 /// 0.
82 ///
83 /// When \p InPPDirective is true, escaped newlines are inserted. \p Spaces is
84 /// used to align backslashes correctly.
85 void replaceWhitespaceInToken(const FormatToken &Tok, unsigned Offset,
86 unsigned ReplaceChars,
87 StringRef PreviousPostfix,
88 StringRef CurrentPrefix, bool InPPDirective,
89 unsigned Newlines, int Spaces);
90
91 /// Returns all the \c Replacements created during formatting.
93
94 /// Represents a change before a token, a break inside a token,
95 /// or the layout of an unchanged token (or whitespace within).
96 struct Change {
97 /// Functor to sort changes in original source order.
99 public:
100 IsBeforeInFile(const SourceManager &SourceMgr) : SourceMgr(SourceMgr) {}
101 bool operator()(const Change &C1, const Change &C2) const;
102
103 private:
104 const SourceManager &SourceMgr;
105 };
106
107 /// Creates a \c Change.
108 ///
109 /// The generated \c Change will replace the characters at
110 /// \p OriginalWhitespaceRange with a concatenation of
111 /// \p PreviousLinePostfix, \p NewlinesBefore line breaks, \p Spaces spaces
112 /// and \p CurrentLinePrefix.
113 ///
114 /// \p StartOfTokenColumn and \p ContinuesPPDirective will be used to lay
115 /// out trailing comments and escaped newlines. \p IndentedFromColumn will
116 /// be used to continue aligned lines.
119 unsigned StartOfTokenColumn, unsigned IndentedFromColumn,
120 unsigned NewlinesBefore, StringRef PreviousLinePostfix,
121 StringRef CurrentLinePrefix, const FormatToken *AlignedTo,
123
124 // The kind of the token whose whitespace this change replaces, or in which
125 // this change inserts whitespace.
126 // FIXME: Currently this is not set correctly for breaks inside comments, as
127 // the \c BreakableToken is still doing its own alignment.
129
131 // Changes might be in the middle of a token, so we cannot just keep the
132 // FormatToken around to query its information.
135 // Only used when the token is at the start of a line. The column that the
136 // position of the line is derived from. The alignment procedure moves the
137 // line when it moves a token in the same unwrapped line that is to the left
138 // of said column.
142 std::string CurrentLinePrefix;
145
146 // The number of spaces in front of the token or broken part of the token.
147 // This will be adapted when aligning tokens.
148 // Can be negative to retain information about the initial relative offset
149 // of the lines in a block comment. This is used when aligning trailing
150 // comments. Uncompensated negative offset is truncated to 0.
152
153 // If this change is inside of a token but not at the start of the token or
154 // directly after a newline.
156
157 // \c IsTrailingComment, \c TokenLength, \c PreviousEndOfTokenColumn and
158 // \c EscapedNewlineColumn will be calculated in
159 // \c calculateLineBreakInformation.
161 unsigned TokenLength;
164
165 // These fields are used to retain correct relative line indentation in a
166 // block comment when aligning trailing comments.
167 //
168 // If this Change represents a continuation of a block comment,
169 // \c StartOfBlockComment is a pointer to the first Change in the block
170 // comment. \c IndentationOffset is a relative column offset to this
171 // change, so that the correct column can be reconstructed at the end of
172 // the alignment process.
175
176 // Depth of conditionals. Computed from tracking fake parenthesis, except
177 // it does not increase the indent for "chained" conditionals.
179
180 // A combination of indent, nesting and conditionals levels, which are used
181 // in tandem to compute lexical scope, for the purposes of deciding
182 // when to stop consecutive alignment runs.
183 std::tuple<unsigned, unsigned, unsigned> indentAndNestingLevel() const {
184 return std::make_tuple(Tok->IndentLevel, Tok->NestingLevel,
186 }
187 };
188
189private:
// Describes one cell of an array initializer as a span of entries in
// \c Changes. The width helpers in this class index \c Changes with \c Index
// and pass \c Index and \c EndIndex to \c calculateCellWidth as start and end.
190 struct CellDescription {
191 unsigned Index = 0; // Index into \c Changes at which the cell starts.
192 unsigned Cell = 0; // NOTE(review): presumably the cell's column within its row -- confirm.
193 unsigned EndIndex = 0; // End of the cell's span in \c Changes (inclusiveness not visible here).
194 bool HasSplit = false; // NOTE(review): presumably set for cells containing a split (cf. isSplitCell) -- confirm.
195 CellDescription *NextColumnElement = nullptr; // Next cell of the chain walked by the width helpers; nullptr ends it.
196
// Equality looks only at Index, Cell and EndIndex; HasSplit and
// NextColumnElement do not take part in the comparison.
197 constexpr bool operator==(const CellDescription &Other) const {
198 return Index == Other.Index && Cell == Other.Cell &&
199 EndIndex == Other.EndIndex;
200 }
// Defined as the exact negation of operator==.
201 constexpr bool operator!=(const CellDescription &Other) const {
202 return !(*this == Other);
203 }
204 };
205
// Bundles the cells of an array initializer with the data the alignment
// helpers need alongside them.
206 struct CellDescriptions {
207 SmallVector<CellDescription> Cells; // The individual cell descriptions.
208 SmallVector<unsigned> CellCounts; // NOTE(review): presumably one cell count per row -- confirm in getCells.
209 unsigned InitialSpaces = 0; // Passed to the net-width helpers as their starting width.
210
211 // Determine if every row in the array has the same number of columns, i.e.
212 // all entries of CellCounts are equal. Fewer than two entries yields false.
213 bool isRectangular() const {
214 if (CellCounts.size() < 2)
215 return false;
216
// Compare every entry against the first one; any mismatch means ragged.
217 for (auto NumberOfColumns : CellCounts)
218 if (NumberOfColumns != CellCounts[0])
219 return false;
220 return true;
221 }
222 };
223
224 /// Calculate \c IsTrailingComment, \c TokenLength for the last tokens
225 /// or token parts in a line and \c PreviousEndOfTokenColumn and
226 /// \c EscapedNewlineColumn for the first tokens or token parts in a line.
227 void calculateLineBreakInformation();
228
229 /// Align consecutive C/C++ preprocessor macros over all \c Changes.
230 void alignConsecutiveMacros();
231
232 /// Align consecutive assignments over all \c Changes.
233 void alignConsecutiveAssignments();
234
235 /// Align consecutive bitfields over all \c Changes.
236 void alignConsecutiveBitFields();
237
238 /// Align consecutive colons. Used for bitfields, TableGen DAGArgs and definitions.
239 void
240 alignConsecutiveColons(const FormatStyle::AlignConsecutiveStyle &AlignStyle,
242
243 /// Align consecutive declarations over all \c Changes.
244 void alignConsecutiveDeclarations();
245
246 /// Align chained conditionals over all \c Changes.
247 void alignChainedConditionals();
248
249 /// Align consecutive short case statements over all \c Changes.
250 void alignConsecutiveShortCaseStatements(bool IsExpr);
251
252 /// Align consecutive TableGen DAGArg colon over all \c Changes.
253 void alignConsecutiveTableGenBreakingDAGArgColons();
254
255 /// Align consecutive TableGen cond operator colon over all \c Changes.
256 void alignConsecutiveTableGenCondOperatorColons();
257
258 /// Align consecutive TableGen definitions over all \c Changes.
259 void alignConsecutiveTableGenDefinitions();
260
261 /// Align trailing comments over all \c Changes.
262 void alignTrailingComments();
263
264 /// Align trailing comments from change \p Start to change \p End at
265 /// the specified \p Column.
266 void alignTrailingComments(unsigned Start, unsigned End, unsigned Column);
267
268 /// Align escaped newlines over all \c Changes.
269 void alignEscapedNewlines();
270
271 /// Align escaped newlines from change \p Start to change \p End at
272 /// the specified \p Column.
273 void alignEscapedNewlines(unsigned Start, unsigned End, unsigned Column);
274
275 /// Align Array Initializers over all \c Changes.
276 void alignArrayInitializers();
277
278 /// Align Array Initializers from change \p Start to change
279 /// \p End.
280 void alignArrayInitializers(unsigned Start, unsigned End);
281
282 /// Align Array Initializers being careful to right justify the columns
283 /// as described by \p CellDescs.
284 void alignArrayInitializersRightJustified(CellDescriptions &&CellDescs);
285
286 /// Align Array Initializers being careful to left justify the columns
287 /// as described by \p CellDescs.
288 void alignArrayInitializersLeftJustified(CellDescriptions &&CellDescs);
289
290 /// Calculate the cell width between two indexes.
291 unsigned calculateCellWidth(unsigned Start, unsigned End,
292 bool WithSpaces = false) const;
293
294 /// Get a set of fully specified CellDescriptions between \p Start and
295 /// \p End of the change list.
296 CellDescriptions getCells(unsigned Start, unsigned End);
297
298 /// Does this \p Cell contain a split element?
299 static bool isSplitCell(const CellDescription &Cell);
300
301 /// Get the width of the preceding cells from \p Start to \p End.
///
/// Starts at \p InitialSpaces and, for every cell in [\p Start, \p End), adds
/// the cell's width with spaces included, plus one. A cell whose first change
/// has \c NewlinesBefore > 0 resets the running total to zero before its own
/// width is added.
/// NOTE(review): the "+ 1" presumably accounts for the separator following
/// each cell -- confirm against calculateCellWidth.
302 template <typename I>
303 auto getNetWidth(const I &Start, const I &End, unsigned InitialSpaces) const {
304 auto NetWidth = InitialSpaces;
305 for (auto PrevIter = Start; PrevIter != End; ++PrevIter) {
306 // If we broke the line the initial spaces are already
307 // accounted for.
308 assert(PrevIter->Index < Changes.size());
// Everything accumulated so far is dropped, not only InitialSpaces.
309 if (Changes[PrevIter->Index].NewlinesBefore > 0)
310 NetWidth = 0;
311 NetWidth +=
312 calculateCellWidth(PrevIter->Index, PrevIter->EndIndex, true) + 1;
313 }
314 return NetWidth;
315 }
316
317 /// Get the maximum width of a cell in a sequence of columns.
///
/// Measures the cell at \p CellIter and every cell reachable from it through
/// \c NextColumnElement, each with spaces included, and returns the largest
/// value. \p NetWidth is added to a cell's width only when the cell's first
/// change does not start a new line (\c NewlinesBefore == 0).
/// NOTE(review): unlike getNetWidth, the indexes into \c Changes are not
/// guarded by an assert here.
318 template <typename I>
319 unsigned getMaximumCellWidth(I CellIter, unsigned NetWidth) const {
320 unsigned CellWidth =
321 calculateCellWidth(CellIter->Index, CellIter->EndIndex, true);
322 if (Changes[CellIter->Index].NewlinesBefore == 0)
323 CellWidth += NetWidth;
// Apply the same measurement to each further cell linked from CellIter.
324 for (const auto *Next = CellIter->NextColumnElement; Next;
325 Next = Next->NextColumnElement) {
326 auto ThisWidth = calculateCellWidth(Next->Index, Next->EndIndex, true);
327 if (Changes[Next->Index].NewlinesBefore == 0)
328 ThisWidth += NetWidth;
329 CellWidth = std::max(CellWidth, ThisWidth);
330 }
331 return CellWidth;
332 }
333
334 /// Get the maximum width of all columns to a given cell.
///
/// Computes getNetWidth for [\p CellStart, \p CellStop) and for the
/// equally sized range in each following row, and returns the largest result.
/// The range of row N starts at \p CellStart + N * \p CellCount, so \p I must
/// support iterator arithmetic. One further row is visited per element linked
/// from \p CellStop via \c NextColumnElement, and at most \p MaxRowCount rows
/// are measured in total (the first row is always measured).
/// NOTE(review): the start computation presumes every row holds exactly
/// \p CellCount cells stored contiguously -- confirm with the callers.
335 template <typename I>
336 unsigned getMaximumNetWidth(const I &CellStart, const I &CellStop,
337 unsigned InitialSpaces, unsigned CellCount,
338 unsigned MaxRowCount) const {
339 auto MaxNetWidth = getNetWidth(CellStart, CellStop, InitialSpaces);
340 auto RowCount = 1U;
341 auto Offset = std::distance(CellStart, CellStop);
// Next only counts the remaining rows; the cells themselves are reached by
// offsetting from CellStart.
342 for (const auto *Next = CellStop->NextColumnElement; Next;
343 Next = Next->NextColumnElement) {
344 if (RowCount >= MaxRowCount)
345 break;
346 auto Start = (CellStart + RowCount * CellCount);
347 auto End = Start + Offset;
348 MaxNetWidth =
349 std::max(MaxNetWidth, getNetWidth(Start, End, InitialSpaces));
350 ++RowCount;
351 }
352 return MaxNetWidth;
353 }
354
355 /// Align a split cell with a newline to the first element in the cell.
356 void alignToStartOfCell(unsigned Start, unsigned End);
357
358 /// Link the Cell pointers in the list of Cells.
359 static CellDescriptions linkCells(CellDescriptions &&CellDesc);
360
361 void setChangeSpaces(unsigned Start, unsigned Spaces);
362
363 /// Fill \c Replaces with the replacements for all effective changes.
364 void generateChanges();
365
366 /// Stores \p Text as the replacement for the whitespace in \p Range.
367 void storeReplacement(SourceRange Range, StringRef Text);
368 void appendNewlineText(std::string &Text, const Change &C);
369 void appendEscapedNewlineText(std::string &Text, unsigned Newlines,
370 unsigned PreviousEndOfTokenColumn,
371 unsigned EscapedNewlineColumn);
372 void appendIndentText(std::string &Text, unsigned IndentLevel,
373 unsigned Spaces, unsigned WhitespaceStartColumn,
374 bool IsAligned);
375 unsigned appendTabIndent(std::string &Text, unsigned Spaces,
376 unsigned Indentation);
377
378 SmallVector<Change, 16> Changes;
379 const SourceManager &SourceMgr;
380 tooling::Replacements Replaces;
381 const FormatStyle &Style;
382 bool UseCRLF;
383};
384
385} // namespace format
386} // namespace clang
387
388#endif
int Newlines
The number of newlines immediately before the Token after formatting.
Token Tok
The Token.
unsigned NewlinesBefore
The number of newlines immediately before the Token.
unsigned IndentLevel
The indent level of this token. Copied from the surrounding line.
FormatToken * Next
The next token in the unwrapped line.
Defines the SourceManager interface.
This file implements a token annotator, i.e.
This class handles loading and caching of source files into memory.
A trivial tuple used to represent a source range.
bool operator()(const Change &C1, const Change &C2) const
void replaceWhitespaceInToken(const FormatToken &Tok, unsigned Offset, unsigned ReplaceChars, StringRef PreviousPostfix, StringRef CurrentPrefix, bool InPPDirective, unsigned Newlines, int Spaces)
Inserts or replaces whitespace in the middle of a token.
void addUntouchableToken(const FormatToken &Tok, bool InPPDirective)
Adds information about an unchangeable token's whitespace.
void replaceWhitespace(FormatToken &Tok, unsigned Newlines, unsigned Spaces, unsigned StartOfTokenColumn, const FormatToken *AlignedTo=nullptr, bool InPPDirective=false, unsigned IndentedFromColumn=0)
Replaces the whitespace in front of Tok.
static bool inputUsesCRLF(StringRef Text, bool DefaultToCRLF)
Infers whether the input is using CRLF.
llvm::Error addReplacement(const tooling::Replacement &Replacement)
const tooling::Replacements & generateReplacements()
Returns all the Replacements created during formatting.
WhitespaceManager(const SourceManager &SourceMgr, const FormatStyle &Style, bool UseCRLF)
A text replacement.
Definition Replacement.h:83
Maintains a set of replacements that are conflict-free.
TokenType
Determines the semantic type of a syntactic token, e.g.
The JSON file list parser is used to communicate input to InstallAPI.
bool operator==(const CallGraphNode::CallRecord &LHS, const CallGraphNode::CallRecord &RHS)
Definition CallGraph.h:206
@ Type
The name was classified as a type.
Definition Sema.h:564
bool operator!=(CanQual< T > x, CanQual< U > y)
@ Other
Other implicit parameter.
Definition Decl.h:1761
A wrapper around a Token storing information about the whitespace characters preceding it.
Represents a change before a token, a break inside a token, or the layout of an unchanged token (or w...
std::tuple< unsigned, unsigned, unsigned > indentAndNestingLevel() const
Change(const FormatToken &Tok, bool CreateReplacement, SourceRange OriginalWhitespaceRange, int Spaces, unsigned StartOfTokenColumn, unsigned IndentedFromColumn, unsigned NewlinesBefore, StringRef PreviousLinePostfix, StringRef CurrentLinePrefix, const FormatToken *AlignedTo, bool ContinuesPPDirective, bool IsInsideToken)
Creates a Change.