clang 22.0.0git
WhitespaceManager.h
Go to the documentation of this file.
1//===--- WhitespaceManager.h - Format C++ code ------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// WhitespaceManager class manages whitespace around tokens and their
11/// replacements.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H
16#define LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H
17
18#include "TokenAnnotator.h"
20
21namespace clang {
22namespace format {
23
24/// Manages the whitespaces around tokens and their replacements.
25///
26/// This includes special handling for certain constructs, e.g. the alignment of
27/// trailing line comments.
28///
29/// To guarantee correctness of alignment operations, the \c WhitespaceManager
30/// must be informed about every token in the source file; for each token, there
31/// must be exactly one call to either \c replaceWhitespace or
32/// \c addUntouchableToken.
33///
34 /// \c replaceWhitespaceInToken may be called multiple times for a given token.
36public:
/// Constructs a manager from \p SourceMgr, \p Style and \p UseCRLF.
///
/// \p SourceMgr and \p Style are bound to reference members rather than
/// copied, so both objects must outlive this manager. \p UseCRLF is stored by
/// value.
37 WhitespaceManager(const SourceManager &SourceMgr, const FormatStyle &Style,
38 bool UseCRLF)
39 : SourceMgr(SourceMgr), Style(Style), UseCRLF(UseCRLF) {}
40
/// Returns the current value of the \c UseCRLF member.
41 bool useCRLF() const { return UseCRLF; }
42
43 /// Infers whether the input is using CRLF.
44 static bool inputUsesCRLF(StringRef Text, bool DefaultToCRLF);
45
46 /// Replaces the whitespace in front of \p Tok. Only call once for
47 /// each \c AnnotatedToken.
48 ///
49 /// \p StartOfTokenColumn is the column at which the token will start after
50 /// this replacement. It is needed for determining how \p Spaces is turned
51 /// into tabs and spaces for some format styles.
52 ///
53 /// \p IndentedFromColumn is only used when the replacement starts a new
54 /// line. It should be the column that the position of the line is derived
55 /// from. It is used for determining what lines the alignment process should
56 /// move.
57 void replaceWhitespace(FormatToken &Tok, unsigned Newlines, unsigned Spaces,
58 unsigned StartOfTokenColumn, bool IsAligned = false,
59 bool InPPDirective = false,
60 unsigned IndentedFromColumn = 0);
61
62 /// Adds information about an unchangeable token's whitespace.
63 ///
64 /// Needs to be called for every token for which \c replaceWhitespace
65 /// was not called.
66 void addUntouchableToken(const FormatToken &Tok, bool InPPDirective);
67
68 llvm::Error addReplacement(const tooling::Replacement &Replacement);
69
70 /// Inserts or replaces whitespace in the middle of a token.
71 ///
72 /// Inserts \p PreviousPostfix, \p Newlines, \p Spaces and \p CurrentPrefix
73 /// (in this order) at \p Offset inside \p Tok, replacing \p ReplaceChars
74 /// characters.
75 ///
76 /// Note: \p Spaces can be negative to retain information about initial
77 /// relative column offset between a line of a block comment and the start of
78 /// the comment. This negative offset may be compensated by trailing comment
79 /// alignment here. In all other cases negative \p Spaces will be truncated to
80 /// 0.
81 ///
82 /// When \p InPPDirective is true, escaped newlines are inserted. \p Spaces is
83 /// used to align backslashes correctly.
84 void replaceWhitespaceInToken(const FormatToken &Tok, unsigned Offset,
85 unsigned ReplaceChars,
86 StringRef PreviousPostfix,
87 StringRef CurrentPrefix, bool InPPDirective,
88 unsigned Newlines, int Spaces);
89
90 /// Returns all the \c Replacements created during formatting.
92
93 /// Represents a change before a token, a break inside a token,
94 /// or the layout of an unchanged token (or whitespace within).
95 struct Change {
96 /// Functor to sort changes in original source order.
98 public:
// Binds \p SourceMgr to the functor's reference member; the source manager
// must therefore outlive the functor.
99 IsBeforeInFile(const SourceManager &SourceMgr) : SourceMgr(SourceMgr) {}
100 bool operator()(const Change &C1, const Change &C2) const;
101
102 private:
103 const SourceManager &SourceMgr;
104 };
105
106 /// Creates a \c Change.
107 ///
108 /// The generated \c Change will replace the characters at
109 /// \p OriginalWhitespaceRange with a concatenation of
110 /// \p PreviousLinePostfix, \p NewlinesBefore line breaks, \p Spaces spaces
111 /// and \p CurrentLinePrefix.
112 ///
113 /// \p StartOfTokenColumn and \p ContinuesPPDirective will be used to lay
114 /// out trailing comments and escaped newlines. \p IndentedFromColumn will
115 /// be used to continue aligned lines.
118 unsigned StartOfTokenColumn, unsigned IndentedFromColumn,
119 unsigned NewlinesBefore, StringRef PreviousLinePostfix,
120 StringRef CurrentLinePrefix, bool IsAligned,
122
123 // The kind of the token whose whitespace this change replaces, or in which
124 // this change inserts whitespace.
125 // FIXME: Currently this is not set correctly for breaks inside comments, as
126 // the \c BreakableToken is still doing its own alignment.
128
130 // Changes might be in the middle of a token, so we cannot just keep the
131 // FormatToken around to query its information.
134 // Only used when the token is at the start of a line. The column that the
135 // position of the line is derived from. The alignment procedure moves the
136 // line when it moves a token in the same unwrapped line that is to the left
137 // of said column.
141 std::string CurrentLinePrefix;
144
145 // The number of spaces in front of the token or broken part of the token.
146 // This will be adapted when aligning tokens.
147 // Can be negative to retain information about the initial relative offset
148 // of the lines in a block comment. This is used when aligning trailing
149 // comments. Uncompensated negative offset is truncated to 0.
151
152 // If this change is inside of a token but not at the start of the token or
153 // directly after a newline.
155
156 // \c IsTrailingComment, \c TokenLength, \c PreviousEndOfTokenColumn and
157 // \c EscapedNewlineColumn will be calculated in
158 // \c calculateLineBreakInformation.
160 unsigned TokenLength;
163
164 // These fields are used to retain correct relative line indentation in a
165 // block comment when aligning trailing comments.
166 //
167 // If this Change represents a continuation of a block comment,
168 // \c StartOfBlockComment is a pointer to the first Change in the block
169 // comment. \c IndentationOffset is a relative column offset to this
170 // change, so that the correct column can be reconstructed at the end of
171 // the alignment process.
174
175 // Depth of conditionals. Computed from tracking fake parenthesis, except
176 // it does not increase the indent for "chained" conditionals.
178
179 // A combination of indent, nesting and conditionals levels, which are used
180 // in tandem to compute lexical scope, for the purposes of deciding
181 // when to stop consecutive alignment runs.
182 std::tuple<unsigned, unsigned, unsigned> indentAndNestingLevel() const {
183 return std::make_tuple(Tok->IndentLevel, Tok->NestingLevel,
185 }
186 };
187
188private:
// Describes a single cell handled by the array-initializer alignment helpers
// (getNetWidth, getMaximumCellWidth, getMaximumNetWidth).
189 struct CellDescription {
// Index into \c Changes; the helpers read Changes[Index] and pass Index as
// the start argument of calculateCellWidth.
190 unsigned Index = 0;
// NOTE(review): presumably the column number of this cell within its row --
// not established by the code visible here; confirm against getCells.
191 unsigned Cell = 0;
// Passed as the end argument of calculateCellWidth together with Index.
192 unsigned EndIndex = 0;
// NOTE(review): presumably records that the cell contains a split element
// (cf. isSplitCell) -- confirm.
193 bool HasSplit = false;
// Next element of a null-terminated chain that the width helpers walk.
// NOTE(review): presumably the cell in the same column of the following row,
// set up by linkCells -- confirm.
194 CellDescription *NextColumnElement = nullptr;
195
// Equality looks only at Index, Cell and EndIndex; HasSplit and
// NextColumnElement do not take part in the comparison.
196 constexpr bool operator==(const CellDescription &Other) const {
197 return Index == Other.Index && Cell == Other.Cell &&
198 EndIndex == Other.EndIndex;
199 }
// Exact negation of operator==.
200 constexpr bool operator!=(const CellDescription &Other) const {
201 return !(*this == Other);
202 }
203 };
204
// Bundles the cells of an array initializer with per-row column counts.
205 struct CellDescriptions {
// The cell descriptions themselves.
// NOTE(review): getMaximumNetWidth addresses rows as
// CellStart + RowCount * CellCount, which suggests a flat row-by-row layout
// -- confirm against getCells.
206 SmallVector<CellDescription> Cells;
// One entry per row holding that row's number of columns (this is what
// isRectangular compares).
207 SmallVector<unsigned> CellCounts;
// NOTE(review): presumably the value handed to getNetWidth /
// getMaximumNetWidth as their InitialSpaces argument -- confirm at the call
// sites.
208 unsigned InitialSpaces = 0;
209
210 // Determine if every row in the array
211 // has the same number of columns.
// Returns false when there are fewer than two rows, so an empty or
// single-row array is never reported as rectangular.
212 bool isRectangular() const {
213 if (CellCounts.size() < 2)
214 return false;
215
// Every row is compared against the first row's column count.
216 for (auto NumberOfColumns : CellCounts)
217 if (NumberOfColumns != CellCounts[0])
218 return false;
219 return true;
220 }
221 };
222
223 /// Calculate \c IsTrailingComment, \c TokenLength for the last tokens
224 /// or token parts in a line and \c PreviousEndOfTokenColumn and
225 /// \c EscapedNewlineColumn for the first tokens or token parts in a line.
226 void calculateLineBreakInformation();
227
228 /// Align consecutive C/C++ preprocessor macros over all \c Changes.
229 void alignConsecutiveMacros();
230
231 /// Align consecutive assignments over all \c Changes.
232 void alignConsecutiveAssignments();
233
234 /// Align consecutive bitfields over all \c Changes.
235 void alignConsecutiveBitFields();
236
237 /// Align consecutive colons. For bitfields, TableGen DAGArgs and definitions.
238 void
239 alignConsecutiveColons(const FormatStyle::AlignConsecutiveStyle &AlignStyle,
241
242 /// Align consecutive declarations over all \c Changes.
243 void alignConsecutiveDeclarations();
244
245 /// Align chained conditionals over all \c Changes.
246 void alignChainedConditionals();
247
248 /// Align consecutive short case statements over all \c Changes.
249 void alignConsecutiveShortCaseStatements(bool IsExpr);
250
251 /// Align consecutive TableGen DAGArg colon over all \c Changes.
252 void alignConsecutiveTableGenBreakingDAGArgColons();
253
254 /// Align consecutive TableGen cond operator colon over all \c Changes.
255 void alignConsecutiveTableGenCondOperatorColons();
256
257 /// Align consecutive TableGen definitions over all \c Changes.
258 void alignConsecutiveTableGenDefinitions();
259
260 /// Align trailing comments over all \c Changes.
261 void alignTrailingComments();
262
263 /// Align trailing comments from change \p Start to change \p End at
264 /// the specified \p Column.
265 void alignTrailingComments(unsigned Start, unsigned End, unsigned Column);
266
267 /// Align escaped newlines over all \c Changes.
268 void alignEscapedNewlines();
269
270 /// Align escaped newlines from change \p Start to change \p End at
271 /// the specified \p Column.
272 void alignEscapedNewlines(unsigned Start, unsigned End, unsigned Column);
273
274 /// Align Array Initializers over all \c Changes.
275 void alignArrayInitializers();
276
277 /// Align Array Initializers in the range of changes from \p Start to
278 /// \p End.
279 void alignArrayInitializers(unsigned Start, unsigned End);
280
281 /// Align Array Initializers being careful to right justify the columns
282 /// as described by \p CellDescs.
283 void alignArrayInitializersRightJustified(CellDescriptions &&CellDescs);
284
285 /// Align Array Initializers being careful to left justify the columns
286 /// as described by \p CellDescs.
287 void alignArrayInitializersLeftJustified(CellDescriptions &&CellDescs);
288
289 /// Calculate the cell width between two indexes.
290 unsigned calculateCellWidth(unsigned Start, unsigned End,
291 bool WithSpaces = false) const;
292
293 /// Get a set of fully specified CellDescriptions between \p Start and
294 /// \p End of the change list.
295 CellDescriptions getCells(unsigned Start, unsigned End);
296
297 /// Does this \p Cell contain a split element?
298 static bool isSplitCell(const CellDescription &Cell);
299
300 /// Get the width of the preceding cells from \p Start to \p End.
/// Starts from \p InitialSpaces and, for each cell in [\p Start, \p End),
/// adds the cell's width as reported by \c calculateCellWidth (called with
/// \c WithSpaces set to true) plus one.
///
/// \tparam I iterator over cell descriptions; must be copyable, comparable
/// with \c != and dereference to something exposing \c Index and \c EndIndex.
/// \returns the accumulated width; the deduced return type is \c unsigned.
301 template <typename I>
302 auto getNetWidth(const I &Start, const I &End, unsigned InitialSpaces) const {
303 auto NetWidth = InitialSpaces;
304 for (auto PrevIter = Start; PrevIter != End; ++PrevIter) {
305 // If we broke the line the initial spaces are already
306 // accounted for.
307 assert(PrevIter->Index < Changes.size());
// The reset discards everything accumulated so far, not only
// \p InitialSpaces, so only cells after the last line break contribute.
308 if (Changes[PrevIter->Index].NewlinesBefore > 0)
309 NetWidth = 0;
// NOTE(review): the extra 1 presumably accounts for the separator that
// follows each cell -- confirm.
310 NetWidth +=
311 calculateCellWidth(PrevIter->Index, PrevIter->EndIndex, true) + 1;
312 }
313 return NetWidth;
314 }
315
316 /// Get the maximum width of a cell in a sequence of columns.
/// Considers the cell at \p CellIter and every cell reachable from it through
/// \c NextColumnElement, and returns the largest width among them.
///
/// Each width comes from \c calculateCellWidth with \c WithSpaces set to
/// true; \p NetWidth is added only for cells whose change has
/// \c NewlinesBefore == 0.
317 template <typename I>
318 unsigned getMaximumCellWidth(I CellIter, unsigned NetWidth) const {
// Width of the first cell seeds the running maximum.
319 unsigned CellWidth =
320 calculateCellWidth(CellIter->Index, CellIter->EndIndex, true);
321 if (Changes[CellIter->Index].NewlinesBefore == 0)
322 CellWidth += NetWidth;
// Walk the rest of the null-terminated NextColumnElement chain, applying the
// same rule to each cell.
323 for (const auto *Next = CellIter->NextColumnElement; Next;
324 Next = Next->NextColumnElement) {
325 auto ThisWidth = calculateCellWidth(Next->Index, Next->EndIndex, true);
326 if (Changes[Next->Index].NewlinesBefore == 0)
327 ThisWidth += NetWidth;
328 CellWidth = std::max(CellWidth, ThisWidth);
329 }
330 return CellWidth;
331 }
332
333 /// Get the maximum width of all columns up to a given cell.
/// Returns the largest \c getNetWidth result over the range
/// [\p CellStart, \p CellStop) and the equally long ranges that begin at
/// \p CellStart + k * \p CellCount for k = 1, 2, ...
///
/// At most \p MaxRowCount ranges are measured in total, and never more than
/// one plus the length of \p CellStop's \c NextColumnElement chain.
///
/// \tparam I must support \c std::distance and addition of an integer offset.
334 template <typename I>
335 unsigned getMaximumNetWidth(const I &CellStart, const I &CellStop,
336 unsigned InitialSpaces, unsigned CellCount,
337 unsigned MaxRowCount) const {
// The first range is measured up front, which is why RowCount starts at 1.
338 auto MaxNetWidth = getNetWidth(CellStart, CellStop, InitialSpaces);
339 auto RowCount = 1U;
// Number of cells in [CellStart, CellStop); reused as the length of every
// later range.
340 auto Offset = std::distance(CellStart, CellStop);
// Next only bounds the number of iterations by the chain length; the loop
// body never dereferences it.
341 for (const auto *Next = CellStop->NextColumnElement; Next;
342 Next = Next->NextColumnElement) {
343 if (RowCount >= MaxRowCount)
344 break;
// NOTE(review): assumes each row occupies CellCount consecutive cells after
// CellStart -- confirm against getCells/linkCells.
345 auto Start = (CellStart + RowCount * CellCount);
346 auto End = Start + Offset;
347 MaxNetWidth =
348 std::max(MaxNetWidth, getNetWidth(Start, End, InitialSpaces));
349 ++RowCount;
350 }
351 return MaxNetWidth;
352 }
353
354 /// Align a split cell with a newline to the first element in the cell.
355 void alignToStartOfCell(unsigned Start, unsigned End);
356
357 /// Link the Cell pointers in the list of Cells.
358 static CellDescriptions linkCells(CellDescriptions &&CellDesc);
359
360 /// Fill \c Replaces with the replacements for all effective changes.
361 void generateChanges();
362
363 /// Stores \p Text as the replacement for the whitespace in \p Range.
364 void storeReplacement(SourceRange Range, StringRef Text);
365 void appendNewlineText(std::string &Text, const Change &C);
366 void appendEscapedNewlineText(std::string &Text, unsigned Newlines,
367 unsigned PreviousEndOfTokenColumn,
368 unsigned EscapedNewlineColumn);
369 void appendIndentText(std::string &Text, unsigned IndentLevel,
370 unsigned Spaces, unsigned WhitespaceStartColumn,
371 bool IsAligned);
372 unsigned appendTabIndent(std::string &Text, unsigned Spaces,
373 unsigned Indentation);
374
375 SmallVector<Change, 16> Changes;
376 const SourceManager &SourceMgr;
377 tooling::Replacements Replaces;
378 const FormatStyle &Style;
379 bool UseCRLF;
380};
381
382} // namespace format
383} // namespace clang
384
385#endif
int Newlines
The number of newlines immediately before the Token after formatting.
Token Tok
The Token.
unsigned NewlinesBefore
The number of newlines immediately before the Token.
unsigned IndentLevel
The indent level of this token. Copied from the surrounding line.
FormatToken * Next
The next token in the unwrapped line.
Defines the SourceManager interface.
This file implements a token annotator, i.e.
This class handles loading and caching of source files into memory.
A trivial tuple used to represent a source range.
bool operator()(const Change &C1, const Change &C2) const
void replaceWhitespaceInToken(const FormatToken &Tok, unsigned Offset, unsigned ReplaceChars, StringRef PreviousPostfix, StringRef CurrentPrefix, bool InPPDirective, unsigned Newlines, int Spaces)
Inserts or replaces whitespace in the middle of a token.
void replaceWhitespace(FormatToken &Tok, unsigned Newlines, unsigned Spaces, unsigned StartOfTokenColumn, bool IsAligned=false, bool InPPDirective=false, unsigned IndentedFromColumn=0)
Replaces the whitespace in front of Tok.
void addUntouchableToken(const FormatToken &Tok, bool InPPDirective)
Adds information about an unchangeable token's whitespace.
static bool inputUsesCRLF(StringRef Text, bool DefaultToCRLF)
Infers whether the input is using CRLF.
llvm::Error addReplacement(const tooling::Replacement &Replacement)
const tooling::Replacements & generateReplacements()
Returns all the Replacements created during formatting.
WhitespaceManager(const SourceManager &SourceMgr, const FormatStyle &Style, bool UseCRLF)
A text replacement.
Definition Replacement.h:83
Maintains a set of replacements that are conflict-free.
TokenType
Determines the semantic type of a syntactic token, e.g.
The JSON file list parser is used to communicate input to InstallAPI.
bool operator==(const CallGraphNode::CallRecord &LHS, const CallGraphNode::CallRecord &RHS)
Definition CallGraph.h:204
@ Type
The name was classified as a type.
Definition Sema.h:562
bool operator!=(CanQual< T > x, CanQual< U > y)
@ Other
Other implicit parameter.
Definition Decl.h:1746
A wrapper around a Token storing information about the whitespace characters preceding it.
Represents a change before a token, a break inside a token, or the layout of an unchanged token (or whitespace within).
std::tuple< unsigned, unsigned, unsigned > indentAndNestingLevel() const
Change(const FormatToken &Tok, bool CreateReplacement, SourceRange OriginalWhitespaceRange, int Spaces, unsigned StartOfTokenColumn, unsigned IndentedFromColumn, unsigned NewlinesBefore, StringRef PreviousLinePostfix, StringRef CurrentLinePrefix, bool IsAligned, bool ContinuesPPDirective, bool IsInsideToken)
Creates a Change.