clang 19.0.0git
WhitespaceManager.h
Go to the documentation of this file.
1//===--- WhitespaceManager.h - Format C++ code ------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// WhitespaceManager class manages whitespace around tokens and their
11/// replacements.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H
16#define LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H
17
18#include "TokenAnnotator.h"
20
21namespace clang {
22namespace format {
23
24/// Manages the whitespaces around tokens and their replacements.
25///
26/// This includes special handling for certain constructs, e.g. the alignment of
27/// trailing line comments.
28///
29/// To guarantee correctness of alignment operations, the \c WhitespaceManager
30/// must be informed about every token in the source file; for each token, there
31/// must be exactly one call to either \c replaceWhitespace or
32/// \c addUntouchableToken.
33///
34/// There may be multiple calls to \c breakToken for a given token.
36public:
37 WhitespaceManager(const SourceManager &SourceMgr, const FormatStyle &Style,
38 bool UseCRLF)
39 : SourceMgr(SourceMgr), Style(Style), UseCRLF(UseCRLF) {}
40
41 bool useCRLF() const { return UseCRLF; }
42
43 /// Infers whether the input is using CRLF.
44 static bool inputUsesCRLF(StringRef Text, bool DefaultToCRLF);
45
46 /// Replaces the whitespace in front of \p Tok. Only call once for
47 /// each \c AnnotatedToken.
48 ///
49 /// \p StartOfTokenColumn is the column at which the token will start after
50 /// this replacement. It is needed for determining how \p Spaces is turned
51 /// into tabs and spaces for some format styles.
52 void replaceWhitespace(FormatToken &Tok, unsigned Newlines, unsigned Spaces,
53 unsigned StartOfTokenColumn, bool IsAligned = false,
54 bool InPPDirective = false);
55
56 /// Adds information about an unchangeable token's whitespace.
57 ///
58 /// Needs to be called for every token for which \c replaceWhitespace
59 /// was not called.
60 void addUntouchableToken(const FormatToken &Tok, bool InPPDirective);
61
62 llvm::Error addReplacement(const tooling::Replacement &Replacement);
63
64 /// Inserts or replaces whitespace in the middle of a token.
65 ///
66 /// Inserts \p PreviousPostfix, \p Newlines, \p Spaces and \p CurrentPrefix
67 /// (in this order) at \p Offset inside \p Tok, replacing \p ReplaceChars
68 /// characters.
69 ///
70 /// Note: \p Spaces can be negative to retain information about initial
71 /// relative column offset between a line of a block comment and the start of
72 /// the comment. This negative offset may be compensated by trailing comment
73 /// alignment here. In all other cases negative \p Spaces will be truncated to
74 /// 0.
75 ///
76 /// When \p InPPDirective is true, escaped newlines are inserted. \p Spaces is
77 /// used to align backslashes correctly.
78 void replaceWhitespaceInToken(const FormatToken &Tok, unsigned Offset,
79 unsigned ReplaceChars,
80 StringRef PreviousPostfix,
81 StringRef CurrentPrefix, bool InPPDirective,
82 unsigned Newlines, int Spaces);
83
84 /// Returns all the \c Replacements created during formatting.
86
87 /// Represents a change before a token, a break inside a token,
88 /// or the layout of an unchanged token (or whitespace within).
89 struct Change {
90 /// Functor to sort changes in original source order.
92 public:
93 IsBeforeInFile(const SourceManager &SourceMgr) : SourceMgr(SourceMgr) {}
94 bool operator()(const Change &C1, const Change &C2) const;
95
96 private:
97 const SourceManager &SourceMgr;
98 };
99
100 /// Creates a \c Change.
101 ///
102 /// The generated \c Change will replace the characters at
103 /// \p OriginalWhitespaceRange with a concatenation of
104 /// \p PreviousLinePostfix, \p NewlinesBefore line breaks, \p Spaces spaces
105 /// and \p CurrentLinePrefix.
106 ///
107 /// \p StartOfTokenColumn and \p InPPDirective will be used to lay out
108 /// trailing comments and escaped newlines.
111 unsigned StartOfTokenColumn, unsigned NewlinesBefore,
112 StringRef PreviousLinePostfix, StringRef CurrentLinePrefix,
114
115 // The kind of the token whose whitespace this change replaces, or in which
116 // this change inserts whitespace.
117 // FIXME: Currently this is not set correctly for breaks inside comments, as
118 // the \c BreakableToken is still doing its own alignment.
120
122 // Changes might be in the middle of a token, so we cannot just keep the
123 // FormatToken around to query its information.
128 std::string CurrentLinePrefix;
131
132 // The number of spaces in front of the token or broken part of the token.
133 // This will be adapted when aligning tokens.
134 // Can be negative to retain information about the initial relative offset
135 // of the lines in a block comment. This is used when aligning trailing
136 // comments. Uncompensated negative offset is truncated to 0.
138
139 // If this change is inside of a token but not at the start of the token or
140 // directly after a newline.
142
143 // \c IsTrailingComment, \c TokenLength, \c PreviousEndOfTokenColumn and
144 // \c EscapedNewlineColumn will be calculated in
145 // \c calculateLineBreakInformation.
147 unsigned TokenLength;
150
151 // These fields are used to retain correct relative line indentation in a
152 // block comment when aligning trailing comments.
153 //
154 // If this Change represents a continuation of a block comment,
155 // \c StartOfBlockComment is pointer to the first Change in the block
156 // comment. \c IndentationOffset is a relative column offset to this
157 // change, so that the correct column can be reconstructed at the end of
158 // the alignment process.
161
162 // Depth of conditionals. Computed from tracking fake parenthesis, except
163 // it does not increase the indent for "chained" conditionals.
165
166 // A combination of indent, nesting and conditionals levels, which are used
167 // in tandem to compute lexical scope, for the purposes of deciding
168 // when to stop consecutive alignment runs.
169 std::tuple<unsigned, unsigned, unsigned> indentAndNestingLevel() const {
170 return std::make_tuple(Tok->IndentLevel, Tok->NestingLevel,
172 }
173 };
174
175private:
/// Describes a single cell used when aligning array initializers.
struct CellDescription {
  // Start position of the cell; used elsewhere in this class as an index into
  // \c Changes and as the start argument of \c calculateCellWidth.
  unsigned Index = 0;
  // NOTE(review): presumably the column number of this cell within its row;
  // confirm against \c getCells.
  unsigned Cell = 0;
  // End position of the cell; passed as the end argument of
  // \c calculateCellWidth.
  unsigned EndIndex = 0;
  // NOTE(review): presumably set when the cell contains a split element (see
  // \c isSplitCell); confirm against \c getCells.
  bool HasSplit = false;
  // Next cell in the chain that is walked when measuring a column; a null
  // pointer terminates the chain.
  CellDescription *NextColumnElement = nullptr;

  /// Two descriptions compare equal when \c Index, \c Cell and \c EndIndex
  /// all match. \c HasSplit and \c NextColumnElement do not take part in the
  /// comparison.
  constexpr bool operator==(const CellDescription &Other) const {
    if (Index != Other.Index)
      return false;
    if (Cell != Other.Cell)
      return false;
    return EndIndex == Other.EndIndex;
  }

  /// Negation of \c operator==.
  constexpr bool operator!=(const CellDescription &Other) const {
    return !operator==(Other);
  }
};
191
192 struct CellDescriptions {
193 SmallVector<CellDescription> Cells;
194 SmallVector<unsigned> CellCounts;
195 unsigned InitialSpaces = 0;
196
197 // Determine if every row in the array
198 // has the same number of columns.
199 bool isRectangular() const {
200 if (CellCounts.size() < 2)
201 return false;
202
203 for (auto NumberOfColumns : CellCounts)
204 if (NumberOfColumns != CellCounts[0])
205 return false;
206 return true;
207 }
208 };
209
210 /// Calculate \c IsTrailingComment, \c TokenLength for the last tokens
211 /// or token parts in a line and \c PreviousEndOfTokenColumn and
212 /// \c EscapedNewlineColumn for the first tokens or token parts in a line.
213 void calculateLineBreakInformation();
214
215 /// Align consecutive C/C++ preprocessor macros over all \c Changes.
216 void alignConsecutiveMacros();
217
218 /// Align consecutive assignments over all \c Changes.
219 void alignConsecutiveAssignments();
220
221 /// Align consecutive bitfields over all \c Changes.
222 void alignConsecutiveBitFields();
223
224 /// Align consecutive colons. For bitfields, TableGen DAGArgs and definitions.
225 void
226 alignConsecutiveColons(const FormatStyle::AlignConsecutiveStyle &AlignStyle,
227 TokenType Type);
228
229 /// Align consecutive declarations over all \c Changes.
230 void alignConsecutiveDeclarations();
231
232 /// Align chained conditionals over all \c Changes.
233 void alignChainedConditionals();
234
235 /// Align consecutive short case statements over all \c Changes.
236 void alignConsecutiveShortCaseStatements();
237
238 /// Align consecutive TableGen cond operator colon over all \c Changes.
239 void alignConsecutiveTableGenCondOperatorColons();
240
241 /// Align consecutive TableGen definitions over all \c Changes.
242 void alignConsecutiveTableGenDefinitions();
243
244 /// Align trailing comments over all \c Changes.
245 void alignTrailingComments();
246
247 /// Align trailing comments from change \p Start to change \p End at
248 /// the specified \p Column.
249 void alignTrailingComments(unsigned Start, unsigned End, unsigned Column);
250
251 /// Align escaped newlines over all \c Changes.
252 void alignEscapedNewlines();
253
254 /// Align escaped newlines from change \p Start to change \p End at
255 /// the specified \p Column.
256 void alignEscapedNewlines(unsigned Start, unsigned End, unsigned Column);
257
258 /// Align Array Initializers over all \c Changes.
259 void alignArrayInitializers();
260
261 /// Align Array Initializers from change \p Start to change \p End of the
262 /// change list.
263 void alignArrayInitializers(unsigned Start, unsigned End);
264
265 /// Align Array Initializers being careful to right justify the columns
266 /// as described by \p CellDescs.
267 void alignArrayInitializersRightJustified(CellDescriptions &&CellDescs);
268
269 /// Align Array Initializers being careful to left justify the columns
270 /// as described by \p CellDescs.
271 void alignArrayInitializersLeftJustified(CellDescriptions &&CellDescs);
272
273 /// Calculate the cell width between two indexes.
274 unsigned calculateCellWidth(unsigned Start, unsigned End,
275 bool WithSpaces = false) const;
276
277 /// Get a set of fully specified CellDescriptions between \p Start and
278 /// \p End of the change list.
279 CellDescriptions getCells(unsigned Start, unsigned End);
280
281 /// Does this \p Cell contain a split element?
282 static bool isSplitCell(const CellDescription &Cell);
283
284 /// Get the width of the preceding cells from \p Start to \p End.
285 template <typename I>
286 auto getNetWidth(const I &Start, const I &End, unsigned InitialSpaces) const {
287 auto NetWidth = InitialSpaces;
288 for (auto PrevIter = Start; PrevIter != End; ++PrevIter) {
289 // If we broke the line the initial spaces are already
290 // accounted for.
291 assert(PrevIter->Index < Changes.size());
292 if (Changes[PrevIter->Index].NewlinesBefore > 0)
293 NetWidth = 0;
294 NetWidth +=
295 calculateCellWidth(PrevIter->Index, PrevIter->EndIndex, true) + 1;
296 }
297 return NetWidth;
298 }
299
300 /// Get the maximum width of a cell in a sequence of columns.
301 template <typename I>
302 unsigned getMaximumCellWidth(I CellIter, unsigned NetWidth) const {
303 unsigned CellWidth =
304 calculateCellWidth(CellIter->Index, CellIter->EndIndex, true);
305 if (Changes[CellIter->Index].NewlinesBefore == 0)
306 CellWidth += NetWidth;
307 for (const auto *Next = CellIter->NextColumnElement; Next;
308 Next = Next->NextColumnElement) {
309 auto ThisWidth = calculateCellWidth(Next->Index, Next->EndIndex, true);
310 if (Changes[Next->Index].NewlinesBefore == 0)
311 ThisWidth += NetWidth;
312 CellWidth = std::max(CellWidth, ThisWidth);
313 }
314 return CellWidth;
315 }
316
317 /// Get The maximum width of all columns to a given cell.
318 template <typename I>
319 unsigned getMaximumNetWidth(const I &CellStart, const I &CellStop,
320 unsigned InitialSpaces, unsigned CellCount,
321 unsigned MaxRowCount) const {
322 auto MaxNetWidth = getNetWidth(CellStart, CellStop, InitialSpaces);
323 auto RowCount = 1U;
324 auto Offset = std::distance(CellStart, CellStop);
325 for (const auto *Next = CellStop->NextColumnElement; Next;
326 Next = Next->NextColumnElement) {
327 if (RowCount >= MaxRowCount)
328 break;
329 auto Start = (CellStart + RowCount * CellCount);
330 auto End = Start + Offset;
331 MaxNetWidth =
332 std::max(MaxNetWidth, getNetWidth(Start, End, InitialSpaces));
333 ++RowCount;
334 }
335 return MaxNetWidth;
336 }
337
338 /// Align a split cell with a newline to the first element in the cell.
339 void alignToStartOfCell(unsigned Start, unsigned End);
340
341 /// Link the Cell pointers in the list of Cells.
342 static CellDescriptions linkCells(CellDescriptions &&CellDesc);
343
344 /// Fill \c Replaces with the replacements for all effective changes.
345 void generateChanges();
346
347 /// Stores \p Text as the replacement for the whitespace in \p Range.
348 void storeReplacement(SourceRange Range, StringRef Text);
349 void appendNewlineText(std::string &Text, unsigned Newlines);
350 void appendEscapedNewlineText(std::string &Text, unsigned Newlines,
351 unsigned PreviousEndOfTokenColumn,
352 unsigned EscapedNewlineColumn);
353 void appendIndentText(std::string &Text, unsigned IndentLevel,
354 unsigned Spaces, unsigned WhitespaceStartColumn,
355 bool IsAligned);
356 unsigned appendTabIndent(std::string &Text, unsigned Spaces,
357 unsigned Indentation);
358
359 SmallVector<Change, 16> Changes;
360 const SourceManager &SourceMgr;
361 tooling::Replacements Replaces;
362 const FormatStyle &Style;
363 bool UseCRLF;
364};
365
366} // namespace format
367} // namespace clang
368
369#endif
StringRef Text
Definition: Format.cpp:2953
Defines the SourceManager interface.
This file implements a token annotator, i.e.
This class handles loading and caching of source files into memory.
A trivial tuple used to represent a source range.
Functor to sort changes in original source order.
bool operator()(const Change &C1, const Change &C2) const
Manages the whitespaces around tokens and their replacements.
void replaceWhitespaceInToken(const FormatToken &Tok, unsigned Offset, unsigned ReplaceChars, StringRef PreviousPostfix, StringRef CurrentPrefix, bool InPPDirective, unsigned Newlines, int Spaces)
Inserts or replaces whitespace in the middle of a token.
void addUntouchableToken(const FormatToken &Tok, bool InPPDirective)
Adds information about an unchangeable token's whitespace.
static bool inputUsesCRLF(StringRef Text, bool DefaultToCRLF)
Infers whether the input is using CRLF.
llvm::Error addReplacement(const tooling::Replacement &Replacement)
const tooling::Replacements & generateReplacements()
Returns all the Replacements created during formatting.
void replaceWhitespace(FormatToken &Tok, unsigned Newlines, unsigned Spaces, unsigned StartOfTokenColumn, bool IsAligned=false, bool InPPDirective=false)
Replaces the whitespace in front of Tok.
WhitespaceManager(const SourceManager &SourceMgr, const FormatStyle &Style, bool UseCRLF)
A text replacement.
Definition: Replacement.h:83
Maintains a set of replacements that are conflict-free.
Definition: Replacement.h:212
TokenType
Determines the semantic type of a syntactic token, e.g.
Definition: FormatToken.h:198
The JSON file list parser is used to communicate input to InstallAPI.
bool operator==(const CallGraphNode::CallRecord &LHS, const CallGraphNode::CallRecord &RHS)
Definition: CallGraph.h:207
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:55
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:283
unsigned NestingLevel
The nesting level of this token, i.e.
Definition: FormatToken.h:506
unsigned IndentLevel
The indent level of this token. Copied from the surrounding line.
Definition: FormatToken.h:509
Represents a change before a token, a break inside a token, or the layout of an unchanged token (or w...
std::tuple< unsigned, unsigned, unsigned > indentAndNestingLevel() const