clang 23.0.0git
TokenAnnotator.h
Go to the documentation of this file.
1//===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements a token annotator, i.e. creates
11/// \c AnnotatedTokens out of \c FormatTokens with required extra information.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
16#define LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
17
18#include "UnwrappedLineParser.h"
19
20namespace clang {
21namespace format {
22
39
41 // Contained in class declaration/definition.
43 // Contained in enum declaration/definition.
45 // Contained in compound requirement.
47 // Contained in other blocks (function, lambda, loop, if/else, child, etc).
49};
50
52public:
54 : First(Line.Tokens.front().Tok), Type(LT_Other), Level(Line.Level),
67 assert(!Line.Tokens.empty());
68
69 // Calculate Next and Previous for all tokens. Note that we must overwrite
70 // Next and Previous for every token, as previous formatting runs might have
71 // left them in a different state.
72 First->Previous = nullptr;
73 FormatToken *Current = First;
74 addChildren(Line.Tokens.front(), Current);
75 for (const UnwrappedLineNode &Node : llvm::drop_begin(Line.Tokens)) {
76 if (Node.Tok->MacroParent)
77 ContainsMacroCall = true;
78 Current->Next = Node.Tok;
79 Node.Tok->Previous = Current;
80 Current = Current->Next;
81 addChildren(Node, Current);
82 // FIXME: if we add children, previous will point to the token before
83 // the children; changing this requires significant changes across
84 // clang-format.
85 }
86 Last = Current;
87 Last->Next = nullptr;
88 }
89
90 void addChildren(const UnwrappedLineNode &Node, FormatToken *Current) {
91 Current->Children.clear();
92 for (const auto &Child : Node.Children) {
93 Children.push_back(new AnnotatedLine(Child));
94 if (Children.back()->ContainsMacroCall)
95 ContainsMacroCall = true;
96 Current->Children.push_back(Children.back());
97 }
98 }
99
100 size_t size() const {
101 size_t Size = 1;
102 for (const auto *Child : Children)
103 Size += Child->size();
104 return Size;
105 }
106
108 for (AnnotatedLine *Child : Children)
109 delete Child;
110 FormatToken *Current = First;
111 while (Current) {
112 Current->Children.clear();
113 Current->Role.reset();
114 Current = Current->Next;
115 }
116 }
117
118 bool isComment() const {
119 return First && First->is(tok::comment) && !First->getNextNonComment();
120 }
121
122 /// \c true if this line starts with the given tokens in order, ignoring
123 /// comments.
124 template <typename... Ts> bool startsWith(Ts... Tokens) const {
125 return First && First->startsSequence(Tokens...);
126 }
127
128 /// \c true if this line ends with the given tokens in reversed order,
129 /// ignoring comments.
130 /// For example, given tokens [T1, T2, T3, ...], the function returns true if
131 /// this line is like "... T3 T2 T1".
132 template <typename... Ts> bool endsWith(Ts... Tokens) const {
133 return Last && Last->endsSequence(Tokens...);
134 }
135
136 /// \c true if this line looks like a function definition instead of a
137 /// function declaration. Asserts MightBeFunctionDecl.
139 assert(MightBeFunctionDecl);
140 // Decide whether this token stream is a function definition or a function
141 // declaration. A declaration such as foo(); ends with a ';', so a line
142 // that does not end with a ';' is treated as a definition. A trailing
143 // comment must not hide that ';', since the line could end with
144 // foo(); /* comment */
145 // or
146 // foo(); // comment
147 // or
148 // foo() // comment
149 // endsWith() ignores the comment.
150 return !endsWith(tok::semi);
151 }
152
153 /// \c true if this line starts a namespace definition.
154 bool startsWithNamespace() const {
155 return startsWith(tok::kw_namespace) || startsWith(TT_NamespaceMacro) ||
156 startsWith(tok::kw_inline, tok::kw_namespace) ||
157 startsWith(tok::kw_export, tok::kw_namespace);
158 }
159
160 /// \c true if this line starts a C++ export block.
162 return startsWith(tok::kw_export, tok::l_brace);
163 }
164
166 assert(First);
167 return First->is(tok::comment) ? First->getNextNonComment() : First;
168 }
169
171 assert(Last);
172 return Last->is(tok::comment) ? Last->getPreviousNonComment() : Last;
173 }
174
177
179
181 unsigned Level;
182 unsigned PPLevel;
192
193 /// \c True if this line contains a macro call for which an expansion exists.
194 bool ContainsMacroCall = false;
195
196 /// \c True if calculateFormattingInformation() has been called on this line.
197 bool Computed = false;
198
199 /// \c True if this line should be formatted, i.e. intersects directly or
200 /// indirectly with one of the input ranges.
202
203 /// \c True if the leading empty lines of this line intersect with one of the
204 /// input ranges.
206
207 /// \c True if one of this line's children intersects with an input range.
209
210 /// \c True if breaking after last attribute group in function return type.
212
213 /// \c True if this line should be indented by ContinuationIndent in addition
214 /// to the normal indention level.
216
218
219private:
220 // Disallow copying.
221 AnnotatedLine(const AnnotatedLine &) = delete;
222 void operator=(const AnnotatedLine &) = delete;
223};
224
225/// Determines extra information about the tokens comprising an
226/// \c UnwrappedLine.
228public:
/// Creates an annotator for the given \p Style and \p Keywords.
///
/// \p Style and \p Keywords are stored as references, so both must outlive
/// this object. \c IsCpp caches \c Style.isCpp() and \c LangOpts is derived
/// once from \p Style via \c getFormattingLangOpts().
229 TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords)
230 : Style(Style), IsCpp(Style.isCpp()),
231 LangOpts(getFormattingLangOpts(Style)), Keywords(Keywords) {}
232
233 /// Adapts the indent levels of comment lines to the indent of the
234 /// subsequent line.
235 // FIXME: Can/should this be done in the UnwrappedLineParser?
237
240
241private:
242 /// Calculate the penalty for splitting before \c Tok.
243 unsigned splitPenalty(const AnnotatedLine &Line, const FormatToken &Tok,
244 bool InFunctionDecl) const;
245
246 bool spaceRequiredBeforeParens(const FormatToken &Right) const;
247
248 bool spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left,
249 const FormatToken &Right) const;
250
251 bool spaceRequiredBefore(const AnnotatedLine &Line,
252 const FormatToken &Right) const;
253
254 bool mustBreakBefore(AnnotatedLine &Line, const FormatToken &Right) const;
255
256 bool canBreakBefore(const AnnotatedLine &Line,
257 const FormatToken &Right) const;
258
259 bool mustBreakForReturnType(const AnnotatedLine &Line) const;
260
261 bool mustBreakBeforeReturnType(const AnnotatedLine &Line) const;
262
263 void printDebugInfo(const AnnotatedLine &Line) const;
264
265 void calculateUnbreakableTailLengths(AnnotatedLine &Line) const;
266
267 void calculateArrayInitializerColumnList(AnnotatedLine &Line) const;
268
269 FormatToken *calculateInitializerColumnList(AnnotatedLine &Line,
270 FormatToken *CurrentToken,
271 unsigned Depth) const;
272 FormatStyle::PointerAlignmentStyle
273 getTokenReferenceAlignment(const FormatToken &PointerOrReference) const;
274
275 FormatStyle::PointerAlignmentStyle getTokenPointerOrReferenceAlignment(
276 const FormatToken &PointerOrReference) const;
277
278 const FormatStyle &Style;
279
280 bool IsCpp;
281 LangOptions LangOpts;
282
283 const AdditionalKeywords &Keywords;
284
285 SmallVector<ScopeType> Scopes, MacroBodyScopes;
286};
287
288} // end namespace format
289} // end namespace clang
290
291#endif
Token Tok
The Token.
This file contains the declaration of the UnwrappedLineParser, which turns a stream of tokens into Un...
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
void addChildren(const UnwrappedLineNode &Node, FormatToken *Current)
bool ReturnTypeWrapped
True if breaking after last attribute group in function return type.
FormatToken * getFirstNonComment() const
bool Computed
True if calculateFormattingInformation() has been called on this line.
bool LeadingEmptyLinesAffected
True if the leading empty lines of this line intersect with one of the input ranges.
bool Affected
True if this line should be formatted, i.e. intersects directly or indirectly with one of the input ranges.
AnnotatedLine(const UnwrappedLine &Line)
bool ContainsMacroCall
True if this line contains a macro call for which an expansion exists.
bool mightBeFunctionDefinition() const
true if this line looks like a function definition instead of a function declaration.
bool ChildrenAffected
True if one of this line's children intersects with an input range.
SmallVector< AnnotatedLine *, 0 > Children
bool startsWithNamespace() const
true if this line starts a namespace definition.
bool startsWithExportBlock() const
true if this line starts a C++ export block.
bool IsContinuation
True if this line should be indented by ContinuationIndent in addition to the normal indention level.
bool endsWith(Ts... Tokens) const
true if this line ends with the given tokens in reversed order, ignoring comments.
bool startsWith(Ts... Tokens) const
true if this line starts with the given tokens in order, ignoring comments.
FormatToken * getLastNonComment() const
void calculateFormattingInformation(AnnotatedLine &Line) const
void annotate(AnnotatedLine &Line)
TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords)
void setCommentLineLevels(SmallVectorImpl< AnnotatedLine * > &Lines) const
Adapts the indent levels of comment lines to the indent of the subsequent line.
LangOptions getFormattingLangOpts(const FormatStyle &Style)
Definition Format.cpp:4458
The JSON file list parser is used to communicate input to InstallAPI.
bool isCpp() const
Definition Format.h:3843
#define false
Definition stdbool.h:26
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
A wrapper around a Token storing information about the whitespace characters preceding it.
SmallVector< AnnotatedLine *, 1 > Children
If this token starts a block, this contains all the unwrapped lines in it.
FormatToken * Next
The next token in the unwrapped line.
std::shared_ptr< TokenRole > Role
A token can have a special role that can carry extra information about the token's formatting.
SmallVector< UnwrappedLine, 0 > Children
An unwrapped line is a sequence of Token, that we would like to put on a single line if there was no ...