clang 23.0.0git
TokenAnnotator.h
Go to the documentation of this file.
1//===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements a token annotator, i.e. creates
11/// \c AnnotatedTokens out of \c FormatTokens with required extra information.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
16#define LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
17
18#include "UnwrappedLineParser.h"
19
20namespace clang {
21namespace format {
22
39
41 // Contained in class declaration/definition.
43 // Contained in enum declaration/definition.
45 // Contained in compound requirement.
47 // Contained in other blocks (function, lambda, loop, if/else, child, etc).
49};
50
52public:
54 : First(Line.Tokens.front().Tok), Type(LT_Other), Level(Line.Level),
66 assert(!Line.Tokens.empty());
67
68 // Calculate Next and Previous for all tokens. Note that we must overwrite
69 // Next and Previous for every token, as previous formatting runs might have
70 // left them in a different state.
71 First->Previous = nullptr;
72 FormatToken *Current = First;
73 addChildren(Line.Tokens.front(), Current);
74 for (const UnwrappedLineNode &Node : llvm::drop_begin(Line.Tokens)) {
75 if (Node.Tok->MacroParent)
76 ContainsMacroCall = true;
77 Current->Next = Node.Tok;
78 Node.Tok->Previous = Current;
79 Current = Current->Next;
80 addChildren(Node, Current);
81 // FIXME: if we add children, previous will point to the token before
82 // the children; changing this requires significant changes across
83 // clang-format.
84 }
85 Last = Current;
86 Last->Next = nullptr;
87 }
88
89 void addChildren(const UnwrappedLineNode &Node, FormatToken *Current) {
90 Current->Children.clear();
91 for (const auto &Child : Node.Children) {
92 Children.push_back(new AnnotatedLine(Child));
93 if (Children.back()->ContainsMacroCall)
94 ContainsMacroCall = true;
95 Current->Children.push_back(Children.back());
96 }
97 }
98
99 size_t size() const {
100 size_t Size = 1;
101 for (const auto *Child : Children)
102 Size += Child->size();
103 return Size;
104 }
105
107 for (AnnotatedLine *Child : Children)
108 delete Child;
109 FormatToken *Current = First;
110 while (Current) {
111 Current->Children.clear();
112 Current->Role.reset();
113 Current = Current->Next;
114 }
115 }
116
117 bool isComment() const {
118 return First && First->is(tok::comment) && !First->getNextNonComment();
119 }
120
121 /// \c true if this line starts with the given tokens in order, ignoring
122 /// comments.
123 template <typename... Ts> bool startsWith(Ts... Tokens) const {
124 return First && First->startsSequence(Tokens...);
125 }
126
127 /// \c true if this line ends with the given tokens in reversed order,
128 /// ignoring comments.
129 /// For example, given tokens [T1, T2, T3, ...], the function returns true if
130 /// this line is like "... T3 T2 T1".
131 template <typename... Ts> bool endsWith(Ts... Tokens) const {
132 return Last && Last->endsSequence(Tokens...);
133 }
134
135 /// \c true if this line looks like a function definition instead of a
136 /// function declaration. Asserts MightBeFunctionDecl.
138 assert(MightBeFunctionDecl);
139 // Try to determine whether the end of a stream of tokens belongs to the
140 // definition or the declaration of a function. A line that ends with the
141 // ';' in foo(); is treated as a declaration; any other line might be a
142 // definition. Note that the line could also end with
143 // foo(); /* comment */
144 // or
145 // foo(); // comment
146 // or
147 // foo() // comment
148 // endsWith() ignores the comment.
149 return !endsWith(tok::semi);
150 }
151
152 /// \c true if this line starts a namespace definition.
153 bool startsWithNamespace() const {
154 return startsWith(tok::kw_namespace) || startsWith(TT_NamespaceMacro) ||
155 startsWith(tok::kw_inline, tok::kw_namespace) ||
156 startsWith(tok::kw_export, tok::kw_namespace);
157 }
158
159 /// \c true if this line starts a C++ export block.
161 return startsWith(tok::kw_export, tok::l_brace);
162 }
163
165 assert(First);
166 return First->is(tok::comment) ? First->getNextNonComment() : First;
167 }
168
170 assert(Last);
171 return Last->is(tok::comment) ? Last->getPreviousNonComment() : Last;
172 }
173
176
178
180 unsigned Level;
181 unsigned PPLevel;
190
191 /// \c True if this line contains a macro call for which an expansion exists.
192 bool ContainsMacroCall = false;
193
194 /// \c True if calculateFormattingInformation() has been called on this line.
195 bool Computed = false;
196
197 /// \c True if this line should be formatted, i.e. intersects directly or
198 /// indirectly with one of the input ranges.
200
201 /// \c True if the leading empty lines of this line intersect with one of the
202 /// input ranges.
204
205 /// \c True if one of this line's children intersects with an input range.
207
208 /// \c True if breaking after last attribute group in function return type.
210
211 /// \c True if this line should be indented by ContinuationIndent in addition
212 /// to the normal indention level.
214
216
217private:
218 // Disallow copying.
219 AnnotatedLine(const AnnotatedLine &) = delete;
220 void operator=(const AnnotatedLine &) = delete;
221};
222
223/// Determines extra information about the tokens comprising an
224/// \c UnwrappedLine.
226public:
227 TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords)
228 : Style(Style), IsCpp(Style.isCpp()),
229 LangOpts(getFormattingLangOpts(Style)), Keywords(Keywords) {}
230
231 /// Adapts the indent levels of comment lines to the indent of the
232 /// subsequent line.
233 // FIXME: Can/should this be done in the UnwrappedLineParser?
235
238
239private:
240 /// Calculate the penalty for splitting before \c Tok.
241 unsigned splitPenalty(const AnnotatedLine &Line, const FormatToken &Tok,
242 bool InFunctionDecl) const;
243
244 bool spaceRequiredBeforeParens(const FormatToken &Right) const;
245
246 bool spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left,
247 const FormatToken &Right) const;
248
249 bool spaceRequiredBefore(const AnnotatedLine &Line,
250 const FormatToken &Right) const;
251
252 bool mustBreakBefore(AnnotatedLine &Line, const FormatToken &Right) const;
253
254 bool canBreakBefore(const AnnotatedLine &Line,
255 const FormatToken &Right) const;
256
257 bool mustBreakForReturnType(const AnnotatedLine &Line) const;
258
259 bool mustBreakBeforeReturnType(const AnnotatedLine &Line) const;
260
261 void printDebugInfo(const AnnotatedLine &Line) const;
262
263 void calculateUnbreakableTailLengths(AnnotatedLine &Line) const;
264
265 void calculateArrayInitializerColumnList(AnnotatedLine &Line) const;
266
267 FormatToken *calculateInitializerColumnList(AnnotatedLine &Line,
268 FormatToken *CurrentToken,
269 unsigned Depth) const;
270 FormatStyle::PointerAlignmentStyle
271 getTokenReferenceAlignment(const FormatToken &PointerOrReference) const;
272
273 FormatStyle::PointerAlignmentStyle getTokenPointerOrReferenceAlignment(
274 const FormatToken &PointerOrReference) const;
275
276 const FormatStyle &Style;
277
278 bool IsCpp;
279 LangOptions LangOpts;
280
281 const AdditionalKeywords &Keywords;
282
283 SmallVector<ScopeType> Scopes, MacroBodyScopes;
284};
285
286} // end namespace format
287} // end namespace clang
288
289#endif
Token Tok
The Token.
This file contains the declaration of the UnwrappedLineParser, which turns a stream of tokens into Un...
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
void addChildren(const UnwrappedLineNode &Node, FormatToken *Current)
bool ReturnTypeWrapped
True if breaking after last attribute group in function return type.
FormatToken * getFirstNonComment() const
bool Computed
True if calculateFormattingInformation() has been called on this line.
bool LeadingEmptyLinesAffected
True if the leading empty lines of this line intersect with one of the input ranges.
bool Affected
True if this line should be formatted, i.e. intersects directly or indirectly with one of the input ranges.
AnnotatedLine(const UnwrappedLine &Line)
bool ContainsMacroCall
True if this line contains a macro call for which an expansion exists.
bool mightBeFunctionDefinition() const
true if this line looks like a function definition instead of a function declaration.
bool ChildrenAffected
True if one of this line's children intersects with an input range.
SmallVector< AnnotatedLine *, 0 > Children
bool startsWithNamespace() const
true if this line starts a namespace definition.
bool startsWithExportBlock() const
true if this line starts a C++ export block.
bool IsContinuation
True if this line should be indented by ContinuationIndent in addition to the normal indention level.
bool endsWith(Ts... Tokens) const
true if this line ends with the given tokens in reversed order, ignoring comments.
bool startsWith(Ts... Tokens) const
true if this line starts with the given tokens in order, ignoring comments.
FormatToken * getLastNonComment() const
void calculateFormattingInformation(AnnotatedLine &Line) const
void annotate(AnnotatedLine &Line)
TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords)
void setCommentLineLevels(SmallVectorImpl< AnnotatedLine * > &Lines) const
Adapts the indent levels of comment lines to the indent of the subsequent line.
LangOptions getFormattingLangOpts(const FormatStyle &Style)
Definition Format.cpp:4458
The JSON file list parser is used to communicate input to InstallAPI.
bool isCpp() const
Definition Format.h:3843
#define false
Definition stdbool.h:26
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
A wrapper around a Token storing information about the whitespace characters preceding it.
SmallVector< AnnotatedLine *, 1 > Children
If this token starts a block, this contains all the unwrapped lines in it.
FormatToken * Next
The next token in the unwrapped line.
std::shared_ptr< TokenRole > Role
A token can have a special role that can carry extra information about the token's formatting.
SmallVector< UnwrappedLine, 0 > Children
An unwrapped line is a sequence of Token, that we would like to put on a single line if there was no ...