clang 19.0.0git
TokenAnnotator.h
Go to the documentation of this file.
1//===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements a token annotator, i.e. creates
11/// \c AnnotatedTokens out of \c FormatTokens with required extra information.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
16#define LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
17
18#include "UnwrappedLineParser.h"
19
20namespace clang {
21namespace format {
22
26 LT_ObjCDecl, // An @interface, @implementation, or @protocol line.
28 LT_ObjCProperty, // An @property line.
34};
35
37 // Contained in class declaration/definition.
39 // Contained within function definition.
41 // Contained within other scope block (loop, if/else, etc).
43};
44
46public:
48 : First(Line.Tokens.front().Tok), Level(Line.Level),
60 assert(!Line.Tokens.empty());
61
62 // Calculate Next and Previous for all tokens. Note that we must overwrite
63 // Next and Previous for every token, as previous formatting runs might have
64 // left them in a different state.
65 First->Previous = nullptr;
66 FormatToken *Current = First;
67 addChildren(Line.Tokens.front(), Current);
68 for (const UnwrappedLineNode &Node : llvm::drop_begin(Line.Tokens)) {
69 if (Node.Tok->MacroParent)
70 ContainsMacroCall = true;
71 Current->Next = Node.Tok;
72 Node.Tok->Previous = Current;
73 Current = Current->Next;
74 addChildren(Node, Current);
75 // FIXME: if we add children, previous will point to the token before
76 // the children; changing this requires significant changes across
77 // clang-format.
78 }
79 Last = Current;
80 Last->Next = nullptr;
81 }
82
84 Current->Children.clear();
85 for (const auto &Child : Node.Children) {
86 Children.push_back(new AnnotatedLine(Child));
87 if (Children.back()->ContainsMacroCall)
88 ContainsMacroCall = true;
89 Current->Children.push_back(Children.back());
90 }
91 }
92
93 size_t size() const {
94 size_t Size = 1;
95 for (const auto *Child : Children)
96 Size += Child->size();
97 return Size;
98 }
99
101 for (AnnotatedLine *Child : Children)
102 delete Child;
103 FormatToken *Current = First;
104 while (Current) {
105 Current->Children.clear();
106 Current->Role.reset();
107 Current = Current->Next;
108 }
109 }
110
111 bool isComment() const {
112 return First && First->is(tok::comment) && !First->getNextNonComment();
113 }
114
115 /// \c true if this line starts with the given tokens in order, ignoring
116 /// comments.
117 template <typename... Ts> bool startsWith(Ts... Tokens) const {
118 return First && First->startsSequence(Tokens...);
119 }
120
121 /// \c true if this line ends with the given tokens in reversed order,
122 /// ignoring comments.
123 /// For example, given tokens [T1, T2, T3, ...], the function returns true if
124 /// this line is like "... T3 T2 T1".
125 template <typename... Ts> bool endsWith(Ts... Tokens) const {
126 return Last && Last->endsSequence(Tokens...);
127 }
128
129 /// \c true if this line looks like a function definition instead of a
130 /// function declaration. Asserts MightBeFunctionDecl.
132 assert(MightBeFunctionDecl);
133 // Try to determine if the end of a stream of tokens is either the
134 // Definition or the Declaration for a function. It does this by looking for
135 // the ';' in foo(); a line that ends with a ';' is taken to be the
136 // Declaration (otherwise the Definition), however the line could end with
137 // foo(); /* comment */
138 // or
139 // foo(); // comment
140 // or
141 // foo() // comment
142 // endsWith() ignores the comment.
143 return !endsWith(tok::semi);
144 }
145
146 /// \c true if this line starts a namespace definition.
147 bool startsWithNamespace() const {
148 return startsWith(tok::kw_namespace) || startsWith(TT_NamespaceMacro) ||
149 startsWith(tok::kw_inline, tok::kw_namespace) ||
150 startsWith(tok::kw_export, tok::kw_namespace);
151 }
152
154 assert(First);
155 return First->is(tok::comment) ? First->getNextNonComment() : First;
156 }
157
159 assert(Last);
160 return Last->is(tok::comment) ? Last->getPreviousNonComment() : Last;
161 }
162
165
167
169 unsigned Level;
170 unsigned PPLevel;
179
180 /// \c True if this line contains a macro call for which an expansion exists.
181 bool ContainsMacroCall = false;
182
183 /// \c True if this line should be formatted, i.e. intersects directly or
184 /// indirectly with one of the input ranges.
186
187 /// \c True if the leading empty lines of this line intersect with one of the
188 /// input ranges.
190
191 /// \c True if one of this line's children intersects with an input range.
193
194 /// \c True if breaking after last attribute group in function return type.
196
197 /// \c True if this line should be indented by ContinuationIndent in addition
198 /// to the normal indentation level.
200
202
203private:
204 // Disallow copying.
205 AnnotatedLine(const AnnotatedLine &) = delete;
206 void operator=(const AnnotatedLine &) = delete;
207};
208
209/// Determines extra information about the tokens comprising an
210/// \c UnwrappedLine.
212public:
213 TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords)
214 : Style(Style), IsCpp(Style.isCpp()), Keywords(Keywords) {}
215
216 /// Adapts the indent levels of comment lines to the indent of the
217 /// subsequent line.
218 // FIXME: Can/should this be done in the UnwrappedLineParser?
220
223
224private:
225 /// Calculate the penalty for splitting before \c Tok.
226 unsigned splitPenalty(const AnnotatedLine &Line, const FormatToken &Tok,
227 bool InFunctionDecl) const;
228
229 bool spaceRequiredBeforeParens(const FormatToken &Right) const;
230
231 bool spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left,
232 const FormatToken &Right) const;
233
234 bool spaceRequiredBefore(const AnnotatedLine &Line,
235 const FormatToken &Right) const;
236
237 bool mustBreakBefore(const AnnotatedLine &Line,
238 const FormatToken &Right) const;
239
240 bool canBreakBefore(const AnnotatedLine &Line,
241 const FormatToken &Right) const;
242
243 bool mustBreakForReturnType(const AnnotatedLine &Line) const;
244
245 void printDebugInfo(const AnnotatedLine &Line) const;
246
247 void calculateUnbreakableTailLengths(AnnotatedLine &Line) const;
248
249 void calculateArrayInitializerColumnList(AnnotatedLine &Line) const;
250
251 FormatToken *calculateInitializerColumnList(AnnotatedLine &Line,
252 FormatToken *CurrentToken,
253 unsigned Depth) const;
255 getTokenReferenceAlignment(const FormatToken &PointerOrReference) const;
256
257 FormatStyle::PointerAlignmentStyle getTokenPointerOrReferenceAlignment(
258 const FormatToken &PointerOrReference) const;
259
260 const FormatStyle &Style;
261
262 bool IsCpp;
263
264 const AdditionalKeywords &Keywords;
265
267};
268
269} // end namespace format
270} // end namespace clang
271
272#endif
DynTypedNode Node
This file contains the declaration of the UnwrappedLineParser, which turns a stream of tokens into Un...
void addChildren(const UnwrappedLineNode &Node, FormatToken *Current)
bool ReturnTypeWrapped
True if breaking after last attribute group in function return type.
FormatToken * getFirstNonComment() const
bool LeadingEmptyLinesAffected
True if the leading empty lines of this line intersect with one of the input ranges.
bool Affected
True if this line should be formatted, i.e. intersects directly or indirectly with one of the input ranges.
AnnotatedLine(const UnwrappedLine &Line)
bool ContainsMacroCall
True if this line contains a macro call for which an expansion exists.
bool mightBeFunctionDefinition() const
true if this line looks like a function definition instead of a function declaration.
bool ChildrenAffected
True if one of this line's children intersects with an input range.
SmallVector< AnnotatedLine *, 0 > Children
bool startsWithNamespace() const
true if this line starts a namespace definition.
bool IsContinuation
True if this line should be indented by ContinuationIndent in addition to the normal indentation level.
bool endsWith(Ts... Tokens) const
true if this line ends with the given tokens in reversed order, ignoring comments.
bool startsWith(Ts... Tokens) const
true if this line starts with the given tokens in order, ignoring comments.
FormatToken * getLastNonComment() const
Determines extra information about the tokens comprising an UnwrappedLine.
void calculateFormattingInformation(AnnotatedLine &Line) const
void annotate(AnnotatedLine &Line)
TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords)
void setCommentLineLevels(SmallVectorImpl< AnnotatedLine * > &Lines) const
Adapts the indent levels of comment lines to the indent of the subsequent line.
@ LT_CommentAbovePPDirective
@ LT_ArrayOfStructInitializer
The JSON file list parser is used to communicate input to InstallAPI.
#define false
Definition: stdbool.h:22
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
Definition: FormatToken.h:996
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:55
PointerAlignmentStyle
The &, && and * alignment style.
Definition: Format.h:3519
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:287
bool startsSequence(A K1, Ts... Tokens) const
true if this token starts a sequence with the given tokens in order, following the Next pointers,...
Definition: FormatToken.h:636
FormatToken * getNextNonComment() const
Returns the next token ignoring comments.
Definition: FormatToken.h:812
FormatToken * getPreviousNonComment() const
Returns the previous token ignoring comments.
Definition: FormatToken.h:804
FormatToken * Next
The next token in the unwrapped line.
Definition: FormatToken.h:559
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:599
FormatToken * Previous
The previous token in the unwrapped line.
Definition: FormatToken.h:556
bool endsSequence(A K1, Ts... Tokens) const
true if this token ends a sequence with the given tokens in order, following the Previous pointers,...
Definition: FormatToken.h:647
An unwrapped line is a sequence of Token, that we would like to put on a single line if there was no ...