clang 19.0.0git
TokenAnnotator.h
Go to the documentation of this file.
1//===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements a token annotator, i.e. creates
11/// \c AnnotatedTokens out of \c FormatTokens with required extra information.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
16#define LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
17
18#include "UnwrappedLineParser.h"
19
20namespace clang {
21namespace format {
22
26 LT_ObjCDecl, // An @interface, @implementation, or @protocol line.
28 LT_ObjCProperty, // An @property line.
34};
35
37 // Contained in class declaration/definition.
39 // Contained within function definition.
41 // Contained within other scope block (loop, if/else, etc).
43};
44
46public:
48 : First(Line.Tokens.front().Tok), Level(Line.Level),
60 assert(!Line.Tokens.empty());
61
62 // Calculate Next and Previous for all tokens. Note that we must overwrite
63 // Next and Previous for every token, as previous formatting runs might have
64 // left them in a different state.
65 First->Previous = nullptr;
66 FormatToken *Current = First;
67 addChildren(Line.Tokens.front(), Current);
68 for (const UnwrappedLineNode &Node : llvm::drop_begin(Line.Tokens)) {
69 if (Node.Tok->MacroParent)
70 ContainsMacroCall = true;
71 Current->Next = Node.Tok;
72 Node.Tok->Previous = Current;
73 Current = Current->Next;
74 addChildren(Node, Current);
75 // FIXME: if we add children, previous will point to the token before
76 // the children; changing this requires significant changes across
77 // clang-format.
78 }
79 Last = Current;
80 Last->Next = nullptr;
81 }
82
84 Current->Children.clear();
85 for (const auto &Child : Node.Children) {
86 Children.push_back(new AnnotatedLine(Child));
87 if (Children.back()->ContainsMacroCall)
88 ContainsMacroCall = true;
89 Current->Children.push_back(Children.back());
90 }
91 }
92
93 size_t size() const { // Counts this line plus, recursively, all child lines.
94 size_t Size = 1; // Accounts for this line itself.
95 for (const auto *Child : Children)
96 Size += Child->size();
97 return Size;
98 }
99
101 for (AnnotatedLine *Child : Children)
102 delete Child;
103 FormatToken *Current = First;
104 while (Current) {
105 Current->Children.clear();
106 Current->Role.reset();
107 Current = Current->Next;
108 }
109 }
110
111 bool isComment() const { // True if the line holds nothing but comment tokens.
112 return First && First->is(tok::comment) && !First->getNextNonComment();
113 }
114
115 /// \c true if this line starts with the given tokens in order, ignoring
116 /// comments. Returns \c false if \c First is null.
117 template <typename... Ts> bool startsWith(Ts... Tokens) const {
118 return First && First->startsSequence(Tokens...);
119 }
120
121 /// \c true if this line ends with the given tokens in reversed order,
122 /// ignoring comments.
123 /// For example, given tokens [T1, T2, T3, ...], the function returns true if
124 /// this line is like "... T3 T2 T1". Returns \c false if \c Last is null.
125 template <typename... Ts> bool endsWith(Ts... Tokens) const {
126 return Last && Last->endsSequence(Tokens...);
127 }
128
129 /// \c true if this line looks like a function definition instead of a
130 /// function declaration. Asserts MightBeFunctionDecl.
132 assert(MightBeFunctionDecl);
133 // Try to determine whether a stream of tokens is the definition or the
134 // declaration of a function. A line that ends with ';', as in foo();, is
135 // treated as a declaration; any other line is treated as a definition.
136 // However, the line could end with a trailing comment:
137 // foo(); /* comment */
138 // or
139 // foo(); // comment
140 // or
141 // foo() // comment
142 // endsWith() ignores the comment.
143 return !endsWith(tok::semi);
144 }
145
146 /// \c true if this line starts a namespace definition: a plain, inline, or exported namespace, or a \c TT_NamespaceMacro token.
147 bool startsWithNamespace() const {
148 return startsWith(tok::kw_namespace) || startsWith(TT_NamespaceMacro) ||
149 startsWith(tok::kw_inline, tok::kw_namespace) ||
150 startsWith(tok::kw_export, tok::kw_namespace);
151 }
152
154 assert(First);
155 return First->is(tok::comment) ? First->getNextNonComment() : First;
156 }
157
159 assert(Last);
160 return Last->is(tok::comment) ? Last->getPreviousNonComment() : Last;
161 }
162
165
167
169 unsigned Level;
170 unsigned PPLevel;
179
180 /// \c true if this line contains a macro call for which an expansion exists.
181 bool ContainsMacroCall = false;
182
183 /// \c true if this line should be formatted, i.e. intersects directly or
184 /// indirectly with one of the input ranges.
186
187 /// \c true if the leading empty lines of this line intersect with one of the
188 /// input ranges.
190
191 /// \c true if one of this line's children intersects with an input range.
193
194 /// \c true if breaking after last attribute group in function return type.
196
197 /// \c true if this line should be indented by ContinuationIndent in addition
198 /// to the normal indentation level.
200
202
203private:
204 // Disallow copying.
205 AnnotatedLine(const AnnotatedLine &) = delete;
206 void operator=(const AnnotatedLine &) = delete;
207};
208
209/// Determines extra information about the tokens comprising an
210/// \c UnwrappedLine.
212public:
213 TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords)
214 : Style(Style), Keywords(Keywords) { // Both are stored in reference members.
215 assert(IsCpp == Style.isCpp()); // The IsCpp flag must agree with Style.isCpp().
216 }
217
218 /// Adapts the indent levels of comment lines to the indent of the
219 /// subsequent line.
220 // FIXME: Can/should this be done in the UnwrappedLineParser?
222
225
226private:
227 /// Calculate the penalty for splitting before \c Tok.
228 unsigned splitPenalty(const AnnotatedLine &Line, const FormatToken &Tok,
229 bool InFunctionDecl) const;
230
231 bool spaceRequiredBeforeParens(const FormatToken &Right) const;
232
233 bool spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left,
234 const FormatToken &Right) const;
235
236 bool spaceRequiredBefore(const AnnotatedLine &Line,
237 const FormatToken &Right) const;
238
239 bool mustBreakBefore(const AnnotatedLine &Line,
240 const FormatToken &Right) const;
241
242 bool canBreakBefore(const AnnotatedLine &Line,
243 const FormatToken &Right) const;
244
245 bool mustBreakForReturnType(const AnnotatedLine &Line) const;
246
247 void printDebugInfo(const AnnotatedLine &Line) const;
248
249 void calculateUnbreakableTailLengths(AnnotatedLine &Line) const;
250
251 void calculateArrayInitializerColumnList(AnnotatedLine &Line) const;
252
253 FormatToken *calculateInitializerColumnList(AnnotatedLine &Line,
254 FormatToken *CurrentToken,
255 unsigned Depth) const;
257 getTokenReferenceAlignment(const FormatToken &PointerOrReference) const;
258
259 FormatStyle::PointerAlignmentStyle getTokenPointerOrReferenceAlignment(
260 const FormatToken &PointerOrReference) const;
261
262 const FormatStyle &Style;
263
264 const AdditionalKeywords &Keywords;
265
267};
268
269} // end namespace format
270} // end namespace clang
271
272#endif
DynTypedNode Node
This file contains the declaration of the UnwrappedLineParser, which turns a stream of tokens into Un...
void addChildren(const UnwrappedLineNode &Node, FormatToken *Current)
bool ReturnTypeWrapped
True if breaking after last attribute group in function return type.
FormatToken * getFirstNonComment() const
bool LeadingEmptyLinesAffected
True if the leading empty lines of this line intersect with one of the input ranges.
bool Affected
True if this line should be formatted, i.e.
AnnotatedLine(const UnwrappedLine &Line)
bool ContainsMacroCall
True if this line contains a macro call for which an expansion exists.
bool mightBeFunctionDefinition() const
true if this line looks like a function definition instead of a function declaration.
bool ChildrenAffected
True if one of this line's children intersects with an input range.
SmallVector< AnnotatedLine *, 0 > Children
bool startsWithNamespace() const
true if this line starts a namespace definition.
bool IsContinuation
True if this line should be indented by ContinuationIndent in addition to the normal indention level.
bool endsWith(Ts... Tokens) const
true if this line ends with the given tokens in reversed order, ignoring comments.
bool startsWith(Ts... Tokens) const
true if this line starts with the given tokens in order, ignoring comments.
FormatToken * getLastNonComment() const
Determines extra information about the tokens comprising an UnwrappedLine.
void calculateFormattingInformation(AnnotatedLine &Line) const
void annotate(AnnotatedLine &Line)
TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords)
void setCommentLineLevels(SmallVectorImpl< AnnotatedLine * > &Lines) const
Adapts the indent levels of comment lines to the indent of the subsequent line.
bool IsCpp
Whether the language is C/C++/Objective-C/Objective-C++.
Definition: FormatToken.cpp:21
@ LT_CommentAbovePPDirective
@ LT_ArrayOfStructInitializer
The JSON file list parser is used to communicate input to InstallAPI.
#define false
Definition: stdbool.h:22
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
Definition: FormatToken.h:989
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:55
PointerAlignmentStyle
The &, && and * alignment style.
Definition: Format.h:3490
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:283
bool startsSequence(A K1, Ts... Tokens) const
true if this token starts a sequence with the given tokens in order, following the Next pointers,...
Definition: FormatToken.h:629
FormatToken * getNextNonComment() const
Returns the next token ignoring comments.
Definition: FormatToken.h:805
FormatToken * getPreviousNonComment() const
Returns the previous token ignoring comments.
Definition: FormatToken.h:797
FormatToken * Next
The next token in the unwrapped line.
Definition: FormatToken.h:555
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:592
FormatToken * Previous
The previous token in the unwrapped line.
Definition: FormatToken.h:552
bool endsSequence(A K1, Ts... Tokens) const
true if this token ends a sequence with the given tokens in order, following the Previous pointers,...
Definition: FormatToken.h:640
An unwrapped line is a sequence of Token, that we would like to put on a single line if there was no ...