clang 18.0.0git
TokenAnnotator.h
Go to the documentation of this file.
1//===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements a token annotator, i.e. creates
11/// \c AnnotatedTokens out of \c FormatTokens with required extra information.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
16#define LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
17
18#include "UnwrappedLineParser.h"
19#include "clang/Format/Format.h"
20
21namespace clang {
22namespace format {
23
27 LT_ObjCDecl, // An @interface, @implementation, or @protocol line.
29 LT_ObjCProperty, // An @property line.
35};
36
38 // Contained in class declaration/definition.
40 // Contained within function definition.
42 // Contained within other scope block (loop, if/else, etc).
44};
45
47public:
49 : First(Line.Tokens.front().Tok), Level(Line.Level),
50 PPLevel(Line.PPLevel),
61 assert(!Line.Tokens.empty());
62
63 // Calculate Next and Previous for all tokens. Note that we must overwrite
64 // Next and Previous for every token, as previous formatting runs might have
65 // left them in a different state.
66 First->Previous = nullptr;
67 FormatToken *Current = First;
68 addChildren(Line.Tokens.front(), Current);
69 for (const UnwrappedLineNode &Node : llvm::drop_begin(Line.Tokens)) {
70 if (Node.Tok->MacroParent)
71 ContainsMacroCall = true;
72 Current->Next = Node.Tok;
73 Node.Tok->Previous = Current;
74 Current = Current->Next;
75 addChildren(Node, Current);
76 // FIXME: if we add children, previous will point to the token before
77 // the children; changing this requires significant changes across
78 // clang-format.
79 }
80 Last = Current;
81 Last->Next = nullptr;
82 }
83
85 Current->Children.clear();
86 for (const auto &Child : Node.Children) {
87 Children.push_back(new AnnotatedLine(Child));
88 if (Children.back()->ContainsMacroCall)
89 ContainsMacroCall = true;
90 Current->Children.push_back(Children.back());
91 }
92 }
93
94 size_t size() const {
95 size_t Size = 1;
96 for (const auto *Child : Children)
97 Size += Child->size();
98 return Size;
99 }
100
102 for (AnnotatedLine *Child : Children)
103 delete Child;
104 FormatToken *Current = First;
105 while (Current) {
106 Current->Children.clear();
107 Current->Role.reset();
108 Current = Current->Next;
109 }
110 }
111
112 bool isComment() const {
113 return First && First->is(tok::comment) && !First->getNextNonComment();
114 }
115
116 /// \c true if this line starts with the given tokens in order, ignoring
117 /// comments.
118 template <typename... Ts> bool startsWith(Ts... Tokens) const {
119 return First && First->startsSequence(Tokens...);
120 }
121
122 /// \c true if this line ends with the given tokens in reversed order,
123 /// ignoring comments.
124 /// For example, given tokens [T1, T2, T3, ...], the function returns true if
125 /// this line is like "... T3 T2 T1".
126 template <typename... Ts> bool endsWith(Ts... Tokens) const {
127 return Last && Last->endsSequence(Tokens...);
128 }
129
130 /// \c true if this line looks like a function definition instead of a
131 /// function declaration. Asserts MightBeFunctionDecl.
133 assert(MightBeFunctionDecl);
134 // Determine whether this line is the definition or only a declaration of a
135 // function. A declaration such as foo(); ends with a ';', so a line that
136 // does not end with a ';' is taken to be a definition. The line could,
137 // however, end with
138 // foo(); /* comment */
139 // or
140 // foo(); // comment
141 // or
142 // foo() // comment
143 // endsWith() ignores the comment.
144 return !endsWith(tok::semi);
145 }
146
147 /// \c true if this line starts a namespace definition.
148 bool startsWithNamespace() const {
149 return startsWith(tok::kw_namespace) || startsWith(TT_NamespaceMacro) ||
150 startsWith(tok::kw_inline, tok::kw_namespace) ||
151 startsWith(tok::kw_export, tok::kw_namespace);
152 }
153
155 assert(First);
156 return First->is(tok::comment) ? First->getNextNonComment() : First;
157 }
158
161
163
165 unsigned Level;
166 unsigned PPLevel;
175
176 /// \c True if this line contains a macro call for which an expansion exists.
177 bool ContainsMacroCall = false;
178
179 /// \c True if this line should be formatted, i.e. intersects directly or
180 /// indirectly with one of the input ranges.
182
183 /// \c True if the leading empty lines of this line intersect with one of the
184 /// input ranges.
186
187 /// \c True if one of this line's children intersects with an input range.
189
190 /// \c True if breaking after last attribute group in function return type.
192
193 /// \c True if this line should be indented by ContinuationIndent in addition
194 /// to the normal indention level.
196
198
199private:
200 // Disallow copying.
201 AnnotatedLine(const AnnotatedLine &) = delete;
202 void operator=(const AnnotatedLine &) = delete;
203};
204
205/// Determines extra information about the tokens comprising an
206/// \c UnwrappedLine.
208public:
/// Creates an annotator bound to \p Style and \p Keywords. Both are kept as
/// reference members rather than copied.
209 TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords)
210 : Style(Style), Keywords(Keywords) {}
211
212 /// Adapts the indent levels of comment lines to the indent of the
213 /// subsequent line.
214 // FIXME: Can/should this be done in the UnwrappedLineParser?
216
217 void annotate(AnnotatedLine &Line);
219
220private:
221 /// Calculate the penalty for splitting before \c Tok.
222 unsigned splitPenalty(const AnnotatedLine &Line, const FormatToken &Tok,
223 bool InFunctionDecl) const;
224
225 bool spaceRequiredBeforeParens(const FormatToken &Right) const;
226
227 bool spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left,
228 const FormatToken &Right) const;
229
230 bool spaceRequiredBefore(const AnnotatedLine &Line,
231 const FormatToken &Right) const;
232
233 bool mustBreakBefore(const AnnotatedLine &Line,
234 const FormatToken &Right) const;
235
236 bool canBreakBefore(const AnnotatedLine &Line,
237 const FormatToken &Right) const;
238
239 bool mustBreakForReturnType(const AnnotatedLine &Line) const;
240
241 void printDebugInfo(const AnnotatedLine &Line) const;
242
243 void calculateUnbreakableTailLengths(AnnotatedLine &Line) const;
244
245 void calculateArrayInitializerColumnList(AnnotatedLine &Line) const;
246
247 FormatToken *calculateInitializerColumnList(AnnotatedLine &Line,
248 FormatToken *CurrentToken,
249 unsigned Depth) const;
251 getTokenReferenceAlignment(const FormatToken &PointerOrReference) const;
252
253 FormatStyle::PointerAlignmentStyle getTokenPointerOrReferenceAlignment(
254 const FormatToken &PointerOrReference) const;
255
256 const FormatStyle &Style;
257
258 const AdditionalKeywords &Keywords;
259
261};
262
263} // end namespace format
264} // end namespace clang
265
266#endif
DynTypedNode Node
Various functions to configurably format source code.
This file contains the declaration of the UnwrappedLineParser, which turns a stream of tokens into Un...
void addChildren(const UnwrappedLineNode &Node, FormatToken *Current)
bool ReturnTypeWrapped
True if breaking after last attribute group in function return type.
FormatToken * getFirstNonComment() const
bool LeadingEmptyLinesAffected
True if the leading empty lines of this line intersect with one of the input ranges.
bool Affected
True if this line should be formatted, i.e. intersects directly or indirectly with one of the input ranges.
AnnotatedLine(const UnwrappedLine &Line)
bool ContainsMacroCall
True if this line contains a macro call for which an expansion exists.
bool mightBeFunctionDefinition() const
true if this line looks like a function definition instead of a function declaration.
bool ChildrenAffected
True if one of this line's children intersects with an input range.
SmallVector< AnnotatedLine *, 0 > Children
bool startsWithNamespace() const
true if this line starts a namespace definition.
bool IsContinuation
True if this line should be indented by ContinuationIndent in addition to the normal indention level.
bool endsWith(Ts... Tokens) const
true if this line ends with the given tokens in reversed order, ignoring comments.
bool startsWith(Ts... Tokens) const
true if this line starts with the given tokens in order, ignoring comments.
Determines extra information about the tokens comprising an UnwrappedLine.
void calculateFormattingInformation(AnnotatedLine &Line) const
void annotate(AnnotatedLine &Line)
TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords)
void setCommentLineLevels(SmallVectorImpl< AnnotatedLine * > &Lines) const
Adapts the indent levels of comment lines to the indent of the subsequent line.
@ LT_CommentAbovePPDirective
@ LT_ArrayOfStructInitializer
#define false
Definition: stdbool.h:22
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
Definition: FormatToken.h:952
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:55
PointerAlignmentStyle
The &, && and * alignment style.
Definition: Format.h:3286
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:261
bool startsSequence(A K1, Ts... Tokens) const
true if this token starts a sequence with the given tokens in order, following the Next pointers,...
Definition: FormatToken.h:598
FormatToken * getNextNonComment() const
Returns the next token ignoring comments.
Definition: FormatToken.h:768
FormatToken * Next
The next token in the unwrapped line.
Definition: FormatToken.h:524
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:561
FormatToken * Previous
The previous token in the unwrapped line.
Definition: FormatToken.h:521
bool endsSequence(A K1, Ts... Tokens) const
true if this token ends a sequence with the given tokens in order, following the Previous pointers,...
Definition: FormatToken.h:609
An unwrapped line is a sequence of Token, that we would like to put on a single line if there was no ...