clang 17.0.0git
TokenAnnotator.h
Go to the documentation of this file.
1//===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements a token annotator, i.e. creates
11/// \c AnnotatedTokens out of \c FormatTokens with required extra information.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
16#define LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
17
18#include "UnwrappedLineParser.h"
19#include "clang/Format/Format.h"
20
21namespace clang {
22namespace format {
23
27 LT_ObjCDecl, // An @interface, @implementation, or @protocol line.
29 LT_ObjCProperty, // An @property line.
35};
36
38 // Contained in class declaration/definition.
40 // Contained within function definition.
42 // Contained within other scope block (loop, if/else, etc).
44};
45
47public:
49 : First(Line.Tokens.front().Tok), Level(Line.Level),
50 PPLevel(Line.PPLevel),
61 assert(!Line.Tokens.empty());
62
63 // Calculate Next and Previous for all tokens. Note that we must overwrite
64 // Next and Previous for every token, as previous formatting runs might have
65 // left them in a different state.
66 First->Previous = nullptr;
67 FormatToken *Current = First;
68 addChildren(Line.Tokens.front(), Current);
69 for (const UnwrappedLineNode &Node : llvm::drop_begin(Line.Tokens)) {
70 if (Node.Tok->MacroParent)
71 ContainsMacroCall = true;
72 Current->Next = Node.Tok;
73 Node.Tok->Previous = Current;
74 Current = Current->Next;
75 addChildren(Node, Current);
76 // FIXME: if we add children, previous will point to the token before
77 // the children; changing this requires significant changes across
78 // clang-format.
79 }
80 Last = Current;
81 Last->Next = nullptr;
82 }
83
85 Current->Children.clear();
86 for (const auto &Child : Node.Children) {
87 Children.push_back(new AnnotatedLine(Child));
88 if (Children.back()->ContainsMacroCall)
89 ContainsMacroCall = true;
90 Current->Children.push_back(Children.back());
91 }
92 }
93
95 for (AnnotatedLine *Child : Children)
96 delete Child;
97 FormatToken *Current = First;
98 while (Current) {
99 Current->Children.clear();
100 Current->Role.reset();
101 Current = Current->Next;
102 }
103 }
104
/// \c true if this line consists solely of comments: its first token is a
/// comment and no non-comment token follows it. Evaluates to \c false when
/// \c First is null.
105 bool isComment() const {
106 return First && First->is(tok::comment) && !First->getNextNonComment();
107 }
108
109 /// \c true if this line starts with the given tokens in order, ignoring
110 /// comments.
/// Evaluates to \c false when \c First is null; otherwise the check is
/// delegated to \c First->startsSequence(Tokens...).
111 template <typename... Ts> bool startsWith(Ts... Tokens) const {
112 return First && First->startsSequence(Tokens...);
113 }
114
115 /// \c true if this line ends with the given tokens in reversed order,
116 /// ignoring comments.
117 /// For example, given tokens [T1, T2, T3, ...], the function returns true if
118 /// this line is like "... T3 T2 T1".
/// Evaluates to \c false when \c Last is null; otherwise the check is
/// delegated to \c Last->endsSequence(Tokens...).
119 template <typename... Ts> bool endsWith(Ts... Tokens) const {
120 return Last && Last->endsSequence(Tokens...);
121 }
122
123 /// \c true if this line looks like a function definition instead of a
124 /// function declaration. Asserts MightBeFunctionDecl.
126 assert(MightBeFunctionDecl);
127 // Decide whether the token stream ends a function definition or a function
128 // declaration. A line whose last non-comment token is ';', as in foo();,
129 // is a declaration; any other line is treated as a definition. Trailing
130 // comments must not affect the result, since the line could end with
131 // foo(); /* comment */
132 // or
133 // foo(); // comment
134 // or
135 // foo() // comment
136 // endsWith() ignores the comment.
137 return !endsWith(tok::semi);
138 }
139
140 /// \c true if this line starts a namespace definition.
/// Recognized line starts are \c namespace, a \c TT_NamespaceMacro token,
/// \c inline \c namespace, and \c export \c namespace.
141 bool startsWithNamespace() const {
142 return startsWith(tok::kw_namespace) || startsWith(TT_NamespaceMacro) ||
143 startsWith(tok::kw_inline, tok::kw_namespace) ||
144 startsWith(tok::kw_export, tok::kw_namespace);
145 }
146
149
151
153 unsigned Level;
154 unsigned PPLevel;
163
164 /// \c True if this line contains a macro call for which an expansion exists.
165 bool ContainsMacroCall = false;
166
167 /// \c True if this line should be formatted, i.e. intersects directly or
168 /// indirectly with one of the input ranges.
170
171 /// \c True if the leading empty lines of this line intersect with one of the
172 /// input ranges.
174
175 /// \c True if one of this line's children intersects with an input range.
177
178 /// \c True if breaking after last attribute group in function return type.
180
181 /// \c True if this line should be indented by ContinuationIndent in addition
182 /// to the normal indentation level.
184
186
187private:
188 // Disallow copying.
189 AnnotatedLine(const AnnotatedLine &) = delete;
190 void operator=(const AnnotatedLine &) = delete;
191};
192
193/// Determines extra information about the tokens comprising an
194/// \c UnwrappedLine.
196public:
/// Creates an annotator that uses \p Style and \p Keywords. Both arguments are
/// bound to reference members rather than copied, so the referenced objects
/// must stay alive for as long as the annotator is used.
197 TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords)
198 : Style(Style), Keywords(Keywords) {}
199
200 /// Adapts the indent levels of comment lines to the indent of the
201 /// subsequent line.
202 // FIXME: Can/should this be done in the UnwrappedLineParser?
204
205 void annotate(AnnotatedLine &Line);
207
208private:
209 /// Calculate the penalty for splitting before \c Tok.
210 unsigned splitPenalty(const AnnotatedLine &Line, const FormatToken &Tok,
211 bool InFunctionDecl) const;
212
213 bool spaceRequiredBeforeParens(const FormatToken &Right) const;
214
215 bool spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left,
216 const FormatToken &Right) const;
217
218 bool spaceRequiredBefore(const AnnotatedLine &Line,
219 const FormatToken &Right) const;
220
221 bool mustBreakBefore(const AnnotatedLine &Line,
222 const FormatToken &Right) const;
223
224 bool canBreakBefore(const AnnotatedLine &Line,
225 const FormatToken &Right) const;
226
227 bool mustBreakForReturnType(const AnnotatedLine &Line) const;
228
229 void printDebugInfo(const AnnotatedLine &Line) const;
230
231 void calculateUnbreakableTailLengths(AnnotatedLine &Line) const;
232
233 void calculateArrayInitializerColumnList(AnnotatedLine &Line) const;
234
235 FormatToken *calculateInitializerColumnList(AnnotatedLine &Line,
236 FormatToken *CurrentToken,
237 unsigned Depth) const;
239 getTokenReferenceAlignment(const FormatToken &PointerOrReference) const;
240
241 FormatStyle::PointerAlignmentStyle getTokenPointerOrReferenceAlignment(
242 const FormatToken &PointerOrReference) const;
243
244 const FormatStyle &Style;
245
246 const AdditionalKeywords &Keywords;
247
249};
250
251} // end namespace format
252} // end namespace clang
253
254#endif
DynTypedNode Node
Various functions to configurably format source code.
This file contains the declaration of the UnwrappedLineParser, which turns a stream of tokens into Un...
void addChildren(const UnwrappedLineNode &Node, FormatToken *Current)
bool ReturnTypeWrapped
True if breaking after last attribute group in function return type.
bool LeadingEmptyLinesAffected
True if the leading empty lines of this line intersect with one of the input ranges.
bool Affected
True if this line should be formatted, i.e. intersects directly or indirectly with one of the input ranges.
AnnotatedLine(const UnwrappedLine &Line)
bool ContainsMacroCall
True if this line contains a macro call for which an expansion exists.
bool mightBeFunctionDefinition() const
true if this line looks like a function definition instead of a function declaration.
bool ChildrenAffected
True if one of this line's children intersects with an input range.
SmallVector< AnnotatedLine *, 0 > Children
bool startsWithNamespace() const
true if this line starts a namespace definition.
bool IsContinuation
True if this line should be indented by ContinuationIndent in addition to the normal indentation level.
bool endsWith(Ts... Tokens) const
true if this line ends with the given tokens in reversed order, ignoring comments.
bool startsWith(Ts... Tokens) const
true if this line starts with the given tokens in order, ignoring comments.
Determines extra information about the tokens comprising an UnwrappedLine.
void calculateFormattingInformation(AnnotatedLine &Line) const
void annotate(AnnotatedLine &Line)
TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords)
void setCommentLineLevels(SmallVectorImpl< AnnotatedLine * > &Lines) const
Adapts the indent levels of comment lines to the indent of the subsequent line.
@ LT_CommentAbovePPDirective
@ LT_ArrayOfStructInitializer
#define false
Definition: stdbool.h:22
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
Definition: FormatToken.h:935
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:55
PointerAlignmentStyle
The &, && and * alignment style.
Definition: Format.h:3035
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:249
bool startsSequence(A K1, Ts... Tokens) const
true if this token starts a sequence with the given tokens in order, following the Next pointers,...
Definition: FormatToken.h:580
FormatToken * getNextNonComment() const
Returns the next token ignoring comments.
Definition: FormatToken.h:754
FormatToken * Next
The next token in the unwrapped line.
Definition: FormatToken.h:506
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:543
FormatToken * Previous
The previous token in the unwrapped line.
Definition: FormatToken.h:503
bool endsSequence(A K1, Ts... Tokens) const
true if this token ends a sequence with the given tokens in order, following the Previous pointers,...
Definition: FormatToken.h:591
An unwrapped line is a sequence of Token, that we would like to put on a single line if there was no ...