clang  16.0.0git
TokenAnnotator.h
Go to the documentation of this file.
1 //===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements a token annotator, i.e. creates
11 /// \c AnnotatedTokens out of \c FormatTokens with required extra information.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
16 #define LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
17 
18 #include "UnwrappedLineParser.h"
19 #include "clang/Format/Format.h"
20 
21 namespace clang {
22 namespace format {
23 
/// Kind of line an \c AnnotatedLine represents (stored in
/// \c AnnotatedLine::Type).
enum LineType {
  LT_Invalid,
  LT_ImportStatement,
  LT_ObjCDecl, // An @interface, @implementation, or @protocol line.
  LT_ObjCMethodDecl,
  LT_ObjCProperty, // An @property line.
  LT_Other,
  LT_PreprocessorDirective,
  LT_VirtualFunctionDecl,
  LT_ArrayOfStructInitializer,
  LT_CommentAbovePPDirective,
};
36 
38 public:
40  : First(Line.Tokens.front().Tok), Level(Line.Level),
52  assert(!Line.Tokens.empty());
53 
54  // Calculate Next and Previous for all tokens. Note that we must overwrite
55  // Next and Previous for every token, as previous formatting runs might have
56  // left them in a different state.
57  First->Previous = nullptr;
58  FormatToken *Current = First;
59  for (const UnwrappedLineNode &Node : llvm::drop_begin(Line.Tokens)) {
60  Current->Next = Node.Tok;
61  Node.Tok->Previous = Current;
62  Current = Current->Next;
63  Current->Children.clear();
64  for (const auto &Child : Node.Children) {
65  Children.push_back(new AnnotatedLine(Child));
66  Current->Children.push_back(Children.back());
67  }
68  }
69  Last = Current;
70  Last->Next = nullptr;
71  }
72 
74  for (AnnotatedLine *Child : Children)
75  delete Child;
76  FormatToken *Current = First;
77  while (Current) {
78  Current->Children.clear();
79  Current->Role.reset();
80  Current = Current->Next;
81  }
82  }
83 
84  bool isComment() const {
85  return First && First->is(tok::comment) && !First->getNextNonComment();
86  }
87 
88  /// \c true if this line starts with the given tokens in order, ignoring
89  /// comments.
90  template <typename... Ts> bool startsWith(Ts... Tokens) const {
91  return First && First->startsSequence(Tokens...);
92  }
93 
94  /// \c true if this line ends with the given tokens in reversed order,
95  /// ignoring comments.
96  /// For example, given tokens [T1, T2, T3, ...], the function returns true if
97  /// this line is like "... T3 T2 T1".
98  template <typename... Ts> bool endsWith(Ts... Tokens) const {
99  return Last && Last->endsSequence(Tokens...);
100  }
101 
102  /// \c true if this line looks like a function definition instead of a
103  /// function declaration. Asserts MightBeFunctionDecl.
105  assert(MightBeFunctionDecl);
106  // Try to determine if the end of a stream of tokens is either the
107  // Definition or the Declaration for a function. It does this by looking for
108  // the ';' in foo(); and using that it ends with a ; to know this is the
109  // Definition, however the line could end with
110  // foo(); /* comment */
111  // or
112  // foo(); // comment
113  // or
114  // foo() // comment
115  // endsWith() ignores the comment.
116  return !endsWith(tok::semi);
117  }
118 
119  /// \c true if this line starts a namespace definition.
120  bool startsWithNamespace() const {
121  return startsWith(tok::kw_namespace) || startsWith(TT_NamespaceMacro) ||
122  startsWith(tok::kw_inline, tok::kw_namespace) ||
123  startsWith(tok::kw_export, tok::kw_namespace);
124  }
125 
128 
130 
132  unsigned Level;
133  unsigned PPLevel;
142 
143  /// \c True if this line should be formatted, i.e. intersects directly or
144  /// indirectly with one of the input ranges.
145  bool Affected;
146 
147  /// \c True if the leading empty lines of this line intersect with one of the
148  /// input ranges.
150 
151  /// \c True if one of this line's children intersects with an input range.
153 
154  /// \c True if this line should be indented by ContinuationIndent in addition
155  /// to the normal indention level.
157 
159 
160 private:
161  // Disallow copying.
162  AnnotatedLine(const AnnotatedLine &) = delete;
163  void operator=(const AnnotatedLine &) = delete;
164 };
165 
166 /// Determines extra information about the tokens comprising an
167 /// \c UnwrappedLine.
169 public:
170  TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords)
171  : Style(Style), Keywords(Keywords) {}
172 
173  /// Adapts the indent levels of comment lines to the indent of the
174  /// subsequent line.
175  // FIXME: Can/should this be done in the UnwrappedLineParser?
177 
178  void annotate(AnnotatedLine &Line) const;
180 
181 private:
182  /// Calculate the penalty for splitting before \c Tok.
183  unsigned splitPenalty(const AnnotatedLine &Line, const FormatToken &Tok,
184  bool InFunctionDecl) const;
185 
186  bool spaceRequiredBeforeParens(const FormatToken &Right) const;
187 
188  bool spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left,
189  const FormatToken &Right) const;
190 
191  bool spaceRequiredBefore(const AnnotatedLine &Line,
192  const FormatToken &Right) const;
193 
194  bool mustBreakBefore(const AnnotatedLine &Line,
195  const FormatToken &Right) const;
196 
197  bool canBreakBefore(const AnnotatedLine &Line,
198  const FormatToken &Right) const;
199 
200  bool mustBreakForReturnType(const AnnotatedLine &Line) const;
201 
202  void printDebugInfo(const AnnotatedLine &Line) const;
203 
204  void calculateUnbreakableTailLengths(AnnotatedLine &Line) const;
205 
206  void calculateArrayInitializerColumnList(AnnotatedLine &Line) const;
207 
208  FormatToken *calculateInitializerColumnList(AnnotatedLine &Line,
209  FormatToken *CurrentToken,
210  unsigned Depth) const;
212  getTokenReferenceAlignment(const FormatToken &PointerOrReference) const;
213 
214  FormatStyle::PointerAlignmentStyle getTokenPointerOrReferenceAlignment(
215  const FormatToken &PointerOrReference) const;
216 
217  const FormatStyle &Style;
218 
219  const AdditionalKeywords &Keywords;
220 };
221 
222 } // end namespace format
223 } // end namespace clang
224 
225 #endif
clang::format::FormatToken::getNextNonComment
const FormatToken * getNextNonComment() const
Returns the next token ignoring comments.
Definition: FormatToken.h:736
clang::format::AnnotatedLine::MatchingOpeningBlockLineIndex
size_t MatchingOpeningBlockLineIndex
Definition: TokenAnnotator.h:134
clang::format::AnnotatedLine::MightBeFunctionDecl
bool MightBeFunctionDecl
Definition: TokenAnnotator.h:140
clang::format::AnnotatedLine::First
FormatToken * First
Definition: TokenAnnotator.h:126
llvm::SmallVector
Definition: LLVM.h:38
clang::format::AnnotatedLine::ChildrenAffected
bool ChildrenAffected
True if one of this line's children intersects with an input range.
Definition: TokenAnnotator.h:152
clang::format::AnnotatedLine::mightBeFunctionDefinition
bool mightBeFunctionDefinition() const
true if this line looks like a function definition instead of a function declaration.
Definition: TokenAnnotator.h:104
clang::format::AnnotatedLine::Children
SmallVector< AnnotatedLine *, 0 > Children
Definition: TokenAnnotator.h:129
clang::format::TokenAnnotator::setCommentLineLevels
void setCommentLineLevels(SmallVectorImpl< AnnotatedLine * > &Lines) const
Adapts the indent levels of comment lines to the indent of the subsequent line.
Definition: TokenAnnotator.cpp:2721
clang::format::AnnotatedLine::Level
unsigned Level
Definition: TokenAnnotator.h:132
clang::format::FormatStyle
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:54
clang::format::AnnotatedLine::LeadingEmptyLinesAffected
bool LeadingEmptyLinesAffected
True if the leading empty lines of this line intersect with one of the input ranges.
Definition: TokenAnnotator.h:149
clang::format::FormatToken
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:240
clang::format::AdditionalKeywords
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's lexer.
Definition: FormatToken.h:917
clang::format::AnnotatedLine::InMacroBody
bool InMacroBody
Definition: TokenAnnotator.h:138
clang::format::UnwrappedLine
An unwrapped line is a sequence of Token, that we would like to put on a single line if there was no column limit.
Definition: UnwrappedLineParser.h:38
clang::format::AnnotatedLine::PPLevel
unsigned PPLevel
Definition: TokenAnnotator.h:133
Format.h
clang::format::LT_CommentAbovePPDirective
@ LT_CommentAbovePPDirective
Definition: TokenAnnotator.h:34
clang::format::AnnotatedLine::FirstStartColumn
unsigned FirstStartColumn
Definition: TokenAnnotator.h:158
clang::format::LT_PreprocessorDirective
@ LT_PreprocessorDirective
Definition: TokenAnnotator.h:31
clang::format::FormatToken::Previous
FormatToken * Previous
The previous token in the unwrapped line.
Definition: FormatToken.h:485
Node
DynTypedNode Node
Definition: ASTMatchFinder.cpp:68
clang::format::AnnotatedLine::isComment
bool isComment() const
Definition: TokenAnnotator.h:84
clang::format::AnnotatedLine
Definition: TokenAnnotator.h:37
Depth
int Depth
Definition: ASTDiff.cpp:189
clang::format::LT_ImportStatement
@ LT_ImportStatement
Definition: TokenAnnotator.h:26
clang::format::TokenAnnotator
Determines extra information about the tokens comprising an UnwrappedLine.
Definition: TokenAnnotator.h:168
clang::format::TokenAnnotator::annotate
void annotate(AnnotatedLine &Line) const
Definition: TokenAnnotator.cpp:2760
clang::format::AnnotatedLine::Type
LineType Type
Definition: TokenAnnotator.h:131
clang::format::TokenAnnotator::TokenAnnotator
TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords)
Definition: TokenAnnotator.h:170
clang::format::AnnotatedLine::InPPDirective
bool InPPDirective
Definition: TokenAnnotator.h:136
clang::format::AnnotatedLine::~AnnotatedLine
~AnnotatedLine()
Definition: TokenAnnotator.h:73
Line
const AnnotatedLine * Line
Definition: UsingDeclarationsSorter.cpp:68
clang::format::LT_Invalid
@ LT_Invalid
Definition: TokenAnnotator.h:25
clang::format::AnnotatedLine::startsWith
bool startsWith(Ts... Tokens) const
true if this line starts with the given tokens in order, ignoring comments.
Definition: TokenAnnotator.h:90
UnwrappedLineParser.h
clang::format::FormatStyle::PointerAlignmentStyle
PointerAlignmentStyle
The &, && and * alignment style.
Definition: Format.h:2884
clang::format::FormatToken::startsSequence
bool startsSequence(A K1, Ts... Tokens) const
true if this token starts a sequence with the given tokens in order, following the Next pointers, ignoring comments.
Definition: FormatToken.h:562
clang::format::LineType
LineType
Definition: TokenAnnotator.h:24
clang::format::TokenAnnotator::calculateFormattingInformation
void calculateFormattingInformation(AnnotatedLine &Line) const
Definition: TokenAnnotator.cpp:2937
false
#define false
Definition: stdbool.h:22
clang::format::LT_ObjCDecl
@ LT_ObjCDecl
Definition: TokenAnnotator.h:27
clang::format::AnnotatedLine::Last
FormatToken * Last
Definition: TokenAnnotator.h:127
clang::format::AnnotatedLine::IsContinuation
bool IsContinuation
True if this line should be indented by ContinuationIndent in addition to the normal indention level.
Definition: TokenAnnotator.h:156
clang::format::FormatToken::is
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:525
clang::format::AnnotatedLine::startsWithNamespace
bool startsWithNamespace() const
true if this line starts a namespace definition.
Definition: TokenAnnotator.h:120
clang::format::AnnotatedLine::InPragmaDirective
bool InPragmaDirective
Definition: TokenAnnotator.h:137
clang::format::FormatToken::endsSequence
bool endsSequence(A K1, Ts... Tokens) const
true if this token ends a sequence with the given tokens in order, following the Previous pointers, ignoring comments.
Definition: FormatToken.h:573
clang::format::UnwrappedLineNode
Definition: UnwrappedLineParser.h:355
clang::format::AnnotatedLine::MatchingClosingBlockLineIndex
size_t MatchingClosingBlockLineIndex
Definition: TokenAnnotator.h:135
clang
Definition: CalledOnceCheck.h:17
clang::format::AnnotatedLine::endsWith
bool endsWith(Ts... Tokens) const
true if this line ends with the given tokens in reversed order, ignoring comments.
Definition: TokenAnnotator.h:98
clang::format::AnnotatedLine::AnnotatedLine
AnnotatedLine(const UnwrappedLine &Line)
Definition: TokenAnnotator.h:39
clang::format::LT_ArrayOfStructInitializer
@ LT_ArrayOfStructInitializer
Definition: TokenAnnotator.h:33
clang::format::AnnotatedLine::MustBeDeclaration
bool MustBeDeclaration
Definition: TokenAnnotator.h:139
clang::format::AnnotatedLine::IsMultiVariableDeclStmt
bool IsMultiVariableDeclStmt
Definition: TokenAnnotator.h:141
clang::format::LT_Other
@ LT_Other
Definition: TokenAnnotator.h:30
llvm::SmallVectorImpl
Definition: Randstruct.h:18
clang::format::FormatToken::Next
FormatToken * Next
The next token in the unwrapped line.
Definition: FormatToken.h:488
clang::format::LT_ObjCMethodDecl
@ LT_ObjCMethodDecl
Definition: TokenAnnotator.h:28
clang::format::LT_VirtualFunctionDecl
@ LT_VirtualFunctionDecl
Definition: TokenAnnotator.h:32
clang::format::LT_ObjCProperty
@ LT_ObjCProperty
Definition: TokenAnnotator.h:29
clang::format::AnnotatedLine::Affected
bool Affected
True if this line should be formatted, i.e. intersects directly or indirectly with one of the input ranges.
Definition: TokenAnnotator.h:145