clang  15.0.0git
TokenAnnotator.h
Go to the documentation of this file.
1 //===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements a token annotator, i.e. creates
11 /// \c AnnotatedTokens out of \c FormatTokens with required extra information.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
16 #define LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
17 
18 #include "UnwrappedLineParser.h"
19 #include "clang/Format/Format.h"
20 
21 namespace clang {
22 namespace format {
23 
/// Classification of a line; this is the type of \c AnnotatedLine::Type.
// NOTE(review): this listing elides the enumerators at listing lines 25, 26,
// 28 and 30-33. Per the symbol index of this page they are LT_Invalid,
// LT_ImportStatement, LT_ObjCMethodDecl, LT_Other, LT_PreprocessorDirective,
// LT_VirtualFunctionDecl and LT_ArrayOfStructInitializer, in that order.
24 enum LineType {
27  LT_ObjCDecl, // An @interface, @implementation, or @protocol line.
29  LT_ObjCProperty, // An @property line.
34 };
35 
37 public:
39  : First(Line.Tokens.front().Tok), Level(Line.Level),
47  assert(!Line.Tokens.empty());
48 
49  // Calculate Next and Previous for all tokens. Note that we must overwrite
50  // Next and Previous for every token, as previous formatting runs might have
51  // left them in a different state.
52  First->Previous = nullptr;
53  FormatToken *Current = First;
54  for (const UnwrappedLineNode &Node : llvm::drop_begin(Line.Tokens)) {
55  Current->Next = Node.Tok;
56  Node.Tok->Previous = Current;
57  Current = Current->Next;
58  Current->Children.clear();
59  for (const auto &Child : Node.Children) {
60  Children.push_back(new AnnotatedLine(Child));
61  Current->Children.push_back(Children.back());
62  }
63  }
64  Last = Current;
65  Last->Next = nullptr;
66  }
67 
69  for (AnnotatedLine *Child : Children)
70  delete Child;
71  FormatToken *Current = First;
72  while (Current) {
73  Current->Children.clear();
74  Current->Role.reset();
75  Current = Current->Next;
76  }
77  }
78 
/// \c true if this line starts with a comment token and no non-comment token
/// follows it. Returns \c false if the line has no first token.
79  bool isComment() const {
// getNextNonComment() returns the next token ignoring comments; the negation
// treats a null result as "nothing but comments follows" (presumably null is
// what it returns when no such token exists -- confirm in FormatToken.h).
80  return First && First->is(tok::comment) && !First->getNextNonComment();
81  }
82 
83  /// \c true if this line starts with the given tokens in order, ignoring
84  /// comments. Returns \c false if the line has no first token.
/// The comparison itself is delegated to \c FormatToken::startsSequence()
/// called on \c First with \p Tokens forwarded unchanged.
85  template <typename... Ts> bool startsWith(Ts... Tokens) const {
86  return First && First->startsSequence(Tokens...);
87  }
88 
89  /// \c true if this line ends with the given tokens in reversed order,
90  /// ignoring comments. Returns \c false if the line has no last token.
91  /// For example, given tokens [T1, T2, T3, ...], the function returns true if
92  /// this line is like "... T3 T2 T1".
/// The comparison itself is delegated to \c FormatToken::endsSequence()
/// called on \c Last with \p Tokens forwarded unchanged.
93  template <typename... Ts> bool endsWith(Ts... Tokens) const {
94  return Last && Last->endsSequence(Tokens...);
95  }
96 
97  /// \c true if this line looks like a function definition instead of a
98  /// function declaration. Asserts MightBeFunctionDecl.
100  assert(MightBeFunctionDecl);
101  // Decide whether this stream of tokens ends a function definition or a
102  // function declaration. A trailing ';' (as in "foo();") marks the line as
103  // a declaration, so a line that does not end with ';' is treated as a
104  // definition. Note that the line could also end with
105  // foo(); /* comment */
106  // or
107  // foo(); // comment
108  // or
109  // foo() // comment
110  // endsWith() ignores the comment.
111  return !endsWith(tok::semi);
112  }
113 
114  /// \c true if this line starts a namespace definition.
/// The recognized openings are \c namespace, a token of type
/// \c TT_NamespaceMacro, \c inline \c namespace and \c export \c namespace;
/// each is checked with startsWith().
115  bool startsWithNamespace() const {
116  return startsWith(tok::kw_namespace) || startsWith(TT_NamespaceMacro) ||
117  startsWith(tok::kw_inline, tok::kw_namespace) ||
118  startsWith(tok::kw_export, tok::kw_namespace);
119  }
120 
123 
125 
127  unsigned Level;
134 
135  /// \c true if this line should be formatted, i.e. intersects directly or
136  /// indirectly with one of the input ranges.
137  bool Affected;
138 
139  /// \c true if the leading empty lines of this line intersect with one of the
140  /// input ranges.
142 
143  /// \c true if one of this line's children intersects with an input range.
145 
147 
148 private:
149  // Disallow copying.
150  AnnotatedLine(const AnnotatedLine &) = delete;
151  void operator=(const AnnotatedLine &) = delete;
152 };
153 
154 /// Determines extra information about the tokens comprising an
155 /// \c UnwrappedLine.
157 public:
/// Creates an annotator bound to \p Style and \p Keywords.
/// Both arguments are stored in reference members (\c Style and \c Keywords)
/// rather than copied, so the referenced objects must outlive this annotator.
158  TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords)
159  : Style(Style), Keywords(Keywords) {}
160 
161  /// Adapts the indent levels of comment lines to the indent of the
162  /// subsequent line.
163  // FIXME: Can/should this be done in the UnwrappedLineParser?
165 
166  void annotate(AnnotatedLine &Line) const;
168 
169 private:
170  /// Calculate the penalty for splitting before \c Tok.
171  unsigned splitPenalty(const AnnotatedLine &Line, const FormatToken &Tok,
172  bool InFunctionDecl) const;
173 
174  bool spaceRequiredBeforeParens(const FormatToken &Right) const;
175 
176  bool spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left,
177  const FormatToken &Right) const;
178 
179  bool spaceRequiredBefore(const AnnotatedLine &Line,
180  const FormatToken &Right) const;
181 
182  bool mustBreakBefore(const AnnotatedLine &Line,
183  const FormatToken &Right) const;
184 
185  bool canBreakBefore(const AnnotatedLine &Line,
186  const FormatToken &Right) const;
187 
188  bool mustBreakForReturnType(const AnnotatedLine &Line) const;
189 
190  void printDebugInfo(const AnnotatedLine &Line) const;
191 
192  void calculateUnbreakableTailLengths(AnnotatedLine &Line) const;
193 
194  void calculateArrayInitializerColumnList(AnnotatedLine &Line) const;
195 
196  FormatToken *calculateInitializerColumnList(AnnotatedLine &Line,
197  FormatToken *CurrentToken,
198  unsigned Depth) const;
200  getTokenReferenceAlignment(const FormatToken &PointerOrReference) const;
201 
202  FormatStyle::PointerAlignmentStyle getTokenPointerOrReferenceAlignment(
203  const FormatToken &PointerOrReference) const;
204 
205  const FormatStyle &Style;
206 
207  const AdditionalKeywords &Keywords;
208 };
209 
210 } // end namespace format
211 } // end namespace clang
212 
213 #endif
clang::format::AnnotatedLine::MatchingOpeningBlockLineIndex
size_t MatchingOpeningBlockLineIndex
Definition: TokenAnnotator.h:128
clang::format::FormatToken::getNextNonComment
const LLVM_NODISCARD FormatToken * getNextNonComment() const
Returns the next token ignoring comments.
Definition: FormatToken.h:704
clang::format::AnnotatedLine::MightBeFunctionDecl
bool MightBeFunctionDecl
Definition: TokenAnnotator.h:132
clang::format::AnnotatedLine::First
FormatToken * First
Definition: TokenAnnotator.h:121
llvm::SmallVector
Definition: LLVM.h:38
clang::format::AnnotatedLine::ChildrenAffected
bool ChildrenAffected
True if one of this line's children intersects with an input range.
Definition: TokenAnnotator.h:144
clang::format::AnnotatedLine::mightBeFunctionDefinition
bool mightBeFunctionDefinition() const
true if this line looks like a function definition instead of a function declaration.
Definition: TokenAnnotator.h:99
clang::format::AnnotatedLine::Children
SmallVector< AnnotatedLine *, 0 > Children
Definition: TokenAnnotator.h:124
clang::format::TokenAnnotator::setCommentLineLevels
void setCommentLineLevels(SmallVectorImpl< AnnotatedLine * > &Lines) const
Adapts the indent levels of comment lines to the indent of the subsequent line.
Definition: TokenAnnotator.cpp:2525
clang::format::AnnotatedLine::Level
unsigned Level
Definition: TokenAnnotator.h:127
clang::format::FormatStyle
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:54
clang::format::AnnotatedLine::LeadingEmptyLinesAffected
bool LeadingEmptyLinesAffected
True if the leading empty lines of this line intersect with one of the input ranges.
Definition: TokenAnnotator.h:141
clang::format::FormatToken
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:227
clang::format::AdditionalKeywords
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
Definition: FormatToken.h:885
clang::format::UnwrappedLine
An unwrapped line is a sequence of Token, that we would like to put on a single line if there was no ...
Definition: UnwrappedLineParser.h:37
Format.h
clang::format::AnnotatedLine::FirstStartColumn
unsigned FirstStartColumn
Definition: TokenAnnotator.h:146
clang::format::LT_PreprocessorDirective
@ LT_PreprocessorDirective
Definition: TokenAnnotator.h:31
clang::format::FormatToken::Previous
FormatToken * Previous
The previous token in the unwrapped line.
Definition: FormatToken.h:469
Node
DynTypedNode Node
Definition: ASTMatchFinder.cpp:68
clang::format::AnnotatedLine::isComment
bool isComment() const
Definition: TokenAnnotator.h:79
clang::format::AnnotatedLine
Definition: TokenAnnotator.h:36
Depth
int Depth
Definition: ASTDiff.cpp:191
clang::format::LT_ImportStatement
@ LT_ImportStatement
Definition: TokenAnnotator.h:26
clang::format::TokenAnnotator
Determines extra information about the tokens comprising an UnwrappedLine.
Definition: TokenAnnotator.h:156
clang::format::TokenAnnotator::annotate
void annotate(AnnotatedLine &Line) const
Definition: TokenAnnotator.cpp:2561
clang::format::AnnotatedLine::Type
LineType Type
Definition: TokenAnnotator.h:126
clang::format::TokenAnnotator::TokenAnnotator
TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords)
Definition: TokenAnnotator.h:158
clang::format::AnnotatedLine::InPPDirective
bool InPPDirective
Definition: TokenAnnotator.h:130
clang::format::AnnotatedLine::~AnnotatedLine
~AnnotatedLine()
Definition: TokenAnnotator.h:68
Line
const AnnotatedLine * Line
Definition: UsingDeclarationsSorter.cpp:68
clang::format::LT_Invalid
@ LT_Invalid
Definition: TokenAnnotator.h:25
clang::format::AnnotatedLine::startsWith
bool startsWith(Ts... Tokens) const
true if this line starts with the given tokens in order, ignoring comments.
Definition: TokenAnnotator.h:85
UnwrappedLineParser.h
clang::format::FormatStyle::PointerAlignmentStyle
PointerAlignmentStyle
The &, && and * alignment style.
Definition: Format.h:2842
clang::format::FormatToken::startsSequence
bool startsSequence(A K1, Ts... Tokens) const
true if this token starts a sequence with the given tokens in order, following the Next pointers,...
Definition: FormatToken.h:537
clang::format::LineType
LineType
Definition: TokenAnnotator.h:24
clang::format::TokenAnnotator::calculateFormattingInformation
void calculateFormattingInformation(AnnotatedLine &Line) const
Definition: TokenAnnotator.cpp:2728
false
#define false
Definition: stdbool.h:22
clang::format::LT_ObjCDecl
@ LT_ObjCDecl
Definition: TokenAnnotator.h:27
clang::format::AnnotatedLine::Last
FormatToken * Last
Definition: TokenAnnotator.h:122
clang::format::FormatToken::is
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:500
clang::format::AnnotatedLine::startsWithNamespace
bool startsWithNamespace() const
true if this line starts a namespace definition.
Definition: TokenAnnotator.h:115
clang::format::FormatToken::endsSequence
bool endsSequence(A K1, Ts... Tokens) const
true if this token ends a sequence with the given tokens in order, following the Previous pointers,...
Definition: FormatToken.h:548
clang::format::UnwrappedLineNode
Definition: UnwrappedLineParser.h:329
clang::format::AnnotatedLine::MatchingClosingBlockLineIndex
size_t MatchingClosingBlockLineIndex
Definition: TokenAnnotator.h:129
clang
Definition: CalledOnceCheck.h:17
clang::format::AnnotatedLine::endsWith
bool endsWith(Ts... Tokens) const
true if this line ends with the given tokens in reversed order, ignoring comments.
Definition: TokenAnnotator.h:93
clang::format::AnnotatedLine::AnnotatedLine
AnnotatedLine(const UnwrappedLine &Line)
Definition: TokenAnnotator.h:38
clang::format::LT_ArrayOfStructInitializer
@ LT_ArrayOfStructInitializer
Definition: TokenAnnotator.h:33
clang::format::AnnotatedLine::MustBeDeclaration
bool MustBeDeclaration
Definition: TokenAnnotator.h:131
clang::format::AnnotatedLine::IsMultiVariableDeclStmt
bool IsMultiVariableDeclStmt
Definition: TokenAnnotator.h:133
clang::format::LT_Other
@ LT_Other
Definition: TokenAnnotator.h:30
llvm::SmallVectorImpl
Definition: Randstruct.h:18
clang::format::FormatToken::Next
FormatToken * Next
The next token in the unwrapped line.
Definition: FormatToken.h:472
clang::format::LT_ObjCMethodDecl
@ LT_ObjCMethodDecl
Definition: TokenAnnotator.h:28
clang::format::LT_VirtualFunctionDecl
@ LT_VirtualFunctionDecl
Definition: TokenAnnotator.h:32
clang::format::LT_ObjCProperty
@ LT_ObjCProperty
Definition: TokenAnnotator.h:29
clang::format::AnnotatedLine::Affected
bool Affected
True if this line should be formatted, i.e.
Definition: TokenAnnotator.h:137