clang  14.0.0git
TokenAnnotator.h
Go to the documentation of this file.
1 //===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements a token annotator, i.e. creates
11 /// \c AnnotatedTokens out of \c FormatTokens with required extra information.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
16 #define LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
17 
18 #include "UnwrappedLineParser.h"
19 #include "clang/Format/Format.h"
20 
21 namespace clang {
22 class SourceManager;
23 
24 namespace format {
25 
26 enum LineType {
29  LT_ObjCDecl, // An @interface, @implementation, or @protocol line.
31  LT_ObjCProperty, // An @property line.
36 };
37 
39 public:
41  : First(Line.Tokens.front().Tok), Level(Line.Level),
49  assert(!Line.Tokens.empty());
50 
51  // Calculate Next and Previous for all tokens. Note that we must overwrite
52  // Next and Previous for every token, as previous formatting runs might have
53  // left them in a different state.
54  First->Previous = nullptr;
55  FormatToken *Current = First;
56  for (auto I = ++Line.Tokens.begin(), E = Line.Tokens.end(); I != E; ++I) {
57  const UnwrappedLineNode &Node = *I;
58  Current->Next = I->Tok;
59  I->Tok->Previous = Current;
60  Current = Current->Next;
61  Current->Children.clear();
62  for (const auto &Child : Node.Children) {
63  Children.push_back(new AnnotatedLine(Child));
64  Current->Children.push_back(Children.back());
65  }
66  }
67  Last = Current;
68  Last->Next = nullptr;
69  }
70 
72  for (unsigned i = 0, e = Children.size(); i != e; ++i) {
73  delete Children[i];
74  }
75  FormatToken *Current = First;
76  while (Current) {
77  Current->Children.clear();
78  Current->Role.reset();
79  Current = Current->Next;
80  }
81  }
82 
83  /// Checks whether the first tokens of this line match \p Tokens in the
84  /// given order, skipping comments; yields \c false when \c First is null.
85  template <typename... Ts> bool startsWith(Ts... Tokens) const {
86  return First ? First->startsSequence(Tokens...) : false;
87  }
88 
89  /// Checks whether this line's last tokens match \p Tokens, which are given
90  /// in reversed order; comments are skipped.
91  /// For example, given tokens [T1, T2, T3, ...], the result is \c true if
92  /// this line looks like "... T3 T2 T1". Yields \c false when \c Last is null.
93  template <typename... Ts> bool endsWith(Ts... Tokens) const {
94  return Last ? Last->endsSequence(Tokens...) : false;
95  }
96 
97  /// \c true if this line looks like a function definition instead of a
98  /// function declaration. Asserts MightBeFunctionDecl.
100  assert(MightBeFunctionDecl);
101  // Try to determine whether the end of a stream of tokens belongs to the
102  // definition or the declaration of a function. A line whose last token is
103  // the ';' of foo(); is a declaration, so any other ending is treated as a
104  // possible definition. The line could, however, end with
105  // foo(); /* comment */
106  // or
107  // foo(); // comment
108  // or
109  // foo() // comment
110  // which is why this relies on endsWith() ignoring the comment.
111  return !endsWith(tok::semi);
112  }
113 
114  /// \c true if this line begins a namespace (plain, macro, inline or export).
115  bool startsWithNamespace() const {
116  if (startsWith(tok::kw_namespace) || startsWith(TT_NamespaceMacro))
117  return true;
118  return startsWith(tok::kw_inline, tok::kw_namespace) ||
119  startsWith(tok::kw_export, tok::kw_namespace);
120  }
123 
125 
127  unsigned Level;
134 
135  /// \c True if this line should be formatted, i.e. intersects directly or
136  /// indirectly with one of the input ranges.
137  bool Affected;
138 
139  /// \c True if the leading empty lines of this line intersect with one of the
140  /// input ranges.
142 
143  /// \c True if one of this line's children intersects with an input range.
145 
147 
148 private:
149  // Disallow copying.
150  AnnotatedLine(const AnnotatedLine &) = delete;
151  void operator=(const AnnotatedLine &) = delete;
152 };
153 
154 /// Determines extra information about the tokens comprising an
155 /// \c UnwrappedLine.
157 public:
158  TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords)
159  : Style(Style), Keywords(Keywords) {} // Members are references: both arguments must outlive this object.
160 
161  /// Adapts the indent levels of comment lines to the indent of the
162  /// subsequent line.
163  // FIXME: Can/should this be done in the UnwrappedLineParser?
165 
166  void annotate(AnnotatedLine &Line);
168 
169 private:
170  /// Calculate the penalty for splitting before \c Tok.
171  unsigned splitPenalty(const AnnotatedLine &Line, const FormatToken &Tok,
172  bool InFunctionDecl);
173 
174  bool spaceRequiredBeforeParens(const FormatToken &Right) const;
175 
176  bool spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left,
177  const FormatToken &Right);
178 
179  bool spaceRequiredBefore(const AnnotatedLine &Line, const FormatToken &Right);
180 
181  bool mustBreakBefore(const AnnotatedLine &Line, const FormatToken &Right);
182 
183  bool canBreakBefore(const AnnotatedLine &Line, const FormatToken &Right);
184 
185  bool mustBreakForReturnType(const AnnotatedLine &Line) const;
186 
187  void printDebugInfo(const AnnotatedLine &Line);
188 
189  void calculateUnbreakableTailLengths(AnnotatedLine &Line);
190 
191  void calculateArrayInitializerColumnList(AnnotatedLine &Line);
192 
193  FormatToken *calculateInitializerColumnList(AnnotatedLine &Line,
194  FormatToken *CurrentToken,
195  unsigned Depth);
197  getTokenReferenceAlignment(const FormatToken &PointerOrReference);
198 
200  getTokenPointerOrReferenceAlignment(const FormatToken &PointerOrReference);
201 
202  const FormatStyle &Style;
203 
204  const AdditionalKeywords &Keywords;
205 };
206 
207 } // end namespace format
208 } // end namespace clang
209 
210 #endif
clang::format::AnnotatedLine::MatchingOpeningBlockLineIndex
size_t MatchingOpeningBlockLineIndex
Definition: TokenAnnotator.h:128
clang::format::TokenAnnotator::calculateFormattingInformation
void calculateFormattingInformation(AnnotatedLine &Line)
Definition: TokenAnnotator.cpp:2542
clang::format::AnnotatedLine::MightBeFunctionDecl
bool MightBeFunctionDecl
Definition: TokenAnnotator.h:132
clang::format::AnnotatedLine::First
FormatToken * First
Definition: TokenAnnotator.h:121
llvm::SmallVector
Definition: LLVM.h:38
clang::format::AnnotatedLine::ChildrenAffected
bool ChildrenAffected
True if one of this line's children intersects with an input range.
Definition: TokenAnnotator.h:144
clang::format::AnnotatedLine::mightBeFunctionDefinition
bool mightBeFunctionDefinition() const
true if this line looks like a function definition instead of a function declaration.
Definition: TokenAnnotator.h:99
clang::format::AnnotatedLine::Children
SmallVector< AnnotatedLine *, 0 > Children
Definition: TokenAnnotator.h:124
clang::format::AnnotatedLine::Level
unsigned Level
Definition: TokenAnnotator.h:127
clang::format::FormatStyle
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:59
clang::format::AnnotatedLine::LeadingEmptyLinesAffected
bool LeadingEmptyLinesAffected
True if the leading empty lines of this line intersect with one of the input ranges.
Definition: TokenAnnotator.h:141
clang::format::FormatToken
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:211
clang::format::AdditionalKeywords
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
Definition: FormatToken.h:842
clang::format::UnwrappedLine
An unwrapped line is a sequence of Token, that we would like to put on a single line if there was no ...
Definition: UnwrappedLineParser.h:36
Format.h
clang::format::AnnotatedLine::FirstStartColumn
unsigned FirstStartColumn
Definition: TokenAnnotator.h:146
clang::format::LT_PreprocessorDirective
@ LT_PreprocessorDirective
Definition: TokenAnnotator.h:33
clang::format::FormatToken::Previous
FormatToken * Previous
The previous token in the unwrapped line.
Definition: FormatToken.h:431
Node
DynTypedNode Node
Definition: ASTMatchFinder.cpp:67
clang::format::AnnotatedLine
Definition: TokenAnnotator.h:38
Depth
int Depth
Definition: ASTDiff.cpp:191
clang::format::LT_ImportStatement
@ LT_ImportStatement
Definition: TokenAnnotator.h:28
clang::format::TokenAnnotator
Determines extra information about the tokens comprising an UnwrappedLine.
Definition: TokenAnnotator.h:156
clang::format::AnnotatedLine::Type
LineType Type
Definition: TokenAnnotator.h:126
clang::format::TokenAnnotator::TokenAnnotator
TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords)
Definition: TokenAnnotator.h:158
clang::format::TokenAnnotator::annotate
void annotate(AnnotatedLine &Line)
Definition: TokenAnnotator.cpp:2375
clang::format::AnnotatedLine::InPPDirective
bool InPPDirective
Definition: TokenAnnotator.h:130
clang::format::FormatStyle::PointerAlignmentStyle
PointerAlignmentStyle
The &, && and * alignment style.
Definition: Format.h:2912
clang::format::AnnotatedLine::~AnnotatedLine
~AnnotatedLine()
Definition: TokenAnnotator.h:71
Line
const AnnotatedLine * Line
Definition: UsingDeclarationsSorter.cpp:68
clang::format::LT_Invalid
@ LT_Invalid
Definition: TokenAnnotator.h:27
clang::format::AnnotatedLine::startsWith
bool startsWith(Ts... Tokens) const
true if this line starts with the given tokens in order, ignoring comments.
Definition: TokenAnnotator.h:85
UnwrappedLineParser.h
clang::format::FormatToken::startsSequence
bool startsSequence(A K1, Ts... Tokens) const
true if this token starts a sequence with the given tokens in order, following the Next pointers,...
Definition: FormatToken.h:490
clang::format::LineType
LineType
Definition: TokenAnnotator.h:26
false
#define false
Definition: stdbool.h:17
clang::format::LT_ObjCDecl
@ LT_ObjCDecl
Definition: TokenAnnotator.h:29
clang::format::AnnotatedLine::Last
FormatToken * Last
Definition: TokenAnnotator.h:122
clang::format::AnnotatedLine::startsWithNamespace
bool startsWithNamespace() const
true if this line starts a namespace definition.
Definition: TokenAnnotator.h:115
clang::format::TokenAnnotator::setCommentLineLevels
void setCommentLineLevels(SmallVectorImpl< AnnotatedLine * > &Lines)
Adapts the indent levels of comment lines to the indent of the subsequent line.
Definition: TokenAnnotator.cpp:2334
clang::format::FormatToken::endsSequence
bool endsSequence(A K1, Ts... Tokens) const
true if this token ends a sequence with the given tokens in order, following the Previous pointers,...
Definition: FormatToken.h:501
clang::format::UnwrappedLineNode
Definition: UnwrappedLineParser.h:300
clang::format::AnnotatedLine::MatchingClosingBlockLineIndex
size_t MatchingClosingBlockLineIndex
Definition: TokenAnnotator.h:129
clang
Definition: CalledOnceCheck.h:17
clang::format::AnnotatedLine::endsWith
bool endsWith(Ts... Tokens) const
true if this line ends with the given tokens in reversed order, ignoring comments.
Definition: TokenAnnotator.h:93
clang::format::AnnotatedLine::AnnotatedLine
AnnotatedLine(const UnwrappedLine &Line)
Definition: TokenAnnotator.h:40
clang::format::LT_ArrayOfStructInitializer
@ LT_ArrayOfStructInitializer
Definition: TokenAnnotator.h:35
clang::format::AnnotatedLine::MustBeDeclaration
bool MustBeDeclaration
Definition: TokenAnnotator.h:131
clang::format::AnnotatedLine::IsMultiVariableDeclStmt
bool IsMultiVariableDeclStmt
Definition: TokenAnnotator.h:133
clang::format::LT_Other
@ LT_Other
Definition: TokenAnnotator.h:32
llvm::SmallVectorImpl
Definition: LLVM.h:39
clang::format::FormatToken::Next
FormatToken * Next
The next token in the unwrapped line.
Definition: FormatToken.h:434
clang::format::LT_ObjCMethodDecl
@ LT_ObjCMethodDecl
Definition: TokenAnnotator.h:30
clang::format::LT_VirtualFunctionDecl
@ LT_VirtualFunctionDecl
Definition: TokenAnnotator.h:34
clang::format::LT_ObjCProperty
@ LT_ObjCProperty
Definition: TokenAnnotator.h:31
clang::format::AnnotatedLine::Affected
bool Affected
True if this line should be formatted, i.e. intersects directly or indirectly with one of the input ranges.
Definition: TokenAnnotator.h:137