clang 20.0.0git
DefinitionBlockSeparator.cpp
Go to the documentation of this file.
1//===--- DefinitionBlockSeparator.cpp ---------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements DefinitionBlockSeparator, a TokenAnalyzer that inserts
11/// or removes empty lines separating definition blocks like classes, structs,
12/// functions, enums, and namespaces in between.
13///
14//===----------------------------------------------------------------------===//
15
17#include "llvm/Support/Debug.h"
18#define DEBUG_TYPE "definition-block-separator"
19
20namespace clang {
21namespace format {
// Implementation of the `analyze` override: hands the annotated lines and the
// token lexer to separateBlocks(), which fills `Result`, and returns `Result`
// paired with the constant 0.
// `Annotator` is not used by any statement visible in this listing.
// NOTE(review): listing lines 25-27 are absent from this extract, so the
// declaration of `Result` and anything else done before the call cannot be
// seen here. The meaning of the second pair element is likewise not shown --
// confirm against the TokenAnalyzer interface.
22std::pair<tooling::Replacements, unsigned> DefinitionBlockSeparator::analyze(
23 TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
24 FormatTokenLexer &Tokens) {
28 separateBlocks(AnnotatedLines, Result, Tokens);
29 return {Result, 0};
30}
31
// Walks `Lines` and records whitespace replacements in front of lines that
// open, or directly follow, a definition block (function, namespace,
// class/struct/union, enum), then copies the replacements generated by the
// local WhitespaceManager into `Result`.
// NOTE(review): this listing skips original lines 33, 36, 57, 69 and 71-76,
// so the remaining parameters and the initializers of `IsNeverStyle`,
// `NewlineCount` and `Whitespaces` are not visible here.
32void DefinitionBlockSeparator::separateBlocks(
34 FormatTokenLexer &Tokens) {
35 const bool IsNeverStyle =
37 const AdditionalKeywords &ExtraKeywords = Tokens.getKeywords();
 // +1 for an opening brace/paren/square bracket, -1 for a closing one,
 // 0 for every other token.
38 auto GetBracketLevelChange = [](const FormatToken *Tok) {
39 if (Tok->isOneOf(tok::l_brace, tok::l_paren, tok::l_square))
40 return 1;
41 if (Tok->isOneOf(tok::r_brace, tok::r_paren, tok::r_square))
42 return -1;
43 return 0;
44 };
 // True when `Line` looks like the start of a definition: it may be a
 // function definition, it starts a namespace, or it carries
 // class/struct/union (and enum, unless `ExcludeEnum`) outside of any
 // bracket nesting. The `kw_function` test is combined with a condition on
 // an elided listing line (57), which is not visible here.
45 auto LikelyDefinition = [&](const AnnotatedLine *Line,
46 bool ExcludeEnum = false) {
47 if ((Line->MightBeFunctionDecl && Line->mightBeFunctionDefinition()) ||
48 Line->startsWithNamespace()) {
49 return true;
50 }
51 int BracketLevel = 0;
52 for (const FormatToken *CurrentToken = Line->First; CurrentToken;
53 CurrentToken = CurrentToken->Next) {
 // Only keywords seen at nesting depth 0 count.
54 if (BracketLevel == 0) {
55 if (CurrentToken->isOneOf(tok::kw_class, tok::kw_struct,
56 tok::kw_union) ||
58 CurrentToken->is(ExtraKeywords.kw_function))) {
59 return true;
60 }
61 if (!ExcludeEnum && CurrentToken->is(tok::kw_enum))
62 return true;
63 }
64 BracketLevel += GetBracketLevelChange(CurrentToken);
65 }
66 return false;
67 };
68 unsigned NewlineCount =
70 WhitespaceManager Whitespaces(
77 for (unsigned I = 0; I < Lines.size(); ++I) {
78 const auto &CurrentLine = Lines[I];
 // Lines inside preprocessor directives are never used as a trigger.
79 if (CurrentLine->InPPDirective)
80 continue;
 // State shared by reference with the lambdas below. `TargetLine` has no
 // initializer; every visible call to InsertReplacement is preceded by an
 // assignment to it.
81 FormatToken *TargetToken = nullptr;
82 AnnotatedLine *TargetLine;
83 auto OpeningLineIndex = CurrentLine->MatchingOpeningBlockLineIndex;
84 AnnotatedLine *OpeningLine = nullptr;
85 const auto IsAccessSpecifierToken = [](const FormatToken *Token) {
86 return Token->isAccessSpecifier() || Token->isObjCAccessSpecifier();
87 };
 // Records a replacement of the whitespace before `TargetToken` with
 // `NewlineToInsert` newlines, passing the token's OriginalColumn for both
 // trailing arguments. Nothing is recorded for EOF, for an access specifier
 // (as the target, or as the first token of the line just before the
 // opening line), or when the target line is not marked Affected.
88 const auto InsertReplacement = [&](const int NewlineToInsert) {
89 assert(TargetLine);
90 assert(TargetToken);
91
92 // Do not handle EOF newlines.
93 if (TargetToken->is(tok::eof))
94 return;
95 if (IsAccessSpecifierToken(TargetToken) ||
96 (OpeningLineIndex > 0 &&
97 IsAccessSpecifierToken(Lines[OpeningLineIndex - 1]->First))) {
98 return;
99 }
100 if (!TargetLine->Affected)
101 return;
102 Whitespaces.replaceWhitespace(*TargetToken, NewlineToInsert,
103 TargetToken->OriginalColumn,
104 TargetToken->OriginalColumn);
105 };
 // True when the line at `LineIndex` starts with `#` followed by one of the
 // conditional directive keywords (if/ifdef/ifndef/elif/elifdef/elifndef/
 // else/endif).
106 const auto IsPPConditional = [&](const size_t LineIndex) {
107 const auto &Line = Lines[LineIndex];
108 return Line->First->is(tok::hash) && Line->First->Next &&
109 Line->First->Next->isOneOf(tok::pp_if, tok::pp_ifdef, tok::pp_else,
110 tok::pp_ifndef, tok::pp_elifndef,
111 tok::pp_elifdef, tok::pp_elif,
112 tok::pp_endif);
113 };
 // True when the opening line is the first line, or the line before it ends
 // with a scope-opening token or is a preprocessor conditional.
114 const auto FollowingOtherOpening = [&]() {
115 return OpeningLineIndex == 0 ||
116 Lines[OpeningLineIndex - 1]->Last->opensScope() ||
117 IsPPConditional(OpeningLineIndex - 1);
118 };
 // True when the current line has `enum` at bracket depth 0 followed (also
 // at depth 0) by `{`, or has such an `enum` and the next line begins with
 // `{`.
119 const auto HasEnumOnLine = [&]() {
120 bool FoundEnumKeyword = false;
121 int BracketLevel = 0;
122 for (const FormatToken *CurrentToken = CurrentLine->First; CurrentToken;
123 CurrentToken = CurrentToken->Next) {
124 if (BracketLevel == 0) {
125 if (CurrentToken->is(tok::kw_enum))
126 FoundEnumKeyword = true;
127 else if (FoundEnumKeyword && CurrentToken->is(tok::l_brace))
128 return true;
129 }
130 BracketLevel += GetBracketLevelChange(CurrentToken);
131 }
132 return FoundEnumKeyword && I + 1 < Lines.size() &&
133 Lines[I + 1]->First->is(tok::l_brace);
134 };
135
136 bool IsDefBlock = false;
 // Decides whether the line at `OpeningLineIndex + Direction` is something
 // that belongs in front of a definition rather than being one itself:
 // false for a likely definition; true for a comment that is not a
 // "clang-format on" marker, for a lone identifier classified as a
 // function-like/freestanding macro that sits directly above a possible
 // function definition, and for a C# attribute line; false otherwise.
 // With the default Direction of -1 the visible callers first check
 // `OpeningLineIndex > 0`.
137 const auto MayPrecedeDefinition = [&](const int Direction = -1) {
138 assert(Direction >= -1);
139 assert(Direction <= 1);
140 const size_t OperateIndex = OpeningLineIndex + Direction;
141 assert(OperateIndex < Lines.size());
142 const auto &OperateLine = Lines[OperateIndex];
143 if (LikelyDefinition(OperateLine))
144 return false;
145
146 if (const auto *Tok = OperateLine->First;
147 Tok->is(tok::comment) && !isClangFormatOn(Tok->TokenText)) {
148 return true;
149 }
150
151 // A single line identifier that is not in the last line.
152 if (OperateLine->First->is(tok::identifier) &&
153 OperateLine->First == OperateLine->Last &&
154 OperateIndex + 1 < Lines.size()) {
155 // UnwrappedLineParser's recognition of free-standing macro like
156 // Q_OBJECT may also recognize some uppercased type names that may be
157 // used as return type as that kind of macros, which is a bit hard to
158 // distinguish one from another purely from token patterns. Here, we
159 // try not to add new lines below those identifiers.
160 AnnotatedLine *NextLine = Lines[OperateIndex + 1];
161 if (NextLine->MightBeFunctionDecl &&
162 NextLine->mightBeFunctionDefinition() &&
163 NextLine->First->NewlinesBefore == 1 &&
164 OperateLine->First->is(TT_FunctionLikeOrFreestandingMacro)) {
165 return true;
166 }
167 }
168
169 if (Style.isCSharp() && OperateLine->First->is(TT_AttributeSquare))
170 return true;
171 return false;
172 };
173
 // Case 1: an enum that LikelyDefinition (with enums excluded) does not
 // already classify as another kind of definition.
174 if (HasEnumOnLine() &&
175 !LikelyDefinition(CurrentLine, /*ExcludeEnum=*/true)) {
176 // We have no scope opening/closing information for enum.
177 IsDefBlock = true;
178 OpeningLineIndex = I;
 // Move the opening index up over lines that may precede the definition
 // (see MayPrecedeDefinition), so the replacement lands above them.
179 while (OpeningLineIndex > 0 && MayPrecedeDefinition())
180 --OpeningLineIndex;
181 OpeningLine = Lines[OpeningLineIndex];
182 TargetLine = OpeningLine;
183 TargetToken = TargetLine->First;
 // `OpeningLineIndex != 0` converts to 1 newline, or 0 when the block
 // starts on the very first line.
184 if (!FollowingOtherOpening())
185 InsertReplacement(NewlineCount);
186 else if (IsNeverStyle)
187 InsertReplacement(OpeningLineIndex != 0);
 // Look for the closing brace on the current line; if there is none,
 // advance `I` to the next line that begins with `}` (or to Lines.size()
 // when no such line exists).
188 TargetLine = CurrentLine;
189 TargetToken = TargetLine->First;
190 while (TargetToken && TargetToken->isNot(tok::r_brace))
191 TargetToken = TargetToken->Next;
192 if (!TargetToken)
193 while (I < Lines.size() && Lines[I]->First->isNot(tok::r_brace))
194 ++I;
 // Case 2: the current line starts with a scope-closing token.
195 } else if (CurrentLine->First->closesScope()) {
 // NOTE(review): `>` lets an index equal to Lines.size() through to the
 // Lines[OpeningLineIndex] accesses below -- presumably the "no match"
 // marker is far larger than any valid size; confirm.
196 if (OpeningLineIndex > Lines.size())
197 continue;
198 // Handling the case that opening brace has its own line, with checking
199 // whether the last line already had an opening brace to guard against
200 // misrecognition.
201 if (OpeningLineIndex > 0 &&
202 Lines[OpeningLineIndex]->First->is(tok::l_brace) &&
203 Lines[OpeningLineIndex - 1]->Last->isNot(tok::l_brace)) {
204 --OpeningLineIndex;
205 }
206 OpeningLine = Lines[OpeningLineIndex];
207 // Closing a function definition.
208 if (LikelyDefinition(OpeningLine)) {
209 IsDefBlock = true;
210 while (OpeningLineIndex > 0 && MayPrecedeDefinition())
211 --OpeningLineIndex;
212 OpeningLine = Lines[OpeningLineIndex];
213 TargetLine = OpeningLine;
214 TargetToken = TargetLine->First;
215 if (!FollowingOtherOpening()) {
216 // Avoid duplicated replacement.
217 if (TargetToken->isNot(tok::l_brace))
218 InsertReplacement(NewlineCount);
219 } else if (IsNeverStyle) {
220 InsertReplacement(OpeningLineIndex != 0);
221 }
222 }
223 }
224
 // Trailing side: when a definition block was recognised above, handle the
 // whitespace in front of the line that follows it. `OpeningLineIndex` is
 // reused here as the index of that following line.
225 // Not the last token.
226 if (IsDefBlock && I + 1 < Lines.size()) {
227 OpeningLineIndex = I + 1;
228 TargetLine = Lines[OpeningLineIndex];
229 TargetToken = TargetLine->First;
230
231 // No empty line for continuously closing scopes. The token will be
232 // handled in another case if the line following is opening a
233 // definition.
234 if (!TargetToken->closesScope() && !IsPPConditional(OpeningLineIndex)) {
235 // Check whether current line may precede a definition line.
236 while (OpeningLineIndex + 1 < Lines.size() &&
237 MayPrecedeDefinition(/*Direction=*/0)) {
238 ++OpeningLineIndex;
239 }
240 TargetLine = Lines[OpeningLineIndex];
 // Only insert here when the line reached is not itself a likely
 // definition; the replacement then goes before line I + 1.
241 if (!LikelyDefinition(TargetLine)) {
242 OpeningLineIndex = I + 1;
243 TargetLine = Lines[I + 1];
244 TargetToken = TargetLine->First;
245 InsertReplacement(NewlineCount);
246 }
247 } else if (IsNeverStyle) {
248 InsertReplacement(/*NewlineToInsert=*/1);
249 }
250 }
251 }
 // Copy the generated replacements into `Result`, stopping at the first one
 // that fails to be added.
 // NOTE(review): on failure the value returned by add() is dropped without
 // being inspected or consumed -- confirm this is acceptable for the error
 // type it returns.
252 for (const auto &R : Whitespaces.generateReplacements()) {
253 // The add method returns an Error instance which simulates program exit
254 // code through overloading boolean operator, thus false here indicates
255 // success.
256 if (Result.add(R))
257 return;
258 }
259}
260} // namespace format
261} // namespace clang
This file declares DefinitionBlockSeparator, a TokenAnalyzer that inserts or removes empty lines separating definition blocks like classes, structs, functions, enums, and namespaces in between.
StringRef getBufferData(FileID FID, bool *Invalid=nullptr) const
Return a StringRef to the source buffer data for the specified FileID.
bool computeAffectedLines(SmallVectorImpl< AnnotatedLine * > &Lines)
std::pair< tooling::Replacements, unsigned > analyze(TokenAnnotator &Annotator, SmallVectorImpl< AnnotatedLine * > &AnnotatedLines, FormatTokenLexer &Tokens) override
SourceManager & getSourceManager() const
Definition: TokenAnalyzer.h:38
AffectedRangeManager AffectedRangeMgr
Definition: TokenAnalyzer.h:99
const Environment & Env
Definition: TokenAnalyzer.h:97
Determines extra information about the tokens comprising an UnwrappedLine.
static bool inputUsesCRLF(StringRef Text, bool DefaultToCRLF)
Infers whether the input is using CRLF.
Maintains a set of replacements that are conflict-free.
Definition: Replacement.h:212
bool isClangFormatOn(StringRef Comment)
Definition: Format.cpp:4173
The JSON file list parser is used to communicate input to InstallAPI.
@ Result
The result type of a method or function.
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's lexer.
Definition: FormatToken.h:1024
@ SDS_Never
Remove any empty line between definition blocks.
Definition: Format.h:4057
@ SDS_Always
Insert an empty line between definition blocks.
Definition: Format.h:4055
@ SDS_Leave
Leave definition blocks as they are.
Definition: Format.h:4053
@ LE_DeriveCRLF
Use \r\n unless the input has more lines ending in \n.
Definition: Format.h:3267
bool isCSharp() const
Definition: Format.h:3245
bool isJavaScript() const
Definition: Format.h:3247
LineEndingStyle LineEnding
Line ending style (\n or \r\n) to use.
Definition: Format.h:3272
SeparateDefinitionStyle SeparateDefinitionBlocks
Specifies the use of empty lines to separate definition blocks, including classes, structs, enums, and functions.
Definition: Format.h:4106
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:292