clang 20.0.0git
NamespaceEndCommentsFixer.cpp
Go to the documentation of this file.
1//===--- NamespaceEndCommentsFixer.cpp --------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements NamespaceEndCommentsFixer, a TokenAnalyzer that
11/// fixes namespace end comments.
12///
13//===----------------------------------------------------------------------===//
14
17#include "llvm/Support/Debug.h"
18#include "llvm/Support/Regex.h"
19
20#define DEBUG_TYPE "namespace-end-comments-fixer"
21
22namespace clang {
23namespace format {
24
25namespace {
26// Iterates all tokens starting from StartTok to EndTok and apply Fn to all
27// tokens between them including StartTok and EndTok. Returns the token after
28// EndTok.
29const FormatToken *
30processTokens(const FormatToken *Tok, tok::TokenKind StartTok,
31 tok::TokenKind EndTok,
32 llvm::function_ref<void(const FormatToken *)> Fn) {
33 if (!Tok || Tok->isNot(StartTok))
34 return Tok;
35 int NestLevel = 0;
36 do {
37 if (Tok->is(StartTok))
38 ++NestLevel;
39 else if (Tok->is(EndTok))
40 --NestLevel;
41 if (Fn)
42 Fn(Tok);
43 Tok = Tok->getNextNonComment();
44 } while (Tok && NestLevel > 0);
45 return Tok;
46}
47
48const FormatToken *skipAttribute(const FormatToken *Tok) {
49 if (!Tok)
50 return nullptr;
51 if (Tok->isAttribute()) {
52 Tok = Tok->getNextNonComment();
53 Tok = processTokens(Tok, tok::l_paren, tok::r_paren, nullptr);
54 } else if (Tok->is(tok::l_square)) {
55 Tok = processTokens(Tok, tok::l_square, tok::r_square, nullptr);
56 }
57 return Tok;
58}
59
60// Computes the name of a namespace given the namespace token.
61// Returns "" for anonymous namespace.
62std::string computeName(const FormatToken *NamespaceTok) {
63 assert(NamespaceTok &&
64 NamespaceTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
65 "expecting a namespace token");
66 std::string name;
67 const FormatToken *Tok = NamespaceTok->getNextNonComment();
68 if (NamespaceTok->is(TT_NamespaceMacro)) {
69 // Collects all the non-comment tokens between opening parenthesis
70 // and closing parenthesis or comma.
71 assert(Tok && Tok->is(tok::l_paren) && "expected an opening parenthesis");
72 Tok = Tok->getNextNonComment();
73 while (Tok && !Tok->isOneOf(tok::r_paren, tok::comma)) {
74 name += Tok->TokenText;
75 Tok = Tok->getNextNonComment();
76 }
77 return name;
78 }
79 Tok = skipAttribute(Tok);
80
81 std::string FirstNSName;
82 // For `namespace [[foo]] A::B::inline C {` or
83 // `namespace MACRO1 MACRO2 A::B::inline C {`, returns "A::B::inline C".
84 // Peek for the first '::' (or '{' or '(')) and then return all tokens from
85 // one token before that up until the '{'. A '(' might be a macro with
86 // arguments.
87 const FormatToken *FirstNSTok = nullptr;
88 while (Tok && !Tok->isOneOf(tok::l_brace, tok::coloncolon, tok::l_paren)) {
89 if (FirstNSTok)
90 FirstNSName += FirstNSTok->TokenText;
91 FirstNSTok = Tok;
92 Tok = Tok->getNextNonComment();
93 }
94
95 if (FirstNSTok)
96 Tok = FirstNSTok;
97 Tok = skipAttribute(Tok);
98
99 FirstNSTok = nullptr;
100 // Add everything from '(' to ')'.
101 auto AddToken = [&name](const FormatToken *Tok) { name += Tok->TokenText; };
102 bool IsPrevColoncolon = false;
103 bool HasColoncolon = false;
104 bool IsPrevInline = false;
105 bool NameFinished = false;
106 // If we found '::' in name, then it's the name. Otherwise, we can't tell
107 // which one is name. For example, `namespace A B {`.
108 while (Tok && Tok->isNot(tok::l_brace)) {
109 if (FirstNSTok) {
110 if (!IsPrevInline && HasColoncolon && !IsPrevColoncolon) {
111 if (FirstNSTok->is(tok::l_paren)) {
112 FirstNSTok = Tok =
113 processTokens(FirstNSTok, tok::l_paren, tok::r_paren, AddToken);
114 continue;
115 }
116 if (FirstNSTok->isNot(tok::coloncolon)) {
117 NameFinished = true;
118 break;
119 }
120 }
121 name += FirstNSTok->TokenText;
122 IsPrevColoncolon = FirstNSTok->is(tok::coloncolon);
123 HasColoncolon = HasColoncolon || IsPrevColoncolon;
124 if (FirstNSTok->is(tok::kw_inline)) {
125 name += " ";
126 IsPrevInline = true;
127 }
128 }
129 FirstNSTok = Tok;
130 Tok = Tok->getNextNonComment();
131 const FormatToken *TokAfterAttr = skipAttribute(Tok);
132 if (TokAfterAttr != Tok)
133 FirstNSTok = Tok = TokAfterAttr;
134 }
135 if (!NameFinished && FirstNSTok && FirstNSTok->isNot(tok::l_brace))
136 name += FirstNSTok->TokenText;
137 if (FirstNSName.empty() || HasColoncolon)
138 return name;
139 return name.empty() ? FirstNSName : FirstNSName + " " + name;
140}
141
142std::string computeEndCommentText(StringRef NamespaceName, bool AddNewline,
143 const FormatToken *NamespaceTok,
144 unsigned SpacesToAdd) {
145 std::string text = "//";
146 text.append(SpacesToAdd, ' ');
147 text += NamespaceTok->TokenText;
148 if (NamespaceTok->is(TT_NamespaceMacro))
149 text += "(";
150 else if (!NamespaceName.empty())
151 text += ' ';
152 text += NamespaceName;
153 if (NamespaceTok->is(TT_NamespaceMacro))
154 text += ")";
155 if (AddNewline)
156 text += '\n';
157 return text;
158}
159
160bool hasEndComment(const FormatToken *RBraceTok) {
161 return RBraceTok->Next && RBraceTok->Next->is(tok::comment);
162}
163
164bool validEndComment(const FormatToken *RBraceTok, StringRef NamespaceName,
165 const FormatToken *NamespaceTok) {
166 assert(hasEndComment(RBraceTok));
167 const FormatToken *Comment = RBraceTok->Next;
168
169 // Matches a valid namespace end comment.
170 // Valid namespace end comments don't need to be edited.
171 static const llvm::Regex NamespaceCommentPattern =
172 llvm::Regex("^/[/*] *(end (of )?)? *(anonymous|unnamed)? *"
173 "namespace( +([a-zA-Z0-9:_ ]+))?\\.? *(\\*/)?$",
174 llvm::Regex::IgnoreCase);
175 static const llvm::Regex NamespaceMacroCommentPattern =
176 llvm::Regex("^/[/*] *(end (of )?)? *(anonymous|unnamed)? *"
177 "([a-zA-Z0-9_]+)\\(([a-zA-Z0-9:_]*|\".+\")\\)\\.? *(\\*/)?$",
178 llvm::Regex::IgnoreCase);
179
180 SmallVector<StringRef, 8> Groups;
181 if (NamespaceTok->is(TT_NamespaceMacro) &&
182 NamespaceMacroCommentPattern.match(Comment->TokenText, &Groups)) {
183 StringRef NamespaceTokenText = Groups.size() > 4 ? Groups[4] : "";
184 // The name of the macro must be used.
185 if (NamespaceTokenText != NamespaceTok->TokenText)
186 return false;
187 } else if (NamespaceTok->isNot(tok::kw_namespace) ||
188 !NamespaceCommentPattern.match(Comment->TokenText, &Groups)) {
189 // Comment does not match regex.
190 return false;
191 }
192 StringRef NamespaceNameInComment = Groups.size() > 5 ? Groups[5].rtrim() : "";
193 // Anonymous namespace comments must not mention a namespace name.
194 if (NamespaceName.empty() && !NamespaceNameInComment.empty())
195 return false;
196 StringRef AnonymousInComment = Groups.size() > 3 ? Groups[3] : "";
197 // Named namespace comments must not mention anonymous namespace.
198 if (!NamespaceName.empty() && !AnonymousInComment.empty())
199 return false;
200 if (NamespaceNameInComment == NamespaceName)
201 return true;
202
203 // Has namespace comment flowed onto the next line.
204 // } // namespace
205 // // verylongnamespacenamethatdidnotfitonthepreviouscommentline
206 if (!(Comment->Next && Comment->Next->is(TT_LineComment)))
207 return false;
208
209 static const llvm::Regex CommentPattern = llvm::Regex(
210 "^/[/*] *( +([a-zA-Z0-9:_]+))?\\.? *(\\*/)?$", llvm::Regex::IgnoreCase);
211
212 // Pull out just the comment text.
213 if (!CommentPattern.match(Comment->Next->TokenText, &Groups))
214 return false;
215 NamespaceNameInComment = Groups.size() > 2 ? Groups[2] : "";
216
217 return NamespaceNameInComment == NamespaceName;
218}
219
220void addEndComment(const FormatToken *RBraceTok, StringRef EndCommentText,
221 const SourceManager &SourceMgr,
222 tooling::Replacements *Fixes) {
223 auto EndLoc = RBraceTok->Tok.getEndLoc();
224 auto Range = CharSourceRange::getCharRange(EndLoc, EndLoc);
225 auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, EndCommentText));
226 if (Err) {
227 llvm::errs() << "Error while adding namespace end comment: "
228 << llvm::toString(std::move(Err)) << "\n";
229 }
230}
231
232void updateEndComment(const FormatToken *RBraceTok, StringRef EndCommentText,
233 const SourceManager &SourceMgr,
234 tooling::Replacements *Fixes) {
235 assert(hasEndComment(RBraceTok));
236 const FormatToken *Comment = RBraceTok->Next;
237 auto Range = CharSourceRange::getCharRange(Comment->getStartOfNonWhitespace(),
238 Comment->Tok.getEndLoc());
239 auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, EndCommentText));
240 if (Err) {
241 llvm::errs() << "Error while updating namespace end comment: "
242 << llvm::toString(std::move(Err)) << "\n";
243 }
244}
245} // namespace
246
247const FormatToken *
249 const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
250 if (!Line->Affected || Line->InPPDirective || !Line->startsWith(tok::r_brace))
251 return nullptr;
252 size_t StartLineIndex = Line->MatchingOpeningBlockLineIndex;
253 if (StartLineIndex == UnwrappedLine::kInvalidIndex)
254 return nullptr;
255 assert(StartLineIndex < AnnotatedLines.size());
256 const FormatToken *NamespaceTok = AnnotatedLines[StartLineIndex]->First;
257 if (NamespaceTok->is(tok::l_brace)) {
258 // "namespace" keyword can be on the line preceding '{', e.g. in styles
259 // where BraceWrapping.AfterNamespace is true.
260 if (StartLineIndex > 0) {
261 NamespaceTok = AnnotatedLines[StartLineIndex - 1]->First;
262 if (AnnotatedLines[StartLineIndex - 1]->endsWith(tok::semi))
263 return nullptr;
264 }
265 }
266
267 return NamespaceTok->getNamespaceToken();
268}
269
270StringRef
272 const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
273 const FormatToken *NamespaceTok = getNamespaceToken(Line, AnnotatedLines);
274 return NamespaceTok ? NamespaceTok->TokenText : StringRef();
275}
276
278 const FormatStyle &Style)
279 : TokenAnalyzer(Env, Style) {}
280
281std::pair<tooling::Replacements, unsigned> NamespaceEndCommentsFixer::analyze(
282 TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
283 FormatTokenLexer &Tokens) {
284 const SourceManager &SourceMgr = Env.getSourceManager();
287
288 // Spin through the lines and ensure we have balanced braces.
289 int Braces = 0;
290 for (AnnotatedLine *Line : AnnotatedLines) {
291 FormatToken *Tok = Line->First;
292 while (Tok) {
293 Braces += Tok->is(tok::l_brace) ? 1 : Tok->is(tok::r_brace) ? -1 : 0;
294 Tok = Tok->Next;
295 }
296 }
297 // Don't attempt to comment unbalanced braces or this can
298 // lead to comments being placed on the closing brace which isn't
299 // the matching brace of the namespace. (occurs during incomplete editing).
300 if (Braces != 0)
301 return {Fixes, 0};
302
303 std::string AllNamespaceNames;
304 size_t StartLineIndex = SIZE_MAX;
305 StringRef NamespaceTokenText;
306 unsigned int CompactedNamespacesCount = 0;
307 for (size_t I = 0, E = AnnotatedLines.size(); I != E; ++I) {
308 const AnnotatedLine *EndLine = AnnotatedLines[I];
309 const FormatToken *NamespaceTok =
310 getNamespaceToken(EndLine, AnnotatedLines);
311 if (!NamespaceTok)
312 continue;
313 FormatToken *RBraceTok = EndLine->First;
314 if (RBraceTok->Finalized)
315 continue;
316 RBraceTok->Finalized = true;
317 const FormatToken *EndCommentPrevTok = RBraceTok;
318 // Namespaces often end with '};'. In that case, attach namespace end
319 // comments to the semicolon tokens.
320 if (RBraceTok->Next && RBraceTok->Next->is(tok::semi))
321 EndCommentPrevTok = RBraceTok->Next;
322 if (StartLineIndex == SIZE_MAX)
323 StartLineIndex = EndLine->MatchingOpeningBlockLineIndex;
324 std::string NamespaceName = computeName(NamespaceTok);
326 if (CompactedNamespacesCount == 0)
327 NamespaceTokenText = NamespaceTok->TokenText;
328 if ((I + 1 < E) &&
329 NamespaceTokenText ==
330 getNamespaceTokenText(AnnotatedLines[I + 1], AnnotatedLines) &&
331 StartLineIndex - CompactedNamespacesCount - 1 ==
332 AnnotatedLines[I + 1]->MatchingOpeningBlockLineIndex &&
333 !AnnotatedLines[I + 1]->First->Finalized) {
334 if (hasEndComment(EndCommentPrevTok)) {
335 // remove end comment, it will be merged in next one
336 updateEndComment(EndCommentPrevTok, std::string(), SourceMgr, &Fixes);
337 }
338 ++CompactedNamespacesCount;
339 if (!NamespaceName.empty())
340 AllNamespaceNames = "::" + NamespaceName + AllNamespaceNames;
341 continue;
342 }
343 NamespaceName += AllNamespaceNames;
344 CompactedNamespacesCount = 0;
345 AllNamespaceNames = std::string();
346 }
347 // The next token in the token stream after the place where the end comment
348 // token must be. This is either the next token on the current line or the
349 // first token on the next line.
350 const FormatToken *EndCommentNextTok = EndCommentPrevTok->Next;
351 if (EndCommentNextTok && EndCommentNextTok->is(tok::comment))
352 EndCommentNextTok = EndCommentNextTok->Next;
353 if (!EndCommentNextTok && I + 1 < E)
354 EndCommentNextTok = AnnotatedLines[I + 1]->First;
355 bool AddNewline = EndCommentNextTok &&
356 EndCommentNextTok->NewlinesBefore == 0 &&
357 EndCommentNextTok->isNot(tok::eof);
358 const std::string EndCommentText =
359 computeEndCommentText(NamespaceName, AddNewline, NamespaceTok,
361 if (!hasEndComment(EndCommentPrevTok)) {
362 unsigned LineCount = 0;
363 for (auto J = StartLineIndex + 1; J < I; ++J)
364 LineCount += AnnotatedLines[J]->size();
365 if (LineCount > Style.ShortNamespaceLines) {
366 addEndComment(EndCommentPrevTok,
367 std::string(Style.SpacesBeforeTrailingComments, ' ') +
368 EndCommentText,
369 SourceMgr, &Fixes);
370 }
371 } else if (!validEndComment(EndCommentPrevTok, NamespaceName,
372 NamespaceTok)) {
373 updateEndComment(EndCommentPrevTok, EndCommentText, SourceMgr, &Fixes);
374 }
375 StartLineIndex = SIZE_MAX;
376 }
377 return {Fixes, 0};
378}
379
380} // namespace format
381} // namespace clang
Expr * E
const Environment & Env
Definition: HTMLLogger.cpp:147
This file declares NamespaceEndCommentsFixer, a TokenAnalyzer that fixes namespace end comments.
SourceRange Range
Definition: SemaObjC.cpp:758
Defines the clang::TokenKind enum and support functions.
static CharSourceRange getCharRange(SourceRange R)
This class handles loading and caching of source files into memory.
bool computeAffectedLines(SmallVectorImpl< AnnotatedLine * > &Lines)
SourceManager & getSourceManager() const
Definition: TokenAnalyzer.h:38
NamespaceEndCommentsFixer(const Environment &Env, const FormatStyle &Style)
std::pair< tooling::Replacements, unsigned > analyze(TokenAnnotator &Annotator, SmallVectorImpl< AnnotatedLine * > &AnnotatedLines, FormatTokenLexer &Tokens) override
AffectedRangeManager AffectedRangeMgr
Definition: TokenAnalyzer.h:99
const Environment & Env
Definition: TokenAnalyzer.h:97
Determines extra information about the tokens comprising an UnwrappedLine.
Maintains a set of replacements that are conflict-free.
Definition: Replacement.h:212
const FormatToken * getNamespaceToken(const AnnotatedLine *Line, const SmallVectorImpl< AnnotatedLine * > &AnnotatedLines)
StringRef getNamespaceTokenText(const AnnotatedLine *Line, const SmallVectorImpl< AnnotatedLine * > &AnnotatedLines)
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:25
The JSON file list parser is used to communicate input to InstallAPI.
@ Braces
New-expression has a C++11 list-initializer.
#define SIZE_MAX
Definition: stdint.h:889
unsigned Minimum
The minimum number of spaces at the start of the comment.
Definition: Format.h:4749
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:55
unsigned SpacesBeforeTrailingComments
If true, spaces may be inserted into ().
Definition: Format.h:4697
bool CompactNamespaces
If true, consecutive namespace declarations will be on the same line.
Definition: Format.h:2482
SpacesInLineComment SpacesInLineCommentPrefix
How many spaces are allowed at the start of a line comment.
Definition: Format.h:4786
unsigned ShortNamespaceLines
The maximal number of unwrapped lines that a short namespace spans.
Definition: Format.h:4233
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:294
const FormatToken * getNamespaceToken() const
Return the actual namespace token, if this token starts a namespace block.
Definition: FormatToken.h:881
bool isNot(T Kind) const
Definition: FormatToken.h:628
StringRef TokenText
The raw text of the token.
Definition: FormatToken.h:314
unsigned Finalized
If true, this token has been fully formatted (indented and potentially re-formatted inside),...
Definition: FormatToken.h:373
FormatToken * Next
The next token in the unwrapped line.
Definition: FormatToken.h:566
unsigned NewlinesBefore
The number of newlines immediately before the Token.
Definition: FormatToken.h:463
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:609
static const size_t kInvalidIndex