clang  15.0.0git
NamespaceEndCommentsFixer.cpp
Go to the documentation of this file.
1 //===--- NamespaceEndCommentsFixer.cpp --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements NamespaceEndCommentsFixer, a TokenAnalyzer that
11 /// fixes namespace end comments.
12 ///
13 //===----------------------------------------------------------------------===//
14 
#include "NamespaceEndCommentsFixer.h"
#include "clang/Basic/TokenKinds.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Regex.h"
19 
20 #define DEBUG_TYPE "namespace-end-comments-fixer"
21 
22 namespace clang {
23 namespace format {
24 
25 namespace {
26 // Iterates all tokens starting from StartTok to EndTok and apply Fn to all
27 // tokens between them including StartTok and EndTok. Returns the token after
28 // EndTok.
29 const FormatToken *
30 processTokens(const FormatToken *Tok, tok::TokenKind StartTok,
31  tok::TokenKind EndTok,
32  llvm::function_ref<void(const FormatToken *)> Fn) {
33  if (!Tok || Tok->isNot(StartTok))
34  return Tok;
35  int NestLevel = 0;
36  do {
37  if (Tok->is(StartTok))
38  ++NestLevel;
39  else if (Tok->is(EndTok))
40  --NestLevel;
41  if (Fn)
42  Fn(Tok);
43  Tok = Tok->getNextNonComment();
44  } while (Tok && NestLevel > 0);
45  return Tok;
46 }
47 
48 const FormatToken *skipAttribute(const FormatToken *Tok) {
49  if (!Tok)
50  return nullptr;
51  if (Tok->is(tok::kw___attribute)) {
52  Tok = Tok->getNextNonComment();
53  Tok = processTokens(Tok, tok::l_paren, tok::r_paren, nullptr);
54  } else if (Tok->is(tok::l_square)) {
55  Tok = processTokens(Tok, tok::l_square, tok::r_square, nullptr);
56  }
57  return Tok;
58 }
59 
60 // Computes the name of a namespace given the namespace token.
61 // Returns "" for anonymous namespace.
62 std::string computeName(const FormatToken *NamespaceTok) {
63  assert(NamespaceTok &&
64  NamespaceTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
65  "expecting a namespace token");
67  const FormatToken *Tok = NamespaceTok->getNextNonComment();
68  if (NamespaceTok->is(TT_NamespaceMacro)) {
69  // Collects all the non-comment tokens between opening parenthesis
70  // and closing parenthesis or comma.
71  assert(Tok && Tok->is(tok::l_paren) && "expected an opening parenthesis");
72  Tok = Tok->getNextNonComment();
73  while (Tok && !Tok->isOneOf(tok::r_paren, tok::comma)) {
74  name += Tok->TokenText;
75  Tok = Tok->getNextNonComment();
76  }
77  return name;
78  }
79  Tok = skipAttribute(Tok);
80 
81  std::string FirstNSName;
82  // For `namespace [[foo]] A::B::inline C {` or
83  // `namespace MACRO1 MACRO2 A::B::inline C {`, returns "A::B::inline C".
84  // Peek for the first '::' (or '{' or '(')) and then return all tokens from
85  // one token before that up until the '{'. A '(' might be a macro with
86  // arguments.
87  const FormatToken *FirstNSTok = nullptr;
88  while (Tok && !Tok->isOneOf(tok::l_brace, tok::coloncolon, tok::l_paren)) {
89  if (FirstNSTok)
90  FirstNSName += FirstNSTok->TokenText;
91  FirstNSTok = Tok;
92  Tok = Tok->getNextNonComment();
93  }
94 
95  if (FirstNSTok)
96  Tok = FirstNSTok;
97  Tok = skipAttribute(Tok);
98 
99  FirstNSTok = nullptr;
100  // Add everything from '(' to ')'.
101  auto AddToken = [&name](const FormatToken *Tok) { name += Tok->TokenText; };
102  bool IsPrevColoncolon = false;
103  bool HasColoncolon = false;
104  bool IsPrevInline = false;
105  bool NameFinished = false;
106  // If we found '::' in name, then it's the name. Otherwise, we can't tell
107  // which one is name. For example, `namespace A B {`.
108  while (Tok && Tok->isNot(tok::l_brace)) {
109  if (FirstNSTok) {
110  if (!IsPrevInline && HasColoncolon && !IsPrevColoncolon) {
111  if (FirstNSTok->is(tok::l_paren)) {
112  FirstNSTok = Tok =
113  processTokens(FirstNSTok, tok::l_paren, tok::r_paren, AddToken);
114  continue;
115  }
116  if (FirstNSTok->isNot(tok::coloncolon)) {
117  NameFinished = true;
118  break;
119  }
120  }
121  name += FirstNSTok->TokenText;
122  IsPrevColoncolon = FirstNSTok->is(tok::coloncolon);
123  HasColoncolon = HasColoncolon || IsPrevColoncolon;
124  if (FirstNSTok->is(tok::kw_inline)) {
125  name += " ";
126  IsPrevInline = true;
127  }
128  }
129  FirstNSTok = Tok;
130  Tok = Tok->getNextNonComment();
131  const FormatToken *TokAfterAttr = skipAttribute(Tok);
132  if (TokAfterAttr != Tok)
133  FirstNSTok = Tok = TokAfterAttr;
134  }
135  if (!NameFinished && FirstNSTok && FirstNSTok->isNot(tok::l_brace))
136  name += FirstNSTok->TokenText;
137  if (FirstNSName.empty() || HasColoncolon)
138  return name;
139  return name.empty() ? FirstNSName : FirstNSName + " " + name;
140 }
141 
142 std::string computeEndCommentText(StringRef NamespaceName, bool AddNewline,
143  const FormatToken *NamespaceTok,
144  unsigned SpacesToAdd) {
145  std::string text = "//";
146  text.append(SpacesToAdd, ' ');
147  text += NamespaceTok->TokenText;
148  if (NamespaceTok->is(TT_NamespaceMacro))
149  text += "(";
150  else if (!NamespaceName.empty())
151  text += ' ';
152  text += NamespaceName;
153  if (NamespaceTok->is(TT_NamespaceMacro))
154  text += ")";
155  if (AddNewline)
156  text += '\n';
157  return text;
158 }
159 
160 bool hasEndComment(const FormatToken *RBraceTok) {
161  return RBraceTok->Next && RBraceTok->Next->is(tok::comment);
162 }
163 
164 bool validEndComment(const FormatToken *RBraceTok, StringRef NamespaceName,
165  const FormatToken *NamespaceTok) {
166  assert(hasEndComment(RBraceTok));
167  const FormatToken *Comment = RBraceTok->Next;
168 
169  // Matches a valid namespace end comment.
170  // Valid namespace end comments don't need to be edited.
171  static const llvm::Regex NamespaceCommentPattern =
172  llvm::Regex("^/[/*] *(end (of )?)? *(anonymous|unnamed)? *"
173  "namespace( +([a-zA-Z0-9:_]+))?\\.? *(\\*/)?$",
174  llvm::Regex::IgnoreCase);
175  static const llvm::Regex NamespaceMacroCommentPattern =
176  llvm::Regex("^/[/*] *(end (of )?)? *(anonymous|unnamed)? *"
177  "([a-zA-Z0-9_]+)\\(([a-zA-Z0-9:_]*)\\)\\.? *(\\*/)?$",
178  llvm::Regex::IgnoreCase);
179 
180  SmallVector<StringRef, 8> Groups;
181  if (NamespaceTok->is(TT_NamespaceMacro) &&
182  NamespaceMacroCommentPattern.match(Comment->TokenText, &Groups)) {
183  StringRef NamespaceTokenText = Groups.size() > 4 ? Groups[4] : "";
184  // The name of the macro must be used.
185  if (NamespaceTokenText != NamespaceTok->TokenText)
186  return false;
187  } else if (NamespaceTok->isNot(tok::kw_namespace) ||
188  !NamespaceCommentPattern.match(Comment->TokenText, &Groups)) {
189  // Comment does not match regex.
190  return false;
191  }
192  StringRef NamespaceNameInComment = Groups.size() > 5 ? Groups[5] : "";
193  // Anonymous namespace comments must not mention a namespace name.
194  if (NamespaceName.empty() && !NamespaceNameInComment.empty())
195  return false;
196  StringRef AnonymousInComment = Groups.size() > 3 ? Groups[3] : "";
197  // Named namespace comments must not mention anonymous namespace.
198  if (!NamespaceName.empty() && !AnonymousInComment.empty())
199  return false;
200  if (NamespaceNameInComment == NamespaceName)
201  return true;
202 
203  // Has namespace comment flowed onto the next line.
204  // } // namespace
205  // // verylongnamespacenamethatdidnotfitonthepreviouscommentline
206  if (!(Comment->Next && Comment->Next->is(TT_LineComment)))
207  return false;
208 
209  static const llvm::Regex CommentPattern = llvm::Regex(
210  "^/[/*] *( +([a-zA-Z0-9:_]+))?\\.? *(\\*/)?$", llvm::Regex::IgnoreCase);
211 
212  // Pull out just the comment text.
213  if (!CommentPattern.match(Comment->Next->TokenText, &Groups))
214  return false;
215  NamespaceNameInComment = Groups.size() > 2 ? Groups[2] : "";
216 
217  return NamespaceNameInComment == NamespaceName;
218 }
219 
220 void addEndComment(const FormatToken *RBraceTok, StringRef EndCommentText,
221  const SourceManager &SourceMgr,
222  tooling::Replacements *Fixes) {
223  auto EndLoc = RBraceTok->Tok.getEndLoc();
224  auto Range = CharSourceRange::getCharRange(EndLoc, EndLoc);
225  auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, EndCommentText));
226  if (Err) {
227  llvm::errs() << "Error while adding namespace end comment: "
228  << llvm::toString(std::move(Err)) << "\n";
229  }
230 }
231 
232 void updateEndComment(const FormatToken *RBraceTok, StringRef EndCommentText,
233  const SourceManager &SourceMgr,
234  tooling::Replacements *Fixes) {
235  assert(hasEndComment(RBraceTok));
236  const FormatToken *Comment = RBraceTok->Next;
237  auto Range = CharSourceRange::getCharRange(Comment->getStartOfNonWhitespace(),
238  Comment->Tok.getEndLoc());
239  auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, EndCommentText));
240  if (Err) {
241  llvm::errs() << "Error while updating namespace end comment: "
242  << llvm::toString(std::move(Err)) << "\n";
243  }
244 }
245 } // namespace
246 
247 const FormatToken *
249  const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
250  if (!Line->Affected || Line->InPPDirective || !Line->startsWith(tok::r_brace))
251  return nullptr;
252  size_t StartLineIndex = Line->MatchingOpeningBlockLineIndex;
253  if (StartLineIndex == UnwrappedLine::kInvalidIndex)
254  return nullptr;
255  assert(StartLineIndex < AnnotatedLines.size());
256  const FormatToken *NamespaceTok = AnnotatedLines[StartLineIndex]->First;
257  if (NamespaceTok->is(tok::l_brace)) {
258  // "namespace" keyword can be on the line preceding '{', e.g. in styles
259  // where BraceWrapping.AfterNamespace is true.
260  if (StartLineIndex > 0) {
261  NamespaceTok = AnnotatedLines[StartLineIndex - 1]->First;
262  if (AnnotatedLines[StartLineIndex - 1]->endsWith(tok::semi))
263  return nullptr;
264  }
265  }
266 
267  return NamespaceTok->getNamespaceToken();
268 }
269 
270 StringRef
272  const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
273  const FormatToken *NamespaceTok = getNamespaceToken(Line, AnnotatedLines);
274  return NamespaceTok ? NamespaceTok->TokenText : StringRef();
275 }
276 
278  const FormatStyle &Style)
279  : TokenAnalyzer(Env, Style) {}
280 
281 std::pair<tooling::Replacements, unsigned> NamespaceEndCommentsFixer::analyze(
282  TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
283  FormatTokenLexer &Tokens) {
284  const SourceManager &SourceMgr = Env.getSourceManager();
285  AffectedRangeMgr.computeAffectedLines(AnnotatedLines);
286  tooling::Replacements Fixes;
287 
288  // Spin through the lines and ensure we have balanced braces.
289  int Braces = 0;
290  for (AnnotatedLine *Line : AnnotatedLines) {
291  FormatToken *Tok = Line->First;
292  while (Tok) {
293  Braces += Tok->is(tok::l_brace) ? 1 : Tok->is(tok::r_brace) ? -1 : 0;
294  Tok = Tok->Next;
295  }
296  }
297  // Don't attempt to comment unbalanced braces or this can
298  // lead to comments being placed on the closing brace which isn't
299  // the matching brace of the namespace. (occurs during incomplete editing).
300  if (Braces != 0)
301  return {Fixes, 0};
302 
303  std::string AllNamespaceNames;
304  size_t StartLineIndex = SIZE_MAX;
305  StringRef NamespaceTokenText;
306  unsigned int CompactedNamespacesCount = 0;
307  for (size_t I = 0, E = AnnotatedLines.size(); I != E; ++I) {
308  const AnnotatedLine *EndLine = AnnotatedLines[I];
309  const FormatToken *NamespaceTok =
310  getNamespaceToken(EndLine, AnnotatedLines);
311  if (!NamespaceTok)
312  continue;
313  FormatToken *RBraceTok = EndLine->First;
314  if (RBraceTok->Finalized)
315  continue;
316  RBraceTok->Finalized = true;
317  const FormatToken *EndCommentPrevTok = RBraceTok;
318  // Namespaces often end with '};'. In that case, attach namespace end
319  // comments to the semicolon tokens.
320  if (RBraceTok->Next && RBraceTok->Next->is(tok::semi))
321  EndCommentPrevTok = RBraceTok->Next;
322  if (StartLineIndex == SIZE_MAX)
323  StartLineIndex = EndLine->MatchingOpeningBlockLineIndex;
324  std::string NamespaceName = computeName(NamespaceTok);
325  if (Style.CompactNamespaces) {
326  if (CompactedNamespacesCount == 0)
327  NamespaceTokenText = NamespaceTok->TokenText;
328  if ((I + 1 < E) &&
329  NamespaceTokenText ==
330  getNamespaceTokenText(AnnotatedLines[I + 1], AnnotatedLines) &&
331  StartLineIndex - CompactedNamespacesCount - 1 ==
332  AnnotatedLines[I + 1]->MatchingOpeningBlockLineIndex &&
333  !AnnotatedLines[I + 1]->First->Finalized) {
334  if (hasEndComment(EndCommentPrevTok)) {
335  // remove end comment, it will be merged in next one
336  updateEndComment(EndCommentPrevTok, std::string(), SourceMgr, &Fixes);
337  }
338  ++CompactedNamespacesCount;
339  if (!NamespaceName.empty())
340  AllNamespaceNames = "::" + NamespaceName + AllNamespaceNames;
341  continue;
342  }
343  NamespaceName += AllNamespaceNames;
344  CompactedNamespacesCount = 0;
345  AllNamespaceNames = std::string();
346  }
347  // The next token in the token stream after the place where the end comment
348  // token must be. This is either the next token on the current line or the
349  // first token on the next line.
350  const FormatToken *EndCommentNextTok = EndCommentPrevTok->Next;
351  if (EndCommentNextTok && EndCommentNextTok->is(tok::comment))
352  EndCommentNextTok = EndCommentNextTok->Next;
353  if (!EndCommentNextTok && I + 1 < E)
354  EndCommentNextTok = AnnotatedLines[I + 1]->First;
355  bool AddNewline = EndCommentNextTok &&
356  EndCommentNextTok->NewlinesBefore == 0 &&
357  EndCommentNextTok->isNot(tok::eof);
358  const std::string EndCommentText =
359  computeEndCommentText(NamespaceName, AddNewline, NamespaceTok,
361  if (!hasEndComment(EndCommentPrevTok)) {
362  bool isShort = I - StartLineIndex <= Style.ShortNamespaceLines + 1;
363  if (!isShort)
364  addEndComment(EndCommentPrevTok, EndCommentText, SourceMgr, &Fixes);
365  } else if (!validEndComment(EndCommentPrevTok, NamespaceName,
366  NamespaceTok)) {
367  updateEndComment(EndCommentPrevTok, EndCommentText, SourceMgr, &Fixes);
368  }
369  StartLineIndex = SIZE_MAX;
370  }
371  return {Fixes, 0};
372 }
373 
374 } // namespace format
375 } // namespace clang
clang::comments::tok::text
@ text
Definition: CommentLexer.h:35
clang::format::AnnotatedLine::MatchingOpeningBlockLineIndex
size_t MatchingOpeningBlockLineIndex
Definition: TokenAnnotator.h:128
clang::format::FormatToken::getNamespaceToken
const FormatToken * getNamespaceToken() const
Return the actual namespace token, if this token starts a namespace block.
Definition: FormatToken.h:737
string
string(SUBSTRING ${CMAKE_CURRENT_BINARY_DIR} 0 ${PATH_LIB_START} PATH_HEAD) string(SUBSTRING $
Definition: CMakeLists.txt:22
clang::format::TokenAnalyzer::Env
const Environment & Env
Definition: TokenAnalyzer.h:106
clang::format::getNamespaceTokenText
StringRef getNamespaceTokenText(const AnnotatedLine *Line, const SmallVectorImpl< AnnotatedLine * > &AnnotatedLines)
Definition: NamespaceEndCommentsFixer.cpp:271
clang::format::AnnotatedLine::First
FormatToken * First
Definition: TokenAnnotator.h:121
clang::format::FormatStyle::ShortNamespaceLines
unsigned ShortNamespaceLines
The maximal number of unwrapped lines that a short namespace spans.
Definition: Format.h:3195
clang::tooling::Replacements
Maintains a set of replacements that are conflict-free.
Definition: Replacement.h:209
clang::format::UnwrappedLine::kInvalidIndex
static const size_t kInvalidIndex
Definition: UnwrappedLineParser.h:61
clang::format::FormatStyle
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:54
clang::format::FormatToken::TokenText
StringRef TokenText
The raw text of the token.
Definition: FormatToken.h:245
clang::format::FormatToken
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:227
clang::ComparisonCategoryType::First
@ First
clang::format::NamespaceEndCommentsFixer::NamespaceEndCommentsFixer
NamespaceEndCommentsFixer(const Environment &Env, const FormatStyle &Style)
Definition: NamespaceEndCommentsFixer.cpp:277
clang::format::FormatToken::isNot
bool isNot(T Kind) const
Definition: FormatToken.h:519
clang::SourceManager
This class handles loading and caching of source files into memory.
Definition: SourceManager.h:627
clang::format::FormatToken::NewlinesBefore
unsigned NewlinesBefore
The number of newlines immediately before the Token.
Definition: FormatToken.h:375
TokenKinds.h
clang::CharSourceRange::getCharRange
static CharSourceRange getCharRange(SourceRange R)
Definition: SourceLocation.h:265
clang::format::TokenAnalyzer::AffectedRangeMgr
AffectedRangeManager AffectedRangeMgr
Definition: TokenAnalyzer.h:108
clang::threadSafety::sx::toString
std::string toString(const til::SExpr *E)
Definition: ThreadSafetyCommon.h:90
clang::format::FormatStyle::SpacesInLineComment::Minimum
unsigned Minimum
The minimum number of spaces at the start of the comment.
Definition: Format.h:3667
clang::format::AnnotatedLine
Definition: TokenAnnotator.h:36
clang::format::Environment
Definition: TokenAnalyzer.h:37
clang::format::TokenAnnotator
Determines extra information about the tokens comprising an UnwrappedLine.
Definition: TokenAnnotator.h:156
NamespaceEndCommentsFixer.h
clang::format::AnnotatedLine::InPPDirective
bool InPPDirective
Definition: TokenAnnotator.h:130
SIZE_MAX
#define SIZE_MAX
Definition: stdint.h:790
Line
const AnnotatedLine * Line
Definition: UsingDeclarationsSorter.cpp:68
clang::format::AnnotatedLine::startsWith
bool startsWith(Ts... Tokens) const
true if this line starts with the given tokens in order, ignoring comments.
Definition: TokenAnnotator.h:85
clang::format::Environment::getSourceManager
const SourceManager & getSourceManager() const
Definition: TokenAnalyzer.h:49
clang::tok::TokenKind
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:25
clang::format::FormatToken::Finalized
unsigned Finalized
If true, this token has been fully formatted (indented and potentially re-formatted inside),...
Definition: FormatToken.h:300
clang::format::FormatStyle::CompactNamespaces
bool CompactNamespaces
If true, consecutive namespace declarations will be on the same line.
Definition: Format.h:1884
clang::format::FormatToken::is
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:500
clang::format::getNamespaceToken
const FormatToken * getNamespaceToken(const AnnotatedLine *Line, const SmallVectorImpl< AnnotatedLine * > &AnnotatedLines)
Definition: NamespaceEndCommentsFixer.cpp:248
clang::format::FormatStyle::SpacesInLineCommentPrefix
SpacesInLineComment SpacesInLineCommentPrefix
How many spaces are allowed at the start of a line comment.
Definition: Format.h:3702
clang
Definition: CalledOnceCheck.h:17
clang::format::TokenAnalyzer
Definition: TokenAnalyzer.h:88
clang::format::FormatTokenLexer
Definition: FormatTokenLexer.h:39
clang::comments::tok::eof
@ eof
Definition: CommentLexer.h:33
llvm::SmallVectorImpl
Definition: Randstruct.h:18
clang::format::NamespaceEndCommentsFixer::analyze
std::pair< tooling::Replacements, unsigned > analyze(TokenAnnotator &Annotator, SmallVectorImpl< AnnotatedLine * > &AnnotatedLines, FormatTokenLexer &Tokens) override
Definition: NamespaceEndCommentsFixer.cpp:281
clang::format::FormatToken::Next
FormatToken * Next
The next token in the unwrapped line.
Definition: FormatToken.h:472
clang::transformer::name
RangeSelector name(std::string ID)
Given a node with a "name", (like NamedDecl, DeclRefExpr, CxxCtorInitializer, and TypeLoc) selects th...
Definition: RangeSelector.cpp:200
clang::format::AffectedRangeManager::computeAffectedLines
bool computeAffectedLines(SmallVectorImpl< AnnotatedLine * > &Lines)
Definition: AffectedRangeManager.cpp:22
clang::format::TokenAnalyzer::Style
FormatStyle Style
Definition: TokenAnalyzer.h:104
clang::format::AnnotatedLine::Affected
bool Affected
True if this line should be formatted, i.e.
Definition: TokenAnnotator.h:137