clang  14.0.0git
NamespaceEndCommentsFixer.cpp
Go to the documentation of this file.
1 //===--- NamespaceEndCommentsFixer.cpp --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements NamespaceEndCommentsFixer, a TokenAnalyzer that
11 /// fixes namespace end comments.
12 ///
13 //===----------------------------------------------------------------------===//
14 
16 #include "llvm/Support/Debug.h"
17 #include "llvm/Support/Regex.h"
18 
19 #define DEBUG_TYPE "namespace-end-comments-fixer"
20 
21 namespace clang {
22 namespace format {
23 
24 namespace {
25 // Computes the name of a namespace given the namespace token.
26 // Returns "" for anonymous namespace.
27 std::string computeName(const FormatToken *NamespaceTok) {
28  assert(NamespaceTok &&
29  NamespaceTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
30  "expecting a namespace token");
31  std::string name = "";
32  const FormatToken *Tok = NamespaceTok->getNextNonComment();
33  if (NamespaceTok->is(TT_NamespaceMacro)) {
34  // Collects all the non-comment tokens between opening parenthesis
35  // and closing parenthesis or comma.
36  assert(Tok && Tok->is(tok::l_paren) && "expected an opening parenthesis");
37  Tok = Tok->getNextNonComment();
38  while (Tok && !Tok->isOneOf(tok::r_paren, tok::comma)) {
39  name += Tok->TokenText;
40  Tok = Tok->getNextNonComment();
41  }
42  } else {
43  // For `namespace [[foo]] A::B::inline C {` or
44  // `namespace MACRO1 MACRO2 A::B::inline C {`, returns "A::B::inline C".
45  // Peek for the first '::' (or '{') and then return all tokens from one
46  // token before that up until the '{'.
47  const FormatToken *FirstNSTok = Tok;
48  while (Tok && !Tok->is(tok::l_brace) && !Tok->is(tok::coloncolon)) {
49  FirstNSTok = Tok;
50  Tok = Tok->getNextNonComment();
51  }
52 
53  Tok = FirstNSTok;
54  while (Tok && !Tok->is(tok::l_brace)) {
55  name += Tok->TokenText;
56  if (Tok->is(tok::kw_inline))
57  name += " ";
58  Tok = Tok->getNextNonComment();
59  }
60  }
61  return name;
62 }
63 
64 std::string computeEndCommentText(StringRef NamespaceName, bool AddNewline,
65  const FormatToken *NamespaceTok,
66  unsigned SpacesToAdd) {
67  std::string text = "//";
68  text.append(SpacesToAdd, ' ');
69  text += NamespaceTok->TokenText;
70  if (NamespaceTok->is(TT_NamespaceMacro))
71  text += "(";
72  else if (!NamespaceName.empty())
73  text += ' ';
74  text += NamespaceName;
75  if (NamespaceTok->is(TT_NamespaceMacro))
76  text += ")";
77  if (AddNewline)
78  text += '\n';
79  return text;
80 }
81 
82 bool hasEndComment(const FormatToken *RBraceTok) {
83  return RBraceTok->Next && RBraceTok->Next->is(tok::comment);
84 }
85 
86 bool validEndComment(const FormatToken *RBraceTok, StringRef NamespaceName,
87  const FormatToken *NamespaceTok) {
88  assert(hasEndComment(RBraceTok));
89  const FormatToken *Comment = RBraceTok->Next;
90 
91  // Matches a valid namespace end comment.
92  // Valid namespace end comments don't need to be edited.
93  static const llvm::Regex NamespaceCommentPattern =
94  llvm::Regex("^/[/*] *(end (of )?)? *(anonymous|unnamed)? *"
95  "namespace( +([a-zA-Z0-9:_]+))?\\.? *(\\*/)?$",
96  llvm::Regex::IgnoreCase);
97  static const llvm::Regex NamespaceMacroCommentPattern =
98  llvm::Regex("^/[/*] *(end (of )?)? *(anonymous|unnamed)? *"
99  "([a-zA-Z0-9_]+)\\(([a-zA-Z0-9:_]*)\\)\\.? *(\\*/)?$",
100  llvm::Regex::IgnoreCase);
101 
102  SmallVector<StringRef, 8> Groups;
103  if (NamespaceTok->is(TT_NamespaceMacro) &&
104  NamespaceMacroCommentPattern.match(Comment->TokenText, &Groups)) {
105  StringRef NamespaceTokenText = Groups.size() > 4 ? Groups[4] : "";
106  // The name of the macro must be used.
107  if (NamespaceTokenText != NamespaceTok->TokenText)
108  return false;
109  } else if (NamespaceTok->isNot(tok::kw_namespace) ||
110  !NamespaceCommentPattern.match(Comment->TokenText, &Groups)) {
111  // Comment does not match regex.
112  return false;
113  }
114  StringRef NamespaceNameInComment = Groups.size() > 5 ? Groups[5] : "";
115  // Anonymous namespace comments must not mention a namespace name.
116  if (NamespaceName.empty() && !NamespaceNameInComment.empty())
117  return false;
118  StringRef AnonymousInComment = Groups.size() > 3 ? Groups[3] : "";
119  // Named namespace comments must not mention anonymous namespace.
120  if (!NamespaceName.empty() && !AnonymousInComment.empty())
121  return false;
122  if (NamespaceNameInComment == NamespaceName)
123  return true;
124 
125  // Has namespace comment flowed onto the next line.
126  // } // namespace
127  // // verylongnamespacenamethatdidnotfitonthepreviouscommentline
128  if (!(Comment->Next && Comment->Next->is(TT_LineComment)))
129  return false;
130 
131  static const llvm::Regex CommentPattern = llvm::Regex(
132  "^/[/*] *( +([a-zA-Z0-9:_]+))?\\.? *(\\*/)?$", llvm::Regex::IgnoreCase);
133 
134  // Pull out just the comment text.
135  if (!CommentPattern.match(Comment->Next->TokenText, &Groups)) {
136  return false;
137  }
138  NamespaceNameInComment = Groups.size() > 2 ? Groups[2] : "";
139 
140  return (NamespaceNameInComment == NamespaceName);
141 }
142 
143 void addEndComment(const FormatToken *RBraceTok, StringRef EndCommentText,
144  const SourceManager &SourceMgr,
145  tooling::Replacements *Fixes) {
146  auto EndLoc = RBraceTok->Tok.getEndLoc();
147  auto Range = CharSourceRange::getCharRange(EndLoc, EndLoc);
148  auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, EndCommentText));
149  if (Err) {
150  llvm::errs() << "Error while adding namespace end comment: "
151  << llvm::toString(std::move(Err)) << "\n";
152  }
153 }
154 
155 void updateEndComment(const FormatToken *RBraceTok, StringRef EndCommentText,
156  const SourceManager &SourceMgr,
157  tooling::Replacements *Fixes) {
158  assert(hasEndComment(RBraceTok));
159  const FormatToken *Comment = RBraceTok->Next;
160  auto Range = CharSourceRange::getCharRange(Comment->getStartOfNonWhitespace(),
161  Comment->Tok.getEndLoc());
162  auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, EndCommentText));
163  if (Err) {
164  llvm::errs() << "Error while updating namespace end comment: "
165  << llvm::toString(std::move(Err)) << "\n";
166  }
167 }
168 } // namespace
169 
170 const FormatToken *
172  const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
173  if (!Line->Affected || Line->InPPDirective || !Line->startsWith(tok::r_brace))
174  return nullptr;
175  size_t StartLineIndex = Line->MatchingOpeningBlockLineIndex;
176  if (StartLineIndex == UnwrappedLine::kInvalidIndex)
177  return nullptr;
178  assert(StartLineIndex < AnnotatedLines.size());
179  const FormatToken *NamespaceTok = AnnotatedLines[StartLineIndex]->First;
180  if (NamespaceTok->is(tok::l_brace)) {
181  // "namespace" keyword can be on the line preceding '{', e.g. in styles
182  // where BraceWrapping.AfterNamespace is true.
183  if (StartLineIndex > 0)
184  NamespaceTok = AnnotatedLines[StartLineIndex - 1]->First;
185  }
186  return NamespaceTok->getNamespaceToken();
187 }
188 
189 StringRef
191  const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
192  const FormatToken *NamespaceTok = getNamespaceToken(Line, AnnotatedLines);
193  return NamespaceTok ? NamespaceTok->TokenText : StringRef();
194 }
195 
197  const FormatStyle &Style)
198  : TokenAnalyzer(Env, Style) {}
199 
200 std::pair<tooling::Replacements, unsigned> NamespaceEndCommentsFixer::analyze(
201  TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
202  FormatTokenLexer &Tokens) {
203  const SourceManager &SourceMgr = Env.getSourceManager();
204  AffectedRangeMgr.computeAffectedLines(AnnotatedLines);
205  tooling::Replacements Fixes;
206 
207  // Spin through the lines and ensure we have balanced braces.
208  int Braces = 0;
209  for (size_t I = 0, E = AnnotatedLines.size(); I != E; ++I) {
210  FormatToken *Tok = AnnotatedLines[I]->First;
211  while (Tok) {
212  Braces += Tok->is(tok::l_brace) ? 1 : Tok->is(tok::r_brace) ? -1 : 0;
213  Tok = Tok->Next;
214  }
215  }
216  // Don't attempt to comment unbalanced braces or this can
217  // lead to comments being placed on the closing brace which isn't
218  // the matching brace of the namespace. (occurs during incomplete editing).
219  if (Braces != 0) {
220  return {Fixes, 0};
221  }
222 
223  std::string AllNamespaceNames = "";
224  size_t StartLineIndex = SIZE_MAX;
225  StringRef NamespaceTokenText;
226  unsigned int CompactedNamespacesCount = 0;
227  for (size_t I = 0, E = AnnotatedLines.size(); I != E; ++I) {
228  const AnnotatedLine *EndLine = AnnotatedLines[I];
229  const FormatToken *NamespaceTok =
230  getNamespaceToken(EndLine, AnnotatedLines);
231  if (!NamespaceTok)
232  continue;
233  FormatToken *RBraceTok = EndLine->First;
234  if (RBraceTok->Finalized)
235  continue;
236  RBraceTok->Finalized = true;
237  const FormatToken *EndCommentPrevTok = RBraceTok;
238  // Namespaces often end with '};'. In that case, attach namespace end
239  // comments to the semicolon tokens.
240  if (RBraceTok->Next && RBraceTok->Next->is(tok::semi)) {
241  EndCommentPrevTok = RBraceTok->Next;
242  }
243  if (StartLineIndex == SIZE_MAX)
244  StartLineIndex = EndLine->MatchingOpeningBlockLineIndex;
245  std::string NamespaceName = computeName(NamespaceTok);
246  if (Style.CompactNamespaces) {
247  if (CompactedNamespacesCount == 0)
248  NamespaceTokenText = NamespaceTok->TokenText;
249  if ((I + 1 < E) &&
250  NamespaceTokenText ==
251  getNamespaceTokenText(AnnotatedLines[I + 1], AnnotatedLines) &&
252  StartLineIndex - CompactedNamespacesCount - 1 ==
253  AnnotatedLines[I + 1]->MatchingOpeningBlockLineIndex &&
254  !AnnotatedLines[I + 1]->First->Finalized) {
255  if (hasEndComment(EndCommentPrevTok)) {
256  // remove end comment, it will be merged in next one
257  updateEndComment(EndCommentPrevTok, std::string(), SourceMgr, &Fixes);
258  }
259  CompactedNamespacesCount++;
260  AllNamespaceNames = "::" + NamespaceName + AllNamespaceNames;
261  continue;
262  }
263  NamespaceName += AllNamespaceNames;
264  CompactedNamespacesCount = 0;
265  AllNamespaceNames = std::string();
266  }
267  // The next token in the token stream after the place where the end comment
268  // token must be. This is either the next token on the current line or the
269  // first token on the next line.
270  const FormatToken *EndCommentNextTok = EndCommentPrevTok->Next;
271  if (EndCommentNextTok && EndCommentNextTok->is(tok::comment))
272  EndCommentNextTok = EndCommentNextTok->Next;
273  if (!EndCommentNextTok && I + 1 < E)
274  EndCommentNextTok = AnnotatedLines[I + 1]->First;
275  bool AddNewline = EndCommentNextTok &&
276  EndCommentNextTok->NewlinesBefore == 0 &&
277  EndCommentNextTok->isNot(tok::eof);
278  const std::string EndCommentText =
279  computeEndCommentText(NamespaceName, AddNewline, NamespaceTok,
281  if (!hasEndComment(EndCommentPrevTok)) {
282  bool isShort = I - StartLineIndex <= Style.ShortNamespaceLines + 1;
283  if (!isShort)
284  addEndComment(EndCommentPrevTok, EndCommentText, SourceMgr, &Fixes);
285  } else if (!validEndComment(EndCommentPrevTok, NamespaceName,
286  NamespaceTok)) {
287  updateEndComment(EndCommentPrevTok, EndCommentText, SourceMgr, &Fixes);
288  }
289  StartLineIndex = SIZE_MAX;
290  }
291  return {Fixes, 0};
292 }
293 
294 } // namespace format
295 } // namespace clang
clang::comments::tok::text
@ text
Definition: CommentLexer.h:35
clang::format::AnnotatedLine::MatchingOpeningBlockLineIndex
size_t MatchingOpeningBlockLineIndex
Definition: TokenAnnotator.h:128
clang::format::FormatToken::getNamespaceToken
const FormatToken * getNamespaceToken() const
Return the actual namespace token, if this token starts a namespace block.
Definition: FormatToken.h:694
string
string(SUBSTRING ${CMAKE_CURRENT_BINARY_DIR} 0 ${PATH_LIB_START} PATH_HEAD) string(SUBSTRING $
Definition: CMakeLists.txt:22
clang::format::TokenAnalyzer::Env
const Environment & Env
Definition: TokenAnalyzer.h:106
clang::format::getNamespaceTokenText
StringRef getNamespaceTokenText(const AnnotatedLine *Line, const SmallVectorImpl< AnnotatedLine * > &AnnotatedLines)
Definition: NamespaceEndCommentsFixer.cpp:190
clang::format::AnnotatedLine::First
FormatToken * First
Definition: TokenAnnotator.h:121
clang::format::FormatStyle::ShortNamespaceLines
unsigned ShortNamespaceLines
The maximal number of unwrapped lines that a short namespace spans.
Definition: Format.h:3072
clang::tooling::Replacements
Maintains a set of replacements that are conflict-free.
Definition: Replacement.h:209
clang::format::UnwrappedLine::kInvalidIndex
static const size_t kInvalidIndex
Definition: UnwrappedLineParser.h:60
clang::format::FormatStyle
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:59
clang::format::FormatToken::TokenText
StringRef TokenText
The raw text of the token.
Definition: FormatToken.h:228
clang::format::FormatToken
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:211
clang::ComparisonCategoryType::First
@ First
clang::format::NamespaceEndCommentsFixer::NamespaceEndCommentsFixer
NamespaceEndCommentsFixer(const Environment &Env, const FormatStyle &Style)
Definition: NamespaceEndCommentsFixer.cpp:196
clang::format::FormatToken::isNot
bool isNot(T Kind) const
Definition: FormatToken.h:472
clang::SourceManager
This class handles loading and caching of source files into memory.
Definition: SourceManager.h:626
clang::format::FormatToken::NewlinesBefore
unsigned NewlinesBefore
The number of newlines immediately before the Token.
Definition: FormatToken.h:337
clang::CharSourceRange::getCharRange
static CharSourceRange getCharRange(SourceRange R)
Definition: SourceLocation.h:265
clang::format::TokenAnalyzer::AffectedRangeMgr
AffectedRangeManager AffectedRangeMgr
Definition: TokenAnalyzer.h:108
clang::threadSafety::sx::toString
std::string toString(const til::SExpr *E)
Definition: ThreadSafetyCommon.h:89
clang::format::FormatStyle::SpacesInLineComment::Minimum
unsigned Minimum
The minimum number of spaces at the start of the comment.
Definition: Format.h:3513
clang::format::AnnotatedLine
Definition: TokenAnnotator.h:38
clang::format::Environment
Definition: TokenAnalyzer.h:37
clang::format::TokenAnnotator
Determines extra information about the tokens comprising an UnwrappedLine.
Definition: TokenAnnotator.h:156
NamespaceEndCommentsFixer.h
clang::format::AnnotatedLine::InPPDirective
bool InPPDirective
Definition: TokenAnnotator.h:130
SIZE_MAX
#define SIZE_MAX
Definition: stdint.h:653
Line
const AnnotatedLine * Line
Definition: UsingDeclarationsSorter.cpp:68
clang::format::AnnotatedLine::startsWith
bool startsWith(Ts... Tokens) const
true if this line starts with the given tokens in order, ignoring comments.
Definition: TokenAnnotator.h:85
clang::format::Environment::getSourceManager
const SourceManager & getSourceManager() const
Definition: TokenAnalyzer.h:49
clang::format::FormatToken::Finalized
unsigned Finalized
If true, this token has been fully formatted (indented and potentially re-formatted inside),...
Definition: FormatToken.h:283
clang::format::FormatStyle::CompactNamespaces
bool CompactNamespaces
If true, consecutive namespace declarations will be on the same line.
Definition: Format.h:1994
clang::format::FormatToken::is
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:453
clang::format::getNamespaceToken
const FormatToken * getNamespaceToken(const AnnotatedLine *Line, const SmallVectorImpl< AnnotatedLine * > &AnnotatedLines)
Definition: NamespaceEndCommentsFixer.cpp:171
clang::format::FormatStyle::SpacesInLineCommentPrefix
SpacesInLineComment SpacesInLineCommentPrefix
How many spaces are allowed at the start of a line comment.
Definition: Format.h:3548
clang
Definition: CalledOnceCheck.h:17
clang::format::TokenAnalyzer
Definition: TokenAnalyzer.h:88
clang::format::FormatTokenLexer
Definition: FormatTokenLexer.h:38
clang::comments::tok::eof
@ eof
Definition: CommentLexer.h:33
llvm::SmallVectorImpl
Definition: LLVM.h:39
clang::format::NamespaceEndCommentsFixer::analyze
std::pair< tooling::Replacements, unsigned > analyze(TokenAnnotator &Annotator, SmallVectorImpl< AnnotatedLine * > &AnnotatedLines, FormatTokenLexer &Tokens) override
Definition: NamespaceEndCommentsFixer.cpp:200
clang::format::FormatToken::Next
FormatToken * Next
The next token in the unwrapped line.
Definition: FormatToken.h:434
clang::transformer::name
RangeSelector name(std::string ID)
Given a node with a "name", (like NamedDecl, DeclRefExpr, CxxCtorInitializer, and TypeLoc) selects th...
Definition: RangeSelector.cpp:200
clang::format::AffectedRangeManager::computeAffectedLines
bool computeAffectedLines(SmallVectorImpl< AnnotatedLine * > &Lines)
Definition: AffectedRangeManager.cpp:22
clang::format::TokenAnalyzer::Style
FormatStyle Style
Definition: TokenAnalyzer.h:104
clang::format::AnnotatedLine::Affected
bool Affected
True if this line should be formatted, i.e.
Definition: TokenAnnotator.h:137