clang  11.0.0git
NamespaceEndCommentsFixer.cpp
Go to the documentation of this file.
1 //===--- NamespaceEndCommentsFixer.cpp --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements NamespaceEndCommentsFixer, a TokenAnalyzer that
11 /// fixes namespace end comments.
12 ///
13 //===----------------------------------------------------------------------===//
14 
16 #include "llvm/Support/Debug.h"
17 #include "llvm/Support/Regex.h"
18 
19 #define DEBUG_TYPE "namespace-end-comments-fixer"
20 
21 namespace clang {
22 namespace format {
23 
24 namespace {
// Upper bound on the number of unwrapped lines a namespace may span and still
// count as "short". Short namespaces are not given an end comment.
constexpr int kShortNamespaceMaxLines = 1;
28 
29 // Computes the name of a namespace given the namespace token.
30 // Returns "" for anonymous namespace.
31 std::string computeName(const FormatToken *NamespaceTok) {
32  assert(NamespaceTok &&
33  NamespaceTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
34  "expecting a namespace token");
35  std::string name = "";
36  const FormatToken *Tok = NamespaceTok->getNextNonComment();
37  if (NamespaceTok->is(TT_NamespaceMacro)) {
38  // Collects all the non-comment tokens between opening parenthesis
39  // and closing parenthesis or comma.
40  assert(Tok && Tok->is(tok::l_paren) && "expected an opening parenthesis");
41  Tok = Tok->getNextNonComment();
42  while (Tok && !Tok->isOneOf(tok::r_paren, tok::comma)) {
43  name += Tok->TokenText;
44  Tok = Tok->getNextNonComment();
45  }
46  } else {
47  // For `namespace [[foo]] A::B::inline C {` or
48  // `namespace MACRO1 MACRO2 A::B::inline C {`, returns "A::B::inline C".
49  // Peek for the first '::' (or '{') and then return all tokens from one
50  // token before that up until the '{'.
51  const FormatToken *FirstNSTok = Tok;
52  while (Tok && !Tok->is(tok::l_brace) && !Tok->is(tok::coloncolon)) {
53  FirstNSTok = Tok;
54  Tok = Tok->getNextNonComment();
55  }
56 
57  Tok = FirstNSTok;
58  while (Tok && !Tok->is(tok::l_brace)) {
59  name += Tok->TokenText;
60  if (Tok->is(tok::kw_inline))
61  name += " ";
62  Tok = Tok->getNextNonComment();
63  }
64  }
65  return name;
66 }
67 
68 std::string computeEndCommentText(StringRef NamespaceName, bool AddNewline,
69  const FormatToken *NamespaceTok) {
70  std::string text = "// ";
71  text += NamespaceTok->TokenText;
72  if (NamespaceTok->is(TT_NamespaceMacro))
73  text += "(";
74  else if (!NamespaceName.empty())
75  text += ' ';
76  text += NamespaceName;
77  if (NamespaceTok->is(TT_NamespaceMacro))
78  text += ")";
79  if (AddNewline)
80  text += '\n';
81  return text;
82 }
83 
84 bool hasEndComment(const FormatToken *RBraceTok) {
85  return RBraceTok->Next && RBraceTok->Next->is(tok::comment);
86 }
87 
88 bool validEndComment(const FormatToken *RBraceTok, StringRef NamespaceName,
89  const FormatToken *NamespaceTok) {
90  assert(hasEndComment(RBraceTok));
91  const FormatToken *Comment = RBraceTok->Next;
92 
93  // Matches a valid namespace end comment.
94  // Valid namespace end comments don't need to be edited.
95  static const llvm::Regex NamespaceCommentPattern =
96  llvm::Regex("^/[/*] *(end (of )?)? *(anonymous|unnamed)? *"
97  "namespace( +([a-zA-Z0-9:_]+))?\\.? *(\\*/)?$",
98  llvm::Regex::IgnoreCase);
99  static const llvm::Regex NamespaceMacroCommentPattern =
100  llvm::Regex("^/[/*] *(end (of )?)? *(anonymous|unnamed)? *"
101  "([a-zA-Z0-9_]+)\\(([a-zA-Z0-9:_]*)\\)\\.? *(\\*/)?$",
102  llvm::Regex::IgnoreCase);
103 
104  SmallVector<StringRef, 8> Groups;
105  if (NamespaceTok->is(TT_NamespaceMacro) &&
106  NamespaceMacroCommentPattern.match(Comment->TokenText, &Groups)) {
107  StringRef NamespaceTokenText = Groups.size() > 4 ? Groups[4] : "";
108  // The name of the macro must be used.
109  if (NamespaceTokenText != NamespaceTok->TokenText)
110  return false;
111  } else if (NamespaceTok->isNot(tok::kw_namespace) ||
112  !NamespaceCommentPattern.match(Comment->TokenText, &Groups)) {
113  // Comment does not match regex.
114  return false;
115  }
116  StringRef NamespaceNameInComment = Groups.size() > 5 ? Groups[5] : "";
117  // Anonymous namespace comments must not mention a namespace name.
118  if (NamespaceName.empty() && !NamespaceNameInComment.empty())
119  return false;
120  StringRef AnonymousInComment = Groups.size() > 3 ? Groups[3] : "";
121  // Named namespace comments must not mention anonymous namespace.
122  if (!NamespaceName.empty() && !AnonymousInComment.empty())
123  return false;
124  if (NamespaceNameInComment == NamespaceName)
125  return true;
126 
127  // Has namespace comment flowed onto the next line.
128  // } // namespace
129  // // verylongnamespacenamethatdidnotfitonthepreviouscommentline
130  if (!(Comment->Next && Comment->Next->is(TT_LineComment)))
131  return false;
132 
133  static const llvm::Regex CommentPattern = llvm::Regex(
134  "^/[/*] *( +([a-zA-Z0-9:_]+))?\\.? *(\\*/)?$", llvm::Regex::IgnoreCase);
135 
136  // Pull out just the comment text.
137  if (!CommentPattern.match(Comment->Next->TokenText, &Groups)) {
138  return false;
139  }
140  NamespaceNameInComment = Groups.size() > 2 ? Groups[2] : "";
141 
142  return (NamespaceNameInComment == NamespaceName);
143 }
144 
145 void addEndComment(const FormatToken *RBraceTok, StringRef EndCommentText,
146  const SourceManager &SourceMgr,
147  tooling::Replacements *Fixes) {
148  auto EndLoc = RBraceTok->Tok.getEndLoc();
149  auto Range = CharSourceRange::getCharRange(EndLoc, EndLoc);
150  auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, EndCommentText));
151  if (Err) {
152  llvm::errs() << "Error while adding namespace end comment: "
153  << llvm::toString(std::move(Err)) << "\n";
154  }
155 }
156 
157 void updateEndComment(const FormatToken *RBraceTok, StringRef EndCommentText,
158  const SourceManager &SourceMgr,
159  tooling::Replacements *Fixes) {
160  assert(hasEndComment(RBraceTok));
161  const FormatToken *Comment = RBraceTok->Next;
162  auto Range = CharSourceRange::getCharRange(Comment->getStartOfNonWhitespace(),
163  Comment->Tok.getEndLoc());
164  auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, EndCommentText));
165  if (Err) {
166  llvm::errs() << "Error while updating namespace end comment: "
167  << llvm::toString(std::move(Err)) << "\n";
168  }
169 }
170 } // namespace
171 
172 const FormatToken *
174  const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
175  if (!Line->Affected || Line->InPPDirective || !Line->startsWith(tok::r_brace))
176  return nullptr;
177  size_t StartLineIndex = Line->MatchingOpeningBlockLineIndex;
178  if (StartLineIndex == UnwrappedLine::kInvalidIndex)
179  return nullptr;
180  assert(StartLineIndex < AnnotatedLines.size());
181  const FormatToken *NamespaceTok = AnnotatedLines[StartLineIndex]->First;
182  if (NamespaceTok->is(tok::l_brace)) {
183  // "namespace" keyword can be on the line preceding '{', e.g. in styles
184  // where BraceWrapping.AfterNamespace is true.
185  if (StartLineIndex > 0)
186  NamespaceTok = AnnotatedLines[StartLineIndex - 1]->First;
187  }
188  return NamespaceTok->getNamespaceToken();
189 }
190 
191 StringRef
193  const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
194  const FormatToken *NamespaceTok = getNamespaceToken(Line, AnnotatedLines);
195  return NamespaceTok ? NamespaceTok->TokenText : StringRef();
196 }
197 
199  const FormatStyle &Style)
200  : TokenAnalyzer(Env, Style) {}
201 
202 std::pair<tooling::Replacements, unsigned> NamespaceEndCommentsFixer::analyze(
203  TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
204  FormatTokenLexer &Tokens) {
205  const SourceManager &SourceMgr = Env.getSourceManager();
206  AffectedRangeMgr.computeAffectedLines(AnnotatedLines);
207  tooling::Replacements Fixes;
208 
209  // Spin through the lines and ensure we have balanced braces.
210  int Braces = 0;
211  for (size_t I = 0, E = AnnotatedLines.size(); I != E; ++I) {
212  FormatToken *Tok = AnnotatedLines[I]->First;
213  while (Tok) {
214  Braces += Tok->is(tok::l_brace) ? 1 : Tok->is(tok::r_brace) ? -1 : 0;
215  Tok = Tok->Next;
216  }
217  }
218  // Don't attempt to comment unbalanced braces or this can
219  // lead to comments being placed on the closing brace which isn't
220  // the matching brace of the namespace. (occurs during incomplete editing).
221  if (Braces != 0) {
222  return {Fixes, 0};
223  }
224 
225  std::string AllNamespaceNames = "";
226  size_t StartLineIndex = SIZE_MAX;
227  StringRef NamespaceTokenText;
228  unsigned int CompactedNamespacesCount = 0;
229  for (size_t I = 0, E = AnnotatedLines.size(); I != E; ++I) {
230  const AnnotatedLine *EndLine = AnnotatedLines[I];
231  const FormatToken *NamespaceTok =
232  getNamespaceToken(EndLine, AnnotatedLines);
233  if (!NamespaceTok)
234  continue;
235  FormatToken *RBraceTok = EndLine->First;
236  if (RBraceTok->Finalized)
237  continue;
238  RBraceTok->Finalized = true;
239  const FormatToken *EndCommentPrevTok = RBraceTok;
240  // Namespaces often end with '};'. In that case, attach namespace end
241  // comments to the semicolon tokens.
242  if (RBraceTok->Next && RBraceTok->Next->is(tok::semi)) {
243  EndCommentPrevTok = RBraceTok->Next;
244  }
245  if (StartLineIndex == SIZE_MAX)
246  StartLineIndex = EndLine->MatchingOpeningBlockLineIndex;
247  std::string NamespaceName = computeName(NamespaceTok);
248  if (Style.CompactNamespaces) {
249  if (CompactedNamespacesCount == 0)
250  NamespaceTokenText = NamespaceTok->TokenText;
251  if ((I + 1 < E) &&
252  NamespaceTokenText ==
253  getNamespaceTokenText(AnnotatedLines[I + 1], AnnotatedLines) &&
254  StartLineIndex - CompactedNamespacesCount - 1 ==
255  AnnotatedLines[I + 1]->MatchingOpeningBlockLineIndex &&
256  !AnnotatedLines[I + 1]->First->Finalized) {
257  if (hasEndComment(EndCommentPrevTok)) {
258  // remove end comment, it will be merged in next one
259  updateEndComment(EndCommentPrevTok, std::string(), SourceMgr, &Fixes);
260  }
261  CompactedNamespacesCount++;
262  AllNamespaceNames = "::" + NamespaceName + AllNamespaceNames;
263  continue;
264  }
265  NamespaceName += AllNamespaceNames;
266  CompactedNamespacesCount = 0;
267  AllNamespaceNames = std::string();
268  }
269  // The next token in the token stream after the place where the end comment
270  // token must be. This is either the next token on the current line or the
271  // first token on the next line.
272  const FormatToken *EndCommentNextTok = EndCommentPrevTok->Next;
273  if (EndCommentNextTok && EndCommentNextTok->is(tok::comment))
274  EndCommentNextTok = EndCommentNextTok->Next;
275  if (!EndCommentNextTok && I + 1 < E)
276  EndCommentNextTok = AnnotatedLines[I + 1]->First;
277  bool AddNewline = EndCommentNextTok &&
278  EndCommentNextTok->NewlinesBefore == 0 &&
279  EndCommentNextTok->isNot(tok::eof);
280  const std::string EndCommentText =
281  computeEndCommentText(NamespaceName, AddNewline, NamespaceTok);
282  if (!hasEndComment(EndCommentPrevTok)) {
283  bool isShort = I - StartLineIndex <= kShortNamespaceMaxLines + 1;
284  if (!isShort)
285  addEndComment(EndCommentPrevTok, EndCommentText, SourceMgr, &Fixes);
286  } else if (!validEndComment(EndCommentPrevTok, NamespaceName,
287  NamespaceTok)) {
288  updateEndComment(EndCommentPrevTok, EndCommentText, SourceMgr, &Fixes);
289  }
290  StartLineIndex = SIZE_MAX;
291  }
292  return {Fixes, 0};
293 }
294 
295 } // namespace format
296 } // namespace clang
if(T->getSizeExpr()) TRY_TO(TraverseStmt(T -> getSizeExpr()))
StringRef getNamespaceTokenText(const AnnotatedLine *Line, const SmallVectorImpl< AnnotatedLine *> &AnnotatedLines)
NamespaceEndCommentsFixer(const Environment &Env, const FormatStyle &Style)
const FormatToken * getNextNonComment() const
Returns the next token ignoring comments.
Definition: FormatToken.h:517
Maintains a set of replacements that are conflict-free.
Definition: Replacement.h:209
AffectedRangeManager AffectedRangeMgr
unsigned NewlinesBefore
The number of newlines immediately before the Token.
Definition: FormatToken.h:152
FormatToken * Next
The next token in the unwrapped line.
Definition: FormatToken.h:320
const Environment & Env
Definition: TokenAnalyzer.h:98
const FormatToken * getNamespaceToken(const AnnotatedLine *Line, const SmallVectorImpl< AnnotatedLine *> &AnnotatedLines)
bool isNot(T Kind) const
Definition: FormatToken.h:350
const FormatToken & Tok
RangeSelector name(std::string ID)
Given a node with a "name", (like NamedDecl, DeclRefExpr or CxxCtorInitializer) selects the name's to...
const SourceManager & getSourceManager() const
Definition: TokenAnalyzer.h:49
Determines extra information about the tokens comprising an UnwrappedLine.
const AnnotatedLine * Line
static const size_t kInvalidIndex
A wrapper around a Token storing information about the whitespace characters preceding it...
Definition: FormatToken.h:142
bool Affected
True if this line should be formatted, i.e.
static CharSourceRange getCharRange(SourceRange R)
std::pair< tooling::Replacements, unsigned > analyze(TokenAnnotator &Annotator, SmallVectorImpl< AnnotatedLine *> &AnnotatedLines, FormatTokenLexer &Tokens) override
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:334
bool startsWith(Ts... Tokens) const
true if this line starts with the given tokens in order, ignoring comments.
StringRef TokenText
The raw text of the token.
Definition: FormatToken.h:196
#define SIZE_MAX
Definition: stdint.h:653
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:54
bool computeAffectedLines(SmallVectorImpl< AnnotatedLine *> &Lines)
Dataflow Directional Tag Classes.
bool Finalized
If true, this token has been fully formatted (indented and potentially re-formatted inside)...
Definition: FormatToken.h:332
std::string toString(const til::SExpr *E)
This file declares NamespaceEndCommentsFixer, a TokenAnalyzer that fixes namespace end comments...
This class handles loading and caching of source files into memory.
const FormatStyle & Style