clang  8.0.0svn
RawCommentList.cpp
Go to the documentation of this file.
1 //===--- RawCommentList.cpp - Processing raw comments -----------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
11 #include "clang/AST/ASTContext.h"
12 #include "clang/AST/Comment.h"
15 #include "clang/AST/CommentLexer.h"
17 #include "clang/AST/CommentSema.h"
18 #include "clang/Basic/CharInfo.h"
19 #include "llvm/ADT/STLExtras.h"
20 
21 using namespace clang;
22 
23 namespace {
24 /// Get comment kind and bool describing if it is a trailing comment.
25 std::pair<RawComment::CommentKind, bool> getCommentKind(StringRef Comment,
26  bool ParseAllComments) {
27  const size_t MinCommentLength = ParseAllComments ? 2 : 3;
28  if ((Comment.size() < MinCommentLength) || Comment[0] != '/')
29  return std::make_pair(RawComment::RCK_Invalid, false);
30 
32  if (Comment[1] == '/') {
33  if (Comment.size() < 3)
34  return std::make_pair(RawComment::RCK_OrdinaryBCPL, false);
35 
36  if (Comment[2] == '/')
38  else if (Comment[2] == '!')
40  else
41  return std::make_pair(RawComment::RCK_OrdinaryBCPL, false);
42  } else {
43  assert(Comment.size() >= 4);
44 
45  // Comment lexer does not understand escapes in comment markers, so pretend
46  // that this is not a comment.
47  if (Comment[1] != '*' ||
48  Comment[Comment.size() - 2] != '*' ||
49  Comment[Comment.size() - 1] != '/')
50  return std::make_pair(RawComment::RCK_Invalid, false);
51 
52  if (Comment[2] == '*')
54  else if (Comment[2] == '!')
56  else
57  return std::make_pair(RawComment::RCK_OrdinaryC, false);
58  }
59  const bool TrailingComment = (Comment.size() > 3) && (Comment[3] == '<');
60  return std::make_pair(K, TrailingComment);
61 }
62 
63 bool mergedCommentIsTrailingComment(StringRef Comment) {
64  return (Comment.size() > 3) && (Comment[3] == '<');
65 }
66 
67 /// Returns true if R1 and R2 both have valid locations that start on the same
68 /// column.
69 bool commentsStartOnSameColumn(const SourceManager &SM, const RawComment &R1,
70  const RawComment &R2) {
71  SourceLocation L1 = R1.getBeginLoc();
72  SourceLocation L2 = R2.getBeginLoc();
73  bool Invalid = false;
74  unsigned C1 = SM.getPresumedColumnNumber(L1, &Invalid);
75  if (!Invalid) {
76  unsigned C2 = SM.getPresumedColumnNumber(L2, &Invalid);
77  return !Invalid && (C1 == C2);
78  }
79  return false;
80 }
81 } // unnamed namespace
82 
83 /// Determines whether there is only whitespace in `Buffer` between `P`
84 /// and the previous line.
85 /// \param Buffer The buffer to search in.
86 /// \param P The offset from the beginning of `Buffer` to start from.
87 /// \return true if all of the characters in `Buffer` ranging from the closest
88 /// line-ending character before `P` (or the beginning of `Buffer`) to `P - 1`
89 /// are whitespace.
90 static bool onlyWhitespaceOnLineBefore(const char *Buffer, unsigned P) {
91  // Search backwards until we see linefeed or carriage return.
92  for (unsigned I = P; I != 0; --I) {
93  char C = Buffer[I - 1];
94  if (isVerticalWhitespace(C))
95  return true;
96  if (!isHorizontalWhitespace(C))
97  return false;
98  }
99  // We hit the beginning of the buffer.
100  return true;
101 }
102 
103 /// Returns whether `K` is an ordinary comment kind.
105  return (K == RawComment::RCK_OrdinaryBCPL) ||
107 }
108 
110  const CommentOptions &CommentOpts, bool Merged) :
111  Range(SR), RawTextValid(false), BriefTextValid(false),
112  IsAttached(false), IsTrailingComment(false),
113  IsAlmostTrailingComment(false) {
114  // Extract raw comment text, if possible.
115  if (SR.getBegin() == SR.getEnd() || getRawText(SourceMgr).empty()) {
116  Kind = RCK_Invalid;
117  return;
118  }
119 
120  // Guess comment kind.
121  std::pair<CommentKind, bool> K =
122  getCommentKind(RawText, CommentOpts.ParseAllComments);
123 
124  // Guess whether an ordinary comment is trailing.
125  if (CommentOpts.ParseAllComments && isOrdinaryKind(K.first)) {
126  FileID BeginFileID;
127  unsigned BeginOffset;
128  std::tie(BeginFileID, BeginOffset) =
129  SourceMgr.getDecomposedLoc(Range.getBegin());
130  if (BeginOffset != 0) {
131  bool Invalid = false;
132  const char *Buffer =
133  SourceMgr.getBufferData(BeginFileID, &Invalid).data();
134  IsTrailingComment |=
135  (!Invalid && !onlyWhitespaceOnLineBefore(Buffer, BeginOffset));
136  }
137  }
138 
139  if (!Merged) {
140  Kind = K.first;
141  IsTrailingComment |= K.second;
142 
143  IsAlmostTrailingComment = RawText.startswith("//<") ||
144  RawText.startswith("/*<");
145  } else {
146  Kind = RCK_Merged;
147  IsTrailingComment =
148  IsTrailingComment || mergedCommentIsTrailingComment(RawText);
149  }
150 }
151 
152 StringRef RawComment::getRawTextSlow(const SourceManager &SourceMgr) const {
153  FileID BeginFileID;
154  FileID EndFileID;
155  unsigned BeginOffset;
156  unsigned EndOffset;
157 
158  std::tie(BeginFileID, BeginOffset) =
159  SourceMgr.getDecomposedLoc(Range.getBegin());
160  std::tie(EndFileID, EndOffset) = SourceMgr.getDecomposedLoc(Range.getEnd());
161 
162  const unsigned Length = EndOffset - BeginOffset;
163  if (Length < 2)
164  return StringRef();
165 
166  // The comment can't begin in one file and end in another.
167  assert(BeginFileID == EndFileID);
168 
169  bool Invalid = false;
170  const char *BufferStart = SourceMgr.getBufferData(BeginFileID,
171  &Invalid).data();
172  if (Invalid)
173  return StringRef();
174 
175  return StringRef(BufferStart + BeginOffset, Length);
176 }
177 
178 const char *RawComment::extractBriefText(const ASTContext &Context) const {
179  // Lazily initialize RawText using the accessor before using it.
180  (void)getRawText(Context.getSourceManager());
181 
182  // Since we will be copying the resulting text, all allocations made during
183  // parsing are garbage after resulting string is formed. Thus we can use
184  // a separate allocator for all temporary stuff.
185  llvm::BumpPtrAllocator Allocator;
186 
187  comments::Lexer L(Allocator, Context.getDiagnostics(),
188  Context.getCommentCommandTraits(),
189  Range.getBegin(),
190  RawText.begin(), RawText.end());
192 
193  const std::string Result = P.Parse();
194  const unsigned BriefTextLength = Result.size();
195  char *BriefTextPtr = new (Context) char[BriefTextLength + 1];
196  memcpy(BriefTextPtr, Result.c_str(), BriefTextLength + 1);
197  BriefText = BriefTextPtr;
198  BriefTextValid = true;
199 
200  return BriefTextPtr;
201 }
202 
204  const Preprocessor *PP,
205  const Decl *D) const {
206  // Lazily initialize RawText using the accessor before using it.
207  (void)getRawText(Context.getSourceManager());
208 
209  comments::Lexer L(Context.getAllocator(), Context.getDiagnostics(),
210  Context.getCommentCommandTraits(),
212  RawText.begin(), RawText.end());
213  comments::Sema S(Context.getAllocator(), Context.getSourceManager(),
214  Context.getDiagnostics(),
215  Context.getCommentCommandTraits(),
216  PP);
217  S.setDecl(D);
218  comments::Parser P(L, S, Context.getAllocator(), Context.getSourceManager(),
219  Context.getDiagnostics(),
220  Context.getCommentCommandTraits());
221 
222  return P.parseFullComment();
223 }
224 
226  SourceLocation Loc1, SourceLocation Loc2,
227  unsigned MaxNewlinesAllowed) {
228  std::pair<FileID, unsigned> Loc1Info = SM.getDecomposedLoc(Loc1);
229  std::pair<FileID, unsigned> Loc2Info = SM.getDecomposedLoc(Loc2);
230 
231  // Question does not make sense if locations are in different files.
232  if (Loc1Info.first != Loc2Info.first)
233  return false;
234 
235  bool Invalid = false;
236  const char *Buffer = SM.getBufferData(Loc1Info.first, &Invalid).data();
237  if (Invalid)
238  return false;
239 
240  unsigned NumNewlines = 0;
241  assert(Loc1Info.second <= Loc2Info.second && "Loc1 after Loc2!");
242  // Look for non-whitespace characters and remember any newlines seen.
243  for (unsigned I = Loc1Info.second; I != Loc2Info.second; ++I) {
244  switch (Buffer[I]) {
245  default:
246  return false;
247  case ' ':
248  case '\t':
249  case '\f':
250  case '\v':
251  break;
252  case '\r':
253  case '\n':
254  ++NumNewlines;
255 
256  // Check if we have found more than the maximum allowed number of
257  // newlines.
258  if (NumNewlines > MaxNewlinesAllowed)
259  return false;
260 
261  // Collapse \r\n and \n\r into a single newline.
262  if (I + 1 != Loc2Info.second &&
263  (Buffer[I + 1] == '\n' || Buffer[I + 1] == '\r') &&
264  Buffer[I] != Buffer[I + 1])
265  ++I;
266  break;
267  }
268  }
269 
270  return true;
271 }
272 
274  const CommentOptions &CommentOpts,
275  llvm::BumpPtrAllocator &Allocator) {
276  if (RC.isInvalid())
277  return;
278 
279  // Check if the comments are not in source order.
280  while (!Comments.empty() &&
281  !SourceMgr.isBeforeInTranslationUnit(Comments.back()->getBeginLoc(),
282  RC.getBeginLoc())) {
283  // If they are, just pop a few last comments that don't fit.
284  // This happens if an \#include directive contains comments.
285  Comments.pop_back();
286  }
287 
288  // Ordinary comments are not interesting for us.
289  if (RC.isOrdinary() && !CommentOpts.ParseAllComments)
290  return;
291 
292  // If this is the first Doxygen comment, save it (because there isn't
293  // anything to merge it with).
294  if (Comments.empty()) {
295  Comments.push_back(new (Allocator) RawComment(RC));
296  return;
297  }
298 
299  const RawComment &C1 = *Comments.back();
300  const RawComment &C2 = RC;
301 
302  // Merge comments only if there is only whitespace between them.
303  // Can't merge trailing and non-trailing comments unless the second is
304  // non-trailing ordinary in the same column, as in the case:
305  // int x; // documents x
306  // // more text
307  // versus:
308  // int x; // documents x
309  // int y; // documents y
310  // or:
311  // int x; // documents x
312  // // documents y
313  // int y;
314  // Merge comments if they are on same or consecutive lines.
315  if ((C1.isTrailingComment() == C2.isTrailingComment() ||
316  (C1.isTrailingComment() && !C2.isTrailingComment() &&
317  isOrdinaryKind(C2.getKind()) &&
318  commentsStartOnSameColumn(SourceMgr, C1, C2))) &&
319  onlyWhitespaceBetween(SourceMgr, C1.getEndLoc(), C2.getBeginLoc(),
320  /*MaxNewlinesAllowed=*/1)) {
321  SourceRange MergedRange(C1.getBeginLoc(), C2.getEndLoc());
322  *Comments.back() = RawComment(SourceMgr, MergedRange, CommentOpts, true);
323  } else {
324  Comments.push_back(new (Allocator) RawComment(RC));
325  }
326 }
327 
328 void RawCommentList::addDeserializedComments(ArrayRef<RawComment *> DeserializedComments) {
329  std::vector<RawComment *> MergedComments;
330  MergedComments.reserve(Comments.size() + DeserializedComments.size());
331 
332  std::merge(Comments.begin(), Comments.end(),
333  DeserializedComments.begin(), DeserializedComments.end(),
334  std::back_inserter(MergedComments),
335  BeforeThanCompare<RawComment>(SourceMgr));
336  std::swap(Comments, MergedComments);
337 }
338 
339 std::string RawComment::getFormattedText(const SourceManager &SourceMgr,
340  DiagnosticsEngine &Diags) const {
341  llvm::StringRef CommentText = getRawText(SourceMgr);
342  if (CommentText.empty())
343  return "";
344 
345  llvm::BumpPtrAllocator Allocator;
346  // We do not parse any commands, so CommentOptions are ignored by
347  // comments::Lexer. Therefore, we just use default-constructed options.
348  CommentOptions DefOpts;
349  comments::CommandTraits EmptyTraits(Allocator, DefOpts);
350  comments::Lexer L(Allocator, Diags, EmptyTraits, getSourceRange().getBegin(),
351  CommentText.begin(), CommentText.end(),
352  /*ParseCommands=*/false);
353 
354  std::string Result;
355  // A column number of the first non-whitespace token in the comment text.
356  // We skip whitespace up to this column, but keep the whitespace after this
357  // column. IndentColumn is calculated when lexing the first line and reused
358  // for the rest of lines.
359  unsigned IndentColumn = 0;
360 
361  // Processes one line of the comment and adds it to the result.
362  // Handles skipping the indent at the start of the line.
363  // Returns false when eof is reached and true otherwise.
364  auto LexLine = [&](bool IsFirstLine) -> bool {
366  // Lex the first token on the line. We handle it separately, because we to
367  // fix up its indentation.
368  L.lex(Tok);
369  if (Tok.is(comments::tok::eof))
370  return false;
371  if (Tok.is(comments::tok::newline)) {
372  Result += "\n";
373  return true;
374  }
375  llvm::StringRef TokText = L.getSpelling(Tok, SourceMgr);
376  bool LocInvalid = false;
377  unsigned TokColumn =
378  SourceMgr.getSpellingColumnNumber(Tok.getLocation(), &LocInvalid);
379  assert(!LocInvalid && "getFormattedText for invalid location");
380 
381  // Amount of leading whitespace in TokText.
382  size_t WhitespaceLen = TokText.find_first_not_of(" \t");
383  if (WhitespaceLen == StringRef::npos)
384  WhitespaceLen = TokText.size();
385  // Remember the amount of whitespace we skipped in the first line to remove
386  // indent up to that column in the following lines.
387  if (IsFirstLine)
388  IndentColumn = TokColumn + WhitespaceLen;
389 
390  // Amount of leading whitespace we actually want to skip.
391  // For the first line we skip all the whitespace.
392  // For the rest of the lines, we skip whitespace up to IndentColumn.
393  unsigned SkipLen =
394  IsFirstLine
395  ? WhitespaceLen
396  : std::min<size_t>(
397  WhitespaceLen,
398  std::max<int>(static_cast<int>(IndentColumn) - TokColumn, 0));
399  llvm::StringRef Trimmed = TokText.drop_front(SkipLen);
400  Result += Trimmed;
401  // Lex all tokens in the rest of the line.
402  for (L.lex(Tok); Tok.isNot(comments::tok::eof); L.lex(Tok)) {
403  if (Tok.is(comments::tok::newline)) {
404  Result += "\n";
405  return true;
406  }
407  Result += L.getSpelling(Tok, SourceMgr);
408  }
409  // We've reached the end of file token.
410  return false;
411  };
412 
413  auto DropTrailingNewLines = [](std::string &Str) {
414  while (Str.back() == '\n')
415  Str.pop_back();
416  };
417 
418  // Proces first line separately to remember indent for the following lines.
419  if (!LexLine(/*IsFirstLine=*/true)) {
420  DropTrailingNewLines(Result);
421  return Result;
422  }
423  // Process the rest of the lines.
424  while (LexLine(/*IsFirstLine=*/false))
425  ;
426  DropTrailingNewLines(Result);
427  return Result;
428 }
Defines the clang::ASTContext interface.
bool isInvalid() const LLVM_READONLY
Decl - This represents one declaration (or definition), e.g.
Definition: DeclBase.h:87
unsigned getPresumedColumnNumber(SourceLocation Loc, bool *Invalid=nullptr) const
StringRef P
bool isBeforeInTranslationUnit(SourceLocation LHS, SourceLocation RHS) const
Determines the order of 2 source locations in the translation unit.
DiagnosticsEngine & getDiagnostics() const
Any normal BCPL comments.
Options for controlling comment parsing.
SourceRange getSourceRange() const LLVM_READONLY
StringRef getBufferData(FileID FID, bool *Invalid=nullptr) const
Return a StringRef to the source buffer data for the specified FileID.
LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)
Returns true if this character is horizontal ASCII whitespace: ' ', '\t', '\f', '\v'.
Definition: CharInfo.h:71
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:154
bool isNot(tok::TokenKind K) const LLVM_READONLY
Definition: CommentLexer.h:94
const FormatToken & Tok
Concrete class used by the front-end to report problems and issues.
Definition: Diagnostic.h:149
void addComment(const RawComment &RC, const CommentOptions &CommentOpts, llvm::BumpPtrAllocator &Allocator)
SourceLocation getEndLoc() const LLVM_READONLY
static bool onlyWhitespaceBetween(SourceManager &SM, SourceLocation Loc1, SourceLocation Loc2, unsigned MaxNewlinesAllowed)
Any normal C comment.
comments::CommandTraits & getCommentCommandTraits() const
Definition: ASTContext.h:884
SourceLocation getBeginLoc() const LLVM_READONLY
SourceLocation getEnd() const
SourceLocation getLocation() const LLVM_READONLY
Definition: CommentLexer.h:81
The result type of a method or function.
const SourceManager & SM
Definition: Format.cpp:1472
comments::FullComment * parse(const ASTContext &Context, const Preprocessor *PP, const Decl *D) const
Parse the comment, assuming it is attached to decl D.
bool isOrdinary() const LLVM_READONLY
Returns true if this comment is not a documentation comment.
bool is(tok::TokenKind K) const LLVM_READONLY
Definition: CommentLexer.h:93
This class provides information about commands that can be used in comments.
Compare comments' source locations.
#define false
Definition: stdbool.h:33
Kind
Two or more documentation comments merged together.
Encodes a location in the source.
static bool isOrdinaryKind(RawComment::CommentKind K)
Returns whether K is an ordinary comment kind.
Comment lexer.
Definition: CommentLexer.h:221
static bool onlyWhitespaceOnLineBefore(const char *Buffer, unsigned P)
Determines whether there is only whitespace in Buffer between P and the previous line.
__DEVICE__ void * memcpy(void *__a, const void *__b, size_t __c)
unsigned getSpellingColumnNumber(SourceLocation Loc, bool *Invalid=nullptr) const
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
Dataflow Directional Tag Classes.
bool isTrailingComment() const LLVM_READONLY
Returns true if it is a comment that should be put after a member:
LLVM_READONLY bool isVerticalWhitespace(unsigned char c)
Returns true if this character is vertical ASCII whitespace: '\n', '\r'.
Definition: CharInfo.h:79
llvm::BumpPtrAllocator & getAllocator() const
Definition: ASTContext.h:674
Comment token.
Definition: CommentLexer.h:56
SourceManager & getSourceManager()
Definition: ASTContext.h:671
Doxygen comment parser.
Definition: CommentParser.h:30
StringRef getRawText(const SourceManager &SourceMgr) const
Returns raw comment text with comment markers.
CommentKind getKind() const LLVM_READONLY
A very simple comment parser that extracts "a brief description".
A trivial tuple used to represent a source range.
SourceLocation getBegin() const
This class handles loading and caching of source files into memory.
A full comment attached to a declaration, contains block content.
Definition: Comment.h:1092
std::pair< FileID, unsigned > getDecomposedLoc(SourceLocation Loc) const
Decompose the specified location into a raw FileID + Offset pair.
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
Definition: Preprocessor.h:127
bool ParseAllComments
Treat ordinary comments as documentation comments.