clang 23.0.0git
RawCommentList.cpp
Go to the documentation of this file.
//===--- RawCommentList.cpp - Processing raw comments -----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "clang/AST/RawCommentList.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Comment.h"
#include "clang/AST/CommentBriefParser.h"
#include "clang/AST/CommentCommandTraits.h"
#include "clang/AST/CommentLexer.h"
#include "clang/AST/CommentParser.h"
#include "clang/AST/CommentSema.h"
#include "clang/Basic/CharInfo.h"
#include "llvm/Support/Allocator.h"

20using namespace clang;
21
22namespace {
23/// Get comment kind and bool describing if it is a trailing comment.
24std::pair<RawComment::CommentKind, bool> getCommentKind(StringRef Comment,
25 bool ParseAllComments) {
26 const size_t MinCommentLength = ParseAllComments ? 2 : 3;
27 if ((Comment.size() < MinCommentLength) || Comment[0] != '/')
28 return std::make_pair(RawComment::RCK_Invalid, false);
29
31 if (Comment[1] == '/') {
32 if (Comment.size() < 3)
33 return std::make_pair(RawComment::RCK_OrdinaryBCPL, false);
34
35 if (Comment[2] == '/')
37 else if (Comment[2] == '!')
39 else
40 return std::make_pair(RawComment::RCK_OrdinaryBCPL, false);
41 } else {
42 assert(Comment.size() >= 4);
43
44 // Comment lexer does not understand escapes in comment markers, so pretend
45 // that this is not a comment.
46 if (Comment[1] != '*' ||
47 Comment[Comment.size() - 2] != '*' ||
48 Comment[Comment.size() - 1] != '/')
49 return std::make_pair(RawComment::RCK_Invalid, false);
50
51 if (Comment[2] == '*')
53 else if (Comment[2] == '!')
55 else
56 return std::make_pair(RawComment::RCK_OrdinaryC, false);
57 }
58 const bool TrailingComment = (Comment.size() > 3) && (Comment[3] == '<');
59 return std::make_pair(K, TrailingComment);
60}
61
62bool mergedCommentIsTrailingComment(StringRef Comment) {
63 return (Comment.size() > 3) && (Comment[3] == '<');
64}
65
66/// Returns true if R1 and R2 both have valid locations that start on the same
67/// column.
68bool commentsStartOnSameColumn(const SourceManager &SM, const RawComment &R1,
69 const RawComment &R2) {
72 bool Invalid = false;
73 unsigned C1 = SM.getPresumedColumnNumber(L1, &Invalid);
74 if (!Invalid) {
75 unsigned C2 = SM.getPresumedColumnNumber(L2, &Invalid);
76 return !Invalid && (C1 == C2);
77 }
78 return false;
79}
80} // unnamed namespace
81
82/// Determines whether there is only whitespace in `Buffer` between `P`
83/// and the previous line.
84/// \param Buffer The buffer to search in.
85/// \param P The offset from the beginning of `Buffer` to start from.
86/// \return true if all of the characters in `Buffer` ranging from the closest
87/// line-ending character before `P` (or the beginning of `Buffer`) to `P - 1`
88/// are whitespace.
89static bool onlyWhitespaceOnLineBefore(const char *Buffer, unsigned P) {
90 // Search backwards until we see linefeed or carriage return.
91 for (unsigned I = P; I != 0; --I) {
92 char C = Buffer[I - 1];
94 return true;
96 return false;
97 }
98 // We hit the beginning of the buffer.
99 return true;
100}
101
102/// Returns whether `K` is an ordinary comment kind.
107
109 const CommentOptions &CommentOpts, bool Merged) :
110 Range(SR), RawTextValid(false), BriefTextValid(false),
111 IsAttached(false), IsTrailingComment(false),
112 IsAlmostTrailingComment(false) {
113 // Extract raw comment text, if possible.
114 if (SR.getBegin() == SR.getEnd() || getRawText(SourceMgr).empty()) {
115 Kind = RCK_Invalid;
116 return;
117 }
118
119 // Guess comment kind.
120 std::pair<CommentKind, bool> K =
121 getCommentKind(RawText, CommentOpts.ParseAllComments);
122
123 // Guess whether an ordinary comment is trailing.
124 if (CommentOpts.ParseAllComments && isOrdinaryKind(K.first)) {
125 FileID BeginFileID;
126 unsigned BeginOffset;
127 std::tie(BeginFileID, BeginOffset) =
128 SourceMgr.getDecomposedLoc(Range.getBegin());
129 if (BeginOffset != 0) {
130 bool Invalid = false;
131 const char *Buffer =
132 SourceMgr.getBufferData(BeginFileID, &Invalid).data();
133 IsTrailingComment |=
134 (!Invalid && !onlyWhitespaceOnLineBefore(Buffer, BeginOffset));
135 }
136 }
137
138 if (!Merged) {
139 Kind = K.first;
140 IsTrailingComment |= K.second;
141
142 IsAlmostTrailingComment =
143 RawText.starts_with("//<") || RawText.starts_with("/*<");
144 } else {
145 Kind = RCK_Merged;
146 IsTrailingComment =
147 IsTrailingComment || mergedCommentIsTrailingComment(RawText);
148 }
149}
150
151StringRef RawComment::getRawTextSlow(const SourceManager &SourceMgr) const {
152 FileID BeginFileID;
153 FileID EndFileID;
154 unsigned BeginOffset;
155 unsigned EndOffset;
156
157 std::tie(BeginFileID, BeginOffset) =
158 SourceMgr.getDecomposedLoc(Range.getBegin());
159 std::tie(EndFileID, EndOffset) = SourceMgr.getDecomposedLoc(Range.getEnd());
160
161 const unsigned Length = EndOffset - BeginOffset;
162 if (Length < 2)
163 return StringRef();
164
165 // The comment can't begin in one file and end in another.
166 assert(BeginFileID == EndFileID);
167
168 bool Invalid = false;
169 const char *BufferStart = SourceMgr.getBufferData(BeginFileID,
170 &Invalid).data();
171 if (Invalid)
172 return StringRef();
173
174 return StringRef(BufferStart + BeginOffset, Length);
175}
176
177const char *RawComment::extractBriefText(const ASTContext &Context) const {
178 // Lazily initialize RawText using the accessor before using it.
179 (void)getRawText(Context.getSourceManager());
180
181 // Since we will be copying the resulting text, all allocations made during
182 // parsing are garbage after resulting string is formed. Thus we can use
183 // a separate allocator for all temporary stuff.
184 llvm::BumpPtrAllocator Allocator;
185
186 comments::Lexer L(Allocator, Context.getDiagnostics(),
187 Context.getCommentCommandTraits(),
188 Range.getBegin(),
189 RawText.begin(), RawText.end());
190 comments::BriefParser P(L, Context.getCommentCommandTraits());
191
192 const std::string Result = P.Parse();
193 const unsigned BriefTextLength = Result.size();
194 char *BriefTextPtr = new (Context) char[BriefTextLength + 1];
195 memcpy(BriefTextPtr, Result.c_str(), BriefTextLength + 1);
196 BriefText = BriefTextPtr;
197 BriefTextValid = true;
198
199 return BriefTextPtr;
200}
201
203 const Preprocessor *PP,
204 const Decl *D) const {
205 if (D->isInvalidDecl())
206 return nullptr;
207
208 // Lazily initialize RawText using the accessor before using it.
209 (void)getRawText(Context.getSourceManager());
210
211 comments::Lexer L(Context.getAllocator(), Context.getDiagnostics(),
212 Context.getCommentCommandTraits(),
213 getSourceRange().getBegin(),
214 RawText.begin(), RawText.end());
215 comments::Sema S(Context.getAllocator(), Context.getSourceManager(),
216 Context.getDiagnostics(),
217 Context.getCommentCommandTraits(),
218 PP);
219 S.setDecl(D);
220 comments::Parser P(L, S, Context.getAllocator(), Context.getSourceManager(),
221 Context.getDiagnostics(),
222 Context.getCommentCommandTraits());
223
224 return P.parseFullComment();
225}
226
229 unsigned MaxNewlinesAllowed) {
230 FileIDAndOffset Loc1Info = SM.getDecomposedLoc(Loc1);
231 FileIDAndOffset Loc2Info = SM.getDecomposedLoc(Loc2);
232
233 // Question does not make sense if locations are in different files.
234 if (Loc1Info.first != Loc2Info.first)
235 return false;
236
237 bool Invalid = false;
238 const char *Buffer = SM.getBufferData(Loc1Info.first, &Invalid).data();
239 if (Invalid)
240 return false;
241
242 unsigned NumNewlines = 0;
243 assert(Loc1Info.second <= Loc2Info.second && "Loc1 after Loc2!");
244 // Look for non-whitespace characters and remember any newlines seen.
245 for (unsigned I = Loc1Info.second; I != Loc2Info.second; ++I) {
246 switch (Buffer[I]) {
247 default:
248 return false;
249 case ' ':
250 case '\t':
251 case '\f':
252 case '\v':
253 break;
254 case '\r':
255 case '\n':
256 ++NumNewlines;
257
258 // Check if we have found more than the maximum allowed number of
259 // newlines.
260 if (NumNewlines > MaxNewlinesAllowed)
261 return false;
262
263 // Collapse \r\n and \n\r into a single newline.
264 if (I + 1 != Loc2Info.second &&
265 (Buffer[I + 1] == '\n' || Buffer[I + 1] == '\r') &&
266 Buffer[I] != Buffer[I + 1])
267 ++I;
268 break;
269 }
270 }
271
272 return true;
273}
274
276 const CommentOptions &CommentOpts,
277 llvm::BumpPtrAllocator &Allocator) {
278 if (RC.isInvalid())
279 return;
280
281 // Ordinary comments are not interesting for us.
282 if (RC.isOrdinary() && !CommentOpts.ParseAllComments)
283 return;
284
285 FileIDAndOffset Loc = SourceMgr.getDecomposedLoc(RC.getBeginLoc());
286
287 const FileID CommentFile = Loc.first;
288 const unsigned CommentOffset = Loc.second;
289
290 // If this is the first Doxygen comment, save it (because there isn't
291 // anything to merge it with).
292 auto &OC = OrderedComments[CommentFile];
293 if (OC.empty()) {
294 OC[CommentOffset] = new (Allocator) RawComment(RC);
295 return;
296 }
297
298 const RawComment &C1 = *OC.rbegin()->second;
299 const RawComment &C2 = RC;
300
301 // Merge comments only if there is only whitespace between them.
302 // Can't merge trailing and non-trailing comments unless the second is
303 // non-trailing ordinary in the same column, as in the case:
304 // int x; // documents x
305 // // more text
306 // versus:
307 // int x; // documents x
308 // int y; // documents y
309 // or:
310 // int x; // documents x
311 // // documents y
312 // int y;
313 // Merge comments if they are on same or consecutive lines.
314 if ((C1.isTrailingComment() == C2.isTrailingComment() ||
315 (C1.isTrailingComment() && !C2.isTrailingComment() &&
316 isOrdinaryKind(C2.getKind()) &&
317 commentsStartOnSameColumn(SourceMgr, C1, C2))) &&
318 onlyWhitespaceBetween(SourceMgr, C1.getEndLoc(), C2.getBeginLoc(),
319 /*MaxNewlinesAllowed=*/1)) {
320 SourceRange MergedRange(C1.getBeginLoc(), C2.getEndLoc());
321 *OrderedComments[CommentFile].rbegin()->second =
322 RawComment(SourceMgr, MergedRange, CommentOpts, true);
323 } else {
324 OrderedComments[CommentFile][CommentOffset] =
325 new (Allocator) RawComment(RC);
326 }
327}
328
329const std::map<unsigned, RawComment *> *
331 auto CommentsInFile = OrderedComments.find(File);
332 if (CommentsInFile == OrderedComments.end())
333 return nullptr;
334
335 return &CommentsInFile->second;
336}
337
338bool RawCommentList::empty() const { return OrderedComments.empty(); }
339
341 unsigned Offset) const {
342 auto Cached = CommentBeginLine.find(C);
343 if (Cached != CommentBeginLine.end())
344 return Cached->second;
345 const unsigned Line = SourceMgr.getLineNumber(File, Offset);
346 CommentBeginLine[C] = Line;
347 return Line;
348}
349
351 auto Cached = CommentEndOffset.find(C);
352 if (Cached != CommentEndOffset.end())
353 return Cached->second;
354 const unsigned Offset =
355 SourceMgr.getDecomposedLoc(C->getSourceRange().getEnd()).second;
356 CommentEndOffset[C] = Offset;
357 return Offset;
358}
359
360std::string RawComment::getFormattedText(const SourceManager &SourceMgr,
361 DiagnosticsEngine &Diags) const {
362 llvm::StringRef CommentText = getRawText(SourceMgr);
363 if (CommentText.empty())
364 return "";
365
366 std::string Result;
367 for (const RawComment::CommentLine &Line :
368 getFormattedLines(SourceMgr, Diags))
369 Result += Line.Text + "\n";
370
371 auto LastChar = Result.find_last_not_of('\n');
372 Result.erase(LastChar + 1, Result.size());
373
374 return Result;
375}
376
377std::vector<RawComment::CommentLine>
379 DiagnosticsEngine &Diags) const {
380 llvm::StringRef CommentText = getRawText(SourceMgr);
381 if (CommentText.empty())
382 return {};
383
384 llvm::BumpPtrAllocator Allocator;
385 // We do not parse any commands, so CommentOptions are ignored by
386 // comments::Lexer. Therefore, we just use default-constructed options.
387 CommentOptions DefOpts;
388 comments::CommandTraits EmptyTraits(Allocator, DefOpts);
389 comments::Lexer L(Allocator, Diags, EmptyTraits, getSourceRange().getBegin(),
390 CommentText.begin(), CommentText.end(),
391 /*ParseCommands=*/false);
392
393 std::vector<RawComment::CommentLine> Result;
394 // A column number of the first non-whitespace token in the comment text.
395 // We skip whitespace up to this column, but keep the whitespace after this
396 // column. IndentColumn is calculated when lexing the first line and reused
397 // for the rest of lines.
398 unsigned IndentColumn = 0;
399
400 // Record the line number of the last processed comment line.
401 // For block-style comments, an extra newline token will be produced after
402 // the end-comment marker, e.g.:
403 // /** This is a multi-line comment block.
404 // The lexer will produce two newline tokens here > */
405 // previousLine will record the line number when we previously saw a newline
406 // token and recorded a comment line. If we see another newline token on the
407 // same line, don't record anything in between.
408 unsigned PreviousLine = 0;
409
410 // Processes one line of the comment and adds it to the result.
411 // Handles skipping the indent at the start of the line.
412 // Returns false when eof is reached and true otherwise.
413 auto LexLine = [&](bool IsFirstLine) -> bool {
415 // Lex the first token on the line. We handle it separately, because we to
416 // fix up its indentation.
417 L.lex(Tok);
418 if (Tok.is(comments::tok::eof))
419 return false;
420 if (Tok.is(comments::tok::newline)) {
421 PresumedLoc Loc = SourceMgr.getPresumedLoc(Tok.getLocation());
422 if (Loc.getLine() != PreviousLine) {
423 Result.emplace_back("", Loc, Loc);
424 PreviousLine = Loc.getLine();
425 }
426 return true;
427 }
429 llvm::StringRef TokText = L.getSpelling(Tok, SourceMgr);
430 bool LocInvalid = false;
431 unsigned TokColumn =
432 SourceMgr.getSpellingColumnNumber(Tok.getLocation(), &LocInvalid);
433 assert(!LocInvalid && "getFormattedText for invalid location");
434
435 // Amount of leading whitespace in TokText.
436 size_t WhitespaceLen = TokText.find_first_not_of(" \t");
437 if (WhitespaceLen == StringRef::npos)
438 WhitespaceLen = TokText.size();
439 // Remember the amount of whitespace we skipped in the first line to remove
440 // indent up to that column in the following lines.
441 if (IsFirstLine)
442 IndentColumn = TokColumn + WhitespaceLen;
443
444 // Amount of leading whitespace we actually want to skip.
445 // For the first line we skip all the whitespace.
446 // For the rest of the lines, we skip whitespace up to IndentColumn.
447 unsigned SkipLen =
448 IsFirstLine
449 ? WhitespaceLen
450 : std::min<size_t>(
451 WhitespaceLen,
452 std::max<int>(static_cast<int>(IndentColumn) - TokColumn, 0));
453 llvm::StringRef Trimmed = TokText.drop_front(SkipLen);
454 Line += Trimmed;
455 // Get the beginning location of the adjusted comment line.
456 PresumedLoc Begin =
457 SourceMgr.getPresumedLoc(Tok.getLocation().getLocWithOffset(SkipLen));
458
459 // Lex all tokens in the rest of the line.
460 for (L.lex(Tok); Tok.isNot(comments::tok::eof); L.lex(Tok)) {
461 if (Tok.is(comments::tok::newline)) {
462 // Get the ending location of the comment line.
463 PresumedLoc End = SourceMgr.getPresumedLoc(Tok.getLocation());
464 if (End.getLine() != PreviousLine) {
465 Result.emplace_back(Line, Begin, End);
466 PreviousLine = End.getLine();
467 }
468 return true;
469 }
470 Line += L.getSpelling(Tok, SourceMgr);
471 }
472 PresumedLoc End = SourceMgr.getPresumedLoc(Tok.getLocation());
473 Result.emplace_back(Line, Begin, End);
474 // We've reached the end of file token.
475 return false;
476 };
477
478 // Process first line separately to remember indent for the following lines.
479 if (!LexLine(/*IsFirstLine=*/true))
480 return Result;
481 // Process the rest of the lines.
482 while (LexLine(/*IsFirstLine=*/false))
483 ;
484 return Result;
485}
Defines the clang::ASTContext interface.
Token Tok
The Token.
#define SM(sm)
static bool onlyWhitespaceBetween(SourceManager &SM, SourceLocation Loc1, SourceLocation Loc2, unsigned MaxNewlinesAllowed)
static bool isOrdinaryKind(RawComment::CommentKind K)
Returns whether K is an ordinary comment kind.
static bool onlyWhitespaceOnLineBefore(const char *Buffer, unsigned P)
Determines whether there is only whitespace in Buffer between P and the previous line.
__DEVICE__ void * memcpy(void *__a, const void *__b, size_t __c)
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition ASTContext.h:226
Decl - This represents one declaration (or definition), e.g.
Definition DeclBase.h:86
bool isInvalidDecl() const
Definition DeclBase.h:588
Concrete class used by the front-end to report problems and issues.
Definition Diagnostic.h:232
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
Represents an unpacked "presumed" location which can be presented to the user.
unsigned getLine() const
Return the presumed line number of this location.
unsigned getCommentBeginLine(RawComment *C, FileID File, unsigned Offset) const
const std::map< unsigned, RawComment * > * getCommentsInFile(FileID File) const
unsigned getCommentEndOffset(RawComment *C) const
void addComment(const RawComment &RC, const CommentOptions &CommentOpts, llvm::BumpPtrAllocator &Allocator)
@ RCK_OrdinaryC
Any normal C comment.
@ RCK_Merged
Two or more documentation comments merged together.
@ RCK_Invalid
Invalid comment.
@ RCK_OrdinaryBCPL
Any normal BCPL comments.
bool isOrdinary() const LLVM_READONLY
Returns true if this comment is not a documentation comment.
bool isTrailingComment() const LLVM_READONLY
Returns true if it is a comment that should be put after a member:
StringRef getRawText(const SourceManager &SourceMgr) const
Returns raw comment text with comment markers.
SourceLocation getEndLoc() const LLVM_READONLY
std::vector< CommentLine > getFormattedLines(const SourceManager &SourceMgr, DiagnosticsEngine &Diags) const
Returns sanitized comment text as separated lines with locations in source, suitable for further proc...
bool isInvalid() const LLVM_READONLY
std::string getFormattedText(const SourceManager &SourceMgr, DiagnosticsEngine &Diags) const
Returns sanitized comment text, suitable for presentation in editor UIs.
CommentKind getKind() const LLVM_READONLY
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation getBeginLoc() const LLVM_READONLY
comments::FullComment * parse(const ASTContext &Context, const Preprocessor *PP, const Decl *D) const
Parse the comment, assuming it is attached to decl D.
Encodes a location in the source.
This class handles loading and caching of source files into memory.
A trivial tuple used to represent a source range.
SourceLocation getEnd() const
SourceLocation getBegin() const
A very simple comment parser that extracts "a brief description".
This class provides information about commands that can be used in comments.
A full comment attached to a declaration, contains block content.
Definition Comment.h:1104
StringRef getSpelling(const Token &Tok, const SourceManager &SourceMgr) const
Doxygen comment parser.
FullComment * parseFullComment()
void setDecl(const Decl *D)
The JSON file list parser is used to communicate input to InstallAPI.
LLVM_READONLY bool isVerticalWhitespace(unsigned char c)
Returns true if this character is vertical ASCII whitespace: '\n', '\r'.
Definition CharInfo.h:99
std::pair< FileID, unsigned > FileIDAndOffset
LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)
Returns true if this character is horizontal ASCII whitespace: ' ', '\t', '\f', '\v'.
Definition CharInfo.h:91
@ Result
The result type of a method or function.
Definition TypeBase.h:905
#define false
Definition stdbool.h:26
Options for controlling comment parsing.
bool ParseAllComments
Treat ordinary comments as documentation comments.