clang 20.0.0git
RawCommentList.cpp
Go to the documentation of this file.
1//===--- RawCommentList.cpp - Processing raw comments -----------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
#include "clang/AST/RawCommentList.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Comment.h"
#include "clang/AST/CommentBriefParser.h"
#include "clang/AST/CommentCommandTraits.h"
#include "clang/AST/CommentLexer.h"
#include "clang/AST/CommentParser.h"
#include "clang/AST/CommentSema.h"
#include "clang/Basic/CharInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/Allocator.h"
21
22using namespace clang;
23
24namespace {
25/// Get comment kind and bool describing if it is a trailing comment.
26std::pair<RawComment::CommentKind, bool> getCommentKind(StringRef Comment,
27 bool ParseAllComments) {
28 const size_t MinCommentLength = ParseAllComments ? 2 : 3;
29 if ((Comment.size() < MinCommentLength) || Comment[0] != '/')
30 return std::make_pair(RawComment::RCK_Invalid, false);
31
33 if (Comment[1] == '/') {
34 if (Comment.size() < 3)
35 return std::make_pair(RawComment::RCK_OrdinaryBCPL, false);
36
37 if (Comment[2] == '/')
39 else if (Comment[2] == '!')
41 else
42 return std::make_pair(RawComment::RCK_OrdinaryBCPL, false);
43 } else {
44 assert(Comment.size() >= 4);
45
46 // Comment lexer does not understand escapes in comment markers, so pretend
47 // that this is not a comment.
48 if (Comment[1] != '*' ||
49 Comment[Comment.size() - 2] != '*' ||
50 Comment[Comment.size() - 1] != '/')
51 return std::make_pair(RawComment::RCK_Invalid, false);
52
53 if (Comment[2] == '*')
55 else if (Comment[2] == '!')
57 else
58 return std::make_pair(RawComment::RCK_OrdinaryC, false);
59 }
60 const bool TrailingComment = (Comment.size() > 3) && (Comment[3] == '<');
61 return std::make_pair(K, TrailingComment);
62}
63
64bool mergedCommentIsTrailingComment(StringRef Comment) {
65 return (Comment.size() > 3) && (Comment[3] == '<');
66}
67
68/// Returns true if R1 and R2 both have valid locations that start on the same
69/// column.
70bool commentsStartOnSameColumn(const SourceManager &SM, const RawComment &R1,
71 const RawComment &R2) {
74 bool Invalid = false;
75 unsigned C1 = SM.getPresumedColumnNumber(L1, &Invalid);
76 if (!Invalid) {
77 unsigned C2 = SM.getPresumedColumnNumber(L2, &Invalid);
78 return !Invalid && (C1 == C2);
79 }
80 return false;
81}
82} // unnamed namespace
83
84/// Determines whether there is only whitespace in `Buffer` between `P`
85/// and the previous line.
86/// \param Buffer The buffer to search in.
87/// \param P The offset from the beginning of `Buffer` to start from.
88/// \return true if all of the characters in `Buffer` ranging from the closest
89/// line-ending character before `P` (or the beginning of `Buffer`) to `P - 1`
90/// are whitespace.
91static bool onlyWhitespaceOnLineBefore(const char *Buffer, unsigned P) {
92 // Search backwards until we see linefeed or carriage return.
93 for (unsigned I = P; I != 0; --I) {
94 char C = Buffer[I - 1];
96 return true;
98 return false;
99 }
100 // We hit the beginning of the buffer.
101 return true;
102}
103
104/// Returns whether `K` is an ordinary comment kind.
106 return (K == RawComment::RCK_OrdinaryBCPL) ||
108}
109
111 const CommentOptions &CommentOpts, bool Merged) :
112 Range(SR), RawTextValid(false), BriefTextValid(false),
113 IsAttached(false), IsTrailingComment(false),
114 IsAlmostTrailingComment(false) {
115 // Extract raw comment text, if possible.
116 if (SR.getBegin() == SR.getEnd() || getRawText(SourceMgr).empty()) {
117 Kind = RCK_Invalid;
118 return;
119 }
120
121 // Guess comment kind.
122 std::pair<CommentKind, bool> K =
123 getCommentKind(RawText, CommentOpts.ParseAllComments);
124
125 // Guess whether an ordinary comment is trailing.
126 if (CommentOpts.ParseAllComments && isOrdinaryKind(K.first)) {
127 FileID BeginFileID;
128 unsigned BeginOffset;
129 std::tie(BeginFileID, BeginOffset) =
130 SourceMgr.getDecomposedLoc(Range.getBegin());
131 if (BeginOffset != 0) {
132 bool Invalid = false;
133 const char *Buffer =
134 SourceMgr.getBufferData(BeginFileID, &Invalid).data();
135 IsTrailingComment |=
136 (!Invalid && !onlyWhitespaceOnLineBefore(Buffer, BeginOffset));
137 }
138 }
139
140 if (!Merged) {
141 Kind = K.first;
142 IsTrailingComment |= K.second;
143
144 IsAlmostTrailingComment =
145 RawText.starts_with("//<") || RawText.starts_with("/*<");
146 } else {
147 Kind = RCK_Merged;
148 IsTrailingComment =
149 IsTrailingComment || mergedCommentIsTrailingComment(RawText);
150 }
151}
152
153StringRef RawComment::getRawTextSlow(const SourceManager &SourceMgr) const {
154 FileID BeginFileID;
155 FileID EndFileID;
156 unsigned BeginOffset;
157 unsigned EndOffset;
158
159 std::tie(BeginFileID, BeginOffset) =
160 SourceMgr.getDecomposedLoc(Range.getBegin());
161 std::tie(EndFileID, EndOffset) = SourceMgr.getDecomposedLoc(Range.getEnd());
162
163 const unsigned Length = EndOffset - BeginOffset;
164 if (Length < 2)
165 return StringRef();
166
167 // The comment can't begin in one file and end in another.
168 assert(BeginFileID == EndFileID);
169
170 bool Invalid = false;
171 const char *BufferStart = SourceMgr.getBufferData(BeginFileID,
172 &Invalid).data();
173 if (Invalid)
174 return StringRef();
175
176 return StringRef(BufferStart + BeginOffset, Length);
177}
178
179const char *RawComment::extractBriefText(const ASTContext &Context) const {
180 // Lazily initialize RawText using the accessor before using it.
181 (void)getRawText(Context.getSourceManager());
182
183 // Since we will be copying the resulting text, all allocations made during
184 // parsing are garbage after resulting string is formed. Thus we can use
185 // a separate allocator for all temporary stuff.
186 llvm::BumpPtrAllocator Allocator;
187
188 comments::Lexer L(Allocator, Context.getDiagnostics(),
189 Context.getCommentCommandTraits(),
190 Range.getBegin(),
191 RawText.begin(), RawText.end());
193
194 const std::string Result = P.Parse();
195 const unsigned BriefTextLength = Result.size();
196 char *BriefTextPtr = new (Context) char[BriefTextLength + 1];
197 memcpy(BriefTextPtr, Result.c_str(), BriefTextLength + 1);
198 BriefText = BriefTextPtr;
199 BriefTextValid = true;
200
201 return BriefTextPtr;
202}
203
205 const Preprocessor *PP,
206 const Decl *D) const {
207 // Lazily initialize RawText using the accessor before using it.
208 (void)getRawText(Context.getSourceManager());
209
210 comments::Lexer L(Context.getAllocator(), Context.getDiagnostics(),
211 Context.getCommentCommandTraits(),
212 getSourceRange().getBegin(),
213 RawText.begin(), RawText.end());
214 comments::Sema S(Context.getAllocator(), Context.getSourceManager(),
215 Context.getDiagnostics(),
216 Context.getCommentCommandTraits(),
217 PP);
218 S.setDecl(D);
219 comments::Parser P(L, S, Context.getAllocator(), Context.getSourceManager(),
220 Context.getDiagnostics(),
221 Context.getCommentCommandTraits());
222
223 return P.parseFullComment();
224}
225
228 unsigned MaxNewlinesAllowed) {
229 std::pair<FileID, unsigned> Loc1Info = SM.getDecomposedLoc(Loc1);
230 std::pair<FileID, unsigned> Loc2Info = SM.getDecomposedLoc(Loc2);
231
232 // Question does not make sense if locations are in different files.
233 if (Loc1Info.first != Loc2Info.first)
234 return false;
235
236 bool Invalid = false;
237 const char *Buffer = SM.getBufferData(Loc1Info.first, &Invalid).data();
238 if (Invalid)
239 return false;
240
241 unsigned NumNewlines = 0;
242 assert(Loc1Info.second <= Loc2Info.second && "Loc1 after Loc2!");
243 // Look for non-whitespace characters and remember any newlines seen.
244 for (unsigned I = Loc1Info.second; I != Loc2Info.second; ++I) {
245 switch (Buffer[I]) {
246 default:
247 return false;
248 case ' ':
249 case '\t':
250 case '\f':
251 case '\v':
252 break;
253 case '\r':
254 case '\n':
255 ++NumNewlines;
256
257 // Check if we have found more than the maximum allowed number of
258 // newlines.
259 if (NumNewlines > MaxNewlinesAllowed)
260 return false;
261
262 // Collapse \r\n and \n\r into a single newline.
263 if (I + 1 != Loc2Info.second &&
264 (Buffer[I + 1] == '\n' || Buffer[I + 1] == '\r') &&
265 Buffer[I] != Buffer[I + 1])
266 ++I;
267 break;
268 }
269 }
270
271 return true;
272}
273
275 const CommentOptions &CommentOpts,
276 llvm::BumpPtrAllocator &Allocator) {
277 if (RC.isInvalid())
278 return;
279
280 // Ordinary comments are not interesting for us.
281 if (RC.isOrdinary() && !CommentOpts.ParseAllComments)
282 return;
283
284 std::pair<FileID, unsigned> Loc =
285 SourceMgr.getDecomposedLoc(RC.getBeginLoc());
286
287 const FileID CommentFile = Loc.first;
288 const unsigned CommentOffset = Loc.second;
289
290 // If this is the first Doxygen comment, save it (because there isn't
291 // anything to merge it with).
292 if (OrderedComments[CommentFile].empty()) {
293 OrderedComments[CommentFile][CommentOffset] =
294 new (Allocator) RawComment(RC);
295 return;
296 }
297
298 const RawComment &C1 = *OrderedComments[CommentFile].rbegin()->second;
299 const RawComment &C2 = RC;
300
301 // Merge comments only if there is only whitespace between them.
302 // Can't merge trailing and non-trailing comments unless the second is
303 // non-trailing ordinary in the same column, as in the case:
304 // int x; // documents x
305 // // more text
306 // versus:
307 // int x; // documents x
308 // int y; // documents y
309 // or:
310 // int x; // documents x
311 // // documents y
312 // int y;
313 // Merge comments if they are on same or consecutive lines.
314 if ((C1.isTrailingComment() == C2.isTrailingComment() ||
315 (C1.isTrailingComment() && !C2.isTrailingComment() &&
316 isOrdinaryKind(C2.getKind()) &&
317 commentsStartOnSameColumn(SourceMgr, C1, C2))) &&
318 onlyWhitespaceBetween(SourceMgr, C1.getEndLoc(), C2.getBeginLoc(),
319 /*MaxNewlinesAllowed=*/1)) {
320 SourceRange MergedRange(C1.getBeginLoc(), C2.getEndLoc());
321 *OrderedComments[CommentFile].rbegin()->second =
322 RawComment(SourceMgr, MergedRange, CommentOpts, true);
323 } else {
324 OrderedComments[CommentFile][CommentOffset] =
325 new (Allocator) RawComment(RC);
326 }
327}
328
329const std::map<unsigned, RawComment *> *
331 auto CommentsInFile = OrderedComments.find(File);
332 if (CommentsInFile == OrderedComments.end())
333 return nullptr;
334
335 return &CommentsInFile->second;
336}
337
338bool RawCommentList::empty() const { return OrderedComments.empty(); }
339
341 unsigned Offset) const {
342 auto Cached = CommentBeginLine.find(C);
343 if (Cached != CommentBeginLine.end())
344 return Cached->second;
345 const unsigned Line = SourceMgr.getLineNumber(File, Offset);
346 CommentBeginLine[C] = Line;
347 return Line;
348}
349
351 auto Cached = CommentEndOffset.find(C);
352 if (Cached != CommentEndOffset.end())
353 return Cached->second;
354 const unsigned Offset =
355 SourceMgr.getDecomposedLoc(C->getSourceRange().getEnd()).second;
356 CommentEndOffset[C] = Offset;
357 return Offset;
358}
359
360std::string RawComment::getFormattedText(const SourceManager &SourceMgr,
361 DiagnosticsEngine &Diags) const {
362 llvm::StringRef CommentText = getRawText(SourceMgr);
363 if (CommentText.empty())
364 return "";
365
366 std::string Result;
367 for (const RawComment::CommentLine &Line :
368 getFormattedLines(SourceMgr, Diags))
369 Result += Line.Text + "\n";
370
371 auto LastChar = Result.find_last_not_of('\n');
372 Result.erase(LastChar + 1, Result.size());
373
374 return Result;
375}
376
377std::vector<RawComment::CommentLine>
379 DiagnosticsEngine &Diags) const {
380 llvm::StringRef CommentText = getRawText(SourceMgr);
381 if (CommentText.empty())
382 return {};
383
384 llvm::BumpPtrAllocator Allocator;
385 // We do not parse any commands, so CommentOptions are ignored by
386 // comments::Lexer. Therefore, we just use default-constructed options.
387 CommentOptions DefOpts;
388 comments::CommandTraits EmptyTraits(Allocator, DefOpts);
389 comments::Lexer L(Allocator, Diags, EmptyTraits, getSourceRange().getBegin(),
390 CommentText.begin(), CommentText.end(),
391 /*ParseCommands=*/false);
392
393 std::vector<RawComment::CommentLine> Result;
394 // A column number of the first non-whitespace token in the comment text.
395 // We skip whitespace up to this column, but keep the whitespace after this
396 // column. IndentColumn is calculated when lexing the first line and reused
397 // for the rest of lines.
398 unsigned IndentColumn = 0;
399
400 // Record the line number of the last processed comment line.
401 // For block-style comments, an extra newline token will be produced after
402 // the end-comment marker, e.g.:
403 // /** This is a multi-line comment block.
404 // The lexer will produce two newline tokens here > */
405 // previousLine will record the line number when we previously saw a newline
406 // token and recorded a comment line. If we see another newline token on the
407 // same line, don't record anything in between.
408 unsigned PreviousLine = 0;
409
410 // Processes one line of the comment and adds it to the result.
411 // Handles skipping the indent at the start of the line.
412 // Returns false when eof is reached and true otherwise.
413 auto LexLine = [&](bool IsFirstLine) -> bool {
414 comments::Token Tok;
415 // Lex the first token on the line. We handle it separately, because we to
416 // fix up its indentation.
417 L.lex(Tok);
418 if (Tok.is(comments::tok::eof))
419 return false;
420 if (Tok.is(comments::tok::newline)) {
421 PresumedLoc Loc = SourceMgr.getPresumedLoc(Tok.getLocation());
422 if (Loc.getLine() != PreviousLine) {
423 Result.emplace_back("", Loc, Loc);
424 PreviousLine = Loc.getLine();
425 }
426 return true;
427 }
429 llvm::StringRef TokText = L.getSpelling(Tok, SourceMgr);
430 bool LocInvalid = false;
431 unsigned TokColumn =
432 SourceMgr.getSpellingColumnNumber(Tok.getLocation(), &LocInvalid);
433 assert(!LocInvalid && "getFormattedText for invalid location");
434
435 // Amount of leading whitespace in TokText.
436 size_t WhitespaceLen = TokText.find_first_not_of(" \t");
437 if (WhitespaceLen == StringRef::npos)
438 WhitespaceLen = TokText.size();
439 // Remember the amount of whitespace we skipped in the first line to remove
440 // indent up to that column in the following lines.
441 if (IsFirstLine)
442 IndentColumn = TokColumn + WhitespaceLen;
443
444 // Amount of leading whitespace we actually want to skip.
445 // For the first line we skip all the whitespace.
446 // For the rest of the lines, we skip whitespace up to IndentColumn.
447 unsigned SkipLen =
448 IsFirstLine
449 ? WhitespaceLen
450 : std::min<size_t>(
451 WhitespaceLen,
452 std::max<int>(static_cast<int>(IndentColumn) - TokColumn, 0));
453 llvm::StringRef Trimmed = TokText.drop_front(SkipLen);
454 Line += Trimmed;
455 // Get the beginning location of the adjusted comment line.
457 SourceMgr.getPresumedLoc(Tok.getLocation().getLocWithOffset(SkipLen));
458
459 // Lex all tokens in the rest of the line.
460 for (L.lex(Tok); Tok.isNot(comments::tok::eof); L.lex(Tok)) {
461 if (Tok.is(comments::tok::newline)) {
462 // Get the ending location of the comment line.
463 PresumedLoc End = SourceMgr.getPresumedLoc(Tok.getLocation());
464 if (End.getLine() != PreviousLine) {
465 Result.emplace_back(Line, Begin, End);
466 PreviousLine = End.getLine();
467 }
468 return true;
469 }
470 Line += L.getSpelling(Tok, SourceMgr);
471 }
472 PresumedLoc End = SourceMgr.getPresumedLoc(Tok.getLocation());
473 Result.emplace_back(Line, Begin, End);
474 // We've reached the end of file token.
475 return false;
476 };
477
478 // Process first line separately to remember indent for the following lines.
479 if (!LexLine(/*IsFirstLine=*/true))
480 return Result;
481 // Process the rest of the lines.
482 while (LexLine(/*IsFirstLine=*/false))
483 ;
484 return Result;
485}
Defines the clang::ASTContext interface.
StringRef P
#define SM(sm)
Definition: Cuda.cpp:83
const Decl * D
static bool onlyWhitespaceBetween(SourceManager &SM, SourceLocation Loc1, SourceLocation Loc2, unsigned MaxNewlinesAllowed)
static bool isOrdinaryKind(RawComment::CommentKind K)
Returns whether K is an ordinary comment kind.
static bool onlyWhitespaceOnLineBefore(const char *Buffer, unsigned P)
Determines whether there is only whitespace in Buffer between P and the previous line.
SourceRange Range
Definition: SemaObjC.cpp:758
SourceLocation Loc
Definition: SemaObjC.cpp:759
SourceLocation Begin
__DEVICE__ void * memcpy(void *__a, const void *__b, size_t __c)
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:187
SourceManager & getSourceManager()
Definition: ASTContext.h:721
comments::CommandTraits & getCommentCommandTraits() const
Definition: ASTContext.h:958
llvm::BumpPtrAllocator & getAllocator() const
Definition: ASTContext.h:730
DiagnosticsEngine & getDiagnostics() const
Decl - This represents one declaration (or definition), e.g.
Definition: DeclBase.h:86
Concrete class used by the front-end to report problems and issues.
Definition: Diagnostic.h:192
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
Definition: Preprocessor.h:137
Represents an unpacked "presumed" location which can be presented to the user.
unsigned getCommentBeginLine(RawComment *C, FileID File, unsigned Offset) const
const std::map< unsigned, RawComment * > * getCommentsInFile(FileID File) const
unsigned getCommentEndOffset(RawComment *C) const
void addComment(const RawComment &RC, const CommentOptions &CommentOpts, llvm::BumpPtrAllocator &Allocator)
@ RCK_OrdinaryC
Any normal C comment.
@ RCK_Merged
Two or more documentation comments merged together.
@ RCK_Invalid
Invalid comment.
@ RCK_OrdinaryBCPL
Any normal BCPL comments.
bool isOrdinary() const LLVM_READONLY
Returns true if this comment is not a documentation comment.
bool isTrailingComment() const LLVM_READONLY
Returns true if it is a comment that should be put after a member:
StringRef getRawText(const SourceManager &SourceMgr) const
Returns raw comment text with comment markers.
SourceLocation getEndLoc() const LLVM_READONLY
std::vector< CommentLine > getFormattedLines(const SourceManager &SourceMgr, DiagnosticsEngine &Diags) const
Returns sanitized comment text as separated lines with locations in source, suitable for further proc...
bool isInvalid() const LLVM_READONLY
std::string getFormattedText(const SourceManager &SourceMgr, DiagnosticsEngine &Diags) const
Returns sanitized comment text, suitable for presentation in editor UIs.
CommentKind getKind() const LLVM_READONLY
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation getBeginLoc() const LLVM_READONLY
comments::FullComment * parse(const ASTContext &Context, const Preprocessor *PP, const Decl *D) const
Parse the comment, assuming it is attached to decl D.
Encodes a location in the source.
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
This class handles loading and caching of source files into memory.
PresumedLoc getPresumedLoc(SourceLocation Loc, bool UseLineDirectives=true) const
Returns the "presumed" location that a SourceLocation specifies.
StringRef getBufferData(FileID FID, bool *Invalid=nullptr) const
Return a StringRef to the source buffer data for the specified FileID.
unsigned getLineNumber(FileID FID, unsigned FilePos, bool *Invalid=nullptr) const
Given a SourceLocation, return the spelling line number for the position indicated.
unsigned getSpellingColumnNumber(SourceLocation Loc, bool *Invalid=nullptr) const
std::pair< FileID, unsigned > getDecomposedLoc(SourceLocation Loc) const
Decompose the specified location into a raw FileID + Offset pair.
A trivial tuple used to represent a source range.
SourceLocation getEnd() const
SourceLocation getBegin() const
A very simple comment parser that extracts "a brief description".
This class provides information about commands that can be used in comments.
A full comment attached to a declaration, contains block content.
Definition: Comment.h:1083
StringRef getSpelling(const Token &Tok, const SourceManager &SourceMgr) const
Doxygen comment parser.
Definition: CommentParser.h:29
Comment token.
Definition: CommentLexer.h:55
bool isNot(tok::TokenKind K) const LLVM_READONLY
Definition: CommentLexer.h:93
bool is(tok::TokenKind K) const LLVM_READONLY
Definition: CommentLexer.h:92
SourceLocation getLocation() const LLVM_READONLY
Definition: CommentLexer.h:80
The JSON file list parser is used to communicate input to InstallAPI.
LLVM_READONLY bool isVerticalWhitespace(unsigned char c)
Returns true if this character is vertical ASCII whitespace: '\n', '\r'.
Definition: CharInfo.h:99
LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)
Returns true if this character is horizontal ASCII whitespace: ' ', '\t', '\f', '\v'.
Definition: CharInfo.h:91
@ Result
The result type of a method or function.
#define false
Definition: stdbool.h:26
Options for controlling comment parsing.
bool ParseAllComments
Treat ordinary comments as documentation comments.