clang-tools 22.0.0git
Markup.cpp
Go to the documentation of this file.
1//===--- Markup.cpp -----------------------------------------*- C++-*------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8#include "support/Markup.h"
9#include "clang/Basic/CharInfo.h"
10#include "llvm/ADT/ArrayRef.h"
11#include "llvm/ADT/STLExtras.h"
12#include "llvm/ADT/SmallVector.h"
13#include "llvm/ADT/StringExtras.h"
14#include "llvm/ADT/StringRef.h"
15#include "llvm/Support/raw_ostream.h"
16#include <cstddef>
17#include <iterator>
18#include <memory>
19#include <string>
20#include <vector>
21
22namespace clang {
23namespace clangd {
24namespace markup {
25namespace {
26
27// Is <contents a plausible start to an HTML tag?
28// Contents may not be the rest of the line, but it's the rest of the plain
29// text, so we expect to see at least the tag name.
30bool looksLikeTag(llvm::StringRef Contents) {
31 if (Contents.empty())
32 return false;
33 if (Contents.front() == '!' || Contents.front() == '?' ||
34 Contents.front() == '/')
35 return true;
36 // Check the start of the tag name.
37 if (!llvm::isAlpha(Contents.front()))
38 return false;
39 // Drop rest of the tag name, and following whitespace.
40 Contents = Contents
41 .drop_while([](char C) {
42 return llvm::isAlnum(C) || C == '-' || C == '_' || C == ':';
43 })
44 .drop_while(llvm::isSpace);
45 // The rest of the tag consists of attributes, which have restrictive names.
46 // If we hit '=', all bets are off (attribute values can contain anything).
47 for (; !Contents.empty(); Contents = Contents.drop_front()) {
48 if (llvm::isAlnum(Contents.front()) || llvm::isSpace(Contents.front()))
49 continue;
50 if (Contents.front() == '>' || Contents.starts_with("/>"))
51 return true; // May close the tag.
52 if (Contents.front() == '=')
53 return true; // Don't try to parse attribute values.
54 return false; // Random punctuation means this isn't a tag.
55 }
56 return true; // Potentially incomplete tag.
57}
58
59// Tests whether C should be backslash-escaped in markdown.
60// The string being escaped is Before + C + After. This is part of a paragraph.
61// StartsLine indicates whether `Before` is the start of the line.
62// After may not be everything until the end of the line.
63//
64// It's always safe to escape punctuation, but want minimal escaping.
65// The strategy is to escape the first character of anything that might start
66// a markdown grammar construct.
67bool needsLeadingEscapePlaintext(char C, llvm::StringRef Before,
68 llvm::StringRef After, bool StartsLine) {
69 assert(Before.take_while(llvm::isSpace).empty());
70 auto RulerLength = [&]() -> /*Length*/ unsigned {
71 if (!StartsLine || !Before.empty())
72 return false;
73 llvm::StringRef A = After.rtrim();
74 return llvm::all_of(A, [C](char D) { return C == D; }) ? 1 + A.size() : 0;
75 };
76 auto IsBullet = [&]() {
77 return StartsLine && Before.empty() &&
78 (After.empty() || After.starts_with(" "));
79 };
80 auto SpaceSurrounds = [&]() {
81 return (After.empty() || llvm::isSpace(After.front())) &&
82 (Before.empty() || llvm::isSpace(Before.back()));
83 };
84 auto WordSurrounds = [&]() {
85 return (!After.empty() && llvm::isAlnum(After.front())) &&
86 (!Before.empty() && llvm::isAlnum(Before.back()));
87 };
88
89 switch (C) {
90 case '\\': // Escaped character.
91 return true;
92 case '`': // Code block or inline code
93 // Any number of backticks can delimit an inline code block that can end
94 // anywhere (including on another line). We must escape them all.
95 return true;
96 case '~': // Code block
97 return StartsLine && Before.empty() && After.starts_with("~~");
98 case '#': { // ATX heading.
99 if (!StartsLine || !Before.empty())
100 return false;
101 llvm::StringRef Rest = After.ltrim(C);
102 return Rest.empty() || Rest.starts_with(" ");
103 }
104 case ']': // Link or link reference.
105 // We escape ] rather than [ here, because it's more constrained:
106 // ](...) is an in-line link
107 // ]: is a link reference
108 // The following are only links if the link reference exists:
109 // ] by itself is a shortcut link
110 // ][...] is an out-of-line link
111 // Because we never emit link references, we don't need to handle these.
112 return After.starts_with(":") || After.starts_with("(");
113 case '=': // Setex heading.
114 return RulerLength() > 0;
115 case '_': // Horizontal ruler or matched delimiter.
116 if (RulerLength() >= 3)
117 return true;
118 // Not a delimiter if surrounded by space, or inside a word.
119 // (The rules at word boundaries are subtle).
120 return !(SpaceSurrounds() || WordSurrounds());
121 case '-': // Setex heading, horizontal ruler, or bullet.
122 if (RulerLength() > 0)
123 return true;
124 return IsBullet();
125 case '+': // Bullet list.
126 return IsBullet();
127 case '*': // Bullet list, horizontal ruler, or delimiter.
128 return IsBullet() || RulerLength() >= 3 || !SpaceSurrounds();
129 case '<': // HTML tag (or autolink, which we choose not to escape)
130 return looksLikeTag(After);
131 case '>': // Quote marker. Needs escaping at start of line.
132 return StartsLine && Before.empty();
133 case '&': { // HTML entity reference
134 auto End = After.find(';');
135 if (End == llvm::StringRef::npos)
136 return false;
137 llvm::StringRef Content = After.substr(0, End);
138 if (Content.consume_front("#")) {
139 if (Content.consume_front("x") || Content.consume_front("X"))
140 return llvm::all_of(Content, llvm::isHexDigit);
141 return llvm::all_of(Content, llvm::isDigit);
142 }
143 return llvm::all_of(Content, llvm::isAlpha);
144 }
145 case '.': // Numbered list indicator. Escape 12. -> 12\. at start of line.
146 case ')':
147 return StartsLine && !Before.empty() &&
148 llvm::all_of(Before, llvm::isDigit) && After.starts_with(" ");
149 default:
150 return false;
151 }
152}
153
154/// \brief Tests whether \p C should be backslash-escaped in markdown.
155///
156/// The MarkupContent LSP specification defines that `markdown` content needs to
157/// follow GFM (GitHub Flavored Markdown) rules. And we can assume that markdown
158/// is rendered on the client side. This means we do not need to escape any
159/// markdown constructs.
160/// The only exception is when the client does not support HTML rendering in
161/// markdown. In that case, we need to escape HTML tags and HTML entities.
162///
163/// **FIXME:** handle the case when the client does support HTML rendering in
164/// markdown. For this, the LSP server needs to check the
165/// [supportsHtml
166/// capability](https://github.com/microsoft/language-server-protocol/issues/1344)
167/// of the client.
168///
169/// \param C The character to check.
170/// \param After The string that follows \p C .
171/// This is used to determine if \p C is part of a tag or an entity reference.
172///
173/// \returns true if \p C should be escaped, false otherwise.
174bool needsLeadingEscapeMarkdown(char C, llvm::StringRef After) {
175 switch (C) {
176 case '<': // HTML tag (or autolink, which we choose not to escape)
177 return looksLikeTag(After);
178 case '&': { // HTML entity reference
179 auto End = After.find(';');
180 if (End == llvm::StringRef::npos)
181 return false;
182 llvm::StringRef Content = After.substr(0, End);
183 if (Content.consume_front("#")) {
184 if (Content.consume_front("x") || Content.consume_front("X"))
185 return llvm::all_of(Content, llvm::isHexDigit);
186 return llvm::all_of(Content, llvm::isDigit);
187 }
188 return llvm::all_of(Content, llvm::isAlpha);
189 }
190 default:
191 return false;
192 }
193}
194
195bool needsLeadingEscape(char C, llvm::StringRef Before, llvm::StringRef After,
196 bool StartsLine, bool EscapeMarkdown) {
197 if (EscapeMarkdown)
198 return needsLeadingEscapePlaintext(C, Before, After, StartsLine);
199 return needsLeadingEscapeMarkdown(C, After);
200}
201
202/// \brief Render text for markdown output.
203///
204/// If \p EscapeMarkdown is true it ensures the punctuation will not introduce
205/// any of the markdown constructs.
206///
207/// Else, markdown syntax is not escaped, only HTML tags and entities.
208/// HTML is escaped because usually clients do not support HTML rendering by
209/// default. Passing unescaped HTML will therefore often result in not showing
210/// the HTML at all.
211/// \note In markdown code spans, we do not escape anything.
212std::string renderText(llvm::StringRef Input, bool StartsLine,
213 bool EscapeMarkdown) {
214 std::string R;
215 R.reserve(Input.size());
216
217 // split the input into lines, and escape each line separately.
218 llvm::StringRef Line, Rest;
219
220 bool IsFirstLine = true;
221
222 // Inside markdown code spans, we do not escape anything when EscapeMarkdown
223 // is false.
224 bool InCodeSpan = false;
225
226 for (std::tie(Line, Rest) = Input.split('\n');
227 !(Line.empty() && Rest.empty());
228 std::tie(Line, Rest) = Rest.split('\n')) {
229
230 bool StartsLineIntern = IsFirstLine ? StartsLine : true;
231
232 // Ignore leading spaces for the escape logic, but preserve them in the
233 // output.
234 StringRef LeadingSpaces = Line.take_while(llvm::isSpace);
235 if (!LeadingSpaces.empty()) {
236 R.append(LeadingSpaces);
237 }
238
239 // Handle the case where the user escaped a character themselves.
240 // This is relevant for markdown code spans if EscapeMarkdown is false,
241 // because if the user escaped a backtick, we must treat the enclosed text
242 // as normal markdown text.
243 bool UserEscape = false;
244 for (unsigned I = LeadingSpaces.size(); I < Line.size(); ++I) {
245
246 if (!EscapeMarkdown && !UserEscape && Line[I] == '`')
247 InCodeSpan = !InCodeSpan;
248
249 if (!InCodeSpan &&
250 needsLeadingEscape(Line[I], Line.substr(LeadingSpaces.size(), I),
251 Line.substr(I + 1), StartsLineIntern,
252 EscapeMarkdown))
253 R.push_back('\\');
254 R.push_back(Line[I]);
255
256 if (Line[I] == '\\')
257 UserEscape = !UserEscape;
258 else
259 UserEscape = false;
260 }
261
262 IsFirstLine = false;
263 if (!Rest.empty())
264 R.push_back('\n');
265 }
266
267 return R;
268}
269
270/// Renders \p Input as an inline block of code in markdown. The returned value
271/// is surrounded by backticks and the inner contents are properly escaped.
272std::string renderInlineBlock(llvm::StringRef Input) {
273 std::string R;
274 R.reserve(Input.size());
275 // Double all backticks to make sure we don't close the inline block early.
276 for (size_t From = 0; From < Input.size();) {
277 size_t Next = Input.find("`", From);
278 R += Input.substr(From, Next - From);
279 if (Next == llvm::StringRef::npos)
280 break;
281 R += "``"; // double the found backtick.
282
283 From = Next + 1;
284 }
285 // If results starts with a backtick, add spaces on both sides. The spaces
286 // are ignored by markdown renderers.
287 if (llvm::StringRef(R).starts_with("`") || llvm::StringRef(R).ends_with("`"))
288 return "` " + std::move(R) + " `";
289 // Markdown render should ignore first and last space if both are there. We
290 // add an extra pair of spaces in that case to make sure we render what the
291 // user intended.
292 if (llvm::StringRef(R).starts_with(" ") && llvm::StringRef(R).ends_with(" "))
293 return "` " + std::move(R) + " `";
294 return "`" + std::move(R) + "`";
295}
296
297/// Get marker required for \p Input to represent a markdown codeblock. It
298/// consists of at least 3 backticks(`). Although markdown also allows to use
299/// tilde(~) for code blocks, they are never used.
300std::string getMarkerForCodeBlock(llvm::StringRef Input) {
301 // Count the maximum number of consecutive backticks in \p Input. We need to
302 // start and end the code block with more.
303 unsigned MaxBackticks = 0;
304 unsigned Backticks = 0;
305 for (char C : Input) {
306 if (C == '`') {
307 ++Backticks;
308 continue;
309 }
312 }
314 // Use the corresponding number of backticks to start and end a code block.
315 return std::string(/*Repeat=*/std::max(3u, MaxBackticks + 1), '`');
316}
317
318// Trims the input and concatenates whitespace blocks into a single ` `.
319std::string canonicalizeSpaces(llvm::StringRef Input) {
320 llvm::SmallVector<llvm::StringRef> Words;
321 llvm::SplitString(Input, Words);
322 return llvm::join(Words, " ");
323}
324
325std::string renderBlocks(llvm::ArrayRef<std::unique_ptr<Block>> Children,
326 void (Block::*RenderFunc)(llvm::raw_ostream &) const) {
327 std::string R;
328 llvm::raw_string_ostream OS(R);
329
330 // Trim rulers.
331 Children = Children.drop_while(
332 [](const std::unique_ptr<Block> &C) { return C->isRuler(); });
333 auto Last = llvm::find_if(
334 llvm::reverse(Children),
335 [](const std::unique_ptr<Block> &C) { return !C->isRuler(); });
336 Children = Children.drop_back(Children.end() - Last.base());
337
338 bool LastBlockWasRuler = true;
339 for (const auto &C : Children) {
340 if (C->isRuler() && LastBlockWasRuler)
341 continue;
342 LastBlockWasRuler = C->isRuler();
343 ((*C).*RenderFunc)(OS);
344 }
345
346 // Get rid of redundant empty lines introduced in plaintext while imitating
347 // padding in markdown.
348 std::string AdjustedResult;
349 llvm::StringRef TrimmedText(OS.str());
350 TrimmedText = TrimmedText.trim();
351
352 llvm::copy_if(TrimmedText, std::back_inserter(AdjustedResult),
353 [&TrimmedText](const char &C) {
354 return !llvm::StringRef(TrimmedText.data(),
355 &C - TrimmedText.data() + 1)
356 // We allow at most two newlines.
357 .ends_with("\n\n\n");
358 });
359
360 return AdjustedResult;
361}
362
363// Separates two blocks with extra spacing. Note that it might render strangely
364// in vscode if the trailing block is a codeblock, see
365// https://github.com/microsoft/vscode/issues/88416 for details.
366class Ruler : public Block {
367public:
368 void renderEscapedMarkdown(llvm::raw_ostream &OS) const override {
369 renderMarkdown(OS);
370 }
371 void renderMarkdown(llvm::raw_ostream &OS) const override {
372 // Note that we need an extra new line before the ruler, otherwise we might
373 // make previous block a title instead of introducing a ruler.
374 OS << "\n---\n";
375 }
376 void renderPlainText(llvm::raw_ostream &OS) const override { OS << '\n'; }
377 std::unique_ptr<Block> clone() const override {
378 return std::make_unique<Ruler>(*this);
379 }
380 bool isRuler() const override { return true; }
381};
382
383class CodeBlock : public Block {
384public:
385 void renderEscapedMarkdown(llvm::raw_ostream &OS) const override {
386 renderMarkdown(OS);
387 }
388 void renderMarkdown(llvm::raw_ostream &OS) const override {
389 std::string Marker = getMarkerForCodeBlock(Contents);
390 // No need to pad from previous blocks, as they should end with a new line.
391 OS << Marker << Language << '\n' << Contents;
392 if (!Contents.empty() && Contents.back() != '\n')
393 OS << '\n';
394 // Always end with an empty line to separate code blocks from following
395 // paragraphs.
396 OS << Marker << "\n\n";
397 }
398
399 void renderPlainText(llvm::raw_ostream &OS) const override {
400 // In plaintext we want one empty line before and after codeblocks.
401 OS << '\n' << Contents << "\n\n";
402 }
403
404 std::unique_ptr<Block> clone() const override {
405 return std::make_unique<CodeBlock>(*this);
406 }
407
408 CodeBlock(std::string Contents, std::string Language)
409 : Contents(std::move(Contents)), Language(std::move(Language)) {}
410
411private:
412 std::string Contents;
413 std::string Language;
414};
415
416// Inserts two spaces after each `\n` to indent each line. First line is not
417// indented.
418std::string indentLines(llvm::StringRef Input) {
419 assert(!Input.ends_with("\n") && "Input should've been trimmed.");
420 std::string IndentedR;
421 // We'll add 2 spaces after each new line which is not followed by another new
422 // line.
423 IndentedR.reserve(Input.size() + Input.count('\n') * 2);
424 for (size_t I = 0; I < Input.size(); ++I) {
425 char C = Input[I];
426 IndentedR += C;
427 if (C == '\n' && (((I + 1) < Input.size()) && (Input[I + 1] != '\n')))
428 IndentedR.append(" ");
429 }
430 return IndentedR;
431}
432
433class Heading : public Paragraph {
434public:
435 Heading(size_t Level) : Level(Level) {}
436
437 void renderEscapedMarkdown(llvm::raw_ostream &OS) const override {
438 insertHeadingMarkers(OS);
439 Paragraph::renderEscapedMarkdown(OS);
440 }
441
442 void renderMarkdown(llvm::raw_ostream &OS) const override {
443 insertHeadingMarkers(OS);
444 Paragraph::renderMarkdown(OS);
445 }
446
447private:
448 size_t Level;
449
450 void insertHeadingMarkers(llvm::raw_ostream &OS) const {
451 OS << std::string(Level, '#') << ' ';
452 }
453};
454
455} // namespace
456
457std::string Block::asEscapedMarkdown() const {
458 std::string R;
459 llvm::raw_string_ostream OS(R);
461 return llvm::StringRef(OS.str()).trim().str();
462}
463
464std::string Block::asMarkdown() const {
465 std::string R;
466 llvm::raw_string_ostream OS(R);
467 renderMarkdown(OS);
468 return llvm::StringRef(OS.str()).trim().str();
469}
470
471std::string Block::asPlainText() const {
472 std::string R;
473 llvm::raw_string_ostream OS(R);
474 renderPlainText(OS);
475 return llvm::StringRef(OS.str()).trim().str();
476}
477
478void Paragraph::renderNewlinesMarkdown(llvm::raw_ostream &OS,
479 llvm::StringRef ParagraphText) const {
480 llvm::StringRef Line, Rest;
481
482 for (std::tie(Line, Rest) = ParagraphText.ltrim("\n").rtrim().split('\n');
483 !(Line.empty() && Rest.empty());
484 std::tie(Line, Rest) = Rest.split('\n')) {
485
486 if (Line.empty()) {
487 // Blank lines are preserved in markdown.
488 OS << '\n';
489 continue;
490 }
491
492 OS << Line;
493
494 if (!Rest.empty() && isHardLineBreakAfter(Line, Rest, /*IsMarkdown=*/true))
495 // In markdown, 2 spaces before a line break forces a line break.
496 OS << " ";
497 OS << '\n';
498 }
499}
500
501void Paragraph::renderEscapedMarkdown(llvm::raw_ostream &OS) const {
502 bool NeedsSpace = false;
503 bool HasChunks = false;
504 std::string ParagraphText;
505 ParagraphText.reserve(EstimatedStringSize);
506 llvm::raw_string_ostream ParagraphTextOS(ParagraphText);
507 for (auto &C : Chunks) {
508 if (C.SpaceBefore || NeedsSpace)
509 ParagraphTextOS << " ";
510 switch (C.Kind) {
511 case ChunkKind::PlainText:
512 ParagraphTextOS << renderText(C.Contents, !HasChunks,
513 /*EscapeMarkdown=*/true);
514 break;
515 case ChunkKind::InlineCode:
516 ParagraphTextOS << renderInlineBlock(C.Contents);
517 break;
518 case ChunkKind::Bold:
519 ParagraphTextOS << renderText("**" + C.Contents + "**", !HasChunks,
520 /*EscapeMarkdown=*/true);
521 break;
522 case ChunkKind::Emphasized:
523 ParagraphTextOS << renderText("*" + C.Contents + "*", !HasChunks,
524 /*EscapeMarkdown=*/true);
525 break;
526 }
527 HasChunks = true;
528 NeedsSpace = C.SpaceAfter;
529 }
530
531 renderNewlinesMarkdown(OS, ParagraphText);
532
533 // A paragraph in markdown is separated by a blank line.
534 OS << "\n\n";
535}
536
537void Paragraph::renderMarkdown(llvm::raw_ostream &OS) const {
538 bool NeedsSpace = false;
539 bool HasChunks = false;
540 std::string ParagraphText;
541 ParagraphText.reserve(EstimatedStringSize);
542 llvm::raw_string_ostream ParagraphTextOS(ParagraphText);
543 for (auto &C : Chunks) {
544 if (C.SpaceBefore || NeedsSpace)
545 ParagraphTextOS << " ";
546 switch (C.Kind) {
547 case ChunkKind::PlainText:
548 ParagraphTextOS << renderText(C.Contents, !HasChunks,
549 /*EscapeMarkdown=*/false);
550 break;
551 case ChunkKind::InlineCode:
552 ParagraphTextOS << renderInlineBlock(C.Contents);
553 break;
554 case ChunkKind::Bold:
555 ParagraphTextOS << "**"
556 << renderText(C.Contents, !HasChunks,
557 /*EscapeMarkdown=*/false)
558 << "**";
559 break;
560 case ChunkKind::Emphasized:
561 ParagraphTextOS << "*"
562 << renderText(C.Contents, !HasChunks,
563 /*EscapeMarkdown=*/false)
564 << "*";
565 break;
566 }
567 HasChunks = true;
568 NeedsSpace = C.SpaceAfter;
569 }
570
571 renderNewlinesMarkdown(OS, ParagraphText);
572
573 // A paragraph in markdown is separated by a blank line.
574 OS << "\n\n";
575}
576
577std::unique_ptr<Block> Paragraph::clone() const {
578 return std::make_unique<Paragraph>(*this);
579}
580
581llvm::StringRef Paragraph::chooseMarker(llvm::ArrayRef<llvm::StringRef> Options,
582 llvm::StringRef Text) const {
583 // Prefer a delimiter whose characters don't appear in the text.
584 for (llvm::StringRef S : Options)
585 if (Text.find_first_of(S) == llvm::StringRef::npos)
586 return S;
587 return Options.front();
588}
589
590bool Paragraph::punctuationIndicatesLineBreak(llvm::StringRef Line,
591 bool IsMarkdown) const {
592 constexpr llvm::StringLiteral Punctuation = R"txt(.:,;!?)txt";
593
594 if (!IsMarkdown && Line.ends_with(" "))
595 return true;
596
597 Line = Line.rtrim();
598 return !Line.empty() && Punctuation.contains(Line.back());
599}
600
601bool Paragraph::isHardLineBreakIndicator(llvm::StringRef Rest,
602 bool IsMarkdown) const {
603 // Plaintext indicators:
604 // '-'/'*' md list, '@'/'\' documentation command, '>' md blockquote,
605 // '#' headings, '`' code blocks
606 constexpr llvm::StringLiteral LinebreakIndicatorsPlainText =
607 R"txt(-*@>#`)txt";
608 // Markdown indicators:
609 // Only '@' and '\' documentation commands/escaped markdown syntax.
610 constexpr llvm::StringLiteral LinebreakIndicatorsMarkdown = R"txt(@\)txt";
611
612 Rest = Rest.ltrim(" \t");
613 if (Rest.empty())
614 return false;
615
616 if (IsMarkdown)
617 return LinebreakIndicatorsMarkdown.contains(Rest.front());
618
619 if (LinebreakIndicatorsPlainText.contains(Rest.front()))
620 return true;
621
622 if (llvm::isDigit(Rest.front())) {
623 llvm::StringRef AfterDigit = Rest.drop_while(llvm::isDigit);
624 if (AfterDigit.starts_with(".") || AfterDigit.starts_with(")"))
625 return true;
626 }
627 return false;
628}
629
630bool Paragraph::isHardLineBreakAfter(llvm::StringRef Line, llvm::StringRef Rest,
631 bool IsMarkdown) const {
632 // Should we also consider whether Line is short?
633 return punctuationIndicatesLineBreak(Line, IsMarkdown) ||
634 isHardLineBreakIndicator(Rest, IsMarkdown);
635}
636
637void Paragraph::renderNewlinesPlaintext(llvm::raw_ostream &OS,
638 llvm::StringRef ParagraphText) const {
639 llvm::StringRef Line, Rest;
640
641 for (std::tie(Line, Rest) = ParagraphText.trim().split('\n');
642 !(Line.empty() && Rest.empty());
643 std::tie(Line, Rest) = Rest.split('\n')) {
644
645 // Remove lines which only contain whitespace.
646 //
647 // Note: this also handles the case when there are multiple newlines
648 // in a row, since all leading newlines are removed.
649 //
650 // The documentation parsing treats multiple newlines as paragraph
651 // separators, hence it will create a new Paragraph instead of adding
652 // multiple newlines to the same Paragraph.
653 // Therfore multiple newlines are never added to a paragraph
654 // except if the user explicitly adds them using
655 // e.g. appendText("user text\n\nnext text").
656 Line = Line.ltrim();
657 if (Line.empty())
658 continue;
659
660 OS << canonicalizeSpaces(Line);
661
662 if (isHardLineBreakAfter(Line, Rest, /*IsMarkdown=*/false))
663 OS << '\n';
664 else if (!Rest.empty())
665 // Since we removed any trailing whitespace from the input using trim(),
666 // we know that the next line contains non-whitespace characters.
667 // Therefore, we can add a space without worrying about trailing spaces.
668 OS << ' ';
669 }
670}
671
672void Paragraph::renderPlainText(llvm::raw_ostream &OS) const {
673 bool NeedsSpace = false;
674 std::string ParagraphText;
675 ParagraphText.reserve(EstimatedStringSize);
676
677 llvm::raw_string_ostream ParagraphTextOS(ParagraphText);
678
679 for (auto &C : Chunks) {
680
681 if (C.Kind == ChunkKind::PlainText) {
682 if (C.SpaceBefore || NeedsSpace)
683 ParagraphTextOS << ' ';
684
685 ParagraphTextOS << C.Contents;
686 NeedsSpace = llvm::isSpace(C.Contents.back()) || C.SpaceAfter;
687 continue;
688 }
689
690 if (C.SpaceBefore || NeedsSpace)
691 ParagraphTextOS << ' ';
692 llvm::StringRef Marker = "";
693 if (C.Preserve && C.Kind == ChunkKind::InlineCode)
694 Marker = chooseMarker({"`", "'", "\""}, C.Contents);
695 else if (C.Kind == ChunkKind::Bold)
696 Marker = "**";
697 else if (C.Kind == ChunkKind::Emphasized)
698 Marker = "*";
699 ParagraphTextOS << Marker << C.Contents << Marker;
700 NeedsSpace = C.SpaceAfter;
701 }
702
703 renderNewlinesPlaintext(OS, ParagraphText);
704
705 // Paragraphs are separated by a blank line.
706 OS << "\n\n";
707}
708
709BulletList::BulletList() = default;
710BulletList::~BulletList() = default;
711
712void BulletList::renderEscapedMarkdown(llvm::raw_ostream &OS) const {
713 for (auto &D : Items) {
714 std::string M = D.asEscapedMarkdown();
715 // Instead of doing this we might prefer passing Indent to children to get
716 // rid of the copies, if it turns out to be a bottleneck.
717 OS << "- " << indentLines(M) << '\n';
718 }
719 // We add 2 newlines after list to terminate it in markdown.
720 OS << "\n\n";
721}
722
723void BulletList::renderMarkdown(llvm::raw_ostream &OS) const {
724 for (auto &D : Items) {
725 std::string M = D.asMarkdown();
726 // Instead of doing this we might prefer passing Indent to children to get
727 // rid of the copies, if it turns out to be a bottleneck.
728 OS << "- " << indentLines(M) << '\n';
729 }
730 // We add 2 newlines after list to terminate it in markdown.
731 OS << "\n\n";
732}
733
734void BulletList::renderPlainText(llvm::raw_ostream &OS) const {
735 for (auto &D : Items) {
736 // Instead of doing this we might prefer passing Indent to children to get
737 // rid of the copies, if it turns out to be a bottleneck.
738 OS << "- " << indentLines(D.asPlainText()) << '\n';
739 }
740 OS << '\n';
741}
742
743Paragraph &Paragraph::appendSpace() {
744 if (!Chunks.empty())
745 Chunks.back().SpaceAfter = true;
746 return *this;
747}
748
749Paragraph &Paragraph::appendChunk(llvm::StringRef Contents, ChunkKind K) {
750 if (Contents.empty())
751 return *this;
752 Chunks.emplace_back();
753 Chunk &C = Chunks.back();
754 C.Contents = Contents;
755 C.Kind = K;
756
757 EstimatedStringSize += Contents.size();
758 return *this;
759}
760
762 if (!Chunks.empty() && Chunks.back().Kind == ChunkKind::PlainText) {
763 Chunks.back().Contents += std::move(Text);
764 return *this;
765 }
766
767 return appendChunk(std::move(Text), ChunkKind::PlainText);
768}
769
771 return appendChunk(canonicalizeSpaces(std::move(Text)),
772 ChunkKind::Emphasized);
773}
774
776 return appendChunk(canonicalizeSpaces(std::move(Text)), ChunkKind::Bold);
777}
778
779Paragraph &Paragraph::appendCode(llvm::StringRef Code, bool Preserve) {
780 bool AdjacentCode =
781 !Chunks.empty() && Chunks.back().Kind == ChunkKind::InlineCode;
782 std::string Norm = canonicalizeSpaces(std::move(Code));
783 if (Norm.empty())
784 return *this;
785 EstimatedStringSize += Norm.size();
786 Chunks.emplace_back();
787 Chunk &C = Chunks.back();
788 C.Contents = std::move(Norm);
789 C.Kind = ChunkKind::InlineCode;
790 C.Preserve = Preserve;
791 // Disallow adjacent code spans without spaces, markdown can't render them.
792 C.SpaceBefore = AdjacentCode;
793
794 return *this;
795}
796
797std::unique_ptr<Block> BulletList::clone() const {
798 return std::make_unique<BulletList>(*this);
799}
800
801class Document &BulletList::addItem() {
802 Items.emplace_back();
803 return Items.back();
804}
805
806Document &Document::operator=(const Document &Other) {
807 Children.clear();
808 for (const auto &C : Other.Children)
809 Children.push_back(C->clone());
810 return *this;
811}
812
813void Document::append(Document Other) {
814 std::move(Other.Children.begin(), Other.Children.end(),
815 std::back_inserter(Children));
816}
817
818Paragraph &Document::addParagraph() {
819 Children.push_back(std::make_unique<Paragraph>());
820 return *static_cast<Paragraph *>(Children.back().get());
821}
822
823void Document::addRuler() { Children.push_back(std::make_unique<Ruler>()); }
824
825void Document::addCodeBlock(std::string Code, std::string Language) {
826 Children.emplace_back(
827 std::make_unique<CodeBlock>(std::move(Code), std::move(Language)));
828}
829
830std::string Document::asEscapedMarkdown() const {
832}
833
834std::string Document::asMarkdown() const {
836}
837
838std::string Document::asPlainText() const {
840}
841
842BulletList &Document::addBulletList() {
843 Children.emplace_back(std::make_unique<BulletList>());
844 return *static_cast<BulletList *>(Children.back().get());
845}
846
847Paragraph &Document::addHeading(size_t Level) {
848 assert(Level > 0);
849 Children.emplace_back(std::make_unique<Heading>(Level));
850 return *static_cast<Paragraph *>(Children.back().get());
851}
852} // namespace markup
853} // namespace clangd
854} // namespace clang
void renderMarkdown(llvm::raw_ostream &OS) const override
Definition Markup.cpp:388
CodeBlock(std::string Contents, std::string Language)
Definition Markup.cpp:408
void renderPlainText(llvm::raw_ostream &OS) const override
Definition Markup.cpp:399
void renderEscapedMarkdown(llvm::raw_ostream &OS) const override
Definition Markup.cpp:385
std::unique_ptr< Block > clone() const override
Definition Markup.cpp:404
void renderEscapedMarkdown(llvm::raw_ostream &OS) const override
Definition Markup.cpp:437
Heading(size_t Level)
Definition Markup.cpp:435
void renderMarkdown(llvm::raw_ostream &OS) const override
Definition Markup.cpp:442
void renderEscapedMarkdown(llvm::raw_ostream &OS) const override
Definition Markup.cpp:368
void renderPlainText(llvm::raw_ostream &OS) const override
Definition Markup.cpp:376
std::unique_ptr< Block > clone() const override
Definition Markup.cpp:377
bool isRuler() const override
Definition Markup.cpp:380
void renderMarkdown(llvm::raw_ostream &OS) const override
Definition Markup.cpp:371
std::string asEscapedMarkdown() const
Definition Markup.cpp:457
virtual void renderPlainText(llvm::raw_ostream &OS) const =0
virtual void renderMarkdown(llvm::raw_ostream &OS) const =0
virtual void renderEscapedMarkdown(llvm::raw_ostream &OS) const =0
std::string asMarkdown() const
Definition Markup.cpp:464
std::string asPlainText() const
Definition Markup.cpp:471
Represents parts of the markup that can contain strings, like inline code, code block or plain text.
Definition Markup.h:45
void renderPlainText(llvm::raw_ostream &OS) const override
Definition Markup.cpp:672
Paragraph & appendEmphasizedText(llvm::StringRef Text)
Append emphasized text, this translates to the * block in markdown.
Definition Markup.cpp:770
Paragraph & appendText(llvm::StringRef Text)
Append plain text to the end of the string.
Definition Markup.cpp:761
void renderEscapedMarkdown(llvm::raw_ostream &OS) const override
Definition Markup.cpp:501
void renderNewlinesMarkdown(llvm::raw_ostream &OS, llvm::StringRef ParagraphText) const
Go through the contents line by line to handle the newlines and required spacing correctly for markdo...
Definition Markup.cpp:478
std::unique_ptr< Block > clone() const override
Definition Markup.cpp:577
bool isHardLineBreakAfter(llvm::StringRef Line, llvm::StringRef Rest, bool IsMarkdown) const
Checks if a hard line break should be added after the given line.
Definition Markup.cpp:630
void renderMarkdown(llvm::raw_ostream &OS) const override
Definition Markup.cpp:537
Paragraph & appendBoldText(llvm::StringRef Text)
Append bold text, this translates to the ** block in markdown.
Definition Markup.cpp:775
bool isHardLineBreakIndicator(llvm::StringRef Rest, bool IsMarkdown) const
Checks whether the text following a line break starts with a hard line break indicator.
Definition Markup.cpp:601
FIXME: Skip testing on windows temporarily due to the different escaping code mode.
Definition AST.cpp:45
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
std::string renderBlocks(llvm::ArrayRef< std::unique_ptr< Block > > Children, void(Block::*RenderFunc)(llvm::raw_ostream &) const)
Definition Markup.cpp:325
std::string indentLines(llvm::StringRef Input)
Definition Markup.cpp:418
std::string canonicalizeSpaces(llvm::StringRef Input)
Definition Markup.cpp:319