clang-tools 22.0.0git
Markup.cpp
Go to the documentation of this file.
1//===--- Markup.cpp -----------------------------------------*- C++-*------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8#include "support/Markup.h"
9#include "clang/Basic/CharInfo.h"
10#include "llvm/ADT/ArrayRef.h"
11#include "llvm/ADT/STLExtras.h"
12#include "llvm/ADT/SmallVector.h"
13#include "llvm/ADT/StringExtras.h"
14#include "llvm/ADT/StringRef.h"
15#include "llvm/Support/raw_ostream.h"
16#include <cstddef>
17#include <iterator>
18#include <memory>
19#include <string>
20#include <vector>
21
22namespace clang {
23namespace clangd {
24namespace markup {
25namespace {
26
27// Is <contents a plausible start to an HTML tag?
28// Contents may not be the rest of the line, but it's the rest of the plain
29// text, so we expect to see at least the tag name.
30bool looksLikeTag(llvm::StringRef Contents) {
31 if (Contents.empty())
32 return false;
33 if (Contents.front() == '!' || Contents.front() == '?' ||
34 Contents.front() == '/')
35 return true;
36 // Check the start of the tag name.
37 if (!llvm::isAlpha(Contents.front()))
38 return false;
39 // Drop rest of the tag name, and following whitespace.
40 Contents = Contents
41 .drop_while([](char C) {
42 return llvm::isAlnum(C) || C == '-' || C == '_' || C == ':';
43 })
44 .drop_while(llvm::isSpace);
45 // The rest of the tag consists of attributes, which have restrictive names.
46 // If we hit '=', all bets are off (attribute values can contain anything).
47 for (; !Contents.empty(); Contents = Contents.drop_front()) {
48 if (llvm::isAlnum(Contents.front()) || llvm::isSpace(Contents.front()))
49 continue;
50 if (Contents.front() == '>' || Contents.starts_with("/>"))
51 return true; // May close the tag.
52 if (Contents.front() == '=')
53 return true; // Don't try to parse attribute values.
54 return false; // Random punctuation means this isn't a tag.
55 }
56 return true; // Potentially incomplete tag.
57}
58
59// Tests whether C should be backslash-escaped in markdown.
60// The string being escaped is Before + C + After. This is part of a paragraph.
61// StartsLine indicates whether `Before` is the start of the line.
62// After may not be everything until the end of the line.
63//
64// It's always safe to escape punctuation, but want minimal escaping.
65// The strategy is to escape the first character of anything that might start
66// a markdown grammar construct.
67bool needsLeadingEscapePlaintext(char C, llvm::StringRef Before,
68 llvm::StringRef After, bool StartsLine) {
69 assert(Before.take_while(llvm::isSpace).empty());
70 auto RulerLength = [&]() -> /*Length*/ unsigned {
71 if (!StartsLine || !Before.empty())
72 return false;
73 llvm::StringRef A = After.rtrim();
74 return llvm::all_of(A, [C](char D) { return C == D; }) ? 1 + A.size() : 0;
75 };
76 auto IsBullet = [&]() {
77 return StartsLine && Before.empty() &&
78 (After.empty() || After.starts_with(" "));
79 };
80 auto SpaceSurrounds = [&]() {
81 return (After.empty() || llvm::isSpace(After.front())) &&
82 (Before.empty() || llvm::isSpace(Before.back()));
83 };
84 auto WordSurrounds = [&]() {
85 return (!After.empty() && llvm::isAlnum(After.front())) &&
86 (!Before.empty() && llvm::isAlnum(Before.back()));
87 };
88
89 switch (C) {
90 case '\\': // Escaped character.
91 return true;
92 case '`': // Code block or inline code
93 // Any number of backticks can delimit an inline code block that can end
94 // anywhere (including on another line). We must escape them all.
95 return true;
96 case '~': // Code block
97 return StartsLine && Before.empty() && After.starts_with("~~");
98 case '#': { // ATX heading.
99 if (!StartsLine || !Before.empty())
100 return false;
101 llvm::StringRef Rest = After.ltrim(C);
102 return Rest.empty() || Rest.starts_with(" ");
103 }
104 case ']': // Link or link reference.
105 // We escape ] rather than [ here, because it's more constrained:
106 // ](...) is an in-line link
107 // ]: is a link reference
108 // The following are only links if the link reference exists:
109 // ] by itself is a shortcut link
110 // ][...] is an out-of-line link
111 // Because we never emit link references, we don't need to handle these.
112 return After.starts_with(":") || After.starts_with("(");
113 case '=': // Setex heading.
114 return RulerLength() > 0;
115 case '_': // Horizontal ruler or matched delimiter.
116 if (RulerLength() >= 3)
117 return true;
118 // Not a delimiter if surrounded by space, or inside a word.
119 // (The rules at word boundaries are subtle).
120 return !(SpaceSurrounds() || WordSurrounds());
121 case '-': // Setex heading, horizontal ruler, or bullet.
122 if (RulerLength() > 0)
123 return true;
124 return IsBullet();
125 case '+': // Bullet list.
126 return IsBullet();
127 case '*': // Bullet list, horizontal ruler, or delimiter.
128 return IsBullet() || RulerLength() >= 3 || !SpaceSurrounds();
129 case '<': // HTML tag (or autolink, which we choose not to escape)
130 return looksLikeTag(After);
131 case '>': // Quote marker. Needs escaping at start of line.
132 return StartsLine && Before.empty();
133 case '&': { // HTML entity reference
134 auto End = After.find(';');
135 if (End == llvm::StringRef::npos)
136 return false;
137 llvm::StringRef Content = After.substr(0, End);
138 if (Content.consume_front("#")) {
139 if (Content.consume_front("x") || Content.consume_front("X"))
140 return llvm::all_of(Content, llvm::isHexDigit);
141 return llvm::all_of(Content, llvm::isDigit);
142 }
143 return llvm::all_of(Content, llvm::isAlpha);
144 }
145 case '.': // Numbered list indicator. Escape 12. -> 12\. at start of line.
146 case ')':
147 return StartsLine && !Before.empty() &&
148 llvm::all_of(Before, llvm::isDigit) && After.starts_with(" ");
149 default:
150 return false;
151 }
152}
153
154/// \brief Tests whether \p C should be backslash-escaped in markdown.
155///
156/// The MarkupContent LSP specification defines that `markdown` content needs to
157/// follow GFM (GitHub Flavored Markdown) rules. And we can assume that markdown
158/// is rendered on the client side. This means we do not need to escape any
159/// markdown constructs.
160/// The only exception is when the client does not support HTML rendering in
161/// markdown. In that case, we need to escape HTML tags and HTML entities.
162///
163/// **FIXME:** handle the case when the client does support HTML rendering in
164/// markdown. For this, the LSP server needs to check the
165/// [supportsHtml
166/// capability](https://github.com/microsoft/language-server-protocol/issues/1344)
167/// of the client.
168///
169/// \param C The character to check.
170/// \param After The string that follows \p C .
171/// This is used to determine if \p C is part of a tag or an entity reference.
172///
173/// \returns true if \p C should be escaped, false otherwise.
174bool needsLeadingEscapeMarkdown(char C, llvm::StringRef After) {
175 switch (C) {
176 case '<': // HTML tag (or autolink, which we choose not to escape)
177 return looksLikeTag(After);
178 case '&': { // HTML entity reference
179 auto End = After.find(';');
180 if (End == llvm::StringRef::npos)
181 return false;
182 llvm::StringRef Content = After.substr(0, End);
183 if (Content.consume_front("#")) {
184 if (Content.consume_front("x") || Content.consume_front("X"))
185 return llvm::all_of(Content, llvm::isHexDigit);
186 return llvm::all_of(Content, llvm::isDigit);
187 }
188 return llvm::all_of(Content, llvm::isAlpha);
189 }
190 default:
191 return false;
192 }
193}
194
195bool needsLeadingEscape(char C, llvm::StringRef Before, llvm::StringRef After,
196 bool StartsLine, bool EscapeMarkdown) {
197 if (EscapeMarkdown)
198 return needsLeadingEscapePlaintext(C, Before, After, StartsLine);
199 return needsLeadingEscapeMarkdown(C, After);
200}
201
202/// \brief Render text for markdown output.
203///
204/// If \p EscapeMarkdown is true it ensures the punctuation will not introduce
205/// any of the markdown constructs.
206///
207/// Else, markdown syntax is not escaped, only HTML tags and entities.
208/// HTML is escaped because usually clients do not support HTML rendering by
209/// default. Passing unescaped HTML will therefore often result in not showing
210/// the HTML at all.
211/// \note In markdown code spans, we do not escape anything.
212std::string renderText(llvm::StringRef Input, bool StartsLine,
213 bool EscapeMarkdown) {
214 std::string R;
215 R.reserve(Input.size());
216
217 // split the input into lines, and escape each line separately.
218 llvm::StringRef Line, Rest;
219
220 bool IsFirstLine = true;
221
222 // Inside markdown code spans, we do not escape anything when EscapeMarkdown
223 // is false.
224 bool InCodeSpan = false;
225
226 for (std::tie(Line, Rest) = Input.split('\n');
227 !(Line.empty() && Rest.empty());
228 std::tie(Line, Rest) = Rest.split('\n')) {
229
230 bool StartsLineIntern = IsFirstLine ? StartsLine : true;
231
232 // Ignore leading spaces for the escape logic, but preserve them in the
233 // output.
234 StringRef LeadingSpaces = Line.take_while(llvm::isSpace);
235 if (!LeadingSpaces.empty()) {
236 R.append(LeadingSpaces);
237 }
238
239 // Handle the case where the user escaped a character themselves.
240 // This is relevant for markdown code spans if EscapeMarkdown is false,
241 // because if the user escaped a backtick, we must treat the enclosed text
242 // as normal markdown text.
243 bool UserEscape = false;
244 for (unsigned I = LeadingSpaces.size(); I < Line.size(); ++I) {
245
246 if (!EscapeMarkdown && !UserEscape && Line[I] == '`')
247 InCodeSpan = !InCodeSpan;
248
249 if (!InCodeSpan &&
250 needsLeadingEscape(Line[I], Line.substr(LeadingSpaces.size(), I),
251 Line.substr(I + 1), StartsLineIntern,
252 EscapeMarkdown))
253 R.push_back('\\');
254 R.push_back(Line[I]);
255
256 if (Line[I] == '\\')
257 UserEscape = !UserEscape;
258 else
259 UserEscape = false;
260 }
261
262 IsFirstLine = false;
263 if (!Rest.empty())
264 R.push_back('\n');
265 }
266
267 return R;
268}
269
270/// Renders \p Input as an inline block of code in markdown. The returned value
271/// is surrounded by backticks and the inner contents are properly escaped.
272std::string renderInlineBlock(llvm::StringRef Input) {
273 std::string R;
274 R.reserve(Input.size());
275 // Double all backticks to make sure we don't close the inline block early.
276 for (size_t From = 0; From < Input.size();) {
277 size_t Next = Input.find("`", From);
278 R += Input.substr(From, Next - From);
279 if (Next == llvm::StringRef::npos)
280 break;
281 R += "``"; // double the found backtick.
282
283 From = Next + 1;
284 }
285 // If results starts with a backtick, add spaces on both sides. The spaces
286 // are ignored by markdown renderers.
287 if (llvm::StringRef(R).starts_with("`") || llvm::StringRef(R).ends_with("`"))
288 return "` " + std::move(R) + " `";
289 // Markdown render should ignore first and last space if both are there. We
290 // add an extra pair of spaces in that case to make sure we render what the
291 // user intended.
292 if (llvm::StringRef(R).starts_with(" ") && llvm::StringRef(R).ends_with(" "))
293 return "` " + std::move(R) + " `";
294 return "`" + std::move(R) + "`";
295}
296
297/// Get marker required for \p Input to represent a markdown codeblock. It
298/// consists of at least 3 backticks(`). Although markdown also allows to use
299/// tilde(~) for code blocks, they are never used.
300std::string getMarkerForCodeBlock(llvm::StringRef Input) {
301 // Count the maximum number of consecutive backticks in \p Input. We need to
302 // start and end the code block with more.
303 unsigned MaxBackticks = 0;
304 unsigned Backticks = 0;
305 for (char C : Input) {
306 if (C == '`') {
307 ++Backticks;
308 continue;
309 }
312 }
314 // Use the corresponding number of backticks to start and end a code block.
315 return std::string(/*Repeat=*/std::max(3u, MaxBackticks + 1), '`');
316}
317
318// Trims the input and concatenates whitespace blocks into a single ` `.
319std::string canonicalizeSpaces(llvm::StringRef Input) {
320 llvm::SmallVector<llvm::StringRef> Words;
321 llvm::SplitString(Input, Words);
322 return llvm::join(Words, " ");
323}
324
325std::string renderBlocks(llvm::ArrayRef<std::unique_ptr<Block>> Children,
326 void (Block::*RenderFunc)(llvm::raw_ostream &) const) {
327 std::string R;
328 llvm::raw_string_ostream OS(R);
329
330 // Trim rulers.
331 Children = Children.drop_while(
332 [](const std::unique_ptr<Block> &C) { return C->isRuler(); });
333 auto Last = llvm::find_if(
334 llvm::reverse(Children),
335 [](const std::unique_ptr<Block> &C) { return !C->isRuler(); });
336 Children = Children.drop_back(Children.end() - Last.base());
337
338 bool LastBlockWasRuler = true;
339 for (const auto &C : Children) {
340 if (C->isRuler() && LastBlockWasRuler)
341 continue;
342 LastBlockWasRuler = C->isRuler();
343 ((*C).*RenderFunc)(OS);
344 }
345
346 // Get rid of redundant empty lines introduced in plaintext while imitating
347 // padding in markdown.
348 std::string AdjustedResult;
349 llvm::StringRef TrimmedText(OS.str());
350 TrimmedText = TrimmedText.trim();
351
352 llvm::copy_if(TrimmedText, std::back_inserter(AdjustedResult),
353 [&TrimmedText](const char &C) {
354 return !llvm::StringRef(TrimmedText.data(),
355 &C - TrimmedText.data() + 1)
356 // We allow at most two newlines.
357 .ends_with("\n\n\n");
358 });
359
360 return AdjustedResult;
361}
362
363// Separates two blocks with extra spacing. Note that it might render strangely
364// in vscode if the trailing block is a codeblock, see
365// https://github.com/microsoft/vscode/issues/88416 for details.
366class Ruler : public Block {
367public:
368 void renderEscapedMarkdown(llvm::raw_ostream &OS) const override {
369 renderMarkdown(OS);
370 }
371 void renderMarkdown(llvm::raw_ostream &OS) const override {
372 // Note that we need an extra new line before the ruler, otherwise we might
373 // make previous block a title instead of introducing a ruler.
374 OS << "\n---\n";
375 }
376 void renderPlainText(llvm::raw_ostream &OS) const override { OS << '\n'; }
377 std::unique_ptr<Block> clone() const override {
378 return std::make_unique<Ruler>(*this);
379 }
380 bool isRuler() const override { return true; }
381};
382
383class CodeBlock : public Block {
384public:
385 void renderEscapedMarkdown(llvm::raw_ostream &OS) const override {
386 renderMarkdown(OS);
387 }
388 void renderMarkdown(llvm::raw_ostream &OS) const override {
389 std::string Marker = getMarkerForCodeBlock(Contents);
390 // No need to pad from previous blocks, as they should end with a new line.
391 OS << Marker << Language << '\n' << Contents;
392 if (!Contents.empty() && Contents.back() != '\n')
393 OS << '\n';
394 // Always end with an empty line to separate code blocks from following
395 // paragraphs.
396 OS << Marker << "\n\n";
397 }
398
399 void renderPlainText(llvm::raw_ostream &OS) const override {
400 // In plaintext we want one empty line before and after codeblocks.
401 OS << '\n' << Contents << "\n\n";
402 }
403
404 std::unique_ptr<Block> clone() const override {
405 return std::make_unique<CodeBlock>(*this);
406 }
407
408 CodeBlock(std::string Contents, std::string Language)
409 : Contents(std::move(Contents)), Language(std::move(Language)) {}
410
411private:
412 std::string Contents;
413 std::string Language;
414};
415
416// Inserts two spaces after each `\n` to indent each line. First line is not
417// indented.
418std::string indentLines(llvm::StringRef Input) {
419 assert(!Input.ends_with("\n") && "Input should've been trimmed.");
420 std::string IndentedR;
421 // We'll add 2 spaces after each new line which is not followed by another new
422 // line.
423 IndentedR.reserve(Input.size() + Input.count('\n') * 2);
424 for (size_t I = 0; I < Input.size(); ++I) {
425 char C = Input[I];
426 IndentedR += C;
427 if (C == '\n' && (((I + 1) < Input.size()) && (Input[I + 1] != '\n')))
428 IndentedR.append(" ");
429 }
430 return IndentedR;
431}
432
433class Heading : public Paragraph {
434public:
435 Heading(size_t Level) : Level(Level) {}
436
437 void renderEscapedMarkdown(llvm::raw_ostream &OS) const override {
438 insertHeadingMarkers(OS);
439 Paragraph::renderEscapedMarkdown(OS);
440 }
441
442 void renderMarkdown(llvm::raw_ostream &OS) const override {
443 insertHeadingMarkers(OS);
444 Paragraph::renderMarkdown(OS);
445 }
446
447private:
448 size_t Level;
449
450 void insertHeadingMarkers(llvm::raw_ostream &OS) const {
451 OS << std::string(Level, '#') << ' ';
452 }
453};
454
455} // namespace
456
457std::string Block::asEscapedMarkdown() const {
458 std::string R;
459 llvm::raw_string_ostream OS(R);
461 return llvm::StringRef(OS.str()).trim().str();
462}
463
464std::string Block::asMarkdown() const {
465 std::string R;
466 llvm::raw_string_ostream OS(R);
467 renderMarkdown(OS);
468 return llvm::StringRef(OS.str()).trim().str();
469}
470
471std::string Block::asPlainText() const {
472 std::string R;
473 llvm::raw_string_ostream OS(R);
474 renderPlainText(OS);
475 return llvm::StringRef(OS.str()).trim().str();
476}
477
478void Paragraph::renderEscapedMarkdown(llvm::raw_ostream &OS) const {
479 bool NeedsSpace = false;
480 bool HasChunks = false;
481 for (auto &C : Chunks) {
482 if (C.SpaceBefore || NeedsSpace)
483 OS << " ";
484 switch (C.Kind) {
485 case ChunkKind::PlainText:
486 OS << renderText(C.Contents, !HasChunks, /*EscapeMarkdown=*/true);
487 break;
488 case ChunkKind::InlineCode:
489 OS << renderInlineBlock(C.Contents);
490 break;
491 case ChunkKind::Bold:
492 OS << renderText("**" + C.Contents + "**", !HasChunks,
493 /*EscapeMarkdown=*/true);
494 break;
495 case ChunkKind::Emphasized:
496 OS << renderText("*" + C.Contents + "*", !HasChunks,
497 /*EscapeMarkdown=*/true);
498 break;
499 }
500 HasChunks = true;
501 NeedsSpace = C.SpaceAfter;
502 }
503 // A paragraph in markdown is separated by a blank line.
504 OS << "\n\n";
505}
506
507void Paragraph::renderMarkdown(llvm::raw_ostream &OS) const {
508 bool NeedsSpace = false;
509 bool HasChunks = false;
510 for (auto &C : Chunks) {
511 if (C.SpaceBefore || NeedsSpace)
512 OS << " ";
513 switch (C.Kind) {
514 case ChunkKind::PlainText:
515 OS << renderText(C.Contents, !HasChunks, /*EscapeMarkdown=*/false);
516 break;
517 case ChunkKind::InlineCode:
518 OS << renderInlineBlock(C.Contents);
519 break;
520 case ChunkKind::Bold:
521 OS << "**" << renderText(C.Contents, !HasChunks, /*EscapeMarkdown=*/false)
522 << "**";
523 break;
524 case ChunkKind::Emphasized:
525 OS << "*" << renderText(C.Contents, !HasChunks, /*EscapeMarkdown=*/false)
526 << "*";
527 break;
528 }
529 HasChunks = true;
530 NeedsSpace = C.SpaceAfter;
531 }
532 // A paragraph in markdown is separated by a blank line.
533 OS << "\n\n";
534}
535
536std::unique_ptr<Block> Paragraph::clone() const {
537 return std::make_unique<Paragraph>(*this);
538}
539
540/// Choose a marker to delimit `Text` from a prioritized list of options.
541/// This is more readable than escaping for plain-text.
542llvm::StringRef Paragraph::chooseMarker(llvm::ArrayRef<llvm::StringRef> Options,
543 llvm::StringRef Text) const {
544 // Prefer a delimiter whose characters don't appear in the text.
545 for (llvm::StringRef S : Options)
546 if (Text.find_first_of(S) == llvm::StringRef::npos)
547 return S;
548 return Options.front();
549}
550
551bool Paragraph::punctuationIndicatesLineBreak(llvm::StringRef Line) const {
552 constexpr llvm::StringLiteral Punctuation = R"txt(.:,;!?)txt";
553
554 Line = Line.rtrim();
555 return !Line.empty() && Punctuation.contains(Line.back());
556}
557
558bool Paragraph::isHardLineBreakIndicator(llvm::StringRef Rest) const {
559 // '-'/'*' md list, '@'/'\' documentation command, '>' md blockquote,
560 // '#' headings, '`' code blocks, two spaces (markdown force newline)
561 constexpr llvm::StringLiteral LinebreakIndicators = R"txt(-*@>#`)txt";
562
563 Rest = Rest.ltrim(" \t");
564 if (Rest.empty())
565 return false;
566
567 if (LinebreakIndicators.contains(Rest.front()))
568 return true;
569
570 if (llvm::isDigit(Rest.front())) {
571 llvm::StringRef AfterDigit = Rest.drop_while(llvm::isDigit);
572 if (AfterDigit.starts_with(".") || AfterDigit.starts_with(")"))
573 return true;
574 }
575 return false;
576}
577
578bool Paragraph::isHardLineBreakAfter(llvm::StringRef Line,
579 llvm::StringRef Rest) const {
580 // In Markdown, 2 spaces before a line break forces a line break.
581 // Add a line break for plaintext in this case too.
582 // Should we also consider whether Line is short?
583 return Line.ends_with(" ") || punctuationIndicatesLineBreak(Line) ||
584 isHardLineBreakIndicator(Rest);
585}
586
587void Paragraph::renderPlainText(llvm::raw_ostream &OS) const {
588 bool NeedsSpace = false;
589 std::string ConcatenatedText;
590 ConcatenatedText.reserve(EstimatedStringSize);
591
592 llvm::raw_string_ostream ConcatenatedOS(ConcatenatedText);
593
594 for (auto &C : Chunks) {
595
596 if (C.Kind == ChunkKind::PlainText) {
597 if (C.SpaceBefore || NeedsSpace)
598 ConcatenatedOS << ' ';
599
600 ConcatenatedOS << C.Contents;
601 NeedsSpace = llvm::isSpace(C.Contents.back()) || C.SpaceAfter;
602 continue;
603 }
604
605 if (C.SpaceBefore || NeedsSpace)
606 ConcatenatedOS << ' ';
607 llvm::StringRef Marker = "";
608 if (C.Preserve && C.Kind == ChunkKind::InlineCode)
609 Marker = chooseMarker({"`", "'", "\""}, C.Contents);
610 else if (C.Kind == ChunkKind::Bold)
611 Marker = "**";
612 else if (C.Kind == ChunkKind::Emphasized)
613 Marker = "*";
614 ConcatenatedOS << Marker << C.Contents << Marker;
615 NeedsSpace = C.SpaceAfter;
616 }
617
618 // We go through the contents line by line to handle the newlines
619 // and required spacing correctly.
620 //
621 // Newlines are added if:
622 // - the line ends with 2 spaces and a newline follows
623 // - the line ends with punctuation that indicates a line break (.:,;!?)
624 // - the next line starts with a hard line break indicator (-@>#`, or a digit
625 // followed by '.' or ')'), ignoring leading whitespace.
626 //
627 // Otherwise, newlines in the input are replaced with a single space.
628 //
629 // Multiple spaces are collapsed into a single space.
630 //
631 // Lines containing only whitespace are ignored.
632 llvm::StringRef Line, Rest;
633
634 for (std::tie(Line, Rest) =
635 llvm::StringRef(ConcatenatedText).trim().split('\n');
636 !(Line.empty() && Rest.empty());
637 std::tie(Line, Rest) = Rest.split('\n')) {
638
639 // Remove lines which only contain whitespace.
640 //
641 // Note: this also handles the case when there are multiple newlines
642 // in a row, since all leading newlines are removed.
643 //
644 // The documentation parsing treats multiple newlines as paragraph
645 // separators, hence it will create a new Paragraph instead of adding
646 // multiple newlines to the same Paragraph.
647 // Therfore multiple newlines are never added to a paragraph
648 // except if the user explicitly adds them using
649 // e.g. appendText("user text\n\nnext text").
650 Line = Line.ltrim();
651 if (Line.empty())
652 continue;
653
654 OS << canonicalizeSpaces(Line);
655
656 if (isHardLineBreakAfter(Line, Rest))
657 OS << '\n';
658 else if (!Rest.empty())
659 // Since we removed any trailing whitespace from the input using trim(),
660 // we know that the next line contains non-whitespace characters.
661 // Therefore, we can add a space without worrying about trailing spaces.
662 OS << ' ';
663 }
664
665 // Paragraphs are separated by a blank line.
666 OS << "\n\n";
667}
668
669BulletList::BulletList() = default;
670BulletList::~BulletList() = default;
671
672void BulletList::renderEscapedMarkdown(llvm::raw_ostream &OS) const {
673 for (auto &D : Items) {
674 std::string M = D.asEscapedMarkdown();
675 // Instead of doing this we might prefer passing Indent to children to get
676 // rid of the copies, if it turns out to be a bottleneck.
677 OS << "- " << indentLines(M) << '\n';
678 }
679 // We add 2 newlines after list to terminate it in markdown.
680 OS << "\n\n";
681}
682
683void BulletList::renderMarkdown(llvm::raw_ostream &OS) const {
684 for (auto &D : Items) {
685 std::string M = D.asMarkdown();
686 // Instead of doing this we might prefer passing Indent to children to get
687 // rid of the copies, if it turns out to be a bottleneck.
688 OS << "- " << indentLines(M) << '\n';
689 }
690 // We add 2 newlines after list to terminate it in markdown.
691 OS << "\n\n";
692}
693
694void BulletList::renderPlainText(llvm::raw_ostream &OS) const {
695 for (auto &D : Items) {
696 // Instead of doing this we might prefer passing Indent to children to get
697 // rid of the copies, if it turns out to be a bottleneck.
698 OS << "- " << indentLines(D.asPlainText()) << '\n';
699 }
700 OS << '\n';
701}
702
703Paragraph &Paragraph::appendSpace() {
704 if (!Chunks.empty())
705 Chunks.back().SpaceAfter = true;
706 return *this;
707}
708
709Paragraph &Paragraph::appendChunk(llvm::StringRef Contents, ChunkKind K) {
710 if (Contents.empty())
711 return *this;
712 Chunks.emplace_back();
713 Chunk &C = Chunks.back();
714 C.Contents = Contents;
715 C.Kind = K;
716
717 EstimatedStringSize += Contents.size();
718 return *this;
719}
720
722 if (!Chunks.empty() && Chunks.back().Kind == ChunkKind::PlainText) {
723 Chunks.back().Contents += std::move(Text);
724 return *this;
725 }
726
727 return appendChunk(std::move(Text), ChunkKind::PlainText);
728}
729
731 return appendChunk(canonicalizeSpaces(std::move(Text)),
732 ChunkKind::Emphasized);
733}
734
736 return appendChunk(canonicalizeSpaces(std::move(Text)), ChunkKind::Bold);
737}
738
739Paragraph &Paragraph::appendCode(llvm::StringRef Code, bool Preserve) {
740 bool AdjacentCode =
741 !Chunks.empty() && Chunks.back().Kind == ChunkKind::InlineCode;
742 std::string Norm = canonicalizeSpaces(std::move(Code));
743 if (Norm.empty())
744 return *this;
745 EstimatedStringSize += Norm.size();
746 Chunks.emplace_back();
747 Chunk &C = Chunks.back();
748 C.Contents = std::move(Norm);
749 C.Kind = ChunkKind::InlineCode;
750 C.Preserve = Preserve;
751 // Disallow adjacent code spans without spaces, markdown can't render them.
752 C.SpaceBefore = AdjacentCode;
753
754 return *this;
755}
756
757std::unique_ptr<Block> BulletList::clone() const {
758 return std::make_unique<BulletList>(*this);
759}
760
761class Document &BulletList::addItem() {
762 Items.emplace_back();
763 return Items.back();
764}
765
766Document &Document::operator=(const Document &Other) {
767 Children.clear();
768 for (const auto &C : Other.Children)
769 Children.push_back(C->clone());
770 return *this;
771}
772
773void Document::append(Document Other) {
774 std::move(Other.Children.begin(), Other.Children.end(),
775 std::back_inserter(Children));
776}
777
778Paragraph &Document::addParagraph() {
779 Children.push_back(std::make_unique<Paragraph>());
780 return *static_cast<Paragraph *>(Children.back().get());
781}
782
783void Document::addRuler() { Children.push_back(std::make_unique<Ruler>()); }
784
785void Document::addCodeBlock(std::string Code, std::string Language) {
786 Children.emplace_back(
787 std::make_unique<CodeBlock>(std::move(Code), std::move(Language)));
788}
789
790std::string Document::asEscapedMarkdown() const {
792}
793
794std::string Document::asMarkdown() const {
796}
797
798std::string Document::asPlainText() const {
800}
801
802BulletList &Document::addBulletList() {
803 Children.emplace_back(std::make_unique<BulletList>());
804 return *static_cast<BulletList *>(Children.back().get());
805}
806
807Paragraph &Document::addHeading(size_t Level) {
808 assert(Level > 0);
809 Children.emplace_back(std::make_unique<Heading>(Level));
810 return *static_cast<Paragraph *>(Children.back().get());
811}
812} // namespace markup
813} // namespace clangd
814} // namespace clang
void renderMarkdown(llvm::raw_ostream &OS) const override
Definition Markup.cpp:388
CodeBlock(std::string Contents, std::string Language)
Definition Markup.cpp:408
void renderPlainText(llvm::raw_ostream &OS) const override
Definition Markup.cpp:399
void renderEscapedMarkdown(llvm::raw_ostream &OS) const override
Definition Markup.cpp:385
std::unique_ptr< Block > clone() const override
Definition Markup.cpp:404
void renderEscapedMarkdown(llvm::raw_ostream &OS) const override
Definition Markup.cpp:437
Heading(size_t Level)
Definition Markup.cpp:435
void renderMarkdown(llvm::raw_ostream &OS) const override
Definition Markup.cpp:442
void renderEscapedMarkdown(llvm::raw_ostream &OS) const override
Definition Markup.cpp:368
void renderPlainText(llvm::raw_ostream &OS) const override
Definition Markup.cpp:376
std::unique_ptr< Block > clone() const override
Definition Markup.cpp:377
bool isRuler() const override
Definition Markup.cpp:380
void renderMarkdown(llvm::raw_ostream &OS) const override
Definition Markup.cpp:371
std::string asEscapedMarkdown() const
Definition Markup.cpp:457
virtual void renderPlainText(llvm::raw_ostream &OS) const =0
virtual void renderMarkdown(llvm::raw_ostream &OS) const =0
virtual void renderEscapedMarkdown(llvm::raw_ostream &OS) const =0
std::string asMarkdown() const
Definition Markup.cpp:464
std::string asPlainText() const
Definition Markup.cpp:471
Represents parts of the markup that can contain strings, like inline code, code block or plain text.
Definition Markup.h:45
void renderPlainText(llvm::raw_ostream &OS) const override
Definition Markup.cpp:587
Paragraph & appendEmphasizedText(llvm::StringRef Text)
Append emphasized text, this translates to the * block in markdown.
Definition Markup.cpp:730
Paragraph & appendText(llvm::StringRef Text)
Append plain text to the end of the string.
Definition Markup.cpp:721
void renderEscapedMarkdown(llvm::raw_ostream &OS) const override
Definition Markup.cpp:478
std::unique_ptr< Block > clone() const override
Definition Markup.cpp:536
void renderMarkdown(llvm::raw_ostream &OS) const override
Definition Markup.cpp:507
Paragraph & appendBoldText(llvm::StringRef Text)
Append bold text, this translates to the ** block in markdown.
Definition Markup.cpp:735
FIXME: Skip testing on windows temporarily due to the different escaping code mode.
Definition AST.cpp:45
//===-- Representation.cpp - ClangDoc Representation -----------*- C++ -*-===//
std::string renderBlocks(llvm::ArrayRef< std::unique_ptr< Block > > Children, void(Block::*RenderFunc)(llvm::raw_ostream &) const)
Definition Markup.cpp:325
std::string indentLines(llvm::StringRef Input)
Definition Markup.cpp:418
std::string canonicalizeSpaces(llvm::StringRef Input)
Definition Markup.cpp:319