clang 20.0.0git
BreakableToken.cpp
Go to the documentation of this file.
1//===--- BreakableToken.cpp - Format C++ code -----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// Contains implementation of BreakableToken class and classes derived
11/// from it.
12///
13//===----------------------------------------------------------------------===//
14
15#include "BreakableToken.h"
18#include "clang/Format/Format.h"
19#include "llvm/ADT/STLExtras.h"
20#include "llvm/Support/Debug.h"
21#include <algorithm>
22
23#define DEBUG_TYPE "format-token-breaker"
24
25namespace clang {
26namespace format {
27
/// Characters treated as blanks when trimming text and searching for split
/// points: space, tab, vertical tab, form feed and carriage return. The line
/// feed character is not part of this set.
28static constexpr StringRef Blanks = " \t\v\f\r";
/// Returns true when \p C is a space, tab, vertical tab, form feed or carriage
/// return; every other character (including '\n') yields false.
static bool IsBlank(char C) {
  return C == ' ' || C == '\t' || C == '\v' || C == '\f' || C == '\r';
}
41
42static StringRef getLineCommentIndentPrefix(StringRef Comment,
43 const FormatStyle &Style) {
44 static constexpr StringRef KnownCStylePrefixes[] = {"///<", "//!<", "///",
45 "//!", "//:", "//"};
46 static constexpr StringRef KnownTextProtoPrefixes[] = {"####", "###", "##",
47 "//", "#"};
48 ArrayRef<StringRef> KnownPrefixes(KnownCStylePrefixes);
50 KnownPrefixes = KnownTextProtoPrefixes;
51
52 assert(
53 llvm::is_sorted(KnownPrefixes, [](StringRef Lhs, StringRef Rhs) noexcept {
54 return Lhs.size() > Rhs.size();
55 }));
56
57 for (StringRef KnownPrefix : KnownPrefixes) {
58 if (Comment.starts_with(KnownPrefix)) {
59 const auto PrefixLength =
60 Comment.find_first_not_of(' ', KnownPrefix.size());
61 return Comment.substr(0, PrefixLength);
62 }
63 }
64 return {};
65}
66
68getCommentSplit(StringRef Text, unsigned ContentStartColumn,
69 unsigned ColumnLimit, unsigned TabWidth,
70 encoding::Encoding Encoding, const FormatStyle &Style,
71 bool DecorationEndsWithStar = false) {
72 LLVM_DEBUG(llvm::dbgs() << "Comment split: \"" << Text
73 << "\", Column limit: " << ColumnLimit
74 << ", Content start: " << ContentStartColumn << "\n");
75 if (ColumnLimit <= ContentStartColumn + 1)
76 return BreakableToken::Split(StringRef::npos, 0);
77
78 unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1;
79 unsigned MaxSplitBytes = 0;
80
81 for (unsigned NumChars = 0;
82 NumChars < MaxSplit && MaxSplitBytes < Text.size();) {
83 unsigned BytesInChar =
84 encoding::getCodePointNumBytes(Text[MaxSplitBytes], Encoding);
86 Text.substr(MaxSplitBytes, BytesInChar), ContentStartColumn + NumChars,
87 TabWidth, Encoding);
88 MaxSplitBytes += BytesInChar;
89 }
90
91 // In JavaScript, some @tags can be followed by {, and machinery that parses
92 // these comments will fail to understand the comment if followed by a line
93 // break. So avoid ever breaking before a {.
94 if (Style.isJavaScript()) {
95 StringRef::size_type SpaceOffset =
96 Text.find_first_of(Blanks, MaxSplitBytes);
97 if (SpaceOffset != StringRef::npos && SpaceOffset + 1 < Text.size() &&
98 Text[SpaceOffset + 1] == '{') {
99 MaxSplitBytes = SpaceOffset + 1;
100 }
101 }
102
103 StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes);
104
105 static const auto kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\.");
106 // Some spaces are unacceptable to break on, rewind past them.
107 while (SpaceOffset != StringRef::npos) {
108 // If a line-comment ends with `\`, the next line continues the comment,
109 // whether or not it starts with `//`. This is confusing and triggers
110 // -Wcomment.
111 // Avoid introducing multiline comments by not allowing a break right
112 // after '\'.
113 if (Style.isCpp()) {
114 StringRef::size_type LastNonBlank =
115 Text.find_last_not_of(Blanks, SpaceOffset);
116 if (LastNonBlank != StringRef::npos && Text[LastNonBlank] == '\\') {
117 SpaceOffset = Text.find_last_of(Blanks, LastNonBlank);
118 continue;
119 }
120 }
121
122 // Do not split before a number followed by a dot: this would be interpreted
123 // as a numbered list, which would prevent re-flowing in subsequent passes.
124 if (kNumberedListRegexp.match(Text.substr(SpaceOffset).ltrim(Blanks))) {
125 SpaceOffset = Text.find_last_of(Blanks, SpaceOffset);
126 continue;
127 }
128
129 // Avoid ever breaking before a @tag or a { in JavaScript.
130 if (Style.isJavaScript() && SpaceOffset + 1 < Text.size() &&
131 (Text[SpaceOffset + 1] == '{' || Text[SpaceOffset + 1] == '@')) {
132 SpaceOffset = Text.find_last_of(Blanks, SpaceOffset);
133 continue;
134 }
135
136 break;
137 }
138
139 if (SpaceOffset == StringRef::npos ||
140 // Don't break at leading whitespace.
141 Text.find_last_not_of(Blanks, SpaceOffset) == StringRef::npos) {
142 // Make sure that we don't break at leading whitespace that
143 // reaches past MaxSplit.
144 StringRef::size_type FirstNonWhitespace = Text.find_first_not_of(Blanks);
145 if (FirstNonWhitespace == StringRef::npos) {
146 // If the comment is only whitespace, we cannot split.
147 return BreakableToken::Split(StringRef::npos, 0);
148 }
149 SpaceOffset = Text.find_first_of(
150 Blanks, std::max<unsigned>(MaxSplitBytes, FirstNonWhitespace));
151 }
152 if (SpaceOffset != StringRef::npos && SpaceOffset != 0) {
153 // adaptStartOfLine will break after lines starting with /** if the comment
154 // is broken anywhere. Avoid emitting this break twice here.
155 // Example: in /** longtextcomesherethatbreaks */ (with ColumnLimit 20) will
156 // insert a break after /**, so this code must not insert the same break.
157 if (SpaceOffset == 1 && Text[SpaceOffset - 1] == '*')
158 return BreakableToken::Split(StringRef::npos, 0);
159 StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim(Blanks);
160 StringRef AfterCut = Text.substr(SpaceOffset);
161 // Don't trim the leading blanks if it would create a */ after the break.
162 if (!DecorationEndsWithStar || AfterCut.size() <= 1 || AfterCut[1] != '/')
163 AfterCut = AfterCut.ltrim(Blanks);
164 return BreakableToken::Split(BeforeCut.size(),
165 AfterCut.begin() - BeforeCut.end());
166 }
167 return BreakableToken::Split(StringRef::npos, 0);
168}
169
171getStringSplit(StringRef Text, unsigned UsedColumns, unsigned ColumnLimit,
172 unsigned TabWidth, encoding::Encoding Encoding) {
173 // FIXME: Reduce unit test case.
174 if (Text.empty())
175 return BreakableToken::Split(StringRef::npos, 0);
176 if (ColumnLimit <= UsedColumns)
177 return BreakableToken::Split(StringRef::npos, 0);
178 unsigned MaxSplit = ColumnLimit - UsedColumns;
179 StringRef::size_type SpaceOffset = 0;
180 StringRef::size_type SlashOffset = 0;
181 StringRef::size_type WordStartOffset = 0;
182 StringRef::size_type SplitPoint = 0;
183 for (unsigned Chars = 0;;) {
184 unsigned Advance;
185 if (Text[0] == '\\') {
187 Chars += Advance;
188 } else {
189 Advance = encoding::getCodePointNumBytes(Text[0], Encoding);
191 Text.substr(0, Advance), UsedColumns + Chars, TabWidth, Encoding);
192 }
193
194 if (Chars > MaxSplit || Text.size() <= Advance)
195 break;
196
197 if (IsBlank(Text[0]))
198 SpaceOffset = SplitPoint;
199 if (Text[0] == '/')
200 SlashOffset = SplitPoint;
201 if (Advance == 1 && !isAlphanumeric(Text[0]))
202 WordStartOffset = SplitPoint;
203
204 SplitPoint += Advance;
205 Text = Text.substr(Advance);
206 }
207
208 if (SpaceOffset != 0)
209 return BreakableToken::Split(SpaceOffset + 1, 0);
210 if (SlashOffset != 0)
211 return BreakableToken::Split(SlashOffset + 1, 0);
212 if (WordStartOffset != 0)
213 return BreakableToken::Split(WordStartOffset + 1, 0);
214 if (SplitPoint != 0)
215 return BreakableToken::Split(SplitPoint, 0);
216 return BreakableToken::Split(StringRef::npos, 0);
217}
218
220 assert((Token.is(TT_BlockComment) || Token.is(TT_LineComment)) &&
221 "formatting regions are switched by comment tokens");
222 StringRef Content = Token.TokenText.substr(2).ltrim();
223 return Content.starts_with("clang-format on") ||
224 Content.starts_with("clang-format off");
225}
226
227unsigned
228BreakableToken::getLengthAfterCompression(unsigned RemainingTokenColumns,
229 Split Split) const {
230 // Example: consider the content
231 // lala lala
232 // - RemainingTokenColumns is the original number of columns, 10;
233 // - Split is (4, 2), denoting the two spaces between the two words;
234 //
235 // We compute the number of columns when the split is compressed into a single
236 // space, like:
237 // lala lala
238 //
239 // FIXME: Correctly measure the length of whitespace in Split.second so it
240 // works with tabs.
241 return RemainingTokenColumns + 1 - Split.second;
242}
243
244unsigned BreakableStringLiteral::getLineCount() const { return 1; }
245
246unsigned BreakableStringLiteral::getRangeLength(unsigned LineIndex,
247 unsigned Offset,
248 StringRef::size_type Length,
249 unsigned StartColumn) const {
250 llvm_unreachable("Getting the length of a part of the string literal "
251 "indicates that the code tries to reflow it.");
252}
253
254unsigned
255BreakableStringLiteral::getRemainingLength(unsigned LineIndex, unsigned Offset,
256 unsigned StartColumn) const {
257 return UnbreakableTailLength + Postfix.size() +
260}
261
263 bool Break) const {
264 return StartColumn + Prefix.size();
265}
266
268 const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,
269 StringRef Postfix, unsigned UnbreakableTailLength, bool InPPDirective,
270 encoding::Encoding Encoding, const FormatStyle &Style)
271 : BreakableToken(Tok, InPPDirective, Encoding, Style),
272 StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix),
273 UnbreakableTailLength(UnbreakableTailLength) {
274 assert(Tok.TokenText.starts_with(Prefix) && Tok.TokenText.ends_with(Postfix));
275 Line = Tok.TokenText.substr(
276 Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size());
277}
278
280 unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
281 unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const {
282 return getStringSplit(Line.substr(TailOffset), ContentStartColumn,
283 ColumnLimit - Postfix.size(), Style.TabWidth, Encoding);
284}
285
287 unsigned TailOffset, Split Split,
288 unsigned ContentIndent,
289 WhitespaceManager &Whitespaces) const {
290 Whitespaces.replaceWhitespaceInToken(
291 Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix,
293}
294
// Constructor of the string literal variant whose broken parts are joined
// with an operator. From the visible code it picks the quote prefix/postfix
// for the base class from QuoteStyle, records whether braces are needed, and
// then selects the Prefix/Postfix strings emitted around each inserted break.
//
// NOTE(review): this listing has lost several lines of the definition (the
// embedded numbering skips 295, 299, 316, 330-331, 336, 339, 342, 348 and
// 352-353), including the declarator line and the assignments that the
// comments about braces and continuation indent refer to. Restore them from
// the original file before relying on this text.
296 const FormatToken &Tok, QuoteStyleType QuoteStyle, bool UnindentPlus,
297 unsigned StartColumn, unsigned UnbreakableTailLength, bool InPPDirective,
298 encoding::Encoding Encoding, const FormatStyle &Style)
300 Tok, StartColumn, /*Prefix=*/QuoteStyle == SingleQuotes ? "'"
301 : QuoteStyle == AtDoubleQuotes ? "@\""
302 : "\"",
303 /*Postfix=*/QuoteStyle == SingleQuotes ? "'" : "\"",
304 UnbreakableTailLength, InPPDirective, Encoding, Style),
305 BracesNeeded(Tok.isNot(TT_StringInConcatenation)),
306 QuoteStyle(QuoteStyle) {
307 // Find the replacement text for inserting braces and quotes and line breaks.
308 // We don't create an allocated string concatenated from parts here because it
309 // has to outlive the BreakableStringLiteral object. The brace replacements
310 // include a quote so that WhitespaceManager can tell it apart from whitespace
311 // replacements between the string and surrounding tokens.
312
313 // The option is not implemented in JavaScript.
314 bool SignOnNewLine =
315 !Style.isJavaScript() &&
317
318 if (Style.isVerilog()) {
319 // In Verilog, all strings are quoted by double quotes, joined by commas,
320 // and wrapped in braces. The comma is always before the newline.
321 assert(QuoteStyle == DoubleQuotes);
322 LeftBraceQuote = Style.Cpp11BracedListStyle ? "{\"" : "{ \"";
323 RightBraceQuote = Style.Cpp11BracedListStyle ? "\"}" : "\" }";
324 Postfix = "\",";
325 Prefix = "\"";
326 } else {
327 // The plus sign may be on either line. And also C# and JavaScript have
328 // several quoting styles.
329 if (QuoteStyle == SingleQuotes) {
332 Postfix = SignOnNewLine ? "'" : "' +";
333 Prefix = SignOnNewLine ? "+ '" : "'";
334 } else {
335 if (QuoteStyle == AtDoubleQuotes) {
337 Prefix = SignOnNewLine ? "+ @\"" : "@\"";
338 } else {
340 Prefix = SignOnNewLine ? "+ \"" : "\"";
341 }
343 Postfix = SignOnNewLine ? "\"" : "\" +";
344 }
345 }
346
347 // Following lines are indented by the width of the brace and space if any.
349 // The plus sign may need to be unindented depending on the style.
350 // FIXME: Add support for DontAlign.
351 if (!Style.isVerilog() && SignOnNewLine && !BracesNeeded && UnindentPlus &&
354 }
355}
356
358 unsigned LineIndex, unsigned Offset, unsigned StartColumn) const {
359 return UnbreakableTailLength + (BracesNeeded ? RightBraceQuote.size() : 1) +
362}
363
364unsigned
366 bool Break) const {
367 return std::max(
368 0,
369 static_cast<int>(StartColumn) +
370 (Break ? ContinuationIndent + static_cast<int>(Prefix.size())
371 : (BracesNeeded ? static_cast<int>(LeftBraceQuote.size()) - 1
372 : 0) +
373 (QuoteStyle == AtDoubleQuotes ? 2 : 1)));
374}
375
377 unsigned LineIndex, unsigned TailOffset, Split Split,
378 unsigned ContentIndent, WhitespaceManager &Whitespaces) const {
379 Whitespaces.replaceWhitespaceInToken(
380 Tok, /*Offset=*/(QuoteStyle == AtDoubleQuotes ? 2 : 1) + TailOffset +
381 Split.first,
382 /*ReplaceChars=*/Split.second, /*PreviousPostfix=*/Postfix,
383 /*CurrentPrefix=*/Prefix, InPPDirective, /*NewLines=*/1,
384 /*Spaces=*/
385 std::max(0, static_cast<int>(StartColumn) + ContinuationIndent));
386}
387
389 WhitespaceManager &Whitespaces) const {
390 // Add the braces required for breaking the token if they are needed.
391 if (!BracesNeeded)
392 return;
393
394 // To add a brace or parenthesis, we replace the quote (or the at sign) with a
395 // brace and another quote. This is because the rest of the program requires
396 // one replacement for each source range. If we replace the empty strings
397 // around the string, it may conflict with whitespace replacements between the
398 // string and adjacent tokens.
399 Whitespaces.replaceWhitespaceInToken(
400 Tok, /*Offset=*/0, /*ReplaceChars=*/1, /*PreviousPostfix=*/"",
401 /*CurrentPrefix=*/LeftBraceQuote, InPPDirective, /*NewLines=*/0,
402 /*Spaces=*/0);
403 Whitespaces.replaceWhitespaceInToken(
404 Tok, /*Offset=*/Tok.TokenText.size() - 1, /*ReplaceChars=*/1,
405 /*PreviousPostfix=*/RightBraceQuote,
406 /*CurrentPrefix=*/"", InPPDirective, /*NewLines=*/0, /*Spaces=*/0);
407}
408
410 unsigned StartColumn, bool InPPDirective,
411 encoding::Encoding Encoding,
412 const FormatStyle &Style)
413 : BreakableToken(Token, InPPDirective, Encoding, Style),
414 StartColumn(StartColumn) {}
415
416unsigned BreakableComment::getLineCount() const { return Lines.size(); }
417
419BreakableComment::getSplit(unsigned LineIndex, unsigned TailOffset,
420 unsigned ColumnLimit, unsigned ContentStartColumn,
421 const llvm::Regex &CommentPragmasRegex) const {
422 // Don't break lines matching the comment pragmas regex.
423 if (!AlwaysReflow || CommentPragmasRegex.match(Content[LineIndex]))
424 return Split(StringRef::npos, 0);
425 return getCommentSplit(Content[LineIndex].substr(TailOffset),
426 ContentStartColumn, ColumnLimit, Style.TabWidth,
427 Encoding, Style);
428}
429
431 unsigned LineIndex, unsigned TailOffset, Split Split,
432 WhitespaceManager &Whitespaces) const {
433 StringRef Text = Content[LineIndex].substr(TailOffset);
434 // Text is relative to the content line, but Whitespaces operates relative to
435 // the start of the corresponding token, so compute the start of the Split
436 // that needs to be compressed into a single space relative to the start of
437 // its token.
438 unsigned BreakOffsetInToken =
439 Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
440 unsigned CharsToRemove = Split.second;
441 Whitespaces.replaceWhitespaceInToken(
442 tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", "",
443 /*InPPDirective=*/false, /*Newlines=*/0, /*Spaces=*/1);
444}
445
446const FormatToken &BreakableComment::tokenAt(unsigned LineIndex) const {
447 return Tokens[LineIndex] ? *Tokens[LineIndex] : Tok;
448}
449
450static bool mayReflowContent(StringRef Content) {
451 Content = Content.trim(Blanks);
452 // Lines starting with '@' or '\' commonly have special meaning.
453 // Lines starting with '-', '-#', '+' or '*' are bulleted/numbered lists.
454 bool hasSpecialMeaningPrefix = false;
455 for (StringRef Prefix :
456 {"@", "\\", "TODO", "FIXME", "XXX", "-# ", "- ", "+ ", "* "}) {
457 if (Content.starts_with(Prefix)) {
458 hasSpecialMeaningPrefix = true;
459 break;
460 }
461 }
462
463 // Numbered lists may also start with a number followed by '.'
464 // To avoid issues if a line starts with a number which is actually the end
465 // of a previous line, we only consider numbers with up to 2 digits.
466 static const auto kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\. ");
467 hasSpecialMeaningPrefix =
468 hasSpecialMeaningPrefix || kNumberedListRegexp.match(Content);
469
470 // Simple heuristic for what to reflow: content should contain at least two
471 // characters and either the first or second character must be
472 // non-punctuation.
473 return Content.size() >= 2 && !hasSpecialMeaningPrefix &&
474 !Content.ends_with("\\") &&
475 // Note that this is UTF-8 safe, since if isPunctuation(Content[0]) is
476 // true, then the first code point must be 1 byte long.
477 (!isPunctuation(Content[0]) || !isPunctuation(Content[1]));
478}
479
// Constructor for a breakable "/* ... */" comment. From the visible code it:
//  - splits the text between the delimiters into Lines,
//  - derives Content and ContentColumn for every line via adjustWhitespace,
//  - determines the Decoration shared by the lines (initially "* ") and the
//    column it is aligned to,
//  - strips that decoration from the content and computes IndentAtLineBreak,
//  - decides whether the delimiters go on their own lines
//    (DelimitersOnNewline).
//
// NOTE(review): this listing has lost lines of the definition (the embedded
// numbering skips 480, 579 and 589-590): the declarator line, the condition
// that opens the DelimitersOnNewline section, and part of the EndColumn
// expression. Restore them from the original file before relying on this text.
481 const FormatToken &Token, unsigned StartColumn,
482 unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
483 encoding::Encoding Encoding, const FormatStyle &Style, bool UseCRLF)
484 : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style),
485 DelimitersOnNewline(false),
486 UnbreakableTailLength(Token.UnbreakableTailLength) {
487 assert(Tok.is(TT_BlockComment) &&
488 "block comment section must start with a block comment");
489
490 StringRef TokenText(Tok.TokenText);
491 assert(TokenText.starts_with("/*") && TokenText.ends_with("*/"));
492 TokenText.substr(2, TokenText.size() - 4)
493 .split(Lines, UseCRLF ? "\r\n" : "\n");
494
495 int IndentDelta = StartColumn - OriginalStartColumn;
496 Content.resize(Lines.size());
497 Content[0] = Lines[0];
498 ContentColumn.resize(Lines.size());
499 // Account for the initial '/*'.
500 ContentColumn[0] = StartColumn + 2;
501 Tokens.resize(Lines.size());
502 for (size_t i = 1; i < Lines.size(); ++i)
503 adjustWhitespace(i, IndentDelta);
504
505 // Align decorations with the column of the star on the first line,
506 // that is one column after the start "/*".
507 DecorationColumn = StartColumn + 1;
508
509 // Account for comment decoration patterns like this:
510 //
511 // /*
512 // ** blah blah blah
513 // */
514 if (Lines.size() >= 2 && Content[1].starts_with("**") &&
515 static_cast<unsigned>(ContentColumn[1]) == StartColumn) {
516 DecorationColumn = StartColumn;
517 }
518
519 Decoration = "* ";
520 if (Lines.size() == 1 && !FirstInLine) {
521 // Comments for which FirstInLine is false can start on arbitrary column,
522 // and available horizontal space can be too small to align consecutive
523 // lines with the first one.
524 // FIXME: We could, probably, align them to current indentation level, but
525 // now we just wrap them without stars.
526 Decoration = "";
527 }
// Shrink Decoration until it is a prefix of every relevant line's content.
528 for (size_t i = 1, e = Content.size(); i < e && !Decoration.empty(); ++i) {
529 const StringRef &Text = Content[i];
530 if (i + 1 == e) {
531 // If the last line is empty, the closing "*/" will have a star.
532 if (Text.empty())
533 break;
534 } else if (!Text.empty() && Decoration.starts_with(Text)) {
535 continue;
536 }
537 while (!Text.starts_with(Decoration))
538 Decoration = Decoration.drop_back(1);
539 }
540
541 LastLineNeedsDecoration = true;
542 IndentAtLineBreak = ContentColumn[0] + 1;
543 for (size_t i = 1, e = Lines.size(); i < e; ++i) {
544 if (Content[i].empty()) {
545 if (i + 1 == e) {
546 // Empty last line means that we already have a star as a part of the
547 // trailing */. We also need to preserve whitespace, so that */ is
548 // correctly indented.
549 LastLineNeedsDecoration = false;
550 // Align the star in the last '*/' with the stars on the previous lines.
551 if (e >= 2 && !Decoration.empty())
552 ContentColumn[i] = DecorationColumn;
553 } else if (Decoration.empty()) {
554 // For all other lines, set the start column to 0 if they're empty, so
555 // we do not insert trailing whitespace anywhere.
556 ContentColumn[i] = 0;
557 }
558 continue;
559 }
560
561 // The first line already excludes the star.
562 // The last line excludes the star if LastLineNeedsDecoration is false.
563 // For all other lines, adjust the line to exclude the star and
564 // (optionally) the first whitespace.
565 unsigned DecorationSize = Decoration.starts_with(Content[i])
566 ? Content[i].size()
567 : Decoration.size();
568 if (DecorationSize)
569 ContentColumn[i] = DecorationColumn + DecorationSize;
570 Content[i] = Content[i].substr(DecorationSize);
571 if (!Decoration.starts_with(Content[i])) {
572 IndentAtLineBreak =
573 std::min<int>(IndentAtLineBreak, std::max(0, ContentColumn[i]));
574 }
575 }
576 IndentAtLineBreak = std::max<unsigned>(IndentAtLineBreak, Decoration.size());
577
578 // Detect a multiline jsdoc comment and set DelimitersOnNewline in that case.
580 if ((Lines[0] == "*" || Lines[0].starts_with("* ")) && Lines.size() > 1) {
581 // This is a multiline jsdoc comment.
582 DelimitersOnNewline = true;
583 } else if (Lines[0].starts_with("* ") && Lines.size() == 1) {
584 // Detect a long single-line comment, like:
585 // /** long long long */
586 // Below, '2' is the width of '*/'.
587 unsigned EndColumn =
588 ContentColumn[0] +
591 2;
592 DelimitersOnNewline = EndColumn > Style.ColumnLimit;
593 }
594 }
595
596 LLVM_DEBUG({
597 llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n";
598 llvm::dbgs() << "DelimitersOnNewline " << DelimitersOnNewline << "\n";
599 for (size_t i = 0; i < Lines.size(); ++i) {
600 llvm::dbgs() << i << " |" << Content[i] << "| "
601 << "CC=" << ContentColumn[i] << "| "
602 << "IN=" << (Content[i].data() - Lines[i].data()) << "\n";
603 }
604 });
605}
606
608 unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
609 unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const {
610 // Don't break lines matching the comment pragmas regex.
611 if (!AlwaysReflow || CommentPragmasRegex.match(Content[LineIndex]))
612 return Split(StringRef::npos, 0);
613 return getCommentSplit(Content[LineIndex].substr(TailOffset),
614 ContentStartColumn, ColumnLimit, Style.TabWidth,
615 Encoding, Style, Decoration.ends_with("*"));
616}
617
618void BreakableBlockComment::adjustWhitespace(unsigned LineIndex,
619 int IndentDelta) {
620 // When in a preprocessor directive, the trailing backslash in a block comment
621 // is not needed, but can serve a purpose of uniformity with necessary escaped
622 // newlines outside the comment. In this case we remove it here before
623 // trimming the trailing whitespace. The backslash will be re-added later when
624 // inserting a line break.
625 size_t EndOfPreviousLine = Lines[LineIndex - 1].size();
626 if (InPPDirective && Lines[LineIndex - 1].ends_with("\\"))
627 --EndOfPreviousLine;
628
629 // Calculate the end of the non-whitespace text in the previous line.
630 EndOfPreviousLine =
631 Lines[LineIndex - 1].find_last_not_of(Blanks, EndOfPreviousLine);
632 if (EndOfPreviousLine == StringRef::npos)
633 EndOfPreviousLine = 0;
634 else
635 ++EndOfPreviousLine;
636 // Calculate the start of the non-whitespace text in the current line.
637 size_t StartOfLine = Lines[LineIndex].find_first_not_of(Blanks);
638 if (StartOfLine == StringRef::npos)
639 StartOfLine = Lines[LineIndex].size();
640
641 StringRef Whitespace = Lines[LineIndex].substr(0, StartOfLine);
642 // Adjust Lines to only contain relevant text.
643 size_t PreviousContentOffset =
644 Content[LineIndex - 1].data() - Lines[LineIndex - 1].data();
645 Content[LineIndex - 1] = Lines[LineIndex - 1].substr(
646 PreviousContentOffset, EndOfPreviousLine - PreviousContentOffset);
647 Content[LineIndex] = Lines[LineIndex].substr(StartOfLine);
648
649 // Adjust the start column uniformly across all lines.
650 ContentColumn[LineIndex] =
652 IndentDelta;
653}
654
655unsigned BreakableBlockComment::getRangeLength(unsigned LineIndex,
656 unsigned Offset,
657 StringRef::size_type Length,
658 unsigned StartColumn) const {
660 Content[LineIndex].substr(Offset, Length), StartColumn, Style.TabWidth,
661 Encoding);
662}
663
665 unsigned Offset,
666 unsigned StartColumn) const {
667 unsigned LineLength =
668 UnbreakableTailLength +
669 getRangeLength(LineIndex, Offset, StringRef::npos, StartColumn);
670 if (LineIndex + 1 == Lines.size()) {
671 LineLength += 2;
672 // We never need a decoration when breaking just the trailing "*/" postfix.
673 bool HasRemainingText = Offset < Content[LineIndex].size();
674 if (!HasRemainingText) {
675 bool HasDecoration = Lines[LineIndex].ltrim().starts_with(Decoration);
676 if (HasDecoration)
677 LineLength -= Decoration.size();
678 }
679 }
680 return LineLength;
681}
682
684 bool Break) const {
685 if (Break)
686 return IndentAtLineBreak;
687 return std::max(0, ContentColumn[LineIndex]);
688}
689
690const llvm::StringSet<>
692 "@param", "@return", "@returns", "@throws", "@type", "@template",
693 "@see", "@deprecated", "@define", "@exports", "@mods", "@private",
694};
695
// Returns the extra indent for continuation lines of line LineIndex. The
// visible code extracts the first word of the line (skipping a leading '*' on
// line 0) and looks it up in ContentIndentingJavadocAnnotations.
//
// NOTE(review): this listing has lost two lines of the definition (the
// embedded numbering skips 697 and 708): the condition guarding the first
// "return 0;" and the value returned when the first word is a known
// annotation. As shown, the function returns 0 unconditionally. Restore the
// missing lines from the original file.
696unsigned BreakableBlockComment::getContentIndent(unsigned LineIndex) const {
698 return 0;
699 // The content at LineIndex 0 of a comment like:
700 // /** line 0 */
701 // is "* line 0", so we need to skip over the decoration in that case.
702 StringRef ContentWithNoDecoration = Content[LineIndex];
703 if (LineIndex == 0 && ContentWithNoDecoration.starts_with("*"))
704 ContentWithNoDecoration = ContentWithNoDecoration.substr(1).ltrim(Blanks);
// The first word is everything up to the first blank.
705 StringRef FirstWord = ContentWithNoDecoration.substr(
706 0, ContentWithNoDecoration.find_first_of(Blanks));
707 if (ContentIndentingJavadocAnnotations.contains(FirstWord))
709 return 0;
710}
711
712void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
713 Split Split, unsigned ContentIndent,
714 WhitespaceManager &Whitespaces) const {
715 StringRef Text = Content[LineIndex].substr(TailOffset);
716 StringRef Prefix = Decoration;
717 // We need this to account for the case when we have a decoration "* " for all
718 // the lines except for the last one, where the star in "*/" acts as a
719 // decoration.
720 unsigned LocalIndentAtLineBreak = IndentAtLineBreak;
721 if (LineIndex + 1 == Lines.size() &&
722 Text.size() == Split.first + Split.second) {
723 // For the last line we need to break before "*/", but not to add "* ".
724 Prefix = "";
725 if (LocalIndentAtLineBreak >= 2)
726 LocalIndentAtLineBreak -= 2;
727 }
728 // The split offset is from the beginning of the line. Convert it to an offset
729 // from the beginning of the token text.
730 unsigned BreakOffsetInToken =
731 Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
732 unsigned CharsToRemove = Split.second;
733 assert(LocalIndentAtLineBreak >= Prefix.size());
734 std::string PrefixWithTrailingIndent = std::string(Prefix);
735 PrefixWithTrailingIndent.append(ContentIndent, ' ');
736 Whitespaces.replaceWhitespaceInToken(
737 tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "",
738 PrefixWithTrailingIndent, InPPDirective, /*Newlines=*/1,
739 /*Spaces=*/LocalIndentAtLineBreak + ContentIndent -
740 PrefixWithTrailingIndent.size());
741}
742
744 unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
745 if (!mayReflow(LineIndex, CommentPragmasRegex))
746 return Split(StringRef::npos, 0);
747
748 // If we're reflowing into a line with content indent, only reflow the next
749 // line if its starting whitespace matches the content indent.
750 size_t Trimmed = Content[LineIndex].find_first_not_of(Blanks);
751 if (LineIndex) {
752 unsigned PreviousContentIndent = getContentIndent(LineIndex - 1);
753 if (PreviousContentIndent && Trimmed != StringRef::npos &&
754 Trimmed != PreviousContentIndent) {
755 return Split(StringRef::npos, 0);
756 }
757 }
758
759 return Split(0, Trimmed != StringRef::npos ? Trimmed : 0);
760}
761
763 // A break is introduced when we want delimiters on newline.
764 return DelimitersOnNewline &&
765 Lines[0].substr(1).find_first_not_of(Blanks) != StringRef::npos;
766}
767
768void BreakableBlockComment::reflow(unsigned LineIndex,
769 WhitespaceManager &Whitespaces) const {
770 StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);
771 // Here we need to reflow.
772 assert(Tokens[LineIndex - 1] == Tokens[LineIndex] &&
773 "Reflowing whitespace within a token");
774 // This is the offset of the end of the last line relative to the start of
775 // the token text in the token.
776 unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +
777 Content[LineIndex - 1].size() -
778 tokenAt(LineIndex).TokenText.data();
779 unsigned WhitespaceLength = TrimmedContent.data() -
780 tokenAt(LineIndex).TokenText.data() -
781 WhitespaceOffsetInToken;
782 Whitespaces.replaceWhitespaceInToken(
783 tokenAt(LineIndex), WhitespaceOffsetInToken,
784 /*ReplaceChars=*/WhitespaceLength, /*PreviousPostfix=*/"",
785 /*CurrentPrefix=*/ReflowPrefix, InPPDirective, /*Newlines=*/0,
786 /*Spaces=*/0);
787}
788
790 unsigned LineIndex, WhitespaceManager &Whitespaces) const {
791 if (LineIndex == 0) {
792 if (DelimitersOnNewline) {
793 // Since we're breaking at index 1 below, the break position and the
794 // break length are the same.
795 // Note: this works because getCommentSplit is careful never to split at
796 // the beginning of a line.
797 size_t BreakLength = Lines[0].substr(1).find_first_not_of(Blanks);
798 if (BreakLength != StringRef::npos) {
799 insertBreak(LineIndex, 0, Split(1, BreakLength), /*ContentIndent=*/0,
800 Whitespaces);
801 }
802 }
803 return;
804 }
805 // Here no reflow with the previous line will happen.
806 // Fix the decoration of the line at LineIndex.
807 StringRef Prefix = Decoration;
808 if (Content[LineIndex].empty()) {
809 if (LineIndex + 1 == Lines.size()) {
810 if (!LastLineNeedsDecoration) {
811 // If the last line was empty, we don't need a prefix, as the */ will
812 // line up with the decoration (if it exists).
813 Prefix = "";
814 }
815 } else if (!Decoration.empty()) {
816 // For other empty lines, if we do have a decoration, adapt it to not
817 // contain a trailing whitespace.
818 Prefix = Prefix.substr(0, 1);
819 }
820 } else if (ContentColumn[LineIndex] == 1) {
821 // This line starts immediately after the decorating *.
822 Prefix = Prefix.substr(0, 1);
823 }
824 // This is the offset of the end of the last line relative to the start of the
825 // token text in the token.
826 unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +
827 Content[LineIndex - 1].size() -
828 tokenAt(LineIndex).TokenText.data();
829 unsigned WhitespaceLength = Content[LineIndex].data() -
830 tokenAt(LineIndex).TokenText.data() -
831 WhitespaceOffsetInToken;
832 Whitespaces.replaceWhitespaceInToken(
833 tokenAt(LineIndex), WhitespaceOffsetInToken, WhitespaceLength, "", Prefix,
834 InPPDirective, /*Newlines=*/1, ContentColumn[LineIndex] - Prefix.size());
835}
836
839 if (DelimitersOnNewline) {
840 // Replace the trailing whitespace of the last line with a newline.
841 // In case the last line is empty, the ending '*/' is already on its own
842 // line.
843 StringRef Line = Content.back().substr(TailOffset);
844 StringRef TrimmedLine = Line.rtrim(Blanks);
845 if (!TrimmedLine.empty())
846 return Split(TrimmedLine.size(), Line.size() - TrimmedLine.size());
847 }
848 return Split(StringRef::npos, 0);
849}
850
852 unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
853 // Content[LineIndex] may exclude the indent after the '*' decoration. In that
854 // case, we compute the start of the comment pragma manually.
855 StringRef IndentContent = Content[LineIndex];
856 if (Lines[LineIndex].ltrim(Blanks).starts_with("*"))
857 IndentContent = Lines[LineIndex].ltrim(Blanks).substr(1);
858 return LineIndex > 0 && AlwaysReflow &&
859 !CommentPragmasRegex.match(IndentContent) &&
860 mayReflowContent(Content[LineIndex]) && !Tok.Finalized &&
861 !switchesFormatting(tokenAt(LineIndex));
862}
863
865 const FormatToken &Token, unsigned StartColumn, bool InPPDirective,
866 encoding::Encoding Encoding, const FormatStyle &Style)
867 : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style) {
868 assert(Tok.is(TT_LineComment) &&
869 "line comment section must start with a line comment");
870 FormatToken *LineTok = nullptr;
871 const int Minimum = Style.SpacesInLineCommentPrefix.Minimum;
872 // How many spaces we changed in the first line of the section, this will be
873 // applied in all following lines
874 int FirstLineSpaceChange = 0;
875 for (const FormatToken *CurrentTok = &Tok;
876 CurrentTok && CurrentTok->is(TT_LineComment);
877 CurrentTok = CurrentTok->Next) {
878 LastLineTok = LineTok;
879 StringRef TokenText(CurrentTok->TokenText);
880 assert((TokenText.starts_with("//") || TokenText.starts_with("#")) &&
881 "unsupported line comment prefix, '//' and '#' are supported");
882 size_t FirstLineIndex = Lines.size();
883 TokenText.split(Lines, "\n");
884 Content.resize(Lines.size());
885 ContentColumn.resize(Lines.size());
886 PrefixSpaceChange.resize(Lines.size());
887 Tokens.resize(Lines.size());
888 Prefix.resize(Lines.size());
889 OriginalPrefix.resize(Lines.size());
890 for (size_t i = FirstLineIndex, e = Lines.size(); i < e; ++i) {
891 Lines[i] = Lines[i].ltrim(Blanks);
892 StringRef IndentPrefix = getLineCommentIndentPrefix(Lines[i], Style);
893 OriginalPrefix[i] = IndentPrefix;
894 const int SpacesInPrefix = llvm::count(IndentPrefix, ' ');
895
896 // This lambda also considers multibyte character that is not handled in
897 // functions like isPunctuation provided by CharInfo.
898 const auto NoSpaceBeforeFirstCommentChar = [&]() {
899 assert(Lines[i].size() > IndentPrefix.size());
900 const char FirstCommentChar = Lines[i][IndentPrefix.size()];
901 const unsigned FirstCharByteSize =
902 encoding::getCodePointNumBytes(FirstCommentChar, Encoding);
904 Lines[i].substr(IndentPrefix.size(), FirstCharByteSize),
905 Encoding) != 1) {
906 return false;
907 }
908 // In C-like comments, add a space before #. For example this is useful
909 // to preserve the relative indentation when commenting out code with
910 // #includes.
911 //
912 // In languages using # as the comment leader such as proto, don't
913 // add a space to support patterns like:
914 // #########
915 // # section
916 // #########
917 if (FirstCommentChar == '#' && !TokenText.starts_with("#"))
918 return false;
919 return FirstCommentChar == '\\' || isPunctuation(FirstCommentChar) ||
920 isHorizontalWhitespace(FirstCommentChar);
921 };
922
923 // On the first line of the comment section we calculate how many spaces
924 // are to be added or removed, all lines after that just get only the
925 // change and we will not look at the maximum anymore. Additionally to the
926 // actual first line, we calculate that when the non space Prefix changes,
927 // e.g. from "///" to "//".
928 if (i == 0 || OriginalPrefix[i].rtrim(Blanks) !=
929 OriginalPrefix[i - 1].rtrim(Blanks)) {
930 if (SpacesInPrefix < Minimum && Lines[i].size() > IndentPrefix.size() &&
931 !NoSpaceBeforeFirstCommentChar()) {
932 FirstLineSpaceChange = Minimum - SpacesInPrefix;
933 } else if (static_cast<unsigned>(SpacesInPrefix) >
935 FirstLineSpaceChange =
936 Style.SpacesInLineCommentPrefix.Maximum - SpacesInPrefix;
937 } else {
938 FirstLineSpaceChange = 0;
939 }
940 }
941
942 if (Lines[i].size() != IndentPrefix.size()) {
943 PrefixSpaceChange[i] = FirstLineSpaceChange;
944
945 if (SpacesInPrefix + PrefixSpaceChange[i] < Minimum) {
946 PrefixSpaceChange[i] +=
947 Minimum - (SpacesInPrefix + PrefixSpaceChange[i]);
948 }
949
950 assert(Lines[i].size() > IndentPrefix.size());
951 const auto FirstNonSpace = Lines[i][IndentPrefix.size()];
952 const bool IsFormatComment = LineTok && switchesFormatting(*LineTok);
953 const bool LineRequiresLeadingSpace =
954 !NoSpaceBeforeFirstCommentChar() ||
955 (FirstNonSpace == '}' && FirstLineSpaceChange != 0);
956 const bool AllowsSpaceChange =
957 !IsFormatComment &&
958 (SpacesInPrefix != 0 || LineRequiresLeadingSpace);
959
960 if (PrefixSpaceChange[i] > 0 && AllowsSpaceChange) {
961 Prefix[i] = IndentPrefix.str();
962 Prefix[i].append(PrefixSpaceChange[i], ' ');
963 } else if (PrefixSpaceChange[i] < 0 && AllowsSpaceChange) {
964 Prefix[i] = IndentPrefix
965 .drop_back(std::min<std::size_t>(
966 -PrefixSpaceChange[i], SpacesInPrefix))
967 .str();
968 } else {
969 Prefix[i] = IndentPrefix.str();
970 }
971 } else {
972 // If the IndentPrefix is the whole line, there is no content and we
973 // drop just all space
974 Prefix[i] = IndentPrefix.drop_back(SpacesInPrefix).str();
975 }
976
977 Tokens[i] = LineTok;
978 Content[i] = Lines[i].substr(IndentPrefix.size());
979 ContentColumn[i] =
982
983 // Calculate the end of the non-whitespace text in this line.
984 size_t EndOfLine = Content[i].find_last_not_of(Blanks);
985 if (EndOfLine == StringRef::npos)
986 EndOfLine = Content[i].size();
987 else
988 ++EndOfLine;
989 Content[i] = Content[i].substr(0, EndOfLine);
990 }
991 LineTok = CurrentTok->Next;
992 if (CurrentTok->Next && !CurrentTok->Next->ContinuesLineCommentSection) {
993 // A line comment section needs to broken by a line comment that is
994 // preceded by at least two newlines. Note that we put this break here
995 // instead of breaking at a previous stage during parsing, since that
996 // would split the contents of the enum into two unwrapped lines in this
997 // example, which is undesirable:
998 // enum A {
999 // a, // comment about a
1000 //
1001 // // comment about b
1002 // b
1003 // };
1004 //
1005 // FIXME: Consider putting separate line comment sections as children to
1006 // the unwrapped line instead.
1007 break;
1008 }
1009 }
1010}
1011
1012unsigned
1013BreakableLineCommentSection::getRangeLength(unsigned LineIndex, unsigned Offset,
1014 StringRef::size_type Length,
1015 unsigned StartColumn) const {
1017 Content[LineIndex].substr(Offset, Length), StartColumn, Style.TabWidth,
1018 Encoding);
1019}
1020
1021unsigned
1023 bool /*Break*/) const {
1024 return ContentColumn[LineIndex];
1025}
1026
1028 unsigned LineIndex, unsigned TailOffset, Split Split,
1029 unsigned ContentIndent, WhitespaceManager &Whitespaces) const {
1030 StringRef Text = Content[LineIndex].substr(TailOffset);
1031 // Compute the offset of the split relative to the beginning of the token
1032 // text.
1033 unsigned BreakOffsetInToken =
1034 Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
1035 unsigned CharsToRemove = Split.second;
1036 Whitespaces.replaceWhitespaceInToken(
1037 tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "",
1038 Prefix[LineIndex], InPPDirective, /*Newlines=*/1,
1039 /*Spaces=*/ContentColumn[LineIndex] - Prefix[LineIndex].size());
1040}
1041
1043 unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
1044 if (!mayReflow(LineIndex, CommentPragmasRegex))
1045 return Split(StringRef::npos, 0);
1046
1047 size_t Trimmed = Content[LineIndex].find_first_not_of(Blanks);
1048
1049 // In a line comment section each line is a separate token; thus, after a
1050 // split we replace all whitespace before the current line comment token
1051 // (which does not need to be included in the split), plus the start of the
1052 // line up to where the content starts.
1053 return Split(0, Trimmed != StringRef::npos ? Trimmed : 0);
1054}
1055
1057 WhitespaceManager &Whitespaces) const {
1058 if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) {
1059 // Reflow happens between tokens. Replace the whitespace between the
1060 // tokens by the empty string.
1061 Whitespaces.replaceWhitespace(
1062 *Tokens[LineIndex], /*Newlines=*/0, /*Spaces=*/0,
1063 /*StartOfTokenColumn=*/StartColumn, /*IsAligned=*/true,
1064 /*InPPDirective=*/false);
1065 } else if (LineIndex > 0) {
1066 // In case we're reflowing after the '\' in:
1067 //
1068 // // line comment \
1069 // // line 2
1070 //
1071 // the reflow happens inside the single comment token (it is a single line
1072 // comment with an unescaped newline).
1073 // Replace the whitespace between the '\' and '//' with the empty string.
1074 //
1075 // Offset points to after the '\' relative to start of the token.
1076 unsigned Offset = Lines[LineIndex - 1].data() +
1077 Lines[LineIndex - 1].size() -
1078 tokenAt(LineIndex - 1).TokenText.data();
1079 // WhitespaceLength is the number of chars between the '\' and the '//' on
1080 // the next line.
1081 unsigned WhitespaceLength =
1082 Lines[LineIndex].data() - tokenAt(LineIndex).TokenText.data() - Offset;
1083 Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex], Offset,
1084 /*ReplaceChars=*/WhitespaceLength,
1085 /*PreviousPostfix=*/"",
1086 /*CurrentPrefix=*/"",
1087 /*InPPDirective=*/false,
1088 /*Newlines=*/0,
1089 /*Spaces=*/0);
1090 }
1091 // Replace the indent and prefix of the token with the reflow prefix.
1092 unsigned Offset =
1093 Lines[LineIndex].data() - tokenAt(LineIndex).TokenText.data();
1094 unsigned WhitespaceLength =
1095 Content[LineIndex].data() - Lines[LineIndex].data();
1096 Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex], Offset,
1097 /*ReplaceChars=*/WhitespaceLength,
1098 /*PreviousPostfix=*/"",
1099 /*CurrentPrefix=*/ReflowPrefix,
1100 /*InPPDirective=*/false,
1101 /*Newlines=*/0,
1102 /*Spaces=*/0);
1103}
1104
1106 unsigned LineIndex, WhitespaceManager &Whitespaces) const {
1107 // If this is the first line of a token, we need to inform Whitespace Manager
1108 // about it: either adapt the whitespace range preceding it, or mark it as an
1109 // untouchable token.
1110 // This happens for instance here:
1111 // // line 1 \
1112 // // line 2
1113 if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) {
1114 // This is the first line for the current token, but no reflow with the
1115 // previous token is necessary. However, we still may need to adjust the
1116 // start column. Note that ContentColumn[LineIndex] is the expected
1117 // content column after a possible update to the prefix, hence the prefix
1118 // length change is included.
1119 unsigned LineColumn =
1120 ContentColumn[LineIndex] -
1121 (Content[LineIndex].data() - Lines[LineIndex].data()) +
1122 (OriginalPrefix[LineIndex].size() - Prefix[LineIndex].size());
1123
1124 // We always want to create a replacement instead of adding an untouchable
1125 // token, even if LineColumn is the same as the original column of the
1126 // token. This is because WhitespaceManager doesn't align trailing
1127 // comments if they are untouchable.
1128 Whitespaces.replaceWhitespace(*Tokens[LineIndex],
1129 /*Newlines=*/1,
1130 /*Spaces=*/LineColumn,
1131 /*StartOfTokenColumn=*/LineColumn,
1132 /*IsAligned=*/true,
1133 /*InPPDirective=*/false);
1134 }
1135 if (OriginalPrefix[LineIndex] != Prefix[LineIndex]) {
1136 // Adjust the prefix if necessary.
1137 const auto SpacesToRemove = -std::min(PrefixSpaceChange[LineIndex], 0);
1138 const auto SpacesToAdd = std::max(PrefixSpaceChange[LineIndex], 0);
1139 Whitespaces.replaceWhitespaceInToken(
1140 tokenAt(LineIndex), OriginalPrefix[LineIndex].size() - SpacesToRemove,
1141 /*ReplaceChars=*/SpacesToRemove, "", "", /*InPPDirective=*/false,
1142 /*Newlines=*/0, /*Spaces=*/SpacesToAdd);
1143 }
1144}
1145
1147 if (LastLineTok)
1148 State.NextToken = LastLineTok->Next;
1149}
1150
1152 unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
1153 // Line comments have the indent as part of the prefix, so we need to
1154 // recompute the start of the line.
1155 StringRef IndentContent = Content[LineIndex];
1156 if (Lines[LineIndex].starts_with("//"))
1157 IndentContent = Lines[LineIndex].substr(2);
1158 // FIXME: Decide whether we want to reflow non-regular indents:
1159 // Currently, we only reflow when the OriginalPrefix[LineIndex] matches the
1160 // OriginalPrefix[LineIndex-1]. That means we don't reflow
1161 // // text that protrudes
1162 // // into text with different indent
1163 // We do reflow in that case in block comments.
1164 return LineIndex > 0 && AlwaysReflow &&
1165 !CommentPragmasRegex.match(IndentContent) &&
1166 mayReflowContent(Content[LineIndex]) && !Tok.Finalized &&
1167 !switchesFormatting(tokenAt(LineIndex)) &&
1168 OriginalPrefix[LineIndex] == OriginalPrefix[LineIndex - 1];
1169}
1170
1171} // namespace format
1172} // namespace clang
Declares BreakableToken, BreakableStringLiteral, BreakableComment, BreakableBlockComment and Breakabl...
This file implements an indenter that manages the indentation of continuations.
StringRef Text
Definition: Format.cpp:3033
Various functions to configurably format source code.
Token - This structure provides full information about a lexed token.
Definition: Token.h:36
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {....
Definition: Token.h:99
void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, unsigned ContentIndent, WhitespaceManager &Whitespaces) const override
Emits the previously retrieved Split via Whitespaces.
unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override
Returns the column at which content in line LineIndex starts, assuming no reflow.
Split getReflowSplit(unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const override
Returns a whitespace range (offset, length) of the content at LineIndex such that the content of that...
bool mayReflow(unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const override
Split getSplitAfterLastLine(unsigned TailOffset) const override
Returns a whitespace range (offset, length) of the content at the last line that needs to be reformat...
BreakableBlockComment(const FormatToken &Token, unsigned StartColumn, unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style, bool UseCRLF)
static const llvm::StringSet ContentIndentingJavadocAnnotations
unsigned getRemainingLength(unsigned LineIndex, unsigned Offset, unsigned StartColumn) const override
Returns the number of columns required to format the text following the byte Offset in the line LineI...
unsigned getContentIndent(unsigned LineIndex) const override
Returns additional content indent required for the second line after the content at line LineIndex is...
void adaptStartOfLine(unsigned LineIndex, WhitespaceManager &Whitespaces) const override
Replaces the whitespace between LineIndex-1 and LineIndex.
void reflow(unsigned LineIndex, WhitespaceManager &Whitespaces) const override
Reflows the current line into the end of the previous one.
unsigned getRangeLength(unsigned LineIndex, unsigned Offset, StringRef::size_type Length, unsigned StartColumn) const override
Returns the number of columns required to format the text in the byte range [Offset,...
Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const override
Returns a range (offset, length) at which to break the line at LineIndex, if previously broken at Tai...
bool introducesBreakBeforeToken() const override
Returns whether there will be a line break at the start of the token.
unsigned getLineCount() const override
Returns the number of lines in this token in the original code.
SmallVector< StringRef, 16 > Lines
SmallVector< int, 16 > ContentColumn
Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const override
Returns a range (offset, length) at which to break the line at LineIndex, if previously broken at Tai...
SmallVector< FormatToken *, 16 > Tokens
SmallVector< StringRef, 16 > Content
void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split, WhitespaceManager &Whitespaces) const override
Replaces the whitespace range described by Split with a single space.
const FormatToken & tokenAt(unsigned LineIndex) const
BreakableComment(const FormatToken &Token, unsigned StartColumn, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
Creates a breakable token for a comment.
BreakableLineCommentSection(const FormatToken &Token, unsigned StartColumn, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override
Returns the column at which content in line LineIndex starts, assuming no reflow.
void reflow(unsigned LineIndex, WhitespaceManager &Whitespaces) const override
Reflows the current line into the end of the previous one.
Split getReflowSplit(unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const override
Returns a whitespace range (offset, length) of the content at LineIndex such that the content of that...
void updateNextToken(LineState &State) const override
Updates the next token of State to the next token after this one.
bool mayReflow(unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const override
void adaptStartOfLine(unsigned LineIndex, WhitespaceManager &Whitespaces) const override
Replaces the whitespace between LineIndex-1 and LineIndex.
void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, unsigned ContentIndent, WhitespaceManager &Whitespaces) const override
Emits the previously retrieved Split via Whitespaces.
unsigned getRangeLength(unsigned LineIndex, unsigned Offset, StringRef::size_type Length, unsigned StartColumn) const override
Returns the number of columns required to format the text in the byte range [Offset,...
BreakableStringLiteralUsingOperators(const FormatToken &Tok, QuoteStyleType QuoteStyle, bool UnindentPlus, unsigned StartColumn, unsigned UnbreakableTailLength, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
Creates a breakable token for a single line string literal for C#, Java, JavaScript,...
void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, unsigned ContentIndent, WhitespaceManager &Whitespaces) const override
Emits the previously retrieved Split via Whitespaces.
unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override
Returns the column at which content in line LineIndex starts, assuming no reflow.
void updateAfterBroken(WhitespaceManager &Whitespaces) const override
Adds replacements that are needed when the token is broken.
unsigned getRemainingLength(unsigned LineIndex, unsigned Offset, unsigned StartColumn) const override
Returns the number of columns required to format the text following the byte Offset in the line LineI...
Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const override
Returns a range (offset, length) at which to break the line at LineIndex, if previously broken at Tai...
unsigned getRangeLength(unsigned LineIndex, unsigned Offset, StringRef::size_type Length, unsigned StartColumn) const override
Returns the number of columns required to format the text in the byte range [Offset,...
unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override
Returns the column at which content in line LineIndex starts, assuming no reflow.
unsigned getLineCount() const override
Returns the number of lines in this token in the original code.
unsigned getRemainingLength(unsigned LineIndex, unsigned Offset, unsigned StartColumn) const override
Returns the number of columns required to format the text following the byte Offset in the line LineI...
BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn, StringRef Prefix, StringRef Postfix, unsigned UnbreakableTailLength, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
Creates a breakable token for a single line string literal.
void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, unsigned ContentIndent, WhitespaceManager &Whitespaces) const override
Emits the previously retrieved Split via Whitespaces.
Base class for tokens / ranges of tokens that can allow breaking within the tokens - for example,...
std::pair< StringRef::size_type, unsigned > Split
Contains starting character index and length of split.
unsigned getLengthAfterCompression(unsigned RemainingTokenColumns, Split Split) const
Returns the number of columns needed to format RemainingTokenColumns, assuming that Split is within t...
const encoding::Encoding Encoding
Manages the whitespaces around tokens and their replacements.
unsigned columnWidthWithTabs(StringRef Text, unsigned StartColumn, unsigned TabWidth, Encoding Encoding)
Returns the number of columns required to display the Text, starting from the StartColumn on a termin...
Definition: Encoding.h:60
unsigned getEscapeSequenceLength(StringRef Text)
Gets the length of an escape sequence inside a C++ string literal.
Definition: Encoding.h:96
unsigned getCodePointNumBytes(char FirstChar, Encoding Encoding)
Gets the number of bytes in a sequence representing a single codepoint and starting with FirstChar in...
Definition: Encoding.h:77
unsigned columnWidth(StringRef Text, Encoding Encoding)
Returns the number of columns required to display the Text on a generic Unicode-capable terminal.
Definition: Encoding.h:44
static constexpr StringRef Blanks
bool switchesFormatting(const FormatToken &Token)
Checks if Token switches formatting, like /* clang-format off *‍/.
static bool IsBlank(char C)
static BreakableToken::Split getStringSplit(StringRef Text, unsigned UsedColumns, unsigned ColumnLimit, unsigned TabWidth, encoding::Encoding Encoding)
static StringRef getLineCommentIndentPrefix(StringRef Comment, const FormatStyle &Style)
static bool mayReflowContent(StringRef Content)
static BreakableToken::Split getCommentSplit(StringRef Text, unsigned ContentStartColumn, unsigned ColumnLimit, unsigned TabWidth, encoding::Encoding Encoding, const FormatStyle &Style, bool DecorationEndsWithStar=false)
The JSON file list parser is used to communicate input to InstallAPI.
LLVM_READONLY bool isAlphanumeric(unsigned char c)
Return true if this character is an ASCII letter or digit: [a-zA-Z0-9].
Definition: CharInfo.h:138
LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)
Returns true if this character is horizontal ASCII whitespace: ' ', '\t', '\f', '\v'.
Definition: CharInfo.h:91
LLVM_READONLY bool isPunctuation(unsigned char c)
Return true if this character is an ASCII punctuation character.
Definition: CharInfo.h:152
#define false
Definition: stdbool.h:26
unsigned Maximum
The maximum number of spaces at the start of the comment.
Definition: Format.h:4751
unsigned Minimum
The minimum number of spaces at the start of the comment.
Definition: Format.h:4749
bool Other
Put a space in parentheses not covered by preceding options.
Definition: Format.h:4869
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:55
unsigned ContinuationIndentWidth
Indent width for line continuations.
Definition: Format.h:2503
@ LK_Java
Should be used for Java.
Definition: Format.h:3264
@ LK_TextProto
Should be used for Protocol Buffer messages in text format (https://developers.google....
Definition: Format.h:3278
bool Cpp11BracedListStyle
If true, format braced lists as best suited for C++11 braced lists.
Definition: Format.h:2526
BinaryOperatorStyle BreakBeforeBinaryOperators
The way to wrap binary operators.
Definition: Format.h:1761
@ BOS_None
Break after operators.
Definition: Format.h:1732
LanguageKind Language
Language, this format style is targeted at.
Definition: Format.h:3296
unsigned TabWidth
The number of columns used for tab stops.
Definition: Format.h:5039
OperandAlignmentStyle AlignOperands
If true, horizontally align operands of binary and ternary expressions.
Definition: Format.h:565
SpacesInParensCustom SpacesInParensOptions
Control of individual spaces in parentheses.
Definition: Format.h:4908
bool isVerilog() const
Definition: Format.h:3288
SpacesInLineComment SpacesInLineCommentPrefix
How many spaces are allowed at the start of a line comment.
Definition: Format.h:4786
bool isJavaScript() const
Definition: Format.h:3287
@ OAS_AlignAfterOperator
Horizontally align operands of binary and ternary expressions.
Definition: Format.h:559
unsigned ColumnLimit
The column limit.
Definition: Format.h:2404
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:294
StringRef TokenText
The raw text of the token.
Definition: FormatToken.h:314
unsigned Finalized
If true, this token has been fully formatted (indented and potentially re-formatted inside),...
Definition: FormatToken.h:373
FormatToken * Next
The next token in the unwrapped line.
Definition: FormatToken.h:566
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:609
The current state when indenting a unwrapped line.