clang 19.0.0git
BreakableToken.cpp
Go to the documentation of this file.
1//===--- BreakableToken.cpp - Format C++ code -----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// Contains implementation of BreakableToken class and classes derived
11/// from it.
12///
13//===----------------------------------------------------------------------===//
14
15#include "BreakableToken.h"
18
19#define DEBUG_TYPE "format-token-breaker"
20
21namespace clang {
22namespace format {
23
// The characters this file treats as blanks: space, horizontal tab, vertical
// tab, form feed and carriage return. '\n' is not part of the set.
24static constexpr StringRef Blanks = " \t\v\f\r";
// Returns true if C is one of the blank characters also listed in Blanks:
// ' ', '\t', '\v', '\f' or '\r'. Any other character, including '\n' and
// '\0', yields false.
static bool IsBlank(char C) {
  switch (C) {
  case ' ':
  case '\t':
  case '\v':
  case '\f':
  case '\r':
    return true;
  default:
    return false;
  }
}
37
// Returns the leading part of Comment made of a known comment marker plus every
// space that directly follows it (the whole of Comment when nothing but spaces
// follows the marker). Returns an empty StringRef when Comment starts with none
// of the markers in the selected table.
38static StringRef getLineCommentIndentPrefix(StringRef Comment,
39 const FormatStyle &Style) {
40 static constexpr StringRef KnownCStylePrefixes[] = {"///<", "//!<", "///",
41 "//!", "//:", "//"};
42 static constexpr StringRef KnownTextProtoPrefixes[] = {"####", "###", "##",
43 "//", "#"};
44 ArrayRef<StringRef> KnownPrefixes(KnownCStylePrefixes);
// NOTE(review): listing line 45 is missing here, so the assignment below looks
// unconditional; presumably it is guarded by a check on Style (the parameter is
// otherwise unused in the visible lines) - confirm against upstream.
46 KnownPrefixes = KnownTextProtoPrefixes;
47
// The table must be ordered by non-increasing length so that the first match
// found by the loop below is also the longest one (e.g. "///" before "//").
48 assert(
49 llvm::is_sorted(KnownPrefixes, [](StringRef Lhs, StringRef Rhs) noexcept {
50 return Lhs.size() > Rhs.size();
51 }));
52
53 for (StringRef KnownPrefix : KnownPrefixes) {
54 if (Comment.starts_with(KnownPrefix)) {
// Extend the prefix over the run of spaces after the marker; only ' ' is
// skipped here, not the other characters in Blanks.
55 const auto PrefixLength =
56 Comment.find_first_not_of(' ', KnownPrefix.size());
57 return Comment.substr(0, PrefixLength);
58 }
59 }
60 return {};
61}
62
// Chooses a position at which the comment text Text can be broken.
//
// The search starts from the last blank at or before the byte offset reached
// after measuring up to ColumnLimit - ContentStartColumn + 1 characters, rewinds
// past blanks that must not be broken at, and falls back to the first blank
// after that offset when no earlier one qualifies.
//
// On success the result is Split(size of the text kept before the break with
// trailing blanks removed, number of bytes between that text and the start of
// the continuation). Split(StringRef::npos, 0) is returned when
// ColumnLimit <= ContentStartColumn + 1, when Text holds only blanks in the
// fallback path, when the only candidate is offset 0, or when the break would
// fall right after a '*' at offset 0.
//
// DecorationEndsWithStar: when true, the blanks in front of the continuation
// are kept if its second byte is '/', so that no "*/" is formed after the break.
//
// NOTE(review): the listing omits line 63 in front of the function name
// (presumably the storage class and return type) and line 81 inside the
// measuring loop - confirm both against upstream.
64getCommentSplit(StringRef Text, unsigned ContentStartColumn,
65 unsigned ColumnLimit, unsigned TabWidth,
66 encoding::Encoding Encoding, const FormatStyle &Style,
67 bool DecorationEndsWithStar = false) {
68 LLVM_DEBUG(llvm::dbgs() << "Comment split: \"" << Text
69 << "\", Column limit: " << ColumnLimit
70 << ", Content start: " << ContentStartColumn << "\n");
71 if (ColumnLimit <= ContentStartColumn + 1)
72 return BreakableToken::Split(StringRef::npos, 0);
73
74 unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1;
75 unsigned MaxSplitBytes = 0;
76
// Translate the character budget MaxSplit into a byte offset MaxSplitBytes,
// stepping one code point at a time.
77 for (unsigned NumChars = 0;
78 NumChars < MaxSplit && MaxSplitBytes < Text.size();) {
79 unsigned BytesInChar =
80 encoding::getCodePointNumBytes(Text[MaxSplitBytes], Encoding);
// NOTE(review): listing line 81 is missing here; the orphaned arguments below
// presumably belong to a call that advances NumChars by the column width of the
// current code point - confirm against upstream.
82 Text.substr(MaxSplitBytes, BytesInChar), ContentStartColumn + NumChars,
83 TabWidth, Encoding);
84 MaxSplitBytes += BytesInChar;
85 }
86
87 // In JavaScript, some @tags can be followed by {, and machinery that parses
88 // these comments will fail to understand the comment if followed by a line
89 // break. So avoid ever breaking before a {.
90 if (Style.isJavaScript()) {
91 StringRef::size_type SpaceOffset =
92 Text.find_first_of(Blanks, MaxSplitBytes);
93 if (SpaceOffset != StringRef::npos && SpaceOffset + 1 < Text.size() &&
94 Text[SpaceOffset + 1] == '{') {
95 MaxSplitBytes = SpaceOffset + 1;
96 }
97 }
98
99 StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes);
100
101 static const auto kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\.");
102 // Some spaces are unacceptable to break on, rewind past them.
103 while (SpaceOffset != StringRef::npos) {
104 // If a line-comment ends with `\`, the next line continues the comment,
105 // whether or not it starts with `//`. This is confusing and triggers
106 // -Wcomment.
107 // Avoid introducing multiline comments by not allowing a break right
108 // after '\'.
109 if (Style.isCpp()) {
110 StringRef::size_type LastNonBlank =
111 Text.find_last_not_of(Blanks, SpaceOffset);
112 if (LastNonBlank != StringRef::npos && Text[LastNonBlank] == '\\') {
113 SpaceOffset = Text.find_last_of(Blanks, LastNonBlank);
114 continue;
115 }
116 }
117
118 // Do not split before a number followed by a dot: this would be interpreted
119 // as a numbered list, which would prevent re-flowing in subsequent passes.
120 if (kNumberedListRegexp.match(Text.substr(SpaceOffset).ltrim(Blanks))) {
121 SpaceOffset = Text.find_last_of(Blanks, SpaceOffset);
122 continue;
123 }
124
125 // Avoid ever breaking before a @tag or a { in JavaScript.
126 if (Style.isJavaScript() && SpaceOffset + 1 < Text.size() &&
127 (Text[SpaceOffset + 1] == '{' || Text[SpaceOffset + 1] == '@')) {
128 SpaceOffset = Text.find_last_of(Blanks, SpaceOffset);
129 continue;
130 }
131
132 break;
133 }
134
135 if (SpaceOffset == StringRef::npos ||
136 // Don't break at leading whitespace.
137 Text.find_last_not_of(Blanks, SpaceOffset) == StringRef::npos) {
138 // Make sure that we don't break at leading whitespace that
139 // reaches past MaxSplit.
140 StringRef::size_type FirstNonWhitespace = Text.find_first_not_of(Blanks);
141 if (FirstNonWhitespace == StringRef::npos) {
142 // If the comment is only whitespace, we cannot split.
143 return BreakableToken::Split(StringRef::npos, 0);
144 }
// Fallback: take the first blank at or after both the measured limit and the
// first non-blank character, even though that lies beyond the limit.
145 SpaceOffset = Text.find_first_of(
146 Blanks, std::max<unsigned>(MaxSplitBytes, FirstNonWhitespace));
147 }
148 if (SpaceOffset != StringRef::npos && SpaceOffset != 0) {
149 // adaptStartOfLine will break after lines starting with /** if the comment
150 // is broken anywhere. Avoid emitting this break twice here.
151 // Example: in /** longtextcomesherethatbreaks */ (with ColumnLimit 20) will
152 // insert a break after /**, so this code must not insert the same break.
153 if (SpaceOffset == 1 && Text[SpaceOffset - 1] == '*')
154 return BreakableToken::Split(StringRef::npos, 0);
155 StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim(Blanks);
156 StringRef AfterCut = Text.substr(SpaceOffset);
157 // Don't trim the leading blanks if it would create a */ after the break.
158 if (!DecorationEndsWithStar || AfterCut.size() <= 1 || AfterCut[1] != '/')
159 AfterCut = AfterCut.ltrim(Blanks);
// Both views point into Text, so the pointer difference is the number of bytes
// lying between the kept text and the continuation.
160 return BreakableToken::Split(BeforeCut.size(),
161 AfterCut.begin() - BeforeCut.end());
162 }
163 return BreakableToken::Split(StringRef::npos, 0);
164}
165
// Chooses a byte offset at which the string content Text can be broken.
//
// Text is scanned from the front, one escape sequence or code point at a time,
// while the consumed width stays within ColumnLimit - UsedColumns. During the
// scan the offsets of the most recent blank, the most recent '/', and the most
// recent single-byte non-alphanumeric character are remembered.
//
// The result is Split(offset, 0), with the offset picked in this order: just
// after the last blank, just after the last '/', just after the last
// non-alphanumeric byte, otherwise the position where the scan stopped. An
// offset of 0 doubles as "not found", so a candidate at the very first byte is
// ignored. Split(StringRef::npos, 0) is returned when Text is empty, when
// ColumnLimit <= UsedColumns, or when no non-zero candidate exists.
//
// NOTE(review): the listing omits line 166 in front of the function name
// (presumably the storage class and return type) and lines 182 and 186 inside
// the loop - confirm against upstream.
167getStringSplit(StringRef Text, unsigned UsedColumns, unsigned ColumnLimit,
168 unsigned TabWidth, encoding::Encoding Encoding) {
169 // FIXME: Reduce unit test case.
170 if (Text.empty())
171 return BreakableToken::Split(StringRef::npos, 0);
172 if (ColumnLimit <= UsedColumns)
173 return BreakableToken::Split(StringRef::npos, 0);
174 unsigned MaxSplit = ColumnLimit - UsedColumns;
175 StringRef::size_type SpaceOffset = 0;
176 StringRef::size_type SlashOffset = 0;
177 StringRef::size_type WordStartOffset = 0;
178 StringRef::size_type SplitPoint = 0;
179 for (unsigned Chars = 0;;) {
180 unsigned Advance;
181 if (Text[0] == '\\') {
// NOTE(review): listing line 182 is missing here; presumably it assigns Advance
// the length of the escape sequence at the front of Text - confirm.
183 Chars += Advance;
184 } else {
185 Advance = encoding::getCodePointNumBytes(Text[0], Encoding);
// NOTE(review): listing line 186 is missing here; the orphaned arguments below
// presumably belong to a call that adds the column width of this code point to
// Chars - confirm.
187 Text.substr(0, Advance), UsedColumns + Chars, TabWidth, Encoding);
188 }
189
// Stop once the budget is exceeded or the current unit is the last one in Text.
190 if (Chars > MaxSplit || Text.size() <= Advance)
191 break;
192
193 if (IsBlank(Text[0]))
194 SpaceOffset = SplitPoint;
195 if (Text[0] == '/')
196 SlashOffset = SplitPoint;
197 if (Advance == 1 && !isAlphanumeric(Text[0]))
198 WordStartOffset = SplitPoint;
199
200 SplitPoint += Advance;
201 Text = Text.substr(Advance);
202 }
203
// The "+ 1" places the break after the remembered character, not before it.
204 if (SpaceOffset != 0)
205 return BreakableToken::Split(SpaceOffset + 1, 0);
206 if (SlashOffset != 0)
207 return BreakableToken::Split(SlashOffset + 1, 0);
208 if (WordStartOffset != 0)
209 return BreakableToken::Split(WordStartOffset + 1, 0);
210 if (SplitPoint != 0)
211 return BreakableToken::Split(SplitPoint, 0);
212 return BreakableToken::Split(StringRef::npos, 0);
213}
214
216 assert((Token.is(TT_BlockComment) || Token.is(TT_LineComment)) &&
217 "formatting regions are switched by comment tokens");
218 StringRef Content = Token.TokenText.substr(2).ltrim();
219 return Content.starts_with("clang-format on") ||
220 Content.starts_with("clang-format off");
221}
222
// Returns the column count of the remaining token text once the whitespace
// described by Split (Split.second columns wide) is replaced by one space,
// i.e. RemainingTokenColumns + 1 - Split.second.
223unsigned
224BreakableToken::getLengthAfterCompression(unsigned RemainingTokenColumns,
225 Split Split) const {
226 // Example: consider the content
227 // lala  lala
228 // - RemainingTokenColumns is the original number of columns, 10;
229 // - Split is (4, 2), denoting the two spaces between the two words;
230 //
231 // We compute the number of columns when the split is compressed into a single
232 // space, like:
233 // lala lala
234 //
235 // FIXME: Correctly measure the length of whitespace in Split.second so it
236 // works with tabs.
237 return RemainingTokenColumns + 1 - Split.second;
238}
239
// Always reports exactly one line for a string literal.
240unsigned BreakableStringLiteral::getLineCount() const { return 1; }
241
// Not meant to be called for string literals: the body is an unconditional
// llvm_unreachable, because asking for the length of a sub-range means the
// caller is trying to reflow the literal. All parameters are ignored.
242unsigned BreakableStringLiteral::getRangeLength(unsigned LineIndex,
243 unsigned Offset,
244 StringRef::size_type Length,
245 unsigned StartColumn) const {
246 llvm_unreachable("Getting the length of a part of the string literal "
247 "indicates that the code tries to reflow it.");
248}
249
250unsigned
251BreakableStringLiteral::getRemainingLength(unsigned LineIndex, unsigned Offset,
252 unsigned StartColumn) const {
253 return UnbreakableTailLength + Postfix.size() +
256}
257
259 bool Break) const {
260 return StartColumn + Prefix.size();
261}
262
264 const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,
265 StringRef Postfix, unsigned UnbreakableTailLength, bool InPPDirective,
266 encoding::Encoding Encoding, const FormatStyle &Style)
267 : BreakableToken(Tok, InPPDirective, Encoding, Style),
268 StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix),
269 UnbreakableTailLength(UnbreakableTailLength) {
270 assert(Tok.TokenText.starts_with(Prefix) && Tok.TokenText.ends_with(Postfix));
271 Line = Tok.TokenText.substr(
272 Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size());
273}
274
276 unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
277 unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const {
278 return getStringSplit(Line.substr(TailOffset), ContentStartColumn,
279 ColumnLimit - Postfix.size(), Style.TabWidth, Encoding);
280}
281
283 unsigned TailOffset, Split Split,
284 unsigned ContentIndent,
285 WhitespaceManager &Whitespaces) const {
286 Whitespaces.replaceWhitespaceInToken(
287 Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix,
289}
290
292 const FormatToken &Tok, QuoteStyleType QuoteStyle, bool UnindentPlus,
293 unsigned StartColumn, unsigned UnbreakableTailLength, bool InPPDirective,
294 encoding::Encoding Encoding, const FormatStyle &Style)
296 Tok, StartColumn, /*Prefix=*/QuoteStyle == SingleQuotes ? "'"
297 : QuoteStyle == AtDoubleQuotes ? "@\""
298 : "\"",
299 /*Postfix=*/QuoteStyle == SingleQuotes ? "'" : "\"",
300 UnbreakableTailLength, InPPDirective, Encoding, Style),
301 BracesNeeded(Tok.isNot(TT_StringInConcatenation)),
302 QuoteStyle(QuoteStyle) {
303 // Find the replacement text for inserting braces and quotes and line breaks.
304 // We don't create an allocated string concatenated from parts here because it
305 // has to outlive the BreakableStringliteral object. The brace replacements
306 // include a quote so that WhitespaceManager can tell it apart from whitespace
307 // replacements between the string and surrounding tokens.
308
309 // The option is not implemented in JavaScript.
310 bool SignOnNewLine =
311 !Style.isJavaScript() &&
313
314 if (Style.isVerilog()) {
315 // In Verilog, all strings are quoted by double quotes, joined by commas,
316 // and wrapped in braces. The comma is always before the newline.
317 assert(QuoteStyle == DoubleQuotes);
318 LeftBraceQuote = Style.Cpp11BracedListStyle ? "{\"" : "{ \"";
319 RightBraceQuote = Style.Cpp11BracedListStyle ? "\"}" : "\" }";
320 Postfix = "\",";
321 Prefix = "\"";
322 } else {
323 // The plus sign may be on either line. And also C# and JavaScript have
324 // several quoting styles.
325 if (QuoteStyle == SingleQuotes) {
328 Postfix = SignOnNewLine ? "'" : "' +";
329 Prefix = SignOnNewLine ? "+ '" : "'";
330 } else {
331 if (QuoteStyle == AtDoubleQuotes) {
333 Prefix = SignOnNewLine ? "+ @\"" : "@\"";
334 } else {
336 Prefix = SignOnNewLine ? "+ \"" : "\"";
337 }
339 Postfix = SignOnNewLine ? "\"" : "\" +";
340 }
341 }
342
343 // Following lines are indented by the width of the brace and space if any.
345 // The plus sign may need to be unindented depending on the style.
346 // FIXME: Add support for DontAlign.
347 if (!Style.isVerilog() && SignOnNewLine && !BracesNeeded && UnindentPlus &&
350 }
351}
352
354 unsigned LineIndex, unsigned Offset, unsigned StartColumn) const {
355 return UnbreakableTailLength + (BracesNeeded ? RightBraceQuote.size() : 1) +
358}
359
360unsigned
362 bool Break) const {
363 return std::max(
364 0,
365 static_cast<int>(StartColumn) +
366 (Break ? ContinuationIndent + static_cast<int>(Prefix.size())
367 : (BracesNeeded ? static_cast<int>(LeftBraceQuote.size()) - 1
368 : 0) +
369 (QuoteStyle == AtDoubleQuotes ? 2 : 1)));
370}
371
373 unsigned LineIndex, unsigned TailOffset, Split Split,
374 unsigned ContentIndent, WhitespaceManager &Whitespaces) const {
375 Whitespaces.replaceWhitespaceInToken(
376 Tok, /*Offset=*/(QuoteStyle == AtDoubleQuotes ? 2 : 1) + TailOffset +
377 Split.first,
378 /*ReplaceChars=*/Split.second, /*PreviousPostfix=*/Postfix,
379 /*CurrentPrefix=*/Prefix, InPPDirective, /*NewLines=*/1,
380 /*Spaces=*/
381 std::max(0, static_cast<int>(StartColumn) + ContinuationIndent));
382}
383
385 WhitespaceManager &Whitespaces) const {
386 // Add the braces required for breaking the token if they are needed.
387 if (!BracesNeeded)
388 return;
389
390 // To add a brace or parenthesis, we replace the quote (or the at sign) with a
391 // brace and another quote. This is because the rest of the program requires
392 // one replacement for each source range. If we replace the empty strings
393 // around the string, it may conflict with whitespace replacements between the
394 // string and adjacent tokens.
395 Whitespaces.replaceWhitespaceInToken(
396 Tok, /*Offset=*/0, /*ReplaceChars=*/1, /*PreviousPostfix=*/"",
397 /*CurrentPrefix=*/LeftBraceQuote, InPPDirective, /*NewLines=*/0,
398 /*Spaces=*/0);
399 Whitespaces.replaceWhitespaceInToken(
400 Tok, /*Offset=*/Tok.TokenText.size() - 1, /*ReplaceChars=*/1,
401 /*PreviousPostfix=*/RightBraceQuote,
402 /*CurrentPrefix=*/"", InPPDirective, /*NewLines=*/0, /*Spaces=*/0);
403}
404
406 unsigned StartColumn, bool InPPDirective,
407 encoding::Encoding Encoding,
408 const FormatStyle &Style)
409 : BreakableToken(Token, InPPDirective, Encoding, Style),
410 StartColumn(StartColumn) {}
411
// Returns the number of entries in Lines.
412unsigned BreakableComment::getLineCount() const { return Lines.size(); }
413
// Returns where the content of line LineIndex, starting at byte TailOffset, can
// be broken. Lines whose full content matches CommentPragmasRegex are never
// broken and yield Split(StringRef::npos, 0). Everything else is delegated to
// getCommentSplit with this object's tab width, encoding and style;
// DecorationEndsWithStar is left at its default of false.
// NOTE(review): listing line 414 (presumably the return type) is missing in
// front of the qualified name - confirm against upstream.
415BreakableComment::getSplit(unsigned LineIndex, unsigned TailOffset,
416 unsigned ColumnLimit, unsigned ContentStartColumn,
417 const llvm::Regex &CommentPragmasRegex) const {
418 // Don't break lines matching the comment pragmas regex.
419 if (CommentPragmasRegex.match(Content[LineIndex]))
420 return Split(StringRef::npos, 0);
421 return getCommentSplit(Content[LineIndex].substr(TailOffset),
422 ContentStartColumn, ColumnLimit, Style.TabWidth,
423 Encoding, Style);
424}
425
427 unsigned LineIndex, unsigned TailOffset, Split Split,
428 WhitespaceManager &Whitespaces) const {
429 StringRef Text = Content[LineIndex].substr(TailOffset);
430 // Text is relative to the content line, but Whitespaces operates relative to
431 // the start of the corresponding token, so compute the start of the Split
432 // that needs to be compressed into a single space relative to the start of
433 // its token.
434 unsigned BreakOffsetInToken =
435 Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
436 unsigned CharsToRemove = Split.second;
437 Whitespaces.replaceWhitespaceInToken(
438 tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", "",
439 /*InPPDirective=*/false, /*Newlines=*/0, /*Spaces=*/1);
440}
441
// Returns the token recorded for line LineIndex in Tokens, or Tok when that
// entry is null.
442const FormatToken &BreakableComment::tokenAt(unsigned LineIndex) const {
443 return Tokens[LineIndex] ? *Tokens[LineIndex] : Tok;
444}
445
// Heuristic deciding whether a comment line's text may be joined with the
// previous line. Content is first trimmed of blanks at both ends; the result is
// true only if all of the following hold:
//  - it is at least two bytes long;
//  - it does not start with one of the special prefixes below ("@", "\",
//    "TODO", "FIXME", "XXX", a list bullet, or a one- or two-digit number
//    followed by ". ");
//  - it does not end with a backslash;
//  - its first two bytes are not both punctuation.
446static bool mayReflowContent(StringRef Content) {
447 Content = Content.trim(Blanks);
448 // Lines starting with '@' or '\' commonly have special meaning.
449 // Lines starting with '-', '-#', '+' or '*' are bulleted/numbered lists.
450 bool hasSpecialMeaningPrefix = false;
451 for (StringRef Prefix :
452 {"@", "\\", "TODO", "FIXME", "XXX", "-# ", "- ", "+ ", "* "}) {
453 if (Content.starts_with(Prefix)) {
454 hasSpecialMeaningPrefix = true;
455 break;
456 }
457 }
458
459 // Numbered lists may also start with a number followed by '.'
460 // To avoid issues if a line starts with a number which is actually the end
461 // of a previous line, we only consider numbers with up to 2 digits.
462 static const auto kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\. ");
463 hasSpecialMeaningPrefix =
464 hasSpecialMeaningPrefix || kNumberedListRegexp.match(Content);
465
466 // Simple heuristic for what to reflow: content should contain at least two
467 // characters and either the first or second character must be
468 // non-punctuation.
469 return Content.size() >= 2 && !hasSpecialMeaningPrefix &&
470 !Content.ends_with("\\") &&
471 // Note that this is UTF-8 safe, since if isPunctuation(Content[0]) is
472 // true, then the first code point must be 1 byte long.
473 (!isPunctuation(Content[0]) || !isPunctuation(Content[1]));
474}
475
477 const FormatToken &Token, unsigned StartColumn,
478 unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
479 encoding::Encoding Encoding, const FormatStyle &Style, bool UseCRLF)
480 : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style),
481 DelimitersOnNewline(false),
482 UnbreakableTailLength(Token.UnbreakableTailLength) {
483 assert(Tok.is(TT_BlockComment) &&
484 "block comment section must start with a block comment");
485
486 StringRef TokenText(Tok.TokenText);
487 assert(TokenText.starts_with("/*") && TokenText.ends_with("*/"));
488 TokenText.substr(2, TokenText.size() - 4)
489 .split(Lines, UseCRLF ? "\r\n" : "\n");
490
491 int IndentDelta = StartColumn - OriginalStartColumn;
492 Content.resize(Lines.size());
493 Content[0] = Lines[0];
494 ContentColumn.resize(Lines.size());
495 // Account for the initial '/*'.
496 ContentColumn[0] = StartColumn + 2;
497 Tokens.resize(Lines.size());
498 for (size_t i = 1; i < Lines.size(); ++i)
499 adjustWhitespace(i, IndentDelta);
500
501 // Align decorations with the column of the star on the first line,
502 // that is one column after the start "/*".
503 DecorationColumn = StartColumn + 1;
504
505 // Account for comment decoration patterns like this:
506 //
507 // /*
508 // ** blah blah blah
509 // */
510 if (Lines.size() >= 2 && Content[1].starts_with("**") &&
511 static_cast<unsigned>(ContentColumn[1]) == StartColumn) {
512 DecorationColumn = StartColumn;
513 }
514
515 Decoration = "* ";
516 if (Lines.size() == 1 && !FirstInLine) {
517 // Comments for which FirstInLine is false can start on arbitrary column,
518 // and available horizontal space can be too small to align consecutive
519 // lines with the first one.
520 // FIXME: We could, probably, align them to current indentation level, but
521 // now we just wrap them without stars.
522 Decoration = "";
523 }
524 for (size_t i = 1, e = Content.size(); i < e && !Decoration.empty(); ++i) {
525 const StringRef &Text = Content[i];
526 if (i + 1 == e) {
527 // If the last line is empty, the closing "*/" will have a star.
528 if (Text.empty())
529 break;
530 } else if (!Text.empty() && Decoration.starts_with(Text)) {
531 continue;
532 }
533 while (!Text.starts_with(Decoration))
534 Decoration = Decoration.drop_back(1);
535 }
536
537 LastLineNeedsDecoration = true;
538 IndentAtLineBreak = ContentColumn[0] + 1;
539 for (size_t i = 1, e = Lines.size(); i < e; ++i) {
540 if (Content[i].empty()) {
541 if (i + 1 == e) {
542 // Empty last line means that we already have a star as a part of the
543 // trailing */. We also need to preserve whitespace, so that */ is
544 // correctly indented.
545 LastLineNeedsDecoration = false;
546 // Align the star in the last '*/' with the stars on the previous lines.
547 if (e >= 2 && !Decoration.empty())
548 ContentColumn[i] = DecorationColumn;
549 } else if (Decoration.empty()) {
550 // For all other lines, set the start column to 0 if they're empty, so
551 // we do not insert trailing whitespace anywhere.
552 ContentColumn[i] = 0;
553 }
554 continue;
555 }
556
557 // The first line already excludes the star.
558 // The last line excludes the star if LastLineNeedsDecoration is false.
559 // For all other lines, adjust the line to exclude the star and
560 // (optionally) the first whitespace.
561 unsigned DecorationSize = Decoration.starts_with(Content[i])
562 ? Content[i].size()
563 : Decoration.size();
564 if (DecorationSize)
565 ContentColumn[i] = DecorationColumn + DecorationSize;
566 Content[i] = Content[i].substr(DecorationSize);
567 if (!Decoration.starts_with(Content[i])) {
568 IndentAtLineBreak =
569 std::min<int>(IndentAtLineBreak, std::max(0, ContentColumn[i]));
570 }
571 }
572 IndentAtLineBreak = std::max<unsigned>(IndentAtLineBreak, Decoration.size());
573
574 // Detect a multiline jsdoc comment and set DelimitersOnNewline in that case.
576 if ((Lines[0] == "*" || Lines[0].starts_with("* ")) && Lines.size() > 1) {
577 // This is a multiline jsdoc comment.
578 DelimitersOnNewline = true;
579 } else if (Lines[0].starts_with("* ") && Lines.size() == 1) {
580 // Detect a long single-line comment, like:
581 // /** long long long */
582 // Below, '2' is the width of '*/'.
583 unsigned EndColumn =
584 ContentColumn[0] +
587 2;
588 DelimitersOnNewline = EndColumn > Style.ColumnLimit;
589 }
590 }
591
592 LLVM_DEBUG({
593 llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n";
594 llvm::dbgs() << "DelimitersOnNewline " << DelimitersOnNewline << "\n";
595 for (size_t i = 0; i < Lines.size(); ++i) {
596 llvm::dbgs() << i << " |" << Content[i] << "| "
597 << "CC=" << ContentColumn[i] << "| "
598 << "IN=" << (Content[i].data() - Lines[i].data()) << "\n";
599 }
600 });
601}
602
604 unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
605 unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const {
606 // Don't break lines matching the comment pragmas regex.
607 if (CommentPragmasRegex.match(Content[LineIndex]))
608 return Split(StringRef::npos, 0);
609 return getCommentSplit(Content[LineIndex].substr(TailOffset),
610 ContentStartColumn, ColumnLimit, Style.TabWidth,
611 Encoding, Style, Decoration.ends_with("*"));
612}
613
// Separates the whitespace around the line break between lines LineIndex - 1
// and LineIndex from the text on either side:
//  - Content[LineIndex - 1] is cut so that it ends at the last non-blank
//    character of the previous line (ignoring a trailing backslash when in a
//    preprocessor directive);
//  - Content[LineIndex] is set to the current line without its leading blanks;
//  - ContentColumn[LineIndex] is recomputed, with IndentDelta added.
// LineIndex must be at least 1, since the previous line is always accessed.
614void BreakableBlockComment::adjustWhitespace(unsigned LineIndex,
615 int IndentDelta) {
616 // When in a preprocessor directive, the trailing backslash in a block comment
617 // is not needed, but can serve a purpose of uniformity with necessary escaped
618 // newlines outside the comment. In this case we remove it here before
619 // trimming the trailing whitespace. The backslash will be re-added later when
620 // inserting a line break.
621 size_t EndOfPreviousLine = Lines[LineIndex - 1].size();
622 if (InPPDirective && Lines[LineIndex - 1].ends_with("\\"))
623 --EndOfPreviousLine;
624
625 // Calculate the end of the non-whitespace text in the previous line.
626 EndOfPreviousLine =
627 Lines[LineIndex - 1].find_last_not_of(Blanks, EndOfPreviousLine);
628 if (EndOfPreviousLine == StringRef::npos)
629 EndOfPreviousLine = 0;
630 else
631 ++EndOfPreviousLine;
632 // Calculate the start of the non-whitespace text in the current line.
633 size_t StartOfLine = Lines[LineIndex].find_first_not_of(Blanks);
634 if (StartOfLine == StringRef::npos)
635 StartOfLine = Lines[LineIndex].size();
636
637 StringRef Whitespace = Lines[LineIndex].substr(0, StartOfLine);
638 // Adjust Lines to only contain relevant text.
// Content[LineIndex - 1] is a view into Lines[LineIndex - 1]; keep its current
// start offset and only move its end.
639 size_t PreviousContentOffset =
640 Content[LineIndex - 1].data() - Lines[LineIndex - 1].data();
641 Content[LineIndex - 1] = Lines[LineIndex - 1].substr(
642 PreviousContentOffset, EndOfPreviousLine - PreviousContentOffset);
643 Content[LineIndex] = Lines[LineIndex].substr(StartOfLine);
644
645 // Adjust the start column uniformly across all lines.
646 ContentColumn[LineIndex] =
// NOTE(review): listing line 647 is missing here; presumably it measures the
// column width of Whitespace (that variable is otherwise unused in the visible
// lines) before IndentDelta is added - confirm against upstream.
648 IndentDelta;
649}
650
// Measures part of the content of line LineIndex: the Length bytes that start
// at byte Offset, laid out beginning at column StartColumn.
// NOTE(review): listing line 655 (the start of the return statement) is
// missing. The visible arguments - the substring, StartColumn, Style.TabWidth
// and Encoding - are presumably passed to a tab-aware column-width helper whose
// result is returned; confirm against upstream.
651unsigned BreakableBlockComment::getRangeLength(unsigned LineIndex,
652 unsigned Offset,
653 StringRef::size_type Length,
654 unsigned StartColumn) const {
656 Content[LineIndex].substr(Offset, Length), StartColumn, Style.TabWidth,
657 Encoding);
658}
659
661 unsigned Offset,
662 unsigned StartColumn) const {
663 unsigned LineLength =
664 UnbreakableTailLength +
665 getRangeLength(LineIndex, Offset, StringRef::npos, StartColumn);
666 if (LineIndex + 1 == Lines.size()) {
667 LineLength += 2;
668 // We never need a decoration when breaking just the trailing "*/" postfix.
669 bool HasRemainingText = Offset < Content[LineIndex].size();
670 if (!HasRemainingText) {
671 bool HasDecoration = Lines[LineIndex].ltrim().starts_with(Decoration);
672 if (HasDecoration)
673 LineLength -= Decoration.size();
674 }
675 }
676 return LineLength;
677}
678
680 bool Break) const {
681 if (Break)
682 return IndentAtLineBreak;
683 return std::max(0, ContentColumn[LineIndex]);
684}
685
686const llvm::StringSet<>
688 "@param", "@return", "@returns", "@throws", "@type", "@template",
689 "@see", "@deprecated", "@define", "@exports", "@mods", "@private",
690};
691
// Returns the extra indent to apply to continuation lines of line LineIndex.
// The first blank-delimited word of the line (after skipping a leading '*'
// decoration on line 0) is looked up in ContentIndentingJavadocAnnotations;
// every path visible in this listing returns 0.
// NOTE(review): listing lines 693 and 704 are missing. Line 693 is presumably
// an early-out condition guarding the first "return 0;", and line 704 is
// presumably the non-zero return taken when the first word is a known
// annotation - confirm both against upstream.
692unsigned BreakableBlockComment::getContentIndent(unsigned LineIndex) const {
694 return 0;
695 // The content at LineIndex 0 of a comment like:
696 // /** line 0 */
697 // is "* line 0", so we need to skip over the decoration in that case.
698 StringRef ContentWithNoDecoration = Content[LineIndex];
699 if (LineIndex == 0 && ContentWithNoDecoration.starts_with("*"))
700 ContentWithNoDecoration = ContentWithNoDecoration.substr(1).ltrim(Blanks);
// find_first_of yields npos when the line has no blank, in which case the whole
// remaining content is taken as the first word.
701 StringRef FirstWord = ContentWithNoDecoration.substr(
702 0, ContentWithNoDecoration.find_first_of(Blanks));
703 if (ContentIndentingJavadocAnnotations.contains(FirstWord))
705 return 0;
706}
707
// Inserts a line break into line LineIndex of the block comment.
//
// Split is relative to Content[LineIndex].substr(TailOffset): Split.second
// bytes starting at Split.first are replaced by one newline, indentation, and a
// prefix made of Decoration followed by ContentIndent spaces. When the break
// lands at the very end of the last line (right before the closing "*/"), no
// decoration is written and the indent is reduced by 2 if it is at least 2.
// The replacement is registered with Whitespaces; nothing is returned.
708void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
709 Split Split, unsigned ContentIndent,
710 WhitespaceManager &Whitespaces) const {
711 StringRef Text = Content[LineIndex].substr(TailOffset);
712 StringRef Prefix = Decoration;
713 // We need this to account for the case when we have a decoration "* " for all
714 // the lines except for the last one, where the star in "*/" acts as a
715 // decoration.
716 unsigned LocalIndentAtLineBreak = IndentAtLineBreak;
717 if (LineIndex + 1 == Lines.size() &&
718 Text.size() == Split.first + Split.second) {
719 // For the last line we need to break before "*/", but not to add "* ".
720 Prefix = "";
721 if (LocalIndentAtLineBreak >= 2)
722 LocalIndentAtLineBreak -= 2;
723 }
724 // The split offset is from the beginning of the line. Convert it to an offset
725 // from the beginning of the token text.
726 unsigned BreakOffsetInToken =
727 Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
728 unsigned CharsToRemove = Split.second;
729 assert(LocalIndentAtLineBreak >= Prefix.size());
730 std::string PrefixWithTrailingIndent = std::string(Prefix);
731 PrefixWithTrailingIndent.append(ContentIndent, ' ');
// ContentIndent cancels out in the Spaces argument below: the spaces placed in
// front of the prefix amount to LocalIndentAtLineBreak - Prefix.size(), which
// the assert above keeps from going negative.
732 Whitespaces.replaceWhitespaceInToken(
733 tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "",
734 PrefixWithTrailingIndent, InPPDirective, /*Newlines=*/1,
735 /*Spaces=*/LocalIndentAtLineBreak + ContentIndent -
736 PrefixWithTrailingIndent.size());
737}
738
740 unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
741 if (!mayReflow(LineIndex, CommentPragmasRegex))
742 return Split(StringRef::npos, 0);
743
744 // If we're reflowing into a line with content indent, only reflow the next
745 // line if its starting whitespace matches the content indent.
746 size_t Trimmed = Content[LineIndex].find_first_not_of(Blanks);
747 if (LineIndex) {
748 unsigned PreviousContentIndent = getContentIndent(LineIndex - 1);
749 if (PreviousContentIndent && Trimmed != StringRef::npos &&
750 Trimmed != PreviousContentIndent) {
751 return Split(StringRef::npos, 0);
752 }
753 }
754
755 return Split(0, Trimmed != StringRef::npos ? Trimmed : 0);
756}
757
759 // A break is introduced when we want delimiters on newline.
760 return DelimitersOnNewline &&
761 Lines[0].substr(1).find_first_not_of(Blanks) != StringRef::npos;
762}
763
// Joins line LineIndex onto the end of the previous line.
//
// Everything between the end of Content[LineIndex - 1] and the first non-blank
// character of Content[LineIndex] is replaced by ReflowPrefix, with no newline
// and no extra spaces. LineIndex must be at least 1, and both lines must belong
// to the same token (asserted). The replacement is registered with Whitespaces.
// NOTE(review): ReflowPrefix is declared outside this listing; its value is not
// visible here.
764void BreakableBlockComment::reflow(unsigned LineIndex,
765 WhitespaceManager &Whitespaces) const {
766 StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);
767 // Here we need to reflow.
768 assert(Tokens[LineIndex - 1] == Tokens[LineIndex] &&
769 "Reflowing whitespace within a token");
770 // This is the offset of the end of the last line relative to the start of
771 // the token text in the token.
772 unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +
773 Content[LineIndex - 1].size() -
774 tokenAt(LineIndex).TokenText.data();
// Number of bytes from that offset up to the first non-blank character of the
// current line.
775 unsigned WhitespaceLength = TrimmedContent.data() -
776 tokenAt(LineIndex).TokenText.data() -
777 WhitespaceOffsetInToken;
778 Whitespaces.replaceWhitespaceInToken(
779 tokenAt(LineIndex), WhitespaceOffsetInToken,
780 /*ReplaceChars=*/WhitespaceLength, /*PreviousPostfix=*/"",
781 /*CurrentPrefix=*/ReflowPrefix, InPPDirective, /*Newlines=*/0,
782 /*Spaces=*/0);
783}
784
786 unsigned LineIndex, WhitespaceManager &Whitespaces) const {
787 if (LineIndex == 0) {
788 if (DelimitersOnNewline) {
789 // Since we're breaking at index 1 below, the break position and the
790 // break length are the same.
791 // Note: this works because getCommentSplit is careful never to split at
792 // the beginning of a line.
793 size_t BreakLength = Lines[0].substr(1).find_first_not_of(Blanks);
794 if (BreakLength != StringRef::npos) {
795 insertBreak(LineIndex, 0, Split(1, BreakLength), /*ContentIndent=*/0,
796 Whitespaces);
797 }
798 }
799 return;
800 }
801 // Here no reflow with the previous line will happen.
802 // Fix the decoration of the line at LineIndex.
803 StringRef Prefix = Decoration;
804 if (Content[LineIndex].empty()) {
805 if (LineIndex + 1 == Lines.size()) {
806 if (!LastLineNeedsDecoration) {
807 // If the last line was empty, we don't need a prefix, as the */ will
808 // line up with the decoration (if it exists).
809 Prefix = "";
810 }
811 } else if (!Decoration.empty()) {
812 // For other empty lines, if we do have a decoration, adapt it to not
813 // contain a trailing whitespace.
814 Prefix = Prefix.substr(0, 1);
815 }
816 } else if (ContentColumn[LineIndex] == 1) {
817 // This line starts immediately after the decorating *.
818 Prefix = Prefix.substr(0, 1);
819 }
820 // This is the offset of the end of the last line relative to the start of the
821 // token text in the token.
822 unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +
823 Content[LineIndex - 1].size() -
824 tokenAt(LineIndex).TokenText.data();
825 unsigned WhitespaceLength = Content[LineIndex].data() -
826 tokenAt(LineIndex).TokenText.data() -
827 WhitespaceOffsetInToken;
828 Whitespaces.replaceWhitespaceInToken(
829 tokenAt(LineIndex), WhitespaceOffsetInToken, WhitespaceLength, "", Prefix,
830 InPPDirective, /*Newlines=*/1, ContentColumn[LineIndex] - Prefix.size());
831}
832
835 if (DelimitersOnNewline) {
836 // Replace the trailing whitespace of the last line with a newline.
837 // In case the last line is empty, the ending '*/' is already on its own
838 // line.
839 StringRef Line = Content.back().substr(TailOffset);
840 StringRef TrimmedLine = Line.rtrim(Blanks);
841 if (!TrimmedLine.empty())
842 return Split(TrimmedLine.size(), Line.size() - TrimmedLine.size());
843 }
844 return Split(StringRef::npos, 0);
845}
846
848 unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
849 // Content[LineIndex] may exclude the indent after the '*' decoration. In that
850 // case, we compute the start of the comment pragma manually.
851 StringRef IndentContent = Content[LineIndex];
852 if (Lines[LineIndex].ltrim(Blanks).starts_with("*"))
853 IndentContent = Lines[LineIndex].ltrim(Blanks).substr(1);
854 return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) &&
855 mayReflowContent(Content[LineIndex]) && !Tok.Finalized &&
856 !switchesFormatting(tokenAt(LineIndex));
857}
858
860 const FormatToken &Token, unsigned StartColumn, bool InPPDirective,
861 encoding::Encoding Encoding, const FormatStyle &Style)
862 : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style) {
863 assert(Tok.is(TT_LineComment) &&
864 "line comment section must start with a line comment");
865 FormatToken *LineTok = nullptr;
866 const int Minimum = Style.SpacesInLineCommentPrefix.Minimum;
867 // How many spaces we changed in the first line of the section, this will be
868 // applied in all following lines
869 int FirstLineSpaceChange = 0;
870 for (const FormatToken *CurrentTok = &Tok;
871 CurrentTok && CurrentTok->is(TT_LineComment);
872 CurrentTok = CurrentTok->Next) {
873 LastLineTok = LineTok;
874 StringRef TokenText(CurrentTok->TokenText);
875 assert((TokenText.starts_with("//") || TokenText.starts_with("#")) &&
876 "unsupported line comment prefix, '//' and '#' are supported");
877 size_t FirstLineIndex = Lines.size();
878 TokenText.split(Lines, "\n");
879 Content.resize(Lines.size());
880 ContentColumn.resize(Lines.size());
881 PrefixSpaceChange.resize(Lines.size());
882 Tokens.resize(Lines.size());
883 Prefix.resize(Lines.size());
884 OriginalPrefix.resize(Lines.size());
885 for (size_t i = FirstLineIndex, e = Lines.size(); i < e; ++i) {
886 Lines[i] = Lines[i].ltrim(Blanks);
887 StringRef IndentPrefix = getLineCommentIndentPrefix(Lines[i], Style);
888 OriginalPrefix[i] = IndentPrefix;
889 const int SpacesInPrefix = llvm::count(IndentPrefix, ' ');
890
891 // This lambda also considers multibyte character that is not handled in
892 // functions like isPunctuation provided by CharInfo.
893 const auto NoSpaceBeforeFirstCommentChar = [&]() {
894 assert(Lines[i].size() > IndentPrefix.size());
895 const char FirstCommentChar = Lines[i][IndentPrefix.size()];
896 const unsigned FirstCharByteSize =
897 encoding::getCodePointNumBytes(FirstCommentChar, Encoding);
899 Lines[i].substr(IndentPrefix.size(), FirstCharByteSize),
900 Encoding) != 1) {
901 return false;
902 }
903 // In C-like comments, add a space before #. For example this is useful
904 // to preserve the relative indentation when commenting out code with
905 // #includes.
906 //
907 // In languages using # as the comment leader such as proto, don't
908 // add a space to support patterns like:
909 // #########
910 // # section
911 // #########
912 if (FirstCommentChar == '#' && !TokenText.starts_with("#"))
913 return false;
914 return FirstCommentChar == '\\' || isPunctuation(FirstCommentChar) ||
915 isHorizontalWhitespace(FirstCommentChar);
916 };
917
918 // On the first line of the comment section we calculate how many spaces
919 // are to be added or removed, all lines after that just get only the
920 // change and we will not look at the maximum anymore. Additionally to the
921 // actual first line, we calculate that when the non space Prefix changes,
922 // e.g. from "///" to "//".
923 if (i == 0 || OriginalPrefix[i].rtrim(Blanks) !=
924 OriginalPrefix[i - 1].rtrim(Blanks)) {
925 if (SpacesInPrefix < Minimum && Lines[i].size() > IndentPrefix.size() &&
926 !NoSpaceBeforeFirstCommentChar()) {
927 FirstLineSpaceChange = Minimum - SpacesInPrefix;
928 } else if (static_cast<unsigned>(SpacesInPrefix) >
930 FirstLineSpaceChange =
931 Style.SpacesInLineCommentPrefix.Maximum - SpacesInPrefix;
932 } else {
933 FirstLineSpaceChange = 0;
934 }
935 }
936
937 if (Lines[i].size() != IndentPrefix.size()) {
938 PrefixSpaceChange[i] = FirstLineSpaceChange;
939
940 if (SpacesInPrefix + PrefixSpaceChange[i] < Minimum) {
941 PrefixSpaceChange[i] +=
942 Minimum - (SpacesInPrefix + PrefixSpaceChange[i]);
943 }
944
945 assert(Lines[i].size() > IndentPrefix.size());
946 const auto FirstNonSpace = Lines[i][IndentPrefix.size()];
947 const bool IsFormatComment = LineTok && switchesFormatting(*LineTok);
948 const bool LineRequiresLeadingSpace =
949 !NoSpaceBeforeFirstCommentChar() ||
950 (FirstNonSpace == '}' && FirstLineSpaceChange != 0);
951 const bool AllowsSpaceChange =
952 !IsFormatComment &&
953 (SpacesInPrefix != 0 || LineRequiresLeadingSpace);
954
955 if (PrefixSpaceChange[i] > 0 && AllowsSpaceChange) {
956 Prefix[i] = IndentPrefix.str();
957 Prefix[i].append(PrefixSpaceChange[i], ' ');
958 } else if (PrefixSpaceChange[i] < 0 && AllowsSpaceChange) {
959 Prefix[i] = IndentPrefix
960 .drop_back(std::min<std::size_t>(
961 -PrefixSpaceChange[i], SpacesInPrefix))
962 .str();
963 } else {
964 Prefix[i] = IndentPrefix.str();
965 }
966 } else {
967 // If the IndentPrefix is the whole line, there is no content and we
968 // drop just all space
969 Prefix[i] = IndentPrefix.drop_back(SpacesInPrefix).str();
970 }
971
972 Tokens[i] = LineTok;
973 Content[i] = Lines[i].substr(IndentPrefix.size());
974 ContentColumn[i] =
977
978 // Calculate the end of the non-whitespace text in this line.
979 size_t EndOfLine = Content[i].find_last_not_of(Blanks);
980 if (EndOfLine == StringRef::npos)
981 EndOfLine = Content[i].size();
982 else
983 ++EndOfLine;
984 Content[i] = Content[i].substr(0, EndOfLine);
985 }
986 LineTok = CurrentTok->Next;
987 if (CurrentTok->Next && !CurrentTok->Next->ContinuesLineCommentSection) {
988 // A line comment section needs to broken by a line comment that is
989 // preceded by at least two newlines. Note that we put this break here
990 // instead of breaking at a previous stage during parsing, since that
991 // would split the contents of the enum into two unwrapped lines in this
992 // example, which is undesirable:
993 // enum A {
994 // a, // comment about a
995 //
996 // // comment about b
997 // b
998 // };
999 //
1000 // FIXME: Consider putting separate line comment sections as children to
1001 // the unwrapped line instead.
1002 break;
1003 }
1004 }
1005}
1006
1007unsigned
1008BreakableLineCommentSection::getRangeLength(unsigned LineIndex, unsigned Offset,
1009 StringRef::size_type Length,
1010 unsigned StartColumn) const {
1012 Content[LineIndex].substr(Offset, Length), StartColumn, Style.TabWidth,
1013 Encoding);
1014}
1015
1016unsigned
1018 bool /*Break*/) const {
1019 return ContentColumn[LineIndex];
1020}
1021
1023 unsigned LineIndex, unsigned TailOffset, Split Split,
1024 unsigned ContentIndent, WhitespaceManager &Whitespaces) const {
1025 StringRef Text = Content[LineIndex].substr(TailOffset);
1026 // Compute the offset of the split relative to the beginning of the token
1027 // text.
1028 unsigned BreakOffsetInToken =
1029 Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
1030 unsigned CharsToRemove = Split.second;
1031 Whitespaces.replaceWhitespaceInToken(
1032 tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "",
1033 Prefix[LineIndex], InPPDirective, /*Newlines=*/1,
1034 /*Spaces=*/ContentColumn[LineIndex] - Prefix[LineIndex].size());
1035}
1036
1038 unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
1039 if (!mayReflow(LineIndex, CommentPragmasRegex))
1040 return Split(StringRef::npos, 0);
1041
1042 size_t Trimmed = Content[LineIndex].find_first_not_of(Blanks);
1043
1044 // In a line comment section each line is a separate token; thus, after a
1045 // split we replace all whitespace before the current line comment token
1046 // (which does not need to be included in the split), plus the start of the
1047 // line up to where the content starts.
1048 return Split(0, Trimmed != StringRef::npos ? Trimmed : 0);
1049}
1050
1052 WhitespaceManager &Whitespaces) const {
1053 if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) {
1054 // Reflow happens between tokens. Replace the whitespace between the
1055 // tokens by the empty string.
1056 Whitespaces.replaceWhitespace(
1057 *Tokens[LineIndex], /*Newlines=*/0, /*Spaces=*/0,
1058 /*StartOfTokenColumn=*/StartColumn, /*IsAligned=*/true,
1059 /*InPPDirective=*/false);
1060 } else if (LineIndex > 0) {
1061 // In case we're reflowing after the '\' in:
1062 //
1063 // // line comment \
1064 // // line 2
1065 //
1066 // the reflow happens inside the single comment token (it is a single line
1067 // comment with an unescaped newline).
1068 // Replace the whitespace between the '\' and '//' with the empty string.
1069 //
1070 // Offset points to after the '\' relative to start of the token.
1071 unsigned Offset = Lines[LineIndex - 1].data() +
1072 Lines[LineIndex - 1].size() -
1073 tokenAt(LineIndex - 1).TokenText.data();
1074 // WhitespaceLength is the number of chars between the '\' and the '//' on
1075 // the next line.
1076 unsigned WhitespaceLength =
1077 Lines[LineIndex].data() - tokenAt(LineIndex).TokenText.data() - Offset;
1078 Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex], Offset,
1079 /*ReplaceChars=*/WhitespaceLength,
1080 /*PreviousPostfix=*/"",
1081 /*CurrentPrefix=*/"",
1082 /*InPPDirective=*/false,
1083 /*Newlines=*/0,
1084 /*Spaces=*/0);
1085 }
1086 // Replace the indent and prefix of the token with the reflow prefix.
1087 unsigned Offset =
1088 Lines[LineIndex].data() - tokenAt(LineIndex).TokenText.data();
1089 unsigned WhitespaceLength =
1090 Content[LineIndex].data() - Lines[LineIndex].data();
1091 Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex], Offset,
1092 /*ReplaceChars=*/WhitespaceLength,
1093 /*PreviousPostfix=*/"",
1094 /*CurrentPrefix=*/ReflowPrefix,
1095 /*InPPDirective=*/false,
1096 /*Newlines=*/0,
1097 /*Spaces=*/0);
1098}
1099
1101 unsigned LineIndex, WhitespaceManager &Whitespaces) const {
1102 // If this is the first line of a token, we need to inform Whitespace Manager
1103 // about it: either adapt the whitespace range preceding it, or mark it as an
1104 // untouchable token.
1105 // This happens for instance here:
1106 // // line 1 \
1107 // // line 2
1108 if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) {
1109 // This is the first line for the current token, but no reflow with the
1110 // previous token is necessary. However, we still may need to adjust the
1111 // start column. Note that ContentColumn[LineIndex] is the expected
1112 // content column after a possible update to the prefix, hence the prefix
1113 // length change is included.
1114 unsigned LineColumn =
1115 ContentColumn[LineIndex] -
1116 (Content[LineIndex].data() - Lines[LineIndex].data()) +
1117 (OriginalPrefix[LineIndex].size() - Prefix[LineIndex].size());
1118
1119 // We always want to create a replacement instead of adding an untouchable
1120 // token, even if LineColumn is the same as the original column of the
1121 // token. This is because WhitespaceManager doesn't align trailing
1122 // comments if they are untouchable.
1123 Whitespaces.replaceWhitespace(*Tokens[LineIndex],
1124 /*Newlines=*/1,
1125 /*Spaces=*/LineColumn,
1126 /*StartOfTokenColumn=*/LineColumn,
1127 /*IsAligned=*/true,
1128 /*InPPDirective=*/false);
1129 }
1130 if (OriginalPrefix[LineIndex] != Prefix[LineIndex]) {
1131 // Adjust the prefix if necessary.
1132 const auto SpacesToRemove = -std::min(PrefixSpaceChange[LineIndex], 0);
1133 const auto SpacesToAdd = std::max(PrefixSpaceChange[LineIndex], 0);
1134 Whitespaces.replaceWhitespaceInToken(
1135 tokenAt(LineIndex), OriginalPrefix[LineIndex].size() - SpacesToRemove,
1136 /*ReplaceChars=*/SpacesToRemove, "", "", /*InPPDirective=*/false,
1137 /*Newlines=*/0, /*Spaces=*/SpacesToAdd);
1138 }
1139}
1140
1142 if (LastLineTok)
1143 State.NextToken = LastLineTok->Next;
1144}
1145
1147 unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
1148 // Line comments have the indent as part of the prefix, so we need to
1149 // recompute the start of the line.
1150 StringRef IndentContent = Content[LineIndex];
1151 if (Lines[LineIndex].starts_with("//"))
1152 IndentContent = Lines[LineIndex].substr(2);
1153 // FIXME: Decide whether we want to reflow non-regular indents:
1154 // Currently, we only reflow when the OriginalPrefix[LineIndex] matches the
1155 // OriginalPrefix[LineIndex-1]. That means we don't reflow
1156 // // text that protrudes
1157 // // into text with different indent
1158 // We do reflow in that case in block comments.
1159 return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) &&
1160 mayReflowContent(Content[LineIndex]) && !Tok.Finalized &&
1161 !switchesFormatting(tokenAt(LineIndex)) &&
1162 OriginalPrefix[LineIndex] == OriginalPrefix[LineIndex - 1];
1163}
1164
1165} // namespace format
1166} // namespace clang
Declares BreakableToken, BreakableStringLiteral, BreakableComment, BreakableBlockComment and Breakabl...
This file implements an indenter that manages the indentation of continuations.
StringRef Text
Definition: Format.cpp:2953
Token - This structure provides full information about a lexed token.
Definition: Token.h:36
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {....
Definition: Token.h:99
void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, unsigned ContentIndent, WhitespaceManager &Whitespaces) const override
Emits the previously retrieved Split via Whitespaces.
unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override
Returns the column at which content in line LineIndex starts, assuming no reflow.
Split getReflowSplit(unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const override
Returns a whitespace range (offset, length) of the content at LineIndex such that the content of that...
bool mayReflow(unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const override
Split getSplitAfterLastLine(unsigned TailOffset) const override
Returns a whitespace range (offset, length) of the content at the last line that needs to be reformat...
BreakableBlockComment(const FormatToken &Token, unsigned StartColumn, unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style, bool UseCRLF)
static const llvm::StringSet ContentIndentingJavadocAnnotations
unsigned getRemainingLength(unsigned LineIndex, unsigned Offset, unsigned StartColumn) const override
Returns the number of columns required to format the text following the byte Offset in the line LineI...
unsigned getContentIndent(unsigned LineIndex) const override
Returns additional content indent required for the second line after the content at line LineIndex is...
void adaptStartOfLine(unsigned LineIndex, WhitespaceManager &Whitespaces) const override
Replaces the whitespace between LineIndex-1 and LineIndex.
void reflow(unsigned LineIndex, WhitespaceManager &Whitespaces) const override
Reflows the current line into the end of the previous one.
unsigned getRangeLength(unsigned LineIndex, unsigned Offset, StringRef::size_type Length, unsigned StartColumn) const override
Returns the number of columns required to format the text in the byte range [Offset,...
Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const override
Returns a range (offset, length) at which to break the line at LineIndex, if previously broken at Tai...
bool introducesBreakBeforeToken() const override
Returns whether there will be a line break at the start of the token.
unsigned getLineCount() const override
Returns the number of lines in this token in the original code.
SmallVector< StringRef, 16 > Lines
SmallVector< int, 16 > ContentColumn
Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const override
Returns a range (offset, length) at which to break the line at LineIndex, if previously broken at Tai...
SmallVector< FormatToken *, 16 > Tokens
SmallVector< StringRef, 16 > Content
void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split, WhitespaceManager &Whitespaces) const override
Replaces the whitespace range described by Split with a single space.
const FormatToken & tokenAt(unsigned LineIndex) const
BreakableComment(const FormatToken &Token, unsigned StartColumn, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
Creates a breakable token for a comment.
BreakableLineCommentSection(const FormatToken &Token, unsigned StartColumn, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override
Returns the column at which content in line LineIndex starts, assuming no reflow.
void reflow(unsigned LineIndex, WhitespaceManager &Whitespaces) const override
Reflows the current line into the end of the previous one.
Split getReflowSplit(unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const override
Returns a whitespace range (offset, length) of the content at LineIndex such that the content of that...
void updateNextToken(LineState &State) const override
Updates the next token of State to the next token after this one.
bool mayReflow(unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const override
void adaptStartOfLine(unsigned LineIndex, WhitespaceManager &Whitespaces) const override
Replaces the whitespace between LineIndex-1 and LineIndex.
void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, unsigned ContentIndent, WhitespaceManager &Whitespaces) const override
Emits the previously retrieved Split via Whitespaces.
unsigned getRangeLength(unsigned LineIndex, unsigned Offset, StringRef::size_type Length, unsigned StartColumn) const override
Returns the number of columns required to format the text in the byte range [Offset,...
BreakableStringLiteralUsingOperators(const FormatToken &Tok, QuoteStyleType QuoteStyle, bool UnindentPlus, unsigned StartColumn, unsigned UnbreakableTailLength, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
Creates a breakable token for a single line string literal for C#, Java, JavaScript,...
void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, unsigned ContentIndent, WhitespaceManager &Whitespaces) const override
Emits the previously retrieved Split via Whitespaces.
unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override
Returns the column at which content in line LineIndex starts, assuming no reflow.
void updateAfterBroken(WhitespaceManager &Whitespaces) const override
Adds replacements that are needed when the token is broken.
unsigned getRemainingLength(unsigned LineIndex, unsigned Offset, unsigned StartColumn) const override
Returns the number of columns required to format the text following the byte Offset in the line LineI...
Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const override
Returns a range (offset, length) at which to break the line at LineIndex, if previously broken at Tai...
unsigned getRangeLength(unsigned LineIndex, unsigned Offset, StringRef::size_type Length, unsigned StartColumn) const override
Returns the number of columns required to format the text in the byte range [Offset,...
unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override
Returns the column at which content in line LineIndex starts, assuming no reflow.
unsigned getLineCount() const override
Returns the number of lines in this token in the original code.
unsigned getRemainingLength(unsigned LineIndex, unsigned Offset, unsigned StartColumn) const override
Returns the number of columns required to format the text following the byte Offset in the line LineI...
BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn, StringRef Prefix, StringRef Postfix, unsigned UnbreakableTailLength, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
Creates a breakable token for a single line string literal.
void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, unsigned ContentIndent, WhitespaceManager &Whitespaces) const override
Emits the previously retrieved Split via Whitespaces.
Base class for tokens / ranges of tokens that can allow breaking within the tokens - for example,...
std::pair< StringRef::size_type, unsigned > Split
Contains starting character index and length of split.
unsigned getLengthAfterCompression(unsigned RemainingTokenColumns, Split Split) const
Returns the number of columns needed to format RemainingTokenColumns, assuming that Split is within t...
const encoding::Encoding Encoding
Manages the whitespaces around tokens and their replacements.
unsigned columnWidthWithTabs(StringRef Text, unsigned StartColumn, unsigned TabWidth, Encoding Encoding)
Returns the number of columns required to display the Text, starting from the StartColumn on a termin...
Definition: Encoding.h:60
unsigned getEscapeSequenceLength(StringRef Text)
Gets the length of an escape sequence inside a C++ string literal.
Definition: Encoding.h:96
unsigned getCodePointNumBytes(char FirstChar, Encoding Encoding)
Gets the number of bytes in a sequence representing a single codepoint and starting with FirstChar in...
Definition: Encoding.h:77
unsigned columnWidth(StringRef Text, Encoding Encoding)
Returns the number of columns required to display the Text on a generic Unicode-capable terminal.
Definition: Encoding.h:44
static constexpr StringRef Blanks
bool switchesFormatting(const FormatToken &Token)
Checks if Token switches formatting, like /* clang-format off */.
static bool IsBlank(char C)
static BreakableToken::Split getStringSplit(StringRef Text, unsigned UsedColumns, unsigned ColumnLimit, unsigned TabWidth, encoding::Encoding Encoding)
static StringRef getLineCommentIndentPrefix(StringRef Comment, const FormatStyle &Style)
static bool mayReflowContent(StringRef Content)
static BreakableToken::Split getCommentSplit(StringRef Text, unsigned ContentStartColumn, unsigned ColumnLimit, unsigned TabWidth, encoding::Encoding Encoding, const FormatStyle &Style, bool DecorationEndsWithStar=false)
The JSON file list parser is used to communicate input to InstallAPI.
LLVM_READONLY bool isAlphanumeric(unsigned char c)
Return true if this character is an ASCII letter or digit: [a-zA-Z0-9].
Definition: CharInfo.h:139
LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)
Returns true if this character is horizontal ASCII whitespace: ' ', '\t', '\f', '\v'.
Definition: CharInfo.h:92
LLVM_READONLY bool isPunctuation(unsigned char c)
Return true if this character is an ASCII punctuation character.
Definition: CharInfo.h:153
#define false
Definition: stdbool.h:22
unsigned Maximum
The maximum number of spaces at the start of the comment.
Definition: Format.h:4515
unsigned Minimum
The minimum number of spaces at the start of the comment.
Definition: Format.h:4513
bool Other
Put a space in parentheses not covered by preceding options.
Definition: Format.h:4620
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:55
unsigned ContinuationIndentWidth
Indent width for line continuations.
Definition: Format.h:2369
@ LK_Java
Should be used for Java.
Definition: Format.h:3093
@ LK_TextProto
Should be used for Protocol Buffer messages in text format (https://developers.google....
Definition: Format.h:3107
bool Cpp11BracedListStyle
If true, format braced lists as best suited for C++11 braced lists.
Definition: Format.h:2392
BinaryOperatorStyle BreakBeforeBinaryOperators
The way to wrap binary operators.
Definition: Format.h:1676
@ BOS_None
Break after operators.
Definition: Format.h:1647
LanguageKind Language
Language, this format style is targeted at.
Definition: Format.h:3125
unsigned TabWidth
The number of columns used for tab stops.
Definition: Format.h:4733
OperandAlignmentStyle AlignOperands
If true, horizontally align operands of binary and ternary expressions.
Definition: Format.h:509
SpacesInParensCustom SpacesInParensOptions
Control of individual spaces in parentheses.
Definition: Format.h:4655
bool isVerilog() const
Definition: Format.h:3117
SpacesInLineComment SpacesInLineCommentPrefix
How many spaces are allowed at the start of a line comment.
Definition: Format.h:4550
bool isJavaScript() const
Definition: Format.h:3116
@ OAS_AlignAfterOperator
Horizontally align operands of binary and ternary expressions.
Definition: Format.h:503
unsigned ColumnLimit
The column limit.
Definition: Format.h:2270
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:283
StringRef TokenText
The raw text of the token.
Definition: FormatToken.h:303
unsigned Finalized
If true, this token has been fully formatted (indented and potentially re-formatted inside),...
Definition: FormatToken.h:362
FormatToken * Next
The next token in the unwrapped line.
Definition: FormatToken.h:555
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:592
The current state when indenting an unwrapped line.