clang 22.0.0git
BreakableToken.cpp
Go to the documentation of this file.
1//===--- BreakableToken.cpp - Format C++ code -----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// Contains implementation of BreakableToken class and classes derived
11/// from it.
12///
13//===----------------------------------------------------------------------===//
14
15#include "BreakableToken.h"
18#include "clang/Format/Format.h"
19#include "llvm/ADT/STLExtras.h"
20#include "llvm/Support/Debug.h"
21#include <algorithm>
22
23#define DEBUG_TYPE "format-token-breaker"
24
25namespace clang {
26namespace format {
27
28static constexpr StringRef Blanks(" \t\v\f\r");
29
30static StringRef getLineCommentIndentPrefix(StringRef Comment,
31 const FormatStyle &Style) {
32 static constexpr StringRef KnownCStylePrefixes[] = {"///<", "//!<", "///",
33 "//!", "//:", "//"};
34 static constexpr StringRef KnownTextProtoPrefixes[] = {"####", "###", "##",
35 "//", "#"};
36 ArrayRef<StringRef> KnownPrefixes(KnownCStylePrefixes);
37 if (Style.isTextProto())
38 KnownPrefixes = KnownTextProtoPrefixes;
39
40 assert(
41 llvm::is_sorted(KnownPrefixes, [](StringRef Lhs, StringRef Rhs) noexcept {
42 return Lhs.size() > Rhs.size();
43 }));
44
45 for (StringRef KnownPrefix : KnownPrefixes) {
46 if (Comment.starts_with(KnownPrefix)) {
47 const auto PrefixLength =
48 Comment.find_first_not_of(' ', KnownPrefix.size());
49 return Comment.substr(0, PrefixLength);
50 }
51 }
52 return {};
53}
54
56getCommentSplit(StringRef Text, unsigned ContentStartColumn,
57 unsigned ColumnLimit, unsigned TabWidth,
58 encoding::Encoding Encoding, const FormatStyle &Style,
59 bool DecorationEndsWithStar = false) {
60 LLVM_DEBUG(llvm::dbgs() << "Comment split: \"" << Text
61 << "\", Column limit: " << ColumnLimit
62 << ", Content start: " << ContentStartColumn << "\n");
63 if (ColumnLimit <= ContentStartColumn + 1)
64 return BreakableToken::Split(StringRef::npos, 0);
65
66 unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1;
67 unsigned MaxSplitBytes = 0;
68
69 for (unsigned NumChars = 0;
70 NumChars < MaxSplit && MaxSplitBytes < Text.size();) {
71 unsigned BytesInChar =
72 encoding::getCodePointNumBytes(Text[MaxSplitBytes], Encoding);
74 Text.substr(MaxSplitBytes, BytesInChar), ContentStartColumn + NumChars,
75 TabWidth, Encoding);
76 MaxSplitBytes += BytesInChar;
77 }
78
79 // In JavaScript, some @tags can be followed by {, and machinery that parses
80 // these comments will fail to understand the comment if followed by a line
81 // break. So avoid ever breaking before a {.
82 if (Style.isJavaScript()) {
83 StringRef::size_type SpaceOffset =
84 Text.find_first_of(Blanks, MaxSplitBytes);
85 if (SpaceOffset != StringRef::npos && SpaceOffset + 1 < Text.size() &&
86 Text[SpaceOffset + 1] == '{') {
87 MaxSplitBytes = SpaceOffset + 1;
88 }
89 }
90
91 StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes);
92
93 static const auto kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\.");
94 // Some spaces are unacceptable to break on, rewind past them.
95 while (SpaceOffset != StringRef::npos) {
96 // If a line-comment ends with `\`, the next line continues the comment,
97 // whether or not it starts with `//`. This is confusing and triggers
98 // -Wcomment.
99 // Avoid introducing multiline comments by not allowing a break right
100 // after '\'.
101 if (Style.isCpp()) {
102 StringRef::size_type LastNonBlank =
103 Text.find_last_not_of(Blanks, SpaceOffset);
104 if (LastNonBlank != StringRef::npos && Text[LastNonBlank] == '\\') {
105 SpaceOffset = Text.find_last_of(Blanks, LastNonBlank);
106 continue;
107 }
108 }
109
110 // Do not split before a number followed by a dot: this would be interpreted
111 // as a numbered list, which would prevent re-flowing in subsequent passes.
112 if (kNumberedListRegexp.match(Text.substr(SpaceOffset).ltrim(Blanks))) {
113 SpaceOffset = Text.find_last_of(Blanks, SpaceOffset);
114 continue;
115 }
116
117 // Avoid ever breaking before a @tag or a { in JavaScript.
118 if (Style.isJavaScript() && SpaceOffset + 1 < Text.size() &&
119 (Text[SpaceOffset + 1] == '{' || Text[SpaceOffset + 1] == '@')) {
120 SpaceOffset = Text.find_last_of(Blanks, SpaceOffset);
121 continue;
122 }
123
124 break;
125 }
126
127 if (SpaceOffset == StringRef::npos ||
128 // Don't break at leading whitespace.
129 Text.find_last_not_of(Blanks, SpaceOffset) == StringRef::npos) {
130 // Make sure that we don't break at leading whitespace that
131 // reaches past MaxSplit.
132 StringRef::size_type FirstNonWhitespace = Text.find_first_not_of(Blanks);
133 if (FirstNonWhitespace == StringRef::npos) {
134 // If the comment is only whitespace, we cannot split.
135 return BreakableToken::Split(StringRef::npos, 0);
136 }
137 SpaceOffset = Text.find_first_of(
138 Blanks, std::max<unsigned>(MaxSplitBytes, FirstNonWhitespace));
139 }
140 if (SpaceOffset != StringRef::npos && SpaceOffset != 0) {
141 // adaptStartOfLine will break after lines starting with /** if the comment
142 // is broken anywhere. Avoid emitting this break twice here.
143 // Example: in /** longtextcomesherethatbreaks */ (with ColumnLimit 20) will
144 // insert a break after /**, so this code must not insert the same break.
145 if (SpaceOffset == 1 && Text[SpaceOffset - 1] == '*')
146 return BreakableToken::Split(StringRef::npos, 0);
147 StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim(Blanks);
148 StringRef AfterCut = Text.substr(SpaceOffset);
149 if (!DecorationEndsWithStar)
150 AfterCut = AfterCut.ltrim(Blanks);
151 return BreakableToken::Split(BeforeCut.size(),
152 AfterCut.begin() - BeforeCut.end());
153 }
154 return BreakableToken::Split(StringRef::npos, 0);
155}
156
158getStringSplit(StringRef Text, unsigned UsedColumns, unsigned ColumnLimit,
159 unsigned TabWidth, encoding::Encoding Encoding) {
160 // FIXME: Reduce unit test case.
161 if (Text.empty())
162 return BreakableToken::Split(StringRef::npos, 0);
163 if (ColumnLimit <= UsedColumns)
164 return BreakableToken::Split(StringRef::npos, 0);
165 unsigned MaxSplit = ColumnLimit - UsedColumns;
166 StringRef::size_type SpaceOffset = 0;
167 StringRef::size_type SlashOffset = 0;
168 StringRef::size_type WordStartOffset = 0;
169 StringRef::size_type SplitPoint = 0;
170 for (unsigned Chars = 0;;) {
171 unsigned Advance;
172 if (Text[0] == '\\') {
174 Chars += Advance;
175 } else {
176 Advance = encoding::getCodePointNumBytes(Text[0], Encoding);
178 Text.substr(0, Advance), UsedColumns + Chars, TabWidth, Encoding);
179 }
180
181 if (Chars > MaxSplit || Text.size() <= Advance)
182 break;
183
184 if (Blanks.contains(Text[0]))
185 SpaceOffset = SplitPoint;
186 if (Text[0] == '/')
187 SlashOffset = SplitPoint;
188 if (Advance == 1 && !isAlphanumeric(Text[0]))
189 WordStartOffset = SplitPoint;
190
191 SplitPoint += Advance;
192 Text = Text.substr(Advance);
193 }
194
195 if (SpaceOffset != 0)
196 return BreakableToken::Split(SpaceOffset + 1, 0);
197 if (SlashOffset != 0)
198 return BreakableToken::Split(SlashOffset + 1, 0);
199 if (WordStartOffset != 0)
200 return BreakableToken::Split(WordStartOffset + 1, 0);
201 if (SplitPoint != 0)
202 return BreakableToken::Split(SplitPoint, 0);
203 return BreakableToken::Split(StringRef::npos, 0);
204}
205
207 assert((Token.is(TT_BlockComment) || Token.is(TT_LineComment)) &&
208 "formatting regions are switched by comment tokens");
209 StringRef Content = Token.TokenText.substr(2).ltrim();
210 return Content.starts_with("clang-format on") ||
211 Content.starts_with("clang-format off");
212}
213
214unsigned
215BreakableToken::getLengthAfterCompression(unsigned RemainingTokenColumns,
216 Split Split) const {
217 // Example: consider the content
218 // lala lala
219 // - RemainingTokenColumns is the original number of columns, 10;
220 // - Split is (4, 2), denoting the two spaces between the two words;
221 //
222 // We compute the number of columns when the split is compressed into a single
223 // space, like:
224 // lala lala
225 //
226 // FIXME: Correctly measure the length of whitespace in Split.second so it
227 // works with tabs.
228 return RemainingTokenColumns + 1 - Split.second;
229}
230
231unsigned BreakableStringLiteral::getLineCount() const { return 1; }
232
233unsigned BreakableStringLiteral::getRangeLength(unsigned LineIndex,
234 unsigned Offset,
235 StringRef::size_type Length,
236 unsigned StartColumn) const {
237 llvm_unreachable("Getting the length of a part of the string literal "
238 "indicates that the code tries to reflow it.");
239}
240
241unsigned
242BreakableStringLiteral::getRemainingLength(unsigned LineIndex, unsigned Offset,
243 unsigned StartColumn) const {
244 return UnbreakableTailLength + Postfix.size() +
246 Style.TabWidth, Encoding);
247}
248
250 bool Break) const {
251 return StartColumn + Prefix.size();
252}
253
255 const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,
256 StringRef Postfix, unsigned UnbreakableTailLength, bool InPPDirective,
257 encoding::Encoding Encoding, const FormatStyle &Style)
261 assert(Tok.TokenText.starts_with(Prefix) && Tok.TokenText.ends_with(Postfix));
262 Line = Tok.TokenText.substr(
263 Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size());
264}
265
267 unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
268 unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const {
269 return getStringSplit(Line.substr(TailOffset), ContentStartColumn,
270 ColumnLimit - Postfix.size(), Style.TabWidth, Encoding);
271}
272
274 unsigned TailOffset, Split Split,
275 unsigned ContentIndent,
276 WhitespaceManager &Whitespaces) const {
277 Whitespaces.replaceWhitespaceInToken(
278 Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix,
280}
281
283 const FormatToken &Tok, QuoteStyleType QuoteStyle, bool UnindentPlus,
284 unsigned StartColumn, unsigned UnbreakableTailLength, bool InPPDirective,
285 encoding::Encoding Encoding, const FormatStyle &Style)
287 Tok, StartColumn, /*Prefix=*/QuoteStyle == SingleQuotes ? "'"
288 : QuoteStyle == AtDoubleQuotes ? "@\""
289 : "\"",
290 /*Postfix=*/QuoteStyle == SingleQuotes ? "'" : "\"",
292 BracesNeeded(Tok.isNot(TT_StringInConcatenation)),
294 // Find the replacement text for inserting braces and quotes and line breaks.
295 // We don't create an allocated string concatenated from parts here because it
296 // has to outlive the BreakableStringliteral object. The brace replacements
297 // include a quote so that WhitespaceManager can tell it apart from whitespace
298 // replacements between the string and surrounding tokens.
299
300 // The option is not implemented in JavaScript.
301 bool SignOnNewLine =
302 !Style.isJavaScript() &&
303 Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None;
304
305 if (Style.isVerilog()) {
306 // In Verilog, all strings are quoted by double quotes, joined by commas,
307 // and wrapped in braces. The comma is always before the newline.
308 assert(QuoteStyle == DoubleQuotes);
309 LeftBraceQuote =
310 Style.Cpp11BracedListStyle != FormatStyle::BLS_Block ? "{\"" : "{ \"";
311 RightBraceQuote =
312 Style.Cpp11BracedListStyle != FormatStyle::BLS_Block ? "\"}" : "\" }";
313 Postfix = "\",";
314 Prefix = "\"";
315 } else {
316 // The plus sign may be on either line. And also C# and JavaScript have
317 // several quoting styles.
318 if (QuoteStyle == SingleQuotes) {
319 LeftBraceQuote = Style.SpacesInParensOptions.Other ? "( '" : "('";
320 RightBraceQuote = Style.SpacesInParensOptions.Other ? "' )" : "')";
321 Postfix = SignOnNewLine ? "'" : "' +";
322 Prefix = SignOnNewLine ? "+ '" : "'";
323 } else {
324 if (QuoteStyle == AtDoubleQuotes) {
325 LeftBraceQuote = Style.SpacesInParensOptions.Other ? "( @" : "(@";
326 Prefix = SignOnNewLine ? "+ @\"" : "@\"";
327 } else {
328 LeftBraceQuote = Style.SpacesInParensOptions.Other ? "( \"" : "(\"";
329 Prefix = SignOnNewLine ? "+ \"" : "\"";
330 }
331 RightBraceQuote = Style.SpacesInParensOptions.Other ? "\" )" : "\")";
332 Postfix = SignOnNewLine ? "\"" : "\" +";
333 }
334 }
335
336 // Following lines are indented by the width of the brace and space if any.
337 ContinuationIndent = BracesNeeded ? LeftBraceQuote.size() - 1 : 0;
338 // The plus sign may need to be unindented depending on the style.
339 // FIXME: Add support for DontAlign.
340 if (!Style.isVerilog() && SignOnNewLine && !BracesNeeded && UnindentPlus &&
341 Style.AlignOperands == FormatStyle::OAS_AlignAfterOperator) {
342 ContinuationIndent -= 2;
343 }
344}
345
347 unsigned LineIndex, unsigned Offset, unsigned StartColumn) const {
348 return UnbreakableTailLength + (BracesNeeded ? RightBraceQuote.size() : 1) +
350 Style.TabWidth, Encoding);
351}
352
353unsigned
355 bool Break) const {
356 return std::max(
357 0,
358 static_cast<int>(StartColumn) +
359 (Break ? ContinuationIndent + static_cast<int>(Prefix.size())
360 : (BracesNeeded ? static_cast<int>(LeftBraceQuote.size()) - 1
361 : 0) +
362 (QuoteStyle == AtDoubleQuotes ? 2 : 1)));
363}
364
366 unsigned LineIndex, unsigned TailOffset, Split Split,
367 unsigned ContentIndent, WhitespaceManager &Whitespaces) const {
368 Whitespaces.replaceWhitespaceInToken(
369 Tok, /*Offset=*/(QuoteStyle == AtDoubleQuotes ? 2 : 1) + TailOffset +
370 Split.first,
371 /*ReplaceChars=*/Split.second, /*PreviousPostfix=*/Postfix,
372 /*CurrentPrefix=*/Prefix, InPPDirective, /*NewLines=*/1,
373 /*Spaces=*/
374 std::max(0, static_cast<int>(StartColumn) + ContinuationIndent));
375}
376
378 WhitespaceManager &Whitespaces) const {
379 // Add the braces required for breaking the token if they are needed.
380 if (!BracesNeeded)
381 return;
382
383 // To add a brace or parenthesis, we replace the quote (or the at sign) with a
384 // brace and another quote. This is because the rest of the program requires
385 // one replacement for each source range. If we replace the empty strings
386 // around the string, it may conflict with whitespace replacements between the
387 // string and adjacent tokens.
388 Whitespaces.replaceWhitespaceInToken(
389 Tok, /*Offset=*/0, /*ReplaceChars=*/1, /*PreviousPostfix=*/"",
390 /*CurrentPrefix=*/LeftBraceQuote, InPPDirective, /*NewLines=*/0,
391 /*Spaces=*/0);
392 Whitespaces.replaceWhitespaceInToken(
393 Tok, /*Offset=*/Tok.TokenText.size() - 1, /*ReplaceChars=*/1,
394 /*PreviousPostfix=*/RightBraceQuote,
395 /*CurrentPrefix=*/"", InPPDirective, /*NewLines=*/0, /*Spaces=*/0);
396}
397
404
405unsigned BreakableComment::getLineCount() const { return Lines.size(); }
406
408BreakableComment::getSplit(unsigned LineIndex, unsigned TailOffset,
409 unsigned ColumnLimit, unsigned ContentStartColumn,
410 const llvm::Regex &CommentPragmasRegex) const {
411 // Don't break lines matching the comment pragmas regex.
412 if (!AlwaysReflow || CommentPragmasRegex.match(Content[LineIndex]))
413 return Split(StringRef::npos, 0);
414 return getCommentSplit(Content[LineIndex].substr(TailOffset),
415 ContentStartColumn, ColumnLimit, Style.TabWidth,
416 Encoding, Style);
417}
418
420 unsigned LineIndex, unsigned TailOffset, Split Split,
421 WhitespaceManager &Whitespaces) const {
422 StringRef Text = Content[LineIndex].substr(TailOffset);
423 // Text is relative to the content line, but Whitespaces operates relative to
424 // the start of the corresponding token, so compute the start of the Split
425 // that needs to be compressed into a single space relative to the start of
426 // its token.
427 unsigned BreakOffsetInToken =
428 Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
429 unsigned CharsToRemove = Split.second;
430 Whitespaces.replaceWhitespaceInToken(
431 tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", "",
432 /*InPPDirective=*/false, /*Newlines=*/0, /*Spaces=*/1);
433}
434
435const FormatToken &BreakableComment::tokenAt(unsigned LineIndex) const {
436 return Tokens[LineIndex] ? *Tokens[LineIndex] : Tok;
437}
438
439static bool mayReflowContent(StringRef Content) {
440 Content = Content.trim(Blanks);
441 // Lines starting with '@' or '\' commonly have special meaning.
442 // Lines starting with '-', '-#', '+' or '*' are bulleted/numbered lists.
443 bool hasSpecialMeaningPrefix = false;
444 for (StringRef Prefix :
445 {"@", "\\", "TODO", "FIXME", "XXX", "-# ", "- ", "+ ", "* "}) {
446 if (Content.starts_with(Prefix)) {
447 hasSpecialMeaningPrefix = true;
448 break;
449 }
450 }
451
452 // Numbered lists may also start with a number followed by '.'
453 // To avoid issues if a line starts with a number which is actually the end
454 // of a previous line, we only consider numbers with up to 2 digits.
455 static const auto kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\. ");
456 hasSpecialMeaningPrefix =
457 hasSpecialMeaningPrefix || kNumberedListRegexp.match(Content);
458
459 // Simple heuristic for what to reflow: content should contain at least two
460 // characters and either the first or second character must be
461 // non-punctuation.
462 return Content.size() >= 2 && !hasSpecialMeaningPrefix &&
463 !Content.ends_with("\\") &&
464 // Note that this is UTF-8 safe, since if isPunctuation(Content[0]) is
465 // true, then the first code point must be 1 byte long.
466 (!isPunctuation(Content[0]) || !isPunctuation(Content[1]));
467}
468
470 const FormatToken &Token, unsigned StartColumn,
471 unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
472 encoding::Encoding Encoding, const FormatStyle &Style, bool UseCRLF)
474 DelimitersOnNewline(false),
475 UnbreakableTailLength(Token.UnbreakableTailLength) {
476 assert(Tok.is(TT_BlockComment) &&
477 "block comment section must start with a block comment");
478
479 StringRef TokenText(Tok.TokenText);
480 assert(TokenText.starts_with("/*") && TokenText.ends_with("*/"));
481 TokenText.substr(2, TokenText.size() - 4)
482 .split(Lines, UseCRLF ? "\r\n" : "\n");
483
484 int IndentDelta = StartColumn - OriginalStartColumn;
485 Content.resize(Lines.size());
486 Content[0] = Lines[0];
487 ContentColumn.resize(Lines.size());
488 // Account for the initial '/*'.
489 ContentColumn[0] = StartColumn + 2;
490 Tokens.resize(Lines.size());
491 for (size_t i = 1; i < Lines.size(); ++i)
492 adjustWhitespace(i, IndentDelta);
493
494 // Align decorations with the column of the star on the first line,
495 // that is one column after the start "/*".
496 DecorationColumn = StartColumn + 1;
497
498 // Account for comment decoration patterns like this:
499 //
500 // /*
501 // ** blah blah blah
502 // */
503 if (Lines.size() >= 2 && Content[1].starts_with("**") &&
504 static_cast<unsigned>(ContentColumn[1]) == StartColumn) {
505 DecorationColumn = StartColumn;
506 }
507
508 Decoration = "* ";
509 if (Lines.size() == 1 && !FirstInLine) {
510 // Comments for which FirstInLine is false can start on arbitrary column,
511 // and available horizontal space can be too small to align consecutive
512 // lines with the first one.
513 // FIXME: We could, probably, align them to current indentation level, but
514 // now we just wrap them without stars.
515 Decoration = "";
516 }
517 for (size_t i = 1, e = Content.size(); i < e && !Decoration.empty(); ++i) {
518 const StringRef Text(Content[i]);
519 if (i + 1 == e) {
520 // If the last line is empty, the closing "*/" will have a star.
521 if (Text.empty())
522 break;
523 } else if (!Text.empty() && Decoration.starts_with(Text)) {
524 continue;
525 }
526 while (!Text.starts_with(Decoration))
527 Decoration = Decoration.drop_back(1);
528 }
529
530 LastLineNeedsDecoration = true;
531 IndentAtLineBreak = ContentColumn[0] + 1;
532 for (size_t i = 1, e = Lines.size(); i < e; ++i) {
533 if (Content[i].empty()) {
534 if (i + 1 == e) {
535 // Empty last line means that we already have a star as a part of the
536 // trailing */. We also need to preserve whitespace, so that */ is
537 // correctly indented.
538 LastLineNeedsDecoration = false;
539 // Align the star in the last '*/' with the stars on the previous lines.
540 if (e >= 2 && !Decoration.empty())
541 ContentColumn[i] = DecorationColumn;
542 } else if (Decoration.empty()) {
543 // For all other lines, set the start column to 0 if they're empty, so
544 // we do not insert trailing whitespace anywhere.
545 ContentColumn[i] = 0;
546 }
547 continue;
548 }
549
550 // The first line already excludes the star.
551 // The last line excludes the star if LastLineNeedsDecoration is false.
552 // For all other lines, adjust the line to exclude the star and
553 // (optionally) the first whitespace.
554 unsigned DecorationSize = Decoration.starts_with(Content[i])
555 ? Content[i].size()
556 : Decoration.size();
557 if (DecorationSize)
558 ContentColumn[i] = DecorationColumn + DecorationSize;
559 Content[i] = Content[i].substr(DecorationSize);
560 if (!Decoration.starts_with(Content[i])) {
561 IndentAtLineBreak =
562 std::min<int>(IndentAtLineBreak, std::max(0, ContentColumn[i]));
563 }
564 }
565 IndentAtLineBreak = std::max<unsigned>(IndentAtLineBreak, Decoration.size());
566
567 // Detect a multiline jsdoc comment and set DelimitersOnNewline in that case.
568 if (Style.isJavaScript() || Style.isJava()) {
569 if ((Lines[0] == "*" || Lines[0].starts_with("* ")) && Lines.size() > 1) {
570 // This is a multiline jsdoc comment.
571 DelimitersOnNewline = true;
572 } else if (Lines[0].starts_with("* ") && Lines.size() == 1) {
573 // Detect a long single-line comment, like:
574 // /** long long long */
575 // Below, '2' is the width of '*/'.
576 unsigned EndColumn =
577 ContentColumn[0] +
579 Style.TabWidth, Encoding) +
580 2;
581 DelimitersOnNewline = EndColumn > Style.ColumnLimit;
582 }
583 }
584
585 LLVM_DEBUG({
586 llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n";
587 llvm::dbgs() << "DelimitersOnNewline " << DelimitersOnNewline << "\n";
588 for (size_t i = 0; i < Lines.size(); ++i) {
589 llvm::dbgs() << i << " |" << Content[i] << "| "
590 << "CC=" << ContentColumn[i] << "| "
591 << "IN=" << (Content[i].data() - Lines[i].data()) << "\n";
592 }
593 });
594}
595
597 unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
598 unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const {
599 // Don't break lines matching the comment pragmas regex.
600 if (!AlwaysReflow || CommentPragmasRegex.match(Content[LineIndex]))
601 return Split(StringRef::npos, 0);
602 return getCommentSplit(Content[LineIndex].substr(TailOffset),
603 ContentStartColumn, ColumnLimit, Style.TabWidth,
604 Encoding, Style, Decoration.ends_with("*"));
605}
606
607void BreakableBlockComment::adjustWhitespace(unsigned LineIndex,
608 int IndentDelta) {
609 // When in a preprocessor directive, the trailing backslash in a block comment
610 // is not needed, but can serve a purpose of uniformity with necessary escaped
611 // newlines outside the comment. In this case we remove it here before
612 // trimming the trailing whitespace. The backslash will be re-added later when
613 // inserting a line break.
614 size_t EndOfPreviousLine = Lines[LineIndex - 1].size();
615 if (InPPDirective && Lines[LineIndex - 1].ends_with("\\"))
616 --EndOfPreviousLine;
617
618 // Calculate the end of the non-whitespace text in the previous line.
619 EndOfPreviousLine =
620 Lines[LineIndex - 1].find_last_not_of(Blanks, EndOfPreviousLine);
621 if (EndOfPreviousLine == StringRef::npos)
622 EndOfPreviousLine = 0;
623 else
624 ++EndOfPreviousLine;
625 // Calculate the start of the non-whitespace text in the current line.
626 size_t StartOfLine = Lines[LineIndex].find_first_not_of(Blanks);
627 if (StartOfLine == StringRef::npos)
628 StartOfLine = Lines[LineIndex].size();
629
630 StringRef Whitespace = Lines[LineIndex].substr(0, StartOfLine);
631 // Adjust Lines to only contain relevant text.
632 size_t PreviousContentOffset =
633 Content[LineIndex - 1].data() - Lines[LineIndex - 1].data();
634 Content[LineIndex - 1] = Lines[LineIndex - 1].substr(
635 PreviousContentOffset, EndOfPreviousLine - PreviousContentOffset);
636 Content[LineIndex] = Lines[LineIndex].substr(StartOfLine);
637
638 // Adjust the start column uniformly across all lines.
639 ContentColumn[LineIndex] =
640 encoding::columnWidthWithTabs(Whitespace, 0, Style.TabWidth, Encoding) +
641 IndentDelta;
642}
643
644unsigned BreakableBlockComment::getRangeLength(unsigned LineIndex,
645 unsigned Offset,
646 StringRef::size_type Length,
647 unsigned StartColumn) const {
649 Content[LineIndex].substr(Offset, Length), StartColumn, Style.TabWidth,
650 Encoding);
651}
652
654 unsigned Offset,
655 unsigned StartColumn) const {
656 unsigned LineLength =
657 UnbreakableTailLength +
658 getRangeLength(LineIndex, Offset, StringRef::npos, StartColumn);
659 if (LineIndex + 1 == Lines.size()) {
660 LineLength += 2;
661 // We never need a decoration when breaking just the trailing "*/" postfix.
662 bool HasRemainingText = Offset < Content[LineIndex].size();
663 if (!HasRemainingText) {
664 bool HasDecoration = Lines[LineIndex].ltrim().starts_with(Decoration);
665 if (HasDecoration)
666 LineLength -= Decoration.size();
667 }
668 }
669 return LineLength;
670}
671
673 bool Break) const {
674 if (Break)
675 return IndentAtLineBreak;
676 return std::max(0, ContentColumn[LineIndex]);
677}
678
679const llvm::StringSet<>
681 "@param", "@return", "@returns", "@throws", "@type", "@template",
682 "@see", "@deprecated", "@define", "@exports", "@mods", "@private",
683};
684
685unsigned BreakableBlockComment::getContentIndent(unsigned LineIndex) const {
686 if (!Style.isJava() && !Style.isJavaScript())
687 return 0;
688 // The content at LineIndex 0 of a comment like:
689 // /** line 0 */
690 // is "* line 0", so we need to skip over the decoration in that case.
691 StringRef ContentWithNoDecoration = Content[LineIndex];
692 if (LineIndex == 0 && ContentWithNoDecoration.starts_with("*"))
693 ContentWithNoDecoration = ContentWithNoDecoration.substr(1).ltrim(Blanks);
694 StringRef FirstWord = ContentWithNoDecoration.substr(
695 0, ContentWithNoDecoration.find_first_of(Blanks));
696 if (ContentIndentingJavadocAnnotations.contains(FirstWord))
697 return Style.ContinuationIndentWidth;
698 return 0;
699}
700
701void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
702 Split Split, unsigned ContentIndent,
703 WhitespaceManager &Whitespaces) const {
704 StringRef Text = Content[LineIndex].substr(TailOffset);
705 StringRef Prefix = Decoration;
706 // We need this to account for the case when we have a decoration "* " for all
707 // the lines except for the last one, where the star in "*/" acts as a
708 // decoration.
709 unsigned LocalIndentAtLineBreak = IndentAtLineBreak;
710 if (LineIndex + 1 == Lines.size() &&
711 Text.size() == Split.first + Split.second) {
712 // For the last line we need to break before "*/", but not to add "* ".
713 Prefix = "";
714 if (LocalIndentAtLineBreak >= 2)
715 LocalIndentAtLineBreak -= 2;
716 }
717 // The split offset is from the beginning of the line. Convert it to an offset
718 // from the beginning of the token text.
719 unsigned BreakOffsetInToken =
720 Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
721 unsigned CharsToRemove = Split.second;
722 assert(LocalIndentAtLineBreak >= Prefix.size());
723 std::string PrefixWithTrailingIndent = std::string(Prefix);
724 PrefixWithTrailingIndent.append(ContentIndent, ' ');
725 Whitespaces.replaceWhitespaceInToken(
726 tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "",
727 PrefixWithTrailingIndent, InPPDirective, /*Newlines=*/1,
728 /*Spaces=*/LocalIndentAtLineBreak + ContentIndent -
729 PrefixWithTrailingIndent.size());
730}
731
733 unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
734 if (!mayReflow(LineIndex, CommentPragmasRegex))
735 return Split(StringRef::npos, 0);
736
737 // If we're reflowing into a line with content indent, only reflow the next
738 // line if its starting whitespace matches the content indent.
739 size_t Trimmed = Content[LineIndex].find_first_not_of(Blanks);
740 if (LineIndex) {
741 unsigned PreviousContentIndent = getContentIndent(LineIndex - 1);
742 if (PreviousContentIndent && Trimmed != StringRef::npos &&
743 Trimmed != PreviousContentIndent) {
744 return Split(StringRef::npos, 0);
745 }
746 }
747
748 return Split(0, Trimmed != StringRef::npos ? Trimmed : 0);
749}
750
752 // A break is introduced when we want delimiters on newline.
753 return DelimitersOnNewline &&
754 Lines[0].substr(1).find_first_not_of(Blanks) != StringRef::npos;
755}
756
757void BreakableBlockComment::reflow(unsigned LineIndex,
758 WhitespaceManager &Whitespaces) const {
759 StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);
760 // Here we need to reflow.
761 assert(Tokens[LineIndex - 1] == Tokens[LineIndex] &&
762 "Reflowing whitespace within a token");
763 // This is the offset of the end of the last line relative to the start of
764 // the token text in the token.
765 unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +
766 Content[LineIndex - 1].size() -
767 tokenAt(LineIndex).TokenText.data();
768 unsigned WhitespaceLength = TrimmedContent.data() -
769 tokenAt(LineIndex).TokenText.data() -
770 WhitespaceOffsetInToken;
771 Whitespaces.replaceWhitespaceInToken(
772 tokenAt(LineIndex), WhitespaceOffsetInToken,
773 /*ReplaceChars=*/WhitespaceLength, /*PreviousPostfix=*/"",
774 /*CurrentPrefix=*/ReflowPrefix, InPPDirective, /*Newlines=*/0,
775 /*Spaces=*/0);
776}
777
779 unsigned LineIndex, WhitespaceManager &Whitespaces) const {
780 if (LineIndex == 0) {
781 if (DelimitersOnNewline) {
782 // Since we're breaking at index 1 below, the break position and the
783 // break length are the same.
784 // Note: this works because getCommentSplit is careful never to split at
785 // the beginning of a line.
786 size_t BreakLength = Lines[0].substr(1).find_first_not_of(Blanks);
787 if (BreakLength != StringRef::npos) {
788 insertBreak(LineIndex, 0, Split(1, BreakLength), /*ContentIndent=*/0,
789 Whitespaces);
790 }
791 }
792 return;
793 }
794 // Here no reflow with the previous line will happen.
795 // Fix the decoration of the line at LineIndex.
796 StringRef Prefix = Decoration;
797 if (Content[LineIndex].empty()) {
798 if (LineIndex + 1 == Lines.size()) {
799 if (!LastLineNeedsDecoration) {
800 // If the last line was empty, we don't need a prefix, as the */ will
801 // line up with the decoration (if it exists).
802 Prefix = "";
803 }
804 } else if (!Decoration.empty()) {
805 // For other empty lines, if we do have a decoration, adapt it to not
806 // contain a trailing whitespace.
807 Prefix = Prefix.substr(0, 1);
808 }
809 } else if (ContentColumn[LineIndex] == 1) {
810 // This line starts immediately after the decorating *.
811 Prefix = Prefix.substr(0, 1);
812 }
813 // This is the offset of the end of the last line relative to the start of the
814 // token text in the token.
815 unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +
816 Content[LineIndex - 1].size() -
817 tokenAt(LineIndex).TokenText.data();
818 unsigned WhitespaceLength = Content[LineIndex].data() -
819 tokenAt(LineIndex).TokenText.data() -
820 WhitespaceOffsetInToken;
821 Whitespaces.replaceWhitespaceInToken(
822 tokenAt(LineIndex), WhitespaceOffsetInToken, WhitespaceLength, "", Prefix,
823 InPPDirective, /*Newlines=*/1, ContentColumn[LineIndex] - Prefix.size());
824}
825
828 if (DelimitersOnNewline) {
829 // Replace the trailing whitespace of the last line with a newline.
830 // In case the last line is empty, the ending '*/' is already on its own
831 // line.
832 StringRef Line = Content.back().substr(TailOffset);
833 StringRef TrimmedLine = Line.rtrim(Blanks);
834 if (!TrimmedLine.empty())
835 return Split(TrimmedLine.size(), Line.size() - TrimmedLine.size());
836 }
837 return Split(StringRef::npos, 0);
838}
839
841 unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
842 // Content[LineIndex] may exclude the indent after the '*' decoration. In that
843 // case, we compute the start of the comment pragma manually.
844 StringRef IndentContent = Content[LineIndex];
845 if (Lines[LineIndex].ltrim(Blanks).starts_with("*"))
846 IndentContent = Lines[LineIndex].ltrim(Blanks).substr(1);
847 return LineIndex > 0 && AlwaysReflow &&
848 !CommentPragmasRegex.match(IndentContent) &&
849 mayReflowContent(Content[LineIndex]) && !Tok.Finalized &&
850 !switchesFormatting(tokenAt(LineIndex));
851}
852
854 const FormatToken &Token, unsigned StartColumn, bool InPPDirective,
855 encoding::Encoding Encoding, const FormatStyle &Style)
857 assert(Tok.is(TT_LineComment) &&
858 "line comment section must start with a line comment");
859 FormatToken *LineTok = nullptr;
860 const int Minimum = Style.SpacesInLineCommentPrefix.Minimum;
861 // How many spaces we changed in the first line of the section, this will be
862 // applied in all following lines
863 int FirstLineSpaceChange = 0;
864 for (const FormatToken *CurrentTok = &Tok;
865 CurrentTok && CurrentTok->is(TT_LineComment);
866 CurrentTok = CurrentTok->Next) {
867 LastLineTok = LineTok;
868 StringRef TokenText(CurrentTok->TokenText);
869 assert((TokenText.starts_with("//") || TokenText.starts_with("#")) &&
870 "unsupported line comment prefix, '//' and '#' are supported");
871 size_t FirstLineIndex = Lines.size();
872 TokenText.split(Lines, "\n");
873 Content.resize(Lines.size());
874 ContentColumn.resize(Lines.size());
875 PrefixSpaceChange.resize(Lines.size());
876 Tokens.resize(Lines.size());
877 Prefix.resize(Lines.size());
878 OriginalPrefix.resize(Lines.size());
879 for (size_t i = FirstLineIndex, e = Lines.size(); i < e; ++i) {
880 Lines[i] = Lines[i].ltrim(Blanks);
881 StringRef IndentPrefix = getLineCommentIndentPrefix(Lines[i], Style);
882 OriginalPrefix[i] = IndentPrefix;
883 const int SpacesInPrefix = llvm::count(IndentPrefix, ' ');
884
885 // This lambda also considers multibyte character that is not handled in
886 // functions like isPunctuation provided by CharInfo.
887 const auto NoSpaceBeforeFirstCommentChar = [&]() {
888 assert(Lines[i].size() > IndentPrefix.size());
889 const char FirstCommentChar = Lines[i][IndentPrefix.size()];
890 const unsigned FirstCharByteSize =
891 encoding::getCodePointNumBytes(FirstCommentChar, Encoding);
893 Lines[i].substr(IndentPrefix.size(), FirstCharByteSize),
894 Encoding) != 1) {
895 return false;
896 }
897 // In C-like comments, add a space before #. For example this is useful
898 // to preserve the relative indentation when commenting out code with
899 // #includes.
900 //
901 // In languages using # as the comment leader such as proto, don't
902 // add a space to support patterns like:
903 // #########
904 // # section
905 // #########
906 if (FirstCommentChar == '#' && !TokenText.starts_with("#"))
907 return false;
908 return FirstCommentChar == '\\' || isPunctuation(FirstCommentChar) ||
909 isHorizontalWhitespace(FirstCommentChar);
910 };
911
912 // On the first line of the comment section we calculate how many spaces
913 // are to be added or removed, all lines after that just get only the
914 // change and we will not look at the maximum anymore. Additionally to the
915 // actual first line, we calculate that when the non space Prefix changes,
916 // e.g. from "///" to "//".
917 if (i == 0 || OriginalPrefix[i].rtrim(Blanks) !=
918 OriginalPrefix[i - 1].rtrim(Blanks)) {
919 if (SpacesInPrefix < Minimum && Lines[i].size() > IndentPrefix.size() &&
920 !NoSpaceBeforeFirstCommentChar()) {
921 FirstLineSpaceChange = Minimum - SpacesInPrefix;
922 } else if (static_cast<unsigned>(SpacesInPrefix) >
923 Style.SpacesInLineCommentPrefix.Maximum) {
924 FirstLineSpaceChange =
925 Style.SpacesInLineCommentPrefix.Maximum - SpacesInPrefix;
926 } else {
927 FirstLineSpaceChange = 0;
928 }
929 }
930
931 if (Lines[i].size() != IndentPrefix.size()) {
932 assert(Lines[i].size() > IndentPrefix.size());
933
934 PrefixSpaceChange[i] = SpacesInPrefix + FirstLineSpaceChange < Minimum
935 ? Minimum - SpacesInPrefix
936 : FirstLineSpaceChange;
937
938 const auto FirstNonSpace = Lines[i][IndentPrefix.size()];
939 const bool IsFormatComment = LineTok && switchesFormatting(*LineTok);
940 const bool LineRequiresLeadingSpace =
941 !NoSpaceBeforeFirstCommentChar() ||
942 (FirstNonSpace == '}' && FirstLineSpaceChange != 0);
943 const bool AllowsSpaceChange =
944 !IsFormatComment &&
945 (SpacesInPrefix != 0 || LineRequiresLeadingSpace);
946
947 if (PrefixSpaceChange[i] > 0 && AllowsSpaceChange) {
948 Prefix[i] = IndentPrefix.str();
949 Prefix[i].append(PrefixSpaceChange[i], ' ');
950 } else if (PrefixSpaceChange[i] < 0 && AllowsSpaceChange) {
951 Prefix[i] = IndentPrefix
952 .drop_back(std::min<std::size_t>(
953 -PrefixSpaceChange[i], SpacesInPrefix))
954 .str();
955 } else {
956 Prefix[i] = IndentPrefix.str();
957 }
958 } else {
959 // If the IndentPrefix is the whole line, there is no content and we
960 // drop just all space
961 Prefix[i] = IndentPrefix.drop_back(SpacesInPrefix).str();
962 }
963
964 Tokens[i] = LineTok;
965 Content[i] = Lines[i].substr(IndentPrefix.size());
966 ContentColumn[i] =
968 Style.TabWidth, Encoding);
969
970 // Calculate the end of the non-whitespace text in this line.
971 size_t EndOfLine = Content[i].find_last_not_of(Blanks);
972 if (EndOfLine == StringRef::npos)
973 EndOfLine = Content[i].size();
974 else
975 ++EndOfLine;
976 Content[i] = Content[i].substr(0, EndOfLine);
977 }
978 LineTok = CurrentTok->Next;
979 if (CurrentTok->Next && !CurrentTok->Next->ContinuesLineCommentSection) {
980 // A line comment section needs to broken by a line comment that is
981 // preceded by at least two newlines. Note that we put this break here
982 // instead of breaking at a previous stage during parsing, since that
983 // would split the contents of the enum into two unwrapped lines in this
984 // example, which is undesirable:
985 // enum A {
986 // a, // comment about a
987 //
988 // // comment about b
989 // b
990 // };
991 //
992 // FIXME: Consider putting separate line comment sections as children to
993 // the unwrapped line instead.
994 break;
995 }
996 }
997}
998
999unsigned
1000BreakableLineCommentSection::getRangeLength(unsigned LineIndex, unsigned Offset,
1001 StringRef::size_type Length,
1002 unsigned StartColumn) const {
1004 Content[LineIndex].substr(Offset, Length), StartColumn, Style.TabWidth,
1005 Encoding);
1006}
1007
1008unsigned
1010 bool /*Break*/) const {
1011 return ContentColumn[LineIndex];
1012}
1013
1015 unsigned LineIndex, unsigned TailOffset, Split Split,
1016 unsigned ContentIndent, WhitespaceManager &Whitespaces) const {
1017 StringRef Text = Content[LineIndex].substr(TailOffset);
1018 // Compute the offset of the split relative to the beginning of the token
1019 // text.
1020 unsigned BreakOffsetInToken =
1021 Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
1022 unsigned CharsToRemove = Split.second;
1023 Whitespaces.replaceWhitespaceInToken(
1024 tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "",
1025 Prefix[LineIndex], InPPDirective, /*Newlines=*/1,
1026 /*Spaces=*/ContentColumn[LineIndex] - Prefix[LineIndex].size());
1027}
1028
1030 unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
1031 if (!mayReflow(LineIndex, CommentPragmasRegex))
1032 return Split(StringRef::npos, 0);
1033
1034 size_t Trimmed = Content[LineIndex].find_first_not_of(Blanks);
1035
1036 // In a line comment section each line is a separate token; thus, after a
1037 // split we replace all whitespace before the current line comment token
1038 // (which does not need to be included in the split), plus the start of the
1039 // line up to where the content starts.
1040 return Split(0, Trimmed != StringRef::npos ? Trimmed : 0);
1041}
1042
1044 WhitespaceManager &Whitespaces) const {
1045 if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) {
1046 // Reflow happens between tokens. Replace the whitespace between the
1047 // tokens by the empty string.
1048 Whitespaces.replaceWhitespace(
1049 *Tokens[LineIndex], /*Newlines=*/0, /*Spaces=*/0,
1050 /*StartOfTokenColumn=*/StartColumn, /*IsAligned=*/true,
1051 /*InPPDirective=*/false);
1052 } else if (LineIndex > 0) {
1053 // In case we're reflowing after the '\' in:
1054 //
1055 // // line comment \
1056 // // line 2
1057 //
1058 // the reflow happens inside the single comment token (it is a single line
1059 // comment with an unescaped newline).
1060 // Replace the whitespace between the '\' and '//' with the empty string.
1061 //
1062 // Offset points to after the '\' relative to start of the token.
1063 unsigned Offset = Lines[LineIndex - 1].data() +
1064 Lines[LineIndex - 1].size() -
1065 tokenAt(LineIndex - 1).TokenText.data();
1066 // WhitespaceLength is the number of chars between the '\' and the '//' on
1067 // the next line.
1068 unsigned WhitespaceLength =
1069 Lines[LineIndex].data() - tokenAt(LineIndex).TokenText.data() - Offset;
1070 Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex], Offset,
1071 /*ReplaceChars=*/WhitespaceLength,
1072 /*PreviousPostfix=*/"",
1073 /*CurrentPrefix=*/"",
1074 /*InPPDirective=*/false,
1075 /*Newlines=*/0,
1076 /*Spaces=*/0);
1077 }
1078 // Replace the indent and prefix of the token with the reflow prefix.
1079 unsigned Offset =
1080 Lines[LineIndex].data() - tokenAt(LineIndex).TokenText.data();
1081 unsigned WhitespaceLength =
1082 Content[LineIndex].data() - Lines[LineIndex].data();
1083 Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex], Offset,
1084 /*ReplaceChars=*/WhitespaceLength,
1085 /*PreviousPostfix=*/"",
1086 /*CurrentPrefix=*/ReflowPrefix,
1087 /*InPPDirective=*/false,
1088 /*Newlines=*/0,
1089 /*Spaces=*/0);
1090}
1091
1093 unsigned LineIndex, WhitespaceManager &Whitespaces) const {
1094 // If this is the first line of a token, we need to inform Whitespace Manager
1095 // about it: either adapt the whitespace range preceding it, or mark it as an
1096 // untouchable token.
1097 // This happens for instance here:
1098 // // line 1 \
1099 // // line 2
1100 if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) {
1101 // This is the first line for the current token, but no reflow with the
1102 // previous token is necessary. However, we still may need to adjust the
1103 // start column. Note that ContentColumn[LineIndex] is the expected
1104 // content column after a possible update to the prefix, hence the prefix
1105 // length change is included.
1106 unsigned LineColumn =
1107 ContentColumn[LineIndex] -
1108 (Content[LineIndex].data() - Lines[LineIndex].data()) +
1109 (OriginalPrefix[LineIndex].size() - Prefix[LineIndex].size());
1110
1111 // We always want to create a replacement instead of adding an untouchable
1112 // token, even if LineColumn is the same as the original column of the
1113 // token. This is because WhitespaceManager doesn't align trailing
1114 // comments if they are untouchable.
1115 Whitespaces.replaceWhitespace(*Tokens[LineIndex],
1116 /*Newlines=*/1,
1117 /*Spaces=*/LineColumn,
1118 /*StartOfTokenColumn=*/LineColumn,
1119 /*IsAligned=*/true,
1120 /*InPPDirective=*/false);
1121 }
1122 if (OriginalPrefix[LineIndex] != Prefix[LineIndex]) {
1123 // Adjust the prefix if necessary.
1124 const auto SpacesToRemove = -std::min(PrefixSpaceChange[LineIndex], 0);
1125 const auto SpacesToAdd = std::max(PrefixSpaceChange[LineIndex], 0);
1126 Whitespaces.replaceWhitespaceInToken(
1127 tokenAt(LineIndex), OriginalPrefix[LineIndex].size() - SpacesToRemove,
1128 /*ReplaceChars=*/SpacesToRemove, "", "", /*InPPDirective=*/false,
1129 /*Newlines=*/0, /*Spaces=*/SpacesToAdd);
1130 }
1131}
1132
1134 if (LastLineTok)
1135 State.NextToken = LastLineTok->Next;
1136}
1137
1139 unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
1140 // Line comments have the indent as part of the prefix, so we need to
1141 // recompute the start of the line.
1142 StringRef IndentContent = Content[LineIndex];
1143 if (Lines[LineIndex].starts_with("//"))
1144 IndentContent = Lines[LineIndex].substr(2);
1145 // FIXME: Decide whether we want to reflow non-regular indents:
1146 // Currently, we only reflow when the OriginalPrefix[LineIndex] matches the
1147 // OriginalPrefix[LineIndex-1]. That means we don't reflow
1148 // // text that protrudes
1149 // // into text with different indent
1150 // We do reflow in that case in block comments.
1151 return LineIndex > 0 && AlwaysReflow &&
1152 !CommentPragmasRegex.match(IndentContent) &&
1153 mayReflowContent(Content[LineIndex]) && !Tok.Finalized &&
1154 !switchesFormatting(tokenAt(LineIndex)) &&
1155 OriginalPrefix[LineIndex] == OriginalPrefix[LineIndex - 1];
1156}
1157
1158} // namespace format
1159} // namespace clang
Declares BreakableToken, BreakableStringLiteral, BreakableComment, BreakableBlockComment and Breakabl...
This file implements an indenter that manages the indentation of continuations.
bool isNot(T Kind) const
StringRef TokenText
The raw text of the token.
Various functions to configurably format source code.
Token - This structure provides full information about a lexed token.
Definition Token.h:36
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {....
Definition Token.h:102
void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, unsigned ContentIndent, WhitespaceManager &Whitespaces) const override
Emits the previously retrieved Split via Whitespaces.
unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override
Returns the column at which content in line LineIndex starts, assuming no reflow.
Split getReflowSplit(unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const override
Returns a whitespace range (offset, length) of the content at LineIndex such that the content of that...
bool mayReflow(unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const override
Split getSplitAfterLastLine(unsigned TailOffset) const override
Returns a whitespace range (offset, length) of the content at the last line that needs to be reformat...
BreakableBlockComment(const FormatToken &Token, unsigned StartColumn, unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style, bool UseCRLF)
static const llvm::StringSet ContentIndentingJavadocAnnotations
unsigned getRemainingLength(unsigned LineIndex, unsigned Offset, unsigned StartColumn) const override
Returns the number of columns required to format the text following the byte Offset in the line LineI...
unsigned getContentIndent(unsigned LineIndex) const override
Returns additional content indent required for the second line after the content at line LineIndex is...
void adaptStartOfLine(unsigned LineIndex, WhitespaceManager &Whitespaces) const override
Replaces the whitespace between LineIndex-1 and LineIndex.
void reflow(unsigned LineIndex, WhitespaceManager &Whitespaces) const override
Reflows the current line into the end of the previous one.
unsigned getRangeLength(unsigned LineIndex, unsigned Offset, StringRef::size_type Length, unsigned StartColumn) const override
Returns the number of columns required to format the text in the byte range [Offset,...
Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const override
Returns a range (offset, length) at which to break the line at LineIndex, if previously broken at Tai...
bool introducesBreakBeforeToken() const override
Returns whether there will be a line break at the start of the token.
unsigned getLineCount() const override
Returns the number of lines in this token in the original code.
SmallVector< StringRef, 16 > Lines
SmallVector< int, 16 > ContentColumn
Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const override
Returns a range (offset, length) at which to break the line at LineIndex, if previously broken at Tai...
SmallVector< FormatToken *, 16 > Tokens
SmallVector< StringRef, 16 > Content
void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split, WhitespaceManager &Whitespaces) const override
Replaces the whitespace range described by Split with a single space.
const FormatToken & tokenAt(unsigned LineIndex) const
BreakableComment(const FormatToken &Token, unsigned StartColumn, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
Creates a breakable token for a comment.
BreakableLineCommentSection(const FormatToken &Token, unsigned StartColumn, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override
Returns the column at which content in line LineIndex starts, assuming no reflow.
void reflow(unsigned LineIndex, WhitespaceManager &Whitespaces) const override
Reflows the current line into the end of the previous one.
Split getReflowSplit(unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const override
Returns a whitespace range (offset, length) of the content at LineIndex such that the content of that...
void updateNextToken(LineState &State) const override
Updates the next token of State to the next token after this one.
bool mayReflow(unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const override
void adaptStartOfLine(unsigned LineIndex, WhitespaceManager &Whitespaces) const override
Replaces the whitespace between LineIndex-1 and LineIndex.
void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, unsigned ContentIndent, WhitespaceManager &Whitespaces) const override
Emits the previously retrieved Split via Whitespaces.
unsigned getRangeLength(unsigned LineIndex, unsigned Offset, StringRef::size_type Length, unsigned StartColumn) const override
Returns the number of columns required to format the text in the byte range [Offset,...
BreakableStringLiteralUsingOperators(const FormatToken &Tok, QuoteStyleType QuoteStyle, bool UnindentPlus, unsigned StartColumn, unsigned UnbreakableTailLength, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
Creates a breakable token for a single line string literal for C#, Java, JavaScript,...
void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, unsigned ContentIndent, WhitespaceManager &Whitespaces) const override
Emits the previously retrieved Split via Whitespaces.
unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override
Returns the column at which content in line LineIndex starts, assuming no reflow.
void updateAfterBroken(WhitespaceManager &Whitespaces) const override
Adds replacements that are needed when the token is broken.
unsigned getRemainingLength(unsigned LineIndex, unsigned Offset, unsigned StartColumn) const override
Returns the number of columns required to format the text following the byte Offset in the line LineI...
Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const override
Returns a range (offset, length) at which to break the line at LineIndex, if previously broken at Tai...
unsigned getRangeLength(unsigned LineIndex, unsigned Offset, StringRef::size_type Length, unsigned StartColumn) const override
Returns the number of columns required to format the text in the byte range [Offset,...
unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override
Returns the column at which content in line LineIndex starts, assuming no reflow.
unsigned getLineCount() const override
Returns the number of lines in this token in the original code.
unsigned getRemainingLength(unsigned LineIndex, unsigned Offset, unsigned StartColumn) const override
Returns the number of columns required to format the text following the byte Offset in the line LineI...
BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn, StringRef Prefix, StringRef Postfix, unsigned UnbreakableTailLength, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
Creates a breakable token for a single line string literal.
void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, unsigned ContentIndent, WhitespaceManager &Whitespaces) const override
Emits the previously retrieved Split via Whitespaces.
std::pair< StringRef::size_type, unsigned > Split
Contains starting character index and length of split.
unsigned getLengthAfterCompression(unsigned RemainingTokenColumns, Split Split) const
Returns the number of columns needed to format RemainingTokenColumns, assuming that Split is within t...
BreakableToken(const FormatToken &Tok, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
const encoding::Encoding Encoding
Manages the whitespaces around tokens and their replacements.
void replaceWhitespaceInToken(const FormatToken &Tok, unsigned Offset, unsigned ReplaceChars, StringRef PreviousPostfix, StringRef CurrentPrefix, bool InPPDirective, unsigned Newlines, int Spaces)
Inserts or replaces whitespace in the middle of a token.
void replaceWhitespace(FormatToken &Tok, unsigned Newlines, unsigned Spaces, unsigned StartOfTokenColumn, bool IsAligned=false, bool InPPDirective=false)
Replaces the whitespace in front of Tok.
unsigned columnWidthWithTabs(StringRef Text, unsigned StartColumn, unsigned TabWidth, Encoding Encoding)
Returns the number of columns required to display the Text, starting from the StartColumn on a termin...
Definition Encoding.h:60
unsigned getEscapeSequenceLength(StringRef Text)
Gets the length of an escape sequence inside a C++ string literal.
Definition Encoding.h:96
unsigned getCodePointNumBytes(char FirstChar, Encoding Encoding)
Gets the number of bytes in a sequence representing a single codepoint and starting with FirstChar in...
Definition Encoding.h:77
unsigned columnWidth(StringRef Text, Encoding Encoding)
Returns the number of columns required to display the Text on a generic Unicode-capable terminal.
Definition Encoding.h:44
bool switchesFormatting(const FormatToken &Token)
Checks if Token switches formatting, like /* clang-format off */.
static BreakableToken::Split getStringSplit(StringRef Text, unsigned UsedColumns, unsigned ColumnLimit, unsigned TabWidth, encoding::Encoding Encoding)
static StringRef getLineCommentIndentPrefix(StringRef Comment, const FormatStyle &Style)
static bool mayReflowContent(StringRef Content)
static constexpr StringRef Blanks(" \t\v\f\r")
static BreakableToken::Split getCommentSplit(StringRef Text, unsigned ContentStartColumn, unsigned ColumnLimit, unsigned TabWidth, encoding::Encoding Encoding, const FormatStyle &Style, bool DecorationEndsWithStar=false)
The JSON file list parser is used to communicate input to InstallAPI.
unsigned TabWidth
The number of columns used for tab stops.
Definition Format.h:5438
if(T->getSizeExpr()) TRY_TO(TraverseStmt(const_cast< Expr * >(T -> getSizeExpr())))
LLVM_READONLY bool isAlphanumeric(unsigned char c)
Return true if this character is an ASCII letter or digit: [a-zA-Z0-9].
Definition CharInfo.h:138
LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)
Returns true if this character is horizontal ASCII whitespace: ' ', '\t', '\f', '\v'.
Definition CharInfo.h:91
LLVM_READONLY bool isPunctuation(unsigned char c)
Return true if this character is an ASCII punctuation character.
Definition CharInfo.h:152
#define false
Definition stdbool.h:26
A wrapper around a Token storing information about the whitespace characters preceding it.
StringRef TokenText
The raw text of the token.
FormatToken * Next
The next token in the unwrapped line.
The current state when indenting an unwrapped line.