clang 19.0.0git
BreakableToken.cpp
Go to the documentation of this file.
1//===--- BreakableToken.cpp - Format C++ code -----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// Contains implementation of BreakableToken class and classes derived
11/// from it.
12///
13//===----------------------------------------------------------------------===//
14
15#include "BreakableToken.h"
18#include "clang/Format/Format.h"
19#include "llvm/ADT/STLExtras.h"
20#include "llvm/Support/Debug.h"
21#include <algorithm>
22
23#define DEBUG_TYPE "format-token-breaker"
24
25namespace clang {
26namespace format {
27
28static constexpr StringRef Blanks = " \t\v\f\r";
/// Returns true if \p C is one of the blank characters: space, tab, vertical
/// tab, form feed or carriage return. Newline is not a blank.
static bool IsBlank(char C) {
  switch (C) {
  case ' ':
  case '\t':
  case '\v':
  case '\f':
  case '\r':
    return true;
  default:
    return false;
  }
}
41
42static StringRef getLineCommentIndentPrefix(StringRef Comment,
43 const FormatStyle &Style) {
44 static constexpr StringRef KnownCStylePrefixes[] = {"///<", "//!<", "///",
45 "//!", "//:", "//"};
46 static constexpr StringRef KnownTextProtoPrefixes[] = {"####", "###", "##",
47 "//", "#"};
48 ArrayRef<StringRef> KnownPrefixes(KnownCStylePrefixes);
50 KnownPrefixes = KnownTextProtoPrefixes;
51
52 assert(
53 llvm::is_sorted(KnownPrefixes, [](StringRef Lhs, StringRef Rhs) noexcept {
54 return Lhs.size() > Rhs.size();
55 }));
56
57 for (StringRef KnownPrefix : KnownPrefixes) {
58 if (Comment.starts_with(KnownPrefix)) {
59 const auto PrefixLength =
60 Comment.find_first_not_of(' ', KnownPrefix.size());
61 return Comment.substr(0, PrefixLength);
62 }
63 }
64 return {};
65}
66
68getCommentSplit(StringRef Text, unsigned ContentStartColumn,
69 unsigned ColumnLimit, unsigned TabWidth,
70 encoding::Encoding Encoding, const FormatStyle &Style,
71 bool DecorationEndsWithStar = false) {
72 LLVM_DEBUG(llvm::dbgs() << "Comment split: \"" << Text
73 << "\", Column limit: " << ColumnLimit
74 << ", Content start: " << ContentStartColumn << "\n");
75 if (ColumnLimit <= ContentStartColumn + 1)
76 return BreakableToken::Split(StringRef::npos, 0);
77
78 unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1;
79 unsigned MaxSplitBytes = 0;
80
81 for (unsigned NumChars = 0;
82 NumChars < MaxSplit && MaxSplitBytes < Text.size();) {
83 unsigned BytesInChar =
84 encoding::getCodePointNumBytes(Text[MaxSplitBytes], Encoding);
86 Text.substr(MaxSplitBytes, BytesInChar), ContentStartColumn + NumChars,
87 TabWidth, Encoding);
88 MaxSplitBytes += BytesInChar;
89 }
90
91 // In JavaScript, some @tags can be followed by {, and machinery that parses
92 // these comments will fail to understand the comment if followed by a line
93 // break. So avoid ever breaking before a {.
94 if (Style.isJavaScript()) {
95 StringRef::size_type SpaceOffset =
96 Text.find_first_of(Blanks, MaxSplitBytes);
97 if (SpaceOffset != StringRef::npos && SpaceOffset + 1 < Text.size() &&
98 Text[SpaceOffset + 1] == '{') {
99 MaxSplitBytes = SpaceOffset + 1;
100 }
101 }
102
103 StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes);
104
105 static const auto kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\.");
106 // Some spaces are unacceptable to break on, rewind past them.
107 while (SpaceOffset != StringRef::npos) {
108 // If a line-comment ends with `\`, the next line continues the comment,
109 // whether or not it starts with `//`. This is confusing and triggers
110 // -Wcomment.
111 // Avoid introducing multiline comments by not allowing a break right
112 // after '\'.
113 if (Style.isCpp()) {
114 StringRef::size_type LastNonBlank =
115 Text.find_last_not_of(Blanks, SpaceOffset);
116 if (LastNonBlank != StringRef::npos && Text[LastNonBlank] == '\\') {
117 SpaceOffset = Text.find_last_of(Blanks, LastNonBlank);
118 continue;
119 }
120 }
121
122 // Do not split before a number followed by a dot: this would be interpreted
123 // as a numbered list, which would prevent re-flowing in subsequent passes.
124 if (kNumberedListRegexp.match(Text.substr(SpaceOffset).ltrim(Blanks))) {
125 SpaceOffset = Text.find_last_of(Blanks, SpaceOffset);
126 continue;
127 }
128
129 // Avoid ever breaking before a @tag or a { in JavaScript.
130 if (Style.isJavaScript() && SpaceOffset + 1 < Text.size() &&
131 (Text[SpaceOffset + 1] == '{' || Text[SpaceOffset + 1] == '@')) {
132 SpaceOffset = Text.find_last_of(Blanks, SpaceOffset);
133 continue;
134 }
135
136 break;
137 }
138
139 if (SpaceOffset == StringRef::npos ||
140 // Don't break at leading whitespace.
141 Text.find_last_not_of(Blanks, SpaceOffset) == StringRef::npos) {
142 // Make sure that we don't break at leading whitespace that
143 // reaches past MaxSplit.
144 StringRef::size_type FirstNonWhitespace = Text.find_first_not_of(Blanks);
145 if (FirstNonWhitespace == StringRef::npos) {
146 // If the comment is only whitespace, we cannot split.
147 return BreakableToken::Split(StringRef::npos, 0);
148 }
149 SpaceOffset = Text.find_first_of(
150 Blanks, std::max<unsigned>(MaxSplitBytes, FirstNonWhitespace));
151 }
152 if (SpaceOffset != StringRef::npos && SpaceOffset != 0) {
153 // adaptStartOfLine will break after lines starting with /** if the comment
154 // is broken anywhere. Avoid emitting this break twice here.
155 // Example: in /** longtextcomesherethatbreaks */ (with ColumnLimit 20) will
156 // insert a break after /**, so this code must not insert the same break.
157 if (SpaceOffset == 1 && Text[SpaceOffset - 1] == '*')
158 return BreakableToken::Split(StringRef::npos, 0);
159 StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim(Blanks);
160 StringRef AfterCut = Text.substr(SpaceOffset);
161 // Don't trim the leading blanks if it would create a */ after the break.
162 if (!DecorationEndsWithStar || AfterCut.size() <= 1 || AfterCut[1] != '/')
163 AfterCut = AfterCut.ltrim(Blanks);
164 return BreakableToken::Split(BeforeCut.size(),
165 AfterCut.begin() - BeforeCut.end());
166 }
167 return BreakableToken::Split(StringRef::npos, 0);
168}
169
171getStringSplit(StringRef Text, unsigned UsedColumns, unsigned ColumnLimit,
172 unsigned TabWidth, encoding::Encoding Encoding) {
173 // FIXME: Reduce unit test case.
174 if (Text.empty())
175 return BreakableToken::Split(StringRef::npos, 0);
176 if (ColumnLimit <= UsedColumns)
177 return BreakableToken::Split(StringRef::npos, 0);
178 unsigned MaxSplit = ColumnLimit - UsedColumns;
179 StringRef::size_type SpaceOffset = 0;
180 StringRef::size_type SlashOffset = 0;
181 StringRef::size_type WordStartOffset = 0;
182 StringRef::size_type SplitPoint = 0;
183 for (unsigned Chars = 0;;) {
184 unsigned Advance;
185 if (Text[0] == '\\') {
187 Chars += Advance;
188 } else {
189 Advance = encoding::getCodePointNumBytes(Text[0], Encoding);
191 Text.substr(0, Advance), UsedColumns + Chars, TabWidth, Encoding);
192 }
193
194 if (Chars > MaxSplit || Text.size() <= Advance)
195 break;
196
197 if (IsBlank(Text[0]))
198 SpaceOffset = SplitPoint;
199 if (Text[0] == '/')
200 SlashOffset = SplitPoint;
201 if (Advance == 1 && !isAlphanumeric(Text[0]))
202 WordStartOffset = SplitPoint;
203
204 SplitPoint += Advance;
205 Text = Text.substr(Advance);
206 }
207
208 if (SpaceOffset != 0)
209 return BreakableToken::Split(SpaceOffset + 1, 0);
210 if (SlashOffset != 0)
211 return BreakableToken::Split(SlashOffset + 1, 0);
212 if (WordStartOffset != 0)
213 return BreakableToken::Split(WordStartOffset + 1, 0);
214 if (SplitPoint != 0)
215 return BreakableToken::Split(SplitPoint, 0);
216 return BreakableToken::Split(StringRef::npos, 0);
217}
218
220 assert((Token.is(TT_BlockComment) || Token.is(TT_LineComment)) &&
221 "formatting regions are switched by comment tokens");
222 StringRef Content = Token.TokenText.substr(2).ltrim();
223 return Content.starts_with("clang-format on") ||
224 Content.starts_with("clang-format off");
225}
226
227unsigned
228BreakableToken::getLengthAfterCompression(unsigned RemainingTokenColumns,
229 Split Split) const {
230 // Example: consider the content
231 // lala lala
232 // - RemainingTokenColumns is the original number of columns, 10;
233 // - Split is (4, 2), denoting the two spaces between the two words;
234 //
235 // We compute the number of columns when the split is compressed into a single
236 // space, like:
237 // lala lala
238 //
239 // FIXME: Correctly measure the length of whitespace in Split.second so it
240 // works with tabs.
241 return RemainingTokenColumns + 1 - Split.second;
242}
243
244unsigned BreakableStringLiteral::getLineCount() const { return 1; }
245
246unsigned BreakableStringLiteral::getRangeLength(unsigned LineIndex,
247 unsigned Offset,
248 StringRef::size_type Length,
249 unsigned StartColumn) const {
250 llvm_unreachable("Getting the length of a part of the string literal "
251 "indicates that the code tries to reflow it.");
252}
253
254unsigned
255BreakableStringLiteral::getRemainingLength(unsigned LineIndex, unsigned Offset,
256 unsigned StartColumn) const {
257 return UnbreakableTailLength + Postfix.size() +
260}
261
263 bool Break) const {
264 return StartColumn + Prefix.size();
265}
266
268 const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,
269 StringRef Postfix, unsigned UnbreakableTailLength, bool InPPDirective,
270 encoding::Encoding Encoding, const FormatStyle &Style)
271 : BreakableToken(Tok, InPPDirective, Encoding, Style),
272 StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix),
273 UnbreakableTailLength(UnbreakableTailLength) {
274 assert(Tok.TokenText.starts_with(Prefix) && Tok.TokenText.ends_with(Postfix));
275 Line = Tok.TokenText.substr(
276 Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size());
277}
278
280 unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
281 unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const {
282 return getStringSplit(Line.substr(TailOffset), ContentStartColumn,
283 ColumnLimit - Postfix.size(), Style.TabWidth, Encoding);
284}
285
287 unsigned TailOffset, Split Split,
288 unsigned ContentIndent,
289 WhitespaceManager &Whitespaces) const {
290 Whitespaces.replaceWhitespaceInToken(
291 Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix,
293}
294
// Constructor body of the string-literal breaker that joins the pieces with
// an operator (or, for Verilog, with commas inside braces). It selects the
// Prefix/Postfix used at each break and the brace/quote replacement strings
// from the quote style and the formatting style.
//
// NOTE(review): this listing is an extraction of an HTML source view; the
// embedded line numbers show that several lines were dropped (295, 299, 316,
// 330-331, 336, 339, 342, 348, 352-353). The block is not compilable as-is
// and must be restored from the upstream file before use.
296 const FormatToken &Tok, QuoteStyleType QuoteStyle, bool UnindentPlus,
297 unsigned StartColumn, unsigned UnbreakableTailLength, bool InPPDirective,
298 encoding::Encoding Encoding, const FormatStyle &Style)
// NOTE(review): the base-class initializer head (embedded line 299) is
// missing; the arguments below pick the opening/closing delimiter from
// QuoteStyle.
300 Tok, StartColumn, /*Prefix=*/QuoteStyle == SingleQuotes ? "'"
301 : QuoteStyle == AtDoubleQuotes ? "@\""
302 : "\"",
303 /*Postfix=*/QuoteStyle == SingleQuotes ? "'" : "\"",
304 UnbreakableTailLength, InPPDirective, Encoding, Style),
305 BracesNeeded(Tok.isNot(TT_StringInConcatenation)),
306 QuoteStyle(QuoteStyle) {
307 // Find the replacement text for inserting braces and quotes and line breaks.
308 // We don't create an allocated string concatenated from parts here because it
309 // has to outlive the BreakableStringliteral object. The brace replacements
310 // include a quote so that WhitespaceManager can tell it apart from whitespace
311 // replacements between the string and surrounding tokens.
312
313 // The option is not implemented in JavaScript.
// NOTE(review): the second operand of this condition (embedded line 316) is
// missing from the listing.
314 bool SignOnNewLine =
315 !Style.isJavaScript() &&
317
318 if (Style.isVerilog()) {
319 // In Verilog, all strings are quoted by double quotes, joined by commas,
320 // and wrapped in braces. The comma is always before the newline.
321 assert(QuoteStyle == DoubleQuotes);
322 LeftBraceQuote = Style.Cpp11BracedListStyle ? "{\"" : "{ \"";
323 RightBraceQuote = Style.Cpp11BracedListStyle ? "\"}" : "\" }";
324 Postfix = "\",";
325 Prefix = "\"";
326 } else {
327 // The plus sign may be on either line. And also C# and JavaScript have
328 // several quoting styles.
// NOTE(review): presumably each branch below also assigns LeftBraceQuote and
// RightBraceQuote, as the Verilog branch does; those lines (330-331, 336,
// 339, 342) are missing here.
329 if (QuoteStyle == SingleQuotes) {
332 Postfix = SignOnNewLine ? "'" : "' +";
333 Prefix = SignOnNewLine ? "+ '" : "'";
334 } else {
335 if (QuoteStyle == AtDoubleQuotes) {
337 Prefix = SignOnNewLine ? "+ @\"" : "@\"";
338 } else {
340 Prefix = SignOnNewLine ? "+ \"" : "\"";
341 }
343 Postfix = SignOnNewLine ? "\"" : "\" +";
344 }
345 }
346
// NOTE(review): the statement announced by the next comment (embedded line
// 348, presumably the assignment of ContinuationIndent) and the tail of the
// condition plus its body (352-353) are missing.
347 // Following lines are indented by the width of the brace and space if any.
349 // The plus sign may need to be unindented depending on the style.
350 // FIXME: Add support for DontAlign.
351 if (!Style.isVerilog() && SignOnNewLine && !BracesNeeded && UnindentPlus &&
354 }
355}
356
358 unsigned LineIndex, unsigned Offset, unsigned StartColumn) const {
359 return UnbreakableTailLength + (BracesNeeded ? RightBraceQuote.size() : 1) +
362}
363
364unsigned
366 bool Break) const {
367 return std::max(
368 0,
369 static_cast<int>(StartColumn) +
370 (Break ? ContinuationIndent + static_cast<int>(Prefix.size())
371 : (BracesNeeded ? static_cast<int>(LeftBraceQuote.size()) - 1
372 : 0) +
373 (QuoteStyle == AtDoubleQuotes ? 2 : 1)));
374}
375
377 unsigned LineIndex, unsigned TailOffset, Split Split,
378 unsigned ContentIndent, WhitespaceManager &Whitespaces) const {
379 Whitespaces.replaceWhitespaceInToken(
380 Tok, /*Offset=*/(QuoteStyle == AtDoubleQuotes ? 2 : 1) + TailOffset +
381 Split.first,
382 /*ReplaceChars=*/Split.second, /*PreviousPostfix=*/Postfix,
383 /*CurrentPrefix=*/Prefix, InPPDirective, /*NewLines=*/1,
384 /*Spaces=*/
385 std::max(0, static_cast<int>(StartColumn) + ContinuationIndent));
386}
387
389 WhitespaceManager &Whitespaces) const {
390 // Add the braces required for breaking the token if they are needed.
391 if (!BracesNeeded)
392 return;
393
394 // To add a brace or parenthesis, we replace the quote (or the at sign) with a
395 // brace and another quote. This is because the rest of the program requires
396 // one replacement for each source range. If we replace the empty strings
397 // around the string, it may conflict with whitespace replacements between the
398 // string and adjacent tokens.
399 Whitespaces.replaceWhitespaceInToken(
400 Tok, /*Offset=*/0, /*ReplaceChars=*/1, /*PreviousPostfix=*/"",
401 /*CurrentPrefix=*/LeftBraceQuote, InPPDirective, /*NewLines=*/0,
402 /*Spaces=*/0);
403 Whitespaces.replaceWhitespaceInToken(
404 Tok, /*Offset=*/Tok.TokenText.size() - 1, /*ReplaceChars=*/1,
405 /*PreviousPostfix=*/RightBraceQuote,
406 /*CurrentPrefix=*/"", InPPDirective, /*NewLines=*/0, /*Spaces=*/0);
407}
408
410 unsigned StartColumn, bool InPPDirective,
411 encoding::Encoding Encoding,
412 const FormatStyle &Style)
413 : BreakableToken(Token, InPPDirective, Encoding, Style),
414 StartColumn(StartColumn) {}
415
416unsigned BreakableComment::getLineCount() const { return Lines.size(); }
417
419BreakableComment::getSplit(unsigned LineIndex, unsigned TailOffset,
420 unsigned ColumnLimit, unsigned ContentStartColumn,
421 const llvm::Regex &CommentPragmasRegex) const {
422 // Don't break lines matching the comment pragmas regex.
423 if (CommentPragmasRegex.match(Content[LineIndex]))
424 return Split(StringRef::npos, 0);
425 return getCommentSplit(Content[LineIndex].substr(TailOffset),
426 ContentStartColumn, ColumnLimit, Style.TabWidth,
427 Encoding, Style);
428}
429
431 unsigned LineIndex, unsigned TailOffset, Split Split,
432 WhitespaceManager &Whitespaces) const {
433 StringRef Text = Content[LineIndex].substr(TailOffset);
434 // Text is relative to the content line, but Whitespaces operates relative to
435 // the start of the corresponding token, so compute the start of the Split
436 // that needs to be compressed into a single space relative to the start of
437 // its token.
438 unsigned BreakOffsetInToken =
439 Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
440 unsigned CharsToRemove = Split.second;
441 Whitespaces.replaceWhitespaceInToken(
442 tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", "",
443 /*InPPDirective=*/false, /*Newlines=*/0, /*Spaces=*/1);
444}
445
446const FormatToken &BreakableComment::tokenAt(unsigned LineIndex) const {
447 return Tokens[LineIndex] ? *Tokens[LineIndex] : Tok;
448}
449
450static bool mayReflowContent(StringRef Content) {
451 Content = Content.trim(Blanks);
452 // Lines starting with '@' or '\' commonly have special meaning.
453 // Lines starting with '-', '-#', '+' or '*' are bulleted/numbered lists.
454 bool hasSpecialMeaningPrefix = false;
455 for (StringRef Prefix :
456 {"@", "\\", "TODO", "FIXME", "XXX", "-# ", "- ", "+ ", "* "}) {
457 if (Content.starts_with(Prefix)) {
458 hasSpecialMeaningPrefix = true;
459 break;
460 }
461 }
462
463 // Numbered lists may also start with a number followed by '.'
464 // To avoid issues if a line starts with a number which is actually the end
465 // of a previous line, we only consider numbers with up to 2 digits.
466 static const auto kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\. ");
467 hasSpecialMeaningPrefix =
468 hasSpecialMeaningPrefix || kNumberedListRegexp.match(Content);
469
470 // Simple heuristic for what to reflow: content should contain at least two
471 // characters and either the first or second character must be
472 // non-punctuation.
473 return Content.size() >= 2 && !hasSpecialMeaningPrefix &&
474 !Content.ends_with("\\") &&
475 // Note that this is UTF-8 safe, since if isPunctuation(Content[0]) is
476 // true, then the first code point must be 1 byte long.
477 (!isPunctuation(Content[0]) || !isPunctuation(Content[1]));
478}
479
481 const FormatToken &Token, unsigned StartColumn,
482 unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
483 encoding::Encoding Encoding, const FormatStyle &Style, bool UseCRLF)
484 : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style),
485 DelimitersOnNewline(false),
486 UnbreakableTailLength(Token.UnbreakableTailLength) {
487 assert(Tok.is(TT_BlockComment) &&
488 "block comment section must start with a block comment");
489
490 StringRef TokenText(Tok.TokenText);
491 assert(TokenText.starts_with("/*") && TokenText.ends_with("*/"));
492 TokenText.substr(2, TokenText.size() - 4)
493 .split(Lines, UseCRLF ? "\r\n" : "\n");
494
495 int IndentDelta = StartColumn - OriginalStartColumn;
496 Content.resize(Lines.size());
497 Content[0] = Lines[0];
498 ContentColumn.resize(Lines.size());
499 // Account for the initial '/*'.
500 ContentColumn[0] = StartColumn + 2;
501 Tokens.resize(Lines.size());
502 for (size_t i = 1; i < Lines.size(); ++i)
503 adjustWhitespace(i, IndentDelta);
504
505 // Align decorations with the column of the star on the first line,
506 // that is one column after the start "/*".
507 DecorationColumn = StartColumn + 1;
508
509 // Account for comment decoration patterns like this:
510 //
511 // /*
512 // ** blah blah blah
513 // */
514 if (Lines.size() >= 2 && Content[1].starts_with("**") &&
515 static_cast<unsigned>(ContentColumn[1]) == StartColumn) {
516 DecorationColumn = StartColumn;
517 }
518
519 Decoration = "* ";
520 if (Lines.size() == 1 && !FirstInLine) {
521 // Comments for which FirstInLine is false can start on arbitrary column,
522 // and available horizontal space can be too small to align consecutive
523 // lines with the first one.
524 // FIXME: We could, probably, align them to current indentation level, but
525 // now we just wrap them without stars.
526 Decoration = "";
527 }
528 for (size_t i = 1, e = Content.size(); i < e && !Decoration.empty(); ++i) {
529 const StringRef &Text = Content[i];
530 if (i + 1 == e) {
531 // If the last line is empty, the closing "*/" will have a star.
532 if (Text.empty())
533 break;
534 } else if (!Text.empty() && Decoration.starts_with(Text)) {
535 continue;
536 }
537 while (!Text.starts_with(Decoration))
538 Decoration = Decoration.drop_back(1);
539 }
540
541 LastLineNeedsDecoration = true;
542 IndentAtLineBreak = ContentColumn[0] + 1;
543 for (size_t i = 1, e = Lines.size(); i < e; ++i) {
544 if (Content[i].empty()) {
545 if (i + 1 == e) {
546 // Empty last line means that we already have a star as a part of the
547 // trailing */. We also need to preserve whitespace, so that */ is
548 // correctly indented.
549 LastLineNeedsDecoration = false;
550 // Align the star in the last '*/' with the stars on the previous lines.
551 if (e >= 2 && !Decoration.empty())
552 ContentColumn[i] = DecorationColumn;
553 } else if (Decoration.empty()) {
554 // For all other lines, set the start column to 0 if they're empty, so
555 // we do not insert trailing whitespace anywhere.
556 ContentColumn[i] = 0;
557 }
558 continue;
559 }
560
561 // The first line already excludes the star.
562 // The last line excludes the star if LastLineNeedsDecoration is false.
563 // For all other lines, adjust the line to exclude the star and
564 // (optionally) the first whitespace.
565 unsigned DecorationSize = Decoration.starts_with(Content[i])
566 ? Content[i].size()
567 : Decoration.size();
568 if (DecorationSize)
569 ContentColumn[i] = DecorationColumn + DecorationSize;
570 Content[i] = Content[i].substr(DecorationSize);
571 if (!Decoration.starts_with(Content[i])) {
572 IndentAtLineBreak =
573 std::min<int>(IndentAtLineBreak, std::max(0, ContentColumn[i]));
574 }
575 }
576 IndentAtLineBreak = std::max<unsigned>(IndentAtLineBreak, Decoration.size());
577
578 // Detect a multiline jsdoc comment and set DelimitersOnNewline in that case.
580 if ((Lines[0] == "*" || Lines[0].starts_with("* ")) && Lines.size() > 1) {
581 // This is a multiline jsdoc comment.
582 DelimitersOnNewline = true;
583 } else if (Lines[0].starts_with("* ") && Lines.size() == 1) {
584 // Detect a long single-line comment, like:
585 // /** long long long */
586 // Below, '2' is the width of '*/'.
587 unsigned EndColumn =
588 ContentColumn[0] +
591 2;
592 DelimitersOnNewline = EndColumn > Style.ColumnLimit;
593 }
594 }
595
596 LLVM_DEBUG({
597 llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n";
598 llvm::dbgs() << "DelimitersOnNewline " << DelimitersOnNewline << "\n";
599 for (size_t i = 0; i < Lines.size(); ++i) {
600 llvm::dbgs() << i << " |" << Content[i] << "| "
601 << "CC=" << ContentColumn[i] << "| "
602 << "IN=" << (Content[i].data() - Lines[i].data()) << "\n";
603 }
604 });
605}
606
608 unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
609 unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const {
610 // Don't break lines matching the comment pragmas regex.
611 if (CommentPragmasRegex.match(Content[LineIndex]))
612 return Split(StringRef::npos, 0);
613 return getCommentSplit(Content[LineIndex].substr(TailOffset),
614 ContentStartColumn, ColumnLimit, Style.TabWidth,
615 Encoding, Style, Decoration.ends_with("*"));
616}
617
618void BreakableBlockComment::adjustWhitespace(unsigned LineIndex,
619 int IndentDelta) {
620 // When in a preprocessor directive, the trailing backslash in a block comment
621 // is not needed, but can serve a purpose of uniformity with necessary escaped
622 // newlines outside the comment. In this case we remove it here before
623 // trimming the trailing whitespace. The backslash will be re-added later when
624 // inserting a line break.
625 size_t EndOfPreviousLine = Lines[LineIndex - 1].size();
626 if (InPPDirective && Lines[LineIndex - 1].ends_with("\\"))
627 --EndOfPreviousLine;
628
629 // Calculate the end of the non-whitespace text in the previous line.
630 EndOfPreviousLine =
631 Lines[LineIndex - 1].find_last_not_of(Blanks, EndOfPreviousLine);
632 if (EndOfPreviousLine == StringRef::npos)
633 EndOfPreviousLine = 0;
634 else
635 ++EndOfPreviousLine;
636 // Calculate the start of the non-whitespace text in the current line.
637 size_t StartOfLine = Lines[LineIndex].find_first_not_of(Blanks);
638 if (StartOfLine == StringRef::npos)
639 StartOfLine = Lines[LineIndex].size();
640
641 StringRef Whitespace = Lines[LineIndex].substr(0, StartOfLine);
642 // Adjust Lines to only contain relevant text.
643 size_t PreviousContentOffset =
644 Content[LineIndex - 1].data() - Lines[LineIndex - 1].data();
645 Content[LineIndex - 1] = Lines[LineIndex - 1].substr(
646 PreviousContentOffset, EndOfPreviousLine - PreviousContentOffset);
647 Content[LineIndex] = Lines[LineIndex].substr(StartOfLine);
648
649 // Adjust the start column uniformly across all lines.
650 ContentColumn[LineIndex] =
652 IndentDelta;
653}
654
655unsigned BreakableBlockComment::getRangeLength(unsigned LineIndex,
656 unsigned Offset,
657 StringRef::size_type Length,
658 unsigned StartColumn) const {
660 Content[LineIndex].substr(Offset, Length), StartColumn, Style.TabWidth,
661 Encoding);
662}
663
665 unsigned Offset,
666 unsigned StartColumn) const {
667 unsigned LineLength =
668 UnbreakableTailLength +
669 getRangeLength(LineIndex, Offset, StringRef::npos, StartColumn);
670 if (LineIndex + 1 == Lines.size()) {
671 LineLength += 2;
672 // We never need a decoration when breaking just the trailing "*/" postfix.
673 bool HasRemainingText = Offset < Content[LineIndex].size();
674 if (!HasRemainingText) {
675 bool HasDecoration = Lines[LineIndex].ltrim().starts_with(Decoration);
676 if (HasDecoration)
677 LineLength -= Decoration.size();
678 }
679 }
680 return LineLength;
681}
682
684 bool Break) const {
685 if (Break)
686 return IndentAtLineBreak;
687 return std::max(0, ContentColumn[LineIndex]);
688}
689
690const llvm::StringSet<>
692 "@param", "@return", "@returns", "@throws", "@type", "@template",
693 "@see", "@deprecated", "@define", "@exports", "@mods", "@private",
694};
695
696unsigned BreakableBlockComment::getContentIndent(unsigned LineIndex) const {
698 return 0;
699 // The content at LineIndex 0 of a comment like:
700 // /** line 0 */
701 // is "* line 0", so we need to skip over the decoration in that case.
702 StringRef ContentWithNoDecoration = Content[LineIndex];
703 if (LineIndex == 0 && ContentWithNoDecoration.starts_with("*"))
704 ContentWithNoDecoration = ContentWithNoDecoration.substr(1).ltrim(Blanks);
705 StringRef FirstWord = ContentWithNoDecoration.substr(
706 0, ContentWithNoDecoration.find_first_of(Blanks));
707 if (ContentIndentingJavadocAnnotations.contains(FirstWord))
709 return 0;
710}
711
712void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
713 Split Split, unsigned ContentIndent,
714 WhitespaceManager &Whitespaces) const {
715 StringRef Text = Content[LineIndex].substr(TailOffset);
716 StringRef Prefix = Decoration;
717 // We need this to account for the case when we have a decoration "* " for all
718 // the lines except for the last one, where the star in "*/" acts as a
719 // decoration.
720 unsigned LocalIndentAtLineBreak = IndentAtLineBreak;
721 if (LineIndex + 1 == Lines.size() &&
722 Text.size() == Split.first + Split.second) {
723 // For the last line we need to break before "*/", but not to add "* ".
724 Prefix = "";
725 if (LocalIndentAtLineBreak >= 2)
726 LocalIndentAtLineBreak -= 2;
727 }
728 // The split offset is from the beginning of the line. Convert it to an offset
729 // from the beginning of the token text.
730 unsigned BreakOffsetInToken =
731 Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
732 unsigned CharsToRemove = Split.second;
733 assert(LocalIndentAtLineBreak >= Prefix.size());
734 std::string PrefixWithTrailingIndent = std::string(Prefix);
735 PrefixWithTrailingIndent.append(ContentIndent, ' ');
736 Whitespaces.replaceWhitespaceInToken(
737 tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "",
738 PrefixWithTrailingIndent, InPPDirective, /*Newlines=*/1,
739 /*Spaces=*/LocalIndentAtLineBreak + ContentIndent -
740 PrefixWithTrailingIndent.size());
741}
742
744 unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
745 if (!mayReflow(LineIndex, CommentPragmasRegex))
746 return Split(StringRef::npos, 0);
747
748 // If we're reflowing into a line with content indent, only reflow the next
749 // line if its starting whitespace matches the content indent.
750 size_t Trimmed = Content[LineIndex].find_first_not_of(Blanks);
751 if (LineIndex) {
752 unsigned PreviousContentIndent = getContentIndent(LineIndex - 1);
753 if (PreviousContentIndent && Trimmed != StringRef::npos &&
754 Trimmed != PreviousContentIndent) {
755 return Split(StringRef::npos, 0);
756 }
757 }
758
759 return Split(0, Trimmed != StringRef::npos ? Trimmed : 0);
760}
761
763 // A break is introduced when we want delimiters on newline.
764 return DelimitersOnNewline &&
765 Lines[0].substr(1).find_first_not_of(Blanks) != StringRef::npos;
766}
767
768void BreakableBlockComment::reflow(unsigned LineIndex,
769 WhitespaceManager &Whitespaces) const {
770 StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);
771 // Here we need to reflow.
772 assert(Tokens[LineIndex - 1] == Tokens[LineIndex] &&
773 "Reflowing whitespace within a token");
774 // This is the offset of the end of the last line relative to the start of
775 // the token text in the token.
776 unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +
777 Content[LineIndex - 1].size() -
778 tokenAt(LineIndex).TokenText.data();
779 unsigned WhitespaceLength = TrimmedContent.data() -
780 tokenAt(LineIndex).TokenText.data() -
781 WhitespaceOffsetInToken;
782 Whitespaces.replaceWhitespaceInToken(
783 tokenAt(LineIndex), WhitespaceOffsetInToken,
784 /*ReplaceChars=*/WhitespaceLength, /*PreviousPostfix=*/"",
785 /*CurrentPrefix=*/ReflowPrefix, InPPDirective, /*Newlines=*/0,
786 /*Spaces=*/0);
787}
788
790 unsigned LineIndex, WhitespaceManager &Whitespaces) const {
791 if (LineIndex == 0) {
792 if (DelimitersOnNewline) {
793 // Since we're breaking at index 1 below, the break position and the
794 // break length are the same.
795 // Note: this works because getCommentSplit is careful never to split at
796 // the beginning of a line.
797 size_t BreakLength = Lines[0].substr(1).find_first_not_of(Blanks);
798 if (BreakLength != StringRef::npos) {
799 insertBreak(LineIndex, 0, Split(1, BreakLength), /*ContentIndent=*/0,
800 Whitespaces);
801 }
802 }
803 return;
804 }
805 // Here no reflow with the previous line will happen.
806 // Fix the decoration of the line at LineIndex.
807 StringRef Prefix = Decoration;
808 if (Content[LineIndex].empty()) {
809 if (LineIndex + 1 == Lines.size()) {
810 if (!LastLineNeedsDecoration) {
811 // If the last line was empty, we don't need a prefix, as the */ will
812 // line up with the decoration (if it exists).
813 Prefix = "";
814 }
815 } else if (!Decoration.empty()) {
816 // For other empty lines, if we do have a decoration, adapt it to not
817 // contain a trailing whitespace.
818 Prefix = Prefix.substr(0, 1);
819 }
820 } else if (ContentColumn[LineIndex] == 1) {
821 // This line starts immediately after the decorating *.
822 Prefix = Prefix.substr(0, 1);
823 }
824 // This is the offset of the end of the last line relative to the start of the
825 // token text in the token.
826 unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +
827 Content[LineIndex - 1].size() -
828 tokenAt(LineIndex).TokenText.data();
829 unsigned WhitespaceLength = Content[LineIndex].data() -
830 tokenAt(LineIndex).TokenText.data() -
831 WhitespaceOffsetInToken;
832 Whitespaces.replaceWhitespaceInToken(
833 tokenAt(LineIndex), WhitespaceOffsetInToken, WhitespaceLength, "", Prefix,
834 InPPDirective, /*Newlines=*/1, ContentColumn[LineIndex] - Prefix.size());
835}
836
839 if (DelimitersOnNewline) {
840 // Replace the trailing whitespace of the last line with a newline.
841 // In case the last line is empty, the ending '*/' is already on its own
842 // line.
843 StringRef Line = Content.back().substr(TailOffset);
844 StringRef TrimmedLine = Line.rtrim(Blanks);
845 if (!TrimmedLine.empty())
846 return Split(TrimmedLine.size(), Line.size() - TrimmedLine.size());
847 }
848 return Split(StringRef::npos, 0);
849}
850
852 unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
// NOTE(review): the first signature line (listing line 851) is missing from
// this excerpt; presumably this is BreakableBlockComment::mayReflow - confirm.
//
// Returns true only if LineIndex is not the first line, the line does not
// match CommentPragmasRegex, its content passes mayReflowContent, the token
// is not finalized, and the token does not switch formatting.
853 // Content[LineIndex] may exclude the indent after the '*' decoration. In that
854 // case, we compute the start of the comment pragma manually.
855 StringRef IndentContent = Content[LineIndex];
856 if (Lines[LineIndex].ltrim(Blanks).starts_with("*"))
857 IndentContent = Lines[LineIndex].ltrim(Blanks).substr(1);
858 return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) &&
859 mayReflowContent(Content[LineIndex]) && !Tok.Finalized &&
860 !switchesFormatting(tokenAt(LineIndex));
861}
862
864 const FormatToken &Token, unsigned StartColumn, bool InPPDirective,
865 encoding::Encoding Encoding, const FormatStyle &Style)
866 : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style) {
// NOTE(review): listing lines 863 (start of the signature), 902 (start of an
// `if` condition inside the lambda), 933 (right-hand side of the Maximum
// comparison) and 979-980 (right-hand side of the ContentColumn[i]
// assignment) are missing from this excerpt.
//
// Walks the chain of consecutive TT_LineComment tokens starting at Tok. Each
// token's text is split on '\n' and appended to Lines; for every new line the
// parallel vectors Content, ContentColumn, PrefixSpaceChange, Tokens, Prefix
// and OriginalPrefix are filled in. The walk stops after a token whose
// successor does not have ContinuesLineCommentSection set.
867 assert(Tok.is(TT_LineComment) &&
868 "line comment section must start with a line comment");
869 FormatToken *LineTok = nullptr;
870 const int Minimum = Style.SpacesInLineCommentPrefix.Minimum;
871 // How many spaces we changed in the first line of the section; this change
872 // is applied to all following lines.
873 int FirstLineSpaceChange = 0;
874 for (const FormatToken *CurrentTok = &Tok;
875 CurrentTok && CurrentTok->is(TT_LineComment);
876 CurrentTok = CurrentTok->Next) {
877 LastLineTok = LineTok;
878 StringRef TokenText(CurrentTok->TokenText);
879 assert((TokenText.starts_with("//") || TokenText.starts_with("#")) &&
880 "unsupported line comment prefix, '//' and '#' are supported");
// Lines added by this token occupy indices [FirstLineIndex, Lines.size()).
881 size_t FirstLineIndex = Lines.size();
882 TokenText.split(Lines, "\n");
883 Content.resize(Lines.size());
884 ContentColumn.resize(Lines.size());
885 PrefixSpaceChange.resize(Lines.size());
886 Tokens.resize(Lines.size());
887 Prefix.resize(Lines.size());
888 OriginalPrefix.resize(Lines.size());
889 for (size_t i = FirstLineIndex, e = Lines.size(); i < e; ++i) {
890 Lines[i] = Lines[i].ltrim(Blanks);
891 StringRef IndentPrefix = getLineCommentIndentPrefix(Lines[i], Style);
892 OriginalPrefix[i] = IndentPrefix;
893 const int SpacesInPrefix = llvm::count(IndentPrefix, ' ');
894
895 // This lambda also accounts for multibyte characters, which are not handled
896 // by functions like isPunctuation provided by CharInfo.
897 const auto NoSpaceBeforeFirstCommentChar = [&]() {
898 assert(Lines[i].size() > IndentPrefix.size());
899 const char FirstCommentChar = Lines[i][IndentPrefix.size()];
900 const unsigned FirstCharByteSize =
901 encoding::getCodePointNumBytes(FirstCommentChar, Encoding);
903 Lines[i].substr(IndentPrefix.size(), FirstCharByteSize),
904 Encoding) != 1) {
905 return false;
906 }
907 // In C-like comments, add a space before #. For example this is useful
908 // to preserve the relative indentation when commenting out code with
909 // #includes.
910 //
911 // In languages using # as the comment leader such as proto, don't
912 // add a space to support patterns like:
913 // #########
914 // # section
915 // #########
916 if (FirstCommentChar == '#' && !TokenText.starts_with("#"))
917 return false;
918 return FirstCommentChar == '\\' || isPunctuation(FirstCommentChar) ||
919 isHorizontalWhitespace(FirstCommentChar);
920 };
921
922 // On the first line of the comment section we calculate how many spaces
923 // are to be added or removed; all lines after that only get that change,
924 // and we do not look at the maximum anymore. In addition to the actual
925 // first line, we recalculate this whenever the non-space prefix changes,
926 // e.g. from "///" to "//".
927 if (i == 0 || OriginalPrefix[i].rtrim(Blanks) !=
928 OriginalPrefix[i - 1].rtrim(Blanks)) {
929 if (SpacesInPrefix < Minimum && Lines[i].size() > IndentPrefix.size() &&
930 !NoSpaceBeforeFirstCommentChar()) {
931 FirstLineSpaceChange = Minimum - SpacesInPrefix;
932 } else if (static_cast<unsigned>(SpacesInPrefix) >
934 FirstLineSpaceChange =
935 Style.SpacesInLineCommentPrefix.Maximum - SpacesInPrefix;
936 } else {
937 FirstLineSpaceChange = 0;
938 }
939 }
940
941 if (Lines[i].size() != IndentPrefix.size()) {
942 PrefixSpaceChange[i] = FirstLineSpaceChange;
943
// Top up the change so that the resulting prefix has at least Minimum spaces.
944 if (SpacesInPrefix + PrefixSpaceChange[i] < Minimum) {
945 PrefixSpaceChange[i] +=
946 Minimum - (SpacesInPrefix + PrefixSpaceChange[i]);
947 }
948
949 assert(Lines[i].size() > IndentPrefix.size());
950 const auto FirstNonSpace = Lines[i][IndentPrefix.size()];
951 const bool IsFormatComment = LineTok && switchesFormatting(*LineTok);
952 const bool LineRequiresLeadingSpace =
953 !NoSpaceBeforeFirstCommentChar() ||
954 (FirstNonSpace == '}' && FirstLineSpaceChange != 0);
955 const bool AllowsSpaceChange =
956 !IsFormatComment &&
957 (SpacesInPrefix != 0 || LineRequiresLeadingSpace);
958
959 if (PrefixSpaceChange[i] > 0 && AllowsSpaceChange) {
960 Prefix[i] = IndentPrefix.str();
961 Prefix[i].append(PrefixSpaceChange[i], ' ');
962 } else if (PrefixSpaceChange[i] < 0 && AllowsSpaceChange) {
// Never drop more characters than there are spaces in the prefix.
963 Prefix[i] = IndentPrefix
964 .drop_back(std::min<std::size_t>(
965 -PrefixSpaceChange[i], SpacesInPrefix))
966 .str();
967 } else {
968 Prefix[i] = IndentPrefix.str();
969 }
970 } else {
971 // If the IndentPrefix is the whole line, there is no content and we
972 // simply drop all spaces.
973 Prefix[i] = IndentPrefix.drop_back(SpacesInPrefix).str();
974 }
975
976 Tokens[i] = LineTok;
977 Content[i] = Lines[i].substr(IndentPrefix.size());
978 ContentColumn[i] =
981
982 // Calculate the end of the non-whitespace text in this line.
983 size_t EndOfLine = Content[i].find_last_not_of(Blanks);
984 if (EndOfLine == StringRef::npos)
985 EndOfLine = Content[i].size();
986 else
987 ++EndOfLine;
988 Content[i] = Content[i].substr(0, EndOfLine);
989 }
990 LineTok = CurrentTok->Next;
991 if (CurrentTok->Next && !CurrentTok->Next->ContinuesLineCommentSection) {
992 // A line comment section needs to be broken by a line comment that is
993 // preceded by at least two newlines. Note that we put this break here
994 // instead of breaking at a previous stage during parsing, since that
995 // would split the contents of the enum into two unwrapped lines in this
996 // example, which is undesirable:
997 // enum A {
998 // a, // comment about a
999 //
1000 // // comment about b
1001 // b
1002 // };
1003 //
1004 // FIXME: Consider putting separate line comment sections as children to
1005 // the unwrapped line instead.
1006 break;
1007 }
1008 }
1009}
1010
// Returns the number of columns required to format the Length bytes of
// Content[LineIndex] that start at Offset, beginning at StartColumn.
// NOTE(review): listing line 1015 (the `return` and the callee name) is
// missing from this excerpt; the arguments suggest a call to
// encoding::columnWidthWithTabs - confirm.
1011unsigned
1012BreakableLineCommentSection::getRangeLength(unsigned LineIndex, unsigned Offset,
1013 StringRef::size_type Length,
1014 unsigned StartColumn) const {
1016 Content[LineIndex].substr(Offset, Length), StartColumn, Style.TabWidth,
1017 Encoding);
1018}
1019
// NOTE(review): listing line 1021 (function name and first parameter) is
// missing from this excerpt; presumably this is
// BreakableLineCommentSection::getContentStartColumn - confirm.
//
// Returns the stored content column for line LineIndex. The bool parameter is
// unnamed and not used.
1020unsigned
1022 bool /*Break*/) const {
1023 return ContentColumn[LineIndex];
1024}
1025
1027 unsigned LineIndex, unsigned TailOffset, Split Split,
1028 unsigned ContentIndent, WhitespaceManager &Whitespaces) const {
// NOTE(review): the first signature line (listing line 1026) is missing from
// this excerpt; presumably this is BreakableLineCommentSection::insertBreak -
// confirm.
//
// Emits Split via Whitespaces as a break with one newline: the split's
// characters are replaced and the continuation starts with Prefix[LineIndex].
// ContentIndent is not used in this body.
1029 StringRef Text = Content[LineIndex].substr(TailOffset);
1030 // Compute the offset of the split relative to the beginning of the token
1031 // text.
1032 unsigned BreakOffsetInToken =
1033 Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
1034 unsigned CharsToRemove = Split.second;
1035 Whitespaces.replaceWhitespaceInToken(
1036 tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "",
1037 Prefix[LineIndex], InPPDirective, /*Newlines=*/1,
1038 /*Spaces=*/ContentColumn[LineIndex] - Prefix[LineIndex].size());
1039}
1040
1042 unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
// NOTE(review): the first signature line (listing line 1041) is missing from
// this excerpt; presumably this is
// BreakableLineCommentSection::getReflowSplit - confirm.
//
// Returns Split(npos, 0) when the line may not be reflowed; otherwise a split
// at offset 0 whose length is the number of leading blanks in
// Content[LineIndex] (0 if the content has no non-blank character).
1043 if (!mayReflow(LineIndex, CommentPragmasRegex))
1044 return Split(StringRef::npos, 0);
1045
1046 size_t Trimmed = Content[LineIndex].find_first_not_of(Blanks);
1047
1048 // In a line comment section each line is a separate token; thus, after a
1049 // split we replace all whitespace before the current line comment token
1050 // (which does not need to be included in the split), plus the start of the
1051 // line up to where the content starts.
1052 return Split(0, Trimmed != StringRef::npos ? Trimmed : 0);
1053}
1054
1056 WhitespaceManager &Whitespaces) const {
// NOTE(review): the first signature line (listing line 1055) is missing from
// this excerpt; presumably this is BreakableLineCommentSection::reflow -
// confirm.
//
// Reflows line LineIndex into the end of the previous line. First the
// whitespace separating the two lines is removed (between tokens, or inside a
// single token for an escaped newline); then the line's indent and comment
// prefix are replaced by ReflowPrefix.
1057 if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) {
1058 // Reflow happens between tokens. Replace the whitespace between the
1059 // tokens by the empty string.
1060 Whitespaces.replaceWhitespace(
1061 *Tokens[LineIndex], /*Newlines=*/0, /*Spaces=*/0,
1062 /*StartOfTokenColumn=*/StartColumn, /*IsAligned=*/true,
1063 /*InPPDirective=*/false);
1064 } else if (LineIndex > 0) {
1065 // In case we're reflowing after the '\' in:
1066 //
1067 // // line comment \
1068 // // line 2
1069 //
1070 // the reflow happens inside the single comment token (it is a single line
1071 // comment with an unescaped newline).
1072 // Replace the whitespace between the '\' and '//' with the empty string.
1073 //
1074 // Offset points to after the '\' relative to start of the token.
1075 unsigned Offset = Lines[LineIndex - 1].data() +
1076 Lines[LineIndex - 1].size() -
1077 tokenAt(LineIndex - 1).TokenText.data();
1078 // WhitespaceLength is the number of chars between the '\' and the '//' on
1079 // the next line.
1080 unsigned WhitespaceLength =
1081 Lines[LineIndex].data() - tokenAt(LineIndex).TokenText.data() - Offset;
1082 Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex], Offset,
1083 /*ReplaceChars=*/WhitespaceLength,
1084 /*PreviousPostfix=*/"",
1085 /*CurrentPrefix=*/"",
1086 /*InPPDirective=*/false,
1087 /*Newlines=*/0,
1088 /*Spaces=*/0);
1089 }
1090 // Replace the indent and prefix of the token with the reflow prefix.
// Offset is where this line starts within the token; WhitespaceLength spans
// from there to the start of the line's content.
1091 unsigned Offset =
1092 Lines[LineIndex].data() - tokenAt(LineIndex).TokenText.data();
1093 unsigned WhitespaceLength =
1094 Content[LineIndex].data() - Lines[LineIndex].data();
1095 Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex], Offset,
1096 /*ReplaceChars=*/WhitespaceLength,
1097 /*PreviousPostfix=*/"",
1098 /*CurrentPrefix=*/ReflowPrefix,
1099 /*InPPDirective=*/false,
1100 /*Newlines=*/0,
1101 /*Spaces=*/0);
1102}
1103
1105 unsigned LineIndex, WhitespaceManager &Whitespaces) const {
// NOTE(review): the first signature line (listing line 1104) is missing from
// this excerpt; presumably this is
// BreakableLineCommentSection::adaptStartOfLine - confirm.
//
// Adjusts the start of line LineIndex when it is not reflowed: registers a
// whitespace replacement in front of a line that begins a new token, and
// rewrites the comment prefix when Prefix differs from OriginalPrefix.
1106 // If this is the first line of a token, we need to inform Whitespace Manager
1107 // about it: either adapt the whitespace range preceding it, or mark it as an
1108 // untouchable token.
1109 // This happens for instance here:
1110 // // line 1 \
1111 // // line 2
1112 if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) {
1113 // This is the first line for the current token, but no reflow with the
1114 // previous token is necessary. However, we still may need to adjust the
1115 // start column. Note that ContentColumn[LineIndex] is the expected
1116 // content column after a possible update to the prefix, hence the prefix
1117 // length change is included.
1118 unsigned LineColumn =
1119 ContentColumn[LineIndex] -
1120 (Content[LineIndex].data() - Lines[LineIndex].data()) +
1121 (OriginalPrefix[LineIndex].size() - Prefix[LineIndex].size());
1122
1123 // We always want to create a replacement instead of adding an untouchable
1124 // token, even if LineColumn is the same as the original column of the
1125 // token. This is because WhitespaceManager doesn't align trailing
1126 // comments if they are untouchable.
1127 Whitespaces.replaceWhitespace(*Tokens[LineIndex],
1128 /*Newlines=*/1,
1129 /*Spaces=*/LineColumn,
1130 /*StartOfTokenColumn=*/LineColumn,
1131 /*IsAligned=*/true,
1132 /*InPPDirective=*/false);
1133 }
1134 if (OriginalPrefix[LineIndex] != Prefix[LineIndex]) {
1135 // Adjust the prefix if necessary.
// A negative PrefixSpaceChange removes that many spaces from the end of the
// original prefix; a positive one adds spaces after it.
1136 const auto SpacesToRemove = -std::min(PrefixSpaceChange[LineIndex], 0);
1137 const auto SpacesToAdd = std::max(PrefixSpaceChange[LineIndex], 0);
1138 Whitespaces.replaceWhitespaceInToken(
1139 tokenAt(LineIndex), OriginalPrefix[LineIndex].size() - SpacesToRemove,
1140 /*ReplaceChars=*/SpacesToRemove, "", "", /*InPPDirective=*/false,
1141 /*Newlines=*/0, /*Spaces=*/SpacesToAdd);
1142 }
1143}
1144
// NOTE(review): the signature (listing line 1145) is missing from this
// excerpt; presumably this is
// BreakableLineCommentSection::updateNextToken(LineState &State) - confirm.
//
// Points State.NextToken at the token following LastLineTok; State is left
// untouched when LastLineTok is null.
1146 if (LastLineTok)
1147 State.NextToken = LastLineTok->Next;
1148}
1149
1151 unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
// NOTE(review): the first signature line (listing line 1150) is missing from
// this excerpt; presumably this is BreakableLineCommentSection::mayReflow -
// confirm.
//
// Returns true only if LineIndex is not the first line, the text after a
// leading "//" does not match CommentPragmasRegex, the content passes
// mayReflowContent, the token is not finalized and does not switch
// formatting, and the line's original prefix equals the previous line's.
1152 // Line comments have the indent as part of the prefix, so we need to
1153 // recompute the start of the line.
1154 StringRef IndentContent = Content[LineIndex];
1155 if (Lines[LineIndex].starts_with("//"))
1156 IndentContent = Lines[LineIndex].substr(2);
1157 // FIXME: Decide whether we want to reflow non-regular indents:
1158 // Currently, we only reflow when the OriginalPrefix[LineIndex] matches the
1159 // OriginalPrefix[LineIndex-1]. That means we don't reflow
1160 // // text that protrudes
1161 // // into text with different indent
1162 // We do reflow in that case in block comments.
1163 return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) &&
1164 mayReflowContent(Content[LineIndex]) && !Tok.Finalized &&
1165 !switchesFormatting(tokenAt(LineIndex)) &&
1166 OriginalPrefix[LineIndex] == OriginalPrefix[LineIndex - 1];
1167}
1168
1169} // namespace format
1170} // namespace clang
Declares BreakableToken, BreakableStringLiteral, BreakableComment, BreakableBlockComment and Breakabl...
This file implements an indenter that manages the indentation of continuations.
StringRef Text
Definition: Format.cpp:2970
Various functions to configurably format source code.
Token - This structure provides full information about a lexed token.
Definition: Token.h:36
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {....
Definition: Token.h:99
void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, unsigned ContentIndent, WhitespaceManager &Whitespaces) const override
Emits the previously retrieved Split via Whitespaces.
unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override
Returns the column at which content in line LineIndex starts, assuming no reflow.
Split getReflowSplit(unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const override
Returns a whitespace range (offset, length) of the content at LineIndex such that the content of that...
bool mayReflow(unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const override
Split getSplitAfterLastLine(unsigned TailOffset) const override
Returns a whitespace range (offset, length) of the content at the last line that needs to be reformat...
BreakableBlockComment(const FormatToken &Token, unsigned StartColumn, unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style, bool UseCRLF)
static const llvm::StringSet ContentIndentingJavadocAnnotations
unsigned getRemainingLength(unsigned LineIndex, unsigned Offset, unsigned StartColumn) const override
Returns the number of columns required to format the text following the byte Offset in the line LineI...
unsigned getContentIndent(unsigned LineIndex) const override
Returns additional content indent required for the second line after the content at line LineIndex is...
void adaptStartOfLine(unsigned LineIndex, WhitespaceManager &Whitespaces) const override
Replaces the whitespace between LineIndex-1 and LineIndex.
void reflow(unsigned LineIndex, WhitespaceManager &Whitespaces) const override
Reflows the current line into the end of the previous one.
unsigned getRangeLength(unsigned LineIndex, unsigned Offset, StringRef::size_type Length, unsigned StartColumn) const override
Returns the number of columns required to format the text in the byte range [Offset,...
Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const override
Returns a range (offset, length) at which to break the line at LineIndex, if previously broken at Tai...
bool introducesBreakBeforeToken() const override
Returns whether there will be a line break at the start of the token.
unsigned getLineCount() const override
Returns the number of lines in this token in the original code.
SmallVector< StringRef, 16 > Lines
SmallVector< int, 16 > ContentColumn
Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const override
Returns a range (offset, length) at which to break the line at LineIndex, if previously broken at Tai...
SmallVector< FormatToken *, 16 > Tokens
SmallVector< StringRef, 16 > Content
void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split, WhitespaceManager &Whitespaces) const override
Replaces the whitespace range described by Split with a single space.
const FormatToken & tokenAt(unsigned LineIndex) const
BreakableComment(const FormatToken &Token, unsigned StartColumn, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
Creates a breakable token for a comment.
BreakableLineCommentSection(const FormatToken &Token, unsigned StartColumn, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override
Returns the column at which content in line LineIndex starts, assuming no reflow.
void reflow(unsigned LineIndex, WhitespaceManager &Whitespaces) const override
Reflows the current line into the end of the previous one.
Split getReflowSplit(unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const override
Returns a whitespace range (offset, length) of the content at LineIndex such that the content of that...
void updateNextToken(LineState &State) const override
Updates the next token of State to the next token after this one.
bool mayReflow(unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const override
void adaptStartOfLine(unsigned LineIndex, WhitespaceManager &Whitespaces) const override
Replaces the whitespace between LineIndex-1 and LineIndex.
void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, unsigned ContentIndent, WhitespaceManager &Whitespaces) const override
Emits the previously retrieved Split via Whitespaces.
unsigned getRangeLength(unsigned LineIndex, unsigned Offset, StringRef::size_type Length, unsigned StartColumn) const override
Returns the number of columns required to format the text in the byte range [Offset,...
BreakableStringLiteralUsingOperators(const FormatToken &Tok, QuoteStyleType QuoteStyle, bool UnindentPlus, unsigned StartColumn, unsigned UnbreakableTailLength, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
Creates a breakable token for a single line string literal for C#, Java, JavaScript,...
void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, unsigned ContentIndent, WhitespaceManager &Whitespaces) const override
Emits the previously retrieved Split via Whitespaces.
unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override
Returns the column at which content in line LineIndex starts, assuming no reflow.
void updateAfterBroken(WhitespaceManager &Whitespaces) const override
Adds replacements that are needed when the token is broken.
unsigned getRemainingLength(unsigned LineIndex, unsigned Offset, unsigned StartColumn) const override
Returns the number of columns required to format the text following the byte Offset in the line LineI...
Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const override
Returns a range (offset, length) at which to break the line at LineIndex, if previously broken at Tai...
unsigned getRangeLength(unsigned LineIndex, unsigned Offset, StringRef::size_type Length, unsigned StartColumn) const override
Returns the number of columns required to format the text in the byte range [Offset,...
unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override
Returns the column at which content in line LineIndex starts, assuming no reflow.
unsigned getLineCount() const override
Returns the number of lines in this token in the original code.
unsigned getRemainingLength(unsigned LineIndex, unsigned Offset, unsigned StartColumn) const override
Returns the number of columns required to format the text following the byte Offset in the line LineI...
BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn, StringRef Prefix, StringRef Postfix, unsigned UnbreakableTailLength, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
Creates a breakable token for a single line string literal.
void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, unsigned ContentIndent, WhitespaceManager &Whitespaces) const override
Emits the previously retrieved Split via Whitespaces.
Base class for tokens / ranges of tokens that can allow breaking within the tokens - for example,...
std::pair< StringRef::size_type, unsigned > Split
Contains starting character index and length of split.
unsigned getLengthAfterCompression(unsigned RemainingTokenColumns, Split Split) const
Returns the number of columns needed to format RemainingTokenColumns, assuming that Split is within t...
const encoding::Encoding Encoding
Manages the whitespaces around tokens and their replacements.
unsigned columnWidthWithTabs(StringRef Text, unsigned StartColumn, unsigned TabWidth, Encoding Encoding)
Returns the number of columns required to display the Text, starting from the StartColumn on a termin...
Definition: Encoding.h:60
unsigned getEscapeSequenceLength(StringRef Text)
Gets the length of an escape sequence inside a C++ string literal.
Definition: Encoding.h:96
unsigned getCodePointNumBytes(char FirstChar, Encoding Encoding)
Gets the number of bytes in a sequence representing a single codepoint and starting with FirstChar in...
Definition: Encoding.h:77
unsigned columnWidth(StringRef Text, Encoding Encoding)
Returns the number of columns required to display the Text on a generic Unicode-capable terminal.
Definition: Encoding.h:44
static constexpr StringRef Blanks
bool switchesFormatting(const FormatToken &Token)
Checks if Token switches formatting, like /* clang-format off *‍/.
static bool IsBlank(char C)
static BreakableToken::Split getStringSplit(StringRef Text, unsigned UsedColumns, unsigned ColumnLimit, unsigned TabWidth, encoding::Encoding Encoding)
static StringRef getLineCommentIndentPrefix(StringRef Comment, const FormatStyle &Style)
static bool mayReflowContent(StringRef Content)
static BreakableToken::Split getCommentSplit(StringRef Text, unsigned ContentStartColumn, unsigned ColumnLimit, unsigned TabWidth, encoding::Encoding Encoding, const FormatStyle &Style, bool DecorationEndsWithStar=false)
The JSON file list parser is used to communicate input to InstallAPI.
LLVM_READONLY bool isAlphanumeric(unsigned char c)
Return true if this character is an ASCII letter or digit: [a-zA-Z0-9].
Definition: CharInfo.h:139
LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)
Returns true if this character is horizontal ASCII whitespace: ' ', '\t', '\f', '\v'.
Definition: CharInfo.h:92
LLVM_READONLY bool isPunctuation(unsigned char c)
Return true if this character is an ASCII punctuation character.
Definition: CharInfo.h:153
#define false
Definition: stdbool.h:22
unsigned Maximum
The maximum number of spaces at the start of the comment.
Definition: Format.h:4530
unsigned Minimum
The minimum number of spaces at the start of the comment.
Definition: Format.h:4528
bool Other
Put a space in parentheses not covered by preceding options.
Definition: Format.h:4635
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:55
unsigned ContinuationIndentWidth
Indent width for line continuations.
Definition: Format.h:2384
@ LK_Java
Should be used for Java.
Definition: Format.h:3108
@ LK_TextProto
Should be used for Protocol Buffer messages in text format (https://developers.google....
Definition: Format.h:3122
bool Cpp11BracedListStyle
If true, format braced lists as best suited for C++11 braced lists.
Definition: Format.h:2407
BinaryOperatorStyle BreakBeforeBinaryOperators
The way to wrap binary operators.
Definition: Format.h:1691
@ BOS_None
Break after operators.
Definition: Format.h:1662
LanguageKind Language
Language, this format style is targeted at.
Definition: Format.h:3140
unsigned TabWidth
The number of columns used for tab stops.
Definition: Format.h:4802
OperandAlignmentStyle AlignOperands
If true, horizontally align operands of binary and ternary expressions.
Definition: Format.h:524
SpacesInParensCustom SpacesInParensOptions
Control of individual spaces in parentheses.
Definition: Format.h:4670
bool isVerilog() const
Definition: Format.h:3132
SpacesInLineComment SpacesInLineCommentPrefix
How many spaces are allowed at the start of a line comment.
Definition: Format.h:4565
bool isJavaScript() const
Definition: Format.h:3131
@ OAS_AlignAfterOperator
Horizontally align operands of binary and ternary expressions.
Definition: Format.h:518
unsigned ColumnLimit
The column limit.
Definition: Format.h:2285
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:285
StringRef TokenText
The raw text of the token.
Definition: FormatToken.h:305
unsigned Finalized
If true, this token has been fully formatted (indented and potentially re-formatted inside),...
Definition: FormatToken.h:364
FormatToken * Next
The next token in the unwrapped line.
Definition: FormatToken.h:557
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:594
The current state when indenting an unwrapped line.