clang 17.0.0git
BreakableToken.cpp
Go to the documentation of this file.
1//===--- BreakableToken.cpp - Format C++ code -----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// Contains implementation of BreakableToken class and classes derived
11/// from it.
12///
13//===----------------------------------------------------------------------===//
14
15#include "BreakableToken.h"
18#include "clang/Format/Format.h"
19#include "llvm/ADT/STLExtras.h"
20#include "llvm/Support/Debug.h"
21#include <algorithm>
22
23#define DEBUG_TYPE "format-token-breaker"
24
25namespace clang {
26namespace format {
27
28static constexpr StringRef Blanks = " \t\v\f\r";
/// Returns true if \p C is a space, tab, vertical tab, form feed or carriage
/// return (the same characters that are listed in \c Blanks). A newline is not
/// considered blank.
static bool IsBlank(char C) {
  switch (C) {
  case ' ':
  case '\t':
  case '\v':
  case '\f':
  case '\r':
    return true;
  default:
    return false;
  }
}
41
42static StringRef getLineCommentIndentPrefix(StringRef Comment,
43 const FormatStyle &Style) {
44 static constexpr StringRef KnownCStylePrefixes[] = {"///<", "//!<", "///",
45 "//!", "//:", "//"};
46 static constexpr StringRef KnownTextProtoPrefixes[] = {"####", "###", "##",
47 "//", "#"};
48 ArrayRef<StringRef> KnownPrefixes(KnownCStylePrefixes);
50 KnownPrefixes = KnownTextProtoPrefixes;
51
52 assert(
53 llvm::is_sorted(KnownPrefixes, [](StringRef Lhs, StringRef Rhs) noexcept {
54 return Lhs.size() > Rhs.size();
55 }));
56
57 for (StringRef KnownPrefix : KnownPrefixes) {
58 if (Comment.startswith(KnownPrefix)) {
59 const auto PrefixLength =
60 Comment.find_first_not_of(' ', KnownPrefix.size());
61 return Comment.substr(0, PrefixLength);
62 }
63 }
64 return {};
65}
66
68getCommentSplit(StringRef Text, unsigned ContentStartColumn,
69 unsigned ColumnLimit, unsigned TabWidth,
70 encoding::Encoding Encoding, const FormatStyle &Style,
71 bool DecorationEndsWithStar = false) {
72 LLVM_DEBUG(llvm::dbgs() << "Comment split: \"" << Text
73 << "\", Column limit: " << ColumnLimit
74 << ", Content start: " << ContentStartColumn << "\n");
75 if (ColumnLimit <= ContentStartColumn + 1)
76 return BreakableToken::Split(StringRef::npos, 0);
77
78 unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1;
79 unsigned MaxSplitBytes = 0;
80
81 for (unsigned NumChars = 0;
82 NumChars < MaxSplit && MaxSplitBytes < Text.size();) {
83 unsigned BytesInChar =
84 encoding::getCodePointNumBytes(Text[MaxSplitBytes], Encoding);
85 NumChars +=
86 encoding::columnWidthWithTabs(Text.substr(MaxSplitBytes, BytesInChar),
87 ContentStartColumn, TabWidth, Encoding);
88 MaxSplitBytes += BytesInChar;
89 }
90
91 // In JavaScript, some @tags can be followed by {, and machinery that parses
92 // these comments will fail to understand the comment if followed by a line
93 // break. So avoid ever breaking before a {.
94 if (Style.isJavaScript()) {
95 StringRef::size_type SpaceOffset =
96 Text.find_first_of(Blanks, MaxSplitBytes);
97 if (SpaceOffset != StringRef::npos && SpaceOffset + 1 < Text.size() &&
98 Text[SpaceOffset + 1] == '{') {
99 MaxSplitBytes = SpaceOffset + 1;
100 }
101 }
102
103 StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes);
104
105 static const auto kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\.");
106 // Some spaces are unacceptable to break on, rewind past them.
107 while (SpaceOffset != StringRef::npos) {
108 // If a line-comment ends with `\`, the next line continues the comment,
109 // whether or not it starts with `//`. This is confusing and triggers
110 // -Wcomment.
111 // Avoid introducing multiline comments by not allowing a break right
112 // after '\'.
113 if (Style.isCpp()) {
114 StringRef::size_type LastNonBlank =
115 Text.find_last_not_of(Blanks, SpaceOffset);
116 if (LastNonBlank != StringRef::npos && Text[LastNonBlank] == '\\') {
117 SpaceOffset = Text.find_last_of(Blanks, LastNonBlank);
118 continue;
119 }
120 }
121
122 // Do not split before a number followed by a dot: this would be interpreted
123 // as a numbered list, which would prevent re-flowing in subsequent passes.
124 if (kNumberedListRegexp.match(Text.substr(SpaceOffset).ltrim(Blanks))) {
125 SpaceOffset = Text.find_last_of(Blanks, SpaceOffset);
126 continue;
127 }
128
129 // Avoid ever breaking before a @tag or a { in JavaScript.
130 if (Style.isJavaScript() && SpaceOffset + 1 < Text.size() &&
131 (Text[SpaceOffset + 1] == '{' || Text[SpaceOffset + 1] == '@')) {
132 SpaceOffset = Text.find_last_of(Blanks, SpaceOffset);
133 continue;
134 }
135
136 break;
137 }
138
139 if (SpaceOffset == StringRef::npos ||
140 // Don't break at leading whitespace.
141 Text.find_last_not_of(Blanks, SpaceOffset) == StringRef::npos) {
142 // Make sure that we don't break at leading whitespace that
143 // reaches past MaxSplit.
144 StringRef::size_type FirstNonWhitespace = Text.find_first_not_of(Blanks);
145 if (FirstNonWhitespace == StringRef::npos) {
146 // If the comment is only whitespace, we cannot split.
147 return BreakableToken::Split(StringRef::npos, 0);
148 }
149 SpaceOffset = Text.find_first_of(
150 Blanks, std::max<unsigned>(MaxSplitBytes, FirstNonWhitespace));
151 }
152 if (SpaceOffset != StringRef::npos && SpaceOffset != 0) {
153 // adaptStartOfLine will break after lines starting with /** if the comment
154 // is broken anywhere. Avoid emitting this break twice here.
155 // Example: in /** longtextcomesherethatbreaks */ (with ColumnLimit 20) will
156 // insert a break after /**, so this code must not insert the same break.
157 if (SpaceOffset == 1 && Text[SpaceOffset - 1] == '*')
158 return BreakableToken::Split(StringRef::npos, 0);
159 StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim(Blanks);
160 StringRef AfterCut = Text.substr(SpaceOffset);
161 // Don't trim the leading blanks if it would create a */ after the break.
162 if (!DecorationEndsWithStar || AfterCut.size() <= 1 || AfterCut[1] != '/')
163 AfterCut = AfterCut.ltrim(Blanks);
164 return BreakableToken::Split(BeforeCut.size(),
165 AfterCut.begin() - BeforeCut.end());
166 }
167 return BreakableToken::Split(StringRef::npos, 0);
168}
169
171getStringSplit(StringRef Text, unsigned UsedColumns, unsigned ColumnLimit,
172 unsigned TabWidth, encoding::Encoding Encoding) {
173 // FIXME: Reduce unit test case.
174 if (Text.empty())
175 return BreakableToken::Split(StringRef::npos, 0);
176 if (ColumnLimit <= UsedColumns)
177 return BreakableToken::Split(StringRef::npos, 0);
178 unsigned MaxSplit = ColumnLimit - UsedColumns;
179 StringRef::size_type SpaceOffset = 0;
180 StringRef::size_type SlashOffset = 0;
181 StringRef::size_type WordStartOffset = 0;
182 StringRef::size_type SplitPoint = 0;
183 for (unsigned Chars = 0;;) {
184 unsigned Advance;
185 if (Text[0] == '\\') {
187 Chars += Advance;
188 } else {
189 Advance = encoding::getCodePointNumBytes(Text[0], Encoding);
191 Text.substr(0, Advance), UsedColumns + Chars, TabWidth, Encoding);
192 }
193
194 if (Chars > MaxSplit || Text.size() <= Advance)
195 break;
196
197 if (IsBlank(Text[0]))
198 SpaceOffset = SplitPoint;
199 if (Text[0] == '/')
200 SlashOffset = SplitPoint;
201 if (Advance == 1 && !isAlphanumeric(Text[0]))
202 WordStartOffset = SplitPoint;
203
204 SplitPoint += Advance;
205 Text = Text.substr(Advance);
206 }
207
208 if (SpaceOffset != 0)
209 return BreakableToken::Split(SpaceOffset + 1, 0);
210 if (SlashOffset != 0)
211 return BreakableToken::Split(SlashOffset + 1, 0);
212 if (WordStartOffset != 0)
213 return BreakableToken::Split(WordStartOffset + 1, 0);
214 if (SplitPoint != 0)
215 return BreakableToken::Split(SplitPoint, 0);
216 return BreakableToken::Split(StringRef::npos, 0);
217}
218
220 assert((Token.is(TT_BlockComment) || Token.is(TT_LineComment)) &&
221 "formatting regions are switched by comment tokens");
222 StringRef Content = Token.TokenText.substr(2).ltrim();
223 return Content.startswith("clang-format on") ||
224 Content.startswith("clang-format off");
225}
226
227unsigned
228BreakableToken::getLengthAfterCompression(unsigned RemainingTokenColumns,
229 Split Split) const {
230 // Example: consider the content
231 // lala lala
232 // - RemainingTokenColumns is the original number of columns, 10;
233 // - Split is (4, 2), denoting the two spaces between the two words;
234 //
235 // We compute the number of columns when the split is compressed into a single
236 // space, like:
237 // lala lala
238 //
239 // FIXME: Correctly measure the length of whitespace in Split.second so it
240 // works with tabs.
241 return RemainingTokenColumns + 1 - Split.second;
242}
243
244unsigned BreakableStringLiteral::getLineCount() const { return 1; }
245
246unsigned BreakableStringLiteral::getRangeLength(unsigned LineIndex,
247 unsigned Offset,
248 StringRef::size_type Length,
249 unsigned StartColumn) const {
250 llvm_unreachable("Getting the length of a part of the string literal "
251 "indicates that the code tries to reflow it.");
252}
253
254unsigned
256 unsigned StartColumn) const {
257 return UnbreakableTailLength + Postfix.size() +
260}
261
263 bool Break) const {
264 return StartColumn + Prefix.size();
265}
266
268 const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,
269 StringRef Postfix, unsigned UnbreakableTailLength, bool InPPDirective,
270 encoding::Encoding Encoding, const FormatStyle &Style)
271 : BreakableToken(Tok, InPPDirective, Encoding, Style),
272 StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix),
273 UnbreakableTailLength(UnbreakableTailLength) {
274 assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix));
275 Line = Tok.TokenText.substr(
276 Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size());
277}
278
280 unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
281 unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const {
282 return getStringSplit(Line.substr(TailOffset), ContentStartColumn,
283 ColumnLimit - Postfix.size(), Style.TabWidth, Encoding);
284}
285
287 unsigned TailOffset, Split Split,
288 unsigned ContentIndent,
289 WhitespaceManager &Whitespaces) const {
290 Whitespaces.replaceWhitespaceInToken(
291 Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix,
293}
294
296 unsigned StartColumn, bool InPPDirective,
297 encoding::Encoding Encoding,
298 const FormatStyle &Style)
299 : BreakableToken(Token, InPPDirective, Encoding, Style),
300 StartColumn(StartColumn) {}
301
302unsigned BreakableComment::getLineCount() const { return Lines.size(); }
303
305BreakableComment::getSplit(unsigned LineIndex, unsigned TailOffset,
306 unsigned ColumnLimit, unsigned ContentStartColumn,
307 const llvm::Regex &CommentPragmasRegex) const {
308 // Don't break lines matching the comment pragmas regex.
309 if (CommentPragmasRegex.match(Content[LineIndex]))
310 return Split(StringRef::npos, 0);
311 return getCommentSplit(Content[LineIndex].substr(TailOffset),
312 ContentStartColumn, ColumnLimit, Style.TabWidth,
313 Encoding, Style);
314}
315
317 unsigned LineIndex, unsigned TailOffset, Split Split,
318 WhitespaceManager &Whitespaces) const {
319 StringRef Text = Content[LineIndex].substr(TailOffset);
320 // Text is relative to the content line, but Whitespaces operates relative to
321 // the start of the corresponding token, so compute the start of the Split
322 // that needs to be compressed into a single space relative to the start of
323 // its token.
324 unsigned BreakOffsetInToken =
325 Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
326 unsigned CharsToRemove = Split.second;
327 Whitespaces.replaceWhitespaceInToken(
328 tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", "",
329 /*InPPDirective=*/false, /*Newlines=*/0, /*Spaces=*/1);
330}
331
332const FormatToken &BreakableComment::tokenAt(unsigned LineIndex) const {
333 return Tokens[LineIndex] ? *Tokens[LineIndex] : Tok;
334}
335
336static bool mayReflowContent(StringRef Content) {
337 Content = Content.trim(Blanks);
338 // Lines starting with '@' commonly have special meaning.
339 // Lines starting with '-', '-#', '+' or '*' are bulleted/numbered lists.
340 bool hasSpecialMeaningPrefix = false;
341 for (StringRef Prefix :
342 {"@", "TODO", "FIXME", "XXX", "-# ", "- ", "+ ", "* "}) {
343 if (Content.startswith(Prefix)) {
344 hasSpecialMeaningPrefix = true;
345 break;
346 }
347 }
348
349 // Numbered lists may also start with a number followed by '.'
350 // To avoid issues if a line starts with a number which is actually the end
351 // of a previous line, we only consider numbers with up to 2 digits.
352 static const auto kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\. ");
353 hasSpecialMeaningPrefix =
354 hasSpecialMeaningPrefix || kNumberedListRegexp.match(Content);
355
356 // Simple heuristic for what to reflow: content should contain at least two
357 // characters and either the first or second character must be
358 // non-punctuation.
359 return Content.size() >= 2 && !hasSpecialMeaningPrefix &&
360 !Content.endswith("\\") &&
361 // Note that this is UTF-8 safe, since if isPunctuation(Content[0]) is
362 // true, then the first code point must be 1 byte long.
363 (!isPunctuation(Content[0]) || !isPunctuation(Content[1]));
364}
365
// Constructor of a breakable block comment. It splits the text between the
// "/*" and "*/" delimiters into Lines, derives the per-line Content and
// ContentColumn, determines the common line Decoration (starting from "* "
// and shortened until every relevant line starts with it), computes
// IndentAtLineBreak and decides whether the delimiters go on their own lines
// (DelimitersOnNewline).
//
// NOTE(review): the declarator line naming this constructor is absent from
// this listing, as are a few interior lines flagged below; the leading
// numbers on each line look like listing line numbers fused into the code.
// Restore them from the original file before compiling.
367 const FormatToken &Token, unsigned StartColumn,
368 unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
369 encoding::Encoding Encoding, const FormatStyle &Style, bool UseCRLF)
370 : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style),
371 DelimitersOnNewline(false),
372 UnbreakableTailLength(Token.UnbreakableTailLength) {
373 assert(Tok.is(TT_BlockComment) &&
374 "block comment section must start with a block comment");
375
376 StringRef TokenText(Tok.TokenText);
377 assert(TokenText.startswith("/*") && TokenText.endswith("*/"));
// Drop the two-character delimiters on both ends, then split into lines.
378 TokenText.substr(2, TokenText.size() - 4)
379 .split(Lines, UseCRLF ? "\r\n" : "\n");
380
// How far the comment moved horizontally relative to its original position.
381 int IndentDelta = StartColumn - OriginalStartColumn;
382 Content.resize(Lines.size());
383 Content[0] = Lines[0];
384 ContentColumn.resize(Lines.size());
385 // Account for the initial '/*'.
386 ContentColumn[0] = StartColumn + 2;
387 Tokens.resize(Lines.size());
// Line 0 was handled above; each call also trims the end of the previous line.
388 for (size_t i = 1; i < Lines.size(); ++i)
389 adjustWhitespace(i, IndentDelta);
390
391 // Align decorations with the column of the star on the first line,
392 // that is one column after the start "/*".
393 DecorationColumn = StartColumn + 1;
394
395 // Account for comment decoration patterns like this:
396 //
397 // /*
398 // ** blah blah blah
399 // */
400 if (Lines.size() >= 2 && Content[1].startswith("**") &&
401 static_cast<unsigned>(ContentColumn[1]) == StartColumn) {
402 DecorationColumn = StartColumn;
403 }
404
405 Decoration = "* ";
406 if (Lines.size() == 1 && !FirstInLine) {
407 // Comments for which FirstInLine is false can start on arbitrary column,
408 // and available horizontal space can be too small to align consecutive
409 // lines with the first one.
410 // FIXME: We could, probably, align them to current indentation level, but
411 // now we just wrap them without stars.
412 Decoration = "";
413 }
// Shorten Decoration until it is a prefix of every line that is inspected.
414 for (size_t i = 1, e = Content.size(); i < e && !Decoration.empty(); ++i) {
415 const StringRef &Text = Content[i];
416 if (i + 1 == e) {
417 // If the last line is empty, the closing "*/" will have a star.
418 if (Text.empty())
419 break;
420 } else if (!Text.empty() && Decoration.startswith(Text)) {
// A non-last line that is itself a prefix of the decoration (e.g. a lone
// "*") does not shorten it.
421 continue;
422 }
423 while (!Text.startswith(Decoration))
424 Decoration = Decoration.drop_back(1);
425 }
426
427 LastLineNeedsDecoration = true;
428 IndentAtLineBreak = ContentColumn[0] + 1;
429 for (size_t i = 1, e = Lines.size(); i < e; ++i) {
430 if (Content[i].empty()) {
431 if (i + 1 == e) {
432 // Empty last line means that we already have a star as a part of the
433 // trailing */. We also need to preserve whitespace, so that */ is
434 // correctly indented.
435 LastLineNeedsDecoration = false;
436 // Align the star in the last '*/' with the stars on the previous lines.
437 if (e >= 2 && !Decoration.empty())
438 ContentColumn[i] = DecorationColumn;
439 } else if (Decoration.empty()) {
440 // For all other lines, set the start column to 0 if they're empty, so
441 // we do not insert trailing whitespace anywhere.
442 ContentColumn[i] = 0;
443 }
444 continue;
445 }
446
447 // The first line already excludes the star.
448 // The last line excludes the star if LastLineNeedsDecoration is false.
449 // For all other lines, adjust the line to exclude the star and
450 // (optionally) the first whitespace.
451 unsigned DecorationSize = Decoration.startswith(Content[i])
452 ? Content[i].size()
453 : Decoration.size();
454 if (DecorationSize)
455 ContentColumn[i] = DecorationColumn + DecorationSize;
456 Content[i] = Content[i].substr(DecorationSize);
457 if (!Decoration.startswith(Content[i])) {
458 IndentAtLineBreak =
459 std::min<int>(IndentAtLineBreak, std::max(0, ContentColumn[i]));
460 }
461 }
// Continuation lines must leave at least enough room for the decoration.
462 IndentAtLineBreak = std::max<unsigned>(IndentAtLineBreak, Decoration.size());
463
464 // Detect a multiline jsdoc comment and set DelimitersOnNewline in that case.
// NOTE(review): listing line 465 is missing here; the unmatched closing brace
// at 480 suggests it opened an enclosing `if` (presumably a language check,
// TODO confirm against the original file).
466 if ((Lines[0] == "*" || Lines[0].startswith("* ")) && Lines.size() > 1) {
467 // This is a multiline jsdoc comment.
468 DelimitersOnNewline = true;
469 } else if (Lines[0].startswith("* ") && Lines.size() == 1) {
470 // Detect a long single-line comment, like:
471 // /** long long long */
472 // Below, '2' is the width of '*/'.
473 unsigned EndColumn =
474 ContentColumn[0] +
// NOTE(review): listing lines 475-476 are missing here; presumably they add
// the column width of Lines[0] (TODO confirm against the original file).
477 2;
478 DelimitersOnNewline = EndColumn > Style.ColumnLimit;
479 }
480 }
481
482 LLVM_DEBUG({
483 llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n";
484 llvm::dbgs() << "DelimitersOnNewline " << DelimitersOnNewline << "\n";
485 for (size_t i = 0; i < Lines.size(); ++i) {
486 llvm::dbgs() << i << " |" << Content[i] << "| "
487 << "CC=" << ContentColumn[i] << "| "
488 << "IN=" << (Content[i].data() - Lines[i].data()) << "\n";
489 }
490 });
491}
492
494 unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
495 unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const {
496 // Don't break lines matching the comment pragmas regex.
497 if (CommentPragmasRegex.match(Content[LineIndex]))
498 return Split(StringRef::npos, 0);
499 return getCommentSplit(Content[LineIndex].substr(TailOffset),
500 ContentStartColumn, ColumnLimit, Style.TabWidth,
501 Encoding, Style, Decoration.endswith("*"));
502}
503
504void BreakableBlockComment::adjustWhitespace(unsigned LineIndex,
505 int IndentDelta) {
506 // When in a preprocessor directive, the trailing backslash in a block comment
507 // is not needed, but can serve a purpose of uniformity with necessary escaped
508 // newlines outside the comment. In this case we remove it here before
509 // trimming the trailing whitespace. The backslash will be re-added later when
510 // inserting a line break.
511 size_t EndOfPreviousLine = Lines[LineIndex - 1].size();
512 if (InPPDirective && Lines[LineIndex - 1].endswith("\\"))
513 --EndOfPreviousLine;
514
515 // Calculate the end of the non-whitespace text in the previous line.
516 EndOfPreviousLine =
517 Lines[LineIndex - 1].find_last_not_of(Blanks, EndOfPreviousLine);
518 if (EndOfPreviousLine == StringRef::npos)
519 EndOfPreviousLine = 0;
520 else
521 ++EndOfPreviousLine;
522 // Calculate the start of the non-whitespace text in the current line.
523 size_t StartOfLine = Lines[LineIndex].find_first_not_of(Blanks);
524 if (StartOfLine == StringRef::npos)
525 StartOfLine = Lines[LineIndex].size();
526
527 StringRef Whitespace = Lines[LineIndex].substr(0, StartOfLine);
528 // Adjust Lines to only contain relevant text.
529 size_t PreviousContentOffset =
530 Content[LineIndex - 1].data() - Lines[LineIndex - 1].data();
531 Content[LineIndex - 1] = Lines[LineIndex - 1].substr(
532 PreviousContentOffset, EndOfPreviousLine - PreviousContentOffset);
533 Content[LineIndex] = Lines[LineIndex].substr(StartOfLine);
534
535 // Adjust the start column uniformly across all lines.
536 ContentColumn[LineIndex] =
538 IndentDelta;
539}
540
541unsigned BreakableBlockComment::getRangeLength(unsigned LineIndex,
542 unsigned Offset,
543 StringRef::size_type Length,
544 unsigned StartColumn) const {
546 Content[LineIndex].substr(Offset, Length), StartColumn, Style.TabWidth,
547 Encoding);
548}
549
551 unsigned Offset,
552 unsigned StartColumn) const {
553 unsigned LineLength =
554 UnbreakableTailLength +
555 getRangeLength(LineIndex, Offset, StringRef::npos, StartColumn);
556 if (LineIndex + 1 == Lines.size()) {
557 LineLength += 2;
558 // We never need a decoration when breaking just the trailing "*/" postfix.
559 bool HasRemainingText = Offset < Content[LineIndex].size();
560 if (!HasRemainingText) {
561 bool HasDecoration = Lines[LineIndex].ltrim().startswith(Decoration);
562 if (HasDecoration)
563 LineLength -= Decoration.size();
564 }
565 }
566 return LineLength;
567}
568
570 bool Break) const {
571 if (Break)
572 return IndentAtLineBreak;
573 return std::max(0, ContentColumn[LineIndex]);
574}
575
576const llvm::StringSet<>
578 "@param", "@return", "@returns", "@throws", "@type", "@template",
579 "@see", "@deprecated", "@define", "@exports", "@mods", "@private",
580};
581
582unsigned BreakableBlockComment::getContentIndent(unsigned LineIndex) const {
584 return 0;
585 // The content at LineIndex 0 of a comment like:
586 // /** line 0 */
587 // is "* line 0", so we need to skip over the decoration in that case.
588 StringRef ContentWithNoDecoration = Content[LineIndex];
589 if (LineIndex == 0 && ContentWithNoDecoration.startswith("*"))
590 ContentWithNoDecoration = ContentWithNoDecoration.substr(1).ltrim(Blanks);
591 StringRef FirstWord = ContentWithNoDecoration.substr(
592 0, ContentWithNoDecoration.find_first_of(Blanks));
593 if (ContentIndentingJavadocAnnotations.contains(FirstWord))
595 return 0;
596}
597
598void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
599 Split Split, unsigned ContentIndent,
600 WhitespaceManager &Whitespaces) const {
601 StringRef Text = Content[LineIndex].substr(TailOffset);
602 StringRef Prefix = Decoration;
603 // We need this to account for the case when we have a decoration "* " for all
604 // the lines except for the last one, where the star in "*/" acts as a
605 // decoration.
606 unsigned LocalIndentAtLineBreak = IndentAtLineBreak;
607 if (LineIndex + 1 == Lines.size() &&
608 Text.size() == Split.first + Split.second) {
609 // For the last line we need to break before "*/", but not to add "* ".
610 Prefix = "";
611 if (LocalIndentAtLineBreak >= 2)
612 LocalIndentAtLineBreak -= 2;
613 }
614 // The split offset is from the beginning of the line. Convert it to an offset
615 // from the beginning of the token text.
616 unsigned BreakOffsetInToken =
617 Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
618 unsigned CharsToRemove = Split.second;
619 assert(LocalIndentAtLineBreak >= Prefix.size());
620 std::string PrefixWithTrailingIndent = std::string(Prefix);
621 PrefixWithTrailingIndent.append(ContentIndent, ' ');
622 Whitespaces.replaceWhitespaceInToken(
623 tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "",
624 PrefixWithTrailingIndent, InPPDirective, /*Newlines=*/1,
625 /*Spaces=*/LocalIndentAtLineBreak + ContentIndent -
626 PrefixWithTrailingIndent.size());
627}
628
630 unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
631 if (!mayReflow(LineIndex, CommentPragmasRegex))
632 return Split(StringRef::npos, 0);
633
634 // If we're reflowing into a line with content indent, only reflow the next
635 // line if its starting whitespace matches the content indent.
636 size_t Trimmed = Content[LineIndex].find_first_not_of(Blanks);
637 if (LineIndex) {
638 unsigned PreviousContentIndent = getContentIndent(LineIndex - 1);
639 if (PreviousContentIndent && Trimmed != StringRef::npos &&
640 Trimmed != PreviousContentIndent) {
641 return Split(StringRef::npos, 0);
642 }
643 }
644
645 return Split(0, Trimmed != StringRef::npos ? Trimmed : 0);
646}
647
649 // A break is introduced when we want delimiters on newline.
650 return DelimitersOnNewline &&
651 Lines[0].substr(1).find_first_not_of(Blanks) != StringRef::npos;
652}
653
654void BreakableBlockComment::reflow(unsigned LineIndex,
655 WhitespaceManager &Whitespaces) const {
656 StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);
657 // Here we need to reflow.
658 assert(Tokens[LineIndex - 1] == Tokens[LineIndex] &&
659 "Reflowing whitespace within a token");
660 // This is the offset of the end of the last line relative to the start of
661 // the token text in the token.
662 unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +
663 Content[LineIndex - 1].size() -
664 tokenAt(LineIndex).TokenText.data();
665 unsigned WhitespaceLength = TrimmedContent.data() -
666 tokenAt(LineIndex).TokenText.data() -
667 WhitespaceOffsetInToken;
668 Whitespaces.replaceWhitespaceInToken(
669 tokenAt(LineIndex), WhitespaceOffsetInToken,
670 /*ReplaceChars=*/WhitespaceLength, /*PreviousPostfix=*/"",
671 /*CurrentPrefix=*/ReflowPrefix, InPPDirective, /*Newlines=*/0,
672 /*Spaces=*/0);
673}
674
676 unsigned LineIndex, WhitespaceManager &Whitespaces) const {
677 if (LineIndex == 0) {
678 if (DelimitersOnNewline) {
679 // Since we're breaking at index 1 below, the break position and the
680 // break length are the same.
681 // Note: this works because getCommentSplit is careful never to split at
682 // the beginning of a line.
683 size_t BreakLength = Lines[0].substr(1).find_first_not_of(Blanks);
684 if (BreakLength != StringRef::npos) {
685 insertBreak(LineIndex, 0, Split(1, BreakLength), /*ContentIndent=*/0,
686 Whitespaces);
687 }
688 }
689 return;
690 }
691 // Here no reflow with the previous line will happen.
692 // Fix the decoration of the line at LineIndex.
693 StringRef Prefix = Decoration;
694 if (Content[LineIndex].empty()) {
695 if (LineIndex + 1 == Lines.size()) {
696 if (!LastLineNeedsDecoration) {
697 // If the last line was empty, we don't need a prefix, as the */ will
698 // line up with the decoration (if it exists).
699 Prefix = "";
700 }
701 } else if (!Decoration.empty()) {
702 // For other empty lines, if we do have a decoration, adapt it to not
703 // contain a trailing whitespace.
704 Prefix = Prefix.substr(0, 1);
705 }
706 } else if (ContentColumn[LineIndex] == 1) {
707 // This line starts immediately after the decorating *.
708 Prefix = Prefix.substr(0, 1);
709 }
710 // This is the offset of the end of the last line relative to the start of the
711 // token text in the token.
712 unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +
713 Content[LineIndex - 1].size() -
714 tokenAt(LineIndex).TokenText.data();
715 unsigned WhitespaceLength = Content[LineIndex].data() -
716 tokenAt(LineIndex).TokenText.data() -
717 WhitespaceOffsetInToken;
718 Whitespaces.replaceWhitespaceInToken(
719 tokenAt(LineIndex), WhitespaceOffsetInToken, WhitespaceLength, "", Prefix,
720 InPPDirective, /*Newlines=*/1, ContentColumn[LineIndex] - Prefix.size());
721}
722
725 if (DelimitersOnNewline) {
726 // Replace the trailing whitespace of the last line with a newline.
727 // In case the last line is empty, the ending '*/' is already on its own
728 // line.
729 StringRef Line = Content.back().substr(TailOffset);
730 StringRef TrimmedLine = Line.rtrim(Blanks);
731 if (!TrimmedLine.empty())
732 return Split(TrimmedLine.size(), Line.size() - TrimmedLine.size());
733 }
734 return Split(StringRef::npos, 0);
735}
736
738 unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
739 // Content[LineIndex] may exclude the indent after the '*' decoration. In that
740 // case, we compute the start of the comment pragma manually.
741 StringRef IndentContent = Content[LineIndex];
742 if (Lines[LineIndex].ltrim(Blanks).startswith("*"))
743 IndentContent = Lines[LineIndex].ltrim(Blanks).substr(1);
744 return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) &&
745 mayReflowContent(Content[LineIndex]) && !Tok.Finalized &&
746 !switchesFormatting(tokenAt(LineIndex));
747}
748
// Constructor of a section of consecutive line comments. Starting at the
// given token it walks the chain of line-comment tokens via Next, and for each
// line records the original and the adjusted prefix (comment leader plus
// spaces, adjusted according to Style.SpacesInLineCommentPrefix), the content
// with trailing blanks removed, the content column, and the owning token. The
// walk stops at the first following token that does not continue the line
// comment section.
//
// NOTE(review): the declarator line naming this constructor is absent from
// this listing, as are a few interior lines flagged below; the leading
// numbers on each line look like listing line numbers fused into the code.
// Restore them from the original file before compiling.
750 const FormatToken &Token, unsigned StartColumn, bool InPPDirective,
751 encoding::Encoding Encoding, const FormatStyle &Style)
752 : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style) {
753 assert(Tok.is(TT_LineComment) &&
754 "line comment section must start with a line comment");
// Stays null for the lines of the first token (see Tokens[i] below).
755 FormatToken *LineTok = nullptr;
756 const int Minimum = Style.SpacesInLineCommentPrefix.Minimum;
757 // How many spaces we changed in the first line of the section, this will be
758 // applied in all following lines
759 int FirstLineSpaceChange = 0;
760 for (const FormatToken *CurrentTok = &Tok;
761 CurrentTok && CurrentTok->is(TT_LineComment);
762 CurrentTok = CurrentTok->Next) {
763 LastLineTok = LineTok;
764 StringRef TokenText(CurrentTok->TokenText);
765 assert((TokenText.startswith("//") || TokenText.startswith("#")) &&
766 "unsupported line comment prefix, '//' and '#' are supported");
// Lines of this token are appended after those of the previous tokens.
767 size_t FirstLineIndex = Lines.size();
768 TokenText.split(Lines, "\n");
769 Content.resize(Lines.size());
770 ContentColumn.resize(Lines.size());
771 PrefixSpaceChange.resize(Lines.size());
772 Tokens.resize(Lines.size());
773 Prefix.resize(Lines.size());
774 OriginalPrefix.resize(Lines.size());
775 for (size_t i = FirstLineIndex, e = Lines.size(); i < e; ++i) {
776 Lines[i] = Lines[i].ltrim(Blanks);
777 StringRef IndentPrefix = getLineCommentIndentPrefix(Lines[i], Style);
778 OriginalPrefix[i] = IndentPrefix;
779 const int SpacesInPrefix = llvm::count(IndentPrefix, ' ');
780
781 // This lambda also considers multibyte character that is not handled in
782 // functions like isPunctuation provided by CharInfo.
783 const auto NoSpaceBeforeFirstCommentChar = [&]() {
784 assert(Lines[i].size() > IndentPrefix.size());
785 const char FirstCommentChar = Lines[i][IndentPrefix.size()];
786 const unsigned FirstCharByteSize =
787 encoding::getCodePointNumBytes(FirstCommentChar, Encoding);
// NOTE(review): listing line 788 is missing here; presumably it opens an `if`
// that measures the column width of the first code point (TODO confirm
// against the original file).
789 Lines[i].substr(IndentPrefix.size(), FirstCharByteSize),
790 Encoding) != 1) {
791 return false;
792 }
793 // In C-like comments, add a space before #. For example this is useful
794 // to preserve the relative indentation when commenting out code with
795 // #includes.
796 //
797 // In languages using # as the comment leader such as proto, don't
798 // add a space to support patterns like:
799 // #########
800 // # section
801 // #########
802 if (FirstCommentChar == '#' && !TokenText.startswith("#"))
803 return false;
804 return FirstCommentChar == '\\' || isPunctuation(FirstCommentChar) ||
805 isHorizontalWhitespace(FirstCommentChar);
806 };
807
808 // On the first line of the comment section we calculate how many spaces
809 // are to be added or removed, all lines after that just get only the
810 // change and we will not look at the maximum anymore. Additionally to the
811 // actual first line, we calculate that when the non space Prefix changes,
812 // e.g. from "///" to "//".
813 if (i == 0 || OriginalPrefix[i].rtrim(Blanks) !=
814 OriginalPrefix[i - 1].rtrim(Blanks)) {
815 if (SpacesInPrefix < Minimum && Lines[i].size() > IndentPrefix.size() &&
816 !NoSpaceBeforeFirstCommentChar()) {
817 FirstLineSpaceChange = Minimum - SpacesInPrefix;
818 } else if (static_cast<unsigned>(SpacesInPrefix) >
// NOTE(review): listing line 819 is missing here; presumably it holds the
// right-hand operand of the comparison (the configured maximum) and opens
// the branch (TODO confirm against the original file).
820 FirstLineSpaceChange =
821 Style.SpacesInLineCommentPrefix.Maximum - SpacesInPrefix;
822 } else {
823 FirstLineSpaceChange = 0;
824 }
825 }
826
// Lines that have content after the prefix get the space change applied.
827 if (Lines[i].size() != IndentPrefix.size()) {
828 PrefixSpaceChange[i] = FirstLineSpaceChange;
829
// Never end up with fewer spaces than the configured minimum.
830 if (SpacesInPrefix + PrefixSpaceChange[i] < Minimum) {
831 PrefixSpaceChange[i] +=
832 Minimum - (SpacesInPrefix + PrefixSpaceChange[i]);
833 }
834
835 assert(Lines[i].size() > IndentPrefix.size());
836 const auto FirstNonSpace = Lines[i][IndentPrefix.size()];
837 const bool IsFormatComment = LineTok && switchesFormatting(*LineTok);
838 const bool LineRequiresLeadingSpace =
839 !NoSpaceBeforeFirstCommentChar() ||
840 (FirstNonSpace == '}' && FirstLineSpaceChange != 0);
841 const bool AllowsSpaceChange =
842 !IsFormatComment &&
843 (SpacesInPrefix != 0 || LineRequiresLeadingSpace);
844
845 if (PrefixSpaceChange[i] > 0 && AllowsSpaceChange) {
846 Prefix[i] = IndentPrefix.str();
847 Prefix[i].append(PrefixSpaceChange[i], ' ');
848 } else if (PrefixSpaceChange[i] < 0 && AllowsSpaceChange) {
// Remove at most as many characters as there are spaces in the prefix.
849 Prefix[i] = IndentPrefix
850 .drop_back(std::min<std::size_t>(
851 -PrefixSpaceChange[i], SpacesInPrefix))
852 .str();
853 } else {
854 Prefix[i] = IndentPrefix.str();
855 }
856 } else {
857 // If the IndentPrefix is the whole line, there is no content and we
858 // drop just all space
859 Prefix[i] = IndentPrefix.drop_back(SpacesInPrefix).str();
860 }
861
862 Tokens[i] = LineTok;
863 Content[i] = Lines[i].substr(IndentPrefix.size());
864 ContentColumn[i] =
// NOTE(review): listing lines 865-866 are missing here; presumably they
// compute the column from StartColumn and the width of Prefix[i] (TODO
// confirm against the original file).
867
868 // Calculate the end of the non-whitespace text in this line.
869 size_t EndOfLine = Content[i].find_last_not_of(Blanks);
870 if (EndOfLine == StringRef::npos)
871 EndOfLine = Content[i].size();
872 else
873 ++EndOfLine;
874 Content[i] = Content[i].substr(0, EndOfLine);
875 }
876 LineTok = CurrentTok->Next;
877 if (CurrentTok->Next && !CurrentTok->Next->ContinuesLineCommentSection) {
878 // A line comment section needs to broken by a line comment that is
879 // preceded by at least two newlines. Note that we put this break here
880 // instead of breaking at a previous stage during parsing, since that
881 // would split the contents of the enum into two unwrapped lines in this
882 // example, which is undesirable:
883 // enum A {
884 // a, // comment about a
885 //
886 // // comment about b
887 // b
888 // };
889 //
890 // FIXME: Consider putting separate line comment sections as children to
891 // the unwrapped line instead.
892 break;
893 }
894 }
895}
896
897unsigned
899 StringRef::size_type Length,
900 unsigned StartColumn) const {
902 Content[LineIndex].substr(Offset, Length), StartColumn, Style.TabWidth,
903 Encoding);
904}
905
906unsigned
908 bool /*Break*/) const {
909 return ContentColumn[LineIndex];
910}
911
913 unsigned LineIndex, unsigned TailOffset, Split Split,
914 unsigned ContentIndent, WhitespaceManager &Whitespaces) const {
915 StringRef Text = Content[LineIndex].substr(TailOffset);
916 // Compute the offset of the split relative to the beginning of the token
917 // text.
918 unsigned BreakOffsetInToken =
919 Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
920 unsigned CharsToRemove = Split.second;
921 Whitespaces.replaceWhitespaceInToken(
922 tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "",
923 Prefix[LineIndex], InPPDirective, /*Newlines=*/1,
924 /*Spaces=*/ContentColumn[LineIndex] - Prefix[LineIndex].size());
925}
926
928 unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
929 if (!mayReflow(LineIndex, CommentPragmasRegex))
930 return Split(StringRef::npos, 0);
931
932 size_t Trimmed = Content[LineIndex].find_first_not_of(Blanks);
933
934 // In a line comment section each line is a separate token; thus, after a
935 // split we replace all whitespace before the current line comment token
936 // (which does not need to be included in the split), plus the start of the
937 // line up to where the content starts.
938 return Split(0, Trimmed != StringRef::npos ? Trimmed : 0);
939}
940
942 WhitespaceManager &Whitespaces) const {
943 if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) {
944 // Reflow happens between tokens. Replace the whitespace between the
945 // tokens by the empty string.
946 Whitespaces.replaceWhitespace(
947 *Tokens[LineIndex], /*Newlines=*/0, /*Spaces=*/0,
948 /*StartOfTokenColumn=*/StartColumn, /*IsAligned=*/true,
949 /*InPPDirective=*/false);
950 } else if (LineIndex > 0) {
951 // In case we're reflowing after the '\' in:
952 //
953 // // line comment \
954 // // line 2
955 //
956 // the reflow happens inside the single comment token (it is a single line
957 // comment with an unescaped newline).
958 // Replace the whitespace between the '\' and '//' with the empty string.
959 //
960 // Offset points to after the '\' relative to start of the token.
961 unsigned Offset = Lines[LineIndex - 1].data() +
962 Lines[LineIndex - 1].size() -
963 tokenAt(LineIndex - 1).TokenText.data();
964 // WhitespaceLength is the number of chars between the '\' and the '//' on
965 // the next line.
966 unsigned WhitespaceLength =
967 Lines[LineIndex].data() - tokenAt(LineIndex).TokenText.data() - Offset;
968 Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex], Offset,
969 /*ReplaceChars=*/WhitespaceLength,
970 /*PreviousPostfix=*/"",
971 /*CurrentPrefix=*/"",
972 /*InPPDirective=*/false,
973 /*Newlines=*/0,
974 /*Spaces=*/0);
975 }
976 // Replace the indent and prefix of the token with the reflow prefix.
977 unsigned Offset =
978 Lines[LineIndex].data() - tokenAt(LineIndex).TokenText.data();
979 unsigned WhitespaceLength =
980 Content[LineIndex].data() - Lines[LineIndex].data();
981 Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex], Offset,
982 /*ReplaceChars=*/WhitespaceLength,
983 /*PreviousPostfix=*/"",
984 /*CurrentPrefix=*/ReflowPrefix,
985 /*InPPDirective=*/false,
986 /*Newlines=*/0,
987 /*Spaces=*/0);
988}
989
991 unsigned LineIndex, WhitespaceManager &Whitespaces) const {
992 // If this is the first line of a token, we need to inform Whitespace Manager
993 // about it: either adapt the whitespace range preceding it, or mark it as an
994 // untouchable token.
995 // This happens for instance here:
996 // // line 1 \
997 // // line 2
998 if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) {
999 // This is the first line for the current token, but no reflow with the
1000 // previous token is necessary. However, we still may need to adjust the
1001 // start column. Note that ContentColumn[LineIndex] is the expected
1002 // content column after a possible update to the prefix, hence the prefix
1003 // length change is included.
1004 unsigned LineColumn =
1005 ContentColumn[LineIndex] -
1006 (Content[LineIndex].data() - Lines[LineIndex].data()) +
1007 (OriginalPrefix[LineIndex].size() - Prefix[LineIndex].size());
1008
1009 // We always want to create a replacement instead of adding an untouchable
1010 // token, even if LineColumn is the same as the original column of the
1011 // token. This is because WhitespaceManager doesn't align trailing
1012 // comments if they are untouchable.
1013 Whitespaces.replaceWhitespace(*Tokens[LineIndex],
1014 /*Newlines=*/1,
1015 /*Spaces=*/LineColumn,
1016 /*StartOfTokenColumn=*/LineColumn,
1017 /*IsAligned=*/true,
1018 /*InPPDirective=*/false);
1019 }
1020 if (OriginalPrefix[LineIndex] != Prefix[LineIndex]) {
1021 // Adjust the prefix if necessary.
1022 const auto SpacesToRemove = -std::min(PrefixSpaceChange[LineIndex], 0);
1023 const auto SpacesToAdd = std::max(PrefixSpaceChange[LineIndex], 0);
1024 Whitespaces.replaceWhitespaceInToken(
1025 tokenAt(LineIndex), OriginalPrefix[LineIndex].size() - SpacesToRemove,
1026 /*ReplaceChars=*/SpacesToRemove, "", "", /*InPPDirective=*/false,
1027 /*Newlines=*/0, /*Spaces=*/SpacesToAdd);
1028 }
1029}
1030
1032 if (LastLineTok)
1033 State.NextToken = LastLineTok->Next;
1034}
1035
1037 unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
1038 // Line comments have the indent as part of the prefix, so we need to
1039 // recompute the start of the line.
1040 StringRef IndentContent = Content[LineIndex];
1041 if (Lines[LineIndex].startswith("//"))
1042 IndentContent = Lines[LineIndex].substr(2);
1043 // FIXME: Decide whether we want to reflow non-regular indents:
1044 // Currently, we only reflow when the OriginalPrefix[LineIndex] matches the
1045 // OriginalPrefix[LineIndex-1]. That means we don't reflow
1046 // // text that protrudes
1047 // // into text with different indent
1048 // We do reflow in that case in block comments.
1049 return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) &&
1050 mayReflowContent(Content[LineIndex]) && !Tok.Finalized &&
1051 !switchesFormatting(tokenAt(LineIndex)) &&
1052 OriginalPrefix[LineIndex] == OriginalPrefix[LineIndex - 1];
1053}
1054
1055} // namespace format
1056} // namespace clang
Declares BreakableToken, BreakableStringLiteral, BreakableComment, BreakableBlockComment and BreakableLineCommentSection classes.
This file implements an indenter that manages the indentation of continuations.
StringRef Text
Definition: Format.cpp:2775
unsigned Offset
Definition: Format.cpp:2776
Various functions to configurably format source code.
Token - This structure provides full information about a lexed token.
Definition: Token.h:35
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {....
Definition: Token.h:98
void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, unsigned ContentIndent, WhitespaceManager &Whitespaces) const override
Emits the previously retrieved Split via Whitespaces.
unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override
Returns the column at which content in line LineIndex starts, assuming no reflow.
Split getReflowSplit(unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const override
Returns a whitespace range (offset, length) of the content at LineIndex such that the content of that...
bool mayReflow(unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const override
Split getSplitAfterLastLine(unsigned TailOffset) const override
Returns a whitespace range (offset, length) of the content at the last line that needs to be reformat...
BreakableBlockComment(const FormatToken &Token, unsigned StartColumn, unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style, bool UseCRLF)
static const llvm::StringSet ContentIndentingJavadocAnnotations
unsigned getRemainingLength(unsigned LineIndex, unsigned Offset, unsigned StartColumn) const override
Returns the number of columns required to format the text following the byte Offset in the line LineI...
unsigned getContentIndent(unsigned LineIndex) const override
Returns additional content indent required for the second line after the content at line LineIndex is...
void adaptStartOfLine(unsigned LineIndex, WhitespaceManager &Whitespaces) const override
Replaces the whitespace between LineIndex-1 and LineIndex.
void reflow(unsigned LineIndex, WhitespaceManager &Whitespaces) const override
Reflows the current line into the end of the previous one.
unsigned getRangeLength(unsigned LineIndex, unsigned Offset, StringRef::size_type Length, unsigned StartColumn) const override
Returns the number of columns required to format the text in the byte range [Offset,...
Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const override
Returns a range (offset, length) at which to break the line at LineIndex, if previously broken at Tai...
bool introducesBreakBeforeToken() const override
Returns whether there will be a line break at the start of the token.
unsigned getLineCount() const override
Returns the number of lines in this token in the original code.
SmallVector< StringRef, 16 > Lines
SmallVector< int, 16 > ContentColumn
Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const override
Returns a range (offset, length) at which to break the line at LineIndex, if previously broken at Tai...
SmallVector< FormatToken *, 16 > Tokens
SmallVector< StringRef, 16 > Content
void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split, WhitespaceManager &Whitespaces) const override
Replaces the whitespace range described by Split with a single space.
const FormatToken & tokenAt(unsigned LineIndex) const
BreakableComment(const FormatToken &Token, unsigned StartColumn, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
Creates a breakable token for a comment.
BreakableLineCommentSection(const FormatToken &Token, unsigned StartColumn, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override
Returns the column at which content in line LineIndex starts, assuming no reflow.
void reflow(unsigned LineIndex, WhitespaceManager &Whitespaces) const override
Reflows the current line into the end of the previous one.
Split getReflowSplit(unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const override
Returns a whitespace range (offset, length) of the content at LineIndex such that the content of that...
void updateNextToken(LineState &State) const override
Updates the next token of State to the next token after this one.
bool mayReflow(unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const override
void adaptStartOfLine(unsigned LineIndex, WhitespaceManager &Whitespaces) const override
Replaces the whitespace between LineIndex-1 and LineIndex.
void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, unsigned ContentIndent, WhitespaceManager &Whitespaces) const override
Emits the previously retrieved Split via Whitespaces.
unsigned getRangeLength(unsigned LineIndex, unsigned Offset, StringRef::size_type Length, unsigned StartColumn) const override
Returns the number of columns required to format the text in the byte range [Offset,...
Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const override
Returns a range (offset, length) at which to break the line at LineIndex, if previously broken at Tai...
unsigned getRangeLength(unsigned LineIndex, unsigned Offset, StringRef::size_type Length, unsigned StartColumn) const override
Returns the number of columns required to format the text in the byte range [Offset,...
unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override
Returns the column at which content in line LineIndex starts, assuming no reflow.
unsigned getLineCount() const override
Returns the number of lines in this token in the original code.
unsigned getRemainingLength(unsigned LineIndex, unsigned Offset, unsigned StartColumn) const override
Returns the number of columns required to format the text following the byte Offset in the line LineI...
BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn, StringRef Prefix, StringRef Postfix, unsigned UnbreakableTailLength, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
Creates a breakable token for a single line string literal.
void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, unsigned ContentIndent, WhitespaceManager &Whitespaces) const override
Emits the previously retrieved Split via Whitespaces.
Base class for tokens / ranges of tokens that can allow breaking within the tokens - for example,...
std::pair< StringRef::size_type, unsigned > Split
Contains starting character index and length of split.
unsigned getLengthAfterCompression(unsigned RemainingTokenColumns, Split Split) const
Returns the number of columns needed to format RemainingTokenColumns, assuming that Split is within t...
const encoding::Encoding Encoding
Manages the whitespaces around tokens and their replacements.
unsigned columnWidthWithTabs(StringRef Text, unsigned StartColumn, unsigned TabWidth, Encoding Encoding)
Returns the number of columns required to display the Text, starting from the StartColumn on a termin...
Definition: Encoding.h:61
unsigned getEscapeSequenceLength(StringRef Text)
Gets the length of an escape sequence inside a C++ string literal.
Definition: Encoding.h:97
unsigned getCodePointNumBytes(char FirstChar, Encoding Encoding)
Gets the number of bytes in a sequence representing a single codepoint and starting with FirstChar in...
Definition: Encoding.h:78
unsigned columnWidth(StringRef Text, Encoding Encoding)
Returns the number of columns required to display the Text on a generic Unicode-capable terminal.
Definition: Encoding.h:45
static constexpr StringRef Blanks
bool switchesFormatting(const FormatToken &Token)
Checks if Token switches formatting, like /* clang-format off */.
static bool IsBlank(char C)
static BreakableToken::Split getStringSplit(StringRef Text, unsigned UsedColumns, unsigned ColumnLimit, unsigned TabWidth, encoding::Encoding Encoding)
static StringRef getLineCommentIndentPrefix(StringRef Comment, const FormatStyle &Style)
static bool mayReflowContent(StringRef Content)
static BreakableToken::Split getCommentSplit(StringRef Text, unsigned ContentStartColumn, unsigned ColumnLimit, unsigned TabWidth, encoding::Encoding Encoding, const FormatStyle &Style, bool DecorationEndsWithStar=false)
LLVM_READONLY bool isAlphanumeric(unsigned char c)
Return true if this character is an ASCII letter or digit: [a-zA-Z0-9].
Definition: CharInfo.h:123
LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)
Returns true if this character is horizontal ASCII whitespace: ' ', '\t', '\f', '\v'.
Definition: CharInfo.h:76
@ C
Languages that the frontend can parse and compile.
LLVM_READONLY bool isPunctuation(unsigned char c)
Return true if this character is an ASCII punctuation character.
Definition: CharInfo.h:137
#define false
Definition: stdbool.h:22
unsigned Maximum
The maximum number of spaces at the start of the comment.
Definition: Format.h:4013
unsigned Minimum
The minimum number of spaces at the start of the comment.
Definition: Format.h:4011
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:55
unsigned ContinuationIndentWidth
Indent width for line continuations.
Definition: Format.h:1985
@ LK_Java
Should be used for Java.
Definition: Format.h:2668
@ LK_TextProto
Should be used for Protocol Buffer messages in text format (https://developers.google....
Definition: Format.h:2682
LanguageKind Language
Language, this format style is targeted at.
Definition: Format.h:2697
unsigned TabWidth
The number of columns used for tab stops.
Definition: Format.h:4134
SpacesInLineComment SpacesInLineCommentPrefix
How many spaces are allowed at the start of a line comment.
Definition: Format.h:4048
bool isJavaScript() const
Definition: Format.h:2691
unsigned ColumnLimit
The column limit.
Definition: Format.h:1890
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:249
StringRef TokenText
The raw text of the token.
Definition: FormatToken.h:268
unsigned Finalized
If true, this token has been fully formatted (indented and potentially re-formatted inside),...
Definition: FormatToken.h:323
FormatToken * Next
The next token in the unwrapped line.
Definition: FormatToken.h:506
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:543
The current state when indenting an unwrapped line.