19#include "llvm/ADT/STLExtras.h"
20#include "llvm/Support/Debug.h"
23#define DEBUG_TYPE "format-token-breaker"
28static constexpr StringRef
Blanks(
" \t\v\f\r");
31 const FormatStyle &Style) {
32 static constexpr StringRef KnownCStylePrefixes[] = {
"///<",
"//!<",
"///",
34 static constexpr StringRef KnownTextProtoPrefixes[] = {
"####",
"###",
"##",
37 if (Style.isTextProto())
38 KnownPrefixes = KnownTextProtoPrefixes;
41 llvm::is_sorted(KnownPrefixes, [](StringRef Lhs, StringRef Rhs)
noexcept {
42 return Lhs.size() > Rhs.size();
45 for (StringRef KnownPrefix : KnownPrefixes) {
46 if (Comment.starts_with(KnownPrefix)) {
47 const auto PrefixLength =
48 Comment.find_first_not_of(
' ', KnownPrefix.size());
49 return Comment.substr(0, PrefixLength);
57 unsigned ColumnLimit,
unsigned TabWidth,
59 bool DecorationEndsWithStar =
false) {
60 LLVM_DEBUG(llvm::dbgs() <<
"Comment split: \"" <<
Text
61 <<
"\", Column limit: " << ColumnLimit
62 <<
", Content start: " << ContentStartColumn <<
"\n");
63 if (ColumnLimit <= ContentStartColumn + 1)
66 unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1;
67 unsigned MaxSplitBytes = 0;
69 for (
unsigned NumChars = 0;
70 NumChars < MaxSplit && MaxSplitBytes <
Text.size();) {
71 unsigned BytesInChar =
74 Text.substr(MaxSplitBytes, BytesInChar), ContentStartColumn + NumChars,
76 MaxSplitBytes += BytesInChar;
82 if (Style.isJavaScript()) {
83 StringRef::size_type SpaceOffset =
85 if (SpaceOffset != StringRef::npos && SpaceOffset + 1 <
Text.size() &&
86 Text[SpaceOffset + 1] ==
'{') {
87 MaxSplitBytes = SpaceOffset + 1;
91 StringRef::size_type SpaceOffset =
Text.find_last_of(
Blanks, MaxSplitBytes);
93 static const auto kNumberedListRegexp = llvm::Regex(
"^[1-9][0-9]?\\.");
95 while (SpaceOffset != StringRef::npos) {
102 StringRef::size_type LastNonBlank =
104 if (LastNonBlank != StringRef::npos &&
Text[LastNonBlank] ==
'\\') {
105 SpaceOffset =
Text.find_last_of(
Blanks, LastNonBlank);
112 if (kNumberedListRegexp.match(
Text.substr(SpaceOffset).ltrim(
Blanks))) {
113 SpaceOffset =
Text.find_last_of(
Blanks, SpaceOffset);
118 if (Style.isJavaScript() && SpaceOffset + 1 <
Text.size() &&
119 (
Text[SpaceOffset + 1] ==
'{' ||
Text[SpaceOffset + 1] ==
'@')) {
120 SpaceOffset =
Text.find_last_of(
Blanks, SpaceOffset);
127 if (SpaceOffset == StringRef::npos ||
129 Text.find_last_not_of(
Blanks, SpaceOffset) == StringRef::npos) {
132 StringRef::size_type FirstNonWhitespace =
Text.find_first_not_of(
Blanks);
133 if (FirstNonWhitespace == StringRef::npos) {
137 SpaceOffset =
Text.find_first_of(
138 Blanks, std::max<unsigned>(MaxSplitBytes, FirstNonWhitespace));
140 if (SpaceOffset != StringRef::npos && SpaceOffset != 0) {
145 if (SpaceOffset == 1 &&
Text[SpaceOffset - 1] ==
'*')
147 StringRef BeforeCut =
Text.substr(0, SpaceOffset).rtrim(
Blanks);
148 StringRef AfterCut =
Text.substr(SpaceOffset);
149 if (!DecorationEndsWithStar)
150 AfterCut = AfterCut.ltrim(
Blanks);
152 AfterCut.begin() - BeforeCut.end());
163 if (ColumnLimit <= UsedColumns)
165 unsigned MaxSplit = ColumnLimit - UsedColumns;
166 StringRef::size_type SpaceOffset = 0;
167 StringRef::size_type SlashOffset = 0;
168 StringRef::size_type WordStartOffset = 0;
169 StringRef::size_type SplitPoint = 0;
170 for (
unsigned Chars = 0;;) {
172 if (
Text[0] ==
'\\') {
178 Text.substr(0, Advance), UsedColumns + Chars,
TabWidth, Encoding);
181 if (Chars > MaxSplit ||
Text.size() <= Advance)
185 SpaceOffset = SplitPoint;
187 SlashOffset = SplitPoint;
189 WordStartOffset = SplitPoint;
191 SplitPoint += Advance;
195 if (SpaceOffset != 0)
197 if (SlashOffset != 0)
199 if (WordStartOffset != 0)
208 "formatting regions are switched by comment tokens");
209 StringRef Content =
Token.TokenText.substr(2).ltrim();
210 return Content.starts_with(
"clang-format on") ||
211 Content.starts_with(
"clang-format off");
228 return RemainingTokenColumns + 1 -
Split.second;
235 StringRef::size_type Length,
237 llvm_unreachable(
"Getting the length of a part of the string literal "
238 "indicates that the code tries to reflow it.");
267 unsigned LineIndex,
unsigned TailOffset,
unsigned ColumnLimit,
268 unsigned ContentStartColumn,
const llvm::Regex &CommentPragmasRegex)
const {
275 unsigned ContentIndent,
302 !
Style.isJavaScript() &&
303 Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None;
308 assert(QuoteStyle == DoubleQuotes);
310 Style.Cpp11BracedListStyle != FormatStyle::BLS_Block ?
"{\"" :
"{ \"";
312 Style.Cpp11BracedListStyle != FormatStyle::BLS_Block ?
"\"}" :
"\" }";
318 if (QuoteStyle == SingleQuotes) {
319 LeftBraceQuote = Style.SpacesInParensOptions.Other ?
"( '" :
"('";
320 RightBraceQuote = Style.SpacesInParensOptions.Other ?
"' )" :
"')";
321 Postfix = SignOnNewLine ?
"'" :
"' +";
322 Prefix = SignOnNewLine ?
"+ '" :
"'";
324 if (QuoteStyle == AtDoubleQuotes) {
325 LeftBraceQuote = Style.SpacesInParensOptions.Other ?
"( @" :
"(@";
326 Prefix = SignOnNewLine ?
"+ @\"" :
"@\"";
328 LeftBraceQuote = Style.SpacesInParensOptions.Other ?
"( \"" :
"(\"";
329 Prefix = SignOnNewLine ?
"+ \"" :
"\"";
332 Postfix = SignOnNewLine ?
"\"" :
"\" +";
337 ContinuationIndent = BracesNeeded ? LeftBraceQuote.size() - 1 : 0;
340 if (!Style.isVerilog() && SignOnNewLine && !BracesNeeded && UnindentPlus &&
341 Style.AlignOperands == FormatStyle::OAS_AlignAfterOperator) {
342 ContinuationIndent -= 2;
347 unsigned LineIndex,
unsigned Offset,
unsigned StartColumn)
const {
366 unsigned LineIndex,
unsigned TailOffset,
Split Split,
393 Tok,
Tok.TokenText.size() - 1, 1,
401 const FormatStyle &
Style)
409 unsigned ColumnLimit,
unsigned ContentStartColumn,
410 const llvm::Regex &CommentPragmasRegex)
const {
413 return Split(StringRef::npos, 0);
415 ContentStartColumn, ColumnLimit,
Style.TabWidth,
420 unsigned LineIndex,
unsigned TailOffset,
Split Split,
422 StringRef
Text =
Content[LineIndex].substr(TailOffset);
427 unsigned BreakOffsetInToken =
429 unsigned CharsToRemove =
Split.second;
431 tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove,
"",
"",
440 Content = Content.trim(
Blanks);
443 bool hasSpecialMeaningPrefix =
false;
444 for (StringRef Prefix :
445 {
"@",
"\\",
"TODO",
"FIXME",
"XXX",
"-# ",
"- ",
"+ ",
"* "}) {
446 if (Content.starts_with(Prefix)) {
447 hasSpecialMeaningPrefix =
true;
455 static const auto kNumberedListRegexp = llvm::Regex(
"^[1-9][0-9]?\\. ");
456 hasSpecialMeaningPrefix =
457 hasSpecialMeaningPrefix || kNumberedListRegexp.match(Content);
462 return Content.size() >= 2 && !hasSpecialMeaningPrefix &&
463 !Content.ends_with(
"\\") &&
471 unsigned OriginalStartColumn,
bool FirstInLine,
bool InPPDirective,
474 DelimitersOnNewline(
false),
475 UnbreakableTailLength(
Token.UnbreakableTailLength) {
476 assert(
Tok.is(TT_BlockComment) &&
477 "block comment section must start with a block comment");
482 .split(
Lines, UseCRLF ?
"\r\n" :
"\n");
484 int IndentDelta =
StartColumn - OriginalStartColumn;
491 for (
size_t i = 1; i <
Lines.size(); ++i)
492 adjustWhitespace(i, IndentDelta);
503 if (
Lines.size() >= 2 &&
Content[1].starts_with(
"**") &&
509 if (
Lines.size() == 1 && !FirstInLine) {
517 for (
size_t i = 1, e =
Content.size(); i < e && !Decoration.empty(); ++i) {
523 }
else if (!
Text.empty() && Decoration.starts_with(
Text)) {
526 while (!
Text.starts_with(Decoration))
527 Decoration = Decoration.drop_back(1);
530 LastLineNeedsDecoration =
true;
532 for (
size_t i = 1, e =
Lines.size(); i < e; ++i) {
538 LastLineNeedsDecoration =
false;
540 if (e >= 2 && !Decoration.empty())
542 }
else if (Decoration.empty()) {
554 unsigned DecorationSize = Decoration.starts_with(
Content[i])
560 if (!Decoration.starts_with(
Content[i])) {
562 std::min<int>(IndentAtLineBreak, std::max(0,
ContentColumn[i]));
565 IndentAtLineBreak = std::max<unsigned>(IndentAtLineBreak, Decoration.size());
569 if ((
Lines[0] ==
"*" ||
Lines[0].starts_with(
"* ")) &&
Lines.size() > 1) {
571 DelimitersOnNewline =
true;
572 }
else if (
Lines[0].starts_with(
"* ") &&
Lines.size() == 1) {
581 DelimitersOnNewline = EndColumn >
Style.ColumnLimit;
586 llvm::dbgs() <<
"IndentAtLineBreak " << IndentAtLineBreak <<
"\n";
587 llvm::dbgs() <<
"DelimitersOnNewline " << DelimitersOnNewline <<
"\n";
588 for (
size_t i = 0; i <
Lines.size(); ++i) {
589 llvm::dbgs() << i <<
" |" <<
Content[i] <<
"| "
591 <<
"IN=" << (
Content[i].data() -
Lines[i].data()) <<
"\n";
597 unsigned LineIndex,
unsigned TailOffset,
unsigned ColumnLimit,
598 unsigned ContentStartColumn,
const llvm::Regex &CommentPragmasRegex)
const {
601 return Split(StringRef::npos, 0);
603 ContentStartColumn, ColumnLimit,
Style.TabWidth,
607void BreakableBlockComment::adjustWhitespace(
unsigned LineIndex,
614 size_t EndOfPreviousLine =
Lines[LineIndex - 1].size();
620 Lines[LineIndex - 1].find_last_not_of(
Blanks, EndOfPreviousLine);
621 if (EndOfPreviousLine == StringRef::npos)
622 EndOfPreviousLine = 0;
626 size_t StartOfLine =
Lines[LineIndex].find_first_not_of(
Blanks);
627 if (StartOfLine == StringRef::npos)
628 StartOfLine =
Lines[LineIndex].size();
630 StringRef Whitespace =
Lines[LineIndex].substr(0, StartOfLine);
632 size_t PreviousContentOffset =
633 Content[LineIndex - 1].data() -
Lines[LineIndex - 1].data();
635 PreviousContentOffset, EndOfPreviousLine - PreviousContentOffset);
636 Content[LineIndex] =
Lines[LineIndex].substr(StartOfLine);
646 StringRef::size_type Length,
656 unsigned LineLength =
657 UnbreakableTailLength +
659 if (LineIndex + 1 ==
Lines.size()) {
662 bool HasRemainingText = Offset <
Content[LineIndex].size();
663 if (!HasRemainingText) {
664 bool HasDecoration =
Lines[LineIndex].ltrim().starts_with(Decoration);
666 LineLength -= Decoration.size();
675 return IndentAtLineBreak;
679const llvm::StringSet<>
681 "@param",
"@return",
"@returns",
"@throws",
"@type",
"@template",
682 "@see",
"@deprecated",
"@define",
"@exports",
"@mods",
"@private",
691 StringRef ContentWithNoDecoration =
Content[LineIndex];
692 if (LineIndex == 0 && ContentWithNoDecoration.starts_with(
"*"))
693 ContentWithNoDecoration = ContentWithNoDecoration.substr(1).ltrim(
Blanks);
694 StringRef FirstWord = ContentWithNoDecoration.substr(
695 0, ContentWithNoDecoration.find_first_of(
Blanks));
697 return Style.ContinuationIndentWidth;
704 StringRef
Text =
Content[LineIndex].substr(TailOffset);
705 StringRef Prefix = Decoration;
709 unsigned LocalIndentAtLineBreak = IndentAtLineBreak;
710 if (LineIndex + 1 ==
Lines.size() &&
714 if (LocalIndentAtLineBreak >= 2)
715 LocalIndentAtLineBreak -= 2;
719 unsigned BreakOffsetInToken =
721 unsigned CharsToRemove =
Split.second;
722 assert(LocalIndentAtLineBreak >= Prefix.size());
723 std::string PrefixWithTrailingIndent = std::string(Prefix);
724 PrefixWithTrailingIndent.append(ContentIndent,
' ');
726 tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove,
"",
728 LocalIndentAtLineBreak + ContentIndent -
729 PrefixWithTrailingIndent.size());
733 unsigned LineIndex,
const llvm::Regex &CommentPragmasRegex)
const {
734 if (!
mayReflow(LineIndex, CommentPragmasRegex))
735 return Split(StringRef::npos, 0);
739 size_t Trimmed =
Content[LineIndex].find_first_not_of(
Blanks);
742 if (PreviousContentIndent && Trimmed != StringRef::npos &&
743 Trimmed != PreviousContentIndent) {
744 return Split(StringRef::npos, 0);
748 return Split(0, Trimmed != StringRef::npos ? Trimmed : 0);
753 return DelimitersOnNewline &&
754 Lines[0].substr(1).find_first_not_of(
Blanks) != StringRef::npos;
762 "Reflowing whitespace within a token");
765 unsigned WhitespaceOffsetInToken =
Content[LineIndex - 1].data() +
766 Content[LineIndex - 1].size() -
768 unsigned WhitespaceLength = TrimmedContent.data() -
770 WhitespaceOffsetInToken;
772 tokenAt(LineIndex), WhitespaceOffsetInToken,
773 WhitespaceLength,
"",
780 if (LineIndex == 0) {
781 if (DelimitersOnNewline) {
786 size_t BreakLength =
Lines[0].substr(1).find_first_not_of(
Blanks);
787 if (BreakLength != StringRef::npos) {
796 StringRef Prefix = Decoration;
797 if (
Content[LineIndex].empty()) {
798 if (LineIndex + 1 ==
Lines.size()) {
799 if (!LastLineNeedsDecoration) {
804 }
else if (!Decoration.empty()) {
807 Prefix = Prefix.substr(0, 1);
811 Prefix = Prefix.substr(0, 1);
815 unsigned WhitespaceOffsetInToken =
Content[LineIndex - 1].data() +
816 Content[LineIndex - 1].size() -
818 unsigned WhitespaceLength =
Content[LineIndex].data() -
820 WhitespaceOffsetInToken;
822 tokenAt(LineIndex), WhitespaceOffsetInToken, WhitespaceLength,
"", Prefix,
828 if (DelimitersOnNewline) {
832 StringRef
Line =
Content.back().substr(TailOffset);
834 if (!TrimmedLine.empty())
835 return Split(TrimmedLine.size(),
Line.size() - TrimmedLine.size());
837 return Split(StringRef::npos, 0);
841 unsigned LineIndex,
const llvm::Regex &CommentPragmasRegex)
const {
844 StringRef IndentContent =
Content[LineIndex];
845 if (
Lines[LineIndex].ltrim(
Blanks).starts_with(
"*"))
846 IndentContent =
Lines[LineIndex].ltrim(
Blanks).substr(1);
848 !CommentPragmasRegex.match(IndentContent) &&
857 assert(
Tok.is(TT_LineComment) &&
858 "line comment section must start with a line comment");
860 const int Minimum =
Style.SpacesInLineCommentPrefix.Minimum;
863 int FirstLineSpaceChange = 0;
865 CurrentTok && CurrentTok->is(TT_LineComment);
866 CurrentTok = CurrentTok->Next) {
867 LastLineTok = LineTok;
868 StringRef
TokenText(CurrentTok->TokenText);
870 "unsupported line comment prefix, '//' and '#' are supported");
871 size_t FirstLineIndex =
Lines.size();
875 PrefixSpaceChange.resize(
Lines.size());
877 Prefix.resize(
Lines.size());
878 OriginalPrefix.resize(
Lines.size());
879 for (
size_t i = FirstLineIndex, e =
Lines.size(); i < e; ++i) {
882 OriginalPrefix[i] = IndentPrefix;
883 const int SpacesInPrefix = llvm::count(IndentPrefix,
' ');
887 const auto NoSpaceBeforeFirstCommentChar = [&]() {
888 assert(
Lines[i].size() > IndentPrefix.size());
889 const char FirstCommentChar =
Lines[i][IndentPrefix.size()];
890 const unsigned FirstCharByteSize =
893 Lines[i].substr(IndentPrefix.size(), FirstCharByteSize),
906 if (FirstCommentChar ==
'#' && !
TokenText.starts_with(
"#"))
908 return FirstCommentChar ==
'\\' ||
isPunctuation(FirstCommentChar) ||
917 if (i == 0 || OriginalPrefix[i].rtrim(
Blanks) !=
918 OriginalPrefix[i - 1].rtrim(
Blanks)) {
919 if (SpacesInPrefix < Minimum &&
Lines[i].size() > IndentPrefix.size() &&
920 !NoSpaceBeforeFirstCommentChar()) {
921 FirstLineSpaceChange = Minimum - SpacesInPrefix;
922 }
else if (
static_cast<unsigned>(SpacesInPrefix) >
923 Style.SpacesInLineCommentPrefix.Maximum) {
924 FirstLineSpaceChange =
925 Style.SpacesInLineCommentPrefix.Maximum - SpacesInPrefix;
927 FirstLineSpaceChange = 0;
931 if (
Lines[i].size() != IndentPrefix.size()) {
932 assert(
Lines[i].size() > IndentPrefix.size());
934 PrefixSpaceChange[i] = SpacesInPrefix + FirstLineSpaceChange < Minimum
935 ? Minimum - SpacesInPrefix
936 : FirstLineSpaceChange;
938 const auto FirstNonSpace =
Lines[i][IndentPrefix.size()];
940 const bool LineRequiresLeadingSpace =
941 !NoSpaceBeforeFirstCommentChar() ||
942 (FirstNonSpace ==
'}' && FirstLineSpaceChange != 0);
943 const bool AllowsSpaceChange =
945 (SpacesInPrefix != 0 || LineRequiresLeadingSpace);
947 if (PrefixSpaceChange[i] > 0 && AllowsSpaceChange) {
948 Prefix[i] = IndentPrefix.str();
949 Prefix[i].append(PrefixSpaceChange[i],
' ');
950 }
else if (PrefixSpaceChange[i] < 0 && AllowsSpaceChange) {
951 Prefix[i] = IndentPrefix
952 .drop_back(std::min<std::size_t>(
953 -PrefixSpaceChange[i], SpacesInPrefix))
956 Prefix[i] = IndentPrefix.str();
961 Prefix[i] = IndentPrefix.drop_back(SpacesInPrefix).str();
972 if (EndOfLine == StringRef::npos)
978 LineTok = CurrentTok->
Next;
979 if (CurrentTok->Next && !CurrentTok->Next->ContinuesLineCommentSection) {
1001 StringRef::size_type Length,
1015 unsigned LineIndex,
unsigned TailOffset,
Split Split,
1017 StringRef
Text =
Content[LineIndex].substr(TailOffset);
1020 unsigned BreakOffsetInToken =
1022 unsigned CharsToRemove =
Split.second;
1024 tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove,
"",
1030 unsigned LineIndex,
const llvm::Regex &CommentPragmasRegex)
const {
1031 if (!
mayReflow(LineIndex, CommentPragmasRegex))
1032 return Split(StringRef::npos, 0);
1034 size_t Trimmed =
Content[LineIndex].find_first_not_of(
Blanks);
1040 return Split(0, Trimmed != StringRef::npos ? Trimmed : 0);
1045 if (LineIndex > 0 &&
Tokens[LineIndex] !=
Tokens[LineIndex - 1]) {
1049 *
Tokens[LineIndex], 0, 0,
1052 }
else if (LineIndex > 0) {
1063 unsigned Offset =
Lines[LineIndex - 1].data() +
1064 Lines[LineIndex - 1].size() -
1068 unsigned WhitespaceLength =
1081 unsigned WhitespaceLength =
1100 if (LineIndex > 0 &&
Tokens[LineIndex] !=
Tokens[LineIndex - 1]) {
1106 unsigned LineColumn =
1108 (
Content[LineIndex].data() -
Lines[LineIndex].data()) +
1109 (OriginalPrefix[LineIndex].size() - Prefix[LineIndex].size());
1122 if (OriginalPrefix[LineIndex] != Prefix[LineIndex]) {
1124 const auto SpacesToRemove = -std::min(PrefixSpaceChange[LineIndex], 0);
1125 const auto SpacesToAdd = std::max(PrefixSpaceChange[LineIndex], 0);
1127 tokenAt(LineIndex), OriginalPrefix[LineIndex].size() - SpacesToRemove,
1128 SpacesToRemove,
"",
"",
false,
1135 State.NextToken = LastLineTok->Next;
1139 unsigned LineIndex,
const llvm::Regex &CommentPragmasRegex)
const {
1142 StringRef IndentContent =
Content[LineIndex];
1143 if (
Lines[LineIndex].starts_with(
"//"))
1144 IndentContent =
Lines[LineIndex].substr(2);
1152 !CommentPragmasRegex.match(IndentContent) &&
1155 OriginalPrefix[LineIndex] == OriginalPrefix[LineIndex - 1];
Declares BreakableToken, BreakableStringLiteral, BreakableComment, BreakableBlockComment and Breakabl...
This file implements an indenter that manages the indentation of continuations.
Token - This structure provides full information about a lexed token.
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {....
The JSON file list parser is used to communicate input to InstallAPI.
unsigned TabWidth
The number of columns used for tab stops.
if(T->getSizeExpr()) TRY_TO(TraverseStmt(const_cast< Expr * >(T -> getSizeExpr())))
LLVM_READONLY bool isAlphanumeric(unsigned char c)
Return true if this character is an ASCII letter or digit: [a-zA-Z0-9].
LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)
Returns true if this character is horizontal ASCII whitespace: ' ', '\t', '\f', '\v'.
LLVM_READONLY bool isPunctuation(unsigned char c)
Return true if this character is an ASCII punctuation character.