#include "llvm/Support/Regex.h"
FormatTokenLexer::FormatTokenLexer(
    const SourceManager &SourceMgr, FileID ID, unsigned Column,
    const FormatStyle &Style, encoding::Encoding Encoding,
    llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
    IdentifierTable &IdentTable)
    : FormatTok(nullptr), IsFirstToken(true), StateStack({LexerState::NORMAL}),
      Column(Column), TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID),
      Style(Style), IdentTable(IdentTable), Keywords(IdentTable),
      Encoding(Encoding), Allocator(Allocator), FirstInLineIndex(0),
      FormattingDisabled(false), MacroBlockBeginRegex(Style.MacroBlockBegin),
      MacroBlockEndRegex(Style.MacroBlockEnd) {
  Lex.reset(new Lexer(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts));
  Lex->SetKeepWhitespaceMode(true);

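  // The loops below seed the identifier table with the macro names configured
  // in FormatStyle (ForEachMacros, IfMacros, AttributeMacros, ...) so that
  // later passes can recognize them by IdentifierInfo pointer instead of by
  // string comparison. For example, with the default style "BOOST_FOREACH"
  // ends up mapped to TT_ForEachMacro.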
  for (const std::string &ForEachMacro : Style.ForEachMacros) {
    auto Identifier = &IdentTable.get(ForEachMacro);
    Macros.insert({Identifier, TT_ForEachMacro});
  }
  for (const std::string &IfMacro : Style.IfMacros) {
    auto Identifier = &IdentTable.get(IfMacro);
    Macros.insert({Identifier, TT_IfMacro});
  }
  for (const std::string &AttributeMacro : Style.AttributeMacros) {
    auto Identifier = &IdentTable.get(AttributeMacro);
    Macros.insert({Identifier, TT_AttributeMacro});
  }
  for (const std::string &StatementMacro : Style.StatementMacros) {
    auto Identifier = &IdentTable.get(StatementMacro);
    Macros.insert({Identifier, TT_StatementMacro});
  }
  for (const std::string &TypenameMacro : Style.TypenameMacros) {
    auto Identifier = &IdentTable.get(TypenameMacro);
    Macros.insert({Identifier, TT_TypenameMacro});
  }
  for (const std::string &NamespaceMacro : Style.NamespaceMacros) {
    auto Identifier = &IdentTable.get(NamespaceMacro);
    Macros.insert({Identifier, TT_NamespaceMacro});
  }
  for (const std::string &WhitespaceSensitiveMacro :
       Style.WhitespaceSensitiveMacros) {
    auto Identifier = &IdentTable.get(WhitespaceSensitiveMacro);
    Macros.insert({Identifier, TT_UntouchableMacroFunc});
  }
  for (const std::string &StatementAttributeLikeMacro :
       Style.StatementAttributeLikeMacros) {
    auto Identifier = &IdentTable.get(StatementAttributeLikeMacro);
    Macros.insert({Identifier, TT_StatementAttributeLikeMacro});
  }

  for (const auto &TemplateName : Style.TemplateNames)
    TemplateNames.insert(&IdentTable.get(TemplateName));
  for (const auto &TypeName : Style.TypeNames)
    TypeNames.insert(&IdentTable.get(TypeName));
}

ArrayRef<FormatToken *> FormatTokenLexer::lex() {
  assert(Tokens.empty());
  assert(FirstInLineIndex == 0);
  do {
    Tokens.push_back(getNextToken());
    if (Style.isJavaScript()) {
      tryParseJSRegexLiteral();
      handleTemplateStrings();
    }
    if (Style.Language == FormatStyle::LK_TextProto)
      tryParsePythonComment();
    tryMergePreviousTokens();
    if (Style.isCSharp()) {
      // This needs to come after tokens have been merged so that C#
      // string literals are correctly identified.
      handleCSharpVerbatimAndInterpolatedStrings();
    }
    if (Style.isTableGen()) {
      handleTableGenMultilineString();
      handleTableGenNumericLikeIdentifier();
    }
    if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
      FirstInLineIndex = Tokens.size() - 1;
  } while (Tokens.back()->isNot(tok::eof));

  auto &TokEOF = *Tokens.back();
  if (TokEOF.NewlinesBefore == 0) {
    TokEOF.NewlinesBefore = 1;
    TokEOF.OriginalColumn = 0;
  }

  return Tokens;
}

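// After each raw token is lexed, tryMergePreviousTokens() looks at the tail of
// the token vector and glues together multi-character operators that the raw
// C/C++ lexer does not know about in other languages. For example, in
// JavaScript the two raw tokens '=' '>' become a single TT_FatArrow token, and
// in C# '?' '?' becomes the null-coalescing operator '??'.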
void FormatTokenLexer::tryMergePreviousTokens() {
  if (tryMerge_TMacro())
    return;
  if (tryMergeConflictMarkers())
    return;
  if (tryMergeLessLess())
    return;
  if (tryMergeGreaterGreater())
    return;
  if (tryMergeForEach())
    return;
  if (Style.isCpp() && tryTransformTryUsageForC())
    return;

  if (Style.isCSharp() || Style.isJavaScript()) {
    static const tok::TokenKind NullishCoalescingOperator[] = {tok::question,
                                                               tok::question};
    static const tok::TokenKind NullPropagatingOperator[] = {tok::question,
                                                             tok::period};
    static const tok::TokenKind FatArrow[] = {tok::equal, tok::greater};

    if (tryMergeTokens(FatArrow, TT_FatArrow))
      return;
    if (tryMergeTokens(NullishCoalescingOperator, TT_NullCoalescingOperator)) {
      // Treat like the "||" operator (as opposed to the ternary ?).
      Tokens.back()->Tok.setKind(tok::pipepipe);
      return;
    }
    if (tryMergeTokens(NullPropagatingOperator, TT_NullPropagatingOperator)) {
      // Treat like a regular "." access.
      Tokens.back()->Tok.setKind(tok::period);
      return;
    }
    if (tryMergeNullishCoalescingEqual())
      return;

    if (Style.isCSharp()) {
      static const tok::TokenKind CSharpNullConditionalLSquare[] = {
          tok::question, tok::l_square};

      if (tryMergeCSharpKeywordVariables())
        return;
      if (tryMergeCSharpStringLiteral())
        return;
      if (tryTransformCSharpForEach())
        return;
      if (tryMergeTokens(CSharpNullConditionalLSquare,
                         TT_CSharpNullConditionalLSquare)) {
        // Treat like a regular "[" operator.
        Tokens.back()->Tok.setKind(tok::l_square);
        return;
      }
    }
  }

  if (tryMergeNSStringLiteral())
    return;

  if (Style.isJavaScript()) {
    static const tok::TokenKind JSIdentity[] = {tok::equalequal, tok::equal};
    static const tok::TokenKind JSNotIdentity[] = {tok::exclaimequal,
                                                   tok::equal};
    static const tok::TokenKind JSShiftEqual[] = {tok::greater, tok::greater,
                                                  tok::greaterequal};
    static const tok::TokenKind JSExponentiation[] = {tok::star, tok::star};
    static const tok::TokenKind JSExponentiationEqual[] = {tok::star,
                                                           tok::starequal};
    static const tok::TokenKind JSPipePipeEqual[] = {tok::pipepipe, tok::equal};
    static const tok::TokenKind JSAndAndEqual[] = {tok::ampamp, tok::equal};

    if (tryMergeTokens(JSIdentity, TT_BinaryOperator))
      return;
    if (tryMergeTokens(JSNotIdentity, TT_BinaryOperator))
      return;
    if (tryMergeTokens(JSShiftEqual, TT_BinaryOperator))
      return;
    if (tryMergeTokens(JSExponentiation, TT_JsExponentiation))
      return;
    if (tryMergeTokens(JSExponentiationEqual, TT_JsExponentiationEqual)) {
      Tokens.back()->Tok.setKind(tok::starequal);
      return;
    }
    if (tryMergeTokens(JSAndAndEqual, TT_JsAndAndEqual) ||
        tryMergeTokens(JSPipePipeEqual, TT_JsPipePipeEqual)) {
      // Treat like the "=" assignment operator.
      Tokens.back()->Tok.setKind(tok::equal);
      return;
    }
    if (tryMergeJSPrivateIdentifier())
      return;
  } else if (Style.Language == FormatStyle::LK_Java) {
    static const tok::TokenKind JavaRightLogicalShiftAssign[] = {
        tok::greater, tok::greater, tok::greaterequal};
    if (tryMergeTokens(JavaRightLogicalShiftAssign, TT_BinaryOperator))
      return;
  } else if (Style.isVerilog()) {
    // Merge the number following a base like `'h?a0`.
    if (Tokens.size() >= 3 && Tokens.end()[-3]->is(TT_VerilogNumberBase) &&
        Tokens.end()[-2]->is(tok::numeric_constant) &&
        Tokens.back()->isOneOf(tok::numeric_constant, tok::identifier,
                               tok::question) &&
        tryMergeTokens(2, TT_Unknown)) {
      return;
    }
    // Part select.
    if (tryMergeTokensAny({{tok::minus, tok::colon}, {tok::plus, tok::colon}},
                          TT_BitFieldColon)) {
      return;
    }
    // Xnor. The combined token is treated as a caret, which can be either a
    // unary or a binary operator; the actual type is determined later. The
    // token length check makes sure the token has not already been merged.
    if (Tokens.back()->TokenText.size() == 1 &&
        tryMergeTokensAny({{tok::caret, tok::tilde}, {tok::tilde, tok::caret}},
                          TT_BinaryOperator)) {
      Tokens.back()->Tok.setKind(tok::caret);
      return;
    }
    // Signed shift and distribution weight.
    if (tryMergeTokens({tok::less, tok::less}, TT_BinaryOperator)) {
      Tokens.back()->Tok.setKind(tok::lessless);
      return;
    }
    if (tryMergeTokens({tok::greater, tok::greater}, TT_BinaryOperator)) {
      Tokens.back()->Tok.setKind(tok::greatergreater);
      return;
    }
    if (tryMergeTokensAny({{tok::lessless, tok::equal},
                           {tok::lessless, tok::lessequal},
                           {tok::greatergreater, tok::equal},
                           {tok::greatergreater, tok::greaterequal},
                           {tok::colon, tok::equal},
                           {tok::colon, tok::slash}},
                          TT_BinaryOperator)) {
      return;
    }
    // Exponentiation, signed shift, case equality, and wildcard equality.
    if (tryMergeTokensAny({{tok::star, tok::star},
                           {tok::lessless, tok::less},
                           {tok::greatergreater, tok::greater},
                           {tok::exclaimequal, tok::equal},
                           {tok::exclaimequal, tok::question},
                           {tok::equalequal, tok::equal},
                           {tok::equalequal, tok::question}},
                          TT_BinaryOperator)) {
      return;
    }
    // Path traces in module paths in specify blocks and implications in
    // properties.
    if (tryMergeTokensAny({{tok::plusequal, tok::greater},
                           {tok::plus, tok::star, tok::greater},
                           {tok::minusequal, tok::greater},
                           {tok::minus, tok::star, tok::greater},
                           {tok::less, tok::arrow},
                           {tok::equal, tok::greater},
                           {tok::star, tok::greater},
                           {tok::pipeequal, tok::greater},
                           {tok::pipe, tok::arrow},
                           {tok::hash, tok::minus, tok::hash},
                           {tok::hash, tok::equal, tok::hash}},
                          TT_BinaryOperator) ||
        Tokens.back()->is(tok::arrow)) {
      return;
    }
  } else if (Style.isTableGen()) {
    // TableGen's multi-line string starts with [{.
    if (tryMergeTokens({tok::l_square, tok::l_brace},
                       TT_TableGenMultiLineString)) {
      // Set again with finalizing. This must never be annotated as any other
      // type.
      Tokens.back()->setFinalizedType(TT_TableGenMultiLineString);
      Tokens.back()->Tok.setKind(tok::string_literal);
      return;
    }
    // TableGen's bang operator has the form !<name>; !cond is a special case
    // with its own syntax.
    if (tryMergeTokens({tok::exclaim, tok::identifier},
                       TT_TableGenBangOperator)) {
      Tokens.back()->Tok.setKind(tok::identifier);
      Tokens.back()->Tok.setIdentifierInfo(nullptr);
      if (Tokens.back()->TokenText == "!cond")
        Tokens.back()->setFinalizedType(TT_TableGenCondOperator);
      else
        Tokens.back()->setFinalizedType(TT_TableGenBangOperator);
      return;
    }
    if (tryMergeTokens({tok::exclaim, tok::kw_if}, TT_TableGenBangOperator)) {
      // "! if" becomes "!if": the bang captures "if" even across whitespace,
      // which is the only possibility in TableGen's syntax.
      Tokens.back()->Tok.setKind(tok::identifier);
      Tokens.back()->Tok.setIdentifierInfo(nullptr);
      Tokens.back()->setFinalizedType(TT_TableGenBangOperator);
      return;
    }
    // '+' and '-' followed by a number are literals, not unary operators.
    if (tryMergeTokens({tok::plus, tok::numeric_constant}, TT_Unknown)) {
      Tokens.back()->Tok.setKind(tok::numeric_constant);
      return;
    }
    if (tryMergeTokens({tok::minus, tok::numeric_constant}, TT_Unknown)) {
      Tokens.back()->Tok.setKind(tok::numeric_constant);
      return;
    }
  }
}

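// Merges an Objective-C '@' with the following string literal into a single
// TT_ObjCStringLiteral token, e.g. the two tokens `@` `"abc"` become `@"abc"`.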
bool FormatTokenLexer::tryMergeNSStringLiteral() {
  if (Tokens.size() < 2)
    return false;
  auto &At = *(Tokens.end() - 2);
  auto &String = *(Tokens.end() - 1);
  if (At->isNot(tok::at) || String->isNot(tok::string_literal))
    return false;
  At->Tok.setKind(tok::string_literal);
  At->TokenText = StringRef(At->TokenText.begin(),
                            String->TokenText.end() - At->TokenText.begin());
  At->ColumnWidth += String->ColumnWidth;
  At->setType(TT_ObjCStringLiteral);
  Tokens.erase(Tokens.end() - 1);
  return true;
}

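// JavaScript private fields and methods are written as `#name`. The raw lexer
// produces `#` followed by an identifier; merge them into one
// TT_JsPrivateIdentifier token so `this.#count` is treated as a single name.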
bool FormatTokenLexer::tryMergeJSPrivateIdentifier() {
  if (Tokens.size() < 2)
    return false;
  auto &Hash = *(Tokens.end() - 2);
  auto &Identifier = *(Tokens.end() - 1);
  if (Hash->isNot(tok::hash) || Identifier->isNot(tok::identifier))
    return false;
  Hash->Tok.setKind(tok::identifier);
  Hash->TokenText =
      StringRef(Hash->TokenText.begin(),
                Identifier->TokenText.end() - Hash->TokenText.begin());
  Hash->ColumnWidth += Identifier->ColumnWidth;
  Hash->setType(TT_JsPrivateIdentifier);
  Tokens.erase(Tokens.end() - 1);
  return true;
}

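// C# string prefixes: an interpolated string starts with '$', a verbatim
// string with '@', and the two may be combined ("$@" or "@$"). The raw lexer
// sees the prefix characters as separate tokens, so they are folded into the
// following string literal and the result is typed TT_CSharpStringLiteral.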
bool FormatTokenLexer::tryMergeCSharpStringLiteral() {
  if (Tokens.size() < 2)
    return false;

  // Look for @"aaaaaa" or $"aaaaaa".
  const auto String = *(Tokens.end() - 1);
  if (String->isNot(tok::string_literal))
    return false;

  auto Prefix = *(Tokens.end() - 2);
  if (Prefix->isNot(tok::at) && Prefix->TokenText != "$")
    return false;

  if (Tokens.size() > 2) {
    const auto Tok = *(Tokens.end() - 3);
    if ((Tok->TokenText == "$" && Prefix->is(tok::at)) ||
        (Tok->is(tok::at) && Prefix->TokenText == "$")) {
      // This looks like $@"aaa" or @$"aaa", so combine all three tokens.
      Tok->ColumnWidth += Prefix->ColumnWidth;
      Tokens.erase(Tokens.end() - 2);
      Prefix = Tok;
    }
  }

  // Convert back into just a string_literal.
  Prefix->Tok.setKind(tok::string_literal);
  Prefix->TokenText =
      StringRef(Prefix->TokenText.begin(),
                String->TokenText.end() - Prefix->TokenText.begin());
  Prefix->ColumnWidth += String->ColumnWidth;
  Prefix->setType(TT_CSharpStringLiteral);
  Tokens.erase(Tokens.end() - 1);
  return true;
}

const llvm::StringSet<> FormatTokenLexer::CSharpAttributeTargets = {
    "assembly", "module",   "field",  "event", "method",
    "param",    "property", "return", "type",
};

bool FormatTokenLexer::tryMergeNullishCoalescingEqual() {
  if (Tokens.size() < 2)
    return false;
  auto &NullishCoalescing = *(Tokens.end() - 2);
  auto &Equal = *(Tokens.end() - 1);
  if (NullishCoalescing->isNot(TT_NullCoalescingOperator) ||
      Equal->isNot(tok::equal)) {
    return false;
  }
  NullishCoalescing->Tok.setKind(tok::equal); // no '??=' in clang tokens.
  NullishCoalescing->TokenText =
      StringRef(NullishCoalescing->TokenText.begin(),
                Equal->TokenText.end() - NullishCoalescing->TokenText.begin());
  NullishCoalescing->ColumnWidth += Equal->ColumnWidth;
  NullishCoalescing->setType(TT_NullCoalescingEqual);
  Tokens.erase(Tokens.end() - 1);
  return true;
}

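// In C# an identifier may be prefixed with '@' to use a keyword as a variable
// name, e.g. `@class` or `@if`. Merge the '@' with the following keyword into
// a single identifier token so it is formatted like any other name.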
bool FormatTokenLexer::tryMergeCSharpKeywordVariables() {
  if (Tokens.size() < 2)
    return false;
  const auto At = *(Tokens.end() - 2);
  if (At->isNot(tok::at))
    return false;
  const auto Keyword = *(Tokens.end() - 1);
  if (Keyword->TokenText == "$")
    return false;
  if (!Keywords.isCSharpKeyword(*Keyword))
    return false;

  At->Tok.setKind(tok::identifier);
  At->TokenText = StringRef(At->TokenText.begin(),
                            Keyword->TokenText.end() - At->TokenText.begin());
  At->ColumnWidth += Keyword->ColumnWidth;
  At->setType(Keyword->getType());
  Tokens.erase(Tokens.end() - 1);
  return true;
}

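// In C#, `foreach` is an ordinary identifier to the raw lexer. When it appears
// it is retyped as TT_ForEachMacro with kind tok::kw_for so that foreach loops
// are formatted like C++ range-based for statements.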
bool FormatTokenLexer::tryTransformCSharpForEach() {
  if (Tokens.size() < 1)
    return false;
  auto &Identifier = *(Tokens.end() - 1);
  if (Identifier->isNot(tok::identifier))
    return false;
  if (Identifier->TokenText != "foreach")
    return false;

  Identifier->setType(TT_ForEachMacro);
  Identifier->Tok.setKind(tok::kw_for);
  return true;
}

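// Merges `for` followed by the identifier `each` (the C++/CLI "for each"
// loop) into a single TT_ForEachMacro token.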
bool FormatTokenLexer::tryMergeForEach() {
  if (Tokens.size() < 2)
    return false;
  auto &For = *(Tokens.end() - 2);
  auto &Each = *(Tokens.end() - 1);
  if (For->isNot(tok::kw_for))
    return false;
  if (Each->isNot(tok::identifier))
    return false;
  if (Each->TokenText != "each")
    return false;

  For->setType(TT_ForEachMacro);
  For->Tok.setKind(tok::kw_for);

  For->TokenText = StringRef(For->TokenText.begin(),
                             Each->TokenText.end() - For->TokenText.begin());
  For->ColumnWidth += Each->ColumnWidth;
  Tokens.erase(Tokens.end() - 1);
  return true;
}

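// In C, `try` is not a keyword. Unless the `try` token starts a real try
// block (followed by a brace or colon, or preceded by '@' as in @try), it is
// turned back into an ordinary identifier so calls like `try(...)` are not
// formatted as exception-handling statements.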
bool FormatTokenLexer::tryTransformTryUsageForC() {
  if (Tokens.size() < 2)
    return false;
  auto &Try = *(Tokens.end() - 2);
  if (Try->isNot(tok::kw_try))
    return false;
  auto &Next = *(Tokens.end() - 1);
  if (Next->isOneOf(tok::l_brace, tok::colon, tok::hash, tok::comment))
    return false;

  if (Tokens.size() > 2) {
    auto &At = *(Tokens.end() - 3);
    if (At->is(tok::at))
      return false;
  }

  Try->Tok.setKind(tok::identifier);
  return true;
}

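// Merge X,less,less,Y into X,lessless,Y unless X or Y is itself a '<'; this
// re-forms a "<<" that arrived as two adjacent '<' tokens while leaving
// template openers untouched.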
bool FormatTokenLexer::tryMergeLessLess() {
  if (Tokens.size() < 3)
    return false;

  auto First = Tokens.end() - 3;
  if (First[0]->isNot(tok::less) || First[1]->isNot(tok::less))
    return false;

  // Only merge if there currently is no whitespace between the two "<".
  if (First[1]->hasWhitespaceBefore())
    return false;

  auto X = Tokens.size() > 3 ? First[-1] : nullptr;
  if (X && X->is(tok::less))
    return false;

  auto Y = First[2];
  if ((!X || X->isNot(tok::kw_operator)) && Y->is(tok::less))
    return false;

  First[0]->Tok.setKind(tok::lessless);
  First[0]->TokenText = "<<";
  First[0]->ColumnWidth += 1;
  Tokens.erase(Tokens.end() - 2);
  return true;
}

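// Merges '>' '>' into tok::greatergreater, but only when the preceding token
// is the `operator` keyword (e.g. `operator>>` spelled as two tokens) or when
// there is no preceding token at all.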
bool FormatTokenLexer::tryMergeGreaterGreater() {
  if (Tokens.size() < 2)
    return false;

  auto First = Tokens.end() - 2;
  if (First[0]->isNot(tok::greater) || First[1]->isNot(tok::greater))
    return false;

  // Only merge if there currently is no whitespace between the two ">".
  if (First[1]->hasWhitespaceBefore())
    return false;

  auto Tok = Tokens.size() > 2 ? First[-1] : nullptr;
  if (Tok && Tok->isNot(tok::kw_operator))
    return false;

  First[0]->Tok.setKind(tok::greatergreater);
  First[0]->TokenText = ">>";
  First[0]->ColumnWidth += 1;
  Tokens.erase(Tokens.end() - 1);
  return true;
}

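// Generic merge helpers. tryMergeTokens(Kinds, NewType) checks whether the
// last Kinds.size() tokens match the given kinds exactly and, if none of them
// is separated by whitespace, collapses them into the first token and assigns
// it NewType. For example, tryMergeTokens({tok::equal, tok::greater},
// TT_FatArrow) turns `=` `>` into a single `=>` token.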
bool FormatTokenLexer::tryMergeTokens(ArrayRef<tok::TokenKind> Kinds,
                                      TokenType NewType) {
  if (Tokens.size() < Kinds.size())
    return false;

  SmallVectorImpl<FormatToken *>::const_iterator First =
      Tokens.end() - Kinds.size();
  for (unsigned i = 0; i < Kinds.size(); ++i)
    if (First[i]->isNot(Kinds[i]))
      return false;

  return tryMergeTokens(Kinds.size(), NewType);
}

bool FormatTokenLexer::tryMergeTokens(size_t Count, TokenType NewType) {
  if (Tokens.size() < Count)
    return false;

  SmallVectorImpl<FormatToken *>::const_iterator First = Tokens.end() - Count;
  unsigned AddLength = 0;
  for (size_t i = 1; i < Count; ++i) {
    // If there is whitespace separating the token and the previous one,
    // they should not be merged.
    if (First[i]->hasWhitespaceBefore())
      return false;
    AddLength += First[i]->TokenText.size();
  }

  Tokens.resize(Tokens.size() - Count + 1);
  First[0]->TokenText = StringRef(First[0]->TokenText.data(),
                                  First[0]->TokenText.size() + AddLength);
  First[0]->ColumnWidth += AddLength;
  First[0]->setType(NewType);
  return true;
}

bool FormatTokenLexer::tryMergeTokensAny(
    ArrayRef<ArrayRef<tok::TokenKind>> Kinds, TokenType NewType) {
  return llvm::any_of(Kinds, [this, NewType](ArrayRef<tok::TokenKind> Kinds) {
    return tryMergeTokens(Kinds, NewType);
  });
}

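// JavaScript regex literal detection: a '/' starts a regex literal only where
// an operand may appear. precedesOperand() reports whether the given token is
// one after which an operand (and therefore a regex literal) can follow, e.g.
// after `(`, `,`, `return`, or any binary operator, but not after an
// identifier or a closing parenthesis.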
bool FormatTokenLexer::precedesOperand(FormatToken *Tok) {
  // NB: This is not entirely correct, as an r_paren can introduce an operand
  // location in e.g. `if (foo) /bar/.exec(...);`. That is a rare enough
  // corner case to not matter in practice.
  return Tok->isOneOf(tok::period, tok::l_paren, tok::comma, tok::l_brace,
                      tok::r_brace, tok::l_square, tok::semi, tok::exclaim,
                      tok::colon, tok::question, tok::tilde) ||
         Tok->isOneOf(tok::kw_return, tok::kw_do, tok::kw_case, tok::kw_throw,
                      tok::kw_else, tok::kw_new, tok::kw_delete, tok::kw_void,
                      tok::kw_typeof, Keywords.kw_instanceof, Keywords.kw_in) ||
         Tok->isBinaryOperator();
}

bool FormatTokenLexer::canPrecedeRegexLiteral(FormatToken *Prev) {
  if (!Prev)
    return true;

  // Regex literals can only follow after prefix unary operators, not after
  // postfix unary operators. If the '++' is followed by a non-operand
  // introducing token, the slash here is the operand and not the start of a
  // regex.
  if (Prev->isOneOf(tok::plusplus, tok::minusminus, tok::exclaim))
    return Tokens.size() < 3 || precedesOperand(Tokens[Tokens.size() - 3]);

  // The previous token must introduce an operand location where regex
  // literals can occur.
  if (!precedesOperand(Prev))
    return false;

  return true;
}

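// Once a leading '/' (or '/=') is known to start a JavaScript regex literal,
// the buffer is scanned by hand for the closing '/', taking character classes
// ("[...]") into account, and the whole range is re-packaged as a single
// string_literal token of type TT_RegexLiteral.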
void FormatTokenLexer::tryParseJSRegexLiteral() {
  FormatToken *RegexToken = Tokens.back();
  if (!RegexToken->isOneOf(tok::slash, tok::slashequal))
    return;

  FormatToken *Prev = nullptr;
  for (FormatToken *FT : llvm::drop_begin(llvm::reverse(Tokens))) {
    // NB: Because previous pointers are not initialized yet, this cannot use
    // Token.getPreviousNonComment.
    if (FT->isNot(tok::comment)) {
      Prev = FT;
      break;
    }
  }

  if (!canPrecedeRegexLiteral(Prev))
    return;

  // 'Manually' lex ahead in the current file buffer.
  const char *Offset = Lex->getBufferLocation();
  const char *RegexBegin = Offset - RegexToken->TokenText.size();
  StringRef Buffer = Lex->getBuffer();
  bool InCharacterClass = false;
  bool HaveClosingSlash = false;
  for (; !HaveClosingSlash && Offset != Buffer.end(); ++Offset) {
    // Regular expressions are terminated with a '/', which can only be
    // escaped within a character class.
    switch (*Offset) {
    case '\\':
      // Skip the escaped character.
      ++Offset;
      break;
    case '[':
      InCharacterClass = true;
      break;
    case ']':
      InCharacterClass = false;
      break;
    case '/':
      if (!InCharacterClass)
        HaveClosingSlash = true;
      break;
    }
  }

  RegexToken->setType(TT_RegexLiteral);
  // Treat regex literals like other string_literals.
  RegexToken->Tok.setKind(tok::string_literal);
  RegexToken->TokenText = StringRef(RegexBegin, Offset - RegexBegin);
  RegexToken->ColumnWidth = RegexToken->TokenText.size();

  resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset)));
}

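// Scans a C# verbatim/interpolated string for its terminating quote, tracking
// unmatched '{' braces inside interpolations and treating doubled characters
// ('""', '{{', '}}') as escapes. Returns the position of the closing quote,
// or End if the string is unterminated.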
static auto lexCSharpString(const char *Begin, const char *End, bool Verbatim,
                            bool Interpolated) {
  auto Repeated = [&Begin, End]() {
    return Begin + 1 < End && Begin[1] == Begin[0];
  };

  for (int UnmatchedOpeningBraceCount = 0; Begin < End; ++Begin) {
    switch (*Begin) {
    case '\\':
      if (!Verbatim)
        ++Begin;
      break;
    case '{':
      if (Interpolated) {
        // {{ inside an interpolated string is an escaped brace.
        if (Repeated())
          ++Begin;
        else
          ++UnmatchedOpeningBraceCount;
      }
      break;
    case '}':
      if (Interpolated) {
        // }} inside an interpolated string is an escaped brace.
        if (Repeated())
          ++Begin;
        else if (UnmatchedOpeningBraceCount > 0)
          --UnmatchedOpeningBraceCount;
        else
          return End;
      }
      break;
    case '"':
      if (UnmatchedOpeningBraceCount > 0)
        break;
      // "" within a verbatim string is an escaped double quote: skip it.
      if (Verbatim && Repeated()) {
        ++Begin;
        break;
      }
      return Begin;
    }
  }

  return End;
}

void FormatTokenLexer::handleCSharpVerbatimAndInterpolatedStrings() {
  FormatToken *CSharpStringLiteral = Tokens.back();

  if (CSharpStringLiteral->isNot(TT_CSharpStringLiteral))
    return;

  auto &TokenText = CSharpStringLiteral->TokenText;

  bool Verbatim = false;
  bool Interpolated = false;
  if (TokenText.starts_with(R"($@")") || TokenText.starts_with(R"(@$")")) {
    Verbatim = true;
    Interpolated = true;
  } else if (TokenText.starts_with(R"(@")")) {
    Verbatim = true;
  } else if (TokenText.starts_with(R"($")")) {
    Interpolated = true;
  }

  if (!Verbatim && !Interpolated)
    return;

  // 'Manually' lex ahead in the current file buffer.
  const char *StrBegin = Lex->getBufferLocation() - TokenText.size();
  const char *Offset = StrBegin;
  if (Verbatim && Interpolated)
    Offset += 3;
  else
    Offset += 2;

  const auto End = Lex->getBuffer().end();
  Offset = lexCSharpString(Offset, End, Verbatim, Interpolated);

  // Make no attempt to format code properly if a verbatim string is
  // unterminated.
  if (Offset >= End)
    return;

  StringRef LiteralText(StrBegin, Offset - StrBegin + 1);
  TokenText = LiteralText;

  // Adjust width for potentially multiline string literals.
  size_t FirstBreak = LiteralText.find('\n');
  StringRef FirstLineText = FirstBreak == StringRef::npos
                                ? LiteralText
                                : LiteralText.substr(0, FirstBreak);
  CSharpStringLiteral->ColumnWidth = encoding::columnWidthWithTabs(
      FirstLineText, CSharpStringLiteral->OriginalColumn, Style.TabWidth,
      Encoding);
  size_t LastBreak = LiteralText.rfind('\n');
  if (LastBreak != StringRef::npos) {
    CSharpStringLiteral->IsMultiline = true;
    unsigned StartColumn = 0;
    CSharpStringLiteral->LastLineColumnWidth = encoding::columnWidthWithTabs(
        LiteralText.substr(LastBreak + 1), StartColumn, Style.TabWidth,
        Encoding);
  }

  assert(Offset < End);
  resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset + 1)));
}

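// TableGen multi-line ("code") string literals are delimited by [{ and }].
// The opening [{ was merged into one token earlier; here the buffer is
// searched for the closing }] and the whole range becomes the token text.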
void FormatTokenLexer::handleTableGenMultilineString() {
  FormatToken *MultiLineString = Tokens.back();
  if (MultiLineString->isNot(TT_TableGenMultiLineString))
    return;

  auto OpenOffset = Lex->getCurrentBufferOffset() - 2 /* "[{" */;
  // "}]" is the end of the multi-line string.
  auto CloseOffset = Lex->getBuffer().find("}]", OpenOffset);
  if (CloseOffset == StringRef::npos)
    return;
  auto Text = Lex->getBuffer().substr(OpenOffset, CloseOffset - OpenOffset + 2);
  MultiLineString->TokenText = Text;
  resetLexer(SourceMgr.getFileOffset(
      Lex->getSourceLocation(Lex->getBufferLocation() - 2 + Text.size())));
  auto FirstLineText = Text;
  auto FirstBreak = Text.find('\n');
  // Set ColumnWidth and LastLineColumnWidth when it has multiple lines.
  if (FirstBreak != StringRef::npos) {
    MultiLineString->IsMultiline = true;
    FirstLineText = Text.substr(0, FirstBreak + 1);
    // LastLineColumnWidth holds the width of the last line.
    auto LastBreak = Text.rfind('\n');
    MultiLineString->LastLineColumnWidth = encoding::columnWidthWithTabs(
        Text.substr(LastBreak + 1), MultiLineString->OriginalColumn,
        Style.TabWidth, Encoding);
  }
  // ColumnWidth holds only the width of the first line.
  MultiLineString->ColumnWidth = encoding::columnWidthWithTabs(
      FirstLineText, MultiLineString->OriginalColumn, Style.TabWidth, Encoding);
}

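// TableGen identifiers may begin with digits (e.g. `2foo`), so the raw lexer
// reports them as numeric constants. When such a token contains characters
// that cannot belong to a decimal, binary (0b...), or hexadecimal (0x...)
// literal, it is re-classified here as an identifier.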
void FormatTokenLexer::handleTableGenNumericLikeIdentifier() {
  FormatToken *Tok = Tokens.back();
  // TableGen identifiers can begin with digits. Such tokens are lexed as
  // numeric_constant at this point.
  if (Tok->isNot(tok::numeric_constant))
    return;
  StringRef Text = Tok->TokenText;
  // The token is treated as a number if any of the following holds:
  // 1. It starts with '+' or '-'.
  // 2. All the characters are digits.
  // 3. The first non-digit character is 'b', and the next is '0' or '1'.
  // 4. The first non-digit character is 'x', and the next is a hex digit.
  if (Text.size() < 1 || Text[0] == '+' || Text[0] == '-')
    return;
  const auto NonDigitPos = Text.find_if([](char C) { return !isdigit(C); });
  // All the characters are digits.
  if (NonDigitPos == StringRef::npos)
    return;
  char FirstNonDigit = Text[NonDigitPos];
  if (NonDigitPos < Text.size() - 1) {
    char TheNext = Text[NonDigitPos + 1];
    // Regarded as a binary number.
    if (FirstNonDigit == 'b' && (TheNext == '0' || TheNext == '1'))
      return;
    // Regarded as a hex number.
    if (FirstNonDigit == 'x' && isxdigit(TheNext))
      return;
  }
  if (isalpha(FirstNonDigit) || FirstNonDigit == '_') {
    // This is actually an identifier in TableGen.
    Tok->Tok.setKind(tok::identifier);
    Tok->Tok.setIdentifierInfo(nullptr);
  }
}

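// JavaScript template strings are delimited by backticks and may contain
// ${...} substitutions, which in turn may contain nested template strings.
// A small state stack tracks whether the lexer is currently inside a template
// string or inside a substitution, and the literal text between backticks is
// collapsed into a single TT_TemplateString token.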
void FormatTokenLexer::handleTemplateStrings() {
  FormatToken *BacktickToken = Tokens.back();

  if (BacktickToken->is(tok::l_brace)) {
    StateStack.push(LexerState::NORMAL);
    return;
  }
  if (BacktickToken->is(tok::r_brace)) {
    if (StateStack.size() == 1)
      return;
    StateStack.pop();
    if (StateStack.top() != LexerState::TEMPLATE_STRING)
      return;
    // Back in a template string: continue lexing the literal text after the
    // closing '}' of the substitution.
  } else if (BacktickToken->is(tok::unknown) &&
             BacktickToken->TokenText == "`") {
    StateStack.push(LexerState::TEMPLATE_STRING);
  } else {
    return; // Not actually a template string.
  }

  // 'Manually' lex ahead in the current file buffer.
  const char *Offset = Lex->getBufferLocation();
  const char *TmplBegin = Offset - BacktickToken->TokenText.size();
  for (; Offset != Lex->getBuffer().end(); ++Offset) {
    if (Offset[0] == '`') {
      StateStack.pop();
      ++Offset;
      break;
    }
    if (Offset[0] == '\\') {
      ++Offset; // Skip the escaped character.
    } else if (Offset + 1 < Lex->getBuffer().end() && Offset[0] == '$' &&
               Offset[1] == '{') {
      // '${' introduces an expression interpolation in the template string.
      StateStack.push(LexerState::NORMAL);
      Offset += 2;
      break;
    }
  }

  StringRef LiteralText(TmplBegin, Offset - TmplBegin);
  BacktickToken->setType(TT_TemplateString);
  BacktickToken->Tok.setKind(tok::string_literal);
  BacktickToken->TokenText = LiteralText;

  // Adjust width for potentially multiline string literals.
  size_t FirstBreak = LiteralText.find('\n');
  StringRef FirstLineText = FirstBreak == StringRef::npos
                                ? LiteralText
                                : LiteralText.substr(0, FirstBreak);
  BacktickToken->ColumnWidth = encoding::columnWidthWithTabs(
      FirstLineText, BacktickToken->OriginalColumn, Style.TabWidth, Encoding);
  size_t LastBreak = LiteralText.rfind('\n');
  if (LastBreak != StringRef::npos) {
    BacktickToken->IsMultiline = true;
    unsigned StartColumn = 0; // The template tail spans the entire line.
    BacktickToken->LastLineColumnWidth = encoding::columnWidthWithTabs(
        LiteralText.substr(LastBreak + 1), StartColumn, Style.TabWidth,
        Encoding);
  }

  SourceLocation loc = Lex->getSourceLocation(Offset);
  resetLexer(SourceMgr.getFileOffset(loc));
}

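// Text protos use '#' line comments. A '#' token is expanded here to cover
// everything up to the end of the line and re-typed as a TT_LineComment.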
void FormatTokenLexer::tryParsePythonComment() {
  FormatToken *HashToken = Tokens.back();
  if (!HashToken->isOneOf(tok::hash, tok::hashhash))
    return;
  // Turn the remainder of this line into a comment.
  const char *CommentBegin =
      Lex->getBufferLocation() - HashToken->TokenText.size();
  size_t From = CommentBegin - Lex->getBuffer().begin();
  size_t To = Lex->getBuffer().find_first_of('\n', From);
  if (To == StringRef::npos)
    To = Lex->getBuffer().size();
  size_t Len = To - From;
  HashToken->setType(TT_LineComment);
  HashToken->Tok.setKind(tok::comment);
  HashToken->TokenText = Lex->getBuffer().substr(From, Len);
  SourceLocation Loc = To < Lex->getBuffer().size()
                           ? Lex->getSourceLocation(CommentBegin + Len)
                           : SourceMgr.getLocForEndOfFile(ID);
  resetLexer(SourceMgr.getFileOffset(Loc));
}

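// Windows sources wrap string literals in the _T() macro, e.g. _T("Hello").
// The four tokens `_T` `(` `"Hello"` `)` are collapsed into the single string
// literal token so the whole invocation is handled as one string literal.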
bool FormatTokenLexer::tryMerge_TMacro() {
  if (Tokens.size() < 4)
    return false;
  FormatToken *Last = Tokens.back();
  if (Last->isNot(tok::r_paren))
    return false;

  FormatToken *String = Tokens[Tokens.size() - 2];
  if (String->isNot(tok::string_literal) || String->IsMultiline)
    return false;

  if (Tokens[Tokens.size() - 3]->isNot(tok::l_paren))
    return false;

  FormatToken *Macro = Tokens[Tokens.size() - 4];
  if (Macro->TokenText != "_T")
    return false;

  const char *Start = Macro->TokenText.data();
  const char *End = Last->TokenText.data() + Last->TokenText.size();
  String->TokenText = StringRef(Start, End - Start);
  String->IsFirst = Macro->IsFirst;
  String->LastNewlineOffset = Macro->LastNewlineOffset;
  String->WhitespaceRange = Macro->WhitespaceRange;
  String->OriginalColumn = Macro->OriginalColumn;
  String->ColumnWidth = encoding::columnWidthWithTabs(
      String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding);
  String->NewlinesBefore = Macro->NewlinesBefore;
  String->HasUnescapedNewline = Macro->HasUnescapedNewline;

  Tokens.pop_back();
  Tokens.pop_back();
  Tokens.pop_back();
  Tokens.back() = String;
  if (FirstInLineIndex >= Tokens.size())
    FirstInLineIndex = Tokens.size() - 1;
  return true;
}

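// Version-control conflict markers ("<<<<<<<", "|||||||", "=======",
// ">>>>>>>", and the shorter Perforce variants ">>>>", "====", "<<<<") that
// start a line are folded into a single token typed
// TT_ConflictStart/Alternative/End so later stages can recognize the
// conflicting region.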
bool FormatTokenLexer::tryMergeConflictMarkers() {
  if (Tokens.back()->NewlinesBefore == 0 && Tokens.back()->isNot(tok::eof))
    return false;

  // Conflict lines look like:
  // <marker> <text from the vcs>
  // For example:
  // >>>>>>> /file/in/file/system at revision 1234
  //
  // All tokens in a line that starts with a conflict marker are merged into a
  // single token with a special token type that the unwrapped line parser
  // uses to correctly rebuild the underlying code.

  FileID ID;
  // Get the position of the first token in the line.
  unsigned FirstInLineOffset;
  std::tie(ID, FirstInLineOffset) = SourceMgr.getDecomposedLoc(
      Tokens[FirstInLineIndex]->getStartOfNonWhitespace());
  StringRef Buffer = SourceMgr.getBufferData(ID);
  // Calculate the offset of the start of the current line.
  auto LineOffset = Buffer.rfind('\n', FirstInLineOffset);
  if (LineOffset == StringRef::npos)
    LineOffset = 0;
  else
    ++LineOffset;

  auto FirstSpace = Buffer.find_first_of(" \n", LineOffset);
  StringRef LineStart;
  if (FirstSpace == StringRef::npos)
    LineStart = Buffer.substr(LineOffset);
  else
    LineStart = Buffer.substr(LineOffset, FirstSpace - LineOffset);

  TokenType Type = TT_Unknown;
  if (LineStart == "<<<<<<<" || LineStart == ">>>>") {
    Type = TT_ConflictStart;
  } else if (LineStart == "|||||||" || LineStart == "=======" ||
             LineStart == "====") {
    Type = TT_ConflictAlternative;
  } else if (LineStart == ">>>>>>>" || LineStart == "<<<<") {
    Type = TT_ConflictEnd;
  }

  if (Type != TT_Unknown) {
    FormatToken *Next = Tokens.back();

    Tokens.resize(FirstInLineIndex + 1);
    // A complete token is not needed here, as it will be skipped during
    // parsing anyway (as long as it is not empty).
    Tokens.back()->setType(Type);
    Tokens.back()->Tok.setKind(tok::kw___unknown_anytype);

    Tokens.push_back(Next);
    return true;
  }

  return false;
}

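// When a raw '>>' or '<<' token is split in two (see getNextToken), the second
// half is produced lazily by getStashedToken(), which fabricates a
// one-character token at the location immediately following the first half.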
FormatToken *FormatTokenLexer::getStashedToken() {
  // Create a synthesized second '>' or '<' token.
  Token Tok = FormatTok->Tok;
  StringRef TokenText = FormatTok->TokenText;

  unsigned OriginalColumn = FormatTok->OriginalColumn;
  FormatTok = new (Allocator.Allocate()) FormatToken;
  FormatTok->Tok = Tok;
  SourceLocation TokLocation =
      FormatTok->Tok.getLocation().getLocWithOffset(Tok.getLength() - 1);
  FormatTok->Tok.setLocation(TokLocation);
  FormatTok->WhitespaceRange = SourceRange(TokLocation, TokLocation);
  FormatTok->TokenText = TokenText;
  FormatTok->ColumnWidth = 1;
  FormatTok->OriginalColumn = OriginalColumn + 1;

  return FormatTok;
}

void FormatTokenLexer::truncateToken(size_t NewLen) {
  assert(NewLen <= FormatTok->TokenText.size());
  resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(
      Lex->getBufferLocation() - FormatTok->TokenText.size() + NewLen)));
  FormatTok->TokenText = FormatTok->TokenText.substr(0, NewLen);
  FormatTok->ColumnWidth = encoding::columnWidthWithTabs(
      FormatTok->TokenText, FormatTok->OriginalColumn, Style.TabWidth,
      Encoding);
  FormatTok->Tok.setLength(NewLen);
}

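// Counts the length of leading whitespace in a raw token, treating an escaped
// newline ('\' followed by '\n' or '\r') and the trigraph spelling "??/" of
// the backslash as whitespace as well.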
static size_t countLeadingWhitespace(StringRef Text) {
  const unsigned char *const Begin = Text.bytes_begin();
  const unsigned char *const End = Text.bytes_end();
  const unsigned char *Cur = Begin;
  while (Cur < End) {
    if (isspace(Cur[0])) {
      ++Cur;
    } else if (Cur[0] == '\\' && (Cur[1] == '\n' || Cur[1] == '\r')) {
      // A '\' followed by a newline always escapes the newline, regardless
      // of whether there is another '\' before it. The buffer is
      // null-terminated, so the end of the input cannot be reached here, and
      // the lexer never breaks apart an escaped newline.
      assert(End - Cur >= 2);
      Cur += 2;
    } else if (Cur[0] == '?' && Cur[1] == '?' && Cur[2] == '/' &&
               (Cur[3] == '\n' || Cur[3] == '\r')) {
      // Newlines can also be escaped by a '??/' trigraph.
      assert(End - Cur >= 4);
      Cur += 4;
    } else {
      break;
    }
  }
  return Cur - Begin;
}

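// getNextToken() drives the raw lexer: it reads one raw token, folds any
// leading whitespace (including escaped newlines) into the token's
// WhitespaceRange, applies language-specific fixups (splitting '>>'/'<<',
// Verilog and TableGen specials, configured macro names), and computes the
// token's column width.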
FormatToken *FormatTokenLexer::getNextToken() {
  if (StateStack.top() == LexerState::TOKEN_STASHED) {
    StateStack.pop();
    return getStashedToken();
  }

  FormatTok = new (Allocator.Allocate()) FormatToken;
  readRawToken(*FormatTok);
  SourceLocation WhitespaceStart =
      FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace);
  FormatTok->IsFirst = IsFirstToken;
  IsFirstToken = false;

  // Consume and record whitespace until we find a significant token.
  // Some tok::unknown tokens are not just whitespace, e.g. whitespace
  // followed by a symbol such as backtick. Those symbols may be
  // significant in other languages.
  unsigned WhitespaceLength = TrailingWhitespace;
  while (FormatTok->isNot(tok::eof)) {
    auto LeadingWhitespace = countLeadingWhitespace(FormatTok->TokenText);
    if (LeadingWhitespace == 0)
      break;
    if (LeadingWhitespace < FormatTok->TokenText.size())
      truncateToken(LeadingWhitespace);
    StringRef Text = FormatTok->TokenText;
    bool InEscape = false;
    for (int i = 0, e = Text.size(); i != e; ++i) {
      switch (Text[i]) {
      case '\r':
        // If this is a CRLF sequence, break here and the LF will be handled on
        // the next loop iteration. Otherwise, this is a single Mac CR, treat
        // it the same as a single LF.
        if (i + 1 < e && Text[i + 1] == '\n')
          break;
        [[fallthrough]];
      case '\n':
        ++FormatTok->NewlinesBefore;
        if (!InEscape)
          FormatTok->HasUnescapedNewline = true;
        else
          InEscape = false;
        FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
        Column = 0;
        break;
      case '\f':
        if (Style.KeepFormFeed && !FormatTok->HasFormFeedBefore &&
            // The form feed is immediately preceded and followed by a newline.
            i > 0 && Text[i - 1] == '\n' &&
            ((i + 1 < e && Text[i + 1] == '\n') ||
             (i + 2 < e && Text[i + 1] == '\r' && Text[i + 2] == '\n'))) {
          FormatTok->HasFormFeedBefore = true;
        }
        [[fallthrough]];
      case '\v':
        Column = 0;
        break;
      case ' ':
        ++Column;
        break;
      case '\t':
        Column +=
            Style.TabWidth - (Style.TabWidth ? Column % Style.TabWidth : 0);
        break;
      case '\\':
      case '?':
      case '/':
        // The text was entirely whitespace when this loop was entered. Thus
        // this has to be an escape sequence.
        assert(Text.substr(i, 2) == "\\\r" || Text.substr(i, 2) == "\\\n" ||
               Text.substr(i, 4) == "\?\?/\r" ||
               Text.substr(i, 4) == "\?\?/\n" ||
               (i >= 1 && (Text.substr(i - 1, 4) == "\?\?/\r" ||
                           Text.substr(i - 1, 4) == "\?\?/\n")) ||
               (i >= 2 && (Text.substr(i - 2, 4) == "\?\?/\r" ||
                           Text.substr(i - 2, 4) == "\?\?/\n")));
        InEscape = true;
        break;
      default:
        // This shouldn't happen.
        assert(false);
        break;
      }
    }
    WhitespaceLength += Text.size();
    readRawToken(*FormatTok);
  }

  if (FormatTok->is(tok::unknown))
    FormatTok->setType(TT_ImplicitStringLiteral);

  // JavaScript does not allow escaping the end of a line with a backslash. A
  // backslash at the end of a line comment would otherwise make the raw lexer
  // treat the following line as part of the comment, so the comment is
  // truncated at the backslash-newline.
  if (Style.isJavaScript() &&
      FormatTok->is(tok::comment) && FormatTok->TokenText.starts_with("//")) {
    size_t BackslashPos = FormatTok->TokenText.find('\\');
    while (BackslashPos != StringRef::npos) {
      if (BackslashPos + 1 < FormatTok->TokenText.size() &&
          FormatTok->TokenText[BackslashPos + 1] == '\n') {
        truncateToken(BackslashPos + 1);
        break;
      }
      BackslashPos = FormatTok->TokenText.find('\\', BackslashPos + 1);
    }
  }

  if (Style.isVerilog()) {
    static const llvm::Regex NumberBase("^s?[bdho]", llvm::Regex::IgnoreCase);
    SmallVector<StringRef, 1> Matches;
    // Verilog uses the backtick instead of the hash for preprocessor stuff,
    // and the hash for delays and parameter lists. In order to keep using
    // tok::hash elsewhere, the backtick gets marked as the hash here, and the
    // hash becomes an identifier.
    if (FormatTok->is(tok::numeric_constant)) {
      // In Verilog the quote is not part of a number.
      auto Quote = FormatTok->TokenText.find('\'');
      if (Quote != StringRef::npos)
        truncateToken(Quote);
    } else if (FormatTok->isOneOf(tok::hash, tok::hashhash)) {
      FormatTok->Tok.setKind(tok::raw_identifier);
    } else if (FormatTok->is(tok::raw_identifier)) {
      if (FormatTok->TokenText == "`") {
        FormatTok->Tok.setIdentifierInfo(nullptr);
        FormatTok->Tok.setKind(tok::hash);
      } else if (FormatTok->TokenText == "``") {
        FormatTok->Tok.setIdentifierInfo(nullptr);
        FormatTok->Tok.setKind(tok::hashhash);
      } else if (Tokens.size() > 0 &&
                 Tokens.back()->is(Keywords.kw_apostrophe) &&
                 NumberBase.match(FormatTok->TokenText, &Matches)) {
        // In a based number literal like `'b10`, the base and the number may
        // be separated by whitespace, so they are handled as two tokens; if
        // they were lexed as one raw token, split the base off here.
        truncateToken(Matches[0].size());
        FormatTok->setFinalizedType(TT_VerilogNumberBase);
      }
    }
  }

  FormatTok->WhitespaceRange = SourceRange(
      WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));

  FormatTok->OriginalColumn = Column;

  TrailingWhitespace = 0;
  if (FormatTok->is(tok::comment)) {
    // FIXME: Add the trimmed whitespace to Column.
    StringRef UntrimmedText = FormatTok->TokenText;
    FormatTok->TokenText = FormatTok->TokenText.rtrim(" \t\v\f");
    TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size();
  } else if (FormatTok->is(tok::raw_identifier)) {
    IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText);
    FormatTok->Tok.setIdentifierInfo(&Info);
    FormatTok->Tok.setKind(Info.getTokenID());
    if (Style.Language == FormatStyle::LK_Java &&
        FormatTok->isOneOf(tok::kw_struct, tok::kw_union, tok::kw_delete,
                           tok::kw_operator)) {
      FormatTok->Tok.setKind(tok::identifier);
      FormatTok->Tok.setIdentifierInfo(nullptr);
    } else if (Style.isJavaScript() &&
               FormatTok->isOneOf(tok::kw_struct, tok::kw_union,
                                  tok::kw_operator)) {
      FormatTok->Tok.setKind(tok::identifier);
      FormatTok->Tok.setIdentifierInfo(nullptr);
    }
  } else if (FormatTok->is(tok::greatergreater)) {
    FormatTok->Tok.setKind(tok::greater);
    FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
    ++Column;
    StateStack.push(LexerState::TOKEN_STASHED);
  } else if (FormatTok->is(tok::lessless)) {
    FormatTok->Tok.setKind(tok::less);
    FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
    ++Column;
    StateStack.push(LexerState::TOKEN_STASHED);
  }

  if (Style.isVerilog() && Tokens.size() > 0 &&
      Tokens.back()->is(TT_VerilogNumberBase) &&
      FormatTok->Tok.isOneOf(tok::identifier, tok::question)) {
    // Mark the number following a base like `'h?a0` as a number.
    FormatTok->Tok.setKind(tok::numeric_constant);
  }

  // Now FormatTok is the next non-whitespace token.

  StringRef Text = FormatTok->TokenText;
  size_t FirstNewlinePos = Text.find('\n');
  if (FirstNewlinePos == StringRef::npos) {
    // FIXME: ColumnWidth actually depends on the start column, we need to
    // take this into account when the token is moved.
    FormatTok->ColumnWidth =
        encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding);
    Column += FormatTok->ColumnWidth;
  } else {
    FormatTok->IsMultiline = true;
    // FIXME: ColumnWidth actually depends on the start column, we need to
    // take this into account when the token is moved.
    FormatTok->ColumnWidth = encoding::columnWidthWithTabs(
        Text.substr(0, FirstNewlinePos), Column, Style.TabWidth, Encoding);

    // The last line of the token always starts in column 0.
    // Thus, the length can be precomputed even in the presence of tabs.
    FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs(
        Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth, Encoding);
    Column = FormatTok->LastLineColumnWidth;
  }

  if (Style.isCpp()) {
    auto *Identifier = FormatTok->Tok.getIdentifierInfo();
    auto it = Macros.find(Identifier);
    if (!(Tokens.size() > 0 && Tokens.back()->Tok.getIdentifierInfo() &&
          Tokens.back()->Tok.getIdentifierInfo()->getPPKeywordID() ==
              tok::pp_define) &&
        it != Macros.end()) {
      FormatTok->setType(it->second);
      if (it->second == TT_IfMacro) {
        // The raw token has kind tok::identifier here; faking the kind lets
        // the token be handled like a real `if` by later stages.
        FormatTok->Tok.setKind(tok::kw_if);
      }
    } else if (FormatTok->is(tok::identifier)) {
      if (MacroBlockBeginRegex.match(Text))
        FormatTok->setType(TT_MacroBlockBegin);
      else if (MacroBlockEndRegex.match(Text))
        FormatTok->setType(TT_MacroBlockEnd);
      else if (TemplateNames.contains(Identifier))
        FormatTok->setFinalizedType(TT_TemplateName);
      else if (TypeNames.contains(Identifier))
        FormatTok->setFinalizedType(TT_TypeName);
    }
  }

  return FormatTok;
}

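// Verilog tokens that the C++ raw lexer cannot produce on its own (the
// apostrophe, the ` and `` preprocessor introducers, and backslash-escaped
// identifiers) are matched with a regular expression and handed back as
// raw_identifier tokens.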
bool FormatTokenLexer::readRawTokenVerilogSpecific(Token &Tok) {
  // In Verilog an escaped identifier starts with a backslash and ends with
  // whitespace, unless that whitespace is an escaped newline.
  static const llvm::Regex VerilogToken(R"re(^('|``?|\\(\\)re"
                                        "(\r?\n|\r)|[^[:space:]])*)");

  SmallVector<StringRef, 4> Matches;
  const char *Start = Lex->getBufferLocation();
  if (!VerilogToken.match(StringRef(Start, Lex->getBuffer().end() - Start),
                          &Matches)) {
    return false;
  }
  // There is a null byte at the end of the buffer, so Start[1] does not need
  // a bounds check.
  if (Start[0] == '\\' && (Start[1] == '\r' || Start[1] == '\n'))
    return false;
  size_t Len = Matches[0].size();
  if (Len == 0)
    return false;
  Tok.setKind(tok::raw_identifier);
  Tok.setLength(Len);
  Tok.setLocation(Lex->getSourceLocation(Start, Len));
  Tok.setRawIdentifierData(Start);
  Lex->seek(Lex->getCurrentBufferOffset() + Len, false);
  return true;
}

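// readRawToken() wraps the raw clang lexer and patches up a few cases it
// cannot classify: unterminated string literals, JavaScript's empty string
// spelled '', single-quoted strings in JavaScript and protos, and the
// clang-format on/off comments that toggle FormattingDisabled.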
void FormatTokenLexer::readRawToken(FormatToken &Tok) {
  // For Verilog, first see if there is a special token, and fall back to the
  // normal lexer if there isn't one.
  if (!Style.isVerilog() || !readRawTokenVerilogSpecific(Tok.Tok))
    Lex->LexFromRawLexer(Tok.Tok);
  Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
                            Tok.Tok.getLength());
  // For formatting, treat unterminated string literals like normal string
  // literals.
  if (Tok.is(tok::unknown)) {
    if (Tok.TokenText.starts_with("\"")) {
      Tok.Tok.setKind(tok::string_literal);
      Tok.IsUnterminatedLiteral = true;
    } else if (Style.isJavaScript() && Tok.TokenText == "''") {
      Tok.Tok.setKind(tok::string_literal);
    }
  }

  if ((Style.isJavaScript() || Style.isProto()) && Tok.is(tok::char_constant))
    Tok.Tok.setKind(tok::string_literal);

  if (Tok.is(tok::comment) && isClangFormatOn(Tok.TokenText))
    FormattingDisabled = false;

  Tok.Finalized = FormattingDisabled;

  if (Tok.is(tok::comment) && isClangFormatOff(Tok.TokenText))
    FormattingDisabled = true;
}

void FormatTokenLexer::resetLexer(unsigned Offset) {
  StringRef Buffer = SourceMgr.getBufferData(ID);
  Lex.reset(new Lexer(SourceMgr.getLocForStartOfFile(ID), LangOpts,
                      Buffer.begin(), Buffer.begin() + Offset, Buffer.end()));
  Lex->SetKeepWhitespaceMode(true);
  TrailingWhitespace = 0;
}