#include "llvm/Support/Regex.h"
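// The FormatTokenLexer wraps clang's raw Lexer: it keeps all whitespace,
// produces FormatTokens, and applies language-specific fix-ups before the
// tokens reach the token annotator.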
FormatTokenLexer::FormatTokenLexer(
    const SourceManager &SourceMgr, FileID ID, unsigned Column,
    const FormatStyle &Style, encoding::Encoding Encoding,
    llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
    IdentifierTable &IdentTable)
    : FormatTok(nullptr), IsFirstToken(true), StateStack({LexerState::NORMAL}),
      Column(Column), TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID),
      Style(Style), IdentTable(IdentTable), Keywords(IdentTable),
      Encoding(Encoding), Allocator(Allocator), FirstInLineIndex(0),
      FormattingDisabled(false), MacroBlockBeginRegex(Style.MacroBlockBegin),
      MacroBlockEndRegex(Style.MacroBlockEnd) {
  Lex.reset(new Lexer(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts));
  Lex->SetKeepWhitespaceMode(true);
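  // Seed the identifier table with the macro names configured in the style so
  // they can later be recognized and annotated as their special macro kinds.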
  for (const std::string &ForEachMacro : Style.ForEachMacros) {
    auto Identifier = &IdentTable.get(ForEachMacro);
    Macros.insert({Identifier, TT_ForEachMacro});
  }
  for (const std::string &IfMacro : Style.IfMacros) {
    auto Identifier = &IdentTable.get(IfMacro);
    Macros.insert({Identifier, TT_IfMacro});
  }
  for (const std::string &AttributeMacro : Style.AttributeMacros) {
    auto Identifier = &IdentTable.get(AttributeMacro);
    Macros.insert({Identifier, TT_AttributeMacro});
  }
  for (const std::string &StatementMacro : Style.StatementMacros) {
    auto Identifier = &IdentTable.get(StatementMacro);
    Macros.insert({Identifier, TT_StatementMacro});
  }
  for (const std::string &TypenameMacro : Style.TypenameMacros) {
    auto Identifier = &IdentTable.get(TypenameMacro);
    Macros.insert({Identifier, TT_TypenameMacro});
  }
  for (const std::string &NamespaceMacro : Style.NamespaceMacros) {
    auto Identifier = &IdentTable.get(NamespaceMacro);
    Macros.insert({Identifier, TT_NamespaceMacro});
  }
  for (const std::string &WhitespaceSensitiveMacro :
       Style.WhitespaceSensitiveMacros) {
    auto Identifier = &IdentTable.get(WhitespaceSensitiveMacro);
    Macros.insert({Identifier, TT_UntouchableMacroFunc});
  }
  for (const std::string &StatementAttributeLikeMacro :
       Style.StatementAttributeLikeMacros) {
    auto Identifier = &IdentTable.get(StatementAttributeLikeMacro);
    Macros.insert({Identifier, TT_StatementAttributeLikeMacro});
  }
  for (const auto &TypeName : Style.TypeNames)
    TypeNames.insert(&IdentTable.get(TypeName));
}
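// Main lexing loop: pulls one raw token at a time, applies the
// language-specific handlers, and remembers the first token of each line.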
ArrayRef<FormatToken *> FormatTokenLexer::lex() {
  assert(Tokens.empty());
  assert(FirstInLineIndex == 0);
  do {
    Tokens.push_back(getNextToken());
    if (Style.isJavaScript()) {
      tryParseJSRegexLiteral();
      handleTemplateStrings();
    }
    if (Style.isTextProto())
      tryParsePythonComment();
    tryMergePreviousTokens();
    if (Style.isCSharp()) {
      // Must run after token merging so C# types like "int?" are whole tokens.
      handleCSharpVerbatimAndInterpolatedStrings();
    }
    if (Style.isTableGen()) {
      handleTableGenMultilineString();
      handleTableGenNumericLikeIdentifier();
    }
    if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
      FirstInLineIndex = Tokens.size() - 1;
  } while (Tokens.back()->isNot(tok::eof));
  return Tokens;
}
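// Retroactively merges the tokens just lexed when they form one logical token
// in the current language (e.g. "??" in C#, "===" in JavaScript, ">>>=" in
// Java, "!cond" in TableGen).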
void FormatTokenLexer::tryMergePreviousTokens() {
  if (tryMerge_TMacro())
    return;
  if (tryMergeConflictMarkers())
    return;
  if (tryMergeLessLess())
    return;
  if (tryMergeGreaterGreater())
    return;
  if (tryMergeForEach())
    return;
  if (Style.isCpp() && tryTransformTryUsageForC())
    return;
  if (Style.isJavaScript() || Style.isCSharp()) {
    static const tok::TokenKind NullishCoalescingOperator[] = {tok::question,
                                                               tok::question};
    static const tok::TokenKind NullPropagatingOperator[] = {tok::question,
                                                             tok::period};
    static const tok::TokenKind FatArrow[] = {tok::equal, tok::greater};
    if (tryMergeTokens(FatArrow, TT_FatArrow))
      return;
    if (tryMergeTokens(NullishCoalescingOperator, TT_NullCoalescingOperator)) {
      Tokens.back()->Tok.setKind(tok::pipepipe); // "??" binds like "||".
      return;
    }
    if (tryMergeTokens(NullPropagatingOperator, TT_NullPropagatingOperator)) {
      Tokens.back()->Tok.setKind(tok::period); // "?." behaves like ".".
      return;
    }
    if (tryMergeNullishCoalescingEqual())
      return;
  }
  if (Style.isCSharp()) {
    static const tok::TokenKind CSharpNullConditionalLSquare[] = {
        tok::question, tok::l_square};
    if (tryMergeCSharpKeywordVariables())
      return;
    if (tryMergeCSharpStringLiteral())
      return;
    if (tryTransformCSharpForEach())
      return;
    if (tryMergeTokens(CSharpNullConditionalLSquare,
                       TT_CSharpNullConditionalLSquare)) {
      Tokens.back()->Tok.setKind(tok::l_square); // "?[" acts like "[".
      return;
    }
  }
  if (tryMergeNSStringLiteral())
    return;
  if (Style.isJavaScript()) {
    static const tok::TokenKind JSIdentity[] = {tok::equalequal, tok::equal};
    static const tok::TokenKind JSNotIdentity[] = {tok::exclaimequal,
                                                   tok::equal};
    static const tok::TokenKind JSShiftEqual[] = {tok::greater, tok::greater,
                                                  tok::greaterequal};
    static const tok::TokenKind JSExponentiation[] = {tok::star, tok::star};
    static const tok::TokenKind JSExponentiationEqual[] = {tok::star,
                                                           tok::starequal};
    static const tok::TokenKind JSPipePipeEqual[] = {tok::pipepipe, tok::equal};
    static const tok::TokenKind JSAndAndEqual[] = {tok::ampamp, tok::equal};
    if (tryMergeTokens(JSIdentity, TT_BinaryOperator))
      return;
    if (tryMergeTokens(JSNotIdentity, TT_BinaryOperator))
      return;
    if (tryMergeTokens(JSShiftEqual, TT_BinaryOperator))
      return;
    if (tryMergeTokens(JSExponentiation, TT_JsExponentiation))
      return;
    if (tryMergeTokens(JSExponentiationEqual, TT_JsExponentiationEqual)) {
      Tokens.back()->Tok.setKind(tok::starequal);
      return;
    }
    if (tryMergeTokens(JSAndAndEqual, TT_JsAndAndEqual) ||
        tryMergeTokens(JSPipePipeEqual, TT_JsPipePipeEqual)) {
      // "&&=" and "||=" are treated like the "=" assignment operator.
      Tokens.back()->Tok.setKind(tok::equal);
      return;
    }
    if (tryMergeJSPrivateIdentifier())
      return;
  }
  if (Style.Language == FormatStyle::LK_Java) {
    static const tok::TokenKind JavaRightLogicalShiftAssign[] = {
        tok::greater, tok::greater, tok::greaterequal};
    if (tryMergeTokens(JavaRightLogicalShiftAssign, TT_BinaryOperator))
      return;
  }
  if (Style.isVerilog()) {
    // Merge the number following a base like "'h".
    if (Tokens.size() >= 3 && Tokens.end()[-3]->is(TT_VerilogNumberBase) &&
        Tokens.end()[-2]->is(tok::numeric_constant) &&
        Tokens.back()->isOneOf(tok::numeric_constant, tok::identifier,
                               tok::question) &&
        tryMergeTokens(2, TT_Unknown)) {
      return;
    }
    // Part-select operators "-:" and "+:".
    if (tryMergeTokensAny({{tok::minus, tok::colon}, {tok::plus, tok::colon}},
                          TT_BitFieldColon)) {
      return;
    }
    // Xnor ("^~" / "~^"); the merged token behaves like a caret. Check the
    // token length so an already-merged token is not merged again.
    if (Tokens.back()->TokenText.size() == 1 &&
        tryMergeTokensAny({{tok::caret, tok::tilde}, {tok::tilde, tok::caret}},
                          TT_BinaryOperator)) {
      Tokens.back()->Tok.setKind(tok::caret);
      return;
    }
    // Signed shift operators.
    if (tryMergeTokens({tok::less, tok::less}, TT_BinaryOperator)) {
      Tokens.back()->Tok.setKind(tok::lessless);
      return;
    }
    if (tryMergeTokens({tok::greater, tok::greater}, TT_BinaryOperator)) {
      Tokens.back()->Tok.setKind(tok::greatergreater);
      return;
    }
    if (tryMergeTokensAny({{tok::lessless, tok::equal},
                           {tok::lessless, tok::lessequal},
                           {tok::greatergreater, tok::equal},
                           {tok::greatergreater, tok::greaterequal},
                           {tok::colon, tok::equal},
                           {tok::colon, tok::slash}},
                          TT_BinaryOperator)) {
      return;
    }
    if (tryMergeTokensAny({{tok::star, tok::star},
                           {tok::lessless, tok::less},
                           {tok::greatergreater, tok::greater},
                           {tok::exclaimequal, tok::equal},
                           {tok::exclaimequal, tok::question},
                           {tok::equalequal, tok::equal},
                           {tok::equalequal, tok::question}},
                          TT_BinaryOperator)) {
      return;
    }
    // Assignment and implication arrows such as "+=>", "-*>", "<->", "|->".
    if (tryMergeTokensAny({{tok::plusequal, tok::greater},
                           {tok::plus, tok::star, tok::greater},
                           {tok::minusequal, tok::greater},
                           {tok::minus, tok::star, tok::greater},
                           {tok::less, tok::arrow},
                           {tok::equal, tok::greater},
                           {tok::star, tok::greater},
                           {tok::pipeequal, tok::greater},
                           {tok::pipe, tok::arrow},
                           {tok::hash, tok::minus, tok::hash},
                           {tok::hash, tok::equal, tok::hash}},
                          TT_BinaryOperator) ||
        Tokens.back()->is(tok::arrow)) {
      return;
    }
  }
  if (Style.isTableGen()) {
    // A TableGen multi-line string starts with "[{".
    if (tryMergeTokens({tok::l_square, tok::l_brace},
                       TT_TableGenMultiLineString)) {
      // Set the type with finalization so it is never re-annotated.
      Tokens.back()->setFinalizedType(TT_TableGenMultiLineString);
      Tokens.back()->Tok.setKind(tok::string_literal);
      return;
    }
    // TableGen's bang operator has the form !<name>; !cond is a special case.
    if (tryMergeTokens({tok::exclaim, tok::identifier},
                       TT_TableGenBangOperator)) {
      Tokens.back()->Tok.setKind(tok::identifier);
      Tokens.back()->Tok.setIdentifierInfo(nullptr);
      if (Tokens.back()->TokenText == "!cond")
        Tokens.back()->setFinalizedType(TT_TableGenCondOperator);
      else
        Tokens.back()->setFinalizedType(TT_TableGenBangOperator);
      return;
    }
    if (tryMergeTokens({tok::exclaim, tok::kw_if}, TT_TableGenBangOperator)) {
      // "!" captures "if" even across whitespace, so "! if" becomes "!if".
      Tokens.back()->Tok.setKind(tok::identifier);
      Tokens.back()->Tok.setIdentifierInfo(nullptr);
      Tokens.back()->setFinalizedType(TT_TableGenBangOperator);
      return;
    }
    // "+" and "-" followed by a number are literals, not unary operators.
    if (tryMergeTokens({tok::plus, tok::numeric_constant}, TT_Unknown)) {
      Tokens.back()->Tok.setKind(tok::numeric_constant);
      return;
    }
    if (tryMergeTokens({tok::minus, tok::numeric_constant}, TT_Unknown)) {
      Tokens.back()->Tok.setKind(tok::numeric_constant);
      return;
    }
  }
}
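// Merges an "@" followed by a string literal into a single Objective-C string
// literal token.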
bool FormatTokenLexer::tryMergeNSStringLiteral() {
  if (Tokens.size() < 2)
    return false;
  auto &At = *(Tokens.end() - 2);
  auto &String = *(Tokens.end() - 1);
  if (At->isNot(tok::at) || String->isNot(tok::string_literal))
    return false;
  At->Tok.setKind(tok::string_literal);
  At->TokenText = StringRef(At->TokenText.begin(),
                            String->TokenText.end() - At->TokenText.begin());
  At->ColumnWidth += String->ColumnWidth;
  At->setType(TT_ObjCStringLiteral);
  Tokens.erase(Tokens.end() - 1);
  return true;
}
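// Merges "#" + identifier into a single JavaScript private identifier token.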
bool FormatTokenLexer::tryMergeJSPrivateIdentifier() {
  if (Tokens.size() < 2)
    return false;
  auto &Hash = *(Tokens.end() - 2);
  auto &Identifier = *(Tokens.end() - 1);
  if (Hash->isNot(tok::hash) || Identifier->isNot(tok::identifier))
    return false;
  Hash->Tok.setKind(tok::identifier);
  Hash->TokenText =
      StringRef(Hash->TokenText.begin(),
                Identifier->TokenText.end() - Hash->TokenText.begin());
  Hash->ColumnWidth += Identifier->ColumnWidth;
  Hash->setType(TT_JsPrivateIdentifier);
  Tokens.erase(Tokens.end() - 1);
  return true;
}
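// Merges the "@" / "$" prefixes with the following string literal so C#
// verbatim and interpolated strings are kept as one token.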
bool FormatTokenLexer::tryMergeCSharpStringLiteral() {
  if (Tokens.size() < 2)
    return false;
  // Look for @"aaaaaa" or $"aaaaaa".
  const auto String = *(Tokens.end() - 1);
  if (String->isNot(tok::string_literal))
    return false;
  auto Prefix = *(Tokens.end() - 2);
  if (Prefix->isNot(tok::at) && Prefix->TokenText != "$")
    return false;
  if (Tokens.size() > 2) {
    const auto Tok = *(Tokens.end() - 3);
    if ((Tok->TokenText == "$" && Prefix->is(tok::at)) ||
        (Tok->is(tok::at) && Prefix->TokenText == "$")) {
      // This looks like $@"aaa" or @$"aaa", so combine all three tokens.
      Tok->ColumnWidth += Prefix->ColumnWidth;
      Tokens.erase(Tokens.end() - 2);
      Prefix = Tok;
    }
  }
  // Convert back into a plain string_literal.
  Prefix->Tok.setKind(tok::string_literal);
  Prefix->TokenText =
      StringRef(Prefix->TokenText.begin(),
                String->TokenText.end() - Prefix->TokenText.begin());
  Prefix->ColumnWidth += String->ColumnWidth;
  Prefix->setType(TT_CSharpStringLiteral);
  Tokens.erase(Tokens.end() - 1);
  return true;
}
const llvm::StringSet<> FormatTokenLexer::CSharpAttributeTargets = {
    "assembly", "module",   "field",  "event", "method",
    "param",    "property", "return", "type",
};
bool FormatTokenLexer::tryMergeNullishCoalescingEqual() {
  if (Tokens.size() < 2)
    return false;
  auto &NullishCoalescing = *(Tokens.end() - 2);
  auto &Equal = *(Tokens.end() - 1);
  if (NullishCoalescing->isNot(TT_NullCoalescingOperator) ||
      Equal->isNot(tok::equal)) {
    return false;
  }
  NullishCoalescing->Tok.setKind(tok::equal); // there is no "??=" token kind.
  NullishCoalescing->TokenText =
      StringRef(NullishCoalescing->TokenText.begin(),
                Equal->TokenText.end() - NullishCoalescing->TokenText.begin());
  NullishCoalescing->ColumnWidth += Equal->ColumnWidth;
  NullishCoalescing->setType(TT_NullCoalescingEqual);
  Tokens.erase(Tokens.end() - 1);
  return true;
}
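// Merges "@" + keyword (a C# verbatim identifier such as "@class") into a
// single identifier token.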
bool FormatTokenLexer::tryMergeCSharpKeywordVariables() {
  if (Tokens.size() < 2)
    return false;
  const auto At = *(Tokens.end() - 2);
  if (At->isNot(tok::at))
    return false;
  const auto Keyword = *(Tokens.end() - 1);
  if (Keyword->TokenText == "$")
    return false;
  At->Tok.setKind(tok::identifier);
  At->TokenText = StringRef(At->TokenText.begin(),
                            Keyword->TokenText.end() - At->TokenText.begin());
  At->ColumnWidth += Keyword->ColumnWidth;
  At->setType(Keyword->getType());
  Tokens.erase(Tokens.end() - 1);
  return true;
}
bool FormatTokenLexer::tryTransformCSharpForEach() {
  if (Tokens.size() < 1)
    return false;
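// Merges the two tokens "for" "each" (as used by some Microsoft dialects) into
// one for-each macro token.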
bool FormatTokenLexer::tryMergeForEach() {
  if (Tokens.size() < 2)
    return false;
  auto &For = *(Tokens.end() - 2);
  auto &Each = *(Tokens.end() - 1);
  if (For->isNot(tok::kw_for))
    return false;
  if (Each->isNot(tok::identifier))
    return false;
  if (Each->TokenText != "each")
    return false;

  For->setType(TT_ForEachMacro);
  For->Tok.setKind(tok::kw_for);

  For->TokenText = StringRef(For->TokenText.begin(),
                             Each->TokenText.end() - For->TokenText.begin());
  For->ColumnWidth += Each->ColumnWidth;
  Tokens.erase(Tokens.end() - 1);
  return true;
}
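// In C, "try" is not a keyword; when it is clearly not used as a C++ try block
// (or an Objective-C @try), re-type the token as a plain identifier.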
bool FormatTokenLexer::tryTransformTryUsageForC() {
  if (Tokens.size() < 2)
    return false;
  auto &Try = *(Tokens.end() - 2);
  if (Try->isNot(tok::kw_try))
    return false;
  auto &Next = *(Tokens.end() - 1);
  if (Next->isOneOf(tok::l_brace, tok::colon, tok::hash, tok::comment))
    return false;

  if (Tokens.size() > 2) {
    auto &At = *(Tokens.end() - 3);
    if (At->is(tok::at))
      return false;
  }

  Try->Tok.setKind(tok::identifier);
  return true;
}
bool FormatTokenLexer::tryMergeLessLess() {
  // Merge X,less,less,Y into X,lessless,Y unless X or Y is less.
  if (Tokens.size() < 3)
    return false;

  auto First = Tokens.end() - 3;
  if (First[0]->isNot(tok::less) || First[1]->isNot(tok::less))
    return false;

  // Only merge if there currently is no whitespace between the two "<".
  if (First[1]->hasWhitespaceBefore())
    return false;

  auto X = Tokens.size() > 3 ? First[-1] : nullptr;
  if (X && X->is(tok::less))
    return false;

  auto Y = First[2];
  if ((!X || X->isNot(tok::kw_operator)) && Y->is(tok::less))
    return false;

  First[0]->Tok.setKind(tok::lessless);
  First[0]->TokenText = "<<";
  First[0]->ColumnWidth += 1;
  Tokens.erase(Tokens.end() - 2);
  return true;
}
bool FormatTokenLexer::tryMergeGreaterGreater() {
  // Merge kw_operator,greater,greater into kw_operator,greatergreater.
  if (Tokens.size() < 2)
    return false;

  auto First = Tokens.end() - 2;
  if (First[0]->isNot(tok::greater) || First[1]->isNot(tok::greater))
    return false;

  // Only merge if there currently is no whitespace between the two ">".
  if (First[1]->hasWhitespaceBefore())
    return false;

  auto Tok = Tokens.size() > 2 ? First[-1] : nullptr;
  if (Tok && Tok->isNot(tok::kw_operator))
    return false;

  First[0]->Tok.setKind(tok::greatergreater);
  First[0]->TokenText = ">>";
  First[0]->ColumnWidth += 1;
  Tokens.erase(Tokens.end() - 1);
  return true;
}
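// The two tryMergeTokens overloads below merge the most recent tokens when
// they match the requested kinds: the merged token keeps the first token's
// location, gains the concatenated text, and receives the new token type.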
bool FormatTokenLexer::tryMergeTokens(ArrayRef<tok::TokenKind> Kinds,
                                      TokenType NewType) {
  if (Tokens.size() < Kinds.size())
    return false;

  SmallVectorImpl<FormatToken *>::const_iterator First =
      Tokens.end() - Kinds.size();
  for (unsigned i = 0; i < Kinds.size(); ++i)
    if (First[i]->isNot(Kinds[i]))
      return false;

  return tryMergeTokens(Kinds.size(), NewType);
}

bool FormatTokenLexer::tryMergeTokens(size_t Count, TokenType NewType) {
  if (Tokens.size() < Count)
    return false;

  SmallVectorImpl<FormatToken *>::const_iterator First = Tokens.end() - Count;
  unsigned AddLength = 0;
  for (size_t i = 1; i < Count; ++i) {
    // If there is whitespace separating the token and the previous one,
    // they should not be merged.
    if (First[i]->hasWhitespaceBefore())
      return false;
    AddLength += First[i]->TokenText.size();
  }

  Tokens.resize(Tokens.size() - Count + 1);
  First[0]->TokenText = StringRef(First[0]->TokenText.data(),
                                  First[0]->TokenText.size() + AddLength);
  First[0]->ColumnWidth += AddLength;
  First[0]->setType(NewType);
  return true;
}
bool FormatTokenLexer::tryMergeTokensAny(
    ArrayRef<ArrayRef<tok::TokenKind>> Kinds, TokenType NewType) {
  return llvm::any_of(Kinds, [this, NewType](ArrayRef<tok::TokenKind> Kinds) {
    return tryMergeTokens(Kinds, NewType);
  });
}
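// precedesOperand/canPrecedeRegexLiteral implement the heuristic that decides
// whether a "/" in JavaScript starts a regex literal rather than a division.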
bool FormatTokenLexer::precedesOperand(FormatToken *Tok) {
  return Tok->isOneOf(tok::period, tok::l_paren, tok::comma, tok::l_brace,
                      tok::r_brace, tok::l_square, tok::semi, tok::exclaim,
                      tok::colon, tok::question, tok::tilde) ||
         Tok->isOneOf(tok::kw_return, tok::kw_do, tok::kw_case, tok::kw_throw,
                      tok::kw_else, tok::kw_new, tok::kw_delete, tok::kw_void,
                      tok::kw_typeof, Keywords.kw_instanceof, Keywords.kw_in) ||
         Tok->isBinaryOperator();
}

bool FormatTokenLexer::canPrecedeRegexLiteral(FormatToken *Prev) {
  if (!Prev)
    return true;
  // Regex literals can only follow after prefix unary operators, not after
  // postfix unary operators.
  if (Prev->isOneOf(tok::plusplus, tok::minusminus, tok::exclaim))
    return Tokens.size() < 3 || precedesOperand(Tokens[Tokens.size() - 3]);
  // The previous token must introduce an operand location where regex
  // literals can occur.
  if (!precedesOperand(Prev))
    return false;
  return true;
}
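// Tries to parse a JavaScript regex literal starting at the current token. If
// successful, the token is turned into a string literal covering the whole
// /.../ text and the raw lexer is repositioned behind it.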
void FormatTokenLexer::tryParseJSRegexLiteral() {
  FormatToken *RegexToken = Tokens.back();
  if (!RegexToken->isOneOf(tok::slash, tok::slashequal))
    return;

  FormatToken *Prev = nullptr;
  for (FormatToken *FT : llvm::drop_begin(llvm::reverse(Tokens))) {
    // NB: Because previous pointers are not initialized yet, this cannot use
    // Token.getPreviousNonComment.
    if (FT->isNot(tok::comment)) {
      Prev = FT;
      break;
    }
  }

  if (!canPrecedeRegexLiteral(Prev))
    return;

  // 'Manually' lex ahead in the current file buffer.
  const char *Offset = Lex->getBufferLocation();
  const char *RegexBegin = Offset - RegexToken->TokenText.size();
  StringRef Buffer = Lex->getBuffer();
  bool InCharacterClass = false;
  bool HaveClosingSlash = false;
  for (; !HaveClosingSlash && Offset != Buffer.end(); ++Offset) {
    // Regular expressions are terminated with a '/', which can only be
    // escaped using '\' or a character class between '[' and ']'.
    switch (*Offset) {
    case '\\':
      // Skip the escaped character.
      ++Offset;
      break;
    case '[':
      InCharacterClass = true;
      break;
    case ']':
      InCharacterClass = false;
      break;
    case '/':
      if (!InCharacterClass)
        HaveClosingSlash = true;
      break;
    }
  }

  RegexToken->setType(TT_RegexLiteral);
  // Treat regex literals like other string literals.
  RegexToken->Tok.setKind(tok::string_literal);
  RegexToken->TokenText = StringRef(RegexBegin, Offset - RegexBegin);
  RegexToken->ColumnWidth = RegexToken->TokenText.size();

  resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset)));
}
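// Scanning helper for C# strings: Repeated() detects a doubled character ("",
// "{{", "}}"), and the loop below tracks unmatched "{" so a quote inside an
// interpolation hole does not terminate the literal.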
  auto Repeated = [&Begin, End]() {
    return Begin + 1 < End && Begin[1] == Begin[0];
  };
  for (int UnmatchedOpeningBraceCount = 0; Begin < End; ++Begin) {
    // An unescaped "{" in an interpolated string opens an expression hole...
        ++UnmatchedOpeningBraceCount;
    // ...and an unescaped "}" closes the innermost open hole again.
        else if (UnmatchedOpeningBraceCount > 0)
          --UnmatchedOpeningBraceCount;
    // A quote inside an open hole does not end the literal, and a doubled
    // quote in a verbatim string is an escape rather than the terminator.
      if (UnmatchedOpeningBraceCount > 0)
      if (Verbatim && Repeated()) {
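// Extends a merged C# string token (@"...", $"...", @$"...") to its real end
// in the buffer and updates the column bookkeeping for multiline literals.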
void FormatTokenLexer::handleCSharpVerbatimAndInterpolatedStrings() {
  FormatToken *CSharpStringLiteral = Tokens.back();
  if (CSharpStringLiteral->isNot(TT_CSharpStringLiteral))
    return;

  auto &TokenText = CSharpStringLiteral->TokenText;

  bool Verbatim = false;
  bool Interpolated = false;
  if (TokenText.starts_with(R"($@")") || TokenText.starts_with(R"(@$")")) {
    Verbatim = true;
    Interpolated = true;
  } else if (TokenText.starts_with(R"(@")")) {
    Verbatim = true;
  } else if (TokenText.starts_with(R"($")")) {
    Interpolated = true;
  }

  if (!Verbatim && !Interpolated)
    return;

  const char *StrBegin = Lex->getBufferLocation() - TokenText.size();
  const char *Offset = StrBegin;
  // Skip the opening quote plus its prefix ("$@", "@", or "$").
  if (Verbatim && Interpolated)
    Offset += 3;
  else
    Offset += 2;

  const auto End = Lex->getBuffer().end();
  // Scan from Offset to the actual end of the literal (see the helper above).

  StringRef LiteralText(StrBegin, Offset - StrBegin + 1);
  TokenText = LiteralText;

  // Adjust width for potentially multiline string literals.
  size_t FirstBreak = LiteralText.find('\n');
  StringRef FirstLineText = FirstBreak == StringRef::npos
                                ? LiteralText
                                : LiteralText.substr(0, FirstBreak);
  CSharpStringLiteral->ColumnWidth = encoding::columnWidthWithTabs(
      FirstLineText, CSharpStringLiteral->OriginalColumn, Style.TabWidth,
      Encoding);
  size_t LastBreak = LiteralText.rfind('\n');
  if (LastBreak != StringRef::npos) {
    CSharpStringLiteral->IsMultiline = true;
    unsigned StartColumn = 0;
    CSharpStringLiteral->LastLineColumnWidth = encoding::columnWidthWithTabs(
        LiteralText.substr(LastBreak + 1), StartColumn, Style.TabWidth,
        Encoding);
  }

  assert(Offset < End);
  resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset + 1)));
}
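// Finds the closing "}]" of a TableGen multi-line string, extends the token
// text to cover the whole literal, and updates its column widths.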
void FormatTokenLexer::handleTableGenMultilineString() {
  FormatToken *MultiLineString = Tokens.back();
  if (MultiLineString->isNot(TT_TableGenMultiLineString))
    return;

  auto OpenOffset = Lex->getCurrentBufferOffset() - 2; // length of "[{"
  // "}]" is the end of the multi-line string.
  auto CloseOffset = Lex->getBuffer().find("}]", OpenOffset);
  if (CloseOffset == StringRef::npos)
    return;
  auto Text = Lex->getBuffer().substr(OpenOffset, CloseOffset - OpenOffset + 2);
  MultiLineString->TokenText = Text;
  resetLexer(SourceMgr.getFileOffset(
      Lex->getSourceLocation(Lex->getBufferLocation() - 2 + Text.size())));
  auto FirstLineText = Text;
  auto FirstBreak = Text.find('\n');
  // Set ColumnWidth and LastLineColumnWidth when the string has several lines.
  if (FirstBreak != StringRef::npos) {
    MultiLineString->IsMultiline = true;
    FirstLineText = Text.substr(0, FirstBreak + 1);
    // LastLineColumnWidth holds the width of the last line.
    auto LastBreak = Text.rfind('\n');
    MultiLineString->LastLineColumnWidth = encoding::columnWidthWithTabs(
        Text.substr(LastBreak + 1), MultiLineString->OriginalColumn,
        Style.TabWidth, Encoding);
  }
  // ColumnWidth holds only the width of the first line.
  MultiLineString->ColumnWidth = encoding::columnWidthWithTabs(
      FirstLineText, MultiLineString->OriginalColumn, Style.TabWidth, Encoding);
}
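// TableGen identifiers may begin with digits; tokens lexed as numeric
// constants that are not valid TableGen numbers are re-typed as identifiers.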
void FormatTokenLexer::handleTableGenNumericLikeIdentifier() {
  FormatToken *Tok = Tokens.back();
  // TableGen identifiers can begin with digits; such tokens have been lexed as
  // numeric constants.
  if (Tok->isNot(tok::numeric_constant))
    return;
  StringRef Text = Tok->TokenText;
  // Mirrors llvm::TGLexer::LexToken: the token is a number if it starts with
  // '+'/'-', consists only of digits, or is a binary ("b01") or hexadecimal
  // ("x1F") literal; otherwise it is an identifier.
  if (Text.size() < 1 || Text[0] == '+' || Text[0] == '-')
    return;
  const auto NonDigitPos = Text.find_if([](char C) { return !isdigit(C); });
  // All the characters are digits.
  if (NonDigitPos == StringRef::npos)
    return;
  char FirstNonDigit = Text[NonDigitPos];
  if (NonDigitPos < Text.size() - 1) {
    char TheNext = Text[NonDigitPos + 1];
    // Regarded as a binary number.
    if (FirstNonDigit == 'b' && (TheNext == '0' || TheNext == '1'))
      return;
    // Regarded as a hexadecimal number.
    if (FirstNonDigit == 'x' && isxdigit(TheNext))
      return;
  }
  if (isalpha(FirstNonDigit) || FirstNonDigit == '_') {
    // This is actually an identifier in TableGen.
    Tok->Tok.setKind(tok::identifier);
    Tok->Tok.setIdentifierInfo(nullptr);
  }
}
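// Handles JavaScript template strings: tracks backtick and "${" ... "}"
// nesting via StateStack, finds the end of the template in the raw buffer,
// and converts the token into a (possibly multiline) string literal.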
void FormatTokenLexer::handleTemplateStrings() {
  FormatToken *BacktickToken = Tokens.back();

  if (BacktickToken->is(tok::l_brace)) {
    StateStack.push(LexerState::NORMAL);
    return;
  }
  if (BacktickToken->is(tok::r_brace)) {
    if (StateStack.size() == 1)
      return;
    StateStack.pop();
    if (StateStack.top() != LexerState::TEMPLATE_STRING)
      return;
    // Back in TEMPLATE_STRING: fall through and continue parsing it.
  } else if (BacktickToken->is(tok::unknown) &&
             BacktickToken->TokenText == "`") {
    StateStack.push(LexerState::TEMPLATE_STRING);
  } else {
    return; // Not actually a template string.
  }

  // 'Manually' lex ahead in the current file buffer.
  const char *Offset = Lex->getBufferLocation();
  const char *TmplBegin = Offset - BacktickToken->TokenText.size(); // at "`"
  for (; Offset != Lex->getBuffer().end(); ++Offset) {
    if (Offset[0] == '`') {
      StateStack.pop();
      ++Offset;
      break;
    }
    if (Offset[0] == '\\') {
      ++Offset; // Skip the escaped character.
    } else if (Offset + 1 < Lex->getBuffer().end() && Offset[0] == '$' &&
               Offset[1] == '{') {
      // "${" introduces an expression interpolation in the template string.
      StateStack.push(LexerState::NORMAL);
      Offset += 2;
      break;
    }
  }

  StringRef LiteralText(TmplBegin, Offset - TmplBegin);
  BacktickToken->setType(TT_TemplateString);
  BacktickToken->Tok.setKind(tok::string_literal);
  BacktickToken->TokenText = LiteralText;

  // Adjust width for potentially multiline string literals.
  size_t FirstBreak = LiteralText.find('\n');
  StringRef FirstLineText = FirstBreak == StringRef::npos
                                ? LiteralText
                                : LiteralText.substr(0, FirstBreak);
  BacktickToken->ColumnWidth = encoding::columnWidthWithTabs(
      FirstLineText, BacktickToken->OriginalColumn, Style.TabWidth, Encoding);
  size_t LastBreak = LiteralText.rfind('\n');
  if (LastBreak != StringRef::npos) {
    BacktickToken->IsMultiline = true;
    unsigned StartColumn = 0;
    BacktickToken->LastLineColumnWidth = encoding::columnWidthWithTabs(
        LiteralText.substr(LastBreak + 1), StartColumn, Style.TabWidth,
        Encoding);
  }

  SourceLocation loc = Lex->getSourceLocation(Offset);
  resetLexer(SourceMgr.getFileOffset(loc));
}
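// Turns a "#" token and the rest of its line into a single line-comment token
// for languages that use hash comments (e.g. text protos).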
void FormatTokenLexer::tryParsePythonComment() {
  FormatToken *HashToken = Tokens.back();
  if (!HashToken->isOneOf(tok::hash, tok::hashhash))
    return;
  // Turn the remainder of this line into a comment.
  const char *CommentBegin =
      Lex->getBufferLocation() - HashToken->TokenText.size(); // at "#"
  size_t From = CommentBegin - Lex->getBuffer().begin();
  size_t To = Lex->getBuffer().find_first_of('\n', From);
  if (To == StringRef::npos)
    To = Lex->getBuffer().size();
  size_t Len = To - From;
  HashToken->setType(TT_LineComment);
  HashToken->Tok.setKind(tok::comment);
  HashToken->TokenText = Lex->getBuffer().substr(From, Len);
  SourceLocation Loc = To < Lex->getBuffer().size()
                           ? Lex->getSourceLocation(CommentBegin + Len)
                           : SourceMgr.getLocForEndOfFile(ID);
  resetLexer(SourceMgr.getFileOffset(Loc));
}
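// Merges _T("...") into a single string literal token so the _T() macro call
// is treated as one unit.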
bool FormatTokenLexer::tryMerge_TMacro() {
  if (Tokens.size() < 4)
    return false;
  FormatToken *Last = Tokens.back();
  if (Last->isNot(tok::r_paren))
    return false;

  FormatToken *String = Tokens[Tokens.size() - 2];
  if (String->isNot(tok::string_literal) || String->IsMultiline)
    return false;

  if (Tokens[Tokens.size() - 3]->isNot(tok::l_paren))
    return false;

  FormatToken *Macro = Tokens[Tokens.size() - 4];
  if (Macro->TokenText != "_T")
    return false;

  const char *Start = Macro->TokenText.data();
  const char *End = Last->TokenText.data() + Last->TokenText.size();
  String->TokenText = StringRef(Start, End - Start);
  String->IsFirst = Macro->IsFirst;
  String->LastNewlineOffset = Macro->LastNewlineOffset;
  String->WhitespaceRange = Macro->WhitespaceRange;
  String->OriginalColumn = Macro->OriginalColumn;
  String->ColumnWidth = encoding::columnWidthWithTabs(
      String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding);
  String->NewlinesBefore = Macro->NewlinesBefore;
  String->HasUnescapedNewline = Macro->HasUnescapedNewline;

  Tokens.pop_back();
  Tokens.pop_back();
  Tokens.pop_back();
  Tokens.back() = String;
  if (FirstInLineIndex >= Tokens.size())
    FirstInLineIndex = Tokens.size() - 1;
  return true;
}
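// Detects version-control conflict markers ("<<<<<<<", "|||||||", "=======",
// ">>>>>>>" and the Perforce "<<<<"/"===="/">>>>" variants) at the start of a
// line and collapses that line into a single conflict token.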
bool FormatTokenLexer::tryMergeConflictMarkers() {
  if (Tokens.back()->NewlinesBefore == 0 && Tokens.back()->isNot(tok::eof))
    return false;

  // Conflict lines look like "<marker> <text from the vcs>", e.g.
  // ">>>>>>> /file/in/file/system at revision 1234". All tokens in such a line
  // are merged into one token with a special type that the unwrapped line
  // parser uses to rebuild the underlying code.

  FileID ID;
  // Get the position of the first token in the line.
  unsigned FirstInLineOffset;
  std::tie(ID, FirstInLineOffset) = SourceMgr.getDecomposedLoc(
      Tokens[FirstInLineIndex]->getStartOfNonWhitespace());
  StringRef Buffer = SourceMgr.getBufferData(ID);
  // Calculate the offset of the start of the current line.
  auto LineOffset = Buffer.rfind('\n', FirstInLineOffset);
  if (LineOffset == StringRef::npos)
    LineOffset = 0;
  else
    ++LineOffset;

  auto FirstSpace = Buffer.find_first_of(" \n", LineOffset);
  StringRef LineStart;
  if (FirstSpace == StringRef::npos)
    LineStart = Buffer.substr(LineOffset);
  else
    LineStart = Buffer.substr(LineOffset, FirstSpace - LineOffset);

  TokenType Type = TT_Unknown;
  if (LineStart == "<<<<<<<" || LineStart == ">>>>") {
    Type = TT_ConflictStart;
  } else if (LineStart == "|||||||" || LineStart == "=======" ||
             LineStart == "====") {
    Type = TT_ConflictAlternative;
  } else if (LineStart == ">>>>>>>" || LineStart == "<<<<") {
    Type = TT_ConflictEnd;
  }

  if (Type != TT_Unknown) {
    FormatToken *Next = Tokens.back();

    Tokens.resize(FirstInLineIndex + 1);
    // We do not need to build a complete token here, as we will skip it
    // during parsing anyway (as long as it's not the only token in the file).
    Tokens.back()->setType(Type);
    Tokens.back()->Tok.setKind(tok::kw___unknown_anytype);

    Tokens.push_back(Next);
    return true;
  }

  return false;
}
FormatToken *FormatTokenLexer::getStashedToken() {
  // Create a synthesized second '>' or '<' token.
  Token Tok = FormatTok->Tok;
  StringRef TokenText = FormatTok->TokenText;

  FormatTok = new (Allocator.Allocate()) FormatToken;
  FormatTok->Tok = Tok;
  SourceLocation TokLocation =
      FormatTok->Tok.getLocation().getLocWithOffset(Tok.getLength() - 1);
  FormatTok->Tok.setLocation(TokLocation);
  FormatTok->WhitespaceRange = SourceRange(TokLocation, TokLocation);
  FormatTok->TokenText = TokenText;
  FormatTok->ColumnWidth = 1;
  return FormatTok;
}
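// Truncates the current token to NewLen characters and repositions the raw
// lexer so the rest of the original token is lexed again.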
void FormatTokenLexer::truncateToken(size_t NewLen) {
  assert(NewLen <= FormatTok->TokenText.size());
  resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(
      Lex->getBufferLocation() - FormatTok->TokenText.size() + NewLen)));
  FormatTok->TokenText = FormatTok->TokenText.substr(0, NewLen);
  FormatTok->ColumnWidth = encoding::columnWidthWithTabs(
      FormatTok->TokenText, FormatTok->OriginalColumn, Style.TabWidth,
      Encoding);
  FormatTok->Tok.setLength(NewLen);
}
// Count the length of leading whitespace in a token.
static size_t countLeadingWhitespace(StringRef Text) {
  const unsigned char *const Begin = Text.bytes_begin();
  const unsigned char *const End = Text.bytes_end();
  const unsigned char *Cur = Begin;
  while (Cur < End) {
    if (isspace(Cur[0])) {
      ++Cur;
    } else if (Cur[0] == '\\' && (Cur[1] == '\n' || Cur[1] == '\r')) {
      // A '\' followed by a newline always escapes the newline.
      assert(End - Cur >= 2);
      Cur += 2;
    } else if (Cur[0] == '?' && Cur[1] == '?' && Cur[2] == '/' &&
               (Cur[3] == '\n' || Cur[3] == '\r')) {
      // Newlines can also be escaped by a '?' '?' '/' trigraph.
      assert(End - Cur >= 4);
      Cur += 4;
    } else {
      break;
    }
  }
  return Cur - Begin;
}
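// Reads the next token: folds leading whitespace and escaped newlines into the
// token's whitespace range, applies per-language tweaks, splits ">>"/"<<" for
// later re-merging, and computes column widths and macro types.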
FormatToken *FormatTokenLexer::getNextToken() {
  if (StateStack.top() == LexerState::TOKEN_STASHED) {
    StateStack.pop();
    return getStashedToken();
  }

  FormatTok = new (Allocator.Allocate()) FormatToken;
  readRawToken(*FormatTok);
  SourceLocation WhitespaceStart =
      FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace);
  FormatTok->IsFirst = IsFirstToken;
  IsFirstToken = false;

  // Consume and record whitespace until we find a significant token.
  unsigned WhitespaceLength = TrailingWhitespace;
  while (FormatTok->isNot(tok::eof)) {
    auto LeadingWhitespace = countLeadingWhitespace(FormatTok->TokenText);
    if (LeadingWhitespace == 0)
      break;
    if (LeadingWhitespace < FormatTok->TokenText.size())
      truncateToken(LeadingWhitespace);
    StringRef Text = FormatTok->TokenText;
    bool InEscape = false;
    for (int i = 0, e = Text.size(); i != e; ++i) {
      switch (Text[i]) {
      case '\n':
        ++FormatTok->NewlinesBefore;
        if (!InEscape)
          FormatTok->HasUnescapedNewline = true;
        else
          InEscape = false;
        FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
        Column = 0;
        break;
      case '\r':
        // In a CRLF sequence the '\n' is handled on the next iteration.
        if (i + 1 < e && Text[i + 1] == '\n')
          break;
        FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
        Column = 0;
        break;
      case '\f':
      case '\v':
        Column = 0;
        break;
      case ' ':
        ++Column;
        break;
      case '\t':
        Column +=
            Style.TabWidth - (Style.TabWidth ? Column % Style.TabWidth : 0);
        break;
      case '\\':
      case '?':
      case '/':
        // The text was entirely whitespace when this loop was entered, so
        // anything else must belong to an escaped newline ("\" + newline or
        // the "??/" trigraph spelling of the backslash).
        assert(Text.substr(i, 2) == "\\\r" || Text.substr(i, 2) == "\\\n" ||
               Text.substr(i, 4) == "\?\?/\r" ||
               Text.substr(i, 4) == "\?\?/\n" ||
               (i >= 1 && (Text.substr(i - 1, 4) == "\?\?/\r" ||
                           Text.substr(i - 1, 4) == "\?\?/\n")) ||
               (i >= 2 && (Text.substr(i - 2, 4) == "\?\?/\r" ||
                           Text.substr(i - 2, 4) == "\?\?/\n")));
        InEscape = true;
        break;
      default:
        break;
      }
    }
    WhitespaceLength += Text.size();
    readRawToken(*FormatTok);
  }

  if (FormatTok->is(tok::unknown))
    FormatTok->setType(TT_ImplicitStringLiteral);

  // JavaScript and Java do not allow escaping the end of a line comment with a
  // backslash; truncate such comments at the backslash + newline so the next
  // line is lexed as code again.
  if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
      FormatTok->is(tok::comment) && FormatTok->TokenText.starts_with("//")) {
    size_t BackslashPos = FormatTok->TokenText.find('\\');
    while (BackslashPos != StringRef::npos) {
      if (BackslashPos + 1 < FormatTok->TokenText.size() &&
          FormatTok->TokenText[BackslashPos + 1] == '\n') {
        truncateToken(BackslashPos + 1);
        break;
      }
      BackslashPos = FormatTok->TokenText.find('\\', BackslashPos + 1);
    }
  }

  if (Style.isVerilog()) {
    static const llvm::Regex NumberBase("^s?[bdho]", llvm::Regex::IgnoreCase);
    SmallVector<StringRef, 1> Matches;
    // Verilog-specific adjustments for number literals, the hash, the
    // backtick, and number-base prefixes.
    if (FormatTok->is(tok::numeric_constant)) {
      // In Verilog the size prefix is split off at the apostrophe.
      auto Quote = FormatTok->TokenText.find('\'');
      if (Quote != StringRef::npos)
        truncateToken(Quote);
    } else if (FormatTok->isOneOf(tok::hash, tok::hashhash)) {
    } else if (FormatTok->is(tok::raw_identifier)) {
    } else if (FormatTok->TokenText == "``") {
    } else if (Tokens.size() > 0 &&
               NumberBase.match(FormatTok->TokenText, &Matches)) {
      // The base (e.g. 'h) and the number after it are separate tokens.
      truncateToken(Matches[0].size());
    }
  }

  FormatTok->WhitespaceRange = SourceRange(
      WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));

  TrailingWhitespace = 0;
  if (FormatTok->is(tok::comment)) {
    // FIXME: Add the trimmed whitespace to Column.
    StringRef UntrimmedText = FormatTok->TokenText;
    FormatTok->TokenText = FormatTok->TokenText.rtrim(" \t\v\f");
    TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size();
  } else if (FormatTok->is(tok::raw_identifier)) {
    IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText);
    FormatTok->Tok.setIdentifierInfo(&Info);
    FormatTok->Tok.setKind(Info.getTokenID());
    if (Style.Language == FormatStyle::LK_Java &&
        FormatTok->isOneOf(tok::kw_struct, tok::kw_union, tok::kw_delete,
                           tok::kw_operator)) {
      FormatTok->Tok.setKind(tok::identifier);
      FormatTok->Tok.setIdentifierInfo(nullptr);
    } else if (Style.isJavaScript() &&
               FormatTok->isOneOf(tok::kw_struct, tok::kw_union,
                                  tok::kw_operator)) {
      FormatTok->Tok.setKind(tok::identifier);
      FormatTok->Tok.setIdentifierInfo(nullptr);
    }
  } else if (FormatTok->is(tok::greatergreater)) {
    FormatTok->Tok.setKind(tok::greater);
    FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
    ++Column;
    StateStack.push(LexerState::TOKEN_STASHED);
  } else if (FormatTok->is(tok::lessless)) {
    FormatTok->Tok.setKind(tok::less);
    FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
    ++Column;
    StateStack.push(LexerState::TOKEN_STASHED);
  }

  if (Style.isVerilog() && Tokens.size() > 0 &&
      Tokens.back()->is(TT_VerilogNumberBase) &&
      FormatTok->Tok.isOneOf(tok::identifier, tok::question)) {
    // Mark the number following a base like 'h as a numeric constant.
    FormatTok->Tok.setKind(tok::numeric_constant);
  }

  // Now FormatTok is the next non-whitespace token.

  StringRef Text = FormatTok->TokenText;
  size_t FirstNewlinePos = Text.find('\n');
  if (FirstNewlinePos == StringRef::npos) {
    // FIXME: ColumnWidth actually depends on the start column, we need to
    // take this into account when the token is moved.
    FormatTok->ColumnWidth =
        encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding);
    Column += FormatTok->ColumnWidth;
  } else {
    FormatTok->IsMultiline = true;
    FormatTok->ColumnWidth = encoding::columnWidthWithTabs(
        Text.substr(0, FirstNewlinePos), Column, Style.TabWidth, Encoding);
  }

  if (Style.isCpp()) {
    auto *Identifier = FormatTok->Tok.getIdentifierInfo();
    auto it = Macros.find(Identifier);
    if (!(Tokens.size() > 0 && Tokens.back()->Tok.getIdentifierInfo() &&
          Tokens.back()->Tok.getIdentifierInfo()->getPPKeywordID() ==
              tok::pp_define) &&
        it != Macros.end()) {
      FormatTok->setType(it->second);
      if (it->second == TT_IfMacro) {
        // The lexed token has kind tok::kw_unknown; fake the kind so the
        // TokenAnnotator treats the macro like a real "if".
        FormatTok->Tok.setKind(tok::kw_if);
      }
    } else if (FormatTok->is(tok::identifier)) {
      if (MacroBlockBeginRegex.match(Text))
        FormatTok->setType(TT_MacroBlockBegin);
      else if (MacroBlockEndRegex.match(Text))
        FormatTok->setType(TT_MacroBlockEnd);
    }
  }

  return FormatTok;
}
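// Lexes one Verilog-specific token with a custom regex (backtick directives,
// apostrophes, escaped identifiers); returns false to fall back to the normal
// raw lexer.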
bool FormatTokenLexer::readRawTokenVerilogSpecific(Token &Tok) {
  // In Verilog the quote is not a character literal, and the backtick and
  // double backtick need to become identifiers so they can be matched against
  // the Verilog keywords.
  static const llvm::Regex VerilogToken(R"re(^('|``?|\\(\\)re"
                                        "(\r?\n|\r)|[^[:space:]])*)");

  SmallVector<StringRef, 4> Matches;
  const char *Start = Lex->getBufferLocation();
  if (!VerilogToken.match(StringRef(Start, Lex->getBuffer().end() - Start),
                          &Matches)) {
    return false;
  }
  // A backslash here starts an escaped newline, not an escaped identifier;
  // let the normal lexer handle it.
  if (Start[0] == '\\' && (Start[1] == '\r' || Start[1] == '\n'))
    return false;
  size_t Len = Matches[0].size();
  if (Len == 0)
    return false;

  // The kind has to be an identifier so it can be matched against the
  // keywords; it has to be set before the length because setLength checks
  // the kind.
  Tok.setKind(tok::raw_identifier);
  Tok.setLength(Len);
  Tok.setLocation(Lex->getSourceLocation(Start, Len));
  Tok.setRawIdentifierData(Start);
  Lex->seek(Lex->getCurrentBufferOffset() + Len, /*IsAtStartOfLine=*/false);
  return true;
}
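// Reads one raw token (via the Verilog path if applicable), patches up
// unterminated and JavaScript string literals, and applies the current
// clang-format off/on state to the token.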
void FormatTokenLexer::readRawToken(FormatToken &Tok) {
  // For Verilog, first see if there is a special token, and fall back to the
  // normal lexer if there isn't one.
  if (!Style.isVerilog() || !readRawTokenVerilogSpecific(Tok.Tok))
    Lex->LexFromRawLexer(Tok.Tok);
  Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
                            Tok.Tok.getLength());
  // For formatting, treat unterminated string literals like normal string
  // literals.
  if (Tok.is(tok::unknown)) {
    if (Tok.TokenText.starts_with("\"")) {
      Tok.Tok.setKind(tok::string_literal);
      Tok.IsUnterminatedLiteral = true;
    } else if (Style.isJavaScript() && Tok.TokenText == "''") {
      Tok.Tok.setKind(tok::string_literal);
    }
  }

  if ((Style.isJavaScript() || Style.isProto()) && Tok.is(tok::char_constant))
    Tok.Tok.setKind(tok::string_literal);

  if (Tok.is(tok::comment) && isClangFormatOn(Tok.TokenText))
    FormattingDisabled = false;

  Tok.Finalized = FormattingDisabled;

  if (Tok.is(tok::comment) && isClangFormatOff(Tok.TokenText))
    FormattingDisabled = true;
}
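// Re-creates the raw lexer at the given buffer offset; called whenever a
// token's extent has been adjusted manually.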
void FormatTokenLexer::resetLexer(unsigned Offset) {
  StringRef Buffer = SourceMgr.getBufferData(ID);
  Lex.reset(new Lexer(SourceMgr.getLocForStartOfFile(ID), LangOpts,
                      Buffer.begin(), Buffer.begin() + Offset, Buffer.end()));
  Lex->SetKeepWhitespaceMode(true);
  TrailingWhitespace = 0;
}