24 llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
29 Style(Style), IdentTable(IdentTable), Keywords(IdentTable),
30 Encoding(Encoding), Allocator(Allocator), FirstInLineIndex(0),
34 Lex.reset(
new Lexer(
ID, SourceMgr.getBufferOrFake(
ID), SourceMgr, LangOpts));
35 Lex->SetKeepWhitespaceMode(
true);
38 auto Identifier = &IdentTable.get(ForEachMacro);
41 for (
const std::string &IfMacro : Style.
IfMacros) {
46 auto Identifier = &IdentTable.get(AttributeMacro);
50 auto Identifier = &IdentTable.get(StatementMacro);
54 auto Identifier = &IdentTable.get(TypenameMacro);
58 auto Identifier = &IdentTable.get(NamespaceMacro);
61 for (
const std::string &WhitespaceSensitiveMacro :
63 auto Identifier = &IdentTable.get(WhitespaceSensitiveMacro);
66 for (
const std::string &StatementAttributeLikeMacro :
68 auto Identifier = &IdentTable.get(StatementAttributeLikeMacro);
72 for (
const auto &TypeName : Style.
TypeNames)
73 TypeNames.insert(&IdentTable.get(TypeName));
77 assert(Tokens.empty());
78 assert(FirstInLineIndex == 0);
80 Tokens.push_back(getNextToken());
82 tryParseJSRegexLiteral();
83 handleTemplateStrings();
86 tryParsePythonComment();
87 tryMergePreviousTokens();
91 handleCSharpVerbatimAndInterpolatedStrings();
94 handleTableGenMultilineString();
95 handleTableGenNumericLikeIdentifier();
97 if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
98 FirstInLineIndex = Tokens.size() - 1;
99 }
while (Tokens.back()->isNot(tok::eof));
103void FormatTokenLexer::tryMergePreviousTokens() {
104 if (tryMerge_TMacro())
106 if (tryMergeConflictMarkers())
108 if (tryMergeLessLess())
110 if (tryMergeGreaterGreater())
112 if (tryMergeForEach())
114 if (
IsCpp && tryTransformTryUsageForC())
118 static const tok::TokenKind NullishCoalescingOperator[] = {tok::question,
120 static const tok::TokenKind NullPropagatingOperator[] = {tok::question,
122 static const tok::TokenKind FatArrow[] = {tok::equal, tok::greater};
124 if (tryMergeTokens(FatArrow, TT_FatArrow))
126 if (tryMergeTokens(NullishCoalescingOperator, TT_NullCoalescingOperator)) {
128 Tokens.back()->Tok.setKind(tok::pipepipe);
131 if (tryMergeTokens(NullPropagatingOperator, TT_NullPropagatingOperator)) {
133 Tokens.back()->Tok.setKind(tok::period);
136 if (tryMergeNullishCoalescingEqual())
142 tok::question, tok::l_square};
144 if (tryMergeCSharpKeywordVariables())
146 if (tryMergeCSharpStringLiteral())
148 if (tryTransformCSharpForEach())
150 if (tryMergeTokens(CSharpNullConditionalLSquare,
151 TT_CSharpNullConditionalLSquare)) {
153 Tokens.back()->Tok.setKind(tok::l_square);
158 if (tryMergeNSStringLiteral())
162 static const tok::TokenKind JSIdentity[] = {tok::equalequal, tok::equal};
165 static const tok::TokenKind JSShiftEqual[] = {tok::greater, tok::greater,
167 static const tok::TokenKind JSExponentiation[] = {tok::star, tok::star};
170 static const tok::TokenKind JSPipePipeEqual[] = {tok::pipepipe, tok::equal};
171 static const tok::TokenKind JSAndAndEqual[] = {tok::ampamp, tok::equal};
174 if (tryMergeTokens(JSIdentity, TT_BinaryOperator))
176 if (tryMergeTokens(JSNotIdentity, TT_BinaryOperator))
178 if (tryMergeTokens(JSShiftEqual, TT_BinaryOperator))
180 if (tryMergeTokens(JSExponentiation, TT_JsExponentiation))
182 if (tryMergeTokens(JSExponentiationEqual, TT_JsExponentiationEqual)) {
183 Tokens.back()->Tok.setKind(tok::starequal);
186 if (tryMergeTokens(JSAndAndEqual, TT_JsAndAndEqual) ||
187 tryMergeTokens(JSPipePipeEqual, TT_JsPipePipeEqual)) {
189 Tokens.back()->Tok.setKind(tok::equal);
192 if (tryMergeJSPrivateIdentifier())
198 tok::greater, tok::greater, tok::greaterequal};
199 if (tryMergeTokens(JavaRightLogicalShiftAssign, TT_BinaryOperator))
205 if (Tokens.size() >= 3 && Tokens.end()[-3]->is(TT_VerilogNumberBase) &&
206 Tokens.end()[-2]->is(tok::numeric_constant) &&
207 Tokens.back()->isOneOf(tok::numeric_constant, tok::identifier,
209 tryMergeTokens(2, TT_Unknown)) {
213 if (tryMergeTokensAny({{tok::minus, tok::colon}, {tok::plus, tok::colon}},
221 if (Tokens.back()->TokenText.size() == 1 &&
222 tryMergeTokensAny({{tok::caret, tok::tilde}, {tok::tilde, tok::caret}},
223 TT_BinaryOperator)) {
224 Tokens.back()->Tok.setKind(tok::caret);
228 if (tryMergeTokens({tok::less, tok::less}, TT_BinaryOperator)) {
229 Tokens.back()->Tok.setKind(tok::lessless);
232 if (tryMergeTokens({tok::greater, tok::greater}, TT_BinaryOperator)) {
233 Tokens.back()->Tok.setKind(tok::greatergreater);
236 if (tryMergeTokensAny({{tok::lessless, tok::equal},
237 {tok::lessless, tok::lessequal},
238 {tok::greatergreater, tok::equal},
239 {tok::greatergreater, tok::greaterequal},
240 {tok::colon, tok::equal},
241 {tok::colon, tok::slash}},
242 TT_BinaryOperator)) {
247 if (tryMergeTokensAny({{tok::star, tok::star},
248 {tok::lessless, tok::less},
249 {tok::greatergreater, tok::greater},
250 {tok::exclaimequal, tok::equal},
251 {tok::exclaimequal, tok::question},
252 {tok::equalequal, tok::equal},
253 {tok::equalequal, tok::question}},
254 TT_BinaryOperator)) {
259 if (tryMergeTokensAny({{tok::plusequal, tok::greater},
260 {tok::plus, tok::star, tok::greater},
261 {tok::minusequal, tok::greater},
262 {tok::minus, tok::star, tok::greater},
263 {tok::less, tok::arrow},
264 {tok::equal, tok::greater},
265 {tok::star, tok::greater},
266 {tok::pipeequal, tok::greater},
267 {tok::pipe, tok::arrow},
268 {tok::hash, tok::minus, tok::hash},
269 {tok::hash, tok::equal, tok::hash}},
270 TT_BinaryOperator) ||
271 Tokens.back()->is(tok::arrow)) {
278 if (tryMergeTokens({tok::l_square, tok::l_brace},
279 TT_TableGenMultiLineString)) {
281 Tokens.back()->setFinalizedType(TT_TableGenMultiLineString);
282 Tokens.back()->Tok.setKind(tok::string_literal);
287 if (tryMergeTokens({tok::exclaim, tok::identifier},
288 TT_TableGenBangOperator)) {
289 Tokens.back()->Tok.setKind(tok::identifier);
290 Tokens.back()->Tok.setIdentifierInfo(
nullptr);
291 if (Tokens.back()->TokenText ==
"!cond")
292 Tokens.back()->setFinalizedType(TT_TableGenCondOperator);
294 Tokens.back()->setFinalizedType(TT_TableGenBangOperator);
297 if (tryMergeTokens({tok::exclaim, tok::kw_if}, TT_TableGenBangOperator)) {
300 Tokens.back()->Tok.setKind(tok::identifier);
301 Tokens.back()->Tok.setIdentifierInfo(
nullptr);
302 Tokens.back()->setFinalizedType(TT_TableGenBangOperator);
306 if (tryMergeTokens({tok::plus, tok::numeric_constant}, TT_Unknown)) {
307 Tokens.back()->Tok.setKind(tok::numeric_constant);
310 if (tryMergeTokens({tok::minus, tok::numeric_constant}, TT_Unknown)) {
311 Tokens.back()->Tok.setKind(tok::numeric_constant);
317bool FormatTokenLexer::tryMergeNSStringLiteral() {
318 if (Tokens.size() < 2)
320 auto &At = *(Tokens.end() - 2);
321 auto &String = *(Tokens.end() - 1);
322 if (At->isNot(tok::at) || String->isNot(tok::string_literal))
324 At->Tok.setKind(tok::string_literal);
325 At->TokenText = StringRef(At->TokenText.begin(),
326 String->TokenText.end() - At->TokenText.begin());
327 At->ColumnWidth += String->ColumnWidth;
328 At->setType(TT_ObjCStringLiteral);
329 Tokens.erase(Tokens.end() - 1);
333bool FormatTokenLexer::tryMergeJSPrivateIdentifier() {
336 if (Tokens.size() < 2)
338 auto &Hash = *(Tokens.end() - 2);
340 if (Hash->isNot(tok::hash) ||
Identifier->isNot(tok::identifier))
342 Hash->Tok.setKind(tok::identifier);
344 StringRef(Hash->TokenText.begin(),
345 Identifier->TokenText.end() - Hash->TokenText.begin());
347 Hash->setType(TT_JsPrivateIdentifier);
348 Tokens.erase(Tokens.end() - 1);
357bool FormatTokenLexer::tryMergeCSharpStringLiteral() {
358 if (Tokens.size() < 2)
362 const auto String = *(Tokens.end() - 1);
363 if (String->isNot(tok::string_literal))
366 auto Prefix = *(Tokens.end() - 2);
367 if (Prefix->isNot(tok::at) && Prefix->TokenText !=
"$")
370 if (Tokens.size() > 2) {
371 const auto Tok = *(Tokens.end() - 3);
372 if ((Tok->TokenText ==
"$" && Prefix->is(tok::at)) ||
373 (Tok->is(tok::at) && Prefix->TokenText ==
"$")) {
375 Tok->ColumnWidth += Prefix->ColumnWidth;
376 Tokens.erase(Tokens.end() - 2);
382 Prefix->Tok.setKind(tok::string_literal);
384 StringRef(Prefix->TokenText.begin(),
385 String->TokenText.end() - Prefix->TokenText.begin());
386 Prefix->ColumnWidth += String->ColumnWidth;
387 Prefix->setType(TT_CSharpStringLiteral);
388 Tokens.erase(Tokens.end() - 1);
394const llvm::StringSet<> FormatTokenLexer::CSharpAttributeTargets = {
395 "assembly",
"module",
"field",
"event",
"method",
396 "param",
"property",
"return",
"type",
399bool FormatTokenLexer::tryMergeNullishCoalescingEqual() {
400 if (Tokens.size() < 2)
402 auto &NullishCoalescing = *(Tokens.end() - 2);
403 auto &
Equal = *(Tokens.end() - 1);
404 if (NullishCoalescing->getType() != TT_NullCoalescingOperator ||
405 Equal->isNot(tok::equal)) {
408 NullishCoalescing->Tok.setKind(tok::equal);
409 NullishCoalescing->TokenText =
410 StringRef(NullishCoalescing->TokenText.begin(),
411 Equal->TokenText.end() - NullishCoalescing->TokenText.begin());
412 NullishCoalescing->ColumnWidth +=
Equal->ColumnWidth;
413 NullishCoalescing->setType(TT_NullCoalescingEqual);
414 Tokens.erase(Tokens.end() - 1);
418bool FormatTokenLexer::tryMergeCSharpKeywordVariables() {
419 if (Tokens.size() < 2)
421 const auto At = *(Tokens.end() - 2);
422 if (At->isNot(tok::at))
424 const auto Keyword = *(Tokens.end() - 1);
425 if (Keyword->TokenText ==
"$")
430 At->Tok.setKind(tok::identifier);
431 At->TokenText = StringRef(At->TokenText.begin(),
432 Keyword->TokenText.end() - At->TokenText.begin());
433 At->ColumnWidth += Keyword->ColumnWidth;
434 At->setType(Keyword->getType());
435 Tokens.erase(Tokens.end() - 1);
440bool FormatTokenLexer::tryTransformCSharpForEach() {
441 if (Tokens.size() < 1)
454bool FormatTokenLexer::tryMergeForEach() {
455 if (Tokens.size() < 2)
457 auto &For = *(Tokens.end() - 2);
458 auto &Each = *(Tokens.end() - 1);
459 if (For->isNot(tok::kw_for))
461 if (Each->isNot(tok::identifier))
463 if (Each->TokenText !=
"each")
466 For->setType(TT_ForEachMacro);
467 For->Tok.setKind(tok::kw_for);
469 For->TokenText = StringRef(For->TokenText.begin(),
470 Each->TokenText.end() - For->TokenText.begin());
471 For->ColumnWidth += Each->ColumnWidth;
472 Tokens.erase(Tokens.end() - 1);
476bool FormatTokenLexer::tryTransformTryUsageForC() {
477 if (Tokens.size() < 2)
479 auto &Try = *(Tokens.end() - 2);
480 if (Try->isNot(tok::kw_try))
482 auto &Next = *(Tokens.end() - 1);
483 if (Next->isOneOf(tok::l_brace, tok::colon, tok::hash, tok::comment))
486 if (Tokens.size() > 2) {
487 auto &At = *(Tokens.end() - 3);
492 Try->Tok.setKind(tok::identifier);
496bool FormatTokenLexer::tryMergeLessLess() {
498 if (Tokens.size() < 3)
501 auto First = Tokens.end() - 3;
502 if (
First[0]->isNot(tok::less) ||
First[1]->isNot(tok::less))
506 if (
First[1]->hasWhitespaceBefore())
509 auto X = Tokens.size() > 3 ?
First[-1] :
nullptr;
510 if (
X &&
X->is(tok::less))
514 if ((!
X ||
X->isNot(tok::kw_operator)) && Y->is(tok::less))
517 First[0]->Tok.setKind(tok::lessless);
518 First[0]->TokenText =
"<<";
519 First[0]->ColumnWidth += 1;
520 Tokens.erase(Tokens.end() - 2);
524bool FormatTokenLexer::tryMergeGreaterGreater() {
526 if (Tokens.size() < 2)
529 auto First = Tokens.end() - 2;
530 if (
First[0]->isNot(tok::greater) ||
First[1]->isNot(tok::greater))
534 if (
First[1]->hasWhitespaceBefore())
537 auto Tok = Tokens.size() > 2 ?
First[-1] :
nullptr;
538 if (Tok && Tok->isNot(tok::kw_operator))
541 First[0]->Tok.setKind(tok::greatergreater);
542 First[0]->TokenText =
">>";
543 First[0]->ColumnWidth += 1;
544 Tokens.erase(Tokens.end() - 1);
548bool FormatTokenLexer::tryMergeTokens(ArrayRef<tok::TokenKind> Kinds,
550 if (Tokens.size() < Kinds.size())
553 SmallVectorImpl<FormatToken *>::const_iterator
First =
554 Tokens.end() - Kinds.size();
555 for (
unsigned i = 0; i < Kinds.size(); ++i)
556 if (
First[i]->isNot(Kinds[i]))
559 return tryMergeTokens(Kinds.size(), NewType);
562bool FormatTokenLexer::tryMergeTokens(
size_t Count,
TokenType NewType) {
563 if (Tokens.size() < Count)
566 SmallVectorImpl<FormatToken *>::const_iterator
First = Tokens.end() - Count;
567 unsigned AddLength = 0;
568 for (
size_t i = 1; i < Count; ++i) {
571 if (
First[i]->hasWhitespaceBefore())
573 AddLength +=
First[i]->TokenText.size();
576 Tokens.resize(Tokens.size() - Count + 1);
577 First[0]->TokenText = StringRef(
First[0]->TokenText.data(),
578 First[0]->TokenText.size() + AddLength);
579 First[0]->ColumnWidth += AddLength;
580 First[0]->setType(NewType);
584bool FormatTokenLexer::tryMergeTokensAny(
585 ArrayRef<ArrayRef<tok::TokenKind>> Kinds,
TokenType NewType) {
586 return llvm::any_of(Kinds, [
this, NewType](ArrayRef<tok::TokenKind> Kinds) {
587 return tryMergeTokens(Kinds, NewType);
592bool FormatTokenLexer::precedesOperand(FormatToken *Tok) {
596 return Tok->isOneOf(tok::period, tok::l_paren, tok::comma, tok::l_brace,
597 tok::r_brace, tok::l_square, tok::semi, tok::exclaim,
598 tok::colon, tok::question, tok::tilde) ||
599 Tok->isOneOf(tok::kw_return, tok::kw_do, tok::kw_case, tok::kw_throw,
600 tok::kw_else, tok::kw_new, tok::kw_delete, tok::kw_void,
602 Tok->isBinaryOperator();
605bool FormatTokenLexer::canPrecedeRegexLiteral(FormatToken *Prev) {
615 if (Prev->isOneOf(tok::plusplus, tok::minusminus, tok::exclaim))
616 return Tokens.size() < 3 || precedesOperand(Tokens[Tokens.size() - 3]);
620 if (!precedesOperand(Prev))
630void FormatTokenLexer::tryParseJSRegexLiteral() {
631 FormatToken *RegexToken = Tokens.back();
632 if (!RegexToken->isOneOf(tok::slash, tok::slashequal))
635 FormatToken *Prev =
nullptr;
636 for (FormatToken *FT : llvm::drop_begin(llvm::reverse(Tokens))) {
639 if (FT->isNot(tok::comment)) {
645 if (!canPrecedeRegexLiteral(Prev))
649 const char *Offset = Lex->getBufferLocation();
650 const char *RegexBegin = Offset - RegexToken->TokenText.size();
651 StringRef Buffer = Lex->getBuffer();
652 bool InCharacterClass =
false;
653 bool HaveClosingSlash =
false;
654 for (; !HaveClosingSlash && Offset != Buffer.end(); ++Offset) {
664 InCharacterClass =
true;
667 InCharacterClass =
false;
670 if (!InCharacterClass)
671 HaveClosingSlash =
true;
676 RegexToken->setType(TT_RegexLiteral);
678 RegexToken->Tok.setKind(tok::string_literal);
679 RegexToken->TokenText = StringRef(RegexBegin, Offset - RegexBegin);
680 RegexToken->ColumnWidth = RegexToken->TokenText.size();
682 resetLexer(SourceMgr.
getFileOffset(Lex->getSourceLocation(Offset)));
687 auto Repeated = [&
Begin, End]() {
703 for (
int UnmatchedOpeningBraceCount = 0;
Begin < End; ++
Begin) {
715 ++UnmatchedOpeningBraceCount;
723 else if (UnmatchedOpeningBraceCount > 0)
724 --UnmatchedOpeningBraceCount;
730 if (UnmatchedOpeningBraceCount > 0)
733 if (Verbatim && Repeated()) {
744void FormatTokenLexer::handleCSharpVerbatimAndInterpolatedStrings() {
745 FormatToken *CSharpStringLiteral = Tokens.back();
747 if (CSharpStringLiteral->isNot(TT_CSharpStringLiteral))
750 auto &TokenText = CSharpStringLiteral->TokenText;
752 bool Verbatim =
false;
753 bool Interpolated =
false;
754 if (TokenText.starts_with(R
"($@")") || TokenText.starts_with(R"(@$")")) {
757 }
else if (TokenText.starts_with(R
"(@")")) {
759 }
else if (TokenText.starts_with(R
"($")")) {
764 if (!Verbatim && !Interpolated)
767 const char *StrBegin = Lex->getBufferLocation() - TokenText.size();
768 const char *Offset = StrBegin;
769 if (Verbatim && Interpolated)
774 const auto End = Lex->getBuffer().end();
782 StringRef LiteralText(StrBegin, Offset - StrBegin + 1);
783 TokenText = LiteralText;
786 size_t FirstBreak = LiteralText.find(
'\n');
787 StringRef FirstLineText = FirstBreak == StringRef::npos
789 : LiteralText.substr(0, FirstBreak);
791 FirstLineText, CSharpStringLiteral->OriginalColumn, Style.
TabWidth,
793 size_t LastBreak = LiteralText.rfind(
'\n');
794 if (LastBreak != StringRef::npos) {
795 CSharpStringLiteral->IsMultiline =
true;
796 unsigned StartColumn = 0;
797 CSharpStringLiteral->LastLineColumnWidth =
799 StartColumn, Style.
TabWidth, Encoding);
802 assert(Offset < End);
803 resetLexer(SourceMgr.
getFileOffset(Lex->getSourceLocation(Offset + 1)));
806void FormatTokenLexer::handleTableGenMultilineString() {
807 FormatToken *MultiLineString = Tokens.back();
808 if (MultiLineString->isNot(TT_TableGenMultiLineString))
811 auto OpenOffset = Lex->getCurrentBufferOffset() - 2 ;
813 auto CloseOffset = Lex->getBuffer().find(
"}]", OpenOffset);
814 if (CloseOffset == StringRef::npos)
816 auto Text = Lex->getBuffer().substr(OpenOffset, CloseOffset - OpenOffset + 2);
817 MultiLineString->TokenText =
Text;
819 Lex->getSourceLocation(Lex->getBufferLocation() - 2 +
Text.size())));
820 auto FirstLineText =
Text;
821 auto FirstBreak =
Text.find(
'\n');
823 if (FirstBreak != StringRef::npos) {
824 MultiLineString->IsMultiline =
true;
825 FirstLineText =
Text.substr(0, FirstBreak + 1);
827 auto LastBreak =
Text.rfind(
'\n');
829 Text.substr(LastBreak + 1), MultiLineString->OriginalColumn,
834 FirstLineText, MultiLineString->OriginalColumn, Style.
TabWidth, Encoding);
837void FormatTokenLexer::handleTableGenNumericLikeIdentifier() {
838 FormatToken *Tok = Tokens.back();
841 if (Tok->isNot(tok::numeric_constant))
843 StringRef
Text = Tok->TokenText;
852 if (
Text.size() < 1 ||
Text[0] ==
'+' ||
Text[0] ==
'-')
854 const auto NonDigitPos =
Text.find_if([](
char C) {
return !isdigit(
C); });
856 if (NonDigitPos == StringRef::npos)
858 char FirstNonDigit =
Text[NonDigitPos];
859 if (NonDigitPos <
Text.size() - 1) {
860 char TheNext =
Text[NonDigitPos + 1];
862 if (FirstNonDigit ==
'b' && (TheNext ==
'0' || TheNext ==
'1'))
865 if (FirstNonDigit ==
'x' && isxdigit(TheNext))
868 if (isalpha(FirstNonDigit) || FirstNonDigit ==
'_') {
870 Tok->Tok.setKind(tok::identifier);
871 Tok->Tok.setIdentifierInfo(
nullptr);
875void FormatTokenLexer::handleTemplateStrings() {
876 FormatToken *BacktickToken = Tokens.back();
878 if (BacktickToken->is(tok::l_brace)) {
882 if (BacktickToken->is(tok::r_brace)) {
883 if (StateStack.size() == 1)
889 }
else if (BacktickToken->is(tok::unknown) &&
890 BacktickToken->TokenText ==
"`") {
897 const char *Offset = Lex->getBufferLocation();
898 const char *TmplBegin = Offset - BacktickToken->TokenText.size();
899 for (; Offset != Lex->getBuffer().end(); ++Offset) {
900 if (Offset[0] ==
'`') {
905 if (Offset[0] ==
'\\') {
907 }
else if (Offset + 1 < Lex->getBuffer().end() && Offset[0] ==
'$' &&
916 StringRef LiteralText(TmplBegin, Offset - TmplBegin);
917 BacktickToken->setType(TT_TemplateString);
918 BacktickToken->Tok.setKind(tok::string_literal);
919 BacktickToken->TokenText = LiteralText;
922 size_t FirstBreak = LiteralText.find(
'\n');
923 StringRef FirstLineText = FirstBreak == StringRef::npos
925 : LiteralText.substr(0, FirstBreak);
927 FirstLineText, BacktickToken->OriginalColumn, Style.
TabWidth, Encoding);
928 size_t LastBreak = LiteralText.rfind(
'\n');
929 if (LastBreak != StringRef::npos) {
930 BacktickToken->IsMultiline =
true;
931 unsigned StartColumn = 0;
932 BacktickToken->LastLineColumnWidth =
934 StartColumn, Style.
TabWidth, Encoding);
937 SourceLocation loc = Lex->getSourceLocation(Offset);
941void FormatTokenLexer::tryParsePythonComment() {
942 FormatToken *HashToken = Tokens.back();
943 if (!HashToken->isOneOf(tok::hash, tok::hashhash))
946 const char *CommentBegin =
947 Lex->getBufferLocation() - HashToken->TokenText.size();
948 size_t From = CommentBegin - Lex->getBuffer().begin();
949 size_t To = Lex->getBuffer().find_first_of(
'\n', From);
950 if (To == StringRef::npos)
951 To = Lex->getBuffer().size();
952 size_t Len = To - From;
953 HashToken->setType(TT_LineComment);
954 HashToken->Tok.setKind(tok::comment);
955 HashToken->TokenText = Lex->getBuffer().substr(From, Len);
956 SourceLocation Loc = To < Lex->getBuffer().size()
957 ? Lex->getSourceLocation(CommentBegin + Len)
962bool FormatTokenLexer::tryMerge_TMacro() {
963 if (Tokens.size() < 4)
965 FormatToken *
Last = Tokens.back();
966 if (
Last->isNot(tok::r_paren))
969 FormatToken *String = Tokens[Tokens.size() - 2];
970 if (String->isNot(tok::string_literal) || String->IsMultiline)
973 if (Tokens[Tokens.size() - 3]->isNot(tok::l_paren))
976 FormatToken *
Macro = Tokens[Tokens.size() - 4];
977 if (
Macro->TokenText !=
"_T")
980 const char *Start =
Macro->TokenText.data();
981 const char *End =
Last->TokenText.data() +
Last->TokenText.size();
982 String->TokenText = StringRef(Start, End - Start);
983 String->IsFirst =
Macro->IsFirst;
984 String->LastNewlineOffset =
Macro->LastNewlineOffset;
985 String->WhitespaceRange =
Macro->WhitespaceRange;
986 String->OriginalColumn =
Macro->OriginalColumn;
988 String->TokenText, String->OriginalColumn, Style.
TabWidth, Encoding);
989 String->NewlinesBefore =
Macro->NewlinesBefore;
990 String->HasUnescapedNewline =
Macro->HasUnescapedNewline;
995 Tokens.back() = String;
996 if (FirstInLineIndex >= Tokens.size())
997 FirstInLineIndex = Tokens.size() - 1;
1001bool FormatTokenLexer::tryMergeConflictMarkers() {
1002 if (Tokens.back()->NewlinesBefore == 0 && Tokens.back()->isNot(tok::eof))
1016 unsigned FirstInLineOffset;
1018 Tokens[FirstInLineIndex]->getStartOfNonWhitespace());
1021 auto LineOffset = Buffer.rfind(
'\n', FirstInLineOffset);
1022 if (LineOffset == StringRef::npos)
1027 auto FirstSpace = Buffer.find_first_of(
" \n", LineOffset);
1028 StringRef LineStart;
1029 if (FirstSpace == StringRef::npos)
1030 LineStart = Buffer.substr(LineOffset);
1032 LineStart = Buffer.substr(LineOffset, FirstSpace - LineOffset);
1035 if (LineStart ==
"<<<<<<<" || LineStart ==
">>>>") {
1036 Type = TT_ConflictStart;
1037 }
else if (LineStart ==
"|||||||" || LineStart ==
"=======" ||
1038 LineStart ==
"====") {
1039 Type = TT_ConflictAlternative;
1040 }
else if (LineStart ==
">>>>>>>" || LineStart ==
"<<<<") {
1041 Type = TT_ConflictEnd;
1044 if (Type != TT_Unknown) {
1045 FormatToken *Next = Tokens.back();
1047 Tokens.resize(FirstInLineIndex + 1);
1051 Tokens.back()->setType(Type);
1052 Tokens.back()->Tok.setKind(tok::kw___unknown_anytype);
1054 Tokens.push_back(Next);
1061FormatToken *FormatTokenLexer::getStashedToken() {
1063 Token Tok = FormatTok->
Tok;
1064 StringRef TokenText = FormatTok->
TokenText;
1067 FormatTok =
new (Allocator.Allocate()) FormatToken;
1068 FormatTok->
Tok = Tok;
1069 SourceLocation TokLocation =
1086void FormatTokenLexer::truncateToken(
size_t NewLen) {
1087 assert(NewLen <= FormatTok->TokenText.size());
1089 Lex->getBufferLocation() - FormatTok->
TokenText.size() + NewLen)));
1104 const unsigned char *
const Begin =
Text.bytes_begin();
1105 const unsigned char *
const End =
Text.bytes_end();
1106 const unsigned char *Cur =
Begin;
1108 if (isspace(Cur[0])) {
1110 }
else if (Cur[0] ==
'\\' && (Cur[1] ==
'\n' || Cur[1] ==
'\r')) {
1116 assert(End - Cur >= 2);
1118 }
else if (Cur[0] ==
'?' && Cur[1] ==
'?' && Cur[2] ==
'/' &&
1119 (Cur[3] ==
'\n' || Cur[3] ==
'\r')) {
1123 assert(End - Cur >= 4);
1132FormatToken *FormatTokenLexer::getNextToken() {
1135 return getStashedToken();
1138 FormatTok =
new (Allocator.Allocate()) FormatToken;
1139 readRawToken(*FormatTok);
1140 SourceLocation WhitespaceStart =
1142 FormatTok->
IsFirst = IsFirstToken;
1143 IsFirstToken =
false;
1149 unsigned WhitespaceLength = TrailingWhitespace;
1150 while (FormatTok->
isNot(tok::eof)) {
1152 if (LeadingWhitespace == 0)
1154 if (LeadingWhitespace < FormatTok->TokenText.size())
1155 truncateToken(LeadingWhitespace);
1157 bool InEscape =
false;
1158 for (
int i = 0, e =
Text.size(); i != e; ++i) {
1164 if (i + 1 < e &&
Text[i + 1] ==
'\n')
1192 assert(
Text.substr(i, 2) ==
"\\\r" ||
Text.substr(i, 2) ==
"\\\n" ||
1193 Text.substr(i, 4) ==
"\?\?/\r" ||
1194 Text.substr(i, 4) ==
"\?\?/\n" ||
1195 (i >= 1 && (
Text.substr(i - 1, 4) ==
"\?\?/\r" ||
1196 Text.substr(i - 1, 4) ==
"\?\?/\n")) ||
1197 (i >= 2 && (
Text.substr(i - 2, 4) ==
"\?\?/\r" ||
1198 Text.substr(i - 2, 4) ==
"\?\?/\n")));
1207 WhitespaceLength +=
Text.size();
1208 readRawToken(*FormatTok);
1211 if (FormatTok->
is(tok::unknown))
1212 FormatTok->
setType(TT_ImplicitStringLiteral);
1222 FormatTok->
is(tok::comment) && FormatTok->
TokenText.starts_with(
"//")) {
1223 size_t BackslashPos = FormatTok->
TokenText.find(
'\\');
1224 while (BackslashPos != StringRef::npos) {
1225 if (BackslashPos + 1 < FormatTok->
TokenText.size() &&
1226 FormatTok->
TokenText[BackslashPos + 1] ==
'\n') {
1227 truncateToken(BackslashPos + 1);
1230 BackslashPos = FormatTok->
TokenText.find(
'\\', BackslashPos + 1);
1235 static const llvm::Regex NumberBase(
"^s?[bdho]", llvm::Regex::IgnoreCase);
1236 SmallVector<StringRef, 1> Matches;
1242 if (FormatTok->
is(tok::numeric_constant)) {
1244 auto Quote = FormatTok->
TokenText.find(
'\'');
1245 if (Quote != StringRef::npos)
1246 truncateToken(Quote);
1247 }
else if (FormatTok->
isOneOf(tok::hash, tok::hashhash)) {
1249 }
else if (FormatTok->
is(tok::raw_identifier)) {
1253 }
else if (FormatTok->
TokenText ==
"``") {
1256 }
else if (Tokens.size() > 0 &&
1258 NumberBase.match(FormatTok->
TokenText, &Matches)) {
1263 truncateToken(Matches[0].size());
1270 WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
1274 TrailingWhitespace = 0;
1275 if (FormatTok->
is(tok::comment)) {
1277 StringRef UntrimmedText = FormatTok->
TokenText;
1279 TrailingWhitespace = UntrimmedText.size() - FormatTok->
TokenText.size();
1280 }
else if (FormatTok->
is(tok::raw_identifier)) {
1281 IdentifierInfo &Info = IdentTable.
get(FormatTok->
TokenText);
1285 FormatTok->
isOneOf(tok::kw_struct, tok::kw_union, tok::kw_delete,
1286 tok::kw_operator)) {
1290 FormatTok->
isOneOf(tok::kw_struct, tok::kw_union,
1291 tok::kw_operator)) {
1298 }
else if (FormatTok->
is(tok::greatergreater)) {
1303 }
else if (FormatTok->
is(tok::lessless)) {
1310 if (Style.
isVerilog() && Tokens.size() > 0 &&
1311 Tokens.back()->is(TT_VerilogNumberBase) &&
1312 FormatTok->
Tok.
isOneOf(tok::identifier, tok::question)) {
1314 FormatTok->
Tok.
setKind(tok::numeric_constant);
1320 size_t FirstNewlinePos =
Text.find(
'\n');
1321 if (FirstNewlinePos == StringRef::npos) {
1332 Text.substr(0, FirstNewlinePos), Column, Style.
TabWidth, Encoding);
1344 if (!(Tokens.size() > 0 && Tokens.back()->Tok.getIdentifierInfo() &&
1345 Tokens.back()->Tok.getIdentifierInfo()->getPPKeywordID() ==
1347 it != Macros.end()) {
1348 FormatTok->
setType(it->second);
1349 if (it->second == TT_IfMacro) {
1356 }
else if (FormatTok->
is(tok::identifier)) {
1357 if (MacroBlockBeginRegex.match(
Text))
1358 FormatTok->
setType(TT_MacroBlockBegin);
1359 else if (MacroBlockEndRegex.match(
Text))
1360 FormatTok->
setType(TT_MacroBlockEnd);
1369bool FormatTokenLexer::readRawTokenVerilogSpecific(Token &Tok) {
1384 static const llvm::Regex VerilogToken(R
"re(^('|``?|\\(\\)re"
1385 "(\r?\n|\r)|[^[:space:]])*)");
1387 SmallVector<StringRef, 4> Matches;
1388 const char *Start = Lex->getBufferLocation();
1389 if (!VerilogToken.match(StringRef(Start, Lex->getBuffer().end() - Start),
1395 if (Start[0] ==
'\\' && (Start[1] ==
'\r' || Start[1] ==
'\n'))
1397 size_t Len = Matches[0].size();
1402 Tok.setKind(tok::raw_identifier);
1404 Tok.setLocation(Lex->getSourceLocation(Start, Len));
1405 Tok.setRawIdentifierData(Start);
1406 Lex->seek(Lex->getCurrentBufferOffset() + Len,
false);
1410void FormatTokenLexer::readRawToken(FormatToken &Tok) {
1413 if (!Style.
isVerilog() || !readRawTokenVerilogSpecific(Tok.Tok))
1414 Lex->LexFromRawLexer(Tok.Tok);
1415 Tok.TokenText = StringRef(SourceMgr.
getCharacterData(Tok.Tok.getLocation()),
1416 Tok.Tok.getLength());
1419 if (Tok.is(tok::unknown)) {
1420 if (Tok.TokenText.starts_with(
"\"")) {
1421 Tok.Tok.setKind(tok::string_literal);
1422 Tok.IsUnterminatedLiteral =
true;
1423 }
else if (Style.
isJavaScript() && Tok.TokenText ==
"''") {
1424 Tok.Tok.setKind(tok::string_literal);
1429 Tok.Tok.setKind(tok::string_literal);
1432 FormattingDisabled =
false;
1434 Tok.Finalized = FormattingDisabled;
1437 FormattingDisabled =
true;
1440void FormatTokenLexer::resetLexer(
unsigned Offset) {
1444 Buffer.begin(), Buffer.begin() + Offset, Buffer.end()));
1445 Lex->SetKeepWhitespaceMode(
true);
1446 TrailingWhitespace = 0;
Defines the SourceManager interface.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with its #include path and #line data.
Implements an efficient mapping from strings to IdentifierInfo nodes.
IdentifierInfo & get(StringRef Name)
Return the identifier token info for the specified named identifier.
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens.
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
This class handles loading and caching of source files into memory.
unsigned getFileOffset(SourceLocation SpellingLoc) const
Returns the offset from the start of the file that the specified SourceLocation represents.
StringRef getBufferData(FileID FID, bool *Invalid=nullptr) const
Return a StringRef to the source buffer data for the specified FileID.
SourceLocation getLocForEndOfFile(FileID FID) const
Return the source location corresponding to the last byte of the specified file.
const char * getCharacterData(SourceLocation SL, bool *Invalid=nullptr) const
Return a pointer to the start of the specified location in the appropriate spelling MemoryBuffer.
llvm::MemoryBufferRef getBufferOrFake(FileID FID, SourceLocation Loc=SourceLocation()) const
Return the buffer for the specified FileID.
std::pair< FileID, unsigned > getDecomposedLoc(SourceLocation Loc) const
Decompose the specified location into a raw FileID + Offset pair.
SourceLocation getLocForStartOfFile(FileID FID) const
Return the source location corresponding to the first byte of the specified file.
IdentifierInfo * getIdentifierInfo() const
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
void setLength(unsigned Len)
void setKind(tok::TokenKind K)
void setLocation(SourceLocation L)
bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const
void setIdentifierInfo(IdentifierInfo *II)
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
The JSON file list parser is used to communicate input to InstallAPI.