20#include "llvm/Support/Regex.h"
28 llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
33 Style(Style), IdentTable(IdentTable), Keywords(IdentTable),
34 Encoding(Encoding), Allocator(Allocator), FirstInLineIndex(0),
37 Lex.reset(
new Lexer(
ID, SourceMgr.getBufferOrFake(
ID), SourceMgr, LangOpts));
38 Lex->SetKeepWhitespaceMode(
true);
41 auto Identifier = &IdentTable.get(ForEachMacro);
44 for (
const std::string &IfMacro : Style.
IfMacros) {
49 auto Identifier = &IdentTable.get(AttributeMacro);
53 auto Identifier = &IdentTable.get(StatementMacro);
57 auto Identifier = &IdentTable.get(TypenameMacro);
61 auto Identifier = &IdentTable.get(NamespaceMacro);
64 for (
const std::string &WhitespaceSensitiveMacro :
66 auto Identifier = &IdentTable.get(WhitespaceSensitiveMacro);
69 for (
const std::string &StatementAttributeLikeMacro :
71 auto Identifier = &IdentTable.get(StatementAttributeLikeMacro);
76 TemplateNames.insert(&IdentTable.get(TemplateName));
77 for (
const auto &TypeName : Style.
TypeNames)
78 TypeNames.insert(&IdentTable.get(TypeName));
80 VariableTemplates.insert(&IdentTable.get(VariableTemplate));
84 assert(Tokens.empty());
85 assert(FirstInLineIndex == 0);
87 Tokens.push_back(getNextToken());
89 tryParseJSRegexLiteral();
90 handleTemplateStrings();
93 tryParsePythonComment();
94 tryMergePreviousTokens();
98 handleCSharpVerbatimAndInterpolatedStrings();
101 handleTableGenMultilineString();
102 handleTableGenNumericLikeIdentifier();
104 if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
105 FirstInLineIndex = Tokens.size() - 1;
106 }
while (Tokens.back()->isNot(tok::eof));
108 auto &TokEOF = *Tokens.back();
109 if (TokEOF.NewlinesBefore == 0) {
110 TokEOF.NewlinesBefore = 1;
111 TokEOF.OriginalColumn = 0;
117void FormatTokenLexer::tryMergePreviousTokens() {
118 if (tryMerge_TMacro())
120 if (tryMergeConflictMarkers())
122 if (tryMergeLessLess())
124 if (tryMergeGreaterGreater())
126 if (tryMergeForEach())
128 if (Style.
isCpp() && tryTransformTryUsageForC())
132 static const tok::TokenKind NullishCoalescingOperator[] = {tok::question,
134 static const tok::TokenKind NullPropagatingOperator[] = {tok::question,
136 static const tok::TokenKind FatArrow[] = {tok::equal, tok::greater};
138 if (tryMergeTokens(FatArrow, TT_FatArrow))
140 if (tryMergeTokens(NullishCoalescingOperator, TT_NullCoalescingOperator)) {
142 Tokens.back()->Tok.setKind(tok::pipepipe);
145 if (tryMergeTokens(NullPropagatingOperator, TT_NullPropagatingOperator)) {
147 Tokens.back()->Tok.setKind(tok::period);
150 if (tryMergeNullishCoalescingEqual())
156 tok::question, tok::l_square};
158 if (tryMergeCSharpKeywordVariables())
160 if (tryMergeCSharpStringLiteral())
162 if (tryTransformCSharpForEach())
164 if (tryMergeTokens(CSharpNullConditionalLSquare,
165 TT_CSharpNullConditionalLSquare)) {
167 Tokens.back()->Tok.setKind(tok::l_square);
172 if (tryMergeNSStringLiteral())
176 static const tok::TokenKind JSIdentity[] = {tok::equalequal, tok::equal};
179 static const tok::TokenKind JSShiftEqual[] = {tok::greater, tok::greater,
181 static const tok::TokenKind JSExponentiation[] = {tok::star, tok::star};
184 static const tok::TokenKind JSPipePipeEqual[] = {tok::pipepipe, tok::equal};
185 static const tok::TokenKind JSAndAndEqual[] = {tok::ampamp, tok::equal};
188 if (tryMergeTokens(JSIdentity, TT_BinaryOperator))
190 if (tryMergeTokens(JSNotIdentity, TT_BinaryOperator))
192 if (tryMergeTokens(JSShiftEqual, TT_BinaryOperator))
194 if (tryMergeTokens(JSExponentiation, TT_JsExponentiation))
196 if (tryMergeTokens(JSExponentiationEqual, TT_JsExponentiationEqual)) {
197 Tokens.back()->Tok.setKind(tok::starequal);
200 if (tryMergeTokens(JSAndAndEqual, TT_JsAndAndEqual) ||
201 tryMergeTokens(JSPipePipeEqual, TT_JsPipePipeEqual)) {
203 Tokens.back()->Tok.setKind(tok::equal);
206 if (tryMergeJSPrivateIdentifier())
212 tok::greater, tok::greater, tok::greaterequal};
213 if (tryMergeTokens(JavaRightLogicalShiftAssign, TT_BinaryOperator))
219 if (Tokens.size() >= 3 && Tokens.end()[-3]->is(TT_VerilogNumberBase) &&
220 Tokens.end()[-2]->is(tok::numeric_constant) &&
221 Tokens.back()->isOneOf(tok::numeric_constant, tok::identifier,
223 tryMergeTokens(2, TT_Unknown)) {
227 if (tryMergeTokensAny({{tok::minus, tok::colon}, {tok::plus, tok::colon}},
235 if (Tokens.back()->TokenText.size() == 1 &&
236 tryMergeTokensAny({{tok::caret, tok::tilde}, {tok::tilde, tok::caret}},
237 TT_BinaryOperator)) {
238 Tokens.back()->Tok.setKind(tok::caret);
242 if (tryMergeTokens({tok::less, tok::less}, TT_BinaryOperator)) {
243 Tokens.back()->Tok.setKind(tok::lessless);
246 if (tryMergeTokens({tok::greater, tok::greater}, TT_BinaryOperator)) {
247 Tokens.back()->Tok.setKind(tok::greatergreater);
250 if (tryMergeTokensAny({{tok::lessless, tok::equal},
251 {tok::lessless, tok::lessequal},
252 {tok::greatergreater, tok::equal},
253 {tok::greatergreater, tok::greaterequal},
254 {tok::colon, tok::equal},
255 {tok::colon, tok::slash}},
256 TT_BinaryOperator)) {
261 if (tryMergeTokensAny({{tok::star, tok::star},
262 {tok::lessless, tok::less},
263 {tok::greatergreater, tok::greater},
264 {tok::exclaimequal, tok::equal},
265 {tok::exclaimequal, tok::question},
266 {tok::equalequal, tok::equal},
267 {tok::equalequal, tok::question}},
268 TT_BinaryOperator)) {
273 if (tryMergeTokensAny({{tok::plusequal, tok::greater},
274 {tok::plus, tok::star, tok::greater},
275 {tok::minusequal, tok::greater},
276 {tok::minus, tok::star, tok::greater},
277 {tok::less, tok::arrow},
278 {tok::equal, tok::greater},
279 {tok::star, tok::greater},
280 {tok::pipeequal, tok::greater},
281 {tok::pipe, tok::arrow},
282 {tok::hash, tok::minus, tok::hash},
283 {tok::hash, tok::equal, tok::hash}},
284 TT_BinaryOperator) ||
285 Tokens.back()->is(tok::arrow)) {
292 if (tryMergeTokens({tok::l_square, tok::l_brace},
293 TT_TableGenMultiLineString)) {
295 Tokens.back()->setFinalizedType(TT_TableGenMultiLineString);
296 Tokens.back()->Tok.setKind(tok::string_literal);
301 if (tryMergeTokens({tok::exclaim, tok::identifier},
302 TT_TableGenBangOperator)) {
303 Tokens.back()->Tok.setKind(tok::identifier);
304 Tokens.back()->Tok.setIdentifierInfo(
nullptr);
305 if (Tokens.back()->TokenText ==
"!cond")
306 Tokens.back()->setFinalizedType(TT_TableGenCondOperator);
308 Tokens.back()->setFinalizedType(TT_TableGenBangOperator);
311 if (tryMergeTokens({tok::exclaim, tok::kw_if}, TT_TableGenBangOperator)) {
314 Tokens.back()->Tok.setKind(tok::identifier);
315 Tokens.back()->Tok.setIdentifierInfo(
nullptr);
316 Tokens.back()->setFinalizedType(TT_TableGenBangOperator);
320 if (tryMergeTokens({tok::plus, tok::numeric_constant}, TT_Unknown)) {
321 Tokens.back()->Tok.setKind(tok::numeric_constant);
324 if (tryMergeTokens({tok::minus, tok::numeric_constant}, TT_Unknown)) {
325 Tokens.back()->Tok.setKind(tok::numeric_constant);
331bool FormatTokenLexer::tryMergeNSStringLiteral() {
332 if (Tokens.size() < 2)
334 auto &At = *(Tokens.end() - 2);
335 auto &String = *(Tokens.end() - 1);
336 if (At->isNot(tok::at) || String->isNot(tok::string_literal))
338 At->Tok.setKind(tok::string_literal);
339 At->TokenText = StringRef(At->TokenText.begin(),
340 String->TokenText.end() - At->TokenText.begin());
341 At->ColumnWidth += String->ColumnWidth;
342 At->setType(TT_ObjCStringLiteral);
343 Tokens.erase(Tokens.end() - 1);
347bool FormatTokenLexer::tryMergeJSPrivateIdentifier() {
350 if (Tokens.size() < 2)
352 auto &Hash = *(Tokens.end() - 2);
354 if (Hash->isNot(tok::hash) ||
Identifier->isNot(tok::identifier))
356 Hash->Tok.setKind(tok::identifier);
358 StringRef(Hash->TokenText.begin(),
359 Identifier->TokenText.end() - Hash->TokenText.begin());
361 Hash->setType(TT_JsPrivateIdentifier);
362 Tokens.erase(Tokens.end() - 1);
371bool FormatTokenLexer::tryMergeCSharpStringLiteral() {
372 if (Tokens.size() < 2)
376 const auto String = *(Tokens.end() - 1);
377 if (String->isNot(tok::string_literal))
380 auto Prefix = *(Tokens.end() - 2);
381 if (Prefix->isNot(tok::at) && Prefix->TokenText !=
"$")
384 if (Tokens.size() > 2) {
385 const auto Tok = *(Tokens.end() - 3);
386 if ((Tok->TokenText ==
"$" && Prefix->is(tok::at)) ||
387 (Tok->is(tok::at) && Prefix->TokenText ==
"$")) {
389 Tok->ColumnWidth += Prefix->ColumnWidth;
390 Tokens.erase(Tokens.end() - 2);
396 Prefix->Tok.setKind(tok::string_literal);
398 StringRef(Prefix->TokenText.begin(),
399 String->TokenText.end() - Prefix->TokenText.begin());
400 Prefix->ColumnWidth += String->ColumnWidth;
401 Prefix->setType(TT_CSharpStringLiteral);
402 Tokens.erase(Tokens.end() - 1);
408const llvm::StringSet<> FormatTokenLexer::CSharpAttributeTargets = {
409 "assembly",
"module",
"field",
"event",
"method",
410 "param",
"property",
"return",
"type",
413bool FormatTokenLexer::tryMergeNullishCoalescingEqual() {
414 if (Tokens.size() < 2)
416 auto &NullishCoalescing = *(Tokens.end() - 2);
417 auto &
Equal = *(Tokens.end() - 1);
418 if (NullishCoalescing->isNot(TT_NullCoalescingOperator) ||
419 Equal->isNot(tok::equal)) {
422 NullishCoalescing->Tok.setKind(tok::equal);
423 NullishCoalescing->TokenText =
424 StringRef(NullishCoalescing->TokenText.begin(),
425 Equal->TokenText.end() - NullishCoalescing->TokenText.begin());
426 NullishCoalescing->ColumnWidth +=
Equal->ColumnWidth;
427 NullishCoalescing->setType(TT_NullCoalescingEqual);
428 Tokens.erase(Tokens.end() - 1);
432bool FormatTokenLexer::tryMergeCSharpKeywordVariables() {
433 if (Tokens.size() < 2)
435 const auto At = *(Tokens.end() - 2);
436 if (At->isNot(tok::at))
438 const auto Keyword = *(Tokens.end() - 1);
439 if (Keyword->TokenText ==
"$")
444 At->Tok.setKind(tok::identifier);
445 At->TokenText = StringRef(At->TokenText.begin(),
446 Keyword->TokenText.end() - At->TokenText.begin());
447 At->ColumnWidth += Keyword->ColumnWidth;
448 At->setType(Keyword->getType());
449 Tokens.erase(Tokens.end() - 1);
// Presumably marks a C# 'foreach' keyword so it is formatted like a for
// loop — TODO confirm; only these first two lines of the function body
// (original lines 454-455) survive in this extract.
454bool FormatTokenLexer::tryTransformCSharpForEach() {
455 if (Tokens.size() < 1)
468bool FormatTokenLexer::tryMergeForEach() {
469 if (Tokens.size() < 2)
471 auto &For = *(Tokens.end() - 2);
472 auto &Each = *(Tokens.end() - 1);
473 if (For->isNot(tok::kw_for))
475 if (Each->isNot(tok::identifier))
477 if (Each->TokenText !=
"each")
480 For->setType(TT_ForEachMacro);
481 For->Tok.setKind(tok::kw_for);
483 For->TokenText = StringRef(For->TokenText.begin(),
484 Each->TokenText.end() - For->TokenText.begin());
485 For->ColumnWidth += Each->ColumnWidth;
486 Tokens.erase(Tokens.end() - 1);
490bool FormatTokenLexer::tryTransformTryUsageForC() {
491 if (Tokens.size() < 2)
493 auto &Try = *(Tokens.end() - 2);
494 if (Try->isNot(tok::kw_try))
496 auto &Next = *(Tokens.end() - 1);
497 if (Next->isOneOf(tok::l_brace, tok::colon, tok::hash, tok::comment))
500 if (Tokens.size() > 2) {
501 auto &At = *(Tokens.end() - 3);
506 Try->Tok.setKind(tok::identifier);
510bool FormatTokenLexer::tryMergeLessLess() {
512 if (Tokens.size() < 3)
515 auto First = Tokens.end() - 3;
516 if (
First[0]->isNot(tok::less) ||
First[1]->isNot(tok::less))
520 if (
First[1]->hasWhitespaceBefore())
523 auto X = Tokens.size() > 3 ?
First[-1] :
nullptr;
524 if (
X &&
X->is(tok::less))
528 if ((!
X ||
X->isNot(tok::kw_operator)) && Y->is(tok::less))
531 First[0]->Tok.setKind(tok::lessless);
532 First[0]->TokenText =
"<<";
533 First[0]->ColumnWidth += 1;
534 Tokens.erase(Tokens.end() - 2);
538bool FormatTokenLexer::tryMergeGreaterGreater() {
540 if (Tokens.size() < 2)
543 auto First = Tokens.end() - 2;
544 if (
First[0]->isNot(tok::greater) ||
First[1]->isNot(tok::greater))
548 if (
First[1]->hasWhitespaceBefore())
551 auto Tok = Tokens.size() > 2 ?
First[-1] :
nullptr;
552 if (Tok && Tok->isNot(tok::kw_operator))
555 First[0]->Tok.setKind(tok::greatergreater);
556 First[0]->TokenText =
">>";
557 First[0]->ColumnWidth += 1;
558 Tokens.erase(Tokens.end() - 1);
562bool FormatTokenLexer::tryMergeTokens(ArrayRef<tok::TokenKind> Kinds,
564 if (Tokens.size() < Kinds.size())
567 const auto *
First = Tokens.end() - Kinds.size();
568 for (
unsigned i = 0; i < Kinds.size(); ++i)
569 if (
First[i]->isNot(Kinds[i]))
572 return tryMergeTokens(Kinds.size(), NewType);
575bool FormatTokenLexer::tryMergeTokens(
size_t Count,
TokenType NewType) {
576 if (Tokens.size() < Count)
579 const auto *
First = Tokens.end() - Count;
580 unsigned AddLength = 0;
581 for (
size_t i = 1; i < Count; ++i) {
584 if (
First[i]->hasWhitespaceBefore())
586 AddLength +=
First[i]->TokenText.size();
589 Tokens.resize(Tokens.size() - Count + 1);
590 First[0]->TokenText = StringRef(
First[0]->TokenText.data(),
591 First[0]->TokenText.size() + AddLength);
592 First[0]->ColumnWidth += AddLength;
593 First[0]->setType(NewType);
597bool FormatTokenLexer::tryMergeTokensAny(
598 ArrayRef<ArrayRef<tok::TokenKind>> Kinds,
TokenType NewType) {
599 return llvm::any_of(Kinds, [
this, NewType](ArrayRef<tok::TokenKind> Kinds) {
600 return tryMergeTokens(Kinds, NewType);
605bool FormatTokenLexer::precedesOperand(FormatToken *Tok) {
609 return Tok->isOneOf(tok::period, tok::l_paren, tok::comma, tok::l_brace,
610 tok::r_brace, tok::l_square, tok::semi, tok::exclaim,
611 tok::colon, tok::question, tok::tilde) ||
612 Tok->isOneOf(tok::kw_return, tok::kw_do, tok::kw_case, tok::kw_throw,
613 tok::kw_else, tok::kw_new, tok::kw_delete, tok::kw_void,
615 Tok->isBinaryOperator();
618bool FormatTokenLexer::canPrecedeRegexLiteral(FormatToken *Prev) {
628 if (Prev->isOneOf(tok::plusplus, tok::minusminus, tok::exclaim))
629 return Tokens.size() < 3 || precedesOperand(Tokens[Tokens.size() - 3]);
633 if (!precedesOperand(Prev))
643void FormatTokenLexer::tryParseJSRegexLiteral() {
644 FormatToken *RegexToken = Tokens.back();
645 if (!RegexToken->isOneOf(tok::slash, tok::slashequal))
648 FormatToken *Prev =
nullptr;
649 for (FormatToken *FT : llvm::drop_begin(llvm::reverse(Tokens))) {
652 if (FT->isNot(tok::comment)) {
658 if (!canPrecedeRegexLiteral(Prev))
662 const char *Offset = Lex->getBufferLocation();
663 const char *RegexBegin = Offset - RegexToken->TokenText.size();
664 StringRef Buffer = Lex->getBuffer();
665 bool InCharacterClass =
false;
666 bool HaveClosingSlash =
false;
667 for (; !HaveClosingSlash && Offset != Buffer.end(); ++Offset) {
677 InCharacterClass =
true;
680 InCharacterClass =
false;
683 if (!InCharacterClass)
684 HaveClosingSlash =
true;
689 RegexToken->setType(TT_RegexLiteral);
691 RegexToken->Tok.setKind(tok::string_literal);
692 RegexToken->TokenText = StringRef(RegexBegin, Offset - RegexBegin);
693 RegexToken->ColumnWidth = RegexToken->TokenText.size();
695 resetLexer(SourceMgr.
getFileOffset(Lex->getSourceLocation(Offset)));
700 auto Repeated = [&
Begin, End]() {
716 for (
int UnmatchedOpeningBraceCount = 0;
Begin < End; ++
Begin) {
728 ++UnmatchedOpeningBraceCount;
736 else if (UnmatchedOpeningBraceCount > 0)
737 --UnmatchedOpeningBraceCount;
743 if (UnmatchedOpeningBraceCount > 0)
746 if (Verbatim && Repeated()) {
// Re-lexes a C# verbatim/interpolated string (@"...", $"...", $@"...") past
// the point where the raw lexer stopped, then fixes up the token's text and
// column widths. NOTE(review): several original lines (768-794, 811) are
// missing from this extract, including the scan that advances Offset to the
// closing quote; comments below describe only the visible fragments.
757void FormatTokenLexer::handleCSharpVerbatimAndInterpolatedStrings() {
758 FormatToken *CSharpStringLiteral = Tokens.back();
760 if (CSharpStringLiteral->isNot(TT_CSharpStringLiteral))
// Classify the prefix: $@"/@$" = verbatim+interpolated, @" = verbatim,
// $" = interpolated.
763 auto &TokenText = CSharpStringLiteral->TokenText;
765 bool Verbatim =
false;
766 bool Interpolated =
false;
767 if (TokenText.starts_with(R
"($@")") || TokenText.starts_with(R"(@$")")) {
770 }
else if (TokenText.starts_with(R
"(@")")) {
772 }
else if (TokenText.starts_with(R
"($")")) {
777 if (!Verbatim && !Interpolated)
// StrBegin points at the first character of the prefix in the buffer.
780 const char *StrBegin = Lex->getBufferLocation() - TokenText.size();
781 const char *Offset = StrBegin;
782 if (Verbatim && Interpolated)
787 const auto End = Lex->getBuffer().end();
// Replace the token text with the full literal, including the closing quote.
795 StringRef LiteralText(StrBegin, Offset - StrBegin + 1);
796 TokenText = LiteralText;
// Adjust width for potentially multi-line string literals.
799 size_t FirstBreak = LiteralText.find(
'\n');
800 StringRef FirstLineText = FirstBreak == StringRef::npos
802 : LiteralText.substr(0, FirstBreak);
804 FirstLineText, CSharpStringLiteral->OriginalColumn, Style.
TabWidth,
806 size_t LastBreak = LiteralText.rfind(
'\n');
807 if (LastBreak != StringRef::npos) {
808 CSharpStringLiteral->IsMultiline =
true;
809 unsigned StartColumn = 0;
810 CSharpStringLiteral->LastLineColumnWidth =
812 StartColumn, Style.
TabWidth, Encoding);
// Resume normal lexing just past the closing quote.
815 assert(Offset < End);
816 resetLexer(SourceMgr.
getFileOffset(Lex->getSourceLocation(Offset + 1)));
819void FormatTokenLexer::handleTableGenMultilineString() {
820 FormatToken *MultiLineString = Tokens.back();
821 if (MultiLineString->isNot(TT_TableGenMultiLineString))
824 auto OpenOffset = Lex->getCurrentBufferOffset() - 2 ;
826 auto CloseOffset = Lex->getBuffer().find(
"}]", OpenOffset);
827 if (CloseOffset == StringRef::npos)
829 auto Text = Lex->getBuffer().substr(OpenOffset, CloseOffset - OpenOffset + 2);
830 MultiLineString->TokenText =
Text;
832 Lex->getSourceLocation(Lex->getBufferLocation() - 2 +
Text.size())));
833 auto FirstLineText =
Text;
834 auto FirstBreak =
Text.find(
'\n');
836 if (FirstBreak != StringRef::npos) {
837 MultiLineString->IsMultiline =
true;
838 FirstLineText =
Text.substr(0, FirstBreak + 1);
840 auto LastBreak =
Text.rfind(
'\n');
842 Text.substr(LastBreak + 1), MultiLineString->OriginalColumn,
847 FirstLineText, MultiLineString->OriginalColumn, Style.
TabWidth, Encoding);
850void FormatTokenLexer::handleTableGenNumericLikeIdentifier() {
851 FormatToken *Tok = Tokens.back();
854 if (Tok->isNot(tok::numeric_constant))
856 StringRef
Text = Tok->TokenText;
865 if (
Text.size() < 1 ||
Text[0] ==
'+' ||
Text[0] ==
'-')
867 const auto NonDigitPos =
Text.find_if([](
char C) {
return !isdigit(
C); });
869 if (NonDigitPos == StringRef::npos)
871 char FirstNonDigit =
Text[NonDigitPos];
872 if (NonDigitPos <
Text.size() - 1) {
873 char TheNext =
Text[NonDigitPos + 1];
875 if (FirstNonDigit ==
'b' && (TheNext ==
'0' || TheNext ==
'1'))
878 if (FirstNonDigit ==
'x' && isxdigit(TheNext))
881 if (isalpha(FirstNonDigit) || FirstNonDigit ==
'_') {
883 Tok->Tok.setKind(tok::identifier);
884 Tok->Tok.setIdentifierInfo(
nullptr);
// Recognizes JavaScript template strings (`...` with ${...} substitutions)
// and turns the backtick token into a TT_TemplateString string literal.
// NOTE(review): many original lines (890-909, 914-928, 933-934, 948-953) are
// missing from this extract — including the StateStack transitions for
// nested ${...} and the loop bodies; comments describe visible fragments
// only.
888void FormatTokenLexer::handleTemplateStrings() {
889 FormatToken *BacktickToken = Tokens.back();
// '{' / '}' adjust the lexer state for substitutions nested in templates.
891 if (BacktickToken->is(tok::l_brace)) {
895 if (BacktickToken->is(tok::r_brace)) {
896 if (StateStack.size() == 1)
902 }
else if (BacktickToken->is(tok::unknown) &&
903 BacktickToken->TokenText ==
"`") {
// Scan forward from the backtick for the closing '`', honoring '\' escapes
// and '${' substitution starts.
910 const char *Offset = Lex->getBufferLocation();
911 const char *TmplBegin = Offset - BacktickToken->TokenText.size();
912 for (; Offset != Lex->getBuffer().end(); ++Offset) {
913 if (Offset[0] ==
'`') {
918 if (Offset[0] ==
'\\') {
920 }
else if (Offset + 1 < Lex->getBuffer().end() && Offset[0] ==
'$' &&
// Retype the token to cover the whole literal text.
929 StringRef LiteralText(TmplBegin, Offset - TmplBegin);
930 BacktickToken->setType(TT_TemplateString);
931 BacktickToken->Tok.setKind(tok::string_literal);
932 BacktickToken->TokenText = LiteralText;
// Adjust width for potentially multi-line template strings.
935 size_t FirstBreak = LiteralText.find(
'\n');
936 StringRef FirstLineText = FirstBreak == StringRef::npos
938 : LiteralText.substr(0, FirstBreak);
940 FirstLineText, BacktickToken->OriginalColumn, Style.
TabWidth, Encoding);
941 size_t LastBreak = LiteralText.rfind(
'\n');
942 if (LastBreak != StringRef::npos) {
943 BacktickToken->IsMultiline =
true;
944 unsigned StartColumn = 0;
945 BacktickToken->LastLineColumnWidth =
947 StartColumn, Style.
TabWidth, Encoding);
950 SourceLocation loc = Lex->getSourceLocation(Offset);
954void FormatTokenLexer::tryParsePythonComment() {
955 FormatToken *HashToken = Tokens.back();
956 if (!HashToken->isOneOf(tok::hash, tok::hashhash))
959 const char *CommentBegin =
960 Lex->getBufferLocation() - HashToken->TokenText.size();
961 size_t From = CommentBegin - Lex->getBuffer().begin();
962 size_t To = Lex->getBuffer().find_first_of(
'\n', From);
963 if (To == StringRef::npos)
964 To = Lex->getBuffer().size();
965 size_t Len = To - From;
966 HashToken->setType(TT_LineComment);
967 HashToken->Tok.setKind(tok::comment);
968 HashToken->TokenText = Lex->getBuffer().substr(From, Len);
969 SourceLocation
Loc = To < Lex->getBuffer().size()
970 ? Lex->getSourceLocation(CommentBegin + Len)
975bool FormatTokenLexer::tryMerge_TMacro() {
976 if (Tokens.size() < 4)
978 FormatToken *
Last = Tokens.back();
979 if (
Last->isNot(tok::r_paren))
982 FormatToken *String = Tokens[Tokens.size() - 2];
983 if (String->isNot(tok::string_literal) || String->IsMultiline)
986 if (Tokens[Tokens.size() - 3]->isNot(tok::l_paren))
989 FormatToken *
Macro = Tokens[Tokens.size() - 4];
990 if (
Macro->TokenText !=
"_T")
993 const char *Start =
Macro->TokenText.data();
994 const char *End =
Last->TokenText.data() +
Last->TokenText.size();
995 String->TokenText = StringRef(Start, End - Start);
996 String->IsFirst =
Macro->IsFirst;
997 String->LastNewlineOffset =
Macro->LastNewlineOffset;
998 String->WhitespaceRange =
Macro->WhitespaceRange;
999 String->OriginalColumn =
Macro->OriginalColumn;
1001 String->TokenText, String->OriginalColumn, Style.
TabWidth, Encoding);
1002 String->NewlinesBefore =
Macro->NewlinesBefore;
1003 String->HasUnescapedNewline =
Macro->HasUnescapedNewline;
1008 Tokens.back() = String;
1009 if (FirstInLineIndex >= Tokens.size())
1010 FirstInLineIndex = Tokens.size() - 1;
// Detects version-control conflict markers ("<<<<<<<", "|||||||", "=======",
// ">>>>>>>" and the Perforce "<<<<"/"===="/">>>>" variants) at the start of
// a line and collapses the line's tokens into a single conflict-marker
// token. NOTE(review): original lines 1016-1028, 1030-1033, 1036-1039,
// 1044-1047, 1055-1056, 1059-1063, 1066 and 1068-1071 are missing from this
// extract (including the Buffer/Type declarations and the return paths);
// comments describe visible fragments only.
1014bool FormatTokenLexer::tryMergeConflictMarkers() {
1015 if (Tokens.back()->NewlinesBefore == 0 && Tokens.back()->isNot(tok::eof))
// Locate the start of the line holding the first token of this logical line.
1029 unsigned FirstInLineOffset;
1031 Tokens[FirstInLineIndex]->getStartOfNonWhitespace());
1034 auto LineOffset = Buffer.rfind(
'\n', FirstInLineOffset);
1035 if (LineOffset == StringRef::npos)
// LineStart is the first whitespace-delimited word on that line.
1040 auto FirstSpace = Buffer.find_first_of(
" \n", LineOffset);
1041 StringRef LineStart;
1042 if (FirstSpace == StringRef::npos)
1043 LineStart = Buffer.substr(LineOffset);
1045 LineStart = Buffer.substr(LineOffset, FirstSpace - LineOffset);
// Classify the marker; ">>>>"/"====" are the Perforce-style variants.
1048 if (LineStart ==
"<<<<<<<" || LineStart ==
">>>>") {
1049 Type = TT_ConflictStart;
1050 }
else if (LineStart ==
"|||||||" || LineStart ==
"=======" ||
1051 LineStart ==
"====") {
1052 Type = TT_ConflictAlternative;
1053 }
else if (LineStart ==
">>>>>>>" || LineStart ==
"<<<<") {
1054 Type = TT_ConflictEnd;
// Fold the whole marker line into one sentinel token and re-append the
// token that followed it.
1057 if (Type != TT_Unknown) {
1058 FormatToken *Next = Tokens.back();
1060 Tokens.resize(FirstInLineIndex + 1);
1064 Tokens.back()->setType(Type);
1065 Tokens.back()->Tok.setKind(tok::kw___unknown_anytype);
1067 Tokens.push_back(Next);
// Creates a fresh FormatToken that copies the stashed token's kind and text
// onto a newly allocated FormatToken. NOTE(review): the tail of this
// function (original lines 1083 onward, which presumably finish populating
// the new token) is missing from this extract.
1074FormatToken *FormatTokenLexer::getStashedToken() {
// Snapshot the current token's state before allocating its replacement.
1076 Token Tok = FormatTok->
Tok;
1077 StringRef TokenText = FormatTok->
TokenText;
1080 FormatTok =
new (Allocator.Allocate()) FormatToken;
1081 FormatTok->
Tok = Tok;
1082 SourceLocation TokLocation =
1099void FormatTokenLexer::truncateToken(
size_t NewLen) {
1100 assert(NewLen <= FormatTok->TokenText.size());
1102 Lex->getBufferLocation() - FormatTok->
TokenText.size() + NewLen)));
1117 const unsigned char *
const Begin =
Text.bytes_begin();
1118 const unsigned char *
const End =
Text.bytes_end();
1119 const unsigned char *Cur =
Begin;
1121 if (isspace(Cur[0])) {
1123 }
else if (Cur[0] ==
'\\' && (Cur[1] ==
'\n' || Cur[1] ==
'\r')) {
1129 assert(End - Cur >= 2);
1131 }
else if (Cur[0] ==
'?' && Cur[1] ==
'?' && Cur[2] ==
'/' &&
1132 (Cur[3] ==
'\n' || Cur[3] ==
'\r')) {
1136 assert(End - Cur >= 4);
1145FormatToken *FormatTokenLexer::getNextToken() {
1148 return getStashedToken();
1151 FormatTok =
new (Allocator.Allocate()) FormatToken;
1152 readRawToken(*FormatTok);
1153 SourceLocation WhitespaceStart =
1155 FormatTok->
IsFirst = IsFirstToken;
1156 IsFirstToken =
false;
1162 unsigned WhitespaceLength = TrailingWhitespace;
1163 while (FormatTok->
isNot(tok::eof)) {
1165 if (LeadingWhitespace == 0)
1167 if (LeadingWhitespace < FormatTok->TokenText.size())
1168 truncateToken(LeadingWhitespace);
1170 bool InEscape =
false;
1171 for (
int i = 0, e =
Text.size(); i != e; ++i) {
1177 if (i + 1 < e &&
Text[i + 1] ==
'\n')
1192 i > 0 &&
Text[i - 1] ==
'\n' &&
1193 ((i + 1 < e &&
Text[i + 1] ==
'\n') ||
1194 (i + 2 < e &&
Text[i + 1] ==
'\r' &&
Text[i + 2] ==
'\n'))) {
1213 assert(
Text.substr(i, 2) ==
"\\\r" ||
Text.substr(i, 2) ==
"\\\n" ||
1214 Text.substr(i, 4) ==
"\?\?/\r" ||
1215 Text.substr(i, 4) ==
"\?\?/\n" ||
1216 (i >= 1 && (
Text.substr(i - 1, 4) ==
"\?\?/\r" ||
1217 Text.substr(i - 1, 4) ==
"\?\?/\n")) ||
1218 (i >= 2 && (
Text.substr(i - 2, 4) ==
"\?\?/\r" ||
1219 Text.substr(i - 2, 4) ==
"\?\?/\n")));
1228 WhitespaceLength +=
Text.size();
1229 readRawToken(*FormatTok);
1232 if (FormatTok->
is(tok::unknown))
1233 FormatTok->
setType(TT_ImplicitStringLiteral);
1243 FormatTok->
is(tok::comment) && FormatTok->
TokenText.starts_with(
"//")) {
1244 size_t BackslashPos = FormatTok->
TokenText.find(
'\\');
1245 while (BackslashPos != StringRef::npos) {
1246 if (BackslashPos + 1 < FormatTok->
TokenText.size() &&
1247 FormatTok->
TokenText[BackslashPos + 1] ==
'\n') {
1248 truncateToken(BackslashPos + 1);
1251 BackslashPos = FormatTok->
TokenText.find(
'\\', BackslashPos + 1);
1256 static const llvm::Regex NumberBase(
"^s?[bdho]", llvm::Regex::IgnoreCase);
1257 SmallVector<StringRef, 1> Matches;
1263 if (FormatTok->
is(tok::numeric_constant)) {
1265 auto Quote = FormatTok->
TokenText.find(
'\'');
1266 if (Quote != StringRef::npos)
1267 truncateToken(Quote);
1268 }
else if (FormatTok->
isOneOf(tok::hash, tok::hashhash)) {
1270 }
else if (FormatTok->
is(tok::raw_identifier)) {
1274 }
else if (FormatTok->
TokenText ==
"``") {
1277 }
else if (Tokens.size() > 0 &&
1279 NumberBase.match(FormatTok->
TokenText, &Matches)) {
1284 truncateToken(Matches[0].size());
1291 WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
1295 TrailingWhitespace = 0;
1296 if (FormatTok->
is(tok::comment)) {
1298 StringRef UntrimmedText = FormatTok->
TokenText;
1300 TrailingWhitespace = UntrimmedText.size() - FormatTok->
TokenText.size();
1301 }
else if (FormatTok->
is(tok::raw_identifier)) {
1302 IdentifierInfo &Info = IdentTable.
get(FormatTok->
TokenText);
1306 FormatTok->
isOneOf(tok::kw_struct, tok::kw_union, tok::kw_delete,
1307 tok::kw_operator)) {
1311 FormatTok->
isOneOf(tok::kw_struct, tok::kw_union,
1312 tok::kw_operator)) {
1319 }
else if (FormatTok->
is(tok::greatergreater)) {
1324 }
else if (FormatTok->
is(tok::lessless)) {
1331 if (Style.
isVerilog() && Tokens.size() > 0 &&
1332 Tokens.back()->is(TT_VerilogNumberBase) &&
1333 FormatTok->
Tok.
isOneOf(tok::identifier, tok::question)) {
1335 FormatTok->
Tok.
setKind(tok::numeric_constant);
1341 size_t FirstNewlinePos =
Text.find(
'\n');
1342 if (FirstNewlinePos == StringRef::npos) {
1353 Text.substr(0, FirstNewlinePos), Column, Style.
TabWidth, Encoding);
1362 if (Style.
isCpp()) {
1365 if (!(Tokens.size() > 0 && Tokens.back()->Tok.getIdentifierInfo() &&
1366 Tokens.back()->Tok.getIdentifierInfo()->getPPKeywordID() ==
1368 it != Macros.end()) {
1369 FormatTok->
setType(it->second);
1370 if (it->second == TT_IfMacro) {
1377 }
else if (FormatTok->
is(tok::identifier)) {
1378 if (MacroBlockBeginRegex.match(
Text))
1379 FormatTok->
setType(TT_MacroBlockBegin);
1380 else if (MacroBlockEndRegex.match(
Text))
1381 FormatTok->
setType(TT_MacroBlockEnd);
1386 else if (VariableTemplates.contains(
Identifier))
1394bool FormatTokenLexer::readRawTokenVerilogSpecific(Token &Tok) {
1409 static const llvm::Regex VerilogToken(R
"re(^('|``?|\\(\\)re"
1410 "(\r?\n|\r)|[^[:space:]])*)");
1412 SmallVector<StringRef, 4> Matches;
1413 const char *Start = Lex->getBufferLocation();
1414 if (!VerilogToken.match(StringRef(Start, Lex->getBuffer().end() - Start),
1420 if (Start[0] ==
'\\' && (Start[1] ==
'\r' || Start[1] ==
'\n'))
1422 size_t Len = Matches[0].size();
1427 Tok.setKind(tok::raw_identifier);
1429 Tok.setLocation(Lex->getSourceLocation(Start, Len));
1430 Tok.setRawIdentifierData(Start);
1431 Lex->seek(Lex->getCurrentBufferOffset() + Len,
false);
// Reads one raw token from the lexer into Tok, with Verilog-specific
// pre-lexing, unterminated-literal fixups, and clang-format on/off state
// handling. NOTE(review): original lines 1436-1437, 1442-1443, 1450-1453,
// 1455-1456, 1458, 1460-1461 and 1463-1464 are missing from this extract —
// including the conditions guarding the lines at 1453-1462; comments
// describe visible fragments only.
1435void FormatTokenLexer::readRawToken(FormatToken &Tok) {
// For Verilog, first try the special tokenizer; otherwise use the raw lexer.
1438 if (!Style.
isVerilog() || !readRawTokenVerilogSpecific(Tok.Tok))
1439 Lex->LexFromRawLexer(Tok.Tok);
1440 Tok.TokenText = StringRef(SourceMgr.
getCharacterData(Tok.Tok.getLocation()),
1441 Tok.Tok.getLength());
// Treat unterminated string literals like normal string literals so they
// are still formatted.
1444 if (Tok.is(tok::unknown)) {
1445 if (Tok.TokenText.starts_with(
"\"")) {
1446 Tok.Tok.setKind(tok::string_literal);
1447 Tok.IsUnterminatedLiteral =
true;
1448 }
else if (Style.
isJavaScript() && Tok.TokenText ==
"''") {
1449 Tok.Tok.setKind(tok::string_literal);
1454 Tok.Tok.setKind(tok::string_literal);
// Track the clang-format off/on comment state; tokens lexed while disabled
// are marked Finalized so later passes leave them untouched.
1457 FormattingDisabled =
false;
1459 Tok.Finalized = FormattingDisabled;
1462 FormattingDisabled =
true;
1465void FormatTokenLexer::resetLexer(
unsigned Offset) {
1468 Buffer.begin(), Buffer.begin() + Offset, Buffer.end()));
1469 Lex->SetKeepWhitespaceMode(
true);
1470 TrailingWhitespace = 0;
Defines the clang::SourceLocation class and associated facilities.
Defines the SourceManager interface.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with its #include path and macro expansion information.
Implements an efficient mapping from strings to IdentifierInfo nodes.
IdentifierInfo & get(StringRef Name)
Return the identifier token info for the specified named identifier.
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens.
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
This class handles loading and caching of source files into memory.
unsigned getFileOffset(SourceLocation SpellingLoc) const
Returns the offset from the start of the file that the specified SourceLocation represents.
StringRef getBufferData(FileID FID, bool *Invalid=nullptr) const
Return a StringRef to the source buffer data for the specified FileID.
SourceLocation getLocForEndOfFile(FileID FID) const
Return the source location corresponding to the last byte of the specified file.
const char * getCharacterData(SourceLocation SL, bool *Invalid=nullptr) const
Return a pointer to the start of the specified location in the appropriate spelling MemoryBuffer.
llvm::MemoryBufferRef getBufferOrFake(FileID FID, SourceLocation Loc=SourceLocation()) const
Return the buffer for the specified FileID.
std::pair< FileID, unsigned > getDecomposedLoc(SourceLocation Loc) const
Decompose the specified location into a raw FileID + Offset pair.
SourceLocation getLocForStartOfFile(FileID FID) const
Return the source location corresponding to the first byte of the specified file.
IdentifierInfo * getIdentifierInfo() const
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
void setLength(unsigned Len)
void setKind(tok::TokenKind K)
void setLocation(SourceLocation L)
bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const
void setIdentifierInfo(IdentifierInfo *II)
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
The JSON file list parser is used to communicate input to InstallAPI.