20#include "llvm/Support/Regex.h"
28 llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
31 Column(Column), TrailingWhitespace(0),
33 Style(Style), IdentTable(IdentTable), Keywords(IdentTable),
34 Encoding(Encoding), Allocator(Allocator), FirstInLineIndex(0),
37 Lex.reset(
new Lexer(
ID, SourceMgr.getBufferOrFake(
ID), SourceMgr, LangOpts));
38 Lex->SetKeepWhitespaceMode(
true);
41 auto Identifier = &IdentTable.get(ForEachMacro);
44 for (
const std::string &IfMacro : Style.
IfMacros) {
49 auto Identifier = &IdentTable.get(AttributeMacro);
50 Macros.insert({
Identifier, TT_AttributeMacro});
53 auto Identifier = &IdentTable.get(StatementMacro);
54 Macros.insert({
Identifier, TT_StatementMacro});
57 auto Identifier = &IdentTable.get(TypenameMacro);
61 auto Identifier = &IdentTable.get(NamespaceMacro);
62 Macros.insert({
Identifier, TT_NamespaceMacro});
64 for (
const std::string &WhitespaceSensitiveMacro :
66 auto Identifier = &IdentTable.get(WhitespaceSensitiveMacro);
67 Macros.insert({
Identifier, TT_UntouchableMacroFunc});
69 for (
const std::string &StatementAttributeLikeMacro :
71 auto Identifier = &IdentTable.get(StatementAttributeLikeMacro);
72 Macros.insert({
Identifier, TT_StatementAttributeLikeMacro});
75 for (
const auto &TypeName : Style.
TypeNames)
76 TypeNames.insert(&IdentTable.get(TypeName));
80 assert(Tokens.empty());
81 assert(FirstInLineIndex == 0);
83 Tokens.push_back(getNextToken());
85 tryParseJSRegexLiteral();
86 handleTemplateStrings();
89 tryParsePythonComment();
90 tryMergePreviousTokens();
94 handleCSharpVerbatimAndInterpolatedStrings();
96 if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
97 FirstInLineIndex = Tokens.size() - 1;
98 }
while (Tokens.back()->isNot(tok::eof));
102void FormatTokenLexer::tryMergePreviousTokens() {
103 if (tryMerge_TMacro())
105 if (tryMergeConflictMarkers())
107 if (tryMergeLessLess())
109 if (tryMergeGreaterGreater())
111 if (tryMergeForEach())
113 if (Style.
isCpp() && tryTransformTryUsageForC())
117 static const tok::TokenKind NullishCoalescingOperator[] = {tok::question,
119 static const tok::TokenKind NullPropagatingOperator[] = {tok::question,
121 static const tok::TokenKind FatArrow[] = {tok::equal, tok::greater};
123 if (tryMergeTokens(FatArrow, TT_FatArrow))
125 if (tryMergeTokens(NullishCoalescingOperator, TT_NullCoalescingOperator)) {
127 Tokens.back()->Tok.setKind(tok::pipepipe);
130 if (tryMergeTokens(NullPropagatingOperator, TT_NullPropagatingOperator)) {
132 Tokens.back()->Tok.setKind(tok::period);
135 if (tryMergeNullishCoalescingEqual())
141 tok::question, tok::l_square};
143 if (tryMergeCSharpKeywordVariables())
145 if (tryMergeCSharpStringLiteral())
147 if (tryTransformCSharpForEach())
149 if (tryMergeTokens(CSharpNullConditionalLSquare,
150 TT_CSharpNullConditionalLSquare)) {
152 Tokens.back()->Tok.setKind(tok::l_square);
157 if (tryMergeNSStringLiteral())
161 static const tok::TokenKind JSIdentity[] = {tok::equalequal, tok::equal};
164 static const tok::TokenKind JSShiftEqual[] = {tok::greater, tok::greater,
166 static const tok::TokenKind JSExponentiation[] = {tok::star, tok::star};
169 static const tok::TokenKind JSPipePipeEqual[] = {tok::pipepipe, tok::equal};
170 static const tok::TokenKind JSAndAndEqual[] = {tok::ampamp, tok::equal};
173 if (tryMergeTokens(JSIdentity, TT_BinaryOperator))
175 if (tryMergeTokens(JSNotIdentity, TT_BinaryOperator))
177 if (tryMergeTokens(JSShiftEqual, TT_BinaryOperator))
179 if (tryMergeTokens(JSExponentiation, TT_JsExponentiation))
181 if (tryMergeTokens(JSExponentiationEqual, TT_JsExponentiationEqual)) {
182 Tokens.back()->Tok.setKind(tok::starequal);
185 if (tryMergeTokens(JSAndAndEqual, TT_JsAndAndEqual) ||
186 tryMergeTokens(JSPipePipeEqual, TT_JsPipePipeEqual)) {
188 Tokens.back()->Tok.setKind(tok::equal);
191 if (tryMergeJSPrivateIdentifier())
197 tok::greater, tok::greater, tok::greaterequal};
198 if (tryMergeTokens(JavaRightLogicalShiftAssign, TT_BinaryOperator))
204 if (Tokens.size() >= 3 && Tokens.end()[-3]->is(TT_VerilogNumberBase) &&
205 Tokens.end()[-2]->is(tok::numeric_constant) &&
206 Tokens.back()->isOneOf(tok::numeric_constant, tok::identifier,
208 tryMergeTokens(2, TT_Unknown)) {
212 if (tryMergeTokensAny({{tok::minus, tok::colon}, {tok::plus, tok::colon}},
220 if (Tokens.back()->TokenText.size() == 1 &&
221 tryMergeTokensAny({{tok::caret, tok::tilde}, {tok::tilde, tok::caret}},
222 TT_BinaryOperator)) {
223 Tokens.back()->Tok.setKind(tok::caret);
227 if (tryMergeTokens({tok::less, tok::less}, TT_BinaryOperator)) {
228 Tokens.back()->Tok.setKind(tok::lessless);
231 if (tryMergeTokens({tok::greater, tok::greater}, TT_BinaryOperator)) {
232 Tokens.back()->Tok.setKind(tok::greatergreater);
235 if (tryMergeTokensAny({{tok::lessless, tok::equal},
236 {tok::lessless, tok::lessequal},
237 {tok::greatergreater, tok::equal},
238 {tok::greatergreater, tok::greaterequal},
239 {tok::colon, tok::equal},
240 {tok::colon, tok::slash}},
241 TT_BinaryOperator)) {
246 if (tryMergeTokensAny({{tok::star, tok::star},
247 {tok::lessless, tok::less},
248 {tok::greatergreater, tok::greater},
249 {tok::exclaimequal, tok::equal},
250 {tok::exclaimequal, tok::question},
251 {tok::equalequal, tok::equal},
252 {tok::equalequal, tok::question}},
253 TT_BinaryOperator)) {
257 if (tryMergeTokensAny({{tok::plusequal, tok::greater},
258 {tok::plus, tok::star, tok::greater},
259 {tok::minusequal, tok::greater},
260 {tok::minus, tok::star, tok::greater},
261 {tok::less, tok::arrow},
262 {tok::equal, tok::greater},
263 {tok::star, tok::greater},
264 {tok::pipeequal, tok::greater},
265 {tok::pipe, tok::arrow},
266 {tok::hash, tok::minus, tok::hash},
267 {tok::hash, tok::equal, tok::hash}},
268 TT_BinaryOperator)) {
275bool FormatTokenLexer::tryMergeNSStringLiteral() {
276 if (Tokens.size() < 2)
278 auto &At = *(Tokens.end() - 2);
279 auto &String = *(Tokens.end() - 1);
280 if (At->isNot(tok::at) || String->isNot(tok::string_literal))
282 At->Tok.setKind(tok::string_literal);
283 At->TokenText = StringRef(At->TokenText.begin(),
284 String->TokenText.end() - At->TokenText.begin());
285 At->ColumnWidth += String->ColumnWidth;
286 At->setType(TT_ObjCStringLiteral);
287 Tokens.erase(Tokens.end() - 1);
291bool FormatTokenLexer::tryMergeJSPrivateIdentifier() {
294 if (Tokens.size() < 2)
296 auto &Hash = *(Tokens.end() - 2);
298 if (Hash->isNot(tok::hash) ||
Identifier->isNot(tok::identifier))
300 Hash->Tok.setKind(tok::identifier);
302 StringRef(Hash->TokenText.begin(),
303 Identifier->TokenText.end() - Hash->TokenText.begin());
305 Hash->setType(TT_JsPrivateIdentifier);
306 Tokens.erase(Tokens.end() - 1);
315bool FormatTokenLexer::tryMergeCSharpStringLiteral() {
316 if (Tokens.size() < 2)
320 const auto String = *(Tokens.end() - 1);
321 if (String->isNot(tok::string_literal))
324 auto Prefix = *(Tokens.end() - 2);
325 if (Prefix->isNot(tok::at) && Prefix->TokenText !=
"$")
328 if (Tokens.size() > 2) {
329 const auto Tok = *(Tokens.end() - 3);
330 if ((Tok->TokenText ==
"$" && Prefix->is(tok::at)) ||
331 (Tok->is(tok::at) && Prefix->TokenText ==
"$")) {
333 Tok->ColumnWidth += Prefix->ColumnWidth;
334 Tokens.erase(Tokens.end() - 2);
340 Prefix->Tok.setKind(tok::string_literal);
342 StringRef(Prefix->TokenText.begin(),
343 String->TokenText.end() - Prefix->TokenText.begin());
344 Prefix->ColumnWidth += String->ColumnWidth;
345 Prefix->setType(TT_CSharpStringLiteral);
346 Tokens.erase(Tokens.end() - 1);
352const llvm::StringSet<> FormatTokenLexer::CSharpAttributeTargets = {
353 "assembly",
"module",
"field",
"event",
"method",
354 "param",
"property",
"return",
"type",
357bool FormatTokenLexer::tryMergeNullishCoalescingEqual() {
358 if (Tokens.size() < 2)
360 auto &NullishCoalescing = *(Tokens.end() - 2);
361 auto &
Equal = *(Tokens.end() - 1);
362 if (NullishCoalescing->getType() != TT_NullCoalescingOperator ||
363 Equal->isNot(tok::equal)) {
366 NullishCoalescing->Tok.setKind(tok::equal);
367 NullishCoalescing->TokenText =
368 StringRef(NullishCoalescing->TokenText.begin(),
369 Equal->TokenText.end() - NullishCoalescing->TokenText.begin());
370 NullishCoalescing->ColumnWidth +=
Equal->ColumnWidth;
371 NullishCoalescing->setType(TT_NullCoalescingEqual);
372 Tokens.erase(Tokens.end() - 1);
376bool FormatTokenLexer::tryMergeCSharpKeywordVariables() {
377 if (Tokens.size() < 2)
379 const auto At = *(Tokens.end() - 2);
380 if (At->isNot(tok::at))
382 const auto Keyword = *(Tokens.end() - 1);
383 if (Keyword->TokenText ==
"$")
388 At->Tok.setKind(tok::identifier);
389 At->TokenText = StringRef(At->TokenText.begin(),
390 Keyword->TokenText.end() - At->TokenText.begin());
391 At->ColumnWidth += Keyword->ColumnWidth;
392 At->setType(Keyword->getType());
393 Tokens.erase(Tokens.end() - 1);
398bool FormatTokenLexer::tryTransformCSharpForEach() {
399 if (Tokens.size() < 1)
412bool FormatTokenLexer::tryMergeForEach() {
413 if (Tokens.size() < 2)
415 auto &For = *(Tokens.end() - 2);
416 auto &Each = *(Tokens.end() - 1);
417 if (For->isNot(tok::kw_for))
419 if (Each->isNot(tok::identifier))
421 if (Each->TokenText !=
"each")
424 For->setType(TT_ForEachMacro);
425 For->Tok.setKind(tok::kw_for);
427 For->TokenText = StringRef(For->TokenText.begin(),
428 Each->TokenText.end() - For->TokenText.begin());
429 For->ColumnWidth += Each->ColumnWidth;
430 Tokens.erase(Tokens.end() - 1);
434bool FormatTokenLexer::tryTransformTryUsageForC() {
435 if (Tokens.size() < 2)
437 auto &Try = *(Tokens.end() - 2);
438 if (Try->isNot(tok::kw_try))
440 auto &Next = *(Tokens.end() - 1);
441 if (Next->isOneOf(tok::l_brace, tok::colon, tok::hash, tok::comment))
444 if (Tokens.size() > 2) {
445 auto &At = *(Tokens.end() - 3);
450 Try->Tok.setKind(tok::identifier);
454bool FormatTokenLexer::tryMergeLessLess() {
456 if (Tokens.size() < 3)
459 auto First = Tokens.end() - 3;
460 if (
First[0]->isNot(tok::less) ||
First[1]->isNot(tok::less))
464 if (
First[1]->hasWhitespaceBefore())
467 auto X = Tokens.size() > 3 ?
First[-1] :
nullptr;
468 if (
X &&
X->is(tok::less))
472 if ((!
X ||
X->isNot(tok::kw_operator)) && Y->is(tok::less))
475 First[0]->Tok.setKind(tok::lessless);
476 First[0]->TokenText =
"<<";
477 First[0]->ColumnWidth += 1;
478 Tokens.erase(Tokens.end() - 2);
482bool FormatTokenLexer::tryMergeGreaterGreater() {
484 if (Tokens.size() < 2)
487 auto First = Tokens.end() - 2;
488 if (
First[0]->isNot(tok::greater) ||
First[1]->isNot(tok::greater))
492 if (
First[1]->hasWhitespaceBefore())
495 auto Tok = Tokens.size() > 2 ?
First[-1] :
nullptr;
496 if (Tok && Tok->isNot(tok::kw_operator))
499 First[0]->Tok.setKind(tok::greatergreater);
500 First[0]->TokenText =
">>";
501 First[0]->ColumnWidth += 1;
502 Tokens.erase(Tokens.end() - 1);
506bool FormatTokenLexer::tryMergeTokens(ArrayRef<tok::TokenKind> Kinds,
508 if (Tokens.size() < Kinds.size())
511 SmallVectorImpl<FormatToken *>::const_iterator
First =
512 Tokens.end() - Kinds.size();
513 for (
unsigned i = 0; i < Kinds.size(); ++i)
514 if (
First[i]->isNot(Kinds[i]))
517 return tryMergeTokens(Kinds.size(), NewType);
520bool FormatTokenLexer::tryMergeTokens(
size_t Count,
TokenType NewType) {
521 if (Tokens.size() < Count)
524 SmallVectorImpl<FormatToken *>::const_iterator
First = Tokens.end() - Count;
525 unsigned AddLength = 0;
526 for (
size_t i = 1; i < Count; ++i) {
529 if (
First[i]->hasWhitespaceBefore())
531 AddLength +=
First[i]->TokenText.size();
534 Tokens.resize(Tokens.size() - Count + 1);
535 First[0]->TokenText = StringRef(
First[0]->TokenText.data(),
536 First[0]->TokenText.size() + AddLength);
537 First[0]->ColumnWidth += AddLength;
538 First[0]->setType(NewType);
542bool FormatTokenLexer::tryMergeTokensAny(
543 ArrayRef<ArrayRef<tok::TokenKind>> Kinds,
TokenType NewType) {
544 return llvm::any_of(Kinds, [
this, NewType](ArrayRef<tok::TokenKind> Kinds) {
545 return tryMergeTokens(Kinds, NewType);
550bool FormatTokenLexer::precedesOperand(FormatToken *Tok) {
554 return Tok->isOneOf(tok::period, tok::l_paren, tok::comma, tok::l_brace,
555 tok::r_brace, tok::l_square, tok::semi, tok::exclaim,
556 tok::colon, tok::question, tok::tilde) ||
557 Tok->isOneOf(tok::kw_return, tok::kw_do, tok::kw_case, tok::kw_throw,
558 tok::kw_else, tok::kw_new, tok::kw_delete, tok::kw_void,
560 Tok->isBinaryOperator();
563bool FormatTokenLexer::canPrecedeRegexLiteral(FormatToken *Prev) {
573 if (Prev->isOneOf(tok::plusplus, tok::minusminus, tok::exclaim))
574 return Tokens.size() < 3 || precedesOperand(Tokens[Tokens.size() - 3]);
578 if (!precedesOperand(Prev))
588void FormatTokenLexer::tryParseJSRegexLiteral() {
589 FormatToken *RegexToken = Tokens.back();
590 if (!RegexToken->isOneOf(tok::slash, tok::slashequal))
593 FormatToken *Prev =
nullptr;
594 for (FormatToken *FT : llvm::drop_begin(llvm::reverse(Tokens))) {
597 if (FT->isNot(tok::comment)) {
603 if (!canPrecedeRegexLiteral(Prev))
607 const char *Offset = Lex->getBufferLocation();
608 const char *RegexBegin = Offset - RegexToken->TokenText.size();
609 StringRef Buffer = Lex->getBuffer();
610 bool InCharacterClass =
false;
611 bool HaveClosingSlash =
false;
612 for (; !HaveClosingSlash && Offset != Buffer.end(); ++Offset) {
622 InCharacterClass =
true;
625 InCharacterClass =
false;
628 if (!InCharacterClass)
629 HaveClosingSlash =
true;
634 RegexToken->setType(TT_RegexLiteral);
636 RegexToken->Tok.setKind(tok::string_literal);
637 RegexToken->TokenText = StringRef(RegexBegin, Offset - RegexBegin);
638 RegexToken->ColumnWidth = RegexToken->TokenText.size();
640 resetLexer(SourceMgr.
getFileOffset(Lex->getSourceLocation(Offset)));
645 auto Repeated = [&
Begin, End]() {
661 for (
int UnmatchedOpeningBraceCount = 0;
Begin < End; ++
Begin) {
673 ++UnmatchedOpeningBraceCount;
681 else if (UnmatchedOpeningBraceCount > 0)
682 --UnmatchedOpeningBraceCount;
688 if (UnmatchedOpeningBraceCount > 0)
691 if (Verbatim && Repeated()) {
702void FormatTokenLexer::handleCSharpVerbatimAndInterpolatedStrings() {
703 FormatToken *CSharpStringLiteral = Tokens.back();
705 if (CSharpStringLiteral->isNot(TT_CSharpStringLiteral))
708 auto &TokenText = CSharpStringLiteral->TokenText;
710 bool Verbatim =
false;
711 bool Interpolated =
false;
712 if (TokenText.startswith(R
"($@")") || TokenText.startswith(R"(@$")")) {
715 }
else if (TokenText.startswith(R
"(@")")) {
717 }
else if (TokenText.startswith(R
"($")")) {
722 if (!Verbatim && !Interpolated)
725 const char *StrBegin = Lex->getBufferLocation() - TokenText.size();
726 const char *Offset = StrBegin;
727 if (Verbatim && Interpolated)
732 const auto End = Lex->getBuffer().end();
740 StringRef LiteralText(StrBegin, Offset - StrBegin + 1);
741 TokenText = LiteralText;
744 size_t FirstBreak = LiteralText.find(
'\n');
745 StringRef FirstLineText = FirstBreak == StringRef::npos
747 : LiteralText.substr(0, FirstBreak);
749 FirstLineText, CSharpStringLiteral->OriginalColumn, Style.
TabWidth,
751 size_t LastBreak = LiteralText.rfind(
'\n');
752 if (LastBreak != StringRef::npos) {
753 CSharpStringLiteral->IsMultiline =
true;
754 unsigned StartColumn = 0;
755 CSharpStringLiteral->LastLineColumnWidth =
757 StartColumn, Style.
TabWidth, Encoding);
760 assert(Offset < End);
761 resetLexer(SourceMgr.
getFileOffset(Lex->getSourceLocation(Offset + 1)));
764void FormatTokenLexer::handleTemplateStrings() {
765 FormatToken *BacktickToken = Tokens.back();
767 if (BacktickToken->is(tok::l_brace)) {
771 if (BacktickToken->is(tok::r_brace)) {
772 if (StateStack.size() == 1)
778 }
else if (BacktickToken->is(tok::unknown) &&
779 BacktickToken->TokenText ==
"`") {
786 const char *Offset = Lex->getBufferLocation();
787 const char *TmplBegin = Offset - BacktickToken->TokenText.size();
788 for (; Offset != Lex->getBuffer().end(); ++Offset) {
789 if (Offset[0] ==
'`') {
794 if (Offset[0] ==
'\\') {
796 }
else if (Offset + 1 < Lex->getBuffer().end() && Offset[0] ==
'$' &&
805 StringRef LiteralText(TmplBegin, Offset - TmplBegin);
806 BacktickToken->setType(TT_TemplateString);
807 BacktickToken->Tok.setKind(tok::string_literal);
808 BacktickToken->TokenText = LiteralText;
811 size_t FirstBreak = LiteralText.find(
'\n');
812 StringRef FirstLineText = FirstBreak == StringRef::npos
814 : LiteralText.substr(0, FirstBreak);
816 FirstLineText, BacktickToken->OriginalColumn, Style.
TabWidth, Encoding);
817 size_t LastBreak = LiteralText.rfind(
'\n');
818 if (LastBreak != StringRef::npos) {
819 BacktickToken->IsMultiline =
true;
820 unsigned StartColumn = 0;
821 BacktickToken->LastLineColumnWidth =
823 StartColumn, Style.
TabWidth, Encoding);
826 SourceLocation loc = Lex->getSourceLocation(Offset);
830void FormatTokenLexer::tryParsePythonComment() {
831 FormatToken *HashToken = Tokens.back();
832 if (!HashToken->isOneOf(tok::hash, tok::hashhash))
835 const char *CommentBegin =
836 Lex->getBufferLocation() - HashToken->TokenText.size();
837 size_t From = CommentBegin - Lex->getBuffer().begin();
838 size_t To = Lex->getBuffer().find_first_of(
'\n', From);
839 if (To == StringRef::npos)
840 To = Lex->getBuffer().size();
841 size_t Len = To - From;
842 HashToken->setType(TT_LineComment);
843 HashToken->Tok.setKind(tok::comment);
844 HashToken->TokenText = Lex->getBuffer().substr(From, Len);
845 SourceLocation Loc = To < Lex->getBuffer().size()
846 ? Lex->getSourceLocation(CommentBegin + Len)
851bool FormatTokenLexer::tryMerge_TMacro() {
852 if (Tokens.size() < 4)
854 FormatToken *
Last = Tokens.back();
855 if (
Last->isNot(tok::r_paren))
858 FormatToken *String = Tokens[Tokens.size() - 2];
859 if (String->isNot(tok::string_literal) || String->IsMultiline)
862 if (Tokens[Tokens.size() - 3]->isNot(tok::l_paren))
865 FormatToken *
Macro = Tokens[Tokens.size() - 4];
866 if (
Macro->TokenText !=
"_T")
869 const char *Start =
Macro->TokenText.data();
870 const char *End =
Last->TokenText.data() +
Last->TokenText.size();
871 String->TokenText = StringRef(Start, End - Start);
872 String->IsFirst =
Macro->IsFirst;
873 String->LastNewlineOffset =
Macro->LastNewlineOffset;
874 String->WhitespaceRange =
Macro->WhitespaceRange;
875 String->OriginalColumn =
Macro->OriginalColumn;
877 String->TokenText, String->OriginalColumn, Style.
TabWidth, Encoding);
878 String->NewlinesBefore =
Macro->NewlinesBefore;
879 String->HasUnescapedNewline =
Macro->HasUnescapedNewline;
884 Tokens.back() = String;
885 if (FirstInLineIndex >= Tokens.size())
886 FirstInLineIndex = Tokens.size() - 1;
890bool FormatTokenLexer::tryMergeConflictMarkers() {
891 if (Tokens.back()->NewlinesBefore == 0 && Tokens.back()->isNot(tok::eof))
905 unsigned FirstInLineOffset;
907 Tokens[FirstInLineIndex]->getStartOfNonWhitespace());
910 auto LineOffset = Buffer.rfind(
'\n', FirstInLineOffset);
911 if (LineOffset == StringRef::npos)
916 auto FirstSpace = Buffer.find_first_of(
" \n", LineOffset);
918 if (FirstSpace == StringRef::npos)
919 LineStart = Buffer.substr(LineOffset);
921 LineStart = Buffer.substr(LineOffset, FirstSpace - LineOffset);
924 if (LineStart ==
"<<<<<<<" || LineStart ==
">>>>") {
925 Type = TT_ConflictStart;
926 }
else if (LineStart ==
"|||||||" || LineStart ==
"=======" ||
927 LineStart ==
"====") {
928 Type = TT_ConflictAlternative;
929 }
else if (LineStart ==
">>>>>>>" || LineStart ==
"<<<<") {
930 Type = TT_ConflictEnd;
933 if (Type != TT_Unknown) {
934 FormatToken *Next = Tokens.back();
936 Tokens.resize(FirstInLineIndex + 1);
940 Tokens.back()->setType(Type);
941 Tokens.back()->Tok.setKind(tok::kw___unknown_anytype);
943 Tokens.push_back(Next);
950FormatToken *FormatTokenLexer::getStashedToken() {
952 Token Tok = FormatTok->
Tok;
953 StringRef TokenText = FormatTok->
TokenText;
956 FormatTok =
new (Allocator.Allocate()) FormatToken;
957 FormatTok->
Tok = Tok;
958 SourceLocation TokLocation =
975void FormatTokenLexer::truncateToken(
size_t NewLen) {
976 assert(NewLen <= FormatTok->TokenText.size());
978 Lex->getBufferLocation() - FormatTok->
TokenText.size() + NewLen)));
993 const unsigned char *
const Begin =
Text.bytes_begin();
994 const unsigned char *
const End =
Text.bytes_end();
995 const unsigned char *Cur =
Begin;
997 if (isspace(Cur[0])) {
999 }
else if (Cur[0] ==
'\\' && (Cur[1] ==
'\n' || Cur[1] ==
'\r')) {
1005 assert(End - Cur >= 2);
1007 }
else if (Cur[0] ==
'?' && Cur[1] ==
'?' && Cur[2] ==
'/' &&
1008 (Cur[3] ==
'\n' || Cur[3] ==
'\r')) {
1012 assert(End - Cur >= 4);
1021FormatToken *FormatTokenLexer::getNextToken() {
1024 return getStashedToken();
1027 FormatTok =
new (Allocator.Allocate()) FormatToken;
1028 readRawToken(*FormatTok);
1029 SourceLocation WhitespaceStart =
1031 FormatTok->
IsFirst = IsFirstToken;
1032 IsFirstToken =
false;
1038 unsigned WhitespaceLength = TrailingWhitespace;
1039 while (FormatTok->
isNot(tok::eof)) {
1041 if (LeadingWhitespace == 0)
1043 if (LeadingWhitespace < FormatTok->TokenText.size())
1044 truncateToken(LeadingWhitespace);
1046 bool InEscape =
false;
1047 for (
int i = 0, e =
Text.size(); i != e; ++i) {
1053 if (i + 1 < e &&
Text[i + 1] ==
'\n')
1081 assert(
Text.substr(i, 2) ==
"\\\r" ||
Text.substr(i, 2) ==
"\\\n" ||
1082 Text.substr(i, 4) ==
"\?\?/\r" ||
1083 Text.substr(i, 4) ==
"\?\?/\n" ||
1084 (i >= 1 && (
Text.substr(i - 1, 4) ==
"\?\?/\r" ||
1085 Text.substr(i - 1, 4) ==
"\?\?/\n")) ||
1086 (i >= 2 && (
Text.substr(i - 2, 4) ==
"\?\?/\r" ||
1087 Text.substr(i - 2, 4) ==
"\?\?/\n")));
1096 WhitespaceLength +=
Text.size();
1097 readRawToken(*FormatTok);
1100 if (FormatTok->
is(tok::unknown))
1101 FormatTok->
setType(TT_ImplicitStringLiteral);
1111 FormatTok->
is(tok::comment) && FormatTok->
TokenText.startswith(
"//")) {
1112 size_t BackslashPos = FormatTok->
TokenText.find(
'\\');
1113 while (BackslashPos != StringRef::npos) {
1114 if (BackslashPos + 1 < FormatTok->
TokenText.size() &&
1115 FormatTok->
TokenText[BackslashPos + 1] ==
'\n') {
1116 truncateToken(BackslashPos + 1);
1119 BackslashPos = FormatTok->
TokenText.find(
'\\', BackslashPos + 1);
1124 static const llvm::Regex NumberBase(
"^s?[bdho]", llvm::Regex::IgnoreCase);
1125 SmallVector<StringRef, 1> Matches;
1131 if (FormatTok->
is(tok::numeric_constant)) {
1133 auto Quote = FormatTok->
TokenText.find(
'\'');
1134 if (Quote != StringRef::npos)
1135 truncateToken(Quote);
1136 }
else if (FormatTok->
isOneOf(tok::hash, tok::hashhash)) {
1138 }
else if (FormatTok->
is(tok::raw_identifier)) {
1142 }
else if (FormatTok->
TokenText ==
"``") {
1145 }
else if (Tokens.size() > 0 &&
1147 NumberBase.match(FormatTok->
TokenText, &Matches)) {
1152 truncateToken(Matches[0].size());
1159 WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
1163 TrailingWhitespace = 0;
1164 if (FormatTok->
is(tok::comment)) {
1166 StringRef UntrimmedText = FormatTok->
TokenText;
1168 TrailingWhitespace = UntrimmedText.size() - FormatTok->
TokenText.size();
1169 }
else if (FormatTok->
is(tok::raw_identifier)) {
1170 IdentifierInfo &Info = IdentTable.
get(FormatTok->
TokenText);
1174 FormatTok->
isOneOf(tok::kw_struct, tok::kw_union, tok::kw_delete,
1175 tok::kw_operator)) {
1179 FormatTok->
isOneOf(tok::kw_struct, tok::kw_union,
1180 tok::kw_operator)) {
1184 }
else if (FormatTok->
is(tok::greatergreater)) {
1189 }
else if (FormatTok->
is(tok::lessless)) {
1196 if (Style.
isVerilog() && Tokens.size() > 0 &&
1197 Tokens.back()->is(TT_VerilogNumberBase) &&
1198 FormatTok->
Tok.
isOneOf(tok::identifier, tok::question)) {
1200 FormatTok->
Tok.
setKind(tok::numeric_constant);
1206 size_t FirstNewlinePos =
Text.find(
'\n');
1207 if (FirstNewlinePos == StringRef::npos) {
1218 Text.substr(0, FirstNewlinePos), Column, Style.
TabWidth, Encoding);
1227 if (Style.
isCpp()) {
1230 if (!(Tokens.size() > 0 && Tokens.back()->Tok.getIdentifierInfo() &&
1231 Tokens.back()->Tok.getIdentifierInfo()->getPPKeywordID() ==
1233 it != Macros.end()) {
1234 FormatTok->
setType(it->second);
1235 if (it->second == TT_IfMacro) {
1242 }
else if (FormatTok->
is(tok::identifier)) {
1243 if (MacroBlockBeginRegex.match(
Text))
1244 FormatTok->
setType(TT_MacroBlockBegin);
1245 else if (MacroBlockEndRegex.match(
Text))
1246 FormatTok->
setType(TT_MacroBlockEnd);
1255bool FormatTokenLexer::readRawTokenVerilogSpecific(Token &Tok) {
1270 static const llvm::Regex VerilogToken(R
"re(^('|``?|\\(\\)re"
1271 "(\r?\n|\r)|[^[:space:]])*)");
1273 SmallVector<StringRef, 4> Matches;
1274 const char *Start = Lex->getBufferLocation();
1275 if (!VerilogToken.match(StringRef(Start, Lex->getBuffer().end() - Start),
1281 if (Start[0] ==
'\\' && (Start[1] ==
'\r' || Start[1] ==
'\n'))
1283 size_t Len = Matches[0].size();
1288 Tok.setKind(tok::raw_identifier);
1290 Tok.setLocation(Lex->getSourceLocation(Start, Len));
1291 Tok.setRawIdentifierData(Start);
1292 Lex->seek(Lex->getCurrentBufferOffset() + Len,
false);
1296void FormatTokenLexer::readRawToken(FormatToken &Tok) {
1299 if (!Style.
isVerilog() || !readRawTokenVerilogSpecific(Tok.Tok))
1300 Lex->LexFromRawLexer(Tok.Tok);
1301 Tok.TokenText = StringRef(SourceMgr.
getCharacterData(Tok.Tok.getLocation()),
1302 Tok.Tok.getLength());
1305 if (Tok.is(tok::unknown)) {
1306 if (!Tok.TokenText.empty() && Tok.TokenText[0] ==
'"') {
1307 Tok.Tok.setKind(tok::string_literal);
1308 Tok.IsUnterminatedLiteral =
true;
1309 }
else if (Style.
isJavaScript() && Tok.TokenText ==
"''") {
1310 Tok.Tok.setKind(tok::string_literal);
1316 Tok.is(tok::char_constant)) {
1317 Tok.Tok.setKind(tok::string_literal);
1321 FormattingDisabled =
false;
1323 Tok.Finalized = FormattingDisabled;
1326 FormattingDisabled =
true;
1329void FormatTokenLexer::resetLexer(
unsigned Offset) {
1333 Buffer.begin(), Buffer.begin() + Offset, Buffer.end()));
1334 Lex->SetKeepWhitespaceMode(
true);
1335 TrailingWhitespace = 0;
Defines the clang::SourceLocation class and associated facilities.
Defines the SourceManager interface.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with its #include path and #line data.
Implements an efficient mapping from strings to IdentifierInfo nodes.
IdentifierInfo & get(StringRef Name)
Return the identifier token info for the specified named identifier.
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens.
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
This class handles loading and caching of source files into memory.
unsigned getFileOffset(SourceLocation SpellingLoc) const
Returns the offset from the start of the file that the specified SourceLocation represents.
StringRef getBufferData(FileID FID, bool *Invalid=nullptr) const
Return a StringRef to the source buffer data for the specified FileID.
SourceLocation getLocForEndOfFile(FileID FID) const
Return the source location corresponding to the last byte of the specified file.
const char * getCharacterData(SourceLocation SL, bool *Invalid=nullptr) const
Return a pointer to the start of the specified location in the appropriate spelling MemoryBuffer.
llvm::MemoryBufferRef getBufferOrFake(FileID FID, SourceLocation Loc=SourceLocation()) const
Return the buffer for the specified FileID.
std::pair< FileID, unsigned > getDecomposedLoc(SourceLocation Loc) const
Decompose the specified location into a raw FileID + Offset pair.
SourceLocation getLocForStartOfFile(FileID FID) const
Return the source location corresponding to the first byte of the specified file.
IdentifierInfo * getIdentifierInfo() const
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
void setLength(unsigned Len)
void setKind(tok::TokenKind K)
void setLocation(SourceLocation L)
bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const
void setIdentifierInfo(IdentifierInfo *II)
TokenKind
Provides a simple uniform namespace for tokens from all C languages.