20#include "llvm/Support/Regex.h"
28 llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
31 Column(Column), TrailingWhitespace(0),
33 Style(Style), IdentTable(IdentTable), Keywords(IdentTable),
34 Encoding(Encoding), Allocator(Allocator), FirstInLineIndex(0),
37 Lex.reset(
new Lexer(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts));
38 Lex->SetKeepWhitespaceMode(
true);
41 auto Identifier = &IdentTable.get(ForEachMacro);
44 for (
const std::string &IfMacro : Style.
IfMacros) {
49 auto Identifier = &IdentTable.get(AttributeMacro);
50 Macros.insert({
Identifier, TT_AttributeMacro});
53 auto Identifier = &IdentTable.get(StatementMacro);
54 Macros.insert({
Identifier, TT_StatementMacro});
57 auto Identifier = &IdentTable.get(TypenameMacro);
61 auto Identifier = &IdentTable.get(NamespaceMacro);
62 Macros.insert({
Identifier, TT_NamespaceMacro});
64 for (
const std::string &WhitespaceSensitiveMacro :
66 auto Identifier = &IdentTable.get(WhitespaceSensitiveMacro);
67 Macros.insert({
Identifier, TT_UntouchableMacroFunc});
69 for (
const std::string &StatementAttributeLikeMacro :
71 auto Identifier = &IdentTable.get(StatementAttributeLikeMacro);
72 Macros.insert({
Identifier, TT_StatementAttributeLikeMacro});
77 assert(Tokens.empty());
78 assert(FirstInLineIndex == 0);
80 Tokens.push_back(getNextToken());
82 tryParseJSRegexLiteral();
83 handleTemplateStrings();
86 tryParsePythonComment();
87 tryMergePreviousTokens();
91 handleCSharpVerbatimAndInterpolatedStrings();
93 if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
94 FirstInLineIndex = Tokens.size() - 1;
95 }
while (Tokens.back()->isNot(tok::eof));
99void FormatTokenLexer::tryMergePreviousTokens() {
100 if (tryMerge_TMacro())
102 if (tryMergeConflictMarkers())
104 if (tryMergeLessLess())
106 if (tryMergeGreaterGreater())
108 if (tryMergeForEach())
110 if (Style.
isCpp() && tryTransformTryUsageForC())
114 static const tok::TokenKind NullishCoalescingOperator[] = {tok::question,
116 static const tok::TokenKind NullPropagatingOperator[] = {tok::question,
118 static const tok::TokenKind FatArrow[] = {tok::equal, tok::greater};
120 if (tryMergeTokens(FatArrow, TT_FatArrow))
122 if (tryMergeTokens(NullishCoalescingOperator, TT_NullCoalescingOperator)) {
124 Tokens.back()->Tok.setKind(tok::pipepipe);
127 if (tryMergeTokens(NullPropagatingOperator, TT_NullPropagatingOperator)) {
129 Tokens.back()->Tok.setKind(tok::period);
132 if (tryMergeNullishCoalescingEqual())
138 tok::question, tok::l_square};
140 if (tryMergeCSharpKeywordVariables())
142 if (tryMergeCSharpStringLiteral())
144 if (tryTransformCSharpForEach())
146 if (tryMergeTokens(CSharpNullConditionalLSquare,
147 TT_CSharpNullConditionalLSquare)) {
149 Tokens.back()->Tok.setKind(tok::l_square);
154 if (tryMergeNSStringLiteral())
158 static const tok::TokenKind JSIdentity[] = {tok::equalequal, tok::equal};
161 static const tok::TokenKind JSShiftEqual[] = {tok::greater, tok::greater,
163 static const tok::TokenKind JSExponentiation[] = {tok::star, tok::star};
166 static const tok::TokenKind JSPipePipeEqual[] = {tok::pipepipe, tok::equal};
167 static const tok::TokenKind JSAndAndEqual[] = {tok::ampamp, tok::equal};
170 if (tryMergeTokens(JSIdentity, TT_BinaryOperator))
172 if (tryMergeTokens(JSNotIdentity, TT_BinaryOperator))
174 if (tryMergeTokens(JSShiftEqual, TT_BinaryOperator))
176 if (tryMergeTokens(JSExponentiation, TT_JsExponentiation))
178 if (tryMergeTokens(JSExponentiationEqual, TT_JsExponentiationEqual)) {
179 Tokens.back()->Tok.setKind(tok::starequal);
182 if (tryMergeTokens(JSAndAndEqual, TT_JsAndAndEqual) ||
183 tryMergeTokens(JSPipePipeEqual, TT_JsPipePipeEqual)) {
185 Tokens.back()->Tok.setKind(tok::equal);
188 if (tryMergeJSPrivateIdentifier())
194 tok::greater, tok::greater, tok::greaterequal};
195 if (tryMergeTokens(JavaRightLogicalShiftAssign, TT_BinaryOperator))
201 if (Tokens.size() >= 3 && Tokens.end()[-3]->is(TT_VerilogNumberBase) &&
202 Tokens.end()[-2]->is(tok::numeric_constant) &&
203 Tokens.back()->isOneOf(tok::numeric_constant, tok::identifier,
205 tryMergeTokens(2, TT_Unknown)) {
209 if (tryMergeTokensAny({{tok::minus, tok::colon}, {tok::plus, tok::colon}},
217 if (Tokens.back()->TokenText.size() == 1 &&
218 tryMergeTokensAny({{tok::caret, tok::tilde}, {tok::tilde, tok::caret}},
219 TT_BinaryOperator)) {
220 Tokens.back()->Tok.setKind(tok::caret);
224 if (tryMergeTokens({tok::less, tok::less}, TT_BinaryOperator)) {
225 Tokens.back()->Tok.setKind(tok::lessless);
228 if (tryMergeTokens({tok::greater, tok::greater}, TT_BinaryOperator)) {
229 Tokens.back()->Tok.setKind(tok::greatergreater);
232 if (tryMergeTokensAny({{tok::lessless, tok::equal},
233 {tok::lessless, tok::lessequal},
234 {tok::greatergreater, tok::equal},
235 {tok::greatergreater, tok::greaterequal},
236 {tok::colon, tok::equal},
237 {tok::colon, tok::slash}},
238 TT_BinaryOperator)) {
243 if (tryMergeTokensAny({{tok::star, tok::star},
244 {tok::lessless, tok::less},
245 {tok::greatergreater, tok::greater},
246 {tok::exclaimequal, tok::equal},
247 {tok::exclaimequal, tok::question},
248 {tok::equalequal, tok::equal},
249 {tok::equalequal, tok::question}},
250 TT_BinaryOperator)) {
254 if (tryMergeTokensAny({{tok::plusequal, tok::greater},
255 {tok::plus, tok::star, tok::greater},
256 {tok::minusequal, tok::greater},
257 {tok::minus, tok::star, tok::greater},
258 {tok::less, tok::arrow},
259 {tok::equal, tok::greater},
260 {tok::star, tok::greater},
261 {tok::pipeequal, tok::greater},
262 {tok::pipe, tok::arrow},
263 {tok::hash, tok::minus, tok::hash},
264 {tok::hash, tok::equal, tok::hash}},
265 TT_BinaryOperator)) {
272bool FormatTokenLexer::tryMergeNSStringLiteral() {
273 if (Tokens.size() < 2)
275 auto &At = *(Tokens.end() - 2);
276 auto &String = *(Tokens.end() - 1);
277 if (!At->is(tok::at) || !String->is(tok::string_literal))
279 At->Tok.setKind(tok::string_literal);
280 At->TokenText = StringRef(At->TokenText.begin(),
281 String->TokenText.end() - At->TokenText.begin());
282 At->ColumnWidth += String->ColumnWidth;
283 At->setType(TT_ObjCStringLiteral);
284 Tokens.erase(Tokens.end() - 1);
288bool FormatTokenLexer::tryMergeJSPrivateIdentifier() {
291 if (Tokens.size() < 2)
293 auto &Hash = *(Tokens.end() - 2);
295 if (!Hash->is(tok::hash) || !
Identifier->is(tok::identifier))
297 Hash->Tok.setKind(tok::identifier);
299 StringRef(Hash->TokenText.begin(),
300 Identifier->TokenText.end() - Hash->TokenText.begin());
302 Hash->setType(TT_JsPrivateIdentifier);
303 Tokens.erase(Tokens.end() - 1);
312bool FormatTokenLexer::tryMergeCSharpStringLiteral() {
313 if (Tokens.size() < 2)
317 const auto String = *(Tokens.end() - 1);
318 if (String->isNot(tok::string_literal))
321 auto Prefix = *(Tokens.end() - 2);
322 if (Prefix->isNot(tok::at) && Prefix->TokenText !=
"$")
325 if (Tokens.size() > 2) {
326 const auto Tok = *(Tokens.end() - 3);
327 if ((Tok->TokenText ==
"$" && Prefix->is(tok::at)) ||
328 (Tok->is(tok::at) && Prefix->TokenText ==
"$")) {
330 Tok->ColumnWidth += Prefix->ColumnWidth;
331 Tokens.erase(Tokens.end() - 2);
337 Prefix->Tok.setKind(tok::string_literal);
339 StringRef(Prefix->TokenText.begin(),
340 String->TokenText.end() - Prefix->TokenText.begin());
341 Prefix->ColumnWidth += String->ColumnWidth;
342 Prefix->setType(TT_CSharpStringLiteral);
343 Tokens.erase(Tokens.end() - 1);
349const llvm::StringSet<> FormatTokenLexer::CSharpAttributeTargets = {
350 "assembly",
"module",
"field",
"event",
"method",
351 "param",
"property",
"return",
"type",
354bool FormatTokenLexer::tryMergeNullishCoalescingEqual() {
355 if (Tokens.size() < 2)
357 auto &NullishCoalescing = *(Tokens.end() - 2);
358 auto &
Equal = *(Tokens.end() - 1);
359 if (NullishCoalescing->getType() != TT_NullCoalescingOperator ||
360 !
Equal->is(tok::equal)) {
363 NullishCoalescing->Tok.setKind(tok::equal);
364 NullishCoalescing->TokenText =
365 StringRef(NullishCoalescing->TokenText.begin(),
366 Equal->TokenText.end() - NullishCoalescing->TokenText.begin());
367 NullishCoalescing->ColumnWidth +=
Equal->ColumnWidth;
368 NullishCoalescing->setType(TT_NullCoalescingEqual);
369 Tokens.erase(Tokens.end() - 1);
373bool FormatTokenLexer::tryMergeCSharpKeywordVariables() {
374 if (Tokens.size() < 2)
376 const auto At = *(Tokens.end() - 2);
377 if (At->isNot(tok::at))
379 const auto Keyword = *(Tokens.end() - 1);
380 if (Keyword->TokenText ==
"$")
385 At->Tok.setKind(tok::identifier);
386 At->TokenText = StringRef(At->TokenText.begin(),
387 Keyword->TokenText.end() - At->TokenText.begin());
388 At->ColumnWidth += Keyword->ColumnWidth;
389 At->setType(Keyword->getType());
390 Tokens.erase(Tokens.end() - 1);
395bool FormatTokenLexer::tryTransformCSharpForEach() {
396 if (Tokens.size() < 1)
409bool FormatTokenLexer::tryMergeForEach() {
410 if (Tokens.size() < 2)
412 auto &For = *(Tokens.end() - 2);
413 auto &Each = *(Tokens.end() - 1);
414 if (!For->is(tok::kw_for))
416 if (!Each->is(tok::identifier))
418 if (Each->TokenText !=
"each")
421 For->setType(TT_ForEachMacro);
422 For->Tok.setKind(tok::kw_for);
424 For->TokenText = StringRef(For->TokenText.begin(),
425 Each->TokenText.end() - For->TokenText.begin());
426 For->ColumnWidth += Each->ColumnWidth;
427 Tokens.erase(Tokens.end() - 1);
431bool FormatTokenLexer::tryTransformTryUsageForC() {
432 if (Tokens.size() < 2)
434 auto &Try = *(Tokens.end() - 2);
435 if (!Try->is(tok::kw_try))
437 auto &Next = *(Tokens.end() - 1);
438 if (Next->isOneOf(tok::l_brace, tok::colon, tok::hash, tok::comment))
441 if (Tokens.size() > 2) {
442 auto &At = *(Tokens.end() - 3);
447 Try->Tok.setKind(tok::identifier);
451bool FormatTokenLexer::tryMergeLessLess() {
453 if (Tokens.size() < 3)
456 auto First = Tokens.end() - 3;
457 if (
First[0]->isNot(tok::less) ||
First[1]->isNot(tok::less))
461 if (
First[1]->hasWhitespaceBefore())
464 auto X = Tokens.size() > 3 ?
First[-1] :
nullptr;
465 if (
X &&
X->is(tok::less))
469 if ((!
X ||
X->isNot(tok::kw_operator)) && Y->is(tok::less))
472 First[0]->Tok.setKind(tok::lessless);
473 First[0]->TokenText =
"<<";
474 First[0]->ColumnWidth += 1;
475 Tokens.erase(Tokens.end() - 2);
479bool FormatTokenLexer::tryMergeGreaterGreater() {
481 if (Tokens.size() < 2)
484 auto First = Tokens.end() - 2;
485 if (
First[0]->isNot(tok::greater) ||
First[1]->isNot(tok::greater))
489 if (
First[1]->hasWhitespaceBefore())
492 auto Tok = Tokens.size() > 2 ?
First[-1] :
nullptr;
493 if (Tok && Tok->isNot(tok::kw_operator))
496 First[0]->Tok.setKind(tok::greatergreater);
497 First[0]->TokenText =
">>";
498 First[0]->ColumnWidth += 1;
499 Tokens.erase(Tokens.end() - 1);
503bool FormatTokenLexer::tryMergeTokens(ArrayRef<tok::TokenKind> Kinds,
505 if (Tokens.size() < Kinds.size())
508 SmallVectorImpl<FormatToken *>::const_iterator
First =
509 Tokens.end() - Kinds.size();
510 for (
unsigned i = 0; i < Kinds.size(); ++i)
511 if (!
First[i]->is(Kinds[i]))
514 return tryMergeTokens(Kinds.size(), NewType);
517bool FormatTokenLexer::tryMergeTokens(
size_t Count,
TokenType NewType) {
518 if (Tokens.size() < Count)
521 SmallVectorImpl<FormatToken *>::const_iterator
First = Tokens.end() - Count;
522 unsigned AddLength = 0;
523 for (
size_t i = 1; i < Count; ++i) {
526 if (
First[i]->hasWhitespaceBefore())
528 AddLength +=
First[i]->TokenText.size();
531 Tokens.resize(Tokens.size() - Count + 1);
532 First[0]->TokenText = StringRef(
First[0]->TokenText.data(),
533 First[0]->TokenText.size() + AddLength);
534 First[0]->ColumnWidth += AddLength;
535 First[0]->setType(NewType);
539bool FormatTokenLexer::tryMergeTokensAny(
540 ArrayRef<ArrayRef<tok::TokenKind>> Kinds,
TokenType NewType) {
541 return llvm::any_of(Kinds, [
this, NewType](ArrayRef<tok::TokenKind> Kinds) {
542 return tryMergeTokens(Kinds, NewType);
547bool FormatTokenLexer::precedesOperand(FormatToken *Tok) {
551 return Tok->isOneOf(tok::period, tok::l_paren, tok::comma, tok::l_brace,
552 tok::r_brace, tok::l_square, tok::semi, tok::exclaim,
553 tok::colon, tok::question, tok::tilde) ||
554 Tok->isOneOf(tok::kw_return, tok::kw_do, tok::kw_case, tok::kw_throw,
555 tok::kw_else, tok::kw_new, tok::kw_delete, tok::kw_void,
557 Tok->isBinaryOperator();
560bool FormatTokenLexer::canPrecedeRegexLiteral(FormatToken *Prev) {
570 if (Prev->isOneOf(tok::plusplus, tok::minusminus, tok::exclaim))
571 return Tokens.size() < 3 || precedesOperand(Tokens[Tokens.size() - 3]);
575 if (!precedesOperand(Prev))
585void FormatTokenLexer::tryParseJSRegexLiteral() {
586 FormatToken *RegexToken = Tokens.back();
587 if (!RegexToken->isOneOf(tok::slash, tok::slashequal))
590 FormatToken *Prev =
nullptr;
591 for (FormatToken *FT : llvm::drop_begin(llvm::reverse(Tokens))) {
594 if (FT->isNot(tok::comment)) {
600 if (!canPrecedeRegexLiteral(Prev))
604 const char *
Offset = Lex->getBufferLocation();
605 const char *RegexBegin =
Offset - RegexToken->TokenText.size();
606 StringRef Buffer = Lex->getBuffer();
607 bool InCharacterClass =
false;
608 bool HaveClosingSlash =
false;
609 for (; !HaveClosingSlash &&
Offset != Buffer.end(); ++
Offset) {
619 InCharacterClass =
true;
622 InCharacterClass =
false;
625 if (!InCharacterClass)
626 HaveClosingSlash =
true;
631 RegexToken->setType(TT_RegexLiteral);
633 RegexToken->Tok.setKind(tok::string_literal);
634 RegexToken->TokenText = StringRef(RegexBegin,
Offset - RegexBegin);
635 RegexToken->ColumnWidth = RegexToken->TokenText.size();
642 auto Repeated = [&
Begin, End]() {
658 for (
int UnmatchedOpeningBraceCount = 0;
Begin < End; ++
Begin) {
670 ++UnmatchedOpeningBraceCount;
678 else if (UnmatchedOpeningBraceCount > 0)
679 --UnmatchedOpeningBraceCount;
685 if (UnmatchedOpeningBraceCount > 0)
688 if (Verbatim && Repeated()) {
699void FormatTokenLexer::handleCSharpVerbatimAndInterpolatedStrings() {
700 FormatToken *CSharpStringLiteral = Tokens.back();
702 if (CSharpStringLiteral->isNot(TT_CSharpStringLiteral))
705 auto &TokenText = CSharpStringLiteral->TokenText;
707 bool Verbatim =
false;
708 bool Interpolated =
false;
709 if (TokenText.startswith(R
"($@")") || TokenText.startswith(R"(@$")")) {
712 }
else if (TokenText.startswith(R
"(@")")) {
714 }
else if (TokenText.startswith(R
"($")")) {
719 if (!Verbatim && !Interpolated)
722 const char *StrBegin = Lex->getBufferLocation() - TokenText.size();
723 const char *
Offset = StrBegin;
724 if (Verbatim && Interpolated)
729 const auto End = Lex->getBuffer().end();
737 StringRef LiteralText(StrBegin,
Offset - StrBegin + 1);
738 TokenText = LiteralText;
741 size_t FirstBreak = LiteralText.find(
'\n');
742 StringRef FirstLineText = FirstBreak == StringRef::npos
744 : LiteralText.substr(0, FirstBreak);
746 FirstLineText, CSharpStringLiteral->OriginalColumn, Style.
TabWidth,
748 size_t LastBreak = LiteralText.rfind(
'\n');
749 if (LastBreak != StringRef::npos) {
750 CSharpStringLiteral->IsMultiline =
true;
751 unsigned StartColumn = 0;
752 CSharpStringLiteral->LastLineColumnWidth =
754 StartColumn, Style.
TabWidth, Encoding);
761void FormatTokenLexer::handleTemplateStrings() {
762 FormatToken *BacktickToken = Tokens.back();
764 if (BacktickToken->is(tok::l_brace)) {
768 if (BacktickToken->is(tok::r_brace)) {
769 if (StateStack.size() == 1)
775 }
else if (BacktickToken->is(tok::unknown) &&
776 BacktickToken->TokenText ==
"`") {
783 const char *
Offset = Lex->getBufferLocation();
784 const char *TmplBegin =
Offset - BacktickToken->TokenText.size();
793 }
else if (
Offset + 1 < Lex->getBuffer().end() &&
Offset[0] ==
'$' &&
802 StringRef LiteralText(TmplBegin,
Offset - TmplBegin);
803 BacktickToken->setType(TT_TemplateString);
804 BacktickToken->Tok.setKind(tok::string_literal);
805 BacktickToken->TokenText = LiteralText;
808 size_t FirstBreak = LiteralText.find(
'\n');
809 StringRef FirstLineText = FirstBreak == StringRef::npos
811 : LiteralText.substr(0, FirstBreak);
813 FirstLineText, BacktickToken->OriginalColumn, Style.
TabWidth, Encoding);
814 size_t LastBreak = LiteralText.rfind(
'\n');
815 if (LastBreak != StringRef::npos) {
816 BacktickToken->IsMultiline =
true;
817 unsigned StartColumn = 0;
818 BacktickToken->LastLineColumnWidth =
820 StartColumn, Style.
TabWidth, Encoding);
823 SourceLocation loc = Lex->getSourceLocation(
Offset);
827void FormatTokenLexer::tryParsePythonComment() {
828 FormatToken *HashToken = Tokens.back();
829 if (!HashToken->isOneOf(tok::hash, tok::hashhash))
832 const char *CommentBegin =
833 Lex->getBufferLocation() - HashToken->TokenText.size();
834 size_t From = CommentBegin - Lex->getBuffer().begin();
835 size_t To = Lex->getBuffer().find_first_of(
'\n', From);
836 if (To == StringRef::npos)
837 To = Lex->getBuffer().size();
838 size_t Len = To - From;
839 HashToken->setType(TT_LineComment);
840 HashToken->Tok.setKind(tok::comment);
841 HashToken->TokenText = Lex->getBuffer().substr(From, Len);
842 SourceLocation Loc = To < Lex->getBuffer().size()
843 ? Lex->getSourceLocation(CommentBegin + Len)
848bool FormatTokenLexer::tryMerge_TMacro() {
849 if (Tokens.size() < 4)
851 FormatToken *
Last = Tokens.back();
852 if (!
Last->is(tok::r_paren))
855 FormatToken *String = Tokens[Tokens.size() - 2];
856 if (!String->is(tok::string_literal) || String->IsMultiline)
859 if (!Tokens[Tokens.size() - 3]->is(tok::l_paren))
862 FormatToken *
Macro = Tokens[Tokens.size() - 4];
863 if (
Macro->TokenText !=
"_T")
866 const char *Start =
Macro->TokenText.data();
867 const char *End =
Last->TokenText.data() +
Last->TokenText.size();
868 String->TokenText = StringRef(Start, End - Start);
869 String->IsFirst =
Macro->IsFirst;
870 String->LastNewlineOffset =
Macro->LastNewlineOffset;
871 String->WhitespaceRange =
Macro->WhitespaceRange;
872 String->OriginalColumn =
Macro->OriginalColumn;
874 String->TokenText, String->OriginalColumn, Style.
TabWidth, Encoding);
875 String->NewlinesBefore =
Macro->NewlinesBefore;
876 String->HasUnescapedNewline =
Macro->HasUnescapedNewline;
881 Tokens.back() = String;
882 if (FirstInLineIndex >= Tokens.size())
883 FirstInLineIndex = Tokens.size() - 1;
887bool FormatTokenLexer::tryMergeConflictMarkers() {
888 if (Tokens.back()->NewlinesBefore == 0 && Tokens.back()->isNot(tok::eof))
902 unsigned FirstInLineOffset;
904 Tokens[FirstInLineIndex]->getStartOfNonWhitespace());
907 auto LineOffset = Buffer.rfind(
'\n', FirstInLineOffset);
908 if (LineOffset == StringRef::npos)
913 auto FirstSpace = Buffer.find_first_of(
" \n", LineOffset);
915 if (FirstSpace == StringRef::npos)
916 LineStart = Buffer.substr(LineOffset);
918 LineStart = Buffer.substr(LineOffset, FirstSpace - LineOffset);
921 if (LineStart ==
"<<<<<<<" || LineStart ==
">>>>") {
922 Type = TT_ConflictStart;
923 }
else if (LineStart ==
"|||||||" || LineStart ==
"=======" ||
924 LineStart ==
"====") {
925 Type = TT_ConflictAlternative;
926 }
else if (LineStart ==
">>>>>>>" || LineStart ==
"<<<<") {
927 Type = TT_ConflictEnd;
930 if (Type != TT_Unknown) {
931 FormatToken *Next = Tokens.back();
933 Tokens.resize(FirstInLineIndex + 1);
937 Tokens.back()->setType(Type);
938 Tokens.back()->Tok.setKind(tok::kw___unknown_anytype);
940 Tokens.push_back(Next);
947FormatToken *FormatTokenLexer::getStashedToken() {
949 Token Tok = FormatTok->
Tok;
950 StringRef TokenText = FormatTok->
TokenText;
953 FormatTok =
new (Allocator.Allocate()) FormatToken;
954 FormatTok->
Tok = Tok;
955 SourceLocation TokLocation =
972void FormatTokenLexer::truncateToken(
size_t NewLen) {
973 assert(NewLen <= FormatTok->TokenText.size());
975 Lex->getBufferLocation() - FormatTok->
TokenText.size() + NewLen)));
990 const unsigned char *
const Begin =
Text.bytes_begin();
991 const unsigned char *
const End =
Text.bytes_end();
992 const unsigned char *Cur =
Begin;
994 if (isspace(Cur[0])) {
996 }
else if (Cur[0] ==
'\\' && (Cur[1] ==
'\n' || Cur[1] ==
'\r')) {
1002 assert(End - Cur >= 2);
1004 }
else if (Cur[0] ==
'?' && Cur[1] ==
'?' && Cur[2] ==
'/' &&
1005 (Cur[3] ==
'\n' || Cur[3] ==
'\r')) {
1009 assert(End - Cur >= 4);
1018FormatToken *FormatTokenLexer::getNextToken() {
1021 return getStashedToken();
1024 FormatTok =
new (Allocator.Allocate()) FormatToken;
1025 readRawToken(*FormatTok);
1026 SourceLocation WhitespaceStart =
1028 FormatTok->
IsFirst = IsFirstToken;
1029 IsFirstToken =
false;
1035 unsigned WhitespaceLength = TrailingWhitespace;
1036 while (FormatTok->
isNot(tok::eof)) {
1038 if (LeadingWhitespace == 0)
1040 if (LeadingWhitespace < FormatTok->TokenText.size())
1041 truncateToken(LeadingWhitespace);
1043 bool InEscape =
false;
1044 for (
int i = 0, e =
Text.size(); i != e; ++i) {
1050 if (i + 1 < e &&
Text[i + 1] ==
'\n')
1078 assert(
Text.substr(i, 2) ==
"\\\r" ||
Text.substr(i, 2) ==
"\\\n" ||
1079 Text.substr(i, 4) ==
"\?\?/\r" ||
1080 Text.substr(i, 4) ==
"\?\?/\n" ||
1081 (i >= 1 && (
Text.substr(i - 1, 4) ==
"\?\?/\r" ||
1082 Text.substr(i - 1, 4) ==
"\?\?/\n")) ||
1083 (i >= 2 && (
Text.substr(i - 2, 4) ==
"\?\?/\r" ||
1084 Text.substr(i - 2, 4) ==
"\?\?/\n")));
1093 WhitespaceLength +=
Text.size();
1094 readRawToken(*FormatTok);
1097 if (FormatTok->
is(tok::unknown))
1098 FormatTok->
setType(TT_ImplicitStringLiteral);
1108 FormatTok->
is(tok::comment) && FormatTok->
TokenText.startswith(
"//")) {
1109 size_t BackslashPos = FormatTok->
TokenText.find(
'\\');
1110 while (BackslashPos != StringRef::npos) {
1111 if (BackslashPos + 1 < FormatTok->
TokenText.size() &&
1112 FormatTok->
TokenText[BackslashPos + 1] ==
'\n') {
1113 truncateToken(BackslashPos + 1);
1116 BackslashPos = FormatTok->
TokenText.find(
'\\', BackslashPos + 1);
1121 static const llvm::Regex NumberBase(
"^s?[bdho]", llvm::Regex::IgnoreCase);
1122 SmallVector<StringRef, 1> Matches;
1128 if (FormatTok->
is(tok::numeric_constant)) {
1130 auto Quote = FormatTok->
TokenText.find(
'\'');
1131 if (Quote != StringRef::npos)
1132 truncateToken(Quote);
1133 }
else if (FormatTok->
isOneOf(tok::hash, tok::hashhash)) {
1135 }
else if (FormatTok->
is(tok::raw_identifier)) {
1139 }
else if (FormatTok->
TokenText ==
"``") {
1142 }
else if (Tokens.size() > 0 &&
1144 NumberBase.match(FormatTok->
TokenText, &Matches)) {
1149 truncateToken(Matches[0].size());
1156 WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
1160 TrailingWhitespace = 0;
1161 if (FormatTok->
is(tok::comment)) {
1163 StringRef UntrimmedText = FormatTok->
TokenText;
1165 TrailingWhitespace = UntrimmedText.size() - FormatTok->
TokenText.size();
1166 }
else if (FormatTok->
is(tok::raw_identifier)) {
1167 IdentifierInfo &Info = IdentTable.
get(FormatTok->
TokenText);
1171 FormatTok->
isOneOf(tok::kw_struct, tok::kw_union, tok::kw_delete,
1172 tok::kw_operator)) {
1176 FormatTok->
isOneOf(tok::kw_struct, tok::kw_union,
1177 tok::kw_operator)) {
1181 }
else if (FormatTok->
is(tok::greatergreater)) {
1186 }
else if (FormatTok->
is(tok::lessless)) {
1193 if (Style.
isVerilog() && Tokens.size() > 0 &&
1194 Tokens.back()->is(TT_VerilogNumberBase) &&
1195 FormatTok->
Tok.
isOneOf(tok::identifier, tok::question)) {
1197 FormatTok->
Tok.
setKind(tok::numeric_constant);
1203 size_t FirstNewlinePos =
Text.find(
'\n');
1204 if (FirstNewlinePos == StringRef::npos) {
1215 Text.substr(0, FirstNewlinePos), Column, Style.
TabWidth, Encoding);
1224 if (Style.
isCpp()) {
1226 if (!(Tokens.size() > 0 && Tokens.back()->Tok.getIdentifierInfo() &&
1227 Tokens.back()->Tok.getIdentifierInfo()->getPPKeywordID() ==
1229 it != Macros.end()) {
1230 FormatTok->
setType(it->second);
1231 if (it->second == TT_IfMacro) {
1238 }
else if (FormatTok->
is(tok::identifier)) {
1239 if (MacroBlockBeginRegex.match(
Text))
1240 FormatTok->
setType(TT_MacroBlockBegin);
1241 else if (MacroBlockEndRegex.match(
Text))
1242 FormatTok->
setType(TT_MacroBlockEnd);
1249bool FormatTokenLexer::readRawTokenVerilogSpecific(Token &Tok) {
1264 static const llvm::Regex VerilogToken(R
"re(^('|``?|\\(\\)re"
1265 "(\r?\n|\r)|[^[:space:]])*)");
1267 SmallVector<StringRef, 4> Matches;
1268 const char *Start = Lex->getBufferLocation();
1269 if (!VerilogToken.match(StringRef(Start, Lex->getBuffer().end() - Start),
1275 if (Start[0] ==
'\\' && (Start[1] ==
'\r' || Start[1] ==
'\n'))
1277 size_t Len = Matches[0].size();
1282 Tok.setKind(tok::raw_identifier);
1284 Tok.setLocation(Lex->getSourceLocation(Start, Len));
1285 Tok.setRawIdentifierData(Start);
1286 Lex->seek(Lex->getCurrentBufferOffset() + Len,
false);
1290void FormatTokenLexer::readRawToken(FormatToken &Tok) {
1293 if (!Style.
isVerilog() || !readRawTokenVerilogSpecific(Tok.Tok))
1294 Lex->LexFromRawLexer(Tok.Tok);
1295 Tok.TokenText = StringRef(SourceMgr.
getCharacterData(Tok.Tok.getLocation()),
1296 Tok.Tok.getLength());
1299 if (Tok.is(tok::unknown)) {
1300 if (!Tok.TokenText.empty() && Tok.TokenText[0] ==
'"') {
1301 Tok.Tok.setKind(tok::string_literal);
1302 Tok.IsUnterminatedLiteral =
true;
1303 }
else if (Style.
isJavaScript() && Tok.TokenText ==
"''") {
1304 Tok.Tok.setKind(tok::string_literal);
1310 Tok.is(tok::char_constant)) {
1311 Tok.Tok.setKind(tok::string_literal);
1315 FormattingDisabled =
false;
1317 Tok.Finalized = FormattingDisabled;
1320 FormattingDisabled =
true;
1323void FormatTokenLexer::resetLexer(
unsigned Offset) {
1327 Buffer.begin(), Buffer.begin() +
Offset, Buffer.end()));
1328 Lex->SetKeepWhitespaceMode(
true);
1329 TrailingWhitespace = 0;
Defines the clang::SourceLocation class and associated facilities.
Defines the SourceManager interface.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
Implements an efficient mapping from strings to IdentifierInfo nodes.
IdentifierInfo & get(StringRef Name)
Return the identifier token info for the specified named identifier.
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens.
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
This class handles loading and caching of source files into memory.
unsigned getFileOffset(SourceLocation SpellingLoc) const
Returns the offset from the start of the file that the specified SourceLocation represents.
StringRef getBufferData(FileID FID, bool *Invalid=nullptr) const
Return a StringRef to the source buffer data for the specified FileID.
SourceLocation getLocForEndOfFile(FileID FID) const
Return the source location corresponding to the last byte of the specified file.
const char * getCharacterData(SourceLocation SL, bool *Invalid=nullptr) const
Return a pointer to the start of the specified location in the appropriate spelling MemoryBuffer.
llvm::MemoryBufferRef getBufferOrFake(FileID FID, SourceLocation Loc=SourceLocation()) const
Return the buffer for the specified FileID.
std::pair< FileID, unsigned > getDecomposedLoc(SourceLocation Loc) const
Decompose the specified location into a raw FileID + Offset pair.
SourceLocation getLocForStartOfFile(FileID FID) const
Return the source location corresponding to the first byte of the specified file.
IdentifierInfo * getIdentifierInfo() const
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
void setLength(unsigned Len)
void setKind(tok::TokenKind K)
void setLocation(SourceLocation L)
bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const
void setIdentifierInfo(IdentifierInfo *II)
TokenKind
Provides a simple uniform namespace for tokens from all C languages.