29#include "llvm/ADT/STLExtras.h"
30#include "llvm/ADT/StringExtras.h"
31#include "llvm/ADT/StringRef.h"
32#include "llvm/ADT/StringSwitch.h"
33#include "llvm/Support/Compiler.h"
34#include "llvm/Support/ConvertUTF.h"
35#include "llvm/Support/MemoryBufferRef.h"
36#include "llvm/Support/NativeFormatting.h"
37#include "llvm/Support/Unicode.h"
38#include "llvm/Support/UnicodeCharRanges.h"
63 return II->getObjCKeywordID() == objcKey;
70 return tok::objc_not_keyword;
76 if (AllowExport &&
is(tok::kw_export))
78 if (
isOneOf(tok::kw_import, tok::kw_module))
80 if (
isNot(tok::identifier))
83 return II->isImportKeyword() || II->isModuleKeyword();
89 case tok::annot_typename:
90 case tok::annot_decltype:
91 case tok::annot_pack_indexing_type:
97 case tok::kw___int128:
99 case tok::kw_unsigned:
107 case tok::kw__Float16:
108 case tok::kw___float128:
109 case tok::kw___ibm128:
110 case tok::kw_wchar_t:
116#define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) case tok::kw___##Trait:
117#include "clang/Basic/TransformTypeTraits.def"
118 case tok::kw___auto_type:
119 case tok::kw_char16_t:
120 case tok::kw_char32_t:
122 case tok::kw_decltype:
123 case tok::kw_char8_t:
// Out-of-line definition of Lexer::anchor() with an empty body.
// NOTE(review): presumably the usual "anchor" idiom that gives the class a
// single home translation unit for its vtable -- confirm against the class
// declaration, which is not visible here.
135void Lexer::anchor() {}
137void Lexer::InitLexer(
const char *BufStart,
const char *BufPtr,
138 const char *BufEnd) {
139 BufferStart = BufStart;
143 assert(BufEnd[0] == 0 &&
144 "We assume that the input buffer has a null character at the end"
145 " to simplify lexing!");
150 if (BufferStart == BufferPtr) {
152 StringRef Buf(BufferStart, BufferEnd - BufferStart);
153 size_t BOMLength = llvm::StringSwitch<size_t>(Buf)
154 .StartsWith(
"\xEF\xBB\xBF", 3)
158 BufferPtr += BOMLength;
161 Is_PragmaLexer =
false;
162 CurrentConflictMarkerState =
CMK_None;
165 IsAtStartOfLine =
true;
166 IsAtPhysicalStartOfLine =
true;
168 HasLeadingSpace =
false;
169 HasLeadingEmptyMacro =
false;
184 ExtendedTokenMode = 0;
186 NewLinePtr =
nullptr;
196 FileLoc(
PP.getSourceManager().getLocForStartOfFile(
FID)),
197 LangOpts(
PP.getLangOpts()), LineComment(LangOpts.LineComment),
198 IsFirstTimeLexingFile(IsFirstIncludeOfFile) {
199 InitLexer(InputFile.getBufferStart(), InputFile.getBufferStart(),
200 InputFile.getBufferEnd());
209 const char *BufStart,
const char *BufPtr,
const char *BufEnd,
210 bool IsFirstIncludeOfFile)
211 : FileLoc(fileloc), LangOpts(langOpts), LineComment(LangOpts.LineComment),
212 IsFirstTimeLexingFile(IsFirstIncludeOfFile) {
213 InitLexer(BufStart, BufPtr, BufEnd);
224 bool IsFirstIncludeOfFile)
225 :
Lexer(
SM.getLocForStartOfFile(
FID), langOpts, FromFile.getBufferStart(),
226 FromFile.getBufferStart(), FromFile.getBufferEnd(),
227 IsFirstIncludeOfFile) {}
230 assert(
PP &&
"Cannot reset token mode without a preprocessor");
231 if (LangOpts.TraditionalCPP)
259 FileID SpellingFID =
SM.getFileID(SpellingLoc);
260 llvm::MemoryBufferRef InputFile =
SM.getBufferOrFake(SpellingFID);
266 const char *StrData =
SM.getCharacterData(SpellingLoc);
268 L->BufferPtr = StrData;
269 L->BufferEnd = StrData+TokLen;
270 assert(L->BufferEnd[0] == 0 &&
"Buffer is not nul terminated!");
274 L->FileLoc =
SM.createExpansionLoc(
SM.getLocForStartOfFile(SpellingFID),
276 ExpansionLocEnd, TokLen);
283 L->Is_PragmaLexer =
true;
288 this->IsAtPhysicalStartOfLine = IsAtStartOfLine;
289 this->IsAtStartOfLine = IsAtStartOfLine;
290 assert((BufferStart + Offset) <= BufferEnd);
291 BufferPtr = BufferStart + Offset;
295 typename T::size_type i = 0, e = Str.size();
297 if (Str[i] ==
'\\' || Str[i] == Quote) {
298 Str.insert(Str.begin() + i,
'\\');
301 }
else if (Str[i] ==
'\n' || Str[i] ==
'\r') {
303 if ((i < e - 1) && (Str[i + 1] ==
'\n' || Str[i + 1] ==
'\r') &&
304 Str[i] != Str[i + 1]) {
310 Str.insert(Str.begin() + i + 1,
'n');
320 std::string
Result = std::string(Str);
321 char Quote = Charify ?
'\'' :
'"';
336 assert(
Tok.needsCleaning() &&
"getSpellingSlow called on simple token");
339 const char *BufEnd = BufPtr +
Tok.getLength();
343 while (BufPtr < BufEnd) {
345 Spelling[Length++] = CharAndSize.Char;
346 BufPtr += CharAndSize.Size;
348 if (Spelling[Length - 1] ==
'"')
356 Spelling[Length - 2] ==
'R' && Spelling[Length - 1] ==
'"') {
359 const char *RawEnd = BufEnd;
360 do --RawEnd;
while (*RawEnd !=
'"');
361 size_t RawLength = RawEnd - BufPtr + 1;
364 memcpy(Spelling + Length, BufPtr, RawLength);
372 while (BufPtr < BufEnd) {
374 Spelling[Length++] = CharAndSize.Char;
375 BufPtr += CharAndSize.Size;
378 assert(Length <
Tok.getLength() &&
379 "NeedsCleaning flag set on token that didn't need cleaning!");
397 bool invalidTemp =
false;
398 StringRef file =
SM.getBufferData(locInfo.first, &invalidTemp);
400 if (invalid) *invalid =
true;
404 const char *tokenBegin = file.data() + locInfo.second;
408 file.begin(), tokenBegin, file.end());
416 return StringRef(tokenBegin,
length);
421 return StringRef(buffer.data(), buffer.size());
431 assert((
int)
Tok.getLength() >= 0 &&
"Token character range is bogus!");
433 bool CharDataInvalid =
false;
434 const char *TokStart = SourceMgr.getCharacterData(
Tok.getLocation(),
442 if (!
Tok.needsCleaning())
443 return std::string(TokStart, TokStart +
Tok.getLength());
464 assert((
int)
Tok.getLength() >= 0 &&
"Token character range is bogus!");
466 const char *TokStart =
nullptr;
468 if (
Tok.is(tok::raw_identifier))
469 TokStart =
Tok.getRawIdentifier().data();
470 else if (!
Tok.hasUCN()) {
473 Buffer = II->getNameStart();
474 return II->getLength();
480 TokStart =
Tok.getLiteralData();
484 bool CharDataInvalid =
false;
485 TokStart = SourceMgr.getCharacterData(
Tok.getLocation(), &CharDataInvalid);
488 if (CharDataInvalid) {
495 if (!
Tok.needsCleaning()) {
497 return Tok.getLength();
522 bool IgnoreWhiteSpace) {
531 Loc =
SM.getExpansionLoc(Loc);
534 StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
538 const char *StrData = Buffer.data()+LocInfo.second;
540 if (!IgnoreWhiteSpace &&
isWhitespace(SkipEscapedNewLines(StrData)[0]))
544 Lexer TheLexer(
SM.getLocForStartOfFile(LocInfo.first), LangOpts,
545 Buffer.begin(), StrData, Buffer.end());
554 const char *BufStart = Buffer.data();
555 if (Offset >= Buffer.size())
558 const char *LexStart = BufStart + Offset;
559 for (; LexStart != BufStart; --LexStart) {
575 if (LocInfo.first.isInvalid())
579 StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
585 const char *StrData = Buffer.data() + LocInfo.second;
587 if (!LexStart || LexStart == StrData)
592 Lexer TheLexer(LexerStartLoc, LangOpts, Buffer.data(), LexStart,
612 }
while (TheTok.
getKind() != tok::eof);
624 if (!
SM.isMacroArgExpansion(Loc))
631 assert(FileLocInfo.first == BeginFileLocInfo.first &&
632 FileLocInfo.second >= BeginFileLocInfo.second);
638enum PreambleDirectiveKind {
653 Lexer TheLexer(FileLoc, LangOpts, Buffer.begin(), Buffer.begin(),
657 bool InPreprocessorDirective =
false;
661 unsigned MaxLineOffset = 0;
663 const char *CurPtr = Buffer.begin();
664 unsigned CurLine = 0;
665 while (CurPtr != Buffer.end()) {
669 if (CurLine == MaxLines)
673 if (CurPtr != Buffer.end())
674 MaxLineOffset = CurPtr - Buffer.begin();
680 if (InPreprocessorDirective) {
682 if (TheTok.
getKind() == tok::eof) {
693 InPreprocessorDirective =
false;
702 if (MaxLineOffset && TokOffset >= MaxLineOffset)
707 if (TheTok.
getKind() == tok::comment) {
715 Token HashTok = TheTok;
716 InPreprocessorDirective =
true;
725 PreambleDirectiveKind PDK
726 = llvm::StringSwitch<PreambleDirectiveKind>(
Keyword)
727 .Case(
"include", PDK_Skipped)
728 .Case(
"__include_macros", PDK_Skipped)
729 .Case(
"define", PDK_Skipped)
730 .Case(
"undef", PDK_Skipped)
731 .Case(
"line", PDK_Skipped)
732 .Case(
"error", PDK_Skipped)
733 .Case(
"pragma", PDK_Skipped)
734 .Case(
"import", PDK_Skipped)
735 .Case(
"include_next", PDK_Skipped)
736 .Case(
"warning", PDK_Skipped)
737 .Case(
"ident", PDK_Skipped)
738 .Case(
"sccs", PDK_Skipped)
739 .Case(
"assert", PDK_Skipped)
740 .Case(
"unassert", PDK_Skipped)
741 .Case(
"if", PDK_Skipped)
742 .Case(
"ifdef", PDK_Skipped)
743 .Case(
"ifndef", PDK_Skipped)
744 .Case(
"elif", PDK_Skipped)
745 .Case(
"elifdef", PDK_Skipped)
746 .Case(
"elifndef", PDK_Skipped)
747 .Case(
"else", PDK_Skipped)
748 .Case(
"endif", PDK_Skipped)
749 .Default(PDK_Unknown);
766 TheTok.
getKind() == tok::raw_identifier &&
768 LangOpts.CPlusPlusModules) {
771 Token ModuleTok = TheTok;
774 }
while (TheTok.
getKind() == tok::comment);
775 if (TheTok.
getKind() != tok::semi) {
790 if (ActiveCommentLoc.
isValid())
791 End = ActiveCommentLoc;
806 const char *TokPtr =
SM.getCharacterData(TokStart, &
Invalid);
809 if (
Invalid || (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr)))
812 unsigned PhysOffset = 0;
817 while (Lexer::isObviouslySimpleCharacter(*TokPtr)) {
827 for (; CharNo; --CharNo) {
829 TokPtr += CharAndSize.Size;
830 PhysOffset += CharAndSize.Size;
837 if (!Lexer::isObviouslySimpleCharacter(*TokPtr))
838 PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr;
887 if (!
SM.isAtStartOfImmediateMacroExpansion(loc, &expansionLoc))
893 *MacroBegin = expansionLoc;
915 if (!
SM.isAtEndOfImmediateMacroExpansion(afterLoc, &expansionLoc))
921 *MacroEnd = expansionLoc;
934 if (Range.isTokenRange()) {
941 auto [FID, BeginOffs] =
SM.getDecomposedLoc(Begin);
946 if (!
SM.isInFileID(End, FID, &EndOffs) ||
956 return SM.getSLocEntry(
SM.getFileID(Loc))
958 .isExpansionTokenRange();
975 Range.setBegin(Begin);
980 if (Range.isTokenRange()) {
998 Range.setBegin(MacroBegin);
999 Range.setEnd(MacroEnd);
1001 if (Range.isTokenRange())
1021 Range.setBegin(
SM.getImmediateSpellingLoc(Begin));
1022 Range.setEnd(
SM.getImmediateSpellingLoc(End));
1035 if (Range.isInvalid()) {
1042 if (beginInfo.first.isInvalid()) {
1048 if (!
SM.isInFileID(Range.getEnd(), beginInfo.first, &EndOffs) ||
1049 beginInfo.second > EndOffs) {
1055 bool invalidTemp =
false;
1056 StringRef file =
SM.getBufferData(beginInfo.first, &invalidTemp);
1063 return file.substr(beginInfo.second, EndOffs - beginInfo.second);
1069 assert(Loc.
isMacroID() &&
"Only reasonable to call this on macros");
1085 Loc =
SM.getImmediateExpansionRange(Loc).getBegin();
1092 FileID MacroFID =
SM.getFileID(Loc);
1093 if (
SM.isInFileID(SpellLoc, MacroFID))
1103 Loc =
SM.getSpellingLoc(Loc);
1109 StringRef ExpansionBuffer =
SM.getBufferData(ExpansionInfo.first);
1110 return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
1115 assert(Loc.
isMacroID() &&
"Only reasonable to call this on macros");
1117 while (
SM.isMacroArgExpansion(Loc))
1118 Loc =
SM.getImmediateExpansionRange(Loc).getBegin();
1124 if (!SpellLoc.
isFileID() ||
SM.isWrittenInScratchSpace(SpellLoc))
1130 Loc =
SM.getSpellingLoc(
SM.getImmediateExpansionRange(Loc).getBegin());
1136 StringRef ExpansionBuffer =
SM.getBufferData(ExpansionInfo.first);
1137 return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
1146 if (Str - 1 < BufferStart)
1149 if ((Str[0] ==
'\n' && Str[-1] ==
'\r') ||
1150 (Str[0] ==
'\r' && Str[-1] ==
'\n')) {
1151 if (Str - 2 < BufferStart)
1161 return *Str ==
'\\';
1169 if (LocInfo.first.isInvalid())
1172 StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
1178 StringRef Rest = Buffer.substr(
Line - Buffer.data());
1179 size_t NumWhitespaceChars = Rest.find_first_not_of(
" \t");
1180 return NumWhitespaceChars == StringRef::npos
1182 : Rest.take_front(NumWhitespaceChars);
1197 unsigned CharNo,
unsigned TokLen) {
1198 assert(FileLoc.
isMacroID() &&
"Must be a macro expansion");
1214 return SM.createExpansionLoc(SpellingLoc, II.
getBegin(), II.
getEnd(), TokLen);
1220 unsigned TokLen)
const {
1221 assert(Loc >= BufferStart && Loc <= BufferEnd &&
1222 "Location out of range for this buffer!");
1226 unsigned CharNo = Loc-BufferStart;
1227 if (FileLoc.isFileID())
1228 return FileLoc.getLocWithOffset(CharNo);
1232 assert(
PP &&
"This doesn't work on raw lexers");
1251 case '=':
return '#';
1252 case ')':
return ']';
1253 case '(':
return '[';
1254 case '!':
return '|';
1255 case '\'':
return '^';
1256 case '>':
return '}';
1257 case '/':
return '\\';
1258 case '<':
return '{';
1259 case '-':
return '~';
1274 L->
Diag(CP-2, diag::trigraph_ignored);
1279 L->
Diag(CP-2, diag::trigraph_converted) << StringRef(&Res, 1);
1291 if (Ptr[Size-1] !=
'\n' && Ptr[Size-1] !=
'\r')
1295 if ((Ptr[Size] ==
'\r' || Ptr[Size] ==
'\n') &&
1296 Ptr[Size-1] != Ptr[Size])
1309const char *Lexer::SkipEscapedNewLines(
const char *P) {
1311 const char *AfterEscape;
1314 }
else if (*P ==
'?') {
1316 if (P[1] !=
'?' || P[2] !=
'/')
1326 if (NewLineSize == 0)
return P;
1327 P = AfterEscape+NewLineSize;
1334 bool IncludeComments) {
1337 return std::nullopt;
1345 bool InvalidTemp =
false;
1346 StringRef
File =
SM.getBufferData(LocInfo.first, &InvalidTemp);
1348 return std::nullopt;
1350 const char *TokenBegin =
File.data() + LocInfo.second;
1353 Lexer lexer(
SM.getLocForStartOfFile(LocInfo.first), LangOpts,
File.begin(),
1354 TokenBegin,
File.end());
1365 bool IncludeComments) {
1366 const auto StartOfFile =
SM.getLocForStartOfFile(
SM.getFileID(Loc));
1367 while (Loc != StartOfFile) {
1370 return std::nullopt;
1376 if (!
Tok.is(tok::comment) || IncludeComments) {
1380 return std::nullopt;
1389 const LangOptions &LangOpts,
bool SkipTrailingWhitespaceAndNewLine) {
1391 if (!
Tok ||
Tok->isNot(TKind))
1396 unsigned NumWhitespaceChars = 0;
1397 if (SkipTrailingWhitespaceAndNewLine) {
1398 const char *TokenEnd =
SM.getCharacterData(TokenLoc) +
Tok->getLength();
1399 unsigned char C = *TokenEnd;
1402 NumWhitespaceChars++;
1406 if (
C ==
'\n' ||
C ==
'\r') {
1409 NumWhitespaceChars++;
1410 if ((
C ==
'\n' ||
C ==
'\r') &&
C != PrevC)
1411 NumWhitespaceChars++;
1436 if (Ptr[0] ==
'\\') {
1442 return {
'\\', Size};
1452 Diag(Ptr, diag::backslash_newline_space);
1455 Size += EscapedNewLineSize;
1456 Ptr += EscapedNewLineSize;
1459 auto CharAndSize = getCharAndSizeSlow(Ptr,
Tok);
1460 CharAndSize.Size += Size;
1465 return {
'\\',
Size};
1469 if (Ptr[0] ==
'?' && Ptr[1] ==
'?') {
1473 LangOpts.Trigraphs)) {
1479 if (
C ==
'\\')
goto Slash;
1485 return {*Ptr,
Size + 1u};
1499 if (Ptr[0] ==
'\\') {
1505 return {
'\\',
Size};
1510 Size += EscapedNewLineSize;
1511 Ptr += EscapedNewLineSize;
1514 auto CharAndSize = getCharAndSizeSlowNoWarn(Ptr, LangOpts);
1515 CharAndSize.Size +=
Size;
1520 return {
'\\',
Size};
1524 if (LangOpts.Trigraphs && Ptr[0] ==
'?' && Ptr[1] ==
'?') {
1530 if (
C ==
'\\')
goto Slash;
1536 return {*Ptr,
Size + 1u};
1544void Lexer::SetByteOffset(
unsigned Offset,
bool StartOfLine) {
1545 BufferPtr = BufferStart + Offset;
1546 if (BufferPtr > BufferEnd)
1547 BufferPtr = BufferEnd;
1551 IsAtStartOfLine = StartOfLine;
1552 IsAtPhysicalStartOfLine = StartOfLine;
1556 static const llvm::sys::UnicodeCharSet UnicodeWhitespaceChars(
1558 return UnicodeWhitespaceChars.contains(Codepoint);
1563 llvm::raw_svector_ostream CharOS(CharBuf);
1564 llvm::write_hex(CharOS,
C, llvm::HexPrintStyle::Upper, 4);
1575 bool IsStart,
bool &IsExtension) {
1576 static const llvm::sys::UnicodeCharSet MathStartChars(
1578 static const llvm::sys::UnicodeCharSet MathContinueChars(
1580 if (MathStartChars.contains(
C) ||
1581 (!IsStart && MathContinueChars.contains(
C))) {
1589 bool &IsExtension) {
1590 if (LangOpts.AsmPreprocessor) {
1592 }
else if (LangOpts.DollarIdents &&
'$' ==
C) {
1594 }
else if (LangOpts.CPlusPlus || LangOpts.C23) {
1599 static const llvm::sys::UnicodeCharSet XIDStartChars(
XIDStartRanges);
1601 if (
C ==
'_' || XIDStartChars.contains(
C) || XIDContinueChars.contains(
C))
1605 }
else if (LangOpts.C11) {
1606 static const llvm::sys::UnicodeCharSet C11AllowedIDChars(
1608 return C11AllowedIDChars.contains(
C);
1610 static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
1612 return C99AllowedIDChars.contains(
C);
1617 bool &IsExtension) {
1618 assert(
C > 0x7F &&
"isAllowedInitiallyIDChar called with an ASCII codepoint");
1619 IsExtension =
false;
1620 if (LangOpts.AsmPreprocessor) {
1623 if (LangOpts.CPlusPlus || LangOpts.C23) {
1624 static const llvm::sys::UnicodeCharSet XIDStartChars(
XIDStartRanges);
1625 if (XIDStartChars.contains(
C))
1633 static const llvm::sys::UnicodeCharSet C11DisallowedInitialIDChars(
1635 return !C11DisallowedInitialIDChars.contains(
C);
1637 static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
1639 return !C99DisallowedInitialIDChars.contains(
C);
1645 static const llvm::sys::UnicodeCharSet MathStartChars(
1647 static const llvm::sys::UnicodeCharSet MathContinueChars(
1650 (void)MathStartChars;
1651 (void)MathContinueChars;
1652 assert((MathStartChars.contains(
C) || MathContinueChars.contains(
C)) &&
1653 "Unexpected mathematical notation codepoint");
1654 Diags.
Report(Range.getBegin(), diag::ext_mathematical_notation)
1667 if (!Diags.
isIgnored(diag::warn_c99_compat_unicode_id, Range.getBegin())) {
1669 CannotAppearInIdentifier = 0,
1670 CannotStartIdentifier
1673 static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
1675 static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
1677 if (!C99AllowedIDChars.contains(
C)) {
1678 Diags.
Report(Range.getBegin(), diag::warn_c99_compat_unicode_id)
1680 << CannotAppearInIdentifier;
1681 }
else if (
IsFirst && C99DisallowedInitialIDChars.contains(
C)) {
1682 Diags.
Report(Range.getBegin(), diag::warn_c99_compat_unicode_id)
1684 << CannotStartIdentifier;
1696 struct HomoglyphPair {
1699 bool operator<(HomoglyphPair R)
const {
return Character < R.Character; }
1701 static constexpr HomoglyphPair SortedHomoglyphs[] = {
1754 std::lower_bound(std::begin(SortedHomoglyphs),
1755 std::end(SortedHomoglyphs) - 1, HomoglyphPair{
C,
'\0'});
1756 if (Homoglyph->Character ==
C) {
1757 if (Homoglyph->LooksLike) {
1758 const char LooksLikeStr[] = {Homoglyph->LooksLike, 0};
1759 Diags.
Report(Range.getBegin(), diag::warn_utf8_symbol_homoglyph)
1762 Diags.
Report(Range.getBegin(), diag::warn_utf8_symbol_zero_width)
1782 bool InvalidOnlyAtStart =
IsFirst && !IsIDStart && IsIDContinue;
1784 if (!
IsFirst || InvalidOnlyAtStart) {
1785 Diags.
Report(Range.getBegin(), diag::err_character_not_allowed_identifier)
1789 Diags.
Report(Range.getBegin(), diag::err_character_not_allowed)
1795bool Lexer::tryConsumeIdentifierUCN(
const char *&CurPtr,
unsigned Size,
1797 const char *UCNPtr = CurPtr +
Size;
1798 uint32_t CodePoint = tryReadUCN(UCNPtr, CurPtr,
nullptr);
1799 if (CodePoint == 0) {
1802 bool IsExtension =
false;
1807 !
PP->isPreprocessedOutput())
1809 PP->getDiagnostics(), LangOpts, CodePoint,
1827 if ((UCNPtr - CurPtr == 6 && CurPtr[1] ==
'u') ||
1828 (UCNPtr - CurPtr == 10 && CurPtr[1] ==
'U'))
1831 while (CurPtr != UCNPtr)
1832 (void)getAndAdvanceChar(CurPtr,
Result);
1836bool Lexer::tryConsumeIdentifierUTF8Char(
const char *&CurPtr,
Token &
Result) {
1837 llvm::UTF32 CodePoint;
1842 unsigned FirstCodeUnitSize;
1843 getCharAndSize(CurPtr, FirstCodeUnitSize);
1844 const char *CharStart = CurPtr + FirstCodeUnitSize - 1;
1845 const char *UnicodePtr = CharStart;
1847 llvm::ConversionResult ConvResult = llvm::convertUTF8Sequence(
1848 (
const llvm::UTF8 **)&UnicodePtr, (
const llvm::UTF8 *)BufferEnd,
1849 &CodePoint, llvm::strictConversion);
1850 if (ConvResult != llvm::conversionOK)
1853 bool IsExtension =
false;
1860 !
PP->isPreprocessedOutput())
1862 PP->getDiagnostics(), LangOpts, CodePoint,
1870 PP->getDiagnostics(), CodePoint,
1882 ConsumeChar(CurPtr, FirstCodeUnitSize,
Result);
1883 CurPtr = UnicodePtr;
1887bool Lexer::LexUnicodeIdentifierStart(
Token &
Result, uint32_t
C,
1888 const char *CurPtr) {
1889 bool IsExtension =
false;
1892 !
PP->isPreprocessedOutput()) {
1904 return LexIdentifierContinue(
Result, CurPtr);
1908 !
PP->isPreprocessedOutput() && !
isASCII(*BufferPtr) &&
1920 PP->getDiagnostics(), LangOpts,
C,
1929 FormTokenWithChars(
Result, CurPtr, tok::unknown);
1935 [[maybe_unused]]
const char *BufferEnd) {
1937 alignas(16)
static constexpr char AsciiIdentifierRange[16] = {
1938 '_',
'_',
'A',
'Z',
'a',
'z',
'0',
'9',
1940 constexpr ssize_t BytesPerRegister = 16;
1942 __m128i AsciiIdentifierRangeV =
1945 while (LLVM_LIKELY(BufferEnd - CurPtr >= BytesPerRegister)) {
1952 if (Consumed == BytesPerRegister)
1958 unsigned char C = *CurPtr;
1964bool Lexer::LexIdentifierContinue(
Token &
Result,
const char *CurPtr) {
1973 unsigned char C = getCharAndSize(CurPtr, Size);
1975 CurPtr = ConsumeChar(CurPtr, Size,
Result);
1980 if (!LangOpts.DollarIdents)
1984 Diag(CurPtr, diag::ext_dollar_in_identifier);
1985 CurPtr = ConsumeChar(CurPtr, Size,
Result);
1988 if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result))
1996 const char *IdStart = BufferPtr;
1997 FormTokenWithChars(
Result, CurPtr, tok::raw_identifier);
1998 Result.setRawIdentifierData(IdStart);
2007 const IdentifierInfo *II =
PP->LookUpIdentifierInfo(
Result);
2014 if (isCodeCompletionPoint(CurPtr)) {
2016 Result.setKind(tok::code_completion);
2022 assert(*CurPtr == 0 &&
"Completion character must be 0");
2027 if (CurPtr < BufferEnd) {
2039 return PP->HandleIdentifier(
Result);
2046bool Lexer::isHexaLiteral(
const char *Start,
const LangOptions &LangOpts) {
2048 char C1 = CharAndSize1.Char;
2054 char C2 = CharAndSize2.Char;
2055 return (C2 ==
'x' || C2 ==
'X');
2061bool Lexer::LexNumericConstant(
Token &
Result,
const char *CurPtr) {
2063 char C = getCharAndSize(CurPtr, Size);
2066 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2068 if (LangOpts.HLSL &&
C ==
'.' && (*CurPtr ==
'x' || *CurPtr ==
'r')) {
2072 C = getCharAndSize(CurPtr, Size);
2076 if ((
C ==
'-' ||
C ==
'+') && (PrevCh ==
'E' || PrevCh ==
'e')) {
2079 if (!LangOpts.MicrosoftExt || !isHexaLiteral(BufferPtr, LangOpts))
2080 return LexNumericConstant(
Result, ConsumeChar(CurPtr, Size,
Result));
2084 if ((
C ==
'-' ||
C ==
'+') && (PrevCh ==
'P' || PrevCh ==
'p')) {
2088 bool IsHexFloat =
true;
2089 if (!LangOpts.C99) {
2090 if (!isHexaLiteral(BufferPtr, LangOpts))
2092 else if (!LangOpts.CPlusPlus17 &&
2093 std::find(BufferPtr, CurPtr,
'_') != CurPtr)
2097 return LexNumericConstant(
Result, ConsumeChar(CurPtr, Size,
Result));
2101 if (
C ==
'\'' && LangOpts.AllowLiteralDigitSeparator) {
2105 Diag(CurPtr, LangOpts.CPlusPlus
2106 ? diag::warn_cxx11_compat_digit_separator
2107 : diag::warn_c23_compat_digit_separator);
2108 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2109 CurPtr = ConsumeChar(CurPtr, NextSize,
Result);
2110 return LexNumericConstant(
Result, CurPtr);
2115 if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result))
2116 return LexNumericConstant(
Result, CurPtr);
2118 return LexNumericConstant(
Result, CurPtr);
2121 const char *TokStart = BufferPtr;
2122 FormTokenWithChars(
Result, CurPtr, tok::numeric_constant);
2123 Result.setLiteralData(TokStart);
2129const char *Lexer::LexUDSuffix(
Token &
Result,
const char *CurPtr,
2130 bool IsStringLiteral) {
2131 assert(LangOpts.CPlusPlus);
2135 char C = getCharAndSize(CurPtr, Size);
2136 bool Consumed =
false;
2139 if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result))
2141 else if (!
isASCII(
C) && tryConsumeIdentifierUTF8Char(CurPtr,
Result))
2147 if (!LangOpts.CPlusPlus11) {
2150 C ==
'_' ? diag::warn_cxx11_compat_user_defined_literal
2151 : diag::warn_cxx11_compat_reserved_user_defined_literal)
2162 bool IsUDSuffix =
false;
2165 else if (IsStringLiteral && LangOpts.CPlusPlus14) {
2169 const unsigned MaxStandardSuffixLength = 3;
2170 char Buffer[MaxStandardSuffixLength] = {
C };
2171 unsigned Consumed =
Size;
2174 auto [
Next, NextSize] =
2178 const StringRef CompleteSuffix(Buffer, Chars);
2184 if (Chars == MaxStandardSuffixLength)
2188 Buffer[Chars++] =
Next;
2189 Consumed += NextSize;
2195 Diag(CurPtr, LangOpts.MSVCCompat
2196 ? diag::ext_ms_reserved_user_defined_literal
2197 : diag::ext_reserved_user_defined_literal)
2202 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2207 C = getCharAndSize(CurPtr, Size);
2209 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2210 }
else if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result)) {
2211 }
else if (!
isASCII(
C) && tryConsumeIdentifierUTF8Char(CurPtr,
Result)) {
2221bool Lexer::LexStringLiteral(
Token &
Result,
const char *CurPtr,
2223 const char *AfterQuote = CurPtr;
2225 const char *NulCharacter =
nullptr;
2228 (Kind == tok::utf8_string_literal ||
2229 Kind == tok::utf16_string_literal ||
2230 Kind == tok::utf32_string_literal))
2231 Diag(BufferPtr, LangOpts.CPlusPlus ? diag::warn_cxx98_compat_unicode_literal
2232 : diag::warn_c99_compat_unicode_literal);
2234 char C = getAndAdvanceChar(CurPtr,
Result);
2239 C = getAndAdvanceChar(CurPtr,
Result);
2241 if (
C ==
'\n' ||
C ==
'\r' ||
2242 (
C == 0 && CurPtr-1 == BufferEnd)) {
2244 Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 1;
2245 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2250 if (isCodeCompletionPoint(CurPtr-1)) {
2252 codeCompleteIncludedFile(AfterQuote, CurPtr - 1,
false);
2254 PP->CodeCompleteNaturalLanguage();
2255 FormTokenWithChars(
Result, CurPtr - 1, tok::unknown);
2260 NulCharacter = CurPtr-1;
2262 C = getAndAdvanceChar(CurPtr,
Result);
2266 if (LangOpts.CPlusPlus)
2267 CurPtr = LexUDSuffix(
Result, CurPtr,
true);
2271 Diag(NulCharacter, diag::null_in_char_or_string) << 1;
2274 const char *TokStart = BufferPtr;
2275 FormTokenWithChars(
Result, CurPtr, Kind);
2276 Result.setLiteralData(TokStart);
2282bool Lexer::LexRawStringLiteral(
Token &
Result,
const char *CurPtr,
2290 Diag(BufferPtr, diag::warn_cxx98_compat_raw_string_literal);
2292 unsigned PrefixLen = 0;
2296 llvm::is_contained({
'$',
'@',
'`'}, CurPtr[PrefixLen])) {
2297 const char *Pos = &CurPtr[PrefixLen];
2298 Diag(Pos, LangOpts.CPlusPlus26
2299 ? diag::warn_cxx26_compat_raw_string_literal_character_set
2300 : diag::ext_cxx26_raw_string_literal_character_set)
2301 << StringRef(Pos, 1);
2307 if (CurPtr[PrefixLen] !=
'(') {
2309 const char *PrefixEnd = &CurPtr[PrefixLen];
2310 if (PrefixLen == 16) {
2311 Diag(PrefixEnd, diag::err_raw_delim_too_long);
2312 }
else if (*PrefixEnd ==
'\n') {
2313 Diag(PrefixEnd, diag::err_invalid_newline_raw_delim);
2315 Diag(PrefixEnd, diag::err_invalid_char_raw_delim)
2316 << StringRef(PrefixEnd, 1);
2328 if (
C == 0 && CurPtr-1 == BufferEnd) {
2334 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2339 const char *Prefix = CurPtr;
2340 CurPtr += PrefixLen + 1;
2347 if (strncmp(CurPtr, Prefix, PrefixLen) == 0 && CurPtr[PrefixLen] ==
'"') {
2348 CurPtr += PrefixLen + 1;
2351 }
else if (
C == 0 && CurPtr-1 == BufferEnd) {
2353 Diag(BufferPtr, diag::err_unterminated_raw_string)
2354 << StringRef(Prefix, PrefixLen);
2355 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2361 if (LangOpts.CPlusPlus)
2362 CurPtr = LexUDSuffix(
Result, CurPtr,
true);
2365 const char *TokStart = BufferPtr;
2366 FormTokenWithChars(
Result, CurPtr, Kind);
2367 Result.setLiteralData(TokStart);
2373bool Lexer::LexAngledStringLiteral(
Token &
Result,
const char *CurPtr) {
2375 const char *NulCharacter =
nullptr;
2376 const char *AfterLessPos = CurPtr;
2377 char C = getAndAdvanceChar(CurPtr,
Result);
2382 C = getAndAdvanceChar(CurPtr,
Result);
2385 (
C == 0 && (CurPtr - 1 == BufferEnd))) {
2388 FormTokenWithChars(
Result, AfterLessPos, tok::less);
2393 if (isCodeCompletionPoint(CurPtr - 1)) {
2394 codeCompleteIncludedFile(AfterLessPos, CurPtr - 1,
true);
2396 FormTokenWithChars(
Result, CurPtr - 1, tok::unknown);
2399 NulCharacter = CurPtr-1;
2401 C = getAndAdvanceChar(CurPtr,
Result);
2406 Diag(NulCharacter, diag::null_in_char_or_string) << 1;
2409 const char *TokStart = BufferPtr;
2410 FormTokenWithChars(
Result, CurPtr, tok::header_name);
2411 Result.setLiteralData(TokStart);
2415void Lexer::codeCompleteIncludedFile(
const char *PathStart,
2416 const char *CompletionPoint,
2419 StringRef PartialPath(PathStart, CompletionPoint - PathStart);
2420 llvm::StringRef SlashChars = LangOpts.MSVCCompat ?
"/\\" :
"/";
2421 auto Slash = PartialPath.find_last_of(SlashChars);
2423 (Slash == StringRef::npos) ?
"" : PartialPath.take_front(Slash);
2424 const char *StartOfFilename =
2425 (Slash == StringRef::npos) ? PathStart : PathStart + Slash + 1;
2427 PP->setCodeCompletionIdentifierInfo(&
PP->getIdentifierTable().get(
2428 StringRef(StartOfFilename, CompletionPoint - StartOfFilename)));
2431 while (CompletionPoint < BufferEnd) {
2432 char Next = *(CompletionPoint + 1);
2436 if (
Next == (IsAngled ?
'>' :
'"'))
2438 if (SlashChars.contains(
Next))
2442 PP->setCodeCompletionTokenRange(
2443 FileLoc.getLocWithOffset(StartOfFilename - BufferStart),
2444 FileLoc.getLocWithOffset(CompletionPoint - BufferStart));
2445 PP->CodeCompleteIncludedFile(Dir, IsAngled);
2450bool Lexer::LexCharConstant(
Token &
Result,
const char *CurPtr,
2453 const char *NulCharacter =
nullptr;
2456 if (Kind == tok::utf16_char_constant || Kind == tok::utf32_char_constant)
2457 Diag(BufferPtr, LangOpts.CPlusPlus
2458 ? diag::warn_cxx98_compat_unicode_literal
2459 : diag::warn_c99_compat_unicode_literal);
2460 else if (Kind == tok::utf8_char_constant)
2461 Diag(BufferPtr, LangOpts.CPlusPlus
2462 ? diag::warn_cxx14_compat_u8_character_literal
2463 : diag::warn_c17_compat_u8_character_literal);
2466 char C = getAndAdvanceChar(CurPtr,
Result);
2469 Diag(BufferPtr, diag::ext_empty_character);
2470 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2477 C = getAndAdvanceChar(CurPtr,
Result);
2479 if (
C ==
'\n' ||
C ==
'\r' ||
2480 (
C == 0 && CurPtr-1 == BufferEnd)) {
2482 Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 0;
2483 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2488 if (isCodeCompletionPoint(CurPtr-1)) {
2489 PP->CodeCompleteNaturalLanguage();
2490 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2495 NulCharacter = CurPtr-1;
2497 C = getAndAdvanceChar(CurPtr,
Result);
2501 if (LangOpts.CPlusPlus)
2502 CurPtr = LexUDSuffix(
Result, CurPtr,
false);
2506 Diag(NulCharacter, diag::null_in_char_or_string) << 0;
2509 const char *TokStart = BufferPtr;
2510 FormTokenWithChars(
Result, CurPtr, Kind);
2511 Result.setLiteralData(TokStart);
2519bool Lexer::SkipWhitespace(
Token &
Result,
const char *CurPtr) {
2523 unsigned char Char = *CurPtr;
2525 const char *lastNewLine =
nullptr;
2526 auto setLastNewLine = [&](
const char *Ptr) {
2532 setLastNewLine(CurPtr - 1);
2551 if (*CurPtr ==
'\n')
2552 setLastNewLine(CurPtr);
2559 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2561 IsAtStartOfLine =
true;
2562 IsAtPhysicalStartOfLine =
true;
2569 char PrevChar = CurPtr[-1];
2577 if (NewLinePtr && lastNewLine && NewLinePtr != lastNewLine &&
PP) {
2578 if (
auto *Handler =
PP->getEmptylineHandler())
2594bool Lexer::SkipLineComment(
Token &
Result,
const char *CurPtr) {
2599 Diag(BufferPtr, diag::ext_line_comment);
2617 bool UnicodeDecodingAlreadyDiagnosed =
false;
2624 C !=
'\n' &&
C !=
'\r') {
2626 UnicodeDecodingAlreadyDiagnosed =
false;
2630 unsigned Length = llvm::getUTF8SequenceSize(
2631 (
const llvm::UTF8 *)CurPtr, (
const llvm::UTF8 *)BufferEnd);
2634 Diag(CurPtr, diag::warn_invalid_utf8_in_comment);
2635 UnicodeDecodingAlreadyDiagnosed =
true;
2638 UnicodeDecodingAlreadyDiagnosed =
false;
2644 const char *NextLine = CurPtr;
2647 const char *EscapePtr = CurPtr-1;
2648 bool HasSpace =
false;
2654 if (*EscapePtr ==
'\\')
2657 else if (EscapePtr[0] ==
'/' && EscapePtr[-1] ==
'?' &&
2658 EscapePtr[-2] ==
'?' && LangOpts.Trigraphs)
2660 CurPtr = EscapePtr-2;
2666 Diag(EscapePtr, diag::backslash_newline_space);
2673 const char *OldPtr = CurPtr;
2676 C = getAndAdvanceChar(CurPtr,
Result);
2681 if (
C != 0 && CurPtr == OldPtr+1) {
2689 if (CurPtr != OldPtr + 1 &&
C !=
'/' &&
2690 (CurPtr == BufferEnd + 1 || CurPtr[0] !=
'/')) {
2691 for (; OldPtr != CurPtr; ++OldPtr)
2692 if (OldPtr[0] ==
'\n' || OldPtr[0] ==
'\r') {
2696 const char *ForwardPtr = CurPtr;
2699 if (ForwardPtr[0] ==
'/' && ForwardPtr[1] ==
'/')
2704 Diag(OldPtr-1, diag::ext_multi_line_line_comment);
2709 if (
C ==
'\r' ||
C ==
'\n' || CurPtr == BufferEnd + 1) {
2714 if (
C ==
'\0' && isCodeCompletionPoint(CurPtr-1)) {
2715 PP->CodeCompleteNaturalLanguage();
2732 return SaveLineComment(
Result, CurPtr);
2746 NewLinePtr = CurPtr++;
2759bool Lexer::SaveLineComment(
Token &
Result,
const char *CurPtr) {
2762 FormTokenWithChars(
Result, CurPtr, tok::comment);
2774 assert(Spelling[0] ==
'/' && Spelling[1] ==
'/' &&
"Not line comment?");
2778 Result.setKind(tok::comment);
2789 assert(CurPtr[0] ==
'\n' || CurPtr[0] ==
'\r');
2792 const char *TrigraphPos =
nullptr;
2794 const char *SpacePos =
nullptr;
2801 if (CurPtr[0] ==
'\n' || CurPtr[0] ==
'\r') {
2803 if (CurPtr[0] == CurPtr[1])
2817 if (*CurPtr ==
'\\') {
2819 }
else if (CurPtr[0] ==
'/' && CurPtr[-1] ==
'?' && CurPtr[-2] ==
'?') {
2821 TrigraphPos = CurPtr - 2;
2832 if (*CurPtr !=
'\n' && *CurPtr !=
'\r')
2841 L->
Diag(TrigraphPos, diag::trigraph_ignored_block_comment);
2845 L->
Diag(TrigraphPos, diag::trigraph_ends_block_comment);
2850 L->
Diag(CurPtr + 1, diag::escaped_newline_block_comment_end);
2854 L->
Diag(SpacePos, diag::backslash_newline_space);
2860#include <emmintrin.h>
2875bool Lexer::SkipBlockComment(
Token &
Result,
const char *CurPtr) {
2885 unsigned char C = getCharAndSize(CurPtr, CharSize);
2887 if (
C == 0 && CurPtr == BufferEnd+1) {
2889 Diag(BufferPtr, diag::err_unterminated_block_comment);
2895 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2912 bool UnicodeDecodingAlreadyDiagnosed =
false;
2917 if (CurPtr + 24 < BufferEnd &&
2920 !(
PP &&
PP->getCodeCompletionFileLoc() == FileLoc)) {
2922 while (
C !=
'/' && (
intptr_t)CurPtr % 16 != 0) {
2927 if (
C ==
'/')
goto FoundSlash;
2931 while (CurPtr + 16 < BufferEnd) {
2933 if (LLVM_UNLIKELY(Mask != 0)) {
2943 CurPtr += llvm::countr_zero<unsigned>(cmp) + 1;
2949 __vector
unsigned char LongUTF = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
2950 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
2951 0x80, 0x80, 0x80, 0x80};
2952 __vector
unsigned char Slashes = {
2953 '/',
'/',
'/',
'/',
'/',
'/',
'/',
'/',
2954 '/',
'/',
'/',
'/',
'/',
'/',
'/',
'/'
2956 while (CurPtr + 16 < BufferEnd) {
2958 vec_any_ge(*(
const __vector
unsigned char *)CurPtr, LongUTF)))
2960 if (
vec_any_eq(*(
const __vector
unsigned char *)CurPtr, Slashes)) {
2967 while (CurPtr + 16 < BufferEnd) {
2968 bool HasNonASCII =
false;
2969 for (
unsigned I = 0; I < 16; ++I)
2970 HasNonASCII |= !
isASCII(CurPtr[I]);
2972 if (LLVM_UNLIKELY(HasNonASCII))
2975 bool HasSlash =
false;
2976 for (
unsigned I = 0; I < 16; ++I)
2977 HasSlash |= CurPtr[I] ==
'/';
2991 while (
C !=
'/' &&
C !=
'\0') {
2993 UnicodeDecodingAlreadyDiagnosed =
false;
3000 unsigned Length = llvm::getUTF8SequenceSize(
3001 (
const llvm::UTF8 *)CurPtr - 1, (
const llvm::UTF8 *)BufferEnd);
3004 Diag(CurPtr - 1, diag::warn_invalid_utf8_in_comment);
3005 UnicodeDecodingAlreadyDiagnosed =
true;
3007 UnicodeDecodingAlreadyDiagnosed =
false;
3008 CurPtr += Length - 1;
3015 if (CurPtr[-2] ==
'*')
3018 if ((CurPtr[-2] ==
'\n' || CurPtr[-2] ==
'\r')) {
3020 LangOpts.Trigraphs)) {
3026 if (CurPtr[0] ==
'*' && CurPtr[1] !=
'/') {
3031 Diag(CurPtr-1, diag::warn_nested_block_comment);
3033 }
else if (
C == 0 && CurPtr == BufferEnd+1) {
3035 Diag(BufferPtr, diag::err_unterminated_block_comment);
3044 FormTokenWithChars(
Result, CurPtr, tok::unknown);
3050 }
else if (
C ==
'\0' && isCodeCompletionPoint(CurPtr-1)) {
3051 PP->CodeCompleteNaturalLanguage();
3069 FormTokenWithChars(
Result, CurPtr, tok::comment);
3078 SkipWhitespace(
Result, CurPtr + 1);
3096 "Must be in a preprocessing directive!");
3101 const char *CurPtr = BufferPtr;
3103 char Char = getAndAdvanceChar(CurPtr, Tmp);
3111 if (CurPtr-1 != BufferEnd) {
3112 if (isCodeCompletionPoint(CurPtr-1)) {
3113 PP->CodeCompleteNaturalLanguage();
3128 assert(CurPtr[-1] == Char &&
"Trigraphs for newline?");
3129 BufferPtr = CurPtr-1;
3133 if (Tmp.
is(tok::code_completion)) {
3135 PP->CodeCompleteNaturalLanguage();
3138 assert(Tmp.
is(tok::eod) &&
"Unexpected token!");
3150bool Lexer::LexEndOfFile(
Token &
Result,
const char *CurPtr) {
3158 FormTokenWithChars(
Result, CurPtr, tok::eod);
3169 Result.startToken();
3170 BufferPtr = BufferEnd;
3171 FormTokenWithChars(Result, BufferEnd, tok::eof);
3175 if (
PP->isRecordingPreamble() &&
PP->isInPrimaryFile()) {
3181 MIOpt.ExitTopLevelConditional();
3189 if (
PP->getCodeCompletionFileLoc() != FileLoc)
3191 diag::err_pp_unterminated_conditional);
3198 if (CurPtr != BufferStart && (CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r'))
3199 Diag(BufferEnd, diag::warn_no_newline_eof)
3211std::optional<Token> Lexer::peekNextPPToken() {
3212 assert(!
LexingRawMode &&
"How can we expand a macro from a skipping buffer?");
3214 if (isDependencyDirectivesLexer()) {
3215 if (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size())
3216 return std::nullopt;
3218 (void)convertDependencyDirectiveToken(
3219 DepDirectives.front().Tokens[NextDepDirectiveTokenIndex],
Result);
3229 const char *TmpBufferPtr = BufferPtr;
3231 bool atStartOfLine = IsAtStartOfLine;
3232 bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
3233 bool leadingSpace = HasLeadingSpace;
3234 MultipleIncludeOpt MIOptState =
MIOpt;
3240 BufferPtr = TmpBufferPtr;
3242 HasLeadingSpace = leadingSpace;
3243 IsAtStartOfLine = atStartOfLine;
3244 IsAtPhysicalStartOfLine = atPhysicalStartOfLine;
3249 if (
Tok.
is(tok::eof))
3250 return std::nullopt;
3257 const char *Terminator = CMK ==
CMK_Perforce ?
"<<<<\n" :
">>>>>>>";
3259 auto RestOfBuffer = StringRef(CurPtr, BufferEnd - CurPtr).substr(TermLen);
3260 size_t Pos = RestOfBuffer.find(Terminator);
3261 while (Pos != StringRef::npos) {
3264 (RestOfBuffer[Pos - 1] !=
'\r' && RestOfBuffer[Pos - 1] !=
'\n')) {
3265 RestOfBuffer = RestOfBuffer.substr(Pos+TermLen);
3266 Pos = RestOfBuffer.find(Terminator);
3269 return RestOfBuffer.data()+Pos;
3278bool Lexer::IsStartOfConflictMarker(
const char *CurPtr) {
3280 if (CurPtr != BufferStart &&
3281 CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')
3285 if (!StringRef(CurPtr, BufferEnd - CurPtr).starts_with(
"<<<<<<<") &&
3286 !StringRef(CurPtr, BufferEnd - CurPtr).starts_with(
">>>> "))
3301 Diag(CurPtr, diag::err_conflict_marker);
3302 CurrentConflictMarkerState =
Kind;
3306 while (*CurPtr !=
'\r' && *CurPtr !=
'\n') {
3307 assert(CurPtr != BufferEnd &&
"Didn't find end of line");
3322bool Lexer::HandleEndOfConflictMarker(
const char *CurPtr) {
3324 if (CurPtr != BufferStart &&
3325 CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')
3334 for (
unsigned i = 1; i != 4; ++i)
3335 if (CurPtr[i] != CurPtr[0])
3342 CurrentConflictMarkerState)) {
3346 while (CurPtr != BufferEnd && *CurPtr !=
'\r' && *CurPtr !=
'\n')
3352 CurrentConflictMarkerState =
CMK_None;
3360 const char *BufferEnd) {
3361 if (CurPtr == BufferEnd)
3364 for (; CurPtr != BufferEnd; ++CurPtr) {
3365 if (CurPtr[0] ==
'#' && CurPtr[1] ==
'>')
3371bool Lexer::lexEditorPlaceholder(
Token &
Result,
const char *CurPtr) {
3372 assert(CurPtr[-1] ==
'<' && CurPtr[0] ==
'#' &&
"Not a placeholder!");
3378 const char *Start = CurPtr - 1;
3379 if (!LangOpts.AllowEditorPlaceholders)
3380 Diag(Start, diag::err_placeholder_in_source);
3382 FormTokenWithChars(
Result, End, tok::raw_identifier);
3383 Result.setRawIdentifierData(Start);
3390bool Lexer::isCodeCompletionPoint(
const char *CurPtr)
const {
3391 if (
PP &&
PP->isCodeCompletionEnabled()) {
3392 SourceLocation Loc = FileLoc.getLocWithOffset(CurPtr-BufferStart);
3393 return Loc ==
PP->getCodeCompletionLoc();
3404 if (Opts.CPlusPlus23)
3405 DiagId = diag::warn_cxx23_delimited_escape_sequence;
3406 else if (Opts.C2y && !Named)
3407 DiagId = diag::warn_c2y_delimited_escape_sequence;
3409 DiagId = diag::ext_delimited_escape_sequence;
3415 if (!Opts.CPlusPlus)
3416 Ext = Named ? 2 : 1 ;
3420 Diags.
Report(Loc, DiagId) << Named << Ext;
3423std::optional<uint32_t> Lexer::tryReadNumericUCN(
const char *&StartPtr,
3424 const char *SlashLoc,
3427 char Kind = getCharAndSize(StartPtr, CharSize);
3428 assert((Kind ==
'u' || Kind ==
'U') &&
"expected a UCN");
3430 unsigned NumHexDigits;
3433 else if (Kind ==
'U')
3436 bool Delimited =
false;
3437 bool FoundEndDelimiter =
false;
3441 if (!LangOpts.CPlusPlus && !LangOpts.C99) {
3443 Diag(SlashLoc, diag::warn_ucn_not_valid_in_c89);
3444 return std::nullopt;
3447 const char *CurPtr = StartPtr + CharSize;
3448 const char *KindLoc = &CurPtr[-1];
3450 uint32_t CodePoint = 0;
3451 while (Count != NumHexDigits || Delimited) {
3452 char C = getCharAndSize(CurPtr, CharSize);
3453 if (!Delimited && Count == 0 &&
C ==
'{') {
3459 if (Delimited &&
C ==
'}') {
3461 FoundEndDelimiter =
true;
3465 unsigned Value = llvm::hexDigitValue(
C);
3466 if (
Value == std::numeric_limits<unsigned>::max()) {
3470 Diag(SlashLoc, diag::warn_delimited_ucn_incomplete)
3471 << StringRef(KindLoc, 1);
3472 return std::nullopt;
3475 if (CodePoint & 0xF000'0000) {
3477 Diag(KindLoc, diag::err_escape_too_large) << 0;
3478 return std::nullopt;
3489 Diag(SlashLoc, FoundEndDelimiter ? diag::warn_delimited_ucn_empty
3490 : diag::warn_ucn_escape_no_digits)
3491 << StringRef(KindLoc, 1);
3492 return std::nullopt;
3495 if (Delimited && Kind ==
'U') {
3497 Diag(SlashLoc, diag::err_hex_escape_no_digits) << StringRef(KindLoc, 1);
3498 return std::nullopt;
3501 if (!Delimited && Count != NumHexDigits) {
3503 Diag(SlashLoc, diag::warn_ucn_escape_incomplete);
3505 if (Count == 4 && NumHexDigits == 8) {
3506 CharSourceRange URange =
makeCharRange(*
this, KindLoc, KindLoc + 1);
3507 Diag(KindLoc, diag::note_ucn_four_not_eight)
3511 return std::nullopt;
3514 if (Delimited &&
PP)
3517 PP->getDiagnostics());
3524 if (CurPtr - StartPtr == (
ptrdiff_t)(Count + 1 + (Delimited ? 2 : 0)))
3527 while (StartPtr != CurPtr)
3528 (void)getAndAdvanceChar(StartPtr, *
Result);
3535std::optional<uint32_t> Lexer::tryReadNamedUCN(
const char *&StartPtr,
3536 const char *SlashLoc,
3541 char C = getCharAndSize(StartPtr, CharSize);
3542 assert(
C ==
'N' &&
"expected \\N{...}");
3544 const char *CurPtr = StartPtr + CharSize;
3545 const char *KindLoc = &CurPtr[-1];
3547 C = getCharAndSize(CurPtr, CharSize);
3550 Diag(SlashLoc, diag::warn_ucn_escape_incomplete);
3551 return std::nullopt;
3554 const char *StartName = CurPtr;
3555 bool FoundEndDelimiter =
false;
3556 llvm::SmallVector<char, 30> Buffer;
3558 C = getCharAndSize(CurPtr, CharSize);
3561 FoundEndDelimiter =
true;
3567 Buffer.push_back(
C);
3570 if (!FoundEndDelimiter || Buffer.empty()) {
3572 Diag(SlashLoc, FoundEndDelimiter ? diag::warn_delimited_ucn_empty
3573 : diag::warn_delimited_ucn_incomplete)
3574 << StringRef(KindLoc, 1);
3575 return std::nullopt;
3578 StringRef Name(Buffer.data(), Buffer.size());
3579 std::optional<char32_t>
Match =
3580 llvm::sys::unicode::nameToCodepointStrict(Name);
3581 std::optional<llvm::sys::unicode::LooseMatchingResult> LooseMatch;
3583 LooseMatch = llvm::sys::unicode::nameToCodepointLooseMatching(Name);
3585 Diag(StartName, diag::err_invalid_ucn_name)
3586 << StringRef(Buffer.data(), Buffer.size())
3589 Diag(StartName, diag::note_invalid_ucn_name_loose_matching)
3600 if (Diagnose &&
Match)
3603 PP->getDiagnostics());
3609 if (LooseMatch && Diagnose)
3610 Match = LooseMatch->CodePoint;
3617 if (CurPtr - StartPtr == (
ptrdiff_t)(Buffer.size() + 3))
3620 while (StartPtr != CurPtr)
3621 (void)getAndAdvanceChar(StartPtr, *
Result);
3625 return Match ? std::optional<uint32_t>(*
Match) : std::nullopt;
3628uint32_t Lexer::tryReadUCN(
const char *&StartPtr,
const char *SlashLoc,
3632 std::optional<uint32_t> CodePointOpt;
3633 char Kind = getCharAndSize(StartPtr, CharSize);
3634 if (Kind ==
'u' || Kind ==
'U')
3635 CodePointOpt = tryReadNumericUCN(StartPtr, SlashLoc,
Result);
3636 else if (Kind ==
'N')
3637 CodePointOpt = tryReadNamedUCN(StartPtr, SlashLoc,
Result);
3642 uint32_t CodePoint = *CodePointOpt;
3645 if (LangOpts.AsmPreprocessor)
3664 if (CodePoint < 0xA0) {
3668 if (CodePoint < 0x20 || CodePoint >= 0x7F)
3669 Diag(BufferPtr, diag::err_ucn_control_character);
3671 char C =
static_cast<char>(CodePoint);
3672 Diag(BufferPtr, diag::err_ucn_escape_basic_scs) << StringRef(&
C, 1);
3677 }
else if (CodePoint >= 0xD800 && CodePoint <= 0xDFFF) {
3682 if (LangOpts.CPlusPlus && !LangOpts.CPlusPlus11)
3683 Diag(BufferPtr, diag::warn_ucn_escape_surrogate);
3685 Diag(BufferPtr, diag::err_ucn_escape_invalid);
3693bool Lexer::CheckUnicodeWhitespace(
Token &
Result, uint32_t
C,
3694 const char *CurPtr) {
3697 Diag(BufferPtr, diag::ext_unicode_whitespace)
3706void Lexer::PropagateLineStartLeadingSpaceInfo(
Token &
Result) {
3707 IsAtStartOfLine =
Result.isAtStartOfLine();
3708 HasLeadingSpace =
Result.hasLeadingSpace();
3709 HasLeadingEmptyMacro =
Result.hasLeadingEmptyMacro();
3714 assert(!isDependencyDirectivesLexer());
3720 if (IsAtStartOfLine) {
3722 IsAtStartOfLine =
false;
3725 if (IsAtPhysicalStartOfLine) {
3727 IsAtPhysicalStartOfLine =
false;
3730 if (HasLeadingSpace) {
3732 HasLeadingSpace =
false;
3735 if (HasLeadingEmptyMacro) {
3737 HasLeadingEmptyMacro =
false;
3742 bool returnedToken = LexTokenInternal(
Result);
3744 assert((returnedToken || !isRawLex) &&
"Raw lex must succeed");
3745 return returnedToken;
3755 assert(!
Result.needsCleaning() &&
"Result needs cleaning");
3756 assert(!
Result.hasPtrData() &&
"Result has not been reset");
3759 const char *CurPtr = BufferPtr;
3773 FormTokenWithChars(Result, CurPtr, tok::unknown);
3782 unsigned SizeTmp, SizeTmp2;
3785 char Char = getAndAdvanceChar(CurPtr,
Result);
3789 NewLinePtr =
nullptr;
3794 if (CurPtr-1 == BufferEnd)
3795 return LexEndOfFile(
Result, CurPtr-1);
3798 if (isCodeCompletionPoint(CurPtr-1)) {
3801 FormTokenWithChars(
Result, CurPtr, tok::code_completion);
3806 Diag(CurPtr-1, diag::null_in_file);
3808 if (SkipWhitespace(
Result, CurPtr))
3817 if (LangOpts.MicrosoftExt) {
3819 Diag(CurPtr-1, diag::ext_ctrl_z_eof_microsoft);
3820 return LexEndOfFile(
Result, CurPtr-1);
3824 Kind = tok::unknown;
3828 if (CurPtr[0] ==
'\n')
3829 (void)getAndAdvanceChar(CurPtr,
Result);
3843 IsAtStartOfLine =
true;
3844 IsAtPhysicalStartOfLine =
true;
3845 NewLinePtr = CurPtr - 1;
3854 if (SkipWhitespace(
Result, CurPtr))
3864 SkipHorizontalWhitespace:
3866 if (SkipWhitespace(
Result, CurPtr))
3875 LineComment && (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP)) {
3876 if (SkipLineComment(
Result, CurPtr + 2))
3878 goto SkipIgnoredUnits;
3880 if (SkipBlockComment(
Result, CurPtr + 2))
3882 goto SkipIgnoredUnits;
3884 goto SkipHorizontalWhitespace;
3892 case '0':
case '1':
case '2':
case '3':
case '4':
3893 case '5':
case '6':
case '7':
case '8':
case '9':
3896 return LexNumericConstant(
Result, CurPtr);
3905 if (LangOpts.CPlusPlus11 || LangOpts.C11) {
3906 Char = getCharAndSize(CurPtr, SizeTmp);
3910 return LexStringLiteral(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3911 tok::utf16_string_literal);
3915 return LexCharConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3916 tok::utf16_char_constant);
3919 if (Char ==
'R' && LangOpts.RawStringLiterals &&
3920 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3921 return LexRawStringLiteral(
Result,
3922 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3924 tok::utf16_string_literal);
3927 char Char2 = getCharAndSize(CurPtr + SizeTmp, SizeTmp2);
3931 return LexStringLiteral(
Result,
3932 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3934 tok::utf8_string_literal);
3935 if (Char2 ==
'\'' && (LangOpts.CPlusPlus17 || LangOpts.C23))
3936 return LexCharConstant(
3937 Result, ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3939 tok::utf8_char_constant);
3941 if (Char2 ==
'R' && LangOpts.RawStringLiterals) {
3943 char Char3 = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
3946 return LexRawStringLiteral(
Result,
3947 ConsumeChar(ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3950 tok::utf8_string_literal);
3957 return LexIdentifierContinue(
Result, CurPtr);
3963 if (LangOpts.CPlusPlus11 || LangOpts.C11) {
3964 Char = getCharAndSize(CurPtr, SizeTmp);
3968 return LexStringLiteral(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3969 tok::utf32_string_literal);
3973 return LexCharConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3974 tok::utf32_char_constant);
3977 if (Char ==
'R' && LangOpts.RawStringLiterals &&
3978 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3979 return LexRawStringLiteral(
Result,
3980 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3982 tok::utf32_string_literal);
3986 return LexIdentifierContinue(
Result, CurPtr);
3992 if (LangOpts.RawStringLiterals) {
3993 Char = getCharAndSize(CurPtr, SizeTmp);
3996 return LexRawStringLiteral(
Result,
3997 ConsumeChar(CurPtr, SizeTmp,
Result),
3998 tok::string_literal);
4002 return LexIdentifierContinue(
Result, CurPtr);
4007 Char = getCharAndSize(CurPtr, SizeTmp);
4011 return LexStringLiteral(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
4012 tok::wide_string_literal);
4015 if (LangOpts.RawStringLiterals && Char ==
'R' &&
4016 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
4017 return LexRawStringLiteral(
Result,
4018 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4020 tok::wide_string_literal);
4024 return LexCharConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
4025 tok::wide_char_constant);
4030 case 'A':
case 'B':
case 'C':
case 'D':
case 'E':
case 'F':
case 'G':
4031 case 'H':
case 'I':
case 'J':
case 'K':
case 'M':
case 'N':
4032 case 'O':
case 'P':
case 'Q':
case 'S':
case 'T':
4033 case 'V':
case 'W':
case 'X':
case 'Y':
case 'Z':
4034 case 'a':
case 'b':
case 'c':
case 'd':
case 'e':
case 'f':
case 'g':
4035 case 'h':
case 'i':
case 'j':
case 'k':
case 'l':
case 'm':
case 'n':
4036 case 'o':
case 'p':
case 'q':
case 'r':
case 's':
case 't':
4037 case 'v':
case 'w':
case 'x':
case 'y':
case 'z':
4041 return LexIdentifierContinue(
Result, CurPtr);
4043 if (LangOpts.DollarIdents) {
4045 Diag(CurPtr-1, diag::ext_dollar_in_identifier);
4048 return LexIdentifierContinue(
Result, CurPtr);
4051 Kind = tok::unknown;
4058 return LexCharConstant(
Result, CurPtr, tok::char_constant);
4064 return LexStringLiteral(
Result, CurPtr,
4066 : tok::string_literal);
4070 Kind = tok::question;
4073 Kind = tok::l_square;
4076 Kind = tok::r_square;
4079 Kind = tok::l_paren;
4082 Kind = tok::r_paren;
4085 Kind = tok::l_brace;
4088 Kind = tok::r_brace;
4091 Char = getCharAndSize(CurPtr, SizeTmp);
4092 if (Char >=
'0' && Char <=
'9') {
4096 return LexNumericConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result));
4097 }
else if (LangOpts.CPlusPlus && Char ==
'*') {
4098 Kind = tok::periodstar;
4100 }
else if (Char ==
'.' &&
4101 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
'.') {
4102 Kind = tok::ellipsis;
4103 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4110 Char = getCharAndSize(CurPtr, SizeTmp);
4113 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4114 }
else if (Char ==
'=') {
4115 Kind = tok::ampequal;
4116 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4122 if (getCharAndSize(CurPtr, SizeTmp) ==
'=') {
4123 Kind = tok::starequal;
4124 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4130 Char = getCharAndSize(CurPtr, SizeTmp);
4132 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4133 Kind = tok::plusplus;
4134 }
else if (Char ==
'=') {
4135 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4136 Kind = tok::plusequal;
4142 Char = getCharAndSize(CurPtr, SizeTmp);
4144 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4145 Kind = tok::minusminus;
4146 }
else if (Char ==
'>' && LangOpts.CPlusPlus &&
4147 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
'*') {
4148 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4150 Kind = tok::arrowstar;
4151 }
else if (Char ==
'>') {
4152 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4154 }
else if (Char ==
'=') {
4155 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4156 Kind = tok::minusequal;
4165 if (getCharAndSize(CurPtr, SizeTmp) ==
'=') {
4166 Kind = tok::exclaimequal;
4167 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4169 Kind = tok::exclaim;
4174 Char = getCharAndSize(CurPtr, SizeTmp);
4184 bool TreatAsComment =
4185 LineComment && (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP);
4186 if (!TreatAsComment)
4187 if (!(
PP &&
PP->isPreprocessedOutput()))
4188 TreatAsComment = getCharAndSize(CurPtr+SizeTmp, SizeTmp2) !=
'*';
4190 if (TreatAsComment) {
4191 if (SkipLineComment(
Result, ConsumeChar(CurPtr, SizeTmp,
Result)))
4197 goto SkipIgnoredUnits;
4202 if (SkipBlockComment(
Result, ConsumeChar(CurPtr, SizeTmp,
Result)))
4211 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4212 Kind = tok::slashequal;
4218 Char = getCharAndSize(CurPtr, SizeTmp);
4220 Kind = tok::percentequal;
4221 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4222 }
else if (LangOpts.Digraphs && Char ==
'>') {
4223 Kind = tok::r_brace;
4224 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4225 }
else if (LangOpts.Digraphs && Char ==
':') {
4226 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4227 Char = getCharAndSize(CurPtr, SizeTmp);
4228 if (Char ==
'%' && getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
':') {
4229 Kind = tok::hashhash;
4230 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4232 }
else if (Char ==
'@' && LangOpts.MicrosoftExt) {
4233 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4235 Diag(BufferPtr, diag::ext_charize_microsoft);
4244 goto HandleDirective;
4249 Kind = tok::percent;
4253 Char = getCharAndSize(CurPtr, SizeTmp);
4255 return LexAngledStringLiteral(
Result, CurPtr);
4256 }
else if (Char ==
'<') {
4257 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
4259 Kind = tok::lesslessequal;
4260 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4262 }
else if (After ==
'<' && IsStartOfConflictMarker(CurPtr-1)) {
4266 }
else if (After ==
'<' && HandleEndOfConflictMarker(CurPtr-1)) {
4270 }
else if (LangOpts.CUDA && After ==
'<') {
4271 Kind = tok::lesslessless;
4272 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4275 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4276 Kind = tok::lessless;
4278 }
else if (Char ==
'=') {
4279 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
4281 if (LangOpts.CPlusPlus20) {
4283 Diag(BufferPtr, diag::warn_cxx17_compat_spaceship);
4284 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4286 Kind = tok::spaceship;
4292 Diag(BufferPtr, diag::warn_cxx20_compat_spaceship)
4297 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4298 Kind = tok::lessequal;
4299 }
else if (LangOpts.Digraphs && Char ==
':') {
4300 if (LangOpts.CPlusPlus11 &&
4301 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
':') {
4308 char After = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
4309 if (After !=
':' && After !=
'>') {
4312 Diag(BufferPtr, diag::warn_cxx98_compat_less_colon_colon);
4317 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4318 Kind = tok::l_square;
4319 }
else if (LangOpts.Digraphs && Char ==
'%') {
4320 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4321 Kind = tok::l_brace;
4322 }
else if (Char ==
'#' && SizeTmp == 1 &&
4323 lexEditorPlaceholder(
Result, CurPtr)) {
4330 Char = getCharAndSize(CurPtr, SizeTmp);
4332 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4333 Kind = tok::greaterequal;
4334 }
else if (Char ==
'>') {
4335 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
4337 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4339 Kind = tok::greatergreaterequal;
4340 }
else if (After ==
'>' && IsStartOfConflictMarker(CurPtr-1)) {
4344 }
else if (After ==
'>' && HandleEndOfConflictMarker(CurPtr-1)) {
4347 }
else if (LangOpts.CUDA && After ==
'>') {
4348 Kind = tok::greatergreatergreater;
4349 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4352 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4353 Kind = tok::greatergreater;
4356 Kind = tok::greater;
4360 Char = getCharAndSize(CurPtr, SizeTmp);
4362 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4363 Kind = tok::caretequal;
4364 }
else if (LangOpts.Reflection && Char ==
'^') {
4365 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4366 Kind = tok::caretcaret;
4368 if (LangOpts.OpenCL && Char ==
'^')
4369 Diag(CurPtr, diag::err_opencl_logical_exclusive_or);
4374 Char = getCharAndSize(CurPtr, SizeTmp);
4376 Kind = tok::pipeequal;
4377 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4378 }
else if (Char ==
'|') {
4380 if (CurPtr[1] ==
'|' && HandleEndOfConflictMarker(CurPtr-1))
4382 Kind = tok::pipepipe;
4383 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4389 Char = getCharAndSize(CurPtr, SizeTmp);
4390 if (LangOpts.Digraphs && Char ==
'>') {
4391 Kind = tok::r_square;
4392 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4393 }
else if (Char ==
':') {
4394 Kind = tok::coloncolon;
4395 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4404 Char = getCharAndSize(CurPtr, SizeTmp);
4407 if (CurPtr[1] ==
'=' && HandleEndOfConflictMarker(CurPtr-1))
4410 Kind = tok::equalequal;
4411 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4420 Char = getCharAndSize(CurPtr, SizeTmp);
4422 Kind = tok::hashhash;
4423 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4424 }
else if (Char ==
'@' && LangOpts.MicrosoftExt) {
4427 Diag(BufferPtr, diag::ext_charize_microsoft);
4428 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4435 goto HandleDirective;
4443 if (CurPtr[-1] ==
'@' && LangOpts.ObjC)
4446 Kind = tok::unknown;
4451 if (!LangOpts.AsmPreprocessor) {
4452 if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &
Result)) {
4453 if (CheckUnicodeWhitespace(
Result, CodePoint, CurPtr)) {
4454 if (SkipWhitespace(
Result, CurPtr))
4462 return LexUnicodeIdentifierStart(
Result, CodePoint, CurPtr);
4466 Kind = tok::unknown;
4471 Kind = tok::unknown;
4475 llvm::UTF32 CodePoint;
4480 llvm::ConversionResult Status =
4481 llvm::convertUTF8Sequence((
const llvm::UTF8 **)&CurPtr,
4482 (
const llvm::UTF8 *)BufferEnd,
4484 llvm::strictConversion);
4485 if (Status == llvm::conversionOK) {
4486 if (CheckUnicodeWhitespace(
Result, CodePoint, CurPtr)) {
4487 if (SkipWhitespace(
Result, CurPtr))
4494 return LexUnicodeIdentifierStart(
Result, CodePoint, CurPtr);
4498 PP->isPreprocessedOutput()) {
4500 Kind = tok::unknown;
4507 Diag(CurPtr, diag::err_invalid_utf8);
4509 BufferPtr = CurPtr+1;
4521 FormTokenWithChars(
Result, CurPtr, Kind);
4527 FormTokenWithChars(
Result, CurPtr, tok::hash);
4530 if (
PP->hadModuleLoaderFatalFailure())
4542const char *Lexer::convertDependencyDirectiveToken(
4544 const char *TokPtr = BufferStart + DDTok.
Offset;
4550 if (
Result.is(tok::raw_identifier))
4551 Result.setRawIdentifierData(TokPtr);
4552 else if (
Result.isLiteral())
4553 Result.setLiteralData(TokPtr);
4554 BufferPtr = TokPtr + DDTok.
Length;
4558bool Lexer::LexDependencyDirectiveToken(
Token &
Result) {
4559 assert(isDependencyDirectivesLexer());
4561 using namespace dependency_directives_scan;
4563 if (BufferPtr == BufferEnd)
4564 return LexEndOfFile(
Result, BufferPtr);
4566 while (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size()) {
4567 if (DepDirectives.front().Kind == pp_eof)
4568 return LexEndOfFile(
Result, BufferEnd);
4569 if (DepDirectives.front().Kind == tokens_present_before_eof)
4571 NextDepDirectiveTokenIndex = 0;
4572 DepDirectives = DepDirectives.drop_front();
4575 const dependency_directives_scan::Token &DDTok =
4576 DepDirectives.front().Tokens[NextDepDirectiveTokenIndex++];
4577 if (NextDepDirectiveTokenIndex > 1 || DDTok.
Kind != tok::hash) {
4583 BufferPtr = BufferStart + DDTok.
Offset;
4584 LexAngledStringLiteral(
Result, BufferPtr + 1);
4585 if (
Result.isNot(tok::header_name))
4589 const dependency_directives_scan::Token &NextTok =
4590 DepDirectives.front().Tokens[NextDepDirectiveTokenIndex];
4591 if (BufferStart + NextTok.
Offset >= BufferPtr)
4593 ++NextDepDirectiveTokenIndex;
4598 const char *TokPtr = convertDependencyDirectiveToken(DDTok,
Result);
4600 if (
Result.is(tok::hash) &&
Result.isAtStartOfLine()) {
4602 if (
PP->hadModuleLoaderFatalFailure())
4607 if (
Result.is(tok::raw_identifier)) {
4608 Result.setRawIdentifierData(TokPtr);
4610 const IdentifierInfo *II =
PP->LookUpIdentifierInfo(
Result);
4611 if (LangOpts.CPlusPlusModules &&
Result.isModuleContextualKeyword() &&
4612 PP->HandleModuleContextualKeyword(
Result)) {
4617 return PP->HandleIdentifier(
Result);
4623 if (
Result.is(tok::colon)) {
4625 if (*BufferPtr ==
':') {
4626 assert(DepDirectives.front().Tokens[NextDepDirectiveTokenIndex].is(
4628 ++NextDepDirectiveTokenIndex;
4629 Result.setKind(tok::coloncolon);
4639bool Lexer::LexDependencyDirectiveTokenWhileSkipping(
Token &
Result) {
4640 assert(isDependencyDirectivesLexer());
4642 using namespace dependency_directives_scan;
4645 unsigned NestedIfs = 0;
4647 DepDirectives = DepDirectives.drop_front();
4648 switch (DepDirectives.front().Kind) {
4650 llvm_unreachable(
"unexpected 'pp_none'");
4691 NextDepDirectiveTokenIndex = 0;
4692 return LexEndOfFile(
Result, BufferEnd);
4696 const dependency_directives_scan::Token &DDTok =
4697 DepDirectives.front().Tokens.front();
4698 assert(DDTok.
is(tok::hash));
4699 NextDepDirectiveTokenIndex = 1;
4701 convertDependencyDirectiveToken(DDTok,
Result);
Defines the Diagnostic-related interfaces.
Defines the clang::IdentifierInfo, clang::IdentifierTable, and clang::Selector interfaces.
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified.
Defines the clang::LangOptions interface.
static bool isInExpansionTokenRange(const SourceLocation Loc, const SourceManager &SM)
static bool isMathematicalExtensionID(uint32_t C, const LangOptions &LangOpts, bool IsStart, bool &IsExtension)
static void diagnoseInvalidUnicodeCodepointInIdentifier(DiagnosticsEngine &Diags, const LangOptions &LangOpts, uint32_t CodePoint, CharSourceRange Range, bool IsFirst)
static char DecodeTrigraphChar(const char *CP, Lexer *L, bool Trigraphs)
DecodeTrigraphChar - If the specified character is a legal trigraph when prefixed with ?
static size_t getSpellingSlow(const Token &Tok, const char *BufPtr, const LangOptions &LangOpts, char *Spelling)
Slow case of getSpelling.
static const char * FindConflictEnd(const char *CurPtr, const char *BufferEnd, ConflictMarkerKind CMK)
Find the end of a version control conflict marker.
static void maybeDiagnoseUTF8Homoglyph(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range)
After encountering UTF-8 character C and interpreting it as an identifier character,...
static SourceLocation getBeginningOfFileToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
static void StringifyImpl(T &Str, char Quote)
static LLVM_ATTRIBUTE_NOINLINE SourceLocation GetMappedTokenLoc(Preprocessor &PP, SourceLocation FileLoc, unsigned CharNo, unsigned TokLen)
GetMappedTokenLoc - If lexing out of a 'mapped buffer', where we pretend the lexer buffer was all exp...
static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts, bool &IsExtension)
static CharSourceRange makeCharRange(Lexer &L, const char *Begin, const char *End)
static bool isUnicodeWhitespace(uint32_t Codepoint)
static void diagnoseExtensionInIdentifier(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range)
static const char * findPlaceholderEnd(const char *CurPtr, const char *BufferEnd)
static llvm::SmallString< 5 > codepointAsHexString(uint32_t C)
static CharSourceRange makeRangeFromFileLocs(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, Lexer *L, bool Trigraphs)
isBlockCommentEndOfEscapedNewLine - Return true if the specified newline character (either \n or \r) ...
static const char * fastParseASCIIIdentifier(const char *CurPtr, const char *BufferEnd)
static char GetTrigraphCharForLetter(char Letter)
GetTrigraphCharForLetter - Given a character that occurs after a ?
static bool isAllowedInitiallyIDChar(uint32_t C, const LangOptions &LangOpts, bool &IsExtension)
static void maybeDiagnoseIDCharCompat(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range, bool IsFirst)
static const char * findBeginningOfLine(StringRef Buffer, unsigned Offset)
Returns the pointer that points to the beginning of line that contains the given offset,...
Defines the MultipleIncludeOpt interface.
Defines the clang::Preprocessor interface.
static constexpr bool isOneOf()
Defines the clang::SourceLocation class and associated facilities.
Defines the SourceManager interface.
Defines the clang::TokenKind enum and support functions.
static const llvm::sys::UnicodeCharRange C11DisallowedInitialIDCharRanges[]
static const llvm::sys::UnicodeCharRange C99DisallowedInitialIDCharRanges[]
static const llvm::sys::UnicodeCharRange UnicodeWhitespaceCharRanges[]
static const llvm::sys::UnicodeCharRange C99AllowedIDCharRanges[]
static const llvm::sys::UnicodeCharRange C11AllowedIDCharRanges[]
static const llvm::sys::UnicodeCharRange MathematicalNotationProfileIDStartRanges[]
static const llvm::sys::UnicodeCharRange MathematicalNotationProfileIDContinueRanges[]
static const llvm::sys::UnicodeCharRange XIDStartRanges[]
static const llvm::sys::UnicodeCharRange XIDContinueRanges[]
__DEVICE__ void * memcpy(void *__a, const void *__b, size_t __c)
__device__ __2f16 float c
__PTRDIFF_TYPE__ ptrdiff_t
A signed integer type that is the result of subtracting two pointers.
static __inline__ int __ATTRS_o_ai vec_any_ge(vector signed char __a, vector signed char __b)
static __inline__ int __ATTRS_o_ai vec_any_eq(vector signed char __a, vector signed char __b)
Represents a byte-granular source range.
static CharSourceRange getCharRange(SourceRange R)
SourceLocation getEnd() const
SourceLocation getBegin() const
A little helper class used to produce diagnostics.
Concrete class used by the front-end to report problems and issues.
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
bool isIgnored(unsigned DiagID, SourceLocation Loc) const
Determine whether the diagnostic is known to be ignored.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
static FixItHint CreateReplacement(CharSourceRange RemoveRange, StringRef Code)
Create a code modification hint that replaces the given source range with the given code string.
static FixItHint CreateRemoval(CharSourceRange RemoveRange)
Create a code modification hint that removes the given source range.
static FixItHint CreateInsertion(SourceLocation InsertionLoc, StringRef Code, bool BeforePreviousInsertions=false)
Create a code modification hint that inserts the given code string at a specific location.
One of these records is kept for each identifier that is lexed.
bool isHandleIdentifierCase() const
Return true if the Preprocessor::HandleIdentifier must be called on a token of this identifier.
bool isModuleKeyword() const
Determine whether this is the contextual keyword module.
tok::TokenKind getTokenID() const
If this is a source-language token (e.g. 'for'), this API can be used to cause the lexer to map identifiers to source-language tokens.
bool isKeyword(const LangOptions &LangOpts) const
Return true if this token is a keyword in the specified language.
bool isImportKeyword() const
Determine whether this is the contextual keyword import.
tok::ObjCKeywordKind getObjCKeywordID() const
Return the Objective-C keyword ID for this identifier.
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens.
static StringRef getSourceText(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts, bool *Invalid=nullptr)
Returns a string for the source that the range encompasses.
friend class Preprocessor
void SetKeepWhitespaceMode(bool Val)
SetKeepWhitespaceMode - This method lets clients enable or disable whitespace retention mode.
static SourceLocation findLocationAfterToken(SourceLocation loc, tok::TokenKind TKind, const SourceManager &SM, const LangOptions &LangOpts, bool SkipTrailingWhitespaceAndNewLine)
Checks that the given token is the first token that occurs after the given location (this excludes co...
bool LexFromRawLexer(Token &Result)
LexFromRawLexer - Lex a token from a designated raw lexer (one with no associated preprocessor object...
static unsigned getEscapedNewLineSize(const char *P)
getEscapedNewLineSize - Return the size of the specified escaped newline, or 0 if it is not an escape...
bool inKeepCommentMode() const
inKeepCommentMode - Return true if the lexer should return comments as tokens.
void SetCommentRetentionState(bool Mode)
SetCommentRetentionState - Change the comment retention mode of the lexer to the specified mode.
static std::optional< Token > findPreviousToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts, bool IncludeComments)
Finds the token that comes before the given location.
void seek(unsigned Offset, bool IsAtStartOfLine)
Set the lexer's buffer pointer to Offset.
static StringRef getImmediateMacroName(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
void ReadToEndOfLine(SmallVectorImpl< char > *Result=nullptr)
ReadToEndOfLine - Read the rest of the current preprocessor line as an uninterpreted string.
static bool isAtStartOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroBegin=nullptr)
Returns true if the given MacroID location points at the first token of the macro expansion.
DiagnosticBuilder Diag(const char *Loc, unsigned DiagID) const
Diag - Forwarding function for diagnostics.
const char * getBufferLocation() const
Return the current location in the buffer.
bool Lex(Token &Result)
Lex - Return the next token in the file.
bool isPragmaLexer() const
isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.
static void DiagnoseDelimitedOrNamedEscapeSequence(SourceLocation Loc, bool Named, const LangOptions &Opts, DiagnosticsEngine &Diags)
Diagnose use of a delimited or named escape sequence.
static unsigned getTokenPrefixLength(SourceLocation TokStart, unsigned CharNo, const SourceManager &SM, const LangOptions &LangOpts)
Get the physical length (including trigraphs and escaped newlines) of the first CharNo characters...
Lexer(FileID FID, const llvm::MemoryBufferRef &InputFile, Preprocessor &PP, bool IsFirstIncludeOfFile=true)
Lexer constructor - Create a new lexer object for the specified buffer with the specified preprocesso...
static bool isAtEndOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroEnd=nullptr)
Returns true if the given MacroID location points at the last token of the macro expansion.
SourceLocation getSourceLocation() override
getSourceLocation - Return a source location for the next character in the current file.
static CharSourceRange makeFileCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Accepts a range and returns a character range with file locations.
static bool isNewLineEscaped(const char *BufferStart, const char *Str)
Checks whether new line pointed by Str is preceded by escape sequence.
SourceLocation getSourceLocation(const char *Loc, unsigned TokLen=1) const
getSourceLocation - Return a source location identifier for the specified offset in the current file.
static StringRef getIndentationForLine(SourceLocation Loc, const SourceManager &SM)
Returns the leading whitespace for line that corresponds to the given location Loc.
static unsigned getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid=nullptr)
getSpelling - This method is used to get the spelling of a token into a preallocated buffer,...
bool isKeepWhitespaceMode() const
isKeepWhitespaceMode - Return true if the lexer should return tokens for every character in the file,...
static bool isAsciiIdentifierContinueChar(char c, const LangOptions &LangOpts)
Returns true if the given character could appear in an identifier.
static std::optional< Token > findNextToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts, bool IncludeComments=false)
Finds the token that comes right after the given location.
static unsigned MeasureTokenLength(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
MeasureTokenLength - Relex the token at the specified location and return its length in bytes in the ...
static SourceLocation GetBeginningOfToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Given a location anywhere in a source buffer, find the location that corresponds to the beginning of...
void resetExtendedTokenMode()
Sets the extended token mode back to its initial value, according to the language options and preproc...
static StringRef getImmediateMacroNameForDiagnostics(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
static Lexer * Create_PragmaLexer(SourceLocation SpellingLoc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLen, Preprocessor &PP)
Create_PragmaLexer: Lexer constructor - Create a new lexer object for _Pragma expansion.
static PreambleBounds ComputePreamble(StringRef Buffer, const LangOptions &LangOpts, unsigned MaxLines=0)
Compute the preamble of the given file.
static bool getRawToken(SourceLocation Loc, Token &Result, const SourceManager &SM, const LangOptions &LangOpts, bool IgnoreWhiteSpace=false)
Relex the token at the specified location.
static SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset, const SourceManager &SM, const LangOptions &LangOpts)
Computes the source location just past the end of the token at this source location.
static std::string Stringify(StringRef Str, bool Charify=false)
Stringify - Convert the specified string into a C string by i) escaping '\' and " characters and ii) ...
static SizedChar getCharAndSizeNoWarn(const char *Ptr, const LangOptions &LangOpts)
getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever emit a warning.
bool LexingRawMode
True if in raw mode.
SmallVector< PPConditionalInfo, 4 > ConditionalStack
Information about the set of #if/#ifdef/#ifndef blocks we are currently in.
bool ParsingPreprocessorDirective
True when parsing #XXX; turns '\n' into a tok::eod token.
MultipleIncludeOpt MIOpt
A state machine that detects the #ifndef-wrapping a file idiom for the multiple-include optimization.
bool ParsingFilename
True after #include; turns <xx> or "xxx" into a tok::header_name token.
bool isLexingRawMode() const
Return true if this lexer is in raw mode or not.
const FileID FID
The SourceManager FileID corresponding to the file being lexed.
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
SourceManager & getSourceManager() const
Encodes a location in the source.
static SourceLocation getFromRawEncoding(UIntTy Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
bool isValid() const
Return true if this is a valid SourceLocation object.
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
UIntTy getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it.
This class handles loading and caching of source files into memory.
Each ExpansionInfo encodes the expansion location - where the token was ultimately expanded,...
SourceLocation getExpansionLocStart() const
SourceLocation getSpellingLoc() const
bool isMacroArgExpansion() const
This is a discriminated union of FileInfo and ExpansionInfo.
const ExpansionInfo & getExpansion() const
static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix)
Determine whether a suffix is a valid ud-suffix.
Token - This structure provides full information about a lexed token.
IdentifierInfo * getIdentifierInfo() const
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
unsigned getLength() const
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {...}".
tok::TokenKind getKind() const
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
bool isModuleContextualKeyword(bool AllowExport=true) const
Return true if we have a C++20 modules contextual keyword (export, import or module).
bool isNot(tok::TokenKind K) const
bool isAnnotation() const
Return true if this is any of tok::annot_* kind tokens.
bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const
Return true if we have an ObjC keyword identifier.
bool isSimpleTypeSpecifier(const LangOptions &LangOpts) const
Determine whether the token kind starts a simple-type-specifier.
void startToken()
Reset all flags to cleared.
bool needsCleaning() const
Return true if this token has trigraphs or escaped newlines in it.
StringRef getRawIdentifier() const
getRawIdentifier - For a raw identifier token (i.e., an identifier lexed in raw mode),...
void setFlag(TokenFlags Flag)
Set the specified flag.
static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_movemask_epi8(__m128i __a)
Copies the values of the most significant bits from each 8-bit element in a 128-bit integer vector of...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpeq_epi8(__m128i __a, __m128i __b)
Compares each of the corresponding 8-bit values of the 128-bit integer vectors for equality.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si128(__m128i_u const *__p)
Moves packed integer values from an unaligned 128-bit memory location to elements in a 128-bit intege...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_load_si128(__m128i const *__p)
Moves packed integer values from an aligned 128-bit memory location to elements in a 128-bit integer ...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set1_epi8(char __b)
Initializes all values in a 128-bit vector of [16 x i8] with the specified 8-bit value.
@ tokens_present_before_eof
Indicates that there are tokens present between the last scanned directive and eof.
@ pp_pragma_system_header
@ pp_pragma_include_alias
@ After
Like System, but searched after the system directories.
bool isStringLiteral(TokenKind K)
Return true if this is a C or C++ string-literal (or C++11 user-defined-string-literal) token.
ObjCKeywordKind
Provides a namespace for Objective-C keywords which start with an '@'.
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
The JSON file list parser is used to communicate input to InstallAPI.
LLVM_READNONE bool isASCII(char c)
Returns true if a byte is an ASCII character.
@ Match
This is not an overload because the signature exactly matches an existing declaration.
LLVM_READONLY bool isVerticalWhitespace(unsigned char c)
Returns true if this character is vertical ASCII whitespace: '\n', '\r'.
ConflictMarkerKind
ConflictMarkerKind - Kinds of conflict marker which the lexer might be recovering from.
@ CMK_Perforce
A Perforce-style conflict marker, initiated by 4 ">"s, separated by 4 "="s, and terminated by 4 "<"s.
@ CMK_None
Not within a conflict marker.
@ CMK_Normal
A normal or diff3 conflict marker, initiated by at least 7 "<"s, separated by at least 7 "="s or "|"s...
LLVM_READONLY bool isAsciiIdentifierContinue(unsigned char c)
std::pair< FileID, unsigned > FileIDAndOffset
bool operator<(DeclarationName LHS, DeclarationName RHS)
Ordering on two declaration names.
LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)
Returns true if this character is horizontal ASCII whitespace: ' ', '\t', '\f', '\v'.
@ Result
The result type of a method or function.
LLVM_READONLY bool isRawStringDelimBody(unsigned char c)
Return true if this is the body character of a C++ raw string delimiter.
LLVM_READONLY bool isWhitespace(unsigned char c)
Return true if this character is horizontal or vertical ASCII whitespace: ' ', '\t',...
LLVM_READONLY bool isPreprocessingNumberBody(unsigned char c)
Return true if this is the body character of a C preprocessing number, which is [a-zA-Z0-9_.].
@ Keyword
The name has been typo-corrected to a keyword.
LLVM_READONLY bool isAsciiIdentifierStart(unsigned char c, bool AllowDollar=false)
Returns true if this is a valid first character of a C identifier, which is [a-zA-Z_].
__INTPTR_TYPE__ intptr_t
A signed integer type with the property that any valid pointer to void can be converted to this type,...
float __ovld __cnfn length(float)
Return the length of vector p, i.e., sqrt(p.x^2 + p.y^2 + ...)
#define _mm_cmpistri(A, B, M)
Uses the immediate operand M to perform a comparison of string data with implicitly defined lengths t...
#define _SIDD_LEAST_SIGNIFICANT
#define _SIDD_NEGATIVE_POLARITY
Represents a char and the number of bytes parsed to produce it.
Describes the bounds (start, size) of the preamble and a flag required by PreprocessorOptions::Precom...
Token lexed as part of dependency directive scanning.
unsigned Offset
Offset into the original source input.
bool is(tok::TokenKind K) const