29#include "llvm/ADT/STLExtras.h"
30#include "llvm/ADT/StringExtras.h"
31#include "llvm/ADT/StringRef.h"
32#include "llvm/ADT/StringSwitch.h"
33#include "llvm/Support/Compiler.h"
34#include "llvm/Support/ConvertUTF.h"
35#include "llvm/Support/MemoryBufferRef.h"
36#include "llvm/Support/NativeFormatting.h"
37#include "llvm/Support/SaveAndRestore.h"
38#include "llvm/Support/Unicode.h"
39#include "llvm/Support/UnicodeCharRanges.h"
64 return II->getObjCKeywordID() == objcKey;
71 return tok::objc_not_keyword;
77 if (AllowExport &&
is(tok::kw_export))
79 if (
isOneOf(tok::kw_import, tok::kw_module))
81 if (
isNot(tok::identifier))
84 return II->isImportKeyword() || II->isModuleKeyword();
90 case tok::annot_typename:
91 case tok::annot_decltype:
92 case tok::annot_pack_indexing_type:
98 case tok::kw___int128:
100 case tok::kw_unsigned:
108 case tok::kw__Float16:
109 case tok::kw___float128:
110 case tok::kw___ibm128:
111 case tok::kw_wchar_t:
117#define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) case tok::kw___##Trait:
118#include "clang/Basic/TransformTypeTraits.def"
119 case tok::kw___auto_type:
120 case tok::kw_char16_t:
121 case tok::kw_char32_t:
123 case tok::kw_decltype:
124 case tok::kw_char8_t:
136void Lexer::anchor() {}
138void Lexer::InitLexer(
const char *BufStart,
const char *BufPtr,
139 const char *BufEnd) {
140 BufferStart = BufStart;
144 assert(BufEnd[0] == 0 &&
145 "We assume that the input buffer has a null character at the end"
146 " to simplify lexing!");
151 if (BufferStart == BufferPtr) {
153 StringRef Buf(BufferStart, BufferEnd - BufferStart);
154 size_t BOMLength = llvm::StringSwitch<size_t>(Buf)
155 .StartsWith(
"\xEF\xBB\xBF", 3)
159 BufferPtr += BOMLength;
162 Is_PragmaLexer =
false;
163 CurrentConflictMarkerState =
CMK_None;
166 IsAtStartOfLine =
true;
167 IsAtPhysicalStartOfLine =
true;
169 HasLeadingSpace =
false;
170 HasLeadingEmptyMacro =
false;
185 ExtendedTokenMode = 0;
187 NewLinePtr =
nullptr;
197 FileLoc(
PP.getSourceManager().getLocForStartOfFile(
FID)),
198 LangOpts(
PP.getLangOpts()), LineComment(LangOpts.LineComment),
199 IsFirstTimeLexingFile(IsFirstIncludeOfFile) {
200 InitLexer(InputFile.getBufferStart(), InputFile.getBufferStart(),
201 InputFile.getBufferEnd());
210 const char *BufStart,
const char *BufPtr,
const char *BufEnd,
211 bool IsFirstIncludeOfFile)
212 : FileLoc(fileloc), LangOpts(langOpts), LineComment(LangOpts.LineComment),
213 IsFirstTimeLexingFile(IsFirstIncludeOfFile) {
214 InitLexer(BufStart, BufPtr, BufEnd);
225 bool IsFirstIncludeOfFile)
226 :
Lexer(
SM.getLocForStartOfFile(
FID), langOpts, FromFile.getBufferStart(),
227 FromFile.getBufferStart(), FromFile.getBufferEnd(),
228 IsFirstIncludeOfFile) {}
231 assert(
PP &&
"Cannot reset token mode without a preprocessor");
232 if (LangOpts.TraditionalCPP)
259 FileID SpellingFID =
SM.getFileID(SpellingLoc);
260 llvm::MemoryBufferRef InputFile =
SM.getBufferOrFake(SpellingFID);
261 auto L = std::make_unique<Lexer>(SpellingFID, InputFile,
PP);
266 const char *StrData =
SM.getCharacterData(SpellingLoc);
268 L->BufferPtr = StrData;
269 L->BufferEnd = StrData+TokLen;
270 assert(L->BufferEnd[0] == 0 &&
"Buffer is not nul terminated!");
274 L->FileLoc =
SM.createExpansionLoc(
SM.getLocForStartOfFile(SpellingFID),
276 ExpansionLocEnd, TokLen);
280 L->ParsingPreprocessorDirective =
true;
283 L->Is_PragmaLexer =
true;
288 this->IsAtPhysicalStartOfLine = IsAtStartOfLine;
289 this->IsAtStartOfLine = IsAtStartOfLine;
290 assert((BufferStart + Offset) <= BufferEnd);
291 BufferPtr = BufferStart + Offset;
295 typename T::size_type i = 0, e = Str.size();
297 if (Str[i] ==
'\\' || Str[i] == Quote) {
298 Str.insert(Str.begin() + i,
'\\');
301 }
else if (Str[i] ==
'\n' || Str[i] ==
'\r') {
303 if ((i < e - 1) && (Str[i + 1] ==
'\n' || Str[i + 1] ==
'\r') &&
304 Str[i] != Str[i + 1]) {
310 Str.insert(Str.begin() + i + 1,
'n');
320 std::string
Result = std::string(Str);
321 char Quote = Charify ?
'\'' :
'"';
336 assert(
Tok.needsCleaning() &&
"getSpellingSlow called on simple token");
339 const char *BufEnd = BufPtr +
Tok.getLength();
343 while (BufPtr < BufEnd) {
345 Spelling[Length++] = CharAndSize.Char;
346 BufPtr += CharAndSize.Size;
348 if (Spelling[Length - 1] ==
'"')
356 Spelling[Length - 2] ==
'R' && Spelling[Length - 1] ==
'"') {
359 const char *RawEnd = BufEnd;
360 do --RawEnd;
while (*RawEnd !=
'"');
361 size_t RawLength = RawEnd - BufPtr + 1;
364 memcpy(Spelling + Length, BufPtr, RawLength);
372 while (BufPtr < BufEnd) {
374 Spelling[Length++] = CharAndSize.Char;
375 BufPtr += CharAndSize.Size;
378 assert(Length <
Tok.getLength() &&
379 "NeedsCleaning flag set on token that didn't need cleaning!");
397 bool invalidTemp =
false;
398 StringRef file =
SM.getBufferData(locInfo.first, &invalidTemp);
400 if (invalid) *invalid =
true;
404 const char *tokenBegin = file.data() + locInfo.second;
408 file.begin(), tokenBegin, file.end());
416 return StringRef(tokenBegin,
length);
421 return StringRef(buffer.data(), buffer.size());
431 assert((
int)
Tok.getLength() >= 0 &&
"Token character range is bogus!");
433 bool CharDataInvalid =
false;
434 const char *TokStart = SourceMgr.getCharacterData(
Tok.getLocation(),
442 if (!
Tok.needsCleaning())
443 return std::string(TokStart, TokStart +
Tok.getLength());
464 assert((
int)
Tok.getLength() >= 0 &&
"Token character range is bogus!");
466 const char *TokStart =
nullptr;
468 if (
Tok.is(tok::raw_identifier))
469 TokStart =
Tok.getRawIdentifier().data();
470 else if (!
Tok.hasUCN()) {
473 Buffer = II->getNameStart();
474 return II->getLength();
480 TokStart =
Tok.getLiteralData();
484 bool CharDataInvalid =
false;
485 TokStart = SourceMgr.getCharacterData(
Tok.getLocation(), &CharDataInvalid);
488 if (CharDataInvalid) {
495 if (!
Tok.needsCleaning()) {
497 return Tok.getLength();
519 Loc =
SM.getExpansionLoc(Loc);
522 const StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
526 const char *StrData = Buffer.data() + LocInfo.second;
527 if (StrData >= Buffer.end())
532 Lexer TheLexer(
SM.getLocForStartOfFile(LocInfo.first), LangOpts,
533 Buffer.begin(), StrData, Buffer.end());
536 TheLexer.LexIdentifierContinue(
Tok, StrData);
545 bool IgnoreWhiteSpace) {
554 Loc =
SM.getExpansionLoc(Loc);
557 StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
561 const char *StrData = Buffer.data()+LocInfo.second;
563 if (!IgnoreWhiteSpace &&
isWhitespace(SkipEscapedNewLines(StrData)[0]))
567 Lexer TheLexer(
SM.getLocForStartOfFile(LocInfo.first), LangOpts,
568 Buffer.begin(), StrData, Buffer.end());
577 const char *BufStart = Buffer.data();
578 if (Offset >= Buffer.size())
581 const char *LexStart = BufStart + Offset;
582 for (; LexStart != BufStart; --LexStart) {
598 if (LocInfo.first.isInvalid())
602 StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
608 const char *StrData = Buffer.data() + LocInfo.second;
610 if (!LexStart || LexStart == StrData)
615 Lexer TheLexer(LexerStartLoc, LangOpts, Buffer.data(), LexStart,
635 }
while (TheTok.
getKind() != tok::eof);
647 if (!
SM.isMacroArgExpansion(Loc))
654 assert(FileLocInfo.first == BeginFileLocInfo.first &&
655 FileLocInfo.second >= BeginFileLocInfo.second);
661enum PreambleDirectiveKind {
676 Lexer TheLexer(FileLoc, LangOpts, Buffer.begin(), Buffer.begin(),
680 bool InPreprocessorDirective =
false;
684 unsigned MaxLineOffset = 0;
686 const char *CurPtr = Buffer.begin();
687 unsigned CurLine = 0;
688 while (CurPtr != Buffer.end()) {
692 if (CurLine == MaxLines)
696 if (CurPtr != Buffer.end())
697 MaxLineOffset = CurPtr - Buffer.begin();
703 if (InPreprocessorDirective) {
705 if (TheTok.
getKind() == tok::eof) {
716 InPreprocessorDirective =
false;
725 if (MaxLineOffset && TokOffset >= MaxLineOffset)
730 if (TheTok.
getKind() == tok::comment) {
738 Token HashTok = TheTok;
739 InPreprocessorDirective =
true;
748 PreambleDirectiveKind PDK
749 = llvm::StringSwitch<PreambleDirectiveKind>(
Keyword)
750 .Case(
"include", PDK_Skipped)
751 .Case(
"__include_macros", PDK_Skipped)
752 .Case(
"define", PDK_Skipped)
753 .Case(
"undef", PDK_Skipped)
754 .Case(
"line", PDK_Skipped)
755 .Case(
"error", PDK_Skipped)
756 .Case(
"pragma", PDK_Skipped)
757 .Case(
"import", PDK_Skipped)
758 .Case(
"include_next", PDK_Skipped)
759 .Case(
"warning", PDK_Skipped)
760 .Case(
"ident", PDK_Skipped)
761 .Case(
"sccs", PDK_Skipped)
762 .Case(
"assert", PDK_Skipped)
763 .Case(
"unassert", PDK_Skipped)
764 .Case(
"if", PDK_Skipped)
765 .Case(
"ifdef", PDK_Skipped)
766 .Case(
"ifndef", PDK_Skipped)
767 .Case(
"elif", PDK_Skipped)
768 .Case(
"elifdef", PDK_Skipped)
769 .Case(
"elifndef", PDK_Skipped)
770 .Case(
"else", PDK_Skipped)
771 .Case(
"endif", PDK_Skipped)
772 .Default(PDK_Unknown);
789 TheTok.
getKind() == tok::raw_identifier &&
791 LangOpts.CPlusPlusModules) {
794 Token ModuleTok = TheTok;
797 }
while (TheTok.
getKind() == tok::comment);
798 if (TheTok.
getKind() != tok::semi) {
813 if (ActiveCommentLoc.
isValid())
814 End = ActiveCommentLoc;
829 const char *TokPtr =
SM.getCharacterData(TokStart, &
Invalid);
832 if (
Invalid || (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr)))
835 unsigned PhysOffset = 0;
840 while (Lexer::isObviouslySimpleCharacter(*TokPtr)) {
850 for (; CharNo; --CharNo) {
852 TokPtr += CharAndSize.Size;
853 PhysOffset += CharAndSize.Size;
860 if (!Lexer::isObviouslySimpleCharacter(*TokPtr))
861 PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr;
893 const FileID LocFileID =
SM.getFileID(Loc);
896 if (!
SM.getSLocEntry(LocFileID).getExpansion().isExpansionTokenRange())
918 if (!
SM.isAtStartOfImmediateMacroExpansion(loc, &expansionLoc))
924 *MacroBegin = expansionLoc;
948 if (
SM.isInFileID(afterLoc,
FID)) {
949 if (!
SM.isAtEndOfImmediateMacroExpansion(afterLoc, &expansionLoc))
958 assert(Entry.
isExpansion() &&
"Should be in an expansion");
965 *MacroEnd = expansionLoc;
978 if (Range.isTokenRange()) {
985 auto [FID, BeginOffs] =
SM.getDecomposedLoc(Begin);
990 if (!
SM.isInFileID(End, FID, &EndOffs) ||
1000 return SM.getSLocEntry(
SM.getFileID(Loc))
1002 .isExpansionTokenRange();
1019 Range.setBegin(Begin);
1024 if (Range.isTokenRange()) {
1042 Range.setBegin(MacroBegin);
1043 Range.setEnd(MacroEnd);
1045 if (Range.isTokenRange())
1065 Range.setBegin(
SM.getImmediateSpellingLoc(Begin));
1066 Range.setEnd(
SM.getImmediateSpellingLoc(End));
1079 if (Range.isInvalid()) {
1086 if (beginInfo.first.isInvalid()) {
1092 if (!
SM.isInFileID(Range.getEnd(), beginInfo.first, &EndOffs) ||
1093 beginInfo.second > EndOffs) {
1099 bool invalidTemp =
false;
1100 StringRef file =
SM.getBufferData(beginInfo.first, &invalidTemp);
1107 return file.substr(beginInfo.second, EndOffs - beginInfo.second);
1113 assert(Loc.
isMacroID() &&
"Only reasonable to call this on macros");
1129 Loc =
SM.getImmediateExpansionRange(Loc).getBegin();
1136 FileID MacroFID =
SM.getFileID(Loc);
1137 if (
SM.isInFileID(SpellLoc, MacroFID))
1147 Loc =
SM.getSpellingLoc(Loc);
1153 StringRef ExpansionBuffer =
SM.getBufferData(ExpansionInfo.first);
1154 return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
1159 assert(Loc.
isMacroID() &&
"Only reasonable to call this on macros");
1161 while (
SM.isMacroArgExpansion(Loc))
1162 Loc =
SM.getImmediateExpansionRange(Loc).getBegin();
1168 if (!SpellLoc.
isFileID() ||
SM.isWrittenInScratchSpace(SpellLoc))
1174 Loc =
SM.getSpellingLoc(
SM.getImmediateExpansionRange(Loc).getBegin());
1180 StringRef ExpansionBuffer =
SM.getBufferData(ExpansionInfo.first);
1181 return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
1190 if (Str - 1 < BufferStart)
1193 if ((Str[0] ==
'\n' && Str[-1] ==
'\r') ||
1194 (Str[0] ==
'\r' && Str[-1] ==
'\n')) {
1195 if (Str - 2 < BufferStart)
1205 return *Str ==
'\\';
1213 if (LocInfo.first.isInvalid())
1216 StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
1222 StringRef Rest = Buffer.substr(
Line - Buffer.data());
1223 size_t NumWhitespaceChars = Rest.find_first_not_of(
" \t");
1224 return NumWhitespaceChars == StringRef::npos
1226 : Rest.take_front(NumWhitespaceChars);
1241 unsigned CharNo,
unsigned TokLen) {
1242 assert(FileLoc.
isMacroID() &&
"Must be a macro expansion");
1258 return SM.createExpansionLoc(SpellingLoc, II.
getBegin(), II.
getEnd(), TokLen);
1264 unsigned TokLen)
const {
1265 assert(Loc >= BufferStart && Loc <= BufferEnd &&
1266 "Location out of range for this buffer!");
1270 unsigned CharNo = Loc-BufferStart;
1271 if (FileLoc.isFileID())
1272 return FileLoc.getLocWithOffset(CharNo);
1276 assert(
PP &&
"This doesn't work on raw lexers");
1295 case '=':
return '#';
1296 case ')':
return ']';
1297 case '(':
return '[';
1298 case '!':
return '|';
1299 case '\'':
return '^';
1300 case '>':
return '}';
1301 case '/':
return '\\';
1302 case '<':
return '{';
1303 case '-':
return '~';
1318 L->
Diag(CP-2, diag::trigraph_ignored);
1323 L->
Diag(CP-2, diag::trigraph_converted) << StringRef(&Res, 1);
1335 if (Ptr[Size-1] !=
'\n' && Ptr[Size-1] !=
'\r')
1339 if ((Ptr[Size] ==
'\r' || Ptr[Size] ==
'\n') &&
1340 Ptr[Size-1] != Ptr[Size])
1353const char *Lexer::SkipEscapedNewLines(
const char *P) {
1355 const char *AfterEscape;
1358 }
else if (*P ==
'?') {
1360 if (P[1] !=
'?' || P[2] !=
'/')
1370 if (NewLineSize == 0)
return P;
1371 P = AfterEscape+NewLineSize;
1378 bool IncludeComments) {
1381 return std::nullopt;
1389 bool InvalidTemp =
false;
1390 StringRef
File =
SM.getBufferData(LocInfo.first, &InvalidTemp);
1392 return std::nullopt;
1394 const char *TokenBegin =
File.data() + LocInfo.second;
1397 Lexer lexer(
SM.getLocForStartOfFile(LocInfo.first), LangOpts,
File.begin(),
1398 TokenBegin,
File.end());
1409 bool IncludeComments) {
1410 const auto StartOfFile =
SM.getLocForStartOfFile(
SM.getFileID(Loc));
1411 while (Loc != StartOfFile) {
1414 return std::nullopt;
1420 if (!
Tok.is(tok::comment) || IncludeComments) {
1424 return std::nullopt;
1433 const LangOptions &LangOpts,
bool SkipTrailingWhitespaceAndNewLine) {
1435 if (!
Tok ||
Tok->isNot(TKind))
1440 unsigned NumWhitespaceChars = 0;
1441 if (SkipTrailingWhitespaceAndNewLine) {
1442 const char *TokenEnd =
SM.getCharacterData(TokenLoc) +
Tok->getLength();
1443 unsigned char C = *TokenEnd;
1446 NumWhitespaceChars++;
1450 if (
C ==
'\n' ||
C ==
'\r') {
1453 NumWhitespaceChars++;
1454 if ((
C ==
'\n' ||
C ==
'\r') &&
C != PrevC)
1455 NumWhitespaceChars++;
1480 if (Ptr[0] ==
'\\') {
1486 return {
'\\', Size};
1496 Diag(Ptr, diag::backslash_newline_space);
1499 Size += EscapedNewLineSize;
1500 Ptr += EscapedNewLineSize;
1503 auto CharAndSize = getCharAndSizeSlow(Ptr,
Tok);
1504 CharAndSize.Size += Size;
1509 return {
'\\',
Size};
1513 if (Ptr[0] ==
'?' && Ptr[1] ==
'?') {
1517 LangOpts.Trigraphs)) {
1523 if (
C ==
'\\')
goto Slash;
1529 return {*Ptr,
Size + 1u};
1543 if (Ptr[0] ==
'\\') {
1549 return {
'\\',
Size};
1554 Size += EscapedNewLineSize;
1555 Ptr += EscapedNewLineSize;
1558 auto CharAndSize = getCharAndSizeSlowNoWarn(Ptr, LangOpts);
1559 CharAndSize.Size +=
Size;
1564 return {
'\\',
Size};
1568 if (LangOpts.Trigraphs && Ptr[0] ==
'?' && Ptr[1] ==
'?') {
1574 if (
C ==
'\\')
goto Slash;
1580 return {*Ptr,
Size + 1u};
1588void Lexer::SetByteOffset(
unsigned Offset,
bool StartOfLine) {
1589 BufferPtr = BufferStart + Offset;
1590 if (BufferPtr > BufferEnd)
1591 BufferPtr = BufferEnd;
1595 IsAtStartOfLine = StartOfLine;
1596 IsAtPhysicalStartOfLine = StartOfLine;
1600 static const llvm::sys::UnicodeCharSet UnicodeWhitespaceChars(
1602 return UnicodeWhitespaceChars.contains(Codepoint);
1607 llvm::raw_svector_ostream CharOS(CharBuf);
1608 llvm::write_hex(CharOS,
C, llvm::HexPrintStyle::Upper, 4);
1619 bool IsStart,
bool &IsExtension) {
1620 static const llvm::sys::UnicodeCharSet MathStartChars(
1622 static const llvm::sys::UnicodeCharSet MathContinueChars(
1624 if (MathStartChars.contains(
C) ||
1625 (!IsStart && MathContinueChars.contains(
C))) {
1633 bool &IsExtension) {
1634 if (LangOpts.AsmPreprocessor) {
1636 }
else if (LangOpts.DollarIdents &&
'$' ==
C) {
1638 }
else if (LangOpts.CPlusPlus || LangOpts.C23) {
1643 static const llvm::sys::UnicodeCharSet XIDStartChars(
XIDStartRanges);
1645 if (
C ==
'_' || XIDStartChars.contains(
C) || XIDContinueChars.contains(
C))
1649 }
else if (LangOpts.C11) {
1650 static const llvm::sys::UnicodeCharSet C11AllowedIDChars(
1652 return C11AllowedIDChars.contains(
C);
1654 static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
1656 return C99AllowedIDChars.contains(
C);
1661 bool &IsExtension) {
1662 assert(
C > 0x7F &&
"isAllowedInitiallyIDChar called with an ASCII codepoint");
1663 IsExtension =
false;
1664 if (LangOpts.AsmPreprocessor) {
1667 if (LangOpts.CPlusPlus || LangOpts.C23) {
1668 static const llvm::sys::UnicodeCharSet XIDStartChars(
XIDStartRanges);
1669 if (XIDStartChars.contains(
C))
1677 static const llvm::sys::UnicodeCharSet C11DisallowedInitialIDChars(
1679 return !C11DisallowedInitialIDChars.contains(
C);
1681 static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
1683 return !C99DisallowedInitialIDChars.contains(
C);
1689 static const llvm::sys::UnicodeCharSet MathStartChars(
1691 static const llvm::sys::UnicodeCharSet MathContinueChars(
1694 (void)MathStartChars;
1695 (void)MathContinueChars;
1696 assert((MathStartChars.contains(
C) || MathContinueChars.contains(
C)) &&
1697 "Unexpected mathematical notation codepoint");
1698 Diags.
Report(Range.getBegin(), diag::ext_mathematical_notation)
1711 if (!Diags.
isIgnored(diag::warn_c99_compat_unicode_id, Range.getBegin())) {
1713 CannotAppearInIdentifier = 0,
1714 CannotStartIdentifier
1717 static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
1719 static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
1721 if (!C99AllowedIDChars.contains(
C)) {
1722 Diags.
Report(Range.getBegin(), diag::warn_c99_compat_unicode_id)
1724 << CannotAppearInIdentifier;
1725 }
else if (
IsFirst && C99DisallowedInitialIDChars.contains(
C)) {
1726 Diags.
Report(Range.getBegin(), diag::warn_c99_compat_unicode_id)
1728 << CannotStartIdentifier;
1740 struct HomoglyphPair {
1743 bool operator<(HomoglyphPair R)
const {
return Character < R.Character; }
1745 static constexpr HomoglyphPair SortedHomoglyphs[] = {
1798 std::lower_bound(std::begin(SortedHomoglyphs),
1799 std::end(SortedHomoglyphs) - 1, HomoglyphPair{
C,
'\0'});
1800 if (Homoglyph->Character ==
C) {
1801 if (Homoglyph->LooksLike) {
1802 const char LooksLikeStr[] = {Homoglyph->LooksLike, 0};
1803 Diags.
Report(Range.getBegin(), diag::warn_utf8_symbol_homoglyph)
1806 Diags.
Report(Range.getBegin(), diag::warn_utf8_symbol_zero_width)
1826 bool InvalidOnlyAtStart =
IsFirst && !IsIDStart && IsIDContinue;
1828 if (!
IsFirst || InvalidOnlyAtStart) {
1829 Diags.
Report(Range.getBegin(), diag::err_character_not_allowed_identifier)
1833 Diags.
Report(Range.getBegin(), diag::err_character_not_allowed)
1839bool Lexer::tryConsumeIdentifierUCN(
const char *&CurPtr,
unsigned Size,
1841 const char *UCNPtr = CurPtr +
Size;
1842 uint32_t CodePoint = tryReadUCN(UCNPtr, CurPtr,
nullptr);
1843 if (CodePoint == 0) {
1846 bool IsExtension =
false;
1851 !
PP->isPreprocessedOutput())
1853 PP->getDiagnostics(), LangOpts, CodePoint,
1871 if ((UCNPtr - CurPtr == 6 && CurPtr[1] ==
'u') ||
1872 (UCNPtr - CurPtr == 10 && CurPtr[1] ==
'U'))
1875 while (CurPtr != UCNPtr)
1876 (void)getAndAdvanceChar(CurPtr,
Result);
1880bool Lexer::tryConsumeIdentifierUTF8Char(
const char *&CurPtr,
Token &
Result) {
1881 llvm::UTF32 CodePoint;
1886 unsigned FirstCodeUnitSize;
1887 getCharAndSize(CurPtr, FirstCodeUnitSize);
1888 const char *CharStart = CurPtr + FirstCodeUnitSize - 1;
1889 const char *UnicodePtr = CharStart;
1891 llvm::ConversionResult ConvResult = llvm::convertUTF8Sequence(
1892 (
const llvm::UTF8 **)&UnicodePtr, (
const llvm::UTF8 *)BufferEnd,
1893 &CodePoint, llvm::strictConversion);
1894 if (ConvResult != llvm::conversionOK)
1897 bool IsExtension =
false;
1904 !
PP->isPreprocessedOutput())
1906 PP->getDiagnostics(), LangOpts, CodePoint,
1914 PP->getDiagnostics(), CodePoint,
1926 ConsumeChar(CurPtr, FirstCodeUnitSize,
Result);
1927 CurPtr = UnicodePtr;
1932 const char *CurPtr) {
1933 bool IsExtension =
false;
1936 !
PP->isPreprocessedOutput()) {
1948 return LexIdentifierContinue(
Result, CurPtr);
1952 !
PP->isPreprocessedOutput() && !
isASCII(*BufferPtr) &&
1964 PP->getDiagnostics(), LangOpts,
C,
1973 FormTokenWithChars(
Result, CurPtr, tok::unknown);
1979 [[maybe_unused]]
const char *BufferEnd) {
1981 alignas(16)
static constexpr char AsciiIdentifierRange[16] = {
1982 '_',
'_',
'A',
'Z',
'a',
'z',
'0',
'9',
1984 constexpr ssize_t BytesPerRegister = 16;
1986 __m128i AsciiIdentifierRangeV =
1989 while (LLVM_LIKELY(BufferEnd - CurPtr >= BytesPerRegister)) {
1996 if (Consumed == BytesPerRegister)
2002 unsigned char C = *CurPtr;
2008bool Lexer::LexIdentifierContinue(
Token &
Result,
const char *CurPtr) {
2017 unsigned char C = getCharAndSize(CurPtr, Size);
2019 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2024 if (!LangOpts.DollarIdents)
2028 Diag(CurPtr, diag::ext_dollar_in_identifier);
2029 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2032 if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result))
2040 const char *IdStart = BufferPtr;
2041 FormTokenWithChars(
Result, CurPtr, tok::raw_identifier);
2042 Result.setRawIdentifierData(IdStart);
2051 const IdentifierInfo *II =
PP->LookUpIdentifierInfo(
Result);
2058 if (isCodeCompletionPoint(CurPtr)) {
2060 Result.setKind(tok::code_completion);
2066 assert(*CurPtr == 0 &&
"Completion character must be 0");
2071 if (CurPtr < BufferEnd) {
2083 return PP->HandleIdentifier(
Result);
2090bool Lexer::isHexaLiteral(
const char *Start,
const LangOptions &LangOpts) {
2092 char C1 = CharAndSize1.Char;
2098 char C2 = CharAndSize2.Char;
2099 return (C2 ==
'x' || C2 ==
'X');
2105bool Lexer::LexNumericConstant(
Token &
Result,
const char *CurPtr) {
2107 char C = getCharAndSize(CurPtr, Size);
2110 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2112 if (LangOpts.HLSL &&
C ==
'.' && (*CurPtr ==
'x' || *CurPtr ==
'r')) {
2116 C = getCharAndSize(CurPtr, Size);
2120 if ((
C ==
'-' ||
C ==
'+') && (PrevCh ==
'E' || PrevCh ==
'e')) {
2123 if (!LangOpts.MicrosoftExt || !isHexaLiteral(BufferPtr, LangOpts))
2124 return LexNumericConstant(
Result, ConsumeChar(CurPtr, Size,
Result));
2128 if ((
C ==
'-' ||
C ==
'+') && (PrevCh ==
'P' || PrevCh ==
'p')) {
2132 bool IsHexFloat =
true;
2133 if (!LangOpts.C99) {
2134 if (!isHexaLiteral(BufferPtr, LangOpts))
2136 else if (!LangOpts.CPlusPlus17 &&
2137 std::find(BufferPtr, CurPtr,
'_') != CurPtr)
2141 return LexNumericConstant(
Result, ConsumeChar(CurPtr, Size,
Result));
2145 if (
C ==
'\'' && LangOpts.AllowLiteralDigitSeparator) {
2149 Diag(CurPtr, LangOpts.CPlusPlus
2150 ? diag::warn_cxx11_compat_digit_separator
2151 : diag::warn_c23_compat_digit_separator);
2152 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2153 CurPtr = ConsumeChar(CurPtr, NextSize,
Result);
2154 return LexNumericConstant(
Result, CurPtr);
2159 if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result))
2160 return LexNumericConstant(
Result, CurPtr);
2162 return LexNumericConstant(
Result, CurPtr);
2165 const char *TokStart = BufferPtr;
2166 FormTokenWithChars(
Result, CurPtr, tok::numeric_constant);
2167 Result.setLiteralData(TokStart);
2173const char *Lexer::LexUDSuffix(
Token &
Result,
const char *CurPtr,
2174 bool IsStringLiteral) {
2175 assert(LangOpts.CPlusPlus);
2179 char C = getCharAndSize(CurPtr, Size);
2180 bool Consumed =
false;
2183 if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result))
2185 else if (!
isASCII(
C) && tryConsumeIdentifierUTF8Char(CurPtr,
Result))
2191 if (!LangOpts.CPlusPlus11) {
2194 C ==
'_' ? diag::warn_cxx11_compat_user_defined_literal
2195 : diag::warn_cxx11_compat_reserved_user_defined_literal)
2206 bool IsUDSuffix =
false;
2209 else if (IsStringLiteral && LangOpts.CPlusPlus14) {
2213 const unsigned MaxStandardSuffixLength = 3;
2214 char Buffer[MaxStandardSuffixLength] = {
C };
2215 unsigned Consumed =
Size;
2218 auto [
Next, NextSize] =
2222 const StringRef CompleteSuffix(Buffer, Chars);
2228 if (Chars == MaxStandardSuffixLength)
2232 Buffer[Chars++] =
Next;
2233 Consumed += NextSize;
2239 Diag(CurPtr, LangOpts.MSVCCompat
2240 ? diag::ext_ms_reserved_user_defined_literal
2241 : diag::ext_reserved_user_defined_literal)
2246 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2251 C = getCharAndSize(CurPtr, Size);
2253 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2254 }
else if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result)) {
2255 }
else if (!
isASCII(
C) && tryConsumeIdentifierUTF8Char(CurPtr,
Result)) {
2265bool Lexer::LexStringLiteral(
Token &
Result,
const char *CurPtr,
2267 const char *AfterQuote = CurPtr;
2269 const char *NulCharacter =
nullptr;
2272 (Kind == tok::utf8_string_literal ||
2273 Kind == tok::utf16_string_literal ||
2274 Kind == tok::utf32_string_literal))
2275 Diag(BufferPtr, LangOpts.CPlusPlus ? diag::warn_cxx98_compat_unicode_literal
2276 : diag::warn_c99_compat_unicode_literal);
2278 char C = getAndAdvanceChar(CurPtr,
Result);
2283 const char *SavedCurPtr = CurPtr;
2284 C = getAndAdvanceChar(CurPtr,
Result);
2302 CurPtr = SavedCurPtr;
2305 if (
C ==
'\n' ||
C ==
'\r' ||
2306 (
C == 0 && CurPtr-1 == BufferEnd)) {
2308 Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 1;
2309 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2314 if (isCodeCompletionPoint(CurPtr-1)) {
2316 codeCompleteIncludedFile(AfterQuote, CurPtr - 1,
false);
2318 PP->CodeCompleteNaturalLanguage();
2319 FormTokenWithChars(
Result, CurPtr - 1, tok::unknown);
2324 NulCharacter = CurPtr-1;
2326 C = getAndAdvanceChar(CurPtr,
Result);
2330 if (LangOpts.CPlusPlus)
2331 CurPtr = LexUDSuffix(
Result, CurPtr,
true);
2335 Diag(NulCharacter, diag::null_in_char_or_string) << 1;
2338 const char *TokStart = BufferPtr;
2339 FormTokenWithChars(
Result, CurPtr, Kind);
2340 Result.setLiteralData(TokStart);
2346bool Lexer::LexRawStringLiteral(
Token &
Result,
const char *CurPtr,
2354 Diag(BufferPtr, diag::warn_cxx98_compat_raw_string_literal);
2356 unsigned PrefixLen = 0;
2360 llvm::is_contained({
'$',
'@',
'`'}, CurPtr[PrefixLen])) {
2361 const char *Pos = &CurPtr[PrefixLen];
2362 Diag(Pos, LangOpts.CPlusPlus26
2363 ? diag::warn_cxx26_compat_raw_string_literal_character_set
2364 : diag::ext_cxx26_raw_string_literal_character_set)
2365 << StringRef(Pos, 1);
2371 if (CurPtr[PrefixLen] !=
'(') {
2373 const char *PrefixEnd = &CurPtr[PrefixLen];
2374 if (PrefixLen == 16) {
2375 Diag(PrefixEnd, diag::err_raw_delim_too_long);
2376 }
else if (*PrefixEnd ==
'\n') {
2377 Diag(PrefixEnd, diag::err_invalid_newline_raw_delim);
2379 Diag(PrefixEnd, diag::err_invalid_char_raw_delim)
2380 << StringRef(PrefixEnd, 1);
2392 if (
C == 0 && CurPtr-1 == BufferEnd) {
2398 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2403 const char *Prefix = CurPtr;
2404 CurPtr += PrefixLen + 1;
2411 if (strncmp(CurPtr, Prefix, PrefixLen) == 0 && CurPtr[PrefixLen] ==
'"') {
2412 CurPtr += PrefixLen + 1;
2415 }
else if (
C == 0 && CurPtr-1 == BufferEnd) {
2417 Diag(BufferPtr, diag::err_unterminated_raw_string)
2418 << StringRef(Prefix, PrefixLen);
2419 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2425 if (LangOpts.CPlusPlus)
2426 CurPtr = LexUDSuffix(
Result, CurPtr,
true);
2429 const char *TokStart = BufferPtr;
2430 FormTokenWithChars(
Result, CurPtr, Kind);
2431 Result.setLiteralData(TokStart);
2437bool Lexer::LexAngledStringLiteral(
Token &
Result,
const char *CurPtr) {
2439 const char *NulCharacter =
nullptr;
2440 const char *AfterLessPos = CurPtr;
2441 char C = getAndAdvanceChar(CurPtr,
Result);
2446 C = getAndAdvanceChar(CurPtr,
Result);
2449 (
C == 0 && (CurPtr - 1 == BufferEnd))) {
2452 FormTokenWithChars(
Result, AfterLessPos, tok::less);
2457 if (isCodeCompletionPoint(CurPtr - 1)) {
2458 codeCompleteIncludedFile(AfterLessPos, CurPtr - 1,
true);
2460 FormTokenWithChars(
Result, CurPtr - 1, tok::unknown);
2463 NulCharacter = CurPtr-1;
2465 C = getAndAdvanceChar(CurPtr,
Result);
2470 Diag(NulCharacter, diag::null_in_char_or_string) << 1;
2473 const char *TokStart = BufferPtr;
2474 FormTokenWithChars(
Result, CurPtr, tok::header_name);
2475 Result.setLiteralData(TokStart);
2479void Lexer::codeCompleteIncludedFile(
const char *PathStart,
2480 const char *CompletionPoint,
2483 StringRef PartialPath(PathStart, CompletionPoint - PathStart);
2484 llvm::StringRef SlashChars = LangOpts.MSVCCompat ?
"/\\" :
"/";
2485 auto Slash = PartialPath.find_last_of(SlashChars);
2487 (Slash == StringRef::npos) ?
"" : PartialPath.take_front(Slash);
2488 const char *StartOfFilename =
2489 (Slash == StringRef::npos) ? PathStart : PathStart + Slash + 1;
2491 PP->setCodeCompletionIdentifierInfo(&
PP->getIdentifierTable().get(
2492 StringRef(StartOfFilename, CompletionPoint - StartOfFilename)));
2495 while (CompletionPoint < BufferEnd) {
2496 char Next = *(CompletionPoint + 1);
2500 if (
Next == (IsAngled ?
'>' :
'"'))
2502 if (SlashChars.contains(
Next))
2506 PP->setCodeCompletionTokenRange(
2507 FileLoc.getLocWithOffset(StartOfFilename - BufferStart),
2508 FileLoc.getLocWithOffset(CompletionPoint - BufferStart));
2509 PP->CodeCompleteIncludedFile(Dir, IsAngled);
2514bool Lexer::LexCharConstant(
Token &
Result,
const char *CurPtr,
2517 const char *NulCharacter =
nullptr;
2520 if (Kind == tok::utf16_char_constant || Kind == tok::utf32_char_constant)
2521 Diag(BufferPtr, LangOpts.CPlusPlus
2522 ? diag::warn_cxx98_compat_unicode_literal
2523 : diag::warn_c99_compat_unicode_literal);
2524 else if (Kind == tok::utf8_char_constant)
2525 Diag(BufferPtr, LangOpts.CPlusPlus
2526 ? diag::warn_cxx14_compat_u8_character_literal
2527 : diag::warn_c17_compat_u8_character_literal);
2530 char C = getAndAdvanceChar(CurPtr,
Result);
2533 Diag(BufferPtr, diag::ext_empty_character);
2534 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2541 C = getAndAdvanceChar(CurPtr,
Result);
2543 if (
C ==
'\n' ||
C ==
'\r' ||
2544 (
C == 0 && CurPtr-1 == BufferEnd)) {
2546 Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 0;
2547 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2552 if (isCodeCompletionPoint(CurPtr-1)) {
2553 PP->CodeCompleteNaturalLanguage();
2554 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2559 NulCharacter = CurPtr-1;
2561 C = getAndAdvanceChar(CurPtr,
Result);
2565 if (LangOpts.CPlusPlus)
2566 CurPtr = LexUDSuffix(
Result, CurPtr,
false);
2570 Diag(NulCharacter, diag::null_in_char_or_string) << 0;
2573 const char *TokStart = BufferPtr;
2574 FormTokenWithChars(
Result, CurPtr, Kind);
2575 Result.setLiteralData(TokStart);
2583bool Lexer::SkipWhitespace(
Token &
Result,
const char *CurPtr) {
2587 unsigned char Char = *CurPtr;
2589 const char *lastNewLine =
nullptr;
2590 auto setLastNewLine = [&](
const char *Ptr) {
2596 setLastNewLine(CurPtr - 1);
2615 if (*CurPtr ==
'\n')
2616 setLastNewLine(CurPtr);
2623 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2625 IsAtStartOfLine =
true;
2626 IsAtPhysicalStartOfLine =
true;
2633 char PrevChar = CurPtr[-1];
2641 if (NewLinePtr && lastNewLine && NewLinePtr != lastNewLine &&
PP) {
2642 if (
auto *Handler =
PP->getEmptylineHandler())
2658bool Lexer::SkipLineComment(
Token &
Result,
const char *CurPtr) {
2663 Diag(BufferPtr, diag::ext_line_comment);
2681 bool UnicodeDecodingAlreadyDiagnosed =
false;
2688 C !=
'\n' &&
C !=
'\r') {
2690 UnicodeDecodingAlreadyDiagnosed =
false;
2694 unsigned Length = llvm::getUTF8SequenceSize(
2695 (
const llvm::UTF8 *)CurPtr, (
const llvm::UTF8 *)BufferEnd);
2698 Diag(CurPtr, diag::warn_invalid_utf8_in_comment);
2699 UnicodeDecodingAlreadyDiagnosed =
true;
2702 UnicodeDecodingAlreadyDiagnosed =
false;
2708 const char *NextLine = CurPtr;
2711 const char *EscapePtr = CurPtr-1;
2712 bool HasSpace =
false;
2718 if (*EscapePtr ==
'\\')
2721 else if (EscapePtr[0] ==
'/' && EscapePtr[-1] ==
'?' &&
2722 EscapePtr[-2] ==
'?' && LangOpts.Trigraphs)
2724 CurPtr = EscapePtr-2;
2730 Diag(EscapePtr, diag::backslash_newline_space);
2737 const char *OldPtr = CurPtr;
2740 C = getAndAdvanceChar(CurPtr,
Result);
2745 if (
C != 0 && CurPtr == OldPtr+1) {
2753 if (CurPtr != OldPtr + 1 &&
C !=
'/' &&
2754 (CurPtr == BufferEnd + 1 || CurPtr[0] !=
'/')) {
2755 for (; OldPtr != CurPtr; ++OldPtr)
2756 if (OldPtr[0] ==
'\n' || OldPtr[0] ==
'\r') {
2760 const char *ForwardPtr = CurPtr;
2763 if (ForwardPtr[0] ==
'/' && ForwardPtr[1] ==
'/')
2768 Diag(OldPtr-1, diag::ext_multi_line_line_comment);
2773 if (
C ==
'\r' ||
C ==
'\n' || CurPtr == BufferEnd + 1) {
2778 if (
C ==
'\0' && isCodeCompletionPoint(CurPtr-1)) {
2779 PP->CodeCompleteNaturalLanguage();
2796 return SaveLineComment(
Result, CurPtr);
2810 NewLinePtr = CurPtr++;
2823bool Lexer::SaveLineComment(
Token &
Result,
const char *CurPtr) {
2826 FormTokenWithChars(
Result, CurPtr, tok::comment);
2838 assert(Spelling[0] ==
'/' && Spelling[1] ==
'/' &&
"Not line comment?");
2842 Result.setKind(tok::comment);
2853 assert(CurPtr[0] ==
'\n' || CurPtr[0] ==
'\r');
2856 const char *TrigraphPos =
nullptr;
2858 const char *SpacePos =
nullptr;
2865 if (CurPtr[0] ==
'\n' || CurPtr[0] ==
'\r') {
2867 if (CurPtr[0] == CurPtr[1])
2881 if (*CurPtr ==
'\\') {
2883 }
else if (CurPtr[0] ==
'/' && CurPtr[-1] ==
'?' && CurPtr[-2] ==
'?') {
2885 TrigraphPos = CurPtr - 2;
2896 if (*CurPtr !=
'\n' && *CurPtr !=
'\r')
2905 L->
Diag(TrigraphPos, diag::trigraph_ignored_block_comment);
2909 L->
Diag(TrigraphPos, diag::trigraph_ends_block_comment);
2914 L->
Diag(CurPtr + 1, diag::escaped_newline_block_comment_end);
2918 L->
Diag(SpacePos, diag::backslash_newline_space);
2924#include <emmintrin.h>
2939bool Lexer::SkipBlockComment(
Token &
Result,
const char *CurPtr) {
2949 unsigned char C = getCharAndSize(CurPtr, CharSize);
2951 if (
C == 0 && CurPtr == BufferEnd+1) {
2953 Diag(BufferPtr, diag::err_unterminated_block_comment);
2959 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2976 bool UnicodeDecodingAlreadyDiagnosed =
false;
2981 if (CurPtr + 24 < BufferEnd &&
2984 !(
PP &&
PP->getCodeCompletionFileLoc() == FileLoc)) {
2986 while (
C !=
'/' && (
intptr_t)CurPtr % 16 != 0) {
2991 if (
C ==
'/')
goto FoundSlash;
2995 while (CurPtr + 16 < BufferEnd) {
2997 if (LLVM_UNLIKELY(Mask != 0)) {
3007 CurPtr += llvm::countr_zero<unsigned>(cmp) + 1;
3013 __vector
unsigned char LongUTF = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
3014 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
3015 0x80, 0x80, 0x80, 0x80};
3016 __vector
unsigned char Slashes = {
3017 '/',
'/',
'/',
'/',
'/',
'/',
'/',
'/',
3018 '/',
'/',
'/',
'/',
'/',
'/',
'/',
'/'
3020 while (CurPtr + 16 < BufferEnd) {
3022 vec_any_ge(*(
const __vector
unsigned char *)CurPtr, LongUTF)))
3024 if (
vec_any_eq(*(
const __vector
unsigned char *)CurPtr, Slashes)) {
3031 while (CurPtr + 16 < BufferEnd) {
3032 bool HasNonASCII =
false;
3033 for (
unsigned I = 0; I < 16; ++I)
3034 HasNonASCII |= !
isASCII(CurPtr[I]);
3036 if (LLVM_UNLIKELY(HasNonASCII))
3039 bool HasSlash =
false;
3040 for (
unsigned I = 0; I < 16; ++I)
3041 HasSlash |= CurPtr[I] ==
'/';
3055 while (
C !=
'/' &&
C !=
'\0') {
3057 UnicodeDecodingAlreadyDiagnosed =
false;
3064 unsigned Length = llvm::getUTF8SequenceSize(
3065 (
const llvm::UTF8 *)CurPtr - 1, (
const llvm::UTF8 *)BufferEnd);
3068 Diag(CurPtr - 1, diag::warn_invalid_utf8_in_comment);
3069 UnicodeDecodingAlreadyDiagnosed =
true;
3071 UnicodeDecodingAlreadyDiagnosed =
false;
3072 CurPtr += Length - 1;
3079 if (CurPtr[-2] ==
'*')
3082 if ((CurPtr[-2] ==
'\n' || CurPtr[-2] ==
'\r')) {
3084 LangOpts.Trigraphs)) {
3090 if (CurPtr[0] ==
'*' && CurPtr[1] !=
'/') {
3095 Diag(CurPtr-1, diag::warn_nested_block_comment);
3097 }
else if (
C == 0 && CurPtr == BufferEnd+1) {
3099 Diag(BufferPtr, diag::err_unterminated_block_comment);
3108 FormTokenWithChars(
Result, CurPtr, tok::unknown);
3114 }
else if (
C ==
'\0' && isCodeCompletionPoint(CurPtr-1)) {
3115 PP->CodeCompleteNaturalLanguage();
3133 FormTokenWithChars(
Result, CurPtr, tok::comment);
3142 SkipWhitespace(
Result, CurPtr + 1);
3160 "Must be in a preprocessing directive!");
3165 const char *CurPtr = BufferPtr;
3167 char Char = getAndAdvanceChar(CurPtr, Tmp);
3175 if (CurPtr-1 != BufferEnd) {
3176 if (isCodeCompletionPoint(CurPtr-1)) {
3177 PP->CodeCompleteNaturalLanguage();
3192 assert(CurPtr[-1] == Char &&
"Trigraphs for newline?");
3193 BufferPtr = CurPtr-1;
3197 if (Tmp.
is(tok::code_completion)) {
3199 PP->CodeCompleteNaturalLanguage();
3202 assert(Tmp.
is(tok::eod) &&
"Unexpected token!");
3214bool Lexer::LexEndOfFile(
Token &
Result,
const char *CurPtr) {
3222 FormTokenWithChars(
Result, CurPtr, tok::eod);
3234 BufferPtr = BufferEnd;
3235 FormTokenWithChars(
Result, BufferEnd, tok::eof);
3239 if (
PP->isRecordingPreamble() &&
PP->isInPrimaryFile()) {
3245 MIOpt.ExitTopLevelConditional();
3253 if (
PP->getCodeCompletionFileLoc() != FileLoc)
3255 diag::err_pp_unterminated_conditional);
3262 if (CurPtr != BufferStart && (CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r'))
3263 Diag(BufferEnd, diag::warn_no_newline_eof)
3275std::optional<Token> Lexer::peekNextPPToken() {
3276 assert(!
LexingRawMode &&
"How can we expand a macro from a skipping buffer?");
3278 if (isDependencyDirectivesLexer()) {
3279 if (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size())
3280 return std::nullopt;
3282 (void)convertDependencyDirectiveToken(
3283 DepDirectives.front().Tokens[NextDepDirectiveTokenIndex],
Result);
3293 const char *TmpBufferPtr = BufferPtr;
3295 bool atStartOfLine = IsAtStartOfLine;
3296 bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
3297 bool leadingSpace = HasLeadingSpace;
3298 MultipleIncludeOpt MIOptState =
MIOpt;
3304 BufferPtr = TmpBufferPtr;
3306 HasLeadingSpace = leadingSpace;
3307 IsAtStartOfLine = atStartOfLine;
3308 IsAtPhysicalStartOfLine = atPhysicalStartOfLine;
3313 if (
Tok.
is(tok::eof))
3314 return std::nullopt;
3321 const char *Terminator = CMK ==
CMK_Perforce ?
"<<<<\n" :
">>>>>>>";
3323 auto RestOfBuffer = StringRef(CurPtr, BufferEnd - CurPtr).substr(TermLen);
3324 size_t Pos = RestOfBuffer.find(Terminator);
3325 while (Pos != StringRef::npos) {
3328 (RestOfBuffer[Pos - 1] !=
'\r' && RestOfBuffer[Pos - 1] !=
'\n')) {
3329 RestOfBuffer = RestOfBuffer.substr(Pos+TermLen);
3330 Pos = RestOfBuffer.find(Terminator);
3333 return RestOfBuffer.data()+Pos;
3342bool Lexer::IsStartOfConflictMarker(
const char *CurPtr) {
3344 if (CurPtr != BufferStart &&
3345 CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')
3349 if (!StringRef(CurPtr, BufferEnd - CurPtr).starts_with(
"<<<<<<<") &&
3350 !StringRef(CurPtr, BufferEnd - CurPtr).starts_with(
">>>> "))
3365 Diag(CurPtr, diag::err_conflict_marker);
3366 CurrentConflictMarkerState =
Kind;
3370 while (*CurPtr !=
'\r' && *CurPtr !=
'\n') {
3371 assert(CurPtr != BufferEnd &&
"Didn't find end of line");
3386bool Lexer::HandleEndOfConflictMarker(
const char *CurPtr) {
3388 if (CurPtr != BufferStart &&
3389 CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')
3398 for (
unsigned i = 1; i != 4; ++i)
3399 if (CurPtr[i] != CurPtr[0])
3406 CurrentConflictMarkerState)) {
3410 while (CurPtr != BufferEnd && *CurPtr !=
'\r' && *CurPtr !=
'\n')
3416 CurrentConflictMarkerState =
CMK_None;
3424 const char *BufferEnd) {
3425 if (CurPtr == BufferEnd)
3428 for (; CurPtr != BufferEnd; ++CurPtr) {
3429 if (CurPtr[0] ==
'#' && CurPtr[1] ==
'>')
3435bool Lexer::lexEditorPlaceholder(
Token &
Result,
const char *CurPtr) {
3436 assert(CurPtr[-1] ==
'<' && CurPtr[0] ==
'#' &&
"Not a placeholder!");
3442 const char *Start = CurPtr - 1;
3443 if (!LangOpts.AllowEditorPlaceholders)
3444 Diag(Start, diag::err_placeholder_in_source);
3446 FormTokenWithChars(
Result, End, tok::raw_identifier);
3447 Result.setRawIdentifierData(Start);
3454bool Lexer::isCodeCompletionPoint(
const char *CurPtr)
const {
3455 if (
PP &&
PP->isCodeCompletionEnabled()) {
3456 SourceLocation Loc = FileLoc.getLocWithOffset(CurPtr-BufferStart);
3457 return Loc ==
PP->getCodeCompletionLoc();
3468 if (Opts.CPlusPlus23)
3469 DiagId = diag::warn_cxx23_delimited_escape_sequence;
3470 else if (Opts.C2y && !Named)
3471 DiagId = diag::warn_c2y_delimited_escape_sequence;
3473 DiagId = diag::ext_delimited_escape_sequence;
3479 if (!Opts.CPlusPlus)
3480 Ext = Named ? 2 : 1 ;
3484 Diags.
Report(Loc, DiagId) << Named << Ext;
3487std::optional<uint32_t> Lexer::tryReadNumericUCN(
const char *&StartPtr,
3488 const char *SlashLoc,
3491 char Kind = getCharAndSize(StartPtr, CharSize);
3492 assert((Kind ==
'u' || Kind ==
'U') &&
"expected a UCN");
3494 unsigned NumHexDigits;
3497 else if (Kind ==
'U')
3500 bool Delimited =
false;
3501 bool FoundEndDelimiter =
false;
3505 if (!LangOpts.CPlusPlus && !LangOpts.C99) {
3507 Diag(SlashLoc, diag::warn_ucn_not_valid_in_c89);
3508 return std::nullopt;
3511 const char *CurPtr = StartPtr + CharSize;
3512 const char *KindLoc = &CurPtr[-1];
3515 while (Count != NumHexDigits || Delimited) {
3516 char C = getCharAndSize(CurPtr, CharSize);
3517 if (!Delimited && Count == 0 &&
C ==
'{') {
3523 if (Delimited &&
C ==
'}') {
3525 FoundEndDelimiter =
true;
3529 unsigned Value = llvm::hexDigitValue(
C);
3530 if (
Value == std::numeric_limits<unsigned>::max()) {
3534 Diag(SlashLoc, diag::warn_delimited_ucn_incomplete)
3535 << StringRef(KindLoc, 1);
3536 return std::nullopt;
3539 if (CodePoint & 0xF000'0000) {
3541 Diag(KindLoc, diag::err_escape_too_large) << 0;
3542 return std::nullopt;
3553 Diag(SlashLoc, FoundEndDelimiter ? diag::warn_delimited_ucn_empty
3554 : diag::warn_ucn_escape_no_digits)
3555 << StringRef(KindLoc, 1);
3556 return std::nullopt;
3559 if (Delimited && Kind ==
'U') {
3561 Diag(SlashLoc, diag::err_hex_escape_no_digits) << StringRef(KindLoc, 1);
3562 return std::nullopt;
3565 if (!Delimited && Count != NumHexDigits) {
3567 Diag(SlashLoc, diag::warn_ucn_escape_incomplete);
3569 if (Count == 4 && NumHexDigits == 8) {
3570 CharSourceRange URange =
makeCharRange(*
this, KindLoc, KindLoc + 1);
3571 Diag(KindLoc, diag::note_ucn_four_not_eight)
3575 return std::nullopt;
3578 if (Delimited &&
PP)
3581 PP->getDiagnostics());
3588 if (CurPtr - StartPtr == (
ptrdiff_t)(Count + 1 + (Delimited ? 2 : 0)))
3591 while (StartPtr != CurPtr)
3592 (void)getAndAdvanceChar(StartPtr, *
Result);
3599std::optional<uint32_t> Lexer::tryReadNamedUCN(
const char *&StartPtr,
3600 const char *SlashLoc,
3605 char C = getCharAndSize(StartPtr, CharSize);
3606 assert(
C ==
'N' &&
"expected \\N{...}");
3608 const char *CurPtr = StartPtr + CharSize;
3609 const char *KindLoc = &CurPtr[-1];
3611 C = getCharAndSize(CurPtr, CharSize);
3614 Diag(SlashLoc, diag::warn_ucn_escape_incomplete);
3615 return std::nullopt;
3618 const char *StartName = CurPtr;
3619 bool FoundEndDelimiter =
false;
3620 llvm::SmallVector<char, 30> Buffer;
3622 C = getCharAndSize(CurPtr, CharSize);
3625 FoundEndDelimiter =
true;
3631 Buffer.push_back(
C);
3634 if (!FoundEndDelimiter || Buffer.empty()) {
3636 Diag(SlashLoc, FoundEndDelimiter ? diag::warn_delimited_ucn_empty
3637 : diag::warn_delimited_ucn_incomplete)
3638 << StringRef(KindLoc, 1);
3639 return std::nullopt;
3642 StringRef Name(Buffer.data(), Buffer.size());
3643 std::optional<char32_t>
Match =
3644 llvm::sys::unicode::nameToCodepointStrict(Name);
3645 std::optional<llvm::sys::unicode::LooseMatchingResult> LooseMatch;
3647 LooseMatch = llvm::sys::unicode::nameToCodepointLooseMatching(Name);
3649 Diag(StartName, diag::err_invalid_ucn_name)
3650 << StringRef(Buffer.data(), Buffer.size())
3653 Diag(StartName, diag::note_invalid_ucn_name_loose_matching)
3664 if (Diagnose &&
Match)
3667 PP->getDiagnostics());
3673 if (LooseMatch && Diagnose)
3674 Match = LooseMatch->CodePoint;
3681 if (CurPtr - StartPtr == (
ptrdiff_t)(Buffer.size() + 3))
3684 while (StartPtr != CurPtr)
3685 (void)getAndAdvanceChar(StartPtr, *
Result);
3689 return Match ? std::optional<uint32_t>(*
Match) : std::nullopt;
3692uint32_t Lexer::tryReadUCN(
const char *&StartPtr,
const char *SlashLoc,
3696 std::optional<uint32_t> CodePointOpt;
3697 char Kind = getCharAndSize(StartPtr, CharSize);
3698 if (Kind ==
'u' || Kind ==
'U')
3699 CodePointOpt = tryReadNumericUCN(StartPtr, SlashLoc,
Result);
3700 else if (Kind ==
'N')
3701 CodePointOpt = tryReadNamedUCN(StartPtr, SlashLoc,
Result);
3706 uint32_t CodePoint = *CodePointOpt;
3709 if (LangOpts.AsmPreprocessor)
3728 if (CodePoint < 0xA0) {
3732 if (CodePoint < 0x20 || CodePoint >= 0x7F)
3733 Diag(BufferPtr, diag::err_ucn_control_character);
3735 char C =
static_cast<char>(CodePoint);
3736 Diag(BufferPtr, diag::err_ucn_escape_basic_scs) << StringRef(&
C, 1);
3741 }
else if (CodePoint >= 0xD800 && CodePoint <= 0xDFFF) {
3746 if (LangOpts.CPlusPlus && !LangOpts.CPlusPlus11)
3747 Diag(BufferPtr, diag::warn_ucn_escape_surrogate);
3749 Diag(BufferPtr, diag::err_ucn_escape_invalid);
3758 const char *CurPtr) {
3761 Diag(BufferPtr, diag::ext_unicode_whitespace)
3770void Lexer::PropagateLineStartLeadingSpaceInfo(
Token &
Result) {
3771 IsAtStartOfLine =
Result.isAtStartOfLine();
3772 HasLeadingSpace =
Result.hasLeadingSpace();
3773 HasLeadingEmptyMacro =
Result.hasLeadingEmptyMacro();
3778 assert(!isDependencyDirectivesLexer());
3784 if (IsAtStartOfLine) {
3786 IsAtStartOfLine =
false;
3789 if (IsAtPhysicalStartOfLine) {
3791 IsAtPhysicalStartOfLine =
false;
3794 if (HasLeadingSpace) {
3796 HasLeadingSpace =
false;
3799 if (HasLeadingEmptyMacro) {
3801 HasLeadingEmptyMacro =
false;
3806 bool returnedToken = LexTokenInternal(
Result);
3808 assert((returnedToken || !isRawLex) &&
"Raw lex must succeed");
3809 return returnedToken;
3819 assert(!
Result.needsCleaning() &&
"Result needs cleaning");
3820 assert(!
Result.hasPtrData() &&
"Result has not been reset");
3823 const char *CurPtr = BufferPtr;
3837 FormTokenWithChars(
Result, CurPtr, tok::unknown);
3846 unsigned SizeTmp, SizeTmp2;
3849 char Char = getAndAdvanceChar(CurPtr,
Result);
3853 NewLinePtr =
nullptr;
3858 if (CurPtr-1 == BufferEnd)
3859 return LexEndOfFile(
Result, CurPtr-1);
3862 if (isCodeCompletionPoint(CurPtr-1)) {
3865 FormTokenWithChars(
Result, CurPtr, tok::code_completion);
3870 Diag(CurPtr-1, diag::null_in_file);
3872 if (SkipWhitespace(
Result, CurPtr))
3881 if (LangOpts.MicrosoftExt) {
3883 Diag(CurPtr-1, diag::ext_ctrl_z_eof_microsoft);
3884 return LexEndOfFile(
Result, CurPtr-1);
3888 Kind = tok::unknown;
3892 if (CurPtr[0] ==
'\n')
3893 (void)getAndAdvanceChar(CurPtr,
Result);
3907 IsAtStartOfLine =
true;
3908 IsAtPhysicalStartOfLine =
true;
3909 NewLinePtr = CurPtr - 1;
3918 if (SkipWhitespace(
Result, CurPtr))
3928 SkipHorizontalWhitespace:
3930 if (SkipWhitespace(
Result, CurPtr))
3939 LineComment && (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP)) {
3940 if (SkipLineComment(
Result, CurPtr + 2))
3942 goto SkipIgnoredUnits;
3944 if (SkipBlockComment(
Result, CurPtr + 2))
3946 goto SkipIgnoredUnits;
3948 goto SkipHorizontalWhitespace;
3956 case '0':
case '1':
case '2':
case '3':
case '4':
3957 case '5':
case '6':
case '7':
case '8':
case '9':
3960 return LexNumericConstant(
Result, CurPtr);
3969 if (LangOpts.CPlusPlus11 || LangOpts.C11) {
3970 Char = getCharAndSize(CurPtr, SizeTmp);
3974 return LexStringLiteral(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3975 tok::utf16_string_literal);
3979 return LexCharConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3980 tok::utf16_char_constant);
3983 if (Char ==
'R' && LangOpts.RawStringLiterals &&
3984 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3985 return LexRawStringLiteral(
Result,
3986 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3988 tok::utf16_string_literal);
3991 char Char2 = getCharAndSize(CurPtr + SizeTmp, SizeTmp2);
3995 return LexStringLiteral(
Result,
3996 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3998 tok::utf8_string_literal);
3999 if (Char2 ==
'\'' && (LangOpts.CPlusPlus17 || LangOpts.C23))
4000 return LexCharConstant(
4001 Result, ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4003 tok::utf8_char_constant);
4005 if (Char2 ==
'R' && LangOpts.RawStringLiterals) {
4007 char Char3 = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
4010 return LexRawStringLiteral(
Result,
4011 ConsumeChar(ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4014 tok::utf8_string_literal);
4021 return LexIdentifierContinue(
Result, CurPtr);
4027 if (LangOpts.CPlusPlus11 || LangOpts.C11) {
4028 Char = getCharAndSize(CurPtr, SizeTmp);
4032 return LexStringLiteral(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
4033 tok::utf32_string_literal);
4037 return LexCharConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
4038 tok::utf32_char_constant);
4041 if (Char ==
'R' && LangOpts.RawStringLiterals &&
4042 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
4043 return LexRawStringLiteral(
Result,
4044 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4046 tok::utf32_string_literal);
4050 return LexIdentifierContinue(
Result, CurPtr);
4056 if (LangOpts.RawStringLiterals) {
4057 Char = getCharAndSize(CurPtr, SizeTmp);
4060 return LexRawStringLiteral(
Result,
4061 ConsumeChar(CurPtr, SizeTmp,
Result),
4062 tok::string_literal);
4066 return LexIdentifierContinue(
Result, CurPtr);
4071 Char = getCharAndSize(CurPtr, SizeTmp);
4075 return LexStringLiteral(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
4076 tok::wide_string_literal);
4079 if (LangOpts.RawStringLiterals && Char ==
'R' &&
4080 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
4081 return LexRawStringLiteral(
Result,
4082 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4084 tok::wide_string_literal);
4088 return LexCharConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
4089 tok::wide_char_constant);
4094 case 'A':
case 'B':
case 'C':
case 'D':
case 'E':
case 'F':
case 'G':
4095 case 'H':
case 'I':
case 'J':
case 'K':
case 'M':
case 'N':
4096 case 'O':
case 'P':
case 'Q':
case 'S':
case 'T':
4097 case 'V':
case 'W':
case 'X':
case 'Y':
case 'Z':
4098 case 'a':
case 'b':
case 'c':
case 'd':
case 'e':
case 'f':
case 'g':
4099 case 'h':
case 'i':
case 'j':
case 'k':
case 'l':
case 'm':
case 'n':
4100 case 'o':
case 'p':
case 'q':
case 'r':
case 's':
case 't':
4101 case 'v':
case 'w':
case 'x':
case 'y':
case 'z':
4105 return LexIdentifierContinue(
Result, CurPtr);
4107 if (LangOpts.DollarIdents) {
4109 Diag(CurPtr-1, diag::ext_dollar_in_identifier);
4112 return LexIdentifierContinue(
Result, CurPtr);
4115 Kind = tok::unknown;
4122 return LexCharConstant(
Result, CurPtr, tok::char_constant);
4128 return LexStringLiteral(
Result, CurPtr,
4130 : tok::string_literal);
4134 Kind = tok::question;
4137 Kind = tok::l_square;
4140 Kind = tok::r_square;
4143 Kind = tok::l_paren;
4146 Kind = tok::r_paren;
4149 Kind = tok::l_brace;
4152 Kind = tok::r_brace;
4155 Char = getCharAndSize(CurPtr, SizeTmp);
4156 if (Char >=
'0' && Char <=
'9') {
4160 return LexNumericConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result));
4161 }
else if (LangOpts.CPlusPlus && Char ==
'*') {
4162 Kind = tok::periodstar;
4164 }
else if (Char ==
'.' &&
4165 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
'.') {
4166 Kind = tok::ellipsis;
4167 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4174 Char = getCharAndSize(CurPtr, SizeTmp);
4177 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4178 }
else if (Char ==
'=') {
4179 Kind = tok::ampequal;
4180 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4186 if (getCharAndSize(CurPtr, SizeTmp) ==
'=') {
4187 Kind = tok::starequal;
4188 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4194 Char = getCharAndSize(CurPtr, SizeTmp);
4196 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4197 Kind = tok::plusplus;
4198 }
else if (Char ==
'=') {
4199 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4200 Kind = tok::plusequal;
4206 Char = getCharAndSize(CurPtr, SizeTmp);
4208 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4209 Kind = tok::minusminus;
4210 }
else if (Char ==
'>' && LangOpts.CPlusPlus &&
4211 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
'*') {
4212 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4214 Kind = tok::arrowstar;
4215 }
else if (Char ==
'>') {
4216 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4218 }
else if (Char ==
'=') {
4219 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4220 Kind = tok::minusequal;
4229 if (getCharAndSize(CurPtr, SizeTmp) ==
'=') {
4230 Kind = tok::exclaimequal;
4231 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4233 Kind = tok::exclaim;
4238 Char = getCharAndSize(CurPtr, SizeTmp);
4248 bool TreatAsComment =
4249 LineComment && (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP);
4250 if (!TreatAsComment)
4251 if (!(
PP &&
PP->isPreprocessedOutput()))
4252 TreatAsComment = getCharAndSize(CurPtr+SizeTmp, SizeTmp2) !=
'*';
4254 if (TreatAsComment) {
4255 if (SkipLineComment(
Result, ConsumeChar(CurPtr, SizeTmp,
Result)))
4261 goto SkipIgnoredUnits;
4266 if (SkipBlockComment(
Result, ConsumeChar(CurPtr, SizeTmp,
Result)))
4275 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4276 Kind = tok::slashequal;
4282 Char = getCharAndSize(CurPtr, SizeTmp);
4284 Kind = tok::percentequal;
4285 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4286 }
else if (LangOpts.Digraphs && Char ==
'>') {
4287 Kind = tok::r_brace;
4288 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4289 }
else if (LangOpts.Digraphs && Char ==
':') {
4290 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4291 Char = getCharAndSize(CurPtr, SizeTmp);
4292 if (Char ==
'%' && getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
':') {
4293 Kind = tok::hashhash;
4294 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4296 }
else if (Char ==
'@' && LangOpts.MicrosoftExt) {
4297 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4299 Diag(BufferPtr, diag::ext_charize_microsoft);
4308 goto HandleDirective;
4313 Kind = tok::percent;
4317 Char = getCharAndSize(CurPtr, SizeTmp);
4319 return LexAngledStringLiteral(
Result, CurPtr);
4320 }
else if (Char ==
'<') {
4321 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
4323 Kind = tok::lesslessequal;
4324 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4326 }
else if (After ==
'<' && IsStartOfConflictMarker(CurPtr-1)) {
4330 }
else if (After ==
'<' && HandleEndOfConflictMarker(CurPtr-1)) {
4334 }
else if (LangOpts.CUDA && After ==
'<') {
4335 Kind = tok::lesslessless;
4336 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4339 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4340 Kind = tok::lessless;
4342 }
else if (Char ==
'=') {
4343 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
4345 if (LangOpts.CPlusPlus20) {
4347 Diag(BufferPtr, diag::warn_cxx17_compat_spaceship);
4348 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4350 Kind = tok::spaceship;
4356 Diag(BufferPtr, diag::warn_cxx20_compat_spaceship)
4361 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4362 Kind = tok::lessequal;
4363 }
else if (LangOpts.Digraphs && Char ==
':') {
4364 if (LangOpts.CPlusPlus11 &&
4365 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
':') {
4372 char After = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
4373 if (After !=
':' && After !=
'>') {
4376 Diag(BufferPtr, diag::warn_cxx98_compat_less_colon_colon);
4381 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4382 Kind = tok::l_square;
4383 }
else if (LangOpts.Digraphs && Char ==
'%') {
4384 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4385 Kind = tok::l_brace;
4386 }
else if (Char ==
'#' && SizeTmp == 1 &&
4387 lexEditorPlaceholder(
Result, CurPtr)) {
4394 Char = getCharAndSize(CurPtr, SizeTmp);
4396 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4397 Kind = tok::greaterequal;
4398 }
else if (Char ==
'>') {
4399 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
4401 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4403 Kind = tok::greatergreaterequal;
4404 }
else if (After ==
'>' && IsStartOfConflictMarker(CurPtr-1)) {
4408 }
else if (After ==
'>' && HandleEndOfConflictMarker(CurPtr-1)) {
4411 }
else if (LangOpts.CUDA && After ==
'>') {
4412 Kind = tok::greatergreatergreater;
4413 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4416 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4417 Kind = tok::greatergreater;
4420 Kind = tok::greater;
4424 Char = getCharAndSize(CurPtr, SizeTmp);
4426 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4427 Kind = tok::caretequal;
4428 }
else if (LangOpts.Reflection && Char ==
'^') {
4429 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4430 Kind = tok::caretcaret;
4432 if (LangOpts.OpenCL && Char ==
'^')
4433 Diag(CurPtr, diag::err_opencl_logical_exclusive_or);
4438 Char = getCharAndSize(CurPtr, SizeTmp);
4440 Kind = tok::pipeequal;
4441 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4442 }
else if (Char ==
'|') {
4444 if (CurPtr[1] ==
'|' && HandleEndOfConflictMarker(CurPtr-1))
4446 Kind = tok::pipepipe;
4447 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4453 Char = getCharAndSize(CurPtr, SizeTmp);
4454 if (LangOpts.Digraphs && Char ==
'>') {
4455 Kind = tok::r_square;
4456 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4457 }
else if (Char ==
':') {
4458 Kind = tok::coloncolon;
4459 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4468 Char = getCharAndSize(CurPtr, SizeTmp);
4471 if (CurPtr[1] ==
'=' && HandleEndOfConflictMarker(CurPtr-1))
4474 Kind = tok::equalequal;
4475 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4484 Char = getCharAndSize(CurPtr, SizeTmp);
4486 Kind = tok::hashhash;
4487 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4488 }
else if (Char ==
'@' && LangOpts.MicrosoftExt) {
4491 Diag(BufferPtr, diag::ext_charize_microsoft);
4492 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4499 goto HandleDirective;
4507 if (CurPtr[-1] ==
'@' && LangOpts.ObjC) {
4508 FormTokenWithChars(
Result, CurPtr, tok::at);
4514 llvm::SaveAndRestore<bool> SavedParsingPreprocessorDirective(
4516 auto NextTokOr = peekNextPPToken();
4517 if (NextTokOr.has_value()) {
4518 NextPPTok = *NextTokOr;
4521 if (NextPPTok.
is(tok::raw_identifier) &&
4529 Kind = tok::unknown;
4534 if (!LangOpts.AsmPreprocessor) {
4535 if (
uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &
Result)) {
4536 if (CheckUnicodeWhitespace(
Result, CodePoint, CurPtr)) {
4537 if (SkipWhitespace(
Result, CurPtr))
4545 return LexUnicodeIdentifierStart(
Result, CodePoint, CurPtr);
4549 Kind = tok::unknown;
4554 Kind = tok::unknown;
4558 llvm::UTF32 CodePoint;
4563 llvm::ConversionResult Status =
4564 llvm::convertUTF8Sequence((
const llvm::UTF8 **)&CurPtr,
4565 (
const llvm::UTF8 *)BufferEnd,
4567 llvm::strictConversion);
4568 if (Status == llvm::conversionOK) {
4569 if (CheckUnicodeWhitespace(
Result, CodePoint, CurPtr)) {
4570 if (SkipWhitespace(
Result, CurPtr))
4577 return LexUnicodeIdentifierStart(
Result, CodePoint, CurPtr);
4581 PP->isPreprocessedOutput()) {
4583 Kind = tok::unknown;
4590 Diag(CurPtr, diag::err_invalid_utf8);
4592 BufferPtr = CurPtr+1;
4604 FormTokenWithChars(
Result, CurPtr, Kind);
4610 FormTokenWithChars(
Result, CurPtr, tok::hash);
4613 if (
PP->hadModuleLoaderFatalFailure())
4625const char *Lexer::convertDependencyDirectiveToken(
4627 const char *TokPtr = BufferStart + DDTok.
Offset;
4633 if (
Result.is(tok::raw_identifier))
4634 Result.setRawIdentifierData(TokPtr);
4635 else if (
Result.isLiteral())
4636 Result.setLiteralData(TokPtr);
4637 BufferPtr = TokPtr + DDTok.
Length;
4641bool Lexer::LexDependencyDirectiveToken(
Token &
Result) {
4642 assert(isDependencyDirectivesLexer());
4644 using namespace dependency_directives_scan;
4646 if (BufferPtr == BufferEnd)
4647 return LexEndOfFile(
Result, BufferPtr);
4649 while (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size()) {
4650 if (DepDirectives.front().Kind == pp_eof)
4651 return LexEndOfFile(
Result, BufferEnd);
4652 if (DepDirectives.front().Kind == tokens_present_before_eof)
4654 NextDepDirectiveTokenIndex = 0;
4655 DepDirectives = DepDirectives.drop_front();
4658 const dependency_directives_scan::Token &DDTok =
4659 DepDirectives.front().Tokens[NextDepDirectiveTokenIndex++];
4660 if (NextDepDirectiveTokenIndex > 1 || DDTok.
Kind != tok::hash) {
4666 BufferPtr = BufferStart + DDTok.
Offset;
4667 LexAngledStringLiteral(
Result, BufferPtr + 1);
4668 if (
Result.isNot(tok::header_name))
4672 const dependency_directives_scan::Token &NextTok =
4673 DepDirectives.front().Tokens[NextDepDirectiveTokenIndex];
4674 if (BufferStart + NextTok.
Offset >= BufferPtr)
4676 ++NextDepDirectiveTokenIndex;
4681 const char *TokPtr = convertDependencyDirectiveToken(DDTok,
Result);
4683 if (
Result.is(tok::hash) &&
Result.isAtStartOfLine()) {
4685 if (
PP->hadModuleLoaderFatalFailure())
4691 auto NextTok = peekNextPPToken();
4692 if (NextTok && NextTok->
is(tok::raw_identifier) &&
4693 NextTok->getRawIdentifier() ==
"import") {
4695 if (
PP->hadModuleLoaderFatalFailure())
4700 if (
Result.is(tok::raw_identifier)) {
4701 Result.setRawIdentifierData(TokPtr);
4703 const IdentifierInfo *II =
PP->LookUpIdentifierInfo(
Result);
4704 if (LangOpts.CPlusPlusModules &&
Result.isModuleContextualKeyword() &&
4705 PP->HandleModuleContextualKeyword(
Result)) {
4710 return PP->HandleIdentifier(
Result);
4716 if (
Result.is(tok::colon)) {
4718 if (*BufferPtr ==
':') {
4719 assert(DepDirectives.front().Tokens[NextDepDirectiveTokenIndex].is(
4721 ++NextDepDirectiveTokenIndex;
4722 Result.setKind(tok::coloncolon);
4732bool Lexer::LexDependencyDirectiveTokenWhileSkipping(
Token &
Result) {
4733 assert(isDependencyDirectivesLexer());
4735 using namespace dependency_directives_scan;
4738 unsigned NestedIfs = 0;
4740 DepDirectives = DepDirectives.drop_front();
4741 switch (DepDirectives.front().Kind) {
4743 llvm_unreachable(
"unexpected 'pp_none'");
4784 NextDepDirectiveTokenIndex = 0;
4785 return LexEndOfFile(
Result, BufferEnd);
4789 const dependency_directives_scan::Token &DDTok =
4790 DepDirectives.front().Tokens.front();
4791 assert(DDTok.
is(tok::hash));
4792 NextDepDirectiveTokenIndex = 1;
4794 convertDependencyDirectiveToken(DDTok,
Result);
Defines the Diagnostic-related interfaces.
Defines the clang::IdentifierInfo, clang::IdentifierTable, and clang::Selector interfaces.
Result
Implement __builtin_bit_cast and related operations.
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified.
Defines the clang::LangOptions interface.
static bool isInExpansionTokenRange(const SourceLocation Loc, const SourceManager &SM)
static bool isMathematicalExtensionID(uint32_t C, const LangOptions &LangOpts, bool IsStart, bool &IsExtension)
static void diagnoseInvalidUnicodeCodepointInIdentifier(DiagnosticsEngine &Diags, const LangOptions &LangOpts, uint32_t CodePoint, CharSourceRange Range, bool IsFirst)
static char DecodeTrigraphChar(const char *CP, Lexer *L, bool Trigraphs)
DecodeTrigraphChar - If the specified character is a legal trigraph when prefixed with ?
static size_t getSpellingSlow(const Token &Tok, const char *BufPtr, const LangOptions &LangOpts, char *Spelling)
Slow case of getSpelling.
static const char * FindConflictEnd(const char *CurPtr, const char *BufferEnd, ConflictMarkerKind CMK)
Find the end of a version control conflict marker.
static void maybeDiagnoseUTF8Homoglyph(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range)
After encountering UTF-8 character C and interpreting it as an identifier character,...
static SourceLocation getBeginningOfFileToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
static void StringifyImpl(T &Str, char Quote)
static LLVM_ATTRIBUTE_NOINLINE SourceLocation GetMappedTokenLoc(Preprocessor &PP, SourceLocation FileLoc, unsigned CharNo, unsigned TokLen)
GetMappedTokenLoc - If lexing out of a 'mapped buffer', where we pretend the lexer buffer was all exp...
static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts, bool &IsExtension)
static CharSourceRange makeCharRange(Lexer &L, const char *Begin, const char *End)
static bool isUnicodeWhitespace(uint32_t Codepoint)
static void diagnoseExtensionInIdentifier(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range)
static const char * findPlaceholderEnd(const char *CurPtr, const char *BufferEnd)
static llvm::SmallString< 5 > codepointAsHexString(uint32_t C)
static CharSourceRange makeRangeFromFileLocs(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, Lexer *L, bool Trigraphs)
isEndOfBlockCommentWithEscapedNewLine - Return true if the specified newline character (either \n or \r) ...
static const char * fastParseASCIIIdentifier(const char *CurPtr, const char *BufferEnd)
static char GetTrigraphCharForLetter(char Letter)
GetTrigraphCharForLetter - Given a character that occurs after a ?
static bool isAllowedInitiallyIDChar(uint32_t C, const LangOptions &LangOpts, bool &IsExtension)
static void maybeDiagnoseIDCharCompat(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range, bool IsFirst)
static const char * findBeginningOfLine(StringRef Buffer, unsigned Offset)
Returns the pointer that points to the beginning of line that contains the given offset,...
Defines the MultipleIncludeOpt interface.
Defines the clang::Preprocessor interface.
static constexpr bool isOneOf()
Defines the clang::SourceLocation class and associated facilities.
Defines the SourceManager interface.
Defines the clang::TokenKind enum and support functions.
static const llvm::sys::UnicodeCharRange C11DisallowedInitialIDCharRanges[]
static const llvm::sys::UnicodeCharRange C99DisallowedInitialIDCharRanges[]
static const llvm::sys::UnicodeCharRange UnicodeWhitespaceCharRanges[]
static const llvm::sys::UnicodeCharRange C99AllowedIDCharRanges[]
static const llvm::sys::UnicodeCharRange C11AllowedIDCharRanges[]
static const llvm::sys::UnicodeCharRange MathematicalNotationProfileIDStartRanges[]
static const llvm::sys::UnicodeCharRange MathematicalNotationProfileIDContinueRanges[]
static const llvm::sys::UnicodeCharRange XIDStartRanges[]
static const llvm::sys::UnicodeCharRange XIDContinueRanges[]
__DEVICE__ void * memcpy(void *__a, const void *__b, size_t __c)
__PTRDIFF_TYPE__ ptrdiff_t
A signed integer type that is the result of subtracting two pointers.
static __inline__ int __ATTRS_o_ai vec_any_ge(vector signed char __a, vector signed char __b)
static __inline__ int __ATTRS_o_ai vec_any_eq(vector signed char __a, vector signed char __b)
Represents a byte-granular source range.
static CharSourceRange getCharRange(SourceRange R)
SourceLocation getEnd() const
SourceLocation getBegin() const
A little helper class used to produce diagnostics.
Concrete class used by the front-end to report problems and issues.
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
bool isIgnored(unsigned DiagID, SourceLocation Loc) const
Determine whether the diagnostic is known to be ignored.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
static FixItHint CreateReplacement(CharSourceRange RemoveRange, StringRef Code)
Create a code modification hint that replaces the given source range with the given code string.
static FixItHint CreateRemoval(CharSourceRange RemoveRange)
Create a code modification hint that removes the given source range.
static FixItHint CreateInsertion(SourceLocation InsertionLoc, StringRef Code, bool BeforePreviousInsertions=false)
Create a code modification hint that inserts the given code string at a specific location.
One of these records is kept for each identifier that is lexed.
bool isHandleIdentifierCase() const
Return true if the Preprocessor::HandleIdentifier must be called on a token of this identifier.
bool isModuleKeyword() const
Determine whether this is the contextual keyword module.
tok::TokenKind getTokenID() const
If this is a source-language token (e.g.
bool isKeyword(const LangOptions &LangOpts) const
Return true if this token is a keyword in the specified language.
bool isImportKeyword() const
Determine whether this is the contextual keyword import.
tok::ObjCKeywordKind getObjCKeywordID() const
Return the Objective-C keyword ID for this identifier.
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens.
static StringRef getSourceText(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts, bool *Invalid=nullptr)
Returns a string for the source that the range encompasses.
friend class Preprocessor
void SetKeepWhitespaceMode(bool Val)
SetKeepWhitespaceMode - This method lets clients enable or disable whitespace retention mode.
static SourceLocation findLocationAfterToken(SourceLocation loc, tok::TokenKind TKind, const SourceManager &SM, const LangOptions &LangOpts, bool SkipTrailingWhitespaceAndNewLine)
Checks that the given token is the first token that occurs after the given location (this excludes co...
bool LexFromRawLexer(Token &Result)
LexFromRawLexer - Lex a token from a designated raw lexer (one with no associated preprocessor object...
static unsigned getEscapedNewLineSize(const char *P)
getEscapedNewLineSize - Return the size of the specified escaped newline, or 0 if it is not an escape...
bool inKeepCommentMode() const
inKeepCommentMode - Return true if the lexer should return comments as tokens.
void SetCommentRetentionState(bool Mode)
SetCommentRetentionState - Change the comment retention mode of the lexer to the specified mode.
static std::optional< Token > findPreviousToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts, bool IncludeComments)
Finds the token that comes before the given location.
void seek(unsigned Offset, bool IsAtStartOfLine)
Set the lexer's buffer pointer to Offset.
static StringRef getImmediateMacroName(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
void ReadToEndOfLine(SmallVectorImpl< char > *Result=nullptr)
ReadToEndOfLine - Read the rest of the current preprocessor line as an uninterpreted string.
static bool isAtStartOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroBegin=nullptr)
Returns true if the given MacroID location points at the first token of the macro expansion.
DiagnosticBuilder Diag(const char *Loc, unsigned DiagID) const
Diag - Forwarding function for diagnostics.
static std::unique_ptr< Lexer > Create_PragmaLexer(SourceLocation SpellingLoc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLen, Preprocessor &PP)
Create_PragmaLexer: Lexer constructor - Create a new lexer object for _Pragma expansion.
const char * getBufferLocation() const
Return the current location in the buffer.
bool Lex(Token &Result)
Lex - Return the next token in the file.
bool isPragmaLexer() const
isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.
static void DiagnoseDelimitedOrNamedEscapeSequence(SourceLocation Loc, bool Named, const LangOptions &Opts, DiagnosticsEngine &Diags)
Diagnose use of a delimited or named escape sequence.
static unsigned getTokenPrefixLength(SourceLocation TokStart, unsigned CharNo, const SourceManager &SM, const LangOptions &LangOpts)
Get the physical length (including trigraphs and escaped newlines) of the first Characters characters...
Lexer(FileID FID, const llvm::MemoryBufferRef &InputFile, Preprocessor &PP, bool IsFirstIncludeOfFile=true)
Lexer constructor - Create a new lexer object for the specified buffer with the specified preprocesso...
static bool isAtEndOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroEnd=nullptr)
Returns true if the given MacroID location points at the last token of the macro expansion.
SourceLocation getSourceLocation() override
getSourceLocation - Return a source location for the next character in the current file.
static CharSourceRange makeFileCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Accepts a range and returns a character range with file locations.
static bool isNewLineEscaped(const char *BufferStart, const char *Str)
Checks whether new line pointed by Str is preceded by escape sequence.
SourceLocation getSourceLocation(const char *Loc, unsigned TokLen=1) const
getSourceLocation - Return a source location identifier for the specified offset in the current file.
static StringRef getIndentationForLine(SourceLocation Loc, const SourceManager &SM)
Returns the leading whitespace for line that corresponds to the given location Loc.
static unsigned getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid=nullptr)
getSpelling - This method is used to get the spelling of a token into a preallocated buffer,...
bool isKeepWhitespaceMode() const
isKeepWhitespaceMode - Return true if the lexer should return tokens for every character in the file,...
static bool isAsciiIdentifierContinueChar(char c, const LangOptions &LangOpts)
Returns true if the given character could appear in an identifier.
static SourceLocation findEndOfIdentifierContinuation(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Finds the end of an identifier-continuation sequence starting at Loc.
static std::optional< Token > findNextToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts, bool IncludeComments=false)
Finds the token that comes right after the given location.
static unsigned MeasureTokenLength(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
MeasureTokenLength - Relex the token at the specified location and return its length in bytes in the ...
static SourceLocation GetBeginningOfToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Given a location any where in a source buffer, find the location that corresponds to the beginning of...
void resetExtendedTokenMode()
Sets the extended token mode back to its initial value, according to the language options and preproc...
static StringRef getImmediateMacroNameForDiagnostics(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
static PreambleBounds ComputePreamble(StringRef Buffer, const LangOptions &LangOpts, unsigned MaxLines=0)
Compute the preamble of the given file.
static bool getRawToken(SourceLocation Loc, Token &Result, const SourceManager &SM, const LangOptions &LangOpts, bool IgnoreWhiteSpace=false)
Relex the token at the specified location.
static SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset, const SourceManager &SM, const LangOptions &LangOpts)
Computes the source location just past the end of the token at this source location.
static std::string Stringify(StringRef Str, bool Charify=false)
Stringify - Convert the specified string into a C string by i) escaping '\' and " characters and ii) ...
static SizedChar getCharAndSizeNoWarn(const char *Ptr, const LangOptions &LangOpts)
getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever emit a warning.
bool LexingRawMode
True if in raw mode.
SmallVector< PPConditionalInfo, 4 > ConditionalStack
Information about the set of #if/#ifdef/#ifndef blocks we are currently in.
bool ParsingPreprocessorDirective
True when parsing #XXX; turns '\n' into a tok::eod token.
MultipleIncludeOpt MIOpt
A state machine that detects the #ifndef-wrapping a file idiom for the multiple-include optimization.
bool ParsingFilename
True after #include; turns <xx> or "xxx" into a tok::header_name token.
bool isLexingRawMode() const
Return true if this lexer is in raw mode or not.
const FileID FID
The SourceManager FileID corresponding to the file being lexed.
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
SourceManager & getSourceManager() const
Encodes a location in the source.
static SourceLocation getFromRawEncoding(UIntTy Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
bool isValid() const
Return true if this is a valid SourceLocation object.
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
UIntTy getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it.
This class handles loading and caching of source files into memory.
Each ExpansionInfo encodes the expansion location - where the token was ultimately expanded,...
SourceLocation getExpansionLocStart() const
SourceLocation getSpellingLoc() const
bool isMacroArgExpansion() const
SourceLocation getExpansionLocEnd() const
This is a discriminated union of FileInfo and ExpansionInfo.
const ExpansionInfo & getExpansion() const
static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix)
Determine whether a suffix is a valid ud-suffix.
Token - This structure provides full information about a lexed token.
IdentifierInfo * getIdentifierInfo() const
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
unsigned getLength() const
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {....
tok::TokenKind getKind() const
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
bool isModuleContextualKeyword(bool AllowExport=true) const
Return true if we have a C++20 modules contextual keyword (export, import, or module).
bool isNot(tok::TokenKind K) const
bool isAnnotation() const
Return true if this is any of tok::annot_* kind tokens.
bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const
Return true if we have an ObjC keyword identifier.
bool isSimpleTypeSpecifier(const LangOptions &LangOpts) const
Determine whether the token kind starts a simple-type-specifier.
void startToken()
Reset all flags to cleared.
bool needsCleaning() const
Return true if this token has trigraphs or escaped newlines in it.
StringRef getRawIdentifier() const
getRawIdentifier - For a raw identifier token (i.e., an identifier lexed in raw mode),...
void setFlag(TokenFlags Flag)
Set the specified flag.
static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_movemask_epi8(__m128i __a)
Copies the values of the most significant bits from each 8-bit element in a 128-bit integer vector of...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpeq_epi8(__m128i __a, __m128i __b)
Compares each of the corresponding 8-bit values of the 128-bit integer vectors for equality.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si128(__m128i_u const *__p)
Moves packed integer values from an unaligned 128-bit memory location to elements in a 128-bit intege...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_load_si128(__m128i const *__p)
Moves packed integer values from an aligned 128-bit memory location to elements in a 128-bit integer ...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set1_epi8(char __b)
Initializes all values in a 128-bit vector of [16 x i8] with the specified 8-bit value.
@ tokens_present_before_eof
Indicates that there are tokens present between the last scanned directive and eof.
@ pp_pragma_system_header
@ pp_pragma_include_alias
@ After
Like System, but searched after the system directories.
bool isStringLiteral(TokenKind K)
Return true if this is a C or C++ string-literal (or C++11 user-defined-string-literal) token.
ObjCKeywordKind
Provides a namespace for Objective-C keywords which start with an '@'.
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
The JSON file list parser is used to communicate input to InstallAPI.
LLVM_READNONE bool isASCII(char c)
Returns true if a byte is an ASCII character.
@ Match
This is not an overload because the signature exactly matches an existing declaration.
LLVM_READONLY bool isVerticalWhitespace(unsigned char c)
Returns true if this character is vertical ASCII whitespace: '\n', '\r'.
ConflictMarkerKind
ConflictMarkerKind - Kinds of conflict marker which the lexer might be recovering from.
@ CMK_Perforce
A Perforce-style conflict marker, initiated by 4 ">"s, separated by 4 "="s, and terminated by 4 "<"s.
@ CMK_None
Not within a conflict marker.
@ CMK_Normal
A normal or diff3 conflict marker, initiated by at least 7 "<"s, separated by at least 7 "="s or "|"s...
LLVM_READONLY bool isAsciiIdentifierContinue(unsigned char c)
std::pair< FileID, unsigned > FileIDAndOffset
bool operator<(DeclarationName LHS, DeclarationName RHS)
Ordering on two declaration names.
LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)
Returns true if this character is horizontal ASCII whitespace: ' ', '\t', '\f', '\v'.
@ Result
The result type of a method or function.
LLVM_READONLY bool isRawStringDelimBody(unsigned char c)
Return true if this is the body character of a C++ raw string delimiter.
LLVM_READONLY bool isWhitespace(unsigned char c)
Return true if this character is horizontal or vertical ASCII whitespace: ' ', '\t',...
LLVM_READONLY bool isPreprocessingNumberBody(unsigned char c)
Return true if this is the body character of a C preprocessing number, which is [a-zA-Z0-9_.].
@ Keyword
The name has been typo-corrected to a keyword.
LLVM_READONLY bool isAsciiIdentifierStart(unsigned char c, bool AllowDollar=false)
Returns true if this is a valid first character of a C identifier, which is [a-zA-Z_].
__INTPTR_TYPE__ intptr_t
A signed integer type with the property that any valid pointer to void can be converted to this type,...
float __ovld __cnfn length(float)
Return the length of vector p, i.e., $\sqrt{p.x^2 + p.y^2 + \cdots}$
__packed_splat4 __packed_splat2 __packed_splat8 __packed_splat4 __packed_splat2 __packed_splat4 __packed_splat2 __packed_splat8 __packed_splat4 uint32_t
#define _mm_cmpistri(A, B, M)
Uses the immediate operand M to perform a comparison of string data with implicitly defined lengths t...
#define _SIDD_LEAST_SIGNIFICANT
#define _SIDD_NEGATIVE_POLARITY
Represents a char and the number of bytes parsed to produce it.
Describes the bounds (start, size) of the preamble and a flag required by PreprocessorOptions::Precom...
Token lexed as part of dependency directive scanning.
unsigned Offset
Offset into the original source input.
bool is(tok::TokenKind K) const