29#include "llvm/ADT/STLExtras.h"
30#include "llvm/ADT/StringExtras.h"
31#include "llvm/ADT/StringRef.h"
32#include "llvm/ADT/StringSwitch.h"
33#include "llvm/Support/Compiler.h"
34#include "llvm/Support/ConvertUTF.h"
35#include "llvm/Support/MemoryBufferRef.h"
36#include "llvm/Support/NativeFormatting.h"
37#include "llvm/Support/SaveAndRestore.h"
38#include "llvm/Support/Unicode.h"
39#include "llvm/Support/UnicodeCharRanges.h"
64 return II->getObjCKeywordID() == objcKey;
71 return tok::objc_not_keyword;
77 if (AllowExport &&
is(tok::kw_export))
79 if (
isOneOf(tok::kw_import, tok::kw_module))
81 if (
isNot(tok::identifier))
84 return II->isImportKeyword() || II->isModuleKeyword();
90 case tok::annot_typename:
91 case tok::annot_decltype:
92 case tok::annot_pack_indexing_type:
98 case tok::kw___int128:
100 case tok::kw_unsigned:
108 case tok::kw__Float16:
109 case tok::kw___float128:
110 case tok::kw___ibm128:
111 case tok::kw_wchar_t:
117#define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) case tok::kw___##Trait:
118#include "clang/Basic/TransformTypeTraits.def"
119 case tok::kw___auto_type:
120 case tok::kw_char16_t:
121 case tok::kw_char32_t:
123 case tok::kw_decltype:
124 case tok::kw_char8_t:
136void Lexer::anchor() {}
138void Lexer::InitLexer(
const char *BufStart,
const char *BufPtr,
139 const char *BufEnd) {
140 BufferStart = BufStart;
144 assert(BufEnd[0] == 0 &&
145 "We assume that the input buffer has a null character at the end"
146 " to simplify lexing!");
151 if (BufferStart == BufferPtr) {
153 StringRef Buf(BufferStart, BufferEnd - BufferStart);
154 size_t BOMLength = llvm::StringSwitch<size_t>(Buf)
155 .StartsWith(
"\xEF\xBB\xBF", 3)
159 BufferPtr += BOMLength;
162 Is_PragmaLexer =
false;
163 CurrentConflictMarkerState =
CMK_None;
166 IsAtStartOfLine =
true;
167 IsAtPhysicalStartOfLine =
true;
169 HasLeadingSpace =
false;
170 HasLeadingEmptyMacro =
false;
185 ExtendedTokenMode = 0;
187 NewLinePtr =
nullptr;
197 FileLoc(
PP.getSourceManager().getLocForStartOfFile(
FID)),
198 LangOpts(
PP.getLangOpts()), LineComment(LangOpts.LineComment),
199 IsFirstTimeLexingFile(IsFirstIncludeOfFile) {
200 InitLexer(InputFile.getBufferStart(), InputFile.getBufferStart(),
201 InputFile.getBufferEnd());
210 const char *BufStart,
const char *BufPtr,
const char *BufEnd,
211 bool IsFirstIncludeOfFile)
212 : FileLoc(fileloc), LangOpts(langOpts), LineComment(LangOpts.LineComment),
213 IsFirstTimeLexingFile(IsFirstIncludeOfFile) {
214 InitLexer(BufStart, BufPtr, BufEnd);
225 bool IsFirstIncludeOfFile)
226 :
Lexer(
SM.getLocForStartOfFile(
FID), langOpts, FromFile.getBufferStart(),
227 FromFile.getBufferStart(), FromFile.getBufferEnd(),
228 IsFirstIncludeOfFile) {}
231 assert(
PP &&
"Cannot reset token mode without a preprocessor");
232 if (LangOpts.TraditionalCPP)
259 FileID SpellingFID =
SM.getFileID(SpellingLoc);
260 llvm::MemoryBufferRef InputFile =
SM.getBufferOrFake(SpellingFID);
261 auto L = std::make_unique<Lexer>(SpellingFID, InputFile,
PP);
266 const char *StrData =
SM.getCharacterData(SpellingLoc);
268 L->BufferPtr = StrData;
269 L->BufferEnd = StrData+TokLen;
270 assert(L->BufferEnd[0] == 0 &&
"Buffer is not nul terminated!");
274 L->FileLoc =
SM.createExpansionLoc(
SM.getLocForStartOfFile(SpellingFID),
276 ExpansionLocEnd, TokLen);
280 L->ParsingPreprocessorDirective =
true;
283 L->Is_PragmaLexer =
true;
288 this->IsAtPhysicalStartOfLine = IsAtStartOfLine;
289 this->IsAtStartOfLine = IsAtStartOfLine;
290 assert((BufferStart + Offset) <= BufferEnd);
291 BufferPtr = BufferStart + Offset;
295 typename T::size_type i = 0, e = Str.size();
297 if (Str[i] ==
'\\' || Str[i] == Quote) {
298 Str.insert(Str.begin() + i,
'\\');
301 }
else if (Str[i] ==
'\n' || Str[i] ==
'\r') {
303 if ((i < e - 1) && (Str[i + 1] ==
'\n' || Str[i + 1] ==
'\r') &&
304 Str[i] != Str[i + 1]) {
310 Str.insert(Str.begin() + i + 1,
'n');
320 std::string
Result = std::string(Str);
321 char Quote = Charify ?
'\'' :
'"';
336 assert(
Tok.needsCleaning() &&
"getSpellingSlow called on simple token");
339 const char *BufEnd = BufPtr +
Tok.getLength();
343 while (BufPtr < BufEnd) {
345 Spelling[Length++] = CharAndSize.Char;
346 BufPtr += CharAndSize.Size;
348 if (Spelling[Length - 1] ==
'"')
356 Spelling[Length - 2] ==
'R' && Spelling[Length - 1] ==
'"') {
359 const char *RawEnd = BufEnd;
360 do --RawEnd;
while (*RawEnd !=
'"');
361 size_t RawLength = RawEnd - BufPtr + 1;
364 memcpy(Spelling + Length, BufPtr, RawLength);
372 while (BufPtr < BufEnd) {
374 Spelling[Length++] = CharAndSize.Char;
375 BufPtr += CharAndSize.Size;
378 assert(Length <
Tok.getLength() &&
379 "NeedsCleaning flag set on token that didn't need cleaning!");
397 bool invalidTemp =
false;
398 StringRef file =
SM.getBufferData(locInfo.first, &invalidTemp);
400 if (invalid) *invalid =
true;
404 const char *tokenBegin = file.data() + locInfo.second;
408 file.begin(), tokenBegin, file.end());
416 return StringRef(tokenBegin,
length);
421 return StringRef(buffer.data(), buffer.size());
431 assert((
int)
Tok.getLength() >= 0 &&
"Token character range is bogus!");
433 bool CharDataInvalid =
false;
434 const char *TokStart = SourceMgr.getCharacterData(
Tok.getLocation(),
442 if (!
Tok.needsCleaning())
443 return std::string(TokStart, TokStart +
Tok.getLength());
464 assert((
int)
Tok.getLength() >= 0 &&
"Token character range is bogus!");
466 const char *TokStart =
nullptr;
468 if (
Tok.is(tok::raw_identifier))
469 TokStart =
Tok.getRawIdentifier().data();
470 else if (!
Tok.hasUCN()) {
473 Buffer = II->getNameStart();
474 return II->getLength();
480 TokStart =
Tok.getLiteralData();
484 bool CharDataInvalid =
false;
485 TokStart = SourceMgr.getCharacterData(
Tok.getLocation(), &CharDataInvalid);
488 if (CharDataInvalid) {
495 if (!
Tok.needsCleaning()) {
497 return Tok.getLength();
519 Loc =
SM.getExpansionLoc(Loc);
522 const StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
526 const char *StrData = Buffer.data() + LocInfo.second;
527 if (StrData >= Buffer.end())
532 Lexer TheLexer(
SM.getLocForStartOfFile(LocInfo.first), LangOpts,
533 Buffer.begin(), StrData, Buffer.end());
536 TheLexer.LexIdentifierContinue(
Tok, StrData);
545 bool IgnoreWhiteSpace) {
554 Loc =
SM.getExpansionLoc(Loc);
557 StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
561 const char *StrData = Buffer.data()+LocInfo.second;
563 if (!IgnoreWhiteSpace &&
isWhitespace(SkipEscapedNewLines(StrData)[0]))
567 Lexer TheLexer(
SM.getLocForStartOfFile(LocInfo.first), LangOpts,
568 Buffer.begin(), StrData, Buffer.end());
577 const char *BufStart = Buffer.data();
578 if (Offset >= Buffer.size())
581 const char *LexStart = BufStart + Offset;
582 for (; LexStart != BufStart; --LexStart) {
598 if (LocInfo.first.isInvalid())
602 StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
608 const char *StrData = Buffer.data() + LocInfo.second;
610 if (!LexStart || LexStart == StrData)
615 Lexer TheLexer(LexerStartLoc, LangOpts, Buffer.data(), LexStart,
635 }
while (TheTok.
getKind() != tok::eof);
647 if (!
SM.isMacroArgExpansion(Loc))
654 assert(FileLocInfo.first == BeginFileLocInfo.first &&
655 FileLocInfo.second >= BeginFileLocInfo.second);
661enum PreambleDirectiveKind {
676 Lexer TheLexer(FileLoc, LangOpts, Buffer.begin(), Buffer.begin(),
680 bool InPreprocessorDirective =
false;
684 unsigned MaxLineOffset = 0;
686 const char *CurPtr = Buffer.begin();
687 unsigned CurLine = 0;
688 while (CurPtr != Buffer.end()) {
692 if (CurLine == MaxLines)
696 if (CurPtr != Buffer.end())
697 MaxLineOffset = CurPtr - Buffer.begin();
703 if (InPreprocessorDirective) {
705 if (TheTok.
getKind() == tok::eof) {
716 InPreprocessorDirective =
false;
725 if (MaxLineOffset && TokOffset >= MaxLineOffset)
730 if (TheTok.
getKind() == tok::comment) {
738 Token HashTok = TheTok;
739 InPreprocessorDirective =
true;
748 PreambleDirectiveKind PDK
749 = llvm::StringSwitch<PreambleDirectiveKind>(
Keyword)
750 .Case(
"include", PDK_Skipped)
751 .Case(
"__include_macros", PDK_Skipped)
752 .Case(
"define", PDK_Skipped)
753 .Case(
"undef", PDK_Skipped)
754 .Case(
"line", PDK_Skipped)
755 .Case(
"error", PDK_Skipped)
756 .Case(
"pragma", PDK_Skipped)
757 .Case(
"import", PDK_Skipped)
758 .Case(
"include_next", PDK_Skipped)
759 .Case(
"warning", PDK_Skipped)
760 .Case(
"ident", PDK_Skipped)
761 .Case(
"sccs", PDK_Skipped)
762 .Case(
"assert", PDK_Skipped)
763 .Case(
"unassert", PDK_Skipped)
764 .Case(
"if", PDK_Skipped)
765 .Case(
"ifdef", PDK_Skipped)
766 .Case(
"ifndef", PDK_Skipped)
767 .Case(
"elif", PDK_Skipped)
768 .Case(
"elifdef", PDK_Skipped)
769 .Case(
"elifndef", PDK_Skipped)
770 .Case(
"else", PDK_Skipped)
771 .Case(
"endif", PDK_Skipped)
772 .Default(PDK_Unknown);
789 TheTok.
getKind() == tok::raw_identifier &&
791 LangOpts.CPlusPlusModules) {
794 Token ModuleTok = TheTok;
797 }
while (TheTok.
getKind() == tok::comment);
798 if (TheTok.
getKind() != tok::semi) {
813 if (ActiveCommentLoc.
isValid())
814 End = ActiveCommentLoc;
829 const char *TokPtr =
SM.getCharacterData(TokStart, &
Invalid);
832 if (
Invalid || (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr)))
835 unsigned PhysOffset = 0;
840 while (Lexer::isObviouslySimpleCharacter(*TokPtr)) {
850 for (; CharNo; --CharNo) {
852 TokPtr += CharAndSize.Size;
853 PhysOffset += CharAndSize.Size;
860 if (!Lexer::isObviouslySimpleCharacter(*TokPtr))
861 PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr;
893 const FileID LocFileID =
SM.getFileID(Loc);
896 if (!
SM.getSLocEntry(LocFileID).getExpansion().isExpansionTokenRange())
918 if (!
SM.isAtStartOfImmediateMacroExpansion(loc, &expansionLoc))
924 *MacroBegin = expansionLoc;
948 if (
SM.isInFileID(afterLoc,
FID)) {
949 if (!
SM.isAtEndOfImmediateMacroExpansion(afterLoc, &expansionLoc))
958 assert(Entry.
isExpansion() &&
"Should be in an expansion");
965 *MacroEnd = expansionLoc;
978 if (Range.isTokenRange()) {
985 auto [FID, BeginOffs] =
SM.getDecomposedLoc(Begin);
990 if (!
SM.isInFileID(End, FID, &EndOffs) ||
1000 return SM.getSLocEntry(
SM.getFileID(Loc))
1002 .isExpansionTokenRange();
1019 Range.setBegin(Begin);
1024 if (Range.isTokenRange()) {
1042 Range.setBegin(MacroBegin);
1043 Range.setEnd(MacroEnd);
1045 if (Range.isTokenRange())
1065 Range.setBegin(
SM.getImmediateSpellingLoc(Begin));
1066 Range.setEnd(
SM.getImmediateSpellingLoc(End));
1079 if (Range.isInvalid()) {
1086 if (beginInfo.first.isInvalid()) {
1092 if (!
SM.isInFileID(Range.getEnd(), beginInfo.first, &EndOffs) ||
1093 beginInfo.second > EndOffs) {
1099 bool invalidTemp =
false;
1100 StringRef file =
SM.getBufferData(beginInfo.first, &invalidTemp);
1107 return file.substr(beginInfo.second, EndOffs - beginInfo.second);
1113 assert(Loc.
isMacroID() &&
"Only reasonable to call this on macros");
1129 Loc =
SM.getImmediateExpansionRange(Loc).getBegin();
1136 FileID MacroFID =
SM.getFileID(Loc);
1137 if (
SM.isInFileID(SpellLoc, MacroFID))
1147 Loc =
SM.getSpellingLoc(Loc);
1153 StringRef ExpansionBuffer =
SM.getBufferData(ExpansionInfo.first);
1154 return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
1159 assert(Loc.
isMacroID() &&
"Only reasonable to call this on macros");
1161 while (
SM.isMacroArgExpansion(Loc))
1162 Loc =
SM.getImmediateExpansionRange(Loc).getBegin();
1168 if (!SpellLoc.
isFileID() ||
SM.isWrittenInScratchSpace(SpellLoc))
1174 Loc =
SM.getSpellingLoc(
SM.getImmediateExpansionRange(Loc).getBegin());
1180 StringRef ExpansionBuffer =
SM.getBufferData(ExpansionInfo.first);
1181 return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
1190 if (Str - 1 < BufferStart)
1193 if ((Str[0] ==
'\n' && Str[-1] ==
'\r') ||
1194 (Str[0] ==
'\r' && Str[-1] ==
'\n')) {
1195 if (Str - 2 < BufferStart)
1205 return *Str ==
'\\';
1213 if (LocInfo.first.isInvalid())
1216 StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
1222 StringRef Rest = Buffer.substr(
Line - Buffer.data());
1223 size_t NumWhitespaceChars = Rest.find_first_not_of(
" \t");
1224 return NumWhitespaceChars == StringRef::npos
1226 : Rest.take_front(NumWhitespaceChars);
1241 unsigned CharNo,
unsigned TokLen) {
1242 assert(FileLoc.
isMacroID() &&
"Must be a macro expansion");
1258 return SM.createExpansionLoc(SpellingLoc, II.
getBegin(), II.
getEnd(), TokLen);
1264 unsigned TokLen)
const {
1265 assert(Loc >= BufferStart && Loc <= BufferEnd &&
1266 "Location out of range for this buffer!");
1270 unsigned CharNo = Loc-BufferStart;
1271 if (FileLoc.isFileID())
1272 return FileLoc.getLocWithOffset(CharNo);
1276 assert(
PP &&
"This doesn't work on raw lexers");
1295 case '=':
return '#';
1296 case ')':
return ']';
1297 case '(':
return '[';
1298 case '!':
return '|';
1299 case '\'':
return '^';
1300 case '>':
return '}';
1301 case '/':
return '\\';
1302 case '<':
return '{';
1303 case '-':
return '~';
1318 L->
Diag(CP-2, diag::trigraph_ignored);
1323 L->
Diag(CP-2, diag::trigraph_converted) << StringRef(&Res, 1);
1335 if (Ptr[Size-1] !=
'\n' && Ptr[Size-1] !=
'\r')
1339 if ((Ptr[Size] ==
'\r' || Ptr[Size] ==
'\n') &&
1340 Ptr[Size-1] != Ptr[Size])
1353const char *Lexer::SkipEscapedNewLines(
const char *P) {
1355 const char *AfterEscape;
1358 }
else if (*P ==
'?') {
1360 if (P[1] !=
'?' || P[2] !=
'/')
1370 if (NewLineSize == 0)
return P;
1371 P = AfterEscape+NewLineSize;
1378 bool IncludeComments) {
1381 return std::nullopt;
1389 bool InvalidTemp =
false;
1390 StringRef
File =
SM.getBufferData(LocInfo.first, &InvalidTemp);
1392 return std::nullopt;
1394 const char *TokenBegin =
File.data() + LocInfo.second;
1397 Lexer lexer(
SM.getLocForStartOfFile(LocInfo.first), LangOpts,
File.begin(),
1398 TokenBegin,
File.end());
1409 bool IncludeComments) {
1410 const auto StartOfFile =
SM.getLocForStartOfFile(
SM.getFileID(Loc));
1411 while (Loc != StartOfFile) {
1414 return std::nullopt;
1420 if (!
Tok.is(tok::comment) || IncludeComments) {
1424 return std::nullopt;
1433 const LangOptions &LangOpts,
bool SkipTrailingWhitespaceAndNewLine) {
1435 if (!
Tok ||
Tok->isNot(TKind))
1440 unsigned NumWhitespaceChars = 0;
1441 if (SkipTrailingWhitespaceAndNewLine) {
1442 const char *TokenEnd =
SM.getCharacterData(TokenLoc) +
Tok->getLength();
1443 unsigned char C = *TokenEnd;
1446 NumWhitespaceChars++;
1450 if (
C ==
'\n' ||
C ==
'\r') {
1453 NumWhitespaceChars++;
1454 if ((
C ==
'\n' ||
C ==
'\r') &&
C != PrevC)
1455 NumWhitespaceChars++;
1480 if (Ptr[0] ==
'\\') {
1486 return {
'\\', Size};
1496 Diag(Ptr, diag::backslash_newline_space);
1499 Size += EscapedNewLineSize;
1500 Ptr += EscapedNewLineSize;
1503 auto CharAndSize = getCharAndSizeSlow(Ptr,
Tok);
1504 CharAndSize.Size += Size;
1509 return {
'\\',
Size};
1513 if (Ptr[0] ==
'?' && Ptr[1] ==
'?') {
1517 LangOpts.Trigraphs)) {
1523 if (
C ==
'\\')
goto Slash;
1529 return {*Ptr,
Size + 1u};
1543 if (Ptr[0] ==
'\\') {
1549 return {
'\\',
Size};
1554 Size += EscapedNewLineSize;
1555 Ptr += EscapedNewLineSize;
1558 auto CharAndSize = getCharAndSizeSlowNoWarn(Ptr, LangOpts);
1559 CharAndSize.Size +=
Size;
1564 return {
'\\',
Size};
1568 if (LangOpts.Trigraphs && Ptr[0] ==
'?' && Ptr[1] ==
'?') {
1574 if (
C ==
'\\')
goto Slash;
1580 return {*Ptr,
Size + 1u};
1588void Lexer::SetByteOffset(
unsigned Offset,
bool StartOfLine) {
1589 BufferPtr = BufferStart + Offset;
1590 if (BufferPtr > BufferEnd)
1591 BufferPtr = BufferEnd;
1595 IsAtStartOfLine = StartOfLine;
1596 IsAtPhysicalStartOfLine = StartOfLine;
1600 static const llvm::sys::UnicodeCharSet UnicodeWhitespaceChars(
1602 return UnicodeWhitespaceChars.contains(Codepoint);
1607 llvm::raw_svector_ostream CharOS(CharBuf);
1608 llvm::write_hex(CharOS,
C, llvm::HexPrintStyle::Upper, 4);
1619 bool IsStart,
bool &IsExtension) {
1620 static const llvm::sys::UnicodeCharSet MathStartChars(
1622 static const llvm::sys::UnicodeCharSet MathContinueChars(
1624 if (MathStartChars.contains(
C) ||
1625 (!IsStart && MathContinueChars.contains(
C))) {
1633 bool &IsExtension) {
1634 if (LangOpts.AsmPreprocessor) {
1636 }
else if (LangOpts.DollarIdents &&
'$' ==
C) {
1638 }
else if (LangOpts.CPlusPlus || LangOpts.C23) {
1643 static const llvm::sys::UnicodeCharSet XIDStartChars(
XIDStartRanges);
1645 if (
C ==
'_' || XIDStartChars.contains(
C) || XIDContinueChars.contains(
C))
1649 }
else if (LangOpts.C11) {
1650 static const llvm::sys::UnicodeCharSet C11AllowedIDChars(
1652 return C11AllowedIDChars.contains(
C);
1654 static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
1656 return C99AllowedIDChars.contains(
C);
1661 bool &IsExtension) {
1662 assert(
C > 0x7F &&
"isAllowedInitiallyIDChar called with an ASCII codepoint");
1663 IsExtension =
false;
1664 if (LangOpts.AsmPreprocessor) {
1667 if (LangOpts.CPlusPlus || LangOpts.C23) {
1668 static const llvm::sys::UnicodeCharSet XIDStartChars(
XIDStartRanges);
1669 if (XIDStartChars.contains(
C))
1677 static const llvm::sys::UnicodeCharSet C11DisallowedInitialIDChars(
1679 return !C11DisallowedInitialIDChars.contains(
C);
1681 static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
1683 return !C99DisallowedInitialIDChars.contains(
C);
1689 static const llvm::sys::UnicodeCharSet MathStartChars(
1691 static const llvm::sys::UnicodeCharSet MathContinueChars(
1694 (void)MathStartChars;
1695 (void)MathContinueChars;
1696 assert((MathStartChars.contains(
C) || MathContinueChars.contains(
C)) &&
1697 "Unexpected mathematical notation codepoint");
1698 Diags.
Report(Range.getBegin(), diag::ext_mathematical_notation)
1711 if (!Diags.
isIgnored(diag::warn_c99_compat_unicode_id, Range.getBegin())) {
1713 CannotAppearInIdentifier = 0,
1714 CannotStartIdentifier
1717 static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
1719 static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
1721 if (!C99AllowedIDChars.contains(
C)) {
1722 Diags.
Report(Range.getBegin(), diag::warn_c99_compat_unicode_id)
1724 << CannotAppearInIdentifier;
1725 }
else if (
IsFirst && C99DisallowedInitialIDChars.contains(
C)) {
1726 Diags.
Report(Range.getBegin(), diag::warn_c99_compat_unicode_id)
1728 << CannotStartIdentifier;
1740 struct HomoglyphPair {
1743 bool operator<(HomoglyphPair R)
const {
return Character < R.Character; }
1745 static constexpr HomoglyphPair SortedHomoglyphs[] = {
1798 std::lower_bound(std::begin(SortedHomoglyphs),
1799 std::end(SortedHomoglyphs) - 1, HomoglyphPair{
C,
'\0'});
1800 if (Homoglyph->Character ==
C) {
1801 if (Homoglyph->LooksLike) {
1802 const char LooksLikeStr[] = {Homoglyph->LooksLike, 0};
1803 Diags.
Report(Range.getBegin(), diag::warn_utf8_symbol_homoglyph)
1806 Diags.
Report(Range.getBegin(), diag::warn_utf8_symbol_zero_width)
1826 bool InvalidOnlyAtStart =
IsFirst && !IsIDStart && IsIDContinue;
1828 if (!
IsFirst || InvalidOnlyAtStart) {
1829 Diags.
Report(Range.getBegin(), diag::err_character_not_allowed_identifier)
1833 Diags.
Report(Range.getBegin(), diag::err_character_not_allowed)
1839bool Lexer::tryConsumeIdentifierUCN(
const char *&CurPtr,
unsigned Size,
1841 const char *UCNPtr = CurPtr +
Size;
1842 uint32_t CodePoint = tryReadUCN(UCNPtr, CurPtr,
nullptr);
1843 if (CodePoint == 0) {
1846 bool IsExtension =
false;
1851 !
PP->isPreprocessedOutput())
1853 PP->getDiagnostics(), LangOpts, CodePoint,
1871 if ((UCNPtr - CurPtr == 6 && CurPtr[1] ==
'u') ||
1872 (UCNPtr - CurPtr == 10 && CurPtr[1] ==
'U'))
1875 while (CurPtr != UCNPtr)
1876 (void)getAndAdvanceChar(CurPtr,
Result);
1880bool Lexer::tryConsumeIdentifierUTF8Char(
const char *&CurPtr,
Token &
Result) {
1881 llvm::UTF32 CodePoint;
1886 unsigned FirstCodeUnitSize;
1887 getCharAndSize(CurPtr, FirstCodeUnitSize);
1888 const char *CharStart = CurPtr + FirstCodeUnitSize - 1;
1889 const char *UnicodePtr = CharStart;
1891 llvm::ConversionResult ConvResult = llvm::convertUTF8Sequence(
1892 (
const llvm::UTF8 **)&UnicodePtr, (
const llvm::UTF8 *)BufferEnd,
1893 &CodePoint, llvm::strictConversion);
1894 if (ConvResult != llvm::conversionOK)
1897 bool IsExtension =
false;
1904 !
PP->isPreprocessedOutput())
1906 PP->getDiagnostics(), LangOpts, CodePoint,
1914 PP->getDiagnostics(), CodePoint,
1926 ConsumeChar(CurPtr, FirstCodeUnitSize,
Result);
1927 CurPtr = UnicodePtr;
1931bool Lexer::LexUnicodeIdentifierStart(
Token &
Result, uint32_t
C,
1932 const char *CurPtr) {
1933 bool IsExtension =
false;
1936 !
PP->isPreprocessedOutput()) {
1948 return LexIdentifierContinue(
Result, CurPtr);
1952 !
PP->isPreprocessedOutput() && !
isASCII(*BufferPtr) &&
1964 PP->getDiagnostics(), LangOpts,
C,
1973 FormTokenWithChars(
Result, CurPtr, tok::unknown);
1979 [[maybe_unused]]
const char *BufferEnd) {
1981 alignas(16)
static constexpr char AsciiIdentifierRange[16] = {
1982 '_',
'_',
'A',
'Z',
'a',
'z',
'0',
'9',
1984 constexpr ssize_t BytesPerRegister = 16;
1986 __m128i AsciiIdentifierRangeV =
1989 while (LLVM_LIKELY(BufferEnd - CurPtr >= BytesPerRegister)) {
1996 if (Consumed == BytesPerRegister)
2002 unsigned char C = *CurPtr;
2008bool Lexer::LexIdentifierContinue(
Token &
Result,
const char *CurPtr) {
2017 unsigned char C = getCharAndSize(CurPtr, Size);
2019 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2024 if (!LangOpts.DollarIdents)
2028 Diag(CurPtr, diag::ext_dollar_in_identifier);
2029 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2032 if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result))
2040 const char *IdStart = BufferPtr;
2041 FormTokenWithChars(
Result, CurPtr, tok::raw_identifier);
2042 Result.setRawIdentifierData(IdStart);
2051 const IdentifierInfo *II =
PP->LookUpIdentifierInfo(
Result);
2058 if (isCodeCompletionPoint(CurPtr)) {
2060 Result.setKind(tok::code_completion);
2066 assert(*CurPtr == 0 &&
"Completion character must be 0");
2071 if (CurPtr < BufferEnd) {
2083 return PP->HandleIdentifier(
Result);
2090bool Lexer::isHexaLiteral(
const char *Start,
const LangOptions &LangOpts) {
2092 char C1 = CharAndSize1.Char;
2098 char C2 = CharAndSize2.Char;
2099 return (C2 ==
'x' || C2 ==
'X');
2105bool Lexer::LexNumericConstant(
Token &
Result,
const char *CurPtr) {
2107 char C = getCharAndSize(CurPtr, Size);
2110 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2112 if (LangOpts.HLSL &&
C ==
'.' && (*CurPtr ==
'x' || *CurPtr ==
'r')) {
2116 C = getCharAndSize(CurPtr, Size);
2120 if ((
C ==
'-' ||
C ==
'+') && (PrevCh ==
'E' || PrevCh ==
'e')) {
2123 if (!LangOpts.MicrosoftExt || !isHexaLiteral(BufferPtr, LangOpts))
2124 return LexNumericConstant(
Result, ConsumeChar(CurPtr, Size,
Result));
2128 if ((
C ==
'-' ||
C ==
'+') && (PrevCh ==
'P' || PrevCh ==
'p')) {
2132 bool IsHexFloat =
true;
2133 if (!LangOpts.C99) {
2134 if (!isHexaLiteral(BufferPtr, LangOpts))
2136 else if (!LangOpts.CPlusPlus17 &&
2137 std::find(BufferPtr, CurPtr,
'_') != CurPtr)
2141 return LexNumericConstant(
Result, ConsumeChar(CurPtr, Size,
Result));
2145 if (
C ==
'\'' && LangOpts.AllowLiteralDigitSeparator) {
2149 Diag(CurPtr, LangOpts.CPlusPlus
2150 ? diag::warn_cxx11_compat_digit_separator
2151 : diag::warn_c23_compat_digit_separator);
2152 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2153 CurPtr = ConsumeChar(CurPtr, NextSize,
Result);
2154 return LexNumericConstant(
Result, CurPtr);
2159 if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result))
2160 return LexNumericConstant(
Result, CurPtr);
2162 return LexNumericConstant(
Result, CurPtr);
2165 const char *TokStart = BufferPtr;
2166 FormTokenWithChars(
Result, CurPtr, tok::numeric_constant);
2167 Result.setLiteralData(TokStart);
2173const char *Lexer::LexUDSuffix(
Token &
Result,
const char *CurPtr,
2174 bool IsStringLiteral) {
2175 assert(LangOpts.CPlusPlus);
2179 char C = getCharAndSize(CurPtr, Size);
2180 bool Consumed =
false;
2183 if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result))
2185 else if (!
isASCII(
C) && tryConsumeIdentifierUTF8Char(CurPtr,
Result))
2191 if (!LangOpts.CPlusPlus11) {
2194 C ==
'_' ? diag::warn_cxx11_compat_user_defined_literal
2195 : diag::warn_cxx11_compat_reserved_user_defined_literal)
2206 bool IsUDSuffix =
false;
2209 else if (IsStringLiteral && LangOpts.CPlusPlus14) {
2213 const unsigned MaxStandardSuffixLength = 3;
2214 char Buffer[MaxStandardSuffixLength] = {
C };
2215 unsigned Consumed =
Size;
2218 auto [
Next, NextSize] =
2222 const StringRef CompleteSuffix(Buffer, Chars);
2228 if (Chars == MaxStandardSuffixLength)
2232 Buffer[Chars++] =
Next;
2233 Consumed += NextSize;
2239 Diag(CurPtr, LangOpts.MSVCCompat
2240 ? diag::ext_ms_reserved_user_defined_literal
2241 : diag::ext_reserved_user_defined_literal)
2246 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2251 C = getCharAndSize(CurPtr, Size);
2253 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2254 }
else if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result)) {
2255 }
else if (!
isASCII(
C) && tryConsumeIdentifierUTF8Char(CurPtr,
Result)) {
2265bool Lexer::LexStringLiteral(
Token &
Result,
const char *CurPtr,
2267 const char *AfterQuote = CurPtr;
2269 const char *NulCharacter =
nullptr;
2272 (Kind == tok::utf8_string_literal ||
2273 Kind == tok::utf16_string_literal ||
2274 Kind == tok::utf32_string_literal))
2275 Diag(BufferPtr, LangOpts.CPlusPlus ? diag::warn_cxx98_compat_unicode_literal
2276 : diag::warn_c99_compat_unicode_literal);
2278 char C = getAndAdvanceChar(CurPtr,
Result);
2283 C = getAndAdvanceChar(CurPtr,
Result);
2285 if (
C ==
'\n' ||
C ==
'\r' ||
2286 (
C == 0 && CurPtr-1 == BufferEnd)) {
2288 Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 1;
2289 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2294 if (isCodeCompletionPoint(CurPtr-1)) {
2296 codeCompleteIncludedFile(AfterQuote, CurPtr - 1,
false);
2298 PP->CodeCompleteNaturalLanguage();
2299 FormTokenWithChars(
Result, CurPtr - 1, tok::unknown);
2304 NulCharacter = CurPtr-1;
2306 C = getAndAdvanceChar(CurPtr,
Result);
2310 if (LangOpts.CPlusPlus)
2311 CurPtr = LexUDSuffix(
Result, CurPtr,
true);
2315 Diag(NulCharacter, diag::null_in_char_or_string) << 1;
2318 const char *TokStart = BufferPtr;
2319 FormTokenWithChars(
Result, CurPtr, Kind);
2320 Result.setLiteralData(TokStart);
2326bool Lexer::LexRawStringLiteral(
Token &
Result,
const char *CurPtr,
2334 Diag(BufferPtr, diag::warn_cxx98_compat_raw_string_literal);
2336 unsigned PrefixLen = 0;
2340 llvm::is_contained({
'$',
'@',
'`'}, CurPtr[PrefixLen])) {
2341 const char *Pos = &CurPtr[PrefixLen];
2342 Diag(Pos, LangOpts.CPlusPlus26
2343 ? diag::warn_cxx26_compat_raw_string_literal_character_set
2344 : diag::ext_cxx26_raw_string_literal_character_set)
2345 << StringRef(Pos, 1);
2351 if (CurPtr[PrefixLen] !=
'(') {
2353 const char *PrefixEnd = &CurPtr[PrefixLen];
2354 if (PrefixLen == 16) {
2355 Diag(PrefixEnd, diag::err_raw_delim_too_long);
2356 }
else if (*PrefixEnd ==
'\n') {
2357 Diag(PrefixEnd, diag::err_invalid_newline_raw_delim);
2359 Diag(PrefixEnd, diag::err_invalid_char_raw_delim)
2360 << StringRef(PrefixEnd, 1);
2372 if (
C == 0 && CurPtr-1 == BufferEnd) {
2378 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2383 const char *Prefix = CurPtr;
2384 CurPtr += PrefixLen + 1;
2391 if (strncmp(CurPtr, Prefix, PrefixLen) == 0 && CurPtr[PrefixLen] ==
'"') {
2392 CurPtr += PrefixLen + 1;
2395 }
else if (
C == 0 && CurPtr-1 == BufferEnd) {
2397 Diag(BufferPtr, diag::err_unterminated_raw_string)
2398 << StringRef(Prefix, PrefixLen);
2399 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2405 if (LangOpts.CPlusPlus)
2406 CurPtr = LexUDSuffix(
Result, CurPtr,
true);
2409 const char *TokStart = BufferPtr;
2410 FormTokenWithChars(
Result, CurPtr, Kind);
2411 Result.setLiteralData(TokStart);
2417bool Lexer::LexAngledStringLiteral(
Token &
Result,
const char *CurPtr) {
2419 const char *NulCharacter =
nullptr;
2420 const char *AfterLessPos = CurPtr;
2421 char C = getAndAdvanceChar(CurPtr,
Result);
2426 C = getAndAdvanceChar(CurPtr,
Result);
2429 (
C == 0 && (CurPtr - 1 == BufferEnd))) {
2432 FormTokenWithChars(
Result, AfterLessPos, tok::less);
2437 if (isCodeCompletionPoint(CurPtr - 1)) {
2438 codeCompleteIncludedFile(AfterLessPos, CurPtr - 1,
true);
2440 FormTokenWithChars(
Result, CurPtr - 1, tok::unknown);
2443 NulCharacter = CurPtr-1;
2445 C = getAndAdvanceChar(CurPtr,
Result);
2450 Diag(NulCharacter, diag::null_in_char_or_string) << 1;
2453 const char *TokStart = BufferPtr;
2454 FormTokenWithChars(
Result, CurPtr, tok::header_name);
2455 Result.setLiteralData(TokStart);
2459void Lexer::codeCompleteIncludedFile(
const char *PathStart,
2460 const char *CompletionPoint,
2463 StringRef PartialPath(PathStart, CompletionPoint - PathStart);
2464 llvm::StringRef SlashChars = LangOpts.MSVCCompat ?
"/\\" :
"/";
2465 auto Slash = PartialPath.find_last_of(SlashChars);
2467 (Slash == StringRef::npos) ?
"" : PartialPath.take_front(Slash);
2468 const char *StartOfFilename =
2469 (Slash == StringRef::npos) ? PathStart : PathStart + Slash + 1;
2471 PP->setCodeCompletionIdentifierInfo(&
PP->getIdentifierTable().get(
2472 StringRef(StartOfFilename, CompletionPoint - StartOfFilename)));
2475 while (CompletionPoint < BufferEnd) {
2476 char Next = *(CompletionPoint + 1);
2480 if (
Next == (IsAngled ?
'>' :
'"'))
2482 if (SlashChars.contains(
Next))
2486 PP->setCodeCompletionTokenRange(
2487 FileLoc.getLocWithOffset(StartOfFilename - BufferStart),
2488 FileLoc.getLocWithOffset(CompletionPoint - BufferStart));
2489 PP->CodeCompleteIncludedFile(Dir, IsAngled);
2494bool Lexer::LexCharConstant(
Token &
Result,
const char *CurPtr,
2497 const char *NulCharacter =
nullptr;
2500 if (Kind == tok::utf16_char_constant || Kind == tok::utf32_char_constant)
2501 Diag(BufferPtr, LangOpts.CPlusPlus
2502 ? diag::warn_cxx98_compat_unicode_literal
2503 : diag::warn_c99_compat_unicode_literal);
2504 else if (Kind == tok::utf8_char_constant)
2505 Diag(BufferPtr, LangOpts.CPlusPlus
2506 ? diag::warn_cxx14_compat_u8_character_literal
2507 : diag::warn_c17_compat_u8_character_literal);
2510 char C = getAndAdvanceChar(CurPtr,
Result);
2513 Diag(BufferPtr, diag::ext_empty_character);
2514 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2521 C = getAndAdvanceChar(CurPtr,
Result);
2523 if (
C ==
'\n' ||
C ==
'\r' ||
2524 (
C == 0 && CurPtr-1 == BufferEnd)) {
2526 Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 0;
2527 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2532 if (isCodeCompletionPoint(CurPtr-1)) {
2533 PP->CodeCompleteNaturalLanguage();
2534 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2539 NulCharacter = CurPtr-1;
2541 C = getAndAdvanceChar(CurPtr,
Result);
2545 if (LangOpts.CPlusPlus)
2546 CurPtr = LexUDSuffix(
Result, CurPtr,
false);
2550 Diag(NulCharacter, diag::null_in_char_or_string) << 0;
2553 const char *TokStart = BufferPtr;
2554 FormTokenWithChars(
Result, CurPtr, Kind);
2555 Result.setLiteralData(TokStart);
2563bool Lexer::SkipWhitespace(
Token &
Result,
const char *CurPtr) {
2567 unsigned char Char = *CurPtr;
2569 const char *lastNewLine =
nullptr;
2570 auto setLastNewLine = [&](
const char *Ptr) {
2576 setLastNewLine(CurPtr - 1);
2595 if (*CurPtr ==
'\n')
2596 setLastNewLine(CurPtr);
2603 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2605 IsAtStartOfLine =
true;
2606 IsAtPhysicalStartOfLine =
true;
2613 char PrevChar = CurPtr[-1];
2621 if (NewLinePtr && lastNewLine && NewLinePtr != lastNewLine &&
PP) {
2622 if (
auto *Handler =
PP->getEmptylineHandler())
2638bool Lexer::SkipLineComment(
Token &
Result,
const char *CurPtr) {
2643 Diag(BufferPtr, diag::ext_line_comment);
2661 bool UnicodeDecodingAlreadyDiagnosed =
false;
2668 C !=
'\n' &&
C !=
'\r') {
2670 UnicodeDecodingAlreadyDiagnosed =
false;
2674 unsigned Length = llvm::getUTF8SequenceSize(
2675 (
const llvm::UTF8 *)CurPtr, (
const llvm::UTF8 *)BufferEnd);
2678 Diag(CurPtr, diag::warn_invalid_utf8_in_comment);
2679 UnicodeDecodingAlreadyDiagnosed =
true;
2682 UnicodeDecodingAlreadyDiagnosed =
false;
2688 const char *NextLine = CurPtr;
2691 const char *EscapePtr = CurPtr-1;
2692 bool HasSpace =
false;
2698 if (*EscapePtr ==
'\\')
2701 else if (EscapePtr[0] ==
'/' && EscapePtr[-1] ==
'?' &&
2702 EscapePtr[-2] ==
'?' && LangOpts.Trigraphs)
2704 CurPtr = EscapePtr-2;
2710 Diag(EscapePtr, diag::backslash_newline_space);
2717 const char *OldPtr = CurPtr;
2720 C = getAndAdvanceChar(CurPtr,
Result);
2725 if (
C != 0 && CurPtr == OldPtr+1) {
2733 if (CurPtr != OldPtr + 1 &&
C !=
'/' &&
2734 (CurPtr == BufferEnd + 1 || CurPtr[0] !=
'/')) {
2735 for (; OldPtr != CurPtr; ++OldPtr)
2736 if (OldPtr[0] ==
'\n' || OldPtr[0] ==
'\r') {
2740 const char *ForwardPtr = CurPtr;
2743 if (ForwardPtr[0] ==
'/' && ForwardPtr[1] ==
'/')
2748 Diag(OldPtr-1, diag::ext_multi_line_line_comment);
2753 if (
C ==
'\r' ||
C ==
'\n' || CurPtr == BufferEnd + 1) {
2758 if (
C ==
'\0' && isCodeCompletionPoint(CurPtr-1)) {
2759 PP->CodeCompleteNaturalLanguage();
2776 return SaveLineComment(
Result, CurPtr);
2790 NewLinePtr = CurPtr++;
2803bool Lexer::SaveLineComment(
Token &
Result,
const char *CurPtr) {
2806 FormTokenWithChars(
Result, CurPtr, tok::comment);
2818 assert(Spelling[0] ==
'/' && Spelling[1] ==
'/' &&
"Not line comment?");
2822 Result.setKind(tok::comment);
2833 assert(CurPtr[0] ==
'\n' || CurPtr[0] ==
'\r');
2836 const char *TrigraphPos =
nullptr;
2838 const char *SpacePos =
nullptr;
2845 if (CurPtr[0] ==
'\n' || CurPtr[0] ==
'\r') {
2847 if (CurPtr[0] == CurPtr[1])
2861 if (*CurPtr ==
'\\') {
2863 }
else if (CurPtr[0] ==
'/' && CurPtr[-1] ==
'?' && CurPtr[-2] ==
'?') {
2865 TrigraphPos = CurPtr - 2;
2876 if (*CurPtr !=
'\n' && *CurPtr !=
'\r')
2885 L->
Diag(TrigraphPos, diag::trigraph_ignored_block_comment);
2889 L->
Diag(TrigraphPos, diag::trigraph_ends_block_comment);
2894 L->
Diag(CurPtr + 1, diag::escaped_newline_block_comment_end);
2898 L->
Diag(SpacePos, diag::backslash_newline_space);
2904#include <emmintrin.h>
2919bool Lexer::SkipBlockComment(
Token &
Result,
const char *CurPtr) {
2929 unsigned char C = getCharAndSize(CurPtr, CharSize);
2931 if (
C == 0 && CurPtr == BufferEnd+1) {
2933 Diag(BufferPtr, diag::err_unterminated_block_comment);
2939 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2956 bool UnicodeDecodingAlreadyDiagnosed =
false;
2961 if (CurPtr + 24 < BufferEnd &&
2964 !(
PP &&
PP->getCodeCompletionFileLoc() == FileLoc)) {
2966 while (
C !=
'/' && (
intptr_t)CurPtr % 16 != 0) {
2971 if (
C ==
'/')
goto FoundSlash;
2975 while (CurPtr + 16 < BufferEnd) {
2977 if (LLVM_UNLIKELY(Mask != 0)) {
2987 CurPtr += llvm::countr_zero<unsigned>(cmp) + 1;
2993 __vector
unsigned char LongUTF = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
2994 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
2995 0x80, 0x80, 0x80, 0x80};
2996 __vector
unsigned char Slashes = {
2997 '/',
'/',
'/',
'/',
'/',
'/',
'/',
'/',
2998 '/',
'/',
'/',
'/',
'/',
'/',
'/',
'/'
3000 while (CurPtr + 16 < BufferEnd) {
3002 vec_any_ge(*(
const __vector
unsigned char *)CurPtr, LongUTF)))
3004 if (
vec_any_eq(*(
const __vector
unsigned char *)CurPtr, Slashes)) {
3011 while (CurPtr + 16 < BufferEnd) {
3012 bool HasNonASCII =
false;
3013 for (
unsigned I = 0; I < 16; ++I)
3014 HasNonASCII |= !
isASCII(CurPtr[I]);
3016 if (LLVM_UNLIKELY(HasNonASCII))
3019 bool HasSlash =
false;
3020 for (
unsigned I = 0; I < 16; ++I)
3021 HasSlash |= CurPtr[I] ==
'/';
3035 while (
C !=
'/' &&
C !=
'\0') {
3037 UnicodeDecodingAlreadyDiagnosed =
false;
3044 unsigned Length = llvm::getUTF8SequenceSize(
3045 (
const llvm::UTF8 *)CurPtr - 1, (
const llvm::UTF8 *)BufferEnd);
3048 Diag(CurPtr - 1, diag::warn_invalid_utf8_in_comment);
3049 UnicodeDecodingAlreadyDiagnosed =
true;
3051 UnicodeDecodingAlreadyDiagnosed =
false;
3052 CurPtr += Length - 1;
3059 if (CurPtr[-2] ==
'*')
3062 if ((CurPtr[-2] ==
'\n' || CurPtr[-2] ==
'\r')) {
3064 LangOpts.Trigraphs)) {
3070 if (CurPtr[0] ==
'*' && CurPtr[1] !=
'/') {
3075 Diag(CurPtr-1, diag::warn_nested_block_comment);
3077 }
else if (
C == 0 && CurPtr == BufferEnd+1) {
3079 Diag(BufferPtr, diag::err_unterminated_block_comment);
3088 FormTokenWithChars(
Result, CurPtr, tok::unknown);
3094 }
else if (
C ==
'\0' && isCodeCompletionPoint(CurPtr-1)) {
3095 PP->CodeCompleteNaturalLanguage();
3113 FormTokenWithChars(
Result, CurPtr, tok::comment);
3122 SkipWhitespace(
Result, CurPtr + 1);
3140 "Must be in a preprocessing directive!");
3145 const char *CurPtr = BufferPtr;
3147 char Char = getAndAdvanceChar(CurPtr, Tmp);
3155 if (CurPtr-1 != BufferEnd) {
3156 if (isCodeCompletionPoint(CurPtr-1)) {
3157 PP->CodeCompleteNaturalLanguage();
3172 assert(CurPtr[-1] == Char &&
"Trigraphs for newline?");
3173 BufferPtr = CurPtr-1;
3177 if (Tmp.
is(tok::code_completion)) {
3179 PP->CodeCompleteNaturalLanguage();
3182 assert(Tmp.
is(tok::eod) &&
"Unexpected token!");
3194bool Lexer::LexEndOfFile(
Token &
Result,
const char *CurPtr) {
3202 FormTokenWithChars(
Result, CurPtr, tok::eod);
3214 BufferPtr = BufferEnd;
3215 FormTokenWithChars(
Result, BufferEnd, tok::eof);
3219 if (
PP->isRecordingPreamble() &&
PP->isInPrimaryFile()) {
3225 MIOpt.ExitTopLevelConditional();
3233 if (
PP->getCodeCompletionFileLoc() != FileLoc)
3235 diag::err_pp_unterminated_conditional);
3242 if (CurPtr != BufferStart && (CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r'))
3243 Diag(BufferEnd, diag::warn_no_newline_eof)
3255std::optional<Token> Lexer::peekNextPPToken() {
3256 assert(!
LexingRawMode &&
"How can we expand a macro from a skipping buffer?");
3258 if (isDependencyDirectivesLexer()) {
3259 if (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size())
3260 return std::nullopt;
3262 (void)convertDependencyDirectiveToken(
3263 DepDirectives.front().Tokens[NextDepDirectiveTokenIndex],
Result);
3273 const char *TmpBufferPtr = BufferPtr;
3275 bool atStartOfLine = IsAtStartOfLine;
3276 bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
3277 bool leadingSpace = HasLeadingSpace;
3278 MultipleIncludeOpt MIOptState =
MIOpt;
3284 BufferPtr = TmpBufferPtr;
3286 HasLeadingSpace = leadingSpace;
3287 IsAtStartOfLine = atStartOfLine;
3288 IsAtPhysicalStartOfLine = atPhysicalStartOfLine;
3293 if (
Tok.
is(tok::eof))
3294 return std::nullopt;
3301 const char *Terminator = CMK ==
CMK_Perforce ?
"<<<<\n" :
">>>>>>>";
3303 auto RestOfBuffer = StringRef(CurPtr, BufferEnd - CurPtr).substr(TermLen);
3304 size_t Pos = RestOfBuffer.find(Terminator);
3305 while (Pos != StringRef::npos) {
3308 (RestOfBuffer[Pos - 1] !=
'\r' && RestOfBuffer[Pos - 1] !=
'\n')) {
3309 RestOfBuffer = RestOfBuffer.substr(Pos+TermLen);
3310 Pos = RestOfBuffer.find(Terminator);
3313 return RestOfBuffer.data()+Pos;
3322bool Lexer::IsStartOfConflictMarker(
const char *CurPtr) {
3324 if (CurPtr != BufferStart &&
3325 CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')
3329 if (!StringRef(CurPtr, BufferEnd - CurPtr).starts_with(
"<<<<<<<") &&
3330 !StringRef(CurPtr, BufferEnd - CurPtr).starts_with(
">>>> "))
3345 Diag(CurPtr, diag::err_conflict_marker);
3346 CurrentConflictMarkerState =
Kind;
3350 while (*CurPtr !=
'\r' && *CurPtr !=
'\n') {
3351 assert(CurPtr != BufferEnd &&
"Didn't find end of line");
3366bool Lexer::HandleEndOfConflictMarker(
const char *CurPtr) {
3368 if (CurPtr != BufferStart &&
3369 CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')
3378 for (
unsigned i = 1; i != 4; ++i)
3379 if (CurPtr[i] != CurPtr[0])
3386 CurrentConflictMarkerState)) {
3390 while (CurPtr != BufferEnd && *CurPtr !=
'\r' && *CurPtr !=
'\n')
3396 CurrentConflictMarkerState =
CMK_None;
3404 const char *BufferEnd) {
3405 if (CurPtr == BufferEnd)
3408 for (; CurPtr != BufferEnd; ++CurPtr) {
3409 if (CurPtr[0] ==
'#' && CurPtr[1] ==
'>')
3415bool Lexer::lexEditorPlaceholder(
Token &
Result,
const char *CurPtr) {
3416 assert(CurPtr[-1] ==
'<' && CurPtr[0] ==
'#' &&
"Not a placeholder!");
3422 const char *Start = CurPtr - 1;
3423 if (!LangOpts.AllowEditorPlaceholders)
3424 Diag(Start, diag::err_placeholder_in_source);
3426 FormTokenWithChars(
Result, End, tok::raw_identifier);
3427 Result.setRawIdentifierData(Start);
3434bool Lexer::isCodeCompletionPoint(
const char *CurPtr)
const {
3435 if (
PP &&
PP->isCodeCompletionEnabled()) {
3436 SourceLocation Loc = FileLoc.getLocWithOffset(CurPtr-BufferStart);
3437 return Loc ==
PP->getCodeCompletionLoc();
3448 if (Opts.CPlusPlus23)
3449 DiagId = diag::warn_cxx23_delimited_escape_sequence;
3450 else if (Opts.C2y && !Named)
3451 DiagId = diag::warn_c2y_delimited_escape_sequence;
3453 DiagId = diag::ext_delimited_escape_sequence;
3459 if (!Opts.CPlusPlus)
3460 Ext = Named ? 2 : 1 ;
3464 Diags.
Report(Loc, DiagId) << Named << Ext;
3467std::optional<uint32_t> Lexer::tryReadNumericUCN(
const char *&StartPtr,
3468 const char *SlashLoc,
3471 char Kind = getCharAndSize(StartPtr, CharSize);
3472 assert((Kind ==
'u' || Kind ==
'U') &&
"expected a UCN");
3474 unsigned NumHexDigits;
3477 else if (Kind ==
'U')
3480 bool Delimited =
false;
3481 bool FoundEndDelimiter =
false;
3485 if (!LangOpts.CPlusPlus && !LangOpts.C99) {
3487 Diag(SlashLoc, diag::warn_ucn_not_valid_in_c89);
3488 return std::nullopt;
3491 const char *CurPtr = StartPtr + CharSize;
3492 const char *KindLoc = &CurPtr[-1];
3494 uint32_t CodePoint = 0;
3495 while (Count != NumHexDigits || Delimited) {
3496 char C = getCharAndSize(CurPtr, CharSize);
3497 if (!Delimited && Count == 0 &&
C ==
'{') {
3503 if (Delimited &&
C ==
'}') {
3505 FoundEndDelimiter =
true;
3509 unsigned Value = llvm::hexDigitValue(
C);
3510 if (
Value == std::numeric_limits<unsigned>::max()) {
3514 Diag(SlashLoc, diag::warn_delimited_ucn_incomplete)
3515 << StringRef(KindLoc, 1);
3516 return std::nullopt;
3519 if (CodePoint & 0xF000'0000) {
3521 Diag(KindLoc, diag::err_escape_too_large) << 0;
3522 return std::nullopt;
3533 Diag(SlashLoc, FoundEndDelimiter ? diag::warn_delimited_ucn_empty
3534 : diag::warn_ucn_escape_no_digits)
3535 << StringRef(KindLoc, 1);
3536 return std::nullopt;
3539 if (Delimited && Kind ==
'U') {
3541 Diag(SlashLoc, diag::err_hex_escape_no_digits) << StringRef(KindLoc, 1);
3542 return std::nullopt;
3545 if (!Delimited && Count != NumHexDigits) {
3547 Diag(SlashLoc, diag::warn_ucn_escape_incomplete);
3549 if (Count == 4 && NumHexDigits == 8) {
3550 CharSourceRange URange =
makeCharRange(*
this, KindLoc, KindLoc + 1);
3551 Diag(KindLoc, diag::note_ucn_four_not_eight)
3555 return std::nullopt;
3558 if (Delimited &&
PP)
3561 PP->getDiagnostics());
3568 if (CurPtr - StartPtr == (
ptrdiff_t)(Count + 1 + (Delimited ? 2 : 0)))
3571 while (StartPtr != CurPtr)
3572 (void)getAndAdvanceChar(StartPtr, *
Result);
3579std::optional<uint32_t> Lexer::tryReadNamedUCN(
const char *&StartPtr,
3580 const char *SlashLoc,
3585 char C = getCharAndSize(StartPtr, CharSize);
3586 assert(
C ==
'N' &&
"expected \\N{...}");
3588 const char *CurPtr = StartPtr + CharSize;
3589 const char *KindLoc = &CurPtr[-1];
3591 C = getCharAndSize(CurPtr, CharSize);
3594 Diag(SlashLoc, diag::warn_ucn_escape_incomplete);
3595 return std::nullopt;
3598 const char *StartName = CurPtr;
3599 bool FoundEndDelimiter =
false;
3600 llvm::SmallVector<char, 30> Buffer;
3602 C = getCharAndSize(CurPtr, CharSize);
3605 FoundEndDelimiter =
true;
3611 Buffer.push_back(
C);
3614 if (!FoundEndDelimiter || Buffer.empty()) {
3616 Diag(SlashLoc, FoundEndDelimiter ? diag::warn_delimited_ucn_empty
3617 : diag::warn_delimited_ucn_incomplete)
3618 << StringRef(KindLoc, 1);
3619 return std::nullopt;
3622 StringRef Name(Buffer.data(), Buffer.size());
3623 std::optional<char32_t>
Match =
3624 llvm::sys::unicode::nameToCodepointStrict(Name);
3625 std::optional<llvm::sys::unicode::LooseMatchingResult> LooseMatch;
3627 LooseMatch = llvm::sys::unicode::nameToCodepointLooseMatching(Name);
3629 Diag(StartName, diag::err_invalid_ucn_name)
3630 << StringRef(Buffer.data(), Buffer.size())
3633 Diag(StartName, diag::note_invalid_ucn_name_loose_matching)
3644 if (Diagnose &&
Match)
3647 PP->getDiagnostics());
3653 if (LooseMatch && Diagnose)
3654 Match = LooseMatch->CodePoint;
3661 if (CurPtr - StartPtr == (
ptrdiff_t)(Buffer.size() + 3))
3664 while (StartPtr != CurPtr)
3665 (void)getAndAdvanceChar(StartPtr, *
Result);
3669 return Match ? std::optional<uint32_t>(*
Match) : std::nullopt;
3672uint32_t Lexer::tryReadUCN(
const char *&StartPtr,
const char *SlashLoc,
3676 std::optional<uint32_t> CodePointOpt;
3677 char Kind = getCharAndSize(StartPtr, CharSize);
3678 if (Kind ==
'u' || Kind ==
'U')
3679 CodePointOpt = tryReadNumericUCN(StartPtr, SlashLoc,
Result);
3680 else if (Kind ==
'N')
3681 CodePointOpt = tryReadNamedUCN(StartPtr, SlashLoc,
Result);
3686 uint32_t CodePoint = *CodePointOpt;
3689 if (LangOpts.AsmPreprocessor)
3708 if (CodePoint < 0xA0) {
3712 if (CodePoint < 0x20 || CodePoint >= 0x7F)
3713 Diag(BufferPtr, diag::err_ucn_control_character);
3715 char C =
static_cast<char>(CodePoint);
3716 Diag(BufferPtr, diag::err_ucn_escape_basic_scs) << StringRef(&
C, 1);
3721 }
else if (CodePoint >= 0xD800 && CodePoint <= 0xDFFF) {
3726 if (LangOpts.CPlusPlus && !LangOpts.CPlusPlus11)
3727 Diag(BufferPtr, diag::warn_ucn_escape_surrogate);
3729 Diag(BufferPtr, diag::err_ucn_escape_invalid);
3737bool Lexer::CheckUnicodeWhitespace(
Token &
Result, uint32_t
C,
3738 const char *CurPtr) {
3741 Diag(BufferPtr, diag::ext_unicode_whitespace)
3750void Lexer::PropagateLineStartLeadingSpaceInfo(
Token &
Result) {
3751 IsAtStartOfLine =
Result.isAtStartOfLine();
3752 HasLeadingSpace =
Result.hasLeadingSpace();
3753 HasLeadingEmptyMacro =
Result.hasLeadingEmptyMacro();
3758 assert(!isDependencyDirectivesLexer());
3764 if (IsAtStartOfLine) {
3766 IsAtStartOfLine =
false;
3769 if (IsAtPhysicalStartOfLine) {
3771 IsAtPhysicalStartOfLine =
false;
3774 if (HasLeadingSpace) {
3776 HasLeadingSpace =
false;
3779 if (HasLeadingEmptyMacro) {
3781 HasLeadingEmptyMacro =
false;
3786 bool returnedToken = LexTokenInternal(
Result);
3788 assert((returnedToken || !isRawLex) &&
"Raw lex must succeed");
3789 return returnedToken;
3799 assert(!
Result.needsCleaning() &&
"Result needs cleaning");
3800 assert(!
Result.hasPtrData() &&
"Result has not been reset");
3803 const char *CurPtr = BufferPtr;
3817 FormTokenWithChars(
Result, CurPtr, tok::unknown);
3826 unsigned SizeTmp, SizeTmp2;
3829 char Char = getAndAdvanceChar(CurPtr,
Result);
3833 NewLinePtr =
nullptr;
3838 if (CurPtr-1 == BufferEnd)
3839 return LexEndOfFile(
Result, CurPtr-1);
3842 if (isCodeCompletionPoint(CurPtr-1)) {
3845 FormTokenWithChars(
Result, CurPtr, tok::code_completion);
3850 Diag(CurPtr-1, diag::null_in_file);
3852 if (SkipWhitespace(
Result, CurPtr))
3861 if (LangOpts.MicrosoftExt) {
3863 Diag(CurPtr-1, diag::ext_ctrl_z_eof_microsoft);
3864 return LexEndOfFile(
Result, CurPtr-1);
3868 Kind = tok::unknown;
3872 if (CurPtr[0] ==
'\n')
3873 (void)getAndAdvanceChar(CurPtr,
Result);
3887 IsAtStartOfLine =
true;
3888 IsAtPhysicalStartOfLine =
true;
3889 NewLinePtr = CurPtr - 1;
3898 if (SkipWhitespace(
Result, CurPtr))
3908 SkipHorizontalWhitespace:
3910 if (SkipWhitespace(
Result, CurPtr))
3919 LineComment && (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP)) {
3920 if (SkipLineComment(
Result, CurPtr + 2))
3922 goto SkipIgnoredUnits;
3924 if (SkipBlockComment(
Result, CurPtr + 2))
3926 goto SkipIgnoredUnits;
3928 goto SkipHorizontalWhitespace;
3936 case '0':
case '1':
case '2':
case '3':
case '4':
3937 case '5':
case '6':
case '7':
case '8':
case '9':
3940 return LexNumericConstant(
Result, CurPtr);
3949 if (LangOpts.CPlusPlus11 || LangOpts.C11) {
3950 Char = getCharAndSize(CurPtr, SizeTmp);
3954 return LexStringLiteral(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3955 tok::utf16_string_literal);
3959 return LexCharConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3960 tok::utf16_char_constant);
3963 if (Char ==
'R' && LangOpts.RawStringLiterals &&
3964 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3965 return LexRawStringLiteral(
Result,
3966 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3968 tok::utf16_string_literal);
3971 char Char2 = getCharAndSize(CurPtr + SizeTmp, SizeTmp2);
3975 return LexStringLiteral(
Result,
3976 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3978 tok::utf8_string_literal);
3979 if (Char2 ==
'\'' && (LangOpts.CPlusPlus17 || LangOpts.C23))
3980 return LexCharConstant(
3981 Result, ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3983 tok::utf8_char_constant);
3985 if (Char2 ==
'R' && LangOpts.RawStringLiterals) {
3987 char Char3 = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
3990 return LexRawStringLiteral(
Result,
3991 ConsumeChar(ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3994 tok::utf8_string_literal);
4001 return LexIdentifierContinue(
Result, CurPtr);
4007 if (LangOpts.CPlusPlus11 || LangOpts.C11) {
4008 Char = getCharAndSize(CurPtr, SizeTmp);
4012 return LexStringLiteral(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
4013 tok::utf32_string_literal);
4017 return LexCharConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
4018 tok::utf32_char_constant);
4021 if (Char ==
'R' && LangOpts.RawStringLiterals &&
4022 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
4023 return LexRawStringLiteral(
Result,
4024 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4026 tok::utf32_string_literal);
4030 return LexIdentifierContinue(
Result, CurPtr);
4036 if (LangOpts.RawStringLiterals) {
4037 Char = getCharAndSize(CurPtr, SizeTmp);
4040 return LexRawStringLiteral(
Result,
4041 ConsumeChar(CurPtr, SizeTmp,
Result),
4042 tok::string_literal);
4046 return LexIdentifierContinue(
Result, CurPtr);
4051 Char = getCharAndSize(CurPtr, SizeTmp);
4055 return LexStringLiteral(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
4056 tok::wide_string_literal);
4059 if (LangOpts.RawStringLiterals && Char ==
'R' &&
4060 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
4061 return LexRawStringLiteral(
Result,
4062 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4064 tok::wide_string_literal);
4068 return LexCharConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
4069 tok::wide_char_constant);
4074 case 'A':
case 'B':
case 'C':
case 'D':
case 'E':
case 'F':
case 'G':
4075 case 'H':
case 'I':
case 'J':
case 'K':
case 'M':
case 'N':
4076 case 'O':
case 'P':
case 'Q':
case 'S':
case 'T':
4077 case 'V':
case 'W':
case 'X':
case 'Y':
case 'Z':
4078 case 'a':
case 'b':
case 'c':
case 'd':
case 'e':
case 'f':
case 'g':
4079 case 'h':
case 'i':
case 'j':
case 'k':
case 'l':
case 'm':
case 'n':
4080 case 'o':
case 'p':
case 'q':
case 'r':
case 's':
case 't':
4081 case 'v':
case 'w':
case 'x':
case 'y':
case 'z':
4085 return LexIdentifierContinue(
Result, CurPtr);
4087 if (LangOpts.DollarIdents) {
4089 Diag(CurPtr-1, diag::ext_dollar_in_identifier);
4092 return LexIdentifierContinue(
Result, CurPtr);
4095 Kind = tok::unknown;
4102 return LexCharConstant(
Result, CurPtr, tok::char_constant);
4108 return LexStringLiteral(
Result, CurPtr,
4110 : tok::string_literal);
4114 Kind = tok::question;
4117 Kind = tok::l_square;
4120 Kind = tok::r_square;
4123 Kind = tok::l_paren;
4126 Kind = tok::r_paren;
4129 Kind = tok::l_brace;
4132 Kind = tok::r_brace;
4135 Char = getCharAndSize(CurPtr, SizeTmp);
4136 if (Char >=
'0' && Char <=
'9') {
4140 return LexNumericConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result));
4141 }
else if (LangOpts.CPlusPlus && Char ==
'*') {
4142 Kind = tok::periodstar;
4144 }
else if (Char ==
'.' &&
4145 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
'.') {
4146 Kind = tok::ellipsis;
4147 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4154 Char = getCharAndSize(CurPtr, SizeTmp);
4157 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4158 }
else if (Char ==
'=') {
4159 Kind = tok::ampequal;
4160 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4166 if (getCharAndSize(CurPtr, SizeTmp) ==
'=') {
4167 Kind = tok::starequal;
4168 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4174 Char = getCharAndSize(CurPtr, SizeTmp);
4176 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4177 Kind = tok::plusplus;
4178 }
else if (Char ==
'=') {
4179 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4180 Kind = tok::plusequal;
4186 Char = getCharAndSize(CurPtr, SizeTmp);
4188 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4189 Kind = tok::minusminus;
4190 }
else if (Char ==
'>' && LangOpts.CPlusPlus &&
4191 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
'*') {
4192 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4194 Kind = tok::arrowstar;
4195 }
else if (Char ==
'>') {
4196 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4198 }
else if (Char ==
'=') {
4199 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4200 Kind = tok::minusequal;
4209 if (getCharAndSize(CurPtr, SizeTmp) ==
'=') {
4210 Kind = tok::exclaimequal;
4211 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4213 Kind = tok::exclaim;
4218 Char = getCharAndSize(CurPtr, SizeTmp);
4228 bool TreatAsComment =
4229 LineComment && (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP);
4230 if (!TreatAsComment)
4231 if (!(
PP &&
PP->isPreprocessedOutput()))
4232 TreatAsComment = getCharAndSize(CurPtr+SizeTmp, SizeTmp2) !=
'*';
4234 if (TreatAsComment) {
4235 if (SkipLineComment(
Result, ConsumeChar(CurPtr, SizeTmp,
Result)))
4241 goto SkipIgnoredUnits;
4246 if (SkipBlockComment(
Result, ConsumeChar(CurPtr, SizeTmp,
Result)))
4255 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4256 Kind = tok::slashequal;
4262 Char = getCharAndSize(CurPtr, SizeTmp);
4264 Kind = tok::percentequal;
4265 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4266 }
else if (LangOpts.Digraphs && Char ==
'>') {
4267 Kind = tok::r_brace;
4268 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4269 }
else if (LangOpts.Digraphs && Char ==
':') {
4270 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4271 Char = getCharAndSize(CurPtr, SizeTmp);
4272 if (Char ==
'%' && getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
':') {
4273 Kind = tok::hashhash;
4274 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4276 }
else if (Char ==
'@' && LangOpts.MicrosoftExt) {
4277 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4279 Diag(BufferPtr, diag::ext_charize_microsoft);
4288 goto HandleDirective;
4293 Kind = tok::percent;
4297 Char = getCharAndSize(CurPtr, SizeTmp);
4299 return LexAngledStringLiteral(
Result, CurPtr);
4300 }
else if (Char ==
'<') {
4301 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
4303 Kind = tok::lesslessequal;
4304 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4306 }
else if (After ==
'<' && IsStartOfConflictMarker(CurPtr-1)) {
4310 }
else if (After ==
'<' && HandleEndOfConflictMarker(CurPtr-1)) {
4314 }
else if (LangOpts.CUDA && After ==
'<') {
4315 Kind = tok::lesslessless;
4316 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4319 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4320 Kind = tok::lessless;
4322 }
else if (Char ==
'=') {
4323 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
4325 if (LangOpts.CPlusPlus20) {
4327 Diag(BufferPtr, diag::warn_cxx17_compat_spaceship);
4328 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4330 Kind = tok::spaceship;
4336 Diag(BufferPtr, diag::warn_cxx20_compat_spaceship)
4341 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4342 Kind = tok::lessequal;
4343 }
else if (LangOpts.Digraphs && Char ==
':') {
4344 if (LangOpts.CPlusPlus11 &&
4345 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
':') {
4352 char After = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
4353 if (After !=
':' && After !=
'>') {
4356 Diag(BufferPtr, diag::warn_cxx98_compat_less_colon_colon);
4361 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4362 Kind = tok::l_square;
4363 }
else if (LangOpts.Digraphs && Char ==
'%') {
4364 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4365 Kind = tok::l_brace;
4366 }
else if (Char ==
'#' && SizeTmp == 1 &&
4367 lexEditorPlaceholder(
Result, CurPtr)) {
4374 Char = getCharAndSize(CurPtr, SizeTmp);
4376 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4377 Kind = tok::greaterequal;
4378 }
else if (Char ==
'>') {
4379 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
4381 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4383 Kind = tok::greatergreaterequal;
4384 }
else if (After ==
'>' && IsStartOfConflictMarker(CurPtr-1)) {
4388 }
else if (After ==
'>' && HandleEndOfConflictMarker(CurPtr-1)) {
4391 }
else if (LangOpts.CUDA && After ==
'>') {
4392 Kind = tok::greatergreatergreater;
4393 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4396 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4397 Kind = tok::greatergreater;
4400 Kind = tok::greater;
4404 Char = getCharAndSize(CurPtr, SizeTmp);
4406 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4407 Kind = tok::caretequal;
4408 }
else if (LangOpts.Reflection && Char ==
'^') {
4409 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4410 Kind = tok::caretcaret;
4412 if (LangOpts.OpenCL && Char ==
'^')
4413 Diag(CurPtr, diag::err_opencl_logical_exclusive_or);
4418 Char = getCharAndSize(CurPtr, SizeTmp);
4420 Kind = tok::pipeequal;
4421 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4422 }
else if (Char ==
'|') {
4424 if (CurPtr[1] ==
'|' && HandleEndOfConflictMarker(CurPtr-1))
4426 Kind = tok::pipepipe;
4427 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4433 Char = getCharAndSize(CurPtr, SizeTmp);
4434 if (LangOpts.Digraphs && Char ==
'>') {
4435 Kind = tok::r_square;
4436 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4437 }
else if (Char ==
':') {
4438 Kind = tok::coloncolon;
4439 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4448 Char = getCharAndSize(CurPtr, SizeTmp);
4451 if (CurPtr[1] ==
'=' && HandleEndOfConflictMarker(CurPtr-1))
4454 Kind = tok::equalequal;
4455 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4464 Char = getCharAndSize(CurPtr, SizeTmp);
4466 Kind = tok::hashhash;
4467 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4468 }
else if (Char ==
'@' && LangOpts.MicrosoftExt) {
4471 Diag(BufferPtr, diag::ext_charize_microsoft);
4472 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4479 goto HandleDirective;
4487 if (CurPtr[-1] ==
'@' && LangOpts.ObjC) {
4488 FormTokenWithChars(
Result, CurPtr, tok::at);
4494 llvm::SaveAndRestore<bool> SavedParsingPreprocessorDirective(
4496 auto NextTokOr = peekNextPPToken();
4497 if (NextTokOr.has_value()) {
4498 NextPPTok = *NextTokOr;
4501 if (NextPPTok.
is(tok::raw_identifier) &&
4509 Kind = tok::unknown;
4514 if (!LangOpts.AsmPreprocessor) {
4515 if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &
Result)) {
4516 if (CheckUnicodeWhitespace(
Result, CodePoint, CurPtr)) {
4517 if (SkipWhitespace(
Result, CurPtr))
4525 return LexUnicodeIdentifierStart(
Result, CodePoint, CurPtr);
4529 Kind = tok::unknown;
4534 Kind = tok::unknown;
4538 llvm::UTF32 CodePoint;
4543 llvm::ConversionResult Status =
4544 llvm::convertUTF8Sequence((
const llvm::UTF8 **)&CurPtr,
4545 (
const llvm::UTF8 *)BufferEnd,
4547 llvm::strictConversion);
4548 if (Status == llvm::conversionOK) {
4549 if (CheckUnicodeWhitespace(
Result, CodePoint, CurPtr)) {
4550 if (SkipWhitespace(
Result, CurPtr))
4557 return LexUnicodeIdentifierStart(
Result, CodePoint, CurPtr);
4561 PP->isPreprocessedOutput()) {
4563 Kind = tok::unknown;
4570 Diag(CurPtr, diag::err_invalid_utf8);
4572 BufferPtr = CurPtr+1;
4584 FormTokenWithChars(
Result, CurPtr, Kind);
4590 FormTokenWithChars(
Result, CurPtr, tok::hash);
4593 if (
PP->hadModuleLoaderFatalFailure())
4605const char *Lexer::convertDependencyDirectiveToken(
4607 const char *TokPtr = BufferStart + DDTok.
Offset;
4613 if (
Result.is(tok::raw_identifier))
4614 Result.setRawIdentifierData(TokPtr);
4615 else if (
Result.isLiteral())
4616 Result.setLiteralData(TokPtr);
4617 BufferPtr = TokPtr + DDTok.
Length;
4621bool Lexer::LexDependencyDirectiveToken(
Token &
Result) {
4622 assert(isDependencyDirectivesLexer());
4624 using namespace dependency_directives_scan;
4626 if (BufferPtr == BufferEnd)
4627 return LexEndOfFile(
Result, BufferPtr);
4629 while (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size()) {
4630 if (DepDirectives.front().Kind == pp_eof)
4631 return LexEndOfFile(
Result, BufferEnd);
4632 if (DepDirectives.front().Kind == tokens_present_before_eof)
4634 NextDepDirectiveTokenIndex = 0;
4635 DepDirectives = DepDirectives.drop_front();
4638 const dependency_directives_scan::Token &DDTok =
4639 DepDirectives.front().Tokens[NextDepDirectiveTokenIndex++];
4640 if (NextDepDirectiveTokenIndex > 1 || DDTok.
Kind != tok::hash) {
4646 BufferPtr = BufferStart + DDTok.
Offset;
4647 LexAngledStringLiteral(
Result, BufferPtr + 1);
4648 if (
Result.isNot(tok::header_name))
4652 const dependency_directives_scan::Token &NextTok =
4653 DepDirectives.front().Tokens[NextDepDirectiveTokenIndex];
4654 if (BufferStart + NextTok.
Offset >= BufferPtr)
4656 ++NextDepDirectiveTokenIndex;
4661 const char *TokPtr = convertDependencyDirectiveToken(DDTok,
Result);
4663 if (
Result.is(tok::hash) &&
Result.isAtStartOfLine()) {
4665 if (
PP->hadModuleLoaderFatalFailure())
4671 auto NextTok = peekNextPPToken();
4672 if (NextTok && NextTok->
is(tok::raw_identifier) &&
4673 NextTok->getRawIdentifier() ==
"import") {
4675 if (
PP->hadModuleLoaderFatalFailure())
4680 if (
Result.is(tok::raw_identifier)) {
4681 Result.setRawIdentifierData(TokPtr);
4683 const IdentifierInfo *II =
PP->LookUpIdentifierInfo(
Result);
4684 if (LangOpts.CPlusPlusModules &&
Result.isModuleContextualKeyword() &&
4685 PP->HandleModuleContextualKeyword(
Result)) {
4690 return PP->HandleIdentifier(
Result);
4696 if (
Result.is(tok::colon)) {
4698 if (*BufferPtr ==
':') {
4699 assert(DepDirectives.front().Tokens[NextDepDirectiveTokenIndex].is(
4701 ++NextDepDirectiveTokenIndex;
4702 Result.setKind(tok::coloncolon);
4712bool Lexer::LexDependencyDirectiveTokenWhileSkipping(
Token &
Result) {
4713 assert(isDependencyDirectivesLexer());
4715 using namespace dependency_directives_scan;
4718 unsigned NestedIfs = 0;
4720 DepDirectives = DepDirectives.drop_front();
4721 switch (DepDirectives.front().Kind) {
4723 llvm_unreachable(
"unexpected 'pp_none'");
4764 NextDepDirectiveTokenIndex = 0;
4765 return LexEndOfFile(
Result, BufferEnd);
4769 const dependency_directives_scan::Token &DDTok =
4770 DepDirectives.front().Tokens.front();
4771 assert(DDTok.
is(tok::hash));
4772 NextDepDirectiveTokenIndex = 1;
4774 convertDependencyDirectiveToken(DDTok,
Result);
Defines the Diagnostic-related interfaces.
Defines the clang::IdentifierInfo, clang::IdentifierTable, and clang::Selector interfaces.
Result
Implement __builtin_bit_cast and related operations.
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified.
Defines the clang::LangOptions interface.
static bool isInExpansionTokenRange(const SourceLocation Loc, const SourceManager &SM)
static bool isMathematicalExtensionID(uint32_t C, const LangOptions &LangOpts, bool IsStart, bool &IsExtension)
static void diagnoseInvalidUnicodeCodepointInIdentifier(DiagnosticsEngine &Diags, const LangOptions &LangOpts, uint32_t CodePoint, CharSourceRange Range, bool IsFirst)
static char DecodeTrigraphChar(const char *CP, Lexer *L, bool Trigraphs)
DecodeTrigraphChar - If the specified character is a legal trigraph when prefixed with ?
static size_t getSpellingSlow(const Token &Tok, const char *BufPtr, const LangOptions &LangOpts, char *Spelling)
Slow case of getSpelling.
static const char * FindConflictEnd(const char *CurPtr, const char *BufferEnd, ConflictMarkerKind CMK)
Find the end of a version control conflict marker.
static void maybeDiagnoseUTF8Homoglyph(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range)
After encountering UTF-8 character C and interpreting it as an identifier character,...
static SourceLocation getBeginningOfFileToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
static void StringifyImpl(T &Str, char Quote)
static LLVM_ATTRIBUTE_NOINLINE SourceLocation GetMappedTokenLoc(Preprocessor &PP, SourceLocation FileLoc, unsigned CharNo, unsigned TokLen)
GetMappedTokenLoc - If lexing out of a 'mapped buffer', where we pretend the lexer buffer was all exp...
static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts, bool &IsExtension)
static CharSourceRange makeCharRange(Lexer &L, const char *Begin, const char *End)
static bool isUnicodeWhitespace(uint32_t Codepoint)
static void diagnoseExtensionInIdentifier(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range)
static const char * findPlaceholderEnd(const char *CurPtr, const char *BufferEnd)
static llvm::SmallString< 5 > codepointAsHexString(uint32_t C)
static CharSourceRange makeRangeFromFileLocs(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, Lexer *L, bool Trigraphs)
isEndOfBlockCommentWithEscapedNewLine - Return true if the specified newline character (either \n or \r) ...
static const char * fastParseASCIIIdentifier(const char *CurPtr, const char *BufferEnd)
static char GetTrigraphCharForLetter(char Letter)
GetTrigraphCharForLetter - Given a character that occurs after a ?? pair, return the decoded trigraph letter it corresponds to.
static bool isAllowedInitiallyIDChar(uint32_t C, const LangOptions &LangOpts, bool &IsExtension)
static void maybeDiagnoseIDCharCompat(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range, bool IsFirst)
static const char * findBeginningOfLine(StringRef Buffer, unsigned Offset)
Returns the pointer that points to the beginning of line that contains the given offset,...
Defines the MultipleIncludeOpt interface.
Defines the clang::Preprocessor interface.
static constexpr bool isOneOf()
Defines the clang::SourceLocation class and associated facilities.
Defines the SourceManager interface.
Defines the clang::TokenKind enum and support functions.
static const llvm::sys::UnicodeCharRange C11DisallowedInitialIDCharRanges[]
static const llvm::sys::UnicodeCharRange C99DisallowedInitialIDCharRanges[]
static const llvm::sys::UnicodeCharRange UnicodeWhitespaceCharRanges[]
static const llvm::sys::UnicodeCharRange C99AllowedIDCharRanges[]
static const llvm::sys::UnicodeCharRange C11AllowedIDCharRanges[]
static const llvm::sys::UnicodeCharRange MathematicalNotationProfileIDStartRanges[]
static const llvm::sys::UnicodeCharRange MathematicalNotationProfileIDContinueRanges[]
static const llvm::sys::UnicodeCharRange XIDStartRanges[]
static const llvm::sys::UnicodeCharRange XIDContinueRanges[]
__DEVICE__ void * memcpy(void *__a, const void *__b, size_t __c)
__device__ __2f16 float c
__PTRDIFF_TYPE__ ptrdiff_t
A signed integer type that is the result of subtracting two pointers.
static __inline__ int __ATTRS_o_ai vec_any_ge(vector signed char __a, vector signed char __b)
static __inline__ int __ATTRS_o_ai vec_any_eq(vector signed char __a, vector signed char __b)
Represents a byte-granular source range.
static CharSourceRange getCharRange(SourceRange R)
SourceLocation getEnd() const
SourceLocation getBegin() const
A little helper class used to produce diagnostics.
Concrete class used by the front-end to report problems and issues.
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
bool isIgnored(unsigned DiagID, SourceLocation Loc) const
Determine whether the diagnostic is known to be ignored.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
static FixItHint CreateReplacement(CharSourceRange RemoveRange, StringRef Code)
Create a code modification hint that replaces the given source range with the given code string.
static FixItHint CreateRemoval(CharSourceRange RemoveRange)
Create a code modification hint that removes the given source range.
static FixItHint CreateInsertion(SourceLocation InsertionLoc, StringRef Code, bool BeforePreviousInsertions=false)
Create a code modification hint that inserts the given code string at a specific location.
One of these records is kept for each identifier that is lexed.
bool isHandleIdentifierCase() const
Return true if the Preprocessor::HandleIdentifier must be called on a token of this identifier.
bool isModuleKeyword() const
Determine whether this is the contextual keyword module.
tok::TokenKind getTokenID() const
If this is a source-language token (e.g. 'for'), this API can be used to cause the lexer to map identifiers to source-language tokens.
bool isKeyword(const LangOptions &LangOpts) const
Return true if this token is a keyword in the specified language.
bool isImportKeyword() const
Determine whether this is the contextual keyword import.
tok::ObjCKeywordKind getObjCKeywordID() const
Return the Objective-C keyword ID for this identifier.
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens.
static StringRef getSourceText(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts, bool *Invalid=nullptr)
Returns a string for the source that the range encompasses.
friend class Preprocessor
void SetKeepWhitespaceMode(bool Val)
SetKeepWhitespaceMode - This method lets clients enable or disable whitespace retention mode.
static SourceLocation findLocationAfterToken(SourceLocation loc, tok::TokenKind TKind, const SourceManager &SM, const LangOptions &LangOpts, bool SkipTrailingWhitespaceAndNewLine)
Checks that the given token is the first token that occurs after the given location (this excludes co...
bool LexFromRawLexer(Token &Result)
LexFromRawLexer - Lex a token from a designated raw lexer (one with no associated preprocessor object...
static unsigned getEscapedNewLineSize(const char *P)
getEscapedNewLineSize - Return the size of the specified escaped newline, or 0 if it is not an escape...
bool inKeepCommentMode() const
inKeepCommentMode - Return true if the lexer should return comments as tokens.
void SetCommentRetentionState(bool Mode)
SetCommentRetentionState - Change the comment retention mode of the lexer to the specified mode.
static std::optional< Token > findPreviousToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts, bool IncludeComments)
Finds the token that comes before the given location.
void seek(unsigned Offset, bool IsAtStartOfLine)
Set the lexer's buffer pointer to Offset.
static StringRef getImmediateMacroName(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
void ReadToEndOfLine(SmallVectorImpl< char > *Result=nullptr)
ReadToEndOfLine - Read the rest of the current preprocessor line as an uninterpreted string.
static bool isAtStartOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroBegin=nullptr)
Returns true if the given MacroID location points at the first token of the macro expansion.
DiagnosticBuilder Diag(const char *Loc, unsigned DiagID) const
Diag - Forwarding function for diagnostics.
static std::unique_ptr< Lexer > Create_PragmaLexer(SourceLocation SpellingLoc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLen, Preprocessor &PP)
Create_PragmaLexer: Lexer constructor - Create a new lexer object for _Pragma expansion.
const char * getBufferLocation() const
Return the current location in the buffer.
bool Lex(Token &Result)
Lex - Return the next token in the file.
bool isPragmaLexer() const
isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.
static void DiagnoseDelimitedOrNamedEscapeSequence(SourceLocation Loc, bool Named, const LangOptions &Opts, DiagnosticsEngine &Diags)
Diagnose use of a delimited or named escape sequence.
static unsigned getTokenPrefixLength(SourceLocation TokStart, unsigned CharNo, const SourceManager &SM, const LangOptions &LangOpts)
Get the physical length (including trigraphs and escaped newlines) of the first Characters characters...
Lexer(FileID FID, const llvm::MemoryBufferRef &InputFile, Preprocessor &PP, bool IsFirstIncludeOfFile=true)
Lexer constructor - Create a new lexer object for the specified buffer with the specified preprocesso...
static bool isAtEndOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroEnd=nullptr)
Returns true if the given MacroID location points at the last token of the macro expansion.
SourceLocation getSourceLocation() override
getSourceLocation - Return a source location for the next character in the current file.
static CharSourceRange makeFileCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Accepts a range and returns a character range with file locations.
static bool isNewLineEscaped(const char *BufferStart, const char *Str)
Checks whether new line pointed by Str is preceded by escape sequence.
SourceLocation getSourceLocation(const char *Loc, unsigned TokLen=1) const
getSourceLocation - Return a source location identifier for the specified offset in the current file.
static StringRef getIndentationForLine(SourceLocation Loc, const SourceManager &SM)
Returns the leading whitespace for line that corresponds to the given location Loc.
static unsigned getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid=nullptr)
getSpelling - This method is used to get the spelling of a token into a preallocated buffer,...
bool isKeepWhitespaceMode() const
isKeepWhitespaceMode - Return true if the lexer should return tokens for every character in the file,...
static bool isAsciiIdentifierContinueChar(char c, const LangOptions &LangOpts)
Returns true if the given character could appear in an identifier.
static SourceLocation findEndOfIdentifierContinuation(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Finds the end of an identifier-continuation sequence starting at Loc.
static std::optional< Token > findNextToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts, bool IncludeComments=false)
Finds the token that comes right after the given location.
static unsigned MeasureTokenLength(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
MeasureTokenLength - Relex the token at the specified location and return its length in bytes in the ...
static SourceLocation GetBeginningOfToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Given a location anywhere in a source buffer, find the location that corresponds to the beginning of...
void resetExtendedTokenMode()
Sets the extended token mode back to its initial value, according to the language options and preproc...
static StringRef getImmediateMacroNameForDiagnostics(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
static PreambleBounds ComputePreamble(StringRef Buffer, const LangOptions &LangOpts, unsigned MaxLines=0)
Compute the preamble of the given file.
static bool getRawToken(SourceLocation Loc, Token &Result, const SourceManager &SM, const LangOptions &LangOpts, bool IgnoreWhiteSpace=false)
Relex the token at the specified location.
static SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset, const SourceManager &SM, const LangOptions &LangOpts)
Computes the source location just past the end of the token at this source location.
static std::string Stringify(StringRef Str, bool Charify=false)
Stringify - Convert the specified string into a C string by i) escaping '\' and " characters and ii) ...
static SizedChar getCharAndSizeNoWarn(const char *Ptr, const LangOptions &LangOpts)
getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever emit a warning.
bool LexingRawMode
True if in raw mode.
SmallVector< PPConditionalInfo, 4 > ConditionalStack
Information about the set of #if/#ifdef/#ifndef blocks we are currently in.
bool ParsingPreprocessorDirective
True when parsing #XXX; turns '\n' into a tok::eod token.
MultipleIncludeOpt MIOpt
A state machine that detects the #ifndef-wrapping a file idiom for the multiple-include optimization.
bool ParsingFilename
True after #include; turns <xx> or "xxx" into a tok::header_name token.
bool isLexingRawMode() const
Return true if this lexer is in raw mode or not.
const FileID FID
The SourceManager FileID corresponding to the file being lexed.
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
SourceManager & getSourceManager() const
Encodes a location in the source.
static SourceLocation getFromRawEncoding(UIntTy Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
bool isValid() const
Return true if this is a valid SourceLocation object.
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
UIntTy getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it.
This class handles loading and caching of source files into memory.
Each ExpansionInfo encodes the expansion location - where the token was ultimately expanded,...
SourceLocation getExpansionLocStart() const
SourceLocation getSpellingLoc() const
bool isMacroArgExpansion() const
SourceLocation getExpansionLocEnd() const
This is a discriminated union of FileInfo and ExpansionInfo.
const ExpansionInfo & getExpansion() const
static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix)
Determine whether a suffix is a valid ud-suffix.
Token - This structure provides full information about a lexed token.
IdentifierInfo * getIdentifierInfo() const
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
unsigned getLength() const
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {....
tok::TokenKind getKind() const
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
bool isModuleContextualKeyword(bool AllowExport=true) const
Return true if we have a C++20 modules contextual keyword (export, import or module).
bool isNot(tok::TokenKind K) const
bool isAnnotation() const
Return true if this is any of tok::annot_* kind tokens.
bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const
Return true if we have an ObjC keyword identifier.
bool isSimpleTypeSpecifier(const LangOptions &LangOpts) const
Determine whether the token kind starts a simple-type-specifier.
void startToken()
Reset all flags to cleared.
bool needsCleaning() const
Return true if this token has trigraphs or escaped newlines in it.
StringRef getRawIdentifier() const
getRawIdentifier - For a raw identifier token (i.e., an identifier lexed in raw mode),...
void setFlag(TokenFlags Flag)
Set the specified flag.
static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_movemask_epi8(__m128i __a)
Copies the values of the most significant bits from each 8-bit element in a 128-bit integer vector of...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpeq_epi8(__m128i __a, __m128i __b)
Compares each of the corresponding 8-bit values of the 128-bit integer vectors for equality.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si128(__m128i_u const *__p)
Moves packed integer values from an unaligned 128-bit memory location to elements in a 128-bit intege...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_load_si128(__m128i const *__p)
Moves packed integer values from an aligned 128-bit memory location to elements in a 128-bit integer ...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set1_epi8(char __b)
Initializes all values in a 128-bit vector of [16 x i8] with the specified 8-bit value.
@ tokens_present_before_eof
Indicates that there are tokens present between the last scanned directive and eof.
@ pp_pragma_system_header
@ pp_pragma_include_alias
@ After
Like System, but searched after the system directories.
bool isStringLiteral(TokenKind K)
Return true if this is a C or C++ string-literal (or C++11 user-defined-string-literal) token.
ObjCKeywordKind
Provides a namespace for Objective-C keywords which start with an '@'.
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
The JSON file list parser is used to communicate input to InstallAPI.
LLVM_READNONE bool isASCII(char c)
Returns true if a byte is an ASCII character.
@ Match
This is not an overload because the signature exactly matches an existing declaration.
LLVM_READONLY bool isVerticalWhitespace(unsigned char c)
Returns true if this character is vertical ASCII whitespace: '\n', '\r'.
ConflictMarkerKind
ConflictMarkerKind - Kinds of conflict marker which the lexer might be recovering from.
@ CMK_Perforce
A Perforce-style conflict marker, initiated by 4 ">"s, separated by 4 "="s, and terminated by 4 "<"s.
@ CMK_None
Not within a conflict marker.
@ CMK_Normal
A normal or diff3 conflict marker, initiated by at least 7 "<"s, separated by at least 7 "="s or "|"s...
LLVM_READONLY bool isAsciiIdentifierContinue(unsigned char c)
std::pair< FileID, unsigned > FileIDAndOffset
bool operator<(DeclarationName LHS, DeclarationName RHS)
Ordering on two declaration names.
LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)
Returns true if this character is horizontal ASCII whitespace: ' ', '\t', '\f', '\v'.
@ Result
The result type of a method or function.
LLVM_READONLY bool isRawStringDelimBody(unsigned char c)
Return true if this is the body character of a C++ raw string delimiter.
LLVM_READONLY bool isWhitespace(unsigned char c)
Return true if this character is horizontal or vertical ASCII whitespace: ' ', '\t',...
LLVM_READONLY bool isPreprocessingNumberBody(unsigned char c)
Return true if this is the body character of a C preprocessing number, which is [a-zA-Z0-9_.].
@ Keyword
The name has been typo-corrected to a keyword.
LLVM_READONLY bool isAsciiIdentifierStart(unsigned char c, bool AllowDollar=false)
Returns true if this is a valid first character of a C identifier, which is [a-zA-Z_].
__INTPTR_TYPE__ intptr_t
A signed integer type with the property that any valid pointer to void can be converted to this type,...
float __ovld __cnfn length(float)
Return the length of vector p, i.e., sqrt(p.x^2 + p.y^2 + ...)
#define _mm_cmpistri(A, B, M)
Uses the immediate operand M to perform a comparison of string data with implicitly defined lengths t...
#define _SIDD_LEAST_SIGNIFICANT
#define _SIDD_NEGATIVE_POLARITY
Represents a char and the number of bytes parsed to produce it.
Describes the bounds (start, size) of the preamble and a flag required by PreprocessorOptions::Precom...
Token lexed as part of dependency directive scanning.
unsigned Offset
Offset into the original source input.
bool is(tok::TokenKind K) const