29#include "llvm/ADT/STLExtras.h"
30#include "llvm/ADT/StringExtras.h"
31#include "llvm/ADT/StringRef.h"
32#include "llvm/ADT/StringSwitch.h"
33#include "llvm/Support/Compiler.h"
34#include "llvm/Support/ConvertUTF.h"
35#include "llvm/Support/MemoryBufferRef.h"
36#include "llvm/Support/NativeFormatting.h"
37#include "llvm/Support/Unicode.h"
38#include "llvm/Support/UnicodeCharRanges.h"
63 return II->getObjCKeywordID() == objcKey;
70 return tok::objc_not_keyword;
76 if (AllowExport &&
is(tok::kw_export))
78 if (
isOneOf(tok::kw_import, tok::kw_module))
80 if (
isNot(tok::identifier))
83 return II->isImportKeyword() || II->isModuleKeyword();
89 case tok::annot_typename:
90 case tok::annot_decltype:
91 case tok::annot_pack_indexing_type:
97 case tok::kw___int128:
99 case tok::kw_unsigned:
107 case tok::kw__Float16:
108 case tok::kw___float128:
109 case tok::kw___ibm128:
110 case tok::kw_wchar_t:
116#define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) case tok::kw___##Trait:
117#include "clang/Basic/TransformTypeTraits.def"
118 case tok::kw___auto_type:
119 case tok::kw_char16_t:
120 case tok::kw_char32_t:
122 case tok::kw_decltype:
123 case tok::kw_char8_t:
// Intentionally empty out-of-line member definition; it performs no work.
// NOTE(review): presumably this exists only to give Lexer an out-of-line
// virtual member defined in this translation unit -- confirm against the
// class declaration.
135void Lexer::anchor() {}
// Shared initialization for a lexer over the buffer [BufStart, BufEnd],
// with BufPtr as the position handed in by the caller.
// Only part of this routine is visible here; the comments below describe
// the visible statements and nothing else.
137void Lexer::InitLexer(
const char *BufStart,
const char *BufPtr,
138 const char *BufEnd) {
139 BufferStart = BufStart;
// The byte at BufEnd must be a NUL terminator; the assertion message states
// that the lexer relies on it to simplify lexing.
143 assert(BufEnd[0] == 0 &&
144 "We assume that the input buffer has a null character at the end"
145 " to simplify lexing!");
// When lexing begins at the very start of the buffer, look for the three
// byte sequence EF BB BF (the UTF-8 byte order mark) and advance BufferPtr
// by the detected length.
150 if (BufferStart == BufferPtr) {
152 StringRef Buf(BufferStart, BufferEnd - BufferStart);
153 size_t BOMLength = llvm::StringSwitch<size_t>(Buf)
154 .StartsWith(
"\xEF\xBB\xBF", 3)
158 BufferPtr += BOMLength;
// Reset per-lexer state: not a _Pragma lexer, no conflict marker seen.
161 Is_PragmaLexer =
false;
162 CurrentConflictMarkerState =
CMK_None;
// A fresh lexer is at the start of a line, both logically and physically.
165 IsAtStartOfLine =
true;
166 IsAtPhysicalStartOfLine =
true;
// No leading whitespace or leading empty macro has been recorded yet.
168 HasLeadingSpace =
false;
169 HasLeadingEmptyMacro =
false;
// Extended token mode off; no newline position remembered yet.
184 ExtendedTokenMode = 0;
186 NewLinePtr =
nullptr;
196 FileLoc(
PP.getSourceManager().getLocForStartOfFile(
FID)),
197 LangOpts(
PP.getLangOpts()), LineComment(LangOpts.LineComment),
198 IsFirstTimeLexingFile(IsFirstIncludeOfFile) {
199 InitLexer(InputFile.getBufferStart(), InputFile.getBufferStart(),
200 InputFile.getBufferEnd());
209 const char *BufStart,
const char *BufPtr,
const char *BufEnd,
210 bool IsFirstIncludeOfFile)
211 : FileLoc(fileloc), LangOpts(langOpts), LineComment(LangOpts.LineComment),
212 IsFirstTimeLexingFile(IsFirstIncludeOfFile) {
213 InitLexer(BufStart, BufPtr, BufEnd);
224 bool IsFirstIncludeOfFile)
225 :
Lexer(
SM.getLocForStartOfFile(
FID), langOpts, FromFile.getBufferStart(),
226 FromFile.getBufferStart(), FromFile.getBufferEnd(),
227 IsFirstIncludeOfFile) {}
230 assert(
PP &&
"Cannot reset token mode without a preprocessor");
231 if (LangOpts.TraditionalCPP)
259 FileID SpellingFID =
SM.getFileID(SpellingLoc);
260 llvm::MemoryBufferRef InputFile =
SM.getBufferOrFake(SpellingFID);
266 const char *StrData =
SM.getCharacterData(SpellingLoc);
268 L->BufferPtr = StrData;
269 L->BufferEnd = StrData+TokLen;
270 assert(L->BufferEnd[0] == 0 &&
"Buffer is not nul terminated!");
274 L->FileLoc =
SM.createExpansionLoc(
SM.getLocForStartOfFile(SpellingFID),
276 ExpansionLocEnd, TokLen);
283 L->Is_PragmaLexer =
true;
288 this->IsAtPhysicalStartOfLine = IsAtStartOfLine;
289 this->IsAtStartOfLine = IsAtStartOfLine;
290 assert((BufferStart + Offset) <= BufferEnd);
291 BufferPtr = BufferStart + Offset;
295 typename T::size_type i = 0, e = Str.size();
297 if (Str[i] ==
'\\' || Str[i] == Quote) {
298 Str.insert(Str.begin() + i,
'\\');
301 }
else if (Str[i] ==
'\n' || Str[i] ==
'\r') {
303 if ((i < e - 1) && (Str[i + 1] ==
'\n' || Str[i + 1] ==
'\r') &&
304 Str[i] != Str[i + 1]) {
310 Str.insert(Str.begin() + i + 1,
'n');
320 std::string
Result = std::string(Str);
321 char Quote = Charify ?
'\'' :
'"';
336 assert(
Tok.needsCleaning() &&
"getSpellingSlow called on simple token");
339 const char *BufEnd = BufPtr +
Tok.getLength();
343 while (BufPtr < BufEnd) {
345 Spelling[Length++] = CharAndSize.Char;
346 BufPtr += CharAndSize.Size;
348 if (Spelling[Length - 1] ==
'"')
356 Spelling[Length - 2] ==
'R' && Spelling[Length - 1] ==
'"') {
359 const char *RawEnd = BufEnd;
360 do --RawEnd;
while (*RawEnd !=
'"');
361 size_t RawLength = RawEnd - BufPtr + 1;
364 memcpy(Spelling + Length, BufPtr, RawLength);
372 while (BufPtr < BufEnd) {
374 Spelling[Length++] = CharAndSize.Char;
375 BufPtr += CharAndSize.Size;
378 assert(Length <
Tok.getLength() &&
379 "NeedsCleaning flag set on token that didn't need cleaning!");
397 bool invalidTemp =
false;
398 StringRef file =
SM.getBufferData(locInfo.first, &invalidTemp);
400 if (invalid) *invalid =
true;
404 const char *tokenBegin = file.data() + locInfo.second;
408 file.begin(), tokenBegin, file.end());
416 return StringRef(tokenBegin,
length);
421 return StringRef(buffer.data(), buffer.size());
431 assert((
int)
Tok.getLength() >= 0 &&
"Token character range is bogus!");
433 bool CharDataInvalid =
false;
434 const char *TokStart = SourceMgr.getCharacterData(
Tok.getLocation(),
442 if (!
Tok.needsCleaning())
443 return std::string(TokStart, TokStart +
Tok.getLength());
464 assert((
int)
Tok.getLength() >= 0 &&
"Token character range is bogus!");
466 const char *TokStart =
nullptr;
468 if (
Tok.is(tok::raw_identifier))
469 TokStart =
Tok.getRawIdentifier().data();
470 else if (!
Tok.hasUCN()) {
473 Buffer = II->getNameStart();
474 return II->getLength();
480 TokStart =
Tok.getLiteralData();
484 bool CharDataInvalid =
false;
485 TokStart = SourceMgr.getCharacterData(
Tok.getLocation(), &CharDataInvalid);
488 if (CharDataInvalid) {
495 if (!
Tok.needsCleaning()) {
497 return Tok.getLength();
522 bool IgnoreWhiteSpace) {
531 Loc =
SM.getExpansionLoc(Loc);
534 StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
538 const char *StrData = Buffer.data()+LocInfo.second;
540 if (!IgnoreWhiteSpace &&
isWhitespace(SkipEscapedNewLines(StrData)[0]))
544 Lexer TheLexer(
SM.getLocForStartOfFile(LocInfo.first), LangOpts,
545 Buffer.begin(), StrData, Buffer.end());
554 const char *BufStart = Buffer.data();
555 if (Offset >= Buffer.size())
558 const char *LexStart = BufStart + Offset;
559 for (; LexStart != BufStart; --LexStart) {
575 if (LocInfo.first.isInvalid())
579 StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
585 const char *StrData = Buffer.data() + LocInfo.second;
587 if (!LexStart || LexStart == StrData)
592 Lexer TheLexer(LexerStartLoc, LangOpts, Buffer.data(), LexStart,
612 }
while (TheTok.
getKind() != tok::eof);
624 if (!
SM.isMacroArgExpansion(Loc))
631 assert(FileLocInfo.first == BeginFileLocInfo.first &&
632 FileLocInfo.second >= BeginFileLocInfo.second);
638enum PreambleDirectiveKind {
653 Lexer TheLexer(FileLoc, LangOpts, Buffer.begin(), Buffer.begin(),
657 bool InPreprocessorDirective =
false;
661 unsigned MaxLineOffset = 0;
663 const char *CurPtr = Buffer.begin();
664 unsigned CurLine = 0;
665 while (CurPtr != Buffer.end()) {
669 if (CurLine == MaxLines)
673 if (CurPtr != Buffer.end())
674 MaxLineOffset = CurPtr - Buffer.begin();
680 if (InPreprocessorDirective) {
682 if (TheTok.
getKind() == tok::eof) {
693 InPreprocessorDirective =
false;
702 if (MaxLineOffset && TokOffset >= MaxLineOffset)
707 if (TheTok.
getKind() == tok::comment) {
715 Token HashTok = TheTok;
716 InPreprocessorDirective =
true;
725 PreambleDirectiveKind PDK
726 = llvm::StringSwitch<PreambleDirectiveKind>(
Keyword)
727 .Case(
"include", PDK_Skipped)
728 .Case(
"__include_macros", PDK_Skipped)
729 .Case(
"define", PDK_Skipped)
730 .Case(
"undef", PDK_Skipped)
731 .Case(
"line", PDK_Skipped)
732 .Case(
"error", PDK_Skipped)
733 .Case(
"pragma", PDK_Skipped)
734 .Case(
"import", PDK_Skipped)
735 .Case(
"include_next", PDK_Skipped)
736 .Case(
"warning", PDK_Skipped)
737 .Case(
"ident", PDK_Skipped)
738 .Case(
"sccs", PDK_Skipped)
739 .Case(
"assert", PDK_Skipped)
740 .Case(
"unassert", PDK_Skipped)
741 .Case(
"if", PDK_Skipped)
742 .Case(
"ifdef", PDK_Skipped)
743 .Case(
"ifndef", PDK_Skipped)
744 .Case(
"elif", PDK_Skipped)
745 .Case(
"elifdef", PDK_Skipped)
746 .Case(
"elifndef", PDK_Skipped)
747 .Case(
"else", PDK_Skipped)
748 .Case(
"endif", PDK_Skipped)
749 .Default(PDK_Unknown);
766 TheTok.
getKind() == tok::raw_identifier &&
768 LangOpts.CPlusPlusModules) {
771 Token ModuleTok = TheTok;
774 }
while (TheTok.
getKind() == tok::comment);
775 if (TheTok.
getKind() != tok::semi) {
790 if (ActiveCommentLoc.
isValid())
791 End = ActiveCommentLoc;
806 const char *TokPtr =
SM.getCharacterData(TokStart, &
Invalid);
809 if (
Invalid || (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr)))
812 unsigned PhysOffset = 0;
817 while (Lexer::isObviouslySimpleCharacter(*TokPtr)) {
827 for (; CharNo; --CharNo) {
829 TokPtr += CharAndSize.Size;
830 PhysOffset += CharAndSize.Size;
837 if (!Lexer::isObviouslySimpleCharacter(*TokPtr))
838 PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr;
887 if (!
SM.isAtStartOfImmediateMacroExpansion(loc, &expansionLoc))
893 *MacroBegin = expansionLoc;
915 if (!
SM.isAtEndOfImmediateMacroExpansion(afterLoc, &expansionLoc))
921 *MacroEnd = expansionLoc;
934 if (Range.isTokenRange()) {
941 auto [FID, BeginOffs] =
SM.getDecomposedLoc(Begin);
946 if (!
SM.isInFileID(End, FID, &EndOffs) ||
956 return SM.getSLocEntry(
SM.getFileID(Loc))
958 .isExpansionTokenRange();
975 Range.setBegin(Begin);
980 if (Range.isTokenRange()) {
998 Range.setBegin(MacroBegin);
999 Range.setEnd(MacroEnd);
1001 if (Range.isTokenRange())
1021 Range.setBegin(
SM.getImmediateSpellingLoc(Begin));
1022 Range.setEnd(
SM.getImmediateSpellingLoc(End));
1035 if (Range.isInvalid()) {
1042 if (beginInfo.first.isInvalid()) {
1048 if (!
SM.isInFileID(Range.getEnd(), beginInfo.first, &EndOffs) ||
1049 beginInfo.second > EndOffs) {
1055 bool invalidTemp =
false;
1056 StringRef file =
SM.getBufferData(beginInfo.first, &invalidTemp);
1063 return file.substr(beginInfo.second, EndOffs - beginInfo.second);
1069 assert(Loc.
isMacroID() &&
"Only reasonable to call this on macros");
1085 Loc =
SM.getImmediateExpansionRange(Loc).getBegin();
1092 FileID MacroFID =
SM.getFileID(Loc);
1093 if (
SM.isInFileID(SpellLoc, MacroFID))
1103 Loc =
SM.getSpellingLoc(Loc);
1109 StringRef ExpansionBuffer =
SM.getBufferData(ExpansionInfo.first);
1110 return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
1115 assert(Loc.
isMacroID() &&
"Only reasonable to call this on macros");
1117 while (
SM.isMacroArgExpansion(Loc))
1118 Loc =
SM.getImmediateExpansionRange(Loc).getBegin();
1124 if (!SpellLoc.
isFileID() ||
SM.isWrittenInScratchSpace(SpellLoc))
1130 Loc =
SM.getSpellingLoc(
SM.getImmediateExpansionRange(Loc).getBegin());
1136 StringRef ExpansionBuffer =
SM.getBufferData(ExpansionInfo.first);
1137 return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
1146 if (Str - 1 < BufferStart)
1149 if ((Str[0] ==
'\n' && Str[-1] ==
'\r') ||
1150 (Str[0] ==
'\r' && Str[-1] ==
'\n')) {
1151 if (Str - 2 < BufferStart)
1161 return *Str ==
'\\';
1169 if (LocInfo.first.isInvalid())
1172 StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
1178 StringRef Rest = Buffer.substr(
Line - Buffer.data());
1179 size_t NumWhitespaceChars = Rest.find_first_not_of(
" \t");
1180 return NumWhitespaceChars == StringRef::npos
1182 : Rest.take_front(NumWhitespaceChars);
1197 unsigned CharNo,
unsigned TokLen) {
1198 assert(FileLoc.
isMacroID() &&
"Must be a macro expansion");
1214 return SM.createExpansionLoc(SpellingLoc, II.
getBegin(), II.
getEnd(), TokLen);
1220 unsigned TokLen)
const {
1221 assert(Loc >= BufferStart && Loc <= BufferEnd &&
1222 "Location out of range for this buffer!");
1226 unsigned CharNo = Loc-BufferStart;
1227 if (FileLoc.isFileID())
1228 return FileLoc.getLocWithOffset(CharNo);
1232 assert(
PP &&
"This doesn't work on raw lexers");
1251 case '=':
return '#';
1252 case ')':
return ']';
1253 case '(':
return '[';
1254 case '!':
return '|';
1255 case '\'':
return '^';
1256 case '>':
return '}';
1257 case '/':
return '\\';
1258 case '<':
return '{';
1259 case '-':
return '~';
1274 L->
Diag(CP-2, diag::trigraph_ignored);
1279 L->
Diag(CP-2, diag::trigraph_converted) << StringRef(&Res, 1);
1291 if (Ptr[Size-1] !=
'\n' && Ptr[Size-1] !=
'\r')
1295 if ((Ptr[Size] ==
'\r' || Ptr[Size] ==
'\n') &&
1296 Ptr[Size-1] != Ptr[Size])
1309const char *Lexer::SkipEscapedNewLines(
const char *P) {
1311 const char *AfterEscape;
1314 }
else if (*P ==
'?') {
1316 if (P[1] !=
'?' || P[2] !=
'/')
1326 if (NewLineSize == 0)
return P;
1327 P = AfterEscape+NewLineSize;
1334 bool IncludeComments) {
1337 return std::nullopt;
1345 bool InvalidTemp =
false;
1346 StringRef
File =
SM.getBufferData(LocInfo.first, &InvalidTemp);
1348 return std::nullopt;
1350 const char *TokenBegin =
File.data() + LocInfo.second;
1353 Lexer lexer(
SM.getLocForStartOfFile(LocInfo.first), LangOpts,
File.begin(),
1354 TokenBegin,
File.end());
1365 bool IncludeComments) {
1366 const auto StartOfFile =
SM.getLocForStartOfFile(
SM.getFileID(Loc));
1367 while (Loc != StartOfFile) {
1370 return std::nullopt;
1376 if (!
Tok.is(tok::comment) || IncludeComments) {
1380 return std::nullopt;
1389 const LangOptions &LangOpts,
bool SkipTrailingWhitespaceAndNewLine) {
1391 if (!
Tok ||
Tok->isNot(TKind))
1396 unsigned NumWhitespaceChars = 0;
1397 if (SkipTrailingWhitespaceAndNewLine) {
1398 const char *TokenEnd =
SM.getCharacterData(TokenLoc) +
Tok->getLength();
1399 unsigned char C = *TokenEnd;
1402 NumWhitespaceChars++;
1406 if (
C ==
'\n' ||
C ==
'\r') {
1409 NumWhitespaceChars++;
1410 if ((
C ==
'\n' ||
C ==
'\r') &&
C != PrevC)
1411 NumWhitespaceChars++;
1436 if (Ptr[0] ==
'\\') {
1442 return {
'\\', Size};
1452 Diag(Ptr, diag::backslash_newline_space);
1455 Size += EscapedNewLineSize;
1456 Ptr += EscapedNewLineSize;
1459 auto CharAndSize = getCharAndSizeSlow(Ptr,
Tok);
1460 CharAndSize.Size += Size;
1465 return {
'\\',
Size};
1469 if (Ptr[0] ==
'?' && Ptr[1] ==
'?') {
1473 LangOpts.Trigraphs)) {
1479 if (
C ==
'\\')
goto Slash;
1485 return {*Ptr,
Size + 1u};
1499 if (Ptr[0] ==
'\\') {
1505 return {
'\\',
Size};
1510 Size += EscapedNewLineSize;
1511 Ptr += EscapedNewLineSize;
1514 auto CharAndSize = getCharAndSizeSlowNoWarn(Ptr, LangOpts);
1515 CharAndSize.Size +=
Size;
1520 return {
'\\',
Size};
1524 if (LangOpts.Trigraphs && Ptr[0] ==
'?' && Ptr[1] ==
'?') {
1530 if (
C ==
'\\')
goto Slash;
1536 return {*Ptr,
Size + 1u};
// Repositions the lexer at byte Offset from the start of its buffer.
//   Offset      - distance in bytes from BufferStart.
//   StartOfLine - value stored into both start-of-line flags.
1544void Lexer::SetByteOffset(
unsigned Offset,
bool StartOfLine) {
1545 BufferPtr = BufferStart + Offset;
// Clamp so the read position never moves beyond BufferEnd.
1546 if (BufferPtr > BufferEnd)
1547 BufferPtr = BufferEnd;
// The logical and the physical start-of-line flags both receive the same
// caller-supplied value.
1551 IsAtStartOfLine = StartOfLine;
1552 IsAtPhysicalStartOfLine = StartOfLine;
1556 static const llvm::sys::UnicodeCharSet UnicodeWhitespaceChars(
1558 return UnicodeWhitespaceChars.contains(Codepoint);
1563 llvm::raw_svector_ostream CharOS(CharBuf);
1564 llvm::write_hex(CharOS,
C, llvm::HexPrintStyle::Upper, 4);
1575 bool IsStart,
bool &IsExtension) {
1576 static const llvm::sys::UnicodeCharSet MathStartChars(
1578 static const llvm::sys::UnicodeCharSet MathContinueChars(
1580 if (MathStartChars.contains(
C) ||
1581 (!IsStart && MathContinueChars.contains(
C))) {
1589 bool &IsExtension) {
1590 if (LangOpts.AsmPreprocessor) {
1592 }
else if (LangOpts.DollarIdents &&
'$' ==
C) {
1594 }
else if (LangOpts.CPlusPlus || LangOpts.C23) {
1599 static const llvm::sys::UnicodeCharSet XIDStartChars(
XIDStartRanges);
1601 if (
C ==
'_' || XIDStartChars.contains(
C) || XIDContinueChars.contains(
C))
1605 }
else if (LangOpts.C11) {
1606 static const llvm::sys::UnicodeCharSet C11AllowedIDChars(
1608 return C11AllowedIDChars.contains(
C);
1610 static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
1612 return C99AllowedIDChars.contains(
C);
1617 bool &IsExtension) {
1618 assert(
C > 0x7F &&
"isAllowedInitiallyIDChar called with an ASCII codepoint");
1619 IsExtension =
false;
1620 if (LangOpts.AsmPreprocessor) {
1623 if (LangOpts.CPlusPlus || LangOpts.C23) {
1624 static const llvm::sys::UnicodeCharSet XIDStartChars(
XIDStartRanges);
1625 if (XIDStartChars.contains(
C))
1633 static const llvm::sys::UnicodeCharSet C11DisallowedInitialIDChars(
1635 return !C11DisallowedInitialIDChars.contains(
C);
1637 static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
1639 return !C99DisallowedInitialIDChars.contains(
C);
1645 static const llvm::sys::UnicodeCharSet MathStartChars(
1647 static const llvm::sys::UnicodeCharSet MathContinueChars(
1650 (void)MathStartChars;
1651 (void)MathContinueChars;
1652 assert((MathStartChars.contains(
C) || MathContinueChars.contains(
C)) &&
1653 "Unexpected mathematical notation codepoint");
1654 Diags.
Report(Range.getBegin(), diag::ext_mathematical_notation)
1667 if (!Diags.
isIgnored(diag::warn_c99_compat_unicode_id, Range.getBegin())) {
1669 CannotAppearInIdentifier = 0,
1670 CannotStartIdentifier
1673 static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
1675 static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
1677 if (!C99AllowedIDChars.contains(
C)) {
1678 Diags.
Report(Range.getBegin(), diag::warn_c99_compat_unicode_id)
1680 << CannotAppearInIdentifier;
1681 }
else if (
IsFirst && C99DisallowedInitialIDChars.contains(
C)) {
1682 Diags.
Report(Range.getBegin(), diag::warn_c99_compat_unicode_id)
1684 << CannotStartIdentifier;
1696 struct HomoglyphPair {
1699 bool operator<(HomoglyphPair R)
const {
return Character < R.Character; }
1701 static constexpr HomoglyphPair SortedHomoglyphs[] = {
1754 std::lower_bound(std::begin(SortedHomoglyphs),
1755 std::end(SortedHomoglyphs) - 1, HomoglyphPair{
C,
'\0'});
1756 if (Homoglyph->Character ==
C) {
1757 if (Homoglyph->LooksLike) {
1758 const char LooksLikeStr[] = {Homoglyph->LooksLike, 0};
1759 Diags.
Report(Range.getBegin(), diag::warn_utf8_symbol_homoglyph)
1762 Diags.
Report(Range.getBegin(), diag::warn_utf8_symbol_zero_width)
1782 bool InvalidOnlyAtStart =
IsFirst && !IsIDStart && IsIDContinue;
1784 if (!
IsFirst || InvalidOnlyAtStart) {
1785 Diags.
Report(Range.getBegin(), diag::err_character_not_allowed_identifier)
1789 Diags.
Report(Range.getBegin(), diag::err_character_not_allowed)
1795bool Lexer::tryConsumeIdentifierUCN(
const char *&CurPtr,
unsigned Size,
1797 const char *UCNPtr = CurPtr +
Size;
1798 uint32_t CodePoint = tryReadUCN(UCNPtr, CurPtr,
nullptr);
1799 if (CodePoint == 0) {
1802 bool IsExtension =
false;
1807 !
PP->isPreprocessedOutput())
1809 PP->getDiagnostics(), LangOpts, CodePoint,
1827 if ((UCNPtr - CurPtr == 6 && CurPtr[1] ==
'u') ||
1828 (UCNPtr - CurPtr == 10 && CurPtr[1] ==
'U'))
1831 while (CurPtr != UCNPtr)
1832 (void)getAndAdvanceChar(CurPtr,
Result);
1836bool Lexer::tryConsumeIdentifierUTF8Char(
const char *&CurPtr,
Token &
Result) {
1837 llvm::UTF32 CodePoint;
1842 unsigned FirstCodeUnitSize;
1843 getCharAndSize(CurPtr, FirstCodeUnitSize);
1844 const char *CharStart = CurPtr + FirstCodeUnitSize - 1;
1845 const char *UnicodePtr = CharStart;
1847 llvm::ConversionResult ConvResult = llvm::convertUTF8Sequence(
1848 (
const llvm::UTF8 **)&UnicodePtr, (
const llvm::UTF8 *)BufferEnd,
1849 &CodePoint, llvm::strictConversion);
1850 if (ConvResult != llvm::conversionOK)
1853 bool IsExtension =
false;
1860 !
PP->isPreprocessedOutput())
1862 PP->getDiagnostics(), LangOpts, CodePoint,
1870 PP->getDiagnostics(), CodePoint,
1882 ConsumeChar(CurPtr, FirstCodeUnitSize,
Result);
1883 CurPtr = UnicodePtr;
1887bool Lexer::LexUnicodeIdentifierStart(
Token &
Result, uint32_t
C,
1888 const char *CurPtr) {
1889 bool IsExtension =
false;
1892 !
PP->isPreprocessedOutput()) {
1904 return LexIdentifierContinue(
Result, CurPtr);
1908 !
PP->isPreprocessedOutput() && !
isASCII(*BufferPtr) &&
1920 PP->getDiagnostics(), LangOpts,
C,
1929 FormTokenWithChars(
Result, CurPtr, tok::unknown);
1935 [[maybe_unused]]
const char *BufferEnd) {
1937 alignas(16)
static constexpr char AsciiIdentifierRange[16] = {
1938 '_',
'_',
'A',
'Z',
'a',
'z',
'0',
'9',
1940 constexpr ssize_t BytesPerRegister = 16;
1942 __m128i AsciiIdentifierRangeV =
1945 while (LLVM_LIKELY(BufferEnd - CurPtr >= BytesPerRegister)) {
1952 if (Consumed == BytesPerRegister)
1958 unsigned char C = *CurPtr;
1964bool Lexer::LexIdentifierContinue(
Token &
Result,
const char *CurPtr) {
1973 unsigned char C = getCharAndSize(CurPtr, Size);
1975 CurPtr = ConsumeChar(CurPtr, Size,
Result);
1980 if (!LangOpts.DollarIdents)
1984 Diag(CurPtr, diag::ext_dollar_in_identifier);
1985 CurPtr = ConsumeChar(CurPtr, Size,
Result);
1988 if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result))
1996 const char *IdStart = BufferPtr;
1997 FormTokenWithChars(
Result, CurPtr, tok::raw_identifier);
1998 Result.setRawIdentifierData(IdStart);
2007 const IdentifierInfo *II =
PP->LookUpIdentifierInfo(
Result);
2014 if (isCodeCompletionPoint(CurPtr)) {
2016 Result.setKind(tok::code_completion);
2022 assert(*CurPtr == 0 &&
"Completion character must be 0");
2027 if (CurPtr < BufferEnd) {
2038 return PP->HandleIdentifier(
Result);
2045bool Lexer::isHexaLiteral(
const char *Start,
const LangOptions &LangOpts) {
2047 char C1 = CharAndSize1.Char;
2053 char C2 = CharAndSize2.Char;
2054 return (C2 ==
'x' || C2 ==
'X');
2060bool Lexer::LexNumericConstant(
Token &
Result,
const char *CurPtr) {
2062 char C = getCharAndSize(CurPtr, Size);
2065 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2067 if (LangOpts.HLSL &&
C ==
'.' && (*CurPtr ==
'x' || *CurPtr ==
'r')) {
2071 C = getCharAndSize(CurPtr, Size);
2075 if ((
C ==
'-' ||
C ==
'+') && (PrevCh ==
'E' || PrevCh ==
'e')) {
2078 if (!LangOpts.MicrosoftExt || !isHexaLiteral(BufferPtr, LangOpts))
2079 return LexNumericConstant(
Result, ConsumeChar(CurPtr, Size,
Result));
2083 if ((
C ==
'-' ||
C ==
'+') && (PrevCh ==
'P' || PrevCh ==
'p')) {
2087 bool IsHexFloat =
true;
2088 if (!LangOpts.C99) {
2089 if (!isHexaLiteral(BufferPtr, LangOpts))
2091 else if (!LangOpts.CPlusPlus17 &&
2092 std::find(BufferPtr, CurPtr,
'_') != CurPtr)
2096 return LexNumericConstant(
Result, ConsumeChar(CurPtr, Size,
Result));
2100 if (
C ==
'\'' && (LangOpts.CPlusPlus14 || LangOpts.C23)) {
2104 Diag(CurPtr, LangOpts.CPlusPlus
2105 ? diag::warn_cxx11_compat_digit_separator
2106 : diag::warn_c23_compat_digit_separator);
2107 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2108 CurPtr = ConsumeChar(CurPtr, NextSize,
Result);
2109 return LexNumericConstant(
Result, CurPtr);
2114 if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result))
2115 return LexNumericConstant(
Result, CurPtr);
2117 return LexNumericConstant(
Result, CurPtr);
2120 const char *TokStart = BufferPtr;
2121 FormTokenWithChars(
Result, CurPtr, tok::numeric_constant);
2122 Result.setLiteralData(TokStart);
2128const char *Lexer::LexUDSuffix(
Token &
Result,
const char *CurPtr,
2129 bool IsStringLiteral) {
2130 assert(LangOpts.CPlusPlus);
2134 char C = getCharAndSize(CurPtr, Size);
2135 bool Consumed =
false;
2138 if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result))
2140 else if (!
isASCII(
C) && tryConsumeIdentifierUTF8Char(CurPtr,
Result))
2146 if (!LangOpts.CPlusPlus11) {
2149 C ==
'_' ? diag::warn_cxx11_compat_user_defined_literal
2150 : diag::warn_cxx11_compat_reserved_user_defined_literal)
2161 bool IsUDSuffix =
false;
2164 else if (IsStringLiteral && LangOpts.CPlusPlus14) {
2168 const unsigned MaxStandardSuffixLength = 3;
2169 char Buffer[MaxStandardSuffixLength] = {
C };
2170 unsigned Consumed =
Size;
2173 auto [
Next, NextSize] =
2177 const StringRef CompleteSuffix(Buffer, Chars);
2183 if (Chars == MaxStandardSuffixLength)
2187 Buffer[Chars++] =
Next;
2188 Consumed += NextSize;
2194 Diag(CurPtr, LangOpts.MSVCCompat
2195 ? diag::ext_ms_reserved_user_defined_literal
2196 : diag::ext_reserved_user_defined_literal)
2201 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2206 C = getCharAndSize(CurPtr, Size);
2208 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2209 }
else if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result)) {
2210 }
else if (!
isASCII(
C) && tryConsumeIdentifierUTF8Char(CurPtr,
Result)) {
2220bool Lexer::LexStringLiteral(
Token &
Result,
const char *CurPtr,
2222 const char *AfterQuote = CurPtr;
2224 const char *NulCharacter =
nullptr;
2227 (Kind == tok::utf8_string_literal ||
2228 Kind == tok::utf16_string_literal ||
2229 Kind == tok::utf32_string_literal))
2230 Diag(BufferPtr, LangOpts.CPlusPlus ? diag::warn_cxx98_compat_unicode_literal
2231 : diag::warn_c99_compat_unicode_literal);
2233 char C = getAndAdvanceChar(CurPtr,
Result);
2238 C = getAndAdvanceChar(CurPtr,
Result);
2240 if (
C ==
'\n' ||
C ==
'\r' ||
2241 (
C == 0 && CurPtr-1 == BufferEnd)) {
2243 Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 1;
2244 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2249 if (isCodeCompletionPoint(CurPtr-1)) {
2251 codeCompleteIncludedFile(AfterQuote, CurPtr - 1,
false);
2253 PP->CodeCompleteNaturalLanguage();
2254 FormTokenWithChars(
Result, CurPtr - 1, tok::unknown);
2259 NulCharacter = CurPtr-1;
2261 C = getAndAdvanceChar(CurPtr,
Result);
2265 if (LangOpts.CPlusPlus)
2266 CurPtr = LexUDSuffix(
Result, CurPtr,
true);
2270 Diag(NulCharacter, diag::null_in_char_or_string) << 1;
2273 const char *TokStart = BufferPtr;
2274 FormTokenWithChars(
Result, CurPtr, Kind);
2275 Result.setLiteralData(TokStart);
2281bool Lexer::LexRawStringLiteral(
Token &
Result,
const char *CurPtr,
2289 Diag(BufferPtr, diag::warn_cxx98_compat_raw_string_literal);
2291 unsigned PrefixLen = 0;
2295 llvm::is_contained({
'$',
'@',
'`'}, CurPtr[PrefixLen])) {
2296 const char *Pos = &CurPtr[PrefixLen];
2297 Diag(Pos, LangOpts.CPlusPlus26
2298 ? diag::warn_cxx26_compat_raw_string_literal_character_set
2299 : diag::ext_cxx26_raw_string_literal_character_set)
2300 << StringRef(Pos, 1);
2306 if (CurPtr[PrefixLen] !=
'(') {
2308 const char *PrefixEnd = &CurPtr[PrefixLen];
2309 if (PrefixLen == 16) {
2310 Diag(PrefixEnd, diag::err_raw_delim_too_long);
2311 }
else if (*PrefixEnd ==
'\n') {
2312 Diag(PrefixEnd, diag::err_invalid_newline_raw_delim);
2314 Diag(PrefixEnd, diag::err_invalid_char_raw_delim)
2315 << StringRef(PrefixEnd, 1);
2327 if (
C == 0 && CurPtr-1 == BufferEnd) {
2333 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2338 const char *Prefix = CurPtr;
2339 CurPtr += PrefixLen + 1;
2346 if (strncmp(CurPtr, Prefix, PrefixLen) == 0 && CurPtr[PrefixLen] ==
'"') {
2347 CurPtr += PrefixLen + 1;
2350 }
else if (
C == 0 && CurPtr-1 == BufferEnd) {
2352 Diag(BufferPtr, diag::err_unterminated_raw_string)
2353 << StringRef(Prefix, PrefixLen);
2354 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2360 if (LangOpts.CPlusPlus)
2361 CurPtr = LexUDSuffix(
Result, CurPtr,
true);
2364 const char *TokStart = BufferPtr;
2365 FormTokenWithChars(
Result, CurPtr, Kind);
2366 Result.setLiteralData(TokStart);
2372bool Lexer::LexAngledStringLiteral(
Token &
Result,
const char *CurPtr) {
2374 const char *NulCharacter =
nullptr;
2375 const char *AfterLessPos = CurPtr;
2376 char C = getAndAdvanceChar(CurPtr,
Result);
2381 C = getAndAdvanceChar(CurPtr,
Result);
2384 (
C == 0 && (CurPtr - 1 == BufferEnd))) {
2387 FormTokenWithChars(
Result, AfterLessPos, tok::less);
2392 if (isCodeCompletionPoint(CurPtr - 1)) {
2393 codeCompleteIncludedFile(AfterLessPos, CurPtr - 1,
true);
2395 FormTokenWithChars(
Result, CurPtr - 1, tok::unknown);
2398 NulCharacter = CurPtr-1;
2400 C = getAndAdvanceChar(CurPtr,
Result);
2405 Diag(NulCharacter, diag::null_in_char_or_string) << 1;
2408 const char *TokStart = BufferPtr;
2409 FormTokenWithChars(
Result, CurPtr, tok::header_name);
2410 Result.setLiteralData(TokStart);
// Reports a code-completion request for a partially typed include path to
// the preprocessor.
//   PathStart       - first character of the path text.
//   CompletionPoint - position of the completion marker inside the buffer.
// IsAngled is used below; its declaration is not visible in this excerpt.
2414void Lexer::codeCompleteIncludedFile(
const char *PathStart,
2415 const char *CompletionPoint,
// Text typed so far, from the start of the path up to the completion point.
2418 StringRef PartialPath(PathStart, CompletionPoint - PathStart);
// With MSVCCompat both '/' and '\' count as separators, otherwise only '/'.
2419 llvm::StringRef SlashChars = LangOpts.MSVCCompat ?
"/\\" :
"/";
2420 auto Slash = PartialPath.find_last_of(SlashChars);
2422 (Slash == StringRef::npos) ?
"" : PartialPath.take_front(Slash);
2423 const char *StartOfFilename =
2424 (Slash == StringRef::npos) ? PathStart : PathStart + Slash + 1;
// The text after the last separator (or the whole path when there is no
// separator) is registered as the code-completion identifier.
2426 PP->setCodeCompletionIdentifierInfo(&
PP->getIdentifierTable().get(
2427 StringRef(StartOfFilename, CompletionPoint - StartOfFilename)));
// Look at the characters after the completion point, comparing each with
// the closing delimiter ('>' when IsAngled, '"' otherwise) and with the
// separator set.
// NOTE(review): the actions taken inside these branches are not visible in
// this excerpt -- confirm against the full file.
2430 while (CompletionPoint < BufferEnd) {
2431 char Next = *(CompletionPoint + 1);
2435 if (
Next == (IsAngled ?
'>' :
'"'))
2437 if (SlashChars.contains(
Next))
// Tell the preprocessor which source range the completion covers, expressed
// as offsets from BufferStart applied to FileLoc.
2441 PP->setCodeCompletionTokenRange(
2442 FileLoc.getLocWithOffset(StartOfFilename - BufferStart),
2443 FileLoc.getLocWithOffset(CompletionPoint - BufferStart));
// Hand the directory portion of the path and the delimiter kind on.
2444 PP->CodeCompleteIncludedFile(Dir, IsAngled);
2449bool Lexer::LexCharConstant(
Token &
Result,
const char *CurPtr,
2452 const char *NulCharacter =
nullptr;
2455 if (Kind == tok::utf16_char_constant || Kind == tok::utf32_char_constant)
2456 Diag(BufferPtr, LangOpts.CPlusPlus
2457 ? diag::warn_cxx98_compat_unicode_literal
2458 : diag::warn_c99_compat_unicode_literal);
2459 else if (Kind == tok::utf8_char_constant)
2460 Diag(BufferPtr, LangOpts.CPlusPlus
2461 ? diag::warn_cxx14_compat_u8_character_literal
2462 : diag::warn_c17_compat_u8_character_literal);
2465 char C = getAndAdvanceChar(CurPtr,
Result);
2468 Diag(BufferPtr, diag::ext_empty_character);
2469 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2476 C = getAndAdvanceChar(CurPtr,
Result);
2478 if (
C ==
'\n' ||
C ==
'\r' ||
2479 (
C == 0 && CurPtr-1 == BufferEnd)) {
2481 Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 0;
2482 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2487 if (isCodeCompletionPoint(CurPtr-1)) {
2488 PP->CodeCompleteNaturalLanguage();
2489 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2494 NulCharacter = CurPtr-1;
2496 C = getAndAdvanceChar(CurPtr,
Result);
2500 if (LangOpts.CPlusPlus)
2501 CurPtr = LexUDSuffix(
Result, CurPtr,
false);
2505 Diag(NulCharacter, diag::null_in_char_or_string) << 0;
2508 const char *TokStart = BufferPtr;
2509 FormTokenWithChars(
Result, CurPtr, Kind);
2510 Result.setLiteralData(TokStart);
2518bool Lexer::SkipWhitespace(
Token &
Result,
const char *CurPtr,
2519 bool &TokAtPhysicalStartOfLine) {
2523 unsigned char Char = *CurPtr;
2525 const char *lastNewLine =
nullptr;
2526 auto setLastNewLine = [&](
const char *Ptr) {
2532 setLastNewLine(CurPtr - 1);
2551 if (*CurPtr ==
'\n')
2552 setLastNewLine(CurPtr);
2559 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2561 IsAtStartOfLine =
true;
2562 IsAtPhysicalStartOfLine =
true;
2569 char PrevChar = CurPtr[-1];
2575 TokAtPhysicalStartOfLine =
true;
2577 if (NewLinePtr && lastNewLine && NewLinePtr != lastNewLine &&
PP) {
2578 if (
auto *Handler =
PP->getEmptylineHandler())
2594bool Lexer::SkipLineComment(
Token &
Result,
const char *CurPtr,
2595 bool &TokAtPhysicalStartOfLine) {
2600 Diag(BufferPtr, diag::ext_line_comment);
2618 bool UnicodeDecodingAlreadyDiagnosed =
false;
2625 C !=
'\n' &&
C !=
'\r') {
2627 UnicodeDecodingAlreadyDiagnosed =
false;
2631 unsigned Length = llvm::getUTF8SequenceSize(
2632 (
const llvm::UTF8 *)CurPtr, (
const llvm::UTF8 *)BufferEnd);
2635 Diag(CurPtr, diag::warn_invalid_utf8_in_comment);
2636 UnicodeDecodingAlreadyDiagnosed =
true;
2639 UnicodeDecodingAlreadyDiagnosed =
false;
2645 const char *NextLine = CurPtr;
2648 const char *EscapePtr = CurPtr-1;
2649 bool HasSpace =
false;
2655 if (*EscapePtr ==
'\\')
2658 else if (EscapePtr[0] ==
'/' && EscapePtr[-1] ==
'?' &&
2659 EscapePtr[-2] ==
'?' && LangOpts.Trigraphs)
2661 CurPtr = EscapePtr-2;
2667 Diag(EscapePtr, diag::backslash_newline_space);
2674 const char *OldPtr = CurPtr;
2677 C = getAndAdvanceChar(CurPtr,
Result);
2682 if (
C != 0 && CurPtr == OldPtr+1) {
2690 if (CurPtr != OldPtr + 1 &&
C !=
'/' &&
2691 (CurPtr == BufferEnd + 1 || CurPtr[0] !=
'/')) {
2692 for (; OldPtr != CurPtr; ++OldPtr)
2693 if (OldPtr[0] ==
'\n' || OldPtr[0] ==
'\r') {
2697 const char *ForwardPtr = CurPtr;
2700 if (ForwardPtr[0] ==
'/' && ForwardPtr[1] ==
'/')
2705 Diag(OldPtr-1, diag::ext_multi_line_line_comment);
2710 if (
C ==
'\r' ||
C ==
'\n' || CurPtr == BufferEnd + 1) {
2715 if (
C ==
'\0' && isCodeCompletionPoint(CurPtr-1)) {
2716 PP->CodeCompleteNaturalLanguage();
2733 return SaveLineComment(
Result, CurPtr);
2747 NewLinePtr = CurPtr++;
2751 TokAtPhysicalStartOfLine =
true;
2760bool Lexer::SaveLineComment(
Token &
Result,
const char *CurPtr) {
2763 FormTokenWithChars(
Result, CurPtr, tok::comment);
2775 assert(Spelling[0] ==
'/' && Spelling[1] ==
'/' &&
"Not line comment?");
2779 Result.setKind(tok::comment);
2790 assert(CurPtr[0] ==
'\n' || CurPtr[0] ==
'\r');
2793 const char *TrigraphPos =
nullptr;
2795 const char *SpacePos =
nullptr;
2802 if (CurPtr[0] ==
'\n' || CurPtr[0] ==
'\r') {
2804 if (CurPtr[0] == CurPtr[1])
2818 if (*CurPtr ==
'\\') {
2820 }
else if (CurPtr[0] ==
'/' && CurPtr[-1] ==
'?' && CurPtr[-2] ==
'?') {
2822 TrigraphPos = CurPtr - 2;
2833 if (*CurPtr !=
'\n' && *CurPtr !=
'\r')
2842 L->
Diag(TrigraphPos, diag::trigraph_ignored_block_comment);
2846 L->
Diag(TrigraphPos, diag::trigraph_ends_block_comment);
2851 L->
Diag(CurPtr + 1, diag::escaped_newline_block_comment_end);
2855 L->
Diag(SpacePos, diag::backslash_newline_space);
2861#include <emmintrin.h>
2876bool Lexer::SkipBlockComment(
Token &
Result,
const char *CurPtr,
2877 bool &TokAtPhysicalStartOfLine) {
2887 unsigned char C = getCharAndSize(CurPtr, CharSize);
2889 if (
C == 0 && CurPtr == BufferEnd+1) {
2891 Diag(BufferPtr, diag::err_unterminated_block_comment);
2897 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2914 bool UnicodeDecodingAlreadyDiagnosed =
false;
2919 if (CurPtr + 24 < BufferEnd &&
2922 !(
PP &&
PP->getCodeCompletionFileLoc() == FileLoc)) {
2924 while (
C !=
'/' && (
intptr_t)CurPtr % 16 != 0) {
2929 if (
C ==
'/')
goto FoundSlash;
2933 while (CurPtr + 16 < BufferEnd) {
2935 if (LLVM_UNLIKELY(Mask != 0)) {
2945 CurPtr += llvm::countr_zero<unsigned>(cmp) + 1;
2951 __vector
unsigned char LongUTF = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
2952 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
2953 0x80, 0x80, 0x80, 0x80};
2954 __vector
unsigned char Slashes = {
2955 '/',
'/',
'/',
'/',
'/',
'/',
'/',
'/',
2956 '/',
'/',
'/',
'/',
'/',
'/',
'/',
'/'
2958 while (CurPtr + 16 < BufferEnd) {
2960 vec_any_ge(*(
const __vector
unsigned char *)CurPtr, LongUTF)))
2962 if (
vec_any_eq(*(
const __vector
unsigned char *)CurPtr, Slashes)) {
2969 while (CurPtr + 16 < BufferEnd) {
2970 bool HasNonASCII =
false;
2971 for (
unsigned I = 0; I < 16; ++I)
2972 HasNonASCII |= !
isASCII(CurPtr[I]);
2974 if (LLVM_UNLIKELY(HasNonASCII))
2977 bool HasSlash =
false;
2978 for (
unsigned I = 0; I < 16; ++I)
2979 HasSlash |= CurPtr[I] ==
'/';
2993 while (
C !=
'/' &&
C !=
'\0') {
2995 UnicodeDecodingAlreadyDiagnosed =
false;
3002 unsigned Length = llvm::getUTF8SequenceSize(
3003 (
const llvm::UTF8 *)CurPtr - 1, (
const llvm::UTF8 *)BufferEnd);
3006 Diag(CurPtr - 1, diag::warn_invalid_utf8_in_comment);
3007 UnicodeDecodingAlreadyDiagnosed =
true;
3009 UnicodeDecodingAlreadyDiagnosed =
false;
3010 CurPtr += Length - 1;
3017 if (CurPtr[-2] ==
'*')
3020 if ((CurPtr[-2] ==
'\n' || CurPtr[-2] ==
'\r')) {
3022 LangOpts.Trigraphs)) {
3028 if (CurPtr[0] ==
'*' && CurPtr[1] !=
'/') {
3033 Diag(CurPtr-1, diag::warn_nested_block_comment);
3035 }
else if (
C == 0 && CurPtr == BufferEnd+1) {
3037 Diag(BufferPtr, diag::err_unterminated_block_comment);
3046 FormTokenWithChars(
Result, CurPtr, tok::unknown);
3052 }
else if (
C ==
'\0' && isCodeCompletionPoint(CurPtr-1)) {
3053 PP->CodeCompleteNaturalLanguage();
3071 FormTokenWithChars(
Result, CurPtr, tok::comment);
3080 SkipWhitespace(
Result, CurPtr+1, TokAtPhysicalStartOfLine);
3098 "Must be in a preprocessing directive!");
3103 const char *CurPtr = BufferPtr;
3105 char Char = getAndAdvanceChar(CurPtr, Tmp);
3113 if (CurPtr-1 != BufferEnd) {
3114 if (isCodeCompletionPoint(CurPtr-1)) {
3115 PP->CodeCompleteNaturalLanguage();
3130 assert(CurPtr[-1] == Char &&
"Trigraphs for newline?");
3131 BufferPtr = CurPtr-1;
3135 if (Tmp.
is(tok::code_completion)) {
3137 PP->CodeCompleteNaturalLanguage();
3140 assert(Tmp.
is(tok::eod) &&
"Unexpected token!");
3152bool Lexer::LexEndOfFile(
Token &
Result,
const char *CurPtr) {
3160 FormTokenWithChars(
Result, CurPtr, tok::eod);
3171 Result.startToken();
3172 BufferPtr = BufferEnd;
3173 FormTokenWithChars(Result, BufferEnd, tok::eof);
3177 if (
PP->isRecordingPreamble() &&
PP->isInPrimaryFile()) {
3183 MIOpt.ExitTopLevelConditional();
3191 if (
PP->getCodeCompletionFileLoc() != FileLoc)
3193 diag::err_pp_unterminated_conditional);
3200 if (CurPtr != BufferStart && (CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r'))
3201 Diag(BufferEnd, diag::warn_no_newline_eof)
3213std::optional<Token> Lexer::peekNextPPToken() {
3214 assert(!
LexingRawMode &&
"How can we expand a macro from a skipping buffer?");
3216 if (isDependencyDirectivesLexer()) {
3217 if (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size())
3218 return std::nullopt;
3220 (void)convertDependencyDirectiveToken(
3221 DepDirectives.front().Tokens[NextDepDirectiveTokenIndex],
Result);
3231 const char *TmpBufferPtr = BufferPtr;
3233 bool atStartOfLine = IsAtStartOfLine;
3234 bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
3235 bool leadingSpace = HasLeadingSpace;
3241 BufferPtr = TmpBufferPtr;
3243 HasLeadingSpace = leadingSpace;
3244 IsAtStartOfLine = atStartOfLine;
3245 IsAtPhysicalStartOfLine = atPhysicalStartOfLine;
3249 if (
Tok.
is(tok::eof))
3250 return std::nullopt;
3257 const char *Terminator = CMK ==
CMK_Perforce ?
"<<<<\n" :
">>>>>>>";
3259 auto RestOfBuffer = StringRef(CurPtr, BufferEnd - CurPtr).substr(TermLen);
3260 size_t Pos = RestOfBuffer.find(Terminator);
3261 while (Pos != StringRef::npos) {
3264 (RestOfBuffer[Pos - 1] !=
'\r' && RestOfBuffer[Pos - 1] !=
'\n')) {
3265 RestOfBuffer = RestOfBuffer.substr(Pos+TermLen);
3266 Pos = RestOfBuffer.find(Terminator);
3269 return RestOfBuffer.data()+Pos;
3278bool Lexer::IsStartOfConflictMarker(
const char *CurPtr) {
3280 if (CurPtr != BufferStart &&
3281 CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')
3285 if (!StringRef(CurPtr, BufferEnd - CurPtr).starts_with(
"<<<<<<<") &&
3286 !StringRef(CurPtr, BufferEnd - CurPtr).starts_with(
">>>> "))
3301 Diag(CurPtr, diag::err_conflict_marker);
3302 CurrentConflictMarkerState =
Kind;
3306 while (*CurPtr !=
'\r' && *CurPtr !=
'\n') {
3307 assert(CurPtr != BufferEnd &&
"Didn't find end of line");
3322bool Lexer::HandleEndOfConflictMarker(
const char *CurPtr) {
3324 if (CurPtr != BufferStart &&
3325 CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')
3334 for (
unsigned i = 1; i != 4; ++i)
3335 if (CurPtr[i] != CurPtr[0])
3342 CurrentConflictMarkerState)) {
3346 while (CurPtr != BufferEnd && *CurPtr !=
'\r' && *CurPtr !=
'\n')
3352 CurrentConflictMarkerState =
CMK_None;
3360 const char *BufferEnd) {
3361 if (CurPtr == BufferEnd)
3364 for (; CurPtr != BufferEnd; ++CurPtr) {
3365 if (CurPtr[0] ==
'#' && CurPtr[1] ==
'>')
3371bool Lexer::lexEditorPlaceholder(
Token &
Result,
const char *CurPtr) {
3372 assert(CurPtr[-1] ==
'<' && CurPtr[0] ==
'#' &&
"Not a placeholder!");
3378 const char *Start = CurPtr - 1;
3379 if (!LangOpts.AllowEditorPlaceholders)
3380 Diag(Start, diag::err_placeholder_in_source);
3382 FormTokenWithChars(
Result, End, tok::raw_identifier);
3383 Result.setRawIdentifierData(Start);
3390bool Lexer::isCodeCompletionPoint(
const char *CurPtr)
const {
3391 if (
PP &&
PP->isCodeCompletionEnabled()) {
3392 SourceLocation Loc = FileLoc.getLocWithOffset(CurPtr-BufferStart);
3393 return Loc ==
PP->getCodeCompletionLoc();
3404 if (Opts.CPlusPlus23)
3405 DiagId = diag::warn_cxx23_delimited_escape_sequence;
3406 else if (Opts.C2y && !Named)
3407 DiagId = diag::warn_c2y_delimited_escape_sequence;
3409 DiagId = diag::ext_delimited_escape_sequence;
3415 if (!Opts.CPlusPlus)
3416 Ext = Named ? 2 : 1 ;
3420 Diags.
Report(Loc, DiagId) << Named << Ext;
3423std::optional<uint32_t> Lexer::tryReadNumericUCN(
const char *&StartPtr,
3424 const char *SlashLoc,
3427 char Kind = getCharAndSize(StartPtr, CharSize);
3428 assert((Kind ==
'u' || Kind ==
'U') &&
"expected a UCN");
3430 unsigned NumHexDigits;
3433 else if (Kind ==
'U')
3436 bool Delimited =
false;
3437 bool FoundEndDelimiter =
false;
3441 if (!LangOpts.CPlusPlus && !LangOpts.C99) {
3443 Diag(SlashLoc, diag::warn_ucn_not_valid_in_c89);
3444 return std::nullopt;
3447 const char *CurPtr = StartPtr + CharSize;
3448 const char *KindLoc = &CurPtr[-1];
3450 uint32_t CodePoint = 0;
3451 while (Count != NumHexDigits || Delimited) {
3452 char C = getCharAndSize(CurPtr, CharSize);
3453 if (!Delimited && Count == 0 &&
C ==
'{') {
3459 if (Delimited &&
C ==
'}') {
3461 FoundEndDelimiter =
true;
3465 unsigned Value = llvm::hexDigitValue(
C);
3466 if (
Value == std::numeric_limits<unsigned>::max()) {
3470 Diag(SlashLoc, diag::warn_delimited_ucn_incomplete)
3471 << StringRef(KindLoc, 1);
3472 return std::nullopt;
3475 if (CodePoint & 0xF000'0000) {
3477 Diag(KindLoc, diag::err_escape_too_large) << 0;
3478 return std::nullopt;
3489 Diag(SlashLoc, FoundEndDelimiter ? diag::warn_delimited_ucn_empty
3490 : diag::warn_ucn_escape_no_digits)
3491 << StringRef(KindLoc, 1);
3492 return std::nullopt;
3495 if (Delimited && Kind ==
'U') {
3497 Diag(SlashLoc, diag::err_hex_escape_no_digits) << StringRef(KindLoc, 1);
3498 return std::nullopt;
3501 if (!Delimited && Count != NumHexDigits) {
3503 Diag(SlashLoc, diag::warn_ucn_escape_incomplete);
3505 if (Count == 4 && NumHexDigits == 8) {
3506 CharSourceRange URange =
makeCharRange(*
this, KindLoc, KindLoc + 1);
3507 Diag(KindLoc, diag::note_ucn_four_not_eight)
3511 return std::nullopt;
3514 if (Delimited &&
PP)
3517 PP->getDiagnostics());
3524 if (CurPtr - StartPtr == (
ptrdiff_t)(Count + 1 + (Delimited ? 2 : 0)))
3527 while (StartPtr != CurPtr)
3528 (void)getAndAdvanceChar(StartPtr, *
Result);
3535std::optional<uint32_t> Lexer::tryReadNamedUCN(
const char *&StartPtr,
3536 const char *SlashLoc,
3541 char C = getCharAndSize(StartPtr, CharSize);
3542 assert(
C ==
'N' &&
"expected \\N{...}");
3544 const char *CurPtr = StartPtr + CharSize;
3545 const char *KindLoc = &CurPtr[-1];
3547 C = getCharAndSize(CurPtr, CharSize);
3550 Diag(SlashLoc, diag::warn_ucn_escape_incomplete);
3551 return std::nullopt;
3554 const char *StartName = CurPtr;
3555 bool FoundEndDelimiter =
false;
3556 llvm::SmallVector<char, 30> Buffer;
3558 C = getCharAndSize(CurPtr, CharSize);
3561 FoundEndDelimiter =
true;
3567 Buffer.push_back(
C);
3570 if (!FoundEndDelimiter || Buffer.empty()) {
3572 Diag(SlashLoc, FoundEndDelimiter ? diag::warn_delimited_ucn_empty
3573 : diag::warn_delimited_ucn_incomplete)
3574 << StringRef(KindLoc, 1);
3575 return std::nullopt;
3578 StringRef Name(Buffer.data(), Buffer.size());
3579 std::optional<char32_t>
Match =
3580 llvm::sys::unicode::nameToCodepointStrict(Name);
3581 std::optional<llvm::sys::unicode::LooseMatchingResult> LooseMatch;
3583 LooseMatch = llvm::sys::unicode::nameToCodepointLooseMatching(Name);
3585 Diag(StartName, diag::err_invalid_ucn_name)
3586 << StringRef(Buffer.data(), Buffer.size())
3589 Diag(StartName, diag::note_invalid_ucn_name_loose_matching)
3600 if (Diagnose &&
Match)
3603 PP->getDiagnostics());
3609 if (LooseMatch && Diagnose)
3610 Match = LooseMatch->CodePoint;
3617 if (CurPtr - StartPtr == (
ptrdiff_t)(Buffer.size() + 3))
3620 while (StartPtr != CurPtr)
3621 (void)getAndAdvanceChar(StartPtr, *
Result);
3625 return Match ? std::optional<uint32_t>(*
Match) : std::nullopt;
3628uint32_t Lexer::tryReadUCN(
const char *&StartPtr,
const char *SlashLoc,
3632 std::optional<uint32_t> CodePointOpt;
3633 char Kind = getCharAndSize(StartPtr, CharSize);
3634 if (Kind ==
'u' || Kind ==
'U')
3635 CodePointOpt = tryReadNumericUCN(StartPtr, SlashLoc,
Result);
3636 else if (Kind ==
'N')
3637 CodePointOpt = tryReadNamedUCN(StartPtr, SlashLoc,
Result);
3642 uint32_t CodePoint = *CodePointOpt;
3645 if (LangOpts.AsmPreprocessor)
3664 if (CodePoint < 0xA0) {
3668 if (CodePoint < 0x20 || CodePoint >= 0x7F)
3669 Diag(BufferPtr, diag::err_ucn_control_character);
3671 char C =
static_cast<char>(CodePoint);
3672 Diag(BufferPtr, diag::err_ucn_escape_basic_scs) << StringRef(&
C, 1);
3677 }
else if (CodePoint >= 0xD800 && CodePoint <= 0xDFFF) {
3682 if (LangOpts.CPlusPlus && !LangOpts.CPlusPlus11)
3683 Diag(BufferPtr, diag::warn_ucn_escape_surrogate);
3685 Diag(BufferPtr, diag::err_ucn_escape_invalid);
3693bool Lexer::CheckUnicodeWhitespace(
Token &
Result, uint32_t
C,
3694 const char *CurPtr) {
3697 Diag(BufferPtr, diag::ext_unicode_whitespace)
3706void Lexer::PropagateLineStartLeadingSpaceInfo(
Token &
Result) {
3707 IsAtStartOfLine =
Result.isAtStartOfLine();
3708 HasLeadingSpace =
Result.hasLeadingSpace();
3709 HasLeadingEmptyMacro =
Result.hasLeadingEmptyMacro();
3714 assert(!isDependencyDirectivesLexer());
3720 if (IsAtStartOfLine) {
3722 IsAtStartOfLine =
false;
3725 if (HasLeadingSpace) {
3727 HasLeadingSpace =
false;
3730 if (HasLeadingEmptyMacro) {
3732 HasLeadingEmptyMacro =
false;
3735 bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
3736 IsAtPhysicalStartOfLine =
false;
3739 bool returnedToken = LexTokenInternal(
Result, atPhysicalStartOfLine);
3741 assert((returnedToken || !isRawLex) &&
"Raw lex must succeed");
3742 return returnedToken;
3750bool Lexer::LexTokenInternal(
Token &
Result,
bool TokAtPhysicalStartOfLine) {
3752 assert(!
Result.needsCleaning() &&
"Result needs cleaning");
3753 assert(!
Result.hasPtrData() &&
"Result has not been reset");
3756 const char *CurPtr = BufferPtr;
3768 FormTokenWithChars(Result, CurPtr, tok::unknown);
3777 unsigned SizeTmp, SizeTmp2;
3780 char Char = getAndAdvanceChar(CurPtr,
Result);
3784 NewLinePtr =
nullptr;
3789 if (CurPtr-1 == BufferEnd)
3790 return LexEndOfFile(
Result, CurPtr-1);
3793 if (isCodeCompletionPoint(CurPtr-1)) {
3796 FormTokenWithChars(
Result, CurPtr, tok::code_completion);
3801 Diag(CurPtr-1, diag::null_in_file);
3803 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
3812 if (LangOpts.MicrosoftExt) {
3814 Diag(CurPtr-1, diag::ext_ctrl_z_eof_microsoft);
3815 return LexEndOfFile(
Result, CurPtr-1);
3819 Kind = tok::unknown;
3823 if (CurPtr[0] ==
'\n')
3824 (void)getAndAdvanceChar(CurPtr,
Result);
3838 IsAtStartOfLine =
true;
3839 IsAtPhysicalStartOfLine =
true;
3840 NewLinePtr = CurPtr - 1;
3849 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
3859 SkipHorizontalWhitespace:
3861 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
3870 LineComment && (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP)) {
3871 if (SkipLineComment(
Result, CurPtr+2, TokAtPhysicalStartOfLine))
3873 goto SkipIgnoredUnits;
3875 if (SkipBlockComment(
Result, CurPtr+2, TokAtPhysicalStartOfLine))
3877 goto SkipIgnoredUnits;
3879 goto SkipHorizontalWhitespace;
3887 case '0':
case '1':
case '2':
case '3':
case '4':
3888 case '5':
case '6':
case '7':
case '8':
case '9':
3891 return LexNumericConstant(
Result, CurPtr);
3900 if (LangOpts.CPlusPlus11 || LangOpts.C11) {
3901 Char = getCharAndSize(CurPtr, SizeTmp);
3905 return LexStringLiteral(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3906 tok::utf16_string_literal);
3910 return LexCharConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3911 tok::utf16_char_constant);
3914 if (Char ==
'R' && LangOpts.RawStringLiterals &&
3915 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3916 return LexRawStringLiteral(
Result,
3917 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3919 tok::utf16_string_literal);
3922 char Char2 = getCharAndSize(CurPtr + SizeTmp, SizeTmp2);
3926 return LexStringLiteral(
Result,
3927 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3929 tok::utf8_string_literal);
3930 if (Char2 ==
'\'' && (LangOpts.CPlusPlus17 || LangOpts.C23))
3931 return LexCharConstant(
3932 Result, ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3934 tok::utf8_char_constant);
3936 if (Char2 ==
'R' && LangOpts.RawStringLiterals) {
3938 char Char3 = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
3941 return LexRawStringLiteral(
Result,
3942 ConsumeChar(ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3945 tok::utf8_string_literal);
3952 return LexIdentifierContinue(
Result, CurPtr);
3958 if (LangOpts.CPlusPlus11 || LangOpts.C11) {
3959 Char = getCharAndSize(CurPtr, SizeTmp);
3963 return LexStringLiteral(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3964 tok::utf32_string_literal);
3968 return LexCharConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3969 tok::utf32_char_constant);
3972 if (Char ==
'R' && LangOpts.RawStringLiterals &&
3973 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3974 return LexRawStringLiteral(
Result,
3975 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3977 tok::utf32_string_literal);
3981 return LexIdentifierContinue(
Result, CurPtr);
3987 if (LangOpts.RawStringLiterals) {
3988 Char = getCharAndSize(CurPtr, SizeTmp);
3991 return LexRawStringLiteral(
Result,
3992 ConsumeChar(CurPtr, SizeTmp,
Result),
3993 tok::string_literal);
3997 return LexIdentifierContinue(
Result, CurPtr);
4002 Char = getCharAndSize(CurPtr, SizeTmp);
4006 return LexStringLiteral(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
4007 tok::wide_string_literal);
4010 if (LangOpts.RawStringLiterals && Char ==
'R' &&
4011 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
4012 return LexRawStringLiteral(
Result,
4013 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4015 tok::wide_string_literal);
4019 return LexCharConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
4020 tok::wide_char_constant);
4025 case 'A':
case 'B':
case 'C':
case 'D':
case 'E':
case 'F':
case 'G':
4026 case 'H':
case 'I':
case 'J':
case 'K':
case 'M':
case 'N':
4027 case 'O':
case 'P':
case 'Q':
case 'S':
case 'T':
4028 case 'V':
case 'W':
case 'X':
case 'Y':
case 'Z':
4029 case 'a':
case 'b':
case 'c':
case 'd':
case 'e':
case 'f':
case 'g':
4030 case 'h':
case 'i':
case 'j':
case 'k':
case 'l':
case 'm':
case 'n':
4031 case 'o':
case 'p':
case 'q':
case 'r':
case 's':
case 't':
4032 case 'v':
case 'w':
case 'x':
case 'y':
case 'z':
4041 bool returnedToken = LexIdentifierContinue(
Result, CurPtr);
4045 Result.isModuleContextualKeyword() &&
4046 PP->HandleModuleContextualKeyword(
Result, TokAtPhysicalStartOfLine))
4047 goto HandleDirective;
4048 return returnedToken;
4051 if (LangOpts.DollarIdents) {
4053 Diag(CurPtr-1, diag::ext_dollar_in_identifier);
4056 return LexIdentifierContinue(
Result, CurPtr);
4059 Kind = tok::unknown;
4066 return LexCharConstant(
Result, CurPtr, tok::char_constant);
4072 return LexStringLiteral(
Result, CurPtr,
4074 : tok::string_literal);
4078 Kind = tok::question;
4081 Kind = tok::l_square;
4084 Kind = tok::r_square;
4087 Kind = tok::l_paren;
4090 Kind = tok::r_paren;
4093 Kind = tok::l_brace;
4096 Kind = tok::r_brace;
4099 Char = getCharAndSize(CurPtr, SizeTmp);
4100 if (Char >=
'0' && Char <=
'9') {
4104 return LexNumericConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result));
4105 }
else if (LangOpts.CPlusPlus && Char ==
'*') {
4106 Kind = tok::periodstar;
4108 }
else if (Char ==
'.' &&
4109 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
'.') {
4110 Kind = tok::ellipsis;
4111 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4118 Char = getCharAndSize(CurPtr, SizeTmp);
4121 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4122 }
else if (Char ==
'=') {
4123 Kind = tok::ampequal;
4124 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4130 if (getCharAndSize(CurPtr, SizeTmp) ==
'=') {
4131 Kind = tok::starequal;
4132 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4138 Char = getCharAndSize(CurPtr, SizeTmp);
4140 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4141 Kind = tok::plusplus;
4142 }
else if (Char ==
'=') {
4143 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4144 Kind = tok::plusequal;
4150 Char = getCharAndSize(CurPtr, SizeTmp);
4152 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4153 Kind = tok::minusminus;
4154 }
else if (Char ==
'>' && LangOpts.CPlusPlus &&
4155 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
'*') {
4156 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4158 Kind = tok::arrowstar;
4159 }
else if (Char ==
'>') {
4160 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4162 }
else if (Char ==
'=') {
4163 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4164 Kind = tok::minusequal;
4173 if (getCharAndSize(CurPtr, SizeTmp) ==
'=') {
4174 Kind = tok::exclaimequal;
4175 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4177 Kind = tok::exclaim;
4182 Char = getCharAndSize(CurPtr, SizeTmp);
4192 bool TreatAsComment =
4193 LineComment && (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP);
4194 if (!TreatAsComment)
4195 if (!(
PP &&
PP->isPreprocessedOutput()))
4196 TreatAsComment = getCharAndSize(CurPtr+SizeTmp, SizeTmp2) !=
'*';
4198 if (TreatAsComment) {
4199 if (SkipLineComment(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
4200 TokAtPhysicalStartOfLine))
4206 goto SkipIgnoredUnits;
4211 if (SkipBlockComment(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
4212 TokAtPhysicalStartOfLine))
4221 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4222 Kind = tok::slashequal;
4228 Char = getCharAndSize(CurPtr, SizeTmp);
4230 Kind = tok::percentequal;
4231 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4232 }
else if (LangOpts.Digraphs && Char ==
'>') {
4233 Kind = tok::r_brace;
4234 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4235 }
else if (LangOpts.Digraphs && Char ==
':') {
4236 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4237 Char = getCharAndSize(CurPtr, SizeTmp);
4238 if (Char ==
'%' && getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
':') {
4239 Kind = tok::hashhash;
4240 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4242 }
else if (Char ==
'@' && LangOpts.MicrosoftExt) {
4243 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4245 Diag(BufferPtr, diag::ext_charize_microsoft);
4252 if (TokAtPhysicalStartOfLine && !
LexingRawMode && !Is_PragmaLexer) {
4255 FormTokenWithChars(
Result, CurPtr, tok::hash);
4256 goto HandleDirective;
4262 Kind = tok::percent;
4266 Char = getCharAndSize(CurPtr, SizeTmp);
4268 return LexAngledStringLiteral(
Result, CurPtr);
4269 }
else if (Char ==
'<') {
4270 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
4272 Kind = tok::lesslessequal;
4273 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4275 }
else if (After ==
'<' && IsStartOfConflictMarker(CurPtr-1)) {
4279 }
else if (After ==
'<' && HandleEndOfConflictMarker(CurPtr-1)) {
4283 }
else if (LangOpts.CUDA && After ==
'<') {
4284 Kind = tok::lesslessless;
4285 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4288 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4289 Kind = tok::lessless;
4291 }
else if (Char ==
'=') {
4292 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
4294 if (LangOpts.CPlusPlus20) {
4296 Diag(BufferPtr, diag::warn_cxx17_compat_spaceship);
4297 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4299 Kind = tok::spaceship;
4305 Diag(BufferPtr, diag::warn_cxx20_compat_spaceship)
4310 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4311 Kind = tok::lessequal;
4312 }
else if (LangOpts.Digraphs && Char ==
':') {
4313 if (LangOpts.CPlusPlus11 &&
4314 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
':') {
4321 char After = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
4322 if (After !=
':' && After !=
'>') {
4325 Diag(BufferPtr, diag::warn_cxx98_compat_less_colon_colon);
4330 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4331 Kind = tok::l_square;
4332 }
else if (LangOpts.Digraphs && Char ==
'%') {
4333 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4334 Kind = tok::l_brace;
4335 }
else if (Char ==
'#' && SizeTmp == 1 &&
4336 lexEditorPlaceholder(
Result, CurPtr)) {
4343 Char = getCharAndSize(CurPtr, SizeTmp);
4345 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4346 Kind = tok::greaterequal;
4347 }
else if (Char ==
'>') {
4348 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
4350 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4352 Kind = tok::greatergreaterequal;
4353 }
else if (After ==
'>' && IsStartOfConflictMarker(CurPtr-1)) {
4357 }
else if (After ==
'>' && HandleEndOfConflictMarker(CurPtr-1)) {
4360 }
else if (LangOpts.CUDA && After ==
'>') {
4361 Kind = tok::greatergreatergreater;
4362 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4365 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4366 Kind = tok::greatergreater;
4369 Kind = tok::greater;
4373 Char = getCharAndSize(CurPtr, SizeTmp);
4375 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4376 Kind = tok::caretequal;
4378 if (LangOpts.OpenCL && Char ==
'^')
4379 Diag(CurPtr, diag::err_opencl_logical_exclusive_or);
4384 Char = getCharAndSize(CurPtr, SizeTmp);
4386 Kind = tok::pipeequal;
4387 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4388 }
else if (Char ==
'|') {
4390 if (CurPtr[1] ==
'|' && HandleEndOfConflictMarker(CurPtr-1))
4392 Kind = tok::pipepipe;
4393 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4399 Char = getCharAndSize(CurPtr, SizeTmp);
4400 if (LangOpts.Digraphs && Char ==
'>') {
4401 Kind = tok::r_square;
4402 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4403 }
else if (Char ==
':') {
4404 Kind = tok::coloncolon;
4405 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4414 Char = getCharAndSize(CurPtr, SizeTmp);
4417 if (CurPtr[1] ==
'=' && HandleEndOfConflictMarker(CurPtr-1))
4420 Kind = tok::equalequal;
4421 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4430 Char = getCharAndSize(CurPtr, SizeTmp);
4432 Kind = tok::hashhash;
4433 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4434 }
else if (Char ==
'@' && LangOpts.MicrosoftExt) {
4437 Diag(BufferPtr, diag::ext_charize_microsoft);
4438 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4444 if (TokAtPhysicalStartOfLine && !
LexingRawMode && !Is_PragmaLexer) {
4447 FormTokenWithChars(
Result, CurPtr, tok::hash);
4448 goto HandleDirective;
4457 if (CurPtr[-1] ==
'@' && LangOpts.ObjC)
4460 Kind = tok::unknown;
4465 if (!LangOpts.AsmPreprocessor) {
4466 if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &
Result)) {
4467 if (CheckUnicodeWhitespace(
Result, CodePoint, CurPtr)) {
4468 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
4476 return LexUnicodeIdentifierStart(
Result, CodePoint, CurPtr);
4480 Kind = tok::unknown;
4485 Kind = tok::unknown;
4489 llvm::UTF32 CodePoint;
4494 llvm::ConversionResult Status =
4495 llvm::convertUTF8Sequence((
const llvm::UTF8 **)&CurPtr,
4496 (
const llvm::UTF8 *)BufferEnd,
4498 llvm::strictConversion);
4499 if (Status == llvm::conversionOK) {
4500 if (CheckUnicodeWhitespace(
Result, CodePoint, CurPtr)) {
4501 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
4508 return LexUnicodeIdentifierStart(
Result, CodePoint, CurPtr);
4512 PP->isPreprocessedOutput()) {
4514 Kind = tok::unknown;
4521 Diag(CurPtr, diag::err_invalid_utf8);
4523 BufferPtr = CurPtr+1;
4535 FormTokenWithChars(
Result, CurPtr, Kind);
4541 if (
PP->hadModuleLoaderFatalFailure())
4553const char *Lexer::convertDependencyDirectiveToken(
4555 const char *TokPtr = BufferStart + DDTok.
Offset;
4561 if (
Result.is(tok::raw_identifier))
4562 Result.setRawIdentifierData(TokPtr);
4563 else if (
Result.isLiteral())
4564 Result.setLiteralData(TokPtr);
4565 BufferPtr = TokPtr + DDTok.
Length;
4569bool Lexer::LexDependencyDirectiveToken(
Token &
Result) {
4570 assert(isDependencyDirectivesLexer());
4572 using namespace dependency_directives_scan;
4574 if (BufferPtr == BufferEnd)
4575 return LexEndOfFile(
Result, BufferPtr);
4577 while (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size()) {
4578 if (DepDirectives.front().Kind == pp_eof)
4579 return LexEndOfFile(
Result, BufferEnd);
4580 if (DepDirectives.front().Kind == tokens_present_before_eof)
4582 NextDepDirectiveTokenIndex = 0;
4583 DepDirectives = DepDirectives.drop_front();
4586 const dependency_directives_scan::Token &DDTok =
4587 DepDirectives.front().Tokens[NextDepDirectiveTokenIndex++];
4588 if (NextDepDirectiveTokenIndex > 1 || DDTok.
Kind != tok::hash) {
4594 BufferPtr = BufferStart + DDTok.
Offset;
4595 LexAngledStringLiteral(
Result, BufferPtr + 1);
4596 if (
Result.isNot(tok::header_name))
4600 const dependency_directives_scan::Token &NextTok =
4601 DepDirectives.front().Tokens[NextDepDirectiveTokenIndex];
4602 if (BufferStart + NextTok.
Offset >= BufferPtr)
4604 ++NextDepDirectiveTokenIndex;
4609 const char *TokPtr = convertDependencyDirectiveToken(DDTok,
Result);
4611 if (
Result.is(tok::hash) &&
Result.isAtStartOfLine()) {
4613 if (
PP->hadModuleLoaderFatalFailure())
4618 if (
Result.is(tok::raw_identifier)) {
4619 Result.setRawIdentifierData(TokPtr);
4621 const IdentifierInfo *II =
PP->LookUpIdentifierInfo(
Result);
4622 if (LangOpts.CPlusPlusModules &&
Result.isModuleContextualKeyword() &&
4623 PP->HandleModuleContextualKeyword(
Result,
Result.isAtStartOfLine())) {
4628 return PP->HandleIdentifier(
Result);
4634 if (
Result.is(tok::colon)) {
4636 if (*BufferPtr ==
':') {
4637 assert(DepDirectives.front().Tokens[NextDepDirectiveTokenIndex].is(
4639 ++NextDepDirectiveTokenIndex;
4640 Result.setKind(tok::coloncolon);
4650bool Lexer::LexDependencyDirectiveTokenWhileSkipping(
Token &
Result) {
4651 assert(isDependencyDirectivesLexer());
4653 using namespace dependency_directives_scan;
4656 unsigned NestedIfs = 0;
4658 DepDirectives = DepDirectives.drop_front();
4659 switch (DepDirectives.front().Kind) {
4661 llvm_unreachable(
"unexpected 'pp_none'");
4702 NextDepDirectiveTokenIndex = 0;
4703 return LexEndOfFile(
Result, BufferEnd);
4707 const dependency_directives_scan::Token &DDTok =
4708 DepDirectives.front().Tokens.front();
4709 assert(DDTok.
is(tok::hash));
4710 NextDepDirectiveTokenIndex = 1;
4712 convertDependencyDirectiveToken(DDTok,
Result);
Defines the Diagnostic-related interfaces.
Defines the clang::IdentifierInfo, clang::IdentifierTable, and clang::Selector interfaces.
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified.
Defines the clang::LangOptions interface.
static bool isInExpansionTokenRange(const SourceLocation Loc, const SourceManager &SM)
static bool isMathematicalExtensionID(uint32_t C, const LangOptions &LangOpts, bool IsStart, bool &IsExtension)
static void diagnoseInvalidUnicodeCodepointInIdentifier(DiagnosticsEngine &Diags, const LangOptions &LangOpts, uint32_t CodePoint, CharSourceRange Range, bool IsFirst)
static char DecodeTrigraphChar(const char *CP, Lexer *L, bool Trigraphs)
DecodeTrigraphChar - If the specified character is a legal trigraph when prefixed with ?
static size_t getSpellingSlow(const Token &Tok, const char *BufPtr, const LangOptions &LangOpts, char *Spelling)
Slow case of getSpelling.
static const char * FindConflictEnd(const char *CurPtr, const char *BufferEnd, ConflictMarkerKind CMK)
Find the end of a version control conflict marker.
static void maybeDiagnoseUTF8Homoglyph(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range)
After encountering UTF-8 character C and interpreting it as an identifier character,...
static SourceLocation getBeginningOfFileToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
static void StringifyImpl(T &Str, char Quote)
static LLVM_ATTRIBUTE_NOINLINE SourceLocation GetMappedTokenLoc(Preprocessor &PP, SourceLocation FileLoc, unsigned CharNo, unsigned TokLen)
GetMappedTokenLoc - If lexing out of a 'mapped buffer', where we pretend the lexer buffer was all exp...
static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts, bool &IsExtension)
static CharSourceRange makeCharRange(Lexer &L, const char *Begin, const char *End)
static bool isUnicodeWhitespace(uint32_t Codepoint)
static void diagnoseExtensionInIdentifier(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range)
static const char * findPlaceholderEnd(const char *CurPtr, const char *BufferEnd)
static llvm::SmallString< 5 > codepointAsHexString(uint32_t C)
static CharSourceRange makeRangeFromFileLocs(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, Lexer *L, bool Trigraphs)
isBlockCommentEndOfEscapedNewLine - Return true if the specified newline character (either \n or \r) ...
static const char * fastParseASCIIIdentifier(const char *CurPtr, const char *BufferEnd)
static char GetTrigraphCharForLetter(char Letter)
GetTrigraphCharForLetter - Given a character that occurs after a ?
static bool isAllowedInitiallyIDChar(uint32_t C, const LangOptions &LangOpts, bool &IsExtension)
static void maybeDiagnoseIDCharCompat(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range, bool IsFirst)
static const char * findBeginningOfLine(StringRef Buffer, unsigned Offset)
Returns the pointer that points to the beginning of the line that contains the given offset,...
Defines the MultipleIncludeOpt interface.
Defines the clang::Preprocessor interface.
static constexpr bool isOneOf()
Defines the clang::SourceLocation class and associated facilities.
Defines the SourceManager interface.
Defines the clang::TokenKind enum and support functions.
static const llvm::sys::UnicodeCharRange C11DisallowedInitialIDCharRanges[]
static const llvm::sys::UnicodeCharRange C99DisallowedInitialIDCharRanges[]
static const llvm::sys::UnicodeCharRange UnicodeWhitespaceCharRanges[]
static const llvm::sys::UnicodeCharRange C99AllowedIDCharRanges[]
static const llvm::sys::UnicodeCharRange C11AllowedIDCharRanges[]
static const llvm::sys::UnicodeCharRange MathematicalNotationProfileIDStartRanges[]
static const llvm::sys::UnicodeCharRange MathematicalNotationProfileIDContinueRanges[]
static const llvm::sys::UnicodeCharRange XIDStartRanges[]
static const llvm::sys::UnicodeCharRange XIDContinueRanges[]
__DEVICE__ void * memcpy(void *__a, const void *__b, size_t __c)
__device__ __2f16 float c
__PTRDIFF_TYPE__ ptrdiff_t
A signed integer type that is the result of subtracting two pointers.
static __inline__ int __ATTRS_o_ai vec_any_ge(vector signed char __a, vector signed char __b)
static __inline__ int __ATTRS_o_ai vec_any_eq(vector signed char __a, vector signed char __b)
Represents a character-granular source range.
static CharSourceRange getCharRange(SourceRange R)
SourceLocation getEnd() const
SourceLocation getBegin() const
A little helper class used to produce diagnostics.
Concrete class used by the front-end to report problems and issues.
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
bool isIgnored(unsigned DiagID, SourceLocation Loc) const
Determine whether the diagnostic is known to be ignored.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
static FixItHint CreateReplacement(CharSourceRange RemoveRange, StringRef Code)
Create a code modification hint that replaces the given source range with the given code string.
static FixItHint CreateRemoval(CharSourceRange RemoveRange)
Create a code modification hint that removes the given source range.
static FixItHint CreateInsertion(SourceLocation InsertionLoc, StringRef Code, bool BeforePreviousInsertions=false)
Create a code modification hint that inserts the given code string at a specific location.
One of these records is kept for each identifier that is lexed.
bool isHandleIdentifierCase() const
Return true if the Preprocessor::HandleIdentifier must be called on a token of this identifier.
bool isKeyword(const LangOptions &LangOpts) const
Return true if this token is a keyword in the specified language.
tok::ObjCKeywordKind getObjCKeywordID() const
Return the Objective-C keyword ID for this identifier.
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens.
static StringRef getSourceText(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts, bool *Invalid=nullptr)
Returns a string for the source that the range encompasses.
friend class Preprocessor
void SetKeepWhitespaceMode(bool Val)
SetKeepWhitespaceMode - This method lets clients enable or disable whitespace retention mode.
static SourceLocation findLocationAfterToken(SourceLocation loc, tok::TokenKind TKind, const SourceManager &SM, const LangOptions &LangOpts, bool SkipTrailingWhitespaceAndNewLine)
Checks that the given token is the first token that occurs after the given location (this excludes co...
bool LexFromRawLexer(Token &Result)
LexFromRawLexer - Lex a token from a designated raw lexer (one with no associated preprocessor object...
static unsigned getEscapedNewLineSize(const char *P)
getEscapedNewLineSize - Return the size of the specified escaped newline, or 0 if it is not an escape...
bool inKeepCommentMode() const
inKeepCommentMode - Return true if the lexer should return comments as tokens.
void SetCommentRetentionState(bool Mode)
SetCommentRetentionState - Change the comment retention mode of the lexer to the specified mode.
static std::optional< Token > findPreviousToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts, bool IncludeComments)
Finds the token that comes before the given location.
void seek(unsigned Offset, bool IsAtStartOfLine)
Set the lexer's buffer pointer to Offset.
static StringRef getImmediateMacroName(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
void ReadToEndOfLine(SmallVectorImpl< char > *Result=nullptr)
ReadToEndOfLine - Read the rest of the current preprocessor line as an uninterpreted string.
static bool isAtStartOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroBegin=nullptr)
Returns true if the given MacroID location points at the first token of the macro expansion.
DiagnosticBuilder Diag(const char *Loc, unsigned DiagID) const
Diag - Forwarding function for diagnostics.
const char * getBufferLocation() const
Return the current location in the buffer.
bool Lex(Token &Result)
Lex - Return the next token in the file.
bool isPragmaLexer() const
isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.
static void DiagnoseDelimitedOrNamedEscapeSequence(SourceLocation Loc, bool Named, const LangOptions &Opts, DiagnosticsEngine &Diags)
Diagnose use of a delimited or named escape sequence.
static unsigned getTokenPrefixLength(SourceLocation TokStart, unsigned CharNo, const SourceManager &SM, const LangOptions &LangOpts)
Get the physical length (including trigraphs and escaped newlines) of the first Characters characters...
Lexer(FileID FID, const llvm::MemoryBufferRef &InputFile, Preprocessor &PP, bool IsFirstIncludeOfFile=true)
Lexer constructor - Create a new lexer object for the specified buffer with the specified preprocesso...
static bool isAtEndOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroEnd=nullptr)
Returns true if the given MacroID location points at the last token of the macro expansion.
SourceLocation getSourceLocation() override
getSourceLocation - Return a source location for the next character in the current file.
static CharSourceRange makeFileCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Accepts a range and returns a character range with file locations.
static bool isNewLineEscaped(const char *BufferStart, const char *Str)
Checks whether the newline pointed to by Str is preceded by an escape sequence.
SourceLocation getSourceLocation(const char *Loc, unsigned TokLen=1) const
getSourceLocation - Return a source location identifier for the specified offset in the current file.
static StringRef getIndentationForLine(SourceLocation Loc, const SourceManager &SM)
Returns the leading whitespace for the line that corresponds to the given location Loc.
static unsigned getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid=nullptr)
getSpelling - This method is used to get the spelling of a token into a preallocated buffer,...
bool isKeepWhitespaceMode() const
isKeepWhitespaceMode - Return true if the lexer should return tokens for every character in the file,...
static bool isAsciiIdentifierContinueChar(char c, const LangOptions &LangOpts)
Returns true if the given character could appear in an identifier.
static std::optional< Token > findNextToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts, bool IncludeComments=false)
Finds the token that comes right after the given location.
static unsigned MeasureTokenLength(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
MeasureTokenLength - Relex the token at the specified location and return its length in bytes in the ...
static SourceLocation GetBeginningOfToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Given a location anywhere in a source buffer, find the location that corresponds to the beginning of...
void resetExtendedTokenMode()
Sets the extended token mode back to its initial value, according to the language options and preproc...
static StringRef getImmediateMacroNameForDiagnostics(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
static Lexer * Create_PragmaLexer(SourceLocation SpellingLoc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLen, Preprocessor &PP)
Create_PragmaLexer: Lexer constructor - Create a new lexer object for _Pragma expansion.
static PreambleBounds ComputePreamble(StringRef Buffer, const LangOptions &LangOpts, unsigned MaxLines=0)
Compute the preamble of the given file.
static bool getRawToken(SourceLocation Loc, Token &Result, const SourceManager &SM, const LangOptions &LangOpts, bool IgnoreWhiteSpace=false)
Relex the token at the specified location.
static SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset, const SourceManager &SM, const LangOptions &LangOpts)
Computes the source location just past the end of the token at this source location.
static std::string Stringify(StringRef Str, bool Charify=false)
Stringify - Convert the specified string into a C string by i) escaping '\' and " characters and ii) ...
static SizedChar getCharAndSizeNoWarn(const char *Ptr, const LangOptions &LangOpts)
getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever emit a warning.
bool LexingRawMode
True if in raw mode.
SmallVector< PPConditionalInfo, 4 > ConditionalStack
Information about the set of #if/#ifdef/#ifndef blocks we are currently in.
bool ParsingPreprocessorDirective
True when parsing #XXX; turns '\n' into a tok::eod token.
MultipleIncludeOpt MIOpt
A state machine that detects the #ifndef-wrapping a file idiom for the multiple-include optimization.
bool ParsingFilename
True after #include; turns <xx> or "xxx" into a tok::header_name token.
bool isLexingRawMode() const
Return true if this lexer is in raw mode or not.
const FileID FID
The SourceManager FileID corresponding to the file being lexed.
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
SourceManager & getSourceManager() const
Encodes a location in the source.
static SourceLocation getFromRawEncoding(UIntTy Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
bool isValid() const
Return true if this is a valid SourceLocation object.
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
UIntTy getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it.
This class handles loading and caching of source files into memory.
Each ExpansionInfo encodes the expansion location - where the token was ultimately expanded,...
SourceLocation getExpansionLocStart() const
SourceLocation getSpellingLoc() const
bool isMacroArgExpansion() const
This is a discriminated union of FileInfo and ExpansionInfo.
const ExpansionInfo & getExpansion() const
static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix)
Determine whether a suffix is a valid ud-suffix.
Token - This structure provides full information about a lexed token.
IdentifierInfo * getIdentifierInfo() const
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
unsigned getLength() const
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {....
tok::TokenKind getKind() const
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
bool isModuleContextualKeyword(bool AllowExport=true) const
Return true if we have a C++20 modules contextual keyword (export, import or module).
bool isNot(tok::TokenKind K) const
bool isAnnotation() const
Return true if this is any of tok::annot_* kind tokens.
bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const
Return true if we have an ObjC keyword identifier.
bool isSimpleTypeSpecifier(const LangOptions &LangOpts) const
Determine whether the token kind starts a simple-type-specifier.
void startToken()
Reset all flags to cleared.
bool needsCleaning() const
Return true if this token has trigraphs or escaped newlines in it.
StringRef getRawIdentifier() const
getRawIdentifier - For a raw identifier token (i.e., an identifier lexed in raw mode),...
void setFlag(TokenFlags Flag)
Set the specified flag.
static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_movemask_epi8(__m128i __a)
Copies the values of the most significant bits from each 8-bit element in a 128-bit integer vector of...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpeq_epi8(__m128i __a, __m128i __b)
Compares each of the corresponding 8-bit values of the 128-bit integer vectors for equality.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si128(__m128i_u const *__p)
Moves packed integer values from an unaligned 128-bit memory location to elements in a 128-bit intege...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_load_si128(__m128i const *__p)
Moves packed integer values from an aligned 128-bit memory location to elements in a 128-bit integer ...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set1_epi8(char __b)
Initializes all values in a 128-bit vector of [16 x i8] with the specified 8-bit value.
@ tokens_present_before_eof
Indicates that there are tokens present between the last scanned directive and eof.
@ pp_pragma_system_header
@ pp_pragma_include_alias
@ After
Like System, but searched after the system directories.
bool isStringLiteral(TokenKind K)
Return true if this is a C or C++ string-literal (or C++11 user-defined-string-literal) token.
ObjCKeywordKind
Provides a namespace for Objective-C keywords which start with an '@'.
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
The JSON file list parser is used to communicate input to InstallAPI.
LLVM_READNONE bool isASCII(char c)
Returns true if a byte is an ASCII character.
@ Match
This is not an overload because the signature exactly matches an existing declaration.
LLVM_READONLY bool isVerticalWhitespace(unsigned char c)
Returns true if this character is vertical ASCII whitespace: '\n', '\r'.
ConflictMarkerKind
ConflictMarkerKind - Kinds of conflict marker which the lexer might be recovering from.
@ CMK_Perforce
A Perforce-style conflict marker, initiated by 4 ">"s, separated by 4 "="s, and terminated by 4 "<"s.
@ CMK_None
Not within a conflict marker.
@ CMK_Normal
A normal or diff3 conflict marker, initiated by at least 7 "<"s, separated by at least 7 "="s or "|"s...
LLVM_READONLY bool isAsciiIdentifierContinue(unsigned char c)
std::pair< FileID, unsigned > FileIDAndOffset
bool operator<(DeclarationName LHS, DeclarationName RHS)
Ordering on two declaration names.
LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)
Returns true if this character is horizontal ASCII whitespace: ' ', '\t', '\f', '\v'.
@ Result
The result type of a method or function.
const FunctionProtoType * T
LLVM_READONLY bool isRawStringDelimBody(unsigned char c)
Return true if this is the body character of a C++ raw string delimiter.
LLVM_READONLY bool isWhitespace(unsigned char c)
Return true if this character is horizontal or vertical ASCII whitespace: ' ', '\t',...
LLVM_READONLY bool isPreprocessingNumberBody(unsigned char c)
Return true if this is the body character of a C preprocessing number, which is [a-zA-Z0-9_.].
@ Keyword
The name has been typo-corrected to a keyword.
LLVM_READONLY bool isAsciiIdentifierStart(unsigned char c, bool AllowDollar=false)
Returns true if this is a valid first character of a C identifier, which is [a-zA-Z_].
__INTPTR_TYPE__ intptr_t
A signed integer type with the property that any valid pointer to void can be converted to this type,...
float __ovld __cnfn length(float)
Return the length of vector p, i.e., sqrt(p.x^2 + p.y^2 + ...)
#define _mm_cmpistri(A, B, M)
Uses the immediate operand M to perform a comparison of string data with implicitly defined lengths t...
#define _SIDD_LEAST_SIGNIFICANT
#define _SIDD_NEGATIVE_POLARITY
Represents a char and the number of bytes parsed to produce it.
Describes the bounds (start, size) of the preamble and a flag required by PreprocessorOptions::Precom...
Token lexed as part of dependency directive scanning.
unsigned Offset
Offset into the original source input.
bool is(tok::TokenKind K) const