29#include "llvm/ADT/STLExtras.h"
30#include "llvm/ADT/StringExtras.h"
31#include "llvm/ADT/StringRef.h"
32#include "llvm/ADT/StringSwitch.h"
33#include "llvm/Support/Compiler.h"
34#include "llvm/Support/ConvertUTF.h"
35#include "llvm/Support/MemoryBufferRef.h"
36#include "llvm/Support/NativeFormatting.h"
37#include "llvm/Support/Unicode.h"
38#include "llvm/Support/UnicodeCharRanges.h"
49#if defined(__i386__) || defined(__x86_64__)
64 return II->getObjCKeywordID() == objcKey;
71 return tok::objc_not_keyword;
79 case tok::annot_typename:
80 case tok::annot_decltype:
81 case tok::annot_pack_indexing_type:
87 case tok::kw___int128:
89 case tok::kw_unsigned:
97 case tok::kw__Float16:
98 case tok::kw___float128:
99 case tok::kw___ibm128:
100 case tok::kw_wchar_t:
106#define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) case tok::kw___##Trait:
107#include "clang/Basic/TransformTypeTraits.def"
108 case tok::kw___auto_type:
109 case tok::kw_char16_t:
110 case tok::kw_char32_t:
112 case tok::kw_decltype:
113 case tok::kw_char8_t:
// Intentionally empty out-of-line definition of Lexer::anchor().
// NOTE(review): presumably the out-of-line "virtual method anchor" that pins
// the class's vtable to this translation unit -- confirm against the class
// declaration, which is not visible here.
125void Lexer::anchor() {}
// Initializes the lexer's buffer pointers and resets its per-buffer state.
//   BufStart - stored as BufferStart.
//   BufPtr   - NOTE(review): presumably stored as BufferPtr on a line that is
//              not visible in this excerpt -- confirm.
//   BufEnd   - must point at a NUL byte; this is asserted below.
127void Lexer::InitLexer(
const char *BufStart,
const char *BufPtr,
128 const char *BufEnd) {
129 BufferStart = BufStart;
// The lexer relies on a NUL terminator at the end of the buffer.
133 assert(BufEnd[0] == 0 &&
134 "We assume that the input buffer has a null character at the end"
135 " to simplify lexing!");
// Only when lexing starts at the very beginning of the buffer: look for a
// leading UTF-8 byte-order mark (bytes EF BB BF, length 3) and advance
// BufferPtr by the detected BOM length.
140 if (BufferStart == BufferPtr) {
142 StringRef Buf(BufferStart, BufferEnd - BufferStart);
143 size_t BOMLength = llvm::StringSwitch<size_t>(Buf)
144 .StartsWith(
"\xEF\xBB\xBF", 3)
148 BufferPtr += BOMLength;
// Reset the state flags to their initial values: Is_PragmaLexer is cleared,
// no conflict marker is in progress, and the lexer is positioned at the
// (logical and physical) start of a line with no leading space or leading
// empty macro recorded.
151 Is_PragmaLexer =
false;
152 CurrentConflictMarkerState =
CMK_None;
155 IsAtStartOfLine =
true;
156 IsAtPhysicalStartOfLine =
true;
158 HasLeadingSpace =
false;
159 HasLeadingEmptyMacro =
false;
// Extended token mode starts at 0, and no newline position is remembered yet.
174 ExtendedTokenMode = 0;
176 NewLinePtr =
nullptr;
186 FileLoc(
PP.getSourceManager().getLocForStartOfFile(
FID)),
187 LangOpts(
PP.getLangOpts()), LineComment(LangOpts.LineComment),
188 IsFirstTimeLexingFile(IsFirstIncludeOfFile) {
189 InitLexer(InputFile.getBufferStart(), InputFile.getBufferStart(),
190 InputFile.getBufferEnd());
199 const char *BufStart,
const char *BufPtr,
const char *BufEnd,
200 bool IsFirstIncludeOfFile)
201 : FileLoc(fileloc), LangOpts(langOpts), LineComment(LangOpts.LineComment),
202 IsFirstTimeLexingFile(IsFirstIncludeOfFile) {
203 InitLexer(BufStart, BufPtr, BufEnd);
214 bool IsFirstIncludeOfFile)
215 :
Lexer(
SM.getLocForStartOfFile(
FID), langOpts, FromFile.getBufferStart(),
216 FromFile.getBufferStart(), FromFile.getBufferEnd(),
217 IsFirstIncludeOfFile) {}
220 assert(
PP &&
"Cannot reset token mode without a preprocessor");
221 if (LangOpts.TraditionalCPP)
249 FileID SpellingFID =
SM.getFileID(SpellingLoc);
250 llvm::MemoryBufferRef InputFile =
SM.getBufferOrFake(SpellingFID);
256 const char *StrData =
SM.getCharacterData(SpellingLoc);
258 L->BufferPtr = StrData;
259 L->BufferEnd = StrData+TokLen;
260 assert(L->BufferEnd[0] == 0 &&
"Buffer is not nul terminated!");
264 L->FileLoc =
SM.createExpansionLoc(
SM.getLocForStartOfFile(SpellingFID),
266 ExpansionLocEnd, TokLen);
273 L->Is_PragmaLexer =
true;
278 this->IsAtPhysicalStartOfLine = IsAtStartOfLine;
279 this->IsAtStartOfLine = IsAtStartOfLine;
280 assert((BufferStart + Offset) <= BufferEnd);
281 BufferPtr = BufferStart + Offset;
285 typename T::size_type i = 0, e = Str.size();
287 if (Str[i] ==
'\\' || Str[i] == Quote) {
288 Str.insert(Str.begin() + i,
'\\');
291 }
else if (Str[i] ==
'\n' || Str[i] ==
'\r') {
293 if ((i < e - 1) && (Str[i + 1] ==
'\n' || Str[i + 1] ==
'\r') &&
294 Str[i] != Str[i + 1]) {
300 Str.insert(Str.begin() + i + 1,
'n');
310 std::string
Result = std::string(Str);
311 char Quote = Charify ?
'\'' :
'"';
326 assert(
Tok.needsCleaning() &&
"getSpellingSlow called on simple token");
329 const char *BufEnd = BufPtr +
Tok.getLength();
333 while (BufPtr < BufEnd) {
335 Spelling[Length++] = CharAndSize.Char;
336 BufPtr += CharAndSize.Size;
338 if (Spelling[Length - 1] ==
'"')
346 Spelling[Length - 2] ==
'R' && Spelling[Length - 1] ==
'"') {
349 const char *RawEnd = BufEnd;
350 do --RawEnd;
while (*RawEnd !=
'"');
351 size_t RawLength = RawEnd - BufPtr + 1;
354 memcpy(Spelling + Length, BufPtr, RawLength);
362 while (BufPtr < BufEnd) {
364 Spelling[Length++] = CharAndSize.Char;
365 BufPtr += CharAndSize.Size;
368 assert(Length <
Tok.getLength() &&
369 "NeedsCleaning flag set on token that didn't need cleaning!");
387 bool invalidTemp =
false;
388 StringRef file =
SM.getBufferData(locInfo.first, &invalidTemp);
390 if (invalid) *invalid =
true;
394 const char *tokenBegin = file.data() + locInfo.second;
398 file.begin(), tokenBegin, file.end());
406 return StringRef(tokenBegin,
length);
411 return StringRef(buffer.data(), buffer.size());
421 assert((
int)
Tok.getLength() >= 0 &&
"Token character range is bogus!");
423 bool CharDataInvalid =
false;
424 const char *TokStart = SourceMgr.getCharacterData(
Tok.getLocation(),
432 if (!
Tok.needsCleaning())
433 return std::string(TokStart, TokStart +
Tok.getLength());
454 assert((
int)
Tok.getLength() >= 0 &&
"Token character range is bogus!");
456 const char *TokStart =
nullptr;
458 if (
Tok.is(tok::raw_identifier))
459 TokStart =
Tok.getRawIdentifier().data();
460 else if (!
Tok.hasUCN()) {
463 Buffer = II->getNameStart();
464 return II->getLength();
470 TokStart =
Tok.getLiteralData();
474 bool CharDataInvalid =
false;
475 TokStart = SourceMgr.getCharacterData(
Tok.getLocation(), &CharDataInvalid);
478 if (CharDataInvalid) {
485 if (!
Tok.needsCleaning()) {
487 return Tok.getLength();
512 bool IgnoreWhiteSpace) {
521 Loc =
SM.getExpansionLoc(Loc);
524 StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
528 const char *StrData = Buffer.data()+LocInfo.second;
530 if (!IgnoreWhiteSpace &&
isWhitespace(SkipEscapedNewLines(StrData)[0]))
534 Lexer TheLexer(
SM.getLocForStartOfFile(LocInfo.first), LangOpts,
535 Buffer.begin(), StrData, Buffer.end());
544 const char *BufStart = Buffer.data();
545 if (Offset >= Buffer.size())
548 const char *LexStart = BufStart + Offset;
549 for (; LexStart != BufStart; --LexStart) {
565 if (LocInfo.first.isInvalid())
569 StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
575 const char *StrData = Buffer.data() + LocInfo.second;
577 if (!LexStart || LexStart == StrData)
582 Lexer TheLexer(LexerStartLoc, LangOpts, Buffer.data(), LexStart,
602 }
while (TheTok.
getKind() != tok::eof);
614 if (!
SM.isMacroArgExpansion(Loc))
621 assert(FileLocInfo.first == BeginFileLocInfo.first &&
622 FileLocInfo.second >= BeginFileLocInfo.second);
628enum PreambleDirectiveKind {
643 Lexer TheLexer(FileLoc, LangOpts, Buffer.begin(), Buffer.begin(),
647 bool InPreprocessorDirective =
false;
651 unsigned MaxLineOffset = 0;
653 const char *CurPtr = Buffer.begin();
654 unsigned CurLine = 0;
655 while (CurPtr != Buffer.end()) {
659 if (CurLine == MaxLines)
663 if (CurPtr != Buffer.end())
664 MaxLineOffset = CurPtr - Buffer.begin();
670 if (InPreprocessorDirective) {
672 if (TheTok.
getKind() == tok::eof) {
683 InPreprocessorDirective =
false;
692 if (MaxLineOffset && TokOffset >= MaxLineOffset)
697 if (TheTok.
getKind() == tok::comment) {
705 Token HashTok = TheTok;
706 InPreprocessorDirective =
true;
715 PreambleDirectiveKind PDK
716 = llvm::StringSwitch<PreambleDirectiveKind>(
Keyword)
717 .Case(
"include", PDK_Skipped)
718 .Case(
"__include_macros", PDK_Skipped)
719 .Case(
"define", PDK_Skipped)
720 .Case(
"undef", PDK_Skipped)
721 .Case(
"line", PDK_Skipped)
722 .Case(
"error", PDK_Skipped)
723 .Case(
"pragma", PDK_Skipped)
724 .Case(
"import", PDK_Skipped)
725 .Case(
"include_next", PDK_Skipped)
726 .Case(
"warning", PDK_Skipped)
727 .Case(
"ident", PDK_Skipped)
728 .Case(
"sccs", PDK_Skipped)
729 .Case(
"assert", PDK_Skipped)
730 .Case(
"unassert", PDK_Skipped)
731 .Case(
"if", PDK_Skipped)
732 .Case(
"ifdef", PDK_Skipped)
733 .Case(
"ifndef", PDK_Skipped)
734 .Case(
"elif", PDK_Skipped)
735 .Case(
"elifdef", PDK_Skipped)
736 .Case(
"elifndef", PDK_Skipped)
737 .Case(
"else", PDK_Skipped)
738 .Case(
"endif", PDK_Skipped)
739 .Default(PDK_Unknown);
756 TheTok.
getKind() == tok::raw_identifier &&
758 LangOpts.CPlusPlusModules) {
761 Token ModuleTok = TheTok;
764 }
while (TheTok.
getKind() == tok::comment);
765 if (TheTok.
getKind() != tok::semi) {
780 if (ActiveCommentLoc.
isValid())
781 End = ActiveCommentLoc;
796 const char *TokPtr =
SM.getCharacterData(TokStart, &
Invalid);
799 if (
Invalid || (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr)))
802 unsigned PhysOffset = 0;
807 while (Lexer::isObviouslySimpleCharacter(*TokPtr)) {
817 for (; CharNo; --CharNo) {
819 TokPtr += CharAndSize.Size;
820 PhysOffset += CharAndSize.Size;
827 if (!Lexer::isObviouslySimpleCharacter(*TokPtr))
828 PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr;
877 if (!
SM.isAtStartOfImmediateMacroExpansion(loc, &expansionLoc))
883 *MacroBegin = expansionLoc;
905 if (!
SM.isAtEndOfImmediateMacroExpansion(afterLoc, &expansionLoc))
911 *MacroEnd = expansionLoc;
924 if (Range.isTokenRange()) {
931 auto [FID, BeginOffs] =
SM.getDecomposedLoc(Begin);
936 if (!
SM.isInFileID(End, FID, &EndOffs) ||
946 return SM.getSLocEntry(
SM.getFileID(Loc))
948 .isExpansionTokenRange();
965 Range.setBegin(Begin);
970 if (Range.isTokenRange()) {
988 Range.setBegin(MacroBegin);
989 Range.setEnd(MacroEnd);
991 if (Range.isTokenRange())
1011 Range.setBegin(
SM.getImmediateSpellingLoc(Begin));
1012 Range.setEnd(
SM.getImmediateSpellingLoc(End));
1025 if (Range.isInvalid()) {
1032 if (beginInfo.first.isInvalid()) {
1038 if (!
SM.isInFileID(Range.getEnd(), beginInfo.first, &EndOffs) ||
1039 beginInfo.second > EndOffs) {
1045 bool invalidTemp =
false;
1046 StringRef file =
SM.getBufferData(beginInfo.first, &invalidTemp);
1053 return file.substr(beginInfo.second, EndOffs - beginInfo.second);
1059 assert(Loc.
isMacroID() &&
"Only reasonable to call this on macros");
1075 Loc =
SM.getImmediateExpansionRange(Loc).getBegin();
1082 FileID MacroFID =
SM.getFileID(Loc);
1083 if (
SM.isInFileID(SpellLoc, MacroFID))
1093 Loc =
SM.getSpellingLoc(Loc);
1099 StringRef ExpansionBuffer =
SM.getBufferData(ExpansionInfo.first);
1100 return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
1105 assert(Loc.
isMacroID() &&
"Only reasonable to call this on macros");
1107 while (
SM.isMacroArgExpansion(Loc))
1108 Loc =
SM.getImmediateExpansionRange(Loc).getBegin();
1114 if (!SpellLoc.
isFileID() ||
SM.isWrittenInScratchSpace(SpellLoc))
1120 Loc =
SM.getSpellingLoc(
SM.getImmediateExpansionRange(Loc).getBegin());
1126 StringRef ExpansionBuffer =
SM.getBufferData(ExpansionInfo.first);
1127 return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
1136 if (Str - 1 < BufferStart)
1139 if ((Str[0] ==
'\n' && Str[-1] ==
'\r') ||
1140 (Str[0] ==
'\r' && Str[-1] ==
'\n')) {
1141 if (Str - 2 < BufferStart)
1151 return *Str ==
'\\';
1159 if (LocInfo.first.isInvalid())
1162 StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
1168 StringRef Rest = Buffer.substr(
Line - Buffer.data());
1169 size_t NumWhitespaceChars = Rest.find_first_not_of(
" \t");
1170 return NumWhitespaceChars == StringRef::npos
1172 : Rest.take_front(NumWhitespaceChars);
1187 unsigned CharNo,
unsigned TokLen) {
1188 assert(FileLoc.
isMacroID() &&
"Must be a macro expansion");
1204 return SM.createExpansionLoc(SpellingLoc, II.
getBegin(), II.
getEnd(), TokLen);
1210 unsigned TokLen)
const {
1211 assert(Loc >= BufferStart && Loc <= BufferEnd &&
1212 "Location out of range for this buffer!");
1216 unsigned CharNo = Loc-BufferStart;
1217 if (FileLoc.isFileID())
1218 return FileLoc.getLocWithOffset(CharNo);
1222 assert(
PP &&
"This doesn't work on raw lexers");
1241 case '=':
return '#';
1242 case ')':
return ']';
1243 case '(':
return '[';
1244 case '!':
return '|';
1245 case '\'':
return '^';
1246 case '>':
return '}';
1247 case '/':
return '\\';
1248 case '<':
return '{';
1249 case '-':
return '~';
1264 L->
Diag(CP-2, diag::trigraph_ignored);
1269 L->
Diag(CP-2, diag::trigraph_converted) << StringRef(&Res, 1);
1281 if (Ptr[Size-1] !=
'\n' && Ptr[Size-1] !=
'\r')
1285 if ((Ptr[Size] ==
'\r' || Ptr[Size] ==
'\n') &&
1286 Ptr[Size-1] != Ptr[Size])
// Advances P past escaped newlines and returns the resulting pointer.
// As soon as the escape sequence at P is not followed by a newline
// (NewLineSize == 0), the current P is returned unchanged.
// NOTE(review): the enclosing loop and the computation of NewLineSize are on
// lines not visible in this excerpt.
1299const char *Lexer::SkipEscapedNewLines(
const char *P) {
// Position just after the escape introducer; the newline (if any) starts here.
1301 const char *AfterEscape;
1304 }
else if (*P ==
'?') {
// Alternative escape introducer: a question mark only qualifies when it is
// followed by a second question mark and then a slash (three characters).
1306 if (P[1] !=
'?' || P[2] !=
'/')
// No newline after the escape: nothing more to skip.
1316 if (NewLineSize == 0)
return P;
// Step over the escape and its newline.
1317 P = AfterEscape+NewLineSize;
1324 bool IncludeComments) {
1327 return std::nullopt;
1335 bool InvalidTemp =
false;
1336 StringRef
File =
SM.getBufferData(LocInfo.first, &InvalidTemp);
1338 return std::nullopt;
1340 const char *TokenBegin =
File.data() + LocInfo.second;
1343 Lexer lexer(
SM.getLocForStartOfFile(LocInfo.first), LangOpts,
File.begin(),
1344 TokenBegin,
File.end());
1355 bool IncludeComments) {
1356 const auto StartOfFile =
SM.getLocForStartOfFile(
SM.getFileID(Loc));
1357 while (Loc != StartOfFile) {
1360 return std::nullopt;
1366 if (!
Tok.is(tok::comment) || IncludeComments) {
1370 return std::nullopt;
1379 const LangOptions &LangOpts,
bool SkipTrailingWhitespaceAndNewLine) {
1381 if (!
Tok ||
Tok->isNot(TKind))
1386 unsigned NumWhitespaceChars = 0;
1387 if (SkipTrailingWhitespaceAndNewLine) {
1388 const char *TokenEnd =
SM.getCharacterData(TokenLoc) +
Tok->getLength();
1389 unsigned char C = *TokenEnd;
1392 NumWhitespaceChars++;
1396 if (
C ==
'\n' ||
C ==
'\r') {
1399 NumWhitespaceChars++;
1400 if ((
C ==
'\n' ||
C ==
'\r') &&
C != PrevC)
1401 NumWhitespaceChars++;
1426 if (Ptr[0] ==
'\\') {
1432 return {
'\\', Size};
1442 Diag(Ptr, diag::backslash_newline_space);
1445 Size += EscapedNewLineSize;
1446 Ptr += EscapedNewLineSize;
1449 auto CharAndSize = getCharAndSizeSlow(Ptr,
Tok);
1450 CharAndSize.Size += Size;
1455 return {
'\\',
Size};
1459 if (Ptr[0] ==
'?' && Ptr[1] ==
'?') {
1463 LangOpts.Trigraphs)) {
1469 if (
C ==
'\\')
goto Slash;
1475 return {*Ptr,
Size + 1u};
1489 if (Ptr[0] ==
'\\') {
1495 return {
'\\',
Size};
1500 Size += EscapedNewLineSize;
1501 Ptr += EscapedNewLineSize;
1504 auto CharAndSize = getCharAndSizeSlowNoWarn(Ptr, LangOpts);
1505 CharAndSize.Size +=
Size;
1510 return {
'\\',
Size};
1514 if (LangOpts.Trigraphs && Ptr[0] ==
'?' && Ptr[1] ==
'?') {
1520 if (
C ==
'\\')
goto Slash;
1526 return {*Ptr,
Size + 1u};
// Repositions the lexer at byte `Offset` from BufferStart, clamped to
// BufferEnd, and sets both start-of-line flags to `StartOfLine`.
//   Offset      - byte offset from BufferStart.
//   StartOfLine - value written to IsAtStartOfLine and IsAtPhysicalStartOfLine.
1534void Lexer::SetByteOffset(
unsigned Offset,
bool StartOfLine) {
1535 BufferPtr = BufferStart + Offset;
// Clamp so BufferPtr never ends up past BufferEnd.
// NOTE(review): the pointer is formed before it is range-checked; an Offset
// far beyond the buffer would form an out-of-range pointer. Consider clamping
// the offset itself before the addition.
1536 if (BufferPtr > BufferEnd)
1537 BufferPtr = BufferEnd;
// The same value is applied to the logical and the physical start-of-line flag.
1541 IsAtStartOfLine = StartOfLine;
1542 IsAtPhysicalStartOfLine = StartOfLine;
1546 static const llvm::sys::UnicodeCharSet UnicodeWhitespaceChars(
1548 return UnicodeWhitespaceChars.contains(Codepoint);
1553 llvm::raw_svector_ostream CharOS(CharBuf);
1554 llvm::write_hex(CharOS,
C, llvm::HexPrintStyle::Upper, 4);
1565 bool IsStart,
bool &IsExtension) {
1566 static const llvm::sys::UnicodeCharSet MathStartChars(
1568 static const llvm::sys::UnicodeCharSet MathContinueChars(
1570 if (MathStartChars.contains(
C) ||
1571 (!IsStart && MathContinueChars.contains(
C))) {
1579 bool &IsExtension) {
1580 if (LangOpts.AsmPreprocessor) {
1582 }
else if (LangOpts.DollarIdents &&
'$' ==
C) {
1584 }
else if (LangOpts.CPlusPlus || LangOpts.C23) {
1589 static const llvm::sys::UnicodeCharSet XIDStartChars(
XIDStartRanges);
1591 if (
C ==
'_' || XIDStartChars.contains(
C) || XIDContinueChars.contains(
C))
1595 }
else if (LangOpts.C11) {
1596 static const llvm::sys::UnicodeCharSet C11AllowedIDChars(
1598 return C11AllowedIDChars.contains(
C);
1600 static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
1602 return C99AllowedIDChars.contains(
C);
1607 bool &IsExtension) {
1608 assert(
C > 0x7F &&
"isAllowedInitiallyIDChar called with an ASCII codepoint");
1609 IsExtension =
false;
1610 if (LangOpts.AsmPreprocessor) {
1613 if (LangOpts.CPlusPlus || LangOpts.C23) {
1614 static const llvm::sys::UnicodeCharSet XIDStartChars(
XIDStartRanges);
1615 if (XIDStartChars.contains(
C))
1623 static const llvm::sys::UnicodeCharSet C11DisallowedInitialIDChars(
1625 return !C11DisallowedInitialIDChars.contains(
C);
1627 static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
1629 return !C99DisallowedInitialIDChars.contains(
C);
1635 static const llvm::sys::UnicodeCharSet MathStartChars(
1637 static const llvm::sys::UnicodeCharSet MathContinueChars(
1640 (void)MathStartChars;
1641 (void)MathContinueChars;
1642 assert((MathStartChars.contains(
C) || MathContinueChars.contains(
C)) &&
1643 "Unexpected mathematical notation codepoint");
1644 Diags.
Report(Range.getBegin(), diag::ext_mathematical_notation)
1657 if (!Diags.
isIgnored(diag::warn_c99_compat_unicode_id, Range.getBegin())) {
1659 CannotAppearInIdentifier = 0,
1660 CannotStartIdentifier
1663 static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
1665 static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
1667 if (!C99AllowedIDChars.contains(
C)) {
1668 Diags.
Report(Range.getBegin(), diag::warn_c99_compat_unicode_id)
1670 << CannotAppearInIdentifier;
1671 }
else if (
IsFirst && C99DisallowedInitialIDChars.contains(
C)) {
1672 Diags.
Report(Range.getBegin(), diag::warn_c99_compat_unicode_id)
1674 << CannotStartIdentifier;
1686 struct HomoglyphPair {
1689 bool operator<(HomoglyphPair R)
const {
return Character < R.Character; }
1691 static constexpr HomoglyphPair SortedHomoglyphs[] = {
1744 std::lower_bound(std::begin(SortedHomoglyphs),
1745 std::end(SortedHomoglyphs) - 1, HomoglyphPair{
C,
'\0'});
1746 if (Homoglyph->Character ==
C) {
1747 if (Homoglyph->LooksLike) {
1748 const char LooksLikeStr[] = {Homoglyph->LooksLike, 0};
1749 Diags.
Report(Range.getBegin(), diag::warn_utf8_symbol_homoglyph)
1752 Diags.
Report(Range.getBegin(), diag::warn_utf8_symbol_zero_width)
1772 bool InvalidOnlyAtStart =
IsFirst && !IsIDStart && IsIDContinue;
1774 if (!
IsFirst || InvalidOnlyAtStart) {
1775 Diags.
Report(Range.getBegin(), diag::err_character_not_allowed_identifier)
1779 Diags.
Report(Range.getBegin(), diag::err_character_not_allowed)
1785bool Lexer::tryConsumeIdentifierUCN(
const char *&CurPtr,
unsigned Size,
1787 const char *UCNPtr = CurPtr +
Size;
1788 uint32_t CodePoint = tryReadUCN(UCNPtr, CurPtr,
nullptr);
1789 if (CodePoint == 0) {
1792 bool IsExtension =
false;
1797 !
PP->isPreprocessedOutput())
1799 PP->getDiagnostics(), LangOpts, CodePoint,
1817 if ((UCNPtr - CurPtr == 6 && CurPtr[1] ==
'u') ||
1818 (UCNPtr - CurPtr == 10 && CurPtr[1] ==
'U'))
1821 while (CurPtr != UCNPtr)
1822 (void)getAndAdvanceChar(CurPtr,
Result);
1826bool Lexer::tryConsumeIdentifierUTF8Char(
const char *&CurPtr,
Token &
Result) {
1827 llvm::UTF32 CodePoint;
1832 unsigned FirstCodeUnitSize;
1833 getCharAndSize(CurPtr, FirstCodeUnitSize);
1834 const char *CharStart = CurPtr + FirstCodeUnitSize - 1;
1835 const char *UnicodePtr = CharStart;
1837 llvm::ConversionResult ConvResult = llvm::convertUTF8Sequence(
1838 (
const llvm::UTF8 **)&UnicodePtr, (
const llvm::UTF8 *)BufferEnd,
1839 &CodePoint, llvm::strictConversion);
1840 if (ConvResult != llvm::conversionOK)
1843 bool IsExtension =
false;
1850 !
PP->isPreprocessedOutput())
1852 PP->getDiagnostics(), LangOpts, CodePoint,
1860 PP->getDiagnostics(), CodePoint,
1872 ConsumeChar(CurPtr, FirstCodeUnitSize,
Result);
1873 CurPtr = UnicodePtr;
1877bool Lexer::LexUnicodeIdentifierStart(
Token &
Result, uint32_t
C,
1878 const char *CurPtr) {
1879 bool IsExtension =
false;
1882 !
PP->isPreprocessedOutput()) {
1894 return LexIdentifierContinue(
Result, CurPtr);
1898 !
PP->isPreprocessedOutput() && !
isASCII(*BufferPtr) &&
1910 PP->getDiagnostics(), LangOpts,
C,
1919 FormTokenWithChars(
Result, CurPtr, tok::unknown);
1924 unsigned char C = *CurPtr;
1934#if (defined(__i386__) || defined(__x86_64__)) && defined(__has_attribute) && \
1935 __has_attribute(target) && !defined(_MSC_VER)
1937fastParseASCIIIdentifierSSE42(const
char *CurPtr, const
char *BufferEnd) {
1938 alignas(16)
static constexpr char AsciiIdentifierRange[16] = {
1939 '_',
'_',
'A',
'Z',
'a',
'z',
'0',
'9',
1941 constexpr ssize_t BytesPerRegister = 16;
1943 __m128i AsciiIdentifierRangeV =
1946 while (LLVM_LIKELY(BufferEnd - CurPtr >= BytesPerRegister)) {
1953 if (Consumed == BytesPerRegister)
1963 return fastParseASCIIIdentifierSSE42(CurPtr, BufferEnd);
1969 const char *BufferEnd) {
1973bool Lexer::LexIdentifierContinue(
Token &
Result,
const char *CurPtr) {
1982 unsigned char C = getCharAndSize(CurPtr, Size);
1984 CurPtr = ConsumeChar(CurPtr, Size,
Result);
1989 if (!LangOpts.DollarIdents)
1993 Diag(CurPtr, diag::ext_dollar_in_identifier);
1994 CurPtr = ConsumeChar(CurPtr, Size,
Result);
1997 if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result))
2005 const char *IdStart = BufferPtr;
2006 FormTokenWithChars(
Result, CurPtr, tok::raw_identifier);
2007 Result.setRawIdentifierData(IdStart);
2016 const IdentifierInfo *II =
PP->LookUpIdentifierInfo(
Result);
2023 if (isCodeCompletionPoint(CurPtr)) {
2025 Result.setKind(tok::code_completion);
2031 assert(*CurPtr == 0 &&
"Completion character must be 0");
2036 if (CurPtr < BufferEnd) {
2047 return PP->HandleIdentifier(
Result);
// Decides whether the text at `Start` begins a hexadecimal literal.
// Returns true when the second decoded character is 'x' or 'X'.
// NOTE(review): the test applied to the first character (C1) is on lines not
// visible in this excerpt; presumably it must be '0' -- confirm.
2054bool Lexer::isHexaLiteral(
const char *Start,
const LangOptions &LangOpts) {
// First decoded character of the candidate literal.
2056 char C1 = CharAndSize1.Char;
// Second decoded character; this is the radix marker being tested.
2062 char C2 = CharAndSize2.Char;
2063 return (C2 ==
'x' || C2 ==
'X');
2069bool Lexer::LexNumericConstant(
Token &
Result,
const char *CurPtr) {
2071 char C = getCharAndSize(CurPtr, Size);
2074 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2076 if (LangOpts.HLSL &&
C ==
'.' && (*CurPtr ==
'x' || *CurPtr ==
'r')) {
2080 C = getCharAndSize(CurPtr, Size);
2084 if ((
C ==
'-' ||
C ==
'+') && (PrevCh ==
'E' || PrevCh ==
'e')) {
2087 if (!LangOpts.MicrosoftExt || !isHexaLiteral(BufferPtr, LangOpts))
2088 return LexNumericConstant(
Result, ConsumeChar(CurPtr, Size,
Result));
2092 if ((
C ==
'-' ||
C ==
'+') && (PrevCh ==
'P' || PrevCh ==
'p')) {
2096 bool IsHexFloat =
true;
2097 if (!LangOpts.C99) {
2098 if (!isHexaLiteral(BufferPtr, LangOpts))
2100 else if (!LangOpts.CPlusPlus17 &&
2101 std::find(BufferPtr, CurPtr,
'_') != CurPtr)
2105 return LexNumericConstant(
Result, ConsumeChar(CurPtr, Size,
Result));
2109 if (
C ==
'\'' && (LangOpts.CPlusPlus14 || LangOpts.C23)) {
2113 Diag(CurPtr, LangOpts.CPlusPlus
2114 ? diag::warn_cxx11_compat_digit_separator
2115 : diag::warn_c23_compat_digit_separator);
2116 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2117 CurPtr = ConsumeChar(CurPtr, NextSize,
Result);
2118 return LexNumericConstant(
Result, CurPtr);
2123 if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result))
2124 return LexNumericConstant(
Result, CurPtr);
2126 return LexNumericConstant(
Result, CurPtr);
2129 const char *TokStart = BufferPtr;
2130 FormTokenWithChars(
Result, CurPtr, tok::numeric_constant);
2131 Result.setLiteralData(TokStart);
2137const char *Lexer::LexUDSuffix(
Token &
Result,
const char *CurPtr,
2138 bool IsStringLiteral) {
2139 assert(LangOpts.CPlusPlus);
2143 char C = getCharAndSize(CurPtr, Size);
2144 bool Consumed =
false;
2147 if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result))
2149 else if (!
isASCII(
C) && tryConsumeIdentifierUTF8Char(CurPtr,
Result))
2155 if (!LangOpts.CPlusPlus11) {
2158 C ==
'_' ? diag::warn_cxx11_compat_user_defined_literal
2159 : diag::warn_cxx11_compat_reserved_user_defined_literal)
2170 bool IsUDSuffix =
false;
2173 else if (IsStringLiteral && LangOpts.CPlusPlus14) {
2177 const unsigned MaxStandardSuffixLength = 3;
2178 char Buffer[MaxStandardSuffixLength] = {
C };
2179 unsigned Consumed =
Size;
2182 auto [
Next, NextSize] =
2186 const StringRef CompleteSuffix(Buffer, Chars);
2192 if (Chars == MaxStandardSuffixLength)
2196 Buffer[Chars++] =
Next;
2197 Consumed += NextSize;
2203 Diag(CurPtr, LangOpts.MSVCCompat
2204 ? diag::ext_ms_reserved_user_defined_literal
2205 : diag::ext_reserved_user_defined_literal)
2210 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2215 C = getCharAndSize(CurPtr, Size);
2217 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2218 }
else if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result)) {
2219 }
else if (!
isASCII(
C) && tryConsumeIdentifierUTF8Char(CurPtr,
Result)) {
2229bool Lexer::LexStringLiteral(
Token &
Result,
const char *CurPtr,
2231 const char *AfterQuote = CurPtr;
2233 const char *NulCharacter =
nullptr;
2236 (Kind == tok::utf8_string_literal ||
2237 Kind == tok::utf16_string_literal ||
2238 Kind == tok::utf32_string_literal))
2239 Diag(BufferPtr, LangOpts.CPlusPlus ? diag::warn_cxx98_compat_unicode_literal
2240 : diag::warn_c99_compat_unicode_literal);
2242 char C = getAndAdvanceChar(CurPtr,
Result);
2247 C = getAndAdvanceChar(CurPtr,
Result);
2249 if (
C ==
'\n' ||
C ==
'\r' ||
2250 (
C == 0 && CurPtr-1 == BufferEnd)) {
2252 Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 1;
2253 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2258 if (isCodeCompletionPoint(CurPtr-1)) {
2260 codeCompleteIncludedFile(AfterQuote, CurPtr - 1,
false);
2262 PP->CodeCompleteNaturalLanguage();
2263 FormTokenWithChars(
Result, CurPtr - 1, tok::unknown);
2268 NulCharacter = CurPtr-1;
2270 C = getAndAdvanceChar(CurPtr,
Result);
2274 if (LangOpts.CPlusPlus)
2275 CurPtr = LexUDSuffix(
Result, CurPtr,
true);
2279 Diag(NulCharacter, diag::null_in_char_or_string) << 1;
2282 const char *TokStart = BufferPtr;
2283 FormTokenWithChars(
Result, CurPtr, Kind);
2284 Result.setLiteralData(TokStart);
2290bool Lexer::LexRawStringLiteral(
Token &
Result,
const char *CurPtr,
2298 Diag(BufferPtr, diag::warn_cxx98_compat_raw_string_literal);
2300 unsigned PrefixLen = 0;
2304 llvm::is_contained({
'$',
'@',
'`'}, CurPtr[PrefixLen])) {
2305 const char *Pos = &CurPtr[PrefixLen];
2306 Diag(Pos, LangOpts.CPlusPlus26
2307 ? diag::warn_cxx26_compat_raw_string_literal_character_set
2308 : diag::ext_cxx26_raw_string_literal_character_set)
2309 << StringRef(Pos, 1);
2315 if (CurPtr[PrefixLen] !=
'(') {
2317 const char *PrefixEnd = &CurPtr[PrefixLen];
2318 if (PrefixLen == 16) {
2319 Diag(PrefixEnd, diag::err_raw_delim_too_long);
2320 }
else if (*PrefixEnd ==
'\n') {
2321 Diag(PrefixEnd, diag::err_invalid_newline_raw_delim);
2323 Diag(PrefixEnd, diag::err_invalid_char_raw_delim)
2324 << StringRef(PrefixEnd, 1);
2336 if (
C == 0 && CurPtr-1 == BufferEnd) {
2342 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2347 const char *Prefix = CurPtr;
2348 CurPtr += PrefixLen + 1;
2355 if (strncmp(CurPtr, Prefix, PrefixLen) == 0 && CurPtr[PrefixLen] ==
'"') {
2356 CurPtr += PrefixLen + 1;
2359 }
else if (
C == 0 && CurPtr-1 == BufferEnd) {
2361 Diag(BufferPtr, diag::err_unterminated_raw_string)
2362 << StringRef(Prefix, PrefixLen);
2363 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2369 if (LangOpts.CPlusPlus)
2370 CurPtr = LexUDSuffix(
Result, CurPtr,
true);
2373 const char *TokStart = BufferPtr;
2374 FormTokenWithChars(
Result, CurPtr, Kind);
2375 Result.setLiteralData(TokStart);
2381bool Lexer::LexAngledStringLiteral(
Token &
Result,
const char *CurPtr) {
2383 const char *NulCharacter =
nullptr;
2384 const char *AfterLessPos = CurPtr;
2385 char C = getAndAdvanceChar(CurPtr,
Result);
2390 C = getAndAdvanceChar(CurPtr,
Result);
2393 (
C == 0 && (CurPtr - 1 == BufferEnd))) {
2396 FormTokenWithChars(
Result, AfterLessPos, tok::less);
2401 if (isCodeCompletionPoint(CurPtr - 1)) {
2402 codeCompleteIncludedFile(AfterLessPos, CurPtr - 1,
true);
2404 FormTokenWithChars(
Result, CurPtr - 1, tok::unknown);
2407 NulCharacter = CurPtr-1;
2409 C = getAndAdvanceChar(CurPtr,
Result);
2414 Diag(NulCharacter, diag::null_in_char_or_string) << 1;
2417 const char *TokStart = BufferPtr;
2418 FormTokenWithChars(
Result, CurPtr, tok::header_name);
2419 Result.setLiteralData(TokStart);
// Drives code completion for a partially typed included-file path that spans
// [PathStart, CompletionPoint).
// NOTE(review): the trailing parameter line (IsAngled is used below) and
// the declaration receiving the directory part (Dir) are not visible in this
// excerpt.
2423void Lexer::codeCompleteIncludedFile(
const char *PathStart,
2424 const char *CompletionPoint,
2427 StringRef PartialPath(PathStart, CompletionPoint - PathStart);
// Path separators: a forward slash always, plus a backslash when
// LangOpts.MSVCCompat is set.
2428 llvm::StringRef SlashChars = LangOpts.MSVCCompat ?
"/\\" :
"/";
// Split at the last separator: what precedes it is the directory (empty when
// there is no separator); what follows it is the filename prefix being
// completed (the whole path when there is no separator).
2429 auto Slash = PartialPath.find_last_of(SlashChars);
2431 (Slash == StringRef::npos) ?
"" : PartialPath.take_front(Slash);
2432 const char *StartOfFilename =
2433 (Slash == StringRef::npos) ? PathStart : PathStart + Slash + 1;
// Register the filename prefix as the code-completion identifier.
2435 PP->setCodeCompletionIdentifierInfo(&
PP->getIdentifierTable().get(
2436 StringRef(StartOfFilename, CompletionPoint - StartOfFilename)));
// Scan forward from the completion point, looking at the character that
// follows it: the closing delimiter ('>' when IsAngled, otherwise '"') and
// path separators are tested for here. The actions taken in each case are on
// lines not visible in this excerpt.
2439 while (CompletionPoint < BufferEnd) {
2440 char Next = *(CompletionPoint + 1);
2444 if (
Next == (IsAngled ?
'>' :
'"'))
2446 if (SlashChars.contains(
Next))
// Report the range [StartOfFilename, CompletionPoint), expressed as offsets
// from BufferStart relative to FileLoc, then hand the directory and the
// IsAngled flag to the preprocessor.
2450 PP->setCodeCompletionTokenRange(
2451 FileLoc.getLocWithOffset(StartOfFilename - BufferStart),
2452 FileLoc.getLocWithOffset(CompletionPoint - BufferStart));
2453 PP->CodeCompleteIncludedFile(Dir, IsAngled);
2458bool Lexer::LexCharConstant(
Token &
Result,
const char *CurPtr,
2461 const char *NulCharacter =
nullptr;
2464 if (Kind == tok::utf16_char_constant || Kind == tok::utf32_char_constant)
2465 Diag(BufferPtr, LangOpts.CPlusPlus
2466 ? diag::warn_cxx98_compat_unicode_literal
2467 : diag::warn_c99_compat_unicode_literal);
2468 else if (Kind == tok::utf8_char_constant)
2469 Diag(BufferPtr, LangOpts.CPlusPlus
2470 ? diag::warn_cxx14_compat_u8_character_literal
2471 : diag::warn_c17_compat_u8_character_literal);
2474 char C = getAndAdvanceChar(CurPtr,
Result);
2477 Diag(BufferPtr, diag::ext_empty_character);
2478 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2485 C = getAndAdvanceChar(CurPtr,
Result);
2487 if (
C ==
'\n' ||
C ==
'\r' ||
2488 (
C == 0 && CurPtr-1 == BufferEnd)) {
2490 Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 0;
2491 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2496 if (isCodeCompletionPoint(CurPtr-1)) {
2497 PP->CodeCompleteNaturalLanguage();
2498 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2503 NulCharacter = CurPtr-1;
2505 C = getAndAdvanceChar(CurPtr,
Result);
2509 if (LangOpts.CPlusPlus)
2510 CurPtr = LexUDSuffix(
Result, CurPtr,
false);
2514 Diag(NulCharacter, diag::null_in_char_or_string) << 0;
2517 const char *TokStart = BufferPtr;
2518 FormTokenWithChars(
Result, CurPtr, Kind);
2519 Result.setLiteralData(TokStart);
2527bool Lexer::SkipWhitespace(
Token &
Result,
const char *CurPtr,
2528 bool &TokAtPhysicalStartOfLine) {
2532 unsigned char Char = *CurPtr;
2534 const char *lastNewLine =
nullptr;
2535 auto setLastNewLine = [&](
const char *Ptr) {
2541 setLastNewLine(CurPtr - 1);
2560 if (*CurPtr ==
'\n')
2561 setLastNewLine(CurPtr);
2568 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2570 IsAtStartOfLine =
true;
2571 IsAtPhysicalStartOfLine =
true;
2578 char PrevChar = CurPtr[-1];
2584 TokAtPhysicalStartOfLine =
true;
2586 if (NewLinePtr && lastNewLine && NewLinePtr != lastNewLine &&
PP) {
2587 if (
auto *Handler =
PP->getEmptylineHandler())
2603bool Lexer::SkipLineComment(
Token &
Result,
const char *CurPtr,
2604 bool &TokAtPhysicalStartOfLine) {
2609 Diag(BufferPtr, diag::ext_line_comment);
2627 bool UnicodeDecodingAlreadyDiagnosed =
false;
2634 C !=
'\n' &&
C !=
'\r') {
2636 UnicodeDecodingAlreadyDiagnosed =
false;
2640 unsigned Length = llvm::getUTF8SequenceSize(
2641 (
const llvm::UTF8 *)CurPtr, (
const llvm::UTF8 *)BufferEnd);
2644 Diag(CurPtr, diag::warn_invalid_utf8_in_comment);
2645 UnicodeDecodingAlreadyDiagnosed =
true;
2648 UnicodeDecodingAlreadyDiagnosed =
false;
2654 const char *NextLine = CurPtr;
2657 const char *EscapePtr = CurPtr-1;
2658 bool HasSpace =
false;
2664 if (*EscapePtr ==
'\\')
2667 else if (EscapePtr[0] ==
'/' && EscapePtr[-1] ==
'?' &&
2668 EscapePtr[-2] ==
'?' && LangOpts.Trigraphs)
2670 CurPtr = EscapePtr-2;
2676 Diag(EscapePtr, diag::backslash_newline_space);
2683 const char *OldPtr = CurPtr;
2686 C = getAndAdvanceChar(CurPtr,
Result);
2691 if (
C != 0 && CurPtr == OldPtr+1) {
2699 if (CurPtr != OldPtr + 1 &&
C !=
'/' &&
2700 (CurPtr == BufferEnd + 1 || CurPtr[0] !=
'/')) {
2701 for (; OldPtr != CurPtr; ++OldPtr)
2702 if (OldPtr[0] ==
'\n' || OldPtr[0] ==
'\r') {
2706 const char *ForwardPtr = CurPtr;
2709 if (ForwardPtr[0] ==
'/' && ForwardPtr[1] ==
'/')
2714 Diag(OldPtr-1, diag::ext_multi_line_line_comment);
2719 if (
C ==
'\r' ||
C ==
'\n' || CurPtr == BufferEnd + 1) {
2724 if (
C ==
'\0' && isCodeCompletionPoint(CurPtr-1)) {
2725 PP->CodeCompleteNaturalLanguage();
2742 return SaveLineComment(
Result, CurPtr);
2756 NewLinePtr = CurPtr++;
2760 TokAtPhysicalStartOfLine =
true;
2769bool Lexer::SaveLineComment(
Token &
Result,
const char *CurPtr) {
2772 FormTokenWithChars(
Result, CurPtr, tok::comment);
2784 assert(Spelling[0] ==
'/' && Spelling[1] ==
'/' &&
"Not line comment?");
2788 Result.setKind(tok::comment);
2799 assert(CurPtr[0] ==
'\n' || CurPtr[0] ==
'\r');
2802 const char *TrigraphPos =
nullptr;
2804 const char *SpacePos =
nullptr;
2811 if (CurPtr[0] ==
'\n' || CurPtr[0] ==
'\r') {
2813 if (CurPtr[0] == CurPtr[1])
2827 if (*CurPtr ==
'\\') {
2829 }
else if (CurPtr[0] ==
'/' && CurPtr[-1] ==
'?' && CurPtr[-2] ==
'?') {
2831 TrigraphPos = CurPtr - 2;
2842 if (*CurPtr !=
'\n' && *CurPtr !=
'\r')
2851 L->
Diag(TrigraphPos, diag::trigraph_ignored_block_comment);
2855 L->
Diag(TrigraphPos, diag::trigraph_ends_block_comment);
2860 L->
Diag(CurPtr + 1, diag::escaped_newline_block_comment_end);
2864 L->
Diag(SpacePos, diag::backslash_newline_space);
2870#include <emmintrin.h>
2885bool Lexer::SkipBlockComment(
Token &
Result,
const char *CurPtr,
2886 bool &TokAtPhysicalStartOfLine) {
2896 unsigned char C = getCharAndSize(CurPtr, CharSize);
2898 if (
C == 0 && CurPtr == BufferEnd+1) {
2900 Diag(BufferPtr, diag::err_unterminated_block_comment);
2906 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2923 bool UnicodeDecodingAlreadyDiagnosed =
false;
2928 if (CurPtr + 24 < BufferEnd &&
2931 !(
PP &&
PP->getCodeCompletionFileLoc() == FileLoc)) {
2933 while (
C !=
'/' && (
intptr_t)CurPtr % 16 != 0) {
2938 if (
C ==
'/')
goto FoundSlash;
2942 while (CurPtr + 16 < BufferEnd) {
2944 if (LLVM_UNLIKELY(Mask != 0)) {
2954 CurPtr += llvm::countr_zero<unsigned>(cmp) + 1;
2960 __vector
unsigned char LongUTF = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
2961 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
2962 0x80, 0x80, 0x80, 0x80};
2963 __vector
unsigned char Slashes = {
2964 '/',
'/',
'/',
'/',
'/',
'/',
'/',
'/',
2965 '/',
'/',
'/',
'/',
'/',
'/',
'/',
'/'
2967 while (CurPtr + 16 < BufferEnd) {
2969 vec_any_ge(*(
const __vector
unsigned char *)CurPtr, LongUTF)))
2971 if (
vec_any_eq(*(
const __vector
unsigned char *)CurPtr, Slashes)) {
2978 while (CurPtr + 16 < BufferEnd) {
2979 bool HasNonASCII =
false;
2980 for (
unsigned I = 0; I < 16; ++I)
2981 HasNonASCII |= !
isASCII(CurPtr[I]);
2983 if (LLVM_UNLIKELY(HasNonASCII))
2986 bool HasSlash =
false;
2987 for (
unsigned I = 0; I < 16; ++I)
2988 HasSlash |= CurPtr[I] ==
'/';
3002 while (
C !=
'/' &&
C !=
'\0') {
3004 UnicodeDecodingAlreadyDiagnosed =
false;
3011 unsigned Length = llvm::getUTF8SequenceSize(
3012 (
const llvm::UTF8 *)CurPtr - 1, (
const llvm::UTF8 *)BufferEnd);
3015 Diag(CurPtr - 1, diag::warn_invalid_utf8_in_comment);
3016 UnicodeDecodingAlreadyDiagnosed =
true;
3018 UnicodeDecodingAlreadyDiagnosed =
false;
3019 CurPtr += Length - 1;
3026 if (CurPtr[-2] ==
'*')
3029 if ((CurPtr[-2] ==
'\n' || CurPtr[-2] ==
'\r')) {
3031 LangOpts.Trigraphs)) {
3037 if (CurPtr[0] ==
'*' && CurPtr[1] !=
'/') {
3042 Diag(CurPtr-1, diag::warn_nested_block_comment);
3044 }
else if (
C == 0 && CurPtr == BufferEnd+1) {
3046 Diag(BufferPtr, diag::err_unterminated_block_comment);
3055 FormTokenWithChars(
Result, CurPtr, tok::unknown);
3061 }
else if (
C ==
'\0' && isCodeCompletionPoint(CurPtr-1)) {
3062 PP->CodeCompleteNaturalLanguage();
3080 FormTokenWithChars(
Result, CurPtr, tok::comment);
3089 SkipWhitespace(
Result, CurPtr+1, TokAtPhysicalStartOfLine);
3107 "Must be in a preprocessing directive!");
3112 const char *CurPtr = BufferPtr;
3114 char Char = getAndAdvanceChar(CurPtr, Tmp);
3122 if (CurPtr-1 != BufferEnd) {
3123 if (isCodeCompletionPoint(CurPtr-1)) {
3124 PP->CodeCompleteNaturalLanguage();
3139 assert(CurPtr[-1] == Char &&
"Trigraphs for newline?");
3140 BufferPtr = CurPtr-1;
3144 if (Tmp.
is(tok::code_completion)) {
3146 PP->CodeCompleteNaturalLanguage();
3149 assert(Tmp.
is(tok::eod) &&
"Unexpected token!");
3161bool Lexer::LexEndOfFile(
Token &
Result,
const char *CurPtr) {
3169 FormTokenWithChars(
Result, CurPtr, tok::eod);
3180 Result.startToken();
3181 BufferPtr = BufferEnd;
3182 FormTokenWithChars(Result, BufferEnd, tok::eof);
3186 if (
PP->isRecordingPreamble() &&
PP->isInPrimaryFile()) {
3192 MIOpt.ExitTopLevelConditional();
3200 if (
PP->getCodeCompletionFileLoc() != FileLoc)
3202 diag::err_pp_unterminated_conditional);
3209 if (CurPtr != BufferStart && (CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r'))
3210 Diag(BufferEnd, diag::warn_no_newline_eof)
3222std::optional<Token> Lexer::peekNextPPToken() {
3223 assert(!
LexingRawMode &&
"How can we expand a macro from a skipping buffer?");
3225 if (isDependencyDirectivesLexer()) {
3226 if (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size())
3227 return std::nullopt;
3229 (void)convertDependencyDirectiveToken(
3230 DepDirectives.front().Tokens[NextDepDirectiveTokenIndex],
Result);
3240 const char *TmpBufferPtr = BufferPtr;
3242 bool atStartOfLine = IsAtStartOfLine;
3243 bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
3244 bool leadingSpace = HasLeadingSpace;
3250 BufferPtr = TmpBufferPtr;
3252 HasLeadingSpace = leadingSpace;
3253 IsAtStartOfLine = atStartOfLine;
3254 IsAtPhysicalStartOfLine = atPhysicalStartOfLine;
3258 if (
Tok.
is(tok::eof))
3259 return std::nullopt;
3266 const char *Terminator = CMK ==
CMK_Perforce ?
"<<<<\n" :
">>>>>>>";
3268 auto RestOfBuffer = StringRef(CurPtr, BufferEnd - CurPtr).substr(TermLen);
3269 size_t Pos = RestOfBuffer.find(Terminator);
3270 while (Pos != StringRef::npos) {
3273 (RestOfBuffer[Pos - 1] !=
'\r' && RestOfBuffer[Pos - 1] !=
'\n')) {
3274 RestOfBuffer = RestOfBuffer.substr(Pos+TermLen);
3275 Pos = RestOfBuffer.find(Terminator);
3278 return RestOfBuffer.data()+Pos;
3287bool Lexer::IsStartOfConflictMarker(
const char *CurPtr) {
3289 if (CurPtr != BufferStart &&
3290 CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')
3294 if (!StringRef(CurPtr, BufferEnd - CurPtr).starts_with(
"<<<<<<<") &&
3295 !StringRef(CurPtr, BufferEnd - CurPtr).starts_with(
">>>> "))
3310 Diag(CurPtr, diag::err_conflict_marker);
3311 CurrentConflictMarkerState =
Kind;
3315 while (*CurPtr !=
'\r' && *CurPtr !=
'\n') {
3316 assert(CurPtr != BufferEnd &&
"Didn't find end of line");
3331bool Lexer::HandleEndOfConflictMarker(
const char *CurPtr) {
3333 if (CurPtr != BufferStart &&
3334 CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')
3343 for (
unsigned i = 1; i != 4; ++i)
3344 if (CurPtr[i] != CurPtr[0])
3351 CurrentConflictMarkerState)) {
3355 while (CurPtr != BufferEnd && *CurPtr !=
'\r' && *CurPtr !=
'\n')
3361 CurrentConflictMarkerState =
CMK_None;
3369 const char *BufferEnd) {
3370 if (CurPtr == BufferEnd)
3373 for (; CurPtr != BufferEnd; ++CurPtr) {
3374 if (CurPtr[0] ==
'#' && CurPtr[1] ==
'>')
3380bool Lexer::lexEditorPlaceholder(
Token &
Result,
const char *CurPtr) {
3381 assert(CurPtr[-1] ==
'<' && CurPtr[0] ==
'#' &&
"Not a placeholder!");
3387 const char *Start = CurPtr - 1;
3388 if (!LangOpts.AllowEditorPlaceholders)
3389 Diag(Start, diag::err_placeholder_in_source);
3391 FormTokenWithChars(
Result, End, tok::raw_identifier);
3392 Result.setRawIdentifierData(Start);
3399bool Lexer::isCodeCompletionPoint(
const char *CurPtr)
const {
3400 if (
PP &&
PP->isCodeCompletionEnabled()) {
3401 SourceLocation Loc = FileLoc.getLocWithOffset(CurPtr-BufferStart);
3402 return Loc ==
PP->getCodeCompletionLoc();
3413 if (Opts.CPlusPlus23)
3414 DiagId = diag::warn_cxx23_delimited_escape_sequence;
3415 else if (Opts.C2y && !Named)
3416 DiagId = diag::warn_c2y_delimited_escape_sequence;
3418 DiagId = diag::ext_delimited_escape_sequence;
3424 if (!Opts.CPlusPlus)
3425 Ext = Named ? 2 : 1 ;
3429 Diags.
Report(Loc, DiagId) << Named << Ext;
3432std::optional<uint32_t> Lexer::tryReadNumericUCN(
const char *&StartPtr,
3433 const char *SlashLoc,
3436 char Kind = getCharAndSize(StartPtr, CharSize);
3437 assert((Kind ==
'u' || Kind ==
'U') &&
"expected a UCN");
3439 unsigned NumHexDigits;
3442 else if (Kind ==
'U')
3445 bool Delimited =
false;
3446 bool FoundEndDelimiter =
false;
3450 if (!LangOpts.CPlusPlus && !LangOpts.C99) {
3452 Diag(SlashLoc, diag::warn_ucn_not_valid_in_c89);
3453 return std::nullopt;
3456 const char *CurPtr = StartPtr + CharSize;
3457 const char *KindLoc = &CurPtr[-1];
3459 uint32_t CodePoint = 0;
3460 while (Count != NumHexDigits || Delimited) {
3461 char C = getCharAndSize(CurPtr, CharSize);
3462 if (!Delimited && Count == 0 &&
C ==
'{') {
3468 if (Delimited &&
C ==
'}') {
3470 FoundEndDelimiter =
true;
3474 unsigned Value = llvm::hexDigitValue(
C);
3475 if (
Value == std::numeric_limits<unsigned>::max()) {
3479 Diag(SlashLoc, diag::warn_delimited_ucn_incomplete)
3480 << StringRef(KindLoc, 1);
3481 return std::nullopt;
3484 if (CodePoint & 0xF000'0000) {
3486 Diag(KindLoc, diag::err_escape_too_large) << 0;
3487 return std::nullopt;
3498 Diag(SlashLoc, FoundEndDelimiter ? diag::warn_delimited_ucn_empty
3499 : diag::warn_ucn_escape_no_digits)
3500 << StringRef(KindLoc, 1);
3501 return std::nullopt;
3504 if (Delimited && Kind ==
'U') {
3506 Diag(SlashLoc, diag::err_hex_escape_no_digits) << StringRef(KindLoc, 1);
3507 return std::nullopt;
3510 if (!Delimited && Count != NumHexDigits) {
3512 Diag(SlashLoc, diag::warn_ucn_escape_incomplete);
3514 if (Count == 4 && NumHexDigits == 8) {
3515 CharSourceRange URange =
makeCharRange(*
this, KindLoc, KindLoc + 1);
3516 Diag(KindLoc, diag::note_ucn_four_not_eight)
3520 return std::nullopt;
3523 if (Delimited &&
PP)
3526 PP->getDiagnostics());
3533 if (CurPtr - StartPtr == (
ptrdiff_t)(Count + 1 + (Delimited ? 2 : 0)))
3536 while (StartPtr != CurPtr)
3537 (void)getAndAdvanceChar(StartPtr, *
Result);
3544std::optional<uint32_t> Lexer::tryReadNamedUCN(
const char *&StartPtr,
3545 const char *SlashLoc,
3550 char C = getCharAndSize(StartPtr, CharSize);
3551 assert(
C ==
'N' &&
"expected \\N{...}");
3553 const char *CurPtr = StartPtr + CharSize;
3554 const char *KindLoc = &CurPtr[-1];
3556 C = getCharAndSize(CurPtr, CharSize);
3559 Diag(SlashLoc, diag::warn_ucn_escape_incomplete);
3560 return std::nullopt;
3563 const char *StartName = CurPtr;
3564 bool FoundEndDelimiter =
false;
3565 llvm::SmallVector<char, 30> Buffer;
3567 C = getCharAndSize(CurPtr, CharSize);
3570 FoundEndDelimiter =
true;
3576 Buffer.push_back(
C);
3579 if (!FoundEndDelimiter || Buffer.empty()) {
3581 Diag(SlashLoc, FoundEndDelimiter ? diag::warn_delimited_ucn_empty
3582 : diag::warn_delimited_ucn_incomplete)
3583 << StringRef(KindLoc, 1);
3584 return std::nullopt;
3587 StringRef Name(Buffer.data(), Buffer.size());
3588 std::optional<char32_t>
Match =
3589 llvm::sys::unicode::nameToCodepointStrict(Name);
3590 std::optional<llvm::sys::unicode::LooseMatchingResult> LooseMatch;
3592 LooseMatch = llvm::sys::unicode::nameToCodepointLooseMatching(Name);
3594 Diag(StartName, diag::err_invalid_ucn_name)
3595 << StringRef(Buffer.data(), Buffer.size())
3598 Diag(StartName, diag::note_invalid_ucn_name_loose_matching)
3609 if (Diagnose &&
Match)
3612 PP->getDiagnostics());
3618 if (LooseMatch && Diagnose)
3619 Match = LooseMatch->CodePoint;
3626 if (CurPtr - StartPtr == (
ptrdiff_t)(Buffer.size() + 3))
3629 while (StartPtr != CurPtr)
3630 (void)getAndAdvanceChar(StartPtr, *
Result);
3634 return Match ? std::optional<uint32_t>(*
Match) : std::nullopt;
3637uint32_t Lexer::tryReadUCN(
const char *&StartPtr,
const char *SlashLoc,
3641 std::optional<uint32_t> CodePointOpt;
3642 char Kind = getCharAndSize(StartPtr, CharSize);
3643 if (Kind ==
'u' || Kind ==
'U')
3644 CodePointOpt = tryReadNumericUCN(StartPtr, SlashLoc,
Result);
3645 else if (Kind ==
'N')
3646 CodePointOpt = tryReadNamedUCN(StartPtr, SlashLoc,
Result);
3651 uint32_t CodePoint = *CodePointOpt;
3654 if (LangOpts.AsmPreprocessor)
3673 if (CodePoint < 0xA0) {
3677 if (CodePoint < 0x20 || CodePoint >= 0x7F)
3678 Diag(BufferPtr, diag::err_ucn_control_character);
3680 char C =
static_cast<char>(CodePoint);
3681 Diag(BufferPtr, diag::err_ucn_escape_basic_scs) << StringRef(&
C, 1);
3686 }
else if (CodePoint >= 0xD800 && CodePoint <= 0xDFFF) {
3691 if (LangOpts.CPlusPlus && !LangOpts.CPlusPlus11)
3692 Diag(BufferPtr, diag::warn_ucn_escape_surrogate);
3694 Diag(BufferPtr, diag::err_ucn_escape_invalid);
3702bool Lexer::CheckUnicodeWhitespace(
Token &
Result, uint32_t
C,
3703 const char *CurPtr) {
3706 Diag(BufferPtr, diag::ext_unicode_whitespace)
3715void Lexer::PropagateLineStartLeadingSpaceInfo(
Token &
Result) {
3716 IsAtStartOfLine =
Result.isAtStartOfLine();
3717 HasLeadingSpace =
Result.hasLeadingSpace();
3718 HasLeadingEmptyMacro =
Result.hasLeadingEmptyMacro();
3723 assert(!isDependencyDirectivesLexer());
3729 if (IsAtStartOfLine) {
3731 IsAtStartOfLine =
false;
3734 if (HasLeadingSpace) {
3736 HasLeadingSpace =
false;
3739 if (HasLeadingEmptyMacro) {
3741 HasLeadingEmptyMacro =
false;
3744 bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
3745 IsAtPhysicalStartOfLine =
false;
3748 bool returnedToken = LexTokenInternal(
Result, atPhysicalStartOfLine);
3750 assert((returnedToken || !isRawLex) &&
"Raw lex must succeed");
3751 return returnedToken;
3759bool Lexer::LexTokenInternal(
Token &
Result,
bool TokAtPhysicalStartOfLine) {
3761 assert(!
Result.needsCleaning() &&
"Result needs cleaning");
3762 assert(!
Result.hasPtrData() &&
"Result has not been reset");
3765 const char *CurPtr = BufferPtr;
3777 FormTokenWithChars(Result, CurPtr, tok::unknown);
3786 unsigned SizeTmp, SizeTmp2;
3789 char Char = getAndAdvanceChar(CurPtr,
Result);
3793 NewLinePtr =
nullptr;
3798 if (CurPtr-1 == BufferEnd)
3799 return LexEndOfFile(
Result, CurPtr-1);
3802 if (isCodeCompletionPoint(CurPtr-1)) {
3805 FormTokenWithChars(
Result, CurPtr, tok::code_completion);
3810 Diag(CurPtr-1, diag::null_in_file);
3812 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
3821 if (LangOpts.MicrosoftExt) {
3823 Diag(CurPtr-1, diag::ext_ctrl_z_eof_microsoft);
3824 return LexEndOfFile(
Result, CurPtr-1);
3828 Kind = tok::unknown;
3832 if (CurPtr[0] ==
'\n')
3833 (void)getAndAdvanceChar(CurPtr,
Result);
3847 IsAtStartOfLine =
true;
3848 IsAtPhysicalStartOfLine =
true;
3849 NewLinePtr = CurPtr - 1;
3858 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
3868 SkipHorizontalWhitespace:
3870 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
3879 LineComment && (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP)) {
3880 if (SkipLineComment(
Result, CurPtr+2, TokAtPhysicalStartOfLine))
3882 goto SkipIgnoredUnits;
3884 if (SkipBlockComment(
Result, CurPtr+2, TokAtPhysicalStartOfLine))
3886 goto SkipIgnoredUnits;
3888 goto SkipHorizontalWhitespace;
3896 case '0':
case '1':
case '2':
case '3':
case '4':
3897 case '5':
case '6':
case '7':
case '8':
case '9':
3900 return LexNumericConstant(
Result, CurPtr);
3909 if (LangOpts.CPlusPlus11 || LangOpts.C11) {
3910 Char = getCharAndSize(CurPtr, SizeTmp);
3914 return LexStringLiteral(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3915 tok::utf16_string_literal);
3919 return LexCharConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3920 tok::utf16_char_constant);
3923 if (Char ==
'R' && LangOpts.RawStringLiterals &&
3924 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3925 return LexRawStringLiteral(
Result,
3926 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3928 tok::utf16_string_literal);
3931 char Char2 = getCharAndSize(CurPtr + SizeTmp, SizeTmp2);
3935 return LexStringLiteral(
Result,
3936 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3938 tok::utf8_string_literal);
3939 if (Char2 ==
'\'' && (LangOpts.CPlusPlus17 || LangOpts.C23))
3940 return LexCharConstant(
3941 Result, ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3943 tok::utf8_char_constant);
3945 if (Char2 ==
'R' && LangOpts.RawStringLiterals) {
3947 char Char3 = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
3950 return LexRawStringLiteral(
Result,
3951 ConsumeChar(ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3954 tok::utf8_string_literal);
3961 return LexIdentifierContinue(
Result, CurPtr);
3967 if (LangOpts.CPlusPlus11 || LangOpts.C11) {
3968 Char = getCharAndSize(CurPtr, SizeTmp);
3972 return LexStringLiteral(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3973 tok::utf32_string_literal);
3977 return LexCharConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3978 tok::utf32_char_constant);
3981 if (Char ==
'R' && LangOpts.RawStringLiterals &&
3982 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3983 return LexRawStringLiteral(
Result,
3984 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3986 tok::utf32_string_literal);
3990 return LexIdentifierContinue(
Result, CurPtr);
3996 if (LangOpts.RawStringLiterals) {
3997 Char = getCharAndSize(CurPtr, SizeTmp);
4000 return LexRawStringLiteral(
Result,
4001 ConsumeChar(CurPtr, SizeTmp,
Result),
4002 tok::string_literal);
4006 return LexIdentifierContinue(
Result, CurPtr);
4011 Char = getCharAndSize(CurPtr, SizeTmp);
4015 return LexStringLiteral(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
4016 tok::wide_string_literal);
4019 if (LangOpts.RawStringLiterals && Char ==
'R' &&
4020 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
4021 return LexRawStringLiteral(
Result,
4022 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4024 tok::wide_string_literal);
4028 return LexCharConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
4029 tok::wide_char_constant);
4034 case 'A':
case 'B':
case 'C':
case 'D':
case 'E':
case 'F':
case 'G':
4035 case 'H':
case 'I':
case 'J':
case 'K':
case 'M':
case 'N':
4036 case 'O':
case 'P':
case 'Q':
case 'S':
case 'T':
4037 case 'V':
case 'W':
case 'X':
case 'Y':
case 'Z':
4038 case 'a':
case 'b':
case 'c':
case 'd':
case 'e':
case 'f':
case 'g':
4039 case 'h':
case 'i':
case 'j':
case 'k':
case 'l':
case 'm':
case 'n':
4040 case 'o':
case 'p':
case 'q':
case 'r':
case 's':
case 't':
4041 case 'v':
case 'w':
case 'x':
case 'y':
case 'z':
4045 return LexIdentifierContinue(
Result, CurPtr);
4048 if (LangOpts.DollarIdents) {
4050 Diag(CurPtr-1, diag::ext_dollar_in_identifier);
4053 return LexIdentifierContinue(
Result, CurPtr);
4056 Kind = tok::unknown;
4063 return LexCharConstant(
Result, CurPtr, tok::char_constant);
4069 return LexStringLiteral(
Result, CurPtr,
4071 : tok::string_literal);
4075 Kind = tok::question;
4078 Kind = tok::l_square;
4081 Kind = tok::r_square;
4084 Kind = tok::l_paren;
4087 Kind = tok::r_paren;
4090 Kind = tok::l_brace;
4093 Kind = tok::r_brace;
4096 Char = getCharAndSize(CurPtr, SizeTmp);
4097 if (Char >=
'0' && Char <=
'9') {
4101 return LexNumericConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result));
4102 }
else if (LangOpts.CPlusPlus && Char ==
'*') {
4103 Kind = tok::periodstar;
4105 }
else if (Char ==
'.' &&
4106 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
'.') {
4107 Kind = tok::ellipsis;
4108 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4115 Char = getCharAndSize(CurPtr, SizeTmp);
4118 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4119 }
else if (Char ==
'=') {
4120 Kind = tok::ampequal;
4121 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4127 if (getCharAndSize(CurPtr, SizeTmp) ==
'=') {
4128 Kind = tok::starequal;
4129 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4135 Char = getCharAndSize(CurPtr, SizeTmp);
4137 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4138 Kind = tok::plusplus;
4139 }
else if (Char ==
'=') {
4140 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4141 Kind = tok::plusequal;
4147 Char = getCharAndSize(CurPtr, SizeTmp);
4149 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4150 Kind = tok::minusminus;
4151 }
else if (Char ==
'>' && LangOpts.CPlusPlus &&
4152 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
'*') {
4153 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4155 Kind = tok::arrowstar;
4156 }
else if (Char ==
'>') {
4157 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4159 }
else if (Char ==
'=') {
4160 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4161 Kind = tok::minusequal;
4170 if (getCharAndSize(CurPtr, SizeTmp) ==
'=') {
4171 Kind = tok::exclaimequal;
4172 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4174 Kind = tok::exclaim;
4179 Char = getCharAndSize(CurPtr, SizeTmp);
4189 bool TreatAsComment =
4190 LineComment && (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP);
4191 if (!TreatAsComment)
4192 if (!(
PP &&
PP->isPreprocessedOutput()))
4193 TreatAsComment = getCharAndSize(CurPtr+SizeTmp, SizeTmp2) !=
'*';
4195 if (TreatAsComment) {
4196 if (SkipLineComment(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
4197 TokAtPhysicalStartOfLine))
4203 goto SkipIgnoredUnits;
4208 if (SkipBlockComment(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
4209 TokAtPhysicalStartOfLine))
4218 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4219 Kind = tok::slashequal;
4225 Char = getCharAndSize(CurPtr, SizeTmp);
4227 Kind = tok::percentequal;
4228 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4229 }
else if (LangOpts.Digraphs && Char ==
'>') {
4230 Kind = tok::r_brace;
4231 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4232 }
else if (LangOpts.Digraphs && Char ==
':') {
4233 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4234 Char = getCharAndSize(CurPtr, SizeTmp);
4235 if (Char ==
'%' && getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
':') {
4236 Kind = tok::hashhash;
4237 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4239 }
else if (Char ==
'@' && LangOpts.MicrosoftExt) {
4240 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4242 Diag(BufferPtr, diag::ext_charize_microsoft);
4249 if (TokAtPhysicalStartOfLine && !
LexingRawMode && !Is_PragmaLexer)
4250 goto HandleDirective;
4255 Kind = tok::percent;
4259 Char = getCharAndSize(CurPtr, SizeTmp);
4261 return LexAngledStringLiteral(
Result, CurPtr);
4262 }
else if (Char ==
'<') {
4263 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
4265 Kind = tok::lesslessequal;
4266 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4268 }
else if (After ==
'<' && IsStartOfConflictMarker(CurPtr-1)) {
4272 }
else if (After ==
'<' && HandleEndOfConflictMarker(CurPtr-1)) {
4276 }
else if (LangOpts.CUDA && After ==
'<') {
4277 Kind = tok::lesslessless;
4278 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4281 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4282 Kind = tok::lessless;
4284 }
else if (Char ==
'=') {
4285 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
4287 if (LangOpts.CPlusPlus20) {
4289 Diag(BufferPtr, diag::warn_cxx17_compat_spaceship);
4290 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4292 Kind = tok::spaceship;
4298 Diag(BufferPtr, diag::warn_cxx20_compat_spaceship)
4303 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4304 Kind = tok::lessequal;
4305 }
else if (LangOpts.Digraphs && Char ==
':') {
4306 if (LangOpts.CPlusPlus11 &&
4307 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
':') {
4314 char After = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
4315 if (After !=
':' && After !=
'>') {
4318 Diag(BufferPtr, diag::warn_cxx98_compat_less_colon_colon);
4323 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4324 Kind = tok::l_square;
4325 }
else if (LangOpts.Digraphs && Char ==
'%') {
4326 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4327 Kind = tok::l_brace;
4328 }
else if (Char ==
'#' && SizeTmp == 1 &&
4329 lexEditorPlaceholder(
Result, CurPtr)) {
4336 Char = getCharAndSize(CurPtr, SizeTmp);
4338 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4339 Kind = tok::greaterequal;
4340 }
else if (Char ==
'>') {
4341 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
4343 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4345 Kind = tok::greatergreaterequal;
4346 }
else if (After ==
'>' && IsStartOfConflictMarker(CurPtr-1)) {
4350 }
else if (After ==
'>' && HandleEndOfConflictMarker(CurPtr-1)) {
4353 }
else if (LangOpts.CUDA && After ==
'>') {
4354 Kind = tok::greatergreatergreater;
4355 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4358 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4359 Kind = tok::greatergreater;
4362 Kind = tok::greater;
4366 Char = getCharAndSize(CurPtr, SizeTmp);
4368 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4369 Kind = tok::caretequal;
4371 if (LangOpts.OpenCL && Char ==
'^')
4372 Diag(CurPtr, diag::err_opencl_logical_exclusive_or);
4377 Char = getCharAndSize(CurPtr, SizeTmp);
4379 Kind = tok::pipeequal;
4380 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4381 }
else if (Char ==
'|') {
4383 if (CurPtr[1] ==
'|' && HandleEndOfConflictMarker(CurPtr-1))
4385 Kind = tok::pipepipe;
4386 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4392 Char = getCharAndSize(CurPtr, SizeTmp);
4393 if (LangOpts.Digraphs && Char ==
'>') {
4394 Kind = tok::r_square;
4395 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4396 }
else if (Char ==
':') {
4397 Kind = tok::coloncolon;
4398 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4407 Char = getCharAndSize(CurPtr, SizeTmp);
4410 if (CurPtr[1] ==
'=' && HandleEndOfConflictMarker(CurPtr-1))
4413 Kind = tok::equalequal;
4414 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4423 Char = getCharAndSize(CurPtr, SizeTmp);
4425 Kind = tok::hashhash;
4426 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4427 }
else if (Char ==
'@' && LangOpts.MicrosoftExt) {
4430 Diag(BufferPtr, diag::ext_charize_microsoft);
4431 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4437 if (TokAtPhysicalStartOfLine && !
LexingRawMode && !Is_PragmaLexer)
4438 goto HandleDirective;
4446 if (CurPtr[-1] ==
'@' && LangOpts.ObjC)
4449 Kind = tok::unknown;
4454 if (!LangOpts.AsmPreprocessor) {
4455 if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &
Result)) {
4456 if (CheckUnicodeWhitespace(
Result, CodePoint, CurPtr)) {
4457 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
4465 return LexUnicodeIdentifierStart(
Result, CodePoint, CurPtr);
4469 Kind = tok::unknown;
4474 Kind = tok::unknown;
4478 llvm::UTF32 CodePoint;
4483 llvm::ConversionResult Status =
4484 llvm::convertUTF8Sequence((
const llvm::UTF8 **)&CurPtr,
4485 (
const llvm::UTF8 *)BufferEnd,
4487 llvm::strictConversion);
4488 if (Status == llvm::conversionOK) {
4489 if (CheckUnicodeWhitespace(
Result, CodePoint, CurPtr)) {
4490 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
4497 return LexUnicodeIdentifierStart(
Result, CodePoint, CurPtr);
4501 PP->isPreprocessedOutput()) {
4503 Kind = tok::unknown;
4510 Diag(CurPtr, diag::err_invalid_utf8);
4512 BufferPtr = CurPtr+1;
4524 FormTokenWithChars(
Result, CurPtr, Kind);
4530 FormTokenWithChars(
Result, CurPtr, tok::hash);
4533 if (
PP->hadModuleLoaderFatalFailure())
4545const char *Lexer::convertDependencyDirectiveToken(
4547 const char *TokPtr = BufferStart + DDTok.
Offset;
4553 BufferPtr = TokPtr + DDTok.
Length;
4557bool Lexer::LexDependencyDirectiveToken(
Token &
Result) {
4558 assert(isDependencyDirectivesLexer());
4560 using namespace dependency_directives_scan;
4562 if (BufferPtr == BufferEnd)
4563 return LexEndOfFile(
Result, BufferPtr);
4565 while (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size()) {
4566 if (DepDirectives.front().Kind == pp_eof)
4567 return LexEndOfFile(
Result, BufferEnd);
4568 if (DepDirectives.front().Kind == tokens_present_before_eof)
4570 NextDepDirectiveTokenIndex = 0;
4571 DepDirectives = DepDirectives.drop_front();
4574 const dependency_directives_scan::Token &DDTok =
4575 DepDirectives.front().Tokens[NextDepDirectiveTokenIndex++];
4576 if (NextDepDirectiveTokenIndex > 1 || DDTok.
Kind != tok::hash) {
4582 BufferPtr = BufferStart + DDTok.
Offset;
4583 LexAngledStringLiteral(
Result, BufferPtr + 1);
4584 if (
Result.isNot(tok::header_name))
4588 const dependency_directives_scan::Token &NextTok =
4589 DepDirectives.front().Tokens[NextDepDirectiveTokenIndex];
4590 if (BufferStart + NextTok.
Offset >= BufferPtr)
4592 ++NextDepDirectiveTokenIndex;
4597 const char *TokPtr = convertDependencyDirectiveToken(DDTok,
Result);
4599 if (
Result.is(tok::hash) &&
Result.isAtStartOfLine()) {
4601 if (
PP->hadModuleLoaderFatalFailure())
4606 if (
Result.is(tok::raw_identifier)) {
4607 Result.setRawIdentifierData(TokPtr);
4609 const IdentifierInfo *II =
PP->LookUpIdentifierInfo(
Result);
4611 return PP->HandleIdentifier(
Result);
4615 if (
Result.isLiteral()) {
4616 Result.setLiteralData(TokPtr);
4619 if (
Result.is(tok::colon)) {
4621 if (*BufferPtr ==
':') {
4622 assert(DepDirectives.front().Tokens[NextDepDirectiveTokenIndex].is(
4624 ++NextDepDirectiveTokenIndex;
4625 Result.setKind(tok::coloncolon);
4635bool Lexer::LexDependencyDirectiveTokenWhileSkipping(
Token &
Result) {
4636 assert(isDependencyDirectivesLexer());
4638 using namespace dependency_directives_scan;
4641 unsigned NestedIfs = 0;
4643 DepDirectives = DepDirectives.drop_front();
4644 switch (DepDirectives.front().Kind) {
4646 llvm_unreachable(
"unexpected 'pp_none'");
4687 NextDepDirectiveTokenIndex = 0;
4688 return LexEndOfFile(
Result, BufferEnd);
4692 const dependency_directives_scan::Token &DDTok =
4693 DepDirectives.front().Tokens.front();
4694 assert(DDTok.
is(tok::hash));
4695 NextDepDirectiveTokenIndex = 1;
4697 convertDependencyDirectiveToken(DDTok,
Result);
Defines the Diagnostic-related interfaces.
Defines the clang::IdentifierInfo, clang::IdentifierTable, and clang::Selector interfaces.
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified.
Defines the clang::LangOptions interface.
static bool isInExpansionTokenRange(const SourceLocation Loc, const SourceManager &SM)
static bool isMathematicalExtensionID(uint32_t C, const LangOptions &LangOpts, bool IsStart, bool &IsExtension)
static void diagnoseInvalidUnicodeCodepointInIdentifier(DiagnosticsEngine &Diags, const LangOptions &LangOpts, uint32_t CodePoint, CharSourceRange Range, bool IsFirst)
static char DecodeTrigraphChar(const char *CP, Lexer *L, bool Trigraphs)
DecodeTrigraphChar - If the specified character is a legal trigraph when prefixed with ?
static size_t getSpellingSlow(const Token &Tok, const char *BufPtr, const LangOptions &LangOpts, char *Spelling)
Slow case of getSpelling.
static const char * FindConflictEnd(const char *CurPtr, const char *BufferEnd, ConflictMarkerKind CMK)
Find the end of a version control conflict marker.
static void maybeDiagnoseUTF8Homoglyph(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range)
After encountering UTF-8 character C and interpreting it as an identifier character,...
static SourceLocation getBeginningOfFileToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
static const char * fastParseASCIIIdentifierScalar(const char *CurPtr)
static void StringifyImpl(T &Str, char Quote)
static LLVM_ATTRIBUTE_NOINLINE SourceLocation GetMappedTokenLoc(Preprocessor &PP, SourceLocation FileLoc, unsigned CharNo, unsigned TokLen)
GetMappedTokenLoc - If lexing out of a 'mapped buffer', where we pretend the lexer buffer was all exp...
static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts, bool &IsExtension)
static CharSourceRange makeCharRange(Lexer &L, const char *Begin, const char *End)
static bool isUnicodeWhitespace(uint32_t Codepoint)
static void diagnoseExtensionInIdentifier(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range)
static const char * findPlaceholderEnd(const char *CurPtr, const char *BufferEnd)
static llvm::SmallString< 5 > codepointAsHexString(uint32_t C)
static CharSourceRange makeRangeFromFileLocs(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, Lexer *L, bool Trigraphs)
isBlockCommentEndOfEscapedNewLine - Return true if the specified newline character (either \n or \r) ...
static const char * fastParseASCIIIdentifier(const char *CurPtr, const char *BufferEnd)
static char GetTrigraphCharForLetter(char Letter)
GetTrigraphCharForLetter - Given a character that occurs after a ?
static bool isAllowedInitiallyIDChar(uint32_t C, const LangOptions &LangOpts, bool &IsExtension)
static void maybeDiagnoseIDCharCompat(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range, bool IsFirst)
static const char * findBeginningOfLine(StringRef Buffer, unsigned Offset)
Returns the pointer that points to the beginning of line that contains the given offset,...
Defines the MultipleIncludeOpt interface.
Defines the clang::Preprocessor interface.
Defines the clang::SourceLocation class and associated facilities.
Defines the SourceManager interface.
Defines the clang::TokenKind enum and support functions.
static const llvm::sys::UnicodeCharRange C11DisallowedInitialIDCharRanges[]
static const llvm::sys::UnicodeCharRange C99DisallowedInitialIDCharRanges[]
static const llvm::sys::UnicodeCharRange UnicodeWhitespaceCharRanges[]
static const llvm::sys::UnicodeCharRange C99AllowedIDCharRanges[]
static const llvm::sys::UnicodeCharRange C11AllowedIDCharRanges[]
static const llvm::sys::UnicodeCharRange MathematicalNotationProfileIDStartRanges[]
static const llvm::sys::UnicodeCharRange MathematicalNotationProfileIDContinueRanges[]
static const llvm::sys::UnicodeCharRange XIDStartRanges[]
static const llvm::sys::UnicodeCharRange XIDContinueRanges[]
__DEVICE__ void * memcpy(void *__a, const void *__b, size_t __c)
_Float16 __2f16 __attribute__((ext_vector_type(2)))
Zeroes the upper 128 bits (bits 255:128) of all YMM registers.
__device__ __2f16 float c
__PTRDIFF_TYPE__ ptrdiff_t
A signed integer type that is the result of subtracting two pointers.
static __inline__ int __ATTRS_o_ai vec_any_ge(vector signed char __a, vector signed char __b)
static __inline__ int __ATTRS_o_ai vec_any_eq(vector signed char __a, vector signed char __b)
Represents a character-granular source range.
static CharSourceRange getCharRange(SourceRange R)
SourceLocation getEnd() const
SourceLocation getBegin() const
A little helper class used to produce diagnostics.
Concrete class used by the front-end to report problems and issues.
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
bool isIgnored(unsigned DiagID, SourceLocation Loc) const
Determine whether the diagnostic is known to be ignored.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
static FixItHint CreateReplacement(CharSourceRange RemoveRange, StringRef Code)
Create a code modification hint that replaces the given source range with the given code string.
static FixItHint CreateRemoval(CharSourceRange RemoveRange)
Create a code modification hint that removes the given source range.
static FixItHint CreateInsertion(SourceLocation InsertionLoc, StringRef Code, bool BeforePreviousInsertions=false)
Create a code modification hint that inserts the given code string at a specific location.
One of these records is kept for each identifier that is lexed.
bool isHandleIdentifierCase() const
Return true if the Preprocessor::HandleIdentifier must be called on a token of this identifier.
bool isKeyword(const LangOptions &LangOpts) const
Return true if this token is a keyword in the specified language.
tok::ObjCKeywordKind getObjCKeywordID() const
Return the Objective-C keyword ID for this identifier.
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens.
static StringRef getSourceText(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts, bool *Invalid=nullptr)
Returns a string for the source that the range encompasses.
friend class Preprocessor
void SetKeepWhitespaceMode(bool Val)
SetKeepWhitespaceMode - This method lets clients enable or disable whitespace retention mode.
static SourceLocation findLocationAfterToken(SourceLocation loc, tok::TokenKind TKind, const SourceManager &SM, const LangOptions &LangOpts, bool SkipTrailingWhitespaceAndNewLine)
Checks that the given token is the first token that occurs after the given location (this excludes co...
bool LexFromRawLexer(Token &Result)
LexFromRawLexer - Lex a token from a designated raw lexer (one with no associated preprocessor object...
static unsigned getEscapedNewLineSize(const char *P)
getEscapedNewLineSize - Return the size of the specified escaped newline, or 0 if it is not an escape...
bool inKeepCommentMode() const
inKeepCommentMode - Return true if the lexer should return comments as tokens.
void SetCommentRetentionState(bool Mode)
SetCommentRetentionState - Change the comment retention mode of the lexer to the specified mode.
static std::optional< Token > findPreviousToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts, bool IncludeComments)
Finds the token that comes before the given location.
void seek(unsigned Offset, bool IsAtStartOfLine)
Set the lexer's buffer pointer to Offset.
static StringRef getImmediateMacroName(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
void ReadToEndOfLine(SmallVectorImpl< char > *Result=nullptr)
ReadToEndOfLine - Read the rest of the current preprocessor line as an uninterpreted string.
static bool isAtStartOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroBegin=nullptr)
Returns true if the given MacroID location points at the first token of the macro expansion.
DiagnosticBuilder Diag(const char *Loc, unsigned DiagID) const
Diag - Forwarding function for diagnostics.
const char * getBufferLocation() const
Return the current location in the buffer.
bool Lex(Token &Result)
Lex - Return the next token in the file.
bool isPragmaLexer() const
isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.
static void DiagnoseDelimitedOrNamedEscapeSequence(SourceLocation Loc, bool Named, const LangOptions &Opts, DiagnosticsEngine &Diags)
Diagnose use of a delimited or named escape sequence.
static unsigned getTokenPrefixLength(SourceLocation TokStart, unsigned CharNo, const SourceManager &SM, const LangOptions &LangOpts)
Get the physical length (including trigraphs and escaped newlines) of the first Characters characters...
Lexer(FileID FID, const llvm::MemoryBufferRef &InputFile, Preprocessor &PP, bool IsFirstIncludeOfFile=true)
Lexer constructor - Create a new lexer object for the specified buffer with the specified preprocesso...
static bool isAtEndOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroEnd=nullptr)
Returns true if the given MacroID location points at the last token of the macro expansion.
SourceLocation getSourceLocation() override
getSourceLocation - Return a source location for the next character in the current file.
static CharSourceRange makeFileCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Accepts a range and returns a character range with file locations.
static bool isNewLineEscaped(const char *BufferStart, const char *Str)
Checks whether new line pointed by Str is preceded by escape sequence.
SourceLocation getSourceLocation(const char *Loc, unsigned TokLen=1) const
getSourceLocation - Return a source location identifier for the specified offset in the current file.
static StringRef getIndentationForLine(SourceLocation Loc, const SourceManager &SM)
Returns the leading whitespace for line that corresponds to the given location Loc.
static unsigned getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid=nullptr)
getSpelling - This method is used to get the spelling of a token into a preallocated buffer,...
bool isKeepWhitespaceMode() const
isKeepWhitespaceMode - Return true if the lexer should return tokens for every character in the file,...
static bool isAsciiIdentifierContinueChar(char c, const LangOptions &LangOpts)
Returns true if the given character could appear in an identifier.
static std::optional< Token > findNextToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts, bool IncludeComments=false)
Finds the token that comes right after the given location.
static unsigned MeasureTokenLength(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
MeasureTokenLength - Relex the token at the specified location and return its length in bytes in the ...
static SourceLocation GetBeginningOfToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Given a location anywhere in a source buffer, find the location that corresponds to the beginning of...
void resetExtendedTokenMode()
Sets the extended token mode back to its initial value, according to the language options and preproc...
static StringRef getImmediateMacroNameForDiagnostics(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
static Lexer * Create_PragmaLexer(SourceLocation SpellingLoc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLen, Preprocessor &PP)
Create_PragmaLexer: Lexer constructor - Create a new lexer object for _Pragma expansion.
static PreambleBounds ComputePreamble(StringRef Buffer, const LangOptions &LangOpts, unsigned MaxLines=0)
Compute the preamble of the given file.
static bool getRawToken(SourceLocation Loc, Token &Result, const SourceManager &SM, const LangOptions &LangOpts, bool IgnoreWhiteSpace=false)
Relex the token at the specified location.
static SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset, const SourceManager &SM, const LangOptions &LangOpts)
Computes the source location just past the end of the token at this source location.
static std::string Stringify(StringRef Str, bool Charify=false)
Stringify - Convert the specified string into a C string by i) escaping '\' and " characters and ii) ...
static SizedChar getCharAndSizeNoWarn(const char *Ptr, const LangOptions &LangOpts)
getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever emit a warning.
bool LexingRawMode
True if in raw mode.
SmallVector< PPConditionalInfo, 4 > ConditionalStack
Information about the set of #if/#ifdef/#ifndef blocks we are currently in.
bool ParsingPreprocessorDirective
True when parsing #XXX; turns '\n' into a tok::eod token.
MultipleIncludeOpt MIOpt
A state machine that detects the #ifndef-wrapping a file idiom for the multiple-include optimization.
bool ParsingFilename
True after #include; turns <xx> or "xxx" into a tok::header_name token.
bool isLexingRawMode() const
Return true if this lexer is in raw mode or not.
const FileID FID
The SourceManager FileID corresponding to the file being lexed.
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
SourceManager & getSourceManager() const
Encodes a location in the source.
static SourceLocation getFromRawEncoding(UIntTy Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
bool isValid() const
Return true if this is a valid SourceLocation object.
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
UIntTy getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it.
This class handles loading and caching of source files into memory.
Each ExpansionInfo encodes the expansion location - where the token was ultimately expanded,...
SourceLocation getExpansionLocStart() const
SourceLocation getSpellingLoc() const
bool isMacroArgExpansion() const
This is a discriminated union of FileInfo and ExpansionInfo.
const ExpansionInfo & getExpansion() const
static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix)
Determine whether a suffix is a valid ud-suffix.
Token - This structure provides full information about a lexed token.
IdentifierInfo * getIdentifierInfo() const
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
unsigned getLength() const
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {....
tok::TokenKind getKind() const
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
bool isAnnotation() const
Return true if this is any of tok::annot_* kind tokens.
bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const
Return true if we have an ObjC keyword identifier.
bool isSimpleTypeSpecifier(const LangOptions &LangOpts) const
Determine whether the token kind starts a simple-type-specifier.
void startToken()
Reset all flags to cleared.
bool needsCleaning() const
Return true if this token has trigraphs or escaped newlines in it.
StringRef getRawIdentifier() const
getRawIdentifier - For a raw identifier token (i.e., an identifier lexed in raw mode),...
void setFlag(TokenFlags Flag)
Set the specified flag.
static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_movemask_epi8(__m128i __a)
Copies the values of the most significant bits from each 8-bit element in a 128-bit integer vector of...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpeq_epi8(__m128i __a, __m128i __b)
Compares each of the corresponding 8-bit values of the 128-bit integer vectors for equality.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si128(__m128i_u const *__p)
Moves packed integer values from an unaligned 128-bit memory location to elements in a 128-bit intege...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_load_si128(__m128i const *__p)
Moves packed integer values from an aligned 128-bit memory location to elements in a 128-bit integer ...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set1_epi8(char __b)
Initializes all values in a 128-bit vector of [16 x i8] with the specified 8-bit value.
@ tokens_present_before_eof
Indicates that there are tokens present between the last scanned directive and eof.
@ pp_pragma_system_header
@ pp_pragma_include_alias
@ After
Like System, but searched after the system directories.
bool isStringLiteral(TokenKind K)
Return true if this is a C or C++ string-literal (or C++11 user-defined-string-literal) token.
ObjCKeywordKind
Provides a namespace for Objective-C keywords which start with an '@'.
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
The JSON file list parser is used to communicate input to InstallAPI.
LLVM_READNONE bool isASCII(char c)
Returns true if a byte is an ASCII character.
@ Match
This is not an overload because the signature exactly matches an existing declaration.
LLVM_READONLY bool isVerticalWhitespace(unsigned char c)
Returns true if this character is vertical ASCII whitespace: '\n', '\r'.
ConflictMarkerKind
ConflictMarkerKind - Kinds of conflict marker which the lexer might be recovering from.
@ CMK_Perforce
A Perforce-style conflict marker, initiated by 4 ">"s, separated by 4 "="s, and terminated by 4 "<"s.
@ CMK_None
Not within a conflict marker.
@ CMK_Normal
A normal or diff3 conflict marker, initiated by at least 7 "<"s, separated by at least 7 "="s or "|"s...
LLVM_READONLY bool isAsciiIdentifierContinue(unsigned char c)
std::pair< FileID, unsigned > FileIDAndOffset
bool operator<(DeclarationName LHS, DeclarationName RHS)
Ordering on two declaration names.
LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)
Returns true if this character is horizontal ASCII whitespace: ' ', '\t', '\f', '\v'.
@ Result
The result type of a method or function.
const FunctionProtoType * T
LLVM_READONLY bool isRawStringDelimBody(unsigned char c)
Return true if this is the body character of a C++ raw string delimiter.
LLVM_READONLY bool isWhitespace(unsigned char c)
Return true if this character is horizontal or vertical ASCII whitespace: ' ', '\t',...
LLVM_READONLY bool isPreprocessingNumberBody(unsigned char c)
Return true if this is the body character of a C preprocessing number, which is [a-zA-Z0-9_.].
@ Keyword
The name has been typo-corrected to a keyword.
LLVM_READONLY bool isAsciiIdentifierStart(unsigned char c, bool AllowDollar=false)
Returns true if this is a valid first character of a C identifier, which is [a-zA-Z_].
__INTPTR_TYPE__ intptr_t
A signed integer type with the property that any valid pointer to void can be converted to this type,...
float __ovld __cnfn length(float)
Return the length of vector p, i.e., sqrt(p.x^2 + p.y^2 + ...)
#define _mm_cmpistri(A, B, M)
Uses the immediate operand M to perform a comparison of string data with implicitly defined lengths t...
#define _SIDD_LEAST_SIGNIFICANT
#define _SIDD_NEGATIVE_POLARITY
Represents a char and the number of bytes parsed to produce it.
Describes the bounds (start, size) of the preamble and a flag required by PreprocessorOptions::Precom...
Token lexed as part of dependency directive scanning.
unsigned Offset
Offset into the original source input.
bool is(tok::TokenKind K) const