29#include "llvm/ADT/STLExtras.h"
30#include "llvm/ADT/StringExtras.h"
31#include "llvm/ADT/StringRef.h"
32#include "llvm/ADT/StringSwitch.h"
33#include "llvm/Support/Compiler.h"
34#include "llvm/Support/ConvertUTF.h"
35#include "llvm/Support/MemoryBufferRef.h"
36#include "llvm/Support/NativeFormatting.h"
37#include "llvm/Support/Unicode.h"
38#include "llvm/Support/UnicodeCharRanges.h"
63 return II->getObjCKeywordID() == objcKey;
70 return tok::objc_not_keyword;
78 case tok::annot_typename:
79 case tok::annot_decltype:
80 case tok::annot_pack_indexing_type:
86 case tok::kw___int128:
88 case tok::kw_unsigned:
96 case tok::kw__Float16:
97 case tok::kw___float128:
98 case tok::kw___ibm128:
105#define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) case tok::kw___##Trait:
106#include "clang/Basic/TransformTypeTraits.def"
107 case tok::kw___auto_type:
108 case tok::kw_char16_t:
109 case tok::kw_char32_t:
111 case tok::kw_decltype:
112 case tok::kw_char8_t:
124void Lexer::anchor() {}
126void Lexer::InitLexer(
const char *BufStart,
const char *BufPtr,
127 const char *BufEnd) {
128 BufferStart = BufStart;
132 assert(BufEnd[0] == 0 &&
133 "We assume that the input buffer has a null character at the end"
134 " to simplify lexing!");
139 if (BufferStart == BufferPtr) {
141 StringRef Buf(BufferStart, BufferEnd - BufferStart);
142 size_t BOMLength = llvm::StringSwitch<size_t>(Buf)
143 .StartsWith(
"\xEF\xBB\xBF", 3)
147 BufferPtr += BOMLength;
150 Is_PragmaLexer =
false;
151 CurrentConflictMarkerState =
CMK_None;
154 IsAtStartOfLine =
true;
155 IsAtPhysicalStartOfLine =
true;
157 HasLeadingSpace =
false;
158 HasLeadingEmptyMacro =
false;
173 ExtendedTokenMode = 0;
175 NewLinePtr =
nullptr;
185 FileLoc(
PP.getSourceManager().getLocForStartOfFile(
FID)),
186 LangOpts(
PP.getLangOpts()), LineComment(LangOpts.LineComment),
187 IsFirstTimeLexingFile(IsFirstIncludeOfFile) {
188 InitLexer(InputFile.getBufferStart(), InputFile.getBufferStart(),
189 InputFile.getBufferEnd());
198 const char *BufStart,
const char *BufPtr,
const char *BufEnd,
199 bool IsFirstIncludeOfFile)
200 : FileLoc(fileloc), LangOpts(langOpts), LineComment(LangOpts.LineComment),
201 IsFirstTimeLexingFile(IsFirstIncludeOfFile) {
202 InitLexer(BufStart, BufPtr, BufEnd);
213 bool IsFirstIncludeOfFile)
214 :
Lexer(
SM.getLocForStartOfFile(
FID), langOpts, FromFile.getBufferStart(),
215 FromFile.getBufferStart(), FromFile.getBufferEnd(),
216 IsFirstIncludeOfFile) {}
219 assert(
PP &&
"Cannot reset token mode without a preprocessor");
220 if (LangOpts.TraditionalCPP)
248 FileID SpellingFID =
SM.getFileID(SpellingLoc);
249 llvm::MemoryBufferRef InputFile =
SM.getBufferOrFake(SpellingFID);
255 const char *StrData =
SM.getCharacterData(SpellingLoc);
257 L->BufferPtr = StrData;
258 L->BufferEnd = StrData+TokLen;
259 assert(L->BufferEnd[0] == 0 &&
"Buffer is not nul terminated!");
263 L->FileLoc =
SM.createExpansionLoc(
SM.getLocForStartOfFile(SpellingFID),
265 ExpansionLocEnd, TokLen);
272 L->Is_PragmaLexer =
true;
277 this->IsAtPhysicalStartOfLine = IsAtStartOfLine;
278 this->IsAtStartOfLine = IsAtStartOfLine;
279 assert((BufferStart + Offset) <= BufferEnd);
280 BufferPtr = BufferStart + Offset;
284 typename T::size_type i = 0, e = Str.size();
286 if (Str[i] ==
'\\' || Str[i] == Quote) {
287 Str.insert(Str.begin() + i,
'\\');
290 }
else if (Str[i] ==
'\n' || Str[i] ==
'\r') {
292 if ((i < e - 1) && (Str[i + 1] ==
'\n' || Str[i + 1] ==
'\r') &&
293 Str[i] != Str[i + 1]) {
299 Str.insert(Str.begin() + i + 1,
'n');
309 std::string
Result = std::string(Str);
310 char Quote = Charify ?
'\'' :
'"';
325 assert(
Tok.needsCleaning() &&
"getSpellingSlow called on simple token");
328 const char *BufEnd = BufPtr +
Tok.getLength();
332 while (BufPtr < BufEnd) {
334 Spelling[Length++] = CharAndSize.Char;
335 BufPtr += CharAndSize.Size;
337 if (Spelling[Length - 1] ==
'"')
345 Spelling[Length - 2] ==
'R' && Spelling[Length - 1] ==
'"') {
348 const char *RawEnd = BufEnd;
349 do --RawEnd;
while (*RawEnd !=
'"');
350 size_t RawLength = RawEnd - BufPtr + 1;
353 memcpy(Spelling + Length, BufPtr, RawLength);
361 while (BufPtr < BufEnd) {
363 Spelling[Length++] = CharAndSize.Char;
364 BufPtr += CharAndSize.Size;
367 assert(Length <
Tok.getLength() &&
368 "NeedsCleaning flag set on token that didn't need cleaning!");
386 bool invalidTemp =
false;
387 StringRef file =
SM.getBufferData(locInfo.first, &invalidTemp);
389 if (invalid) *invalid =
true;
393 const char *tokenBegin = file.data() + locInfo.second;
397 file.begin(), tokenBegin, file.end());
405 return StringRef(tokenBegin,
length);
410 return StringRef(buffer.data(), buffer.size());
420 assert((
int)
Tok.getLength() >= 0 &&
"Token character range is bogus!");
422 bool CharDataInvalid =
false;
423 const char *TokStart = SourceMgr.getCharacterData(
Tok.getLocation(),
431 if (!
Tok.needsCleaning())
432 return std::string(TokStart, TokStart +
Tok.getLength());
453 assert((
int)
Tok.getLength() >= 0 &&
"Token character range is bogus!");
455 const char *TokStart =
nullptr;
457 if (
Tok.is(tok::raw_identifier))
458 TokStart =
Tok.getRawIdentifier().data();
459 else if (!
Tok.hasUCN()) {
462 Buffer = II->getNameStart();
463 return II->getLength();
469 TokStart =
Tok.getLiteralData();
473 bool CharDataInvalid =
false;
474 TokStart = SourceMgr.getCharacterData(
Tok.getLocation(), &CharDataInvalid);
477 if (CharDataInvalid) {
484 if (!
Tok.needsCleaning()) {
486 return Tok.getLength();
511 bool IgnoreWhiteSpace) {
520 Loc =
SM.getExpansionLoc(Loc);
523 StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
527 const char *StrData = Buffer.data()+LocInfo.second;
529 if (!IgnoreWhiteSpace &&
isWhitespace(SkipEscapedNewLines(StrData)[0]))
533 Lexer TheLexer(
SM.getLocForStartOfFile(LocInfo.first), LangOpts,
534 Buffer.begin(), StrData, Buffer.end());
543 const char *BufStart = Buffer.data();
544 if (Offset >= Buffer.size())
547 const char *LexStart = BufStart + Offset;
548 for (; LexStart != BufStart; --LexStart) {
564 if (LocInfo.first.isInvalid())
568 StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
574 const char *StrData = Buffer.data() + LocInfo.second;
576 if (!LexStart || LexStart == StrData)
581 Lexer TheLexer(LexerStartLoc, LangOpts, Buffer.data(), LexStart,
601 }
while (TheTok.
getKind() != tok::eof);
613 if (!
SM.isMacroArgExpansion(Loc))
620 assert(FileLocInfo.first == BeginFileLocInfo.first &&
621 FileLocInfo.second >= BeginFileLocInfo.second);
627enum PreambleDirectiveKind {
642 Lexer TheLexer(FileLoc, LangOpts, Buffer.begin(), Buffer.begin(),
646 bool InPreprocessorDirective =
false;
650 unsigned MaxLineOffset = 0;
652 const char *CurPtr = Buffer.begin();
653 unsigned CurLine = 0;
654 while (CurPtr != Buffer.end()) {
658 if (CurLine == MaxLines)
662 if (CurPtr != Buffer.end())
663 MaxLineOffset = CurPtr - Buffer.begin();
669 if (InPreprocessorDirective) {
671 if (TheTok.
getKind() == tok::eof) {
682 InPreprocessorDirective =
false;
691 if (MaxLineOffset && TokOffset >= MaxLineOffset)
696 if (TheTok.
getKind() == tok::comment) {
704 Token HashTok = TheTok;
705 InPreprocessorDirective =
true;
714 PreambleDirectiveKind PDK
715 = llvm::StringSwitch<PreambleDirectiveKind>(
Keyword)
716 .Case(
"include", PDK_Skipped)
717 .Case(
"__include_macros", PDK_Skipped)
718 .Case(
"define", PDK_Skipped)
719 .Case(
"undef", PDK_Skipped)
720 .Case(
"line", PDK_Skipped)
721 .Case(
"error", PDK_Skipped)
722 .Case(
"pragma", PDK_Skipped)
723 .Case(
"import", PDK_Skipped)
724 .Case(
"include_next", PDK_Skipped)
725 .Case(
"warning", PDK_Skipped)
726 .Case(
"ident", PDK_Skipped)
727 .Case(
"sccs", PDK_Skipped)
728 .Case(
"assert", PDK_Skipped)
729 .Case(
"unassert", PDK_Skipped)
730 .Case(
"if", PDK_Skipped)
731 .Case(
"ifdef", PDK_Skipped)
732 .Case(
"ifndef", PDK_Skipped)
733 .Case(
"elif", PDK_Skipped)
734 .Case(
"elifdef", PDK_Skipped)
735 .Case(
"elifndef", PDK_Skipped)
736 .Case(
"else", PDK_Skipped)
737 .Case(
"endif", PDK_Skipped)
738 .Default(PDK_Unknown);
755 TheTok.
getKind() == tok::raw_identifier &&
757 LangOpts.CPlusPlusModules) {
760 Token ModuleTok = TheTok;
763 }
while (TheTok.
getKind() == tok::comment);
764 if (TheTok.
getKind() != tok::semi) {
779 if (ActiveCommentLoc.
isValid())
780 End = ActiveCommentLoc;
795 const char *TokPtr =
SM.getCharacterData(TokStart, &
Invalid);
798 if (
Invalid || (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr)))
801 unsigned PhysOffset = 0;
806 while (Lexer::isObviouslySimpleCharacter(*TokPtr)) {
816 for (; CharNo; --CharNo) {
818 TokPtr += CharAndSize.Size;
819 PhysOffset += CharAndSize.Size;
826 if (!Lexer::isObviouslySimpleCharacter(*TokPtr))
827 PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr;
876 if (!
SM.isAtStartOfImmediateMacroExpansion(loc, &expansionLoc))
882 *MacroBegin = expansionLoc;
904 if (!
SM.isAtEndOfImmediateMacroExpansion(afterLoc, &expansionLoc))
910 *MacroEnd = expansionLoc;
923 if (Range.isTokenRange()) {
930 auto [FID, BeginOffs] =
SM.getDecomposedLoc(Begin);
935 if (!
SM.isInFileID(End, FID, &EndOffs) ||
945 return SM.getSLocEntry(
SM.getFileID(Loc))
947 .isExpansionTokenRange();
964 Range.setBegin(Begin);
969 if (Range.isTokenRange()) {
987 Range.setBegin(MacroBegin);
988 Range.setEnd(MacroEnd);
990 if (Range.isTokenRange())
1010 Range.setBegin(
SM.getImmediateSpellingLoc(Begin));
1011 Range.setEnd(
SM.getImmediateSpellingLoc(End));
1024 if (Range.isInvalid()) {
1031 if (beginInfo.first.isInvalid()) {
1037 if (!
SM.isInFileID(Range.getEnd(), beginInfo.first, &EndOffs) ||
1038 beginInfo.second > EndOffs) {
1044 bool invalidTemp =
false;
1045 StringRef file =
SM.getBufferData(beginInfo.first, &invalidTemp);
1052 return file.substr(beginInfo.second, EndOffs - beginInfo.second);
1058 assert(Loc.
isMacroID() &&
"Only reasonable to call this on macros");
1074 Loc =
SM.getImmediateExpansionRange(Loc).getBegin();
1081 FileID MacroFID =
SM.getFileID(Loc);
1082 if (
SM.isInFileID(SpellLoc, MacroFID))
1092 Loc =
SM.getSpellingLoc(Loc);
1098 StringRef ExpansionBuffer =
SM.getBufferData(ExpansionInfo.first);
1099 return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
1104 assert(Loc.
isMacroID() &&
"Only reasonable to call this on macros");
1106 while (
SM.isMacroArgExpansion(Loc))
1107 Loc =
SM.getImmediateExpansionRange(Loc).getBegin();
1113 if (!SpellLoc.
isFileID() ||
SM.isWrittenInScratchSpace(SpellLoc))
1119 Loc =
SM.getSpellingLoc(
SM.getImmediateExpansionRange(Loc).getBegin());
1125 StringRef ExpansionBuffer =
SM.getBufferData(ExpansionInfo.first);
1126 return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
1135 if (Str - 1 < BufferStart)
1138 if ((Str[0] ==
'\n' && Str[-1] ==
'\r') ||
1139 (Str[0] ==
'\r' && Str[-1] ==
'\n')) {
1140 if (Str - 2 < BufferStart)
1150 return *Str ==
'\\';
1158 if (LocInfo.first.isInvalid())
1161 StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
1167 StringRef Rest = Buffer.substr(
Line - Buffer.data());
1168 size_t NumWhitespaceChars = Rest.find_first_not_of(
" \t");
1169 return NumWhitespaceChars == StringRef::npos
1171 : Rest.take_front(NumWhitespaceChars);
1186 unsigned CharNo,
unsigned TokLen) {
1187 assert(FileLoc.
isMacroID() &&
"Must be a macro expansion");
1203 return SM.createExpansionLoc(SpellingLoc, II.
getBegin(), II.
getEnd(), TokLen);
1209 unsigned TokLen)
const {
1210 assert(Loc >= BufferStart && Loc <= BufferEnd &&
1211 "Location out of range for this buffer!");
1215 unsigned CharNo = Loc-BufferStart;
1216 if (FileLoc.isFileID())
1217 return FileLoc.getLocWithOffset(CharNo);
1221 assert(
PP &&
"This doesn't work on raw lexers");
1240 case '=':
return '#';
1241 case ')':
return ']';
1242 case '(':
return '[';
1243 case '!':
return '|';
1244 case '\'':
return '^';
1245 case '>':
return '}';
1246 case '/':
return '\\';
1247 case '<':
return '{';
1248 case '-':
return '~';
1263 L->
Diag(CP-2, diag::trigraph_ignored);
1268 L->
Diag(CP-2, diag::trigraph_converted) << StringRef(&Res, 1);
1280 if (Ptr[Size-1] !=
'\n' && Ptr[Size-1] !=
'\r')
1284 if ((Ptr[Size] ==
'\r' || Ptr[Size] ==
'\n') &&
1285 Ptr[Size-1] != Ptr[Size])
1298const char *Lexer::SkipEscapedNewLines(
const char *P) {
1300 const char *AfterEscape;
1303 }
else if (*P ==
'?') {
1305 if (P[1] !=
'?' || P[2] !=
'/')
1315 if (NewLineSize == 0)
return P;
1316 P = AfterEscape+NewLineSize;
1323 bool IncludeComments) {
1326 return std::nullopt;
1334 bool InvalidTemp =
false;
1335 StringRef
File =
SM.getBufferData(LocInfo.first, &InvalidTemp);
1337 return std::nullopt;
1339 const char *TokenBegin =
File.data() + LocInfo.second;
1342 Lexer lexer(
SM.getLocForStartOfFile(LocInfo.first), LangOpts,
File.begin(),
1343 TokenBegin,
File.end());
1354 bool IncludeComments) {
1355 const auto StartOfFile =
SM.getLocForStartOfFile(
SM.getFileID(Loc));
1356 while (Loc != StartOfFile) {
1359 return std::nullopt;
1365 if (!
Tok.is(tok::comment) || IncludeComments) {
1369 return std::nullopt;
1378 const LangOptions &LangOpts,
bool SkipTrailingWhitespaceAndNewLine) {
1380 if (!
Tok ||
Tok->isNot(TKind))
1385 unsigned NumWhitespaceChars = 0;
1386 if (SkipTrailingWhitespaceAndNewLine) {
1387 const char *TokenEnd =
SM.getCharacterData(TokenLoc) +
Tok->getLength();
1388 unsigned char C = *TokenEnd;
1391 NumWhitespaceChars++;
1395 if (
C ==
'\n' ||
C ==
'\r') {
1398 NumWhitespaceChars++;
1399 if ((
C ==
'\n' ||
C ==
'\r') &&
C != PrevC)
1400 NumWhitespaceChars++;
1425 if (Ptr[0] ==
'\\') {
1431 return {
'\\', Size};
1441 Diag(Ptr, diag::backslash_newline_space);
1444 Size += EscapedNewLineSize;
1445 Ptr += EscapedNewLineSize;
1448 auto CharAndSize = getCharAndSizeSlow(Ptr,
Tok);
1449 CharAndSize.Size += Size;
1454 return {
'\\',
Size};
1458 if (Ptr[0] ==
'?' && Ptr[1] ==
'?') {
1462 LangOpts.Trigraphs)) {
1468 if (
C ==
'\\')
goto Slash;
1474 return {*Ptr,
Size + 1u};
1488 if (Ptr[0] ==
'\\') {
1494 return {
'\\',
Size};
1499 Size += EscapedNewLineSize;
1500 Ptr += EscapedNewLineSize;
1503 auto CharAndSize = getCharAndSizeSlowNoWarn(Ptr, LangOpts);
1504 CharAndSize.Size +=
Size;
1509 return {
'\\',
Size};
1513 if (LangOpts.Trigraphs && Ptr[0] ==
'?' && Ptr[1] ==
'?') {
1519 if (
C ==
'\\')
goto Slash;
1525 return {*Ptr,
Size + 1u};
1533void Lexer::SetByteOffset(
unsigned Offset,
bool StartOfLine) {
1534 BufferPtr = BufferStart + Offset;
1535 if (BufferPtr > BufferEnd)
1536 BufferPtr = BufferEnd;
1540 IsAtStartOfLine = StartOfLine;
1541 IsAtPhysicalStartOfLine = StartOfLine;
1545 static const llvm::sys::UnicodeCharSet UnicodeWhitespaceChars(
1547 return UnicodeWhitespaceChars.contains(Codepoint);
1552 llvm::raw_svector_ostream CharOS(CharBuf);
1553 llvm::write_hex(CharOS,
C, llvm::HexPrintStyle::Upper, 4);
1564 bool IsStart,
bool &IsExtension) {
1565 static const llvm::sys::UnicodeCharSet MathStartChars(
1567 static const llvm::sys::UnicodeCharSet MathContinueChars(
1569 if (MathStartChars.contains(
C) ||
1570 (!IsStart && MathContinueChars.contains(
C))) {
1578 bool &IsExtension) {
1579 if (LangOpts.AsmPreprocessor) {
1581 }
else if (LangOpts.DollarIdents &&
'$' ==
C) {
1583 }
else if (LangOpts.CPlusPlus || LangOpts.C23) {
1588 static const llvm::sys::UnicodeCharSet XIDStartChars(
XIDStartRanges);
1590 if (
C ==
'_' || XIDStartChars.contains(
C) || XIDContinueChars.contains(
C))
1594 }
else if (LangOpts.C11) {
1595 static const llvm::sys::UnicodeCharSet C11AllowedIDChars(
1597 return C11AllowedIDChars.contains(
C);
1599 static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
1601 return C99AllowedIDChars.contains(
C);
1606 bool &IsExtension) {
1607 assert(
C > 0x7F &&
"isAllowedInitiallyIDChar called with an ASCII codepoint");
1608 IsExtension =
false;
1609 if (LangOpts.AsmPreprocessor) {
1612 if (LangOpts.CPlusPlus || LangOpts.C23) {
1613 static const llvm::sys::UnicodeCharSet XIDStartChars(
XIDStartRanges);
1614 if (XIDStartChars.contains(
C))
1622 static const llvm::sys::UnicodeCharSet C11DisallowedInitialIDChars(
1624 return !C11DisallowedInitialIDChars.contains(
C);
1626 static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
1628 return !C99DisallowedInitialIDChars.contains(
C);
1634 static const llvm::sys::UnicodeCharSet MathStartChars(
1636 static const llvm::sys::UnicodeCharSet MathContinueChars(
1639 (void)MathStartChars;
1640 (void)MathContinueChars;
1641 assert((MathStartChars.contains(
C) || MathContinueChars.contains(
C)) &&
1642 "Unexpected mathematical notation codepoint");
1643 Diags.
Report(Range.getBegin(), diag::ext_mathematical_notation)
1656 if (!Diags.
isIgnored(diag::warn_c99_compat_unicode_id, Range.getBegin())) {
1658 CannotAppearInIdentifier = 0,
1659 CannotStartIdentifier
1662 static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
1664 static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
1666 if (!C99AllowedIDChars.contains(
C)) {
1667 Diags.
Report(Range.getBegin(), diag::warn_c99_compat_unicode_id)
1669 << CannotAppearInIdentifier;
1670 }
else if (
IsFirst && C99DisallowedInitialIDChars.contains(
C)) {
1671 Diags.
Report(Range.getBegin(), diag::warn_c99_compat_unicode_id)
1673 << CannotStartIdentifier;
1685 struct HomoglyphPair {
1688 bool operator<(HomoglyphPair R)
const {
return Character < R.Character; }
1690 static constexpr HomoglyphPair SortedHomoglyphs[] = {
1743 std::lower_bound(std::begin(SortedHomoglyphs),
1744 std::end(SortedHomoglyphs) - 1, HomoglyphPair{
C,
'\0'});
1745 if (Homoglyph->Character ==
C) {
1746 if (Homoglyph->LooksLike) {
1747 const char LooksLikeStr[] = {Homoglyph->LooksLike, 0};
1748 Diags.
Report(Range.getBegin(), diag::warn_utf8_symbol_homoglyph)
1751 Diags.
Report(Range.getBegin(), diag::warn_utf8_symbol_zero_width)
1771 bool InvalidOnlyAtStart =
IsFirst && !IsIDStart && IsIDContinue;
1773 if (!
IsFirst || InvalidOnlyAtStart) {
1774 Diags.
Report(Range.getBegin(), diag::err_character_not_allowed_identifier)
1778 Diags.
Report(Range.getBegin(), diag::err_character_not_allowed)
1784bool Lexer::tryConsumeIdentifierUCN(
const char *&CurPtr,
unsigned Size,
1786 const char *UCNPtr = CurPtr +
Size;
1787 uint32_t CodePoint = tryReadUCN(UCNPtr, CurPtr,
nullptr);
1788 if (CodePoint == 0) {
1791 bool IsExtension =
false;
1796 !
PP->isPreprocessedOutput())
1798 PP->getDiagnostics(), LangOpts, CodePoint,
1816 if ((UCNPtr - CurPtr == 6 && CurPtr[1] ==
'u') ||
1817 (UCNPtr - CurPtr == 10 && CurPtr[1] ==
'U'))
1820 while (CurPtr != UCNPtr)
1821 (void)getAndAdvanceChar(CurPtr,
Result);
1825bool Lexer::tryConsumeIdentifierUTF8Char(
const char *&CurPtr,
Token &
Result) {
1826 llvm::UTF32 CodePoint;
1831 unsigned FirstCodeUnitSize;
1832 getCharAndSize(CurPtr, FirstCodeUnitSize);
1833 const char *CharStart = CurPtr + FirstCodeUnitSize - 1;
1834 const char *UnicodePtr = CharStart;
1836 llvm::ConversionResult ConvResult = llvm::convertUTF8Sequence(
1837 (
const llvm::UTF8 **)&UnicodePtr, (
const llvm::UTF8 *)BufferEnd,
1838 &CodePoint, llvm::strictConversion);
1839 if (ConvResult != llvm::conversionOK)
1842 bool IsExtension =
false;
1849 !
PP->isPreprocessedOutput())
1851 PP->getDiagnostics(), LangOpts, CodePoint,
1859 PP->getDiagnostics(), CodePoint,
1871 ConsumeChar(CurPtr, FirstCodeUnitSize,
Result);
1872 CurPtr = UnicodePtr;
1876bool Lexer::LexUnicodeIdentifierStart(
Token &
Result, uint32_t
C,
1877 const char *CurPtr) {
1878 bool IsExtension =
false;
1881 !
PP->isPreprocessedOutput()) {
1893 return LexIdentifierContinue(
Result, CurPtr);
1897 !
PP->isPreprocessedOutput() && !
isASCII(*BufferPtr) &&
1909 PP->getDiagnostics(), LangOpts,
C,
1918 FormTokenWithChars(
Result, CurPtr, tok::unknown);
1924 [[maybe_unused]]
const char *BufferEnd) {
1926 alignas(16)
static constexpr char AsciiIdentifierRange[16] = {
1927 '_',
'_',
'A',
'Z',
'a',
'z',
'0',
'9',
1929 constexpr ssize_t BytesPerRegister = 16;
1931 __m128i AsciiIdentifierRangeV =
1934 while (LLVM_LIKELY(BufferEnd - CurPtr >= BytesPerRegister)) {
1941 if (Consumed == BytesPerRegister)
1947 unsigned char C = *CurPtr;
1953bool Lexer::LexIdentifierContinue(
Token &
Result,
const char *CurPtr) {
1962 unsigned char C = getCharAndSize(CurPtr, Size);
1964 CurPtr = ConsumeChar(CurPtr, Size,
Result);
1969 if (!LangOpts.DollarIdents)
1973 Diag(CurPtr, diag::ext_dollar_in_identifier);
1974 CurPtr = ConsumeChar(CurPtr, Size,
Result);
1977 if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result))
1985 const char *IdStart = BufferPtr;
1986 FormTokenWithChars(
Result, CurPtr, tok::raw_identifier);
1987 Result.setRawIdentifierData(IdStart);
1996 const IdentifierInfo *II =
PP->LookUpIdentifierInfo(
Result);
2003 if (isCodeCompletionPoint(CurPtr)) {
2005 Result.setKind(tok::code_completion);
2011 assert(*CurPtr == 0 &&
"Completion character must be 0");
2016 if (CurPtr < BufferEnd) {
2027 return PP->HandleIdentifier(
Result);
2034bool Lexer::isHexaLiteral(
const char *Start,
const LangOptions &LangOpts) {
2036 char C1 = CharAndSize1.Char;
2042 char C2 = CharAndSize2.Char;
2043 return (C2 ==
'x' || C2 ==
'X');
2049bool Lexer::LexNumericConstant(
Token &
Result,
const char *CurPtr) {
2051 char C = getCharAndSize(CurPtr, Size);
2054 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2056 if (LangOpts.HLSL &&
C ==
'.' && (*CurPtr ==
'x' || *CurPtr ==
'r')) {
2060 C = getCharAndSize(CurPtr, Size);
2064 if ((
C ==
'-' ||
C ==
'+') && (PrevCh ==
'E' || PrevCh ==
'e')) {
2067 if (!LangOpts.MicrosoftExt || !isHexaLiteral(BufferPtr, LangOpts))
2068 return LexNumericConstant(
Result, ConsumeChar(CurPtr, Size,
Result));
2072 if ((
C ==
'-' ||
C ==
'+') && (PrevCh ==
'P' || PrevCh ==
'p')) {
2076 bool IsHexFloat =
true;
2077 if (!LangOpts.C99) {
2078 if (!isHexaLiteral(BufferPtr, LangOpts))
2080 else if (!LangOpts.CPlusPlus17 &&
2081 std::find(BufferPtr, CurPtr,
'_') != CurPtr)
2085 return LexNumericConstant(
Result, ConsumeChar(CurPtr, Size,
Result));
2089 if (
C ==
'\'' && (LangOpts.CPlusPlus14 || LangOpts.C23)) {
2093 Diag(CurPtr, LangOpts.CPlusPlus
2094 ? diag::warn_cxx11_compat_digit_separator
2095 : diag::warn_c23_compat_digit_separator);
2096 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2097 CurPtr = ConsumeChar(CurPtr, NextSize,
Result);
2098 return LexNumericConstant(
Result, CurPtr);
2103 if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result))
2104 return LexNumericConstant(
Result, CurPtr);
2106 return LexNumericConstant(
Result, CurPtr);
2109 const char *TokStart = BufferPtr;
2110 FormTokenWithChars(
Result, CurPtr, tok::numeric_constant);
2111 Result.setLiteralData(TokStart);
2117const char *Lexer::LexUDSuffix(
Token &
Result,
const char *CurPtr,
2118 bool IsStringLiteral) {
2119 assert(LangOpts.CPlusPlus);
2123 char C = getCharAndSize(CurPtr, Size);
2124 bool Consumed =
false;
2127 if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result))
2129 else if (!
isASCII(
C) && tryConsumeIdentifierUTF8Char(CurPtr,
Result))
2135 if (!LangOpts.CPlusPlus11) {
2138 C ==
'_' ? diag::warn_cxx11_compat_user_defined_literal
2139 : diag::warn_cxx11_compat_reserved_user_defined_literal)
2150 bool IsUDSuffix =
false;
2153 else if (IsStringLiteral && LangOpts.CPlusPlus14) {
2157 const unsigned MaxStandardSuffixLength = 3;
2158 char Buffer[MaxStandardSuffixLength] = {
C };
2159 unsigned Consumed =
Size;
2162 auto [
Next, NextSize] =
2166 const StringRef CompleteSuffix(Buffer, Chars);
2172 if (Chars == MaxStandardSuffixLength)
2176 Buffer[Chars++] =
Next;
2177 Consumed += NextSize;
2183 Diag(CurPtr, LangOpts.MSVCCompat
2184 ? diag::ext_ms_reserved_user_defined_literal
2185 : diag::ext_reserved_user_defined_literal)
2190 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2195 C = getCharAndSize(CurPtr, Size);
2197 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2198 }
else if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result)) {
2199 }
else if (!
isASCII(
C) && tryConsumeIdentifierUTF8Char(CurPtr,
Result)) {
2209bool Lexer::LexStringLiteral(
Token &
Result,
const char *CurPtr,
2211 const char *AfterQuote = CurPtr;
2213 const char *NulCharacter =
nullptr;
2216 (Kind == tok::utf8_string_literal ||
2217 Kind == tok::utf16_string_literal ||
2218 Kind == tok::utf32_string_literal))
2219 Diag(BufferPtr, LangOpts.CPlusPlus ? diag::warn_cxx98_compat_unicode_literal
2220 : diag::warn_c99_compat_unicode_literal);
2222 char C = getAndAdvanceChar(CurPtr,
Result);
2227 C = getAndAdvanceChar(CurPtr,
Result);
2229 if (
C ==
'\n' ||
C ==
'\r' ||
2230 (
C == 0 && CurPtr-1 == BufferEnd)) {
2232 Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 1;
2233 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2238 if (isCodeCompletionPoint(CurPtr-1)) {
2240 codeCompleteIncludedFile(AfterQuote, CurPtr - 1,
false);
2242 PP->CodeCompleteNaturalLanguage();
2243 FormTokenWithChars(
Result, CurPtr - 1, tok::unknown);
2248 NulCharacter = CurPtr-1;
2250 C = getAndAdvanceChar(CurPtr,
Result);
2254 if (LangOpts.CPlusPlus)
2255 CurPtr = LexUDSuffix(
Result, CurPtr,
true);
2259 Diag(NulCharacter, diag::null_in_char_or_string) << 1;
2262 const char *TokStart = BufferPtr;
2263 FormTokenWithChars(
Result, CurPtr, Kind);
2264 Result.setLiteralData(TokStart);
2270bool Lexer::LexRawStringLiteral(
Token &
Result,
const char *CurPtr,
2278 Diag(BufferPtr, diag::warn_cxx98_compat_raw_string_literal);
2280 unsigned PrefixLen = 0;
2284 llvm::is_contained({
'$',
'@',
'`'}, CurPtr[PrefixLen])) {
2285 const char *Pos = &CurPtr[PrefixLen];
2286 Diag(Pos, LangOpts.CPlusPlus26
2287 ? diag::warn_cxx26_compat_raw_string_literal_character_set
2288 : diag::ext_cxx26_raw_string_literal_character_set)
2289 << StringRef(Pos, 1);
2295 if (CurPtr[PrefixLen] !=
'(') {
2297 const char *PrefixEnd = &CurPtr[PrefixLen];
2298 if (PrefixLen == 16) {
2299 Diag(PrefixEnd, diag::err_raw_delim_too_long);
2300 }
else if (*PrefixEnd ==
'\n') {
2301 Diag(PrefixEnd, diag::err_invalid_newline_raw_delim);
2303 Diag(PrefixEnd, diag::err_invalid_char_raw_delim)
2304 << StringRef(PrefixEnd, 1);
2316 if (
C == 0 && CurPtr-1 == BufferEnd) {
2322 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2327 const char *Prefix = CurPtr;
2328 CurPtr += PrefixLen + 1;
2335 if (strncmp(CurPtr, Prefix, PrefixLen) == 0 && CurPtr[PrefixLen] ==
'"') {
2336 CurPtr += PrefixLen + 1;
2339 }
else if (
C == 0 && CurPtr-1 == BufferEnd) {
2341 Diag(BufferPtr, diag::err_unterminated_raw_string)
2342 << StringRef(Prefix, PrefixLen);
2343 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2349 if (LangOpts.CPlusPlus)
2350 CurPtr = LexUDSuffix(
Result, CurPtr,
true);
2353 const char *TokStart = BufferPtr;
2354 FormTokenWithChars(
Result, CurPtr, Kind);
2355 Result.setLiteralData(TokStart);
2361bool Lexer::LexAngledStringLiteral(
Token &
Result,
const char *CurPtr) {
2363 const char *NulCharacter =
nullptr;
2364 const char *AfterLessPos = CurPtr;
2365 char C = getAndAdvanceChar(CurPtr,
Result);
2370 C = getAndAdvanceChar(CurPtr,
Result);
2373 (
C == 0 && (CurPtr - 1 == BufferEnd))) {
2376 FormTokenWithChars(
Result, AfterLessPos, tok::less);
2381 if (isCodeCompletionPoint(CurPtr - 1)) {
2382 codeCompleteIncludedFile(AfterLessPos, CurPtr - 1,
true);
2384 FormTokenWithChars(
Result, CurPtr - 1, tok::unknown);
2387 NulCharacter = CurPtr-1;
2389 C = getAndAdvanceChar(CurPtr,
Result);
2394 Diag(NulCharacter, diag::null_in_char_or_string) << 1;
2397 const char *TokStart = BufferPtr;
2398 FormTokenWithChars(
Result, CurPtr, tok::header_name);
2399 Result.setLiteralData(TokStart);
2403void Lexer::codeCompleteIncludedFile(
const char *PathStart,
2404 const char *CompletionPoint,
2407 StringRef PartialPath(PathStart, CompletionPoint - PathStart);
2408 llvm::StringRef SlashChars = LangOpts.MSVCCompat ?
"/\\" :
"/";
2409 auto Slash = PartialPath.find_last_of(SlashChars);
2411 (Slash == StringRef::npos) ?
"" : PartialPath.take_front(Slash);
2412 const char *StartOfFilename =
2413 (Slash == StringRef::npos) ? PathStart : PathStart + Slash + 1;
2415 PP->setCodeCompletionIdentifierInfo(&
PP->getIdentifierTable().get(
2416 StringRef(StartOfFilename, CompletionPoint - StartOfFilename)));
2419 while (CompletionPoint < BufferEnd) {
2420 char Next = *(CompletionPoint + 1);
2424 if (
Next == (IsAngled ?
'>' :
'"'))
2426 if (SlashChars.contains(
Next))
2430 PP->setCodeCompletionTokenRange(
2431 FileLoc.getLocWithOffset(StartOfFilename - BufferStart),
2432 FileLoc.getLocWithOffset(CompletionPoint - BufferStart));
2433 PP->CodeCompleteIncludedFile(Dir, IsAngled);
2438bool Lexer::LexCharConstant(
Token &
Result,
const char *CurPtr,
2441 const char *NulCharacter =
nullptr;
2444 if (Kind == tok::utf16_char_constant || Kind == tok::utf32_char_constant)
2445 Diag(BufferPtr, LangOpts.CPlusPlus
2446 ? diag::warn_cxx98_compat_unicode_literal
2447 : diag::warn_c99_compat_unicode_literal);
2448 else if (Kind == tok::utf8_char_constant)
2449 Diag(BufferPtr, LangOpts.CPlusPlus
2450 ? diag::warn_cxx14_compat_u8_character_literal
2451 : diag::warn_c17_compat_u8_character_literal);
2454 char C = getAndAdvanceChar(CurPtr,
Result);
2457 Diag(BufferPtr, diag::ext_empty_character);
2458 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2465 C = getAndAdvanceChar(CurPtr,
Result);
2467 if (
C ==
'\n' ||
C ==
'\r' ||
2468 (
C == 0 && CurPtr-1 == BufferEnd)) {
2470 Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 0;
2471 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2476 if (isCodeCompletionPoint(CurPtr-1)) {
2477 PP->CodeCompleteNaturalLanguage();
2478 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2483 NulCharacter = CurPtr-1;
2485 C = getAndAdvanceChar(CurPtr,
Result);
2489 if (LangOpts.CPlusPlus)
2490 CurPtr = LexUDSuffix(
Result, CurPtr,
false);
2494 Diag(NulCharacter, diag::null_in_char_or_string) << 0;
2497 const char *TokStart = BufferPtr;
2498 FormTokenWithChars(
Result, CurPtr, Kind);
2499 Result.setLiteralData(TokStart);
2507bool Lexer::SkipWhitespace(
Token &
Result,
const char *CurPtr,
2508 bool &TokAtPhysicalStartOfLine) {
2512 unsigned char Char = *CurPtr;
2514 const char *lastNewLine =
nullptr;
2515 auto setLastNewLine = [&](
const char *Ptr) {
2521 setLastNewLine(CurPtr - 1);
2540 if (*CurPtr ==
'\n')
2541 setLastNewLine(CurPtr);
2548 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2550 IsAtStartOfLine =
true;
2551 IsAtPhysicalStartOfLine =
true;
2558 char PrevChar = CurPtr[-1];
2564 TokAtPhysicalStartOfLine =
true;
2566 if (NewLinePtr && lastNewLine && NewLinePtr != lastNewLine &&
PP) {
2567 if (
auto *Handler =
PP->getEmptylineHandler())
2583bool Lexer::SkipLineComment(
Token &
Result,
const char *CurPtr,
2584 bool &TokAtPhysicalStartOfLine) {
2589 Diag(BufferPtr, diag::ext_line_comment);
2607 bool UnicodeDecodingAlreadyDiagnosed =
false;
2614 C !=
'\n' &&
C !=
'\r') {
2616 UnicodeDecodingAlreadyDiagnosed =
false;
2620 unsigned Length = llvm::getUTF8SequenceSize(
2621 (
const llvm::UTF8 *)CurPtr, (
const llvm::UTF8 *)BufferEnd);
2624 Diag(CurPtr, diag::warn_invalid_utf8_in_comment);
2625 UnicodeDecodingAlreadyDiagnosed =
true;
2628 UnicodeDecodingAlreadyDiagnosed =
false;
2634 const char *NextLine = CurPtr;
2637 const char *EscapePtr = CurPtr-1;
2638 bool HasSpace =
false;
2644 if (*EscapePtr ==
'\\')
2647 else if (EscapePtr[0] ==
'/' && EscapePtr[-1] ==
'?' &&
2648 EscapePtr[-2] ==
'?' && LangOpts.Trigraphs)
2650 CurPtr = EscapePtr-2;
2656 Diag(EscapePtr, diag::backslash_newline_space);
2663 const char *OldPtr = CurPtr;
2666 C = getAndAdvanceChar(CurPtr,
Result);
2671 if (
C != 0 && CurPtr == OldPtr+1) {
2679 if (CurPtr != OldPtr + 1 &&
C !=
'/' &&
2680 (CurPtr == BufferEnd + 1 || CurPtr[0] !=
'/')) {
2681 for (; OldPtr != CurPtr; ++OldPtr)
2682 if (OldPtr[0] ==
'\n' || OldPtr[0] ==
'\r') {
2686 const char *ForwardPtr = CurPtr;
2689 if (ForwardPtr[0] ==
'/' && ForwardPtr[1] ==
'/')
2694 Diag(OldPtr-1, diag::ext_multi_line_line_comment);
2699 if (
C ==
'\r' ||
C ==
'\n' || CurPtr == BufferEnd + 1) {
2704 if (
C ==
'\0' && isCodeCompletionPoint(CurPtr-1)) {
2705 PP->CodeCompleteNaturalLanguage();
2722 return SaveLineComment(
Result, CurPtr);
2736 NewLinePtr = CurPtr++;
2740 TokAtPhysicalStartOfLine =
true;
2749bool Lexer::SaveLineComment(
Token &
Result,
const char *CurPtr) {
2752 FormTokenWithChars(
Result, CurPtr, tok::comment);
2764 assert(Spelling[0] ==
'/' && Spelling[1] ==
'/' &&
"Not line comment?");
2768 Result.setKind(tok::comment);
2779 assert(CurPtr[0] ==
'\n' || CurPtr[0] ==
'\r');
2782 const char *TrigraphPos =
nullptr;
2784 const char *SpacePos =
nullptr;
2791 if (CurPtr[0] ==
'\n' || CurPtr[0] ==
'\r') {
2793 if (CurPtr[0] == CurPtr[1])
2807 if (*CurPtr ==
'\\') {
2809 }
else if (CurPtr[0] ==
'/' && CurPtr[-1] ==
'?' && CurPtr[-2] ==
'?') {
2811 TrigraphPos = CurPtr - 2;
2822 if (*CurPtr !=
'\n' && *CurPtr !=
'\r')
2831 L->
Diag(TrigraphPos, diag::trigraph_ignored_block_comment);
2835 L->
Diag(TrigraphPos, diag::trigraph_ends_block_comment);
2840 L->
Diag(CurPtr + 1, diag::escaped_newline_block_comment_end);
2844 L->
Diag(SpacePos, diag::backslash_newline_space);
2850#include <emmintrin.h>
2865bool Lexer::SkipBlockComment(
Token &
Result,
const char *CurPtr,
2866 bool &TokAtPhysicalStartOfLine) {
2876 unsigned char C = getCharAndSize(CurPtr, CharSize);
2878 if (
C == 0 && CurPtr == BufferEnd+1) {
2880 Diag(BufferPtr, diag::err_unterminated_block_comment);
2886 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2903 bool UnicodeDecodingAlreadyDiagnosed =
false;
2908 if (CurPtr + 24 < BufferEnd &&
2911 !(
PP &&
PP->getCodeCompletionFileLoc() == FileLoc)) {
2913 while (
C !=
'/' && (
intptr_t)CurPtr % 16 != 0) {
2918 if (
C ==
'/')
goto FoundSlash;
2922 while (CurPtr + 16 < BufferEnd) {
2924 if (LLVM_UNLIKELY(Mask != 0)) {
2934 CurPtr += llvm::countr_zero<unsigned>(cmp) + 1;
2940 __vector
unsigned char LongUTF = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
2941 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
2942 0x80, 0x80, 0x80, 0x80};
2943 __vector
unsigned char Slashes = {
2944 '/',
'/',
'/',
'/',
'/',
'/',
'/',
'/',
2945 '/',
'/',
'/',
'/',
'/',
'/',
'/',
'/'
2947 while (CurPtr + 16 < BufferEnd) {
2949 vec_any_ge(*(
const __vector
unsigned char *)CurPtr, LongUTF)))
2951 if (
vec_any_eq(*(
const __vector
unsigned char *)CurPtr, Slashes)) {
2958 while (CurPtr + 16 < BufferEnd) {
2959 bool HasNonASCII =
false;
2960 for (
unsigned I = 0; I < 16; ++I)
2961 HasNonASCII |= !
isASCII(CurPtr[I]);
2963 if (LLVM_UNLIKELY(HasNonASCII))
2966 bool HasSlash =
false;
2967 for (
unsigned I = 0; I < 16; ++I)
2968 HasSlash |= CurPtr[I] ==
'/';
2982 while (
C !=
'/' &&
C !=
'\0') {
2984 UnicodeDecodingAlreadyDiagnosed =
false;
2991 unsigned Length = llvm::getUTF8SequenceSize(
2992 (
const llvm::UTF8 *)CurPtr - 1, (
const llvm::UTF8 *)BufferEnd);
2995 Diag(CurPtr - 1, diag::warn_invalid_utf8_in_comment);
2996 UnicodeDecodingAlreadyDiagnosed =
true;
2998 UnicodeDecodingAlreadyDiagnosed =
false;
2999 CurPtr += Length - 1;
3006 if (CurPtr[-2] ==
'*')
3009 if ((CurPtr[-2] ==
'\n' || CurPtr[-2] ==
'\r')) {
3011 LangOpts.Trigraphs)) {
3017 if (CurPtr[0] ==
'*' && CurPtr[1] !=
'/') {
3022 Diag(CurPtr-1, diag::warn_nested_block_comment);
3024 }
else if (
C == 0 && CurPtr == BufferEnd+1) {
3026 Diag(BufferPtr, diag::err_unterminated_block_comment);
3035 FormTokenWithChars(
Result, CurPtr, tok::unknown);
3041 }
else if (
C ==
'\0' && isCodeCompletionPoint(CurPtr-1)) {
3042 PP->CodeCompleteNaturalLanguage();
3060 FormTokenWithChars(
Result, CurPtr, tok::comment);
3069 SkipWhitespace(
Result, CurPtr+1, TokAtPhysicalStartOfLine);
3087 "Must be in a preprocessing directive!");
3092 const char *CurPtr = BufferPtr;
3094 char Char = getAndAdvanceChar(CurPtr, Tmp);
3102 if (CurPtr-1 != BufferEnd) {
3103 if (isCodeCompletionPoint(CurPtr-1)) {
3104 PP->CodeCompleteNaturalLanguage();
3119 assert(CurPtr[-1] == Char &&
"Trigraphs for newline?");
3120 BufferPtr = CurPtr-1;
3124 if (Tmp.
is(tok::code_completion)) {
3126 PP->CodeCompleteNaturalLanguage();
3129 assert(Tmp.
is(tok::eod) &&
"Unexpected token!");
3141bool Lexer::LexEndOfFile(
Token &
Result,
const char *CurPtr) {
3149 FormTokenWithChars(
Result, CurPtr, tok::eod);
3160 Result.startToken();
3161 BufferPtr = BufferEnd;
3162 FormTokenWithChars(Result, BufferEnd, tok::eof);
3166 if (
PP->isRecordingPreamble() &&
PP->isInPrimaryFile()) {
3172 MIOpt.ExitTopLevelConditional();
3180 if (
PP->getCodeCompletionFileLoc() != FileLoc)
3182 diag::err_pp_unterminated_conditional);
3189 if (CurPtr != BufferStart && (CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r'))
3190 Diag(BufferEnd, diag::warn_no_newline_eof)
3202std::optional<Token> Lexer::peekNextPPToken() {
3203 assert(!
LexingRawMode &&
"How can we expand a macro from a skipping buffer?");
3205 if (isDependencyDirectivesLexer()) {
3206 if (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size())
3207 return std::nullopt;
3209 (void)convertDependencyDirectiveToken(
3210 DepDirectives.front().Tokens[NextDepDirectiveTokenIndex],
Result);
3220 const char *TmpBufferPtr = BufferPtr;
3222 bool atStartOfLine = IsAtStartOfLine;
3223 bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
3224 bool leadingSpace = HasLeadingSpace;
3230 BufferPtr = TmpBufferPtr;
3232 HasLeadingSpace = leadingSpace;
3233 IsAtStartOfLine = atStartOfLine;
3234 IsAtPhysicalStartOfLine = atPhysicalStartOfLine;
3238 if (
Tok.
is(tok::eof))
3239 return std::nullopt;
3246 const char *Terminator = CMK ==
CMK_Perforce ?
"<<<<\n" :
">>>>>>>";
3248 auto RestOfBuffer = StringRef(CurPtr, BufferEnd - CurPtr).substr(TermLen);
3249 size_t Pos = RestOfBuffer.find(Terminator);
3250 while (Pos != StringRef::npos) {
3253 (RestOfBuffer[Pos - 1] !=
'\r' && RestOfBuffer[Pos - 1] !=
'\n')) {
3254 RestOfBuffer = RestOfBuffer.substr(Pos+TermLen);
3255 Pos = RestOfBuffer.find(Terminator);
3258 return RestOfBuffer.data()+Pos;
3267bool Lexer::IsStartOfConflictMarker(
const char *CurPtr) {
3269 if (CurPtr != BufferStart &&
3270 CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')
3274 if (!StringRef(CurPtr, BufferEnd - CurPtr).starts_with(
"<<<<<<<") &&
3275 !StringRef(CurPtr, BufferEnd - CurPtr).starts_with(
">>>> "))
3290 Diag(CurPtr, diag::err_conflict_marker);
3291 CurrentConflictMarkerState =
Kind;
3295 while (*CurPtr !=
'\r' && *CurPtr !=
'\n') {
3296 assert(CurPtr != BufferEnd &&
"Didn't find end of line");
3311bool Lexer::HandleEndOfConflictMarker(
const char *CurPtr) {
3313 if (CurPtr != BufferStart &&
3314 CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')
3323 for (
unsigned i = 1; i != 4; ++i)
3324 if (CurPtr[i] != CurPtr[0])
3331 CurrentConflictMarkerState)) {
3335 while (CurPtr != BufferEnd && *CurPtr !=
'\r' && *CurPtr !=
'\n')
3341 CurrentConflictMarkerState =
CMK_None;
3349 const char *BufferEnd) {
3350 if (CurPtr == BufferEnd)
3353 for (; CurPtr != BufferEnd; ++CurPtr) {
3354 if (CurPtr[0] ==
'#' && CurPtr[1] ==
'>')
3360bool Lexer::lexEditorPlaceholder(
Token &
Result,
const char *CurPtr) {
3361 assert(CurPtr[-1] ==
'<' && CurPtr[0] ==
'#' &&
"Not a placeholder!");
3367 const char *Start = CurPtr - 1;
3368 if (!LangOpts.AllowEditorPlaceholders)
3369 Diag(Start, diag::err_placeholder_in_source);
3371 FormTokenWithChars(
Result, End, tok::raw_identifier);
3372 Result.setRawIdentifierData(Start);
3379bool Lexer::isCodeCompletionPoint(
const char *CurPtr)
const {
3380 if (
PP &&
PP->isCodeCompletionEnabled()) {
3381 SourceLocation Loc = FileLoc.getLocWithOffset(CurPtr-BufferStart);
3382 return Loc ==
PP->getCodeCompletionLoc();
3393 if (Opts.CPlusPlus23)
3394 DiagId = diag::warn_cxx23_delimited_escape_sequence;
3395 else if (Opts.C2y && !Named)
3396 DiagId = diag::warn_c2y_delimited_escape_sequence;
3398 DiagId = diag::ext_delimited_escape_sequence;
3404 if (!Opts.CPlusPlus)
3405 Ext = Named ? 2 : 1 ;
3409 Diags.
Report(Loc, DiagId) << Named << Ext;
3412std::optional<uint32_t> Lexer::tryReadNumericUCN(
const char *&StartPtr,
3413 const char *SlashLoc,
3416 char Kind = getCharAndSize(StartPtr, CharSize);
3417 assert((Kind ==
'u' || Kind ==
'U') &&
"expected a UCN");
3419 unsigned NumHexDigits;
3422 else if (Kind ==
'U')
3425 bool Delimited =
false;
3426 bool FoundEndDelimiter =
false;
3430 if (!LangOpts.CPlusPlus && !LangOpts.C99) {
3432 Diag(SlashLoc, diag::warn_ucn_not_valid_in_c89);
3433 return std::nullopt;
3436 const char *CurPtr = StartPtr + CharSize;
3437 const char *KindLoc = &CurPtr[-1];
3439 uint32_t CodePoint = 0;
3440 while (Count != NumHexDigits || Delimited) {
3441 char C = getCharAndSize(CurPtr, CharSize);
3442 if (!Delimited && Count == 0 &&
C ==
'{') {
3448 if (Delimited &&
C ==
'}') {
3450 FoundEndDelimiter =
true;
3454 unsigned Value = llvm::hexDigitValue(
C);
3455 if (
Value == std::numeric_limits<unsigned>::max()) {
3459 Diag(SlashLoc, diag::warn_delimited_ucn_incomplete)
3460 << StringRef(KindLoc, 1);
3461 return std::nullopt;
3464 if (CodePoint & 0xF000'0000) {
3466 Diag(KindLoc, diag::err_escape_too_large) << 0;
3467 return std::nullopt;
3478 Diag(SlashLoc, FoundEndDelimiter ? diag::warn_delimited_ucn_empty
3479 : diag::warn_ucn_escape_no_digits)
3480 << StringRef(KindLoc, 1);
3481 return std::nullopt;
3484 if (Delimited && Kind ==
'U') {
3486 Diag(SlashLoc, diag::err_hex_escape_no_digits) << StringRef(KindLoc, 1);
3487 return std::nullopt;
3490 if (!Delimited && Count != NumHexDigits) {
3492 Diag(SlashLoc, diag::warn_ucn_escape_incomplete);
3494 if (Count == 4 && NumHexDigits == 8) {
3495 CharSourceRange URange =
makeCharRange(*
this, KindLoc, KindLoc + 1);
3496 Diag(KindLoc, diag::note_ucn_four_not_eight)
3500 return std::nullopt;
3503 if (Delimited &&
PP)
3506 PP->getDiagnostics());
3513 if (CurPtr - StartPtr == (
ptrdiff_t)(Count + 1 + (Delimited ? 2 : 0)))
3516 while (StartPtr != CurPtr)
3517 (void)getAndAdvanceChar(StartPtr, *
Result);
3524std::optional<uint32_t> Lexer::tryReadNamedUCN(
const char *&StartPtr,
3525 const char *SlashLoc,
3530 char C = getCharAndSize(StartPtr, CharSize);
3531 assert(
C ==
'N' &&
"expected \\N{...}");
3533 const char *CurPtr = StartPtr + CharSize;
3534 const char *KindLoc = &CurPtr[-1];
3536 C = getCharAndSize(CurPtr, CharSize);
3539 Diag(SlashLoc, diag::warn_ucn_escape_incomplete);
3540 return std::nullopt;
3543 const char *StartName = CurPtr;
3544 bool FoundEndDelimiter =
false;
3545 llvm::SmallVector<char, 30> Buffer;
3547 C = getCharAndSize(CurPtr, CharSize);
3550 FoundEndDelimiter =
true;
3556 Buffer.push_back(
C);
3559 if (!FoundEndDelimiter || Buffer.empty()) {
3561 Diag(SlashLoc, FoundEndDelimiter ? diag::warn_delimited_ucn_empty
3562 : diag::warn_delimited_ucn_incomplete)
3563 << StringRef(KindLoc, 1);
3564 return std::nullopt;
3567 StringRef Name(Buffer.data(), Buffer.size());
3568 std::optional<char32_t>
Match =
3569 llvm::sys::unicode::nameToCodepointStrict(Name);
3570 std::optional<llvm::sys::unicode::LooseMatchingResult> LooseMatch;
3572 LooseMatch = llvm::sys::unicode::nameToCodepointLooseMatching(Name);
3574 Diag(StartName, diag::err_invalid_ucn_name)
3575 << StringRef(Buffer.data(), Buffer.size())
3578 Diag(StartName, diag::note_invalid_ucn_name_loose_matching)
3589 if (Diagnose &&
Match)
3592 PP->getDiagnostics());
3598 if (LooseMatch && Diagnose)
3599 Match = LooseMatch->CodePoint;
3606 if (CurPtr - StartPtr == (
ptrdiff_t)(Buffer.size() + 3))
3609 while (StartPtr != CurPtr)
3610 (void)getAndAdvanceChar(StartPtr, *
Result);
3614 return Match ? std::optional<uint32_t>(*
Match) : std::nullopt;
3617uint32_t Lexer::tryReadUCN(
const char *&StartPtr,
const char *SlashLoc,
3621 std::optional<uint32_t> CodePointOpt;
3622 char Kind = getCharAndSize(StartPtr, CharSize);
3623 if (Kind ==
'u' || Kind ==
'U')
3624 CodePointOpt = tryReadNumericUCN(StartPtr, SlashLoc,
Result);
3625 else if (Kind ==
'N')
3626 CodePointOpt = tryReadNamedUCN(StartPtr, SlashLoc,
Result);
3631 uint32_t CodePoint = *CodePointOpt;
3634 if (LangOpts.AsmPreprocessor)
3653 if (CodePoint < 0xA0) {
3657 if (CodePoint < 0x20 || CodePoint >= 0x7F)
3658 Diag(BufferPtr, diag::err_ucn_control_character);
3660 char C =
static_cast<char>(CodePoint);
3661 Diag(BufferPtr, diag::err_ucn_escape_basic_scs) << StringRef(&
C, 1);
3666 }
else if (CodePoint >= 0xD800 && CodePoint <= 0xDFFF) {
3671 if (LangOpts.CPlusPlus && !LangOpts.CPlusPlus11)
3672 Diag(BufferPtr, diag::warn_ucn_escape_surrogate);
3674 Diag(BufferPtr, diag::err_ucn_escape_invalid);
3682bool Lexer::CheckUnicodeWhitespace(
Token &
Result, uint32_t
C,
3683 const char *CurPtr) {
3686 Diag(BufferPtr, diag::ext_unicode_whitespace)
3695void Lexer::PropagateLineStartLeadingSpaceInfo(
Token &
Result) {
3696 IsAtStartOfLine =
Result.isAtStartOfLine();
3697 HasLeadingSpace =
Result.hasLeadingSpace();
3698 HasLeadingEmptyMacro =
Result.hasLeadingEmptyMacro();
3703 assert(!isDependencyDirectivesLexer());
3709 if (IsAtStartOfLine) {
3711 IsAtStartOfLine =
false;
3714 if (HasLeadingSpace) {
3716 HasLeadingSpace =
false;
3719 if (HasLeadingEmptyMacro) {
3721 HasLeadingEmptyMacro =
false;
3724 bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
3725 IsAtPhysicalStartOfLine =
false;
3728 bool returnedToken = LexTokenInternal(
Result, atPhysicalStartOfLine);
3730 assert((returnedToken || !isRawLex) &&
"Raw lex must succeed");
3731 return returnedToken;
3739bool Lexer::LexTokenInternal(
Token &
Result,
bool TokAtPhysicalStartOfLine) {
3741 assert(!
Result.needsCleaning() &&
"Result needs cleaning");
3742 assert(!
Result.hasPtrData() &&
"Result has not been reset");
3745 const char *CurPtr = BufferPtr;
3757 FormTokenWithChars(Result, CurPtr, tok::unknown);
3766 unsigned SizeTmp, SizeTmp2;
3769 char Char = getAndAdvanceChar(CurPtr,
Result);
3773 NewLinePtr =
nullptr;
3778 if (CurPtr-1 == BufferEnd)
3779 return LexEndOfFile(
Result, CurPtr-1);
3782 if (isCodeCompletionPoint(CurPtr-1)) {
3785 FormTokenWithChars(
Result, CurPtr, tok::code_completion);
3790 Diag(CurPtr-1, diag::null_in_file);
3792 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
3801 if (LangOpts.MicrosoftExt) {
3803 Diag(CurPtr-1, diag::ext_ctrl_z_eof_microsoft);
3804 return LexEndOfFile(
Result, CurPtr-1);
3808 Kind = tok::unknown;
3812 if (CurPtr[0] ==
'\n')
3813 (void)getAndAdvanceChar(CurPtr,
Result);
3827 IsAtStartOfLine =
true;
3828 IsAtPhysicalStartOfLine =
true;
3829 NewLinePtr = CurPtr - 1;
3838 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
3848 SkipHorizontalWhitespace:
3850 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
3859 LineComment && (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP)) {
3860 if (SkipLineComment(
Result, CurPtr+2, TokAtPhysicalStartOfLine))
3862 goto SkipIgnoredUnits;
3864 if (SkipBlockComment(
Result, CurPtr+2, TokAtPhysicalStartOfLine))
3866 goto SkipIgnoredUnits;
3868 goto SkipHorizontalWhitespace;
3876 case '0':
case '1':
case '2':
case '3':
case '4':
3877 case '5':
case '6':
case '7':
case '8':
case '9':
3880 return LexNumericConstant(
Result, CurPtr);
3889 if (LangOpts.CPlusPlus11 || LangOpts.C11) {
3890 Char = getCharAndSize(CurPtr, SizeTmp);
3894 return LexStringLiteral(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3895 tok::utf16_string_literal);
3899 return LexCharConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3900 tok::utf16_char_constant);
3903 if (Char ==
'R' && LangOpts.RawStringLiterals &&
3904 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3905 return LexRawStringLiteral(
Result,
3906 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3908 tok::utf16_string_literal);
3911 char Char2 = getCharAndSize(CurPtr + SizeTmp, SizeTmp2);
3915 return LexStringLiteral(
Result,
3916 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3918 tok::utf8_string_literal);
3919 if (Char2 ==
'\'' && (LangOpts.CPlusPlus17 || LangOpts.C23))
3920 return LexCharConstant(
3921 Result, ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3923 tok::utf8_char_constant);
3925 if (Char2 ==
'R' && LangOpts.RawStringLiterals) {
3927 char Char3 = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
3930 return LexRawStringLiteral(
Result,
3931 ConsumeChar(ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3934 tok::utf8_string_literal);
3941 return LexIdentifierContinue(
Result, CurPtr);
3947 if (LangOpts.CPlusPlus11 || LangOpts.C11) {
3948 Char = getCharAndSize(CurPtr, SizeTmp);
3952 return LexStringLiteral(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3953 tok::utf32_string_literal);
3957 return LexCharConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3958 tok::utf32_char_constant);
3961 if (Char ==
'R' && LangOpts.RawStringLiterals &&
3962 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3963 return LexRawStringLiteral(
Result,
3964 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3966 tok::utf32_string_literal);
3970 return LexIdentifierContinue(
Result, CurPtr);
3976 if (LangOpts.RawStringLiterals) {
3977 Char = getCharAndSize(CurPtr, SizeTmp);
3980 return LexRawStringLiteral(
Result,
3981 ConsumeChar(CurPtr, SizeTmp,
Result),
3982 tok::string_literal);
3986 return LexIdentifierContinue(
Result, CurPtr);
3991 Char = getCharAndSize(CurPtr, SizeTmp);
3995 return LexStringLiteral(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3996 tok::wide_string_literal);
3999 if (LangOpts.RawStringLiterals && Char ==
'R' &&
4000 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
4001 return LexRawStringLiteral(
Result,
4002 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4004 tok::wide_string_literal);
4008 return LexCharConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
4009 tok::wide_char_constant);
4014 case 'A':
case 'B':
case 'C':
case 'D':
case 'E':
case 'F':
case 'G':
4015 case 'H':
case 'I':
case 'J':
case 'K':
case 'M':
case 'N':
4016 case 'O':
case 'P':
case 'Q':
case 'S':
case 'T':
4017 case 'V':
case 'W':
case 'X':
case 'Y':
case 'Z':
4018 case 'a':
case 'b':
case 'c':
case 'd':
case 'e':
case 'f':
case 'g':
4019 case 'h':
case 'i':
case 'j':
case 'k':
case 'l':
case 'm':
case 'n':
4020 case 'o':
case 'p':
case 'q':
case 'r':
case 's':
case 't':
4021 case 'v':
case 'w':
case 'x':
case 'y':
case 'z':
4025 return LexIdentifierContinue(
Result, CurPtr);
4028 if (LangOpts.DollarIdents) {
4030 Diag(CurPtr-1, diag::ext_dollar_in_identifier);
4033 return LexIdentifierContinue(
Result, CurPtr);
4036 Kind = tok::unknown;
4043 return LexCharConstant(
Result, CurPtr, tok::char_constant);
4049 return LexStringLiteral(
Result, CurPtr,
4051 : tok::string_literal);
4055 Kind = tok::question;
4058 Kind = tok::l_square;
4061 Kind = tok::r_square;
4064 Kind = tok::l_paren;
4067 Kind = tok::r_paren;
4070 Kind = tok::l_brace;
4073 Kind = tok::r_brace;
4076 Char = getCharAndSize(CurPtr, SizeTmp);
4077 if (Char >=
'0' && Char <=
'9') {
4081 return LexNumericConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result));
4082 }
else if (LangOpts.CPlusPlus && Char ==
'*') {
4083 Kind = tok::periodstar;
4085 }
else if (Char ==
'.' &&
4086 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
'.') {
4087 Kind = tok::ellipsis;
4088 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4095 Char = getCharAndSize(CurPtr, SizeTmp);
4098 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4099 }
else if (Char ==
'=') {
4100 Kind = tok::ampequal;
4101 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4107 if (getCharAndSize(CurPtr, SizeTmp) ==
'=') {
4108 Kind = tok::starequal;
4109 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4115 Char = getCharAndSize(CurPtr, SizeTmp);
4117 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4118 Kind = tok::plusplus;
4119 }
else if (Char ==
'=') {
4120 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4121 Kind = tok::plusequal;
4127 Char = getCharAndSize(CurPtr, SizeTmp);
4129 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4130 Kind = tok::minusminus;
4131 }
else if (Char ==
'>' && LangOpts.CPlusPlus &&
4132 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
'*') {
4133 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4135 Kind = tok::arrowstar;
4136 }
else if (Char ==
'>') {
4137 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4139 }
else if (Char ==
'=') {
4140 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4141 Kind = tok::minusequal;
4150 if (getCharAndSize(CurPtr, SizeTmp) ==
'=') {
4151 Kind = tok::exclaimequal;
4152 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4154 Kind = tok::exclaim;
4159 Char = getCharAndSize(CurPtr, SizeTmp);
4169 bool TreatAsComment =
4170 LineComment && (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP);
4171 if (!TreatAsComment)
4172 if (!(
PP &&
PP->isPreprocessedOutput()))
4173 TreatAsComment = getCharAndSize(CurPtr+SizeTmp, SizeTmp2) !=
'*';
4175 if (TreatAsComment) {
4176 if (SkipLineComment(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
4177 TokAtPhysicalStartOfLine))
4183 goto SkipIgnoredUnits;
4188 if (SkipBlockComment(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
4189 TokAtPhysicalStartOfLine))
4198 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4199 Kind = tok::slashequal;
4205 Char = getCharAndSize(CurPtr, SizeTmp);
4207 Kind = tok::percentequal;
4208 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4209 }
else if (LangOpts.Digraphs && Char ==
'>') {
4210 Kind = tok::r_brace;
4211 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4212 }
else if (LangOpts.Digraphs && Char ==
':') {
4213 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4214 Char = getCharAndSize(CurPtr, SizeTmp);
4215 if (Char ==
'%' && getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
':') {
4216 Kind = tok::hashhash;
4217 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4219 }
else if (Char ==
'@' && LangOpts.MicrosoftExt) {
4220 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4222 Diag(BufferPtr, diag::ext_charize_microsoft);
4229 if (TokAtPhysicalStartOfLine && !
LexingRawMode && !Is_PragmaLexer)
4230 goto HandleDirective;
4235 Kind = tok::percent;
4239 Char = getCharAndSize(CurPtr, SizeTmp);
4241 return LexAngledStringLiteral(
Result, CurPtr);
4242 }
else if (Char ==
'<') {
4243 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
4245 Kind = tok::lesslessequal;
4246 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4248 }
else if (After ==
'<' && IsStartOfConflictMarker(CurPtr-1)) {
4252 }
else if (After ==
'<' && HandleEndOfConflictMarker(CurPtr-1)) {
4256 }
else if (LangOpts.CUDA && After ==
'<') {
4257 Kind = tok::lesslessless;
4258 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4261 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4262 Kind = tok::lessless;
4264 }
else if (Char ==
'=') {
4265 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
4267 if (LangOpts.CPlusPlus20) {
4269 Diag(BufferPtr, diag::warn_cxx17_compat_spaceship);
4270 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4272 Kind = tok::spaceship;
4278 Diag(BufferPtr, diag::warn_cxx20_compat_spaceship)
4283 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4284 Kind = tok::lessequal;
4285 }
else if (LangOpts.Digraphs && Char ==
':') {
4286 if (LangOpts.CPlusPlus11 &&
4287 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
':') {
4294 char After = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
4295 if (After !=
':' && After !=
'>') {
4298 Diag(BufferPtr, diag::warn_cxx98_compat_less_colon_colon);
4303 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4304 Kind = tok::l_square;
4305 }
else if (LangOpts.Digraphs && Char ==
'%') {
4306 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4307 Kind = tok::l_brace;
4308 }
else if (Char ==
'#' && SizeTmp == 1 &&
4309 lexEditorPlaceholder(
Result, CurPtr)) {
4316 Char = getCharAndSize(CurPtr, SizeTmp);
4318 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4319 Kind = tok::greaterequal;
4320 }
else if (Char ==
'>') {
4321 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
4323 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4325 Kind = tok::greatergreaterequal;
4326 }
else if (After ==
'>' && IsStartOfConflictMarker(CurPtr-1)) {
4330 }
else if (After ==
'>' && HandleEndOfConflictMarker(CurPtr-1)) {
4333 }
else if (LangOpts.CUDA && After ==
'>') {
4334 Kind = tok::greatergreatergreater;
4335 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4338 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4339 Kind = tok::greatergreater;
4342 Kind = tok::greater;
4346 Char = getCharAndSize(CurPtr, SizeTmp);
4348 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4349 Kind = tok::caretequal;
4351 if (LangOpts.OpenCL && Char ==
'^')
4352 Diag(CurPtr, diag::err_opencl_logical_exclusive_or);
4357 Char = getCharAndSize(CurPtr, SizeTmp);
4359 Kind = tok::pipeequal;
4360 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4361 }
else if (Char ==
'|') {
4363 if (CurPtr[1] ==
'|' && HandleEndOfConflictMarker(CurPtr-1))
4365 Kind = tok::pipepipe;
4366 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4372 Char = getCharAndSize(CurPtr, SizeTmp);
4373 if (LangOpts.Digraphs && Char ==
'>') {
4374 Kind = tok::r_square;
4375 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4376 }
else if (Char ==
':') {
4377 Kind = tok::coloncolon;
4378 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4387 Char = getCharAndSize(CurPtr, SizeTmp);
4390 if (CurPtr[1] ==
'=' && HandleEndOfConflictMarker(CurPtr-1))
4393 Kind = tok::equalequal;
4394 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4403 Char = getCharAndSize(CurPtr, SizeTmp);
4405 Kind = tok::hashhash;
4406 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4407 }
else if (Char ==
'@' && LangOpts.MicrosoftExt) {
4410 Diag(BufferPtr, diag::ext_charize_microsoft);
4411 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4417 if (TokAtPhysicalStartOfLine && !
LexingRawMode && !Is_PragmaLexer)
4418 goto HandleDirective;
4426 if (CurPtr[-1] ==
'@' && LangOpts.ObjC)
4429 Kind = tok::unknown;
4434 if (!LangOpts.AsmPreprocessor) {
4435 if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &
Result)) {
4436 if (CheckUnicodeWhitespace(
Result, CodePoint, CurPtr)) {
4437 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
4445 return LexUnicodeIdentifierStart(
Result, CodePoint, CurPtr);
4449 Kind = tok::unknown;
4454 Kind = tok::unknown;
4458 llvm::UTF32 CodePoint;
4463 llvm::ConversionResult Status =
4464 llvm::convertUTF8Sequence((
const llvm::UTF8 **)&CurPtr,
4465 (
const llvm::UTF8 *)BufferEnd,
4467 llvm::strictConversion);
4468 if (Status == llvm::conversionOK) {
4469 if (CheckUnicodeWhitespace(
Result, CodePoint, CurPtr)) {
4470 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
4477 return LexUnicodeIdentifierStart(
Result, CodePoint, CurPtr);
4481 PP->isPreprocessedOutput()) {
4483 Kind = tok::unknown;
4490 Diag(CurPtr, diag::err_invalid_utf8);
4492 BufferPtr = CurPtr+1;
4504 FormTokenWithChars(
Result, CurPtr, Kind);
4510 FormTokenWithChars(
Result, CurPtr, tok::hash);
4513 if (
PP->hadModuleLoaderFatalFailure())
4525const char *Lexer::convertDependencyDirectiveToken(
4527 const char *TokPtr = BufferStart + DDTok.
Offset;
4533 BufferPtr = TokPtr + DDTok.
Length;
4537bool Lexer::LexDependencyDirectiveToken(
Token &
Result) {
4538 assert(isDependencyDirectivesLexer());
4540 using namespace dependency_directives_scan;
4542 if (BufferPtr == BufferEnd)
4543 return LexEndOfFile(
Result, BufferPtr);
4545 while (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size()) {
4546 if (DepDirectives.front().Kind == pp_eof)
4547 return LexEndOfFile(
Result, BufferEnd);
4548 if (DepDirectives.front().Kind == tokens_present_before_eof)
4550 NextDepDirectiveTokenIndex = 0;
4551 DepDirectives = DepDirectives.drop_front();
4554 const dependency_directives_scan::Token &DDTok =
4555 DepDirectives.front().Tokens[NextDepDirectiveTokenIndex++];
4556 if (NextDepDirectiveTokenIndex > 1 || DDTok.
Kind != tok::hash) {
4562 BufferPtr = BufferStart + DDTok.
Offset;
4563 LexAngledStringLiteral(
Result, BufferPtr + 1);
4564 if (
Result.isNot(tok::header_name))
4568 const dependency_directives_scan::Token &NextTok =
4569 DepDirectives.front().Tokens[NextDepDirectiveTokenIndex];
4570 if (BufferStart + NextTok.
Offset >= BufferPtr)
4572 ++NextDepDirectiveTokenIndex;
4577 const char *TokPtr = convertDependencyDirectiveToken(DDTok,
Result);
4579 if (
Result.is(tok::hash) &&
Result.isAtStartOfLine()) {
4581 if (
PP->hadModuleLoaderFatalFailure())
4586 if (
Result.is(tok::raw_identifier)) {
4587 Result.setRawIdentifierData(TokPtr);
4589 const IdentifierInfo *II =
PP->LookUpIdentifierInfo(
Result);
4591 return PP->HandleIdentifier(
Result);
4595 if (
Result.isLiteral()) {
4596 Result.setLiteralData(TokPtr);
4599 if (
Result.is(tok::colon)) {
4601 if (*BufferPtr ==
':') {
4602 assert(DepDirectives.front().Tokens[NextDepDirectiveTokenIndex].is(
4604 ++NextDepDirectiveTokenIndex;
4605 Result.setKind(tok::coloncolon);
4615bool Lexer::LexDependencyDirectiveTokenWhileSkipping(
Token &
Result) {
4616 assert(isDependencyDirectivesLexer());
4618 using namespace dependency_directives_scan;
4621 unsigned NestedIfs = 0;
4623 DepDirectives = DepDirectives.drop_front();
4624 switch (DepDirectives.front().Kind) {
4626 llvm_unreachable(
"unexpected 'pp_none'");
4667 NextDepDirectiveTokenIndex = 0;
4668 return LexEndOfFile(
Result, BufferEnd);
4672 const dependency_directives_scan::Token &DDTok =
4673 DepDirectives.front().Tokens.front();
4674 assert(DDTok.
is(tok::hash));
4675 NextDepDirectiveTokenIndex = 1;
4677 convertDependencyDirectiveToken(DDTok,
Result);
Defines the Diagnostic-related interfaces.
Defines the clang::IdentifierInfo, clang::IdentifierTable, and clang::Selector interfaces.
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified.
Defines the clang::LangOptions interface.
static bool isInExpansionTokenRange(const SourceLocation Loc, const SourceManager &SM)
static bool isMathematicalExtensionID(uint32_t C, const LangOptions &LangOpts, bool IsStart, bool &IsExtension)
static void diagnoseInvalidUnicodeCodepointInIdentifier(DiagnosticsEngine &Diags, const LangOptions &LangOpts, uint32_t CodePoint, CharSourceRange Range, bool IsFirst)
static char DecodeTrigraphChar(const char *CP, Lexer *L, bool Trigraphs)
DecodeTrigraphChar - If the specified character is a legal trigraph when prefixed with ?
static size_t getSpellingSlow(const Token &Tok, const char *BufPtr, const LangOptions &LangOpts, char *Spelling)
Slow case of getSpelling.
static const char * FindConflictEnd(const char *CurPtr, const char *BufferEnd, ConflictMarkerKind CMK)
Find the end of a version control conflict marker.
static void maybeDiagnoseUTF8Homoglyph(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range)
After encountering UTF-8 character C and interpreting it as an identifier character,...
static SourceLocation getBeginningOfFileToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
static void StringifyImpl(T &Str, char Quote)
static LLVM_ATTRIBUTE_NOINLINE SourceLocation GetMappedTokenLoc(Preprocessor &PP, SourceLocation FileLoc, unsigned CharNo, unsigned TokLen)
GetMappedTokenLoc - If lexing out of a 'mapped buffer', where we pretend the lexer buffer was all exp...
static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts, bool &IsExtension)
static CharSourceRange makeCharRange(Lexer &L, const char *Begin, const char *End)
static bool isUnicodeWhitespace(uint32_t Codepoint)
static void diagnoseExtensionInIdentifier(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range)
static const char * findPlaceholderEnd(const char *CurPtr, const char *BufferEnd)
static llvm::SmallString< 5 > codepointAsHexString(uint32_t C)
static CharSourceRange makeRangeFromFileLocs(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, Lexer *L, bool Trigraphs)
isBlockCommentEndOfEscapedNewLine - Return true if the specified newline character (either \n or \r) ...
static const char * fastParseASCIIIdentifier(const char *CurPtr, const char *BufferEnd)
static char GetTrigraphCharForLetter(char Letter)
GetTrigraphCharForLetter - Given a character that occurs after a ?
static bool isAllowedInitiallyIDChar(uint32_t C, const LangOptions &LangOpts, bool &IsExtension)
static void maybeDiagnoseIDCharCompat(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range, bool IsFirst)
static const char * findBeginningOfLine(StringRef Buffer, unsigned Offset)
Returns the pointer that points to the beginning of line that contains the given offset,...
Defines the MultipleIncludeOpt interface.
Defines the clang::Preprocessor interface.
Defines the clang::SourceLocation class and associated facilities.
Defines the SourceManager interface.
Defines the clang::TokenKind enum and support functions.
static const llvm::sys::UnicodeCharRange C11DisallowedInitialIDCharRanges[]
static const llvm::sys::UnicodeCharRange C99DisallowedInitialIDCharRanges[]
static const llvm::sys::UnicodeCharRange UnicodeWhitespaceCharRanges[]
static const llvm::sys::UnicodeCharRange C99AllowedIDCharRanges[]
static const llvm::sys::UnicodeCharRange C11AllowedIDCharRanges[]
static const llvm::sys::UnicodeCharRange MathematicalNotationProfileIDStartRanges[]
static const llvm::sys::UnicodeCharRange MathematicalNotationProfileIDContinueRanges[]
static const llvm::sys::UnicodeCharRange XIDStartRanges[]
static const llvm::sys::UnicodeCharRange XIDContinueRanges[]
__DEVICE__ void * memcpy(void *__a, const void *__b, size_t __c)
__device__ __2f16 float c
__PTRDIFF_TYPE__ ptrdiff_t
A signed integer type that is the result of subtracting two pointers.
static __inline__ int __ATTRS_o_ai vec_any_ge(vector signed char __a, vector signed char __b)
static __inline__ int __ATTRS_o_ai vec_any_eq(vector signed char __a, vector signed char __b)
Represents a character-granular source range.
static CharSourceRange getCharRange(SourceRange R)
SourceLocation getEnd() const
SourceLocation getBegin() const
A little helper class used to produce diagnostics.
Concrete class used by the front-end to report problems and issues.
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
bool isIgnored(unsigned DiagID, SourceLocation Loc) const
Determine whether the diagnostic is known to be ignored.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
static FixItHint CreateReplacement(CharSourceRange RemoveRange, StringRef Code)
Create a code modification hint that replaces the given source range with the given code string.
static FixItHint CreateRemoval(CharSourceRange RemoveRange)
Create a code modification hint that removes the given source range.
static FixItHint CreateInsertion(SourceLocation InsertionLoc, StringRef Code, bool BeforePreviousInsertions=false)
Create a code modification hint that inserts the given code string at a specific location.
One of these records is kept for each identifier that is lexed.
bool isHandleIdentifierCase() const
Return true if the Preprocessor::HandleIdentifier must be called on a token of this identifier.
bool isKeyword(const LangOptions &LangOpts) const
Return true if this token is a keyword in the specified language.
tok::ObjCKeywordKind getObjCKeywordID() const
Return the Objective-C keyword ID for this identifier.
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens.
static StringRef getSourceText(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts, bool *Invalid=nullptr)
Returns a string for the source that the range encompasses.
friend class Preprocessor
void SetKeepWhitespaceMode(bool Val)
SetKeepWhitespaceMode - This method lets clients enable or disable whitespace retention mode.
static SourceLocation findLocationAfterToken(SourceLocation loc, tok::TokenKind TKind, const SourceManager &SM, const LangOptions &LangOpts, bool SkipTrailingWhitespaceAndNewLine)
Checks that the given token is the first token that occurs after the given location (this excludes co...
bool LexFromRawLexer(Token &Result)
LexFromRawLexer - Lex a token from a designated raw lexer (one with no associated preprocessor object...
static unsigned getEscapedNewLineSize(const char *P)
getEscapedNewLineSize - Return the size of the specified escaped newline, or 0 if it is not an escape...
bool inKeepCommentMode() const
inKeepCommentMode - Return true if the lexer should return comments as tokens.
void SetCommentRetentionState(bool Mode)
SetCommentRetentionState - Change the comment retention mode of the lexer to the specified mode.
static std::optional< Token > findPreviousToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts, bool IncludeComments)
Finds the token that comes before the given location.
void seek(unsigned Offset, bool IsAtStartOfLine)
Set the lexer's buffer pointer to Offset.
static StringRef getImmediateMacroName(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
void ReadToEndOfLine(SmallVectorImpl< char > *Result=nullptr)
ReadToEndOfLine - Read the rest of the current preprocessor line as an uninterpreted string.
static bool isAtStartOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroBegin=nullptr)
Returns true if the given MacroID location points at the first token of the macro expansion.
DiagnosticBuilder Diag(const char *Loc, unsigned DiagID) const
Diag - Forwarding function for diagnostics.
const char * getBufferLocation() const
Return the current location in the buffer.
bool Lex(Token &Result)
Lex - Return the next token in the file.
bool isPragmaLexer() const
isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.
static void DiagnoseDelimitedOrNamedEscapeSequence(SourceLocation Loc, bool Named, const LangOptions &Opts, DiagnosticsEngine &Diags)
Diagnose use of a delimited or named escape sequence.
static unsigned getTokenPrefixLength(SourceLocation TokStart, unsigned CharNo, const SourceManager &SM, const LangOptions &LangOpts)
Get the physical length (including trigraphs and escaped newlines) of the first Characters characters...
Lexer(FileID FID, const llvm::MemoryBufferRef &InputFile, Preprocessor &PP, bool IsFirstIncludeOfFile=true)
Lexer constructor - Create a new lexer object for the specified buffer with the specified preprocesso...
static bool isAtEndOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroEnd=nullptr)
Returns true if the given MacroID location points at the last token of the macro expansion.
SourceLocation getSourceLocation() override
getSourceLocation - Return a source location for the next character in the current file.
static CharSourceRange makeFileCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Accepts a range and returns a character range with file locations.
static bool isNewLineEscaped(const char *BufferStart, const char *Str)
Checks whether new line pointed by Str is preceded by escape sequence.
SourceLocation getSourceLocation(const char *Loc, unsigned TokLen=1) const
getSourceLocation - Return a source location identifier for the specified offset in the current file.
static StringRef getIndentationForLine(SourceLocation Loc, const SourceManager &SM)
Returns the leading whitespace for line that corresponds to the given location Loc.
static unsigned getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid=nullptr)
getSpelling - This method is used to get the spelling of a token into a preallocated buffer,...
bool isKeepWhitespaceMode() const
isKeepWhitespaceMode - Return true if the lexer should return tokens for every character in the file,...
static bool isAsciiIdentifierContinueChar(char c, const LangOptions &LangOpts)
Returns true if the given character could appear in an identifier.
static std::optional< Token > findNextToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts, bool IncludeComments=false)
Finds the token that comes right after the given location.
static unsigned MeasureTokenLength(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
MeasureTokenLength - Relex the token at the specified location and return its length in bytes in the ...
static SourceLocation GetBeginningOfToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Given a location anywhere in a source buffer, find the location that corresponds to the beginning of...
void resetExtendedTokenMode()
Sets the extended token mode back to its initial value, according to the language options and preproc...
static StringRef getImmediateMacroNameForDiagnostics(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
static Lexer * Create_PragmaLexer(SourceLocation SpellingLoc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLen, Preprocessor &PP)
Create_PragmaLexer: Lexer constructor - Create a new lexer object for _Pragma expansion.
static PreambleBounds ComputePreamble(StringRef Buffer, const LangOptions &LangOpts, unsigned MaxLines=0)
Compute the preamble of the given file.
static bool getRawToken(SourceLocation Loc, Token &Result, const SourceManager &SM, const LangOptions &LangOpts, bool IgnoreWhiteSpace=false)
Relex the token at the specified location.
static SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset, const SourceManager &SM, const LangOptions &LangOpts)
Computes the source location just past the end of the token at this source location.
static std::string Stringify(StringRef Str, bool Charify=false)
Stringify - Convert the specified string into a C string by i) escaping '\' and " characters and ii) ...
static SizedChar getCharAndSizeNoWarn(const char *Ptr, const LangOptions &LangOpts)
getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever emit a warning.
bool LexingRawMode
True if in raw mode.
SmallVector< PPConditionalInfo, 4 > ConditionalStack
Information about the set of #if/#ifdef/#ifndef blocks we are currently in.
bool ParsingPreprocessorDirective
True when parsing #XXX; turns '\n' into a tok::eod token.
MultipleIncludeOpt MIOpt
A state machine that detects the #ifndef-wrapping a file idiom for the multiple-include optimization.
bool ParsingFilename
True after #include; turns <xx> or "xxx" into a tok::header_name token.
bool isLexingRawMode() const
Return true if this lexer is in raw mode or not.
const FileID FID
The SourceManager FileID corresponding to the file being lexed.
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
SourceManager & getSourceManager() const
Encodes a location in the source.
static SourceLocation getFromRawEncoding(UIntTy Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
bool isValid() const
Return true if this is a valid SourceLocation object.
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
UIntTy getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it.
This class handles loading and caching of source files into memory.
Each ExpansionInfo encodes the expansion location - where the token was ultimately expanded,...
SourceLocation getExpansionLocStart() const
SourceLocation getSpellingLoc() const
bool isMacroArgExpansion() const
This is a discriminated union of FileInfo and ExpansionInfo.
const ExpansionInfo & getExpansion() const
static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix)
Determine whether a suffix is a valid ud-suffix.
Token - This structure provides full information about a lexed token.
IdentifierInfo * getIdentifierInfo() const
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
unsigned getLength() const
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {....
tok::TokenKind getKind() const
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
bool isAnnotation() const
Return true if this is any of tok::annot_* kind tokens.
bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const
Return true if we have an ObjC keyword identifier.
bool isSimpleTypeSpecifier(const LangOptions &LangOpts) const
Determine whether the token kind starts a simple-type-specifier.
void startToken()
Reset all flags to cleared.
bool needsCleaning() const
Return true if this token has trigraphs or escaped newlines in it.
StringRef getRawIdentifier() const
getRawIdentifier - For a raw identifier token (i.e., an identifier lexed in raw mode),...
void setFlag(TokenFlags Flag)
Set the specified flag.
static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_movemask_epi8(__m128i __a)
Copies the values of the most significant bits from each 8-bit element in a 128-bit integer vector of...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpeq_epi8(__m128i __a, __m128i __b)
Compares each of the corresponding 8-bit values of the 128-bit integer vectors for equality.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si128(__m128i_u const *__p)
Moves packed integer values from an unaligned 128-bit memory location to elements in a 128-bit intege...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_load_si128(__m128i const *__p)
Moves packed integer values from an aligned 128-bit memory location to elements in a 128-bit integer ...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set1_epi8(char __b)
Initializes all values in a 128-bit vector of [16 x i8] with the specified 8-bit value.
@ tokens_present_before_eof
Indicates that there are tokens present between the last scanned directive and eof.
@ pp_pragma_system_header
@ pp_pragma_include_alias
@ After
Like System, but searched after the system directories.
bool isStringLiteral(TokenKind K)
Return true if this is a C or C++ string-literal (or C++11 user-defined-string-literal) token.
ObjCKeywordKind
Provides a namespace for Objective-C keywords which start with an '@'.
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
The JSON file list parser is used to communicate input to InstallAPI.
LLVM_READNONE bool isASCII(char c)
Returns true if a byte is an ASCII character.
@ Match
This is not an overload because the signature exactly matches an existing declaration.
LLVM_READONLY bool isVerticalWhitespace(unsigned char c)
Returns true if this character is vertical ASCII whitespace: '\n', '\r'.
ConflictMarkerKind
ConflictMarkerKind - Kinds of conflict marker which the lexer might be recovering from.
@ CMK_Perforce
A Perforce-style conflict marker, initiated by 4 ">"s, separated by 4 "="s, and terminated by 4 "<"s.
@ CMK_None
Not within a conflict marker.
@ CMK_Normal
A normal or diff3 conflict marker, initiated by at least 7 "<"s, separated by at least 7 "="s or "|"s...
LLVM_READONLY bool isAsciiIdentifierContinue(unsigned char c)
std::pair< FileID, unsigned > FileIDAndOffset
bool operator<(DeclarationName LHS, DeclarationName RHS)
Ordering on two declaration names.
LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)
Returns true if this character is horizontal ASCII whitespace: ' ', '\t', '\f', '\v'.
@ Result
The result type of a method or function.
const FunctionProtoType * T
LLVM_READONLY bool isRawStringDelimBody(unsigned char c)
Return true if this is the body character of a C++ raw string delimiter.
LLVM_READONLY bool isWhitespace(unsigned char c)
Return true if this character is horizontal or vertical ASCII whitespace: ' ', '\t',...
LLVM_READONLY bool isPreprocessingNumberBody(unsigned char c)
Return true if this is the body character of a C preprocessing number, which is [a-zA-Z0-9_.].
@ Keyword
The name has been typo-corrected to a keyword.
LLVM_READONLY bool isAsciiIdentifierStart(unsigned char c, bool AllowDollar=false)
Returns true if this is a valid first character of a C identifier, which is [a-zA-Z_].
__INTPTR_TYPE__ intptr_t
A signed integer type with the property that any valid pointer to void can be converted to this type,...
float __ovld __cnfn length(float)
Return the length of vector p, i.e., sqrt(p.x^2 + p.y^2 + ...)
#define _mm_cmpistri(A, B, M)
Uses the immediate operand M to perform a comparison of string data with implicitly defined lengths t...
#define _SIDD_LEAST_SIGNIFICANT
#define _SIDD_NEGATIVE_POLARITY
Represents a char and the number of bytes parsed to produce it.
Describes the bounds (start, size) of the preamble and a flag required by PreprocessorOptions::Precom...
Token lexed as part of dependency directive scanning.
unsigned Offset
Offset into the original source input.
bool is(tok::TokenKind K) const