29#include "llvm/ADT/STLExtras.h"
30#include "llvm/ADT/StringExtras.h"
31#include "llvm/ADT/StringRef.h"
32#include "llvm/ADT/StringSwitch.h"
33#include "llvm/Support/Compiler.h"
34#include "llvm/Support/ConvertUTF.h"
35#include "llvm/Support/MemoryBufferRef.h"
36#include "llvm/Support/NativeFormatting.h"
37#include "llvm/Support/Unicode.h"
38#include "llvm/Support/UnicodeCharRanges.h"
64 return II->getObjCKeywordID() == objcKey;
71 return tok::objc_not_keyword;
79 case tok::annot_typename:
80 case tok::annot_decltype:
81 case tok::annot_pack_indexing_type:
87 case tok::kw___int128:
89 case tok::kw_unsigned:
97 case tok::kw__Float16:
98 case tok::kw___float128:
99 case tok::kw___ibm128:
100 case tok::kw_wchar_t:
106#define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) case tok::kw___##Trait:
107#include "clang/Basic/TransformTypeTraits.def"
108 case tok::kw___auto_type:
109 case tok::kw_char16_t:
110 case tok::kw_char32_t:
112 case tok::kw_decltype:
113 case tok::kw_char8_t:
125void Lexer::anchor() {}
127void Lexer::InitLexer(
const char *BufStart,
const char *BufPtr,
128 const char *BufEnd) {
129 BufferStart = BufStart;
133 assert(BufEnd[0] == 0 &&
134 "We assume that the input buffer has a null character at the end"
135 " to simplify lexing!");
140 if (BufferStart == BufferPtr) {
142 StringRef Buf(BufferStart, BufferEnd - BufferStart);
143 size_t BOMLength = llvm::StringSwitch<size_t>(Buf)
144 .StartsWith(
"\xEF\xBB\xBF", 3)
148 BufferPtr += BOMLength;
151 Is_PragmaLexer =
false;
152 CurrentConflictMarkerState =
CMK_None;
155 IsAtStartOfLine =
true;
156 IsAtPhysicalStartOfLine =
true;
158 HasLeadingSpace =
false;
159 HasLeadingEmptyMacro =
false;
174 ExtendedTokenMode = 0;
176 NewLinePtr =
nullptr;
186 FileLoc(PP.getSourceManager().getLocForStartOfFile(FID)),
188 IsFirstTimeLexingFile(IsFirstIncludeOfFile) {
189 InitLexer(InputFile.getBufferStart(), InputFile.getBufferStart(),
190 InputFile.getBufferEnd());
199 const char *BufStart,
const char *BufPtr,
const char *BufEnd,
200 bool IsFirstIncludeOfFile)
202 IsFirstTimeLexingFile(IsFirstIncludeOfFile) {
203 InitLexer(BufStart, BufPtr, BufEnd);
214 bool IsFirstIncludeOfFile)
215 :
Lexer(
SM.getLocForStartOfFile(FID), langOpts, FromFile.getBufferStart(),
216 FromFile.getBufferStart(), FromFile.getBufferEnd(),
217 IsFirstIncludeOfFile) {}
220 assert(
PP &&
"Cannot reset token mode without a preprocessor");
221 if (LangOpts.TraditionalCPP)
249 FileID SpellingFID =
SM.getFileID(SpellingLoc);
250 llvm::MemoryBufferRef InputFile =
SM.getBufferOrFake(SpellingFID);
256 const char *StrData =
SM.getCharacterData(SpellingLoc);
258 L->BufferPtr = StrData;
259 L->BufferEnd = StrData+TokLen;
260 assert(L->BufferEnd[0] == 0 &&
"Buffer is not nul terminated!");
264 L->FileLoc =
SM.createExpansionLoc(
SM.getLocForStartOfFile(SpellingFID),
266 ExpansionLocEnd, TokLen);
273 L->Is_PragmaLexer =
true;
278 this->IsAtPhysicalStartOfLine = IsAtStartOfLine;
279 this->IsAtStartOfLine = IsAtStartOfLine;
280 assert((BufferStart + Offset) <= BufferEnd);
281 BufferPtr = BufferStart + Offset;
285 typename T::size_type i = 0, e = Str.size();
287 if (Str[i] ==
'\\' || Str[i] == Quote) {
288 Str.insert(Str.begin() + i,
'\\');
291 }
else if (Str[i] ==
'\n' || Str[i] ==
'\r') {
293 if ((i < e - 1) && (Str[i + 1] ==
'\n' || Str[i + 1] ==
'\r') &&
294 Str[i] != Str[i + 1]) {
300 Str.insert(Str.begin() + i + 1,
'n');
310 std::string
Result = std::string(Str);
311 char Quote = Charify ?
'\'' :
'"';
326 assert(Tok.
needsCleaning() &&
"getSpellingSlow called on simple token");
329 const char *BufEnd = BufPtr + Tok.
getLength();
333 while (BufPtr < BufEnd) {
335 Spelling[Length++] = CharAndSize.Char;
336 BufPtr += CharAndSize.Size;
338 if (Spelling[Length - 1] ==
'"')
346 Spelling[Length - 2] ==
'R' && Spelling[Length - 1] ==
'"') {
349 const char *RawEnd = BufEnd;
350 do --RawEnd;
while (*RawEnd !=
'"');
351 size_t RawLength = RawEnd - BufPtr + 1;
354 memcpy(Spelling + Length, BufPtr, RawLength);
362 while (BufPtr < BufEnd) {
364 Spelling[Length++] = CharAndSize.Char;
365 BufPtr += CharAndSize.Size;
369 "NeedsCleaning flag set on token that didn't need cleaning!");
384 std::pair<FileID, unsigned> locInfo =
SM.getDecomposedLoc(loc);
387 bool invalidTemp =
false;
388 StringRef file =
SM.getBufferData(locInfo.first, &invalidTemp);
390 if (invalid) *invalid =
true;
394 const char *tokenBegin = file.data() + locInfo.second;
397 Lexer lexer(
SM.getLocForStartOfFile(locInfo.first), options,
398 file.begin(), tokenBegin, file.end());
406 return StringRef(tokenBegin,
length);
410 buffer.resize(
getSpellingSlow(token, tokenBegin, options, buffer.data()));
411 return StringRef(buffer.data(), buffer.size());
421 assert((
int)Tok.
getLength() >= 0 &&
"Token character range is bogus!");
423 bool CharDataInvalid =
false;
433 return std::string(TokStart, TokStart + Tok.
getLength());
454 assert((
int)Tok.
getLength() >= 0 &&
"Token character range is bogus!");
456 const char *TokStart =
nullptr;
458 if (Tok.
is(tok::raw_identifier))
463 Buffer = II->getNameStart();
464 return II->getLength();
474 bool CharDataInvalid =
false;
478 if (CharDataInvalid) {
491 return getSpellingSlow(Tok, TokStart, LangOpts,
const_cast<char*
>(Buffer));
512 bool IgnoreWhiteSpace) {
522 std::pair<FileID, unsigned> LocInfo =
SM.getDecomposedLoc(
Loc);
524 StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
528 const char *StrData = Buffer.data()+LocInfo.second;
530 if (!IgnoreWhiteSpace &&
isWhitespace(SkipEscapedNewLines(StrData)[0]))
534 Lexer TheLexer(
SM.getLocForStartOfFile(LocInfo.first), LangOpts,
535 Buffer.begin(), StrData, Buffer.end());
544 const char *BufStart = Buffer.data();
545 if (Offset >= Buffer.size())
548 const char *LexStart = BufStart + Offset;
549 for (; LexStart != BufStart; --LexStart) {
564 std::pair<FileID, unsigned> LocInfo =
SM.getDecomposedLoc(
Loc);
565 if (LocInfo.first.isInvalid())
569 StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
575 const char *StrData = Buffer.data() + LocInfo.second;
577 if (!LexStart || LexStart == StrData)
582 Lexer TheLexer(LexerStartLoc, LangOpts, Buffer.data(), LexStart,
602 }
while (TheTok.
getKind() != tok::eof);
614 if (!
SM.isMacroArgExpansion(
Loc))
619 std::pair<FileID, unsigned> FileLocInfo =
SM.getDecomposedLoc(FileLoc);
620 std::pair<FileID, unsigned> BeginFileLocInfo =
621 SM.getDecomposedLoc(BeginFileLoc);
622 assert(FileLocInfo.first == BeginFileLocInfo.first &&
623 FileLocInfo.second >= BeginFileLocInfo.second);
629enum PreambleDirectiveKind {
644 Lexer TheLexer(FileLoc, LangOpts, Buffer.begin(), Buffer.begin(),
648 bool InPreprocessorDirective =
false;
652 unsigned MaxLineOffset = 0;
654 const char *CurPtr = Buffer.begin();
655 unsigned CurLine = 0;
656 while (CurPtr != Buffer.end()) {
660 if (CurLine == MaxLines)
664 if (CurPtr != Buffer.end())
665 MaxLineOffset = CurPtr - Buffer.begin();
671 if (InPreprocessorDirective) {
673 if (TheTok.
getKind() == tok::eof) {
684 InPreprocessorDirective =
false;
693 if (MaxLineOffset && TokOffset >= MaxLineOffset)
698 if (TheTok.
getKind() == tok::comment) {
706 Token HashTok = TheTok;
707 InPreprocessorDirective =
true;
716 PreambleDirectiveKind PDK
717 = llvm::StringSwitch<PreambleDirectiveKind>(Keyword)
718 .Case(
"include", PDK_Skipped)
719 .Case(
"__include_macros", PDK_Skipped)
720 .Case(
"define", PDK_Skipped)
721 .Case(
"undef", PDK_Skipped)
722 .Case(
"line", PDK_Skipped)
723 .Case(
"error", PDK_Skipped)
724 .Case(
"pragma", PDK_Skipped)
725 .Case(
"import", PDK_Skipped)
726 .Case(
"include_next", PDK_Skipped)
727 .Case(
"warning", PDK_Skipped)
728 .Case(
"ident", PDK_Skipped)
729 .Case(
"sccs", PDK_Skipped)
730 .Case(
"assert", PDK_Skipped)
731 .Case(
"unassert", PDK_Skipped)
732 .Case(
"if", PDK_Skipped)
733 .Case(
"ifdef", PDK_Skipped)
734 .Case(
"ifndef", PDK_Skipped)
735 .Case(
"elif", PDK_Skipped)
736 .Case(
"elifdef", PDK_Skipped)
737 .Case(
"elifndef", PDK_Skipped)
738 .Case(
"else", PDK_Skipped)
739 .Case(
"endif", PDK_Skipped)
740 .Default(PDK_Unknown);
757 TheTok.
getKind() == tok::raw_identifier &&
759 LangOpts.CPlusPlusModules) {
762 Token ModuleTok = TheTok;
765 }
while (TheTok.
getKind() == tok::comment);
766 if (TheTok.
getKind() != tok::semi) {
781 if (ActiveCommentLoc.
isValid())
782 End = ActiveCommentLoc;
797 const char *TokPtr =
SM.getCharacterData(TokStart, &
Invalid);
800 if (
Invalid || (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr)))
803 unsigned PhysOffset = 0;
808 while (Lexer::isObviouslySimpleCharacter(*TokPtr)) {
818 for (; CharNo; --CharNo) {
820 TokPtr += CharAndSize.Size;
821 PhysOffset += CharAndSize.Size;
828 if (!Lexer::isObviouslySimpleCharacter(*TokPtr))
829 PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr;
878 if (!
SM.isAtStartOfImmediateMacroExpansion(loc, &expansionLoc))
884 *MacroBegin = expansionLoc;
906 if (!
SM.isAtEndOfImmediateMacroExpansion(afterLoc, &expansionLoc))
912 *MacroEnd = expansionLoc;
925 if (
Range.isTokenRange()) {
934 std::tie(FID, BeginOffs) =
SM.getDecomposedLoc(
Begin);
939 if (!
SM.isInFileID(End, FID, &EndOffs) ||
949 return SM.getSLocEntry(
SM.getFileID(
Loc))
951 .isExpansionTokenRange();
973 if (
Range.isTokenRange()) {
994 if (
Range.isTokenRange())
1034 std::pair<FileID, unsigned> beginInfo =
SM.getDecomposedLoc(
Range.
getBegin());
1035 if (beginInfo.first.isInvalid()) {
1041 if (!
SM.isInFileID(
Range.
getEnd(), beginInfo.first, &EndOffs) ||
1042 beginInfo.second > EndOffs) {
1048 bool invalidTemp =
false;
1049 StringRef file =
SM.getBufferData(beginInfo.first, &invalidTemp);
1056 return file.substr(beginInfo.second, EndOffs - beginInfo.second);
1062 assert(
Loc.
isMacroID() &&
"Only reasonable to call this on macros");
1078 Loc =
SM.getImmediateExpansionRange(
Loc).getBegin();
1086 if (
SM.isInFileID(SpellLoc, MacroFID))
1100 std::pair<FileID, unsigned> ExpansionInfo =
SM.getDecomposedLoc(
Loc);
1102 StringRef ExpansionBuffer =
SM.getBufferData(ExpansionInfo.first);
1103 return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
1108 assert(
Loc.
isMacroID() &&
"Only reasonable to call this on macros");
1110 while (
SM.isMacroArgExpansion(
Loc))
1111 Loc =
SM.getImmediateExpansionRange(
Loc).getBegin();
1117 if (!SpellLoc.
isFileID() ||
SM.isWrittenInScratchSpace(SpellLoc))
1123 Loc =
SM.getSpellingLoc(
SM.getImmediateExpansionRange(
Loc).getBegin());
1127 std::pair<FileID, unsigned> ExpansionInfo =
SM.getDecomposedLoc(
Loc);
1129 StringRef ExpansionBuffer =
SM.getBufferData(ExpansionInfo.first);
1130 return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
1139 if (Str - 1 < BufferStart)
1142 if ((Str[0] ==
'\n' && Str[-1] ==
'\r') ||
1143 (Str[0] ==
'\r' && Str[-1] ==
'\n')) {
1144 if (Str - 2 < BufferStart)
1154 return *Str ==
'\\';
1161 std::pair<FileID, unsigned> LocInfo =
SM.getDecomposedLoc(
Loc);
1162 if (LocInfo.first.isInvalid())
1165 StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
1171 StringRef Rest = Buffer.substr(
Line - Buffer.data());
1172 size_t NumWhitespaceChars = Rest.find_first_not_of(
" \t");
1173 return NumWhitespaceChars == StringRef::npos
1175 : Rest.take_front(NumWhitespaceChars);
1190 unsigned CharNo,
unsigned TokLen) {
1191 assert(FileLoc.
isMacroID() &&
"Must be a macro expansion");
1207 return SM.createExpansionLoc(SpellingLoc, II.
getBegin(), II.
getEnd(), TokLen);
1213 unsigned TokLen)
const {
1214 assert(
Loc >= BufferStart &&
Loc <= BufferEnd &&
1215 "Location out of range for this buffer!");
1219 unsigned CharNo =
Loc-BufferStart;
1225 assert(
PP &&
"This doesn't work on raw lexers");
1244 case '=':
return '#';
1245 case ')':
return ']';
1246 case '(':
return '[';
1247 case '!':
return '|';
1248 case '\'':
return '^';
1249 case '>':
return '}';
1250 case '/':
return '\\';
1251 case '<':
return '{';
1252 case '-':
return '~';
1267 L->
Diag(CP-2, diag::trigraph_ignored);
1272 L->
Diag(CP-2, diag::trigraph_converted) << StringRef(&Res, 1);
1279unsigned Lexer::getEscapedNewLineSize(
const char *Ptr) {
1284 if (Ptr[Size-1] !=
'\n' && Ptr[Size-1] !=
'\r')
1288 if ((Ptr[Size] ==
'\r' || Ptr[Size] ==
'\n') &&
1289 Ptr[Size-1] != Ptr[Size])
1302const char *Lexer::SkipEscapedNewLines(
const char *
P) {
1304 const char *AfterEscape;
1307 }
else if (*
P ==
'?') {
1309 if (
P[1] !=
'?' ||
P[2] !=
'/')
1318 unsigned NewLineSize = Lexer::getEscapedNewLineSize(AfterEscape);
1319 if (NewLineSize == 0)
return P;
1320 P = AfterEscape+NewLineSize;
1329 return std::nullopt;
1334 std::pair<FileID, unsigned> LocInfo =
SM.getDecomposedLoc(
Loc);
1337 bool InvalidTemp =
false;
1338 StringRef
File =
SM.getBufferData(LocInfo.first, &InvalidTemp);
1340 return std::nullopt;
1342 const char *TokenBegin =
File.data() + LocInfo.second;
1345 Lexer lexer(
SM.getLocForStartOfFile(LocInfo.first), LangOpts,
File.begin(),
1346 TokenBegin,
File.end());
1359 const LangOptions &LangOpts,
bool SkipTrailingWhitespaceAndNewLine) {
1361 if (!Tok || Tok->isNot(TKind))
1366 unsigned NumWhitespaceChars = 0;
1367 if (SkipTrailingWhitespaceAndNewLine) {
1368 const char *TokenEnd =
SM.getCharacterData(TokenLoc) + Tok->getLength();
1369 unsigned char C = *TokenEnd;
1372 NumWhitespaceChars++;
1376 if (
C ==
'\n' ||
C ==
'\r') {
1379 NumWhitespaceChars++;
1380 if ((
C ==
'\n' ||
C ==
'\r') &&
C != PrevC)
1381 NumWhitespaceChars++;
1406 if (Ptr[0] ==
'\\') {
1412 return {
'\\', Size};
1416 if (
unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) {
1422 Diag(Ptr, diag::backslash_newline_space);
1425 Size += EscapedNewLineSize;
1426 Ptr += EscapedNewLineSize;
1429 auto CharAndSize = getCharAndSizeSlow(Ptr, Tok);
1430 CharAndSize.Size += Size;
1435 return {
'\\',
Size};
1439 if (Ptr[0] ==
'?' && Ptr[1] ==
'?') {
1443 LangOpts.Trigraphs)) {
1449 if (
C ==
'\\')
goto Slash;
1455 return {*Ptr,
Size + 1u};
1469 if (Ptr[0] ==
'\\') {
1475 return {
'\\',
Size};
1478 if (
unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) {
1480 Size += EscapedNewLineSize;
1481 Ptr += EscapedNewLineSize;
1484 auto CharAndSize = getCharAndSizeSlowNoWarn(Ptr, LangOpts);
1485 CharAndSize.Size +=
Size;
1490 return {
'\\',
Size};
1494 if (LangOpts.Trigraphs && Ptr[0] ==
'?' && Ptr[1] ==
'?') {
1500 if (
C ==
'\\')
goto Slash;
1506 return {*Ptr,
Size + 1u};
1514void Lexer::SetByteOffset(
unsigned Offset,
bool StartOfLine) {
1515 BufferPtr = BufferStart + Offset;
1516 if (BufferPtr > BufferEnd)
1517 BufferPtr = BufferEnd;
1521 IsAtStartOfLine = StartOfLine;
1522 IsAtPhysicalStartOfLine = StartOfLine;
1526 static const llvm::sys::UnicodeCharSet UnicodeWhitespaceChars(
1528 return UnicodeWhitespaceChars.contains(Codepoint);
1533 llvm::raw_svector_ostream CharOS(CharBuf);
1534 llvm::write_hex(CharOS,
C, llvm::HexPrintStyle::Upper, 4);
1545 bool IsStart,
bool &IsExtension) {
1546 static const llvm::sys::UnicodeCharSet MathStartChars(
1548 static const llvm::sys::UnicodeCharSet MathContinueChars(
1550 if (MathStartChars.contains(
C) ||
1551 (!IsStart && MathContinueChars.contains(
C))) {
1559 bool &IsExtension) {
1560 if (LangOpts.AsmPreprocessor) {
1562 }
else if (LangOpts.DollarIdents &&
'$' ==
C) {
1564 }
else if (LangOpts.CPlusPlus || LangOpts.C23) {
1569 static const llvm::sys::UnicodeCharSet XIDStartChars(
XIDStartRanges);
1571 if (
C ==
'_' || XIDStartChars.contains(
C) || XIDContinueChars.contains(
C))
1575 }
else if (LangOpts.C11) {
1576 static const llvm::sys::UnicodeCharSet C11AllowedIDChars(
1578 return C11AllowedIDChars.contains(
C);
1580 static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
1582 return C99AllowedIDChars.contains(
C);
1587 bool &IsExtension) {
1588 assert(
C > 0x7F &&
"isAllowedInitiallyIDChar called with an ASCII codepoint");
1589 IsExtension =
false;
1590 if (LangOpts.AsmPreprocessor) {
1593 if (LangOpts.CPlusPlus || LangOpts.C23) {
1594 static const llvm::sys::UnicodeCharSet XIDStartChars(
XIDStartRanges);
1595 if (XIDStartChars.contains(
C))
1603 static const llvm::sys::UnicodeCharSet C11DisallowedInitialIDChars(
1605 return !C11DisallowedInitialIDChars.contains(
C);
1607 static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
1609 return !C99DisallowedInitialIDChars.contains(
C);
1615 static const llvm::sys::UnicodeCharSet MathStartChars(
1617 static const llvm::sys::UnicodeCharSet MathContinueChars(
1620 (void)MathStartChars;
1621 (void)MathContinueChars;
1622 assert((MathStartChars.contains(
C) || MathContinueChars.contains(
C)) &&
1623 "Unexpected mathematical notation codepoint");
1639 CannotAppearInIdentifier = 0,
1640 CannotStartIdentifier
1643 static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
1645 static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
1647 if (!C99AllowedIDChars.contains(
C)) {
1650 << CannotAppearInIdentifier;
1651 }
else if (IsFirst && C99DisallowedInitialIDChars.contains(
C)) {
1654 << CannotStartIdentifier;
1666 struct HomoglyphPair {
1669 bool operator<(HomoglyphPair R)
const {
return Character < R.Character; }
1671 static constexpr HomoglyphPair SortedHomoglyphs[] = {
1724 std::lower_bound(std::begin(SortedHomoglyphs),
1725 std::end(SortedHomoglyphs) - 1, HomoglyphPair{
C,
'\0'});
1726 if (Homoglyph->Character ==
C) {
1727 if (Homoglyph->LooksLike) {
1728 const char LooksLikeStr[] = {Homoglyph->LooksLike, 0};
1749 if ((IsFirst && IsIDStart) || (!IsFirst && IsIDContinue))
1752 bool InvalidOnlyAtStart = IsFirst && !IsIDStart && IsIDContinue;
1754 if (!IsFirst || InvalidOnlyAtStart) {
1765bool Lexer::tryConsumeIdentifierUCN(
const char *&CurPtr,
unsigned Size,
1767 const char *UCNPtr = CurPtr +
Size;
1768 uint32_t CodePoint = tryReadUCN(UCNPtr, CurPtr,
nullptr);
1769 if (CodePoint == 0) {
1772 bool IsExtension =
false;
1797 if ((UCNPtr - CurPtr == 6 && CurPtr[1] ==
'u') ||
1798 (UCNPtr - CurPtr == 10 && CurPtr[1] ==
'U'))
1801 while (CurPtr != UCNPtr)
1802 (void)getAndAdvanceChar(CurPtr,
Result);
1806bool Lexer::tryConsumeIdentifierUTF8Char(
const char *&CurPtr,
Token &
Result) {
1807 llvm::UTF32 CodePoint;
1812 unsigned FirstCodeUnitSize;
1813 getCharAndSize(CurPtr, FirstCodeUnitSize);
1814 const char *CharStart = CurPtr + FirstCodeUnitSize - 1;
1815 const char *UnicodePtr = CharStart;
1817 llvm::ConversionResult ConvResult = llvm::convertUTF8Sequence(
1818 (
const llvm::UTF8 **)&UnicodePtr, (
const llvm::UTF8 *)BufferEnd,
1819 &CodePoint, llvm::strictConversion);
1820 if (ConvResult != llvm::conversionOK)
1823 bool IsExtension =
false;
1852 ConsumeChar(CurPtr, FirstCodeUnitSize,
Result);
1853 CurPtr = UnicodePtr;
1857bool Lexer::LexUnicodeIdentifierStart(
Token &
Result, uint32_t
C,
1858 const char *CurPtr) {
1859 bool IsExtension =
false;
1874 return LexIdentifierContinue(
Result, CurPtr);
1899 FormTokenWithChars(
Result, CurPtr, tok::unknown);
1905 [[maybe_unused]]
const char *BufferEnd) {
1907 alignas(16)
static constexpr char AsciiIdentifierRange[16] = {
1908 '_',
'_',
'A',
'Z',
'a',
'z',
'0',
'9',
1910 constexpr ssize_t BytesPerRegister = 16;
1912 __m128i AsciiIdentifierRangeV =
1915 while (LLVM_LIKELY(BufferEnd - CurPtr >= BytesPerRegister)) {
1922 if (Consumed == BytesPerRegister)
1928 unsigned char C = *CurPtr;
1934bool Lexer::LexIdentifierContinue(
Token &
Result,
const char *CurPtr) {
1943 unsigned char C = getCharAndSize(CurPtr, Size);
1945 CurPtr = ConsumeChar(CurPtr, Size,
Result);
1950 if (!LangOpts.DollarIdents)
1954 Diag(CurPtr, diag::ext_dollar_in_identifier);
1955 CurPtr = ConsumeChar(CurPtr, Size,
Result);
1958 if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result))
1966 const char *IdStart = BufferPtr;
1967 FormTokenWithChars(
Result, CurPtr, tok::raw_identifier);
1968 Result.setRawIdentifierData(IdStart);
1984 if (isCodeCompletionPoint(CurPtr)) {
1986 Result.setKind(tok::code_completion);
1992 assert(*CurPtr == 0 &&
"Completion character must be 0");
1997 if (CurPtr < BufferEnd) {
2015bool Lexer::isHexaLiteral(
const char *Start,
const LangOptions &LangOpts) {
2017 char C1 = CharAndSize1.Char;
2023 char C2 = CharAndSize2.Char;
2024 return (C2 ==
'x' || C2 ==
'X');
2030bool Lexer::LexNumericConstant(
Token &
Result,
const char *CurPtr) {
2032 char C = getCharAndSize(CurPtr, Size);
2035 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2037 if (LangOpts.HLSL &&
C ==
'.' && (*CurPtr ==
'x' || *CurPtr ==
'r')) {
2041 C = getCharAndSize(CurPtr, Size);
2045 if ((
C ==
'-' ||
C ==
'+') && (PrevCh ==
'E' || PrevCh ==
'e')) {
2048 if (!LangOpts.MicrosoftExt || !isHexaLiteral(BufferPtr, LangOpts))
2049 return LexNumericConstant(
Result, ConsumeChar(CurPtr, Size,
Result));
2053 if ((
C ==
'-' ||
C ==
'+') && (PrevCh ==
'P' || PrevCh ==
'p')) {
2057 bool IsHexFloat =
true;
2058 if (!LangOpts.C99) {
2059 if (!isHexaLiteral(BufferPtr, LangOpts))
2061 else if (!LangOpts.CPlusPlus17 &&
2062 std::find(BufferPtr, CurPtr,
'_') != CurPtr)
2066 return LexNumericConstant(
Result, ConsumeChar(CurPtr, Size,
Result));
2070 if (
C ==
'\'' && (LangOpts.CPlusPlus14 || LangOpts.C23)) {
2074 Diag(CurPtr, LangOpts.CPlusPlus
2075 ? diag::warn_cxx11_compat_digit_separator
2076 : diag::warn_c23_compat_digit_separator);
2077 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2078 CurPtr = ConsumeChar(CurPtr, NextSize,
Result);
2079 return LexNumericConstant(
Result, CurPtr);
2084 if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result))
2085 return LexNumericConstant(
Result, CurPtr);
2087 return LexNumericConstant(
Result, CurPtr);
2090 const char *TokStart = BufferPtr;
2091 FormTokenWithChars(
Result, CurPtr, tok::numeric_constant);
2092 Result.setLiteralData(TokStart);
2098const char *Lexer::LexUDSuffix(
Token &
Result,
const char *CurPtr,
2099 bool IsStringLiteral) {
2100 assert(LangOpts.CPlusPlus);
2104 char C = getCharAndSize(CurPtr, Size);
2105 bool Consumed =
false;
2108 if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result))
2110 else if (!
isASCII(
C) && tryConsumeIdentifierUTF8Char(CurPtr,
Result))
2116 if (!LangOpts.CPlusPlus11) {
2119 C ==
'_' ? diag::warn_cxx11_compat_user_defined_literal
2120 : diag::warn_cxx11_compat_reserved_user_defined_literal)
2131 bool IsUDSuffix =
false;
2134 else if (IsStringLiteral && LangOpts.CPlusPlus14) {
2138 const unsigned MaxStandardSuffixLength = 3;
2139 char Buffer[MaxStandardSuffixLength] = {
C };
2140 unsigned Consumed =
Size;
2143 auto [Next, NextSize] =
2147 const StringRef CompleteSuffix(Buffer, Chars);
2153 if (Chars == MaxStandardSuffixLength)
2157 Buffer[Chars++] = Next;
2158 Consumed += NextSize;
2164 Diag(CurPtr, LangOpts.MSVCCompat
2165 ? diag::ext_ms_reserved_user_defined_literal
2166 : diag::ext_reserved_user_defined_literal)
2171 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2176 C = getCharAndSize(CurPtr, Size);
2178 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2179 }
else if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result)) {
2180 }
else if (!
isASCII(
C) && tryConsumeIdentifierUTF8Char(CurPtr,
Result)) {
2190bool Lexer::LexStringLiteral(
Token &
Result,
const char *CurPtr,
2192 const char *AfterQuote = CurPtr;
2194 const char *NulCharacter =
nullptr;
2197 (Kind == tok::utf8_string_literal ||
2198 Kind == tok::utf16_string_literal ||
2199 Kind == tok::utf32_string_literal))
2200 Diag(BufferPtr, LangOpts.CPlusPlus ? diag::warn_cxx98_compat_unicode_literal
2201 : diag::warn_c99_compat_unicode_literal);
2203 char C = getAndAdvanceChar(CurPtr,
Result);
2208 C = getAndAdvanceChar(CurPtr,
Result);
2210 if (
C ==
'\n' ||
C ==
'\r' ||
2211 (
C == 0 && CurPtr-1 == BufferEnd)) {
2213 Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 1;
2214 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2219 if (isCodeCompletionPoint(CurPtr-1)) {
2221 codeCompleteIncludedFile(AfterQuote, CurPtr - 1,
false);
2224 FormTokenWithChars(
Result, CurPtr - 1, tok::unknown);
2229 NulCharacter = CurPtr-1;
2231 C = getAndAdvanceChar(CurPtr,
Result);
2235 if (LangOpts.CPlusPlus)
2236 CurPtr = LexUDSuffix(
Result, CurPtr,
true);
2240 Diag(NulCharacter, diag::null_in_char_or_string) << 1;
2243 const char *TokStart = BufferPtr;
2244 FormTokenWithChars(
Result, CurPtr, Kind);
2245 Result.setLiteralData(TokStart);
2251bool Lexer::LexRawStringLiteral(
Token &
Result,
const char *CurPtr,
2259 Diag(BufferPtr, diag::warn_cxx98_compat_raw_string_literal);
2261 unsigned PrefixLen = 0;
2265 llvm::is_contained({
'$',
'@',
'`'}, CurPtr[PrefixLen])) {
2266 const char *Pos = &CurPtr[PrefixLen];
2267 Diag(Pos, LangOpts.CPlusPlus26
2268 ? diag::warn_cxx26_compat_raw_string_literal_character_set
2269 : diag::ext_cxx26_raw_string_literal_character_set)
2270 << StringRef(Pos, 1);
2276 if (CurPtr[PrefixLen] !=
'(') {
2278 const char *PrefixEnd = &CurPtr[PrefixLen];
2279 if (PrefixLen == 16) {
2280 Diag(PrefixEnd, diag::err_raw_delim_too_long);
2281 }
else if (*PrefixEnd ==
'\n') {
2282 Diag(PrefixEnd, diag::err_invalid_newline_raw_delim);
2284 Diag(PrefixEnd, diag::err_invalid_char_raw_delim)
2285 << StringRef(PrefixEnd, 1);
2297 if (
C == 0 && CurPtr-1 == BufferEnd) {
2303 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2308 const char *Prefix = CurPtr;
2309 CurPtr += PrefixLen + 1;
2316 if (strncmp(CurPtr, Prefix, PrefixLen) == 0 && CurPtr[PrefixLen] ==
'"') {
2317 CurPtr += PrefixLen + 1;
2320 }
else if (
C == 0 && CurPtr-1 == BufferEnd) {
2322 Diag(BufferPtr, diag::err_unterminated_raw_string)
2323 << StringRef(Prefix, PrefixLen);
2324 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2330 if (LangOpts.CPlusPlus)
2331 CurPtr = LexUDSuffix(
Result, CurPtr,
true);
2334 const char *TokStart = BufferPtr;
2335 FormTokenWithChars(
Result, CurPtr, Kind);
2336 Result.setLiteralData(TokStart);
2342bool Lexer::LexAngledStringLiteral(
Token &
Result,
const char *CurPtr) {
2344 const char *NulCharacter =
nullptr;
2345 const char *AfterLessPos = CurPtr;
2346 char C = getAndAdvanceChar(CurPtr,
Result);
2351 C = getAndAdvanceChar(CurPtr,
Result);
2354 (
C == 0 && (CurPtr - 1 == BufferEnd))) {
2357 FormTokenWithChars(
Result, AfterLessPos, tok::less);
2362 if (isCodeCompletionPoint(CurPtr - 1)) {
2363 codeCompleteIncludedFile(AfterLessPos, CurPtr - 1,
true);
2365 FormTokenWithChars(
Result, CurPtr - 1, tok::unknown);
2368 NulCharacter = CurPtr-1;
2370 C = getAndAdvanceChar(CurPtr,
Result);
2375 Diag(NulCharacter, diag::null_in_char_or_string) << 1;
2378 const char *TokStart = BufferPtr;
2379 FormTokenWithChars(
Result, CurPtr, tok::header_name);
2380 Result.setLiteralData(TokStart);
2384void Lexer::codeCompleteIncludedFile(
const char *PathStart,
2385 const char *CompletionPoint,
2388 StringRef PartialPath(PathStart, CompletionPoint - PathStart);
2389 llvm::StringRef SlashChars = LangOpts.MSVCCompat ?
"/\\" :
"/";
2390 auto Slash = PartialPath.find_last_of(SlashChars);
2392 (Slash == StringRef::npos) ?
"" : PartialPath.take_front(Slash);
2393 const char *StartOfFilename =
2394 (Slash == StringRef::npos) ? PathStart : PathStart + Slash + 1;
2397 StringRef(StartOfFilename, CompletionPoint - StartOfFilename)));
2400 while (CompletionPoint < BufferEnd) {
2401 char Next = *(CompletionPoint + 1);
2402 if (Next == 0 || Next ==
'\r' || Next ==
'\n')
2405 if (Next == (IsAngled ?
'>' :
'"'))
2407 if (SlashChars.contains(Next))
2419bool Lexer::LexCharConstant(
Token &
Result,
const char *CurPtr,
2422 const char *NulCharacter =
nullptr;
2425 if (Kind == tok::utf16_char_constant || Kind == tok::utf32_char_constant)
2426 Diag(BufferPtr, LangOpts.CPlusPlus
2427 ? diag::warn_cxx98_compat_unicode_literal
2428 : diag::warn_c99_compat_unicode_literal);
2429 else if (Kind == tok::utf8_char_constant)
2430 Diag(BufferPtr, LangOpts.CPlusPlus
2431 ? diag::warn_cxx14_compat_u8_character_literal
2432 : diag::warn_c17_compat_u8_character_literal);
2435 char C = getAndAdvanceChar(CurPtr,
Result);
2438 Diag(BufferPtr, diag::ext_empty_character);
2439 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2446 C = getAndAdvanceChar(CurPtr,
Result);
2448 if (
C ==
'\n' ||
C ==
'\r' ||
2449 (
C == 0 && CurPtr-1 == BufferEnd)) {
2451 Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 0;
2452 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2457 if (isCodeCompletionPoint(CurPtr-1)) {
2459 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2464 NulCharacter = CurPtr-1;
2466 C = getAndAdvanceChar(CurPtr,
Result);
2470 if (LangOpts.CPlusPlus)
2471 CurPtr = LexUDSuffix(
Result, CurPtr,
false);
2475 Diag(NulCharacter, diag::null_in_char_or_string) << 0;
2478 const char *TokStart = BufferPtr;
2479 FormTokenWithChars(
Result, CurPtr, Kind);
2480 Result.setLiteralData(TokStart);
2488bool Lexer::SkipWhitespace(
Token &
Result,
const char *CurPtr,
2489 bool &TokAtPhysicalStartOfLine) {
2493 unsigned char Char = *CurPtr;
2495 const char *lastNewLine =
nullptr;
2496 auto setLastNewLine = [&](
const char *Ptr) {
2502 setLastNewLine(CurPtr - 1);
2521 if (*CurPtr ==
'\n')
2522 setLastNewLine(CurPtr);
2529 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2531 IsAtStartOfLine =
true;
2532 IsAtPhysicalStartOfLine =
true;
2539 char PrevChar = CurPtr[-1];
2545 TokAtPhysicalStartOfLine =
true;
2547 if (NewLinePtr && lastNewLine && NewLinePtr != lastNewLine &&
PP) {
2564bool Lexer::SkipLineComment(
Token &
Result,
const char *CurPtr,
2565 bool &TokAtPhysicalStartOfLine) {
2570 Diag(BufferPtr, diag::ext_line_comment);
2588 bool UnicodeDecodingAlreadyDiagnosed =
false;
2595 C !=
'\n' &&
C !=
'\r') {
2597 UnicodeDecodingAlreadyDiagnosed =
false;
2601 unsigned Length = llvm::getUTF8SequenceSize(
2602 (
const llvm::UTF8 *)CurPtr, (
const llvm::UTF8 *)BufferEnd);
2605 Diag(CurPtr, diag::warn_invalid_utf8_in_comment);
2606 UnicodeDecodingAlreadyDiagnosed =
true;
2609 UnicodeDecodingAlreadyDiagnosed =
false;
2615 const char *NextLine = CurPtr;
2618 const char *EscapePtr = CurPtr-1;
2619 bool HasSpace =
false;
2625 if (*EscapePtr ==
'\\')
2628 else if (EscapePtr[0] ==
'/' && EscapePtr[-1] ==
'?' &&
2629 EscapePtr[-2] ==
'?' && LangOpts.Trigraphs)
2631 CurPtr = EscapePtr-2;
2637 Diag(EscapePtr, diag::backslash_newline_space);
2644 const char *OldPtr = CurPtr;
2647 C = getAndAdvanceChar(CurPtr,
Result);
2652 if (
C != 0 && CurPtr == OldPtr+1) {
2660 if (CurPtr != OldPtr + 1 &&
C !=
'/' &&
2661 (CurPtr == BufferEnd + 1 || CurPtr[0] !=
'/')) {
2662 for (; OldPtr != CurPtr; ++OldPtr)
2663 if (OldPtr[0] ==
'\n' || OldPtr[0] ==
'\r') {
2667 const char *ForwardPtr = CurPtr;
2670 if (ForwardPtr[0] ==
'/' && ForwardPtr[1] ==
'/')
2675 Diag(OldPtr-1, diag::ext_multi_line_line_comment);
2680 if (
C ==
'\r' ||
C ==
'\n' || CurPtr == BufferEnd + 1) {
2685 if (
C ==
'\0' && isCodeCompletionPoint(CurPtr-1)) {
2703 return SaveLineComment(
Result, CurPtr);
2717 NewLinePtr = CurPtr++;
2721 TokAtPhysicalStartOfLine =
true;
2730bool Lexer::SaveLineComment(
Token &
Result,
const char *CurPtr) {
2733 FormTokenWithChars(
Result, CurPtr, tok::comment);
2745 assert(Spelling[0] ==
'/' && Spelling[1] ==
'/' &&
"Not line comment?");
2749 Result.setKind(tok::comment);
2760 assert(CurPtr[0] ==
'\n' || CurPtr[0] ==
'\r');
2763 const char *TrigraphPos =
nullptr;
2765 const char *SpacePos =
nullptr;
2772 if (CurPtr[0] ==
'\n' || CurPtr[0] ==
'\r') {
2774 if (CurPtr[0] == CurPtr[1])
2788 if (*CurPtr ==
'\\') {
2790 }
else if (CurPtr[0] ==
'/' && CurPtr[-1] ==
'?' && CurPtr[-2] ==
'?') {
2792 TrigraphPos = CurPtr - 2;
2803 if (*CurPtr !=
'\n' && *CurPtr !=
'\r')
2812 L->
Diag(TrigraphPos, diag::trigraph_ignored_block_comment);
2816 L->
Diag(TrigraphPos, diag::trigraph_ends_block_comment);
2821 L->
Diag(CurPtr + 1, diag::escaped_newline_block_comment_end);
2825 L->
Diag(SpacePos, diag::backslash_newline_space);
2831#include <emmintrin.h>
2846bool Lexer::SkipBlockComment(
Token &
Result,
const char *CurPtr,
2847 bool &TokAtPhysicalStartOfLine) {
2857 unsigned char C = getCharAndSize(CurPtr, CharSize);
2859 if (
C == 0 && CurPtr == BufferEnd+1) {
2861 Diag(BufferPtr, diag::err_unterminated_block_comment);
2867 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2884 bool UnicodeDecodingAlreadyDiagnosed =
false;
2889 if (CurPtr + 24 < BufferEnd &&
2894 while (
C !=
'/' && (
intptr_t)CurPtr % 16 != 0) {
2899 if (
C ==
'/')
goto FoundSlash;
2903 while (CurPtr + 16 < BufferEnd) {
2905 if (LLVM_UNLIKELY(Mask != 0)) {
2915 CurPtr += llvm::countr_zero<unsigned>(cmp) + 1;
2921 __vector
unsigned char LongUTF = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
2922 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
2923 0x80, 0x80, 0x80, 0x80};
2924 __vector
unsigned char Slashes = {
2925 '/',
'/',
'/',
'/',
'/',
'/',
'/',
'/',
2926 '/',
'/',
'/',
'/',
'/',
'/',
'/',
'/'
2928 while (CurPtr + 16 < BufferEnd) {
2930 vec_any_ge(*(
const __vector
unsigned char *)CurPtr, LongUTF)))
2932 if (
vec_any_eq(*(
const __vector
unsigned char *)CurPtr, Slashes)) {
2939 while (CurPtr + 16 < BufferEnd) {
2940 bool HasNonASCII =
false;
2941 for (
unsigned I = 0; I < 16; ++I)
2942 HasNonASCII |= !
isASCII(CurPtr[I]);
2944 if (LLVM_UNLIKELY(HasNonASCII))
2947 bool HasSlash =
false;
2948 for (
unsigned I = 0; I < 16; ++I)
2949 HasSlash |= CurPtr[I] ==
'/';
2963 while (
C !=
'/' &&
C !=
'\0') {
2965 UnicodeDecodingAlreadyDiagnosed =
false;
2972 unsigned Length = llvm::getUTF8SequenceSize(
2973 (
const llvm::UTF8 *)CurPtr - 1, (
const llvm::UTF8 *)BufferEnd);
2976 Diag(CurPtr - 1, diag::warn_invalid_utf8_in_comment);
2977 UnicodeDecodingAlreadyDiagnosed =
true;
2979 UnicodeDecodingAlreadyDiagnosed =
false;
2980 CurPtr += Length - 1;
2987 if (CurPtr[-2] ==
'*')
2990 if ((CurPtr[-2] ==
'\n' || CurPtr[-2] ==
'\r')) {
2992 LangOpts.Trigraphs)) {
2998 if (CurPtr[0] ==
'*' && CurPtr[1] !=
'/') {
3003 Diag(CurPtr-1, diag::warn_nested_block_comment);
3005 }
else if (
C == 0 && CurPtr == BufferEnd+1) {
3007 Diag(BufferPtr, diag::err_unterminated_block_comment);
3016 FormTokenWithChars(
Result, CurPtr, tok::unknown);
3022 }
else if (
C ==
'\0' && isCodeCompletionPoint(CurPtr-1)) {
3041 FormTokenWithChars(
Result, CurPtr, tok::comment);
3050 SkipWhitespace(
Result, CurPtr+1, TokAtPhysicalStartOfLine);
3068 "Must be in a preprocessing directive!");
3073 const char *CurPtr = BufferPtr;
3075 char Char = getAndAdvanceChar(CurPtr, Tmp);
3083 if (CurPtr-1 != BufferEnd) {
3084 if (isCodeCompletionPoint(CurPtr-1)) {
3100 assert(CurPtr[-1] == Char &&
"Trigraphs for newline?");
3101 BufferPtr = CurPtr-1;
3105 if (Tmp.
is(tok::code_completion)) {
3110 assert(Tmp.
is(tok::eod) &&
"Unexpected token!");
3122bool Lexer::LexEndOfFile(
Token &
Result,
const char *CurPtr) {
3130 FormTokenWithChars(
Result, CurPtr, tok::eod);
3142 BufferPtr = BufferEnd;
3143 FormTokenWithChars(
Result, BufferEnd, tok::eof);
3163 diag::err_pp_unterminated_conditional);
3169 if (CurPtr != BufferStart && (CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')) {
3174 if (LangOpts.CPlusPlus11) {
3178 if (!Diags.
isIgnored(diag::warn_cxx98_compat_no_newline_eof, EndLoc)) {
3179 DiagID = diag::warn_cxx98_compat_no_newline_eof;
3181 DiagID = diag::warn_no_newline_eof;
3184 DiagID = diag::ext_no_newline_eof;
3187 Diag(BufferEnd, DiagID)
3201unsigned Lexer::isNextPPTokenLParen() {
3202 assert(!
LexingRawMode &&
"How can we expand a macro from a skipping buffer?");
3204 if (isDependencyDirectivesLexer()) {
3205 if (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size())
3207 return DepDirectives.front().Tokens[NextDepDirectiveTokenIndex].is(
3217 const char *TmpBufferPtr = BufferPtr;
3219 bool atStartOfLine = IsAtStartOfLine;
3220 bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
3221 bool leadingSpace = HasLeadingSpace;
3227 BufferPtr = TmpBufferPtr;
3229 HasLeadingSpace = leadingSpace;
3230 IsAtStartOfLine = atStartOfLine;
3231 IsAtPhysicalStartOfLine = atPhysicalStartOfLine;
3236 if (Tok.
is(tok::eof))
3238 return Tok.
is(tok::l_paren);
3244 const char *Terminator = CMK ==
CMK_Perforce ?
"<<<<\n" :
">>>>>>>";
3246 auto RestOfBuffer = StringRef(CurPtr, BufferEnd - CurPtr).substr(TermLen);
3247 size_t Pos = RestOfBuffer.find(Terminator);
3248 while (Pos != StringRef::npos) {
3251 (RestOfBuffer[Pos - 1] !=
'\r' && RestOfBuffer[Pos - 1] !=
'\n')) {
3252 RestOfBuffer = RestOfBuffer.substr(Pos+TermLen);
3253 Pos = RestOfBuffer.find(Terminator);
3256 return RestOfBuffer.data()+Pos;
3265bool Lexer::IsStartOfConflictMarker(
const char *CurPtr) {
3267 if (CurPtr != BufferStart &&
3268 CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')
3272 if (!StringRef(CurPtr, BufferEnd - CurPtr).starts_with(
"<<<<<<<") &&
3273 !StringRef(CurPtr, BufferEnd - CurPtr).starts_with(
">>>> "))
3288 Diag(CurPtr, diag::err_conflict_marker);
3289 CurrentConflictMarkerState =
Kind;
3293 while (*CurPtr !=
'\r' && *CurPtr !=
'\n') {
3294 assert(CurPtr != BufferEnd &&
"Didn't find end of line");
3309bool Lexer::HandleEndOfConflictMarker(
const char *CurPtr) {
3311 if (CurPtr != BufferStart &&
3312 CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')
3321 for (
unsigned i = 1; i != 4; ++i)
3322 if (CurPtr[i] != CurPtr[0])
3329 CurrentConflictMarkerState)) {
3333 while (CurPtr != BufferEnd && *CurPtr !=
'\r' && *CurPtr !=
'\n')
3339 CurrentConflictMarkerState =
CMK_None;
3347 const char *BufferEnd) {
3348 if (CurPtr == BufferEnd)
3351 for (; CurPtr != BufferEnd; ++CurPtr) {
3352 if (CurPtr[0] ==
'#' && CurPtr[1] ==
'>')
3358bool Lexer::lexEditorPlaceholder(
Token &
Result,
const char *CurPtr) {
3359 assert(CurPtr[-1] ==
'<' && CurPtr[0] ==
'#' &&
"Not a placeholder!");
3365 const char *Start = CurPtr - 1;
3366 if (!LangOpts.AllowEditorPlaceholders)
3367 Diag(Start, diag::err_placeholder_in_source);
3369 FormTokenWithChars(
Result, End, tok::raw_identifier);
3370 Result.setRawIdentifierData(Start);
3377bool Lexer::isCodeCompletionPoint(
const char *CurPtr)
const {
3386std::optional<uint32_t> Lexer::tryReadNumericUCN(
const char *&StartPtr,
3387 const char *SlashLoc,
3390 char Kind = getCharAndSize(StartPtr, CharSize);
3391 assert((Kind ==
'u' || Kind ==
'U') &&
"expected a UCN");
3393 unsigned NumHexDigits;
3396 else if (Kind ==
'U')
3399 bool Delimited =
false;
3400 bool FoundEndDelimiter =
false;
3404 if (!LangOpts.CPlusPlus && !LangOpts.C99) {
3406 Diag(SlashLoc, diag::warn_ucn_not_valid_in_c89);
3407 return std::nullopt;
3410 const char *CurPtr = StartPtr + CharSize;
3411 const char *KindLoc = &CurPtr[-1];
3414 while (Count != NumHexDigits || Delimited) {
3415 char C = getCharAndSize(CurPtr, CharSize);
3416 if (!Delimited && Count == 0 &&
C ==
'{') {
3422 if (Delimited &&
C ==
'}') {
3424 FoundEndDelimiter =
true;
3428 unsigned Value = llvm::hexDigitValue(
C);
3433 Diag(SlashLoc, diag::warn_delimited_ucn_incomplete)
3434 << StringRef(KindLoc, 1);
3435 return std::nullopt;
3438 if (CodePoint & 0xF000'0000) {
3440 Diag(KindLoc, diag::err_escape_too_large) << 0;
3441 return std::nullopt;
3452 Diag(SlashLoc, FoundEndDelimiter ? diag::warn_delimited_ucn_empty
3453 : diag::warn_ucn_escape_no_digits)
3454 << StringRef(KindLoc, 1);
3455 return std::nullopt;
3458 if (Delimited && Kind ==
'U') {
3460 Diag(SlashLoc, diag::err_hex_escape_no_digits) << StringRef(KindLoc, 1);
3461 return std::nullopt;
3464 if (!Delimited && Count != NumHexDigits) {
3466 Diag(SlashLoc, diag::warn_ucn_escape_incomplete);
3468 if (Count == 4 && NumHexDigits == 8) {
3470 Diag(KindLoc, diag::note_ucn_four_not_eight)
3474 return std::nullopt;
3477 if (Delimited &&
PP) {
3479 ? diag::warn_cxx23_delimited_escape_sequence
3480 : diag::ext_delimited_escape_sequence)
3489 if (CurPtr - StartPtr == (
ptrdiff_t)(Count + 1 + (Delimited ? 2 : 0)))
3492 while (StartPtr != CurPtr)
3493 (void)getAndAdvanceChar(StartPtr, *
Result);
3500std::optional<uint32_t> Lexer::tryReadNamedUCN(
const char *&StartPtr,
3501 const char *SlashLoc,
3506 char C = getCharAndSize(StartPtr, CharSize);
3507 assert(
C ==
'N' &&
"expected \\N{...}");
3509 const char *CurPtr = StartPtr + CharSize;
3510 const char *KindLoc = &CurPtr[-1];
3512 C = getCharAndSize(CurPtr, CharSize);
3515 Diag(SlashLoc, diag::warn_ucn_escape_incomplete);
3516 return std::nullopt;
3519 const char *StartName = CurPtr;
3520 bool FoundEndDelimiter =
false;
3523 C = getCharAndSize(CurPtr, CharSize);
3526 FoundEndDelimiter =
true;
3532 Buffer.push_back(
C);
3535 if (!FoundEndDelimiter || Buffer.empty()) {
3537 Diag(SlashLoc, FoundEndDelimiter ? diag::warn_delimited_ucn_empty
3538 : diag::warn_delimited_ucn_incomplete)
3539 << StringRef(KindLoc, 1);
3540 return std::nullopt;
3543 StringRef Name(Buffer.data(), Buffer.size());
3544 std::optional<char32_t> Match =
3545 llvm::sys::unicode::nameToCodepointStrict(Name);
3546 std::optional<llvm::sys::unicode::LooseMatchingResult> LooseMatch;
3548 LooseMatch = llvm::sys::unicode::nameToCodepointLooseMatching(Name);
3550 Diag(StartName, diag::err_invalid_ucn_name)
3551 << StringRef(Buffer.data(), Buffer.size())
3554 Diag(StartName, diag::note_invalid_ucn_name_loose_matching)
3565 if (Diagnose && Match)
3567 ? diag::warn_cxx23_delimited_escape_sequence
3568 : diag::ext_delimited_escape_sequence)
3575 if (LooseMatch && Diagnose)
3576 Match = LooseMatch->CodePoint;
3583 if (CurPtr - StartPtr == (
ptrdiff_t)(Buffer.size() + 3))
3586 while (StartPtr != CurPtr)
3587 (void)getAndAdvanceChar(StartPtr, *
Result);
3591 return Match ? std::optional<uint32_t>(*Match) :
std::nullopt;
3594uint32_t Lexer::tryReadUCN(
const char *&StartPtr,
const char *SlashLoc,
3598 std::optional<uint32_t> CodePointOpt;
3599 char Kind = getCharAndSize(StartPtr, CharSize);
3600 if (Kind ==
'u' || Kind ==
'U')
3601 CodePointOpt = tryReadNumericUCN(StartPtr, SlashLoc,
Result);
3602 else if (Kind ==
'N')
3603 CodePointOpt = tryReadNamedUCN(StartPtr, SlashLoc,
Result);
3608 uint32_t CodePoint = *CodePointOpt;
3611 if (LangOpts.AsmPreprocessor)
3630 if (CodePoint < 0xA0) {
3634 if (CodePoint < 0x20 || CodePoint >= 0x7F)
3635 Diag(BufferPtr, diag::err_ucn_control_character);
3637 char C =
static_cast<char>(CodePoint);
3638 Diag(BufferPtr, diag::err_ucn_escape_basic_scs) << StringRef(&
C, 1);
3643 }
else if (CodePoint >= 0xD800 && CodePoint <= 0xDFFF) {
3648 if (LangOpts.CPlusPlus && !LangOpts.CPlusPlus11)
3649 Diag(BufferPtr, diag::warn_ucn_escape_surrogate);
3651 Diag(BufferPtr, diag::err_ucn_escape_invalid);
3659bool Lexer::CheckUnicodeWhitespace(
Token &
Result, uint32_t
C,
3660 const char *CurPtr) {
3663 Diag(BufferPtr, diag::ext_unicode_whitespace)
3672void Lexer::PropagateLineStartLeadingSpaceInfo(
Token &
Result) {
3673 IsAtStartOfLine =
Result.isAtStartOfLine();
3674 HasLeadingSpace =
Result.hasLeadingSpace();
3675 HasLeadingEmptyMacro =
Result.hasLeadingEmptyMacro();
3680 assert(!isDependencyDirectivesLexer());
3686 if (IsAtStartOfLine) {
3688 IsAtStartOfLine =
false;
3691 if (HasLeadingSpace) {
3693 HasLeadingSpace =
false;
3696 if (HasLeadingEmptyMacro) {
3698 HasLeadingEmptyMacro =
false;
3701 bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
3702 IsAtPhysicalStartOfLine =
false;
3705 bool returnedToken = LexTokenInternal(
Result, atPhysicalStartOfLine);
3707 assert((returnedToken || !isRawLex) &&
"Raw lex must succeed");
3708 return returnedToken;
3716bool Lexer::LexTokenInternal(
Token &
Result,
bool TokAtPhysicalStartOfLine) {
3718 assert(!
Result.needsCleaning() &&
"Result needs cleaning");
3719 assert(!
Result.hasPtrData() &&
"Result has not been reset");
3722 const char *CurPtr = BufferPtr;
3734 FormTokenWithChars(
Result, CurPtr, tok::unknown);
3743 unsigned SizeTmp, SizeTmp2;
3746 char Char = getAndAdvanceChar(CurPtr,
Result);
3750 NewLinePtr =
nullptr;
3755 if (CurPtr-1 == BufferEnd)
3756 return LexEndOfFile(
Result, CurPtr-1);
3759 if (isCodeCompletionPoint(CurPtr-1)) {
3762 FormTokenWithChars(
Result, CurPtr, tok::code_completion);
3767 Diag(CurPtr-1, diag::null_in_file);
3769 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
3778 if (LangOpts.MicrosoftExt) {
3780 Diag(CurPtr-1, diag::ext_ctrl_z_eof_microsoft);
3781 return LexEndOfFile(
Result, CurPtr-1);
3785 Kind = tok::unknown;
3789 if (CurPtr[0] ==
'\n')
3790 (void)getAndAdvanceChar(CurPtr,
Result);
3804 IsAtStartOfLine =
true;
3805 IsAtPhysicalStartOfLine =
true;
3806 NewLinePtr = CurPtr - 1;
3815 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
3825 SkipHorizontalWhitespace:
3827 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
3836 LineComment && (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP)) {
3837 if (SkipLineComment(
Result, CurPtr+2, TokAtPhysicalStartOfLine))
3839 goto SkipIgnoredUnits;
3841 if (SkipBlockComment(
Result, CurPtr+2, TokAtPhysicalStartOfLine))
3843 goto SkipIgnoredUnits;
3845 goto SkipHorizontalWhitespace;
3853 case '0':
case '1':
case '2':
case '3':
case '4':
3854 case '5':
case '6':
case '7':
case '8':
case '9':
3857 return LexNumericConstant(
Result, CurPtr);
3866 if (LangOpts.CPlusPlus11 || LangOpts.C11) {
3867 Char = getCharAndSize(CurPtr, SizeTmp);
3871 return LexStringLiteral(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3872 tok::utf16_string_literal);
3876 return LexCharConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3877 tok::utf16_char_constant);
3880 if (Char ==
'R' && LangOpts.RawStringLiterals &&
3881 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3882 return LexRawStringLiteral(
Result,
3883 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3885 tok::utf16_string_literal);
3888 char Char2 = getCharAndSize(CurPtr + SizeTmp, SizeTmp2);
3892 return LexStringLiteral(
Result,
3893 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3895 tok::utf8_string_literal);
3896 if (Char2 ==
'\'' && (LangOpts.CPlusPlus17 || LangOpts.C23))
3897 return LexCharConstant(
3898 Result, ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3900 tok::utf8_char_constant);
3902 if (Char2 ==
'R' && LangOpts.RawStringLiterals) {
3904 char Char3 = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
3907 return LexRawStringLiteral(
Result,
3908 ConsumeChar(ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3911 tok::utf8_string_literal);
3918 return LexIdentifierContinue(
Result, CurPtr);
3924 if (LangOpts.CPlusPlus11 || LangOpts.C11) {
3925 Char = getCharAndSize(CurPtr, SizeTmp);
3929 return LexStringLiteral(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3930 tok::utf32_string_literal);
3934 return LexCharConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3935 tok::utf32_char_constant);
3938 if (Char ==
'R' && LangOpts.RawStringLiterals &&
3939 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3940 return LexRawStringLiteral(
Result,
3941 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3943 tok::utf32_string_literal);
3947 return LexIdentifierContinue(
Result, CurPtr);
3953 if (LangOpts.RawStringLiterals) {
3954 Char = getCharAndSize(CurPtr, SizeTmp);
3957 return LexRawStringLiteral(
Result,
3958 ConsumeChar(CurPtr, SizeTmp,
Result),
3959 tok::string_literal);
3963 return LexIdentifierContinue(
Result, CurPtr);
3968 Char = getCharAndSize(CurPtr, SizeTmp);
3972 return LexStringLiteral(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3973 tok::wide_string_literal);
3976 if (LangOpts.RawStringLiterals && Char ==
'R' &&
3977 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3978 return LexRawStringLiteral(
Result,
3979 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3981 tok::wide_string_literal);
3985 return LexCharConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3986 tok::wide_char_constant);
3991 case 'A':
case 'B':
case 'C':
case 'D':
case 'E':
case 'F':
case 'G':
3992 case 'H':
case 'I':
case 'J':
case 'K':
case 'M':
case 'N':
3993 case 'O':
case 'P':
case 'Q':
case 'S':
case 'T':
3994 case 'V':
case 'W':
case 'X':
case 'Y':
case 'Z':
3995 case 'a':
case 'b':
case 'c':
case 'd':
case 'e':
case 'f':
case 'g':
3996 case 'h':
case 'i':
case 'j':
case 'k':
case 'l':
case 'm':
case 'n':
3997 case 'o':
case 'p':
case 'q':
case 'r':
case 's':
case 't':
3998 case 'v':
case 'w':
case 'x':
case 'y':
case 'z':
4002 return LexIdentifierContinue(
Result, CurPtr);
4005 if (LangOpts.DollarIdents) {
4007 Diag(CurPtr-1, diag::ext_dollar_in_identifier);
4010 return LexIdentifierContinue(
Result, CurPtr);
4013 Kind = tok::unknown;
4020 return LexCharConstant(
Result, CurPtr, tok::char_constant);
4026 return LexStringLiteral(
Result, CurPtr,
4028 : tok::string_literal);
4032 Kind = tok::question;
4035 Kind = tok::l_square;
4038 Kind = tok::r_square;
4041 Kind = tok::l_paren;
4044 Kind = tok::r_paren;
4047 Kind = tok::l_brace;
4050 Kind = tok::r_brace;
4053 Char = getCharAndSize(CurPtr, SizeTmp);
4054 if (Char >=
'0' && Char <=
'9') {
4058 return LexNumericConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result));
4059 }
else if (LangOpts.CPlusPlus && Char ==
'*') {
4060 Kind = tok::periodstar;
4062 }
else if (Char ==
'.' &&
4063 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
'.') {
4064 Kind = tok::ellipsis;
4065 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4072 Char = getCharAndSize(CurPtr, SizeTmp);
4075 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4076 }
else if (Char ==
'=') {
4077 Kind = tok::ampequal;
4078 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4084 if (getCharAndSize(CurPtr, SizeTmp) ==
'=') {
4085 Kind = tok::starequal;
4086 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4092 Char = getCharAndSize(CurPtr, SizeTmp);
4094 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4095 Kind = tok::plusplus;
4096 }
else if (Char ==
'=') {
4097 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4098 Kind = tok::plusequal;
4104 Char = getCharAndSize(CurPtr, SizeTmp);
4106 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4107 Kind = tok::minusminus;
4108 }
else if (Char ==
'>' && LangOpts.CPlusPlus &&
4109 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
'*') {
4110 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4112 Kind = tok::arrowstar;
4113 }
else if (Char ==
'>') {
4114 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4116 }
else if (Char ==
'=') {
4117 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4118 Kind = tok::minusequal;
4127 if (getCharAndSize(CurPtr, SizeTmp) ==
'=') {
4128 Kind = tok::exclaimequal;
4129 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4131 Kind = tok::exclaim;
4136 Char = getCharAndSize(CurPtr, SizeTmp);
4146 bool TreatAsComment =
4147 LineComment && (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP);
4148 if (!TreatAsComment)
4150 TreatAsComment = getCharAndSize(CurPtr+SizeTmp, SizeTmp2) !=
'*';
4152 if (TreatAsComment) {
4153 if (SkipLineComment(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
4154 TokAtPhysicalStartOfLine))
4160 goto SkipIgnoredUnits;
4165 if (SkipBlockComment(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
4166 TokAtPhysicalStartOfLine))
4175 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4176 Kind = tok::slashequal;
4182 Char = getCharAndSize(CurPtr, SizeTmp);
4184 Kind = tok::percentequal;
4185 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4186 }
else if (LangOpts.Digraphs && Char ==
'>') {
4187 Kind = tok::r_brace;
4188 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4189 }
else if (LangOpts.Digraphs && Char ==
':') {
4190 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4191 Char = getCharAndSize(CurPtr, SizeTmp);
4192 if (Char ==
'%' && getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
':') {
4193 Kind = tok::hashhash;
4194 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4196 }
else if (Char ==
'@' && LangOpts.MicrosoftExt) {
4197 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4199 Diag(BufferPtr, diag::ext_charize_microsoft);
4206 if (TokAtPhysicalStartOfLine && !
LexingRawMode && !Is_PragmaLexer)
4207 goto HandleDirective;
4212 Kind = tok::percent;
4216 Char = getCharAndSize(CurPtr, SizeTmp);
4218 return LexAngledStringLiteral(
Result, CurPtr);
4219 }
else if (Char ==
'<') {
4220 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
4222 Kind = tok::lesslessequal;
4223 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4225 }
else if (After ==
'<' && IsStartOfConflictMarker(CurPtr-1)) {
4229 }
else if (After ==
'<' && HandleEndOfConflictMarker(CurPtr-1)) {
4233 }
else if (LangOpts.CUDA && After ==
'<') {
4234 Kind = tok::lesslessless;
4235 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4238 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4239 Kind = tok::lessless;
4241 }
else if (Char ==
'=') {
4242 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
4244 if (LangOpts.CPlusPlus20) {
4246 Diag(BufferPtr, diag::warn_cxx17_compat_spaceship);
4247 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4249 Kind = tok::spaceship;
4255 Diag(BufferPtr, diag::warn_cxx20_compat_spaceship)
4260 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4261 Kind = tok::lessequal;
4262 }
else if (LangOpts.Digraphs && Char ==
':') {
4263 if (LangOpts.CPlusPlus11 &&
4264 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
':') {
4271 char After = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
4272 if (After !=
':' && After !=
'>') {
4275 Diag(BufferPtr, diag::warn_cxx98_compat_less_colon_colon);
4280 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4281 Kind = tok::l_square;
4282 }
else if (LangOpts.Digraphs && Char ==
'%') {
4283 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4284 Kind = tok::l_brace;
4285 }
else if (Char ==
'#' && SizeTmp == 1 &&
4286 lexEditorPlaceholder(
Result, CurPtr)) {
4293 Char = getCharAndSize(CurPtr, SizeTmp);
4295 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4296 Kind = tok::greaterequal;
4297 }
else if (Char ==
'>') {
4298 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
4300 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4302 Kind = tok::greatergreaterequal;
4303 }
else if (After ==
'>' && IsStartOfConflictMarker(CurPtr-1)) {
4307 }
else if (After ==
'>' && HandleEndOfConflictMarker(CurPtr-1)) {
4310 }
else if (LangOpts.CUDA && After ==
'>') {
4311 Kind = tok::greatergreatergreater;
4312 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4315 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4316 Kind = tok::greatergreater;
4319 Kind = tok::greater;
4323 Char = getCharAndSize(CurPtr, SizeTmp);
4325 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4326 Kind = tok::caretequal;
4328 if (LangOpts.OpenCL && Char ==
'^')
4329 Diag(CurPtr, diag::err_opencl_logical_exclusive_or);
4334 Char = getCharAndSize(CurPtr, SizeTmp);
4336 Kind = tok::pipeequal;
4337 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4338 }
else if (Char ==
'|') {
4340 if (CurPtr[1] ==
'|' && HandleEndOfConflictMarker(CurPtr-1))
4342 Kind = tok::pipepipe;
4343 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4349 Char = getCharAndSize(CurPtr, SizeTmp);
4350 if (LangOpts.Digraphs && Char ==
'>') {
4351 Kind = tok::r_square;
4352 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4353 }
else if (Char ==
':') {
4354 Kind = tok::coloncolon;
4355 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4364 Char = getCharAndSize(CurPtr, SizeTmp);
4367 if (CurPtr[1] ==
'=' && HandleEndOfConflictMarker(CurPtr-1))
4370 Kind = tok::equalequal;
4371 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4380 Char = getCharAndSize(CurPtr, SizeTmp);
4382 Kind = tok::hashhash;
4383 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4384 }
else if (Char ==
'@' && LangOpts.MicrosoftExt) {
4387 Diag(BufferPtr, diag::ext_charize_microsoft);
4388 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4394 if (TokAtPhysicalStartOfLine && !
LexingRawMode && !Is_PragmaLexer)
4395 goto HandleDirective;
4403 if (CurPtr[-1] ==
'@' && LangOpts.ObjC)
4406 Kind = tok::unknown;
4411 if (!LangOpts.AsmPreprocessor) {
4412 if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &
Result)) {
4413 if (CheckUnicodeWhitespace(
Result, CodePoint, CurPtr)) {
4414 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
4422 return LexUnicodeIdentifierStart(
Result, CodePoint, CurPtr);
4426 Kind = tok::unknown;
4431 Kind = tok::unknown;
4435 llvm::UTF32 CodePoint;
4440 llvm::ConversionResult Status =
4441 llvm::convertUTF8Sequence((
const llvm::UTF8 **)&CurPtr,
4442 (
const llvm::UTF8 *)BufferEnd,
4444 llvm::strictConversion);
4445 if (Status == llvm::conversionOK) {
4446 if (CheckUnicodeWhitespace(
Result, CodePoint, CurPtr)) {
4447 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
4454 return LexUnicodeIdentifierStart(
Result, CodePoint, CurPtr);
4460 Kind = tok::unknown;
4467 Diag(CurPtr, diag::err_invalid_utf8);
4469 BufferPtr = CurPtr+1;
4481 FormTokenWithChars(
Result, CurPtr, Kind);
4487 FormTokenWithChars(
Result, CurPtr, tok::hash);
4502const char *Lexer::convertDependencyDirectiveToken(
4504 const char *TokPtr = BufferStart + DDTok.
Offset;
4510 BufferPtr = TokPtr + DDTok.
Length;
4514bool Lexer::LexDependencyDirectiveToken(
Token &
Result) {
4515 assert(isDependencyDirectivesLexer());
4517 using namespace dependency_directives_scan;
4519 while (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size()) {
4520 if (DepDirectives.front().Kind == pp_eof)
4521 return LexEndOfFile(
Result, BufferEnd);
4522 if (DepDirectives.front().Kind == tokens_present_before_eof)
4524 NextDepDirectiveTokenIndex = 0;
4525 DepDirectives = DepDirectives.drop_front();
4529 DepDirectives.front().Tokens[NextDepDirectiveTokenIndex++];
4530 if (NextDepDirectiveTokenIndex > 1 || DDTok.
Kind != tok::hash) {
4536 BufferPtr = BufferStart + DDTok.
Offset;
4537 LexAngledStringLiteral(
Result, BufferPtr + 1);
4538 if (
Result.isNot(tok::header_name))
4543 DepDirectives.front().Tokens[NextDepDirectiveTokenIndex];
4544 if (BufferStart + NextTok.
Offset >= BufferPtr)
4546 ++NextDepDirectiveTokenIndex;
4551 const char *TokPtr = convertDependencyDirectiveToken(DDTok,
Result);
4553 if (
Result.is(tok::hash) &&
Result.isAtStartOfLine()) {
4557 if (
Result.is(tok::raw_identifier)) {
4558 Result.setRawIdentifierData(TokPtr);
4566 if (
Result.isLiteral()) {
4567 Result.setLiteralData(TokPtr);
4570 if (
Result.is(tok::colon)) {
4572 if (*BufferPtr ==
':') {
4573 assert(DepDirectives.front().Tokens[NextDepDirectiveTokenIndex].is(
4575 ++NextDepDirectiveTokenIndex;
4576 Result.setKind(tok::coloncolon);
4586bool Lexer::LexDependencyDirectiveTokenWhileSkipping(
Token &
Result) {
4587 assert(isDependencyDirectivesLexer());
4589 using namespace dependency_directives_scan;
4592 unsigned NestedIfs = 0;
4594 DepDirectives = DepDirectives.drop_front();
4595 switch (DepDirectives.front().Kind) {
4597 llvm_unreachable(
"unexpected 'pp_none'");
4638 NextDepDirectiveTokenIndex = 0;
4639 return LexEndOfFile(
Result, BufferEnd);
4644 DepDirectives.front().Tokens.front();
4645 assert(DDTok.
is(tok::hash));
4646 NextDepDirectiveTokenIndex = 1;
4648 convertDependencyDirectiveToken(DDTok,
Result);
Defines the Diagnostic-related interfaces.
Defines the clang::IdentifierInfo, clang::IdentifierTable, and clang::Selector interfaces.
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified.
Defines the clang::LangOptions interface.
static bool isInExpansionTokenRange(const SourceLocation Loc, const SourceManager &SM)
static bool isMathematicalExtensionID(uint32_t C, const LangOptions &LangOpts, bool IsStart, bool &IsExtension)
static void diagnoseInvalidUnicodeCodepointInIdentifier(DiagnosticsEngine &Diags, const LangOptions &LangOpts, uint32_t CodePoint, CharSourceRange Range, bool IsFirst)
static char DecodeTrigraphChar(const char *CP, Lexer *L, bool Trigraphs)
DecodeTrigraphChar - If the specified character is a legal trigraph when prefixed with ?...
static size_t getSpellingSlow(const Token &Tok, const char *BufPtr, const LangOptions &LangOpts, char *Spelling)
Slow case of getSpelling.
static const char * FindConflictEnd(const char *CurPtr, const char *BufferEnd, ConflictMarkerKind CMK)
Find the end of a version control conflict marker.
static void maybeDiagnoseUTF8Homoglyph(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range)
After encountering UTF-8 character C and interpreting it as an identifier character,...
static SourceLocation getBeginningOfFileToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
static void StringifyImpl(T &Str, char Quote)
static LLVM_ATTRIBUTE_NOINLINE SourceLocation GetMappedTokenLoc(Preprocessor &PP, SourceLocation FileLoc, unsigned CharNo, unsigned TokLen)
GetMappedTokenLoc - If lexing out of a 'mapped buffer', where we pretend the lexer buffer was all exp...
static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts, bool &IsExtension)
static CharSourceRange makeCharRange(Lexer &L, const char *Begin, const char *End)
static bool isUnicodeWhitespace(uint32_t Codepoint)
static void diagnoseExtensionInIdentifier(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range)
static const char * findPlaceholderEnd(const char *CurPtr, const char *BufferEnd)
static llvm::SmallString< 5 > codepointAsHexString(uint32_t C)
static CharSourceRange makeRangeFromFileLocs(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, Lexer *L, bool Trigraphs)
isEndOfBlockCommentWithEscapedNewLine - Return true if the specified newline character (either \n or \r) ...
static const char * fastParseASCIIIdentifier(const char *CurPtr, const char *BufferEnd)
static char GetTrigraphCharForLetter(char Letter)
GetTrigraphCharForLetter - Given a character that occurs after a ?? pair, return the decoded trigraph...
static bool isAllowedInitiallyIDChar(uint32_t C, const LangOptions &LangOpts, bool &IsExtension)
static void maybeDiagnoseIDCharCompat(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range, bool IsFirst)
static const char * findBeginningOfLine(StringRef Buffer, unsigned Offset)
Returns the pointer that points to the beginning of line that contains the given offset,...
Defines the MultipleIncludeOpt interface.
Defines the clang::Preprocessor interface.
Defines the clang::SourceLocation class and associated facilities.
Defines the SourceManager interface.
Defines the clang::TokenKind enum and support functions.
static const llvm::sys::UnicodeCharRange C11DisallowedInitialIDCharRanges[]
static const llvm::sys::UnicodeCharRange C99DisallowedInitialIDCharRanges[]
static const llvm::sys::UnicodeCharRange UnicodeWhitespaceCharRanges[]
static const llvm::sys::UnicodeCharRange C99AllowedIDCharRanges[]
static const llvm::sys::UnicodeCharRange C11AllowedIDCharRanges[]
static const llvm::sys::UnicodeCharRange MathematicalNotationProfileIDStartRanges[]
static const llvm::sys::UnicodeCharRange MathematicalNotationProfileIDContinueRanges[]
static const llvm::sys::UnicodeCharRange XIDStartRanges[]
static const llvm::sys::UnicodeCharRange XIDContinueRanges[]
__DEVICE__ void * memcpy(void *__a, const void *__b, size_t __c)
__device__ __2f16 float c
__PTRDIFF_TYPE__ ptrdiff_t
static __inline__ int __ATTRS_o_ai vec_any_ge(vector signed char __a, vector signed char __b)
static __inline__ int __ATTRS_o_ai vec_any_eq(vector signed char __a, vector signed char __b)
Represents a character-granular source range.
static CharSourceRange getCharRange(SourceRange R)
SourceLocation getEnd() const
SourceLocation getBegin() const
A little helper class used to produce diagnostics.
Concrete class used by the front-end to report problems and issues.
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
bool isIgnored(unsigned DiagID, SourceLocation Loc) const
Determine whether the diagnostic is known to be ignored.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
static FixItHint CreateReplacement(CharSourceRange RemoveRange, StringRef Code)
Create a code modification hint that replaces the given source range with the given code string.
static FixItHint CreateRemoval(CharSourceRange RemoveRange)
Create a code modification hint that removes the given source range.
static FixItHint CreateInsertion(SourceLocation InsertionLoc, StringRef Code, bool BeforePreviousInsertions=false)
Create a code modification hint that inserts the given code string at a specific location.
One of these records is kept for each identifier that is lexed.
bool isHandleIdentifierCase() const
Return true if the Preprocessor::HandleIdentifier must be called on a token of this identifier.
bool isKeyword(const LangOptions &LangOpts) const
Return true if this token is a keyword in the specified language.
tok::ObjCKeywordKind getObjCKeywordID() const
Return the Objective-C keyword ID for this identifier.
IdentifierInfo & get(StringRef Name)
Return the identifier token info for the specified named identifier.
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens.
static StringRef getSourceText(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts, bool *Invalid=nullptr)
Returns a string for the source that the range encompasses.
void SetKeepWhitespaceMode(bool Val)
SetKeepWhitespaceMode - This method lets clients enable or disable whitespace retention mode.
static SourceLocation findLocationAfterToken(SourceLocation loc, tok::TokenKind TKind, const SourceManager &SM, const LangOptions &LangOpts, bool SkipTrailingWhitespaceAndNewLine)
Checks that the given token is the first token that occurs after the given location (this excludes co...
bool LexFromRawLexer(Token &Result)
LexFromRawLexer - Lex a token from a designated raw lexer (one with no associated preprocessor object...
bool inKeepCommentMode() const
inKeepCommentMode - Return true if the lexer should return comments as tokens.
void SetCommentRetentionState(bool Mode)
SetCommentRetentionState - Change the comment retention mode of the lexer to the specified mode.
void seek(unsigned Offset, bool IsAtStartOfLine)
Set the lexer's buffer pointer to Offset.
static StringRef getImmediateMacroName(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
void ReadToEndOfLine(SmallVectorImpl< char > *Result=nullptr)
ReadToEndOfLine - Read the rest of the current preprocessor line as an uninterpreted string.
static bool isAtStartOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroBegin=nullptr)
Returns true if the given MacroID location points at the first token of the macro expansion.
DiagnosticBuilder Diag(const char *Loc, unsigned DiagID) const
Diag - Forwarding function for diagnostics.
const char * getBufferLocation() const
Return the current location in the buffer.
bool Lex(Token &Result)
Lex - Return the next token in the file.
bool isPragmaLexer() const
isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.
static unsigned getTokenPrefixLength(SourceLocation TokStart, unsigned CharNo, const SourceManager &SM, const LangOptions &LangOpts)
Get the physical length (including trigraphs and escaped newlines) of the first Characters characters...
Lexer(FileID FID, const llvm::MemoryBufferRef &InputFile, Preprocessor &PP, bool IsFirstIncludeOfFile=true)
Lexer constructor - Create a new lexer object for the specified buffer with the specified preprocesso...
static bool isAtEndOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroEnd=nullptr)
Returns true if the given MacroID location points at the last token of the macro expansion.
SourceLocation getSourceLocation() override
getSourceLocation - Return a source location for the next character in the current file.
static CharSourceRange makeFileCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Accepts a range and returns a character range with file locations.
static bool isNewLineEscaped(const char *BufferStart, const char *Str)
Checks whether the new line pointed to by Str is preceded by an escape sequence.
SourceLocation getSourceLocation(const char *Loc, unsigned TokLen=1) const
getSourceLocation - Return a source location identifier for the specified offset in the current file.
static StringRef getIndentationForLine(SourceLocation Loc, const SourceManager &SM)
Returns the leading whitespace for the line that corresponds to the given location Loc.
static unsigned getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid=nullptr)
getSpelling - This method is used to get the spelling of a token into a preallocated buffer,...
bool isKeepWhitespaceMode() const
isKeepWhitespaceMode - Return true if the lexer should return tokens for every character in the file,...
static bool isAsciiIdentifierContinueChar(char c, const LangOptions &LangOpts)
Returns true if the given character could appear in an identifier.
static unsigned MeasureTokenLength(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
MeasureTokenLength - Relex the token at the specified location and return its length in bytes in the ...
static SourceLocation GetBeginningOfToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Given a location anywhere in a source buffer, find the location that corresponds to the beginning of...
void resetExtendedTokenMode()
Sets the extended token mode back to its initial value, according to the language options and preproc...
static StringRef getImmediateMacroNameForDiagnostics(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
static Lexer * Create_PragmaLexer(SourceLocation SpellingLoc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLen, Preprocessor &PP)
Create_PragmaLexer: Lexer constructor - Create a new lexer object for _Pragma expansion.
static PreambleBounds ComputePreamble(StringRef Buffer, const LangOptions &LangOpts, unsigned MaxLines=0)
Compute the preamble of the given file.
static bool getRawToken(SourceLocation Loc, Token &Result, const SourceManager &SM, const LangOptions &LangOpts, bool IgnoreWhiteSpace=false)
Relex the token at the specified location.
static std::optional< Token > findNextToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Finds the token that comes right after the given location.
static SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset, const SourceManager &SM, const LangOptions &LangOpts)
Computes the source location just past the end of the token at this source location.
static std::string Stringify(StringRef Str, bool Charify=false)
Stringify - Convert the specified string into a C string by i) escaping '\' and " characters and ii) ...
static SizedChar getCharAndSizeNoWarn(const char *Ptr, const LangOptions &LangOpts)
getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever emit a warning.
void ExitTopLevelConditional()
Called when the lexer exits the top-level conditional.
bool LexingRawMode
True if in raw mode.
SmallVector< PPConditionalInfo, 4 > ConditionalStack
Information about the set of #if/#ifdef/#ifndef blocks we are currently in.
bool ParsingPreprocessorDirective
True when parsing #XXX; turns '\n' into a tok::eod token.
MultipleIncludeOpt MIOpt
A state machine that detects the #ifndef-wrapping a file idiom for the multiple-include optimization.
bool ParsingFilename
True after #include; turns <xx> or "xxx" into a tok::header_name token.
bool isLexingRawMode() const
Return true if this lexer is in raw mode or not.
const FileID FID
The SourceManager FileID corresponding to the file being lexed.
bool LexEditorPlaceholders
When enabled, the preprocessor will construct editor placeholder tokens.
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
SourceLocation getCodeCompletionLoc() const
Returns the location of the code-completion point.
SourceLocation getCodeCompletionFileLoc() const
Returns the start location of the file of code-completion point.
void setCodeCompletionTokenRange(const SourceLocation Start, const SourceLocation End)
Set the code completion token range for detecting replacement range later on.
bool isRecordingPreamble() const
void setRecordedPreambleConditionalStack(ArrayRef< PPConditionalInfo > s)
bool isInPrimaryFile() const
Return true if we're in the top-level file, not in a #include.
void CreateString(StringRef Str, Token &Tok, SourceLocation ExpansionLocStart=SourceLocation(), SourceLocation ExpansionLocEnd=SourceLocation())
Plop the specified string into a scratch buffer and set the specified token's location and length to ...
IdentifierInfo * LookUpIdentifierInfo(Token &Identifier) const
Given a tok::raw_identifier token, look up the identifier information for the token and install it in...
bool isPreprocessedOutput() const
Returns true if the preprocessor is responsible for generating output, false if it is producing token...
bool HandleIdentifier(Token &Identifier)
Callback invoked when the lexer reads an identifier and has filled in the token's IdentifierInfo membe...
SourceManager & getSourceManager() const
EmptylineHandler * getEmptylineHandler() const
bool getCommentRetentionState() const
bool hadModuleLoaderFatalFailure() const
PreprocessorOptions & getPreprocessorOpts() const
Retrieve the preprocessor options used to initialize this preprocessor.
StringRef getSpelling(SourceLocation loc, SmallVectorImpl< char > &buffer, bool *invalid=nullptr) const
Return the 'spelling' of the token at the given location; does not go up to the spelling location or ...
bool HandleComment(Token &result, SourceRange Comment)
bool isCodeCompletionEnabled() const
Determine if we are performing code completion.
void HandleDirective(Token &Result)
Callback invoked when the lexer sees a # token at the start of a line.
IdentifierTable & getIdentifierTable()
const LangOptions & getLangOpts() const
void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled)
Hook used by the lexer to invoke the "included file" code completion point.
void CodeCompleteNaturalLanguage()
Hook used by the lexer to invoke the "natural language" code completion point.
bool HandleEndOfFile(Token &Result, bool isEndOfMacro=false)
Callback invoked when the lexer hits the end of the current file.
DiagnosticsEngine & getDiagnostics() const
void setCodeCompletionIdentifierInfo(IdentifierInfo *Filter)
Set the code completion token for filtering purposes.
DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const
Forwarding function for diagnostics.
Encodes a location in the source.
static SourceLocation getFromRawEncoding(UIntTy Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
bool isValid() const
Return true if this is a valid SourceLocation object.
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
UIntTy getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it.
This class handles loading and caching of source files into memory.
const char * getCharacterData(SourceLocation SL, bool *Invalid=nullptr) const
Return a pointer to the start of the specified location in the appropriate spelling MemoryBuffer.
A trivial tuple used to represent a source range.
void setBegin(SourceLocation b)
SourceLocation getEnd() const
SourceLocation getBegin() const
void setEnd(SourceLocation e)
Each ExpansionInfo encodes the expansion location - where the token was ultimately expanded,...
SourceLocation getExpansionLocStart() const
SourceLocation getSpellingLoc() const
bool isMacroArgExpansion() const
This is a discriminated union of FileInfo and ExpansionInfo.
const ExpansionInfo & getExpansion() const
static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix)
Determine whether a suffix is a valid ud-suffix.
Token - This structure provides full information about a lexed token.
IdentifierInfo * getIdentifierInfo() const
bool hasUCN() const
Returns true if this token contains a universal character name.
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
unsigned getLength() const
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {....
tok::TokenKind getKind() const
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
bool isAnnotation() const
Return true if this is any of tok::annot_* kind tokens.
bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const
Return true if we have an ObjC keyword identifier.
bool isSimpleTypeSpecifier(const LangOptions &LangOpts) const
Determine whether the token kind starts a simple-type-specifier.
void startToken()
Reset all flags to cleared.
bool needsCleaning() const
Return true if this token has trigraphs or escaped newlines in it.
StringRef getRawIdentifier() const
getRawIdentifier - For a raw identifier token (i.e., an identifier lexed in raw mode),...
const char * getLiteralData() const
getLiteralData - For a literal token (numeric constant, string, etc), this returns a pointer to the s...
void setFlag(TokenFlags Flag)
Set the specified flag.
static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a)
Copies the values of the most significant bits from each 8-bit element in a 128-bit integer vector of...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi8(__m128i __a, __m128i __b)
Compares each of the corresponding 8-bit values of the 128-bit integer vectors for equality.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si128(__m128i_u const *__p)
Moves packed integer values from an unaligned 128-bit memory location to elements in a 128-bit intege...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_load_si128(__m128i const *__p)
Moves packed integer values from an aligned 128-bit memory location to elements in a 128-bit integer ...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set1_epi8(char __b)
Initializes all values in a 128-bit vector of [16 x i8] with the specified 8-bit value.
@ tokens_present_before_eof
Indicates that there are tokens present between the last scanned directive and eof.
@ pp_pragma_system_header
@ pp_pragma_include_alias
@ After
Like System, but searched after the system directories.
bool isStringLiteral(TokenKind K)
Return true if this is a C or C++ string-literal (or C++11 user-defined-string-literal) token.
ObjCKeywordKind
Provides a namespace for Objective-C keywords which start with an '@'.
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
The JSON file list parser is used to communicate input to InstallAPI.
LLVM_READNONE bool isASCII(char c)
Returns true if a byte is an ASCII character.
LLVM_READONLY bool isVerticalWhitespace(unsigned char c)
Returns true if this character is vertical ASCII whitespace: '\n', '\r'.
ConflictMarkerKind
ConflictMarkerKind - Kinds of conflict marker which the lexer might be recovering from.
@ CMK_Perforce
A Perforce-style conflict marker, initiated by 4 ">"s, separated by 4 "="s, and terminated by 4 "<"s.
@ CMK_None
Not within a conflict marker.
@ CMK_Normal
A normal or diff3 conflict marker, initiated by at least 7 "<"s, separated by at least 7 "="s or "|"s...
LLVM_READONLY bool isAsciiIdentifierContinue(unsigned char c)
bool operator<(DeclarationName LHS, DeclarationName RHS)
Ordering on two declaration names.
LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)
Returns true if this character is horizontal ASCII whitespace: ' ', '\t', '\f', '\v'.
@ Result
The result type of a method or function.
LLVM_READONLY bool isRawStringDelimBody(unsigned char c)
Return true if this is the body character of a C++ raw string delimiter.
LLVM_READONLY bool isWhitespace(unsigned char c)
Return true if this character is horizontal or vertical ASCII whitespace: ' ', '\t',...
LLVM_READONLY bool isPreprocessingNumberBody(unsigned char c)
Return true if this is the body character of a C preprocessing number, which is [a-zA-Z0-9_.].
const FunctionProtoType * T
LLVM_READONLY bool isAsciiIdentifierStart(unsigned char c, bool AllowDollar=false)
Returns true if this is a valid first character of a C identifier, which is [a-zA-Z_].
__INTPTR_TYPE__ intptr_t
A signed integer type with the property that any valid pointer to void can be converted to this type,...
float __ovld __cnfn length(float)
Return the length of vector p, i.e., sqrt(p.x^2 + p.y^2 + ...)
#define _mm_cmpistri(A, B, M)
Uses the immediate operand M to perform a comparison of string data with implicitly defined lengths t...
#define _SIDD_LEAST_SIGNIFICANT
#define _SIDD_NEGATIVE_POLARITY
Represents a char and the number of bytes parsed to produce it.
Describes the bounds (start, size) of the preamble and a flag required by PreprocessorOptions::Precom...
Token lexed as part of dependency directive scanning.
unsigned Offset
Offset into the original source input.
bool is(tok::TokenKind K) const