29#include "llvm/ADT/STLExtras.h"
30#include "llvm/ADT/StringExtras.h"
31#include "llvm/ADT/StringRef.h"
32#include "llvm/ADT/StringSwitch.h"
33#include "llvm/Support/Compiler.h"
34#include "llvm/Support/ConvertUTF.h"
35#include "llvm/Support/MathExtras.h"
36#include "llvm/Support/MemoryBufferRef.h"
37#include "llvm/Support/NativeFormatting.h"
38#include "llvm/Support/Unicode.h"
39#include "llvm/Support/UnicodeCharRanges.h"
65 return II->getObjCKeywordID() == objcKey;
72 return tok::objc_not_keyword;
80 case tok::annot_typename:
81 case tok::annot_decltype:
82 case tok::annot_pack_indexing_type:
88 case tok::kw___int128:
90 case tok::kw_unsigned:
98 case tok::kw__Float16:
99 case tok::kw___float128:
100 case tok::kw___ibm128:
101 case tok::kw_wchar_t:
107#define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) case tok::kw___##Trait:
108#include "clang/Basic/TransformTypeTraits.def"
109 case tok::kw___auto_type:
110 case tok::kw_char16_t:
111 case tok::kw_char32_t:
113 case tok::kw_decltype:
114 case tok::kw_char8_t:
// Out-of-line member definition whose body is intentionally empty.
126void Lexer::anchor() {}
// Points the lexer at a buffer and resets its per-buffer state.
//
// BufStart is stored as BufferStart. BufEnd must point at a NUL byte (checked
// by the assertion below).
// NOTE(review): the assignments of BufferPtr and BufferEnd are on lines that
// are not visible in this excerpt; presumably they come from BufPtr/BufEnd.
128void Lexer::InitLexer(
const char *BufStart,
const char *BufPtr,
129 const char *BufEnd) {
130 BufferStart = BufStart;
// The lexer depends on a terminating NUL at the end of the input buffer.
134 assert(BufEnd[0] == 0 &&
135 "We assume that the input buffer has a null character at the end"
136 " to simplify lexing!");
// Only when lexing starts at the very beginning of the buffer: look for a
// leading UTF-8 byte order mark (the 3 bytes EF BB BF) and advance BufferPtr
// by the detected BOM length.
141 if (BufferStart == BufferPtr) {
143 StringRef Buf(BufferStart, BufferEnd - BufferStart);
144 size_t BOMLength = llvm::StringSwitch<size_t>(Buf)
145 .StartsWith(
"\xEF\xBB\xBF", 3)
149 BufferPtr += BOMLength;
// Reset the state flags to their initial values.
152 Is_PragmaLexer =
false;
153 CurrentConflictMarkerState =
CMK_None;
// Lexing starts at the beginning of a (physical) line ...
156 IsAtStartOfLine =
true;
157 IsAtPhysicalStartOfLine =
true;
// ... with no leading whitespace or leading empty macro recorded yet.
159 HasLeadingSpace =
false;
160 HasLeadingEmptyMacro =
false;
175 ExtendedTokenMode = 0;
// No newline position has been recorded yet.
177 NewLinePtr =
nullptr;
187 FileLoc(PP.getSourceManager().getLocForStartOfFile(FID)),
189 IsFirstTimeLexingFile(IsFirstIncludeOfFile) {
190 InitLexer(InputFile.getBufferStart(), InputFile.getBufferStart(),
191 InputFile.getBufferEnd());
200 const char *BufStart,
const char *BufPtr,
const char *BufEnd,
201 bool IsFirstIncludeOfFile)
203 IsFirstTimeLexingFile(IsFirstIncludeOfFile) {
204 InitLexer(BufStart, BufPtr, BufEnd);
215 bool IsFirstIncludeOfFile)
216 :
Lexer(
SM.getLocForStartOfFile(FID), langOpts, FromFile.getBufferStart(),
217 FromFile.getBufferStart(), FromFile.getBufferEnd(),
218 IsFirstIncludeOfFile) {}
221 assert(
PP &&
"Cannot reset token mode without a preprocessor");
222 if (LangOpts.TraditionalCPP)
250 FileID SpellingFID =
SM.getFileID(SpellingLoc);
251 llvm::MemoryBufferRef InputFile =
SM.getBufferOrFake(SpellingFID);
257 const char *StrData =
SM.getCharacterData(SpellingLoc);
259 L->BufferPtr = StrData;
260 L->BufferEnd = StrData+TokLen;
261 assert(L->BufferEnd[0] == 0 &&
"Buffer is not nul terminated!");
265 L->FileLoc =
SM.createExpansionLoc(
SM.getLocForStartOfFile(SpellingFID),
267 ExpansionLocEnd, TokLen);
274 L->Is_PragmaLexer =
true;
279 this->IsAtPhysicalStartOfLine = IsAtStartOfLine;
280 this->IsAtStartOfLine = IsAtStartOfLine;
281 assert((BufferStart + Offset) <= BufferEnd);
282 BufferPtr = BufferStart + Offset;
286 typename T::size_type i = 0, e = Str.size();
288 if (Str[i] ==
'\\' || Str[i] == Quote) {
289 Str.insert(Str.begin() + i,
'\\');
292 }
else if (Str[i] ==
'\n' || Str[i] ==
'\r') {
294 if ((i < e - 1) && (Str[i + 1] ==
'\n' || Str[i + 1] ==
'\r') &&
295 Str[i] != Str[i + 1]) {
301 Str.insert(Str.begin() + i + 1,
'n');
311 std::string
Result = std::string(Str);
312 char Quote = Charify ?
'\'' :
'"';
327 assert(Tok.
needsCleaning() &&
"getSpellingSlow called on simple token");
330 const char *BufEnd = BufPtr + Tok.
getLength();
334 while (BufPtr < BufEnd) {
336 Spelling[Length++] = CharAndSize.Char;
337 BufPtr += CharAndSize.Size;
339 if (Spelling[Length - 1] ==
'"')
347 Spelling[Length - 2] ==
'R' && Spelling[Length - 1] ==
'"') {
350 const char *RawEnd = BufEnd;
351 do --RawEnd;
while (*RawEnd !=
'"');
352 size_t RawLength = RawEnd - BufPtr + 1;
355 memcpy(Spelling + Length, BufPtr, RawLength);
363 while (BufPtr < BufEnd) {
365 Spelling[Length++] = CharAndSize.Char;
366 BufPtr += CharAndSize.Size;
370 "NeedsCleaning flag set on token that didn't need cleaning!");
385 std::pair<FileID, unsigned> locInfo =
SM.getDecomposedLoc(loc);
388 bool invalidTemp =
false;
389 StringRef file =
SM.getBufferData(locInfo.first, &invalidTemp);
391 if (invalid) *invalid =
true;
395 const char *tokenBegin = file.data() + locInfo.second;
398 Lexer lexer(
SM.getLocForStartOfFile(locInfo.first), options,
399 file.begin(), tokenBegin, file.end());
407 return StringRef(tokenBegin,
length);
411 buffer.resize(
getSpellingSlow(token, tokenBegin, options, buffer.data()));
412 return StringRef(buffer.data(), buffer.size());
422 assert((
int)Tok.
getLength() >= 0 &&
"Token character range is bogus!");
424 bool CharDataInvalid =
false;
434 return std::string(TokStart, TokStart + Tok.
getLength());
455 assert((
int)Tok.
getLength() >= 0 &&
"Token character range is bogus!");
457 const char *TokStart =
nullptr;
459 if (Tok.
is(tok::raw_identifier))
464 Buffer = II->getNameStart();
465 return II->getLength();
475 bool CharDataInvalid =
false;
479 if (CharDataInvalid) {
492 return getSpellingSlow(Tok, TokStart, LangOpts,
const_cast<char*
>(Buffer));
513 bool IgnoreWhiteSpace) {
523 std::pair<FileID, unsigned> LocInfo =
SM.getDecomposedLoc(
Loc);
525 StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
529 const char *StrData = Buffer.data()+LocInfo.second;
535 Lexer TheLexer(
SM.getLocForStartOfFile(LocInfo.first), LangOpts,
536 Buffer.begin(), StrData, Buffer.end());
545 const char *BufStart = Buffer.data();
546 if (Offset >= Buffer.size())
549 const char *LexStart = BufStart + Offset;
550 for (; LexStart != BufStart; --LexStart) {
565 std::pair<FileID, unsigned> LocInfo =
SM.getDecomposedLoc(
Loc);
566 if (LocInfo.first.isInvalid())
570 StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
576 const char *StrData = Buffer.data() + LocInfo.second;
578 if (!LexStart || LexStart == StrData)
583 Lexer TheLexer(LexerStartLoc, LangOpts, Buffer.data(), LexStart,
603 }
while (TheTok.
getKind() != tok::eof);
615 if (!
SM.isMacroArgExpansion(
Loc))
620 std::pair<FileID, unsigned> FileLocInfo =
SM.getDecomposedLoc(FileLoc);
621 std::pair<FileID, unsigned> BeginFileLocInfo =
622 SM.getDecomposedLoc(BeginFileLoc);
623 assert(FileLocInfo.first == BeginFileLocInfo.first &&
624 FileLocInfo.second >= BeginFileLocInfo.second);
630enum PreambleDirectiveKind {
645 Lexer TheLexer(FileLoc, LangOpts, Buffer.begin(), Buffer.begin(),
649 bool InPreprocessorDirective =
false;
653 unsigned MaxLineOffset = 0;
655 const char *CurPtr = Buffer.begin();
656 unsigned CurLine = 0;
657 while (CurPtr != Buffer.end()) {
661 if (CurLine == MaxLines)
665 if (CurPtr != Buffer.end())
666 MaxLineOffset = CurPtr - Buffer.begin();
672 if (InPreprocessorDirective) {
674 if (TheTok.
getKind() == tok::eof) {
685 InPreprocessorDirective =
false;
694 if (MaxLineOffset && TokOffset >= MaxLineOffset)
699 if (TheTok.
getKind() == tok::comment) {
707 Token HashTok = TheTok;
708 InPreprocessorDirective =
true;
717 PreambleDirectiveKind PDK
718 = llvm::StringSwitch<PreambleDirectiveKind>(Keyword)
719 .Case(
"include", PDK_Skipped)
720 .Case(
"__include_macros", PDK_Skipped)
721 .Case(
"define", PDK_Skipped)
722 .Case(
"undef", PDK_Skipped)
723 .Case(
"line", PDK_Skipped)
724 .Case(
"error", PDK_Skipped)
725 .Case(
"pragma", PDK_Skipped)
726 .Case(
"import", PDK_Skipped)
727 .Case(
"include_next", PDK_Skipped)
728 .Case(
"warning", PDK_Skipped)
729 .Case(
"ident", PDK_Skipped)
730 .Case(
"sccs", PDK_Skipped)
731 .Case(
"assert", PDK_Skipped)
732 .Case(
"unassert", PDK_Skipped)
733 .Case(
"if", PDK_Skipped)
734 .Case(
"ifdef", PDK_Skipped)
735 .Case(
"ifndef", PDK_Skipped)
736 .Case(
"elif", PDK_Skipped)
737 .Case(
"elifdef", PDK_Skipped)
738 .Case(
"elifndef", PDK_Skipped)
739 .Case(
"else", PDK_Skipped)
740 .Case(
"endif", PDK_Skipped)
741 .Default(PDK_Unknown);
758 TheTok.
getKind() == tok::raw_identifier &&
760 LangOpts.CPlusPlusModules) {
763 Token ModuleTok = TheTok;
766 }
while (TheTok.
getKind() == tok::comment);
767 if (TheTok.
getKind() != tok::semi) {
782 if (ActiveCommentLoc.
isValid())
783 End = ActiveCommentLoc;
798 const char *TokPtr =
SM.getCharacterData(TokStart, &
Invalid);
801 if (
Invalid || (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr)))
804 unsigned PhysOffset = 0;
809 while (Lexer::isObviouslySimpleCharacter(*TokPtr)) {
819 for (; CharNo; --CharNo) {
821 TokPtr += CharAndSize.Size;
822 PhysOffset += CharAndSize.Size;
829 if (!Lexer::isObviouslySimpleCharacter(*TokPtr))
830 PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr;
879 if (!
SM.isAtStartOfImmediateMacroExpansion(loc, &expansionLoc))
885 *MacroBegin = expansionLoc;
907 if (!
SM.isAtEndOfImmediateMacroExpansion(afterLoc, &expansionLoc))
913 *MacroEnd = expansionLoc;
926 if (
Range.isTokenRange()) {
935 std::tie(FID, BeginOffs) =
SM.getDecomposedLoc(
Begin);
940 if (!
SM.isInFileID(End, FID, &EndOffs) ||
950 return SM.getSLocEntry(
SM.getFileID(
Loc))
952 .isExpansionTokenRange();
974 if (
Range.isTokenRange()) {
995 if (
Range.isTokenRange())
1035 std::pair<FileID, unsigned> beginInfo =
SM.getDecomposedLoc(
Range.
getBegin());
1036 if (beginInfo.first.isInvalid()) {
1042 if (!
SM.isInFileID(
Range.
getEnd(), beginInfo.first, &EndOffs) ||
1043 beginInfo.second > EndOffs) {
1049 bool invalidTemp =
false;
1050 StringRef file =
SM.getBufferData(beginInfo.first, &invalidTemp);
1057 return file.substr(beginInfo.second, EndOffs - beginInfo.second);
1063 assert(
Loc.
isMacroID() &&
"Only reasonable to call this on macros");
1079 Loc =
SM.getImmediateExpansionRange(
Loc).getBegin();
1087 if (
SM.isInFileID(SpellLoc, MacroFID))
1101 std::pair<FileID, unsigned> ExpansionInfo =
SM.getDecomposedLoc(
Loc);
1103 StringRef ExpansionBuffer =
SM.getBufferData(ExpansionInfo.first);
1104 return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
1109 assert(
Loc.
isMacroID() &&
"Only reasonable to call this on macros");
1111 while (
SM.isMacroArgExpansion(
Loc))
1112 Loc =
SM.getImmediateExpansionRange(
Loc).getBegin();
1118 if (!SpellLoc.
isFileID() ||
SM.isWrittenInScratchSpace(SpellLoc))
1124 Loc =
SM.getSpellingLoc(
SM.getImmediateExpansionRange(
Loc).getBegin());
1128 std::pair<FileID, unsigned> ExpansionInfo =
SM.getDecomposedLoc(
Loc);
1130 StringRef ExpansionBuffer =
SM.getBufferData(ExpansionInfo.first);
1131 return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
1140 if (Str - 1 < BufferStart)
1143 if ((Str[0] ==
'\n' && Str[-1] ==
'\r') ||
1144 (Str[0] ==
'\r' && Str[-1] ==
'\n')) {
1145 if (Str - 2 < BufferStart)
1155 return *Str ==
'\\';
1162 std::pair<FileID, unsigned> LocInfo =
SM.getDecomposedLoc(
Loc);
1163 if (LocInfo.first.isInvalid())
1166 StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
1172 StringRef Rest = Buffer.substr(
Line - Buffer.data());
1173 size_t NumWhitespaceChars = Rest.find_first_not_of(
" \t");
1174 return NumWhitespaceChars == StringRef::npos
1176 : Rest.take_front(NumWhitespaceChars);
1191 unsigned CharNo,
unsigned TokLen) {
1192 assert(FileLoc.
isMacroID() &&
"Must be a macro expansion");
1208 return SM.createExpansionLoc(SpellingLoc, II.
getBegin(), II.
getEnd(), TokLen);
1214 unsigned TokLen)
const {
1215 assert(
Loc >= BufferStart &&
Loc <= BufferEnd &&
1216 "Location out of range for this buffer!");
1220 unsigned CharNo =
Loc-BufferStart;
1226 assert(
PP &&
"This doesn't work on raw lexers");
1245 case '=':
return '#';
1246 case ')':
return ']';
1247 case '(':
return '[';
1248 case '!':
return '|';
1249 case '\'':
return '^';
1250 case '>':
return '}';
1251 case '/':
return '\\';
1252 case '<':
return '{';
1253 case '-':
return '~';
1268 L->
Diag(CP-2, diag::trigraph_ignored);
1273 L->
Diag(CP-2, diag::trigraph_converted) << StringRef(&Res, 1);
// Inspects the characters starting at Ptr to decide whether they form the
// newline part of an escaped newline.
// NOTE(review): the initialisation of Size and every return statement are on
// lines not visible in this excerpt, so the exact return contract must be
// confirmed against the full source.
1280unsigned Lexer::getEscapedNewLineSize(
const char *Ptr) {
// Tests whether the character at Ptr[Size-1] is something other than a line
// terminator ('\n' or '\r').
1285 if (Ptr[Size-1] !=
'\n' && Ptr[Size-1] !=
'\r')
// A line terminator followed by the *other* line terminator (CR LF or LF CR)
// is recognised as a pair; two identical terminators in a row are not.
1289 if ((Ptr[Size] ==
'\r' || Ptr[Size] ==
'\n') &&
1290 Ptr[Size-1] != Ptr[Size])
// Advances P past escaped newlines and returns the resulting pointer.
// If the text after an escape is not a newline, P is returned unchanged at
// that point.
// NOTE(review): the enclosing loop and the first branch of the if/else chain
// are on lines not visible in this excerpt.
1303const char *Lexer::SkipEscapedNewLines(
const char *
P) {
// Position just after the escape introducer.
// NOTE(review): its assignment is not visible in this excerpt.
1305 const char *AfterEscape;
1308 }
else if (*
P ==
'?') {
// A '?' can only begin an escape when it is followed by '?' and '/', i.e. the
// trigraph spelling of a backslash.
1310 if (
P[1] !=
'?' ||
P[2] !=
'/')
// Measure the newline that follows the escape; zero means there is none, so
// there is nothing more to skip.
1319 unsigned NewLineSize = Lexer::getEscapedNewLineSize(AfterEscape);
1320 if (NewLineSize == 0)
return P;
// Step over the escape and its newline.
1321 P = AfterEscape+NewLineSize;
1330 return std::nullopt;
1335 std::pair<FileID, unsigned> LocInfo =
SM.getDecomposedLoc(
Loc);
1338 bool InvalidTemp =
false;
1339 StringRef
File =
SM.getBufferData(LocInfo.first, &InvalidTemp);
1341 return std::nullopt;
1343 const char *TokenBegin =
File.data() + LocInfo.second;
1346 Lexer lexer(
SM.getLocForStartOfFile(LocInfo.first), LangOpts,
File.begin(),
1347 TokenBegin,
File.end());
1360 const LangOptions &LangOpts,
bool SkipTrailingWhitespaceAndNewLine) {
1362 if (!Tok || Tok->isNot(TKind))
1367 unsigned NumWhitespaceChars = 0;
1368 if (SkipTrailingWhitespaceAndNewLine) {
1369 const char *TokenEnd =
SM.getCharacterData(TokenLoc) + Tok->getLength();
1370 unsigned char C = *TokenEnd;
1373 NumWhitespaceChars++;
1377 if (
C ==
'\n' ||
C ==
'\r') {
1380 NumWhitespaceChars++;
1381 if ((
C ==
'\n' ||
C ==
'\r') &&
C != PrevC)
1382 NumWhitespaceChars++;
1407 if (Ptr[0] ==
'\\') {
1413 return {
'\\', Size};
1417 if (
unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) {
1423 Diag(Ptr, diag::backslash_newline_space);
1426 Size += EscapedNewLineSize;
1427 Ptr += EscapedNewLineSize;
1430 auto CharAndSize = getCharAndSizeSlow(Ptr, Tok);
1431 CharAndSize.Size += Size;
1436 return {
'\\',
Size};
1440 if (Ptr[0] ==
'?' && Ptr[1] ==
'?') {
1444 LangOpts.Trigraphs)) {
1450 if (
C ==
'\\')
goto Slash;
1456 return {*Ptr,
Size + 1u};
1470 if (Ptr[0] ==
'\\') {
1476 return {
'\\',
Size};
1479 if (
unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) {
1481 Size += EscapedNewLineSize;
1482 Ptr += EscapedNewLineSize;
1485 auto CharAndSize = getCharAndSizeSlowNoWarn(Ptr, LangOpts);
1486 CharAndSize.Size +=
Size;
1491 return {
'\\',
Size};
1495 if (LangOpts.Trigraphs && Ptr[0] ==
'?' && Ptr[1] ==
'?') {
1501 if (
C ==
'\\')
goto Slash;
1507 return {*Ptr,
Size + 1u};
// Moves the current lexing position to Offset bytes past BufferStart and sets
// both start-of-line flags to StartOfLine.
1515void Lexer::SetByteOffset(
unsigned Offset,
bool StartOfLine) {
1516 BufferPtr = BufferStart + Offset;
// Clamp a position past the end of the buffer back to BufferEnd.
1517 if (BufferPtr > BufferEnd)
1518 BufferPtr = BufferEnd;
// The logical and the physical start-of-line state are set together.
1522 IsAtStartOfLine = StartOfLine;
1523 IsAtPhysicalStartOfLine = StartOfLine;
1527 static const llvm::sys::UnicodeCharSet UnicodeWhitespaceChars(
1529 return UnicodeWhitespaceChars.contains(Codepoint);
1534 llvm::raw_svector_ostream CharOS(CharBuf);
1535 llvm::write_hex(CharOS,
C, llvm::HexPrintStyle::Upper, 4);
1546 bool IsStart,
bool &IsExtension) {
1547 static const llvm::sys::UnicodeCharSet MathStartChars(
1549 static const llvm::sys::UnicodeCharSet MathContinueChars(
1551 if (MathStartChars.contains(
C) ||
1552 (!IsStart && MathContinueChars.contains(
C))) {
1560 bool &IsExtension) {
1561 if (LangOpts.AsmPreprocessor) {
1563 }
else if (LangOpts.DollarIdents &&
'$' ==
C) {
1565 }
else if (LangOpts.CPlusPlus || LangOpts.C23) {
1570 static const llvm::sys::UnicodeCharSet XIDStartChars(
XIDStartRanges);
1572 if (
C ==
'_' || XIDStartChars.contains(
C) || XIDContinueChars.contains(
C))
1576 }
else if (LangOpts.C11) {
1577 static const llvm::sys::UnicodeCharSet C11AllowedIDChars(
1579 return C11AllowedIDChars.contains(
C);
1581 static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
1583 return C99AllowedIDChars.contains(
C);
1588 bool &IsExtension) {
1589 assert(
C > 0x7F &&
"isAllowedInitiallyIDChar called with an ASCII codepoint");
1590 IsExtension =
false;
1591 if (LangOpts.AsmPreprocessor) {
1594 if (LangOpts.CPlusPlus || LangOpts.C23) {
1595 static const llvm::sys::UnicodeCharSet XIDStartChars(
XIDStartRanges);
1596 if (XIDStartChars.contains(
C))
1604 static const llvm::sys::UnicodeCharSet C11DisallowedInitialIDChars(
1606 return !C11DisallowedInitialIDChars.contains(
C);
1608 static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
1610 return !C99DisallowedInitialIDChars.contains(
C);
1616 static const llvm::sys::UnicodeCharSet MathStartChars(
1618 static const llvm::sys::UnicodeCharSet MathContinueChars(
1621 (void)MathStartChars;
1622 (void)MathContinueChars;
1623 assert((MathStartChars.contains(
C) || MathContinueChars.contains(
C)) &&
1624 "Unexpected mathematical notation codepoint");
1640 CannotAppearInIdentifier = 0,
1641 CannotStartIdentifier
1644 static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
1646 static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
1648 if (!C99AllowedIDChars.contains(
C)) {
1651 << CannotAppearInIdentifier;
1652 }
else if (IsFirst && C99DisallowedInitialIDChars.contains(
C)) {
1655 << CannotStartIdentifier;
1667 struct HomoglyphPair {
1670 bool operator<(HomoglyphPair R)
const {
return Character < R.Character; }
1672 static constexpr HomoglyphPair SortedHomoglyphs[] = {
1725 std::lower_bound(std::begin(SortedHomoglyphs),
1726 std::end(SortedHomoglyphs) - 1, HomoglyphPair{
C,
'\0'});
1727 if (Homoglyph->Character ==
C) {
1728 if (Homoglyph->LooksLike) {
1729 const char LooksLikeStr[] = {Homoglyph->LooksLike, 0};
1750 if ((IsFirst && IsIDStart) || (!IsFirst && IsIDContinue))
1753 bool InvalidOnlyAtStart = IsFirst && !IsIDStart && IsIDContinue;
1755 if (!IsFirst || InvalidOnlyAtStart) {
// Tries to read a universal character name at CurPtr as part of an identifier
// and, on success, advances CurPtr (passed by reference) past it.
// NOTE(review): the trailing parameter(s), the validity checks on the code
// point and the return statements are on lines not visible in this excerpt.
1766bool Lexer::tryConsumeIdentifierUCN(
const char *&CurPtr,
unsigned Size,
// UCNPtr starts Size bytes past CurPtr and is handed to tryReadUCN together
// with CurPtr.
// NOTE(review): presumably tryReadUCN leaves UCNPtr just past the UCN; the
// pointer arithmetic below depends on that. Confirm against its definition.
1768 const char *UCNPtr = CurPtr +
Size;
1769 uint32_t CodePoint = tryReadUCN(UCNPtr, CurPtr,
nullptr);
// A code point of 0 is tested for here; the handling is not visible.
1770 if (CodePoint == 0) {
1773 bool IsExtension =
false;
// Recognise the plain spellings: 6 bytes with a lowercase 'u' as the second
// character, or 10 bytes with an uppercase 'U' as the second character.
1798 if ((UCNPtr - CurPtr == 6 && CurPtr[1] ==
'u') ||
1799 (UCNPtr - CurPtr == 10 && CurPtr[1] ==
'U'))
// Consume one character at a time via getAndAdvanceChar until UCNPtr is
// reached.
1802 while (CurPtr != UCNPtr)
1803 (void)getAndAdvanceChar(CurPtr,
Result);
1807bool Lexer::tryConsumeIdentifierUTF8Char(
const char *&CurPtr,
Token &
Result) {
1808 llvm::UTF32 CodePoint;
1813 unsigned FirstCodeUnitSize;
1814 getCharAndSize(CurPtr, FirstCodeUnitSize);
1815 const char *CharStart = CurPtr + FirstCodeUnitSize - 1;
1816 const char *UnicodePtr = CharStart;
1818 llvm::ConversionResult ConvResult = llvm::convertUTF8Sequence(
1819 (
const llvm::UTF8 **)&UnicodePtr, (
const llvm::UTF8 *)BufferEnd,
1820 &CodePoint, llvm::strictConversion);
1821 if (ConvResult != llvm::conversionOK)
1824 bool IsExtension =
false;
1853 ConsumeChar(CurPtr, FirstCodeUnitSize,
Result);
1854 CurPtr = UnicodePtr;
1858bool Lexer::LexUnicodeIdentifierStart(
Token &
Result, uint32_t
C,
1859 const char *CurPtr) {
1860 bool IsExtension =
false;
1875 return LexIdentifierContinue(
Result, CurPtr);
1900 FormTokenWithChars(
Result, CurPtr, tok::unknown);
1906 [[maybe_unused]]
const char *BufferEnd) {
1908 alignas(16)
static constexpr char AsciiIdentifierRange[16] = {
1909 '_',
'_',
'A',
'Z',
'a',
'z',
'0',
'9',
1911 constexpr ssize_t BytesPerRegister = 16;
1913 __m128i AsciiIdentifierRangeV =
1916 while (LLVM_LIKELY(BufferEnd - CurPtr >= BytesPerRegister)) {
1923 if (Consumed == BytesPerRegister)
1929 unsigned char C = *CurPtr;
1935bool Lexer::LexIdentifierContinue(
Token &
Result,
const char *CurPtr) {
1944 unsigned char C = getCharAndSize(CurPtr, Size);
1946 CurPtr = ConsumeChar(CurPtr, Size,
Result);
1951 if (!LangOpts.DollarIdents)
1955 Diag(CurPtr, diag::ext_dollar_in_identifier);
1956 CurPtr = ConsumeChar(CurPtr, Size,
Result);
1959 if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result))
1967 const char *IdStart = BufferPtr;
1968 FormTokenWithChars(
Result, CurPtr, tok::raw_identifier);
1969 Result.setRawIdentifierData(IdStart);
1985 if (isCodeCompletionPoint(CurPtr)) {
1987 Result.setKind(tok::code_completion);
1993 assert(*CurPtr == 0 &&
"Completion character must be 0");
1998 if (CurPtr < BufferEnd) {
// Decides whether the text at Start looks like the beginning of a hexadecimal
// literal by examining its first two characters.
// NOTE(review): how CharAndSize1/CharAndSize2 are obtained and any test
// applied to C1 are on lines not visible in this excerpt.
2016bool Lexer::isHexaLiteral(
const char *Start,
const LangOptions &LangOpts) {
2018 char C1 = CharAndSize1.Char;
2024 char C2 = CharAndSize2.Char;
// The second character must be 'x' or 'X'.
2025 return (C2 ==
'x' || C2 ==
'X');
// Lexes the rest of a numeric constant starting at CurPtr and forms a
// tok::numeric_constant token in Result. Characters that need special
// treatment (exponent signs, digit separators, UCNs) are consumed and lexing
// then continues through a recursive call.
// NOTE(review): the main character-consuming loop condition, the declaration
// of PrevCh/Size/NextSize and the final return are on lines not visible in
// this excerpt.
2031bool Lexer::LexNumericConstant(
Token &
Result,
const char *CurPtr) {
2033 char C = getCharAndSize(CurPtr, Size);
2036 CurPtr = ConsumeChar(CurPtr, Size,
Result);
// HLSL only: a '.' followed by 'x' or 'r' is tested for here; the handling is
// not visible.
2038 if (LangOpts.HLSL &&
C ==
'.' && (*CurPtr ==
'x' || *CurPtr ==
'r')) {
2042 C = getCharAndSize(CurPtr, Size);
// A sign directly after 'e'/'E' belongs to a decimal exponent, so it is
// consumed and lexing continues -- except when Microsoft extensions are
// enabled and the literal is hexadecimal.
2046 if ((
C ==
'-' ||
C ==
'+') && (PrevCh ==
'E' || PrevCh ==
'e')) {
2049 if (!LangOpts.MicrosoftExt || !isHexaLiteral(BufferPtr, LangOpts))
2050 return LexNumericConstant(
Result, ConsumeChar(CurPtr, Size,
Result));
// A sign directly after 'p'/'P' is a candidate hexadecimal floating-point
// exponent. IsHexFloat starts out true and is re-examined when C99 is off:
// the literal must be hexadecimal, and before C++17 the text lexed so far is
// searched for an underscore.
2054 if ((
C ==
'-' ||
C ==
'+') && (PrevCh ==
'P' || PrevCh ==
'p')) {
2058 bool IsHexFloat =
true;
2059 if (!LangOpts.C99) {
2060 if (!isHexaLiteral(BufferPtr, LangOpts))
2062 else if (!LangOpts.CPlusPlus17 &&
2063 std::find(BufferPtr, CurPtr,
'_') != CurPtr)
2067 return LexNumericConstant(
Result, ConsumeChar(CurPtr, Size,
Result));
// A single quote is a digit separator in C++14 and later, and in C23. A
// compatibility warning is issued, then the quote and the character after it
// are both consumed before lexing continues.
2071 if (
C ==
'\'' && (LangOpts.CPlusPlus14 || LangOpts.C23)) {
2075 Diag(CurPtr, LangOpts.CPlusPlus
2076 ? diag::warn_cxx11_compat_digit_separator
2077 : diag::warn_c23_compat_digit_separator);
2078 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2079 CurPtr = ConsumeChar(CurPtr, NextSize,
Result);
2080 return LexNumericConstant(
Result, CurPtr);
// A backslash may introduce a universal character name; if one is consumed,
// keep lexing the constant.
2085 if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result))
2086 return LexNumericConstant(
Result, CurPtr);
2088 return LexNumericConstant(
Result, CurPtr);
// Form the token covering the text from BufferPtr up to CurPtr and record the
// start of its spelling as the literal data.
2091 const char *TokStart = BufferPtr;
2092 FormTokenWithChars(
Result, CurPtr, tok::numeric_constant);
2093 Result.setLiteralData(TokStart);
2099const char *Lexer::LexUDSuffix(
Token &
Result,
const char *CurPtr,
2100 bool IsStringLiteral) {
2101 assert(LangOpts.CPlusPlus);
2105 char C = getCharAndSize(CurPtr, Size);
2106 bool Consumed =
false;
2109 if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result))
2111 else if (!
isASCII(
C) && tryConsumeIdentifierUTF8Char(CurPtr,
Result))
2117 if (!LangOpts.CPlusPlus11) {
2120 C ==
'_' ? diag::warn_cxx11_compat_user_defined_literal
2121 : diag::warn_cxx11_compat_reserved_user_defined_literal)
2132 bool IsUDSuffix =
false;
2135 else if (IsStringLiteral && LangOpts.CPlusPlus14) {
2139 const unsigned MaxStandardSuffixLength = 3;
2140 char Buffer[MaxStandardSuffixLength] = {
C };
2141 unsigned Consumed =
Size;
2144 auto [Next, NextSize] =
2148 const StringRef CompleteSuffix(Buffer, Chars);
2154 if (Chars == MaxStandardSuffixLength)
2158 Buffer[Chars++] = Next;
2159 Consumed += NextSize;
2165 Diag(CurPtr, LangOpts.MSVCCompat
2166 ? diag::ext_ms_reserved_user_defined_literal
2167 : diag::ext_reserved_user_defined_literal)
2172 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2177 C = getCharAndSize(CurPtr, Size);
2179 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2180 }
else if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result)) {
2181 }
else if (!
isASCII(
C) && tryConsumeIdentifierUTF8Char(CurPtr,
Result)) {
2191bool Lexer::LexStringLiteral(
Token &
Result,
const char *CurPtr,
2193 const char *AfterQuote = CurPtr;
2195 const char *NulCharacter =
nullptr;
2198 (Kind == tok::utf8_string_literal ||
2199 Kind == tok::utf16_string_literal ||
2200 Kind == tok::utf32_string_literal))
2201 Diag(BufferPtr, LangOpts.CPlusPlus ? diag::warn_cxx98_compat_unicode_literal
2202 : diag::warn_c99_compat_unicode_literal);
2204 char C = getAndAdvanceChar(CurPtr,
Result);
2209 C = getAndAdvanceChar(CurPtr,
Result);
2211 if (
C ==
'\n' ||
C ==
'\r' ||
2212 (
C == 0 && CurPtr-1 == BufferEnd)) {
2214 Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 1;
2215 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2220 if (isCodeCompletionPoint(CurPtr-1)) {
2222 codeCompleteIncludedFile(AfterQuote, CurPtr - 1,
false);
2225 FormTokenWithChars(
Result, CurPtr - 1, tok::unknown);
2230 NulCharacter = CurPtr-1;
2232 C = getAndAdvanceChar(CurPtr,
Result);
2236 if (LangOpts.CPlusPlus)
2237 CurPtr = LexUDSuffix(
Result, CurPtr,
true);
2241 Diag(NulCharacter, diag::null_in_char_or_string) << 1;
2244 const char *TokStart = BufferPtr;
2245 FormTokenWithChars(
Result, CurPtr, Kind);
2246 Result.setLiteralData(TokStart);
2252bool Lexer::LexRawStringLiteral(
Token &
Result,
const char *CurPtr,
2260 Diag(BufferPtr, diag::warn_cxx98_compat_raw_string_literal);
2262 unsigned PrefixLen = 0;
2266 llvm::is_contained({
'$',
'@',
'`'}, CurPtr[PrefixLen])) {
2267 const char *Pos = &CurPtr[PrefixLen];
2268 Diag(Pos, LangOpts.CPlusPlus26
2269 ? diag::warn_cxx26_compat_raw_string_literal_character_set
2270 : diag::ext_cxx26_raw_string_literal_character_set)
2271 << StringRef(Pos, 1);
2277 if (CurPtr[PrefixLen] !=
'(') {
2279 const char *PrefixEnd = &CurPtr[PrefixLen];
2280 if (PrefixLen == 16) {
2281 Diag(PrefixEnd, diag::err_raw_delim_too_long);
2282 }
else if (*PrefixEnd ==
'\n') {
2283 Diag(PrefixEnd, diag::err_invalid_newline_raw_delim);
2285 Diag(PrefixEnd, diag::err_invalid_char_raw_delim)
2286 << StringRef(PrefixEnd, 1);
2298 if (
C == 0 && CurPtr-1 == BufferEnd) {
2304 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2309 const char *Prefix = CurPtr;
2310 CurPtr += PrefixLen + 1;
2317 if (strncmp(CurPtr, Prefix, PrefixLen) == 0 && CurPtr[PrefixLen] ==
'"') {
2318 CurPtr += PrefixLen + 1;
2321 }
else if (
C == 0 && CurPtr-1 == BufferEnd) {
2323 Diag(BufferPtr, diag::err_unterminated_raw_string)
2324 << StringRef(Prefix, PrefixLen);
2325 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2331 if (LangOpts.CPlusPlus)
2332 CurPtr = LexUDSuffix(
Result, CurPtr,
true);
2335 const char *TokStart = BufferPtr;
2336 FormTokenWithChars(
Result, CurPtr, Kind);
2337 Result.setLiteralData(TokStart);
2343bool Lexer::LexAngledStringLiteral(
Token &
Result,
const char *CurPtr) {
2345 const char *NulCharacter =
nullptr;
2346 const char *AfterLessPos = CurPtr;
2347 char C = getAndAdvanceChar(CurPtr,
Result);
2352 C = getAndAdvanceChar(CurPtr,
Result);
2355 (
C == 0 && (CurPtr - 1 == BufferEnd))) {
2358 FormTokenWithChars(
Result, AfterLessPos, tok::less);
2363 if (isCodeCompletionPoint(CurPtr - 1)) {
2364 codeCompleteIncludedFile(AfterLessPos, CurPtr - 1,
true);
2366 FormTokenWithChars(
Result, CurPtr - 1, tok::unknown);
2369 NulCharacter = CurPtr-1;
2371 C = getAndAdvanceChar(CurPtr,
Result);
2376 Diag(NulCharacter, diag::null_in_char_or_string) << 1;
2379 const char *TokStart = BufferPtr;
2380 FormTokenWithChars(
Result, CurPtr, tok::header_name);
2381 Result.setLiteralData(TokStart);
// Handles code completion inside an include path. PathStart points at the
// first character of the path and CompletionPoint at the completion position.
// NOTE(review): the final parameter (IsAngled, used below), the call that
// receives the directory/filename pieces and the bodies of the loop's branches
// are on lines not visible in this excerpt.
2385void Lexer::codeCompleteIncludedFile(
const char *PathStart,
2386 const char *CompletionPoint,
// The path text between PathStart and the completion point.
2389 StringRef PartialPath(PathStart, CompletionPoint - PathStart);
// A backslash counts as a path separator only in MSVC compatibility mode.
2390 llvm::StringRef SlashChars = LangOpts.MSVCCompat ?
"/\\" :
"/";
// Split at the last separator: the text before it is the directory part
// (empty when there is no separator) and the text after it is the start of
// the file name.
2391 auto Slash = PartialPath.find_last_of(SlashChars);
2393 (Slash == StringRef::npos) ?
"" : PartialPath.take_front(Slash);
2394 const char *StartOfFilename =
2395 (Slash == StringRef::npos) ? PathStart : PathStart + Slash + 1;
2398 StringRef(StartOfFilename, CompletionPoint - StartOfFilename)));
// Scan forward from the completion point, looking at the character that
// follows it. Three cases are distinguished: end of line or NUL, the closing
// delimiter ('>' for an angled include, '"' otherwise), and a path separator.
2401 while (CompletionPoint < BufferEnd) {
2402 char Next = *(CompletionPoint + 1);
2403 if (Next == 0 || Next ==
'\r' || Next ==
'\n')
2406 if (Next == (IsAngled ?
'>' :
'"'))
2408 if (SlashChars.contains(Next))
2420bool Lexer::LexCharConstant(
Token &
Result,
const char *CurPtr,
2423 const char *NulCharacter =
nullptr;
2426 if (Kind == tok::utf16_char_constant || Kind == tok::utf32_char_constant)
2427 Diag(BufferPtr, LangOpts.CPlusPlus
2428 ? diag::warn_cxx98_compat_unicode_literal
2429 : diag::warn_c99_compat_unicode_literal);
2430 else if (Kind == tok::utf8_char_constant)
2431 Diag(BufferPtr, diag::warn_cxx14_compat_u8_character_literal);
2434 char C = getAndAdvanceChar(CurPtr,
Result);
2437 Diag(BufferPtr, diag::ext_empty_character);
2438 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2445 C = getAndAdvanceChar(CurPtr,
Result);
2447 if (
C ==
'\n' ||
C ==
'\r' ||
2448 (
C == 0 && CurPtr-1 == BufferEnd)) {
2450 Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 0;
2451 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2456 if (isCodeCompletionPoint(CurPtr-1)) {
2458 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2463 NulCharacter = CurPtr-1;
2465 C = getAndAdvanceChar(CurPtr,
Result);
2469 if (LangOpts.CPlusPlus)
2470 CurPtr = LexUDSuffix(
Result, CurPtr,
false);
2474 Diag(NulCharacter, diag::null_in_char_or_string) << 0;
2477 const char *TokStart = BufferPtr;
2478 FormTokenWithChars(
Result, CurPtr, Kind);
2479 Result.setLiteralData(TokStart);
2487bool Lexer::SkipWhitespace(
Token &
Result,
const char *CurPtr,
2488 bool &TokAtPhysicalStartOfLine) {
2492 unsigned char Char = *CurPtr;
2494 const char *lastNewLine =
nullptr;
2495 auto setLastNewLine = [&](
const char *Ptr) {
2501 setLastNewLine(CurPtr - 1);
2520 if (*CurPtr ==
'\n')
2521 setLastNewLine(CurPtr);
2528 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2530 IsAtStartOfLine =
true;
2531 IsAtPhysicalStartOfLine =
true;
2538 char PrevChar = CurPtr[-1];
2544 TokAtPhysicalStartOfLine =
true;
2546 if (NewLinePtr && lastNewLine && NewLinePtr != lastNewLine &&
PP) {
2563bool Lexer::SkipLineComment(
Token &
Result,
const char *CurPtr,
2564 bool &TokAtPhysicalStartOfLine) {
2569 Diag(BufferPtr, diag::ext_line_comment);
2587 bool UnicodeDecodingAlreadyDiagnosed =
false;
2594 C !=
'\n' &&
C !=
'\r') {
2596 UnicodeDecodingAlreadyDiagnosed =
false;
2600 unsigned Length = llvm::getUTF8SequenceSize(
2601 (
const llvm::UTF8 *)CurPtr, (
const llvm::UTF8 *)BufferEnd);
2604 Diag(CurPtr, diag::warn_invalid_utf8_in_comment);
2605 UnicodeDecodingAlreadyDiagnosed =
true;
2608 UnicodeDecodingAlreadyDiagnosed =
false;
2614 const char *NextLine = CurPtr;
2617 const char *EscapePtr = CurPtr-1;
2618 bool HasSpace =
false;
2624 if (*EscapePtr ==
'\\')
2627 else if (EscapePtr[0] ==
'/' && EscapePtr[-1] ==
'?' &&
2628 EscapePtr[-2] ==
'?' && LangOpts.Trigraphs)
2630 CurPtr = EscapePtr-2;
2636 Diag(EscapePtr, diag::backslash_newline_space);
2643 const char *OldPtr = CurPtr;
2646 C = getAndAdvanceChar(CurPtr,
Result);
2651 if (
C != 0 && CurPtr == OldPtr+1) {
2659 if (CurPtr != OldPtr + 1 &&
C !=
'/' &&
2660 (CurPtr == BufferEnd + 1 || CurPtr[0] !=
'/')) {
2661 for (; OldPtr != CurPtr; ++OldPtr)
2662 if (OldPtr[0] ==
'\n' || OldPtr[0] ==
'\r') {
2666 const char *ForwardPtr = CurPtr;
2669 if (ForwardPtr[0] ==
'/' && ForwardPtr[1] ==
'/')
2674 Diag(OldPtr-1, diag::ext_multi_line_line_comment);
2679 if (
C ==
'\r' ||
C ==
'\n' || CurPtr == BufferEnd + 1) {
2684 if (
C ==
'\0' && isCodeCompletionPoint(CurPtr-1)) {
2702 return SaveLineComment(
Result, CurPtr);
2716 NewLinePtr = CurPtr++;
2720 TokAtPhysicalStartOfLine =
true;
2729bool Lexer::SaveLineComment(
Token &
Result,
const char *CurPtr) {
2732 FormTokenWithChars(
Result, CurPtr, tok::comment);
2744 assert(Spelling[0] ==
'/' && Spelling[1] ==
'/' &&
"Not line comment?");
2748 Result.setKind(tok::comment);
2759 assert(CurPtr[0] ==
'\n' || CurPtr[0] ==
'\r');
2762 const char *TrigraphPos =
nullptr;
2764 const char *SpacePos =
nullptr;
2771 if (CurPtr[0] ==
'\n' || CurPtr[0] ==
'\r') {
2773 if (CurPtr[0] == CurPtr[1])
2787 if (*CurPtr ==
'\\') {
2789 }
else if (CurPtr[0] ==
'/' && CurPtr[-1] ==
'?' && CurPtr[-2] ==
'?') {
2791 TrigraphPos = CurPtr - 2;
2802 if (*CurPtr !=
'\n' && *CurPtr !=
'\r')
2811 L->
Diag(TrigraphPos, diag::trigraph_ignored_block_comment);
2815 L->
Diag(TrigraphPos, diag::trigraph_ends_block_comment);
2820 L->
Diag(CurPtr + 1, diag::escaped_newline_block_comment_end);
2824 L->
Diag(SpacePos, diag::backslash_newline_space);
2830#include <emmintrin.h>
2845bool Lexer::SkipBlockComment(
Token &
Result,
const char *CurPtr,
2846 bool &TokAtPhysicalStartOfLine) {
2856 unsigned char C = getCharAndSize(CurPtr, CharSize);
2858 if (
C == 0 && CurPtr == BufferEnd+1) {
2860 Diag(BufferPtr, diag::err_unterminated_block_comment);
2866 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2883 bool UnicodeDecodingAlreadyDiagnosed =
false;
2888 if (CurPtr + 24 < BufferEnd &&
2893 while (
C !=
'/' && (
intptr_t)CurPtr % 16 != 0) {
2898 if (
C ==
'/')
goto FoundSlash;
2902 while (CurPtr + 16 < BufferEnd) {
2904 if (LLVM_UNLIKELY(Mask != 0)) {
2914 CurPtr += llvm::countr_zero<unsigned>(cmp) + 1;
2920 __vector
unsigned char LongUTF = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
2921 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
2922 0x80, 0x80, 0x80, 0x80};
2923 __vector
unsigned char Slashes = {
2924 '/',
'/',
'/',
'/',
'/',
'/',
'/',
'/',
2925 '/',
'/',
'/',
'/',
'/',
'/',
'/',
'/'
2927 while (CurPtr + 16 < BufferEnd) {
2929 vec_any_ge(*(
const __vector
unsigned char *)CurPtr, LongUTF)))
2931 if (
vec_any_eq(*(
const __vector
unsigned char *)CurPtr, Slashes)) {
2938 while (CurPtr + 16 < BufferEnd) {
2939 bool HasNonASCII =
false;
2940 for (
unsigned I = 0; I < 16; ++I)
2941 HasNonASCII |= !
isASCII(CurPtr[I]);
2943 if (LLVM_UNLIKELY(HasNonASCII))
2946 bool HasSlash =
false;
2947 for (
unsigned I = 0; I < 16; ++I)
2948 HasSlash |= CurPtr[I] ==
'/';
2962 while (
C !=
'/' &&
C !=
'\0') {
2964 UnicodeDecodingAlreadyDiagnosed =
false;
2971 unsigned Length = llvm::getUTF8SequenceSize(
2972 (
const llvm::UTF8 *)CurPtr - 1, (
const llvm::UTF8 *)BufferEnd);
2975 Diag(CurPtr - 1, diag::warn_invalid_utf8_in_comment);
2976 UnicodeDecodingAlreadyDiagnosed =
true;
2978 UnicodeDecodingAlreadyDiagnosed =
false;
2979 CurPtr += Length - 1;
2986 if (CurPtr[-2] ==
'*')
2989 if ((CurPtr[-2] ==
'\n' || CurPtr[-2] ==
'\r')) {
2991 LangOpts.Trigraphs)) {
2997 if (CurPtr[0] ==
'*' && CurPtr[1] !=
'/') {
3002 Diag(CurPtr-1, diag::warn_nested_block_comment);
3004 }
else if (
C == 0 && CurPtr == BufferEnd+1) {
3006 Diag(BufferPtr, diag::err_unterminated_block_comment);
3015 FormTokenWithChars(
Result, CurPtr, tok::unknown);
3021 }
else if (
C ==
'\0' && isCodeCompletionPoint(CurPtr-1)) {
3040 FormTokenWithChars(
Result, CurPtr, tok::comment);
3049 SkipWhitespace(
Result, CurPtr+1, TokAtPhysicalStartOfLine);
3067 "Must be in a preprocessing directive!");
3072 const char *CurPtr = BufferPtr;
3074 char Char = getAndAdvanceChar(CurPtr, Tmp);
3082 if (CurPtr-1 != BufferEnd) {
3083 if (isCodeCompletionPoint(CurPtr-1)) {
3099 assert(CurPtr[-1] == Char &&
"Trigraphs for newline?");
3100 BufferPtr = CurPtr-1;
3104 if (Tmp.
is(tok::code_completion)) {
3109 assert(Tmp.
is(tok::eod) &&
"Unexpected token!");
3121bool Lexer::LexEndOfFile(
Token &
Result,
const char *CurPtr) {
3129 FormTokenWithChars(
Result, CurPtr, tok::eod);
3141 BufferPtr = BufferEnd;
3142 FormTokenWithChars(
Result, BufferEnd, tok::eof);
3162 diag::err_pp_unterminated_conditional);
3168 if (CurPtr != BufferStart && (CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')) {
3173 if (LangOpts.CPlusPlus11) {
3177 if (!Diags.
isIgnored(diag::warn_cxx98_compat_no_newline_eof, EndLoc)) {
3178 DiagID = diag::warn_cxx98_compat_no_newline_eof;
3180 DiagID = diag::warn_no_newline_eof;
3183 DiagID = diag::ext_no_newline_eof;
3186 Diag(BufferEnd, DiagID)
3200unsigned Lexer::isNextPPTokenLParen() {
3201 assert(!
LexingRawMode &&
"How can we expand a macro from a skipping buffer?");
3203 if (isDependencyDirectivesLexer()) {
3204 if (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size())
3206 return DepDirectives.front().Tokens[NextDepDirectiveTokenIndex].is(
3216 const char *TmpBufferPtr = BufferPtr;
3218 bool atStartOfLine = IsAtStartOfLine;
3219 bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
3220 bool leadingSpace = HasLeadingSpace;
3226 BufferPtr = TmpBufferPtr;
3228 HasLeadingSpace = leadingSpace;
3229 IsAtStartOfLine = atStartOfLine;
3230 IsAtPhysicalStartOfLine = atPhysicalStartOfLine;
3235 if (Tok.
is(tok::eof))
3237 return Tok.
is(tok::l_paren);
3243 const char *Terminator = CMK ==
CMK_Perforce ?
"<<<<\n" :
">>>>>>>";
3245 auto RestOfBuffer = StringRef(CurPtr, BufferEnd - CurPtr).substr(TermLen);
3246 size_t Pos = RestOfBuffer.find(Terminator);
3247 while (Pos != StringRef::npos) {
3250 (RestOfBuffer[Pos - 1] !=
'\r' && RestOfBuffer[Pos - 1] !=
'\n')) {
3251 RestOfBuffer = RestOfBuffer.substr(Pos+TermLen);
3252 Pos = RestOfBuffer.find(Terminator);
3255 return RestOfBuffer.data()+Pos;
3264bool Lexer::IsStartOfConflictMarker(
const char *CurPtr) {
3266 if (CurPtr != BufferStart &&
3267 CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')
3271 if (!StringRef(CurPtr, BufferEnd - CurPtr).starts_with(
"<<<<<<<") &&
3272 !StringRef(CurPtr, BufferEnd - CurPtr).starts_with(
">>>> "))
3287 Diag(CurPtr, diag::err_conflict_marker);
3288 CurrentConflictMarkerState =
Kind;
3292 while (*CurPtr !=
'\r' && *CurPtr !=
'\n') {
3293 assert(CurPtr != BufferEnd &&
"Didn't find end of line");
3308bool Lexer::HandleEndOfConflictMarker(
const char *CurPtr) {
3310 if (CurPtr != BufferStart &&
3311 CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')
3320 for (
unsigned i = 1; i != 4; ++i)
3321 if (CurPtr[i] != CurPtr[0])
3328 CurrentConflictMarkerState)) {
3332 while (CurPtr != BufferEnd && *CurPtr !=
'\r' && *CurPtr !=
'\n')
3338 CurrentConflictMarkerState =
CMK_None;
3346 const char *BufferEnd) {
3347 if (CurPtr == BufferEnd)
3350 for (; CurPtr != BufferEnd; ++CurPtr) {
3351 if (CurPtr[0] ==
'#' && CurPtr[1] ==
'>')
3357bool Lexer::lexEditorPlaceholder(
Token &
Result,
const char *CurPtr) {
3358 assert(CurPtr[-1] ==
'<' && CurPtr[0] ==
'#' &&
"Not a placeholder!");
3364 const char *Start = CurPtr - 1;
3365 if (!LangOpts.AllowEditorPlaceholders)
3366 Diag(Start, diag::err_placeholder_in_source);
3368 FormTokenWithChars(
Result, End, tok::raw_identifier);
3369 Result.setRawIdentifierData(Start);
3376bool Lexer::isCodeCompletionPoint(
const char *CurPtr)
const {
3385std::optional<uint32_t> Lexer::tryReadNumericUCN(
const char *&StartPtr,
3386 const char *SlashLoc,
3389 char Kind = getCharAndSize(StartPtr, CharSize);
3390 assert((Kind ==
'u' || Kind ==
'U') &&
"expected a UCN");
3392 unsigned NumHexDigits;
3395 else if (Kind ==
'U')
3398 bool Delimited =
false;
3399 bool FoundEndDelimiter =
false;
3403 if (!LangOpts.CPlusPlus && !LangOpts.C99) {
3405 Diag(SlashLoc, diag::warn_ucn_not_valid_in_c89);
3406 return std::nullopt;
3409 const char *CurPtr = StartPtr + CharSize;
3410 const char *KindLoc = &CurPtr[-1];
3413 while (Count != NumHexDigits || Delimited) {
3414 char C = getCharAndSize(CurPtr, CharSize);
3415 if (!Delimited && Count == 0 &&
C ==
'{') {
3421 if (Delimited &&
C ==
'}') {
3423 FoundEndDelimiter =
true;
3427 unsigned Value = llvm::hexDigitValue(
C);
3432 Diag(SlashLoc, diag::warn_delimited_ucn_incomplete)
3433 << StringRef(KindLoc, 1);
3434 return std::nullopt;
3437 if (CodePoint & 0xF000'0000) {
3439 Diag(KindLoc, diag::err_escape_too_large) << 0;
3440 return std::nullopt;
3451 Diag(SlashLoc, FoundEndDelimiter ? diag::warn_delimited_ucn_empty
3452 : diag::warn_ucn_escape_no_digits)
3453 << StringRef(KindLoc, 1);
3454 return std::nullopt;
3457 if (Delimited && Kind ==
'U') {
3459 Diag(SlashLoc, diag::err_hex_escape_no_digits) << StringRef(KindLoc, 1);
3460 return std::nullopt;
3463 if (!Delimited && Count != NumHexDigits) {
3465 Diag(SlashLoc, diag::warn_ucn_escape_incomplete);
3467 if (Count == 4 && NumHexDigits == 8) {
3469 Diag(KindLoc, diag::note_ucn_four_not_eight)
3473 return std::nullopt;
3476 if (Delimited &&
PP) {
3478 ? diag::warn_cxx23_delimited_escape_sequence
3479 : diag::ext_delimited_escape_sequence)
3488 if (CurPtr - StartPtr == (
ptrdiff_t)(Count + 1 + (Delimited ? 2 : 0)))
3491 while (StartPtr != CurPtr)
3492 (void)getAndAdvanceChar(StartPtr, *
Result);
3499std::optional<uint32_t> Lexer::tryReadNamedUCN(
const char *&StartPtr,
3500 const char *SlashLoc,
3505 char C = getCharAndSize(StartPtr, CharSize);
3506 assert(
C ==
'N' &&
"expected \\N{...}");
3508 const char *CurPtr = StartPtr + CharSize;
3509 const char *KindLoc = &CurPtr[-1];
3511 C = getCharAndSize(CurPtr, CharSize);
3514 Diag(SlashLoc, diag::warn_ucn_escape_incomplete);
3515 return std::nullopt;
3518 const char *StartName = CurPtr;
3519 bool FoundEndDelimiter =
false;
3522 C = getCharAndSize(CurPtr, CharSize);
3525 FoundEndDelimiter =
true;
3531 Buffer.push_back(
C);
3534 if (!FoundEndDelimiter || Buffer.empty()) {
3536 Diag(SlashLoc, FoundEndDelimiter ? diag::warn_delimited_ucn_empty
3537 : diag::warn_delimited_ucn_incomplete)
3538 << StringRef(KindLoc, 1);
3539 return std::nullopt;
3542 StringRef Name(Buffer.data(), Buffer.size());
3543 std::optional<char32_t> Match =
3544 llvm::sys::unicode::nameToCodepointStrict(Name);
3545 std::optional<llvm::sys::unicode::LooseMatchingResult> LooseMatch;
3547 LooseMatch = llvm::sys::unicode::nameToCodepointLooseMatching(Name);
3549 Diag(StartName, diag::err_invalid_ucn_name)
3550 << StringRef(Buffer.data(), Buffer.size())
3553 Diag(StartName, diag::note_invalid_ucn_name_loose_matching)
3564 if (Diagnose && Match)
3566 ? diag::warn_cxx23_delimited_escape_sequence
3567 : diag::ext_delimited_escape_sequence)
3574 if (LooseMatch && Diagnose)
3575 Match = LooseMatch->CodePoint;
3582 if (CurPtr - StartPtr == (
ptrdiff_t)(Buffer.size() + 3))
3585 while (StartPtr != CurPtr)
3586 (void)getAndAdvanceChar(StartPtr, *
Result);
3590 return Match ? std::optional<uint32_t>(*Match) :
std::nullopt;
3593uint32_t Lexer::tryReadUCN(
const char *&StartPtr,
const char *SlashLoc,
3597 std::optional<uint32_t> CodePointOpt;
3598 char Kind = getCharAndSize(StartPtr, CharSize);
3599 if (Kind ==
'u' || Kind ==
'U')
3600 CodePointOpt = tryReadNumericUCN(StartPtr, SlashLoc,
Result);
3601 else if (Kind ==
'N')
3602 CodePointOpt = tryReadNamedUCN(StartPtr, SlashLoc,
Result);
3607 uint32_t CodePoint = *CodePointOpt;
3610 if (LangOpts.AsmPreprocessor)
3629 if (CodePoint < 0xA0) {
3633 if (CodePoint < 0x20 || CodePoint >= 0x7F)
3634 Diag(BufferPtr, diag::err_ucn_control_character);
3636 char C =
static_cast<char>(CodePoint);
3637 Diag(BufferPtr, diag::err_ucn_escape_basic_scs) << StringRef(&
C, 1);
3642 }
else if (CodePoint >= 0xD800 && CodePoint <= 0xDFFF) {
3647 if (LangOpts.CPlusPlus && !LangOpts.CPlusPlus11)
3648 Diag(BufferPtr, diag::warn_ucn_escape_surrogate);
3650 Diag(BufferPtr, diag::err_ucn_escape_invalid);
3658bool Lexer::CheckUnicodeWhitespace(
Token &
Result, uint32_t
C,
3659 const char *CurPtr) {
3662 Diag(BufferPtr, diag::ext_unicode_whitespace)
3671void Lexer::PropagateLineStartLeadingSpaceInfo(
Token &
Result) {
3672 IsAtStartOfLine =
Result.isAtStartOfLine();
3673 HasLeadingSpace =
Result.hasLeadingSpace();
3674 HasLeadingEmptyMacro =
Result.hasLeadingEmptyMacro();
3679 assert(!isDependencyDirectivesLexer());
3685 if (IsAtStartOfLine) {
3687 IsAtStartOfLine =
false;
3690 if (HasLeadingSpace) {
3692 HasLeadingSpace =
false;
3695 if (HasLeadingEmptyMacro) {
3697 HasLeadingEmptyMacro =
false;
3700 bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
3701 IsAtPhysicalStartOfLine =
false;
3704 bool returnedToken = LexTokenInternal(
Result, atPhysicalStartOfLine);
3706 assert((returnedToken || !isRawLex) &&
"Raw lex must succeed");
3707 return returnedToken;
3715bool Lexer::LexTokenInternal(
Token &
Result,
bool TokAtPhysicalStartOfLine) {
3717 assert(!
Result.needsCleaning() &&
"Result needs cleaning");
3718 assert(!
Result.hasPtrData() &&
"Result has not been reset");
3721 const char *CurPtr = BufferPtr;
3733 FormTokenWithChars(
Result, CurPtr, tok::unknown);
3742 unsigned SizeTmp, SizeTmp2;
3745 char Char = getAndAdvanceChar(CurPtr,
Result);
3749 NewLinePtr =
nullptr;
3754 if (CurPtr-1 == BufferEnd)
3755 return LexEndOfFile(
Result, CurPtr-1);
3758 if (isCodeCompletionPoint(CurPtr-1)) {
3761 FormTokenWithChars(
Result, CurPtr, tok::code_completion);
3766 Diag(CurPtr-1, diag::null_in_file);
3768 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
3777 if (LangOpts.MicrosoftExt) {
3779 Diag(CurPtr-1, diag::ext_ctrl_z_eof_microsoft);
3780 return LexEndOfFile(
Result, CurPtr-1);
3784 Kind = tok::unknown;
3788 if (CurPtr[0] ==
'\n')
3789 (void)getAndAdvanceChar(CurPtr,
Result);
3803 IsAtStartOfLine =
true;
3804 IsAtPhysicalStartOfLine =
true;
3805 NewLinePtr = CurPtr - 1;
3814 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
3824 SkipHorizontalWhitespace:
3826 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
3835 LineComment && (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP)) {
3836 if (SkipLineComment(
Result, CurPtr+2, TokAtPhysicalStartOfLine))
3838 goto SkipIgnoredUnits;
3840 if (SkipBlockComment(
Result, CurPtr+2, TokAtPhysicalStartOfLine))
3842 goto SkipIgnoredUnits;
3844 goto SkipHorizontalWhitespace;
3852 case '0':
case '1':
case '2':
case '3':
case '4':
3853 case '5':
case '6':
case '7':
case '8':
case '9':
3856 return LexNumericConstant(
Result, CurPtr);
3865 if (LangOpts.CPlusPlus11 || LangOpts.C11) {
3866 Char = getCharAndSize(CurPtr, SizeTmp);
3870 return LexStringLiteral(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3871 tok::utf16_string_literal);
3875 return LexCharConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3876 tok::utf16_char_constant);
3879 if (Char ==
'R' && LangOpts.RawStringLiterals &&
3880 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3881 return LexRawStringLiteral(
Result,
3882 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3884 tok::utf16_string_literal);
3887 char Char2 = getCharAndSize(CurPtr + SizeTmp, SizeTmp2);
3891 return LexStringLiteral(
Result,
3892 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3894 tok::utf8_string_literal);
3895 if (Char2 ==
'\'' && (LangOpts.CPlusPlus17 || LangOpts.C23))
3896 return LexCharConstant(
3897 Result, ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3899 tok::utf8_char_constant);
3901 if (Char2 ==
'R' && LangOpts.RawStringLiterals) {
3903 char Char3 = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
3906 return LexRawStringLiteral(
Result,
3907 ConsumeChar(ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3910 tok::utf8_string_literal);
3917 return LexIdentifierContinue(
Result, CurPtr);
3923 if (LangOpts.CPlusPlus11 || LangOpts.C11) {
3924 Char = getCharAndSize(CurPtr, SizeTmp);
3928 return LexStringLiteral(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3929 tok::utf32_string_literal);
3933 return LexCharConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3934 tok::utf32_char_constant);
3937 if (Char ==
'R' && LangOpts.RawStringLiterals &&
3938 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3939 return LexRawStringLiteral(
Result,
3940 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3942 tok::utf32_string_literal);
3946 return LexIdentifierContinue(
Result, CurPtr);
3952 if (LangOpts.RawStringLiterals) {
3953 Char = getCharAndSize(CurPtr, SizeTmp);
3956 return LexRawStringLiteral(
Result,
3957 ConsumeChar(CurPtr, SizeTmp,
Result),
3958 tok::string_literal);
3962 return LexIdentifierContinue(
Result, CurPtr);
3967 Char = getCharAndSize(CurPtr, SizeTmp);
3971 return LexStringLiteral(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3972 tok::wide_string_literal);
3975 if (LangOpts.RawStringLiterals && Char ==
'R' &&
3976 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3977 return LexRawStringLiteral(
Result,
3978 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3980 tok::wide_string_literal);
3984 return LexCharConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3985 tok::wide_char_constant);
3990 case 'A':
case 'B':
case 'C':
case 'D':
case 'E':
case 'F':
case 'G':
3991 case 'H':
case 'I':
case 'J':
case 'K':
case 'M':
case 'N':
3992 case 'O':
case 'P':
case 'Q':
case 'S':
case 'T':
3993 case 'V':
case 'W':
case 'X':
case 'Y':
case 'Z':
3994 case 'a':
case 'b':
case 'c':
case 'd':
case 'e':
case 'f':
case 'g':
3995 case 'h':
case 'i':
case 'j':
case 'k':
case 'l':
case 'm':
case 'n':
3996 case 'o':
case 'p':
case 'q':
case 'r':
case 's':
case 't':
3997 case 'v':
case 'w':
case 'x':
case 'y':
case 'z':
4001 return LexIdentifierContinue(
Result, CurPtr);
4004 if (LangOpts.DollarIdents) {
4006 Diag(CurPtr-1, diag::ext_dollar_in_identifier);
4009 return LexIdentifierContinue(
Result, CurPtr);
4012 Kind = tok::unknown;
4019 return LexCharConstant(
Result, CurPtr, tok::char_constant);
4025 return LexStringLiteral(
Result, CurPtr,
4027 : tok::string_literal);
4031 Kind = tok::question;
4034 Kind = tok::l_square;
4037 Kind = tok::r_square;
4040 Kind = tok::l_paren;
4043 Kind = tok::r_paren;
4046 Kind = tok::l_brace;
4049 Kind = tok::r_brace;
4052 Char = getCharAndSize(CurPtr, SizeTmp);
4053 if (Char >=
'0' && Char <=
'9') {
4057 return LexNumericConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result));
4058 }
else if (LangOpts.CPlusPlus && Char ==
'*') {
4059 Kind = tok::periodstar;
4061 }
else if (Char ==
'.' &&
4062 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
'.') {
4063 Kind = tok::ellipsis;
4064 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4071 Char = getCharAndSize(CurPtr, SizeTmp);
4074 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4075 }
else if (Char ==
'=') {
4076 Kind = tok::ampequal;
4077 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4083 if (getCharAndSize(CurPtr, SizeTmp) ==
'=') {
4084 Kind = tok::starequal;
4085 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4091 Char = getCharAndSize(CurPtr, SizeTmp);
4093 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4094 Kind = tok::plusplus;
4095 }
else if (Char ==
'=') {
4096 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4097 Kind = tok::plusequal;
4103 Char = getCharAndSize(CurPtr, SizeTmp);
4105 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4106 Kind = tok::minusminus;
4107 }
else if (Char ==
'>' && LangOpts.CPlusPlus &&
4108 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
'*') {
4109 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4111 Kind = tok::arrowstar;
4112 }
else if (Char ==
'>') {
4113 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4115 }
else if (Char ==
'=') {
4116 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4117 Kind = tok::minusequal;
4126 if (getCharAndSize(CurPtr, SizeTmp) ==
'=') {
4127 Kind = tok::exclaimequal;
4128 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4130 Kind = tok::exclaim;
4135 Char = getCharAndSize(CurPtr, SizeTmp);
4145 bool TreatAsComment =
4146 LineComment && (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP);
4147 if (!TreatAsComment)
4149 TreatAsComment = getCharAndSize(CurPtr+SizeTmp, SizeTmp2) !=
'*';
4151 if (TreatAsComment) {
4152 if (SkipLineComment(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
4153 TokAtPhysicalStartOfLine))
4159 goto SkipIgnoredUnits;
4164 if (SkipBlockComment(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
4165 TokAtPhysicalStartOfLine))
4174 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4175 Kind = tok::slashequal;
4181 Char = getCharAndSize(CurPtr, SizeTmp);
4183 Kind = tok::percentequal;
4184 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4185 }
else if (LangOpts.Digraphs && Char ==
'>') {
4186 Kind = tok::r_brace;
4187 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4188 }
else if (LangOpts.Digraphs && Char ==
':') {
4189 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4190 Char = getCharAndSize(CurPtr, SizeTmp);
4191 if (Char ==
'%' && getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
':') {
4192 Kind = tok::hashhash;
4193 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4195 }
else if (Char ==
'@' && LangOpts.MicrosoftExt) {
4196 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4198 Diag(BufferPtr, diag::ext_charize_microsoft);
4205 if (TokAtPhysicalStartOfLine && !
LexingRawMode && !Is_PragmaLexer)
4206 goto HandleDirective;
4211 Kind = tok::percent;
4215 Char = getCharAndSize(CurPtr, SizeTmp);
4217 return LexAngledStringLiteral(
Result, CurPtr);
4218 }
else if (Char ==
'<') {
4219 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
4221 Kind = tok::lesslessequal;
4222 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4224 }
else if (After ==
'<' && IsStartOfConflictMarker(CurPtr-1)) {
4228 }
else if (After ==
'<' && HandleEndOfConflictMarker(CurPtr-1)) {
4232 }
else if (LangOpts.CUDA && After ==
'<') {
4233 Kind = tok::lesslessless;
4234 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4237 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4238 Kind = tok::lessless;
4240 }
else if (Char ==
'=') {
4241 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
4243 if (LangOpts.CPlusPlus20) {
4245 Diag(BufferPtr, diag::warn_cxx17_compat_spaceship);
4246 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4248 Kind = tok::spaceship;
4254 Diag(BufferPtr, diag::warn_cxx20_compat_spaceship)
4259 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4260 Kind = tok::lessequal;
4261 }
else if (LangOpts.Digraphs && Char ==
':') {
4262 if (LangOpts.CPlusPlus11 &&
4263 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
':') {
4270 char After = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
4271 if (After !=
':' && After !=
'>') {
4274 Diag(BufferPtr, diag::warn_cxx98_compat_less_colon_colon);
4279 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4280 Kind = tok::l_square;
4281 }
else if (LangOpts.Digraphs && Char ==
'%') {
4282 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4283 Kind = tok::l_brace;
4284 }
else if (Char ==
'#' && SizeTmp == 1 &&
4285 lexEditorPlaceholder(
Result, CurPtr)) {
4292 Char = getCharAndSize(CurPtr, SizeTmp);
4294 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4295 Kind = tok::greaterequal;
4296 }
else if (Char ==
'>') {
4297 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
4299 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4301 Kind = tok::greatergreaterequal;
4302 }
else if (After ==
'>' && IsStartOfConflictMarker(CurPtr-1)) {
4306 }
else if (After ==
'>' && HandleEndOfConflictMarker(CurPtr-1)) {
4309 }
else if (LangOpts.CUDA && After ==
'>') {
4310 Kind = tok::greatergreatergreater;
4311 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4314 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4315 Kind = tok::greatergreater;
4318 Kind = tok::greater;
4322 Char = getCharAndSize(CurPtr, SizeTmp);
4324 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4325 Kind = tok::caretequal;
4326 }
else if (LangOpts.OpenCL && Char ==
'^') {
4327 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4328 Kind = tok::caretcaret;
4334 Char = getCharAndSize(CurPtr, SizeTmp);
4336 Kind = tok::pipeequal;
4337 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4338 }
else if (Char ==
'|') {
4340 if (CurPtr[1] ==
'|' && HandleEndOfConflictMarker(CurPtr-1))
4342 Kind = tok::pipepipe;
4343 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4349 Char = getCharAndSize(CurPtr, SizeTmp);
4350 if (LangOpts.Digraphs && Char ==
'>') {
4351 Kind = tok::r_square;
4352 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4353 }
else if (Char ==
':') {
4354 Kind = tok::coloncolon;
4355 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4364 Char = getCharAndSize(CurPtr, SizeTmp);
4367 if (CurPtr[1] ==
'=' && HandleEndOfConflictMarker(CurPtr-1))
4370 Kind = tok::equalequal;
4371 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4380 Char = getCharAndSize(CurPtr, SizeTmp);
4382 Kind = tok::hashhash;
4383 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4384 }
else if (Char ==
'@' && LangOpts.MicrosoftExt) {
4387 Diag(BufferPtr, diag::ext_charize_microsoft);
4388 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4394 if (TokAtPhysicalStartOfLine && !
LexingRawMode && !Is_PragmaLexer)
4395 goto HandleDirective;
4403 if (CurPtr[-1] ==
'@' && LangOpts.ObjC)
4406 Kind = tok::unknown;
4411 if (!LangOpts.AsmPreprocessor) {
4412 if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &
Result)) {
4413 if (CheckUnicodeWhitespace(
Result, CodePoint, CurPtr)) {
4414 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
4422 return LexUnicodeIdentifierStart(
Result, CodePoint, CurPtr);
4426 Kind = tok::unknown;
4431 Kind = tok::unknown;
4435 llvm::UTF32 CodePoint;
4440 llvm::ConversionResult Status =
4441 llvm::convertUTF8Sequence((
const llvm::UTF8 **)&CurPtr,
4442 (
const llvm::UTF8 *)BufferEnd,
4444 llvm::strictConversion);
4445 if (Status == llvm::conversionOK) {
4446 if (CheckUnicodeWhitespace(
Result, CodePoint, CurPtr)) {
4447 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
4454 return LexUnicodeIdentifierStart(
Result, CodePoint, CurPtr);
4460 Kind = tok::unknown;
4467 Diag(CurPtr, diag::err_invalid_utf8);
4469 BufferPtr = CurPtr+1;
4481 FormTokenWithChars(
Result, CurPtr, Kind);
4487 FormTokenWithChars(
Result, CurPtr, tok::hash);
4502const char *Lexer::convertDependencyDirectiveToken(
4504 const char *TokPtr = BufferStart + DDTok.
Offset;
4510 BufferPtr = TokPtr + DDTok.
Length;
4514bool Lexer::LexDependencyDirectiveToken(
Token &
Result) {
4515 assert(isDependencyDirectivesLexer());
4517 using namespace dependency_directives_scan;
4519 while (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size()) {
4520 if (DepDirectives.front().Kind == pp_eof)
4521 return LexEndOfFile(
Result, BufferEnd);
4522 if (DepDirectives.front().Kind == tokens_present_before_eof)
4524 NextDepDirectiveTokenIndex = 0;
4525 DepDirectives = DepDirectives.drop_front();
4529 DepDirectives.front().Tokens[NextDepDirectiveTokenIndex++];
4530 if (NextDepDirectiveTokenIndex > 1 || DDTok.
Kind != tok::hash) {
4536 BufferPtr = BufferStart + DDTok.
Offset;
4537 LexAngledStringLiteral(
Result, BufferPtr + 1);
4538 if (
Result.isNot(tok::header_name))
4543 DepDirectives.front().Tokens[NextDepDirectiveTokenIndex];
4544 if (BufferStart + NextTok.
Offset >= BufferPtr)
4546 ++NextDepDirectiveTokenIndex;
4551 const char *TokPtr = convertDependencyDirectiveToken(DDTok,
Result);
4553 if (
Result.is(tok::hash) &&
Result.isAtStartOfLine()) {
4557 if (
Result.is(tok::raw_identifier)) {
4558 Result.setRawIdentifierData(TokPtr);
4566 if (
Result.isLiteral()) {
4567 Result.setLiteralData(TokPtr);
4570 if (
Result.is(tok::colon)) {
4572 if (*BufferPtr ==
':') {
4573 assert(DepDirectives.front().Tokens[NextDepDirectiveTokenIndex].is(
4575 ++NextDepDirectiveTokenIndex;
4576 Result.setKind(tok::coloncolon);
4586bool Lexer::LexDependencyDirectiveTokenWhileSkipping(
Token &
Result) {
4587 assert(isDependencyDirectivesLexer());
4589 using namespace dependency_directives_scan;
4592 unsigned NestedIfs = 0;
4594 DepDirectives = DepDirectives.drop_front();
4595 switch (DepDirectives.front().Kind) {
4597 llvm_unreachable(
"unexpected 'pp_none'");
4638 NextDepDirectiveTokenIndex = 0;
4639 return LexEndOfFile(
Result, BufferEnd);
4644 DepDirectives.front().Tokens.front();
4645 assert(DDTok.
is(tok::hash));
4646 NextDepDirectiveTokenIndex = 1;
4648 convertDependencyDirectiveToken(DDTok,
Result);
Defines the Diagnostic-related interfaces.
Defines the clang::IdentifierInfo, clang::IdentifierTable, and clang::Selector interfaces.
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified.
Defines the clang::LangOptions interface.
static bool isInExpansionTokenRange(const SourceLocation Loc, const SourceManager &SM)
static bool isMathematicalExtensionID(uint32_t C, const LangOptions &LangOpts, bool IsStart, bool &IsExtension)
static void diagnoseInvalidUnicodeCodepointInIdentifier(DiagnosticsEngine &Diags, const LangOptions &LangOpts, uint32_t CodePoint, CharSourceRange Range, bool IsFirst)
static char DecodeTrigraphChar(const char *CP, Lexer *L, bool Trigraphs)
DecodeTrigraphChar - If the specified character is a legal trigraph when prefixed with ?...
static size_t getSpellingSlow(const Token &Tok, const char *BufPtr, const LangOptions &LangOpts, char *Spelling)
Slow case of getSpelling.
static const char * FindConflictEnd(const char *CurPtr, const char *BufferEnd, ConflictMarkerKind CMK)
Find the end of a version control conflict marker.
static void maybeDiagnoseUTF8Homoglyph(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range)
After encountering UTF-8 character C and interpreting it as an identifier character,...
static SourceLocation getBeginningOfFileToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
static void StringifyImpl(T &Str, char Quote)
static LLVM_ATTRIBUTE_NOINLINE SourceLocation GetMappedTokenLoc(Preprocessor &PP, SourceLocation FileLoc, unsigned CharNo, unsigned TokLen)
GetMappedTokenLoc - If lexing out of a 'mapped buffer', where we pretend the lexer buffer was all exp...
static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts, bool &IsExtension)
static CharSourceRange makeCharRange(Lexer &L, const char *Begin, const char *End)
static bool isUnicodeWhitespace(uint32_t Codepoint)
static void diagnoseExtensionInIdentifier(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range)
static const char * findPlaceholderEnd(const char *CurPtr, const char *BufferEnd)
static llvm::SmallString< 5 > codepointAsHexString(uint32_t C)
static CharSourceRange makeRangeFromFileLocs(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, Lexer *L, bool Trigraphs)
isEndOfBlockCommentWithEscapedNewLine - Return true if the specified newline character (either \n or \r) ...
static const char * fastParseASCIIIdentifier(const char *CurPtr, const char *BufferEnd)
static char GetTrigraphCharForLetter(char Letter)
GetTrigraphCharForLetter - Given a character that occurs after a ?? pair, return the decoded trigraph...
static bool isAllowedInitiallyIDChar(uint32_t C, const LangOptions &LangOpts, bool &IsExtension)
static void maybeDiagnoseIDCharCompat(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range, bool IsFirst)
static const char * findBeginningOfLine(StringRef Buffer, unsigned Offset)
Returns the pointer that points to the beginning of line that contains the given offset,...
Defines the MultipleIncludeOpt interface.
Defines the clang::Preprocessor interface.
Defines the clang::SourceLocation class and associated facilities.
Defines the SourceManager interface.
Defines the clang::TokenKind enum and support functions.
static const llvm::sys::UnicodeCharRange C11DisallowedInitialIDCharRanges[]
static const llvm::sys::UnicodeCharRange C99DisallowedInitialIDCharRanges[]
static const llvm::sys::UnicodeCharRange UnicodeWhitespaceCharRanges[]
static const llvm::sys::UnicodeCharRange C99AllowedIDCharRanges[]
static const llvm::sys::UnicodeCharRange C11AllowedIDCharRanges[]
static const llvm::sys::UnicodeCharRange MathematicalNotationProfileIDStartRanges[]
static const llvm::sys::UnicodeCharRange MathematicalNotationProfileIDContinueRanges[]
static const llvm::sys::UnicodeCharRange XIDStartRanges[]
static const llvm::sys::UnicodeCharRange XIDContinueRanges[]
__DEVICE__ void * memcpy(void *__a, const void *__b, size_t __c)
__device__ __2f16 float c
__PTRDIFF_TYPE__ ptrdiff_t
static __inline__ int __ATTRS_o_ai vec_any_ge(vector signed char __a, vector signed char __b)
static __inline__ int __ATTRS_o_ai vec_any_eq(vector signed char __a, vector signed char __b)
Represents a character-granular source range.
static CharSourceRange getCharRange(SourceRange R)
SourceLocation getEnd() const
SourceLocation getBegin() const
A little helper class used to produce diagnostics.
Concrete class used by the front-end to report problems and issues.
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
bool isIgnored(unsigned DiagID, SourceLocation Loc) const
Determine whether the diagnostic is known to be ignored.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
static FixItHint CreateReplacement(CharSourceRange RemoveRange, StringRef Code)
Create a code modification hint that replaces the given source range with the given code string.
static FixItHint CreateRemoval(CharSourceRange RemoveRange)
Create a code modification hint that removes the given source range.
static FixItHint CreateInsertion(SourceLocation InsertionLoc, StringRef Code, bool BeforePreviousInsertions=false)
Create a code modification hint that inserts the given code string at a specific location.
One of these records is kept for each identifier that is lexed.
bool isHandleIdentifierCase() const
Return true if the Preprocessor::HandleIdentifier must be called on a token of this identifier.
bool isKeyword(const LangOptions &LangOpts) const
Return true if this token is a keyword in the specified language.
tok::ObjCKeywordKind getObjCKeywordID() const
Return the Objective-C keyword ID for this identifier.
IdentifierInfo & get(StringRef Name)
Return the identifier token info for the specified named identifier.
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens.
static StringRef getSourceText(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts, bool *Invalid=nullptr)
Returns a string for the source that the range encompasses.
void SetKeepWhitespaceMode(bool Val)
SetKeepWhitespaceMode - This method lets clients enable or disable whitespace retention mode.
static SourceLocation findLocationAfterToken(SourceLocation loc, tok::TokenKind TKind, const SourceManager &SM, const LangOptions &LangOpts, bool SkipTrailingWhitespaceAndNewLine)
Checks that the given token is the first token that occurs after the given location (this excludes co...
bool LexFromRawLexer(Token &Result)
LexFromRawLexer - Lex a token from a designated raw lexer (one with no associated preprocessor object...
bool inKeepCommentMode() const
inKeepCommentMode - Return true if the lexer should return comments as tokens.
void SetCommentRetentionState(bool Mode)
SetCommentRetentionState - Change the comment retention mode of the lexer to the specified mode.
void seek(unsigned Offset, bool IsAtStartOfLine)
Set the lexer's buffer pointer to Offset.
static StringRef getImmediateMacroName(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
void ReadToEndOfLine(SmallVectorImpl< char > *Result=nullptr)
ReadToEndOfLine - Read the rest of the current preprocessor line as an uninterpreted string.
static bool isAtStartOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroBegin=nullptr)
Returns true if the given MacroID location points at the first token of the macro expansion.
DiagnosticBuilder Diag(const char *Loc, unsigned DiagID) const
Diag - Forwarding function for diagnostics.
const char * getBufferLocation() const
Return the current location in the buffer.
bool Lex(Token &Result)
Lex - Return the next token in the file.
bool isPragmaLexer() const
isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.
static unsigned getTokenPrefixLength(SourceLocation TokStart, unsigned CharNo, const SourceManager &SM, const LangOptions &LangOpts)
Get the physical length (including trigraphs and escaped newlines) of the first CharNo characters...
Lexer(FileID FID, const llvm::MemoryBufferRef &InputFile, Preprocessor &PP, bool IsFirstIncludeOfFile=true)
Lexer constructor - Create a new lexer object for the specified buffer with the specified preprocesso...
static bool isAtEndOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroEnd=nullptr)
Returns true if the given MacroID location points at the last token of the macro expansion.
SourceLocation getSourceLocation() override
getSourceLocation - Return a source location for the next character in the current file.
static CharSourceRange makeFileCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Accepts a range and returns a character range with file locations.
static bool isNewLineEscaped(const char *BufferStart, const char *Str)
Checks whether new line pointed by Str is preceded by escape sequence.
SourceLocation getSourceLocation(const char *Loc, unsigned TokLen=1) const
getSourceLocation - Return a source location identifier for the specified offset in the current file.
static StringRef getIndentationForLine(SourceLocation Loc, const SourceManager &SM)
Returns the leading whitespace for line that corresponds to the given location Loc.
static unsigned getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid=nullptr)
getSpelling - This method is used to get the spelling of a token into a preallocated buffer,...
bool isKeepWhitespaceMode() const
isKeepWhitespaceMode - Return true if the lexer should return tokens for every character in the file,...
static bool isAsciiIdentifierContinueChar(char c, const LangOptions &LangOpts)
Returns true if the given character could appear in an identifier.
static unsigned MeasureTokenLength(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
MeasureTokenLength - Relex the token at the specified location and return its length in bytes in the ...
static SourceLocation GetBeginningOfToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Given a location anywhere in a source buffer, find the location that corresponds to the beginning of...
void resetExtendedTokenMode()
Sets the extended token mode back to its initial value, according to the language options and preproc...
static StringRef getImmediateMacroNameForDiagnostics(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
static Lexer * Create_PragmaLexer(SourceLocation SpellingLoc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLen, Preprocessor &PP)
Create_PragmaLexer: Lexer constructor - Create a new lexer object for _Pragma expansion.
static PreambleBounds ComputePreamble(StringRef Buffer, const LangOptions &LangOpts, unsigned MaxLines=0)
Compute the preamble of the given file.
static bool getRawToken(SourceLocation Loc, Token &Result, const SourceManager &SM, const LangOptions &LangOpts, bool IgnoreWhiteSpace=false)
Relex the token at the specified location.
static std::optional< Token > findNextToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Finds the token that comes right after the given location.
static SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset, const SourceManager &SM, const LangOptions &LangOpts)
Computes the source location just past the end of the token at this source location.
static std::string Stringify(StringRef Str, bool Charify=false)
Stringify - Convert the specified string into a C string by i) escaping '\' and " characters and ii) ...
static SizedChar getCharAndSizeNoWarn(const char *Ptr, const LangOptions &LangOpts)
getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever emit a warning.
void ExitTopLevelConditional()
Called when the lexer exits the top-level conditional.
bool LexingRawMode
True if in raw mode.
SmallVector< PPConditionalInfo, 4 > ConditionalStack
Information about the set of #if/#ifdef/#ifndef blocks we are currently in.
bool ParsingPreprocessorDirective
True when parsing #XXX; turns '\n' into a tok::eod token.
MultipleIncludeOpt MIOpt
A state machine that detects the #ifndef-wrapping a file idiom for the multiple-include optimization.
bool ParsingFilename
True after #include; turns <xx> or "xxx" into a tok::header_name token.
bool isLexingRawMode() const
Return true if this lexer is in raw mode or not.
const FileID FID
The SourceManager FileID corresponding to the file being lexed.
bool LexEditorPlaceholders
When enabled, the preprocessor will construct editor placeholder tokens.
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
SourceLocation getCodeCompletionLoc() const
Returns the location of the code-completion point.
SourceLocation getCodeCompletionFileLoc() const
Returns the start location of the file of code-completion point.
void setCodeCompletionTokenRange(const SourceLocation Start, const SourceLocation End)
Set the code completion token range for detecting replacement range later on.
bool isRecordingPreamble() const
void setRecordedPreambleConditionalStack(ArrayRef< PPConditionalInfo > s)
bool isInPrimaryFile() const
Return true if we're in the top-level file, not in a #include.
void CreateString(StringRef Str, Token &Tok, SourceLocation ExpansionLocStart=SourceLocation(), SourceLocation ExpansionLocEnd=SourceLocation())
Plop the specified string into a scratch buffer and set the specified token's location and length to ...
IdentifierInfo * LookUpIdentifierInfo(Token &Identifier) const
Given a tok::raw_identifier token, look up the identifier information for the token and install it in...
bool isPreprocessedOutput() const
Returns true if the preprocessor is responsible for generating output, false if it is producing token...
bool HandleIdentifier(Token &Identifier)
Callback invoked when the lexer reads an identifier and has filled in the token's IdentifierInfo membe...
SourceManager & getSourceManager() const
EmptylineHandler * getEmptylineHandler() const
bool getCommentRetentionState() const
bool hadModuleLoaderFatalFailure() const
PreprocessorOptions & getPreprocessorOpts() const
Retrieve the preprocessor options used to initialize this preprocessor.
StringRef getSpelling(SourceLocation loc, SmallVectorImpl< char > &buffer, bool *invalid=nullptr) const
Return the 'spelling' of the token at the given location; does not go up to the spelling location or ...
bool HandleComment(Token &result, SourceRange Comment)
bool isCodeCompletionEnabled() const
Determine if we are performing code completion.
void HandleDirective(Token &Result)
Callback invoked when the lexer sees a # token at the start of a line.
IdentifierTable & getIdentifierTable()
const LangOptions & getLangOpts() const
void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled)
Hook used by the lexer to invoke the "included file" code completion point.
void CodeCompleteNaturalLanguage()
Hook used by the lexer to invoke the "natural language" code completion point.
bool HandleEndOfFile(Token &Result, bool isEndOfMacro=false)
Callback invoked when the lexer hits the end of the current file.
DiagnosticsEngine & getDiagnostics() const
void setCodeCompletionIdentifierInfo(IdentifierInfo *Filter)
Set the code completion token for filtering purposes.
DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const
Forwarding function for diagnostics.
Encodes a location in the source.
static SourceLocation getFromRawEncoding(UIntTy Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
bool isValid() const
Return true if this is a valid SourceLocation object.
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
UIntTy getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it.
This class handles loading and caching of source files into memory.
const char * getCharacterData(SourceLocation SL, bool *Invalid=nullptr) const
Return a pointer to the start of the specified location in the appropriate spelling MemoryBuffer.
A trivial tuple used to represent a source range.
void setBegin(SourceLocation b)
SourceLocation getEnd() const
SourceLocation getBegin() const
void setEnd(SourceLocation e)
Each ExpansionInfo encodes the expansion location - where the token was ultimately expanded,...
SourceLocation getExpansionLocStart() const
SourceLocation getSpellingLoc() const
bool isMacroArgExpansion() const
This is a discriminated union of FileInfo and ExpansionInfo.
const ExpansionInfo & getExpansion() const
static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix)
Determine whether a suffix is a valid ud-suffix.
Token - This structure provides full information about a lexed token.
IdentifierInfo * getIdentifierInfo() const
bool hasUCN() const
Returns true if this token contains a universal character name.
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
unsigned getLength() const
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {....
tok::TokenKind getKind() const
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
bool isAnnotation() const
Return true if this is any of tok::annot_* kind tokens.
bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const
Return true if we have an ObjC keyword identifier.
bool isSimpleTypeSpecifier(const LangOptions &LangOpts) const
Determine whether the token kind starts a simple-type-specifier.
void startToken()
Reset all flags to cleared.
bool needsCleaning() const
Return true if this token has trigraphs or escaped newlines in it.
StringRef getRawIdentifier() const
getRawIdentifier - For a raw identifier token (i.e., an identifier lexed in raw mode),...
const char * getLiteralData() const
getLiteralData - For a literal token (numeric constant, string, etc), this returns a pointer to the s...
void setFlag(TokenFlags Flag)
Set the specified flag.
static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a)
Copies the values of the most significant bits from each 8-bit element in a 128-bit integer vector of...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi8(__m128i __a, __m128i __b)
Compares each of the corresponding 8-bit values of the 128-bit integer vectors for equality.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si128(__m128i_u const *__p)
Moves packed integer values from an unaligned 128-bit memory location to elements in a 128-bit intege...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi8(char __b)
Initializes all values in a 128-bit vector of [16 x i8] with the specified 8-bit value.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_load_si128(__m128i const *__p)
Moves packed integer values from an aligned 128-bit memory location to elements in a 128-bit integer ...
@ tokens_present_before_eof
Indicates that there are tokens present between the last scanned directive and eof.
@ pp_pragma_system_header
@ pp_pragma_include_alias
@ After
Like System, but searched after the system directories.
bool isStringLiteral(TokenKind K)
Return true if this is a C or C++ string-literal (or C++11 user-defined-string-literal) token.
ObjCKeywordKind
Provides a namespace for Objective-C keywords which start with an '@'.
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
The JSON file list parser is used to communicate input to InstallAPI.
LLVM_READNONE bool isASCII(char c)
Returns true if a byte is an ASCII character.
LLVM_READONLY bool isVerticalWhitespace(unsigned char c)
Returns true if this character is vertical ASCII whitespace: '\n', '\r'.
ConflictMarkerKind
ConflictMarkerKind - Kinds of conflict marker which the lexer might be recovering from.
@ CMK_Perforce
A Perforce-style conflict marker, initiated by 4 ">"s, separated by 4 "="s, and terminated by 4 "<"s.
@ CMK_None
Not within a conflict marker.
@ CMK_Normal
A normal or diff3 conflict marker, initiated by at least 7 "<"s, separated by at least 7 "="s or "|"s...
LLVM_READONLY bool isAsciiIdentifierContinue(unsigned char c)
bool operator<(DeclarationName LHS, DeclarationName RHS)
Ordering on two declaration names.
LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)
Returns true if this character is horizontal ASCII whitespace: ' ', '\t', '\f', '\v'.
@ Result
The result type of a method or function.
LLVM_READONLY bool isRawStringDelimBody(unsigned char c)
Return true if this is the body character of a C++ raw string delimiter.
LLVM_READONLY bool isWhitespace(unsigned char c)
Return true if this character is horizontal or vertical ASCII whitespace: ' ', '\t',...
LLVM_READONLY bool isPreprocessingNumberBody(unsigned char c)
Return true if this is the body character of a C preprocessing number, which is [a-zA-Z0-9_.].
const FunctionProtoType * T
LLVM_READONLY bool isAsciiIdentifierStart(unsigned char c, bool AllowDollar=false)
Returns true if this is a valid first character of a C identifier, which is [a-zA-Z_].
__INTPTR_TYPE__ intptr_t
A signed integer type with the property that any valid pointer to void can be converted to this type,...
float __ovld __cnfn length(float)
Return the length of vector p, i.e., sqrt(p.x^2 + p.y^2 + ...)
#define _mm_cmpistri(A, B, M)
Uses the immediate operand M to perform a comparison of string data with implicitly defined lengths t...
#define _SIDD_LEAST_SIGNIFICANT
#define _SIDD_NEGATIVE_POLARITY
Represents a char and the number of bytes parsed to produce it.
Describes the bounds (start, size) of the preamble and a flag required by PreprocessorOptions::Precom...
Token lexed as part of dependency directive scanning.
unsigned Offset
Offset into the original source input.
bool is(tok::TokenKind K) const