29#include "llvm/ADT/STLExtras.h"
30#include "llvm/ADT/StringExtras.h"
31#include "llvm/ADT/StringRef.h"
32#include "llvm/ADT/StringSwitch.h"
33#include "llvm/Support/Compiler.h"
34#include "llvm/Support/ConvertUTF.h"
35#include "llvm/Support/MemoryBufferRef.h"
36#include "llvm/Support/NativeFormatting.h"
37#include "llvm/Support/Unicode.h"
38#include "llvm/Support/UnicodeCharRanges.h"
63 return II->getObjCKeywordID() == objcKey;
70 return tok::objc_not_keyword;
76 if (AllowExport &&
is(tok::kw_export))
78 if (
isOneOf(tok::kw_import, tok::kw_module))
80 if (
isNot(tok::identifier))
83 return II->isImportKeyword() || II->isModuleKeyword();
89 case tok::annot_typename:
90 case tok::annot_decltype:
91 case tok::annot_pack_indexing_type:
97 case tok::kw___int128:
99 case tok::kw_unsigned:
107 case tok::kw__Float16:
108 case tok::kw___float128:
109 case tok::kw___ibm128:
110 case tok::kw_wchar_t:
116#define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) case tok::kw___##Trait:
117#include "clang/Basic/TransformTypeTraits.def"
118 case tok::kw___auto_type:
119 case tok::kw_char16_t:
120 case tok::kw_char32_t:
122 case tok::kw_decltype:
123 case tok::kw_char8_t:
135void Lexer::anchor() {}
137void Lexer::InitLexer(
const char *BufStart,
const char *BufPtr,
138 const char *BufEnd) {
139 BufferStart = BufStart;
143 assert(BufEnd[0] == 0 &&
144 "We assume that the input buffer has a null character at the end"
145 " to simplify lexing!");
150 if (BufferStart == BufferPtr) {
152 StringRef Buf(BufferStart, BufferEnd - BufferStart);
153 size_t BOMLength = llvm::StringSwitch<size_t>(Buf)
154 .StartsWith(
"\xEF\xBB\xBF", 3)
158 BufferPtr += BOMLength;
161 Is_PragmaLexer =
false;
162 CurrentConflictMarkerState =
CMK_None;
165 IsAtStartOfLine =
true;
166 IsAtPhysicalStartOfLine =
true;
168 HasLeadingSpace =
false;
169 HasLeadingEmptyMacro =
false;
184 ExtendedTokenMode = 0;
186 NewLinePtr =
nullptr;
196 FileLoc(
PP.getSourceManager().getLocForStartOfFile(
FID)),
197 LangOpts(
PP.getLangOpts()), LineComment(LangOpts.LineComment),
198 IsFirstTimeLexingFile(IsFirstIncludeOfFile) {
199 InitLexer(InputFile.getBufferStart(), InputFile.getBufferStart(),
200 InputFile.getBufferEnd());
209 const char *BufStart,
const char *BufPtr,
const char *BufEnd,
210 bool IsFirstIncludeOfFile)
211 : FileLoc(fileloc), LangOpts(langOpts), LineComment(LangOpts.LineComment),
212 IsFirstTimeLexingFile(IsFirstIncludeOfFile) {
213 InitLexer(BufStart, BufPtr, BufEnd);
224 bool IsFirstIncludeOfFile)
225 :
Lexer(
SM.getLocForStartOfFile(
FID), langOpts, FromFile.getBufferStart(),
226 FromFile.getBufferStart(), FromFile.getBufferEnd(),
227 IsFirstIncludeOfFile) {}
230 assert(
PP &&
"Cannot reset token mode without a preprocessor");
231 if (LangOpts.TraditionalCPP)
259 FileID SpellingFID =
SM.getFileID(SpellingLoc);
260 llvm::MemoryBufferRef InputFile =
SM.getBufferOrFake(SpellingFID);
266 const char *StrData =
SM.getCharacterData(SpellingLoc);
268 L->BufferPtr = StrData;
269 L->BufferEnd = StrData+TokLen;
270 assert(L->BufferEnd[0] == 0 &&
"Buffer is not nul terminated!");
274 L->FileLoc =
SM.createExpansionLoc(
SM.getLocForStartOfFile(SpellingFID),
276 ExpansionLocEnd, TokLen);
283 L->Is_PragmaLexer =
true;
288 this->IsAtPhysicalStartOfLine = IsAtStartOfLine;
289 this->IsAtStartOfLine = IsAtStartOfLine;
290 assert((BufferStart + Offset) <= BufferEnd);
291 BufferPtr = BufferStart + Offset;
295 typename T::size_type i = 0, e = Str.size();
297 if (Str[i] ==
'\\' || Str[i] == Quote) {
298 Str.insert(Str.begin() + i,
'\\');
301 }
else if (Str[i] ==
'\n' || Str[i] ==
'\r') {
303 if ((i < e - 1) && (Str[i + 1] ==
'\n' || Str[i + 1] ==
'\r') &&
304 Str[i] != Str[i + 1]) {
310 Str.insert(Str.begin() + i + 1,
'n');
320 std::string
Result = std::string(Str);
321 char Quote = Charify ?
'\'' :
'"';
336 assert(
Tok.needsCleaning() &&
"getSpellingSlow called on simple token");
339 const char *BufEnd = BufPtr +
Tok.getLength();
343 while (BufPtr < BufEnd) {
345 Spelling[Length++] = CharAndSize.Char;
346 BufPtr += CharAndSize.Size;
348 if (Spelling[Length - 1] ==
'"')
356 Spelling[Length - 2] ==
'R' && Spelling[Length - 1] ==
'"') {
359 const char *RawEnd = BufEnd;
360 do --RawEnd;
while (*RawEnd !=
'"');
361 size_t RawLength = RawEnd - BufPtr + 1;
364 memcpy(Spelling + Length, BufPtr, RawLength);
372 while (BufPtr < BufEnd) {
374 Spelling[Length++] = CharAndSize.Char;
375 BufPtr += CharAndSize.Size;
378 assert(Length <
Tok.getLength() &&
379 "NeedsCleaning flag set on token that didn't need cleaning!");
397 bool invalidTemp =
false;
398 StringRef file =
SM.getBufferData(locInfo.first, &invalidTemp);
400 if (invalid) *invalid =
true;
404 const char *tokenBegin = file.data() + locInfo.second;
408 file.begin(), tokenBegin, file.end());
416 return StringRef(tokenBegin,
length);
421 return StringRef(buffer.data(), buffer.size());
431 assert((
int)
Tok.getLength() >= 0 &&
"Token character range is bogus!");
433 bool CharDataInvalid =
false;
434 const char *TokStart = SourceMgr.getCharacterData(
Tok.getLocation(),
442 if (!
Tok.needsCleaning())
443 return std::string(TokStart, TokStart +
Tok.getLength());
464 assert((
int)
Tok.getLength() >= 0 &&
"Token character range is bogus!");
466 const char *TokStart =
nullptr;
468 if (
Tok.is(tok::raw_identifier))
469 TokStart =
Tok.getRawIdentifier().data();
470 else if (!
Tok.hasUCN()) {
473 Buffer = II->getNameStart();
474 return II->getLength();
480 TokStart =
Tok.getLiteralData();
484 bool CharDataInvalid =
false;
485 TokStart = SourceMgr.getCharacterData(
Tok.getLocation(), &CharDataInvalid);
488 if (CharDataInvalid) {
495 if (!
Tok.needsCleaning()) {
497 return Tok.getLength();
522 bool IgnoreWhiteSpace) {
531 Loc =
SM.getExpansionLoc(Loc);
534 StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
538 const char *StrData = Buffer.data()+LocInfo.second;
540 if (!IgnoreWhiteSpace &&
isWhitespace(SkipEscapedNewLines(StrData)[0]))
544 Lexer TheLexer(
SM.getLocForStartOfFile(LocInfo.first), LangOpts,
545 Buffer.begin(), StrData, Buffer.end());
554 const char *BufStart = Buffer.data();
555 if (Offset >= Buffer.size())
558 const char *LexStart = BufStart + Offset;
559 for (; LexStart != BufStart; --LexStart) {
575 if (LocInfo.first.isInvalid())
579 StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
585 const char *StrData = Buffer.data() + LocInfo.second;
587 if (!LexStart || LexStart == StrData)
592 Lexer TheLexer(LexerStartLoc, LangOpts, Buffer.data(), LexStart,
612 }
while (TheTok.
getKind() != tok::eof);
624 if (!
SM.isMacroArgExpansion(Loc))
631 assert(FileLocInfo.first == BeginFileLocInfo.first &&
632 FileLocInfo.second >= BeginFileLocInfo.second);
638enum PreambleDirectiveKind {
653 Lexer TheLexer(FileLoc, LangOpts, Buffer.begin(), Buffer.begin(),
657 bool InPreprocessorDirective =
false;
661 unsigned MaxLineOffset = 0;
663 const char *CurPtr = Buffer.begin();
664 unsigned CurLine = 0;
665 while (CurPtr != Buffer.end()) {
669 if (CurLine == MaxLines)
673 if (CurPtr != Buffer.end())
674 MaxLineOffset = CurPtr - Buffer.begin();
680 if (InPreprocessorDirective) {
682 if (TheTok.
getKind() == tok::eof) {
693 InPreprocessorDirective =
false;
702 if (MaxLineOffset && TokOffset >= MaxLineOffset)
707 if (TheTok.
getKind() == tok::comment) {
715 Token HashTok = TheTok;
716 InPreprocessorDirective =
true;
725 PreambleDirectiveKind PDK
726 = llvm::StringSwitch<PreambleDirectiveKind>(
Keyword)
727 .Case(
"include", PDK_Skipped)
728 .Case(
"__include_macros", PDK_Skipped)
729 .Case(
"define", PDK_Skipped)
730 .Case(
"undef", PDK_Skipped)
731 .Case(
"line", PDK_Skipped)
732 .Case(
"error", PDK_Skipped)
733 .Case(
"pragma", PDK_Skipped)
734 .Case(
"import", PDK_Skipped)
735 .Case(
"include_next", PDK_Skipped)
736 .Case(
"warning", PDK_Skipped)
737 .Case(
"ident", PDK_Skipped)
738 .Case(
"sccs", PDK_Skipped)
739 .Case(
"assert", PDK_Skipped)
740 .Case(
"unassert", PDK_Skipped)
741 .Case(
"if", PDK_Skipped)
742 .Case(
"ifdef", PDK_Skipped)
743 .Case(
"ifndef", PDK_Skipped)
744 .Case(
"elif", PDK_Skipped)
745 .Case(
"elifdef", PDK_Skipped)
746 .Case(
"elifndef", PDK_Skipped)
747 .Case(
"else", PDK_Skipped)
748 .Case(
"endif", PDK_Skipped)
749 .Default(PDK_Unknown);
766 TheTok.
getKind() == tok::raw_identifier &&
768 LangOpts.CPlusPlusModules) {
771 Token ModuleTok = TheTok;
774 }
while (TheTok.
getKind() == tok::comment);
775 if (TheTok.
getKind() != tok::semi) {
790 if (ActiveCommentLoc.
isValid())
791 End = ActiveCommentLoc;
806 const char *TokPtr =
SM.getCharacterData(TokStart, &
Invalid);
809 if (
Invalid || (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr)))
812 unsigned PhysOffset = 0;
817 while (Lexer::isObviouslySimpleCharacter(*TokPtr)) {
827 for (; CharNo; --CharNo) {
829 TokPtr += CharAndSize.Size;
830 PhysOffset += CharAndSize.Size;
837 if (!Lexer::isObviouslySimpleCharacter(*TokPtr))
838 PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr;
870 const FileID LocFileID =
SM.getFileID(Loc);
873 if (!
SM.getSLocEntry(LocFileID).getExpansion().isExpansionTokenRange())
895 if (!
SM.isAtStartOfImmediateMacroExpansion(loc, &expansionLoc))
901 *MacroBegin = expansionLoc;
923 if (!
SM.isAtEndOfImmediateMacroExpansion(afterLoc, &expansionLoc))
929 *MacroEnd = expansionLoc;
942 if (Range.isTokenRange()) {
949 auto [FID, BeginOffs] =
SM.getDecomposedLoc(Begin);
954 if (!
SM.isInFileID(End, FID, &EndOffs) ||
964 return SM.getSLocEntry(
SM.getFileID(Loc))
966 .isExpansionTokenRange();
983 Range.setBegin(Begin);
988 if (Range.isTokenRange()) {
1006 Range.setBegin(MacroBegin);
1007 Range.setEnd(MacroEnd);
1009 if (Range.isTokenRange())
1029 Range.setBegin(
SM.getImmediateSpellingLoc(Begin));
1030 Range.setEnd(
SM.getImmediateSpellingLoc(End));
1043 if (Range.isInvalid()) {
1050 if (beginInfo.first.isInvalid()) {
1056 if (!
SM.isInFileID(Range.getEnd(), beginInfo.first, &EndOffs) ||
1057 beginInfo.second > EndOffs) {
1063 bool invalidTemp =
false;
1064 StringRef file =
SM.getBufferData(beginInfo.first, &invalidTemp);
1071 return file.substr(beginInfo.second, EndOffs - beginInfo.second);
1077 assert(Loc.
isMacroID() &&
"Only reasonable to call this on macros");
1093 Loc =
SM.getImmediateExpansionRange(Loc).getBegin();
1100 FileID MacroFID =
SM.getFileID(Loc);
1101 if (
SM.isInFileID(SpellLoc, MacroFID))
1111 Loc =
SM.getSpellingLoc(Loc);
1117 StringRef ExpansionBuffer =
SM.getBufferData(ExpansionInfo.first);
1118 return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
1123 assert(Loc.
isMacroID() &&
"Only reasonable to call this on macros");
1125 while (
SM.isMacroArgExpansion(Loc))
1126 Loc =
SM.getImmediateExpansionRange(Loc).getBegin();
1132 if (!SpellLoc.
isFileID() ||
SM.isWrittenInScratchSpace(SpellLoc))
1138 Loc =
SM.getSpellingLoc(
SM.getImmediateExpansionRange(Loc).getBegin());
1144 StringRef ExpansionBuffer =
SM.getBufferData(ExpansionInfo.first);
1145 return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
1154 if (Str - 1 < BufferStart)
1157 if ((Str[0] ==
'\n' && Str[-1] ==
'\r') ||
1158 (Str[0] ==
'\r' && Str[-1] ==
'\n')) {
1159 if (Str - 2 < BufferStart)
1169 return *Str ==
'\\';
1177 if (LocInfo.first.isInvalid())
1180 StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
1186 StringRef Rest = Buffer.substr(
Line - Buffer.data());
1187 size_t NumWhitespaceChars = Rest.find_first_not_of(
" \t");
1188 return NumWhitespaceChars == StringRef::npos
1190 : Rest.take_front(NumWhitespaceChars);
1205 unsigned CharNo,
unsigned TokLen) {
1206 assert(FileLoc.
isMacroID() &&
"Must be a macro expansion");
1222 return SM.createExpansionLoc(SpellingLoc, II.
getBegin(), II.
getEnd(), TokLen);
1228 unsigned TokLen)
const {
1229 assert(Loc >= BufferStart && Loc <= BufferEnd &&
1230 "Location out of range for this buffer!");
1234 unsigned CharNo = Loc-BufferStart;
1235 if (FileLoc.isFileID())
1236 return FileLoc.getLocWithOffset(CharNo);
1240 assert(
PP &&
"This doesn't work on raw lexers");
1259 case '=':
return '#';
1260 case ')':
return ']';
1261 case '(':
return '[';
1262 case '!':
return '|';
1263 case '\'':
return '^';
1264 case '>':
return '}';
1265 case '/':
return '\\';
1266 case '<':
return '{';
1267 case '-':
return '~';
1282 L->
Diag(CP-2, diag::trigraph_ignored);
1287 L->
Diag(CP-2, diag::trigraph_converted) << StringRef(&Res, 1);
1299 if (Ptr[Size-1] !=
'\n' && Ptr[Size-1] !=
'\r')
1303 if ((Ptr[Size] ==
'\r' || Ptr[Size] ==
'\n') &&
1304 Ptr[Size-1] != Ptr[Size])
1317const char *Lexer::SkipEscapedNewLines(
const char *P) {
1319 const char *AfterEscape;
1322 }
else if (*P ==
'?') {
1324 if (P[1] !=
'?' || P[2] !=
'/')
1334 if (NewLineSize == 0)
return P;
1335 P = AfterEscape+NewLineSize;
1342 bool IncludeComments) {
1345 return std::nullopt;
1353 bool InvalidTemp =
false;
1354 StringRef
File =
SM.getBufferData(LocInfo.first, &InvalidTemp);
1356 return std::nullopt;
1358 const char *TokenBegin =
File.data() + LocInfo.second;
1361 Lexer lexer(
SM.getLocForStartOfFile(LocInfo.first), LangOpts,
File.begin(),
1362 TokenBegin,
File.end());
1373 bool IncludeComments) {
1374 const auto StartOfFile =
SM.getLocForStartOfFile(
SM.getFileID(Loc));
1375 while (Loc != StartOfFile) {
1378 return std::nullopt;
1384 if (!
Tok.is(tok::comment) || IncludeComments) {
1388 return std::nullopt;
1397 const LangOptions &LangOpts,
bool SkipTrailingWhitespaceAndNewLine) {
1399 if (!
Tok ||
Tok->isNot(TKind))
1404 unsigned NumWhitespaceChars = 0;
1405 if (SkipTrailingWhitespaceAndNewLine) {
1406 const char *TokenEnd =
SM.getCharacterData(TokenLoc) +
Tok->getLength();
1407 unsigned char C = *TokenEnd;
1410 NumWhitespaceChars++;
1414 if (
C ==
'\n' ||
C ==
'\r') {
1417 NumWhitespaceChars++;
1418 if ((
C ==
'\n' ||
C ==
'\r') &&
C != PrevC)
1419 NumWhitespaceChars++;
1444 if (Ptr[0] ==
'\\') {
1450 return {
'\\', Size};
1460 Diag(Ptr, diag::backslash_newline_space);
1463 Size += EscapedNewLineSize;
1464 Ptr += EscapedNewLineSize;
1467 auto CharAndSize = getCharAndSizeSlow(Ptr,
Tok);
1468 CharAndSize.Size += Size;
1473 return {
'\\',
Size};
1477 if (Ptr[0] ==
'?' && Ptr[1] ==
'?') {
1481 LangOpts.Trigraphs)) {
1487 if (
C ==
'\\')
goto Slash;
1493 return {*Ptr,
Size + 1u};
1507 if (Ptr[0] ==
'\\') {
1513 return {
'\\',
Size};
1518 Size += EscapedNewLineSize;
1519 Ptr += EscapedNewLineSize;
1522 auto CharAndSize = getCharAndSizeSlowNoWarn(Ptr, LangOpts);
1523 CharAndSize.Size +=
Size;
1528 return {
'\\',
Size};
1532 if (LangOpts.Trigraphs && Ptr[0] ==
'?' && Ptr[1] ==
'?') {
1538 if (
C ==
'\\')
goto Slash;
1544 return {*Ptr,
Size + 1u};
1552void Lexer::SetByteOffset(
unsigned Offset,
bool StartOfLine) {
1553 BufferPtr = BufferStart + Offset;
1554 if (BufferPtr > BufferEnd)
1555 BufferPtr = BufferEnd;
1559 IsAtStartOfLine = StartOfLine;
1560 IsAtPhysicalStartOfLine = StartOfLine;
1564 static const llvm::sys::UnicodeCharSet UnicodeWhitespaceChars(
1566 return UnicodeWhitespaceChars.contains(Codepoint);
1571 llvm::raw_svector_ostream CharOS(CharBuf);
1572 llvm::write_hex(CharOS,
C, llvm::HexPrintStyle::Upper, 4);
1583 bool IsStart,
bool &IsExtension) {
1584 static const llvm::sys::UnicodeCharSet MathStartChars(
1586 static const llvm::sys::UnicodeCharSet MathContinueChars(
1588 if (MathStartChars.contains(
C) ||
1589 (!IsStart && MathContinueChars.contains(
C))) {
1597 bool &IsExtension) {
1598 if (LangOpts.AsmPreprocessor) {
1600 }
else if (LangOpts.DollarIdents &&
'$' ==
C) {
1602 }
else if (LangOpts.CPlusPlus || LangOpts.C23) {
1607 static const llvm::sys::UnicodeCharSet XIDStartChars(
XIDStartRanges);
1609 if (
C ==
'_' || XIDStartChars.contains(
C) || XIDContinueChars.contains(
C))
1613 }
else if (LangOpts.C11) {
1614 static const llvm::sys::UnicodeCharSet C11AllowedIDChars(
1616 return C11AllowedIDChars.contains(
C);
1618 static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
1620 return C99AllowedIDChars.contains(
C);
1625 bool &IsExtension) {
1626 assert(
C > 0x7F &&
"isAllowedInitiallyIDChar called with an ASCII codepoint");
1627 IsExtension =
false;
1628 if (LangOpts.AsmPreprocessor) {
1631 if (LangOpts.CPlusPlus || LangOpts.C23) {
1632 static const llvm::sys::UnicodeCharSet XIDStartChars(
XIDStartRanges);
1633 if (XIDStartChars.contains(
C))
1641 static const llvm::sys::UnicodeCharSet C11DisallowedInitialIDChars(
1643 return !C11DisallowedInitialIDChars.contains(
C);
1645 static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
1647 return !C99DisallowedInitialIDChars.contains(
C);
1653 static const llvm::sys::UnicodeCharSet MathStartChars(
1655 static const llvm::sys::UnicodeCharSet MathContinueChars(
1658 (void)MathStartChars;
1659 (void)MathContinueChars;
1660 assert((MathStartChars.contains(
C) || MathContinueChars.contains(
C)) &&
1661 "Unexpected mathematical notation codepoint");
1662 Diags.
Report(Range.getBegin(), diag::ext_mathematical_notation)
1675 if (!Diags.
isIgnored(diag::warn_c99_compat_unicode_id, Range.getBegin())) {
1677 CannotAppearInIdentifier = 0,
1678 CannotStartIdentifier
1681 static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
1683 static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
1685 if (!C99AllowedIDChars.contains(
C)) {
1686 Diags.
Report(Range.getBegin(), diag::warn_c99_compat_unicode_id)
1688 << CannotAppearInIdentifier;
1689 }
else if (
IsFirst && C99DisallowedInitialIDChars.contains(
C)) {
1690 Diags.
Report(Range.getBegin(), diag::warn_c99_compat_unicode_id)
1692 << CannotStartIdentifier;
1704 struct HomoglyphPair {
1707 bool operator<(HomoglyphPair R)
const {
return Character < R.Character; }
1709 static constexpr HomoglyphPair SortedHomoglyphs[] = {
1762 std::lower_bound(std::begin(SortedHomoglyphs),
1763 std::end(SortedHomoglyphs) - 1, HomoglyphPair{
C,
'\0'});
1764 if (Homoglyph->Character ==
C) {
1765 if (Homoglyph->LooksLike) {
1766 const char LooksLikeStr[] = {Homoglyph->LooksLike, 0};
1767 Diags.
Report(Range.getBegin(), diag::warn_utf8_symbol_homoglyph)
1770 Diags.
Report(Range.getBegin(), diag::warn_utf8_symbol_zero_width)
1790 bool InvalidOnlyAtStart =
IsFirst && !IsIDStart && IsIDContinue;
1792 if (!
IsFirst || InvalidOnlyAtStart) {
1793 Diags.
Report(Range.getBegin(), diag::err_character_not_allowed_identifier)
1797 Diags.
Report(Range.getBegin(), diag::err_character_not_allowed)
1803bool Lexer::tryConsumeIdentifierUCN(
const char *&CurPtr,
unsigned Size,
1805 const char *UCNPtr = CurPtr +
Size;
1806 uint32_t CodePoint = tryReadUCN(UCNPtr, CurPtr,
nullptr);
1807 if (CodePoint == 0) {
1810 bool IsExtension =
false;
1815 !
PP->isPreprocessedOutput())
1817 PP->getDiagnostics(), LangOpts, CodePoint,
1835 if ((UCNPtr - CurPtr == 6 && CurPtr[1] ==
'u') ||
1836 (UCNPtr - CurPtr == 10 && CurPtr[1] ==
'U'))
1839 while (CurPtr != UCNPtr)
1840 (void)getAndAdvanceChar(CurPtr,
Result);
1844bool Lexer::tryConsumeIdentifierUTF8Char(
const char *&CurPtr,
Token &
Result) {
1845 llvm::UTF32 CodePoint;
1850 unsigned FirstCodeUnitSize;
1851 getCharAndSize(CurPtr, FirstCodeUnitSize);
1852 const char *CharStart = CurPtr + FirstCodeUnitSize - 1;
1853 const char *UnicodePtr = CharStart;
1855 llvm::ConversionResult ConvResult = llvm::convertUTF8Sequence(
1856 (
const llvm::UTF8 **)&UnicodePtr, (
const llvm::UTF8 *)BufferEnd,
1857 &CodePoint, llvm::strictConversion);
1858 if (ConvResult != llvm::conversionOK)
1861 bool IsExtension =
false;
1868 !
PP->isPreprocessedOutput())
1870 PP->getDiagnostics(), LangOpts, CodePoint,
1878 PP->getDiagnostics(), CodePoint,
1890 ConsumeChar(CurPtr, FirstCodeUnitSize,
Result);
1891 CurPtr = UnicodePtr;
1895bool Lexer::LexUnicodeIdentifierStart(
Token &
Result, uint32_t
C,
1896 const char *CurPtr) {
1897 bool IsExtension =
false;
1900 !
PP->isPreprocessedOutput()) {
1912 return LexIdentifierContinue(
Result, CurPtr);
1916 !
PP->isPreprocessedOutput() && !
isASCII(*BufferPtr) &&
1928 PP->getDiagnostics(), LangOpts,
C,
1937 FormTokenWithChars(
Result, CurPtr, tok::unknown);
1943 [[maybe_unused]]
const char *BufferEnd) {
1945 alignas(16)
static constexpr char AsciiIdentifierRange[16] = {
1946 '_',
'_',
'A',
'Z',
'a',
'z',
'0',
'9',
1948 constexpr ssize_t BytesPerRegister = 16;
1950 __m128i AsciiIdentifierRangeV =
1953 while (LLVM_LIKELY(BufferEnd - CurPtr >= BytesPerRegister)) {
1960 if (Consumed == BytesPerRegister)
1966 unsigned char C = *CurPtr;
1972bool Lexer::LexIdentifierContinue(
Token &
Result,
const char *CurPtr) {
1981 unsigned char C = getCharAndSize(CurPtr, Size);
1983 CurPtr = ConsumeChar(CurPtr, Size,
Result);
1988 if (!LangOpts.DollarIdents)
1992 Diag(CurPtr, diag::ext_dollar_in_identifier);
1993 CurPtr = ConsumeChar(CurPtr, Size,
Result);
1996 if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result))
2004 const char *IdStart = BufferPtr;
2005 FormTokenWithChars(
Result, CurPtr, tok::raw_identifier);
2006 Result.setRawIdentifierData(IdStart);
2015 const IdentifierInfo *II =
PP->LookUpIdentifierInfo(
Result);
2022 if (isCodeCompletionPoint(CurPtr)) {
2024 Result.setKind(tok::code_completion);
2030 assert(*CurPtr == 0 &&
"Completion character must be 0");
2035 if (CurPtr < BufferEnd) {
2047 return PP->HandleIdentifier(
Result);
2054bool Lexer::isHexaLiteral(
const char *Start,
const LangOptions &LangOpts) {
2056 char C1 = CharAndSize1.Char;
2062 char C2 = CharAndSize2.Char;
2063 return (C2 ==
'x' || C2 ==
'X');
2069bool Lexer::LexNumericConstant(
Token &
Result,
const char *CurPtr) {
2071 char C = getCharAndSize(CurPtr, Size);
2074 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2076 if (LangOpts.HLSL &&
C ==
'.' && (*CurPtr ==
'x' || *CurPtr ==
'r')) {
2080 C = getCharAndSize(CurPtr, Size);
2084 if ((
C ==
'-' ||
C ==
'+') && (PrevCh ==
'E' || PrevCh ==
'e')) {
2087 if (!LangOpts.MicrosoftExt || !isHexaLiteral(BufferPtr, LangOpts))
2088 return LexNumericConstant(
Result, ConsumeChar(CurPtr, Size,
Result));
2092 if ((
C ==
'-' ||
C ==
'+') && (PrevCh ==
'P' || PrevCh ==
'p')) {
2096 bool IsHexFloat =
true;
2097 if (!LangOpts.C99) {
2098 if (!isHexaLiteral(BufferPtr, LangOpts))
2100 else if (!LangOpts.CPlusPlus17 &&
2101 std::find(BufferPtr, CurPtr,
'_') != CurPtr)
2105 return LexNumericConstant(
Result, ConsumeChar(CurPtr, Size,
Result));
2109 if (
C ==
'\'' && LangOpts.AllowLiteralDigitSeparator) {
2113 Diag(CurPtr, LangOpts.CPlusPlus
2114 ? diag::warn_cxx11_compat_digit_separator
2115 : diag::warn_c23_compat_digit_separator);
2116 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2117 CurPtr = ConsumeChar(CurPtr, NextSize,
Result);
2118 return LexNumericConstant(
Result, CurPtr);
2123 if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result))
2124 return LexNumericConstant(
Result, CurPtr);
2126 return LexNumericConstant(
Result, CurPtr);
2129 const char *TokStart = BufferPtr;
2130 FormTokenWithChars(
Result, CurPtr, tok::numeric_constant);
2131 Result.setLiteralData(TokStart);
2137const char *Lexer::LexUDSuffix(
Token &
Result,
const char *CurPtr,
2138 bool IsStringLiteral) {
2139 assert(LangOpts.CPlusPlus);
2143 char C = getCharAndSize(CurPtr, Size);
2144 bool Consumed =
false;
2147 if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result))
2149 else if (!
isASCII(
C) && tryConsumeIdentifierUTF8Char(CurPtr,
Result))
2155 if (!LangOpts.CPlusPlus11) {
2158 C ==
'_' ? diag::warn_cxx11_compat_user_defined_literal
2159 : diag::warn_cxx11_compat_reserved_user_defined_literal)
2170 bool IsUDSuffix =
false;
2173 else if (IsStringLiteral && LangOpts.CPlusPlus14) {
2177 const unsigned MaxStandardSuffixLength = 3;
2178 char Buffer[MaxStandardSuffixLength] = {
C };
2179 unsigned Consumed =
Size;
2182 auto [
Next, NextSize] =
2186 const StringRef CompleteSuffix(Buffer, Chars);
2192 if (Chars == MaxStandardSuffixLength)
2196 Buffer[Chars++] =
Next;
2197 Consumed += NextSize;
2203 Diag(CurPtr, LangOpts.MSVCCompat
2204 ? diag::ext_ms_reserved_user_defined_literal
2205 : diag::ext_reserved_user_defined_literal)
2210 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2215 C = getCharAndSize(CurPtr, Size);
2217 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2218 }
else if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result)) {
2219 }
else if (!
isASCII(
C) && tryConsumeIdentifierUTF8Char(CurPtr,
Result)) {
2229bool Lexer::LexStringLiteral(
Token &
Result,
const char *CurPtr,
2231 const char *AfterQuote = CurPtr;
2233 const char *NulCharacter =
nullptr;
2236 (Kind == tok::utf8_string_literal ||
2237 Kind == tok::utf16_string_literal ||
2238 Kind == tok::utf32_string_literal))
2239 Diag(BufferPtr, LangOpts.CPlusPlus ? diag::warn_cxx98_compat_unicode_literal
2240 : diag::warn_c99_compat_unicode_literal);
2242 char C = getAndAdvanceChar(CurPtr,
Result);
2247 C = getAndAdvanceChar(CurPtr,
Result);
2249 if (
C ==
'\n' ||
C ==
'\r' ||
2250 (
C == 0 && CurPtr-1 == BufferEnd)) {
2252 Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 1;
2253 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2258 if (isCodeCompletionPoint(CurPtr-1)) {
2260 codeCompleteIncludedFile(AfterQuote, CurPtr - 1,
false);
2262 PP->CodeCompleteNaturalLanguage();
2263 FormTokenWithChars(
Result, CurPtr - 1, tok::unknown);
2268 NulCharacter = CurPtr-1;
2270 C = getAndAdvanceChar(CurPtr,
Result);
2274 if (LangOpts.CPlusPlus)
2275 CurPtr = LexUDSuffix(
Result, CurPtr,
true);
2279 Diag(NulCharacter, diag::null_in_char_or_string) << 1;
2282 const char *TokStart = BufferPtr;
2283 FormTokenWithChars(
Result, CurPtr, Kind);
2284 Result.setLiteralData(TokStart);
2290bool Lexer::LexRawStringLiteral(
Token &
Result,
const char *CurPtr,
2298 Diag(BufferPtr, diag::warn_cxx98_compat_raw_string_literal);
2300 unsigned PrefixLen = 0;
2304 llvm::is_contained({
'$',
'@',
'`'}, CurPtr[PrefixLen])) {
2305 const char *Pos = &CurPtr[PrefixLen];
2306 Diag(Pos, LangOpts.CPlusPlus26
2307 ? diag::warn_cxx26_compat_raw_string_literal_character_set
2308 : diag::ext_cxx26_raw_string_literal_character_set)
2309 << StringRef(Pos, 1);
2315 if (CurPtr[PrefixLen] !=
'(') {
2317 const char *PrefixEnd = &CurPtr[PrefixLen];
2318 if (PrefixLen == 16) {
2319 Diag(PrefixEnd, diag::err_raw_delim_too_long);
2320 }
else if (*PrefixEnd ==
'\n') {
2321 Diag(PrefixEnd, diag::err_invalid_newline_raw_delim);
2323 Diag(PrefixEnd, diag::err_invalid_char_raw_delim)
2324 << StringRef(PrefixEnd, 1);
2336 if (
C == 0 && CurPtr-1 == BufferEnd) {
2342 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2347 const char *Prefix = CurPtr;
2348 CurPtr += PrefixLen + 1;
2355 if (strncmp(CurPtr, Prefix, PrefixLen) == 0 && CurPtr[PrefixLen] ==
'"') {
2356 CurPtr += PrefixLen + 1;
2359 }
else if (
C == 0 && CurPtr-1 == BufferEnd) {
2361 Diag(BufferPtr, diag::err_unterminated_raw_string)
2362 << StringRef(Prefix, PrefixLen);
2363 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2369 if (LangOpts.CPlusPlus)
2370 CurPtr = LexUDSuffix(
Result, CurPtr,
true);
2373 const char *TokStart = BufferPtr;
2374 FormTokenWithChars(
Result, CurPtr, Kind);
2375 Result.setLiteralData(TokStart);
2381bool Lexer::LexAngledStringLiteral(
Token &
Result,
const char *CurPtr) {
2383 const char *NulCharacter =
nullptr;
2384 const char *AfterLessPos = CurPtr;
2385 char C = getAndAdvanceChar(CurPtr,
Result);
2390 C = getAndAdvanceChar(CurPtr,
Result);
2393 (
C == 0 && (CurPtr - 1 == BufferEnd))) {
2396 FormTokenWithChars(
Result, AfterLessPos, tok::less);
2401 if (isCodeCompletionPoint(CurPtr - 1)) {
2402 codeCompleteIncludedFile(AfterLessPos, CurPtr - 1,
true);
2404 FormTokenWithChars(
Result, CurPtr - 1, tok::unknown);
2407 NulCharacter = CurPtr-1;
2409 C = getAndAdvanceChar(CurPtr,
Result);
2414 Diag(NulCharacter, diag::null_in_char_or_string) << 1;
2417 const char *TokStart = BufferPtr;
2418 FormTokenWithChars(
Result, CurPtr, tok::header_name);
2419 Result.setLiteralData(TokStart);
2423void Lexer::codeCompleteIncludedFile(
const char *PathStart,
2424 const char *CompletionPoint,
2427 StringRef PartialPath(PathStart, CompletionPoint - PathStart);
2428 llvm::StringRef SlashChars = LangOpts.MSVCCompat ?
"/\\" :
"/";
2429 auto Slash = PartialPath.find_last_of(SlashChars);
2431 (Slash == StringRef::npos) ?
"" : PartialPath.take_front(Slash);
2432 const char *StartOfFilename =
2433 (Slash == StringRef::npos) ? PathStart : PathStart + Slash + 1;
2435 PP->setCodeCompletionIdentifierInfo(&
PP->getIdentifierTable().get(
2436 StringRef(StartOfFilename, CompletionPoint - StartOfFilename)));
2439 while (CompletionPoint < BufferEnd) {
2440 char Next = *(CompletionPoint + 1);
2444 if (
Next == (IsAngled ?
'>' :
'"'))
2446 if (SlashChars.contains(
Next))
2450 PP->setCodeCompletionTokenRange(
2451 FileLoc.getLocWithOffset(StartOfFilename - BufferStart),
2452 FileLoc.getLocWithOffset(CompletionPoint - BufferStart));
2453 PP->CodeCompleteIncludedFile(Dir, IsAngled);
2458bool Lexer::LexCharConstant(
Token &
Result,
const char *CurPtr,
2461 const char *NulCharacter =
nullptr;
2464 if (Kind == tok::utf16_char_constant || Kind == tok::utf32_char_constant)
2465 Diag(BufferPtr, LangOpts.CPlusPlus
2466 ? diag::warn_cxx98_compat_unicode_literal
2467 : diag::warn_c99_compat_unicode_literal);
2468 else if (Kind == tok::utf8_char_constant)
2469 Diag(BufferPtr, LangOpts.CPlusPlus
2470 ? diag::warn_cxx14_compat_u8_character_literal
2471 : diag::warn_c17_compat_u8_character_literal);
2474 char C = getAndAdvanceChar(CurPtr,
Result);
2477 Diag(BufferPtr, diag::ext_empty_character);
2478 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2485 C = getAndAdvanceChar(CurPtr,
Result);
2487 if (
C ==
'\n' ||
C ==
'\r' ||
2488 (
C == 0 && CurPtr-1 == BufferEnd)) {
2490 Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 0;
2491 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2496 if (isCodeCompletionPoint(CurPtr-1)) {
2497 PP->CodeCompleteNaturalLanguage();
2498 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2503 NulCharacter = CurPtr-1;
2505 C = getAndAdvanceChar(CurPtr,
Result);
2509 if (LangOpts.CPlusPlus)
2510 CurPtr = LexUDSuffix(
Result, CurPtr,
false);
2514 Diag(NulCharacter, diag::null_in_char_or_string) << 0;
2517 const char *TokStart = BufferPtr;
2518 FormTokenWithChars(
Result, CurPtr, Kind);
2519 Result.setLiteralData(TokStart);
2527bool Lexer::SkipWhitespace(
Token &
Result,
const char *CurPtr) {
2531 unsigned char Char = *CurPtr;
2533 const char *lastNewLine =
nullptr;
2534 auto setLastNewLine = [&](
const char *Ptr) {
2540 setLastNewLine(CurPtr - 1);
2559 if (*CurPtr ==
'\n')
2560 setLastNewLine(CurPtr);
2567 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2569 IsAtStartOfLine =
true;
2570 IsAtPhysicalStartOfLine =
true;
2577 char PrevChar = CurPtr[-1];
2585 if (NewLinePtr && lastNewLine && NewLinePtr != lastNewLine &&
PP) {
2586 if (
auto *Handler =
PP->getEmptylineHandler())
2602bool Lexer::SkipLineComment(
Token &
Result,
const char *CurPtr) {
2607 Diag(BufferPtr, diag::ext_line_comment);
2625 bool UnicodeDecodingAlreadyDiagnosed =
false;
2632 C !=
'\n' &&
C !=
'\r') {
2634 UnicodeDecodingAlreadyDiagnosed =
false;
2638 unsigned Length = llvm::getUTF8SequenceSize(
2639 (
const llvm::UTF8 *)CurPtr, (
const llvm::UTF8 *)BufferEnd);
2642 Diag(CurPtr, diag::warn_invalid_utf8_in_comment);
2643 UnicodeDecodingAlreadyDiagnosed =
true;
2646 UnicodeDecodingAlreadyDiagnosed =
false;
2652 const char *NextLine = CurPtr;
2655 const char *EscapePtr = CurPtr-1;
2656 bool HasSpace =
false;
2662 if (*EscapePtr ==
'\\')
2665 else if (EscapePtr[0] ==
'/' && EscapePtr[-1] ==
'?' &&
2666 EscapePtr[-2] ==
'?' && LangOpts.Trigraphs)
2668 CurPtr = EscapePtr-2;
2674 Diag(EscapePtr, diag::backslash_newline_space);
2681 const char *OldPtr = CurPtr;
2684 C = getAndAdvanceChar(CurPtr,
Result);
2689 if (
C != 0 && CurPtr == OldPtr+1) {
2697 if (CurPtr != OldPtr + 1 &&
C !=
'/' &&
2698 (CurPtr == BufferEnd + 1 || CurPtr[0] !=
'/')) {
2699 for (; OldPtr != CurPtr; ++OldPtr)
2700 if (OldPtr[0] ==
'\n' || OldPtr[0] ==
'\r') {
2704 const char *ForwardPtr = CurPtr;
2707 if (ForwardPtr[0] ==
'/' && ForwardPtr[1] ==
'/')
2712 Diag(OldPtr-1, diag::ext_multi_line_line_comment);
2717 if (
C ==
'\r' ||
C ==
'\n' || CurPtr == BufferEnd + 1) {
2722 if (
C ==
'\0' && isCodeCompletionPoint(CurPtr-1)) {
2723 PP->CodeCompleteNaturalLanguage();
2740 return SaveLineComment(
Result, CurPtr);
2754 NewLinePtr = CurPtr++;
2767bool Lexer::SaveLineComment(
Token &
Result,
const char *CurPtr) {
2770 FormTokenWithChars(
Result, CurPtr, tok::comment);
2782 assert(Spelling[0] ==
'/' && Spelling[1] ==
'/' &&
"Not line comment?");
2786 Result.setKind(tok::comment);
2797 assert(CurPtr[0] ==
'\n' || CurPtr[0] ==
'\r');
2800 const char *TrigraphPos =
nullptr;
2802 const char *SpacePos =
nullptr;
2809 if (CurPtr[0] ==
'\n' || CurPtr[0] ==
'\r') {
2811 if (CurPtr[0] == CurPtr[1])
2825 if (*CurPtr ==
'\\') {
2827 }
else if (CurPtr[0] ==
'/' && CurPtr[-1] ==
'?' && CurPtr[-2] ==
'?') {
2829 TrigraphPos = CurPtr - 2;
2840 if (*CurPtr !=
'\n' && *CurPtr !=
'\r')
2849 L->
Diag(TrigraphPos, diag::trigraph_ignored_block_comment);
2853 L->
Diag(TrigraphPos, diag::trigraph_ends_block_comment);
2858 L->
Diag(CurPtr + 1, diag::escaped_newline_block_comment_end);
2862 L->
Diag(SpacePos, diag::backslash_newline_space);
2868#include <emmintrin.h>
2883bool Lexer::SkipBlockComment(
Token &
Result,
const char *CurPtr) {
2893 unsigned char C = getCharAndSize(CurPtr, CharSize);
2895 if (
C == 0 && CurPtr == BufferEnd+1) {
2897 Diag(BufferPtr, diag::err_unterminated_block_comment);
2903 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2920 bool UnicodeDecodingAlreadyDiagnosed =
false;
2925 if (CurPtr + 24 < BufferEnd &&
2928 !(
PP &&
PP->getCodeCompletionFileLoc() == FileLoc)) {
2930 while (
C !=
'/' && (
intptr_t)CurPtr % 16 != 0) {
2935 if (
C ==
'/')
goto FoundSlash;
2939 while (CurPtr + 16 < BufferEnd) {
2941 if (LLVM_UNLIKELY(Mask != 0)) {
2951 CurPtr += llvm::countr_zero<unsigned>(cmp) + 1;
2957 __vector
unsigned char LongUTF = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
2958 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
2959 0x80, 0x80, 0x80, 0x80};
2960 __vector
unsigned char Slashes = {
2961 '/',
'/',
'/',
'/',
'/',
'/',
'/',
'/',
2962 '/',
'/',
'/',
'/',
'/',
'/',
'/',
'/'
2964 while (CurPtr + 16 < BufferEnd) {
2966 vec_any_ge(*(
const __vector
unsigned char *)CurPtr, LongUTF)))
2968 if (
vec_any_eq(*(
const __vector
unsigned char *)CurPtr, Slashes)) {
2975 while (CurPtr + 16 < BufferEnd) {
2976 bool HasNonASCII =
false;
2977 for (
unsigned I = 0; I < 16; ++I)
2978 HasNonASCII |= !
isASCII(CurPtr[I]);
2980 if (LLVM_UNLIKELY(HasNonASCII))
2983 bool HasSlash =
false;
2984 for (
unsigned I = 0; I < 16; ++I)
2985 HasSlash |= CurPtr[I] ==
'/';
2999 while (
C !=
'/' &&
C !=
'\0') {
3001 UnicodeDecodingAlreadyDiagnosed =
false;
3008 unsigned Length = llvm::getUTF8SequenceSize(
3009 (
const llvm::UTF8 *)CurPtr - 1, (
const llvm::UTF8 *)BufferEnd);
3012 Diag(CurPtr - 1, diag::warn_invalid_utf8_in_comment);
3013 UnicodeDecodingAlreadyDiagnosed =
true;
3015 UnicodeDecodingAlreadyDiagnosed =
false;
3016 CurPtr += Length - 1;
3023 if (CurPtr[-2] ==
'*')
3026 if ((CurPtr[-2] ==
'\n' || CurPtr[-2] ==
'\r')) {
3028 LangOpts.Trigraphs)) {
3034 if (CurPtr[0] ==
'*' && CurPtr[1] !=
'/') {
3039 Diag(CurPtr-1, diag::warn_nested_block_comment);
3041 }
else if (
C == 0 && CurPtr == BufferEnd+1) {
3043 Diag(BufferPtr, diag::err_unterminated_block_comment);
3052 FormTokenWithChars(
Result, CurPtr, tok::unknown);
3058 }
else if (
C ==
'\0' && isCodeCompletionPoint(CurPtr-1)) {
3059 PP->CodeCompleteNaturalLanguage();
3077 FormTokenWithChars(
Result, CurPtr, tok::comment);
3086 SkipWhitespace(
Result, CurPtr + 1);
3104 "Must be in a preprocessing directive!");
3109 const char *CurPtr = BufferPtr;
3111 char Char = getAndAdvanceChar(CurPtr, Tmp);
3119 if (CurPtr-1 != BufferEnd) {
3120 if (isCodeCompletionPoint(CurPtr-1)) {
3121 PP->CodeCompleteNaturalLanguage();
3136 assert(CurPtr[-1] == Char &&
"Trigraphs for newline?");
3137 BufferPtr = CurPtr-1;
3141 if (Tmp.
is(tok::code_completion)) {
3143 PP->CodeCompleteNaturalLanguage();
3146 assert(Tmp.
is(tok::eod) &&
"Unexpected token!");
3158bool Lexer::LexEndOfFile(
Token &
Result,
const char *CurPtr) {
3166 FormTokenWithChars(
Result, CurPtr, tok::eod);
3177 Result.startToken();
3178 BufferPtr = BufferEnd;
3179 FormTokenWithChars(Result, BufferEnd, tok::eof);
3183 if (
PP->isRecordingPreamble() &&
PP->isInPrimaryFile()) {
3189 MIOpt.ExitTopLevelConditional();
3197 if (
PP->getCodeCompletionFileLoc() != FileLoc)
3199 diag::err_pp_unterminated_conditional);
3206 if (CurPtr != BufferStart && (CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r'))
3207 Diag(BufferEnd, diag::warn_no_newline_eof)
3219std::optional<Token> Lexer::peekNextPPToken() {
3220 assert(!
LexingRawMode &&
"How can we expand a macro from a skipping buffer?");
3222 if (isDependencyDirectivesLexer()) {
3223 if (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size())
3224 return std::nullopt;
3226 (void)convertDependencyDirectiveToken(
3227 DepDirectives.front().Tokens[NextDepDirectiveTokenIndex],
Result);
3237 const char *TmpBufferPtr = BufferPtr;
3239 bool atStartOfLine = IsAtStartOfLine;
3240 bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
3241 bool leadingSpace = HasLeadingSpace;
3242 MultipleIncludeOpt MIOptState =
MIOpt;
3248 BufferPtr = TmpBufferPtr;
3250 HasLeadingSpace = leadingSpace;
3251 IsAtStartOfLine = atStartOfLine;
3252 IsAtPhysicalStartOfLine = atPhysicalStartOfLine;
3257 if (
Tok.
is(tok::eof))
3258 return std::nullopt;
3265 const char *Terminator = CMK ==
CMK_Perforce ?
"<<<<\n" :
">>>>>>>";
3267 auto RestOfBuffer = StringRef(CurPtr, BufferEnd - CurPtr).substr(TermLen);
3268 size_t Pos = RestOfBuffer.find(Terminator);
3269 while (Pos != StringRef::npos) {
3272 (RestOfBuffer[Pos - 1] !=
'\r' && RestOfBuffer[Pos - 1] !=
'\n')) {
3273 RestOfBuffer = RestOfBuffer.substr(Pos+TermLen);
3274 Pos = RestOfBuffer.find(Terminator);
3277 return RestOfBuffer.data()+Pos;
3286bool Lexer::IsStartOfConflictMarker(
const char *CurPtr) {
3288 if (CurPtr != BufferStart &&
3289 CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')
3293 if (!StringRef(CurPtr, BufferEnd - CurPtr).starts_with(
"<<<<<<<") &&
3294 !StringRef(CurPtr, BufferEnd - CurPtr).starts_with(
">>>> "))
3309 Diag(CurPtr, diag::err_conflict_marker);
3310 CurrentConflictMarkerState =
Kind;
3314 while (*CurPtr !=
'\r' && *CurPtr !=
'\n') {
3315 assert(CurPtr != BufferEnd &&
"Didn't find end of line");
3330bool Lexer::HandleEndOfConflictMarker(
const char *CurPtr) {
3332 if (CurPtr != BufferStart &&
3333 CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')
3342 for (
unsigned i = 1; i != 4; ++i)
3343 if (CurPtr[i] != CurPtr[0])
3350 CurrentConflictMarkerState)) {
3354 while (CurPtr != BufferEnd && *CurPtr !=
'\r' && *CurPtr !=
'\n')
3360 CurrentConflictMarkerState =
CMK_None;
3368 const char *BufferEnd) {
3369 if (CurPtr == BufferEnd)
3372 for (; CurPtr != BufferEnd; ++CurPtr) {
3373 if (CurPtr[0] ==
'#' && CurPtr[1] ==
'>')
3379bool Lexer::lexEditorPlaceholder(
Token &
Result,
const char *CurPtr) {
3380 assert(CurPtr[-1] ==
'<' && CurPtr[0] ==
'#' &&
"Not a placeholder!");
3386 const char *Start = CurPtr - 1;
3387 if (!LangOpts.AllowEditorPlaceholders)
3388 Diag(Start, diag::err_placeholder_in_source);
3390 FormTokenWithChars(
Result, End, tok::raw_identifier);
3391 Result.setRawIdentifierData(Start);
3398bool Lexer::isCodeCompletionPoint(
const char *CurPtr)
const {
3399 if (
PP &&
PP->isCodeCompletionEnabled()) {
3400 SourceLocation Loc = FileLoc.getLocWithOffset(CurPtr-BufferStart);
3401 return Loc ==
PP->getCodeCompletionLoc();
3412 if (Opts.CPlusPlus23)
3413 DiagId = diag::warn_cxx23_delimited_escape_sequence;
3414 else if (Opts.C2y && !Named)
3415 DiagId = diag::warn_c2y_delimited_escape_sequence;
3417 DiagId = diag::ext_delimited_escape_sequence;
3423 if (!Opts.CPlusPlus)
3424 Ext = Named ? 2 : 1 ;
3428 Diags.
Report(Loc, DiagId) << Named << Ext;
3431std::optional<uint32_t> Lexer::tryReadNumericUCN(
const char *&StartPtr,
3432 const char *SlashLoc,
3435 char Kind = getCharAndSize(StartPtr, CharSize);
3436 assert((Kind ==
'u' || Kind ==
'U') &&
"expected a UCN");
3438 unsigned NumHexDigits;
3441 else if (Kind ==
'U')
3444 bool Delimited =
false;
3445 bool FoundEndDelimiter =
false;
3449 if (!LangOpts.CPlusPlus && !LangOpts.C99) {
3451 Diag(SlashLoc, diag::warn_ucn_not_valid_in_c89);
3452 return std::nullopt;
3455 const char *CurPtr = StartPtr + CharSize;
3456 const char *KindLoc = &CurPtr[-1];
3458 uint32_t CodePoint = 0;
3459 while (Count != NumHexDigits || Delimited) {
3460 char C = getCharAndSize(CurPtr, CharSize);
3461 if (!Delimited && Count == 0 &&
C ==
'{') {
3467 if (Delimited &&
C ==
'}') {
3469 FoundEndDelimiter =
true;
3473 unsigned Value = llvm::hexDigitValue(
C);
3474 if (
Value == std::numeric_limits<unsigned>::max()) {
3478 Diag(SlashLoc, diag::warn_delimited_ucn_incomplete)
3479 << StringRef(KindLoc, 1);
3480 return std::nullopt;
3483 if (CodePoint & 0xF000'0000) {
3485 Diag(KindLoc, diag::err_escape_too_large) << 0;
3486 return std::nullopt;
3497 Diag(SlashLoc, FoundEndDelimiter ? diag::warn_delimited_ucn_empty
3498 : diag::warn_ucn_escape_no_digits)
3499 << StringRef(KindLoc, 1);
3500 return std::nullopt;
3503 if (Delimited && Kind ==
'U') {
3505 Diag(SlashLoc, diag::err_hex_escape_no_digits) << StringRef(KindLoc, 1);
3506 return std::nullopt;
3509 if (!Delimited && Count != NumHexDigits) {
3511 Diag(SlashLoc, diag::warn_ucn_escape_incomplete);
3513 if (Count == 4 && NumHexDigits == 8) {
3514 CharSourceRange URange =
makeCharRange(*
this, KindLoc, KindLoc + 1);
3515 Diag(KindLoc, diag::note_ucn_four_not_eight)
3519 return std::nullopt;
3522 if (Delimited &&
PP)
3525 PP->getDiagnostics());
3532 if (CurPtr - StartPtr == (
ptrdiff_t)(Count + 1 + (Delimited ? 2 : 0)))
3535 while (StartPtr != CurPtr)
3536 (void)getAndAdvanceChar(StartPtr, *
Result);
3543std::optional<uint32_t> Lexer::tryReadNamedUCN(
const char *&StartPtr,
3544 const char *SlashLoc,
3549 char C = getCharAndSize(StartPtr, CharSize);
3550 assert(
C ==
'N' &&
"expected \\N{...}");
3552 const char *CurPtr = StartPtr + CharSize;
3553 const char *KindLoc = &CurPtr[-1];
3555 C = getCharAndSize(CurPtr, CharSize);
3558 Diag(SlashLoc, diag::warn_ucn_escape_incomplete);
3559 return std::nullopt;
3562 const char *StartName = CurPtr;
3563 bool FoundEndDelimiter =
false;
3564 llvm::SmallVector<char, 30> Buffer;
3566 C = getCharAndSize(CurPtr, CharSize);
3569 FoundEndDelimiter =
true;
3575 Buffer.push_back(
C);
3578 if (!FoundEndDelimiter || Buffer.empty()) {
3580 Diag(SlashLoc, FoundEndDelimiter ? diag::warn_delimited_ucn_empty
3581 : diag::warn_delimited_ucn_incomplete)
3582 << StringRef(KindLoc, 1);
3583 return std::nullopt;
3586 StringRef Name(Buffer.data(), Buffer.size());
3587 std::optional<char32_t>
Match =
3588 llvm::sys::unicode::nameToCodepointStrict(Name);
3589 std::optional<llvm::sys::unicode::LooseMatchingResult> LooseMatch;
3591 LooseMatch = llvm::sys::unicode::nameToCodepointLooseMatching(Name);
3593 Diag(StartName, diag::err_invalid_ucn_name)
3594 << StringRef(Buffer.data(), Buffer.size())
3597 Diag(StartName, diag::note_invalid_ucn_name_loose_matching)
3608 if (Diagnose &&
Match)
3611 PP->getDiagnostics());
3617 if (LooseMatch && Diagnose)
3618 Match = LooseMatch->CodePoint;
3625 if (CurPtr - StartPtr == (
ptrdiff_t)(Buffer.size() + 3))
3628 while (StartPtr != CurPtr)
3629 (void)getAndAdvanceChar(StartPtr, *
Result);
3633 return Match ? std::optional<uint32_t>(*
Match) : std::nullopt;
3636uint32_t Lexer::tryReadUCN(
const char *&StartPtr,
const char *SlashLoc,
3640 std::optional<uint32_t> CodePointOpt;
3641 char Kind = getCharAndSize(StartPtr, CharSize);
3642 if (Kind ==
'u' || Kind ==
'U')
3643 CodePointOpt = tryReadNumericUCN(StartPtr, SlashLoc,
Result);
3644 else if (Kind ==
'N')
3645 CodePointOpt = tryReadNamedUCN(StartPtr, SlashLoc,
Result);
3650 uint32_t CodePoint = *CodePointOpt;
3653 if (LangOpts.AsmPreprocessor)
3672 if (CodePoint < 0xA0) {
3676 if (CodePoint < 0x20 || CodePoint >= 0x7F)
3677 Diag(BufferPtr, diag::err_ucn_control_character);
3679 char C =
static_cast<char>(CodePoint);
3680 Diag(BufferPtr, diag::err_ucn_escape_basic_scs) << StringRef(&
C, 1);
3685 }
else if (CodePoint >= 0xD800 && CodePoint <= 0xDFFF) {
3690 if (LangOpts.CPlusPlus && !LangOpts.CPlusPlus11)
3691 Diag(BufferPtr, diag::warn_ucn_escape_surrogate);
3693 Diag(BufferPtr, diag::err_ucn_escape_invalid);
3701bool Lexer::CheckUnicodeWhitespace(
Token &
Result, uint32_t
C,
3702 const char *CurPtr) {
3705 Diag(BufferPtr, diag::ext_unicode_whitespace)
3714void Lexer::PropagateLineStartLeadingSpaceInfo(
Token &
Result) {
3715 IsAtStartOfLine =
Result.isAtStartOfLine();
3716 HasLeadingSpace =
Result.hasLeadingSpace();
3717 HasLeadingEmptyMacro =
Result.hasLeadingEmptyMacro();
3722 assert(!isDependencyDirectivesLexer());
3728 if (IsAtStartOfLine) {
3730 IsAtStartOfLine =
false;
3733 if (IsAtPhysicalStartOfLine) {
3735 IsAtPhysicalStartOfLine =
false;
3738 if (HasLeadingSpace) {
3740 HasLeadingSpace =
false;
3743 if (HasLeadingEmptyMacro) {
3745 HasLeadingEmptyMacro =
false;
3750 bool returnedToken = LexTokenInternal(
Result);
3752 assert((returnedToken || !isRawLex) &&
"Raw lex must succeed");
3753 return returnedToken;
3763 assert(!
Result.needsCleaning() &&
"Result needs cleaning");
3764 assert(!
Result.hasPtrData() &&
"Result has not been reset");
3767 const char *CurPtr = BufferPtr;
3781 FormTokenWithChars(Result, CurPtr, tok::unknown);
3790 unsigned SizeTmp, SizeTmp2;
3793 char Char = getAndAdvanceChar(CurPtr,
Result);
3797 NewLinePtr =
nullptr;
3802 if (CurPtr-1 == BufferEnd)
3803 return LexEndOfFile(
Result, CurPtr-1);
3806 if (isCodeCompletionPoint(CurPtr-1)) {
3809 FormTokenWithChars(
Result, CurPtr, tok::code_completion);
3814 Diag(CurPtr-1, diag::null_in_file);
3816 if (SkipWhitespace(
Result, CurPtr))
3825 if (LangOpts.MicrosoftExt) {
3827 Diag(CurPtr-1, diag::ext_ctrl_z_eof_microsoft);
3828 return LexEndOfFile(
Result, CurPtr-1);
3832 Kind = tok::unknown;
3836 if (CurPtr[0] ==
'\n')
3837 (void)getAndAdvanceChar(CurPtr,
Result);
3851 IsAtStartOfLine =
true;
3852 IsAtPhysicalStartOfLine =
true;
3853 NewLinePtr = CurPtr - 1;
3862 if (SkipWhitespace(
Result, CurPtr))
3872 SkipHorizontalWhitespace:
3874 if (SkipWhitespace(
Result, CurPtr))
3883 LineComment && (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP)) {
3884 if (SkipLineComment(
Result, CurPtr + 2))
3886 goto SkipIgnoredUnits;
3888 if (SkipBlockComment(
Result, CurPtr + 2))
3890 goto SkipIgnoredUnits;
3892 goto SkipHorizontalWhitespace;
3900 case '0':
case '1':
case '2':
case '3':
case '4':
3901 case '5':
case '6':
case '7':
case '8':
case '9':
3904 return LexNumericConstant(
Result, CurPtr);
3913 if (LangOpts.CPlusPlus11 || LangOpts.C11) {
3914 Char = getCharAndSize(CurPtr, SizeTmp);
3918 return LexStringLiteral(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3919 tok::utf16_string_literal);
3923 return LexCharConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3924 tok::utf16_char_constant);
3927 if (Char ==
'R' && LangOpts.RawStringLiterals &&
3928 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3929 return LexRawStringLiteral(
Result,
3930 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3932 tok::utf16_string_literal);
3935 char Char2 = getCharAndSize(CurPtr + SizeTmp, SizeTmp2);
3939 return LexStringLiteral(
Result,
3940 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3942 tok::utf8_string_literal);
3943 if (Char2 ==
'\'' && (LangOpts.CPlusPlus17 || LangOpts.C23))
3944 return LexCharConstant(
3945 Result, ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3947 tok::utf8_char_constant);
3949 if (Char2 ==
'R' && LangOpts.RawStringLiterals) {
3951 char Char3 = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
3954 return LexRawStringLiteral(
Result,
3955 ConsumeChar(ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3958 tok::utf8_string_literal);
3965 return LexIdentifierContinue(
Result, CurPtr);
3971 if (LangOpts.CPlusPlus11 || LangOpts.C11) {
3972 Char = getCharAndSize(CurPtr, SizeTmp);
3976 return LexStringLiteral(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3977 tok::utf32_string_literal);
3981 return LexCharConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3982 tok::utf32_char_constant);
3985 if (Char ==
'R' && LangOpts.RawStringLiterals &&
3986 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3987 return LexRawStringLiteral(
Result,
3988 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3990 tok::utf32_string_literal);
3994 return LexIdentifierContinue(
Result, CurPtr);
4000 if (LangOpts.RawStringLiterals) {
4001 Char = getCharAndSize(CurPtr, SizeTmp);
4004 return LexRawStringLiteral(
Result,
4005 ConsumeChar(CurPtr, SizeTmp,
Result),
4006 tok::string_literal);
4010 return LexIdentifierContinue(
Result, CurPtr);
4015 Char = getCharAndSize(CurPtr, SizeTmp);
4019 return LexStringLiteral(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
4020 tok::wide_string_literal);
4023 if (LangOpts.RawStringLiterals && Char ==
'R' &&
4024 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
4025 return LexRawStringLiteral(
Result,
4026 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4028 tok::wide_string_literal);
4032 return LexCharConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
4033 tok::wide_char_constant);
4038 case 'A':
case 'B':
case 'C':
case 'D':
case 'E':
case 'F':
case 'G':
4039 case 'H':
case 'I':
case 'J':
case 'K':
case 'M':
case 'N':
4040 case 'O':
case 'P':
case 'Q':
case 'S':
case 'T':
4041 case 'V':
case 'W':
case 'X':
case 'Y':
case 'Z':
4042 case 'a':
case 'b':
case 'c':
case 'd':
case 'e':
case 'f':
case 'g':
4043 case 'h':
case 'i':
case 'j':
case 'k':
case 'l':
case 'm':
case 'n':
4044 case 'o':
case 'p':
case 'q':
case 'r':
case 's':
case 't':
4045 case 'v':
case 'w':
case 'x':
case 'y':
case 'z':
4049 return LexIdentifierContinue(
Result, CurPtr);
4051 if (LangOpts.DollarIdents) {
4053 Diag(CurPtr-1, diag::ext_dollar_in_identifier);
4056 return LexIdentifierContinue(
Result, CurPtr);
4059 Kind = tok::unknown;
4066 return LexCharConstant(
Result, CurPtr, tok::char_constant);
4072 return LexStringLiteral(
Result, CurPtr,
4074 : tok::string_literal);
4078 Kind = tok::question;
4081 Kind = tok::l_square;
4084 Kind = tok::r_square;
4087 Kind = tok::l_paren;
4090 Kind = tok::r_paren;
4093 Kind = tok::l_brace;
4096 Kind = tok::r_brace;
4099 Char = getCharAndSize(CurPtr, SizeTmp);
4100 if (Char >=
'0' && Char <=
'9') {
4104 return LexNumericConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result));
4105 }
else if (LangOpts.CPlusPlus && Char ==
'*') {
4106 Kind = tok::periodstar;
4108 }
else if (Char ==
'.' &&
4109 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
'.') {
4110 Kind = tok::ellipsis;
4111 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4118 Char = getCharAndSize(CurPtr, SizeTmp);
4121 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4122 }
else if (Char ==
'=') {
4123 Kind = tok::ampequal;
4124 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4130 if (getCharAndSize(CurPtr, SizeTmp) ==
'=') {
4131 Kind = tok::starequal;
4132 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4138 Char = getCharAndSize(CurPtr, SizeTmp);
4140 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4141 Kind = tok::plusplus;
4142 }
else if (Char ==
'=') {
4143 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4144 Kind = tok::plusequal;
4150 Char = getCharAndSize(CurPtr, SizeTmp);
4152 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4153 Kind = tok::minusminus;
4154 }
else if (Char ==
'>' && LangOpts.CPlusPlus &&
4155 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
'*') {
4156 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4158 Kind = tok::arrowstar;
4159 }
else if (Char ==
'>') {
4160 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4162 }
else if (Char ==
'=') {
4163 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4164 Kind = tok::minusequal;
4173 if (getCharAndSize(CurPtr, SizeTmp) ==
'=') {
4174 Kind = tok::exclaimequal;
4175 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4177 Kind = tok::exclaim;
4182 Char = getCharAndSize(CurPtr, SizeTmp);
4192 bool TreatAsComment =
4193 LineComment && (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP);
4194 if (!TreatAsComment)
4195 if (!(
PP &&
PP->isPreprocessedOutput()))
4196 TreatAsComment = getCharAndSize(CurPtr+SizeTmp, SizeTmp2) !=
'*';
4198 if (TreatAsComment) {
4199 if (SkipLineComment(
Result, ConsumeChar(CurPtr, SizeTmp,
Result)))
4205 goto SkipIgnoredUnits;
4210 if (SkipBlockComment(
Result, ConsumeChar(CurPtr, SizeTmp,
Result)))
4219 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4220 Kind = tok::slashequal;
4226 Char = getCharAndSize(CurPtr, SizeTmp);
4228 Kind = tok::percentequal;
4229 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4230 }
else if (LangOpts.Digraphs && Char ==
'>') {
4231 Kind = tok::r_brace;
4232 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4233 }
else if (LangOpts.Digraphs && Char ==
':') {
4234 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4235 Char = getCharAndSize(CurPtr, SizeTmp);
4236 if (Char ==
'%' && getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
':') {
4237 Kind = tok::hashhash;
4238 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4240 }
else if (Char ==
'@' && LangOpts.MicrosoftExt) {
4241 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4243 Diag(BufferPtr, diag::ext_charize_microsoft);
4252 goto HandleDirective;
4257 Kind = tok::percent;
4261 Char = getCharAndSize(CurPtr, SizeTmp);
4263 return LexAngledStringLiteral(
Result, CurPtr);
4264 }
else if (Char ==
'<') {
4265 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
4267 Kind = tok::lesslessequal;
4268 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4270 }
else if (After ==
'<' && IsStartOfConflictMarker(CurPtr-1)) {
4274 }
else if (After ==
'<' && HandleEndOfConflictMarker(CurPtr-1)) {
4278 }
else if (LangOpts.CUDA && After ==
'<') {
4279 Kind = tok::lesslessless;
4280 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4283 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4284 Kind = tok::lessless;
4286 }
else if (Char ==
'=') {
4287 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
4289 if (LangOpts.CPlusPlus20) {
4291 Diag(BufferPtr, diag::warn_cxx17_compat_spaceship);
4292 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4294 Kind = tok::spaceship;
4300 Diag(BufferPtr, diag::warn_cxx20_compat_spaceship)
4305 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4306 Kind = tok::lessequal;
4307 }
else if (LangOpts.Digraphs && Char ==
':') {
4308 if (LangOpts.CPlusPlus11 &&
4309 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
':') {
4316 char After = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
4317 if (After !=
':' && After !=
'>') {
4320 Diag(BufferPtr, diag::warn_cxx98_compat_less_colon_colon);
4325 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4326 Kind = tok::l_square;
4327 }
else if (LangOpts.Digraphs && Char ==
'%') {
4328 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4329 Kind = tok::l_brace;
4330 }
else if (Char ==
'#' && SizeTmp == 1 &&
4331 lexEditorPlaceholder(
Result, CurPtr)) {
4338 Char = getCharAndSize(CurPtr, SizeTmp);
4340 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4341 Kind = tok::greaterequal;
4342 }
else if (Char ==
'>') {
4343 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
4345 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4347 Kind = tok::greatergreaterequal;
4348 }
else if (After ==
'>' && IsStartOfConflictMarker(CurPtr-1)) {
4352 }
else if (After ==
'>' && HandleEndOfConflictMarker(CurPtr-1)) {
4355 }
else if (LangOpts.CUDA && After ==
'>') {
4356 Kind = tok::greatergreatergreater;
4357 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4360 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4361 Kind = tok::greatergreater;
4364 Kind = tok::greater;
4368 Char = getCharAndSize(CurPtr, SizeTmp);
4370 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4371 Kind = tok::caretequal;
4372 }
else if (LangOpts.Reflection && Char ==
'^') {
4373 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4374 Kind = tok::caretcaret;
4376 if (LangOpts.OpenCL && Char ==
'^')
4377 Diag(CurPtr, diag::err_opencl_logical_exclusive_or);
4382 Char = getCharAndSize(CurPtr, SizeTmp);
4384 Kind = tok::pipeequal;
4385 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4386 }
else if (Char ==
'|') {
4388 if (CurPtr[1] ==
'|' && HandleEndOfConflictMarker(CurPtr-1))
4390 Kind = tok::pipepipe;
4391 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4397 Char = getCharAndSize(CurPtr, SizeTmp);
4398 if (LangOpts.Digraphs && Char ==
'>') {
4399 Kind = tok::r_square;
4400 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4401 }
else if (Char ==
':') {
4402 Kind = tok::coloncolon;
4403 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4412 Char = getCharAndSize(CurPtr, SizeTmp);
4415 if (CurPtr[1] ==
'=' && HandleEndOfConflictMarker(CurPtr-1))
4418 Kind = tok::equalequal;
4419 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4428 Char = getCharAndSize(CurPtr, SizeTmp);
4430 Kind = tok::hashhash;
4431 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4432 }
else if (Char ==
'@' && LangOpts.MicrosoftExt) {
4435 Diag(BufferPtr, diag::ext_charize_microsoft);
4436 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4443 goto HandleDirective;
4451 if (CurPtr[-1] ==
'@' && LangOpts.ObjC)
4454 Kind = tok::unknown;
4459 if (!LangOpts.AsmPreprocessor) {
4460 if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &
Result)) {
4461 if (CheckUnicodeWhitespace(
Result, CodePoint, CurPtr)) {
4462 if (SkipWhitespace(
Result, CurPtr))
4470 return LexUnicodeIdentifierStart(
Result, CodePoint, CurPtr);
4474 Kind = tok::unknown;
4479 Kind = tok::unknown;
4483 llvm::UTF32 CodePoint;
4488 llvm::ConversionResult Status =
4489 llvm::convertUTF8Sequence((
const llvm::UTF8 **)&CurPtr,
4490 (
const llvm::UTF8 *)BufferEnd,
4492 llvm::strictConversion);
4493 if (Status == llvm::conversionOK) {
4494 if (CheckUnicodeWhitespace(
Result, CodePoint, CurPtr)) {
4495 if (SkipWhitespace(
Result, CurPtr))
4502 return LexUnicodeIdentifierStart(
Result, CodePoint, CurPtr);
4506 PP->isPreprocessedOutput()) {
4508 Kind = tok::unknown;
4515 Diag(CurPtr, diag::err_invalid_utf8);
4517 BufferPtr = CurPtr+1;
4529 FormTokenWithChars(
Result, CurPtr, Kind);
4535 FormTokenWithChars(
Result, CurPtr, tok::hash);
4538 if (
PP->hadModuleLoaderFatalFailure())
4550const char *Lexer::convertDependencyDirectiveToken(
4552 const char *TokPtr = BufferStart + DDTok.
Offset;
4558 if (
Result.is(tok::raw_identifier))
4559 Result.setRawIdentifierData(TokPtr);
4560 else if (
Result.isLiteral())
4561 Result.setLiteralData(TokPtr);
4562 BufferPtr = TokPtr + DDTok.
Length;
4566bool Lexer::LexDependencyDirectiveToken(
Token &
Result) {
4567 assert(isDependencyDirectivesLexer());
4569 using namespace dependency_directives_scan;
4571 if (BufferPtr == BufferEnd)
4572 return LexEndOfFile(
Result, BufferPtr);
4574 while (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size()) {
4575 if (DepDirectives.front().Kind == pp_eof)
4576 return LexEndOfFile(
Result, BufferEnd);
4577 if (DepDirectives.front().Kind == tokens_present_before_eof)
4579 NextDepDirectiveTokenIndex = 0;
4580 DepDirectives = DepDirectives.drop_front();
4583 const dependency_directives_scan::Token &DDTok =
4584 DepDirectives.front().Tokens[NextDepDirectiveTokenIndex++];
4585 if (NextDepDirectiveTokenIndex > 1 || DDTok.
Kind != tok::hash) {
4591 BufferPtr = BufferStart + DDTok.
Offset;
4592 LexAngledStringLiteral(
Result, BufferPtr + 1);
4593 if (
Result.isNot(tok::header_name))
4597 const dependency_directives_scan::Token &NextTok =
4598 DepDirectives.front().Tokens[NextDepDirectiveTokenIndex];
4599 if (BufferStart + NextTok.
Offset >= BufferPtr)
4601 ++NextDepDirectiveTokenIndex;
4606 const char *TokPtr = convertDependencyDirectiveToken(DDTok,
Result);
4608 if (
Result.is(tok::hash) &&
Result.isAtStartOfLine()) {
4610 if (
PP->hadModuleLoaderFatalFailure())
4615 if (
Result.is(tok::raw_identifier)) {
4616 Result.setRawIdentifierData(TokPtr);
4618 const IdentifierInfo *II =
PP->LookUpIdentifierInfo(
Result);
4619 if (LangOpts.CPlusPlusModules &&
Result.isModuleContextualKeyword() &&
4620 PP->HandleModuleContextualKeyword(
Result)) {
4625 return PP->HandleIdentifier(
Result);
4631 if (
Result.is(tok::colon)) {
4633 if (*BufferPtr ==
':') {
4634 assert(DepDirectives.front().Tokens[NextDepDirectiveTokenIndex].is(
4636 ++NextDepDirectiveTokenIndex;
4637 Result.setKind(tok::coloncolon);
4647bool Lexer::LexDependencyDirectiveTokenWhileSkipping(
Token &
Result) {
4648 assert(isDependencyDirectivesLexer());
4650 using namespace dependency_directives_scan;
4653 unsigned NestedIfs = 0;
4655 DepDirectives = DepDirectives.drop_front();
4656 switch (DepDirectives.front().Kind) {
4658 llvm_unreachable(
"unexpected 'pp_none'");
4699 NextDepDirectiveTokenIndex = 0;
4700 return LexEndOfFile(
Result, BufferEnd);
4704 const dependency_directives_scan::Token &DDTok =
4705 DepDirectives.front().Tokens.front();
4706 assert(DDTok.
is(tok::hash));
4707 NextDepDirectiveTokenIndex = 1;
4709 convertDependencyDirectiveToken(DDTok,
Result);
Defines the Diagnostic-related interfaces.
Defines the clang::IdentifierInfo, clang::IdentifierTable, and clang::Selector interfaces.
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified.
Defines the clang::LangOptions interface.
static bool isInExpansionTokenRange(const SourceLocation Loc, const SourceManager &SM)
static bool isMathematicalExtensionID(uint32_t C, const LangOptions &LangOpts, bool IsStart, bool &IsExtension)
static void diagnoseInvalidUnicodeCodepointInIdentifier(DiagnosticsEngine &Diags, const LangOptions &LangOpts, uint32_t CodePoint, CharSourceRange Range, bool IsFirst)
static char DecodeTrigraphChar(const char *CP, Lexer *L, bool Trigraphs)
DecodeTrigraphChar - If the specified character is a legal trigraph when prefixed with ?
static size_t getSpellingSlow(const Token &Tok, const char *BufPtr, const LangOptions &LangOpts, char *Spelling)
Slow case of getSpelling.
static const char * FindConflictEnd(const char *CurPtr, const char *BufferEnd, ConflictMarkerKind CMK)
Find the end of a version control conflict marker.
static void maybeDiagnoseUTF8Homoglyph(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range)
After encountering UTF-8 character C and interpreting it as an identifier character,...
static SourceLocation getBeginningOfFileToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
static void StringifyImpl(T &Str, char Quote)
static LLVM_ATTRIBUTE_NOINLINE SourceLocation GetMappedTokenLoc(Preprocessor &PP, SourceLocation FileLoc, unsigned CharNo, unsigned TokLen)
GetMappedTokenLoc - If lexing out of a 'mapped buffer', where we pretend the lexer buffer was all exp...
static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts, bool &IsExtension)
static CharSourceRange makeCharRange(Lexer &L, const char *Begin, const char *End)
static bool isUnicodeWhitespace(uint32_t Codepoint)
static void diagnoseExtensionInIdentifier(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range)
static const char * findPlaceholderEnd(const char *CurPtr, const char *BufferEnd)
static llvm::SmallString< 5 > codepointAsHexString(uint32_t C)
static CharSourceRange makeRangeFromFileLocs(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, Lexer *L, bool Trigraphs)
isBlockCommentEndOfEscapedNewLine - Return true if the specified newline character (either \n or \r) ...
static const char * fastParseASCIIIdentifier(const char *CurPtr, const char *BufferEnd)
static char GetTrigraphCharForLetter(char Letter)
GetTrigraphCharForLetter - Given a character that occurs after a ?
static bool isAllowedInitiallyIDChar(uint32_t C, const LangOptions &LangOpts, bool &IsExtension)
static void maybeDiagnoseIDCharCompat(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range, bool IsFirst)
static const char * findBeginningOfLine(StringRef Buffer, unsigned Offset)
Returns the pointer that points to the beginning of line that contains the given offset,...
Defines the MultipleIncludeOpt interface.
Defines the clang::Preprocessor interface.
static constexpr bool isOneOf()
Defines the clang::SourceLocation class and associated facilities.
Defines the SourceManager interface.
Defines the clang::TokenKind enum and support functions.
static const llvm::sys::UnicodeCharRange C11DisallowedInitialIDCharRanges[]
static const llvm::sys::UnicodeCharRange C99DisallowedInitialIDCharRanges[]
static const llvm::sys::UnicodeCharRange UnicodeWhitespaceCharRanges[]
static const llvm::sys::UnicodeCharRange C99AllowedIDCharRanges[]
static const llvm::sys::UnicodeCharRange C11AllowedIDCharRanges[]
static const llvm::sys::UnicodeCharRange MathematicalNotationProfileIDStartRanges[]
static const llvm::sys::UnicodeCharRange MathematicalNotationProfileIDContinueRanges[]
static const llvm::sys::UnicodeCharRange XIDStartRanges[]
static const llvm::sys::UnicodeCharRange XIDContinueRanges[]
__DEVICE__ void * memcpy(void *__a, const void *__b, size_t __c)
__device__ __2f16 float c
__PTRDIFF_TYPE__ ptrdiff_t
A signed integer type that is the result of subtracting two pointers.
static __inline__ int __ATTRS_o_ai vec_any_ge(vector signed char __a, vector signed char __b)
static __inline__ int __ATTRS_o_ai vec_any_eq(vector signed char __a, vector signed char __b)
Represents a byte-granular source range.
static CharSourceRange getCharRange(SourceRange R)
SourceLocation getEnd() const
SourceLocation getBegin() const
A little helper class used to produce diagnostics.
Concrete class used by the front-end to report problems and issues.
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
bool isIgnored(unsigned DiagID, SourceLocation Loc) const
Determine whether the diagnostic is known to be ignored.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
static FixItHint CreateReplacement(CharSourceRange RemoveRange, StringRef Code)
Create a code modification hint that replaces the given source range with the given code string.
static FixItHint CreateRemoval(CharSourceRange RemoveRange)
Create a code modification hint that removes the given source range.
static FixItHint CreateInsertion(SourceLocation InsertionLoc, StringRef Code, bool BeforePreviousInsertions=false)
Create a code modification hint that inserts the given code string at a specific location.
One of these records is kept for each identifier that is lexed.
bool isHandleIdentifierCase() const
Return true if the Preprocessor::HandleIdentifier must be called on a token of this identifier.
bool isModuleKeyword() const
Determine whether this is the contextual keyword module.
tok::TokenKind getTokenID() const
If this is a source-language token (e.g.
bool isKeyword(const LangOptions &LangOpts) const
Return true if this token is a keyword in the specified language.
bool isImportKeyword() const
Determine whether this is the contextual keyword import.
tok::ObjCKeywordKind getObjCKeywordID() const
Return the Objective-C keyword ID for this identifier.
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens.
static StringRef getSourceText(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts, bool *Invalid=nullptr)
Returns a string for the source that the range encompasses.
friend class Preprocessor
void SetKeepWhitespaceMode(bool Val)
SetKeepWhitespaceMode - This method lets clients enable or disable whitespace retention mode.
static SourceLocation findLocationAfterToken(SourceLocation loc, tok::TokenKind TKind, const SourceManager &SM, const LangOptions &LangOpts, bool SkipTrailingWhitespaceAndNewLine)
Checks that the given token is the first token that occurs after the given location (this excludes co...
bool LexFromRawLexer(Token &Result)
LexFromRawLexer - Lex a token from a designated raw lexer (one with no associated preprocessor object...
static unsigned getEscapedNewLineSize(const char *P)
getEscapedNewLineSize - Return the size of the specified escaped newline, or 0 if it is not an escape...
bool inKeepCommentMode() const
inKeepCommentMode - Return true if the lexer should return comments as tokens.
void SetCommentRetentionState(bool Mode)
SetCommentRetentionState - Change the comment retention mode of the lexer to the specified mode.
static std::optional< Token > findPreviousToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts, bool IncludeComments)
Finds the token that comes before the given location.
void seek(unsigned Offset, bool IsAtStartOfLine)
Set the lexer's buffer pointer to Offset.
static StringRef getImmediateMacroName(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
void ReadToEndOfLine(SmallVectorImpl< char > *Result=nullptr)
ReadToEndOfLine - Read the rest of the current preprocessor line as an uninterpreted string.
static bool isAtStartOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroBegin=nullptr)
Returns true if the given MacroID location points at the first token of the macro expansion.
DiagnosticBuilder Diag(const char *Loc, unsigned DiagID) const
Diag - Forwarding function for diagnostics.
const char * getBufferLocation() const
Return the current location in the buffer.
bool Lex(Token &Result)
Lex - Return the next token in the file.
bool isPragmaLexer() const
isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.
static void DiagnoseDelimitedOrNamedEscapeSequence(SourceLocation Loc, bool Named, const LangOptions &Opts, DiagnosticsEngine &Diags)
Diagnose use of a delimited or named escape sequence.
static unsigned getTokenPrefixLength(SourceLocation TokStart, unsigned CharNo, const SourceManager &SM, const LangOptions &LangOpts)
Get the physical length (including trigraphs and escaped newlines) of the first CharNo characters...
Lexer(FileID FID, const llvm::MemoryBufferRef &InputFile, Preprocessor &PP, bool IsFirstIncludeOfFile=true)
Lexer constructor - Create a new lexer object for the specified buffer with the specified preprocesso...
static bool isAtEndOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroEnd=nullptr)
Returns true if the given MacroID location points at the last token of the macro expansion.
SourceLocation getSourceLocation() override
getSourceLocation - Return a source location for the next character in the current file.
static CharSourceRange makeFileCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Accepts a range and returns a character range with file locations.
static bool isNewLineEscaped(const char *BufferStart, const char *Str)
Checks whether new line pointed by Str is preceded by escape sequence.
SourceLocation getSourceLocation(const char *Loc, unsigned TokLen=1) const
getSourceLocation - Return a source location identifier for the specified offset in the current file.
static StringRef getIndentationForLine(SourceLocation Loc, const SourceManager &SM)
Returns the leading whitespace for line that corresponds to the given location Loc.
static unsigned getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid=nullptr)
getSpelling - This method is used to get the spelling of a token into a preallocated buffer,...
bool isKeepWhitespaceMode() const
isKeepWhitespaceMode - Return true if the lexer should return tokens for every character in the file,...
static bool isAsciiIdentifierContinueChar(char c, const LangOptions &LangOpts)
Returns true if the given character could appear in an identifier.
static std::optional< Token > findNextToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts, bool IncludeComments=false)
Finds the token that comes right after the given location.
static unsigned MeasureTokenLength(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
MeasureTokenLength - Relex the token at the specified location and return its length in bytes in the ...
static SourceLocation GetBeginningOfToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Given a location anywhere in a source buffer, find the location that corresponds to the beginning of...
void resetExtendedTokenMode()
Sets the extended token mode back to its initial value, according to the language options and preproc...
static StringRef getImmediateMacroNameForDiagnostics(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
static Lexer * Create_PragmaLexer(SourceLocation SpellingLoc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLen, Preprocessor &PP)
Create_PragmaLexer: Lexer constructor - Create a new lexer object for _Pragma expansion.
static PreambleBounds ComputePreamble(StringRef Buffer, const LangOptions &LangOpts, unsigned MaxLines=0)
Compute the preamble of the given file.
static bool getRawToken(SourceLocation Loc, Token &Result, const SourceManager &SM, const LangOptions &LangOpts, bool IgnoreWhiteSpace=false)
Relex the token at the specified location.
static SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset, const SourceManager &SM, const LangOptions &LangOpts)
Computes the source location just past the end of the token at this source location.
static std::string Stringify(StringRef Str, bool Charify=false)
Stringify - Convert the specified string into a C string by i) escaping '\' and " characters and ii) ...
static SizedChar getCharAndSizeNoWarn(const char *Ptr, const LangOptions &LangOpts)
getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever emit a warning.
bool LexingRawMode
True if in raw mode.
SmallVector< PPConditionalInfo, 4 > ConditionalStack
Information about the set of #if/#ifdef/#ifndef blocks we are currently in.
bool ParsingPreprocessorDirective
True when parsing #XXX; turns '\n' into a tok::eod token.
MultipleIncludeOpt MIOpt
A state machine that detects the #ifndef-wrapping a file idiom for the multiple-include optimization.
bool ParsingFilename
True after #include; turns <xx> or "xxx" into a tok::header_name token.
bool isLexingRawMode() const
Return true if this lexer is in raw mode or not.
const FileID FID
The SourceManager FileID corresponding to the file being lexed.
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
SourceManager & getSourceManager() const
Encodes a location in the source.
static SourceLocation getFromRawEncoding(UIntTy Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
bool isValid() const
Return true if this is a valid SourceLocation object.
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
UIntTy getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it.
This class handles loading and caching of source files into memory.
Each ExpansionInfo encodes the expansion location - where the token was ultimately expanded,...
SourceLocation getExpansionLocStart() const
SourceLocation getSpellingLoc() const
bool isMacroArgExpansion() const
This is a discriminated union of FileInfo and ExpansionInfo.
const ExpansionInfo & getExpansion() const
static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix)
Determine whether a suffix is a valid ud-suffix.
Token - This structure provides full information about a lexed token.
IdentifierInfo * getIdentifierInfo() const
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
unsigned getLength() const
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {...}".
tok::TokenKind getKind() const
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
bool isModuleContextualKeyword(bool AllowExport=true) const
Return true if we have a C++20 modules contextual keyword (export, import, or module).
bool isNot(tok::TokenKind K) const
bool isAnnotation() const
Return true if this is any of tok::annot_* kind tokens.
bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const
Return true if we have an ObjC keyword identifier.
bool isSimpleTypeSpecifier(const LangOptions &LangOpts) const
Determine whether the token kind starts a simple-type-specifier.
void startToken()
Reset all flags to cleared.
bool needsCleaning() const
Return true if this token has trigraphs or escaped newlines in it.
StringRef getRawIdentifier() const
getRawIdentifier - For a raw identifier token (i.e., an identifier lexed in raw mode),...
void setFlag(TokenFlags Flag)
Set the specified flag.
static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_movemask_epi8(__m128i __a)
Copies the values of the most significant bits from each 8-bit element in a 128-bit integer vector of...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpeq_epi8(__m128i __a, __m128i __b)
Compares each of the corresponding 8-bit values of the 128-bit integer vectors for equality.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si128(__m128i_u const *__p)
Moves packed integer values from an unaligned 128-bit memory location to elements in a 128-bit intege...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_load_si128(__m128i const *__p)
Moves packed integer values from an aligned 128-bit memory location to elements in a 128-bit integer ...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set1_epi8(char __b)
Initializes all values in a 128-bit vector of [16 x i8] with the specified 8-bit value.
@ tokens_present_before_eof
Indicates that there are tokens present between the last scanned directive and eof.
@ pp_pragma_system_header
@ pp_pragma_include_alias
@ After
Like System, but searched after the system directories.
bool isStringLiteral(TokenKind K)
Return true if this is a C or C++ string-literal (or C++11 user-defined-string-literal) token.
ObjCKeywordKind
Provides a namespace for Objective-C keywords which start with an '@'.
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
The JSON file list parser is used to communicate input to InstallAPI.
LLVM_READNONE bool isASCII(char c)
Returns true if a byte is an ASCII character.
@ Match
This is not an overload because the signature exactly matches an existing declaration.
LLVM_READONLY bool isVerticalWhitespace(unsigned char c)
Returns true if this character is vertical ASCII whitespace: '\n', '\r'.
ConflictMarkerKind
ConflictMarkerKind - Kinds of conflict marker which the lexer might be recovering from.
@ CMK_Perforce
A Perforce-style conflict marker, initiated by 4 ">"s, separated by 4 "="s, and terminated by 4 "<"s.
@ CMK_None
Not within a conflict marker.
@ CMK_Normal
A normal or diff3 conflict marker, initiated by at least 7 "<"s, separated by at least 7 "="s or "|"s...
LLVM_READONLY bool isAsciiIdentifierContinue(unsigned char c)
std::pair< FileID, unsigned > FileIDAndOffset
bool operator<(DeclarationName LHS, DeclarationName RHS)
Ordering on two declaration names.
LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)
Returns true if this character is horizontal ASCII whitespace: ' ', '\t', '\f', '\v'.
@ Result
The result type of a method or function.
LLVM_READONLY bool isRawStringDelimBody(unsigned char c)
Return true if this is the body character of a C++ raw string delimiter.
LLVM_READONLY bool isWhitespace(unsigned char c)
Return true if this character is horizontal or vertical ASCII whitespace: ' ', '\t',...
LLVM_READONLY bool isPreprocessingNumberBody(unsigned char c)
Return true if this is the body character of a C preprocessing number, which is [a-zA-Z0-9_.].
@ Keyword
The name has been typo-corrected to a keyword.
LLVM_READONLY bool isAsciiIdentifierStart(unsigned char c, bool AllowDollar=false)
Returns true if this is a valid first character of a C identifier, which is [a-zA-Z_].
__INTPTR_TYPE__ intptr_t
A signed integer type with the property that any valid pointer to void can be converted to this type,...
float __ovld __cnfn length(float)
Return the length of vector p, i.e., sqrt(p.x^2 + p.y^2 + ...)
#define _mm_cmpistri(A, B, M)
Uses the immediate operand M to perform a comparison of string data with implicitly defined lengths t...
#define _SIDD_LEAST_SIGNIFICANT
#define _SIDD_NEGATIVE_POLARITY
Represents a char and the number of bytes parsed to produce it.
Describes the bounds (start, size) of the preamble and a flag required by PreprocessorOptions::Precom...
Token lexed as part of dependency directive scanning.
unsigned Offset
Offset into the original source input.
bool is(tok::TokenKind K) const