29#include "llvm/ADT/STLExtras.h"
30#include "llvm/ADT/StringExtras.h"
31#include "llvm/ADT/StringRef.h"
32#include "llvm/ADT/StringSwitch.h"
33#include "llvm/Support/Compiler.h"
34#include "llvm/Support/ConvertUTF.h"
35#include "llvm/Support/MemoryBufferRef.h"
36#include "llvm/Support/NativeFormatting.h"
37#include "llvm/Support/SaveAndRestore.h"
38#include "llvm/Support/Unicode.h"
39#include "llvm/Support/UnicodeCharRanges.h"
64 return II->getObjCKeywordID() == objcKey;
71 return tok::objc_not_keyword;
77 if (AllowExport &&
is(tok::kw_export))
79 if (
isOneOf(tok::kw_import, tok::kw_module))
81 if (
isNot(tok::identifier))
84 return II->isImportKeyword() || II->isModuleKeyword();
90 case tok::annot_typename:
91 case tok::annot_decltype:
92 case tok::annot_pack_indexing_type:
98 case tok::kw___int128:
100 case tok::kw_unsigned:
108 case tok::kw__Float16:
109 case tok::kw___float128:
110 case tok::kw___ibm128:
111 case tok::kw_wchar_t:
117#define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) case tok::kw___##Trait:
118#include "clang/Basic/TransformTypeTraits.def"
119 case tok::kw___auto_type:
120 case tok::kw_char16_t:
121 case tok::kw_char32_t:
123 case tok::kw_decltype:
124 case tok::kw_char8_t:
// Intentionally empty out-of-line definition of Lexer::anchor().
// NOTE(review): presumably the usual LLVM "anchor" idiom (a virtual method
// defined in exactly one .cpp so the class's vtable is emitted here) --
// confirm against the declaration in the Lexer header.
136void Lexer::anchor() {}
138void Lexer::InitLexer(
const char *BufStart,
const char *BufPtr,
139 const char *BufEnd) {
140 BufferStart = BufStart;
144 assert(BufEnd[0] == 0 &&
145 "We assume that the input buffer has a null character at the end"
146 " to simplify lexing!");
151 if (BufferStart == BufferPtr) {
153 StringRef Buf(BufferStart, BufferEnd - BufferStart);
154 size_t BOMLength = llvm::StringSwitch<size_t>(Buf)
155 .StartsWith(
"\xEF\xBB\xBF", 3)
159 BufferPtr += BOMLength;
162 Is_PragmaLexer =
false;
163 CurrentConflictMarkerState =
CMK_None;
166 IsAtStartOfLine =
true;
167 IsAtPhysicalStartOfLine =
true;
169 HasLeadingSpace =
false;
170 HasLeadingEmptyMacro =
false;
185 ExtendedTokenMode = 0;
187 NewLinePtr =
nullptr;
197 FileLoc(
PP.getSourceManager().getLocForStartOfFile(
FID)),
198 LangOpts(
PP.getLangOpts()), LineComment(LangOpts.LineComment),
199 IsFirstTimeLexingFile(IsFirstIncludeOfFile) {
200 InitLexer(InputFile.getBufferStart(), InputFile.getBufferStart(),
201 InputFile.getBufferEnd());
210 const char *BufStart,
const char *BufPtr,
const char *BufEnd,
211 bool IsFirstIncludeOfFile)
212 : FileLoc(fileloc), LangOpts(langOpts), LineComment(LangOpts.LineComment),
213 IsFirstTimeLexingFile(IsFirstIncludeOfFile) {
214 InitLexer(BufStart, BufPtr, BufEnd);
225 bool IsFirstIncludeOfFile)
226 :
Lexer(
SM.getLocForStartOfFile(
FID), langOpts, FromFile.getBufferStart(),
227 FromFile.getBufferStart(), FromFile.getBufferEnd(),
228 IsFirstIncludeOfFile) {}
231 assert(
PP &&
"Cannot reset token mode without a preprocessor");
232 if (LangOpts.TraditionalCPP)
260 FileID SpellingFID =
SM.getFileID(SpellingLoc);
261 llvm::MemoryBufferRef InputFile =
SM.getBufferOrFake(SpellingFID);
267 const char *StrData =
SM.getCharacterData(SpellingLoc);
269 L->BufferPtr = StrData;
270 L->BufferEnd = StrData+TokLen;
271 assert(L->BufferEnd[0] == 0 &&
"Buffer is not nul terminated!");
275 L->FileLoc =
SM.createExpansionLoc(
SM.getLocForStartOfFile(SpellingFID),
277 ExpansionLocEnd, TokLen);
284 L->Is_PragmaLexer =
true;
289 this->IsAtPhysicalStartOfLine = IsAtStartOfLine;
290 this->IsAtStartOfLine = IsAtStartOfLine;
291 assert((BufferStart + Offset) <= BufferEnd);
292 BufferPtr = BufferStart + Offset;
296 typename T::size_type i = 0, e = Str.size();
298 if (Str[i] ==
'\\' || Str[i] == Quote) {
299 Str.insert(Str.begin() + i,
'\\');
302 }
else if (Str[i] ==
'\n' || Str[i] ==
'\r') {
304 if ((i < e - 1) && (Str[i + 1] ==
'\n' || Str[i + 1] ==
'\r') &&
305 Str[i] != Str[i + 1]) {
311 Str.insert(Str.begin() + i + 1,
'n');
321 std::string
Result = std::string(Str);
322 char Quote = Charify ?
'\'' :
'"';
337 assert(
Tok.needsCleaning() &&
"getSpellingSlow called on simple token");
340 const char *BufEnd = BufPtr +
Tok.getLength();
344 while (BufPtr < BufEnd) {
346 Spelling[Length++] = CharAndSize.Char;
347 BufPtr += CharAndSize.Size;
349 if (Spelling[Length - 1] ==
'"')
357 Spelling[Length - 2] ==
'R' && Spelling[Length - 1] ==
'"') {
360 const char *RawEnd = BufEnd;
361 do --RawEnd;
while (*RawEnd !=
'"');
362 size_t RawLength = RawEnd - BufPtr + 1;
365 memcpy(Spelling + Length, BufPtr, RawLength);
373 while (BufPtr < BufEnd) {
375 Spelling[Length++] = CharAndSize.Char;
376 BufPtr += CharAndSize.Size;
379 assert(Length <
Tok.getLength() &&
380 "NeedsCleaning flag set on token that didn't need cleaning!");
398 bool invalidTemp =
false;
399 StringRef file =
SM.getBufferData(locInfo.first, &invalidTemp);
401 if (invalid) *invalid =
true;
405 const char *tokenBegin = file.data() + locInfo.second;
409 file.begin(), tokenBegin, file.end());
417 return StringRef(tokenBegin,
length);
422 return StringRef(buffer.data(), buffer.size());
432 assert((
int)
Tok.getLength() >= 0 &&
"Token character range is bogus!");
434 bool CharDataInvalid =
false;
435 const char *TokStart = SourceMgr.getCharacterData(
Tok.getLocation(),
443 if (!
Tok.needsCleaning())
444 return std::string(TokStart, TokStart +
Tok.getLength());
465 assert((
int)
Tok.getLength() >= 0 &&
"Token character range is bogus!");
467 const char *TokStart =
nullptr;
469 if (
Tok.is(tok::raw_identifier))
470 TokStart =
Tok.getRawIdentifier().data();
471 else if (!
Tok.hasUCN()) {
474 Buffer = II->getNameStart();
475 return II->getLength();
481 TokStart =
Tok.getLiteralData();
485 bool CharDataInvalid =
false;
486 TokStart = SourceMgr.getCharacterData(
Tok.getLocation(), &CharDataInvalid);
489 if (CharDataInvalid) {
496 if (!
Tok.needsCleaning()) {
498 return Tok.getLength();
523 bool IgnoreWhiteSpace) {
532 Loc =
SM.getExpansionLoc(Loc);
535 StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
539 const char *StrData = Buffer.data()+LocInfo.second;
541 if (!IgnoreWhiteSpace &&
isWhitespace(SkipEscapedNewLines(StrData)[0]))
545 Lexer TheLexer(
SM.getLocForStartOfFile(LocInfo.first), LangOpts,
546 Buffer.begin(), StrData, Buffer.end());
555 const char *BufStart = Buffer.data();
556 if (Offset >= Buffer.size())
559 const char *LexStart = BufStart + Offset;
560 for (; LexStart != BufStart; --LexStart) {
576 if (LocInfo.first.isInvalid())
580 StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
586 const char *StrData = Buffer.data() + LocInfo.second;
588 if (!LexStart || LexStart == StrData)
593 Lexer TheLexer(LexerStartLoc, LangOpts, Buffer.data(), LexStart,
613 }
while (TheTok.
getKind() != tok::eof);
625 if (!
SM.isMacroArgExpansion(Loc))
632 assert(FileLocInfo.first == BeginFileLocInfo.first &&
633 FileLocInfo.second >= BeginFileLocInfo.second);
639enum PreambleDirectiveKind {
654 Lexer TheLexer(FileLoc, LangOpts, Buffer.begin(), Buffer.begin(),
658 bool InPreprocessorDirective =
false;
662 unsigned MaxLineOffset = 0;
664 const char *CurPtr = Buffer.begin();
665 unsigned CurLine = 0;
666 while (CurPtr != Buffer.end()) {
670 if (CurLine == MaxLines)
674 if (CurPtr != Buffer.end())
675 MaxLineOffset = CurPtr - Buffer.begin();
681 if (InPreprocessorDirective) {
683 if (TheTok.
getKind() == tok::eof) {
694 InPreprocessorDirective =
false;
703 if (MaxLineOffset && TokOffset >= MaxLineOffset)
708 if (TheTok.
getKind() == tok::comment) {
716 Token HashTok = TheTok;
717 InPreprocessorDirective =
true;
// Classify the directive name held in `Keyword`.
// Every name listed below maps to PDK_Skipped; a keyword that matches none of
// the cases falls through to the default and is classified as PDK_Unknown.
// The comparison is an exact string match on the whole keyword (StringSwitch
// `.Case`), so e.g. "include" and "include_next" are distinct entries.
726 PreambleDirectiveKind PDK
727 = llvm::StringSwitch<PreambleDirectiveKind>(
Keyword)
728 .Case(
"include", PDK_Skipped)
729 .Case(
"__include_macros", PDK_Skipped)
730 .Case(
"define", PDK_Skipped)
731 .Case(
"undef", PDK_Skipped)
732 .Case(
"line", PDK_Skipped)
733 .Case(
"error", PDK_Skipped)
734 .Case(
"pragma", PDK_Skipped)
735 .Case(
"import", PDK_Skipped)
736 .Case(
"include_next", PDK_Skipped)
737 .Case(
"warning", PDK_Skipped)
738 .Case(
"ident", PDK_Skipped)
739 .Case(
"sccs", PDK_Skipped)
740 .Case(
"assert", PDK_Skipped)
741 .Case(
"unassert", PDK_Skipped)
742 .Case(
"if", PDK_Skipped)
743 .Case(
"ifdef", PDK_Skipped)
744 .Case(
"ifndef", PDK_Skipped)
745 .Case(
"elif", PDK_Skipped)
746 .Case(
"elifdef", PDK_Skipped)
747 .Case(
"elifndef", PDK_Skipped)
748 .Case(
"else", PDK_Skipped)
749 .Case(
"endif", PDK_Skipped)
750 .Default(PDK_Unknown);
767 TheTok.
getKind() == tok::raw_identifier &&
769 LangOpts.CPlusPlusModules) {
772 Token ModuleTok = TheTok;
775 }
while (TheTok.
getKind() == tok::comment);
776 if (TheTok.
getKind() != tok::semi) {
791 if (ActiveCommentLoc.
isValid())
792 End = ActiveCommentLoc;
807 const char *TokPtr =
SM.getCharacterData(TokStart, &
Invalid);
810 if (
Invalid || (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr)))
813 unsigned PhysOffset = 0;
818 while (Lexer::isObviouslySimpleCharacter(*TokPtr)) {
828 for (; CharNo; --CharNo) {
830 TokPtr += CharAndSize.Size;
831 PhysOffset += CharAndSize.Size;
838 if (!Lexer::isObviouslySimpleCharacter(*TokPtr))
839 PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr;
871 const FileID LocFileID =
SM.getFileID(Loc);
874 if (!
SM.getSLocEntry(LocFileID).getExpansion().isExpansionTokenRange())
896 if (!
SM.isAtStartOfImmediateMacroExpansion(loc, &expansionLoc))
902 *MacroBegin = expansionLoc;
926 if (
SM.isInFileID(afterLoc,
FID)) {
927 if (!
SM.isAtEndOfImmediateMacroExpansion(afterLoc, &expansionLoc))
936 assert(Entry.
isExpansion() &&
"Should be in an expansion");
943 *MacroEnd = expansionLoc;
956 if (Range.isTokenRange()) {
963 auto [FID, BeginOffs] =
SM.getDecomposedLoc(Begin);
968 if (!
SM.isInFileID(End, FID, &EndOffs) ||
978 return SM.getSLocEntry(
SM.getFileID(Loc))
980 .isExpansionTokenRange();
997 Range.setBegin(Begin);
1002 if (Range.isTokenRange()) {
1020 Range.setBegin(MacroBegin);
1021 Range.setEnd(MacroEnd);
1023 if (Range.isTokenRange())
1043 Range.setBegin(
SM.getImmediateSpellingLoc(Begin));
1044 Range.setEnd(
SM.getImmediateSpellingLoc(End));
1057 if (Range.isInvalid()) {
1064 if (beginInfo.first.isInvalid()) {
1070 if (!
SM.isInFileID(Range.getEnd(), beginInfo.first, &EndOffs) ||
1071 beginInfo.second > EndOffs) {
1077 bool invalidTemp =
false;
1078 StringRef file =
SM.getBufferData(beginInfo.first, &invalidTemp);
1085 return file.substr(beginInfo.second, EndOffs - beginInfo.second);
1091 assert(Loc.
isMacroID() &&
"Only reasonable to call this on macros");
1107 Loc =
SM.getImmediateExpansionRange(Loc).getBegin();
1114 FileID MacroFID =
SM.getFileID(Loc);
1115 if (
SM.isInFileID(SpellLoc, MacroFID))
1125 Loc =
SM.getSpellingLoc(Loc);
1131 StringRef ExpansionBuffer =
SM.getBufferData(ExpansionInfo.first);
1132 return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
1137 assert(Loc.
isMacroID() &&
"Only reasonable to call this on macros");
1139 while (
SM.isMacroArgExpansion(Loc))
1140 Loc =
SM.getImmediateExpansionRange(Loc).getBegin();
1146 if (!SpellLoc.
isFileID() ||
SM.isWrittenInScratchSpace(SpellLoc))
1152 Loc =
SM.getSpellingLoc(
SM.getImmediateExpansionRange(Loc).getBegin());
1158 StringRef ExpansionBuffer =
SM.getBufferData(ExpansionInfo.first);
1159 return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
1168 if (Str - 1 < BufferStart)
1171 if ((Str[0] ==
'\n' && Str[-1] ==
'\r') ||
1172 (Str[0] ==
'\r' && Str[-1] ==
'\n')) {
1173 if (Str - 2 < BufferStart)
1183 return *Str ==
'\\';
1191 if (LocInfo.first.isInvalid())
1194 StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
1200 StringRef Rest = Buffer.substr(
Line - Buffer.data());
1201 size_t NumWhitespaceChars = Rest.find_first_not_of(
" \t");
1202 return NumWhitespaceChars == StringRef::npos
1204 : Rest.take_front(NumWhitespaceChars);
1219 unsigned CharNo,
unsigned TokLen) {
1220 assert(FileLoc.
isMacroID() &&
"Must be a macro expansion");
1236 return SM.createExpansionLoc(SpellingLoc, II.
getBegin(), II.
getEnd(), TokLen);
1242 unsigned TokLen)
const {
1243 assert(Loc >= BufferStart && Loc <= BufferEnd &&
1244 "Location out of range for this buffer!");
1248 unsigned CharNo = Loc-BufferStart;
1249 if (FileLoc.isFileID())
1250 return FileLoc.getLocWithOffset(CharNo);
1254 assert(
PP &&
"This doesn't work on raw lexers");
1273 case '=':
return '#';
1274 case ')':
return ']';
1275 case '(':
return '[';
1276 case '!':
return '|';
1277 case '\'':
return '^';
1278 case '>':
return '}';
1279 case '/':
return '\\';
1280 case '<':
return '{';
1281 case '-':
return '~';
1296 L->
Diag(CP-2, diag::trigraph_ignored);
1301 L->
Diag(CP-2, diag::trigraph_converted) << StringRef(&Res, 1);
1313 if (Ptr[Size-1] !=
'\n' && Ptr[Size-1] !=
'\r')
1317 if ((Ptr[Size] ==
'\r' || Ptr[Size] ==
'\n') &&
1318 Ptr[Size-1] != Ptr[Size])
1331const char *Lexer::SkipEscapedNewLines(
const char *P) {
1333 const char *AfterEscape;
1336 }
else if (*P ==
'?') {
1338 if (P[1] !=
'?' || P[2] !=
'/')
1348 if (NewLineSize == 0)
return P;
1349 P = AfterEscape+NewLineSize;
1356 bool IncludeComments) {
1359 return std::nullopt;
1367 bool InvalidTemp =
false;
1368 StringRef
File =
SM.getBufferData(LocInfo.first, &InvalidTemp);
1370 return std::nullopt;
1372 const char *TokenBegin =
File.data() + LocInfo.second;
1375 Lexer lexer(
SM.getLocForStartOfFile(LocInfo.first), LangOpts,
File.begin(),
1376 TokenBegin,
File.end());
1387 bool IncludeComments) {
1388 const auto StartOfFile =
SM.getLocForStartOfFile(
SM.getFileID(Loc));
1389 while (Loc != StartOfFile) {
1392 return std::nullopt;
1398 if (!
Tok.is(tok::comment) || IncludeComments) {
1402 return std::nullopt;
1411 const LangOptions &LangOpts,
bool SkipTrailingWhitespaceAndNewLine) {
1413 if (!
Tok ||
Tok->isNot(TKind))
1418 unsigned NumWhitespaceChars = 0;
1419 if (SkipTrailingWhitespaceAndNewLine) {
1420 const char *TokenEnd =
SM.getCharacterData(TokenLoc) +
Tok->getLength();
1421 unsigned char C = *TokenEnd;
1424 NumWhitespaceChars++;
1428 if (
C ==
'\n' ||
C ==
'\r') {
1431 NumWhitespaceChars++;
1432 if ((
C ==
'\n' ||
C ==
'\r') &&
C != PrevC)
1433 NumWhitespaceChars++;
1458 if (Ptr[0] ==
'\\') {
1464 return {
'\\', Size};
1474 Diag(Ptr, diag::backslash_newline_space);
1477 Size += EscapedNewLineSize;
1478 Ptr += EscapedNewLineSize;
1481 auto CharAndSize = getCharAndSizeSlow(Ptr,
Tok);
1482 CharAndSize.Size += Size;
1487 return {
'\\',
Size};
1491 if (Ptr[0] ==
'?' && Ptr[1] ==
'?') {
1495 LangOpts.Trigraphs)) {
1501 if (
C ==
'\\')
goto Slash;
1507 return {*Ptr,
Size + 1u};
1521 if (Ptr[0] ==
'\\') {
1527 return {
'\\',
Size};
1532 Size += EscapedNewLineSize;
1533 Ptr += EscapedNewLineSize;
1536 auto CharAndSize = getCharAndSizeSlowNoWarn(Ptr, LangOpts);
1537 CharAndSize.Size +=
Size;
1542 return {
'\\',
Size};
1546 if (LangOpts.Trigraphs && Ptr[0] ==
'?' && Ptr[1] ==
'?') {
1552 if (
C ==
'\\')
goto Slash;
1558 return {*Ptr,
Size + 1u};
1566void Lexer::SetByteOffset(
unsigned Offset,
bool StartOfLine) {
1567 BufferPtr = BufferStart + Offset;
1568 if (BufferPtr > BufferEnd)
1569 BufferPtr = BufferEnd;
1573 IsAtStartOfLine = StartOfLine;
1574 IsAtPhysicalStartOfLine = StartOfLine;
1578 static const llvm::sys::UnicodeCharSet UnicodeWhitespaceChars(
1580 return UnicodeWhitespaceChars.contains(Codepoint);
1585 llvm::raw_svector_ostream CharOS(CharBuf);
1586 llvm::write_hex(CharOS,
C, llvm::HexPrintStyle::Upper, 4);
1597 bool IsStart,
bool &IsExtension) {
1598 static const llvm::sys::UnicodeCharSet MathStartChars(
1600 static const llvm::sys::UnicodeCharSet MathContinueChars(
1602 if (MathStartChars.contains(
C) ||
1603 (!IsStart && MathContinueChars.contains(
C))) {
1611 bool &IsExtension) {
1612 if (LangOpts.AsmPreprocessor) {
1614 }
else if (LangOpts.DollarIdents &&
'$' ==
C) {
1616 }
else if (LangOpts.CPlusPlus || LangOpts.C23) {
1621 static const llvm::sys::UnicodeCharSet XIDStartChars(
XIDStartRanges);
1623 if (
C ==
'_' || XIDStartChars.contains(
C) || XIDContinueChars.contains(
C))
1627 }
else if (LangOpts.C11) {
1628 static const llvm::sys::UnicodeCharSet C11AllowedIDChars(
1630 return C11AllowedIDChars.contains(
C);
1632 static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
1634 return C99AllowedIDChars.contains(
C);
1639 bool &IsExtension) {
1640 assert(
C > 0x7F &&
"isAllowedInitiallyIDChar called with an ASCII codepoint");
1641 IsExtension =
false;
1642 if (LangOpts.AsmPreprocessor) {
1645 if (LangOpts.CPlusPlus || LangOpts.C23) {
1646 static const llvm::sys::UnicodeCharSet XIDStartChars(
XIDStartRanges);
1647 if (XIDStartChars.contains(
C))
1655 static const llvm::sys::UnicodeCharSet C11DisallowedInitialIDChars(
1657 return !C11DisallowedInitialIDChars.contains(
C);
1659 static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
1661 return !C99DisallowedInitialIDChars.contains(
C);
1667 static const llvm::sys::UnicodeCharSet MathStartChars(
1669 static const llvm::sys::UnicodeCharSet MathContinueChars(
1672 (void)MathStartChars;
1673 (void)MathContinueChars;
1674 assert((MathStartChars.contains(
C) || MathContinueChars.contains(
C)) &&
1675 "Unexpected mathematical notation codepoint");
1676 Diags.
Report(Range.getBegin(), diag::ext_mathematical_notation)
1689 if (!Diags.
isIgnored(diag::warn_c99_compat_unicode_id, Range.getBegin())) {
1691 CannotAppearInIdentifier = 0,
1692 CannotStartIdentifier
1695 static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
1697 static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
1699 if (!C99AllowedIDChars.contains(
C)) {
1700 Diags.
Report(Range.getBegin(), diag::warn_c99_compat_unicode_id)
1702 << CannotAppearInIdentifier;
1703 }
else if (
IsFirst && C99DisallowedInitialIDChars.contains(
C)) {
1704 Diags.
Report(Range.getBegin(), diag::warn_c99_compat_unicode_id)
1706 << CannotStartIdentifier;
1718 struct HomoglyphPair {
1721 bool operator<(HomoglyphPair R)
const {
return Character < R.Character; }
1723 static constexpr HomoglyphPair SortedHomoglyphs[] = {
1776 std::lower_bound(std::begin(SortedHomoglyphs),
1777 std::end(SortedHomoglyphs) - 1, HomoglyphPair{
C,
'\0'});
1778 if (Homoglyph->Character ==
C) {
1779 if (Homoglyph->LooksLike) {
1780 const char LooksLikeStr[] = {Homoglyph->LooksLike, 0};
1781 Diags.
Report(Range.getBegin(), diag::warn_utf8_symbol_homoglyph)
1784 Diags.
Report(Range.getBegin(), diag::warn_utf8_symbol_zero_width)
1804 bool InvalidOnlyAtStart =
IsFirst && !IsIDStart && IsIDContinue;
1806 if (!
IsFirst || InvalidOnlyAtStart) {
1807 Diags.
Report(Range.getBegin(), diag::err_character_not_allowed_identifier)
1811 Diags.
Report(Range.getBegin(), diag::err_character_not_allowed)
1817bool Lexer::tryConsumeIdentifierUCN(
const char *&CurPtr,
unsigned Size,
1819 const char *UCNPtr = CurPtr +
Size;
1820 uint32_t CodePoint = tryReadUCN(UCNPtr, CurPtr,
nullptr);
1821 if (CodePoint == 0) {
1824 bool IsExtension =
false;
1829 !
PP->isPreprocessedOutput())
1831 PP->getDiagnostics(), LangOpts, CodePoint,
1849 if ((UCNPtr - CurPtr == 6 && CurPtr[1] ==
'u') ||
1850 (UCNPtr - CurPtr == 10 && CurPtr[1] ==
'U'))
1853 while (CurPtr != UCNPtr)
1854 (void)getAndAdvanceChar(CurPtr,
Result);
1858bool Lexer::tryConsumeIdentifierUTF8Char(
const char *&CurPtr,
Token &
Result) {
1859 llvm::UTF32 CodePoint;
1864 unsigned FirstCodeUnitSize;
1865 getCharAndSize(CurPtr, FirstCodeUnitSize);
1866 const char *CharStart = CurPtr + FirstCodeUnitSize - 1;
1867 const char *UnicodePtr = CharStart;
1869 llvm::ConversionResult ConvResult = llvm::convertUTF8Sequence(
1870 (
const llvm::UTF8 **)&UnicodePtr, (
const llvm::UTF8 *)BufferEnd,
1871 &CodePoint, llvm::strictConversion);
1872 if (ConvResult != llvm::conversionOK)
1875 bool IsExtension =
false;
1882 !
PP->isPreprocessedOutput())
1884 PP->getDiagnostics(), LangOpts, CodePoint,
1892 PP->getDiagnostics(), CodePoint,
1904 ConsumeChar(CurPtr, FirstCodeUnitSize,
Result);
1905 CurPtr = UnicodePtr;
1909bool Lexer::LexUnicodeIdentifierStart(
Token &
Result, uint32_t
C,
1910 const char *CurPtr) {
1911 bool IsExtension =
false;
1914 !
PP->isPreprocessedOutput()) {
1926 return LexIdentifierContinue(
Result, CurPtr);
1930 !
PP->isPreprocessedOutput() && !
isASCII(*BufferPtr) &&
1942 PP->getDiagnostics(), LangOpts,
C,
1951 FormTokenWithChars(
Result, CurPtr, tok::unknown);
1957 [[maybe_unused]]
const char *BufferEnd) {
1959 alignas(16)
static constexpr char AsciiIdentifierRange[16] = {
1960 '_',
'_',
'A',
'Z',
'a',
'z',
'0',
'9',
1962 constexpr ssize_t BytesPerRegister = 16;
1964 __m128i AsciiIdentifierRangeV =
1967 while (LLVM_LIKELY(BufferEnd - CurPtr >= BytesPerRegister)) {
1974 if (Consumed == BytesPerRegister)
1980 unsigned char C = *CurPtr;
1986bool Lexer::LexIdentifierContinue(
Token &
Result,
const char *CurPtr) {
1995 unsigned char C = getCharAndSize(CurPtr, Size);
1997 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2002 if (!LangOpts.DollarIdents)
2006 Diag(CurPtr, diag::ext_dollar_in_identifier);
2007 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2010 if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result))
2018 const char *IdStart = BufferPtr;
2019 FormTokenWithChars(
Result, CurPtr, tok::raw_identifier);
2020 Result.setRawIdentifierData(IdStart);
2029 const IdentifierInfo *II =
PP->LookUpIdentifierInfo(
Result);
2036 if (isCodeCompletionPoint(CurPtr)) {
2038 Result.setKind(tok::code_completion);
2044 assert(*CurPtr == 0 &&
"Completion character must be 0");
2049 if (CurPtr < BufferEnd) {
2061 return PP->HandleIdentifier(
Result);
2068bool Lexer::isHexaLiteral(
const char *Start,
const LangOptions &LangOpts) {
2070 char C1 = CharAndSize1.Char;
2076 char C2 = CharAndSize2.Char;
2077 return (C2 ==
'x' || C2 ==
'X');
2083bool Lexer::LexNumericConstant(
Token &
Result,
const char *CurPtr) {
2085 char C = getCharAndSize(CurPtr, Size);
2088 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2090 if (LangOpts.HLSL &&
C ==
'.' && (*CurPtr ==
'x' || *CurPtr ==
'r')) {
2094 C = getCharAndSize(CurPtr, Size);
2098 if ((
C ==
'-' ||
C ==
'+') && (PrevCh ==
'E' || PrevCh ==
'e')) {
2101 if (!LangOpts.MicrosoftExt || !isHexaLiteral(BufferPtr, LangOpts))
2102 return LexNumericConstant(
Result, ConsumeChar(CurPtr, Size,
Result));
2106 if ((
C ==
'-' ||
C ==
'+') && (PrevCh ==
'P' || PrevCh ==
'p')) {
2110 bool IsHexFloat =
true;
2111 if (!LangOpts.C99) {
2112 if (!isHexaLiteral(BufferPtr, LangOpts))
2114 else if (!LangOpts.CPlusPlus17 &&
2115 std::find(BufferPtr, CurPtr,
'_') != CurPtr)
2119 return LexNumericConstant(
Result, ConsumeChar(CurPtr, Size,
Result));
2123 if (
C ==
'\'' && LangOpts.AllowLiteralDigitSeparator) {
2127 Diag(CurPtr, LangOpts.CPlusPlus
2128 ? diag::warn_cxx11_compat_digit_separator
2129 : diag::warn_c23_compat_digit_separator);
2130 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2131 CurPtr = ConsumeChar(CurPtr, NextSize,
Result);
2132 return LexNumericConstant(
Result, CurPtr);
2137 if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result))
2138 return LexNumericConstant(
Result, CurPtr);
2140 return LexNumericConstant(
Result, CurPtr);
2143 const char *TokStart = BufferPtr;
2144 FormTokenWithChars(
Result, CurPtr, tok::numeric_constant);
2145 Result.setLiteralData(TokStart);
2151const char *Lexer::LexUDSuffix(
Token &
Result,
const char *CurPtr,
2152 bool IsStringLiteral) {
2153 assert(LangOpts.CPlusPlus);
2157 char C = getCharAndSize(CurPtr, Size);
2158 bool Consumed =
false;
2161 if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result))
2163 else if (!
isASCII(
C) && tryConsumeIdentifierUTF8Char(CurPtr,
Result))
2169 if (!LangOpts.CPlusPlus11) {
2172 C ==
'_' ? diag::warn_cxx11_compat_user_defined_literal
2173 : diag::warn_cxx11_compat_reserved_user_defined_literal)
2184 bool IsUDSuffix =
false;
2187 else if (IsStringLiteral && LangOpts.CPlusPlus14) {
2191 const unsigned MaxStandardSuffixLength = 3;
2192 char Buffer[MaxStandardSuffixLength] = {
C };
2193 unsigned Consumed =
Size;
2196 auto [
Next, NextSize] =
2200 const StringRef CompleteSuffix(Buffer, Chars);
2206 if (Chars == MaxStandardSuffixLength)
2210 Buffer[Chars++] =
Next;
2211 Consumed += NextSize;
2217 Diag(CurPtr, LangOpts.MSVCCompat
2218 ? diag::ext_ms_reserved_user_defined_literal
2219 : diag::ext_reserved_user_defined_literal)
2224 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2229 C = getCharAndSize(CurPtr, Size);
2231 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2232 }
else if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result)) {
2233 }
else if (!
isASCII(
C) && tryConsumeIdentifierUTF8Char(CurPtr,
Result)) {
2243bool Lexer::LexStringLiteral(
Token &
Result,
const char *CurPtr,
2245 const char *AfterQuote = CurPtr;
2247 const char *NulCharacter =
nullptr;
2250 (Kind == tok::utf8_string_literal ||
2251 Kind == tok::utf16_string_literal ||
2252 Kind == tok::utf32_string_literal))
2253 Diag(BufferPtr, LangOpts.CPlusPlus ? diag::warn_cxx98_compat_unicode_literal
2254 : diag::warn_c99_compat_unicode_literal);
2256 char C = getAndAdvanceChar(CurPtr,
Result);
2261 C = getAndAdvanceChar(CurPtr,
Result);
2263 if (
C ==
'\n' ||
C ==
'\r' ||
2264 (
C == 0 && CurPtr-1 == BufferEnd)) {
2266 Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 1;
2267 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2272 if (isCodeCompletionPoint(CurPtr-1)) {
2274 codeCompleteIncludedFile(AfterQuote, CurPtr - 1,
false);
2276 PP->CodeCompleteNaturalLanguage();
2277 FormTokenWithChars(
Result, CurPtr - 1, tok::unknown);
2282 NulCharacter = CurPtr-1;
2284 C = getAndAdvanceChar(CurPtr,
Result);
2288 if (LangOpts.CPlusPlus)
2289 CurPtr = LexUDSuffix(
Result, CurPtr,
true);
2293 Diag(NulCharacter, diag::null_in_char_or_string) << 1;
2296 const char *TokStart = BufferPtr;
2297 FormTokenWithChars(
Result, CurPtr, Kind);
2298 Result.setLiteralData(TokStart);
2304bool Lexer::LexRawStringLiteral(
Token &
Result,
const char *CurPtr,
2312 Diag(BufferPtr, diag::warn_cxx98_compat_raw_string_literal);
2314 unsigned PrefixLen = 0;
2318 llvm::is_contained({
'$',
'@',
'`'}, CurPtr[PrefixLen])) {
2319 const char *Pos = &CurPtr[PrefixLen];
2320 Diag(Pos, LangOpts.CPlusPlus26
2321 ? diag::warn_cxx26_compat_raw_string_literal_character_set
2322 : diag::ext_cxx26_raw_string_literal_character_set)
2323 << StringRef(Pos, 1);
2329 if (CurPtr[PrefixLen] !=
'(') {
2331 const char *PrefixEnd = &CurPtr[PrefixLen];
2332 if (PrefixLen == 16) {
2333 Diag(PrefixEnd, diag::err_raw_delim_too_long);
2334 }
else if (*PrefixEnd ==
'\n') {
2335 Diag(PrefixEnd, diag::err_invalid_newline_raw_delim);
2337 Diag(PrefixEnd, diag::err_invalid_char_raw_delim)
2338 << StringRef(PrefixEnd, 1);
2350 if (
C == 0 && CurPtr-1 == BufferEnd) {
2356 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2361 const char *Prefix = CurPtr;
2362 CurPtr += PrefixLen + 1;
2369 if (strncmp(CurPtr, Prefix, PrefixLen) == 0 && CurPtr[PrefixLen] ==
'"') {
2370 CurPtr += PrefixLen + 1;
2373 }
else if (
C == 0 && CurPtr-1 == BufferEnd) {
2375 Diag(BufferPtr, diag::err_unterminated_raw_string)
2376 << StringRef(Prefix, PrefixLen);
2377 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2383 if (LangOpts.CPlusPlus)
2384 CurPtr = LexUDSuffix(
Result, CurPtr,
true);
2387 const char *TokStart = BufferPtr;
2388 FormTokenWithChars(
Result, CurPtr, Kind);
2389 Result.setLiteralData(TokStart);
2395bool Lexer::LexAngledStringLiteral(
Token &
Result,
const char *CurPtr) {
2397 const char *NulCharacter =
nullptr;
2398 const char *AfterLessPos = CurPtr;
2399 char C = getAndAdvanceChar(CurPtr,
Result);
2404 C = getAndAdvanceChar(CurPtr,
Result);
2407 (
C == 0 && (CurPtr - 1 == BufferEnd))) {
2410 FormTokenWithChars(
Result, AfterLessPos, tok::less);
2415 if (isCodeCompletionPoint(CurPtr - 1)) {
2416 codeCompleteIncludedFile(AfterLessPos, CurPtr - 1,
true);
2418 FormTokenWithChars(
Result, CurPtr - 1, tok::unknown);
2421 NulCharacter = CurPtr-1;
2423 C = getAndAdvanceChar(CurPtr,
Result);
2428 Diag(NulCharacter, diag::null_in_char_or_string) << 1;
2431 const char *TokStart = BufferPtr;
2432 FormTokenWithChars(
Result, CurPtr, tok::header_name);
2433 Result.setLiteralData(TokStart);
2437void Lexer::codeCompleteIncludedFile(
const char *PathStart,
2438 const char *CompletionPoint,
2441 StringRef PartialPath(PathStart, CompletionPoint - PathStart);
2442 llvm::StringRef SlashChars = LangOpts.MSVCCompat ?
"/\\" :
"/";
2443 auto Slash = PartialPath.find_last_of(SlashChars);
2445 (Slash == StringRef::npos) ?
"" : PartialPath.take_front(Slash);
2446 const char *StartOfFilename =
2447 (Slash == StringRef::npos) ? PathStart : PathStart + Slash + 1;
2449 PP->setCodeCompletionIdentifierInfo(&
PP->getIdentifierTable().get(
2450 StringRef(StartOfFilename, CompletionPoint - StartOfFilename)));
2453 while (CompletionPoint < BufferEnd) {
2454 char Next = *(CompletionPoint + 1);
2458 if (
Next == (IsAngled ?
'>' :
'"'))
2460 if (SlashChars.contains(
Next))
2464 PP->setCodeCompletionTokenRange(
2465 FileLoc.getLocWithOffset(StartOfFilename - BufferStart),
2466 FileLoc.getLocWithOffset(CompletionPoint - BufferStart));
2467 PP->CodeCompleteIncludedFile(Dir, IsAngled);
2472bool Lexer::LexCharConstant(
Token &
Result,
const char *CurPtr,
2475 const char *NulCharacter =
nullptr;
2478 if (Kind == tok::utf16_char_constant || Kind == tok::utf32_char_constant)
2479 Diag(BufferPtr, LangOpts.CPlusPlus
2480 ? diag::warn_cxx98_compat_unicode_literal
2481 : diag::warn_c99_compat_unicode_literal);
2482 else if (Kind == tok::utf8_char_constant)
2483 Diag(BufferPtr, LangOpts.CPlusPlus
2484 ? diag::warn_cxx14_compat_u8_character_literal
2485 : diag::warn_c17_compat_u8_character_literal);
2488 char C = getAndAdvanceChar(CurPtr,
Result);
2491 Diag(BufferPtr, diag::ext_empty_character);
2492 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2499 C = getAndAdvanceChar(CurPtr,
Result);
2501 if (
C ==
'\n' ||
C ==
'\r' ||
2502 (
C == 0 && CurPtr-1 == BufferEnd)) {
2504 Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 0;
2505 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2510 if (isCodeCompletionPoint(CurPtr-1)) {
2511 PP->CodeCompleteNaturalLanguage();
2512 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2517 NulCharacter = CurPtr-1;
2519 C = getAndAdvanceChar(CurPtr,
Result);
2523 if (LangOpts.CPlusPlus)
2524 CurPtr = LexUDSuffix(
Result, CurPtr,
false);
2528 Diag(NulCharacter, diag::null_in_char_or_string) << 0;
2531 const char *TokStart = BufferPtr;
2532 FormTokenWithChars(
Result, CurPtr, Kind);
2533 Result.setLiteralData(TokStart);
2541bool Lexer::SkipWhitespace(
Token &
Result,
const char *CurPtr) {
2545 unsigned char Char = *CurPtr;
2547 const char *lastNewLine =
nullptr;
2548 auto setLastNewLine = [&](
const char *Ptr) {
2554 setLastNewLine(CurPtr - 1);
2573 if (*CurPtr ==
'\n')
2574 setLastNewLine(CurPtr);
2581 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2583 IsAtStartOfLine =
true;
2584 IsAtPhysicalStartOfLine =
true;
2591 char PrevChar = CurPtr[-1];
2599 if (NewLinePtr && lastNewLine && NewLinePtr != lastNewLine &&
PP) {
2600 if (
auto *Handler =
PP->getEmptylineHandler())
2616bool Lexer::SkipLineComment(
Token &
Result,
const char *CurPtr) {
2621 Diag(BufferPtr, diag::ext_line_comment);
2639 bool UnicodeDecodingAlreadyDiagnosed =
false;
2646 C !=
'\n' &&
C !=
'\r') {
2648 UnicodeDecodingAlreadyDiagnosed =
false;
2652 unsigned Length = llvm::getUTF8SequenceSize(
2653 (
const llvm::UTF8 *)CurPtr, (
const llvm::UTF8 *)BufferEnd);
2656 Diag(CurPtr, diag::warn_invalid_utf8_in_comment);
2657 UnicodeDecodingAlreadyDiagnosed =
true;
2660 UnicodeDecodingAlreadyDiagnosed =
false;
2666 const char *NextLine = CurPtr;
2669 const char *EscapePtr = CurPtr-1;
2670 bool HasSpace =
false;
2676 if (*EscapePtr ==
'\\')
2679 else if (EscapePtr[0] ==
'/' && EscapePtr[-1] ==
'?' &&
2680 EscapePtr[-2] ==
'?' && LangOpts.Trigraphs)
2682 CurPtr = EscapePtr-2;
2688 Diag(EscapePtr, diag::backslash_newline_space);
2695 const char *OldPtr = CurPtr;
2698 C = getAndAdvanceChar(CurPtr,
Result);
2703 if (
C != 0 && CurPtr == OldPtr+1) {
2711 if (CurPtr != OldPtr + 1 &&
C !=
'/' &&
2712 (CurPtr == BufferEnd + 1 || CurPtr[0] !=
'/')) {
2713 for (; OldPtr != CurPtr; ++OldPtr)
2714 if (OldPtr[0] ==
'\n' || OldPtr[0] ==
'\r') {
2718 const char *ForwardPtr = CurPtr;
2721 if (ForwardPtr[0] ==
'/' && ForwardPtr[1] ==
'/')
2726 Diag(OldPtr-1, diag::ext_multi_line_line_comment);
2731 if (
C ==
'\r' ||
C ==
'\n' || CurPtr == BufferEnd + 1) {
2736 if (
C ==
'\0' && isCodeCompletionPoint(CurPtr-1)) {
2737 PP->CodeCompleteNaturalLanguage();
2754 return SaveLineComment(
Result, CurPtr);
2768 NewLinePtr = CurPtr++;
2781bool Lexer::SaveLineComment(
Token &
Result,
const char *CurPtr) {
2784 FormTokenWithChars(
Result, CurPtr, tok::comment);
2796 assert(Spelling[0] ==
'/' && Spelling[1] ==
'/' &&
"Not line comment?");
2800 Result.setKind(tok::comment);
2811 assert(CurPtr[0] ==
'\n' || CurPtr[0] ==
'\r');
2814 const char *TrigraphPos =
nullptr;
2816 const char *SpacePos =
nullptr;
2823 if (CurPtr[0] ==
'\n' || CurPtr[0] ==
'\r') {
2825 if (CurPtr[0] == CurPtr[1])
2839 if (*CurPtr ==
'\\') {
2841 }
else if (CurPtr[0] ==
'/' && CurPtr[-1] ==
'?' && CurPtr[-2] ==
'?') {
2843 TrigraphPos = CurPtr - 2;
2854 if (*CurPtr !=
'\n' && *CurPtr !=
'\r')
2863 L->
Diag(TrigraphPos, diag::trigraph_ignored_block_comment);
2867 L->
Diag(TrigraphPos, diag::trigraph_ends_block_comment);
2872 L->
Diag(CurPtr + 1, diag::escaped_newline_block_comment_end);
2876 L->
Diag(SpacePos, diag::backslash_newline_space);
2882#include <emmintrin.h>
2897bool Lexer::SkipBlockComment(
Token &
Result,
const char *CurPtr) {
2907 unsigned char C = getCharAndSize(CurPtr, CharSize);
2909 if (
C == 0 && CurPtr == BufferEnd+1) {
2911 Diag(BufferPtr, diag::err_unterminated_block_comment);
2917 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2934 bool UnicodeDecodingAlreadyDiagnosed =
false;
2939 if (CurPtr + 24 < BufferEnd &&
2942 !(
PP &&
PP->getCodeCompletionFileLoc() == FileLoc)) {
2944 while (
C !=
'/' && (
intptr_t)CurPtr % 16 != 0) {
2949 if (
C ==
'/')
goto FoundSlash;
2953 while (CurPtr + 16 < BufferEnd) {
2955 if (LLVM_UNLIKELY(Mask != 0)) {
2965 CurPtr += llvm::countr_zero<unsigned>(cmp) + 1;
2971 __vector
unsigned char LongUTF = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
2972 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
2973 0x80, 0x80, 0x80, 0x80};
2974 __vector
unsigned char Slashes = {
2975 '/',
'/',
'/',
'/',
'/',
'/',
'/',
'/',
2976 '/',
'/',
'/',
'/',
'/',
'/',
'/',
'/'
2978 while (CurPtr + 16 < BufferEnd) {
2980 vec_any_ge(*(
const __vector
unsigned char *)CurPtr, LongUTF)))
2982 if (
vec_any_eq(*(
const __vector
unsigned char *)CurPtr, Slashes)) {
2989 while (CurPtr + 16 < BufferEnd) {
2990 bool HasNonASCII =
false;
2991 for (
unsigned I = 0; I < 16; ++I)
2992 HasNonASCII |= !
isASCII(CurPtr[I]);
2994 if (LLVM_UNLIKELY(HasNonASCII))
2997 bool HasSlash =
false;
2998 for (
unsigned I = 0; I < 16; ++I)
2999 HasSlash |= CurPtr[I] ==
'/';
3013 while (
C !=
'/' &&
C !=
'\0') {
3015 UnicodeDecodingAlreadyDiagnosed =
false;
3022 unsigned Length = llvm::getUTF8SequenceSize(
3023 (
const llvm::UTF8 *)CurPtr - 1, (
const llvm::UTF8 *)BufferEnd);
3026 Diag(CurPtr - 1, diag::warn_invalid_utf8_in_comment);
3027 UnicodeDecodingAlreadyDiagnosed =
true;
3029 UnicodeDecodingAlreadyDiagnosed =
false;
3030 CurPtr += Length - 1;
3037 if (CurPtr[-2] ==
'*')
3040 if ((CurPtr[-2] ==
'\n' || CurPtr[-2] ==
'\r')) {
3042 LangOpts.Trigraphs)) {
3048 if (CurPtr[0] ==
'*' && CurPtr[1] !=
'/') {
3053 Diag(CurPtr-1, diag::warn_nested_block_comment);
3055 }
else if (
C == 0 && CurPtr == BufferEnd+1) {
3057 Diag(BufferPtr, diag::err_unterminated_block_comment);
3066 FormTokenWithChars(
Result, CurPtr, tok::unknown);
3072 }
else if (
C ==
'\0' && isCodeCompletionPoint(CurPtr-1)) {
3073 PP->CodeCompleteNaturalLanguage();
3091 FormTokenWithChars(
Result, CurPtr, tok::comment);
3100 SkipWhitespace(
Result, CurPtr + 1);
3118 "Must be in a preprocessing directive!");
3123 const char *CurPtr = BufferPtr;
3125 char Char = getAndAdvanceChar(CurPtr, Tmp);
3133 if (CurPtr-1 != BufferEnd) {
3134 if (isCodeCompletionPoint(CurPtr-1)) {
3135 PP->CodeCompleteNaturalLanguage();
3150 assert(CurPtr[-1] == Char &&
"Trigraphs for newline?");
3151 BufferPtr = CurPtr-1;
3155 if (Tmp.
is(tok::code_completion)) {
3157 PP->CodeCompleteNaturalLanguage();
3160 assert(Tmp.
is(tok::eod) &&
"Unexpected token!");
3172bool Lexer::LexEndOfFile(
Token &
Result,
const char *CurPtr) {
3180 FormTokenWithChars(
Result, CurPtr, tok::eod);
3191 Result.startToken();
3192 BufferPtr = BufferEnd;
3193 FormTokenWithChars(Result, BufferEnd, tok::eof);
3197 if (
PP->isRecordingPreamble() &&
PP->isInPrimaryFile()) {
3203 MIOpt.ExitTopLevelConditional();
3211 if (
PP->getCodeCompletionFileLoc() != FileLoc)
3213 diag::err_pp_unterminated_conditional);
3220 if (CurPtr != BufferStart && (CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r'))
3221 Diag(BufferEnd, diag::warn_no_newline_eof)
3233std::optional<Token> Lexer::peekNextPPToken() {
3234 assert(!
LexingRawMode &&
"How can we expand a macro from a skipping buffer?");
3236 if (isDependencyDirectivesLexer()) {
3237 if (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size())
3238 return std::nullopt;
3240 (void)convertDependencyDirectiveToken(
3241 DepDirectives.front().Tokens[NextDepDirectiveTokenIndex],
Result);
3251 const char *TmpBufferPtr = BufferPtr;
3253 bool atStartOfLine = IsAtStartOfLine;
3254 bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
3255 bool leadingSpace = HasLeadingSpace;
3256 MultipleIncludeOpt MIOptState =
MIOpt;
3262 BufferPtr = TmpBufferPtr;
3264 HasLeadingSpace = leadingSpace;
3265 IsAtStartOfLine = atStartOfLine;
3266 IsAtPhysicalStartOfLine = atPhysicalStartOfLine;
3271 if (
Tok.
is(tok::eof))
3272 return std::nullopt;
3279 const char *Terminator = CMK ==
CMK_Perforce ?
"<<<<\n" :
">>>>>>>";
3281 auto RestOfBuffer = StringRef(CurPtr, BufferEnd - CurPtr).substr(TermLen);
3282 size_t Pos = RestOfBuffer.find(Terminator);
3283 while (Pos != StringRef::npos) {
3286 (RestOfBuffer[Pos - 1] !=
'\r' && RestOfBuffer[Pos - 1] !=
'\n')) {
3287 RestOfBuffer = RestOfBuffer.substr(Pos+TermLen);
3288 Pos = RestOfBuffer.find(Terminator);
3291 return RestOfBuffer.data()+Pos;
3300bool Lexer::IsStartOfConflictMarker(
const char *CurPtr) {
3302 if (CurPtr != BufferStart &&
3303 CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')
3307 if (!StringRef(CurPtr, BufferEnd - CurPtr).starts_with(
"<<<<<<<") &&
3308 !StringRef(CurPtr, BufferEnd - CurPtr).starts_with(
">>>> "))
3323 Diag(CurPtr, diag::err_conflict_marker);
3324 CurrentConflictMarkerState =
Kind;
3328 while (*CurPtr !=
'\r' && *CurPtr !=
'\n') {
3329 assert(CurPtr != BufferEnd &&
"Didn't find end of line");
3344bool Lexer::HandleEndOfConflictMarker(
const char *CurPtr) {
3346 if (CurPtr != BufferStart &&
3347 CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')
3356 for (
unsigned i = 1; i != 4; ++i)
3357 if (CurPtr[i] != CurPtr[0])
3364 CurrentConflictMarkerState)) {
3368 while (CurPtr != BufferEnd && *CurPtr !=
'\r' && *CurPtr !=
'\n')
3374 CurrentConflictMarkerState =
CMK_None;
3382 const char *BufferEnd) {
3383 if (CurPtr == BufferEnd)
3386 for (; CurPtr != BufferEnd; ++CurPtr) {
3387 if (CurPtr[0] ==
'#' && CurPtr[1] ==
'>')
3393bool Lexer::lexEditorPlaceholder(
Token &
Result,
const char *CurPtr) {
3394 assert(CurPtr[-1] ==
'<' && CurPtr[0] ==
'#' &&
"Not a placeholder!");
3400 const char *Start = CurPtr - 1;
3401 if (!LangOpts.AllowEditorPlaceholders)
3402 Diag(Start, diag::err_placeholder_in_source);
3404 FormTokenWithChars(
Result, End, tok::raw_identifier);
3405 Result.setRawIdentifierData(Start);
3412bool Lexer::isCodeCompletionPoint(
const char *CurPtr)
const {
3413 if (
PP &&
PP->isCodeCompletionEnabled()) {
3414 SourceLocation Loc = FileLoc.getLocWithOffset(CurPtr-BufferStart);
3415 return Loc ==
PP->getCodeCompletionLoc();
3426 if (Opts.CPlusPlus23)
3427 DiagId = diag::warn_cxx23_delimited_escape_sequence;
3428 else if (Opts.C2y && !Named)
3429 DiagId = diag::warn_c2y_delimited_escape_sequence;
3431 DiagId = diag::ext_delimited_escape_sequence;
3437 if (!Opts.CPlusPlus)
3438 Ext = Named ? 2 : 1 ;
3442 Diags.
Report(Loc, DiagId) << Named << Ext;
3445std::optional<uint32_t> Lexer::tryReadNumericUCN(
const char *&StartPtr,
3446 const char *SlashLoc,
3449 char Kind = getCharAndSize(StartPtr, CharSize);
3450 assert((Kind ==
'u' || Kind ==
'U') &&
"expected a UCN");
3452 unsigned NumHexDigits;
3455 else if (Kind ==
'U')
3458 bool Delimited =
false;
3459 bool FoundEndDelimiter =
false;
3463 if (!LangOpts.CPlusPlus && !LangOpts.C99) {
3465 Diag(SlashLoc, diag::warn_ucn_not_valid_in_c89);
3466 return std::nullopt;
3469 const char *CurPtr = StartPtr + CharSize;
3470 const char *KindLoc = &CurPtr[-1];
3472 uint32_t CodePoint = 0;
3473 while (Count != NumHexDigits || Delimited) {
3474 char C = getCharAndSize(CurPtr, CharSize);
3475 if (!Delimited && Count == 0 &&
C ==
'{') {
3481 if (Delimited &&
C ==
'}') {
3483 FoundEndDelimiter =
true;
3487 unsigned Value = llvm::hexDigitValue(
C);
3488 if (
Value == std::numeric_limits<unsigned>::max()) {
3492 Diag(SlashLoc, diag::warn_delimited_ucn_incomplete)
3493 << StringRef(KindLoc, 1);
3494 return std::nullopt;
3497 if (CodePoint & 0xF000'0000) {
3499 Diag(KindLoc, diag::err_escape_too_large) << 0;
3500 return std::nullopt;
3511 Diag(SlashLoc, FoundEndDelimiter ? diag::warn_delimited_ucn_empty
3512 : diag::warn_ucn_escape_no_digits)
3513 << StringRef(KindLoc, 1);
3514 return std::nullopt;
3517 if (Delimited && Kind ==
'U') {
3519 Diag(SlashLoc, diag::err_hex_escape_no_digits) << StringRef(KindLoc, 1);
3520 return std::nullopt;
3523 if (!Delimited && Count != NumHexDigits) {
3525 Diag(SlashLoc, diag::warn_ucn_escape_incomplete);
3527 if (Count == 4 && NumHexDigits == 8) {
3528 CharSourceRange URange =
makeCharRange(*
this, KindLoc, KindLoc + 1);
3529 Diag(KindLoc, diag::note_ucn_four_not_eight)
3533 return std::nullopt;
3536 if (Delimited &&
PP)
3539 PP->getDiagnostics());
3546 if (CurPtr - StartPtr == (
ptrdiff_t)(Count + 1 + (Delimited ? 2 : 0)))
3549 while (StartPtr != CurPtr)
3550 (void)getAndAdvanceChar(StartPtr, *
Result);
3557std::optional<uint32_t> Lexer::tryReadNamedUCN(
const char *&StartPtr,
3558 const char *SlashLoc,
3563 char C = getCharAndSize(StartPtr, CharSize);
3564 assert(
C ==
'N' &&
"expected \\N{...}");
3566 const char *CurPtr = StartPtr + CharSize;
3567 const char *KindLoc = &CurPtr[-1];
3569 C = getCharAndSize(CurPtr, CharSize);
3572 Diag(SlashLoc, diag::warn_ucn_escape_incomplete);
3573 return std::nullopt;
3576 const char *StartName = CurPtr;
3577 bool FoundEndDelimiter =
false;
3578 llvm::SmallVector<char, 30> Buffer;
3580 C = getCharAndSize(CurPtr, CharSize);
3583 FoundEndDelimiter =
true;
3589 Buffer.push_back(
C);
3592 if (!FoundEndDelimiter || Buffer.empty()) {
3594 Diag(SlashLoc, FoundEndDelimiter ? diag::warn_delimited_ucn_empty
3595 : diag::warn_delimited_ucn_incomplete)
3596 << StringRef(KindLoc, 1);
3597 return std::nullopt;
3600 StringRef Name(Buffer.data(), Buffer.size());
3601 std::optional<char32_t>
Match =
3602 llvm::sys::unicode::nameToCodepointStrict(Name);
3603 std::optional<llvm::sys::unicode::LooseMatchingResult> LooseMatch;
3605 LooseMatch = llvm::sys::unicode::nameToCodepointLooseMatching(Name);
3607 Diag(StartName, diag::err_invalid_ucn_name)
3608 << StringRef(Buffer.data(), Buffer.size())
3611 Diag(StartName, diag::note_invalid_ucn_name_loose_matching)
3622 if (Diagnose &&
Match)
3625 PP->getDiagnostics());
3631 if (LooseMatch && Diagnose)
3632 Match = LooseMatch->CodePoint;
3639 if (CurPtr - StartPtr == (
ptrdiff_t)(Buffer.size() + 3))
3642 while (StartPtr != CurPtr)
3643 (void)getAndAdvanceChar(StartPtr, *
Result);
3647 return Match ? std::optional<uint32_t>(*
Match) : std::nullopt;
3650uint32_t Lexer::tryReadUCN(
const char *&StartPtr,
const char *SlashLoc,
3654 std::optional<uint32_t> CodePointOpt;
3655 char Kind = getCharAndSize(StartPtr, CharSize);
3656 if (Kind ==
'u' || Kind ==
'U')
3657 CodePointOpt = tryReadNumericUCN(StartPtr, SlashLoc,
Result);
3658 else if (Kind ==
'N')
3659 CodePointOpt = tryReadNamedUCN(StartPtr, SlashLoc,
Result);
3664 uint32_t CodePoint = *CodePointOpt;
3667 if (LangOpts.AsmPreprocessor)
3686 if (CodePoint < 0xA0) {
3690 if (CodePoint < 0x20 || CodePoint >= 0x7F)
3691 Diag(BufferPtr, diag::err_ucn_control_character);
3693 char C =
static_cast<char>(CodePoint);
3694 Diag(BufferPtr, diag::err_ucn_escape_basic_scs) << StringRef(&
C, 1);
3699 }
else if (CodePoint >= 0xD800 && CodePoint <= 0xDFFF) {
3704 if (LangOpts.CPlusPlus && !LangOpts.CPlusPlus11)
3705 Diag(BufferPtr, diag::warn_ucn_escape_surrogate);
3707 Diag(BufferPtr, diag::err_ucn_escape_invalid);
3715bool Lexer::CheckUnicodeWhitespace(
Token &
Result, uint32_t
C,
3716 const char *CurPtr) {
3719 Diag(BufferPtr, diag::ext_unicode_whitespace)
3728void Lexer::PropagateLineStartLeadingSpaceInfo(
Token &
Result) {
3729 IsAtStartOfLine =
Result.isAtStartOfLine();
3730 HasLeadingSpace =
Result.hasLeadingSpace();
3731 HasLeadingEmptyMacro =
Result.hasLeadingEmptyMacro();
3736 assert(!isDependencyDirectivesLexer());
3742 if (IsAtStartOfLine) {
3744 IsAtStartOfLine =
false;
3747 if (IsAtPhysicalStartOfLine) {
3749 IsAtPhysicalStartOfLine =
false;
3752 if (HasLeadingSpace) {
3754 HasLeadingSpace =
false;
3757 if (HasLeadingEmptyMacro) {
3759 HasLeadingEmptyMacro =
false;
3764 bool returnedToken = LexTokenInternal(
Result);
3766 assert((returnedToken || !isRawLex) &&
"Raw lex must succeed");
3767 return returnedToken;
3777 assert(!
Result.needsCleaning() &&
"Result needs cleaning");
3778 assert(!
Result.hasPtrData() &&
"Result has not been reset");
3781 const char *CurPtr = BufferPtr;
3795 FormTokenWithChars(Result, CurPtr, tok::unknown);
3804 unsigned SizeTmp, SizeTmp2;
3807 char Char = getAndAdvanceChar(CurPtr,
Result);
3811 NewLinePtr =
nullptr;
3816 if (CurPtr-1 == BufferEnd)
3817 return LexEndOfFile(
Result, CurPtr-1);
3820 if (isCodeCompletionPoint(CurPtr-1)) {
3823 FormTokenWithChars(
Result, CurPtr, tok::code_completion);
3828 Diag(CurPtr-1, diag::null_in_file);
3830 if (SkipWhitespace(
Result, CurPtr))
3839 if (LangOpts.MicrosoftExt) {
3841 Diag(CurPtr-1, diag::ext_ctrl_z_eof_microsoft);
3842 return LexEndOfFile(
Result, CurPtr-1);
3846 Kind = tok::unknown;
3850 if (CurPtr[0] ==
'\n')
3851 (void)getAndAdvanceChar(CurPtr,
Result);
3865 IsAtStartOfLine =
true;
3866 IsAtPhysicalStartOfLine =
true;
3867 NewLinePtr = CurPtr - 1;
3876 if (SkipWhitespace(
Result, CurPtr))
3886 SkipHorizontalWhitespace:
3888 if (SkipWhitespace(
Result, CurPtr))
3897 LineComment && (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP)) {
3898 if (SkipLineComment(
Result, CurPtr + 2))
3900 goto SkipIgnoredUnits;
3902 if (SkipBlockComment(
Result, CurPtr + 2))
3904 goto SkipIgnoredUnits;
3906 goto SkipHorizontalWhitespace;
3914 case '0':
case '1':
case '2':
case '3':
case '4':
3915 case '5':
case '6':
case '7':
case '8':
case '9':
3918 return LexNumericConstant(
Result, CurPtr);
3927 if (LangOpts.CPlusPlus11 || LangOpts.C11) {
3928 Char = getCharAndSize(CurPtr, SizeTmp);
3932 return LexStringLiteral(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3933 tok::utf16_string_literal);
3937 return LexCharConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3938 tok::utf16_char_constant);
3941 if (Char ==
'R' && LangOpts.RawStringLiterals &&
3942 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3943 return LexRawStringLiteral(
Result,
3944 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3946 tok::utf16_string_literal);
3949 char Char2 = getCharAndSize(CurPtr + SizeTmp, SizeTmp2);
3953 return LexStringLiteral(
Result,
3954 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3956 tok::utf8_string_literal);
3957 if (Char2 ==
'\'' && (LangOpts.CPlusPlus17 || LangOpts.C23))
3958 return LexCharConstant(
3959 Result, ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3961 tok::utf8_char_constant);
3963 if (Char2 ==
'R' && LangOpts.RawStringLiterals) {
3965 char Char3 = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
3968 return LexRawStringLiteral(
Result,
3969 ConsumeChar(ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3972 tok::utf8_string_literal);
3979 return LexIdentifierContinue(
Result, CurPtr);
3985 if (LangOpts.CPlusPlus11 || LangOpts.C11) {
3986 Char = getCharAndSize(CurPtr, SizeTmp);
3990 return LexStringLiteral(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3991 tok::utf32_string_literal);
3995 return LexCharConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3996 tok::utf32_char_constant);
3999 if (Char ==
'R' && LangOpts.RawStringLiterals &&
4000 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
4001 return LexRawStringLiteral(
Result,
4002 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4004 tok::utf32_string_literal);
4008 return LexIdentifierContinue(
Result, CurPtr);
4014 if (LangOpts.RawStringLiterals) {
4015 Char = getCharAndSize(CurPtr, SizeTmp);
4018 return LexRawStringLiteral(
Result,
4019 ConsumeChar(CurPtr, SizeTmp,
Result),
4020 tok::string_literal);
4024 return LexIdentifierContinue(
Result, CurPtr);
4029 Char = getCharAndSize(CurPtr, SizeTmp);
4033 return LexStringLiteral(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
4034 tok::wide_string_literal);
4037 if (LangOpts.RawStringLiterals && Char ==
'R' &&
4038 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
4039 return LexRawStringLiteral(
Result,
4040 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4042 tok::wide_string_literal);
4046 return LexCharConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
4047 tok::wide_char_constant);
4052 case 'A':
case 'B':
case 'C':
case 'D':
case 'E':
case 'F':
case 'G':
4053 case 'H':
case 'I':
case 'J':
case 'K':
case 'M':
case 'N':
4054 case 'O':
case 'P':
case 'Q':
case 'S':
case 'T':
4055 case 'V':
case 'W':
case 'X':
case 'Y':
case 'Z':
4056 case 'a':
case 'b':
case 'c':
case 'd':
case 'e':
case 'f':
case 'g':
4057 case 'h':
case 'i':
case 'j':
case 'k':
case 'l':
case 'm':
case 'n':
4058 case 'o':
case 'p':
case 'q':
case 'r':
case 's':
case 't':
4059 case 'v':
case 'w':
case 'x':
case 'y':
case 'z':
4063 return LexIdentifierContinue(
Result, CurPtr);
4065 if (LangOpts.DollarIdents) {
4067 Diag(CurPtr-1, diag::ext_dollar_in_identifier);
4070 return LexIdentifierContinue(
Result, CurPtr);
4073 Kind = tok::unknown;
4080 return LexCharConstant(
Result, CurPtr, tok::char_constant);
4086 return LexStringLiteral(
Result, CurPtr,
4088 : tok::string_literal);
4092 Kind = tok::question;
4095 Kind = tok::l_square;
4098 Kind = tok::r_square;
4101 Kind = tok::l_paren;
4104 Kind = tok::r_paren;
4107 Kind = tok::l_brace;
4110 Kind = tok::r_brace;
4113 Char = getCharAndSize(CurPtr, SizeTmp);
4114 if (Char >=
'0' && Char <=
'9') {
4118 return LexNumericConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result));
4119 }
else if (LangOpts.CPlusPlus && Char ==
'*') {
4120 Kind = tok::periodstar;
4122 }
else if (Char ==
'.' &&
4123 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
'.') {
4124 Kind = tok::ellipsis;
4125 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4132 Char = getCharAndSize(CurPtr, SizeTmp);
4135 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4136 }
else if (Char ==
'=') {
4137 Kind = tok::ampequal;
4138 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4144 if (getCharAndSize(CurPtr, SizeTmp) ==
'=') {
4145 Kind = tok::starequal;
4146 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4152 Char = getCharAndSize(CurPtr, SizeTmp);
4154 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4155 Kind = tok::plusplus;
4156 }
else if (Char ==
'=') {
4157 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4158 Kind = tok::plusequal;
4164 Char = getCharAndSize(CurPtr, SizeTmp);
4166 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4167 Kind = tok::minusminus;
4168 }
else if (Char ==
'>' && LangOpts.CPlusPlus &&
4169 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
'*') {
4170 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4172 Kind = tok::arrowstar;
4173 }
else if (Char ==
'>') {
4174 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4176 }
else if (Char ==
'=') {
4177 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4178 Kind = tok::minusequal;
4187 if (getCharAndSize(CurPtr, SizeTmp) ==
'=') {
4188 Kind = tok::exclaimequal;
4189 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4191 Kind = tok::exclaim;
4196 Char = getCharAndSize(CurPtr, SizeTmp);
4206 bool TreatAsComment =
4207 LineComment && (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP);
4208 if (!TreatAsComment)
4209 if (!(
PP &&
PP->isPreprocessedOutput()))
4210 TreatAsComment = getCharAndSize(CurPtr+SizeTmp, SizeTmp2) !=
'*';
4212 if (TreatAsComment) {
4213 if (SkipLineComment(
Result, ConsumeChar(CurPtr, SizeTmp,
Result)))
4219 goto SkipIgnoredUnits;
4224 if (SkipBlockComment(
Result, ConsumeChar(CurPtr, SizeTmp,
Result)))
4233 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4234 Kind = tok::slashequal;
4240 Char = getCharAndSize(CurPtr, SizeTmp);
4242 Kind = tok::percentequal;
4243 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4244 }
else if (LangOpts.Digraphs && Char ==
'>') {
4245 Kind = tok::r_brace;
4246 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4247 }
else if (LangOpts.Digraphs && Char ==
':') {
4248 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4249 Char = getCharAndSize(CurPtr, SizeTmp);
4250 if (Char ==
'%' && getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
':') {
4251 Kind = tok::hashhash;
4252 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4254 }
else if (Char ==
'@' && LangOpts.MicrosoftExt) {
4255 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4257 Diag(BufferPtr, diag::ext_charize_microsoft);
4266 goto HandleDirective;
4271 Kind = tok::percent;
4275 Char = getCharAndSize(CurPtr, SizeTmp);
4277 return LexAngledStringLiteral(
Result, CurPtr);
4278 }
else if (Char ==
'<') {
4279 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
4281 Kind = tok::lesslessequal;
4282 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4284 }
else if (After ==
'<' && IsStartOfConflictMarker(CurPtr-1)) {
4288 }
else if (After ==
'<' && HandleEndOfConflictMarker(CurPtr-1)) {
4292 }
else if (LangOpts.CUDA && After ==
'<') {
4293 Kind = tok::lesslessless;
4294 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4297 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4298 Kind = tok::lessless;
4300 }
else if (Char ==
'=') {
4301 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
4303 if (LangOpts.CPlusPlus20) {
4305 Diag(BufferPtr, diag::warn_cxx17_compat_spaceship);
4306 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4308 Kind = tok::spaceship;
4314 Diag(BufferPtr, diag::warn_cxx20_compat_spaceship)
4319 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4320 Kind = tok::lessequal;
4321 }
else if (LangOpts.Digraphs && Char ==
':') {
4322 if (LangOpts.CPlusPlus11 &&
4323 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
':') {
4330 char After = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
4331 if (After !=
':' && After !=
'>') {
4334 Diag(BufferPtr, diag::warn_cxx98_compat_less_colon_colon);
4339 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4340 Kind = tok::l_square;
4341 }
else if (LangOpts.Digraphs && Char ==
'%') {
4342 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4343 Kind = tok::l_brace;
4344 }
else if (Char ==
'#' && SizeTmp == 1 &&
4345 lexEditorPlaceholder(
Result, CurPtr)) {
4352 Char = getCharAndSize(CurPtr, SizeTmp);
4354 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4355 Kind = tok::greaterequal;
4356 }
else if (Char ==
'>') {
4357 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
4359 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4361 Kind = tok::greatergreaterequal;
4362 }
else if (After ==
'>' && IsStartOfConflictMarker(CurPtr-1)) {
4366 }
else if (After ==
'>' && HandleEndOfConflictMarker(CurPtr-1)) {
4369 }
else if (LangOpts.CUDA && After ==
'>') {
4370 Kind = tok::greatergreatergreater;
4371 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4374 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4375 Kind = tok::greatergreater;
4378 Kind = tok::greater;
4382 Char = getCharAndSize(CurPtr, SizeTmp);
4384 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4385 Kind = tok::caretequal;
4386 }
else if (LangOpts.Reflection && Char ==
'^') {
4387 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4388 Kind = tok::caretcaret;
4390 if (LangOpts.OpenCL && Char ==
'^')
4391 Diag(CurPtr, diag::err_opencl_logical_exclusive_or);
4396 Char = getCharAndSize(CurPtr, SizeTmp);
4398 Kind = tok::pipeequal;
4399 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4400 }
else if (Char ==
'|') {
4402 if (CurPtr[1] ==
'|' && HandleEndOfConflictMarker(CurPtr-1))
4404 Kind = tok::pipepipe;
4405 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4411 Char = getCharAndSize(CurPtr, SizeTmp);
4412 if (LangOpts.Digraphs && Char ==
'>') {
4413 Kind = tok::r_square;
4414 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4415 }
else if (Char ==
':') {
4416 Kind = tok::coloncolon;
4417 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4426 Char = getCharAndSize(CurPtr, SizeTmp);
4429 if (CurPtr[1] ==
'=' && HandleEndOfConflictMarker(CurPtr-1))
4432 Kind = tok::equalequal;
4433 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4442 Char = getCharAndSize(CurPtr, SizeTmp);
4444 Kind = tok::hashhash;
4445 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4446 }
else if (Char ==
'@' && LangOpts.MicrosoftExt) {
4449 Diag(BufferPtr, diag::ext_charize_microsoft);
4450 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4457 goto HandleDirective;
4465 if (CurPtr[-1] ==
'@' && LangOpts.ObjC) {
4466 FormTokenWithChars(
Result, CurPtr, tok::at);
4472 llvm::SaveAndRestore<bool> SavedParsingPreprocessorDirective(
4474 auto NextTokOr = peekNextPPToken();
4475 if (NextTokOr.has_value()) {
4476 NextPPTok = *NextTokOr;
4479 if (NextPPTok.
is(tok::raw_identifier) &&
4487 Kind = tok::unknown;
4492 if (!LangOpts.AsmPreprocessor) {
4493 if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &
Result)) {
4494 if (CheckUnicodeWhitespace(
Result, CodePoint, CurPtr)) {
4495 if (SkipWhitespace(
Result, CurPtr))
4503 return LexUnicodeIdentifierStart(
Result, CodePoint, CurPtr);
4507 Kind = tok::unknown;
4512 Kind = tok::unknown;
4516 llvm::UTF32 CodePoint;
4521 llvm::ConversionResult Status =
4522 llvm::convertUTF8Sequence((
const llvm::UTF8 **)&CurPtr,
4523 (
const llvm::UTF8 *)BufferEnd,
4525 llvm::strictConversion);
4526 if (Status == llvm::conversionOK) {
4527 if (CheckUnicodeWhitespace(
Result, CodePoint, CurPtr)) {
4528 if (SkipWhitespace(
Result, CurPtr))
4535 return LexUnicodeIdentifierStart(
Result, CodePoint, CurPtr);
4539 PP->isPreprocessedOutput()) {
4541 Kind = tok::unknown;
4548 Diag(CurPtr, diag::err_invalid_utf8);
4550 BufferPtr = CurPtr+1;
4562 FormTokenWithChars(
Result, CurPtr, Kind);
4568 FormTokenWithChars(
Result, CurPtr, tok::hash);
4571 if (
PP->hadModuleLoaderFatalFailure())
4583const char *Lexer::convertDependencyDirectiveToken(
4585 const char *TokPtr = BufferStart + DDTok.
Offset;
4591 if (
Result.is(tok::raw_identifier))
4592 Result.setRawIdentifierData(TokPtr);
4593 else if (
Result.isLiteral())
4594 Result.setLiteralData(TokPtr);
4595 BufferPtr = TokPtr + DDTok.
Length;
4599bool Lexer::LexDependencyDirectiveToken(
Token &
Result) {
4600 assert(isDependencyDirectivesLexer());
4602 using namespace dependency_directives_scan;
4604 if (BufferPtr == BufferEnd)
4605 return LexEndOfFile(
Result, BufferPtr);
4607 while (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size()) {
4608 if (DepDirectives.front().Kind == pp_eof)
4609 return LexEndOfFile(
Result, BufferEnd);
4610 if (DepDirectives.front().Kind == tokens_present_before_eof)
4612 NextDepDirectiveTokenIndex = 0;
4613 DepDirectives = DepDirectives.drop_front();
4616 const dependency_directives_scan::Token &DDTok =
4617 DepDirectives.front().Tokens[NextDepDirectiveTokenIndex++];
4618 if (NextDepDirectiveTokenIndex > 1 || DDTok.
Kind != tok::hash) {
4624 BufferPtr = BufferStart + DDTok.
Offset;
4625 LexAngledStringLiteral(
Result, BufferPtr + 1);
4626 if (
Result.isNot(tok::header_name))
4630 const dependency_directives_scan::Token &NextTok =
4631 DepDirectives.front().Tokens[NextDepDirectiveTokenIndex];
4632 if (BufferStart + NextTok.
Offset >= BufferPtr)
4634 ++NextDepDirectiveTokenIndex;
4639 const char *TokPtr = convertDependencyDirectiveToken(DDTok,
Result);
4641 if (
Result.is(tok::hash) &&
Result.isAtStartOfLine()) {
4643 if (
PP->hadModuleLoaderFatalFailure())
4649 auto NextTok = peekNextPPToken();
4650 if (NextTok && NextTok->
is(tok::raw_identifier) &&
4651 NextTok->getRawIdentifier() ==
"import") {
4653 if (
PP->hadModuleLoaderFatalFailure())
4658 if (
Result.is(tok::raw_identifier)) {
4659 Result.setRawIdentifierData(TokPtr);
4661 const IdentifierInfo *II =
PP->LookUpIdentifierInfo(
Result);
4662 if (LangOpts.CPlusPlusModules &&
Result.isModuleContextualKeyword() &&
4663 PP->HandleModuleContextualKeyword(
Result)) {
4668 return PP->HandleIdentifier(
Result);
4674 if (
Result.is(tok::colon)) {
4676 if (*BufferPtr ==
':') {
4677 assert(DepDirectives.front().Tokens[NextDepDirectiveTokenIndex].is(
4679 ++NextDepDirectiveTokenIndex;
4680 Result.setKind(tok::coloncolon);
4690bool Lexer::LexDependencyDirectiveTokenWhileSkipping(
Token &
Result) {
4691 assert(isDependencyDirectivesLexer());
4693 using namespace dependency_directives_scan;
4696 unsigned NestedIfs = 0;
4698 DepDirectives = DepDirectives.drop_front();
4699 switch (DepDirectives.front().Kind) {
4701 llvm_unreachable(
"unexpected 'pp_none'");
4742 NextDepDirectiveTokenIndex = 0;
4743 return LexEndOfFile(
Result, BufferEnd);
4747 const dependency_directives_scan::Token &DDTok =
4748 DepDirectives.front().Tokens.front();
4749 assert(DDTok.
is(tok::hash));
4750 NextDepDirectiveTokenIndex = 1;
4752 convertDependencyDirectiveToken(DDTok,
Result);
Defines the Diagnostic-related interfaces.
Defines the clang::IdentifierInfo, clang::IdentifierTable, and clang::Selector interfaces.
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified.
Defines the clang::LangOptions interface.
static bool isInExpansionTokenRange(const SourceLocation Loc, const SourceManager &SM)
static bool isMathematicalExtensionID(uint32_t C, const LangOptions &LangOpts, bool IsStart, bool &IsExtension)
static void diagnoseInvalidUnicodeCodepointInIdentifier(DiagnosticsEngine &Diags, const LangOptions &LangOpts, uint32_t CodePoint, CharSourceRange Range, bool IsFirst)
static char DecodeTrigraphChar(const char *CP, Lexer *L, bool Trigraphs)
DecodeTrigraphChar - If the specified character is a legal trigraph when prefixed with ??,...
static size_t getSpellingSlow(const Token &Tok, const char *BufPtr, const LangOptions &LangOpts, char *Spelling)
Slow case of getSpelling.
static const char * FindConflictEnd(const char *CurPtr, const char *BufferEnd, ConflictMarkerKind CMK)
Find the end of a version control conflict marker.
static void maybeDiagnoseUTF8Homoglyph(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range)
After encountering UTF-8 character C and interpreting it as an identifier character,...
static SourceLocation getBeginningOfFileToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
static void StringifyImpl(T &Str, char Quote)
static LLVM_ATTRIBUTE_NOINLINE SourceLocation GetMappedTokenLoc(Preprocessor &PP, SourceLocation FileLoc, unsigned CharNo, unsigned TokLen)
GetMappedTokenLoc - If lexing out of a 'mapped buffer', where we pretend the lexer buffer was all exp...
static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts, bool &IsExtension)
static CharSourceRange makeCharRange(Lexer &L, const char *Begin, const char *End)
static bool isUnicodeWhitespace(uint32_t Codepoint)
static void diagnoseExtensionInIdentifier(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range)
static const char * findPlaceholderEnd(const char *CurPtr, const char *BufferEnd)
static llvm::SmallString< 5 > codepointAsHexString(uint32_t C)
static CharSourceRange makeRangeFromFileLocs(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, Lexer *L, bool Trigraphs)
isEndOfBlockCommentWithEscapedNewLine - Return true if the specified newline character (either \n or \r) ...
static const char * fastParseASCIIIdentifier(const char *CurPtr, const char *BufferEnd)
static char GetTrigraphCharForLetter(char Letter)
GetTrigraphCharForLetter - Given a character that occurs after a ?? pair,...
static bool isAllowedInitiallyIDChar(uint32_t C, const LangOptions &LangOpts, bool &IsExtension)
static void maybeDiagnoseIDCharCompat(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range, bool IsFirst)
static const char * findBeginningOfLine(StringRef Buffer, unsigned Offset)
Returns the pointer that points to the beginning of the line that contains the given offset,...
Defines the MultipleIncludeOpt interface.
Defines the clang::Preprocessor interface.
static constexpr bool isOneOf()
Defines the clang::SourceLocation class and associated facilities.
Defines the SourceManager interface.
Defines the clang::TokenKind enum and support functions.
static const llvm::sys::UnicodeCharRange C11DisallowedInitialIDCharRanges[]
static const llvm::sys::UnicodeCharRange C99DisallowedInitialIDCharRanges[]
static const llvm::sys::UnicodeCharRange UnicodeWhitespaceCharRanges[]
static const llvm::sys::UnicodeCharRange C99AllowedIDCharRanges[]
static const llvm::sys::UnicodeCharRange C11AllowedIDCharRanges[]
static const llvm::sys::UnicodeCharRange MathematicalNotationProfileIDStartRanges[]
static const llvm::sys::UnicodeCharRange MathematicalNotationProfileIDContinueRanges[]
static const llvm::sys::UnicodeCharRange XIDStartRanges[]
static const llvm::sys::UnicodeCharRange XIDContinueRanges[]
__DEVICE__ void * memcpy(void *__a, const void *__b, size_t __c)
__device__ __2f16 float c
__PTRDIFF_TYPE__ ptrdiff_t
A signed integer type that is the result of subtracting two pointers.
static __inline__ int __ATTRS_o_ai vec_any_ge(vector signed char __a, vector signed char __b)
static __inline__ int __ATTRS_o_ai vec_any_eq(vector signed char __a, vector signed char __b)
Represents a byte-granular source range.
static CharSourceRange getCharRange(SourceRange R)
SourceLocation getEnd() const
SourceLocation getBegin() const
A little helper class used to produce diagnostics.
Concrete class used by the front-end to report problems and issues.
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
bool isIgnored(unsigned DiagID, SourceLocation Loc) const
Determine whether the diagnostic is known to be ignored.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
static FixItHint CreateReplacement(CharSourceRange RemoveRange, StringRef Code)
Create a code modification hint that replaces the given source range with the given code string.
static FixItHint CreateRemoval(CharSourceRange RemoveRange)
Create a code modification hint that removes the given source range.
static FixItHint CreateInsertion(SourceLocation InsertionLoc, StringRef Code, bool BeforePreviousInsertions=false)
Create a code modification hint that inserts the given code string at a specific location.
One of these records is kept for each identifier that is lexed.
bool isHandleIdentifierCase() const
Return true if the Preprocessor::HandleIdentifier must be called on a token of this identifier.
bool isModuleKeyword() const
Determine whether this is the contextual keyword module.
tok::TokenKind getTokenID() const
If this is a source-language token (e.g. 'for'), this API can be used to cause the lexer to map identifiers to source-language tokens.
bool isKeyword(const LangOptions &LangOpts) const
Return true if this token is a keyword in the specified language.
bool isImportKeyword() const
Determine whether this is the contextual keyword import.
tok::ObjCKeywordKind getObjCKeywordID() const
Return the Objective-C keyword ID for this identifier.
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens.
static StringRef getSourceText(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts, bool *Invalid=nullptr)
Returns a string for the source that the range encompasses.
friend class Preprocessor
void SetKeepWhitespaceMode(bool Val)
SetKeepWhitespaceMode - This method lets clients enable or disable whitespace retention mode.
static SourceLocation findLocationAfterToken(SourceLocation loc, tok::TokenKind TKind, const SourceManager &SM, const LangOptions &LangOpts, bool SkipTrailingWhitespaceAndNewLine)
Checks that the given token is the first token that occurs after the given location (this excludes co...
bool LexFromRawLexer(Token &Result)
LexFromRawLexer - Lex a token from a designated raw lexer (one with no associated preprocessor object...
static unsigned getEscapedNewLineSize(const char *P)
getEscapedNewLineSize - Return the size of the specified escaped newline, or 0 if it is not an escape...
bool inKeepCommentMode() const
inKeepCommentMode - Return true if the lexer should return comments as tokens.
void SetCommentRetentionState(bool Mode)
SetCommentRetentionState - Change the comment retention mode of the lexer to the specified mode.
static std::optional< Token > findPreviousToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts, bool IncludeComments)
Finds the token that comes before the given location.
void seek(unsigned Offset, bool IsAtStartOfLine)
Set the lexer's buffer pointer to Offset.
static StringRef getImmediateMacroName(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
void ReadToEndOfLine(SmallVectorImpl< char > *Result=nullptr)
ReadToEndOfLine - Read the rest of the current preprocessor line as an uninterpreted string.
static bool isAtStartOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroBegin=nullptr)
Returns true if the given MacroID location points at the first token of the macro expansion.
DiagnosticBuilder Diag(const char *Loc, unsigned DiagID) const
Diag - Forwarding function for diagnostics.
const char * getBufferLocation() const
Return the current location in the buffer.
bool Lex(Token &Result)
Lex - Return the next token in the file.
bool isPragmaLexer() const
isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.
static void DiagnoseDelimitedOrNamedEscapeSequence(SourceLocation Loc, bool Named, const LangOptions &Opts, DiagnosticsEngine &Diags)
Diagnose use of a delimited or named escape sequence.
static unsigned getTokenPrefixLength(SourceLocation TokStart, unsigned CharNo, const SourceManager &SM, const LangOptions &LangOpts)
Get the physical length (including trigraphs and escaped newlines) of the first CharNo characters...
Lexer(FileID FID, const llvm::MemoryBufferRef &InputFile, Preprocessor &PP, bool IsFirstIncludeOfFile=true)
Lexer constructor - Create a new lexer object for the specified buffer with the specified preprocesso...
static bool isAtEndOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroEnd=nullptr)
Returns true if the given MacroID location points at the last token of the macro expansion.
SourceLocation getSourceLocation() override
getSourceLocation - Return a source location for the next character in the current file.
static CharSourceRange makeFileCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Accepts a range and returns a character range with file locations.
static bool isNewLineEscaped(const char *BufferStart, const char *Str)
Checks whether the new line pointed to by Str is preceded by an escape sequence.
SourceLocation getSourceLocation(const char *Loc, unsigned TokLen=1) const
getSourceLocation - Return a source location identifier for the specified offset in the current file.
static StringRef getIndentationForLine(SourceLocation Loc, const SourceManager &SM)
Returns the leading whitespace for the line that corresponds to the given location Loc.
static unsigned getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid=nullptr)
getSpelling - This method is used to get the spelling of a token into a preallocated buffer,...
bool isKeepWhitespaceMode() const
isKeepWhitespaceMode - Return true if the lexer should return tokens for every character in the file,...
static bool isAsciiIdentifierContinueChar(char c, const LangOptions &LangOpts)
Returns true if the given character could appear in an identifier.
static std::optional< Token > findNextToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts, bool IncludeComments=false)
Finds the token that comes right after the given location.
static unsigned MeasureTokenLength(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
MeasureTokenLength - Relex the token at the specified location and return its length in bytes in the ...
static SourceLocation GetBeginningOfToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Given a location anywhere in a source buffer, find the location that corresponds to the beginning of...
void resetExtendedTokenMode()
Sets the extended token mode back to its initial value, according to the language options and preproc...
static StringRef getImmediateMacroNameForDiagnostics(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
static Lexer * Create_PragmaLexer(SourceLocation SpellingLoc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLen, Preprocessor &PP)
Create_PragmaLexer: Lexer constructor - Create a new lexer object for _Pragma expansion.
static PreambleBounds ComputePreamble(StringRef Buffer, const LangOptions &LangOpts, unsigned MaxLines=0)
Compute the preamble of the given file.
static bool getRawToken(SourceLocation Loc, Token &Result, const SourceManager &SM, const LangOptions &LangOpts, bool IgnoreWhiteSpace=false)
Relex the token at the specified location.
static SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset, const SourceManager &SM, const LangOptions &LangOpts)
Computes the source location just past the end of the token at this source location.
static std::string Stringify(StringRef Str, bool Charify=false)
Stringify - Convert the specified string into a C string by i) escaping '\' and " characters and ii) ...
static SizedChar getCharAndSizeNoWarn(const char *Ptr, const LangOptions &LangOpts)
getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever emit a warning.
bool LexingRawMode
True if in raw mode.
SmallVector< PPConditionalInfo, 4 > ConditionalStack
Information about the set of #if/#ifdef/#ifndef blocks we are currently in.
bool ParsingPreprocessorDirective
True when parsing #XXX; turns '\n' into a tok::eod token.
MultipleIncludeOpt MIOpt
A state machine that detects the #ifndef-wrapping a file idiom for the multiple-include optimization.
bool ParsingFilename
True after #include; turns <xx> or "xxx" into a tok::header_name token.
bool isLexingRawMode() const
Return true if this lexer is in raw mode or not.
const FileID FID
The SourceManager FileID corresponding to the file being lexed.
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
SourceManager & getSourceManager() const
Encodes a location in the source.
static SourceLocation getFromRawEncoding(UIntTy Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
bool isValid() const
Return true if this is a valid SourceLocation object.
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
UIntTy getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it.
This class handles loading and caching of source files into memory.
Each ExpansionInfo encodes the expansion location - where the token was ultimately expanded,...
SourceLocation getExpansionLocStart() const
SourceLocation getSpellingLoc() const
bool isMacroArgExpansion() const
SourceLocation getExpansionLocEnd() const
This is a discriminated union of FileInfo and ExpansionInfo.
const ExpansionInfo & getExpansion() const
static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix)
Determine whether a suffix is a valid ud-suffix.
Token - This structure provides full information about a lexed token.
IdentifierInfo * getIdentifierInfo() const
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
unsigned getLength() const
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {...}".
tok::TokenKind getKind() const
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
bool isModuleContextualKeyword(bool AllowExport=true) const
Return true if we have a C++20 modules contextual keyword (export, import or module).
bool isNot(tok::TokenKind K) const
bool isAnnotation() const
Return true if this is any of tok::annot_* kind tokens.
bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const
Return true if we have an ObjC keyword identifier.
bool isSimpleTypeSpecifier(const LangOptions &LangOpts) const
Determine whether the token kind starts a simple-type-specifier.
void startToken()
Reset all flags to cleared.
bool needsCleaning() const
Return true if this token has trigraphs or escaped newlines in it.
StringRef getRawIdentifier() const
getRawIdentifier - For a raw identifier token (i.e., an identifier lexed in raw mode),...
void setFlag(TokenFlags Flag)
Set the specified flag.
static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_movemask_epi8(__m128i __a)
Copies the values of the most significant bits from each 8-bit element in a 128-bit integer vector of...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpeq_epi8(__m128i __a, __m128i __b)
Compares each of the corresponding 8-bit values of the 128-bit integer vectors for equality.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si128(__m128i_u const *__p)
Moves packed integer values from an unaligned 128-bit memory location to elements in a 128-bit intege...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_load_si128(__m128i const *__p)
Moves packed integer values from an aligned 128-bit memory location to elements in a 128-bit integer ...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set1_epi8(char __b)
Initializes all values in a 128-bit vector of [16 x i8] with the specified 8-bit value.
@ tokens_present_before_eof
Indicates that there are tokens present between the last scanned directive and eof.
@ pp_pragma_system_header
@ pp_pragma_include_alias
@ After
Like System, but searched after the system directories.
bool isStringLiteral(TokenKind K)
Return true if this is a C or C++ string-literal (or C++11 user-defined-string-literal) token.
ObjCKeywordKind
Provides a namespace for Objective-C keywords which start with an '@'.
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
The JSON file list parser is used to communicate input to InstallAPI.
LLVM_READNONE bool isASCII(char c)
Returns true if a byte is an ASCII character.
@ Match
This is not an overload because the signature exactly matches an existing declaration.
LLVM_READONLY bool isVerticalWhitespace(unsigned char c)
Returns true if this character is vertical ASCII whitespace: '\n', '\r'.
ConflictMarkerKind
ConflictMarkerKind - Kinds of conflict marker which the lexer might be recovering from.
@ CMK_Perforce
A Perforce-style conflict marker, initiated by 4 ">"s, separated by 4 "="s, and terminated by 4 "<"s.
@ CMK_None
Not within a conflict marker.
@ CMK_Normal
A normal or diff3 conflict marker, initiated by at least 7 "<"s, separated by at least 7 "="s or "|"s...
LLVM_READONLY bool isAsciiIdentifierContinue(unsigned char c)
std::pair< FileID, unsigned > FileIDAndOffset
bool operator<(DeclarationName LHS, DeclarationName RHS)
Ordering on two declaration names.
LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)
Returns true if this character is horizontal ASCII whitespace: ' ', '\t', '\f', '\v'.
@ Result
The result type of a method or function.
LLVM_READONLY bool isRawStringDelimBody(unsigned char c)
Return true if this is the body character of a C++ raw string delimiter.
LLVM_READONLY bool isWhitespace(unsigned char c)
Return true if this character is horizontal or vertical ASCII whitespace: ' ', '\t',...
LLVM_READONLY bool isPreprocessingNumberBody(unsigned char c)
Return true if this is the body character of a C preprocessing number, which is [a-zA-Z0-9_.].
@ Keyword
The name has been typo-corrected to a keyword.
LLVM_READONLY bool isAsciiIdentifierStart(unsigned char c, bool AllowDollar=false)
Returns true if this is a valid first character of a C identifier, which is [a-zA-Z_].
__INTPTR_TYPE__ intptr_t
A signed integer type with the property that any valid pointer to void can be converted to this type,...
float __ovld __cnfn length(float)
Return the length of vector p, i.e., $\sqrt{p.x^2 + p.y^2 + \cdots}$
#define _mm_cmpistri(A, B, M)
Uses the immediate operand M to perform a comparison of string data with implicitly defined lengths t...
#define _SIDD_LEAST_SIGNIFICANT
#define _SIDD_NEGATIVE_POLARITY
Represents a char and the number of bytes parsed to produce it.
Describes the bounds (start, size) of the preamble and a flag required by PreprocessorOptions::Precom...
Token lexed as part of dependency directive scanning.
unsigned Offset
Offset into the original source input.
bool is(tok::TokenKind K) const