29#include "llvm/ADT/STLExtras.h"
30#include "llvm/ADT/StringExtras.h"
31#include "llvm/ADT/StringRef.h"
32#include "llvm/ADT/StringSwitch.h"
33#include "llvm/Support/Compiler.h"
34#include "llvm/Support/ConvertUTF.h"
35#include "llvm/Support/MathExtras.h"
36#include "llvm/Support/MemoryBufferRef.h"
37#include "llvm/Support/NativeFormatting.h"
38#include "llvm/Support/Unicode.h"
39#include "llvm/Support/UnicodeCharRanges.h"
65 return II->getObjCKeywordID() == objcKey;
72 return tok::objc_not_keyword;
80 case tok::annot_typename:
81 case tok::annot_decltype:
82 case tok::annot_pack_indexing_type:
88 case tok::kw___int128:
90 case tok::kw_unsigned:
98 case tok::kw__Float16:
99 case tok::kw___float128:
100 case tok::kw___ibm128:
101 case tok::kw_wchar_t:
107#define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) case tok::kw___##Trait:
108#include "clang/Basic/TransformTypeTraits.def"
109 case tok::kw___auto_type:
110 case tok::kw_char16_t:
111 case tok::kw_char32_t:
113 case tok::kw_decltype:
114 case tok::kw_char8_t:
// Out-of-line definition of Lexer::anchor() with an intentionally empty body.
// NOTE(review): presumably the usual LLVM "anchor" idiom, i.e. a virtual
// method defined in this file so the class's vtable is emitted in a single
// translation unit. Confirm against the class declaration in the header,
// which is not visible from here.
126void Lexer::anchor() {}
128void Lexer::InitLexer(
const char *BufStart,
const char *BufPtr,
129 const char *BufEnd) {
130 BufferStart = BufStart;
134 assert(BufEnd[0] == 0 &&
135 "We assume that the input buffer has a null character at the end"
136 " to simplify lexing!");
141 if (BufferStart == BufferPtr) {
143 StringRef Buf(BufferStart, BufferEnd - BufferStart);
144 size_t BOMLength = llvm::StringSwitch<size_t>(Buf)
145 .StartsWith(
"\xEF\xBB\xBF", 3)
149 BufferPtr += BOMLength;
152 Is_PragmaLexer =
false;
153 CurrentConflictMarkerState =
CMK_None;
156 IsAtStartOfLine =
true;
157 IsAtPhysicalStartOfLine =
true;
159 HasLeadingSpace =
false;
160 HasLeadingEmptyMacro =
false;
175 ExtendedTokenMode = 0;
177 NewLinePtr =
nullptr;
187 FileLoc(PP.getSourceManager().getLocForStartOfFile(FID)),
189 IsFirstTimeLexingFile(IsFirstIncludeOfFile) {
190 InitLexer(InputFile.getBufferStart(), InputFile.getBufferStart(),
191 InputFile.getBufferEnd());
200 const char *BufStart,
const char *BufPtr,
const char *BufEnd,
201 bool IsFirstIncludeOfFile)
203 IsFirstTimeLexingFile(IsFirstIncludeOfFile) {
204 InitLexer(BufStart, BufPtr, BufEnd);
215 bool IsFirstIncludeOfFile)
216 :
Lexer(
SM.getLocForStartOfFile(FID), langOpts, FromFile.getBufferStart(),
217 FromFile.getBufferStart(), FromFile.getBufferEnd(),
218 IsFirstIncludeOfFile) {}
221 assert(
PP &&
"Cannot reset token mode without a preprocessor");
222 if (LangOpts.TraditionalCPP)
250 FileID SpellingFID =
SM.getFileID(SpellingLoc);
251 llvm::MemoryBufferRef InputFile =
SM.getBufferOrFake(SpellingFID);
257 const char *StrData =
SM.getCharacterData(SpellingLoc);
259 L->BufferPtr = StrData;
260 L->BufferEnd = StrData+TokLen;
261 assert(L->BufferEnd[0] == 0 &&
"Buffer is not nul terminated!");
265 L->FileLoc =
SM.createExpansionLoc(
SM.getLocForStartOfFile(SpellingFID),
267 ExpansionLocEnd, TokLen);
274 L->Is_PragmaLexer =
true;
279 this->IsAtPhysicalStartOfLine = IsAtStartOfLine;
280 this->IsAtStartOfLine = IsAtStartOfLine;
281 assert((BufferStart + Offset) <= BufferEnd);
282 BufferPtr = BufferStart + Offset;
286 typename T::size_type i = 0, e = Str.size();
288 if (Str[i] ==
'\\' || Str[i] == Quote) {
289 Str.insert(Str.begin() + i,
'\\');
292 }
else if (Str[i] ==
'\n' || Str[i] ==
'\r') {
294 if ((i < e - 1) && (Str[i + 1] ==
'\n' || Str[i + 1] ==
'\r') &&
295 Str[i] != Str[i + 1]) {
301 Str.insert(Str.begin() + i + 1,
'n');
311 std::string
Result = std::string(Str);
312 char Quote = Charify ?
'\'' :
'"';
327 assert(Tok.
needsCleaning() &&
"getSpellingSlow called on simple token");
330 const char *BufEnd = BufPtr + Tok.
getLength();
334 while (BufPtr < BufEnd) {
336 Spelling[Length++] = CharAndSize.Char;
337 BufPtr += CharAndSize.Size;
339 if (Spelling[Length - 1] ==
'"')
347 Spelling[Length - 2] ==
'R' && Spelling[Length - 1] ==
'"') {
350 const char *RawEnd = BufEnd;
351 do --RawEnd;
while (*RawEnd !=
'"');
352 size_t RawLength = RawEnd - BufPtr + 1;
355 memcpy(Spelling + Length, BufPtr, RawLength);
363 while (BufPtr < BufEnd) {
365 Spelling[Length++] = CharAndSize.Char;
366 BufPtr += CharAndSize.Size;
370 "NeedsCleaning flag set on token that didn't need cleaning!");
385 std::pair<FileID, unsigned> locInfo =
SM.getDecomposedLoc(loc);
388 bool invalidTemp =
false;
389 StringRef file =
SM.getBufferData(locInfo.first, &invalidTemp);
391 if (invalid) *invalid =
true;
395 const char *tokenBegin = file.data() + locInfo.second;
398 Lexer lexer(
SM.getLocForStartOfFile(locInfo.first), options,
399 file.begin(), tokenBegin, file.end());
407 return StringRef(tokenBegin,
length);
411 buffer.resize(
getSpellingSlow(token, tokenBegin, options, buffer.data()));
412 return StringRef(buffer.data(), buffer.size());
422 assert((
int)Tok.
getLength() >= 0 &&
"Token character range is bogus!");
424 bool CharDataInvalid =
false;
434 return std::string(TokStart, TokStart + Tok.
getLength());
455 assert((
int)Tok.
getLength() >= 0 &&
"Token character range is bogus!");
457 const char *TokStart =
nullptr;
459 if (Tok.
is(tok::raw_identifier))
464 Buffer = II->getNameStart();
465 return II->getLength();
475 bool CharDataInvalid =
false;
479 if (CharDataInvalid) {
492 return getSpellingSlow(Tok, TokStart, LangOpts,
const_cast<char*
>(Buffer));
513 bool IgnoreWhiteSpace) {
522 Loc =
SM.getExpansionLoc(Loc);
523 std::pair<FileID, unsigned> LocInfo =
SM.getDecomposedLoc(Loc);
525 StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
529 const char *StrData = Buffer.data()+LocInfo.second;
535 Lexer TheLexer(
SM.getLocForStartOfFile(LocInfo.first), LangOpts,
536 Buffer.begin(), StrData, Buffer.end());
545 const char *BufStart = Buffer.data();
546 if (Offset >= Buffer.size())
549 const char *LexStart = BufStart + Offset;
550 for (; LexStart != BufStart; --LexStart) {
565 std::pair<FileID, unsigned> LocInfo =
SM.getDecomposedLoc(Loc);
566 if (LocInfo.first.isInvalid())
570 StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
576 const char *StrData = Buffer.data() + LocInfo.second;
578 if (!LexStart || LexStart == StrData)
583 Lexer TheLexer(LexerStartLoc, LangOpts, Buffer.data(), LexStart,
603 }
while (TheTok.
getKind() != tok::eof);
615 if (!
SM.isMacroArgExpansion(Loc))
620 std::pair<FileID, unsigned> FileLocInfo =
SM.getDecomposedLoc(FileLoc);
621 std::pair<FileID, unsigned> BeginFileLocInfo =
622 SM.getDecomposedLoc(BeginFileLoc);
623 assert(FileLocInfo.first == BeginFileLocInfo.first &&
624 FileLocInfo.second >= BeginFileLocInfo.second);
630enum PreambleDirectiveKind {
645 Lexer TheLexer(FileLoc, LangOpts, Buffer.begin(), Buffer.begin(),
649 bool InPreprocessorDirective =
false;
653 unsigned MaxLineOffset = 0;
655 const char *CurPtr = Buffer.begin();
656 unsigned CurLine = 0;
657 while (CurPtr != Buffer.end()) {
661 if (CurLine == MaxLines)
665 if (CurPtr != Buffer.end())
666 MaxLineOffset = CurPtr - Buffer.begin();
672 if (InPreprocessorDirective) {
674 if (TheTok.
getKind() == tok::eof) {
685 InPreprocessorDirective =
false;
694 if (MaxLineOffset && TokOffset >= MaxLineOffset)
699 if (TheTok.
getKind() == tok::comment) {
707 Token HashTok = TheTok;
708 InPreprocessorDirective =
true;
717 PreambleDirectiveKind PDK
718 = llvm::StringSwitch<PreambleDirectiveKind>(Keyword)
719 .Case(
"include", PDK_Skipped)
720 .Case(
"__include_macros", PDK_Skipped)
721 .Case(
"define", PDK_Skipped)
722 .Case(
"undef", PDK_Skipped)
723 .Case(
"line", PDK_Skipped)
724 .Case(
"error", PDK_Skipped)
725 .Case(
"pragma", PDK_Skipped)
726 .Case(
"import", PDK_Skipped)
727 .Case(
"include_next", PDK_Skipped)
728 .Case(
"warning", PDK_Skipped)
729 .Case(
"ident", PDK_Skipped)
730 .Case(
"sccs", PDK_Skipped)
731 .Case(
"assert", PDK_Skipped)
732 .Case(
"unassert", PDK_Skipped)
733 .Case(
"if", PDK_Skipped)
734 .Case(
"ifdef", PDK_Skipped)
735 .Case(
"ifndef", PDK_Skipped)
736 .Case(
"elif", PDK_Skipped)
737 .Case(
"elifdef", PDK_Skipped)
738 .Case(
"elifndef", PDK_Skipped)
739 .Case(
"else", PDK_Skipped)
740 .Case(
"endif", PDK_Skipped)
741 .Default(PDK_Unknown);
758 TheTok.
getKind() == tok::raw_identifier &&
760 LangOpts.CPlusPlusModules) {
763 Token ModuleTok = TheTok;
766 }
while (TheTok.
getKind() == tok::comment);
767 if (TheTok.
getKind() != tok::semi) {
782 if (ActiveCommentLoc.
isValid())
783 End = ActiveCommentLoc;
798 const char *TokPtr =
SM.getCharacterData(TokStart, &
Invalid);
801 if (
Invalid || (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr)))
804 unsigned PhysOffset = 0;
809 while (Lexer::isObviouslySimpleCharacter(*TokPtr)) {
819 for (; CharNo; --CharNo) {
821 TokPtr += CharAndSize.Size;
822 PhysOffset += CharAndSize.Size;
829 if (!Lexer::isObviouslySimpleCharacter(*TokPtr))
830 PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr;
879 if (!
SM.isAtStartOfImmediateMacroExpansion(loc, &expansionLoc))
885 *MacroBegin = expansionLoc;
907 if (!
SM.isAtEndOfImmediateMacroExpansion(afterLoc, &expansionLoc))
913 *MacroEnd = expansionLoc;
926 if (Range.isTokenRange()) {
935 std::tie(FID, BeginOffs) =
SM.getDecomposedLoc(
Begin);
940 if (!
SM.isInFileID(End, FID, &EndOffs) ||
950 return SM.getSLocEntry(
SM.getFileID(Loc))
952 .isExpansionTokenRange();
969 Range.setBegin(
Begin);
974 if (Range.isTokenRange()) {
992 Range.setBegin(MacroBegin);
993 Range.setEnd(MacroEnd);
995 if (Range.isTokenRange())
1015 Range.setBegin(
SM.getImmediateSpellingLoc(
Begin));
1016 Range.setEnd(
SM.getImmediateSpellingLoc(End));
1029 if (Range.isInvalid()) {
1035 std::pair<FileID, unsigned> beginInfo =
SM.getDecomposedLoc(Range.getBegin());
1036 if (beginInfo.first.isInvalid()) {
1042 if (!
SM.isInFileID(Range.getEnd(), beginInfo.first, &EndOffs) ||
1043 beginInfo.second > EndOffs) {
1049 bool invalidTemp =
false;
1050 StringRef file =
SM.getBufferData(beginInfo.first, &invalidTemp);
1057 return file.substr(beginInfo.second, EndOffs - beginInfo.second);
1063 assert(Loc.
isMacroID() &&
"Only reasonable to call this on macros");
1079 Loc =
SM.getImmediateExpansionRange(Loc).getBegin();
1086 FileID MacroFID =
SM.getFileID(Loc);
1087 if (
SM.isInFileID(SpellLoc, MacroFID))
1097 Loc =
SM.getSpellingLoc(Loc);
1101 std::pair<FileID, unsigned> ExpansionInfo =
SM.getDecomposedLoc(Loc);
1103 StringRef ExpansionBuffer =
SM.getBufferData(ExpansionInfo.first);
1104 return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
1109 assert(Loc.
isMacroID() &&
"Only reasonable to call this on macros");
1111 while (
SM.isMacroArgExpansion(Loc))
1112 Loc =
SM.getImmediateExpansionRange(Loc).getBegin();
1118 if (!SpellLoc.
isFileID() ||
SM.isWrittenInScratchSpace(SpellLoc))
1124 Loc =
SM.getSpellingLoc(
SM.getImmediateExpansionRange(Loc).getBegin());
1128 std::pair<FileID, unsigned> ExpansionInfo =
SM.getDecomposedLoc(Loc);
1130 StringRef ExpansionBuffer =
SM.getBufferData(ExpansionInfo.first);
1131 return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
1140 if (Str - 1 < BufferStart)
1143 if ((Str[0] ==
'\n' && Str[-1] ==
'\r') ||
1144 (Str[0] ==
'\r' && Str[-1] ==
'\n')) {
1145 if (Str - 2 < BufferStart)
1155 return *Str ==
'\\';
1162 std::pair<FileID, unsigned> LocInfo =
SM.getDecomposedLoc(Loc);
1163 if (LocInfo.first.isInvalid())
1166 StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
1172 StringRef Rest = Buffer.substr(
Line - Buffer.data());
1173 size_t NumWhitespaceChars = Rest.find_first_not_of(
" \t");
1174 return NumWhitespaceChars == StringRef::npos
1176 : Rest.take_front(NumWhitespaceChars);
1191 unsigned CharNo,
unsigned TokLen) {
1192 assert(FileLoc.
isMacroID() &&
"Must be a macro expansion");
1208 return SM.createExpansionLoc(SpellingLoc, II.
getBegin(), II.
getEnd(), TokLen);
1214 unsigned TokLen)
const {
1215 assert(Loc >= BufferStart && Loc <= BufferEnd &&
1216 "Location out of range for this buffer!");
1220 unsigned CharNo = Loc-BufferStart;
1226 assert(
PP &&
"This doesn't work on raw lexers");
1245 case '=':
return '#';
1246 case ')':
return ']';
1247 case '(':
return '[';
1248 case '!':
return '|';
1249 case '\'':
return '^';
1250 case '>':
return '}';
1251 case '/':
return '\\';
1252 case '<':
return '{';
1253 case '-':
return '~';
1268 L->
Diag(CP-2, diag::trigraph_ignored);
1273 L->
Diag(CP-2, diag::trigraph_converted) << StringRef(&Res, 1);
1280unsigned Lexer::getEscapedNewLineSize(
const char *Ptr) {
1285 if (Ptr[Size-1] !=
'\n' && Ptr[Size-1] !=
'\r')
1289 if ((Ptr[Size] ==
'\r' || Ptr[Size] ==
'\n') &&
1290 Ptr[Size-1] != Ptr[Size])
1303const char *Lexer::SkipEscapedNewLines(
const char *
P) {
1305 const char *AfterEscape;
1308 }
else if (*
P ==
'?') {
1310 if (
P[1] !=
'?' ||
P[2] !=
'/')
1319 unsigned NewLineSize = Lexer::getEscapedNewLineSize(AfterEscape);
1320 if (NewLineSize == 0)
return P;
1321 P = AfterEscape+NewLineSize;
1330 return std::nullopt;
1335 std::pair<FileID, unsigned> LocInfo =
SM.getDecomposedLoc(Loc);
1338 bool InvalidTemp =
false;
1339 StringRef
File =
SM.getBufferData(LocInfo.first, &InvalidTemp);
1341 return std::nullopt;
1343 const char *TokenBegin =
File.data() + LocInfo.second;
1346 Lexer lexer(
SM.getLocForStartOfFile(LocInfo.first), LangOpts,
File.begin(),
1347 TokenBegin,
File.end());
1360 const LangOptions &LangOpts,
bool SkipTrailingWhitespaceAndNewLine) {
1362 if (!Tok || Tok->isNot(TKind))
1367 unsigned NumWhitespaceChars = 0;
1368 if (SkipTrailingWhitespaceAndNewLine) {
1369 const char *TokenEnd =
SM.getCharacterData(TokenLoc) + Tok->getLength();
1370 unsigned char C = *TokenEnd;
1373 NumWhitespaceChars++;
1377 if (
C ==
'\n' ||
C ==
'\r') {
1380 NumWhitespaceChars++;
1381 if ((
C ==
'\n' ||
C ==
'\r') &&
C != PrevC)
1382 NumWhitespaceChars++;
1407 if (Ptr[0] ==
'\\') {
1413 return {
'\\', Size};
1417 if (
unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) {
1423 Diag(Ptr, diag::backslash_newline_space);
1426 Size += EscapedNewLineSize;
1427 Ptr += EscapedNewLineSize;
1430 auto CharAndSize = getCharAndSizeSlow(Ptr, Tok);
1431 CharAndSize.Size += Size;
1436 return {
'\\',
Size};
1440 if (Ptr[0] ==
'?' && Ptr[1] ==
'?') {
1444 LangOpts.Trigraphs)) {
1450 if (
C ==
'\\')
goto Slash;
1456 return {*Ptr,
Size + 1u};
1470 if (Ptr[0] ==
'\\') {
1476 return {
'\\',
Size};
1479 if (
unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) {
1481 Size += EscapedNewLineSize;
1482 Ptr += EscapedNewLineSize;
1485 auto CharAndSize = getCharAndSizeSlowNoWarn(Ptr, LangOpts);
1486 CharAndSize.Size +=
Size;
1491 return {
'\\',
Size};
1495 if (LangOpts.Trigraphs && Ptr[0] ==
'?' && Ptr[1] ==
'?') {
1501 if (
C ==
'\\')
goto Slash;
1507 return {*Ptr,
Size + 1u};
1515void Lexer::SetByteOffset(
unsigned Offset,
bool StartOfLine) {
1516 BufferPtr = BufferStart + Offset;
1517 if (BufferPtr > BufferEnd)
1518 BufferPtr = BufferEnd;
1522 IsAtStartOfLine = StartOfLine;
1523 IsAtPhysicalStartOfLine = StartOfLine;
1527 static const llvm::sys::UnicodeCharSet UnicodeWhitespaceChars(
1529 return UnicodeWhitespaceChars.contains(Codepoint);
1534 llvm::raw_svector_ostream CharOS(CharBuf);
1535 llvm::write_hex(CharOS,
C, llvm::HexPrintStyle::Upper, 4);
1546 bool IsStart,
bool &IsExtension) {
1547 static const llvm::sys::UnicodeCharSet MathStartChars(
1549 static const llvm::sys::UnicodeCharSet MathContinueChars(
1551 if (MathStartChars.contains(
C) ||
1552 (!IsStart && MathContinueChars.contains(
C))) {
1560 bool &IsExtension) {
1561 if (LangOpts.AsmPreprocessor) {
1563 }
else if (LangOpts.DollarIdents &&
'$' ==
C) {
1565 }
else if (LangOpts.CPlusPlus || LangOpts.C23) {
1570 static const llvm::sys::UnicodeCharSet XIDStartChars(
XIDStartRanges);
1572 if (
C ==
'_' || XIDStartChars.contains(
C) || XIDContinueChars.contains(
C))
1576 }
else if (LangOpts.C11) {
1577 static const llvm::sys::UnicodeCharSet C11AllowedIDChars(
1579 return C11AllowedIDChars.contains(
C);
1581 static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
1583 return C99AllowedIDChars.contains(
C);
1588 bool &IsExtension) {
1589 assert(
C > 0x7F &&
"isAllowedInitiallyIDChar called with an ASCII codepoint");
1590 IsExtension =
false;
1591 if (LangOpts.AsmPreprocessor) {
1594 if (LangOpts.CPlusPlus || LangOpts.C23) {
1595 static const llvm::sys::UnicodeCharSet XIDStartChars(
XIDStartRanges);
1596 if (XIDStartChars.contains(
C))
1604 static const llvm::sys::UnicodeCharSet C11DisallowedInitialIDChars(
1606 return !C11DisallowedInitialIDChars.contains(
C);
1608 static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
1610 return !C99DisallowedInitialIDChars.contains(
C);
1616 static const llvm::sys::UnicodeCharSet MathStartChars(
1618 static const llvm::sys::UnicodeCharSet MathContinueChars(
1621 (void)MathStartChars;
1622 (void)MathContinueChars;
1623 assert((MathStartChars.contains(
C) || MathContinueChars.contains(
C)) &&
1624 "Unexpected mathematical notation codepoint");
1625 Diags.
Report(Range.getBegin(), diag::ext_mathematical_notation)
1638 if (!Diags.
isIgnored(diag::warn_c99_compat_unicode_id, Range.getBegin())) {
1640 CannotAppearInIdentifier = 0,
1641 CannotStartIdentifier
1644 static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
1646 static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
1648 if (!C99AllowedIDChars.contains(
C)) {
1649 Diags.
Report(Range.getBegin(), diag::warn_c99_compat_unicode_id)
1651 << CannotAppearInIdentifier;
1652 }
else if (IsFirst && C99DisallowedInitialIDChars.contains(
C)) {
1653 Diags.
Report(Range.getBegin(), diag::warn_c99_compat_unicode_id)
1655 << CannotStartIdentifier;
1667 struct HomoglyphPair {
1670 bool operator<(HomoglyphPair R)
const {
return Character < R.Character; }
1672 static constexpr HomoglyphPair SortedHomoglyphs[] = {
1725 std::lower_bound(std::begin(SortedHomoglyphs),
1726 std::end(SortedHomoglyphs) - 1, HomoglyphPair{
C,
'\0'});
1727 if (Homoglyph->Character ==
C) {
1728 if (Homoglyph->LooksLike) {
1729 const char LooksLikeStr[] = {Homoglyph->LooksLike, 0};
1730 Diags.
Report(Range.getBegin(), diag::warn_utf8_symbol_homoglyph)
1733 Diags.
Report(Range.getBegin(), diag::warn_utf8_symbol_zero_width)
1750 if ((IsFirst && IsIDStart) || (!IsFirst && IsIDContinue))
1753 bool InvalidOnlyAtStart = IsFirst && !IsIDStart && IsIDContinue;
1755 if (!IsFirst || InvalidOnlyAtStart) {
1756 Diags.
Report(Range.getBegin(), diag::err_character_not_allowed_identifier)
1760 Diags.
Report(Range.getBegin(), diag::err_character_not_allowed)
1766bool Lexer::tryConsumeIdentifierUCN(
const char *&CurPtr,
unsigned Size,
1768 const char *UCNPtr = CurPtr +
Size;
1769 uint32_t CodePoint = tryReadUCN(UCNPtr, CurPtr,
nullptr);
1770 if (CodePoint == 0) {
1773 bool IsExtension =
false;
1798 if ((UCNPtr - CurPtr == 6 && CurPtr[1] ==
'u') ||
1799 (UCNPtr - CurPtr == 10 && CurPtr[1] ==
'U'))
1802 while (CurPtr != UCNPtr)
1803 (void)getAndAdvanceChar(CurPtr,
Result);
1807bool Lexer::tryConsumeIdentifierUTF8Char(
const char *&CurPtr,
Token &
Result) {
1808 llvm::UTF32 CodePoint;
1813 unsigned FirstCodeUnitSize;
1814 getCharAndSize(CurPtr, FirstCodeUnitSize);
1815 const char *CharStart = CurPtr + FirstCodeUnitSize - 1;
1816 const char *UnicodePtr = CharStart;
1818 llvm::ConversionResult ConvResult = llvm::convertUTF8Sequence(
1819 (
const llvm::UTF8 **)&UnicodePtr, (
const llvm::UTF8 *)BufferEnd,
1820 &CodePoint, llvm::strictConversion);
1821 if (ConvResult != llvm::conversionOK)
1824 bool IsExtension =
false;
1853 ConsumeChar(CurPtr, FirstCodeUnitSize,
Result);
1854 CurPtr = UnicodePtr;
1858bool Lexer::LexUnicodeIdentifierStart(
Token &
Result, uint32_t
C,
1859 const char *CurPtr) {
1860 bool IsExtension =
false;
1875 return LexIdentifierContinue(
Result, CurPtr);
1900 FormTokenWithChars(
Result, CurPtr, tok::unknown);
1906 [[maybe_unused]]
const char *BufferEnd) {
1908 alignas(16)
static constexpr char AsciiIdentifierRange[16] = {
1909 '_',
'_',
'A',
'Z',
'a',
'z',
'0',
'9',
1911 constexpr ssize_t BytesPerRegister = 16;
1913 __m128i AsciiIdentifierRangeV =
1916 while (LLVM_LIKELY(BufferEnd - CurPtr >= BytesPerRegister)) {
1923 if (Consumed == BytesPerRegister)
1929 unsigned char C = *CurPtr;
1935bool Lexer::LexIdentifierContinue(
Token &
Result,
const char *CurPtr) {
1944 unsigned char C = getCharAndSize(CurPtr, Size);
1946 CurPtr = ConsumeChar(CurPtr, Size,
Result);
1951 if (!LangOpts.DollarIdents)
1955 Diag(CurPtr, diag::ext_dollar_in_identifier);
1956 CurPtr = ConsumeChar(CurPtr, Size,
Result);
1959 if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result))
1967 const char *IdStart = BufferPtr;
1968 FormTokenWithChars(
Result, CurPtr, tok::raw_identifier);
1969 Result.setRawIdentifierData(IdStart);
1985 if (isCodeCompletionPoint(CurPtr)) {
1987 Result.setKind(tok::code_completion);
1993 assert(*CurPtr == 0 &&
"Completion character must be 0");
1998 if (CurPtr < BufferEnd) {
2016bool Lexer::isHexaLiteral(
const char *Start,
const LangOptions &LangOpts) {
2018 char C1 = CharAndSize1.Char;
2024 char C2 = CharAndSize2.Char;
2025 return (C2 ==
'x' || C2 ==
'X');
2031bool Lexer::LexNumericConstant(
Token &
Result,
const char *CurPtr) {
2033 char C = getCharAndSize(CurPtr, Size);
2036 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2038 if (LangOpts.HLSL &&
C ==
'.' && (*CurPtr ==
'x' || *CurPtr ==
'r')) {
2042 C = getCharAndSize(CurPtr, Size);
2046 if ((
C ==
'-' ||
C ==
'+') && (PrevCh ==
'E' || PrevCh ==
'e')) {
2049 if (!LangOpts.MicrosoftExt || !isHexaLiteral(BufferPtr, LangOpts))
2050 return LexNumericConstant(
Result, ConsumeChar(CurPtr, Size,
Result));
2054 if ((
C ==
'-' ||
C ==
'+') && (PrevCh ==
'P' || PrevCh ==
'p')) {
2058 bool IsHexFloat =
true;
2059 if (!LangOpts.C99) {
2060 if (!isHexaLiteral(BufferPtr, LangOpts))
2062 else if (!LangOpts.CPlusPlus17 &&
2063 std::find(BufferPtr, CurPtr,
'_') != CurPtr)
2067 return LexNumericConstant(
Result, ConsumeChar(CurPtr, Size,
Result));
2071 if (
C ==
'\'' && (LangOpts.CPlusPlus14 || LangOpts.C23)) {
2075 Diag(CurPtr, LangOpts.CPlusPlus
2076 ? diag::warn_cxx11_compat_digit_separator
2077 : diag::warn_c23_compat_digit_separator);
2078 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2079 CurPtr = ConsumeChar(CurPtr, NextSize,
Result);
2080 return LexNumericConstant(
Result, CurPtr);
2085 if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result))
2086 return LexNumericConstant(
Result, CurPtr);
2088 return LexNumericConstant(
Result, CurPtr);
2091 const char *TokStart = BufferPtr;
2092 FormTokenWithChars(
Result, CurPtr, tok::numeric_constant);
2093 Result.setLiteralData(TokStart);
2099const char *Lexer::LexUDSuffix(
Token &
Result,
const char *CurPtr,
2100 bool IsStringLiteral) {
2101 assert(LangOpts.CPlusPlus);
2105 char C = getCharAndSize(CurPtr, Size);
2106 bool Consumed =
false;
2109 if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result))
2111 else if (!
isASCII(
C) && tryConsumeIdentifierUTF8Char(CurPtr,
Result))
2117 if (!LangOpts.CPlusPlus11) {
2120 C ==
'_' ? diag::warn_cxx11_compat_user_defined_literal
2121 : diag::warn_cxx11_compat_reserved_user_defined_literal)
2132 bool IsUDSuffix =
false;
2135 else if (IsStringLiteral && LangOpts.CPlusPlus14) {
2139 const unsigned MaxStandardSuffixLength = 3;
2140 char Buffer[MaxStandardSuffixLength] = {
C };
2141 unsigned Consumed =
Size;
2144 auto [Next, NextSize] =
2148 const StringRef CompleteSuffix(Buffer, Chars);
2154 if (Chars == MaxStandardSuffixLength)
2158 Buffer[Chars++] = Next;
2159 Consumed += NextSize;
2165 Diag(CurPtr, LangOpts.MSVCCompat
2166 ? diag::ext_ms_reserved_user_defined_literal
2167 : diag::ext_reserved_user_defined_literal)
2172 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2177 C = getCharAndSize(CurPtr, Size);
2179 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2180 }
else if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result)) {
2181 }
else if (!
isASCII(
C) && tryConsumeIdentifierUTF8Char(CurPtr,
Result)) {
2191bool Lexer::LexStringLiteral(
Token &
Result,
const char *CurPtr,
2193 const char *AfterQuote = CurPtr;
2195 const char *NulCharacter =
nullptr;
2198 (Kind == tok::utf8_string_literal ||
2199 Kind == tok::utf16_string_literal ||
2200 Kind == tok::utf32_string_literal))
2201 Diag(BufferPtr, LangOpts.CPlusPlus ? diag::warn_cxx98_compat_unicode_literal
2202 : diag::warn_c99_compat_unicode_literal);
2204 char C = getAndAdvanceChar(CurPtr,
Result);
2209 C = getAndAdvanceChar(CurPtr,
Result);
2211 if (
C ==
'\n' ||
C ==
'\r' ||
2212 (
C == 0 && CurPtr-1 == BufferEnd)) {
2214 Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 1;
2215 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2220 if (isCodeCompletionPoint(CurPtr-1)) {
2222 codeCompleteIncludedFile(AfterQuote, CurPtr - 1,
false);
2225 FormTokenWithChars(
Result, CurPtr - 1, tok::unknown);
2230 NulCharacter = CurPtr-1;
2232 C = getAndAdvanceChar(CurPtr,
Result);
2236 if (LangOpts.CPlusPlus)
2237 CurPtr = LexUDSuffix(
Result, CurPtr,
true);
2241 Diag(NulCharacter, diag::null_in_char_or_string) << 1;
2244 const char *TokStart = BufferPtr;
2245 FormTokenWithChars(
Result, CurPtr, Kind);
2246 Result.setLiteralData(TokStart);
2252bool Lexer::LexRawStringLiteral(
Token &
Result,
const char *CurPtr,
2260 Diag(BufferPtr, diag::warn_cxx98_compat_raw_string_literal);
2262 unsigned PrefixLen = 0;
2268 if (CurPtr[PrefixLen] !=
'(') {
2270 const char *PrefixEnd = &CurPtr[PrefixLen];
2271 if (PrefixLen == 16) {
2272 Diag(PrefixEnd, diag::err_raw_delim_too_long);
2273 }
else if (*PrefixEnd ==
'\n') {
2274 Diag(PrefixEnd, diag::err_invalid_newline_raw_delim);
2276 Diag(PrefixEnd, diag::err_invalid_char_raw_delim)
2277 << StringRef(PrefixEnd, 1);
2289 if (
C == 0 && CurPtr-1 == BufferEnd) {
2295 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2300 const char *Prefix = CurPtr;
2301 CurPtr += PrefixLen + 1;
2308 if (strncmp(CurPtr, Prefix, PrefixLen) == 0 && CurPtr[PrefixLen] ==
'"') {
2309 CurPtr += PrefixLen + 1;
2312 }
else if (
C == 0 && CurPtr-1 == BufferEnd) {
2314 Diag(BufferPtr, diag::err_unterminated_raw_string)
2315 << StringRef(Prefix, PrefixLen);
2316 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2322 if (LangOpts.CPlusPlus)
2323 CurPtr = LexUDSuffix(
Result, CurPtr,
true);
2326 const char *TokStart = BufferPtr;
2327 FormTokenWithChars(
Result, CurPtr, Kind);
2328 Result.setLiteralData(TokStart);
2334bool Lexer::LexAngledStringLiteral(
Token &
Result,
const char *CurPtr) {
2336 const char *NulCharacter =
nullptr;
2337 const char *AfterLessPos = CurPtr;
2338 char C = getAndAdvanceChar(CurPtr,
Result);
2343 C = getAndAdvanceChar(CurPtr,
Result);
2346 (
C == 0 && (CurPtr - 1 == BufferEnd))) {
2349 FormTokenWithChars(
Result, AfterLessPos, tok::less);
2354 if (isCodeCompletionPoint(CurPtr - 1)) {
2355 codeCompleteIncludedFile(AfterLessPos, CurPtr - 1,
true);
2357 FormTokenWithChars(
Result, CurPtr - 1, tok::unknown);
2360 NulCharacter = CurPtr-1;
2362 C = getAndAdvanceChar(CurPtr,
Result);
2367 Diag(NulCharacter, diag::null_in_char_or_string) << 1;
2370 const char *TokStart = BufferPtr;
2371 FormTokenWithChars(
Result, CurPtr, tok::header_name);
2372 Result.setLiteralData(TokStart);
2376void Lexer::codeCompleteIncludedFile(
const char *PathStart,
2377 const char *CompletionPoint,
2380 StringRef PartialPath(PathStart, CompletionPoint - PathStart);
2381 llvm::StringRef SlashChars = LangOpts.MSVCCompat ?
"/\\" :
"/";
2382 auto Slash = PartialPath.find_last_of(SlashChars);
2384 (Slash == StringRef::npos) ?
"" : PartialPath.take_front(Slash);
2385 const char *StartOfFilename =
2386 (Slash == StringRef::npos) ? PathStart : PathStart + Slash + 1;
2389 StringRef(StartOfFilename, CompletionPoint - StartOfFilename)));
2392 while (CompletionPoint < BufferEnd) {
2393 char Next = *(CompletionPoint + 1);
2394 if (Next == 0 || Next ==
'\r' || Next ==
'\n')
2397 if (Next == (IsAngled ?
'>' :
'"'))
2399 if (SlashChars.contains(Next))
2411bool Lexer::LexCharConstant(
Token &
Result,
const char *CurPtr,
2414 const char *NulCharacter =
nullptr;
2417 if (Kind == tok::utf16_char_constant || Kind == tok::utf32_char_constant)
2418 Diag(BufferPtr, LangOpts.CPlusPlus
2419 ? diag::warn_cxx98_compat_unicode_literal
2420 : diag::warn_c99_compat_unicode_literal);
2421 else if (Kind == tok::utf8_char_constant)
2422 Diag(BufferPtr, diag::warn_cxx14_compat_u8_character_literal);
2425 char C = getAndAdvanceChar(CurPtr,
Result);
2428 Diag(BufferPtr, diag::ext_empty_character);
2429 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2436 C = getAndAdvanceChar(CurPtr,
Result);
2438 if (
C ==
'\n' ||
C ==
'\r' ||
2439 (
C == 0 && CurPtr-1 == BufferEnd)) {
2441 Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 0;
2442 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2447 if (isCodeCompletionPoint(CurPtr-1)) {
2449 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2454 NulCharacter = CurPtr-1;
2456 C = getAndAdvanceChar(CurPtr,
Result);
2460 if (LangOpts.CPlusPlus)
2461 CurPtr = LexUDSuffix(
Result, CurPtr,
false);
2465 Diag(NulCharacter, diag::null_in_char_or_string) << 0;
2468 const char *TokStart = BufferPtr;
2469 FormTokenWithChars(
Result, CurPtr, Kind);
2470 Result.setLiteralData(TokStart);
2478bool Lexer::SkipWhitespace(
Token &
Result,
const char *CurPtr,
2479 bool &TokAtPhysicalStartOfLine) {
2483 unsigned char Char = *CurPtr;
2485 const char *lastNewLine =
nullptr;
2486 auto setLastNewLine = [&](
const char *Ptr) {
2492 setLastNewLine(CurPtr - 1);
2511 if (*CurPtr ==
'\n')
2512 setLastNewLine(CurPtr);
2519 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2521 IsAtStartOfLine =
true;
2522 IsAtPhysicalStartOfLine =
true;
2529 char PrevChar = CurPtr[-1];
2535 TokAtPhysicalStartOfLine =
true;
2537 if (NewLinePtr && lastNewLine && NewLinePtr != lastNewLine &&
PP) {
2554bool Lexer::SkipLineComment(
Token &
Result,
const char *CurPtr,
2555 bool &TokAtPhysicalStartOfLine) {
2560 Diag(BufferPtr, diag::ext_line_comment);
2578 bool UnicodeDecodingAlreadyDiagnosed =
false;
2585 C !=
'\n' &&
C !=
'\r') {
2587 UnicodeDecodingAlreadyDiagnosed =
false;
2591 unsigned Length = llvm::getUTF8SequenceSize(
2592 (
const llvm::UTF8 *)CurPtr, (
const llvm::UTF8 *)BufferEnd);
2595 Diag(CurPtr, diag::warn_invalid_utf8_in_comment);
2596 UnicodeDecodingAlreadyDiagnosed =
true;
2599 UnicodeDecodingAlreadyDiagnosed =
false;
2605 const char *NextLine = CurPtr;
2608 const char *EscapePtr = CurPtr-1;
2609 bool HasSpace =
false;
2615 if (*EscapePtr ==
'\\')
2618 else if (EscapePtr[0] ==
'/' && EscapePtr[-1] ==
'?' &&
2619 EscapePtr[-2] ==
'?' && LangOpts.Trigraphs)
2621 CurPtr = EscapePtr-2;
2627 Diag(EscapePtr, diag::backslash_newline_space);
2634 const char *OldPtr = CurPtr;
2637 C = getAndAdvanceChar(CurPtr,
Result);
2642 if (
C != 0 && CurPtr == OldPtr+1) {
2650 if (CurPtr != OldPtr + 1 &&
C !=
'/' &&
2651 (CurPtr == BufferEnd + 1 || CurPtr[0] !=
'/')) {
2652 for (; OldPtr != CurPtr; ++OldPtr)
2653 if (OldPtr[0] ==
'\n' || OldPtr[0] ==
'\r') {
2657 const char *ForwardPtr = CurPtr;
2660 if (ForwardPtr[0] ==
'/' && ForwardPtr[1] ==
'/')
2665 Diag(OldPtr-1, diag::ext_multi_line_line_comment);
2670 if (
C ==
'\r' ||
C ==
'\n' || CurPtr == BufferEnd + 1) {
2675 if (
C ==
'\0' && isCodeCompletionPoint(CurPtr-1)) {
2693 return SaveLineComment(
Result, CurPtr);
2707 NewLinePtr = CurPtr++;
2711 TokAtPhysicalStartOfLine =
true;
2720bool Lexer::SaveLineComment(
Token &
Result,
const char *CurPtr) {
2723 FormTokenWithChars(
Result, CurPtr, tok::comment);
2735 assert(Spelling[0] ==
'/' && Spelling[1] ==
'/' &&
"Not line comment?");
2739 Result.setKind(tok::comment);
2750 assert(CurPtr[0] ==
'\n' || CurPtr[0] ==
'\r');
2753 const char *TrigraphPos =
nullptr;
2755 const char *SpacePos =
nullptr;
2762 if (CurPtr[0] ==
'\n' || CurPtr[0] ==
'\r') {
2764 if (CurPtr[0] == CurPtr[1])
2778 if (*CurPtr ==
'\\') {
2780 }
else if (CurPtr[0] ==
'/' && CurPtr[-1] ==
'?' && CurPtr[-2] ==
'?') {
2782 TrigraphPos = CurPtr - 2;
2793 if (*CurPtr !=
'\n' && *CurPtr !=
'\r')
2802 L->
Diag(TrigraphPos, diag::trigraph_ignored_block_comment);
2806 L->
Diag(TrigraphPos, diag::trigraph_ends_block_comment);
2811 L->
Diag(CurPtr + 1, diag::escaped_newline_block_comment_end);
2815 L->
Diag(SpacePos, diag::backslash_newline_space);
2821#include <emmintrin.h>
2836bool Lexer::SkipBlockComment(
Token &
Result,
const char *CurPtr,
2837 bool &TokAtPhysicalStartOfLine) {
2847 unsigned char C = getCharAndSize(CurPtr, CharSize);
2849 if (
C == 0 && CurPtr == BufferEnd+1) {
2851 Diag(BufferPtr, diag::err_unterminated_block_comment);
2857 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2874 bool UnicodeDecodingAlreadyDiagnosed =
false;
2879 if (CurPtr + 24 < BufferEnd &&
2884 while (
C !=
'/' && (
intptr_t)CurPtr % 16 != 0) {
2889 if (
C ==
'/')
goto FoundSlash;
2893 while (CurPtr + 16 < BufferEnd) {
2895 if (LLVM_UNLIKELY(Mask != 0)) {
2905 CurPtr += llvm::countr_zero<unsigned>(cmp) + 1;
2911 __vector
unsigned char LongUTF = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
2912 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
2913 0x80, 0x80, 0x80, 0x80};
2914 __vector
unsigned char Slashes = {
2915 '/',
'/',
'/',
'/',
'/',
'/',
'/',
'/',
2916 '/',
'/',
'/',
'/',
'/',
'/',
'/',
'/'
2918 while (CurPtr + 16 < BufferEnd) {
2920 vec_any_ge(*(
const __vector
unsigned char *)CurPtr, LongUTF)))
2922 if (
vec_any_eq(*(
const __vector
unsigned char *)CurPtr, Slashes)) {
2929 while (CurPtr + 16 < BufferEnd) {
2930 bool HasNonASCII =
false;
2931 for (
unsigned I = 0; I < 16; ++I)
2932 HasNonASCII |= !
isASCII(CurPtr[I]);
2934 if (LLVM_UNLIKELY(HasNonASCII))
2937 bool HasSlash =
false;
2938 for (
unsigned I = 0; I < 16; ++I)
2939 HasSlash |= CurPtr[I] ==
'/';
2953 while (
C !=
'/' &&
C !=
'\0') {
2955 UnicodeDecodingAlreadyDiagnosed =
false;
2962 unsigned Length = llvm::getUTF8SequenceSize(
2963 (
const llvm::UTF8 *)CurPtr - 1, (
const llvm::UTF8 *)BufferEnd);
2966 Diag(CurPtr - 1, diag::warn_invalid_utf8_in_comment);
2967 UnicodeDecodingAlreadyDiagnosed =
true;
2969 UnicodeDecodingAlreadyDiagnosed =
false;
2970 CurPtr += Length - 1;
2977 if (CurPtr[-2] ==
'*')
2980 if ((CurPtr[-2] ==
'\n' || CurPtr[-2] ==
'\r')) {
2982 LangOpts.Trigraphs)) {
2988 if (CurPtr[0] ==
'*' && CurPtr[1] !=
'/') {
2993 Diag(CurPtr-1, diag::warn_nested_block_comment);
2995 }
else if (
C == 0 && CurPtr == BufferEnd+1) {
2997 Diag(BufferPtr, diag::err_unterminated_block_comment);
3006 FormTokenWithChars(
Result, CurPtr, tok::unknown);
3012 }
else if (
C ==
'\0' && isCodeCompletionPoint(CurPtr-1)) {
3031 FormTokenWithChars(
Result, CurPtr, tok::comment);
3040 SkipWhitespace(
Result, CurPtr+1, TokAtPhysicalStartOfLine);
3058 "Must be in a preprocessing directive!");
3063 const char *CurPtr = BufferPtr;
3065 char Char = getAndAdvanceChar(CurPtr, Tmp);
3073 if (CurPtr-1 != BufferEnd) {
3074 if (isCodeCompletionPoint(CurPtr-1)) {
3090 assert(CurPtr[-1] == Char &&
"Trigraphs for newline?");
3091 BufferPtr = CurPtr-1;
3095 if (Tmp.
is(tok::code_completion)) {
3100 assert(Tmp.
is(tok::eod) &&
"Unexpected token!");
3112bool Lexer::LexEndOfFile(
Token &
Result,
const char *CurPtr) {
3120 FormTokenWithChars(
Result, CurPtr, tok::eod);
3132 BufferPtr = BufferEnd;
3133 FormTokenWithChars(
Result, BufferEnd, tok::eof);
3153 diag::err_pp_unterminated_conditional);
3159 if (CurPtr != BufferStart && (CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')) {
3164 if (LangOpts.CPlusPlus11) {
3168 if (!Diags.
isIgnored(diag::warn_cxx98_compat_no_newline_eof, EndLoc)) {
3169 DiagID = diag::warn_cxx98_compat_no_newline_eof;
3171 DiagID = diag::warn_no_newline_eof;
3174 DiagID = diag::ext_no_newline_eof;
3177 Diag(BufferEnd, DiagID)
3191unsigned Lexer::isNextPPTokenLParen() {
3192 assert(!
LexingRawMode &&
"How can we expand a macro from a skipping buffer?");
3194 if (isDependencyDirectivesLexer()) {
3195 if (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size())
3197 return DepDirectives.front().Tokens[NextDepDirectiveTokenIndex].is(
3207 const char *TmpBufferPtr = BufferPtr;
3209 bool atStartOfLine = IsAtStartOfLine;
3210 bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
3211 bool leadingSpace = HasLeadingSpace;
3217 BufferPtr = TmpBufferPtr;
3219 HasLeadingSpace = leadingSpace;
3220 IsAtStartOfLine = atStartOfLine;
3221 IsAtPhysicalStartOfLine = atPhysicalStartOfLine;
3226 if (Tok.
is(tok::eof))
3228 return Tok.
is(tok::l_paren);
3234 const char *Terminator = CMK ==
CMK_Perforce ?
"<<<<\n" :
">>>>>>>";
3236 auto RestOfBuffer = StringRef(CurPtr, BufferEnd - CurPtr).substr(TermLen);
3237 size_t Pos = RestOfBuffer.find(Terminator);
3238 while (Pos != StringRef::npos) {
3241 (RestOfBuffer[Pos - 1] !=
'\r' && RestOfBuffer[Pos - 1] !=
'\n')) {
3242 RestOfBuffer = RestOfBuffer.substr(Pos+TermLen);
3243 Pos = RestOfBuffer.find(Terminator);
3246 return RestOfBuffer.data()+Pos;
3255bool Lexer::IsStartOfConflictMarker(
const char *CurPtr) {
3257 if (CurPtr != BufferStart &&
3258 CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')
3262 if (!StringRef(CurPtr, BufferEnd - CurPtr).starts_with(
"<<<<<<<") &&
3263 !StringRef(CurPtr, BufferEnd - CurPtr).starts_with(
">>>> "))
3278 Diag(CurPtr, diag::err_conflict_marker);
3279 CurrentConflictMarkerState =
Kind;
3283 while (*CurPtr !=
'\r' && *CurPtr !=
'\n') {
3284 assert(CurPtr != BufferEnd &&
"Didn't find end of line");
3299bool Lexer::HandleEndOfConflictMarker(
const char *CurPtr) {
3301 if (CurPtr != BufferStart &&
3302 CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')
3311 for (
unsigned i = 1; i != 4; ++i)
3312 if (CurPtr[i] != CurPtr[0])
3319 CurrentConflictMarkerState)) {
3323 while (CurPtr != BufferEnd && *CurPtr !=
'\r' && *CurPtr !=
'\n')
3329 CurrentConflictMarkerState =
CMK_None;
3337 const char *BufferEnd) {
3338 if (CurPtr == BufferEnd)
3341 for (; CurPtr != BufferEnd; ++CurPtr) {
3342 if (CurPtr[0] ==
'#' && CurPtr[1] ==
'>')
3348bool Lexer::lexEditorPlaceholder(
Token &
Result,
const char *CurPtr) {
3349 assert(CurPtr[-1] ==
'<' && CurPtr[0] ==
'#' &&
"Not a placeholder!");
3355 const char *Start = CurPtr - 1;
3356 if (!LangOpts.AllowEditorPlaceholders)
3357 Diag(Start, diag::err_placeholder_in_source);
3359 FormTokenWithChars(
Result, End, tok::raw_identifier);
3360 Result.setRawIdentifierData(Start);
3367bool Lexer::isCodeCompletionPoint(
const char *CurPtr)
const {
3376std::optional<uint32_t> Lexer::tryReadNumericUCN(
const char *&StartPtr,
3377 const char *SlashLoc,
3380 char Kind = getCharAndSize(StartPtr, CharSize);
3381 assert((Kind ==
'u' || Kind ==
'U') &&
"expected a UCN");
3383 unsigned NumHexDigits;
3386 else if (Kind ==
'U')
3389 bool Delimited =
false;
3390 bool FoundEndDelimiter =
false;
3394 if (!LangOpts.CPlusPlus && !LangOpts.C99) {
3396 Diag(SlashLoc, diag::warn_ucn_not_valid_in_c89);
3397 return std::nullopt;
3400 const char *CurPtr = StartPtr + CharSize;
3401 const char *KindLoc = &CurPtr[-1];
3403 uint32_t CodePoint = 0;
3404 while (Count != NumHexDigits || Delimited) {
3405 char C = getCharAndSize(CurPtr, CharSize);
3406 if (!Delimited && Count == 0 &&
C ==
'{') {
3412 if (Delimited &&
C ==
'}') {
3414 FoundEndDelimiter =
true;
3418 unsigned Value = llvm::hexDigitValue(
C);
3423 Diag(SlashLoc, diag::warn_delimited_ucn_incomplete)
3424 << StringRef(KindLoc, 1);
3425 return std::nullopt;
3428 if (CodePoint & 0xF000'0000) {
3430 Diag(KindLoc, diag::err_escape_too_large) << 0;
3431 return std::nullopt;
3442 Diag(SlashLoc, FoundEndDelimiter ? diag::warn_delimited_ucn_empty
3443 : diag::warn_ucn_escape_no_digits)
3444 << StringRef(KindLoc, 1);
3445 return std::nullopt;
3448 if (Delimited && Kind ==
'U') {
3450 Diag(SlashLoc, diag::err_hex_escape_no_digits) << StringRef(KindLoc, 1);
3451 return std::nullopt;
3454 if (!Delimited && Count != NumHexDigits) {
3456 Diag(SlashLoc, diag::warn_ucn_escape_incomplete);
3458 if (Count == 4 && NumHexDigits == 8) {
3460 Diag(KindLoc, diag::note_ucn_four_not_eight)
3464 return std::nullopt;
3467 if (Delimited &&
PP) {
3469 ? diag::warn_cxx23_delimited_escape_sequence
3470 : diag::ext_delimited_escape_sequence)
3479 if (CurPtr - StartPtr == (
ptrdiff_t)(Count + 1 + (Delimited ? 2 : 0)))
3482 while (StartPtr != CurPtr)
3483 (void)getAndAdvanceChar(StartPtr, *
Result);
3490std::optional<uint32_t> Lexer::tryReadNamedUCN(
const char *&StartPtr,
3491 const char *SlashLoc,
3496 char C = getCharAndSize(StartPtr, CharSize);
3497 assert(
C ==
'N' &&
"expected \\N{...}");
3499 const char *CurPtr = StartPtr + CharSize;
3500 const char *KindLoc = &CurPtr[-1];
3502 C = getCharAndSize(CurPtr, CharSize);
3505 Diag(SlashLoc, diag::warn_ucn_escape_incomplete);
3506 return std::nullopt;
3509 const char *StartName = CurPtr;
3510 bool FoundEndDelimiter =
false;
3513 C = getCharAndSize(CurPtr, CharSize);
3516 FoundEndDelimiter =
true;
3522 Buffer.push_back(
C);
3525 if (!FoundEndDelimiter || Buffer.empty()) {
3527 Diag(SlashLoc, FoundEndDelimiter ? diag::warn_delimited_ucn_empty
3528 : diag::warn_delimited_ucn_incomplete)
3529 << StringRef(KindLoc, 1);
3530 return std::nullopt;
3533 StringRef Name(Buffer.data(), Buffer.size());
3534 std::optional<char32_t> Match =
3535 llvm::sys::unicode::nameToCodepointStrict(Name);
3536 std::optional<llvm::sys::unicode::LooseMatchingResult> LooseMatch;
3538 LooseMatch = llvm::sys::unicode::nameToCodepointLooseMatching(Name);
3540 Diag(StartName, diag::err_invalid_ucn_name)
3541 << StringRef(Buffer.data(), Buffer.size())
3544 Diag(StartName, diag::note_invalid_ucn_name_loose_matching)
3555 if (Diagnose && Match)
3557 ? diag::warn_cxx23_delimited_escape_sequence
3558 : diag::ext_delimited_escape_sequence)
3565 if (LooseMatch && Diagnose)
3566 Match = LooseMatch->CodePoint;
3573 if (CurPtr - StartPtr == (
ptrdiff_t)(Buffer.size() + 3))
3576 while (StartPtr != CurPtr)
3577 (void)getAndAdvanceChar(StartPtr, *
Result);
3581 return Match ? std::optional<uint32_t>(*Match) :
std::nullopt;
3584uint32_t Lexer::tryReadUCN(
const char *&StartPtr,
const char *SlashLoc,
3588 std::optional<uint32_t> CodePointOpt;
3589 char Kind = getCharAndSize(StartPtr, CharSize);
3590 if (Kind ==
'u' || Kind ==
'U')
3591 CodePointOpt = tryReadNumericUCN(StartPtr, SlashLoc,
Result);
3592 else if (Kind ==
'N')
3593 CodePointOpt = tryReadNamedUCN(StartPtr, SlashLoc,
Result);
3598 uint32_t CodePoint = *CodePointOpt;
3601 if (LangOpts.AsmPreprocessor)
3620 if (CodePoint < 0xA0) {
3624 if (CodePoint < 0x20 || CodePoint >= 0x7F)
3625 Diag(BufferPtr, diag::err_ucn_control_character);
3627 char C =
static_cast<char>(CodePoint);
3628 Diag(BufferPtr, diag::err_ucn_escape_basic_scs) << StringRef(&
C, 1);
3633 }
else if (CodePoint >= 0xD800 && CodePoint <= 0xDFFF) {
3638 if (LangOpts.CPlusPlus && !LangOpts.CPlusPlus11)
3639 Diag(BufferPtr, diag::warn_ucn_escape_surrogate);
3641 Diag(BufferPtr, diag::err_ucn_escape_invalid);
3649bool Lexer::CheckUnicodeWhitespace(
Token &
Result, uint32_t
C,
3650 const char *CurPtr) {
3653 Diag(BufferPtr, diag::ext_unicode_whitespace)
3662void Lexer::PropagateLineStartLeadingSpaceInfo(
Token &
Result) {
3663 IsAtStartOfLine =
Result.isAtStartOfLine();
3664 HasLeadingSpace =
Result.hasLeadingSpace();
3665 HasLeadingEmptyMacro =
Result.hasLeadingEmptyMacro();
3670 assert(!isDependencyDirectivesLexer());
3676 if (IsAtStartOfLine) {
3678 IsAtStartOfLine =
false;
3681 if (HasLeadingSpace) {
3683 HasLeadingSpace =
false;
3686 if (HasLeadingEmptyMacro) {
3688 HasLeadingEmptyMacro =
false;
3691 bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
3692 IsAtPhysicalStartOfLine =
false;
3695 bool returnedToken = LexTokenInternal(
Result, atPhysicalStartOfLine);
3697 assert((returnedToken || !isRawLex) &&
"Raw lex must succeed");
3698 return returnedToken;
3706bool Lexer::LexTokenInternal(
Token &
Result,
bool TokAtPhysicalStartOfLine) {
3708 assert(!
Result.needsCleaning() &&
"Result needs cleaning");
3709 assert(!
Result.hasPtrData() &&
"Result has not been reset");
3712 const char *CurPtr = BufferPtr;
3724 FormTokenWithChars(
Result, CurPtr, tok::unknown);
3733 unsigned SizeTmp, SizeTmp2;
3736 char Char = getAndAdvanceChar(CurPtr,
Result);
3740 NewLinePtr =
nullptr;
3745 if (CurPtr-1 == BufferEnd)
3746 return LexEndOfFile(
Result, CurPtr-1);
3749 if (isCodeCompletionPoint(CurPtr-1)) {
3752 FormTokenWithChars(
Result, CurPtr, tok::code_completion);
3757 Diag(CurPtr-1, diag::null_in_file);
3759 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
3768 if (LangOpts.MicrosoftExt) {
3770 Diag(CurPtr-1, diag::ext_ctrl_z_eof_microsoft);
3771 return LexEndOfFile(
Result, CurPtr-1);
3775 Kind = tok::unknown;
3779 if (CurPtr[0] ==
'\n')
3780 (void)getAndAdvanceChar(CurPtr,
Result);
3794 IsAtStartOfLine =
true;
3795 IsAtPhysicalStartOfLine =
true;
3796 NewLinePtr = CurPtr - 1;
3805 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
3815 SkipHorizontalWhitespace:
3817 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
3826 LineComment && (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP)) {
3827 if (SkipLineComment(
Result, CurPtr+2, TokAtPhysicalStartOfLine))
3829 goto SkipIgnoredUnits;
3831 if (SkipBlockComment(
Result, CurPtr+2, TokAtPhysicalStartOfLine))
3833 goto SkipIgnoredUnits;
3835 goto SkipHorizontalWhitespace;
3843 case '0':
case '1':
case '2':
case '3':
case '4':
3844 case '5':
case '6':
case '7':
case '8':
case '9':
3847 return LexNumericConstant(
Result, CurPtr);
3856 if (LangOpts.CPlusPlus11 || LangOpts.C11) {
3857 Char = getCharAndSize(CurPtr, SizeTmp);
3861 return LexStringLiteral(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3862 tok::utf16_string_literal);
3866 return LexCharConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3867 tok::utf16_char_constant);
3870 if (Char ==
'R' && LangOpts.CPlusPlus11 &&
3871 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3872 return LexRawStringLiteral(
Result,
3873 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3875 tok::utf16_string_literal);
3878 char Char2 = getCharAndSize(CurPtr + SizeTmp, SizeTmp2);
3882 return LexStringLiteral(
Result,
3883 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3885 tok::utf8_string_literal);
3886 if (Char2 ==
'\'' && (LangOpts.CPlusPlus17 || LangOpts.C23))
3887 return LexCharConstant(
3888 Result, ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3890 tok::utf8_char_constant);
3892 if (Char2 ==
'R' && LangOpts.CPlusPlus11) {
3894 char Char3 = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
3897 return LexRawStringLiteral(
Result,
3898 ConsumeChar(ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3901 tok::utf8_string_literal);
3908 return LexIdentifierContinue(
Result, CurPtr);
3914 if (LangOpts.CPlusPlus11 || LangOpts.C11) {
3915 Char = getCharAndSize(CurPtr, SizeTmp);
3919 return LexStringLiteral(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3920 tok::utf32_string_literal);
3924 return LexCharConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3925 tok::utf32_char_constant);
3928 if (Char ==
'R' && LangOpts.CPlusPlus11 &&
3929 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3930 return LexRawStringLiteral(
Result,
3931 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3933 tok::utf32_string_literal);
3937 return LexIdentifierContinue(
Result, CurPtr);
3943 if (LangOpts.CPlusPlus11) {
3944 Char = getCharAndSize(CurPtr, SizeTmp);
3947 return LexRawStringLiteral(
Result,
3948 ConsumeChar(CurPtr, SizeTmp,
Result),
3949 tok::string_literal);
3953 return LexIdentifierContinue(
Result, CurPtr);
3958 Char = getCharAndSize(CurPtr, SizeTmp);
3962 return LexStringLiteral(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3963 tok::wide_string_literal);
3966 if (LangOpts.CPlusPlus11 && Char ==
'R' &&
3967 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3968 return LexRawStringLiteral(
Result,
3969 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3971 tok::wide_string_literal);
3975 return LexCharConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3976 tok::wide_char_constant);
3981 case 'A':
case 'B':
case 'C':
case 'D':
case 'E':
case 'F':
case 'G':
3982 case 'H':
case 'I':
case 'J':
case 'K':
case 'M':
case 'N':
3983 case 'O':
case 'P':
case 'Q':
case 'S':
case 'T':
3984 case 'V':
case 'W':
case 'X':
case 'Y':
case 'Z':
3985 case 'a':
case 'b':
case 'c':
case 'd':
case 'e':
case 'f':
case 'g':
3986 case 'h':
case 'i':
case 'j':
case 'k':
case 'l':
case 'm':
case 'n':
3987 case 'o':
case 'p':
case 'q':
case 'r':
case 's':
case 't':
3988 case 'v':
case 'w':
case 'x':
case 'y':
case 'z':
3992 return LexIdentifierContinue(
Result, CurPtr);
3995 if (LangOpts.DollarIdents) {
3997 Diag(CurPtr-1, diag::ext_dollar_in_identifier);
4000 return LexIdentifierContinue(
Result, CurPtr);
4003 Kind = tok::unknown;
4010 return LexCharConstant(
Result, CurPtr, tok::char_constant);
4016 return LexStringLiteral(
Result, CurPtr,
4018 : tok::string_literal);
4022 Kind = tok::question;
4025 Kind = tok::l_square;
4028 Kind = tok::r_square;
4031 Kind = tok::l_paren;
4034 Kind = tok::r_paren;
4037 Kind = tok::l_brace;
4040 Kind = tok::r_brace;
4043 Char = getCharAndSize(CurPtr, SizeTmp);
4044 if (Char >=
'0' && Char <=
'9') {
4048 return LexNumericConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result));
4049 }
else if (LangOpts.CPlusPlus && Char ==
'*') {
4050 Kind = tok::periodstar;
4052 }
else if (Char ==
'.' &&
4053 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
'.') {
4054 Kind = tok::ellipsis;
4055 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4062 Char = getCharAndSize(CurPtr, SizeTmp);
4065 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4066 }
else if (Char ==
'=') {
4067 Kind = tok::ampequal;
4068 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4074 if (getCharAndSize(CurPtr, SizeTmp) ==
'=') {
4075 Kind = tok::starequal;
4076 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4082 Char = getCharAndSize(CurPtr, SizeTmp);
4084 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4085 Kind = tok::plusplus;
4086 }
else if (Char ==
'=') {
4087 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4088 Kind = tok::plusequal;
4094 Char = getCharAndSize(CurPtr, SizeTmp);
4096 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4097 Kind = tok::minusminus;
4098 }
else if (Char ==
'>' && LangOpts.CPlusPlus &&
4099 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
'*') {
4100 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4102 Kind = tok::arrowstar;
4103 }
else if (Char ==
'>') {
4104 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4106 }
else if (Char ==
'=') {
4107 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4108 Kind = tok::minusequal;
4117 if (getCharAndSize(CurPtr, SizeTmp) ==
'=') {
4118 Kind = tok::exclaimequal;
4119 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4121 Kind = tok::exclaim;
4126 Char = getCharAndSize(CurPtr, SizeTmp);
4136 bool TreatAsComment =
4137 LineComment && (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP);
4138 if (!TreatAsComment)
4140 TreatAsComment = getCharAndSize(CurPtr+SizeTmp, SizeTmp2) !=
'*';
4142 if (TreatAsComment) {
4143 if (SkipLineComment(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
4144 TokAtPhysicalStartOfLine))
4150 goto SkipIgnoredUnits;
4155 if (SkipBlockComment(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
4156 TokAtPhysicalStartOfLine))
4165 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4166 Kind = tok::slashequal;
4172 Char = getCharAndSize(CurPtr, SizeTmp);
4174 Kind = tok::percentequal;
4175 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4176 }
else if (LangOpts.Digraphs && Char ==
'>') {
4177 Kind = tok::r_brace;
4178 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4179 }
else if (LangOpts.Digraphs && Char ==
':') {
4180 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4181 Char = getCharAndSize(CurPtr, SizeTmp);
4182 if (Char ==
'%' && getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
':') {
4183 Kind = tok::hashhash;
4184 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4186 }
else if (Char ==
'@' && LangOpts.MicrosoftExt) {
4187 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4189 Diag(BufferPtr, diag::ext_charize_microsoft);
4196 if (TokAtPhysicalStartOfLine && !
LexingRawMode && !Is_PragmaLexer)
4197 goto HandleDirective;
4202 Kind = tok::percent;
4206 Char = getCharAndSize(CurPtr, SizeTmp);
4208 return LexAngledStringLiteral(
Result, CurPtr);
4209 }
else if (Char ==
'<') {
4210 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
4212 Kind = tok::lesslessequal;
4213 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4215 }
else if (After ==
'<' && IsStartOfConflictMarker(CurPtr-1)) {
4219 }
else if (After ==
'<' && HandleEndOfConflictMarker(CurPtr-1)) {
4223 }
else if (LangOpts.CUDA && After ==
'<') {
4224 Kind = tok::lesslessless;
4225 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4228 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4229 Kind = tok::lessless;
4231 }
else if (Char ==
'=') {
4232 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
4234 if (LangOpts.CPlusPlus20) {
4236 Diag(BufferPtr, diag::warn_cxx17_compat_spaceship);
4237 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4239 Kind = tok::spaceship;
4245 Diag(BufferPtr, diag::warn_cxx20_compat_spaceship)
4250 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4251 Kind = tok::lessequal;
4252 }
else if (LangOpts.Digraphs && Char ==
':') {
4253 if (LangOpts.CPlusPlus11 &&
4254 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
':') {
4261 char After = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
4262 if (After !=
':' && After !=
'>') {
4265 Diag(BufferPtr, diag::warn_cxx98_compat_less_colon_colon);
4270 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4271 Kind = tok::l_square;
4272 }
else if (LangOpts.Digraphs && Char ==
'%') {
4273 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4274 Kind = tok::l_brace;
4275 }
else if (Char ==
'#' && SizeTmp == 1 &&
4276 lexEditorPlaceholder(
Result, CurPtr)) {
4283 Char = getCharAndSize(CurPtr, SizeTmp);
4285 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4286 Kind = tok::greaterequal;
4287 }
else if (Char ==
'>') {
4288 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
4290 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4292 Kind = tok::greatergreaterequal;
4293 }
else if (After ==
'>' && IsStartOfConflictMarker(CurPtr-1)) {
4297 }
else if (After ==
'>' && HandleEndOfConflictMarker(CurPtr-1)) {
4300 }
else if (LangOpts.CUDA && After ==
'>') {
4301 Kind = tok::greatergreatergreater;
4302 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4305 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4306 Kind = tok::greatergreater;
4309 Kind = tok::greater;
4313 Char = getCharAndSize(CurPtr, SizeTmp);
4315 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4316 Kind = tok::caretequal;
4317 }
else if (LangOpts.OpenCL && Char ==
'^') {
4318 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4319 Kind = tok::caretcaret;
4325 Char = getCharAndSize(CurPtr, SizeTmp);
4327 Kind = tok::pipeequal;
4328 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4329 }
else if (Char ==
'|') {
4331 if (CurPtr[1] ==
'|' && HandleEndOfConflictMarker(CurPtr-1))
4333 Kind = tok::pipepipe;
4334 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4340 Char = getCharAndSize(CurPtr, SizeTmp);
4341 if (LangOpts.Digraphs && Char ==
'>') {
4342 Kind = tok::r_square;
4343 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4344 }
else if (Char ==
':') {
4345 Kind = tok::coloncolon;
4346 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4355 Char = getCharAndSize(CurPtr, SizeTmp);
4358 if (CurPtr[1] ==
'=' && HandleEndOfConflictMarker(CurPtr-1))
4361 Kind = tok::equalequal;
4362 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4371 Char = getCharAndSize(CurPtr, SizeTmp);
4373 Kind = tok::hashhash;
4374 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4375 }
else if (Char ==
'@' && LangOpts.MicrosoftExt) {
4378 Diag(BufferPtr, diag::ext_charize_microsoft);
4379 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4385 if (TokAtPhysicalStartOfLine && !
LexingRawMode && !Is_PragmaLexer)
4386 goto HandleDirective;
4394 if (CurPtr[-1] ==
'@' && LangOpts.ObjC)
4397 Kind = tok::unknown;
4402 if (!LangOpts.AsmPreprocessor) {
4403 if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &
Result)) {
4404 if (CheckUnicodeWhitespace(
Result, CodePoint, CurPtr)) {
4405 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
4413 return LexUnicodeIdentifierStart(
Result, CodePoint, CurPtr);
4417 Kind = tok::unknown;
4422 Kind = tok::unknown;
4426 llvm::UTF32 CodePoint;
4431 llvm::ConversionResult Status =
4432 llvm::convertUTF8Sequence((
const llvm::UTF8 **)&CurPtr,
4433 (
const llvm::UTF8 *)BufferEnd,
4435 llvm::strictConversion);
4436 if (Status == llvm::conversionOK) {
4437 if (CheckUnicodeWhitespace(
Result, CodePoint, CurPtr)) {
4438 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
4445 return LexUnicodeIdentifierStart(
Result, CodePoint, CurPtr);
4451 Kind = tok::unknown;
4458 Diag(CurPtr, diag::err_invalid_utf8);
4460 BufferPtr = CurPtr+1;
4472 FormTokenWithChars(
Result, CurPtr, Kind);
4478 FormTokenWithChars(
Result, CurPtr, tok::hash);
4493const char *Lexer::convertDependencyDirectiveToken(
4495 const char *TokPtr = BufferStart + DDTok.
Offset;
4501 BufferPtr = TokPtr + DDTok.
Length;
4505bool Lexer::LexDependencyDirectiveToken(
Token &
Result) {
4506 assert(isDependencyDirectivesLexer());
4508 using namespace dependency_directives_scan;
4510 while (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size()) {
4511 if (DepDirectives.front().Kind == pp_eof)
4512 return LexEndOfFile(
Result, BufferEnd);
4513 if (DepDirectives.front().Kind == tokens_present_before_eof)
4515 NextDepDirectiveTokenIndex = 0;
4516 DepDirectives = DepDirectives.drop_front();
4520 DepDirectives.front().Tokens[NextDepDirectiveTokenIndex++];
4521 if (NextDepDirectiveTokenIndex > 1 || DDTok.
Kind != tok::hash) {
4527 BufferPtr = BufferStart + DDTok.
Offset;
4528 LexAngledStringLiteral(
Result, BufferPtr + 1);
4529 if (
Result.isNot(tok::header_name))
4534 DepDirectives.front().Tokens[NextDepDirectiveTokenIndex];
4535 if (BufferStart + NextTok.
Offset >= BufferPtr)
4537 ++NextDepDirectiveTokenIndex;
4542 const char *TokPtr = convertDependencyDirectiveToken(DDTok,
Result);
4544 if (
Result.is(tok::hash) &&
Result.isAtStartOfLine()) {
4548 if (
Result.is(tok::raw_identifier)) {
4549 Result.setRawIdentifierData(TokPtr);
4557 if (
Result.isLiteral()) {
4558 Result.setLiteralData(TokPtr);
4561 if (
Result.is(tok::colon)) {
4563 if (*BufferPtr ==
':') {
4564 assert(DepDirectives.front().Tokens[NextDepDirectiveTokenIndex].is(
4566 ++NextDepDirectiveTokenIndex;
4567 Result.setKind(tok::coloncolon);
4577bool Lexer::LexDependencyDirectiveTokenWhileSkipping(
Token &
Result) {
4578 assert(isDependencyDirectivesLexer());
4580 using namespace dependency_directives_scan;
4583 unsigned NestedIfs = 0;
4585 DepDirectives = DepDirectives.drop_front();
4586 switch (DepDirectives.front().Kind) {
4588 llvm_unreachable(
"unexpected 'pp_none'");
4629 NextDepDirectiveTokenIndex = 0;
4630 return LexEndOfFile(
Result, BufferEnd);
4635 DepDirectives.front().Tokens.front();
4636 assert(DDTok.
is(tok::hash));
4637 NextDepDirectiveTokenIndex = 1;
4639 convertDependencyDirectiveToken(DDTok,
Result);
Defines the Diagnostic-related interfaces.
Defines the clang::IdentifierInfo, clang::IdentifierTable, and clang::Selector interfaces.
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified.
Defines the clang::LangOptions interface.
static bool isInExpansionTokenRange(const SourceLocation Loc, const SourceManager &SM)
static bool isMathematicalExtensionID(uint32_t C, const LangOptions &LangOpts, bool IsStart, bool &IsExtension)
static void diagnoseInvalidUnicodeCodepointInIdentifier(DiagnosticsEngine &Diags, const LangOptions &LangOpts, uint32_t CodePoint, CharSourceRange Range, bool IsFirst)
static char DecodeTrigraphChar(const char *CP, Lexer *L, bool Trigraphs)
DecodeTrigraphChar - If the specified character is a legal trigraph when prefixed with ?...
static size_t getSpellingSlow(const Token &Tok, const char *BufPtr, const LangOptions &LangOpts, char *Spelling)
Slow case of getSpelling.
static const char * FindConflictEnd(const char *CurPtr, const char *BufferEnd, ConflictMarkerKind CMK)
Find the end of a version control conflict marker.
static void maybeDiagnoseUTF8Homoglyph(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range)
After encountering UTF-8 character C and interpreting it as an identifier character,...
static SourceLocation getBeginningOfFileToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
static void StringifyImpl(T &Str, char Quote)
static LLVM_ATTRIBUTE_NOINLINE SourceLocation GetMappedTokenLoc(Preprocessor &PP, SourceLocation FileLoc, unsigned CharNo, unsigned TokLen)
GetMappedTokenLoc - If lexing out of a 'mapped buffer', where we pretend the lexer buffer was all exp...
static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts, bool &IsExtension)
static CharSourceRange makeCharRange(Lexer &L, const char *Begin, const char *End)
static bool isUnicodeWhitespace(uint32_t Codepoint)
static void diagnoseExtensionInIdentifier(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range)
static const char * findPlaceholderEnd(const char *CurPtr, const char *BufferEnd)
static llvm::SmallString< 5 > codepointAsHexString(uint32_t C)
static CharSourceRange makeRangeFromFileLocs(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, Lexer *L, bool Trigraphs)
isEndOfBlockCommentWithEscapedNewLine - Return true if the specified newline character (either \n or \r) ...
static const char * fastParseASCIIIdentifier(const char *CurPtr, const char *BufferEnd)
static char GetTrigraphCharForLetter(char Letter)
GetTrigraphCharForLetter - Given a character that occurs after a ?? pair, return the decoded trigraph...
static bool isAllowedInitiallyIDChar(uint32_t C, const LangOptions &LangOpts, bool &IsExtension)
static void maybeDiagnoseIDCharCompat(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range, bool IsFirst)
static const char * findBeginningOfLine(StringRef Buffer, unsigned Offset)
Returns the pointer that points to the beginning of line that contains the given offset,...
Defines the MultipleIncludeOpt interface.
Defines the clang::Preprocessor interface.
Defines the clang::SourceLocation class and associated facilities.
Defines the SourceManager interface.
Defines the clang::TokenKind enum and support functions.
static const llvm::sys::UnicodeCharRange C11DisallowedInitialIDCharRanges[]
static const llvm::sys::UnicodeCharRange C99DisallowedInitialIDCharRanges[]
static const llvm::sys::UnicodeCharRange UnicodeWhitespaceCharRanges[]
static const llvm::sys::UnicodeCharRange C99AllowedIDCharRanges[]
static const llvm::sys::UnicodeCharRange C11AllowedIDCharRanges[]
static const llvm::sys::UnicodeCharRange MathematicalNotationProfileIDStartRanges[]
static const llvm::sys::UnicodeCharRange MathematicalNotationProfileIDContinueRanges[]
static const llvm::sys::UnicodeCharRange XIDStartRanges[]
static const llvm::sys::UnicodeCharRange XIDContinueRanges[]
__DEVICE__ void * memcpy(void *__a, const void *__b, size_t __c)
__device__ __2f16 float c
__PTRDIFF_TYPE__ ptrdiff_t
static __inline__ int __ATTRS_o_ai vec_any_ge(vector signed char __a, vector signed char __b)
static __inline__ int __ATTRS_o_ai vec_any_eq(vector signed char __a, vector signed char __b)
Represents a character-granular source range.
static CharSourceRange getCharRange(SourceRange R)
SourceLocation getEnd() const
SourceLocation getBegin() const
A little helper class used to produce diagnostics.
Concrete class used by the front-end to report problems and issues.
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
bool isIgnored(unsigned DiagID, SourceLocation Loc) const
Determine whether the diagnostic is known to be ignored.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
static FixItHint CreateReplacement(CharSourceRange RemoveRange, StringRef Code)
Create a code modification hint that replaces the given source range with the given code string.
static FixItHint CreateRemoval(CharSourceRange RemoveRange)
Create a code modification hint that removes the given source range.
static FixItHint CreateInsertion(SourceLocation InsertionLoc, StringRef Code, bool BeforePreviousInsertions=false)
Create a code modification hint that inserts the given code string at a specific location.
One of these records is kept for each identifier that is lexed.
bool isHandleIdentifierCase() const
Return true if the Preprocessor::HandleIdentifier must be called on a token of this identifier.
bool isKeyword(const LangOptions &LangOpts) const
Return true if this token is a keyword in the specified language.
tok::ObjCKeywordKind getObjCKeywordID() const
Return the Objective-C keyword ID for this identifier.
IdentifierInfo & get(StringRef Name)
Return the identifier token info for the specified named identifier.
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens.
static StringRef getSourceText(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts, bool *Invalid=nullptr)
Returns a string for the source that the range encompasses.
void SetKeepWhitespaceMode(bool Val)
SetKeepWhitespaceMode - This method lets clients enable or disable whitespace retention mode.
static SourceLocation findLocationAfterToken(SourceLocation loc, tok::TokenKind TKind, const SourceManager &SM, const LangOptions &LangOpts, bool SkipTrailingWhitespaceAndNewLine)
Checks that the given token is the first token that occurs after the given location (this excludes co...
bool LexFromRawLexer(Token &Result)
LexFromRawLexer - Lex a token from a designated raw lexer (one with no associated preprocessor object...
bool inKeepCommentMode() const
inKeepCommentMode - Return true if the lexer should return comments as tokens.
void SetCommentRetentionState(bool Mode)
SetCommentRetentionState - Change the comment retention mode of the lexer to the specified mode.
void seek(unsigned Offset, bool IsAtStartOfLine)
Set the lexer's buffer pointer to Offset.
static StringRef getImmediateMacroName(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
void ReadToEndOfLine(SmallVectorImpl< char > *Result=nullptr)
ReadToEndOfLine - Read the rest of the current preprocessor line as an uninterpreted string.
static bool isAtStartOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroBegin=nullptr)
Returns true if the given MacroID location points at the first token of the macro expansion.
DiagnosticBuilder Diag(const char *Loc, unsigned DiagID) const
Diag - Forwarding function for diagnostics.
const char * getBufferLocation() const
Return the current location in the buffer.
bool Lex(Token &Result)
Lex - Return the next token in the file.
bool isPragmaLexer() const
isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.
static unsigned getTokenPrefixLength(SourceLocation TokStart, unsigned CharNo, const SourceManager &SM, const LangOptions &LangOpts)
Get the physical length (including trigraphs and escaped newlines) of the first Characters characters...
Lexer(FileID FID, const llvm::MemoryBufferRef &InputFile, Preprocessor &PP, bool IsFirstIncludeOfFile=true)
Lexer constructor - Create a new lexer object for the specified buffer with the specified preprocesso...
static bool isAtEndOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroEnd=nullptr)
Returns true if the given MacroID location points at the last token of the macro expansion.
SourceLocation getSourceLocation() override
getSourceLocation - Return a source location for the next character in the current file.
static CharSourceRange makeFileCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Accepts a range and returns a character range with file locations.
static bool isNewLineEscaped(const char *BufferStart, const char *Str)
Checks whether new line pointed by Str is preceded by escape sequence.
SourceLocation getSourceLocation(const char *Loc, unsigned TokLen=1) const
getSourceLocation - Return a source location identifier for the specified offset in the current file.
static StringRef getIndentationForLine(SourceLocation Loc, const SourceManager &SM)
Returns the leading whitespace for line that corresponds to the given location Loc.
static unsigned getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid=nullptr)
getSpelling - This method is used to get the spelling of a token into a preallocated buffer,...
bool isKeepWhitespaceMode() const
isKeepWhitespaceMode - Return true if the lexer should return tokens for every character in the file,...
static bool isAsciiIdentifierContinueChar(char c, const LangOptions &LangOpts)
Returns true if the given character could appear in an identifier.
static unsigned MeasureTokenLength(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
MeasureTokenLength - Relex the token at the specified location and return its length in bytes in the ...
static SourceLocation GetBeginningOfToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Given a location anywhere in a source buffer, find the location that corresponds to the beginning of...
void resetExtendedTokenMode()
Sets the extended token mode back to its initial value, according to the language options and preproc...
static StringRef getImmediateMacroNameForDiagnostics(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
static Lexer * Create_PragmaLexer(SourceLocation SpellingLoc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLen, Preprocessor &PP)
Create_PragmaLexer: Lexer constructor - Create a new lexer object for _Pragma expansion.
static PreambleBounds ComputePreamble(StringRef Buffer, const LangOptions &LangOpts, unsigned MaxLines=0)
Compute the preamble of the given file.
static bool getRawToken(SourceLocation Loc, Token &Result, const SourceManager &SM, const LangOptions &LangOpts, bool IgnoreWhiteSpace=false)
Relex the token at the specified location.
static std::optional< Token > findNextToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Finds the token that comes right after the given location.
static SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset, const SourceManager &SM, const LangOptions &LangOpts)
Computes the source location just past the end of the token at this source location.
static std::string Stringify(StringRef Str, bool Charify=false)
Stringify - Convert the specified string into a C string by i) escaping '\' and " characters and ii) ...
static SizedChar getCharAndSizeNoWarn(const char *Ptr, const LangOptions &LangOpts)
getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever emit a warning.
void ExitTopLevelConditional()
Called when the lexer exits the top-level conditional.
bool LexingRawMode
True if in raw mode.
SmallVector< PPConditionalInfo, 4 > ConditionalStack
Information about the set of #if/#ifdef/#ifndef blocks we are currently in.
bool ParsingPreprocessorDirective
True when parsing #XXX; turns '\n' into a tok::eod token.
MultipleIncludeOpt MIOpt
A state machine that detects the #ifndef-wrapping a file idiom for the multiple-include optimization.
bool ParsingFilename
True after #include; turns <xx> or "xxx" into a tok::header_name token.
bool isLexingRawMode() const
Return true if this lexer is in raw mode or not.
const FileID FID
The SourceManager FileID corresponding to the file being lexed.
bool LexEditorPlaceholders
When enabled, the preprocessor will construct editor placeholder tokens.
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
SourceLocation getCodeCompletionLoc() const
Returns the location of the code-completion point.
SourceLocation getCodeCompletionFileLoc() const
Returns the start location of the file of code-completion point.
void setCodeCompletionTokenRange(const SourceLocation Start, const SourceLocation End)
Set the code completion token range for detecting replacement range later on.
bool isRecordingPreamble() const
void setRecordedPreambleConditionalStack(ArrayRef< PPConditionalInfo > s)
bool isInPrimaryFile() const
Return true if we're in the top-level file, not in a #include.
void CreateString(StringRef Str, Token &Tok, SourceLocation ExpansionLocStart=SourceLocation(), SourceLocation ExpansionLocEnd=SourceLocation())
Plop the specified string into a scratch buffer and set the specified token's location and length to ...
IdentifierInfo * LookUpIdentifierInfo(Token &Identifier) const
Given a tok::raw_identifier token, look up the identifier information for the token and install it in...
bool isPreprocessedOutput() const
Returns true if the preprocessor is responsible for generating output, false if it is producing token...
bool HandleIdentifier(Token &Identifier)
Callback invoked when the lexer reads an identifier and has filled in the tokens IdentifierInfo membe...
SourceManager & getSourceManager() const
EmptylineHandler * getEmptylineHandler() const
bool getCommentRetentionState() const
bool hadModuleLoaderFatalFailure() const
PreprocessorOptions & getPreprocessorOpts() const
Retrieve the preprocessor options used to initialize this preprocessor.
StringRef getSpelling(SourceLocation loc, SmallVectorImpl< char > &buffer, bool *invalid=nullptr) const
Return the 'spelling' of the token at the given location; does not go up to the spelling location or ...
bool HandleComment(Token &result, SourceRange Comment)
bool isCodeCompletionEnabled() const
Determine if we are performing code completion.
void HandleDirective(Token &Result)
Callback invoked when the lexer sees a # token at the start of a line.
IdentifierTable & getIdentifierTable()
const LangOptions & getLangOpts() const
void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled)
Hook used by the lexer to invoke the "included file" code completion point.
void CodeCompleteNaturalLanguage()
Hook used by the lexer to invoke the "natural language" code completion point.
bool HandleEndOfFile(Token &Result, bool isEndOfMacro=false)
Callback invoked when the lexer hits the end of the current file.
DiagnosticsEngine & getDiagnostics() const
void setCodeCompletionIdentifierInfo(IdentifierInfo *Filter)
Set the code completion token for filtering purposes.
DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const
Forwarding function for diagnostics.
Encodes a location in the source.
static SourceLocation getFromRawEncoding(UIntTy Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
bool isValid() const
Return true if this is a valid SourceLocation object.
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
UIntTy getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it.
This class handles loading and caching of source files into memory.
const char * getCharacterData(SourceLocation SL, bool *Invalid=nullptr) const
Return a pointer to the start of the specified location in the appropriate spelling MemoryBuffer.
A trivial tuple used to represent a source range.
Each ExpansionInfo encodes the expansion location - where the token was ultimately expanded,...
SourceLocation getExpansionLocStart() const
SourceLocation getSpellingLoc() const
bool isMacroArgExpansion() const
This is a discriminated union of FileInfo and ExpansionInfo.
const ExpansionInfo & getExpansion() const
static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix)
Determine whether a suffix is a valid ud-suffix.
Token - This structure provides full information about a lexed token.
IdentifierInfo * getIdentifierInfo() const
bool hasUCN() const
Returns true if this token contains a universal character name.
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
unsigned getLength() const
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {....
tok::TokenKind getKind() const
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
bool isAnnotation() const
Return true if this is any of tok::annot_* kind tokens.
bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const
Return true if we have an ObjC keyword identifier.
bool isSimpleTypeSpecifier(const LangOptions &LangOpts) const
Determine whether the token kind starts a simple-type-specifier.
void startToken()
Reset all flags to cleared.
bool needsCleaning() const
Return true if this token has trigraphs or escaped newlines in it.
StringRef getRawIdentifier() const
getRawIdentifier - For a raw identifier token (i.e., an identifier lexed in raw mode),...
const char * getLiteralData() const
getLiteralData - For a literal token (numeric constant, string, etc), this returns a pointer to the s...
void setFlag(TokenFlags Flag)
Set the specified flag.
static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a)
Copies the values of the most significant bits from each 8-bit element in a 128-bit integer vector of...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi8(__m128i __a, __m128i __b)
Compares each of the corresponding 8-bit values of the 128-bit integer vectors for equality.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si128(__m128i_u const *__p)
Moves packed integer values from an unaligned 128-bit memory location to elements in a 128-bit intege...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi8(char __b)
Initializes all values in a 128-bit vector of [16 x i8] with the specified 8-bit value.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_load_si128(__m128i const *__p)
Moves packed integer values from an aligned 128-bit memory location to elements in a 128-bit integer ...
@ tokens_present_before_eof
Indicates that there are tokens present between the last scanned directive and eof.
@ pp_pragma_system_header
@ pp_pragma_include_alias
@ After
Like System, but searched after the system directories.
bool isStringLiteral(TokenKind K)
Return true if this is a C or C++ string-literal (or C++11 user-defined-string-literal) token.
ObjCKeywordKind
Provides a namespace for Objective-C keywords which start with an '@'.
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
The JSON file list parser is used to communicate input to InstallAPI.
LLVM_READNONE bool isASCII(char c)
Returns true if a byte is an ASCII character.
LLVM_READONLY bool isVerticalWhitespace(unsigned char c)
Returns true if this character is vertical ASCII whitespace: '\n', '\r'.
ConflictMarkerKind
ConflictMarkerKind - Kinds of conflict marker which the lexer might be recovering from.
@ CMK_Perforce
A Perforce-style conflict marker, initiated by 4 ">"s, separated by 4 "="s, and terminated by 4 "<"s.
@ CMK_None
Not within a conflict marker.
@ CMK_Normal
A normal or diff3 conflict marker, initiated by at least 7 "<"s, separated by at least 7 "="s or "|"s...
LLVM_READONLY bool isAsciiIdentifierContinue(unsigned char c)
bool operator<(DeclarationName LHS, DeclarationName RHS)
Ordering on two declaration names.
LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)
Returns true if this character is horizontal ASCII whitespace: ' ', '\t', '\f', '\v'.
@ Result
The result type of a method or function.
LLVM_READONLY bool isRawStringDelimBody(unsigned char c)
Return true if this is the body character of a C++ raw string delimiter.
LLVM_READONLY bool isWhitespace(unsigned char c)
Return true if this character is horizontal or vertical ASCII whitespace: ' ', '\t',...
LLVM_READONLY bool isPreprocessingNumberBody(unsigned char c)
Return true if this is the body character of a C preprocessing number, which is [a-zA-Z0-9_.].
LLVM_READONLY bool isAsciiIdentifierStart(unsigned char c, bool AllowDollar=false)
Returns true if this is a valid first character of a C identifier, which is [a-zA-Z_].
__INTPTR_TYPE__ intptr_t
A signed integer type with the property that any valid pointer to void can be converted to this type,...
float __ovld __cnfn length(float)
Return the length of vector p, i.e., sqrt(p.x^2 + p.y^2 + ...)
#define _mm_cmpistri(A, B, M)
Uses the immediate operand M to perform a comparison of string data with implicitly defined lengths t...
#define _SIDD_LEAST_SIGNIFICANT
#define _SIDD_NEGATIVE_POLARITY
Represents a char and the number of bytes parsed to produce it.
Describes the bounds (start, size) of the preamble and a flag required by PreprocessorOptions::Precom...
Token lexed as part of dependency directive scanning.
unsigned Offset
Offset into the original source input.
bool is(tok::TokenKind K) const