29#include "llvm/ADT/STLExtras.h"
30#include "llvm/ADT/StringExtras.h"
31#include "llvm/ADT/StringRef.h"
32#include "llvm/ADT/StringSwitch.h"
33#include "llvm/Support/Compiler.h"
34#include "llvm/Support/ConvertUTF.h"
35#include "llvm/Support/MathExtras.h"
36#include "llvm/Support/MemoryBufferRef.h"
37#include "llvm/Support/NativeFormatting.h"
38#include "llvm/Support/Unicode.h"
39#include "llvm/Support/UnicodeCharRanges.h"
61 return II->getObjCKeywordID() == objcKey;
68 return tok::objc_not_keyword;
// Intentionally empty out-of-line member definition.
// NOTE(review): presumably the LLVM "anchor" idiom, where one virtual method
// is defined in a single .cpp file so the class's vtable is emitted only in
// this translation unit. The class declaration is not visible here; confirm
// that anchor() is declared virtual before relying on this.
77void Lexer::anchor() {}
79void Lexer::InitLexer(
const char *BufStart,
const char *BufPtr,
81 BufferStart = BufStart;
85 assert(BufEnd[0] == 0 &&
86 "We assume that the input buffer has a null character at the end"
87 " to simplify lexing!");
92 if (BufferStart == BufferPtr) {
94 StringRef Buf(BufferStart, BufferEnd - BufferStart);
95 size_t BOMLength = llvm::StringSwitch<size_t>(Buf)
96 .StartsWith(
"\xEF\xBB\xBF", 3)
100 BufferPtr += BOMLength;
103 Is_PragmaLexer =
false;
104 CurrentConflictMarkerState =
CMK_None;
107 IsAtStartOfLine =
true;
108 IsAtPhysicalStartOfLine =
true;
110 HasLeadingSpace =
false;
111 HasLeadingEmptyMacro =
false;
126 ExtendedTokenMode = 0;
128 NewLinePtr =
nullptr;
138 FileLoc(PP.getSourceManager().getLocForStartOfFile(FID)),
140 IsFirstTimeLexingFile(IsFirstIncludeOfFile) {
141 InitLexer(InputFile.getBufferStart(), InputFile.getBufferStart(),
142 InputFile.getBufferEnd());
151 const char *BufStart,
const char *BufPtr,
const char *BufEnd,
152 bool IsFirstIncludeOfFile)
154 IsFirstTimeLexingFile(IsFirstIncludeOfFile) {
155 InitLexer(BufStart, BufPtr, BufEnd);
166 bool IsFirstIncludeOfFile)
167 :
Lexer(
SM.getLocForStartOfFile(FID), langOpts, FromFile.getBufferStart(),
168 FromFile.getBufferStart(), FromFile.getBufferEnd(),
169 IsFirstIncludeOfFile) {}
172 assert(
PP &&
"Cannot reset token mode without a preprocessor");
173 if (LangOpts.TraditionalCPP)
201 FileID SpellingFID =
SM.getFileID(SpellingLoc);
202 llvm::MemoryBufferRef InputFile =
SM.getBufferOrFake(SpellingFID);
208 const char *StrData =
SM.getCharacterData(SpellingLoc);
210 L->BufferPtr = StrData;
211 L->BufferEnd = StrData+TokLen;
212 assert(L->BufferEnd[0] == 0 &&
"Buffer is not nul terminated!");
216 L->FileLoc =
SM.createExpansionLoc(
SM.getLocForStartOfFile(SpellingFID),
218 ExpansionLocEnd, TokLen);
225 L->Is_PragmaLexer =
true;
230 this->IsAtPhysicalStartOfLine = IsAtStartOfLine;
231 this->IsAtStartOfLine = IsAtStartOfLine;
232 assert((BufferStart + Offset) <= BufferEnd);
233 BufferPtr = BufferStart + Offset;
237 typename T::size_type i = 0, e = Str.size();
239 if (Str[i] ==
'\\' || Str[i] == Quote) {
240 Str.insert(Str.begin() + i,
'\\');
243 }
else if (Str[i] ==
'\n' || Str[i] ==
'\r') {
245 if ((i < e - 1) && (Str[i + 1] ==
'\n' || Str[i + 1] ==
'\r') &&
246 Str[i] != Str[i + 1]) {
252 Str.insert(Str.begin() + i + 1,
'n');
262 std::string
Result = std::string(Str);
263 char Quote = Charify ?
'\'' :
'"';
278 assert(Tok.
needsCleaning() &&
"getSpellingSlow called on simple token");
281 const char *BufEnd = BufPtr + Tok.
getLength();
285 while (BufPtr < BufEnd) {
290 if (Spelling[Length - 1] ==
'"')
298 Spelling[Length - 2] ==
'R' && Spelling[Length - 1] ==
'"') {
301 const char *RawEnd = BufEnd;
302 do --RawEnd;
while (*RawEnd !=
'"');
303 size_t RawLength = RawEnd - BufPtr + 1;
306 memcpy(Spelling + Length, BufPtr, RawLength);
314 while (BufPtr < BufEnd) {
321 "NeedsCleaning flag set on token that didn't need cleaning!");
336 std::pair<FileID, unsigned> locInfo =
SM.getDecomposedLoc(loc);
339 bool invalidTemp =
false;
340 StringRef file =
SM.getBufferData(locInfo.first, &invalidTemp);
342 if (invalid) *invalid =
true;
346 const char *tokenBegin = file.data() + locInfo.second;
349 Lexer lexer(
SM.getLocForStartOfFile(locInfo.first), options,
350 file.begin(), tokenBegin, file.end());
358 return StringRef(tokenBegin,
length);
362 buffer.resize(
getSpellingSlow(token, tokenBegin, options, buffer.data()));
363 return StringRef(buffer.data(), buffer.size());
373 assert((
int)Tok.
getLength() >= 0 &&
"Token character range is bogus!");
375 bool CharDataInvalid =
false;
385 return std::string(TokStart, TokStart + Tok.
getLength());
406 assert((
int)Tok.
getLength() >= 0 &&
"Token character range is bogus!");
408 const char *TokStart =
nullptr;
410 if (Tok.
is(tok::raw_identifier))
415 Buffer = II->getNameStart();
416 return II->getLength();
426 bool CharDataInvalid =
false;
430 if (CharDataInvalid) {
443 return getSpellingSlow(Tok, TokStart, LangOpts,
const_cast<char*
>(Buffer));
464 bool IgnoreWhiteSpace) {
473 Loc =
SM.getExpansionLoc(Loc);
474 std::pair<FileID, unsigned> LocInfo =
SM.getDecomposedLoc(Loc);
476 StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
480 const char *StrData = Buffer.data()+LocInfo.second;
486 Lexer TheLexer(
SM.getLocForStartOfFile(LocInfo.first), LangOpts,
487 Buffer.begin(), StrData, Buffer.end());
496 const char *BufStart = Buffer.data();
497 if (Offset >= Buffer.size())
500 const char *LexStart = BufStart + Offset;
501 for (; LexStart != BufStart; --LexStart) {
516 std::pair<FileID, unsigned> LocInfo =
SM.getDecomposedLoc(Loc);
517 if (LocInfo.first.isInvalid())
521 StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
527 const char *StrData = Buffer.data() + LocInfo.second;
529 if (!LexStart || LexStart == StrData)
534 Lexer TheLexer(LexerStartLoc, LangOpts, Buffer.data(), LexStart,
554 }
while (TheTok.
getKind() != tok::eof);
566 if (!
SM.isMacroArgExpansion(Loc))
571 std::pair<FileID, unsigned> FileLocInfo =
SM.getDecomposedLoc(FileLoc);
572 std::pair<FileID, unsigned> BeginFileLocInfo =
573 SM.getDecomposedLoc(BeginFileLoc);
574 assert(FileLocInfo.first == BeginFileLocInfo.first &&
575 FileLocInfo.second >= BeginFileLocInfo.second);
581enum PreambleDirectiveKind {
596 Lexer TheLexer(FileLoc, LangOpts, Buffer.begin(), Buffer.begin(),
600 bool InPreprocessorDirective =
false;
604 unsigned MaxLineOffset = 0;
606 const char *CurPtr = Buffer.begin();
607 unsigned CurLine = 0;
608 while (CurPtr != Buffer.end()) {
612 if (CurLine == MaxLines)
616 if (CurPtr != Buffer.end())
617 MaxLineOffset = CurPtr - Buffer.begin();
623 if (InPreprocessorDirective) {
625 if (TheTok.
getKind() == tok::eof) {
636 InPreprocessorDirective =
false;
645 if (MaxLineOffset && TokOffset >= MaxLineOffset)
650 if (TheTok.
getKind() == tok::comment) {
658 Token HashTok = TheTok;
659 InPreprocessorDirective =
true;
668 PreambleDirectiveKind PDK
669 = llvm::StringSwitch<PreambleDirectiveKind>(Keyword)
670 .Case(
"include", PDK_Skipped)
671 .Case(
"__include_macros", PDK_Skipped)
672 .Case(
"define", PDK_Skipped)
673 .Case(
"undef", PDK_Skipped)
674 .Case(
"line", PDK_Skipped)
675 .Case(
"error", PDK_Skipped)
676 .Case(
"pragma", PDK_Skipped)
677 .Case(
"import", PDK_Skipped)
678 .Case(
"include_next", PDK_Skipped)
679 .Case(
"warning", PDK_Skipped)
680 .Case(
"ident", PDK_Skipped)
681 .Case(
"sccs", PDK_Skipped)
682 .Case(
"assert", PDK_Skipped)
683 .Case(
"unassert", PDK_Skipped)
684 .Case(
"if", PDK_Skipped)
685 .Case(
"ifdef", PDK_Skipped)
686 .Case(
"ifndef", PDK_Skipped)
687 .Case(
"elif", PDK_Skipped)
688 .Case(
"elifdef", PDK_Skipped)
689 .Case(
"elifndef", PDK_Skipped)
690 .Case(
"else", PDK_Skipped)
691 .Case(
"endif", PDK_Skipped)
692 .Default(PDK_Unknown);
709 TheTok.
getKind() == tok::raw_identifier &&
711 LangOpts.CPlusPlusModules) {
714 Token ModuleTok = TheTok;
717 }
while (TheTok.
getKind() == tok::comment);
718 if (TheTok.
getKind() != tok::semi) {
733 if (ActiveCommentLoc.
isValid())
734 End = ActiveCommentLoc;
749 const char *TokPtr =
SM.getCharacterData(TokStart, &
Invalid);
752 if (
Invalid || (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr)))
755 unsigned PhysOffset = 0;
760 while (Lexer::isObviouslySimpleCharacter(*TokPtr)) {
770 for (; CharNo; --CharNo) {
781 if (!Lexer::isObviouslySimpleCharacter(*TokPtr))
782 PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr;
831 if (!
SM.isAtStartOfImmediateMacroExpansion(loc, &expansionLoc))
837 *MacroBegin = expansionLoc;
859 if (!
SM.isAtEndOfImmediateMacroExpansion(afterLoc, &expansionLoc))
865 *MacroEnd = expansionLoc;
878 if (Range.isTokenRange()) {
887 std::tie(FID, BeginOffs) =
SM.getDecomposedLoc(
Begin);
892 if (!
SM.isInFileID(End, FID, &EndOffs) ||
902 return SM.getSLocEntry(
SM.getFileID(Loc))
904 .isExpansionTokenRange();
921 Range.setBegin(
Begin);
926 if (Range.isTokenRange()) {
944 Range.setBegin(MacroBegin);
945 Range.setEnd(MacroEnd);
947 if (Range.isTokenRange())
967 Range.setBegin(
SM.getImmediateSpellingLoc(
Begin));
968 Range.setEnd(
SM.getImmediateSpellingLoc(End));
981 if (Range.isInvalid()) {
987 std::pair<FileID, unsigned> beginInfo =
SM.getDecomposedLoc(Range.getBegin());
988 if (beginInfo.first.isInvalid()) {
994 if (!
SM.isInFileID(Range.getEnd(), beginInfo.first, &EndOffs) ||
995 beginInfo.second > EndOffs) {
1001 bool invalidTemp =
false;
1002 StringRef file =
SM.getBufferData(beginInfo.first, &invalidTemp);
1009 return file.substr(beginInfo.second, EndOffs - beginInfo.second);
1015 assert(Loc.
isMacroID() &&
"Only reasonable to call this on macros");
1031 Loc =
SM.getImmediateExpansionRange(Loc).getBegin();
1038 FileID MacroFID =
SM.getFileID(Loc);
1039 if (
SM.isInFileID(SpellLoc, MacroFID))
1049 Loc =
SM.getSpellingLoc(Loc);
1053 std::pair<FileID, unsigned> ExpansionInfo =
SM.getDecomposedLoc(Loc);
1055 StringRef ExpansionBuffer =
SM.getBufferData(ExpansionInfo.first);
1056 return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
1061 assert(Loc.
isMacroID() &&
"Only reasonable to call this on macros");
1063 while (
SM.isMacroArgExpansion(Loc))
1064 Loc =
SM.getImmediateExpansionRange(Loc).getBegin();
1070 if (!SpellLoc.
isFileID() ||
SM.isWrittenInScratchSpace(SpellLoc))
1076 Loc =
SM.getSpellingLoc(
SM.getImmediateExpansionRange(Loc).getBegin());
1080 std::pair<FileID, unsigned> ExpansionInfo =
SM.getDecomposedLoc(Loc);
1082 StringRef ExpansionBuffer =
SM.getBufferData(ExpansionInfo.first);
1083 return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
1092 if (Str - 1 < BufferStart)
1095 if ((Str[0] ==
'\n' && Str[-1] ==
'\r') ||
1096 (Str[0] ==
'\r' && Str[-1] ==
'\n')) {
1097 if (Str - 2 < BufferStart)
1107 return *Str ==
'\\';
1114 std::pair<FileID, unsigned> LocInfo =
SM.getDecomposedLoc(Loc);
1115 if (LocInfo.first.isInvalid())
1118 StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
1124 StringRef Rest = Buffer.substr(Line - Buffer.data());
1125 size_t NumWhitespaceChars = Rest.find_first_not_of(
" \t");
1126 return NumWhitespaceChars == StringRef::npos
1128 : Rest.take_front(NumWhitespaceChars);
1143 unsigned CharNo,
unsigned TokLen) {
1144 assert(FileLoc.
isMacroID() &&
"Must be a macro expansion");
1160 return SM.createExpansionLoc(SpellingLoc, II.
getBegin(), II.
getEnd(), TokLen);
1166 unsigned TokLen)
const {
1167 assert(Loc >= BufferStart && Loc <= BufferEnd &&
1168 "Location out of range for this buffer!");
1172 unsigned CharNo = Loc-BufferStart;
1178 assert(
PP &&
"This doesn't work on raw lexers");
1197 case '=':
return '#';
1198 case ')':
return ']';
1199 case '(':
return '[';
1200 case '!':
return '|';
1201 case '\'':
return '^';
1202 case '>':
return '}';
1203 case '/':
return '\\';
1204 case '<':
return '{';
1205 case '-':
return '~';
1220 L->
Diag(CP-2, diag::trigraph_ignored);
1225 L->
Diag(CP-2, diag::trigraph_converted) << StringRef(&Res, 1);
1232unsigned Lexer::getEscapedNewLineSize(
const char *Ptr) {
1237 if (Ptr[Size-1] !=
'\n' && Ptr[Size-1] !=
'\r')
1241 if ((Ptr[Size] ==
'\r' || Ptr[Size] ==
'\n') &&
1242 Ptr[Size-1] != Ptr[Size])
1255const char *Lexer::SkipEscapedNewLines(
const char *
P) {
1257 const char *AfterEscape;
1260 }
else if (*
P ==
'?') {
1262 if (
P[1] !=
'?' ||
P[2] !=
'/')
1271 unsigned NewLineSize = Lexer::getEscapedNewLineSize(AfterEscape);
1272 if (NewLineSize == 0)
return P;
1273 P = AfterEscape+NewLineSize;
1282 return std::nullopt;
1287 std::pair<FileID, unsigned> LocInfo =
SM.getDecomposedLoc(Loc);
1290 bool InvalidTemp =
false;
1291 StringRef
File =
SM.getBufferData(LocInfo.first, &InvalidTemp);
1293 return std::nullopt;
1295 const char *TokenBegin =
File.data() + LocInfo.second;
1298 Lexer lexer(
SM.getLocForStartOfFile(LocInfo.first), LangOpts,
File.begin(),
1299 TokenBegin,
File.end());
1312 const LangOptions &LangOpts,
bool SkipTrailingWhitespaceAndNewLine) {
1314 if (!Tok || Tok->isNot(TKind))
1319 unsigned NumWhitespaceChars = 0;
1320 if (SkipTrailingWhitespaceAndNewLine) {
1321 const char *TokenEnd =
SM.getCharacterData(TokenLoc) + Tok->getLength();
1322 unsigned char C = *TokenEnd;
1325 NumWhitespaceChars++;
1329 if (
C ==
'\n' ||
C ==
'\r') {
1332 NumWhitespaceChars++;
1333 if ((
C ==
'\n' ||
C ==
'\r') &&
C != PrevC)
1334 NumWhitespaceChars++;
1356char Lexer::getCharAndSizeSlow(
const char *Ptr,
unsigned &Size,
1359 if (Ptr[0] ==
'\\') {
1368 if (
unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) {
1374 Diag(Ptr, diag::backslash_newline_space);
1377 Size += EscapedNewLineSize;
1378 Ptr += EscapedNewLineSize;
1381 return getCharAndSizeSlow(Ptr, Size, Tok);
1389 if (Ptr[0] ==
'?' && Ptr[1] ==
'?') {
1393 LangOpts.Trigraphs)) {
1399 if (
C ==
'\\')
goto Slash;
1415char Lexer::getCharAndSizeSlowNoWarn(
const char *Ptr,
unsigned &Size,
1418 if (Ptr[0] ==
'\\') {
1426 if (
unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) {
1428 Size += EscapedNewLineSize;
1429 Ptr += EscapedNewLineSize;
1432 return getCharAndSizeSlowNoWarn(Ptr, Size, LangOpts);
1440 if (LangOpts.Trigraphs && Ptr[0] ==
'?' && Ptr[1] ==
'?') {
1446 if (
C ==
'\\')
goto Slash;
// Repositions the lexer's read cursor.
//
// Offset      - distance in bytes from BufferStart at which to place
//               BufferPtr.
// StartOfLine - value stored into both IsAtStartOfLine and
//               IsAtPhysicalStartOfLine after the move.
//
// An offset that lands past BufferEnd is not treated as an error: the cursor
// is silently clamped to BufferEnd.
1461void Lexer::SetByteOffset(
unsigned Offset,
bool StartOfLine) {
1462 BufferPtr = BufferStart + Offset;
// Clamp rather than assert, so an oversized offset leaves the cursor at the
// end of the buffer.
// NOTE(review): the pointer is formed before it is range-checked, so an
// Offset far beyond the buffer computes an out-of-range pointer first.
// Comparing Offset against (BufferEnd - BufferStart) before the addition
// would avoid that; confirm whether callers can pass such offsets.
1463 if (BufferPtr > BufferEnd)
1464 BufferPtr = BufferEnd;
// The logical and the physical start-of-line flags are always set to the
// same caller-supplied value here.
1468 IsAtStartOfLine = StartOfLine;
1469 IsAtPhysicalStartOfLine = StartOfLine;
1473 static const llvm::sys::UnicodeCharSet UnicodeWhitespaceChars(
1475 return UnicodeWhitespaceChars.contains(Codepoint);
1480 llvm::raw_svector_ostream CharOS(CharBuf);
1481 llvm::write_hex(CharOS,
C, llvm::HexPrintStyle::Upper, 4);
1492 bool IsStart,
bool &IsExtension) {
1493 static const llvm::sys::UnicodeCharSet MathStartChars(
1495 static const llvm::sys::UnicodeCharSet MathContinueChars(
1497 if (MathStartChars.contains(
C) ||
1498 (!IsStart && MathContinueChars.contains(
C))) {
1506 bool &IsExtension) {
1507 if (LangOpts.AsmPreprocessor) {
1509 }
else if (LangOpts.DollarIdents &&
'$' ==
C) {
1511 }
else if (LangOpts.CPlusPlus || LangOpts.C23) {
1516 static const llvm::sys::UnicodeCharSet XIDStartChars(
XIDStartRanges);
1518 if (
C ==
'_' || XIDStartChars.contains(
C) || XIDContinueChars.contains(
C))
1522 }
else if (LangOpts.C11) {
1523 static const llvm::sys::UnicodeCharSet C11AllowedIDChars(
1525 return C11AllowedIDChars.contains(
C);
1527 static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
1529 return C99AllowedIDChars.contains(
C);
1534 bool &IsExtension) {
1535 assert(
C > 0x7F &&
"isAllowedInitiallyIDChar called with an ASCII codepoint");
1536 IsExtension =
false;
1537 if (LangOpts.AsmPreprocessor) {
1540 if (LangOpts.CPlusPlus || LangOpts.C23) {
1541 static const llvm::sys::UnicodeCharSet XIDStartChars(
XIDStartRanges);
1542 if (XIDStartChars.contains(
C))
1550 static const llvm::sys::UnicodeCharSet C11DisallowedInitialIDChars(
1552 return !C11DisallowedInitialIDChars.contains(
C);
1554 static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
1556 return !C99DisallowedInitialIDChars.contains(
C);
1562 static const llvm::sys::UnicodeCharSet MathStartChars(
1564 static const llvm::sys::UnicodeCharSet MathContinueChars(
1567 (void)MathStartChars;
1568 (void)MathContinueChars;
1569 assert((MathStartChars.contains(
C) || MathContinueChars.contains(
C)) &&
1570 "Unexpected mathematical notation codepoint");
1571 Diags.
Report(Range.getBegin(), diag::ext_mathematical_notation)
1584 if (!Diags.
isIgnored(diag::warn_c99_compat_unicode_id, Range.getBegin())) {
1586 CannotAppearInIdentifier = 0,
1587 CannotStartIdentifier
1590 static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
1592 static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
1594 if (!C99AllowedIDChars.contains(
C)) {
1595 Diags.
Report(Range.getBegin(), diag::warn_c99_compat_unicode_id)
1597 << CannotAppearInIdentifier;
1598 }
else if (IsFirst && C99DisallowedInitialIDChars.contains(
C)) {
1599 Diags.
Report(Range.getBegin(), diag::warn_c99_compat_unicode_id)
1601 << CannotStartIdentifier;
1613 struct HomoglyphPair {
1616 bool operator<(HomoglyphPair R)
const {
return Character < R.Character; }
1618 static constexpr HomoglyphPair SortedHomoglyphs[] = {
1671 std::lower_bound(std::begin(SortedHomoglyphs),
1672 std::end(SortedHomoglyphs) - 1, HomoglyphPair{
C,
'\0'});
1673 if (Homoglyph->Character ==
C) {
1674 if (Homoglyph->LooksLike) {
1675 const char LooksLikeStr[] = {Homoglyph->LooksLike, 0};
1676 Diags.
Report(Range.getBegin(), diag::warn_utf8_symbol_homoglyph)
1679 Diags.
Report(Range.getBegin(), diag::warn_utf8_symbol_zero_width)
1696 if ((IsFirst && IsIDStart) || (!IsFirst && IsIDContinue))
1699 bool InvalidOnlyAtStart = IsFirst && !IsIDStart && IsIDContinue;
1701 if (!IsFirst || InvalidOnlyAtStart) {
1702 Diags.
Report(Range.getBegin(), diag::err_character_not_allowed_identifier)
1706 Diags.
Report(Range.getBegin(), diag::err_character_not_allowed)
1712bool Lexer::tryConsumeIdentifierUCN(
const char *&CurPtr,
unsigned Size,
1714 const char *UCNPtr = CurPtr +
Size;
1715 uint32_t CodePoint = tryReadUCN(UCNPtr, CurPtr,
nullptr);
1716 if (CodePoint == 0) {
1719 bool IsExtension =
false;
1744 if ((UCNPtr - CurPtr == 6 && CurPtr[1] ==
'u') ||
1745 (UCNPtr - CurPtr == 10 && CurPtr[1] ==
'U'))
1748 while (CurPtr != UCNPtr)
1749 (void)getAndAdvanceChar(CurPtr,
Result);
1753bool Lexer::tryConsumeIdentifierUTF8Char(
const char *&CurPtr,
Token &
Result) {
1754 llvm::UTF32 CodePoint;
1759 unsigned FirstCodeUnitSize;
1760 getCharAndSize(CurPtr, FirstCodeUnitSize);
1761 const char *CharStart = CurPtr + FirstCodeUnitSize - 1;
1762 const char *UnicodePtr = CharStart;
1764 llvm::ConversionResult ConvResult = llvm::convertUTF8Sequence(
1765 (
const llvm::UTF8 **)&UnicodePtr, (
const llvm::UTF8 *)BufferEnd,
1766 &CodePoint, llvm::strictConversion);
1767 if (ConvResult != llvm::conversionOK)
1770 bool IsExtension =
false;
1799 ConsumeChar(CurPtr, FirstCodeUnitSize,
Result);
1800 CurPtr = UnicodePtr;
1804bool Lexer::LexUnicodeIdentifierStart(
Token &
Result, uint32_t
C,
1805 const char *CurPtr) {
1806 bool IsExtension =
false;
1821 return LexIdentifierContinue(
Result, CurPtr);
1846 FormTokenWithChars(
Result, CurPtr, tok::unknown);
1850bool Lexer::LexIdentifierContinue(
Token &
Result,
const char *CurPtr) {
1853 unsigned char C = *CurPtr;
1862 C = getCharAndSize(CurPtr, Size);
1864 CurPtr = ConsumeChar(CurPtr, Size,
Result);
1869 if (!LangOpts.DollarIdents)
1873 Diag(CurPtr, diag::ext_dollar_in_identifier);
1874 CurPtr = ConsumeChar(CurPtr, Size,
Result);
1877 if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result))
1885 const char *IdStart = BufferPtr;
1886 FormTokenWithChars(
Result, CurPtr, tok::raw_identifier);
1887 Result.setRawIdentifierData(IdStart);
1903 if (isCodeCompletionPoint(CurPtr)) {
1905 Result.setKind(tok::code_completion);
1911 assert(*CurPtr == 0 &&
"Completion character must be 0");
1916 if (CurPtr < BufferEnd) {
1934bool Lexer::isHexaLiteral(
const char *Start,
const LangOptions &LangOpts) {
1940 return (C2 ==
'x' || C2 ==
'X');
1946bool Lexer::LexNumericConstant(
Token &
Result,
const char *CurPtr) {
1948 char C = getCharAndSize(CurPtr, Size);
1951 CurPtr = ConsumeChar(CurPtr, Size,
Result);
1953 C = getCharAndSize(CurPtr, Size);
1957 if ((
C ==
'-' ||
C ==
'+') && (PrevCh ==
'E' || PrevCh ==
'e')) {
1960 if (!LangOpts.MicrosoftExt || !isHexaLiteral(BufferPtr, LangOpts))
1961 return LexNumericConstant(
Result, ConsumeChar(CurPtr, Size,
Result));
1965 if ((
C ==
'-' ||
C ==
'+') && (PrevCh ==
'P' || PrevCh ==
'p')) {
1969 bool IsHexFloat =
true;
1970 if (!LangOpts.C99) {
1971 if (!isHexaLiteral(BufferPtr, LangOpts))
1973 else if (!LangOpts.CPlusPlus17 &&
1974 std::find(BufferPtr, CurPtr,
'_') != CurPtr)
1978 return LexNumericConstant(
Result, ConsumeChar(CurPtr, Size,
Result));
1982 if (
C ==
'\'' && (LangOpts.CPlusPlus14 || LangOpts.C23)) {
1987 Diag(CurPtr, LangOpts.CPlusPlus
1988 ? diag::warn_cxx11_compat_digit_separator
1989 : diag::warn_c23_compat_digit_separator);
1990 CurPtr = ConsumeChar(CurPtr, Size,
Result);
1991 CurPtr = ConsumeChar(CurPtr, NextSize,
Result);
1992 return LexNumericConstant(
Result, CurPtr);
1997 if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result))
1998 return LexNumericConstant(
Result, CurPtr);
2000 return LexNumericConstant(
Result, CurPtr);
2003 const char *TokStart = BufferPtr;
2004 FormTokenWithChars(
Result, CurPtr, tok::numeric_constant);
2005 Result.setLiteralData(TokStart);
2011const char *Lexer::LexUDSuffix(
Token &
Result,
const char *CurPtr,
2012 bool IsStringLiteral) {
2013 assert(LangOpts.CPlusPlus);
2017 char C = getCharAndSize(CurPtr, Size);
2018 bool Consumed =
false;
2021 if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result))
2023 else if (!
isASCII(
C) && tryConsumeIdentifierUTF8Char(CurPtr,
Result))
2029 if (!LangOpts.CPlusPlus11) {
2032 C ==
'_' ? diag::warn_cxx11_compat_user_defined_literal
2033 : diag::warn_cxx11_compat_reserved_user_defined_literal)
2044 bool IsUDSuffix =
false;
2047 else if (IsStringLiteral && LangOpts.CPlusPlus14) {
2051 const unsigned MaxStandardSuffixLength = 3;
2052 char Buffer[MaxStandardSuffixLength] = {
C };
2053 unsigned Consumed =
Size;
2060 const StringRef CompleteSuffix(Buffer, Chars);
2066 if (Chars == MaxStandardSuffixLength)
2070 Buffer[Chars++] = Next;
2071 Consumed += NextSize;
2077 Diag(CurPtr, LangOpts.MSVCCompat
2078 ? diag::ext_ms_reserved_user_defined_literal
2079 : diag::ext_reserved_user_defined_literal)
2084 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2089 C = getCharAndSize(CurPtr, Size);
2091 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2092 }
else if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result)) {
2093 }
else if (!
isASCII(
C) && tryConsumeIdentifierUTF8Char(CurPtr,
Result)) {
2103bool Lexer::LexStringLiteral(
Token &
Result,
const char *CurPtr,
2105 const char *AfterQuote = CurPtr;
2107 const char *NulCharacter =
nullptr;
2110 (Kind == tok::utf8_string_literal ||
2111 Kind == tok::utf16_string_literal ||
2112 Kind == tok::utf32_string_literal))
2113 Diag(BufferPtr, LangOpts.CPlusPlus ? diag::warn_cxx98_compat_unicode_literal
2114 : diag::warn_c99_compat_unicode_literal);
2116 char C = getAndAdvanceChar(CurPtr,
Result);
2121 C = getAndAdvanceChar(CurPtr,
Result);
2123 if (
C ==
'\n' ||
C ==
'\r' ||
2124 (
C == 0 && CurPtr-1 == BufferEnd)) {
2126 Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 1;
2127 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2132 if (isCodeCompletionPoint(CurPtr-1)) {
2134 codeCompleteIncludedFile(AfterQuote, CurPtr - 1,
false);
2137 FormTokenWithChars(
Result, CurPtr - 1, tok::unknown);
2142 NulCharacter = CurPtr-1;
2144 C = getAndAdvanceChar(CurPtr,
Result);
2148 if (LangOpts.CPlusPlus)
2149 CurPtr = LexUDSuffix(
Result, CurPtr,
true);
2153 Diag(NulCharacter, diag::null_in_char_or_string) << 1;
2156 const char *TokStart = BufferPtr;
2157 FormTokenWithChars(
Result, CurPtr, Kind);
2158 Result.setLiteralData(TokStart);
2164bool Lexer::LexRawStringLiteral(
Token &
Result,
const char *CurPtr,
2172 Diag(BufferPtr, diag::warn_cxx98_compat_raw_string_literal);
2174 unsigned PrefixLen = 0;
2180 if (CurPtr[PrefixLen] !=
'(') {
2182 const char *PrefixEnd = &CurPtr[PrefixLen];
2183 if (PrefixLen == 16) {
2184 Diag(PrefixEnd, diag::err_raw_delim_too_long);
2186 Diag(PrefixEnd, diag::err_invalid_char_raw_delim)
2187 << StringRef(PrefixEnd, 1);
2199 if (
C == 0 && CurPtr-1 == BufferEnd) {
2205 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2210 const char *Prefix = CurPtr;
2211 CurPtr += PrefixLen + 1;
2218 if (strncmp(CurPtr, Prefix, PrefixLen) == 0 && CurPtr[PrefixLen] ==
'"') {
2219 CurPtr += PrefixLen + 1;
2222 }
else if (
C == 0 && CurPtr-1 == BufferEnd) {
2224 Diag(BufferPtr, diag::err_unterminated_raw_string)
2225 << StringRef(Prefix, PrefixLen);
2226 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2232 if (LangOpts.CPlusPlus)
2233 CurPtr = LexUDSuffix(
Result, CurPtr,
true);
2236 const char *TokStart = BufferPtr;
2237 FormTokenWithChars(
Result, CurPtr, Kind);
2238 Result.setLiteralData(TokStart);
2244bool Lexer::LexAngledStringLiteral(
Token &
Result,
const char *CurPtr) {
2246 const char *NulCharacter =
nullptr;
2247 const char *AfterLessPos = CurPtr;
2248 char C = getAndAdvanceChar(CurPtr,
Result);
2253 C = getAndAdvanceChar(CurPtr,
Result);
2256 (
C == 0 && (CurPtr - 1 == BufferEnd))) {
2259 FormTokenWithChars(
Result, AfterLessPos, tok::less);
2264 if (isCodeCompletionPoint(CurPtr - 1)) {
2265 codeCompleteIncludedFile(AfterLessPos, CurPtr - 1,
true);
2267 FormTokenWithChars(
Result, CurPtr - 1, tok::unknown);
2270 NulCharacter = CurPtr-1;
2272 C = getAndAdvanceChar(CurPtr,
Result);
2277 Diag(NulCharacter, diag::null_in_char_or_string) << 1;
2280 const char *TokStart = BufferPtr;
2281 FormTokenWithChars(
Result, CurPtr, tok::header_name);
2282 Result.setLiteralData(TokStart);
2286void Lexer::codeCompleteIncludedFile(
const char *PathStart,
2287 const char *CompletionPoint,
2290 StringRef PartialPath(PathStart, CompletionPoint - PathStart);
2291 llvm::StringRef SlashChars = LangOpts.MSVCCompat ?
"/\\" :
"/";
2292 auto Slash = PartialPath.find_last_of(SlashChars);
2294 (Slash == StringRef::npos) ?
"" : PartialPath.take_front(Slash);
2295 const char *StartOfFilename =
2296 (Slash == StringRef::npos) ? PathStart : PathStart + Slash + 1;
2299 StringRef(StartOfFilename, CompletionPoint - StartOfFilename)));
2302 while (CompletionPoint < BufferEnd) {
2303 char Next = *(CompletionPoint + 1);
2304 if (Next == 0 || Next ==
'\r' || Next ==
'\n')
2307 if (Next == (IsAngled ?
'>' :
'"'))
2309 if (SlashChars.contains(Next))
2321bool Lexer::LexCharConstant(
Token &
Result,
const char *CurPtr,
2324 const char *NulCharacter =
nullptr;
2327 if (Kind == tok::utf16_char_constant || Kind == tok::utf32_char_constant)
2328 Diag(BufferPtr, LangOpts.CPlusPlus
2329 ? diag::warn_cxx98_compat_unicode_literal
2330 : diag::warn_c99_compat_unicode_literal);
2331 else if (Kind == tok::utf8_char_constant)
2332 Diag(BufferPtr, diag::warn_cxx14_compat_u8_character_literal);
2335 char C = getAndAdvanceChar(CurPtr,
Result);
2338 Diag(BufferPtr, diag::ext_empty_character);
2339 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2346 C = getAndAdvanceChar(CurPtr,
Result);
2348 if (
C ==
'\n' ||
C ==
'\r' ||
2349 (
C == 0 && CurPtr-1 == BufferEnd)) {
2351 Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 0;
2352 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2357 if (isCodeCompletionPoint(CurPtr-1)) {
2359 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2364 NulCharacter = CurPtr-1;
2366 C = getAndAdvanceChar(CurPtr,
Result);
2370 if (LangOpts.CPlusPlus)
2371 CurPtr = LexUDSuffix(
Result, CurPtr,
false);
2375 Diag(NulCharacter, diag::null_in_char_or_string) << 0;
2378 const char *TokStart = BufferPtr;
2379 FormTokenWithChars(
Result, CurPtr, Kind);
2380 Result.setLiteralData(TokStart);
2388bool Lexer::SkipWhitespace(
Token &
Result,
const char *CurPtr,
2389 bool &TokAtPhysicalStartOfLine) {
2393 unsigned char Char = *CurPtr;
2395 const char *lastNewLine =
nullptr;
2396 auto setLastNewLine = [&](
const char *Ptr) {
2402 setLastNewLine(CurPtr - 1);
2421 if (*CurPtr ==
'\n')
2422 setLastNewLine(CurPtr);
2429 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2431 IsAtStartOfLine =
true;
2432 IsAtPhysicalStartOfLine =
true;
2439 char PrevChar = CurPtr[-1];
2445 TokAtPhysicalStartOfLine =
true;
2447 if (NewLinePtr && lastNewLine && NewLinePtr != lastNewLine &&
PP) {
2464bool Lexer::SkipLineComment(
Token &
Result,
const char *CurPtr,
2465 bool &TokAtPhysicalStartOfLine) {
2470 Diag(BufferPtr, diag::ext_line_comment);
2488 bool UnicodeDecodingAlreadyDiagnosed =
false;
2495 C !=
'\n' &&
C !=
'\r') {
2497 UnicodeDecodingAlreadyDiagnosed =
false;
2501 unsigned Length = llvm::getUTF8SequenceSize(
2502 (
const llvm::UTF8 *)CurPtr, (
const llvm::UTF8 *)BufferEnd);
2505 Diag(CurPtr, diag::warn_invalid_utf8_in_comment);
2506 UnicodeDecodingAlreadyDiagnosed =
true;
2509 UnicodeDecodingAlreadyDiagnosed =
false;
2515 const char *NextLine = CurPtr;
2518 const char *EscapePtr = CurPtr-1;
2519 bool HasSpace =
false;
2525 if (*EscapePtr ==
'\\')
2528 else if (EscapePtr[0] ==
'/' && EscapePtr[-1] ==
'?' &&
2529 EscapePtr[-2] ==
'?' && LangOpts.Trigraphs)
2531 CurPtr = EscapePtr-2;
2537 Diag(EscapePtr, diag::backslash_newline_space);
2544 const char *OldPtr = CurPtr;
2547 C = getAndAdvanceChar(CurPtr,
Result);
2552 if (
C != 0 && CurPtr == OldPtr+1) {
2560 if (CurPtr != OldPtr + 1 &&
C !=
'/' &&
2561 (CurPtr == BufferEnd + 1 || CurPtr[0] !=
'/')) {
2562 for (; OldPtr != CurPtr; ++OldPtr)
2563 if (OldPtr[0] ==
'\n' || OldPtr[0] ==
'\r') {
2567 const char *ForwardPtr = CurPtr;
2570 if (ForwardPtr[0] ==
'/' && ForwardPtr[1] ==
'/')
2575 Diag(OldPtr-1, diag::ext_multi_line_line_comment);
2580 if (
C ==
'\r' ||
C ==
'\n' || CurPtr == BufferEnd + 1) {
2585 if (
C ==
'\0' && isCodeCompletionPoint(CurPtr-1)) {
2603 return SaveLineComment(
Result, CurPtr);
2617 NewLinePtr = CurPtr++;
2621 TokAtPhysicalStartOfLine =
true;
2630bool Lexer::SaveLineComment(
Token &
Result,
const char *CurPtr) {
2633 FormTokenWithChars(
Result, CurPtr, tok::comment);
2645 assert(Spelling[0] ==
'/' && Spelling[1] ==
'/' &&
"Not line comment?");
2649 Result.setKind(tok::comment);
2660 assert(CurPtr[0] ==
'\n' || CurPtr[0] ==
'\r');
2663 const char *TrigraphPos =
nullptr;
2665 const char *SpacePos =
nullptr;
2672 if (CurPtr[0] ==
'\n' || CurPtr[0] ==
'\r') {
2674 if (CurPtr[0] == CurPtr[1])
2688 if (*CurPtr ==
'\\') {
2690 }
else if (CurPtr[0] ==
'/' && CurPtr[-1] ==
'?' && CurPtr[-2] ==
'?') {
2692 TrigraphPos = CurPtr - 2;
2703 if (*CurPtr !=
'\n' && *CurPtr !=
'\r')
2712 L->
Diag(TrigraphPos, diag::trigraph_ignored_block_comment);
2716 L->
Diag(TrigraphPos, diag::trigraph_ends_block_comment);
2721 L->
Diag(CurPtr + 1, diag::escaped_newline_block_comment_end);
2725 L->
Diag(SpacePos, diag::backslash_newline_space);
2731#include <emmintrin.h>
2746bool Lexer::SkipBlockComment(
Token &
Result,
const char *CurPtr,
2747 bool &TokAtPhysicalStartOfLine) {
2757 unsigned char C = getCharAndSize(CurPtr, CharSize);
2759 if (
C == 0 && CurPtr == BufferEnd+1) {
2761 Diag(BufferPtr, diag::err_unterminated_block_comment);
2767 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2784 bool UnicodeDecodingAlreadyDiagnosed =
false;
2789 if (CurPtr + 24 < BufferEnd &&
2794 while (
C !=
'/' && (
intptr_t)CurPtr % 16 != 0) {
2799 if (
C ==
'/')
goto FoundSlash;
2803 while (CurPtr + 16 < BufferEnd) {
2805 if (LLVM_UNLIKELY(Mask != 0)) {
2815 CurPtr += llvm::countr_zero<unsigned>(cmp) + 1;
2821 __vector
unsigned char LongUTF = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
2822 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
2823 0x80, 0x80, 0x80, 0x80};
2824 __vector
unsigned char Slashes = {
2825 '/',
'/',
'/',
'/',
'/',
'/',
'/',
'/',
2826 '/',
'/',
'/',
'/',
'/',
'/',
'/',
'/'
2828 while (CurPtr + 16 < BufferEnd) {
2830 vec_any_ge(*(
const __vector
unsigned char *)CurPtr, LongUTF)))
2832 if (
vec_any_eq(*(
const __vector
unsigned char *)CurPtr, Slashes)) {
2839 while (CurPtr + 16 < BufferEnd) {
2840 bool HasNonASCII =
false;
2841 for (
unsigned I = 0; I < 16; ++I)
2842 HasNonASCII |= !
isASCII(CurPtr[I]);
2844 if (LLVM_UNLIKELY(HasNonASCII))
2847 bool HasSlash =
false;
2848 for (
unsigned I = 0; I < 16; ++I)
2849 HasSlash |= CurPtr[I] ==
'/';
2863 while (
C !=
'/' &&
C !=
'\0') {
2865 UnicodeDecodingAlreadyDiagnosed =
false;
2872 unsigned Length = llvm::getUTF8SequenceSize(
2873 (
const llvm::UTF8 *)CurPtr - 1, (
const llvm::UTF8 *)BufferEnd);
2876 Diag(CurPtr - 1, diag::warn_invalid_utf8_in_comment);
2877 UnicodeDecodingAlreadyDiagnosed =
true;
2879 UnicodeDecodingAlreadyDiagnosed =
false;
2880 CurPtr += Length - 1;
2887 if (CurPtr[-2] ==
'*')
2890 if ((CurPtr[-2] ==
'\n' || CurPtr[-2] ==
'\r')) {
2892 LangOpts.Trigraphs)) {
2898 if (CurPtr[0] ==
'*' && CurPtr[1] !=
'/') {
2903 Diag(CurPtr-1, diag::warn_nested_block_comment);
2905 }
else if (
C == 0 && CurPtr == BufferEnd+1) {
2907 Diag(BufferPtr, diag::err_unterminated_block_comment);
2916 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2922 }
else if (
C ==
'\0' && isCodeCompletionPoint(CurPtr-1)) {
2941 FormTokenWithChars(
Result, CurPtr, tok::comment);
2950 SkipWhitespace(
Result, CurPtr+1, TokAtPhysicalStartOfLine);
2968 "Must be in a preprocessing directive!");
2973 const char *CurPtr = BufferPtr;
2975 char Char = getAndAdvanceChar(CurPtr, Tmp);
2983 if (CurPtr-1 != BufferEnd) {
2984 if (isCodeCompletionPoint(CurPtr-1)) {
3000 assert(CurPtr[-1] == Char &&
"Trigraphs for newline?");
3001 BufferPtr = CurPtr-1;
3005 if (Tmp.
is(tok::code_completion)) {
3010 assert(Tmp.
is(tok::eod) &&
"Unexpected token!");
3022bool Lexer::LexEndOfFile(
Token &
Result,
const char *CurPtr) {
3030 FormTokenWithChars(
Result, CurPtr, tok::eod);
3042 BufferPtr = BufferEnd;
3043 FormTokenWithChars(
Result, BufferEnd, tok::eof);
3063 diag::err_pp_unterminated_conditional);
3069 if (CurPtr != BufferStart && (CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')) {
3074 if (LangOpts.CPlusPlus11) {
3078 if (!Diags.
isIgnored(diag::warn_cxx98_compat_no_newline_eof, EndLoc)) {
3079 DiagID = diag::warn_cxx98_compat_no_newline_eof;
3081 DiagID = diag::warn_no_newline_eof;
3084 DiagID = diag::ext_no_newline_eof;
3087 Diag(BufferEnd, DiagID)
3101unsigned Lexer::isNextPPTokenLParen() {
3102 assert(!
LexingRawMode &&
"How can we expand a macro from a skipping buffer?");
3104 if (isDependencyDirectivesLexer()) {
3105 if (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size())
3107 return DepDirectives.front().Tokens[NextDepDirectiveTokenIndex].is(
3117 const char *TmpBufferPtr = BufferPtr;
3119 bool atStartOfLine = IsAtStartOfLine;
3120 bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
3121 bool leadingSpace = HasLeadingSpace;
3127 BufferPtr = TmpBufferPtr;
3129 HasLeadingSpace = leadingSpace;
3130 IsAtStartOfLine = atStartOfLine;
3131 IsAtPhysicalStartOfLine = atPhysicalStartOfLine;
3136 if (Tok.
is(tok::eof))
3138 return Tok.
is(tok::l_paren);
3144 const char *Terminator = CMK ==
CMK_Perforce ?
"<<<<\n" :
">>>>>>>";
3146 auto RestOfBuffer = StringRef(CurPtr, BufferEnd - CurPtr).substr(TermLen);
3147 size_t Pos = RestOfBuffer.find(Terminator);
3148 while (Pos != StringRef::npos) {
3151 (RestOfBuffer[Pos - 1] !=
'\r' && RestOfBuffer[Pos - 1] !=
'\n')) {
3152 RestOfBuffer = RestOfBuffer.substr(Pos+TermLen);
3153 Pos = RestOfBuffer.find(Terminator);
3156 return RestOfBuffer.data()+Pos;
3165bool Lexer::IsStartOfConflictMarker(
const char *CurPtr) {
3167 if (CurPtr != BufferStart &&
3168 CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')
3172 if (!StringRef(CurPtr, BufferEnd - CurPtr).startswith(
"<<<<<<<") &&
3173 !StringRef(CurPtr, BufferEnd - CurPtr).startswith(
">>>> "))
3188 Diag(CurPtr, diag::err_conflict_marker);
3189 CurrentConflictMarkerState =
Kind;
3193 while (*CurPtr !=
'\r' && *CurPtr !=
'\n') {
3194 assert(CurPtr != BufferEnd &&
"Didn't find end of line");
3209bool Lexer::HandleEndOfConflictMarker(
const char *CurPtr) {
3211 if (CurPtr != BufferStart &&
3212 CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')
3221 for (
unsigned i = 1; i != 4; ++i)
3222 if (CurPtr[i] != CurPtr[0])
3229 CurrentConflictMarkerState)) {
3233 while (CurPtr != BufferEnd && *CurPtr !=
'\r' && *CurPtr !=
'\n')
3239 CurrentConflictMarkerState =
CMK_None;
3247 const char *BufferEnd) {
3248 if (CurPtr == BufferEnd)
3251 for (; CurPtr != BufferEnd; ++CurPtr) {
3252 if (CurPtr[0] ==
'#' && CurPtr[1] ==
'>')
3258bool Lexer::lexEditorPlaceholder(
Token &
Result,
const char *CurPtr) {
3259 assert(CurPtr[-1] ==
'<' && CurPtr[0] ==
'#' &&
"Not a placeholder!");
3265 const char *Start = CurPtr - 1;
3266 if (!LangOpts.AllowEditorPlaceholders)
3267 Diag(Start, diag::err_placeholder_in_source);
3269 FormTokenWithChars(
Result, End, tok::raw_identifier);
3270 Result.setRawIdentifierData(Start);
3277bool Lexer::isCodeCompletionPoint(
const char *CurPtr)
const {
3286std::optional<uint32_t> Lexer::tryReadNumericUCN(
const char *&StartPtr,
3287 const char *SlashLoc,
3290 char Kind = getCharAndSize(StartPtr, CharSize);
3291 assert((Kind ==
'u' || Kind ==
'U') &&
"expected a UCN");
3293 unsigned NumHexDigits;
3296 else if (Kind ==
'U')
3299 bool Delimited =
false;
3300 bool FoundEndDelimiter =
false;
3304 if (!LangOpts.CPlusPlus && !LangOpts.C99) {
3306 Diag(SlashLoc, diag::warn_ucn_not_valid_in_c89);
3307 return std::nullopt;
3310 const char *CurPtr = StartPtr + CharSize;
3311 const char *KindLoc = &CurPtr[-1];
3313 uint32_t CodePoint = 0;
3314 while (Count != NumHexDigits || Delimited) {
3315 char C = getCharAndSize(CurPtr, CharSize);
3316 if (!Delimited && Count == 0 &&
C ==
'{') {
3322 if (Delimited &&
C ==
'}') {
3324 FoundEndDelimiter =
true;
3328 unsigned Value = llvm::hexDigitValue(
C);
3333 Diag(SlashLoc, diag::warn_delimited_ucn_incomplete)
3334 << StringRef(KindLoc, 1);
3335 return std::nullopt;
3338 if (CodePoint & 0xF000'0000) {
3340 Diag(KindLoc, diag::err_escape_too_large) << 0;
3341 return std::nullopt;
3352 Diag(SlashLoc, FoundEndDelimiter ? diag::warn_delimited_ucn_empty
3353 : diag::warn_ucn_escape_no_digits)
3354 << StringRef(KindLoc, 1);
3355 return std::nullopt;
3358 if (Delimited && Kind ==
'U') {
3360 Diag(SlashLoc, diag::err_hex_escape_no_digits) << StringRef(KindLoc, 1);
3361 return std::nullopt;
3364 if (!Delimited && Count != NumHexDigits) {
3366 Diag(SlashLoc, diag::warn_ucn_escape_incomplete);
3368 if (Count == 4 && NumHexDigits == 8) {
3370 Diag(KindLoc, diag::note_ucn_four_not_eight)
3374 return std::nullopt;
3377 if (Delimited &&
PP) {
3379 ? diag::warn_cxx23_delimited_escape_sequence
3380 : diag::ext_delimited_escape_sequence)
3389 if (CurPtr - StartPtr == (
ptrdiff_t)(Count + 1 + (Delimited ? 2 : 0)))
3392 while (StartPtr != CurPtr)
3393 (void)getAndAdvanceChar(StartPtr, *
Result);
3400std::optional<uint32_t> Lexer::tryReadNamedUCN(
const char *&StartPtr,
3401 const char *SlashLoc,
3406 char C = getCharAndSize(StartPtr, CharSize);
3407 assert(
C ==
'N' &&
"expected \\N{...}");
3409 const char *CurPtr = StartPtr + CharSize;
3410 const char *KindLoc = &CurPtr[-1];
3412 C = getCharAndSize(CurPtr, CharSize);
3415 Diag(SlashLoc, diag::warn_ucn_escape_incomplete);
3416 return std::nullopt;
3419 const char *StartName = CurPtr;
3420 bool FoundEndDelimiter =
false;
3423 C = getCharAndSize(CurPtr, CharSize);
3426 FoundEndDelimiter =
true;
3432 Buffer.push_back(
C);
3435 if (!FoundEndDelimiter || Buffer.empty()) {
3437 Diag(SlashLoc, FoundEndDelimiter ? diag::warn_delimited_ucn_empty
3438 : diag::warn_delimited_ucn_incomplete)
3439 << StringRef(KindLoc, 1);
3440 return std::nullopt;
3443 StringRef Name(Buffer.data(), Buffer.size());
3444 std::optional<char32_t> Match =
3445 llvm::sys::unicode::nameToCodepointStrict(Name);
3446 std::optional<llvm::sys::unicode::LooseMatchingResult> LooseMatch;
3448 LooseMatch = llvm::sys::unicode::nameToCodepointLooseMatching(Name);
3450 Diag(StartName, diag::err_invalid_ucn_name)
3451 << StringRef(Buffer.data(), Buffer.size())
3454 Diag(StartName, diag::note_invalid_ucn_name_loose_matching)
3465 if (Diagnose && Match)
3467 ? diag::warn_cxx23_delimited_escape_sequence
3468 : diag::ext_delimited_escape_sequence)
3475 if (LooseMatch && Diagnose)
3476 Match = LooseMatch->CodePoint;
3483 if (CurPtr - StartPtr == (
ptrdiff_t)(Buffer.size() + 3))
3486 while (StartPtr != CurPtr)
3487 (void)getAndAdvanceChar(StartPtr, *
Result);
3491 return Match ? std::optional<uint32_t>(*Match) :
std::nullopt;
3494uint32_t Lexer::tryReadUCN(
const char *&StartPtr,
const char *SlashLoc,
3498 std::optional<uint32_t> CodePointOpt;
3499 char Kind = getCharAndSize(StartPtr, CharSize);
3500 if (Kind ==
'u' || Kind ==
'U')
3501 CodePointOpt = tryReadNumericUCN(StartPtr, SlashLoc,
Result);
3502 else if (Kind ==
'N')
3503 CodePointOpt = tryReadNamedUCN(StartPtr, SlashLoc,
Result);
3508 uint32_t CodePoint = *CodePointOpt;
3511 if (LangOpts.AsmPreprocessor)
3530 if (CodePoint < 0xA0) {
3534 if (CodePoint < 0x20 || CodePoint >= 0x7F)
3535 Diag(BufferPtr, diag::err_ucn_control_character);
3537 char C =
static_cast<char>(CodePoint);
3538 Diag(BufferPtr, diag::err_ucn_escape_basic_scs) << StringRef(&
C, 1);
3543 }
else if (CodePoint >= 0xD800 && CodePoint <= 0xDFFF) {
3548 if (LangOpts.CPlusPlus && !LangOpts.CPlusPlus11)
3549 Diag(BufferPtr, diag::warn_ucn_escape_surrogate);
3551 Diag(BufferPtr, diag::err_ucn_escape_invalid);
3559bool Lexer::CheckUnicodeWhitespace(
Token &
Result, uint32_t
C,
3560 const char *CurPtr) {
3563 Diag(BufferPtr, diag::ext_unicode_whitespace)
3572void Lexer::PropagateLineStartLeadingSpaceInfo(
Token &
Result) {
3573 IsAtStartOfLine =
Result.isAtStartOfLine();
3574 HasLeadingSpace =
Result.hasLeadingSpace();
3575 HasLeadingEmptyMacro =
Result.hasLeadingEmptyMacro();
3580 assert(!isDependencyDirectivesLexer());
3586 if (IsAtStartOfLine) {
3588 IsAtStartOfLine =
false;
3591 if (HasLeadingSpace) {
3593 HasLeadingSpace =
false;
3596 if (HasLeadingEmptyMacro) {
3598 HasLeadingEmptyMacro =
false;
3601 bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
3602 IsAtPhysicalStartOfLine =
false;
3605 bool returnedToken = LexTokenInternal(
Result, atPhysicalStartOfLine);
3607 assert((returnedToken || !isRawLex) &&
"Raw lex must succeed");
3608 return returnedToken;
3616bool Lexer::LexTokenInternal(
Token &
Result,
bool TokAtPhysicalStartOfLine) {
3618 assert(!
Result.needsCleaning() &&
"Result needs cleaning");
3619 assert(!
Result.hasPtrData() &&
"Result has not been reset");
3622 const char *CurPtr = BufferPtr;
3634 FormTokenWithChars(
Result, CurPtr, tok::unknown);
3643 unsigned SizeTmp, SizeTmp2;
3646 char Char = getAndAdvanceChar(CurPtr,
Result);
3650 NewLinePtr =
nullptr;
3655 if (CurPtr-1 == BufferEnd)
3656 return LexEndOfFile(
Result, CurPtr-1);
3659 if (isCodeCompletionPoint(CurPtr-1)) {
3662 FormTokenWithChars(
Result, CurPtr, tok::code_completion);
3667 Diag(CurPtr-1, diag::null_in_file);
3669 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
3678 if (LangOpts.MicrosoftExt) {
3680 Diag(CurPtr-1, diag::ext_ctrl_z_eof_microsoft);
3681 return LexEndOfFile(
Result, CurPtr-1);
3685 Kind = tok::unknown;
3689 if (CurPtr[0] ==
'\n')
3690 (void)getAndAdvanceChar(CurPtr,
Result);
3704 IsAtStartOfLine =
true;
3705 IsAtPhysicalStartOfLine =
true;
3706 NewLinePtr = CurPtr - 1;
3715 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
3725 SkipHorizontalWhitespace:
3727 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
3736 LineComment && (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP)) {
3737 if (SkipLineComment(
Result, CurPtr+2, TokAtPhysicalStartOfLine))
3739 goto SkipIgnoredUnits;
3741 if (SkipBlockComment(
Result, CurPtr+2, TokAtPhysicalStartOfLine))
3743 goto SkipIgnoredUnits;
3745 goto SkipHorizontalWhitespace;
3753 case '0':
case '1':
case '2':
case '3':
case '4':
3754 case '5':
case '6':
case '7':
case '8':
case '9':
3757 return LexNumericConstant(
Result, CurPtr);
3766 if (LangOpts.CPlusPlus11 || LangOpts.C11) {
3767 Char = getCharAndSize(CurPtr, SizeTmp);
3771 return LexStringLiteral(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3772 tok::utf16_string_literal);
3776 return LexCharConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3777 tok::utf16_char_constant);
3780 if (Char ==
'R' && LangOpts.CPlusPlus11 &&
3781 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3782 return LexRawStringLiteral(
Result,
3783 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3785 tok::utf16_string_literal);
3788 char Char2 = getCharAndSize(CurPtr + SizeTmp, SizeTmp2);
3792 return LexStringLiteral(
Result,
3793 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3795 tok::utf8_string_literal);
3796 if (Char2 ==
'\'' && (LangOpts.CPlusPlus17 || LangOpts.C23))
3797 return LexCharConstant(
3798 Result, ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3800 tok::utf8_char_constant);
3802 if (Char2 ==
'R' && LangOpts.CPlusPlus11) {
3804 char Char3 = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
3807 return LexRawStringLiteral(
Result,
3808 ConsumeChar(ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3811 tok::utf8_string_literal);
3818 return LexIdentifierContinue(
Result, CurPtr);
3824 if (LangOpts.CPlusPlus11 || LangOpts.C11) {
3825 Char = getCharAndSize(CurPtr, SizeTmp);
3829 return LexStringLiteral(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3830 tok::utf32_string_literal);
3834 return LexCharConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3835 tok::utf32_char_constant);
3838 if (Char ==
'R' && LangOpts.CPlusPlus11 &&
3839 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3840 return LexRawStringLiteral(
Result,
3841 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3843 tok::utf32_string_literal);
3847 return LexIdentifierContinue(
Result, CurPtr);
3853 if (LangOpts.CPlusPlus11) {
3854 Char = getCharAndSize(CurPtr, SizeTmp);
3857 return LexRawStringLiteral(
Result,
3858 ConsumeChar(CurPtr, SizeTmp,
Result),
3859 tok::string_literal);
3863 return LexIdentifierContinue(
Result, CurPtr);
3868 Char = getCharAndSize(CurPtr, SizeTmp);
3872 return LexStringLiteral(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3873 tok::wide_string_literal);
3876 if (LangOpts.CPlusPlus11 && Char ==
'R' &&
3877 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3878 return LexRawStringLiteral(
Result,
3879 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3881 tok::wide_string_literal);
3885 return LexCharConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3886 tok::wide_char_constant);
3891 case 'A':
case 'B':
case 'C':
case 'D':
case 'E':
case 'F':
case 'G':
3892 case 'H':
case 'I':
case 'J':
case 'K':
case 'M':
case 'N':
3893 case 'O':
case 'P':
case 'Q':
case 'S':
case 'T':
3894 case 'V':
case 'W':
case 'X':
case 'Y':
case 'Z':
3895 case 'a':
case 'b':
case 'c':
case 'd':
case 'e':
case 'f':
case 'g':
3896 case 'h':
case 'i':
case 'j':
case 'k':
case 'l':
case 'm':
case 'n':
3897 case 'o':
case 'p':
case 'q':
case 'r':
case 's':
case 't':
3898 case 'v':
case 'w':
case 'x':
case 'y':
case 'z':
3902 return LexIdentifierContinue(
Result, CurPtr);
3905 if (LangOpts.DollarIdents) {
3907 Diag(CurPtr-1, diag::ext_dollar_in_identifier);
3910 return LexIdentifierContinue(
Result, CurPtr);
3913 Kind = tok::unknown;
3920 return LexCharConstant(
Result, CurPtr, tok::char_constant);
3926 return LexStringLiteral(
Result, CurPtr,
3928 : tok::string_literal);
3932 Kind = tok::question;
3935 Kind = tok::l_square;
3938 Kind = tok::r_square;
3941 Kind = tok::l_paren;
3944 Kind = tok::r_paren;
3947 Kind = tok::l_brace;
3950 Kind = tok::r_brace;
3953 Char = getCharAndSize(CurPtr, SizeTmp);
3954 if (Char >=
'0' && Char <=
'9') {
3958 return LexNumericConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result));
3959 }
else if (LangOpts.CPlusPlus && Char ==
'*') {
3960 Kind = tok::periodstar;
3962 }
else if (Char ==
'.' &&
3963 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
'.') {
3964 Kind = tok::ellipsis;
3965 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3972 Char = getCharAndSize(CurPtr, SizeTmp);
3975 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
3976 }
else if (Char ==
'=') {
3977 Kind = tok::ampequal;
3978 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
3984 if (getCharAndSize(CurPtr, SizeTmp) ==
'=') {
3985 Kind = tok::starequal;
3986 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
3992 Char = getCharAndSize(CurPtr, SizeTmp);
3994 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
3995 Kind = tok::plusplus;
3996 }
else if (Char ==
'=') {
3997 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
3998 Kind = tok::plusequal;
4004 Char = getCharAndSize(CurPtr, SizeTmp);
4006 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4007 Kind = tok::minusminus;
4008 }
else if (Char ==
'>' && LangOpts.CPlusPlus &&
4009 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
'*') {
4010 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4012 Kind = tok::arrowstar;
4013 }
else if (Char ==
'>') {
4014 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4016 }
else if (Char ==
'=') {
4017 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4018 Kind = tok::minusequal;
4027 if (getCharAndSize(CurPtr, SizeTmp) ==
'=') {
4028 Kind = tok::exclaimequal;
4029 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4031 Kind = tok::exclaim;
4036 Char = getCharAndSize(CurPtr, SizeTmp);
4046 bool TreatAsComment =
4047 LineComment && (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP);
4048 if (!TreatAsComment)
4050 TreatAsComment = getCharAndSize(CurPtr+SizeTmp, SizeTmp2) !=
'*';
4052 if (TreatAsComment) {
4053 if (SkipLineComment(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
4054 TokAtPhysicalStartOfLine))
4060 goto SkipIgnoredUnits;
4065 if (SkipBlockComment(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
4066 TokAtPhysicalStartOfLine))
4075 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4076 Kind = tok::slashequal;
4082 Char = getCharAndSize(CurPtr, SizeTmp);
4084 Kind = tok::percentequal;
4085 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4086 }
else if (LangOpts.Digraphs && Char ==
'>') {
4087 Kind = tok::r_brace;
4088 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4089 }
else if (LangOpts.Digraphs && Char ==
':') {
4090 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4091 Char = getCharAndSize(CurPtr, SizeTmp);
4092 if (Char ==
'%' && getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
':') {
4093 Kind = tok::hashhash;
4094 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4096 }
else if (Char ==
'@' && LangOpts.MicrosoftExt) {
4097 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4099 Diag(BufferPtr, diag::ext_charize_microsoft);
4106 if (TokAtPhysicalStartOfLine && !
LexingRawMode && !Is_PragmaLexer)
4107 goto HandleDirective;
4112 Kind = tok::percent;
4116 Char = getCharAndSize(CurPtr, SizeTmp);
4118 return LexAngledStringLiteral(
Result, CurPtr);
4119 }
else if (Char ==
'<') {
4120 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
4122 Kind = tok::lesslessequal;
4123 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4125 }
else if (After ==
'<' && IsStartOfConflictMarker(CurPtr-1)) {
4129 }
else if (After ==
'<' && HandleEndOfConflictMarker(CurPtr-1)) {
4133 }
else if (LangOpts.CUDA && After ==
'<') {
4134 Kind = tok::lesslessless;
4135 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4138 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4139 Kind = tok::lessless;
4141 }
else if (Char ==
'=') {
4142 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
4144 if (LangOpts.CPlusPlus20) {
4146 Diag(BufferPtr, diag::warn_cxx17_compat_spaceship);
4147 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4149 Kind = tok::spaceship;
4155 Diag(BufferPtr, diag::warn_cxx20_compat_spaceship)
4160 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4161 Kind = tok::lessequal;
4162 }
else if (LangOpts.Digraphs && Char ==
':') {
4163 if (LangOpts.CPlusPlus11 &&
4164 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
':') {
4171 char After = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
4172 if (After !=
':' && After !=
'>') {
4175 Diag(BufferPtr, diag::warn_cxx98_compat_less_colon_colon);
4180 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4181 Kind = tok::l_square;
4182 }
else if (LangOpts.Digraphs && Char ==
'%') {
4183 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4184 Kind = tok::l_brace;
4185 }
else if (Char ==
'#' && SizeTmp == 1 &&
4186 lexEditorPlaceholder(
Result, CurPtr)) {
4193 Char = getCharAndSize(CurPtr, SizeTmp);
4195 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4196 Kind = tok::greaterequal;
4197 }
else if (Char ==
'>') {
4198 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
4200 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4202 Kind = tok::greatergreaterequal;
4203 }
else if (After ==
'>' && IsStartOfConflictMarker(CurPtr-1)) {
4207 }
else if (After ==
'>' && HandleEndOfConflictMarker(CurPtr-1)) {
4210 }
else if (LangOpts.CUDA && After ==
'>') {
4211 Kind = tok::greatergreatergreater;
4212 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4215 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4216 Kind = tok::greatergreater;
4219 Kind = tok::greater;
4223 Char = getCharAndSize(CurPtr, SizeTmp);
4225 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4226 Kind = tok::caretequal;
4227 }
else if (LangOpts.OpenCL && Char ==
'^') {
4228 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4229 Kind = tok::caretcaret;
4235 Char = getCharAndSize(CurPtr, SizeTmp);
4237 Kind = tok::pipeequal;
4238 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4239 }
else if (Char ==
'|') {
4241 if (CurPtr[1] ==
'|' && HandleEndOfConflictMarker(CurPtr-1))
4243 Kind = tok::pipepipe;
4244 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4250 Char = getCharAndSize(CurPtr, SizeTmp);
4251 if (LangOpts.Digraphs && Char ==
'>') {
4252 Kind = tok::r_square;
4253 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4254 }
else if (Char ==
':') {
4255 Kind = tok::coloncolon;
4256 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4265 Char = getCharAndSize(CurPtr, SizeTmp);
4268 if (CurPtr[1] ==
'=' && HandleEndOfConflictMarker(CurPtr-1))
4271 Kind = tok::equalequal;
4272 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4281 Char = getCharAndSize(CurPtr, SizeTmp);
4283 Kind = tok::hashhash;
4284 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4285 }
else if (Char ==
'@' && LangOpts.MicrosoftExt) {
4288 Diag(BufferPtr, diag::ext_charize_microsoft);
4289 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4295 if (TokAtPhysicalStartOfLine && !
LexingRawMode && !Is_PragmaLexer)
4296 goto HandleDirective;
4304 if (CurPtr[-1] ==
'@' && LangOpts.ObjC)
4307 Kind = tok::unknown;
4312 if (!LangOpts.AsmPreprocessor) {
4313 if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &
Result)) {
4314 if (CheckUnicodeWhitespace(
Result, CodePoint, CurPtr)) {
4315 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
4323 return LexUnicodeIdentifierStart(
Result, CodePoint, CurPtr);
4327 Kind = tok::unknown;
4332 Kind = tok::unknown;
4336 llvm::UTF32 CodePoint;
4341 llvm::ConversionResult Status =
4342 llvm::convertUTF8Sequence((
const llvm::UTF8 **)&CurPtr,
4343 (
const llvm::UTF8 *)BufferEnd,
4345 llvm::strictConversion);
4346 if (Status == llvm::conversionOK) {
4347 if (CheckUnicodeWhitespace(
Result, CodePoint, CurPtr)) {
4348 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
4355 return LexUnicodeIdentifierStart(
Result, CodePoint, CurPtr);
4361 Kind = tok::unknown;
4368 Diag(CurPtr, diag::err_invalid_utf8);
4370 BufferPtr = CurPtr+1;
4382 FormTokenWithChars(
Result, CurPtr, Kind);
4388 FormTokenWithChars(
Result, CurPtr, tok::hash);
4403const char *Lexer::convertDependencyDirectiveToken(
4405 const char *TokPtr = BufferStart + DDTok.
Offset;
4411 BufferPtr = TokPtr + DDTok.
Length;
4415bool Lexer::LexDependencyDirectiveToken(
Token &
Result) {
4416 assert(isDependencyDirectivesLexer());
4418 using namespace dependency_directives_scan;
4420 while (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size()) {
4421 if (DepDirectives.front().Kind == pp_eof)
4422 return LexEndOfFile(
Result, BufferEnd);
4423 if (DepDirectives.front().Kind == tokens_present_before_eof)
4425 NextDepDirectiveTokenIndex = 0;
4426 DepDirectives = DepDirectives.drop_front();
4430 DepDirectives.front().Tokens[NextDepDirectiveTokenIndex++];
4431 if (NextDepDirectiveTokenIndex > 1 || DDTok.
Kind != tok::hash) {
4437 BufferPtr = BufferStart + DDTok.
Offset;
4438 LexAngledStringLiteral(
Result, BufferPtr + 1);
4439 if (
Result.isNot(tok::header_name))
4444 DepDirectives.front().Tokens[NextDepDirectiveTokenIndex];
4445 if (BufferStart + NextTok.
Offset >= BufferPtr)
4447 ++NextDepDirectiveTokenIndex;
4452 const char *TokPtr = convertDependencyDirectiveToken(DDTok,
Result);
4454 if (
Result.is(tok::hash) &&
Result.isAtStartOfLine()) {
4458 if (
Result.is(tok::raw_identifier)) {
4459 Result.setRawIdentifierData(TokPtr);
4467 if (
Result.isLiteral()) {
4468 Result.setLiteralData(TokPtr);
4471 if (
Result.is(tok::colon)) {
4473 if (*BufferPtr ==
':') {
4474 assert(DepDirectives.front().Tokens[NextDepDirectiveTokenIndex].is(
4476 ++NextDepDirectiveTokenIndex;
4477 Result.setKind(tok::coloncolon);
4487bool Lexer::LexDependencyDirectiveTokenWhileSkipping(
Token &
Result) {
4488 assert(isDependencyDirectivesLexer());
4490 using namespace dependency_directives_scan;
4493 unsigned NestedIfs = 0;
4495 DepDirectives = DepDirectives.drop_front();
4496 switch (DepDirectives.front().Kind) {
4498 llvm_unreachable(
"unexpected 'pp_none'");
4539 NextDepDirectiveTokenIndex = 0;
4540 return LexEndOfFile(
Result, BufferEnd);
4545 DepDirectives.front().Tokens.front();
4546 assert(DDTok.
is(tok::hash));
4547 NextDepDirectiveTokenIndex = 1;
4549 convertDependencyDirectiveToken(DDTok,
Result);
Defines the Diagnostic-related interfaces.
Defines the clang::IdentifierInfo, clang::IdentifierTable, and clang::Selector interfaces.
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified.
Defines the clang::LangOptions interface.
static bool isInExpansionTokenRange(const SourceLocation Loc, const SourceManager &SM)
static bool isMathematicalExtensionID(uint32_t C, const LangOptions &LangOpts, bool IsStart, bool &IsExtension)
static void diagnoseInvalidUnicodeCodepointInIdentifier(DiagnosticsEngine &Diags, const LangOptions &LangOpts, uint32_t CodePoint, CharSourceRange Range, bool IsFirst)
static char DecodeTrigraphChar(const char *CP, Lexer *L, bool Trigraphs)
DecodeTrigraphChar - If the specified character is a legal trigraph when prefixed with ?...
static size_t getSpellingSlow(const Token &Tok, const char *BufPtr, const LangOptions &LangOpts, char *Spelling)
Slow case of getSpelling.
static const char * FindConflictEnd(const char *CurPtr, const char *BufferEnd, ConflictMarkerKind CMK)
Find the end of a version control conflict marker.
static void maybeDiagnoseUTF8Homoglyph(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range)
After encountering UTF-8 character C and interpreting it as an identifier character,...
static SourceLocation getBeginningOfFileToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
static void StringifyImpl(T &Str, char Quote)
static LLVM_ATTRIBUTE_NOINLINE SourceLocation GetMappedTokenLoc(Preprocessor &PP, SourceLocation FileLoc, unsigned CharNo, unsigned TokLen)
GetMappedTokenLoc - If lexing out of a 'mapped buffer', where we pretend the lexer buffer was all exp...
static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts, bool &IsExtension)
static CharSourceRange makeCharRange(Lexer &L, const char *Begin, const char *End)
static bool isUnicodeWhitespace(uint32_t Codepoint)
static void diagnoseExtensionInIdentifier(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range)
static const char * findPlaceholderEnd(const char *CurPtr, const char *BufferEnd)
static llvm::SmallString< 5 > codepointAsHexString(uint32_t C)
static CharSourceRange makeRangeFromFileLocs(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, Lexer *L, bool Trigraphs)
isEndOfBlockCommentWithEscapedNewLine - Return true if the specified newline character (either \n or \r) ...
static char GetTrigraphCharForLetter(char Letter)
GetTrigraphCharForLetter - Given a character that occurs after a ?? pair, return the decoded trigraph...
static bool isAllowedInitiallyIDChar(uint32_t C, const LangOptions &LangOpts, bool &IsExtension)
static void maybeDiagnoseIDCharCompat(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range, bool IsFirst)
static const char * findBeginningOfLine(StringRef Buffer, unsigned Offset)
Returns the pointer that points to the beginning of line that contains the given offset,...
Defines the MultipleIncludeOpt interface.
Defines the clang::Preprocessor interface.
Defines the clang::SourceLocation class and associated facilities.
Defines the SourceManager interface.
Defines the clang::TokenKind enum and support functions.
static const llvm::sys::UnicodeCharRange C11DisallowedInitialIDCharRanges[]
static const llvm::sys::UnicodeCharRange C99DisallowedInitialIDCharRanges[]
static const llvm::sys::UnicodeCharRange UnicodeWhitespaceCharRanges[]
static const llvm::sys::UnicodeCharRange C99AllowedIDCharRanges[]
static const llvm::sys::UnicodeCharRange C11AllowedIDCharRanges[]
static const llvm::sys::UnicodeCharRange MathematicalNotationProfileIDStartRanges[]
static const llvm::sys::UnicodeCharRange MathematicalNotationProfileIDContinueRanges[]
static const llvm::sys::UnicodeCharRange XIDStartRanges[]
static const llvm::sys::UnicodeCharRange XIDContinueRanges[]
__DEVICE__ void * memcpy(void *__a, const void *__b, size_t __c)
__device__ __2f16 float c
__PTRDIFF_TYPE__ ptrdiff_t
static __inline__ int __ATTRS_o_ai vec_any_ge(vector signed char __a, vector signed char __b)
static __inline__ int __ATTRS_o_ai vec_any_eq(vector signed char __a, vector signed char __b)
Represents a character-granular source range.
static CharSourceRange getCharRange(SourceRange R)
SourceLocation getEnd() const
SourceLocation getBegin() const
A little helper class used to produce diagnostics.
Concrete class used by the front-end to report problems and issues.
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
bool isIgnored(unsigned DiagID, SourceLocation Loc) const
Determine whether the diagnostic is known to be ignored.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
static FixItHint CreateReplacement(CharSourceRange RemoveRange, StringRef Code)
Create a code modification hint that replaces the given source range with the given code string.
static FixItHint CreateRemoval(CharSourceRange RemoveRange)
Create a code modification hint that removes the given source range.
static FixItHint CreateInsertion(SourceLocation InsertionLoc, StringRef Code, bool BeforePreviousInsertions=false)
Create a code modification hint that inserts the given code string at a specific location.
One of these records is kept for each identifier that is lexed.
bool isHandleIdentifierCase() const
Return true if the Preprocessor::HandleIdentifier must be called on a token of this identifier.
tok::ObjCKeywordKind getObjCKeywordID() const
Return the Objective-C keyword ID for this identifier.
IdentifierInfo & get(StringRef Name)
Return the identifier token info for the specified named identifier.
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens.
static StringRef getSourceText(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts, bool *Invalid=nullptr)
Returns a string for the source that the range encompasses.
void SetKeepWhitespaceMode(bool Val)
SetKeepWhitespaceMode - This method lets clients enable or disable whitespace retention mode.
static SourceLocation findLocationAfterToken(SourceLocation loc, tok::TokenKind TKind, const SourceManager &SM, const LangOptions &LangOpts, bool SkipTrailingWhitespaceAndNewLine)
Checks that the given token is the first token that occurs after the given location (this excludes co...
bool LexFromRawLexer(Token &Result)
LexFromRawLexer - Lex a token from a designated raw lexer (one with no associated preprocessor object...
bool inKeepCommentMode() const
inKeepCommentMode - Return true if the lexer should return comments as tokens.
void SetCommentRetentionState(bool Mode)
SetCommentRetentionState - Change the comment retention mode of the lexer to the specified mode.
void seek(unsigned Offset, bool IsAtStartOfLine)
Set the lexer's buffer pointer to Offset.
static StringRef getImmediateMacroName(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
void ReadToEndOfLine(SmallVectorImpl< char > *Result=nullptr)
ReadToEndOfLine - Read the rest of the current preprocessor line as an uninterpreted string.
static bool isAtStartOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroBegin=nullptr)
Returns true if the given MacroID location points at the first token of the macro expansion.
DiagnosticBuilder Diag(const char *Loc, unsigned DiagID) const
Diag - Forwarding function for diagnostics.
static char getCharAndSizeNoWarn(const char *Ptr, unsigned &Size, const LangOptions &LangOpts)
getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever emit a warning.
const char * getBufferLocation() const
Return the current location in the buffer.
bool isPragmaLexer() const
isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.
static unsigned getTokenPrefixLength(SourceLocation TokStart, unsigned CharNo, const SourceManager &SM, const LangOptions &LangOpts)
Get the physical length (including trigraphs and escaped newlines) of the first CharNo characters...
Lexer(FileID FID, const llvm::MemoryBufferRef &InputFile, Preprocessor &PP, bool IsFirstIncludeOfFile=true)
Lexer constructor - Create a new lexer object for the specified buffer with the specified preprocesso...
static bool isAtEndOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroEnd=nullptr)
Returns true if the given MacroID location points at the last token of the macro expansion.
SourceLocation getSourceLocation() override
getSourceLocation - Return a source location for the next character in the current file.
static CharSourceRange makeFileCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Accepts a range and returns a character range with file locations.
static bool isNewLineEscaped(const char *BufferStart, const char *Str)
Checks whether new line pointed by Str is preceded by escape sequence.
SourceLocation getSourceLocation(const char *Loc, unsigned TokLen=1) const
getSourceLocation - Return a source location identifier for the specified offset in the current file.
static StringRef getIndentationForLine(SourceLocation Loc, const SourceManager &SM)
Returns the leading whitespace for the line that corresponds to the given location Loc.
static unsigned getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid=nullptr)
getSpelling - This method is used to get the spelling of a token into a preallocated buffer,...
bool isKeepWhitespaceMode() const
isKeepWhitespaceMode - Return true if the lexer should return tokens for every character in the file,...
static bool isAsciiIdentifierContinueChar(char c, const LangOptions &LangOpts)
Returns true if the given character could appear in an identifier.
static unsigned MeasureTokenLength(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
MeasureTokenLength - Relex the token at the specified location and return its length in bytes in the ...
static SourceLocation GetBeginningOfToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Given a location anywhere in a source buffer, find the location that corresponds to the beginning of...
void resetExtendedTokenMode()
Sets the extended token mode back to its initial value, according to the language options and preproc...
static StringRef getImmediateMacroNameForDiagnostics(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
static Lexer * Create_PragmaLexer(SourceLocation SpellingLoc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLen, Preprocessor &PP)
Create_PragmaLexer: Lexer constructor - Create a new lexer object for _Pragma expansion.
static PreambleBounds ComputePreamble(StringRef Buffer, const LangOptions &LangOpts, unsigned MaxLines=0)
Compute the preamble of the given file.
static bool getRawToken(SourceLocation Loc, Token &Result, const SourceManager &SM, const LangOptions &LangOpts, bool IgnoreWhiteSpace=false)
Relex the token at the specified location.
static std::optional< Token > findNextToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Finds the token that comes right after the given location.
static SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset, const SourceManager &SM, const LangOptions &LangOpts)
Computes the source location just past the end of the token at this source location.
static std::string Stringify(StringRef Str, bool Charify=false)
Stringify - Convert the specified string into a C string by i) escaping '\' and " characters and ii) ...
void ExitTopLevelConditional()
Called when the lexer exits the top-level conditional.
bool LexingRawMode
True if in raw mode.
SmallVector< PPConditionalInfo, 4 > ConditionalStack
Information about the set of #if/#ifdef/#ifndef blocks we are currently in.
bool ParsingPreprocessorDirective
True when parsing #XXX; turns '\n' into a tok::eod token.
MultipleIncludeOpt MIOpt
A state machine that detects the #ifndef-wrapping a file idiom for the multiple-include optimization.
bool ParsingFilename
True after #include; turns <xx> or "xxx" into a tok::header_name token.
bool isLexingRawMode() const
Return true if this lexer is in raw mode or not.
const FileID FID
The SourceManager FileID corresponding to the file being lexed.
bool LexEditorPlaceholders
When enabled, the preprocessor will construct editor placeholder tokens.
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
SourceLocation getCodeCompletionLoc() const
Returns the location of the code-completion point.
SourceLocation getCodeCompletionFileLoc() const
Returns the start location of the file of code-completion point.
void setCodeCompletionTokenRange(const SourceLocation Start, const SourceLocation End)
Set the code completion token range for detecting replacement range later on.
bool isRecordingPreamble() const
void setRecordedPreambleConditionalStack(ArrayRef< PPConditionalInfo > s)
bool isInPrimaryFile() const
Return true if we're in the top-level file, not in a #include.
void CreateString(StringRef Str, Token &Tok, SourceLocation ExpansionLocStart=SourceLocation(), SourceLocation ExpansionLocEnd=SourceLocation())
Plop the specified string into a scratch buffer and set the specified token's location and length to ...
IdentifierInfo * LookUpIdentifierInfo(Token &Identifier) const
Given a tok::raw_identifier token, look up the identifier information for the token and install it in...
bool isPreprocessedOutput() const
Returns true if the preprocessor is responsible for generating output, false if it is producing token...
bool HandleIdentifier(Token &Identifier)
Callback invoked when the lexer reads an identifier and has filled in the token's IdentifierInfo membe...
SourceManager & getSourceManager() const
EmptylineHandler * getEmptylineHandler() const
bool getCommentRetentionState() const
bool hadModuleLoaderFatalFailure() const
PreprocessorOptions & getPreprocessorOpts() const
Retrieve the preprocessor options used to initialize this preprocessor.
StringRef getSpelling(SourceLocation loc, SmallVectorImpl< char > &buffer, bool *invalid=nullptr) const
Return the 'spelling' of the token at the given location; does not go up to the spelling location or ...
bool HandleComment(Token &result, SourceRange Comment)
bool isCodeCompletionEnabled() const
Determine if we are performing code completion.
void HandleDirective(Token &Result)
Callback invoked when the lexer sees a # token at the start of a line.
IdentifierTable & getIdentifierTable()
const LangOptions & getLangOpts() const
void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled)
Hook used by the lexer to invoke the "included file" code completion point.
void CodeCompleteNaturalLanguage()
Hook used by the lexer to invoke the "natural language" code completion point.
bool HandleEndOfFile(Token &Result, bool isEndOfMacro=false)
Callback invoked when the lexer hits the end of the current file.
DiagnosticsEngine & getDiagnostics() const
void setCodeCompletionIdentifierInfo(IdentifierInfo *Filter)
Set the code completion token for filtering purposes.
DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const
Forwarding function for diagnostics.
Encodes a location in the source.
static SourceLocation getFromRawEncoding(UIntTy Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
bool isValid() const
Return true if this is a valid SourceLocation object.
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
UIntTy getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it.
This class handles loading and caching of source files into memory.
const char * getCharacterData(SourceLocation SL, bool *Invalid=nullptr) const
Return a pointer to the start of the specified location in the appropriate spelling MemoryBuffer.
A trivial tuple used to represent a source range.
Each ExpansionInfo encodes the expansion location - where the token was ultimately expanded,...
SourceLocation getExpansionLocStart() const
SourceLocation getSpellingLoc() const
bool isMacroArgExpansion() const
This is a discriminated union of FileInfo and ExpansionInfo.
const ExpansionInfo & getExpansion() const
static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix)
Determine whether a suffix is a valid ud-suffix.
Token - This structure provides full information about a lexed token.
IdentifierInfo * getIdentifierInfo() const
bool hasUCN() const
Returns true if this token contains a universal character name.
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
unsigned getLength() const
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {....
tok::TokenKind getKind() const
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
bool isAnnotation() const
Return true if this is any of tok::annot_* kind tokens.
bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const
Return true if we have an ObjC keyword identifier.
void startToken()
Reset all flags to cleared.
bool needsCleaning() const
Return true if this token has trigraphs or escaped newlines in it.
StringRef getRawIdentifier() const
getRawIdentifier - For a raw identifier token (i.e., an identifier lexed in raw mode),...
const char * getLiteralData() const
getLiteralData - For a literal token (numeric constant, string, etc), this returns a pointer to the s...
void setFlag(TokenFlags Flag)
Set the specified flag.
static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a)
Copies the values of the most significant bits from each 8-bit element in a 128-bit integer vector of...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi8(__m128i __a, __m128i __b)
Compares each of the corresponding 8-bit values of the 128-bit integer vectors for equality.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi8(char __b)
Initializes all values in a 128-bit vector of [16 x i8] with the specified 8-bit value.
@ tokens_present_before_eof
Indicates that there are tokens present between the last scanned directive and eof.
@ pp_pragma_system_header
@ pp_pragma_include_alias
@ After
Like System, but searched after the system directories.
bool isStringLiteral(TokenKind K)
Return true if this is a C or C++ string-literal (or C++11 user-defined-string-literal) token.
ObjCKeywordKind
Provides a namespace for Objective-C keywords which start with an '@'.
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
LLVM_READNONE bool isASCII(char c)
Returns true if a byte is an ASCII character.
LLVM_READONLY bool isVerticalWhitespace(unsigned char c)
Returns true if this character is vertical ASCII whitespace: '\n', '\r'.
ConflictMarkerKind
ConflictMarkerKind - Kinds of conflict marker which the lexer might be recovering from.
@ CMK_Perforce
A Perforce-style conflict marker, initiated by 4 ">"s, separated by 4 "="s, and terminated by 4 "<"s.
@ CMK_None
Not within a conflict marker.
@ CMK_Normal
A normal or diff3 conflict marker, initiated by at least 7 "<"s, separated by at least 7 "="s or "|"s...
bool operator<(DeclarationName LHS, DeclarationName RHS)
Ordering on two declaration names.
LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)
Returns true if this character is horizontal ASCII whitespace: ' ', '\t', '\f', '\v'.
@ C
Languages that the frontend can parse and compile.
@ Result
The result type of a method or function.
LLVM_READONLY bool isRawStringDelimBody(unsigned char c)
Return true if this is the body character of a C++ raw string delimiter.
LLVM_READONLY bool isWhitespace(unsigned char c)
Return true if this character is horizontal or vertical ASCII whitespace: ' ', '\t',...
LLVM_READONLY bool isPreprocessingNumberBody(unsigned char c)
Return true if this is the body character of a C preprocessing number, which is [a-zA-Z0-9_.].
LLVM_READONLY bool isAsciiIdentifierContinue(unsigned char c, bool AllowDollar=false)
Returns true if this is a body character of a C identifier, which is [a-zA-Z0-9_].
LLVM_READONLY bool isAsciiIdentifierStart(unsigned char c, bool AllowDollar=false)
Returns true if this is a valid first character of a C identifier, which is [a-zA-Z_].
__INTPTR_TYPE__ intptr_t
A signed integer type with the property that any valid pointer to void can be converted to this type,...
float __ovld __cnfn length(float)
Return the length of vector p, i.e., sqrt(p.x^2 + p.y^2 + ...)
Describes the bounds (start, size) of the preamble and a flag required by PreprocessorOptions::Precom...
Token lexed as part of dependency directive scanning.
unsigned Offset
Offset into the original source input.
bool is(tok::TokenKind K) const