29#include "llvm/ADT/STLExtras.h"
30#include "llvm/ADT/StringExtras.h"
31#include "llvm/ADT/StringRef.h"
32#include "llvm/ADT/StringSwitch.h"
33#include "llvm/Support/Compiler.h"
34#include "llvm/Support/ConvertUTF.h"
35#include "llvm/Support/MathExtras.h"
36#include "llvm/Support/MemoryBufferRef.h"
37#include "llvm/Support/NativeFormatting.h"
38#include "llvm/Support/Unicode.h"
39#include "llvm/Support/UnicodeCharRanges.h"
61 return II->getObjCKeywordID() == objcKey;
68 return tok::objc_not_keyword;
// Intentionally empty out-of-line member definition.
// NOTE(review): presumably the LLVM-style "anchor" whose only job is to give
// the class a home translation unit for its vtable — confirm against the
// class declaration, which is not visible here.
77void Lexer::anchor() {}
79void Lexer::InitLexer(
const char *BufStart,
const char *BufPtr,
81 BufferStart = BufStart;
85 assert(BufEnd[0] == 0 &&
86 "We assume that the input buffer has a null character at the end"
87 " to simplify lexing!");
92 if (BufferStart == BufferPtr) {
94 StringRef Buf(BufferStart, BufferEnd - BufferStart);
95 size_t BOMLength = llvm::StringSwitch<size_t>(Buf)
96 .StartsWith(
"\xEF\xBB\xBF", 3)
100 BufferPtr += BOMLength;
103 Is_PragmaLexer =
false;
104 CurrentConflictMarkerState =
CMK_None;
107 IsAtStartOfLine =
true;
108 IsAtPhysicalStartOfLine =
true;
110 HasLeadingSpace =
false;
111 HasLeadingEmptyMacro =
false;
126 ExtendedTokenMode = 0;
128 NewLinePtr =
nullptr;
138 FileLoc(PP.getSourceManager().getLocForStartOfFile(FID)),
140 IsFirstTimeLexingFile(IsFirstIncludeOfFile) {
141 InitLexer(InputFile.getBufferStart(), InputFile.getBufferStart(),
142 InputFile.getBufferEnd());
151 const char *BufStart,
const char *BufPtr,
const char *BufEnd,
152 bool IsFirstIncludeOfFile)
154 IsFirstTimeLexingFile(IsFirstIncludeOfFile) {
155 InitLexer(BufStart, BufPtr, BufEnd);
166 bool IsFirstIncludeOfFile)
167 :
Lexer(
SM.getLocForStartOfFile(FID), langOpts, FromFile.getBufferStart(),
168 FromFile.getBufferStart(), FromFile.getBufferEnd(),
169 IsFirstIncludeOfFile) {}
172 assert(
PP &&
"Cannot reset token mode without a preprocessor");
173 if (LangOpts.TraditionalCPP)
201 FileID SpellingFID =
SM.getFileID(SpellingLoc);
202 llvm::MemoryBufferRef InputFile =
SM.getBufferOrFake(SpellingFID);
208 const char *StrData =
SM.getCharacterData(SpellingLoc);
210 L->BufferPtr = StrData;
211 L->BufferEnd = StrData+TokLen;
212 assert(L->BufferEnd[0] == 0 &&
"Buffer is not nul terminated!");
216 L->FileLoc =
SM.createExpansionLoc(
SM.getLocForStartOfFile(SpellingFID),
218 ExpansionLocEnd, TokLen);
225 L->Is_PragmaLexer =
true;
230 this->IsAtPhysicalStartOfLine = IsAtStartOfLine;
231 this->IsAtStartOfLine = IsAtStartOfLine;
232 assert((BufferStart +
Offset) <= BufferEnd);
233 BufferPtr = BufferStart +
Offset;
237 typename T::size_type i = 0, e = Str.size();
239 if (Str[i] ==
'\\' || Str[i] == Quote) {
240 Str.insert(Str.begin() + i,
'\\');
243 }
else if (Str[i] ==
'\n' || Str[i] ==
'\r') {
245 if ((i < e - 1) && (Str[i + 1] ==
'\n' || Str[i + 1] ==
'\r') &&
246 Str[i] != Str[i + 1]) {
252 Str.insert(Str.begin() + i + 1,
'n');
262 std::string
Result = std::string(Str);
263 char Quote = Charify ?
'\'' :
'"';
278 assert(Tok.
needsCleaning() &&
"getSpellingSlow called on simple token");
281 const char *BufEnd = BufPtr + Tok.
getLength();
285 while (BufPtr < BufEnd) {
290 if (Spelling[Length - 1] ==
'"')
298 Spelling[Length - 2] ==
'R' && Spelling[Length - 1] ==
'"') {
301 const char *RawEnd = BufEnd;
302 do --RawEnd;
while (*RawEnd !=
'"');
303 size_t RawLength = RawEnd - BufPtr + 1;
306 memcpy(Spelling + Length, BufPtr, RawLength);
314 while (BufPtr < BufEnd) {
321 "NeedsCleaning flag set on token that didn't need cleaning!");
336 std::pair<FileID, unsigned> locInfo =
SM.getDecomposedLoc(loc);
339 bool invalidTemp =
false;
340 StringRef file =
SM.getBufferData(locInfo.first, &invalidTemp);
342 if (invalid) *invalid =
true;
346 const char *tokenBegin = file.data() + locInfo.second;
349 Lexer lexer(
SM.getLocForStartOfFile(locInfo.first), options,
350 file.begin(), tokenBegin, file.end());
358 return StringRef(tokenBegin,
length);
362 buffer.resize(
getSpellingSlow(token, tokenBegin, options, buffer.data()));
363 return StringRef(buffer.data(), buffer.size());
373 assert((
int)Tok.
getLength() >= 0 &&
"Token character range is bogus!");
375 bool CharDataInvalid =
false;
385 return std::string(TokStart, TokStart + Tok.
getLength());
406 assert((
int)Tok.
getLength() >= 0 &&
"Token character range is bogus!");
408 const char *TokStart =
nullptr;
410 if (Tok.
is(tok::raw_identifier))
415 Buffer = II->getNameStart();
416 return II->getLength();
426 bool CharDataInvalid =
false;
430 if (CharDataInvalid) {
443 return getSpellingSlow(Tok, TokStart, LangOpts,
const_cast<char*
>(Buffer));
464 bool IgnoreWhiteSpace) {
473 Loc =
SM.getExpansionLoc(Loc);
474 std::pair<FileID, unsigned> LocInfo =
SM.getDecomposedLoc(Loc);
476 StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
480 const char *StrData = Buffer.data()+LocInfo.second;
486 Lexer TheLexer(
SM.getLocForStartOfFile(LocInfo.first), LangOpts,
487 Buffer.begin(), StrData, Buffer.end());
496 const char *BufStart = Buffer.data();
497 if (
Offset >= Buffer.size())
500 const char *LexStart = BufStart +
Offset;
501 for (; LexStart != BufStart; --LexStart) {
516 std::pair<FileID, unsigned> LocInfo =
SM.getDecomposedLoc(Loc);
517 if (LocInfo.first.isInvalid())
521 StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
527 const char *StrData = Buffer.data() + LocInfo.second;
529 if (!LexStart || LexStart == StrData)
534 Lexer TheLexer(LexerStartLoc, LangOpts, Buffer.data(), LexStart,
554 }
while (TheTok.
getKind() != tok::eof);
566 if (!
SM.isMacroArgExpansion(Loc))
571 std::pair<FileID, unsigned> FileLocInfo =
SM.getDecomposedLoc(FileLoc);
572 std::pair<FileID, unsigned> BeginFileLocInfo =
573 SM.getDecomposedLoc(BeginFileLoc);
574 assert(FileLocInfo.first == BeginFileLocInfo.first &&
575 FileLocInfo.second >= BeginFileLocInfo.second);
581enum PreambleDirectiveKind {
596 Lexer TheLexer(FileLoc, LangOpts, Buffer.begin(), Buffer.begin(),
600 bool InPreprocessorDirective =
false;
604 unsigned MaxLineOffset = 0;
606 const char *CurPtr = Buffer.begin();
607 unsigned CurLine = 0;
608 while (CurPtr != Buffer.end()) {
612 if (CurLine == MaxLines)
616 if (CurPtr != Buffer.end())
617 MaxLineOffset = CurPtr - Buffer.begin();
623 if (InPreprocessorDirective) {
625 if (TheTok.
getKind() == tok::eof) {
636 InPreprocessorDirective =
false;
645 if (MaxLineOffset && TokOffset >= MaxLineOffset)
650 if (TheTok.
getKind() == tok::comment) {
658 Token HashTok = TheTok;
659 InPreprocessorDirective =
true;
668 PreambleDirectiveKind PDK
669 = llvm::StringSwitch<PreambleDirectiveKind>(Keyword)
670 .Case(
"include", PDK_Skipped)
671 .Case(
"__include_macros", PDK_Skipped)
672 .Case(
"define", PDK_Skipped)
673 .Case(
"undef", PDK_Skipped)
674 .Case(
"line", PDK_Skipped)
675 .Case(
"error", PDK_Skipped)
676 .Case(
"pragma", PDK_Skipped)
677 .Case(
"import", PDK_Skipped)
678 .Case(
"include_next", PDK_Skipped)
679 .Case(
"warning", PDK_Skipped)
680 .Case(
"ident", PDK_Skipped)
681 .Case(
"sccs", PDK_Skipped)
682 .Case(
"assert", PDK_Skipped)
683 .Case(
"unassert", PDK_Skipped)
684 .Case(
"if", PDK_Skipped)
685 .Case(
"ifdef", PDK_Skipped)
686 .Case(
"ifndef", PDK_Skipped)
687 .Case(
"elif", PDK_Skipped)
688 .Case(
"elifdef", PDK_Skipped)
689 .Case(
"elifndef", PDK_Skipped)
690 .Case(
"else", PDK_Skipped)
691 .Case(
"endif", PDK_Skipped)
692 .Default(PDK_Unknown);
717 if (ActiveCommentLoc.
isValid())
718 End = ActiveCommentLoc;
733 const char *TokPtr =
SM.getCharacterData(TokStart, &
Invalid);
736 if (
Invalid || (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr)))
739 unsigned PhysOffset = 0;
744 while (Lexer::isObviouslySimpleCharacter(*TokPtr)) {
754 for (; CharNo; --CharNo) {
765 if (!Lexer::isObviouslySimpleCharacter(*TokPtr))
766 PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr;
815 if (!
SM.isAtStartOfImmediateMacroExpansion(loc, &expansionLoc))
821 *MacroBegin = expansionLoc;
843 if (!
SM.isAtEndOfImmediateMacroExpansion(afterLoc, &expansionLoc))
849 *MacroEnd = expansionLoc;
862 if (Range.isTokenRange()) {
871 std::tie(FID, BeginOffs) =
SM.getDecomposedLoc(
Begin);
876 if (!
SM.isInFileID(End, FID, &EndOffs) ||
886 return SM.getSLocEntry(
SM.getFileID(Loc))
888 .isExpansionTokenRange();
905 Range.setBegin(
Begin);
910 if (Range.isTokenRange()) {
928 Range.setBegin(MacroBegin);
929 Range.setEnd(MacroEnd);
931 if (Range.isTokenRange())
951 Range.setBegin(
SM.getImmediateSpellingLoc(
Begin));
952 Range.setEnd(
SM.getImmediateSpellingLoc(End));
965 if (Range.isInvalid()) {
971 std::pair<FileID, unsigned> beginInfo =
SM.getDecomposedLoc(Range.getBegin());
972 if (beginInfo.first.isInvalid()) {
978 if (!
SM.isInFileID(Range.getEnd(), beginInfo.first, &EndOffs) ||
979 beginInfo.second > EndOffs) {
985 bool invalidTemp =
false;
986 StringRef file =
SM.getBufferData(beginInfo.first, &invalidTemp);
993 return file.substr(beginInfo.second, EndOffs - beginInfo.second);
999 assert(Loc.
isMacroID() &&
"Only reasonable to call this on macros");
1015 Loc =
SM.getImmediateExpansionRange(Loc).getBegin();
1022 FileID MacroFID =
SM.getFileID(Loc);
1023 if (
SM.isInFileID(SpellLoc, MacroFID))
1033 Loc =
SM.getSpellingLoc(Loc);
1037 std::pair<FileID, unsigned> ExpansionInfo =
SM.getDecomposedLoc(Loc);
1039 StringRef ExpansionBuffer =
SM.getBufferData(ExpansionInfo.first);
1040 return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
1045 assert(Loc.
isMacroID() &&
"Only reasonable to call this on macros");
1047 while (
SM.isMacroArgExpansion(Loc))
1048 Loc =
SM.getImmediateExpansionRange(Loc).getBegin();
1054 if (!SpellLoc.
isFileID() ||
SM.isWrittenInScratchSpace(SpellLoc))
1060 Loc =
SM.getSpellingLoc(
SM.getImmediateExpansionRange(Loc).getBegin());
1064 std::pair<FileID, unsigned> ExpansionInfo =
SM.getDecomposedLoc(Loc);
1066 StringRef ExpansionBuffer =
SM.getBufferData(ExpansionInfo.first);
1067 return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
1076 if (Str - 1 < BufferStart)
1079 if ((Str[0] ==
'\n' && Str[-1] ==
'\r') ||
1080 (Str[0] ==
'\r' && Str[-1] ==
'\n')) {
1081 if (Str - 2 < BufferStart)
1091 return *Str ==
'\\';
1098 std::pair<FileID, unsigned> LocInfo =
SM.getDecomposedLoc(Loc);
1099 if (LocInfo.first.isInvalid())
1102 StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
1108 StringRef Rest = Buffer.substr(Line - Buffer.data());
1109 size_t NumWhitespaceChars = Rest.find_first_not_of(
" \t");
1110 return NumWhitespaceChars == StringRef::npos
1112 : Rest.take_front(NumWhitespaceChars);
1127 unsigned CharNo,
unsigned TokLen) {
1128 assert(FileLoc.
isMacroID() &&
"Must be a macro expansion");
1144 return SM.createExpansionLoc(SpellingLoc, II.
getBegin(), II.
getEnd(), TokLen);
1150 unsigned TokLen)
const {
1151 assert(Loc >= BufferStart && Loc <= BufferEnd &&
1152 "Location out of range for this buffer!");
1156 unsigned CharNo = Loc-BufferStart;
1162 assert(
PP &&
"This doesn't work on raw lexers");
1181 case '=':
return '#';
1182 case ')':
return ']';
1183 case '(':
return '[';
1184 case '!':
return '|';
1185 case '\'':
return '^';
1186 case '>':
return '}';
1187 case '/':
return '\\';
1188 case '<':
return '{';
1189 case '-':
return '~';
1204 L->
Diag(CP-2, diag::trigraph_ignored);
1209 L->
Diag(CP-2, diag::trigraph_converted) << StringRef(&Res, 1);
1216unsigned Lexer::getEscapedNewLineSize(
const char *Ptr) {
1221 if (Ptr[Size-1] !=
'\n' && Ptr[Size-1] !=
'\r')
1225 if ((Ptr[Size] ==
'\r' || Ptr[Size] ==
'\n') &&
1226 Ptr[Size-1] != Ptr[Size])
1239const char *Lexer::SkipEscapedNewLines(
const char *
P) {
1241 const char *AfterEscape;
1244 }
else if (*
P ==
'?') {
1246 if (
P[1] !=
'?' ||
P[2] !=
'/')
1255 unsigned NewLineSize = Lexer::getEscapedNewLineSize(AfterEscape);
1256 if (NewLineSize == 0)
return P;
1257 P = AfterEscape+NewLineSize;
1266 return std::nullopt;
1271 std::pair<FileID, unsigned> LocInfo =
SM.getDecomposedLoc(Loc);
1274 bool InvalidTemp =
false;
1275 StringRef
File =
SM.getBufferData(LocInfo.first, &InvalidTemp);
1277 return std::nullopt;
1279 const char *TokenBegin =
File.data() + LocInfo.second;
1282 Lexer lexer(
SM.getLocForStartOfFile(LocInfo.first), LangOpts,
File.begin(),
1283 TokenBegin,
File.end());
1296 const LangOptions &LangOpts,
bool SkipTrailingWhitespaceAndNewLine) {
1298 if (!Tok || Tok->isNot(TKind))
1303 unsigned NumWhitespaceChars = 0;
1304 if (SkipTrailingWhitespaceAndNewLine) {
1305 const char *TokenEnd =
SM.getCharacterData(TokenLoc) + Tok->getLength();
1306 unsigned char C = *TokenEnd;
1309 NumWhitespaceChars++;
1313 if (
C ==
'\n' ||
C ==
'\r') {
1316 NumWhitespaceChars++;
1317 if ((
C ==
'\n' ||
C ==
'\r') &&
C != PrevC)
1318 NumWhitespaceChars++;
1340char Lexer::getCharAndSizeSlow(
const char *Ptr,
unsigned &Size,
1343 if (Ptr[0] ==
'\\') {
1352 if (
unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) {
1358 Diag(Ptr, diag::backslash_newline_space);
1361 Size += EscapedNewLineSize;
1362 Ptr += EscapedNewLineSize;
1365 return getCharAndSizeSlow(Ptr, Size, Tok);
1373 if (Ptr[0] ==
'?' && Ptr[1] ==
'?') {
1377 LangOpts.Trigraphs)) {
1383 if (
C ==
'\\')
goto Slash;
1399char Lexer::getCharAndSizeSlowNoWarn(
const char *Ptr,
unsigned &Size,
1402 if (Ptr[0] ==
'\\') {
1410 if (
unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) {
1412 Size += EscapedNewLineSize;
1413 Ptr += EscapedNewLineSize;
1416 return getCharAndSizeSlowNoWarn(Ptr, Size, LangOpts);
1424 if (LangOpts.Trigraphs && Ptr[0] ==
'?' && Ptr[1] ==
'?') {
1430 if (
C ==
'\\')
goto Slash;
1445void Lexer::SetByteOffset(
unsigned Offset,
bool StartOfLine) {
1446 BufferPtr = BufferStart +
Offset;
1447 if (BufferPtr > BufferEnd)
1448 BufferPtr = BufferEnd;
1452 IsAtStartOfLine = StartOfLine;
1453 IsAtPhysicalStartOfLine = StartOfLine;
1457 static const llvm::sys::UnicodeCharSet UnicodeWhitespaceChars(
1459 return UnicodeWhitespaceChars.contains(Codepoint);
1464 llvm::raw_svector_ostream CharOS(CharBuf);
1465 llvm::write_hex(CharOS,
C, llvm::HexPrintStyle::Upper, 4);
1476 bool IsStart,
bool &IsExtension) {
1477 static const llvm::sys::UnicodeCharSet MathStartChars(
1479 static const llvm::sys::UnicodeCharSet MathContinueChars(
1481 if (MathStartChars.contains(
C) ||
1482 (!IsStart && MathContinueChars.contains(
C))) {
1490 bool &IsExtension) {
1491 if (LangOpts.AsmPreprocessor) {
1493 }
else if (LangOpts.DollarIdents &&
'$' ==
C) {
1495 }
else if (LangOpts.CPlusPlus || LangOpts.C2x) {
1500 static const llvm::sys::UnicodeCharSet XIDStartChars(
XIDStartRanges);
1502 if (
C ==
'_' || XIDStartChars.contains(
C) || XIDContinueChars.contains(
C))
1506 }
else if (LangOpts.C11) {
1507 static const llvm::sys::UnicodeCharSet C11AllowedIDChars(
1509 return C11AllowedIDChars.contains(
C);
1511 static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
1513 return C99AllowedIDChars.contains(
C);
1518 bool &IsExtension) {
1519 assert(
C > 0x7F &&
"isAllowedInitiallyIDChar called with an ASCII codepoint");
1520 IsExtension =
false;
1521 if (LangOpts.AsmPreprocessor) {
1524 if (LangOpts.CPlusPlus || LangOpts.C2x) {
1525 static const llvm::sys::UnicodeCharSet XIDStartChars(
XIDStartRanges);
1526 if (XIDStartChars.contains(
C))
1534 static const llvm::sys::UnicodeCharSet C11DisallowedInitialIDChars(
1536 return !C11DisallowedInitialIDChars.contains(
C);
1538 static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
1540 return !C99DisallowedInitialIDChars.contains(
C);
1546 static const llvm::sys::UnicodeCharSet MathStartChars(
1548 static const llvm::sys::UnicodeCharSet MathContinueChars(
1551 (void)MathStartChars;
1552 (void)MathContinueChars;
1553 assert((MathStartChars.contains(
C) || MathContinueChars.contains(
C)) &&
1554 "Unexpected mathematical notation codepoint");
1555 Diags.
Report(Range.getBegin(), diag::ext_mathematical_notation)
1568 if (!Diags.
isIgnored(diag::warn_c99_compat_unicode_id, Range.getBegin())) {
1570 CannotAppearInIdentifier = 0,
1571 CannotStartIdentifier
1574 static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
1576 static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
1578 if (!C99AllowedIDChars.contains(
C)) {
1579 Diags.
Report(Range.getBegin(), diag::warn_c99_compat_unicode_id)
1581 << CannotAppearInIdentifier;
1582 }
else if (IsFirst && C99DisallowedInitialIDChars.contains(
C)) {
1583 Diags.
Report(Range.getBegin(), diag::warn_c99_compat_unicode_id)
1585 << CannotStartIdentifier;
1597 struct HomoglyphPair {
// Orders two entries by their Character field only; no other field takes part
// in the comparison. This is the ordering std::lower_bound relies on when the
// SortedHomoglyphs table is searched with a probe built from the code point.
1600 bool operator<(HomoglyphPair R)
const {
return Character < R.Character; }
1602 static constexpr HomoglyphPair SortedHomoglyphs[] = {
1655 std::lower_bound(std::begin(SortedHomoglyphs),
1656 std::end(SortedHomoglyphs) - 1, HomoglyphPair{
C,
'\0'});
1657 if (Homoglyph->Character ==
C) {
1658 if (Homoglyph->LooksLike) {
1659 const char LooksLikeStr[] = {Homoglyph->LooksLike, 0};
1660 Diags.
Report(Range.getBegin(), diag::warn_utf8_symbol_homoglyph)
1663 Diags.
Report(Range.getBegin(), diag::warn_utf8_symbol_zero_width)
1680 if ((IsFirst && IsIDStart) || (!IsFirst && IsIDContinue))
1683 bool InvalidOnlyAtStart = IsFirst && !IsIDStart && IsIDContinue;
1685 if (!IsFirst || InvalidOnlyAtStart) {
1686 Diags.
Report(Range.getBegin(), diag::err_character_not_allowed_identifier)
1690 Diags.
Report(Range.getBegin(), diag::err_character_not_allowed)
1696bool Lexer::tryConsumeIdentifierUCN(
const char *&CurPtr,
unsigned Size,
1698 const char *UCNPtr = CurPtr +
Size;
1699 uint32_t CodePoint = tryReadUCN(UCNPtr, CurPtr,
nullptr);
1700 if (CodePoint == 0) {
1703 bool IsExtension =
false;
1728 if ((UCNPtr - CurPtr == 6 && CurPtr[1] ==
'u') ||
1729 (UCNPtr - CurPtr == 10 && CurPtr[1] ==
'U'))
1732 while (CurPtr != UCNPtr)
1733 (void)getAndAdvanceChar(CurPtr,
Result);
1737bool Lexer::tryConsumeIdentifierUTF8Char(
const char *&CurPtr) {
1738 const char *UnicodePtr = CurPtr;
1739 llvm::UTF32 CodePoint;
1740 llvm::ConversionResult
Result =
1741 llvm::convertUTF8Sequence((
const llvm::UTF8 **)&UnicodePtr,
1742 (
const llvm::UTF8 *)BufferEnd,
1744 llvm::strictConversion);
1745 if (
Result != llvm::conversionOK)
1748 bool IsExtension =
false;
1773 CurPtr = UnicodePtr;
1777bool Lexer::LexUnicodeIdentifierStart(
Token &
Result, uint32_t
C,
1778 const char *CurPtr) {
1779 bool IsExtension =
false;
1794 return LexIdentifierContinue(
Result, CurPtr);
1819 FormTokenWithChars(
Result, CurPtr, tok::unknown);
1823bool Lexer::LexIdentifierContinue(
Token &
Result,
const char *CurPtr) {
1826 unsigned char C = *CurPtr;
1835 C = getCharAndSize(CurPtr, Size);
1837 CurPtr = ConsumeChar(CurPtr, Size,
Result);
1842 if (!LangOpts.DollarIdents)
1846 Diag(CurPtr, diag::ext_dollar_in_identifier);
1847 CurPtr = ConsumeChar(CurPtr, Size,
Result);
1850 if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result))
1852 if (!
isASCII(
C) && tryConsumeIdentifierUTF8Char(CurPtr))
1858 const char *IdStart = BufferPtr;
1859 FormTokenWithChars(
Result, CurPtr, tok::raw_identifier);
1860 Result.setRawIdentifierData(IdStart);
1876 if (isCodeCompletionPoint(CurPtr)) {
1878 Result.setKind(tok::code_completion);
1884 assert(*CurPtr == 0 &&
"Completion character must be 0");
1889 if (CurPtr < BufferEnd) {
1907bool Lexer::isHexaLiteral(
const char *Start,
const LangOptions &LangOpts) {
1913 return (C2 ==
'x' || C2 ==
'X');
1919bool Lexer::LexNumericConstant(
Token &
Result,
const char *CurPtr) {
1921 char C = getCharAndSize(CurPtr, Size);
1924 CurPtr = ConsumeChar(CurPtr, Size,
Result);
1926 C = getCharAndSize(CurPtr, Size);
1930 if ((
C ==
'-' ||
C ==
'+') && (PrevCh ==
'E' || PrevCh ==
'e')) {
1933 if (!LangOpts.MicrosoftExt || !isHexaLiteral(BufferPtr, LangOpts))
1934 return LexNumericConstant(
Result, ConsumeChar(CurPtr, Size,
Result));
1938 if ((
C ==
'-' ||
C ==
'+') && (PrevCh ==
'P' || PrevCh ==
'p')) {
1942 bool IsHexFloat =
true;
1943 if (!LangOpts.C99) {
1944 if (!isHexaLiteral(BufferPtr, LangOpts))
1946 else if (!LangOpts.CPlusPlus17 &&
1947 std::find(BufferPtr, CurPtr,
'_') != CurPtr)
1951 return LexNumericConstant(
Result, ConsumeChar(CurPtr, Size,
Result));
1955 if (
C ==
'\'' && (LangOpts.CPlusPlus14 || LangOpts.C2x)) {
1960 Diag(CurPtr, LangOpts.CPlusPlus
1961 ? diag::warn_cxx11_compat_digit_separator
1962 : diag::warn_c2x_compat_digit_separator);
1963 CurPtr = ConsumeChar(CurPtr, Size,
Result);
1964 CurPtr = ConsumeChar(CurPtr, NextSize,
Result);
1965 return LexNumericConstant(
Result, CurPtr);
1970 if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result))
1971 return LexNumericConstant(
Result, CurPtr);
1972 if (!
isASCII(
C) && tryConsumeIdentifierUTF8Char(CurPtr))
1973 return LexNumericConstant(
Result, CurPtr);
1976 const char *TokStart = BufferPtr;
1977 FormTokenWithChars(
Result, CurPtr, tok::numeric_constant);
1978 Result.setLiteralData(TokStart);
1984const char *Lexer::LexUDSuffix(
Token &
Result,
const char *CurPtr,
1985 bool IsStringLiteral) {
1986 assert(LangOpts.CPlusPlus);
1990 char C = getCharAndSize(CurPtr, Size);
1991 bool Consumed =
false;
1994 if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result))
1996 else if (!
isASCII(
C) && tryConsumeIdentifierUTF8Char(CurPtr))
2002 if (!LangOpts.CPlusPlus11) {
2005 C ==
'_' ? diag::warn_cxx11_compat_user_defined_literal
2006 : diag::warn_cxx11_compat_reserved_user_defined_literal)
2017 bool IsUDSuffix =
false;
2020 else if (IsStringLiteral && LangOpts.CPlusPlus14) {
2024 const unsigned MaxStandardSuffixLength = 3;
2025 char Buffer[MaxStandardSuffixLength] = {
C };
2026 unsigned Consumed =
Size;
2033 const StringRef CompleteSuffix(Buffer, Chars);
2039 if (Chars == MaxStandardSuffixLength)
2043 Buffer[Chars++] = Next;
2044 Consumed += NextSize;
2050 Diag(CurPtr, LangOpts.MSVCCompat
2051 ? diag::ext_ms_reserved_user_defined_literal
2052 : diag::ext_reserved_user_defined_literal)
2057 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2062 C = getCharAndSize(CurPtr, Size);
2064 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2065 }
else if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result)) {
2066 }
else if (!
isASCII(
C) && tryConsumeIdentifierUTF8Char(CurPtr)) {
2076bool Lexer::LexStringLiteral(
Token &
Result,
const char *CurPtr,
2078 const char *AfterQuote = CurPtr;
2080 const char *NulCharacter =
nullptr;
2083 (Kind == tok::utf8_string_literal ||
2084 Kind == tok::utf16_string_literal ||
2085 Kind == tok::utf32_string_literal))
2086 Diag(BufferPtr, LangOpts.CPlusPlus ? diag::warn_cxx98_compat_unicode_literal
2087 : diag::warn_c99_compat_unicode_literal);
2089 char C = getAndAdvanceChar(CurPtr,
Result);
2094 C = getAndAdvanceChar(CurPtr,
Result);
2096 if (
C ==
'\n' ||
C ==
'\r' ||
2097 (
C == 0 && CurPtr-1 == BufferEnd)) {
2099 Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 1;
2100 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2105 if (isCodeCompletionPoint(CurPtr-1)) {
2107 codeCompleteIncludedFile(AfterQuote, CurPtr - 1,
false);
2110 FormTokenWithChars(
Result, CurPtr - 1, tok::unknown);
2115 NulCharacter = CurPtr-1;
2117 C = getAndAdvanceChar(CurPtr,
Result);
2121 if (LangOpts.CPlusPlus)
2122 CurPtr = LexUDSuffix(
Result, CurPtr,
true);
2126 Diag(NulCharacter, diag::null_in_char_or_string) << 1;
2129 const char *TokStart = BufferPtr;
2130 FormTokenWithChars(
Result, CurPtr, Kind);
2131 Result.setLiteralData(TokStart);
2137bool Lexer::LexRawStringLiteral(
Token &
Result,
const char *CurPtr,
2145 Diag(BufferPtr, diag::warn_cxx98_compat_raw_string_literal);
2147 unsigned PrefixLen = 0;
2153 if (CurPtr[PrefixLen] !=
'(') {
2155 const char *PrefixEnd = &CurPtr[PrefixLen];
2156 if (PrefixLen == 16) {
2157 Diag(PrefixEnd, diag::err_raw_delim_too_long);
2159 Diag(PrefixEnd, diag::err_invalid_char_raw_delim)
2160 << StringRef(PrefixEnd, 1);
2172 if (
C == 0 && CurPtr-1 == BufferEnd) {
2178 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2183 const char *Prefix = CurPtr;
2184 CurPtr += PrefixLen + 1;
2191 if (strncmp(CurPtr, Prefix, PrefixLen) == 0 && CurPtr[PrefixLen] ==
'"') {
2192 CurPtr += PrefixLen + 1;
2195 }
else if (
C == 0 && CurPtr-1 == BufferEnd) {
2197 Diag(BufferPtr, diag::err_unterminated_raw_string)
2198 << StringRef(Prefix, PrefixLen);
2199 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2205 if (LangOpts.CPlusPlus)
2206 CurPtr = LexUDSuffix(
Result, CurPtr,
true);
2209 const char *TokStart = BufferPtr;
2210 FormTokenWithChars(
Result, CurPtr, Kind);
2211 Result.setLiteralData(TokStart);
2217bool Lexer::LexAngledStringLiteral(
Token &
Result,
const char *CurPtr) {
2219 const char *NulCharacter =
nullptr;
2220 const char *AfterLessPos = CurPtr;
2221 char C = getAndAdvanceChar(CurPtr,
Result);
2226 C = getAndAdvanceChar(CurPtr,
Result);
2229 (
C == 0 && (CurPtr - 1 == BufferEnd))) {
2232 FormTokenWithChars(
Result, AfterLessPos, tok::less);
2237 if (isCodeCompletionPoint(CurPtr - 1)) {
2238 codeCompleteIncludedFile(AfterLessPos, CurPtr - 1,
true);
2240 FormTokenWithChars(
Result, CurPtr - 1, tok::unknown);
2243 NulCharacter = CurPtr-1;
2245 C = getAndAdvanceChar(CurPtr,
Result);
2250 Diag(NulCharacter, diag::null_in_char_or_string) << 1;
2253 const char *TokStart = BufferPtr;
2254 FormTokenWithChars(
Result, CurPtr, tok::header_name);
2255 Result.setLiteralData(TokStart);
2259void Lexer::codeCompleteIncludedFile(
const char *PathStart,
2260 const char *CompletionPoint,
2263 StringRef PartialPath(PathStart, CompletionPoint - PathStart);
2264 llvm::StringRef SlashChars = LangOpts.MSVCCompat ?
"/\\" :
"/";
2265 auto Slash = PartialPath.find_last_of(SlashChars);
2267 (Slash == StringRef::npos) ?
"" : PartialPath.take_front(Slash);
2268 const char *StartOfFilename =
2269 (Slash == StringRef::npos) ? PathStart : PathStart + Slash + 1;
2272 StringRef(StartOfFilename, CompletionPoint - StartOfFilename)));
2275 while (CompletionPoint < BufferEnd) {
2276 char Next = *(CompletionPoint + 1);
2277 if (Next == 0 || Next ==
'\r' || Next ==
'\n')
2280 if (Next == (IsAngled ?
'>' :
'"'))
2282 if (SlashChars.contains(Next))
2294bool Lexer::LexCharConstant(
Token &
Result,
const char *CurPtr,
2297 const char *NulCharacter =
nullptr;
2300 if (Kind == tok::utf16_char_constant || Kind == tok::utf32_char_constant)
2301 Diag(BufferPtr, LangOpts.CPlusPlus
2302 ? diag::warn_cxx98_compat_unicode_literal
2303 : diag::warn_c99_compat_unicode_literal);
2304 else if (Kind == tok::utf8_char_constant)
2305 Diag(BufferPtr, diag::warn_cxx14_compat_u8_character_literal);
2308 char C = getAndAdvanceChar(CurPtr,
Result);
2311 Diag(BufferPtr, diag::ext_empty_character);
2312 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2319 C = getAndAdvanceChar(CurPtr,
Result);
2321 if (
C ==
'\n' ||
C ==
'\r' ||
2322 (
C == 0 && CurPtr-1 == BufferEnd)) {
2324 Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 0;
2325 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2330 if (isCodeCompletionPoint(CurPtr-1)) {
2332 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2337 NulCharacter = CurPtr-1;
2339 C = getAndAdvanceChar(CurPtr,
Result);
2343 if (LangOpts.CPlusPlus)
2344 CurPtr = LexUDSuffix(
Result, CurPtr,
false);
2348 Diag(NulCharacter, diag::null_in_char_or_string) << 0;
2351 const char *TokStart = BufferPtr;
2352 FormTokenWithChars(
Result, CurPtr, Kind);
2353 Result.setLiteralData(TokStart);
2361bool Lexer::SkipWhitespace(
Token &
Result,
const char *CurPtr,
2362 bool &TokAtPhysicalStartOfLine) {
2366 unsigned char Char = *CurPtr;
2368 const char *lastNewLine =
nullptr;
2369 auto setLastNewLine = [&](
const char *Ptr) {
2375 setLastNewLine(CurPtr - 1);
2394 if (*CurPtr ==
'\n')
2395 setLastNewLine(CurPtr);
2402 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2404 IsAtStartOfLine =
true;
2405 IsAtPhysicalStartOfLine =
true;
2412 char PrevChar = CurPtr[-1];
2418 TokAtPhysicalStartOfLine =
true;
2420 if (NewLinePtr && lastNewLine && NewLinePtr != lastNewLine &&
PP) {
2437bool Lexer::SkipLineComment(
Token &
Result,
const char *CurPtr,
2438 bool &TokAtPhysicalStartOfLine) {
2443 Diag(BufferPtr, diag::ext_line_comment);
2461 bool UnicodeDecodingAlreadyDiagnosed =
false;
2468 C !=
'\n' &&
C !=
'\r') {
2470 UnicodeDecodingAlreadyDiagnosed =
false;
2474 unsigned Length = llvm::getUTF8SequenceSize(
2475 (
const llvm::UTF8 *)CurPtr, (
const llvm::UTF8 *)BufferEnd);
2478 Diag(CurPtr, diag::warn_invalid_utf8_in_comment);
2479 UnicodeDecodingAlreadyDiagnosed =
true;
2482 UnicodeDecodingAlreadyDiagnosed =
false;
2488 const char *NextLine = CurPtr;
2491 const char *EscapePtr = CurPtr-1;
2492 bool HasSpace =
false;
2498 if (*EscapePtr ==
'\\')
2501 else if (EscapePtr[0] ==
'/' && EscapePtr[-1] ==
'?' &&
2502 EscapePtr[-2] ==
'?' && LangOpts.Trigraphs)
2504 CurPtr = EscapePtr-2;
2510 Diag(EscapePtr, diag::backslash_newline_space);
2517 const char *OldPtr = CurPtr;
2520 C = getAndAdvanceChar(CurPtr,
Result);
2525 if (
C != 0 && CurPtr == OldPtr+1) {
2533 if (CurPtr != OldPtr + 1 &&
C !=
'/' &&
2534 (CurPtr == BufferEnd + 1 || CurPtr[0] !=
'/')) {
2535 for (; OldPtr != CurPtr; ++OldPtr)
2536 if (OldPtr[0] ==
'\n' || OldPtr[0] ==
'\r') {
2540 const char *ForwardPtr = CurPtr;
2543 if (ForwardPtr[0] ==
'/' && ForwardPtr[1] ==
'/')
2548 Diag(OldPtr-1, diag::ext_multi_line_line_comment);
2553 if (
C ==
'\r' ||
C ==
'\n' || CurPtr == BufferEnd + 1) {
2558 if (
C ==
'\0' && isCodeCompletionPoint(CurPtr-1)) {
2576 return SaveLineComment(
Result, CurPtr);
2590 NewLinePtr = CurPtr++;
2594 TokAtPhysicalStartOfLine =
true;
2603bool Lexer::SaveLineComment(
Token &
Result,
const char *CurPtr) {
2606 FormTokenWithChars(
Result, CurPtr, tok::comment);
2618 assert(Spelling[0] ==
'/' && Spelling[1] ==
'/' &&
"Not line comment?");
2622 Result.setKind(tok::comment);
2633 assert(CurPtr[0] ==
'\n' || CurPtr[0] ==
'\r');
2636 const char *TrigraphPos =
nullptr;
2638 const char *SpacePos =
nullptr;
2645 if (CurPtr[0] ==
'\n' || CurPtr[0] ==
'\r') {
2647 if (CurPtr[0] == CurPtr[1])
2661 if (*CurPtr ==
'\\') {
2663 }
else if (CurPtr[0] ==
'/' && CurPtr[-1] ==
'?' && CurPtr[-2] ==
'?') {
2665 TrigraphPos = CurPtr - 2;
2676 if (*CurPtr !=
'\n' && *CurPtr !=
'\r')
2685 L->
Diag(TrigraphPos, diag::trigraph_ignored_block_comment);
2689 L->
Diag(TrigraphPos, diag::trigraph_ends_block_comment);
2694 L->
Diag(CurPtr + 1, diag::escaped_newline_block_comment_end);
2698 L->
Diag(SpacePos, diag::backslash_newline_space);
2704#include <emmintrin.h>
2719bool Lexer::SkipBlockComment(
Token &
Result,
const char *CurPtr,
2720 bool &TokAtPhysicalStartOfLine) {
2730 unsigned char C = getCharAndSize(CurPtr, CharSize);
2732 if (
C == 0 && CurPtr == BufferEnd+1) {
2734 Diag(BufferPtr, diag::err_unterminated_block_comment);
2740 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2757 bool UnicodeDecodingAlreadyDiagnosed =
false;
2762 if (CurPtr + 24 < BufferEnd &&
2767 while (
C !=
'/' && (
intptr_t)CurPtr % 16 != 0) {
2772 if (
C ==
'/')
goto FoundSlash;
2776 while (CurPtr + 16 < BufferEnd) {
2778 if (LLVM_UNLIKELY(Mask != 0)) {
2788 CurPtr += llvm::countr_zero<unsigned>(cmp) + 1;
2794 __vector
unsigned char LongUTF = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
2795 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
2796 0x80, 0x80, 0x80, 0x80};
2797 __vector
unsigned char Slashes = {
2798 '/',
'/',
'/',
'/',
'/',
'/',
'/',
'/',
2799 '/',
'/',
'/',
'/',
'/',
'/',
'/',
'/'
2801 while (CurPtr + 16 < BufferEnd) {
2803 vec_any_ge(*(
const __vector
unsigned char *)CurPtr, LongUTF)))
2805 if (
vec_any_eq(*(
const __vector
unsigned char *)CurPtr, Slashes)) {
2812 while (CurPtr + 16 < BufferEnd) {
2813 bool HasNonASCII =
false;
2814 for (
unsigned I = 0; I < 16; ++I)
2815 HasNonASCII |= !
isASCII(CurPtr[I]);
2817 if (LLVM_UNLIKELY(HasNonASCII))
2820 bool HasSlash =
false;
2821 for (
unsigned I = 0; I < 16; ++I)
2822 HasSlash |= CurPtr[I] ==
'/';
2836 while (
C !=
'/' &&
C !=
'\0') {
2838 UnicodeDecodingAlreadyDiagnosed =
false;
2845 unsigned Length = llvm::getUTF8SequenceSize(
2846 (
const llvm::UTF8 *)CurPtr - 1, (
const llvm::UTF8 *)BufferEnd);
2849 Diag(CurPtr - 1, diag::warn_invalid_utf8_in_comment);
2850 UnicodeDecodingAlreadyDiagnosed =
true;
2852 UnicodeDecodingAlreadyDiagnosed =
false;
2853 CurPtr += Length - 1;
2860 if (CurPtr[-2] ==
'*')
2863 if ((CurPtr[-2] ==
'\n' || CurPtr[-2] ==
'\r')) {
2865 LangOpts.Trigraphs)) {
2871 if (CurPtr[0] ==
'*' && CurPtr[1] !=
'/') {
2876 Diag(CurPtr-1, diag::warn_nested_block_comment);
2878 }
else if (
C == 0 && CurPtr == BufferEnd+1) {
2880 Diag(BufferPtr, diag::err_unterminated_block_comment);
2889 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2895 }
else if (
C ==
'\0' && isCodeCompletionPoint(CurPtr-1)) {
2914 FormTokenWithChars(
Result, CurPtr, tok::comment);
2923 SkipWhitespace(
Result, CurPtr+1, TokAtPhysicalStartOfLine);
2941 "Must be in a preprocessing directive!");
2946 const char *CurPtr = BufferPtr;
2948 char Char = getAndAdvanceChar(CurPtr, Tmp);
2956 if (CurPtr-1 != BufferEnd) {
2957 if (isCodeCompletionPoint(CurPtr-1)) {
2973 assert(CurPtr[-1] == Char &&
"Trigraphs for newline?");
2974 BufferPtr = CurPtr-1;
2978 if (Tmp.
is(tok::code_completion)) {
2983 assert(Tmp.
is(tok::eod) &&
"Unexpected token!");
2995bool Lexer::LexEndOfFile(
Token &
Result,
const char *CurPtr) {
3003 FormTokenWithChars(
Result, CurPtr, tok::eod);
3015 BufferPtr = BufferEnd;
3016 FormTokenWithChars(
Result, BufferEnd, tok::eof);
3036 diag::err_pp_unterminated_conditional);
3042 if (CurPtr != BufferStart && (CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')) {
3047 if (LangOpts.CPlusPlus11) {
3051 if (!Diags.
isIgnored(diag::warn_cxx98_compat_no_newline_eof, EndLoc)) {
3052 DiagID = diag::warn_cxx98_compat_no_newline_eof;
3054 DiagID = diag::warn_no_newline_eof;
3057 DiagID = diag::ext_no_newline_eof;
3060 Diag(BufferEnd, DiagID)
3074unsigned Lexer::isNextPPTokenLParen() {
3075 assert(!
LexingRawMode &&
"How can we expand a macro from a skipping buffer?");
3077 if (isDependencyDirectivesLexer()) {
3078 if (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size())
3080 return DepDirectives.front().Tokens[NextDepDirectiveTokenIndex].is(
3090 const char *TmpBufferPtr = BufferPtr;
3092 bool atStartOfLine = IsAtStartOfLine;
3093 bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
3094 bool leadingSpace = HasLeadingSpace;
3100 BufferPtr = TmpBufferPtr;
3102 HasLeadingSpace = leadingSpace;
3103 IsAtStartOfLine = atStartOfLine;
3104 IsAtPhysicalStartOfLine = atPhysicalStartOfLine;
3109 if (Tok.
is(tok::eof))
3111 return Tok.
is(tok::l_paren);
3117 const char *Terminator = CMK ==
CMK_Perforce ?
"<<<<\n" :
">>>>>>>";
3119 auto RestOfBuffer = StringRef(CurPtr, BufferEnd - CurPtr).substr(TermLen);
3120 size_t Pos = RestOfBuffer.find(Terminator);
3121 while (Pos != StringRef::npos) {
3124 (RestOfBuffer[Pos - 1] !=
'\r' && RestOfBuffer[Pos - 1] !=
'\n')) {
3125 RestOfBuffer = RestOfBuffer.substr(Pos+TermLen);
3126 Pos = RestOfBuffer.find(Terminator);
3129 return RestOfBuffer.data()+Pos;
3138bool Lexer::IsStartOfConflictMarker(
const char *CurPtr) {
3140 if (CurPtr != BufferStart &&
3141 CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')
3145 if (!StringRef(CurPtr, BufferEnd - CurPtr).startswith(
"<<<<<<<") &&
3146 !StringRef(CurPtr, BufferEnd - CurPtr).startswith(
">>>> "))
3161 Diag(CurPtr, diag::err_conflict_marker);
3162 CurrentConflictMarkerState =
Kind;
3166 while (*CurPtr !=
'\r' && *CurPtr !=
'\n') {
3167 assert(CurPtr != BufferEnd &&
"Didn't find end of line");
3182bool Lexer::HandleEndOfConflictMarker(
const char *CurPtr) {
3184 if (CurPtr != BufferStart &&
3185 CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')
3194 for (
unsigned i = 1; i != 4; ++i)
3195 if (CurPtr[i] != CurPtr[0])
3202 CurrentConflictMarkerState)) {
3206 while (CurPtr != BufferEnd && *CurPtr !=
'\r' && *CurPtr !=
'\n')
3212 CurrentConflictMarkerState =
CMK_None;
3220 const char *BufferEnd) {
3221 if (CurPtr == BufferEnd)
3224 for (; CurPtr != BufferEnd; ++CurPtr) {
3225 if (CurPtr[0] ==
'#' && CurPtr[1] ==
'>')
3231bool Lexer::lexEditorPlaceholder(
Token &
Result,
const char *CurPtr) {
3232 assert(CurPtr[-1] ==
'<' && CurPtr[0] ==
'#' &&
"Not a placeholder!");
3238 const char *Start = CurPtr - 1;
3239 if (!LangOpts.AllowEditorPlaceholders)
3240 Diag(Start, diag::err_placeholder_in_source);
3242 FormTokenWithChars(
Result, End, tok::raw_identifier);
3243 Result.setRawIdentifierData(Start);
3250bool Lexer::isCodeCompletionPoint(
const char *CurPtr)
const {
3259std::optional<uint32_t> Lexer::tryReadNumericUCN(
const char *&StartPtr,
3260 const char *SlashLoc,
3263 char Kind = getCharAndSize(StartPtr, CharSize);
3264 assert((Kind ==
'u' || Kind ==
'U') &&
"expected a UCN");
3266 unsigned NumHexDigits;
3269 else if (Kind ==
'U')
3272 bool Delimited =
false;
3273 bool FoundEndDelimiter =
false;
3277 if (!LangOpts.CPlusPlus && !LangOpts.C99) {
3279 Diag(SlashLoc, diag::warn_ucn_not_valid_in_c89);
3280 return std::nullopt;
3283 const char *CurPtr = StartPtr + CharSize;
3284 const char *KindLoc = &CurPtr[-1];
3286 uint32_t CodePoint = 0;
3287 while (Count != NumHexDigits || Delimited) {
3288 char C = getCharAndSize(CurPtr, CharSize);
3289 if (!Delimited && Count == 0 &&
C ==
'{') {
3295 if (Delimited &&
C ==
'}') {
3297 FoundEndDelimiter =
true;
3301 unsigned Value = llvm::hexDigitValue(
C);
3306 Diag(SlashLoc, diag::warn_delimited_ucn_incomplete)
3307 << StringRef(KindLoc, 1);
3308 return std::nullopt;
3311 if (CodePoint & 0xF000'0000) {
3313 Diag(KindLoc, diag::err_escape_too_large) << 0;
3314 return std::nullopt;
3325 Diag(SlashLoc, FoundEndDelimiter ? diag::warn_delimited_ucn_empty
3326 : diag::warn_ucn_escape_no_digits)
3327 << StringRef(KindLoc, 1);
3328 return std::nullopt;
3331 if (Delimited && Kind ==
'U') {
3333 Diag(SlashLoc, diag::err_hex_escape_no_digits) << StringRef(KindLoc, 1);
3334 return std::nullopt;
3337 if (!Delimited && Count != NumHexDigits) {
3339 Diag(SlashLoc, diag::warn_ucn_escape_incomplete);
3341 if (Count == 4 && NumHexDigits == 8) {
3343 Diag(KindLoc, diag::note_ucn_four_not_eight)
3347 return std::nullopt;
3350 if (Delimited &&
PP) {
3352 ? diag::warn_cxx2b_delimited_escape_sequence
3353 : diag::ext_delimited_escape_sequence)
3362 if (CurPtr - StartPtr == (
ptrdiff_t)(Count + 1 + (Delimited ? 2 : 0)))
3365 while (StartPtr != CurPtr)
3366 (void)getAndAdvanceChar(StartPtr, *
Result);
3373std::optional<uint32_t> Lexer::tryReadNamedUCN(
const char *&StartPtr,
3374 const char *SlashLoc,
3379 char C = getCharAndSize(StartPtr, CharSize);
3380 assert(
C ==
'N' &&
"expected \\N{...}");
3382 const char *CurPtr = StartPtr + CharSize;
3383 const char *KindLoc = &CurPtr[-1];
3385 C = getCharAndSize(CurPtr, CharSize);
3388 Diag(SlashLoc, diag::warn_ucn_escape_incomplete);
3389 return std::nullopt;
3392 const char *StartName = CurPtr;
3393 bool FoundEndDelimiter =
false;
3396 C = getCharAndSize(CurPtr, CharSize);
3399 FoundEndDelimiter =
true;
3405 Buffer.push_back(
C);
3408 if (!FoundEndDelimiter || Buffer.empty()) {
3410 Diag(SlashLoc, FoundEndDelimiter ? diag::warn_delimited_ucn_empty
3411 : diag::warn_delimited_ucn_incomplete)
3412 << StringRef(KindLoc, 1);
3413 return std::nullopt;
3416 StringRef Name(Buffer.data(), Buffer.size());
3417 std::optional<char32_t> Match =
3418 llvm::sys::unicode::nameToCodepointStrict(Name);
3419 std::optional<llvm::sys::unicode::LooseMatchingResult> LooseMatch;
3421 LooseMatch = llvm::sys::unicode::nameToCodepointLooseMatching(Name);
3423 Diag(StartName, diag::err_invalid_ucn_name)
3424 << StringRef(Buffer.data(), Buffer.size())
3427 Diag(StartName, diag::note_invalid_ucn_name_loose_matching)
3438 if (Diagnose && Match)
3440 ? diag::warn_cxx2b_delimited_escape_sequence
3441 : diag::ext_delimited_escape_sequence)
3448 if (LooseMatch && Diagnose)
3449 Match = LooseMatch->CodePoint;
3456 if (CurPtr - StartPtr == (
ptrdiff_t)(Buffer.size() + 3))
3459 while (StartPtr != CurPtr)
3460 (void)getAndAdvanceChar(StartPtr, *
Result);
3464 return Match ? std::optional<uint32_t>(*Match) :
std::nullopt;
3467uint32_t Lexer::tryReadUCN(
const char *&StartPtr,
const char *SlashLoc,
3471 std::optional<uint32_t> CodePointOpt;
3472 char Kind = getCharAndSize(StartPtr, CharSize);
3473 if (Kind ==
'u' || Kind ==
'U')
3474 CodePointOpt = tryReadNumericUCN(StartPtr, SlashLoc,
Result);
3475 else if (Kind ==
'N')
3476 CodePointOpt = tryReadNamedUCN(StartPtr, SlashLoc,
Result);
3481 uint32_t CodePoint = *CodePointOpt;
3484 if (LangOpts.AsmPreprocessor)
3498 if (CodePoint < 0xA0) {
3499 if (CodePoint == 0x24 || CodePoint == 0x40 || CodePoint == 0x60)
3505 if (CodePoint < 0x20 || CodePoint >= 0x7F)
3506 Diag(BufferPtr, diag::err_ucn_control_character);
3508 char C =
static_cast<char>(CodePoint);
3509 Diag(BufferPtr, diag::err_ucn_escape_basic_scs) << StringRef(&
C, 1);
3514 }
else if (CodePoint >= 0xD800 && CodePoint <= 0xDFFF) {
3519 if (LangOpts.CPlusPlus && !LangOpts.CPlusPlus11)
3520 Diag(BufferPtr, diag::warn_ucn_escape_surrogate);
3522 Diag(BufferPtr, diag::err_ucn_escape_invalid);
3530bool Lexer::CheckUnicodeWhitespace(
Token &
Result, uint32_t
C,
3531 const char *CurPtr) {
3534 Diag(BufferPtr, diag::ext_unicode_whitespace)
3543void Lexer::PropagateLineStartLeadingSpaceInfo(
Token &
Result) {
3544 IsAtStartOfLine =
Result.isAtStartOfLine();
3545 HasLeadingSpace =
Result.hasLeadingSpace();
3546 HasLeadingEmptyMacro =
Result.hasLeadingEmptyMacro();
3551 assert(!isDependencyDirectivesLexer());
3557 if (IsAtStartOfLine) {
3559 IsAtStartOfLine =
false;
3562 if (HasLeadingSpace) {
3564 HasLeadingSpace =
false;
3567 if (HasLeadingEmptyMacro) {
3569 HasLeadingEmptyMacro =
false;
3572 bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
3573 IsAtPhysicalStartOfLine =
false;
3576 bool returnedToken = LexTokenInternal(
Result, atPhysicalStartOfLine);
3578 assert((returnedToken || !isRawLex) &&
"Raw lex must succeed");
3579 return returnedToken;
3587bool Lexer::LexTokenInternal(
Token &
Result,
bool TokAtPhysicalStartOfLine) {
3589 assert(!
Result.needsCleaning() &&
"Result needs cleaning");
3590 assert(!
Result.hasPtrData() &&
"Result has not been reset");
3593 const char *CurPtr = BufferPtr;
3605 FormTokenWithChars(
Result, CurPtr, tok::unknown);
3614 unsigned SizeTmp, SizeTmp2;
3617 char Char = getAndAdvanceChar(CurPtr,
Result);
3621 NewLinePtr =
nullptr;
3626 if (CurPtr-1 == BufferEnd)
3627 return LexEndOfFile(
Result, CurPtr-1);
3630 if (isCodeCompletionPoint(CurPtr-1)) {
3633 FormTokenWithChars(
Result, CurPtr, tok::code_completion);
3638 Diag(CurPtr-1, diag::null_in_file);
3640 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
3649 if (LangOpts.MicrosoftExt) {
3651 Diag(CurPtr-1, diag::ext_ctrl_z_eof_microsoft);
3652 return LexEndOfFile(
Result, CurPtr-1);
3656 Kind = tok::unknown;
3660 if (CurPtr[0] ==
'\n')
3661 (void)getAndAdvanceChar(CurPtr,
Result);
3675 IsAtStartOfLine =
true;
3676 IsAtPhysicalStartOfLine =
true;
3677 NewLinePtr = CurPtr - 1;
3686 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
3696 SkipHorizontalWhitespace:
3698 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
3707 LineComment && (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP)) {
3708 if (SkipLineComment(
Result, CurPtr+2, TokAtPhysicalStartOfLine))
3710 goto SkipIgnoredUnits;
3712 if (SkipBlockComment(
Result, CurPtr+2, TokAtPhysicalStartOfLine))
3714 goto SkipIgnoredUnits;
3716 goto SkipHorizontalWhitespace;
3724 case '0':
case '1':
case '2':
case '3':
case '4':
3725 case '5':
case '6':
case '7':
case '8':
case '9':
3728 return LexNumericConstant(
Result, CurPtr);
3737 if (LangOpts.CPlusPlus11 || LangOpts.C11) {
3738 Char = getCharAndSize(CurPtr, SizeTmp);
3742 return LexStringLiteral(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3743 tok::utf16_string_literal);
3747 return LexCharConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3748 tok::utf16_char_constant);
3751 if (Char ==
'R' && LangOpts.CPlusPlus11 &&
3752 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3753 return LexRawStringLiteral(
Result,
3754 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3756 tok::utf16_string_literal);
3759 char Char2 = getCharAndSize(CurPtr + SizeTmp, SizeTmp2);
3763 return LexStringLiteral(
Result,
3764 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3766 tok::utf8_string_literal);
3767 if (Char2 ==
'\'' && (LangOpts.CPlusPlus17 || LangOpts.C2x))
3768 return LexCharConstant(
3769 Result, ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3771 tok::utf8_char_constant);
3773 if (Char2 ==
'R' && LangOpts.CPlusPlus11) {
3775 char Char3 = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
3778 return LexRawStringLiteral(
Result,
3779 ConsumeChar(ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3782 tok::utf8_string_literal);
3789 return LexIdentifierContinue(
Result, CurPtr);
3795 if (LangOpts.CPlusPlus11 || LangOpts.C11) {
3796 Char = getCharAndSize(CurPtr, SizeTmp);
3800 return LexStringLiteral(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3801 tok::utf32_string_literal);
3805 return LexCharConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3806 tok::utf32_char_constant);
3809 if (Char ==
'R' && LangOpts.CPlusPlus11 &&
3810 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3811 return LexRawStringLiteral(
Result,
3812 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3814 tok::utf32_string_literal);
3818 return LexIdentifierContinue(
Result, CurPtr);
3824 if (LangOpts.CPlusPlus11) {
3825 Char = getCharAndSize(CurPtr, SizeTmp);
3828 return LexRawStringLiteral(
Result,
3829 ConsumeChar(CurPtr, SizeTmp,
Result),
3830 tok::string_literal);
3834 return LexIdentifierContinue(
Result, CurPtr);
3839 Char = getCharAndSize(CurPtr, SizeTmp);
3843 return LexStringLiteral(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3844 tok::wide_string_literal);
3847 if (LangOpts.CPlusPlus11 && Char ==
'R' &&
3848 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3849 return LexRawStringLiteral(
Result,
3850 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3852 tok::wide_string_literal);
3856 return LexCharConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3857 tok::wide_char_constant);
3862 case 'A':
case 'B':
case 'C':
case 'D':
case 'E':
case 'F':
case 'G':
3863 case 'H':
case 'I':
case 'J':
case 'K':
case 'M':
case 'N':
3864 case 'O':
case 'P':
case 'Q':
case 'S':
case 'T':
3865 case 'V':
case 'W':
case 'X':
case 'Y':
case 'Z':
3866 case 'a':
case 'b':
case 'c':
case 'd':
case 'e':
case 'f':
case 'g':
3867 case 'h':
case 'i':
case 'j':
case 'k':
case 'l':
case 'm':
case 'n':
3868 case 'o':
case 'p':
case 'q':
case 'r':
case 's':
case 't':
3869 case 'v':
case 'w':
case 'x':
case 'y':
case 'z':
3873 return LexIdentifierContinue(
Result, CurPtr);
3876 if (LangOpts.DollarIdents) {
3878 Diag(CurPtr-1, diag::ext_dollar_in_identifier);
3881 return LexIdentifierContinue(
Result, CurPtr);
3884 Kind = tok::unknown;
3891 return LexCharConstant(
Result, CurPtr, tok::char_constant);
3897 return LexStringLiteral(
Result, CurPtr,
3899 : tok::string_literal);
3903 Kind = tok::question;
3906 Kind = tok::l_square;
3909 Kind = tok::r_square;
3912 Kind = tok::l_paren;
3915 Kind = tok::r_paren;
3918 Kind = tok::l_brace;
3921 Kind = tok::r_brace;
3924 Char = getCharAndSize(CurPtr, SizeTmp);
3925 if (Char >=
'0' && Char <=
'9') {
3929 return LexNumericConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result));
3930 }
else if (LangOpts.CPlusPlus && Char ==
'*') {
3931 Kind = tok::periodstar;
3933 }
else if (Char ==
'.' &&
3934 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
'.') {
3935 Kind = tok::ellipsis;
3936 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3943 Char = getCharAndSize(CurPtr, SizeTmp);
3946 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
3947 }
else if (Char ==
'=') {
3948 Kind = tok::ampequal;
3949 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
3955 if (getCharAndSize(CurPtr, SizeTmp) ==
'=') {
3956 Kind = tok::starequal;
3957 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
3963 Char = getCharAndSize(CurPtr, SizeTmp);
3965 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
3966 Kind = tok::plusplus;
3967 }
else if (Char ==
'=') {
3968 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
3969 Kind = tok::plusequal;
3975 Char = getCharAndSize(CurPtr, SizeTmp);
3977 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
3978 Kind = tok::minusminus;
3979 }
else if (Char ==
'>' && LangOpts.CPlusPlus &&
3980 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
'*') {
3981 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3983 Kind = tok::arrowstar;
3984 }
else if (Char ==
'>') {
3985 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
3987 }
else if (Char ==
'=') {
3988 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
3989 Kind = tok::minusequal;
3998 if (getCharAndSize(CurPtr, SizeTmp) ==
'=') {
3999 Kind = tok::exclaimequal;
4000 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4002 Kind = tok::exclaim;
4007 Char = getCharAndSize(CurPtr, SizeTmp);
4017 bool TreatAsComment =
4018 LineComment && (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP);
4019 if (!TreatAsComment)
4021 TreatAsComment = getCharAndSize(CurPtr+SizeTmp, SizeTmp2) !=
'*';
4023 if (TreatAsComment) {
4024 if (SkipLineComment(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
4025 TokAtPhysicalStartOfLine))
4031 goto SkipIgnoredUnits;
4036 if (SkipBlockComment(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
4037 TokAtPhysicalStartOfLine))
4046 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4047 Kind = tok::slashequal;
4053 Char = getCharAndSize(CurPtr, SizeTmp);
4055 Kind = tok::percentequal;
4056 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4057 }
else if (LangOpts.Digraphs && Char ==
'>') {
4058 Kind = tok::r_brace;
4059 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4060 }
else if (LangOpts.Digraphs && Char ==
':') {
4061 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4062 Char = getCharAndSize(CurPtr, SizeTmp);
4063 if (Char ==
'%' && getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
':') {
4064 Kind = tok::hashhash;
4065 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4067 }
else if (Char ==
'@' && LangOpts.MicrosoftExt) {
4068 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4070 Diag(BufferPtr, diag::ext_charize_microsoft);
4077 if (TokAtPhysicalStartOfLine && !
LexingRawMode && !Is_PragmaLexer)
4078 goto HandleDirective;
4083 Kind = tok::percent;
4087 Char = getCharAndSize(CurPtr, SizeTmp);
4089 return LexAngledStringLiteral(
Result, CurPtr);
4090 }
else if (Char ==
'<') {
4091 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
4093 Kind = tok::lesslessequal;
4094 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4096 }
else if (After ==
'<' && IsStartOfConflictMarker(CurPtr-1)) {
4100 }
else if (After ==
'<' && HandleEndOfConflictMarker(CurPtr-1)) {
4104 }
else if (LangOpts.CUDA && After ==
'<') {
4105 Kind = tok::lesslessless;
4106 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4109 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4110 Kind = tok::lessless;
4112 }
else if (Char ==
'=') {
4113 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
4115 if (LangOpts.CPlusPlus20) {
4117 Diag(BufferPtr, diag::warn_cxx17_compat_spaceship);
4118 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4120 Kind = tok::spaceship;
4126 Diag(BufferPtr, diag::warn_cxx20_compat_spaceship)
4131 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4132 Kind = tok::lessequal;
4133 }
else if (LangOpts.Digraphs && Char ==
':') {
4134 if (LangOpts.CPlusPlus11 &&
4135 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
':') {
4142 char After = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
4143 if (After !=
':' && After !=
'>') {
4146 Diag(BufferPtr, diag::warn_cxx98_compat_less_colon_colon);
4151 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4152 Kind = tok::l_square;
4153 }
else if (LangOpts.Digraphs && Char ==
'%') {
4154 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4155 Kind = tok::l_brace;
4156 }
else if (Char ==
'#' && SizeTmp == 1 &&
4157 lexEditorPlaceholder(
Result, CurPtr)) {
4164 Char = getCharAndSize(CurPtr, SizeTmp);
4166 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4167 Kind = tok::greaterequal;
4168 }
else if (Char ==
'>') {
4169 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
4171 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4173 Kind = tok::greatergreaterequal;
4174 }
else if (After ==
'>' && IsStartOfConflictMarker(CurPtr-1)) {
4178 }
else if (After ==
'>' && HandleEndOfConflictMarker(CurPtr-1)) {
4181 }
else if (LangOpts.CUDA && After ==
'>') {
4182 Kind = tok::greatergreatergreater;
4183 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4186 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4187 Kind = tok::greatergreater;
4190 Kind = tok::greater;
4194 Char = getCharAndSize(CurPtr, SizeTmp);
4196 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4197 Kind = tok::caretequal;
4198 }
else if (LangOpts.OpenCL && Char ==
'^') {
4199 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4200 Kind = tok::caretcaret;
4206 Char = getCharAndSize(CurPtr, SizeTmp);
4208 Kind = tok::pipeequal;
4209 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4210 }
else if (Char ==
'|') {
4212 if (CurPtr[1] ==
'|' && HandleEndOfConflictMarker(CurPtr-1))
4214 Kind = tok::pipepipe;
4215 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4221 Char = getCharAndSize(CurPtr, SizeTmp);
4222 if (LangOpts.Digraphs && Char ==
'>') {
4223 Kind = tok::r_square;
4224 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4225 }
else if ((LangOpts.CPlusPlus ||
4226 LangOpts.DoubleSquareBracketAttributes) &&
4228 Kind = tok::coloncolon;
4229 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4238 Char = getCharAndSize(CurPtr, SizeTmp);
4241 if (CurPtr[1] ==
'=' && HandleEndOfConflictMarker(CurPtr-1))
4244 Kind = tok::equalequal;
4245 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4254 Char = getCharAndSize(CurPtr, SizeTmp);
4256 Kind = tok::hashhash;
4257 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4258 }
else if (Char ==
'@' && LangOpts.MicrosoftExt) {
4261 Diag(BufferPtr, diag::ext_charize_microsoft);
4262 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4268 if (TokAtPhysicalStartOfLine && !
LexingRawMode && !Is_PragmaLexer)
4269 goto HandleDirective;
4277 if (CurPtr[-1] ==
'@' && LangOpts.ObjC)
4280 Kind = tok::unknown;
4285 if (!LangOpts.AsmPreprocessor) {
4286 if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &
Result)) {
4287 if (CheckUnicodeWhitespace(
Result, CodePoint, CurPtr)) {
4288 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
4296 return LexUnicodeIdentifierStart(
Result, CodePoint, CurPtr);
4300 Kind = tok::unknown;
4305 Kind = tok::unknown;
4309 llvm::UTF32 CodePoint;
4314 llvm::ConversionResult Status =
4315 llvm::convertUTF8Sequence((
const llvm::UTF8 **)&CurPtr,
4316 (
const llvm::UTF8 *)BufferEnd,
4318 llvm::strictConversion);
4319 if (Status == llvm::conversionOK) {
4320 if (CheckUnicodeWhitespace(
Result, CodePoint, CurPtr)) {
4321 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
4328 return LexUnicodeIdentifierStart(
Result, CodePoint, CurPtr);
4334 Kind = tok::unknown;
4341 Diag(CurPtr, diag::err_invalid_utf8);
4343 BufferPtr = CurPtr+1;
4355 FormTokenWithChars(
Result, CurPtr, Kind);
4361 FormTokenWithChars(
Result, CurPtr, tok::hash);
4366 assert(
Result.is(tok::eof) &&
"Preprocessor did not set tok:eof");
4378const char *Lexer::convertDependencyDirectiveToken(
4380 const char *TokPtr = BufferStart + DDTok.
Offset;
4386 BufferPtr = TokPtr + DDTok.
Length;
4390bool Lexer::LexDependencyDirectiveToken(
Token &
Result) {
4391 assert(isDependencyDirectivesLexer());
4393 using namespace dependency_directives_scan;
4395 while (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size()) {
4396 if (DepDirectives.front().Kind == pp_eof)
4397 return LexEndOfFile(
Result, BufferEnd);
4398 if (DepDirectives.front().Kind == tokens_present_before_eof)
4400 NextDepDirectiveTokenIndex = 0;
4401 DepDirectives = DepDirectives.drop_front();
4405 DepDirectives.front().Tokens[NextDepDirectiveTokenIndex++];
4406 if (NextDepDirectiveTokenIndex > 1 || DDTok.
Kind != tok::hash) {
4412 BufferPtr = BufferStart + DDTok.
Offset;
4413 LexAngledStringLiteral(
Result, BufferPtr + 1);
4414 if (
Result.isNot(tok::header_name))
4419 DepDirectives.front().Tokens[NextDepDirectiveTokenIndex];
4420 if (BufferStart + NextTok.
Offset >= BufferPtr)
4422 ++NextDepDirectiveTokenIndex;
4427 const char *TokPtr = convertDependencyDirectiveToken(DDTok,
Result);
4429 if (
Result.is(tok::hash) &&
Result.isAtStartOfLine()) {
4433 if (
Result.is(tok::raw_identifier)) {
4434 Result.setRawIdentifierData(TokPtr);
4442 if (
Result.isLiteral()) {
4443 Result.setLiteralData(TokPtr);
4446 if (
Result.is(tok::colon) &&
4447 (LangOpts.CPlusPlus || LangOpts.DoubleSquareBracketAttributes)) {
4449 if (*BufferPtr ==
':') {
4450 assert(DepDirectives.front().Tokens[NextDepDirectiveTokenIndex].is(
4452 ++NextDepDirectiveTokenIndex;
4453 Result.setKind(tok::coloncolon);
4463bool Lexer::LexDependencyDirectiveTokenWhileSkipping(
Token &
Result) {
4464 assert(isDependencyDirectivesLexer());
4466 using namespace dependency_directives_scan;
4469 unsigned NestedIfs = 0;
4471 DepDirectives = DepDirectives.drop_front();
4472 switch (DepDirectives.front().Kind) {
4474 llvm_unreachable(
"unexpected 'pp_none'");
4514 NextDepDirectiveTokenIndex = 0;
4515 return LexEndOfFile(
Result, BufferEnd);
4520 DepDirectives.front().Tokens.front();
4521 assert(DDTok.
is(tok::hash));
4522 NextDepDirectiveTokenIndex = 1;
4524 convertDependencyDirectiveToken(DDTok,
Result);
Defines the Diagnostic-related interfaces.
Defines the clang::IdentifierInfo, clang::IdentifierTable, and clang::Selector interfaces.
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified.
Defines the clang::LangOptions interface.
static bool isInExpansionTokenRange(const SourceLocation Loc, const SourceManager &SM)
static bool isMathematicalExtensionID(uint32_t C, const LangOptions &LangOpts, bool IsStart, bool &IsExtension)
static void diagnoseInvalidUnicodeCodepointInIdentifier(DiagnosticsEngine &Diags, const LangOptions &LangOpts, uint32_t CodePoint, CharSourceRange Range, bool IsFirst)
static char DecodeTrigraphChar(const char *CP, Lexer *L, bool Trigraphs)
DecodeTrigraphChar - If the specified character is a legal trigraph when prefixed with ?...
static size_t getSpellingSlow(const Token &Tok, const char *BufPtr, const LangOptions &LangOpts, char *Spelling)
Slow case of getSpelling.
static const char * FindConflictEnd(const char *CurPtr, const char *BufferEnd, ConflictMarkerKind CMK)
Find the end of a version control conflict marker.
static void maybeDiagnoseUTF8Homoglyph(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range)
After encountering UTF-8 character C and interpreting it as an identifier character,...
static SourceLocation getBeginningOfFileToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
static void StringifyImpl(T &Str, char Quote)
static LLVM_ATTRIBUTE_NOINLINE SourceLocation GetMappedTokenLoc(Preprocessor &PP, SourceLocation FileLoc, unsigned CharNo, unsigned TokLen)
GetMappedTokenLoc - If lexing out of a 'mapped buffer', where we pretend the lexer buffer was all exp...
static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts, bool &IsExtension)
static CharSourceRange makeCharRange(Lexer &L, const char *Begin, const char *End)
static bool isUnicodeWhitespace(uint32_t Codepoint)
static void diagnoseExtensionInIdentifier(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range)
static const char * findPlaceholderEnd(const char *CurPtr, const char *BufferEnd)
static llvm::SmallString< 5 > codepointAsHexString(uint32_t C)
static CharSourceRange makeRangeFromFileLocs(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, Lexer *L, bool Trigraphs)
isEndOfBlockCommentWithEscapedNewLine - Return true if the specified newline character (either \n or \r) ...
static char GetTrigraphCharForLetter(char Letter)
GetTrigraphCharForLetter - Given a character that occurs after a ?? pair, return the decoded trigraph...
static bool isAllowedInitiallyIDChar(uint32_t C, const LangOptions &LangOpts, bool &IsExtension)
static void maybeDiagnoseIDCharCompat(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range, bool IsFirst)
static const char * findBeginningOfLine(StringRef Buffer, unsigned Offset)
Returns the pointer that points to the beginning of line that contains the given offset,...
Defines the MultipleIncludeOpt interface.
Defines the clang::Preprocessor interface.
Defines the clang::SourceLocation class and associated facilities.
Defines the SourceManager interface.
Defines the clang::TokenKind enum and support functions.
static const llvm::sys::UnicodeCharRange C11DisallowedInitialIDCharRanges[]
static const llvm::sys::UnicodeCharRange C99DisallowedInitialIDCharRanges[]
static const llvm::sys::UnicodeCharRange UnicodeWhitespaceCharRanges[]
static const llvm::sys::UnicodeCharRange C99AllowedIDCharRanges[]
static const llvm::sys::UnicodeCharRange C11AllowedIDCharRanges[]
static const llvm::sys::UnicodeCharRange MathematicalNotationProfileIDStartRanges[]
static const llvm::sys::UnicodeCharRange MathematicalNotationProfileIDContinueRanges[]
static const llvm::sys::UnicodeCharRange XIDStartRanges[]
static const llvm::sys::UnicodeCharRange XIDContinueRanges[]
__DEVICE__ void * memcpy(void *__a, const void *__b, size_t __c)
__device__ __2f16 float c
static __inline__ int __ATTRS_o_ai vec_any_ge(vector signed char __a, vector signed char __b)
static __inline__ int __ATTRS_o_ai vec_any_eq(vector signed char __a, vector signed char __b)
Represents a character-granular source range.
static CharSourceRange getCharRange(SourceRange R)
SourceLocation getEnd() const
SourceLocation getBegin() const
A little helper class used to produce diagnostics.
Concrete class used by the front-end to report problems and issues.
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
bool isIgnored(unsigned DiagID, SourceLocation Loc) const
Determine whether the diagnostic is known to be ignored.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
static FixItHint CreateReplacement(CharSourceRange RemoveRange, StringRef Code)
Create a code modification hint that replaces the given source range with the given code string.
static FixItHint CreateRemoval(CharSourceRange RemoveRange)
Create a code modification hint that removes the given source range.
static FixItHint CreateInsertion(SourceLocation InsertionLoc, StringRef Code, bool BeforePreviousInsertions=false)
Create a code modification hint that inserts the given code string at a specific location.
One of these records is kept for each identifier that is lexed.
bool isHandleIdentifierCase() const
Return true if the Preprocessor::HandleIdentifier must be called on a token of this identifier.
tok::ObjCKeywordKind getObjCKeywordID() const
Return the Objective-C keyword ID for this identifier.
IdentifierInfo & get(StringRef Name)
Return the identifier token info for the specified named identifier.
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens.
static StringRef getSourceText(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts, bool *Invalid=nullptr)
Returns a string for the source that the range encompasses.
void SetKeepWhitespaceMode(bool Val)
SetKeepWhitespaceMode - This method lets clients enable or disable whitespace retention mode.
static SourceLocation findLocationAfterToken(SourceLocation loc, tok::TokenKind TKind, const SourceManager &SM, const LangOptions &LangOpts, bool SkipTrailingWhitespaceAndNewLine)
Checks that the given token is the first token that occurs after the given location (this excludes co...
bool LexFromRawLexer(Token &Result)
LexFromRawLexer - Lex a token from a designated raw lexer (one with no associated preprocessor object...
bool inKeepCommentMode() const
inKeepCommentMode - Return true if the lexer should return comments as tokens.
void SetCommentRetentionState(bool Mode)
SetCommentRetentionState - Change the comment retention mode of the lexer to the specified mode.
void seek(unsigned Offset, bool IsAtStartOfLine)
Set the lexer's buffer pointer to Offset.
static StringRef getImmediateMacroName(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
void ReadToEndOfLine(SmallVectorImpl< char > *Result=nullptr)
ReadToEndOfLine - Read the rest of the current preprocessor line as an uninterpreted string.
static bool isAtStartOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroBegin=nullptr)
Returns true if the given MacroID location points at the first token of the macro expansion.
DiagnosticBuilder Diag(const char *Loc, unsigned DiagID) const
Diag - Forwarding function for diagnostics.
static char getCharAndSizeNoWarn(const char *Ptr, unsigned &Size, const LangOptions &LangOpts)
getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever emit a warning.
const char * getBufferLocation() const
Return the current location in the buffer.
bool isPragmaLexer() const
isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.
static unsigned getTokenPrefixLength(SourceLocation TokStart, unsigned CharNo, const SourceManager &SM, const LangOptions &LangOpts)
Get the physical length (including trigraphs and escaped newlines) of the first CharNo characters...
Lexer(FileID FID, const llvm::MemoryBufferRef &InputFile, Preprocessor &PP, bool IsFirstIncludeOfFile=true)
Lexer constructor - Create a new lexer object for the specified buffer with the specified preprocesso...
static bool isAtEndOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroEnd=nullptr)
Returns true if the given MacroID location points at the last token of the macro expansion.
SourceLocation getSourceLocation() override
getSourceLocation - Return a source location for the next character in the current file.
static CharSourceRange makeFileCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Accepts a range and returns a character range with file locations.
static bool isNewLineEscaped(const char *BufferStart, const char *Str)
Checks whether the new line pointed to by Str is preceded by an escape sequence.
SourceLocation getSourceLocation(const char *Loc, unsigned TokLen=1) const
getSourceLocation - Return a source location identifier for the specified offset in the current file.
static StringRef getIndentationForLine(SourceLocation Loc, const SourceManager &SM)
Returns the leading whitespace for the line that corresponds to the given location Loc.
static unsigned getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid=nullptr)
getSpelling - This method is used to get the spelling of a token into a preallocated buffer,...
bool isKeepWhitespaceMode() const
isKeepWhitespaceMode - Return true if the lexer should return tokens for every character in the file,...
static bool isAsciiIdentifierContinueChar(char c, const LangOptions &LangOpts)
Returns true if the given character could appear in an identifier.
static unsigned MeasureTokenLength(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
MeasureTokenLength - Relex the token at the specified location and return its length in bytes in the ...
static SourceLocation GetBeginningOfToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Given a location anywhere in a source buffer, find the location that corresponds to the beginning of...
void resetExtendedTokenMode()
Sets the extended token mode back to its initial value, according to the language options and preproc...
static StringRef getImmediateMacroNameForDiagnostics(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
static Lexer * Create_PragmaLexer(SourceLocation SpellingLoc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLen, Preprocessor &PP)
Create_PragmaLexer: Lexer constructor - Create a new lexer object for _Pragma expansion.
static PreambleBounds ComputePreamble(StringRef Buffer, const LangOptions &LangOpts, unsigned MaxLines=0)
Compute the preamble of the given file.
static bool getRawToken(SourceLocation Loc, Token &Result, const SourceManager &SM, const LangOptions &LangOpts, bool IgnoreWhiteSpace=false)
Relex the token at the specified location.
static std::optional< Token > findNextToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Finds the token that comes right after the given location.
static SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset, const SourceManager &SM, const LangOptions &LangOpts)
Computes the source location just past the end of the token at this source location.
static std::string Stringify(StringRef Str, bool Charify=false)
Stringify - Convert the specified string into a C string by i) escaping '\' and " characters and ii) ...
void ExitTopLevelConditional()
Called when the lexer exits the top-level conditional.
bool LexingRawMode
True if in raw mode.
SmallVector< PPConditionalInfo, 4 > ConditionalStack
Information about the set of #if/#ifdef/#ifndef blocks we are currently in.
bool ParsingPreprocessorDirective
True when parsing #XXX; turns '\n' into a tok::eod token.
MultipleIncludeOpt MIOpt
A state machine that detects the #ifndef-wrapping a file idiom for the multiple-include optimization.
bool ParsingFilename
True after #include; turns <xx> or "xxx" into a tok::header_name token.
bool isLexingRawMode() const
Return true if this lexer is in raw mode or not.
const FileID FID
The SourceManager FileID corresponding to the file being lexed.
bool LexEditorPlaceholders
When enabled, the preprocessor will construct editor placeholder tokens.
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
SourceLocation getCodeCompletionLoc() const
Returns the location of the code-completion point.
SourceLocation getCodeCompletionFileLoc() const
Returns the start location of the file of code-completion point.
void setCodeCompletionTokenRange(const SourceLocation Start, const SourceLocation End)
Set the code completion token range for detecting replacement range later on.
bool isRecordingPreamble() const
void setRecordedPreambleConditionalStack(ArrayRef< PPConditionalInfo > s)
bool isInPrimaryFile() const
Return true if we're in the top-level file, not in a #include.
void CreateString(StringRef Str, Token &Tok, SourceLocation ExpansionLocStart=SourceLocation(), SourceLocation ExpansionLocEnd=SourceLocation())
Plop the specified string into a scratch buffer and set the specified token's location and length to ...
IdentifierInfo * LookUpIdentifierInfo(Token &Identifier) const
Given a tok::raw_identifier token, look up the identifier information for the token and install it in...
bool isPreprocessedOutput() const
Returns true if the preprocessor is responsible for generating output, false if it is producing token...
bool HandleIdentifier(Token &Identifier)
Callback invoked when the lexer reads an identifier and has filled in the token's IdentifierInfo membe...
SourceManager & getSourceManager() const
EmptylineHandler * getEmptylineHandler() const
bool getCommentRetentionState() const
bool hadModuleLoaderFatalFailure() const
PreprocessorOptions & getPreprocessorOpts() const
Retrieve the preprocessor options used to initialize this preprocessor.
StringRef getSpelling(SourceLocation loc, SmallVectorImpl< char > &buffer, bool *invalid=nullptr) const
Return the 'spelling' of the token at the given location; does not go up to the spelling location or ...
bool HandleComment(Token &result, SourceRange Comment)
bool isCodeCompletionEnabled() const
Determine if we are performing code completion.
void HandleDirective(Token &Result)
Callback invoked when the lexer sees a # token at the start of a line.
IdentifierTable & getIdentifierTable()
const LangOptions & getLangOpts() const
void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled)
Hook used by the lexer to invoke the "included file" code completion point.
void CodeCompleteNaturalLanguage()
Hook used by the lexer to invoke the "natural language" code completion point.
bool HandleEndOfFile(Token &Result, bool isEndOfMacro=false)
Callback invoked when the lexer hits the end of the current file.
DiagnosticsEngine & getDiagnostics() const
void setCodeCompletionIdentifierInfo(IdentifierInfo *Filter)
Set the code completion token for filtering purposes.
DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const
Forwarding function for diagnostics.
Encodes a location in the source.
static SourceLocation getFromRawEncoding(UIntTy Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
bool isValid() const
Return true if this is a valid SourceLocation object.
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
UIntTy getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it.
This class handles loading and caching of source files into memory.
const char * getCharacterData(SourceLocation SL, bool *Invalid=nullptr) const
Return a pointer to the start of the specified location in the appropriate spelling MemoryBuffer.
A trivial tuple used to represent a source range.
Each ExpansionInfo encodes the expansion location - where the token was ultimately expanded,...
SourceLocation getExpansionLocStart() const
SourceLocation getSpellingLoc() const
bool isMacroArgExpansion() const
This is a discriminated union of FileInfo and ExpansionInfo.
const ExpansionInfo & getExpansion() const
static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix)
Determine whether a suffix is a valid ud-suffix.
Token - This structure provides full information about a lexed token.
IdentifierInfo * getIdentifierInfo() const
bool hasUCN() const
Returns true if this token contains a universal character name.
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
unsigned getLength() const
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {....
tok::TokenKind getKind() const
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
bool isAnnotation() const
Return true if this is any of tok::annot_* kind tokens.
bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const
Return true if we have an ObjC keyword identifier.
void startToken()
Reset all flags to cleared.
bool needsCleaning() const
Return true if this token has trigraphs or escaped newlines in it.
StringRef getRawIdentifier() const
getRawIdentifier - For a raw identifier token (i.e., an identifier lexed in raw mode),...
const char * getLiteralData() const
getLiteralData - For a literal token (numeric constant, string, etc), this returns a pointer to the s...
void setFlag(TokenFlags Flag)
Set the specified flag.
static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a)
Copies the values of the most significant bits from each 8-bit element in a 128-bit integer vector of...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi8(__m128i __a, __m128i __b)
Compares each of the corresponding 8-bit values of the 128-bit integer vectors for equality.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi8(char __b)
Initializes all values in a 128-bit vector of [16 x i8] with the specified 8-bit value.
@ tokens_present_before_eof
Indicates that there are tokens present between the last scanned directive and eof.
@ pp_pragma_include_alias
@ After
Like System, but searched after the system directories.
bool isStringLiteral(TokenKind K)
Return true if this is a C or C++ string-literal (or C++11 user-defined-string-literal) token.
ObjCKeywordKind
Provides a namespace for Objective-C keywords which start with an '@'.
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
LLVM_READNONE bool isASCII(char c)
Returns true if a byte is an ASCII character.
LLVM_READONLY bool isVerticalWhitespace(unsigned char c)
Returns true if this character is vertical ASCII whitespace: '\n', '\r'.
ConflictMarkerKind
ConflictMarkerKind - Kinds of conflict marker which the lexer might be recovering from.
@ CMK_Perforce
A Perforce-style conflict marker, initiated by 4 ">"s, separated by 4 "="s, and terminated by 4 "<"s.
@ CMK_None
Not within a conflict marker.
@ CMK_Normal
A normal or diff3 conflict marker, initiated by at least 7 "<"s, separated by at least 7 "="s or "|"s...
bool operator<(DeclarationName LHS, DeclarationName RHS)
Ordering on two declaration names.
LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)
Returns true if this character is horizontal ASCII whitespace: ' ', '\t', '\f', '\v'.
@ C
Languages that the frontend can parse and compile.
@ Result
The result type of a method or function.
LLVM_READONLY bool isRawStringDelimBody(unsigned char c)
Return true if this is the body character of a C++ raw string delimiter.
LLVM_READONLY bool isWhitespace(unsigned char c)
Return true if this character is horizontal or vertical ASCII whitespace: ' ', '\t',...
LLVM_READONLY bool isPreprocessingNumberBody(unsigned char c)
Return true if this is the body character of a C preprocessing number, which is [a-zA-Z0-9_.].
LLVM_READONLY bool isAsciiIdentifierContinue(unsigned char c, bool AllowDollar=false)
Returns true if this is a body character of a C identifier, which is [a-zA-Z0-9_].
LLVM_READONLY bool isAsciiIdentifierStart(unsigned char c, bool AllowDollar=false)
Returns true if this is a valid first character of a C identifier, which is [a-zA-Z_].
__INTPTR_TYPE__ intptr_t
A signed integer type with the property that any valid pointer to void can be converted to this type,...
__PTRDIFF_TYPE__ ptrdiff_t
A signed integer type that is the result of subtracting two pointers.
float __ovld __cnfn length(float)
Return the length of vector p, i.e., sqrt(p.x^2 + p.y^2 + ...)
Describes the bounds (start, size) of the preamble and a flag required by PreprocessorOptions::Precom...
Token lexed as part of dependency directive scanning.
unsigned Offset
Offset into the original source input.
bool is(tok::TokenKind K) const