23#include "llvm/ADT/ScopeExit.h"
24#include "llvm/ADT/SmallString.h"
25#include "llvm/ADT/StringMap.h"
26#include "llvm/ADT/StringSwitch.h"
35struct DirectiveWithTokens {
40 : Kind(Kind), NumTokens(NumTokens) {}
63 Scanner(StringRef Input,
64 SmallVectorImpl<dependency_directives_scan::Token> &Tokens,
65 DiagnosticsEngine *Diags, SourceLocation InputSourceLoc)
66 : Input(Input), Tokens(Tokens), Diags(Diags),
67 InputSourceLoc(InputSourceLoc), LangOpts(getLangOptsForDepScanning()),
68 TheLexer(InputSourceLoc, LangOpts, Input.begin(), Input.begin(),
71 static LangOptions getLangOptsForDepScanning() {
75 LangOpts.LineComment =
true;
76 LangOpts.RawStringLiterals =
true;
77 LangOpts.AllowLiteralDigitSeparator =
true;
85 bool scan(SmallVectorImpl<Directive> &Directives);
92 [[nodiscard]] dependency_directives_scan::Token &
93 lexToken(
const char *&
First,
const char *
const End);
95 [[nodiscard]] dependency_directives_scan::Token &
96 lexIncludeFilename(
const char *&
First,
const char *
const End);
98 void skipLine(
const char *&
First,
const char *
const End);
99 void skipDirective(StringRef Name,
const char *&
First,
const char *
const End);
103 StringRef cleanStringIfNeeded(
const dependency_directives_scan::Token &
Tok);
110 [[nodiscard]] std::optional<StringRef>
111 tryLexIdentifierOrSkipLine(
const char *&
First,
const char *
const End);
114 [[nodiscard]] StringRef lexIdentifier(
const char *&
First,
115 const char *
const End);
122 [[nodiscard]]
bool isNextIdentifierOrSkipLine(StringRef Id,
124 const char *
const End);
132 const char *
const End);
139 [[nodiscard]] std::optional<StringRef>
140 tryLexStringLiteralOrSkipLine(
const char *&
First,
const char *
const End);
142 [[nodiscard]]
bool scanImpl(
const char *
First,
const char *
const End);
143 [[nodiscard]]
bool lexPPLine(
const char *&
First,
const char *
const End);
144 [[nodiscard]]
bool lexAt(
const char *&
First,
const char *
const End);
145 [[nodiscard]]
bool lexModule(
const char *&
First,
const char *
const End);
146 [[nodiscard]]
bool lexDefine(
const char *HashLoc,
const char *&
First,
147 const char *
const End);
148 [[nodiscard]]
bool lexPragma(
const char *&
First,
const char *
const End);
149 [[nodiscard]]
bool lex_Pragma(
const char *&
First,
const char *
const End);
150 [[nodiscard]]
bool lexEndif(
const char *&
First,
const char *
const End);
152 const char *
const End);
153 [[nodiscard]]
bool lexModuleDirectiveBody(
DirectiveKind Kind,
155 const char *
const End);
156 void lexPPDirectiveBody(
const char *&
First,
const char *
const End);
159 Tokens.append(CurDirToks);
160 DirsWithToks.emplace_back(Kind, CurDirToks.size());
162 return DirsWithToks.back();
164 void popDirective() {
165 Tokens.pop_back_n(DirsWithToks.pop_back_val().NumTokens);
168 return DirsWithToks.empty() ?
pp_none : DirsWithToks.back().Kind;
171 unsigned getOffsetAt(
const char *CurPtr)
const {
172 return CurPtr - Input.data();
177 bool reportError(
const char *CurPtr,
unsigned Err);
179 bool ScanningPreprocessedModuleFile =
false;
180 StringMap<char> SplitIds;
182 SmallVectorImpl<dependency_directives_scan::Token> &Tokens;
183 DiagnosticsEngine *Diags;
184 SourceLocation InputSourceLoc;
186 const char *LastTokenPtr =
nullptr;
190 SmallVector<dependency_directives_scan::Token, 32> CurDirToks;
194 SmallVector<DirectiveWithTokens, 64> DirsWithToks;
195 LangOptions LangOpts;
201bool Scanner::reportError(
const char *CurPtr,
unsigned Err) {
204 assert(CurPtr >= Input.data() &&
"invalid buffer ptr");
216 assert(Current >
First);
220 if (Current >
First && *(Current - 1) ==
'\\') {
224 if (EscapeSize > 0) {
227 Current -= (1 + EscapeSize);
239 const char *Current) {
240 assert(
First <= Current);
243 if (*Current !=
'"' ||
First == Current)
249 if (
First == Current ||
254 if (*Current ==
'u' || *Current ==
'U' || *Current ==
'L')
255 return First == Current ||
259 if (*Current !=
'8' ||
First == Current ||
262 return First == Current ||
267 assert(
First[0] ==
'"');
289 while (
Last != End &&
size_t(
Last -
First) < Terminator.size() &&
298 if (
size_t(
Last -
First) < Terminator.size())
308static unsigned isEOL(
const char *
First,
const char *
const End) {
319 const char Terminator = *
First ==
'<' ?
'>' : *
First;
334 const char *FirstAfterBackslashPastSpace =
First;
336 if (
unsigned NLSize =
isEOL(FirstAfterBackslashPastSpace, End)) {
339 First = FirstAfterBackslashPastSpace + NLSize - 1;
352 assert(Len &&
"expected newline");
366 char LastNonWhitespace =
' ';
369 LastNonWhitespace = *
First;
375 if (LastNonWhitespace !=
'\\')
391 if (End -
First < 4) {
405 const char *
const Cur,
406 const char *
const End) {
407 assert(*Cur ==
'\'' &&
"expected quotation character");
415 char Prev = *(Cur - 1);
416 if (Prev ==
'L' || Prev ==
'U' || Prev ==
'u')
418 if (Prev ==
'8' && (Cur - 1 != Start) && *(Cur - 2) ==
'u')
426void Scanner::skipLine(
const char *&
First,
const char *
const End) {
428 assert(
First <= End);
436 const char *Start =
First;
439 char LastNonWhitespace =
' ';
444 LastTokenPtr =
First;
462 LastTokenPtr =
First;
464 LastNonWhitespace = *
First;
469 if (
First[1] ==
'/') {
475 if (
First[1] !=
'*') {
476 LastTokenPtr =
First;
478 LastNonWhitespace = *
First;
492 if (LastNonWhitespace !=
'\\')
497void Scanner::skipDirective(StringRef Name,
const char *&
First,
498 const char *
const End) {
499 if (llvm::StringSwitch<bool>(Name)
500 .Case(
"warning",
true)
506 skipLine(
First, End);
511 assert(
First <= End);
517 if (*
First ==
'\\') {
518 const char *Ptr =
First + 1;
534 if (
First[1] ==
'/') {
549 const char *
const End) {
550 assert(Kind == DirectiveKind::cxx_export_import_decl ||
551 Kind == DirectiveKind::cxx_export_module_decl ||
552 Kind == DirectiveKind::cxx_import_decl ||
553 Kind == DirectiveKind::cxx_module_decl ||
554 Kind == DirectiveKind::decl_at_import);
556 const char *DirectiveLoc = Input.data() + CurDirToks.front().Offset;
560 const dependency_directives_scan::Token &
Tok = lexToken(
First, End);
563 CurDirToks.pop_back();
570 diag::err_dep_source_scanner_missing_semi_after_at_import);
571 if (
Tok.
is(tok::semi))
575 bool IsCXXModules =
Kind == DirectiveKind::cxx_export_import_decl ||
576 Kind == DirectiveKind::cxx_export_module_decl ||
577 Kind == DirectiveKind::cxx_import_decl ||
578 Kind == DirectiveKind::cxx_module_decl;
580 lexPPDirectiveBody(
First, End);
585 const auto &
Tok = lexToken(
First, End);
589 return reportError(DirectiveLoc,
590 diag::err_dep_source_scanner_unexpected_tokens_at_import);
593dependency_directives_scan::Token &Scanner::lexToken(
const char *&
First,
594 const char *
const End) {
598 assert(
First <= End);
601 CurDirToks.emplace_back(Offset,
Tok.getLength(),
Tok.getKind(),
603 return CurDirToks.back();
606dependency_directives_scan::Token &
607Scanner::lexIncludeFilename(
const char *&
First,
const char *
const End) {
611 assert(
First <= End);
614 CurDirToks.emplace_back(Offset,
Tok.getLength(),
Tok.getKind(),
616 return CurDirToks.back();
619void Scanner::lexPPDirectiveBody(
const char *&
First,
const char *
const End) {
621 const dependency_directives_scan::Token &
Tok = lexToken(
First, End);
628Scanner::cleanStringIfNeeded(
const dependency_directives_scan::Token &
Tok) {
630 if (LLVM_LIKELY(!NeedsCleaning))
633 SmallString<64> Spelling;
639 unsigned SpellingLength = 0;
640 const char *BufPtr = Input.begin() +
Tok.
Offset;
641 const char *AfterIdent = Input.begin() +
Tok.
getEnd();
642 while (BufPtr < AfterIdent) {
644 Spelling[SpellingLength++] = Char;
648 return SplitIds.try_emplace(StringRef(Spelling.begin(), SpellingLength), 0)
652std::optional<StringRef>
653Scanner::tryLexIdentifierOrSkipLine(
const char *&
First,
const char *
const End) {
654 const dependency_directives_scan::Token &
Tok = lexToken(
First, End);
655 if (
Tok.
isNot(tok::raw_identifier)) {
656 if (!
Tok.
is(tok::eod))
657 skipLine(
First, End);
661 return cleanStringIfNeeded(
Tok);
664StringRef Scanner::lexIdentifier(
const char *&
First,
const char *
const End) {
665 std::optional<StringRef> Id = tryLexIdentifierOrSkipLine(
First, End);
666 assert(Id &&
"expected identifier token");
670bool Scanner::isNextIdentifierOrSkipLine(StringRef Id,
const char *&
First,
671 const char *
const End) {
672 if (std::optional<StringRef> FoundId =
673 tryLexIdentifierOrSkipLine(
First, End)) {
676 skipLine(
First, End);
682 const char *
const End) {
683 const dependency_directives_scan::Token &
Tok = lexToken(
First, End);
686 skipLine(
First, End);
690std::optional<StringRef>
691Scanner::tryLexStringLiteralOrSkipLine(
const char *&
First,
692 const char *
const End) {
693 const dependency_directives_scan::Token &
Tok = lexToken(
First, End);
695 if (!
Tok.
is(tok::eod))
696 skipLine(
First, End);
700 return cleanStringIfNeeded(
Tok);
703bool Scanner::lexAt(
const char *&
First,
const char *
const End) {
707 const dependency_directives_scan::Token &AtTok = lexToken(
First, End);
708 assert(AtTok.
is(tok::at));
711 if (!isNextIdentifierOrSkipLine(
"import",
First, End))
716bool Scanner::lexModule(
const char *&
First,
const char *
const End) {
717 StringRef Id = lexIdentifier(
First, End);
719 if (Id ==
"export") {
721 std::optional<StringRef> NextId = tryLexIdentifierOrSkipLine(
First, End);
728 ScanningPreprocessedModuleFile ?
"__preprocessed_module" :
"module";
730 ScanningPreprocessedModuleFile ?
"__preprocessed_import" :
"import";
732 if (Id !=
Module && Id != Import) {
733 skipLine(
First, End);
746 skipLine(
First, End);
753 skipLine(
First, End);
757 (void)lexToken(
First, End);
758 if (!tryLexIdentifierOrSkipLine(
First, End))
764 if (Id ==
Module && !Export)
766 skipLine(
First, End);
774 skipLine(
First, End);
779 TheLexer.
seek(getOffsetAt(
First),
false);
787 return lexModuleDirectiveBody(Kind,
First, End);
790bool Scanner::lex_Pragma(
const char *&
First,
const char *
const End) {
791 if (!isNextTokenOrSkipLine(tok::l_paren,
First, End))
794 std::optional<StringRef> Str = tryLexStringLiteralOrSkipLine(
First, End);
796 if (!Str || !isNextTokenOrSkipLine(tok::r_paren,
First, End))
799 SmallString<64> Buffer(*Str);
805 SmallVector<dependency_directives_scan::Token> DiscardTokens;
806 const char *Begin = Buffer.c_str();
807 Scanner PragmaScanner{StringRef(Begin, Buffer.size()), DiscardTokens, Diags,
811 if (PragmaScanner.lexPragma(Begin, Buffer.end()))
816 skipLine(
First, End);
820 assert(Begin == Buffer.end());
825bool Scanner::lexPragma(
const char *&
First,
const char *
const End) {
826 std::optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(
First, End);
830 StringRef Id = *FoundId;
831 auto Kind = llvm::StringSwitch<DirectiveKind>(Id)
838 lexPPDirectiveBody(
First, End);
844 skipLine(
First, End);
848 FoundId = tryLexIdentifierOrSkipLine(
First, End);
854 if (Id ==
"system_header") {
855 lexPPDirectiveBody(
First, End);
860 if (Id !=
"module") {
861 skipLine(
First, End);
866 if (!isNextIdentifierOrSkipLine(
"import",
First, End))
870 lexPPDirectiveBody(
First, End);
875bool Scanner::lexEndif(
const char *&
First,
const char *
const End) {
888 skipLine(
First, End);
896 const char *
const End) {
897 lexPPDirectiveBody(
First, End);
917 assert(
First <= End);
920 return Str.starts_with(
928bool Scanner::lexPPLine(
const char *&
First,
const char *
const End) {
929 assert(
First != End);
932 assert(
First <= End);
937 skipLine(
First, End);
938 assert(
First <= End);
942 LastTokenPtr =
First;
946 llvm::scope_exit ScEx1([&]() {
952 bool IsPreprocessedModule =
954 if (*
First ==
'_' && !IsPreprocessedModule) {
955 if (isNextIdentifierOrSkipLine(
"_Pragma",
First, End))
956 return lex_Pragma(
First, End);
963 llvm::scope_exit ScEx2(
967 return lexAt(
First, End);
970 if (*
First ==
'i' || *
First ==
'e' || *
First ==
'm' || IsPreprocessedModule)
971 return lexModule(
First, End);
974 const dependency_directives_scan::Token &HashTok = lexToken(
First, End);
975 if (HashTok.
is(tok::hashhash)) {
979 skipLine(
First, End);
980 assert(
First <= End);
983 assert(HashTok.
is(tok::hash));
986 std::optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(
First, End);
990 StringRef Id = *FoundId;
993 return lexPragma(
First, End);
995 auto Kind = llvm::StringSwitch<DirectiveKind>(Id)
1012 skipDirective(Id,
First, End);
1017 return lexEndif(
First, End);
1025 if (lexIncludeFilename(
First, End).
is(tok::eod)) {
1034 return lexDefault(Kind,
First, End);
1043bool Scanner::scanImpl(
const char *
First,
const char *
const End) {
1045 while (
First != End)
1046 if (lexPPLine(
First, End))
1051bool Scanner::scan(SmallVectorImpl<Directive> &Directives) {
1053 bool Error = scanImpl(Input.begin(), Input.end());
1058 (Tokens.empty() || LastTokenPtr > Input.begin() + Tokens.back().Offset))
1063 ArrayRef<dependency_directives_scan::Token> RemainingTokens = Tokens;
1064 for (
const DirectiveWithTokens &DirWithToks : DirsWithToks) {
1065 assert(RemainingTokens.size() >= DirWithToks.NumTokens);
1066 Directives.emplace_back(DirWithToks.Kind,
1067 RemainingTokens.take_front(DirWithToks.NumTokens));
1068 RemainingTokens = RemainingTokens.drop_front(DirWithToks.NumTokens);
1070 assert(RemainingTokens.empty());
1079 return Scanner(Input, Tokens, Diags, InputSourceLoc).scan(Directives);
1085 llvm::raw_ostream &OS) {
1087 auto needsSpaceSeparator =
1090 if (Prev ==
Tok.Kind)
1091 return !
Tok.isOneOf(tok::l_paren, tok::r_paren, tok::l_square,
1093 if (Prev == tok::raw_identifier &&
1094 Tok.isOneOf(tok::hash, tok::numeric_constant, tok::string_literal,
1095 tok::char_constant, tok::header_name))
1097 if (Prev == tok::r_paren &&
1098 Tok.isOneOf(tok::raw_identifier, tok::hash, tok::string_literal,
1099 tok::char_constant, tok::unknown))
1101 if (Prev == tok::comma &&
1102 Tok.isOneOf(tok::l_paren, tok::string_literal, tok::less))
1109 OS <<
"<TokBeforeEOF>";
1110 std::optional<tok::TokenKind> PrevTokenKind;
1112 if (PrevTokenKind && needsSpaceSeparator(*PrevTokenKind,
Tok))
1114 PrevTokenKind =
Tok.Kind;
1115 OS << Source.slice(
Tok.Offset,
Tok.getEnd());
1121 const char *
const End) {
1122 assert(
First <= End);
1123 while (
First != End) {
1124 if (*
First ==
'#') {
1138 const char *
First = Source.begin();
1139 const char *
const End = Source.end();
1152 if (S.lexModule(
First, End))
1154 auto IsCXXNamedModuleDirective = [](
const DirectiveWithTokens &D) {
1165 return llvm::any_of(S.DirsWithToks, IsCXXNamedModuleDirective);
1169 const char *
First = Source.begin();
1170 const char *
const End = Source.end();
1178 while (
First != End) {
1179 if (*
First ==
'#') {
1182 }
else if (*
First ==
'e') {
1183 S.TheLexer.
seek(S.getOffsetAt(
First),
true);
1184 StringRef Id = S.lexIdentifier(
First, End);
1185 if (Id ==
"export") {
1186 std::optional<StringRef> NextId =
1187 S.tryLexIdentifierOrSkipLine(
First, End);
1192 if (Id ==
"__preprocessed_module" || Id ==
"__preprocessed_import")
Defines the Diagnostic-related interfaces.
static void skipBlockComment(const char *&First, const char *const End)
static void skipRawString(const char *&First, const char *const End)
static void skipString(const char *&First, const char *const End)
static bool isStartOfRelevantLine(char First)
static bool isStartWithPreprocessedModuleDirective(const char *First, const char *End)
static bool isRawStringLiteral(const char *First, const char *Current)
static void skipUntilMaybeCXX20ModuleDirective(const char *&First, const char *const End)
static void skipOverSpaces(const char *&First, const char *const End)
static unsigned isEOL(const char *First, const char *const End)
static char previousChar(const char *First, const char *&Current)
static void skipToNewlineRaw(const char *&First, const char *const End)
static unsigned skipNewline(const char *&First, const char *End)
static void skipUTF8ByteOrderMark(const char *&First, const char *const End)
static void skipLineComment(const char *&First, const char *const End)
static bool isQuoteCppDigitSeparator(const char *const Start, const char *const Cur, const char *const End)
This is the interface for scanning header and source files to get the minimum necessary preprocessor ...
static unsigned skipWhitespace(unsigned Idx, StringRef Str, unsigned Length)
Skip over whitespace in the string, starting at the given index.
Concrete class used by the front-end to report problems and issues.
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
bool LexFromRawLexer(Token &Result)
LexFromRawLexer - Lex a token from a designated raw lexer (one with no associated preprocessor object...
static unsigned getEscapedNewLineSize(const char *P)
getEscapedNewLineSize - Return the size of the specified escaped newline, or 0 if it is not an escaped newline.
void seek(unsigned Offset, bool IsAtStartOfLine)
Set the lexer's buffer pointer to Offset.
unsigned getCurrentBufferOffset()
Returns the current lexing offset.
static SizedChar getCharAndSizeNoWarn(const char *Ptr, const LangOptions &LangOpts)
getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever emit a warning.
void setParsingPreprocessorDirective(bool f)
Inform the lexer whether or not we are currently lexing a preprocessor directive.
void LexIncludeFilename(Token &FilenameTok)
Lex a token, producing a header-name token if possible.
Encodes a location in the source.
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
DirectiveKind
Represents the kind of preprocessor directive or a module declaration that is tracked by the scanner ...
@ tokens_present_before_eof
Indicates that there are tokens present between the last scanned directive and eof.
@ pp_pragma_system_header
@ pp_pragma_include_alias
bool isStringLiteral(TokenKind K)
Return true if this is a C or C++ string-literal (or C++11 user-defined-string-literal) token.
const char * getPPKeywordSpelling(PPKeywordKind Kind) LLVM_READNONE
Returns the spelling of preprocessor keywords, such as "else".
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
The JSON file list parser is used to communicate input to InstallAPI.
LLVM_READONLY bool isVerticalWhitespace(unsigned char c)
Returns true if this character is vertical ASCII whitespace: '\n', '\r'.
LLVM_READONLY bool isAsciiIdentifierContinue(unsigned char c)
void printDependencyDirectivesAsSource(StringRef Source, ArrayRef< dependency_directives_scan::Directive > Directives, llvm::raw_ostream &OS)
Print the previously scanned dependency directives as minimized source text.
bool scanInputForCXX20ModulesUsage(StringRef Source)
Scan an input source buffer for C++20 named module usage.
bool isPreprocessedModuleFile(StringRef Source)
Scan an input source buffer, and check whether the input source is a preprocessed output.
LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)
Returns true if this character is horizontal ASCII whitespace: ' ', '\t', '\f', '\v'.
bool scanSourceForDependencyDirectives(StringRef Input, SmallVectorImpl< dependency_directives_scan::Token > &Tokens, SmallVectorImpl< dependency_directives_scan::Directive > &Directives, DiagnosticsEngine *Diags=nullptr, SourceLocation InputSourceLoc=SourceLocation())
Scan the input for the preprocessor directives that might have an effect on the dependencies for a compilation unit.
@ Module
Module linkage, which indicates that the entity can be referred to from other translation units withi...
LLVM_READONLY bool isWhitespace(unsigned char c)
Return true if this character is horizontal or vertical ASCII whitespace: ' ', '\t', '\f', '\v', '\n', '\r'.
LLVM_READONLY bool isPreprocessingNumberBody(unsigned char c)
Return true if this is the body character of a C preprocessing number, which is [a-zA-Z0-9_.].
void prepare_PragmaString(SmallVectorImpl< char > &StrVal)
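Destringize a _Pragma("") string according to C11 6.10.9.1: "The string literal is destringized by deleting any encoding prefix, deleting the leading and trailing double-quotes, replacing each escape sequence \" by a double-quote, and replacing each escape sequence \\ by a single backslash."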
Diagnostic wrappers for TextAPI types for error reporting.
Represents a directive that's lexed as part of the dependency directives scanning.
DirectiveKind Kind
The kind of token.
Token lexed as part of dependency directive scanning.
bool isNot(tok::TokenKind K) const
unsigned Offset
Offset into the original source input.
bool is(tok::TokenKind K) const
bool isOneOf(Ts... Ks) const