23#include "llvm/ADT/ScopeExit.h"
24#include "llvm/ADT/SmallString.h"
25#include "llvm/ADT/StringMap.h"
26#include "llvm/ADT/StringSwitch.h"
35struct DirectiveWithTokens {
40 : Kind(Kind), NumTokens(NumTokens) {}
63 Scanner(StringRef Input,
64 SmallVectorImpl<dependency_directives_scan::Token> &Tokens,
65 DiagnosticsEngine *Diags, SourceLocation InputSourceLoc)
66 : Input(Input), Tokens(Tokens), Diags(Diags),
67 InputSourceLoc(InputSourceLoc), LangOpts(getLangOptsForDepScanning()),
68 TheLexer(InputSourceLoc, LangOpts, Input.begin(), Input.begin(),
71 static LangOptions getLangOptsForDepScanning() {
75 LangOpts.LineComment =
true;
76 LangOpts.RawStringLiterals =
true;
77 LangOpts.AllowLiteralDigitSeparator =
true;
85 bool scan(SmallVectorImpl<Directive> &Directives);
92 [[nodiscard]] dependency_directives_scan::Token &
93 lexToken(
const char *&
First,
const char *
const End);
95 [[nodiscard]] dependency_directives_scan::Token &
96 lexIncludeFilename(
const char *&
First,
const char *
const End);
98 void skipLine(
const char *&
First,
const char *
const End);
99 void skipDirective(StringRef Name,
const char *&
First,
const char *
const End);
103 StringRef cleanStringIfNeeded(
const dependency_directives_scan::Token &
Tok);
110 [[nodiscard]] std::optional<StringRef>
111 tryLexIdentifierOrSkipLine(
const char *&
First,
const char *
const End);
114 [[nodiscard]] StringRef lexIdentifier(
const char *&
First,
115 const char *
const End);
122 [[nodiscard]]
bool isNextIdentifierOrSkipLine(StringRef Id,
124 const char *
const End);
132 const char *
const End);
139 [[nodiscard]] std::optional<StringRef>
140 tryLexStringLiteralOrSkipLine(
const char *&
First,
const char *
const End);
142 [[nodiscard]]
bool scanImpl(
const char *
First,
const char *
const End);
143 [[nodiscard]]
bool lexPPLine(
const char *&
First,
const char *
const End);
144 [[nodiscard]]
bool lexAt(
const char *&
First,
const char *
const End);
145 [[nodiscard]]
bool lexModule(
const char *&
First,
const char *
const End);
146 [[nodiscard]]
bool lexDefine(
const char *HashLoc,
const char *&
First,
147 const char *
const End);
148 [[nodiscard]]
bool lexPragma(
const char *&
First,
const char *
const End);
149 [[nodiscard]]
bool lex_Pragma(
const char *&
First,
const char *
const End);
150 [[nodiscard]]
bool lexEndif(
const char *&
First,
const char *
const End);
152 const char *
const End);
153 [[nodiscard]]
bool lexModuleDirectiveBody(
DirectiveKind Kind,
155 const char *
const End);
156 void lexPPDirectiveBody(
const char *&
First,
const char *
const End);
159 Tokens.append(CurDirToks);
160 DirsWithToks.emplace_back(Kind, CurDirToks.size());
162 return DirsWithToks.back();
164 void popDirective() {
165 Tokens.pop_back_n(DirsWithToks.pop_back_val().NumTokens);
168 return DirsWithToks.empty() ?
pp_none : DirsWithToks.back().Kind;
171 unsigned getOffsetAt(
const char *CurPtr)
const {
172 return CurPtr - Input.data();
177 bool reportError(
const char *CurPtr,
unsigned Err);
179 bool ScanningPreprocessedModuleFile =
false;
180 StringMap<char> SplitIds;
182 SmallVectorImpl<dependency_directives_scan::Token> &Tokens;
183 DiagnosticsEngine *Diags;
184 SourceLocation InputSourceLoc;
186 const char *LastTokenPtr =
nullptr;
190 SmallVector<dependency_directives_scan::Token, 32> CurDirToks;
194 SmallVector<DirectiveWithTokens, 64> DirsWithToks;
195 LangOptions LangOpts;
201bool Scanner::reportError(
const char *CurPtr,
unsigned Err) {
204 assert(CurPtr >= Input.data() &&
"invalid buffer ptr");
216 assert(Current >
First);
220 if (Current >
First && *(Current - 1) ==
'\\') {
224 if (EscapeSize > 0) {
227 Current -= (1 + EscapeSize);
239 const char *Current) {
240 assert(
First <= Current);
243 if (*Current !=
'"' ||
First == Current)
249 if (
First == Current ||
254 if (*Current ==
'u' || *Current ==
'U' || *Current ==
'L')
255 return First == Current ||
259 if (*Current !=
'8' ||
First == Current ||
262 return First == Current ||
267 assert(
First[0] ==
'"');
289 while (
Last != End &&
size_t(
Last -
First) < Terminator.size() &&
298 if (
size_t(
Last -
First) < Terminator.size())
308static unsigned isEOL(
const char *
First,
const char *
const End) {
319 const char Terminator = *
First ==
'<' ?
'>' : *
First;
334 const char *FirstAfterBackslashPastSpace =
First;
336 if (
unsigned NLSize =
isEOL(FirstAfterBackslashPastSpace, End)) {
339 First = FirstAfterBackslashPastSpace + NLSize - 1;
352 assert(Len &&
"expected newline");
366 char LastNonWhitespace =
' ';
369 LastNonWhitespace = *
First;
375 if (LastNonWhitespace !=
'\\')
391 if (End -
First < 4) {
405 const char *
const Cur,
406 const char *
const End) {
407 assert(*Cur ==
'\'' &&
"expected quotation character");
415 char Prev = *(Cur - 1);
416 if (Prev ==
'L' || Prev ==
'U' || Prev ==
'u')
418 if (Prev ==
'8' && (Cur - 1 != Start) && *(Cur - 2) ==
'u')
426void Scanner::skipLine(
const char *&
First,
const char *
const End) {
428 assert(
First <= End);
436 const char *Start =
First;
439 char LastNonWhitespace =
' ';
444 LastTokenPtr =
First;
462 LastTokenPtr =
First;
464 LastNonWhitespace = *
First;
469 if (
First[1] ==
'/') {
475 if (
First[1] !=
'*') {
476 LastTokenPtr =
First;
478 LastNonWhitespace = *
First;
492 if (LastNonWhitespace !=
'\\')
497void Scanner::skipDirective(StringRef Name,
const char *&
First,
498 const char *
const End) {
499 if (llvm::StringSwitch<bool>(Name)
500 .Case(
"warning",
true)
506 skipLine(
First, End);
511 assert(
First <= End);
517 if (*
First ==
'\\') {
518 const char *Ptr =
First + 1;
534 if (
First[1] ==
'/') {
549 const char *
const End) {
550 assert(Kind == DirectiveKind::cxx_export_import_decl ||
551 Kind == DirectiveKind::cxx_export_module_decl ||
552 Kind == DirectiveKind::cxx_import_decl ||
553 Kind == DirectiveKind::cxx_module_decl ||
554 Kind == DirectiveKind::decl_at_import);
556 const char *DirectiveLoc = Input.data() + CurDirToks.front().Offset;
560 const dependency_directives_scan::Token &
Tok = lexToken(
First, End);
563 CurDirToks.pop_back();
570 diag::err_dep_source_scanner_missing_semi_after_at_import);
571 if (
Tok.
is(tok::semi))
575 bool IsCXXModules =
Kind == DirectiveKind::cxx_export_import_decl ||
576 Kind == DirectiveKind::cxx_export_module_decl ||
577 Kind == DirectiveKind::cxx_import_decl ||
578 Kind == DirectiveKind::cxx_module_decl;
580 lexPPDirectiveBody(
First, End);
591 DirectiveLoc, diag::err_dep_source_scanner_unexpected_tokens_at_import);
596dependency_directives_scan::Token &Scanner::lexToken(
const char *&
First,
597 const char *
const End) {
601 assert(
First <= End);
604 CurDirToks.emplace_back(Offset,
Tok.getLength(),
Tok.getKind(),
606 return CurDirToks.back();
609dependency_directives_scan::Token &
610Scanner::lexIncludeFilename(
const char *&
First,
const char *
const End) {
614 assert(
First <= End);
617 CurDirToks.emplace_back(Offset,
Tok.getLength(),
Tok.getKind(),
619 return CurDirToks.back();
622void Scanner::lexPPDirectiveBody(
const char *&
First,
const char *
const End) {
624 const dependency_directives_scan::Token &
Tok = lexToken(
First, End);
631Scanner::cleanStringIfNeeded(
const dependency_directives_scan::Token &
Tok) {
633 if (LLVM_LIKELY(!NeedsCleaning))
636 SmallString<64> Spelling;
642 unsigned SpellingLength = 0;
643 const char *BufPtr = Input.begin() +
Tok.
Offset;
644 const char *AfterIdent = Input.begin() +
Tok.
getEnd();
645 while (BufPtr < AfterIdent) {
647 Spelling[SpellingLength++] = Char;
651 return SplitIds.try_emplace(StringRef(Spelling.begin(), SpellingLength), 0)
655std::optional<StringRef>
656Scanner::tryLexIdentifierOrSkipLine(
const char *&
First,
const char *
const End) {
657 const dependency_directives_scan::Token &
Tok = lexToken(
First, End);
658 if (
Tok.
isNot(tok::raw_identifier)) {
659 if (!
Tok.
is(tok::eod))
660 skipLine(
First, End);
664 return cleanStringIfNeeded(
Tok);
667StringRef Scanner::lexIdentifier(
const char *&
First,
const char *
const End) {
668 std::optional<StringRef> Id = tryLexIdentifierOrSkipLine(
First, End);
669 assert(Id &&
"expected identifier token");
673bool Scanner::isNextIdentifierOrSkipLine(StringRef Id,
const char *&
First,
674 const char *
const End) {
675 if (std::optional<StringRef> FoundId =
676 tryLexIdentifierOrSkipLine(
First, End)) {
679 skipLine(
First, End);
685 const char *
const End) {
686 const dependency_directives_scan::Token &
Tok = lexToken(
First, End);
689 skipLine(
First, End);
693std::optional<StringRef>
694Scanner::tryLexStringLiteralOrSkipLine(
const char *&
First,
695 const char *
const End) {
696 const dependency_directives_scan::Token &
Tok = lexToken(
First, End);
698 if (!
Tok.
is(tok::eod))
699 skipLine(
First, End);
703 return cleanStringIfNeeded(
Tok);
706bool Scanner::lexAt(
const char *&
First,
const char *
const End) {
710 const dependency_directives_scan::Token &AtTok = lexToken(
First, End);
711 assert(AtTok.
is(tok::at));
714 if (!isNextIdentifierOrSkipLine(
"import",
First, End))
719bool Scanner::lexModule(
const char *&
First,
const char *
const End) {
720 StringRef Id = lexIdentifier(
First, End);
722 if (Id ==
"export") {
724 std::optional<StringRef> NextId = tryLexIdentifierOrSkipLine(
First, End);
731 ScanningPreprocessedModuleFile ?
"__preprocessed_module" :
"module";
733 ScanningPreprocessedModuleFile ?
"__preprocessed_import" :
"import";
735 if (Id !=
Module && Id != Import) {
736 skipLine(
First, End);
749 skipLine(
First, End);
756 skipLine(
First, End);
760 (void)lexToken(
First, End);
761 if (!tryLexIdentifierOrSkipLine(
First, End))
767 if (Id ==
Module && !Export)
769 skipLine(
First, End);
777 skipLine(
First, End);
782 TheLexer.
seek(getOffsetAt(
First),
false);
790 return lexModuleDirectiveBody(Kind,
First, End);
793bool Scanner::lex_Pragma(
const char *&
First,
const char *
const End) {
794 if (!isNextTokenOrSkipLine(tok::l_paren,
First, End))
797 std::optional<StringRef> Str = tryLexStringLiteralOrSkipLine(
First, End);
799 if (!Str || !isNextTokenOrSkipLine(tok::r_paren,
First, End))
802 SmallString<64> Buffer(*Str);
808 SmallVector<dependency_directives_scan::Token> DiscardTokens;
809 const char *Begin = Buffer.c_str();
810 Scanner PragmaScanner{StringRef(Begin, Buffer.size()), DiscardTokens, Diags,
814 if (PragmaScanner.lexPragma(Begin, Buffer.end()))
819 skipLine(
First, End);
823 assert(Begin == Buffer.end());
828bool Scanner::lexPragma(
const char *&
First,
const char *
const End) {
829 std::optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(
First, End);
833 StringRef Id = *FoundId;
834 auto Kind = llvm::StringSwitch<DirectiveKind>(Id)
841 lexPPDirectiveBody(
First, End);
847 skipLine(
First, End);
851 FoundId = tryLexIdentifierOrSkipLine(
First, End);
857 if (Id ==
"system_header") {
858 lexPPDirectiveBody(
First, End);
863 if (Id !=
"module") {
864 skipLine(
First, End);
869 if (!isNextIdentifierOrSkipLine(
"import",
First, End))
873 lexPPDirectiveBody(
First, End);
878bool Scanner::lexEndif(
const char *&
First,
const char *
const End) {
891 skipLine(
First, End);
899 const char *
const End) {
900 lexPPDirectiveBody(
First, End);
920 assert(
First <= End);
923 return Str.starts_with(
931bool Scanner::lexPPLine(
const char *&
First,
const char *
const End) {
932 assert(
First != End);
935 assert(
First <= End);
940 skipLine(
First, End);
941 assert(
First <= End);
945 LastTokenPtr =
First;
949 llvm::scope_exit ScEx1([&]() {
957 return lexAt(
First, End);
959 bool IsPreprocessedModule =
961 if (*
First ==
'_' && !IsPreprocessedModule) {
962 if (isNextIdentifierOrSkipLine(
"_Pragma",
First, End))
963 return lex_Pragma(
First, End);
970 llvm::scope_exit ScEx2(
974 if (*
First ==
'i' || *
First ==
'e' || *
First ==
'm' || IsPreprocessedModule)
975 return lexModule(
First, End);
978 const dependency_directives_scan::Token &HashTok = lexToken(
First, End);
979 if (HashTok.
is(tok::hashhash)) {
983 skipLine(
First, End);
984 assert(
First <= End);
987 assert(HashTok.
is(tok::hash));
990 std::optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(
First, End);
994 StringRef Id = *FoundId;
997 return lexPragma(
First, End);
999 auto Kind = llvm::StringSwitch<DirectiveKind>(Id)
1016 skipDirective(Id,
First, End);
1021 return lexEndif(
First, End);
1029 if (lexIncludeFilename(
First, End).
is(tok::eod)) {
1038 return lexDefault(Kind,
First, End);
1047bool Scanner::scanImpl(
const char *
First,
const char *
const End) {
1049 while (
First != End)
1050 if (lexPPLine(
First, End))
1055bool Scanner::scan(SmallVectorImpl<Directive> &Directives) {
1057 bool Error = scanImpl(Input.begin(), Input.end());
1062 (Tokens.empty() || LastTokenPtr > Input.begin() + Tokens.back().Offset))
1067 ArrayRef<dependency_directives_scan::Token> RemainingTokens = Tokens;
1068 for (
const DirectiveWithTokens &DirWithToks : DirsWithToks) {
1069 assert(RemainingTokens.size() >= DirWithToks.NumTokens);
1070 Directives.emplace_back(DirWithToks.Kind,
1071 RemainingTokens.take_front(DirWithToks.NumTokens));
1072 RemainingTokens = RemainingTokens.drop_front(DirWithToks.NumTokens);
1074 assert(RemainingTokens.empty());
1083 return Scanner(Input, Tokens, Diags, InputSourceLoc).scan(Directives);
1089 llvm::raw_ostream &OS) {
1091 auto needsSpaceSeparator =
1094 if (Prev ==
Tok.Kind)
1095 return !
Tok.isOneOf(tok::l_paren, tok::r_paren, tok::l_square,
1097 if (Prev == tok::raw_identifier &&
1098 Tok.isOneOf(tok::hash, tok::numeric_constant, tok::string_literal,
1099 tok::char_constant, tok::header_name))
1101 if (Prev == tok::r_paren &&
1102 Tok.isOneOf(tok::raw_identifier, tok::hash, tok::string_literal,
1103 tok::char_constant, tok::unknown))
1105 if (Prev == tok::comma &&
1106 Tok.isOneOf(tok::l_paren, tok::string_literal, tok::less))
1113 OS <<
"<TokBeforeEOF>";
1114 std::optional<tok::TokenKind> PrevTokenKind;
1116 if (PrevTokenKind && needsSpaceSeparator(*PrevTokenKind,
Tok))
1118 PrevTokenKind =
Tok.Kind;
1119 OS << Source.slice(
Tok.Offset,
Tok.getEnd());
1125 const char *
const End) {
1126 assert(
First <= End);
1127 while (
First != End) {
1128 if (*
First ==
'#') {
1142 const char *
First = Source.begin();
1143 const char *
const End = Source.end();
1156 if (S.lexModule(
First, End))
1158 auto IsCXXNamedModuleDirective = [](
const DirectiveWithTokens &D) {
1169 return llvm::any_of(S.DirsWithToks, IsCXXNamedModuleDirective);
1173 const char *
First = Source.begin();
1174 const char *
const End = Source.end();
1182 while (
First != End) {
1183 if (*
First ==
'#') {
1186 }
else if (*
First ==
'e') {
1187 S.TheLexer.
seek(S.getOffsetAt(
First),
true);
1188 StringRef Id = S.lexIdentifier(
First, End);
1189 if (Id ==
"export") {
1190 std::optional<StringRef> NextId =
1191 S.tryLexIdentifierOrSkipLine(
First, End);
1196 if (Id ==
"__preprocessed_module" || Id ==
"__preprocessed_import")
Defines the Diagnostic-related interfaces.
static void skipBlockComment(const char *&First, const char *const End)
static void skipRawString(const char *&First, const char *const End)
static void skipString(const char *&First, const char *const End)
static bool isStartOfRelevantLine(char First)
static bool isStartWithPreprocessedModuleDirective(const char *First, const char *End)
static bool isRawStringLiteral(const char *First, const char *Current)
static void skipUntilMaybeCXX20ModuleDirective(const char *&First, const char *const End)
static void skipOverSpaces(const char *&First, const char *const End)
static unsigned isEOL(const char *First, const char *const End)
static char previousChar(const char *First, const char *&Current)
static void skipToNewlineRaw(const char *&First, const char *const End)
static unsigned skipNewline(const char *&First, const char *End)
static void skipUTF8ByteOrderMark(const char *&First, const char *const End)
static void skipLineComment(const char *&First, const char *const End)
static bool isQuoteCppDigitSeparator(const char *const Start, const char *const Cur, const char *const End)
This is the interface for scanning header and source files to get the minimum necessary preprocessor ...
static unsigned skipWhitespace(unsigned Idx, StringRef Str, unsigned Length)
Skip over whitespace in the string, starting at the given index.
Concrete class used by the front-end to report problems and issues.
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
bool LexFromRawLexer(Token &Result)
LexFromRawLexer - Lex a token from a designated raw lexer (one with no associated preprocessor object...
static unsigned getEscapedNewLineSize(const char *P)
getEscapedNewLineSize - Return the size of the specified escaped newline, or 0 if it is not an escape...
void seek(unsigned Offset, bool IsAtStartOfLine)
Set the lexer's buffer pointer to Offset.
unsigned getCurrentBufferOffset()
Returns the current lexing offset.
static SizedChar getCharAndSizeNoWarn(const char *Ptr, const LangOptions &LangOpts)
getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever emit a warning.
void setParsingPreprocessorDirective(bool f)
Inform the lexer whether or not we are currently lexing a preprocessor directive.
void LexIncludeFilename(Token &FilenameTok)
Lex a token, producing a header-name token if possible.
Encodes a location in the source.
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
DirectiveKind
Represents the kind of preprocessor directive or a module declaration that is tracked by the scanner ...
@ tokens_present_before_eof
Indicates that there are tokens present between the last scanned directive and eof.
@ pp_pragma_system_header
@ pp_pragma_include_alias
bool isStringLiteral(TokenKind K)
Return true if this is a C or C++ string-literal (or C++11 user-defined-string-literal) token.
const char * getPPKeywordSpelling(PPKeywordKind Kind) LLVM_READNONE
Returns the spelling of preprocessor keywords, such as "else".
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
The JSON file list parser is used to communicate input to InstallAPI.
LLVM_READONLY bool isVerticalWhitespace(unsigned char c)
Returns true if this character is vertical ASCII whitespace: '\n', '\r'.
LLVM_READONLY bool isAsciiIdentifierContinue(unsigned char c)
void printDependencyDirectivesAsSource(StringRef Source, ArrayRef< dependency_directives_scan::Directive > Directives, llvm::raw_ostream &OS)
Print the previously scanned dependency directives as minimized source text.
bool scanInputForCXX20ModulesUsage(StringRef Source)
Scan an input source buffer for C++20 named module usage.
bool isPreprocessedModuleFile(StringRef Source)
Scan an input source buffer, and check whether the input source is a preprocessed output.
LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)
Returns true if this character is horizontal ASCII whitespace: ' ', '\t', '\f', '\v'.
bool scanSourceForDependencyDirectives(StringRef Input, SmallVectorImpl< dependency_directives_scan::Token > &Tokens, SmallVectorImpl< dependency_directives_scan::Directive > &Directives, DiagnosticsEngine *Diags=nullptr, SourceLocation InputSourceLoc=SourceLocation())
Scan the input for the preprocessor directives that might have an effect on the dependencies for a co...
@ Module
Module linkage, which indicates that the entity can be referred to from other translation units withi...
LLVM_READONLY bool isWhitespace(unsigned char c)
Return true if this character is horizontal or vertical ASCII whitespace: ' ', '\t', '\f', '\v', '\n', '\r'.
LLVM_READONLY bool isPreprocessingNumberBody(unsigned char c)
Return true if this is the body character of a C preprocessing number, which is [a-zA-Z0-9_.].
void prepare_PragmaString(SmallVectorImpl< char > &StrVal)
Destringize a _Pragma("") string according to C11 6.10.9.1: "The string literal is destringized by de...
Diagnostic wrappers for TextAPI types for error reporting.
Represents a directive that's lexed as part of the dependency directives scanning.
DirectiveKind Kind
The kind of token.
Token lexed as part of dependency directive scanning.
bool isNot(tok::TokenKind K) const
unsigned Offset
Offset into the original source input.
bool is(tok::TokenKind K) const
bool isOneOf(Ts... Ks) const