23#include "llvm/ADT/ScopeExit.h"
24#include "llvm/ADT/SmallString.h"
25#include "llvm/ADT/StringMap.h"
26#include "llvm/ADT/StringSwitch.h"
35struct DirectiveWithTokens {
40 : Kind(Kind), NumTokens(NumTokens) {}
63 Scanner(StringRef Input,
64 SmallVectorImpl<dependency_directives_scan::Token> &Tokens,
65 DiagnosticsEngine *Diags, SourceLocation InputSourceLoc)
66 : Input(Input), Tokens(Tokens), Diags(Diags),
67 InputSourceLoc(InputSourceLoc), LangOpts(getLangOptsForDepScanning()),
68 TheLexer(InputSourceLoc, LangOpts, Input.begin(), Input.begin(),
71 static LangOptions getLangOptsForDepScanning() {
75 LangOpts.LineComment =
true;
76 LangOpts.RawStringLiterals =
true;
84 bool scan(SmallVectorImpl<Directive> &Directives);
91 [[nodiscard]] dependency_directives_scan::Token &
92 lexToken(
const char *&
First,
const char *
const End);
94 [[nodiscard]] dependency_directives_scan::Token &
95 lexIncludeFilename(
const char *&
First,
const char *
const End);
97 void skipLine(
const char *&
First,
const char *
const End);
98 void skipDirective(StringRef Name,
const char *&
First,
const char *
const End);
102 StringRef cleanStringIfNeeded(
const dependency_directives_scan::Token &
Tok);
109 [[nodiscard]] std::optional<StringRef>
110 tryLexIdentifierOrSkipLine(
const char *&
First,
const char *
const End);
113 [[nodiscard]] StringRef lexIdentifier(
const char *&
First,
114 const char *
const End);
121 [[nodiscard]]
bool isNextIdentifierOrSkipLine(StringRef Id,
123 const char *
const End);
131 const char *
const End);
138 [[nodiscard]] std::optional<StringRef>
139 tryLexStringLiteralOrSkipLine(
const char *&
First,
const char *
const End);
141 [[nodiscard]]
bool scanImpl(
const char *
First,
const char *
const End);
142 [[nodiscard]]
bool lexPPLine(
const char *&
First,
const char *
const End);
143 [[nodiscard]]
bool lexAt(
const char *&
First,
const char *
const End);
144 [[nodiscard]]
bool lexModule(
const char *&
First,
const char *
const End);
145 [[nodiscard]]
bool lexDefine(
const char *HashLoc,
const char *&
First,
146 const char *
const End);
147 [[nodiscard]]
bool lexPragma(
const char *&
First,
const char *
const End);
148 [[nodiscard]]
bool lex_Pragma(
const char *&
First,
const char *
const End);
149 [[nodiscard]]
bool lexEndif(
const char *&
First,
const char *
const End);
151 const char *
const End);
152 [[nodiscard]]
bool lexModuleDirectiveBody(
DirectiveKind Kind,
154 const char *
const End);
155 void lexPPDirectiveBody(
const char *&
First,
const char *
const End);
158 Tokens.append(CurDirToks);
159 DirsWithToks.emplace_back(Kind, CurDirToks.size());
161 return DirsWithToks.back();
163 void popDirective() {
164 Tokens.pop_back_n(DirsWithToks.pop_back_val().NumTokens);
167 return DirsWithToks.empty() ?
pp_none : DirsWithToks.back().Kind;
170 unsigned getOffsetAt(
const char *CurPtr)
const {
171 return CurPtr - Input.data();
176 bool reportError(
const char *CurPtr,
unsigned Err);
178 bool ScanningPreprocessedModuleFile =
false;
179 StringMap<char> SplitIds;
181 SmallVectorImpl<dependency_directives_scan::Token> &Tokens;
182 DiagnosticsEngine *Diags;
183 SourceLocation InputSourceLoc;
185 const char *LastTokenPtr =
nullptr;
189 SmallVector<dependency_directives_scan::Token, 32> CurDirToks;
193 SmallVector<DirectiveWithTokens, 64> DirsWithToks;
194 LangOptions LangOpts;
200bool Scanner::reportError(
const char *CurPtr,
unsigned Err) {
203 assert(CurPtr >= Input.data() &&
"invalid buffer ptr");
215 assert(Current >
First);
219 if (Current >
First && *(Current - 1) ==
'\\') {
223 if (EscapeSize > 0) {
226 Current -= (1 + EscapeSize);
238 const char *Current) {
239 assert(
First <= Current);
242 if (*Current !=
'"' ||
First == Current)
248 if (
First == Current ||
253 if (*Current ==
'u' || *Current ==
'U' || *Current ==
'L')
254 return First == Current ||
258 if (*Current !=
'8' ||
First == Current ||
261 return First == Current ||
266 assert(
First[0] ==
'"');
288 while (
Last != End &&
size_t(
Last -
First) < Terminator.size() &&
297 if (
size_t(
Last -
First) < Terminator.size())
307static unsigned isEOL(
const char *
First,
const char *
const End) {
318 const char Terminator = *
First ==
'<' ?
'>' : *
First;
333 const char *FirstAfterBackslashPastSpace =
First;
335 if (
unsigned NLSize =
isEOL(FirstAfterBackslashPastSpace, End)) {
338 First = FirstAfterBackslashPastSpace + NLSize - 1;
351 assert(Len &&
"expected newline");
365 char LastNonWhitespace =
' ';
368 LastNonWhitespace = *
First;
374 if (LastNonWhitespace !=
'\\')
390 if (End -
First < 4) {
404 const char *
const Cur,
405 const char *
const End) {
406 assert(*Cur ==
'\'' &&
"expected quotation character");
414 char Prev = *(Cur - 1);
415 if (Prev ==
'L' || Prev ==
'U' || Prev ==
'u')
417 if (Prev ==
'8' && (Cur - 1 != Start) && *(Cur - 2) ==
'u')
425void Scanner::skipLine(
const char *&
First,
const char *
const End) {
427 assert(
First <= End);
435 const char *Start =
First;
438 char LastNonWhitespace =
' ';
443 LastTokenPtr =
First;
461 LastTokenPtr =
First;
463 LastNonWhitespace = *
First;
468 if (
First[1] ==
'/') {
474 if (
First[1] !=
'*') {
475 LastTokenPtr =
First;
477 LastNonWhitespace = *
First;
491 if (LastNonWhitespace !=
'\\')
496void Scanner::skipDirective(StringRef Name,
const char *&
First,
497 const char *
const End) {
498 if (llvm::StringSwitch<bool>(Name)
499 .Case(
"warning",
true)
505 skipLine(
First, End);
510 assert(
First <= End);
516 if (*
First ==
'\\') {
517 const char *Ptr =
First + 1;
533 if (
First[1] ==
'/') {
548 const char *
const End) {
549 assert(Kind == DirectiveKind::cxx_export_import_decl ||
550 Kind == DirectiveKind::cxx_export_module_decl ||
551 Kind == DirectiveKind::cxx_import_decl ||
552 Kind == DirectiveKind::cxx_module_decl ||
553 Kind == DirectiveKind::decl_at_import);
555 const char *DirectiveLoc = Input.data() + CurDirToks.front().Offset;
559 const dependency_directives_scan::Token &
Tok = lexToken(
First, End);
562 CurDirToks.pop_back();
569 diag::err_dep_source_scanner_missing_semi_after_at_import);
570 if (
Tok.
is(tok::semi))
574 bool IsCXXModules =
Kind == DirectiveKind::cxx_export_import_decl ||
575 Kind == DirectiveKind::cxx_export_module_decl ||
576 Kind == DirectiveKind::cxx_import_decl ||
577 Kind == DirectiveKind::cxx_module_decl;
579 lexPPDirectiveBody(
First, End);
590 DirectiveLoc, diag::err_dep_source_scanner_unexpected_tokens_at_import);
595dependency_directives_scan::Token &Scanner::lexToken(
const char *&
First,
596 const char *
const End) {
600 assert(
First <= End);
603 CurDirToks.emplace_back(Offset,
Tok.getLength(),
Tok.getKind(),
605 return CurDirToks.back();
608dependency_directives_scan::Token &
609Scanner::lexIncludeFilename(
const char *&
First,
const char *
const End) {
613 assert(
First <= End);
616 CurDirToks.emplace_back(Offset,
Tok.getLength(),
Tok.getKind(),
618 return CurDirToks.back();
621void Scanner::lexPPDirectiveBody(
const char *&
First,
const char *
const End) {
623 const dependency_directives_scan::Token &
Tok = lexToken(
First, End);
630Scanner::cleanStringIfNeeded(
const dependency_directives_scan::Token &
Tok) {
632 if (LLVM_LIKELY(!NeedsCleaning))
635 SmallString<64> Spelling;
641 unsigned SpellingLength = 0;
642 const char *BufPtr = Input.begin() +
Tok.
Offset;
643 const char *AfterIdent = Input.begin() +
Tok.
getEnd();
644 while (BufPtr < AfterIdent) {
646 Spelling[SpellingLength++] = Char;
650 return SplitIds.try_emplace(StringRef(Spelling.begin(), SpellingLength), 0)
654std::optional<StringRef>
655Scanner::tryLexIdentifierOrSkipLine(
const char *&
First,
const char *
const End) {
656 const dependency_directives_scan::Token &
Tok = lexToken(
First, End);
657 if (
Tok.
isNot(tok::raw_identifier)) {
658 if (!
Tok.
is(tok::eod))
659 skipLine(
First, End);
663 return cleanStringIfNeeded(
Tok);
666StringRef Scanner::lexIdentifier(
const char *&
First,
const char *
const End) {
667 std::optional<StringRef> Id = tryLexIdentifierOrSkipLine(
First, End);
668 assert(Id &&
"expected identifier token");
672bool Scanner::isNextIdentifierOrSkipLine(StringRef Id,
const char *&
First,
673 const char *
const End) {
674 if (std::optional<StringRef> FoundId =
675 tryLexIdentifierOrSkipLine(
First, End)) {
678 skipLine(
First, End);
684 const char *
const End) {
685 const dependency_directives_scan::Token &
Tok = lexToken(
First, End);
688 skipLine(
First, End);
692std::optional<StringRef>
693Scanner::tryLexStringLiteralOrSkipLine(
const char *&
First,
694 const char *
const End) {
695 const dependency_directives_scan::Token &
Tok = lexToken(
First, End);
697 if (!
Tok.
is(tok::eod))
698 skipLine(
First, End);
702 return cleanStringIfNeeded(
Tok);
705bool Scanner::lexAt(
const char *&
First,
const char *
const End) {
709 const dependency_directives_scan::Token &AtTok = lexToken(
First, End);
710 assert(AtTok.
is(tok::at));
713 if (!isNextIdentifierOrSkipLine(
"import",
First, End))
718bool Scanner::lexModule(
const char *&
First,
const char *
const End) {
719 StringRef Id = lexIdentifier(
First, End);
721 if (Id ==
"export") {
723 std::optional<StringRef> NextId = tryLexIdentifierOrSkipLine(
First, End);
730 ScanningPreprocessedModuleFile ?
"__preprocessed_module" :
"module";
732 ScanningPreprocessedModuleFile ?
"__preprocessed_import" :
"import";
734 if (Id !=
Module && Id != Import) {
735 skipLine(
First, End);
748 skipLine(
First, End);
755 skipLine(
First, End);
759 (void)lexToken(
First, End);
760 if (!tryLexIdentifierOrSkipLine(
First, End))
766 if (Id ==
Module && !Export)
768 skipLine(
First, End);
776 skipLine(
First, End);
781 TheLexer.
seek(getOffsetAt(
First),
false);
789 return lexModuleDirectiveBody(Kind,
First, End);
792bool Scanner::lex_Pragma(
const char *&
First,
const char *
const End) {
793 if (!isNextTokenOrSkipLine(tok::l_paren,
First, End))
796 std::optional<StringRef> Str = tryLexStringLiteralOrSkipLine(
First, End);
798 if (!Str || !isNextTokenOrSkipLine(tok::r_paren,
First, End))
801 SmallString<64> Buffer(*Str);
807 SmallVector<dependency_directives_scan::Token> DiscardTokens;
808 const char *Begin = Buffer.c_str();
809 Scanner PragmaScanner{StringRef(Begin, Buffer.size()), DiscardTokens, Diags,
813 if (PragmaScanner.lexPragma(Begin, Buffer.end()))
818 skipLine(
First, End);
822 assert(Begin == Buffer.end());
827bool Scanner::lexPragma(
const char *&
First,
const char *
const End) {
828 std::optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(
First, End);
832 StringRef Id = *FoundId;
833 auto Kind = llvm::StringSwitch<DirectiveKind>(Id)
840 lexPPDirectiveBody(
First, End);
846 skipLine(
First, End);
850 FoundId = tryLexIdentifierOrSkipLine(
First, End);
856 if (Id ==
"system_header") {
857 lexPPDirectiveBody(
First, End);
862 if (Id !=
"module") {
863 skipLine(
First, End);
868 if (!isNextIdentifierOrSkipLine(
"import",
First, End))
872 lexPPDirectiveBody(
First, End);
877bool Scanner::lexEndif(
const char *&
First,
const char *
const End) {
890 skipLine(
First, End);
898 const char *
const End) {
899 lexPPDirectiveBody(
First, End);
919 assert(
First <= End);
922 return Str.starts_with(
930bool Scanner::lexPPLine(
const char *&
First,
const char *
const End) {
931 assert(
First != End);
934 assert(
First <= End);
939 skipLine(
First, End);
940 assert(
First <= End);
944 LastTokenPtr =
First;
948 llvm::scope_exit ScEx1([&]() {
956 return lexAt(
First, End);
958 bool IsPreprocessedModule =
960 if (*
First ==
'_' && !IsPreprocessedModule) {
961 if (isNextIdentifierOrSkipLine(
"_Pragma",
First, End))
962 return lex_Pragma(
First, End);
969 llvm::scope_exit ScEx2(
973 if (*
First ==
'i' || *
First ==
'e' || *
First ==
'm' || IsPreprocessedModule)
974 return lexModule(
First, End);
977 const dependency_directives_scan::Token &HashTok = lexToken(
First, End);
978 if (HashTok.
is(tok::hashhash)) {
982 skipLine(
First, End);
983 assert(
First <= End);
986 assert(HashTok.
is(tok::hash));
989 std::optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(
First, End);
993 StringRef Id = *FoundId;
996 return lexPragma(
First, End);
998 auto Kind = llvm::StringSwitch<DirectiveKind>(Id)
1015 skipDirective(Id,
First, End);
1020 return lexEndif(
First, End);
1028 if (lexIncludeFilename(
First, End).
is(tok::eod)) {
1037 return lexDefault(Kind,
First, End);
1046bool Scanner::scanImpl(
const char *
First,
const char *
const End) {
1048 while (
First != End)
1049 if (lexPPLine(
First, End))
1054bool Scanner::scan(SmallVectorImpl<Directive> &Directives) {
1056 bool Error = scanImpl(Input.begin(), Input.end());
1061 (Tokens.empty() || LastTokenPtr > Input.begin() + Tokens.back().Offset))
1066 ArrayRef<dependency_directives_scan::Token> RemainingTokens = Tokens;
1067 for (
const DirectiveWithTokens &DirWithToks : DirsWithToks) {
1068 assert(RemainingTokens.size() >= DirWithToks.NumTokens);
1069 Directives.emplace_back(DirWithToks.Kind,
1070 RemainingTokens.take_front(DirWithToks.NumTokens));
1071 RemainingTokens = RemainingTokens.drop_front(DirWithToks.NumTokens);
1073 assert(RemainingTokens.empty());
1082 return Scanner(Input, Tokens, Diags, InputSourceLoc).scan(Directives);
1088 llvm::raw_ostream &OS) {
1090 auto needsSpaceSeparator =
1093 if (Prev ==
Tok.Kind)
1094 return !
Tok.isOneOf(tok::l_paren, tok::r_paren, tok::l_square,
1096 if (Prev == tok::raw_identifier &&
1097 Tok.isOneOf(tok::hash, tok::numeric_constant, tok::string_literal,
1098 tok::char_constant, tok::header_name))
1100 if (Prev == tok::r_paren &&
1101 Tok.isOneOf(tok::raw_identifier, tok::hash, tok::string_literal,
1102 tok::char_constant, tok::unknown))
1104 if (Prev == tok::comma &&
1105 Tok.isOneOf(tok::l_paren, tok::string_literal, tok::less))
1112 OS <<
"<TokBeforeEOF>";
1113 std::optional<tok::TokenKind> PrevTokenKind;
1115 if (PrevTokenKind && needsSpaceSeparator(*PrevTokenKind,
Tok))
1117 PrevTokenKind =
Tok.Kind;
1118 OS << Source.slice(
Tok.Offset,
Tok.getEnd());
1124 const char *
const End) {
1125 assert(
First <= End);
1126 while (
First != End) {
1127 if (*
First ==
'#') {
1141 const char *
First = Source.begin();
1142 const char *
const End = Source.end();
1155 if (S.lexModule(
First, End))
1157 auto IsCXXNamedModuleDirective = [](
const DirectiveWithTokens &D) {
1168 return llvm::any_of(S.DirsWithToks, IsCXXNamedModuleDirective);
1172 const char *
First = Source.begin();
1173 const char *
const End = Source.end();
1181 while (
First != End) {
1182 if (*
First ==
'#') {
1185 }
else if (*
First ==
'e') {
1186 S.TheLexer.
seek(S.getOffsetAt(
First),
true);
1187 StringRef Id = S.lexIdentifier(
First, End);
1188 if (Id ==
"export") {
1189 std::optional<StringRef> NextId =
1190 S.tryLexIdentifierOrSkipLine(
First, End);
1195 if (Id ==
"__preprocessed_module" || Id ==
"__preprocessed_import")
Defines the Diagnostic-related interfaces.
static void skipBlockComment(const char *&First, const char *const End)
static void skipRawString(const char *&First, const char *const End)
static void skipString(const char *&First, const char *const End)
static bool isStartOfRelevantLine(char First)
static bool isStartWithPreprocessedModuleDirective(const char *First, const char *End)
static bool isRawStringLiteral(const char *First, const char *Current)
static void skipUntilMaybeCXX20ModuleDirective(const char *&First, const char *const End)
static void skipOverSpaces(const char *&First, const char *const End)
static unsigned isEOL(const char *First, const char *const End)
static char previousChar(const char *First, const char *&Current)
static void skipToNewlineRaw(const char *&First, const char *const End)
static unsigned skipNewline(const char *&First, const char *End)
static void skipUTF8ByteOrderMark(const char *&First, const char *const End)
static void skipLineComment(const char *&First, const char *const End)
static bool isQuoteCppDigitSeparator(const char *const Start, const char *const Cur, const char *const End)
This is the interface for scanning header and source files to get the minimum necessary preprocessor directives for evaluating includes.
static unsigned skipWhitespace(unsigned Idx, StringRef Str, unsigned Length)
Skip over whitespace in the string, starting at the given index.
Concrete class used by the front-end to report problems and issues.
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
bool LexFromRawLexer(Token &Result)
LexFromRawLexer - Lex a token from a designated raw lexer (one with no associated preprocessor object).
static unsigned getEscapedNewLineSize(const char *P)
getEscapedNewLineSize - Return the size of the specified escaped newline, or 0 if it is not an escaped newline.
void seek(unsigned Offset, bool IsAtStartOfLine)
Set the lexer's buffer pointer to Offset.
unsigned getCurrentBufferOffset()
Returns the current lexing offset.
static SizedChar getCharAndSizeNoWarn(const char *Ptr, const LangOptions &LangOpts)
getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever emit a warning.
void setParsingPreprocessorDirective(bool f)
Inform the lexer whether or not we are currently lexing a preprocessor directive.
void LexIncludeFilename(Token &FilenameTok)
Lex a token, producing a header-name token if possible.
Encodes a location in the source.
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
DirectiveKind
Represents the kind of preprocessor directive or a module declaration that is tracked by the scanner in its token output.
@ tokens_present_before_eof
Indicates that there are tokens present between the last scanned directive and eof.
@ pp_pragma_system_header
@ pp_pragma_include_alias
bool isStringLiteral(TokenKind K)
Return true if this is a C or C++ string-literal (or C++11 user-defined-string-literal) token.
const char * getPPKeywordSpelling(PPKeywordKind Kind) LLVM_READNONE
Returns the spelling of preprocessor keywords, such as "else".
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
The JSON file list parser is used to communicate input to InstallAPI.
LLVM_READONLY bool isVerticalWhitespace(unsigned char c)
Returns true if this character is vertical ASCII whitespace: '\n', '\r'.
LLVM_READONLY bool isAsciiIdentifierContinue(unsigned char c)
void printDependencyDirectivesAsSource(StringRef Source, ArrayRef< dependency_directives_scan::Directive > Directives, llvm::raw_ostream &OS)
Print the previously scanned dependency directives as minimized source text.
bool scanInputForCXX20ModulesUsage(StringRef Source)
Scan an input source buffer for C++20 named module usage.
bool isPreprocessedModuleFile(StringRef Source)
Scan an input source buffer, and check whether the input source is a preprocessed output.
LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)
Returns true if this character is horizontal ASCII whitespace: ' ', '\t', '\f', '\v'.
bool scanSourceForDependencyDirectives(StringRef Input, SmallVectorImpl< dependency_directives_scan::Token > &Tokens, SmallVectorImpl< dependency_directives_scan::Directive > &Directives, DiagnosticsEngine *Diags=nullptr, SourceLocation InputSourceLoc=SourceLocation())
Scan the input for the preprocessor directives that might have an effect on the dependencies for a compilation unit.
@ Module
Module linkage, which indicates that the entity can be referred to from other translation units withi...
LLVM_READONLY bool isWhitespace(unsigned char c)
Return true if this character is horizontal or vertical ASCII whitespace: ' ', '\t', '\f', '\v', '\n', '\r'.
LLVM_READONLY bool isPreprocessingNumberBody(unsigned char c)
Return true if this is the body character of a C preprocessing number, which is [a-zA-Z0-9_.].
void prepare_PragmaString(SmallVectorImpl< char > &StrVal)
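Destringize a _Pragma("") string according to C11 6.10.9.1: "The string literal is destringized by deleting any encoding prefix, deleting the leading and trailing double-quotes, replacing each escape sequence \" by a double-quote, and replacing each escape sequence \\ by a single backslash."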
Diagnostic wrappers for TextAPI types for error reporting.
Represents a directive that's lexed as part of the dependency directives scanning.
DirectiveKind Kind
The kind of token.
Token lexed as part of dependency directive scanning.
bool isNot(tok::TokenKind K) const
unsigned Offset
Offset into the original source input.
bool is(tok::TokenKind K) const
bool isOneOf(Ts... Ks) const