23#include "llvm/ADT/ScopeExit.h"
24#include "llvm/ADT/SmallString.h"
25#include "llvm/ADT/StringMap.h"
26#include "llvm/ADT/StringSwitch.h"
35struct DirectiveWithTokens {
40 : Kind(Kind), NumTokens(NumTokens) {}
63 Scanner(StringRef Input,
64 SmallVectorImpl<dependency_directives_scan::Token> &Tokens,
65 DiagnosticsEngine *Diags, SourceLocation InputSourceLoc)
66 : Input(Input), Tokens(Tokens), Diags(Diags),
67 InputSourceLoc(InputSourceLoc), LangOpts(getLangOptsForDepScanning()),
68 TheLexer(InputSourceLoc, LangOpts, Input.begin(), Input.begin(),
71 static LangOptions getLangOptsForDepScanning() {
75 LangOpts.LineComment =
true;
76 LangOpts.RawStringLiterals =
true;
84 bool scan(SmallVectorImpl<Directive> &Directives);
88 [[nodiscard]] dependency_directives_scan::Token &
89 lexToken(
const char *&
First,
const char *
const End);
91 [[nodiscard]] dependency_directives_scan::Token &
92 lexIncludeFilename(
const char *&
First,
const char *
const End);
94 void skipLine(
const char *&
First,
const char *
const End);
95 void skipDirective(StringRef Name,
const char *&
First,
const char *
const End);
99 StringRef cleanStringIfNeeded(
const dependency_directives_scan::Token &
Tok);
106 [[nodiscard]] std::optional<StringRef>
107 tryLexIdentifierOrSkipLine(
const char *&
First,
const char *
const End);
110 [[nodiscard]] StringRef lexIdentifier(
const char *&
First,
111 const char *
const End);
118 [[nodiscard]]
bool isNextIdentifierOrSkipLine(StringRef Id,
120 const char *
const End);
128 const char *
const End);
135 [[nodiscard]] std::optional<StringRef>
136 tryLexStringLiteralOrSkipLine(
const char *&
First,
const char *
const End);
138 [[nodiscard]]
bool scanImpl(
const char *
First,
const char *
const End);
139 [[nodiscard]]
bool lexPPLine(
const char *&
First,
const char *
const End);
140 [[nodiscard]]
bool lexAt(
const char *&
First,
const char *
const End);
141 [[nodiscard]]
bool lexModule(
const char *&
First,
const char *
const End);
142 [[nodiscard]]
bool lexDefine(
const char *HashLoc,
const char *&
First,
143 const char *
const End);
144 [[nodiscard]]
bool lexPragma(
const char *&
First,
const char *
const End);
145 [[nodiscard]]
bool lex_Pragma(
const char *&
First,
const char *
const End);
146 [[nodiscard]]
bool lexEndif(
const char *&
First,
const char *
const End);
148 const char *
const End);
149 [[nodiscard]]
bool lexModuleDirectiveBody(
DirectiveKind Kind,
151 const char *
const End);
152 void lexPPDirectiveBody(
const char *&
First,
const char *
const End);
155 Tokens.append(CurDirToks);
156 DirsWithToks.emplace_back(Kind, CurDirToks.size());
158 return DirsWithToks.back();
160 void popDirective() {
161 Tokens.pop_back_n(DirsWithToks.pop_back_val().NumTokens);
164 return DirsWithToks.empty() ?
pp_none : DirsWithToks.back().Kind;
167 unsigned getOffsetAt(
const char *CurPtr)
const {
168 return CurPtr - Input.data();
173 bool reportError(
const char *CurPtr,
unsigned Err);
175 StringMap<char> SplitIds;
177 SmallVectorImpl<dependency_directives_scan::Token> &Tokens;
178 DiagnosticsEngine *Diags;
179 SourceLocation InputSourceLoc;
181 const char *LastTokenPtr =
nullptr;
185 SmallVector<dependency_directives_scan::Token, 32> CurDirToks;
189 SmallVector<DirectiveWithTokens, 64> DirsWithToks;
190 LangOptions LangOpts;
196bool Scanner::reportError(
const char *CurPtr,
unsigned Err) {
199 assert(CurPtr >= Input.data() &&
"invalid buffer ptr");
211 assert(Current >
First);
215 if (Current >
First && *(Current - 1) ==
'\\') {
219 if (EscapeSize > 0) {
222 Current -= (1 + EscapeSize);
234 const char *Current) {
235 assert(
First <= Current);
238 if (*Current !=
'"' ||
First == Current)
244 if (
First == Current ||
249 if (*Current ==
'u' || *Current ==
'U' || *Current ==
'L')
250 return First == Current ||
254 if (*Current !=
'8' ||
First == Current ||
257 return First == Current ||
262 assert(
First[0] ==
'"');
284 while (
Last != End &&
size_t(
Last -
First) < Terminator.size() &&
293 if (
size_t(
Last -
First) < Terminator.size())
303static unsigned isEOL(
const char *
First,
const char *
const End) {
314 const char Terminator = *
First ==
'<' ?
'>' : *
First;
329 const char *FirstAfterBackslashPastSpace =
First;
331 if (
unsigned NLSize =
isEOL(FirstAfterBackslashPastSpace, End)) {
334 First = FirstAfterBackslashPastSpace + NLSize - 1;
347 assert(Len &&
"expected newline");
361 char LastNonWhitespace =
' ';
364 LastNonWhitespace = *
First;
370 if (LastNonWhitespace !=
'\\')
386 if (End -
First < 4) {
400 const char *
const Cur,
401 const char *
const End) {
402 assert(*Cur ==
'\'' &&
"expected quotation character");
410 char Prev = *(Cur - 1);
411 if (Prev ==
'L' || Prev ==
'U' || Prev ==
'u')
413 if (Prev ==
'8' && (Cur - 1 != Start) && *(Cur - 2) ==
'u')
421void Scanner::skipLine(
const char *&
First,
const char *
const End) {
423 assert(
First <= End);
431 const char *Start =
First;
434 char LastNonWhitespace =
' ';
439 LastTokenPtr =
First;
457 LastTokenPtr =
First;
459 LastNonWhitespace = *
First;
464 if (
First[1] ==
'/') {
470 if (
First[1] !=
'*') {
471 LastTokenPtr =
First;
473 LastNonWhitespace = *
First;
487 if (LastNonWhitespace !=
'\\')
492void Scanner::skipDirective(StringRef Name,
const char *&
First,
493 const char *
const End) {
494 if (llvm::StringSwitch<bool>(Name)
495 .Case(
"warning",
true)
501 skipLine(
First, End);
506 assert(
First <= End);
512 if (*
First ==
'\\') {
513 const char *Ptr =
First + 1;
529 if (
First[1] ==
'/') {
544 const char *
const End) {
545 const char *DirectiveLoc = Input.data() + CurDirToks.front().Offset;
549 const dependency_directives_scan::Token &
Tok = lexToken(
First, End);
552 CurDirToks.pop_back();
556 if (
Tok.
is(tok::eof))
559 diag::err_dep_source_scanner_missing_semi_after_at_import);
560 if (
Tok.
is(tok::semi))
564 const auto &
Tok = lexToken(
First, End);
568 return reportError(DirectiveLoc,
569 diag::err_dep_source_scanner_unexpected_tokens_at_import);
572dependency_directives_scan::Token &Scanner::lexToken(
const char *&
First,
573 const char *
const End) {
577 assert(
First <= End);
580 CurDirToks.emplace_back(Offset,
Tok.getLength(),
Tok.getKind(),
582 return CurDirToks.back();
585dependency_directives_scan::Token &
586Scanner::lexIncludeFilename(
const char *&
First,
const char *
const End) {
590 assert(
First <= End);
593 CurDirToks.emplace_back(Offset,
Tok.getLength(),
Tok.getKind(),
595 return CurDirToks.back();
598void Scanner::lexPPDirectiveBody(
const char *&
First,
const char *
const End) {
600 const dependency_directives_scan::Token &
Tok = lexToken(
First, End);
607Scanner::cleanStringIfNeeded(
const dependency_directives_scan::Token &
Tok) {
609 if (LLVM_LIKELY(!NeedsCleaning))
612 SmallString<64> Spelling;
618 unsigned SpellingLength = 0;
619 const char *BufPtr = Input.begin() +
Tok.
Offset;
620 const char *AfterIdent = Input.begin() +
Tok.
getEnd();
621 while (BufPtr < AfterIdent) {
623 Spelling[SpellingLength++] = Char;
627 return SplitIds.try_emplace(StringRef(Spelling.begin(), SpellingLength), 0)
631std::optional<StringRef>
632Scanner::tryLexIdentifierOrSkipLine(
const char *&
First,
const char *
const End) {
633 const dependency_directives_scan::Token &
Tok = lexToken(
First, End);
634 if (
Tok.
isNot(tok::raw_identifier)) {
635 if (!
Tok.
is(tok::eod))
636 skipLine(
First, End);
640 return cleanStringIfNeeded(
Tok);
643StringRef Scanner::lexIdentifier(
const char *&
First,
const char *
const End) {
644 std::optional<StringRef> Id = tryLexIdentifierOrSkipLine(
First, End);
645 assert(Id &&
"expected identifier token");
649bool Scanner::isNextIdentifierOrSkipLine(StringRef Id,
const char *&
First,
650 const char *
const End) {
651 if (std::optional<StringRef> FoundId =
652 tryLexIdentifierOrSkipLine(
First, End)) {
655 skipLine(
First, End);
661 const char *
const End) {
662 const dependency_directives_scan::Token &
Tok = lexToken(
First, End);
665 skipLine(
First, End);
669std::optional<StringRef>
670Scanner::tryLexStringLiteralOrSkipLine(
const char *&
First,
671 const char *
const End) {
672 const dependency_directives_scan::Token &
Tok = lexToken(
First, End);
674 if (!
Tok.
is(tok::eod))
675 skipLine(
First, End);
679 return cleanStringIfNeeded(
Tok);
682bool Scanner::lexAt(
const char *&
First,
const char *
const End) {
686 const dependency_directives_scan::Token &AtTok = lexToken(
First, End);
687 assert(AtTok.
is(tok::at));
690 if (!isNextIdentifierOrSkipLine(
"import",
First, End))
695bool Scanner::lexModule(
const char *&
First,
const char *
const End) {
696 StringRef Id = lexIdentifier(
First, End);
698 if (Id ==
"export") {
700 std::optional<StringRef> NextId = tryLexIdentifierOrSkipLine(
First, End);
706 if (Id !=
"module" && Id !=
"import") {
707 skipLine(
First, End);
719 if (Id ==
"module") {
720 skipLine(
First, End);
727 skipLine(
First, End);
731 (void)lexToken(
First, End);
732 if (!tryLexIdentifierOrSkipLine(
First, End))
738 if (Id ==
"module" && !Export)
740 skipLine(
First, End);
748 skipLine(
First, End);
753 TheLexer.
seek(getOffsetAt(
First),
false);
761 return lexModuleDirectiveBody(Kind,
First, End);
764bool Scanner::lex_Pragma(
const char *&
First,
const char *
const End) {
765 if (!isNextTokenOrSkipLine(tok::l_paren,
First, End))
768 std::optional<StringRef> Str = tryLexStringLiteralOrSkipLine(
First, End);
770 if (!Str || !isNextTokenOrSkipLine(tok::r_paren,
First, End))
773 SmallString<64> Buffer(*Str);
779 SmallVector<dependency_directives_scan::Token> DiscardTokens;
780 const char *Begin = Buffer.c_str();
781 Scanner PragmaScanner{StringRef(Begin, Buffer.size()), DiscardTokens, Diags,
785 if (PragmaScanner.lexPragma(Begin, Buffer.end()))
790 skipLine(
First, End);
794 assert(Begin == Buffer.end());
799bool Scanner::lexPragma(
const char *&
First,
const char *
const End) {
800 std::optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(
First, End);
804 StringRef Id = *FoundId;
805 auto Kind = llvm::StringSwitch<DirectiveKind>(Id)
812 lexPPDirectiveBody(
First, End);
818 skipLine(
First, End);
822 FoundId = tryLexIdentifierOrSkipLine(
First, End);
828 if (Id ==
"system_header") {
829 lexPPDirectiveBody(
First, End);
834 if (Id !=
"module") {
835 skipLine(
First, End);
840 if (!isNextIdentifierOrSkipLine(
"import",
First, End))
844 lexPPDirectiveBody(
First, End);
849bool Scanner::lexEndif(
const char *&
First,
const char *
const End) {
862 skipLine(
First, End);
870 const char *
const End) {
871 lexPPDirectiveBody(
First, End);
889bool Scanner::lexPPLine(
const char *&
First,
const char *
const End) {
890 assert(
First != End);
893 assert(
First <= End);
898 skipLine(
First, End);
899 assert(
First <= End);
903 LastTokenPtr =
First;
907 auto ScEx1 = make_scope_exit([&]() {
914 if (isNextIdentifierOrSkipLine(
"_Pragma",
First, End))
915 return lex_Pragma(
First, End);
922 auto ScEx2 = make_scope_exit(
927 return lexAt(
First, End);
931 return lexModule(
First, End);
934 const dependency_directives_scan::Token &HashTok = lexToken(
First, End);
935 if (HashTok.
is(tok::hashhash)) {
939 skipLine(
First, End);
940 assert(
First <= End);
943 assert(HashTok.
is(tok::hash));
946 std::optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(
First, End);
950 StringRef Id = *FoundId;
953 return lexPragma(
First, End);
955 auto Kind = llvm::StringSwitch<DirectiveKind>(Id)
972 skipDirective(Id,
First, End);
977 return lexEndif(
First, End);
985 if (lexIncludeFilename(
First, End).
is(tok::eod)) {
994 return lexDefault(Kind,
First, End);
1003bool Scanner::scanImpl(
const char *
First,
const char *
const End) {
1005 while (
First != End)
1006 if (lexPPLine(
First, End))
1011bool Scanner::scan(SmallVectorImpl<Directive> &Directives) {
1012 bool Error = scanImpl(Input.begin(), Input.end());
1017 (Tokens.empty() || LastTokenPtr > Input.begin() + Tokens.back().Offset))
1022 ArrayRef<dependency_directives_scan::Token> RemainingTokens = Tokens;
1023 for (
const DirectiveWithTokens &DirWithToks : DirsWithToks) {
1024 assert(RemainingTokens.size() >= DirWithToks.NumTokens);
1025 Directives.emplace_back(DirWithToks.Kind,
1026 RemainingTokens.take_front(DirWithToks.NumTokens));
1027 RemainingTokens = RemainingTokens.drop_front(DirWithToks.NumTokens);
1029 assert(RemainingTokens.empty());
1038 return Scanner(Input, Tokens, Diags, InputSourceLoc).scan(Directives);
1044 llvm::raw_ostream &OS) {
1046 auto needsSpaceSeparator =
1049 if (Prev ==
Tok.Kind)
1050 return !
Tok.isOneOf(tok::l_paren, tok::r_paren, tok::l_square,
1052 if (Prev == tok::raw_identifier &&
1053 Tok.isOneOf(tok::hash, tok::numeric_constant, tok::string_literal,
1054 tok::char_constant, tok::header_name))
1056 if (Prev == tok::r_paren &&
1057 Tok.isOneOf(tok::raw_identifier, tok::hash, tok::string_literal,
1058 tok::char_constant, tok::unknown))
1060 if (Prev == tok::comma &&
1061 Tok.isOneOf(tok::l_paren, tok::string_literal, tok::less))
1068 OS <<
"<TokBeforeEOF>";
1069 std::optional<tok::TokenKind> PrevTokenKind;
1071 if (PrevTokenKind && needsSpaceSeparator(*PrevTokenKind,
Tok))
1073 PrevTokenKind =
Tok.Kind;
1074 OS << Source.slice(
Tok.Offset,
Tok.getEnd());
Defines the Diagnostic-related interfaces.
static void skipBlockComment(const char *&First, const char *const End)
static void skipRawString(const char *&First, const char *const End)
static void skipString(const char *&First, const char *const End)
static bool isStartOfRelevantLine(char First)
static bool isRawStringLiteral(const char *First, const char *Current)
static void skipOverSpaces(const char *&First, const char *const End)
static unsigned isEOL(const char *First, const char *const End)
static char previousChar(const char *First, const char *&Current)
static void skipToNewlineRaw(const char *&First, const char *const End)
static unsigned skipNewline(const char *&First, const char *End)
static void skipUTF8ByteOrderMark(const char *&First, const char *const End)
static void skipLineComment(const char *&First, const char *const End)
static bool isQuoteCppDigitSeparator(const char *const Start, const char *const Cur, const char *const End)
This is the interface for scanning header and source files to get the minimum necessary preprocessor directives for evaluating includes.
static unsigned skipWhitespace(unsigned Idx, StringRef Str, unsigned Length)
Skip over whitespace in the string, starting at the given index.
Concrete class used by the front-end to report problems and issues.
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
bool LexFromRawLexer(Token &Result)
LexFromRawLexer - Lex a token from a designated raw lexer (one with no associated preprocessor object).
static unsigned getEscapedNewLineSize(const char *P)
getEscapedNewLineSize - Return the size of the specified escaped newline, or 0 if it is not an escaped newline.
void seek(unsigned Offset, bool IsAtStartOfLine)
Set the lexer's buffer pointer to Offset.
unsigned getCurrentBufferOffset()
Returns the current lexing offset.
static SizedChar getCharAndSizeNoWarn(const char *Ptr, const LangOptions &LangOpts)
getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever emit a warning.
void setParsingPreprocessorDirective(bool f)
Inform the lexer whether or not we are currently lexing a preprocessor directive.
void LexIncludeFilename(Token &FilenameTok)
Lex a token, producing a header-name token if possible.
Encodes a location in the source.
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
DirectiveKind
Represents the kind of preprocessor directive or a module declaration that is tracked by the scanner in its token output.
@ tokens_present_before_eof
Indicates that there are tokens present between the last scanned directive and eof.
@ pp_pragma_system_header
@ pp_pragma_include_alias
bool isStringLiteral(TokenKind K)
Return true if this is a C or C++ string-literal (or C++11 user-defined-string-literal) token.
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
The JSON file list parser is used to communicate input to InstallAPI.
LLVM_READONLY bool isVerticalWhitespace(unsigned char c)
Returns true if this character is vertical ASCII whitespace: '\n', '\r'.
LLVM_READONLY bool isAsciiIdentifierContinue(unsigned char c)
void printDependencyDirectivesAsSource(StringRef Source, ArrayRef< dependency_directives_scan::Directive > Directives, llvm::raw_ostream &OS)
Print the previously scanned dependency directives as minimized source text.
LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)
Returns true if this character is horizontal ASCII whitespace: ' ', '\t', '\f', '\v'.
bool scanSourceForDependencyDirectives(StringRef Input, SmallVectorImpl< dependency_directives_scan::Token > &Tokens, SmallVectorImpl< dependency_directives_scan::Directive > &Directives, DiagnosticsEngine *Diags=nullptr, SourceLocation InputSourceLoc=SourceLocation())
Scan the input for the preprocessor directives that might have an effect on the dependencies for a compilation unit.
LLVM_READONLY bool isWhitespace(unsigned char c)
Return true if this character is horizontal or vertical ASCII whitespace: ' ', '\t', '\f', '\v', '\n', '\r'.
LLVM_READONLY bool isPreprocessingNumberBody(unsigned char c)
Return true if this is the body character of a C preprocessing number, which is [a-zA-Z0-9_.].
void prepare_PragmaString(SmallVectorImpl< char > &StrVal)
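Destringize a _Pragma("") string according to C11 6.10.9.1: "The string literal is destringized by deleting any encoding prefix, deleting the leading and trailing double-quotes, replacing each escape sequence \" by a double-quote, and replacing each escape sequence \\ by a single backslash."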
Diagnostic wrappers for TextAPI types for error reporting.
Represents a directive that's lexed as part of the dependency directives scanning.
DirectiveKind Kind
The kind of token.
Token lexed as part of dependency directive scanning.
bool isNot(tok::TokenKind K) const
unsigned Offset
Offset into the original source input.
bool is(tok::TokenKind K) const