10#include "clang/Basic/IdentifierTable.h"
11#include "clang/Basic/TokenKinds.h"
12#include "llvm/Support/FormatVariadic.h"
20class DirectiveParser {
// Builds a parser over Code: keeps a reference to the token stream and starts
// the cursor (Tok) at the stream's first token.
22 explicit DirectiveParser(
const TokenStream &Code)
23 : Code(Code), Tok(&Code.front()) {}
// Convenience entry point: forwards to the two-argument parse() overload,
// passing `true` as its second argument.
24 void parse(DirectiveTree *Result) { parse(Result,
true); }
// How a directive relates to conditional structure. Elsewhere in this class,
// a directive classified as If opens a Conditional, while Else and End are
// the kinds that end a branch body.
28 enum class Cond {
None, If, Else, End };
// Classifies a preprocessor keyword kind into a Cond category.
// NOTE(review): only the case labels are visible in this view. Their grouping
// (if/ifdef/ifndef, then elif/elifdef/elifndef/else, then endif) suggests they
// map to Cond::If, Cond::Else and Cond::End respectively -- confirm against
// the complete source.
29 static Cond classifyDirective(tok::PPKeywordKind K) {
31 case clang::tok::pp_if:
32 case clang::tok::pp_ifdef:
33 case clang::tok::pp_ifndef:
35 case clang::tok::pp_elif:
36 case clang::tok::pp_elifdef:
37 case clang::tok::pp_elifndef:
38 case clang::tok::pp_else:
40 case clang::tok::pp_endif:
// Parses tokens from the cursor up to eof, appending chunks to Tree->Chunks.
// Runs of tokens that do not start a directive become Code chunks; each
// directive is read with parseDirective(). A directive classified as Cond::If
// starts a Conditional whose branches are filled in by parseConditional().
// When TopLevel is false, a directive classified as Cond::Else or Cond::End
// is returned to the caller instead of being added to Tree.
51 std::optional<DirectiveTree::Directive> parse(DirectiveTree *Tree,
// Predicate: does the current token start a directive? The lambda carries its
// own mutable state, AllowDirectiveAt (initially null), which is set to the
// token following a comment.
// NOTE(review): the condition that selects between the two return statements
// below is not visible in this view.
53 auto StartsDirective =
54 [&, AllowDirectiveAt((
const Token *)
nullptr)]()
mutable {
58 if (Tok->Kind == tok::comment)
59 AllowDirectiveAt = Tok + 1;
60 return Tok->Kind == tok::hash;
62 return Tok->Kind == tok::hash && AllowDirectiveAt == Tok;
65 while (Tok->Kind != tok::eof) {
67 if (!StartsDirective()) {
// Remember where this run of non-directive tokens begins.
68 const Token *Start = Tok;
71 while (Tok->Kind != tok::eof && !StartsDirective());
// Record the run, from Start up to the current cursor position, as Code.
72 Tree->Chunks.push_back(DirectiveTree::Code{
73 Token::Range{Code.index(*Start), Code.index(*Tok)}});
78 DirectiveTree::Directive Directive;
79 parseDirective(&Directive);
80 Cond Kind = classifyDirective(Directive.Kind);
81 if (Kind == Cond::If) {
// The opening directive becomes the head of the first branch; the branch
// bodies and any further branches are parsed by parseConditional().
83 DirectiveTree::Conditional Conditional;
84 Conditional.Branches.emplace_back();
85 Conditional.Branches.back().first = std::move(Directive);
86 parseConditional(&Conditional);
87 Tree->Chunks.push_back(std::move(Conditional));
88 }
else if ((Kind == Cond::Else || Kind == Cond::End) && !TopLevel) {
// Inside a conditional: hand the terminating directive back to the caller.
91 return std::move(Directive);
// Any other directive is stored as a chunk of its own.
94 Tree->Chunks.push_back(std::move(Directive));
// Parses the branches of a conditional whose opening directive is already
// stored as the first (and only) entry of C->Branches.
// Each branch body is parsed with parse(..., false). The directive it hands
// back either becomes C->End (when classified as Cond::End) or opens a new
// branch (when classified as Cond::Else).
102 void parseConditional(DirectiveTree::Conditional *C) {
103 assert(
C->Branches.size() == 1 &&
104 C->Branches.front().second.Chunks.empty() &&
105 "Should be ready to parse first branch body");
106 while (Tok->Kind != tok::eof) {
// Parse into the body of the most recently added branch.
107 auto Terminator = parse(&
C->Branches.back().second,
false);
// NOTE(review): the guard around this assert is not visible in this view;
// presumably it applies when parse() returned no terminating directive.
109 assert(Tok->Kind == tok::eof &&
"gave up parsing before eof?");
113 if (classifyDirective(Terminator->Kind) == Cond::End) {
114 C->End = std::move(*Terminator);
117 assert(classifyDirective(Terminator->Kind) == Cond::Else &&
118 "ended branch unexpectedly");
// The terminator heads a new branch whose body is parsed on the next pass.
119 C->Branches.emplace_back();
120 C->Branches.back().first = std::move(*Terminator);
// Reads one directive, starting at the current '#' token, into D: records the
// directive's token range (D->Tokens) and its keyword kind (D->Kind).
125 void parseDirective(DirectiveTree::Directive *D) {
126 assert(Tok->Kind == tok::hash);
// Begin points at the '#'; the cursor moves past it.
129 const Token *Begin = Tok++;
// NOTE(review): the code that advances Tok to the end of the directive is not
// visible in this view.
132 ArrayRef<Token> Tokens{Begin, Tok};
133 D->Tokens = {Code.index(*Tokens.begin()), Code.index(*Tokens.end())};
// Drop the '#' and any comments directly after it, so the first remaining
// token is the one used as the directive name.
136 Tokens = Tokens.drop_front().drop_while(
137 [](
const Token &T) {
return T.Kind == tok::comment; });
// Look the name's text up in the identifier table to get its PP keyword ID.
139 D->Kind = PPKeywords.get(Tokens.front().text()).getPPKeywordID();
// The token stream being parsed, held by reference.
142 const TokenStream &Code;
// Identifier table that parseDirective() uses to map a directive name to its
// PP keyword ID.
144 clang::IdentifierTable PPKeywords;
// Output stream that the dump is written to, held by reference.
148 llvm::raw_ostream &OS;
// Creates a dumper that writes to OS.
151 Dumper(llvm::raw_ostream& OS) : OS(OS) {}
// Dumps a tree by visiting each of its chunks, in order, with this Dumper.
152 void operator()(
const DirectiveTree& Tree) {
153 for (
const auto& Chunk : Tree.Chunks)
154 std::visit(*
this, Chunk);
// Dumps a conditional: for each branch, its directive followed by its body,
// and then the End directive.
156 void operator()(
const DirectiveTree::Conditional &Conditional) {
157 for (
unsigned I = 0; I < Conditional.Branches.size(); ++I) {
158 const auto &Branch = Conditional.Branches[I];
// The second argument flags the branch whose index equals Conditional.Taken.
159 (*this)(Branch.first, Conditional.Taken == I);
161 (*this)(Branch.second);
164 (*this)(Conditional.End);
// Writes one indented line for a directive: '#' plus the keyword spelling,
// the size of its token range, and a " TAKEN" suffix when Taken is true.
166 void operator()(
const DirectiveTree::Directive &Directive,
167 bool Taken =
false) {
168 OS.indent(Indent) << llvm::formatv(
169 "#{0} ({1} tokens){2}\n", tok::getPPKeywordSpelling(Directive.Kind),
170 Directive.Tokens.size(), Taken ?
" TAKEN" :
"");
172 void operator()(
const DirectiveTree::Code &Code) {
173 OS.indent(Indent) << llvm::formatv(
"code ({0} tokens)\n",
// Builds a DirectiveTree for Code by running a DirectiveParser over it and
// letting it fill in Result.
179DirectiveTree DirectiveTree::parse(
const TokenStream &Code) {
180 DirectiveTree Result;
181 DirectiveParser(Code).parse(&Result);
186#define OSTREAM_DUMP(Type) \
187 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Type &T) { \
213 BranchChooser(
const TokenStream &Code) : Code(Code) {}
// Orders scores lexicographically: fewer Errors ranks higher (hence the
// negation), then more Tokens, then more Directives.
221 bool operator>(
const Score &Other)
const {
223 return std::make_tuple(-Errors, Tokens, Directives) >
224 std::make_tuple(-Other.Errors, Other.Tokens, Other.Directives);
// Accumulates Other into this score, field by field.
227 Score &operator+=(
const Score &Other) {
228 Tokens += Other.Tokens;
229 Directives += Other.Directives;
230 Errors += Other.Errors;
235 Score operator()(DirectiveTree::Code &C) {
237 for (
const Token &T : Code.tokens(C.Tokens))
238 if (T.Kind != tok::comment)
243 Score operator()(DirectiveTree::Directive &D) {
246 S.Errors = D.Kind == tok::pp_error;
250 Score operator()(DirectiveTree::Conditional &C) {
252 bool MayTakeTrivial =
true;
253 bool TookTrivial =
false;
255 for (
unsigned I = 0; I < C.Branches.size(); ++I) {
257 Score BranchScore = walk(C.Branches[I].second);
262 if (
auto TriviallyTaken = isTakenWhenReached(C.Branches[I].first)) {
263 if (!*TriviallyTaken)
269 MayTakeTrivial =
false;
272 if (TookTrivial || !C.Taken || BranchScore > Best) {
// Scores a tree: visits every chunk with this object and adds each chunk's
// score to the running total S.
279 Score walk(DirectiveTree &M) {
281 for (
auto &C : M.Chunks)
282 S += std::visit(*
this, C);
// Tries to decide, from the directive's own tokens, whether its branch is
// taken when the directive is reached. Yields std::nullopt when the value
// token is not one of the recognized literals.
// NOTE(review): the switch header and the return statements attached to the
// case labels and to the `if` below are not visible in this view.
289 std::optional<bool> isTakenWhenReached(
const DirectiveTree::Directive &Dir) {
291 case clang::tok::pp_if:
292 case clang::tok::pp_elif:
294 case clang::tok::pp_else:
300 const auto &Tokens = Code.tokens(Dir.Tokens);
301 assert(!Tokens.empty() && Tokens.front().Kind == tok::hash);
// Name is the token reached via nextNC() from the '#', and Value the one
// after Name (nextNC presumably skips comments -- confirm).
302 const Token &Name = Tokens.front().nextNC();
303 const Token &Value = Name.nextNC();
// Tests whether Value lies at or past the end of the directive's tokens, or
// whether another token follows Value inside the directive.
305 if (&Value >= Tokens.end() || &Value.nextNC() < Tokens.end())
// Map the literal spellings onto a boolean; anything else is undecided.
307 return llvm::StringSwitch<std::optional<bool>>(Value.text())
308 .Cases(
"true",
"1",
true)
309 .Cases(
"false",
"0",
false)
310 .Default(std::nullopt);
313 const TokenStream &Code;
319 BranchChooser{Code}.walk(Tree);
324 const TokenStream &In;
328 Preprocessor(
const TokenStream &In, TokenStream &Out) : In(In), Out(Out) {}
// Finalizes the output stream when the Preprocessor is destroyed.
329 ~Preprocessor() { Out.finalize(); }
// Copying is disabled.
331 Preprocessor(
const Preprocessor &other) =
delete;
332 Preprocessor &operator=(
const Preprocessor &other) =
delete;
// Visits every chunk of T, in order, with this object.
334 void walk(
const DirectiveTree &T) {
335 for (
const auto &C :
T.Chunks)
336 std::visit(*
this, C);
339 void operator()(
const DirectiveTree::Code &C) {
340 for (
const auto &Tok : In.tokens(
C.Tokens))
344 void operator()(
const DirectiveTree::Directive &) {}
// Handles a conditional by walking the body of the branch indexed by C.Taken.
// NOTE(review): C.Taken is dereferenced here; any check that it holds a value
// is not visible in this view.
346 void operator()(
const DirectiveTree::Conditional &C) {
348 walk(
C.Branches[*
C.Taken].second);
355 Preprocessor(In, Out).walk(*
this);
361 std::vector<Token::Range> &Ranges;
364 RangePairer(std::vector<Token::Range> &Ranges) : Ranges(Ranges) {}
// Visits every chunk of T, in order, with this object.
366 void walk(
const DirectiveTree &T) {
367 for (
const auto &C :
T.Chunks)
368 std::visit(*
this, C);
// Code chunks are ignored by this visitor.
371 void operator()(
const DirectiveTree::Code &C) {}
// Standalone directives are ignored by this visitor as well.
373 void operator()(
const DirectiveTree::Directive &) {}
375 void operator()(
const DirectiveTree::Conditional &C) {
379 for (
const auto &[Directive, _] :
C.Branches) {
383 Range = {Last, Directive.Tokens.Begin};
384 Ranges.push_back(Range);
386 Last = Directive.Tokens.Begin;
389 if (
C.End.Kind != tok::pp_not_keyword) {
390 Range = {Last,
C.End.Tokens.Begin};
391 Ranges.push_back(Range);
394 for (
const auto &[_, SubTree] :
C.Branches)
402 std::vector<Token::Range> Ranges;
403 RangePairer(Ranges).walk(Tree);
406 for (
auto &R : Ranges) {
#define OSTREAM_DUMP(Type)
A complete sequence of Tokens representing a source file.
ArrayRef< Token > tokens() const
FIXME: Skip testing on windows temporarily due to the different escaping code mode.
std::vector< Token::Range > pairDirectiveRanges(const DirectiveTree &Tree, const TokenStream &Code)
Pairs preprocessor conditional directives and computes their token ranges.
void chooseConditionalBranches(DirectiveTree &Tree, const TokenStream &Code)
Describes the structure of a source file, as seen by the preprocessor.
@ StartsPPLine
Marks the token at the start of a logical preprocessor line.
//===-- Representation.cpp - ClangDoc Representation -----------*- C++ -*-===//
static Range emptyAt(Index Index)
A single C++ or preprocessor token.
Index OriginalIndex
Index into the original token stream (as raw-lexed from the source code).
uint32_t Index
An Index identifies a token within a stream.
clang::tok::TokenKind Kind
The type of token as determined by clang's lexer.