10#include "clang/Basic/IdentifierTable.h"
11#include "clang/Basic/SourceLocation.h"
12#include "clang/Basic/TokenKinds.h"
13#include "clang/Lex/Lexer.h"
14#include "clang/Lex/LiteralSupport.h"
19TokenStream lex(
const std::string &Code,
const clang::LangOptions &LangOpts) {
20 clang::SourceLocation Start;
23 clang::Lexer Lexer(Start, LangOpts, Code.data(), Code.data(),
24 Code.data() + Code.size());
25 Lexer.SetCommentRetentionState(
true);
31 unsigned LastOffset = 0;
34 for (Lexer.LexFromRawLexer(CT); CT.getKind() != clang::tok::eof;
35 Lexer.LexFromRawLexer(CT)) {
37 CT.getLocation().getRawEncoding() - Start.getRawEncoding();
40 Tok.
Data = &Code[Offset];
41 Tok.
Length = CT.getLength();
42 Tok.
Kind = CT.getKind();
45 unsigned NewLineStart = 0;
46 for (
unsigned I = LastOffset; I < Offset; ++I) {
47 if (Code[I] ==
'\n') {
52 if (NewLineStart || !LastOffset) {
54 for (
char C : StringRef(Code).slice(NewLineStart, Offset)) {
66 if (CT.isAtStartOfLine())
68 if (CT.needsCleaning() || CT.hasUCN())
80 auto CleanedStorage = std::make_shared<llvm::BumpPtrAllocator>();
81 clang::IdentifierTable Identifiers(LangOpts);
84 for (
auto Tok : Code.
tokens()) {
87 llvm::SmallString<64> CleanBuffer;
88 const char *Pos = Tok.text().begin();
89 while (Pos < Tok.text().end()) {
90 auto [Char, CharSize] =
91 clang::Lexer::getCharAndSizeNoWarn(Pos, LangOpts);
92 CleanBuffer.push_back(Char);
93 assert(CharSize != 0 &&
"no progress!");
96 llvm::StringRef
Text = CleanBuffer;
97 llvm::SmallString<64> UCNBuffer;
107 if (Tok.Kind == tok::raw_identifier) {
108 clang::expandUCNs(UCNBuffer, CleanBuffer);
112 Tok.Data =
Text.copy(*CleanedStorage).data();
113 Tok.Length =
Text.size();
117 if (Tok.Kind == tok::raw_identifier) {
119 Tok.Kind = Identifiers.get(Tok.text()).getTokenID();
120 }
else if (Tok.Kind == tok::greatergreater) {
123 assert(Tok.text() ==
">>");
124 Tok.Kind = tok::greater;
128 Tok.Data = Tok.text().data() + 1;
131 Result.
push(std::move(Tok));
A complete sequence of Tokens representing a source file.
ArrayRef< Token > tokens() const
std::shared_ptr< void > getPayload() const
Returns the shared payload.
void finalize()
Finalize the token stream, allowing tokens to be accessed.
void push(Token T)
Append a token to the stream, which must not be finalized.
void addPayload(std::shared_ptr< void > P)
Adds the given payload to the stream.
FIXME: Skip testing on Windows temporarily due to the different escaping code mode.
static void lex(llvm::StringRef Code, const LangOptions &LangOpts, llvm::function_ref< void(const syntax::Token &, const SourceManager &SM)> Action)
TokenStream cook(const TokenStream &Code, const LangOptions &LangOpts)
@ NeedsCleaning
Marks tokens containing trigraphs, escaped newlines, UCNs etc.
@ StartsPPLine
Marks the token at the start of a logical preprocessor line.
===-- Representation.cpp - ClangDoc Representation -----------*- C++ -*-===//
A single C++ or preprocessor token.
uint8_t Indent
Width of whitespace before the first token on this line.
Index OriginalIndex
Index into the original token stream (as raw-lexed from the source code).
uint32_t Index
An Index identifies a token within a stream.
uint32_t Line
Zero-based line number for the start of the token.
clang::tok::TokenKind Kind
The type of token as determined by clang's lexer.