18#include "clang/Tooling/CompilationDatabase.h"
19#include "llvm/ADT/StringRef.h"
20#include "llvm/Support/Compiler.h"
21#include "llvm/Support/Compression.h"
22#include "llvm/Support/Endian.h"
23#include "llvm/Support/Error.h"
24#include "llvm/Support/raw_ostream.h"
43 const char *Begin, *End;
47 Reader(llvm::StringRef Data) : Begin(Data.begin()), End(Data.end()) {}
50 bool err()
const {
return Err; }
52 bool eof()
const {
return Begin == End || Err; }
54 llvm::StringRef rest()
const {
return llvm::StringRef(Begin, End - Begin); }
57 if (LLVM_UNLIKELY(Begin == End)) {
64 uint32_t consume32() {
65 if (LLVM_UNLIKELY(Begin + 4 > End)) {
69 auto Ret = llvm::support::endian::read32le(Begin);
74 llvm::StringRef
consume(
int N) {
75 if (LLVM_UNLIKELY(Begin + N > End)) {
77 return llvm::StringRef();
79 llvm::StringRef Ret(Begin, N);
84 uint32_t consumeVar() {
85 constexpr static uint8_t More = 1 << 7;
89 uint32_t
B = consume8();
90 if (LLVM_LIKELY(!(B & More)))
92 uint32_t Val =
B & ~More;
93 for (
int Shift = 7;
B & More && Shift < 32; Shift += 7) {
96 assert((Shift != 28 || B == (B & 0x0f)) &&
"Invalid varint encoding");
97 Val |= (
B & ~More) << Shift;
102 llvm::StringRef consumeString(llvm::ArrayRef<llvm::StringRef>
Strings) {
103 auto StringIndex = consumeVar();
104 if (LLVM_UNLIKELY(StringIndex >=
Strings.size())) {
106 return llvm::StringRef();
119 template <
typename T> [[nodiscard]]
bool consumeSize(T &
Container) {
120 auto Size = consumeVar();
122 if (Size > (
size_t)(End - Begin)) {
131void write32(uint32_t I, llvm::raw_ostream &
OS) {
133 llvm::support::endian::write32le(Buf, I);
134 OS.write(Buf,
sizeof(Buf));
137void writeVar(uint32_t I, llvm::raw_ostream &
OS) {
138 constexpr static uint8_t More = 1 << 7;
139 if (LLVM_LIKELY(I < 1 << 7)) {
167class StringTableOut {
168 llvm::DenseSet<llvm::StringRef> Unique;
169 std::vector<llvm::StringRef> Sorted;
171 llvm::DenseMap<std::pair<const char *, size_t>,
unsigned> Index;
180 void intern(llvm::StringRef &S) { S = *Unique.insert(S).first; };
182 void finalize(llvm::raw_ostream &
OS) {
183 Sorted = {Unique.begin(), Unique.end()};
185 for (
unsigned I = 0; I < Sorted.size(); ++I)
186 Index.try_emplace({Sorted[I].data(), Sorted[I].size()}, I);
188 std::string RawTable;
189 for (llvm::StringRef S : Sorted) {
190 RawTable.append(std::string(S));
191 RawTable.push_back(0);
193 if (llvm::compression::zlib::isAvailable()) {
194 llvm::SmallVector<uint8_t, 0> Compressed;
195 llvm::compression::zlib::compress(llvm::arrayRefFromStringRef(RawTable),
197 write32(RawTable.size(),
OS);
198 OS << llvm::toStringRef(Compressed);
205 unsigned index(llvm::StringRef S)
const {
206 assert(!Sorted.empty() &&
"table not finalized");
207 assert(Index.count({S.data(), S.size()}) &&
"string not interned");
208 return Index.find({S.data(), S.size()})->second;
212struct StringTableIn {
217llvm::Expected<StringTableIn> readStringTable(llvm::StringRef Data) {
219 size_t UncompressedSize = R.consume32();
221 return error(
"Truncated string table");
223 llvm::StringRef Uncompressed;
224 llvm::SmallVector<uint8_t, 0> UncompressedStorage;
225 if (UncompressedSize == 0)
226 Uncompressed = R.rest();
227 else if (llvm::compression::zlib::isAvailable()) {
232 constexpr int MaxCompressionRatio = 1032;
233 if (UncompressedSize / MaxCompressionRatio > R.rest().size())
234 return error(
"Bad stri table: uncompress {0} -> {1} bytes is implausible",
235 R.rest().size(), UncompressedSize);
237 if (llvm::Error
E = llvm::compression::zlib::decompress(
238 llvm::arrayRefFromStringRef(R.rest()), UncompressedStorage,
241 Uncompressed = toStringRef(UncompressedStorage);
243 return error(
"Compressed string table, but zlib is unavailable");
246 llvm::StringSaver Saver(Table.Arena);
247 R = Reader(Uncompressed);
248 for (Reader R(Uncompressed); !R.eof();) {
249 auto Len = R.rest().find(0);
250 if (Len == llvm::StringRef::npos)
251 return error(
"Bad string table: not null terminated");
252 Table.Strings.push_back(Saver.save(R.consume(Len)));
256 return error(
"Truncated string table");
257 return std::move(Table);
266void writeLocation(
const SymbolLocation &
Loc,
const StringTableOut &
Strings,
267 llvm::raw_ostream &
OS) {
269 for (
const auto &Endpoint : {
Loc.Start,
Loc.End}) {
270 writeVar(Endpoint.line(),
OS);
271 writeVar(Endpoint.column(),
OS);
275SymbolLocation readLocation(Reader &Data,
276 llvm::ArrayRef<llvm::StringRef>
Strings) {
278 Loc.FileURI = Data.consumeString(
Strings).data();
279 for (
auto *Endpoint : {&
Loc.Start, &
Loc.End}) {
280 Endpoint->setLine(Data.consumeVar());
281 Endpoint->setColumn(Data.consumeVar());
286IncludeGraphNode readIncludeGraphNode(Reader &Data,
287 llvm::ArrayRef<llvm::StringRef>
Strings) {
288 IncludeGraphNode IGN;
290 IGN.URI = Data.consumeString(
Strings);
291 llvm::StringRef Digest = Data.consume(IGN.Digest.size());
292 std::copy(Digest.bytes_begin(), Digest.bytes_end(), IGN.Digest.begin());
293 if (!Data.consumeSize(IGN.DirectIncludes))
295 for (llvm::StringRef &Include : IGN.DirectIncludes)
296 Include = Data.consumeString(
Strings);
300void writeIncludeGraphNode(
const IncludeGraphNode &IGN,
302 llvm::raw_ostream &
OS) {
303 OS.write(
static_cast<uint8_t
>(IGN.Flags));
305 llvm::StringRef Hash(
reinterpret_cast<const char *
>(IGN.Digest.data()),
308 writeVar(IGN.DirectIncludes.size(),
OS);
309 for (llvm::StringRef Include : IGN.DirectIncludes)
313void writeSymbol(
const Symbol &Sym,
const StringTableOut &
Strings,
314 llvm::raw_ostream &
OS) {
317 OS.write(
static_cast<uint8_t
>(Sym.SymInfo.Kind));
318 OS.write(
static_cast<uint8_t
>(Sym.SymInfo.Lang));
321 writeVar(
Strings.index(Sym.TemplateSpecializationArgs),
OS);
322 writeLocation(Sym.Definition,
Strings,
OS);
323 writeLocation(Sym.CanonicalDeclaration,
Strings,
OS);
324 writeVar(Sym.References,
OS);
325 OS.write(
static_cast<uint8_t
>(Sym.Flags));
326 writeVar(
Strings.index(Sym.Signature),
OS);
327 writeVar(
Strings.index(Sym.CompletionSnippetSuffix),
OS);
328 writeVar(
Strings.index(Sym.Documentation),
OS);
329 writeVar(
Strings.index(Sym.ReturnType),
OS);
332 auto WriteInclude = [&](
const Symbol::IncludeHeaderWithReferences &Include) {
333 writeVar(
Strings.index(Include.IncludeHeader),
OS);
334 writeVar((Include.References << 2) | Include.SupportedDirectives,
OS);
336 writeVar(Sym.IncludeHeaders.size(),
OS);
337 for (
const auto &Include : Sym.IncludeHeaders)
338 WriteInclude(Include);
341Symbol readSymbol(Reader &Data, llvm::ArrayRef<llvm::StringRef>
Strings,
344 Sym.ID = Data.consumeID();
345 Sym.SymInfo.Kind =
static_cast<index::SymbolKind
>(Data.consume8());
346 Sym.SymInfo.Lang =
static_cast<index::SymbolLanguage
>(Data.consume8());
347 Sym.Name = Data.consumeString(
Strings);
348 Sym.Scope = Data.consumeString(
Strings);
349 Sym.TemplateSpecializationArgs = Data.consumeString(
Strings);
350 Sym.Definition = readLocation(Data,
Strings);
351 Sym.CanonicalDeclaration = readLocation(Data,
Strings);
352 Sym.References = Data.consumeVar();
355 Sym.Signature = Data.consumeString(
Strings);
356 Sym.CompletionSnippetSuffix = Data.consumeString(
Strings);
357 Sym.Documentation = Data.consumeString(
Strings);
358 Sym.ReturnType = Data.consumeString(
Strings);
359 Sym.Type = Data.consumeString(
Strings);
360 if (!Data.consumeSize(Sym.IncludeHeaders))
362 for (
auto &I : Sym.IncludeHeaders) {
363 I.IncludeHeader = Data.consumeString(
Strings);
364 uint32_t RefsWithDirectives = Data.consumeVar();
365 I.References = RefsWithDirectives >> 2;
366 I.SupportedDirectives = RefsWithDirectives & 0x3;
378void writeRefs(
const SymbolID &
ID, llvm::ArrayRef<Ref> Refs,
379 const StringTableOut &
Strings, llvm::raw_ostream &
OS) {
381 writeVar(Refs.size(),
OS);
382 for (
const auto &Ref : Refs) {
383 OS.write(
static_cast<unsigned char>(Ref.Kind));
384 writeLocation(Ref.Location,
Strings,
OS);
385 OS << Ref.Container.raw();
389std::pair<SymbolID, std::vector<Ref>>
390readRefs(Reader &Data, llvm::ArrayRef<llvm::StringRef>
Strings) {
391 std::pair<SymbolID, std::vector<Ref>> Result;
392 Result.first = Data.consumeID();
393 if (!Data.consumeSize(Result.second))
395 for (
auto &Ref : Result.second) {
396 Ref.Kind =
static_cast<RefKind>(Data.consume8());
397 Ref.Location = readLocation(Data,
Strings);
398 Ref.Container = Data.consumeID();
410void writeRelation(
const Relation &R, llvm::raw_ostream &
OS) {
411 OS << R.Subject.raw();
412 OS.write(
static_cast<uint8_t
>(R.Predicate));
413 OS << R.Object.raw();
416Relation readRelation(Reader &Data) {
417 SymbolID Subject = Data.consumeID();
420 return {Subject, Predicate,
Object};
423struct InternedCompileCommand {
428void writeCompileCommand(
const InternedCompileCommand &Cmd,
430 llvm::raw_ostream &CmdOS) {
431 writeVar(
Strings.index(Cmd.Directory), CmdOS);
432 writeVar(Cmd.CommandLine.size(), CmdOS);
433 for (llvm::StringRef
C : Cmd.CommandLine)
437InternedCompileCommand
438readCompileCommand(Reader CmdReader, llvm::ArrayRef<llvm::StringRef>
Strings) {
439 InternedCompileCommand Cmd;
440 Cmd.Directory = CmdReader.consumeString(
Strings);
441 if (!CmdReader.consumeSize(Cmd.CommandLine))
443 for (llvm::StringRef &
C : Cmd.CommandLine)
444 C = CmdReader.consumeString(
Strings);
460constexpr static uint32_t Version = 20;
462llvm::Expected<IndexFileIn> readRIFF(llvm::StringRef Data,
466 return RIFF.takeError();
469 llvm::StringMap<llvm::StringRef> Chunks;
470 for (
const auto &Chunk :
RIFF->Chunks)
471 Chunks.try_emplace(llvm::StringRef(Chunk.ID.data(), Chunk.ID.size()),
474 if (!Chunks.count(
"meta"))
475 return error(
"missing meta chunk");
476 Reader Meta(Chunks.lookup(
"meta"));
477 auto SeenVersion = Meta.consume32();
478 if (SeenVersion != Version)
479 return error(
"wrong version: want {0}, got {1}", Version, SeenVersion);
482 for (llvm::StringRef RequiredChunk : {
"stri"})
483 if (!Chunks.count(RequiredChunk))
484 return error(
"missing required chunk {0}", RequiredChunk);
486 auto Strings = readStringTable(Chunks.lookup(
"stri"));
491 if (Chunks.count(
"srcs")) {
492 Reader SrcsReader(Chunks.lookup(
"srcs"));
493 Result.Sources.emplace();
494 while (!SrcsReader.eof()) {
495 auto IGN = readIncludeGraphNode(SrcsReader,
Strings->Strings);
496 auto Entry = Result.Sources->try_emplace(IGN.URI).first;
497 Entry->getValue() = std::move(IGN);
501 for (
auto &Include :
Entry->getValue().DirectIncludes)
502 Include = Result.Sources->try_emplace(Include).first->getKey();
504 if (SrcsReader.err())
505 return error(
"malformed or truncated include uri");
508 if (Chunks.count(
"symb")) {
509 Reader SymbolReader(Chunks.lookup(
"symb"));
511 while (!SymbolReader.eof())
512 Symbols.insert(readSymbol(SymbolReader,
Strings->Strings, Origin));
513 if (SymbolReader.err())
514 return error(
"malformed or truncated symbol");
515 Result.Symbols = std::move(
Symbols).build();
517 if (Chunks.count(
"refs")) {
518 Reader RefsReader(Chunks.lookup(
"refs"));
519 RefSlab::Builder Refs;
520 while (!RefsReader.eof()) {
521 auto RefsBundle = readRefs(RefsReader,
Strings->Strings);
522 for (
const auto &Ref : RefsBundle.second)
523 Refs.
insert(RefsBundle.first, Ref);
525 if (RefsReader.err())
526 return error(
"malformed or truncated refs");
527 Result.Refs = std::move(Refs).build();
529 if (Chunks.count(
"rela")) {
530 Reader RelationsReader(Chunks.lookup(
"rela"));
531 RelationSlab::Builder Relations;
532 while (!RelationsReader.eof())
533 Relations.
insert(readRelation(RelationsReader));
534 if (RelationsReader.err())
535 return error(
"malformed or truncated relations");
536 Result.Relations = std::move(Relations).build();
538 if (Chunks.count(
"cmdl")) {
539 Reader CmdReader(Chunks.lookup(
"cmdl"));
540 InternedCompileCommand Cmd =
541 readCompileCommand(CmdReader,
Strings->Strings);
543 return error(
"malformed or truncated commandline section");
544 Result.Cmd.emplace();
545 Result.Cmd->Directory = std::string(Cmd.Directory);
546 Result.Cmd->CommandLine.reserve(Cmd.CommandLine.size());
547 for (llvm::StringRef
C : Cmd.CommandLine)
548 Result.Cmd->CommandLine.emplace_back(
C);
550 return std::move(Result);
553template <
class Callback>
556 for (llvm::StringRef &Include : IGN.DirectIncludes)
560void writeRIFF(
const IndexFileOut &Data, llvm::raw_ostream &
OS) {
561 assert(Data.Symbols &&
"An index file without symbols makes no sense!");
565 llvm::SmallString<4> Meta;
567 llvm::raw_svector_ostream MetaOS(Meta);
568 write32(Version, MetaOS);
574 for (
const auto &Sym : *Data.Symbols) {
577 [&](llvm::StringRef &S) { Strings.intern(S); });
579 std::vector<IncludeGraphNode> Sources;
581 for (
const auto &Source : *Data.Sources) {
582 Sources.push_back(Source.getValue());
584 [&](llvm::StringRef &S) { Strings.intern(S); });
587 std::vector<std::pair<SymbolID, std::vector<Ref>>> Refs;
589 for (
const auto &Sym : *Data.Refs) {
590 Refs.emplace_back(Sym);
591 for (
auto &Ref : Refs.back().second) {
592 llvm::StringRef
File = Ref.Location.FileURI;
594 Ref.Location.FileURI =
File.data();
599 std::vector<Relation> Relations;
600 if (Data.Relations) {
601 for (
const auto &Relation : *Data.Relations) {
602 Relations.emplace_back(Relation);
607 InternedCompileCommand InternedCmd;
609 InternedCmd.CommandLine.reserve(Data.Cmd->CommandLine.size());
610 InternedCmd.Directory = Data.Cmd->Directory;
611 Strings.intern(InternedCmd.Directory);
612 for (llvm::StringRef
C : Data.Cmd->CommandLine) {
613 InternedCmd.CommandLine.emplace_back(
C);
614 Strings.intern(InternedCmd.CommandLine.back());
618 std::string StringSection;
620 llvm::raw_string_ostream StringOS(StringSection);
625 std::string SymbolSection;
627 llvm::raw_string_ostream SymbolOS(SymbolSection);
628 for (
const auto &Sym :
Symbols)
629 writeSymbol(Sym,
Strings, SymbolOS);
633 std::string RefsSection;
636 llvm::raw_string_ostream RefsOS(RefsSection);
637 for (
const auto &Sym : Refs)
638 writeRefs(Sym.first, Sym.second,
Strings, RefsOS);
643 std::string RelationSection;
644 if (Data.Relations) {
646 llvm::raw_string_ostream RelationOS{RelationSection};
647 for (
const auto &Relation : Relations)
648 writeRelation(Relation, RelationOS);
653 std::string SrcsSection;
656 llvm::raw_string_ostream SrcsOS(SrcsSection);
657 for (
const auto &SF : Sources)
658 writeIncludeGraphNode(SF,
Strings, SrcsOS);
663 std::string CmdlSection;
666 llvm::raw_string_ostream CmdOS(CmdlSection);
667 writeCompileCommand(InternedCmd,
Strings, CmdOS);
678void writeYAML(
const IndexFileOut &, llvm::raw_ostream &);
695 if (Data.starts_with(
"RIFF")) {
696 return readRIFF(Data, Origin);
698 if (
auto YAMLContents =
readYAML(Data, Origin)) {
699 return std::move(*YAMLContents);
701 return error(
"Not a RIFF file and failed to parse as YAML: {0}",
702 YAMLContents.takeError());
706std::unique_ptr<SymbolIndex>
loadIndex(llvm::StringRef SymbolFilename,
708 bool SupportContainedRefs) {
710 auto Buffer = llvm::MemoryBuffer::getFile(SymbolFilename);
712 elog(
"Can't open {0}: {1}", SymbolFilename, Buffer.getError().message());
721 if (
auto I =
readIndexFile(Buffer->get()->getBuffer(), Origin)) {
723 Symbols = std::move(*I->Symbols);
725 Refs = std::move(*I->Refs);
727 Relations = std::move(*I->Relations);
729 elog(
"Bad index file: {0}", I.takeError());
734 size_t NumSym =
Symbols.size();
735 size_t NumRefs = Refs.numRefs();
736 size_t NumRelations = Relations.size();
741 std::move(Relations), SupportContainedRefs)
743 std::move(Relations));
744 vlog(
"Loaded {0} from {1} with estimated memory usage {2} bytes\n"
745 " - number of symbols: {3}\n"
746 " - number of refs: {4}\n"
747 " - number of relations: {5}",
748 UseDex ?
"Dex" :
"MemIndex", SymbolFilename,
749 Index->estimateMemoryUsage(), NumSym, NumRefs, NumRelations);
This defines Dex - a symbol index implementation based on query iterators over symbol tokens,...
std::vector< llvm::StringRef > Strings
llvm::BumpPtrAllocator Arena
std::vector< llvm::StringRef > CommandLine
llvm::StringRef Directory
static std::unique_ptr< SymbolIndex > build(SymbolSlab Symbols, RefSlab Refs, RelationSlab Relations)
Builds an index from slabs. The index takes ownership of the data.
void insert(const SymbolID &ID, const Ref &S)
Adds a ref to the slab. Deep copy: Strings will be owned by the slab.
An efficient structure for storing a large set of symbol references in memory.
void insert(const Relation &R)
Adds a relation to the slab.
static constexpr size_t RawSize
An immutable symbol container that stores a set of symbols.
static std::unique_ptr< SymbolIndex > build(SymbolSlab, RefSlab, RelationSlab, bool SupportContainedRefs)
Builds an index from slabs. The index takes ownership of the slab.
Records an event whose duration is the lifetime of the Span object.
std::vector< std::pair< DocID, float > > consume(Iterator &It)
Advances the iterator until it is exhausted.
llvm::Expected< File > readFile(llvm::StringRef Stream)
constexpr FourCC fourCC(const char(&Literal)[5])
constexpr llvm::StringRef fourCCStr(const FourCC &Data)
void visitStrings(Symbol &S, const Callback &CB)
Invokes Callback with each StringRef& contained in the Symbol.
llvm::Expected< IndexFileIn > readIndexFile(llvm::StringRef Data, SymbolOrigin Origin)
llvm::Expected< IndexFileIn > readYAML(llvm::StringRef, SymbolOrigin Origin)
void vlog(const char *Fmt, Ts &&... Vals)
llvm::Error error(std::error_code EC, const char *Fmt, Ts &&... Vals)
llvm::raw_ostream & operator<<(llvm::raw_ostream &OS, const CodeCompletion &C)
llvm::unique_function< void(llvm::Expected< T >)> Callback
A Callback<T> is a void function that accepts Expected<T>.
RefKind
Describes the kind of a cross-reference.
void writeYAML(const IndexFileOut &, llvm::raw_ostream &)
void elog(const char *Fmt, Ts &&... Vals)
std::unique_ptr< SymbolIndex > loadIndex(llvm::StringRef SymbolFilename, SymbolOrigin Origin, bool UseDex, bool SupportContainedRefs)
std::array< uint8_t, 20 > SymbolID
//===-- Representation.cpp - ClangDoc Representation -----------*- C++ -*-===//