clang-tools 20.0.0git
Serialization.cpp
Go to the documentation of this file.
1//===-- Serialization.cpp - Binary serialization of index data ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "Serialization.h"
10#include "Headers.h"
11#include "RIFF.h"
12#include "index/MemIndex.h"
14#include "index/SymbolOrigin.h"
15#include "index/dex/Dex.h"
16#include "support/Logger.h"
17#include "support/Trace.h"
18#include "clang/Tooling/CompilationDatabase.h"
19#include "llvm/ADT/StringRef.h"
20#include "llvm/Support/Compiler.h"
21#include "llvm/Support/Compression.h"
22#include "llvm/Support/Endian.h"
23#include "llvm/Support/Error.h"
24#include "llvm/Support/raw_ostream.h"
25#include <cstdint>
26#include <vector>
27
28namespace clang {
29namespace clangd {
30namespace {
31
32// IO PRIMITIVES
33// We use little-endian 32 bit ints, sometimes with variable-length encoding.
34//
35// Variable-length int encoding (varint) uses the bottom 7 bits of each byte
36// to encode the number, and the top bit to indicate whether more bytes follow.
37// e.g. 9a 2f means [0x1a and keep reading, 0x2f and stop].
38// This represents 0x1a | 0x2f<<7 = 6042.
39// A 32-bit integer takes 1-5 bytes to encode; small numbers are more compact.
40
41// Reads binary data from a StringRef, and keeps track of position.
42class Reader {
43 const char *Begin, *End;
44 bool Err = false;
45
46public:
47 Reader(llvm::StringRef Data) : Begin(Data.begin()), End(Data.end()) {}
48 // The "error" bit is set by reading past EOF or reading invalid data.
49 // When in an error state, reads may return zero values: callers should check.
50 bool err() const { return Err; }
51 // Did we read all the data, or encounter an error?
52 bool eof() const { return Begin == End || Err; }
53 // All the data we didn't read yet.
54 llvm::StringRef rest() const { return llvm::StringRef(Begin, End - Begin); }
55
56 uint8_t consume8() {
57 if (LLVM_UNLIKELY(Begin == End)) {
58 Err = true;
59 return 0;
60 }
61 return *Begin++;
62 }
63
64 uint32_t consume32() {
65 if (LLVM_UNLIKELY(Begin + 4 > End)) {
66 Err = true;
67 return 0;
68 }
69 auto Ret = llvm::support::endian::read32le(Begin);
70 Begin += 4;
71 return Ret;
72 }
73
74 llvm::StringRef consume(int N) {
75 if (LLVM_UNLIKELY(Begin + N > End)) {
76 Err = true;
77 return llvm::StringRef();
78 }
79 llvm::StringRef Ret(Begin, N);
80 Begin += N;
81 return Ret;
82 }
83
84 uint32_t consumeVar() {
85 constexpr static uint8_t More = 1 << 7;
86
87 // Use a 32 bit unsigned here to prevent promotion to signed int (unless int
88 // is wider than 32 bits).
89 uint32_t B = consume8();
90 if (LLVM_LIKELY(!(B & More)))
91 return B;
92 uint32_t Val = B & ~More;
93 for (int Shift = 7; B & More && Shift < 32; Shift += 7) {
94 B = consume8();
95 // 5th byte of a varint can only have lowest 4 bits set.
96 assert((Shift != 28 || B == (B & 0x0f)) && "Invalid varint encoding");
97 Val |= (B & ~More) << Shift;
98 }
99 return Val;
100 }
101
102 llvm::StringRef consumeString(llvm::ArrayRef<llvm::StringRef> Strings) {
103 auto StringIndex = consumeVar();
104 if (LLVM_UNLIKELY(StringIndex >= Strings.size())) {
105 Err = true;
106 return llvm::StringRef();
107 }
108 return Strings[StringIndex];
109 }
110
111 SymbolID consumeID() {
112 llvm::StringRef Raw = consume(SymbolID::RawSize); // short if truncated.
113 return LLVM_UNLIKELY(err()) ? SymbolID() : SymbolID::fromRaw(Raw);
114 }
115
116 // Read a varint (as consumeVar) and resize the container accordingly.
117 // If the size is invalid, return false and mark an error.
118 // (The caller should abort in this case).
119 template <typename T> [[nodiscard]] bool consumeSize(T &Container) {
120 auto Size = consumeVar();
121 // Conservatively assume each element is at least one byte.
122 if (Size > (size_t)(End - Begin)) {
123 Err = true;
124 return false;
125 }
126 Container.resize(Size);
127 return true;
128 }
129};
130
131void write32(uint32_t I, llvm::raw_ostream &OS) {
132 char Buf[4];
133 llvm::support::endian::write32le(Buf, I);
134 OS.write(Buf, sizeof(Buf));
135}
136
137void writeVar(uint32_t I, llvm::raw_ostream &OS) {
138 constexpr static uint8_t More = 1 << 7;
139 if (LLVM_LIKELY(I < 1 << 7)) {
140 OS.write(I);
141 return;
142 }
143 for (;;) {
144 OS.write(I | More);
145 I >>= 7;
146 if (I < 1 << 7) {
147 OS.write(I);
148 return;
149 }
150 }
151}
152
153// STRING TABLE ENCODING
154// Index data has many string fields, and many strings are identical.
155// We store each string once, and refer to them by index.
156//
157// The string table's format is:
158// - UncompressedSize : uint32 (or 0 for no compression)
159// - CompressedData : byte[CompressedSize]
160//
161// CompressedData is a zlib-compressed byte[UncompressedSize].
162// It contains a sequence of null-terminated strings, e.g. "foo\0bar\0".
163// These are sorted to improve compression.
164
165// Maps each string to a canonical representation.
166// Strings remain owned externally (e.g. by SymbolSlab).
167class StringTableOut {
168 llvm::DenseSet<llvm::StringRef> Unique;
169 std::vector<llvm::StringRef> Sorted;
170 // Since strings are interned, look up can be by pointer.
171 llvm::DenseMap<std::pair<const char *, size_t>, unsigned> Index;
172
173public:
174 StringTableOut() {
175 // Ensure there's at least one string in the table.
176 // Table size zero is reserved to indicate no compression.
177 Unique.insert("");
178 }
179 // Add a string to the table. Overwrites S if an identical string exists.
180 void intern(llvm::StringRef &S) { S = *Unique.insert(S).first; };
181 // Finalize the table and write it to OS. No more strings may be added.
182 void finalize(llvm::raw_ostream &OS) {
183 Sorted = {Unique.begin(), Unique.end()};
184 llvm::sort(Sorted);
185 for (unsigned I = 0; I < Sorted.size(); ++I)
186 Index.try_emplace({Sorted[I].data(), Sorted[I].size()}, I);
187
188 std::string RawTable;
189 for (llvm::StringRef S : Sorted) {
190 RawTable.append(std::string(S));
191 RawTable.push_back(0);
192 }
193 if (llvm::compression::zlib::isAvailable()) {
194 llvm::SmallVector<uint8_t, 0> Compressed;
195 llvm::compression::zlib::compress(llvm::arrayRefFromStringRef(RawTable),
196 Compressed);
197 write32(RawTable.size(), OS);
198 OS << llvm::toStringRef(Compressed);
199 } else {
200 write32(0, OS); // No compression.
201 OS << RawTable;
202 }
203 }
204 // Get the ID of an string, which must be interned. Table must be finalized.
205 unsigned index(llvm::StringRef S) const {
206 assert(!Sorted.empty() && "table not finalized");
207 assert(Index.count({S.data(), S.size()}) && "string not interned");
208 return Index.find({S.data(), S.size()})->second;
209 }
210};
211
212struct StringTableIn {
213 llvm::BumpPtrAllocator Arena;
214 std::vector<llvm::StringRef> Strings;
215};
216
217llvm::Expected<StringTableIn> readStringTable(llvm::StringRef Data) {
218 Reader R(Data);
219 size_t UncompressedSize = R.consume32();
220 if (R.err())
221 return error("Truncated string table");
222
223 llvm::StringRef Uncompressed;
224 llvm::SmallVector<uint8_t, 0> UncompressedStorage;
225 if (UncompressedSize == 0) // No compression
226 Uncompressed = R.rest();
227 else if (llvm::compression::zlib::isAvailable()) {
228 // Don't allocate a massive buffer if UncompressedSize was corrupted
229 // This is effective for sharded index, but not big monolithic ones, as
230 // once compressed size reaches 4MB nothing can be ruled out.
231 // Theoretical max ratio from https://zlib.net/zlib_tech.html
232 constexpr int MaxCompressionRatio = 1032;
233 if (UncompressedSize / MaxCompressionRatio > R.rest().size())
234 return error("Bad stri table: uncompress {0} -> {1} bytes is implausible",
235 R.rest().size(), UncompressedSize);
236
237 if (llvm::Error E = llvm::compression::zlib::decompress(
238 llvm::arrayRefFromStringRef(R.rest()), UncompressedStorage,
239 UncompressedSize))
240 return std::move(E);
241 Uncompressed = toStringRef(UncompressedStorage);
242 } else
243 return error("Compressed string table, but zlib is unavailable");
244
245 StringTableIn Table;
246 llvm::StringSaver Saver(Table.Arena);
247 R = Reader(Uncompressed);
248 for (Reader R(Uncompressed); !R.eof();) {
249 auto Len = R.rest().find(0);
250 if (Len == llvm::StringRef::npos)
251 return error("Bad string table: not null terminated");
252 Table.Strings.push_back(Saver.save(R.consume(Len)));
253 R.consume8();
254 }
255 if (R.err())
256 return error("Truncated string table");
257 return std::move(Table);
258}
259
260// SYMBOL ENCODING
261// Each field of clangd::Symbol is encoded in turn (see implementation).
262// - StringRef fields encode as varint (index into the string table)
263// - enums encode as the underlying type
264// - most numbers encode as varint
265
266void writeLocation(const SymbolLocation &Loc, const StringTableOut &Strings,
267 llvm::raw_ostream &OS) {
268 writeVar(Strings.index(Loc.FileURI), OS);
269 for (const auto &Endpoint : {Loc.Start, Loc.End}) {
270 writeVar(Endpoint.line(), OS);
271 writeVar(Endpoint.column(), OS);
272 }
273}
274
275SymbolLocation readLocation(Reader &Data,
276 llvm::ArrayRef<llvm::StringRef> Strings) {
277 SymbolLocation Loc;
278 Loc.FileURI = Data.consumeString(Strings).data();
279 for (auto *Endpoint : {&Loc.Start, &Loc.End}) {
280 Endpoint->setLine(Data.consumeVar());
281 Endpoint->setColumn(Data.consumeVar());
282 }
283 return Loc;
284}
285
286IncludeGraphNode readIncludeGraphNode(Reader &Data,
287 llvm::ArrayRef<llvm::StringRef> Strings) {
288 IncludeGraphNode IGN;
289 IGN.Flags = static_cast<IncludeGraphNode::SourceFlag>(Data.consume8());
290 IGN.URI = Data.consumeString(Strings);
291 llvm::StringRef Digest = Data.consume(IGN.Digest.size());
292 std::copy(Digest.bytes_begin(), Digest.bytes_end(), IGN.Digest.begin());
293 if (!Data.consumeSize(IGN.DirectIncludes))
294 return IGN;
295 for (llvm::StringRef &Include : IGN.DirectIncludes)
296 Include = Data.consumeString(Strings);
297 return IGN;
298}
299
300void writeIncludeGraphNode(const IncludeGraphNode &IGN,
301 const StringTableOut &Strings,
302 llvm::raw_ostream &OS) {
303 OS.write(static_cast<uint8_t>(IGN.Flags));
304 writeVar(Strings.index(IGN.URI), OS);
305 llvm::StringRef Hash(reinterpret_cast<const char *>(IGN.Digest.data()),
306 IGN.Digest.size());
307 OS << Hash;
308 writeVar(IGN.DirectIncludes.size(), OS);
309 for (llvm::StringRef Include : IGN.DirectIncludes)
310 writeVar(Strings.index(Include), OS);
311}
312
313void writeSymbol(const Symbol &Sym, const StringTableOut &Strings,
314 llvm::raw_ostream &OS) {
315 OS << Sym.ID.raw(); // TODO: once we start writing xrefs and posting lists,
316 // symbol IDs should probably be in a string table.
317 OS.write(static_cast<uint8_t>(Sym.SymInfo.Kind));
318 OS.write(static_cast<uint8_t>(Sym.SymInfo.Lang));
319 writeVar(Strings.index(Sym.Name), OS);
320 writeVar(Strings.index(Sym.Scope), OS);
321 writeVar(Strings.index(Sym.TemplateSpecializationArgs), OS);
322 writeLocation(Sym.Definition, Strings, OS);
323 writeLocation(Sym.CanonicalDeclaration, Strings, OS);
324 writeVar(Sym.References, OS);
325 OS.write(static_cast<uint8_t>(Sym.Flags));
326 writeVar(Strings.index(Sym.Signature), OS);
327 writeVar(Strings.index(Sym.CompletionSnippetSuffix), OS);
328 writeVar(Strings.index(Sym.Documentation), OS);
329 writeVar(Strings.index(Sym.ReturnType), OS);
330 writeVar(Strings.index(Sym.Type), OS);
331
332 auto WriteInclude = [&](const Symbol::IncludeHeaderWithReferences &Include) {
333 writeVar(Strings.index(Include.IncludeHeader), OS);
334 writeVar((Include.References << 2) | Include.SupportedDirectives, OS);
335 };
336 writeVar(Sym.IncludeHeaders.size(), OS);
337 for (const auto &Include : Sym.IncludeHeaders)
338 WriteInclude(Include);
339}
340
341Symbol readSymbol(Reader &Data, llvm::ArrayRef<llvm::StringRef> Strings,
342 SymbolOrigin Origin) {
343 Symbol Sym;
344 Sym.ID = Data.consumeID();
345 Sym.SymInfo.Kind = static_cast<index::SymbolKind>(Data.consume8());
346 Sym.SymInfo.Lang = static_cast<index::SymbolLanguage>(Data.consume8());
347 Sym.Name = Data.consumeString(Strings);
348 Sym.Scope = Data.consumeString(Strings);
349 Sym.TemplateSpecializationArgs = Data.consumeString(Strings);
350 Sym.Definition = readLocation(Data, Strings);
351 Sym.CanonicalDeclaration = readLocation(Data, Strings);
352 Sym.References = Data.consumeVar();
353 Sym.Flags = static_cast<Symbol::SymbolFlag>(Data.consume8());
354 Sym.Origin = Origin;
355 Sym.Signature = Data.consumeString(Strings);
356 Sym.CompletionSnippetSuffix = Data.consumeString(Strings);
357 Sym.Documentation = Data.consumeString(Strings);
358 Sym.ReturnType = Data.consumeString(Strings);
359 Sym.Type = Data.consumeString(Strings);
360 if (!Data.consumeSize(Sym.IncludeHeaders))
361 return Sym;
362 for (auto &I : Sym.IncludeHeaders) {
363 I.IncludeHeader = Data.consumeString(Strings);
364 uint32_t RefsWithDirectives = Data.consumeVar();
365 I.References = RefsWithDirectives >> 2;
366 I.SupportedDirectives = RefsWithDirectives & 0x3;
367 }
368 return Sym;
369}
370
371// REFS ENCODING
372// A refs section has data grouped by Symbol. Each symbol has:
373// - SymbolID: 8 bytes
374// - NumRefs: varint
375// - Ref[NumRefs]
376// Fields of Ref are encoded in turn, see implementation.
377
378void writeRefs(const SymbolID &ID, llvm::ArrayRef<Ref> Refs,
379 const StringTableOut &Strings, llvm::raw_ostream &OS) {
380 OS << ID.raw();
381 writeVar(Refs.size(), OS);
382 for (const auto &Ref : Refs) {
383 OS.write(static_cast<unsigned char>(Ref.Kind));
384 writeLocation(Ref.Location, Strings, OS);
385 OS << Ref.Container.raw();
386 }
387}
388
389std::pair<SymbolID, std::vector<Ref>>
390readRefs(Reader &Data, llvm::ArrayRef<llvm::StringRef> Strings) {
391 std::pair<SymbolID, std::vector<Ref>> Result;
392 Result.first = Data.consumeID();
393 if (!Data.consumeSize(Result.second))
394 return Result;
395 for (auto &Ref : Result.second) {
396 Ref.Kind = static_cast<RefKind>(Data.consume8());
397 Ref.Location = readLocation(Data, Strings);
398 Ref.Container = Data.consumeID();
399 }
400 return Result;
401}
402
403// RELATIONS ENCODING
404// A relations section is a flat list of relations. Each relation has:
405// - SymbolID (subject): 8 bytes
406// - relation kind (predicate): 1 byte
407// - SymbolID (object): 8 bytes
408// In the future, we might prefer a packed representation if the need arises.
409
410void writeRelation(const Relation &R, llvm::raw_ostream &OS) {
411 OS << R.Subject.raw();
412 OS.write(static_cast<uint8_t>(R.Predicate));
413 OS << R.Object.raw();
414}
415
416Relation readRelation(Reader &Data) {
417 SymbolID Subject = Data.consumeID();
418 RelationKind Predicate = static_cast<RelationKind>(Data.consume8());
419 SymbolID Object = Data.consumeID();
420 return {Subject, Predicate, Object};
421}
422
423struct InternedCompileCommand {
424 llvm::StringRef Directory;
425 std::vector<llvm::StringRef> CommandLine;
426};
427
428void writeCompileCommand(const InternedCompileCommand &Cmd,
429 const StringTableOut &Strings,
430 llvm::raw_ostream &CmdOS) {
431 writeVar(Strings.index(Cmd.Directory), CmdOS);
432 writeVar(Cmd.CommandLine.size(), CmdOS);
433 for (llvm::StringRef C : Cmd.CommandLine)
434 writeVar(Strings.index(C), CmdOS);
435}
436
437InternedCompileCommand
438readCompileCommand(Reader CmdReader, llvm::ArrayRef<llvm::StringRef> Strings) {
439 InternedCompileCommand Cmd;
440 Cmd.Directory = CmdReader.consumeString(Strings);
441 if (!CmdReader.consumeSize(Cmd.CommandLine))
442 return Cmd;
443 for (llvm::StringRef &C : Cmd.CommandLine)
444 C = CmdReader.consumeString(Strings);
445 return Cmd;
446}
447
448// FILE ENCODING
449// A file is a RIFF chunk with type 'CdIx'.
450// It contains the sections:
451// - meta: version number
452// - srcs: information related to include graph
453// - stri: string table
454// - symb: symbols
455// - refs: references to symbols
456
457// The current versioning scheme is simple - non-current versions are rejected.
458// If you make a breaking change, bump this version number to invalidate stored
459// data. Later we may want to support some backward compatibility.
460constexpr static uint32_t Version = 19;
461
462llvm::Expected<IndexFileIn> readRIFF(llvm::StringRef Data,
463 SymbolOrigin Origin) {
464 auto RIFF = riff::readFile(Data);
465 if (!RIFF)
466 return RIFF.takeError();
467 if (RIFF->Type != riff::fourCC("CdIx"))
468 return error("wrong RIFF filetype: {0}", riff::fourCCStr(RIFF->Type));
469 llvm::StringMap<llvm::StringRef> Chunks;
470 for (const auto &Chunk : RIFF->Chunks)
471 Chunks.try_emplace(llvm::StringRef(Chunk.ID.data(), Chunk.ID.size()),
472 Chunk.Data);
473
474 if (!Chunks.count("meta"))
475 return error("missing meta chunk");
476 Reader Meta(Chunks.lookup("meta"));
477 auto SeenVersion = Meta.consume32();
478 if (SeenVersion != Version)
479 return error("wrong version: want {0}, got {1}", Version, SeenVersion);
480
481 // meta chunk is checked above, as we prefer the "version mismatch" error.
482 for (llvm::StringRef RequiredChunk : {"stri"})
483 if (!Chunks.count(RequiredChunk))
484 return error("missing required chunk {0}", RequiredChunk);
485
486 auto Strings = readStringTable(Chunks.lookup("stri"));
487 if (!Strings)
488 return Strings.takeError();
489
490 IndexFileIn Result;
491 if (Chunks.count("srcs")) {
492 Reader SrcsReader(Chunks.lookup("srcs"));
493 Result.Sources.emplace();
494 while (!SrcsReader.eof()) {
495 auto IGN = readIncludeGraphNode(SrcsReader, Strings->Strings);
496 auto Entry = Result.Sources->try_emplace(IGN.URI).first;
497 Entry->getValue() = std::move(IGN);
498 // We change all the strings inside the structure to point at the keys in
499 // the map, since it is the only copy of the string that's going to live.
500 Entry->getValue().URI = Entry->getKey();
501 for (auto &Include : Entry->getValue().DirectIncludes)
502 Include = Result.Sources->try_emplace(Include).first->getKey();
503 }
504 if (SrcsReader.err())
505 return error("malformed or truncated include uri");
506 }
507
508 if (Chunks.count("symb")) {
509 Reader SymbolReader(Chunks.lookup("symb"));
510 SymbolSlab::Builder Symbols;
511 while (!SymbolReader.eof())
512 Symbols.insert(readSymbol(SymbolReader, Strings->Strings, Origin));
513 if (SymbolReader.err())
514 return error("malformed or truncated symbol");
515 Result.Symbols = std::move(Symbols).build();
516 }
517 if (Chunks.count("refs")) {
518 Reader RefsReader(Chunks.lookup("refs"));
519 RefSlab::Builder Refs;
520 while (!RefsReader.eof()) {
521 auto RefsBundle = readRefs(RefsReader, Strings->Strings);
522 for (const auto &Ref : RefsBundle.second) // FIXME: bulk insert?
523 Refs.insert(RefsBundle.first, Ref);
524 }
525 if (RefsReader.err())
526 return error("malformed or truncated refs");
527 Result.Refs = std::move(Refs).build();
528 }
529 if (Chunks.count("rela")) {
530 Reader RelationsReader(Chunks.lookup("rela"));
531 RelationSlab::Builder Relations;
532 while (!RelationsReader.eof())
533 Relations.insert(readRelation(RelationsReader));
534 if (RelationsReader.err())
535 return error("malformed or truncated relations");
536 Result.Relations = std::move(Relations).build();
537 }
538 if (Chunks.count("cmdl")) {
539 Reader CmdReader(Chunks.lookup("cmdl"));
540 InternedCompileCommand Cmd =
541 readCompileCommand(CmdReader, Strings->Strings);
542 if (CmdReader.err())
543 return error("malformed or truncated commandline section");
544 Result.Cmd.emplace();
545 Result.Cmd->Directory = std::string(Cmd.Directory);
546 Result.Cmd->CommandLine.reserve(Cmd.CommandLine.size());
547 for (llvm::StringRef C : Cmd.CommandLine)
548 Result.Cmd->CommandLine.emplace_back(C);
549 }
550 return std::move(Result);
551}
552
553template <class Callback>
554void visitStrings(IncludeGraphNode &IGN, const Callback &CB) {
555 CB(IGN.URI);
556 for (llvm::StringRef &Include : IGN.DirectIncludes)
557 CB(Include);
558}
559
560void writeRIFF(const IndexFileOut &Data, llvm::raw_ostream &OS) {
561 assert(Data.Symbols && "An index file without symbols makes no sense!");
562 riff::File RIFF;
563 RIFF.Type = riff::fourCC("CdIx");
564
565 llvm::SmallString<4> Meta;
566 {
567 llvm::raw_svector_ostream MetaOS(Meta);
568 write32(Version, MetaOS);
569 }
570 RIFF.Chunks.push_back({riff::fourCC("meta"), Meta});
571
572 StringTableOut Strings;
573 std::vector<Symbol> Symbols;
574 for (const auto &Sym : *Data.Symbols) {
575 Symbols.emplace_back(Sym);
576 visitStrings(Symbols.back(),
577 [&](llvm::StringRef &S) { Strings.intern(S); });
578 }
579 std::vector<IncludeGraphNode> Sources;
580 if (Data.Sources)
581 for (const auto &Source : *Data.Sources) {
582 Sources.push_back(Source.getValue());
583 visitStrings(Sources.back(),
584 [&](llvm::StringRef &S) { Strings.intern(S); });
585 }
586
587 std::vector<std::pair<SymbolID, std::vector<Ref>>> Refs;
588 if (Data.Refs) {
589 for (const auto &Sym : *Data.Refs) {
590 Refs.emplace_back(Sym);
591 for (auto &Ref : Refs.back().second) {
592 llvm::StringRef File = Ref.Location.FileURI;
593 Strings.intern(File);
594 Ref.Location.FileURI = File.data();
595 }
596 }
597 }
598
599 std::vector<Relation> Relations;
600 if (Data.Relations) {
601 for (const auto &Relation : *Data.Relations) {
602 Relations.emplace_back(Relation);
603 // No strings to be interned in relations.
604 }
605 }
606
607 InternedCompileCommand InternedCmd;
608 if (Data.Cmd) {
609 InternedCmd.CommandLine.reserve(Data.Cmd->CommandLine.size());
610 InternedCmd.Directory = Data.Cmd->Directory;
611 Strings.intern(InternedCmd.Directory);
612 for (llvm::StringRef C : Data.Cmd->CommandLine) {
613 InternedCmd.CommandLine.emplace_back(C);
614 Strings.intern(InternedCmd.CommandLine.back());
615 }
616 }
617
618 std::string StringSection;
619 {
620 llvm::raw_string_ostream StringOS(StringSection);
621 Strings.finalize(StringOS);
622 }
623 RIFF.Chunks.push_back({riff::fourCC("stri"), StringSection});
624
625 std::string SymbolSection;
626 {
627 llvm::raw_string_ostream SymbolOS(SymbolSection);
628 for (const auto &Sym : Symbols)
629 writeSymbol(Sym, Strings, SymbolOS);
630 }
631 RIFF.Chunks.push_back({riff::fourCC("symb"), SymbolSection});
632
633 std::string RefsSection;
634 if (Data.Refs) {
635 {
636 llvm::raw_string_ostream RefsOS(RefsSection);
637 for (const auto &Sym : Refs)
638 writeRefs(Sym.first, Sym.second, Strings, RefsOS);
639 }
640 RIFF.Chunks.push_back({riff::fourCC("refs"), RefsSection});
641 }
642
643 std::string RelationSection;
644 if (Data.Relations) {
645 {
646 llvm::raw_string_ostream RelationOS{RelationSection};
647 for (const auto &Relation : Relations)
648 writeRelation(Relation, RelationOS);
649 }
650 RIFF.Chunks.push_back({riff::fourCC("rela"), RelationSection});
651 }
652
653 std::string SrcsSection;
654 {
655 {
656 llvm::raw_string_ostream SrcsOS(SrcsSection);
657 for (const auto &SF : Sources)
658 writeIncludeGraphNode(SF, Strings, SrcsOS);
659 }
660 RIFF.Chunks.push_back({riff::fourCC("srcs"), SrcsSection});
661 }
662
663 std::string CmdlSection;
664 if (Data.Cmd) {
665 {
666 llvm::raw_string_ostream CmdOS(CmdlSection);
667 writeCompileCommand(InternedCmd, Strings, CmdOS);
668 }
669 RIFF.Chunks.push_back({riff::fourCC("cmdl"), CmdlSection});
670 }
671
672 OS << RIFF;
673}
674
675} // namespace
676
677// Defined in YAMLSerialization.cpp.
678void writeYAML(const IndexFileOut &, llvm::raw_ostream &);
679llvm::Expected<IndexFileIn> readYAML(llvm::StringRef, SymbolOrigin Origin);
680
681llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const IndexFileOut &O) {
682 switch (O.Format) {
684 writeRIFF(O, OS);
685 break;
687 writeYAML(O, OS);
688 break;
689 }
690 return OS;
691}
692
693llvm::Expected<IndexFileIn> readIndexFile(llvm::StringRef Data,
694 SymbolOrigin Origin) {
695 if (Data.starts_with("RIFF")) {
696 return readRIFF(Data, Origin);
697 }
698 if (auto YAMLContents = readYAML(Data, Origin)) {
699 return std::move(*YAMLContents);
700 } else {
701 return error("Not a RIFF file and failed to parse as YAML: {0}",
702 YAMLContents.takeError());
703 }
704}
705
706std::unique_ptr<SymbolIndex> loadIndex(llvm::StringRef SymbolFilename,
707 SymbolOrigin Origin, bool UseDex) {
708 trace::Span OverallTracer("LoadIndex");
709 auto Buffer = llvm::MemoryBuffer::getFile(SymbolFilename);
710 if (!Buffer) {
711 elog("Can't open {0}: {1}", SymbolFilename, Buffer.getError().message());
712 return nullptr;
713 }
714
716 RefSlab Refs;
717 RelationSlab Relations;
718 {
719 trace::Span Tracer("ParseIndex");
720 if (auto I = readIndexFile(Buffer->get()->getBuffer(), Origin)) {
721 if (I->Symbols)
722 Symbols = std::move(*I->Symbols);
723 if (I->Refs)
724 Refs = std::move(*I->Refs);
725 if (I->Relations)
726 Relations = std::move(*I->Relations);
727 } else {
728 elog("Bad index file: {0}", I.takeError());
729 return nullptr;
730 }
731 }
732
733 size_t NumSym = Symbols.size();
734 size_t NumRefs = Refs.numRefs();
735 size_t NumRelations = Relations.size();
736
737 trace::Span Tracer("BuildIndex");
738 auto Index = UseDex ? dex::Dex::build(std::move(Symbols), std::move(Refs),
739 std::move(Relations))
740 : MemIndex::build(std::move(Symbols), std::move(Refs),
741 std::move(Relations));
742 vlog("Loaded {0} from {1} with estimated memory usage {2} bytes\n"
743 " - number of symbols: {3}\n"
744 " - number of refs: {4}\n"
745 " - number of relations: {5}",
746 UseDex ? "Dex" : "MemIndex", SymbolFilename,
747 Index->estimateMemoryUsage(), NumSym, NumRefs, NumRelations);
748 return Index;
749}
750
751} // namespace clangd
752} // namespace clang
const Expr * E
This defines Dex - a symbol index implementation based on query iterators over symbol tokens,...
const Criteria C
SourceLocation Loc
std::vector< llvm::StringRef > Strings
llvm::BumpPtrAllocator Arena
std::vector< llvm::StringRef > CommandLine
llvm::StringRef Directory
std::string Container
llvm::raw_string_ostream OS
Definition: TraceTests.cpp:160
static std::unique_ptr< SymbolIndex > build(SymbolSlab Symbols, RefSlab Refs, RelationSlab Relations)
Builds an index from slabs. The index takes ownership of the data.
Definition: MemIndex.cpp:17
void insert(const SymbolID &ID, const Ref &S)
Adds a ref to the slab. Deep copy: Strings will be owned by the slab.
Definition: Ref.cpp:36
An efficient structure of storing large set of symbol references in memory.
Definition: Ref.h:108
void insert(const Relation &R)
Adds a relation to the slab.
Definition: Relation.h:78
static constexpr size_t RawSize
Definition: SymbolID.h:49
An immutable symbol container that stores a set of symbols.
Definition: Symbol.h:199
static std::unique_ptr< SymbolIndex > build(SymbolSlab, RefSlab, RelationSlab)
Builds an index from slabs. The index takes ownership of the slab.
Definition: Dex.cpp:35
Records an event whose duration is the lifetime of the Span object.
Definition: Trace.h:143
std::vector< std::pair< DocID, float > > consume(Iterator &It)
Advances the iterator until it is exhausted.
Definition: Iterator.cpp:357
llvm::Expected< File > readFile(llvm::StringRef Stream)
Definition: RIFF.cpp:48
constexpr FourCC fourCC(const char(&Literal)[5])
Definition: RIFF.h:43
constexpr llvm::StringRef fourCCStr(const FourCC &Data)
Definition: RIFF.h:46
void visitStrings(Symbol &S, const Callback &CB)
Invokes Callback with each StringRef& contained in the Symbol.
Definition: Symbol.h:169
llvm::Expected< IndexFileIn > readIndexFile(llvm::StringRef Data, SymbolOrigin Origin)
llvm::Expected< IndexFileIn > readYAML(llvm::StringRef, SymbolOrigin Origin)
void vlog(const char *Fmt, Ts &&... Vals)
Definition: Logger.h:72
llvm::Error error(std::error_code EC, const char *Fmt, Ts &&... Vals)
Definition: Logger.h:79
llvm::raw_ostream & operator<<(llvm::raw_ostream &OS, const CodeCompletion &C)
llvm::unique_function< void(llvm::Expected< T >)> Callback
A Callback<T> is a void function that accepts Expected<T>.
Definition: Function.h:28
std::unique_ptr< SymbolIndex > loadIndex(llvm::StringRef SymbolFilename, SymbolOrigin Origin, bool UseDex)
RefKind
Describes the kind of a cross-reference.
Definition: Ref.h:28
void writeYAML(const IndexFileOut &, llvm::raw_ostream &)
void elog(const char *Fmt, Ts &&... Vals)
Definition: Logger.h:61
std::array< uint8_t, 20 > SymbolID
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//