clang-tools  14.0.0git
Serialization.cpp
Go to the documentation of this file.
1 //===-- Serialization.cpp - Binary serialization of index data ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Serialization.h"
10 #include "Headers.h"
11 #include "RIFF.h"
12 #include "SymbolLocation.h"
13 #include "SymbolOrigin.h"
14 #include "dex/Dex.h"
15 #include "support/Logger.h"
16 #include "support/Trace.h"
17 #include "clang/Tooling/CompilationDatabase.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/Support/Compiler.h"
20 #include "llvm/Support/Compression.h"
21 #include "llvm/Support/Endian.h"
22 #include "llvm/Support/Error.h"
23 #include "llvm/Support/raw_ostream.h"
24 #include <cstdint>
25 #include <vector>
26 
27 namespace clang {
28 namespace clangd {
29 namespace {
30 
31 // IO PRIMITIVES
32 // We use little-endian 32 bit ints, sometimes with variable-length encoding.
33 //
34 // Variable-length int encoding (varint) uses the bottom 7 bits of each byte
35 // to encode the number, and the top bit to indicate whether more bytes follow.
36 // e.g. 9a 2f means [0x1a and keep reading, 0x2f and stop].
37 // This represents 0x1a | 0x2f<<7 = 6042.
38 // A 32-bit integer takes 1-5 bytes to encode; small numbers are more compact.
39 
40 // Reads binary data from a StringRef, and keeps track of position.
41 class Reader {
42  const char *Begin, *End;
43  bool Err = false;
44 
45 public:
46  Reader(llvm::StringRef Data) : Begin(Data.begin()), End(Data.end()) {}
47  // The "error" bit is set by reading past EOF or reading invalid data.
48  // When in an error state, reads may return zero values: callers should check.
49  bool err() const { return Err; }
50  // Did we read all the data, or encounter an error?
51  bool eof() const { return Begin == End || Err; }
52  // All the data we didn't read yet.
53  llvm::StringRef rest() const { return llvm::StringRef(Begin, End - Begin); }
54 
55  uint8_t consume8() {
56  if (LLVM_UNLIKELY(Begin == End)) {
57  Err = true;
58  return 0;
59  }
60  return *Begin++;
61  }
62 
63  uint32_t consume32() {
64  if (LLVM_UNLIKELY(Begin + 4 > End)) {
65  Err = true;
66  return 0;
67  }
68  auto Ret = llvm::support::endian::read32le(Begin);
69  Begin += 4;
70  return Ret;
71  }
72 
73  llvm::StringRef consume(int N) {
74  if (LLVM_UNLIKELY(Begin + N > End)) {
75  Err = true;
76  return llvm::StringRef();
77  }
78  llvm::StringRef Ret(Begin, N);
79  Begin += N;
80  return Ret;
81  }
82 
83  uint32_t consumeVar() {
84  constexpr static uint8_t More = 1 << 7;
85 
86  // Use a 32 bit unsigned here to prevent promotion to signed int (unless int
87  // is wider than 32 bits).
88  uint32_t B = consume8();
89  if (LLVM_LIKELY(!(B & More)))
90  return B;
91  uint32_t Val = B & ~More;
92  for (int Shift = 7; B & More && Shift < 32; Shift += 7) {
93  B = consume8();
94  // 5th byte of a varint can only have lowest 4 bits set.
95  assert((Shift != 28 || B == (B & 0x0f)) && "Invalid varint encoding");
96  Val |= (B & ~More) << Shift;
97  }
98  return Val;
99  }
100 
101  llvm::StringRef consumeString(llvm::ArrayRef<llvm::StringRef> Strings) {
102  auto StringIndex = consumeVar();
103  if (LLVM_UNLIKELY(StringIndex >= Strings.size())) {
104  Err = true;
105  return llvm::StringRef();
106  }
107  return Strings[StringIndex];
108  }
109 
110  SymbolID consumeID() {
111  llvm::StringRef Raw = consume(SymbolID::RawSize); // short if truncated.
112  return LLVM_UNLIKELY(err()) ? SymbolID() : SymbolID::fromRaw(Raw);
113  }
114 
115  // Read a varint (as consumeVar) and resize the container accordingly.
116  // If the size is invalid, return false and mark an error.
117  // (The caller should abort in this case).
118  template <typename T> LLVM_NODISCARD bool consumeSize(T &Container) {
119  auto Size = consumeVar();
120  // Conservatively assume each element is at least one byte.
121  if (Size > (size_t)(End - Begin)) {
122  Err = true;
123  return false;
124  }
125  Container.resize(Size);
126  return true;
127  }
128 };
129 
130 void write32(uint32_t I, llvm::raw_ostream &OS) {
131  char Buf[4];
132  llvm::support::endian::write32le(Buf, I);
133  OS.write(Buf, sizeof(Buf));
134 }
135 
136 void writeVar(uint32_t I, llvm::raw_ostream &OS) {
137  constexpr static uint8_t More = 1 << 7;
138  if (LLVM_LIKELY(I < 1 << 7)) {
139  OS.write(I);
140  return;
141  }
142  for (;;) {
143  OS.write(I | More);
144  I >>= 7;
145  if (I < 1 << 7) {
146  OS.write(I);
147  return;
148  }
149  }
150 }
151 
152 // STRING TABLE ENCODING
153 // Index data has many string fields, and many strings are identical.
154 // We store each string once, and refer to them by index.
155 //
156 // The string table's format is:
157 // - UncompressedSize : uint32 (or 0 for no compression)
158 // - CompressedData : byte[CompressedSize]
159 //
160 // CompressedData is a zlib-compressed byte[UncompressedSize].
161 // It contains a sequence of null-terminated strings, e.g. "foo\0bar\0".
162 // These are sorted to improve compression.
163 
164 // Maps each string to a canonical representation.
165 // Strings remain owned externally (e.g. by SymbolSlab).
166 class StringTableOut {
167  llvm::DenseSet<llvm::StringRef> Unique;
168  std::vector<llvm::StringRef> Sorted;
169  // Since strings are interned, look up can be by pointer.
170  llvm::DenseMap<std::pair<const char *, size_t>, unsigned> Index;
171 
172 public:
173  StringTableOut() {
174  // Ensure there's at least one string in the table.
175  // Table size zero is reserved to indicate no compression.
176  Unique.insert("");
177  }
178  // Add a string to the table. Overwrites S if an identical string exists.
179  void intern(llvm::StringRef &S) { S = *Unique.insert(S).first; };
180  // Finalize the table and write it to OS. No more strings may be added.
181  void finalize(llvm::raw_ostream &OS) {
182  Sorted = {Unique.begin(), Unique.end()};
183  llvm::sort(Sorted);
184  for (unsigned I = 0; I < Sorted.size(); ++I)
185  Index.try_emplace({Sorted[I].data(), Sorted[I].size()}, I);
186 
187  std::string RawTable;
188  for (llvm::StringRef S : Sorted) {
189  RawTable.append(std::string(S));
190  RawTable.push_back(0);
191  }
192  if (llvm::zlib::isAvailable()) {
193  llvm::SmallString<1> Compressed;
194  llvm::cantFail(llvm::zlib::compress(RawTable, Compressed));
195  write32(RawTable.size(), OS);
196  OS << Compressed;
197  } else {
198  write32(0, OS); // No compression.
199  OS << RawTable;
200  }
201  }
202  // Get the ID of an string, which must be interned. Table must be finalized.
203  unsigned index(llvm::StringRef S) const {
204  assert(!Sorted.empty() && "table not finalized");
205  assert(Index.count({S.data(), S.size()}) && "string not interned");
206  return Index.find({S.data(), S.size()})->second;
207  }
208 };
209 
210 struct StringTableIn {
211  llvm::BumpPtrAllocator Arena;
212  std::vector<llvm::StringRef> Strings;
213 };
214 
215 llvm::Expected<StringTableIn> readStringTable(llvm::StringRef Data) {
216  Reader R(Data);
217  size_t UncompressedSize = R.consume32();
218  if (R.err())
219  return error("Truncated string table");
220 
221  llvm::StringRef Uncompressed;
222  llvm::SmallString<1> UncompressedStorage;
223  if (UncompressedSize == 0) // No compression
224  Uncompressed = R.rest();
225  else if (llvm::zlib::isAvailable()) {
226  // Don't allocate a massive buffer if UncompressedSize was corrupted
227  // This is effective for sharded index, but not big monolithic ones, as
228  // once compressed size reaches 4MB nothing can be ruled out.
229  // Theoretical max ratio from https://zlib.net/zlib_tech.html
230  constexpr int MaxCompressionRatio = 1032;
231  if (UncompressedSize / MaxCompressionRatio > R.rest().size())
232  return error("Bad stri table: uncompress {0} -> {1} bytes is implausible",
233  R.rest().size(), UncompressedSize);
234 
235  if (llvm::Error E = llvm::zlib::uncompress(R.rest(), UncompressedStorage,
236  UncompressedSize))
237  return std::move(E);
238  Uncompressed = UncompressedStorage;
239  } else
240  return error("Compressed string table, but zlib is unavailable");
241 
242  StringTableIn Table;
243  llvm::StringSaver Saver(Table.Arena);
244  R = Reader(Uncompressed);
245  for (Reader R(Uncompressed); !R.eof();) {
246  auto Len = R.rest().find(0);
247  if (Len == llvm::StringRef::npos)
248  return error("Bad string table: not null terminated");
249  Table.Strings.push_back(Saver.save(R.consume(Len)));
250  R.consume8();
251  }
252  if (R.err())
253  return error("Truncated string table");
254  return std::move(Table);
255 }
256 
257 // SYMBOL ENCODING
258 // Each field of clangd::Symbol is encoded in turn (see implementation).
259 // - StringRef fields encode as varint (index into the string table)
260 // - enums encode as the underlying type
261 // - most numbers encode as varint
262 
263 void writeLocation(const SymbolLocation &Loc, const StringTableOut &Strings,
264  llvm::raw_ostream &OS) {
265  writeVar(Strings.index(Loc.FileURI), OS);
266  for (const auto &Endpoint : {Loc.Start, Loc.End}) {
267  writeVar(Endpoint.line(), OS);
268  writeVar(Endpoint.column(), OS);
269  }
270 }
271 
272 SymbolLocation readLocation(Reader &Data,
273  llvm::ArrayRef<llvm::StringRef> Strings) {
274  SymbolLocation Loc;
275  Loc.FileURI = Data.consumeString(Strings).data();
276  for (auto *Endpoint : {&Loc.Start, &Loc.End}) {
277  Endpoint->setLine(Data.consumeVar());
278  Endpoint->setColumn(Data.consumeVar());
279  }
280  return Loc;
281 }
282 
283 IncludeGraphNode readIncludeGraphNode(Reader &Data,
284  llvm::ArrayRef<llvm::StringRef> Strings) {
285  IncludeGraphNode IGN;
286  IGN.Flags = static_cast<IncludeGraphNode::SourceFlag>(Data.consume8());
287  IGN.URI = Data.consumeString(Strings);
288  llvm::StringRef Digest = Data.consume(IGN.Digest.size());
289  std::copy(Digest.bytes_begin(), Digest.bytes_end(), IGN.Digest.begin());
290  if (!Data.consumeSize(IGN.DirectIncludes))
291  return IGN;
292  for (llvm::StringRef &Include : IGN.DirectIncludes)
293  Include = Data.consumeString(Strings);
294  return IGN;
295 }
296 
297 void writeIncludeGraphNode(const IncludeGraphNode &IGN,
298  const StringTableOut &Strings,
299  llvm::raw_ostream &OS) {
300  OS.write(static_cast<uint8_t>(IGN.Flags));
301  writeVar(Strings.index(IGN.URI), OS);
302  llvm::StringRef Hash(reinterpret_cast<const char *>(IGN.Digest.data()),
303  IGN.Digest.size());
304  OS << Hash;
305  writeVar(IGN.DirectIncludes.size(), OS);
306  for (llvm::StringRef Include : IGN.DirectIncludes)
307  writeVar(Strings.index(Include), OS);
308 }
309 
310 void writeSymbol(const Symbol &Sym, const StringTableOut &Strings,
311  llvm::raw_ostream &OS) {
312  OS << Sym.ID.raw(); // TODO: once we start writing xrefs and posting lists,
313  // symbol IDs should probably be in a string table.
314  OS.write(static_cast<uint8_t>(Sym.SymInfo.Kind));
315  OS.write(static_cast<uint8_t>(Sym.SymInfo.Lang));
316  writeVar(Strings.index(Sym.Name), OS);
317  writeVar(Strings.index(Sym.Scope), OS);
318  writeVar(Strings.index(Sym.TemplateSpecializationArgs), OS);
319  writeLocation(Sym.Definition, Strings, OS);
320  writeLocation(Sym.CanonicalDeclaration, Strings, OS);
321  writeVar(Sym.References, OS);
322  OS.write(static_cast<uint8_t>(Sym.Flags));
323  OS.write(static_cast<uint8_t>(Sym.Origin));
324  writeVar(Strings.index(Sym.Signature), OS);
325  writeVar(Strings.index(Sym.CompletionSnippetSuffix), OS);
326  writeVar(Strings.index(Sym.Documentation), OS);
327  writeVar(Strings.index(Sym.ReturnType), OS);
328  writeVar(Strings.index(Sym.Type), OS);
329 
330  auto WriteInclude = [&](const Symbol::IncludeHeaderWithReferences &Include) {
331  writeVar(Strings.index(Include.IncludeHeader), OS);
332  writeVar(Include.References, OS);
333  };
334  writeVar(Sym.IncludeHeaders.size(), OS);
335  for (const auto &Include : Sym.IncludeHeaders)
336  WriteInclude(Include);
337 }
338 
339 Symbol readSymbol(Reader &Data, llvm::ArrayRef<llvm::StringRef> Strings) {
340  Symbol Sym;
341  Sym.ID = Data.consumeID();
342  Sym.SymInfo.Kind = static_cast<index::SymbolKind>(Data.consume8());
343  Sym.SymInfo.Lang = static_cast<index::SymbolLanguage>(Data.consume8());
344  Sym.Name = Data.consumeString(Strings);
345  Sym.Scope = Data.consumeString(Strings);
346  Sym.TemplateSpecializationArgs = Data.consumeString(Strings);
347  Sym.Definition = readLocation(Data, Strings);
348  Sym.CanonicalDeclaration = readLocation(Data, Strings);
349  Sym.References = Data.consumeVar();
350  Sym.Flags = static_cast<Symbol::SymbolFlag>(Data.consume8());
351  Sym.Origin = static_cast<SymbolOrigin>(Data.consume8());
352  Sym.Signature = Data.consumeString(Strings);
353  Sym.CompletionSnippetSuffix = Data.consumeString(Strings);
354  Sym.Documentation = Data.consumeString(Strings);
355  Sym.ReturnType = Data.consumeString(Strings);
356  Sym.Type = Data.consumeString(Strings);
357  if (!Data.consumeSize(Sym.IncludeHeaders))
358  return Sym;
359  for (auto &I : Sym.IncludeHeaders) {
360  I.IncludeHeader = Data.consumeString(Strings);
361  I.References = Data.consumeVar();
362  }
363  return Sym;
364 }
365 
366 // REFS ENCODING
367 // A refs section has data grouped by Symbol. Each symbol has:
368 // - SymbolID: 8 bytes
369 // - NumRefs: varint
370 // - Ref[NumRefs]
371 // Fields of Ref are encoded in turn, see implementation.
372 
373 void writeRefs(const SymbolID &ID, llvm::ArrayRef<Ref> Refs,
374  const StringTableOut &Strings, llvm::raw_ostream &OS) {
375  OS << ID.raw();
376  writeVar(Refs.size(), OS);
377  for (const auto &Ref : Refs) {
378  OS.write(static_cast<unsigned char>(Ref.Kind));
379  writeLocation(Ref.Location, Strings, OS);
380  OS << Ref.Container.raw();
381  }
382 }
383 
384 std::pair<SymbolID, std::vector<Ref>>
385 readRefs(Reader &Data, llvm::ArrayRef<llvm::StringRef> Strings) {
386  std::pair<SymbolID, std::vector<Ref>> Result;
387  Result.first = Data.consumeID();
388  if (!Data.consumeSize(Result.second))
389  return Result;
390  for (auto &Ref : Result.second) {
391  Ref.Kind = static_cast<RefKind>(Data.consume8());
392  Ref.Location = readLocation(Data, Strings);
393  Ref.Container = Data.consumeID();
394  }
395  return Result;
396 }
397 
398 // RELATIONS ENCODING
399 // A relations section is a flat list of relations. Each relation has:
400 // - SymbolID (subject): 8 bytes
401 // - relation kind (predicate): 1 byte
402 // - SymbolID (object): 8 bytes
403 // In the future, we might prefer a packed representation if the need arises.
404 
405 void writeRelation(const Relation &R, llvm::raw_ostream &OS) {
406  OS << R.Subject.raw();
407  OS.write(static_cast<uint8_t>(R.Predicate));
408  OS << R.Object.raw();
409 }
410 
411 Relation readRelation(Reader &Data) {
412  SymbolID Subject = Data.consumeID();
413  RelationKind Predicate = static_cast<RelationKind>(Data.consume8());
414  SymbolID Object = Data.consumeID();
415  return {Subject, Predicate, Object};
416 }
417 
418 struct InternedCompileCommand {
419  llvm::StringRef Directory;
420  std::vector<llvm::StringRef> CommandLine;
421 };
422 
423 void writeCompileCommand(const InternedCompileCommand &Cmd,
424  const StringTableOut &Strings,
425  llvm::raw_ostream &CmdOS) {
426  writeVar(Strings.index(Cmd.Directory), CmdOS);
427  writeVar(Cmd.CommandLine.size(), CmdOS);
428  for (llvm::StringRef C : Cmd.CommandLine)
429  writeVar(Strings.index(C), CmdOS);
430 }
431 
432 InternedCompileCommand
433 readCompileCommand(Reader CmdReader, llvm::ArrayRef<llvm::StringRef> Strings) {
434  InternedCompileCommand Cmd;
435  Cmd.Directory = CmdReader.consumeString(Strings);
436  if (!CmdReader.consumeSize(Cmd.CommandLine))
437  return Cmd;
438  for (llvm::StringRef &C : Cmd.CommandLine)
439  C = CmdReader.consumeString(Strings);
440  return Cmd;
441 }
442 
443 // FILE ENCODING
444 // A file is a RIFF chunk with type 'CdIx'.
445 // It contains the sections:
446 // - meta: version number
447 // - srcs: information related to include graph
448 // - stri: string table
449 // - symb: symbols
450 // - refs: references to symbols
451 
452 // The current versioning scheme is simple - non-current versions are rejected.
453 // If you make a breaking change, bump this version number to invalidate stored
454 // data. Later we may want to support some backward compatibility.
455 constexpr static uint32_t Version = 16;
456 
457 llvm::Expected<IndexFileIn> readRIFF(llvm::StringRef Data) {
458  auto RIFF = riff::readFile(Data);
459  if (!RIFF)
460  return RIFF.takeError();
461  if (RIFF->Type != riff::fourCC("CdIx"))
462  return error("wrong RIFF filetype: {0}", riff::fourCCStr(RIFF->Type));
463  llvm::StringMap<llvm::StringRef> Chunks;
464  for (const auto &Chunk : RIFF->Chunks)
465  Chunks.try_emplace(llvm::StringRef(Chunk.ID.data(), Chunk.ID.size()),
466  Chunk.Data);
467 
468  if (!Chunks.count("meta"))
469  return error("missing meta chunk");
470  Reader Meta(Chunks.lookup("meta"));
471  auto SeenVersion = Meta.consume32();
472  if (SeenVersion != Version)
473  return error("wrong version: want {0}, got {1}", Version, SeenVersion);
474 
475  // meta chunk is checked above, as we prefer the "version mismatch" error.
476  for (llvm::StringRef RequiredChunk : {"stri"})
477  if (!Chunks.count(RequiredChunk))
478  return error("missing required chunk {0}", RequiredChunk);
479 
480  auto Strings = readStringTable(Chunks.lookup("stri"));
481  if (!Strings)
482  return Strings.takeError();
483 
484  IndexFileIn Result;
485  if (Chunks.count("srcs")) {
486  Reader SrcsReader(Chunks.lookup("srcs"));
487  Result.Sources.emplace();
488  while (!SrcsReader.eof()) {
489  auto IGN = readIncludeGraphNode(SrcsReader, Strings->Strings);
490  auto Entry = Result.Sources->try_emplace(IGN.URI).first;
491  Entry->getValue() = std::move(IGN);
492  // We change all the strings inside the structure to point at the keys in
493  // the map, since it is the only copy of the string that's going to live.
494  Entry->getValue().URI = Entry->getKey();
495  for (auto &Include : Entry->getValue().DirectIncludes)
496  Include = Result.Sources->try_emplace(Include).first->getKey();
497  }
498  if (SrcsReader.err())
499  return error("malformed or truncated include uri");
500  }
501 
502  if (Chunks.count("symb")) {
503  Reader SymbolReader(Chunks.lookup("symb"));
504  SymbolSlab::Builder Symbols;
505  while (!SymbolReader.eof())
506  Symbols.insert(readSymbol(SymbolReader, Strings->Strings));
507  if (SymbolReader.err())
508  return error("malformed or truncated symbol");
509  Result.Symbols = std::move(Symbols).build();
510  }
511  if (Chunks.count("refs")) {
512  Reader RefsReader(Chunks.lookup("refs"));
514  while (!RefsReader.eof()) {
515  auto RefsBundle = readRefs(RefsReader, Strings->Strings);
516  for (const auto &Ref : RefsBundle.second) // FIXME: bulk insert?
517  Refs.insert(RefsBundle.first, Ref);
518  }
519  if (RefsReader.err())
520  return error("malformed or truncated refs");
521  Result.Refs = std::move(Refs).build();
522  }
523  if (Chunks.count("rela")) {
524  Reader RelationsReader(Chunks.lookup("rela"));
525  RelationSlab::Builder Relations;
526  while (!RelationsReader.eof())
527  Relations.insert(readRelation(RelationsReader));
528  if (RelationsReader.err())
529  return error("malformed or truncated relations");
530  Result.Relations = std::move(Relations).build();
531  }
532  if (Chunks.count("cmdl")) {
533  Reader CmdReader(Chunks.lookup("cmdl"));
534  InternedCompileCommand Cmd =
535  readCompileCommand(CmdReader, Strings->Strings);
536  if (CmdReader.err())
537  return error("malformed or truncated commandline section");
538  Result.Cmd.emplace();
539  Result.Cmd->Directory = std::string(Cmd.Directory);
540  Result.Cmd->CommandLine.reserve(Cmd.CommandLine.size());
541  for (llvm::StringRef C : Cmd.CommandLine)
542  Result.Cmd->CommandLine.emplace_back(C);
543  }
544  return std::move(Result);
545 }
546 
547 template <class Callback>
548 void visitStrings(IncludeGraphNode &IGN, const Callback &CB) {
549  CB(IGN.URI);
550  for (llvm::StringRef &Include : IGN.DirectIncludes)
551  CB(Include);
552 }
553 
554 void writeRIFF(const IndexFileOut &Data, llvm::raw_ostream &OS) {
555  assert(Data.Symbols && "An index file without symbols makes no sense!");
556  riff::File RIFF;
557  RIFF.Type = riff::fourCC("CdIx");
558 
559  llvm::SmallString<4> Meta;
560  {
561  llvm::raw_svector_ostream MetaOS(Meta);
562  write32(Version, MetaOS);
563  }
564  RIFF.Chunks.push_back({riff::fourCC("meta"), Meta});
565 
566  StringTableOut Strings;
567  std::vector<Symbol> Symbols;
568  for (const auto &Sym : *Data.Symbols) {
569  Symbols.emplace_back(Sym);
570  visitStrings(Symbols.back(),
571  [&](llvm::StringRef &S) { Strings.intern(S); });
572  }
573  std::vector<IncludeGraphNode> Sources;
574  if (Data.Sources)
575  for (const auto &Source : *Data.Sources) {
576  Sources.push_back(Source.getValue());
577  visitStrings(Sources.back(),
578  [&](llvm::StringRef &S) { Strings.intern(S); });
579  }
580 
581  std::vector<std::pair<SymbolID, std::vector<Ref>>> Refs;
582  if (Data.Refs) {
583  for (const auto &Sym : *Data.Refs) {
584  Refs.emplace_back(Sym);
585  for (auto &Ref : Refs.back().second) {
586  llvm::StringRef File = Ref.Location.FileURI;
587  Strings.intern(File);
588  Ref.Location.FileURI = File.data();
589  }
590  }
591  }
592 
593  std::vector<Relation> Relations;
594  if (Data.Relations) {
595  for (const auto &Relation : *Data.Relations) {
596  Relations.emplace_back(Relation);
597  // No strings to be interned in relations.
598  }
599  }
600 
601  InternedCompileCommand InternedCmd;
602  if (Data.Cmd) {
603  InternedCmd.CommandLine.reserve(Data.Cmd->CommandLine.size());
604  InternedCmd.Directory = Data.Cmd->Directory;
605  Strings.intern(InternedCmd.Directory);
606  for (llvm::StringRef C : Data.Cmd->CommandLine) {
607  InternedCmd.CommandLine.emplace_back(C);
608  Strings.intern(InternedCmd.CommandLine.back());
609  }
610  }
611 
612  std::string StringSection;
613  {
614  llvm::raw_string_ostream StringOS(StringSection);
615  Strings.finalize(StringOS);
616  }
617  RIFF.Chunks.push_back({riff::fourCC("stri"), StringSection});
618 
619  std::string SymbolSection;
620  {
621  llvm::raw_string_ostream SymbolOS(SymbolSection);
622  for (const auto &Sym : Symbols)
623  writeSymbol(Sym, Strings, SymbolOS);
624  }
625  RIFF.Chunks.push_back({riff::fourCC("symb"), SymbolSection});
626 
627  std::string RefsSection;
628  if (Data.Refs) {
629  {
630  llvm::raw_string_ostream RefsOS(RefsSection);
631  for (const auto &Sym : Refs)
632  writeRefs(Sym.first, Sym.second, Strings, RefsOS);
633  }
634  RIFF.Chunks.push_back({riff::fourCC("refs"), RefsSection});
635  }
636 
637  std::string RelationSection;
638  if (Data.Relations) {
639  {
640  llvm::raw_string_ostream RelationOS{RelationSection};
641  for (const auto &Relation : Relations)
642  writeRelation(Relation, RelationOS);
643  }
644  RIFF.Chunks.push_back({riff::fourCC("rela"), RelationSection});
645  }
646 
647  std::string SrcsSection;
648  {
649  {
650  llvm::raw_string_ostream SrcsOS(SrcsSection);
651  for (const auto &SF : Sources)
652  writeIncludeGraphNode(SF, Strings, SrcsOS);
653  }
654  RIFF.Chunks.push_back({riff::fourCC("srcs"), SrcsSection});
655  }
656 
657  std::string CmdlSection;
658  if (Data.Cmd) {
659  {
660  llvm::raw_string_ostream CmdOS(CmdlSection);
661  writeCompileCommand(InternedCmd, Strings, CmdOS);
662  }
663  RIFF.Chunks.push_back({riff::fourCC("cmdl"), CmdlSection});
664  }
665 
666  OS << RIFF;
667 }
668 
669 } // namespace
670 
671 // Defined in YAMLSerialization.cpp.
672 void writeYAML(const IndexFileOut &, llvm::raw_ostream &);
673 llvm::Expected<IndexFileIn> readYAML(llvm::StringRef);
674 
675 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const IndexFileOut &O) {
676  switch (O.Format) {
678  writeRIFF(O, OS);
679  break;
681  writeYAML(O, OS);
682  break;
683  }
684  return OS;
685 }
686 
687 llvm::Expected<IndexFileIn> readIndexFile(llvm::StringRef Data) {
688  if (Data.startswith("RIFF")) {
689  return readRIFF(Data);
690  } else if (auto YAMLContents = readYAML(Data)) {
691  return std::move(*YAMLContents);
692  } else {
693  return error("Not a RIFF file and failed to parse as YAML: {0}",
694  YAMLContents.takeError());
695  }
696 }
697 
698 std::unique_ptr<SymbolIndex> loadIndex(llvm::StringRef SymbolFilename,
699  bool UseDex) {
700  trace::Span OverallTracer("LoadIndex");
701  auto Buffer = llvm::MemoryBuffer::getFile(SymbolFilename);
702  if (!Buffer) {
703  elog("Can't open {0}: {1}", SymbolFilename, Buffer.getError().message());
704  return nullptr;
705  }
706 
707  SymbolSlab Symbols;
708  RefSlab Refs;
709  RelationSlab Relations;
710  {
711  trace::Span Tracer("ParseIndex");
712  if (auto I = readIndexFile(Buffer->get()->getBuffer())) {
713  if (I->Symbols)
714  Symbols = std::move(*I->Symbols);
715  if (I->Refs)
716  Refs = std::move(*I->Refs);
717  if (I->Relations)
718  Relations = std::move(*I->Relations);
719  } else {
720  elog("Bad index file: {0}", I.takeError());
721  return nullptr;
722  }
723  }
724 
725  size_t NumSym = Symbols.size();
726  size_t NumRefs = Refs.numRefs();
727  size_t NumRelations = Relations.size();
728 
729  trace::Span Tracer("BuildIndex");
730  auto Index = UseDex ? dex::Dex::build(std::move(Symbols), std::move(Refs),
731  std::move(Relations))
732  : MemIndex::build(std::move(Symbols), std::move(Refs),
733  std::move(Relations));
734  vlog("Loaded {0} from {1} with estimated memory usage {2} bytes\n"
735  " - number of symbols: {3}\n"
736  " - number of refs: {4}\n"
737  " - number of relations: {5}",
738  UseDex ? "Dex" : "MemIndex", SymbolFilename,
739  Index->estimateMemoryUsage(), NumSym, NumRefs, NumRelations);
740  return Index;
741 }
742 
743 } // namespace clangd
744 } // namespace clang
clang::clangd::IndexFileFormat::YAML
@ YAML
Loc
SourceLocation Loc
Definition: KernelNameRestrictionCheck.cpp:45
clang::clangd::riff::fourCC
constexpr FourCC fourCC(const char(&Literal)[5])
Definition: RIFF.h:44
Dex.h
Headers.h
SymbolOrigin.h
E
const Expr * E
Definition: AvoidBindCheck.cpp:88
Refs
RefSlab Refs
Definition: SymbolCollectorTests.cpp:311
clang::clangd::Symbol::SymbolFlag
SymbolFlag
Definition: Symbol.h:113
clang::clangd::IndexFileFormat::RIFF
@ RIFF
SymbolLocation.h
Tracer
std::unique_ptr< trace::EventTracer > Tracer
Definition: TraceTests.cpp:164
clang::clangd::error
llvm::Error error(std::error_code EC, const char *Fmt, Ts &&... Vals)
Definition: Logger.h:80
clang::clangd::SymbolKind::Object
@ Object
clang::clangd::dex::consume
std::vector< std::pair< DocID, float > > consume(Iterator &It)
Advances the iterator until it is exhausted.
Definition: Iterator.cpp:357
clang::clangd::RefKind
RefKind
Describes the kind of a cross-reference.
Definition: Ref.h:30
clang::clangd::RelationSlab::Builder::insert
void insert(const Relation &R)
Adds a relation to the slab.
Definition: Relation.h:80
clang::clangd::riff::fourCCStr
constexpr llvm::StringRef fourCCStr(const FourCC &Data)
Definition: RIFF.h:47
Trace.h
clang::clangd::dex::Dex::build
static std::unique_ptr< SymbolIndex > build(SymbolSlab, RefSlab, RelationSlab)
Builds an index from slabs. The index takes ownership of the slab.
Definition: Dex.cpp:26
clang::clangd::readIndexFile
llvm::Expected< IndexFileIn > readIndexFile(llvm::StringRef Data)
Definition: Serialization.cpp:687
clang::clangd::RefSlab
An efficient structure of storing large set of symbol references in memory.
Definition: Ref.h:110
clang::clangd::IndexFileOut
Definition: Serialization.h:55
clang::clangd::SymbolIndex::estimateMemoryUsage
virtual size_t estimateMemoryUsage() const =0
Returns estimated size of index (in bytes).
clang::clangd::RefSlab::numRefs
size_t numRefs() const
Definition: Ref.h:125
clang::clangd::readYAML
llvm::Expected< IndexFileIn > readYAML(llvm::StringRef)
Definition: YAMLSerialization.cpp:440
RIFF.h
clang::clangd::IndexFileOut::Format
IndexFileFormat Format
Definition: Serialization.h:62
clang::clangd::riff::readFile
llvm::Expected< File > readFile(llvm::StringRef Stream)
Definition: RIFF.cpp:48
clang::clangd::writeYAML
void writeYAML(const IndexFileOut &, llvm::raw_ostream &)
Definition: YAMLSerialization.cpp:407
clang::clangd::RelationSlab
Definition: Relation.h:52
Builder
CodeCompletionBuilder Builder
Definition: CodeCompletionStringsTests.cpp:36
Logger.h
CommandLine
std::vector< llvm::StringRef > CommandLine
Definition: Serialization.cpp:420
Directory
llvm::StringRef Directory
Definition: Serialization.cpp:419
clang::clangd::IncludeGraphNode::SourceFlag
SourceFlag
Definition: Headers.h:69
clang::clangd::loadIndex
std::unique_ptr< SymbolIndex > loadIndex(llvm::StringRef SymbolFilename, bool UseDex)
Definition: Serialization.cpp:698
clang::clangd::SymbolID::RawSize
constexpr static size_t RawSize
Definition: SymbolID.h:49
Serialization.h
clang::clangd::vlog
void vlog(const char *Fmt, Ts &&... Vals)
Definition: Logger.h:73
clang::doc::SymbolID
std::array< uint8_t, 20 > SymbolID
Definition: Representation.h:30
clang::clangd::MemIndex::build
static std::unique_ptr< SymbolIndex > build(SymbolSlab Symbols, RefSlab Refs, RelationSlab Relations)
Builds an index from slabs. The index takes ownership of the data.
Definition: MemIndex.cpp:19
clang::clangd::SymbolOrigin
SymbolOrigin
Definition: SymbolOrigin.h:21
clang::clangd::operator<<
llvm::raw_ostream & operator<<(llvm::raw_ostream &OS, const CodeCompletion &C)
Definition: CodeComplete.cpp:2013
SymbolKind
clang::find_all_symbols::SymbolInfo::SymbolKind SymbolKind
Definition: SymbolInfo.cpp:21
Entry
Definition: Modularize.cpp:428
Index
const SymbolIndex * Index
Definition: Dexp.cpp:99
Strings
std::vector< llvm::StringRef > Strings
Definition: Serialization.cpp:212
ID
static char ID
Definition: Logger.cpp:74
C
const Criteria C
Definition: FunctionCognitiveComplexityCheck.cpp:93
clang
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
Definition: ApplyReplacements.h:27
OS
llvm::raw_string_ostream OS
Definition: TraceTests.cpp:163
Arena
llvm::BumpPtrAllocator Arena
Definition: Serialization.cpp:211
clang::clangd::RefSlab::size
size_t size() const
Gets the number of symbols.
Definition: Ref.h:124
clang::clangd::Callback
llvm::unique_function< void(llvm::Expected< T >)> Callback
A Callback<T> is a void function that accepts Expected<T>.
Definition: Function.h:28
clang::clangd::visitStrings
void visitStrings(Symbol &S, const Callback &CB)
Invokes Callback with each StringRef& contained in the Symbol.
Definition: Symbol.h:147
ns1::ns2::B
@ B
Definition: CategoricalFeature.h:3
clang::clangd::SymbolSlab
An immutable symbol container that stores a set of symbols.
Definition: Symbol.h:177
clang::clangd::elog
void elog(const char *Fmt, Ts &&... Vals)
Definition: Logger.h:62
clang::clangd::SymbolSlab::Builder::insert
void insert(const Symbol &S)
Adds a symbol, overwriting any existing one with the same ID.
Definition: Symbol.cpp:50
clang::clangd::RelationKind
RelationKind
Definition: Relation.h:22
clang::clangd::trace::Span
Records an event whose duration is the lifetime of the Span object.
Definition: Trace.h:143