clang-tools  15.0.0git
Serialization.cpp
Go to the documentation of this file.
1 //===-- Serialization.cpp - Binary serialization of index data ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Serialization.h"
10 #include "Headers.h"
11 #include "RIFF.h"
12 #include "index/MemIndex.h"
13 #include "index/SymbolLocation.h"
14 #include "index/SymbolOrigin.h"
15 #include "index/dex/Dex.h"
16 #include "support/Logger.h"
17 #include "support/Trace.h"
18 #include "clang/Tooling/CompilationDatabase.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Support/Compiler.h"
21 #include "llvm/Support/Compression.h"
22 #include "llvm/Support/Endian.h"
23 #include "llvm/Support/Error.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include <cstdint>
26 #include <vector>
27 
28 namespace clang {
29 namespace clangd {
30 namespace {
31 
32 // IO PRIMITIVES
33 // We use little-endian 32 bit ints, sometimes with variable-length encoding.
34 //
35 // Variable-length int encoding (varint) uses the bottom 7 bits of each byte
36 // to encode the number, and the top bit to indicate whether more bytes follow.
37 // e.g. 9a 2f means [0x1a and keep reading, 0x2f and stop].
38 // This represents 0x1a | 0x2f<<7 = 6042.
39 // A 32-bit integer takes 1-5 bytes to encode; small numbers are more compact.
40 
41 // Reads binary data from a StringRef, and keeps track of position.
42 class Reader {
43  const char *Begin, *End;
44  bool Err = false;
45 
46 public:
47  Reader(llvm::StringRef Data) : Begin(Data.begin()), End(Data.end()) {}
48  // The "error" bit is set by reading past EOF or reading invalid data.
49  // When in an error state, reads may return zero values: callers should check.
50  bool err() const { return Err; }
51  // Did we read all the data, or encounter an error?
52  bool eof() const { return Begin == End || Err; }
53  // All the data we didn't read yet.
54  llvm::StringRef rest() const { return llvm::StringRef(Begin, End - Begin); }
55 
56  uint8_t consume8() {
57  if (LLVM_UNLIKELY(Begin == End)) {
58  Err = true;
59  return 0;
60  }
61  return *Begin++;
62  }
63 
64  uint32_t consume32() {
65  if (LLVM_UNLIKELY(Begin + 4 > End)) {
66  Err = true;
67  return 0;
68  }
69  auto Ret = llvm::support::endian::read32le(Begin);
70  Begin += 4;
71  return Ret;
72  }
73 
74  llvm::StringRef consume(int N) {
75  if (LLVM_UNLIKELY(Begin + N > End)) {
76  Err = true;
77  return llvm::StringRef();
78  }
79  llvm::StringRef Ret(Begin, N);
80  Begin += N;
81  return Ret;
82  }
83 
84  uint32_t consumeVar() {
85  constexpr static uint8_t More = 1 << 7;
86 
87  // Use a 32 bit unsigned here to prevent promotion to signed int (unless int
88  // is wider than 32 bits).
89  uint32_t B = consume8();
90  if (LLVM_LIKELY(!(B & More)))
91  return B;
92  uint32_t Val = B & ~More;
93  for (int Shift = 7; B & More && Shift < 32; Shift += 7) {
94  B = consume8();
95  // 5th byte of a varint can only have lowest 4 bits set.
96  assert((Shift != 28 || B == (B & 0x0f)) && "Invalid varint encoding");
97  Val |= (B & ~More) << Shift;
98  }
99  return Val;
100  }
101 
102  llvm::StringRef consumeString(llvm::ArrayRef<llvm::StringRef> Strings) {
103  auto StringIndex = consumeVar();
104  if (LLVM_UNLIKELY(StringIndex >= Strings.size())) {
105  Err = true;
106  return llvm::StringRef();
107  }
108  return Strings[StringIndex];
109  }
110 
111  SymbolID consumeID() {
112  llvm::StringRef Raw = consume(SymbolID::RawSize); // short if truncated.
113  return LLVM_UNLIKELY(err()) ? SymbolID() : SymbolID::fromRaw(Raw);
114  }
115 
116  // Read a varint (as consumeVar) and resize the container accordingly.
117  // If the size is invalid, return false and mark an error.
118  // (The caller should abort in this case).
119  template <typename T> LLVM_NODISCARD bool consumeSize(T &Container) {
120  auto Size = consumeVar();
121  // Conservatively assume each element is at least one byte.
122  if (Size > (size_t)(End - Begin)) {
123  Err = true;
124  return false;
125  }
126  Container.resize(Size);
127  return true;
128  }
129 };
130 
131 void write32(uint32_t I, llvm::raw_ostream &OS) {
132  char Buf[4];
133  llvm::support::endian::write32le(Buf, I);
134  OS.write(Buf, sizeof(Buf));
135 }
136 
137 void writeVar(uint32_t I, llvm::raw_ostream &OS) {
138  constexpr static uint8_t More = 1 << 7;
139  if (LLVM_LIKELY(I < 1 << 7)) {
140  OS.write(I);
141  return;
142  }
143  for (;;) {
144  OS.write(I | More);
145  I >>= 7;
146  if (I < 1 << 7) {
147  OS.write(I);
148  return;
149  }
150  }
151 }
152 
153 // STRING TABLE ENCODING
154 // Index data has many string fields, and many strings are identical.
155 // We store each string once, and refer to them by index.
156 //
157 // The string table's format is:
158 // - UncompressedSize : uint32 (or 0 for no compression)
159 // - CompressedData : byte[CompressedSize]
160 //
161 // CompressedData is a zlib-compressed byte[UncompressedSize].
162 // It contains a sequence of null-terminated strings, e.g. "foo\0bar\0".
163 // These are sorted to improve compression.
164 
165 // Maps each string to a canonical representation.
166 // Strings remain owned externally (e.g. by SymbolSlab).
167 class StringTableOut {
168  llvm::DenseSet<llvm::StringRef> Unique;
169  std::vector<llvm::StringRef> Sorted;
170  // Since strings are interned, look up can be by pointer.
171  llvm::DenseMap<std::pair<const char *, size_t>, unsigned> Index;
172 
173 public:
174  StringTableOut() {
175  // Ensure there's at least one string in the table.
176  // Table size zero is reserved to indicate no compression.
177  Unique.insert("");
178  }
179  // Add a string to the table. Overwrites S if an identical string exists.
180  void intern(llvm::StringRef &S) { S = *Unique.insert(S).first; };
181  // Finalize the table and write it to OS. No more strings may be added.
182  void finalize(llvm::raw_ostream &OS) {
183  Sorted = {Unique.begin(), Unique.end()};
184  llvm::sort(Sorted);
185  for (unsigned I = 0; I < Sorted.size(); ++I)
186  Index.try_emplace({Sorted[I].data(), Sorted[I].size()}, I);
187 
188  std::string RawTable;
189  for (llvm::StringRef S : Sorted) {
190  RawTable.append(std::string(S));
191  RawTable.push_back(0);
192  }
193  if (llvm::zlib::isAvailable()) {
194  llvm::SmallString<1> Compressed;
195  llvm::zlib::compress(RawTable, Compressed);
196  write32(RawTable.size(), OS);
197  OS << Compressed;
198  } else {
199  write32(0, OS); // No compression.
200  OS << RawTable;
201  }
202  }
203  // Get the ID of an string, which must be interned. Table must be finalized.
204  unsigned index(llvm::StringRef S) const {
205  assert(!Sorted.empty() && "table not finalized");
206  assert(Index.count({S.data(), S.size()}) && "string not interned");
207  return Index.find({S.data(), S.size()})->second;
208  }
209 };
210 
211 struct StringTableIn {
212  llvm::BumpPtrAllocator Arena;
213  std::vector<llvm::StringRef> Strings;
214 };
215 
216 llvm::Expected<StringTableIn> readStringTable(llvm::StringRef Data) {
217  Reader R(Data);
218  size_t UncompressedSize = R.consume32();
219  if (R.err())
220  return error("Truncated string table");
221 
222  llvm::StringRef Uncompressed;
223  llvm::SmallString<1> UncompressedStorage;
224  if (UncompressedSize == 0) // No compression
225  Uncompressed = R.rest();
226  else if (llvm::zlib::isAvailable()) {
227  // Don't allocate a massive buffer if UncompressedSize was corrupted
228  // This is effective for sharded index, but not big monolithic ones, as
229  // once compressed size reaches 4MB nothing can be ruled out.
230  // Theoretical max ratio from https://zlib.net/zlib_tech.html
231  constexpr int MaxCompressionRatio = 1032;
232  if (UncompressedSize / MaxCompressionRatio > R.rest().size())
233  return error("Bad stri table: uncompress {0} -> {1} bytes is implausible",
234  R.rest().size(), UncompressedSize);
235 
236  if (llvm::Error E = llvm::zlib::uncompress(R.rest(), UncompressedStorage,
237  UncompressedSize))
238  return std::move(E);
239  Uncompressed = UncompressedStorage;
240  } else
241  return error("Compressed string table, but zlib is unavailable");
242 
243  StringTableIn Table;
244  llvm::StringSaver Saver(Table.Arena);
245  R = Reader(Uncompressed);
246  for (Reader R(Uncompressed); !R.eof();) {
247  auto Len = R.rest().find(0);
248  if (Len == llvm::StringRef::npos)
249  return error("Bad string table: not null terminated");
250  Table.Strings.push_back(Saver.save(R.consume(Len)));
251  R.consume8();
252  }
253  if (R.err())
254  return error("Truncated string table");
255  return std::move(Table);
256 }
257 
258 // SYMBOL ENCODING
259 // Each field of clangd::Symbol is encoded in turn (see implementation).
260 // - StringRef fields encode as varint (index into the string table)
261 // - enums encode as the underlying type
262 // - most numbers encode as varint
263 
264 void writeLocation(const SymbolLocation &Loc, const StringTableOut &Strings,
265  llvm::raw_ostream &OS) {
266  writeVar(Strings.index(Loc.FileURI), OS);
267  for (const auto &Endpoint : {Loc.Start, Loc.End}) {
268  writeVar(Endpoint.line(), OS);
269  writeVar(Endpoint.column(), OS);
270  }
271 }
272 
273 SymbolLocation readLocation(Reader &Data,
274  llvm::ArrayRef<llvm::StringRef> Strings) {
275  SymbolLocation Loc;
276  Loc.FileURI = Data.consumeString(Strings).data();
277  for (auto *Endpoint : {&Loc.Start, &Loc.End}) {
278  Endpoint->setLine(Data.consumeVar());
279  Endpoint->setColumn(Data.consumeVar());
280  }
281  return Loc;
282 }
283 
284 IncludeGraphNode readIncludeGraphNode(Reader &Data,
285  llvm::ArrayRef<llvm::StringRef> Strings) {
286  IncludeGraphNode IGN;
287  IGN.Flags = static_cast<IncludeGraphNode::SourceFlag>(Data.consume8());
288  IGN.URI = Data.consumeString(Strings);
289  llvm::StringRef Digest = Data.consume(IGN.Digest.size());
290  std::copy(Digest.bytes_begin(), Digest.bytes_end(), IGN.Digest.begin());
291  if (!Data.consumeSize(IGN.DirectIncludes))
292  return IGN;
293  for (llvm::StringRef &Include : IGN.DirectIncludes)
294  Include = Data.consumeString(Strings);
295  return IGN;
296 }
297 
298 void writeIncludeGraphNode(const IncludeGraphNode &IGN,
299  const StringTableOut &Strings,
300  llvm::raw_ostream &OS) {
301  OS.write(static_cast<uint8_t>(IGN.Flags));
302  writeVar(Strings.index(IGN.URI), OS);
303  llvm::StringRef Hash(reinterpret_cast<const char *>(IGN.Digest.data()),
304  IGN.Digest.size());
305  OS << Hash;
306  writeVar(IGN.DirectIncludes.size(), OS);
307  for (llvm::StringRef Include : IGN.DirectIncludes)
308  writeVar(Strings.index(Include), OS);
309 }
310 
311 void writeSymbol(const Symbol &Sym, const StringTableOut &Strings,
312  llvm::raw_ostream &OS) {
313  OS << Sym.ID.raw(); // TODO: once we start writing xrefs and posting lists,
314  // symbol IDs should probably be in a string table.
315  OS.write(static_cast<uint8_t>(Sym.SymInfo.Kind));
316  OS.write(static_cast<uint8_t>(Sym.SymInfo.Lang));
317  writeVar(Strings.index(Sym.Name), OS);
318  writeVar(Strings.index(Sym.Scope), OS);
319  writeVar(Strings.index(Sym.TemplateSpecializationArgs), OS);
320  writeLocation(Sym.Definition, Strings, OS);
321  writeLocation(Sym.CanonicalDeclaration, Strings, OS);
322  writeVar(Sym.References, OS);
323  OS.write(static_cast<uint8_t>(Sym.Flags));
324  writeVar(Strings.index(Sym.Signature), OS);
325  writeVar(Strings.index(Sym.CompletionSnippetSuffix), OS);
326  writeVar(Strings.index(Sym.Documentation), OS);
327  writeVar(Strings.index(Sym.ReturnType), OS);
328  writeVar(Strings.index(Sym.Type), OS);
329 
330  auto WriteInclude = [&](const Symbol::IncludeHeaderWithReferences &Include) {
331  writeVar(Strings.index(Include.IncludeHeader), OS);
332  writeVar(Include.References, OS);
333  };
334  writeVar(Sym.IncludeHeaders.size(), OS);
335  for (const auto &Include : Sym.IncludeHeaders)
336  WriteInclude(Include);
337 }
338 
339 Symbol readSymbol(Reader &Data, llvm::ArrayRef<llvm::StringRef> Strings,
340  SymbolOrigin Origin) {
341  Symbol Sym;
342  Sym.ID = Data.consumeID();
343  Sym.SymInfo.Kind = static_cast<index::SymbolKind>(Data.consume8());
344  Sym.SymInfo.Lang = static_cast<index::SymbolLanguage>(Data.consume8());
345  Sym.Name = Data.consumeString(Strings);
346  Sym.Scope = Data.consumeString(Strings);
347  Sym.TemplateSpecializationArgs = Data.consumeString(Strings);
348  Sym.Definition = readLocation(Data, Strings);
349  Sym.CanonicalDeclaration = readLocation(Data, Strings);
350  Sym.References = Data.consumeVar();
351  Sym.Flags = static_cast<Symbol::SymbolFlag>(Data.consume8());
352  Sym.Origin = Origin;
353  Sym.Signature = Data.consumeString(Strings);
354  Sym.CompletionSnippetSuffix = Data.consumeString(Strings);
355  Sym.Documentation = Data.consumeString(Strings);
356  Sym.ReturnType = Data.consumeString(Strings);
357  Sym.Type = Data.consumeString(Strings);
358  if (!Data.consumeSize(Sym.IncludeHeaders))
359  return Sym;
360  for (auto &I : Sym.IncludeHeaders) {
361  I.IncludeHeader = Data.consumeString(Strings);
362  I.References = Data.consumeVar();
363  }
364  return Sym;
365 }
366 
367 // REFS ENCODING
368 // A refs section has data grouped by Symbol. Each symbol has:
369 // - SymbolID: 8 bytes
370 // - NumRefs: varint
371 // - Ref[NumRefs]
372 // Fields of Ref are encoded in turn, see implementation.
373 
374 void writeRefs(const SymbolID &ID, llvm::ArrayRef<Ref> Refs,
375  const StringTableOut &Strings, llvm::raw_ostream &OS) {
376  OS << ID.raw();
377  writeVar(Refs.size(), OS);
378  for (const auto &Ref : Refs) {
379  OS.write(static_cast<unsigned char>(Ref.Kind));
380  writeLocation(Ref.Location, Strings, OS);
381  OS << Ref.Container.raw();
382  }
383 }
384 
385 std::pair<SymbolID, std::vector<Ref>>
386 readRefs(Reader &Data, llvm::ArrayRef<llvm::StringRef> Strings) {
387  std::pair<SymbolID, std::vector<Ref>> Result;
388  Result.first = Data.consumeID();
389  if (!Data.consumeSize(Result.second))
390  return Result;
391  for (auto &Ref : Result.second) {
392  Ref.Kind = static_cast<RefKind>(Data.consume8());
393  Ref.Location = readLocation(Data, Strings);
394  Ref.Container = Data.consumeID();
395  }
396  return Result;
397 }
398 
399 // RELATIONS ENCODING
400 // A relations section is a flat list of relations. Each relation has:
401 // - SymbolID (subject): 8 bytes
402 // - relation kind (predicate): 1 byte
403 // - SymbolID (object): 8 bytes
404 // In the future, we might prefer a packed representation if the need arises.
405 
406 void writeRelation(const Relation &R, llvm::raw_ostream &OS) {
407  OS << R.Subject.raw();
408  OS.write(static_cast<uint8_t>(R.Predicate));
409  OS << R.Object.raw();
410 }
411 
412 Relation readRelation(Reader &Data) {
413  SymbolID Subject = Data.consumeID();
414  RelationKind Predicate = static_cast<RelationKind>(Data.consume8());
415  SymbolID Object = Data.consumeID();
416  return {Subject, Predicate, Object};
417 }
418 
419 struct InternedCompileCommand {
420  llvm::StringRef Directory;
421  std::vector<llvm::StringRef> CommandLine;
422 };
423 
424 void writeCompileCommand(const InternedCompileCommand &Cmd,
425  const StringTableOut &Strings,
426  llvm::raw_ostream &CmdOS) {
427  writeVar(Strings.index(Cmd.Directory), CmdOS);
428  writeVar(Cmd.CommandLine.size(), CmdOS);
429  for (llvm::StringRef C : Cmd.CommandLine)
430  writeVar(Strings.index(C), CmdOS);
431 }
432 
433 InternedCompileCommand
434 readCompileCommand(Reader CmdReader, llvm::ArrayRef<llvm::StringRef> Strings) {
435  InternedCompileCommand Cmd;
436  Cmd.Directory = CmdReader.consumeString(Strings);
437  if (!CmdReader.consumeSize(Cmd.CommandLine))
438  return Cmd;
439  for (llvm::StringRef &C : Cmd.CommandLine)
440  C = CmdReader.consumeString(Strings);
441  return Cmd;
442 }
443 
444 // FILE ENCODING
445 // A file is a RIFF chunk with type 'CdIx'.
446 // It contains the sections:
447 // - meta: version number
448 // - srcs: information related to include graph
449 // - stri: string table
450 // - symb: symbols
451 // - refs: references to symbols
452 
// Format version stored in the "meta" chunk.
// Versioning is deliberately simple: readers reject any version other than
// the current one. Bump this number whenever you make a breaking change to
// the format, so that previously stored data is invalidated. Some backward
// compatibility may be added later.
static constexpr uint32_t Version = 17;
457 
458 llvm::Expected<IndexFileIn> readRIFF(llvm::StringRef Data,
459  SymbolOrigin Origin) {
460  auto RIFF = riff::readFile(Data);
461  if (!RIFF)
462  return RIFF.takeError();
463  if (RIFF->Type != riff::fourCC("CdIx"))
464  return error("wrong RIFF filetype: {0}", riff::fourCCStr(RIFF->Type));
465  llvm::StringMap<llvm::StringRef> Chunks;
466  for (const auto &Chunk : RIFF->Chunks)
467  Chunks.try_emplace(llvm::StringRef(Chunk.ID.data(), Chunk.ID.size()),
468  Chunk.Data);
469 
470  if (!Chunks.count("meta"))
471  return error("missing meta chunk");
472  Reader Meta(Chunks.lookup("meta"));
473  auto SeenVersion = Meta.consume32();
474  if (SeenVersion != Version)
475  return error("wrong version: want {0}, got {1}", Version, SeenVersion);
476 
477  // meta chunk is checked above, as we prefer the "version mismatch" error.
478  for (llvm::StringRef RequiredChunk : {"stri"})
479  if (!Chunks.count(RequiredChunk))
480  return error("missing required chunk {0}", RequiredChunk);
481 
482  auto Strings = readStringTable(Chunks.lookup("stri"));
483  if (!Strings)
484  return Strings.takeError();
485 
486  IndexFileIn Result;
487  if (Chunks.count("srcs")) {
488  Reader SrcsReader(Chunks.lookup("srcs"));
489  Result.Sources.emplace();
490  while (!SrcsReader.eof()) {
491  auto IGN = readIncludeGraphNode(SrcsReader, Strings->Strings);
492  auto Entry = Result.Sources->try_emplace(IGN.URI).first;
493  Entry->getValue() = std::move(IGN);
494  // We change all the strings inside the structure to point at the keys in
495  // the map, since it is the only copy of the string that's going to live.
496  Entry->getValue().URI = Entry->getKey();
497  for (auto &Include : Entry->getValue().DirectIncludes)
498  Include = Result.Sources->try_emplace(Include).first->getKey();
499  }
500  if (SrcsReader.err())
501  return error("malformed or truncated include uri");
502  }
503 
504  if (Chunks.count("symb")) {
505  Reader SymbolReader(Chunks.lookup("symb"));
506  SymbolSlab::Builder Symbols;
507  while (!SymbolReader.eof())
508  Symbols.insert(readSymbol(SymbolReader, Strings->Strings, Origin));
509  if (SymbolReader.err())
510  return error("malformed or truncated symbol");
511  Result.Symbols = std::move(Symbols).build();
512  }
513  if (Chunks.count("refs")) {
514  Reader RefsReader(Chunks.lookup("refs"));
516  while (!RefsReader.eof()) {
517  auto RefsBundle = readRefs(RefsReader, Strings->Strings);
518  for (const auto &Ref : RefsBundle.second) // FIXME: bulk insert?
519  Refs.insert(RefsBundle.first, Ref);
520  }
521  if (RefsReader.err())
522  return error("malformed or truncated refs");
523  Result.Refs = std::move(Refs).build();
524  }
525  if (Chunks.count("rela")) {
526  Reader RelationsReader(Chunks.lookup("rela"));
527  RelationSlab::Builder Relations;
528  while (!RelationsReader.eof())
529  Relations.insert(readRelation(RelationsReader));
530  if (RelationsReader.err())
531  return error("malformed or truncated relations");
532  Result.Relations = std::move(Relations).build();
533  }
534  if (Chunks.count("cmdl")) {
535  Reader CmdReader(Chunks.lookup("cmdl"));
536  InternedCompileCommand Cmd =
537  readCompileCommand(CmdReader, Strings->Strings);
538  if (CmdReader.err())
539  return error("malformed or truncated commandline section");
540  Result.Cmd.emplace();
541  Result.Cmd->Directory = std::string(Cmd.Directory);
542  Result.Cmd->CommandLine.reserve(Cmd.CommandLine.size());
543  for (llvm::StringRef C : Cmd.CommandLine)
544  Result.Cmd->CommandLine.emplace_back(C);
545  }
546  return std::move(Result);
547 }
548 
549 template <class Callback>
550 void visitStrings(IncludeGraphNode &IGN, const Callback &CB) {
551  CB(IGN.URI);
552  for (llvm::StringRef &Include : IGN.DirectIncludes)
553  CB(Include);
554 }
555 
556 void writeRIFF(const IndexFileOut &Data, llvm::raw_ostream &OS) {
557  assert(Data.Symbols && "An index file without symbols makes no sense!");
558  riff::File RIFF;
559  RIFF.Type = riff::fourCC("CdIx");
560 
561  llvm::SmallString<4> Meta;
562  {
563  llvm::raw_svector_ostream MetaOS(Meta);
564  write32(Version, MetaOS);
565  }
566  RIFF.Chunks.push_back({riff::fourCC("meta"), Meta});
567 
568  StringTableOut Strings;
569  std::vector<Symbol> Symbols;
570  for (const auto &Sym : *Data.Symbols) {
571  Symbols.emplace_back(Sym);
572  visitStrings(Symbols.back(),
573  [&](llvm::StringRef &S) { Strings.intern(S); });
574  }
575  std::vector<IncludeGraphNode> Sources;
576  if (Data.Sources)
577  for (const auto &Source : *Data.Sources) {
578  Sources.push_back(Source.getValue());
579  visitStrings(Sources.back(),
580  [&](llvm::StringRef &S) { Strings.intern(S); });
581  }
582 
583  std::vector<std::pair<SymbolID, std::vector<Ref>>> Refs;
584  if (Data.Refs) {
585  for (const auto &Sym : *Data.Refs) {
586  Refs.emplace_back(Sym);
587  for (auto &Ref : Refs.back().second) {
588  llvm::StringRef File = Ref.Location.FileURI;
589  Strings.intern(File);
590  Ref.Location.FileURI = File.data();
591  }
592  }
593  }
594 
595  std::vector<Relation> Relations;
596  if (Data.Relations) {
597  for (const auto &Relation : *Data.Relations) {
598  Relations.emplace_back(Relation);
599  // No strings to be interned in relations.
600  }
601  }
602 
603  InternedCompileCommand InternedCmd;
604  if (Data.Cmd) {
605  InternedCmd.CommandLine.reserve(Data.Cmd->CommandLine.size());
606  InternedCmd.Directory = Data.Cmd->Directory;
607  Strings.intern(InternedCmd.Directory);
608  for (llvm::StringRef C : Data.Cmd->CommandLine) {
609  InternedCmd.CommandLine.emplace_back(C);
610  Strings.intern(InternedCmd.CommandLine.back());
611  }
612  }
613 
614  std::string StringSection;
615  {
616  llvm::raw_string_ostream StringOS(StringSection);
617  Strings.finalize(StringOS);
618  }
619  RIFF.Chunks.push_back({riff::fourCC("stri"), StringSection});
620 
621  std::string SymbolSection;
622  {
623  llvm::raw_string_ostream SymbolOS(SymbolSection);
624  for (const auto &Sym : Symbols)
625  writeSymbol(Sym, Strings, SymbolOS);
626  }
627  RIFF.Chunks.push_back({riff::fourCC("symb"), SymbolSection});
628 
629  std::string RefsSection;
630  if (Data.Refs) {
631  {
632  llvm::raw_string_ostream RefsOS(RefsSection);
633  for (const auto &Sym : Refs)
634  writeRefs(Sym.first, Sym.second, Strings, RefsOS);
635  }
636  RIFF.Chunks.push_back({riff::fourCC("refs"), RefsSection});
637  }
638 
639  std::string RelationSection;
640  if (Data.Relations) {
641  {
642  llvm::raw_string_ostream RelationOS{RelationSection};
643  for (const auto &Relation : Relations)
644  writeRelation(Relation, RelationOS);
645  }
646  RIFF.Chunks.push_back({riff::fourCC("rela"), RelationSection});
647  }
648 
649  std::string SrcsSection;
650  {
651  {
652  llvm::raw_string_ostream SrcsOS(SrcsSection);
653  for (const auto &SF : Sources)
654  writeIncludeGraphNode(SF, Strings, SrcsOS);
655  }
656  RIFF.Chunks.push_back({riff::fourCC("srcs"), SrcsSection});
657  }
658 
659  std::string CmdlSection;
660  if (Data.Cmd) {
661  {
662  llvm::raw_string_ostream CmdOS(CmdlSection);
663  writeCompileCommand(InternedCmd, Strings, CmdOS);
664  }
665  RIFF.Chunks.push_back({riff::fourCC("cmdl"), CmdlSection});
666  }
667 
668  OS << RIFF;
669 }
670 
671 } // namespace
672 
673 // Defined in YAMLSerialization.cpp.
674 void writeYAML(const IndexFileOut &, llvm::raw_ostream &);
675 llvm::Expected<IndexFileIn> readYAML(llvm::StringRef, SymbolOrigin Origin);
676 
677 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const IndexFileOut &O) {
678  switch (O.Format) {
680  writeRIFF(O, OS);
681  break;
683  writeYAML(O, OS);
684  break;
685  }
686  return OS;
687 }
688 
689 llvm::Expected<IndexFileIn> readIndexFile(llvm::StringRef Data,
690  SymbolOrigin Origin) {
691  if (Data.startswith("RIFF")) {
692  return readRIFF(Data, Origin);
693  }
694  if (auto YAMLContents = readYAML(Data, Origin)) {
695  return std::move(*YAMLContents);
696  } else {
697  return error("Not a RIFF file and failed to parse as YAML: {0}",
698  YAMLContents.takeError());
699  }
700 }
701 
702 std::unique_ptr<SymbolIndex> loadIndex(llvm::StringRef SymbolFilename,
703  SymbolOrigin Origin, bool UseDex) {
704  trace::Span OverallTracer("LoadIndex");
705  auto Buffer = llvm::MemoryBuffer::getFile(SymbolFilename);
706  if (!Buffer) {
707  elog("Can't open {0}: {1}", SymbolFilename, Buffer.getError().message());
708  return nullptr;
709  }
710 
711  SymbolSlab Symbols;
712  RefSlab Refs;
713  RelationSlab Relations;
714  {
715  trace::Span Tracer("ParseIndex");
716  if (auto I = readIndexFile(Buffer->get()->getBuffer(), Origin)) {
717  if (I->Symbols)
718  Symbols = std::move(*I->Symbols);
719  if (I->Refs)
720  Refs = std::move(*I->Refs);
721  if (I->Relations)
722  Relations = std::move(*I->Relations);
723  } else {
724  elog("Bad index file: {0}", I.takeError());
725  return nullptr;
726  }
727  }
728 
729  size_t NumSym = Symbols.size();
730  size_t NumRefs = Refs.numRefs();
731  size_t NumRelations = Relations.size();
732 
733  trace::Span Tracer("BuildIndex");
734  auto Index = UseDex ? dex::Dex::build(std::move(Symbols), std::move(Refs),
735  std::move(Relations))
736  : MemIndex::build(std::move(Symbols), std::move(Refs),
737  std::move(Relations));
738  vlog("Loaded {0} from {1} with estimated memory usage {2} bytes\n"
739  " - number of symbols: {3}\n"
740  " - number of refs: {4}\n"
741  " - number of relations: {5}",
742  UseDex ? "Dex" : "MemIndex", SymbolFilename,
743  Index->estimateMemoryUsage(), NumSym, NumRefs, NumRelations);
744  return Index;
745 }
746 
747 } // namespace clangd
748 } // namespace clang
clang::clangd::readYAML
llvm::Expected< IndexFileIn > readYAML(llvm::StringRef, SymbolOrigin Origin)
Definition: YAMLSerialization.cpp:421
clang::clangd::IndexFileFormat::YAML
@ YAML
Loc
SourceLocation Loc
Definition: KernelNameRestrictionCheck.cpp:45
clang::clangd::riff::fourCC
constexpr FourCC fourCC(const char(&Literal)[5])
Definition: RIFF.h:43
Dex.h
Headers.h
SymbolOrigin.h
E
const Expr * E
Definition: AvoidBindCheck.cpp:88
Refs
RefSlab Refs
Definition: SymbolCollectorTests.cpp:312
clang::clangd::Symbol::SymbolFlag
SymbolFlag
Definition: Symbol.h:113
clang::clangd::IndexFileFormat::RIFF
@ RIFF
SymbolLocation.h
Tracer
std::unique_ptr< trace::EventTracer > Tracer
Definition: TraceTests.cpp:161
clang::clangd::error
llvm::Error error(std::error_code EC, const char *Fmt, Ts &&... Vals)
Definition: Logger.h:79
clang::clangd::SymbolKind::Object
@ Object
clang::clangd::dex::consume
std::vector< std::pair< DocID, float > > consume(Iterator &It)
Advances the iterator until it is exhausted.
Definition: Iterator.cpp:357
clang::clangd::RefKind
RefKind
Describes the kind of a cross-reference.
Definition: Ref.h:28
clang::clangd::RelationSlab::Builder::insert
void insert(const Relation &R)
Adds a relation to the slab.
Definition: Relation.h:78
clang::clangd::riff::fourCCStr
constexpr llvm::StringRef fourCCStr(const FourCC &Data)
Definition: RIFF.h:46
Trace.h
clang::clangd::dex::Dex::build
static std::unique_ptr< SymbolIndex > build(SymbolSlab, RefSlab, RelationSlab)
Builds an index from slabs. The index takes ownership of the slab.
Definition: Dex.cpp:34
clang::clangd::RefSlab
An efficient structure of storing large set of symbol references in memory.
Definition: Ref.h:108
clang::clangd::IndexFileOut
Definition: Serialization.h:55
clang::clangd::SymbolIndex::estimateMemoryUsage
virtual size_t estimateMemoryUsage() const =0
Returns estimated size of index (in bytes).
clang::clangd::RefSlab::numRefs
size_t numRefs() const
Definition: Ref.h:123
clang::clangd::readIndexFile
llvm::Expected< IndexFileIn > readIndexFile(llvm::StringRef Data, SymbolOrigin Origin)
Definition: Serialization.cpp:689
MemIndex.h
RIFF.h
clang::clangd::IndexFileOut::Format
IndexFileFormat Format
Definition: Serialization.h:62
clang::clangd::riff::readFile
llvm::Expected< File > readFile(llvm::StringRef Stream)
Definition: RIFF.cpp:48
clang::clangd::writeYAML
void writeYAML(const IndexFileOut &, llvm::raw_ostream &)
Definition: YAMLSerialization.cpp:388
clang::clangd::RelationSlab
Definition: Relation.h:50
Builder
CodeCompletionBuilder Builder
Definition: CodeCompletionStringsTests.cpp:36
Logger.h
CommandLine
std::vector< llvm::StringRef > CommandLine
Definition: Serialization.cpp:421
clang::clangd::loadIndex
std::unique_ptr< SymbolIndex > loadIndex(llvm::StringRef SymbolFilename, SymbolOrigin Origin, bool UseDex)
Definition: Serialization.cpp:702
Directory
llvm::StringRef Directory
Definition: Serialization.cpp:420
clang::clangd::IncludeGraphNode::SourceFlag
SourceFlag
Definition: Headers.h:75
clang::clangd::SymbolID::RawSize
constexpr static size_t RawSize
Definition: SymbolID.h:47
Serialization.h
clang::clangd::vlog
void vlog(const char *Fmt, Ts &&... Vals)
Definition: Logger.h:72
clang::doc::SymbolID
std::array< uint8_t, 20 > SymbolID
Definition: Representation.h:30
clang::clangd::MemIndex::build
static std::unique_ptr< SymbolIndex > build(SymbolSlab Symbols, RefSlab Refs, RelationSlab Relations)
Builds an index from slabs. The index takes ownership of the data.
Definition: MemIndex.cpp:17
clang::clangd::operator<<
llvm::raw_ostream & operator<<(llvm::raw_ostream &OS, const CodeCompletion &C)
Definition: CodeComplete.cpp:2182
SymbolKind
clang::find_all_symbols::SymbolInfo::SymbolKind SymbolKind
Definition: SymbolInfo.cpp:19
Entry
Definition: Modularize.cpp:427
Index
const SymbolIndex * Index
Definition: Dexp.cpp:98
Strings
std::vector< llvm::StringRef > Strings
Definition: Serialization.cpp:213
ID
static char ID
Definition: Logger.cpp:74
C
const Criteria C
Definition: FunctionCognitiveComplexityCheck.cpp:93
clang
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
Definition: ApplyReplacements.h:27
OS
llvm::raw_string_ostream OS
Definition: TraceTests.cpp:160
clang::clangd::SymbolOrigin
SymbolOrigin
Definition: SymbolOrigin.h:21
Arena
llvm::BumpPtrAllocator Arena
Definition: Serialization.cpp:212
clang::clangd::RefSlab::size
size_t size() const
Gets the number of symbols.
Definition: Ref.h:122
clang::clangd::Callback
llvm::unique_function< void(llvm::Expected< T >)> Callback
A Callback<T> is a void function that accepts Expected<T>.
Definition: Function.h:28
clang::clangd::visitStrings
void visitStrings(Symbol &S, const Callback &CB)
Invokes Callback with each StringRef& contained in the Symbol.
Definition: Symbol.h:147
ns1::ns2::B
@ B
Definition: CategoricalFeature.h:3
clang::clangd::SymbolSlab
An immutable symbol container that stores a set of symbols.
Definition: Symbol.h:177
clang::clangd::elog
void elog(const char *Fmt, Ts &&... Vals)
Definition: Logger.h:61
clang::clangd::SymbolSlab::Builder::insert
void insert(const Symbol &S)
Adds a symbol, overwriting any existing one with the same ID.
Definition: Symbol.cpp:50
clang::clangd::RelationKind
RelationKind
Definition: Relation.h:20
clang::clangd::trace::Span
Records an event whose duration is the lifetime of the Span object.
Definition: Trace.h:143