clang-tools  12.0.0git
FileIndex.cpp
Go to the documentation of this file.
1 //===--- FileIndex.cpp - Indexes for files. ------------------------ C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "FileIndex.h"
10 #include "CollectMacros.h"
11 #include "ParsedAST.h"
12 #include "SymbolCollector.h"
14 #include "index/Index.h"
15 #include "index/MemIndex.h"
16 #include "index/Merge.h"
17 #include "index/Ref.h"
18 #include "index/Relation.h"
19 #include "index/Serialization.h"
20 #include "index/Symbol.h"
21 #include "index/SymbolID.h"
22 #include "index/SymbolOrigin.h"
23 #include "index/dex/Dex.h"
24 #include "support/Logger.h"
25 #include "support/Path.h"
26 #include "clang/AST/ASTContext.h"
27 #include "clang/Index/IndexingAction.h"
28 #include "clang/Index/IndexingOptions.h"
29 #include "clang/Lex/MacroInfo.h"
30 #include "clang/Lex/Preprocessor.h"
31 #include "llvm/ADT/DenseMap.h"
32 #include "llvm/ADT/Optional.h"
33 #include "llvm/ADT/STLExtras.h"
34 #include "llvm/ADT/StringMap.h"
35 #include "llvm/ADT/StringRef.h"
36 #include "llvm/Support/Error.h"
37 #include <memory>
38 #include <tuple>
39 #include <utility>
40 #include <vector>
41 
42 namespace clang {
43 namespace clangd {
44 namespace {
45 
46 SlabTuple indexSymbols(ASTContext &AST, std::shared_ptr<Preprocessor> PP,
47  llvm::ArrayRef<Decl *> DeclsToIndex,
48  const MainFileMacros *MacroRefsToIndex,
49  const CanonicalIncludes &Includes, bool IsIndexMainAST,
50  llvm::StringRef Version) {
51  SymbolCollector::Options CollectorOpts;
52  CollectorOpts.CollectIncludePath = true;
53  CollectorOpts.Includes = &Includes;
54  CollectorOpts.CountReferences = false;
55  CollectorOpts.Origin = SymbolOrigin::Dynamic;
56 
57  index::IndexingOptions IndexOpts;
58  // We only need declarations, because we don't count references.
59  IndexOpts.SystemSymbolFilter =
60  index::IndexingOptions::SystemSymbolFilterKind::DeclarationsOnly;
61  IndexOpts.IndexFunctionLocals = false;
62  if (IsIndexMainAST) {
63  // We only collect refs when indexing main AST.
64  CollectorOpts.RefFilter = RefKind::All;
65  // Comments for main file can always be obtained from sema, do not store
66  // them in the index.
67  CollectorOpts.StoreAllDocumentation = false;
68  } else {
69  IndexOpts.IndexMacrosInPreprocessor = true;
70  CollectorOpts.CollectMacro = true;
71  CollectorOpts.StoreAllDocumentation = true;
72  }
73 
74  SymbolCollector Collector(std::move(CollectorOpts));
75  Collector.setPreprocessor(PP);
76  if (MacroRefsToIndex)
77  Collector.handleMacros(*MacroRefsToIndex);
78  index::indexTopLevelDecls(AST, *PP, DeclsToIndex, Collector, IndexOpts);
79 
80  const auto &SM = AST.getSourceManager();
81  const auto *MainFileEntry = SM.getFileEntryForID(SM.getMainFileID());
82  std::string FileName =
83  std::string(MainFileEntry ? MainFileEntry->getName() : "");
84 
85  auto Syms = Collector.takeSymbols();
86  auto Refs = Collector.takeRefs();
87  auto Relations = Collector.takeRelations();
88 
89  vlog("indexed {0} AST for {1} version {2}:\n"
90  " symbol slab: {3} symbols, {4} bytes\n"
91  " ref slab: {5} symbols, {6} refs, {7} bytes\n"
92  " relations slab: {8} relations, {9} bytes",
93  IsIndexMainAST ? "file" : "preamble", FileName, Version, Syms.size(),
94  Syms.bytes(), Refs.size(), Refs.numRefs(), Refs.bytes(),
95  Relations.size(), Relations.bytes());
96  return std::make_tuple(std::move(Syms), std::move(Refs),
97  std::move(Relations));
98 }
99 
100 // We keep only the node "U" and its edges. Any node other than "U" will be
101 // empty in the resultant graph.
102 IncludeGraph getSubGraph(llvm::StringRef URI, const IncludeGraph &FullGraph) {
103  IncludeGraph IG;
104 
105  auto Entry = IG.try_emplace(URI).first;
106  auto &Node = Entry->getValue();
107  Node = FullGraph.lookup(Entry->getKey());
108  Node.URI = Entry->getKey();
109 
110  // URIs inside nodes must point into the keys of the same IncludeGraph.
111  for (auto &Include : Node.DirectIncludes) {
112  auto I = IG.try_emplace(Include).first;
113  I->getValue().URI = I->getKey();
114  Include = I->getKey();
115  }
116  return IG;
117 }
118 } // namespace
119 
121  : Index(std::move(Input)) {
122  // Used to build RelationSlabs.
123  llvm::DenseMap<SymbolID, FileShard *> SymbolIDToFile;
124 
125  // Attribute each Symbol to both their declaration and definition locations.
126  if (Index.Symbols) {
127  for (const auto &S : *Index.Symbols) {
128  auto It = Shards.try_emplace(S.CanonicalDeclaration.FileURI);
129  It.first->getValue().Symbols.insert(&S);
130  SymbolIDToFile[S.ID] = &It.first->getValue();
131  // Only bother if definition file is different than declaration file.
132  if (S.Definition &&
133  S.Definition.FileURI != S.CanonicalDeclaration.FileURI) {
134  auto It = Shards.try_emplace(S.Definition.FileURI);
135  It.first->getValue().Symbols.insert(&S);
136  }
137  }
138  }
139  // Attribute references into each file they occured in.
140  if (Index.Refs) {
141  for (const auto &SymRefs : *Index.Refs) {
142  for (const auto &R : SymRefs.second) {
143  const auto It = Shards.try_emplace(R.Location.FileURI);
144  It.first->getValue().Refs.insert(&R);
145  RefToSymID[&R] = SymRefs.first;
146  }
147  }
148  }
149  // Attribute relations to the file declaraing their Subject as Object might
150  // not have been indexed, see SymbolCollector::processRelations for details.
151  if (Index.Relations) {
152  for (const auto &R : *Index.Relations) {
153  // FIXME: RelationSlab shouldn't contain dangling relations.
154  if (auto *File = SymbolIDToFile.lookup(R.Subject))
155  File->Relations.insert(&R);
156  }
157  }
158  // Store only the direct includes of a file in a shard.
159  if (Index.Sources) {
160  const auto &FullGraph = *Index.Sources;
161  for (const auto &It : FullGraph) {
162  auto ShardIt = Shards.try_emplace(It.first());
163  ShardIt.first->getValue().IG = getSubGraph(It.first(), FullGraph);
164  }
165  }
166 }
167 std::vector<llvm::StringRef> FileShardedIndex::getAllSources() const {
168  // It should be enough to construct a vector with {Shards.keys().begin(),
169  // Shards.keys().end()} but MSVC fails to compile that.
170  std::vector<PathRef> Result;
171  Result.reserve(Shards.size());
172  for (auto Key : Shards.keys())
173  Result.push_back(Key);
174  return Result;
175 }
176 
177 llvm::Optional<IndexFileIn>
178 FileShardedIndex::getShard(llvm::StringRef Uri) const {
179  auto It = Shards.find(Uri);
180  if (It == Shards.end())
181  return llvm::None;
182 
183  IndexFileIn IF;
184  IF.Sources = It->getValue().IG;
185  IF.Cmd = Index.Cmd;
186 
187  SymbolSlab::Builder SymB;
188  for (const auto *S : It->getValue().Symbols)
189  SymB.insert(*S);
190  IF.Symbols = std::move(SymB).build();
191 
192  RefSlab::Builder RefB;
193  for (const auto *Ref : It->getValue().Refs) {
194  auto SID = RefToSymID.lookup(Ref);
195  RefB.insert(SID, *Ref);
196  }
197  IF.Refs = std::move(RefB).build();
198 
200  for (const auto *Rel : It->getValue().Relations) {
201  RelB.insert(*Rel);
202  }
203  IF.Relations = std::move(RelB).build();
204  // Explicit move here is needed by some compilers.
205  return std::move(IF);
206 }
207 
209  return indexSymbols(AST.getASTContext(), AST.getPreprocessorPtr(),
210  AST.getLocalTopLevelDecls(), &AST.getMacros(),
211  AST.getCanonicalIncludes(),
212  /*IsIndexMainAST=*/true, AST.version());
213 }
214 
215 SlabTuple indexHeaderSymbols(llvm::StringRef Version, ASTContext &AST,
216  std::shared_ptr<Preprocessor> PP,
217  const CanonicalIncludes &Includes) {
218  std::vector<Decl *> DeclsToIndex(
219  AST.getTranslationUnitDecl()->decls().begin(),
220  AST.getTranslationUnitDecl()->decls().end());
221  return indexSymbols(AST, std::move(PP), DeclsToIndex,
222  /*MainFileMacros=*/nullptr, Includes,
223  /*IsIndexMainAST=*/false, Version);
224 }
225 
226 void FileSymbols::update(llvm::StringRef Key,
227  std::unique_ptr<SymbolSlab> Symbols,
228  std::unique_ptr<RefSlab> Refs,
229  std::unique_ptr<RelationSlab> Relations,
230  bool CountReferences) {
231  std::lock_guard<std::mutex> Lock(Mutex);
232  ++Version;
233  if (!Symbols)
234  SymbolsSnapshot.erase(Key);
235  else
236  SymbolsSnapshot[Key] = std::move(Symbols);
237  if (!Refs) {
238  RefsSnapshot.erase(Key);
239  } else {
240  RefSlabAndCountReferences Item;
241  Item.CountReferences = CountReferences;
242  Item.Slab = std::move(Refs);
243  RefsSnapshot[Key] = std::move(Item);
244  }
245  if (!Relations)
246  RelatiosSnapshot.erase(Key);
247  else
248  RelatiosSnapshot[Key] = std::move(Relations);
249 }
250 
251 std::unique_ptr<SymbolIndex>
253  size_t *Version) {
254  std::vector<std::shared_ptr<SymbolSlab>> SymbolSlabs;
255  std::vector<std::shared_ptr<RefSlab>> RefSlabs;
256  std::vector<std::shared_ptr<RelationSlab>> RelationSlabs;
257  std::vector<RefSlab *> MainFileRefs;
258  {
259  std::lock_guard<std::mutex> Lock(Mutex);
260  for (const auto &FileAndSymbols : SymbolsSnapshot)
261  SymbolSlabs.push_back(FileAndSymbols.second);
262  for (const auto &FileAndRefs : RefsSnapshot) {
263  RefSlabs.push_back(FileAndRefs.second.Slab);
264  if (FileAndRefs.second.CountReferences)
265  MainFileRefs.push_back(RefSlabs.back().get());
266  }
267  for (const auto &FileAndRelations : RelatiosSnapshot)
268  RelationSlabs.push_back(FileAndRelations.second);
269 
270  if (Version)
271  *Version = this->Version;
272  }
273  std::vector<const Symbol *> AllSymbols;
274  std::vector<Symbol> SymsStorage;
275  switch (DuplicateHandle) {
277  llvm::DenseMap<SymbolID, Symbol> Merged;
278  for (const auto &Slab : SymbolSlabs) {
279  for (const auto &Sym : *Slab) {
280  assert(Sym.References == 0 &&
281  "Symbol with non-zero references sent to FileSymbols");
282  auto I = Merged.try_emplace(Sym.ID, Sym);
283  if (!I.second)
284  I.first->second = mergeSymbol(I.first->second, Sym);
285  }
286  }
287  for (const RefSlab *Refs : MainFileRefs)
288  for (const auto &Sym : *Refs) {
289  auto It = Merged.find(Sym.first);
290  // This might happen while background-index is still running.
291  if (It == Merged.end())
292  continue;
293  It->getSecond().References += Sym.second.size();
294  }
295  SymsStorage.reserve(Merged.size());
296  for (auto &Sym : Merged) {
297  SymsStorage.push_back(std::move(Sym.second));
298  AllSymbols.push_back(&SymsStorage.back());
299  }
300  break;
301  }
303  llvm::DenseSet<SymbolID> AddedSymbols;
304  for (const auto &Slab : SymbolSlabs)
305  for (const auto &Sym : *Slab) {
306  assert(Sym.References == 0 &&
307  "Symbol with non-zero references sent to FileSymbols");
308  if (AddedSymbols.insert(Sym.ID).second)
309  AllSymbols.push_back(&Sym);
310  }
311  break;
312  }
313  }
314 
315  std::vector<Ref> RefsStorage; // Contiguous ranges for each SymbolID.
316  llvm::DenseMap<SymbolID, llvm::ArrayRef<Ref>> AllRefs;
317  {
318  llvm::DenseMap<SymbolID, llvm::SmallVector<Ref, 4>> MergedRefs;
319  size_t Count = 0;
320  for (const auto &RefSlab : RefSlabs)
321  for (const auto &Sym : *RefSlab) {
322  MergedRefs[Sym.first].append(Sym.second.begin(), Sym.second.end());
323  Count += Sym.second.size();
324  }
325  RefsStorage.reserve(Count);
326  AllRefs.reserve(MergedRefs.size());
327  for (auto &Sym : MergedRefs) {
328  auto &SymRefs = Sym.second;
329  // Sorting isn't required, but yields more stable results over rebuilds.
330  llvm::sort(SymRefs);
331  llvm::copy(SymRefs, back_inserter(RefsStorage));
332  AllRefs.try_emplace(
333  Sym.first,
334  llvm::ArrayRef<Ref>(&RefsStorage[RefsStorage.size() - SymRefs.size()],
335  SymRefs.size()));
336  }
337  }
338 
339  std::vector<Relation> AllRelations;
340  for (const auto &RelationSlab : RelationSlabs) {
341  for (const auto &R : *RelationSlab)
342  AllRelations.push_back(R);
343  }
344 
345  size_t StorageSize =
346  RefsStorage.size() * sizeof(Ref) + SymsStorage.size() * sizeof(Symbol);
347  for (const auto &Slab : SymbolSlabs)
348  StorageSize += Slab->bytes();
349  for (const auto &RefSlab : RefSlabs)
350  StorageSize += RefSlab->bytes();
351  for (const auto &RelationSlab : RelationSlabs)
352  StorageSize += RelationSlab->bytes();
353 
354  // Index must keep the slabs and contiguous ranges alive.
355  switch (Type) {
356  case IndexType::Light:
357  return std::make_unique<MemIndex>(
358  llvm::make_pointee_range(AllSymbols), std::move(AllRefs),
359  std::move(AllRelations),
360  std::make_tuple(std::move(SymbolSlabs), std::move(RefSlabs),
361  std::move(RefsStorage), std::move(SymsStorage)),
362  StorageSize);
363  case IndexType::Heavy:
364  return std::make_unique<dex::Dex>(
365  llvm::make_pointee_range(AllSymbols), std::move(AllRefs),
366  std::move(AllRelations),
367  std::make_tuple(std::move(SymbolSlabs), std::move(RefSlabs),
368  std::move(RefsStorage), std::move(SymsStorage)),
369  StorageSize);
370  }
371  llvm_unreachable("Unknown clangd::IndexType");
372 }
373 
375  : MergedIndex(&MainFileIndex, &PreambleIndex), UseDex(UseDex),
376  PreambleIndex(std::make_unique<MemIndex>()),
377  MainFileIndex(std::make_unique<MemIndex>()) {}
378 
379 void FileIndex::updatePreamble(PathRef Path, llvm::StringRef Version,
380  ASTContext &AST,
381  std::shared_ptr<Preprocessor> PP,
382  const CanonicalIncludes &Includes) {
383  IndexFileIn IF;
384  std::tie(IF.Symbols, std::ignore, IF.Relations) =
385  indexHeaderSymbols(Version, AST, std::move(PP), Includes);
386  FileShardedIndex ShardedIndex(std::move(IF));
387  for (auto Uri : ShardedIndex.getAllSources()) {
388  auto IF = ShardedIndex.getShard(Uri);
389  // We are using the key received from ShardedIndex, so it should always
390  // exist.
391  assert(IF);
392  PreambleSymbols.update(
393  Uri, std::make_unique<SymbolSlab>(std::move(*IF->Symbols)),
394  std::make_unique<RefSlab>(),
395  std::make_unique<RelationSlab>(std::move(*IF->Relations)),
396  /*CountReferences=*/false);
397  }
398  size_t IndexVersion = 0;
399  auto NewIndex =
400  PreambleSymbols.buildIndex(UseDex ? IndexType::Heavy : IndexType::Light,
401  DuplicateHandling::PickOne, &IndexVersion);
402  {
403  std::lock_guard<std::mutex> Lock(UpdateIndexMu);
404  if (IndexVersion <= PreambleIndexVersion) {
405  // We lost the race, some other thread built a later version.
406  return;
407  }
408  PreambleIndexVersion = IndexVersion;
409  PreambleIndex.reset(std::move(NewIndex));
410  vlog(
411  "Build dynamic index for header symbols with estimated memory usage of "
412  "{0} bytes",
413  PreambleIndex.estimateMemoryUsage());
414  }
415 }
416 
418  auto Contents = indexMainDecls(AST);
419  MainFileSymbols.update(
420  Path, std::make_unique<SymbolSlab>(std::move(std::get<0>(Contents))),
421  std::make_unique<RefSlab>(std::move(std::get<1>(Contents))),
422  std::make_unique<RelationSlab>(std::move(std::get<2>(Contents))),
423  /*CountReferences=*/true);
424  size_t IndexVersion = 0;
425  auto NewIndex = MainFileSymbols.buildIndex(
427  {
428  std::lock_guard<std::mutex> Lock(UpdateIndexMu);
429  if (IndexVersion <= MainIndexVersion) {
430  // We lost the race, some other thread built a later version.
431  return;
432  }
433  MainIndexVersion = IndexVersion;
434  MainFileIndex.reset(std::move(NewIndex));
435  vlog(
436  "Build dynamic index for main-file symbols with estimated memory usage "
437  "of {0} bytes",
438  MainFileIndex.estimateMemoryUsage());
439  }
440 }
441 
442 } // namespace clangd
443 } // namespace clang
std::unique_ptr< SymbolIndex > buildIndex(IndexType, DuplicateHandling DuplicateHandle=DuplicateHandling::PickOne, size_t *Version=nullptr)
The index keeps the slabs alive.
Definition: FileIndex.cpp:252
std::tuple< SymbolSlab, RefSlab, RelationSlab > SlabTuple
Definition: FileIndex.h:150
FileShardedIndex(IndexFileIn Input)
HintPath is used to convert file URIs stored in symbols into absolute paths.
Definition: FileIndex.cpp:120
llvm::Optional< SymbolSlab > Symbols
Definition: Serialization.h:43
SlabTuple indexHeaderSymbols(llvm::StringRef Version, ASTContext &AST, std::shared_ptr< Preprocessor > PP, const CanonicalIncludes &Includes)
Index declarations from AST and macros from PP that are declared in included headers.
Definition: FileIndex.cpp:215
llvm::Optional< tooling::CompileCommand > Cmd
Definition: Serialization.h:49
SymbolCollector::Options CollectorOpts
An efficient structure of storing large set of symbol references in memory.
Definition: Ref.h:104
This defines Dex - a symbol index implementation based on query iterators over symbol tokens...
void updatePreamble(PathRef Path, llvm::StringRef Version, ASTContext &AST, std::shared_ptr< Preprocessor > PP, const CanonicalIncludes &Includes)
Update preamble symbols of file Path with all declarations in AST and macros in PP.
Definition: FileIndex.cpp:379
IndexType
Select between in-memory index implementations, which have tradeoffs.
Definition: FileIndex.h:43
llvm::Optional< IndexFileIn > getShard(llvm::StringRef Uri) const
Generates index shard for the Uri.
Definition: FileIndex.cpp:178
Represents a symbol occurrence in the source file.
Definition: Ref.h:87
SlabTuple indexMainDecls(ParsedAST &AST)
Retrieves symbols and refs of local top level decls in AST (i.e.
Definition: FileIndex.cpp:208
llvm::StringRef PathRef
A typedef to represent a ref to file path.
Definition: Path.h:23
void insert(const Symbol &S)
Adds a symbol, overwriting any existing one with the same ID.
Definition: Symbol.cpp:50
Values in a Context are indexed by typed keys.
Definition: Context.h:40
size_t numRefs() const
Definition: Ref.h:119
ArrayRef< Decl * > getLocalTopLevelDecls()
This function returns top-level decls present in the main file of the AST.
Definition: ParsedAST.cpp:487
Documents should not be synced at all.
llvm::Optional< RelationSlab > Relations
Definition: Serialization.h:45
void vlog(const char *Fmt, Ts &&... Vals)
Definition: Logger.h:67
void updateMain(PathRef Path, ParsedAST &AST)
Update symbols and references from main file Path with indexMainDecls.
Definition: FileIndex.cpp:417
llvm::Optional< IncludeGraph > Sources
Definition: Serialization.h:47
ASTContext & getASTContext()
Note that the returned ast will not contain decls from the preamble that were not deserialized during...
Definition: ParsedAST.cpp:471
SymbolSlab::Builder is a mutable container that can &#39;freeze&#39; to SymbolSlab.
Definition: Symbol.h:200
llvm::StringRef Contents
DuplicateHandling
How to handle duplicated symbols across multiple files.
Definition: FileIndex.h:51
Maps a definition location onto an #include file, based on a set of filename rules.
const CanonicalIncludes & getCanonicalIncludes() const
Definition: ParsedAST.cpp:531
void update(llvm::StringRef Key, std::unique_ptr< SymbolSlab > Symbols, std::unique_ptr< RefSlab > Refs, std::unique_ptr< RelationSlab > Relations, bool CountReferences)
Updates all slabs associated with the Key.
Definition: FileIndex.cpp:226
llvm::StringMap< IncludeGraphNode > IncludeGraph
Definition: Headers.h:86
MemIndex is a naive in-memory index suitable for a small set of symbols.
Definition: MemIndex.h:19
std::vector< llvm::StringRef > getAllSources() const
Returns uris for all files that has a shard.
Definition: FileIndex.cpp:167
std::shared_ptr< Preprocessor > getPreprocessorPtr()
Definition: ParsedAST.cpp:479
std::string Path
A typedef to represent a file path.
Definition: Path.h:20
size_t size() const
Gets the number of symbols.
Definition: Ref.h:118
Symbol mergeSymbol(const Symbol &L, const Symbol &R)
Definition: Merge.cpp:172
std::shared_ptr< SymbolCollector > Collector
void insert(const Relation &R)
Adds a relation to the slab.
Definition: Relation.h:73
PathRef FileName
SymbolSlab Symbols
RelationSlab::Builder is a mutable container that can &#39;freeze&#39; to RelationSlab.
Definition: Relation.h:70
RefSlab::Builder is a mutable container that can &#39;freeze&#39; to RefSlab.
Definition: Ref.h:128
Stores and provides access to parsed AST.
Definition: ParsedAST.h:48
FileIndex(bool UseDex=true)
Definition: FileIndex.cpp:374
size_t bytes() const
Definition: Relation.h:60
llvm::Optional< RefSlab > Refs
Definition: Serialization.h:44
const MainFileMacros & getMacros() const
Gets all macro references (definition, expansions) present in the main file, including those in the p...
Definition: ParsedAST.cpp:491
Takes slabs coming from a TU (multiple files) and shards them per declaration location.
Definition: FileIndex.h:165
The class presents a C++ symbol, e.g.
Definition: Symbol.h:36
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
llvm::StringRef version() const
Returns the version of the ParseInputs this AST was built from.
Definition: ParsedAST.h:106
size_t estimateMemoryUsage() const override
Returns estimated size of index (in bytes).
Definition: Index.cpp:78
void insert(const SymbolID &ID, const Ref &S)
Adds a ref to the slab. Deep copy: Strings will be owned by the slab.
Definition: Ref.cpp:36
size_t bytes() const
Definition: Ref.h:122
RefSlab Refs
NodeType Type
void reset(std::unique_ptr< SymbolIndex >)
Definition: Index.cpp:20
const SymbolIndex * Index
Definition: Dexp.cpp:95