clang-tools  11.0.0git
FileIndex.h
Go to the documentation of this file.
1 //===--- FileIndex.h - Index for files. ---------------------------- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // FileIndex implements SymbolIndex for symbols from a set of files. Symbols are
10 // maintained at source-file granularity (e.g. with ASTs), and files can be
11 // updated dynamically.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_FILEINDEX_H
16 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_FILEINDEX_H
17 
18 #include "Headers.h"
19 #include "Index.h"
20 #include "MemIndex.h"
21 #include "Merge.h"
23 #include "index/Ref.h"
24 #include "index/Relation.h"
25 #include "index/Serialization.h"
26 #include "index/Symbol.h"
27 #include "support/Path.h"
28 #include "clang/Lex/Preprocessor.h"
29 #include "clang/Tooling/CompilationDatabase.h"
30 #include "llvm/ADT/DenseSet.h"
31 #include "llvm/ADT/Optional.h"
32 #include "llvm/ADT/StringMap.h"
33 #include "llvm/ADT/StringRef.h"
34 #include <memory>
35 #include <vector>
36 
37 namespace clang {
38 class ASTContext;
39 namespace clangd {
40 class ParsedAST;
41 
42 /// Select between in-memory index implementations, which have tradeoffs.
43 enum class IndexType {
44  // MemIndex is trivially cheap to build, but has poor query performance.
45  Light,
46  // Dex is relatively expensive to build and uses more memory, but is fast.
47  Heavy,
48 };
49 
50 /// How to handle duplicated symbols across multiple files.
51 enum class DuplicateHandling {
52  // Pick a random symbol. Less accurate but faster.
53  PickOne,
54  // Merge symbols. More accurate but slower.
55  Merge,
56 };
57 
58 /// A container of slabs associated with a key. It can be updated at key
59 /// granularity, replacing all slabs belonging to a key with a new set. Keys are
60 /// usually file paths/uris.
61 ///
62 /// This implements snapshot semantics. Each update will create a new snapshot
63 /// for all slabs of the Key. Snapshots are managed with shared pointers that
64 /// are shared between this class and the users. For each key, this class only
65 /// stores a pointer pointing to the newest snapshot, and an outdated snapshot
66 /// is deleted by the last owner of the snapshot, either this class or the
67 /// symbol index.
68 ///
69 /// The snapshot semantics keeps critical sections minimal since we only need
70 /// locking when we swap or obtain references to snapshots.
71 class FileSymbols {
72 public:
73  /// Updates all slabs associated with the \p Key.
74  /// If either is nullptr, corresponding data for \p Key will be removed.
75  /// If CountReferences is true, \p Refs will be used for counting references
76  /// during merging.
77  void update(llvm::StringRef Key, std::unique_ptr<SymbolSlab> Symbols,
78  std::unique_ptr<RefSlab> Refs,
79  std::unique_ptr<RelationSlab> Relations, bool CountReferences);
80 
81  /// The index keeps the slabs alive.
82  /// Will count Symbol::References based on number of references in the main
83  /// files, while building the index with DuplicateHandling::Merge option.
84  /// Version is populated with an increasing sequence counter.
85  std::unique_ptr<SymbolIndex>
86  buildIndex(IndexType,
88  size_t *Version = nullptr);
89 
90 private:
91  struct RefSlabAndCountReferences {
92  std::shared_ptr<RefSlab> Slab;
93  bool CountReferences = false;
94  };
95  mutable std::mutex Mutex;
96 
97  size_t Version = 0;
98  llvm::StringMap<std::shared_ptr<SymbolSlab>> SymbolsSnapshot;
99  llvm::StringMap<RefSlabAndCountReferences> RefsSnapshot;
100  llvm::StringMap<std::shared_ptr<RelationSlab>> RelatiosSnapshot;
101 };
102 
103 /// This manages symbols from files and an in-memory index on all symbols.
104 /// FIXME: Expose an interface to remove files that are closed.
105 class FileIndex : public MergedIndex {
106 public:
107  FileIndex(bool UseDex = true);
108 
109  /// Update preamble symbols of file \p Path with all declarations in \p AST
110  /// and macros in \p PP.
111  void updatePreamble(PathRef Path, llvm::StringRef Version, ASTContext &AST,
112  std::shared_ptr<Preprocessor> PP,
113  const CanonicalIncludes &Includes);
114 
115  /// Update symbols and references from main file \p Path with
116  /// `indexMainDecls`.
117  void updateMain(PathRef Path, ParsedAST &AST);
118 
119 private:
120  bool UseDex; // FIXME: this should be always on.
121 
122  // Contains information from each file's preamble only. Symbols and relations
123  // are sharded per declaration file to deduplicate multiple symbols and reduce
124  // memory usage.
125  // Missing information:
126  // - symbol refs (these are always "from the main file")
127  // - definition locations in the main file
128  //
129  // Note that we store only one version of a header, hence symbols appearing in
130  // different PP states will be missing.
131  FileSymbols PreambleSymbols;
132  SwapIndex PreambleIndex;
133 
134  // Contains information from each file's main AST.
135  // These are updated frequently (on file change), but are relatively small.
136  // Mostly contains:
137  // - refs to symbols declared in the preamble and referenced from main
138  // - symbols declared both in the main file and the preamble
139  // (Note that symbols *only* in the main file are not indexed).
140  FileSymbols MainFileSymbols;
141  SwapIndex MainFileIndex;
142 
143  // While both the FileIndex and SwapIndex are threadsafe, we need to track
144  // versions to ensure that we don't overwrite newer indexes with older ones.
145  std::mutex UpdateIndexMu;
146  unsigned MainIndexVersion = 0;
147  unsigned PreambleIndexVersion = 0;
148 };
149 
150 using SlabTuple = std::tuple<SymbolSlab, RefSlab, RelationSlab>;
151 
152 /// Retrieves symbols and refs of local top level decls in \p AST (i.e.
153 /// `AST.getLocalTopLevelDecls()`).
154 /// Exposed to assist in unit tests.
156 
157 /// Index declarations from \p AST and macros from \p PP that are declared in
158 /// included headers.
159 SlabTuple indexHeaderSymbols(llvm::StringRef Version, ASTContext &AST,
160  std::shared_ptr<Preprocessor> PP,
161  const CanonicalIncludes &Includes);
162 
163 /// Takes slabs coming from a TU (multiple files) and shards them per
164 /// declaration location.
166  /// \p HintPath is used to convert file URIs stored in symbols into absolute
167  /// paths.
168  explicit FileShardedIndex(IndexFileIn Input);
169 
170  /// Returns uris for all files that has a shard.
171  std::vector<llvm::StringRef> getAllSources() const;
172 
173  /// Generates index shard for the \p Uri. Note that this function results in
174  /// a copy of all the relevant data.
175  /// Returned index will always have Symbol/Refs/Relation Slabs set, even if
176  /// they are empty.
177  llvm::Optional<IndexFileIn> getShard(llvm::StringRef Uri) const;
178 
179 private:
180  // Contains all the information that belongs to a single file.
181  struct FileShard {
182  // Either declared or defined in the file.
183  llvm::DenseSet<const Symbol *> Symbols;
184  // Reference occurs in the file.
185  llvm::DenseSet<const Ref *> Refs;
186  // Subject is declared in the file.
187  llvm::DenseSet<const Relation *> Relations;
188  // Contains edges for only the direct includes.
189  IncludeGraph IG;
190  };
191 
192  // Keeps all the information alive.
193  const IndexFileIn Index;
194  // Mapping from URIs to slab information.
195  llvm::StringMap<FileShard> Shards;
196  // Used to build RefSlabs.
197  llvm::DenseMap<const Ref *, SymbolID> RefToSymID;
198 };
199 
200 } // namespace clangd
201 } // namespace clang
202 
203 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_FILEINDEX_H
std::tuple< SymbolSlab, RefSlab, RelationSlab > SlabTuple
Definition: FileIndex.h:150
SlabTuple indexHeaderSymbols(llvm::StringRef Version, ASTContext &AST, std::shared_ptr< Preprocessor > PP, const CanonicalIncludes &Includes)
Index declarations from AST and macros from PP that are declared in included headers.
Definition: FileIndex.cpp:215
IndexType
Select between in-memory index implementations, which have tradeoffs.
Definition: FileIndex.h:43
A container of slabs associated with a key.
Definition: FileIndex.h:71
This manages symbols from files and an in-memory index on all symbols.
Definition: FileIndex.h:105
SlabTuple indexMainDecls(ParsedAST &AST)
Retrieves symbols and refs of local top level decls in AST (i.e.
Definition: FileIndex.cpp:208
llvm::StringRef PathRef
A typedef to represent a ref to file path.
Definition: Path.h:23
Values in a Context are indexed by typed keys.
Definition: Context.h:40
DuplicateHandling
How to handle duplicated symbols across multiple files.
Definition: FileIndex.h:51
Maps a definition location onto an #include file, based on a set of filename rules.
llvm::StringMap< IncludeGraphNode > IncludeGraph
Definition: Headers.h:86
std::string Path
A typedef to represent a file path.
Definition: Path.h:20
SymbolSlab Symbols
Stores and provides access to parsed AST.
Definition: ParsedAST.h:48
Takes slabs coming from a TU (multiple files) and shards them per declaration location.
Definition: FileIndex.h:165
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
RefSlab Refs
const SymbolIndex * Index
Definition: Dexp.cpp:95