clang-tools  15.0.0git
FileIndex.h
Go to the documentation of this file.
1 //===--- FileIndex.h - Index for files. ---------------------------- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // FileIndex implements SymbolIndex for symbols from a set of files. Symbols are
10 // maintained at source-file granularity (e.g. with ASTs), and files can be
11 // updated dynamically.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_FILEINDEX_H
16 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_FILEINDEX_H
17 
18 #include "Headers.h"
20 #include "index/Index.h"
21 #include "index/Merge.h"
22 #include "index/Ref.h"
23 #include "index/Relation.h"
24 #include "index/Serialization.h"
25 #include "index/Symbol.h"
26 #include "support/MemoryTree.h"
27 #include "support/Path.h"
28 #include "clang/Lex/Preprocessor.h"
29 #include "llvm/ADT/DenseSet.h"
30 #include "llvm/ADT/Optional.h"
31 #include "llvm/ADT/StringMap.h"
32 #include "llvm/ADT/StringRef.h"
33 #include <memory>
34 #include <vector>
35 
36 namespace clang {
37 class ASTContext;
38 namespace clangd {
39 class ParsedAST;
40 
41 /// Select between in-memory index implementations, which have tradeoffs.
42 enum class IndexType {
43  // MemIndex is trivially cheap to build, but has poor query performance.
44  Light,
45  // Dex is relatively expensive to build and uses more memory, but is fast.
46  Heavy,
47 };
48 
49 /// How to handle duplicated symbols across multiple files.
50 enum class DuplicateHandling {
51  // Pick a random symbol. Less accurate but faster.
52  PickOne,
53  // Merge symbols. More accurate but slower.
54  Merge,
55 };
56 
57 /// A container of slabs associated with a key. It can be updated at key
58 /// granularity, replacing all slabs belonging to a key with a new set. Keys are
59 /// usually file paths/uris.
60 ///
61 /// This implements snapshot semantics. Each update will create a new snapshot
62 /// for all slabs of the Key. Snapshots are managed with shared pointers that
63 /// are shared between this class and the users. For each key, this class only
64 /// stores a pointer pointing to the newest snapshot, and an outdated snapshot
65 /// is deleted by the last owner of the snapshot, either this class or the
66 /// symbol index.
67 ///
68 /// The snapshot semantics keeps critical sections minimal since we only need
69 /// locking when we swap or obtain references to snapshots.
70 class FileSymbols {
71 public:
72  FileSymbols(IndexContents IdxContents);
73  /// Updates all slabs associated with the \p Key.
74  /// If either is nullptr, corresponding data for \p Key will be removed.
75  /// If CountReferences is true, \p Refs will be used for counting references
76  /// during merging.
77  void update(llvm::StringRef Key, std::unique_ptr<SymbolSlab> Symbols,
78  std::unique_ptr<RefSlab> Refs,
79  std::unique_ptr<RelationSlab> Relations, bool CountReferences);
80 
81  /// The index keeps the slabs alive.
82  /// Will count Symbol::References based on number of references in the main
83  /// files, while building the index with DuplicateHandling::Merge option.
84  /// Version is populated with an increasing sequence counter.
85  std::unique_ptr<SymbolIndex>
88  size_t *Version = nullptr);
89 
90  void profile(MemoryTree &MT) const;
91 
92 private:
93  IndexContents IdxContents;
94 
95  struct RefSlabAndCountReferences {
96  std::shared_ptr<RefSlab> Slab;
97  bool CountReferences = false;
98  };
99  mutable std::mutex Mutex;
100 
101  size_t Version = 0;
102  llvm::StringMap<std::shared_ptr<SymbolSlab>> SymbolsSnapshot;
103  llvm::StringMap<RefSlabAndCountReferences> RefsSnapshot;
104  llvm::StringMap<std::shared_ptr<RelationSlab>> RelationsSnapshot;
105 };
106 
107 /// This manages symbols from files and an in-memory index on all symbols.
108 /// FIXME: Expose an interface to remove files that are closed.
109 class FileIndex : public MergedIndex {
110 public:
111  FileIndex();
112 
113  /// Update preamble symbols of file \p Path with all declarations in \p AST
114  /// and macros in \p PP.
115  void updatePreamble(PathRef Path, llvm::StringRef Version, ASTContext &AST,
116  Preprocessor &PP, const CanonicalIncludes &Includes);
118 
119  /// Update symbols and references from main file \p Path with
120  /// `indexMainDecls`.
121  void updateMain(PathRef Path, ParsedAST &AST);
122 
123  void profile(MemoryTree &MT) const;
124 
125 private:
126  // Contains information from each file's preamble only. Symbols and relations
127  // are sharded per declaration file to deduplicate multiple symbols and reduce
128  // memory usage.
129  // Missing information:
130  // - symbol refs (these are always "from the main file")
131  // - definition locations in the main file
132  //
133  // Note that we store only one version of a header, hence symbols appearing in
134  // different PP states will be missing.
135  FileSymbols PreambleSymbols;
136  SwapIndex PreambleIndex;
137 
138  // Contains information from each file's main AST.
139  // These are updated frequently (on file change), but are relatively small.
140  // Mostly contains:
141  // - refs to symbols declared in the preamble and referenced from main
142  // - symbols declared both in the main file and the preamble
143  // (Note that symbols *only* in the main file are not indexed).
144  FileSymbols MainFileSymbols;
145  SwapIndex MainFileIndex;
146 
147  // While both the FileIndex and SwapIndex are threadsafe, we need to track
148  // versions to ensure that we don't overwrite newer indexes with older ones.
149  std::mutex UpdateIndexMu;
150  unsigned MainIndexVersion = 0;
151  unsigned PreambleIndexVersion = 0;
152 };
153 
154 using SlabTuple = std::tuple<SymbolSlab, RefSlab, RelationSlab>;
155 
156 /// Retrieves symbols and refs of local top level decls in \p AST (i.e.
157 /// `AST.getLocalTopLevelDecls()`).
158 /// Exposed to assist in unit tests.
160 
161 /// Index declarations from \p AST and macros from \p PP that are declared in
162 /// included headers.
163 SlabTuple indexHeaderSymbols(llvm::StringRef Version, ASTContext &AST,
164  Preprocessor &PP,
165  const CanonicalIncludes &Includes);
166 
167 /// Takes slabs coming from a TU (multiple files) and shards them per
168 /// declaration location.
170  /// \p HintPath is used to convert file URIs stored in symbols into absolute
171  /// paths.
172  explicit FileShardedIndex(IndexFileIn Input);
173 
174  /// Returns uris for all files that has a shard.
175  std::vector<llvm::StringRef> getAllSources() const;
176 
177  /// Generates index shard for the \p Uri. Note that this function results in
178  /// a copy of all the relevant data.
179  /// Returned index will always have Symbol/Refs/Relation Slabs set, even if
180  /// they are empty.
181  llvm::Optional<IndexFileIn> getShard(llvm::StringRef Uri) const;
182 
183 private:
184  // Contains all the information that belongs to a single file.
185  struct FileShard {
186  // Either declared or defined in the file.
187  llvm::DenseSet<const Symbol *> Symbols;
188  // Reference occurs in the file.
189  llvm::DenseSet<const Ref *> Refs;
190  // Subject is declared in the file.
191  llvm::DenseSet<const Relation *> Relations;
192  // Contains edges for only the direct includes.
193  IncludeGraph IG;
194  };
195 
196  // Keeps all the information alive.
197  const IndexFileIn Index;
198  // Mapping from URIs to slab information.
199  llvm::StringMap<FileShard> Shards;
200  // Used to build RefSlabs.
201  llvm::DenseMap<const Ref *, SymbolID> RefToSymID;
202 };
203 
204 } // namespace clangd
205 } // namespace clang
206 
207 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_FILEINDEX_H
clang::clangd::IndexType
IndexType
Select between in-memory index implementations, which have tradeoffs.
Definition: FileIndex.h:42
clang::clangd::indexHeaderSymbols
SlabTuple indexHeaderSymbols(llvm::StringRef Version, ASTContext &AST, Preprocessor &PP, const CanonicalIncludes &Includes)
Index declarations from AST and macros from PP that are declared in included headers.
Definition: FileIndex.cpp:229
clang::clangd::FileIndex::FileIndex
FileIndex()
Definition: FileIndex.cpp:421
Headers.h
Refs
RefSlab Refs
Definition: SymbolCollectorTests.cpp:312
clang::clangd::Path
std::string Path
A typedef to represent a file path.
Definition: Path.h:26
clang::clangd::IndexFileIn
Definition: Serialization.h:42
Path.h
clang::clangd::SwapIndex
Definition: Index.h:163
Index.h
clang::clangd::FileShardedIndex::FileShardedIndex
FileShardedIndex(IndexFileIn Input)
HintPath is used to convert file URIs stored in symbols into absolute paths.
Definition: FileIndex.cpp:126
clang::clangd::FileIndex::updatePreamble
void updatePreamble(PathRef Path, llvm::StringRef Version, ASTContext &AST, Preprocessor &PP, const CanonicalIncludes &Includes)
Update preamble symbols of file Path with all declarations in AST and macros in PP.
Definition: FileIndex.cpp:459
clang::clangd::FileIndex
This manages symbols from files and an in-memory index on all symbols.
Definition: FileIndex.h:109
clang::clangd::FileSymbols
A container of slabs associated with a key.
Definition: FileIndex.h:70
clang::clangd::DuplicateHandling
DuplicateHandling
How to handle duplicated symbols across multiple files.
Definition: FileIndex.h:50
clang::clangd::FileShardedIndex::getShard
llvm::Optional< IndexFileIn > getShard(llvm::StringRef Uri) const
Generates index shard for the Uri.
Definition: FileIndex.cpp:192
Relation.h
clang::clangd::MergedIndex
Definition: Merge.h:26
CanonicalIncludes.h
clang::clangd::MemoryTree
A tree that can be used to represent memory usage of nested components while preserving the hierarchy...
Definition: MemoryTree.h:30
clang::clangd::CanonicalIncludes
Maps a definition location onto an #include file, based on a set of filename rules.
Definition: CanonicalIncludes.h:37
clang::clangd::IndexContents
IndexContents
Describes what data is covered by an index.
Definition: Index.h:93
clang::clangd::FileSymbols::update
void update(llvm::StringRef Key, std::unique_ptr< SymbolSlab > Symbols, std::unique_ptr< RefSlab > Refs, std::unique_ptr< RelationSlab > Relations, bool CountReferences)
Updates all slabs associated with the Key.
Definition: FileIndex.cpp:244
clang::clangd::DuplicateHandling::PickOne
@ PickOne
clang::clangd::FileIndex::profile
void profile(MemoryTree &MT) const
Definition: FileIndex.cpp:494
clang::clangd::SlabTuple
std::tuple< SymbolSlab, RefSlab, RelationSlab > SlabTuple
Definition: FileIndex.h:154
Serialization.h
clang::clangd::Key
Values in a Context are indexed by typed keys.
Definition: Context.h:40
clang::clangd::FileSymbols::profile
void profile(MemoryTree &MT) const
Definition: FileIndex.cpp:402
clang::clangd::DuplicateHandling::Merge
@ Merge
Symbol.h
clang::tidy::bugprone::PP
static Preprocessor * PP
Definition: BadSignalToKillThreadCheck.cpp:29
clang::clangd::FileIndex::updateMain
void updateMain(PathRef Path, ParsedAST &AST)
Update symbols and references from main file Path with indexMainDecls.
Definition: FileIndex.cpp:468
clang::clangd::FileShardedIndex::getAllSources
std::vector< llvm::StringRef > getAllSources() const
Returns uris for all files that has a shard.
Definition: FileIndex.cpp:181
clang::clangd::PathRef
llvm::StringRef PathRef
A typedef to represent a ref to file path.
Definition: Path.h:29
clang::clangd::FileShardedIndex
Takes slabs coming from a TU (multiple files) and shards them per declaration location.
Definition: FileIndex.h:169
Ref.h
MemoryTree.h
clang
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
Definition: ApplyReplacements.h:27
clang::clangd::IncludeGraph
llvm::StringMap< IncludeGraphNode > IncludeGraph
Definition: Headers.h:92
clang::clangd::FileSymbols::buildIndex
std::unique_ptr< SymbolIndex > buildIndex(IndexType, DuplicateHandling DuplicateHandle=DuplicateHandling::PickOne, size_t *Version=nullptr)
The index keeps the slabs alive.
Definition: FileIndex.cpp:270
clang::clangd::ParsedAST
Stores and provides access to parsed AST.
Definition: ParsedAST.h:45
Merge.h
clang::clangd::IndexType::Heavy
@ Heavy
clang::clangd::FileSymbols::FileSymbols
FileSymbols(IndexContents IdxContents)
Definition: FileIndex.cpp:241
clang::clangd::IndexType::Light
@ Light
clang::clangd::indexMainDecls
SlabTuple indexMainDecls(ParsedAST &AST)
Retrieves symbols and refs of local top level decls in AST (i.e.
Definition: FileIndex.cpp:222