clang-tools 20.0.0git
FileIndex.h
Go to the documentation of this file.
1//===--- FileIndex.h - Index for files. ---------------------------- C++-*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// FileIndex implements SymbolIndex for symbols from a set of files. Symbols are
10// maintained at source-file granularity (e.g. with ASTs), and files can be
11// updated dynamically.
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_FILEINDEX_H
16#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_FILEINDEX_H
17
18#include "Headers.h"
19#include "clang-include-cleaner/Record.h"
20#include "index/Index.h"
21#include "index/Merge.h"
22#include "index/Ref.h"
23#include "index/Relation.h"
24#include "index/Serialization.h"
25#include "index/Symbol.h"
26#include "support/MemoryTree.h"
27#include "support/Path.h"
28#include "clang/Lex/Preprocessor.h"
29#include "llvm/ADT/DenseSet.h"
30#include "llvm/ADT/StringMap.h"
31#include "llvm/ADT/StringRef.h"
32#include <memory>
33#include <optional>
34#include <vector>
35
36namespace clang {
37class ASTContext;
38namespace clangd {
39class ParsedAST;
40
41/// Select between in-memory index implementations, which have tradeoffs.
42enum class IndexType {
43 /// MemIndex is trivially cheap to build, but has poor query performance.
44 Light,
45 /// Dex is relatively expensive to build and uses more memory, but is fast.
46 Heavy,
47};
48
49/// How to handle duplicated symbols across multiple files.
51 // Pick a random symbol. Less accurate but faster.
52 PickOne,
53 // Merge symbols. More accurate but slower.
54 Merge,
55};
56
57/// A container of slabs associated with a key. It can be updated at key
58/// granularity, replacing all slabs belonging to a key with a new set. Keys are
59/// usually file paths/uris.
60///
61/// This implements snapshot semantics. Each update will create a new snapshot
62/// for all slabs of the Key. Snapshots are managed with shared pointers that
63/// are shared between this class and the users. For each key, this class only
64/// stores a pointer pointing to the newest snapshot, and an outdated snapshot
65/// is deleted by the last owner of the snapshot, either this class or the
66/// symbol index.
67///
68/// The snapshot semantics keeps critical sections minimal since we only need
69/// locking when we swap or obtain references to snapshots.
70class FileSymbols {
71public:
72 FileSymbols(IndexContents IdxContents, bool SupportContainedRefs);
73 /// Updates all slabs associated with the \p Key.
74 /// If any slab is nullptr, corresponding data for \p Key will be removed.
75 /// If \p CountReferences is true, \p Refs will be used for counting
76 /// references during merging.
77 void update(llvm::StringRef Key, std::unique_ptr<SymbolSlab> Symbols,
78 std::unique_ptr<RefSlab> Refs,
79 std::unique_ptr<RelationSlab> Relations, bool CountReferences);
80
81 /// The index keeps the slabs alive.
82 /// Will count Symbol::References based on number of references in the main
83 /// files, while building the index with DuplicateHandling::Merge option.
84 /// Version is populated with an increasing sequence counter.
85 std::unique_ptr<SymbolIndex>
86 buildIndex(IndexType,
87 DuplicateHandling DuplicateHandle = DuplicateHandling::PickOne,
88 size_t *Version = nullptr);
89
90 void profile(MemoryTree &MT) const;
91
92private:
93 IndexContents IdxContents;
94 bool SupportContainedRefs;
95
96 struct RefSlabAndCountReferences {
97 std::shared_ptr<RefSlab> Slab;
98 bool CountReferences = false;
99 };
100 mutable std::mutex Mutex;
101
102 size_t Version = 0;
103 llvm::StringMap<std::shared_ptr<SymbolSlab>> SymbolsSnapshot;
104 llvm::StringMap<RefSlabAndCountReferences> RefsSnapshot;
105 llvm::StringMap<std::shared_ptr<RelationSlab>> RelationsSnapshot;
106};
107
108/// This manages symbols from files and an in-memory index on all symbols.
109/// FIXME: Expose an interface to remove files that are closed.
110class FileIndex : public MergedIndex {
111public:
112 FileIndex(bool SupportContainedRefs);
113
114 /// Update preamble symbols of file \p Path with all declarations in \p AST
115 /// and macros in \p PP.
116 void updatePreamble(PathRef Path, llvm::StringRef Version, ASTContext &AST,
117 Preprocessor &PP,
118 const include_cleaner::PragmaIncludes &PI);
119 void updatePreamble(IndexFileIn);
120
121 /// Update symbols and references from main file \p Path with
122 /// `indexMainDecls`.
123 void updateMain(PathRef Path, ParsedAST &AST);
124
125 void profile(MemoryTree &MT) const;
126
127private:
128 // Contains information from each file's preamble only. Symbols and relations
129 // are sharded per declaration file to deduplicate multiple symbols and reduce
130 // memory usage.
131 // Missing information:
132 // - symbol refs (these are always "from the main file")
133 // - definition locations in the main file
134 //
135 // Note that we store only one version of a header, hence symbols appearing in
136 // different PP states will be missing.
137 FileSymbols PreambleSymbols;
138 SwapIndex PreambleIndex;
139
140 // Contains information from each file's main AST.
141 // These are updated frequently (on file change), but are relatively small.
142 // Mostly contains:
143 // - refs to symbols declared in the preamble and referenced from main
144 // - symbols declared both in the main file and the preamble
145 // (Note that symbols *only* in the main file are not indexed).
146 FileSymbols MainFileSymbols;
147 SwapIndex MainFileIndex;
148
149 // While both the FileIndex and SwapIndex are threadsafe, we need to track
150 // versions to ensure that we don't overwrite newer indexes with older ones.
151 std::mutex UpdateIndexMu;
152 unsigned MainIndexVersion = 0;
153 unsigned PreambleIndexVersion = 0;
154};
155
156using SlabTuple = std::tuple<SymbolSlab, RefSlab, RelationSlab>; // Symbol, ref and relation slabs bundled as one value.
157
158/// Retrieves symbols and refs of local top level decls in \p AST (i.e.
159/// `AST.getLocalTopLevelDecls()`).
160/// Exposed to assist in unit tests.
161SlabTuple indexMainDecls(ParsedAST &AST);
162
163/// Index declarations from \p AST and macros from \p PP that are declared in
164/// included headers. Returns the resulting symbol, ref and relation slabs.
165SlabTuple indexHeaderSymbols(llvm::StringRef Version, ASTContext &AST,
166 Preprocessor &PP,
167 const include_cleaner::PragmaIncludes &PI);
168
169/// Takes slabs coming from a TU (multiple files) and shards them per
170/// declaration location.
171struct FileShardedIndex {
172 /// Shards the slabs in \p Input. NOTE(review): this comment used to describe
173 /// a \p HintPath parameter, which this constructor does not take.
174 explicit FileShardedIndex(IndexFileIn Input);
175
176 /// Returns uris for all files that have a shard.
177 std::vector<llvm::StringRef> getAllSources() const;
178
179 /// Generates index shard for the \p Uri. Note that this function results in
180 /// a copy of all the relevant data.
181 /// Returned index will always have Symbol/Refs/Relation Slabs set, even if
182 /// they are empty.
183 std::optional<IndexFileIn> getShard(llvm::StringRef Uri) const;
184
185private:
186 // Contains all the information that belongs to a single file.
187 struct FileShard {
188 // Either declared or defined in the file.
189 llvm::DenseSet<const Symbol *> Symbols;
190 // Reference occurs in the file.
191 llvm::DenseSet<const Ref *> Refs;
192 // Subject is declared in the file.
193 llvm::DenseSet<const Relation *> Relations;
194 // Contains edges for only the direct includes.
195 IncludeGraph IG;
196 };
197
198 // Keeps all the information alive.
199 const IndexFileIn Index;
200 // Mapping from URIs to slab information.
201 llvm::StringMap<FileShard> Shards;
202 // Used to build RefSlabs.
203 llvm::DenseMap<const Ref *, SymbolID> RefToSymID;
204};
205
206} // namespace clangd
207} // namespace clang
208
209#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_FILEINDEX_H
This manages symbols from files and an in-memory index on all symbols.
Definition: FileIndex.h:110
void updatePreamble(PathRef Path, llvm::StringRef Version, ASTContext &AST, Preprocessor &PP, const include_cleaner::PragmaIncludes &PI)
Update preamble symbols of file Path with all declarations in AST and macros in PP.
Definition: FileIndex.cpp:461
void profile(MemoryTree &MT) const
Definition: FileIndex.cpp:496
void updateMain(PathRef Path, ParsedAST &AST)
Update symbols and references from main file Path with indexMainDecls.
Definition: FileIndex.cpp:470
A container of slabs associated with a key.
Definition: FileIndex.h:70
void update(llvm::StringRef Key, std::unique_ptr< SymbolSlab > Symbols, std::unique_ptr< RefSlab > Refs, std::unique_ptr< RelationSlab > Relations, bool CountReferences)
Updates all slabs associated with the Key.
Definition: FileIndex.cpp:245
std::unique_ptr< SymbolIndex > buildIndex(IndexType, DuplicateHandling DuplicateHandle=DuplicateHandling::PickOne, size_t *Version=nullptr)
The index keeps the slabs alive.
Definition: FileIndex.cpp:271
void profile(MemoryTree &MT) const
Definition: FileIndex.cpp:403
Values in a Context are indexed by typed keys.
Definition: Context.h:40
Stores and provides access to parsed AST.
Definition: ParsedAST.h:46
IndexType
Select between in-memory index implementations, which have tradeoffs.
Definition: FileIndex.h:42
IndexContents
Describes what data is covered by an index.
Definition: Index.h:114
std::string Path
A typedef to represent a file path.
Definition: Path.h:26
SlabTuple indexMainDecls(ParsedAST &AST)
Retrieves symbols and refs of local top level decls in AST (i.e. `AST.getLocalTopLevelDecls()`).
Definition: FileIndex.cpp:223
std::tuple< SymbolSlab, RefSlab, RelationSlab > SlabTuple
Definition: FileIndex.h:156
DuplicateHandling
How to handle duplicated symbols across multiple files.
Definition: FileIndex.h:50
SlabTuple indexHeaderSymbols(llvm::StringRef Version, ASTContext &AST, Preprocessor &PP, const include_cleaner::PragmaIncludes &PI)
Index declarations from AST and macros from PP that are declared in included headers.
Definition: FileIndex.cpp:230
llvm::StringMap< IncludeGraphNode > IncludeGraph
Definition: Headers.h:101
llvm::StringRef PathRef
A typedef to represent a ref to file path.
Definition: Path.h:29
//===-- Representation.cpp - ClangDoc Representation -----------*- C++ -*-===//
Takes slabs coming from a TU (multiple files) and shards them per declaration location.
Definition: FileIndex.h:171
std::vector< llvm::StringRef > getAllSources() const
Returns uris for all files that have a shard.
Definition: FileIndex.cpp:182
std::optional< IndexFileIn > getShard(llvm::StringRef Uri) const
Generates index shard for the Uri.
Definition: FileIndex.cpp:193
A tree that can be used to represent memory usage of nested components while preserving the hierarchy...
Definition: MemoryTree.h:30