clang-tools 20.0.0git
Merge.cpp
Go to the documentation of this file.
1//===--- Merge.cpp -----------------------------------------------*- C++-*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "Merge.h"
10#include "index/Symbol.h"
12#include "index/SymbolOrigin.h"
13#include "support/Trace.h"
14#include "llvm/ADT/StringRef.h"
15#include <algorithm>
16#include <iterator>
17
18namespace clang {
19namespace clangd {
20
21namespace {
22
23// Returns true if file defining/declaring \p S is covered by \p Index.
24bool isIndexAuthoritative(const SymbolIndex::IndexedFiles &Index,
25 const Symbol &S) {
26 // We expect the definition to see the canonical declaration, so it seems to
27 // be enough to check only the definition if it exists.
28 const char *OwningFile =
29 S.Definition ? S.Definition.FileURI : S.CanonicalDeclaration.FileURI;
30 return (Index(OwningFile) & IndexContents::Symbols) != IndexContents::None;
31}
32} // namespace
33
35 const FuzzyFindRequest &Req,
36 llvm::function_ref<void(const Symbol &)> Callback) const {
37 // We can't step through both sources in parallel. So:
38 // 1) query all dynamic symbols, slurping results into a slab
39 // 2) query the static symbols, for each one:
40 // a) if it's not in the dynamic slab, yield it directly
41 // b) if it's in the dynamic slab, merge it and yield the result
42 // 3) now yield all the dynamic symbols we haven't processed.
43 trace::Span Tracer("MergedIndex fuzzyFind");
44 bool More = false; // We'll be incomplete if either source was.
46 unsigned DynamicCount = 0;
47 unsigned StaticCount = 0;
48 unsigned MergedCount = 0;
49 // Number of results ignored due to staleness.
50 unsigned StaticDropped = 0;
51 More |= Dynamic->fuzzyFind(Req, [&](const Symbol &S) {
52 ++DynamicCount;
53 DynB.insert(S);
54 });
55 SymbolSlab Dyn = std::move(DynB).build();
56
57 llvm::DenseSet<SymbolID> ReportedDynSymbols;
58 {
59 auto DynamicContainsFile = Dynamic->indexedFiles();
60 More |= Static->fuzzyFind(Req, [&](const Symbol &S) {
61 ++StaticCount;
62 auto DynS = Dyn.find(S.ID);
63 // If symbol also exist in the dynamic index, just merge and report.
64 if (DynS != Dyn.end()) {
65 ++MergedCount;
66 ReportedDynSymbols.insert(S.ID);
67 return Callback(mergeSymbol(*DynS, S));
68 }
69
70 // Otherwise, if the dynamic index owns the symbol's file, it means static
71 // index is stale just drop the symbol.
72 if (isIndexAuthoritative(DynamicContainsFile, S)) {
73 ++StaticDropped;
74 return;
75 }
76
77 // If not just report the symbol from static index as is.
78 return Callback(S);
79 });
80 }
81 SPAN_ATTACH(Tracer, "dynamic", DynamicCount);
82 SPAN_ATTACH(Tracer, "static", StaticCount);
83 SPAN_ATTACH(Tracer, "static_dropped", StaticDropped);
84 SPAN_ATTACH(Tracer, "merged", MergedCount);
85 for (const Symbol &S : Dyn)
86 if (!ReportedDynSymbols.count(S.ID))
87 Callback(S);
88 return More;
89}
90
92 const LookupRequest &Req,
93 llvm::function_ref<void(const Symbol &)> Callback) const {
94 trace::Span Tracer("MergedIndex lookup");
96
97 Dynamic->lookup(Req, [&](const Symbol &S) { B.insert(S); });
98
99 auto RemainingIDs = Req.IDs;
100 {
101 auto DynamicContainsFile = Dynamic->indexedFiles();
102 Static->lookup(Req, [&](const Symbol &S) {
103 // If we've seen the symbol before, just merge.
104 if (const Symbol *Sym = B.find(S.ID)) {
105 RemainingIDs.erase(S.ID);
106 return Callback(mergeSymbol(*Sym, S));
107 }
108
109 // If symbol is missing in dynamic index, and dynamic index owns the
110 // symbol's file. Static index is stale, just drop the symbol.
111 if (isIndexAuthoritative(DynamicContainsFile, S))
112 return;
113
114 // Dynamic index doesn't know about this file, just use the symbol from
115 // static index.
116 RemainingIDs.erase(S.ID);
117 Callback(S);
118 });
119 }
120 for (const auto &ID : RemainingIDs)
121 if (const Symbol *Sym = B.find(ID))
122 Callback(*Sym);
123}
124
126 llvm::function_ref<void(const Ref &)> Callback) const {
127 trace::Span Tracer("MergedIndex refs");
128 bool More = false;
129 uint32_t Remaining = Req.Limit.value_or(std::numeric_limits<uint32_t>::max());
130 // We don't want duplicated refs from the static/dynamic indexes,
131 // and we can't reliably deduplicate them because offsets may differ slightly.
132 // We consider the dynamic index authoritative and report all its refs,
133 // and only report static index refs from other files.
134 More |= Dynamic->refs(Req, [&](const Ref &O) {
135 Callback(O);
136 assert(Remaining != 0);
137 --Remaining;
138 });
139 if (Remaining == 0 && More)
140 return More;
141 auto DynamicContainsFile = Dynamic->indexedFiles();
142 // We return less than Req.Limit if static index returns more refs for dirty
143 // files.
144 bool StaticHadMore = Static->refs(Req, [&](const Ref &O) {
145 if ((DynamicContainsFile(O.Location.FileURI) & IndexContents::References) !=
147 return; // ignore refs that have been seen from dynamic index.
148 if (Remaining == 0) {
149 More = true;
150 return;
151 }
152 --Remaining;
153 Callback(O);
154 });
155 return More || StaticHadMore;
156}
157
159 const ContainedRefsRequest &Req,
160 llvm::function_ref<void(const ContainedRefsResult &)> Callback) const {
161 trace::Span Tracer("MergedIndex refersTo");
162 bool More = false;
163 uint32_t Remaining = Req.Limit.value_or(std::numeric_limits<uint32_t>::max());
164 // We don't want duplicated refs from the static/dynamic indexes,
165 // and we can't reliably deduplicate them because offsets may differ slightly.
166 // We consider the dynamic index authoritative and report all its refs,
167 // and only report static index refs from other files.
168 More |= Dynamic->containedRefs(Req, [&](const auto &O) {
169 Callback(O);
170 assert(Remaining != 0);
171 --Remaining;
172 });
173 if (Remaining == 0 && More)
174 return More;
175 auto DynamicContainsFile = Dynamic->indexedFiles();
176 // We return less than Req.Limit if static index returns more refs for dirty
177 // files.
178 bool StaticHadMore = Static->containedRefs(Req, [&](const auto &O) {
179 if ((DynamicContainsFile(O.Location.FileURI) & IndexContents::References) !=
181 return; // ignore refs that have been seen from dynamic index.
182 if (Remaining == 0) {
183 More = true;
184 return;
185 }
186 --Remaining;
187 Callback(O);
188 });
189 return More || StaticHadMore;
190}
191
192llvm::unique_function<IndexContents(llvm::StringRef) const>
194 return [DynamicContainsFile{Dynamic->indexedFiles()},
195 StaticContainsFile{Static->indexedFiles()}](llvm::StringRef FileURI) {
196 return DynamicContainsFile(FileURI) | StaticContainsFile(FileURI);
197 };
198}
199
201 const RelationsRequest &Req,
202 llvm::function_ref<void(const SymbolID &, const Symbol &)> Callback) const {
203 uint32_t Remaining = Req.Limit.value_or(std::numeric_limits<uint32_t>::max());
204 // Return results from both indexes but avoid duplicates.
205 // We might return stale relations from the static index;
206 // we don't currently have a good way of identifying them.
207 llvm::DenseSet<std::pair<SymbolID, SymbolID>> SeenRelations;
208 Dynamic->relations(Req, [&](const SymbolID &Subject, const Symbol &Object) {
209 Callback(Subject, Object);
210 SeenRelations.insert(std::make_pair(Subject, Object.ID));
211 --Remaining;
212 });
213 if (Remaining == 0)
214 return;
215 Static->relations(Req, [&](const SymbolID &Subject, const Symbol &Object) {
216 if (Remaining > 0 &&
217 !SeenRelations.count(std::make_pair(Subject, Object.ID))) {
218 --Remaining;
219 Callback(Subject, Object);
220 }
221 });
222}
223
224// Returns true if \p L is (strictly) preferred to \p R (e.g. by file paths). If
225// neither is preferred, this returns false.
226static bool prefer(const SymbolLocation &L, const SymbolLocation &R) {
227 if (!L)
228 return false;
229 if (!R)
230 return true;
231 auto HasCodeGenSuffix = [](const SymbolLocation &Loc) {
232 constexpr static const char *CodegenSuffixes[] = {".proto"};
233 return llvm::any_of(CodegenSuffixes, [&](llvm::StringRef Suffix) {
234 return llvm::StringRef(Loc.FileURI).ends_with(Suffix);
235 });
236 };
237 return HasCodeGenSuffix(L) && !HasCodeGenSuffix(R);
238}
239
240Symbol mergeSymbol(const Symbol &L, const Symbol &R) {
241 assert(L.ID == R.ID);
242 // We prefer information from TUs that saw the definition.
243 // Classes: this is the def itself. Functions: hopefully the header decl.
244 // If both did (or both didn't), continue to prefer L over R.
245 bool PreferR = R.Definition && !L.Definition;
246 // Merge include headers only if both have definitions or both have no
247 // definition; otherwise, only accumulate references of common includes.
248 assert(L.Definition.FileURI && R.Definition.FileURI);
249 bool MergeIncludes =
250 bool(*L.Definition.FileURI) == bool(*R.Definition.FileURI);
251 Symbol S = PreferR ? R : L; // The target symbol we're merging into.
252 const Symbol &O = PreferR ? L : R; // The "other" less-preferred symbol.
253
254 // Only use locations in \p O if it's (strictly) preferred.
255 if (prefer(O.CanonicalDeclaration, S.CanonicalDeclaration))
256 S.CanonicalDeclaration = O.CanonicalDeclaration;
257 if (prefer(O.Definition, S.Definition))
258 S.Definition = O.Definition;
259 S.References += O.References;
260 if (S.Signature == "")
261 S.Signature = O.Signature;
262 if (S.CompletionSnippetSuffix == "")
263 S.CompletionSnippetSuffix = O.CompletionSnippetSuffix;
264 if (S.Documentation == "") {
265 // Don't accept documentation from bare forward class declarations, if there
266 // is a definition and it didn't provide one. S is often an undocumented
267 // class, and O is a non-canonical forward decl preceded by an irrelevant
268 // comment.
269 bool IsClass = S.SymInfo.Kind == index::SymbolKind::Class ||
270 S.SymInfo.Kind == index::SymbolKind::Struct ||
271 S.SymInfo.Kind == index::SymbolKind::Union;
272 if (!IsClass || !S.Definition)
273 S.Documentation = O.Documentation;
274 }
275 if (S.ReturnType == "")
276 S.ReturnType = O.ReturnType;
277 if (S.Type == "")
278 S.Type = O.Type;
279 for (const auto &OI : O.IncludeHeaders) {
280 bool Found = false;
281 for (auto &SI : S.IncludeHeaders) {
282 if (SI.IncludeHeader == OI.IncludeHeader) {
283 Found = true;
284 SI.References += OI.References;
285 SI.SupportedDirectives |= OI.SupportedDirectives;
286 break;
287 }
288 }
289 if (!Found && MergeIncludes)
290 S.IncludeHeaders.emplace_back(OI.IncludeHeader, OI.References,
291 OI.supportedDirectives());
292 }
293
294 S.Origin |= O.Origin | SymbolOrigin::Merge;
295 S.Flags |= O.Flags;
296 return S;
297}
298
299} // namespace clangd
300} // namespace clang
std::string Suffix
Definition: AddUsing.cpp:132
SourceLocation Loc
#define SPAN_ATTACH(S, Name, Expr)
Attach a key-value pair to a Span event.
Definition: Trace.h:164
void relations(const RelationsRequest &, llvm::function_ref< void(const SymbolID &, const Symbol &)>) const override
Definition: Merge.cpp:200
bool containedRefs(const ContainedRefsRequest &, llvm::function_ref< void(const ContainedRefsResult &)>) const override
Find all symbols that are referenced by a symbol and apply Callback on each result.
Definition: Merge.cpp:158
bool refs(const RefsRequest &, llvm::function_ref< void(const Ref &)>) const override
Finds all occurrences (e.g.
Definition: Merge.cpp:125
void lookup(const LookupRequest &, llvm::function_ref< void(const Symbol &)>) const override
Looks up symbols with any of the given symbol IDs and applies Callback on each matched symbol.
Definition: Merge.cpp:91
bool fuzzyFind(const FuzzyFindRequest &, llvm::function_ref< void(const Symbol &)>) const override
Matches symbols in the index fuzzily and applies Callback on each matched symbol before returning.
Definition: Merge.cpp:34
llvm::unique_function< IndexContents(llvm::StringRef) const > indexedFiles() const override
Definition: Merge.cpp:193
virtual bool fuzzyFind(const FuzzyFindRequest &Req, llvm::function_ref< void(const Symbol &)> Callback) const =0
Matches symbols in the index fuzzily and applies Callback on each matched symbol before returning.
virtual bool containedRefs(const ContainedRefsRequest &Req, llvm::function_ref< void(const ContainedRefsResult &)> Callback) const =0
Find all symbols that are referenced by a symbol and apply Callback on each result.
virtual void relations(const RelationsRequest &Req, llvm::function_ref< void(const SymbolID &Subject, const Symbol &Object)> Callback) const =0
Finds all relations (S, P, O) stored in the index such that S is among Req.Subjects and P is Req....
virtual bool refs(const RefsRequest &Req, llvm::function_ref< void(const Ref &)> Callback) const =0
Finds all occurrences (e.g.
virtual void lookup(const LookupRequest &Req, llvm::function_ref< void(const Symbol &)> Callback) const =0
Looks up symbols with any of the given symbol IDs and applies Callback on each matched symbol.
llvm::unique_function< IndexContents(llvm::StringRef) const > IndexedFiles
Returns function which checks if the specified file was used to build this index or not.
Definition: Index.h:187
virtual IndexedFiles indexedFiles() const =0
SymbolSlab::Builder is a mutable container that can 'freeze' to SymbolSlab.
Definition: Symbol.h:224
void insert(const Symbol &S)
Adds a symbol, overwriting any existing one with the same ID.
Definition: Symbol.cpp:52
An immutable symbol container that stores a set of symbols.
Definition: Symbol.h:201
const_iterator end() const
Definition: Symbol.h:210
const_iterator find(const SymbolID &SymID) const
Definition: Symbol.cpp:39
Records an event whose duration is the lifetime of the Span object.
Definition: Trace.h:143
IndexContents
Describes what data is covered by an index.
Definition: Index.h:114
Symbol mergeSymbol(const Symbol &L, const Symbol &R)
Definition: Merge.cpp:240
llvm::unique_function< void(llvm::Expected< T >)> Callback
A Callback<T> is a void function that accepts Expected<T>.
Definition: Function.h:28
static bool prefer(const SymbolLocation &L, const SymbolLocation &R)
Definition: Merge.cpp:226
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
std::optional< uint32_t > Limit
If set, limit the number of refers returned from the index.
Definition: Index.h:90
llvm::DenseSet< SymbolID > IDs
Definition: Index.h:65
Represents a symbol occurrence in the source file.
Definition: Ref.h:88
SymbolLocation Location
The source location where the symbol is named.
Definition: Ref.h:90
std::optional< uint32_t > Limit
If set, limit the number of refers returned from the index.
Definition: Index.h:74
std::optional< uint32_t > Limit
If set, limit the number of relations returned from the index.
Definition: Index.h:97
The class presents a C++ symbol, e.g.
Definition: Symbol.h:39
SymbolFlag Flags
Definition: Symbol.h:151
SymbolLocation Definition
The location of the symbol's definition, if one was found.
Definition: Symbol.h:50
llvm::StringRef Type
Raw representation of the OpaqueType of the symbol, used for scoring purposes.
Definition: Symbol.h:88
llvm::StringRef Documentation
Documentation including comment for the symbol declaration.
Definition: Symbol.h:79
llvm::SmallVector< IncludeHeaderWithReferences, 1 > IncludeHeaders
One Symbol can potentially be included via different headers.
Definition: Symbol.h:133
llvm::StringRef Signature
A brief description of the symbol that can be appended in the completion candidate list.
Definition: Symbol.h:68
unsigned References
The number of translation units that reference this symbol from their main file.
Definition: Symbol.h:62
llvm::StringRef ReturnType
Type when this symbol is used in an expression.
Definition: Symbol.h:83
SymbolLocation CanonicalDeclaration
The location of the preferred declaration of the symbol.
Definition: Symbol.h:59
llvm::StringRef CompletionSnippetSuffix
What to insert when completing this symbol, after the symbol name.
Definition: Symbol.h:77
SymbolID ID
The ID of the symbol.
Definition: Symbol.h:41
SymbolOrigin Origin
Where this symbol came from. Usually an index provides a constant value.
Definition: Symbol.h:64