clang-tools 20.0.0git
Dex.h
Go to the documentation of this file.
1//===--- Dex.h - Dex Symbol Index Implementation ----------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This defines Dex - a symbol index implementation based on query iterators
11/// over symbol tokens, such as fuzzy matching trigrams, scopes, types, etc.
12/// While consuming more memory and having longer build stage due to
13/// preprocessing, Dex will have substantially lower latency. It will also allow
14/// efficient symbol searching which is crucial for operations like code
15/// completion, and can be very important for a number of different code
16/// transformations which will be eventually supported by Clangd.
17///
18//===----------------------------------------------------------------------===//
19
20#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H
21#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H
22
23#include "index/dex/Iterator.h"
24#include "index/Index.h"
25#include "index/Relation.h"
27#include "index/dex/Token.h"
28#include "llvm/ADT/StringSet.h"
29
30namespace clang {
31namespace clangd {
32namespace dex {
33
34/// In-memory Dex trigram-based index implementation.
35class Dex : public SymbolIndex {
36public:
37 // All data must outlive this index.
38 template <typename SymbolRange, typename RefsRange, typename RelationsRange>
39 Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations,
40 bool SupportContainedRefs)
41 : Corpus(0) {
42 for (auto &&Sym : Symbols)
43 this->Symbols.push_back(&Sym);
44 for (auto &&Ref : Refs)
45 this->Refs.try_emplace(Ref.first, Ref.second);
46 for (auto &&Rel : Relations)
47 this->Relations[std::make_pair(Rel.Subject,
48 static_cast<uint8_t>(Rel.Predicate))]
49 .push_back(Rel.Object);
50 buildIndex(SupportContainedRefs);
51 }
52 // Symbols and Refs are owned by BackingData, Index takes ownership.
53 template <typename SymbolRange, typename RefsRange, typename RelationsRange,
54 typename Payload>
55 Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations,
56 Payload &&BackingData, size_t BackingDataSize, bool SupportContainedRefs)
57 : Dex(std::forward<SymbolRange>(Symbols), std::forward<RefsRange>(Refs),
58 std::forward<RelationsRange>(Relations), SupportContainedRefs) {
59 KeepAlive = std::shared_ptr<void>(
60 std::make_shared<Payload>(std::move(BackingData)), nullptr);
61 this->BackingDataSize = BackingDataSize;
62 }
63
64 template <typename SymbolRange, typename RefsRange, typename RelationsRange,
65 typename FileRange, typename Payload>
66 Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations,
67 FileRange &&Files, IndexContents IdxContents, Payload &&BackingData,
68 size_t BackingDataSize, bool SupportContainedRefs)
69 : Dex(std::forward<SymbolRange>(Symbols), std::forward<RefsRange>(Refs),
70 std::forward<RelationsRange>(Relations),
71 std::forward<Payload>(BackingData), BackingDataSize,
72 SupportContainedRefs) {
73 this->Files = std::forward<FileRange>(Files);
74 this->IdxContents = IdxContents;
75 }
76
77 /// Builds an index from slabs. The index takes ownership of the slab.
78 static std::unique_ptr<SymbolIndex> build(SymbolSlab, RefSlab, RelationSlab,
79 bool SupportContainedRefs);
80
81 bool
82 fuzzyFind(const FuzzyFindRequest &Req,
83 llvm::function_ref<void(const Symbol &)> Callback) const override;
84
85 void lookup(const LookupRequest &Req,
86 llvm::function_ref<void(const Symbol &)> Callback) const override;
87
88 bool refs(const RefsRequest &Req,
89 llvm::function_ref<void(const Ref &)> Callback) const override;
90
91 bool containedRefs(const ContainedRefsRequest &Req,
92 llvm::function_ref<void(const ContainedRefsResult &)>
93 Callback) const override;
94
95 void relations(const RelationsRequest &Req,
96 llvm::function_ref<void(const SymbolID &, const Symbol &)>
97 Callback) const override;
98
99 llvm::unique_function<IndexContents(llvm::StringRef) const>
100 indexedFiles() const override;
101
102 size_t estimateMemoryUsage() const override;
103
104private:
105 class RevRef {
106 const Ref *Reference;
107 SymbolID Target;
108
109 public:
110 RevRef(const Ref &Reference, SymbolID Target)
111 : Reference(&Reference), Target(Target) {}
112 const Ref &ref() const { return *Reference; }
113 ContainedRefsResult containedRefsResult() const {
114 return {ref().Location, ref().Kind, Target};
115 }
116 };
117
118 void buildIndex(bool EnableOutgoingCalls);
119 llvm::iterator_range<std::vector<RevRef>::const_iterator>
120 lookupRevRefs(const SymbolID &Container) const;
121 std::unique_ptr<Iterator> iterator(const Token &Tok) const;
122 std::unique_ptr<Iterator>
123 createFileProximityIterator(llvm::ArrayRef<std::string> ProximityPaths) const;
124 std::unique_ptr<Iterator>
125 createTypeBoostingIterator(llvm::ArrayRef<std::string> Types) const;
126
127 /// Stores symbols sorted in the descending order of symbol quality..
128 std::vector<const Symbol *> Symbols;
129 /// SymbolQuality[I] is the quality of Symbols[I].
130 std::vector<float> SymbolQuality;
131 llvm::DenseMap<SymbolID, const Symbol *> LookupTable;
132 /// Inverted index is a mapping from the search token to the posting list,
133 /// which contains all items which can be characterized by such search token.
134 /// For example, if the search token is scope "std::", the corresponding
135 /// posting list would contain all indices of symbols defined in namespace
136 /// std. Inverted index is used to retrieve posting lists which are processed
137 /// during the fuzzyFind process.
138 llvm::DenseMap<Token, PostingList> InvertedIndex;
139 dex::Corpus Corpus;
140 llvm::DenseMap<SymbolID, llvm::ArrayRef<Ref>> Refs;
141 std::vector<RevRef> RevRefs; // sorted by container ID
142 static_assert(sizeof(RelationKind) == sizeof(uint8_t),
143 "RelationKind should be of same size as a uint8_t");
144 llvm::DenseMap<std::pair<SymbolID, uint8_t>, std::vector<SymbolID>> Relations;
145 std::shared_ptr<void> KeepAlive; // poor man's move-only std::any
146 // Set of files which were used during this index build.
147 llvm::StringSet<> Files;
148 // Contents of the index (symbols, references, etc.)
149 // This is only populated if `Files` is, which applies to some but not all
150 // consumers of this class.
152 // Size of memory retained by KeepAlive.
153 size_t BackingDataSize = 0;
154};
155
156/// Returns Search Token for a number of parent directories of given Path.
157/// Should be used within the index build process.
158///
159/// This function is exposed for testing only.
160llvm::SmallVector<llvm::StringRef, 5> generateProximityURIs(llvm::StringRef);
161
162} // namespace dex
163} // namespace clangd
164} // namespace clang
165
166#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H
Symbol index queries consist of specific requirements for the requested symbol, such as high fuzzy ma...
This defines posting list interface: a storage for identifiers of symbols which can be characterized ...
std::string Payload
Definition: SourceCode.cpp:673
std::string Container
An efficient structure of storing large set of symbol references in memory.
Definition: Ref.h:111
Interface for symbol indexes that can be used for searching or matching symbols among a set of symbol...
Definition: Index.h:134
An immutable symbol container that stores a set of symbols.
Definition: Symbol.h:201
In-memory Dex trigram-based index implementation.
Definition: Dex.h:35
size_t estimateMemoryUsage() const override
Returns estimated size of index (in bytes).
Definition: Dex.cpp:390
bool refs(const RefsRequest &Req, llvm::function_ref< void(const Ref &)> Callback) const override
Finds all occurrences (e.g.
Definition: Dex.cpp:316
void relations(const RelationsRequest &Req, llvm::function_ref< void(const SymbolID &, const Symbol &)> Callback) const override
Definition: Dex.cpp:362
Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations, bool SupportContainedRefs)
Definition: Dex.h:39
static std::unique_ptr< SymbolIndex > build(SymbolSlab, RefSlab, RelationSlab, bool SupportContainedRefs)
Builds an index from slabs. The index takes ownership of the slab.
Definition: Dex.cpp:35
Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations, FileRange &&Files, IndexContents IdxContents, Payload &&BackingData, size_t BackingDataSize, bool SupportContainedRefs)
Definition: Dex.h:66
bool containedRefs(const ContainedRefsRequest &Req, llvm::function_ref< void(const ContainedRefsResult &)> Callback) const override
Find all symbols that are referenced by a symbol and apply Callback on each result.
Definition: Dex.cpp:347
void lookup(const LookupRequest &Req, llvm::function_ref< void(const Symbol &)> Callback) const override
Looks up symbols with any of the given symbol IDs and applies Callback on each matched symbol.
Definition: Dex.cpp:306
llvm::unique_function< IndexContents(llvm::StringRef) const > indexedFiles() const override
Definition: Dex.cpp:384
Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations, Payload &&BackingData, size_t BackingDataSize, bool SupportContainedRefs)
Definition: Dex.h:55
bool fuzzyFind(const FuzzyFindRequest &Req, llvm::function_ref< void(const Symbol &)> Callback) const override
Constructs iterators over tokens extracted from the query and exhausts it while applying Callback to ...
Definition: Dex.cpp:227
Token objects represent a characteristic of a symbol, which can be used to perform efficient search.
llvm::SmallVector< llvm::StringRef, ProximityURILimit > generateProximityURIs(llvm::StringRef URI)
Returns Search Token for a number of parent directories of given Path.
Definition: Dex.cpp:420
IndexContents
Describes what data is covered by an index.
Definition: Index.h:114
llvm::unique_function< void(llvm::Expected< T >)> Callback
A Callback<T> is a void function that accepts Expected<T>.
Definition: Function.h:28
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
Represents a symbol occurrence in the source file.
Definition: Ref.h:88
Represents a symbol range where the symbol can potentially have multiple tokens.
Definition: Rename.h:69
The class presents a C++ symbol, e.g.
Definition: Symbol.h:39