clang-tools  14.0.0git
Dex.h
Go to the documentation of this file.
1 //===--- Dex.h - Dex Symbol Index Implementation ----------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This defines Dex - a symbol index implementation based on query iterators
11 /// over symbol tokens, such as fuzzy matching trigrams, scopes, types, etc.
12 /// While consuming more memory and having longer build stage due to
13 /// preprocessing, Dex will have substantially lower latency. It will also allow
14 /// efficient symbol searching which is crucial for operations like code
15 /// completion, and can be very important for a number of different code
16 /// transformations which will be eventually supported by Clangd.
17 ///
18 //===----------------------------------------------------------------------===//
19 
20 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H
21 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H
22 
23 #include "Iterator.h"
24 #include "PostingList.h"
25 #include "Token.h"
26 #include "Trigram.h"
27 #include "index/Index.h"
28 #include "index/MemIndex.h"
29 #include "index/Relation.h"
30 #include "index/SymbolCollector.h"
31 
32 namespace clang {
33 namespace clangd {
34 namespace dex {
35 
36 /// In-memory Dex trigram-based index implementation.
37 class Dex : public SymbolIndex {
38 public:
39  // All data must outlive this index.
40  template <typename SymbolRange, typename RefsRange, typename RelationsRange>
41  Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations)
42  : Corpus(0) {
43  for (auto &&Sym : Symbols)
44  this->Symbols.push_back(&Sym);
45  for (auto &&Ref : Refs)
46  this->Refs.try_emplace(Ref.first, Ref.second);
47  for (auto &&Rel : Relations)
48  this->Relations[std::make_pair(Rel.Subject,
49  static_cast<uint8_t>(Rel.Predicate))]
50  .push_back(Rel.Object);
51  buildIndex();
52  }
53  // Symbols and Refs are owned by BackingData, Index takes ownership.
54  template <typename SymbolRange, typename RefsRange, typename RelationsRange,
55  typename Payload>
56  Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations,
57  Payload &&BackingData, size_t BackingDataSize)
58  : Dex(std::forward<SymbolRange>(Symbols), std::forward<RefsRange>(Refs),
59  std::forward<RelationsRange>(Relations)) {
60  KeepAlive = std::shared_ptr<void>(
61  std::make_shared<Payload>(std::move(BackingData)), nullptr);
62  this->BackingDataSize = BackingDataSize;
63  }
64 
65  template <typename SymbolRange, typename RefsRange, typename RelationsRange,
66  typename FileRange, typename Payload>
67  Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations,
68  FileRange &&Files, IndexContents IdxContents, Payload &&BackingData,
69  size_t BackingDataSize)
70  : Dex(std::forward<SymbolRange>(Symbols), std::forward<RefsRange>(Refs),
71  std::forward<RelationsRange>(Relations),
72  std::forward<Payload>(BackingData), BackingDataSize) {
73  this->Files = std::forward<FileRange>(Files);
74  this->IdxContents = IdxContents;
75  }
76 
77  /// Builds an index from slabs. The index takes ownership of the slab.
78  static std::unique_ptr<SymbolIndex> build(SymbolSlab, RefSlab, RelationSlab);
79 
80  bool
81  fuzzyFind(const FuzzyFindRequest &Req,
82  llvm::function_ref<void(const Symbol &)> Callback) const override;
83 
84  void lookup(const LookupRequest &Req,
85  llvm::function_ref<void(const Symbol &)> Callback) const override;
86 
87  bool refs(const RefsRequest &Req,
88  llvm::function_ref<void(const Ref &)> Callback) const override;
89 
90  void relations(const RelationsRequest &Req,
91  llvm::function_ref<void(const SymbolID &, const Symbol &)>
92  Callback) const override;
93 
94  llvm::unique_function<IndexContents(llvm::StringRef) const>
95  indexedFiles() const override;
96 
97  size_t estimateMemoryUsage() const override;
98 
99 private:
100  void buildIndex();
101  std::unique_ptr<Iterator> iterator(const Token &Tok) const;
102  std::unique_ptr<Iterator>
103  createFileProximityIterator(llvm::ArrayRef<std::string> ProximityPaths) const;
104  std::unique_ptr<Iterator>
105  createTypeBoostingIterator(llvm::ArrayRef<std::string> Types) const;
106 
107  /// Stores symbols sorted in the descending order of symbol quality..
108  std::vector<const Symbol *> Symbols;
109  /// SymbolQuality[I] is the quality of Symbols[I].
110  std::vector<float> SymbolQuality;
111  llvm::DenseMap<SymbolID, const Symbol *> LookupTable;
112  /// Inverted index is a mapping from the search token to the posting list,
113  /// which contains all items which can be characterized by such search token.
114  /// For example, if the search token is scope "std::", the corresponding
115  /// posting list would contain all indices of symbols defined in namespace
116  /// std. Inverted index is used to retrieve posting lists which are processed
117  /// during the fuzzyFind process.
118  llvm::DenseMap<Token, PostingList> InvertedIndex;
120  llvm::DenseMap<SymbolID, llvm::ArrayRef<Ref>> Refs;
121  static_assert(sizeof(RelationKind) == sizeof(uint8_t),
122  "RelationKind should be of same size as a uint8_t");
123  llvm::DenseMap<std::pair<SymbolID, uint8_t>, std::vector<SymbolID>> Relations;
124  std::shared_ptr<void> KeepAlive; // poor man's move-only std::any
125  // Set of files which were used during this index build.
126  llvm::StringSet<> Files;
127  // Contents of the index (symbols, references, etc.)
128  IndexContents IdxContents;
129  // Size of memory retained by KeepAlive.
130  size_t BackingDataSize = 0;
131 };
132 
133 /// Returns Search Token for a number of parent directories of given Path.
134 /// Should be used within the index build process.
135 ///
136 /// This function is exposed for testing only.
137 std::vector<std::string> generateProximityURIs(llvm::StringRef URIPath);
138 
139 } // namespace dex
140 } // namespace clangd
141 } // namespace clang
142 
143 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H
clang::clangd::dex::Dex::Dex
Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations, FileRange &&Files, IndexContents IdxContents, Payload &&BackingData, size_t BackingDataSize)
Definition: Dex.h:67
clang::clangd::dex::Dex::lookup
void lookup(const LookupRequest &Req, llvm::function_ref< void(const Symbol &)> Callback) const override
Looks up symbols with any of the given symbol IDs and applies Callback on each matched symbol.
Definition: Dex.cpp:267
clang::clangd::dex::Dex::relations
void relations(const RelationsRequest &Req, llvm::function_ref< void(const SymbolID &, const Symbol &)> Callback) const override
Definition: Dex.cpp:294
Trigram.h
clang::clangd::dex::Dex::Dex
Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations, Payload &&BackingData, size_t BackingDataSize)
Definition: Dex.h:56
Index.h
clang::clangd::dex::Dex::build
static std::unique_ptr< SymbolIndex > build(SymbolSlab, RefSlab, RelationSlab)
Builds an index from slabs. The index takes ownership of the slab.
Definition: Dex.cpp:26
clang::clangd::RefSlab
An efficient structure of storing large set of symbol references in memory.
Definition: Ref.h:110
clang::clangd::FuzzyFindRequest
Definition: Index.h:27
clang::clangd::dex::Dex::estimateMemoryUsage
size_t estimateMemoryUsage() const override
Returns estimated size of index (in bytes).
Definition: Dex.cpp:323
clang::clangd::dex::Corpus
Definition: Iterator.h:134
Relation.h
clang::clangd::dex::Dex
In-memory Dex trigram-based index implementation.
Definition: Dex.h:37
MemIndex.h
clang::clangd::RelationSlab
Definition: Relation.h:52
PostingList.h
clang::clangd::dex::generateProximityURIs
std::vector< std::string > generateProximityURIs(llvm::StringRef URIPath)
Returns Search Token for a number of parent directories of given Path.
Definition: Dex.cpp:335
clang::clangd::Symbol
The class presents a C++ symbol, e.g.
Definition: Symbol.h:36
clang::clangd::IndexContents
IndexContents
Describes what data is covered by an index.
Definition: Index.h:91
Payload
std::string Payload
Definition: SourceCode.cpp:656
clang::clangd::dex::Dex::indexedFiles
llvm::unique_function< IndexContents(llvm::StringRef) const > indexedFiles() const override
Definition: Dex.cpp:317
clang::clangd::dex::Token
A Token represents an attribute of a symbol, such as a particular trigram present in the name (used f...
Definition: Token.h:40
Iterator.h
clang::clangd::dex::Dex::refs
bool refs(const RefsRequest &Req, llvm::function_ref< void(const Ref &)> Callback) const override
Finds all occurrences (e.g.
Definition: Dex.cpp:277
clang::clangd::SymbolIndex
Interface for symbol indexes that can be used for searching or matching symbols among a set of symbol...
Definition: Index.h:111
clang::clangd::LookupRequest
Definition: Index.h:65
clang::clangd::Ref
Represents a symbol occurrence in the source file.
Definition: Ref.h:87
clang
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
Definition: ApplyReplacements.h:27
clang::clangd::dex::Dex::fuzzyFind
bool fuzzyFind(const FuzzyFindRequest &Req, llvm::function_ref< void(const Symbol &)> Callback) const override
Constructs iterators over tokens extracted from the query and exhausts it while applying Callback to ...
Definition: Dex.cpp:188
SymbolCollector.h
clang::clangd::Callback
llvm::unique_function< void(llvm::Expected< T >)> Callback
A Callback<T> is a void function that accepts Expected<T>.
Definition: Function.h:28
clang::clangd::RelationsRequest
Definition: Index.h:78
clang::clangd::SymbolSlab
An immutable symbol container that stores a set of symbols.
Definition: Symbol.h:177
clang::clangd::dex::Dex::Dex
Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations)
Definition: Dex.h:41
clang::clangd::SymbolID
Definition: SymbolID.h:32
Token.h
clang::clangd::RefsRequest
Definition: Index.h:69
clang::clangd::RelationKind
RelationKind
Definition: Relation.h:22