clang-tools  10.0.0svn
Dex.h
Go to the documentation of this file.
1 //===--- Dex.h - Dex Symbol Index Implementation ----------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This defines Dex - a symbol index implementation based on query iterators
11 /// over symbol tokens, such as fuzzy matching trigrams, scopes, types, etc.
12 /// While consuming more memory and having longer build stage due to
13 /// preprocessing, Dex will have substantially lower latency. It will also allow
14 /// efficient symbol searching which is crucial for operations like code
15 /// completion, and can be very important for a number of different code
16 /// transformations which will be eventually supported by Clangd.
17 ///
18 //===----------------------------------------------------------------------===//
19 
20 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H
21 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H
22 
23 #include "Iterator.h"
24 #include "PostingList.h"
25 #include "Token.h"
26 #include "Trigram.h"
27 #include "index/Index.h"
28 #include "index/MemIndex.h"
29 #include "index/SymbolCollector.h"
30 
31 namespace clang {
32 namespace clangd {
33 namespace dex {
34 
35 /// In-memory Dex trigram-based index implementation.
36 // FIXME(kbobyrev): Introduce serialization and deserialization of the symbol
37 // index so that it can be loaded from the disk. Since static index is not
38 // changed frequently, it's safe to assume that it has to be built only once
39 // (when the clangd process starts). Therefore, it can be easier to store built
40 // index on disk and then load it if available.
41 class Dex : public SymbolIndex {
42 public:
43  // All data must outlive this index.
44  template <typename SymbolRange, typename RefsRange, typename RelationsRange>
45  Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations)
46  : Corpus(0) {
47  for (auto &&Sym : Symbols)
48  this->Symbols.push_back(&Sym);
49  for (auto &&Ref : Refs)
50  this->Refs.try_emplace(Ref.first, Ref.second);
51  for (auto &&Rel : Relations)
52  this->Relations[std::make_pair(Rel.Subject, Rel.Predicate)].push_back(
53  Rel.Object);
54  buildIndex();
55  }
56  // Symbols and Refs are owned by BackingData, Index takes ownership.
57  template <typename SymbolRange, typename RefsRange, typename RelationsRange,
58  typename Payload>
59  Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations,
60  Payload &&BackingData, size_t BackingDataSize)
61  : Dex(std::forward<SymbolRange>(Symbols), std::forward<RefsRange>(Refs),
62  std::forward<RelationsRange>(Relations)) {
63  KeepAlive = std::shared_ptr<void>(
64  std::make_shared<Payload>(std::move(BackingData)), nullptr);
65  this->BackingDataSize = BackingDataSize;
66  }
67 
68  /// Builds an index from slabs. The index takes ownership of the slab.
69  static std::unique_ptr<SymbolIndex> build(SymbolSlab, RefSlab, RelationSlab);
70 
71  bool
72  fuzzyFind(const FuzzyFindRequest &Req,
73  llvm::function_ref<void(const Symbol &)> Callback) const override;
74 
75  void lookup(const LookupRequest &Req,
76  llvm::function_ref<void(const Symbol &)> Callback) const override;
77 
78  void refs(const RefsRequest &Req,
79  llvm::function_ref<void(const Ref &)> Callback) const override;
80 
81  void relations(const RelationsRequest &Req,
82  llvm::function_ref<void(const SymbolID &, const Symbol &)>
83  Callback) const override;
84 
85  size_t estimateMemoryUsage() const override;
86 
87 private:
88  void buildIndex();
89  std::unique_ptr<Iterator> iterator(const Token &Tok) const;
90  std::unique_ptr<Iterator>
91  createFileProximityIterator(llvm::ArrayRef<std::string> ProximityPaths) const;
92  std::unique_ptr<Iterator>
93  createTypeBoostingIterator(llvm::ArrayRef<std::string> Types) const;
94 
95  /// Stores symbols sorted in the descending order of symbol quality..
96  std::vector<const Symbol *> Symbols;
97  /// SymbolQuality[I] is the quality of Symbols[I].
98  std::vector<float> SymbolQuality;
99  llvm::DenseMap<SymbolID, const Symbol *> LookupTable;
100  /// Inverted index is a mapping from the search token to the posting list,
101  /// which contains all items which can be characterized by such search token.
102  /// For example, if the search token is scope "std::", the corresponding
103  /// posting list would contain all indices of symbols defined in namespace
104  /// std. Inverted index is used to retrieve posting lists which are processed
105  /// during the fuzzyFind process.
106  llvm::DenseMap<Token, PostingList> InvertedIndex;
108  llvm::DenseMap<SymbolID, llvm::ArrayRef<Ref>> Refs;
109  llvm::DenseMap<std::pair<SymbolID, index::SymbolRole>, std::vector<SymbolID>>
110  Relations;
111  std::shared_ptr<void> KeepAlive; // poor man's move-only std::any
112  // Size of memory retained by KeepAlive.
113  size_t BackingDataSize = 0;
114 };
115 
116 /// Returns Search Token for a number of parent directories of given Path.
117 /// Should be used within the index build process.
118 ///
119 /// This function is exposed for testing only.
120 std::vector<std::string> generateProximityURIs(llvm::StringRef URIPath);
121 
122 } // namespace dex
123 } // namespace clangd
124 } // namespace clang
125 
126 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H
An immutable symbol container that stores a set of symbols.
Definition: Symbol.h:177
void refs(const RefsRequest &Req, llvm::function_ref< void(const Ref &)> Callback) const override
Finds all occurrences (e.g.
Definition: Dex.cpp:252
An efficient structure of storing large set of symbol references in memory.
Definition: Ref.h:69
Interface for symbol indexes that can be used for searching or matching symbols among a set of symbol...
Definition: Index.h:85
Represents a symbol occurrence in the source file.
Definition: Ref.h:52
llvm::unique_function< void(llvm::Expected< T >)> Callback
A Callback<T> is a void function that accepts Expected<T>.
Definition: Function.h:28
bool fuzzyFind(const FuzzyFindRequest &Req, llvm::function_ref< void(const Symbol &)> Callback) const override
Constructs iterators over tokens extracted from the query and exhausts it while applying Callback to ...
Definition: Dex.cpp:163
Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations)
Definition: Dex.h:45
std::vector< std::string > generateProximityURIs(llvm::StringRef URIPath)
Returns Search Token for a number of parent directories of given Path.
Definition: Dex.cpp:299
static std::unique_ptr< SymbolIndex > build(SymbolSlab, RefSlab, RelationSlab)
Builds an index from slabs. The index takes ownership of the slab.
Definition: Dex.cpp:26
void relations(const RelationsRequest &Req, llvm::function_ref< void(const SymbolID &, const Symbol &)> Callback) const override
Definition: Dex.cpp:266
void lookup(const LookupRequest &Req, llvm::function_ref< void(const Symbol &)> Callback) const override
Looks up symbols with any of the given symbol IDs and applies Callback on each matched symbol...
Definition: Dex.cpp:242
This defines posting list interface: a storage for identifiers of symbols which can be characterized ...
The class presents a C++ symbol, e.g.
Definition: Symbol.h:36
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
Trigrams are attributes of the symbol unqualified name used to effectively extract symbols which can ...
Symbol index queries consist of specific requirements for the requested symbol, such as high fuzzy ma...
Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations, Payload &&BackingData, size_t BackingDataSize)
Definition: Dex.h:59
In-memory Dex trigram-based index implementation.
Definition: Dex.h:41
A Token represents an attribute of a symbol, such as a particular trigram present in the name (used f...
Definition: Token.h:40
size_t estimateMemoryUsage() const override
Returns estimated size of index (in bytes).
Definition: Dex.cpp:287
Token objects represent a characteristic of a symbol, which can be used to perform efficient search...