clang-tools  10.0.0svn
Dex.h
Go to the documentation of this file.
1 //===--- Dex.h - Dex Symbol Index Implementation ----------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This defines Dex - a symbol index implementation based on query iterators
11 /// over symbol tokens, such as fuzzy matching trigrams, scopes, types, etc.
12 /// While consuming more memory and having longer build stage due to
13 /// preprocessing, Dex will have substantially lower latency. It will also allow
14 /// efficient symbol searching which is crucial for operations like code
15 /// completion, and can be very important for a number of different code
16 /// transformations which will be eventually supported by Clangd.
17 ///
18 //===----------------------------------------------------------------------===//
19 
20 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H
21 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H
22 
23 #include "Iterator.h"
24 #include "PostingList.h"
25 #include "Token.h"
26 #include "Trigram.h"
27 #include "index/Index.h"
28 #include "index/MemIndex.h"
29 #include "index/Relation.h"
30 #include "index/SymbolCollector.h"
31 
32 namespace clang {
33 namespace clangd {
34 namespace dex {
35 
36 /// In-memory Dex trigram-based index implementation.
37 // FIXME(kbobyrev): Introduce serialization and deserialization of the symbol
38 // index so that it can be loaded from the disk. Since static index is not
39 // changed frequently, it's safe to assume that it has to be built only once
40 // (when the clangd process starts). Therefore, it can be easier to store built
41 // index on disk and then load it if available.
42 class Dex : public SymbolIndex {
43 public:
44  // All data must outlive this index.
45  template <typename SymbolRange, typename RefsRange, typename RelationsRange>
46  Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations)
47  : Corpus(0) {
48  for (auto &&Sym : Symbols)
49  this->Symbols.push_back(&Sym);
50  for (auto &&Ref : Refs)
51  this->Refs.try_emplace(Ref.first, Ref.second);
52  for (auto &&Rel : Relations)
53  this->Relations[std::make_pair(Rel.Subject,
54  static_cast<uint8_t>(Rel.Predicate))]
55  .push_back(Rel.Object);
56  buildIndex();
57  }
58  // Symbols and Refs are owned by BackingData, Index takes ownership.
59  template <typename SymbolRange, typename RefsRange, typename RelationsRange,
60  typename Payload>
61  Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations,
62  Payload &&BackingData, size_t BackingDataSize)
63  : Dex(std::forward<SymbolRange>(Symbols), std::forward<RefsRange>(Refs),
64  std::forward<RelationsRange>(Relations)) {
65  KeepAlive = std::shared_ptr<void>(
66  std::make_shared<Payload>(std::move(BackingData)), nullptr);
67  this->BackingDataSize = BackingDataSize;
68  }
69 
70  /// Builds an index from slabs. The index takes ownership of the slab.
71  static std::unique_ptr<SymbolIndex> build(SymbolSlab, RefSlab, RelationSlab);
72 
73  bool
74  fuzzyFind(const FuzzyFindRequest &Req,
75  llvm::function_ref<void(const Symbol &)> Callback) const override;
76 
77  void lookup(const LookupRequest &Req,
78  llvm::function_ref<void(const Symbol &)> Callback) const override;
79 
80  void refs(const RefsRequest &Req,
81  llvm::function_ref<void(const Ref &)> Callback) const override;
82 
83  void relations(const RelationsRequest &Req,
84  llvm::function_ref<void(const SymbolID &, const Symbol &)>
85  Callback) const override;
86 
87  size_t estimateMemoryUsage() const override;
88 
89 private:
90  void buildIndex();
91  std::unique_ptr<Iterator> iterator(const Token &Tok) const;
92  std::unique_ptr<Iterator>
93  createFileProximityIterator(llvm::ArrayRef<std::string> ProximityPaths) const;
94  std::unique_ptr<Iterator>
95  createTypeBoostingIterator(llvm::ArrayRef<std::string> Types) const;
96 
97  /// Stores symbols sorted in the descending order of symbol quality..
98  std::vector<const Symbol *> Symbols;
99  /// SymbolQuality[I] is the quality of Symbols[I].
100  std::vector<float> SymbolQuality;
101  llvm::DenseMap<SymbolID, const Symbol *> LookupTable;
102  /// Inverted index is a mapping from the search token to the posting list,
103  /// which contains all items which can be characterized by such search token.
104  /// For example, if the search token is scope "std::", the corresponding
105  /// posting list would contain all indices of symbols defined in namespace
106  /// std. Inverted index is used to retrieve posting lists which are processed
107  /// during the fuzzyFind process.
108  llvm::DenseMap<Token, PostingList> InvertedIndex;
110  llvm::DenseMap<SymbolID, llvm::ArrayRef<Ref>> Refs;
111  static_assert(sizeof(RelationKind) == sizeof(uint8_t),
112  "RelationKind should be of same size as a uint8_t");
113  llvm::DenseMap<std::pair<SymbolID, uint8_t>, std::vector<SymbolID>> Relations;
114  std::shared_ptr<void> KeepAlive; // poor man's move-only std::any
115  // Size of memory retained by KeepAlive.
116  size_t BackingDataSize = 0;
117 };
118 
119 /// Returns Search Token for a number of parent directories of given Path.
120 /// Should be used within the index build process.
121 ///
122 /// This function is exposed for testing only.
123 std::vector<std::string> generateProximityURIs(llvm::StringRef URIPath);
124 
125 } // namespace dex
126 } // namespace clangd
127 } // namespace clang
128 
129 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H
An immutable symbol container that stores a set of symbols.
Definition: Symbol.h:177
void refs(const RefsRequest &Req, llvm::function_ref< void(const Ref &)> Callback) const override
Finds all occurrences (e.g.
Definition: Dex.cpp:252
An efficient structure of storing large set of symbol references in memory.
Definition: Ref.h:69
Interface for symbol indexes that can be used for searching or matching symbols among a set of symbol...
Definition: Index.h:85
Represents a symbol occurrence in the source file.
Definition: Ref.h:52
llvm::unique_function< void(llvm::Expected< T >)> Callback
A Callback<T> is a void function that accepts Expected<T>.
Definition: Function.h:28
bool fuzzyFind(const FuzzyFindRequest &Req, llvm::function_ref< void(const Symbol &)> Callback) const override
Constructs iterators over tokens extracted from the query and exhausts it while applying Callback to ...
Definition: Dex.cpp:163
Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations)
Definition: Dex.h:46
std::vector< std::string > generateProximityURIs(llvm::StringRef URIPath)
Returns Search Token for a number of parent directories of given Path.
Definition: Dex.cpp:300
static std::unique_ptr< SymbolIndex > build(SymbolSlab, RefSlab, RelationSlab)
Builds an index from slabs. The index takes ownership of the slab.
Definition: Dex.cpp:26
void relations(const RelationsRequest &Req, llvm::function_ref< void(const SymbolID &, const Symbol &)> Callback) const override
Definition: Dex.cpp:266
void lookup(const LookupRequest &Req, llvm::function_ref< void(const Symbol &)> Callback) const override
Looks up symbols with any of the given symbol IDs and applies Callback on each matched symbol...
Definition: Dex.cpp:242
This defines posting list interface: a storage for identifiers of symbols which can be characterized ...
The class presents a C++ symbol, e.g.
Definition: Symbol.h:36
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
Trigrams are attributes of the symbol unqualified name used to effectively extract symbols which can ...
Symbol index queries consist of specific requirements for the requested symbol, such as high fuzzy ma...
Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations, Payload &&BackingData, size_t BackingDataSize)
Definition: Dex.h:61
In-memory Dex trigram-based index implementation.
Definition: Dex.h:42
A Token represents an attribute of a symbol, such as a particular trigram present in the name (used f...
Definition: Token.h:40
size_t estimateMemoryUsage() const override
Returns estimated size of index (in bytes).
Definition: Dex.cpp:288
Token objects represent a characteristic of a symbol, which can be used to perform efficient search...