clang-tools  16.0.0git
Iterator.h
Go to the documentation of this file.
1 //===--- Iterator.h - Query Symbol Retrieval --------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// Symbol index queries consist of specific requirements for the requested
11 /// symbol, such as high fuzzy matching score, scope, type etc. The lists of all
12 /// symbols matching some criteria (e.g. belonging to "clang::clangd::" scope)
13 /// are expressed in a form of Search Tokens which are stored in the inverted
14 /// index. Inverted index maps these tokens to the posting lists - sorted (by
15 /// symbol quality) sequences of symbol IDs matching the token, e.g. scope token
16 /// "clangd::clangd::" is mapped to the list of IDs of all symbols which are
17 /// declared in this namespace. Search queries are build from a set of
18 /// requirements which can be combined with each other forming the query trees.
19 /// The leafs of such trees are posting lists, and the nodes are operations on
20 /// these posting lists, e.g. intersection or union. Efficient processing of
21 /// these multi-level queries is handled by Iterators. Iterators advance through
22 /// all leaf posting lists producing the result of search query, which preserves
23 /// the sorted order of IDs. Having the resulting IDs sorted is important,
24 /// because it allows receiving a certain number of the most valuable items
25 /// (e.g. symbols with highest quality which was the sorting key in the first
26 /// place) without processing all items with requested properties (this might
27 /// not be computationally effective if search request is not very restrictive).
28 //
29 //===----------------------------------------------------------------------===//
30 
31 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_ITERATOR_H
32 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_ITERATOR_H
33 
34 #include "llvm/Support/raw_ostream.h"
35 #include <algorithm>
36 #include <memory>
37 #include <vector>
38 
39 namespace clang {
40 namespace clangd {
41 namespace dex {
42 
43 /// Symbol position in the list of all index symbols sorted by a pre-computed
44 /// symbol quality.
45 using DocID = uint32_t;
46 
47 /// Iterator is the interface for Query Tree node. The simplest type of Iterator
48 /// is DocumentIterator which is simply a wrapper around PostingList iterator
49 /// and serves as the Query Tree leaf. More sophisticated examples of iterators
50 /// can manage intersection, union of the elements produced by other iterators
51 /// (their children) to form a multi-level Query Tree. The interface is designed
52 /// to be extensible in order to support multiple types of iterators.
53 class Iterator {
54 public:
55  /// Returns true if all valid DocIDs were processed and hence the iterator is
56  /// exhausted.
57  virtual bool reachedEnd() const = 0;
58  /// Moves to next valid DocID. If it doesn't exist, the iterator is exhausted
59  /// and proceeds to the END.
60  ///
61  /// Note: reachedEnd() must be false.
62  virtual void advance() = 0;
63  /// Moves to the first valid DocID which is equal or higher than given ID. If
64  /// it doesn't exist, the iterator is exhausted and proceeds to the END.
65  ///
66  /// Note: reachedEnd() must be false.
67  virtual void advanceTo(DocID ID) = 0;
68  /// Returns the current element this iterator points to.
69  ///
70  /// Note: reachedEnd() must be false.
71  virtual DocID peek() const = 0;
72  /// Informs the iterator that the current document was consumed, and returns
73  /// its boost.
74  ///
75  /// Note: If this iterator has any child iterators that contain the document,
76  /// consume() should be called on those and their boosts incorporated.
77  /// consume() must *not* be called on children that don't contain the current
78  /// doc.
79  virtual float consume() = 0;
80  /// Returns an estimate of advance() calls before the iterator is exhausted.
81  virtual size_t estimateSize() const = 0;
82 
83  virtual ~Iterator() {}
84 
85  /// Prints a convenient human-readable iterator representation by recursively
86  /// dumping iterators in the following format:
87  ///
88  /// (Type Child1 Child2 ...)
89  ///
90  /// Where Type is the iterator type representation: "&" for And, "|" for Or,
91  /// ChildN is N-th iterator child. Raw iterators over PostingList are
92  /// represented as "[... CurID ...]" where CurID is the current PostingList
93  /// entry being inspected.
94  friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
95  const Iterator &Iterator) {
96  return Iterator.dump(OS);
97  }
98 
99  /// Inspect iterator type, used internally for optimizing query trees.
100  enum class Kind { And, Or, True, False, Other };
101  Kind kind() const { return MyKind; }
102 
103 protected:
104  Iterator(Kind MyKind = Kind::Other) : MyKind(MyKind) {}
105 
106 private:
107  virtual llvm::raw_ostream &dump(llvm::raw_ostream &OS) const = 0;
108  Kind MyKind;
109 };
110 
111 /// Advances the iterator until it is exhausted. Returns pairs of document IDs
112 /// with the corresponding boosting score.
113 ///
114 /// Boosting can be seen as a compromise between retrieving too many items and
115 /// calculating finals score for each of them (which might be very expensive)
116 /// and not retrieving enough items so that items with very high final score
117 /// would not be processed. Boosting score is a computationally efficient way
118 /// to acquire preliminary scores of requested items.
119 std::vector<std::pair<DocID, float>> consume(Iterator &It);
120 
121 namespace detail {
122 // Variadic template machinery.
123 inline void populateChildren(std::vector<std::unique_ptr<Iterator>> &) {}
124 template <typename... TailT>
125 void populateChildren(std::vector<std::unique_ptr<Iterator>> &Children,
126  std::unique_ptr<Iterator> Head, TailT... Tail) {
127  Children.push_back(std::move(Head));
128  populateChildren(Children, std::move(Tail)...);
129 }
130 } // namespace detail
131 
132 // A corpus is a set of documents, and a factory for iterators over them.
133 class Corpus {
134  DocID Size;
135 
136 public:
137  explicit Corpus(DocID Size) : Size(Size) {}
138 
139  /// Returns AND Iterator which performs the intersection of the PostingLists
140  /// of its children.
141  ///
142  /// consume(): AND Iterator returns the product of Childrens' boosting
143  /// scores.
144  std::unique_ptr<Iterator>
145  intersect(std::vector<std::unique_ptr<Iterator>> Children) const;
146 
147  /// Returns OR Iterator which performs the union of the PostingLists of its
148  /// children.
149  ///
150  /// consume(): OR Iterator returns the highest boost value among children
151  /// containing the requested item.
152  std::unique_ptr<Iterator>
153  unionOf(std::vector<std::unique_ptr<Iterator>> Children) const;
154 
155  /// Returns TRUE Iterator which iterates over "virtual" PostingList
156  /// containing all items in range [0, Size) in an efficient manner.
157  std::unique_ptr<Iterator> all() const;
158 
159  /// Returns FALSE Iterator which iterates over no documents.
160  std::unique_ptr<Iterator> none() const;
161 
162  /// Returns BOOST iterator which multiplies the score of each item by given
163  /// factor. Boosting can be used as a computationally inexpensive filtering.
164  /// Users can return significantly more items using consumeAndBoost() and
165  /// then trim Top K using retrieval score.
166  std::unique_ptr<Iterator> boost(std::unique_ptr<Iterator> Child,
167  float Factor) const;
168 
169  /// Returns LIMIT iterator, which yields up to N elements of its child
170  /// iterator. Elements only count towards the limit if they are part of the
171  /// final result set. Therefore the following iterator (AND (2) (LIMIT (1 2)
172  /// 1)) yields (2), not ().
173  std::unique_ptr<Iterator> limit(std::unique_ptr<Iterator> Child,
174  size_t Limit) const;
175 
176  /// This allows intersect(create(...), create(...)) syntax.
177  template <typename... Args>
178  std::unique_ptr<Iterator> intersect(Args... args) const {
179  std::vector<std::unique_ptr<Iterator>> Children;
180  detail::populateChildren(Children, std::forward<Args>(args)...);
181  return intersect(std::move(Children));
182  }
183 
184  /// This allows unionOf(create(...), create(...)) syntax.
185  template <typename... Args>
186  std::unique_ptr<Iterator> unionOf(Args... args) const {
187  std::vector<std::unique_ptr<Iterator>> Children;
188  detail::populateChildren(Children, std::forward<Args>(args)...);
189  return unionOf(std::move(Children));
190  }
191 };
192 
193 } // namespace dex
194 } // namespace clangd
195 } // namespace clang
196 
197 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_ITERATOR_H
clang::clangd::dex::Iterator::estimateSize
virtual size_t estimateSize() const =0
Returns an estimate of advance() calls before the iterator is exhausted.
clang::clangd::Head
@ Head
Definition: FuzzyMatch.h:58
clang::clangd::dex::Iterator::operator<<
friend llvm::raw_ostream & operator<<(llvm::raw_ostream &OS, const Iterator &Iterator)
Prints a convenient human-readable iterator representation by recursively dumping iterators in the fo...
Definition: Iterator.h:94
clang::clangd::dex::Corpus::boost
std::unique_ptr< Iterator > boost(std::unique_ptr< Iterator > Child, float Factor) const
Returns BOOST iterator which multiplies the score of each item by given factor.
Definition: Iterator.cpp:432
clang::clangd::dex::Corpus::Corpus
Corpus(DocID Size)
Definition: Iterator.h:137
clang::clangd::dex::Iterator::~Iterator
virtual ~Iterator()
Definition: Iterator.h:83
TidyFastChecks.args
args
Definition: TidyFastChecks.py:37
clang::clangd::dex::Iterator::Kind::True
@ True
clang::clangd::dex::consume
std::vector< std::pair< DocID, float > > consume(Iterator &It)
Advances the iterator until it is exhausted.
Definition: Iterator.cpp:357
clang::clangd::dex::Corpus::all
std::unique_ptr< Iterator > all() const
Returns TRUE Iterator which iterates over "virtual" PostingList containing all items in range [0,...
Definition: Iterator.cpp:424
clang::clangd::dex::Iterator::advanceTo
virtual void advanceTo(DocID ID)=0
Moves to the first valid DocID which is equal or higher than given ID.
clang::clangd::dex::Corpus::intersect
std::unique_ptr< Iterator > intersect(std::vector< std::unique_ptr< Iterator >> Children) const
Returns AND Iterator which performs the intersection of the PostingLists of its children.
Definition: Iterator.cpp:365
clang::clangd::dex::Corpus::none
std::unique_ptr< Iterator > none() const
Returns FALSE Iterator which iterates over no documents.
Definition: Iterator.cpp:428
clang::clangd::Tail
@ Tail
Definition: FuzzyMatch.h:57
clang::clangd::dex::Iterator::kind
Kind kind() const
Definition: Iterator.h:101
Children
std::vector< std::unique_ptr< HTMLNode > > Children
Definition: HTMLGenerator.cpp:92
clang::clangd::dex::Iterator::advance
virtual void advance()=0
Moves to next valid DocID.
clang::clangd::dex::Corpus
Definition: Iterator.h:133
clang::clangd::dex::detail::populateChildren
void populateChildren(std::vector< std::unique_ptr< Iterator >> &)
Definition: Iterator.h:123
clang::clangd::dex::Iterator::Kind::Or
@ Or
clang::clangd::dex::Corpus::unionOf
std::unique_ptr< Iterator > unionOf(Args... args) const
This allows unionOf(create(...), create(...)) syntax.
Definition: Iterator.h:186
clang::clangd::dex::Iterator::Kind::And
@ And
clang::clangd::dex::Iterator::Kind
Kind
Inspect iterator type, used internally for optimizing query trees.
Definition: Iterator.h:100
Args
llvm::json::Object Args
Definition: Trace.cpp:138
clang::clangd::dex::Iterator::Kind::Other
@ Other
clang::clangd::dex::Iterator::peek
virtual DocID peek() const =0
Returns the current element this iterator points to.
ID
static char ID
Definition: Logger.cpp:74
clang::clangd::dex::Iterator
Iterator is the interface for Query Tree node.
Definition: Iterator.h:53
clang::clangd::dex::Iterator::reachedEnd
virtual bool reachedEnd() const =0
Returns true if all valid DocIDs were processed and hence the iterator is exhausted.
clang::clangd::dex::Iterator::Iterator
Iterator(Kind MyKind=Kind::Other)
Definition: Iterator.h:104
clang
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
Definition: ApplyReplacements.h:27
OS
llvm::raw_string_ostream OS
Definition: TraceTests.cpp:160
clang::clangd::dex::DocID
uint32_t DocID
Symbol position in the list of all index symbols sorted by a pre-computed symbol quality.
Definition: Iterator.h:45
clang::clangd::dex::Corpus::limit
std::unique_ptr< Iterator > limit(std::unique_ptr< Iterator > Child, size_t Limit) const
Returns LIMIT iterator, which yields up to N elements of its child iterator.
Definition: Iterator.cpp:441
clang::clangd::dex::Iterator::consume
virtual float consume()=0
Informs the iterator that the current document was consumed, and returns its boost.
clang::clangd::dex::Corpus::unionOf
std::unique_ptr< Iterator > unionOf(std::vector< std::unique_ptr< Iterator >> Children) const
Returns OR Iterator which performs the union of the PostingLists of its children.
Definition: Iterator.cpp:395
clang::clangd::dex::Corpus::intersect
std::unique_ptr< Iterator > intersect(Args... args) const
This allows intersect(create(...), create(...)) syntax.
Definition: Iterator.h:178
clang::clangd::dex::Iterator::Kind::False
@ False