clang-tools  12.0.0git
Iterator.cpp
Go to the documentation of this file.
1 //===--- Iterator.cpp - Query Symbol Retrieval ------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Iterator.h"
10 #include "llvm/Support/Casting.h"
11 #include <algorithm>
12 #include <cassert>
13 #include <numeric>
14 
15 namespace clang {
16 namespace clangd {
17 namespace dex {
18 namespace {
19 
20 /// Implements Iterator over the intersection of other iterators.
21 ///
22 /// AndIterator iterates through common items among all children. It becomes
23 /// exhausted as soon as any child becomes exhausted. After each mutation, the
24 /// iterator restores the invariant: all children must point to the same item.
25 class AndIterator : public Iterator {
26 public:
27  explicit AndIterator(std::vector<std::unique_ptr<Iterator>> AllChildren)
28  : Iterator(Kind::And), Children(std::move(AllChildren)) {
29  assert(!Children.empty() && "AND iterator should have at least one child.");
30  // Establish invariants.
31  for (const auto &Child : Children)
32  ReachedEnd |= Child->reachedEnd();
33  sync();
34  // When children are sorted by the estimateSize(), sync() calls are more
35  // effective. Each sync() starts with the first child and makes sure all
36  // children point to the same element. If any child is "above" the previous
37  // ones, the algorithm resets and and advances the children to the next
38  // highest element starting from the front. When child iterators in the
39  // beginning have smaller estimated size, the sync() will have less restarts
40  // and become more effective.
41  llvm::sort(Children, [](const std::unique_ptr<Iterator> &LHS,
42  const std::unique_ptr<Iterator> &RHS) {
43  return LHS->estimateSize() < RHS->estimateSize();
44  });
45  }
46 
47  bool reachedEnd() const override { return ReachedEnd; }
48 
49  /// Advances all children to the next common item.
50  void advance() override {
51  assert(!reachedEnd() && "AND iterator can't advance() at the end.");
52  Children.front()->advance();
53  sync();
54  }
55 
56  /// Advances all children to the next common item with DocumentID >= ID.
57  void advanceTo(DocID ID) override {
58  assert(!reachedEnd() && "AND iterator can't advanceTo() at the end.");
59  Children.front()->advanceTo(ID);
60  sync();
61  }
62 
63  DocID peek() const override { return Children.front()->peek(); }
64 
65  float consume() override {
66  assert(!reachedEnd() && "AND iterator can't consume() at the end.");
67  float Boost = 1;
68  for (const auto &Child : Children)
69  Boost *= Child->consume();
70  return Boost;
71  }
72 
73  size_t estimateSize() const override {
74  return Children.front()->estimateSize();
75  }
76 
77 private:
78  llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
79  OS << "(& ";
80  auto Separator = "";
81  for (const auto &Child : Children) {
82  OS << Separator << *Child;
83  Separator = " ";
84  }
85  OS << ')';
86  return OS;
87  }
88 
89  /// Restores class invariants: each child will point to the same element after
90  /// sync.
91  void sync() {
92  ReachedEnd |= Children.front()->reachedEnd();
93  if (ReachedEnd)
94  return;
95  auto SyncID = Children.front()->peek();
96  // Indicates whether any child needs to be advanced to new SyncID.
97  bool NeedsAdvance = false;
98  do {
99  NeedsAdvance = false;
100  for (auto &Child : Children) {
101  Child->advanceTo(SyncID);
102  ReachedEnd |= Child->reachedEnd();
103  // If any child reaches end And iterator can not match any other items.
104  // In this case, just terminate the process.
105  if (ReachedEnd)
106  return;
107  // If any child goes beyond given ID (i.e. ID is not the common item),
108  // all children should be advanced to the next common item.
109  if (Child->peek() > SyncID) {
110  SyncID = Child->peek();
111  NeedsAdvance = true;
112  }
113  }
114  } while (NeedsAdvance);
115  }
116 
117  /// AndIterator owns its children and ensures that all of them point to the
118  /// same element. As soon as one child gets exhausted, AndIterator can no
119  /// longer advance and has reached its end.
120  std::vector<std::unique_ptr<Iterator>> Children;
121  /// Indicates whether any child is exhausted. It is cheaper to maintain and
122  /// update the field, rather than traversing the whole subtree in each
123  /// reachedEnd() call.
124  bool ReachedEnd = false;
125  friend Corpus; // For optimizations.
126 };
127 
128 /// Implements Iterator over the union of other iterators.
129 ///
130 /// OrIterator iterates through all items which can be pointed to by at least
131 /// one child. To preserve the sorted order, this iterator always advances the
132 /// child with smallest Child->peek() value. OrIterator becomes exhausted as
133 /// soon as all of its children are exhausted.
134 class OrIterator : public Iterator {
135 public:
136  explicit OrIterator(std::vector<std::unique_ptr<Iterator>> AllChildren)
137  : Iterator(Kind::Or), Children(std::move(AllChildren)) {
138  assert(!Children.empty() && "OR iterator should have at least one child.");
139  }
140 
141  /// Returns true if all children are exhausted.
142  bool reachedEnd() const override {
143  for (const auto &Child : Children)
144  if (!Child->reachedEnd())
145  return false;
146  return true;
147  }
148 
149  /// Moves each child pointing to the smallest DocID to the next item.
150  void advance() override {
151  assert(!reachedEnd() && "OR iterator can't advance() at the end.");
152  const auto SmallestID = peek();
153  for (const auto &Child : Children)
154  if (!Child->reachedEnd() && Child->peek() == SmallestID)
155  Child->advance();
156  }
157 
158  /// Advances each child to the next existing element with DocumentID >= ID.
159  void advanceTo(DocID ID) override {
160  assert(!reachedEnd() && "OR iterator can't advanceTo() at the end.");
161  for (const auto &Child : Children)
162  if (!Child->reachedEnd())
163  Child->advanceTo(ID);
164  }
165 
166  /// Returns the element under cursor of the child with smallest Child->peek()
167  /// value.
168  DocID peek() const override {
169  assert(!reachedEnd() && "OR iterator can't peek() at the end.");
170  DocID Result = std::numeric_limits<DocID>::max();
171 
172  for (const auto &Child : Children)
173  if (!Child->reachedEnd())
174  Result = std::min(Result, Child->peek());
175 
176  return Result;
177  }
178 
179  // Returns the maximum boosting score among all Children when iterator
180  // points to the current ID.
181  float consume() override {
182  assert(!reachedEnd() && "OR iterator can't consume() at the end.");
183  const DocID ID = peek();
184  float Boost = 1;
185  for (const auto &Child : Children)
186  if (!Child->reachedEnd() && Child->peek() == ID)
187  Boost = std::max(Boost, Child->consume());
188  return Boost;
189  }
190 
191  size_t estimateSize() const override {
192  size_t Size = 0;
193  for (const auto &Child : Children)
194  Size = std::max(Size, Child->estimateSize());
195  return Size;
196  }
197 
198 private:
199  llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
200  OS << "(| ";
201  auto Separator = "";
202  for (const auto &Child : Children) {
203  OS << Separator << *Child;
204  Separator = " ";
205  }
206  OS << ')';
207  return OS;
208  }
209 
210  // FIXME(kbobyrev): Would storing Children in min-heap be faster?
211  std::vector<std::unique_ptr<Iterator>> Children;
212  friend Corpus; // For optimizations.
213 };
214 
215 /// TrueIterator handles PostingLists which contain all items of the index. It
216 /// stores size of the virtual posting list, and all operations are performed
217 /// in O(1).
218 class TrueIterator : public Iterator {
219 public:
220  explicit TrueIterator(DocID Size) : Iterator(Kind::True), Size(Size) {}
221 
222  bool reachedEnd() const override { return Index >= Size; }
223 
224  void advance() override {
225  assert(!reachedEnd() && "TRUE iterator can't advance() at the end.");
226  ++Index;
227  }
228 
229  void advanceTo(DocID ID) override {
230  assert(!reachedEnd() && "TRUE iterator can't advanceTo() at the end.");
231  Index = std::min(ID, Size);
232  }
233 
234  DocID peek() const override {
235  assert(!reachedEnd() && "TRUE iterator can't peek() at the end.");
236  return Index;
237  }
238 
239  float consume() override {
240  assert(!reachedEnd() && "TRUE iterator can't consume() at the end.");
241  return 1;
242  }
243 
244  size_t estimateSize() const override { return Size; }
245 
246 private:
247  llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
248  return OS << "true";
249  }
250 
251  DocID Index = 0;
252  /// Size of the underlying virtual PostingList.
253  DocID Size;
254 };
255 
256 /// FalseIterator yields no results.
257 class FalseIterator : public Iterator {
258 public:
259  FalseIterator() : Iterator(Kind::False) {}
260  bool reachedEnd() const override { return true; }
261  void advance() override { assert(false); }
262  void advanceTo(DocID ID) override { assert(false); }
263  DocID peek() const override {
264  assert(false);
265  return 0;
266  }
267  float consume() override {
268  assert(false);
269  return 1;
270  }
271  size_t estimateSize() const override { return 0; }
272 
273 private:
274  llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
275  return OS << "false";
276  }
277 };
278 
279 /// Boost iterator is a wrapper around its child which multiplies scores of
280 /// each retrieved item by a given factor.
281 class BoostIterator : public Iterator {
282 public:
283  BoostIterator(std::unique_ptr<Iterator> Child, float Factor)
284  : Child(std::move(Child)), Factor(Factor) {}
285 
286  bool reachedEnd() const override { return Child->reachedEnd(); }
287 
288  void advance() override { Child->advance(); }
289 
290  void advanceTo(DocID ID) override { Child->advanceTo(ID); }
291 
292  DocID peek() const override { return Child->peek(); }
293 
294  float consume() override { return Child->consume() * Factor; }
295 
296  size_t estimateSize() const override { return Child->estimateSize(); }
297 
298 private:
299  llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
300  return OS << "(* " << Factor << ' ' << *Child << ')';
301  }
302 
303  std::unique_ptr<Iterator> Child;
304  float Factor;
305 };
306 
307 /// This iterator limits the number of items retrieved from the child iterator
308 /// on top of the query tree. To ensure that query tree with LIMIT iterators
309 /// inside works correctly, users have to call Root->consume(Root->peek()) each
310 /// time item is retrieved at the root of query tree.
311 class LimitIterator : public Iterator {
312 public:
313  LimitIterator(std::unique_ptr<Iterator> Child, size_t Limit)
314  : Child(std::move(Child)), Limit(Limit), ItemsLeft(Limit) {}
315 
316  bool reachedEnd() const override {
317  return ItemsLeft == 0 || Child->reachedEnd();
318  }
319 
320  void advance() override { Child->advance(); }
321 
322  void advanceTo(DocID ID) override { Child->advanceTo(ID); }
323 
324  DocID peek() const override { return Child->peek(); }
325 
326  /// Decreases the limit in case the element consumed at top of the query tree
327  /// comes from the underlying iterator.
328  float consume() override {
329  assert(!reachedEnd() && "LimitIterator can't consume() at the end.");
330  --ItemsLeft;
331  return Child->consume();
332  }
333 
334  size_t estimateSize() const override {
335  return std::min(Child->estimateSize(), Limit);
336  }
337 
338 private:
339  llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
340  return OS << "(LIMIT " << Limit << " " << *Child << ')';
341  }
342 
343  std::unique_ptr<Iterator> Child;
344  size_t Limit;
345  size_t ItemsLeft;
346 };
347 
348 } // end namespace
349 
350 std::vector<std::pair<DocID, float>> consume(Iterator &It) {
351  std::vector<std::pair<DocID, float>> Result;
352  for (; !It.reachedEnd(); It.advance())
353  Result.emplace_back(It.peek(), It.consume());
354  return Result;
355 }
356 
357 std::unique_ptr<Iterator>
358 Corpus::intersect(std::vector<std::unique_ptr<Iterator>> Children) const {
359  std::vector<std::unique_ptr<Iterator>> RealChildren;
360  for (auto &Child : Children) {
361  switch (Child->kind()) {
363  break; // No effect, drop the iterator.
365  return std::move(Child); // Intersection is empty.
366  case Iterator::Kind::And: {
367  // Inline nested AND into parent AND.
368  auto &NewChildren = static_cast<AndIterator *>(Child.get())->Children;
369  std::move(NewChildren.begin(), NewChildren.end(),
370  std::back_inserter(RealChildren));
371  break;
372  }
373  default:
374  RealChildren.push_back(std::move(Child));
375  }
376  }
377  switch (RealChildren.size()) {
378  case 0:
379  return all();
380  case 1:
381  return std::move(RealChildren.front());
382  default:
383  return std::make_unique<AndIterator>(std::move(RealChildren));
384  }
385 }
386 
387 std::unique_ptr<Iterator>
388 Corpus::unionOf(std::vector<std::unique_ptr<Iterator>> Children) const {
389  std::vector<std::unique_ptr<Iterator>> RealChildren;
390  for (auto &Child : Children) {
391  switch (Child->kind()) {
393  break; // No effect, drop the iterator.
394  case Iterator::Kind::Or: {
395  // Inline nested OR into parent OR.
396  auto &NewChildren = static_cast<OrIterator *>(Child.get())->Children;
397  std::move(NewChildren.begin(), NewChildren.end(),
398  std::back_inserter(RealChildren));
399  break;
400  }
402  // Don't return all(), which would discard sibling boosts.
403  default:
404  RealChildren.push_back(std::move(Child));
405  }
406  }
407  switch (RealChildren.size()) {
408  case 0:
409  return none();
410  case 1:
411  return std::move(RealChildren.front());
412  default:
413  return std::make_unique<OrIterator>(std::move(RealChildren));
414  }
415 }
416 
417 std::unique_ptr<Iterator> Corpus::all() const {
418  return std::make_unique<TrueIterator>(Size);
419 }
420 
421 std::unique_ptr<Iterator> Corpus::none() const {
422  return std::make_unique<FalseIterator>();
423 }
424 
425 std::unique_ptr<Iterator> Corpus::boost(std::unique_ptr<Iterator> Child,
426  float Factor) const {
427  if (Factor == 1)
428  return Child;
429  if (Child->kind() == Iterator::Kind::False)
430  return Child;
431  return std::make_unique<BoostIterator>(std::move(Child), Factor);
432 }
433 
434 std::unique_ptr<Iterator> Corpus::limit(std::unique_ptr<Iterator> Child,
435  size_t Limit) const {
436  if (Child->kind() == Iterator::Kind::False)
437  return Child;
438  return std::make_unique<LimitIterator>(std::move(Child), Limit);
439 }
440 
441 } // namespace dex
442 } // namespace clangd
443 } // namespace clang
std::unique_ptr< Iterator > intersect(std::vector< std::unique_ptr< Iterator >> Children) const
Returns AND Iterator which performs the intersection of the PostingLists of its children.
Definition: Iterator.cpp:358
virtual float consume()=0
Informs the iterator that the current document was consumed, and returns its boost.
Iterator is the interface for Query Tree node.
Definition: Iterator.h:54
BindArgumentKind Kind
virtual DocID peek() const =0
Returns the current element this iterator points to.
std::vector< std::pair< DocID, float > > consume(Iterator &It)
Advances the iterator until it is exhausted.
Definition: Iterator.cpp:350
std::unique_ptr< Iterator > unionOf(std::vector< std::unique_ptr< Iterator >> Children) const
Returns OR Iterator which performs the union of the PostingLists of its children. ...
Definition: Iterator.cpp:388
std::unique_ptr< Iterator > limit(std::unique_ptr< Iterator > Child, size_t Limit) const
Returns LIMIT iterator, which yields up to N elements of its child iterator.
Definition: Iterator.cpp:434
std::unique_ptr< Iterator > none() const
Returns FALSE Iterator which iterates over no documents.
Definition: Iterator.cpp:421
llvm::raw_string_ostream OS
Definition: TraceTests.cpp:162
uint32_t DocID
Symbol position in the list of all index symbols sorted by a pre-computed symbol quality.
Definition: Iterator.h:46
virtual void advance()=0
Moves to next valid DocID.
std::vector< std::unique_ptr< HTMLNode > > Children
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
virtual bool reachedEnd() const =0
Returns true if all valid DocIDs were processed and hence the iterator is exhausted.
Symbol index queries consist of specific requirements for the requested symbol, such as high fuzzy ma...
std::unique_ptr< Iterator > boost(std::unique_ptr< Iterator > Child, float Factor) const
Returns BOOST iterator which multiplies the score of each item by given factor.
Definition: Iterator.cpp:425
std::unique_ptr< Iterator > all() const
Returns TRUE Iterator which iterates over "virtual" PostingList containing all items in range [0...
Definition: Iterator.cpp:417
const SymbolIndex * Index
Definition: Dexp.cpp:95