clang-tools  14.0.0git
Iterator.cpp
Go to the documentation of this file.
1 //===--- Iterator.cpp - Query Symbol Retrieval ------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Iterator.h"
10 #include "llvm/Support/Casting.h"
11 #include <algorithm>
12 #include <cassert>
13 #include <numeric>
14 
15 namespace clang {
16 namespace clangd {
17 namespace dex {
18 namespace {
19 
20 /// Implements Iterator over the intersection of other iterators.
21 ///
22 /// AndIterator iterates through common items among all children. It becomes
23 /// exhausted as soon as any child becomes exhausted. After each mutation, the
24 /// iterator restores the invariant: all children must point to the same item.
25 class AndIterator : public Iterator {
26 public:
27  explicit AndIterator(std::vector<std::unique_ptr<Iterator>> AllChildren)
28  : Iterator(Kind::And), Children(std::move(AllChildren)) {
29  assert(!Children.empty() && "AND iterator should have at least one child.");
30  // Establish invariants.
31  for (const auto &Child : Children)
32  ReachedEnd |= Child->reachedEnd();
33  sync();
34  // When children are sorted by the estimateSize(), sync() calls are more
35  // effective. Each sync() starts with the first child and makes sure all
36  // children point to the same element. If any child is "above" the previous
37  // ones, the algorithm resets and and advances the children to the next
38  // highest element starting from the front. When child iterators in the
39  // beginning have smaller estimated size, the sync() will have less restarts
40  // and become more effective.
41  llvm::sort(Children, [](const std::unique_ptr<Iterator> &LHS,
42  const std::unique_ptr<Iterator> &RHS) {
43  return LHS->estimateSize() < RHS->estimateSize();
44  });
45  }
46 
47  bool reachedEnd() const override { return ReachedEnd; }
48 
49  /// Advances all children to the next common item.
50  void advance() override {
51  assert(!reachedEnd() && "AND iterator can't advance() at the end.");
52  Children.front()->advance();
53  sync();
54  }
55 
56  /// Advances all children to the next common item with DocumentID >= ID.
57  void advanceTo(DocID ID) override {
58  assert(!reachedEnd() && "AND iterator can't advanceTo() at the end.");
59  Children.front()->advanceTo(ID);
60  sync();
61  }
62 
63  DocID peek() const override { return Children.front()->peek(); }
64 
65  float consume() override {
66  assert(!reachedEnd() && "AND iterator can't consume() at the end.");
67  float Boost = 1;
68  for (const auto &Child : Children)
69  Boost *= Child->consume();
70  return Boost;
71  }
72 
73  size_t estimateSize() const override {
74  return Children.front()->estimateSize();
75  }
76 
77 private:
78  llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
79  OS << "(& ";
80  auto Separator = "";
81  for (const auto &Child : Children) {
82  OS << Separator << *Child;
83  Separator = " ";
84  }
85  OS << ')';
86  return OS;
87  }
88 
89  /// Restores class invariants: each child will point to the same element after
90  /// sync.
91  void sync() {
92  ReachedEnd |= Children.front()->reachedEnd();
93  if (ReachedEnd)
94  return;
95  auto SyncID = Children.front()->peek();
96  // Indicates whether any child needs to be advanced to new SyncID.
97  bool NeedsAdvance = false;
98  do {
99  NeedsAdvance = false;
100  for (auto &Child : Children) {
101  Child->advanceTo(SyncID);
102  ReachedEnd |= Child->reachedEnd();
103  // If any child reaches end And iterator can not match any other items.
104  // In this case, just terminate the process.
105  if (ReachedEnd)
106  return;
107  // Cache the result so that peek() is not called again as it may be
108  // quite expensive in AND with large subtrees.
109  auto Candidate = Child->peek();
110  // If any child goes beyond given ID (i.e. ID is not the common item),
111  // all children should be advanced to the next common item.
112  if (Candidate > SyncID) {
113  SyncID = Candidate;
114  NeedsAdvance = true;
115  // Reset and try to sync again. Sync starts with the first child as
116  // this is the cheapest (smallest size estimate). This way advanceTo
117  // is called on the large posting lists once the sync point is very
118  // likely.
119  break;
120  }
121  }
122  } while (NeedsAdvance);
123  }
124 
125  /// AndIterator owns its children and ensures that all of them point to the
126  /// same element. As soon as one child gets exhausted, AndIterator can no
127  /// longer advance and has reached its end.
128  std::vector<std::unique_ptr<Iterator>> Children;
129  /// Indicates whether any child is exhausted. It is cheaper to maintain and
130  /// update the field, rather than traversing the whole subtree in each
131  /// reachedEnd() call.
132  bool ReachedEnd = false;
133  friend Corpus; // For optimizations.
134 };
135 
136 /// Implements Iterator over the union of other iterators.
137 ///
138 /// OrIterator iterates through all items which can be pointed to by at least
139 /// one child. To preserve the sorted order, this iterator always advances the
140 /// child with smallest Child->peek() value. OrIterator becomes exhausted as
141 /// soon as all of its children are exhausted.
142 class OrIterator : public Iterator {
143 public:
144  explicit OrIterator(std::vector<std::unique_ptr<Iterator>> AllChildren)
145  : Iterator(Kind::Or), Children(std::move(AllChildren)) {
146  assert(!Children.empty() && "OR iterator should have at least one child.");
147  }
148 
149  /// Returns true if all children are exhausted.
150  bool reachedEnd() const override {
151  for (const auto &Child : Children)
152  if (!Child->reachedEnd())
153  return false;
154  return true;
155  }
156 
157  /// Moves each child pointing to the smallest DocID to the next item.
158  void advance() override {
159  assert(!reachedEnd() && "OR iterator can't advance() at the end.");
160  const auto SmallestID = peek();
161  for (const auto &Child : Children)
162  if (!Child->reachedEnd() && Child->peek() == SmallestID)
163  Child->advance();
164  }
165 
166  /// Advances each child to the next existing element with DocumentID >= ID.
167  void advanceTo(DocID ID) override {
168  assert(!reachedEnd() && "OR iterator can't advanceTo() at the end.");
169  for (const auto &Child : Children)
170  if (!Child->reachedEnd())
171  Child->advanceTo(ID);
172  }
173 
174  /// Returns the element under cursor of the child with smallest Child->peek()
175  /// value.
176  DocID peek() const override {
177  assert(!reachedEnd() && "OR iterator can't peek() at the end.");
178  DocID Result = std::numeric_limits<DocID>::max();
179 
180  for (const auto &Child : Children)
181  if (!Child->reachedEnd())
182  Result = std::min(Result, Child->peek());
183 
184  return Result;
185  }
186 
187  // Returns the maximum boosting score among all Children when iterator
188  // points to the current ID.
189  float consume() override {
190  assert(!reachedEnd() && "OR iterator can't consume() at the end.");
191  const DocID ID = peek();
192  float Boost = 1;
193  for (const auto &Child : Children)
194  if (!Child->reachedEnd() && Child->peek() == ID)
195  Boost = std::max(Boost, Child->consume());
196  return Boost;
197  }
198 
199  size_t estimateSize() const override {
200  size_t Size = 0;
201  for (const auto &Child : Children)
202  Size = std::max(Size, Child->estimateSize());
203  return Size;
204  }
205 
206 private:
207  llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
208  OS << "(| ";
209  auto Separator = "";
210  for (const auto &Child : Children) {
211  OS << Separator << *Child;
212  Separator = " ";
213  }
214  OS << ')';
215  return OS;
216  }
217 
218  std::vector<std::unique_ptr<Iterator>> Children;
219  friend Corpus; // For optimizations.
220 };
221 
222 /// TrueIterator handles PostingLists which contain all items of the index. It
223 /// stores size of the virtual posting list, and all operations are performed
224 /// in O(1).
225 class TrueIterator : public Iterator {
226 public:
227  explicit TrueIterator(DocID Size) : Iterator(Kind::True), Size(Size) {}
228 
229  bool reachedEnd() const override { return Index >= Size; }
230 
231  void advance() override {
232  assert(!reachedEnd() && "TRUE iterator can't advance() at the end.");
233  ++Index;
234  }
235 
236  void advanceTo(DocID ID) override {
237  assert(!reachedEnd() && "TRUE iterator can't advanceTo() at the end.");
238  Index = std::min(ID, Size);
239  }
240 
241  DocID peek() const override {
242  assert(!reachedEnd() && "TRUE iterator can't peek() at the end.");
243  return Index;
244  }
245 
246  float consume() override {
247  assert(!reachedEnd() && "TRUE iterator can't consume() at the end.");
248  return 1;
249  }
250 
251  size_t estimateSize() const override { return Size; }
252 
253 private:
254  llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
255  return OS << "true";
256  }
257 
258  DocID Index = 0;
259  /// Size of the underlying virtual PostingList.
260  DocID Size;
261 };
262 
263 /// FalseIterator yields no results.
264 class FalseIterator : public Iterator {
265 public:
266  FalseIterator() : Iterator(Kind::False) {}
267  bool reachedEnd() const override { return true; }
268  void advance() override { assert(false); }
269  void advanceTo(DocID ID) override { assert(false); }
270  DocID peek() const override {
271  assert(false);
272  return 0;
273  }
274  float consume() override {
275  assert(false);
276  return 1;
277  }
278  size_t estimateSize() const override { return 0; }
279 
280 private:
281  llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
282  return OS << "false";
283  }
284 };
285 
286 /// Boost iterator is a wrapper around its child which multiplies scores of
287 /// each retrieved item by a given factor.
288 class BoostIterator : public Iterator {
289 public:
290  BoostIterator(std::unique_ptr<Iterator> Child, float Factor)
291  : Child(std::move(Child)), Factor(Factor) {}
292 
293  bool reachedEnd() const override { return Child->reachedEnd(); }
294 
295  void advance() override { Child->advance(); }
296 
297  void advanceTo(DocID ID) override { Child->advanceTo(ID); }
298 
299  DocID peek() const override { return Child->peek(); }
300 
301  float consume() override { return Child->consume() * Factor; }
302 
303  size_t estimateSize() const override { return Child->estimateSize(); }
304 
305 private:
306  llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
307  return OS << "(* " << Factor << ' ' << *Child << ')';
308  }
309 
310  std::unique_ptr<Iterator> Child;
311  float Factor;
312 };
313 
314 /// This iterator limits the number of items retrieved from the child iterator
315 /// on top of the query tree. To ensure that query tree with LIMIT iterators
316 /// inside works correctly, users have to call Root->consume(Root->peek()) each
317 /// time item is retrieved at the root of query tree.
318 class LimitIterator : public Iterator {
319 public:
320  LimitIterator(std::unique_ptr<Iterator> Child, size_t Limit)
321  : Child(std::move(Child)), Limit(Limit), ItemsLeft(Limit) {}
322 
323  bool reachedEnd() const override {
324  return ItemsLeft == 0 || Child->reachedEnd();
325  }
326 
327  void advance() override { Child->advance(); }
328 
329  void advanceTo(DocID ID) override { Child->advanceTo(ID); }
330 
331  DocID peek() const override { return Child->peek(); }
332 
333  /// Decreases the limit in case the element consumed at top of the query tree
334  /// comes from the underlying iterator.
335  float consume() override {
336  assert(!reachedEnd() && "LimitIterator can't consume() at the end.");
337  --ItemsLeft;
338  return Child->consume();
339  }
340 
341  size_t estimateSize() const override {
342  return std::min(Child->estimateSize(), Limit);
343  }
344 
345 private:
346  llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
347  return OS << "(LIMIT " << Limit << " " << *Child << ')';
348  }
349 
350  std::unique_ptr<Iterator> Child;
351  size_t Limit;
352  size_t ItemsLeft;
353 };
354 
355 } // end namespace
356 
357 std::vector<std::pair<DocID, float>> consume(Iterator &It) {
358  std::vector<std::pair<DocID, float>> Result;
359  for (; !It.reachedEnd(); It.advance())
360  Result.emplace_back(It.peek(), It.consume());
361  return Result;
362 }
363 
364 std::unique_ptr<Iterator>
365 Corpus::intersect(std::vector<std::unique_ptr<Iterator>> Children) const {
366  std::vector<std::unique_ptr<Iterator>> RealChildren;
367  for (auto &Child : Children) {
368  switch (Child->kind()) {
370  break; // No effect, drop the iterator.
372  return std::move(Child); // Intersection is empty.
373  case Iterator::Kind::And: {
374  // Inline nested AND into parent AND.
375  auto &NewChildren = static_cast<AndIterator *>(Child.get())->Children;
376  std::move(NewChildren.begin(), NewChildren.end(),
377  std::back_inserter(RealChildren));
378  break;
379  }
380  default:
381  RealChildren.push_back(std::move(Child));
382  }
383  }
384  switch (RealChildren.size()) {
385  case 0:
386  return all();
387  case 1:
388  return std::move(RealChildren.front());
389  default:
390  return std::make_unique<AndIterator>(std::move(RealChildren));
391  }
392 }
393 
394 std::unique_ptr<Iterator>
395 Corpus::unionOf(std::vector<std::unique_ptr<Iterator>> Children) const {
396  std::vector<std::unique_ptr<Iterator>> RealChildren;
397  for (auto &Child : Children) {
398  switch (Child->kind()) {
400  break; // No effect, drop the iterator.
401  case Iterator::Kind::Or: {
402  // Inline nested OR into parent OR.
403  auto &NewChildren = static_cast<OrIterator *>(Child.get())->Children;
404  std::move(NewChildren.begin(), NewChildren.end(),
405  std::back_inserter(RealChildren));
406  break;
407  }
409  // Don't return all(), which would discard sibling boosts.
410  default:
411  RealChildren.push_back(std::move(Child));
412  }
413  }
414  switch (RealChildren.size()) {
415  case 0:
416  return none();
417  case 1:
418  return std::move(RealChildren.front());
419  default:
420  return std::make_unique<OrIterator>(std::move(RealChildren));
421  }
422 }
423 
424 std::unique_ptr<Iterator> Corpus::all() const {
425  return std::make_unique<TrueIterator>(Size);
426 }
427 
428 std::unique_ptr<Iterator> Corpus::none() const {
429  return std::make_unique<FalseIterator>();
430 }
431 
432 std::unique_ptr<Iterator> Corpus::boost(std::unique_ptr<Iterator> Child,
433  float Factor) const {
434  if (Factor == 1)
435  return Child;
436  if (Child->kind() == Iterator::Kind::False)
437  return Child;
438  return std::make_unique<BoostIterator>(std::move(Child), Factor);
439 }
440 
441 std::unique_ptr<Iterator> Corpus::limit(std::unique_ptr<Iterator> Child,
442  size_t Limit) const {
443  if (Child->kind() == Iterator::Kind::False)
444  return Child;
445  return std::make_unique<LimitIterator>(std::move(Child), Limit);
446 }
447 
448 } // namespace dex
449 } // namespace clangd
450 } // namespace clang
clang::clangd::dex::Corpus::boost
std::unique_ptr< Iterator > boost(std::unique_ptr< Iterator > Child, float Factor) const
Returns BOOST iterator which multiplies the score of each item by given factor.
Definition: Iterator.cpp:432
clang::clangd::dex::Iterator::Kind::True
@ True
clang::clangd::dex::consume
std::vector< std::pair< DocID, float > > consume(Iterator &It)
Advances the iterator until it is exhausted.
Definition: Iterator.cpp:357
Kind
BindArgumentKind Kind
Definition: AvoidBindCheck.cpp:59
clang::clangd::dex::Corpus::all
std::unique_ptr< Iterator > all() const
Returns TRUE Iterator which iterates over "virtual" PostingList containing all items in range [0,...
Definition: Iterator.cpp:424
clang::clangd::dex::Corpus::intersect
std::unique_ptr< Iterator > intersect(std::vector< std::unique_ptr< Iterator >> Children) const
Returns AND Iterator which performs the intersection of the PostingLists of its children.
Definition: Iterator.cpp:365
clang::clangd::dex::Corpus::none
std::unique_ptr< Iterator > none() const
Returns FALSE Iterator which iterates over no documents.
Definition: Iterator.cpp:428
Children
std::vector< std::unique_ptr< HTMLNode > > Children
Definition: HTMLGenerator.cpp:91
clang::clangd::dex::Iterator::advance
virtual void advance()=0
Moves to next valid DocID.
clang::clangd::dex::Iterator::Kind::Or
@ Or
clang::clangd::dex::Iterator::Kind::And
@ And
clang::clangd::Separator
@ Separator
Definition: FuzzyMatch.h:59
clang::clangd::dex::Iterator::peek
virtual DocID peek() const =0
Returns the current element this iterator points to.
Index
const SymbolIndex * Index
Definition: Dexp.cpp:99
Iterator.h
ID
static char ID
Definition: Logger.cpp:74
Candidate
ExpectedMatch Candidate
Definition: FuzzyMatchTests.cpp:47
clang::clangd::dex::Iterator
Iterator is the interface for Query Tree node.
Definition: Iterator.h:54
clang::clangd::dex::Iterator::reachedEnd
virtual bool reachedEnd() const =0
Returns true if all valid DocIDs were processed and hence the iterator is exhausted.
clang
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
Definition: ApplyReplacements.h:27
OS
llvm::raw_string_ostream OS
Definition: TraceTests.cpp:163
clang::clangd::dex::DocID
uint32_t DocID
Symbol position in the list of all index symbols sorted by a pre-computed symbol quality.
Definition: Iterator.h:46
clang::clangd::dex::Corpus::limit
std::unique_ptr< Iterator > limit(std::unique_ptr< Iterator > Child, size_t Limit) const
Returns LIMIT iterator, which yields up to N elements of its child iterator.
Definition: Iterator.cpp:441
clang::clangd::dex::Iterator::consume
virtual float consume()=0
Informs the iterator that the current document was consumed, and returns its boost.
clang::clangd::dex::Corpus::unionOf
std::unique_ptr< Iterator > unionOf(std::vector< std::unique_ptr< Iterator >> Children) const
Returns OR Iterator which performs the union of the PostingLists of its children.
Definition: Iterator.cpp:395
clang::clangd::dex::Iterator::Kind::False
@ False