clang-tools 20.0.0git
Iterator.cpp
Go to the documentation of this file.
1//===--- Iterator.cpp - Query Symbol Retrieval ------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "Iterator.h"
10#include "llvm/ADT/STLExtras.h"
11#include <algorithm>
12#include <cassert>
13#include <numeric>
14
15namespace clang {
16namespace clangd {
17namespace dex {
18namespace {
19
20/// Implements Iterator over the intersection of other iterators.
21///
22/// AndIterator iterates through common items among all children. It becomes
23/// exhausted as soon as any child becomes exhausted. After each mutation, the
24/// iterator restores the invariant: all children must point to the same item.
25class AndIterator : public Iterator {
26public:
27 explicit AndIterator(std::vector<std::unique_ptr<Iterator>> AllChildren)
28 : Iterator(Kind::And), Children(std::move(AllChildren)) {
29 assert(!Children.empty() && "AND iterator should have at least one child.");
30 // Establish invariants.
31 for (const auto &Child : Children)
32 ReachedEnd |= Child->reachedEnd();
33 sync();
34 // When children are sorted by the estimateSize(), sync() calls are more
35 // effective. Each sync() starts with the first child and makes sure all
36 // children point to the same element. If any child is "above" the previous
37 // ones, the algorithm resets and advances the children to the next
38 // highest element starting from the front. When child iterators in the
39 // beginning have smaller estimated size, the sync() will have less restarts
40 // and become more effective.
41 llvm::sort(Children, [](const std::unique_ptr<Iterator> &LHS,
42 const std::unique_ptr<Iterator> &RHS) {
43 return LHS->estimateSize() < RHS->estimateSize();
44 });
45 }
46
47 bool reachedEnd() const override { return ReachedEnd; }
48
49 /// Advances all children to the next common item.
50 void advance() override {
51 assert(!reachedEnd() && "AND iterator can't advance() at the end.");
52 Children.front()->advance();
53 sync();
54 }
55
56 /// Advances all children to the next common item with DocumentID >= ID.
57 void advanceTo(DocID ID) override {
58 assert(!reachedEnd() && "AND iterator can't advanceTo() at the end.");
59 Children.front()->advanceTo(ID);
60 sync();
61 }
62
63 DocID peek() const override { return Children.front()->peek(); }
64
65 float consume() override {
66 assert(!reachedEnd() && "AND iterator can't consume() at the end.");
67 float Boost = 1;
68 for (const auto &Child : Children)
69 Boost *= Child->consume();
70 return Boost;
71 }
72
73 size_t estimateSize() const override {
74 return Children.front()->estimateSize();
75 }
76
77private:
78 llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
79 OS << "(& ";
80 auto *Separator = "";
81 for (const auto &Child : Children) {
82 OS << Separator << *Child;
83 Separator = " ";
84 }
85 OS << ')';
86 return OS;
87 }
88
89 /// Restores class invariants: each child will point to the same element after
90 /// sync.
91 void sync() {
92 ReachedEnd |= Children.front()->reachedEnd();
93 if (ReachedEnd)
94 return;
95 auto SyncID = Children.front()->peek();
96 // Indicates whether any child needs to be advanced to new SyncID.
97 bool NeedsAdvance = false;
98 do {
99 NeedsAdvance = false;
100 for (auto &Child : Children) {
101 Child->advanceTo(SyncID);
102 ReachedEnd |= Child->reachedEnd();
103 // If any child reaches end And iterator can not match any other items.
104 // In this case, just terminate the process.
105 if (ReachedEnd)
106 return;
107 // Cache the result so that peek() is not called again as it may be
108 // quite expensive in AND with large subtrees.
109 auto Candidate = Child->peek();
110 // If any child goes beyond given ID (i.e. ID is not the common item),
111 // all children should be advanced to the next common item.
112 if (Candidate > SyncID) {
113 SyncID = Candidate;
114 NeedsAdvance = true;
115 // Reset and try to sync again. Sync starts with the first child as
116 // this is the cheapest (smallest size estimate). This way advanceTo
117 // is called on the large posting lists once the sync point is very
118 // likely.
119 break;
120 }
121 }
122 } while (NeedsAdvance);
123 }
124
125 /// AndIterator owns its children and ensures that all of them point to the
126 /// same element. As soon as one child gets exhausted, AndIterator can no
127 /// longer advance and has reached its end.
128 std::vector<std::unique_ptr<Iterator>> Children;
129 /// Indicates whether any child is exhausted. It is cheaper to maintain and
130 /// update the field, rather than traversing the whole subtree in each
131 /// reachedEnd() call.
132 bool ReachedEnd = false;
133 friend Corpus; // For optimizations.
134};
135
136/// Implements Iterator over the union of other iterators.
137///
138/// OrIterator iterates through all items which can be pointed to by at least
139/// one child. To preserve the sorted order, this iterator always advances the
140/// child with smallest Child->peek() value. OrIterator becomes exhausted as
141/// soon as all of its children are exhausted.
142class OrIterator : public Iterator {
143public:
144 explicit OrIterator(std::vector<std::unique_ptr<Iterator>> AllChildren)
145 : Iterator(Kind::Or), Children(std::move(AllChildren)) {
146 assert(!Children.empty() && "OR iterator should have at least one child.");
147 }
148
149 /// Returns true if all children are exhausted.
150 bool reachedEnd() const override {
151 for (const auto &Child : Children)
152 if (!Child->reachedEnd())
153 return false;
154 return true;
155 }
156
157 /// Moves each child pointing to the smallest DocID to the next item.
158 void advance() override {
159 assert(!reachedEnd() && "OR iterator can't advance() at the end.");
160 const auto SmallestID = peek();
161 for (const auto &Child : Children)
162 if (!Child->reachedEnd() && Child->peek() == SmallestID)
163 Child->advance();
164 }
165
166 /// Advances each child to the next existing element with DocumentID >= ID.
167 void advanceTo(DocID ID) override {
168 assert(!reachedEnd() && "OR iterator can't advanceTo() at the end.");
169 for (const auto &Child : Children)
170 if (!Child->reachedEnd())
171 Child->advanceTo(ID);
172 }
173
174 /// Returns the element under cursor of the child with smallest Child->peek()
175 /// value.
176 DocID peek() const override {
177 assert(!reachedEnd() && "OR iterator can't peek() at the end.");
178 DocID Result = std::numeric_limits<DocID>::max();
179
180 for (const auto &Child : Children)
181 if (!Child->reachedEnd())
182 Result = std::min(Result, Child->peek());
183
184 return Result;
185 }
186
187 // Returns the maximum boosting score among all Children when iterator
188 // points to the current ID.
189 float consume() override {
190 assert(!reachedEnd() && "OR iterator can't consume() at the end.");
191 const DocID ID = peek();
192 float Boost = 1;
193 for (const auto &Child : Children)
194 if (!Child->reachedEnd() && Child->peek() == ID)
195 Boost = std::max(Boost, Child->consume());
196 return Boost;
197 }
198
199 size_t estimateSize() const override {
200 size_t Size = 0;
201 for (const auto &Child : Children)
202 Size = std::max(Size, Child->estimateSize());
203 return Size;
204 }
205
206private:
207 llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
208 OS << "(| ";
209 auto *Separator = "";
210 for (const auto &Child : Children) {
211 OS << Separator << *Child;
212 Separator = " ";
213 }
214 OS << ')';
215 return OS;
216 }
217
218 std::vector<std::unique_ptr<Iterator>> Children;
219 friend Corpus; // For optimizations.
220};
221
222/// TrueIterator handles PostingLists which contain all items of the index. It
223/// stores size of the virtual posting list, and all operations are performed
224/// in O(1).
225class TrueIterator : public Iterator {
226public:
227 explicit TrueIterator(DocID Size) : Iterator(Kind::True), Size(Size) {}
228
229 bool reachedEnd() const override { return Index >= Size; }
230
231 void advance() override {
232 assert(!reachedEnd() && "TRUE iterator can't advance() at the end.");
233 ++Index;
234 }
235
236 void advanceTo(DocID ID) override {
237 assert(!reachedEnd() && "TRUE iterator can't advanceTo() at the end.");
238 Index = std::min(ID, Size);
239 }
240
241 DocID peek() const override {
242 assert(!reachedEnd() && "TRUE iterator can't peek() at the end.");
243 return Index;
244 }
245
246 float consume() override {
247 assert(!reachedEnd() && "TRUE iterator can't consume() at the end.");
248 return 1;
249 }
250
251 size_t estimateSize() const override { return Size; }
252
253private:
254 llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
255 return OS << "true";
256 }
257
258 DocID Index = 0;
259 /// Size of the underlying virtual PostingList.
260 DocID Size;
261};
262
263/// FalseIterator yields no results.
264class FalseIterator : public Iterator {
265public:
266 FalseIterator() : Iterator(Kind::False) {}
267 bool reachedEnd() const override { return true; }
268 void advance() override { assert(false); }
269 void advanceTo(DocID ID) override { assert(false); }
270 DocID peek() const override {
271 assert(false);
272 return 0;
273 }
274 float consume() override {
275 assert(false);
276 return 1;
277 }
278 size_t estimateSize() const override { return 0; }
279
280private:
281 llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
282 return OS << "false";
283 }
284};
285
286/// Boost iterator is a wrapper around its child which multiplies scores of
287/// each retrieved item by a given factor.
288class BoostIterator : public Iterator {
289public:
290 BoostIterator(std::unique_ptr<Iterator> Child, float Factor)
291 : Child(std::move(Child)), Factor(Factor) {}
292
293 bool reachedEnd() const override { return Child->reachedEnd(); }
294
295 void advance() override { Child->advance(); }
296
297 void advanceTo(DocID ID) override { Child->advanceTo(ID); }
298
299 DocID peek() const override { return Child->peek(); }
300
301 float consume() override { return Child->consume() * Factor; }
302
303 size_t estimateSize() const override { return Child->estimateSize(); }
304
305private:
306 llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
307 return OS << "(* " << Factor << ' ' << *Child << ')';
308 }
309
310 std::unique_ptr<Iterator> Child;
311 float Factor;
312};
313
314/// This iterator limits the number of items retrieved from the child iterator
315/// on top of the query tree. To ensure that query tree with LIMIT iterators
316/// inside works correctly, users have to call Root->consume(Root->peek()) each
317/// time item is retrieved at the root of query tree.
318class LimitIterator : public Iterator {
319public:
320 LimitIterator(std::unique_ptr<Iterator> Child, size_t Limit)
321 : Child(std::move(Child)), Limit(Limit), ItemsLeft(Limit) {}
322
323 bool reachedEnd() const override {
324 return ItemsLeft == 0 || Child->reachedEnd();
325 }
326
327 void advance() override { Child->advance(); }
328
329 void advanceTo(DocID ID) override { Child->advanceTo(ID); }
330
331 DocID peek() const override { return Child->peek(); }
332
333 /// Decreases the limit in case the element consumed at top of the query tree
334 /// comes from the underlying iterator.
335 float consume() override {
336 assert(!reachedEnd() && "LimitIterator can't consume() at the end.");
337 --ItemsLeft;
338 return Child->consume();
339 }
340
341 size_t estimateSize() const override {
342 return std::min(Child->estimateSize(), Limit);
343 }
344
345private:
346 llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
347 return OS << "(LIMIT " << Limit << " " << *Child << ')';
348 }
349
350 std::unique_ptr<Iterator> Child;
351 size_t Limit;
352 size_t ItemsLeft;
353};
354
355} // end namespace
356
357std::vector<std::pair<DocID, float>> consume(Iterator &It) {
358 std::vector<std::pair<DocID, float>> Result;
359 for (; !It.reachedEnd(); It.advance())
360 Result.emplace_back(It.peek(), It.consume());
361 return Result;
362}
363
364std::unique_ptr<Iterator>
365Corpus::intersect(std::vector<std::unique_ptr<Iterator>> Children) const {
366 std::vector<std::unique_ptr<Iterator>> RealChildren;
367 for (auto &Child : Children) {
368 switch (Child->kind()) {
370 break; // No effect, drop the iterator.
372 return std::move(Child); // Intersection is empty.
373 case Iterator::Kind::And: {
374 // Inline nested AND into parent AND.
375 auto &NewChildren = static_cast<AndIterator *>(Child.get())->Children;
376 std::move(NewChildren.begin(), NewChildren.end(),
377 std::back_inserter(RealChildren));
378 break;
379 }
380 default:
381 RealChildren.push_back(std::move(Child));
382 }
383 }
384 switch (RealChildren.size()) {
385 case 0:
386 return all();
387 case 1:
388 return std::move(RealChildren.front());
389 default:
390 return std::make_unique<AndIterator>(std::move(RealChildren));
391 }
392}
393
394std::unique_ptr<Iterator>
395Corpus::unionOf(std::vector<std::unique_ptr<Iterator>> Children) const {
396 std::vector<std::unique_ptr<Iterator>> RealChildren;
397 for (auto &Child : Children) {
398 switch (Child->kind()) {
400 break; // No effect, drop the iterator.
401 case Iterator::Kind::Or: {
402 // Inline nested OR into parent OR.
403 auto &NewChildren = static_cast<OrIterator *>(Child.get())->Children;
404 std::move(NewChildren.begin(), NewChildren.end(),
405 std::back_inserter(RealChildren));
406 break;
407 }
409 // Don't return all(), which would discard sibling boosts.
410 default:
411 RealChildren.push_back(std::move(Child));
412 }
413 }
414 switch (RealChildren.size()) {
415 case 0:
416 return none();
417 case 1:
418 return std::move(RealChildren.front());
419 default:
420 return std::make_unique<OrIterator>(std::move(RealChildren));
421 }
422}
423
424std::unique_ptr<Iterator> Corpus::all() const {
425 return std::make_unique<TrueIterator>(Size);
426}
427
428std::unique_ptr<Iterator> Corpus::none() const {
429 return std::make_unique<FalseIterator>();
430}
431
432std::unique_ptr<Iterator> Corpus::boost(std::unique_ptr<Iterator> Child,
433 float Factor) const {
434 if (Factor == 1)
435 return Child;
436 if (Child->kind() == Iterator::Kind::False)
437 return Child;
438 return std::make_unique<BoostIterator>(std::move(Child), Factor);
439}
440
441std::unique_ptr<Iterator> Corpus::limit(std::unique_ptr<Iterator> Child,
442 size_t Limit) const {
443 if (Child->kind() == Iterator::Kind::False)
444 return Child;
445 return std::make_unique<LimitIterator>(std::move(Child), Limit);
446}
447
448} // namespace dex
449} // namespace clangd
450} // namespace clang
BindArgumentKind Kind
llvm::raw_ostream & OS
ExpectedMatch Candidate
std::vector< std::unique_ptr< HTMLNode > > Children
Symbol index queries consist of specific requirements for the requested symbol, such as high fuzzy ma...
std::unique_ptr< Iterator > none() const
Returns FALSE Iterator which iterates over no documents.
Definition: Iterator.cpp:428
std::unique_ptr< Iterator > limit(std::unique_ptr< Iterator > Child, size_t Limit) const
Returns LIMIT iterator, which yields up to N elements of its child iterator.
Definition: Iterator.cpp:441
std::unique_ptr< Iterator > intersect(std::vector< std::unique_ptr< Iterator > > Children) const
Returns AND Iterator which performs the intersection of the PostingLists of its children.
Definition: Iterator.cpp:365
std::unique_ptr< Iterator > boost(std::unique_ptr< Iterator > Child, float Factor) const
Returns BOOST iterator which multiplies the score of each item by given factor.
Definition: Iterator.cpp:432
std::unique_ptr< Iterator > all() const
Returns TRUE Iterator which iterates over "virtual" PostingList containing all items in range [0,...
Definition: Iterator.cpp:424
std::unique_ptr< Iterator > unionOf(std::vector< std::unique_ptr< Iterator > > Children) const
Returns OR Iterator which performs the union of the PostingLists of its children.
Definition: Iterator.cpp:395
Iterator is the interface for Query Tree node.
Definition: Iterator.h:53
virtual float consume()=0
Informs the iterator that the current document was consumed, and returns its boost.
virtual bool reachedEnd() const =0
Returns true if all valid DocIDs were processed and hence the iterator is exhausted.
virtual void advance()=0
Moves to next valid DocID.
virtual DocID peek() const =0
Returns the current element this iterator points to.
std::vector< std::pair< DocID, float > > consume(Iterator &It)
Advances the iterator until it is exhausted.
Definition: Iterator.cpp:357
uint32_t DocID
Symbol position in the list of all index symbols sorted by a pre-computed symbol quality.
Definition: Iterator.h:45
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//