12#include "llvm/ADT/ArrayRef.h"
13#include "llvm/ADT/DenseSet.h"
14#include "llvm/ADT/STLExtras.h"
15#include "llvm/ADT/SmallVector.h"
16#include "llvm/ADT/StringExtras.h"
17#include "llvm/ADT/StringRef.h"
29template <
typename Func>
33 llvm::SmallVector<CharRole> Roles(
Identifier.size());
35 llvm::MutableArrayRef(Roles.data(),
Identifier.size()));
37 std::string LowercaseIdentifier =
Identifier.lower();
47 llvm::SmallVector<std::array<unsigned, 2>, 12> Next(
48 LowercaseIdentifier.size());
49 unsigned NextTail = 0, NextHead = 0;
50 for (
int I = LowercaseIdentifier.size() - 1; I >= 0; --I) {
51 Next[I] = {{NextTail, NextHead}};
52 NextTail = Roles[I] ==
Tail ? I : 0;
53 if (Roles[I] ==
Head) {
60 for (
unsigned I = 0; I < LowercaseIdentifier.size(); ++I) {
62 if (Roles[I] !=
Head && Roles[I] !=
Tail)
64 for (
unsigned J : Next[I]) {
67 for (
unsigned K : Next[J]) {
70 Out(
Trigram(LowercaseIdentifier[I], LowercaseIdentifier[J],
71 LowercaseIdentifier[
K]));
85 for (
unsigned Position = 0, HeadsSeen = 0; HeadsSeen < 2;) {
101 std::vector<Trigram> &Result) {
106 constexpr unsigned ManyTrigramsIdentifierThreshold = 14;
111 if (
Identifier.size() < ManyTrigramsIdentifierThreshold) {
113 if (!llvm::is_contained(Result, T))
119 Result.erase(std::unique(Result.begin(), Result.end()), Result.end());
128 llvm::SmallVector<CharRole> Roles(Query.size());
129 calculateRoles(Query, llvm::MutableArrayRef(Roles.data(), Query.size()));
131 std::string LowercaseQuery = Query.lower();
133 llvm::DenseSet<Token> UniqueTrigrams;
135 for (
unsigned I = 0; I < LowercaseQuery.size(); ++I) {
136 if (Roles[I] !=
Head && Roles[I] !=
Tail)
138 Chars.push_back(LowercaseQuery[I]);
139 if (Chars.size() > 3)
140 Chars.erase(Chars.begin());
141 if (Chars.size() == 3)
147 if (UniqueTrigrams.empty()) {
149 std::string Result(1, LowercaseQuery.front());
150 for (
unsigned I = 1; I < LowercaseQuery.size(); ++I)
151 if (Roles[I] ==
Head || Roles[I] ==
Tail)
152 Result += LowercaseQuery[I];
153 UniqueTrigrams.insert(
157 return {UniqueTrigrams.begin(), UniqueTrigrams.end()};
CompiledFragmentImpl & Out
Token objects represent a characteristic of a symbol, which can be used to perform efficient search.
Trigrams are attributes of the symbol unqualified name used to effectively extract symbols which can be fuzzy-matched given user query from the inverted index.
A Token represents an attribute of a symbol, such as a particular trigram present in the name (used for fuzzy search).
@ Trigram
Represents trigram used for fuzzy search of unqualified symbol names.
static void identifierTrigrams(llvm::StringRef Identifier, Func Out)
void generateIdentifierTrigrams(llvm::StringRef Identifier, std::vector< Trigram > &Result)
Produces list of unique fuzzy-search trigrams from unqualified symbol.
std::vector< Token > generateQueryTrigrams(llvm::StringRef Query)
Returns list of unique fuzzy-search trigrams given a query.
CharTypeSet calculateRoles(llvm::StringRef Text, llvm::MutableArrayRef< CharRole > Roles)
//===-- Representation.cpp - ClangDoc Representation -----------*- C++ -*-===//