clang-tools  14.0.0git
FuzzySymbolIndex.cpp
Go to the documentation of this file.
1 //===--- FuzzySymbolIndex.cpp - Lookup symbols for autocomplete -*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "FuzzySymbolIndex.h"
9 #include "llvm/Support/Regex.h"
10 
12 using llvm::StringRef;
13 
14 namespace clang {
15 namespace include_fixer {
16 namespace {
17 
18 class MemSymbolIndex : public FuzzySymbolIndex {
19 public:
20  MemSymbolIndex(std::vector<SymbolAndSignals> Symbols) {
21  for (auto &Symbol : Symbols) {
22  auto Tokens = tokenize(Symbol.Symbol.getName());
23  this->Symbols.emplace_back(
24  StringRef(llvm::join(Tokens.begin(), Tokens.end(), " ")),
25  std::move(Symbol));
26  }
27  }
28 
29  std::vector<SymbolAndSignals> search(StringRef Query) override {
30  auto Tokens = tokenize(Query);
31  llvm::Regex Pattern("^" + queryRegexp(Tokens));
32  std::vector<SymbolAndSignals> Results;
33  for (const Entry &E : Symbols)
34  if (Pattern.match(E.first))
35  Results.push_back(E.second);
36  return Results;
37  }
38 
39 private:
40  using Entry = std::pair<llvm::SmallString<32>, SymbolAndSignals>;
41  std::vector<Entry> Symbols;
42 };
43 
44 // Helpers for tokenize state machine.
45 enum TokenizeState {
46  EMPTY, // No pending characters.
47  ONE_BIG, // Read one uppercase letter, could be WORD or Word.
48  BIG_WORD, // Reading an uppercase WORD.
49  SMALL_WORD, // Reading a lowercase word.
50  NUMBER // Reading a number.
51 };
52 
53 enum CharType { UPPER, LOWER, DIGIT, MISC };
54 CharType classify(char c) {
55  if (isupper(c))
56  return UPPER;
57  if (islower(c))
58  return LOWER;
59  if (isdigit(c))
60  return DIGIT;
61  return MISC;
62 }
63 
64 } // namespace
65 
66 std::vector<std::string> FuzzySymbolIndex::tokenize(StringRef Text) {
67  std::vector<std::string> Result;
68  // State describes the treatment of text from Start to I.
69  // Once text is Flush()ed into Result, we're done with it and advance Start.
70  TokenizeState State = EMPTY;
71  size_t Start = 0;
72  auto Flush = [&](size_t End) {
73  if (State != EMPTY) {
74  Result.push_back(Text.substr(Start, End - Start).lower());
75  State = EMPTY;
76  }
77  Start = End;
78  };
79  for (size_t I = 0; I < Text.size(); ++I) {
80  CharType Type = classify(Text[I]);
81  if (Type == MISC)
82  Flush(I);
83  else if (Type == LOWER)
84  switch (State) {
85  case BIG_WORD:
86  Flush(I - 1); // FOOBar: first token is FOO, not FOOB.
87  LLVM_FALLTHROUGH;
88  case ONE_BIG:
89  State = SMALL_WORD;
90  LLVM_FALLTHROUGH;
91  case SMALL_WORD:
92  break;
93  default:
94  Flush(I);
95  State = SMALL_WORD;
96  }
97  else if (Type == UPPER)
98  switch (State) {
99  case ONE_BIG:
100  State = BIG_WORD;
101  LLVM_FALLTHROUGH;
102  case BIG_WORD:
103  break;
104  default:
105  Flush(I);
106  State = ONE_BIG;
107  }
108  else if (Type == DIGIT && State != NUMBER) {
109  Flush(I);
110  State = NUMBER;
111  }
112  }
113  Flush(Text.size());
114  return Result;
115 }
116 
117 std::string
118 FuzzySymbolIndex::queryRegexp(const std::vector<std::string> &Tokens) {
119  std::string Result;
120  for (size_t I = 0; I < Tokens.size(); ++I) {
121  if (I)
122  Result.append("[[:alnum:]]* ");
123  for (size_t J = 0; J < Tokens[I].size(); ++J) {
124  if (J)
125  Result.append("([[:alnum:]]* )?");
126  Result.push_back(Tokens[I][J]);
127  }
128  }
129  return Result;
130 }
131 
132 llvm::Expected<std::unique_ptr<FuzzySymbolIndex>>
133 FuzzySymbolIndex::createFromYAML(StringRef FilePath) {
134  auto Buffer = llvm::MemoryBuffer::getFile(FilePath);
135  if (!Buffer)
136  return llvm::errorCodeToError(Buffer.getError());
137  return std::make_unique<MemSymbolIndex>(
138  find_all_symbols::ReadSymbolInfosFromYAML(Buffer.get()->getBuffer()));
139 }
140 
141 } // namespace include_fixer
142 } // namespace clang
clang::find_all_symbols::SymbolAndSignals
Definition: SymbolInfo.h:126
E
const Expr * E
Definition: AvoidBindCheck.cpp:88
Type
NodeType Type
Definition: HTMLGenerator.cpp:73
clang::include_fixer::FuzzySymbolIndex::createFromYAML
static llvm::Expected< std::unique_ptr< FuzzySymbolIndex > > createFromYAML(llvm::StringRef File)
Definition: FuzzySymbolIndex.cpp:133
Text
std::string Text
Definition: HTMLGenerator.cpp:80
clang::tidy::cppcoreguidelines::join
static std::string join(ArrayRef< SpecialMemberFunctionsCheck::SpecialMemberFunctionKind > SMFS, llvm::StringRef AndOr)
Definition: SpecialMemberFunctionsCheck.cpp:78
clang::include_fixer::FuzzySymbolIndex
Definition: FuzzySymbolIndex.h:32
Results
std::vector< CodeCompletionResult > Results
Definition: CodeComplete.cpp:765
clang::find_all_symbols::ReadSymbolInfosFromYAML
std::vector< SymbolAndSignals > ReadSymbolInfosFromYAML(llvm::StringRef Yaml)
Read SymbolInfos from a YAML document.
Definition: SymbolInfo.cpp:128
clang::clangd::CharType
CharType
Definition: FuzzyMatch.h:41
Entry
Definition: Modularize.cpp:428
clang::include_fixer::FuzzySymbolIndex::queryRegexp
static std::string queryRegexp(const std::vector< std::string > &Tokens)
Definition: FuzzySymbolIndex.cpp:118
clang::include_fixer::FuzzySymbolIndex::tokenize
static std::vector< std::string > tokenize(llvm::StringRef Text)
Definition: FuzzySymbolIndex.cpp:66
clang
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
Definition: ApplyReplacements.h:27
FuzzySymbolIndex.h