clang-tools  10.0.0svn
Dexp.cpp
Go to the documentation of this file.
1 //===--- Dexp.cpp - Dex EXPloration tool ------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a simple interactive tool which can be used to manually
10 // evaluate symbol search quality of Clangd index.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "SourceCode.h"
15 #include "index/Serialization.h"
16 #include "index/dex/Dex.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/LineEditor/LineEditor.h"
21 #include "llvm/Support/CommandLine.h"
22 #include "llvm/Support/Signals.h"
23 
24 namespace clang {
25 namespace clangd {
26 namespace {
27 
28 llvm::cl::opt<std::string> IndexPath("index-path",
29  llvm::cl::desc("Path to the index"),
30  llvm::cl::Positional, llvm::cl::Required);
31 
// Overview text passed to llvm::cl::ParseCommandLineOptions() in main(); it is
// shown as part of the tool's own -help output.
static const std::string Overview = R"(
This is an **experimental** interactive tool to process user-provided search
queries over given symbol collection obtained via clangd-indexer. The
tool can be used to evaluate search quality of existing index implementations
and manually construct non-trivial test cases.

Type "help" to get information about the available commands.
)";
40 
41 void reportTime(llvm::StringRef Name, llvm::function_ref<void()> F) {
42  const auto TimerStart = std::chrono::high_resolution_clock::now();
43  F();
44  const auto TimerStop = std::chrono::high_resolution_clock::now();
45  const auto Duration = std::chrono::duration_cast<std::chrono::milliseconds>(
46  TimerStop - TimerStart);
47  llvm::outs() << llvm::formatv("{0} took {1:ms+n}.\n", Name, Duration);
48 }
49 
50 std::vector<SymbolID> getSymbolIDsFromIndex(llvm::StringRef QualifiedName,
51  const SymbolIndex *Index) {
52  FuzzyFindRequest Request;
53  // Remove leading "::" qualifier as FuzzyFind doesn't need leading "::"
54  // qualifier for global scope.
55  bool IsGlobalScope = QualifiedName.consume_front("::");
56  auto Names = splitQualifiedName(QualifiedName);
57  if (IsGlobalScope || !Names.first.empty())
58  Request.Scopes = {Names.first};
59  else
60  // QualifiedName refers to a symbol in global scope (e.g. "GlobalSymbol"),
61  // add the global scope to the request.
62  Request.Scopes = {""};
63 
64  Request.Query = Names.second;
65  std::vector<SymbolID> SymIDs;
66  Index->fuzzyFind(Request, [&](const Symbol &Sym) {
67  std::string SymQualifiedName = (Sym.Scope + Sym.Name).str();
68  if (QualifiedName == SymQualifiedName)
69  SymIDs.push_back(Sym.ID);
70  });
71  return SymIDs;
72 }
73 
74 // REPL commands inherit from Command and contain their options as members.
75 // Creating a Command populates parser options, parseAndRun() resets them.
76 class Command {
77  // By resetting the parser options, we lost the standard -help flag.
78  llvm::cl::opt<bool, false, llvm::cl::parser<bool>> Help{
79  "help", llvm::cl::desc("Display available options"),
80  llvm::cl::ValueDisallowed, llvm::cl::cat(llvm::cl::GeneralCategory)};
81  virtual void run() = 0;
82 
83 protected:
84  const SymbolIndex *Index;
85 
86 public:
87  virtual ~Command() = default;
88  virtual void parseAndRun(llvm::ArrayRef<const char *> Argv,
89  const char *Overview, const SymbolIndex &Index) {
90  std::string ParseErrs;
91  llvm::raw_string_ostream OS(ParseErrs);
92  bool Ok = llvm::cl::ParseCommandLineOptions(Argv.size(), Argv.data(),
93  Overview, &OS);
94  if (Help.getNumOccurrences() > 0) {
95  // Avoid printing parse errors in this case.
96  // (Well, in theory. A bunch get printed to llvm::errs() regardless!)
97  llvm::cl::PrintHelpMessage();
98  } else {
99  llvm::outs() << OS.str();
100  if (Ok) {
101  this->Index = &Index;
102  reportTime(Argv[0], [&] { run(); });
103  }
104  }
105  llvm::cl::ResetCommandLineParser(); // must do this before opts are
106  // destroyed.
107  }
108 };
109 
110 // FIXME(kbobyrev): Ideas for more commands:
111 // * load/swap/reload index: this would make it possible to get rid of llvm::cl
112 // usages in the tool driver and actually use llvm::cl library in the REPL.
113 // * show posting list density histogram (or dump data somewhere so that user
114 // could build one)
115 // * show number of tokens of each kind
116 // * print out tokens with the most dense posting lists
117 // * print out tokens with least dense posting lists
118 
119 class FuzzyFind : public Command {
120  llvm::cl::opt<std::string> Query{
121  "query",
122  llvm::cl::Positional,
123  llvm::cl::Required,
124  llvm::cl::desc("Query string to be fuzzy-matched"),
125  };
126  llvm::cl::opt<std::string> Scopes{
127  "scopes",
128  llvm::cl::desc("Allowed symbol scopes (comma-separated list)"),
129  };
130  llvm::cl::opt<unsigned> Limit{
131  "limit",
132  llvm::cl::init(10),
133  llvm::cl::desc("Max results to display"),
134  };
135 
136  void run() override {
137  FuzzyFindRequest Request;
138  Request.Limit = Limit;
139  Request.Query = Query;
140  if (Scopes.getNumOccurrences() > 0) {
141  llvm::SmallVector<llvm::StringRef, 8> Scopes;
142  llvm::StringRef(this->Scopes).split(Scopes, ',');
143  Request.Scopes = {Scopes.begin(), Scopes.end()};
144  }
145  Request.AnyScope = Request.Scopes.empty();
146  // FIXME(kbobyrev): Print symbol final scores to see the distribution.
147  static const auto OutputFormat = "{0,-4} | {1,-40} | {2,-25}\n";
148  llvm::outs() << llvm::formatv(OutputFormat, "Rank", "Symbol ID",
149  "Symbol Name");
150  size_t Rank = 0;
151  Index->fuzzyFind(Request, [&](const Symbol &Sym) {
152  llvm::outs() << llvm::formatv(OutputFormat, Rank++, Sym.ID.str(),
153  Sym.Scope + Sym.Name);
154  });
155  }
156 };
157 
158 class Lookup : public Command {
159  llvm::cl::opt<std::string> ID{
160  "id",
161  llvm::cl::Positional,
162  llvm::cl::desc("Symbol ID to look up (hex)"),
163  };
164  llvm::cl::opt<std::string> Name{
165  "name",
166  llvm::cl::desc("Qualified name to look up."),
167  };
168 
169  void run() override {
170  if (ID.getNumOccurrences() == 0 && Name.getNumOccurrences() == 0) {
171  llvm::outs()
172  << "Missing required argument: please provide id or -name.\n";
173  return;
174  }
175  std::vector<SymbolID> IDs;
176  if (ID.getNumOccurrences()) {
177  auto SID = SymbolID::fromStr(ID);
178  if (!SID) {
179  llvm::outs() << llvm::toString(SID.takeError()) << "\n";
180  return;
181  }
182  IDs.push_back(*SID);
183  } else {
184  IDs = getSymbolIDsFromIndex(Name, Index);
185  }
186 
187  LookupRequest Request;
188  Request.IDs.insert(IDs.begin(), IDs.end());
189  bool FoundSymbol = false;
190  Index->lookup(Request, [&](const Symbol &Sym) {
191  FoundSymbol = true;
192  llvm::outs() << toYAML(Sym);
193  });
194  if (!FoundSymbol)
195  llvm::outs() << "not found\n";
196  }
197 };
198 
199 class Refs : public Command {
200  llvm::cl::opt<std::string> ID{
201  "id",
202  llvm::cl::Positional,
203  llvm::cl::desc("Symbol ID of the symbol being queried (hex)."),
204  };
205  llvm::cl::opt<std::string> Name{
206  "name",
207  llvm::cl::desc("Qualified name of the symbol being queried."),
208  };
209  llvm::cl::opt<std::string> Filter{
210  "filter",
211  llvm::cl::init(".*"),
212  llvm::cl::desc(
213  "Print all results from files matching this regular expression."),
214  };
215 
216  void run() override {
217  if (ID.getNumOccurrences() == 0 && Name.getNumOccurrences() == 0) {
218  llvm::outs()
219  << "Missing required argument: please provide id or -name.\n";
220  return;
221  }
222  std::vector<SymbolID> IDs;
223  if (ID.getNumOccurrences()) {
224  auto SID = SymbolID::fromStr(ID);
225  if (!SID) {
226  llvm::outs() << llvm::toString(SID.takeError()) << "\n";
227  return;
228  }
229  IDs.push_back(*SID);
230  } else {
231  IDs = getSymbolIDsFromIndex(Name, Index);
232  if (IDs.size() > 1) {
233  llvm::outs() << llvm::formatv(
234  "The name {0} is ambiguous, found {1} different "
235  "symbols. Please use id flag to disambiguate.\n",
236  Name, IDs.size());
237  return;
238  }
239  }
240  RefsRequest RefRequest;
241  RefRequest.IDs.insert(IDs.begin(), IDs.end());
242  llvm::Regex RegexFilter(Filter);
243  Index->refs(RefRequest, [&RegexFilter](const Ref &R) {
244  auto U = URI::parse(R.Location.FileURI);
245  if (!U) {
246  llvm::outs() << U.takeError();
247  return;
248  }
249  if (RegexFilter.match(U->body()))
250  llvm::outs() << R << "\n";
251  });
252  }
253 };
254 
255 struct {
256  const char *Name;
257  const char *Description;
258  std::function<std::unique_ptr<Command>()> Implementation;
259 } CommandInfo[] = {
260  {"find", "Search for symbols with fuzzyFind", std::make_unique<FuzzyFind>},
261  {"lookup", "Dump symbol details by ID or qualified name",
262  std::make_unique<Lookup>},
263  {"refs", "Find references by ID or qualified name",
264  std::make_unique<Refs>},
265 };
266 
267 std::unique_ptr<SymbolIndex> openIndex(llvm::StringRef Index) {
268  return loadIndex(Index, /*UseDex=*/true);
269 }
270 
271 } // namespace
272 } // namespace clangd
273 } // namespace clang
274 
275 int main(int argc, const char *argv[]) {
276  using namespace clang::clangd;
277 
278  llvm::cl::ParseCommandLineOptions(argc, argv, Overview);
279  llvm::cl::ResetCommandLineParser(); // We reuse it for REPL commands.
280  llvm::sys::PrintStackTraceOnErrorSignal(argv[0]);
281 
282  std::unique_ptr<SymbolIndex> Index;
283  reportTime("Dex build", [&]() {
284  Index = openIndex(IndexPath);
285  });
286 
287  if (!Index) {
288  llvm::outs() << "Failed to open the index.\n";
289  return -1;
290  }
291 
292  llvm::LineEditor LE("dexp");
293 
294  while (llvm::Optional<std::string> Request = LE.readLine()) {
295  // Split on spaces and add required null-termination.
296  std::replace(Request->begin(), Request->end(), ' ', '\0');
297  llvm::SmallVector<llvm::StringRef, 8> Args;
298  llvm::StringRef(*Request).split(Args, '\0', /*MaxSplit=*/-1,
299  /*KeepEmpty=*/false);
300  if (Args.empty())
301  continue;
302  if (Args.front() == "help") {
303  llvm::outs() << "dexp - Index explorer\nCommands:\n";
304  for (const auto &C : CommandInfo)
305  llvm::outs() << llvm::formatv("{0,16} - {1}\n", C.Name, C.Description);
306  llvm::outs() << "Get detailed command help with e.g. `find -help`.\n";
307  continue;
308  }
309  llvm::SmallVector<const char *, 8> FakeArgv;
310  for (llvm::StringRef S : Args)
311  FakeArgv.push_back(S.data()); // Terminated by separator or end of string.
312 
313  bool Recognized = false;
314  for (const auto &Cmd : CommandInfo) {
315  if (Cmd.Name == Args.front()) {
316  Recognized = true;
317  Cmd.Implementation()->parseAndRun(FakeArgv, Cmd.Description, *Index);
318  break;
319  }
320  }
321  if (!Recognized)
322  llvm::outs() << "Unknown command. Try 'help'.\n";
323  }
324 
325  return 0;
326 }
int Limit
const tooling::CompileCommand & Command
std::function< std::unique_ptr< Command >)> Implementation
Definition: Dexp.cpp:258
std::unique_ptr< SymbolIndex > loadIndex(llvm::StringRef SymbolFilename, bool UseDex)
This defines Dex - a symbol index implementation based on query iterators over symbol tokens...
static llvm::Expected< SymbolID > fromStr(llvm::StringRef)
Definition: SymbolID.cpp:35
std::pair< StringRef, StringRef > splitQualifiedName(StringRef QName)
Definition: SourceCode.cpp:602
static llvm::StringRef toString(SpecialMemberFunctionsCheck::SpecialMemberFunctionKind K)
std::string toYAML(const Symbol &)
static std::string replace(llvm::StringRef Haystack, llvm::StringRef Needle, llvm::StringRef Repl)
Definition: TestIndex.cpp:30
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
static llvm::Expected< URI > parse(llvm::StringRef Uri)
Parse a URI string "<scheme>:[//<authority>/]<path>".
Definition: URI.cpp:164
const char * Description
Definition: Dexp.cpp:257
RefSlab Refs
int main(int argc, const char *argv[])
Definition: Dexp.cpp:275
const SymbolIndex * Index
Definition: Dexp.cpp:84