clang-tools  14.0.0git
Dexp.cpp
Go to the documentation of this file.
1 //===--- Dexp.cpp - Dex EXPloration tool ------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a simple interactive tool which can be used to manually
10 // evaluate symbol search quality of Clangd index.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "index/Index.h"
15 #include "index/Relation.h"
16 #include "index/Serialization.h"
17 #include "index/dex/Dex.h"
18 #include "index/remote/Client.h"
19 #include "llvm/ADT/ScopeExit.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/LineEditor/LineEditor.h"
23 #include "llvm/Support/CommandLine.h"
24 #include "llvm/Support/Signals.h"
25 
26 namespace clang {
27 namespace clangd {
28 namespace {
29 
30 llvm::cl::opt<std::string> IndexLocation(
31  llvm::cl::desc("<path to index file | remote:server.address>"),
32  llvm::cl::Positional);
33 
34 llvm::cl::opt<std::string>
35  ExecCommand("c", llvm::cl::desc("Command to execute and then exit."));
36 
37 llvm::cl::opt<std::string> ProjectRoot(
38  "project-root",
39  llvm::cl::desc(
40  "Path to the project. Required when connecting using remote index."));
41 
// Tool description shown by the top-level command line parser (e.g. for
// -help). Kept as a raw string so the layout below is printed verbatim.
constexpr char Overview[] = R"(
This is an **experimental** interactive tool to process user-provided search
queries over given symbol collection obtained via clangd-indexer. The
tool can be used to evaluate search quality of existing index implementations
and manually construct non-trivial test cases.

You can connect to remote index by passing remote:address to dexp. Example:

$ dexp remote:0.0.0.0:9000

Type "help" to get information about the available commands.
)";
54 
55 void reportTime(llvm::StringRef Name, llvm::function_ref<void()> F) {
56  const auto TimerStart = std::chrono::high_resolution_clock::now();
57  F();
58  const auto TimerStop = std::chrono::high_resolution_clock::now();
59  const auto Duration = std::chrono::duration_cast<std::chrono::milliseconds>(
60  TimerStop - TimerStart);
61  llvm::outs() << llvm::formatv("{0} took {1:ms+n}.\n", Name, Duration);
62 }
63 
64 std::vector<SymbolID> getSymbolIDsFromIndex(llvm::StringRef QualifiedName,
65  const SymbolIndex *Index) {
66  FuzzyFindRequest Request;
67  // Remove leading "::" qualifier as FuzzyFind doesn't need leading "::"
68  // qualifier for global scope.
69  bool IsGlobalScope = QualifiedName.consume_front("::");
70  auto Names = splitQualifiedName(QualifiedName);
71  if (IsGlobalScope || !Names.first.empty())
72  Request.Scopes = {std::string(Names.first)};
73  else
74  // QualifiedName refers to a symbol in global scope (e.g. "GlobalSymbol"),
75  // add the global scope to the request.
76  Request.Scopes = {""};
77 
78  Request.Query = std::string(Names.second);
79  std::vector<SymbolID> SymIDs;
80  Index->fuzzyFind(Request, [&](const Symbol &Sym) {
81  std::string SymQualifiedName = (Sym.Scope + Sym.Name).str();
82  if (QualifiedName == SymQualifiedName)
83  SymIDs.push_back(Sym.ID);
84  });
85  return SymIDs;
86 }
87 
88 // REPL commands inherit from Command and contain their options as members.
89 // Creating a Command populates parser options, parseAndRun() resets them.
90 class Command {
91  // By resetting the parser options, we lost the standard -help flag.
92  llvm::cl::opt<bool, false, llvm::cl::parser<bool>> Help{
93  "help", llvm::cl::desc("Display available options"),
94  llvm::cl::ValueDisallowed, llvm::cl::cat(llvm::cl::getGeneralCategory())};
95  // FIXME: Allow commands to signal failure.
96  virtual void run() = 0;
97 
98 protected:
99  const SymbolIndex *Index;
100 
101 public:
102  virtual ~Command() = default;
103  bool parseAndRun(llvm::ArrayRef<const char *> Argv, const char *Overview,
104  const SymbolIndex &Index) {
105  std::string ParseErrs;
106  llvm::raw_string_ostream OS(ParseErrs);
107  bool Ok = llvm::cl::ParseCommandLineOptions(Argv.size(), Argv.data(),
108  Overview, &OS);
109  // must do this before opts are destroyed
110  auto Cleanup = llvm::make_scope_exit(llvm::cl::ResetCommandLineParser);
111  if (Help.getNumOccurrences() > 0) {
112  // Avoid printing parse errors in this case.
113  // (Well, in theory. A bunch get printed to llvm::errs() regardless!)
114  llvm::cl::PrintHelpMessage();
115  return true;
116  }
117 
118  llvm::outs() << OS.str();
119  if (Ok) {
120  this->Index = &Index;
121  reportTime(Argv[0], [&] { run(); });
122  }
123  return Ok;
124  }
125 };
126 
127 // FIXME(kbobyrev): Ideas for more commands:
128 // * load/swap/reload index: this would make it possible to get rid of llvm::cl
129 // usages in the tool driver and actually use llvm::cl library in the REPL.
130 // * show posting list density histogram (or dump data somewhere so that user
131 // could build one)
132 // * show number of tokens of each kind
133 // * print out tokens with the most dense posting lists
134 // * print out tokens with least dense posting lists
135 
136 class FuzzyFind : public Command {
137  llvm::cl::opt<std::string> Query{
138  "query",
139  llvm::cl::Positional,
140  llvm::cl::Required,
141  llvm::cl::desc("Query string to be fuzzy-matched"),
142  };
143  llvm::cl::opt<std::string> Scopes{
144  "scopes",
145  llvm::cl::desc("Allowed symbol scopes (comma-separated list)"),
146  };
147  llvm::cl::opt<unsigned> Limit{
148  "limit",
149  llvm::cl::init(10),
150  llvm::cl::desc("Max results to display"),
151  };
152 
153  void run() override {
154  FuzzyFindRequest Request;
155  Request.Limit = Limit;
156  Request.Query = Query;
157  if (Scopes.getNumOccurrences() > 0) {
158  llvm::SmallVector<llvm::StringRef> Scopes;
159  llvm::StringRef(this->Scopes).split(Scopes, ',');
160  Request.Scopes = {Scopes.begin(), Scopes.end()};
161  }
162  Request.AnyScope = Request.Scopes.empty();
163  // FIXME(kbobyrev): Print symbol final scores to see the distribution.
164  static const auto *OutputFormat = "{0,-4} | {1,-40} | {2,-25}\n";
165  llvm::outs() << llvm::formatv(OutputFormat, "Rank", "Symbol ID",
166  "Symbol Name");
167  size_t Rank = 0;
168  Index->fuzzyFind(Request, [&](const Symbol &Sym) {
169  llvm::outs() << llvm::formatv(OutputFormat, Rank++, Sym.ID.str(),
170  Sym.Scope + Sym.Name);
171  });
172  }
173 };
174 
175 class Lookup : public Command {
176  llvm::cl::opt<std::string> ID{
177  "id",
178  llvm::cl::Positional,
179  llvm::cl::desc("Symbol ID to look up (hex)"),
180  };
181  llvm::cl::opt<std::string> Name{
182  "name",
183  llvm::cl::desc("Qualified name to look up."),
184  };
185 
186  void run() override {
187  if (ID.getNumOccurrences() == 0 && Name.getNumOccurrences() == 0) {
188  llvm::errs()
189  << "Missing required argument: please provide id or -name.\n";
190  return;
191  }
192  std::vector<SymbolID> IDs;
193  if (ID.getNumOccurrences()) {
194  auto SID = SymbolID::fromStr(ID);
195  if (!SID) {
196  llvm::errs() << llvm::toString(SID.takeError()) << "\n";
197  return;
198  }
199  IDs.push_back(*SID);
200  } else {
201  IDs = getSymbolIDsFromIndex(Name, Index);
202  }
203 
204  LookupRequest Request;
205  Request.IDs.insert(IDs.begin(), IDs.end());
206  bool FoundSymbol = false;
207  Index->lookup(Request, [&](const Symbol &Sym) {
208  FoundSymbol = true;
209  llvm::outs() << toYAML(Sym);
210  });
211  if (!FoundSymbol)
212  llvm::errs() << "not found\n";
213  }
214 };
215 
216 class Refs : public Command {
217  llvm::cl::opt<std::string> ID{
218  "id",
219  llvm::cl::Positional,
220  llvm::cl::desc("Symbol ID of the symbol being queried (hex)."),
221  };
222  llvm::cl::opt<std::string> Name{
223  "name",
224  llvm::cl::desc("Qualified name of the symbol being queried."),
225  };
226  llvm::cl::opt<std::string> Filter{
227  "filter",
228  llvm::cl::init(".*"),
229  llvm::cl::desc(
230  "Print all results from files matching this regular expression."),
231  };
232 
233  void run() override {
234  if (ID.getNumOccurrences() == 0 && Name.getNumOccurrences() == 0) {
235  llvm::errs()
236  << "Missing required argument: please provide id or -name.\n";
237  return;
238  }
239  std::vector<SymbolID> IDs;
240  if (ID.getNumOccurrences()) {
241  auto SID = SymbolID::fromStr(ID);
242  if (!SID) {
243  llvm::errs() << llvm::toString(SID.takeError()) << "\n";
244  return;
245  }
246  IDs.push_back(*SID);
247  } else {
248  IDs = getSymbolIDsFromIndex(Name, Index);
249  if (IDs.size() > 1) {
250  llvm::errs() << llvm::formatv(
251  "The name {0} is ambiguous, found {1} different "
252  "symbols. Please use id flag to disambiguate.\n",
253  Name, IDs.size());
254  return;
255  }
256  }
257  RefsRequest RefRequest;
258  RefRequest.IDs.insert(IDs.begin(), IDs.end());
259  llvm::Regex RegexFilter(Filter);
260  Index->refs(RefRequest, [&RegexFilter](const Ref &R) {
261  auto U = URI::parse(R.Location.FileURI);
262  if (!U) {
263  llvm::errs() << U.takeError();
264  return;
265  }
266  if (RegexFilter.match(U->body()))
267  llvm::outs() << R << "\n";
268  });
269  }
270 };
271 
272 class Relations : public Command {
273  llvm::cl::opt<std::string> ID{
274  "id",
275  llvm::cl::Positional,
276  llvm::cl::desc("Symbol ID of the symbol being queried (hex)."),
277  };
278  llvm::cl::opt<RelationKind> Relation{
279  "relation",
280  llvm::cl::desc("Relation kind for the predicate."),
281  values(clEnumValN(RelationKind::BaseOf, "base_of",
282  "Find subclasses of a class."),
283  clEnumValN(RelationKind::OverriddenBy, "overridden_by",
284  "Find methods that overrides a virtual method.")),
285  };
286 
287  void run() override {
288  if (ID.getNumOccurrences() == 0 || Relation.getNumOccurrences() == 0) {
289  llvm::errs()
290  << "Missing required argument: please provide id and -relation.\n";
291  return;
292  }
293  RelationsRequest Req;
294  if (ID.getNumOccurrences()) {
295  auto SID = SymbolID::fromStr(ID);
296  if (!SID) {
297  llvm::errs() << llvm::toString(SID.takeError()) << "\n";
298  return;
299  }
300  Req.Subjects.insert(*SID);
301  }
302  Req.Predicate = Relation.getValue();
303  Index->relations(Req, [](const SymbolID &SID, const Symbol &S) {
304  llvm::outs() << toYAML(S);
305  });
306  }
307 };
308 
309 class Export : public Command {
310  llvm::cl::opt<IndexFileFormat> Format{
311  "format",
312  llvm::cl::desc("Format of index export"),
313  llvm::cl::values(
314  clEnumValN(IndexFileFormat::YAML, "yaml",
315  "human-readable YAML format"),
316  clEnumValN(IndexFileFormat::RIFF, "binary", "binary RIFF format")),
317  llvm::cl::init(IndexFileFormat::YAML),
318  };
319  llvm::cl::opt<std::string> OutputFile{
320  "output-file",
321  llvm::cl::Positional,
322  llvm::cl::Required,
323  llvm::cl::desc("Output file for export"),
324  };
325 
326 public:
327  void run() override {
328  using namespace clang::clangd;
329  // Read input file (as specified in global option)
330  auto Buffer = llvm::MemoryBuffer::getFile(IndexLocation);
331  if (!Buffer) {
332  llvm::errs() << llvm::formatv("Can't open {0}", IndexLocation) << "\n";
333  return;
334  }
335 
336  // Auto-detects input format when parsing
337  auto IndexIn = clang::clangd::readIndexFile(Buffer->get()->getBuffer());
338  if (!IndexIn) {
339  llvm::errs() << llvm::toString(IndexIn.takeError()) << "\n";
340  return;
341  }
342 
343  // Prepare output file
344  std::error_code EC;
345  llvm::raw_fd_ostream OutputStream(OutputFile, EC);
346  if (EC) {
347  llvm::errs() << llvm::formatv("Can't open {0} for writing", OutputFile)
348  << "\n";
349  return;
350  }
351 
352  // Export
353  clang::clangd::IndexFileOut IndexOut(IndexIn.get());
354  IndexOut.Format = Format;
355  OutputStream << IndexOut;
356  }
357 };
358 
359 struct {
360  const char *Name;
361  const char *Description;
362  std::function<std::unique_ptr<Command>()> Implementation;
363 } CommandInfo[] = {
364  {"find", "Search for symbols with fuzzyFind", std::make_unique<FuzzyFind>},
365  {"lookup", "Dump symbol details by ID or qualified name",
366  std::make_unique<Lookup>},
367  {"refs", "Find references by ID or qualified name", std::make_unique<Refs>},
368  {"relations", "Find relations by ID and relation kind",
369  std::make_unique<Relations>},
370  {"export", "Export index", std::make_unique<Export>},
371 };
372 
373 std::unique_ptr<SymbolIndex> openIndex(llvm::StringRef Index) {
374  return Index.startswith("remote:")
375  ? remote::getClient(Index.drop_front(strlen("remote:")),
376  ProjectRoot)
377  : loadIndex(Index, /*UseDex=*/true);
378 }
379 
380 bool runCommand(std::string Request, const SymbolIndex &Index) {
381  // Split on spaces and add required null-termination.
382  std::replace(Request.begin(), Request.end(), ' ', '\0');
383  llvm::SmallVector<llvm::StringRef> Args;
384  llvm::StringRef(Request).split(Args, '\0', /*MaxSplit=*/-1,
385  /*KeepEmpty=*/false);
386  if (Args.empty())
387  return false;
388  if (Args.front() == "help") {
389  llvm::outs() << "dexp - Index explorer\nCommands:\n";
390  for (const auto &C : CommandInfo)
391  llvm::outs() << llvm::formatv("{0,16} - {1}\n", C.Name, C.Description);
392  llvm::outs() << "Get detailed command help with e.g. `find -help`.\n";
393  return true;
394  }
395  llvm::SmallVector<const char *> FakeArgv;
396  for (llvm::StringRef S : Args)
397  FakeArgv.push_back(S.data()); // Terminated by separator or end of string.
398 
399  for (const auto &Cmd : CommandInfo) {
400  if (Cmd.Name == Args.front())
401  return Cmd.Implementation()->parseAndRun(FakeArgv, Cmd.Description,
402  Index);
403  }
404  llvm::errs() << "Unknown command. Try 'help'.\n";
405  return false;
406 }
407 
408 } // namespace
409 } // namespace clangd
410 } // namespace clang
411 
412 int main(int argc, const char *argv[]) {
413  using namespace clang::clangd;
414 
415  llvm::cl::ParseCommandLineOptions(argc, argv, Overview);
416  llvm::cl::ResetCommandLineParser(); // We reuse it for REPL commands.
417  llvm::sys::PrintStackTraceOnErrorSignal(argv[0]);
418 
419  bool RemoteMode = llvm::StringRef(IndexLocation).startswith("remote:");
420  if (RemoteMode && ProjectRoot.empty()) {
421  llvm::errs() << "--project-root is required in remote mode\n";
422  return -1;
423  }
424 
425  std::unique_ptr<SymbolIndex> Index;
426  reportTime(RemoteMode ? "Remote index client creation" : "Dex build",
427  [&]() { Index = openIndex(IndexLocation); });
428 
429  if (!Index) {
430  llvm::errs() << "Failed to open the index.\n";
431  return -1;
432  }
433 
434  if (!ExecCommand.empty())
435  return runCommand(ExecCommand, *Index) ? 0 : 1;
436 
437  llvm::LineEditor LE("dexp");
438  while (llvm::Optional<std::string> Request = LE.readLine())
439  runCommand(std::move(*Request), *Index);
440 }
clang::clangd::IndexFileFormat::YAML
@ YAML
Client.h
Dex.h
clang::clangd::RelationKind::BaseOf
@ BaseOf
Refs
RefSlab Refs
Definition: SymbolCollectorTests.cpp:311
clang::clangd::IndexFileFormat::RIFF
@ RIFF
clang::clangd::splitQualifiedName
std::pair< StringRef, StringRef > splitQualifiedName(StringRef QName)
Definition: SourceCode.cpp:491
Index.h
clang::clangd::readIndexFile
llvm::Expected< IndexFileIn > readIndexFile(llvm::StringRef Data)
Definition: Serialization.cpp:687
clang::clangd::IndexFileOut
Definition: Serialization.h:55
clang::clangd::URI::parse
static llvm::Expected< URI > parse(llvm::StringRef Uri)
Parse a URI string "<scheme>:[//<authority>/]<path>".
Definition: URI.cpp:179
clang::clangd
Definition: AST.cpp:41
Relation.h
Name
llvm::StringRef Name
Definition: CodeComplete.cpp:162
Implementation
std::function< std::unique_ptr< Command >)> Implementation
Definition: Dexp.cpp:362
clang::clangd::toYAML
std::string toYAML(const Symbol &)
Definition: YAMLSerialization.cpp:488
Args
llvm::json::Object Args
Definition: Trace.cpp:139
Description
const char * Description
Definition: Dexp.cpp:361
clang::clangd::loadIndex
std::unique_ptr< SymbolIndex > loadIndex(llvm::StringRef SymbolFilename, bool UseDex)
Definition: Serialization.cpp:698
clang::clangd::replace
static std::string replace(llvm::StringRef Haystack, llvm::StringRef Needle, llvm::StringRef Repl)
Definition: TestIndex.cpp:30
clang::clangd::remote::getClient
std::unique_ptr< clangd::SymbolIndex > getClient(llvm::StringRef Address, llvm::StringRef ProjectRoot)
Returns an SymbolIndex client that passes requests to remote index located at Address.
Definition: Client.cpp:185
Serialization.h
clang::doc::SymbolID
std::array< uint8_t, 20 > SymbolID
Definition: Representation.h:30
main
int main(int argc, const char *argv[])
Definition: Dexp.cpp:412
Index
const SymbolIndex * Index
Definition: Dexp.cpp:99
clang::clangd::SymbolIndex
Interface for symbol indexes that can be used for searching or matching symbols among a set of symbol...
Definition: Index.h:111
ID
static char ID
Definition: Logger.cpp:74
C
const Criteria C
Definition: FunctionCognitiveComplexityCheck.cpp:93
clang::clangd::SymbolID::fromStr
static llvm::Expected< SymbolID > fromStr(llvm::StringRef)
Definition: SymbolID.cpp:36
clang
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
Definition: ApplyReplacements.h:27
OS
llvm::raw_string_ostream OS
Definition: TraceTests.cpp:163
clang::tidy::cppcoreguidelines::toString
static llvm::StringRef toString(SpecialMemberFunctionsCheck::SpecialMemberFunctionKind K)
Definition: SpecialMemberFunctionsCheck.cpp:55
clang::clangd::RelationKind::OverriddenBy
@ OverriddenBy
clang::clangd::SymbolIndex::fuzzyFind
virtual bool fuzzyFind(const FuzzyFindRequest &Req, llvm::function_ref< void(const Symbol &)> Callback) const =0
Matches symbols in the index fuzzily and applies Callback on each matched symbol before returning.