clang-tools  17.0.0git
Dexp.cpp
Go to the documentation of this file.
1 //===--- Dexp.cpp - Dex EXPloration tool ------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a simple interactive tool which can be used to manually
10 // evaluate symbol search quality of Clangd index.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "index/Index.h"
15 #include "index/Relation.h"
16 #include "index/Serialization.h"
17 #include "index/remote/Client.h"
18 #include "llvm/ADT/ScopeExit.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/LineEditor/LineEditor.h"
22 #include "llvm/Support/CommandLine.h"
23 #include "llvm/Support/Signals.h"
24 #include <optional>
25 
26 namespace clang {
27 namespace clangd {
28 namespace {
29 
30 llvm::cl::opt<std::string> IndexLocation(
31  llvm::cl::desc("<path to index file | remote:server.address>"),
32  llvm::cl::Positional);
33 
34 llvm::cl::opt<std::string>
35  ExecCommand("c", llvm::cl::desc("Command to execute and then exit."));
36 
37 llvm::cl::opt<std::string> ProjectRoot(
38  "project-root",
39  llvm::cl::desc(
40  "Path to the project. Required when connecting using remote index."));
41 
/// Tool description handed to llvm::cl::ParseCommandLineOptions() as the
/// overview text when main() parses the top-level command line.
static constexpr char Overview[] = R"(
This is an **experimental** interactive tool to process user-provided search
queries over given symbol collection obtained via clangd-indexer. The
tool can be used to evaluate search quality of existing index implementations
and manually construct non-trivial test cases.

You can connect to remote index by passing remote:address to dexp. Example:

$ dexp remote:0.0.0.0:9000

Type "help" to get information about the available commands.
)";
54 
55 void reportTime(llvm::StringRef Name, llvm::function_ref<void()> F) {
56  const auto TimerStart = std::chrono::high_resolution_clock::now();
57  F();
58  const auto TimerStop = std::chrono::high_resolution_clock::now();
59  const auto Duration = std::chrono::duration_cast<std::chrono::milliseconds>(
60  TimerStop - TimerStart);
61  llvm::outs() << llvm::formatv("{0} took {1:ms+n}.\n", Name, Duration);
62 }
63 
64 std::vector<SymbolID> getSymbolIDsFromIndex(llvm::StringRef QualifiedName,
65  const SymbolIndex *Index) {
66  FuzzyFindRequest Request;
67  // Remove leading "::" qualifier as FuzzyFind doesn't need leading "::"
68  // qualifier for global scope.
69  bool IsGlobalScope = QualifiedName.consume_front("::");
70  auto Names = splitQualifiedName(QualifiedName);
71  if (IsGlobalScope || !Names.first.empty())
72  Request.Scopes = {std::string(Names.first)};
73  else
74  // QualifiedName refers to a symbol in global scope (e.g. "GlobalSymbol"),
75  // add the global scope to the request.
76  Request.Scopes = {""};
77 
78  Request.Query = std::string(Names.second);
79  std::vector<SymbolID> SymIDs;
80  Index->fuzzyFind(Request, [&](const Symbol &Sym) {
81  std::string SymQualifiedName = (Sym.Scope + Sym.Name).str();
82  if (QualifiedName == SymQualifiedName)
83  SymIDs.push_back(Sym.ID);
84  });
85  return SymIDs;
86 }
87 
88 // REPL commands inherit from Command and contain their options as members.
89 // Creating a Command populates parser options, parseAndRun() resets them.
90 class Command {
91  // By resetting the parser options, we lost the standard -help flag.
92  llvm::cl::opt<bool, false, llvm::cl::parser<bool>> Help{
93  "help", llvm::cl::desc("Display available options"),
94  llvm::cl::ValueDisallowed, llvm::cl::cat(llvm::cl::getGeneralCategory())};
95  // FIXME: Allow commands to signal failure.
96  virtual void run() = 0;
97 
98 protected:
99  const SymbolIndex *Index;
100 
101 public:
102  virtual ~Command() = default;
103  bool parseAndRun(llvm::ArrayRef<const char *> Argv, const char *Overview,
104  const SymbolIndex &Index) {
105  std::string ParseErrs;
106  llvm::raw_string_ostream OS(ParseErrs);
107  bool Ok = llvm::cl::ParseCommandLineOptions(Argv.size(), Argv.data(),
108  Overview, &OS);
109  // must do this before opts are destroyed
110  auto Cleanup = llvm::make_scope_exit(llvm::cl::ResetCommandLineParser);
111  if (Help.getNumOccurrences() > 0) {
112  // Avoid printing parse errors in this case.
113  // (Well, in theory. A bunch get printed to llvm::errs() regardless!)
114  llvm::cl::PrintHelpMessage();
115  return true;
116  }
117 
118  llvm::outs() << OS.str();
119  if (Ok) {
120  this->Index = &Index;
121  reportTime(Argv[0], [&] { run(); });
122  }
123  return Ok;
124  }
125 };
126 
// FIXME(kbobyrev): Ideas for more commands:
// * load/swap/reload index: this would make it possible to get rid of llvm::cl
//   usages in the tool driver and actually use llvm::cl library in the REPL.
// * show posting list density histogram (or dump data somewhere so that user
//   could build one)
// * show number of tokens of each kind
// * print out tokens with the most dense posting lists
// * print out tokens with least dense posting lists
135 
136 class FuzzyFind : public Command {
137  llvm::cl::opt<std::string> Query{
138  "query",
139  llvm::cl::Positional,
140  llvm::cl::Required,
141  llvm::cl::desc("Query string to be fuzzy-matched"),
142  };
143  llvm::cl::opt<std::string> Scopes{
144  "scopes",
145  llvm::cl::desc("Allowed symbol scopes (comma-separated list)"),
146  };
147  llvm::cl::opt<unsigned> Limit{
148  "limit",
149  llvm::cl::init(10),
150  llvm::cl::desc("Max results to display"),
151  };
152 
153  void run() override {
154  FuzzyFindRequest Request;
155  Request.Limit = Limit;
156  Request.Query = Query;
157  if (Scopes.getNumOccurrences() > 0) {
158  llvm::SmallVector<llvm::StringRef> Scopes;
159  llvm::StringRef(this->Scopes).split(Scopes, ',');
160  Request.Scopes = {Scopes.begin(), Scopes.end()};
161  }
162  Request.AnyScope = Request.Scopes.empty();
163  // FIXME(kbobyrev): Print symbol final scores to see the distribution.
164  static const auto *OutputFormat = "{0,-4} | {1,-40} | {2,-25}\n";
165  llvm::outs() << llvm::formatv(OutputFormat, "Rank", "Symbol ID",
166  "Symbol Name");
167  size_t Rank = 0;
168  Index->fuzzyFind(Request, [&](const Symbol &Sym) {
169  llvm::outs() << llvm::formatv(OutputFormat, Rank++, Sym.ID.str(),
170  Sym.Scope + Sym.Name);
171  });
172  }
173 };
174 
175 class Lookup : public Command {
176  llvm::cl::opt<std::string> ID{
177  "id",
178  llvm::cl::Positional,
179  llvm::cl::desc("Symbol ID to look up (hex)"),
180  };
181  llvm::cl::opt<std::string> Name{
182  "name",
183  llvm::cl::desc("Qualified name to look up."),
184  };
185 
186  void run() override {
187  if (ID.getNumOccurrences() == 0 && Name.getNumOccurrences() == 0) {
188  llvm::errs()
189  << "Missing required argument: please provide id or -name.\n";
190  return;
191  }
192  std::vector<SymbolID> IDs;
193  if (ID.getNumOccurrences()) {
194  auto SID = SymbolID::fromStr(ID);
195  if (!SID) {
196  llvm::errs() << llvm::toString(SID.takeError()) << "\n";
197  return;
198  }
199  IDs.push_back(*SID);
200  } else {
201  IDs = getSymbolIDsFromIndex(Name, Index);
202  }
203 
204  LookupRequest Request;
205  Request.IDs.insert(IDs.begin(), IDs.end());
206  bool FoundSymbol = false;
207  Index->lookup(Request, [&](const Symbol &Sym) {
208  FoundSymbol = true;
209  llvm::outs() << toYAML(Sym);
210  });
211  if (!FoundSymbol)
212  llvm::errs() << "not found\n";
213  }
214 };
215 
216 class Refs : public Command {
217  llvm::cl::opt<std::string> ID{
218  "id",
219  llvm::cl::Positional,
220  llvm::cl::desc("Symbol ID of the symbol being queried (hex)."),
221  };
222  llvm::cl::opt<std::string> Name{
223  "name",
224  llvm::cl::desc("Qualified name of the symbol being queried."),
225  };
226  llvm::cl::opt<std::string> Filter{
227  "filter",
228  llvm::cl::init(".*"),
229  llvm::cl::desc(
230  "Print all results from files matching this regular expression."),
231  };
232 
233  void run() override {
234  if (ID.getNumOccurrences() == 0 && Name.getNumOccurrences() == 0) {
235  llvm::errs()
236  << "Missing required argument: please provide id or -name.\n";
237  return;
238  }
239  std::vector<SymbolID> IDs;
240  if (ID.getNumOccurrences()) {
241  auto SID = SymbolID::fromStr(ID);
242  if (!SID) {
243  llvm::errs() << llvm::toString(SID.takeError()) << "\n";
244  return;
245  }
246  IDs.push_back(*SID);
247  } else {
248  IDs = getSymbolIDsFromIndex(Name, Index);
249  if (IDs.size() > 1) {
250  llvm::errs() << llvm::formatv(
251  "The name {0} is ambiguous, found {1} different "
252  "symbols. Please use id flag to disambiguate.\n",
253  Name, IDs.size());
254  return;
255  }
256  }
257  RefsRequest RefRequest;
258  RefRequest.IDs.insert(IDs.begin(), IDs.end());
259  llvm::Regex RegexFilter(Filter);
260  Index->refs(RefRequest, [&RegexFilter](const Ref &R) {
261  auto U = URI::parse(R.Location.FileURI);
262  if (!U) {
263  llvm::errs() << U.takeError();
264  return;
265  }
266  if (RegexFilter.match(U->body()))
267  llvm::outs() << R << "\n";
268  });
269  }
270 };
271 
272 class Relations : public Command {
273  llvm::cl::opt<std::string> ID{
274  "id",
275  llvm::cl::Positional,
276  llvm::cl::desc("Symbol ID of the symbol being queried (hex)."),
277  };
278  llvm::cl::opt<RelationKind> Relation{
279  "relation",
280  llvm::cl::desc("Relation kind for the predicate."),
281  values(clEnumValN(RelationKind::BaseOf, "base_of",
282  "Find subclasses of a class."),
283  clEnumValN(RelationKind::OverriddenBy, "overridden_by",
284  "Find methods that overrides a virtual method.")),
285  };
286 
287  void run() override {
288  if (ID.getNumOccurrences() == 0 || Relation.getNumOccurrences() == 0) {
289  llvm::errs()
290  << "Missing required argument: please provide id and -relation.\n";
291  return;
292  }
293  RelationsRequest Req;
294  if (ID.getNumOccurrences()) {
295  auto SID = SymbolID::fromStr(ID);
296  if (!SID) {
297  llvm::errs() << llvm::toString(SID.takeError()) << "\n";
298  return;
299  }
300  Req.Subjects.insert(*SID);
301  }
302  Req.Predicate = Relation.getValue();
303  Index->relations(Req, [](const SymbolID &SID, const Symbol &S) {
304  llvm::outs() << toYAML(S);
305  });
306  }
307 };
308 
309 class Export : public Command {
310  llvm::cl::opt<IndexFileFormat> Format{
311  "format",
312  llvm::cl::desc("Format of index export"),
313  llvm::cl::values(
314  clEnumValN(IndexFileFormat::YAML, "yaml",
315  "human-readable YAML format"),
316  clEnumValN(IndexFileFormat::RIFF, "binary", "binary RIFF format")),
317  llvm::cl::init(IndexFileFormat::YAML),
318  };
319  llvm::cl::opt<std::string> OutputFile{
320  "output-file",
321  llvm::cl::Positional,
322  llvm::cl::Required,
323  llvm::cl::desc("Output file for export"),
324  };
325 
326 public:
327  void run() override {
328  using namespace clang::clangd;
329  // Read input file (as specified in global option)
330  auto Buffer = llvm::MemoryBuffer::getFile(IndexLocation);
331  if (!Buffer) {
332  llvm::errs() << llvm::formatv("Can't open {0}", IndexLocation) << "\n";
333  return;
334  }
335 
336  // Auto-detects input format when parsing
337  auto IndexIn = clang::clangd::readIndexFile(Buffer->get()->getBuffer(),
339  if (!IndexIn) {
340  llvm::errs() << llvm::toString(IndexIn.takeError()) << "\n";
341  return;
342  }
343 
344  // Prepare output file
345  std::error_code EC;
346  llvm::raw_fd_ostream OutputStream(OutputFile, EC);
347  if (EC) {
348  llvm::errs() << llvm::formatv("Can't open {0} for writing", OutputFile)
349  << "\n";
350  return;
351  }
352 
353  // Export
354  clang::clangd::IndexFileOut IndexOut(IndexIn.get());
355  IndexOut.Format = Format;
356  OutputStream << IndexOut;
357  }
358 };
359 
360 struct {
361  const char *Name;
362  const char *Description;
363  std::function<std::unique_ptr<Command>()> Implementation;
364 } CommandInfo[] = {
365  {"find", "Search for symbols with fuzzyFind", std::make_unique<FuzzyFind>},
366  {"lookup", "Dump symbol details by ID or qualified name",
367  std::make_unique<Lookup>},
368  {"refs", "Find references by ID or qualified name", std::make_unique<Refs>},
369  {"relations", "Find relations by ID and relation kind",
370  std::make_unique<Relations>},
371  {"export", "Export index", std::make_unique<Export>},
372 };
373 
374 std::unique_ptr<SymbolIndex> openIndex(llvm::StringRef Index) {
375  return Index.startswith("remote:")
376  ? remote::getClient(Index.drop_front(strlen("remote:")),
377  ProjectRoot)
378  : loadIndex(Index, SymbolOrigin::Static, /*UseDex=*/true);
379 }
380 
381 bool runCommand(std::string Request, const SymbolIndex &Index) {
382  // Split on spaces and add required null-termination.
383  std::replace(Request.begin(), Request.end(), ' ', '\0');
384  llvm::SmallVector<llvm::StringRef> Args;
385  llvm::StringRef(Request).split(Args, '\0', /*MaxSplit=*/-1,
386  /*KeepEmpty=*/false);
387  if (Args.empty())
388  return false;
389  if (Args.front() == "help") {
390  llvm::outs() << "dexp - Index explorer\nCommands:\n";
391  for (const auto &C : CommandInfo)
392  llvm::outs() << llvm::formatv("{0,16} - {1}\n", C.Name, C.Description);
393  llvm::outs() << "Get detailed command help with e.g. `find -help`.\n";
394  return true;
395  }
396  llvm::SmallVector<const char *> FakeArgv;
397  for (llvm::StringRef S : Args)
398  FakeArgv.push_back(S.data()); // Terminated by separator or end of string.
399 
400  for (const auto &Cmd : CommandInfo) {
401  if (Cmd.Name == Args.front())
402  return Cmd.Implementation()->parseAndRun(FakeArgv, Cmd.Description,
403  Index);
404  }
405  llvm::errs() << "Unknown command. Try 'help'.\n";
406  return false;
407 }
408 
409 } // namespace
410 } // namespace clangd
411 } // namespace clang
412 
413 int main(int argc, const char *argv[]) {
414  using namespace clang::clangd;
415 
416  llvm::cl::ParseCommandLineOptions(argc, argv, Overview);
417 
418  // Preserve global options when flag parser is reset, so commands can use
419  // them.
420  IndexLocation.setValue(IndexLocation, /*initial=*/true);
421  ExecCommand.setValue(ExecCommand, /*initial=*/true);
422  ProjectRoot.setValue(ProjectRoot, /*initial=*/true);
423 
424  llvm::cl::ResetCommandLineParser(); // We reuse it for REPL commands.
425  llvm::sys::PrintStackTraceOnErrorSignal(argv[0]);
426 
427  bool RemoteMode = llvm::StringRef(IndexLocation).startswith("remote:");
428  if (RemoteMode && ProjectRoot.empty()) {
429  llvm::errs() << "--project-root is required in remote mode\n";
430  return -1;
431  }
432 
433  std::unique_ptr<SymbolIndex> Index;
434  reportTime(RemoteMode ? "Remote index client creation" : "Dex build",
435  [&]() { Index = openIndex(IndexLocation); });
436 
437  if (!Index) {
438  llvm::errs() << "Failed to open the index.\n";
439  return -1;
440  }
441 
442  if (!ExecCommand.empty())
443  return runCommand(ExecCommand, *Index) ? 0 : 1;
444 
445  llvm::LineEditor LE("dexp");
446  while (std::optional<std::string> Request = LE.readLine())
447  runCommand(std::move(*Request), *Index);
448 }
clang::clangd::IndexFileFormat::YAML
@ YAML
Client.h
Static
bool Static
Definition: ExtractFunction.cpp:367
clang::clangd::RelationKind::BaseOf
@ BaseOf
Refs
RefSlab Refs
Definition: SymbolCollectorTests.cpp:312
clang::clangd::IndexFileFormat::RIFF
@ RIFF
clang::clangd::splitQualifiedName
std::pair< StringRef, StringRef > splitQualifiedName(StringRef QName)
Definition: SourceCode.cpp:492
Index.h
clang::clangd::IndexFileOut
Definition: Serialization.h:56
clang::clangd::URI::parse
static llvm::Expected< URI > parse(llvm::StringRef Uri)
Parse a URI string "<scheme>:[//<authority>/]<path>".
Definition: URI.cpp:177
clang::clangd
Definition: AST.cpp:44
Relation.h
clang::clangd::readIndexFile
llvm::Expected< IndexFileIn > readIndexFile(llvm::StringRef Data, SymbolOrigin Origin)
Definition: Serialization.cpp:693
Implementation
std::function< std::unique_ptr< Command >)> Implementation
Definition: Dexp.cpp:363
clang::clangd::toYAML
std::string toYAML(const Symbol &)
Definition: YAMLSerialization.cpp:523
clang::clangd::loadIndex
std::unique_ptr< SymbolIndex > loadIndex(llvm::StringRef SymbolFilename, SymbolOrigin Origin, bool UseDex)
Definition: Serialization.cpp:706
Args
llvm::json::Object Args
Definition: Trace.cpp:138
Description
const char * Description
Definition: Dexp.cpp:362
clang::clangd::replace
static std::string replace(llvm::StringRef Haystack, llvm::StringRef Needle, llvm::StringRef Repl)
Definition: TestIndex.cpp:30
clang::clangd::remote::getClient
std::unique_ptr< clangd::SymbolIndex > getClient(llvm::StringRef Address, llvm::StringRef ProjectRoot)
Returns an SymbolIndex client that passes requests to remote index located at Address.
Definition: Client.cpp:185
Serialization.h
clang::doc::SymbolID
std::array< uint8_t, 20 > SymbolID
Definition: Representation.h:31
Name
Token Name
Definition: MacroToEnumCheck.cpp:87
main
int main(int argc, const char *argv[])
Definition: Dexp.cpp:413
Index
const SymbolIndex * Index
Definition: Dexp.cpp:99
clang::clangd::SymbolIndex
Interface for symbol indexes that can be used for searching or matching symbols among a set of symbol...
Definition: Index.h:113
clang::clangd::SymbolOrigin::Static
@ Static
ID
static char ID
Definition: Logger.cpp:74
C
const Criteria C
Definition: FunctionCognitiveComplexityCheck.cpp:91
clang::clangd::SymbolID::fromStr
static llvm::Expected< SymbolID > fromStr(llvm::StringRef)
Definition: SymbolID.cpp:36
clang
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
Definition: ApplyReplacements.h:27
OS
llvm::raw_string_ostream OS
Definition: TraceTests.cpp:160
clang::clangd::SymbolOrigin
SymbolOrigin
Definition: SymbolOrigin.h:21
clang::tidy::cppcoreguidelines::toString
static llvm::StringRef toString(SpecialMemberFunctionsCheck::SpecialMemberFunctionKind K)
Definition: SpecialMemberFunctionsCheck.cpp:53
clang::clangd::RelationKind::OverriddenBy
@ OverriddenBy
clang::clangd::SymbolIndex::fuzzyFind
virtual bool fuzzyFind(const FuzzyFindRequest &Req, llvm::function_ref< void(const Symbol &)> Callback) const =0
Matches symbols in the index fuzzily and applies Callback on each matched symbol before returning.