clang-tools  15.0.0git
Dexp.cpp
Go to the documentation of this file.
1 //===--- Dexp.cpp - Dex EXPloration tool ------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a simple interactive tool which can be used to manually
10 // evaluate symbol search quality of Clangd index.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "index/Index.h"
15 #include "index/Relation.h"
16 #include "index/Serialization.h"
17 #include "index/remote/Client.h"
18 #include "llvm/ADT/ScopeExit.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/LineEditor/LineEditor.h"
22 #include "llvm/Support/CommandLine.h"
23 #include "llvm/Support/Signals.h"
24 
25 namespace clang {
26 namespace clangd {
27 namespace {
28 
29 llvm::cl::opt<std::string> IndexLocation(
30  llvm::cl::desc("<path to index file | remote:server.address>"),
31  llvm::cl::Positional);
32 
33 llvm::cl::opt<std::string>
34  ExecCommand("c", llvm::cl::desc("Command to execute and then exit."));
35 
36 llvm::cl::opt<std::string> ProjectRoot(
37  "project-root",
38  llvm::cl::desc(
39  "Path to the project. Required when connecting using remote index."));
40 
// Tool description passed to llvm::cl::ParseCommandLineOptions() in main().
static constexpr char Overview[] = R"(
This is an **experimental** interactive tool to process user-provided search
queries over given symbol collection obtained via clangd-indexer. The
tool can be used to evaluate search quality of existing index implementations
and manually construct non-trivial test cases.

You can connect to remote index by passing remote:address to dexp. Example:

$ dexp remote:0.0.0.0:9000

Type "help" to get information about the available commands.
)";
53 
54 void reportTime(llvm::StringRef Name, llvm::function_ref<void()> F) {
55  const auto TimerStart = std::chrono::high_resolution_clock::now();
56  F();
57  const auto TimerStop = std::chrono::high_resolution_clock::now();
58  const auto Duration = std::chrono::duration_cast<std::chrono::milliseconds>(
59  TimerStop - TimerStart);
60  llvm::outs() << llvm::formatv("{0} took {1:ms+n}.\n", Name, Duration);
61 }
62 
63 std::vector<SymbolID> getSymbolIDsFromIndex(llvm::StringRef QualifiedName,
64  const SymbolIndex *Index) {
65  FuzzyFindRequest Request;
66  // Remove leading "::" qualifier as FuzzyFind doesn't need leading "::"
67  // qualifier for global scope.
68  bool IsGlobalScope = QualifiedName.consume_front("::");
69  auto Names = splitQualifiedName(QualifiedName);
70  if (IsGlobalScope || !Names.first.empty())
71  Request.Scopes = {std::string(Names.first)};
72  else
73  // QualifiedName refers to a symbol in global scope (e.g. "GlobalSymbol"),
74  // add the global scope to the request.
75  Request.Scopes = {""};
76 
77  Request.Query = std::string(Names.second);
78  std::vector<SymbolID> SymIDs;
79  Index->fuzzyFind(Request, [&](const Symbol &Sym) {
80  std::string SymQualifiedName = (Sym.Scope + Sym.Name).str();
81  if (QualifiedName == SymQualifiedName)
82  SymIDs.push_back(Sym.ID);
83  });
84  return SymIDs;
85 }
86 
87 // REPL commands inherit from Command and contain their options as members.
88 // Creating a Command populates parser options, parseAndRun() resets them.
89 class Command {
90  // By resetting the parser options, we lost the standard -help flag.
91  llvm::cl::opt<bool, false, llvm::cl::parser<bool>> Help{
92  "help", llvm::cl::desc("Display available options"),
93  llvm::cl::ValueDisallowed, llvm::cl::cat(llvm::cl::getGeneralCategory())};
94  // FIXME: Allow commands to signal failure.
95  virtual void run() = 0;
96 
97 protected:
98  const SymbolIndex *Index;
99 
100 public:
101  virtual ~Command() = default;
102  bool parseAndRun(llvm::ArrayRef<const char *> Argv, const char *Overview,
103  const SymbolIndex &Index) {
104  std::string ParseErrs;
105  llvm::raw_string_ostream OS(ParseErrs);
106  bool Ok = llvm::cl::ParseCommandLineOptions(Argv.size(), Argv.data(),
107  Overview, &OS);
108  // must do this before opts are destroyed
109  auto Cleanup = llvm::make_scope_exit(llvm::cl::ResetCommandLineParser);
110  if (Help.getNumOccurrences() > 0) {
111  // Avoid printing parse errors in this case.
112  // (Well, in theory. A bunch get printed to llvm::errs() regardless!)
113  llvm::cl::PrintHelpMessage();
114  return true;
115  }
116 
117  llvm::outs() << OS.str();
118  if (Ok) {
119  this->Index = &Index;
120  reportTime(Argv[0], [&] { run(); });
121  }
122  return Ok;
123  }
124 };
125 
126 // FIXME(kbobyrev): Ideas for more commands:
127 // * load/swap/reload index: this would make it possible to get rid of llvm::cl
128 // usages in the tool driver and actually use llvm::cl library in the REPL.
129 // * show posting list density histogram (or dump data somewhere so that user
130 // could build one)
131 // * show number of tokens of each kind
132 // * print out tokens with the most dense posting lists
133 // * print out tokens with least dense posting lists
134 
135 class FuzzyFind : public Command {
136  llvm::cl::opt<std::string> Query{
137  "query",
138  llvm::cl::Positional,
139  llvm::cl::Required,
140  llvm::cl::desc("Query string to be fuzzy-matched"),
141  };
142  llvm::cl::opt<std::string> Scopes{
143  "scopes",
144  llvm::cl::desc("Allowed symbol scopes (comma-separated list)"),
145  };
146  llvm::cl::opt<unsigned> Limit{
147  "limit",
148  llvm::cl::init(10),
149  llvm::cl::desc("Max results to display"),
150  };
151 
152  void run() override {
153  FuzzyFindRequest Request;
154  Request.Limit = Limit;
155  Request.Query = Query;
156  if (Scopes.getNumOccurrences() > 0) {
157  llvm::SmallVector<llvm::StringRef> Scopes;
158  llvm::StringRef(this->Scopes).split(Scopes, ',');
159  Request.Scopes = {Scopes.begin(), Scopes.end()};
160  }
161  Request.AnyScope = Request.Scopes.empty();
162  // FIXME(kbobyrev): Print symbol final scores to see the distribution.
163  static const auto *OutputFormat = "{0,-4} | {1,-40} | {2,-25}\n";
164  llvm::outs() << llvm::formatv(OutputFormat, "Rank", "Symbol ID",
165  "Symbol Name");
166  size_t Rank = 0;
167  Index->fuzzyFind(Request, [&](const Symbol &Sym) {
168  llvm::outs() << llvm::formatv(OutputFormat, Rank++, Sym.ID.str(),
169  Sym.Scope + Sym.Name);
170  });
171  }
172 };
173 
174 class Lookup : public Command {
175  llvm::cl::opt<std::string> ID{
176  "id",
177  llvm::cl::Positional,
178  llvm::cl::desc("Symbol ID to look up (hex)"),
179  };
180  llvm::cl::opt<std::string> Name{
181  "name",
182  llvm::cl::desc("Qualified name to look up."),
183  };
184 
185  void run() override {
186  if (ID.getNumOccurrences() == 0 && Name.getNumOccurrences() == 0) {
187  llvm::errs()
188  << "Missing required argument: please provide id or -name.\n";
189  return;
190  }
191  std::vector<SymbolID> IDs;
192  if (ID.getNumOccurrences()) {
193  auto SID = SymbolID::fromStr(ID);
194  if (!SID) {
195  llvm::errs() << llvm::toString(SID.takeError()) << "\n";
196  return;
197  }
198  IDs.push_back(*SID);
199  } else {
200  IDs = getSymbolIDsFromIndex(Name, Index);
201  }
202 
203  LookupRequest Request;
204  Request.IDs.insert(IDs.begin(), IDs.end());
205  bool FoundSymbol = false;
206  Index->lookup(Request, [&](const Symbol &Sym) {
207  FoundSymbol = true;
208  llvm::outs() << toYAML(Sym);
209  });
210  if (!FoundSymbol)
211  llvm::errs() << "not found\n";
212  }
213 };
214 
215 class Refs : public Command {
216  llvm::cl::opt<std::string> ID{
217  "id",
218  llvm::cl::Positional,
219  llvm::cl::desc("Symbol ID of the symbol being queried (hex)."),
220  };
221  llvm::cl::opt<std::string> Name{
222  "name",
223  llvm::cl::desc("Qualified name of the symbol being queried."),
224  };
225  llvm::cl::opt<std::string> Filter{
226  "filter",
227  llvm::cl::init(".*"),
228  llvm::cl::desc(
229  "Print all results from files matching this regular expression."),
230  };
231 
232  void run() override {
233  if (ID.getNumOccurrences() == 0 && Name.getNumOccurrences() == 0) {
234  llvm::errs()
235  << "Missing required argument: please provide id or -name.\n";
236  return;
237  }
238  std::vector<SymbolID> IDs;
239  if (ID.getNumOccurrences()) {
240  auto SID = SymbolID::fromStr(ID);
241  if (!SID) {
242  llvm::errs() << llvm::toString(SID.takeError()) << "\n";
243  return;
244  }
245  IDs.push_back(*SID);
246  } else {
247  IDs = getSymbolIDsFromIndex(Name, Index);
248  if (IDs.size() > 1) {
249  llvm::errs() << llvm::formatv(
250  "The name {0} is ambiguous, found {1} different "
251  "symbols. Please use id flag to disambiguate.\n",
252  Name, IDs.size());
253  return;
254  }
255  }
256  RefsRequest RefRequest;
257  RefRequest.IDs.insert(IDs.begin(), IDs.end());
258  llvm::Regex RegexFilter(Filter);
259  Index->refs(RefRequest, [&RegexFilter](const Ref &R) {
260  auto U = URI::parse(R.Location.FileURI);
261  if (!U) {
262  llvm::errs() << U.takeError();
263  return;
264  }
265  if (RegexFilter.match(U->body()))
266  llvm::outs() << R << "\n";
267  });
268  }
269 };
270 
271 class Relations : public Command {
272  llvm::cl::opt<std::string> ID{
273  "id",
274  llvm::cl::Positional,
275  llvm::cl::desc("Symbol ID of the symbol being queried (hex)."),
276  };
277  llvm::cl::opt<RelationKind> Relation{
278  "relation",
279  llvm::cl::desc("Relation kind for the predicate."),
280  values(clEnumValN(RelationKind::BaseOf, "base_of",
281  "Find subclasses of a class."),
282  clEnumValN(RelationKind::OverriddenBy, "overridden_by",
283  "Find methods that overrides a virtual method.")),
284  };
285 
286  void run() override {
287  if (ID.getNumOccurrences() == 0 || Relation.getNumOccurrences() == 0) {
288  llvm::errs()
289  << "Missing required argument: please provide id and -relation.\n";
290  return;
291  }
292  RelationsRequest Req;
293  if (ID.getNumOccurrences()) {
294  auto SID = SymbolID::fromStr(ID);
295  if (!SID) {
296  llvm::errs() << llvm::toString(SID.takeError()) << "\n";
297  return;
298  }
299  Req.Subjects.insert(*SID);
300  }
301  Req.Predicate = Relation.getValue();
302  Index->relations(Req, [](const SymbolID &SID, const Symbol &S) {
303  llvm::outs() << toYAML(S);
304  });
305  }
306 };
307 
308 class Export : public Command {
309  llvm::cl::opt<IndexFileFormat> Format{
310  "format",
311  llvm::cl::desc("Format of index export"),
312  llvm::cl::values(
313  clEnumValN(IndexFileFormat::YAML, "yaml",
314  "human-readable YAML format"),
315  clEnumValN(IndexFileFormat::RIFF, "binary", "binary RIFF format")),
316  llvm::cl::init(IndexFileFormat::YAML),
317  };
318  llvm::cl::opt<std::string> OutputFile{
319  "output-file",
320  llvm::cl::Positional,
321  llvm::cl::Required,
322  llvm::cl::desc("Output file for export"),
323  };
324 
325 public:
326  void run() override {
327  using namespace clang::clangd;
328  // Read input file (as specified in global option)
329  auto Buffer = llvm::MemoryBuffer::getFile(IndexLocation);
330  if (!Buffer) {
331  llvm::errs() << llvm::formatv("Can't open {0}", IndexLocation) << "\n";
332  return;
333  }
334 
335  // Auto-detects input format when parsing
336  auto IndexIn = clang::clangd::readIndexFile(Buffer->get()->getBuffer(),
338  if (!IndexIn) {
339  llvm::errs() << llvm::toString(IndexIn.takeError()) << "\n";
340  return;
341  }
342 
343  // Prepare output file
344  std::error_code EC;
345  llvm::raw_fd_ostream OutputStream(OutputFile, EC);
346  if (EC) {
347  llvm::errs() << llvm::formatv("Can't open {0} for writing", OutputFile)
348  << "\n";
349  return;
350  }
351 
352  // Export
353  clang::clangd::IndexFileOut IndexOut(IndexIn.get());
354  IndexOut.Format = Format;
355  OutputStream << IndexOut;
356  }
357 };
358 
359 struct {
360  const char *Name;
361  const char *Description;
362  std::function<std::unique_ptr<Command>()> Implementation;
363 } CommandInfo[] = {
364  {"find", "Search for symbols with fuzzyFind", std::make_unique<FuzzyFind>},
365  {"lookup", "Dump symbol details by ID or qualified name",
366  std::make_unique<Lookup>},
367  {"refs", "Find references by ID or qualified name", std::make_unique<Refs>},
368  {"relations", "Find relations by ID and relation kind",
369  std::make_unique<Relations>},
370  {"export", "Export index", std::make_unique<Export>},
371 };
372 
373 std::unique_ptr<SymbolIndex> openIndex(llvm::StringRef Index) {
374  return Index.startswith("remote:")
375  ? remote::getClient(Index.drop_front(strlen("remote:")),
376  ProjectRoot)
377  : loadIndex(Index, SymbolOrigin::Static, /*UseDex=*/true);
378 }
379 
380 bool runCommand(std::string Request, const SymbolIndex &Index) {
381  // Split on spaces and add required null-termination.
382  std::replace(Request.begin(), Request.end(), ' ', '\0');
383  llvm::SmallVector<llvm::StringRef> Args;
384  llvm::StringRef(Request).split(Args, '\0', /*MaxSplit=*/-1,
385  /*KeepEmpty=*/false);
386  if (Args.empty())
387  return false;
388  if (Args.front() == "help") {
389  llvm::outs() << "dexp - Index explorer\nCommands:\n";
390  for (const auto &C : CommandInfo)
391  llvm::outs() << llvm::formatv("{0,16} - {1}\n", C.Name, C.Description);
392  llvm::outs() << "Get detailed command help with e.g. `find -help`.\n";
393  return true;
394  }
395  llvm::SmallVector<const char *> FakeArgv;
396  for (llvm::StringRef S : Args)
397  FakeArgv.push_back(S.data()); // Terminated by separator or end of string.
398 
399  for (const auto &Cmd : CommandInfo) {
400  if (Cmd.Name == Args.front())
401  return Cmd.Implementation()->parseAndRun(FakeArgv, Cmd.Description,
402  Index);
403  }
404  llvm::errs() << "Unknown command. Try 'help'.\n";
405  return false;
406 }
407 
408 } // namespace
409 } // namespace clangd
410 } // namespace clang
411 
412 int main(int argc, const char *argv[]) {
413  using namespace clang::clangd;
414 
415  llvm::cl::ParseCommandLineOptions(argc, argv, Overview);
416 
417  // Preserve global options when flag parser is reset, so commands can use
418  // them.
419  IndexLocation.setValue(IndexLocation, /*initial=*/true);
420  ExecCommand.setValue(ExecCommand, /*initial=*/true);
421  ProjectRoot.setValue(ProjectRoot, /*initial=*/true);
422 
423  llvm::cl::ResetCommandLineParser(); // We reuse it for REPL commands.
424  llvm::sys::PrintStackTraceOnErrorSignal(argv[0]);
425 
426  bool RemoteMode = llvm::StringRef(IndexLocation).startswith("remote:");
427  if (RemoteMode && ProjectRoot.empty()) {
428  llvm::errs() << "--project-root is required in remote mode\n";
429  return -1;
430  }
431 
432  std::unique_ptr<SymbolIndex> Index;
433  reportTime(RemoteMode ? "Remote index client creation" : "Dex build",
434  [&]() { Index = openIndex(IndexLocation); });
435 
436  if (!Index) {
437  llvm::errs() << "Failed to open the index.\n";
438  return -1;
439  }
440 
441  if (!ExecCommand.empty())
442  return runCommand(ExecCommand, *Index) ? 0 : 1;
443 
444  llvm::LineEditor LE("dexp");
445  while (llvm::Optional<std::string> Request = LE.readLine())
446  runCommand(std::move(*Request), *Index);
447 }
clang::clangd::IndexFileFormat::YAML
@ YAML
Client.h
Static
bool Static
Definition: ExtractFunction.cpp:359
clang::clangd::RelationKind::BaseOf
@ BaseOf
Refs
RefSlab Refs
Definition: SymbolCollectorTests.cpp:312
clang::clangd::IndexFileFormat::RIFF
@ RIFF
clang::clangd::splitQualifiedName
std::pair< StringRef, StringRef > splitQualifiedName(StringRef QName)
Definition: SourceCode.cpp:488
Index.h
clang::clangd::IndexFileOut
Definition: Serialization.h:55
clang::clangd::URI::parse
static llvm::Expected< URI > parse(llvm::StringRef Uri)
Parse a URI string "<scheme>:[//<authority>/]<path>".
Definition: URI.cpp:177
clang::clangd
Definition: AST.cpp:39
Relation.h
clang::clangd::readIndexFile
llvm::Expected< IndexFileIn > readIndexFile(llvm::StringRef Data, SymbolOrigin Origin)
Definition: Serialization.cpp:689
Implementation
std::function< std::unique_ptr< Command >()> Implementation
Definition: Dexp.cpp:362
clang::clangd::toYAML
std::string toYAML(const Symbol &)
Definition: YAMLSerialization.cpp:472
clang::clangd::loadIndex
std::unique_ptr< SymbolIndex > loadIndex(llvm::StringRef SymbolFilename, SymbolOrigin Origin, bool UseDex)
Definition: Serialization.cpp:702
Args
llvm::json::Object Args
Definition: Trace.cpp:138
Description
const char * Description
Definition: Dexp.cpp:361
clang::clangd::replace
static std::string replace(llvm::StringRef Haystack, llvm::StringRef Needle, llvm::StringRef Repl)
Definition: TestIndex.cpp:30
clang::clangd::remote::getClient
std::unique_ptr< clangd::SymbolIndex > getClient(llvm::StringRef Address, llvm::StringRef ProjectRoot)
Returns a SymbolIndex client that passes requests to remote index located at Address.
Definition: Client.cpp:185
Serialization.h
clang::doc::SymbolID
std::array< uint8_t, 20 > SymbolID
Definition: Representation.h:30
Name
Token Name
Definition: MacroToEnumCheck.cpp:89
main
int main(int argc, const char *argv[])
Definition: Dexp.cpp:412
Index
const SymbolIndex * Index
Definition: Dexp.cpp:98
clang::clangd::SymbolIndex
Interface for symbol indexes that can be used for searching or matching symbols among a set of symbol...
Definition: Index.h:113
clang::clangd::SymbolOrigin::Static
@ Static
ID
static char ID
Definition: Logger.cpp:74
C
const Criteria C
Definition: FunctionCognitiveComplexityCheck.cpp:93
clang::clangd::SymbolID::fromStr
static llvm::Expected< SymbolID > fromStr(llvm::StringRef)
Definition: SymbolID.cpp:36
clang
//===-- Representation.cpp - ClangDoc Representation -----------*- C++ -*-===//
Definition: ApplyReplacements.h:27
OS
llvm::raw_string_ostream OS
Definition: TraceTests.cpp:160
clang::clangd::SymbolOrigin
SymbolOrigin
Definition: SymbolOrigin.h:21
clang::tidy::cppcoreguidelines::toString
static llvm::StringRef toString(SpecialMemberFunctionsCheck::SpecialMemberFunctionKind K)
Definition: SpecialMemberFunctionsCheck.cpp:55
clang::clangd::RelationKind::OverriddenBy
@ OverriddenBy
clang::clangd::SymbolIndex::fuzzyFind
virtual bool fuzzyFind(const FuzzyFindRequest &Req, llvm::function_ref< void(const Symbol &)> Callback) const =0
Matches symbols in the index fuzzily and applies Callback on each matched symbol before returning.