clang-tools 20.0.0git
Dexp.cpp
Go to the documentation of this file.
1//===--- Dexp.cpp - Dex EXPloration tool ------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a simple interactive tool which can be used to manually
10// evaluate symbol search quality of Clangd index.
11//
12//===----------------------------------------------------------------------===//
13
14#include "index/Index.h"
15#include "index/Relation.h"
16#include "index/Serialization.h"
17#include "index/remote/Client.h"
18#include "llvm/ADT/ScopeExit.h"
19#include "llvm/ADT/SmallVector.h"
20#include "llvm/ADT/StringRef.h"
21#include "llvm/LineEditor/LineEditor.h"
22#include "llvm/Support/CommandLine.h"
23#include "llvm/Support/Signals.h"
24#include <optional>
25
26namespace clang {
27namespace clangd {
28namespace {
29
30llvm::cl::opt<std::string> IndexLocation(
31 llvm::cl::desc("<path to index file | remote:server.address>"),
32 llvm::cl::Positional);
33
34llvm::cl::opt<std::string>
35 ExecCommand("c", llvm::cl::desc("Command to execute and then exit."));
36
37llvm::cl::opt<std::string> ProjectRoot(
38 "project-root",
39 llvm::cl::desc(
40 "Path to the project. Required when connecting using remote index."));
41
// Help text passed to the command-line parser in main(); shown in the
// tool-level help output.
static constexpr char Overview[] = R"(
This is an **experimental** interactive tool to process user-provided search
queries over given symbol collection obtained via clangd-indexer. The
tool can be used to evaluate search quality of existing index implementations
and manually construct non-trivial test cases.

You can connect to remote index by passing remote:address to dexp. Example:

$ dexp remote:0.0.0.0:9000

Use the "help" command to get information about the available commands.
)";
54
55void reportTime(llvm::StringRef Name, llvm::function_ref<void()> F) {
56 const auto TimerStart = std::chrono::high_resolution_clock::now();
57 F();
58 const auto TimerStop = std::chrono::high_resolution_clock::now();
59 const auto Duration = std::chrono::duration_cast<std::chrono::milliseconds>(
60 TimerStop - TimerStart);
61 llvm::outs() << llvm::formatv("{0} took {1:ms+n}.\n", Name, Duration);
62}
63
64std::vector<SymbolID> getSymbolIDsFromIndex(llvm::StringRef QualifiedName,
65 const SymbolIndex *Index) {
66 FuzzyFindRequest Request;
67 // Remove leading "::" qualifier as FuzzyFind doesn't need leading "::"
68 // qualifier for global scope.
69 bool IsGlobalScope = QualifiedName.consume_front("::");
70 auto Names = splitQualifiedName(QualifiedName);
71 if (IsGlobalScope || !Names.first.empty())
72 Request.Scopes = {std::string(Names.first)};
73 else
74 // QualifiedName refers to a symbol in global scope (e.g. "GlobalSymbol"),
75 // add the global scope to the request.
76 Request.Scopes = {""};
77
78 Request.Query = std::string(Names.second);
79 std::vector<SymbolID> SymIDs;
80 Index->fuzzyFind(Request, [&](const Symbol &Sym) {
81 std::string SymQualifiedName = (Sym.Scope + Sym.Name).str();
82 if (QualifiedName == SymQualifiedName)
83 SymIDs.push_back(Sym.ID);
84 });
85 return SymIDs;
86}
87
88// REPL commands inherit from Command and contain their options as members.
89// Creating a Command populates parser options, parseAndRun() resets them.
90class Command {
91 // By resetting the parser options, we lost the standard -help flag.
92 llvm::cl::opt<bool, false, llvm::cl::parser<bool>> Help{
93 "help", llvm::cl::desc("Display available options"),
94 llvm::cl::ValueDisallowed, llvm::cl::cat(llvm::cl::getGeneralCategory())};
95 // FIXME: Allow commands to signal failure.
96 virtual void run() = 0;
97
98protected:
99 const SymbolIndex *Index;
100
101public:
102 virtual ~Command() = default;
103 bool parseAndRun(llvm::ArrayRef<const char *> Argv, const char *Overview,
104 const SymbolIndex &Index) {
105 std::string ParseErrs;
106 llvm::raw_string_ostream OS(ParseErrs);
107 bool Ok = llvm::cl::ParseCommandLineOptions(Argv.size(), Argv.data(),
108 Overview, &OS);
109 // must do this before opts are destroyed
110 auto Cleanup = llvm::make_scope_exit(llvm::cl::ResetCommandLineParser);
111 if (Help.getNumOccurrences() > 0) {
112 // Avoid printing parse errors in this case.
113 // (Well, in theory. A bunch get printed to llvm::errs() regardless!)
114 llvm::cl::PrintHelpMessage();
115 return true;
116 }
117
118 llvm::outs() << OS.str();
119 if (Ok) {
120 this->Index = &Index;
121 reportTime(Argv[0], [&] { run(); });
122 }
123 return Ok;
124 }
125};
126
127// FIXME(kbobyrev): Ideas for more commands:
128// * load/swap/reload index: this would make it possible to get rid of llvm::cl
129// usages in the tool driver and actually use llvm::cl library in the REPL.
130// * show posting list density histogram (or dump data somewhere so that user
131// could build one)
132// * show number of tokens of each kind
133// * print out tokens with the most dense posting lists
134// * print out tokens with least dense posting lists
135
136class FuzzyFind : public Command {
137 llvm::cl::opt<std::string> Query{
138 "query",
139 llvm::cl::Positional,
140 llvm::cl::Required,
141 llvm::cl::desc("Query string to be fuzzy-matched"),
142 };
143 llvm::cl::opt<std::string> Scopes{
144 "scopes",
145 llvm::cl::desc("Allowed symbol scopes (comma-separated list)"),
146 };
147 llvm::cl::opt<unsigned> Limit{
148 "limit",
149 llvm::cl::init(10),
150 llvm::cl::desc("Max results to display"),
151 };
152
153 void run() override {
154 FuzzyFindRequest Request;
155 Request.Limit = Limit;
156 Request.Query = Query;
157 if (Scopes.getNumOccurrences() > 0) {
158 llvm::SmallVector<llvm::StringRef> Scopes;
159 llvm::StringRef(this->Scopes).split(Scopes, ',');
160 Request.Scopes = {Scopes.begin(), Scopes.end()};
161 }
162 Request.AnyScope = Request.Scopes.empty();
163 // FIXME(kbobyrev): Print symbol final scores to see the distribution.
164 static const auto *OutputFormat = "{0,-4} | {1,-40} | {2,-25}\n";
165 llvm::outs() << llvm::formatv(OutputFormat, "Rank", "Symbol ID",
166 "Symbol Name");
167 size_t Rank = 0;
168 Index->fuzzyFind(Request, [&](const Symbol &Sym) {
169 llvm::outs() << llvm::formatv(OutputFormat, Rank++, Sym.ID.str(),
170 Sym.Scope + Sym.Name);
171 });
172 }
173};
174
175class Lookup : public Command {
176 llvm::cl::opt<std::string> ID{
177 "id",
178 llvm::cl::Positional,
179 llvm::cl::desc("Symbol ID to look up (hex)"),
180 };
181 llvm::cl::opt<std::string> Name{
182 "name",
183 llvm::cl::desc("Qualified name to look up."),
184 };
185
186 void run() override {
187 if (ID.getNumOccurrences() == 0 && Name.getNumOccurrences() == 0) {
188 llvm::errs()
189 << "Missing required argument: please provide id or -name.\n";
190 return;
191 }
192 std::vector<SymbolID> IDs;
193 if (ID.getNumOccurrences()) {
194 auto SID = SymbolID::fromStr(ID);
195 if (!SID) {
196 llvm::errs() << llvm::toString(SID.takeError()) << "\n";
197 return;
198 }
199 IDs.push_back(*SID);
200 } else {
201 IDs = getSymbolIDsFromIndex(Name, Index);
202 }
203
204 LookupRequest Request;
205 Request.IDs.insert(IDs.begin(), IDs.end());
206 bool FoundSymbol = false;
207 Index->lookup(Request, [&](const Symbol &Sym) {
208 FoundSymbol = true;
209 llvm::outs() << toYAML(Sym);
210 });
211 if (!FoundSymbol)
212 llvm::errs() << "not found\n";
213 }
214};
215
216class Refs : public Command {
217 llvm::cl::opt<std::string> ID{
218 "id",
219 llvm::cl::Positional,
220 llvm::cl::desc("Symbol ID of the symbol being queried (hex)."),
221 };
222 llvm::cl::opt<std::string> Name{
223 "name",
224 llvm::cl::desc("Qualified name of the symbol being queried."),
225 };
226 llvm::cl::opt<std::string> Filter{
227 "filter",
228 llvm::cl::init(".*"),
229 llvm::cl::desc(
230 "Print all results from files matching this regular expression."),
231 };
232
233 void run() override {
234 if (ID.getNumOccurrences() == 0 && Name.getNumOccurrences() == 0) {
235 llvm::errs()
236 << "Missing required argument: please provide id or -name.\n";
237 return;
238 }
239 std::vector<SymbolID> IDs;
240 if (ID.getNumOccurrences()) {
241 auto SID = SymbolID::fromStr(ID);
242 if (!SID) {
243 llvm::errs() << llvm::toString(SID.takeError()) << "\n";
244 return;
245 }
246 IDs.push_back(*SID);
247 } else {
248 IDs = getSymbolIDsFromIndex(Name, Index);
249 if (IDs.size() > 1) {
250 llvm::errs() << llvm::formatv(
251 "The name {0} is ambiguous, found {1} different "
252 "symbols. Please use id flag to disambiguate.\n",
253 Name, IDs.size());
254 return;
255 }
256 }
257 RefsRequest RefRequest;
258 RefRequest.IDs.insert(IDs.begin(), IDs.end());
259 llvm::Regex RegexFilter(Filter);
260 Index->refs(RefRequest, [&RegexFilter](const Ref &R) {
261 auto U = URI::parse(R.Location.FileURI);
262 if (!U) {
263 llvm::errs() << U.takeError();
264 return;
265 }
266 if (RegexFilter.match(U->body()))
267 llvm::outs() << R << "\n";
268 });
269 }
270};
271
272class Relations : public Command {
273 llvm::cl::opt<std::string> ID{
274 "id",
275 llvm::cl::Positional,
276 llvm::cl::desc("Symbol ID of the symbol being queried (hex)."),
277 };
278 llvm::cl::opt<RelationKind> Relation{
279 "relation",
280 llvm::cl::desc("Relation kind for the predicate."),
281 values(clEnumValN(RelationKind::BaseOf, "base_of",
282 "Find subclasses of a class."),
283 clEnumValN(RelationKind::OverriddenBy, "overridden_by",
284 "Find methods that overrides a virtual method.")),
285 };
286
287 void run() override {
288 if (ID.getNumOccurrences() == 0 || Relation.getNumOccurrences() == 0) {
289 llvm::errs()
290 << "Missing required argument: please provide id and -relation.\n";
291 return;
292 }
293 RelationsRequest Req;
294 if (ID.getNumOccurrences()) {
295 auto SID = SymbolID::fromStr(ID);
296 if (!SID) {
297 llvm::errs() << llvm::toString(SID.takeError()) << "\n";
298 return;
299 }
300 Req.Subjects.insert(*SID);
301 }
302 Req.Predicate = Relation.getValue();
303 Index->relations(Req, [](const SymbolID &SID, const Symbol &S) {
304 llvm::outs() << toYAML(S);
305 });
306 }
307};
308
309class Export : public Command {
310 llvm::cl::opt<IndexFileFormat> Format{
311 "format",
312 llvm::cl::desc("Format of index export"),
313 llvm::cl::values(
314 clEnumValN(IndexFileFormat::YAML, "yaml",
315 "human-readable YAML format"),
316 clEnumValN(IndexFileFormat::RIFF, "binary", "binary RIFF format")),
317 llvm::cl::init(IndexFileFormat::YAML),
318 };
319 llvm::cl::opt<std::string> OutputFile{
320 "output-file",
321 llvm::cl::Positional,
322 llvm::cl::Required,
323 llvm::cl::desc("Output file for export"),
324 };
325
326public:
327 void run() override {
328 using namespace clang::clangd;
329 // Read input file (as specified in global option)
330 auto Buffer = llvm::MemoryBuffer::getFile(IndexLocation);
331 if (!Buffer) {
332 llvm::errs() << llvm::formatv("Can't open {0}", IndexLocation) << "\n";
333 return;
334 }
335
336 // Auto-detects input format when parsing
337 auto IndexIn = clang::clangd::readIndexFile(Buffer->get()->getBuffer(),
339 if (!IndexIn) {
340 llvm::errs() << llvm::toString(IndexIn.takeError()) << "\n";
341 return;
342 }
343
344 // Prepare output file
345 std::error_code EC;
346 llvm::raw_fd_ostream OutputStream(OutputFile, EC);
347 if (EC) {
348 llvm::errs() << llvm::formatv("Can't open {0} for writing", OutputFile)
349 << "\n";
350 return;
351 }
352
353 // Export
354 clang::clangd::IndexFileOut IndexOut(IndexIn.get());
355 IndexOut.Format = Format;
356 OutputStream << IndexOut;
357 }
358};
359
360struct {
361 const char *Name;
362 const char *Description;
363 std::function<std::unique_ptr<Command>()> Implementation;
364} CommandInfo[] = {
365 {"find", "Search for symbols with fuzzyFind", std::make_unique<FuzzyFind>},
366 {"lookup", "Dump symbol details by ID or qualified name",
367 std::make_unique<Lookup>},
368 {"refs", "Find references by ID or qualified name", std::make_unique<Refs>},
369 {"relations", "Find relations by ID and relation kind",
370 std::make_unique<Relations>},
371 {"export", "Export index", std::make_unique<Export>},
372};
373
374std::unique_ptr<SymbolIndex> openIndex(llvm::StringRef Index) {
375 return Index.starts_with("remote:")
376 ? remote::getClient(Index.drop_front(strlen("remote:")),
377 ProjectRoot)
378 : loadIndex(Index, SymbolOrigin::Static, /*UseDex=*/true,
379 /*SupportContainedRefs=*/true);
380}
381
382bool runCommand(std::string Request, const SymbolIndex &Index) {
383 // Split on spaces and add required null-termination.
384 std::replace(Request.begin(), Request.end(), ' ', '\0');
385 llvm::SmallVector<llvm::StringRef> Args;
386 llvm::StringRef(Request).split(Args, '\0', /*MaxSplit=*/-1,
387 /*KeepEmpty=*/false);
388 if (Args.empty())
389 return false;
390 if (Args.front() == "help") {
391 llvm::outs() << "dexp - Index explorer\nCommands:\n";
392 for (const auto &C : CommandInfo)
393 llvm::outs() << llvm::formatv("{0,16} - {1}\n", C.Name, C.Description);
394 llvm::outs() << "Get detailed command help with e.g. `find -help`.\n";
395 return true;
396 }
397 llvm::SmallVector<const char *> FakeArgv;
398 for (llvm::StringRef S : Args)
399 FakeArgv.push_back(S.data()); // Terminated by separator or end of string.
400
401 for (const auto &Cmd : CommandInfo) {
402 if (Cmd.Name == Args.front())
403 return Cmd.Implementation()->parseAndRun(FakeArgv, Cmd.Description,
404 Index);
405 }
406 llvm::errs() << "Unknown command. Try 'help'.\n";
407 return false;
408}
409
410} // namespace
411} // namespace clangd
412} // namespace clang
413
414int main(int argc, const char *argv[]) {
415 using namespace clang::clangd;
416
417 llvm::cl::ParseCommandLineOptions(argc, argv, Overview);
418
419 // Preserve global options when flag parser is reset, so commands can use
420 // them.
421 IndexLocation.setValue(IndexLocation, /*initial=*/true);
422 ExecCommand.setValue(ExecCommand, /*initial=*/true);
423 ProjectRoot.setValue(ProjectRoot, /*initial=*/true);
424
425 llvm::cl::ResetCommandLineParser(); // We reuse it for REPL commands.
426 llvm::sys::PrintStackTraceOnErrorSignal(argv[0]);
427
428 bool RemoteMode = llvm::StringRef(IndexLocation).starts_with("remote:");
429 if (RemoteMode && ProjectRoot.empty()) {
430 llvm::errs() << "--project-root is required in remote mode\n";
431 return -1;
432 }
433
434 std::unique_ptr<SymbolIndex> Index;
435 reportTime(RemoteMode ? "Remote index client creation" : "Dex build",
436 [&]() { Index = openIndex(IndexLocation); });
437
438 if (!Index) {
439 llvm::errs() << "Failed to open the index.\n";
440 return -1;
441 }
442
443 if (!ExecCommand.empty())
444 return runCommand(ExecCommand, *Index) ? 0 : 1;
445
446 llvm::LineEditor LE("dexp");
447 while (std::optional<std::string> Request = LE.readLine())
448 runCommand(std::move(*Request), *Index);
449}
llvm::SmallString< 256U > Name
std::function< std::unique_ptr< Command >()> Implementation
Definition: Dexp.cpp:363
int main(int argc, const char *argv[])
Definition: Dexp.cpp:414
const SymbolIndex * Index
Definition: Dexp.cpp:99
llvm::raw_ostream & OS
const Criteria C
llvm::json::Object Args
Definition: Trace.cpp:138
static llvm::Expected< SymbolID > fromStr(llvm::StringRef)
Definition: SymbolID.cpp:37
Interface for symbol indexes that can be used for searching or matching symbols among a set of symbol...
Definition: Index.h:134
static llvm::Expected< URI > parse(llvm::StringRef Uri)
Parse a URI string "<scheme>:[//<authority>/]<path>".
Definition: URI.cpp:176
std::unique_ptr< clangd::SymbolIndex > getClient(llvm::StringRef Address, llvm::StringRef ProjectRoot)
Returns an SymbolIndex client that passes requests to remote index located at Address.
Definition: Client.cpp:192
FIXME: Skip testing on windows temporarily due to the different escaping code mode.
Definition: AST.cpp:44
std::pair< StringRef, StringRef > splitQualifiedName(StringRef QName)
Definition: SourceCode.cpp:497
llvm::Expected< IndexFileIn > readIndexFile(llvm::StringRef Data, SymbolOrigin Origin)
std::string toYAML(const Symbol &)
std::unique_ptr< SymbolIndex > loadIndex(llvm::StringRef SymbolFilename, SymbolOrigin Origin, bool UseDex, bool SupportContainedRefs)
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
-clang-tidy