clang-tools 23.0.0git
RemapMain.cpp
Go to the documentation of this file.
1//===--- RemapMain.cpp - Remap paths in background index shards -*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// clangd-remap is a standalone tool that rewrites paths inside every .idx shard
10// in a background index directory. An index generated on one machine (or at one
11// workspace path) can be remapped and reused within a source tree at a
12// different location.
13//
14// Usage:
15// clangd-remap --path-mappings=/old/root=/new/root /path/to/index-dir
16//
17//===----------------------------------------------------------------------===//
18
19#include "Headers.h"
20#include "PathMapping.h"
21#include "SourceCode.h"
22#include "URI.h"
23#include "index/Ref.h"
24#include "index/Serialization.h"
25#include "index/Symbol.h"
26#include "support/Logger.h"
27#include "llvm/ADT/StringRef.h"
28#include "llvm/Support/CommandLine.h"
29#include "llvm/Support/Error.h"
30#include "llvm/Support/FileSystem.h"
31#include "llvm/Support/MemoryBuffer.h"
32#include "llvm/Support/Parallel.h"
33#include "llvm/Support/Path.h"
34#include "llvm/Support/Signals.h"
35#include "llvm/Support/raw_ostream.h"
36#include <atomic>
37#include <string>
38#include <vector>
39
40namespace clang {
41namespace clangd {
42namespace {
43
44static llvm::cl::OptionCategory RemapCategory("clangd-remap options");
45
46static llvm::cl::opt<std::string> PathMappingsArg{
47 "path-mappings",
48 llvm::cl::cat(RemapCategory),
49 llvm::cl::desc(
50 "List of path mappings applied to every string in each background "
51 "index shard. Format: /old/path=/new/path[,/old2=/new2,...]"),
52 llvm::cl::Required,
53};
54
55static llvm::cl::opt<std::string> IndexDir{
56 llvm::cl::desc("<index-dir>"),
57 llvm::cl::cat(RemapCategory),
58 llvm::cl::Positional,
59 llvm::cl::Required,
60};
61
62static llvm::cl::opt<unsigned> NumThreads{
63 "j",
64 llvm::cl::cat(RemapCategory),
65 llvm::cl::desc("Number of worker threads (0 = all)"),
66 llvm::cl::init(0),
67};
68
69static llvm::cl::opt<Logger::Level> LogLevel{
70 "log",
71 llvm::cl::cat(RemapCategory),
72 llvm::cl::desc("Verbosity of log messages written to stderr"),
73 llvm::cl::values(
74 clEnumValN(Logger::Error, "error", "Error messages only"),
75 clEnumValN(Logger::Info, "info", "High level execution tracing"),
76 clEnumValN(Logger::Debug, "verbose", "Low level details")),
77 llvm::cl::init(Logger::Info),
78};
79
80// Apply a path mapping to a URI or raw path string
81//
82// Ex. given "-I/old/root/include" and mapping /old/root=/new/root, the result
83// is "-I/new/root/include"
84std::optional<std::string> remapString(llvm::StringRef S,
85 const PathMappings &Mappings) {
86 // Client = old path, Server = new path; ClientToServer maps old -> new
87 if (S.starts_with("file://"))
89
90 // For non-URI strings (compilation flags, directory paths, etc.) only match
91 // at the first '/' (where an absolute path begins)
92 // FIXME: This does not handle Windows paths; only POSIX paths are supported.
93 size_t FirstSlash = S.find('/');
94 if (FirstSlash == llvm::StringRef::npos)
95 return std::nullopt;
96
97 for (const auto &Mapping : Mappings) {
98 size_t Pos = S.find(Mapping.ClientPath);
99 if (Pos == FirstSlash) {
100 llvm::StringRef After = S.substr(Pos + Mapping.ClientPath.size());
101 // Ensure a full path-component match: "/old" must not match "/older"
102 if (After.empty() || After.front() == '/')
103 return (S.substr(0, Pos) + Mapping.ServerPath + After).str();
104 }
105 }
106 return std::nullopt;
107}
108
109// Remap a StringRef in-place, saving the result into the Arena so the
110// pointer remains valid
111void remapRef(llvm::StringRef &S, const PathMappings &Mappings,
112 llvm::StringSaver &Saver) {
113 if (auto R = remapString(S, Mappings))
114 S = Saver.save(std::move(*R));
115}
116
117// Like remapRef, but _always_ saves into Saver (even on no match). Used for
118// StringRefs that will outlive their original storage.
119void remapOrCopyRef(llvm::StringRef &S, const PathMappings &Mappings,
120 llvm::StringSaver &Saver) {
121 if (auto R = remapString(S, Mappings))
122 S = Saver.save(std::move(*R));
123 else
124 S = Saver.save(S);
125}
126
127void remapCharURI(const char *&P, const PathMappings &Mappings,
128 llvm::StringSaver &Saver) {
129 llvm::StringRef S(P);
130 if (auto R = remapString(S, Mappings))
131 P = Saver.save(std::move(*R)).data();
132}
133
134void remapStdStr(std::string &S, const PathMappings &Mappings) {
135 if (auto R = remapString(S, Mappings))
136 S = std::move(*R);
137}
138
139std::vector<std::string> collectShards(llvm::StringRef Dir) {
140 std::vector<std::string> Paths;
141 std::error_code EC;
142 for (llvm::sys::fs::recursive_directory_iterator It(Dir, EC), End;
143 It != End && !EC; It.increment(EC)) {
144 if (llvm::sys::path::extension(It->path()) == ".idx")
145 Paths.push_back(It->path());
146 }
147 if (EC)
148 elog("Error scanning directory {0}: {1}", Dir, EC.message());
149 return Paths;
150}
151
152// Compute shard filename for a source path. (See getShardPathFromFilePath()
153// in BackgroundIndexStorage.cpp.)
154std::string shardName(llvm::StringRef SourceFilePath) {
155 return (llvm::sys::path::filename(SourceFilePath) + "." +
156 llvm::toHex(digest(SourceFilePath)) + ".idx")
157 .str();
158}
159
160// For each source entry, resolve its URI to get the original absolute path and
161// compute that shard name. Find the entry whose shard name matches, and apply
162// the path mappings to that path to compute the new shard name.
163//
164// This must be called before remapIndexData(), since it needs the original (not
165// remapped) URIs.
166std::string deriveNewFilename(const IndexFileIn &Data,
167 llvm::StringRef OldFilename,
168 const PathMappings &Mappings) {
169 if (!Data.Sources || Data.Sources->empty())
170 return OldFilename.str();
171
172 for (const auto &Entry : *Data.Sources) {
173 auto U = URI::parse(Entry.first());
174 if (!U) {
175 llvm::consumeError(U.takeError());
176 continue;
177 }
178 auto Path = URI::resolve(*U);
179 if (!Path) {
180 llvm::consumeError(Path.takeError());
181 continue;
182 }
183 if (shardName(*Path) == OldFilename) {
184 std::string NewPath = *Path;
185 remapStdStr(NewPath, Mappings);
186 return shardName(NewPath);
187 }
188 }
189 return OldFilename.str();
190}
191
192// Remap all paths inside a parsed IndexFileIn in-place. Saver is used to
193// allocate new strings for fields stored as StringRef or raw pointers.
194void remapIndexData(IndexFileIn &Data, const PathMappings &Mappings,
195 llvm::StringSaver &Saver) {
196 if (Data.Symbols) {
197 // SymbolSlab is immutable, so we rebuild it
198 SymbolSlab::Builder Builder;
199 for (const auto &Sym : *Data.Symbols) {
200 Symbol S = Sym;
201 remapCharURI(S.CanonicalDeclaration.FileURI, Mappings, Saver);
202 remapCharURI(S.Definition.FileURI, Mappings, Saver);
203 for (auto &Inc : S.IncludeHeaders)
204 remapRef(Inc.IncludeHeader, Mappings, Saver);
205 Builder.insert(S);
206 }
207 Data.Symbols = std::move(Builder).build();
208 }
209
210 if (Data.Refs) {
211 RefSlab::Builder Builder;
212 for (const auto &Entry : *Data.Refs) {
213 for (const auto &R : Entry.second) {
214 Ref MR = R; // mutable copy
215 remapCharURI(MR.Location.FileURI, Mappings, Saver);
216 Builder.insert(Entry.first, MR);
217 }
218 }
219 Data.Refs = std::move(Builder).build();
220 }
221
222 // We must rebuild the StringMap because keys may change. All StringRef
223 // fields (URI, DirectIncludes) are saved into Saver because the old
224 // StringMap is destroyed below.
225 if (Data.Sources) {
226 IncludeGraph NewSources;
227 for (auto &Entry : *Data.Sources) {
228 IncludeGraphNode IGN = Entry.getValue();
229 remapOrCopyRef(IGN.URI, Mappings, Saver);
230 for (auto &Inc : IGN.DirectIncludes)
231 remapOrCopyRef(Inc, Mappings, Saver);
232 NewSources[IGN.URI] = std::move(IGN);
233 }
234 Data.Sources = std::move(NewSources);
235 }
236
237 if (Data.Cmd) {
238 remapStdStr(Data.Cmd->Directory, Mappings);
239 for (auto &Arg : Data.Cmd->CommandLine)
240 remapStdStr(Arg, Mappings);
241 remapStdStr(Data.Cmd->Filename, Mappings);
242 }
243}
244
245} // namespace
246} // namespace clangd
247} // namespace clang
248
249int main(int Argc, const char **Argv) {
250 using namespace clang::clangd;
251
252 llvm::sys::PrintStackTraceOnErrorSignal(Argv[0]);
253 llvm::cl::HideUnrelatedOptions(RemapCategory);
254 llvm::cl::ParseCommandLineOptions(Argc, Argv,
255 "clangd-remap: rewrite paths inside "
256 "background-index .idx shards\n");
257
258 StreamLogger Logger(llvm::errs(), LogLevel);
260
261 auto Mappings = parsePathMappings(PathMappingsArg);
262 if (!Mappings) {
263 elog("Invalid --path-mappings: {0}", Mappings.takeError());
264 return 1;
265 }
266 if (Mappings->empty()) {
267 elog("No path mappings specified.");
268 return 1;
269 }
270
271 // Gather all shard files from the index directory.
272 auto AllShards = collectShards(IndexDir);
273 if (AllShards.empty()) {
274 log("No .idx files found in the specified directories.");
275 return 0;
276 }
277
278 log("Found {0} shard(s) to process.", AllShards.size());
279 for (const auto &M : *Mappings)
280 log(" Path mapping: {0}", M);
281
282 if (NumThreads.getValue() != 0)
283 llvm::parallel::strategy = llvm::hardware_concurrency(NumThreads);
284
285 std::atomic<unsigned> Errors{0};
286 std::atomic<unsigned> FilesRenamed{0};
287 std::atomic<unsigned> FilesUnchanged{0};
288
289 llvm::parallelFor(0, AllShards.size(), [&](size_t I) {
290 const std::string &ShardPath = AllShards[I];
291
292 auto Buf = llvm::MemoryBuffer::getFile(ShardPath);
293 if (!Buf) {
294 elog("Cannot read {0}: {1}", ShardPath, Buf.getError().message());
295 ++Errors;
296 return;
297 }
298
299 auto Parsed = readIndexFile((*Buf)->getBuffer(), SymbolOrigin::Background);
300 if (!Parsed) {
301 elog("Cannot parse {0}: {1}", ShardPath, Parsed.takeError());
302 ++Errors;
303 return;
304 }
305
306 // Derive the new shard filename before remapping, so we can match
307 // against original (un-remapped) source URIs.
308 llvm::StringRef OldFilename = llvm::sys::path::filename(ShardPath);
309 std::string NewFilename =
310 deriveNewFilename(*Parsed, OldFilename, *Mappings);
311
312 // Remap all paths in the parsed data
313 llvm::BumpPtrAllocator Arena;
314 llvm::StringSaver Saver(Arena);
315 remapIndexData(*Parsed, *Mappings, Saver);
316
317 // Write the remapped shard (possibly under a new name)
318 llvm::StringRef ParentDir = llvm::sys::path::parent_path(ShardPath);
319 llvm::SmallString<256> NewPath(ParentDir);
320 llvm::sys::path::append(NewPath, NewFilename);
321 if (auto Err = llvm::writeToOutput(NewPath, [&](llvm::raw_ostream &OS) {
322 IndexFileOut Out(*Parsed);
324 OS << Out;
325 return llvm::Error::success();
326 })) {
327 elog("Cannot write {0}: {1}", NewPath, std::move(Err));
328 ++Errors;
329 return;
330 }
331
332 // If the filename changed, remove the old shard
333 if (NewFilename != OldFilename) {
334 llvm::sys::fs::remove(ShardPath);
335 ++FilesRenamed;
336 } else
337 ++FilesUnchanged;
338 });
339
340 unsigned Renamed = FilesRenamed.load();
341 unsigned Unchanged = FilesUnchanged.load();
342 log("Processed: {0} shard(s), {1} renamed, {2} unchanged, {3} error(s).",
343 Renamed + Unchanged, Renamed, Unchanged, Errors.load());
344 return Errors.load() > 0 ? 1 : 0;
345}
int main(int Argc, const char **Argv)
void elog(const char *Fmt, Ts &&... Vals)
Definition Logger.h:61
Interface to allow custom logging in clangd.
Definition Logger.h:22
Only one LoggingSession can be active at a time.
Definition Logger.h:106
RefSlab::Builder is a mutable container that can 'freeze' to RefSlab.
Definition Ref.h:135
RefSlab build() &&
Consumes the builder to finalize the slab.
Definition Ref.cpp:42
SymbolSlab::Builder is a mutable container that can 'freeze' to SymbolSlab.
Definition Symbol.h:224
static llvm::Expected< std::string > resolve(const URI &U, llvm::StringRef HintPath="")
Resolves the absolute path of U.
Definition URI.cpp:244
static llvm::Expected< URI > parse(llvm::StringRef Uri)
Parse a URI string "<scheme>:[//<authority>/]<path>".
Definition URI.cpp:176
FIXME: Skip testing on windows temporarily due to the different escaping code mode.
Definition AST.cpp:44
llvm::Expected< IndexFileIn > readIndexFile(llvm::StringRef Data, SymbolOrigin Origin)
std::vector< PathMapping > PathMappings
Definition PathMapping.h:42
FileDigest digest(llvm::StringRef Content)
llvm::Expected< PathMappings > parsePathMappings(llvm::StringRef RawPathMappings)
Parse the command line RawPathMappings (e.g.
llvm::StringMap< IncludeGraphNode > IncludeGraph
Definition Headers.h:103
void log(const char *Fmt, Ts &&... Vals)
Definition Logger.h:67
std::string Path
A typedef to represent a file path.
Definition Path.h:26
void elog(const char *Fmt, Ts &&... Vals)
Definition Logger.h:61
std::optional< std::string > doPathMapping(llvm::StringRef S, PathMapping::Direction Dir, const PathMappings &Mappings)
Returns a modified S with the first matching path in Mappings substituted, if applicable.
cppcoreguidelines::ProBoundsAvoidUncheckedContainerAccessCheck P
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
Represents a symbol occurrence in the source file.
Definition Ref.h:88
The class presents a C++ symbol, e.g.
Definition Symbol.h:39