clang-tools 20.0.0git
Background.cpp
Go to the documentation of this file.
1//===-- Background.cpp - Build an index in a background thread ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "index/Background.h"
10#include "Compiler.h"
11#include "Config.h"
12#include "Headers.h"
13#include "SourceCode.h"
14#include "URI.h"
16#include "index/FileIndex.h"
17#include "index/Index.h"
18#include "index/IndexAction.h"
19#include "index/MemIndex.h"
20#include "index/Ref.h"
21#include "index/Relation.h"
22#include "index/Serialization.h"
23#include "index/Symbol.h"
25#include "support/Context.h"
26#include "support/Logger.h"
27#include "support/Path.h"
28#include "support/Threading.h"
30#include "support/Trace.h"
31#include "clang/Basic/SourceLocation.h"
32#include "clang/Basic/SourceManager.h"
33#include "clang/Basic/Stack.h"
34#include "clang/Frontend/FrontendAction.h"
35#include "llvm/ADT/ArrayRef.h"
36#include "llvm/ADT/DenseSet.h"
37#include "llvm/ADT/STLExtras.h"
38#include "llvm/ADT/StringMap.h"
39#include "llvm/ADT/StringRef.h"
40#include "llvm/Support/Error.h"
41#include "llvm/Support/Path.h"
42#include "llvm/Support/Threading.h"
43#include "llvm/Support/xxhash.h"
44
45#include <algorithm>
46#include <atomic>
47#include <chrono>
48#include <condition_variable>
49#include <cstddef>
50#include <memory>
51#include <mutex>
52#include <numeric>
53#include <optional>
54#include <queue>
55#include <random>
56#include <string>
57#include <thread>
58#include <utility>
59#include <vector>
60
61namespace clang {
62namespace clangd {
63namespace {
64
65// We cannot use vfs->makeAbsolute because Cmd.FileName is either absolute or
66// relative to Cmd.Directory, which might not be the same as current working
67// directory.
68llvm::SmallString<128> getAbsolutePath(const tooling::CompileCommand &Cmd) {
69 llvm::SmallString<128> AbsolutePath;
70 if (llvm::sys::path::is_absolute(Cmd.Filename)) {
71 AbsolutePath = Cmd.Filename;
72 } else {
73 AbsolutePath = Cmd.Directory;
74 llvm::sys::path::append(AbsolutePath, Cmd.Filename);
75 llvm::sys::path::remove_dots(AbsolutePath, true);
76 }
77 return AbsolutePath;
78}
79
80bool shardIsStale(const LoadedShard &LS, llvm::vfs::FileSystem *FS) {
81 auto Buf = FS->getBufferForFile(LS.AbsolutePath);
82 if (!Buf) {
83 vlog("Background-index: Couldn't read {0} to validate stored index: {1}",
84 LS.AbsolutePath, Buf.getError().message());
85 // There is no point in indexing an unreadable file.
86 return false;
87 }
88 return digest(Buf->get()->getBuffer()) != LS.Digest;
89}
90
91} // namespace
92
94 const ThreadsafeFS &TFS, const GlobalCompilationDatabase &CDB,
95 BackgroundIndexStorage::Factory IndexStorageFactory, Options Opts)
96 : SwapIndex(std::make_unique<MemIndex>()), TFS(TFS), CDB(CDB),
97 IndexingPriority(Opts.IndexingPriority),
98 ContextProvider(std::move(Opts.ContextProvider)),
99 IndexedSymbols(IndexContents::All, Opts.SupportContainedRefs),
100 Rebuilder(this, &IndexedSymbols, Opts.ThreadPoolSize),
101 IndexStorageFactory(std::move(IndexStorageFactory)),
102 Queue(std::move(Opts.OnProgress)),
103 CommandsChanged(
104 CDB.watch([&](const std::vector<std::string> &ChangedFiles) {
105 enqueue(ChangedFiles);
106 })) {
107 assert(Opts.ThreadPoolSize > 0 && "Thread pool size can't be zero.");
108 assert(this->IndexStorageFactory && "Storage factory can not be null!");
109 for (unsigned I = 0; I < Opts.ThreadPoolSize; ++I) {
110 ThreadPool.runAsync("background-worker-" + llvm::Twine(I + 1),
111 [this, Ctx(Context::current().clone())]() mutable {
112 clang::noteBottomOfStack();
113 WithContext BGContext(std::move(Ctx));
114 Queue.work([&] { Rebuilder.idle(); });
115 });
116 }
117}
118
120 stop();
121 ThreadPool.wait();
122}
123
124BackgroundQueue::Task BackgroundIndex::changedFilesTask(
125 const std::vector<std::string> &ChangedFiles) {
126 BackgroundQueue::Task T([this, ChangedFiles] {
127 trace::Span Tracer("BackgroundIndexEnqueue");
128
129 std::optional<WithContext> WithProvidedContext;
130 if (ContextProvider)
131 WithProvidedContext.emplace(ContextProvider(/*Path=*/""));
132
133 // We're doing this asynchronously, because we'll read shards here too.
134 log("Enqueueing {0} commands for indexing", ChangedFiles.size());
135 SPAN_ATTACH(Tracer, "files", int64_t(ChangedFiles.size()));
136
137 auto NeedsReIndexing = loadProject(std::move(ChangedFiles));
138 // Run indexing for files that need to be updated.
139 std::shuffle(NeedsReIndexing.begin(), NeedsReIndexing.end(),
140 std::mt19937(std::random_device{}()));
141 std::vector<BackgroundQueue::Task> Tasks;
142 Tasks.reserve(NeedsReIndexing.size());
143 for (const auto &File : NeedsReIndexing)
144 Tasks.push_back(indexFileTask(std::move(File)));
145 Queue.append(std::move(Tasks));
146 });
147
148 T.QueuePri = LoadShards;
149 T.ThreadPri = llvm::ThreadPriority::Default;
150 return T;
151}
152
153static llvm::StringRef filenameWithoutExtension(llvm::StringRef Path) {
154 Path = llvm::sys::path::filename(Path);
155 return Path.drop_back(llvm::sys::path::extension(Path).size());
156}
157
158BackgroundQueue::Task BackgroundIndex::indexFileTask(std::string Path) {
159 std::string Tag = filenameWithoutExtension(Path).str();
160 uint64_t Key = llvm::xxh3_64bits(Path);
161 BackgroundQueue::Task T([this, Path(std::move(Path))] {
162 std::optional<WithContext> WithProvidedContext;
163 if (ContextProvider)
164 WithProvidedContext.emplace(ContextProvider(Path));
165 auto Cmd = CDB.getCompileCommand(Path);
166 if (!Cmd)
167 return;
168 if (auto Error = index(std::move(*Cmd)))
169 elog("Indexing {0} failed: {1}", Path, std::move(Error));
170 });
171 T.QueuePri = IndexFile;
172 T.ThreadPri = IndexingPriority;
173 T.Tag = std::move(Tag);
174 T.Key = Key;
175 return T;
176}
177
178void BackgroundIndex::boostRelated(llvm::StringRef Path) {
179 if (isHeaderFile(Path))
180 Queue.boost(filenameWithoutExtension(Path), IndexBoostedFile);
181}
182
183/// Given index results from a TU, only update symbols coming from files that
184/// are different or missing from than \p ShardVersionsSnapshot. Also stores new
185/// index information on IndexStorage.
186void BackgroundIndex::update(
187 llvm::StringRef MainFile, IndexFileIn Index,
188 const llvm::StringMap<ShardVersion> &ShardVersionsSnapshot,
189 bool HadErrors) {
190 // Keys are URIs.
191 llvm::StringMap<std::pair<Path, FileDigest>> FilesToUpdate;
192 // Note that sources do not contain any information regarding missing headers,
193 // since we don't even know what absolute path they should fall in.
194 for (const auto &IndexIt : *Index.Sources) {
195 const auto &IGN = IndexIt.getValue();
196 auto AbsPath = URI::resolve(IGN.URI, MainFile);
197 if (!AbsPath) {
198 elog("Failed to resolve URI: {0}", AbsPath.takeError());
199 continue;
200 }
201 const auto DigestIt = ShardVersionsSnapshot.find(*AbsPath);
202 // File has different contents, or indexing was successful this time.
203 if (DigestIt == ShardVersionsSnapshot.end() ||
204 DigestIt->getValue().Digest != IGN.Digest ||
205 (DigestIt->getValue().HadErrors && !HadErrors))
206 FilesToUpdate[IGN.URI] = {std::move(*AbsPath), IGN.Digest};
207 }
208
209 // Shard slabs into files.
210 FileShardedIndex ShardedIndex(std::move(Index));
211
212 // Build and store new slabs for each updated file.
213 for (const auto &FileIt : FilesToUpdate) {
214 auto Uri = FileIt.first();
215 auto IF = ShardedIndex.getShard(Uri);
216 assert(IF && "no shard for file in Index.Sources?");
217 PathRef Path = FileIt.getValue().first;
218
219 // Only store command line hash for main files of the TU, since our
220 // current model keeps only one version of a header file.
221 if (Path != MainFile)
222 IF->Cmd.reset();
223
224 // We need to store shards before updating the index, since the latter
225 // consumes slabs.
226 // FIXME: Also skip serializing the shard if it is already up-to-date.
227 if (auto Error = IndexStorageFactory(Path)->storeShard(Path, *IF))
228 elog("Failed to write background-index shard for file {0}: {1}", Path,
229 std::move(Error));
230
231 {
232 std::lock_guard<std::mutex> Lock(ShardVersionsMu);
233 const auto &Hash = FileIt.getValue().second;
234 auto DigestIt = ShardVersions.try_emplace(Path);
235 ShardVersion &SV = DigestIt.first->second;
236 // Skip if file is already up to date, unless previous index was broken
237 // and this one is not.
238 if (!DigestIt.second && SV.Digest == Hash && SV.HadErrors && !HadErrors)
239 continue;
240 SV.Digest = Hash;
241 SV.HadErrors = HadErrors;
242
243 // This can override a newer version that is added in another thread, if
244 // this thread sees the older version but finishes later. This should be
245 // rare in practice.
246 IndexedSymbols.update(
247 Uri, std::make_unique<SymbolSlab>(std::move(*IF->Symbols)),
248 std::make_unique<RefSlab>(std::move(*IF->Refs)),
249 std::make_unique<RelationSlab>(std::move(*IF->Relations)),
250 Path == MainFile);
251 }
252 }
253}
254
255llvm::Error BackgroundIndex::index(tooling::CompileCommand Cmd) {
256 trace::Span Tracer("BackgroundIndex");
257 SPAN_ATTACH(Tracer, "file", Cmd.Filename);
258 auto AbsolutePath = getAbsolutePath(Cmd);
259
260 auto FS = TFS.view(Cmd.Directory);
261 auto Buf = FS->getBufferForFile(AbsolutePath);
262 if (!Buf)
263 return llvm::errorCodeToError(Buf.getError());
264 auto Hash = digest(Buf->get()->getBuffer());
265
266 // Take a snapshot of the versions to avoid locking for each file in the TU.
267 llvm::StringMap<ShardVersion> ShardVersionsSnapshot;
268 {
269 std::lock_guard<std::mutex> Lock(ShardVersionsMu);
270 ShardVersionsSnapshot = ShardVersions;
271 }
272
273 vlog("Indexing {0} (digest:={1})", Cmd.Filename, llvm::toHex(Hash));
274 ParseInputs Inputs;
275 Inputs.TFS = &TFS;
276 Inputs.CompileCommand = std::move(Cmd);
277 IgnoreDiagnostics IgnoreDiags;
278 auto CI = buildCompilerInvocation(Inputs, IgnoreDiags);
279 if (!CI)
280 return error("Couldn't build compiler invocation");
281
282 auto Clang =
283 prepareCompilerInstance(std::move(CI), /*Preamble=*/nullptr,
284 std::move(*Buf), std::move(FS), IgnoreDiags);
285 if (!Clang)
286 return error("Couldn't build compiler instance");
287
288 SymbolCollector::Options IndexOpts;
289 // Creates a filter to not collect index results from files with unchanged
290 // digests.
291 IndexOpts.FileFilter = [&ShardVersionsSnapshot](const SourceManager &SM,
292 FileID FID) {
293 const auto F = SM.getFileEntryRefForID(FID);
294 if (!F)
295 return false; // Skip invalid files.
296 auto AbsPath = getCanonicalPath(*F, SM.getFileManager());
297 if (!AbsPath)
298 return false; // Skip files without absolute path.
299 auto Digest = digestFile(SM, FID);
300 if (!Digest)
301 return false;
302 auto D = ShardVersionsSnapshot.find(*AbsPath);
303 if (D != ShardVersionsSnapshot.end() && D->second.Digest == Digest &&
304 !D->second.HadErrors)
305 return false; // Skip files that haven't changed, without errors.
306 return true;
307 };
308 IndexOpts.CollectMainFileRefs = true;
309
310 IndexFileIn Index;
312 IndexOpts, [&](SymbolSlab S) { Index.Symbols = std::move(S); },
313 [&](RefSlab R) { Index.Refs = std::move(R); },
314 [&](RelationSlab R) { Index.Relations = std::move(R); },
315 [&](IncludeGraph IG) { Index.Sources = std::move(IG); });
316
317 // We're going to run clang here, and it could potentially crash.
318 // We could use CrashRecoveryContext to try to make indexing crashes nonfatal,
319 // but the leaky "recovery" is pretty scary too in a long-running process.
320 // If crashes are a real problem, maybe we should fork a child process.
321
322 const FrontendInputFile &Input = Clang->getFrontendOpts().Inputs.front();
323 if (!Action->BeginSourceFile(*Clang, Input))
324 return error("BeginSourceFile() failed");
325 if (llvm::Error Err = Action->Execute())
326 return Err;
327
328 Action->EndSourceFile();
329
330 Index.Cmd = Inputs.CompileCommand;
331 assert(Index.Symbols && Index.Refs && Index.Sources &&
332 "Symbols, Refs and Sources must be set.");
333
334 log("Indexed {0} ({1} symbols, {2} refs, {3} files)",
335 Inputs.CompileCommand.Filename, Index.Symbols->size(),
336 Index.Refs->numRefs(), Index.Sources->size());
337 SPAN_ATTACH(Tracer, "symbols", int(Index.Symbols->size()));
338 SPAN_ATTACH(Tracer, "refs", int(Index.Refs->numRefs()));
339 SPAN_ATTACH(Tracer, "sources", int(Index.Sources->size()));
340
341 bool HadErrors = Clang->hasDiagnostics() &&
342 Clang->getDiagnostics().hasUncompilableErrorOccurred();
343 if (HadErrors) {
344 log("Failed to compile {0}, index may be incomplete", AbsolutePath);
345 for (auto &It : *Index.Sources)
347 }
348 update(AbsolutePath, std::move(Index), ShardVersionsSnapshot, HadErrors);
349
350 Rebuilder.indexedTU();
351 return llvm::Error::success();
352}
353
354// Restores shards for \p MainFiles from index storage. Then checks staleness of
355// those shards and returns a list of TUs that needs to be indexed to update
356// staleness.
357std::vector<std::string>
358BackgroundIndex::loadProject(std::vector<std::string> MainFiles) {
359 // Drop files where background indexing is disabled in config.
360 if (ContextProvider)
361 llvm::erase_if(MainFiles, [&](const std::string &TU) {
362 // Load the config for each TU, as indexing may be selectively enabled.
363 WithContext WithProvidedContext(ContextProvider(TU));
366 });
367 Rebuilder.startLoading();
368 // Load shards for all of the mainfiles.
369 const std::vector<LoadedShard> Result =
370 loadIndexShards(MainFiles, IndexStorageFactory, CDB);
371 size_t LoadedShards = 0;
372 {
373 // Update in-memory state.
374 std::lock_guard<std::mutex> Lock(ShardVersionsMu);
375 for (auto &LS : Result) {
376 if (!LS.Shard)
377 continue;
378 auto SS =
379 LS.Shard->Symbols
380 ? std::make_unique<SymbolSlab>(std::move(*LS.Shard->Symbols))
381 : nullptr;
382 auto RS = LS.Shard->Refs
383 ? std::make_unique<RefSlab>(std::move(*LS.Shard->Refs))
384 : nullptr;
385 auto RelS =
386 LS.Shard->Relations
387 ? std::make_unique<RelationSlab>(std::move(*LS.Shard->Relations))
388 : nullptr;
389 ShardVersion &SV = ShardVersions[LS.AbsolutePath];
390 SV.Digest = LS.Digest;
391 SV.HadErrors = LS.HadErrors;
392 ++LoadedShards;
393
394 IndexedSymbols.update(URI::create(LS.AbsolutePath).toString(),
395 std::move(SS), std::move(RS), std::move(RelS),
396 LS.CountReferences);
397 }
398 }
399 Rebuilder.loadedShard(LoadedShards);
400 Rebuilder.doneLoading();
401
402 auto FS = TFS.view(/*CWD=*/std::nullopt);
403 llvm::DenseSet<PathRef> TUsToIndex;
404 // We'll accept data from stale shards, but ensure the files get reindexed
405 // soon.
406 for (auto &LS : Result) {
407 if (!shardIsStale(LS, FS.get()))
408 continue;
409 PathRef TUForFile = LS.DependentTU;
410 assert(!TUForFile.empty() && "File without a TU!");
411
412 // FIXME: Currently, we simply schedule indexing on a TU whenever any of
413 // its dependencies needs re-indexing. We might do it smarter by figuring
414 // out a minimal set of TUs that will cover all the stale dependencies.
415 // FIXME: Try looking at other TUs if no compile commands are available
416 // for this TU, i.e TU was deleted after we performed indexing.
417 TUsToIndex.insert(TUForFile);
418 }
419
420 return {TUsToIndex.begin(), TUsToIndex.end()};
421}
422
424 IndexedSymbols.profile(MT.child("slabs"));
425 // We don't want to mix memory used by index and symbols, so call base class.
427}
428} // namespace clangd
429} // namespace clang
HTMLTag Tag
IgnoringDiagConsumer IgnoreDiags
std::unique_ptr< CompilerInstance > Clang
std::string MainFile
FieldAction Action
std::unique_ptr< CompilerInvocation > CI
#define SPAN_ATTACH(S, Name, Expr)
Attach a key-value pair to a Span event.
Definition: Trace.h:164
void runAsync(const llvm::Twine &Name, llvm::unique_function< void()> Action)
Definition: Threading.cpp:81
llvm::unique_function< BackgroundIndexStorage *(PathRef)> Factory
Definition: Background.h:58
void profile(MemoryTree &MT) const
Definition: Background.cpp:423
BackgroundIndex(const ThreadsafeFS &, const GlobalCompilationDatabase &CDB, BackgroundIndexStorage::Factory IndexStorageFactory, Options Opts)
Creates a new background index and starts its threads.
Definition: Background.cpp:93
void boostRelated(llvm::StringRef Path)
Boosts priority of indexing related to Path.
Definition: Background.cpp:178
void enqueue(const std::vector< std::string > &ChangedFiles)
Definition: Background.h:163
void append(std::vector< Task >)
void work(std::function< void()> OnIdle=nullptr)
void boost(llvm::StringRef Tag, unsigned NewPriority)
static const Context & current()
Returns the context for the current thread, creating it if needed.
Definition: Context.cpp:27
void update(llvm::StringRef Key, std::unique_ptr< SymbolSlab > Symbols, std::unique_ptr< RefSlab > Refs, std::unique_ptr< RelationSlab > Relations, bool CountReferences)
Updates all slabs associated with the Key.
Definition: FileIndex.cpp:245
void profile(MemoryTree &MT) const
Definition: FileIndex.cpp:403
Provides compilation arguments used for parsing C and C++ files.
virtual std::optional< tooling::CompileCommand > getCompileCommand(PathRef File) const =0
If there are any known-good commands for building this file, returns one.
MemIndex is a naive in-memory index suitable for a small set of symbols.
Definition: MemIndex.h:20
size_t estimateMemoryUsage() const override
Returns estimated size of index (in bytes).
Definition: Index.cpp:91
Wrapper for vfs::FileSystem for use in multithreaded programs like clangd.
Definition: ThreadsafeFS.h:26
llvm::IntrusiveRefCntPtr< llvm::vfs::FileSystem > view(std::nullopt_t CWD) const
Obtain a vfs::FileSystem with an arbitrary initial working directory.
Definition: ThreadsafeFS.h:32
static llvm::Expected< URI > create(llvm::StringRef AbsolutePath, llvm::StringRef Scheme)
Creates a URI for a file in the given scheme.
Definition: URI.cpp:208
static llvm::Expected< std::string > resolve(const URI &U, llvm::StringRef HintPath="")
Resolves the absolute path of U.
Definition: URI.cpp:244
WithContext replaces Context::current() with a provided scope.
Definition: Context.h:185
Records an event whose duration is the lifetime of the Span object.
Definition: Trace.h:143
std::vector< LoadedShard > loadIndexShards(llvm::ArrayRef< Path > MainFiles, BackgroundIndexStorage::Factory &IndexStorageFactory, const GlobalCompilationDatabase &CDB)
Loads all shards for the TU MainFile from Storage.
@ Error
An error message.
IndexContents
Describes what data is covered by an index.
Definition: Index.h:114
std::string Path
A typedef to represent a file path.
Definition: Path.h:26
std::unique_ptr< CompilerInvocation > buildCompilerInvocation(const ParseInputs &Inputs, clang::DiagnosticConsumer &D, std::vector< std::string > *CC1Args)
Builds compiler invocation that could be used to build AST or preamble.
Definition: Compiler.cpp:95
static llvm::StringRef filenameWithoutExtension(llvm::StringRef Path)
Definition: Background.cpp:153
FileDigest digest(llvm::StringRef Content)
Definition: SourceCode.cpp:565
void vlog(const char *Fmt, Ts &&... Vals)
Definition: Logger.h:72
llvm::Error error(std::error_code EC, const char *Fmt, Ts &&... Vals)
Definition: Logger.h:79
std::optional< FileDigest > digestFile(const SourceManager &SM, FileID FID)
Definition: SourceCode.cpp:575
std::optional< std::string > getCanonicalPath(const FileEntryRef F, FileManager &FileMgr)
Get the canonical path of F.
Definition: SourceCode.cpp:520
std::unique_ptr< CompilerInstance > prepareCompilerInstance(std::unique_ptr< clang::CompilerInvocation > CI, const PrecompiledPreamble *Preamble, std::unique_ptr< llvm::MemoryBuffer > Buffer, llvm::IntrusiveRefCntPtr< llvm::vfs::FileSystem > VFS, DiagnosticConsumer &DiagsClient)
Definition: Compiler.cpp:129
llvm::StringMap< IncludeGraphNode > IncludeGraph
Definition: Headers.h:101
std::unique_ptr< FrontendAction > createStaticIndexingAction(SymbolCollector::Options Opts, std::function< void(SymbolSlab)> SymbolsCallback, std::function< void(RefSlab)> RefsCallback, std::function< void(RelationSlab)> RelationsCallback, std::function< void(IncludeGraph)> IncludeGraphCallback)
void log(const char *Fmt, Ts &&... Vals)
Definition: Logger.h:67
llvm::StringRef PathRef
A typedef to represent a ref to file path.
Definition: Path.h:29
void elog(const char *Fmt, Ts &&... Vals)
Definition: Logger.h:61
bool isHeaderFile(llvm::StringRef FileName, std::optional< LangOptions > LangOpts)
Infers whether this is a header from the FileName and LangOpts (if presents).
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
A work item on the thread pool's queue.
Definition: Background.h:72
static const Config & current()
Returns the Config of the current Context, or an empty configuration.
Definition: Config.cpp:17
BackgroundPolicy Background
Whether this TU should be background-indexed.
Definition: Config.h:86
struct clang::clangd::Config::@3 Index
Controls index behavior.
A tree that can be used to represent memory usage of nested components while preserving the hierarchy...
Definition: MemoryTree.h:30
void addUsage(size_t Increment)
Increases size of current node by Increment.
Definition: MemoryTree.h:56
MemoryTree & child(llvm::StringLiteral Name)
No copy of the Name.
Definition: MemoryTree.h:39