clang 23.0.0git
DependencyScanningFilesystem.cpp
Go to the documentation of this file.
1//===- DependencyScanningFilesystem.cpp - Optimized Scanning FS -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
11#include "llvm/Support/MemoryBuffer.h"
12#include "llvm/Support/Threading.h"
13#include <optional>
14
15using namespace clang;
16using namespace dependencies;
17
18llvm::ErrorOr<DependencyScanningWorkerFilesystem::TentativeEntry>
19DependencyScanningWorkerFilesystem::readFile(StringRef Filename) {
20 // Load the file and its content from the file system.
21 auto MaybeFile = getUnderlyingFS().openFileForRead(Filename);
22 if (!MaybeFile)
23 return MaybeFile.getError();
24 auto File = std::move(*MaybeFile);
25
26 auto MaybeStat = File->status();
27 if (!MaybeStat)
28 return MaybeStat.getError();
29 auto Stat = std::move(*MaybeStat);
30
31 auto MaybeBuffer = File->getBuffer(Stat.getName());
32 if (!MaybeBuffer)
33 return MaybeBuffer.getError();
34 auto Buffer = std::move(*MaybeBuffer);
35
36 // If the file size changed between read and stat, pretend it didn't.
37 if (Stat.getSize() != Buffer->getBufferSize())
38 Stat = llvm::vfs::Status::copyWithNewSize(Stat, Buffer->getBufferSize());
39
40 return TentativeEntry(Stat, std::move(Buffer));
41}
42
44 EntryRef Ref) {
45 auto &Entry = Ref.Entry;
46
47 if (Entry.isError() || Entry.isDirectory())
48 return false;
49
50 CachedFileContents *Contents = Entry.getCachedContents();
51 assert(Contents && "contents not initialized");
52
53 // Double-checked locking.
54 if (Contents->DepDirectives.load())
55 return true;
56
57 std::lock_guard<std::mutex> GuardLock(Contents->ValueLock);
58
59 // Double-checked locking.
60 if (Contents->DepDirectives.load())
61 return true;
62
64 // Scan the file for preprocessor directives that might affect the
65 // dependencies.
66 if (scanSourceForDependencyDirectives(Contents->Original->getBuffer(),
67 Contents->DepDirectiveTokens,
68 Directives)) {
69 Contents->DepDirectiveTokens.clear();
70 // FIXME: Propagate the diagnostic if desired by the client.
71 Contents->DepDirectives.store(new std::optional<DependencyDirectivesTy>());
72 return false;
73 }
74
75 // This function performed double-checked locking using `DepDirectives`.
76 // Assigning it must be the last thing this function does, otherwise other
77 // threads may skip the critical section (`DepDirectives != nullptr`), leading
78 // to a data race.
79 Contents->DepDirectives.store(
80 new std::optional<DependencyDirectivesTy>(std::move(Directives)));
81 return true;
82}
83
86 // This heuristic was chosen using a empirical testing on a
87 // reasonably high core machine (iMacPro 18 cores / 36 threads). The cache
88 // sharding gives a performance edge by reducing the lock contention.
89 // FIXME: A better heuristic might also consider the OS to account for
90 // the different cost of lock contention on different OSes.
91 NumShards =
92 std::max(2u, llvm::hardware_concurrency().compute_thread_count() / 4);
93 CacheShards = std::make_unique<CacheShard[]>(NumShards);
94}
95
98 StringRef Filename) const {
99 assert(llvm::sys::path::is_absolute_gnu(Filename));
100 return CacheShards[llvm::hash_value(Filename) % NumShards];
101}
102
105 llvm::sys::fs::UniqueID UID) const {
106 auto Hash = llvm::hash_combine(UID.getDevice(), UID.getFile());
107 return CacheShards[Hash % NumShards];
108}
109
110std::vector<DependencyScanningFilesystemSharedCache::OutOfDateEntry>
112 llvm::vfs::FileSystem &UnderlyingFS) const {
113 // Iterate through all shards and look for cached stat errors.
114 std::vector<OutOfDateEntry> InvalidDiagInfo;
115 for (unsigned i = 0; i < NumShards; i++) {
116 const CacheShard &Shard = CacheShards[i];
117 std::lock_guard<std::mutex> LockGuard(Shard.CacheLock);
118 for (const auto &[Path, State] : Shard.CacheByFilename) {
119 const CachedFileSystemEntry *Entry = State.Entry;
120 // Skip slots without a resolved entry: real-path-only entries from
121 // getRealPath, or uncached negative stats. Runs post-scan, so no
122 // in-progress slots remain.
123 if (!Entry)
124 continue;
125 llvm::ErrorOr<llvm::vfs::Status> Status = UnderlyingFS.status(Path);
126 if (Status) {
127 if (Entry->getError()) {
128 // This is the case where we have cached the non-existence
129 // of the file at Path first, and a file at the path is created
130 // later. The cache entry is not invalidated (as we have no good
131 // way to do it now), which may lead to missing file build errors.
132 InvalidDiagInfo.emplace_back(Path.data());
133 } else {
134 llvm::vfs::Status CachedStatus = Entry->getStatus();
135 if (Status->getType() == llvm::sys::fs::file_type::regular_file &&
136 Status->getType() == CachedStatus.getType()) {
137 // We only check regular files. Directory files sizes could change
138 // due to content changes, and reporting directory size changes can
139 // lead to false positives.
140 // TODO: At the moment, we do not detect symlinks to files whose
141 // size may change. We need to decide if we want to detect cached
142 // symlink size changes. We can also expand this to detect file
143 // type changes.
144 uint64_t CachedSize = CachedStatus.getSize();
145 uint64_t ActualSize = Status->getSize();
146 if (CachedSize != ActualSize) {
147 // This is the case where the cached file has a different size
148 // from the actual file that comes from the underlying FS.
149 InvalidDiagInfo.emplace_back(Path.data(), CachedSize, ActualSize);
150 }
151 }
152 }
153 }
154 }
155 }
156 return InvalidDiagInfo;
157}
158
159namespace {
160using InProgressEntry =
162using SlotResolved = llvm::ErrorOr<const CachedFileSystemEntry *>;
163using SlotProducer = std::shared_ptr<InProgressEntry>;
164using SlotAcquisitionResult = std::variant<SlotResolved, SlotProducer>;
165
166/// Returns a resolved entry if one is already present or in-flight under
167/// \p K; otherwise installs a fresh \c InProgressEntry and returns it as a
168/// producer slot.
169template <typename Map, typename Key>
170SlotAcquisitionResult acquireSlot(std::mutex &CacheLock, Map &M, const Key &K) {
171 std::shared_ptr<InProgressEntry> Pending;
172 {
173 std::lock_guard<std::mutex> ShardLock(CacheLock);
174 auto &State = M[K];
175
176 // Cache hit.
177 if (State.Entry)
178 return SlotResolved{State.Entry};
179
180 if (!State.InProgress) {
181 State.InProgress = std::make_shared<InProgressEntry>();
182 return SlotProducer{State.InProgress};
183 }
184
185 // Copy the shared_ptr so the slot survives our wait once the shard lock
186 // is released and the producer resets State.InProgress on publish.
187 Pending = State.InProgress;
188 }
189
190 // Wait off the shard lock so unrelated keys in this shard aren't blocked.
191 std::unique_lock<std::mutex> EntryLock(Pending->Mutex);
192 Pending->CondVar.wait(EntryLock, [&] { return Pending->Done; });
193 return SlotResolved{Pending->Result};
194}
195} // namespace
196
197const CachedRealPath *
199 StringRef Filename) const {
200 assert(llvm::sys::path::is_absolute_gnu(Filename));
201 std::lock_guard<std::mutex> LockGuard(CacheLock);
202 auto It = CacheByFilename.find(Filename);
203 return It == CacheByFilename.end() ? nullptr : It->getValue().RealPath;
204}
205
207 getOrEmplaceRealPathForFilename(StringRef Filename,
208 llvm::ErrorOr<llvm::StringRef> RealPath) {
209 std::lock_guard<std::mutex> LockGuard(CacheLock);
210
211 const CachedRealPath *&StoredRealPath = CacheByFilename[Filename].RealPath;
212 if (!StoredRealPath) {
213 auto OwnedRealPath = [&]() -> CachedRealPath {
214 if (!RealPath)
215 return RealPath.getError();
216 return RealPath->str();
217 }();
218
219 StoredRealPath = new (RealPathStorage.Allocate())
220 CachedRealPath(std::move(OwnedRealPath));
221 }
222
223 return *StoredRealPath;
224}
225
230 llvm::vfs::ProxyFileSystem>(std::move(FS)),
231 Service(Service), WorkingDirForCacheLookup(llvm::errc::invalid_argument) {
232 updateWorkingDirForCacheLookup();
233}
234
236DependencyScanningWorkerFilesystem::resolveUIDThroughSharedCache(
237 StringRef OriginalFilename, const llvm::vfs::Status &Stat) {
238 auto &UIDShard = Service.getSharedCache().getShardForUID(Stat.getUniqueID());
239 auto UIDSlot = acquireSlot(UIDShard.CacheLock, UIDShard.EntriesByUID,
240 Stat.getUniqueID());
241 if (auto *Resolved = std::get_if<SlotResolved>(&UIDSlot)) {
242 assert(*Resolved && **Resolved &&
243 "in-progress UID slot fulfilled without an entry");
244 return **Resolved;
245 }
246 auto UIDProducer = std::move(std::get<SlotProducer>(UIDSlot));
247
248 auto TEntry =
249 Stat.isDirectory() ? TentativeEntry(Stat) : readFile(OriginalFilename);
250
251 // Allocate the entry and bind the UID slot under one shard-lock acquisition
252 // (BumpPtrAllocator isn't thread-safe). On read-failure, the entry wraps
253 // the open error so concurrent UID waiters surface it rather than racing
254 // to retry the open.
255 const CachedFileSystemEntry *SharedEntry;
256 {
257 std::lock_guard<std::mutex> ShardLock(UIDShard.CacheLock);
258 auto &State = UIDShard.EntriesByUID[Stat.getUniqueID()];
259 assert(!State.Entry && "UID slot already published an entry");
260 if (TEntry) {
261 CachedFileContents *StoredContents = nullptr;
262 if (TEntry->Contents)
263 StoredContents = new (UIDShard.ContentsStorage.Allocate())
264 CachedFileContents(std::move(TEntry->Contents));
265 SharedEntry = new (UIDShard.EntryStorage.Allocate())
266 CachedFileSystemEntry(std::move(TEntry->Status), StoredContents);
267 } else {
268 SharedEntry = new (UIDShard.EntryStorage.Allocate())
269 CachedFileSystemEntry(TEntry.getError());
270 }
271 State.Entry = SharedEntry;
272 State.InProgress.reset();
273 }
274 UIDProducer->publish(SharedEntry);
275 return SharedEntry;
276}
277
278llvm::ErrorOr<const CachedFileSystemEntry *>
279DependencyScanningWorkerFilesystem::resolveFilenameThroughSharedCache(
280 StringRef OriginalFilename, StringRef FilenameForLookup) {
281 assert(llvm::sys::path::is_absolute_gnu(FilenameForLookup));
282 auto &FilenameShard =
283 Service.getSharedCache().getShardForFilename(FilenameForLookup);
284 auto FilenameSlot =
285 acquireSlot(FilenameShard.CacheLock, FilenameShard.CacheByFilename,
286 FilenameForLookup);
287 if (auto *Resolved = std::get_if<SlotResolved>(&FilenameSlot))
288 return *Resolved;
289 auto FilenameProducer = std::move(std::get<SlotProducer>(FilenameSlot));
290
291 // Compute the outcome. Three cases:
292 // - Stat succeeded: delegate to the UID resolver.
293 // - Stat failed, cacheable: defer error-entry allocation to the critical
294 // section below so allocate+bind+reset share one shard-lock acquisition.
295 // - Stat failed, not cacheable: publish the error to current waiters but
296 // don't persist; a later separate query re-runs the stat (so a file
297 // created mid-scan becomes visible).
298 auto Stat = getUnderlyingFS().status(OriginalFilename);
299 const bool ShouldCacheNegativeStat =
300 !Stat && Service.getOpts().CacheNegativeStats &&
301 shouldCacheNegativeStatsForPath(OriginalFilename);
302 llvm::ErrorOr<const CachedFileSystemEntry *> Result = std::error_code{};
303 if (Stat)
304 Result = resolveUIDThroughSharedCache(OriginalFilename, *Stat);
305 else if (!ShouldCacheNegativeStat)
306 Result = Stat.getError();
307
308 // Bind the result and reset the in-flight slot under a single critical
309 // section. The cached-negative case allocates here so allocate+bind+reset
310 // share one shard-lock acquisition.
311 {
312 std::lock_guard<std::mutex> ShardLock(FilenameShard.CacheLock);
313 auto &State = FilenameShard.CacheByFilename[FilenameForLookup];
314 assert(!State.Entry && "filename slot already published an entry");
315 if (ShouldCacheNegativeStat) {
316 auto *Entry = new (FilenameShard.EntryStorage.Allocate())
317 CachedFileSystemEntry(Stat.getError());
318 State.Entry = Entry;
319 Result = Entry;
320 } else if (Result) {
321 State.Entry = *Result;
322 }
323 State.InProgress.reset();
324 }
325 FilenameProducer->publish(Result);
326 return Result;
327}
328
329llvm::ErrorOr<EntryRef>
331 StringRef OriginalFilename) {
332 SmallString<256> PathBuf;
333 auto FilenameForLookup = tryGetFilenameForLookup(OriginalFilename, PathBuf);
334 if (!FilenameForLookup)
335 return FilenameForLookup.getError();
336
337 auto &Local = LocalCache[*FilenameForLookup];
338 if (Local.File)
339 return EntryRef(OriginalFilename, *Local.File).unwrapError();
340
341 auto MaybeEntry =
342 resolveFilenameThroughSharedCache(OriginalFilename, *FilenameForLookup);
343 if (!MaybeEntry)
344 return MaybeEntry.getError();
345 Local.File = *MaybeEntry;
346 return EntryRef(OriginalFilename, **MaybeEntry).unwrapError();
347}
348
349llvm::ErrorOr<llvm::vfs::Status>
351 SmallString<256> OwnedFilename;
352 StringRef Filename = Path.toStringRef(OwnedFilename);
353
354 llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename);
355 if (!Result)
356 return Result.getError();
357 return Result->getStatus();
358}
359
361 // While some VFS overlay filesystems may implement more-efficient
362 // mechanisms for `exists` queries, `DependencyScanningWorkerFilesystem`
363 // typically wraps `RealFileSystem` which does not specialize `exists`,
364 // so it is not likely to benefit from such optimizations. Instead,
365 // it is more-valuable to have this query go through the
366 // cached-`status` code-path of the `DependencyScanningWorkerFilesystem`.
367 llvm::ErrorOr<llvm::vfs::Status> Status = status(Path);
368 return Status && Status->exists();
369}
370
371namespace {
372
373/// The VFS that is used by clang consumes the \c CachedFileSystemEntry using
374/// this subclass.
375class DepScanFile final : public llvm::vfs::File {
376public:
377 DepScanFile(std::unique_ptr<llvm::MemoryBuffer> Buffer,
378 llvm::vfs::Status Stat)
379 : Buffer(std::move(Buffer)), Stat(std::move(Stat)) {}
380
381 static llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> create(EntryRef Entry);
382
383 llvm::ErrorOr<llvm::vfs::Status> status() override { return Stat; }
384
385 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
386 getBuffer(const Twine &Name, int64_t FileSize, bool RequiresNullTerminator,
387 bool IsVolatile) override {
388 return llvm::MemoryBuffer::getMemBuffer(Buffer->getMemBufferRef(),
389 RequiresNullTerminator);
390 }
391
392 std::error_code close() override { return {}; }
393
394private:
395 std::unique_ptr<llvm::MemoryBuffer> Buffer;
396 llvm::vfs::Status Stat;
397};
398
399} // end anonymous namespace
400
401llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
402DepScanFile::create(EntryRef Entry) {
403 assert(!Entry.isError() && "error");
404
405 if (Entry.isDirectory())
406 return std::make_error_code(std::errc::is_a_directory);
407
408 auto Result = std::make_unique<DepScanFile>(
409 llvm::MemoryBuffer::getMemBuffer(Entry.getContents(),
410 Entry.getStatus().getName(),
411 /*RequiresNullTerminator=*/false),
412 Entry.getStatus());
413
414 return llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>(
415 std::unique_ptr<llvm::vfs::File>(std::move(Result)));
416}
417
418llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
420 SmallString<256> OwnedFilename;
421 StringRef Filename = Path.toStringRef(OwnedFilename);
422
423 llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename);
424 if (!Result)
425 return Result.getError();
426 return DepScanFile::create(Result.get());
427}
428
429std::error_code
431 SmallVectorImpl<char> &Output) {
432 SmallString<256> OwnedFilename;
433 StringRef OriginalFilename = Path.toStringRef(OwnedFilename);
434
435 SmallString<256> PathBuf;
436 auto FilenameForLookup = tryGetFilenameForLookup(OriginalFilename, PathBuf);
437 if (!FilenameForLookup)
438 return FilenameForLookup.getError();
439
440 auto HandleCachedRealPath =
441 [&Output](const CachedRealPath &RealPath) -> std::error_code {
442 if (!RealPath)
443 return RealPath.getError();
444 Output.assign(RealPath->begin(), RealPath->end());
445 return {};
446 };
447
448 // If we already have the result in local cache, no work required.
449 auto &Local = LocalCache[*FilenameForLookup];
450 if (Local.RealPath)
451 return HandleCachedRealPath(*Local.RealPath);
452
453 // If we have the result in the shared cache, cache it locally.
454 auto &Shard =
455 Service.getSharedCache().getShardForFilename(*FilenameForLookup);
456 if (const auto *ShardRealPath =
457 Shard.findRealPathByFilename(*FilenameForLookup)) {
458 Local.RealPath = ShardRealPath;
459 return HandleCachedRealPath(*Local.RealPath);
460 }
461
462 // If we don't know the real path, compute it...
463 std::error_code EC = getUnderlyingFS().getRealPath(OriginalFilename, Output);
464 llvm::ErrorOr<llvm::StringRef> ComputedRealPath = EC;
465 if (!EC)
466 ComputedRealPath = StringRef{Output.data(), Output.size()};
467
468 // ...and try to write it into the shared cache. In case some other thread won
469 // this race and already wrote its own result there, just adopt it. Write
470 // whatever is in the shared cache into the local one.
471 const auto &RealPath = Shard.getOrEmplaceRealPathForFilename(
472 *FilenameForLookup, ComputedRealPath);
473 Local.RealPath = &RealPath;
474 return HandleCachedRealPath(*Local.RealPath);
475}
476
478 const Twine &Path) {
479 std::error_code EC = ProxyFileSystem::setCurrentWorkingDirectory(Path);
480 updateWorkingDirForCacheLookup();
481 return EC;
482}
483
484void DependencyScanningWorkerFilesystem::updateWorkingDirForCacheLookup() {
485 llvm::ErrorOr<std::string> CWD =
486 getUnderlyingFS().getCurrentWorkingDirectory();
487 if (!CWD) {
488 WorkingDirForCacheLookup = CWD.getError();
489 } else if (!llvm::sys::path::is_absolute_gnu(*CWD)) {
490 WorkingDirForCacheLookup = llvm::errc::invalid_argument;
491 } else {
492 WorkingDirForCacheLookup = *CWD;
493 }
494 assert(!WorkingDirForCacheLookup ||
495 llvm::sys::path::is_absolute_gnu(*WorkingDirForCacheLookup));
496}
497
498llvm::ErrorOr<StringRef>
499DependencyScanningWorkerFilesystem::tryGetFilenameForLookup(
500 StringRef OriginalFilename, llvm::SmallVectorImpl<char> &PathBuf) const {
501 StringRef FilenameForLookup;
502 if (llvm::sys::path::is_absolute_gnu(OriginalFilename)) {
503 FilenameForLookup = OriginalFilename;
504 } else if (!WorkingDirForCacheLookup) {
505 return WorkingDirForCacheLookup.getError();
506 } else {
507 StringRef RelFilename = OriginalFilename;
508 RelFilename.consume_front("./");
509 PathBuf.assign(WorkingDirForCacheLookup->begin(),
510 WorkingDirForCacheLookup->end());
511 llvm::sys::path::append(PathBuf, RelFilename);
512 FilenameForLookup = StringRef{PathBuf.begin(), PathBuf.size()};
513 }
514 assert(llvm::sys::path::is_absolute_gnu(FilenameForLookup));
515 return FilenameForLookup;
516}
517
An in-memory representation of a file system entity that is of interest to the dependency scanning fi...
CacheShard & getShardForFilename(StringRef Filename) const
Returns shard for the given key.
std::vector< OutOfDateEntry > getOutOfDateEntries(llvm::vfs::FileSystem &UnderlyingFS) const
Visits all cached entries and re-stat an entry using UnderlyingFS to check if the cache contains out-...
The dependency scanning service contains shared configuration and state that is used by the individua...
DependencyScanningFilesystemSharedCache & getSharedCache()
std::error_code getRealPath(const Twine &Path, SmallVectorImpl< char > &Output) override
bool ensureDirectiveTokensArePopulated(EntryRef Entry)
Ensure the directive tokens are populated for this file entry.
bool exists(const Twine &Path) override
Check whether Path exists.
DependencyScanningWorkerFilesystem(DependencyScanningService &Service, IntrusiveRefCntPtr< llvm::vfs::FileSystem > FS)
llvm::ErrorOr< EntryRef > getOrCreateFileSystemEntry(StringRef Filename)
Returns entry for the given filename.
llvm::ErrorOr< std::unique_ptr< llvm::vfs::File > > openFileForRead(const Twine &Path) override
std::error_code setCurrentWorkingDirectory(const Twine &Path) override
llvm::ErrorOr< llvm::vfs::Status > status(const Twine &Path) override
Reference to a CachedFileSystemEntry.
llvm::ErrorOr< EntryRef > unwrapError() const
If the cached entry represents an error, promotes it into ErrorOr.
llvm::ErrorOr< std::string > CachedRealPath
bool shouldCacheNegativeStatsForPath(StringRef Path)
The JSON file list parser is used to communicate input to InstallAPI.
nullptr
This class represents a compute construct, representing a 'Kind' of ‘parallel’, 'serial',...
bool scanSourceForDependencyDirectives(StringRef Input, SmallVectorImpl< dependency_directives_scan::Token > &Tokens, SmallVectorImpl< dependency_directives_scan::Directive > &Directives, DiagnosticsEngine *Diags=nullptr, SourceLocation InputSourceLoc=SourceLocation())
Scan the input for the preprocessor directives that might have an effect on the dependencies for a co...
@ Result
The result type of a method or function.
Definition TypeBase.h:905
Diagnostic wrappers for TextAPI types for error reporting.
Definition Dominators.h:30
hash_code hash_value(const clang::dependencies::ModuleID &ID)
Contents and directive tokens of a cached file entry.
std::unique_ptr< llvm::MemoryBuffer > Original
Owning storage for the original contents.
SmallVector< dependency_directives_scan::Token, 10 > DepDirectiveTokens
std::atomic< const std::optional< DependencyDirectivesTy > * > DepDirectives
Accessor to the directive tokens that's atomic to avoid data races.
std::mutex ValueLock
The mutex that must be locked before mutating directive tokens.
llvm::StringMap< FilenameCacheState, llvm::BumpPtrAllocator > CacheByFilename
Map from filenames to their cached state.
llvm::SpecificBumpPtrAllocator< CachedRealPath > RealPathStorage
The backing storage for cached real paths.
const CachedRealPath * findRealPathByFilename(StringRef Filename) const
Returns the real path associated with the filename or nullptr if none is found.
std::mutex CacheLock
The mutex that needs to be locked before mutation of any member.
const CachedRealPath & getOrEmplaceRealPathForFilename(StringRef Filename, llvm::ErrorOr< StringRef > RealPath)
Returns the real path associated with the filename if there is some.
In-flight slot used to dedup concurrent producers for the same key.