clang 23.0.0git
DependencyScanningFilesystem.h
Go to the documentation of this file.
1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_CLANG_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H
10#define LLVM_CLANG_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H
11
12#include "clang/Basic/LLVM.h"
14#include "llvm/ADT/DenseMap.h"
15#include "llvm/ADT/StringMap.h"
16#include "llvm/Support/Allocator.h"
17#include "llvm/Support/ErrorOr.h"
18#include "llvm/Support/VirtualFileSystem.h"
19#include <condition_variable>
20#include <memory>
21#include <mutex>
22#include <optional>
23#include <variant>
24
25namespace clang {
26namespace dependencies {
27
29
32
33/// Contents and directive tokens of a cached file entry. Single instance can
34/// be shared between multiple entries.
36 CachedFileContents(std::unique_ptr<llvm::MemoryBuffer> Contents)
37 : Original(std::move(Contents)), DepDirectives(nullptr) {}
38
39 /// Owning storage for the original contents.
40 std::unique_ptr<llvm::MemoryBuffer> Original;
41
42 /// The mutex that must be locked before mutating directive tokens.
43 std::mutex ValueLock;
45 /// Accessor to the directive tokens that's atomic to avoid data races.
46 /// \p CachedFileContents has ownership of the pointer.
47 std::atomic<const std::optional<DependencyDirectivesTy> *> DepDirectives;
48
50};
51
52/// An in-memory representation of a file system entity that is of interest to
53/// the dependency scanning filesystem.
54///
55/// It represents one of the following:
56/// - opened file with contents and a stat value,
57/// - opened file with contents, directive tokens and a stat value,
58/// - directory entry with its stat value,
59/// - filesystem error.
60///
61/// Single instance of this class can be shared across different filenames (e.g.
62/// a regular file and a symlink). For this reason the status filename is empty
63/// and is only materialized by \c EntryRef that knows the requested filename.
65public:
66 /// Creates an entry without contents: either a filesystem error or
67 /// a directory with stat value.
68 CachedFileSystemEntry(llvm::ErrorOr<llvm::vfs::Status> Stat)
69 : MaybeStat(std::move(Stat)), Contents(nullptr) {
70 clearStatName();
71 }
72
73 /// Creates an entry representing a file with contents.
74 CachedFileSystemEntry(llvm::ErrorOr<llvm::vfs::Status> Stat,
75 CachedFileContents *Contents)
76 : MaybeStat(std::move(Stat)), Contents(std::move(Contents)) {
77 clearStatName();
78 }
79
80 /// \returns True if the entry is a filesystem error.
81 bool isError() const { return !MaybeStat; }
82
83 /// \returns True if the current entry represents a directory.
84 bool isDirectory() const { return !isError() && MaybeStat->isDirectory(); }
85
86 /// \returns Original contents of the file.
87 StringRef getOriginalContents() const {
88 assert(!isError() && "error");
89 assert(!MaybeStat->isDirectory() && "not a file");
90 assert(Contents && "contents not initialized");
91 return Contents->Original->getBuffer();
92 }
93
94 /// \returns The scanned preprocessor directive tokens of the file that are
95 /// used to speed up preprocessing, if available.
96 std::optional<ArrayRef<dependency_directives_scan::Directive>>
98 assert(!isError() && "error");
99 assert(!isDirectory() && "not a file");
100 assert(Contents && "contents not initialized");
101 if (auto *Directives = Contents->DepDirectives.load()) {
102 if (Directives->has_value())
104 }
105 return std::nullopt;
106 }
107
108 /// \returns The error.
109 std::error_code getError() const { return MaybeStat.getError(); }
110
111 /// \returns The entry status with empty filename.
112 llvm::vfs::Status getStatus() const {
113 assert(!isError() && "error");
114 assert(MaybeStat->getName().empty() && "stat name must be empty");
115 return *MaybeStat;
116 }
117
118 /// \returns The unique ID of the entry.
119 llvm::sys::fs::UniqueID getUniqueID() const {
120 assert(!isError() && "error");
121 return MaybeStat->getUniqueID();
122 }
123
124 /// \returns The data structure holding both contents and directive tokens.
126 assert(!isError() && "error");
127 assert(!isDirectory() && "not a file");
128 return Contents;
129 }
130
131private:
132 void clearStatName() {
133 if (MaybeStat)
134 MaybeStat = llvm::vfs::Status::copyWithNewName(*MaybeStat, "");
135 }
136
137 /// Either the filesystem error or status of the entry.
138 /// The filename is empty and only materialized by \c EntryRef.
139 llvm::ErrorOr<llvm::vfs::Status> MaybeStat;
140
141 /// Non-owning pointer to the file contents.
142 ///
143 /// We're using pointer here to keep the size of this class small. Instances
144 /// representing directories and filesystem errors don't hold any contents
145 /// anyway.
146 CachedFileContents *Contents;
147};
148
149using CachedRealPath = llvm::ErrorOr<std::string>;
150
151/// This class is a shared cache, that caches the 'stat' and 'open' calls to the
152/// underlying real file system, and the scanned preprocessor directives of
153/// files.
154///
155/// It is sharded based on the hash of the key to reduce the lock contention for
156/// the worker threads.
158public:
159 /// In-flight slot used to dedup concurrent producers for the same key.
160 /// The producer publishes via \c publish(); waiters block on \c Mutex/CV
161 /// until \c Done is set. \c Result holds the resolved entry, or an
162 /// uncached-negative error shared with overlapping waiters but not
163 /// persisted in the shard.
165 std::mutex Mutex;
166 std::condition_variable CondVar;
167 bool Done = false;
168 llvm::ErrorOr<const CachedFileSystemEntry *> Result = std::error_code{};
169
170 /// Publishes the producer's outcome to this slot and wakes all waiters.
171 void publish(llvm::ErrorOr<const CachedFileSystemEntry *> R) {
172 {
173 std::lock_guard<std::mutex> EntryLock(Mutex);
174 assert(!Done && "slot already published");
175 Result = R;
176 Done = true;
177 }
178 CondVar.notify_all();
179 }
180 };
181
182 struct CacheShard {
183 /// The mutex that needs to be locked before mutation of any member.
184 mutable std::mutex CacheLock;
185
186 /// Cache state per filename: resolved entry, real path, and an in-flight
187 /// slot (if any). \c InProgress is reset on publish.
189 const CachedFileSystemEntry *Entry = nullptr;
190 const CachedRealPath *RealPath = nullptr;
191 std::shared_ptr<InProgressEntry> InProgress;
192 };
193
194 /// Cache state stored per unique ID; similar to
195 /// \c FilenameCacheState.
197 const CachedFileSystemEntry *Entry = nullptr;
198 std::shared_ptr<InProgressEntry> InProgress;
199 };
200
201 /// Map from filenames to their cached state.
202 llvm::StringMap<FilenameCacheState, llvm::BumpPtrAllocator> CacheByFilename;
203
204 /// Map from unique IDs to their cached state.
205 llvm::DenseMap<llvm::sys::fs::UniqueID, UIDCacheState> EntriesByUID;
206
207 /// The backing storage for cached entries.
208 llvm::SpecificBumpPtrAllocator<CachedFileSystemEntry> EntryStorage;
209
210 /// The backing storage for cached contents.
211 llvm::SpecificBumpPtrAllocator<CachedFileContents> ContentsStorage;
212
213 /// The backing storage for cached real paths.
214 llvm::SpecificBumpPtrAllocator<CachedRealPath> RealPathStorage;
215
216 /// Returns the real path associated with the filename or nullptr if none is
217 /// found.
218 const CachedRealPath *findRealPathByFilename(StringRef Filename) const;
219
220 /// Returns the real path associated with the filename if there is some.
221 /// Otherwise, constructs new one with the given one, associates it with the
222 /// filename and returns the result.
223 const CachedRealPath &
224 getOrEmplaceRealPathForFilename(StringRef Filename,
225 llvm::ErrorOr<StringRef> RealPath);
226 };
227
229
230 /// Returns shard for the given key.
231 CacheShard &getShardForFilename(StringRef Filename) const;
232 CacheShard &getShardForUID(llvm::sys::fs::UniqueID UID) const;
233
235 // A null terminated string that contains a path.
236 const char *Path = nullptr;
237
240 uint64_t CachedSize = 0;
241 uint64_t ActualSize = 0;
242 };
243
244 std::variant<NegativelyCachedInfo, SizeChangedInfo> Info;
245
248
249 OutOfDateEntry(const char *Path, uint64_t CachedSize, uint64_t ActualSize)
250 : Path(Path), Info(SizeChangedInfo{CachedSize, ActualSize}) {}
251 };
252
253 /// Visits all cached entries and re-stat an entry using UnderlyingFS to check
254 /// if the cache contains out-of-date entries. An entry can be out-of-date for
255 /// two reasons:
256 /// 1. The entry contains a stat error, indicating the file did not exist
257 /// in the cache, but the file exists on the UnderlyingFS.
258 /// 2. The entry is associated with a file whose size is different from the
259 /// size of the file on the same path on the UnderlyingFS.
260 std::vector<OutOfDateEntry>
261 getOutOfDateEntries(llvm::vfs::FileSystem &UnderlyingFS) const;
262
263private:
264 std::unique_ptr<CacheShard[]> CacheShards;
265 unsigned NumShards;
266};
267
268/// Reference to a CachedFileSystemEntry.
269/// If the underlying entry is an opened file, this wrapper returns the file
270/// contents and the scanned preprocessor directives.
271class EntryRef {
272 /// The filename used to access this entry.
273 std::string Filename;
274
275 /// The underlying cached entry.
276 const CachedFileSystemEntry &Entry;
277
279
280public:
281 EntryRef(StringRef Name, const CachedFileSystemEntry &Entry)
282 : Filename(Name), Entry(Entry) {}
283
284 llvm::vfs::Status getStatus() const {
285 llvm::vfs::Status Stat = Entry.getStatus();
286 if (!Stat.isDirectory())
287 Stat = llvm::vfs::Status::copyWithNewSize(Stat, getContents().size());
288 return llvm::vfs::Status::copyWithNewName(Stat, Filename);
289 }
290
291 bool isError() const { return Entry.isError(); }
292 bool isDirectory() const { return Entry.isDirectory(); }
293
294 /// If the cached entry represents an error, promotes it into `ErrorOr`.
295 llvm::ErrorOr<EntryRef> unwrapError() const {
296 if (isError())
297 return Entry.getError();
298 return *this;
299 }
300
301 StringRef getContents() const { return Entry.getOriginalContents(); }
302
303 std::optional<ArrayRef<dependency_directives_scan::Directive>>
305 return Entry.getDirectiveTokens();
306 }
307};
308
309/// A virtual file system optimized for the dependency discovery.
310///
311/// It is primarily designed to work with source files whose contents were
312/// preprocessed to remove any tokens that are unlikely to affect the dependency
313/// computation.
314///
315/// This is not a thread safe VFS. A single instance is meant to be used only in
316/// one thread. Multiple instances are allowed to service multiple threads
317/// running in parallel.
319 : public llvm::RTTIExtends<DependencyScanningWorkerFilesystem,
320 llvm::vfs::ProxyFileSystem> {
321public:
322 static const char ID;
323
327
328 llvm::ErrorOr<llvm::vfs::Status> status(const Twine &Path) override;
329 llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
330 openFileForRead(const Twine &Path) override;
331
332 std::error_code getRealPath(const Twine &Path,
333 SmallVectorImpl<char> &Output) override;
334
335 std::error_code setCurrentWorkingDirectory(const Twine &Path) override;
336
337 /// Returns entry for the given filename.
338 ///
339 /// Attempts to use the local and shared caches first, then falls back to
340 /// using the underlying filesystem.
341 llvm::ErrorOr<EntryRef> getOrCreateFileSystemEntry(StringRef Filename);
342
343 /// Ensure the directive tokens are populated for this file entry.
344 ///
345 /// Returns true if the directive tokens are populated for this file entry,
346 /// false if not (i.e. this entry is not a file or its scan fails).
348
349 /// \returns The scanned preprocessor directive tokens of the file that are
350 /// used to speed up preprocessing, if available.
351 std::optional<ArrayRef<dependency_directives_scan::Directive>>
352 getDirectiveTokens(const Twine &Path) {
353 if (llvm::ErrorOr<EntryRef> Entry = getOrCreateFileSystemEntry(Path.str()))
355 return Entry->getDirectiveTokens();
356 return std::nullopt;
357 }
358
359 /// Check whether \p Path exists. By default checks cached result of \c
360 /// status(), and falls back on FS if unable to do so.
361 bool exists(const Twine &Path) override;
362
363private:
364 /// Resolves the cache entry for \p FilenameForLookup through the shared
365 /// cache: returns an entry already produced by another worker (a cache hit
366 /// or the result of an in-flight wait), or claims the producer slot and
367 /// computes the entry via the underlying filesystem.
368 /// \p FilenameForLookup is always absolute, and may differ from
369 /// \p OriginalFilename if the latter is relative.
370 llvm::ErrorOr<const CachedFileSystemEntry *>
371 resolveFilenameThroughSharedCache(StringRef OriginalFilename,
372 StringRef FilenameForLookup);
373
374 /// Resolves the cache entry for the on-disk file identified by \p Stat
375 /// through the UID-keyed shared cache. Reads the file's contents on the
376 /// producer path. Always returns a non-null entry (which may carry a
377 /// readFile error).
379 resolveUIDThroughSharedCache(StringRef OriginalFilename,
380 const llvm::vfs::Status &Stat);
381
382 /// Represents a filesystem entry that has been stat-ed (and potentially read)
383 /// and that's about to be inserted into the cache as `CachedFileSystemEntry`.
384 struct TentativeEntry {
385 llvm::vfs::Status Status;
386 std::unique_ptr<llvm::MemoryBuffer> Contents;
387
388 TentativeEntry(llvm::vfs::Status Status,
389 std::unique_ptr<llvm::MemoryBuffer> Contents = nullptr)
390 : Status(std::move(Status)), Contents(std::move(Contents)) {}
391 };
392
393 /// Reads file at the given path. Enforces consistency between the file size
394 /// in status and size of read contents.
395 llvm::ErrorOr<TentativeEntry> readFile(StringRef Filename);
396
397 void printImpl(raw_ostream &OS, PrintType Type,
398 unsigned IndentLevel) const override {
399 printIndent(OS, IndentLevel);
400 OS << "DependencyScanningFilesystem\n";
401 getUnderlyingFS().print(OS, Type, IndentLevel + 1);
402 }
403
404 /// The service associated with this VFS.
405 DependencyScanningService &Service;
406
407 /// Per-filename state cached locally by this worker thread, so repeated
408 /// queries can be served without touching the shared cache. The entry and
409 /// real path are arena-owned by the shared cache and outlive this worker, so
410 /// borrowing raw pointers here is safe.
411 struct LocalEntry {
412 const CachedFileSystemEntry *File = nullptr;
413 const CachedRealPath *RealPath = nullptr;
414 };
415
416 /// The local cache is used by the worker thread to cache file system queries
417 /// locally instead of querying the global cache every time.
418 llvm::StringMap<LocalEntry, llvm::BumpPtrAllocator> LocalCache;
419
420 /// The working directory to use for making relative paths absolute before
421 /// using them for cache lookups.
422 llvm::ErrorOr<std::string> WorkingDirForCacheLookup;
423
424 void updateWorkingDirForCacheLookup();
425
426 llvm::ErrorOr<StringRef>
427 tryGetFilenameForLookup(StringRef OriginalFilename,
428 llvm::SmallVectorImpl<char> &PathBuf) const;
429};
430
431} // end namespace dependencies
432} // end namespace clang
433
434#endif // LLVM_CLANG_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H
This is the interface for scanning header and source files to get the minimum necessary preprocessor ...
unsigned IndentLevel
The indent level of this token. Copied from the surrounding line.
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified.
An in-memory representation of a file system entity that is of interest to the dependency scanning fi...
CachedFileSystemEntry(llvm::ErrorOr< llvm::vfs::Status > Stat, CachedFileContents *Contents)
Creates an entry representing a file with contents.
std::optional< ArrayRef< dependency_directives_scan::Directive > > getDirectiveTokens() const
CachedFileSystemEntry(llvm::ErrorOr< llvm::vfs::Status > Stat)
Creates an entry without contents: either a filesystem error or a directory with stat value.
CacheShard & getShardForFilename(StringRef Filename) const
Returns shard for the given key.
std::vector< OutOfDateEntry > getOutOfDateEntries(llvm::vfs::FileSystem &UnderlyingFS) const
Visits all cached entries and re-stat an entry using UnderlyingFS to check if the cache contains out-...
The dependency scanning service contains shared configuration and state that is used by the individua...
std::optional< ArrayRef< dependency_directives_scan::Directive > > getDirectiveTokens(const Twine &Path)
std::error_code getRealPath(const Twine &Path, SmallVectorImpl< char > &Output) override
bool ensureDirectiveTokensArePopulated(EntryRef Entry)
Ensure the directive tokens are populated for this file entry.
bool exists(const Twine &Path) override
Check whether Path exists.
DependencyScanningWorkerFilesystem(DependencyScanningService &Service, IntrusiveRefCntPtr< llvm::vfs::FileSystem > FS)
llvm::ErrorOr< EntryRef > getOrCreateFileSystemEntry(StringRef Filename)
Returns entry for the given filename.
llvm::ErrorOr< std::unique_ptr< llvm::vfs::File > > openFileForRead(const Twine &Path) override
std::error_code setCurrentWorkingDirectory(const Twine &Path) override
llvm::ErrorOr< llvm::vfs::Status > status(const Twine &Path) override
Reference to a CachedFileSystemEntry.
std::optional< ArrayRef< dependency_directives_scan::Directive > > getDirectiveTokens() const
llvm::ErrorOr< EntryRef > unwrapError() const
If the cached entry represents an error, promotes it into ErrorOr.
EntryRef(StringRef Name, const CachedFileSystemEntry &Entry)
SmallVector< dependency_directives_scan::Directive, 20 > DependencyDirectivesTy
llvm::ErrorOr< std::string > CachedRealPath
The JSON file list parser is used to communicate input to InstallAPI.
nullptr
This class represents a compute construct, representing a 'Kind' of ‘parallel’, 'serial',...
Contents and directive tokens of a cached file entry.
std::unique_ptr< llvm::MemoryBuffer > Original
Owning storage for the original contents.
SmallVector< dependency_directives_scan::Token, 10 > DepDirectiveTokens
std::atomic< const std::optional< DependencyDirectivesTy > * > DepDirectives
Accessor to the directive tokens that's atomic to avoid data races.
std::mutex ValueLock
The mutex that must be locked before mutating directive tokens.
CachedFileContents(std::unique_ptr< llvm::MemoryBuffer > Contents)
Cache state per filename: resolved entry, real path, and an in-flight slot (if any).
llvm::SpecificBumpPtrAllocator< CachedFileSystemEntry > EntryStorage
The backing storage for cached entries.
llvm::SpecificBumpPtrAllocator< CachedFileContents > ContentsStorage
The backing storage for cached contents.
llvm::StringMap< FilenameCacheState, llvm::BumpPtrAllocator > CacheByFilename
Map from filenames to their cached state.
llvm::SpecificBumpPtrAllocator< CachedRealPath > RealPathStorage
The backing storage for cached real paths.
const CachedRealPath * findRealPathByFilename(StringRef Filename) const
Returns the real path associated with the filename or nullptr if none is found.
std::mutex CacheLock
The mutex that needs to be locked before mutation of any member.
llvm::DenseMap< llvm::sys::fs::UniqueID, UIDCacheState > EntriesByUID
Map from unique IDs to their cached state.
const CachedRealPath & getOrEmplaceRealPathForFilename(StringRef Filename, llvm::ErrorOr< StringRef > RealPath)
Returns the real path associated with the filename if there is some.
In-flight slot used to dedup concurrent producers for the same key.
void publish(llvm::ErrorOr< const CachedFileSystemEntry * > R)
Publishes the producer's outcome to this slot and wakes all waiters.
OutOfDateEntry(const char *Path, uint64_t CachedSize, uint64_t ActualSize)