clang 20.0.0git
DependencyScanningFilesystem.h
Go to the documentation of this file.
1//===- DependencyScanningFilesystem.h - clang-scan-deps fs ===---*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H
10#define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H
11
12#include "clang/Basic/LLVM.h"
14#include "llvm/ADT/DenseMap.h"
15#include "llvm/ADT/StringMap.h"
16#include "llvm/Support/Allocator.h"
17#include "llvm/Support/ErrorOr.h"
18#include "llvm/Support/VirtualFileSystem.h"
19#include <mutex>
20#include <optional>
21
22namespace clang {
23namespace tooling {
24namespace dependencies {
25
28
29/// Contents and directive tokens of a cached file entry. A single instance
30/// can be shared between multiple entries.
32 CachedFileContents(std::unique_ptr<llvm::MemoryBuffer> Contents)
33 : Original(std::move(Contents)), DepDirectives(nullptr) {}
34
35 /// Owning storage for the original contents.
36 std::unique_ptr<llvm::MemoryBuffer> Original;
37
38 /// The mutex that must be locked before mutating directive tokens.
39 std::mutex ValueLock;
41 /// Accessor to the directive tokens that's atomic to avoid data races.
42 /// \p CachedFileContents has ownership of the pointer.
43 std::atomic<const std::optional<DependencyDirectivesTy> *> DepDirectives;
44
46};
47
48/// An in-memory representation of a file system entity that is of interest to
49/// the dependency scanning filesystem.
50///
51/// It represents one of the following:
52/// - opened file with contents and a stat value,
53/// - opened file with contents, directive tokens and a stat value,
54/// - directory entry with its stat value,
55/// - filesystem error.
56///
57/// A single instance of this class can be shared across different filenames
58/// (e.g. a regular file and a symlink). For this reason, the status filename is
59/// empty and is only materialized by \c EntryRef that knows the requested filename.
61public:
62 /// Creates an entry without contents: either a filesystem error or
63 /// a directory with stat value.
64 CachedFileSystemEntry(llvm::ErrorOr<llvm::vfs::Status> Stat)
65 : MaybeStat(std::move(Stat)), Contents(nullptr) {
66 clearStatName();
67 }
68
69 /// Creates an entry representing a file with contents.
70 CachedFileSystemEntry(llvm::ErrorOr<llvm::vfs::Status> Stat,
71 CachedFileContents *Contents)
72 : MaybeStat(std::move(Stat)), Contents(std::move(Contents)) {
73 clearStatName();
74 }
75
76 /// \returns True if the entry is a filesystem error.
77 bool isError() const { return !MaybeStat; }
78
79 /// \returns True if the current entry represents a directory.
80 bool isDirectory() const { return !isError() && MaybeStat->isDirectory(); }
81
82 /// \returns Original contents of the file.
83 StringRef getOriginalContents() const {
84 assert(!isError() && "error");
85 assert(!MaybeStat->isDirectory() && "not a file");
86 assert(Contents && "contents not initialized");
87 return Contents->Original->getBuffer();
88 }
89
90 /// \returns The scanned preprocessor directive tokens of the file that are
91 /// used to speed up preprocessing, if available.
92 std::optional<ArrayRef<dependency_directives_scan::Directive>>
94 assert(!isError() && "error");
95 assert(!isDirectory() && "not a file");
96 assert(Contents && "contents not initialized");
97 if (auto *Directives = Contents->DepDirectives.load()) {
98 if (Directives->has_value())
100 }
101 return std::nullopt;
102 }
103
104 /// \returns The error.
105 std::error_code getError() const { return MaybeStat.getError(); }
106
107 /// \returns The entry status with empty filename.
108 llvm::vfs::Status getStatus() const {
109 assert(!isError() && "error");
110 assert(MaybeStat->getName().empty() && "stat name must be empty");
111 return *MaybeStat;
112 }
113
114 /// \returns The unique ID of the entry.
115 llvm::sys::fs::UniqueID getUniqueID() const {
116 assert(!isError() && "error");
117 return MaybeStat->getUniqueID();
118 }
119
120 /// \returns The data structure holding both contents and directive tokens.
/// The entry must be a non-error, non-directory entry (asserted).
// NOTE(review): the declarator line of this accessor is absent from this
// listing (the embedded numbering jumps from 120 to 122) — restore it from
// the upstream header before compiling.
122 assert(!isError() && "error");
123 assert(!isDirectory() && "not a file");
124 return Contents;
125 }
126
127private:
128 void clearStatName() {
129 if (MaybeStat)
130 MaybeStat = llvm::vfs::Status::copyWithNewName(*MaybeStat, "");
131 }
132
133 /// Either the filesystem error or status of the entry.
134 /// The filename is empty and only materialized by \c EntryRef.
135 llvm::ErrorOr<llvm::vfs::Status> MaybeStat;
136
137 /// Non-owning pointer to the file contents.
138 ///
139 /// We're using pointer here to keep the size of this class small. Instances
140 /// representing directories and filesystem errors don't hold any contents
141 /// anyway.
142 CachedFileContents *Contents;
143};
144
145using CachedRealPath = llvm::ErrorOr<std::string>;
146
147/// This class is a shared cache, that caches the 'stat' and 'open' calls to the
148/// underlying real file system, and the scanned preprocessor directives of
149/// files.
150///
151/// It is sharded based on the hash of the key to reduce the lock contention for
152/// the worker threads.
154public:
155 struct CacheShard {
156 /// The mutex that needs to be locked before mutation of any member.
157 mutable std::mutex CacheLock;
158
159 /// Map from filenames to cached entries and real paths.
160 llvm::StringMap<
161 std::pair<const CachedFileSystemEntry *, const CachedRealPath *>,
162 llvm::BumpPtrAllocator>
164
165 /// Map from unique IDs to cached entries.
166 llvm::DenseMap<llvm::sys::fs::UniqueID, const CachedFileSystemEntry *>
168
169 /// The backing storage for cached entries.
170 llvm::SpecificBumpPtrAllocator<CachedFileSystemEntry> EntryStorage;
171
172 /// The backing storage for cached contents.
173 llvm::SpecificBumpPtrAllocator<CachedFileContents> ContentsStorage;
174
175 /// The backing storage for cached real paths.
176 llvm::SpecificBumpPtrAllocator<CachedRealPath> RealPathStorage;
177
178 /// Returns entry associated with the filename or nullptr if none is found.
179 const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const;
180
181 /// Returns entry associated with the unique ID or nullptr if none is found.
183 findEntryByUID(llvm::sys::fs::UniqueID UID) const;
184
185 /// Returns entry associated with the filename if there is some. Otherwise,
186 /// constructs new one with the given status, associates it with the
187 /// filename and returns the result.
190 llvm::ErrorOr<llvm::vfs::Status> Stat);
191
192 /// Returns entry associated with the unique ID if there is some. Otherwise,
193 /// constructs new one with the given status and contents, associates it
194 /// with the unique ID and returns the result.
196 getOrEmplaceEntryForUID(llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat,
197 std::unique_ptr<llvm::MemoryBuffer> Contents);
198
199 /// Returns entry associated with the filename if there is some. Otherwise,
200 /// associates the given entry with the filename and returns it.
203 const CachedFileSystemEntry &Entry);
204
205 /// Returns the real path associated with the filename or nullptr if none is
206 /// found.
207 const CachedRealPath *findRealPathByFilename(StringRef Filename) const;
208
209 /// Returns the real path associated with the filename if there is some.
210 /// Otherwise, constructs new one with the given one, associates it with the
211 /// filename and returns the result.
212 const CachedRealPath &
214 llvm::ErrorOr<StringRef> RealPath);
215 };
216
218
219 /// Returns shard for the given key.
220 CacheShard &getShardForFilename(StringRef Filename) const;
221 CacheShard &getShardForUID(llvm::sys::fs::UniqueID UID) const;
222
223private:
224 std::unique_ptr<CacheShard[]> CacheShards;
225 unsigned NumShards;
226};
227
228/// This class is a local cache, that caches the 'stat' and 'open' calls to the
229/// underlying real file system.
231 llvm::StringMap<
232 std::pair<const CachedFileSystemEntry *, const CachedRealPath *>,
233 llvm::BumpPtrAllocator>
234 Cache;
235
236public:
237 /// Returns entry associated with the filename or nullptr if none is found.
239 assert(llvm::sys::path::is_absolute_gnu(Filename));
240 auto It = Cache.find(Filename);
241 return It == Cache.end() ? nullptr : It->getValue().first;
242 }
243
244 /// Associates the given entry with the filename and returns the given entry
245 /// pointer (for convenience).
248 const CachedFileSystemEntry &Entry) {
249 assert(llvm::sys::path::is_absolute_gnu(Filename));
250 auto [It, Inserted] = Cache.insert({Filename, {&Entry, nullptr}});
251 auto &[CachedEntry, CachedRealPath] = It->getValue();
252 if (!Inserted) {
253 // The file is already present in the local cache. If we got here, it only
254 // contains the real path. Let's make sure the entry is populated too.
255 assert((!CachedEntry && CachedRealPath) && "entry already present");
256 CachedEntry = &Entry;
257 }
258 return *CachedEntry;
259 }
260
261 /// Returns real path associated with the filename or nullptr if none is
262 /// found.
264 assert(llvm::sys::path::is_absolute_gnu(Filename));
265 auto It = Cache.find(Filename);
266 return It == Cache.end() ? nullptr : It->getValue().second;
267 }
268
269 /// Associates the given real path with the filename and returns the given
270 /// entry pointer (for convenience).
271 const CachedRealPath &
273 const CachedRealPath &RealPath) {
274 assert(llvm::sys::path::is_absolute_gnu(Filename));
275 auto [It, Inserted] = Cache.insert({Filename, {nullptr, &RealPath}});
276 auto &[CachedEntry, CachedRealPath] = It->getValue();
277 if (!Inserted) {
278 // The file is already present in the local cache. If we got here, it only
279 // contains the entry. Let's make sure the real path is populated too.
280 assert((!CachedRealPath && CachedEntry) && "real path already present");
281 CachedRealPath = &RealPath;
282 }
283 return *CachedRealPath;
284 }
285};
286
287/// Reference to a CachedFileSystemEntry.
288/// If the underlying entry is an opened file, this wrapper returns the file
289/// contents and the scanned preprocessor directives.
290class EntryRef {
291 /// The filename used to access this entry.
292 std::string Filename;
293
294 /// The underlying cached entry.
295 const CachedFileSystemEntry &Entry;
296
298
299public:
300 EntryRef(StringRef Name, const CachedFileSystemEntry &Entry)
301 : Filename(Name), Entry(Entry) {}
302
303 llvm::vfs::Status getStatus() const {
304 llvm::vfs::Status Stat = Entry.getStatus();
305 if (!Stat.isDirectory())
306 Stat = llvm::vfs::Status::copyWithNewSize(Stat, getContents().size());
307 return llvm::vfs::Status::copyWithNewName(Stat, Filename);
308 }
309
310 bool isError() const { return Entry.isError(); }
311 bool isDirectory() const { return Entry.isDirectory(); }
312
313 /// If the cached entry represents an error, promotes it into `ErrorOr`.
314 llvm::ErrorOr<EntryRef> unwrapError() const {
315 if (isError())
316 return Entry.getError();
317 return *this;
318 }
319
320 StringRef getContents() const { return Entry.getOriginalContents(); }
321
322 std::optional<ArrayRef<dependency_directives_scan::Directive>>
324 return Entry.getDirectiveTokens();
325 }
326};
327
328/// A virtual file system optimized for the dependency discovery.
329///
330/// It is primarily designed to work with source files whose contents were
331/// preprocessed to remove any tokens that are unlikely to affect the dependency
332/// computation.
333///
334/// This is not a thread-safe VFS. A single instance is meant to be used only in
335/// one thread. Multiple instances are allowed to service multiple threads
336/// running in parallel.
338 : public llvm::RTTIExtends<DependencyScanningWorkerFilesystem,
339 llvm::vfs::ProxyFileSystem> {
340public:
341 static const char ID;
342
346
347 llvm::ErrorOr<llvm::vfs::Status> status(const Twine &Path) override;
348 llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
349 openFileForRead(const Twine &Path) override;
350
351 std::error_code getRealPath(const Twine &Path,
352 SmallVectorImpl<char> &Output) override;
353
354 std::error_code setCurrentWorkingDirectory(const Twine &Path) override;
355
356 /// Make it so that no paths bypass this VFS.
357 void resetBypassedPathPrefix() { BypassedPathPrefix.reset(); }
358 /// Set the prefix for paths that should bypass this VFS and go straight to
359 /// the underlying VFS.
360 void setBypassedPathPrefix(StringRef Prefix) { BypassedPathPrefix = Prefix; }
361
362 /// Returns entry for the given filename.
363 ///
364 /// Attempts to use the local and shared caches first, then falls back to
365 /// using the underlying filesystem.
366 llvm::ErrorOr<EntryRef> getOrCreateFileSystemEntry(StringRef Filename);
367
368 /// Ensure the directive tokens are populated for this file entry.
369 ///
370 /// Returns true if the directive tokens are populated for this file entry,
371 /// false if not (i.e. this entry is not a file or its scan fails).
373
374 /// Check whether \p Path exists. By default checks cached result of \c
375 /// status(), and falls back on FS if unable to do so.
376 bool exists(const Twine &Path) override;
377
378private:
379 /// For a filename that's not yet associated with any entry in the caches,
380 /// uses the underlying filesystem to either look up the entry in the
381 /// shared cache indexed by unique ID, or create a new entry from scratch.
382 /// \p FilenameForLookup will always be an absolute path, and different from
383 /// \p OriginalFilename if \p OriginalFilename is relative.
384 llvm::ErrorOr<const CachedFileSystemEntry &>
385 computeAndStoreResult(StringRef OriginalFilename,
386 StringRef FilenameForLookup);
387
388 /// Represents a filesystem entry that has been stat-ed (and potentially read)
389 /// and that's about to be inserted into the cache as `CachedFileSystemEntry`.
390 struct TentativeEntry {
391 llvm::vfs::Status Status;
392 std::unique_ptr<llvm::MemoryBuffer> Contents;
393
394 TentativeEntry(llvm::vfs::Status Status,
395 std::unique_ptr<llvm::MemoryBuffer> Contents = nullptr)
396 : Status(std::move(Status)), Contents(std::move(Contents)) {}
397 };
398
399 /// Reads file at the given path. Enforces consistency between the file size
400 /// in status and size of read contents.
401 llvm::ErrorOr<TentativeEntry> readFile(StringRef Filename);
402
403 /// Returns entry associated with the unique ID of the given tentative entry
404 /// if there is some in the shared cache. Otherwise, constructs new one,
405 /// associates it with the unique ID and returns the result.
406 const CachedFileSystemEntry &
407 getOrEmplaceSharedEntryForUID(TentativeEntry TEntry);
408
409 /// Returns entry associated with the filename or nullptr if none is found.
410 ///
411 /// Returns entry from local cache if there is some. Otherwise, if the entry
412 /// is found in the shared cache, writes it through the local cache and
413 /// returns it. Otherwise returns nullptr.
414 const CachedFileSystemEntry *
415 findEntryByFilenameWithWriteThrough(StringRef Filename);
416
417 /// Returns entry associated with the unique ID in the shared cache or nullptr
418 /// if none is found.
419 const CachedFileSystemEntry *
420 findSharedEntryByUID(llvm::vfs::Status Stat) const {
421 return SharedCache.getShardForUID(Stat.getUniqueID())
422 .findEntryByUID(Stat.getUniqueID());
423 }
424
425 /// Associates the given entry with the filename in the local cache and
426 /// returns it.
427 const CachedFileSystemEntry &
428 insertLocalEntryForFilename(StringRef Filename,
429 const CachedFileSystemEntry &Entry) {
430 return LocalCache.insertEntryForFilename(Filename, Entry);
431 }
432
433 /// Returns entry associated with the filename in the shared cache if there is
434 /// some. Otherwise, constructs new one with the given error code, associates
435 /// it with the filename and returns the result.
436 const CachedFileSystemEntry &
437 getOrEmplaceSharedEntryForFilename(StringRef Filename, std::error_code EC) {
438 return SharedCache.getShardForFilename(Filename)
440 }
441
442 /// Returns entry associated with the filename in the shared cache if there is
443 /// some. Otherwise, associates the given entry with the filename and returns
444 /// it.
445 const CachedFileSystemEntry &
446 getOrInsertSharedEntryForFilename(StringRef Filename,
447 const CachedFileSystemEntry &Entry) {
448 return SharedCache.getShardForFilename(Filename)
450 }
451
452 void printImpl(raw_ostream &OS, PrintType Type,
453 unsigned IndentLevel) const override {
454 printIndent(OS, IndentLevel);
455 OS << "DependencyScanningFilesystem\n";
456 getUnderlyingFS().print(OS, Type, IndentLevel + 1);
457 }
458
459 /// Whether this path should bypass this VFS and go straight to the underlying
460 /// VFS.
461 bool shouldBypass(StringRef Path) const;
462
463 /// The global cache shared between worker threads.
464 DependencyScanningFilesystemSharedCache &SharedCache;
465 /// The local cache is used by the worker thread to cache file system queries
466 /// locally instead of querying the global cache every time.
467 DependencyScanningFilesystemLocalCache LocalCache;
468
469 /// Prefix of paths that should go straight to the underlying VFS.
470 std::optional<std::string> BypassedPathPrefix;
471
472 /// The working directory to use for making relative paths absolute before
473 /// using them for cache lookups.
474 llvm::ErrorOr<std::string> WorkingDirForCacheLookup;
475
476 void updateWorkingDirForCacheLookup();
477
478 llvm::ErrorOr<StringRef>
479 tryGetFilenameForLookup(StringRef OriginalFilename,
480 llvm::SmallVectorImpl<char> &PathBuf) const;
481};
482
483} // end namespace dependencies
484} // end namespace tooling
485} // end namespace clang
486
487#endif // LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H
IndirectLocalPath & Path
This is the interface for scanning header and source files to get the minimum necessary preprocessor ...
StringRef Filename
Definition: Format.cpp:3001
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified.
TypePropertyCache< Private > Cache
Definition: Type.cpp:4483
An in-memory representation of a file system entity that is of interest to the dependency scanning fi...
CachedFileSystemEntry(llvm::ErrorOr< llvm::vfs::Status > Stat)
Creates an entry without contents: either a filesystem error or a directory with stat value.
CachedFileSystemEntry(llvm::ErrorOr< llvm::vfs::Status > Stat, CachedFileContents *Contents)
Creates an entry representing a file with contents.
std::optional< ArrayRef< dependency_directives_scan::Directive > > getDirectiveTokens() const
This class is a local cache, that caches the 'stat' and 'open' calls to the underlying real file syst...
const CachedRealPath & insertRealPathForFilename(StringRef Filename, const CachedRealPath &RealPath)
Associates the given real path with the filename and returns the given entry pointer (for convenience...
const CachedFileSystemEntry & insertEntryForFilename(StringRef Filename, const CachedFileSystemEntry &Entry)
Associates the given entry with the filename and returns the given entry pointer (for convenience).
const CachedRealPath * findRealPathByFilename(StringRef Filename) const
Returns real path associated with the filename or nullptr if none is found.
const CachedFileSystemEntry * findEntryByFilename(StringRef Filename) const
Returns entry associated with the filename or nullptr if none is found.
This class is a shared cache, that caches the 'stat' and 'open' calls to the underlying real file sys...
CacheShard & getShardForFilename(StringRef Filename) const
Returns shard for the given key.
A virtual file system optimized for the dependency discovery.
std::error_code getRealPath(const Twine &Path, SmallVectorImpl< char > &Output) override
bool ensureDirectiveTokensArePopulated(EntryRef Entry)
Ensure the directive tokens are populated for this file entry.
bool exists(const Twine &Path) override
Check whether Path exists.
llvm::ErrorOr< EntryRef > getOrCreateFileSystemEntry(StringRef Filename)
Returns entry for the given filename.
llvm::ErrorOr< std::unique_ptr< llvm::vfs::File > > openFileForRead(const Twine &Path) override
void setBypassedPathPrefix(StringRef Prefix)
Set the prefix for paths that should bypass this VFS and go straight to the underlying VFS.
void resetBypassedPathPrefix()
Make it so that no paths bypass this VFS.
llvm::ErrorOr< llvm::vfs::Status > status(const Twine &Path) override
Reference to a CachedFileSystemEntry.
EntryRef(StringRef Name, const CachedFileSystemEntry &Entry)
std::optional< ArrayRef< dependency_directives_scan::Directive > > getDirectiveTokens() const
llvm::ErrorOr< EntryRef > unwrapError() const
If the cached entry represents an error, promotes it into ErrorOr.
llvm::ErrorOr< std::string > CachedRealPath
The JSON file list parser is used to communicate input to InstallAPI.
Contents and directive tokens of a cached file entry.
std::mutex ValueLock
The mutex that must be locked before mutating directive tokens.
std::atomic< const std::optional< DependencyDirectivesTy > * > DepDirectives
Accessor to the directive tokens that's atomic to avoid data races.
CachedFileContents(std::unique_ptr< llvm::MemoryBuffer > Contents)
std::unique_ptr< llvm::MemoryBuffer > Original
Owning storage for the original contents.
SmallVector< dependency_directives_scan::Token, 10 > DepDirectiveTokens
const CachedFileSystemEntry & getOrEmplaceEntryForUID(llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat, std::unique_ptr< llvm::MemoryBuffer > Contents)
Returns entry associated with the unique ID if there is some.
llvm::SpecificBumpPtrAllocator< CachedFileSystemEntry > EntryStorage
The backing storage for cached entries.
std::mutex CacheLock
The mutex that needs to be locked before mutation of any member.
llvm::DenseMap< llvm::sys::fs::UniqueID, const CachedFileSystemEntry * > EntriesByUID
Map from unique IDs to cached entries.
const CachedFileSystemEntry * findEntryByUID(llvm::sys::fs::UniqueID UID) const
Returns entry associated with the unique ID or nullptr if none is found.
llvm::StringMap< std::pair< const CachedFileSystemEntry *, const CachedRealPath * >, llvm::BumpPtrAllocator > CacheByFilename
Map from filenames to cached entries and real paths.
const CachedRealPath * findRealPathByFilename(StringRef Filename) const
Returns the real path associated with the filename or nullptr if none is found.
const CachedFileSystemEntry & getOrInsertEntryForFilename(StringRef Filename, const CachedFileSystemEntry &Entry)
Returns entry associated with the filename if there is some.
const CachedFileSystemEntry * findEntryByFilename(StringRef Filename) const
Returns entry associated with the filename or nullptr if none is found.
llvm::SpecificBumpPtrAllocator< CachedRealPath > RealPathStorage
The backing storage for cached real paths.
llvm::SpecificBumpPtrAllocator< CachedFileContents > ContentsStorage
The backing storage for cached contents.
const CachedFileSystemEntry & getOrEmplaceEntryForFilename(StringRef Filename, llvm::ErrorOr< llvm::vfs::Status > Stat)
Returns entry associated with the filename if there is some.
const CachedRealPath & getOrEmplaceRealPathForFilename(StringRef Filename, llvm::ErrorOr< StringRef > RealPath)
Returns the real path associated with the filename if there is some.