clang 19.0.0git
DependencyScanningFilesystem.h
Go to the documentation of this file.
1//===- DependencyScanningFilesystem.h - clang-scan-deps fs ===---*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H
10#define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H
11
12#include "clang/Basic/LLVM.h"
14#include "llvm/ADT/DenseMap.h"
15#include "llvm/ADT/StringMap.h"
16#include "llvm/Support/Allocator.h"
17#include "llvm/Support/ErrorOr.h"
18#include "llvm/Support/VirtualFileSystem.h"
19#include <mutex>
20#include <optional>
21
22namespace clang {
23namespace tooling {
24namespace dependencies {
25
28
29/// Contents and directive tokens of a cached file entry. Single instance can
30/// be shared between multiple entries.
32 CachedFileContents(std::unique_ptr<llvm::MemoryBuffer> Contents)
33 : Original(std::move(Contents)), DepDirectives(nullptr) {}
34
35 /// Owning storage for the original contents.
36 std::unique_ptr<llvm::MemoryBuffer> Original;
37
38 /// The mutex that must be locked before mutating directive tokens.
39 std::mutex ValueLock;
41 /// Accessor to the directive tokens that's atomic to avoid data races.
42 /// \p CachedFileContents has ownership of the pointer.
43 std::atomic<const std::optional<DependencyDirectivesTy> *> DepDirectives;
44
46};
47
48/// An in-memory representation of a file system entity that is of interest to
49/// the dependency scanning filesystem.
50///
51/// It represents one of the following:
52/// - opened file with contents and a stat value,
53/// - opened file with contents, directive tokens and a stat value,
54/// - directory entry with its stat value,
55/// - filesystem error.
56///
57/// Single instance of this class can be shared across different filenames (e.g.
58/// a regular file and a symlink). For this reason the status filename is empty
59/// and is only materialized by \c EntryRef that knows the requested filename.
61public:
62 /// Creates an entry without contents: either a filesystem error or
63 /// a directory with stat value.
// Stat is taken by value and moved into MaybeStat; Contents stays null
// because this overload carries no file contents.
64 CachedFileSystemEntry(llvm::ErrorOr<llvm::vfs::Status> Stat)
65 : MaybeStat(std::move(Stat)), Contents(nullptr) {
// Blank the status name when a status is present; no effect for an error.
66 clearStatName();
67 }
68
69 /// Creates an entry representing a file with contents.
// Stat is moved into MaybeStat. Contents is a raw pointer, so std::move on
// it is a plain pointer copy; the member is documented as non-owning.
70 CachedFileSystemEntry(llvm::ErrorOr<llvm::vfs::Status> Stat,
71 CachedFileContents *Contents)
72 : MaybeStat(std::move(Stat)), Contents(std::move(Contents)) {
// Blank the status name when a status is present; no effect for an error.
73 clearStatName();
74 }
75
76 /// \returns True if the entry is a filesystem error.
// MaybeStat is an ErrorOr; it tests false when it holds an error code.
77 bool isError() const { return !MaybeStat; }
78
79 /// \returns True if the current entry represents a directory.
// isError() is checked first so MaybeStat is only dereferenced when it
// holds a status.
80 bool isDirectory() const { return !isError() && MaybeStat->isDirectory(); }
81
82 /// \returns Original contents of the file.
// Preconditions (checked by assert only): the entry is not an error, is not
// a directory, and has a non-null Contents pointer.
83 StringRef getOriginalContents() const {
84 assert(!isError() && "error");
85 assert(!MaybeStat->isDirectory() && "not a file");
86 assert(Contents && "contents not initialized");
// The returned StringRef refers to the buffer held by Contents->Original.
87 return Contents->Original->getBuffer();
88 }
89
90 /// \returns The scanned preprocessor directive tokens of the file that are
91 /// used to speed up preprocessing, if available.
92 std::optional<ArrayRef<dependency_directives_scan::Directive>>
94 assert(!isError() && "error");
95 assert(!isDirectory() && "not a file");
96 assert(Contents && "contents not initialized");
97 if (auto *Directives = Contents->DepDirectives.load()) {
98 if (Directives->has_value())
100 }
101 return std::nullopt;
102 }
103
104 /// \returns The error.
// Forwards to ErrorOr::getError(). NOTE(review): unlike the other accessors
// there is no isError() assert here -- presumably it may be called on a
// non-error entry; confirm against callers.
105 std::error_code getError() const { return MaybeStat.getError(); }
106
107 /// \returns The entry status with empty filename.
// Returns a copy of the stored status. Asserts that the entry is not an
// error and that the stored status name is empty.
108 llvm::vfs::Status getStatus() const {
109 assert(!isError() && "error");
110 assert(MaybeStat->getName().empty() && "stat name must be empty");
111 return *MaybeStat;
112 }
113
114 /// \returns The unique ID of the entry.
// Reads the unique ID from the stored status. Asserts that the entry is not
// an error before dereferencing MaybeStat.
115 llvm::sys::fs::UniqueID getUniqueID() const {
116 assert(!isError() && "error");
117 return MaybeStat->getUniqueID();
118 }
119
120 /// \returns The data structure holding both contents and directive tokens.
122 assert(!isError() && "error");
123 assert(!isDirectory() && "not a file");
124 return Contents;
125 }
126
127private:
// Replaces the stored status with a copy whose name is the empty string.
// Does nothing when MaybeStat holds an error.
128 void clearStatName() {
129 if (MaybeStat)
130 MaybeStat = llvm::vfs::Status::copyWithNewName(*MaybeStat, "");
131 }
132
133 /// Either the filesystem error or status of the entry.
134 /// The filename is empty and only materialized by \c EntryRef.
135 llvm::ErrorOr<llvm::vfs::Status> MaybeStat;
136
137 /// Non-owning pointer to the file contents.
138 ///
139 /// We're using pointer here to keep the size of this class small. Instances
140 /// representing directories and filesystem errors don't hold any contents
141 /// anyway.
142 CachedFileContents *Contents;
143};
144
145/// This class is a shared cache, that caches the 'stat' and 'open' calls to the
146/// underlying real file system, and the scanned preprocessor directives of
147/// files.
148///
149/// It is sharded based on the hash of the key to reduce the lock contention for
150/// the worker threads.
152public:
153 struct CacheShard {
154 /// The mutex that needs to be locked before mutation of any member.
155 mutable std::mutex CacheLock;
156
157 /// Map from filenames to cached entries.
158 llvm::StringMap<const CachedFileSystemEntry *, llvm::BumpPtrAllocator>
160
161 /// Map from unique IDs to cached entries.
162 llvm::DenseMap<llvm::sys::fs::UniqueID, const CachedFileSystemEntry *>
164
165 /// The backing storage for cached entries.
166 llvm::SpecificBumpPtrAllocator<CachedFileSystemEntry> EntryStorage;
167
168 /// The backing storage for cached contents.
169 llvm::SpecificBumpPtrAllocator<CachedFileContents> ContentsStorage;
170
171 /// Returns entry associated with the filename or nullptr if none is found.
172 const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const;
173
174 /// Returns entry associated with the unique ID or nullptr if none is found.
176 findEntryByUID(llvm::sys::fs::UniqueID UID) const;
177
178 /// Returns entry associated with the filename if there is some. Otherwise,
179 /// constructs new one with the given status, associates it with the
180 /// filename and returns the result.
183 llvm::ErrorOr<llvm::vfs::Status> Stat);
184
185 /// Returns entry associated with the unique ID if there is some. Otherwise,
186 /// constructs new one with the given status and contents, associates it
187 /// with the unique ID and returns the result.
189 getOrEmplaceEntryForUID(llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat,
190 std::unique_ptr<llvm::MemoryBuffer> Contents);
191
192 /// Returns entry associated with the filename if there is some. Otherwise,
193 /// associates the given entry with the filename and returns it.
196 const CachedFileSystemEntry &Entry);
197 };
198
200
201 /// Returns shard for the given key.
202 CacheShard &getShardForFilename(StringRef Filename) const;
203 CacheShard &getShardForUID(llvm::sys::fs::UniqueID UID) const;
204
205private:
206 std::unique_ptr<CacheShard[]> CacheShards;
207 unsigned NumShards;
208};
209
210/// This class is a local cache, that caches the 'stat' and 'open' calls to the
211/// underlying real file system.
213 llvm::StringMap<const CachedFileSystemEntry *, llvm::BumpPtrAllocator> Cache;
214
215public:
216 /// Returns entry associated with the filename or nullptr if none is found.
218 assert(llvm::sys::path::is_absolute_gnu(Filename));
219 auto It = Cache.find(Filename);
220 return It == Cache.end() ? nullptr : It->getValue();
221 }
222
223 /// Associates the given entry with the filename and returns the given entry
224 /// pointer (for convenience).
227 const CachedFileSystemEntry &Entry) {
228 assert(llvm::sys::path::is_absolute_gnu(Filename));
229 const auto *InsertedEntry = Cache.insert({Filename, &Entry}).first->second;
230 assert(InsertedEntry == &Entry && "entry already present");
231 return *InsertedEntry;
232 }
233};
234
235/// Reference to a CachedFileSystemEntry.
236/// If the underlying entry is an opened file, this wrapper returns the file
237/// contents and the scanned preprocessor directives.
238class EntryRef {
239 /// The filename used to access this entry.
240 std::string Filename;
241
242 /// The underlying cached entry.
243 const CachedFileSystemEntry &Entry;
244
245public:
// Name is copied into the std::string member Filename. Entry is bound as a
// reference member, so the referenced entry must outlive this EntryRef.
246 EntryRef(StringRef Name, const CachedFileSystemEntry &Entry)
247 : Filename(Name), Entry(Entry) {}
248
// Builds the status reported for this filename from the underlying entry.
// Entry.getStatus() asserts that the entry is not an error.
249 llvm::vfs::Status getStatus() const {
250 llvm::vfs::Status Stat = Entry.getStatus();
// For anything that is not a directory, report the size of the cached
// contents rather than the size recorded in the stored status.
251 if (!Stat.isDirectory())
252 Stat = llvm::vfs::Status::copyWithNewSize(Stat, getContents().size());
// The stored status has an empty name; attach the name held by this EntryRef.
253 return llvm::vfs::Status::copyWithNewName(Stat, Filename);
254 }
255
// Forwards to the underlying entry: true when it holds a filesystem error.
256 bool isError() const { return Entry.isError(); }
// Forwards to the underlying entry: true when it is a non-error directory.
257 bool isDirectory() const { return Entry.isDirectory(); }
258
259 /// If the cached entry represents an error, promotes it into `ErrorOr`.
// Returns the entry's error code when the entry is an error; otherwise
// returns a copy of this EntryRef wrapped in ErrorOr.
260 llvm::ErrorOr<EntryRef> unwrapError() const {
261 if (isError())
262 return Entry.getError();
263 return *this;
264 }
265
// Forwards to Entry.getOriginalContents(), which asserts that the entry is a
// non-error, non-directory entry with contents.
266 StringRef getContents() const { return Entry.getOriginalContents(); }
267
268 std::optional<ArrayRef<dependency_directives_scan::Directive>>
270 return Entry.getDirectiveTokens();
271 }
272};
273
274/// A virtual file system optimized for the dependency discovery.
275///
276 /// It is primarily designed to work with source files whose contents were
277/// preprocessed to remove any tokens that are unlikely to affect the dependency
278/// computation.
279///
280/// This is not a thread safe VFS. A single instance is meant to be used only in
281/// one thread. Multiple instances are allowed to service multiple threads
282/// running in parallel.
284 : public llvm::RTTIExtends<DependencyScanningWorkerFilesystem,
285 llvm::vfs::ProxyFileSystem> {
286public:
287 static const char ID;
288
292
293 llvm::ErrorOr<llvm::vfs::Status> status(const Twine &Path) override;
294 llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
295 openFileForRead(const Twine &Path) override;
296
297 std::error_code setCurrentWorkingDirectory(const Twine &Path) override;
298
299 /// Returns entry for the given filename.
300 ///
301 /// Attempts to use the local and shared caches first, then falls back to
302 /// using the underlying filesystem.
303 llvm::ErrorOr<EntryRef>
305 bool DisableDirectivesScanning = false);
306
307private:
308 /// Check whether the file should be scanned for preprocessor directives.
309 bool shouldScanForDirectives(StringRef Filename);
310
311 /// For a filename that's not yet associated with any entry in the caches,
312 /// uses the underlying filesystem to either look up the entry in the
313 /// shared cache indexed by unique ID, or create a new entry from scratch.
314 /// \p FilenameForLookup will always be an absolute path, and different than
315 /// \p OriginalFilename if \p OriginalFilename is relative.
316 llvm::ErrorOr<const CachedFileSystemEntry &>
317 computeAndStoreResult(StringRef OriginalFilename,
318 StringRef FilenameForLookup);
319
320 /// Scan for preprocessor directives for the given entry if necessary and
321 /// returns a wrapper object with reference semantics.
322 EntryRef scanForDirectivesIfNecessary(const CachedFileSystemEntry &Entry,
323 StringRef Filename, bool Disable);
324
325 /// Represents a filesystem entry that has been stat-ed (and potentially read)
326 /// and that's about to be inserted into the cache as `CachedFileSystemEntry`.
327 struct TentativeEntry {
// Status of the entry.
328 llvm::vfs::Status Status;
// Owning buffer with the file contents; null when none were supplied.
329 std::unique_ptr<llvm::MemoryBuffer> Contents;
330
// Both arguments are taken by value and moved into the members. Contents
// defaults to nullptr.
331 TentativeEntry(llvm::vfs::Status Status,
332 std::unique_ptr<llvm::MemoryBuffer> Contents = nullptr)
333 : Status(std::move(Status)), Contents(std::move(Contents)) {}
334 };
335
336 /// Reads file at the given path. Enforces consistency between the file size
337 /// in status and size of read contents.
338 llvm::ErrorOr<TentativeEntry> readFile(StringRef Filename);
339
340 /// Returns entry associated with the unique ID of the given tentative entry
341 /// if there is some in the shared cache. Otherwise, constructs new one,
342 /// associates it with the unique ID and returns the result.
343 const CachedFileSystemEntry &
344 getOrEmplaceSharedEntryForUID(TentativeEntry TEntry);
345
346 /// Returns entry associated with the filename or nullptr if none is found.
347 ///
348 /// Returns entry from local cache if there is some. Otherwise, if the entry
349 /// is found in the shared cache, writes it through the local cache and
350 /// returns it. Otherwise returns nullptr.
351 const CachedFileSystemEntry *
352 findEntryByFilenameWithWriteThrough(StringRef Filename);
353
354 /// Returns entry associated with the unique ID in the shared cache or nullptr
355 /// if none is found.
// Stat is taken by value; only its unique ID is read. The same ID is used
// both to pick the shard and as the lookup key inside that shard.
356 const CachedFileSystemEntry *
357 findSharedEntryByUID(llvm::vfs::Status Stat) const {
358 return SharedCache.getShardForUID(Stat.getUniqueID())
359 .findEntryByUID(Stat.getUniqueID());
360 }
361
362 /// Associates the given entry with the filename in the local cache and
363 /// returns it.
// Thin forwarder to LocalCache.insertEntryForFilename(); returns whatever
// that call returns.
364 const CachedFileSystemEntry &
365 insertLocalEntryForFilename(StringRef Filename,
366 const CachedFileSystemEntry &Entry) {
367 return LocalCache.insertEntryForFilename(Filename, Entry);
368 }
369
370 /// Returns entry associated with the filename in the shared cache if there is
371 /// some. Otherwise, constructs new one with the given error code, associates
372 /// it with the filename and returns the result.
373 const CachedFileSystemEntry &
374 getOrEmplaceSharedEntryForFilename(StringRef Filename, std::error_code EC) {
375 return SharedCache.getShardForFilename(Filename)
377 }
378
379 /// Returns entry associated with the filename in the shared cache if there is
380 /// some. Otherwise, associates the given entry with the filename and returns
381 /// it.
382 const CachedFileSystemEntry &
383 getOrInsertSharedEntryForFilename(StringRef Filename,
384 const CachedFileSystemEntry &Entry) {
385 return SharedCache.getShardForFilename(Filename)
387 }
388
// Writes this filesystem's label at IndentLevel, then prints the underlying
// filesystem one indentation level deeper.
389 void printImpl(raw_ostream &OS, PrintType Type,
390 unsigned IndentLevel) const override {
391 printIndent(OS, IndentLevel);
392 OS << "DependencyScanningFilesystem\n";
393 getUnderlyingFS().print(OS, Type, IndentLevel + 1);
394 }
395
396 /// The global cache shared between worker threads.
397 DependencyScanningFilesystemSharedCache &SharedCache;
398 /// The local cache is used by the worker thread to cache file system queries
399 /// locally instead of querying the global cache every time.
400 DependencyScanningFilesystemLocalCache LocalCache;
401
402 /// The working directory to use for making relative paths absolute before
403 /// using them for cache lookups.
404 llvm::ErrorOr<std::string> WorkingDirForCacheLookup;
405
406 void updateWorkingDirForCacheLookup();
407};
408
409} // end namespace dependencies
410} // end namespace tooling
411} // end namespace clang
412
413#endif // LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H
This is the interface for scanning header and source files to get the minimum necessary preprocessor directives for evaluating includes.
StringRef Filename
Definition: Format.cpp:2980
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified.
TypePropertyCache< Private > Cache
Definition: Type.cpp:4305
An in-memory representation of a file system entity that is of interest to the dependency scanning filesystem.
CachedFileSystemEntry(llvm::ErrorOr< llvm::vfs::Status > Stat)
Creates an entry without contents: either a filesystem error or a directory with stat value.
CachedFileSystemEntry(llvm::ErrorOr< llvm::vfs::Status > Stat, CachedFileContents *Contents)
Creates an entry representing a file with contents.
std::optional< ArrayRef< dependency_directives_scan::Directive > > getDirectiveTokens() const
This class is a local cache, that caches the 'stat' and 'open' calls to the underlying real file system.
const CachedFileSystemEntry & insertEntryForFilename(StringRef Filename, const CachedFileSystemEntry &Entry)
Associates the given entry with the filename and returns the given entry pointer (for convenience).
const CachedFileSystemEntry * findEntryByFilename(StringRef Filename) const
Returns entry associated with the filename or nullptr if none is found.
This class is a shared cache, that caches the 'stat' and 'open' calls to the underlying real file system, and the scanned preprocessor directives of files.
CacheShard & getShardForFilename(StringRef Filename) const
Returns shard for the given key.
A virtual file system optimized for the dependency discovery.
llvm::ErrorOr< EntryRef > getOrCreateFileSystemEntry(StringRef Filename, bool DisableDirectivesScanning=false)
Returns entry for the given filename.
llvm::ErrorOr< std::unique_ptr< llvm::vfs::File > > openFileForRead(const Twine &Path) override
llvm::ErrorOr< llvm::vfs::Status > status(const Twine &Path) override
Reference to a CachedFileSystemEntry.
EntryRef(StringRef Name, const CachedFileSystemEntry &Entry)
std::optional< ArrayRef< dependency_directives_scan::Directive > > getDirectiveTokens() const
llvm::ErrorOr< EntryRef > unwrapError() const
If the cached entry represents an error, promotes it into ErrorOr.
The JSON file list parser is used to communicate input to InstallAPI.
Definition: Format.h:5304
Contents and directive tokens of a cached file entry.
std::mutex ValueLock
The mutex that must be locked before mutating directive tokens.
std::atomic< const std::optional< DependencyDirectivesTy > * > DepDirectives
Accessor to the directive tokens that's atomic to avoid data races.
CachedFileContents(std::unique_ptr< llvm::MemoryBuffer > Contents)
std::unique_ptr< llvm::MemoryBuffer > Original
Owning storage for the original contents.
SmallVector< dependency_directives_scan::Token, 10 > DepDirectiveTokens
const CachedFileSystemEntry & getOrEmplaceEntryForUID(llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat, std::unique_ptr< llvm::MemoryBuffer > Contents)
Returns entry associated with the unique ID if there is some.
llvm::SpecificBumpPtrAllocator< CachedFileSystemEntry > EntryStorage
The backing storage for cached entries.
std::mutex CacheLock
The mutex that needs to be locked before mutation of any member.
llvm::DenseMap< llvm::sys::fs::UniqueID, const CachedFileSystemEntry * > EntriesByUID
Map from unique IDs to cached entries.
const CachedFileSystemEntry * findEntryByUID(llvm::sys::fs::UniqueID UID) const
Returns entry associated with the unique ID or nullptr if none is found.
const CachedFileSystemEntry & getOrInsertEntryForFilename(StringRef Filename, const CachedFileSystemEntry &Entry)
Returns entry associated with the filename if there is some.
const CachedFileSystemEntry * findEntryByFilename(StringRef Filename) const
Returns entry associated with the filename or nullptr if none is found.
llvm::SpecificBumpPtrAllocator< CachedFileContents > ContentsStorage
The backing storage for cached contents.
const CachedFileSystemEntry & getOrEmplaceEntryForFilename(StringRef Filename, llvm::ErrorOr< llvm::vfs::Status > Stat)
Returns entry associated with the filename if there is some.
llvm::StringMap< const CachedFileSystemEntry *, llvm::BumpPtrAllocator > EntriesByFilename
Map from filenames to cached entries.