clang 17.0.0git
DependencyScanningFilesystem.cpp
Go to the documentation of this file.
1//===- DependencyScanningFilesystem.cpp - clang-scan-deps fs --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "llvm/Support/MemoryBuffer.h"
11#include "llvm/Support/SmallVectorMemoryBuffer.h"
12#include "llvm/Support/Threading.h"
13#include <optional>
14
15using namespace clang;
16using namespace tooling;
17using namespace dependencies;
18
19llvm::ErrorOr<DependencyScanningWorkerFilesystem::TentativeEntry>
20DependencyScanningWorkerFilesystem::readFile(StringRef Filename) {
21 // Load the file and its content from the file system.
22 auto MaybeFile = getUnderlyingFS().openFileForRead(Filename);
23 if (!MaybeFile)
24 return MaybeFile.getError();
25 auto File = std::move(*MaybeFile);
26
27 auto MaybeStat = File->status();
28 if (!MaybeStat)
29 return MaybeStat.getError();
30 auto Stat = std::move(*MaybeStat);
31
32 auto MaybeBuffer = File->getBuffer(Stat.getName());
33 if (!MaybeBuffer)
34 return MaybeBuffer.getError();
35 auto Buffer = std::move(*MaybeBuffer);
36
37 // If the file size changed between read and stat, pretend it didn't.
38 if (Stat.getSize() != Buffer->getBufferSize())
39 Stat = llvm::vfs::Status::copyWithNewSize(Stat, Buffer->getBufferSize());
40
41 return TentativeEntry(Stat, std::move(Buffer));
42}
43
44EntryRef DependencyScanningWorkerFilesystem::scanForDirectivesIfNecessary(
45 const CachedFileSystemEntry &Entry, StringRef Filename, bool Disable) {
46 if (Entry.isError() || Entry.isDirectory() || Disable ||
47 !shouldScanForDirectives(Filename))
48 return EntryRef(Filename, Entry);
49
50 CachedFileContents *Contents = Entry.getCachedContents();
51 assert(Contents && "contents not initialized");
52
53 // Double-checked locking.
54 if (Contents->DepDirectives.load())
55 return EntryRef(Filename, Entry);
56
57 std::lock_guard<std::mutex> GuardLock(Contents->ValueLock);
58
59 // Double-checked locking.
60 if (Contents->DepDirectives.load())
61 return EntryRef(Filename, Entry);
62
64 // Scan the file for preprocessor directives that might affect the
65 // dependencies.
66 if (scanSourceForDependencyDirectives(Contents->Original->getBuffer(),
67 Contents->DepDirectiveTokens,
68 Directives)) {
69 Contents->DepDirectiveTokens.clear();
70 // FIXME: Propagate the diagnostic if desired by the client.
71 Contents->DepDirectives.store(new std::optional<DependencyDirectivesTy>());
72 return EntryRef(Filename, Entry);
73 }
74
75 // This function performed double-checked locking using `DepDirectives`.
76 // Assigning it must be the last thing this function does, otherwise other
77 // threads may skip the
78 // critical section (`DepDirectives != nullptr`), leading to a data race.
79 Contents->DepDirectives.store(
80 new std::optional<DependencyDirectivesTy>(std::move(Directives)));
81 return EntryRef(Filename, Entry);
82}
83
86 // This heuristic was chosen using a empirical testing on a
87 // reasonably high core machine (iMacPro 18 cores / 36 threads). The cache
88 // sharding gives a performance edge by reducing the lock contention.
89 // FIXME: A better heuristic might also consider the OS to account for
90 // the different cost of lock contention on different OSes.
91 NumShards =
92 std::max(2u, llvm::hardware_concurrency().compute_thread_count() / 4);
93 CacheShards = std::make_unique<CacheShard[]>(NumShards);
94}
95
98 StringRef Filename) const {
99 return CacheShards[llvm::hash_value(Filename) % NumShards];
100}
101
104 llvm::sys::fs::UniqueID UID) const {
105 auto Hash = llvm::hash_combine(UID.getDevice(), UID.getFile());
106 return CacheShards[Hash % NumShards];
107}
108
111 StringRef Filename) const {
112 std::lock_guard<std::mutex> LockGuard(CacheLock);
113 auto It = EntriesByFilename.find(Filename);
114 return It == EntriesByFilename.end() ? nullptr : It->getValue();
115}
116
119 llvm::sys::fs::UniqueID UID) const {
120 std::lock_guard<std::mutex> LockGuard(CacheLock);
121 auto It = EntriesByUID.find(UID);
122 return It == EntriesByUID.end() ? nullptr : It->getSecond();
123}
124
128 llvm::ErrorOr<llvm::vfs::Status> Stat) {
129 std::lock_guard<std::mutex> LockGuard(CacheLock);
130 auto Insertion = EntriesByFilename.insert({Filename, nullptr});
131 if (Insertion.second)
132 Insertion.first->second =
133 new (EntryStorage.Allocate()) CachedFileSystemEntry(std::move(Stat));
134 return *Insertion.first->second;
135}
136
139 llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat,
140 std::unique_ptr<llvm::MemoryBuffer> Contents) {
141 std::lock_guard<std::mutex> LockGuard(CacheLock);
142 auto Insertion = EntriesByUID.insert({UID, nullptr});
143 if (Insertion.second) {
144 CachedFileContents *StoredContents = nullptr;
145 if (Contents)
146 StoredContents = new (ContentsStorage.Allocate())
147 CachedFileContents(std::move(Contents));
148 Insertion.first->second = new (EntryStorage.Allocate())
149 CachedFileSystemEntry(std::move(Stat), StoredContents);
150 }
151 return *Insertion.first->second;
152}
153
157 const CachedFileSystemEntry &Entry) {
158 std::lock_guard<std::mutex> LockGuard(CacheLock);
159 return *EntriesByFilename.insert({Filename, &Entry}).first->getValue();
160}
161
162/// Whitelist file extensions that should be minimized, treating no extension as
163/// a source file that should be minimized.
164///
165/// This is kinda hacky, it would be better if we knew what kind of file Clang
166/// was expecting instead.
168 StringRef Ext = llvm::sys::path::extension(Filename);
169 if (Ext.empty())
170 return true; // C++ standard library
171 return llvm::StringSwitch<bool>(Ext)
172 .CasesLower(".c", ".cc", ".cpp", ".c++", ".cxx", true)
173 .CasesLower(".h", ".hh", ".hpp", ".h++", ".hxx", true)
174 .CasesLower(".m", ".mm", true)
175 .CasesLower(".i", ".ii", ".mi", ".mmi", true)
176 .CasesLower(".def", ".inc", true)
177 .Default(false);
178}
179
180static bool shouldCacheStatFailures(StringRef Filename) {
181 StringRef Ext = llvm::sys::path::extension(Filename);
182 if (Ext.empty())
183 return false; // This may be the module cache directory.
184 // Only cache stat failures on files that are not expected to change during
185 // the build.
186 StringRef FName = llvm::sys::path::filename(Filename);
187 if (FName == "module.modulemap" || FName == "module.map")
188 return true;
190}
191
192bool DependencyScanningWorkerFilesystem::shouldScanForDirectives(
193 StringRef Filename) {
195}
196
198DependencyScanningWorkerFilesystem::getOrEmplaceSharedEntryForUID(
199 TentativeEntry TEntry) {
200 auto &Shard = SharedCache.getShardForUID(TEntry.Status.getUniqueID());
201 return Shard.getOrEmplaceEntryForUID(TEntry.Status.getUniqueID(),
202 std::move(TEntry.Status),
203 std::move(TEntry.Contents));
204}
205
207DependencyScanningWorkerFilesystem::findEntryByFilenameWithWriteThrough(
208 StringRef Filename) {
209 if (const auto *Entry = LocalCache.findEntryByFilename(Filename))
210 return Entry;
211 auto &Shard = SharedCache.getShardForFilename(Filename);
212 if (const auto *Entry = Shard.findEntryByFilename(Filename))
213 return &LocalCache.insertEntryForFilename(Filename, *Entry);
214 return nullptr;
215}
216
217llvm::ErrorOr<const CachedFileSystemEntry &>
218DependencyScanningWorkerFilesystem::computeAndStoreResult(StringRef Filename) {
219 llvm::ErrorOr<llvm::vfs::Status> Stat = getUnderlyingFS().status(Filename);
220 if (!Stat) {
222 return Stat.getError();
223 const auto &Entry =
224 getOrEmplaceSharedEntryForFilename(Filename, Stat.getError());
225 return insertLocalEntryForFilename(Filename, Entry);
226 }
227
228 if (const auto *Entry = findSharedEntryByUID(*Stat))
229 return insertLocalEntryForFilename(Filename, *Entry);
230
231 auto TEntry =
232 Stat->isDirectory() ? TentativeEntry(*Stat) : readFile(Filename);
233
234 const CachedFileSystemEntry *SharedEntry = [&]() {
235 if (TEntry) {
236 const auto &UIDEntry = getOrEmplaceSharedEntryForUID(std::move(*TEntry));
237 return &getOrInsertSharedEntryForFilename(Filename, UIDEntry);
238 }
239 return &getOrEmplaceSharedEntryForFilename(Filename, TEntry.getError());
240 }();
241
242 return insertLocalEntryForFilename(Filename, *SharedEntry);
243}
244
245llvm::ErrorOr<EntryRef>
247 StringRef Filename, bool DisableDirectivesScanning) {
248 if (const auto *Entry = findEntryByFilenameWithWriteThrough(Filename))
249 return scanForDirectivesIfNecessary(*Entry, Filename,
250 DisableDirectivesScanning)
251 .unwrapError();
252 auto MaybeEntry = computeAndStoreResult(Filename);
253 if (!MaybeEntry)
254 return MaybeEntry.getError();
255 return scanForDirectivesIfNecessary(*MaybeEntry, Filename,
256 DisableDirectivesScanning)
257 .unwrapError();
258}
259
260llvm::ErrorOr<llvm::vfs::Status>
262 SmallString<256> OwnedFilename;
263 StringRef Filename = Path.toStringRef(OwnedFilename);
264
265 if (Filename.endswith(".pcm"))
266 return getUnderlyingFS().status(Path);
267
268 llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename);
269 if (!Result)
270 return Result.getError();
271 return Result->getStatus();
272}
273
274namespace {
275
276/// The VFS that is used by clang consumes the \c CachedFileSystemEntry using
277/// this subclass.
278class DepScanFile final : public llvm::vfs::File {
279public:
280 DepScanFile(std::unique_ptr<llvm::MemoryBuffer> Buffer,
281 llvm::vfs::Status Stat)
282 : Buffer(std::move(Buffer)), Stat(std::move(Stat)) {}
283
284 static llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> create(EntryRef Entry);
285
286 llvm::ErrorOr<llvm::vfs::Status> status() override { return Stat; }
287
288 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
289 getBuffer(const Twine &Name, int64_t FileSize, bool RequiresNullTerminator,
290 bool IsVolatile) override {
291 return std::move(Buffer);
292 }
293
294 std::error_code close() override { return {}; }
295
296private:
297 std::unique_ptr<llvm::MemoryBuffer> Buffer;
298 llvm::vfs::Status Stat;
299};
300
301} // end anonymous namespace
302
303llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
304DepScanFile::create(EntryRef Entry) {
305 assert(!Entry.isError() && "error");
306
307 if (Entry.isDirectory())
308 return std::make_error_code(std::errc::is_a_directory);
309
310 auto Result = std::make_unique<DepScanFile>(
311 llvm::MemoryBuffer::getMemBuffer(Entry.getContents(),
312 Entry.getStatus().getName(),
313 /*RequiresNullTerminator=*/false),
314 Entry.getStatus());
315
316 return llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>(
317 std::unique_ptr<llvm::vfs::File>(std::move(Result)));
318}
319
320llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
322 SmallString<256> OwnedFilename;
323 StringRef Filename = Path.toStringRef(OwnedFilename);
324
325 if (Filename.endswith(".pcm"))
326 return getUnderlyingFS().openFileForRead(Path);
327
328 llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename);
329 if (!Result)
330 return Result.getError();
331 return DepScanFile::create(Result.get());
332}
static bool shouldCacheStatFailures(StringRef Filename)
static bool shouldScanForDirectivesBasedOnExtension(StringRef Filename)
Whitelist file extensions that should be minimized, treating no extension as a source file that should be minimized.
StringRef Filename
Definition: Format.cpp:2798
An in-memory representation of a file system entity that is of interest to the dependency scanning fi...
CacheShard & getShardForFilename(StringRef Filename) const
Returns shard for the given key.
llvm::ErrorOr< EntryRef > getOrCreateFileSystemEntry(StringRef Filename, bool DisableDirectivesScanning=false)
Returns entry for the given filename.
llvm::ErrorOr< std::unique_ptr< llvm::vfs::File > > openFileForRead(const Twine &Path) override
llvm::ErrorOr< llvm::vfs::Status > status(const Twine &Path) override
Reference to a CachedFileSystemEntry.
bool scanSourceForDependencyDirectives(StringRef Input, SmallVectorImpl< dependency_directives_scan::Token > &Tokens, SmallVectorImpl< dependency_directives_scan::Directive > &Directives, DiagnosticsEngine *Diags=nullptr, SourceLocation InputSourceLoc=SourceLocation())
Scan the input for the preprocessor directives that might have an effect on the dependencies for a co...
@ Result
The result type of a method or function.
Definition: Format.h:4761
Contents and directive tokens of a cached file entry.
std::mutex ValueLock
The mutex that must be locked before mutating directive tokens.
std::atomic< const std::optional< DependencyDirectivesTy > * > DepDirectives
Accessor to the directive tokens that's atomic to avoid data races.
std::unique_ptr< llvm::MemoryBuffer > Original
Owning storage for the original contents.
SmallVector< dependency_directives_scan::Token, 10 > DepDirectiveTokens
const CachedFileSystemEntry & getOrEmplaceEntryForUID(llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat, std::unique_ptr< llvm::MemoryBuffer > Contents)
Returns entry associated with the unique ID if there is some.
std::mutex CacheLock
The mutex that needs to be locked before mutation of any member.
const CachedFileSystemEntry * findEntryByUID(llvm::sys::fs::UniqueID UID) const
Returns entry associated with the unique ID or nullptr if none is found.
const CachedFileSystemEntry & getOrInsertEntryForFilename(StringRef Filename, const CachedFileSystemEntry &Entry)
Returns entry associated with the filename if there is some.
const CachedFileSystemEntry * findEntryByFilename(StringRef Filename) const
Returns entry associated with the filename or nullptr if none is found.
const CachedFileSystemEntry & getOrEmplaceEntryForFilename(StringRef Filename, llvm::ErrorOr< llvm::vfs::Status > Stat)
Returns entry associated with the filename if there is some.
llvm::StringMap< const CachedFileSystemEntry *, llvm::BumpPtrAllocator > EntriesByFilename
Map from filenames to cached entries.