clang  10.0.0svn
FileManager.cpp
Go to the documentation of this file.
1 //===--- FileManager.cpp - File System Probing and Caching ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the FileManager interface.
10 //
11 //===----------------------------------------------------------------------===//
12 //
13 // TODO: This should index all interesting directories with dirent calls.
14 // getdirentries ?
15 // opendir/readdir_r/closedir ?
16 //
17 //===----------------------------------------------------------------------===//
18 
21 #include "llvm/ADT/SmallString.h"
22 #include "llvm/Config/llvm-config.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/Support/FileSystem.h"
25 #include "llvm/Support/MemoryBuffer.h"
26 #include "llvm/Support/Path.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include <algorithm>
29 #include <cassert>
30 #include <climits>
31 #include <cstdint>
32 #include <cstdlib>
33 #include <string>
34 #include <utility>
35 
36 using namespace clang;
37 
38 //===----------------------------------------------------------------------===//
39 // Common logic.
40 //===----------------------------------------------------------------------===//
41 
44  : FS(std::move(FS)), FileSystemOpts(FSO), SeenDirEntries(64),
45  SeenFileEntries(64), NextFileUID(0) {
46  NumDirLookups = NumFileLookups = 0;
47  NumDirCacheMisses = NumFileCacheMisses = 0;
48 
49  // If the caller doesn't provide a virtual file system, just grab the real
50  // file system.
51  if (!this->FS)
52  this->FS = llvm::vfs::getRealFileSystem();
53 }
54 
55 FileManager::~FileManager() = default;
56 
57 void FileManager::setStatCache(std::unique_ptr<FileSystemStatCache> statCache) {
58  assert(statCache && "No stat cache provided?");
59  StatCache = std::move(statCache);
60 }
61 
62 void FileManager::clearStatCache() { StatCache.reset(); }
63 
64 /// Retrieve the directory that the given file name resides in.
65 /// Filename can point to either a real file or a virtual file.
67  StringRef Filename,
68  bool CacheFailure) {
69  if (Filename.empty())
70  return nullptr;
71 
72  if (llvm::sys::path::is_separator(Filename[Filename.size() - 1]))
73  return nullptr; // If Filename is a directory.
74 
75  StringRef DirName = llvm::sys::path::parent_path(Filename);
76  // Use the current directory if file has no path component.
77  if (DirName.empty())
78  DirName = ".";
79 
80  return FileMgr.getDirectory(DirName, CacheFailure);
81 }
82 
83 /// Add all ancestors of the given path (pointing to either a file or
84 /// a directory) as virtual directories.
85 void FileManager::addAncestorsAsVirtualDirs(StringRef Path) {
86  StringRef DirName = llvm::sys::path::parent_path(Path);
87  if (DirName.empty())
88  DirName = ".";
89 
90  auto &NamedDirEnt = *SeenDirEntries.insert({DirName, nullptr}).first;
91 
92  // When caching a virtual directory, we always cache its ancestors
93  // at the same time. Therefore, if DirName is already in the cache,
94  // we don't need to recurse as its ancestors must also already be in
95  // the cache (or it's a known non-virtual directory).
96  if (NamedDirEnt.second)
97  return;
98 
99  // Add the virtual directory to the cache.
100  auto UDE = llvm::make_unique<DirectoryEntry>();
101  UDE->Name = NamedDirEnt.first();
102  NamedDirEnt.second = UDE.get();
103  VirtualDirectoryEntries.push_back(std::move(UDE));
104 
105  // Recursively add the other ancestors.
106  addAncestorsAsVirtualDirs(DirName);
107 }
108 
109 const DirectoryEntry *FileManager::getDirectory(StringRef DirName,
110  bool CacheFailure) {
111  // stat doesn't like trailing separators except for root directory.
112  // At least, on Win32 MSVCRT, stat() cannot strip trailing '/'.
113  // (though it can strip '\\')
114  if (DirName.size() > 1 &&
115  DirName != llvm::sys::path::root_path(DirName) &&
116  llvm::sys::path::is_separator(DirName.back()))
117  DirName = DirName.substr(0, DirName.size()-1);
118 #ifdef _WIN32
119  // Fixing a problem with "clang C:test.c" on Windows.
120  // Stat("C:") does not recognize "C:" as a valid directory
121  std::string DirNameStr;
122  if (DirName.size() > 1 && DirName.back() == ':' &&
123  DirName.equals_lower(llvm::sys::path::root_name(DirName))) {
124  DirNameStr = DirName.str() + '.';
125  DirName = DirNameStr;
126  }
127 #endif
128 
129  ++NumDirLookups;
130 
131  // See if there was already an entry in the map. Note that the map
132  // contains both virtual and real directories.
133  auto SeenDirInsertResult = SeenDirEntries.insert({DirName, nullptr});
134  if (!SeenDirInsertResult.second)
135  return SeenDirInsertResult.first->second;
136 
137  // We've not seen this before. Fill it in.
138  ++NumDirCacheMisses;
139  auto &NamedDirEnt = *SeenDirInsertResult.first;
140  assert(!NamedDirEnt.second && "should be newly-created");
141 
142  // Get the null-terminated directory name as stored as the key of the
143  // SeenDirEntries map.
144  StringRef InterndDirName = NamedDirEnt.first();
145 
146  // Check to see if the directory exists.
147  llvm::vfs::Status Status;
148  if (getStatValue(InterndDirName, Status, false, nullptr /*directory lookup*/)) {
149  // There's no real directory at the given path.
150  if (!CacheFailure)
151  SeenDirEntries.erase(DirName);
152  return nullptr;
153  }
154 
155  // It exists. See if we have already opened a directory with the
156  // same inode (this occurs on Unix-like systems when one dir is
157  // symlinked to another, for example) or the same path (on
158  // Windows).
159  DirectoryEntry &UDE = UniqueRealDirs[Status.getUniqueID()];
160 
161  NamedDirEnt.second = &UDE;
162  if (UDE.getName().empty()) {
163  // We don't have this directory yet, add it. We use the string
164  // key from the SeenDirEntries map as the string.
165  UDE.Name = InterndDirName;
166  }
167 
168  return &UDE;
169 }
170 
171 const FileEntry *FileManager::getFile(StringRef Filename, bool openFile,
172  bool CacheFailure) {
173  ++NumFileLookups;
174 
175  // See if there is already an entry in the map.
176  auto SeenFileInsertResult = SeenFileEntries.insert({Filename, nullptr});
177  if (!SeenFileInsertResult.second)
178  return SeenFileInsertResult.first->second;
179 
180  // We've not seen this before. Fill it in.
181  ++NumFileCacheMisses;
182  auto &NamedFileEnt = *SeenFileInsertResult.first;
183  assert(!NamedFileEnt.second && "should be newly-created");
184 
185  // Get the null-terminated file name as stored as the key of the
186  // SeenFileEntries map.
187  StringRef InterndFileName = NamedFileEnt.first();
188 
189  // Look up the directory for the file. When looking up something like
190  // sys/foo.h we'll discover all of the search directories that have a 'sys'
191  // subdirectory. This will let us avoid having to waste time on known-to-fail
192  // searches when we go to find sys/bar.h, because all the search directories
193  // without a 'sys' subdir will get a cached failure result.
194  const DirectoryEntry *DirInfo = getDirectoryFromFile(*this, Filename,
195  CacheFailure);
196  if (DirInfo == nullptr) { // Directory doesn't exist, file can't exist.
197  if (!CacheFailure)
198  SeenFileEntries.erase(Filename);
199 
200  return nullptr;
201  }
202 
203  // FIXME: Use the directory info to prune this, before doing the stat syscall.
204  // FIXME: This will reduce the # syscalls.
205 
206  // Check to see if the file exists.
207  std::unique_ptr<llvm::vfs::File> F;
208  llvm::vfs::Status Status;
209  if (getStatValue(InterndFileName, Status, true, openFile ? &F : nullptr)) {
210  // There's no real file at the given path.
211  if (!CacheFailure)
212  SeenFileEntries.erase(Filename);
213 
214  return nullptr;
215  }
216 
217  assert((openFile || !F) && "undesired open file");
218 
219  // It exists. See if we have already opened a file with the same inode.
220  // This occurs when one dir is symlinked to another, for example.
221  FileEntry &UFE = UniqueRealFiles[Status.getUniqueID()];
222 
223  NamedFileEnt.second = &UFE;
224 
225  // If the name returned by getStatValue is different than Filename, re-intern
226  // the name.
227  if (Status.getName() != Filename) {
228  auto &NamedFileEnt =
229  *SeenFileEntries.insert({Status.getName(), &UFE}).first;
230  assert(NamedFileEnt.second == &UFE &&
231  "filename from getStatValue() refers to wrong file");
232  InterndFileName = NamedFileEnt.first().data();
233  }
234 
235  if (UFE.isValid()) { // Already have an entry with this inode, return it.
236 
237  // FIXME: this hack ensures that if we look up a file by a virtual path in
238  // the VFS that the getDir() will have the virtual path, even if we found
239  // the file by a 'real' path first. This is required in order to find a
240  // module's structure when its headers/module map are mapped in the VFS.
241  // We should remove this as soon as we can properly support a file having
242  // multiple names.
243  if (DirInfo != UFE.Dir && Status.IsVFSMapped)
244  UFE.Dir = DirInfo;
245 
246  // Always update the name to use the last name by which a file was accessed.
247  // FIXME: Neither this nor always using the first name is correct; we want
248  // to switch towards a design where we return a FileName object that
249  // encapsulates both the name by which the file was accessed and the
250  // corresponding FileEntry.
251  UFE.Name = InterndFileName;
252 
253  return &UFE;
254  }
255 
256  // Otherwise, we don't have this file yet, add it.
257  UFE.Name = InterndFileName;
258  UFE.Size = Status.getSize();
259  UFE.ModTime = llvm::sys::toTimeT(Status.getLastModificationTime());
260  UFE.Dir = DirInfo;
261  UFE.UID = NextFileUID++;
262  UFE.UniqueID = Status.getUniqueID();
263  UFE.IsNamedPipe = Status.getType() == llvm::sys::fs::file_type::fifo_file;
264  UFE.File = std::move(F);
265  UFE.IsValid = true;
266 
267  if (UFE.File) {
268  if (auto PathName = UFE.File->getName())
269  fillRealPathName(&UFE, *PathName);
270  } else if (!openFile) {
271  // We should still fill the path even if we aren't opening the file.
272  fillRealPathName(&UFE, InterndFileName);
273  }
274  return &UFE;
275 }
276 
277 const FileEntry *
278 FileManager::getVirtualFile(StringRef Filename, off_t Size,
279  time_t ModificationTime) {
280  ++NumFileLookups;
281 
282  // See if there is already an entry in the map for an existing file.
283  auto &NamedFileEnt = *SeenFileEntries.insert({Filename, nullptr}).first;
284  if (NamedFileEnt.second)
285  return NamedFileEnt.second;
286 
287  // We've not seen this before, or the file is cached as non-existent.
288  ++NumFileCacheMisses;
289  addAncestorsAsVirtualDirs(Filename);
290  FileEntry *UFE = nullptr;
291 
292  // Now that all ancestors of Filename are in the cache, the
293  // following call is guaranteed to find the DirectoryEntry from the
294  // cache.
295  const DirectoryEntry *DirInfo = getDirectoryFromFile(*this, Filename,
296  /*CacheFailure=*/true);
297  assert(DirInfo &&
298  "The directory of a virtual file should already be in the cache.");
299 
300  // Check to see if the file exists. If so, drop the virtual file
301  llvm::vfs::Status Status;
302  const char *InterndFileName = NamedFileEnt.first().data();
303  if (getStatValue(InterndFileName, Status, true, nullptr) == 0) {
304  UFE = &UniqueRealFiles[Status.getUniqueID()];
305  Status = llvm::vfs::Status(
306  Status.getName(), Status.getUniqueID(),
307  llvm::sys::toTimePoint(ModificationTime),
308  Status.getUser(), Status.getGroup(), Size,
309  Status.getType(), Status.getPermissions());
310 
311  NamedFileEnt.second = UFE;
312 
313  // If we had already opened this file, close it now so we don't
314  // leak the descriptor. We're not going to use the file
315  // descriptor anyway, since this is a virtual file.
316  if (UFE->File)
317  UFE->closeFile();
318 
319  // If we already have an entry with this inode, return it.
320  if (UFE->isValid())
321  return UFE;
322 
323  UFE->UniqueID = Status.getUniqueID();
324  UFE->IsNamedPipe = Status.getType() == llvm::sys::fs::file_type::fifo_file;
325  fillRealPathName(UFE, Status.getName());
326  } else {
327  VirtualFileEntries.push_back(llvm::make_unique<FileEntry>());
328  UFE = VirtualFileEntries.back().get();
329  NamedFileEnt.second = UFE;
330  }
331 
332  UFE->Name = InterndFileName;
333  UFE->Size = Size;
334  UFE->ModTime = ModificationTime;
335  UFE->Dir = DirInfo;
336  UFE->UID = NextFileUID++;
337  UFE->IsValid = true;
338  UFE->File.reset();
339  return UFE;
340 }
341 
343  StringRef pathRef(path.data(), path.size());
344 
345  if (FileSystemOpts.WorkingDir.empty()
346  || llvm::sys::path::is_absolute(pathRef))
347  return false;
348 
349  SmallString<128> NewPath(FileSystemOpts.WorkingDir);
350  llvm::sys::path::append(NewPath, pathRef);
351  path = NewPath;
352  return true;
353 }
354 
356  bool Changed = FixupRelativePath(Path);
357 
358  if (!llvm::sys::path::is_absolute(StringRef(Path.data(), Path.size()))) {
359  FS->makeAbsolute(Path);
360  Changed = true;
361  }
362 
363  return Changed;
364 }
365 
366 void FileManager::fillRealPathName(FileEntry *UFE, llvm::StringRef FileName) {
367  llvm::SmallString<128> AbsPath(FileName);
368  // This is not the same as `VFS::getRealPath()`, which resolves symlinks
369  // but can be very expensive on real file systems.
370  // FIXME: the semantic of RealPathName is unclear, and the name might be
371  // misleading. We need to clean up the interface here.
372  makeAbsolutePath(AbsPath);
373  llvm::sys::path::remove_dots(AbsPath, /*remove_dot_dot=*/true);
374  UFE->RealPathName = AbsPath.str();
375 }
376 
377 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
378 FileManager::getBufferForFile(const FileEntry *Entry, bool isVolatile,
379  bool ShouldCloseOpenFile) {
380  uint64_t FileSize = Entry->getSize();
381  // If there's a high enough chance that the file have changed since we
382  // got its size, force a stat before opening it.
383  if (isVolatile)
384  FileSize = -1;
385 
386  StringRef Filename = Entry->getName();
387  // If the file is already open, use the open file descriptor.
388  if (Entry->File) {
389  auto Result =
390  Entry->File->getBuffer(Filename, FileSize,
391  /*RequiresNullTerminator=*/true, isVolatile);
392  // FIXME: we need a set of APIs that can make guarantees about whether a
393  // FileEntry is open or not.
394  if (ShouldCloseOpenFile)
395  Entry->closeFile();
396  return Result;
397  }
398 
399  // Otherwise, open the file.
400 
401  if (FileSystemOpts.WorkingDir.empty())
402  return FS->getBufferForFile(Filename, FileSize,
403  /*RequiresNullTerminator=*/true, isVolatile);
404 
405  SmallString<128> FilePath(Entry->getName());
406  FixupRelativePath(FilePath);
407  return FS->getBufferForFile(FilePath, FileSize,
408  /*RequiresNullTerminator=*/true, isVolatile);
409 }
410 
411 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
412 FileManager::getBufferForFile(StringRef Filename, bool isVolatile) {
413  if (FileSystemOpts.WorkingDir.empty())
414  return FS->getBufferForFile(Filename, -1, true, isVolatile);
415 
416  SmallString<128> FilePath(Filename);
417  FixupRelativePath(FilePath);
418  return FS->getBufferForFile(FilePath.c_str(), -1, true, isVolatile);
419 }
420 
421 /// getStatValue - Get the 'stat' information for the specified path,
422 /// using the cache to accelerate it if possible. This returns true
423 /// if the path points to a virtual file or does not exist, or returns
424 /// false if it's an existent real file. If FileDescriptor is NULL,
425 /// do directory look-up instead of file look-up.
426 bool FileManager::getStatValue(StringRef Path, llvm::vfs::Status &Status,
427  bool isFile,
428  std::unique_ptr<llvm::vfs::File> *F) {
429  // FIXME: FileSystemOpts shouldn't be passed in here, all paths should be
430  // absolute!
431  if (FileSystemOpts.WorkingDir.empty())
432  return bool(FileSystemStatCache::get(Path, Status, isFile, F,
433  StatCache.get(), *FS));
434 
435  SmallString<128> FilePath(Path);
436  FixupRelativePath(FilePath);
437 
438  return bool(FileSystemStatCache::get(FilePath.c_str(), Status, isFile, F,
439  StatCache.get(), *FS));
440 }
441 
443  llvm::vfs::Status &Result) {
444  SmallString<128> FilePath(Path);
445  FixupRelativePath(FilePath);
446 
447  llvm::ErrorOr<llvm::vfs::Status> S = FS->status(FilePath.c_str());
448  if (!S)
449  return true;
450  Result = *S;
451  return false;
452 }
453 
455  assert(Entry && "Cannot invalidate a NULL FileEntry");
456 
457  SeenFileEntries.erase(Entry->getName());
458 
459  // FileEntry invalidation should not block future optimizations in the file
460  // caches. Possible alternatives are cache truncation (invalidate last N) or
461  // invalidation of the whole cache.
462  //
463  // FIXME: This is broken. We sometimes have the same FileEntry* shared
464  // betweeen multiple SeenFileEntries, so this can leave dangling pointers.
465  UniqueRealFiles.erase(Entry->getUniqueID());
466 }
467 
469  SmallVectorImpl<const FileEntry *> &UIDToFiles) const {
470  UIDToFiles.clear();
471  UIDToFiles.resize(NextFileUID);
472 
473  // Map file entries
474  for (llvm::StringMap<FileEntry*, llvm::BumpPtrAllocator>::const_iterator
475  FE = SeenFileEntries.begin(), FEEnd = SeenFileEntries.end();
476  FE != FEEnd; ++FE)
477  if (FE->getValue())
478  UIDToFiles[FE->getValue()->getUID()] = FE->getValue();
479 
480  // Map virtual file entries
481  for (const auto &VFE : VirtualFileEntries)
482  UIDToFiles[VFE->getUID()] = VFE.get();
483 }
484 
486  off_t Size, time_t ModificationTime) {
487  File->Size = Size;
488  File->ModTime = ModificationTime;
489 }
490 
492  // FIXME: use llvm::sys::fs::canonical() when it gets implemented
493  llvm::DenseMap<const DirectoryEntry *, llvm::StringRef>::iterator Known
494  = CanonicalDirNames.find(Dir);
495  if (Known != CanonicalDirNames.end())
496  return Known->second;
497 
498  StringRef CanonicalName(Dir->getName());
499 
500  SmallString<4096> CanonicalNameBuf;
501  if (!FS->getRealPath(Dir->getName(), CanonicalNameBuf))
502  CanonicalName = StringRef(CanonicalNameBuf).copy(CanonicalNameStorage);
503 
504  CanonicalDirNames.insert({Dir, CanonicalName});
505  return CanonicalName;
506 }
507 
509  llvm::errs() << "\n*** File Manager Stats:\n";
510  llvm::errs() << UniqueRealFiles.size() << " real files found, "
511  << UniqueRealDirs.size() << " real dirs found.\n";
512  llvm::errs() << VirtualFileEntries.size() << " virtual files found, "
513  << VirtualDirectoryEntries.size() << " virtual dirs found.\n";
514  llvm::errs() << NumDirLookups << " dir lookups, "
515  << NumDirCacheMisses << " dir cache misses.\n";
516  llvm::errs() << NumFileLookups << " file lookups, "
517  << NumFileCacheMisses << " file cache misses.\n";
518 
519  //llvm::errs() << PagesMapped << BytesOfPagesMapped << FSLookups;
520 }
Implements support for file system lookup, file system caching, and directory search management...
Definition: FileManager.h:116
Defines the clang::FileManager interface and associated types.
void GetUniqueIDMapping(SmallVectorImpl< const FileEntry *> &UIDToFiles) const
Produce an array mapping from the unique IDs assigned to each file to the corresponding FileEntry poi...
void closeFile() const
Definition: FileManager.h:100
Defines the FileSystemStatCache interface.
bool makeAbsolutePath(SmallVectorImpl< char > &Path) const
Makes Path absolute taking into account FileSystemOptions and the working directory option...
const llvm::sys::fs::UniqueID & getUniqueID() const
Definition: FileManager.h:88
FileManager(const FileSystemOptions &FileSystemOpts, IntrusiveRefCntPtr< llvm::vfs::FileSystem > FS=nullptr)
Construct a file manager, optionally with a custom VFS.
Definition: FileManager.cpp:42
Definition: Format.h:2274
const FileEntry * getFile(StringRef Filename, bool OpenFile=false, bool CacheFailure=true)
Lookup, cache, and verify the specified file (real or virtual).
const DirectoryEntry * getDirectory(StringRef DirName, bool CacheFailure=true)
Lookup, cache, and verify the specified directory (real or virtual).
StringRef Filename
Definition: Format.cpp:1711
bool FixupRelativePath(SmallVectorImpl< char > &path) const
If path is not absolute and FileSystemOptions set the working directory, the path is modified to be r...
static const DirectoryEntry * getDirectoryFromFile(FileManager &FileMgr, StringRef Filename, bool CacheFailure)
Retrieve the directory that the given file name resides in.
Definition: FileManager.cpp:66
std::string WorkingDir
If set, paths are resolved as if the working directory was set to the value of WorkingDir.
#define bool
Definition: stdbool.h:15
The result type of a method or function.
StringRef getName() const
Definition: FileManager.h:83
Cached information about one file (either on disk or in the virtual file system). ...
Definition: FileManager.h:59
void PrintStats() const
const FileEntry * getVirtualFile(StringRef Filename, off_t Size, time_t ModificationTime)
Retrieve a file entry for a "virtual" file that acts as if there were a file with the given name on d...
StringRef getCanonicalName(const DirectoryEntry *Dir)
Retrieve the canonical name for a given directory.
bool isValid() const
Definition: FileManager.h:85
Dataflow Directional Tag Classes.
void clearStatCache()
Removes the FileSystemStatCache object from the manager.
Definition: FileManager.cpp:62
off_t getSize() const
Definition: FileManager.h:86
llvm::ErrorOr< std::unique_ptr< llvm::MemoryBuffer > > getBufferForFile(const FileEntry *Entry, bool isVolatile=false, bool ShouldCloseOpenFile=true)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful, otherwise returning null.
bool getNoncachedStatValue(StringRef Path, llvm::vfs::Status &Result)
Get the 'stat' information for the given Path.
Cached information about one directory (either on disk or in the virtual file system).
Definition: FileManager.h:45
void setStatCache(std::unique_ptr< FileSystemStatCache > statCache)
Installs the provided FileSystemStatCache object within the FileManager.
Definition: FileManager.cpp:57
Keeps track of options that affect how file operations are performed.
static std::error_code get(StringRef Path, llvm::vfs::Status &Status, bool isFile, std::unique_ptr< llvm::vfs::File > *F, FileSystemStatCache *Cache, llvm::vfs::FileSystem &FS)
Get the 'stat' information for the specified path, using the cache to accelerate it if possible...
static void modifyFileEntry(FileEntry *File, off_t Size, time_t ModificationTime)
Modifies the size and modification time of a previously created FileEntry.
void invalidateCache(const FileEntry *Entry)
Remove the real file Entry from the cache.
StringRef getName() const
Definition: FileManager.h:51