clang  9.0.0svn
FileManager.cpp
Go to the documentation of this file.
1 //===--- FileManager.cpp - File System Probing and Caching ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the FileManager interface.
10 //
11 //===----------------------------------------------------------------------===//
12 //
13 // TODO: This should index all interesting directories with dirent calls.
14 // getdirentries ?
15 // opendir/readdir_r/closedir ?
16 //
17 //===----------------------------------------------------------------------===//
18 
21 #include "llvm/ADT/SmallString.h"
22 #include "llvm/Config/llvm-config.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/Support/FileSystem.h"
25 #include "llvm/Support/MemoryBuffer.h"
26 #include "llvm/Support/Path.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include <algorithm>
29 #include <cassert>
30 #include <climits>
31 #include <cstdint>
32 #include <cstdlib>
33 #include <string>
34 #include <utility>
35 
36 using namespace clang;
37 
38 //===----------------------------------------------------------------------===//
39 // Common logic.
40 //===----------------------------------------------------------------------===//
41 
44  : FS(std::move(FS)), FileSystemOpts(FSO), SeenDirEntries(64),
45  SeenFileEntries(64), NextFileUID(0) {
46  NumDirLookups = NumFileLookups = 0;
47  NumDirCacheMisses = NumFileCacheMisses = 0;
48 
49  // If the caller doesn't provide a virtual file system, just grab the real
50  // file system.
51  if (!this->FS)
52  this->FS = llvm::vfs::getRealFileSystem();
53 }
54 
// The destructor is defined out of line in this file and uses the
// compiler-generated implementation.
55 FileManager::~FileManager() = default;
56 
57 void FileManager::setStatCache(std::unique_ptr<FileSystemStatCache> statCache) {
58  assert(statCache && "No stat cache provided?");
59  StatCache = std::move(statCache);
60 }
61 
62 void FileManager::clearStatCache() { StatCache.reset(); }
63 
64 /// Retrieve the directory that the given file name resides in.
65 /// Filename can point to either a real file or a virtual file.
67  StringRef Filename,
68  bool CacheFailure) {
69  if (Filename.empty())
70  return nullptr;
71 
72  if (llvm::sys::path::is_separator(Filename[Filename.size() - 1]))
73  return nullptr; // If Filename is a directory.
74 
75  StringRef DirName = llvm::sys::path::parent_path(Filename);
76  // Use the current directory if file has no path component.
77  if (DirName.empty())
78  DirName = ".";
79 
80  return FileMgr.getDirectory(DirName, CacheFailure);
81 }
82 
83 /// Add all ancestors of the given path (pointing to either a file or
84 /// a directory) as virtual directories.
85 void FileManager::addAncestorsAsVirtualDirs(StringRef Path) {
86  StringRef DirName = llvm::sys::path::parent_path(Path);
87  if (DirName.empty())
88  DirName = ".";
89 
90  auto &NamedDirEnt = *SeenDirEntries.insert({DirName, nullptr}).first;
91 
92  // When caching a virtual directory, we always cache its ancestors
93  // at the same time. Therefore, if DirName is already in the cache,
94  // we don't need to recurse as its ancestors must also already be in
95  // the cache (or it's a known non-virtual directory).
96  if (NamedDirEnt.second)
97  return;
98 
99  // Add the virtual directory to the cache.
100  auto UDE = llvm::make_unique<DirectoryEntry>();
101  UDE->Name = NamedDirEnt.first();
102  NamedDirEnt.second = UDE.get();
103  VirtualDirectoryEntries.push_back(std::move(UDE));
104 
105  // Recursively add the other ancestors.
106  addAncestorsAsVirtualDirs(DirName);
107 }
108 
109 const DirectoryEntry *FileManager::getDirectory(StringRef DirName,
110  bool CacheFailure) {
111  // stat doesn't like trailing separators except for root directory.
112  // At least, on Win32 MSVCRT, stat() cannot strip trailing '/'.
113  // (though it can strip '\\')
114  if (DirName.size() > 1 &&
115  DirName != llvm::sys::path::root_path(DirName) &&
116  llvm::sys::path::is_separator(DirName.back()))
117  DirName = DirName.substr(0, DirName.size()-1);
118 #ifdef _WIN32
119  // Fixing a problem with "clang C:test.c" on Windows.
120  // Stat("C:") does not recognize "C:" as a valid directory
121  std::string DirNameStr;
122  if (DirName.size() > 1 && DirName.back() == ':' &&
123  DirName.equals_lower(llvm::sys::path::root_name(DirName))) {
124  DirNameStr = DirName.str() + '.';
125  DirName = DirNameStr;
126  }
127 #endif
128 
129  ++NumDirLookups;
130 
131  // See if there was already an entry in the map. Note that the map
132  // contains both virtual and real directories.
133  auto SeenDirInsertResult = SeenDirEntries.insert({DirName, nullptr});
134  if (!SeenDirInsertResult.second)
135  return SeenDirInsertResult.first->second;
136 
137  // We've not seen this before. Fill it in.
138  ++NumDirCacheMisses;
139  auto &NamedDirEnt = *SeenDirInsertResult.first;
140  assert(!NamedDirEnt.second && "should be newly-created");
141 
142  // Get the null-terminated directory name as stored as the key of the
143  // SeenDirEntries map.
144  StringRef InterndDirName = NamedDirEnt.first();
145 
146  // Check to see if the directory exists.
147  FileData Data;
148  if (getStatValue(InterndDirName, Data, false, nullptr /*directory lookup*/)) {
149  // There's no real directory at the given path.
150  if (!CacheFailure)
151  SeenDirEntries.erase(DirName);
152  return nullptr;
153  }
154 
155  // It exists. See if we have already opened a directory with the
156  // same inode (this occurs on Unix-like systems when one dir is
157  // symlinked to another, for example) or the same path (on
158  // Windows).
159  DirectoryEntry &UDE = UniqueRealDirs[Data.UniqueID];
160 
161  NamedDirEnt.second = &UDE;
162  if (UDE.getName().empty()) {
163  // We don't have this directory yet, add it. We use the string
164  // key from the SeenDirEntries map as the string.
165  UDE.Name = InterndDirName;
166  }
167 
168  return &UDE;
169 }
170 
/// Look up the FileEntry for \p Filename, consulting and updating the
/// SeenFileEntries cache.
///
/// A slot for \p Filename is inserted into SeenFileEntries up front; if one
/// already existed, its value (possibly null) is returned without any
/// further work. Otherwise the containing directory is resolved through
/// getDirectoryFromFile() and the file is stat'ed through getStatValue().
/// On success the entry is taken from UniqueRealFiles, keyed by the unique
/// ID in the stat data, so several names with the same unique ID share one
/// FileEntry.
///
/// \param Filename     path of the file to look up.
/// \param openFile     when true, getStatValue() is given a file handle to
///                     fill in; it is stored in the entry if the entry is
///                     newly created.
/// \param CacheFailure when false, the slot inserted for a failed lookup is
///                     erased again instead of being kept as null.
/// \returns the entry, or null if the directory lookup or the stat fails.
171 const FileEntry *FileManager::getFile(StringRef Filename, bool openFile,
172  bool CacheFailure) {
173  ++NumFileLookups;
174 
175  // See if there is already an entry in the map.
176  auto SeenFileInsertResult = SeenFileEntries.insert({Filename, nullptr});
177  if (!SeenFileInsertResult.second)
178  return SeenFileInsertResult.first->second;
179 
180  // We've not seen this before. Fill it in.
181  ++NumFileCacheMisses;
182  auto &NamedFileEnt = *SeenFileInsertResult.first;
183  assert(!NamedFileEnt.second && "should be newly-created");
184 
185  // Get the null-terminated file name as stored as the key of the
186  // SeenFileEntries map.
187  StringRef InterndFileName = NamedFileEnt.first();
188 
189  // Look up the directory for the file. When looking up something like
190  // sys/foo.h we'll discover all of the search directories that have a 'sys'
191  // subdirectory. This will let us avoid having to waste time on known-to-fail
192  // searches when we go to find sys/bar.h, because all the search directories
193  // without a 'sys' subdir will get a cached failure result.
194  const DirectoryEntry *DirInfo = getDirectoryFromFile(*this, Filename,
195  CacheFailure);
196  if (DirInfo == nullptr) { // Directory doesn't exist, file can't exist.
197  if (!CacheFailure)
198  SeenFileEntries.erase(Filename);
199 
200  return nullptr;
201  }
202 
203  // FIXME: Use the directory info to prune this, before doing the stat syscall.
204  // FIXME: This will reduce the # syscalls.
205 
206  // Check to see if the file exists.
207  std::unique_ptr<llvm::vfs::File> F;
208  FileData Data;
209  if (getStatValue(InterndFileName, Data, true, openFile ? &F : nullptr)) {
210  // There's no real file at the given path.
211  if (!CacheFailure)
212  SeenFileEntries.erase(Filename);
213 
214  return nullptr;
215  }
216 
217  assert((openFile || !F) && "undesired open file");
218 
219  // It exists. See if we have already opened a file with the same inode.
220  // This occurs when one dir is symlinked to another, for example.
221  FileEntry &UFE = UniqueRealFiles[Data.UniqueID];
222 
223  NamedFileEnt.second = &UFE;
224 
225  // If the name returned by getStatValue is different than Filename, re-intern
226  // the name.
227  if (Data.Name != Filename) {
228  auto &NamedFileEnt = *SeenFileEntries.insert({Data.Name, &UFE}).first;
229  assert(NamedFileEnt.second == &UFE &&
230  "filename from getStatValue() refers to wrong file");
231  InterndFileName = NamedFileEnt.first().data();
232  }
233 
234  if (UFE.isValid()) { // Already have an entry with this inode, return it.
235 
236  // FIXME: this hack ensures that if we look up a file by a virtual path in
237  // the VFS that the getDir() will have the virtual path, even if we found
238  // the file by a 'real' path first. This is required in order to find a
239  // module's structure when its headers/module map are mapped in the VFS.
240  // We should remove this as soon as we can properly support a file having
241  // multiple names.
242  if (DirInfo != UFE.Dir && Data.IsVFSMapped)
243  UFE.Dir = DirInfo;
244 
245  // Always update the name to use the last name by which a file was accessed.
246  // FIXME: Neither this nor always using the first name is correct; we want
247  // to switch towards a design where we return a FileName object that
248  // encapsulates both the name by which the file was accessed and the
249  // corresponding FileEntry.
250  UFE.Name = InterndFileName;
251 
252  return &UFE;
253  }
254 
255  // Otherwise, we don't have this file yet, add it.
256  UFE.Name = InterndFileName;
257  UFE.Size = Data.Size;
258  UFE.ModTime = Data.ModTime;
259  UFE.Dir = DirInfo;
260  UFE.UID = NextFileUID++;
261  UFE.UniqueID = Data.UniqueID;
262  UFE.IsNamedPipe = Data.IsNamedPipe;
263  UFE.InPCH = Data.InPCH;
264  UFE.File = std::move(F);
265  UFE.IsValid = true;
266 
267  if (UFE.File) {
268  if (auto PathName = UFE.File->getName())
269  fillRealPathName(&UFE, *PathName);
270  }
271  return &UFE;
272 }
273 
274 const FileEntry *
275 FileManager::getVirtualFile(StringRef Filename, off_t Size,
276  time_t ModificationTime) {
277  ++NumFileLookups;
278 
279  // See if there is already an entry in the map for an existing file.
280  auto &NamedFileEnt = *SeenFileEntries.insert({Filename, nullptr}).first;
281  if (NamedFileEnt.second)
282  return NamedFileEnt.second;
283 
284  // We've not seen this before, or the file is cached as non-existent.
285  ++NumFileCacheMisses;
286  addAncestorsAsVirtualDirs(Filename);
287  FileEntry *UFE = nullptr;
288 
289  // Now that all ancestors of Filename are in the cache, the
290  // following call is guaranteed to find the DirectoryEntry from the
291  // cache.
292  const DirectoryEntry *DirInfo = getDirectoryFromFile(*this, Filename,
293  /*CacheFailure=*/true);
294  assert(DirInfo &&
295  "The directory of a virtual file should already be in the cache.");
296 
297  // Check to see if the file exists. If so, drop the virtual file
298  FileData Data;
299  const char *InterndFileName = NamedFileEnt.first().data();
300  if (getStatValue(InterndFileName, Data, true, nullptr) == 0) {
301  Data.Size = Size;
302  Data.ModTime = ModificationTime;
303  UFE = &UniqueRealFiles[Data.UniqueID];
304 
305  NamedFileEnt.second = UFE;
306 
307  // If we had already opened this file, close it now so we don't
308  // leak the descriptor. We're not going to use the file
309  // descriptor anyway, since this is a virtual file.
310  if (UFE->File)
311  UFE->closeFile();
312 
313  // If we already have an entry with this inode, return it.
314  if (UFE->isValid())
315  return UFE;
316 
317  UFE->UniqueID = Data.UniqueID;
318  UFE->IsNamedPipe = Data.IsNamedPipe;
319  UFE->InPCH = Data.InPCH;
320  fillRealPathName(UFE, Data.Name);
321  } else {
322  VirtualFileEntries.push_back(llvm::make_unique<FileEntry>());
323  UFE = VirtualFileEntries.back().get();
324  NamedFileEnt.second = UFE;
325  }
326 
327  UFE->Name = InterndFileName;
328  UFE->Size = Size;
329  UFE->ModTime = ModificationTime;
330  UFE->Dir = DirInfo;
331  UFE->UID = NextFileUID++;
332  UFE->IsValid = true;
333  UFE->File.reset();
334  return UFE;
335 }
336 
338  StringRef pathRef(path.data(), path.size());
339 
340  if (FileSystemOpts.WorkingDir.empty()
341  || llvm::sys::path::is_absolute(pathRef))
342  return false;
343 
344  SmallString<128> NewPath(FileSystemOpts.WorkingDir);
345  llvm::sys::path::append(NewPath, pathRef);
346  path = NewPath;
347  return true;
348 }
349 
351  bool Changed = FixupRelativePath(Path);
352 
353  if (!llvm::sys::path::is_absolute(StringRef(Path.data(), Path.size()))) {
354  FS->makeAbsolute(Path);
355  Changed = true;
356  }
357 
358  return Changed;
359 }
360 
361 void FileManager::fillRealPathName(FileEntry *UFE, llvm::StringRef FileName) {
362  llvm::SmallString<128> AbsPath(FileName);
363  // This is not the same as `VFS::getRealPath()`, which resolves symlinks
364  // but can be very expensive on real file systems.
365  // FIXME: the semantic of RealPathName is unclear, and the name might be
366  // misleading. We need to clean up the interface here.
367  makeAbsolutePath(AbsPath);
368  llvm::sys::path::remove_dots(AbsPath, /*remove_dot_dot=*/true);
369  UFE->RealPathName = AbsPath.str();
370 }
371 
372 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
373 FileManager::getBufferForFile(const FileEntry *Entry, bool isVolatile,
374  bool ShouldCloseOpenFile) {
375  uint64_t FileSize = Entry->getSize();
376  // If there's a high enough chance that the file have changed since we
377  // got its size, force a stat before opening it.
378  if (isVolatile)
379  FileSize = -1;
380 
381  StringRef Filename = Entry->getName();
382  // If the file is already open, use the open file descriptor.
383  if (Entry->File) {
384  auto Result =
385  Entry->File->getBuffer(Filename, FileSize,
386  /*RequiresNullTerminator=*/true, isVolatile);
387  // FIXME: we need a set of APIs that can make guarantees about whether a
388  // FileEntry is open or not.
389  if (ShouldCloseOpenFile)
390  Entry->closeFile();
391  return Result;
392  }
393 
394  // Otherwise, open the file.
395 
396  if (FileSystemOpts.WorkingDir.empty())
397  return FS->getBufferForFile(Filename, FileSize,
398  /*RequiresNullTerminator=*/true, isVolatile);
399 
400  SmallString<128> FilePath(Entry->getName());
401  FixupRelativePath(FilePath);
402  return FS->getBufferForFile(FilePath, FileSize,
403  /*RequiresNullTerminator=*/true, isVolatile);
404 }
405 
406 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
407 FileManager::getBufferForFile(StringRef Filename, bool isVolatile) {
408  if (FileSystemOpts.WorkingDir.empty())
409  return FS->getBufferForFile(Filename, -1, true, isVolatile);
410 
411  SmallString<128> FilePath(Filename);
412  FixupRelativePath(FilePath);
413  return FS->getBufferForFile(FilePath.c_str(), -1, true, isVolatile);
414 }
415 
416 /// getStatValue - Get the 'stat' information for the specified path,
417 /// using the cache to accelerate it if possible. This returns true
418 /// if the path points to a virtual file or does not exist, or returns
419 /// false if it's an existent real file. If FileDescriptor is NULL,
420 /// do directory look-up instead of file look-up.
421 bool FileManager::getStatValue(StringRef Path, FileData &Data, bool isFile,
422  std::unique_ptr<llvm::vfs::File> *F) {
423  // FIXME: FileSystemOpts shouldn't be passed in here, all paths should be
424  // absolute!
425  if (FileSystemOpts.WorkingDir.empty())
426  return FileSystemStatCache::get(Path, Data, isFile, F,StatCache.get(), *FS);
427 
428  SmallString<128> FilePath(Path);
429  FixupRelativePath(FilePath);
430 
431  return FileSystemStatCache::get(FilePath.c_str(), Data, isFile, F,
432  StatCache.get(), *FS);
433 }
434 
436  llvm::vfs::Status &Result) {
437  SmallString<128> FilePath(Path);
438  FixupRelativePath(FilePath);
439 
440  llvm::ErrorOr<llvm::vfs::Status> S = FS->status(FilePath.c_str());
441  if (!S)
442  return true;
443  Result = *S;
444  return false;
445 }
446 
448  assert(Entry && "Cannot invalidate a NULL FileEntry");
449 
450  SeenFileEntries.erase(Entry->getName());
451 
452  // FileEntry invalidation should not block future optimizations in the file
453  // caches. Possible alternatives are cache truncation (invalidate last N) or
454  // invalidation of the whole cache.
455  //
456  // FIXME: This is broken. We sometimes have the same FileEntry* shared
457  // betweeen multiple SeenFileEntries, so this can leave dangling pointers.
458  UniqueRealFiles.erase(Entry->getUniqueID());
459 }
460 
462  SmallVectorImpl<const FileEntry *> &UIDToFiles) const {
463  UIDToFiles.clear();
464  UIDToFiles.resize(NextFileUID);
465 
466  // Map file entries
467  for (llvm::StringMap<FileEntry*, llvm::BumpPtrAllocator>::const_iterator
468  FE = SeenFileEntries.begin(), FEEnd = SeenFileEntries.end();
469  FE != FEEnd; ++FE)
470  if (FE->getValue())
471  UIDToFiles[FE->getValue()->getUID()] = FE->getValue();
472 
473  // Map virtual file entries
474  for (const auto &VFE : VirtualFileEntries)
475  UIDToFiles[VFE->getUID()] = VFE.get();
476 }
477 
479  off_t Size, time_t ModificationTime) {
480  File->Size = Size;
481  File->ModTime = ModificationTime;
482 }
483 
485  // FIXME: use llvm::sys::fs::canonical() when it gets implemented
486  llvm::DenseMap<const DirectoryEntry *, llvm::StringRef>::iterator Known
487  = CanonicalDirNames.find(Dir);
488  if (Known != CanonicalDirNames.end())
489  return Known->second;
490 
491  StringRef CanonicalName(Dir->getName());
492 
493  SmallString<4096> CanonicalNameBuf;
494  if (!FS->getRealPath(Dir->getName(), CanonicalNameBuf))
495  CanonicalName = StringRef(CanonicalNameBuf).copy(CanonicalNameStorage);
496 
497  CanonicalDirNames.insert({Dir, CanonicalName});
498  return CanonicalName;
499 }
500 
502  llvm::errs() << "\n*** File Manager Stats:\n";
503  llvm::errs() << UniqueRealFiles.size() << " real files found, "
504  << UniqueRealDirs.size() << " real dirs found.\n";
505  llvm::errs() << VirtualFileEntries.size() << " virtual files found, "
506  << VirtualDirectoryEntries.size() << " virtual dirs found.\n";
507  llvm::errs() << NumDirLookups << " dir lookups, "
508  << NumDirCacheMisses << " dir cache misses.\n";
509  llvm::errs() << NumFileLookups << " file lookups, "
510  << NumFileCacheMisses << " file cache misses.\n";
511 
512  //llvm::errs() << PagesMapped << BytesOfPagesMapped << FSLookups;
513 }
Implements support for file system lookup, file system caching, and directory search management...
Definition: FileManager.h:120
Defines the clang::FileManager interface and associated types.
void GetUniqueIDMapping(SmallVectorImpl< const FileEntry *> &UIDToFiles) const
Produce an array mapping from the unique IDs assigned to each file to the corresponding FileEntry poi...
void closeFile() const
Definition: FileManager.h:102
Defines the FileSystemStatCache interface.
bool makeAbsolutePath(SmallVectorImpl< char > &Path) const
Makes Path absolute taking into account FileSystemOptions and the working directory option...
const llvm::sys::fs::UniqueID & getUniqueID() const
Definition: FileManager.h:89
FileManager(const FileSystemOptions &FileSystemOpts, IntrusiveRefCntPtr< llvm::vfs::FileSystem > FS=nullptr)
Definition: FileManager.cpp:42
Definition: Format.h:2071
const FileEntry * getFile(StringRef Filename, bool OpenFile=false, bool CacheFailure=true)
Lookup, cache, and verify the specified file (real or virtual).
const DirectoryEntry * getDirectory(StringRef DirName, bool CacheFailure=true)
Lookup, cache, and verify the specified directory (real or virtual).
StringRef Filename
Definition: Format.cpp:1628
bool FixupRelativePath(SmallVectorImpl< char > &path) const
If path is not absolute and FileSystemOptions set the working directory, the path is modified to be r...
static const DirectoryEntry * getDirectoryFromFile(FileManager &FileMgr, StringRef Filename, bool CacheFailure)
Retrieve the directory that the given file name resides in.
Definition: FileManager.cpp:66
std::string WorkingDir
If set, paths are resolved as if the working directory was set to the value of WorkingDir.
The result type of a method or function.
static bool get(StringRef Path, FileData &Data, bool isFile, std::unique_ptr< llvm::vfs::File > *F, FileSystemStatCache *Cache, llvm::vfs::FileSystem &FS)
Get the 'stat' information for the specified path, using the cache to accelerate it if possible...
llvm::sys::fs::UniqueID UniqueID
StringRef getName() const
Definition: FileManager.h:84
Cached information about one file (either on disk or in the virtual file system). ...
Definition: FileManager.h:59
void PrintStats() const
const FileEntry * getVirtualFile(StringRef Filename, off_t Size, time_t ModificationTime)
Retrieve a file entry for a "virtual" file that acts as if there were a file with the given name on d...
StringRef getCanonicalName(const DirectoryEntry *Dir)
Retrieve the canonical name for a given directory.
bool isValid() const
Definition: FileManager.h:86
Dataflow Directional Tag Classes.
void clearStatCache()
Removes the FileSystemStatCache object from the manager.
Definition: FileManager.cpp:62
off_t getSize() const
Definition: FileManager.h:87
llvm::ErrorOr< std::unique_ptr< llvm::MemoryBuffer > > getBufferForFile(const FileEntry *Entry, bool isVolatile=false, bool ShouldCloseOpenFile=true)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful, otherwise returning null.
bool getNoncachedStatValue(StringRef Path, llvm::vfs::Status &Result)
Get the 'stat' information for the given Path.
Cached information about one directory (either on disk or in the virtual file system).
Definition: FileManager.h:45
void setStatCache(std::unique_ptr< FileSystemStatCache > statCache)
Installs the provided FileSystemStatCache object within the FileManager.
Definition: FileManager.cpp:57
Keeps track of options that affect how file operations are performed.
static void modifyFileEntry(FileEntry *File, off_t Size, time_t ModificationTime)
Modifies the size and modification time of a previously created FileEntry.
void invalidateCache(const FileEntry *Entry)
Remove the real file Entry from the cache.
StringRef getName() const
Definition: FileManager.h:51