FileManager.cpp revision 263508
1//===--- FileManager.cpp - File System Probing and Caching ----------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//  This file implements the FileManager interface.
11//
12//===----------------------------------------------------------------------===//
13//
14// TODO: This should index all interesting directories with dirent calls.
15//  getdirentries ?
16//  opendir/readdir_r/closedir ?
17//
18//===----------------------------------------------------------------------===//
19
20#include "clang/Basic/FileManager.h"
21#include "clang/Basic/FileSystemStatCache.h"
22#include "llvm/ADT/SmallString.h"
23#include "llvm/Config/llvm-config.h"
24#include "llvm/Support/FileSystem.h"
25#include "llvm/Support/MemoryBuffer.h"
26#include "llvm/Support/Path.h"
27#include "llvm/Support/raw_ostream.h"
28#include "llvm/Support/system_error.h"
29#include <map>
30#include <set>
31#include <string>
32
33// FIXME: This is terrible, we need this for ::close.
34#if !defined(_MSC_VER) && !defined(__MINGW32__)
35#include <unistd.h>
36#include <sys/uio.h>
37#else
38#include <io.h>
39#ifndef S_ISFIFO
40#define S_ISFIFO(x) (0)
41#endif
42#endif
43#if defined(LLVM_ON_UNIX)
44#include <limits.h>
45#endif
46using namespace clang;
47
48// FIXME: Enhance libsystem to support inode and other fields.
49#include <sys/stat.h>
50
51/// NON_EXISTENT_DIR - A special value distinct from null that is used to
52/// represent a dir name that doesn't exist on the disk.
53#define NON_EXISTENT_DIR reinterpret_cast<DirectoryEntry*>((intptr_t)-1)
54
55/// NON_EXISTENT_FILE - A special value distinct from null that is used to
56/// represent a filename that doesn't exist on the disk.
57#define NON_EXISTENT_FILE reinterpret_cast<FileEntry*>((intptr_t)-1)
58
59
60FileEntry::~FileEntry() {
61  // If this FileEntry owns an open file descriptor that never got used, close
62  // it.
63  if (FD != -1) ::close(FD);
64}
65
66class FileManager::UniqueDirContainer {
67  /// UniqueDirs - Cache from ID's to existing directories/files.
68  std::map<llvm::sys::fs::UniqueID, DirectoryEntry> UniqueDirs;
69
70public:
71  /// getDirectory - Return an existing DirectoryEntry with the given
72  /// ID's if there is already one; otherwise create and return a
73  /// default-constructed DirectoryEntry.
74  DirectoryEntry &getDirectory(const llvm::sys::fs::UniqueID &UniqueID) {
75    return UniqueDirs[UniqueID];
76  }
77
78  size_t size() const { return UniqueDirs.size(); }
79};
80
81class FileManager::UniqueFileContainer {
82  /// UniqueFiles - Cache from ID's to existing directories/files.
83  std::set<FileEntry> UniqueFiles;
84
85public:
86  /// getFile - Return an existing FileEntry with the given ID's if
87  /// there is already one; otherwise create and return a
88  /// default-constructed FileEntry.
89  FileEntry &getFile(llvm::sys::fs::UniqueID UniqueID, bool IsNamedPipe,
90                     bool InPCH) {
91    return const_cast<FileEntry &>(
92        *UniqueFiles.insert(FileEntry(UniqueID, IsNamedPipe, InPCH)).first);
93  }
94
95  size_t size() const { return UniqueFiles.size(); }
96
97  void erase(const FileEntry *Entry) { UniqueFiles.erase(*Entry); }
98};
99
100//===----------------------------------------------------------------------===//
101// Common logic.
102//===----------------------------------------------------------------------===//
103
104FileManager::FileManager(const FileSystemOptions &FSO)
105  : FileSystemOpts(FSO),
106    UniqueRealDirs(*new UniqueDirContainer()),
107    UniqueRealFiles(*new UniqueFileContainer()),
108    SeenDirEntries(64), SeenFileEntries(64), NextFileUID(0) {
109  NumDirLookups = NumFileLookups = 0;
110  NumDirCacheMisses = NumFileCacheMisses = 0;
111}
112
113FileManager::~FileManager() {
114  delete &UniqueRealDirs;
115  delete &UniqueRealFiles;
116  for (unsigned i = 0, e = VirtualFileEntries.size(); i != e; ++i)
117    delete VirtualFileEntries[i];
118  for (unsigned i = 0, e = VirtualDirectoryEntries.size(); i != e; ++i)
119    delete VirtualDirectoryEntries[i];
120}
121
122void FileManager::addStatCache(FileSystemStatCache *statCache,
123                               bool AtBeginning) {
124  assert(statCache && "No stat cache provided?");
125  if (AtBeginning || StatCache.get() == 0) {
126    statCache->setNextStatCache(StatCache.take());
127    StatCache.reset(statCache);
128    return;
129  }
130
131  FileSystemStatCache *LastCache = StatCache.get();
132  while (LastCache->getNextStatCache())
133    LastCache = LastCache->getNextStatCache();
134
135  LastCache->setNextStatCache(statCache);
136}
137
138void FileManager::removeStatCache(FileSystemStatCache *statCache) {
139  if (!statCache)
140    return;
141
142  if (StatCache.get() == statCache) {
143    // This is the first stat cache.
144    StatCache.reset(StatCache->takeNextStatCache());
145    return;
146  }
147
148  // Find the stat cache in the list.
149  FileSystemStatCache *PrevCache = StatCache.get();
150  while (PrevCache && PrevCache->getNextStatCache() != statCache)
151    PrevCache = PrevCache->getNextStatCache();
152
153  assert(PrevCache && "Stat cache not found for removal");
154  PrevCache->setNextStatCache(statCache->getNextStatCache());
155}
156
157void FileManager::clearStatCaches() {
158  StatCache.reset(0);
159}
160
161/// \brief Retrieve the directory that the given file name resides in.
162/// Filename can point to either a real file or a virtual file.
163static const DirectoryEntry *getDirectoryFromFile(FileManager &FileMgr,
164                                                  StringRef Filename,
165                                                  bool CacheFailure) {
166  if (Filename.empty())
167    return NULL;
168
169  if (llvm::sys::path::is_separator(Filename[Filename.size() - 1]))
170    return NULL;  // If Filename is a directory.
171
172  StringRef DirName = llvm::sys::path::parent_path(Filename);
173  // Use the current directory if file has no path component.
174  if (DirName.empty())
175    DirName = ".";
176
177  return FileMgr.getDirectory(DirName, CacheFailure);
178}
179
180/// Add all ancestors of the given path (pointing to either a file or
181/// a directory) as virtual directories.
182void FileManager::addAncestorsAsVirtualDirs(StringRef Path) {
183  StringRef DirName = llvm::sys::path::parent_path(Path);
184  if (DirName.empty())
185    return;
186
187  llvm::StringMapEntry<DirectoryEntry *> &NamedDirEnt =
188    SeenDirEntries.GetOrCreateValue(DirName);
189
190  // When caching a virtual directory, we always cache its ancestors
191  // at the same time.  Therefore, if DirName is already in the cache,
192  // we don't need to recurse as its ancestors must also already be in
193  // the cache.
194  if (NamedDirEnt.getValue())
195    return;
196
197  // Add the virtual directory to the cache.
198  DirectoryEntry *UDE = new DirectoryEntry;
199  UDE->Name = NamedDirEnt.getKeyData();
200  NamedDirEnt.setValue(UDE);
201  VirtualDirectoryEntries.push_back(UDE);
202
203  // Recursively add the other ancestors.
204  addAncestorsAsVirtualDirs(DirName);
205}
206
207const DirectoryEntry *FileManager::getDirectory(StringRef DirName,
208                                                bool CacheFailure) {
209  // stat doesn't like trailing separators except for root directory.
210  // At least, on Win32 MSVCRT, stat() cannot strip trailing '/'.
211  // (though it can strip '\\')
212  if (DirName.size() > 1 &&
213      DirName != llvm::sys::path::root_path(DirName) &&
214      llvm::sys::path::is_separator(DirName.back()))
215    DirName = DirName.substr(0, DirName.size()-1);
216#ifdef LLVM_ON_WIN32
217  // Fixing a problem with "clang C:test.c" on Windows.
218  // Stat("C:") does not recognize "C:" as a valid directory
219  std::string DirNameStr;
220  if (DirName.size() > 1 && DirName.back() == ':' &&
221      DirName.equals_lower(llvm::sys::path::root_name(DirName))) {
222    DirNameStr = DirName.str() + '.';
223    DirName = DirNameStr;
224  }
225#endif
226
227  ++NumDirLookups;
228  llvm::StringMapEntry<DirectoryEntry *> &NamedDirEnt =
229    SeenDirEntries.GetOrCreateValue(DirName);
230
231  // See if there was already an entry in the map.  Note that the map
232  // contains both virtual and real directories.
233  if (NamedDirEnt.getValue())
234    return NamedDirEnt.getValue() == NON_EXISTENT_DIR
235              ? 0 : NamedDirEnt.getValue();
236
237  ++NumDirCacheMisses;
238
239  // By default, initialize it to invalid.
240  NamedDirEnt.setValue(NON_EXISTENT_DIR);
241
242  // Get the null-terminated directory name as stored as the key of the
243  // SeenDirEntries map.
244  const char *InterndDirName = NamedDirEnt.getKeyData();
245
246  // Check to see if the directory exists.
247  FileData Data;
248  if (getStatValue(InterndDirName, Data, false, 0 /*directory lookup*/)) {
249    // There's no real directory at the given path.
250    if (!CacheFailure)
251      SeenDirEntries.erase(DirName);
252    return 0;
253  }
254
255  // It exists.  See if we have already opened a directory with the
256  // same inode (this occurs on Unix-like systems when one dir is
257  // symlinked to another, for example) or the same path (on
258  // Windows).
259  DirectoryEntry &UDE =
260      UniqueRealDirs.getDirectory(Data.UniqueID);
261
262  NamedDirEnt.setValue(&UDE);
263  if (!UDE.getName()) {
264    // We don't have this directory yet, add it.  We use the string
265    // key from the SeenDirEntries map as the string.
266    UDE.Name  = InterndDirName;
267  }
268
269  return &UDE;
270}
271
272const FileEntry *FileManager::getFile(StringRef Filename, bool openFile,
273                                      bool CacheFailure) {
274  ++NumFileLookups;
275
276  // See if there is already an entry in the map.
277  llvm::StringMapEntry<FileEntry *> &NamedFileEnt =
278    SeenFileEntries.GetOrCreateValue(Filename);
279
280  // See if there is already an entry in the map.
281  if (NamedFileEnt.getValue())
282    return NamedFileEnt.getValue() == NON_EXISTENT_FILE
283                 ? 0 : NamedFileEnt.getValue();
284
285  ++NumFileCacheMisses;
286
287  // By default, initialize it to invalid.
288  NamedFileEnt.setValue(NON_EXISTENT_FILE);
289
290  // Get the null-terminated file name as stored as the key of the
291  // SeenFileEntries map.
292  const char *InterndFileName = NamedFileEnt.getKeyData();
293
294  // Look up the directory for the file.  When looking up something like
295  // sys/foo.h we'll discover all of the search directories that have a 'sys'
296  // subdirectory.  This will let us avoid having to waste time on known-to-fail
297  // searches when we go to find sys/bar.h, because all the search directories
298  // without a 'sys' subdir will get a cached failure result.
299  const DirectoryEntry *DirInfo = getDirectoryFromFile(*this, Filename,
300                                                       CacheFailure);
301  if (DirInfo == 0) {  // Directory doesn't exist, file can't exist.
302    if (!CacheFailure)
303      SeenFileEntries.erase(Filename);
304
305    return 0;
306  }
307
308  // FIXME: Use the directory info to prune this, before doing the stat syscall.
309  // FIXME: This will reduce the # syscalls.
310
311  // Nope, there isn't.  Check to see if the file exists.
312  int FileDescriptor = -1;
313  FileData Data;
314  if (getStatValue(InterndFileName, Data, true,
315                   openFile ? &FileDescriptor : 0)) {
316    // There's no real file at the given path.
317    if (!CacheFailure)
318      SeenFileEntries.erase(Filename);
319
320    return 0;
321  }
322
323  if (FileDescriptor != -1 && !openFile) {
324    close(FileDescriptor);
325    FileDescriptor = -1;
326  }
327
328  // It exists.  See if we have already opened a file with the same inode.
329  // This occurs when one dir is symlinked to another, for example.
330  FileEntry &UFE =
331      UniqueRealFiles.getFile(Data.UniqueID, Data.IsNamedPipe, Data.InPCH);
332
333  NamedFileEnt.setValue(&UFE);
334  if (UFE.getName()) { // Already have an entry with this inode, return it.
335    // If the stat process opened the file, close it to avoid a FD leak.
336    if (FileDescriptor != -1)
337      close(FileDescriptor);
338
339    return &UFE;
340  }
341
342  // Otherwise, we don't have this directory yet, add it.
343  // FIXME: Change the name to be a char* that points back to the
344  // 'SeenFileEntries' key.
345  UFE.Name    = InterndFileName;
346  UFE.Size = Data.Size;
347  UFE.ModTime = Data.ModTime;
348  UFE.Dir     = DirInfo;
349  UFE.UID     = NextFileUID++;
350  UFE.FD      = FileDescriptor;
351  return &UFE;
352}
353
354const FileEntry *
355FileManager::getVirtualFile(StringRef Filename, off_t Size,
356                            time_t ModificationTime) {
357  ++NumFileLookups;
358
359  // See if there is already an entry in the map.
360  llvm::StringMapEntry<FileEntry *> &NamedFileEnt =
361    SeenFileEntries.GetOrCreateValue(Filename);
362
363  // See if there is already an entry in the map.
364  if (NamedFileEnt.getValue() && NamedFileEnt.getValue() != NON_EXISTENT_FILE)
365    return NamedFileEnt.getValue();
366
367  ++NumFileCacheMisses;
368
369  // By default, initialize it to invalid.
370  NamedFileEnt.setValue(NON_EXISTENT_FILE);
371
372  addAncestorsAsVirtualDirs(Filename);
373  FileEntry *UFE = 0;
374
375  // Now that all ancestors of Filename are in the cache, the
376  // following call is guaranteed to find the DirectoryEntry from the
377  // cache.
378  const DirectoryEntry *DirInfo = getDirectoryFromFile(*this, Filename,
379                                                       /*CacheFailure=*/true);
380  assert(DirInfo &&
381         "The directory of a virtual file should already be in the cache.");
382
383  // Check to see if the file exists. If so, drop the virtual file
384  FileData Data;
385  const char *InterndFileName = NamedFileEnt.getKeyData();
386  if (getStatValue(InterndFileName, Data, true, 0) == 0) {
387    Data.Size = Size;
388    Data.ModTime = ModificationTime;
389    UFE = &UniqueRealFiles.getFile(Data.UniqueID, Data.IsNamedPipe, Data.InPCH);
390
391    NamedFileEnt.setValue(UFE);
392
393    // If we had already opened this file, close it now so we don't
394    // leak the descriptor. We're not going to use the file
395    // descriptor anyway, since this is a virtual file.
396    if (UFE->FD != -1) {
397      close(UFE->FD);
398      UFE->FD = -1;
399    }
400
401    // If we already have an entry with this inode, return it.
402    if (UFE->getName())
403      return UFE;
404  }
405
406  if (!UFE) {
407    UFE = new FileEntry();
408    VirtualFileEntries.push_back(UFE);
409    NamedFileEnt.setValue(UFE);
410  }
411
412  UFE->Name    = InterndFileName;
413  UFE->Size    = Size;
414  UFE->ModTime = ModificationTime;
415  UFE->Dir     = DirInfo;
416  UFE->UID     = NextFileUID++;
417  UFE->FD      = -1;
418  return UFE;
419}
420
421void FileManager::FixupRelativePath(SmallVectorImpl<char> &path) const {
422  StringRef pathRef(path.data(), path.size());
423
424  if (FileSystemOpts.WorkingDir.empty()
425      || llvm::sys::path::is_absolute(pathRef))
426    return;
427
428  SmallString<128> NewPath(FileSystemOpts.WorkingDir);
429  llvm::sys::path::append(NewPath, pathRef);
430  path = NewPath;
431}
432
433llvm::MemoryBuffer *FileManager::
434getBufferForFile(const FileEntry *Entry, std::string *ErrorStr,
435                 bool isVolatile) {
436  OwningPtr<llvm::MemoryBuffer> Result;
437  llvm::error_code ec;
438
439  uint64_t FileSize = Entry->getSize();
440  // If there's a high enough chance that the file have changed since we
441  // got its size, force a stat before opening it.
442  if (isVolatile)
443    FileSize = -1;
444
445  const char *Filename = Entry->getName();
446  // If the file is already open, use the open file descriptor.
447  if (Entry->FD != -1) {
448    ec = llvm::MemoryBuffer::getOpenFile(Entry->FD, Filename, Result, FileSize);
449    if (ErrorStr)
450      *ErrorStr = ec.message();
451
452    close(Entry->FD);
453    Entry->FD = -1;
454    return Result.take();
455  }
456
457  // Otherwise, open the file.
458
459  if (FileSystemOpts.WorkingDir.empty()) {
460    ec = llvm::MemoryBuffer::getFile(Filename, Result, FileSize);
461    if (ec && ErrorStr)
462      *ErrorStr = ec.message();
463    return Result.take();
464  }
465
466  SmallString<128> FilePath(Entry->getName());
467  FixupRelativePath(FilePath);
468  ec = llvm::MemoryBuffer::getFile(FilePath.str(), Result, FileSize);
469  if (ec && ErrorStr)
470    *ErrorStr = ec.message();
471  return Result.take();
472}
473
474llvm::MemoryBuffer *FileManager::
475getBufferForFile(StringRef Filename, std::string *ErrorStr) {
476  OwningPtr<llvm::MemoryBuffer> Result;
477  llvm::error_code ec;
478  if (FileSystemOpts.WorkingDir.empty()) {
479    ec = llvm::MemoryBuffer::getFile(Filename, Result);
480    if (ec && ErrorStr)
481      *ErrorStr = ec.message();
482    return Result.take();
483  }
484
485  SmallString<128> FilePath(Filename);
486  FixupRelativePath(FilePath);
487  ec = llvm::MemoryBuffer::getFile(FilePath.c_str(), Result);
488  if (ec && ErrorStr)
489    *ErrorStr = ec.message();
490  return Result.take();
491}
492
493/// getStatValue - Get the 'stat' information for the specified path,
494/// using the cache to accelerate it if possible.  This returns true
495/// if the path points to a virtual file or does not exist, or returns
496/// false if it's an existent real file.  If FileDescriptor is NULL,
497/// do directory look-up instead of file look-up.
498bool FileManager::getStatValue(const char *Path, FileData &Data, bool isFile,
499                               int *FileDescriptor) {
500  // FIXME: FileSystemOpts shouldn't be passed in here, all paths should be
501  // absolute!
502  if (FileSystemOpts.WorkingDir.empty())
503    return FileSystemStatCache::get(Path, Data, isFile, FileDescriptor,
504                                    StatCache.get());
505
506  SmallString<128> FilePath(Path);
507  FixupRelativePath(FilePath);
508
509  return FileSystemStatCache::get(FilePath.c_str(), Data, isFile,
510                                  FileDescriptor, StatCache.get());
511}
512
513bool FileManager::getNoncachedStatValue(StringRef Path,
514                                        llvm::sys::fs::file_status &Result) {
515  SmallString<128> FilePath(Path);
516  FixupRelativePath(FilePath);
517
518  return llvm::sys::fs::status(FilePath.c_str(), Result);
519}
520
521void FileManager::invalidateCache(const FileEntry *Entry) {
522  assert(Entry && "Cannot invalidate a NULL FileEntry");
523
524  SeenFileEntries.erase(Entry->getName());
525
526  // FileEntry invalidation should not block future optimizations in the file
527  // caches. Possible alternatives are cache truncation (invalidate last N) or
528  // invalidation of the whole cache.
529  UniqueRealFiles.erase(Entry);
530}
531
532
533void FileManager::GetUniqueIDMapping(
534                   SmallVectorImpl<const FileEntry *> &UIDToFiles) const {
535  UIDToFiles.clear();
536  UIDToFiles.resize(NextFileUID);
537
538  // Map file entries
539  for (llvm::StringMap<FileEntry*, llvm::BumpPtrAllocator>::const_iterator
540         FE = SeenFileEntries.begin(), FEEnd = SeenFileEntries.end();
541       FE != FEEnd; ++FE)
542    if (FE->getValue() && FE->getValue() != NON_EXISTENT_FILE)
543      UIDToFiles[FE->getValue()->getUID()] = FE->getValue();
544
545  // Map virtual file entries
546  for (SmallVectorImpl<FileEntry *>::const_iterator
547         VFE = VirtualFileEntries.begin(), VFEEnd = VirtualFileEntries.end();
548       VFE != VFEEnd; ++VFE)
549    if (*VFE && *VFE != NON_EXISTENT_FILE)
550      UIDToFiles[(*VFE)->getUID()] = *VFE;
551}
552
553void FileManager::modifyFileEntry(FileEntry *File,
554                                  off_t Size, time_t ModificationTime) {
555  File->Size = Size;
556  File->ModTime = ModificationTime;
557}
558
559StringRef FileManager::getCanonicalName(const DirectoryEntry *Dir) {
560  // FIXME: use llvm::sys::fs::canonical() when it gets implemented
561#ifdef LLVM_ON_UNIX
562  llvm::DenseMap<const DirectoryEntry *, llvm::StringRef>::iterator Known
563    = CanonicalDirNames.find(Dir);
564  if (Known != CanonicalDirNames.end())
565    return Known->second;
566
567  StringRef CanonicalName(Dir->getName());
568  char CanonicalNameBuf[PATH_MAX];
569  if (realpath(Dir->getName(), CanonicalNameBuf)) {
570    unsigned Len = strlen(CanonicalNameBuf);
571    char *Mem = static_cast<char *>(CanonicalNameStorage.Allocate(Len, 1));
572    memcpy(Mem, CanonicalNameBuf, Len);
573    CanonicalName = StringRef(Mem, Len);
574  }
575
576  CanonicalDirNames.insert(std::make_pair(Dir, CanonicalName));
577  return CanonicalName;
578#else
579  return StringRef(Dir->getName());
580#endif
581}
582
583void FileManager::PrintStats() const {
584  llvm::errs() << "\n*** File Manager Stats:\n";
585  llvm::errs() << UniqueRealFiles.size() << " real files found, "
586               << UniqueRealDirs.size() << " real dirs found.\n";
587  llvm::errs() << VirtualFileEntries.size() << " virtual files found, "
588               << VirtualDirectoryEntries.size() << " virtual dirs found.\n";
589  llvm::errs() << NumDirLookups << " dir lookups, "
590               << NumDirCacheMisses << " dir cache misses.\n";
591  llvm::errs() << NumFileLookups << " file lookups, "
592               << NumFileCacheMisses << " file cache misses.\n";
593
594  //llvm::errs() << PagesMapped << BytesOfPagesMapped << FSLookups;
595}
596