1//===- DependencyScanningFilesystem.h - clang-scan-deps fs ===---*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H
10#define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H
11
12#include "clang/Basic/LLVM.h"
13#include "clang/Lex/DependencyDirectivesScanner.h"
14#include "llvm/ADT/DenseMap.h"
15#include "llvm/ADT/StringMap.h"
16#include "llvm/Support/Allocator.h"
17#include "llvm/Support/ErrorOr.h"
18#include "llvm/Support/VirtualFileSystem.h"
19#include <mutex>
20#include <optional>
21
22namespace clang {
23namespace tooling {
24namespace dependencies {
25
26using DependencyDirectivesTy =
27    SmallVector<dependency_directives_scan::Directive, 20>;
28
29/// Contents and directive tokens of a cached file entry. Single instance can
30/// be shared between multiple entries.
31struct CachedFileContents {
32  CachedFileContents(std::unique_ptr<llvm::MemoryBuffer> Contents)
33      : Original(std::move(Contents)), DepDirectives(nullptr) {}
34
35  /// Owning storage for the original contents.
36  std::unique_ptr<llvm::MemoryBuffer> Original;
37
38  /// The mutex that must be locked before mutating directive tokens.
39  std::mutex ValueLock;
40  SmallVector<dependency_directives_scan::Token, 10> DepDirectiveTokens;
41  /// Accessor to the directive tokens that's atomic to avoid data races.
42  /// \p CachedFileContents has ownership of the pointer.
43  std::atomic<const std::optional<DependencyDirectivesTy> *> DepDirectives;
44
45  ~CachedFileContents() { delete DepDirectives.load(); }
46};
47
48/// An in-memory representation of a file system entity that is of interest to
49/// the dependency scanning filesystem.
50///
51/// It represents one of the following:
52/// - opened file with contents and a stat value,
53/// - opened file with contents, directive tokens and a stat value,
54/// - directory entry with its stat value,
55/// - filesystem error.
56///
57/// Single instance of this class can be shared across different filenames (e.g.
58/// a regular file and a symlink). For this reason the status filename is empty
59/// and is only materialized by \c EntryRef that knows the requested filename.
60class CachedFileSystemEntry {
61public:
62  /// Creates an entry without contents: either a filesystem error or
63  /// a directory with stat value.
64  CachedFileSystemEntry(llvm::ErrorOr<llvm::vfs::Status> Stat)
65      : MaybeStat(std::move(Stat)), Contents(nullptr) {
66    clearStatName();
67  }
68
69  /// Creates an entry representing a file with contents.
70  CachedFileSystemEntry(llvm::ErrorOr<llvm::vfs::Status> Stat,
71                        CachedFileContents *Contents)
72      : MaybeStat(std::move(Stat)), Contents(std::move(Contents)) {
73    clearStatName();
74  }
75
76  /// \returns True if the entry is a filesystem error.
77  bool isError() const { return !MaybeStat; }
78
79  /// \returns True if the current entry represents a directory.
80  bool isDirectory() const { return !isError() && MaybeStat->isDirectory(); }
81
82  /// \returns Original contents of the file.
83  StringRef getOriginalContents() const {
84    assert(!isError() && "error");
85    assert(!MaybeStat->isDirectory() && "not a file");
86    assert(Contents && "contents not initialized");
87    return Contents->Original->getBuffer();
88  }
89
90  /// \returns The scanned preprocessor directive tokens of the file that are
91  /// used to speed up preprocessing, if available.
92  std::optional<ArrayRef<dependency_directives_scan::Directive>>
93  getDirectiveTokens() const {
94    assert(!isError() && "error");
95    assert(!isDirectory() && "not a file");
96    assert(Contents && "contents not initialized");
97    if (auto *Directives = Contents->DepDirectives.load()) {
98      if (Directives->has_value())
99        return ArrayRef<dependency_directives_scan::Directive>(**Directives);
100    }
101    return std::nullopt;
102  }
103
104  /// \returns The error.
105  std::error_code getError() const { return MaybeStat.getError(); }
106
107  /// \returns The entry status with empty filename.
108  llvm::vfs::Status getStatus() const {
109    assert(!isError() && "error");
110    assert(MaybeStat->getName().empty() && "stat name must be empty");
111    return *MaybeStat;
112  }
113
114  /// \returns The unique ID of the entry.
115  llvm::sys::fs::UniqueID getUniqueID() const {
116    assert(!isError() && "error");
117    return MaybeStat->getUniqueID();
118  }
119
120  /// \returns The data structure holding both contents and directive tokens.
121  CachedFileContents *getCachedContents() const {
122    assert(!isError() && "error");
123    assert(!isDirectory() && "not a file");
124    return Contents;
125  }
126
127private:
128  void clearStatName() {
129    if (MaybeStat)
130      MaybeStat = llvm::vfs::Status::copyWithNewName(*MaybeStat, "");
131  }
132
133  /// Either the filesystem error or status of the entry.
134  /// The filename is empty and only materialized by \c EntryRef.
135  llvm::ErrorOr<llvm::vfs::Status> MaybeStat;
136
137  /// Non-owning pointer to the file contents.
138  ///
139  /// We're using pointer here to keep the size of this class small. Instances
140  /// representing directories and filesystem errors don't hold any contents
141  /// anyway.
142  CachedFileContents *Contents;
143};
144
145/// This class is a shared cache, that caches the 'stat' and 'open' calls to the
146/// underlying real file system, and the scanned preprocessor directives of
147/// files.
148///
149/// It is sharded based on the hash of the key to reduce the lock contention for
150/// the worker threads.
151class DependencyScanningFilesystemSharedCache {
152public:
153  struct CacheShard {
154    /// The mutex that needs to be locked before mutation of any member.
155    mutable std::mutex CacheLock;
156
157    /// Map from filenames to cached entries.
158    llvm::StringMap<const CachedFileSystemEntry *, llvm::BumpPtrAllocator>
159        EntriesByFilename;
160
161    /// Map from unique IDs to cached entries.
162    llvm::DenseMap<llvm::sys::fs::UniqueID, const CachedFileSystemEntry *>
163        EntriesByUID;
164
165    /// The backing storage for cached entries.
166    llvm::SpecificBumpPtrAllocator<CachedFileSystemEntry> EntryStorage;
167
168    /// The backing storage for cached contents.
169    llvm::SpecificBumpPtrAllocator<CachedFileContents> ContentsStorage;
170
171    /// Returns entry associated with the filename or nullptr if none is found.
172    const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const;
173
174    /// Returns entry associated with the unique ID or nullptr if none is found.
175    const CachedFileSystemEntry *
176    findEntryByUID(llvm::sys::fs::UniqueID UID) const;
177
178    /// Returns entry associated with the filename if there is some. Otherwise,
179    /// constructs new one with the given status, associates it with the
180    /// filename and returns the result.
181    const CachedFileSystemEntry &
182    getOrEmplaceEntryForFilename(StringRef Filename,
183                                 llvm::ErrorOr<llvm::vfs::Status> Stat);
184
185    /// Returns entry associated with the unique ID if there is some. Otherwise,
186    /// constructs new one with the given status and contents, associates it
187    /// with the unique ID and returns the result.
188    const CachedFileSystemEntry &
189    getOrEmplaceEntryForUID(llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat,
190                            std::unique_ptr<llvm::MemoryBuffer> Contents);
191
192    /// Returns entry associated with the filename if there is some. Otherwise,
193    /// associates the given entry with the filename and returns it.
194    const CachedFileSystemEntry &
195    getOrInsertEntryForFilename(StringRef Filename,
196                                const CachedFileSystemEntry &Entry);
197  };
198
199  DependencyScanningFilesystemSharedCache();
200
201  /// Returns shard for the given key.
202  CacheShard &getShardForFilename(StringRef Filename) const;
203  CacheShard &getShardForUID(llvm::sys::fs::UniqueID UID) const;
204
205private:
206  std::unique_ptr<CacheShard[]> CacheShards;
207  unsigned NumShards;
208};
209
210/// This class is a local cache, that caches the 'stat' and 'open' calls to the
211/// underlying real file system.
212class DependencyScanningFilesystemLocalCache {
213  llvm::StringMap<const CachedFileSystemEntry *, llvm::BumpPtrAllocator> Cache;
214
215public:
216  /// Returns entry associated with the filename or nullptr if none is found.
217  const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const {
218    assert(llvm::sys::path::is_absolute_gnu(Filename));
219    auto It = Cache.find(Filename);
220    return It == Cache.end() ? nullptr : It->getValue();
221  }
222
223  /// Associates the given entry with the filename and returns the given entry
224  /// pointer (for convenience).
225  const CachedFileSystemEntry &
226  insertEntryForFilename(StringRef Filename,
227                         const CachedFileSystemEntry &Entry) {
228    assert(llvm::sys::path::is_absolute_gnu(Filename));
229    const auto *InsertedEntry = Cache.insert({Filename, &Entry}).first->second;
230    assert(InsertedEntry == &Entry && "entry already present");
231    return *InsertedEntry;
232  }
233};
234
235/// Reference to a CachedFileSystemEntry.
236/// If the underlying entry is an opened file, this wrapper returns the file
237/// contents and the scanned preprocessor directives.
238class EntryRef {
239  /// The filename used to access this entry.
240  std::string Filename;
241
242  /// The underlying cached entry.
243  const CachedFileSystemEntry &Entry;
244
245public:
246  EntryRef(StringRef Name, const CachedFileSystemEntry &Entry)
247      : Filename(Name), Entry(Entry) {}
248
249  llvm::vfs::Status getStatus() const {
250    llvm::vfs::Status Stat = Entry.getStatus();
251    if (!Stat.isDirectory())
252      Stat = llvm::vfs::Status::copyWithNewSize(Stat, getContents().size());
253    return llvm::vfs::Status::copyWithNewName(Stat, Filename);
254  }
255
256  bool isError() const { return Entry.isError(); }
257  bool isDirectory() const { return Entry.isDirectory(); }
258
259  /// If the cached entry represents an error, promotes it into `ErrorOr`.
260  llvm::ErrorOr<EntryRef> unwrapError() const {
261    if (isError())
262      return Entry.getError();
263    return *this;
264  }
265
266  StringRef getContents() const { return Entry.getOriginalContents(); }
267
268  std::optional<ArrayRef<dependency_directives_scan::Directive>>
269  getDirectiveTokens() const {
270    return Entry.getDirectiveTokens();
271  }
272};
273
274/// A virtual file system optimized for the dependency discovery.
275///
276/// It is primarily designed to work with source files whose contents was
277/// preprocessed to remove any tokens that are unlikely to affect the dependency
278/// computation.
279///
280/// This is not a thread safe VFS. A single instance is meant to be used only in
281/// one thread. Multiple instances are allowed to service multiple threads
282/// running in parallel.
283class DependencyScanningWorkerFilesystem : public llvm::vfs::ProxyFileSystem {
284public:
285  DependencyScanningWorkerFilesystem(
286      DependencyScanningFilesystemSharedCache &SharedCache,
287      IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS);
288
289  llvm::ErrorOr<llvm::vfs::Status> status(const Twine &Path) override;
290  llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
291  openFileForRead(const Twine &Path) override;
292
293  std::error_code setCurrentWorkingDirectory(const Twine &Path) override;
294
295  /// Returns entry for the given filename.
296  ///
297  /// Attempts to use the local and shared caches first, then falls back to
298  /// using the underlying filesystem.
299  llvm::ErrorOr<EntryRef>
300  getOrCreateFileSystemEntry(StringRef Filename,
301                             bool DisableDirectivesScanning = false);
302
303private:
304  /// Check whether the file should be scanned for preprocessor directives.
305  bool shouldScanForDirectives(StringRef Filename);
306
307  /// For a filename that's not yet associated with any entry in the caches,
308  /// uses the underlying filesystem to either look up the entry based in the
309  /// shared cache indexed by unique ID, or creates new entry from scratch.
310  /// \p FilenameForLookup will always be an absolute path, and different than
311  /// \p OriginalFilename if \p OriginalFilename is relative.
312  llvm::ErrorOr<const CachedFileSystemEntry &>
313  computeAndStoreResult(StringRef OriginalFilename,
314                        StringRef FilenameForLookup);
315
316  /// Scan for preprocessor directives for the given entry if necessary and
317  /// returns a wrapper object with reference semantics.
318  EntryRef scanForDirectivesIfNecessary(const CachedFileSystemEntry &Entry,
319                                        StringRef Filename, bool Disable);
320
321  /// Represents a filesystem entry that has been stat-ed (and potentially read)
322  /// and that's about to be inserted into the cache as `CachedFileSystemEntry`.
323  struct TentativeEntry {
324    llvm::vfs::Status Status;
325    std::unique_ptr<llvm::MemoryBuffer> Contents;
326
327    TentativeEntry(llvm::vfs::Status Status,
328                   std::unique_ptr<llvm::MemoryBuffer> Contents = nullptr)
329        : Status(std::move(Status)), Contents(std::move(Contents)) {}
330  };
331
332  /// Reads file at the given path. Enforces consistency between the file size
333  /// in status and size of read contents.
334  llvm::ErrorOr<TentativeEntry> readFile(StringRef Filename);
335
336  /// Returns entry associated with the unique ID of the given tentative entry
337  /// if there is some in the shared cache. Otherwise, constructs new one,
338  /// associates it with the unique ID and returns the result.
339  const CachedFileSystemEntry &
340  getOrEmplaceSharedEntryForUID(TentativeEntry TEntry);
341
342  /// Returns entry associated with the filename or nullptr if none is found.
343  ///
344  /// Returns entry from local cache if there is some. Otherwise, if the entry
345  /// is found in the shared cache, writes it through the local cache and
346  /// returns it. Otherwise returns nullptr.
347  const CachedFileSystemEntry *
348  findEntryByFilenameWithWriteThrough(StringRef Filename);
349
350  /// Returns entry associated with the unique ID in the shared cache or nullptr
351  /// if none is found.
352  const CachedFileSystemEntry *
353  findSharedEntryByUID(llvm::vfs::Status Stat) const {
354    return SharedCache.getShardForUID(Stat.getUniqueID())
355        .findEntryByUID(Stat.getUniqueID());
356  }
357
358  /// Associates the given entry with the filename in the local cache and
359  /// returns it.
360  const CachedFileSystemEntry &
361  insertLocalEntryForFilename(StringRef Filename,
362                              const CachedFileSystemEntry &Entry) {
363    return LocalCache.insertEntryForFilename(Filename, Entry);
364  }
365
366  /// Returns entry associated with the filename in the shared cache if there is
367  /// some. Otherwise, constructs new one with the given error code, associates
368  /// it with the filename and returns the result.
369  const CachedFileSystemEntry &
370  getOrEmplaceSharedEntryForFilename(StringRef Filename, std::error_code EC) {
371    return SharedCache.getShardForFilename(Filename)
372        .getOrEmplaceEntryForFilename(Filename, EC);
373  }
374
375  /// Returns entry associated with the filename in the shared cache if there is
376  /// some. Otherwise, associates the given entry with the filename and returns
377  /// it.
378  const CachedFileSystemEntry &
379  getOrInsertSharedEntryForFilename(StringRef Filename,
380                                    const CachedFileSystemEntry &Entry) {
381    return SharedCache.getShardForFilename(Filename)
382        .getOrInsertEntryForFilename(Filename, Entry);
383  }
384
385  void printImpl(raw_ostream &OS, PrintType Type,
386                 unsigned IndentLevel) const override {
387    printIndent(OS, IndentLevel);
388    OS << "DependencyScanningFilesystem\n";
389    getUnderlyingFS().print(OS, Type, IndentLevel + 1);
390  }
391
392  /// The global cache shared between worker threads.
393  DependencyScanningFilesystemSharedCache &SharedCache;
394  /// The local cache is used by the worker thread to cache file system queries
395  /// locally instead of querying the global cache every time.
396  DependencyScanningFilesystemLocalCache LocalCache;
397
398  /// The working directory to use for making relative paths absolute before
399  /// using them for cache lookups.
400  llvm::ErrorOr<std::string> WorkingDirForCacheLookup;
401
402  void updateWorkingDirForCacheLookup();
403};
404
405} // end namespace dependencies
406} // end namespace tooling
407} // end namespace clang
408
409#endif // LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H
410