1//===- DependencyScanningFilesystem.cpp - clang-scan-deps fs --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h"
10#include "llvm/Support/MemoryBuffer.h"
11#include "llvm/Support/SmallVectorMemoryBuffer.h"
12#include "llvm/Support/Threading.h"
13#include <optional>
14
15using namespace clang;
16using namespace tooling;
17using namespace dependencies;
18
19llvm::ErrorOr<DependencyScanningWorkerFilesystem::TentativeEntry>
20DependencyScanningWorkerFilesystem::readFile(StringRef Filename) {
21  // Load the file and its content from the file system.
22  auto MaybeFile = getUnderlyingFS().openFileForRead(Filename);
23  if (!MaybeFile)
24    return MaybeFile.getError();
25  auto File = std::move(*MaybeFile);
26
27  auto MaybeStat = File->status();
28  if (!MaybeStat)
29    return MaybeStat.getError();
30  auto Stat = std::move(*MaybeStat);
31
32  auto MaybeBuffer = File->getBuffer(Stat.getName());
33  if (!MaybeBuffer)
34    return MaybeBuffer.getError();
35  auto Buffer = std::move(*MaybeBuffer);
36
37  // If the file size changed between read and stat, pretend it didn't.
38  if (Stat.getSize() != Buffer->getBufferSize())
39    Stat = llvm::vfs::Status::copyWithNewSize(Stat, Buffer->getBufferSize());
40
41  return TentativeEntry(Stat, std::move(Buffer));
42}
43
44EntryRef DependencyScanningWorkerFilesystem::scanForDirectivesIfNecessary(
45    const CachedFileSystemEntry &Entry, StringRef Filename, bool Disable) {
46  if (Entry.isError() || Entry.isDirectory() || Disable ||
47      !shouldScanForDirectives(Filename))
48    return EntryRef(Filename, Entry);
49
50  CachedFileContents *Contents = Entry.getCachedContents();
51  assert(Contents && "contents not initialized");
52
53  // Double-checked locking.
54  if (Contents->DepDirectives.load())
55    return EntryRef(Filename, Entry);
56
57  std::lock_guard<std::mutex> GuardLock(Contents->ValueLock);
58
59  // Double-checked locking.
60  if (Contents->DepDirectives.load())
61    return EntryRef(Filename, Entry);
62
63  SmallVector<dependency_directives_scan::Directive, 64> Directives;
64  // Scan the file for preprocessor directives that might affect the
65  // dependencies.
66  if (scanSourceForDependencyDirectives(Contents->Original->getBuffer(),
67                                        Contents->DepDirectiveTokens,
68                                        Directives)) {
69    Contents->DepDirectiveTokens.clear();
70    // FIXME: Propagate the diagnostic if desired by the client.
71    Contents->DepDirectives.store(new std::optional<DependencyDirectivesTy>());
72    return EntryRef(Filename, Entry);
73  }
74
75  // This function performed double-checked locking using `DepDirectives`.
76  // Assigning it must be the last thing this function does, otherwise other
77  // threads may skip the
78  // critical section (`DepDirectives != nullptr`), leading to a data race.
79  Contents->DepDirectives.store(
80      new std::optional<DependencyDirectivesTy>(std::move(Directives)));
81  return EntryRef(Filename, Entry);
82}
83
84DependencyScanningFilesystemSharedCache::
85    DependencyScanningFilesystemSharedCache() {
86  // This heuristic was chosen using a empirical testing on a
87  // reasonably high core machine (iMacPro 18 cores / 36 threads). The cache
88  // sharding gives a performance edge by reducing the lock contention.
89  // FIXME: A better heuristic might also consider the OS to account for
90  // the different cost of lock contention on different OSes.
91  NumShards =
92      std::max(2u, llvm::hardware_concurrency().compute_thread_count() / 4);
93  CacheShards = std::make_unique<CacheShard[]>(NumShards);
94}
95
96DependencyScanningFilesystemSharedCache::CacheShard &
97DependencyScanningFilesystemSharedCache::getShardForFilename(
98    StringRef Filename) const {
99  return CacheShards[llvm::hash_value(Filename) % NumShards];
100}
101
102DependencyScanningFilesystemSharedCache::CacheShard &
103DependencyScanningFilesystemSharedCache::getShardForUID(
104    llvm::sys::fs::UniqueID UID) const {
105  auto Hash = llvm::hash_combine(UID.getDevice(), UID.getFile());
106  return CacheShards[Hash % NumShards];
107}
108
109const CachedFileSystemEntry *
110DependencyScanningFilesystemSharedCache::CacheShard::findEntryByFilename(
111    StringRef Filename) const {
112  std::lock_guard<std::mutex> LockGuard(CacheLock);
113  auto It = EntriesByFilename.find(Filename);
114  return It == EntriesByFilename.end() ? nullptr : It->getValue();
115}
116
117const CachedFileSystemEntry *
118DependencyScanningFilesystemSharedCache::CacheShard::findEntryByUID(
119    llvm::sys::fs::UniqueID UID) const {
120  std::lock_guard<std::mutex> LockGuard(CacheLock);
121  auto It = EntriesByUID.find(UID);
122  return It == EntriesByUID.end() ? nullptr : It->getSecond();
123}
124
125const CachedFileSystemEntry &
126DependencyScanningFilesystemSharedCache::CacheShard::
127    getOrEmplaceEntryForFilename(StringRef Filename,
128                                 llvm::ErrorOr<llvm::vfs::Status> Stat) {
129  std::lock_guard<std::mutex> LockGuard(CacheLock);
130  auto Insertion = EntriesByFilename.insert({Filename, nullptr});
131  if (Insertion.second)
132    Insertion.first->second =
133        new (EntryStorage.Allocate()) CachedFileSystemEntry(std::move(Stat));
134  return *Insertion.first->second;
135}
136
137const CachedFileSystemEntry &
138DependencyScanningFilesystemSharedCache::CacheShard::getOrEmplaceEntryForUID(
139    llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat,
140    std::unique_ptr<llvm::MemoryBuffer> Contents) {
141  std::lock_guard<std::mutex> LockGuard(CacheLock);
142  auto Insertion = EntriesByUID.insert({UID, nullptr});
143  if (Insertion.second) {
144    CachedFileContents *StoredContents = nullptr;
145    if (Contents)
146      StoredContents = new (ContentsStorage.Allocate())
147          CachedFileContents(std::move(Contents));
148    Insertion.first->second = new (EntryStorage.Allocate())
149        CachedFileSystemEntry(std::move(Stat), StoredContents);
150  }
151  return *Insertion.first->second;
152}
153
154const CachedFileSystemEntry &
155DependencyScanningFilesystemSharedCache::CacheShard::
156    getOrInsertEntryForFilename(StringRef Filename,
157                                const CachedFileSystemEntry &Entry) {
158  std::lock_guard<std::mutex> LockGuard(CacheLock);
159  return *EntriesByFilename.insert({Filename, &Entry}).first->getValue();
160}
161
162/// Whitelist file extensions that should be minimized, treating no extension as
163/// a source file that should be minimized.
164///
165/// This is kinda hacky, it would be better if we knew what kind of file Clang
166/// was expecting instead.
167static bool shouldScanForDirectivesBasedOnExtension(StringRef Filename) {
168  StringRef Ext = llvm::sys::path::extension(Filename);
169  if (Ext.empty())
170    return true; // C++ standard library
171  return llvm::StringSwitch<bool>(Ext)
172      .CasesLower(".c", ".cc", ".cpp", ".c++", ".cxx", true)
173      .CasesLower(".h", ".hh", ".hpp", ".h++", ".hxx", true)
174      .CasesLower(".m", ".mm", true)
175      .CasesLower(".i", ".ii", ".mi", ".mmi", true)
176      .CasesLower(".def", ".inc", true)
177      .Default(false);
178}
179
180static bool shouldCacheStatFailures(StringRef Filename) {
181  StringRef Ext = llvm::sys::path::extension(Filename);
182  if (Ext.empty())
183    return false; // This may be the module cache directory.
184  // Only cache stat failures on source files.
185  return shouldScanForDirectivesBasedOnExtension(Filename);
186}
187
188bool DependencyScanningWorkerFilesystem::shouldScanForDirectives(
189    StringRef Filename) {
190  return shouldScanForDirectivesBasedOnExtension(Filename);
191}
192
193const CachedFileSystemEntry &
194DependencyScanningWorkerFilesystem::getOrEmplaceSharedEntryForUID(
195    TentativeEntry TEntry) {
196  auto &Shard = SharedCache.getShardForUID(TEntry.Status.getUniqueID());
197  return Shard.getOrEmplaceEntryForUID(TEntry.Status.getUniqueID(),
198                                       std::move(TEntry.Status),
199                                       std::move(TEntry.Contents));
200}
201
202const CachedFileSystemEntry *
203DependencyScanningWorkerFilesystem::findEntryByFilenameWithWriteThrough(
204    StringRef Filename) {
205  if (const auto *Entry = LocalCache.findEntryByFilename(Filename))
206    return Entry;
207  auto &Shard = SharedCache.getShardForFilename(Filename);
208  if (const auto *Entry = Shard.findEntryByFilename(Filename))
209    return &LocalCache.insertEntryForFilename(Filename, *Entry);
210  return nullptr;
211}
212
213llvm::ErrorOr<const CachedFileSystemEntry &>
214DependencyScanningWorkerFilesystem::computeAndStoreResult(StringRef Filename) {
215  llvm::ErrorOr<llvm::vfs::Status> Stat = getUnderlyingFS().status(Filename);
216  if (!Stat) {
217    if (!shouldCacheStatFailures(Filename))
218      return Stat.getError();
219    const auto &Entry =
220        getOrEmplaceSharedEntryForFilename(Filename, Stat.getError());
221    return insertLocalEntryForFilename(Filename, Entry);
222  }
223
224  if (const auto *Entry = findSharedEntryByUID(*Stat))
225    return insertLocalEntryForFilename(Filename, *Entry);
226
227  auto TEntry =
228      Stat->isDirectory() ? TentativeEntry(*Stat) : readFile(Filename);
229
230  const CachedFileSystemEntry *SharedEntry = [&]() {
231    if (TEntry) {
232      const auto &UIDEntry = getOrEmplaceSharedEntryForUID(std::move(*TEntry));
233      return &getOrInsertSharedEntryForFilename(Filename, UIDEntry);
234    }
235    return &getOrEmplaceSharedEntryForFilename(Filename, TEntry.getError());
236  }();
237
238  return insertLocalEntryForFilename(Filename, *SharedEntry);
239}
240
241llvm::ErrorOr<EntryRef>
242DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
243    StringRef Filename, bool DisableDirectivesScanning) {
244  if (const auto *Entry = findEntryByFilenameWithWriteThrough(Filename))
245    return scanForDirectivesIfNecessary(*Entry, Filename,
246                                        DisableDirectivesScanning)
247        .unwrapError();
248  auto MaybeEntry = computeAndStoreResult(Filename);
249  if (!MaybeEntry)
250    return MaybeEntry.getError();
251  return scanForDirectivesIfNecessary(*MaybeEntry, Filename,
252                                      DisableDirectivesScanning)
253      .unwrapError();
254}
255
256llvm::ErrorOr<llvm::vfs::Status>
257DependencyScanningWorkerFilesystem::status(const Twine &Path) {
258  SmallString<256> OwnedFilename;
259  StringRef Filename = Path.toStringRef(OwnedFilename);
260
261  llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename);
262  if (!Result)
263    return Result.getError();
264  return Result->getStatus();
265}
266
267namespace {
268
269/// The VFS that is used by clang consumes the \c CachedFileSystemEntry using
270/// this subclass.
271class DepScanFile final : public llvm::vfs::File {
272public:
273  DepScanFile(std::unique_ptr<llvm::MemoryBuffer> Buffer,
274              llvm::vfs::Status Stat)
275      : Buffer(std::move(Buffer)), Stat(std::move(Stat)) {}
276
277  static llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> create(EntryRef Entry);
278
279  llvm::ErrorOr<llvm::vfs::Status> status() override { return Stat; }
280
281  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
282  getBuffer(const Twine &Name, int64_t FileSize, bool RequiresNullTerminator,
283            bool IsVolatile) override {
284    return std::move(Buffer);
285  }
286
287  std::error_code close() override { return {}; }
288
289private:
290  std::unique_ptr<llvm::MemoryBuffer> Buffer;
291  llvm::vfs::Status Stat;
292};
293
294} // end anonymous namespace
295
296llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
297DepScanFile::create(EntryRef Entry) {
298  assert(!Entry.isError() && "error");
299
300  if (Entry.isDirectory())
301    return std::make_error_code(std::errc::is_a_directory);
302
303  auto Result = std::make_unique<DepScanFile>(
304      llvm::MemoryBuffer::getMemBuffer(Entry.getContents(),
305                                       Entry.getStatus().getName(),
306                                       /*RequiresNullTerminator=*/false),
307      Entry.getStatus());
308
309  return llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>(
310      std::unique_ptr<llvm::vfs::File>(std::move(Result)));
311}
312
313llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
314DependencyScanningWorkerFilesystem::openFileForRead(const Twine &Path) {
315  SmallString<256> OwnedFilename;
316  StringRef Filename = Path.toStringRef(OwnedFilename);
317
318  llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename);
319  if (!Result)
320    return Result.getError();
321  return DepScanFile::create(Result.get());
322}
323