1//===-- FileCollector.cpp ---------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/Support/FileCollector.h"
10#include "llvm/ADT/SmallString.h"
11#include "llvm/ADT/Twine.h"
12#include "llvm/Support/FileSystem.h"
13#include "llvm/Support/Path.h"
14#include "llvm/Support/Process.h"
15
16using namespace llvm;
17
18FileCollectorBase::FileCollectorBase() = default;
19FileCollectorBase::~FileCollectorBase() = default;
20
21void FileCollectorBase::addFile(const Twine &File) {
22  std::lock_guard<std::mutex> lock(Mutex);
23  std::string FileStr = File.str();
24  if (markAsSeen(FileStr))
25    addFileImpl(FileStr);
26}
27
28void FileCollectorBase::addDirectory(const Twine &Dir) {
29  assert(sys::fs::is_directory(Dir));
30  std::error_code EC;
31  addDirectoryImpl(Dir, vfs::getRealFileSystem(), EC);
32}
33
34static bool isCaseSensitivePath(StringRef Path) {
35  SmallString<256> TmpDest = Path, UpperDest, RealDest;
36
37  // Remove component traversals, links, etc.
38  if (sys::fs::real_path(Path, TmpDest))
39    return true; // Current default value in vfs.yaml
40  Path = TmpDest;
41
42  // Change path to all upper case and ask for its real path, if the latter
43  // exists and is equal to path, it's not case sensitive. Default to case
44  // sensitive in the absence of real_path, since this is the YAMLVFSWriter
45  // default.
46  UpperDest = Path.upper();
47  if (!sys::fs::real_path(UpperDest, RealDest) && Path.equals(RealDest))
48    return false;
49  return true;
50}
51
52FileCollector::FileCollector(std::string Root, std::string OverlayRoot)
53    : Root(std::move(Root)), OverlayRoot(std::move(OverlayRoot)) {
54}
55
56void FileCollector::PathCanonicalizer::updateWithRealPath(
57    SmallVectorImpl<char> &Path) {
58  StringRef SrcPath(Path.begin(), Path.size());
59  StringRef Filename = sys::path::filename(SrcPath);
60  StringRef Directory = sys::path::parent_path(SrcPath);
61
62  // Use real_path to fix any symbolic link component present in the directory
63  // part of the path, caching the search because computing the real path is
64  // expensive.
65  SmallString<256> RealPath;
66  auto DirWithSymlink = CachedDirs.find(Directory);
67  if (DirWithSymlink == CachedDirs.end()) {
68    // FIXME: Should this be a call to FileSystem::getRealpath(), in some
69    // cases? What if there is nothing on disk?
70    if (sys::fs::real_path(Directory, RealPath))
71      return;
72    CachedDirs[Directory] = std::string(RealPath.str());
73  } else {
74    RealPath = DirWithSymlink->second;
75  }
76
77  // Finish recreating the path by appending the original filename, since we
78  // don't need to resolve symlinks in the filename.
79  //
80  // FIXME: If we can cope with this, maybe we can cope without calling
81  // getRealPath() at all when there's no ".." component.
82  sys::path::append(RealPath, Filename);
83
84  // Swap to create the output.
85  Path.swap(RealPath);
86}
87
88/// Make Path absolute.
89static void makeAbsolute(SmallVectorImpl<char> &Path) {
90  // We need an absolute src path to append to the root.
91  sys::fs::make_absolute(Path);
92
93  // Canonicalize src to a native path to avoid mixed separator styles.
94  sys::path::native(Path);
95
96  // Remove redundant leading "./" pieces and consecutive separators.
97  Path.erase(Path.begin(), sys::path::remove_leading_dotslash(
98                               StringRef(Path.begin(), Path.size()))
99                               .begin());
100}
101
102FileCollector::PathCanonicalizer::PathStorage
103FileCollector::PathCanonicalizer::canonicalize(StringRef SrcPath) {
104  PathStorage Paths;
105  Paths.VirtualPath = SrcPath;
106  makeAbsolute(Paths.VirtualPath);
107
108  // If a ".." component is present after a symlink component, remove_dots may
109  // lead to the wrong real destination path. Let the source be canonicalized
110  // like that but make sure we always use the real path for the destination.
111  Paths.CopyFrom = Paths.VirtualPath;
112  updateWithRealPath(Paths.CopyFrom);
113
114  // Canonicalize the virtual path by removing "..", "." components.
115  sys::path::remove_dots(Paths.VirtualPath, /*remove_dot_dot=*/true);
116
117  return Paths;
118}
119
120void FileCollector::addFileImpl(StringRef SrcPath) {
121  PathCanonicalizer::PathStorage Paths = Canonicalizer.canonicalize(SrcPath);
122
123  SmallString<256> DstPath = StringRef(Root);
124  sys::path::append(DstPath, sys::path::relative_path(Paths.CopyFrom));
125
126  // Always map a canonical src path to its real path into the YAML, by doing
127  // this we map different virtual src paths to the same entry in the VFS
128  // overlay, which is a way to emulate symlink inside the VFS; this is also
129  // needed for correctness, not doing that can lead to module redefinition
130  // errors.
131  addFileToMapping(Paths.VirtualPath, DstPath);
132}
133
134llvm::vfs::directory_iterator
135FileCollector::addDirectoryImpl(const llvm::Twine &Dir,
136                                IntrusiveRefCntPtr<vfs::FileSystem> FS,
137                                std::error_code &EC) {
138  auto It = FS->dir_begin(Dir, EC);
139  if (EC)
140    return It;
141  addFile(Dir);
142  for (; !EC && It != llvm::vfs::directory_iterator(); It.increment(EC)) {
143    if (It->type() == sys::fs::file_type::regular_file ||
144        It->type() == sys::fs::file_type::directory_file ||
145        It->type() == sys::fs::file_type::symlink_file) {
146      addFile(It->path());
147    }
148  }
149  if (EC)
150    return It;
151  // Return a new iterator.
152  return FS->dir_begin(Dir, EC);
153}
154
155/// Set the access and modification time for the given file from the given
156/// status object.
157static std::error_code
158copyAccessAndModificationTime(StringRef Filename,
159                              const sys::fs::file_status &Stat) {
160  int FD;
161
162  if (auto EC =
163          sys::fs::openFileForWrite(Filename, FD, sys::fs::CD_OpenExisting))
164    return EC;
165
166  if (auto EC = sys::fs::setLastAccessAndModificationTime(
167          FD, Stat.getLastAccessedTime(), Stat.getLastModificationTime()))
168    return EC;
169
170  if (auto EC = sys::Process::SafelyCloseFileDescriptor(FD))
171    return EC;
172
173  return {};
174}
175
176std::error_code FileCollector::copyFiles(bool StopOnError) {
177  auto Err = sys::fs::create_directories(Root, /*IgnoreExisting=*/true);
178  if (Err) {
179    return Err;
180  }
181
182  std::lock_guard<std::mutex> lock(Mutex);
183
184  for (auto &entry : VFSWriter.getMappings()) {
185    // Get the status of the original file/directory.
186    sys::fs::file_status Stat;
187    if (std::error_code EC = sys::fs::status(entry.VPath, Stat)) {
188      if (StopOnError)
189        return EC;
190      continue;
191    }
192
193    // Continue if the file doesn't exist.
194    if (Stat.type() == sys::fs::file_type::file_not_found)
195      continue;
196
197    // Create directory tree.
198    if (std::error_code EC =
199            sys::fs::create_directories(sys::path::parent_path(entry.RPath),
200                                        /*IgnoreExisting=*/true)) {
201      if (StopOnError)
202        return EC;
203    }
204
205    if (Stat.type() == sys::fs::file_type::directory_file) {
206      // Construct a directory when it's just a directory entry.
207      if (std::error_code EC =
208              sys::fs::create_directories(entry.RPath,
209                                          /*IgnoreExisting=*/true)) {
210        if (StopOnError)
211          return EC;
212      }
213      continue;
214    }
215
216    // Copy file over.
217    if (std::error_code EC = sys::fs::copy_file(entry.VPath, entry.RPath)) {
218      if (StopOnError)
219        return EC;
220    }
221
222    // Copy over permissions.
223    if (auto perms = sys::fs::getPermissions(entry.VPath)) {
224      if (std::error_code EC = sys::fs::setPermissions(entry.RPath, *perms)) {
225        if (StopOnError)
226          return EC;
227      }
228    }
229
230    // Copy over modification time.
231    copyAccessAndModificationTime(entry.RPath, Stat);
232  }
233  return {};
234}
235
236std::error_code FileCollector::writeMapping(StringRef MappingFile) {
237  std::lock_guard<std::mutex> lock(Mutex);
238
239  VFSWriter.setOverlayDir(OverlayRoot);
240  VFSWriter.setCaseSensitivity(isCaseSensitivePath(OverlayRoot));
241  VFSWriter.setUseExternalNames(false);
242
243  std::error_code EC;
244  raw_fd_ostream os(MappingFile, EC, sys::fs::OF_TextWithCRLF);
245  if (EC)
246    return EC;
247
248  VFSWriter.write(os);
249
250  return {};
251}
252
253namespace llvm {
254
255class FileCollectorFileSystem : public vfs::FileSystem {
256public:
257  explicit FileCollectorFileSystem(IntrusiveRefCntPtr<vfs::FileSystem> FS,
258                                   std::shared_ptr<FileCollector> Collector)
259      : FS(std::move(FS)), Collector(std::move(Collector)) {}
260
261  llvm::ErrorOr<llvm::vfs::Status> status(const Twine &Path) override {
262    auto Result = FS->status(Path);
263    if (Result && Result->exists())
264      Collector->addFile(Path);
265    return Result;
266  }
267
268  llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
269  openFileForRead(const Twine &Path) override {
270    auto Result = FS->openFileForRead(Path);
271    if (Result && *Result)
272      Collector->addFile(Path);
273    return Result;
274  }
275
276  llvm::vfs::directory_iterator dir_begin(const llvm::Twine &Dir,
277                                          std::error_code &EC) override {
278    return Collector->addDirectoryImpl(Dir, FS, EC);
279  }
280
281  std::error_code getRealPath(const Twine &Path,
282                              SmallVectorImpl<char> &Output) const override {
283    auto EC = FS->getRealPath(Path, Output);
284    if (!EC) {
285      Collector->addFile(Path);
286      if (Output.size() > 0)
287        Collector->addFile(Output);
288    }
289    return EC;
290  }
291
292  std::error_code isLocal(const Twine &Path, bool &Result) override {
293    return FS->isLocal(Path, Result);
294  }
295
296  llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override {
297    return FS->getCurrentWorkingDirectory();
298  }
299
300  std::error_code setCurrentWorkingDirectory(const llvm::Twine &Path) override {
301    return FS->setCurrentWorkingDirectory(Path);
302  }
303
304private:
305  IntrusiveRefCntPtr<vfs::FileSystem> FS;
306  std::shared_ptr<FileCollector> Collector;
307};
308
309} // namespace llvm
310
311IntrusiveRefCntPtr<vfs::FileSystem>
312FileCollector::createCollectorVFS(IntrusiveRefCntPtr<vfs::FileSystem> BaseFS,
313                                  std::shared_ptr<FileCollector> Collector) {
314  return new FileCollectorFileSystem(std::move(BaseFS), std::move(Collector));
315}
316