1274958Sdim//===--- ModuleDependencyCollector.cpp - Collect module dependencies ------===//
2274958Sdim//
3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4353358Sdim// See https://llvm.org/LICENSE.txt for license information.
5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6274958Sdim//
7274958Sdim//===----------------------------------------------------------------------===//
8274958Sdim//
9274958Sdim// Collect the dependencies of a set of modules.
10274958Sdim//
11274958Sdim//===----------------------------------------------------------------------===//
12274958Sdim
13309124Sdim#include "clang/Basic/CharInfo.h"
14274958Sdim#include "clang/Frontend/Utils.h"
15309124Sdim#include "clang/Lex/Preprocessor.h"
16274958Sdim#include "clang/Serialization/ASTReader.h"
17274958Sdim#include "llvm/ADT/iterator_range.h"
18341825Sdim#include "llvm/Config/llvm-config.h"
19274958Sdim#include "llvm/Support/FileSystem.h"
20274958Sdim#include "llvm/Support/Path.h"
21274958Sdim#include "llvm/Support/raw_ostream.h"
22274958Sdim
23274958Sdimusing namespace clang;
24274958Sdim
25274958Sdimnamespace {
26309124Sdim/// Private implementations for ModuleDependencyCollector
27274958Sdimclass ModuleDependencyListener : public ASTReaderListener {
28274958Sdim  ModuleDependencyCollector &Collector;
29274958Sdimpublic:
30274958Sdim  ModuleDependencyListener(ModuleDependencyCollector &Collector)
31274958Sdim      : Collector(Collector) {}
32274958Sdim  bool needsInputFileVisitation() override { return true; }
33274958Sdim  bool needsSystemInputFileVisitation() override { return true; }
34296417Sdim  bool visitInputFile(StringRef Filename, bool IsSystem, bool IsOverridden,
35309124Sdim                      bool IsExplicitModule) override {
36309124Sdim    Collector.addFile(Filename);
37309124Sdim    return true;
38309124Sdim  }
39274958Sdim};
40309124Sdim
41314564Sdimstruct ModuleDependencyPPCallbacks : public PPCallbacks {
42314564Sdim  ModuleDependencyCollector &Collector;
43314564Sdim  SourceManager &SM;
44314564Sdim  ModuleDependencyPPCallbacks(ModuleDependencyCollector &Collector,
45314564Sdim                              SourceManager &SM)
46314564Sdim      : Collector(Collector), SM(SM) {}
47314564Sdim
48314564Sdim  void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
49314564Sdim                          StringRef FileName, bool IsAngled,
50314564Sdim                          CharSourceRange FilenameRange, const FileEntry *File,
51314564Sdim                          StringRef SearchPath, StringRef RelativePath,
52341825Sdim                          const Module *Imported,
53341825Sdim                          SrcMgr::CharacteristicKind FileType) override {
54314564Sdim    if (!File)
55314564Sdim      return;
56314564Sdim    Collector.addFile(File->getName());
57314564Sdim  }
58314564Sdim};
59314564Sdim
60309124Sdimstruct ModuleDependencyMMCallbacks : public ModuleMapCallbacks {
61309124Sdim  ModuleDependencyCollector &Collector;
62309124Sdim  ModuleDependencyMMCallbacks(ModuleDependencyCollector &Collector)
63309124Sdim      : Collector(Collector) {}
64309124Sdim
65309124Sdim  void moduleMapAddHeader(StringRef HeaderPath) override {
66309124Sdim    if (llvm::sys::path::is_absolute(HeaderPath))
67309124Sdim      Collector.addFile(HeaderPath);
68309124Sdim  }
69309124Sdim  void moduleMapAddUmbrellaHeader(FileManager *FileMgr,
70309124Sdim                                  const FileEntry *Header) override {
71309124Sdim    StringRef HeaderFilename = Header->getName();
72309124Sdim    moduleMapAddHeader(HeaderFilename);
73309124Sdim    // The FileManager can find and cache the symbolic link for a framework
74309124Sdim    // header before its real path, this means a module can have some of its
75309124Sdim    // headers to use other paths. Although this is usually not a problem, it's
76309124Sdim    // inconsistent, and not collecting the original path header leads to
77309124Sdim    // umbrella clashes while rebuilding modules in the crash reproducer. For
78309124Sdim    // example:
79309124Sdim    //    ApplicationServices.framework/Frameworks/ImageIO.framework/ImageIO.h
80309124Sdim    // instead of:
81309124Sdim    //    ImageIO.framework/ImageIO.h
82309124Sdim    //
83309124Sdim    // FIXME: this shouldn't be necessary once we have FileName instances
84309124Sdim    // around instead of FileEntry ones. For now, make sure we collect all
85309124Sdim    // that we need for the reproducer to work correctly.
86309124Sdim    StringRef UmbreallDirFromHeader =
87309124Sdim        llvm::sys::path::parent_path(HeaderFilename);
88309124Sdim    StringRef UmbrellaDir = Header->getDir()->getName();
89309124Sdim    if (!UmbrellaDir.equals(UmbreallDirFromHeader)) {
90309124Sdim      SmallString<128> AltHeaderFilename;
91309124Sdim      llvm::sys::path::append(AltHeaderFilename, UmbrellaDir,
92309124Sdim                              llvm::sys::path::filename(HeaderFilename));
93309124Sdim      if (FileMgr->getFile(AltHeaderFilename))
94309124Sdim        moduleMapAddHeader(AltHeaderFilename);
95309124Sdim    }
96309124Sdim  }
97309124Sdim};
98309124Sdim
99274958Sdim}
100274958Sdim
101274958Sdimvoid ModuleDependencyCollector::attachToASTReader(ASTReader &R) {
102360784Sdim  R.addListener(std::make_unique<ModuleDependencyListener>(*this));
103274958Sdim}
104274958Sdim
105309124Sdimvoid ModuleDependencyCollector::attachToPreprocessor(Preprocessor &PP) {
106360784Sdim  PP.addPPCallbacks(std::make_unique<ModuleDependencyPPCallbacks>(
107314564Sdim      *this, PP.getSourceManager()));
108309124Sdim  PP.getHeaderSearchInfo().getModuleMap().addModuleMapCallbacks(
109360784Sdim      std::make_unique<ModuleDependencyMMCallbacks>(*this));
110309124Sdim}
111309124Sdim
112309124Sdimstatic bool isCaseSensitivePath(StringRef Path) {
113309124Sdim  SmallString<256> TmpDest = Path, UpperDest, RealDest;
114309124Sdim  // Remove component traversals, links, etc.
115353358Sdim  if (llvm::sys::fs::real_path(Path, TmpDest))
116309124Sdim    return true; // Current default value in vfs.yaml
117309124Sdim  Path = TmpDest;
118309124Sdim
119309124Sdim  // Change path to all upper case and ask for its real path, if the latter
120309124Sdim  // exists and is equal to Path, it's not case sensitive. Default to case
121341825Sdim  // sensitive in the absence of realpath, since this is what the VFSWriter
122309124Sdim  // already expects when sensitivity isn't setup.
123309124Sdim  for (auto &C : Path)
124309124Sdim    UpperDest.push_back(toUppercase(C));
125353358Sdim  if (!llvm::sys::fs::real_path(UpperDest, RealDest) && Path.equals(RealDest))
126309124Sdim    return false;
127309124Sdim  return true;
128309124Sdim}
129309124Sdim
130274958Sdimvoid ModuleDependencyCollector::writeFileMap() {
131274958Sdim  if (Seen.empty())
132274958Sdim    return;
133274958Sdim
134309124Sdim  StringRef VFSDir = getDest();
135274958Sdim
136309124Sdim  // Default to use relative overlay directories in the VFS yaml file. This
137309124Sdim  // allows crash reproducer scripts to work across machines.
138309124Sdim  VFSWriter.setOverlayDir(VFSDir);
139309124Sdim
140309124Sdim  // Explicitly set case sensitivity for the YAML writer. For that, find out
141309124Sdim  // the sensitivity at the path where the headers all collected to.
142309124Sdim  VFSWriter.setCaseSensitivity(isCaseSensitivePath(VFSDir));
143309124Sdim
144309124Sdim  // Do not rely on real path names when executing the crash reproducer scripts
145309124Sdim  // since we only want to actually use the files we have on the VFS cache.
146309124Sdim  VFSWriter.setUseExternalNames(false);
147309124Sdim
148280031Sdim  std::error_code EC;
149309124Sdim  SmallString<256> YAMLPath = VFSDir;
150309124Sdim  llvm::sys::path::append(YAMLPath, "vfs.yaml");
151360784Sdim  llvm::raw_fd_ostream OS(YAMLPath, EC, llvm::sys::fs::OF_Text);
152280031Sdim  if (EC) {
153309124Sdim    HasErrors = true;
154274958Sdim    return;
155274958Sdim  }
156274958Sdim  VFSWriter.write(OS);
157274958Sdim}
158274958Sdim
159309124Sdimbool ModuleDependencyCollector::getRealPath(StringRef SrcPath,
160309124Sdim                                            SmallVectorImpl<char> &Result) {
161274958Sdim  using namespace llvm::sys;
162309124Sdim  SmallString<256> RealPath;
163309124Sdim  StringRef FileName = path::filename(SrcPath);
164309124Sdim  std::string Dir = path::parent_path(SrcPath).str();
165309124Sdim  auto DirWithSymLink = SymLinkMap.find(Dir);
166274958Sdim
167309124Sdim  // Use real_path to fix any symbolic link component present in a path.
168309124Sdim  // Computing the real path is expensive, cache the search through the
169309124Sdim  // parent path directory.
170309124Sdim  if (DirWithSymLink == SymLinkMap.end()) {
171353358Sdim    if (llvm::sys::fs::real_path(Dir, RealPath))
172309124Sdim      return false;
173309124Sdim    SymLinkMap[Dir] = RealPath.str();
174309124Sdim  } else {
175309124Sdim    RealPath = DirWithSymLink->second;
176309124Sdim  }
177309124Sdim
178309124Sdim  path::append(RealPath, FileName);
179309124Sdim  Result.swap(RealPath);
180309124Sdim  return true;
181309124Sdim}
182309124Sdim
183314564Sdimstd::error_code ModuleDependencyCollector::copyToRoot(StringRef Src,
184314564Sdim                                                      StringRef Dst) {
185309124Sdim  using namespace llvm::sys;
186309124Sdim
187309124Sdim  // We need an absolute src path to append to the root.
188274958Sdim  SmallString<256> AbsoluteSrc = Src;
189274958Sdim  fs::make_absolute(AbsoluteSrc);
190309124Sdim  // Canonicalize src to a native path to avoid mixed separator styles.
191280031Sdim  path::native(AbsoluteSrc);
192309124Sdim  // Remove redundant leading "./" pieces and consecutive separators.
193309124Sdim  AbsoluteSrc = path::remove_leading_dotslash(AbsoluteSrc);
194274958Sdim
195309124Sdim  // Canonicalize the source path by removing "..", "." components.
196314564Sdim  SmallString<256> VirtualPath = AbsoluteSrc;
197314564Sdim  path::remove_dots(VirtualPath, /*remove_dot_dot=*/true);
198274958Sdim
199309124Sdim  // If a ".." component is present after a symlink component, remove_dots may
200309124Sdim  // lead to the wrong real destination path. Let the source be canonicalized
201309124Sdim  // like that but make sure we always use the real path for the destination.
202314564Sdim  SmallString<256> CopyFrom;
203314564Sdim  if (!getRealPath(AbsoluteSrc, CopyFrom))
204314564Sdim    CopyFrom = VirtualPath;
205314564Sdim  SmallString<256> CacheDst = getDest();
206309124Sdim
207314564Sdim  if (Dst.empty()) {
208314564Sdim    // The common case is to map the virtual path to the same path inside the
209314564Sdim    // cache.
210314564Sdim    path::append(CacheDst, path::relative_path(CopyFrom));
211314564Sdim  } else {
212314564Sdim    // When collecting entries from input vfsoverlays, copy the external
213314564Sdim    // contents into the cache but still map from the source.
214314564Sdim    if (!fs::exists(Dst))
215314564Sdim      return std::error_code();
216314564Sdim    path::append(CacheDst, Dst);
217314564Sdim    CopyFrom = Dst;
218314564Sdim  }
219314564Sdim
220274958Sdim  // Copy the file into place.
221314564Sdim  if (std::error_code EC = fs::create_directories(path::parent_path(CacheDst),
222314564Sdim                                                  /*IgnoreExisting=*/true))
223274958Sdim    return EC;
224314564Sdim  if (std::error_code EC = fs::copy_file(CopyFrom, CacheDst))
225274958Sdim    return EC;
226309124Sdim
227309124Sdim  // Always map a canonical src path to its real path into the YAML, by doing
228309124Sdim  // this we map different virtual src paths to the same entry in the VFS
229309124Sdim  // overlay, which is a way to emulate symlink inside the VFS; this is also
230321369Sdim  // needed for correctness, not doing that can lead to module redefinition
231309124Sdim  // errors.
232314564Sdim  addFileMapping(VirtualPath, CacheDst);
233274958Sdim  return std::error_code();
234274958Sdim}
235274958Sdim
236314564Sdimvoid ModuleDependencyCollector::addFile(StringRef Filename, StringRef FileDst) {
237309124Sdim  if (insertSeen(Filename))
238314564Sdim    if (copyToRoot(Filename, FileDst))
239309124Sdim      HasErrors = true;
240274958Sdim}
241