1274958Sdim//===--- ModuleDependencyCollector.cpp - Collect module dependencies ------===// 2274958Sdim// 3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim// See https://llvm.org/LICENSE.txt for license information. 5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6274958Sdim// 7274958Sdim//===----------------------------------------------------------------------===// 8274958Sdim// 9274958Sdim// Collect the dependencies of a set of modules. 10274958Sdim// 11274958Sdim//===----------------------------------------------------------------------===// 12274958Sdim 13309124Sdim#include "clang/Basic/CharInfo.h" 14274958Sdim#include "clang/Frontend/Utils.h" 15309124Sdim#include "clang/Lex/Preprocessor.h" 16274958Sdim#include "clang/Serialization/ASTReader.h" 17274958Sdim#include "llvm/ADT/iterator_range.h" 18341825Sdim#include "llvm/Config/llvm-config.h" 19274958Sdim#include "llvm/Support/FileSystem.h" 20274958Sdim#include "llvm/Support/Path.h" 21274958Sdim#include "llvm/Support/raw_ostream.h" 22274958Sdim 23274958Sdimusing namespace clang; 24274958Sdim 25274958Sdimnamespace { 26309124Sdim/// Private implementations for ModuleDependencyCollector 27274958Sdimclass ModuleDependencyListener : public ASTReaderListener { 28274958Sdim ModuleDependencyCollector &Collector; 29274958Sdimpublic: 30274958Sdim ModuleDependencyListener(ModuleDependencyCollector &Collector) 31274958Sdim : Collector(Collector) {} 32274958Sdim bool needsInputFileVisitation() override { return true; } 33274958Sdim bool needsSystemInputFileVisitation() override { return true; } 34296417Sdim bool visitInputFile(StringRef Filename, bool IsSystem, bool IsOverridden, 35309124Sdim bool IsExplicitModule) override { 36309124Sdim Collector.addFile(Filename); 37309124Sdim return true; 38309124Sdim } 39274958Sdim}; 40309124Sdim 41314564Sdimstruct ModuleDependencyPPCallbacks : public PPCallbacks { 42314564Sdim ModuleDependencyCollector &Collector; 43314564Sdim SourceManager &SM; 44314564Sdim ModuleDependencyPPCallbacks(ModuleDependencyCollector &Collector, 45314564Sdim SourceManager &SM) 46314564Sdim : Collector(Collector), SM(SM) {} 47314564Sdim 48314564Sdim void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok, 49314564Sdim StringRef FileName, bool IsAngled, 50314564Sdim CharSourceRange FilenameRange, const FileEntry *File, 51314564Sdim StringRef SearchPath, StringRef RelativePath, 52341825Sdim const Module *Imported, 53341825Sdim SrcMgr::CharacteristicKind FileType) override { 54314564Sdim if (!File) 55314564Sdim return; 56314564Sdim Collector.addFile(File->getName()); 57314564Sdim } 58314564Sdim}; 59314564Sdim 60309124Sdimstruct ModuleDependencyMMCallbacks : public ModuleMapCallbacks { 61309124Sdim ModuleDependencyCollector &Collector; 62309124Sdim ModuleDependencyMMCallbacks(ModuleDependencyCollector &Collector) 63309124Sdim : Collector(Collector) {} 64309124Sdim 65309124Sdim void moduleMapAddHeader(StringRef HeaderPath) override { 66309124Sdim if (llvm::sys::path::is_absolute(HeaderPath)) 67309124Sdim Collector.addFile(HeaderPath); 68309124Sdim } 69309124Sdim void moduleMapAddUmbrellaHeader(FileManager *FileMgr, 70309124Sdim const FileEntry *Header) override { 71309124Sdim StringRef HeaderFilename = Header->getName(); 72309124Sdim moduleMapAddHeader(HeaderFilename); 73309124Sdim // The FileManager can find and cache the symbolic link for a framework 74309124Sdim // header before its real path, this means a module can have some of its 75309124Sdim // headers to use other paths. Although this is usually not a problem, it's 76309124Sdim // inconsistent, and not collecting the original path header leads to 77309124Sdim // umbrella clashes while rebuilding modules in the crash reproducer. For 78309124Sdim // example: 79309124Sdim // ApplicationServices.framework/Frameworks/ImageIO.framework/ImageIO.h 80309124Sdim // instead of: 81309124Sdim // ImageIO.framework/ImageIO.h 82309124Sdim // 83309124Sdim // FIXME: this shouldn't be necessary once we have FileName instances 84309124Sdim // around instead of FileEntry ones. For now, make sure we collect all 85309124Sdim // that we need for the reproducer to work correctly. 86309124Sdim StringRef UmbreallDirFromHeader = 87309124Sdim llvm::sys::path::parent_path(HeaderFilename); 88309124Sdim StringRef UmbrellaDir = Header->getDir()->getName(); 89309124Sdim if (!UmbrellaDir.equals(UmbreallDirFromHeader)) { 90309124Sdim SmallString<128> AltHeaderFilename; 91309124Sdim llvm::sys::path::append(AltHeaderFilename, UmbrellaDir, 92309124Sdim llvm::sys::path::filename(HeaderFilename)); 93309124Sdim if (FileMgr->getFile(AltHeaderFilename)) 94309124Sdim moduleMapAddHeader(AltHeaderFilename); 95309124Sdim } 96309124Sdim } 97309124Sdim}; 98309124Sdim 99274958Sdim} 100274958Sdim 101274958Sdimvoid ModuleDependencyCollector::attachToASTReader(ASTReader &R) { 102360784Sdim R.addListener(std::make_unique<ModuleDependencyListener>(*this)); 103274958Sdim} 104274958Sdim 105309124Sdimvoid ModuleDependencyCollector::attachToPreprocessor(Preprocessor &PP) { 106360784Sdim PP.addPPCallbacks(std::make_unique<ModuleDependencyPPCallbacks>( 107314564Sdim *this, PP.getSourceManager())); 108309124Sdim PP.getHeaderSearchInfo().getModuleMap().addModuleMapCallbacks( 109360784Sdim std::make_unique<ModuleDependencyMMCallbacks>(*this)); 110309124Sdim} 111309124Sdim 112309124Sdimstatic bool isCaseSensitivePath(StringRef Path) { 113309124Sdim SmallString<256> TmpDest = Path, UpperDest, RealDest; 114309124Sdim // Remove component traversals, links, etc. 115353358Sdim if (llvm::sys::fs::real_path(Path, TmpDest)) 116309124Sdim return true; // Current default value in vfs.yaml 117309124Sdim Path = TmpDest; 118309124Sdim 119309124Sdim // Change path to all upper case and ask for its real path, if the latter 120309124Sdim // exists and is equal to Path, it's not case sensitive. Default to case 121341825Sdim // sensitive in the absence of realpath, since this is what the VFSWriter 122309124Sdim // already expects when sensitivity isn't setup. 123309124Sdim for (auto &C : Path) 124309124Sdim UpperDest.push_back(toUppercase(C)); 125353358Sdim if (!llvm::sys::fs::real_path(UpperDest, RealDest) && Path.equals(RealDest)) 126309124Sdim return false; 127309124Sdim return true; 128309124Sdim} 129309124Sdim 130274958Sdimvoid ModuleDependencyCollector::writeFileMap() { 131274958Sdim if (Seen.empty()) 132274958Sdim return; 133274958Sdim 134309124Sdim StringRef VFSDir = getDest(); 135274958Sdim 136309124Sdim // Default to use relative overlay directories in the VFS yaml file. This 137309124Sdim // allows crash reproducer scripts to work across machines. 138309124Sdim VFSWriter.setOverlayDir(VFSDir); 139309124Sdim 140309124Sdim // Explicitly set case sensitivity for the YAML writer. For that, find out 141309124Sdim // the sensitivity at the path where the headers all collected to. 142309124Sdim VFSWriter.setCaseSensitivity(isCaseSensitivePath(VFSDir)); 143309124Sdim 144309124Sdim // Do not rely on real path names when executing the crash reproducer scripts 145309124Sdim // since we only want to actually use the files we have on the VFS cache. 146309124Sdim VFSWriter.setUseExternalNames(false); 147309124Sdim 148280031Sdim std::error_code EC; 149309124Sdim SmallString<256> YAMLPath = VFSDir; 150309124Sdim llvm::sys::path::append(YAMLPath, "vfs.yaml"); 151360784Sdim llvm::raw_fd_ostream OS(YAMLPath, EC, llvm::sys::fs::OF_Text); 152280031Sdim if (EC) { 153309124Sdim HasErrors = true; 154274958Sdim return; 155274958Sdim } 156274958Sdim VFSWriter.write(OS); 157274958Sdim} 158274958Sdim 159309124Sdimbool ModuleDependencyCollector::getRealPath(StringRef SrcPath, 160309124Sdim SmallVectorImpl<char> &Result) { 161274958Sdim using namespace llvm::sys; 162309124Sdim SmallString<256> RealPath; 163309124Sdim StringRef FileName = path::filename(SrcPath); 164309124Sdim std::string Dir = path::parent_path(SrcPath).str(); 165309124Sdim auto DirWithSymLink = SymLinkMap.find(Dir); 166274958Sdim 167309124Sdim // Use real_path to fix any symbolic link component present in a path. 168309124Sdim // Computing the real path is expensive, cache the search through the 169309124Sdim // parent path directory. 170309124Sdim if (DirWithSymLink == SymLinkMap.end()) { 171353358Sdim if (llvm::sys::fs::real_path(Dir, RealPath)) 172309124Sdim return false; 173309124Sdim SymLinkMap[Dir] = RealPath.str(); 174309124Sdim } else { 175309124Sdim RealPath = DirWithSymLink->second; 176309124Sdim } 177309124Sdim 178309124Sdim path::append(RealPath, FileName); 179309124Sdim Result.swap(RealPath); 180309124Sdim return true; 181309124Sdim} 182309124Sdim 183314564Sdimstd::error_code ModuleDependencyCollector::copyToRoot(StringRef Src, 184314564Sdim StringRef Dst) { 185309124Sdim using namespace llvm::sys; 186309124Sdim 187309124Sdim // We need an absolute src path to append to the root. 188274958Sdim SmallString<256> AbsoluteSrc = Src; 189274958Sdim fs::make_absolute(AbsoluteSrc); 190309124Sdim // Canonicalize src to a native path to avoid mixed separator styles. 191280031Sdim path::native(AbsoluteSrc); 192309124Sdim // Remove redundant leading "./" pieces and consecutive separators. 193309124Sdim AbsoluteSrc = path::remove_leading_dotslash(AbsoluteSrc); 194274958Sdim 195309124Sdim // Canonicalize the source path by removing "..", "." components. 196314564Sdim SmallString<256> VirtualPath = AbsoluteSrc; 197314564Sdim path::remove_dots(VirtualPath, /*remove_dot_dot=*/true); 198274958Sdim 199309124Sdim // If a ".." component is present after a symlink component, remove_dots may 200309124Sdim // lead to the wrong real destination path. Let the source be canonicalized 201309124Sdim // like that but make sure we always use the real path for the destination. 202314564Sdim SmallString<256> CopyFrom; 203314564Sdim if (!getRealPath(AbsoluteSrc, CopyFrom)) 204314564Sdim CopyFrom = VirtualPath; 205314564Sdim SmallString<256> CacheDst = getDest(); 206309124Sdim 207314564Sdim if (Dst.empty()) { 208314564Sdim // The common case is to map the virtual path to the same path inside the 209314564Sdim // cache. 210314564Sdim path::append(CacheDst, path::relative_path(CopyFrom)); 211314564Sdim } else { 212314564Sdim // When collecting entries from input vfsoverlays, copy the external 213314564Sdim // contents into the cache but still map from the source. 214314564Sdim if (!fs::exists(Dst)) 215314564Sdim return std::error_code(); 216314564Sdim path::append(CacheDst, Dst); 217314564Sdim CopyFrom = Dst; 218314564Sdim } 219314564Sdim 220274958Sdim // Copy the file into place. 221314564Sdim if (std::error_code EC = fs::create_directories(path::parent_path(CacheDst), 222314564Sdim /*IgnoreExisting=*/true)) 223274958Sdim return EC; 224314564Sdim if (std::error_code EC = fs::copy_file(CopyFrom, CacheDst)) 225274958Sdim return EC; 226309124Sdim 227309124Sdim // Always map a canonical src path to its real path into the YAML, by doing 228309124Sdim // this we map different virtual src paths to the same entry in the VFS 229309124Sdim // overlay, which is a way to emulate symlink inside the VFS; this is also 230321369Sdim // needed for correctness, not doing that can lead to module redefinition 231309124Sdim // errors. 232314564Sdim addFileMapping(VirtualPath, CacheDst); 233274958Sdim return std::error_code(); 234274958Sdim} 235274958Sdim 236314564Sdimvoid ModuleDependencyCollector::addFile(StringRef Filename, StringRef FileDst) { 237309124Sdim if (insertSeen(Filename)) 238314564Sdim if (copyToRoot(Filename, FileDst)) 239309124Sdim HasErrors = true; 240274958Sdim} 241