1341825Sdim//===- FileMatchTrie.cpp --------------------------------------------------===// 2243791Sdim// 3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim// See https://llvm.org/LICENSE.txt for license information. 5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6243791Sdim// 7243791Sdim//===----------------------------------------------------------------------===// 8243791Sdim// 9243791Sdim// This file contains the implementation of a FileMatchTrie. 10243791Sdim// 11243791Sdim//===----------------------------------------------------------------------===// 12243791Sdim 13243791Sdim#include "clang/Tooling/FileMatchTrie.h" 14243791Sdim#include "llvm/ADT/StringMap.h" 15341825Sdim#include "llvm/ADT/StringRef.h" 16243791Sdim#include "llvm/Support/FileSystem.h" 17261991Sdim#include "llvm/Support/Path.h" 18243791Sdim#include "llvm/Support/raw_ostream.h" 19341825Sdim#include <string> 20341825Sdim#include <vector> 21341825Sdim 22288943Sdimusing namespace clang; 23288943Sdimusing namespace tooling; 24243791Sdim 25288943Sdimnamespace { 26341825Sdim 27341825Sdim/// Default \c PathComparator using \c llvm::sys::fs::equivalent(). 28243791Sdimstruct DefaultPathComparator : public PathComparator { 29276479Sdim bool equivalent(StringRef FileA, StringRef FileB) const override { 30243791Sdim return FileA == FileB || llvm::sys::fs::equivalent(FileA, FileB); 31243791Sdim } 32243791Sdim}; 33243791Sdim 34341825Sdim} // namespace 35341825Sdim 36288943Sdimnamespace clang { 37288943Sdimnamespace tooling { 38341825Sdim 39341825Sdim/// A node of the \c FileMatchTrie. 40243791Sdim/// 41243791Sdim/// Each node has storage for up to one path and a map mapping a path segment to 42243791Sdim/// child nodes. The trie starts with an empty root node. 43243791Sdimclass FileMatchTrieNode { 44243791Sdimpublic: 45341825Sdim /// Inserts 'NewPath' into this trie. \c ConsumedLength denotes 46243791Sdim /// the number of \c NewPath's trailing characters already consumed during 47243791Sdim /// recursion. 48243791Sdim /// 49243791Sdim /// An insert of a path 50243791Sdim /// 'p'starts at the root node and does the following: 51243791Sdim /// - If the node is empty, insert 'p' into its storage and abort. 52243791Sdim /// - If the node has a path 'p2' but no children, take the last path segment 53243791Sdim /// 's' of 'p2', put a new child into the map at 's' an insert the rest of 54243791Sdim /// 'p2' there. 55243791Sdim /// - Insert a new child for the last segment of 'p' and insert the rest of 56243791Sdim /// 'p' there. 57243791Sdim /// 58243791Sdim /// An insert operation is linear in the number of a path's segments. 59243791Sdim void insert(StringRef NewPath, unsigned ConsumedLength = 0) { 60243791Sdim // We cannot put relative paths into the FileMatchTrie as then a path can be 61243791Sdim // a postfix of another path, violating a core assumption of the trie. 62243791Sdim if (llvm::sys::path::is_relative(NewPath)) 63243791Sdim return; 64243791Sdim if (Path.empty()) { 65243791Sdim // This is an empty leaf. Store NewPath and return. 66243791Sdim Path = NewPath; 67243791Sdim return; 68243791Sdim } 69243791Sdim if (Children.empty()) { 70243791Sdim // This is a leaf, ignore duplicate entry if 'Path' equals 'NewPath'. 71243791Sdim if (NewPath == Path) 72243791Sdim return; 73243791Sdim // Make this a node and create a child-leaf with 'Path'. 74243791Sdim StringRef Element(llvm::sys::path::filename( 75243791Sdim StringRef(Path).drop_back(ConsumedLength))); 76243791Sdim Children[Element].Path = Path; 77243791Sdim } 78243791Sdim StringRef Element(llvm::sys::path::filename( 79243791Sdim StringRef(NewPath).drop_back(ConsumedLength))); 80243791Sdim Children[Element].insert(NewPath, ConsumedLength + Element.size() + 1); 81243791Sdim } 82243791Sdim 83341825Sdim /// Tries to find the node under this \c FileMatchTrieNode that best 84243791Sdim /// matches 'FileName'. 85243791Sdim /// 86243791Sdim /// If multiple paths fit 'FileName' equally well, \c IsAmbiguous is set to 87243791Sdim /// \c true and an empty string is returned. If no path fits 'FileName', an 88243791Sdim /// empty string is returned. \c ConsumedLength denotes the number of 89243791Sdim /// \c Filename's trailing characters already consumed during recursion. 90243791Sdim /// 91243791Sdim /// To find the best matching node for a given path 'p', the 92243791Sdim /// \c findEquivalent() function is called recursively for each path segment 93341825Sdim /// (back to front) of 'p' until a node 'n' is reached that does not .. 94243791Sdim /// - .. have children. In this case it is checked 95243791Sdim /// whether the stored path is equivalent to 'p'. If yes, the best match is 96243791Sdim /// found. Otherwise continue with the parent node as if this node did not 97243791Sdim /// exist. 98243791Sdim /// - .. a child matching the next path segment. In this case, all children of 99243791Sdim /// 'n' are an equally good match for 'p'. All children are of 'n' are found 100243791Sdim /// recursively and their equivalence to 'p' is determined. If none are 101243791Sdim /// equivalent, continue with the parent node as if 'n' didn't exist. If one 102243791Sdim /// is equivalent, the best match is found. Otherwise, report and ambigiuity 103243791Sdim /// error. 104243791Sdim StringRef findEquivalent(const PathComparator& Comparator, 105243791Sdim StringRef FileName, 106243791Sdim bool &IsAmbiguous, 107243791Sdim unsigned ConsumedLength = 0) const { 108243791Sdim if (Children.empty()) { 109243791Sdim if (Comparator.equivalent(StringRef(Path), FileName)) 110243791Sdim return StringRef(Path); 111341825Sdim return {}; 112243791Sdim } 113243791Sdim StringRef Element(llvm::sys::path::filename(FileName.drop_back( 114243791Sdim ConsumedLength))); 115243791Sdim llvm::StringMap<FileMatchTrieNode>::const_iterator MatchingChild = 116243791Sdim Children.find(Element); 117243791Sdim if (MatchingChild != Children.end()) { 118243791Sdim StringRef Result = MatchingChild->getValue().findEquivalent( 119243791Sdim Comparator, FileName, IsAmbiguous, 120243791Sdim ConsumedLength + Element.size() + 1); 121243791Sdim if (!Result.empty() || IsAmbiguous) 122243791Sdim return Result; 123243791Sdim } 124243791Sdim std::vector<StringRef> AllChildren; 125243791Sdim getAll(AllChildren, MatchingChild); 126243791Sdim StringRef Result; 127341825Sdim for (const auto &Child : AllChildren) { 128341825Sdim if (Comparator.equivalent(Child, FileName)) { 129243791Sdim if (Result.empty()) { 130341825Sdim Result = Child; 131243791Sdim } else { 132243791Sdim IsAmbiguous = true; 133341825Sdim return {}; 134243791Sdim } 135243791Sdim } 136243791Sdim } 137243791Sdim return Result; 138243791Sdim } 139243791Sdim 140243791Sdimprivate: 141341825Sdim /// Gets all paths under this FileMatchTrieNode. 142243791Sdim void getAll(std::vector<StringRef> &Results, 143243791Sdim llvm::StringMap<FileMatchTrieNode>::const_iterator Except) const { 144243791Sdim if (Path.empty()) 145243791Sdim return; 146243791Sdim if (Children.empty()) { 147243791Sdim Results.push_back(StringRef(Path)); 148243791Sdim return; 149243791Sdim } 150243791Sdim for (llvm::StringMap<FileMatchTrieNode>::const_iterator 151243791Sdim It = Children.begin(), E = Children.end(); 152243791Sdim It != E; ++It) { 153243791Sdim if (It == Except) 154243791Sdim continue; 155243791Sdim It->getValue().getAll(Results, Children.end()); 156243791Sdim } 157243791Sdim } 158243791Sdim 159243791Sdim // The stored absolute path in this node. Only valid for leaf nodes, i.e. 160243791Sdim // nodes where Children.empty(). 161243791Sdim std::string Path; 162243791Sdim 163243791Sdim // The children of this node stored in a map based on the next path segment. 164243791Sdim llvm::StringMap<FileMatchTrieNode> Children; 165243791Sdim}; 166243791Sdim 167341825Sdim} // namespace tooling 168341825Sdim} // namespace clang 169341825Sdim 170243791SdimFileMatchTrie::FileMatchTrie() 171341825Sdim : Root(new FileMatchTrieNode), Comparator(new DefaultPathComparator()) {} 172243791Sdim 173243791SdimFileMatchTrie::FileMatchTrie(PathComparator *Comparator) 174341825Sdim : Root(new FileMatchTrieNode), Comparator(Comparator) {} 175243791Sdim 176243791SdimFileMatchTrie::~FileMatchTrie() { 177243791Sdim delete Root; 178243791Sdim} 179243791Sdim 180243791Sdimvoid FileMatchTrie::insert(StringRef NewPath) { 181243791Sdim Root->insert(NewPath); 182243791Sdim} 183243791Sdim 184243791SdimStringRef FileMatchTrie::findEquivalent(StringRef FileName, 185249423Sdim raw_ostream &Error) const { 186243791Sdim if (llvm::sys::path::is_relative(FileName)) { 187243791Sdim Error << "Cannot resolve relative paths"; 188341825Sdim return {}; 189243791Sdim } 190243791Sdim bool IsAmbiguous = false; 191243791Sdim StringRef Result = Root->findEquivalent(*Comparator, FileName, IsAmbiguous); 192243791Sdim if (IsAmbiguous) 193243791Sdim Error << "Path is ambiguous"; 194243791Sdim return Result; 195243791Sdim} 196