1//===--- FileMatchTrie.cpp - ----------------------------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//  This file contains the implementation of a FileMatchTrie.
11//
12//===----------------------------------------------------------------------===//
13
14#include "clang/Tooling/FileMatchTrie.h"
15#include "llvm/ADT/StringMap.h"
16#include "llvm/Support/FileSystem.h"
17#include "llvm/Support/Path.h"
18#include "llvm/Support/raw_ostream.h"
19#include <sstream>
20using namespace clang;
21using namespace tooling;
22
23namespace {
24/// \brief Default \c PathComparator using \c llvm::sys::fs::equivalent().
25struct DefaultPathComparator : public PathComparator {
26  bool equivalent(StringRef FileA, StringRef FileB) const override {
27    return FileA == FileB || llvm::sys::fs::equivalent(FileA, FileB);
28  }
29};
30}
31
32namespace clang {
33namespace tooling {
34/// \brief A node of the \c FileMatchTrie.
35///
36/// Each node has storage for up to one path and a map mapping a path segment to
37/// child nodes. The trie starts with an empty root node.
38class FileMatchTrieNode {
39public:
40  /// \brief Inserts 'NewPath' into this trie. \c ConsumedLength denotes
41  /// the number of \c NewPath's trailing characters already consumed during
42  /// recursion.
43  ///
44  /// An insert of a path
45  /// 'p'starts at the root node and does the following:
46  /// - If the node is empty, insert 'p' into its storage and abort.
47  /// - If the node has a path 'p2' but no children, take the last path segment
48  ///   's' of 'p2', put a new child into the map at 's' an insert the rest of
49  ///   'p2' there.
50  /// - Insert a new child for the last segment of 'p' and insert the rest of
51  ///   'p' there.
52  ///
53  /// An insert operation is linear in the number of a path's segments.
54  void insert(StringRef NewPath, unsigned ConsumedLength = 0) {
55    // We cannot put relative paths into the FileMatchTrie as then a path can be
56    // a postfix of another path, violating a core assumption of the trie.
57    if (llvm::sys::path::is_relative(NewPath))
58      return;
59    if (Path.empty()) {
60      // This is an empty leaf. Store NewPath and return.
61      Path = NewPath;
62      return;
63    }
64    if (Children.empty()) {
65      // This is a leaf, ignore duplicate entry if 'Path' equals 'NewPath'.
66      if (NewPath == Path)
67          return;
68      // Make this a node and create a child-leaf with 'Path'.
69      StringRef Element(llvm::sys::path::filename(
70          StringRef(Path).drop_back(ConsumedLength)));
71      Children[Element].Path = Path;
72    }
73    StringRef Element(llvm::sys::path::filename(
74          StringRef(NewPath).drop_back(ConsumedLength)));
75    Children[Element].insert(NewPath, ConsumedLength + Element.size() + 1);
76  }
77
78  /// \brief Tries to find the node under this \c FileMatchTrieNode that best
79  /// matches 'FileName'.
80  ///
81  /// If multiple paths fit 'FileName' equally well, \c IsAmbiguous is set to
82  /// \c true and an empty string is returned. If no path fits 'FileName', an
83  /// empty string is returned. \c ConsumedLength denotes the number of
84  /// \c Filename's trailing characters already consumed during recursion.
85  ///
86  /// To find the best matching node for a given path 'p', the
87  /// \c findEquivalent() function is called recursively for each path segment
88  /// (back to fron) of 'p' until a node 'n' is reached that does not ..
89  /// - .. have children. In this case it is checked
90  ///   whether the stored path is equivalent to 'p'. If yes, the best match is
91  ///   found. Otherwise continue with the parent node as if this node did not
92  ///   exist.
93  /// - .. a child matching the next path segment. In this case, all children of
94  ///   'n' are an equally good match for 'p'. All children are of 'n' are found
95  ///   recursively and their equivalence to 'p' is determined. If none are
96  ///   equivalent, continue with the parent node as if 'n' didn't exist. If one
97  ///   is equivalent, the best match is found. Otherwise, report and ambigiuity
98  ///   error.
99  StringRef findEquivalent(const PathComparator& Comparator,
100                           StringRef FileName,
101                           bool &IsAmbiguous,
102                           unsigned ConsumedLength = 0) const {
103    if (Children.empty()) {
104      if (Comparator.equivalent(StringRef(Path), FileName))
105        return StringRef(Path);
106      return StringRef();
107    }
108    StringRef Element(llvm::sys::path::filename(FileName.drop_back(
109        ConsumedLength)));
110    llvm::StringMap<FileMatchTrieNode>::const_iterator MatchingChild =
111        Children.find(Element);
112    if (MatchingChild != Children.end()) {
113      StringRef Result = MatchingChild->getValue().findEquivalent(
114          Comparator, FileName, IsAmbiguous,
115          ConsumedLength + Element.size() + 1);
116      if (!Result.empty() || IsAmbiguous)
117        return Result;
118    }
119    std::vector<StringRef> AllChildren;
120    getAll(AllChildren, MatchingChild);
121    StringRef Result;
122    for (unsigned i = 0; i < AllChildren.size(); i++) {
123      if (Comparator.equivalent(AllChildren[i], FileName)) {
124        if (Result.empty()) {
125          Result = AllChildren[i];
126        } else {
127          IsAmbiguous = true;
128          return StringRef();
129        }
130      }
131    }
132    return Result;
133  }
134
135private:
136  /// \brief Gets all paths under this FileMatchTrieNode.
137  void getAll(std::vector<StringRef> &Results,
138              llvm::StringMap<FileMatchTrieNode>::const_iterator Except) const {
139    if (Path.empty())
140      return;
141    if (Children.empty()) {
142      Results.push_back(StringRef(Path));
143      return;
144    }
145    for (llvm::StringMap<FileMatchTrieNode>::const_iterator
146         It = Children.begin(), E = Children.end();
147         It != E; ++It) {
148      if (It == Except)
149        continue;
150      It->getValue().getAll(Results, Children.end());
151    }
152  }
153
154  // The stored absolute path in this node. Only valid for leaf nodes, i.e.
155  // nodes where Children.empty().
156  std::string Path;
157
158  // The children of this node stored in a map based on the next path segment.
159  llvm::StringMap<FileMatchTrieNode> Children;
160};
161} // end namespace tooling
162} // end namespace clang
163
164FileMatchTrie::FileMatchTrie()
165  : Root(new FileMatchTrieNode), Comparator(new DefaultPathComparator()) {}
166
167FileMatchTrie::FileMatchTrie(PathComparator *Comparator)
168  : Root(new FileMatchTrieNode), Comparator(Comparator) {}
169
170FileMatchTrie::~FileMatchTrie() {
171  delete Root;
172}
173
174void FileMatchTrie::insert(StringRef NewPath) {
175  Root->insert(NewPath);
176}
177
178StringRef FileMatchTrie::findEquivalent(StringRef FileName,
179                                        raw_ostream &Error) const {
180  if (llvm::sys::path::is_relative(FileName)) {
181    Error << "Cannot resolve relative paths";
182    return StringRef();
183  }
184  bool IsAmbiguous = false;
185  StringRef Result = Root->findEquivalent(*Comparator, FileName, IsAmbiguous);
186  if (IsAmbiguous)
187    Error << "Path is ambiguous";
188  return Result;
189}
190