1//===- FileMatchTrie.cpp --------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9//  This file contains the implementation of a FileMatchTrie.
10//
11//===----------------------------------------------------------------------===//
12
13#include "clang/Tooling/FileMatchTrie.h"
14#include "llvm/ADT/StringMap.h"
15#include "llvm/ADT/StringRef.h"
16#include "llvm/Support/FileSystem.h"
17#include "llvm/Support/Path.h"
18#include "llvm/Support/raw_ostream.h"
19#include <string>
20#include <vector>
21
22using namespace clang;
23using namespace tooling;
24
25namespace {
26
27/// Default \c PathComparator using \c llvm::sys::fs::equivalent().
28struct DefaultPathComparator : public PathComparator {
29  bool equivalent(StringRef FileA, StringRef FileB) const override {
30    return FileA == FileB || llvm::sys::fs::equivalent(FileA, FileB);
31  }
32};
33
34} // namespace
35
36namespace clang {
37namespace tooling {
38
39/// A node of the \c FileMatchTrie.
40///
41/// Each node has storage for up to one path and a map mapping a path segment to
42/// child nodes. The trie starts with an empty root node.
43class FileMatchTrieNode {
44public:
45  /// Inserts 'NewPath' into this trie. \c ConsumedLength denotes
46  /// the number of \c NewPath's trailing characters already consumed during
47  /// recursion.
48  ///
49  /// An insert of a path
50  /// 'p'starts at the root node and does the following:
51  /// - If the node is empty, insert 'p' into its storage and abort.
52  /// - If the node has a path 'p2' but no children, take the last path segment
53  ///   's' of 'p2', put a new child into the map at 's' an insert the rest of
54  ///   'p2' there.
55  /// - Insert a new child for the last segment of 'p' and insert the rest of
56  ///   'p' there.
57  ///
58  /// An insert operation is linear in the number of a path's segments.
59  void insert(StringRef NewPath, unsigned ConsumedLength = 0) {
60    // We cannot put relative paths into the FileMatchTrie as then a path can be
61    // a postfix of another path, violating a core assumption of the trie.
62    if (llvm::sys::path::is_relative(NewPath))
63      return;
64    if (Path.empty()) {
65      // This is an empty leaf. Store NewPath and return.
66      Path = std::string(NewPath);
67      return;
68    }
69    if (Children.empty()) {
70      // This is a leaf, ignore duplicate entry if 'Path' equals 'NewPath'.
71      if (NewPath == Path)
72          return;
73      // Make this a node and create a child-leaf with 'Path'.
74      StringRef Element(llvm::sys::path::filename(
75          StringRef(Path).drop_back(ConsumedLength)));
76      Children[Element].Path = Path;
77    }
78    StringRef Element(llvm::sys::path::filename(
79          StringRef(NewPath).drop_back(ConsumedLength)));
80    Children[Element].insert(NewPath, ConsumedLength + Element.size() + 1);
81  }
82
83  /// Tries to find the node under this \c FileMatchTrieNode that best
84  /// matches 'FileName'.
85  ///
86  /// If multiple paths fit 'FileName' equally well, \c IsAmbiguous is set to
87  /// \c true and an empty string is returned. If no path fits 'FileName', an
88  /// empty string is returned. \c ConsumedLength denotes the number of
89  /// \c Filename's trailing characters already consumed during recursion.
90  ///
91  /// To find the best matching node for a given path 'p', the
92  /// \c findEquivalent() function is called recursively for each path segment
93  /// (back to front) of 'p' until a node 'n' is reached that does not ..
94  /// - .. have children. In this case it is checked
95  ///   whether the stored path is equivalent to 'p'. If yes, the best match is
96  ///   found. Otherwise continue with the parent node as if this node did not
97  ///   exist.
98  /// - .. a child matching the next path segment. In this case, all children of
99  ///   'n' are an equally good match for 'p'. All children are of 'n' are found
100  ///   recursively and their equivalence to 'p' is determined. If none are
101  ///   equivalent, continue with the parent node as if 'n' didn't exist. If one
102  ///   is equivalent, the best match is found. Otherwise, report and ambigiuity
103  ///   error.
104  StringRef findEquivalent(const PathComparator& Comparator,
105                           StringRef FileName,
106                           bool &IsAmbiguous,
107                           unsigned ConsumedLength = 0) const {
108    if (Children.empty()) {
109      if (Comparator.equivalent(StringRef(Path), FileName))
110        return StringRef(Path);
111      return {};
112    }
113    StringRef Element(llvm::sys::path::filename(FileName.drop_back(
114        ConsumedLength)));
115    llvm::StringMap<FileMatchTrieNode>::const_iterator MatchingChild =
116        Children.find(Element);
117    if (MatchingChild != Children.end()) {
118      StringRef Result = MatchingChild->getValue().findEquivalent(
119          Comparator, FileName, IsAmbiguous,
120          ConsumedLength + Element.size() + 1);
121      if (!Result.empty() || IsAmbiguous)
122        return Result;
123    }
124    std::vector<StringRef> AllChildren;
125    getAll(AllChildren, MatchingChild);
126    StringRef Result;
127    for (const auto &Child : AllChildren) {
128      if (Comparator.equivalent(Child, FileName)) {
129        if (Result.empty()) {
130          Result = Child;
131        } else {
132          IsAmbiguous = true;
133          return {};
134        }
135      }
136    }
137    return Result;
138  }
139
140private:
141  /// Gets all paths under this FileMatchTrieNode.
142  void getAll(std::vector<StringRef> &Results,
143              llvm::StringMap<FileMatchTrieNode>::const_iterator Except) const {
144    if (Path.empty())
145      return;
146    if (Children.empty()) {
147      Results.push_back(StringRef(Path));
148      return;
149    }
150    for (llvm::StringMap<FileMatchTrieNode>::const_iterator
151         It = Children.begin(), E = Children.end();
152         It != E; ++It) {
153      if (It == Except)
154        continue;
155      It->getValue().getAll(Results, Children.end());
156    }
157  }
158
159  // The stored absolute path in this node. Only valid for leaf nodes, i.e.
160  // nodes where Children.empty().
161  std::string Path;
162
163  // The children of this node stored in a map based on the next path segment.
164  llvm::StringMap<FileMatchTrieNode> Children;
165};
166
167} // namespace tooling
168} // namespace clang
169
170FileMatchTrie::FileMatchTrie()
171    : Root(new FileMatchTrieNode), Comparator(new DefaultPathComparator()) {}
172
173FileMatchTrie::FileMatchTrie(PathComparator *Comparator)
174    : Root(new FileMatchTrieNode), Comparator(Comparator) {}
175
176FileMatchTrie::~FileMatchTrie() {
177  delete Root;
178}
179
180void FileMatchTrie::insert(StringRef NewPath) {
181  Root->insert(NewPath);
182}
183
184StringRef FileMatchTrie::findEquivalent(StringRef FileName,
185                                        raw_ostream &Error) const {
186  if (llvm::sys::path::is_relative(FileName)) {
187    Error << "Cannot resolve relative paths";
188    return {};
189  }
190  bool IsAmbiguous = false;
191  StringRef Result = Root->findEquivalent(*Comparator, FileName, IsAmbiguous);
192  if (IsAmbiguous)
193    Error << "Path is ambiguous";
194  return Result;
195}
196