1341825Sdim//===- FileMatchTrie.cpp --------------------------------------------------===//
2243791Sdim//
3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4353358Sdim// See https://llvm.org/LICENSE.txt for license information.
5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6243791Sdim//
7243791Sdim//===----------------------------------------------------------------------===//
8243791Sdim//
9243791Sdim//  This file contains the implementation of a FileMatchTrie.
10243791Sdim//
11243791Sdim//===----------------------------------------------------------------------===//
12243791Sdim
13243791Sdim#include "clang/Tooling/FileMatchTrie.h"
14243791Sdim#include "llvm/ADT/StringMap.h"
15341825Sdim#include "llvm/ADT/StringRef.h"
16243791Sdim#include "llvm/Support/FileSystem.h"
17261991Sdim#include "llvm/Support/Path.h"
18243791Sdim#include "llvm/Support/raw_ostream.h"
19341825Sdim#include <string>
20341825Sdim#include <vector>
21341825Sdim
22288943Sdimusing namespace clang;
23288943Sdimusing namespace tooling;
24243791Sdim
25288943Sdimnamespace {
26341825Sdim
27341825Sdim/// Default \c PathComparator using \c llvm::sys::fs::equivalent().
28243791Sdimstruct DefaultPathComparator : public PathComparator {
29276479Sdim  bool equivalent(StringRef FileA, StringRef FileB) const override {
30243791Sdim    return FileA == FileB || llvm::sys::fs::equivalent(FileA, FileB);
31243791Sdim  }
32243791Sdim};
33243791Sdim
34341825Sdim} // namespace
35341825Sdim
36288943Sdimnamespace clang {
37288943Sdimnamespace tooling {
38341825Sdim
39341825Sdim/// A node of the \c FileMatchTrie.
40243791Sdim///
41243791Sdim/// Each node has storage for up to one path and a map mapping a path segment to
42243791Sdim/// child nodes. The trie starts with an empty root node.
43243791Sdimclass FileMatchTrieNode {
44243791Sdimpublic:
45341825Sdim  /// Inserts 'NewPath' into this trie. \c ConsumedLength denotes
46243791Sdim  /// the number of \c NewPath's trailing characters already consumed during
47243791Sdim  /// recursion.
48243791Sdim  ///
49243791Sdim  /// An insert of a path
50243791Sdim  /// 'p'starts at the root node and does the following:
51243791Sdim  /// - If the node is empty, insert 'p' into its storage and abort.
52243791Sdim  /// - If the node has a path 'p2' but no children, take the last path segment
53243791Sdim  ///   's' of 'p2', put a new child into the map at 's' an insert the rest of
54243791Sdim  ///   'p2' there.
55243791Sdim  /// - Insert a new child for the last segment of 'p' and insert the rest of
56243791Sdim  ///   'p' there.
57243791Sdim  ///
58243791Sdim  /// An insert operation is linear in the number of a path's segments.
59243791Sdim  void insert(StringRef NewPath, unsigned ConsumedLength = 0) {
60243791Sdim    // We cannot put relative paths into the FileMatchTrie as then a path can be
61243791Sdim    // a postfix of another path, violating a core assumption of the trie.
62243791Sdim    if (llvm::sys::path::is_relative(NewPath))
63243791Sdim      return;
64243791Sdim    if (Path.empty()) {
65243791Sdim      // This is an empty leaf. Store NewPath and return.
66243791Sdim      Path = NewPath;
67243791Sdim      return;
68243791Sdim    }
69243791Sdim    if (Children.empty()) {
70243791Sdim      // This is a leaf, ignore duplicate entry if 'Path' equals 'NewPath'.
71243791Sdim      if (NewPath == Path)
72243791Sdim          return;
73243791Sdim      // Make this a node and create a child-leaf with 'Path'.
74243791Sdim      StringRef Element(llvm::sys::path::filename(
75243791Sdim          StringRef(Path).drop_back(ConsumedLength)));
76243791Sdim      Children[Element].Path = Path;
77243791Sdim    }
78243791Sdim    StringRef Element(llvm::sys::path::filename(
79243791Sdim          StringRef(NewPath).drop_back(ConsumedLength)));
80243791Sdim    Children[Element].insert(NewPath, ConsumedLength + Element.size() + 1);
81243791Sdim  }
82243791Sdim
83341825Sdim  /// Tries to find the node under this \c FileMatchTrieNode that best
84243791Sdim  /// matches 'FileName'.
85243791Sdim  ///
86243791Sdim  /// If multiple paths fit 'FileName' equally well, \c IsAmbiguous is set to
87243791Sdim  /// \c true and an empty string is returned. If no path fits 'FileName', an
88243791Sdim  /// empty string is returned. \c ConsumedLength denotes the number of
89243791Sdim  /// \c Filename's trailing characters already consumed during recursion.
90243791Sdim  ///
91243791Sdim  /// To find the best matching node for a given path 'p', the
92243791Sdim  /// \c findEquivalent() function is called recursively for each path segment
93341825Sdim  /// (back to front) of 'p' until a node 'n' is reached that does not ..
94243791Sdim  /// - .. have children. In this case it is checked
95243791Sdim  ///   whether the stored path is equivalent to 'p'. If yes, the best match is
96243791Sdim  ///   found. Otherwise continue with the parent node as if this node did not
97243791Sdim  ///   exist.
98243791Sdim  /// - .. a child matching the next path segment. In this case, all children of
99243791Sdim  ///   'n' are an equally good match for 'p'. All children are of 'n' are found
100243791Sdim  ///   recursively and their equivalence to 'p' is determined. If none are
101243791Sdim  ///   equivalent, continue with the parent node as if 'n' didn't exist. If one
102243791Sdim  ///   is equivalent, the best match is found. Otherwise, report and ambigiuity
103243791Sdim  ///   error.
104243791Sdim  StringRef findEquivalent(const PathComparator& Comparator,
105243791Sdim                           StringRef FileName,
106243791Sdim                           bool &IsAmbiguous,
107243791Sdim                           unsigned ConsumedLength = 0) const {
108243791Sdim    if (Children.empty()) {
109243791Sdim      if (Comparator.equivalent(StringRef(Path), FileName))
110243791Sdim        return StringRef(Path);
111341825Sdim      return {};
112243791Sdim    }
113243791Sdim    StringRef Element(llvm::sys::path::filename(FileName.drop_back(
114243791Sdim        ConsumedLength)));
115243791Sdim    llvm::StringMap<FileMatchTrieNode>::const_iterator MatchingChild =
116243791Sdim        Children.find(Element);
117243791Sdim    if (MatchingChild != Children.end()) {
118243791Sdim      StringRef Result = MatchingChild->getValue().findEquivalent(
119243791Sdim          Comparator, FileName, IsAmbiguous,
120243791Sdim          ConsumedLength + Element.size() + 1);
121243791Sdim      if (!Result.empty() || IsAmbiguous)
122243791Sdim        return Result;
123243791Sdim    }
124243791Sdim    std::vector<StringRef> AllChildren;
125243791Sdim    getAll(AllChildren, MatchingChild);
126243791Sdim    StringRef Result;
127341825Sdim    for (const auto &Child : AllChildren) {
128341825Sdim      if (Comparator.equivalent(Child, FileName)) {
129243791Sdim        if (Result.empty()) {
130341825Sdim          Result = Child;
131243791Sdim        } else {
132243791Sdim          IsAmbiguous = true;
133341825Sdim          return {};
134243791Sdim        }
135243791Sdim      }
136243791Sdim    }
137243791Sdim    return Result;
138243791Sdim  }
139243791Sdim
140243791Sdimprivate:
141341825Sdim  /// Gets all paths under this FileMatchTrieNode.
142243791Sdim  void getAll(std::vector<StringRef> &Results,
143243791Sdim              llvm::StringMap<FileMatchTrieNode>::const_iterator Except) const {
144243791Sdim    if (Path.empty())
145243791Sdim      return;
146243791Sdim    if (Children.empty()) {
147243791Sdim      Results.push_back(StringRef(Path));
148243791Sdim      return;
149243791Sdim    }
150243791Sdim    for (llvm::StringMap<FileMatchTrieNode>::const_iterator
151243791Sdim         It = Children.begin(), E = Children.end();
152243791Sdim         It != E; ++It) {
153243791Sdim      if (It == Except)
154243791Sdim        continue;
155243791Sdim      It->getValue().getAll(Results, Children.end());
156243791Sdim    }
157243791Sdim  }
158243791Sdim
159243791Sdim  // The stored absolute path in this node. Only valid for leaf nodes, i.e.
160243791Sdim  // nodes where Children.empty().
161243791Sdim  std::string Path;
162243791Sdim
163243791Sdim  // The children of this node stored in a map based on the next path segment.
164243791Sdim  llvm::StringMap<FileMatchTrieNode> Children;
165243791Sdim};
166243791Sdim
167341825Sdim} // namespace tooling
168341825Sdim} // namespace clang
169341825Sdim
170243791SdimFileMatchTrie::FileMatchTrie()
171341825Sdim    : Root(new FileMatchTrieNode), Comparator(new DefaultPathComparator()) {}
172243791Sdim
173243791SdimFileMatchTrie::FileMatchTrie(PathComparator *Comparator)
174341825Sdim    : Root(new FileMatchTrieNode), Comparator(Comparator) {}
175243791Sdim
176243791SdimFileMatchTrie::~FileMatchTrie() {
177243791Sdim  delete Root;
178243791Sdim}
179243791Sdim
180243791Sdimvoid FileMatchTrie::insert(StringRef NewPath) {
181243791Sdim  Root->insert(NewPath);
182243791Sdim}
183243791Sdim
184243791SdimStringRef FileMatchTrie::findEquivalent(StringRef FileName,
185249423Sdim                                        raw_ostream &Error) const {
186243791Sdim  if (llvm::sys::path::is_relative(FileName)) {
187243791Sdim    Error << "Cannot resolve relative paths";
188341825Sdim    return {};
189243791Sdim  }
190243791Sdim  bool IsAmbiguous = false;
191243791Sdim  StringRef Result = Root->findEquivalent(*Comparator, FileName, IsAmbiguous);
192243791Sdim  if (IsAmbiguous)
193243791Sdim    Error << "Path is ambiguous";
194243791Sdim  return Result;
195243791Sdim}
196