1243791Sdim//===--- FileMatchTrie.cpp - ----------------------------------------------===//
2243791Sdim//
3243791Sdim//                     The LLVM Compiler Infrastructure
4243791Sdim//
5243791Sdim// This file is distributed under the University of Illinois Open Source
6243791Sdim// License. See LICENSE.TXT for details.
7243791Sdim//
8243791Sdim//===----------------------------------------------------------------------===//
9243791Sdim//
10243791Sdim//  This file contains the implementation of a FileMatchTrie.
11243791Sdim//
12243791Sdim//===----------------------------------------------------------------------===//
13243791Sdim
14243791Sdim#include "clang/Tooling/FileMatchTrie.h"
15243791Sdim#include "llvm/ADT/StringMap.h"
16243791Sdim#include "llvm/Support/FileSystem.h"
17263509Sdim#include "llvm/Support/Path.h"
18243791Sdim#include "llvm/Support/raw_ostream.h"
19252723Sdim#include <sstream>
20243791Sdim
21243791Sdimnamespace clang {
22243791Sdimnamespace tooling {
23243791Sdim
24243791Sdim/// \brief Default \c PathComparator using \c llvm::sys::fs::equivalent().
25243791Sdimstruct DefaultPathComparator : public PathComparator {
26243791Sdim  virtual ~DefaultPathComparator() {}
27243791Sdim  virtual bool equivalent(StringRef FileA, StringRef FileB) const {
28243791Sdim    return FileA == FileB || llvm::sys::fs::equivalent(FileA, FileB);
29243791Sdim  }
30243791Sdim};
31243791Sdim
32243791Sdim/// \brief A node of the \c FileMatchTrie.
33243791Sdim///
34243791Sdim/// Each node has storage for up to one path and a map mapping a path segment to
35243791Sdim/// child nodes. The trie starts with an empty root node.
36243791Sdimclass FileMatchTrieNode {
37243791Sdimpublic:
38243791Sdim  /// \brief Inserts 'NewPath' into this trie. \c ConsumedLength denotes
39243791Sdim  /// the number of \c NewPath's trailing characters already consumed during
40243791Sdim  /// recursion.
41243791Sdim  ///
42243791Sdim  /// An insert of a path
43243791Sdim  /// 'p'starts at the root node and does the following:
44243791Sdim  /// - If the node is empty, insert 'p' into its storage and abort.
45243791Sdim  /// - If the node has a path 'p2' but no children, take the last path segment
46243791Sdim  ///   's' of 'p2', put a new child into the map at 's' an insert the rest of
47243791Sdim  ///   'p2' there.
48243791Sdim  /// - Insert a new child for the last segment of 'p' and insert the rest of
49243791Sdim  ///   'p' there.
50243791Sdim  ///
51243791Sdim  /// An insert operation is linear in the number of a path's segments.
52243791Sdim  void insert(StringRef NewPath, unsigned ConsumedLength = 0) {
53243791Sdim    // We cannot put relative paths into the FileMatchTrie as then a path can be
54243791Sdim    // a postfix of another path, violating a core assumption of the trie.
55243791Sdim    if (llvm::sys::path::is_relative(NewPath))
56243791Sdim      return;
57243791Sdim    if (Path.empty()) {
58243791Sdim      // This is an empty leaf. Store NewPath and return.
59243791Sdim      Path = NewPath;
60243791Sdim      return;
61243791Sdim    }
62243791Sdim    if (Children.empty()) {
63243791Sdim      // This is a leaf, ignore duplicate entry if 'Path' equals 'NewPath'.
64243791Sdim      if (NewPath == Path)
65243791Sdim          return;
66243791Sdim      // Make this a node and create a child-leaf with 'Path'.
67243791Sdim      StringRef Element(llvm::sys::path::filename(
68243791Sdim          StringRef(Path).drop_back(ConsumedLength)));
69243791Sdim      Children[Element].Path = Path;
70243791Sdim    }
71243791Sdim    StringRef Element(llvm::sys::path::filename(
72243791Sdim          StringRef(NewPath).drop_back(ConsumedLength)));
73243791Sdim    Children[Element].insert(NewPath, ConsumedLength + Element.size() + 1);
74243791Sdim  }
75243791Sdim
76243791Sdim  /// \brief Tries to find the node under this \c FileMatchTrieNode that best
77243791Sdim  /// matches 'FileName'.
78243791Sdim  ///
79243791Sdim  /// If multiple paths fit 'FileName' equally well, \c IsAmbiguous is set to
80243791Sdim  /// \c true and an empty string is returned. If no path fits 'FileName', an
81243791Sdim  /// empty string is returned. \c ConsumedLength denotes the number of
82243791Sdim  /// \c Filename's trailing characters already consumed during recursion.
83243791Sdim  ///
84243791Sdim  /// To find the best matching node for a given path 'p', the
85243791Sdim  /// \c findEquivalent() function is called recursively for each path segment
86243791Sdim  /// (back to fron) of 'p' until a node 'n' is reached that does not ..
87243791Sdim  /// - .. have children. In this case it is checked
88243791Sdim  ///   whether the stored path is equivalent to 'p'. If yes, the best match is
89243791Sdim  ///   found. Otherwise continue with the parent node as if this node did not
90243791Sdim  ///   exist.
91243791Sdim  /// - .. a child matching the next path segment. In this case, all children of
92243791Sdim  ///   'n' are an equally good match for 'p'. All children are of 'n' are found
93243791Sdim  ///   recursively and their equivalence to 'p' is determined. If none are
94243791Sdim  ///   equivalent, continue with the parent node as if 'n' didn't exist. If one
95243791Sdim  ///   is equivalent, the best match is found. Otherwise, report and ambigiuity
96243791Sdim  ///   error.
97243791Sdim  StringRef findEquivalent(const PathComparator& Comparator,
98243791Sdim                           StringRef FileName,
99243791Sdim                           bool &IsAmbiguous,
100243791Sdim                           unsigned ConsumedLength = 0) const {
101243791Sdim    if (Children.empty()) {
102243791Sdim      if (Comparator.equivalent(StringRef(Path), FileName))
103243791Sdim        return StringRef(Path);
104243791Sdim      return StringRef();
105243791Sdim    }
106243791Sdim    StringRef Element(llvm::sys::path::filename(FileName.drop_back(
107243791Sdim        ConsumedLength)));
108243791Sdim    llvm::StringMap<FileMatchTrieNode>::const_iterator MatchingChild =
109243791Sdim        Children.find(Element);
110243791Sdim    if (MatchingChild != Children.end()) {
111243791Sdim      StringRef Result = MatchingChild->getValue().findEquivalent(
112243791Sdim          Comparator, FileName, IsAmbiguous,
113243791Sdim          ConsumedLength + Element.size() + 1);
114243791Sdim      if (!Result.empty() || IsAmbiguous)
115243791Sdim        return Result;
116243791Sdim    }
117243791Sdim    std::vector<StringRef> AllChildren;
118243791Sdim    getAll(AllChildren, MatchingChild);
119243791Sdim    StringRef Result;
120243791Sdim    for (unsigned i = 0; i < AllChildren.size(); i++) {
121243791Sdim      if (Comparator.equivalent(AllChildren[i], FileName)) {
122243791Sdim        if (Result.empty()) {
123243791Sdim          Result = AllChildren[i];
124243791Sdim        } else {
125243791Sdim          IsAmbiguous = true;
126243791Sdim          return StringRef();
127243791Sdim        }
128243791Sdim      }
129243791Sdim    }
130243791Sdim    return Result;
131243791Sdim  }
132243791Sdim
133243791Sdimprivate:
134243791Sdim  /// \brief Gets all paths under this FileMatchTrieNode.
135243791Sdim  void getAll(std::vector<StringRef> &Results,
136243791Sdim              llvm::StringMap<FileMatchTrieNode>::const_iterator Except) const {
137243791Sdim    if (Path.empty())
138243791Sdim      return;
139243791Sdim    if (Children.empty()) {
140243791Sdim      Results.push_back(StringRef(Path));
141243791Sdim      return;
142243791Sdim    }
143243791Sdim    for (llvm::StringMap<FileMatchTrieNode>::const_iterator
144243791Sdim         It = Children.begin(), E = Children.end();
145243791Sdim         It != E; ++It) {
146243791Sdim      if (It == Except)
147243791Sdim        continue;
148243791Sdim      It->getValue().getAll(Results, Children.end());
149243791Sdim    }
150243791Sdim  }
151243791Sdim
152243791Sdim  // The stored absolute path in this node. Only valid for leaf nodes, i.e.
153243791Sdim  // nodes where Children.empty().
154243791Sdim  std::string Path;
155243791Sdim
156243791Sdim  // The children of this node stored in a map based on the next path segment.
157243791Sdim  llvm::StringMap<FileMatchTrieNode> Children;
158243791Sdim};
159243791Sdim
160243791SdimFileMatchTrie::FileMatchTrie()
161243791Sdim  : Root(new FileMatchTrieNode), Comparator(new DefaultPathComparator()) {}
162243791Sdim
163243791SdimFileMatchTrie::FileMatchTrie(PathComparator *Comparator)
164243791Sdim  : Root(new FileMatchTrieNode), Comparator(Comparator) {}
165243791Sdim
166243791SdimFileMatchTrie::~FileMatchTrie() {
167243791Sdim  delete Root;
168243791Sdim}
169243791Sdim
170243791Sdimvoid FileMatchTrie::insert(StringRef NewPath) {
171243791Sdim  Root->insert(NewPath);
172243791Sdim}
173243791Sdim
174243791SdimStringRef FileMatchTrie::findEquivalent(StringRef FileName,
175252723Sdim                                        raw_ostream &Error) const {
176243791Sdim  if (llvm::sys::path::is_relative(FileName)) {
177243791Sdim    Error << "Cannot resolve relative paths";
178243791Sdim    return StringRef();
179243791Sdim  }
180243791Sdim  bool IsAmbiguous = false;
181243791Sdim  StringRef Result = Root->findEquivalent(*Comparator, FileName, IsAmbiguous);
182243791Sdim  if (IsAmbiguous)
183243791Sdim    Error << "Path is ambiguous";
184243791Sdim  return Result;
185243791Sdim}
186243791Sdim
187243791Sdim} // end namespace tooling
188243791Sdim} // end namespace clang
189