GlobPattern.cpp revision 341825
1//===-- GlobPattern.cpp - Glob pattern matcher implementation -------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements a glob pattern matcher.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Support/GlobPattern.h"
15#include "llvm/ADT/ArrayRef.h"
16#include "llvm/ADT/Optional.h"
17#include "llvm/ADT/StringRef.h"
18#include "llvm/Support/Errc.h"
19
20using namespace llvm;
21
22static bool hasWildcard(StringRef S) {
23  return S.find_first_of("?*[") != StringRef::npos;
24}
25
26// Expands character ranges and returns a bitmap.
27// For example, "a-cf-hz" is expanded to "abcfghz".
28static Expected<BitVector> expand(StringRef S, StringRef Original) {
29  BitVector BV(256, false);
30
31  // Expand X-Y.
32  for (;;) {
33    if (S.size() < 3)
34      break;
35
36    uint8_t Start = S[0];
37    uint8_t End = S[2];
38
39    // If it doesn't start with something like X-Y,
40    // consume the first character and proceed.
41    if (S[1] != '-') {
42      BV[Start] = true;
43      S = S.substr(1);
44      continue;
45    }
46
47    // It must be in the form of X-Y.
48    // Validate it and then interpret the range.
49    if (Start > End)
50      return make_error<StringError>("invalid glob pattern: " + Original,
51                                     errc::invalid_argument);
52
53    for (int C = Start; C <= End; ++C)
54      BV[(uint8_t)C] = true;
55    S = S.substr(3);
56  }
57
58  for (char C : S)
59    BV[(uint8_t)C] = true;
60  return BV;
61}
62
63// This is a scanner for the glob pattern.
64// A glob pattern token is one of "*", "?", "[<chars>]", "[^<chars>]"
65// (which is a negative form of "[<chars>]"), or a non-meta character.
66// This function returns the first token in S.
67static Expected<BitVector> scan(StringRef &S, StringRef Original) {
68  switch (S[0]) {
69  case '*':
70    S = S.substr(1);
71    // '*' is represented by an empty bitvector.
72    // All other bitvectors are 256-bit long.
73    return BitVector();
74  case '?':
75    S = S.substr(1);
76    return BitVector(256, true);
77  case '[': {
78    size_t End = S.find(']', 1);
79    if (End == StringRef::npos)
80      return make_error<StringError>("invalid glob pattern: " + Original,
81                                     errc::invalid_argument);
82
83    StringRef Chars = S.substr(1, End - 1);
84    S = S.substr(End + 1);
85    if (Chars.startswith("^")) {
86      Expected<BitVector> BV = expand(Chars.substr(1), Original);
87      if (!BV)
88        return BV.takeError();
89      return BV->flip();
90    }
91    return expand(Chars, Original);
92  }
93  default:
94    BitVector BV(256, false);
95    BV[(uint8_t)S[0]] = true;
96    S = S.substr(1);
97    return BV;
98  }
99}
100
101Expected<GlobPattern> GlobPattern::create(StringRef S) {
102  GlobPattern Pat;
103
104  // S doesn't contain any metacharacter,
105  // so the regular string comparison should work.
106  if (!hasWildcard(S)) {
107    Pat.Exact = S;
108    return Pat;
109  }
110
111  // S is something like "foo*". We can use startswith().
112  if (S.endswith("*") && !hasWildcard(S.drop_back())) {
113    Pat.Prefix = S.drop_back();
114    return Pat;
115  }
116
117  // S is something like "*foo". We can use endswith().
118  if (S.startswith("*") && !hasWildcard(S.drop_front())) {
119    Pat.Suffix = S.drop_front();
120    return Pat;
121  }
122
123  // Otherwise, we need to do real glob pattern matching.
124  // Parse the pattern now.
125  StringRef Original = S;
126  while (!S.empty()) {
127    Expected<BitVector> BV = scan(S, Original);
128    if (!BV)
129      return BV.takeError();
130    Pat.Tokens.push_back(*BV);
131  }
132  return Pat;
133}
134
135bool GlobPattern::match(StringRef S) const {
136  if (Exact)
137    return S == *Exact;
138  if (Prefix)
139    return S.startswith(*Prefix);
140  if (Suffix)
141    return S.endswith(*Suffix);
142  return matchOne(Tokens, S);
143}
144
145// Runs glob pattern Pats against string S.
146bool GlobPattern::matchOne(ArrayRef<BitVector> Pats, StringRef S) const {
147  for (;;) {
148    if (Pats.empty())
149      return S.empty();
150
151    // If Pats[0] is '*', try to match Pats[1..] against all possible
152    // tail strings of S to see at least one pattern succeeds.
153    if (Pats[0].size() == 0) {
154      Pats = Pats.slice(1);
155      if (Pats.empty())
156        // Fast path. If a pattern is '*', it matches anything.
157        return true;
158      for (size_t I = 0, E = S.size(); I < E; ++I)
159        if (matchOne(Pats, S.substr(I)))
160          return true;
161      return false;
162    }
163
164    // If Pats[0] is not '*', it must consume one character.
165    if (S.empty() || !Pats[0][(uint8_t)S[0]])
166      return false;
167    Pats = Pats.slice(1);
168    S = S.substr(1);
169  }
170}
171