1//===-- GlobPattern.cpp - Glob pattern matcher implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a glob pattern matcher.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/Support/GlobPattern.h"
14#include "llvm/ADT/ArrayRef.h"
15#include "llvm/ADT/Optional.h"
16#include "llvm/ADT/StringRef.h"
17#include "llvm/Support/Errc.h"
18
19using namespace llvm;
20
21static bool hasWildcard(StringRef S) {
22  return S.find_first_of("?*[\\") != StringRef::npos;
23}
24
25// Expands character ranges and returns a bitmap.
26// For example, "a-cf-hz" is expanded to "abcfghz".
27static Expected<BitVector> expand(StringRef S, StringRef Original) {
28  BitVector BV(256, false);
29
30  // Expand X-Y.
31  for (;;) {
32    if (S.size() < 3)
33      break;
34
35    uint8_t Start = S[0];
36    uint8_t End = S[2];
37
38    // If it doesn't start with something like X-Y,
39    // consume the first character and proceed.
40    if (S[1] != '-') {
41      BV[Start] = true;
42      S = S.substr(1);
43      continue;
44    }
45
46    // It must be in the form of X-Y.
47    // Validate it and then interpret the range.
48    if (Start > End)
49      return make_error<StringError>("invalid glob pattern: " + Original,
50                                     errc::invalid_argument);
51
52    for (int C = Start; C <= End; ++C)
53      BV[(uint8_t)C] = true;
54    S = S.substr(3);
55  }
56
57  for (char C : S)
58    BV[(uint8_t)C] = true;
59  return BV;
60}
61
62// This is a scanner for the glob pattern.
63// A glob pattern token is one of "*", "?", "\", "[<chars>]", "[^<chars>]"
64// (which is a negative form of "[<chars>]"), "[!<chars>]" (which is
65// equivalent to "[^<chars>]"), or a non-meta character.
66// This function returns the first token in S.
67static Expected<BitVector> scan(StringRef &S, StringRef Original) {
68  switch (S[0]) {
69  case '*':
70    S = S.substr(1);
71    // '*' is represented by an empty bitvector.
72    // All other bitvectors are 256-bit long.
73    return BitVector();
74  case '?':
75    S = S.substr(1);
76    return BitVector(256, true);
77  case '[': {
78    // ']' is allowed as the first character of a character class. '[]' is
79    // invalid. So, just skip the first character.
80    size_t End = S.find(']', 2);
81    if (End == StringRef::npos)
82      return make_error<StringError>("invalid glob pattern: " + Original,
83                                     errc::invalid_argument);
84
85    StringRef Chars = S.substr(1, End - 1);
86    S = S.substr(End + 1);
87    if (Chars.startswith("^") || Chars.startswith("!")) {
88      Expected<BitVector> BV = expand(Chars.substr(1), Original);
89      if (!BV)
90        return BV.takeError();
91      return BV->flip();
92    }
93    return expand(Chars, Original);
94  }
95  case '\\':
96    // Eat this character and fall through below to treat it like a non-meta
97    // character.
98    S = S.substr(1);
99    LLVM_FALLTHROUGH;
100  default:
101    BitVector BV(256, false);
102    BV[(uint8_t)S[0]] = true;
103    S = S.substr(1);
104    return BV;
105  }
106}
107
108Expected<GlobPattern> GlobPattern::create(StringRef S) {
109  GlobPattern Pat;
110
111  // S doesn't contain any metacharacter,
112  // so the regular string comparison should work.
113  if (!hasWildcard(S)) {
114    Pat.Exact = S;
115    return Pat;
116  }
117
118  // S is something like "foo*", and the "* is not escaped. We can use
119  // startswith().
120  if (S.endswith("*") && !S.endswith("\\*") && !hasWildcard(S.drop_back())) {
121    Pat.Prefix = S.drop_back();
122    return Pat;
123  }
124
125  // S is something like "*foo". We can use endswith().
126  if (S.startswith("*") && !hasWildcard(S.drop_front())) {
127    Pat.Suffix = S.drop_front();
128    return Pat;
129  }
130
131  // Otherwise, we need to do real glob pattern matching.
132  // Parse the pattern now.
133  StringRef Original = S;
134  while (!S.empty()) {
135    Expected<BitVector> BV = scan(S, Original);
136    if (!BV)
137      return BV.takeError();
138    Pat.Tokens.push_back(*BV);
139  }
140  return Pat;
141}
142
143bool GlobPattern::match(StringRef S) const {
144  if (Exact)
145    return S == *Exact;
146  if (Prefix)
147    return S.startswith(*Prefix);
148  if (Suffix)
149    return S.endswith(*Suffix);
150  return matchOne(Tokens, S);
151}
152
153// Runs glob pattern Pats against string S.
154bool GlobPattern::matchOne(ArrayRef<BitVector> Pats, StringRef S) const {
155  for (;;) {
156    if (Pats.empty())
157      return S.empty();
158
159    // If Pats[0] is '*', try to match Pats[1..] against all possible
160    // tail strings of S to see at least one pattern succeeds.
161    if (Pats[0].size() == 0) {
162      Pats = Pats.slice(1);
163      if (Pats.empty())
164        // Fast path. If a pattern is '*', it matches anything.
165        return true;
166      for (size_t I = 0, E = S.size(); I < E; ++I)
167        if (matchOne(Pats, S.substr(I)))
168          return true;
169      return false;
170    }
171
172    // If Pats[0] is not '*', it must consume one character.
173    if (S.empty() || !Pats[0][(uint8_t)S[0]])
174      return false;
175    Pats = Pats.slice(1);
176    S = S.substr(1);
177  }
178}
179