1//===--- MatchFilePath.cpp - Match file path with pattern -------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements the functionality of matching a file path name to
11/// a pattern, similar to the POSIX fnmatch() function.
12///
13//===----------------------------------------------------------------------===//
14
15#include "MatchFilePath.h"
16
17using namespace llvm;
18
19namespace clang {
20namespace format {
21
22// Check whether `FilePath` matches `Pattern` based on POSIX 2.13.1, 2.13.2, and
23// Rule 1 of 2.13.3.
24bool matchFilePath(StringRef Pattern, StringRef FilePath) {
25  assert(!Pattern.empty());
26  assert(!FilePath.empty());
27
28  // No match if `Pattern` ends with a non-meta character not equal to the last
29  // character of `FilePath`.
30  if (const auto C = Pattern.back(); !strchr("?*]", C) && C != FilePath.back())
31    return false;
32
33  constexpr auto Separator = '/';
34  const auto EOP = Pattern.size();  // End of `Pattern`.
35  const auto End = FilePath.size(); // End of `FilePath`.
36  unsigned I = 0;                   // Index to `Pattern`.
37
38  for (unsigned J = 0; J < End; ++J) {
39    if (I == EOP)
40      return false;
41
42    switch (const auto F = FilePath[J]; Pattern[I]) {
43    case '\\':
44      if (++I == EOP || F != Pattern[I])
45        return false;
46      break;
47    case '?':
48      if (F == Separator)
49        return false;
50      break;
51    case '*': {
52      while (++I < EOP && Pattern[I] == '*') { // Skip consecutive stars.
53      }
54      const auto K = FilePath.find(Separator, J); // Index of next `Separator`.
55      const bool NoMoreSeparatorsInFilePath = K == StringRef::npos;
56      if (I == EOP) // `Pattern` ends with a star.
57        return NoMoreSeparatorsInFilePath;
58      // `Pattern` ends with a lone backslash.
59      if (Pattern[I] == '\\' && ++I == EOP)
60        return false;
61      // The star is followed by a (possibly escaped) `Separator`.
62      if (Pattern[I] == Separator) {
63        if (NoMoreSeparatorsInFilePath)
64          return false;
65        J = K; // Skip to next `Separator` in `FilePath`.
66        break;
67      }
68      // Recurse.
69      for (auto Pat = Pattern.substr(I); J < End && FilePath[J] != Separator;
70           ++J) {
71        if (matchFilePath(Pat, FilePath.substr(J)))
72          return true;
73      }
74      return false;
75    }
76    case '[':
77      // Skip e.g. `[!]`.
78      if (I + 3 < EOP || (I + 3 == EOP && Pattern[I + 1] != '!')) {
79        // Skip unpaired `[`, brackets containing slashes, and `[]`.
80        if (const auto K = Pattern.find_first_of("]/", I + 1);
81            K != StringRef::npos && Pattern[K] == ']' && K > I + 1) {
82          if (F == Separator)
83            return false;
84          ++I; // After the `[`.
85          bool Negated = false;
86          if (Pattern[I] == '!') {
87            Negated = true;
88            ++I; // After the `!`.
89          }
90          bool Match = false;
91          do {
92            if (I + 2 < K && Pattern[I + 1] == '-') {
93              Match = Pattern[I] <= F && F <= Pattern[I + 2];
94              I += 3; // After the range, e.g. `A-Z`.
95            } else {
96              Match = F == Pattern[I++];
97            }
98          } while (!Match && I < K);
99          if (Negated ? Match : !Match)
100            return false;
101          I = K + 1; // After the `]`.
102          continue;
103        }
104      }
105      [[fallthrough]]; // Match `[` literally.
106    default:
107      if (F != Pattern[I])
108        return false;
109    }
110
111    ++I;
112  }
113
114  // Match trailing stars with null strings.
115  while (I < EOP && Pattern[I] == '*')
116    ++I;
117
118  return I == EOP;
119}
120
121} // namespace format
122} // namespace clang
123