1/* 2 * An implementation of what I call the "Sea of Stars" algorithm for 3 * POSIX fnmatch(). The basic idea is that we factor the pattern into 4 * a head component (which we match first and can reject without ever 5 * measuring the length of the string), an optional tail component 6 * (which only exists if the pattern contains at least one star), and 7 * an optional "sea of stars", a set of star-separated components 8 * between the head and tail. After the head and tail matches have 9 * been removed from the input string, the components in the "sea of 10 * stars" are matched sequentially by searching for their first 11 * occurrence past the end of the previous match. 12 * 13 * - Rich Felker, April 2012 14 */ 15 16#include "locale_impl.h" 17#include <fnmatch.h> 18#include <stdlib.h> 19#include <string.h> 20#include <wchar.h> 21#include <wctype.h> 22 23#define END 0 24#define UNMATCHABLE -2 25#define BRACKET -3 26#define QUESTION -4 27#define STAR -5 28 29static int str_next(const char* str, size_t n, size_t* step) { 30 if (!n) { 31 *step = 0; 32 return 0; 33 } 34 if (str[0] >= 128U) { 35 wchar_t wc; 36 int k = mbtowc(&wc, str, n); 37 if (k < 0) { 38 *step = 1; 39 return -1; 40 } 41 *step = k; 42 return wc; 43 } 44 *step = 1; 45 return str[0]; 46} 47 48static int pat_next(const char* pat, size_t m, size_t* step, int flags) { 49 int esc = 0; 50 if (!m || !*pat) { 51 *step = 0; 52 return END; 53 } 54 *step = 1; 55 if (pat[0] == '\\' && pat[1] && !(flags & FNM_NOESCAPE)) { 56 *step = 2; 57 pat++; 58 esc = 1; 59 goto escaped; 60 } 61 if (pat[0] == '[') { 62 size_t k = 1; 63 if (k < m) 64 if (pat[k] == '^' || pat[k] == '!') 65 k++; 66 if (k < m) 67 if (pat[k] == ']') 68 k++; 69 for (; k < m && pat[k] && pat[k] != ']'; k++) { 70 if (k + 1 < m && pat[k + 1] && pat[k] == '[' && 71 (pat[k + 1] == ':' || pat[k + 1] == '.' || pat[k + 1] == '=')) { 72 int z = pat[k + 1]; 73 k += 2; 74 if (k < m && pat[k]) 75 k++; 76 while (k < m && pat[k] && (pat[k - 1] != z || pat[k] != ']')) 77 k++; 78 if (k == m || !pat[k]) 79 break; 80 } 81 } 82 if (k == m || !pat[k]) { 83 *step = 1; 84 return '['; 85 } 86 *step = k + 1; 87 return BRACKET; 88 } 89 if (pat[0] == '*') 90 return STAR; 91 if (pat[0] == '?') 92 return QUESTION; 93escaped: 94 if (pat[0] >= 128U) { 95 wchar_t wc; 96 int k = mbtowc(&wc, pat, m); 97 if (k < 0) { 98 *step = 0; 99 return UNMATCHABLE; 100 } 101 *step = k + esc; 102 return wc; 103 } 104 return pat[0]; 105} 106 107static int casefold(int k) { 108 int c = towupper(k); 109 return c == k ? towlower(k) : c; 110} 111 112static int match_bracket(const char* p, int k, int kfold) { 113 wchar_t wc; 114 int inv = 0; 115 p++; 116 if (*p == '^' || *p == '!') { 117 inv = 1; 118 p++; 119 } 120 if (*p == ']') { 121 if (k == ']') 122 return !inv; 123 p++; 124 } else if (*p == '-') { 125 if (k == '-') 126 return !inv; 127 p++; 128 } 129 wc = p[-1]; 130 for (; *p != ']'; p++) { 131 if (p[0] == '-' && p[1] != ']') { 132 wchar_t wc2; 133 int l = mbtowc(&wc2, p + 1, 4); 134 if (l < 0) 135 return 0; 136 if (wc <= wc2) 137 if ((unsigned)k - wc <= wc2 - wc || (unsigned)kfold - wc <= wc2 - wc) 138 return !inv; 139 p += l - 1; 140 continue; 141 } 142 if (p[0] == '[' && (p[1] == ':' || p[1] == '.' || p[1] == '=')) { 143 const char* p0 = p + 2; 144 int z = p[1]; 145 p += 3; 146 while (p[-1] != z || p[0] != ']') 147 p++; 148 if (z == ':' && p - 1 - p0 < 16) { 149 char buf[16]; 150 memcpy(buf, p0, p - 1 - p0); 151 buf[p - 1 - p0] = 0; 152 if (iswctype(k, wctype(buf)) || iswctype(kfold, wctype(buf))) 153 return !inv; 154 } 155 continue; 156 } 157 if (*p < 128U) { 158 wc = (unsigned char)*p; 159 } else { 160 int l = mbtowc(&wc, p, 4); 161 if (l < 0) 162 return 0; 163 p += l - 1; 164 } 165 if (wc == k || wc == kfold) 166 return !inv; 167 } 168 return inv; 169} 170 171static int fnmatch_internal(const char* pat, size_t m, const char* str, size_t n, int flags) { 172 const char *p, *ptail, *endpat; 173 const char *s, *stail, *endstr; 174 size_t pinc, sinc, tailcnt = 0; 175 int c, k, kfold; 176 177 if (flags & FNM_PERIOD) { 178 if (*str == '.' && *pat != '.') 179 return FNM_NOMATCH; 180 } 181 for (;;) { 182 switch ((c = pat_next(pat, m, &pinc, flags))) { 183 case UNMATCHABLE: 184 return FNM_NOMATCH; 185 case STAR: 186 pat++; 187 m--; 188 break; 189 default: 190 k = str_next(str, n, &sinc); 191 if (k <= 0) 192 return (c == END) ? 0 : FNM_NOMATCH; 193 str += sinc; 194 n -= sinc; 195 kfold = flags & FNM_CASEFOLD ? casefold(k) : k; 196 if (c == BRACKET) { 197 if (!match_bracket(pat, k, kfold)) 198 return FNM_NOMATCH; 199 } else if (c != QUESTION && k != c && kfold != c) { 200 return FNM_NOMATCH; 201 } 202 pat += pinc; 203 m -= pinc; 204 continue; 205 } 206 break; 207 } 208 209 /* Compute real pat length if it was initially unknown/-1 */ 210 m = strnlen(pat, m); 211 endpat = pat + m; 212 213 /* Find the last * in pat and count chars needed after it */ 214 for (p = ptail = pat; p < endpat; p += pinc) { 215 switch (pat_next(p, endpat - p, &pinc, flags)) { 216 case UNMATCHABLE: 217 return FNM_NOMATCH; 218 case STAR: 219 tailcnt = 0; 220 ptail = p + 1; 221 break; 222 default: 223 tailcnt++; 224 break; 225 } 226 } 227 228 /* Past this point we need not check for UNMATCHABLE in pat, 229 * because all of pat has already been parsed once. */ 230 231 /* Compute real str length if it was initially unknown/-1 */ 232 n = strnlen(str, n); 233 endstr = str + n; 234 if (n < tailcnt) 235 return FNM_NOMATCH; 236 237 /* Find the final tailcnt chars of str, accounting for UTF-8. 238 * On illegal sequences we may get it wrong, but in that case 239 * we necessarily have a matching failure anyway. */ 240 for (s = endstr; s > str && tailcnt; tailcnt--) { 241 if (s[-1] < 128U || MB_CUR_MAX == 1) 242 s--; 243 else 244 while ((unsigned char)*--s - 0x80U < 0x40 && s > str) 245 ; 246 } 247 if (tailcnt) 248 return FNM_NOMATCH; 249 stail = s; 250 251 /* Check that the pat and str tails match */ 252 p = ptail; 253 for (;;) { 254 c = pat_next(p, endpat - p, &pinc, flags); 255 p += pinc; 256 if ((k = str_next(s, endstr - s, &sinc)) <= 0) { 257 if (c != END) 258 return FNM_NOMATCH; 259 break; 260 } 261 s += sinc; 262 kfold = flags & FNM_CASEFOLD ? casefold(k) : k; 263 if (c == BRACKET) { 264 if (!match_bracket(p - pinc, k, kfold)) 265 return FNM_NOMATCH; 266 } else if (c != QUESTION && k != c && kfold != c) { 267 return FNM_NOMATCH; 268 } 269 } 270 271 /* We're all done with the tails now, so throw them out */ 272 endstr = stail; 273 endpat = ptail; 274 275 /* Match pattern components until there are none left */ 276 while (pat < endpat) { 277 p = pat; 278 s = str; 279 for (;;) { 280 c = pat_next(p, endpat - p, &pinc, flags); 281 p += pinc; 282 /* Encountering * completes/commits a component */ 283 if (c == STAR) { 284 pat = p; 285 str = s; 286 break; 287 } 288 k = str_next(s, endstr - s, &sinc); 289 if (!k) 290 return FNM_NOMATCH; 291 kfold = flags & FNM_CASEFOLD ? casefold(k) : k; 292 if (c == BRACKET) { 293 if (!match_bracket(p - pinc, k, kfold)) 294 break; 295 } else if (c != QUESTION && k != c && kfold != c) { 296 break; 297 } 298 s += sinc; 299 } 300 if (c == STAR) 301 continue; 302 /* If we failed, advance str, by 1 char if it's a valid 303 * char, or past all invalid bytes otherwise. */ 304 k = str_next(str, endstr - str, &sinc); 305 if (k > 0) 306 str += sinc; 307 else 308 for (str++; str_next(str, endstr - str, &sinc) < 0; str++) 309 ; 310 } 311 312 return 0; 313} 314 315int fnmatch(const char* pat, const char* str, int flags) { 316 const char *s, *p; 317 size_t inc; 318 int c; 319 if (flags & FNM_PATHNAME) 320 for (;;) { 321 for (s = str; *s && *s != '/'; s++) 322 ; 323 for (p = pat; (c = pat_next(p, -1, &inc, flags)) != END && c != '/'; p += inc) 324 ; 325 if (c != *s && (!*s || !(flags & FNM_LEADING_DIR))) 326 return FNM_NOMATCH; 327 if (fnmatch_internal(pat, p - pat, str, s - str, flags)) 328 return FNM_NOMATCH; 329 if (!c) 330 return 0; 331 str = s + 1; 332 pat = p + inc; 333 } 334 else if (flags & FNM_LEADING_DIR) { 335 for (s = str; *s; s++) { 336 if (*s != '/') 337 continue; 338 if (!fnmatch_internal(pat, -1, str, s - str, flags)) 339 return 0; 340 } 341 } 342 return fnmatch_internal(pat, -1, str, -1, flags); 343} 344