1/* 2 * An implementation of what I call the "Sea of Stars" algorithm for 3 * POSIX fnmatch(). The basic idea is that we factor the pattern into 4 * a head component (which we match first and can reject without ever 5 * measuring the length of the string), an optional tail component 6 * (which only exists if the pattern contains at least one star), and 7 * an optional "sea of stars", a set of star-separated components 8 * between the head and tail. After the head and tail matches have 9 * been removed from the input string, the components in the "sea of 10 * stars" are matched sequentially by searching for their first 11 * occurrence past the end of the previous match. 12 * 13 * - Rich Felker, April 2012 14 */ 15 16#include <string.h> 17#include <fnmatch.h> 18#include <stdlib.h> 19#include <wchar.h> 20#include <wctype.h> 21#include "locale_impl.h" 22 23#define END 0 24#define UNMATCHABLE -2 25#define BRACKET -3 26#define QUESTION -4 27#define STAR -5 28 29static int str_next(const char *str, size_t n, size_t *step) 30{ 31 if (!n) { 32 *step = 0; 33 return 0; 34 } 35 if (str[0] >= 128U) { 36 wchar_t wc; 37 int k = mbtowc(&wc, str, n); 38 if (k<0) { 39 *step = 1; 40 return -1; 41 } 42 *step = k; 43 return wc; 44 } 45 *step = 1; 46 return str[0]; 47} 48 49static int pat_next(const char *pat, size_t m, size_t *step, int flags) 50{ 51 int esc = 0; 52 if (!m || !*pat) { 53 *step = 0; 54 return END; 55 } 56 *step = 1; 57 if (pat[0]=='\\' && pat[1] && !(flags & FNM_NOESCAPE)) { 58 *step = 2; 59 pat++; 60 esc = 1; 61 goto escaped; 62 } 63 if (pat[0]=='[') { 64 size_t k = 1; 65 if (k<m) if (pat[k] == '^' || pat[k] == '!') k++; 66 if (k<m) if (pat[k] == ']') k++; 67 for (; k<m && pat[k] && pat[k]!=']'; k++) { 68 if (k+1<m && pat[k+1] && pat[k]=='[' && (pat[k+1]==':' || pat[k+1]=='.' || pat[k+1]=='=')) { 69 int z = pat[k+1]; 70 k+=2; 71 if (k<m && pat[k]) k++; 72 while (k<m && pat[k] && (pat[k-1]!=z || pat[k]!=']')) k++; 73 if (k==m || !pat[k]) break; 74 } 75 } 76 if (k==m || !pat[k]) { 77 *step = 1; 78 return '['; 79 } 80 *step = k+1; 81 return BRACKET; 82 } 83 if (pat[0] == '*') 84 return STAR; 85 if (pat[0] == '?') 86 return QUESTION; 87escaped: 88 if (pat[0] >= 128U) { 89 wchar_t wc; 90 int k = mbtowc(&wc, pat, m); 91 if (k<0) { 92 *step = 0; 93 return UNMATCHABLE; 94 } 95 *step = k + esc; 96 return wc; 97 } 98 return pat[0]; 99} 100 101static int casefold(int k) 102{ 103 int c = towupper(k); 104 return c == k ? towlower(k) : c; 105} 106 107static int match_bracket(const char *p, int k, int kfold) 108{ 109 wchar_t wc; 110 int inv = 0; 111 p++; 112 if (*p=='^' || *p=='!') { 113 inv = 1; 114 p++; 115 } 116 if (*p==']') { 117 if (k==']') return !inv; 118 p++; 119 } else if (*p=='-') { 120 if (k=='-') return !inv; 121 p++; 122 } 123 wc = p[-1]; 124 for (; *p != ']'; p++) { 125 if (p[0]=='-' && p[1]!=']') { 126 wchar_t wc2; 127 int l = mbtowc(&wc2, p+1, 4); 128 if (l < 0) return 0; 129 if (wc <= wc2) 130 if ((unsigned)k-wc <= wc2-wc || 131 (unsigned)kfold-wc <= wc2-wc) 132 return !inv; 133 p += l-1; 134 continue; 135 } 136 if (p[0]=='[' && (p[1]==':' || p[1]=='.' || p[1]=='=')) { 137 const char *p0 = p+2; 138 int z = p[1]; 139 p+=3; 140 while (p[-1]!=z || p[0]!=']') p++; 141 if (z == ':' && p-1-p0 < 16) { 142 char buf[16]; 143 memcpy(buf, p0, p-1-p0); 144 buf[p-1-p0] = 0; 145 if (iswctype(k, wctype(buf)) || 146 iswctype(kfold, wctype(buf))) 147 return !inv; 148 } 149 continue; 150 } 151 if (*p < 128U) { 152 wc = (unsigned char)*p; 153 } else { 154 int l = mbtowc(&wc, p, 4); 155 if (l < 0) return 0; 156 p += l-1; 157 } 158 if (wc==k || wc==kfold) return !inv; 159 } 160 return inv; 161} 162 163static int fnmatch_internal(const char *pat, size_t m, const char *str, size_t n, int flags) 164{ 165 const char *p, *ptail, *endpat; 166 const char *s, *stail, *endstr; 167 size_t pinc, sinc, tailcnt=0; 168 int c, k, kfold; 169 170 if (flags & FNM_PERIOD) { 171 if (*str == '.' && *pat != '.') 172 return FNM_NOMATCH; 173 } 174 for (;;) { 175 switch ((c = pat_next(pat, m, &pinc, flags))) { 176 case UNMATCHABLE: 177 return FNM_NOMATCH; 178 case STAR: 179 pat++; 180 m--; 181 break; 182 default: 183 k = str_next(str, n, &sinc); 184 if (k <= 0) 185 return (c==END) ? 0 : FNM_NOMATCH; 186 str += sinc; 187 n -= sinc; 188 kfold = flags & FNM_CASEFOLD ? casefold(k) : k; 189 if (c == BRACKET) { 190 if (!match_bracket(pat, k, kfold)) 191 return FNM_NOMATCH; 192 } else if (c != QUESTION && k != c && kfold != c) { 193 return FNM_NOMATCH; 194 } 195 pat+=pinc; 196 m-=pinc; 197 continue; 198 } 199 break; 200 } 201 202 /* Compute real pat length if it was initially unknown/-1 */ 203 m = strnlen(pat, m); 204 endpat = pat + m; 205 206 /* Find the last * in pat and count chars needed after it */ 207 for (p=ptail=pat; p<endpat; p+=pinc) { 208 switch (pat_next(p, endpat-p, &pinc, flags)) { 209 case UNMATCHABLE: 210 return FNM_NOMATCH; 211 case STAR: 212 tailcnt=0; 213 ptail = p+1; 214 break; 215 default: 216 tailcnt++; 217 break; 218 } 219 } 220 221 /* Past this point we need not check for UNMATCHABLE in pat, 222 * because all of pat has already been parsed once. */ 223 224 /* Compute real str length if it was initially unknown/-1 */ 225 n = strnlen(str, n); 226 endstr = str + n; 227 if (n < tailcnt) return FNM_NOMATCH; 228 229 /* Find the final tailcnt chars of str, accounting for UTF-8. 230 * On illegal sequences we may get it wrong, but in that case 231 * we necessarily have a matching failure anyway. */ 232 for (s=endstr; s>str && tailcnt; tailcnt--) { 233 if (s[-1] < 128U || MB_CUR_MAX==1) s--; 234 else while ((unsigned char)*--s-0x80U<0x40 && s>str); 235 } 236 if (tailcnt) return FNM_NOMATCH; 237 stail = s; 238 239 /* Check that the pat and str tails match */ 240 p = ptail; 241 for (;;) { 242 c = pat_next(p, endpat-p, &pinc, flags); 243 p += pinc; 244 if ((k = str_next(s, endstr-s, &sinc)) <= 0) { 245 if (c != END) return FNM_NOMATCH; 246 break; 247 } 248 s += sinc; 249 kfold = flags & FNM_CASEFOLD ? casefold(k) : k; 250 if (c == BRACKET) { 251 if (!match_bracket(p-pinc, k, kfold)) 252 return FNM_NOMATCH; 253 } else if (c != QUESTION && k != c && kfold != c) { 254 return FNM_NOMATCH; 255 } 256 } 257 258 /* We're all done with the tails now, so throw them out */ 259 endstr = stail; 260 endpat = ptail; 261 262 /* Match pattern components until there are none left */ 263 while (pat<endpat) { 264 p = pat; 265 s = str; 266 for (;;) { 267 c = pat_next(p, endpat-p, &pinc, flags); 268 p += pinc; 269 /* Encountering * completes/commits a component */ 270 if (c == STAR) { 271 pat = p; 272 str = s; 273 break; 274 } 275 k = str_next(s, endstr-s, &sinc); 276 if (!k) 277 return FNM_NOMATCH; 278 kfold = flags & FNM_CASEFOLD ? casefold(k) : k; 279 if (c == BRACKET) { 280 if (!match_bracket(p-pinc, k, kfold)) 281 break; 282 } else if (c != QUESTION && k != c && kfold != c) { 283 break; 284 } 285 s += sinc; 286 } 287 if (c == STAR) continue; 288 /* If we failed, advance str, by 1 char if it's a valid 289 * char, or past all invalid bytes otherwise. */ 290 k = str_next(str, endstr-str, &sinc); 291 if (k > 0) str += sinc; 292 else for (str++; str_next(str, endstr-str, &sinc)<0; str++); 293 } 294 295 return 0; 296} 297 298int fnmatch(const char *pat, const char *str, int flags) 299{ 300 const char *s, *p; 301 size_t inc; 302 int c; 303 if (flags & FNM_PATHNAME) for (;;) { 304 for (s=str; *s && *s!='/'; s++); 305 for (p=pat; (c=pat_next(p, -1, &inc, flags))!=END && c!='/'; p+=inc); 306 if (c!=*s && (!*s || !(flags & FNM_LEADING_DIR))) 307 return FNM_NOMATCH; 308 if (fnmatch_internal(pat, p-pat, str, s-str, flags)) 309 return FNM_NOMATCH; 310 if (!c) return 0; 311 str = s+1; 312 pat = p+inc; 313 } else if (flags & FNM_LEADING_DIR) { 314 for (s=str; *s; s++) { 315 if (*s != '/') continue; 316 if (!fnmatch_internal(pat, -1, str, s-str, flags)) 317 return 0; 318 } 319 } 320 return fnmatch_internal(pat, -1, str, -1, flags); 321} 322