1/* 2 * Copyright (c) 1989, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Guido van Rossum. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33#if defined(LIBC_SCCS) && !defined(lint) 34static char sccsid[] = "@(#)fnmatch.c 8.2 (Berkeley) 4/16/94"; 35#endif /* LIBC_SCCS and not lint */ 36#include <sys/cdefs.h> 37__FBSDID("$FreeBSD: src/lib/libc/gen/fnmatch.c,v 1.19 2010/04/16 22:29:24 jilles Exp $"); 38 39#include "xlocale_private.h" 40 41/* 42 * Function fnmatch() as specified in POSIX 1003.2-1992, section B.6. 43 * Compares a filename or pathname to a pattern. 44 */ 45 46/* 47 * Some notes on multibyte character support: 48 * 1. Patterns with illegal byte sequences match nothing. 49 * 2. Illegal byte sequences in the "string" argument are handled by treating 50 * them as single-byte characters with a value of the first byte of the 51 * sequence cast to wchar_t. 52 * 3. Multibyte conversion state objects (mbstate_t) are passed around and 53 * used for most, but not all, conversions. Further work will be required 54 * to support state-dependent encodings. 55 */ 56 57#include <fnmatch.h> 58#include <limits.h> 59#include <string.h> 60#include <wchar.h> 61#include <wctype.h> 62 63#include "collate.h" 64 65#define EOS '\0' 66 67#define RETURN_ERROR 2 /* neither 0 or FNM_NOMATCH */ 68#define RANGE_MATCH 1 69#define RANGE_NOMATCH 0 70#define RANGE_ERROR (-1) 71 72#define RECURSION_MAX 64 73 74__private_extern__ int rangematch(const char *, wchar_t, const char *, int, char **, char **, mbstate_t *, mbstate_t *, locale_t); 75static int fnmatch1(const char *, const char *, const char *, int, mbstate_t, 76 mbstate_t, locale_t, int); 77 78int 79fnmatch(pattern, string, flags) 80 const char *pattern, *string; 81 int flags; 82{ 83 static const mbstate_t initial; 84#if __DARWIN_UNIX03 85 return (fnmatch1(pattern, string, string, flags, initial, initial, __current_locale(), RECURSION_MAX)); 86#else /* !__DARWIN_UNIX03 */ 87 return (fnmatch1(pattern, string, string, flags, initial, initial, __current_locale(), RECURSION_MAX) != 0 ? FNM_NOMATCH : 0); 88#endif /* __DARWIN_UNIX03 */ 89} 90 91static int 92fnmatch1(pattern, string, stringstart, flags, patmbs, strmbs, loc, recursion) 93 const char *pattern, *string, *stringstart; 94 int flags; 95 mbstate_t patmbs, strmbs; 96 locale_t loc; 97 int recursion; 98{ 99 char *newp, *news; 100 char c; 101 wchar_t pc, sc; 102 size_t pclen, sclen; 103 104 if (recursion-- <= 0) 105 return RETURN_ERROR; 106 for (;;) { 107 pclen = mbrtowc_l(&pc, pattern, MB_LEN_MAX, &patmbs, loc); 108 if (pclen == (size_t)-1 || pclen == (size_t)-2) 109#if __DARWIN_UNIX03 110 return (RETURN_ERROR); 111#else /* !__DARWIN_UNIX03 */ 112 return (FNM_NOMATCH); 113#endif /* __DARWIN_UNIX03 */ 114 pattern += pclen; 115 sclen = mbrtowc_l(&sc, string, MB_LEN_MAX, &strmbs, loc); 116 if (sclen == (size_t)-1 || sclen == (size_t)-2) { 117 sc = (unsigned char)*string; 118 sclen = 1; 119 memset(&strmbs, 0, sizeof(strmbs)); 120 } 121 switch (pc) { 122 case EOS: 123 if ((flags & FNM_LEADING_DIR) && sc == '/') 124 return (0); 125 return (sc == EOS ? 0 : FNM_NOMATCH); 126 case '?': 127 if (sc == EOS) 128 return (FNM_NOMATCH); 129 if (sc == '/' && (flags & FNM_PATHNAME)) 130 return (FNM_NOMATCH); 131 if (sc == '.' && (flags & FNM_PERIOD) && 132 (string == stringstart || 133 ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) 134 return (FNM_NOMATCH); 135 string += sclen; 136 break; 137 case '*': 138 c = *pattern; 139 /* Collapse multiple stars. */ 140 while (c == '*') 141 c = *++pattern; 142 143 if (sc == '.' && (flags & FNM_PERIOD) && 144 (string == stringstart || 145 ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) 146 return (FNM_NOMATCH); 147 148 /* Optimize for pattern with * at end or before /. */ 149 if (c == EOS) 150 if (flags & FNM_PATHNAME) 151 return ((flags & FNM_LEADING_DIR) || 152 strchr(string, '/') == NULL ? 153 0 : FNM_NOMATCH); 154 else 155 return (0); 156 else if (c == '/' && flags & FNM_PATHNAME) { 157 if ((string = strchr(string, '/')) == NULL) 158 return (FNM_NOMATCH); 159 break; 160 } 161 162 /* General case, use recursion. */ 163 int ret; 164 while (sc != EOS) { 165 if ((ret = fnmatch1(pattern, string, stringstart, 166 flags, patmbs, strmbs, loc, recursion)) != FNM_NOMATCH) 167 return (ret); 168 sclen = mbrtowc_l(&sc, string, MB_LEN_MAX, 169 &strmbs, loc); 170 if (sclen == (size_t)-1 || 171 sclen == (size_t)-2) { 172 sc = (unsigned char)*string; 173 sclen = 1; 174 memset(&strmbs, 0, sizeof(strmbs)); 175 } 176 if (sc == '/' && flags & FNM_PATHNAME) 177 break; 178 string += sclen; 179 } 180 return (FNM_NOMATCH); 181 case '[': 182 if (sc == EOS) 183 return (FNM_NOMATCH); 184 if (sc == '/' && (flags & FNM_PATHNAME)) 185 return (FNM_NOMATCH); 186 if (sc == '.' && (flags & FNM_PERIOD) && 187 (string == stringstart || 188 ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) 189 return (FNM_NOMATCH); 190 191 switch (rangematch(pattern, sc, string + sclen, flags, 192 &newp, &news, &patmbs, &strmbs, loc)) { 193 case RANGE_ERROR: 194#if __DARWIN_UNIX03 195 return (RETURN_ERROR); 196#else /* !__DARWIN_UNIX03 */ 197 goto norm; 198#endif /* __DARWIN_UNIX03 */ 199 case RANGE_MATCH: 200 pattern = newp; 201 string = news; 202 break; 203 case RANGE_NOMATCH: 204 return (FNM_NOMATCH); 205 } 206 break; 207 case '\\': 208 if (!(flags & FNM_NOESCAPE)) { 209 pclen = mbrtowc_l(&pc, pattern, MB_LEN_MAX, 210 &patmbs, loc); 211 if (pclen == (size_t)-1 || pclen == (size_t)-2) 212#if __DARWIN_UNIX03 213 return (RETURN_ERROR); 214#else /* !__DARWIN_UNIX03 */ 215 return (FNM_NOMATCH); 216#endif /* __DARWIN_UNIX03 */ 217 if (pclen == 0) 218 pc = '\\'; 219 pattern += pclen; 220 } 221 /* FALLTHROUGH */ 222 default: 223#if !__DARWIN_UNIX03 224 norm: 225#endif /* !__DARWIN_UNIX03 */ 226 if (pc == sc) 227 ; 228 else if ((flags & FNM_CASEFOLD) && 229 (towlower_l(pc, loc) == towlower_l(sc, loc))) 230 ; 231 else 232 return (FNM_NOMATCH); 233 string += sclen; 234 break; 235 } 236 } 237 /* NOTREACHED */ 238} 239 240#ifndef BUILDING_VARIANT 241__private_extern__ int 242rangematch(pattern, test, string, flags, newp, news, patmbs, strmbs, loc) 243 const char *pattern, *string; 244 wchar_t test; 245 int flags; 246 char **newp, **news; 247 mbstate_t *patmbs, *strmbs; 248 locale_t loc; 249{ 250 int negate, ok, special; 251 wchar_t c, c2; 252 wchar_t buf[STR_LEN]; /* STR_LEN defined in collate.h */ 253 size_t pclen, sclen, len; 254 const char *origpat, *cp, *savestring; 255 mbstate_t save; 256 257 /* 258 * A bracket expression starting with an unquoted circumflex 259 * character produces unspecified results (IEEE 1003.2-1992, 260 * 3.13.2). This implementation treats it like '!', for 261 * consistency with the regular expression syntax. 262 * J.T. Conklin (conklin@ngai.kaleida.com) 263 */ 264 if ( (negate = (*pattern == '!' || *pattern == '^')) ) 265 ++pattern; 266 267 if (flags & FNM_CASEFOLD) 268 test = towlower_l(test, loc); 269 270 /* 271 * A right bracket shall lose its special meaning and represent 272 * itself in a bracket expression if it occurs first in the list. 273 * -- POSIX.2 2.8.3.2 274 */ 275 ok = 0; 276 origpat = pattern; 277 for (;;) { 278 c = 0; 279 if (*pattern == ']' && pattern > origpat) { 280 break; 281 } else if (*pattern == '\0') { 282 return (RANGE_ERROR); 283 } else if (*pattern == '/' && (flags & FNM_PATHNAME)) { 284 return (RANGE_NOMATCH); 285 } else if (*pattern == '\\' && !(flags & FNM_NOESCAPE)) 286 pattern++; 287 else if (*pattern == '[' && ((special = *(pattern + 1)) == '.' || special == '=' || special == ':')) { 288 cp = (pattern += 2); 289 while((cp = strchr(cp, special))) { 290 if (*(cp + 1) == ']') 291 break; 292 cp++; 293 } 294 if (!cp) 295 return (RANGE_ERROR); 296 if (special == '.') { 297treat_like_collating_symbol: 298 len = __collate_collating_symbol(buf, STR_LEN, pattern, cp - pattern, patmbs, loc); 299 if (len == (size_t)-1 || len == 0) 300 return (RANGE_ERROR); 301 pattern = cp + 2; 302 if (len > 1) { 303 wchar_t *wp, sc; 304 /* no multi-character collation symbols as start of range */ 305 if (*(cp + 2) == '-' && *(cp + 3) != EOS 306 && *(cp + 3) != ']') 307 return (RANGE_ERROR); 308 wp = buf; 309 if (test != *wp++) 310 continue; 311 if (len == 1) { 312 ok = 1; 313 break; 314 } 315 memcpy(&save, strmbs, sizeof(save)); 316 savestring = string; 317 while (--len > 0) { 318 sclen = mbrtowc_l(&sc, string, MB_LEN_MAX, strmbs, loc); 319 if (sclen == (size_t)-1 || sclen == (size_t)-2) { 320 sc = (unsigned char)*string; 321 sclen = 1; 322 memset(&strmbs, 0, sizeof(strmbs)); 323 } 324 if (sc != *wp++) { 325 memcpy(strmbs, &save, sizeof(save)); 326 string = savestring; 327 break; 328 } 329 string += sclen; 330 } 331 if (len == 0) { 332 ok = 1; 333 break; 334 } 335 continue; /* no match */ 336 } 337 c = *buf; 338 } else if (special == '=') { 339 int ec; 340 memcpy(&save, patmbs, sizeof(save)); 341 ec = __collate_equiv_class(pattern, cp - pattern, patmbs, loc); 342 if (ec < 0) 343 return (RANGE_ERROR); 344 if (ec == 0) { 345 memcpy(patmbs, &save, sizeof(save)); 346 goto treat_like_collating_symbol; 347 } 348 pattern = cp + 2; 349 /* no equivalence classes as start of range */ 350 if (*(cp + 2) == '-' && *(cp + 3) != EOS && 351 *(cp + 3) != ']') 352 return (RANGE_ERROR); 353 len = __collate_equiv_match(ec, NULL, 0, test, string, strlen(string), strmbs, &sclen, loc); 354 if (len == (size_t)-1) { 355 return (RANGE_ERROR); 356 } 357 if (len > 0) { 358 ok = 1; 359 string += sclen; 360 break; 361 } 362 continue; 363 } else { /* special == ':' */ 364 wctype_t charclass; 365 char name[CHARCLASS_NAME_MAX + 1]; 366 /* no character classes as start of range */ 367 if (*(cp + 2) == '-' && *(cp + 3) != EOS && 368 *(cp + 3) != ']') 369 return (RANGE_ERROR); 370 /* assume character class names are ascii */ 371 if (cp - pattern > CHARCLASS_NAME_MAX) 372 return (RANGE_ERROR); 373 strlcpy(name, pattern, cp - pattern + 1); 374 pattern = cp + 2; 375 if ((charclass = wctype(name)) == 0) 376 return (RANGE_ERROR); 377 if (iswctype_l(test, charclass, loc)) { 378 ok = 1; 379 break; 380 } 381 continue; 382 } 383 } 384 if (!c) { 385 pclen = mbrtowc_l(&c, pattern, MB_LEN_MAX, patmbs, loc); 386 if (pclen == (size_t)-1 || pclen == (size_t)-2) 387 return (RANGE_ERROR); 388 pattern += pclen; 389 } 390 391 if (flags & FNM_CASEFOLD) 392 c = towlower_l(c, loc); 393 394 if (*pattern == '-' && *(pattern + 1) != EOS && 395 *(pattern + 1) != ']') { 396 if (*++pattern == '\\' && !(flags & FNM_NOESCAPE)) 397 if (*pattern != EOS) 398 pattern++; 399 pclen = mbrtowc_l(&c2, pattern, MB_LEN_MAX, patmbs, loc); 400 if (pclen == (size_t)-1 || pclen == (size_t)-2) 401 return (RANGE_ERROR); 402 pattern += pclen; 403 if (c2 == EOS) 404 return (RANGE_ERROR); 405 406 if ((c2 == '[' && (special = *pattern) == '.') || special == '=' || special == ':') { 407 /* no equivalence classes or character classes as end of range */ 408 if (special == '=' || special == ':') 409 return (RANGE_ERROR); 410 cp = ++pattern; 411 while((cp = strchr(cp, special))) { 412 if (*(cp + 1) == ']') 413 break; 414 cp++; 415 } 416 if (!cp) 417 return (RANGE_ERROR); 418 len = __collate_collating_symbol(buf, STR_LEN, pattern, cp - pattern, patmbs, loc); 419 /* no multi-character collation symbols as end of range */ 420 if (len != 1) 421 return (RANGE_ERROR); 422 pattern = cp + 2; 423 c2 = *buf; 424 } 425 426 if (flags & FNM_CASEFOLD) 427 c2 = towlower_l(c2, loc); 428 429 if (loc->__collate_load_error ? 430 c <= test && test <= c2 : 431 __collate_range_cmp(c, test, loc) <= 0 432 && __collate_range_cmp(test, c2, loc) <= 0 433 ) { 434 ok = 1; 435 break; 436 } 437 } else if (c == test) { 438 ok = 1; 439 break; 440 } 441 } 442 /* go to end of bracket expression */ 443 special = 0; 444 while(*pattern != ']') { 445 if (*pattern == 0) 446 return (RANGE_ERROR); 447 if (*pattern == special) { 448 if (*++pattern == ']') { 449 special = 0; 450 pattern++; 451 } 452 continue; 453 } 454 if (!special && *pattern == '[') { 455 special = *++pattern; 456 if (special != '.' && special != '=' && special != ':') 457 special = 0; 458 else 459 pattern++; 460 continue; 461 } 462 pclen = mbrtowc_l(&c, pattern, MB_LEN_MAX, patmbs, loc); 463 if (pclen == (size_t)-1 || pclen == (size_t)-2) 464 return (RANGE_ERROR); 465 pattern += pclen; 466 } 467 468 *newp = (char *)++pattern; 469 *news = (char *)string; 470 return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH); 471} 472#endif /* BUILDING_VARIANT */ 473