1/* 2 * Copyright (c) 1989, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Guido van Rossum. 7 * 8 * Copyright (c) 2011 The FreeBSD Foundation 9 * All rights reserved. 10 * Portions of this software were developed by David Chisnall 11 * under sponsorship from the FreeBSD Foundation. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 */ 37 38#if defined(LIBC_SCCS) && !defined(lint) 39static char sccsid[] = "@(#)fnmatch.c 8.2 (Berkeley) 4/16/94"; 40#endif /* LIBC_SCCS and not lint */ 41#include <sys/cdefs.h> 42__FBSDID("$FreeBSD: releng/10.3/lib/libc/gen/fnmatch.c 289943 2015-10-25 21:39:23Z jilles $"); 43 44/* 45 * Function fnmatch() as specified in POSIX 1003.2-1992, section B.6. 46 * Compares a filename or pathname to a pattern. 47 */ 48 49/* 50 * Some notes on multibyte character support: 51 * 1. Patterns with illegal byte sequences match nothing. 52 * 2. Illegal byte sequences in the "string" argument are handled by treating 53 * them as single-byte characters with a value of the first byte of the 54 * sequence cast to wchar_t. 55 * 3. Multibyte conversion state objects (mbstate_t) are passed around and 56 * used for most, but not all, conversions. Further work will be required 57 * to support state-dependent encodings. 58 */ 59 60#include <fnmatch.h> 61#include <limits.h> 62#include <string.h> 63#include <wchar.h> 64#include <wctype.h> 65 66#include "collate.h" 67 68#define EOS '\0' 69 70#define RANGE_MATCH 1 71#define RANGE_NOMATCH 0 72#define RANGE_ERROR (-1) 73 74static int rangematch(const char *, wchar_t, int, char **, mbstate_t *); 75static int fnmatch1(const char *, const char *, const char *, int, mbstate_t, 76 mbstate_t); 77 78int 79fnmatch(pattern, string, flags) 80 const char *pattern, *string; 81 int flags; 82{ 83 static const mbstate_t initial; 84 85 return (fnmatch1(pattern, string, string, flags, initial, initial)); 86} 87 88static int 89fnmatch1(pattern, string, stringstart, flags, patmbs, strmbs) 90 const char *pattern, *string, *stringstart; 91 int flags; 92 mbstate_t patmbs, strmbs; 93{ 94 const char *bt_pattern, *bt_string; 95 mbstate_t bt_patmbs, bt_strmbs; 96 char *newp; 97 char c; 98 wchar_t pc, sc; 99 size_t pclen, sclen; 100 101 bt_pattern = bt_string = NULL; 102 for (;;) { 103 pclen = mbrtowc(&pc, pattern, MB_LEN_MAX, &patmbs); 104 if (pclen == (size_t)-1 || pclen == (size_t)-2) 105 return (FNM_NOMATCH); 106 pattern += pclen; 107 sclen = mbrtowc(&sc, string, MB_LEN_MAX, &strmbs); 108 if (sclen == (size_t)-1 || sclen == (size_t)-2) { 109 sc = (unsigned char)*string; 110 sclen = 1; 111 memset(&strmbs, 0, sizeof(strmbs)); 112 } 113 switch (pc) { 114 case EOS: 115 if ((flags & FNM_LEADING_DIR) && sc == '/') 116 return (0); 117 if (sc == EOS) 118 return (0); 119 goto backtrack; 120 case '?': 121 if (sc == EOS) 122 return (FNM_NOMATCH); 123 if (sc == '/' && (flags & FNM_PATHNAME)) 124 goto backtrack; 125 if (sc == '.' && (flags & FNM_PERIOD) && 126 (string == stringstart || 127 ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) 128 goto backtrack; 129 string += sclen; 130 break; 131 case '*': 132 c = *pattern; 133 /* Collapse multiple stars. */ 134 while (c == '*') 135 c = *++pattern; 136 137 if (sc == '.' && (flags & FNM_PERIOD) && 138 (string == stringstart || 139 ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) 140 goto backtrack; 141 142 /* Optimize for pattern with * at end or before /. */ 143 if (c == EOS) 144 if (flags & FNM_PATHNAME) 145 return ((flags & FNM_LEADING_DIR) || 146 strchr(string, '/') == NULL ? 147 0 : FNM_NOMATCH); 148 else 149 return (0); 150 else if (c == '/' && flags & FNM_PATHNAME) { 151 if ((string = strchr(string, '/')) == NULL) 152 return (FNM_NOMATCH); 153 break; 154 } 155 156 /* 157 * First try the shortest match for the '*' that 158 * could work. We can forget any earlier '*' since 159 * there is no way having it match more characters 160 * can help us, given that we are already here. 161 */ 162 bt_pattern = pattern, bt_patmbs = patmbs; 163 bt_string = string, bt_strmbs = strmbs; 164 break; 165 case '[': 166 if (sc == EOS) 167 return (FNM_NOMATCH); 168 if (sc == '/' && (flags & FNM_PATHNAME)) 169 goto backtrack; 170 if (sc == '.' && (flags & FNM_PERIOD) && 171 (string == stringstart || 172 ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) 173 goto backtrack; 174 175 switch (rangematch(pattern, sc, flags, &newp, 176 &patmbs)) { 177 case RANGE_ERROR: 178 goto norm; 179 case RANGE_MATCH: 180 pattern = newp; 181 break; 182 case RANGE_NOMATCH: 183 goto backtrack; 184 } 185 string += sclen; 186 break; 187 case '\\': 188 if (!(flags & FNM_NOESCAPE)) { 189 pclen = mbrtowc(&pc, pattern, MB_LEN_MAX, 190 &patmbs); 191 if (pclen == (size_t)-1 || pclen == (size_t)-2) 192 return (FNM_NOMATCH); 193 pattern += pclen; 194 } 195 /* FALLTHROUGH */ 196 default: 197 norm: 198 string += sclen; 199 if (pc == sc) 200 ; 201 else if ((flags & FNM_CASEFOLD) && 202 (towlower(pc) == towlower(sc))) 203 ; 204 else { 205 backtrack: 206 /* 207 * If we have a mismatch (other than hitting 208 * the end of the string), go back to the last 209 * '*' seen and have it match one additional 210 * character. 211 */ 212 if (bt_pattern == NULL) 213 return (FNM_NOMATCH); 214 sclen = mbrtowc(&sc, bt_string, MB_LEN_MAX, 215 &bt_strmbs); 216 if (sclen == (size_t)-1 || 217 sclen == (size_t)-2) { 218 sc = (unsigned char)*bt_string; 219 sclen = 1; 220 memset(&bt_strmbs, 0, 221 sizeof(bt_strmbs)); 222 } 223 if (sc == EOS) 224 return (FNM_NOMATCH); 225 if (sc == '/' && flags & FNM_PATHNAME) 226 return (FNM_NOMATCH); 227 bt_string += sclen; 228 pattern = bt_pattern, patmbs = bt_patmbs; 229 string = bt_string, strmbs = bt_strmbs; 230 } 231 break; 232 } 233 } 234 /* NOTREACHED */ 235} 236 237static int 238rangematch(pattern, test, flags, newp, patmbs) 239 const char *pattern; 240 wchar_t test; 241 int flags; 242 char **newp; 243 mbstate_t *patmbs; 244{ 245 int negate, ok; 246 wchar_t c, c2; 247 size_t pclen; 248 const char *origpat; 249 struct xlocale_collate *table = 250 (struct xlocale_collate*)__get_locale()->components[XLC_COLLATE]; 251 252 /* 253 * A bracket expression starting with an unquoted circumflex 254 * character produces unspecified results (IEEE 1003.2-1992, 255 * 3.13.2). This implementation treats it like '!', for 256 * consistency with the regular expression syntax. 257 * J.T. Conklin (conklin@ngai.kaleida.com) 258 */ 259 if ( (negate = (*pattern == '!' || *pattern == '^')) ) 260 ++pattern; 261 262 if (flags & FNM_CASEFOLD) 263 test = towlower(test); 264 265 /* 266 * A right bracket shall lose its special meaning and represent 267 * itself in a bracket expression if it occurs first in the list. 268 * -- POSIX.2 2.8.3.2 269 */ 270 ok = 0; 271 origpat = pattern; 272 for (;;) { 273 if (*pattern == ']' && pattern > origpat) { 274 pattern++; 275 break; 276 } else if (*pattern == '\0') { 277 return (RANGE_ERROR); 278 } else if (*pattern == '/' && (flags & FNM_PATHNAME)) { 279 return (RANGE_NOMATCH); 280 } else if (*pattern == '\\' && !(flags & FNM_NOESCAPE)) 281 pattern++; 282 pclen = mbrtowc(&c, pattern, MB_LEN_MAX, patmbs); 283 if (pclen == (size_t)-1 || pclen == (size_t)-2) 284 return (RANGE_NOMATCH); 285 pattern += pclen; 286 287 if (flags & FNM_CASEFOLD) 288 c = towlower(c); 289 290 if (*pattern == '-' && *(pattern + 1) != EOS && 291 *(pattern + 1) != ']') { 292 if (*++pattern == '\\' && !(flags & FNM_NOESCAPE)) 293 if (*pattern != EOS) 294 pattern++; 295 pclen = mbrtowc(&c2, pattern, MB_LEN_MAX, patmbs); 296 if (pclen == (size_t)-1 || pclen == (size_t)-2) 297 return (RANGE_NOMATCH); 298 pattern += pclen; 299 if (c2 == EOS) 300 return (RANGE_ERROR); 301 302 if (flags & FNM_CASEFOLD) 303 c2 = towlower(c2); 304 305 if (table->__collate_load_error ? 306 c <= test && test <= c2 : 307 __collate_range_cmp(table, c, test) <= 0 308 && __collate_range_cmp(table, test, c2) <= 0 309 ) 310 ok = 1; 311 } else if (c == test) 312 ok = 1; 313 } 314 315 *newp = (char *)pattern; 316 return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH); 317} 318