1/* 2 * Copyright (c) 1989, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Guido van Rossum. 7 * 8 * Copyright (c) 2011 The FreeBSD Foundation 9 * All rights reserved. 10 * Portions of this software were developed by David Chisnall 11 * under sponsorship from the FreeBSD Foundation. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 */ 37 38#if defined(LIBC_SCCS) && !defined(lint) 39static char sccsid[] = "@(#)fnmatch.c 8.2 (Berkeley) 4/16/94"; 40#endif /* LIBC_SCCS and not lint */ 41#include <sys/cdefs.h> 42__FBSDID("$FreeBSD: stable/10/lib/libc/gen/fnmatch.c 324505 2017-10-10 21:04:40Z pfg $"); 43 44/* 45 * Function fnmatch() as specified in POSIX 1003.2-1992, section B.6. 46 * Compares a filename or pathname to a pattern. 47 */ 48 49/* 50 * Some notes on multibyte character support: 51 * 1. Patterns with illegal byte sequences match nothing. 52 * 2. Illegal byte sequences in the "string" argument are handled by treating 53 * them as single-byte characters with a value of the first byte of the 54 * sequence cast to wchar_t. 55 * 3. Multibyte conversion state objects (mbstate_t) are passed around and 56 * used for most, but not all, conversions. Further work will be required 57 * to support state-dependent encodings. 58 */ 59 60#include <fnmatch.h> 61#include <limits.h> 62#include <string.h> 63#include <wchar.h> 64#include <wctype.h> 65 66#include "collate.h" 67 68#define EOS '\0' 69 70#define RANGE_MATCH 1 71#define RANGE_NOMATCH 0 72#define RANGE_ERROR (-1) 73 74static int rangematch(const char *, wchar_t, int, char **, mbstate_t *); 75static int fnmatch1(const char *, const char *, const char *, int, mbstate_t, 76 mbstate_t); 77 78int 79fnmatch(pattern, string, flags) 80 const char *pattern, *string; 81 int flags; 82{ 83 static const mbstate_t initial; 84 85 return (fnmatch1(pattern, string, string, flags, initial, initial)); 86} 87 88static int 89fnmatch1(pattern, string, stringstart, flags, patmbs, strmbs) 90 const char *pattern, *string, *stringstart; 91 int flags; 92 mbstate_t patmbs, strmbs; 93{ 94 const char *bt_pattern, *bt_string; 95 mbstate_t bt_patmbs, bt_strmbs; 96 char *newp; 97 char c; 98 wchar_t pc, sc; 99 size_t pclen, sclen; 100 101 bt_pattern = bt_string = NULL; 102 for (;;) { 103 pclen = mbrtowc(&pc, pattern, MB_LEN_MAX, &patmbs); 104 if (pclen == (size_t)-1 || pclen == (size_t)-2) 105 return (FNM_NOMATCH); 106 pattern += pclen; 107 sclen = mbrtowc(&sc, string, MB_LEN_MAX, &strmbs); 108 if (sclen == (size_t)-1 || sclen == (size_t)-2) { 109 sc = (unsigned char)*string; 110 sclen = 1; 111 memset(&strmbs, 0, sizeof(strmbs)); 112 } 113 switch (pc) { 114 case EOS: 115 if ((flags & FNM_LEADING_DIR) && sc == '/') 116 return (0); 117 if (sc == EOS) 118 return (0); 119 goto backtrack; 120 case '?': 121 if (sc == EOS) 122 return (FNM_NOMATCH); 123 if (sc == '/' && (flags & FNM_PATHNAME)) 124 goto backtrack; 125 if (sc == '.' && (flags & FNM_PERIOD) && 126 (string == stringstart || 127 ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) 128 goto backtrack; 129 string += sclen; 130 break; 131 case '*': 132 c = *pattern; 133 /* Collapse multiple stars. */ 134 while (c == '*') 135 c = *++pattern; 136 137 if (sc == '.' && (flags & FNM_PERIOD) && 138 (string == stringstart || 139 ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) 140 goto backtrack; 141 142 /* Optimize for pattern with * at end or before /. */ 143 if (c == EOS) 144 if (flags & FNM_PATHNAME) 145 return ((flags & FNM_LEADING_DIR) || 146 strchr(string, '/') == NULL ? 147 0 : FNM_NOMATCH); 148 else 149 return (0); 150 else if (c == '/' && flags & FNM_PATHNAME) { 151 if ((string = strchr(string, '/')) == NULL) 152 return (FNM_NOMATCH); 153 break; 154 } 155 156 /* 157 * First try the shortest match for the '*' that 158 * could work. We can forget any earlier '*' since 159 * there is no way having it match more characters 160 * can help us, given that we are already here. 161 */ 162 bt_pattern = pattern, bt_patmbs = patmbs; 163 bt_string = string, bt_strmbs = strmbs; 164 break; 165 case '[': 166 if (sc == EOS) 167 return (FNM_NOMATCH); 168 if (sc == '/' && (flags & FNM_PATHNAME)) 169 goto backtrack; 170 if (sc == '.' && (flags & FNM_PERIOD) && 171 (string == stringstart || 172 ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) 173 goto backtrack; 174 175 switch (rangematch(pattern, sc, flags, &newp, 176 &patmbs)) { 177 case RANGE_ERROR: 178 goto norm; 179 case RANGE_MATCH: 180 pattern = newp; 181 break; 182 case RANGE_NOMATCH: 183 goto backtrack; 184 } 185 string += sclen; 186 break; 187 case '\\': 188 if (!(flags & FNM_NOESCAPE)) { 189 pclen = mbrtowc(&pc, pattern, MB_LEN_MAX, 190 &patmbs); 191 if (pclen == 0 || pclen == (size_t)-1 || 192 pclen == (size_t)-2) 193 return (FNM_NOMATCH); 194 pattern += pclen; 195 } 196 /* FALLTHROUGH */ 197 default: 198 norm: 199 string += sclen; 200 if (pc == sc) 201 ; 202 else if ((flags & FNM_CASEFOLD) && 203 (towlower(pc) == towlower(sc))) 204 ; 205 else { 206 backtrack: 207 /* 208 * If we have a mismatch (other than hitting 209 * the end of the string), go back to the last 210 * '*' seen and have it match one additional 211 * character. 212 */ 213 if (bt_pattern == NULL) 214 return (FNM_NOMATCH); 215 sclen = mbrtowc(&sc, bt_string, MB_LEN_MAX, 216 &bt_strmbs); 217 if (sclen == (size_t)-1 || 218 sclen == (size_t)-2) { 219 sc = (unsigned char)*bt_string; 220 sclen = 1; 221 memset(&bt_strmbs, 0, 222 sizeof(bt_strmbs)); 223 } 224 if (sc == EOS) 225 return (FNM_NOMATCH); 226 if (sc == '/' && flags & FNM_PATHNAME) 227 return (FNM_NOMATCH); 228 bt_string += sclen; 229 pattern = bt_pattern, patmbs = bt_patmbs; 230 string = bt_string, strmbs = bt_strmbs; 231 } 232 break; 233 } 234 } 235 /* NOTREACHED */ 236} 237 238static int 239rangematch(pattern, test, flags, newp, patmbs) 240 const char *pattern; 241 wchar_t test; 242 int flags; 243 char **newp; 244 mbstate_t *patmbs; 245{ 246 int negate, ok; 247 wchar_t c, c2; 248 size_t pclen; 249 const char *origpat; 250 struct xlocale_collate *table = 251 (struct xlocale_collate*)__get_locale()->components[XLC_COLLATE]; 252 253 /* 254 * A bracket expression starting with an unquoted circumflex 255 * character produces unspecified results (IEEE 1003.2-1992, 256 * 3.13.2). This implementation treats it like '!', for 257 * consistency with the regular expression syntax. 258 * J.T. Conklin (conklin@ngai.kaleida.com) 259 */ 260 if ( (negate = (*pattern == '!' || *pattern == '^')) ) 261 ++pattern; 262 263 if (flags & FNM_CASEFOLD) 264 test = towlower(test); 265 266 /* 267 * A right bracket shall lose its special meaning and represent 268 * itself in a bracket expression if it occurs first in the list. 269 * -- POSIX.2 2.8.3.2 270 */ 271 ok = 0; 272 origpat = pattern; 273 for (;;) { 274 if (*pattern == ']' && pattern > origpat) { 275 pattern++; 276 break; 277 } else if (*pattern == '\0') { 278 return (RANGE_ERROR); 279 } else if (*pattern == '/' && (flags & FNM_PATHNAME)) { 280 return (RANGE_NOMATCH); 281 } else if (*pattern == '\\' && !(flags & FNM_NOESCAPE)) 282 pattern++; 283 pclen = mbrtowc(&c, pattern, MB_LEN_MAX, patmbs); 284 if (pclen == (size_t)-1 || pclen == (size_t)-2) 285 return (RANGE_NOMATCH); 286 pattern += pclen; 287 288 if (flags & FNM_CASEFOLD) 289 c = towlower(c); 290 291 if (*pattern == '-' && *(pattern + 1) != EOS && 292 *(pattern + 1) != ']') { 293 if (*++pattern == '\\' && !(flags & FNM_NOESCAPE)) 294 if (*pattern != EOS) 295 pattern++; 296 pclen = mbrtowc(&c2, pattern, MB_LEN_MAX, patmbs); 297 if (pclen == (size_t)-1 || pclen == (size_t)-2) 298 return (RANGE_NOMATCH); 299 pattern += pclen; 300 if (c2 == EOS) 301 return (RANGE_ERROR); 302 303 if (flags & FNM_CASEFOLD) 304 c2 = towlower(c2); 305 306 if (table->__collate_load_error ? 307 c <= test && test <= c2 : 308 __wcollate_range_cmp(c, test) <= 0 309 && __wcollate_range_cmp(test, c2) <= 0 310 ) 311 ok = 1; 312 } else if (c == test) 313 ok = 1; 314 } 315 316 *newp = (char *)pattern; 317 return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH); 318} 319