1/* Licensed to the Apache Software Foundation (ASF) under one or more 2 * contributor license agreements. See the NOTICE file distributed with 3 * this work for additional information regarding copyright ownership. 4 * The ASF licenses this file to You under the Apache License, Version 2.0 5 * (the "License"); you may not use this file except in compliance with 6 * the License. You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 18/* Derived from The Open Group Base Specifications Issue 7, IEEE Std 1003.1-2008 19 * as described in; 20 * http://pubs.opengroup.org/onlinepubs/9699919799/functions/fnmatch.html 21 * 22 * Filename pattern matches defined in section 2.13, "Pattern Matching Notation" 23 * from chapter 2. "Shell Command Language" 24 * http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_13 25 * where; 1. A bracket expression starting with an unquoted <circumflex> '^' 26 * character CONTINUES to specify a non-matching list; 2. an explicit <period> '.' 27 * in a bracket expression matching list, e.g. "[.abc]" does NOT match a leading 28 * <period> in a filename; 3. a <left-square-bracket> '[' which does not introduce 29 * a valid bracket expression is treated as an ordinary character; 4. a differing 30 * number of consecutive slashes within pattern and string will NOT match; 31 * 5. a trailing '\' in FNM_ESCAPE mode is treated as an ordinary '\' character. 32 * 33 * Bracket expansion defined in section 9.3.5, "RE Bracket Expression", 34 * from chapter 9, "Regular Expressions" 35 * http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_03_05 36 * with no support for collating symbols, equivalence class expressions or 37 * character class expressions. A partial range expression with a leading 38 * hyphen following a valid range expression will match only the ordinary 39 * <hyphen> and the ending character (e.g. "[a-m-z]" will match characters 40 * 'a' through 'm', a <hyphen> '-', or a 'z'). 41 * 42 * NOTE: Only POSIX/C single byte locales are correctly supported at this time. 43 * Notably, non-POSIX locales with FNM_CASEFOLD produce undefined results, 44 * particularly in ranges of mixed case (e.g. "[A-z]") or spanning alpha and 45 * nonalpha characters within a range. 46 * 47 * XXX comments below indicate porting required for multi-byte character sets 48 * and non-POSIX locale collation orders; requires mbr* APIs to track shift 49 * state of pattern and string (rewinding pattern and string repeatedly). 50 * 51 * Certain parts of the code assume 0x00-0x3F are unique with any MBCS (e.g. 52 * UTF-8, SHIFT-JIS, etc). Any implementation allowing '\' as an alternate 53 * path delimiter must be aware that 0x5C is NOT unique within SHIFT-JIS. 54 */ 55 56#include "apr_file_info.h" 57#include "apr_fnmatch.h" 58#include "apr_tables.h" 59#include "apr_lib.h" 60#include "apr_strings.h" 61#include <string.h> 62#if APR_HAVE_CTYPE_H 63# include <ctype.h> 64#endif 65 66 67/* Most MBCS/collation/case issues handled here. Wildcard '*' is not handled. 68 * EOS '\0' and the FNM_PATHNAME '/' delimiters are not advanced over, 69 * however the "\/" sequence is advanced to '/'. 70 * 71 * Both pattern and string are **char to support pointer increment of arbitrary 72 * multibyte characters for the given locale, in a later iteration of this code 73 */ 74static APR_INLINE int fnmatch_ch(const char **pattern, const char **string, int flags) 75{ 76 const char * const mismatch = *pattern; 77 const int nocase = !!(flags & APR_FNM_CASE_BLIND); 78 const int escape = !(flags & APR_FNM_NOESCAPE); 79 const int slash = !!(flags & APR_FNM_PATHNAME); 80 int result = APR_FNM_NOMATCH; 81 const char *startch; 82 int negate; 83 84 if (**pattern == '[') 85 { 86 ++*pattern; 87 88 /* Handle negation, either leading ! or ^ operators (never both) */ 89 negate = ((**pattern == '!') || (**pattern == '^')); 90 if (negate) 91 ++*pattern; 92 93 /* ']' is an ordinary character at the start of the range pattern */ 94 if (**pattern == ']') 95 goto leadingclosebrace; 96 97 while (**pattern) 98 { 99 if (**pattern == ']') { 100 ++*pattern; 101 /* XXX: Fix for MBCS character width */ 102 ++*string; 103 return (result ^ negate); 104 } 105 106 if (escape && (**pattern == '\\')) { 107 ++*pattern; 108 109 /* Patterns must be terminated with ']', not EOS */ 110 if (!**pattern) 111 break; 112 } 113 114 /* Patterns must be terminated with ']' not '/' */ 115 if (slash && (**pattern == '/')) 116 break; 117 118leadingclosebrace: 119 /* Look at only well-formed range patterns; 120 * "x-]" is not allowed unless escaped ("x-\]") 121 * XXX: Fix for locale/MBCS character width 122 */ 123 if (((*pattern)[1] == '-') && ((*pattern)[2] != ']')) 124 { 125 startch = *pattern; 126 *pattern += (escape && ((*pattern)[2] == '\\')) ? 3 : 2; 127 128 /* NOT a properly balanced [expr] pattern, EOS terminated 129 * or ranges containing a slash in FNM_PATHNAME mode pattern 130 * fall out to to the rewind and test '[' literal code path 131 */ 132 if (!**pattern || (slash && (**pattern == '/'))) 133 break; 134 135 /* XXX: handle locale/MBCS comparison, advance by MBCS char width */ 136 if ((**string >= *startch) && (**string <= **pattern)) 137 result = 0; 138 else if (nocase && (isupper(**string) || isupper(*startch) 139 || isupper(**pattern)) 140 && (tolower(**string) >= tolower(*startch)) 141 && (tolower(**string) <= tolower(**pattern))) 142 result = 0; 143 144 ++*pattern; 145 continue; 146 } 147 148 /* XXX: handle locale/MBCS comparison, advance by MBCS char width */ 149 if ((**string == **pattern)) 150 result = 0; 151 else if (nocase && (isupper(**string) || isupper(**pattern)) 152 && (tolower(**string) == tolower(**pattern))) 153 result = 0; 154 155 ++*pattern; 156 } 157 158 /* NOT a properly balanced [expr] pattern; Rewind 159 * and reset result to test '[' literal 160 */ 161 *pattern = mismatch; 162 result = APR_FNM_NOMATCH; 163 } 164 else if (**pattern == '?') { 165 /* Optimize '?' match before unescaping **pattern */ 166 if (!**string || (slash && (**string == '/'))) 167 return APR_FNM_NOMATCH; 168 result = 0; 169 goto fnmatch_ch_success; 170 } 171 else if (escape && (**pattern == '\\') && (*pattern)[1]) { 172 ++*pattern; 173 } 174 175 /* XXX: handle locale/MBCS comparison, advance by the MBCS char width */ 176 if (**string == **pattern) 177 result = 0; 178 else if (nocase && (isupper(**string) || isupper(**pattern)) 179 && (tolower(**string) == tolower(**pattern))) 180 result = 0; 181 182 /* Refuse to advance over trailing slash or nulls 183 */ 184 if (!**string || !**pattern || (slash && ((**string == '/') || (**pattern == '/')))) 185 return result; 186 187fnmatch_ch_success: 188 ++*pattern; 189 ++*string; 190 return result; 191} 192 193 194APR_DECLARE(int) apr_fnmatch(const char *pattern, const char *string, int flags) 195{ 196 static const char dummystring[2] = {' ', 0}; 197 const int escape = !(flags & APR_FNM_NOESCAPE); 198 const int slash = !!(flags & APR_FNM_PATHNAME); 199 const char *strendseg; 200 const char *dummyptr; 201 const char *matchptr; 202 int wild; 203 /* For '*' wild processing only; surpress 'used before initialization' 204 * warnings with dummy initialization values; 205 */ 206 const char *strstartseg = NULL; 207 const char *mismatch = NULL; 208 int matchlen = 0; 209 210 if (*pattern == '*') 211 goto firstsegment; 212 213 while (*pattern && *string) 214 { 215 /* Pre-decode "\/" which has no special significance, and 216 * match balanced slashes, starting a new segment pattern 217 */ 218 if (slash && escape && (*pattern == '\\') && (pattern[1] == '/')) 219 ++pattern; 220 if (slash && (*pattern == '/') && (*string == '/')) { 221 ++pattern; 222 ++string; 223 } 224 225firstsegment: 226 /* At the beginning of each segment, validate leading period behavior. 227 */ 228 if ((flags & APR_FNM_PERIOD) && (*string == '.')) 229 { 230 if (*pattern == '.') 231 ++pattern; 232 else if (escape && (*pattern == '\\') && (pattern[1] == '.')) 233 pattern += 2; 234 else 235 return APR_FNM_NOMATCH; 236 ++string; 237 } 238 239 /* Determine the end of string segment 240 * 241 * Presumes '/' character is unique, not composite in any MBCS encoding 242 */ 243 if (slash) { 244 strendseg = strchr(string, '/'); 245 if (!strendseg) 246 strendseg = strchr(string, '\0'); 247 } 248 else { 249 strendseg = strchr(string, '\0'); 250 } 251 252 /* Allow pattern '*' to be consumed even with no remaining string to match 253 */ 254 while (*pattern) 255 { 256 if ((string > strendseg) 257 || ((string == strendseg) && (*pattern != '*'))) 258 break; 259 260 if (slash && ((*pattern == '/') 261 || (escape && (*pattern == '\\') 262 && (pattern[1] == '/')))) 263 break; 264 265 /* Reduce groups of '*' and '?' to n '?' matches 266 * followed by one '*' test for simplicity 267 */ 268 for (wild = 0; ((*pattern == '*') || (*pattern == '?')); ++pattern) 269 { 270 if (*pattern == '*') { 271 wild = 1; 272 } 273 else if (string < strendseg) { /* && (*pattern == '?') */ 274 /* XXX: Advance 1 char for MBCS locale */ 275 ++string; 276 } 277 else { /* (string >= strendseg) && (*pattern == '?') */ 278 return APR_FNM_NOMATCH; 279 } 280 } 281 282 if (wild) 283 { 284 strstartseg = string; 285 mismatch = pattern; 286 287 /* Count fixed (non '*') char matches remaining in pattern 288 * excluding '/' (or "\/") and '*' 289 */ 290 for (matchptr = pattern, matchlen = 0; 1; ++matchlen) 291 { 292 if ((*matchptr == '\0') 293 || (slash && ((*matchptr == '/') 294 || (escape && (*matchptr == '\\') 295 && (matchptr[1] == '/'))))) 296 { 297 /* Compare precisely this many trailing string chars, 298 * the resulting match needs no wildcard loop 299 */ 300 /* XXX: Adjust for MBCS */ 301 if (string + matchlen > strendseg) 302 return APR_FNM_NOMATCH; 303 304 string = strendseg - matchlen; 305 wild = 0; 306 break; 307 } 308 309 if (*matchptr == '*') 310 { 311 /* Ensure at least this many trailing string chars remain 312 * for the first comparison 313 */ 314 /* XXX: Adjust for MBCS */ 315 if (string + matchlen > strendseg) 316 return APR_FNM_NOMATCH; 317 318 /* Begin first wild comparison at the current position */ 319 break; 320 } 321 322 /* Skip forward in pattern by a single character match 323 * Use a dummy fnmatch_ch() test to count one "[range]" escape 324 */ 325 /* XXX: Adjust for MBCS */ 326 if (escape && (*matchptr == '\\') && matchptr[1]) { 327 matchptr += 2; 328 } 329 else if (*matchptr == '[') { 330 dummyptr = dummystring; 331 fnmatch_ch(&matchptr, &dummyptr, flags); 332 } 333 else { 334 ++matchptr; 335 } 336 } 337 } 338 339 /* Incrementally match string against the pattern 340 */ 341 while (*pattern && (string < strendseg)) 342 { 343 /* Success; begin a new wild pattern search 344 */ 345 if (*pattern == '*') 346 break; 347 348 if (slash && ((*string == '/') 349 || (*pattern == '/') 350 || (escape && (*pattern == '\\') 351 && (pattern[1] == '/')))) 352 break; 353 354 /* Compare ch's (the pattern is advanced over "\/" to the '/', 355 * but slashes will mismatch, and are not consumed) 356 */ 357 if (!fnmatch_ch(&pattern, &string, flags)) 358 continue; 359 360 /* Failed to match, loop against next char offset of string segment 361 * until not enough string chars remain to match the fixed pattern 362 */ 363 if (wild) { 364 /* XXX: Advance 1 char for MBCS locale */ 365 string = ++strstartseg; 366 if (string + matchlen > strendseg) 367 return APR_FNM_NOMATCH; 368 369 pattern = mismatch; 370 continue; 371 } 372 else 373 return APR_FNM_NOMATCH; 374 } 375 } 376 377 if (*string && !(slash && (*string == '/'))) 378 return APR_FNM_NOMATCH; 379 380 if (*pattern && !(slash && ((*pattern == '/') 381 || (escape && (*pattern == '\\') 382 && (pattern[1] == '/'))))) 383 return APR_FNM_NOMATCH; 384 } 385 386 /* Where both pattern and string are at EOS, declare success 387 */ 388 if (!*string && !*pattern) 389 return 0; 390 391 /* pattern didn't match to the end of string */ 392 return APR_FNM_NOMATCH; 393} 394 395 396/* This function is an Apache addition 397 * return non-zero if pattern has any glob chars in it 398 * @bug Function does not distinguish for FNM_PATHNAME mode, which renders 399 * a false positive for test[/]this (which is not a range, but 400 * seperate test[ and ]this segments and no glob.) 401 * @bug Function does not distinguish for non-FNM_ESCAPE mode. 402 * @bug Function does not parse []] correctly 403 * Solution may be to use fnmatch_ch() to walk the patterns? 404 */ 405APR_DECLARE(int) apr_fnmatch_test(const char *pattern) 406{ 407 int nesting; 408 409 nesting = 0; 410 while (*pattern) { 411 switch (*pattern) { 412 case '?': 413 case '*': 414 return 1; 415 416 case '\\': 417 if (*++pattern == '\0') { 418 return 0; 419 } 420 break; 421 422 case '[': /* '[' is only a glob if it has a matching ']' */ 423 ++nesting; 424 break; 425 426 case ']': 427 if (nesting) { 428 return 1; 429 } 430 break; 431 } 432 ++pattern; } 433 return 0; 434} 435 436 437/* Find all files matching the specified pattern */ 438APR_DECLARE(apr_status_t) apr_match_glob(const char *pattern, 439 apr_array_header_t **result, 440 apr_pool_t *p) 441{ 442 apr_dir_t *dir; 443 apr_finfo_t finfo; 444 apr_status_t rv; 445 char *path; 446 447 /* XXX So, this is kind of bogus. Basically, I need to strip any leading 448 * directories off the pattern, but there is no portable way to do that. 449 * So, for now we just find the last occurance of '/' and if that doesn't 450 * return anything, then we look for '\'. This means that we could 451 * screw up on unix if the pattern is something like "foo\.*" That '\' 452 * isn't a directory delimiter, it is a part of the filename. To fix this, 453 * we really need apr_filepath_basename, which will be coming as soon as 454 * I get to it. rbb 455 */ 456 char *idx = strrchr(pattern, '/'); 457 458 if (idx == NULL) { 459 idx = strrchr(pattern, '\\'); 460 } 461 if (idx == NULL) { 462 path = "."; 463 } 464 else { 465 path = apr_pstrndup(p, pattern, idx - pattern); 466 pattern = idx + 1; 467 } 468 469 *result = apr_array_make(p, 0, sizeof(char *)); 470 rv = apr_dir_open(&dir, path, p); 471 if (rv != APR_SUCCESS) { 472 return rv; 473 } 474 475 while (apr_dir_read(&finfo, APR_FINFO_NAME, dir) == APR_SUCCESS) { 476 if (apr_fnmatch(pattern, finfo.name, 0) == APR_SUCCESS) { 477 *(const char **)apr_array_push(*result) = apr_pstrdup(p, finfo.name); 478 } 479 } 480 apr_dir_close(dir); 481 return APR_SUCCESS; 482} 483