1251875Speter/* Licensed to the Apache Software Foundation (ASF) under one or more 2251875Speter * contributor license agreements. See the NOTICE file distributed with 3251875Speter * this work for additional information regarding copyright ownership. 4251875Speter * The ASF licenses this file to You under the Apache License, Version 2.0 5251875Speter * (the "License"); you may not use this file except in compliance with 6251875Speter * the License. You may obtain a copy of the License at 7251875Speter * 8251875Speter * http://www.apache.org/licenses/LICENSE-2.0 9251875Speter * 10251875Speter * Unless required by applicable law or agreed to in writing, software 11251875Speter * distributed under the License is distributed on an "AS IS" BASIS, 12251875Speter * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13251875Speter * See the License for the specific language governing permissions and 14251875Speter * limitations under the License. 15251875Speter */ 16251875Speter 17251875Speter 18251875Speter/* Derived from The Open Group Base Specifications Issue 7, IEEE Std 1003.1-2008 19251875Speter * as described in; 20251875Speter * http://pubs.opengroup.org/onlinepubs/9699919799/functions/fnmatch.html 21251875Speter * 22251875Speter * Filename pattern matches defined in section 2.13, "Pattern Matching Notation" 23251875Speter * from chapter 2. "Shell Command Language" 24251875Speter * http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_13 25251875Speter * where; 1. A bracket expression starting with an unquoted <circumflex> '^' 26251875Speter * character CONTINUES to specify a non-matching list; 2. an explicit <period> '.' 27251875Speter * in a bracket expression matching list, e.g. "[.abc]" does NOT match a leading 28251875Speter * <period> in a filename; 3. a <left-square-bracket> '[' which does not introduce 29251875Speter * a valid bracket expression is treated as an ordinary character; 4. a differing 30251875Speter * number of consecutive slashes within pattern and string will NOT match; 31251875Speter * 5. a trailing '\' in FNM_ESCAPE mode is treated as an ordinary '\' character. 32251875Speter * 33251875Speter * Bracket expansion defined in section 9.3.5, "RE Bracket Expression", 34251875Speter * from chapter 9, "Regular Expressions" 35251875Speter * http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_03_05 36251875Speter * with no support for collating symbols, equivalence class expressions or 37251875Speter * character class expressions. A partial range expression with a leading 38251875Speter * hyphen following a valid range expression will match only the ordinary 39251875Speter * <hyphen> and the ending character (e.g. "[a-m-z]" will match characters 40251875Speter * 'a' through 'm', a <hyphen> '-', or a 'z'). 41251875Speter * 42251875Speter * NOTE: Only POSIX/C single byte locales are correctly supported at this time. 43251875Speter * Notably, non-POSIX locales with FNM_CASEFOLD produce undefined results, 44251875Speter * particularly in ranges of mixed case (e.g. "[A-z]") or spanning alpha and 45251875Speter * nonalpha characters within a range. 46251875Speter * 47251875Speter * XXX comments below indicate porting required for multi-byte character sets 48251875Speter * and non-POSIX locale collation orders; requires mbr* APIs to track shift 49251875Speter * state of pattern and string (rewinding pattern and string repeatedly). 50251875Speter * 51251875Speter * Certain parts of the code assume 0x00-0x3F are unique with any MBCS (e.g. 52251875Speter * UTF-8, SHIFT-JIS, etc). Any implementation allowing '\' as an alternate 53251875Speter * path delimiter must be aware that 0x5C is NOT unique within SHIFT-JIS. 54251875Speter */ 55251875Speter 56251875Speter#include "apr_file_info.h" 57251875Speter#include "apr_fnmatch.h" 58251875Speter#include "apr_tables.h" 59251875Speter#include "apr_lib.h" 60251875Speter#include "apr_strings.h" 61251875Speter#include <string.h> 62251875Speter#if APR_HAVE_CTYPE_H 63251875Speter# include <ctype.h> 64251875Speter#endif 65251875Speter 66251875Speter 67251875Speter/* Most MBCS/collation/case issues handled here. Wildcard '*' is not handled. 68251875Speter * EOS '\0' and the FNM_PATHNAME '/' delimiters are not advanced over, 69251875Speter * however the "\/" sequence is advanced to '/'. 70251875Speter * 71251875Speter * Both pattern and string are **char to support pointer increment of arbitrary 72251875Speter * multibyte characters for the given locale, in a later iteration of this code 73251875Speter */ 74251875Speterstatic APR_INLINE int fnmatch_ch(const char **pattern, const char **string, int flags) 75251875Speter{ 76251875Speter const char * const mismatch = *pattern; 77251875Speter const int nocase = !!(flags & APR_FNM_CASE_BLIND); 78251875Speter const int escape = !(flags & APR_FNM_NOESCAPE); 79251875Speter const int slash = !!(flags & APR_FNM_PATHNAME); 80251875Speter int result = APR_FNM_NOMATCH; 81251875Speter const char *startch; 82251875Speter int negate; 83251875Speter 84251875Speter if (**pattern == '[') 85251875Speter { 86251875Speter ++*pattern; 87251875Speter 88251875Speter /* Handle negation, either leading ! or ^ operators (never both) */ 89251875Speter negate = ((**pattern == '!') || (**pattern == '^')); 90251875Speter if (negate) 91251875Speter ++*pattern; 92251875Speter 93251875Speter /* ']' is an ordinary character at the start of the range pattern */ 94251875Speter if (**pattern == ']') 95251875Speter goto leadingclosebrace; 96251875Speter 97251875Speter while (**pattern) 98251875Speter { 99251875Speter if (**pattern == ']') { 100251875Speter ++*pattern; 101251875Speter /* XXX: Fix for MBCS character width */ 102251875Speter ++*string; 103251875Speter return (result ^ negate); 104251875Speter } 105251875Speter 106251875Speter if (escape && (**pattern == '\\')) { 107251875Speter ++*pattern; 108251875Speter 109251875Speter /* Patterns must be terminated with ']', not EOS */ 110251875Speter if (!**pattern) 111251875Speter break; 112251875Speter } 113251875Speter 114251875Speter /* Patterns must be terminated with ']' not '/' */ 115251875Speter if (slash && (**pattern == '/')) 116251875Speter break; 117251875Speter 118251875Speterleadingclosebrace: 119251875Speter /* Look at only well-formed range patterns; 120251875Speter * "x-]" is not allowed unless escaped ("x-\]") 121251875Speter * XXX: Fix for locale/MBCS character width 122251875Speter */ 123251875Speter if (((*pattern)[1] == '-') && ((*pattern)[2] != ']')) 124251875Speter { 125251875Speter startch = *pattern; 126251875Speter *pattern += (escape && ((*pattern)[2] == '\\')) ? 3 : 2; 127251875Speter 128251875Speter /* NOT a properly balanced [expr] pattern, EOS terminated 129251875Speter * or ranges containing a slash in FNM_PATHNAME mode pattern 130251875Speter * fall out to to the rewind and test '[' literal code path 131251875Speter */ 132251875Speter if (!**pattern || (slash && (**pattern == '/'))) 133251875Speter break; 134251875Speter 135251875Speter /* XXX: handle locale/MBCS comparison, advance by MBCS char width */ 136251875Speter if ((**string >= *startch) && (**string <= **pattern)) 137251875Speter result = 0; 138251875Speter else if (nocase && (isupper(**string) || isupper(*startch) 139251875Speter || isupper(**pattern)) 140251875Speter && (tolower(**string) >= tolower(*startch)) 141251875Speter && (tolower(**string) <= tolower(**pattern))) 142251875Speter result = 0; 143251875Speter 144251875Speter ++*pattern; 145251875Speter continue; 146251875Speter } 147251875Speter 148251875Speter /* XXX: handle locale/MBCS comparison, advance by MBCS char width */ 149251875Speter if ((**string == **pattern)) 150251875Speter result = 0; 151251875Speter else if (nocase && (isupper(**string) || isupper(**pattern)) 152251875Speter && (tolower(**string) == tolower(**pattern))) 153251875Speter result = 0; 154251875Speter 155251875Speter ++*pattern; 156251875Speter } 157251875Speter 158251875Speter /* NOT a properly balanced [expr] pattern; Rewind 159251875Speter * and reset result to test '[' literal 160251875Speter */ 161251875Speter *pattern = mismatch; 162251875Speter result = APR_FNM_NOMATCH; 163251875Speter } 164251875Speter else if (**pattern == '?') { 165251875Speter /* Optimize '?' match before unescaping **pattern */ 166251875Speter if (!**string || (slash && (**string == '/'))) 167251875Speter return APR_FNM_NOMATCH; 168251875Speter result = 0; 169251875Speter goto fnmatch_ch_success; 170251875Speter } 171251875Speter else if (escape && (**pattern == '\\') && (*pattern)[1]) { 172251875Speter ++*pattern; 173251875Speter } 174251875Speter 175251875Speter /* XXX: handle locale/MBCS comparison, advance by the MBCS char width */ 176251875Speter if (**string == **pattern) 177251875Speter result = 0; 178251875Speter else if (nocase && (isupper(**string) || isupper(**pattern)) 179251875Speter && (tolower(**string) == tolower(**pattern))) 180251875Speter result = 0; 181251875Speter 182251875Speter /* Refuse to advance over trailing slash or nulls 183251875Speter */ 184251875Speter if (!**string || !**pattern || (slash && ((**string == '/') || (**pattern == '/')))) 185251875Speter return result; 186251875Speter 187251875Speterfnmatch_ch_success: 188251875Speter ++*pattern; 189251875Speter ++*string; 190251875Speter return result; 191251875Speter} 192251875Speter 193251875Speter 194251875SpeterAPR_DECLARE(int) apr_fnmatch(const char *pattern, const char *string, int flags) 195251875Speter{ 196251875Speter static const char dummystring[2] = {' ', 0}; 197251875Speter const int escape = !(flags & APR_FNM_NOESCAPE); 198251875Speter const int slash = !!(flags & APR_FNM_PATHNAME); 199251875Speter const char *strendseg; 200251875Speter const char *dummyptr; 201251875Speter const char *matchptr; 202251875Speter int wild; 203251875Speter /* For '*' wild processing only; surpress 'used before initialization' 204251875Speter * warnings with dummy initialization values; 205251875Speter */ 206251875Speter const char *strstartseg = NULL; 207251875Speter const char *mismatch = NULL; 208251875Speter int matchlen = 0; 209251875Speter 210251875Speter if (*pattern == '*') 211251875Speter goto firstsegment; 212251875Speter 213251875Speter while (*pattern && *string) 214251875Speter { 215251875Speter /* Pre-decode "\/" which has no special significance, and 216251875Speter * match balanced slashes, starting a new segment pattern 217251875Speter */ 218251875Speter if (slash && escape && (*pattern == '\\') && (pattern[1] == '/')) 219251875Speter ++pattern; 220251875Speter if (slash && (*pattern == '/') && (*string == '/')) { 221251875Speter ++pattern; 222251875Speter ++string; 223251875Speter } 224251875Speter 225251875Speterfirstsegment: 226251875Speter /* At the beginning of each segment, validate leading period behavior. 227251875Speter */ 228251875Speter if ((flags & APR_FNM_PERIOD) && (*string == '.')) 229251875Speter { 230251875Speter if (*pattern == '.') 231251875Speter ++pattern; 232251875Speter else if (escape && (*pattern == '\\') && (pattern[1] == '.')) 233251875Speter pattern += 2; 234251875Speter else 235251875Speter return APR_FNM_NOMATCH; 236251875Speter ++string; 237251875Speter } 238251875Speter 239251875Speter /* Determine the end of string segment 240251875Speter * 241251875Speter * Presumes '/' character is unique, not composite in any MBCS encoding 242251875Speter */ 243251875Speter if (slash) { 244251875Speter strendseg = strchr(string, '/'); 245251875Speter if (!strendseg) 246251875Speter strendseg = strchr(string, '\0'); 247251875Speter } 248251875Speter else { 249251875Speter strendseg = strchr(string, '\0'); 250251875Speter } 251251875Speter 252251875Speter /* Allow pattern '*' to be consumed even with no remaining string to match 253251875Speter */ 254251875Speter while (*pattern) 255251875Speter { 256251875Speter if ((string > strendseg) 257251875Speter || ((string == strendseg) && (*pattern != '*'))) 258251875Speter break; 259251875Speter 260251875Speter if (slash && ((*pattern == '/') 261251875Speter || (escape && (*pattern == '\\') 262251875Speter && (pattern[1] == '/')))) 263251875Speter break; 264251875Speter 265251875Speter /* Reduce groups of '*' and '?' to n '?' matches 266251875Speter * followed by one '*' test for simplicity 267251875Speter */ 268251875Speter for (wild = 0; ((*pattern == '*') || (*pattern == '?')); ++pattern) 269251875Speter { 270251875Speter if (*pattern == '*') { 271251875Speter wild = 1; 272251875Speter } 273251875Speter else if (string < strendseg) { /* && (*pattern == '?') */ 274251875Speter /* XXX: Advance 1 char for MBCS locale */ 275251875Speter ++string; 276251875Speter } 277251875Speter else { /* (string >= strendseg) && (*pattern == '?') */ 278251875Speter return APR_FNM_NOMATCH; 279251875Speter } 280251875Speter } 281251875Speter 282251875Speter if (wild) 283251875Speter { 284251875Speter strstartseg = string; 285251875Speter mismatch = pattern; 286251875Speter 287251875Speter /* Count fixed (non '*') char matches remaining in pattern 288251875Speter * excluding '/' (or "\/") and '*' 289251875Speter */ 290251875Speter for (matchptr = pattern, matchlen = 0; 1; ++matchlen) 291251875Speter { 292251875Speter if ((*matchptr == '\0') 293251875Speter || (slash && ((*matchptr == '/') 294251875Speter || (escape && (*matchptr == '\\') 295251875Speter && (matchptr[1] == '/'))))) 296251875Speter { 297251875Speter /* Compare precisely this many trailing string chars, 298251875Speter * the resulting match needs no wildcard loop 299251875Speter */ 300251875Speter /* XXX: Adjust for MBCS */ 301251875Speter if (string + matchlen > strendseg) 302251875Speter return APR_FNM_NOMATCH; 303251875Speter 304251875Speter string = strendseg - matchlen; 305251875Speter wild = 0; 306251875Speter break; 307251875Speter } 308251875Speter 309251875Speter if (*matchptr == '*') 310251875Speter { 311251875Speter /* Ensure at least this many trailing string chars remain 312251875Speter * for the first comparison 313251875Speter */ 314251875Speter /* XXX: Adjust for MBCS */ 315251875Speter if (string + matchlen > strendseg) 316251875Speter return APR_FNM_NOMATCH; 317251875Speter 318251875Speter /* Begin first wild comparison at the current position */ 319251875Speter break; 320251875Speter } 321251875Speter 322251875Speter /* Skip forward in pattern by a single character match 323251875Speter * Use a dummy fnmatch_ch() test to count one "[range]" escape 324251875Speter */ 325251875Speter /* XXX: Adjust for MBCS */ 326251875Speter if (escape && (*matchptr == '\\') && matchptr[1]) { 327251875Speter matchptr += 2; 328251875Speter } 329251875Speter else if (*matchptr == '[') { 330251875Speter dummyptr = dummystring; 331251875Speter fnmatch_ch(&matchptr, &dummyptr, flags); 332251875Speter } 333251875Speter else { 334251875Speter ++matchptr; 335251875Speter } 336251875Speter } 337251875Speter } 338251875Speter 339251875Speter /* Incrementally match string against the pattern 340251875Speter */ 341251875Speter while (*pattern && (string < strendseg)) 342251875Speter { 343251875Speter /* Success; begin a new wild pattern search 344251875Speter */ 345251875Speter if (*pattern == '*') 346251875Speter break; 347251875Speter 348251875Speter if (slash && ((*string == '/') 349251875Speter || (*pattern == '/') 350251875Speter || (escape && (*pattern == '\\') 351251875Speter && (pattern[1] == '/')))) 352251875Speter break; 353251875Speter 354251875Speter /* Compare ch's (the pattern is advanced over "\/" to the '/', 355251875Speter * but slashes will mismatch, and are not consumed) 356251875Speter */ 357251875Speter if (!fnmatch_ch(&pattern, &string, flags)) 358251875Speter continue; 359251875Speter 360251875Speter /* Failed to match, loop against next char offset of string segment 361251875Speter * until not enough string chars remain to match the fixed pattern 362251875Speter */ 363251875Speter if (wild) { 364251875Speter /* XXX: Advance 1 char for MBCS locale */ 365251875Speter string = ++strstartseg; 366251875Speter if (string + matchlen > strendseg) 367251875Speter return APR_FNM_NOMATCH; 368251875Speter 369251875Speter pattern = mismatch; 370251875Speter continue; 371251875Speter } 372251875Speter else 373251875Speter return APR_FNM_NOMATCH; 374251875Speter } 375251875Speter } 376251875Speter 377251875Speter if (*string && !(slash && (*string == '/'))) 378251875Speter return APR_FNM_NOMATCH; 379251875Speter 380251875Speter if (*pattern && !(slash && ((*pattern == '/') 381251875Speter || (escape && (*pattern == '\\') 382251875Speter && (pattern[1] == '/'))))) 383251875Speter return APR_FNM_NOMATCH; 384251875Speter } 385251875Speter 386251875Speter /* Where both pattern and string are at EOS, declare success 387251875Speter */ 388251875Speter if (!*string && !*pattern) 389251875Speter return 0; 390251875Speter 391251875Speter /* pattern didn't match to the end of string */ 392251875Speter return APR_FNM_NOMATCH; 393251875Speter} 394251875Speter 395251875Speter 396251875Speter/* This function is an Apache addition 397251875Speter * return non-zero if pattern has any glob chars in it 398251875Speter * @bug Function does not distinguish for FNM_PATHNAME mode, which renders 399251875Speter * a false positive for test[/]this (which is not a range, but 400251875Speter * seperate test[ and ]this segments and no glob.) 401251875Speter * @bug Function does not distinguish for non-FNM_ESCAPE mode. 402251875Speter * @bug Function does not parse []] correctly 403251875Speter * Solution may be to use fnmatch_ch() to walk the patterns? 404251875Speter */ 405251875SpeterAPR_DECLARE(int) apr_fnmatch_test(const char *pattern) 406251875Speter{ 407251875Speter int nesting; 408251875Speter 409251875Speter nesting = 0; 410251875Speter while (*pattern) { 411251875Speter switch (*pattern) { 412251875Speter case '?': 413251875Speter case '*': 414251875Speter return 1; 415251875Speter 416251875Speter case '\\': 417251875Speter if (*++pattern == '\0') { 418251875Speter return 0; 419251875Speter } 420251875Speter break; 421251875Speter 422251875Speter case '[': /* '[' is only a glob if it has a matching ']' */ 423251875Speter ++nesting; 424251875Speter break; 425251875Speter 426251875Speter case ']': 427251875Speter if (nesting) { 428251875Speter return 1; 429251875Speter } 430251875Speter break; 431251875Speter } 432251875Speter ++pattern; } 433251875Speter return 0; 434251875Speter} 435251875Speter 436251875Speter 437251875Speter/* Find all files matching the specified pattern */ 438251875SpeterAPR_DECLARE(apr_status_t) apr_match_glob(const char *pattern, 439251875Speter apr_array_header_t **result, 440251875Speter apr_pool_t *p) 441251875Speter{ 442251875Speter apr_dir_t *dir; 443251875Speter apr_finfo_t finfo; 444251875Speter apr_status_t rv; 445251875Speter char *path; 446251875Speter 447251875Speter /* XXX So, this is kind of bogus. Basically, I need to strip any leading 448251875Speter * directories off the pattern, but there is no portable way to do that. 449251875Speter * So, for now we just find the last occurance of '/' and if that doesn't 450251875Speter * return anything, then we look for '\'. This means that we could 451251875Speter * screw up on unix if the pattern is something like "foo\.*" That '\' 452251875Speter * isn't a directory delimiter, it is a part of the filename. To fix this, 453251875Speter * we really need apr_filepath_basename, which will be coming as soon as 454251875Speter * I get to it. rbb 455251875Speter */ 456251875Speter char *idx = strrchr(pattern, '/'); 457251875Speter 458251875Speter if (idx == NULL) { 459251875Speter idx = strrchr(pattern, '\\'); 460251875Speter } 461251875Speter if (idx == NULL) { 462251875Speter path = "."; 463251875Speter } 464251875Speter else { 465251875Speter path = apr_pstrndup(p, pattern, idx - pattern); 466251875Speter pattern = idx + 1; 467251875Speter } 468251875Speter 469251875Speter *result = apr_array_make(p, 0, sizeof(char *)); 470251875Speter rv = apr_dir_open(&dir, path, p); 471251875Speter if (rv != APR_SUCCESS) { 472251875Speter return rv; 473251875Speter } 474251875Speter 475251875Speter while (apr_dir_read(&finfo, APR_FINFO_NAME, dir) == APR_SUCCESS) { 476251875Speter if (apr_fnmatch(pattern, finfo.name, 0) == APR_SUCCESS) { 477251875Speter *(const char **)apr_array_push(*result) = apr_pstrdup(p, finfo.name); 478251875Speter } 479251875Speter } 480251875Speter apr_dir_close(dir); 481251875Speter return APR_SUCCESS; 482251875Speter} 483