1/*	$OpenBSD: fnmatch.c,v 1.23 2020/10/13 04:42:28 guenther Exp $	*/
2
3/* Copyright (c) 2011, VMware, Inc.
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *     * Redistributions of source code must retain the above copyright
9 *       notice, this list of conditions and the following disclaimer.
10 *     * Redistributions in binary form must reproduce the above copyright
11 *       notice, this list of conditions and the following disclaimer in the
12 *       documentation and/or other materials provided with the distribution.
13 *     * Neither the name of the VMware, Inc. nor the names of its contributors
14 *       may be used to endorse or promote products derived from this software
15 *       without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE FOR
21 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/*
30 * Copyright (c) 2008, 2016 Todd C. Miller <millert@openbsd.org>
31 *
32 * Permission to use, copy, modify, and distribute this software for any
33 * purpose with or without fee is hereby granted, provided that the above
34 * copyright notice and this permission notice appear in all copies.
35 *
36 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
37 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
38 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
39 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
40 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
41 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
42 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
43 */
44
45/* Authored by William A. Rowe Jr. <wrowe; apache.org, vmware.com>, April 2011
46 *
47 * Derived from The Open Group Base Specifications Issue 7, IEEE Std 1003.1-2008
48 * as described in;
49 *   http://pubs.opengroup.org/onlinepubs/9699919799/functions/fnmatch.html
50 *
51 * Filename pattern matches defined in section 2.13, "Pattern Matching Notation"
52 * from chapter 2. "Shell Command Language"
53 *   http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_13
54 * where; 1. A bracket expression starting with an unquoted <circumflex> '^'
55 * character CONTINUES to specify a non-matching list; 2. an explicit <period> '.'
56 * in a bracket expression matching list, e.g. "[.abc]" does NOT match a leading
57 * <period> in a filename; 3. a <left-square-bracket> '[' which does not introduce
58 * a valid bracket expression is treated as an ordinary character; 4. a differing
59 * number of consecutive slashes within pattern and string will NOT match;
60 * 5. a trailing '\' in FNM_ESCAPE mode is treated as an ordinary '\' character.
61 *
62 * Bracket expansion defined in section 9.3.5, "RE Bracket Expression",
63 * from chapter 9, "Regular Expressions"
64 *   http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_03_05
65 * with no support for collating symbols, equivalence class expressions or
66 * character class expressions.  A partial range expression with a leading
67 * hyphen following a valid range expression will match only the ordinary
68 * <hyphen> and the ending character (e.g. "[a-m-z]" will match characters
69 * 'a' through 'm', a <hyphen> '-', or a 'z').
70 *
71 * Supports BSD extensions FNM_LEADING_DIR to match pattern to the end of one
72 * path segment of string, and FNM_CASEFOLD to ignore alpha case.
73 *
74 * NOTE: Only POSIX/C single byte locales are correctly supported at this time.
75 * Notably, non-POSIX locales with FNM_CASEFOLD produce undefined results,
76 * particularly in ranges of mixed case (e.g. "[A-z]") or spanning alpha and
77 * nonalpha characters within a range.
78 *
79 * XXX comments below indicate porting required for multi-byte character sets
80 * and non-POSIX locale collation orders; requires mbr* APIs to track shift
81 * state of pattern and string (rewinding pattern and string repeatedly).
82 *
83 * Certain parts of the code assume 0x00-0x3F are unique with any MBCS (e.g.
84 * UTF-8, SHIFT-JIS, etc).  Any implementation allowing '\' as an alternate
85 * path delimiter must be aware that 0x5C is NOT unique within SHIFT-JIS.
86 */
87
88#include <fnmatch.h>
89#include <string.h>
90#include <ctype.h>
91
92#include "charclass.h"
93
94#define	RANGE_MATCH	1
95#define	RANGE_NOMATCH	0
96#define	RANGE_ERROR	(-1)
97
98static int
99classmatch(const char *pattern, char test, int foldcase, const char **ep)
100{
101	const char * const mismatch = pattern;
102	const char *colon;
103	const struct cclass *cc;
104	int rval = RANGE_NOMATCH;
105	size_t len;
106
107	if (pattern[0] != '[' || pattern[1] != ':') {
108		*ep = mismatch;
109		return RANGE_ERROR;
110	}
111	pattern += 2;
112
113	if ((colon = strchr(pattern, ':')) == NULL || colon[1] != ']') {
114		*ep = mismatch;
115		return RANGE_ERROR;
116	}
117	*ep = colon + 2;
118	len = (size_t)(colon - pattern);
119
120	if (foldcase && strncmp(pattern, "upper:]", 7) == 0)
121		pattern = "lower:]";
122	for (cc = cclasses; cc->name != NULL; cc++) {
123		if (!strncmp(pattern, cc->name, len) && cc->name[len] == '\0') {
124			if (cc->isctype((unsigned char)test))
125				rval = RANGE_MATCH;
126			break;
127		}
128	}
129	if (cc->name == NULL) {
130		/* invalid character class, treat as normal text */
131		*ep = mismatch;
132		rval = RANGE_ERROR;
133	}
134	return rval;
135}
136
137/* Most MBCS/collation/case issues handled here.  Wildcard '*' is not handled.
138 * EOS '\0' and the FNM_PATHNAME '/' delimiters are not advanced over,
139 * however the "\/" sequence is advanced to '/'.
140 *
141 * Both pattern and string are **char to support pointer increment of arbitrary
142 * multibyte characters for the given locale, in a later iteration of this code
143 */
144static int fnmatch_ch(const char **pattern, const char **string, int flags)
145{
146	const char * const mismatch = *pattern;
147	const int nocase = !!(flags & FNM_CASEFOLD);
148	const int escape = !(flags & FNM_NOESCAPE);
149	const int slash = !!(flags & FNM_PATHNAME);
150	int result = FNM_NOMATCH;
151	const char *startch;
152	int negate;
153
154	if (**pattern == '[') {
155		++*pattern;
156
157		/* Handle negation, either leading ! or ^ operators */
158		negate = (**pattern == '!') || (**pattern == '^');
159		if (negate)
160			++*pattern;
161
162		/* ']' is an ordinary char at the start of the range pattern */
163		if (**pattern == ']')
164			goto leadingclosebrace;
165
166		while (**pattern) {
167			if (**pattern == ']') {
168				++*pattern;
169				/* XXX: Fix for MBCS character width */
170				++*string;
171				return (result ^ negate);
172			}
173
174			if (escape && (**pattern == '\\')) {
175				++*pattern;
176
177				/* Patterns must terminate with ']', not EOS */
178				if (!**pattern)
179					break;
180			}
181
182			/* Patterns must terminate with ']' not '/' */
183			if (slash && (**pattern == '/'))
184				break;
185
186			/* Match character classes. */
187			switch (classmatch(*pattern, **string, nocase, pattern)) {
188			case RANGE_MATCH:
189				result = 0;
190				continue;
191			case RANGE_NOMATCH:
192				/* Valid character class but no match. */
193				continue;
194			default:
195				/* Not a valid character class. */
196				break;
197			}
198			if (!**pattern)
199				break;
200
201leadingclosebrace:
202			/* Look at only well-formed range patterns;
203			 * "x-]" is not allowed unless escaped ("x-\]")
204			 * XXX: Fix for locale/MBCS character width
205			 */
206			if (((*pattern)[1] == '-') && ((*pattern)[2] != ']')) {
207				startch = *pattern;
208				*pattern += (escape && ((*pattern)[2] == '\\')) ? 3 : 2;
209
210				/*
211				 * NOT a properly balanced [expr] pattern, EOS
212				 * terminated or ranges containing a slash in
213				 * FNM_PATHNAME mode pattern fall out to to the
214				 * rewind and test '[' literal code path.
215				 */
216				if (!**pattern || (slash && (**pattern == '/')))
217					break;
218
219				/* XXX: handle locale/MBCS comparison, advance by MBCS char width */
220				if ((**string >= *startch) && (**string <= **pattern))
221					result = 0;
222				else if (nocase &&
223				    (isupper((unsigned char)**string) ||
224				     isupper((unsigned char)*startch) ||
225				     isupper((unsigned char)**pattern)) &&
226				    (tolower((unsigned char)**string) >=
227				     tolower((unsigned char)*startch)) &&
228				    (tolower((unsigned char)**string) <=
229				     tolower((unsigned char)**pattern)))
230					result = 0;
231
232				++*pattern;
233				continue;
234			}
235
236			/* XXX: handle locale/MBCS comparison, advance by MBCS char width */
237			if ((**string == **pattern))
238				result = 0;
239			else if (nocase && (isupper((unsigned char)**string) ||
240			    isupper((unsigned char)**pattern)) &&
241			    (tolower((unsigned char)**string) ==
242			    tolower((unsigned char)**pattern)))
243				result = 0;
244
245			++*pattern;
246		}
247		/*
248		 * NOT a properly balanced [expr] pattern;
249		 * Rewind and reset result to test '[' literal
250		 */
251		*pattern = mismatch;
252		result = FNM_NOMATCH;
253	} else if (**pattern == '?') {
254		/* Optimize '?' match before unescaping **pattern */
255		if (!**string || (slash && (**string == '/')))
256			return FNM_NOMATCH;
257		result = 0;
258		goto fnmatch_ch_success;
259	} else if (escape && (**pattern == '\\') && (*pattern)[1]) {
260		++*pattern;
261	}
262
263	/* XXX: handle locale/MBCS comparison, advance by the MBCS char width */
264	if (**string == **pattern)
265		result = 0;
266	else if (nocase && (isupper((unsigned char)**string) ||
267	    isupper((unsigned char)**pattern)) &&
268	    (tolower((unsigned char)**string) ==
269	    tolower((unsigned char)**pattern)))
270		result = 0;
271
272	/* Refuse to advance over trailing slash or NULs */
273	if (**string == '\0' || **pattern == '\0' ||
274	    (slash && ((**string == '/') || (**pattern == '/'))))
275		return result;
276
277fnmatch_ch_success:
278	++*pattern;
279	++*string;
280	return result;
281}
282
283
284int fnmatch(const char *pattern, const char *string, int flags)
285{
286	static const char dummystring[2] = {' ', 0};
287	const int escape = !(flags & FNM_NOESCAPE);
288	const int slash = !!(flags & FNM_PATHNAME);
289	const int leading_dir = !!(flags & FNM_LEADING_DIR);
290	const char *dummyptr, *matchptr, *strendseg;
291	int wild;
292	/* For '*' wild processing only; suppress 'used before initialization'
293	 * warnings with dummy initialization values;
294	 */
295	const char *strstartseg = NULL;
296	const char *mismatch = NULL;
297	int matchlen = 0;
298
299	if (*pattern == '*')
300		goto firstsegment;
301
302	while (*pattern && *string) {
303		/*
304		 * Pre-decode "\/" which has no special significance, and
305		 * match balanced slashes, starting a new segment pattern.
306		 */
307		if (slash && escape && (*pattern == '\\') && (pattern[1] == '/'))
308			++pattern;
309		if (slash && (*pattern == '/') && (*string == '/')) {
310			++pattern;
311			++string;
312		}
313
314firstsegment:
315		/*
316		 * At the beginning of each segment, validate leading period
317		 * behavior.
318		 */
319		if ((flags & FNM_PERIOD) && (*string == '.')) {
320		    if (*pattern == '.')
321			    ++pattern;
322		    else if (escape && (*pattern == '\\') && (pattern[1] == '.'))
323			    pattern += 2;
324		    else
325			    return FNM_NOMATCH;
326		    ++string;
327		}
328
329		/*
330		 * Determine the end of string segment.  Presumes '/'
331		 * character is unique, not composite in any MBCS encoding
332		 */
333		if (slash) {
334			strendseg = strchr(string, '/');
335			if (!strendseg)
336				strendseg = strchr(string, '\0');
337		} else {
338			strendseg = strchr(string, '\0');
339		}
340
341		/*
342		 * Allow pattern '*' to be consumed even with no remaining
343		 * string to match.
344		 */
345		while (*pattern) {
346			if ((string > strendseg) ||
347			    ((string == strendseg) && (*pattern != '*')))
348				break;
349
350			if (slash && ((*pattern == '/') ||
351			    (escape && (*pattern == '\\') && (pattern[1] == '/'))))
352				break;
353
354			/*
355			 * Reduce groups of '*' and '?' to n '?' matches
356			 * followed by one '*' test for simplicity.
357			 */
358			for (wild = 0; (*pattern == '*') || (*pattern == '?'); ++pattern) {
359				if (*pattern == '*') {
360					wild = 1;
361				} else if (string < strendseg) {  /* && (*pattern == '?') */
362					/* XXX: Advance 1 char for MBCS locale */
363					++string;
364				}
365				else {  /* (string >= strendseg) && (*pattern == '?') */
366					return FNM_NOMATCH;
367				}
368			}
369
370			if (wild) {
371				strstartseg = string;
372				mismatch = pattern;
373
374				/*
375				 * Count fixed (non '*') char matches remaining
376				 * in pattern * excluding '/' (or "\/") and '*'.
377				 */
378				for (matchptr = pattern, matchlen = 0; 1; ++matchlen) {
379					if ((*matchptr == '\0') ||
380					    (slash && ((*matchptr == '/') ||
381					    (escape && (*matchptr == '\\') &&
382					    (matchptr[1] == '/'))))) {
383						/* Compare precisely this many
384						 * trailing string chars, the
385						 * resulting match needs no
386						 * wildcard loop.
387						 */
388						/* XXX: Adjust for MBCS */
389						if (string + matchlen > strendseg)
390							return FNM_NOMATCH;
391
392						string = strendseg - matchlen;
393						wild = 0;
394						break;
395					}
396
397					if (*matchptr == '*') {
398						/*
399						 * Ensure at least this many
400						 * trailing string chars remain
401						 * for the first comparison.
402						 */
403						/* XXX: Adjust for MBCS */
404						if (string + matchlen > strendseg)
405							return FNM_NOMATCH;
406
407						/*
408						 * Begin first wild comparison
409						 * at the current position.
410						 */
411						break;
412					}
413
414					/*
415					 * Skip forward in pattern by a single
416					 * character match Use a dummy
417					 * fnmatch_ch() test to count one
418					 * "[range]" escape.
419					 */
420					/* XXX: Adjust for MBCS */
421					if (escape && (*matchptr == '\\') &&
422					    matchptr[1]) {
423						matchptr += 2;
424					} else if (*matchptr == '[') {
425						dummyptr = dummystring;
426						fnmatch_ch(&matchptr, &dummyptr,
427						    flags);
428					} else {
429						++matchptr;
430					}
431				}
432			}
433
434			/* Incrementally match string against the pattern. */
435			while (*pattern && (string < strendseg)) {
436				/* Success; begin a new wild pattern search. */
437				if (*pattern == '*')
438					break;
439
440				if (slash && ((*string == '/') ||
441				    (*pattern == '/') || (escape &&
442				    (*pattern == '\\') && (pattern[1] == '/'))))
443					break;
444
445				/*
446				 * Compare ch's (the pattern is advanced over
447				 * "\/" to the '/', but slashes will mismatch,
448				 * and are not consumed).
449				 */
450				if (!fnmatch_ch(&pattern, &string, flags))
451					continue;
452
453				/*
454				 * Failed to match, loop against next char
455				 * offset of string segment until not enough
456				 * string chars remain to match the fixed
457				 * pattern.
458				 */
459				if (wild) {
460					/* XXX: Advance 1 char for MBCS locale */
461					string = ++strstartseg;
462					if (string + matchlen > strendseg)
463						return FNM_NOMATCH;
464
465					pattern = mismatch;
466					continue;
467				} else
468					return FNM_NOMATCH;
469			}
470		}
471
472		if (*string && !((slash || leading_dir) && (*string == '/')))
473			return FNM_NOMATCH;
474
475		if (*pattern && !(slash && ((*pattern == '/') ||
476		    (escape && (*pattern == '\\') && (pattern[1] == '/')))))
477			return FNM_NOMATCH;
478
479		if (leading_dir && !*pattern && *string == '/')
480			return 0;
481	}
482
483	/* Where both pattern and string are at EOS, declare success.  */
484	if (!*string && !*pattern)
485		return 0;
486
487	/* Pattern didn't match to the end of string. */
488	return FNM_NOMATCH;
489}
490