1/*-
2 * Copyright (c) 2003-2007 Tim Kientzle
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include "lafe_platform.h"
28__FBSDID("$FreeBSD$");
29
30#ifdef HAVE_STRING_H
31#include <string.h>
32#endif
33
34#include "pathmatch.h"
35
36/*
37 * Check whether a character 'c' is matched by a list specification [...]:
38 *    * Leading '!' negates the class.
39 *    * <char>-<char> is a range of characters
40 *    * \<char> removes any special meaning for <char>
41 *
42 * Some interesting boundary cases:
43 *   a-d-e is one range (a-d) followed by two single characters - and e.
44 *   \a-\d is same as a-d
45 *   a\-d is three single characters: a, d, -
46 *   Trailing - is not special (so [a-] is two characters a and -).
47 *   Initial - is not special ([a-] is same as [-a] is same as [\\-a])
48 *   This function never sees a trailing \.
49 *   [] always fails
50 *   [!] always succeeds
51 */
52static int
53pm_list(const char *start, const char *end, const char c, int flags)
54{
55	const char *p = start;
56	char rangeStart = '\0', nextRangeStart;
57	int match = 1, nomatch = 0;
58
59	/* This will be used soon... */
60	(void)flags; /* UNUSED */
61
62	/* If this is a negated class, return success for nomatch. */
63	if (*p == '!' && p < end) {
64		match = 0;
65		nomatch = 1;
66		++p;
67	}
68
69	while (p < end) {
70		nextRangeStart = '\0';
71		switch (*p) {
72		case '-':
73			/* Trailing or initial '-' is not special. */
74			if ((rangeStart == '\0') || (p == end - 1)) {
75				if (*p == c)
76					return (match);
77			} else {
78				char rangeEnd = *++p;
79				if (rangeEnd == '\\')
80					rangeEnd = *++p;
81				if ((rangeStart <= c) && (c <= rangeEnd))
82					return (match);
83			}
84			break;
85		case '\\':
86			++p;
87			/* Fall through */
88		default:
89			if (*p == c)
90				return (match);
91			nextRangeStart = *p; /* Possible start of range. */
92		}
93		rangeStart = nextRangeStart;
94		++p;
95	}
96	return (nomatch);
97}
98
99/*
100 * If s is pointing to "./", ".//", "./././" or the like, skip it.
101 */
102static const char *
103pm_slashskip(const char *s) {
104	while ((*s == '/')
105	    || (s[0] == '.' && s[1] == '/')
106	    || (s[0] == '.' && s[1] == '\0'))
107		++s;
108	return (s);
109}
110
111static int
112pm(const char *p, const char *s, int flags)
113{
114	const char *end;
115
116	/*
117	 * Ignore leading './', './/', '././', etc.
118	 */
119	if (s[0] == '.' && s[1] == '/')
120		s = pm_slashskip(s + 1);
121	if (p[0] == '.' && p[1] == '/')
122		p = pm_slashskip(p + 1);
123
124	for (;;) {
125		switch (*p) {
126		case '\0':
127			if (s[0] == '/') {
128				if (flags & PATHMATCH_NO_ANCHOR_END)
129					return (1);
130				/* "dir" == "dir/" == "dir/." */
131				s = pm_slashskip(s);
132			}
133			return (*s == '\0');
134		case '?':
135			/* ? always succeds, unless we hit end of 's' */
136			if (*s == '\0')
137				return (0);
138			break;
139		case '*':
140			/* "*" == "**" == "***" ... */
141			while (*p == '*')
142				++p;
143			/* Trailing '*' always succeeds. */
144			if (*p == '\0')
145				return (1);
146			while (*s) {
147				if (lafe_pathmatch(p, s, flags))
148					return (1);
149				++s;
150			}
151			return (0);
152		case '[':
153			/*
154			 * Find the end of the [...] character class,
155			 * ignoring \] that might occur within the class.
156			 */
157			end = p + 1;
158			while (*end != '\0' && *end != ']') {
159				if (*end == '\\' && end[1] != '\0')
160					++end;
161				++end;
162			}
163			if (*end == ']') {
164				/* We found [...], try to match it. */
165				if (!pm_list(p + 1, end, *s, flags))
166					return (0);
167				p = end; /* Jump to trailing ']' char. */
168				break;
169			} else
170				/* No final ']', so just match '['. */
171				if (*p != *s)
172					return (0);
173			break;
174		case '\\':
175			/* Trailing '\\' matches itself. */
176			if (p[1] == '\0') {
177				if (*s != '\\')
178					return (0);
179			} else {
180				++p;
181				if (*p != *s)
182					return (0);
183			}
184			break;
185		case '/':
186			if (*s != '/' && *s != '\0')
187				return (0);
188			/* Note: pattern "/\./" won't match "/";
189			 * pm_slashskip() correctly stops at backslash. */
190			p = pm_slashskip(p);
191			s = pm_slashskip(s);
192			if (*p == '\0' && (flags & PATHMATCH_NO_ANCHOR_END))
193				return (1);
194			--p; /* Counteract the increment below. */
195			--s;
196			break;
197		case '$':
198			/* '$' is special only at end of pattern and only
199			 * if PATHMATCH_NO_ANCHOR_END is specified. */
200			if (p[1] == '\0' && (flags & PATHMATCH_NO_ANCHOR_END)){
201				/* "dir" == "dir/" == "dir/." */
202				return (*pm_slashskip(s) == '\0');
203			}
204			/* Otherwise, '$' is not special. */
205			/* FALL THROUGH */
206		default:
207			if (*p != *s)
208				return (0);
209			break;
210		}
211		++p;
212		++s;
213	}
214}
215
216/* Main entry point. */
217int
218lafe_pathmatch(const char *p, const char *s, int flags)
219{
220	/* Empty pattern only matches the empty string. */
221	if (p == NULL || *p == '\0')
222		return (s == NULL || *s == '\0');
223
224	/* Leading '^' anchors the start of the pattern. */
225	if (*p == '^') {
226		++p;
227		flags &= ~PATHMATCH_NO_ANCHOR_START;
228	}
229
230	if (*p == '/' && *s != '/')
231		return (0);
232
233	/* Certain patterns and file names anchor implicitly. */
234	if (*p == '*' || *p == '/' || *p == '/') {
235		while (*p == '/')
236			++p;
237		while (*s == '/')
238			++s;
239		return (pm(p, s, flags));
240	}
241
242	/* If start is unanchored, try to match start of each path element. */
243	if (flags & PATHMATCH_NO_ANCHOR_START) {
244		for ( ; s != NULL; s = strchr(s, '/')) {
245			if (*s == '/')
246				s++;
247			if (pm(p, s, flags))
248				return (1);
249		}
250		return (0);
251	}
252
253	/* Default: Match from beginning. */
254	return (pm(p, s, flags));
255}
256