1238825Smm/*-
2238825Smm * Copyright (c) 2003-2007 Tim Kientzle
3238825Smm * All rights reserved.
4238825Smm *
5238825Smm * Redistribution and use in source and binary forms, with or without
6238825Smm * modification, are permitted provided that the following conditions
7238825Smm * are met:
8238825Smm * 1. Redistributions of source code must retain the above copyright
9238825Smm *    notice, this list of conditions and the following disclaimer
10238825Smm *    in this position and unchanged.
11238825Smm * 2. Redistributions in binary form must reproduce the above copyright
12238825Smm *    notice, this list of conditions and the following disclaimer in the
13238825Smm *    documentation and/or other materials provided with the distribution.
14238825Smm *
15238825Smm * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
16238825Smm * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17238825Smm * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18238825Smm * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
19238825Smm * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20238825Smm * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21238825Smm * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22238825Smm * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23238825Smm * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24238825Smm * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25238825Smm */
26238825Smm
27238825Smm#include "archive_platform.h"
28238825Smm__FBSDID("$FreeBSD$");
29238825Smm
30238825Smm#ifdef HAVE_STRING_H
31238825Smm#include <string.h>
32238825Smm#endif
33238825Smm#ifdef HAVE_WCHAR_H
34238825Smm#include <wchar.h>
35238825Smm#endif
36238825Smm
37238825Smm#include "archive_pathmatch.h"
38238825Smm
39238825Smm/*
40238825Smm * Check whether a character 'c' is matched by a list specification [...]:
41238825Smm *    * Leading '!' or '^' negates the class.
42238825Smm *    * <char>-<char> is a range of characters
43238825Smm *    * \<char> removes any special meaning for <char>
44238825Smm *
45238825Smm * Some interesting boundary cases:
46238825Smm *   a-d-e is one range (a-d) followed by two single characters - and e.
47238825Smm *   \a-\d is same as a-d
48238825Smm *   a\-d is three single characters: a, d, -
49238825Smm *   Trailing - is not special (so [a-] is two characters a and -).
50238825Smm *   Initial - is not special ([a-] is same as [-a] is same as [\\-a])
51238825Smm *   This function never sees a trailing \.
52238825Smm *   [] always fails
53238825Smm *   [!] always succeeds
54238825Smm */
55238825Smmstatic int
56238825Smmpm_list(const char *start, const char *end, const char c, int flags)
57238825Smm{
58238825Smm	const char *p = start;
59238825Smm	char rangeStart = '\0', nextRangeStart;
60238825Smm	int match = 1, nomatch = 0;
61238825Smm
62238825Smm	/* This will be used soon... */
63238825Smm	(void)flags; /* UNUSED */
64238825Smm
65238825Smm	/* If this is a negated class, return success for nomatch. */
66238825Smm	if ((*p == '!' || *p == '^') && p < end) {
67238825Smm		match = 0;
68238825Smm		nomatch = 1;
69238825Smm		++p;
70238825Smm	}
71238825Smm
72238825Smm	while (p < end) {
73238825Smm		nextRangeStart = '\0';
74238825Smm		switch (*p) {
75238825Smm		case '-':
76238825Smm			/* Trailing or initial '-' is not special. */
77238825Smm			if ((rangeStart == '\0') || (p == end - 1)) {
78238825Smm				if (*p == c)
79238825Smm					return (match);
80238825Smm			} else {
81238825Smm				char rangeEnd = *++p;
82238825Smm				if (rangeEnd == '\\')
83238825Smm					rangeEnd = *++p;
84238825Smm				if ((rangeStart <= c) && (c <= rangeEnd))
85238825Smm					return (match);
86238825Smm			}
87238825Smm			break;
88238825Smm		case '\\':
89238825Smm			++p;
90238825Smm			/* Fall through */
91238825Smm		default:
92238825Smm			if (*p == c)
93238825Smm				return (match);
94238825Smm			nextRangeStart = *p; /* Possible start of range. */
95238825Smm		}
96238825Smm		rangeStart = nextRangeStart;
97238825Smm		++p;
98238825Smm	}
99238825Smm	return (nomatch);
100238825Smm}
101238825Smm
102238825Smmstatic int
103238825Smmpm_list_w(const wchar_t *start, const wchar_t *end, const wchar_t c, int flags)
104238825Smm{
105238825Smm	const wchar_t *p = start;
106238825Smm	wchar_t rangeStart = L'\0', nextRangeStart;
107238825Smm	int match = 1, nomatch = 0;
108238825Smm
109238825Smm	/* This will be used soon... */
110238825Smm	(void)flags; /* UNUSED */
111238825Smm
112238825Smm	/* If this is a negated class, return success for nomatch. */
113238825Smm	if ((*p == L'!' || *p == L'^') && p < end) {
114238825Smm		match = 0;
115238825Smm		nomatch = 1;
116238825Smm		++p;
117238825Smm	}
118238825Smm
119238825Smm	while (p < end) {
120238825Smm		nextRangeStart = L'\0';
121238825Smm		switch (*p) {
122238825Smm		case L'-':
123238825Smm			/* Trailing or initial '-' is not special. */
124238825Smm			if ((rangeStart == L'\0') || (p == end - 1)) {
125238825Smm				if (*p == c)
126238825Smm					return (match);
127238825Smm			} else {
128238825Smm				wchar_t rangeEnd = *++p;
129238825Smm				if (rangeEnd == L'\\')
130238825Smm					rangeEnd = *++p;
131238825Smm				if ((rangeStart <= c) && (c <= rangeEnd))
132238825Smm					return (match);
133238825Smm			}
134238825Smm			break;
135238825Smm		case L'\\':
136238825Smm			++p;
137238825Smm			/* Fall through */
138238825Smm		default:
139238825Smm			if (*p == c)
140238825Smm				return (match);
141238825Smm			nextRangeStart = *p; /* Possible start of range. */
142238825Smm		}
143238825Smm		rangeStart = nextRangeStart;
144238825Smm		++p;
145238825Smm	}
146238825Smm	return (nomatch);
147238825Smm}
148238825Smm
149238825Smm/*
150238825Smm * If s is pointing to "./", ".//", "./././" or the like, skip it.
151238825Smm */
152238825Smmstatic const char *
153238825Smmpm_slashskip(const char *s) {
154238825Smm	while ((*s == '/')
155238825Smm	    || (s[0] == '.' && s[1] == '/')
156238825Smm	    || (s[0] == '.' && s[1] == '\0'))
157238825Smm		++s;
158238825Smm	return (s);
159238825Smm}
160238825Smm
161238825Smmstatic const wchar_t *
162238825Smmpm_slashskip_w(const wchar_t *s) {
163238825Smm	while ((*s == L'/')
164238825Smm	    || (s[0] == L'.' && s[1] == L'/')
165238825Smm	    || (s[0] == L'.' && s[1] == L'\0'))
166238825Smm		++s;
167238825Smm	return (s);
168238825Smm}
169238825Smm
170238825Smmstatic int
171238825Smmpm(const char *p, const char *s, int flags)
172238825Smm{
173238825Smm	const char *end;
174238825Smm
175238825Smm	/*
176238825Smm	 * Ignore leading './', './/', '././', etc.
177238825Smm	 */
178238825Smm	if (s[0] == '.' && s[1] == '/')
179238825Smm		s = pm_slashskip(s + 1);
180238825Smm	if (p[0] == '.' && p[1] == '/')
181238825Smm		p = pm_slashskip(p + 1);
182238825Smm
183238825Smm	for (;;) {
184238825Smm		switch (*p) {
185238825Smm		case '\0':
186238825Smm			if (s[0] == '/') {
187238825Smm				if (flags & PATHMATCH_NO_ANCHOR_END)
188238825Smm					return (1);
189238825Smm				/* "dir" == "dir/" == "dir/." */
190238825Smm				s = pm_slashskip(s);
191238825Smm			}
192238825Smm			return (*s == '\0');
193238825Smm		case '?':
194238825Smm			/* ? always succeeds, unless we hit end of 's' */
195238825Smm			if (*s == '\0')
196238825Smm				return (0);
197238825Smm			break;
198238825Smm		case '*':
199238825Smm			/* "*" == "**" == "***" ... */
200238825Smm			while (*p == '*')
201238825Smm				++p;
202238825Smm			/* Trailing '*' always succeeds. */
203238825Smm			if (*p == '\0')
204238825Smm				return (1);
205238825Smm			while (*s) {
206238825Smm				if (archive_pathmatch(p, s, flags))
207238825Smm					return (1);
208238825Smm				++s;
209238825Smm			}
210238825Smm			return (0);
211238825Smm		case '[':
212238825Smm			/*
213238825Smm			 * Find the end of the [...] character class,
214238825Smm			 * ignoring \] that might occur within the class.
215238825Smm			 */
216238825Smm			end = p + 1;
217238825Smm			while (*end != '\0' && *end != ']') {
218238825Smm				if (*end == '\\' && end[1] != '\0')
219238825Smm					++end;
220238825Smm				++end;
221238825Smm			}
222238825Smm			if (*end == ']') {
223238825Smm				/* We found [...], try to match it. */
224238825Smm				if (!pm_list(p + 1, end, *s, flags))
225238825Smm					return (0);
226238825Smm				p = end; /* Jump to trailing ']' char. */
227238825Smm				break;
228238825Smm			} else
229238825Smm				/* No final ']', so just match '['. */
230238825Smm				if (*p != *s)
231238825Smm					return (0);
232238825Smm			break;
233238825Smm		case '\\':
234238825Smm			/* Trailing '\\' matches itself. */
235238825Smm			if (p[1] == '\0') {
236238825Smm				if (*s != '\\')
237238825Smm					return (0);
238238825Smm			} else {
239238825Smm				++p;
240238825Smm				if (*p != *s)
241238825Smm					return (0);
242238825Smm			}
243238825Smm			break;
244238825Smm		case '/':
245238825Smm			if (*s != '/' && *s != '\0')
246238825Smm				return (0);
247238825Smm			/* Note: pattern "/\./" won't match "/";
248238825Smm			 * pm_slashskip() correctly stops at backslash. */
249238825Smm			p = pm_slashskip(p);
250238825Smm			s = pm_slashskip(s);
251238825Smm			if (*p == '\0' && (flags & PATHMATCH_NO_ANCHOR_END))
252238825Smm				return (1);
253238825Smm			--p; /* Counteract the increment below. */
254238825Smm			--s;
255238825Smm			break;
256238825Smm		case '$':
257238825Smm			/* '$' is special only at end of pattern and only
258238825Smm			 * if PATHMATCH_NO_ANCHOR_END is specified. */
259238825Smm			if (p[1] == '\0' && (flags & PATHMATCH_NO_ANCHOR_END)){
260238825Smm				/* "dir" == "dir/" == "dir/." */
261238825Smm				return (*pm_slashskip(s) == '\0');
262238825Smm			}
263238825Smm			/* Otherwise, '$' is not special. */
264238825Smm			/* FALL THROUGH */
265238825Smm		default:
266238825Smm			if (*p != *s)
267238825Smm				return (0);
268238825Smm			break;
269238825Smm		}
270238825Smm		++p;
271238825Smm		++s;
272238825Smm	}
273238825Smm}
274238825Smm
275238825Smmstatic int
276238825Smmpm_w(const wchar_t *p, const wchar_t *s, int flags)
277238825Smm{
278238825Smm	const wchar_t *end;
279238825Smm
280238825Smm	/*
281238825Smm	 * Ignore leading './', './/', '././', etc.
282238825Smm	 */
283238825Smm	if (s[0] == L'.' && s[1] == L'/')
284238825Smm		s = pm_slashskip_w(s + 1);
285238825Smm	if (p[0] == L'.' && p[1] == L'/')
286238825Smm		p = pm_slashskip_w(p + 1);
287238825Smm
288238825Smm	for (;;) {
289238825Smm		switch (*p) {
290238825Smm		case L'\0':
291238825Smm			if (s[0] == L'/') {
292238825Smm				if (flags & PATHMATCH_NO_ANCHOR_END)
293238825Smm					return (1);
294238825Smm				/* "dir" == "dir/" == "dir/." */
295238825Smm				s = pm_slashskip_w(s);
296238825Smm			}
297238825Smm			return (*s == L'\0');
298238825Smm		case L'?':
299238825Smm			/* ? always succeeds, unless we hit end of 's' */
300238825Smm			if (*s == L'\0')
301238825Smm				return (0);
302238825Smm			break;
303238825Smm		case L'*':
304238825Smm			/* "*" == "**" == "***" ... */
305238825Smm			while (*p == L'*')
306238825Smm				++p;
307238825Smm			/* Trailing '*' always succeeds. */
308238825Smm			if (*p == L'\0')
309238825Smm				return (1);
310238825Smm			while (*s) {
311238825Smm				if (archive_pathmatch_w(p, s, flags))
312238825Smm					return (1);
313238825Smm				++s;
314238825Smm			}
315238825Smm			return (0);
316238825Smm		case L'[':
317238825Smm			/*
318238825Smm			 * Find the end of the [...] character class,
319238825Smm			 * ignoring \] that might occur within the class.
320238825Smm			 */
321238825Smm			end = p + 1;
322238825Smm			while (*end != L'\0' && *end != L']') {
323238825Smm				if (*end == L'\\' && end[1] != L'\0')
324238825Smm					++end;
325238825Smm				++end;
326238825Smm			}
327238825Smm			if (*end == L']') {
328238825Smm				/* We found [...], try to match it. */
329238825Smm				if (!pm_list_w(p + 1, end, *s, flags))
330238825Smm					return (0);
331238825Smm				p = end; /* Jump to trailing ']' char. */
332238825Smm				break;
333238825Smm			} else
334238825Smm				/* No final ']', so just match '['. */
335238825Smm				if (*p != *s)
336238825Smm					return (0);
337238825Smm			break;
338238825Smm		case L'\\':
339238825Smm			/* Trailing '\\' matches itself. */
340238825Smm			if (p[1] == L'\0') {
341238825Smm				if (*s != L'\\')
342238825Smm					return (0);
343238825Smm			} else {
344238825Smm				++p;
345238825Smm				if (*p != *s)
346238825Smm					return (0);
347238825Smm			}
348238825Smm			break;
349238825Smm		case L'/':
350238825Smm			if (*s != L'/' && *s != L'\0')
351238825Smm				return (0);
352238825Smm			/* Note: pattern "/\./" won't match "/";
353238825Smm			 * pm_slashskip() correctly stops at backslash. */
354238825Smm			p = pm_slashskip_w(p);
355238825Smm			s = pm_slashskip_w(s);
356238825Smm			if (*p == L'\0' && (flags & PATHMATCH_NO_ANCHOR_END))
357238825Smm				return (1);
358238825Smm			--p; /* Counteract the increment below. */
359238825Smm			--s;
360238825Smm			break;
361238825Smm		case L'$':
362238825Smm			/* '$' is special only at end of pattern and only
363238825Smm			 * if PATHMATCH_NO_ANCHOR_END is specified. */
364238825Smm			if (p[1] == L'\0' && (flags & PATHMATCH_NO_ANCHOR_END)){
365238825Smm				/* "dir" == "dir/" == "dir/." */
366238825Smm				return (*pm_slashskip_w(s) == L'\0');
367238825Smm			}
368238825Smm			/* Otherwise, '$' is not special. */
369238825Smm			/* FALL THROUGH */
370238825Smm		default:
371238825Smm			if (*p != *s)
372238825Smm				return (0);
373238825Smm			break;
374238825Smm		}
375238825Smm		++p;
376238825Smm		++s;
377238825Smm	}
378238825Smm}
379238825Smm
380238825Smm/* Main entry point. */
381238825Smmint
382238825Smm__archive_pathmatch(const char *p, const char *s, int flags)
383238825Smm{
384238825Smm	/* Empty pattern only matches the empty string. */
385238825Smm	if (p == NULL || *p == '\0')
386238825Smm		return (s == NULL || *s == '\0');
387370535Sgit2svn	else if (s == NULL)
388370535Sgit2svn		return (0);
389238825Smm
390238825Smm	/* Leading '^' anchors the start of the pattern. */
391238825Smm	if (*p == '^') {
392238825Smm		++p;
393238825Smm		flags &= ~PATHMATCH_NO_ANCHOR_START;
394238825Smm	}
395238825Smm
396238825Smm	if (*p == '/' && *s != '/')
397238825Smm		return (0);
398238825Smm
399299529Smm	/* Certain patterns anchor implicitly. */
400299529Smm	if (*p == '*' || *p == '/') {
401238825Smm		while (*p == '/')
402238825Smm			++p;
403238825Smm		while (*s == '/')
404238825Smm			++s;
405238825Smm		return (pm(p, s, flags));
406238825Smm	}
407238825Smm
408238825Smm	/* If start is unanchored, try to match start of each path element. */
409238825Smm	if (flags & PATHMATCH_NO_ANCHOR_START) {
410238825Smm		for ( ; s != NULL; s = strchr(s, '/')) {
411238825Smm			if (*s == '/')
412238825Smm				s++;
413238825Smm			if (pm(p, s, flags))
414238825Smm				return (1);
415238825Smm		}
416238825Smm		return (0);
417238825Smm	}
418238825Smm
419238825Smm	/* Default: Match from beginning. */
420238825Smm	return (pm(p, s, flags));
421238825Smm}
422238825Smm
423238825Smmint
424238825Smm__archive_pathmatch_w(const wchar_t *p, const wchar_t *s, int flags)
425238825Smm{
426238825Smm	/* Empty pattern only matches the empty string. */
427238825Smm	if (p == NULL || *p == L'\0')
428238825Smm		return (s == NULL || *s == L'\0');
429370535Sgit2svn	else if (s == NULL)
430370535Sgit2svn		return (0);
431238825Smm
432238825Smm	/* Leading '^' anchors the start of the pattern. */
433238825Smm	if (*p == L'^') {
434238825Smm		++p;
435238825Smm		flags &= ~PATHMATCH_NO_ANCHOR_START;
436238825Smm	}
437238825Smm
438238825Smm	if (*p == L'/' && *s != L'/')
439238825Smm		return (0);
440238825Smm
441299529Smm	/* Certain patterns anchor implicitly. */
442299529Smm	if (*p == L'*' || *p == L'/') {
443238825Smm		while (*p == L'/')
444238825Smm			++p;
445238825Smm		while (*s == L'/')
446238825Smm			++s;
447238825Smm		return (pm_w(p, s, flags));
448238825Smm	}
449238825Smm
450238825Smm	/* If start is unanchored, try to match start of each path element. */
451238825Smm	if (flags & PATHMATCH_NO_ANCHOR_START) {
452238825Smm		for ( ; s != NULL; s = wcschr(s, L'/')) {
453238825Smm			if (*s == L'/')
454238825Smm				s++;
455238825Smm			if (pm_w(p, s, flags))
456238825Smm				return (1);
457238825Smm		}
458238825Smm		return (0);
459238825Smm	}
460238825Smm
461238825Smm	/* Default: Match from beginning. */
462238825Smm	return (pm_w(p, s, flags));
463238825Smm}
464