glob.c revision 227753
11573Srgrimes/*
21573Srgrimes * Copyright (c) 1989, 1993
31573Srgrimes *	The Regents of the University of California.  All rights reserved.
41573Srgrimes *
51573Srgrimes * This code is derived from software contributed to Berkeley by
61573Srgrimes * Guido van Rossum.
71573Srgrimes *
8227753Stheraven * Copyright (c) 2011 The FreeBSD Foundation
9227753Stheraven * All rights reserved.
10227753Stheraven * Portions of this software were developed by David Chisnall
11227753Stheraven * under sponsorship from the FreeBSD Foundation.
12227753Stheraven *
131573Srgrimes * Redistribution and use in source and binary forms, with or without
141573Srgrimes * modification, are permitted provided that the following conditions
151573Srgrimes * are met:
161573Srgrimes * 1. Redistributions of source code must retain the above copyright
171573Srgrimes *    notice, this list of conditions and the following disclaimer.
181573Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
191573Srgrimes *    notice, this list of conditions and the following disclaimer in the
201573Srgrimes *    documentation and/or other materials provided with the distribution.
211573Srgrimes * 4. Neither the name of the University nor the names of its contributors
221573Srgrimes *    may be used to endorse or promote products derived from this software
231573Srgrimes *    without specific prior written permission.
241573Srgrimes *
251573Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
261573Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
271573Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
281573Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
291573Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
301573Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
311573Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
321573Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
331573Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
341573Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
351573Srgrimes * SUCH DAMAGE.
361573Srgrimes */
371573Srgrimes
381573Srgrimes#if defined(LIBC_SCCS) && !defined(lint)
391573Srgrimesstatic char sccsid[] = "@(#)glob.c	8.3 (Berkeley) 10/13/93";
401573Srgrimes#endif /* LIBC_SCCS and not lint */
4190045Sobrien#include <sys/cdefs.h>
4290045Sobrien__FBSDID("$FreeBSD: head/lib/libc/gen/glob.c 227753 2011-11-20 14:45:42Z theraven $");
431573Srgrimes
441573Srgrimes/*
451573Srgrimes * glob(3) -- a superset of the one defined in POSIX 1003.2.
461573Srgrimes *
471573Srgrimes * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
481573Srgrimes *
491573Srgrimes * Optional extra services, controlled by flags not defined by POSIX:
501573Srgrimes *
511573Srgrimes * GLOB_QUOTE:
521573Srgrimes *	Escaping convention: \ inhibits any special meaning the following
531573Srgrimes *	character might have (except \ at end of string is retained).
541573Srgrimes * GLOB_MAGCHAR:
551573Srgrimes *	Set in gl_flags if pattern contained a globbing character.
561573Srgrimes * GLOB_NOMAGIC:
571573Srgrimes *	Same as GLOB_NOCHECK, but it will only append pattern if it did
581573Srgrimes *	not contain any magic characters.  [Used in csh style globbing]
591573Srgrimes * GLOB_ALTDIRFUNC:
601573Srgrimes *	Use alternately specified directory access functions.
611573Srgrimes * GLOB_TILDE:
621573Srgrimes *	expand ~user/foo to the /home/dir/of/user/foo
631573Srgrimes * GLOB_BRACE:
648870Srgrimes *	expand {1,2}{a,b} to 1a 1b 2a 2b
651573Srgrimes * gl_matchc:
661573Srgrimes *	Number of matches in the current invocation of glob.
671573Srgrimes */
681573Srgrimes
69132817Stjr/*
70132817Stjr * Some notes on multibyte character support:
71132817Stjr * 1. Patterns with illegal byte sequences match nothing - even if
72132817Stjr *    GLOB_NOCHECK is specified.
73132817Stjr * 2. Illegal byte sequences in filenames are handled by treating them as
74132817Stjr *    single-byte characters with a value of the first byte of the sequence
75132817Stjr *    cast to wchar_t.
76132817Stjr * 3. State-dependent encodings are not currently supported.
77132817Stjr */
78132817Stjr
791573Srgrimes#include <sys/param.h>
801573Srgrimes#include <sys/stat.h>
811573Srgrimes
821573Srgrimes#include <ctype.h>
831573Srgrimes#include <dirent.h>
841573Srgrimes#include <errno.h>
851573Srgrimes#include <glob.h>
86132817Stjr#include <limits.h>
871573Srgrimes#include <pwd.h>
88132817Stjr#include <stdint.h>
891573Srgrimes#include <stdio.h>
901573Srgrimes#include <stdlib.h>
911573Srgrimes#include <string.h>
921573Srgrimes#include <unistd.h>
93132817Stjr#include <wchar.h>
941573Srgrimes
9519276Sache#include "collate.h"
9619276Sache
971573Srgrimes#define	DOLLAR		'$'
981573Srgrimes#define	DOT		'.'
991573Srgrimes#define	EOS		'\0'
1001573Srgrimes#define	LBRACKET	'['
1011573Srgrimes#define	NOT		'!'
1021573Srgrimes#define	QUESTION	'?'
1031573Srgrimes#define	QUOTE		'\\'
1041573Srgrimes#define	RANGE		'-'
1051573Srgrimes#define	RBRACKET	']'
1061573Srgrimes#define	SEP		'/'
1071573Srgrimes#define	STAR		'*'
1081573Srgrimes#define	TILDE		'~'
1091573Srgrimes#define	UNDERSCORE	'_'
1101573Srgrimes#define	LBRACE		'{'
1111573Srgrimes#define	RBRACE		'}'
1121573Srgrimes#define	SLASH		'/'
1131573Srgrimes#define	COMMA		','
1141573Srgrimes
1151573Srgrimes#ifndef DEBUG
1161573Srgrimes
117132817Stjr#define	M_QUOTE		0x8000000000ULL
118132817Stjr#define	M_PROTECT	0x4000000000ULL
119132817Stjr#define	M_MASK		0xffffffffffULL
120132817Stjr#define	M_CHAR		0x00ffffffffULL
1211573Srgrimes
122132817Stjrtypedef uint_fast64_t Char;
1231573Srgrimes
1241573Srgrimes#else
1251573Srgrimes
1261573Srgrimes#define	M_QUOTE		0x80
1271573Srgrimes#define	M_PROTECT	0x40
1281573Srgrimes#define	M_MASK		0xff
129132817Stjr#define	M_CHAR		0x7f
1301573Srgrimes
1311573Srgrimestypedef char Char;
1321573Srgrimes
1331573Srgrimes#endif
1341573Srgrimes
1351573Srgrimes
136132817Stjr#define	CHAR(c)		((Char)((c)&M_CHAR))
1371573Srgrimes#define	META(c)		((Char)((c)|M_QUOTE))
1381573Srgrimes#define	M_ALL		META('*')
1391573Srgrimes#define	M_END		META(']')
1401573Srgrimes#define	M_NOT		META('!')
1411573Srgrimes#define	M_ONE		META('?')
1421573Srgrimes#define	M_RNG		META('-')
1431573Srgrimes#define	M_SET		META('[')
1441573Srgrimes#define	ismeta(c)	(((c)&M_QUOTE) != 0)
1451573Srgrimes
1461573Srgrimes
14790045Sobrienstatic int	 compare(const void *, const void *);
148158812Sachestatic int	 g_Ctoc(const Char *, char *, size_t);
14990045Sobrienstatic int	 g_lstat(Char *, struct stat *, glob_t *);
15090045Sobrienstatic DIR	*g_opendir(Char *, glob_t *);
151180021Smtmstatic const Char *g_strchr(const Char *, wchar_t);
1521573Srgrimes#ifdef notdef
15390045Sobrienstatic Char	*g_strcat(Char *, const Char *);
1541573Srgrimes#endif
15590045Sobrienstatic int	 g_stat(Char *, struct stat *, glob_t *);
156158812Sachestatic int	 glob0(const Char *, glob_t *, size_t *);
157158812Sachestatic int	 glob1(Char *, glob_t *, size_t *);
158158812Sachestatic int	 glob2(Char *, Char *, Char *, Char *, glob_t *, size_t *);
159158812Sachestatic int	 glob3(Char *, Char *, Char *, Char *, Char *, glob_t *, size_t *);
160158812Sachestatic int	 globextend(const Char *, glob_t *, size_t *);
16174963Speterstatic const Char *
16290045Sobrien		 globtilde(const Char *, Char *, size_t, glob_t *);
163158812Sachestatic int	 globexp1(const Char *, glob_t *, size_t *);
164158812Sachestatic int	 globexp2(const Char *, const Char *, glob_t *, int *, size_t *);
16590045Sobrienstatic int	 match(Char *, Char *, Char *);
1661573Srgrimes#ifdef DEBUG
16790045Sobrienstatic void	 qprintf(const char *, Char *);
1681573Srgrimes#endif
1691573Srgrimes
1701573Srgrimesint
171159294Sdelphijglob(const char *pattern, int flags, int (*errfunc)(const char *, int), glob_t *pglob)
1721573Srgrimes{
173159294Sdelphij	const char *patnext;
174158812Sache	size_t limit;
175132817Stjr	Char *bufnext, *bufend, patbuf[MAXPATHLEN], prot;
176132817Stjr	mbstate_t mbs;
177132817Stjr	wchar_t wc;
178132817Stjr	size_t clen;
1791573Srgrimes
180159294Sdelphij	patnext = pattern;
1811573Srgrimes	if (!(flags & GLOB_APPEND)) {
1821573Srgrimes		pglob->gl_pathc = 0;
1831573Srgrimes		pglob->gl_pathv = NULL;
1841573Srgrimes		if (!(flags & GLOB_DOOFFS))
1851573Srgrimes			pglob->gl_offs = 0;
1861573Srgrimes	}
18780525Smikeh	if (flags & GLOB_LIMIT) {
18874469Sjlemon		limit = pglob->gl_matchc;
18980525Smikeh		if (limit == 0)
19080525Smikeh			limit = ARG_MAX;
19180525Smikeh	} else
19274469Sjlemon		limit = 0;
1931573Srgrimes	pglob->gl_flags = flags & ~GLOB_MAGCHAR;
1941573Srgrimes	pglob->gl_errfunc = errfunc;
1951573Srgrimes	pglob->gl_matchc = 0;
1961573Srgrimes
1971573Srgrimes	bufnext = patbuf;
19874963Speter	bufend = bufnext + MAXPATHLEN - 1;
199132817Stjr	if (flags & GLOB_NOESCAPE) {
200132817Stjr		memset(&mbs, 0, sizeof(mbs));
201132817Stjr		while (bufend - bufnext >= MB_CUR_MAX) {
202132817Stjr			clen = mbrtowc(&wc, patnext, MB_LEN_MAX, &mbs);
203132817Stjr			if (clen == (size_t)-1 || clen == (size_t)-2)
204132817Stjr				return (GLOB_NOMATCH);
205132817Stjr			else if (clen == 0)
206132817Stjr				break;
207132817Stjr			*bufnext++ = wc;
208132817Stjr			patnext += clen;
209132817Stjr		}
210132817Stjr	} else {
2111573Srgrimes		/* Protect the quoted characters. */
212132817Stjr		memset(&mbs, 0, sizeof(mbs));
213132817Stjr		while (bufend - bufnext >= MB_CUR_MAX) {
214132817Stjr			if (*patnext == QUOTE) {
215132817Stjr				if (*++patnext == EOS) {
216132817Stjr					*bufnext++ = QUOTE | M_PROTECT;
217132817Stjr					continue;
2181573Srgrimes				}
219132817Stjr				prot = M_PROTECT;
220132817Stjr			} else
221132817Stjr				prot = 0;
222132817Stjr			clen = mbrtowc(&wc, patnext, MB_LEN_MAX, &mbs);
223132817Stjr			if (clen == (size_t)-1 || clen == (size_t)-2)
224132817Stjr				return (GLOB_NOMATCH);
225132817Stjr			else if (clen == 0)
226132817Stjr				break;
227132817Stjr			*bufnext++ = wc | prot;
228132817Stjr			patnext += clen;
229132817Stjr		}
2301573Srgrimes	}
2311573Srgrimes	*bufnext = EOS;
2321573Srgrimes
2331573Srgrimes	if (flags & GLOB_BRACE)
23474469Sjlemon	    return globexp1(patbuf, pglob, &limit);
2351573Srgrimes	else
23674469Sjlemon	    return glob0(patbuf, pglob, &limit);
2371573Srgrimes}
2381573Srgrimes
2391573Srgrimes/*
2401573Srgrimes * Expand recursively a glob {} pattern. When there is no more expansion
2411573Srgrimes * invoke the standard globbing routine to glob the rest of the magic
2421573Srgrimes * characters
2431573Srgrimes */
24474963Speterstatic int
245159294Sdelphijglobexp1(const Char *pattern, glob_t *pglob, size_t *limit)
2461573Srgrimes{
2471573Srgrimes	const Char* ptr = pattern;
2481573Srgrimes	int rv;
2491573Srgrimes
2501573Srgrimes	/* Protect a single {}, for find(1), like csh */
2511573Srgrimes	if (pattern[0] == LBRACE && pattern[1] == RBRACE && pattern[2] == EOS)
25274469Sjlemon		return glob0(pattern, pglob, limit);
2531573Srgrimes
254180021Smtm	while ((ptr = g_strchr(ptr, LBRACE)) != NULL)
25574469Sjlemon		if (!globexp2(ptr, pattern, pglob, &rv, limit))
2561573Srgrimes			return rv;
2571573Srgrimes
25874469Sjlemon	return glob0(pattern, pglob, limit);
2591573Srgrimes}
2601573Srgrimes
2611573Srgrimes
2621573Srgrimes/*
2631573Srgrimes * Recursive brace globbing helper. Tries to expand a single brace.
2641573Srgrimes * If it succeeds then it invokes globexp1 with the new pattern.
2651573Srgrimes * If it fails then it tries to glob the rest of the pattern and returns.
2661573Srgrimes */
26774963Speterstatic int
268159294Sdelphijglobexp2(const Char *ptr, const Char *pattern, glob_t *pglob, int *rv, size_t *limit)
2691573Srgrimes{
2701573Srgrimes	int     i;
2711573Srgrimes	Char   *lm, *ls;
272150137Sache	const Char *pe, *pm, *pm1, *pl;
27374963Speter	Char    patbuf[MAXPATHLEN];
2741573Srgrimes
2751573Srgrimes	/* copy part up to the brace */
2761573Srgrimes	for (lm = patbuf, pm = pattern; pm != ptr; *lm++ = *pm++)
2771573Srgrimes		continue;
27874963Speter	*lm = EOS;
2791573Srgrimes	ls = lm;
2801573Srgrimes
2811573Srgrimes	/* Find the balanced brace */
2821573Srgrimes	for (i = 0, pe = ++ptr; *pe; pe++)
2831573Srgrimes		if (*pe == LBRACKET) {
2841573Srgrimes			/* Ignore everything between [] */
2851573Srgrimes			for (pm = pe++; *pe != RBRACKET && *pe != EOS; pe++)
2861573Srgrimes				continue;
2871573Srgrimes			if (*pe == EOS) {
2888870Srgrimes				/*
2891573Srgrimes				 * We could not find a matching RBRACKET.
2901573Srgrimes				 * Ignore and just look for RBRACE
2911573Srgrimes				 */
2921573Srgrimes				pe = pm;
2931573Srgrimes			}
2941573Srgrimes		}
2951573Srgrimes		else if (*pe == LBRACE)
2961573Srgrimes			i++;
2971573Srgrimes		else if (*pe == RBRACE) {
2981573Srgrimes			if (i == 0)
2991573Srgrimes				break;
3001573Srgrimes			i--;
3011573Srgrimes		}
3021573Srgrimes
3031573Srgrimes	/* Non matching braces; just glob the pattern */
3041573Srgrimes	if (i != 0 || *pe == EOS) {
30574469Sjlemon		*rv = glob0(patbuf, pglob, limit);
3061573Srgrimes		return 0;
3071573Srgrimes	}
3081573Srgrimes
3091573Srgrimes	for (i = 0, pl = pm = ptr; pm <= pe; pm++)
3101573Srgrimes		switch (*pm) {
3111573Srgrimes		case LBRACKET:
3121573Srgrimes			/* Ignore everything between [] */
313150137Sache			for (pm1 = pm++; *pm != RBRACKET && *pm != EOS; pm++)
3141573Srgrimes				continue;
3151573Srgrimes			if (*pm == EOS) {
3168870Srgrimes				/*
3171573Srgrimes				 * We could not find a matching RBRACKET.
3181573Srgrimes				 * Ignore and just look for RBRACE
3191573Srgrimes				 */
320150137Sache				pm = pm1;
3211573Srgrimes			}
3221573Srgrimes			break;
3231573Srgrimes
3241573Srgrimes		case LBRACE:
3251573Srgrimes			i++;
3261573Srgrimes			break;
3271573Srgrimes
3281573Srgrimes		case RBRACE:
3291573Srgrimes			if (i) {
3301573Srgrimes			    i--;
3311573Srgrimes			    break;
3321573Srgrimes			}
3331573Srgrimes			/* FALLTHROUGH */
3341573Srgrimes		case COMMA:
3351573Srgrimes			if (i && *pm == COMMA)
3361573Srgrimes				break;
3371573Srgrimes			else {
3381573Srgrimes				/* Append the current string */
3391573Srgrimes				for (lm = ls; (pl < pm); *lm++ = *pl++)
3401573Srgrimes					continue;
3418870Srgrimes				/*
3421573Srgrimes				 * Append the rest of the pattern after the
3431573Srgrimes				 * closing brace
3441573Srgrimes				 */
3451573Srgrimes				for (pl = pe + 1; (*lm++ = *pl++) != EOS;)
3461573Srgrimes					continue;
3471573Srgrimes
3481573Srgrimes				/* Expand the current pattern */
3491573Srgrimes#ifdef DEBUG
3501573Srgrimes				qprintf("globexp2:", patbuf);
3511573Srgrimes#endif
35274469Sjlemon				*rv = globexp1(patbuf, pglob, limit);
3531573Srgrimes
3541573Srgrimes				/* move after the comma, to the next string */
3551573Srgrimes				pl = pm + 1;
3561573Srgrimes			}
3571573Srgrimes			break;
3581573Srgrimes
3591573Srgrimes		default:
3601573Srgrimes			break;
3611573Srgrimes		}
3621573Srgrimes	*rv = 0;
3631573Srgrimes	return 0;
3641573Srgrimes}
3651573Srgrimes
3661573Srgrimes
3671573Srgrimes
3681573Srgrimes/*
3691573Srgrimes * expand tilde from the passwd file.
3701573Srgrimes */
3711573Srgrimesstatic const Char *
372159294Sdelphijglobtilde(const Char *pattern, Char *patbuf, size_t patbuf_len, glob_t *pglob)
3731573Srgrimes{
3741573Srgrimes	struct passwd *pwd;
3751573Srgrimes	char *h;
3761573Srgrimes	const Char *p;
37724158Simp	Char *b, *eb;
3781573Srgrimes
3791573Srgrimes	if (*pattern != TILDE || !(pglob->gl_flags & GLOB_TILDE))
3801573Srgrimes		return pattern;
3811573Srgrimes
38224158Simp	/*
38324158Simp	 * Copy up to the end of the string or /
38424158Simp	 */
38524158Simp	eb = &patbuf[patbuf_len - 1];
38624158Simp	for (p = pattern + 1, h = (char *) patbuf;
38724158Simp	    h < (char *)eb && *p && *p != SLASH; *h++ = *p++)
3881573Srgrimes		continue;
3891573Srgrimes
3901573Srgrimes	*h = EOS;
3911573Srgrimes
3921573Srgrimes	if (((char *) patbuf)[0] == EOS) {
3938870Srgrimes		/*
39428820Simp		 * handle a plain ~ or ~/ by expanding $HOME first (iff
39528820Simp		 * we're not running setuid or setgid) and then trying
39628820Simp		 * the password file
3971573Srgrimes		 */
398121667Stjr		if (issetugid() != 0 ||
39933664Sjb		    (h = getenv("HOME")) == NULL) {
40028836Sache			if (((h = getlogin()) != NULL &&
40128836Sache			     (pwd = getpwnam(h)) != NULL) ||
40228836Sache			    (pwd = getpwuid(getuid())) != NULL)
40328836Sache				h = pwd->pw_dir;
40428836Sache			else
4051573Srgrimes				return pattern;
4061573Srgrimes		}
4071573Srgrimes	}
4081573Srgrimes	else {
4091573Srgrimes		/*
4101573Srgrimes		 * Expand a ~user
4111573Srgrimes		 */
4121573Srgrimes		if ((pwd = getpwnam((char*) patbuf)) == NULL)
4131573Srgrimes			return pattern;
4141573Srgrimes		else
4151573Srgrimes			h = pwd->pw_dir;
4161573Srgrimes	}
4171573Srgrimes
4181573Srgrimes	/* Copy the home directory */
41924158Simp	for (b = patbuf; b < eb && *h; *b++ = *h++)
4201573Srgrimes		continue;
4218870Srgrimes
4221573Srgrimes	/* Append the rest of the pattern */
42324158Simp	while (b < eb && (*b++ = *p++) != EOS)
4241573Srgrimes		continue;
42524158Simp	*b = EOS;
4261573Srgrimes
4271573Srgrimes	return patbuf;
4281573Srgrimes}
4291573Srgrimes
4308870Srgrimes
4311573Srgrimes/*
4321573Srgrimes * The main glob() routine: compiles the pattern (optionally processing
4331573Srgrimes * quotes), calls glob1() to do the real pattern matching, and finally
4341573Srgrimes * sorts the list (unless unsorted operation is requested).  Returns 0
435100217Smikeh * if things went well, nonzero if errors occurred.
4361573Srgrimes */
4371573Srgrimesstatic int
438159294Sdelphijglob0(const Char *pattern, glob_t *pglob, size_t *limit)
4391573Srgrimes{
4401573Srgrimes	const Char *qpatnext;
441207981Sgordon	int err;
442158812Sache	size_t oldpathc;
443207981Sgordon	Char *bufnext, c, patbuf[MAXPATHLEN];
4441573Srgrimes
44574963Speter	qpatnext = globtilde(pattern, patbuf, MAXPATHLEN, pglob);
4461573Srgrimes	oldpathc = pglob->gl_pathc;
4471573Srgrimes	bufnext = patbuf;
4481573Srgrimes
4491573Srgrimes	/* We don't need to check for buffer overflow any more. */
4501573Srgrimes	while ((c = *qpatnext++) != EOS) {
4511573Srgrimes		switch (c) {
4521573Srgrimes		case LBRACKET:
4531573Srgrimes			c = *qpatnext;
4541573Srgrimes			if (c == NOT)
4551573Srgrimes				++qpatnext;
4561573Srgrimes			if (*qpatnext == EOS ||
457180021Smtm			    g_strchr(qpatnext+1, RBRACKET) == NULL) {
4581573Srgrimes				*bufnext++ = LBRACKET;
4591573Srgrimes				if (c == NOT)
4601573Srgrimes					--qpatnext;
4611573Srgrimes				break;
4621573Srgrimes			}
4631573Srgrimes			*bufnext++ = M_SET;
4641573Srgrimes			if (c == NOT)
4651573Srgrimes				*bufnext++ = M_NOT;
4661573Srgrimes			c = *qpatnext++;
4671573Srgrimes			do {
4681573Srgrimes				*bufnext++ = CHAR(c);
4691573Srgrimes				if (*qpatnext == RANGE &&
4701573Srgrimes				    (c = qpatnext[1]) != RBRACKET) {
4711573Srgrimes					*bufnext++ = M_RNG;
4721573Srgrimes					*bufnext++ = CHAR(c);
4731573Srgrimes					qpatnext += 2;
4741573Srgrimes				}
4751573Srgrimes			} while ((c = *qpatnext++) != RBRACKET);
4761573Srgrimes			pglob->gl_flags |= GLOB_MAGCHAR;
4771573Srgrimes			*bufnext++ = M_END;
4781573Srgrimes			break;
4791573Srgrimes		case QUESTION:
4801573Srgrimes			pglob->gl_flags |= GLOB_MAGCHAR;
4811573Srgrimes			*bufnext++ = M_ONE;
4821573Srgrimes			break;
4831573Srgrimes		case STAR:
4841573Srgrimes			pglob->gl_flags |= GLOB_MAGCHAR;
4858870Srgrimes			/* collapse adjacent stars to one,
4861573Srgrimes			 * to avoid exponential behavior
4871573Srgrimes			 */
4881573Srgrimes			if (bufnext == patbuf || bufnext[-1] != M_ALL)
4891573Srgrimes			    *bufnext++ = M_ALL;
4901573Srgrimes			break;
4911573Srgrimes		default:
4921573Srgrimes			*bufnext++ = CHAR(c);
4931573Srgrimes			break;
4941573Srgrimes		}
4951573Srgrimes	}
4961573Srgrimes	*bufnext = EOS;
4971573Srgrimes#ifdef DEBUG
4981573Srgrimes	qprintf("glob0:", patbuf);
4991573Srgrimes#endif
5001573Srgrimes
50174469Sjlemon	if ((err = glob1(patbuf, pglob, limit)) != 0)
5021573Srgrimes		return(err);
5031573Srgrimes
5041573Srgrimes	/*
5058870Srgrimes	 * If there was no match we are going to append the pattern
5061573Srgrimes	 * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified
5071573Srgrimes	 * and the pattern did not contain any magic characters
5081573Srgrimes	 * GLOB_NOMAGIC is there just for compatibility with csh.
5091573Srgrimes	 */
510100217Smikeh	if (pglob->gl_pathc == oldpathc) {
511100217Smikeh		if (((pglob->gl_flags & GLOB_NOCHECK) ||
512100217Smikeh		    ((pglob->gl_flags & GLOB_NOMAGIC) &&
513100217Smikeh			!(pglob->gl_flags & GLOB_MAGCHAR))))
514100217Smikeh			return(globextend(pattern, pglob, limit));
515100217Smikeh		else
516100217Smikeh			return(GLOB_NOMATCH);
517100217Smikeh	}
518100217Smikeh	if (!(pglob->gl_flags & GLOB_NOSORT))
5191573Srgrimes		qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc,
5201573Srgrimes		    pglob->gl_pathc - oldpathc, sizeof(char *), compare);
5211573Srgrimes	return(0);
5221573Srgrimes}
5231573Srgrimes
5241573Srgrimesstatic int
525159294Sdelphijcompare(const void *p, const void *q)
5261573Srgrimes{
5271573Srgrimes	return(strcmp(*(char **)p, *(char **)q));
5281573Srgrimes}
5291573Srgrimes
5301573Srgrimesstatic int
531159294Sdelphijglob1(Char *pattern, glob_t *pglob, size_t *limit)
5321573Srgrimes{
53374963Speter	Char pathbuf[MAXPATHLEN];
5341573Srgrimes
5351573Srgrimes	/* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */
5361573Srgrimes	if (*pattern == EOS)
5371573Srgrimes		return(0);
53874963Speter	return(glob2(pathbuf, pathbuf, pathbuf + MAXPATHLEN - 1,
53974963Speter	    pattern, pglob, limit));
5401573Srgrimes}
5411573Srgrimes
5421573Srgrimes/*
5431573Srgrimes * The functions glob2 and glob3 are mutually recursive; there is one level
5441573Srgrimes * of recursion for each segment in the pattern that contains one or more
5451573Srgrimes * meta characters.
5461573Srgrimes */
5471573Srgrimesstatic int
548159294Sdelphijglob2(Char *pathbuf, Char *pathend, Char *pathend_last, Char *pattern,
549159294Sdelphij      glob_t *pglob, size_t *limit)
5501573Srgrimes{
5511573Srgrimes	struct stat sb;
5521573Srgrimes	Char *p, *q;
5531573Srgrimes	int anymeta;
5541573Srgrimes
5551573Srgrimes	/*
5561573Srgrimes	 * Loop over pattern segments until end of pattern or until
5571573Srgrimes	 * segment with meta character found.
5581573Srgrimes	 */
5591573Srgrimes	for (anymeta = 0;;) {
5601573Srgrimes		if (*pattern == EOS) {		/* End of pattern? */
5611573Srgrimes			*pathend = EOS;
5621573Srgrimes			if (g_lstat(pathbuf, &sb, pglob))
5631573Srgrimes				return(0);
5648870Srgrimes
5651573Srgrimes			if (((pglob->gl_flags & GLOB_MARK) &&
5661573Srgrimes			    pathend[-1] != SEP) && (S_ISDIR(sb.st_mode)
5671573Srgrimes			    || (S_ISLNK(sb.st_mode) &&
5681573Srgrimes			    (g_stat(pathbuf, &sb, pglob) == 0) &&
5691573Srgrimes			    S_ISDIR(sb.st_mode)))) {
57074963Speter				if (pathend + 1 > pathend_last)
571100217Smikeh					return (GLOB_ABORTED);
5721573Srgrimes				*pathend++ = SEP;
5731573Srgrimes				*pathend = EOS;
5741573Srgrimes			}
5751573Srgrimes			++pglob->gl_matchc;
57674469Sjlemon			return(globextend(pathbuf, pglob, limit));
5771573Srgrimes		}
5781573Srgrimes
5791573Srgrimes		/* Find end of next segment, copy tentatively to pathend. */
5801573Srgrimes		q = pathend;
5811573Srgrimes		p = pattern;
5821573Srgrimes		while (*p != EOS && *p != SEP) {
5831573Srgrimes			if (ismeta(*p))
5841573Srgrimes				anymeta = 1;
58574963Speter			if (q + 1 > pathend_last)
586100217Smikeh				return (GLOB_ABORTED);
5871573Srgrimes			*q++ = *p++;
5881573Srgrimes		}
5891573Srgrimes
5901573Srgrimes		if (!anymeta) {		/* No expansion, do next segment. */
5911573Srgrimes			pathend = q;
5921573Srgrimes			pattern = p;
59374963Speter			while (*pattern == SEP) {
59474963Speter				if (pathend + 1 > pathend_last)
595100217Smikeh					return (GLOB_ABORTED);
5961573Srgrimes				*pathend++ = *pattern++;
59774963Speter			}
5981573Srgrimes		} else			/* Need expansion, recurse. */
59974963Speter			return(glob3(pathbuf, pathend, pathend_last, pattern, p,
60074963Speter			    pglob, limit));
6011573Srgrimes	}
6021573Srgrimes	/* NOTREACHED */
6031573Srgrimes}
6041573Srgrimes
6051573Srgrimesstatic int
606159294Sdelphijglob3(Char *pathbuf, Char *pathend, Char *pathend_last,
607159294Sdelphij      Char *pattern, Char *restpattern,
608159294Sdelphij      glob_t *pglob, size_t *limit)
6091573Srgrimes{
61090045Sobrien	struct dirent *dp;
6111573Srgrimes	DIR *dirp;
6121573Srgrimes	int err;
6131573Srgrimes	char buf[MAXPATHLEN];
6141573Srgrimes
6151573Srgrimes	/*
6161573Srgrimes	 * The readdirfunc declaration can't be prototyped, because it is
6171573Srgrimes	 * assigned, below, to two functions which are prototyped in glob.h
6181573Srgrimes	 * and dirent.h as taking pointers to differently typed opaque
6191573Srgrimes	 * structures.
6201573Srgrimes	 */
6211573Srgrimes	struct dirent *(*readdirfunc)();
6221573Srgrimes
62374963Speter	if (pathend > pathend_last)
624100217Smikeh		return (GLOB_ABORTED);
6251573Srgrimes	*pathend = EOS;
6261573Srgrimes	errno = 0;
6278870Srgrimes
6281573Srgrimes	if ((dirp = g_opendir(pathbuf, pglob)) == NULL) {
6291573Srgrimes		/* TODO: don't call for ENOENT or ENOTDIR? */
6301573Srgrimes		if (pglob->gl_errfunc) {
63174921Speter			if (g_Ctoc(pathbuf, buf, sizeof(buf)))
632100217Smikeh				return (GLOB_ABORTED);
6331573Srgrimes			if (pglob->gl_errfunc(buf, errno) ||
6341573Srgrimes			    pglob->gl_flags & GLOB_ERR)
635100217Smikeh				return (GLOB_ABORTED);
6361573Srgrimes		}
6371573Srgrimes		return(0);
6381573Srgrimes	}
6391573Srgrimes
6401573Srgrimes	err = 0;
6411573Srgrimes
6421573Srgrimes	/* Search directory for matching names. */
6431573Srgrimes	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
6441573Srgrimes		readdirfunc = pglob->gl_readdir;
6451573Srgrimes	else
6461573Srgrimes		readdirfunc = readdir;
6471573Srgrimes	while ((dp = (*readdirfunc)(dirp))) {
648159294Sdelphij		char *sc;
64990045Sobrien		Char *dc;
650132817Stjr		wchar_t wc;
651132817Stjr		size_t clen;
652132817Stjr		mbstate_t mbs;
6531573Srgrimes
6541573Srgrimes		/* Initial DOT must be matched literally. */
6551573Srgrimes		if (dp->d_name[0] == DOT && *pattern != DOT)
6561573Srgrimes			continue;
657132817Stjr		memset(&mbs, 0, sizeof(mbs));
65874963Speter		dc = pathend;
659159294Sdelphij		sc = dp->d_name;
660132817Stjr		while (dc < pathend_last) {
661132817Stjr			clen = mbrtowc(&wc, sc, MB_LEN_MAX, &mbs);
662132817Stjr			if (clen == (size_t)-1 || clen == (size_t)-2) {
663132817Stjr				wc = *sc;
664132817Stjr				clen = 1;
665132817Stjr				memset(&mbs, 0, sizeof(mbs));
666132817Stjr			}
667132817Stjr			if ((*dc++ = wc) == EOS)
668132817Stjr				break;
669132817Stjr			sc += clen;
670132817Stjr		}
6711573Srgrimes		if (!match(pathend, pattern, restpattern)) {
6721573Srgrimes			*pathend = EOS;
6731573Srgrimes			continue;
6741573Srgrimes		}
67574963Speter		err = glob2(pathbuf, --dc, pathend_last, restpattern,
67674963Speter		    pglob, limit);
6771573Srgrimes		if (err)
6781573Srgrimes			break;
6791573Srgrimes	}
6801573Srgrimes
6811573Srgrimes	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
6821573Srgrimes		(*pglob->gl_closedir)(dirp);
6831573Srgrimes	else
6841573Srgrimes		closedir(dirp);
6851573Srgrimes	return(err);
6861573Srgrimes}
6871573Srgrimes
6881573Srgrimes
6891573Srgrimes/*
6901573Srgrimes * Extend the gl_pathv member of a glob_t structure to accomodate a new item,
6911573Srgrimes * add the new item, and update gl_pathc.
6921573Srgrimes *
6931573Srgrimes * This assumes the BSD realloc, which only copies the block when its size
6941573Srgrimes * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
6951573Srgrimes * behavior.
6961573Srgrimes *
6971573Srgrimes * Return 0 if new item added, error code if memory couldn't be allocated.
6981573Srgrimes *
6991573Srgrimes * Invariant of the glob_t structure:
7001573Srgrimes *	Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
7011573Srgrimes *	gl_pathv points to (gl_offs + gl_pathc + 1) items.
7021573Srgrimes */
7031573Srgrimesstatic int
704159294Sdelphijglobextend(const Char *path, glob_t *pglob, size_t *limit)
7051573Srgrimes{
70690045Sobrien	char **pathv;
707158812Sache	size_t i, newsize, len;
7081573Srgrimes	char *copy;
7091573Srgrimes	const Char *p;
7101573Srgrimes
71180525Smikeh	if (*limit && pglob->gl_pathc > *limit) {
71280525Smikeh		errno = 0;
71380525Smikeh		return (GLOB_NOSPACE);
71480525Smikeh	}
71574307Sjlemon
7161573Srgrimes	newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs);
7178870Srgrimes	pathv = pglob->gl_pathv ?
7181573Srgrimes		    realloc((char *)pglob->gl_pathv, newsize) :
7191573Srgrimes		    malloc(newsize);
72074918Speter	if (pathv == NULL) {
72174918Speter		if (pglob->gl_pathv) {
72274918Speter			free(pglob->gl_pathv);
72374918Speter			pglob->gl_pathv = NULL;
72474918Speter		}
7251573Srgrimes		return(GLOB_NOSPACE);
72674918Speter	}
7271573Srgrimes
7281573Srgrimes	if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
7291573Srgrimes		/* first time around -- clear initial gl_offs items */
7301573Srgrimes		pathv += pglob->gl_offs;
731158812Sache		for (i = pglob->gl_offs + 1; --i > 0; )
7321573Srgrimes			*--pathv = NULL;
7331573Srgrimes	}
7341573Srgrimes	pglob->gl_pathv = pathv;
7351573Srgrimes
7361573Srgrimes	for (p = path; *p++;)
7371573Srgrimes		continue;
738132817Stjr	len = MB_CUR_MAX * (size_t)(p - path);	/* XXX overallocation */
73974921Speter	if ((copy = malloc(len)) != NULL) {
74074921Speter		if (g_Ctoc(path, copy, len)) {
74174918Speter			free(copy);
74274918Speter			return (GLOB_NOSPACE);
74374918Speter		}
7441573Srgrimes		pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
7451573Srgrimes	}
7461573Srgrimes	pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
7471573Srgrimes	return(copy == NULL ? GLOB_NOSPACE : 0);
7481573Srgrimes}
7491573Srgrimes
7501573Srgrimes/*
7511573Srgrimes * pattern matching function for filenames.  Each occurrence of the *
7521573Srgrimes * pattern causes a recursion level.
7531573Srgrimes */
7541573Srgrimesstatic int
755159294Sdelphijmatch(Char *name, Char *pat, Char *patend)
7561573Srgrimes{
7571573Srgrimes	int ok, negate_range;
7581573Srgrimes	Char c, k;
759227753Stheraven	struct xlocale_collate *table =
760227753Stheraven		(struct xlocale_collate*)__get_locale()->components[XLC_COLLATE];
7611573Srgrimes
7621573Srgrimes	while (pat < patend) {
7631573Srgrimes		c = *pat++;
7641573Srgrimes		switch (c & M_MASK) {
7651573Srgrimes		case M_ALL:
7661573Srgrimes			if (pat == patend)
7671573Srgrimes				return(1);
7688870Srgrimes			do
7691573Srgrimes			    if (match(name, pat, patend))
7701573Srgrimes				    return(1);
7711573Srgrimes			while (*name++ != EOS);
7721573Srgrimes			return(0);
7731573Srgrimes		case M_ONE:
7741573Srgrimes			if (*name++ == EOS)
7751573Srgrimes				return(0);
7761573Srgrimes			break;
7771573Srgrimes		case M_SET:
7781573Srgrimes			ok = 0;
7791573Srgrimes			if ((k = *name++) == EOS)
7801573Srgrimes				return(0);
7811573Srgrimes			if ((negate_range = ((*pat & M_MASK) == M_NOT)) != EOS)
7821573Srgrimes				++pat;
7831573Srgrimes			while (((c = *pat++) & M_MASK) != M_END)
7841573Srgrimes				if ((*pat & M_MASK) == M_RNG) {
785227753Stheraven					if (table->__collate_load_error ?
78624633Sache					    CHAR(c) <= CHAR(k) && CHAR(k) <= CHAR(pat[1]) :
787227753Stheraven					       __collate_range_cmp(table, CHAR(c), CHAR(k)) <= 0
788227753Stheraven					    && __collate_range_cmp(table, CHAR(k), CHAR(pat[1])) <= 0
78917528Sache					   )
7901573Srgrimes						ok = 1;
7911573Srgrimes					pat += 2;
7921573Srgrimes				} else if (c == k)
7931573Srgrimes					ok = 1;
7941573Srgrimes			if (ok == negate_range)
7951573Srgrimes				return(0);
7961573Srgrimes			break;
7971573Srgrimes		default:
7981573Srgrimes			if (*name++ != c)
7991573Srgrimes				return(0);
8001573Srgrimes			break;
8011573Srgrimes		}
8021573Srgrimes	}
8031573Srgrimes	return(*name == EOS);
8041573Srgrimes}
8051573Srgrimes
8061573Srgrimes/* Free allocated data belonging to a glob_t structure. */
8071573Srgrimesvoid
808159294Sdelphijglobfree(glob_t *pglob)
8091573Srgrimes{
810158812Sache	size_t i;
81190045Sobrien	char **pp;
8121573Srgrimes
8131573Srgrimes	if (pglob->gl_pathv != NULL) {
8141573Srgrimes		pp = pglob->gl_pathv + pglob->gl_offs;
8151573Srgrimes		for (i = pglob->gl_pathc; i--; ++pp)
8161573Srgrimes			if (*pp)
8171573Srgrimes				free(*pp);
8181573Srgrimes		free(pglob->gl_pathv);
81974918Speter		pglob->gl_pathv = NULL;
8201573Srgrimes	}
8211573Srgrimes}
8221573Srgrimes
8231573Srgrimesstatic DIR *
824159294Sdelphijg_opendir(Char *str, glob_t *pglob)
8251573Srgrimes{
8261573Srgrimes	char buf[MAXPATHLEN];
8271573Srgrimes
8281573Srgrimes	if (!*str)
8291573Srgrimes		strcpy(buf, ".");
83074918Speter	else {
83174921Speter		if (g_Ctoc(str, buf, sizeof(buf)))
83274918Speter			return (NULL);
83374918Speter	}
8341573Srgrimes
8351573Srgrimes	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
8361573Srgrimes		return((*pglob->gl_opendir)(buf));
8371573Srgrimes
8381573Srgrimes	return(opendir(buf));
8391573Srgrimes}
8401573Srgrimes
8411573Srgrimesstatic int
842159294Sdelphijg_lstat(Char *fn, struct stat *sb, glob_t *pglob)
8431573Srgrimes{
8441573Srgrimes	char buf[MAXPATHLEN];
8451573Srgrimes
84674921Speter	if (g_Ctoc(fn, buf, sizeof(buf))) {
84774918Speter		errno = ENAMETOOLONG;
84874918Speter		return (-1);
84974918Speter	}
8501573Srgrimes	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
8511573Srgrimes		return((*pglob->gl_lstat)(buf, sb));
8521573Srgrimes	return(lstat(buf, sb));
8531573Srgrimes}
8541573Srgrimes
8551573Srgrimesstatic int
856159294Sdelphijg_stat(Char *fn, struct stat *sb, glob_t *pglob)
8571573Srgrimes{
8581573Srgrimes	char buf[MAXPATHLEN];
8591573Srgrimes
86074921Speter	if (g_Ctoc(fn, buf, sizeof(buf))) {
86174918Speter		errno = ENAMETOOLONG;
86274918Speter		return (-1);
86374918Speter	}
8641573Srgrimes	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
8651573Srgrimes		return((*pglob->gl_stat)(buf, sb));
8661573Srgrimes	return(stat(buf, sb));
8671573Srgrimes}
8681573Srgrimes
869180021Smtmstatic const Char *
870180021Smtmg_strchr(const Char *str, wchar_t ch)
8711573Srgrimes{
872159294Sdelphij
8731573Srgrimes	do {
8741573Srgrimes		if (*str == ch)
8751573Srgrimes			return (str);
8761573Srgrimes	} while (*str++);
8771573Srgrimes	return (NULL);
8781573Srgrimes}
8791573Srgrimes
88074918Speterstatic int
881159294Sdelphijg_Ctoc(const Char *str, char *buf, size_t len)
8821573Srgrimes{
883132817Stjr	mbstate_t mbs;
884132817Stjr	size_t clen;
8851573Srgrimes
886132817Stjr	memset(&mbs, 0, sizeof(mbs));
887132817Stjr	while (len >= MB_CUR_MAX) {
888132817Stjr		clen = wcrtomb(buf, *str, &mbs);
889132817Stjr		if (clen == (size_t)-1)
890132817Stjr			return (1);
891132817Stjr		if (*str == L'\0')
89274921Speter			return (0);
893132817Stjr		str++;
894132817Stjr		buf += clen;
895132817Stjr		len -= clen;
89674921Speter	}
89774921Speter	return (1);
8981573Srgrimes}
8991573Srgrimes
9001573Srgrimes#ifdef DEBUG
9018870Srgrimesstatic void
902159294Sdelphijqprintf(const char *str, Char *s)
9031573Srgrimes{
90490045Sobrien	Char *p;
9051573Srgrimes
9061573Srgrimes	(void)printf("%s:\n", str);
9071573Srgrimes	for (p = s; *p; p++)
9081573Srgrimes		(void)printf("%c", CHAR(*p));
9091573Srgrimes	(void)printf("\n");
9101573Srgrimes	for (p = s; *p; p++)
9111573Srgrimes		(void)printf("%c", *p & M_PROTECT ? '"' : ' ');
9121573Srgrimes	(void)printf("\n");
9131573Srgrimes	for (p = s; *p; p++)
9141573Srgrimes		(void)printf("%c", ismeta(*p) ? '_' : ' ');
9151573Srgrimes	(void)printf("\n");
9161573Srgrimes}
9171573Srgrimes#endif
918