glob.c revision 228754
11573Srgrimes/*
21573Srgrimes * Copyright (c) 1989, 1993
31573Srgrimes *	The Regents of the University of California.  All rights reserved.
41573Srgrimes *
51573Srgrimes * This code is derived from software contributed to Berkeley by
61573Srgrimes * Guido van Rossum.
71573Srgrimes *
8227753Stheraven * Copyright (c) 2011 The FreeBSD Foundation
9227753Stheraven * All rights reserved.
10227753Stheraven * Portions of this software were developed by David Chisnall
11227753Stheraven * under sponsorship from the FreeBSD Foundation.
12227753Stheraven *
131573Srgrimes * Redistribution and use in source and binary forms, with or without
141573Srgrimes * modification, are permitted provided that the following conditions
151573Srgrimes * are met:
161573Srgrimes * 1. Redistributions of source code must retain the above copyright
171573Srgrimes *    notice, this list of conditions and the following disclaimer.
181573Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
191573Srgrimes *    notice, this list of conditions and the following disclaimer in the
201573Srgrimes *    documentation and/or other materials provided with the distribution.
211573Srgrimes * 4. Neither the name of the University nor the names of its contributors
221573Srgrimes *    may be used to endorse or promote products derived from this software
231573Srgrimes *    without specific prior written permission.
241573Srgrimes *
251573Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
261573Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
271573Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
281573Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
291573Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
301573Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
311573Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
321573Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
331573Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
341573Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
351573Srgrimes * SUCH DAMAGE.
361573Srgrimes */
371573Srgrimes
381573Srgrimes#if defined(LIBC_SCCS) && !defined(lint)
391573Srgrimesstatic char sccsid[] = "@(#)glob.c	8.3 (Berkeley) 10/13/93";
401573Srgrimes#endif /* LIBC_SCCS and not lint */
4190045Sobrien#include <sys/cdefs.h>
4290045Sobrien__FBSDID("$FreeBSD: head/lib/libc/gen/glob.c 228754 2011-12-20 22:56:13Z eadler $");
431573Srgrimes
441573Srgrimes/*
451573Srgrimes * glob(3) -- a superset of the one defined in POSIX 1003.2.
461573Srgrimes *
471573Srgrimes * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
481573Srgrimes *
491573Srgrimes * Optional extra services, controlled by flags not defined by POSIX:
501573Srgrimes *
511573Srgrimes * GLOB_QUOTE:
521573Srgrimes *	Escaping convention: \ inhibits any special meaning the following
531573Srgrimes *	character might have (except \ at end of string is retained).
541573Srgrimes * GLOB_MAGCHAR:
551573Srgrimes *	Set in gl_flags if pattern contained a globbing character.
561573Srgrimes * GLOB_NOMAGIC:
571573Srgrimes *	Same as GLOB_NOCHECK, but it will only append pattern if it did
581573Srgrimes *	not contain any magic characters.  [Used in csh style globbing]
591573Srgrimes * GLOB_ALTDIRFUNC:
601573Srgrimes *	Use alternately specified directory access functions.
611573Srgrimes * GLOB_TILDE:
621573Srgrimes *	expand ~user/foo to the /home/dir/of/user/foo
631573Srgrimes * GLOB_BRACE:
648870Srgrimes *	expand {1,2}{a,b} to 1a 1b 2a 2b
651573Srgrimes * gl_matchc:
661573Srgrimes *	Number of matches in the current invocation of glob.
671573Srgrimes */
681573Srgrimes
69132817Stjr/*
70132817Stjr * Some notes on multibyte character support:
71132817Stjr * 1. Patterns with illegal byte sequences match nothing - even if
72132817Stjr *    GLOB_NOCHECK is specified.
73132817Stjr * 2. Illegal byte sequences in filenames are handled by treating them as
74132817Stjr *    single-byte characters with a value of the first byte of the sequence
75132817Stjr *    cast to wchar_t.
76132817Stjr * 3. State-dependent encodings are not currently supported.
77132817Stjr */
78132817Stjr
791573Srgrimes#include <sys/param.h>
801573Srgrimes#include <sys/stat.h>
811573Srgrimes
821573Srgrimes#include <ctype.h>
831573Srgrimes#include <dirent.h>
841573Srgrimes#include <errno.h>
851573Srgrimes#include <glob.h>
86132817Stjr#include <limits.h>
871573Srgrimes#include <pwd.h>
88132817Stjr#include <stdint.h>
891573Srgrimes#include <stdio.h>
901573Srgrimes#include <stdlib.h>
911573Srgrimes#include <string.h>
921573Srgrimes#include <unistd.h>
93132817Stjr#include <wchar.h>
941573Srgrimes
9519276Sache#include "collate.h"
9619276Sache
971573Srgrimes#define	DOLLAR		'$'
981573Srgrimes#define	DOT		'.'
991573Srgrimes#define	EOS		'\0'
1001573Srgrimes#define	LBRACKET	'['
1011573Srgrimes#define	NOT		'!'
1021573Srgrimes#define	QUESTION	'?'
1031573Srgrimes#define	QUOTE		'\\'
1041573Srgrimes#define	RANGE		'-'
1051573Srgrimes#define	RBRACKET	']'
1061573Srgrimes#define	SEP		'/'
1071573Srgrimes#define	STAR		'*'
1081573Srgrimes#define	TILDE		'~'
1091573Srgrimes#define	UNDERSCORE	'_'
1101573Srgrimes#define	LBRACE		'{'
1111573Srgrimes#define	RBRACE		'}'
1121573Srgrimes#define	SLASH		'/'
1131573Srgrimes#define	COMMA		','
1141573Srgrimes
1151573Srgrimes#ifndef DEBUG
1161573Srgrimes
117132817Stjr#define	M_QUOTE		0x8000000000ULL
118132817Stjr#define	M_PROTECT	0x4000000000ULL
119132817Stjr#define	M_MASK		0xffffffffffULL
120132817Stjr#define	M_CHAR		0x00ffffffffULL
1211573Srgrimes
122132817Stjrtypedef uint_fast64_t Char;
1231573Srgrimes
1241573Srgrimes#else
1251573Srgrimes
1261573Srgrimes#define	M_QUOTE		0x80
1271573Srgrimes#define	M_PROTECT	0x40
1281573Srgrimes#define	M_MASK		0xff
129132817Stjr#define	M_CHAR		0x7f
1301573Srgrimes
1311573Srgrimestypedef char Char;
1321573Srgrimes
1331573Srgrimes#endif
1341573Srgrimes
1351573Srgrimes
136132817Stjr#define	CHAR(c)		((Char)((c)&M_CHAR))
1371573Srgrimes#define	META(c)		((Char)((c)|M_QUOTE))
1381573Srgrimes#define	M_ALL		META('*')
1391573Srgrimes#define	M_END		META(']')
1401573Srgrimes#define	M_NOT		META('!')
1411573Srgrimes#define	M_ONE		META('?')
1421573Srgrimes#define	M_RNG		META('-')
1431573Srgrimes#define	M_SET		META('[')
1441573Srgrimes#define	ismeta(c)	(((c)&M_QUOTE) != 0)
1451573Srgrimes
1461573Srgrimes
14790045Sobrienstatic int	 compare(const void *, const void *);
148158812Sachestatic int	 g_Ctoc(const Char *, char *, size_t);
14990045Sobrienstatic int	 g_lstat(Char *, struct stat *, glob_t *);
15090045Sobrienstatic DIR	*g_opendir(Char *, glob_t *);
151180021Smtmstatic const Char *g_strchr(const Char *, wchar_t);
1521573Srgrimes#ifdef notdef
15390045Sobrienstatic Char	*g_strcat(Char *, const Char *);
1541573Srgrimes#endif
15590045Sobrienstatic int	 g_stat(Char *, struct stat *, glob_t *);
156158812Sachestatic int	 glob0(const Char *, glob_t *, size_t *);
157158812Sachestatic int	 glob1(Char *, glob_t *, size_t *);
158158812Sachestatic int	 glob2(Char *, Char *, Char *, Char *, glob_t *, size_t *);
159158812Sachestatic int	 glob3(Char *, Char *, Char *, Char *, Char *, glob_t *, size_t *);
160158812Sachestatic int	 globextend(const Char *, glob_t *, size_t *);
16174963Speterstatic const Char *
16290045Sobrien		 globtilde(const Char *, Char *, size_t, glob_t *);
163158812Sachestatic int	 globexp1(const Char *, glob_t *, size_t *);
164158812Sachestatic int	 globexp2(const Char *, const Char *, glob_t *, int *, size_t *);
16590045Sobrienstatic int	 match(Char *, Char *, Char *);
1661573Srgrimes#ifdef DEBUG
16790045Sobrienstatic void	 qprintf(const char *, Char *);
1681573Srgrimes#endif
1691573Srgrimes
1701573Srgrimesint
171228754Seadlerglob(const char * __restrict pattern, int flags,
172228754Seadler	 int (*errfunc)(const char *, int), glob_t * __restrict pglob)
1731573Srgrimes{
174159294Sdelphij	const char *patnext;
175158812Sache	size_t limit;
176132817Stjr	Char *bufnext, *bufend, patbuf[MAXPATHLEN], prot;
177132817Stjr	mbstate_t mbs;
178132817Stjr	wchar_t wc;
179132817Stjr	size_t clen;
1801573Srgrimes
181159294Sdelphij	patnext = pattern;
1821573Srgrimes	if (!(flags & GLOB_APPEND)) {
1831573Srgrimes		pglob->gl_pathc = 0;
1841573Srgrimes		pglob->gl_pathv = NULL;
1851573Srgrimes		if (!(flags & GLOB_DOOFFS))
1861573Srgrimes			pglob->gl_offs = 0;
1871573Srgrimes	}
18880525Smikeh	if (flags & GLOB_LIMIT) {
18974469Sjlemon		limit = pglob->gl_matchc;
19080525Smikeh		if (limit == 0)
19180525Smikeh			limit = ARG_MAX;
19280525Smikeh	} else
19374469Sjlemon		limit = 0;
1941573Srgrimes	pglob->gl_flags = flags & ~GLOB_MAGCHAR;
1951573Srgrimes	pglob->gl_errfunc = errfunc;
1961573Srgrimes	pglob->gl_matchc = 0;
1971573Srgrimes
1981573Srgrimes	bufnext = patbuf;
19974963Speter	bufend = bufnext + MAXPATHLEN - 1;
200132817Stjr	if (flags & GLOB_NOESCAPE) {
201132817Stjr		memset(&mbs, 0, sizeof(mbs));
202132817Stjr		while (bufend - bufnext >= MB_CUR_MAX) {
203132817Stjr			clen = mbrtowc(&wc, patnext, MB_LEN_MAX, &mbs);
204132817Stjr			if (clen == (size_t)-1 || clen == (size_t)-2)
205132817Stjr				return (GLOB_NOMATCH);
206132817Stjr			else if (clen == 0)
207132817Stjr				break;
208132817Stjr			*bufnext++ = wc;
209132817Stjr			patnext += clen;
210132817Stjr		}
211132817Stjr	} else {
2121573Srgrimes		/* Protect the quoted characters. */
213132817Stjr		memset(&mbs, 0, sizeof(mbs));
214132817Stjr		while (bufend - bufnext >= MB_CUR_MAX) {
215132817Stjr			if (*patnext == QUOTE) {
216132817Stjr				if (*++patnext == EOS) {
217132817Stjr					*bufnext++ = QUOTE | M_PROTECT;
218132817Stjr					continue;
2191573Srgrimes				}
220132817Stjr				prot = M_PROTECT;
221132817Stjr			} else
222132817Stjr				prot = 0;
223132817Stjr			clen = mbrtowc(&wc, patnext, MB_LEN_MAX, &mbs);
224132817Stjr			if (clen == (size_t)-1 || clen == (size_t)-2)
225132817Stjr				return (GLOB_NOMATCH);
226132817Stjr			else if (clen == 0)
227132817Stjr				break;
228132817Stjr			*bufnext++ = wc | prot;
229132817Stjr			patnext += clen;
230132817Stjr		}
2311573Srgrimes	}
2321573Srgrimes	*bufnext = EOS;
2331573Srgrimes
2341573Srgrimes	if (flags & GLOB_BRACE)
23574469Sjlemon	    return globexp1(patbuf, pglob, &limit);
2361573Srgrimes	else
23774469Sjlemon	    return glob0(patbuf, pglob, &limit);
2381573Srgrimes}
2391573Srgrimes
2401573Srgrimes/*
2411573Srgrimes * Expand recursively a glob {} pattern. When there is no more expansion
2421573Srgrimes * invoke the standard globbing routine to glob the rest of the magic
2431573Srgrimes * characters
2441573Srgrimes */
24574963Speterstatic int
246159294Sdelphijglobexp1(const Char *pattern, glob_t *pglob, size_t *limit)
2471573Srgrimes{
2481573Srgrimes	const Char* ptr = pattern;
2491573Srgrimes	int rv;
2501573Srgrimes
2511573Srgrimes	/* Protect a single {}, for find(1), like csh */
2521573Srgrimes	if (pattern[0] == LBRACE && pattern[1] == RBRACE && pattern[2] == EOS)
25374469Sjlemon		return glob0(pattern, pglob, limit);
2541573Srgrimes
255180021Smtm	while ((ptr = g_strchr(ptr, LBRACE)) != NULL)
25674469Sjlemon		if (!globexp2(ptr, pattern, pglob, &rv, limit))
2571573Srgrimes			return rv;
2581573Srgrimes
25974469Sjlemon	return glob0(pattern, pglob, limit);
2601573Srgrimes}
2611573Srgrimes
2621573Srgrimes
2631573Srgrimes/*
2641573Srgrimes * Recursive brace globbing helper. Tries to expand a single brace.
2651573Srgrimes * If it succeeds then it invokes globexp1 with the new pattern.
2661573Srgrimes * If it fails then it tries to glob the rest of the pattern and returns.
2671573Srgrimes */
26874963Speterstatic int
269159294Sdelphijglobexp2(const Char *ptr, const Char *pattern, glob_t *pglob, int *rv, size_t *limit)
2701573Srgrimes{
2711573Srgrimes	int     i;
2721573Srgrimes	Char   *lm, *ls;
273150137Sache	const Char *pe, *pm, *pm1, *pl;
27474963Speter	Char    patbuf[MAXPATHLEN];
2751573Srgrimes
2761573Srgrimes	/* copy part up to the brace */
2771573Srgrimes	for (lm = patbuf, pm = pattern; pm != ptr; *lm++ = *pm++)
2781573Srgrimes		continue;
27974963Speter	*lm = EOS;
2801573Srgrimes	ls = lm;
2811573Srgrimes
2821573Srgrimes	/* Find the balanced brace */
2831573Srgrimes	for (i = 0, pe = ++ptr; *pe; pe++)
2841573Srgrimes		if (*pe == LBRACKET) {
2851573Srgrimes			/* Ignore everything between [] */
2861573Srgrimes			for (pm = pe++; *pe != RBRACKET && *pe != EOS; pe++)
2871573Srgrimes				continue;
2881573Srgrimes			if (*pe == EOS) {
2898870Srgrimes				/*
2901573Srgrimes				 * We could not find a matching RBRACKET.
2911573Srgrimes				 * Ignore and just look for RBRACE
2921573Srgrimes				 */
2931573Srgrimes				pe = pm;
2941573Srgrimes			}
2951573Srgrimes		}
2961573Srgrimes		else if (*pe == LBRACE)
2971573Srgrimes			i++;
2981573Srgrimes		else if (*pe == RBRACE) {
2991573Srgrimes			if (i == 0)
3001573Srgrimes				break;
3011573Srgrimes			i--;
3021573Srgrimes		}
3031573Srgrimes
3041573Srgrimes	/* Non matching braces; just glob the pattern */
3051573Srgrimes	if (i != 0 || *pe == EOS) {
30674469Sjlemon		*rv = glob0(patbuf, pglob, limit);
3071573Srgrimes		return 0;
3081573Srgrimes	}
3091573Srgrimes
3101573Srgrimes	for (i = 0, pl = pm = ptr; pm <= pe; pm++)
3111573Srgrimes		switch (*pm) {
3121573Srgrimes		case LBRACKET:
3131573Srgrimes			/* Ignore everything between [] */
314150137Sache			for (pm1 = pm++; *pm != RBRACKET && *pm != EOS; pm++)
3151573Srgrimes				continue;
3161573Srgrimes			if (*pm == EOS) {
3178870Srgrimes				/*
3181573Srgrimes				 * We could not find a matching RBRACKET.
3191573Srgrimes				 * Ignore and just look for RBRACE
3201573Srgrimes				 */
321150137Sache				pm = pm1;
3221573Srgrimes			}
3231573Srgrimes			break;
3241573Srgrimes
3251573Srgrimes		case LBRACE:
3261573Srgrimes			i++;
3271573Srgrimes			break;
3281573Srgrimes
3291573Srgrimes		case RBRACE:
3301573Srgrimes			if (i) {
3311573Srgrimes			    i--;
3321573Srgrimes			    break;
3331573Srgrimes			}
3341573Srgrimes			/* FALLTHROUGH */
3351573Srgrimes		case COMMA:
3361573Srgrimes			if (i && *pm == COMMA)
3371573Srgrimes				break;
3381573Srgrimes			else {
3391573Srgrimes				/* Append the current string */
3401573Srgrimes				for (lm = ls; (pl < pm); *lm++ = *pl++)
3411573Srgrimes					continue;
3428870Srgrimes				/*
3431573Srgrimes				 * Append the rest of the pattern after the
3441573Srgrimes				 * closing brace
3451573Srgrimes				 */
3461573Srgrimes				for (pl = pe + 1; (*lm++ = *pl++) != EOS;)
3471573Srgrimes					continue;
3481573Srgrimes
3491573Srgrimes				/* Expand the current pattern */
3501573Srgrimes#ifdef DEBUG
3511573Srgrimes				qprintf("globexp2:", patbuf);
3521573Srgrimes#endif
35374469Sjlemon				*rv = globexp1(patbuf, pglob, limit);
3541573Srgrimes
3551573Srgrimes				/* move after the comma, to the next string */
3561573Srgrimes				pl = pm + 1;
3571573Srgrimes			}
3581573Srgrimes			break;
3591573Srgrimes
3601573Srgrimes		default:
3611573Srgrimes			break;
3621573Srgrimes		}
3631573Srgrimes	*rv = 0;
3641573Srgrimes	return 0;
3651573Srgrimes}
3661573Srgrimes
3671573Srgrimes
3681573Srgrimes
3691573Srgrimes/*
3701573Srgrimes * expand tilde from the passwd file.
3711573Srgrimes */
3721573Srgrimesstatic const Char *
373159294Sdelphijglobtilde(const Char *pattern, Char *patbuf, size_t patbuf_len, glob_t *pglob)
3741573Srgrimes{
3751573Srgrimes	struct passwd *pwd;
3761573Srgrimes	char *h;
3771573Srgrimes	const Char *p;
37824158Simp	Char *b, *eb;
3791573Srgrimes
3801573Srgrimes	if (*pattern != TILDE || !(pglob->gl_flags & GLOB_TILDE))
3811573Srgrimes		return pattern;
3821573Srgrimes
38324158Simp	/*
38424158Simp	 * Copy up to the end of the string or /
38524158Simp	 */
38624158Simp	eb = &patbuf[patbuf_len - 1];
38724158Simp	for (p = pattern + 1, h = (char *) patbuf;
38824158Simp	    h < (char *)eb && *p && *p != SLASH; *h++ = *p++)
3891573Srgrimes		continue;
3901573Srgrimes
3911573Srgrimes	*h = EOS;
3921573Srgrimes
3931573Srgrimes	if (((char *) patbuf)[0] == EOS) {
3948870Srgrimes		/*
39528820Simp		 * handle a plain ~ or ~/ by expanding $HOME first (iff
39628820Simp		 * we're not running setuid or setgid) and then trying
39728820Simp		 * the password file
3981573Srgrimes		 */
399121667Stjr		if (issetugid() != 0 ||
40033664Sjb		    (h = getenv("HOME")) == NULL) {
40128836Sache			if (((h = getlogin()) != NULL &&
40228836Sache			     (pwd = getpwnam(h)) != NULL) ||
40328836Sache			    (pwd = getpwuid(getuid())) != NULL)
40428836Sache				h = pwd->pw_dir;
40528836Sache			else
4061573Srgrimes				return pattern;
4071573Srgrimes		}
4081573Srgrimes	}
4091573Srgrimes	else {
4101573Srgrimes		/*
4111573Srgrimes		 * Expand a ~user
4121573Srgrimes		 */
4131573Srgrimes		if ((pwd = getpwnam((char*) patbuf)) == NULL)
4141573Srgrimes			return pattern;
4151573Srgrimes		else
4161573Srgrimes			h = pwd->pw_dir;
4171573Srgrimes	}
4181573Srgrimes
4191573Srgrimes	/* Copy the home directory */
42024158Simp	for (b = patbuf; b < eb && *h; *b++ = *h++)
4211573Srgrimes		continue;
4228870Srgrimes
4231573Srgrimes	/* Append the rest of the pattern */
42424158Simp	while (b < eb && (*b++ = *p++) != EOS)
4251573Srgrimes		continue;
42624158Simp	*b = EOS;
4271573Srgrimes
4281573Srgrimes	return patbuf;
4291573Srgrimes}
4301573Srgrimes
4318870Srgrimes
4321573Srgrimes/*
4331573Srgrimes * The main glob() routine: compiles the pattern (optionally processing
4341573Srgrimes * quotes), calls glob1() to do the real pattern matching, and finally
4351573Srgrimes * sorts the list (unless unsorted operation is requested).  Returns 0
436100217Smikeh * if things went well, nonzero if errors occurred.
4371573Srgrimes */
4381573Srgrimesstatic int
439159294Sdelphijglob0(const Char *pattern, glob_t *pglob, size_t *limit)
4401573Srgrimes{
4411573Srgrimes	const Char *qpatnext;
442207981Sgordon	int err;
443158812Sache	size_t oldpathc;
444207981Sgordon	Char *bufnext, c, patbuf[MAXPATHLEN];
4451573Srgrimes
44674963Speter	qpatnext = globtilde(pattern, patbuf, MAXPATHLEN, pglob);
4471573Srgrimes	oldpathc = pglob->gl_pathc;
4481573Srgrimes	bufnext = patbuf;
4491573Srgrimes
4501573Srgrimes	/* We don't need to check for buffer overflow any more. */
4511573Srgrimes	while ((c = *qpatnext++) != EOS) {
4521573Srgrimes		switch (c) {
4531573Srgrimes		case LBRACKET:
4541573Srgrimes			c = *qpatnext;
4551573Srgrimes			if (c == NOT)
4561573Srgrimes				++qpatnext;
4571573Srgrimes			if (*qpatnext == EOS ||
458180021Smtm			    g_strchr(qpatnext+1, RBRACKET) == NULL) {
4591573Srgrimes				*bufnext++ = LBRACKET;
4601573Srgrimes				if (c == NOT)
4611573Srgrimes					--qpatnext;
4621573Srgrimes				break;
4631573Srgrimes			}
4641573Srgrimes			*bufnext++ = M_SET;
4651573Srgrimes			if (c == NOT)
4661573Srgrimes				*bufnext++ = M_NOT;
4671573Srgrimes			c = *qpatnext++;
4681573Srgrimes			do {
4691573Srgrimes				*bufnext++ = CHAR(c);
4701573Srgrimes				if (*qpatnext == RANGE &&
4711573Srgrimes				    (c = qpatnext[1]) != RBRACKET) {
4721573Srgrimes					*bufnext++ = M_RNG;
4731573Srgrimes					*bufnext++ = CHAR(c);
4741573Srgrimes					qpatnext += 2;
4751573Srgrimes				}
4761573Srgrimes			} while ((c = *qpatnext++) != RBRACKET);
4771573Srgrimes			pglob->gl_flags |= GLOB_MAGCHAR;
4781573Srgrimes			*bufnext++ = M_END;
4791573Srgrimes			break;
4801573Srgrimes		case QUESTION:
4811573Srgrimes			pglob->gl_flags |= GLOB_MAGCHAR;
4821573Srgrimes			*bufnext++ = M_ONE;
4831573Srgrimes			break;
4841573Srgrimes		case STAR:
4851573Srgrimes			pglob->gl_flags |= GLOB_MAGCHAR;
4868870Srgrimes			/* collapse adjacent stars to one,
4871573Srgrimes			 * to avoid exponential behavior
4881573Srgrimes			 */
4891573Srgrimes			if (bufnext == patbuf || bufnext[-1] != M_ALL)
4901573Srgrimes			    *bufnext++ = M_ALL;
4911573Srgrimes			break;
4921573Srgrimes		default:
4931573Srgrimes			*bufnext++ = CHAR(c);
4941573Srgrimes			break;
4951573Srgrimes		}
4961573Srgrimes	}
4971573Srgrimes	*bufnext = EOS;
4981573Srgrimes#ifdef DEBUG
4991573Srgrimes	qprintf("glob0:", patbuf);
5001573Srgrimes#endif
5011573Srgrimes
50274469Sjlemon	if ((err = glob1(patbuf, pglob, limit)) != 0)
5031573Srgrimes		return(err);
5041573Srgrimes
5051573Srgrimes	/*
5068870Srgrimes	 * If there was no match we are going to append the pattern
5071573Srgrimes	 * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified
5081573Srgrimes	 * and the pattern did not contain any magic characters
5091573Srgrimes	 * GLOB_NOMAGIC is there just for compatibility with csh.
5101573Srgrimes	 */
511100217Smikeh	if (pglob->gl_pathc == oldpathc) {
512100217Smikeh		if (((pglob->gl_flags & GLOB_NOCHECK) ||
513100217Smikeh		    ((pglob->gl_flags & GLOB_NOMAGIC) &&
514100217Smikeh			!(pglob->gl_flags & GLOB_MAGCHAR))))
515100217Smikeh			return(globextend(pattern, pglob, limit));
516100217Smikeh		else
517100217Smikeh			return(GLOB_NOMATCH);
518100217Smikeh	}
519100217Smikeh	if (!(pglob->gl_flags & GLOB_NOSORT))
5201573Srgrimes		qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc,
5211573Srgrimes		    pglob->gl_pathc - oldpathc, sizeof(char *), compare);
5221573Srgrimes	return(0);
5231573Srgrimes}
5241573Srgrimes
5251573Srgrimesstatic int
526159294Sdelphijcompare(const void *p, const void *q)
5271573Srgrimes{
5281573Srgrimes	return(strcmp(*(char **)p, *(char **)q));
5291573Srgrimes}
5301573Srgrimes
5311573Srgrimesstatic int
532159294Sdelphijglob1(Char *pattern, glob_t *pglob, size_t *limit)
5331573Srgrimes{
53474963Speter	Char pathbuf[MAXPATHLEN];
5351573Srgrimes
5361573Srgrimes	/* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */
5371573Srgrimes	if (*pattern == EOS)
5381573Srgrimes		return(0);
53974963Speter	return(glob2(pathbuf, pathbuf, pathbuf + MAXPATHLEN - 1,
54074963Speter	    pattern, pglob, limit));
5411573Srgrimes}
5421573Srgrimes
5431573Srgrimes/*
5441573Srgrimes * The functions glob2 and glob3 are mutually recursive; there is one level
5451573Srgrimes * of recursion for each segment in the pattern that contains one or more
5461573Srgrimes * meta characters.
5471573Srgrimes */
5481573Srgrimesstatic int
549159294Sdelphijglob2(Char *pathbuf, Char *pathend, Char *pathend_last, Char *pattern,
550159294Sdelphij      glob_t *pglob, size_t *limit)
5511573Srgrimes{
5521573Srgrimes	struct stat sb;
5531573Srgrimes	Char *p, *q;
5541573Srgrimes	int anymeta;
5551573Srgrimes
5561573Srgrimes	/*
5571573Srgrimes	 * Loop over pattern segments until end of pattern or until
5581573Srgrimes	 * segment with meta character found.
5591573Srgrimes	 */
5601573Srgrimes	for (anymeta = 0;;) {
5611573Srgrimes		if (*pattern == EOS) {		/* End of pattern? */
5621573Srgrimes			*pathend = EOS;
5631573Srgrimes			if (g_lstat(pathbuf, &sb, pglob))
5641573Srgrimes				return(0);
5658870Srgrimes
5661573Srgrimes			if (((pglob->gl_flags & GLOB_MARK) &&
5671573Srgrimes			    pathend[-1] != SEP) && (S_ISDIR(sb.st_mode)
5681573Srgrimes			    || (S_ISLNK(sb.st_mode) &&
5691573Srgrimes			    (g_stat(pathbuf, &sb, pglob) == 0) &&
5701573Srgrimes			    S_ISDIR(sb.st_mode)))) {
57174963Speter				if (pathend + 1 > pathend_last)
572100217Smikeh					return (GLOB_ABORTED);
5731573Srgrimes				*pathend++ = SEP;
5741573Srgrimes				*pathend = EOS;
5751573Srgrimes			}
5761573Srgrimes			++pglob->gl_matchc;
57774469Sjlemon			return(globextend(pathbuf, pglob, limit));
5781573Srgrimes		}
5791573Srgrimes
5801573Srgrimes		/* Find end of next segment, copy tentatively to pathend. */
5811573Srgrimes		q = pathend;
5821573Srgrimes		p = pattern;
5831573Srgrimes		while (*p != EOS && *p != SEP) {
5841573Srgrimes			if (ismeta(*p))
5851573Srgrimes				anymeta = 1;
58674963Speter			if (q + 1 > pathend_last)
587100217Smikeh				return (GLOB_ABORTED);
5881573Srgrimes			*q++ = *p++;
5891573Srgrimes		}
5901573Srgrimes
5911573Srgrimes		if (!anymeta) {		/* No expansion, do next segment. */
5921573Srgrimes			pathend = q;
5931573Srgrimes			pattern = p;
59474963Speter			while (*pattern == SEP) {
59574963Speter				if (pathend + 1 > pathend_last)
596100217Smikeh					return (GLOB_ABORTED);
5971573Srgrimes				*pathend++ = *pattern++;
59874963Speter			}
5991573Srgrimes		} else			/* Need expansion, recurse. */
60074963Speter			return(glob3(pathbuf, pathend, pathend_last, pattern, p,
60174963Speter			    pglob, limit));
6021573Srgrimes	}
6031573Srgrimes	/* NOTREACHED */
6041573Srgrimes}
6051573Srgrimes
6061573Srgrimesstatic int
607159294Sdelphijglob3(Char *pathbuf, Char *pathend, Char *pathend_last,
608159294Sdelphij      Char *pattern, Char *restpattern,
609159294Sdelphij      glob_t *pglob, size_t *limit)
6101573Srgrimes{
61190045Sobrien	struct dirent *dp;
6121573Srgrimes	DIR *dirp;
6131573Srgrimes	int err;
6141573Srgrimes	char buf[MAXPATHLEN];
6151573Srgrimes
6161573Srgrimes	/*
6171573Srgrimes	 * The readdirfunc declaration can't be prototyped, because it is
6181573Srgrimes	 * assigned, below, to two functions which are prototyped in glob.h
6191573Srgrimes	 * and dirent.h as taking pointers to differently typed opaque
6201573Srgrimes	 * structures.
6211573Srgrimes	 */
6221573Srgrimes	struct dirent *(*readdirfunc)();
6231573Srgrimes
62474963Speter	if (pathend > pathend_last)
625100217Smikeh		return (GLOB_ABORTED);
6261573Srgrimes	*pathend = EOS;
6271573Srgrimes	errno = 0;
6288870Srgrimes
6291573Srgrimes	if ((dirp = g_opendir(pathbuf, pglob)) == NULL) {
6301573Srgrimes		/* TODO: don't call for ENOENT or ENOTDIR? */
6311573Srgrimes		if (pglob->gl_errfunc) {
63274921Speter			if (g_Ctoc(pathbuf, buf, sizeof(buf)))
633100217Smikeh				return (GLOB_ABORTED);
6341573Srgrimes			if (pglob->gl_errfunc(buf, errno) ||
6351573Srgrimes			    pglob->gl_flags & GLOB_ERR)
636100217Smikeh				return (GLOB_ABORTED);
6371573Srgrimes		}
6381573Srgrimes		return(0);
6391573Srgrimes	}
6401573Srgrimes
6411573Srgrimes	err = 0;
6421573Srgrimes
6431573Srgrimes	/* Search directory for matching names. */
6441573Srgrimes	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
6451573Srgrimes		readdirfunc = pglob->gl_readdir;
6461573Srgrimes	else
6471573Srgrimes		readdirfunc = readdir;
6481573Srgrimes	while ((dp = (*readdirfunc)(dirp))) {
649159294Sdelphij		char *sc;
65090045Sobrien		Char *dc;
651132817Stjr		wchar_t wc;
652132817Stjr		size_t clen;
653132817Stjr		mbstate_t mbs;
6541573Srgrimes
6551573Srgrimes		/* Initial DOT must be matched literally. */
6561573Srgrimes		if (dp->d_name[0] == DOT && *pattern != DOT)
6571573Srgrimes			continue;
658132817Stjr		memset(&mbs, 0, sizeof(mbs));
65974963Speter		dc = pathend;
660159294Sdelphij		sc = dp->d_name;
661132817Stjr		while (dc < pathend_last) {
662132817Stjr			clen = mbrtowc(&wc, sc, MB_LEN_MAX, &mbs);
663132817Stjr			if (clen == (size_t)-1 || clen == (size_t)-2) {
664132817Stjr				wc = *sc;
665132817Stjr				clen = 1;
666132817Stjr				memset(&mbs, 0, sizeof(mbs));
667132817Stjr			}
668132817Stjr			if ((*dc++ = wc) == EOS)
669132817Stjr				break;
670132817Stjr			sc += clen;
671132817Stjr		}
6721573Srgrimes		if (!match(pathend, pattern, restpattern)) {
6731573Srgrimes			*pathend = EOS;
6741573Srgrimes			continue;
6751573Srgrimes		}
67674963Speter		err = glob2(pathbuf, --dc, pathend_last, restpattern,
67774963Speter		    pglob, limit);
6781573Srgrimes		if (err)
6791573Srgrimes			break;
6801573Srgrimes	}
6811573Srgrimes
6821573Srgrimes	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
6831573Srgrimes		(*pglob->gl_closedir)(dirp);
6841573Srgrimes	else
6851573Srgrimes		closedir(dirp);
6861573Srgrimes	return(err);
6871573Srgrimes}
6881573Srgrimes
6891573Srgrimes
6901573Srgrimes/*
6911573Srgrimes * Extend the gl_pathv member of a glob_t structure to accomodate a new item,
6921573Srgrimes * add the new item, and update gl_pathc.
6931573Srgrimes *
6941573Srgrimes * This assumes the BSD realloc, which only copies the block when its size
6951573Srgrimes * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
6961573Srgrimes * behavior.
6971573Srgrimes *
6981573Srgrimes * Return 0 if new item added, error code if memory couldn't be allocated.
6991573Srgrimes *
7001573Srgrimes * Invariant of the glob_t structure:
7011573Srgrimes *	Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
7021573Srgrimes *	gl_pathv points to (gl_offs + gl_pathc + 1) items.
7031573Srgrimes */
7041573Srgrimesstatic int
705159294Sdelphijglobextend(const Char *path, glob_t *pglob, size_t *limit)
7061573Srgrimes{
70790045Sobrien	char **pathv;
708158812Sache	size_t i, newsize, len;
7091573Srgrimes	char *copy;
7101573Srgrimes	const Char *p;
7111573Srgrimes
71280525Smikeh	if (*limit && pglob->gl_pathc > *limit) {
71380525Smikeh		errno = 0;
71480525Smikeh		return (GLOB_NOSPACE);
71580525Smikeh	}
71674307Sjlemon
7171573Srgrimes	newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs);
7188870Srgrimes	pathv = pglob->gl_pathv ?
7191573Srgrimes		    realloc((char *)pglob->gl_pathv, newsize) :
7201573Srgrimes		    malloc(newsize);
72174918Speter	if (pathv == NULL) {
72274918Speter		if (pglob->gl_pathv) {
72374918Speter			free(pglob->gl_pathv);
72474918Speter			pglob->gl_pathv = NULL;
72574918Speter		}
7261573Srgrimes		return(GLOB_NOSPACE);
72774918Speter	}
7281573Srgrimes
7291573Srgrimes	if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
7301573Srgrimes		/* first time around -- clear initial gl_offs items */
7311573Srgrimes		pathv += pglob->gl_offs;
732158812Sache		for (i = pglob->gl_offs + 1; --i > 0; )
7331573Srgrimes			*--pathv = NULL;
7341573Srgrimes	}
7351573Srgrimes	pglob->gl_pathv = pathv;
7361573Srgrimes
7371573Srgrimes	for (p = path; *p++;)
7381573Srgrimes		continue;
739132817Stjr	len = MB_CUR_MAX * (size_t)(p - path);	/* XXX overallocation */
74074921Speter	if ((copy = malloc(len)) != NULL) {
74174921Speter		if (g_Ctoc(path, copy, len)) {
74274918Speter			free(copy);
74374918Speter			return (GLOB_NOSPACE);
74474918Speter		}
7451573Srgrimes		pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
7461573Srgrimes	}
7471573Srgrimes	pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
7481573Srgrimes	return(copy == NULL ? GLOB_NOSPACE : 0);
7491573Srgrimes}
7501573Srgrimes
7511573Srgrimes/*
7521573Srgrimes * pattern matching function for filenames.  Each occurrence of the *
7531573Srgrimes * pattern causes a recursion level.
7541573Srgrimes */
7551573Srgrimesstatic int
756159294Sdelphijmatch(Char *name, Char *pat, Char *patend)
7571573Srgrimes{
7581573Srgrimes	int ok, negate_range;
7591573Srgrimes	Char c, k;
760227753Stheraven	struct xlocale_collate *table =
761227753Stheraven		(struct xlocale_collate*)__get_locale()->components[XLC_COLLATE];
7621573Srgrimes
7631573Srgrimes	while (pat < patend) {
7641573Srgrimes		c = *pat++;
7651573Srgrimes		switch (c & M_MASK) {
7661573Srgrimes		case M_ALL:
7671573Srgrimes			if (pat == patend)
7681573Srgrimes				return(1);
7698870Srgrimes			do
7701573Srgrimes			    if (match(name, pat, patend))
7711573Srgrimes				    return(1);
7721573Srgrimes			while (*name++ != EOS);
7731573Srgrimes			return(0);
7741573Srgrimes		case M_ONE:
7751573Srgrimes			if (*name++ == EOS)
7761573Srgrimes				return(0);
7771573Srgrimes			break;
7781573Srgrimes		case M_SET:
7791573Srgrimes			ok = 0;
7801573Srgrimes			if ((k = *name++) == EOS)
7811573Srgrimes				return(0);
7821573Srgrimes			if ((negate_range = ((*pat & M_MASK) == M_NOT)) != EOS)
7831573Srgrimes				++pat;
7841573Srgrimes			while (((c = *pat++) & M_MASK) != M_END)
7851573Srgrimes				if ((*pat & M_MASK) == M_RNG) {
786227753Stheraven					if (table->__collate_load_error ?
78724633Sache					    CHAR(c) <= CHAR(k) && CHAR(k) <= CHAR(pat[1]) :
788227753Stheraven					       __collate_range_cmp(table, CHAR(c), CHAR(k)) <= 0
789227753Stheraven					    && __collate_range_cmp(table, CHAR(k), CHAR(pat[1])) <= 0
79017528Sache					   )
7911573Srgrimes						ok = 1;
7921573Srgrimes					pat += 2;
7931573Srgrimes				} else if (c == k)
7941573Srgrimes					ok = 1;
7951573Srgrimes			if (ok == negate_range)
7961573Srgrimes				return(0);
7971573Srgrimes			break;
7981573Srgrimes		default:
7991573Srgrimes			if (*name++ != c)
8001573Srgrimes				return(0);
8011573Srgrimes			break;
8021573Srgrimes		}
8031573Srgrimes	}
8041573Srgrimes	return(*name == EOS);
8051573Srgrimes}
8061573Srgrimes
8071573Srgrimes/* Free allocated data belonging to a glob_t structure. */
8081573Srgrimesvoid
809159294Sdelphijglobfree(glob_t *pglob)
8101573Srgrimes{
811158812Sache	size_t i;
81290045Sobrien	char **pp;
8131573Srgrimes
8141573Srgrimes	if (pglob->gl_pathv != NULL) {
8151573Srgrimes		pp = pglob->gl_pathv + pglob->gl_offs;
8161573Srgrimes		for (i = pglob->gl_pathc; i--; ++pp)
8171573Srgrimes			if (*pp)
8181573Srgrimes				free(*pp);
8191573Srgrimes		free(pglob->gl_pathv);
82074918Speter		pglob->gl_pathv = NULL;
8211573Srgrimes	}
8221573Srgrimes}
8231573Srgrimes
8241573Srgrimesstatic DIR *
825159294Sdelphijg_opendir(Char *str, glob_t *pglob)
8261573Srgrimes{
8271573Srgrimes	char buf[MAXPATHLEN];
8281573Srgrimes
8291573Srgrimes	if (!*str)
8301573Srgrimes		strcpy(buf, ".");
83174918Speter	else {
83274921Speter		if (g_Ctoc(str, buf, sizeof(buf)))
83374918Speter			return (NULL);
83474918Speter	}
8351573Srgrimes
8361573Srgrimes	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
8371573Srgrimes		return((*pglob->gl_opendir)(buf));
8381573Srgrimes
8391573Srgrimes	return(opendir(buf));
8401573Srgrimes}
8411573Srgrimes
8421573Srgrimesstatic int
843159294Sdelphijg_lstat(Char *fn, struct stat *sb, glob_t *pglob)
8441573Srgrimes{
8451573Srgrimes	char buf[MAXPATHLEN];
8461573Srgrimes
84774921Speter	if (g_Ctoc(fn, buf, sizeof(buf))) {
84874918Speter		errno = ENAMETOOLONG;
84974918Speter		return (-1);
85074918Speter	}
8511573Srgrimes	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
8521573Srgrimes		return((*pglob->gl_lstat)(buf, sb));
8531573Srgrimes	return(lstat(buf, sb));
8541573Srgrimes}
8551573Srgrimes
8561573Srgrimesstatic int
857159294Sdelphijg_stat(Char *fn, struct stat *sb, glob_t *pglob)
8581573Srgrimes{
8591573Srgrimes	char buf[MAXPATHLEN];
8601573Srgrimes
86174921Speter	if (g_Ctoc(fn, buf, sizeof(buf))) {
86274918Speter		errno = ENAMETOOLONG;
86374918Speter		return (-1);
86474918Speter	}
8651573Srgrimes	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
8661573Srgrimes		return((*pglob->gl_stat)(buf, sb));
8671573Srgrimes	return(stat(buf, sb));
8681573Srgrimes}
8691573Srgrimes
870180021Smtmstatic const Char *
871180021Smtmg_strchr(const Char *str, wchar_t ch)
8721573Srgrimes{
873159294Sdelphij
8741573Srgrimes	do {
8751573Srgrimes		if (*str == ch)
8761573Srgrimes			return (str);
8771573Srgrimes	} while (*str++);
8781573Srgrimes	return (NULL);
8791573Srgrimes}
8801573Srgrimes
88174918Speterstatic int
882159294Sdelphijg_Ctoc(const Char *str, char *buf, size_t len)
8831573Srgrimes{
884132817Stjr	mbstate_t mbs;
885132817Stjr	size_t clen;
8861573Srgrimes
887132817Stjr	memset(&mbs, 0, sizeof(mbs));
888132817Stjr	while (len >= MB_CUR_MAX) {
889132817Stjr		clen = wcrtomb(buf, *str, &mbs);
890132817Stjr		if (clen == (size_t)-1)
891132817Stjr			return (1);
892132817Stjr		if (*str == L'\0')
89374921Speter			return (0);
894132817Stjr		str++;
895132817Stjr		buf += clen;
896132817Stjr		len -= clen;
89774921Speter	}
89874921Speter	return (1);
8991573Srgrimes}
9001573Srgrimes
9011573Srgrimes#ifdef DEBUG
9028870Srgrimesstatic void
903159294Sdelphijqprintf(const char *str, Char *s)
9041573Srgrimes{
90590045Sobrien	Char *p;
9061573Srgrimes
9071573Srgrimes	(void)printf("%s:\n", str);
9081573Srgrimes	for (p = s; *p; p++)
9091573Srgrimes		(void)printf("%c", CHAR(*p));
9101573Srgrimes	(void)printf("\n");
9111573Srgrimes	for (p = s; *p; p++)
9121573Srgrimes		(void)printf("%c", *p & M_PROTECT ? '"' : ' ');
9131573Srgrimes	(void)printf("\n");
9141573Srgrimes	for (p = s; *p; p++)
9151573Srgrimes		(void)printf("%c", ismeta(*p) ? '_' : ' ');
9161573Srgrimes	(void)printf("\n");
9171573Srgrimes}
9181573Srgrimes#endif
919