glob.c revision 158812
1127664Sbms/*
2127664Sbms * Copyright (c) 1989, 1993
3127664Sbms *	The Regents of the University of California.  All rights reserved.
4127664Sbms *
5127664Sbms * This code is derived from software contributed to Berkeley by
6127664Sbms * Guido van Rossum.
7127664Sbms *
8127664Sbms * Redistribution and use in source and binary forms, with or without
9127664Sbms * modification, are permitted provided that the following conditions
10127664Sbms * are met:
11127664Sbms * 1. Redistributions of source code must retain the above copyright
12127664Sbms *    notice, this list of conditions and the following disclaimer.
13127664Sbms * 2. Redistributions in binary form must reproduce the above copyright
14127664Sbms *    notice, this list of conditions and the following disclaimer in the
15127664Sbms *    documentation and/or other materials provided with the distribution.
16127664Sbms * 3. All advertising materials mentioning features or use of this software
17127664Sbms *    must display the following acknowledgement:
18127664Sbms *	This product includes software developed by the University of
19127664Sbms *	California, Berkeley and its contributors.
20127664Sbms * 4. Neither the name of the University nor the names of its contributors
21127664Sbms *    may be used to endorse or promote products derived from this software
22127664Sbms *    without specific prior written permission.
23127664Sbms *
24127664Sbms * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25127664Sbms * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26127664Sbms * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27127664Sbms * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28127664Sbms * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29127664Sbms * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30127664Sbms * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31127664Sbms * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32127664Sbms * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33127664Sbms * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34127664Sbms * SUCH DAMAGE.
35127664Sbms */
36127664Sbms
37127664Sbms#if defined(LIBC_SCCS) && !defined(lint)
38127664Sbmsstatic char sccsid[] = "@(#)glob.c	8.3 (Berkeley) 10/13/93";
39127664Sbms#endif /* LIBC_SCCS and not lint */
40127664Sbms#include <sys/cdefs.h>
41127664Sbms__FBSDID("$FreeBSD: head/lib/libc/gen/glob.c 158812 2006-05-22 06:33:19Z ache $");
42127664Sbms
43127664Sbms/*
44127664Sbms * glob(3) -- a superset of the one defined in POSIX 1003.2.
45127664Sbms *
46127664Sbms * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
47127664Sbms *
48127664Sbms * Optional extra services, controlled by flags not defined by POSIX:
49127664Sbms *
50127664Sbms * GLOB_QUOTE:
51127664Sbms *	Escaping convention: \ inhibits any special meaning the following
52127664Sbms *	character might have (except \ at end of string is retained).
53127664Sbms * GLOB_MAGCHAR:
54127664Sbms *	Set in gl_flags if pattern contained a globbing character.
55127664Sbms * GLOB_NOMAGIC:
56127664Sbms *	Same as GLOB_NOCHECK, but it will only append pattern if it did
57127664Sbms *	not contain any magic characters.  [Used in csh style globbing]
58127664Sbms * GLOB_ALTDIRFUNC:
59127664Sbms *	Use alternately specified directory access functions.
60127664Sbms * GLOB_TILDE:
61127664Sbms *	expand ~user/foo to the /home/dir/of/user/foo
62127664Sbms * GLOB_BRACE:
63127664Sbms *	expand {1,2}{a,b} to 1a 1b 2a 2b
64127664Sbms * gl_matchc:
65127664Sbms *	Number of matches in the current invocation of glob.
66127664Sbms */
67127664Sbms
68127664Sbms/*
69127664Sbms * Some notes on multibyte character support:
70127664Sbms * 1. Patterns with illegal byte sequences match nothing - even if
71127664Sbms *    GLOB_NOCHECK is specified.
72127664Sbms * 2. Illegal byte sequences in filenames are handled by treating them as
73127664Sbms *    single-byte characters with a value of the first byte of the sequence
74127664Sbms *    cast to wchar_t.
75127664Sbms * 3. State-dependent encodings are not currently supported.
76127664Sbms */
77127664Sbms
78127664Sbms#include <sys/param.h>
79127664Sbms#include <sys/stat.h>
80127664Sbms
81127664Sbms#include <ctype.h>
82127664Sbms#include <dirent.h>
83127664Sbms#include <errno.h>
84127664Sbms#include <glob.h>
85127664Sbms#include <limits.h>
86127664Sbms#include <pwd.h>
87127664Sbms#include <stdint.h>
88#include <stdio.h>
89#include <stdlib.h>
90#include <string.h>
91#include <unistd.h>
92#include <wchar.h>
93
94#include "collate.h"
95
96#define	DOLLAR		'$'
97#define	DOT		'.'
98#define	EOS		'\0'
99#define	LBRACKET	'['
100#define	NOT		'!'
101#define	QUESTION	'?'
102#define	QUOTE		'\\'
103#define	RANGE		'-'
104#define	RBRACKET	']'
105#define	SEP		'/'
106#define	STAR		'*'
107#define	TILDE		'~'
108#define	UNDERSCORE	'_'
109#define	LBRACE		'{'
110#define	RBRACE		'}'
111#define	SLASH		'/'
112#define	COMMA		','
113
114#ifndef DEBUG
115
116#define	M_QUOTE		0x8000000000ULL
117#define	M_PROTECT	0x4000000000ULL
118#define	M_MASK		0xffffffffffULL
119#define	M_CHAR		0x00ffffffffULL
120
121typedef uint_fast64_t Char;
122
123#else
124
125#define	M_QUOTE		0x80
126#define	M_PROTECT	0x40
127#define	M_MASK		0xff
128#define	M_CHAR		0x7f
129
130typedef char Char;
131
132#endif
133
134
135#define	CHAR(c)		((Char)((c)&M_CHAR))
136#define	META(c)		((Char)((c)|M_QUOTE))
137#define	M_ALL		META('*')
138#define	M_END		META(']')
139#define	M_NOT		META('!')
140#define	M_ONE		META('?')
141#define	M_RNG		META('-')
142#define	M_SET		META('[')
143#define	ismeta(c)	(((c)&M_QUOTE) != 0)
144
145
146static int	 compare(const void *, const void *);
147static int	 g_Ctoc(const Char *, char *, size_t);
148static int	 g_lstat(Char *, struct stat *, glob_t *);
149static DIR	*g_opendir(Char *, glob_t *);
150static Char	*g_strchr(Char *, wchar_t);
151#ifdef notdef
152static Char	*g_strcat(Char *, const Char *);
153#endif
154static int	 g_stat(Char *, struct stat *, glob_t *);
155static int	 glob0(const Char *, glob_t *, size_t *);
156static int	 glob1(Char *, glob_t *, size_t *);
157static int	 glob2(Char *, Char *, Char *, Char *, glob_t *, size_t *);
158static int	 glob3(Char *, Char *, Char *, Char *, Char *, glob_t *, size_t *);
159static int	 globextend(const Char *, glob_t *, size_t *);
160static const Char *
161		 globtilde(const Char *, Char *, size_t, glob_t *);
162static int	 globexp1(const Char *, glob_t *, size_t *);
163static int	 globexp2(const Char *, const Char *, glob_t *, int *, size_t *);
164static int	 match(Char *, Char *, Char *);
165#ifdef DEBUG
166static void	 qprintf(const char *, Char *);
167#endif
168
169int
170glob(pattern, flags, errfunc, pglob)
171	const char *pattern;
172	int flags, (*errfunc)(const char *, int);
173	glob_t *pglob;
174{
175	const u_char *patnext;
176	size_t limit;
177	Char *bufnext, *bufend, patbuf[MAXPATHLEN], prot;
178	mbstate_t mbs;
179	wchar_t wc;
180	size_t clen;
181
182	patnext = (u_char *) pattern;
183	if (!(flags & GLOB_APPEND)) {
184		pglob->gl_pathc = 0;
185		pglob->gl_pathv = NULL;
186		if (!(flags & GLOB_DOOFFS))
187			pglob->gl_offs = 0;
188	}
189	if (flags & GLOB_LIMIT) {
190		limit = pglob->gl_matchc;
191		if (limit == 0)
192			limit = ARG_MAX;
193	} else
194		limit = 0;
195	pglob->gl_flags = flags & ~GLOB_MAGCHAR;
196	pglob->gl_errfunc = errfunc;
197	pglob->gl_matchc = 0;
198
199	bufnext = patbuf;
200	bufend = bufnext + MAXPATHLEN - 1;
201	if (flags & GLOB_NOESCAPE) {
202		memset(&mbs, 0, sizeof(mbs));
203		while (bufend - bufnext >= MB_CUR_MAX) {
204			clen = mbrtowc(&wc, patnext, MB_LEN_MAX, &mbs);
205			if (clen == (size_t)-1 || clen == (size_t)-2)
206				return (GLOB_NOMATCH);
207			else if (clen == 0)
208				break;
209			*bufnext++ = wc;
210			patnext += clen;
211		}
212	} else {
213		/* Protect the quoted characters. */
214		memset(&mbs, 0, sizeof(mbs));
215		while (bufend - bufnext >= MB_CUR_MAX) {
216			if (*patnext == QUOTE) {
217				if (*++patnext == EOS) {
218					*bufnext++ = QUOTE | M_PROTECT;
219					continue;
220				}
221				prot = M_PROTECT;
222			} else
223				prot = 0;
224			clen = mbrtowc(&wc, patnext, MB_LEN_MAX, &mbs);
225			if (clen == (size_t)-1 || clen == (size_t)-2)
226				return (GLOB_NOMATCH);
227			else if (clen == 0)
228				break;
229			*bufnext++ = wc | prot;
230			patnext += clen;
231		}
232	}
233	*bufnext = EOS;
234
235	if (flags & GLOB_BRACE)
236	    return globexp1(patbuf, pglob, &limit);
237	else
238	    return glob0(patbuf, pglob, &limit);
239}
240
241/*
242 * Expand recursively a glob {} pattern. When there is no more expansion
243 * invoke the standard globbing routine to glob the rest of the magic
244 * characters
245 */
246static int
247globexp1(pattern, pglob, limit)
248	const Char *pattern;
249	glob_t *pglob;
250	size_t *limit;
251{
252	const Char* ptr = pattern;
253	int rv;
254
255	/* Protect a single {}, for find(1), like csh */
256	if (pattern[0] == LBRACE && pattern[1] == RBRACE && pattern[2] == EOS)
257		return glob0(pattern, pglob, limit);
258
259	while ((ptr = (const Char *) g_strchr((Char *) ptr, LBRACE)) != NULL)
260		if (!globexp2(ptr, pattern, pglob, &rv, limit))
261			return rv;
262
263	return glob0(pattern, pglob, limit);
264}
265
266
267/*
268 * Recursive brace globbing helper. Tries to expand a single brace.
269 * If it succeeds then it invokes globexp1 with the new pattern.
270 * If it fails then it tries to glob the rest of the pattern and returns.
271 */
272static int
273globexp2(ptr, pattern, pglob, rv, limit)
274	const Char *ptr, *pattern;
275	glob_t *pglob;
276	int *rv;
277	size_t *limit;
278{
279	int     i;
280	Char   *lm, *ls;
281	const Char *pe, *pm, *pm1, *pl;
282	Char    patbuf[MAXPATHLEN];
283
284	/* copy part up to the brace */
285	for (lm = patbuf, pm = pattern; pm != ptr; *lm++ = *pm++)
286		continue;
287	*lm = EOS;
288	ls = lm;
289
290	/* Find the balanced brace */
291	for (i = 0, pe = ++ptr; *pe; pe++)
292		if (*pe == LBRACKET) {
293			/* Ignore everything between [] */
294			for (pm = pe++; *pe != RBRACKET && *pe != EOS; pe++)
295				continue;
296			if (*pe == EOS) {
297				/*
298				 * We could not find a matching RBRACKET.
299				 * Ignore and just look for RBRACE
300				 */
301				pe = pm;
302			}
303		}
304		else if (*pe == LBRACE)
305			i++;
306		else if (*pe == RBRACE) {
307			if (i == 0)
308				break;
309			i--;
310		}
311
312	/* Non matching braces; just glob the pattern */
313	if (i != 0 || *pe == EOS) {
314		*rv = glob0(patbuf, pglob, limit);
315		return 0;
316	}
317
318	for (i = 0, pl = pm = ptr; pm <= pe; pm++)
319		switch (*pm) {
320		case LBRACKET:
321			/* Ignore everything between [] */
322			for (pm1 = pm++; *pm != RBRACKET && *pm != EOS; pm++)
323				continue;
324			if (*pm == EOS) {
325				/*
326				 * We could not find a matching RBRACKET.
327				 * Ignore and just look for RBRACE
328				 */
329				pm = pm1;
330			}
331			break;
332
333		case LBRACE:
334			i++;
335			break;
336
337		case RBRACE:
338			if (i) {
339			    i--;
340			    break;
341			}
342			/* FALLTHROUGH */
343		case COMMA:
344			if (i && *pm == COMMA)
345				break;
346			else {
347				/* Append the current string */
348				for (lm = ls; (pl < pm); *lm++ = *pl++)
349					continue;
350				/*
351				 * Append the rest of the pattern after the
352				 * closing brace
353				 */
354				for (pl = pe + 1; (*lm++ = *pl++) != EOS;)
355					continue;
356
357				/* Expand the current pattern */
358#ifdef DEBUG
359				qprintf("globexp2:", patbuf);
360#endif
361				*rv = globexp1(patbuf, pglob, limit);
362
363				/* move after the comma, to the next string */
364				pl = pm + 1;
365			}
366			break;
367
368		default:
369			break;
370		}
371	*rv = 0;
372	return 0;
373}
374
375
376
377/*
378 * expand tilde from the passwd file.
379 */
380static const Char *
381globtilde(pattern, patbuf, patbuf_len, pglob)
382	const Char *pattern;
383	Char *patbuf;
384	size_t patbuf_len;
385	glob_t *pglob;
386{
387	struct passwd *pwd;
388	char *h;
389	const Char *p;
390	Char *b, *eb;
391
392	if (*pattern != TILDE || !(pglob->gl_flags & GLOB_TILDE))
393		return pattern;
394
395	/*
396	 * Copy up to the end of the string or /
397	 */
398	eb = &patbuf[patbuf_len - 1];
399	for (p = pattern + 1, h = (char *) patbuf;
400	    h < (char *)eb && *p && *p != SLASH; *h++ = *p++)
401		continue;
402
403	*h = EOS;
404
405	if (((char *) patbuf)[0] == EOS) {
406		/*
407		 * handle a plain ~ or ~/ by expanding $HOME first (iff
408		 * we're not running setuid or setgid) and then trying
409		 * the password file
410		 */
411		if (issetugid() != 0 ||
412		    (h = getenv("HOME")) == NULL) {
413			if (((h = getlogin()) != NULL &&
414			     (pwd = getpwnam(h)) != NULL) ||
415			    (pwd = getpwuid(getuid())) != NULL)
416				h = pwd->pw_dir;
417			else
418				return pattern;
419		}
420	}
421	else {
422		/*
423		 * Expand a ~user
424		 */
425		if ((pwd = getpwnam((char*) patbuf)) == NULL)
426			return pattern;
427		else
428			h = pwd->pw_dir;
429	}
430
431	/* Copy the home directory */
432	for (b = patbuf; b < eb && *h; *b++ = *h++)
433		continue;
434
435	/* Append the rest of the pattern */
436	while (b < eb && (*b++ = *p++) != EOS)
437		continue;
438	*b = EOS;
439
440	return patbuf;
441}
442
443
444/*
445 * The main glob() routine: compiles the pattern (optionally processing
446 * quotes), calls glob1() to do the real pattern matching, and finally
447 * sorts the list (unless unsorted operation is requested).  Returns 0
448 * if things went well, nonzero if errors occurred.
449 */
450static int
451glob0(pattern, pglob, limit)
452	const Char *pattern;
453	glob_t *pglob;
454	size_t *limit;
455{
456	const Char *qpatnext;
457	int c, err;
458	size_t oldpathc;
459	Char *bufnext, patbuf[MAXPATHLEN];
460
461	qpatnext = globtilde(pattern, patbuf, MAXPATHLEN, pglob);
462	oldpathc = pglob->gl_pathc;
463	bufnext = patbuf;
464
465	/* We don't need to check for buffer overflow any more. */
466	while ((c = *qpatnext++) != EOS) {
467		switch (c) {
468		case LBRACKET:
469			c = *qpatnext;
470			if (c == NOT)
471				++qpatnext;
472			if (*qpatnext == EOS ||
473			    g_strchr((Char *) qpatnext+1, RBRACKET) == NULL) {
474				*bufnext++ = LBRACKET;
475				if (c == NOT)
476					--qpatnext;
477				break;
478			}
479			*bufnext++ = M_SET;
480			if (c == NOT)
481				*bufnext++ = M_NOT;
482			c = *qpatnext++;
483			do {
484				*bufnext++ = CHAR(c);
485				if (*qpatnext == RANGE &&
486				    (c = qpatnext[1]) != RBRACKET) {
487					*bufnext++ = M_RNG;
488					*bufnext++ = CHAR(c);
489					qpatnext += 2;
490				}
491			} while ((c = *qpatnext++) != RBRACKET);
492			pglob->gl_flags |= GLOB_MAGCHAR;
493			*bufnext++ = M_END;
494			break;
495		case QUESTION:
496			pglob->gl_flags |= GLOB_MAGCHAR;
497			*bufnext++ = M_ONE;
498			break;
499		case STAR:
500			pglob->gl_flags |= GLOB_MAGCHAR;
501			/* collapse adjacent stars to one,
502			 * to avoid exponential behavior
503			 */
504			if (bufnext == patbuf || bufnext[-1] != M_ALL)
505			    *bufnext++ = M_ALL;
506			break;
507		default:
508			*bufnext++ = CHAR(c);
509			break;
510		}
511	}
512	*bufnext = EOS;
513#ifdef DEBUG
514	qprintf("glob0:", patbuf);
515#endif
516
517	if ((err = glob1(patbuf, pglob, limit)) != 0)
518		return(err);
519
520	/*
521	 * If there was no match we are going to append the pattern
522	 * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified
523	 * and the pattern did not contain any magic characters
524	 * GLOB_NOMAGIC is there just for compatibility with csh.
525	 */
526	if (pglob->gl_pathc == oldpathc) {
527		if (((pglob->gl_flags & GLOB_NOCHECK) ||
528		    ((pglob->gl_flags & GLOB_NOMAGIC) &&
529			!(pglob->gl_flags & GLOB_MAGCHAR))))
530			return(globextend(pattern, pglob, limit));
531		else
532			return(GLOB_NOMATCH);
533	}
534	if (!(pglob->gl_flags & GLOB_NOSORT))
535		qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc,
536		    pglob->gl_pathc - oldpathc, sizeof(char *), compare);
537	return(0);
538}
539
540static int
541compare(p, q)
542	const void *p, *q;
543{
544	return(strcmp(*(char **)p, *(char **)q));
545}
546
547static int
548glob1(pattern, pglob, limit)
549	Char *pattern;
550	glob_t *pglob;
551	size_t *limit;
552{
553	Char pathbuf[MAXPATHLEN];
554
555	/* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */
556	if (*pattern == EOS)
557		return(0);
558	return(glob2(pathbuf, pathbuf, pathbuf + MAXPATHLEN - 1,
559	    pattern, pglob, limit));
560}
561
562/*
563 * The functions glob2 and glob3 are mutually recursive; there is one level
564 * of recursion for each segment in the pattern that contains one or more
565 * meta characters.
566 */
567static int
568glob2(pathbuf, pathend, pathend_last, pattern, pglob, limit)
569	Char *pathbuf, *pathend, *pathend_last, *pattern;
570	glob_t *pglob;
571	size_t *limit;
572{
573	struct stat sb;
574	Char *p, *q;
575	int anymeta;
576
577	/*
578	 * Loop over pattern segments until end of pattern or until
579	 * segment with meta character found.
580	 */
581	for (anymeta = 0;;) {
582		if (*pattern == EOS) {		/* End of pattern? */
583			*pathend = EOS;
584			if (g_lstat(pathbuf, &sb, pglob))
585				return(0);
586
587			if (((pglob->gl_flags & GLOB_MARK) &&
588			    pathend[-1] != SEP) && (S_ISDIR(sb.st_mode)
589			    || (S_ISLNK(sb.st_mode) &&
590			    (g_stat(pathbuf, &sb, pglob) == 0) &&
591			    S_ISDIR(sb.st_mode)))) {
592				if (pathend + 1 > pathend_last)
593					return (GLOB_ABORTED);
594				*pathend++ = SEP;
595				*pathend = EOS;
596			}
597			++pglob->gl_matchc;
598			return(globextend(pathbuf, pglob, limit));
599		}
600
601		/* Find end of next segment, copy tentatively to pathend. */
602		q = pathend;
603		p = pattern;
604		while (*p != EOS && *p != SEP) {
605			if (ismeta(*p))
606				anymeta = 1;
607			if (q + 1 > pathend_last)
608				return (GLOB_ABORTED);
609			*q++ = *p++;
610		}
611
612		if (!anymeta) {		/* No expansion, do next segment. */
613			pathend = q;
614			pattern = p;
615			while (*pattern == SEP) {
616				if (pathend + 1 > pathend_last)
617					return (GLOB_ABORTED);
618				*pathend++ = *pattern++;
619			}
620		} else			/* Need expansion, recurse. */
621			return(glob3(pathbuf, pathend, pathend_last, pattern, p,
622			    pglob, limit));
623	}
624	/* NOTREACHED */
625}
626
627static int
628glob3(pathbuf, pathend, pathend_last, pattern, restpattern, pglob, limit)
629	Char *pathbuf, *pathend, *pathend_last, *pattern, *restpattern;
630	glob_t *pglob;
631	size_t *limit;
632{
633	struct dirent *dp;
634	DIR *dirp;
635	int err;
636	char buf[MAXPATHLEN];
637
638	/*
639	 * The readdirfunc declaration can't be prototyped, because it is
640	 * assigned, below, to two functions which are prototyped in glob.h
641	 * and dirent.h as taking pointers to differently typed opaque
642	 * structures.
643	 */
644	struct dirent *(*readdirfunc)();
645
646	if (pathend > pathend_last)
647		return (GLOB_ABORTED);
648	*pathend = EOS;
649	errno = 0;
650
651	if ((dirp = g_opendir(pathbuf, pglob)) == NULL) {
652		/* TODO: don't call for ENOENT or ENOTDIR? */
653		if (pglob->gl_errfunc) {
654			if (g_Ctoc(pathbuf, buf, sizeof(buf)))
655				return (GLOB_ABORTED);
656			if (pglob->gl_errfunc(buf, errno) ||
657			    pglob->gl_flags & GLOB_ERR)
658				return (GLOB_ABORTED);
659		}
660		return(0);
661	}
662
663	err = 0;
664
665	/* Search directory for matching names. */
666	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
667		readdirfunc = pglob->gl_readdir;
668	else
669		readdirfunc = readdir;
670	while ((dp = (*readdirfunc)(dirp))) {
671		u_char *sc;
672		Char *dc;
673		wchar_t wc;
674		size_t clen;
675		mbstate_t mbs;
676
677		/* Initial DOT must be matched literally. */
678		if (dp->d_name[0] == DOT && *pattern != DOT)
679			continue;
680		memset(&mbs, 0, sizeof(mbs));
681		dc = pathend;
682		sc = (u_char *) dp->d_name;
683		while (dc < pathend_last) {
684			clen = mbrtowc(&wc, sc, MB_LEN_MAX, &mbs);
685			if (clen == (size_t)-1 || clen == (size_t)-2) {
686				wc = *sc;
687				clen = 1;
688				memset(&mbs, 0, sizeof(mbs));
689			}
690			if ((*dc++ = wc) == EOS)
691				break;
692			sc += clen;
693		}
694		if (!match(pathend, pattern, restpattern)) {
695			*pathend = EOS;
696			continue;
697		}
698		err = glob2(pathbuf, --dc, pathend_last, restpattern,
699		    pglob, limit);
700		if (err)
701			break;
702	}
703
704	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
705		(*pglob->gl_closedir)(dirp);
706	else
707		closedir(dirp);
708	return(err);
709}
710
711
712/*
713 * Extend the gl_pathv member of a glob_t structure to accomodate a new item,
714 * add the new item, and update gl_pathc.
715 *
716 * This assumes the BSD realloc, which only copies the block when its size
717 * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
718 * behavior.
719 *
720 * Return 0 if new item added, error code if memory couldn't be allocated.
721 *
722 * Invariant of the glob_t structure:
723 *	Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
724 *	gl_pathv points to (gl_offs + gl_pathc + 1) items.
725 */
726static int
727globextend(path, pglob, limit)
728	const Char *path;
729	glob_t *pglob;
730	size_t *limit;
731{
732	char **pathv;
733	size_t i, newsize, len;
734	char *copy;
735	const Char *p;
736
737	if (*limit && pglob->gl_pathc > *limit) {
738		errno = 0;
739		return (GLOB_NOSPACE);
740	}
741
742	newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs);
743	pathv = pglob->gl_pathv ?
744		    realloc((char *)pglob->gl_pathv, newsize) :
745		    malloc(newsize);
746	if (pathv == NULL) {
747		if (pglob->gl_pathv) {
748			free(pglob->gl_pathv);
749			pglob->gl_pathv = NULL;
750		}
751		return(GLOB_NOSPACE);
752	}
753
754	if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
755		/* first time around -- clear initial gl_offs items */
756		pathv += pglob->gl_offs;
757		for (i = pglob->gl_offs + 1; --i > 0; )
758			*--pathv = NULL;
759	}
760	pglob->gl_pathv = pathv;
761
762	for (p = path; *p++;)
763		continue;
764	len = MB_CUR_MAX * (size_t)(p - path);	/* XXX overallocation */
765	if ((copy = malloc(len)) != NULL) {
766		if (g_Ctoc(path, copy, len)) {
767			free(copy);
768			return (GLOB_NOSPACE);
769		}
770		pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
771	}
772	pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
773	return(copy == NULL ? GLOB_NOSPACE : 0);
774}
775
776/*
777 * pattern matching function for filenames.  Each occurrence of the *
778 * pattern causes a recursion level.
779 */
780static int
781match(name, pat, patend)
782	Char *name, *pat, *patend;
783{
784	int ok, negate_range;
785	Char c, k;
786
787	while (pat < patend) {
788		c = *pat++;
789		switch (c & M_MASK) {
790		case M_ALL:
791			if (pat == patend)
792				return(1);
793			do
794			    if (match(name, pat, patend))
795				    return(1);
796			while (*name++ != EOS);
797			return(0);
798		case M_ONE:
799			if (*name++ == EOS)
800				return(0);
801			break;
802		case M_SET:
803			ok = 0;
804			if ((k = *name++) == EOS)
805				return(0);
806			if ((negate_range = ((*pat & M_MASK) == M_NOT)) != EOS)
807				++pat;
808			while (((c = *pat++) & M_MASK) != M_END)
809				if ((*pat & M_MASK) == M_RNG) {
810					if (__collate_load_error ?
811					    CHAR(c) <= CHAR(k) && CHAR(k) <= CHAR(pat[1]) :
812					       __collate_range_cmp(CHAR(c), CHAR(k)) <= 0
813					    && __collate_range_cmp(CHAR(k), CHAR(pat[1])) <= 0
814					   )
815						ok = 1;
816					pat += 2;
817				} else if (c == k)
818					ok = 1;
819			if (ok == negate_range)
820				return(0);
821			break;
822		default:
823			if (*name++ != c)
824				return(0);
825			break;
826		}
827	}
828	return(*name == EOS);
829}
830
831/* Free allocated data belonging to a glob_t structure. */
832void
833globfree(pglob)
834	glob_t *pglob;
835{
836	size_t i;
837	char **pp;
838
839	if (pglob->gl_pathv != NULL) {
840		pp = pglob->gl_pathv + pglob->gl_offs;
841		for (i = pglob->gl_pathc; i--; ++pp)
842			if (*pp)
843				free(*pp);
844		free(pglob->gl_pathv);
845		pglob->gl_pathv = NULL;
846	}
847}
848
849static DIR *
850g_opendir(str, pglob)
851	Char *str;
852	glob_t *pglob;
853{
854	char buf[MAXPATHLEN];
855
856	if (!*str)
857		strcpy(buf, ".");
858	else {
859		if (g_Ctoc(str, buf, sizeof(buf)))
860			return (NULL);
861	}
862
863	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
864		return((*pglob->gl_opendir)(buf));
865
866	return(opendir(buf));
867}
868
869static int
870g_lstat(fn, sb, pglob)
871	Char *fn;
872	struct stat *sb;
873	glob_t *pglob;
874{
875	char buf[MAXPATHLEN];
876
877	if (g_Ctoc(fn, buf, sizeof(buf))) {
878		errno = ENAMETOOLONG;
879		return (-1);
880	}
881	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
882		return((*pglob->gl_lstat)(buf, sb));
883	return(lstat(buf, sb));
884}
885
886static int
887g_stat(fn, sb, pglob)
888	Char *fn;
889	struct stat *sb;
890	glob_t *pglob;
891{
892	char buf[MAXPATHLEN];
893
894	if (g_Ctoc(fn, buf, sizeof(buf))) {
895		errno = ENAMETOOLONG;
896		return (-1);
897	}
898	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
899		return((*pglob->gl_stat)(buf, sb));
900	return(stat(buf, sb));
901}
902
903static Char *
904g_strchr(str, ch)
905	Char *str;
906	wchar_t ch;
907{
908	do {
909		if (*str == ch)
910			return (str);
911	} while (*str++);
912	return (NULL);
913}
914
915static int
916g_Ctoc(str, buf, len)
917	const Char *str;
918	char *buf;
919	size_t len;
920{
921	mbstate_t mbs;
922	size_t clen;
923
924	memset(&mbs, 0, sizeof(mbs));
925	while (len >= MB_CUR_MAX) {
926		clen = wcrtomb(buf, *str, &mbs);
927		if (clen == (size_t)-1)
928			return (1);
929		if (*str == L'\0')
930			return (0);
931		str++;
932		buf += clen;
933		len -= clen;
934	}
935	return (1);
936}
937
938#ifdef DEBUG
939static void
940qprintf(str, s)
941	const char *str;
942	Char *s;
943{
944	Char *p;
945
946	(void)printf("%s:\n", str);
947	for (p = s; *p; p++)
948		(void)printf("%c", CHAR(*p));
949	(void)printf("\n");
950	for (p = s; *p; p++)
951		(void)printf("%c", *p & M_PROTECT ? '"' : ' ');
952	(void)printf("\n");
953	for (p = s; *p; p++)
954		(void)printf("%c", ismeta(*p) ? '_' : ' ');
955	(void)printf("\n");
956}
957#endif
958