1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1989, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Guido van Rossum.
9 *
10 * Copyright (c) 2011 The FreeBSD Foundation
11 *
12 * Portions of this software were developed by David Chisnall
13 * under sponsorship from the FreeBSD Foundation.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 *    notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 *    notice, this list of conditions and the following disclaimer in the
22 *    documentation and/or other materials provided with the distribution.
23 * 3. Neither the name of the University nor the names of its contributors
24 *    may be used to endorse or promote products derived from this software
25 *    without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 */
39
40/*
41 * glob(3) -- a superset of the one defined in POSIX 1003.2.
42 *
43 * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
44 *
45 * Optional extra services, controlled by flags not defined by POSIX:
46 *
47 * GLOB_QUOTE:
48 *	Escaping convention: \ inhibits any special meaning the following
49 *	character might have (except \ at end of string is retained).
50 * GLOB_MAGCHAR:
51 *	Set in gl_flags if pattern contained a globbing character.
52 * GLOB_NOMAGIC:
53 *	Same as GLOB_NOCHECK, but it will only append pattern if it did
54 *	not contain any magic characters.  [Used in csh style globbing]
55 * GLOB_ALTDIRFUNC:
56 *	Use alternately specified directory access functions.
57 * GLOB_TILDE:
58 *	expand ~user/foo to the /home/dir/of/user/foo
59 * GLOB_BRACE:
60 *	expand {1,2}{a,b} to 1a 1b 2a 2b
61 * gl_matchc:
62 *	Number of matches in the current invocation of glob.
63 */
64
65/*
66 * Some notes on multibyte character support:
67 * 1. Patterns with illegal byte sequences match nothing - even if
68 *    GLOB_NOCHECK is specified.
69 * 2. Illegal byte sequences in filenames are handled by treating them as
70 *    single-byte characters with a values of such bytes of the sequence
71 *    cast to wchar_t.
72 * 3. State-dependent encodings are not currently supported.
73 */
74
75#include <sys/param.h>
76#include <sys/stat.h>
77
78#include <ctype.h>
79#include <dirent.h>
80#include <errno.h>
81#include <glob.h>
82#include <limits.h>
83#include <pwd.h>
84#include <stdint.h>
85#include <stdio.h>
86#include <stdlib.h>
87#include <string.h>
88#include <unistd.h>
89#include <wchar.h>
90
91#include "collate.h"
92
93/*
94 * glob(3) expansion limits. Stop the expansion if any of these limits
95 * is reached. This caps the runtime in the face of DoS attacks. See
96 * also CVE-2010-2632
97 */
98#define	GLOB_LIMIT_BRACE	128	/* number of brace calls */
99#define	GLOB_LIMIT_PATH		65536	/* number of path elements */
100#define	GLOB_LIMIT_READDIR	16384	/* number of readdirs */
101#define	GLOB_LIMIT_STAT		1024	/* number of stat system calls */
102#define	GLOB_LIMIT_STRING	ARG_MAX	/* maximum total size for paths */
103
104struct glob_limit {
105	size_t	l_brace_cnt;
106	size_t	l_path_lim;
107	size_t	l_readdir_cnt;
108	size_t	l_stat_cnt;
109	size_t	l_string_cnt;
110};
111
112#define	DOT		L'.'
113#define	EOS		L'\0'
114#define	LBRACKET	L'['
115#define	NOT		L'!'
116#define	QUESTION	L'?'
117#define	QUOTE		L'\\'
118#define	RANGE		L'-'
119#define	RBRACKET	L']'
120#define	SEP		L'/'
121#define	STAR		L'*'
122#define	TILDE		L'~'
123#define	LBRACE		L'{'
124#define	RBRACE		L'}'
125#define	COMMA		L','
126
127#define	M_QUOTE		0x8000000000ULL
128#define	M_PROTECT	0x4000000000ULL
129#define	M_MASK		0xffffffffffULL
130#define	M_CHAR		0x00ffffffffULL
131
132typedef uint_fast64_t Char;
133
134#define	CHAR(c)		((Char)((c)&M_CHAR))
135#define	META(c)		((Char)((c)|M_QUOTE))
136#define	UNPROT(c)	((c) & ~M_PROTECT)
137#define	M_ALL		META(L'*')
138#define	M_END		META(L']')
139#define	M_NOT		META(L'!')
140#define	M_ONE		META(L'?')
141#define	M_RNG		META(L'-')
142#define	M_SET		META(L'[')
143#define	ismeta(c)	(((c)&M_QUOTE) != 0)
144#ifdef DEBUG
145#define	isprot(c)	(((c)&M_PROTECT) != 0)
146#endif
147
148static int	 compare(const void *, const void *);
149static int	 g_Ctoc(const Char *, char *, size_t);
150static int	 g_lstat(Char *, struct stat *, glob_t *);
151static DIR	*g_opendir(Char *, glob_t *);
152static const Char *g_strchr(const Char *, wchar_t);
153#ifdef notdef
154static Char	*g_strcat(Char *, const Char *);
155#endif
156static int	 g_stat(Char *, struct stat *, glob_t *);
157static int	 glob0(const Char *, glob_t *, struct glob_limit *,
158    const char *);
159static int	 glob1(Char *, glob_t *, struct glob_limit *);
160static int	 glob2(Char *, Char *, Char *, Char *, glob_t *,
161    struct glob_limit *);
162static int	 glob3(Char *, Char *, Char *, Char *, Char *, glob_t *,
163    struct glob_limit *);
164static int	 globextend(const Char *, glob_t *, struct glob_limit *,
165    const char *);
166static const Char *
167		 globtilde(const Char *, Char *, size_t, glob_t *);
168static int	 globexp0(const Char *, glob_t *, struct glob_limit *,
169    const char *);
170static int	 globexp1(const Char *, glob_t *, struct glob_limit *);
171static int	 globexp2(const Char *, const Char *, glob_t *,
172    struct glob_limit *);
173static int	 globfinal(glob_t *, struct glob_limit *, size_t,
174    const char *);
175static int	 match(Char *, Char *, Char *);
176static int	 err_nomatch(glob_t *, struct glob_limit *, const char *);
177static int	 err_aborted(glob_t *, int, char *);
178#ifdef DEBUG
179static void	 qprintf(const char *, Char *);
180#endif
181
182int
183glob(const char * __restrict pattern, int flags,
184	 int (*errfunc)(const char *, int), glob_t * __restrict pglob)
185{
186	struct glob_limit limit = { 0, 0, 0, 0, 0 };
187	const char *patnext;
188	Char *bufnext, *bufend, patbuf[MAXPATHLEN], prot;
189	mbstate_t mbs;
190	wchar_t wc;
191	size_t clen;
192	int too_long;
193
194	patnext = pattern;
195	if (!(flags & GLOB_APPEND)) {
196		pglob->gl_pathc = 0;
197		pglob->gl_pathv = NULL;
198		if (!(flags & GLOB_DOOFFS))
199			pglob->gl_offs = 0;
200	}
201	if (flags & GLOB_LIMIT) {
202		limit.l_path_lim = pglob->gl_matchc;
203		if (limit.l_path_lim == 0)
204			limit.l_path_lim = GLOB_LIMIT_PATH;
205	}
206	pglob->gl_flags = flags & ~GLOB_MAGCHAR;
207	pglob->gl_errfunc = errfunc;
208	pglob->gl_matchc = 0;
209
210	bufnext = patbuf;
211	bufend = bufnext + MAXPATHLEN - 1;
212	too_long = 1;
213	if (flags & GLOB_NOESCAPE) {
214		memset(&mbs, 0, sizeof(mbs));
215		while (bufnext <= bufend) {
216			clen = mbrtowc(&wc, patnext, MB_LEN_MAX, &mbs);
217			if (clen == (size_t)-1 || clen == (size_t)-2)
218				return (err_nomatch(pglob, &limit, pattern));
219			else if (clen == 0) {
220				too_long = 0;
221				break;
222			}
223			*bufnext++ = wc;
224			patnext += clen;
225		}
226	} else {
227		/* Protect the quoted characters. */
228		memset(&mbs, 0, sizeof(mbs));
229		while (bufnext <= bufend) {
230			if (*patnext == '\\') {
231				if (*++patnext == '\0') {
232					*bufnext++ = QUOTE;
233					continue;
234				}
235				prot = M_PROTECT;
236			} else
237				prot = 0;
238			clen = mbrtowc(&wc, patnext, MB_LEN_MAX, &mbs);
239			if (clen == (size_t)-1 || clen == (size_t)-2)
240				return (err_nomatch(pglob, &limit, pattern));
241			else if (clen == 0) {
242				too_long = 0;
243				break;
244			}
245			*bufnext++ = wc | prot;
246			patnext += clen;
247		}
248	}
249	if (too_long)
250		return (err_nomatch(pglob, &limit, pattern));
251	*bufnext = EOS;
252
253	if (flags & GLOB_BRACE)
254	    return (globexp0(patbuf, pglob, &limit, pattern));
255	else
256	    return (glob0(patbuf, pglob, &limit, pattern));
257}
258
259static int
260globexp0(const Char *pattern, glob_t *pglob, struct glob_limit *limit,
261    const char *origpat) {
262	int rv;
263	size_t oldpathc;
264
265	/* Protect a single {}, for find(1), like csh */
266	if (pattern[0] == LBRACE && pattern[1] == RBRACE && pattern[2] == EOS) {
267		if ((pglob->gl_flags & GLOB_LIMIT) &&
268		    limit->l_brace_cnt++ >= GLOB_LIMIT_BRACE) {
269			errno = E2BIG;
270			return (GLOB_NOSPACE);
271		}
272		return (glob0(pattern, pglob, limit, origpat));
273	}
274
275	oldpathc = pglob->gl_pathc;
276
277	if ((rv = globexp1(pattern, pglob, limit)) != 0)
278		return rv;
279
280	return (globfinal(pglob, limit, oldpathc, origpat));
281}
282
283/*
284 * Expand recursively a glob {} pattern. When there is no more expansion
285 * invoke the standard globbing routine to glob the rest of the magic
286 * characters
287 */
288static int
289globexp1(const Char *pattern, glob_t *pglob, struct glob_limit *limit)
290{
291	const Char* ptr;
292
293	if ((ptr = g_strchr(pattern, LBRACE)) != NULL) {
294		if ((pglob->gl_flags & GLOB_LIMIT) &&
295		    limit->l_brace_cnt++ >= GLOB_LIMIT_BRACE) {
296			errno = E2BIG;
297			return (GLOB_NOSPACE);
298		}
299		return (globexp2(ptr, pattern, pglob, limit));
300	}
301
302	return (glob0(pattern, pglob, limit, NULL));
303}
304
305
306/*
307 * Recursive brace globbing helper. Tries to expand a single brace.
308 * If it succeeds then it invokes globexp1 with the new pattern.
309 * If it fails then it tries to glob the rest of the pattern and returns.
310 */
311static int
312globexp2(const Char *ptr, const Char *pattern, glob_t *pglob,
313    struct glob_limit *limit)
314{
315	int     i, rv;
316	Char   *lm, *ls;
317	const Char *pe, *pm, *pm1, *pl;
318	Char    patbuf[MAXPATHLEN];
319
320	/* copy part up to the brace */
321	for (lm = patbuf, pm = pattern; pm != ptr; *lm++ = *pm++)
322		continue;
323	*lm = EOS;
324	ls = lm;
325
326	/* Find the balanced brace */
327	for (i = 0, pe = ++ptr; *pe != EOS; pe++)
328		if (*pe == LBRACKET) {
329			/* Ignore everything between [] */
330			for (pm = pe++; *pe != RBRACKET && *pe != EOS; pe++)
331				continue;
332			if (*pe == EOS) {
333				/*
334				 * We could not find a matching RBRACKET.
335				 * Ignore and just look for RBRACE
336				 */
337				pe = pm;
338			}
339		}
340		else if (*pe == LBRACE)
341			i++;
342		else if (*pe == RBRACE) {
343			if (i == 0)
344				break;
345			i--;
346		}
347
348	/* Non matching braces; just glob the pattern */
349	if (i != 0 || *pe == EOS)
350		return (glob0(pattern, pglob, limit, NULL));
351
352	for (i = 0, pl = pm = ptr; pm <= pe; pm++)
353		switch (*pm) {
354		case LBRACKET:
355			/* Ignore everything between [] */
356			for (pm1 = pm++; *pm != RBRACKET && *pm != EOS; pm++)
357				continue;
358			if (*pm == EOS) {
359				/*
360				 * We could not find a matching RBRACKET.
361				 * Ignore and just look for RBRACE
362				 */
363				pm = pm1;
364			}
365			break;
366
367		case LBRACE:
368			i++;
369			break;
370
371		case RBRACE:
372			if (i) {
373			    i--;
374			    break;
375			}
376			/* FALLTHROUGH */
377		case COMMA:
378			if (i && *pm == COMMA)
379				break;
380			else {
381				/* Append the current string */
382				for (lm = ls; (pl < pm); *lm++ = *pl++)
383					continue;
384				/*
385				 * Append the rest of the pattern after the
386				 * closing brace
387				 */
388				for (pl = pe + 1; (*lm++ = *pl++) != EOS;)
389					continue;
390
391				/* Expand the current pattern */
392#ifdef DEBUG
393				qprintf("globexp2:", patbuf);
394#endif
395				rv = globexp1(patbuf, pglob, limit);
396				if (rv)
397					return (rv);
398
399				/* move after the comma, to the next string */
400				pl = pm + 1;
401			}
402			break;
403
404		default:
405			break;
406		}
407	return (0);
408}
409
410
411
412/*
413 * expand tilde from the passwd file.
414 */
415static const Char *
416globtilde(const Char *pattern, Char *patbuf, size_t patbuf_len, glob_t *pglob)
417{
418	struct passwd *pwd;
419	char *h, *sc;
420	const Char *p;
421	Char *b, *eb;
422	wchar_t wc;
423	wchar_t wbuf[MAXPATHLEN];
424	wchar_t *wbufend, *dc;
425	size_t clen;
426	mbstate_t mbs;
427	int too_long;
428
429	if (*pattern != TILDE || !(pglob->gl_flags & GLOB_TILDE))
430		return (pattern);
431
432	/*
433	 * Copy up to the end of the string or /
434	 */
435	eb = &patbuf[patbuf_len - 1];
436	for (p = pattern + 1, b = patbuf;
437	    b < eb && *p != EOS && UNPROT(*p) != SEP; *b++ = *p++)
438		continue;
439
440	if (*p != EOS && UNPROT(*p) != SEP)
441		return (NULL);
442
443	*b = EOS;
444	h = NULL;
445
446	if (patbuf[0] == EOS) {
447		/*
448		 * handle a plain ~ or ~/ by expanding $HOME first (iff
449		 * we're not running setuid or setgid) and then trying
450		 * the password file
451		 */
452		if ((h = secure_getenv("HOME")) == NULL) {
453			if (((h = getlogin()) != NULL &&
454			     (pwd = getpwnam(h)) != NULL) ||
455			    (pwd = getpwuid(getuid())) != NULL)
456				h = pwd->pw_dir;
457			else
458				return (pattern);
459		}
460	}
461	else {
462		/*
463		 * Expand a ~user
464		 */
465		if (g_Ctoc(patbuf, (char *)wbuf, sizeof(wbuf)))
466			return (NULL);
467		if ((pwd = getpwnam((char *)wbuf)) == NULL)
468			return (pattern);
469		else
470			h = pwd->pw_dir;
471	}
472
473	/* Copy the home directory */
474	dc = wbuf;
475	sc = h;
476	wbufend = wbuf + MAXPATHLEN - 1;
477	too_long = 1;
478	memset(&mbs, 0, sizeof(mbs));
479	while (dc <= wbufend) {
480		clen = mbrtowc(&wc, sc, MB_LEN_MAX, &mbs);
481		if (clen == (size_t)-1 || clen == (size_t)-2) {
482			/* XXX See initial comment #2. */
483			wc = (unsigned char)*sc;
484			clen = 1;
485			memset(&mbs, 0, sizeof(mbs));
486		}
487		if ((*dc++ = wc) == EOS) {
488			too_long = 0;
489			break;
490		}
491		sc += clen;
492	}
493	if (too_long)
494		return (NULL);
495
496	dc = wbuf;
497	for (b = patbuf; b < eb && *dc != EOS; *b++ = *dc++ | M_PROTECT)
498		continue;
499	if (*dc != EOS)
500		return (NULL);
501
502	/* Append the rest of the pattern */
503	if (*p != EOS) {
504		too_long = 1;
505		while (b <= eb) {
506			if ((*b++ = *p++) == EOS) {
507				too_long = 0;
508				break;
509			}
510		}
511		if (too_long)
512			return (NULL);
513	} else
514		*b = EOS;
515
516	return (patbuf);
517}
518
519
520/*
521 * The main glob() routine: compiles the pattern (optionally processing
522 * quotes), calls glob1() to do the real pattern matching, and finally
523 * sorts the list (unless unsorted operation is requested).  Returns 0
524 * if things went well, nonzero if errors occurred.
525 */
526static int
527glob0(const Char *pattern, glob_t *pglob, struct glob_limit *limit,
528    const char *origpat) {
529	const Char *qpatnext;
530	int err;
531	size_t oldpathc;
532	Char *bufnext, c, patbuf[MAXPATHLEN];
533
534	qpatnext = globtilde(pattern, patbuf, MAXPATHLEN, pglob);
535	if (qpatnext == NULL) {
536		errno = E2BIG;
537		return (GLOB_NOSPACE);
538	}
539	oldpathc = pglob->gl_pathc;
540	bufnext = patbuf;
541
542	/* We don't need to check for buffer overflow any more. */
543	while ((c = *qpatnext++) != EOS) {
544		switch (c) {
545		case LBRACKET:
546			c = *qpatnext;
547			if (c == NOT)
548				++qpatnext;
549			if (*qpatnext == EOS ||
550			    g_strchr(qpatnext+1, RBRACKET) == NULL) {
551				*bufnext++ = LBRACKET;
552				if (c == NOT)
553					--qpatnext;
554				break;
555			}
556			*bufnext++ = M_SET;
557			if (c == NOT)
558				*bufnext++ = M_NOT;
559			c = *qpatnext++;
560			do {
561				*bufnext++ = CHAR(c);
562				if (*qpatnext == RANGE &&
563				    (c = qpatnext[1]) != RBRACKET) {
564					*bufnext++ = M_RNG;
565					*bufnext++ = CHAR(c);
566					qpatnext += 2;
567				}
568			} while ((c = *qpatnext++) != RBRACKET);
569			pglob->gl_flags |= GLOB_MAGCHAR;
570			*bufnext++ = M_END;
571			break;
572		case QUESTION:
573			pglob->gl_flags |= GLOB_MAGCHAR;
574			*bufnext++ = M_ONE;
575			break;
576		case STAR:
577			pglob->gl_flags |= GLOB_MAGCHAR;
578			/* collapse adjacent stars to one,
579			 * to ensure "**" at the end continues to match the
580			 * empty string
581			 */
582			if (bufnext == patbuf || bufnext[-1] != M_ALL)
583			    *bufnext++ = M_ALL;
584			break;
585		default:
586			*bufnext++ = CHAR(c);
587			break;
588		}
589	}
590	*bufnext = EOS;
591#ifdef DEBUG
592	qprintf("glob0:", patbuf);
593#endif
594
595	if ((err = glob1(patbuf, pglob, limit)) != 0)
596		return(err);
597
598	if (origpat != NULL)
599		return (globfinal(pglob, limit, oldpathc, origpat));
600
601	return (0);
602}
603
604static int
605globfinal(glob_t *pglob, struct glob_limit *limit, size_t oldpathc,
606    const char *origpat) {
607	if (pglob->gl_pathc == oldpathc)
608		return (err_nomatch(pglob, limit, origpat));
609
610	if (!(pglob->gl_flags & GLOB_NOSORT))
611		qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc,
612		    pglob->gl_pathc - oldpathc, sizeof(char *), compare);
613
614	return (0);
615}
616
617static int
618compare(const void *p, const void *q)
619{
620	return (strcoll(*(char **)p, *(char **)q));
621}
622
623static int
624glob1(Char *pattern, glob_t *pglob, struct glob_limit *limit)
625{
626	Char pathbuf[MAXPATHLEN];
627
628	/* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */
629	if (*pattern == EOS)
630		return (0);
631	return (glob2(pathbuf, pathbuf, pathbuf + MAXPATHLEN - 1,
632	    pattern, pglob, limit));
633}
634
635/*
636 * The functions glob2 and glob3 are mutually recursive; there is one level
637 * of recursion for each segment in the pattern that contains one or more
638 * meta characters.
639 */
640static int
641glob2(Char *pathbuf, Char *pathend, Char *pathend_last, Char *pattern,
642      glob_t *pglob, struct glob_limit *limit)
643{
644	struct stat sb;
645	Char *p, *q;
646	int anymeta;
647
648	/*
649	 * Loop over pattern segments until end of pattern or until
650	 * segment with meta character found.
651	 */
652	for (anymeta = 0;;) {
653		if (*pattern == EOS) {		/* End of pattern? */
654			*pathend = EOS;
655			if (g_lstat(pathbuf, &sb, pglob))
656				return (0);
657
658			if ((pglob->gl_flags & GLOB_LIMIT) &&
659			    limit->l_stat_cnt++ >= GLOB_LIMIT_STAT) {
660				errno = E2BIG;
661				return (GLOB_NOSPACE);
662			}
663			if ((pglob->gl_flags & GLOB_MARK) &&
664			    UNPROT(pathend[-1]) != SEP &&
665			    (S_ISDIR(sb.st_mode) ||
666			    (S_ISLNK(sb.st_mode) &&
667			    g_stat(pathbuf, &sb, pglob) == 0 &&
668			    S_ISDIR(sb.st_mode)))) {
669				if (pathend + 1 > pathend_last) {
670					errno = E2BIG;
671					return (GLOB_NOSPACE);
672				}
673				*pathend++ = SEP;
674				*pathend = EOS;
675			}
676			++pglob->gl_matchc;
677			return (globextend(pathbuf, pglob, limit, NULL));
678		}
679
680		/* Find end of next segment, copy tentatively to pathend. */
681		q = pathend;
682		p = pattern;
683		while (*p != EOS && UNPROT(*p) != SEP) {
684			if (ismeta(*p))
685				anymeta = 1;
686			if (q + 1 > pathend_last) {
687				errno = E2BIG;
688				return (GLOB_NOSPACE);
689			}
690			*q++ = *p++;
691		}
692
693		if (!anymeta) {		/* No expansion, do next segment. */
694			pathend = q;
695			pattern = p;
696			while (UNPROT(*pattern) == SEP) {
697				if (pathend + 1 > pathend_last) {
698					errno = E2BIG;
699					return (GLOB_NOSPACE);
700				}
701				*pathend++ = *pattern++;
702			}
703		} else			/* Need expansion, recurse. */
704			return (glob3(pathbuf, pathend, pathend_last, pattern,
705			    p, pglob, limit));
706	}
707	/* NOTREACHED */
708}
709
710static int
711glob3(Char *pathbuf, Char *pathend, Char *pathend_last,
712      Char *pattern, Char *restpattern,
713      glob_t *pglob, struct glob_limit *limit)
714{
715	struct dirent *dp;
716	DIR *dirp;
717	int err, too_long, saverrno, saverrno2;
718	char buf[MAXPATHLEN + MB_LEN_MAX - 1];
719
720	struct dirent *(*readdirfunc)(DIR *);
721
722	if (pathend > pathend_last) {
723		errno = E2BIG;
724		return (GLOB_NOSPACE);
725	}
726	*pathend = EOS;
727	if (pglob->gl_errfunc != NULL &&
728	    g_Ctoc(pathbuf, buf, sizeof(buf))) {
729		errno = E2BIG;
730		return (GLOB_NOSPACE);
731	}
732
733	saverrno = errno;
734	errno = 0;
735	if ((dirp = g_opendir(pathbuf, pglob)) == NULL) {
736		if (errno == ENOENT || errno == ENOTDIR)
737			return (0);
738		err = err_aborted(pglob, errno, buf);
739		if (errno == 0)
740			errno = saverrno;
741		return (err);
742	}
743
744	err = 0;
745
746	/* pglob->gl_readdir takes a void *, fix this manually */
747	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
748		readdirfunc = (struct dirent *(*)(DIR *))pglob->gl_readdir;
749	else
750		readdirfunc = readdir;
751
752	errno = 0;
753	/* Search directory for matching names. */
754	while ((dp = (*readdirfunc)(dirp)) != NULL) {
755		char *sc;
756		Char *dc;
757		wchar_t wc;
758		size_t clen;
759		mbstate_t mbs;
760
761		if ((pglob->gl_flags & GLOB_LIMIT) &&
762		    limit->l_readdir_cnt++ >= GLOB_LIMIT_READDIR) {
763			errno = E2BIG;
764			err = GLOB_NOSPACE;
765			break;
766		}
767
768		/* Initial DOT must be matched literally. */
769		if (dp->d_name[0] == '.' && UNPROT(*pattern) != DOT) {
770			errno = 0;
771			continue;
772		}
773		memset(&mbs, 0, sizeof(mbs));
774		dc = pathend;
775		sc = dp->d_name;
776		too_long = 1;
777		while (dc <= pathend_last) {
778			clen = mbrtowc(&wc, sc, MB_LEN_MAX, &mbs);
779			if (clen == (size_t)-1 || clen == (size_t)-2) {
780				/* XXX See initial comment #2. */
781				wc = (unsigned char)*sc;
782				clen = 1;
783				memset(&mbs, 0, sizeof(mbs));
784			}
785			if ((*dc++ = wc) == EOS) {
786				too_long = 0;
787				break;
788			}
789			sc += clen;
790		}
791		if (too_long && (err = err_aborted(pglob, ENAMETOOLONG,
792		    buf))) {
793			errno = ENAMETOOLONG;
794			break;
795		}
796		if (too_long || !match(pathend, pattern, restpattern)) {
797			*pathend = EOS;
798			errno = 0;
799			continue;
800		}
801		if (errno == 0)
802			errno = saverrno;
803		err = glob2(pathbuf, --dc, pathend_last, restpattern,
804		    pglob, limit);
805		if (err)
806			break;
807		errno = 0;
808	}
809
810	saverrno2 = errno;
811	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
812		(*pglob->gl_closedir)(dirp);
813	else
814		closedir(dirp);
815	errno = saverrno2;
816
817	if (err)
818		return (err);
819
820	if (dp == NULL && errno != 0 &&
821	    (err = err_aborted(pglob, errno, buf)))
822		return (err);
823
824	if (errno == 0)
825		errno = saverrno;
826	return (0);
827}
828
829
830/*
831 * Extend the gl_pathv member of a glob_t structure to accommodate a new item,
832 * add the new item, and update gl_pathc.
833 *
834 * This assumes the BSD realloc, which only copies the block when its size
835 * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
836 * behavior.
837 *
838 * Return 0 if new item added, error code if memory couldn't be allocated.
839 *
840 * Invariant of the glob_t structure:
841 *	Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
842 *	gl_pathv points to (gl_offs + gl_pathc + 1) items.
843 */
844static int
845globextend(const Char *path, glob_t *pglob, struct glob_limit *limit,
846    const char *origpat)
847{
848	char **pathv;
849	size_t i, newn, len;
850	char *copy;
851	const Char *p;
852
853	if ((pglob->gl_flags & GLOB_LIMIT) &&
854	    pglob->gl_matchc > limit->l_path_lim) {
855		errno = E2BIG;
856		return (GLOB_NOSPACE);
857	}
858
859	newn = 2 + pglob->gl_pathc + pglob->gl_offs;
860	/* reallocarray(NULL, newn, size) is equivalent to malloc(newn*size). */
861	pathv = reallocarray(pglob->gl_pathv, newn, sizeof(*pathv));
862	if (pathv == NULL)
863		return (GLOB_NOSPACE);
864
865	if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
866		/* first time around -- clear initial gl_offs items */
867		pathv += pglob->gl_offs;
868		for (i = pglob->gl_offs + 1; --i > 0; )
869			*--pathv = NULL;
870	}
871	pglob->gl_pathv = pathv;
872
873	if (origpat != NULL)
874		copy = strdup(origpat);
875	else {
876		for (p = path; *p++ != EOS;)
877			continue;
878		len = MB_CUR_MAX * (size_t)(p - path); /* XXX overallocation */
879		if ((copy = malloc(len)) != NULL) {
880			if (g_Ctoc(path, copy, len)) {
881				free(copy);
882				errno = E2BIG;
883				return (GLOB_NOSPACE);
884			}
885		}
886	}
887	if (copy != NULL) {
888		limit->l_string_cnt += strlen(copy) + 1;
889		if ((pglob->gl_flags & GLOB_LIMIT) &&
890		    limit->l_string_cnt >= GLOB_LIMIT_STRING) {
891			free(copy);
892			errno = E2BIG;
893			return (GLOB_NOSPACE);
894		}
895		pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
896	}
897	pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
898	return (copy == NULL ? GLOB_NOSPACE : 0);
899}
900
901/*
902 * pattern matching function for filenames.
903 */
904static int
905match(Char *name, Char *pat, Char *patend)
906{
907	int ok, negate_range;
908	Char c, k, *nextp, *nextn;
909	struct xlocale_collate *table =
910		(struct xlocale_collate*)__get_locale()->components[XLC_COLLATE];
911
912	nextn = NULL;
913	nextp = NULL;
914
915	while (1) {
916		while (pat < patend) {
917			c = *pat++;
918			switch (c & M_MASK) {
919			case M_ALL:
920				if (pat == patend)
921					return (1);
922				if (*name == EOS)
923					return (0);
924				nextn = name + 1;
925				nextp = pat - 1;
926				break;
927			case M_ONE:
928				if (*name++ == EOS)
929					goto fail;
930				break;
931			case M_SET:
932				ok = 0;
933				if ((k = *name++) == EOS)
934					goto fail;
935				negate_range = ((*pat & M_MASK) == M_NOT);
936				if (negate_range != 0)
937					++pat;
938				while (((c = *pat++) & M_MASK) != M_END)
939					if ((*pat & M_MASK) == M_RNG) {
940						if (table->__collate_load_error ?
941						    CHAR(c) <= CHAR(k) &&
942						    CHAR(k) <= CHAR(pat[1]) :
943						    __wcollate_range_cmp(CHAR(c),
944						    CHAR(k)) <= 0 &&
945						    __wcollate_range_cmp(CHAR(k),
946						    CHAR(pat[1])) <= 0)
947							ok = 1;
948						pat += 2;
949					} else if (c == k)
950						ok = 1;
951				if (ok == negate_range)
952					goto fail;
953				break;
954			default:
955				if (*name++ != c)
956					goto fail;
957				break;
958			}
959		}
960		if (*name == EOS)
961			return (1);
962
963	fail:
964		if (nextn == NULL)
965			break;
966		pat = nextp;
967		name = nextn;
968	}
969	return (0);
970}
971
972/* Free allocated data belonging to a glob_t structure. */
973void
974globfree(glob_t *pglob)
975{
976	size_t i;
977	char **pp;
978
979	if (pglob->gl_pathv != NULL) {
980		pp = pglob->gl_pathv + pglob->gl_offs;
981		for (i = pglob->gl_pathc; i--; ++pp)
982			if (*pp)
983				free(*pp);
984		free(pglob->gl_pathv);
985		pglob->gl_pathv = NULL;
986	}
987}
988
989static DIR *
990g_opendir(Char *str, glob_t *pglob)
991{
992	char buf[MAXPATHLEN + MB_LEN_MAX - 1];
993
994	if (*str == EOS)
995		strcpy(buf, ".");
996	else {
997		if (g_Ctoc(str, buf, sizeof(buf))) {
998			errno = ENAMETOOLONG;
999			return (NULL);
1000		}
1001	}
1002
1003	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
1004		return ((*pglob->gl_opendir)(buf));
1005
1006	return (opendir(buf));
1007}
1008
1009static int
1010g_lstat(Char *fn, struct stat *sb, glob_t *pglob)
1011{
1012	char buf[MAXPATHLEN + MB_LEN_MAX - 1];
1013
1014	if (g_Ctoc(fn, buf, sizeof(buf))) {
1015		errno = ENAMETOOLONG;
1016		return (-1);
1017	}
1018	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
1019		return((*pglob->gl_lstat)(buf, sb));
1020	return (lstat(buf, sb));
1021}
1022
1023static int
1024g_stat(Char *fn, struct stat *sb, glob_t *pglob)
1025{
1026	char buf[MAXPATHLEN + MB_LEN_MAX - 1];
1027
1028	if (g_Ctoc(fn, buf, sizeof(buf))) {
1029		errno = ENAMETOOLONG;
1030		return (-1);
1031	}
1032	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
1033		return ((*pglob->gl_stat)(buf, sb));
1034	return (stat(buf, sb));
1035}
1036
1037static const Char *
1038g_strchr(const Char *str, wchar_t ch)
1039{
1040
1041	do {
1042		if (*str == ch)
1043			return (str);
1044	} while (*str++);
1045	return (NULL);
1046}
1047
1048static int
1049g_Ctoc(const Char *str, char *buf, size_t len)
1050{
1051	mbstate_t mbs;
1052	size_t clen;
1053
1054	memset(&mbs, 0, sizeof(mbs));
1055	while (len >= MB_CUR_MAX) {
1056		clen = wcrtomb(buf, CHAR(*str), &mbs);
1057		if (clen == (size_t)-1) {
1058			/* XXX See initial comment #2. */
1059			*buf = (char)CHAR(*str);
1060			clen = 1;
1061			memset(&mbs, 0, sizeof(mbs));
1062		}
1063		if (CHAR(*str) == EOS)
1064			return (0);
1065		str++;
1066		buf += clen;
1067		len -= clen;
1068	}
1069	return (1);
1070}
1071
1072static int
1073err_nomatch(glob_t *pglob, struct glob_limit *limit, const char *origpat) {
1074	/*
1075	 * If there was no match we are going to append the origpat
1076	 * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified
1077	 * and the origpat did not contain any magic characters
1078	 * GLOB_NOMAGIC is there just for compatibility with csh.
1079	 */
1080	if ((pglob->gl_flags & GLOB_NOCHECK) ||
1081	    ((pglob->gl_flags & GLOB_NOMAGIC) &&
1082	    !(pglob->gl_flags & GLOB_MAGCHAR)))
1083		return (globextend(NULL, pglob, limit, origpat));
1084	return (GLOB_NOMATCH);
1085}
1086
1087static int
1088err_aborted(glob_t *pglob, int err, char *buf) {
1089	if ((pglob->gl_errfunc != NULL && pglob->gl_errfunc(buf, err)) ||
1090	    (pglob->gl_flags & GLOB_ERR))
1091		return (GLOB_ABORTED);
1092	return (0);
1093}
1094
1095#ifdef DEBUG
1096static void
1097qprintf(const char *str, Char *s)
1098{
1099	Char *p;
1100
1101	(void)printf("%s\n", str);
1102	if (s != NULL) {
1103		for (p = s; *p != EOS; p++)
1104			(void)printf("%c", (char)CHAR(*p));
1105		(void)printf("\n");
1106		for (p = s; *p != EOS; p++)
1107			(void)printf("%c", (isprot(*p) ? '\\' : ' '));
1108		(void)printf("\n");
1109		for (p = s; *p != EOS; p++)
1110			(void)printf("%c", (ismeta(*p) ? '_' : ' '));
1111		(void)printf("\n");
1112	}
1113}
1114#endif
1115