glob.c revision 225736
1/*
2 * Copyright (c) 1989 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Guido van Rossum.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32#if defined(LIBC_SCCS) && !defined(lint)
33static char sccsid[] = "@(#)glob.c	5.12 (Berkeley) 6/24/91";
34#endif /* LIBC_SCCS and not lint */
35/*
36 * Glob: the interface is a superset of the one defined in POSIX 1003.2,
37 * draft 9.
38 *
39 * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
40 *
41 * Optional extra services, controlled by flags not defined by POSIX:
42 *
43 * GLOB_QUOTE:
44 *	Escaping convention: \ inhibits any special meaning the following
45 *	character might have (except \ at end of string is retained).
46 * GLOB_MAGCHAR:
47 *	Set in gl_flags if pattern contained a globbing character.
48 * GLOB_ALTNOT:
49 *	Use ^ instead of ! for "not".
50 * gl_matchc:
51 *	Number of matches in the current invocation of glob.
52 */
53
54#ifdef WINNT_NATIVE
55	#pragma warning(disable:4244)
56#endif /* WINNT_NATIVE */
57
58#define Char __Char
59#include "sh.h"
60#include "glob.h"
61
62#undef Char
63#undef QUOTE
64#undef TILDE
65#undef META
66#undef ismeta
67#undef Strchr
68
69#ifndef S_ISDIR
70#define S_ISDIR(a)	(((a) & S_IFMT) == S_IFDIR)
71#endif
72
73#if !defined(S_ISLNK) && defined(S_IFLNK)
74#define S_ISLNK(a)	(((a) & S_IFMT) == S_IFLNK)
75#endif
76
77#if !defined(S_ISLNK) && !defined(lstat)
78#define lstat stat
79#endif
80
81typedef unsigned short Char;
82
83static	int	 glob1 		(Char *, glob_t *, int);
84static	int	 glob2		(struct strbuf *, const Char *, glob_t *, int);
85static	int	 glob3		(struct strbuf *, const Char *, const Char *,
86				 glob_t *, int);
87static	void	 globextend	(const char *, glob_t *);
88static	int	 match		(const char *, const Char *, const Char *,
89				 int);
90static	int	 compare	(const void *, const void *);
91static 	DIR	*Opendir	(const char *);
92#ifdef S_IFLNK
93static	int	 Lstat		(const char *, struct stat *);
94#endif
95static	int	 Stat		(const char *, struct stat *sb);
96static 	Char 	*Strchr		(Char *, int);
97#ifdef DEBUG
98static	void	 qprintf	(const Char *);
99#endif
100
101#define	DOLLAR		'$'
102#define	DOT		'.'
103#define	EOS		'\0'
104#define	LBRACKET	'['
105#define	NOT		'!'
106#define ALTNOT		'^'
107#define	QUESTION	'?'
108#define	QUOTE		'\\'
109#define	RANGE		'-'
110#define	RBRACKET	']'
111#define	SEP		'/'
112#define	STAR		'*'
113#define	TILDE		'~'
114#define	UNDERSCORE	'_'
115
116#define	M_META		0x8000
117#define M_PROTECT	0x4000
118#define	M_MASK		0xffff
119#define	M_ASCII		0x00ff
120
121#define	LCHAR(c)	((c)&M_ASCII)
122#define	META(c)		((c)|M_META)
123#define	M_ALL		META('*')
124#define	M_END		META(']')
125#define	M_NOT		META('!')
126#define	M_ALTNOT	META('^')
127#define	M_ONE		META('?')
128#define	M_RNG		META('-')
129#define	M_SET		META('[')
130#define	ismeta(c)	(((c)&M_META) != 0)
131
132int
133globcharcoll(__Char c1, __Char c2, int cs)
134{
135#if defined(NLS) && defined(LC_COLLATE) && defined(HAVE_STRCOLL)
136# if defined(WIDE_STRINGS)
137    wchar_t s1[2], s2[2];
138
139    if (c1 == c2)
140	return (0);
141    if (cs) {
142	c1 = towlower(c1);
143	c2 = towlower(c2);
144    } else {
145	/* This should not be here, but I'll rather leave it in than engage in
146	   a LC_COLLATE flamewar about a shell I don't use... */
147	if (iswlower(c1) && iswupper(c2))
148	    return (1);
149	if (iswupper(c1) && iswlower(c2))
150	    return (-1);
151    }
152    s1[0] = c1;
153    s2[0] = c2;
154    s1[1] = s2[1] = '\0';
155    return wcscoll(s1, s2);
156# else /* not WIDE_STRINGS */
157    char s1[2], s2[2];
158
159    if (c1 == c2)
160	return (0);
161    /*
162     * From kevin lyda <kevin@suberic.net>:
163     * strcoll does not guarantee case sorting, so we pre-process now:
164     */
165    if (cs) {
166	c1 = islower(c1) ? c1 : tolower(c1);
167	c2 = islower(c2) ? c2 : tolower(c2);
168    } else {
169	if (islower(c1) && isupper(c2))
170	    return (1);
171	if (isupper(c1) && islower(c2))
172	    return (-1);
173    }
174    s1[0] = c1;
175    s2[0] = c2;
176    s1[1] = s2[1] = '\0';
177    return strcoll(s1, s2);
178# endif
179#else
180    return (c1 - c2);
181#endif
182}
183
184/*
185 * Need to dodge two kernel bugs:
186 * opendir("") != opendir(".")
187 * NAMEI_BUG: on plain files trailing slashes are ignored in some kernels.
188 *            POSIX specifies that they should be ignored in directories.
189 */
190
191static DIR *
192Opendir(const char *str)
193{
194#if defined(hpux) || defined(__hpux)
195    struct stat st;
196#endif
197
198    if (!*str)
199	return (opendir("."));
200#if defined(hpux) || defined(__hpux)
201    /*
202     * Opendir on some device files hangs, so avoid it
203     */
204    if (stat(str, &st) == -1 || !S_ISDIR(st.st_mode))
205	return NULL;
206#endif
207    return opendir(str);
208}
209
210#ifdef S_IFLNK
211static int
212Lstat(const char *fn, struct stat *sb)
213{
214    int st;
215
216    st = lstat(fn, sb);
217# ifdef NAMEI_BUG
218    if (*fn != 0 && strend(fn)[-1] == '/' && !S_ISDIR(sb->st_mode))
219	st = -1;
220# endif	/* NAMEI_BUG */
221    return st;
222}
223#else
224#define Lstat Stat
225#endif /* S_IFLNK */
226
227static int
228Stat(const char *fn, struct stat *sb)
229{
230    int st;
231
232    st = stat(fn, sb);
233#ifdef NAMEI_BUG
234    if (*fn != 0 && strend(fn)[-1] == '/' && !S_ISDIR(sb->st_mode))
235	st = -1;
236#endif /* NAMEI_BUG */
237    return st;
238}
239
240static Char *
241Strchr(Char *str, int ch)
242{
243    do
244	if (*str == ch)
245	    return (str);
246    while (*str++);
247    return (NULL);
248}
249
250#ifdef DEBUG
251static void
252qprintf(const Char *s)
253{
254    const Char *p;
255
256    for (p = s; *p; p++)
257	printf("%c", *p & 0xff);
258    printf("\n");
259    for (p = s; *p; p++)
260	printf("%c", *p & M_PROTECT ? '"' : ' ');
261    printf("\n");
262    for (p = s; *p; p++)
263	printf("%c", *p & M_META ? '_' : ' ');
264    printf("\n");
265}
266#endif /* DEBUG */
267
268static int
269compare(const void *p, const void *q)
270{
271#if defined(NLS) && defined(HAVE_STRCOLL)
272    return (strcoll(*(char *const *) p, *(char *const *) q));
273#else
274    return (strcmp(*(char *const *) p, *(char *const *) q));
275#endif /* NLS && HAVE_STRCOLL */
276}
277
278/*
279 * The main glob() routine: compiles the pattern (optionally processing
280 * quotes), calls glob1() to do the real pattern matching, and finally
281 * sorts the list (unless unsorted operation is requested).  Returns 0
282 * if things went well, nonzero if errors occurred.  It is not an error
283 * to find no matches.
284 */
285int
286glob(const char *pattern, int flags, int (*errfunc) (const char *, int),
287     glob_t *pglob)
288{
289    int     err, oldpathc;
290    Char *bufnext, m_not;
291    const unsigned char *patnext;
292    int     c, not;
293    Char *qpatnext, *patbuf;
294    int     no_match;
295
296    patnext = (const unsigned char *) pattern;
297    if (!(flags & GLOB_APPEND)) {
298	pglob->gl_pathc = 0;
299	pglob->gl_pathv = NULL;
300	if (!(flags & GLOB_DOOFFS))
301	    pglob->gl_offs = 0;
302    }
303    pglob->gl_flags = flags & ~GLOB_MAGCHAR;
304    pglob->gl_errfunc = errfunc;
305    oldpathc = pglob->gl_pathc;
306    pglob->gl_matchc = 0;
307
308    if (pglob->gl_flags & GLOB_ALTNOT) {
309	not = ALTNOT;
310	m_not = M_ALTNOT;
311    }
312    else {
313	not = NOT;
314	m_not = M_NOT;
315    }
316
317    patbuf = xmalloc((strlen(pattern) + 1) * sizeof(*patbuf));
318    bufnext = patbuf;
319
320    no_match = *patnext == not;
321    if (no_match)
322	patnext++;
323
324    if (flags & GLOB_QUOTE) {
325	/* Protect the quoted characters */
326	while ((c = *patnext++) != EOS) {
327#ifdef WIDE_STRINGS
328	    int len;
329
330	    len = mblen((const char *)(patnext - 1), MB_LEN_MAX);
331	    if (len == -1)
332		(void)mblen(NULL, 0);
333	    else if (len > 1) {
334		*bufnext++ = (Char) c;
335		while (--len != 0)
336		    *bufnext++ = (Char) (*patnext++ | M_PROTECT);
337	    } else
338#endif /* WIDE_STRINGS */
339	    if (c == QUOTE) {
340		if ((c = *patnext++) == EOS) {
341		    c = QUOTE;
342		    --patnext;
343		}
344		*bufnext++ = (Char) (c | M_PROTECT);
345	    }
346	    else
347		*bufnext++ = (Char) c;
348	}
349    }
350    else
351	while ((c = *patnext++) != EOS)
352	    *bufnext++ = (Char) c;
353    *bufnext = EOS;
354
355    bufnext = patbuf;
356    qpatnext = patbuf;
357    while ((c = *qpatnext++) != EOS) {
358	switch (c) {
359	case LBRACKET:
360	    c = *qpatnext;
361	    if (c == not)
362		++qpatnext;
363	    if (*qpatnext == EOS ||
364		Strchr(qpatnext + 1, RBRACKET) == NULL) {
365		*bufnext++ = LBRACKET;
366		if (c == not)
367		    --qpatnext;
368		break;
369	    }
370	    pglob->gl_flags |= GLOB_MAGCHAR;
371	    *bufnext++ = M_SET;
372	    if (c == not)
373		*bufnext++ = m_not;
374	    c = *qpatnext++;
375	    do {
376		*bufnext++ = LCHAR(c);
377		if (*qpatnext == RANGE &&
378		    (c = qpatnext[1]) != RBRACKET) {
379		    *bufnext++ = M_RNG;
380		    *bufnext++ = LCHAR(c);
381		    qpatnext += 2;
382		}
383	    } while ((c = *qpatnext++) != RBRACKET);
384	    *bufnext++ = M_END;
385	    break;
386	case QUESTION:
387	    pglob->gl_flags |= GLOB_MAGCHAR;
388	    *bufnext++ = M_ONE;
389	    break;
390	case STAR:
391	    pglob->gl_flags |= GLOB_MAGCHAR;
392	    /* collapse adjacent stars to one, to avoid
393	     * exponential behavior
394	     */
395	    if (bufnext == patbuf || bufnext[-1] != M_ALL)
396		*bufnext++ = M_ALL;
397	    break;
398	default:
399	    *bufnext++ = LCHAR(c);
400	    break;
401	}
402    }
403    *bufnext = EOS;
404#ifdef DEBUG
405    qprintf(patbuf);
406#endif
407
408    if ((err = glob1(patbuf, pglob, no_match)) != 0) {
409	xfree(patbuf);
410	return (err);
411    }
412
413    /*
414     * If there was no match we are going to append the pattern
415     * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified
416     * and the pattern did not contain any magic characters
417     * GLOB_NOMAGIC is there just for compatibility with csh.
418     */
419    if (pglob->gl_pathc == oldpathc &&
420	((flags & GLOB_NOCHECK) ||
421	 ((flags & GLOB_NOMAGIC) && !(pglob->gl_flags & GLOB_MAGCHAR)))) {
422	if (!(flags & GLOB_QUOTE))
423	    globextend(pattern, pglob);
424	else {
425	    char *copy, *dest;
426	    const char *src;
427
428	    /* copy pattern, interpreting quotes */
429	    copy = xmalloc(strlen(pattern) + 1);
430	    dest = copy;
431	    src = pattern;
432	    while (*src != EOS) {
433		if (*src == QUOTE) {
434		    if (*++src == EOS)
435			--src;
436		}
437		*dest++ = *src++;
438	    }
439	    *dest = EOS;
440	    globextend(copy, pglob);
441	    xfree(copy);
442	}
443	xfree(patbuf);
444	return 0;
445    }
446    else if (!(flags & GLOB_NOSORT) && (pglob->gl_pathc != oldpathc))
447	qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc,
448	      pglob->gl_pathc - oldpathc, sizeof(char *), compare);
449    xfree(patbuf);
450    return (0);
451}
452
453static int
454glob1(Char *pattern, glob_t *pglob, int no_match)
455{
456    struct strbuf pathbuf = strbuf_INIT;
457    int err;
458
459    /*
460     * a null pathname is invalid -- POSIX 1003.1 sect. 2.4.
461     */
462    if (*pattern == EOS)
463	return (0);
464    err = glob2(&pathbuf, pattern, pglob, no_match);
465    xfree(pathbuf.s);
466    return err;
467}
468
469/*
470 * functions glob2 and glob3 are mutually recursive; there is one level
471 * of recursion for each segment in the pattern that contains one or
472 * more meta characters.
473 */
474static int
475glob2(struct strbuf *pathbuf, const Char *pattern, glob_t *pglob, int no_match)
476{
477    struct stat sbuf;
478    int anymeta;
479    const Char *p;
480    size_t orig_len;
481
482    /*
483     * loop over pattern segments until end of pattern or until segment with
484     * meta character found.
485     */
486    anymeta = 0;
487    for (;;) {
488	if (*pattern == EOS) {	/* end of pattern? */
489	    strbuf_terminate(pathbuf);
490
491	    if (Lstat(pathbuf->s, &sbuf))
492		return (0);
493
494	    if (((pglob->gl_flags & GLOB_MARK) &&
495		 pathbuf->s[pathbuf->len - 1] != SEP) &&
496		(S_ISDIR(sbuf.st_mode)
497#ifdef S_IFLNK
498		 || (S_ISLNK(sbuf.st_mode) &&
499		     (Stat(pathbuf->s, &sbuf) == 0) &&
500		     S_ISDIR(sbuf.st_mode))
501#endif
502		 )) {
503		strbuf_append1(pathbuf, SEP);
504		strbuf_terminate(pathbuf);
505	    }
506	    ++pglob->gl_matchc;
507	    globextend(pathbuf->s, pglob);
508	    return 0;
509	}
510
511	/* find end of next segment, tentatively copy to pathbuf */
512	p = pattern;
513	orig_len = pathbuf->len;
514	while (*p != EOS && *p != SEP) {
515	    if (ismeta(*p))
516		anymeta = 1;
517	    strbuf_append1(pathbuf, *p++);
518	}
519
520	if (!anymeta) {		/* no expansion, do next segment */
521	    pattern = p;
522	    while (*pattern == SEP)
523		strbuf_append1(pathbuf, *pattern++);
524	}
525	else {			/* need expansion, recurse */
526	    pathbuf->len = orig_len;
527	    return (glob3(pathbuf, pattern, p, pglob, no_match));
528	}
529    }
530    /* NOTREACHED */
531}
532
533
534static int
535glob3(struct strbuf *pathbuf, const Char *pattern, const Char *restpattern,
536      glob_t *pglob, int no_match)
537{
538    DIR    *dirp;
539    struct dirent *dp;
540    int     err;
541    Char m_not = (pglob->gl_flags & GLOB_ALTNOT) ? M_ALTNOT : M_NOT;
542    size_t orig_len;
543
544    strbuf_terminate(pathbuf);
545    errno = 0;
546
547    if (!(dirp = Opendir(pathbuf->s))) {
548	/* todo: don't call for ENOENT or ENOTDIR? */
549	if ((pglob->gl_errfunc && (*pglob->gl_errfunc) (pathbuf->s, errno)) ||
550	    (pglob->gl_flags & GLOB_ERR))
551	    return (GLOB_ABEND);
552	else
553	    return (0);
554    }
555
556    err = 0;
557
558    orig_len = pathbuf->len;
559    /* search directory for matching names */
560    while ((dp = readdir(dirp)) != NULL) {
561	/* initial DOT must be matched literally */
562	if (dp->d_name[0] == DOT && *pattern != DOT)
563	    continue;
564	pathbuf->len = orig_len;
565	strbuf_append(pathbuf, dp->d_name);
566	strbuf_terminate(pathbuf);
567	if (match(pathbuf->s + orig_len, pattern, restpattern, (int) m_not)
568	    == no_match)
569	    continue;
570	err = glob2(pathbuf, restpattern, pglob, no_match);
571	if (err)
572	    break;
573    }
574    /* todo: check error from readdir? */
575    closedir(dirp);
576    return (err);
577}
578
579
580/*
581 * Extend the gl_pathv member of a glob_t structure to accomodate a new item,
582 * add the new item, and update gl_pathc.
583 *
584 * This assumes the BSD realloc, which only copies the block when its size
585 * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
586 * behavior.
587 *
588 * Return 0 if new item added, error code if memory couldn't be allocated.
589 *
590 * Invariant of the glob_t structure:
591 *	Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
592 *	 gl_pathv points to (gl_offs + gl_pathc + 1) items.
593 */
594static void
595globextend(const char *path, glob_t *pglob)
596{
597    char **pathv;
598    int i;
599    size_t newsize;
600
601    newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs);
602    pathv = xrealloc(pglob->gl_pathv, newsize);
603
604    if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
605	/* first time around -- clear initial gl_offs items */
606	pathv += pglob->gl_offs;
607	for (i = pglob->gl_offs; --i >= 0;)
608	    *--pathv = NULL;
609    }
610    pglob->gl_pathv = pathv;
611
612    pathv[pglob->gl_offs + pglob->gl_pathc++] = strsave(path);
613    pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
614}
615
616static size_t
617One_Char_mbtowc(__Char *pwc, const Char *s, size_t n)
618{
619#ifdef WIDE_STRINGS
620    char buf[MB_LEN_MAX], *p;
621
622    if (n > MB_LEN_MAX)
623	n = MB_LEN_MAX;
624    p = buf;
625    while (p < buf + n && (*p++ = LCHAR(*s++)) != 0)
626	;
627    return one_mbtowc(pwc, buf, n);
628#else
629    *pwc = *s & CHAR;
630    return 1;
631#endif
632}
633
634/*
635 * pattern matching function for filenames.  Each occurrence of the *
636 * pattern causes a recursion level.
637 */
638static  int
639match(const char *name, const Char *pat, const Char *patend, int m_not)
640{
641    int ok, negate_range;
642    Char c;
643
644    while (pat < patend) {
645	size_t lwk;
646	__Char wc, wk;
647
648	c = *pat; /* Only for M_MASK bits */
649	pat += One_Char_mbtowc(&wc, pat, MB_LEN_MAX);
650	lwk = one_mbtowc(&wk, name, MB_LEN_MAX);
651	switch (c & M_MASK) {
652	case M_ALL:
653	    if (pat == patend)
654		return (1);
655	    for (;;) {
656		if (match(name, pat, patend, m_not))
657		    return (1);
658		if (*name == EOS)
659		    break;
660		name += lwk;
661		lwk = one_mbtowc(&wk, name, MB_LEN_MAX);
662	    }
663	    return (0);
664	case M_ONE:
665	    if (*name == EOS)
666		return (0);
667	    name += lwk;
668	    break;
669	case M_SET:
670	    ok = 0;
671	    if (*name == EOS)
672		return (0);
673	    name += lwk;
674	    if ((negate_range = ((*pat & M_MASK) == m_not)) != 0)
675		++pat;
676	    while ((*pat & M_MASK) != M_END) {
677		pat += One_Char_mbtowc(&wc, pat, MB_LEN_MAX);
678		if ((*pat & M_MASK) == M_RNG) {
679		    __Char wc2;
680
681		    pat++;
682		    pat += One_Char_mbtowc(&wc2, pat, MB_LEN_MAX);
683		    if (globcharcoll(wc, wk, 0) <= 0 &&
684			globcharcoll(wk, wc2, 0) <= 0)
685			ok = 1;
686		} else if (wc == wk)
687		    ok = 1;
688	    }
689	    pat += One_Char_mbtowc(&wc, pat, MB_LEN_MAX);
690	    if (ok == negate_range)
691		return (0);
692	    break;
693	default:
694	    name += lwk;
695	    if (samecase(wk) != samecase(wc))
696		return (0);
697	    break;
698	}
699    }
700    return (*name == EOS);
701}
702
703/* free allocated data belonging to a glob_t structure */
704void
705globfree(glob_t *pglob)
706{
707    int i;
708    char **pp;
709
710    if (pglob->gl_pathv != NULL) {
711	pp = pglob->gl_pathv + pglob->gl_offs;
712	for (i = pglob->gl_pathc; i--; ++pp)
713	    if (*pp)
714		xfree(*pp), *pp = NULL;
715	xfree(pglob->gl_pathv), pglob->gl_pathv = NULL;
716    }
717}
718