glob.c revision 100616
1/*
2 * Copyright (c) 1989 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Guido van Rossum.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32#if defined(LIBC_SCCS) && !defined(lint)
33static char sccsid[] = "@(#)glob.c	5.12 (Berkeley) 6/24/91";
34#endif /* LIBC_SCCS and not lint */
35/*
36 * Glob: the interface is a superset of the one defined in POSIX 1003.2,
37 * draft 9.
38 *
39 * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
40 *
41 * Optional extra services, controlled by flags not defined by POSIX:
42 *
43 * GLOB_QUOTE:
44 *	Escaping convention: \ inhibits any special meaning the following
45 *	character might have (except \ at end of string is retained).
46 * GLOB_MAGCHAR:
47 *	Set in gl_flags if pattern contained a globbing character.
48 * GLOB_ALTNOT:
49 *	Use ^ instead of ! for "not".
50 * gl_matchc:
51 *	Number of matches in the current invocation of glob.
52 */
53
54#ifdef notdef
55#include <sys/types.h>
56#include <sys/param.h>
57#include <sys/stat.h>
58#include <dirent.h>
59#include <ctype.h>
60typedef void * ptr_t;
61#endif
62#ifdef WINNT_NATIVE
63	#pragma warning(disable:4244)
64#endif /* WINNT_NATIVE */
65
66#define Char __Char
67#include "sh.h"
68#undef Char
69#undef QUOTE
70#undef TILDE
71#undef META
72#undef CHAR
73#undef ismeta
74#undef Strchr
75
76#include "glob.h"
77
78#ifndef S_ISDIR
79#define S_ISDIR(a)	(((a) & S_IFMT) == S_IFDIR)
80#endif
81
82#if !defined(S_ISLNK) && defined(S_IFLNK)
83#define S_ISLNK(a)	(((a) & S_IFMT) == S_IFLNK)
84#endif
85
86#if !defined(S_ISLNK) && !defined(lstat)
87#define lstat stat
88#endif
89
90typedef unsigned short Char;
91
92static	int	 glob1 		__P((Char *, glob_t *, int));
93static	int	 glob2		__P((Char *, Char *, Char *, glob_t *, int));
94static	int	 glob3		__P((Char *, Char *, Char *, Char *,
95				     glob_t *, int));
96static	int	 globextend	__P((Char *, glob_t *));
97static	int	 match		__P((Char *, Char *, Char *, int));
98#ifndef __clipper__
99static	int	 compare	__P((const ptr_t, const ptr_t));
100#endif
101static 	DIR	*Opendir	__P((Char *));
102#ifdef S_IFLNK
103static	int	 Lstat		__P((Char *, struct stat *));
104#endif
105static	int	 Stat		__P((Char *, struct stat *sb));
106static 	Char 	*Strchr		__P((Char *, int));
107#ifdef DEBUG
108static	void	 qprintf	__P((Char *));
109#endif
110
111#define	DOLLAR		'$'
112#define	DOT		'.'
113#define	EOS		'\0'
114#define	LBRACKET	'['
115#define	NOT		'!'
116#define ALTNOT		'^'
117#define	QUESTION	'?'
118#define	QUOTE		'\\'
119#define	RANGE		'-'
120#define	RBRACKET	']'
121#define	SEP		'/'
122#define	STAR		'*'
123#define	TILDE		'~'
124#define	UNDERSCORE	'_'
125
126#define	M_META		0x8000
127#define M_PROTECT	0x4000
128#define	M_MASK		0xffff
129#define	M_ASCII		0x00ff
130
131#define	CHAR(c)		((c)&M_ASCII)
132#define	META(c)		((c)|M_META)
133#define	M_ALL		META('*')
134#define	M_END		META(']')
135#define	M_NOT		META('!')
136#define	M_ALTNOT	META('^')
137#define	M_ONE		META('?')
138#define	M_RNG		META('-')
139#define	M_SET		META('[')
140#define	ismeta(c)	(((c)&M_META) != 0)
141
142#ifndef BUFSIZE
143#define GLOBBUFLEN	MAXPATHLEN
144#else
145#define GLOBBUFLEN	BUFSIZE
146#endif
147
148int
149globcharcoll(c1, c2)
150    int c1, c2;
151{
152#if defined(NLS) && defined(LC_COLLATE) && !defined(NOSTRCOLL)
153    char s1[2], s2[2];
154
155    if (c1 == c2)
156	return (0);
157    /*
158     * From kevin lyda <kevin@suberic.net>:
159     * strcoll does not guarantee case sorting, so we pre-process now:
160     */
161    if (islower(c1) && isupper(c2))
162	return (1);
163    s1[0] = c1;
164    s2[0] = c2;
165    s1[1] = s2[1] = '\0';
166    return strcoll(s1, s2);
167#else
168    return (c1 - c2);
169#endif
170}
171
172/*
173 * Need to dodge two kernel bugs:
174 * opendir("") != opendir(".")
175 * NAMEI_BUG: on plain files trailing slashes are ignored in some kernels.
176 *            POSIX specifies that they should be ignored in directories.
177 */
178
179static DIR *
180Opendir(str)
181    register Char *str;
182{
183    char    buf[GLOBBUFLEN];
184    register char *dc = buf;
185#if defined(hpux) || defined(__hpux)
186    struct stat st;
187#endif
188
189    if (!*str)
190	return (opendir("."));
191    while ((*dc++ = *str++) != '\0')
192	continue;
193#if defined(hpux) || defined(__hpux)
194    /*
195     * Opendir on some device files hangs, so avoid it
196     */
197    if (stat(buf, &st) == -1 || !S_ISDIR(st.st_mode))
198	return NULL;
199#endif
200    return (opendir(buf));
201}
202
203#ifdef S_IFLNK
204static int
205Lstat(fn, sb)
206    register Char *fn;
207    struct stat *sb;
208{
209    char    buf[GLOBBUFLEN];
210    register char *dc = buf;
211
212    while ((*dc++ = *fn++) != '\0')
213	continue;
214# ifdef NAMEI_BUG
215    {
216	int     st;
217
218	st = lstat(buf, sb);
219	if (*buf)
220	    dc--;
221	return (*--dc == '/' && !S_ISDIR(sb->st_mode) ? -1 : st);
222    }
223# else
224    return (lstat(buf, sb));
225# endif	/* NAMEI_BUG */
226}
227#else
228#define Lstat Stat
229#endif /* S_IFLNK */
230
231static int
232Stat(fn, sb)
233    register Char *fn;
234    struct stat *sb;
235{
236    char    buf[GLOBBUFLEN];
237    register char *dc = buf;
238
239    while ((*dc++ = *fn++) != '\0')
240	continue;
241#ifdef NAMEI_BUG
242    {
243	int     st;
244
245	st = stat(buf, sb);
246	if (*buf)
247	    dc--;
248	return (*--dc == '/' && !S_ISDIR(sb->st_mode) ? -1 : st);
249    }
250#else
251    return (stat(buf, sb));
252#endif /* NAMEI_BUG */
253}
254
255static Char *
256Strchr(str, ch)
257    Char *str;
258    int ch;
259{
260    do
261	if (*str == ch)
262	    return (str);
263    while (*str++);
264    return (NULL);
265}
266
267#ifdef DEBUG
268static void
269qprintf(s)
270Char *s;
271{
272    Char *p;
273
274    for (p = s; *p; p++)
275	printf("%c", *p & 0xff);
276    printf("\n");
277    for (p = s; *p; p++)
278	printf("%c", *p & M_PROTECT ? '"' : ' ');
279    printf("\n");
280    for (p = s; *p; p++)
281	printf("%c", *p & M_META ? '_' : ' ');
282    printf("\n");
283}
284#endif /* DEBUG */
285
286static int
287compare(p, q)
288    const ptr_t  p, q;
289{
290#if defined(NLS) && !defined(NOSTRCOLL)
291    errno = 0;  /* strcoll sets errno, another brain-damage */
292
293    return (strcoll(*(char **) p, *(char **) q));
294#else
295    return (strcmp(*(char **) p, *(char **) q));
296#endif /* NLS && !NOSTRCOLL */
297}
298
299/*
300 * The main glob() routine: compiles the pattern (optionally processing
301 * quotes), calls glob1() to do the real pattern matching, and finally
302 * sorts the list (unless unsorted operation is requested).  Returns 0
303 * if things went well, nonzero if errors occurred.  It is not an error
304 * to find no matches.
305 */
306int
307glob(pattern, flags, errfunc, pglob)
308    const char *pattern;
309    int     flags;
310    int     (*errfunc) __P((const char *, int));
311    glob_t *pglob;
312{
313    int     err, oldpathc;
314    Char *bufnext, *bufend, *compilebuf, m_not;
315    const unsigned char *compilepat, *patnext;
316    int     c, not;
317    Char patbuf[GLOBBUFLEN + 1], *qpatnext;
318    int     no_match;
319
320    patnext = (unsigned char *) pattern;
321    if (!(flags & GLOB_APPEND)) {
322	pglob->gl_pathc = 0;
323	pglob->gl_pathv = NULL;
324	if (!(flags & GLOB_DOOFFS))
325	    pglob->gl_offs = 0;
326    }
327    pglob->gl_flags = flags & ~GLOB_MAGCHAR;
328    pglob->gl_errfunc = errfunc;
329    oldpathc = pglob->gl_pathc;
330    pglob->gl_matchc = 0;
331
332    if (pglob->gl_flags & GLOB_ALTNOT) {
333	not = ALTNOT;
334	m_not = M_ALTNOT;
335    }
336    else {
337	not = NOT;
338	m_not = M_NOT;
339    }
340
341    bufnext = patbuf;
342    bufend = bufnext + GLOBBUFLEN;
343    compilebuf = bufnext;
344    compilepat = patnext;
345
346    no_match = *patnext == not;
347    if (no_match)
348	patnext++;
349
350    if (flags & GLOB_QUOTE) {
351	/* Protect the quoted characters */
352	while (bufnext < bufend && (c = *patnext++) != EOS)
353#ifdef DSPMBYTE
354	    if (Ismbyte1(c) && *patnext != EOS)
355	    {
356	      *bufnext++ = (Char) c;
357	      *bufnext++ = (Char) *patnext++;
358	    }
359	    else
360#endif /* DSPMBYTE */
361	    if (c == QUOTE) {
362		if ((c = *patnext++) == EOS) {
363		    c = QUOTE;
364		    --patnext;
365		}
366		*bufnext++ = (Char) (c | M_PROTECT);
367	    }
368	    else
369		*bufnext++ = (Char) c;
370    }
371    else
372	while (bufnext < bufend && (c = *patnext++) != EOS)
373	    *bufnext++ = (Char) c;
374    *bufnext = EOS;
375
376    bufnext = patbuf;
377    qpatnext = patbuf;
378    /* we don't need to check for buffer overflow any more */
379    while ((c = *qpatnext++) != EOS) {
380#ifdef DSPMBYTE
381	if (Ismbyte1(c) && *qpatnext != EOS)
382	{
383	  *bufnext++ = CHAR(c);
384	  *bufnext++ = CHAR(*qpatnext++);
385	}
386	else
387#endif /* DSPMBYTE */
388	switch (c) {
389	case LBRACKET:
390	    c = *qpatnext;
391	    if (c == not)
392		++qpatnext;
393	    if (*qpatnext == EOS ||
394		Strchr(qpatnext + 1, RBRACKET) == NULL) {
395		*bufnext++ = LBRACKET;
396		if (c == not)
397		    --qpatnext;
398		break;
399	    }
400	    pglob->gl_flags |= GLOB_MAGCHAR;
401	    *bufnext++ = M_SET;
402	    if (c == not)
403		*bufnext++ = m_not;
404	    c = *qpatnext++;
405	    do {
406		*bufnext++ = CHAR(c);
407		if (*qpatnext == RANGE &&
408		    (c = qpatnext[1]) != RBRACKET) {
409		    *bufnext++ = M_RNG;
410		    *bufnext++ = CHAR(c);
411		    qpatnext += 2;
412		}
413	    } while ((c = *qpatnext++) != RBRACKET);
414	    *bufnext++ = M_END;
415	    break;
416	case QUESTION:
417	    pglob->gl_flags |= GLOB_MAGCHAR;
418	    *bufnext++ = M_ONE;
419	    break;
420	case STAR:
421	    pglob->gl_flags |= GLOB_MAGCHAR;
422	    /* collapse adjacent stars to one, to avoid
423	     * exponential behavior
424	     */
425	    if (bufnext == patbuf || bufnext[-1] != M_ALL)
426		*bufnext++ = M_ALL;
427	    break;
428	default:
429	    *bufnext++ = CHAR(c);
430	    break;
431	}
432    }
433    *bufnext = EOS;
434#ifdef DEBUG
435    qprintf(patbuf);
436#endif
437
438    if ((err = glob1(patbuf, pglob, no_match)) != 0)
439	return (err);
440
441    /*
442     * If there was no match we are going to append the pattern
443     * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified
444     * and the pattern did not contain any magic characters
445     * GLOB_NOMAGIC is there just for compatibility with csh.
446     */
447    if (pglob->gl_pathc == oldpathc &&
448	((flags & GLOB_NOCHECK) ||
449	 ((flags & GLOB_NOMAGIC) && !(pglob->gl_flags & GLOB_MAGCHAR)))) {
450	if (!(flags & GLOB_QUOTE)) {
451	    Char *dp = compilebuf;
452	    const unsigned char *sp = compilepat;
453
454	    while ((*dp++ = *sp++) != '\0')
455		continue;
456	}
457	else {
458	    /*
459	     * copy pattern, interpreting quotes; this is slightly different
460	     * than the interpretation of quotes above -- which should prevail?
461	     */
462	    while (*compilepat != EOS) {
463		if (*compilepat == QUOTE) {
464		    if (*++compilepat == EOS)
465			--compilepat;
466		}
467		*compilebuf++ = (unsigned char) *compilepat++;
468	    }
469	    *compilebuf = EOS;
470	}
471	return (globextend(patbuf, pglob));
472    }
473    else if (!(flags & GLOB_NOSORT) && (pglob->gl_pathc != oldpathc))
474	qsort((char *) (pglob->gl_pathv + pglob->gl_offs + oldpathc),
475	      pglob->gl_pathc - oldpathc, sizeof(char *),
476	      (int (*) __P((const void *, const void *))) compare);
477    return (0);
478}
479
480static int
481glob1(pattern, pglob, no_match)
482    Char *pattern;
483    glob_t *pglob;
484    int     no_match;
485{
486    Char pathbuf[GLOBBUFLEN + 1];
487
488    /*
489     * a null pathname is invalid -- POSIX 1003.1 sect. 2.4.
490     */
491    if (*pattern == EOS)
492	return (0);
493    return (glob2(pathbuf, pathbuf, pattern, pglob, no_match));
494}
495
496/*
497 * functions glob2 and glob3 are mutually recursive; there is one level
498 * of recursion for each segment in the pattern that contains one or
499 * more meta characters.
500 */
501static int
502glob2(pathbuf, pathend, pattern, pglob, no_match)
503    Char *pathbuf, *pathend, *pattern;
504    glob_t *pglob;
505    int     no_match;
506{
507    struct stat sbuf;
508    int anymeta;
509    Char *p, *q;
510
511    /*
512     * loop over pattern segments until end of pattern or until segment with
513     * meta character found.
514     */
515    anymeta = 0;
516    for (;;) {
517	if (*pattern == EOS) {	/* end of pattern? */
518	    *pathend = EOS;
519
520	    if (Lstat(pathbuf, &sbuf))
521		return (0);
522
523	    if (((pglob->gl_flags & GLOB_MARK) &&
524		 pathend[-1] != SEP) &&
525		(S_ISDIR(sbuf.st_mode)
526#ifdef S_IFLNK
527		 || (S_ISLNK(sbuf.st_mode) &&
528		     (Stat(pathbuf, &sbuf) == 0) &&
529		     S_ISDIR(sbuf.st_mode))
530#endif
531		 )) {
532		*pathend++ = SEP;
533		*pathend = EOS;
534	    }
535	    ++pglob->gl_matchc;
536	    return (globextend(pathbuf, pglob));
537	}
538
539	/* find end of next segment, copy tentatively to pathend */
540	q = pathend;
541	p = pattern;
542	while (*p != EOS && *p != SEP) {
543	    if (ismeta(*p))
544		anymeta = 1;
545	    *q++ = *p++;
546	}
547
548	if (!anymeta) {		/* no expansion, do next segment */
549	    pathend = q;
550	    pattern = p;
551	    while (*pattern == SEP)
552		*pathend++ = *pattern++;
553	}
554	else			/* need expansion, recurse */
555	    return (glob3(pathbuf, pathend, pattern, p, pglob, no_match));
556    }
557    /* NOTREACHED */
558}
559
560
561static int
562glob3(pathbuf, pathend, pattern, restpattern, pglob, no_match)
563    Char *pathbuf, *pathend, *pattern, *restpattern;
564    glob_t *pglob;
565    int     no_match;
566{
567    DIR    *dirp;
568    struct dirent *dp;
569    int     err;
570    Char m_not = (pglob->gl_flags & GLOB_ALTNOT) ? M_ALTNOT : M_NOT;
571    char cpathbuf[GLOBBUFLEN], *ptr;;
572
573    *pathend = EOS;
574    errno = 0;
575
576    if (!(dirp = Opendir(pathbuf))) {
577	/* todo: don't call for ENOENT or ENOTDIR? */
578	for (ptr = cpathbuf; (*ptr++ = (char) *pathbuf++) != EOS;)
579	    continue;
580	if ((pglob->gl_errfunc && (*pglob->gl_errfunc) (cpathbuf, errno)) ||
581	    (pglob->gl_flags & GLOB_ERR))
582	    return (GLOB_ABEND);
583	else
584	    return (0);
585    }
586
587    err = 0;
588
589    /* search directory for matching names */
590    while ((dp = readdir(dirp)) != NULL) {
591	register unsigned char *sc;
592	register Char *dc;
593
594	/* initial DOT must be matched literally */
595	if (dp->d_name[0] == DOT && *pattern != DOT)
596	    continue;
597	for (sc = (unsigned char *) dp->d_name, dc = pathend;
598	     (*dc++ = *sc++) != '\0';)
599	    continue;
600	if (match(pathend, pattern, restpattern, (int) m_not) == no_match) {
601	    *pathend = EOS;
602	    continue;
603	}
604	err = glob2(pathbuf, --dc, restpattern, pglob, no_match);
605	if (err)
606	    break;
607    }
608    /* todo: check error from readdir? */
609    (void) closedir(dirp);
610    return (err);
611}
612
613
614/*
615 * Extend the gl_pathv member of a glob_t structure to accomodate a new item,
616 * add the new item, and update gl_pathc.
617 *
618 * This assumes the BSD realloc, which only copies the block when its size
619 * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
620 * behavior.
621 *
622 * Return 0 if new item added, error code if memory couldn't be allocated.
623 *
624 * Invariant of the glob_t structure:
625 *	Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
626 *	 gl_pathv points to (gl_offs + gl_pathc + 1) items.
627 */
628static int
629globextend(path, pglob)
630    Char *path;
631    glob_t *pglob;
632{
633    register char **pathv;
634    register int i;
635    unsigned int newsize;
636    char   *copy;
637    Char *p;
638
639    newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs);
640    pathv = (char **) (pglob->gl_pathv ?
641		       xrealloc((ptr_t) pglob->gl_pathv, (size_t) newsize) :
642		       xmalloc((size_t) newsize));
643    if (pathv == NULL)
644	return (GLOB_NOSPACE);
645
646    if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
647	/* first time around -- clear initial gl_offs items */
648	pathv += pglob->gl_offs;
649	for (i = pglob->gl_offs; --i >= 0;)
650	    *--pathv = NULL;
651    }
652    pglob->gl_pathv = pathv;
653
654    for (p = path; *p++;)
655	continue;
656    if ((copy = (char *) xmalloc((size_t) (p - path))) != NULL) {
657	register char *dc = copy;
658	register Char *sc = path;
659
660	while ((*dc++ = *sc++) != '\0')
661	    continue;
662	pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
663    }
664    pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
665    return ((copy == NULL) ? GLOB_NOSPACE : 0);
666}
667
668
669/*
670 * pattern matching function for filenames.  Each occurrence of the *
671 * pattern causes a recursion level.
672 */
673static  int
674match(name, pat, patend, m_not)
675    register Char *name, *pat, *patend;
676    int m_not;
677{
678    int ok, negate_range;
679    Char c, k;
680
681    while (pat < patend) {
682	c = *pat++;
683	switch (c & M_MASK) {
684	case M_ALL:
685	    if (pat == patend)
686		return (1);
687	    do
688		if (match(name, pat, patend, m_not))
689		    return (1);
690	    while (*name++ != EOS);
691	    return (0);
692	case M_ONE:
693	    if (*name++ == EOS)
694		return (0);
695	    break;
696	case M_SET:
697	    ok = 0;
698	    if ((k = *name++) == EOS)
699		return (0);
700	    if ((negate_range = ((*pat & M_MASK) == m_not)) != 0)
701		++pat;
702	    while (((c = *pat++) & M_MASK) != M_END) {
703		if ((*pat & M_MASK) == M_RNG) {
704		    if (globcharcoll(CHAR(c), CHAR(k)) <= 0 &&
705			globcharcoll(CHAR(k), CHAR(pat[1])) <= 0)
706			ok = 1;
707		    pat += 2;
708		}
709		else if (c == k)
710		    ok = 1;
711	    }
712	    if (ok == negate_range)
713		return (0);
714	    break;
715	default:
716	    k = *name++;
717	    if (samecase(k) != samecase(c))
718		return (0);
719	    break;
720	}
721    }
722    return (*name == EOS);
723}
724
725/* free allocated data belonging to a glob_t structure */
726void
727globfree(pglob)
728    glob_t *pglob;
729{
730    register int i;
731    register char **pp;
732
733    if (pglob->gl_pathv != NULL) {
734	pp = pglob->gl_pathv + pglob->gl_offs;
735	for (i = pglob->gl_pathc; i--; ++pp)
736	    if (*pp)
737		xfree((ptr_t) *pp), *pp = NULL;
738	xfree((ptr_t) pglob->gl_pathv), pglob->gl_pathv = NULL;
739    }
740}
741