glob.c revision 59243
1/*
2 * Copyright (c) 1989 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Guido van Rossum.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 *    must display the following acknowledgement:
18 *	This product includes software developed by the University of
19 *	California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36#if defined(LIBC_SCCS) && !defined(lint)
37static char sccsid[] = "@(#)glob.c	5.12 (Berkeley) 6/24/91";
38#endif /* LIBC_SCCS and not lint */
39/*
40 * Glob: the interface is a superset of the one defined in POSIX 1003.2,
41 * draft 9.
42 *
43 * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
44 *
45 * Optional extra services, controlled by flags not defined by POSIX:
46 *
47 * GLOB_QUOTE:
48 *	Escaping convention: \ inhibits any special meaning the following
49 *	character might have (except \ at end of string is retained).
50 * GLOB_MAGCHAR:
51 *	Set in gl_flags if pattern contained a globbing character.
52 * GLOB_ALTNOT:
53 *	Use ^ instead of ! for "not".
54 * gl_matchc:
55 *	Number of matches in the current invocation of glob.
56 */
57
58#ifdef notdef
59#include <sys/types.h>
60#include <sys/param.h>
61#include <sys/stat.h>
62#include <dirent.h>
63#include <ctype.h>
64typedef void * ptr_t;
65#endif
66#ifdef WINNT
67	#pragma warning(disable:4244)
68#endif /* WINNT */
69
70#define Char __Char
71#include "sh.h"
72#undef Char
73#undef QUOTE
74#undef TILDE
75#undef META
76#undef CHAR
77#undef ismeta
78#undef Strchr
79
80#include "glob.h"
81
82#ifndef S_ISDIR
83#define S_ISDIR(a)	(((a) & S_IFMT) == S_IFDIR)
84#endif
85
86#if !defined(S_ISLNK) && defined(S_IFLNK)
87#define S_ISLNK(a)	(((a) & S_IFMT) == S_IFLNK)
88#endif
89
90#if !defined(S_ISLNK) && !defined(lstat)
91#define lstat stat
92#endif
93
94typedef unsigned short Char;
95
96static	int	 glob1 		__P((Char *, glob_t *, int));
97static	int	 glob2		__P((Char *, Char *, Char *, glob_t *, int));
98static	int	 glob3		__P((Char *, Char *, Char *, Char *,
99				     glob_t *, int));
100static	int	 globextend	__P((Char *, glob_t *));
101static	int	 match		__P((Char *, Char *, Char *, int));
102#ifndef __clipper__
103static	int	 compare	__P((const ptr_t, const ptr_t));
104#endif
105static 	DIR	*Opendir	__P((Char *));
106#ifdef S_IFLNK
107static	int	 Lstat		__P((Char *, struct stat *));
108#endif
109static	int	 Stat		__P((Char *, struct stat *sb));
110static 	Char 	*Strchr		__P((Char *, int));
111#ifdef DEBUG
112static	void	 qprintf	__P((Char *));
113#endif
114
115#define	DOLLAR		'$'
116#define	DOT		'.'
117#define	EOS		'\0'
118#define	LBRACKET	'['
119#define	NOT		'!'
120#define ALTNOT		'^'
121#define	QUESTION	'?'
122#define	QUOTE		'\\'
123#define	RANGE		'-'
124#define	RBRACKET	']'
125#define	SEP		'/'
126#define	STAR		'*'
127#define	TILDE		'~'
128#define	UNDERSCORE	'_'
129
130#define	M_META		0x8000
131#define M_PROTECT	0x4000
132#define	M_MASK		0xffff
133#define	M_ASCII		0x00ff
134
135#define	CHAR(c)		((c)&M_ASCII)
136#define	META(c)		((c)|M_META)
137#define	M_ALL		META('*')
138#define	M_END		META(']')
139#define	M_NOT		META('!')
140#define	M_ALTNOT	META('^')
141#define	M_ONE		META('?')
142#define	M_RNG		META('-')
143#define	M_SET		META('[')
144#define	ismeta(c)	(((c)&M_META) != 0)
145
146#ifndef BUFSIZE
147#define GLOBBUFLEN	MAXPATHLEN
148#else
149#define GLOBBUFLEN	BUFSIZE
150#endif
151
152int
153globcharcoll(c1, c2)
154    int c1, c2;
155{
156#if defined(NLS) && defined(LC_COLLATE) && !defined(NOSTRCOLL)
157    char s1[2], s2[2];
158
159    if (c1 == c2)
160	return (0);
161    s1[0] = c1;
162    s2[0] = c2;
163    s1[1] = s2[1] = '\0';
164    return strcoll(s1, s2);
165#else
166    return (c1 - c2);
167#endif
168}
169
170/*
171 * Need to dodge two kernel bugs:
172 * opendir("") != opendir(".")
173 * NAMEI_BUG: on plain files trailing slashes are ignored in some kernels.
174 *            POSIX specifies that they should be ignored in directories.
175 */
176
177static DIR *
178Opendir(str)
179    register Char *str;
180{
181    char    buf[GLOBBUFLEN];
182    register char *dc = buf;
183#if defined(hpux) || defined(__hpux)
184    struct stat st;
185#endif
186
187    if (!*str)
188	return (opendir("."));
189    while ((*dc++ = *str++) != '\0')
190	continue;
191#if defined(hpux) || defined(__hpux)
192    /*
193     * Opendir on some device files hangs, so avoid it
194     */
195    if (stat(buf, &st) == -1 || !S_ISDIR(st.st_mode))
196	return NULL;
197#endif
198    return (opendir(buf));
199}
200
201#ifdef S_IFLNK
202static int
203Lstat(fn, sb)
204    register Char *fn;
205    struct stat *sb;
206{
207    char    buf[GLOBBUFLEN];
208    register char *dc = buf;
209
210    while ((*dc++ = *fn++) != '\0')
211	continue;
212# ifdef NAMEI_BUG
213    {
214	int     st;
215
216	st = lstat(buf, sb);
217	if (*buf)
218	    dc--;
219	return (*--dc == '/' && !S_ISDIR(sb->st_mode) ? -1 : st);
220    }
221# else
222    return (lstat(buf, sb));
223# endif	/* NAMEI_BUG */
224}
225#else
226#define Lstat Stat
227#endif /* S_IFLNK */
228
229static int
230Stat(fn, sb)
231    register Char *fn;
232    struct stat *sb;
233{
234    char    buf[GLOBBUFLEN];
235    register char *dc = buf;
236
237    while ((*dc++ = *fn++) != '\0')
238	continue;
239#ifdef NAMEI_BUG
240    {
241	int     st;
242
243	st = stat(buf, sb);
244	if (*buf)
245	    dc--;
246	return (*--dc == '/' && !S_ISDIR(sb->st_mode) ? -1 : st);
247    }
248#else
249    return (stat(buf, sb));
250#endif /* NAMEI_BUG */
251}
252
253static Char *
254Strchr(str, ch)
255    Char *str;
256    int ch;
257{
258    do
259	if (*str == ch)
260	    return (str);
261    while (*str++);
262    return (NULL);
263}
264
265#ifdef DEBUG
266static void
267qprintf(s)
268Char *s;
269{
270    Char *p;
271
272    for (p = s; *p; p++)
273	printf("%c", *p & 0xff);
274    printf("\n");
275    for (p = s; *p; p++)
276	printf("%c", *p & M_PROTECT ? '"' : ' ');
277    printf("\n");
278    for (p = s; *p; p++)
279	printf("%c", *p & M_META ? '_' : ' ');
280    printf("\n");
281}
282#endif /* DEBUG */
283
284static int
285compare(p, q)
286    const ptr_t  p, q;
287{
288#if defined(NLS) && !defined(NOSTRCOLL)
289    errno = 0;  /* strcoll sets errno, another brain-damage */
290
291    return (strcoll(*(char **) p, *(char **) q));
292#else
293    return (strcmp(*(char **) p, *(char **) q));
294#endif /* NLS && !NOSTRCOLL */
295}
296
297/*
298 * The main glob() routine: compiles the pattern (optionally processing
299 * quotes), calls glob1() to do the real pattern matching, and finally
300 * sorts the list (unless unsorted operation is requested).  Returns 0
301 * if things went well, nonzero if errors occurred.  It is not an error
302 * to find no matches.
303 */
304int
305glob(pattern, flags, errfunc, pglob)
306    const char *pattern;
307    int     flags;
308    int     (*errfunc) __P((const char *, int));
309    glob_t *pglob;
310{
311    int     err, oldpathc;
312    Char *bufnext, *bufend, *compilebuf, m_not;
313    const unsigned char *compilepat, *patnext;
314    int     c, not;
315    Char patbuf[GLOBBUFLEN + 1], *qpatnext;
316    int     no_match;
317
318    patnext = (unsigned char *) pattern;
319    if (!(flags & GLOB_APPEND)) {
320	pglob->gl_pathc = 0;
321	pglob->gl_pathv = NULL;
322	if (!(flags & GLOB_DOOFFS))
323	    pglob->gl_offs = 0;
324    }
325    pglob->gl_flags = flags & ~GLOB_MAGCHAR;
326    pglob->gl_errfunc = errfunc;
327    oldpathc = pglob->gl_pathc;
328    pglob->gl_matchc = 0;
329
330    if (pglob->gl_flags & GLOB_ALTNOT) {
331	not = ALTNOT;
332	m_not = M_ALTNOT;
333    }
334    else {
335	not = NOT;
336	m_not = M_NOT;
337    }
338
339    bufnext = patbuf;
340    bufend = bufnext + GLOBBUFLEN;
341    compilebuf = bufnext;
342    compilepat = patnext;
343
344    no_match = *patnext == not;
345    if (no_match)
346	patnext++;
347
348    if (flags & GLOB_QUOTE) {
349	/* Protect the quoted characters */
350	while (bufnext < bufend && (c = *patnext++) != EOS)
351	    if (c == QUOTE) {
352		if ((c = *patnext++) == EOS) {
353		    c = QUOTE;
354		    --patnext;
355		}
356		*bufnext++ = (Char) (c | M_PROTECT);
357	    }
358	    else
359		*bufnext++ = (Char) c;
360    }
361    else
362	while (bufnext < bufend && (c = *patnext++) != EOS)
363	    *bufnext++ = (Char) c;
364    *bufnext = EOS;
365
366    bufnext = patbuf;
367    qpatnext = patbuf;
368    /* we don't need to check for buffer overflow any more */
369    while ((c = *qpatnext++) != EOS) {
370	switch (c) {
371	case LBRACKET:
372	    c = *qpatnext;
373	    if (c == not)
374		++qpatnext;
375	    if (*qpatnext == EOS ||
376		Strchr(qpatnext + 1, RBRACKET) == NULL) {
377		*bufnext++ = LBRACKET;
378		if (c == not)
379		    --qpatnext;
380		break;
381	    }
382	    pglob->gl_flags |= GLOB_MAGCHAR;
383	    *bufnext++ = M_SET;
384	    if (c == not)
385		*bufnext++ = m_not;
386	    c = *qpatnext++;
387	    do {
388		*bufnext++ = CHAR(c);
389		if (*qpatnext == RANGE &&
390		    (c = qpatnext[1]) != RBRACKET) {
391		    *bufnext++ = M_RNG;
392		    *bufnext++ = CHAR(c);
393		    qpatnext += 2;
394		}
395	    } while ((c = *qpatnext++) != RBRACKET);
396	    *bufnext++ = M_END;
397	    break;
398	case QUESTION:
399	    pglob->gl_flags |= GLOB_MAGCHAR;
400	    *bufnext++ = M_ONE;
401	    break;
402	case STAR:
403	    pglob->gl_flags |= GLOB_MAGCHAR;
404	    /* collapse adjacent stars to one, to avoid
405	     * exponential behavior
406	     */
407	    if (bufnext == patbuf || bufnext[-1] != M_ALL)
408		*bufnext++ = M_ALL;
409	    break;
410	default:
411	    *bufnext++ = CHAR(c);
412	    break;
413	}
414    }
415    *bufnext = EOS;
416#ifdef DEBUG
417    qprintf(patbuf);
418#endif
419
420    if ((err = glob1(patbuf, pglob, no_match)) != 0)
421	return (err);
422
423    /*
424     * If there was no match we are going to append the pattern
425     * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified
426     * and the pattern did not contain any magic characters
427     * GLOB_NOMAGIC is there just for compatibility with csh.
428     */
429    if (pglob->gl_pathc == oldpathc &&
430	((flags & GLOB_NOCHECK) ||
431	 ((flags & GLOB_NOMAGIC) && !(pglob->gl_flags & GLOB_MAGCHAR)))) {
432	if (!(flags & GLOB_QUOTE)) {
433	    Char *dp = compilebuf;
434	    const unsigned char *sp = compilepat;
435
436	    while ((*dp++ = *sp++) != '\0')
437		continue;
438	}
439	else {
440	    /*
441	     * copy pattern, interpreting quotes; this is slightly different
442	     * than the interpretation of quotes above -- which should prevail?
443	     */
444	    while (*compilepat != EOS) {
445		if (*compilepat == QUOTE) {
446		    if (*++compilepat == EOS)
447			--compilepat;
448		}
449		*compilebuf++ = (unsigned char) *compilepat++;
450	    }
451	    *compilebuf = EOS;
452	}
453	return (globextend(patbuf, pglob));
454    }
455    else if (!(flags & GLOB_NOSORT))
456	qsort((char *) (pglob->gl_pathv + pglob->gl_offs + oldpathc),
457	      pglob->gl_pathc - oldpathc, sizeof(char *),
458	      (int (*) __P((const void *, const void *))) compare);
459    return (0);
460}
461
462static int
463glob1(pattern, pglob, no_match)
464    Char *pattern;
465    glob_t *pglob;
466    int     no_match;
467{
468    Char pathbuf[GLOBBUFLEN + 1];
469
470    /*
471     * a null pathname is invalid -- POSIX 1003.1 sect. 2.4.
472     */
473    if (*pattern == EOS)
474	return (0);
475    return (glob2(pathbuf, pathbuf, pattern, pglob, no_match));
476}
477
478/*
479 * functions glob2 and glob3 are mutually recursive; there is one level
480 * of recursion for each segment in the pattern that contains one or
481 * more meta characters.
482 */
483static int
484glob2(pathbuf, pathend, pattern, pglob, no_match)
485    Char *pathbuf, *pathend, *pattern;
486    glob_t *pglob;
487    int     no_match;
488{
489    struct stat sbuf;
490    int anymeta;
491    Char *p, *q;
492
493    /*
494     * loop over pattern segments until end of pattern or until segment with
495     * meta character found.
496     */
497    anymeta = 0;
498    for (;;) {
499	if (*pattern == EOS) {	/* end of pattern? */
500	    *pathend = EOS;
501
502	    if (Lstat(pathbuf, &sbuf))
503		return (0);
504
505	    if (((pglob->gl_flags & GLOB_MARK) &&
506		 pathend[-1] != SEP) &&
507		(S_ISDIR(sbuf.st_mode)
508#ifdef S_IFLNK
509		 || (S_ISLNK(sbuf.st_mode) &&
510		     (Stat(pathbuf, &sbuf) == 0) &&
511		     S_ISDIR(sbuf.st_mode))
512#endif
513		 )) {
514		*pathend++ = SEP;
515		*pathend = EOS;
516	    }
517	    ++pglob->gl_matchc;
518	    return (globextend(pathbuf, pglob));
519	}
520
521	/* find end of next segment, copy tentatively to pathend */
522	q = pathend;
523	p = pattern;
524	while (*p != EOS && *p != SEP) {
525	    if (ismeta(*p))
526		anymeta = 1;
527	    *q++ = *p++;
528	}
529
530	if (!anymeta) {		/* no expansion, do next segment */
531	    pathend = q;
532	    pattern = p;
533	    while (*pattern == SEP)
534		*pathend++ = *pattern++;
535	}
536	else			/* need expansion, recurse */
537	    return (glob3(pathbuf, pathend, pattern, p, pglob, no_match));
538    }
539    /* NOTREACHED */
540}
541
542
543static int
544glob3(pathbuf, pathend, pattern, restpattern, pglob, no_match)
545    Char *pathbuf, *pathend, *pattern, *restpattern;
546    glob_t *pglob;
547    int     no_match;
548{
549    extern int errno;
550    DIR    *dirp;
551    struct dirent *dp;
552    int     err;
553    Char m_not = (pglob->gl_flags & GLOB_ALTNOT) ? M_ALTNOT : M_NOT;
554    char cpathbuf[GLOBBUFLEN], *ptr;;
555
556    *pathend = EOS;
557    errno = 0;
558
559    if (!(dirp = Opendir(pathbuf))) {
560	/* todo: don't call for ENOENT or ENOTDIR? */
561	for (ptr = cpathbuf; (*ptr++ = (char) *pathbuf++) != EOS;)
562	    continue;
563	if ((pglob->gl_errfunc && (*pglob->gl_errfunc) (cpathbuf, errno)) ||
564	    (pglob->gl_flags & GLOB_ERR))
565	    return (GLOB_ABEND);
566	else
567	    return (0);
568    }
569
570    err = 0;
571
572    /* search directory for matching names */
573    while ((dp = readdir(dirp)) != NULL) {
574	register unsigned char *sc;
575	register Char *dc;
576
577	/* initial DOT must be matched literally */
578	if (dp->d_name[0] == DOT && *pattern != DOT)
579	    continue;
580	for (sc = (unsigned char *) dp->d_name, dc = pathend;
581	     (*dc++ = *sc++) != '\0';)
582	    continue;
583	if (match(pathend, pattern, restpattern, (int) m_not) == no_match) {
584	    *pathend = EOS;
585	    continue;
586	}
587	err = glob2(pathbuf, --dc, restpattern, pglob, no_match);
588	if (err)
589	    break;
590    }
591    /* todo: check error from readdir? */
592    (void) closedir(dirp);
593    return (err);
594}
595
596
597/*
598 * Extend the gl_pathv member of a glob_t structure to accomodate a new item,
599 * add the new item, and update gl_pathc.
600 *
601 * This assumes the BSD realloc, which only copies the block when its size
602 * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
603 * behavior.
604 *
605 * Return 0 if new item added, error code if memory couldn't be allocated.
606 *
607 * Invariant of the glob_t structure:
608 *	Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
609 *	 gl_pathv points to (gl_offs + gl_pathc + 1) items.
610 */
611static int
612globextend(path, pglob)
613    Char *path;
614    glob_t *pglob;
615{
616    register char **pathv;
617    register int i;
618    unsigned int newsize;
619    char   *copy;
620    Char *p;
621
622    newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs);
623    pathv = (char **) (pglob->gl_pathv ?
624		       xrealloc((ptr_t) pglob->gl_pathv, (size_t) newsize) :
625		       xmalloc((size_t) newsize));
626    if (pathv == NULL)
627	return (GLOB_NOSPACE);
628
629    if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
630	/* first time around -- clear initial gl_offs items */
631	pathv += pglob->gl_offs;
632	for (i = pglob->gl_offs; --i >= 0;)
633	    *--pathv = NULL;
634    }
635    pglob->gl_pathv = pathv;
636
637    for (p = path; *p++;)
638	continue;
639    if ((copy = (char *) xmalloc((size_t) (p - path))) != NULL) {
640	register char *dc = copy;
641	register Char *sc = path;
642
643	while ((*dc++ = *sc++) != '\0')
644	    continue;
645	pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
646    }
647    pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
648    return ((copy == NULL) ? GLOB_NOSPACE : 0);
649}
650
651
652/*
653 * pattern matching function for filenames.  Each occurrence of the *
654 * pattern causes a recursion level.
655 */
656static  int
657match(name, pat, patend, m_not)
658    register Char *name, *pat, *patend;
659    int m_not;
660{
661    int ok, negate_range;
662    Char c, k;
663
664    while (pat < patend) {
665	c = *pat++;
666	switch (c & M_MASK) {
667	case M_ALL:
668	    if (pat == patend)
669		return (1);
670	    do
671		if (match(name, pat, patend, m_not))
672		    return (1);
673	    while (*name++ != EOS);
674	    return (0);
675	case M_ONE:
676	    if (*name++ == EOS)
677		return (0);
678	    break;
679	case M_SET:
680	    ok = 0;
681	    if ((k = *name++) == EOS)
682		return (0);
683	    if ((negate_range = ((*pat & M_MASK) == m_not)) != 0)
684		++pat;
685	    while (((c = *pat++) & M_MASK) != M_END) {
686		if ((*pat & M_MASK) == M_RNG) {
687		    if (globcharcoll(CHAR(c), CHAR(k)) <= 0 &&
688			globcharcoll(CHAR(k), CHAR(pat[1])) <= 0)
689			ok = 1;
690		    pat += 2;
691		}
692		else if (c == k)
693		    ok = 1;
694	    }
695	    if (ok == negate_range)
696		return (0);
697	    break;
698	default:
699	    k = *name++;
700	    if (samecase(k) != samecase(c))
701		return (0);
702	    break;
703	}
704    }
705    return (*name == EOS);
706}
707
708/* free allocated data belonging to a glob_t structure */
709void
710globfree(pglob)
711    glob_t *pglob;
712{
713    register int i;
714    register char **pp;
715
716    if (pglob->gl_pathv != NULL) {
717	pp = pglob->gl_pathv + pglob->gl_offs;
718	for (i = pglob->gl_pathc; i--; ++pp)
719	    if (*pp)
720		xfree((ptr_t) *pp), *pp = NULL;
721	xfree((ptr_t) pglob->gl_pathv), pglob->gl_pathv = NULL;
722    }
723}
724