1/*
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Guido van Rossum.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS" AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33#if defined(LIBC_SCCS) && !defined(lint)
34static char sccsid[] = "@(#)glob.c	8.3 (Berkeley) 10/13/93";
35/* most changes between the version above and the one below have been ported:
36static char sscsid[]=  "$OpenBSD: glob.c,v 1.8.10.1 2001/04/10 jason Exp $";
37 */
38#endif /* LIBC_SCCS and not lint */
39
40/*
41 * glob(3) -- a superset of the one defined in POSIX 1003.2.
42 *
43 * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
44 *
45 * Optional extra services, controlled by flags not defined by POSIX:
46 *
47 * GLOB_QUOTE:
48 *	Escaping convention: \ inhibits any special meaning the following
49 *	character might have (except \ at end of string is retained).
50 * GLOB_MAGCHAR:
51 *	Set in gl_flags if pattern contained a globbing character.
52 * GLOB_NOMAGIC:
53 *	Same as GLOB_NOCHECK, but it will only append pattern if it did
54 *	not contain any magic characters.  [Used in csh style globbing]
55 * GLOB_ALTDIRFUNC:
56 *	Use alternately specified directory access functions.
57 * GLOB_TILDE:
58 *	expand ~user/foo to the /home/dir/of/user/foo
59 * GLOB_BRACE:
60 *	expand {1,2}{a,b} to 1a 1b 2a 2b
61 * gl_matchc:
62 *	Number of matches in the current invocation of glob.
63 * GLOB_ALPHASORT:
64 *	sort alphabetically like csh (case doesn't matter) instead of in ASCII
65 *	order
66 */
67
68#include <EXTERN.h>
69#include <perl.h>
70#include <XSUB.h>
71
72#include "bsd_glob.h"
73#ifdef I_PWD
74#	include <pwd.h>
75#else
76#if defined(HAS_PASSWD) && !defined(VMS)
77        struct passwd *getpwnam(char *);
78        struct passwd *getpwuid(Uid_t);
79#endif
80#endif
81
82#ifndef MAXPATHLEN
83#  ifdef PATH_MAX
84#    define MAXPATHLEN  PATH_MAX
85#  else
86#    define MAXPATHLEN  1024
87#  endif
88#endif
89
90#include <limits.h>
91
92#ifndef ARG_MAX
93#  ifdef _SC_ARG_MAX
94#    define     ARG_MAX         (sysconf(_SC_ARG_MAX))
95#  else
96#    ifdef _POSIX_ARG_MAX
97#      define   ARG_MAX         _POSIX_ARG_MAX
98#    else
99#      ifdef WIN32
100#        define ARG_MAX         14500   /* from VC's limits.h */
101#      else
102#        define ARG_MAX         4096    /* from POSIX, be conservative */
103#      endif
104#    endif
105#  endif
106#endif
107
108#define BG_DOLLAR       '$'
109#define BG_DOT          '.'
110#define BG_EOS          '\0'
111#define BG_LBRACKET     '['
112#define BG_NOT          '!'
113#define BG_QUESTION     '?'
114#define BG_QUOTE        '\\'
115#define BG_RANGE        '-'
116#define BG_RBRACKET     ']'
117#define BG_SEP  '/'
118#ifdef DOSISH
119#define BG_SEP2		'\\'
120#endif
121#define BG_STAR         '*'
122#define BG_TILDE        '~'
123#define BG_UNDERSCORE   '_'
124#define BG_LBRACE       '{'
125#define BG_RBRACE       '}'
126#define BG_SLASH        '/'
127#define BG_COMMA        ','
128
129#ifndef GLOB_DEBUG
130
131#define M_QUOTE         0x8000
132#define M_PROTECT       0x4000
133#define M_MASK          0xffff
134#define M_ASCII         0x00ff
135
136typedef U16 Char;
137
138#else
139
140#define M_QUOTE         0x80
141#define M_PROTECT       0x40
142#define M_MASK          0xff
143#define M_ASCII         0x7f
144
145typedef U8 Char;
146
147#endif /* !GLOB_DEBUG */
148
149
150#define CHAR(c)         ((Char)((c)&M_ASCII))
151#define META(c)         ((Char)((c)|M_QUOTE))
152#define M_ALL           META('*')
153#define M_END           META(']')
154#define M_NOT           META('!')
155#define M_ONE           META('?')
156#define M_RNG           META('-')
157#define M_SET           META('[')
158#define ismeta(c)       (((c)&M_QUOTE) != 0)
159
160
161static int	 compare(const void *, const void *);
162static int	 ci_compare(const void *, const void *);
163static int	 g_Ctoc(const Char *, char *, STRLEN);
164static int	 g_lstat(Char *, Stat_t *, glob_t *);
165static DIR	*g_opendir(Char *, glob_t *);
166static Char	*g_strchr(Char *, int);
167static int	 g_stat(Char *, Stat_t *, glob_t *);
168static int	 glob0(const Char *, glob_t *);
169static int	 glob1(Char *, Char *, glob_t *, size_t *);
170static int	 glob2(Char *, Char *, Char *, Char *, Char *, Char *,
171                       glob_t *, size_t *);
172static int	 glob3(Char *, Char *, Char *, Char *, Char *,
173                       Char *, Char *, glob_t *, size_t *);
174static int	 globextend(const Char *, glob_t *, size_t *);
175static const Char *
176                 globtilde(const Char *, Char *, size_t, glob_t *);
177static int	 globexp1(const Char *, glob_t *);
178static int	 globexp2(const Char *, const Char *, glob_t *, int *);
179static int	 match(Char *, Char *, Char *, int);
180#ifdef GLOB_DEBUG
181static void	 qprintf(const char *, Char *);
182#endif /* GLOB_DEBUG */
183
184#ifdef MULTIPLICITY
185static Direntry_t *	my_readdir(DIR*);
186
187static Direntry_t *
188my_readdir(DIR *d)
189{
190    return PerlDir_read(d);
191}
192#else
193
194/* ReliantUNIX (OS formerly known as SINIX) defines readdir
195 * in LFS-mode to be a 64-bit version of readdir.  */
196
197#   ifdef sinix
198static Direntry_t *    my_readdir(DIR*);
199
200static Direntry_t *
201my_readdir(DIR *d)
202{
203    return readdir(d);
204}
205#   else
206
207#       define my_readdir       readdir
208
209#   endif
210
211#endif
212
213int
214bsd_glob(const char *pattern, int flags,
215         int (*errfunc)(const char *, int), glob_t *pglob)
216{
217        const U8 *patnext;
218        int c;
219        Char *bufnext, *bufend, patbuf[MAXPATHLEN];
220        patnext = (U8 *) pattern;
221        /* TODO: GLOB_APPEND / GLOB_DOOFFS aren't supported yet */
222#if 0
223        if (!(flags & GLOB_APPEND)) {
224                pglob->gl_pathc = 0;
225                pglob->gl_pathv = NULL;
226                if (!(flags & GLOB_DOOFFS))
227                        pglob->gl_offs = 0;
228        }
229#else
230        pglob->gl_pathc = 0;
231        pglob->gl_pathv = NULL;
232        pglob->gl_offs = 0;
233#endif
234        pglob->gl_flags = flags & ~GLOB_MAGCHAR;
235        pglob->gl_errfunc = errfunc;
236        pglob->gl_matchc = 0;
237
238        bufnext = patbuf;
239        bufend = bufnext + MAXPATHLEN - 1;
240#ifdef DOSISH
241        /* Nasty hack to treat patterns like "C:*" correctly. In this
242         * case, the * should match any file in the current directory
243         * on the C: drive. However, the glob code does not treat the
244         * colon specially, so it looks for files beginning "C:" in
245         * the current directory. To fix this, change the pattern to
246         * add an explicit "./" at the start (just after the drive
247         * letter and colon - ie change to "C:./").
248         */
249        if (isalpha(pattern[0]) && pattern[1] == ':' &&
250            pattern[2] != BG_SEP && pattern[2] != BG_SEP2 &&
251            bufend - bufnext > 4) {
252                *bufnext++ = pattern[0];
253                *bufnext++ = ':';
254                *bufnext++ = '.';
255                *bufnext++ = BG_SEP;
256                patnext += 2;
257        }
258#endif
259
260        if (flags & GLOB_QUOTE) {
261                /* Protect the quoted characters. */
262                while (bufnext < bufend && (c = *patnext++) != BG_EOS)
263                        if (c == BG_QUOTE) {
264#ifdef DOSISH
265                                    /* To avoid backslashitis on Win32,
266                                     * we only treat \ as a quoting character
267                                     * if it precedes one of the
268                                     * metacharacters []-{}~\
269                                     */
270                                if ((c = *patnext++) != '[' && c != ']' &&
271                                    c != '-' && c != '{' && c != '}' &&
272                                    c != '~' && c != '\\') {
273#else
274                                if ((c = *patnext++) == BG_EOS) {
275#endif
276                                        c = BG_QUOTE;
277                                        --patnext;
278                                }
279                                *bufnext++ = c | M_PROTECT;
280                        } else
281                                *bufnext++ = c;
282        } else
283                while (bufnext < bufend && (c = *patnext++) != BG_EOS)
284                        *bufnext++ = c;
285        *bufnext = BG_EOS;
286
287        if (flags & GLOB_BRACE)
288            return globexp1(patbuf, pglob);
289        else
290            return glob0(patbuf, pglob);
291}
292
293/*
294 * Expand recursively a glob {} pattern. When there is no more expansion
295 * invoke the standard globbing routine to glob the rest of the magic
296 * characters
297 */
298static int
299globexp1(const Char *pattern, glob_t *pglob)
300{
301        const Char* ptr = pattern;
302        int rv;
303
304        /* Protect a single {}, for find(1), like csh */
305        if (pattern[0] == BG_LBRACE && pattern[1] == BG_RBRACE && pattern[2] == BG_EOS)
306                return glob0(pattern, pglob);
307
308        while ((ptr = (const Char *) g_strchr((Char *) ptr, BG_LBRACE)) != NULL)
309                if (!globexp2(ptr, pattern, pglob, &rv))
310                        return rv;
311
312        return glob0(pattern, pglob);
313}
314
315
316/*
317 * Recursive brace globbing helper. Tries to expand a single brace.
318 * If it succeeds then it invokes globexp1 with the new pattern.
319 * If it fails then it tries to glob the rest of the pattern and returns.
320 */
321static int
322globexp2(const Char *ptr, const Char *pattern,
323         glob_t *pglob, int *rv)
324{
325        int     i;
326        Char   *lm, *ls;
327        const Char *pe, *pm, *pm1, *pl;
328        Char    patbuf[MAXPATHLEN];
329
330        /* copy part up to the brace */
331        for (lm = patbuf, pm = pattern; pm != ptr; *lm++ = *pm++)
332                ;
333        *lm = BG_EOS;
334        ls = lm;
335
336        /* Find the balanced brace */
337        for (i = 0, pe = ++ptr; *pe; pe++)
338                if (*pe == BG_LBRACKET) {
339                        /* Ignore everything between [] */
340                        for (pm = pe++; *pe != BG_RBRACKET && *pe != BG_EOS; pe++)
341                                ;
342                        if (*pe == BG_EOS) {
343                                /*
344                                 * We could not find a matching BG_RBRACKET.
345                                 * Ignore and just look for BG_RBRACE
346                                 */
347                                pe = pm;
348                        }
349                } else if (*pe == BG_LBRACE)
350                        i++;
351                else if (*pe == BG_RBRACE) {
352                        if (i == 0)
353                                break;
354                        i--;
355                }
356
357        /* Non matching braces; just glob the pattern */
358        if (i != 0 || *pe == BG_EOS) {
359                *rv = glob0(patbuf, pglob);
360                return 0;
361        }
362
363        for (i = 0, pl = pm = ptr; pm <= pe; pm++) {
364                switch (*pm) {
365                case BG_LBRACKET:
366                        /* Ignore everything between [] */
367                        for (pm1 = pm++; *pm != BG_RBRACKET && *pm != BG_EOS; pm++)
368                                ;
369                        if (*pm == BG_EOS) {
370                                /*
371                                 * We could not find a matching BG_RBRACKET.
372                                 * Ignore and just look for BG_RBRACE
373                                 */
374                                pm = pm1;
375                        }
376                        break;
377
378                case BG_LBRACE:
379                        i++;
380                        break;
381
382                case BG_RBRACE:
383                        if (i) {
384                                i--;
385                                break;
386                        }
387                        /* FALLTHROUGH */
388                case BG_COMMA:
389                        if (i && *pm == BG_COMMA)
390                                break;
391                        else {
392                                /* Append the current string */
393                                for (lm = ls; (pl < pm); *lm++ = *pl++)
394                                        ;
395
396                                /*
397                                 * Append the rest of the pattern after the
398                                 * closing brace
399                                 */
400                                for (pl = pe + 1; (*lm++ = *pl++) != BG_EOS; )
401                                        ;
402
403                                /* Expand the current pattern */
404#ifdef GLOB_DEBUG
405                                qprintf("globexp2:", patbuf);
406#endif /* GLOB_DEBUG */
407                                *rv = globexp1(patbuf, pglob);
408
409                                /* move after the comma, to the next string */
410                                pl = pm + 1;
411                        }
412                        break;
413
414                default:
415                        break;
416                }
417        }
418        *rv = 0;
419        return 0;
420}
421
422
423
424/*
425 * expand tilde from the passwd file.
426 */
427static const Char *
428globtilde(const Char *pattern, Char *patbuf, size_t patbuf_len, glob_t *pglob)
429{
430        char *h;
431        const Char *p;
432        Char *b, *eb;
433
434        if (*pattern != BG_TILDE || !(pglob->gl_flags & GLOB_TILDE))
435                return pattern;
436
437        /* Copy up to the end of the string or / */
438        eb = &patbuf[patbuf_len - 1];
439        for (p = pattern + 1, h = (char *) patbuf;
440             h < (char*)eb && *p && *p != BG_SLASH; *h++ = (char)*p++)
441                ;
442
443        *h = BG_EOS;
444
445#if 0
446        if (h == (char *)eb)
447                return what;
448#endif
449
450        if (((char *) patbuf)[0] == BG_EOS) {
451                /*
452                 * handle a plain ~ or ~/ by expanding $HOME
453                 * first and then trying the password file
454                 * or $USERPROFILE on DOSISH systems
455                 */
456                if ((h = PerlEnv_getenv("HOME")) == NULL) {
457#ifdef HAS_PASSWD
458                        struct passwd *pwd;
459                        if ((pwd = getpwuid(getuid())) == NULL)
460                                return pattern;
461                        else
462                                h = pwd->pw_dir;
463#elif DOSISH
464                        /*
465                         * When no passwd file, fallback to the USERPROFILE
466                         * environment variable on DOSish systems.
467                         */
468                        if ((h = PerlEnv_getenv("USERPROFILE")) == NULL) {
469                            return pattern;
470                        }
471#else
472                        return pattern;
473#endif
474                }
475        } else {
476                /*
477                 * Expand a ~user
478                 */
479#ifdef HAS_PASSWD
480                struct passwd *pwd;
481                if ((pwd = getpwnam((char*) patbuf)) == NULL)
482                        return pattern;
483                else
484                        h = pwd->pw_dir;
485#else
486                return pattern;
487#endif
488        }
489
490        /* Copy the home directory */
491        for (b = patbuf; b < eb && *h; *b++ = *h++)
492                ;
493
494        /* Append the rest of the pattern */
495        while (b < eb && (*b++ = *p++) != BG_EOS)
496                ;
497        *b = BG_EOS;
498
499        return patbuf;
500}
501
502
503/*
504 * The main glob() routine: compiles the pattern (optionally processing
505 * quotes), calls glob1() to do the real pattern matching, and finally
506 * sorts the list (unless unsorted operation is requested).  Returns 0
507 * if things went well, nonzero if errors occurred.  It is not an error
508 * to find no matches.
509 */
510static int
511glob0(const Char *pattern, glob_t *pglob)
512{
513        const Char *qpat, *qpatnext;
514        int c, err, oldflags, oldpathc;
515        Char *bufnext, patbuf[MAXPATHLEN];
516        size_t limit = 0;
517
518        qpat = globtilde(pattern, patbuf, MAXPATHLEN, pglob);
519        qpatnext = qpat;
520        oldflags = pglob->gl_flags;
521        oldpathc = pglob->gl_pathc;
522        bufnext = patbuf;
523
524        /* We don't need to check for buffer overflow any more. */
525        while ((c = *qpatnext++) != BG_EOS) {
526                switch (c) {
527                case BG_LBRACKET:
528                        c = *qpatnext;
529                        if (c == BG_NOT)
530                                ++qpatnext;
531                        if (*qpatnext == BG_EOS ||
532                            g_strchr((Char *) qpatnext+1, BG_RBRACKET) == NULL) {
533                                *bufnext++ = BG_LBRACKET;
534                                if (c == BG_NOT)
535                                        --qpatnext;
536                                break;
537                        }
538                        *bufnext++ = M_SET;
539                        if (c == BG_NOT)
540                                *bufnext++ = M_NOT;
541                        c = *qpatnext++;
542                        do {
543                                *bufnext++ = CHAR(c);
544                                if (*qpatnext == BG_RANGE &&
545                                    (c = qpatnext[1]) != BG_RBRACKET) {
546                                        *bufnext++ = M_RNG;
547                                        *bufnext++ = CHAR(c);
548                                        qpatnext += 2;
549                                }
550                        } while ((c = *qpatnext++) != BG_RBRACKET);
551                        pglob->gl_flags |= GLOB_MAGCHAR;
552                        *bufnext++ = M_END;
553                        break;
554                case BG_QUESTION:
555                        pglob->gl_flags |= GLOB_MAGCHAR;
556                        *bufnext++ = M_ONE;
557                        break;
558                case BG_STAR:
559                        pglob->gl_flags |= GLOB_MAGCHAR;
560                        /* Collapse adjacent stars to one.
561                         * This is required to ensure that a pattern like
562                         * "a**" matches a name like "a", as without this
563                         * check when the first star matched everything it would
564                         * cause the second star to return a match fail.
565                         * As long ** is folded here this does not happen.
566                         */
567                        if (bufnext == patbuf || bufnext[-1] != M_ALL)
568                                *bufnext++ = M_ALL;
569                        break;
570                default:
571                        *bufnext++ = CHAR(c);
572                        break;
573                }
574        }
575        *bufnext = BG_EOS;
576#ifdef GLOB_DEBUG
577        qprintf("glob0:", patbuf);
578#endif /* GLOB_DEBUG */
579
580        if ((err = glob1(patbuf, patbuf+MAXPATHLEN-1, pglob, &limit)) != 0) {
581                pglob->gl_flags = oldflags;
582                return(err);
583        }
584
585        /*
586         * If there was no match we are going to append the pattern
587         * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified
588         * and the pattern did not contain any magic characters
589         * GLOB_NOMAGIC is there just for compatibility with csh.
590         */
591        if (pglob->gl_pathc == oldpathc &&
592            ((pglob->gl_flags & GLOB_NOCHECK) ||
593              ((pglob->gl_flags & GLOB_NOMAGIC) &&
594               !(pglob->gl_flags & GLOB_MAGCHAR))))
595        {
596#ifdef GLOB_DEBUG
597                printf("calling globextend from glob0\n");
598#endif /* GLOB_DEBUG */
599                pglob->gl_flags = oldflags;
600                return(globextend(qpat, pglob, &limit));
601        }
602        else if (!(pglob->gl_flags & GLOB_NOSORT))
603            if (pglob->gl_pathv)
604                qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc,
605                    pglob->gl_pathc - oldpathc, sizeof(char *),
606                    (pglob->gl_flags & (GLOB_ALPHASORT|GLOB_NOCASE))
607                        ? ci_compare : compare);
608        pglob->gl_flags = oldflags;
609        return(0);
610}
611
612static int
613ci_compare(const void *p, const void *q)
614{
615        const char *pp = *(const char **)p;
616        const char *qq = *(const char **)q;
617        int ci;
618        while (*pp && *qq) {
619                if (toFOLD(*pp) != toFOLD(*qq))
620                        break;
621                ++pp;
622                ++qq;
623        }
624        ci = toFOLD(*pp) - toFOLD(*qq);
625        if (ci == 0)
626                return compare(p, q);
627        return ci;
628}
629
630static int
631compare(const void *p, const void *q)
632{
633        return(strcmp(*(char **)p, *(char **)q));
634}
635
636static int
637glob1(Char *pattern, Char *pattern_last, glob_t *pglob, size_t *limitp)
638{
639        Char pathbuf[MAXPATHLEN];
640
641        assert(pattern < pattern_last);
642
643        /* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */
644        if (*pattern == BG_EOS)
645                return(0);
646        return(glob2(pathbuf, pathbuf+MAXPATHLEN-1,
647                     pathbuf, pathbuf+MAXPATHLEN-1,
648                     pattern, pattern_last, pglob, limitp));
649}
650
651/*
652 * The functions glob2 and glob3 are mutually recursive; there is one level
653 * of recursion for each segment in the pattern that contains one or more
654 * meta characters.
655 */
656static int
657glob2(Char *pathbuf, Char *pathbuf_last, Char *pathend, Char *pathend_last,
658      Char *pattern, Char *pattern_last, glob_t *pglob, size_t *limitp)
659{
660        Stat_t sb;
661        Char *p, *q;
662        int anymeta;
663
664        assert(pattern < pattern_last);
665
666        /*
667         * Loop over pattern segments until end of pattern or until
668         * segment with meta character found.
669         */
670        for (anymeta = 0;;) {
671                if (*pattern == BG_EOS) {		/* End of pattern? */
672                        *pathend = BG_EOS;
673                        if (g_lstat(pathbuf, &sb, pglob))
674                                return(0);
675
676                        if (((pglob->gl_flags & GLOB_MARK) &&
677                            pathend[-1] != BG_SEP
678#ifdef DOSISH
679                            && pathend[-1] != BG_SEP2
680#endif
681                            ) && (S_ISDIR(sb.st_mode) ||
682                                  (S_ISLNK(sb.st_mode) &&
683                            (g_stat(pathbuf, &sb, pglob) == 0) &&
684                            S_ISDIR(sb.st_mode)))) {
685                                if (pathend+1 > pathend_last)
686                                        return (1);
687                                *pathend++ = BG_SEP;
688                                *pathend = BG_EOS;
689                        }
690                        ++pglob->gl_matchc;
691#ifdef GLOB_DEBUG
692                        printf("calling globextend from glob2\n");
693#endif /* GLOB_DEBUG */
694                        return(globextend(pathbuf, pglob, limitp));
695                }
696
697                /* Find end of next segment, copy tentatively to pathend. */
698                q = pathend;
699                p = pattern;
700                while (*p != BG_EOS && *p != BG_SEP
701#ifdef DOSISH
702                       && *p != BG_SEP2
703#endif
704                       ) {
705                        assert(p < pattern_last);
706                        if (ismeta(*p))
707                                anymeta = 1;
708                        if (q+1 > pathend_last)
709                                return (1);
710                        *q++ = *p++;
711                }
712
713                if (!anymeta) {		/* No expansion, do next segment. */
714                        pathend = q;
715                        pattern = p;
716                        while (*pattern == BG_SEP
717#ifdef DOSISH
718                               || *pattern == BG_SEP2
719#endif
720                               ) {
721                                assert(p < pattern_last);
722                                if (pathend+1 > pathend_last)
723                                        return (1);
724                                *pathend++ = *pattern++;
725                        }
726                } else
727                        /* Need expansion, recurse. */
728                        return(glob3(pathbuf, pathbuf_last, pathend,
729                                     pathend_last, pattern,
730                                     p, pattern_last, pglob, limitp));
731        }
732        /* NOTREACHED */
733}
734
735static int
736glob3(Char *pathbuf, Char *pathbuf_last, Char *pathend, Char *pathend_last,
737      Char *pattern,
738      Char *restpattern, Char *restpattern_last, glob_t *pglob, size_t *limitp)
739{
740        Direntry_t *dp;
741        DIR *dirp;
742        int err;
743        int nocase;
744        char buf[MAXPATHLEN];
745
746        /*
747         * The readdirfunc declaration can't be prototyped, because it is
748         * assigned, below, to two functions which are prototyped in glob.h
749         * and dirent.h as taking pointers to differently typed opaque
750         * structures.
751         */
752        Direntry_t *(*readdirfunc)(DIR*);
753
754        assert(pattern < restpattern_last);
755        assert(restpattern < restpattern_last);
756
757        if (pathend > pathend_last)
758                return (1);
759        *pathend = BG_EOS;
760        errno = 0;
761
762#ifdef VMS
763        {
764                Char *q = pathend;
765                if (q - pathbuf > 5) {
766                        q -= 5;
767                        if (q[0] == '.' &&
768                            tolower(q[1]) == 'd' && tolower(q[2]) == 'i' &&
769                            tolower(q[3]) == 'r' && q[4] == '/')
770                        {
771                                q[0] = '/';
772                                q[1] = BG_EOS;
773                                pathend = q+1;
774                        }
775                }
776        }
777#endif
778
779        if ((dirp = g_opendir(pathbuf, pglob)) == NULL) {
780                /* TODO: don't call for ENOENT or ENOTDIR? */
781                if (pglob->gl_errfunc) {
782                        if (g_Ctoc(pathbuf, buf, sizeof(buf)))
783                                return (GLOB_ABEND);
784                        if (pglob->gl_errfunc(buf, errno) ||
785                            (pglob->gl_flags & GLOB_ERR))
786                                return (GLOB_ABEND);
787                }
788                return(0);
789        }
790
791        err = 0;
792        nocase = ((pglob->gl_flags & GLOB_NOCASE) != 0);
793
794        /* Search directory for matching names. */
795        if (pglob->gl_flags & GLOB_ALTDIRFUNC)
796                readdirfunc = (Direntry_t *(*)(DIR *))pglob->gl_readdir;
797        else
798                readdirfunc = (Direntry_t *(*)(DIR *))my_readdir;
799        while ((dp = (*readdirfunc)(dirp))) {
800                U8 *sc;
801                Char *dc;
802
803                /* Initial BG_DOT must be matched literally. */
804                if (dp->d_name[0] == BG_DOT && *pattern != BG_DOT)
805                        continue;
806                dc = pathend;
807                sc = (U8 *) dp->d_name;
808                while (dc < pathend_last && (*dc++ = *sc++) != BG_EOS)
809                        ;
810                if (dc >= pathend_last) {
811                        *dc = BG_EOS;
812                        err = 1;
813                        break;
814                }
815
816                if (!match(pathend, pattern, restpattern, nocase)) {
817                        *pathend = BG_EOS;
818                        continue;
819                }
820                err = glob2(pathbuf, pathbuf_last, --dc, pathend_last,
821                            restpattern, restpattern_last, pglob, limitp);
822                if (err)
823                        break;
824        }
825
826        if (pglob->gl_flags & GLOB_ALTDIRFUNC)
827                (*pglob->gl_closedir)(dirp);
828        else
829                PerlDir_close(dirp);
830        return(err);
831}
832
833
834/*
835 * Extend the gl_pathv member of a glob_t structure to accommodate a new item,
836 * add the new item, and update gl_pathc.
837 *
838 * This assumes the BSD realloc, which only copies the block when its size
839 * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
840 * behavior.
841 *
842 * Return 0 if new item added, error code if memory couldn't be allocated.
843 *
844 * Invariant of the glob_t structure:
845 *	Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
846 *	gl_pathv points to (gl_offs + gl_pathc + 1) items.
847 */
848static int
849globextend(const Char *path, glob_t *pglob, size_t *limitp)
850{
851        char **pathv;
852        int i;
853        STRLEN newsize, len;
854        char *copy;
855        const Char *p;
856
857#ifdef GLOB_DEBUG
858        printf("Adding ");
859        for (p = path; *p; p++)
860                (void)printf("%c", CHAR(*p));
861        printf("\n");
862#endif /* GLOB_DEBUG */
863
864        newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs);
865        if (pglob->gl_pathv)
866                pathv = Renew(pglob->gl_pathv,newsize,char*);
867        else
868                Newx(pathv,newsize,char*);
869        if (pathv == NULL) {
870                if (pglob->gl_pathv) {
871                        Safefree(pglob->gl_pathv);
872                        pglob->gl_pathv = NULL;
873                }
874                return(GLOB_NOSPACE);
875        }
876
877        if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
878                /* first time around -- clear initial gl_offs items */
879                pathv += pglob->gl_offs;
880                for (i = pglob->gl_offs; --i >= 0; )
881                        *--pathv = NULL;
882        }
883        pglob->gl_pathv = pathv;
884
885        for (p = path; *p++;)
886                ;
887        len = (STRLEN)(p - path);
888        *limitp += len;
889        Newx(copy, p-path, char);
890        if (copy != NULL) {
891                if (g_Ctoc(path, copy, len)) {
892                        Safefree(copy);
893                        return(GLOB_NOSPACE);
894                }
895                pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
896        }
897        pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
898
899        if ((pglob->gl_flags & GLOB_LIMIT) &&
900            newsize + *limitp >= (unsigned long)ARG_MAX) {
901                errno = 0;
902                return(GLOB_NOSPACE);
903        }
904
905        return(copy == NULL ? GLOB_NOSPACE : 0);
906}
907
908
909/*
910 * pattern matching function for filenames using state machine to avoid
911 * recursion. We maintain a "nextp" and "nextn" to allow us to backtrack
912 * without additional callframes, and to do cleanly prune the backtracking
913 * state when multiple '*' (start) matches are included in the pattern.
914 *
915 * Thanks to Russ Cox for the improved state machine logic to avoid quadratic
916 * matching on failure.
917 *
918 * https://research.swtch.com/glob
919 *
920 * An example would be a pattern
921 *  ("a*" x 100) . "y"
922 * against a file name like
923 *  ("a" x 100) . "x"
924 *
925 */
926static int
927match(Char *name, Char *pat, Char *patend, int nocase)
928{
929        int ok, negate_range;
930        Char c, k;
931        Char *nextp = NULL;
932        Char *nextn = NULL;
933
934    redo:
935        while (pat < patend) {
936                c = *pat++;
937                switch (c & M_MASK) {
938                case M_ALL:
939                        if (pat == patend)
940                                return(1);
941                        if (*name == BG_EOS)
942                                return 0;
943                        nextn = name + 1;
944                        nextp = pat - 1;
945                        break;
946                case M_ONE:
947                        /* since * matches leftmost-shortest first   *
948                         * if we encounter the EOS then backtracking *
949                         * will not help, so we can exit early here. */
950                        if (*name++ == BG_EOS)
951                                return 0;
952                        break;
953                case M_SET:
954                        ok = 0;
955                        /* since * matches leftmost-shortest first   *
956                         * if we encounter the EOS then backtracking *
957                         * will not help, so we can exit early here. */
958                        if ((k = *name++) == BG_EOS)
959                                return 0;
960                        if ((negate_range = ((*pat & M_MASK) == M_NOT)) != BG_EOS)
961                                ++pat;
962                        while (((c = *pat++) & M_MASK) != M_END)
963                                if ((*pat & M_MASK) == M_RNG) {
964                                        if (nocase) {
965                                                if (tolower(c) <= tolower(k) && tolower(k) <= tolower(pat[1]))
966                                                        ok = 1;
967                                        } else {
968                                                if (c <= k && k <= pat[1])
969                                                        ok = 1;
970                                        }
971                                        pat += 2;
972                                } else if (nocase ? (tolower(c) == tolower(k)) : (c == k))
973                                        ok = 1;
974                        if (ok == negate_range)
975                                goto fail;
976                        break;
977                default:
978                        k = *name++;
979                        if (nocase ? (tolower(k) != tolower(c)) : (k != c))
980                                goto fail;
981                        break;
982                }
983        }
984        if (*name == BG_EOS)
985                return 1;
986
987    fail:
988        if (nextn) {
989                pat = nextp;
990                name = nextn;
991                goto redo;
992        }
993        return 0;
994}
995
996/* Free allocated data belonging to a glob_t structure. */
997void
998bsd_globfree(glob_t *pglob)
999{
1000        int i;
1001        char **pp;
1002
1003        if (pglob->gl_pathv != NULL) {
1004                pp = pglob->gl_pathv + pglob->gl_offs;
1005                for (i = pglob->gl_pathc; i--; ++pp)
1006                        if (*pp)
1007                                Safefree(*pp);
1008                Safefree(pglob->gl_pathv);
1009                pglob->gl_pathv = NULL;
1010        }
1011}
1012
1013static DIR *
1014g_opendir(Char *str, glob_t *pglob)
1015{
1016        char buf[MAXPATHLEN];
1017
1018        if (!*str) {
1019                my_strlcpy(buf, ".", sizeof(buf));
1020        } else {
1021                if (g_Ctoc(str, buf, sizeof(buf)))
1022                        return(NULL);
1023        }
1024
1025        if (pglob->gl_flags & GLOB_ALTDIRFUNC)
1026                return((DIR*)(*pglob->gl_opendir)(buf));
1027
1028        return(PerlDir_open(buf));
1029}
1030
1031static int
1032g_lstat(Char *fn, Stat_t *sb, glob_t *pglob)
1033{
1034        char buf[MAXPATHLEN];
1035
1036        if (g_Ctoc(fn, buf, sizeof(buf)))
1037                return(-1);
1038        if (pglob->gl_flags & GLOB_ALTDIRFUNC)
1039                return((*pglob->gl_lstat)(buf, sb));
1040#ifdef HAS_LSTAT
1041        return(PerlLIO_lstat(buf, sb));
1042#else
1043        return(PerlLIO_stat(buf, sb));
1044#endif /* HAS_LSTAT */
1045}
1046
1047static int
1048g_stat(Char *fn, Stat_t *sb, glob_t *pglob)
1049{
1050        char buf[MAXPATHLEN];
1051
1052        if (g_Ctoc(fn, buf, sizeof(buf)))
1053                return(-1);
1054        if (pglob->gl_flags & GLOB_ALTDIRFUNC)
1055                return((*pglob->gl_stat)(buf, sb));
1056        return(PerlLIO_stat(buf, sb));
1057}
1058
1059static Char *
1060g_strchr(Char *str, int ch)
1061{
1062        do {
1063                if (*str == ch)
1064                        return (str);
1065        } while (*str++);
1066        return (NULL);
1067}
1068
1069static int
1070g_Ctoc(const Char *str, char *buf, STRLEN len)
1071{
1072        while (len--) {
1073                if ((*buf++ = (char)*str++) == BG_EOS)
1074                        return (0);
1075        }
1076        return (1);
1077}
1078
1079#ifdef GLOB_DEBUG
1080static void
1081qprintf(const char *str, Char *s)
1082{
1083        Char *p;
1084
1085        (void)printf("%s:\n", str);
1086        for (p = s; *p; p++)
1087                (void)printf("%c", CHAR(*p));
1088        (void)printf("\n");
1089        for (p = s; *p; p++)
1090                (void)printf("%c", *p & M_PROTECT ? '"' : ' ');
1091        (void)printf("\n");
1092        for (p = s; *p; p++)
1093                (void)printf("%c", ismeta(*p) ? '_' : ' ');
1094        (void)printf("\n");
1095}
1096#endif /* GLOB_DEBUG */
1097