1/*
2 * glob.c - filename generation
3 *
4 * This file is part of zsh, the Z shell.
5 *
6 * Copyright (c) 1992-1997 Paul Falstad
7 * All rights reserved.
8 *
9 * Permission is hereby granted, without written agreement and without
10 * license or royalty fees, to use, copy, modify, and distribute this
11 * software and to distribute modified versions of this software for any
12 * purpose, provided that the above copyright notice and the following
13 * two paragraphs appear in all copies of this software.
14 *
15 * In no event shall Paul Falstad or the Zsh Development Group be liable
16 * to any party for direct, indirect, special, incidental, or consequential
17 * damages arising out of the use of this software and its documentation,
18 * even if Paul Falstad and the Zsh Development Group have been advised of
19 * the possibility of such damage.
20 *
21 * Paul Falstad and the Zsh Development Group specifically disclaim any
22 * warranties, including, but not limited to, the implied warranties of
23 * merchantability and fitness for a particular purpose.  The software
24 * provided hereunder is on an "as is" basis, and Paul Falstad and the
25 * Zsh Development Group have no obligation to provide maintenance,
26 * support, updates, enhancements, or modifications.
27 *
28 */
29
30#include "zsh.mdh"
31#include "glob.pro"
32
33#if defined(OFF_T_IS_64_BIT) && defined(__GNUC__)
34# define ALIGN64 __attribute__((aligned(8)))
35#else
36# define ALIGN64
37#endif
38
39/* flag for CSHNULLGLOB */
40
41typedef struct gmatch *Gmatch;
42
43struct gmatch {
44    char *name;
45    /*
46     * Array of sort strings:  one for each GS_EXEC sort type in
47     * the glob qualifiers.
48     */
49    char **sortstrs;
50    off_t size ALIGN64;
51    long atime;
52    long mtime;
53    long ctime;
54    long links;
55    off_t _size ALIGN64;
56    long _atime;
57    long _mtime;
58    long _ctime;
59    long _links;
60#ifdef GET_ST_ATIME_NSEC
61    long ansec;
62    long _ansec;
63#endif
64#ifdef GET_ST_MTIME_NSEC
65    long mnsec;
66    long _mnsec;
67#endif
68#ifdef GET_ST_CTIME_NSEC
69    long cnsec;
70    long _cnsec;
71#endif
72};
73
74#define GS_NAME   1
75#define GS_DEPTH  2
76#define GS_EXEC	  4
77
78#define GS_SHIFT_BASE	8
79
80#define GS_SIZE  (GS_SHIFT_BASE)
81#define GS_ATIME (GS_SHIFT_BASE << 1)
82#define GS_MTIME (GS_SHIFT_BASE << 2)
83#define GS_CTIME (GS_SHIFT_BASE << 3)
84#define GS_LINKS (GS_SHIFT_BASE << 4)
85
86#define GS_SHIFT  5
87#define GS__SIZE  (GS_SIZE << GS_SHIFT)
88#define GS__ATIME (GS_ATIME << GS_SHIFT)
89#define GS__MTIME (GS_MTIME << GS_SHIFT)
90#define GS__CTIME (GS_CTIME << GS_SHIFT)
91#define GS__LINKS (GS_LINKS << GS_SHIFT)
92
93#define GS_DESC  (GS_SHIFT_BASE << (2*GS_SHIFT))
94#define GS_NONE  (GS_SHIFT_BASE << (2*GS_SHIFT+1))
95
96#define GS_NORMAL (GS_SIZE | GS_ATIME | GS_MTIME | GS_CTIME | GS_LINKS)
97#define GS_LINKED (GS_NORMAL << GS_SHIFT)
98
99/**/
100int badcshglob;
101
102/**/
103int pathpos;		/* position in pathbuf (needed by pattern code) */
104
105/**/
106char *pathbuf;		/* pathname buffer (needed by pattern code) */
107
108typedef struct stat *Statptr;	 /* This makes the Ultrix compiler happy.  Go figure. */
109
110/* modifier for unit conversions */
111
112#define TT_DAYS 0
113#define TT_HOURS 1
114#define TT_MINS 2
115#define TT_WEEKS 3
116#define TT_MONTHS 4
117#define TT_SECONDS 5
118
119#define TT_BYTES 0
120#define TT_POSIX_BLOCKS 1
121#define TT_KILOBYTES 2
122#define TT_MEGABYTES 3
123
124
125typedef int (*TestMatchFunc) _((char *, struct stat *, off_t, char *));
126
127struct qual {
128    struct qual *next;		/* Next qualifier, must match                */
129    struct qual *or;		/* Alternative set of qualifiers to match    */
130    TestMatchFunc func;		/* Function to call to test match            */
131    off_t data ALIGN64;		/* Argument passed to function               */
132    int sense;			/* Whether asserting or negating             */
133    int amc;			/* Flag for which time to test (a, m, c)     */
134    int range;			/* Whether to test <, > or = (as per signum) */
135    int units;			/* Multiplier for time or size, respectively */
136    char *sdata;		/* currently only: expression to eval        */
137};
138
139/* Prefix, suffix for doing zle trickery */
140
141/**/
142mod_export char *glob_pre, *glob_suf;
143
144/* Element of a glob sort */
145struct globsort {
146    /* Sort type */
147    int tp;
148    /* Sort code to eval, if type is GS_EXEC */
149    char *exec;
150};
151
152/* Maximum entries in sort array */
153#define MAX_SORTS	(12)
154
155/* struct to easily save/restore current state */
156
157struct globdata {
158    int gd_pathpos;
159    char *gd_pathbuf;
160
161    int gd_matchsz;		/* size of matchbuf                     */
162    int gd_matchct;		/* number of matches found              */
163    int gd_pathbufsz;		/* size of pathbuf			*/
164    int gd_pathbufcwd;		/* where did we chdir()'ed		*/
165    Gmatch gd_matchbuf;		/* array of matches                     */
166    Gmatch gd_matchptr;		/* &matchbuf[matchct]                   */
167    char *gd_colonmod;		/* colon modifiers in qualifier list    */
168
169    /* Qualifiers pertaining to current pattern */
170    struct qual *gd_quals;
171
172    /* Other state values for current pattern */
173    int gd_qualct, gd_qualorct;
174    int gd_range, gd_amc, gd_units;
175    int gd_gf_nullglob, gd_gf_markdirs, gd_gf_noglobdots, gd_gf_listtypes;
176    int gd_gf_numsort;
177    int gd_gf_follow, gd_gf_sorts, gd_gf_nsorts;
178    struct globsort gd_gf_sortlist[MAX_SORTS];
179    LinkList gd_gf_pre_words;
180
181    char *gd_glob_pre, *gd_glob_suf;
182};
183
184/* The variable with the current globbing state and convenience macros */
185
186static struct globdata curglobdata;
187
188#define matchsz       (curglobdata.gd_matchsz)
189#define matchct       (curglobdata.gd_matchct)
190#define pathbufsz     (curglobdata.gd_pathbufsz)
191#define pathbufcwd    (curglobdata.gd_pathbufcwd)
192#define matchbuf      (curglobdata.gd_matchbuf)
193#define matchptr      (curglobdata.gd_matchptr)
194#define colonmod      (curglobdata.gd_colonmod)
195#define quals         (curglobdata.gd_quals)
196#define qualct        (curglobdata.gd_qualct)
197#define qualorct      (curglobdata.gd_qualorct)
198#define g_range       (curglobdata.gd_range)
199#define g_amc         (curglobdata.gd_amc)
200#define g_units       (curglobdata.gd_units)
201#define gf_nullglob   (curglobdata.gd_gf_nullglob)
202#define gf_markdirs   (curglobdata.gd_gf_markdirs)
203#define gf_noglobdots (curglobdata.gd_gf_noglobdots)
204#define gf_listtypes  (curglobdata.gd_gf_listtypes)
205#define gf_numsort    (curglobdata.gd_gf_numsort)
206#define gf_follow     (curglobdata.gd_gf_follow)
207#define gf_sorts      (curglobdata.gd_gf_sorts)
208#define gf_nsorts     (curglobdata.gd_gf_nsorts)
209#define gf_sortlist   (curglobdata.gd_gf_sortlist)
210#define gf_pre_words  (curglobdata.gd_gf_pre_words)
211
212/* and macros for save/restore */
213
214#define save_globstate(N) \
215  do { \
216    memcpy(&(N), &curglobdata, sizeof(struct globdata)); \
217    (N).gd_pathpos = pathpos; \
218    (N).gd_pathbuf = pathbuf; \
219    (N).gd_glob_pre = glob_pre; \
220    (N).gd_glob_suf = glob_suf; \
221    pathbuf = NULL; \
222  } while (0)
223
224#define restore_globstate(N) \
225  do { \
226    zfree(pathbuf, pathbufsz); \
227    memcpy(&curglobdata, &(N), sizeof(struct globdata)); \
228    pathpos = (N).gd_pathpos; \
229    pathbuf = (N).gd_pathbuf; \
230    glob_pre = (N).gd_glob_pre; \
231    glob_suf = (N).gd_glob_suf; \
232  } while (0)
233
234/* pathname component in filename patterns */
235
236struct complist {
237    Complist next;
238    Patprog pat;
239    int closure;		/* 1 if this is a (foo/)# */
240    int follow; 		/* 1 to go thru symlinks */
241};
242
243/* Add a component to pathbuf: This keeps track of how    *
244 * far we are into a file name, since each path component *
245 * must be matched separately.                            */
246
247/**/
248static void
249addpath(char *s, int l)
250{
251    DPUTS(!pathbuf, "BUG: pathbuf not initialised");
252    while (pathpos + l + 1 >= pathbufsz)
253	pathbuf = realloc(pathbuf, pathbufsz *= 2);
254    while (l--)
255	pathbuf[pathpos++] = *s++;
256    pathbuf[pathpos++] = '/';
257    pathbuf[pathpos] = '\0';
258}
259
260/* stat the filename s appended to pathbuf.  l should be true for lstat,    *
261 * false for stat.  If st is NULL, the file is only checked for existance.  *
262 * s == "" is treated as s == ".".  This is necessary since on most systems *
263 * foo/ can be used to reference a non-directory foo.  Returns nonzero if   *
264 * the file does not exists.                                                */
265
266/**/
267static int
268statfullpath(const char *s, struct stat *st, int l)
269{
270    char buf[PATH_MAX];
271
272    DPUTS(strlen(s) + !*s + pathpos - pathbufcwd >= PATH_MAX,
273	  "BUG: statfullpath(): pathname too long");
274    strcpy(buf, pathbuf + pathbufcwd);
275    strcpy(buf + pathpos - pathbufcwd, s);
276    if (!*s && *buf) {
277	/*
278	 * Don't add the '.' if the path so far is empty, since
279	 * then we get bogus empty strings inserted as files.
280	 */
281	buf[pathpos - pathbufcwd] = '.';
282	buf[pathpos - pathbufcwd + 1] = '\0';
283	l = 0;
284    }
285    unmetafy(buf, NULL);
286    if (!st) {
287	char lbuf[1];
288	return access(buf, F_OK) && (!l || readlink(buf, lbuf, 1) < 0);
289    }
290    return l ? lstat(buf, st) : stat(buf, st);
291}
292
293/* This may be set by qualifier functions to an array of strings to insert
294 * into the list instead of the original string. */
295
296char **inserts;
297
298/* add a match to the list */
299
300/**/
301static void
302insert(char *s, int checked)
303{
304    struct stat buf, buf2, *bp;
305    char *news = s;
306    int statted = 0;
307
308    queue_signals();
309    inserts = NULL;
310
311    if (gf_listtypes || gf_markdirs) {
312	/* Add the type marker to the end of the filename */
313	mode_t mode;
314	checked = statted = 1;
315	if (statfullpath(s, &buf, 1)) {
316	    unqueue_signals();
317	    return;
318	}
319	mode = buf.st_mode;
320	if (gf_follow) {
321	    if (!S_ISLNK(mode) || statfullpath(s, &buf2, 0))
322		memcpy(&buf2, &buf, sizeof(buf));
323	    statted |= 2;
324	    mode = buf2.st_mode;
325	}
326	if (gf_listtypes || S_ISDIR(mode)) {
327	    int ll = strlen(s);
328
329	    news = (char *) hcalloc(ll + 2);
330	    strcpy(news, s);
331	    news[ll] = file_type(mode);
332	    news[ll + 1] = '\0';
333	}
334    }
335    if (qualct || qualorct) {
336	/* Go through the qualifiers, rejecting the file if appropriate */
337	struct qual *qo, *qn;
338
339	if (!statted && statfullpath(s, &buf, 1)) {
340	    unqueue_signals();
341	    return;
342	}
343	news = dyncat(pathbuf, news);
344
345	statted = 1;
346	qo = quals;
347	for (qn = qo; qn && qn->func;) {
348	    g_range = qn->range;
349	    g_amc = qn->amc;
350	    g_units = qn->units;
351	    if ((qn->sense & 2) && !(statted & 2)) {
352		/* If (sense & 2), we're following links */
353		if (!S_ISLNK(buf.st_mode) || statfullpath(s, &buf2, 0))
354		    memcpy(&buf2, &buf, sizeof(buf));
355		statted |= 2;
356	    }
357	    bp = (qn->sense & 2) ? &buf2 : &buf;
358	    /* Reject the file if the function returned zero *
359	     * and the sense was positive (sense&1 == 0), or *
360	     * vice versa.                                   */
361	    if ((!((qn->func) (news, bp, qn->data, qn->sdata))
362		 ^ qn->sense) & 1) {
363		/* Try next alternative, or return if there are no more */
364		if (!(qo = qo->or)) {
365		    unqueue_signals();
366		    return;
367		}
368		qn = qo;
369		continue;
370	    }
371	    qn = qn->next;
372	}
373    } else if (!checked) {
374	if (statfullpath(s, NULL, 1)) {
375	    unqueue_signals();
376	    return;
377	}
378	statted = 1;
379	news = dyncat(pathbuf, news);
380    } else
381	news = dyncat(pathbuf, news);
382
383    while (!inserts || (news = dupstring(*inserts++))) {
384	if (colonmod) {
385	    /* Handle the remainder of the qualifier:  e.g. (:r:s/foo/bar/). */
386	    s = colonmod;
387	    modify(&news, &s);
388	}
389	if (!statted && (gf_sorts & GS_NORMAL)) {
390	    statfullpath(s, &buf, 1);
391	    statted = 1;
392	}
393	if (!(statted & 2) && (gf_sorts & GS_LINKED)) {
394	    if (statted) {
395		if (!S_ISLNK(buf.st_mode) || statfullpath(s, &buf2, 0))
396		    memcpy(&buf2, &buf, sizeof(buf));
397	    } else if (statfullpath(s, &buf2, 0))
398		statfullpath(s, &buf2, 1);
399	    statted |= 2;
400	}
401	matchptr->name = news;
402	if (statted & 1) {
403	    matchptr->size = buf.st_size;
404	    matchptr->atime = buf.st_atime;
405	    matchptr->mtime = buf.st_mtime;
406	    matchptr->ctime = buf.st_ctime;
407	    matchptr->links = buf.st_nlink;
408#ifdef GET_ST_ATIME_NSEC
409	    matchptr->ansec = GET_ST_ATIME_NSEC(buf);
410#endif
411#ifdef GET_ST_MTIME_NSEC
412	    matchptr->mnsec = GET_ST_MTIME_NSEC(buf);
413#endif
414#ifdef GET_ST_CTIME_NSEC
415	    matchptr->cnsec = GET_ST_CTIME_NSEC(buf);
416#endif
417	}
418	if (statted & 2) {
419	    matchptr->_size = buf2.st_size;
420	    matchptr->_atime = buf2.st_atime;
421	    matchptr->_mtime = buf2.st_mtime;
422	    matchptr->_ctime = buf2.st_ctime;
423	    matchptr->_links = buf2.st_nlink;
424#ifdef GET_ST_ATIME_NSEC
425	    matchptr->_ansec = GET_ST_ATIME_NSEC(buf2);
426#endif
427#ifdef GET_ST_MTIME_NSEC
428	    matchptr->_mnsec = GET_ST_MTIME_NSEC(buf2);
429#endif
430#ifdef GET_ST_CTIME_NSEC
431	    matchptr->_cnsec = GET_ST_CTIME_NSEC(buf2);
432#endif
433	}
434	matchptr++;
435
436	if (++matchct == matchsz) {
437	    matchbuf = (Gmatch )realloc((char *)matchbuf,
438					sizeof(struct gmatch) * (matchsz *= 2));
439
440	    matchptr = matchbuf + matchct;
441	}
442	if (!inserts)
443	    break;
444    }
445    unqueue_signals();
446}
447
448/* Check to see if str is eligible for filename generation. */
449
450/**/
451mod_export int
452haswilds(char *str)
453{
454    /* `[' and `]' are legal even if bad patterns are usually not. */
455    if ((*str == Inbrack || *str == Outbrack) && !str[1])
456	return 0;
457
458    /* If % is immediately followed by ?, then that ? is     *
459     * not treated as a wildcard.  This is so you don't have *
460     * to escape job references such as %?foo.               */
461    if (str[0] == '%' && str[1] == Quest)
462	str[1] = '?';
463
464    for (; *str; str++) {
465	switch (*str) {
466	    case Inpar:
467	    case Bar:
468	    case Star:
469	    case Inbrack:
470	    case Inang:
471	    case Quest:
472		return 1;
473	    case Pound:
474	    case Hat:
475		if (isset(EXTENDEDGLOB))
476		    return 1;
477		break;
478	}
479    }
480    return 0;
481}
482
483/* Do the globbing:  scanner is called recursively *
484 * with successive bits of the path until we've    *
485 * tried all of it.                                */
486
487/**/
488static void
489scanner(Complist q)
490{
491    Patprog p;
492    int closure;
493    int pbcwdsav = pathbufcwd;
494    int errssofar = errsfound;
495    struct dirsav ds;
496
497    init_dirsav(&ds);
498    if (!q)
499	return;
500
501    if ((closure = q->closure)) {
502	/* (foo/)# - match zero or more dirs */
503	if (q->closure == 2)	/* (foo/)## - match one or more dirs */
504	    q->closure = 1;
505	else
506	    scanner(q->next);
507    }
508    p = q->pat;
509    /* Now the actual matching for the current path section. */
510    if (p->flags & PAT_PURES) {
511	/*
512	 * It's a straight string to the end of the path section.
513	 */
514	char *str = (char *)p + p->startoff;
515	int l = p->patmlen;
516
517	if (l + !l + pathpos - pathbufcwd >= PATH_MAX) {
518	    int err;
519
520	    if (l >= PATH_MAX)
521		return;
522	    err = lchdir(pathbuf + pathbufcwd, &ds, 0);
523	    if (err == -1)
524		return;
525	    if (err) {
526		zerr("current directory lost during glob");
527		return;
528	    }
529	    pathbufcwd = pathpos;
530	}
531	if (q->next) {
532	    /* Not the last path section. Just add it to the path. */
533	    int oppos = pathpos;
534
535	    if (!errflag) {
536		int add = 1;
537
538		if (q->closure && *pathbuf) {
539		    if (!strcmp(str, "."))
540			add = 0;
541		    else if (!strcmp(str, "..")) {
542			struct stat sc, sr;
543
544			add = (stat("/", &sr) || stat(pathbuf, &sc) ||
545			       sr.st_ino != sc.st_ino ||
546			       sr.st_dev != sc.st_dev);
547		    }
548		}
549		if (add) {
550		    addpath(str, l);
551		    if (!closure || !statfullpath("", NULL, 1))
552			scanner((q->closure) ? q : q->next);
553		    pathbuf[pathpos = oppos] = '\0';
554		}
555	    }
556	} else {
557	    if (str[l])
558		str = dupstrpfx(str, l);
559	    insert(str, 0);
560	}
561    } else {
562	/* Do pattern matching on current path section. */
563	char *fn = pathbuf[pathbufcwd] ? unmeta(pathbuf + pathbufcwd) : ".";
564	int dirs = !!q->next;
565	DIR *lock = opendir(fn);
566	char *subdirs = NULL;
567	int subdirlen = 0;
568
569	if (lock == NULL)
570	    return;
571	while ((fn = zreaddir(lock, 1)) && !errflag) {
572	    /* prefix and suffix are zle trickery */
573	    if (!dirs && !colonmod &&
574		((glob_pre && !strpfx(glob_pre, fn))
575		 || (glob_suf && !strsfx(glob_suf, fn))))
576		continue;
577	    errsfound = errssofar;
578	    if (pattry(p, fn)) {
579		/* if this name matchs the pattern... */
580		if (pbcwdsav == pathbufcwd &&
581		    strlen(fn) + pathpos - pathbufcwd >= PATH_MAX) {
582		    int err;
583
584		    DPUTS(pathpos == pathbufcwd,
585			  "BUG: filename longer than PATH_MAX");
586		    err = lchdir(pathbuf + pathbufcwd, &ds, 0);
587		    if (err == -1)
588			break;
589		    if (err) {
590			zerr("current directory lost during glob");
591			break;
592		    }
593		    pathbufcwd = pathpos;
594		}
595		if (dirs) {
596		    int l;
597
598		    /*
599		     * If not the last component in the path:
600		     *
601		     * If we made an approximation in the new path segment,
602		     * then it is possible we made too many errors.  For
603		     * example, (ab)#(cb)# will match the directory abcb
604		     * with one error if allowed to, even though it can
605		     * match with none.  This will stop later parts of the
606		     * path matching, so we need to check by reducing the
607		     * maximum number of errors and seeing if the directory
608		     * still matches.  Luckily, this is not a terribly
609		     * common case, since complex patterns typically occur
610		     * in the last part of the path which is not affected
611		     * by this problem.
612		     */
613		    if (errsfound > errssofar) {
614			forceerrs = errsfound - 1;
615			while (forceerrs >= errssofar) {
616			    errsfound = errssofar;
617			    if (!pattry(p, fn))
618				break;
619			    forceerrs = errsfound - 1;
620			}
621			errsfound = forceerrs + 1;
622			forceerrs = -1;
623		    }
624		    if (closure) {
625			/* if matching multiple directories */
626			struct stat buf;
627
628			if (statfullpath(fn, &buf, !q->follow)) {
629			    if (errno != ENOENT && errno != EINTR &&
630				errno != ENOTDIR && !errflag) {
631				zwarn("%e: %s", errno, fn);
632			    }
633			    continue;
634			}
635			if (!S_ISDIR(buf.st_mode))
636			    continue;
637		    }
638		    l = strlen(fn) + 1;
639		    subdirs = hrealloc(subdirs, subdirlen, subdirlen + l
640				       + sizeof(int));
641		    strcpy(subdirs + subdirlen, fn);
642		    subdirlen += l;
643		    /* store the count of errors made so far, too */
644		    memcpy(subdirs + subdirlen, (char *)&errsfound,
645			   sizeof(int));
646		    subdirlen += sizeof(int);
647		} else
648		    /* if the last filename component, just add it */
649		    insert(fn, 1);
650	    }
651	}
652	closedir(lock);
653	if (subdirs) {
654	    int oppos = pathpos;
655
656	    for (fn = subdirs; fn < subdirs+subdirlen; ) {
657		int l = strlen(fn);
658		addpath(fn, l);
659		fn += l + 1;
660		memcpy((char *)&errsfound, fn, sizeof(int));
661		fn += sizeof(int);
662		scanner((q->closure) ? q : q->next);  /* scan next level */
663		pathbuf[pathpos = oppos] = '\0';
664	    }
665	    hrealloc(subdirs, subdirlen, 0);
666	}
667    }
668    if (pbcwdsav < pathbufcwd) {
669	if (restoredir(&ds))
670	    zerr("current directory lost during glob");
671	zsfree(ds.dirname);
672	if (ds.dirfd >= 0)
673	    close(ds.dirfd);
674	pathbufcwd = pbcwdsav;
675    }
676}
677
678/* This function tokenizes a zsh glob pattern */
679
680/**/
681static Complist
682parsecomplist(char *instr)
683{
684    Patprog p1;
685    Complist l1;
686    char *str;
687    int compflags = gf_noglobdots ? (PAT_FILE|PAT_NOGLD) : PAT_FILE;
688
689    if (instr[0] == Star && instr[1] == Star &&
690        (instr[2] == '/' || (instr[2] == Star && instr[3] == '/'))) {
691	/* Match any number of directories. */
692	int follow;
693
694	/* with three stars, follow symbolic links */
695	follow = (instr[2] == Star);
696	instr += (3 + follow);
697
698	/* Now get the next path component if there is one. */
699	l1 = (Complist) zhalloc(sizeof *l1);
700	if ((l1->next = parsecomplist(instr)) == NULL) {
701	    errflag = 1;
702	    return NULL;
703	}
704	l1->pat = patcompile(NULL, compflags | PAT_ANY, NULL);
705	l1->closure = 1;		/* ...zero or more times. */
706	l1->follow = follow;
707	return l1;
708    }
709
710    /* Parse repeated directories such as (dir/)# and (dir/)## */
711    if (*(str = instr) == Inpar && !skipparens(Inpar, Outpar, (char **)&str) &&
712        *str == Pound && isset(EXTENDEDGLOB) && str[-2] == '/') {
713	instr++;
714	if (!(p1 = patcompile(instr, compflags, &instr)))
715	    return NULL;
716	if (instr[0] == '/' && instr[1] == Outpar && instr[2] == Pound) {
717	    int pdflag = 0;
718
719	    instr += 3;
720	    if (*instr == Pound) {
721		pdflag = 1;
722		instr++;
723	    }
724	    l1 = (Complist) zhalloc(sizeof *l1);
725	    l1->pat = p1;
726	    l1->closure = 1 + pdflag;
727	    l1->follow = 0;
728	    l1->next = parsecomplist(instr);
729	    return (l1->pat) ? l1 : NULL;
730	}
731    } else {
732	/* parse single path component */
733	if (!(p1 = patcompile(instr, compflags|PAT_FILET, &instr)))
734	    return NULL;
735	/* then do the remaining path components */
736	if (*instr == '/' || !*instr) {
737	    int ef = *instr == '/';
738
739	    l1 = (Complist) zhalloc(sizeof *l1);
740	    l1->pat = p1;
741	    l1->closure = 0;
742	    l1->next = ef ? parsecomplist(instr+1) : NULL;
743	    return (ef && !l1->next) ? NULL : l1;
744	}
745    }
746    errflag = 1;
747    return NULL;
748}
749
750/* turn a string into a Complist struct:  this has path components */
751
752/**/
753static Complist
754parsepat(char *str)
755{
756    long assert;
757    int ignore;
758
759    patcompstart();
760    /*
761     * Check for initial globbing flags, so that they don't form
762     * a bogus path component.
763     */
764    if ((*str == Inpar && str[1] == Pound && isset(EXTENDEDGLOB)) ||
765	(isset(KSHGLOB) && *str == '@' && str[1] == Inpar &&
766	 str[2] == Pound)) {
767	str += (*str == Inpar) ? 2 : 3;
768	if (!patgetglobflags(&str, &assert, &ignore))
769	    return NULL;
770    }
771
772    /* Now there is no (#X) in front, we can check the path. */
773    if (!pathbuf)
774	pathbuf = zalloc(pathbufsz = PATH_MAX);
775    DPUTS(pathbufcwd, "BUG: glob changed directory");
776    if (*str == '/') {		/* pattern has absolute path */
777	str++;
778	pathbuf[0] = '/';
779	pathbuf[pathpos = 1] = '\0';
780    } else			/* pattern is relative to pwd */
781	pathbuf[pathpos = 0] = '\0';
782
783    return parsecomplist(str);
784}
785
786/* get number after qualifier */
787
788/**/
789static off_t
790qgetnum(char **s)
791{
792    off_t v = 0;
793
794    if (!idigit(**s)) {
795	zerr("number expected");
796	return 0;
797    }
798    while (idigit(**s))
799	v = v * 10 + *(*s)++ - '0';
800    return v;
801}
802
803/* get mode spec after qualifier */
804
805/**/
806static zlong
807qgetmodespec(char **s)
808{
809    zlong yes = 0, no = 0, val, mask, t;
810    char *p = *s, c, how, end;
811
812    if ((c = *p) == '=' || c == Equals || c == '+' || c == '-' ||
813	c == '?' || c == Quest || (c >= '0' && c <= '7')) {
814	end = 0;
815	c = 0;
816    } else {
817	end = (c == '<' ? '>' :
818	       (c == '[' ? ']' :
819		(c == '{' ? '}' :
820		 (c == Inang ? Outang :
821		  (c == Inbrack ? Outbrack :
822		   (c == Inbrace ? Outbrace : c))))));
823	p++;
824    }
825    do {
826	mask = 0;
827	while (((c = *p) == 'u' || c == 'g' || c == 'o' || c == 'a') && end) {
828	    switch (c) {
829	    case 'o': mask |= 01007; break;
830	    case 'g': mask |= 02070; break;
831	    case 'u': mask |= 04700; break;
832	    case 'a': mask |= 07777; break;
833	    }
834	    p++;
835	}
836	how = ((c == '+' || c == '-') ? c : '=');
837	if (c == '+' || c == '-' || c == '=' || c == Equals)
838	    p++;
839	val = 0;
840	if (mask) {
841	    while ((c = *p++) != ',' && c != end) {
842		switch (c) {
843		case 'x': val |= 00111; break;
844		case 'w': val |= 00222; break;
845		case 'r': val |= 00444; break;
846		case 's': val |= 06000; break;
847		case 't': val |= 01000; break;
848		case '0': case '1': case '2': case '3':
849		case '4': case '5': case '6': case '7':
850		    t = ((zlong) c - '0');
851		    val |= t | (t << 3) | (t << 6);
852		    break;
853		default:
854		    zerr("invalid mode specification");
855		    return 0;
856		}
857	    }
858	    if (how == '=' || how == '+') {
859		yes |= val & mask;
860		val = ~val;
861	    }
862	    if (how == '=' || how == '-')
863		no |= val & mask;
864	} else if (!(end && c == end) && c != ',' && c) {
865	    t = 07777;
866	    while ((c = *p) == '?' || c == Quest ||
867		   (c >= '0' && c <= '7')) {
868		if (c == '?' || c == Quest) {
869		    t = (t << 3) | 7;
870		    val <<= 3;
871		} else {
872		    t <<= 3;
873		    val = (val << 3) | ((zlong) c - '0');
874		}
875		p++;
876	    }
877	    if (end && c != end && c != ',') {
878		zerr("invalid mode specification");
879		return 0;
880	    }
881	    if (how == '=') {
882		yes = (yes & ~t) | val;
883		no = (no & ~t) | (~val & ~t);
884	    } else if (how == '+')
885		yes |= val;
886	    else
887		no |= val;
888	} else {
889	    zerr("invalid mode specification");
890	    return 0;
891        }
892    } while (end && c != end);
893
894    *s = p;
895    return ((yes & 07777) | ((no & 07777) << 12));
896}
897
898static int
899gmatchcmp(Gmatch a, Gmatch b)
900{
901    int i;
902    off_t r = 0L;
903    struct globsort *s;
904    char **asortstrp = NULL, **bsortstrp = NULL;
905
906    for (i = gf_nsorts, s = gf_sortlist; i; i--, s++) {
907	switch (s->tp & ~GS_DESC) {
908	case GS_NAME:
909	    r = zstrcmp(b->name, a->name, gf_numsort ? SORTIT_NUMERICALLY : 0);
910	    break;
911	case GS_DEPTH:
912	    {
913		char *aptr = a->name, *bptr = b->name;
914		int slasha = 0, slashb = 0;
915		/* Count slashes.  Trailing slashes don't count. */
916		while (*aptr && *aptr == *bptr)
917		    aptr++, bptr++;
918		if (*aptr)
919		    for (; aptr[1]; aptr++)
920			if (*aptr == '/') {
921			    slasha = 1;
922			    break;
923			}
924		if (*bptr)
925		    for (; bptr[1]; bptr++)
926			if (*bptr == '/') {
927			    slashb = 1;
928			    break;
929			}
930		r = slasha - slashb;
931	    }
932	    break;
933	case GS_EXEC:
934	    if (!asortstrp) {
935		asortstrp = a->sortstrs;
936		bsortstrp = b->sortstrs;
937	    } else {
938		asortstrp++;
939		bsortstrp++;
940	    }
941	    r = zstrcmp(*bsortstrp, *asortstrp,
942			gf_numsort ? SORTIT_NUMERICALLY : 0);
943	    break;
944	case GS_SIZE:
945	    r = b->size - a->size;
946	    break;
947	case GS_ATIME:
948	    r = a->atime - b->atime;
949#ifdef GET_ST_ATIME_NSEC
950            if (!r)
951              r = a->ansec - b->ansec;
952#endif
953	    break;
954	case GS_MTIME:
955	    r = a->mtime - b->mtime;
956#ifdef GET_ST_MTIME_NSEC
957            if (!r)
958              r = a->mnsec - b->mnsec;
959#endif
960	    break;
961	case GS_CTIME:
962	    r = a->ctime - b->ctime;
963#ifdef GET_ST_CTIME_NSEC
964            if (!r)
965              r = a->cnsec - b->cnsec;
966#endif
967	    break;
968	case GS_LINKS:
969	    r = b->links - a->links;
970	    break;
971	case GS__SIZE:
972	    r = b->_size - a->_size;
973	    break;
974	case GS__ATIME:
975	    r = a->_atime - b->_atime;
976#ifdef GET_ST_ATIME_NSEC
977            if (!r)
978              r = a->_ansec - b->_ansec;
979#endif
980	    break;
981	case GS__MTIME:
982	    r = a->_mtime - b->_mtime;
983#ifdef GET_ST_MTIME_NSEC
984            if (!r)
985              r = a->_mnsec - b->_mnsec;
986#endif
987	    break;
988	case GS__CTIME:
989	    r = a->_ctime - b->_ctime;
990#ifdef GET_ST_CTIME_NSEC
991            if (!r)
992              r = a->_cnsec - b->_cnsec;
993#endif
994	    break;
995	case GS__LINKS:
996	    r = b->_links - a->_links;
997	    break;
998	}
999	if (r)
1000	    return (s->tp & GS_DESC) ?
1001	      (r < 0L ? 1 : -1) :
1002	      (r > 0L ? 1 : -1);
1003    }
1004    return 0;
1005}
1006
1007/*
1008 * Duplicate a list of qualifiers using the `next' linkage (not the
1009 * `or' linkage).  Return the head element and set *last (if last non-NULL)
1010 * to point to the last element of the new list.  All allocation is on the
1011 * heap (or off the heap?)
1012 */
1013static struct qual *dup_qual_list(struct qual *orig, struct qual **lastp)
1014{
1015    struct qual *qfirst = NULL, *qlast = NULL;
1016
1017    while (orig) {
1018	struct qual *qnew = (struct qual *)zhalloc(sizeof(struct qual));
1019	*qnew = *orig;
1020	qnew->next = qnew->or = NULL;
1021
1022	if (!qfirst)
1023	    qfirst = qnew;
1024	if (qlast)
1025	    qlast->next = qnew;
1026	qlast = qnew;
1027
1028	orig = orig->next;
1029    }
1030
1031    if (lastp)
1032	*lastp = qlast;
1033    return qfirst;
1034}
1035
1036
1037/*
1038 * Get a glob string for execution, following e, P or + qualifiers.
1039 * Pointer is character after the e, P or +.
1040 */
1041
1042/**/
1043static char *
1044glob_exec_string(char **sp)
1045{
1046    char sav, *tt, *sdata, *s = *sp;
1047    int plus;
1048
1049    if (s[-1] == '+') {
1050	plus = 0;
1051	tt = itype_end(s, IIDENT, 0);
1052	if (tt == s)
1053	{
1054	    zerr("missing identifier after `+'");
1055	    return NULL;
1056	}
1057    } else {
1058	tt = get_strarg(s, &plus);
1059	if (!*tt)
1060	{
1061	    zerr("missing end of string");
1062	    return NULL;
1063	}
1064    }
1065
1066    sav = *tt;
1067    *tt = '\0';
1068    sdata = dupstring(s + plus);
1069    untokenize(sdata);
1070    *tt = sav;
1071    if (sav)
1072	*sp = tt + plus;
1073    else
1074	*sp = tt;
1075
1076    return sdata;
1077}
1078
1079/*
1080 * Insert a glob match.
1081 * If there were words to prepend given by the P glob qualifier, do so.
1082 */
1083static void
1084insert_glob_match(LinkList list, LinkNode next, char *data)
1085{
1086    if (gf_pre_words) {
1087	LinkNode added;
1088	for (added = firstnode(gf_pre_words); added; incnode(added)) {
1089	    next = insertlinknode(list, next, dupstring(getdata(added)));
1090	}
1091    }
1092
1093    insertlinknode(list, next, data);
1094}
1095
1096/* Main entry point to the globbing code for filename globbing. *
1097 * np points to a node in the list list which will be expanded  *
1098 * into a series of nodes.                                      */
1099
1100/**/
1101void
1102zglob(LinkList list, LinkNode np, int nountok)
1103{
1104    struct qual *qo, *qn, *ql;
1105    LinkNode node = prevnode(np);
1106    char *str;				/* the pattern                   */
1107    int sl;				/* length of the pattern         */
1108    Complist q;				/* pattern after parsing         */
1109    char *ostr = (char *)getdata(np);	/* the pattern before the parser */
1110					/* chops it up                   */
1111    int first = 0, end = -1;		/* index of first match to return */
1112					/* and index+1 of the last match */
1113    struct globdata saved;		/* saved glob state              */
1114    int nobareglob = !isset(BAREGLOBQUAL);
1115
1116    if (unset(GLOBOPT) || !haswilds(ostr) || unset(EXECOPT)) {
1117	if (!nountok)
1118	    untokenize(ostr);
1119	return;
1120    }
1121    save_globstate(saved);
1122
1123    str = dupstring(ostr);
1124    uremnode(list, np);
1125
1126    /* quals will hold the complete list of qualifiers (file static). */
1127    quals = NULL;
1128    /*
1129     * qualct and qualorct indicate we have qualifiers in the last
1130     * alternative, or a set of alternatives, respectively.  They
1131     * are not necessarily an accurate count, however.
1132     */
1133    qualct = qualorct = 0;
1134    /*
1135     * colonmod is a concatenated list of all colon modifiers found in
1136     * all sets of qualifiers.
1137     */
1138    colonmod = NULL;
1139    /* The gf_* flags are qualifiers which are applied globally. */
1140    gf_nullglob = isset(NULLGLOB);
1141    gf_markdirs = isset(MARKDIRS);
1142    gf_listtypes = gf_follow = 0;
1143    gf_noglobdots = unset(GLOBDOTS);
1144    gf_numsort = isset(NUMERICGLOBSORT);
1145    gf_sorts = gf_nsorts = 0;
1146    gf_pre_words = NULL;
1147
1148    /* Check for qualifiers */
1149    while (!nobareglob || isset(EXTENDEDGLOB)) {
1150	struct qual *newquals;
1151	char *s;
1152	int sense, paren;
1153	off_t data;
1154	char *sdata, *newcolonmod;
1155	int (*func) _((char *, Statptr, off_t, char *));
1156
1157	/*
1158	 * Initialise state variables for current file pattern.
1159	 * newquals is the root for the linked list of all qualifiers.
1160	 * qo is the root of the current list of alternatives.
1161	 * ql is the end of the current alternative where the `next' will go.
1162	 * qn is the current qualifier node to be added.
1163	 *
1164	 * Here is an attempt at a diagram.  An `or' is added horizontally
1165	 * to the top line, a `next' at the bottom of the right hand line.
1166	 * `qn' is usually NULL unless a new `or' has just been added.
1167	 *
1168	 * quals -> x  -> x -> qo
1169	 *          |     |    |
1170	 *          x     x    x
1171	 *          |          |
1172	 *          x          ql
1173	 *
1174	 * In fact, after each loop the complete set is in the file static
1175	 * `quals'.  Then, if we have a second set of qualifiers, we merge
1176	 * the lists together.  This is only tricky if one or both have an
1177	 * `or' in them; then we need to distribute over all alternatives.
1178	 */
1179	newquals = qo = qn = ql = NULL;
1180
1181	sl = strlen(str);
1182	if (str[sl - 1] != Outpar)
1183	    break;
1184
1185	/* Check these are really qualifiers, not a set of *
1186	 * alternatives or exclusions.  We can be more     *
1187	 * lenient with an explicit (#q) than with a bare  *
1188	 * set of qualifiers.                              */
1189	paren = 0;
1190	for (s = str + sl - 2; *s && (*s != Inpar || paren); s--) {
1191	    switch (*s) {
1192	    case Outpar:
1193		paren++; /*FALLTHROUGH*/
1194	    case Bar:
1195		nobareglob = 1;
1196		break;
1197	    case Tilde:
1198		if (isset(EXTENDEDGLOB))
1199		    nobareglob = 1;
1200		break;
1201	    case Inpar:
1202		paren--;
1203		break;
1204	    }
1205	}
1206	if (*s != Inpar)
1207	    break;
1208	if (isset(EXTENDEDGLOB) && s[1] == Pound) {
1209	    if (s[2] == 'q') {
1210		*s = 0;
1211		s += 2;
1212	    } else
1213		break;
1214	} else if (nobareglob)
1215	    break;
1216
1217	/* Real qualifiers found. */
1218	nobareglob = 1;
1219	sense = 0;	   /* bit 0 for match (0)/don't match (1)   */
1220			   /* bit 1 for follow links (2), don't (0) */
1221	data = 0;	   /* Any numerical argument required       */
1222	sdata = NULL;	   /* Any list argument required            */
1223	newcolonmod = NULL; /* Contains trailing colon modifiers    */
1224
1225	str[sl-1] = 0;
1226	*s++ = 0;
1227	while (*s && !newcolonmod) {
1228	    func = (int (*) _((char *, Statptr, off_t, char *)))0;
1229	    if (idigit(*s)) {
1230		/* Store numeric argument for qualifier */
1231		func = qualflags;
1232		data = 0;
1233		sdata = NULL;
1234		while (idigit(*s))
1235		    data = data * 010 + (*s++ - '0');
1236	    } else if (*s == ',') {
1237		/* A comma separates alternative sets of qualifiers */
1238		s++;
1239		sense = 0;
1240		if (qualct) {
1241		    qn = (struct qual *)hcalloc(sizeof *qn);
1242		    qo->or = qn;
1243		    qo = qn;
1244		    qualorct++;
1245		    qualct = 0;
1246		    ql = NULL;
1247		}
1248	    } else {
1249		switch (*s++) {
1250		case ':':
1251		    /* Remaining arguments are history-type     *
1252		     * colon substitutions, handled separately. */
1253		    newcolonmod = s - 1;
1254		    untokenize(newcolonmod);
1255		    if (colonmod) {
1256			/* remember we're searching backwards */
1257			colonmod = dyncat(newcolonmod, colonmod);
1258		    } else
1259			colonmod = newcolonmod;
1260		    break;
1261		case Hat:
1262		case '^':
1263		    /* Toggle sense:  go from positive to *
1264		     * negative match and vice versa.     */
1265		    sense ^= 1;
1266		    break;
1267		case '-':
1268		    /* Toggle matching of symbolic links */
1269		    sense ^= 2;
1270		    break;
1271		case '@':
1272		    /* Match symbolic links */
1273		    func = qualislnk;
1274		    break;
1275		case Equals:
1276		case '=':
1277		    /* Match sockets */
1278		    func = qualissock;
1279		    break;
1280		case 'p':
1281		    /* Match named pipes */
1282		    func = qualisfifo;
1283		    break;
1284		case '/':
1285		    /* Match directories */
1286		    func = qualisdir;
1287		    break;
1288		case '.':
1289		    /* Match regular files */
1290		    func = qualisreg;
1291		    break;
1292		case '%':
1293		    /* Match special files: block, *
1294		     * character or any device     */
1295		    if (*s == 'b')
1296			s++, func = qualisblk;
1297		    else if (*s == 'c')
1298			s++, func = qualischr;
1299		    else
1300			func = qualisdev;
1301		    break;
1302		case Star:
1303		    /* Match executable plain files */
1304		    func = qualiscom;
1305		    break;
1306		case 'R':
1307		    /* Match world-readable files */
1308		    func = qualflags;
1309		    data = 0004;
1310		    break;
1311		case 'W':
1312		    /* Match world-writeable files */
1313		    func = qualflags;
1314		    data = 0002;
1315		    break;
1316		case 'X':
1317		    /* Match world-executable files */
1318		    func = qualflags;
1319		    data = 0001;
1320		    break;
1321		case 'A':
1322		    func = qualflags;
1323		    data = 0040;
1324		    break;
1325		case 'I':
1326		    func = qualflags;
1327		    data = 0020;
1328		    break;
1329		case 'E':
1330		    func = qualflags;
1331		    data = 0010;
1332		    break;
1333		case 'r':
1334		    /* Match files readable by current process */
1335		    func = qualflags;
1336		    data = 0400;
1337		    break;
1338		case 'w':
1339		    /* Match files writeable by current process */
1340		    func = qualflags;
1341		    data = 0200;
1342		    break;
1343		case 'x':
1344		    /* Match files executable by current process */
1345		    func = qualflags;
1346		    data = 0100;
1347		    break;
1348		case 's':
1349		    /* Match setuid files */
1350		    func = qualflags;
1351		    data = 04000;
1352		    break;
1353		case 'S':
1354		    /* Match setgid files */
1355		    func = qualflags;
1356		    data = 02000;
1357		    break;
1358		case 't':
1359		    func = qualflags;
1360		    data = 01000;
1361		    break;
1362		case 'd':
1363		    /* Match device files by device number  *
1364		     * (as given by stat's st_dev element). */
1365		    func = qualdev;
1366		    data = qgetnum(&s);
1367		    break;
1368		case 'l':
1369		    /* Match files with the given no. of hard links */
1370		    func = qualnlink;
1371		    g_amc = -1;
1372		    goto getrange;
1373		case 'U':
1374		    /* Match files owned by effective user ID */
1375		    func = qualuid;
1376		    data = geteuid();
1377		    break;
1378		case 'G':
1379		    /* Match files owned by effective group ID */
1380		    func = qualgid;
1381		    data = getegid();
1382		    break;
1383		case 'u':
1384		    /* Match files owned by given user id */
1385		    func = qualuid;
1386		    /* either the actual uid... */
1387		    if (idigit(*s))
1388			data = qgetnum(&s);
1389		    else {
1390			/* ... or a user name */
1391			char sav, *tt;
1392			int arglen;
1393
1394			/* Find matching delimiters */
1395			tt = get_strarg(s, &arglen);
1396			if (!*tt) {
1397			    zerr("missing end of name");
1398			    data = 0;
1399			} else {
1400#ifdef USE_GETPWNAM
1401			    struct passwd *pw;
1402			    sav = *tt;
1403			    *tt = '\0';
1404
1405			    if ((pw = getpwnam(s + arglen)))
1406				data = pw->pw_uid;
1407			    else {
1408				zerr("unknown user");
1409				data = 0;
1410			    }
1411			    *tt = sav;
1412#else /* !USE_GETPWNAM */
1413			    sav = *tt;
1414			    zerr("unknown user");
1415			    data = 0;
1416#endif /* !USE_GETPWNAM */
1417			    if (sav)
1418				s = tt + arglen;
1419			    else
1420				s = tt;
1421			}
1422		    }
1423		    break;
1424		case 'g':
1425		    /* Given gid or group id... works like `u' */
1426		    func = qualgid;
1427		    /* either the actual gid... */
1428		    if (idigit(*s))
1429			data = qgetnum(&s);
1430		    else {
1431			/* ...or a delimited group name. */
1432			char sav, *tt;
1433			int arglen;
1434
1435			tt = get_strarg(s, &arglen);
1436			if (!*tt) {
1437			    zerr("missing end of name");
1438			    data = 0;
1439			} else {
1440#ifdef USE_GETGRNAM
1441			    struct group *gr;
1442			    sav = *tt;
1443			    *tt = '\0';
1444
1445			    if ((gr = getgrnam(s + arglen)))
1446				data = gr->gr_gid;
1447			    else {
1448				zerr("unknown group");
1449				data = 0;
1450			    }
1451			    *tt = sav;
1452#else /* !USE_GETGRNAM */
1453			    sav = *tt;
1454			    zerr("unknown group");
1455			    data = 0;
1456#endif /* !USE_GETGRNAM */
1457			    if (sav)
1458				s = tt + arglen;
1459			    else
1460				s = tt;
1461			}
1462		    }
1463		    break;
1464		case 'f':
1465		    /* Match modes with chmod-spec. */
1466		    func = qualmodeflags;
1467		    data = qgetmodespec(&s);
1468		    break;
1469		case 'F':
1470		    func = qualnonemptydir;
1471		    break;
1472		case 'M':
1473		    /* Mark directories with a / */
1474		    if ((gf_markdirs = !(sense & 1)))
1475			gf_follow = sense & 2;
1476		    break;
1477		case 'T':
1478		    /* Mark types in a `ls -F' type fashion */
1479		    if ((gf_listtypes = !(sense & 1)))
1480			gf_follow = sense & 2;
1481		    break;
1482		case 'N':
1483		    /* Nullglob:  remove unmatched patterns. */
1484		    gf_nullglob = !(sense & 1);
1485		    break;
1486		case 'D':
1487		    /* Glob dots: match leading dots implicitly */
1488		    gf_noglobdots = sense & 1;
1489		    break;
1490		case 'n':
1491		    /* Numeric glob sort */
1492		    gf_numsort = !(sense & 1);
1493		    break;
1494		case 'a':
1495		    /* Access time in given range */
1496		    g_amc = 0;
1497		    func = qualtime;
1498		    goto getrange;
1499		case 'm':
1500		    /* Modification time in given range */
1501		    g_amc = 1;
1502		    func = qualtime;
1503		    goto getrange;
1504		case 'c':
1505		    /* Inode creation time in given range */
1506		    g_amc = 2;
1507		    func = qualtime;
1508		    goto getrange;
1509		case 'L':
1510		    /* File size (Length) in given range */
1511		    func = qualsize;
1512		    g_amc = -1;
1513		    /* Get size multiplier */
1514		    g_units = TT_BYTES;
1515		    if (*s == 'p' || *s == 'P')
1516			g_units = TT_POSIX_BLOCKS, ++s;
1517		    else if (*s == 'k' || *s == 'K')
1518			g_units = TT_KILOBYTES, ++s;
1519		    else if (*s == 'm' || *s == 'M')
1520			g_units = TT_MEGABYTES, ++s;
1521		  getrange:
1522		    /* Get time multiplier */
1523		    if (g_amc >= 0) {
1524			g_units = TT_DAYS;
1525			if (*s == 'h')
1526			    g_units = TT_HOURS, ++s;
1527			else if (*s == 'm')
1528			    g_units = TT_MINS, ++s;
1529			else if (*s == 'w')
1530			    g_units = TT_WEEKS, ++s;
1531			else if (*s == 'M')
1532			    g_units = TT_MONTHS, ++s;
1533			else if (*s == 's')
1534			    g_units = TT_SECONDS, ++s;
1535			else if (*s == 'd')
1536			    ++s;
1537		    }
1538		    /* See if it's greater than, equal to, or less than */
1539		    if ((g_range = *s == '+' ? 1 : *s == '-' ? -1 : 0))
1540			++s;
1541		    data = qgetnum(&s);
1542		    break;
1543
1544		case 'o':
1545		case 'O':
1546		{
1547		    int t;
1548		    char *send;
1549
1550		    if (gf_nsorts == MAX_SORTS) {
1551			zerr("too many glob sort specifiers");
1552			restore_globstate(saved);
1553			return;
1554		    }
1555
1556		    /* usually just one character */
1557		    send = s+1;
1558		    switch (*s) {
1559		    case 'n': t = GS_NAME; break;
1560		    case 'L': t = GS_SIZE; break;
1561		    case 'l': t = GS_LINKS; break;
1562		    case 'a': t = GS_ATIME; break;
1563		    case 'm': t = GS_MTIME; break;
1564		    case 'c': t = GS_CTIME; break;
1565		    case 'd': t = GS_DEPTH; break;
1566		    case 'N': t = GS_NONE; break;
1567		    case 'e':
1568		    case '+':
1569		    {
1570			t = GS_EXEC;
1571			if ((gf_sortlist[gf_nsorts].exec =
1572			     glob_exec_string(&send)) == NULL)
1573			{
1574			    restore_globstate(saved);
1575			    return;
1576			}
1577			break;
1578		    }
1579		    default:
1580			zerr("unknown sort specifier");
1581			restore_globstate(saved);
1582			return;
1583		    }
1584		    if (t != GS_EXEC) {
1585			if ((sense & 2) && !(t & (GS_NAME|GS_DEPTH)))
1586			    t <<= GS_SHIFT; /* HERE: GS_EXEC? */
1587			if (gf_sorts & t) {
1588			    zerr("doubled sort specifier");
1589			    restore_globstate(saved);
1590			    return;
1591			}
1592		    }
1593		    gf_sorts |= t;
1594		    gf_sortlist[gf_nsorts++].tp = t |
1595			(((sense & 1) ^ (s[-1] == 'O')) ? GS_DESC : 0);
1596		    s = send;
1597		    break;
1598		}
1599		case '+':
1600		case 'e':
1601		{
1602		    char *tt;
1603
1604		    tt = glob_exec_string(&s);
1605
1606		    if (tt == NULL) {
1607			data = 0;
1608		    } else {
1609			func = qualsheval;
1610			sdata = tt;
1611		    }
1612		    break;
1613		}
1614		case '[':
1615		case Inbrack:
1616		{
1617		    char *os = --s;
1618		    struct value v;
1619
1620		    v.isarr = SCANPM_WANTVALS;
1621		    v.pm = NULL;
1622		    v.end = -1;
1623		    v.flags = 0;
1624		    if (getindex(&s, &v, 0) || s == os) {
1625			zerr("invalid subscript");
1626			restore_globstate(saved);
1627			return;
1628		    }
1629		    first = v.start;
1630		    end = v.end;
1631		    break;
1632		}
1633		case 'P':
1634		{
1635		    char *tt;
1636		    tt = glob_exec_string(&s);
1637
1638		    if (tt != NULL)
1639		    {
1640			if (!gf_pre_words)
1641			    gf_pre_words = newlinklist();
1642			addlinknode(gf_pre_words, tt);
1643		    }
1644		    break;
1645		}
1646		default:
1647		    zerr("unknown file attribute");
1648		    restore_globstate(saved);
1649		    return;
1650		}
1651	    }
1652	    if (func) {
1653		/* Requested test is performed by function func */
1654		if (!qn)
1655		    qn = (struct qual *)hcalloc(sizeof *qn);
1656		if (ql)
1657		    ql->next = qn;
1658		ql = qn;
1659		if (!newquals)
1660		    newquals = qo = qn;
1661		qn->func = func;
1662		qn->sense = sense;
1663		qn->data = data;
1664		qn->sdata = sdata;
1665		qn->range = g_range;
1666		qn->units = g_units;
1667		qn->amc = g_amc;
1668
1669		qn = NULL;
1670		qualct++;
1671	    }
1672	    if (errflag) {
1673		restore_globstate(saved);
1674		return;
1675	    }
1676	}
1677
1678	if (quals && newquals) {
1679	    /* Merge previous group of qualifiers with new set. */
1680	    if (quals->or || newquals->or) {
1681		/* The hard case. */
1682		struct qual *qorhead = NULL, *qortail = NULL;
1683		/*
1684		 * Distribute in the most trivial way, by creating
1685		 * all possible combinations of the two sets and chaining
1686		 * these into one long set of alternatives given
1687		 * by qorhead and qortail.
1688		 */
1689		for (qn = newquals; qn; qn = qn->or) {
1690		    for (qo = quals; qo; qo = qo->or) {
1691			struct qual *qfirst, *qlast;
1692			int islast = !qn->or && !qo->or;
1693			/* Generate first set of qualifiers... */
1694			if (islast) {
1695			    /* Last time round:  don't bother copying. */
1696			    qfirst = qn;
1697			    for (qlast = qfirst; qlast->next;
1698				 qlast = qlast->next)
1699				;
1700			} else
1701			    qfirst = dup_qual_list(qn, &qlast);
1702			/* ... link into new `or' chain ... */
1703			if (!qorhead)
1704			    qorhead = qfirst;
1705			if (qortail)
1706			    qortail->or = qfirst;
1707			qortail = qfirst;
1708			/* ... and concatenate second set. */
1709			qlast->next = islast ? qo : dup_qual_list(qo, NULL);
1710		    }
1711		}
1712		quals = qorhead;
1713	    } else {
1714		/*
1715		 * Easy: we can just chain the qualifiers together.
1716		 * This is an optimisation; the code above will work, too.
1717		 * We retain the original left to right ordering --- remember
1718		 * we are searching for sets of qualifiers from the right.
1719		 */
1720		qn = newquals;
1721		for ( ; newquals->next; newquals = newquals->next)
1722		    ;
1723		newquals->next = quals;
1724		quals = qn;
1725	    }
1726	} else if (newquals)
1727	    quals = newquals;
1728    }
1729    q = parsepat(str);
1730    if (!q || errflag) {	/* if parsing failed */
1731	restore_globstate(saved);
1732	if (unset(BADPATTERN)) {
1733	    if (!nountok)
1734		untokenize(ostr);
1735	    insertlinknode(list, node, ostr);
1736	    return;
1737	}
1738	errflag = 0;
1739	zerr("bad pattern: %s", ostr);
1740	return;
1741    }
1742    if (!gf_nsorts) {
1743	gf_sortlist[0].tp = gf_sorts = GS_NAME;
1744	gf_nsorts = 1;
1745    }
1746    /* Initialise receptacle for matched files, *
1747     * expanded by insert() where necessary.    */
1748    matchptr = matchbuf = (Gmatch)zalloc((matchsz = 16) *
1749					 sizeof(struct gmatch));
1750    matchct = 0;
1751    pattrystart();
1752
1753    /* The actual processing takes place here: matches go into  *
1754     * matchbuf.  This is the only top-level call to scanner(). */
1755    scanner(q);
1756
1757    /* Deal with failures to match depending on options */
1758    if (matchct)
1759	badcshglob |= 2;	/* at least one cmd. line expansion O.K. */
1760    else if (!gf_nullglob) {
1761	if (isset(CSHNULLGLOB)) {
1762	    badcshglob |= 1;	/* at least one cmd. line expansion failed */
1763	} else if (isset(NOMATCH)) {
1764	    zerr("no matches found: %s", ostr);
1765	    free(matchbuf);
1766	    restore_globstate(saved);
1767	    return;
1768	} else {
1769	    /* treat as an ordinary string */
1770	    untokenize(matchptr->name = dupstring(ostr));
1771	    matchptr++;
1772	    matchct = 1;
1773	}
1774    }
1775
1776    if (!(gf_sortlist[0].tp & GS_NONE)) {
1777	/*
1778	 * Get the strings to use for sorting by executing
1779	 * the code chunk.  We allow more than one of these.
1780	 */
1781	int nexecs = 0;
1782	struct globsort *sortp;
1783	struct globsort *lastsortp = gf_sortlist + gf_nsorts;
1784
1785	/* First find out if there are any GS_EXECs, counting them. */
1786	for (sortp = gf_sortlist; sortp < lastsortp; sortp++)
1787	{
1788	    if (sortp->tp & GS_EXEC)
1789		nexecs++;
1790	}
1791
1792	if (nexecs) {
1793	    Gmatch tmpptr;
1794	    int iexec = 0;
1795
1796	    /* Yes; allocate enough space for strings for each */
1797	    for (tmpptr = matchbuf; tmpptr < matchptr; tmpptr++)
1798		tmpptr->sortstrs = (char **)zhalloc(nexecs*sizeof(char*));
1799
1800	    /* Loop over each one, incrementing iexec */
1801	    for (sortp = gf_sortlist; sortp < lastsortp; sortp++)
1802	    {
1803		/* Ignore unless this is a GS_EXEC */
1804		if (sortp->tp & GS_EXEC) {
1805		    Eprog prog;
1806
1807		    if ((prog = parse_string(sortp->exec, 0))) {
1808			int ef = errflag, lv = lastval;
1809
1810			/* Parsed OK, execute for each name */
1811			for (tmpptr = matchbuf; tmpptr < matchptr; tmpptr++) {
1812			    setsparam("REPLY", ztrdup(tmpptr->name));
1813			    execode(prog, 1, 0, "globsort");
1814			    if (!errflag)
1815				tmpptr->sortstrs[iexec] =
1816				    dupstring(getsparam("REPLY"));
1817			    else
1818				tmpptr->sortstrs[iexec] = tmpptr->name;
1819			}
1820
1821			errflag = ef;
1822			lastval = lv;
1823		    } else {
1824			/* Failed, let's be safe */
1825			for (tmpptr = matchbuf; tmpptr < matchptr; tmpptr++)
1826			    tmpptr->sortstrs[iexec] = tmpptr->name;
1827		    }
1828
1829		    iexec++;
1830		}
1831	    }
1832	}
1833
1834	/* Sort arguments in to lexical (and possibly numeric) order. *
1835	 * This is reversed to facilitate insertion into the list.    */
1836	qsort((void *) & matchbuf[0], matchct, sizeof(struct gmatch),
1837	      (int (*) _((const void *, const void *)))gmatchcmp);
1838    }
1839
1840    if (first < 0) {
1841	first += matchct;
1842	if (first < 0)
1843	    first = 0;
1844    }
1845    if (end < 0)
1846	end += matchct + 1;
1847    else if (end > matchct)
1848	end = matchct;
1849    if ((end -= first) > 0) {
1850	if (gf_sortlist[0].tp & GS_NONE) {
1851	    /* Match list was never reversed, so insert back to front. */
1852	    matchptr = matchbuf + matchct - first - 1;
1853	    while (end-- > 0) {
1854		/* insert matches in the arg list */
1855		insert_glob_match(list, node, matchptr->name);
1856		matchptr--;
1857	    }
1858	} else {
1859	    matchptr = matchbuf + matchct - first - end;
1860	    while (end-- > 0) {
1861		/* insert matches in the arg list */
1862		insert_glob_match(list, node, matchptr->name);
1863		matchptr++;
1864	    }
1865	}
1866    }
1867    free(matchbuf);
1868
1869    restore_globstate(saved);
1870}
1871
1872/* Return the trailing character for marking file types */
1873
1874/**/
1875mod_export char
1876file_type(mode_t filemode)
1877{
1878    if(S_ISBLK(filemode))
1879	return '#';
1880    else if(S_ISCHR(filemode))
1881	return '%';
1882    else if(S_ISDIR(filemode))
1883	return '/';
1884    else if(S_ISFIFO(filemode))
1885	return '|';
1886    else if(S_ISLNK(filemode))
1887	return '@';
1888    else if(S_ISREG(filemode))
1889	return (filemode & S_IXUGO) ? '*' : ' ';
1890    else if(S_ISSOCK(filemode))
1891	return '=';
1892    else
1893	return '?';
1894}
1895
1896/* check to see if str is eligible for brace expansion */
1897
1898/**/
1899mod_export int
1900hasbraces(char *str)
1901{
1902    char *lbr, *mbr, *comma;
1903
1904    if (isset(BRACECCL)) {
1905	/* In this case, any properly formed brace expression  *
1906	 * will match and expand to the characters in between. */
1907	int bc, c;
1908
1909	for (bc = 0; (c = *str); ++str)
1910	    if (c == Inbrace) {
1911		if (!bc && str[1] == Outbrace)
1912		    *str++ = '{', *str = '}';
1913		else
1914		    bc++;
1915	    } else if (c == Outbrace) {
1916		if (!bc)
1917		    *str = '}';
1918		else if (!--bc)
1919		    return 1;
1920	    }
1921	return 0;
1922    }
1923    /* Otherwise we need to look for... */
1924    lbr = mbr = comma = NULL;
1925    for (;;) {
1926	switch (*str++) {
1927	case Inbrace:
1928	    if (!lbr) {
1929		lbr = str - 1;
1930		if (*str == '-')
1931		    str++;
1932		while (idigit(*str))
1933		    str++;
1934		if (*str == '.' && str[1] == '.') {
1935		    str++; str++;
1936		    if (*str == '-')
1937			str++;
1938		    while (idigit(*str))
1939			str++;
1940		    if (*str == Outbrace &&
1941			(idigit(lbr[1]) || idigit(str[-1])))
1942			return 1;
1943		    else if (*str == '.' && str[1] == '.') {
1944			str++; str++;
1945			if (*str == '-')
1946			    str++;
1947			while (idigit(*str))
1948			    str++;
1949			if (*str == Outbrace &&
1950			    (idigit(lbr[1]) || idigit(str[-1])))
1951			    return 1;
1952		    }
1953		}
1954	    } else {
1955		char *s = --str;
1956
1957		if (skipparens(Inbrace, Outbrace, &str)) {
1958		    *lbr = *s = '{';
1959		    if (comma)
1960			str = comma;
1961		    if (mbr && mbr < str)
1962			str = mbr;
1963		    lbr = mbr = comma = NULL;
1964		} else if (!mbr)
1965		    mbr = s;
1966	    }
1967	    break;
1968	case Outbrace:
1969	    if (!lbr)
1970		str[-1] = '}';
1971	    else if (comma)
1972		return 1;
1973	    else {
1974		*lbr = '{';
1975		str[-1] = '}';
1976		if (mbr)
1977		    str = mbr;
1978		mbr = lbr = NULL;
1979	    }
1980	    break;
1981	case Comma:
1982	    if (!lbr)
1983		str[-1] = ',';
1984	    else if (!comma)
1985		comma = str - 1;
1986	    break;
1987	case '\0':
1988	    if (lbr)
1989		*lbr = '{';
1990	    if (!mbr && !comma)
1991		return 0;
1992	    if (comma)
1993		str = comma;
1994	    if (mbr && mbr < str)
1995		str = mbr;
1996	    lbr = mbr = comma = NULL;
1997	    break;
1998	}
1999    }
2000}
2001
2002/* expand stuff like >>*.c */
2003
2004/**/
2005int
2006xpandredir(struct redir *fn, LinkList redirtab)
2007{
2008    char *nam;
2009    struct redir *ff;
2010    int ret = 0;
2011    local_list1(fake);
2012
2013    /* Stick the name in a list... */
2014    init_list1(fake, fn->name);
2015    /* ...which undergoes all the usual shell expansions */
2016    prefork(&fake, isset(MULTIOS) ? 0 : PREFORK_SINGLE);
2017    /* Globbing is only done for multios. */
2018    if (!errflag && isset(MULTIOS))
2019	globlist(&fake, 0);
2020    if (errflag)
2021	return 0;
2022    if (nonempty(&fake) && !nextnode(firstnode(&fake))) {
2023	/* Just one match, the usual case. */
2024	char *s = peekfirst(&fake);
2025	fn->name = s;
2026	untokenize(s);
2027	if (fn->type == REDIR_MERGEIN || fn->type == REDIR_MERGEOUT) {
2028	    if (s[0] == '-' && !s[1])
2029		fn->type = REDIR_CLOSE;
2030	    else if (s[0] == 'p' && !s[1])
2031		fn->fd2 = -2;
2032	    else {
2033		while (idigit(*s))
2034		    s++;
2035		if (!*s && s > fn->name)
2036		    fn->fd2 = zstrtol(fn->name, NULL, 10);
2037		else if (fn->type == REDIR_MERGEIN)
2038		    zerr("file number expected");
2039		else
2040		    fn->type = REDIR_ERRWRITE;
2041	    }
2042	}
2043    } else if (fn->type == REDIR_MERGEIN)
2044	zerr("file number expected");
2045    else {
2046	if (fn->type == REDIR_MERGEOUT)
2047	    fn->type = REDIR_ERRWRITE;
2048	while ((nam = (char *)ugetnode(&fake))) {
2049	    /* Loop over matches, duplicating the *
2050	     * redirection for each file found.   */
2051	    ff = (struct redir *) zhalloc(sizeof *ff);
2052	    *ff = *fn;
2053	    ff->name = nam;
2054	    addlinknode(redirtab, ff);
2055	    ret = 1;
2056	}
2057    }
2058    return ret;
2059}
2060
2061/* brace expansion */
2062
2063/**/
2064mod_export void
2065xpandbraces(LinkList list, LinkNode *np)
2066{
2067    LinkNode node = (*np), last = prevnode(node);
2068    char *str = (char *)getdata(node), *str3 = str, *str2;
2069    int prev, bc, comma, dotdot;
2070
2071    for (; *str != Inbrace; str++);
2072    /* First, match up braces and see what we have. */
2073    for (str2 = str, bc = comma = dotdot = 0; *str2; ++str2)
2074	if (*str2 == Inbrace)
2075	    ++bc;
2076	else if (*str2 == Outbrace) {
2077	    if (--bc == 0)
2078		break;
2079	} else if (bc == 1) {
2080	    if (*str2 == Comma)
2081		++comma;	/* we have {foo,bar} */
2082	    else if (*str2 == '.' && str2[1] == '.') {
2083		dotdot++;	/* we have {num1..num2} */
2084		++str2;
2085	    }
2086	}
2087    DPUTS(bc, "BUG: unmatched brace in xpandbraces()");
2088    if (!comma && dotdot) {
2089	/* Expand range like 0..10 numerically: comma or recursive
2090	   brace expansion take precedence. */
2091	char *dots, *p, *dots2 = NULL;
2092	LinkNode olast = last;
2093	/* Get the first number of the range */
2094	zlong rstart = zstrtol(str+1,&dots,10), rend = 0;
2095	int err = 0, rev = 0, rincr = 1;
2096	int wid1 = (dots - str) - 1, wid2 = (str2 - dots) - 2, wid3 = 0;
2097	int strp = str - str3;
2098
2099	if (dots == str + 1 || *dots != '.' || dots[1] != '.')
2100	    err++;
2101	else {
2102	    /* Get the last number of the range */
2103	    rend = zstrtol(dots+2,&p,10);
2104	    if (p == dots+2)
2105		err++;
2106	    /* check for {num1..num2..incr} */
2107	    if (p != str2) {
2108		wid2 = (p - dots) - 2;
2109		dots2 = p;
2110		if (dotdot == 2 && *p == '.' && p[1] == '.') {
2111		    rincr = zstrtol(p+2, &p, 10);
2112		    wid3 = p - dots2 - 2;
2113		    if (p != str2 || !rincr)
2114			err++;
2115		} else
2116		    err++;
2117	    }
2118	}
2119	if (!err) {
2120	    /* If either no. begins with a zero, pad the output with   *
2121	     * zeroes. Otherwise, set min width to 0 to suppress them.
2122	     * str+1 is the first number in the range, dots+2 the last,
2123	     * and dots2+2 is the increment if that's given. */
2124	    /* TODO: sorry about this */
2125	    int minw = (str[1] == '0' || (str[1] == '-' && str[2] == '0'))
2126		       ? wid1
2127		       : (dots[2] == '0' || (dots[2] == '-' && dots[3] == '0'))
2128		       ? wid2
2129		       : (dots2 && (dots2[2] == '0' ||
2130				    (dots2[2] == '-' && dots2[3] == '0')))
2131		       ? wid3
2132		       : 0;
2133	    if (rincr < 0) {
2134		/* Handle negative increment */
2135		rincr = -rincr;
2136		rev = !rev;
2137	    }
2138	    if (rstart > rend) {
2139		/* Handle decreasing ranges correctly. */
2140		zlong rt = rend;
2141		rend = rstart;
2142		rstart = rt;
2143		rev = !rev;
2144	    } else if (rincr > 1) {
2145		/* when incr > 1, range is aligned to the highest number of str1,
2146		 * compensate for this so that it is aligned to the first number */
2147		rend -= (rend - rstart) % rincr;
2148	    }
2149	    uremnode(list, node);
2150	    for (; rend >= rstart; rend -= rincr) {
2151		/* Node added in at end, so do highest first */
2152		p = dupstring(str3);
2153#if defined(ZLONG_IS_LONG_LONG) && defined(PRINTF_HAS_LLD)
2154		sprintf(p + strp, "%0*lld", minw, rend);
2155#else
2156		sprintf(p + strp, "%0*ld", minw, (long)rend);
2157#endif
2158		strcat(p + strp, str2 + 1);
2159		insertlinknode(list, last, p);
2160		if (rev)	/* decreasing:  add in reverse order. */
2161		    last = nextnode(last);
2162	    }
2163	    *np = nextnode(olast);
2164	    return;
2165	}
2166    }
2167    if (!comma && isset(BRACECCL)) {	/* {a-mnop} */
2168	/* Here we expand each character to a separate node,      *
2169	 * but also ranges of characters like a-m.  ccl is a      *
2170	 * set of flags saying whether each character is present; *
2171	 * the final list is in lexical order.                    */
2172	char ccl[256], *p;
2173	unsigned char c1, c2;
2174	unsigned int len, pl;
2175	int lastch = -1;
2176
2177	uremnode(list, node);
2178	memset(ccl, 0, sizeof(ccl) / sizeof(ccl[0]));
2179	for (p = str + 1; p < str2;) {
2180	    if (itok(c1 = *p++))
2181		c1 = ztokens[c1 - STOUC(Pound)];
2182	    if ((char) c1 == Meta)
2183		c1 = 32 ^ *p++;
2184	    if (itok(c2 = *p))
2185		c2 = ztokens[c2 - STOUC(Pound)];
2186	    if ((char) c2 == Meta)
2187		c2 = 32 ^ p[1];
2188	    if (c1 == '-' && lastch >= 0 && p < str2 && lastch <= (int)c2) {
2189		while (lastch < (int)c2)
2190		    ccl[lastch++] = 1;
2191		lastch = -1;
2192	    } else
2193		ccl[lastch = c1] = 1;
2194	}
2195	pl = str - str3;
2196	len = pl + strlen(++str2) + 2;
2197	for (p = ccl + 256; p-- > ccl;)
2198	    if (*p) {
2199		c1 = p - ccl;
2200		if (imeta(c1)) {
2201		    str = hcalloc(len + 1);
2202		    str[pl] = Meta;
2203		    str[pl+1] = c1 ^ 32;
2204		    strcpy(str + pl + 2, str2);
2205		} else {
2206		    str = hcalloc(len);
2207		    str[pl] = c1;
2208		    strcpy(str + pl + 1, str2);
2209		}
2210		memcpy(str, str3, pl);
2211		insertlinknode(list, last, str);
2212	    }
2213	*np = nextnode(last);
2214	return;
2215    }
2216    prev = str++ - str3;
2217    str2++;
2218    uremnode(list, node);
2219    node = last;
2220    /* Finally, normal comma expansion               *
2221     * str1{foo,bar}str2 -> str1foostr2 str1barstr2. *
2222     * Any number of intervening commas is allowed.  */
2223    for (;;) {
2224	char *zz, *str4;
2225	int cnt;
2226
2227	for (str4 = str, cnt = 0; cnt || (*str != Comma && *str !=
2228					  Outbrace); str++) {
2229	    if (*str == Inbrace)
2230		cnt++;
2231	    else if (*str == Outbrace)
2232		cnt--;
2233	    DPUTS(!*str, "BUG: illegal brace expansion");
2234	}
2235	/* Concatenate the string before the braces (str3), the section *
2236	 * just found (str4) and the text after the braces (str2)       */
2237	zz = (char *) hcalloc(prev + (str - str4) + strlen(str2) + 1);
2238	ztrncpy(zz, str3, prev);
2239	strncat(zz, str4, str - str4);
2240	strcat(zz, str2);
2241	/* and add this text to the argument list. */
2242	insertlinknode(list, node, zz);
2243	incnode(node);
2244	if (*str != Outbrace)
2245	    str++;
2246	else
2247	    break;
2248    }
2249    *np = nextnode(last);
2250}
2251
2252/* check to see if a matches b (b is not a filename pattern) */
2253
2254/**/
2255int
2256matchpat(char *a, char *b)
2257{
2258    Patprog p = patcompile(b, PAT_STATIC, NULL);
2259
2260    if (!p) {
2261	zerr("bad pattern: %s", b);
2262	return 0;
2263    }
2264    return pattry(p, a);
2265}
2266
2267/* do the ${foo%%bar}, ${foo#bar} stuff */
2268/* please do not laugh at this code. */
2269
2270/* Having found a match in getmatch, decide what part of string
2271 * to return.  The matched part starts b characters into string s
2272 * and finishes e characters in: 0 <= b <= e <= strlen(s)
2273 * (yes, empty matches should work).
2274 * fl is a set of the SUB_* matches defined in zsh.h from SUB_MATCH onwards;
2275 * the lower parts are ignored.
2276 * replstr is the replacement string for a substitution
2277 */
2278
2279/**/
2280static char *
2281get_match_ret(char *s, int b, int e, int fl, char *replstr,
2282	      LinkList repllist)
2283{
2284    char buf[80], *r, *p, *rr;
2285    int ll = 0, l = strlen(s), bl = 0, t = 0, i;
2286
2287    if (replstr || (fl & SUB_LIST)) {
2288	if (fl & SUB_DOSUBST) {
2289	    replstr = dupstring(replstr);
2290	    singsub(&replstr);
2291	    untokenize(replstr);
2292	}
2293	if ((fl & (SUB_GLOBAL|SUB_LIST)) && repllist) {
2294	    /* We are replacing the chunk, just add this to the list */
2295	    Repldata rd = (Repldata)
2296		((fl & SUB_LIST) ? zalloc(sizeof(*rd)) : zhalloc(sizeof(*rd)));
2297	    rd->b = b;
2298	    rd->e = e;
2299	    rd->replstr = replstr;
2300	    if (fl & SUB_LIST)
2301		zaddlinknode(repllist, rd);
2302	    else
2303		addlinknode(repllist, rd);
2304	    return s;
2305	}
2306	ll += strlen(replstr);
2307    }
2308    if (fl & SUB_MATCH)			/* matched portion */
2309	ll += 1 + (e - b);
2310    if (fl & SUB_REST)		/* unmatched portion */
2311	ll += 1 + (l - (e - b));
2312    if (fl & SUB_BIND) {
2313	/* position of start of matched portion */
2314	sprintf(buf, "%d ", b + 1);
2315	ll += (bl = strlen(buf));
2316    }
2317    if (fl & SUB_EIND) {
2318	/* position of end of matched portion */
2319	sprintf(buf + bl, "%d ", e + 1);
2320	ll += (bl = strlen(buf));
2321    }
2322    if (fl & SUB_LEN) {
2323	/* length of matched portion */
2324	sprintf(buf + bl, "%d ", e - b);
2325	ll += (bl = strlen(buf));
2326    }
2327    if (bl)
2328	buf[bl - 1] = '\0';
2329
2330    rr = r = (char *) hcalloc(ll);
2331
2332    if (fl & SUB_MATCH) {
2333	/* copy matched portion to new buffer */
2334	for (i = b, p = s + b; i < e; i++)
2335	    *rr++ = *p++;
2336	t = 1;
2337    }
2338    if (fl & SUB_REST) {
2339	/* Copy unmatched portion to buffer.  If both portions *
2340	 * requested, put a space in between (why?)            */
2341	if (t)
2342	    *rr++ = ' ';
2343	/* there may be unmatched bits at both beginning and end of string */
2344	for (i = 0, p = s; i < b; i++)
2345	    *rr++ = *p++;
2346	if (replstr)
2347	    for (p = replstr; *p; )
2348		*rr++ = *p++;
2349	for (i = e, p = s + e; i < l; i++)
2350	    *rr++ = *p++;
2351	t = 1;
2352    }
2353    *rr = '\0';
2354    if (bl) {
2355	/* if there was a buffer (with a numeric result), add it; *
2356	 * if there was other stuff too, stick in a space first.  */
2357	if (t)
2358	    *rr++ = ' ';
2359	strcpy(rr, buf);
2360    }
2361    return r;
2362}
2363
2364static Patprog
2365compgetmatch(char *pat, int *flp, char **replstrp)
2366{
2367    Patprog p;
2368    /*
2369     * Flags to pattern compiler:  use static buffer since we only
2370     * have one pattern at a time; we will try the must-match test ourselves,
2371     * so tell the pattern compiler we are scanning.
2372     */
2373
2374    /* int patflags = PAT_STATIC|PAT_SCAN|PAT_NOANCH;*/
2375
2376    /* Unfortunately, PAT_STATIC doesn't work if we have a replstr with
2377     * something like ${x#...} in it which will be singsub()ed below because
2378     * that would overwrite the pattern buffer. */
2379
2380    int patflags = PAT_SCAN|PAT_NOANCH | (*replstrp ? 0 : PAT_STATIC);
2381
2382    /*
2383     * Search is anchored to the end of the string if we want to match
2384     * it all, or if we are matching at the end of the string and not
2385     * using substrings.
2386     */
2387    if ((*flp & SUB_ALL) || ((*flp & SUB_END) && !(*flp & SUB_SUBSTR)))
2388	patflags &= ~PAT_NOANCH;
2389    p = patcompile(pat, patflags, NULL);
2390    if (!p) {
2391	zerr("bad pattern: %s", pat);
2392	return NULL;
2393    }
2394    if (*replstrp) {
2395	if (p->patnpar || (p->globend & GF_MATCHREF)) {
2396	    /*
2397	     * Either backreferences or match references, so we
2398	     * need to re-substitute replstr each time round.
2399	     */
2400	    *flp |= SUB_DOSUBST;
2401	} else {
2402	    singsub(replstrp);
2403	    untokenize(*replstrp);
2404	}
2405    }
2406
2407    return p;
2408}
2409
2410/*
2411 * This is called from paramsubst to get the match for ${foo#bar} etc.
2412 * fl is a set of the SUB_* flags defined in zsh.h
2413 * *sp points to the string we have to modify. The n'th match will be
2414 * returned in *sp. The heap is used to get memory for the result string.
2415 * replstr is the replacement string from a ${.../orig/repl}, in
2416 * which case pat is the original.
2417 *
2418 * n is now ignored unless we are looking for a substring, in
2419 * which case the n'th match from the start is counted such that
2420 * there is no more than one match from each position.
2421 */
2422
2423/**/
2424int
2425getmatch(char **sp, char *pat, int fl, int n, char *replstr)
2426{
2427    Patprog p;
2428
2429    if (!(p = compgetmatch(pat, &fl, &replstr)))
2430	return 1;
2431
2432    return igetmatch(sp, p, fl, n, replstr, NULL);
2433}
2434
2435/*
2436 * This is the corresponding function for array variables.
2437 * Matching is done with the same pattern on each element.
2438 */
2439
2440/**/
2441void
2442getmatcharr(char ***ap, char *pat, int fl, int n, char *replstr)
2443{
2444    char **arr = *ap, **pp;
2445    Patprog p;
2446
2447    if (!(p = compgetmatch(pat, &fl, &replstr)))
2448	return;
2449
2450    *ap = pp = hcalloc(sizeof(char *) * (arrlen(arr) + 1));
2451    while ((*pp = *arr++))
2452	if (igetmatch(pp, p, fl, n, replstr, NULL))
2453	    pp++;
2454}
2455
2456/*
2457 * Match against str using pattern pp; return a list of
2458 * Repldata matches in the linked list *repllistp; this is
2459 * in permanent storage and to be freed by freematchlist()
2460 */
2461
2462/**/
2463mod_export int
2464getmatchlist(char *str, Patprog p, LinkList *repllistp)
2465{
2466    char **sp = &str;
2467
2468    /*
2469     * We don't care if we have longest or shortest match, but SUB_LONG
2470     * is cheaper since the pattern code does that by default.
2471     * We need SUB_GLOBAL to get all matches.
2472     * We need SUB_SUBSTR to scan through for substrings.
2473     * We need SUB_LIST to activate the special handling of the list
2474     * passed in.
2475     */
2476    return igetmatch(sp, p, SUB_LONG|SUB_GLOBAL|SUB_SUBSTR|SUB_LIST,
2477		     0, NULL, repllistp);
2478}
2479
2480static void
2481freerepldata(void *ptr)
2482{
2483    zfree(ptr, sizeof(struct repldata));
2484}
2485
2486/**/
2487mod_export void
2488freematchlist(LinkList repllist)
2489{
2490    freelinklist(repllist, freerepldata);
2491}
2492
2493/**/
2494static void
2495set_pat_start(Patprog p, int offs)
2496{
2497    /*
2498     * If we are messing around with the test string by advancing up
2499     * it from the start, we need to tell the pattern matcher that
2500     * a start-of-string assertion, i.e. (#s), should fail.  Hence
2501     * we test whether the offset of the real start of string from
2502     * the actual start, passed as offs, is zero.
2503     */
2504    if (offs)
2505	p->flags |= PAT_NOTSTART;
2506    else
2507	p->flags &= ~PAT_NOTSTART;
2508}
2509
2510/**/
2511static void
2512set_pat_end(Patprog p, char null_me)
2513{
2514    /*
2515     * If we are messing around with the string by shortening it at the
2516     * tail, we need to tell the pattern matcher that an end-of-string
2517     * assertion, i.e. (#e), should fail.  Hence we test whether
2518     * the character null_me about to be zapped is or is not already a null.
2519     */
2520    if (null_me)
2521	p->flags |= PAT_NOTEND;
2522    else
2523	p->flags &= ~PAT_NOTEND;
2524}
2525
2526/**/
2527#ifdef MULTIBYTE_SUPPORT
2528
2529/*
2530 * Increment *tp over character which may be multibyte.
2531 * Return number of bytes that remain in the character after unmetafication.
2532 */
2533
2534/**/
2535static int iincchar(char **tp)
2536{
2537    char *t = *tp;
2538    int mbclen = mb_metacharlenconv(t, NULL);
2539    int umlen = 0;
2540
2541    while (mbclen--) {
2542	umlen++;
2543	if (*t++ == Meta) {
2544	    t++;
2545	    mbclen--;
2546	}
2547    }
2548    *tp = t;
2549
2550    return umlen;
2551}
2552
2553/**/
2554static int
2555igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
2556	  LinkList *repllistp)
2557{
2558    char *s = *sp, *t, *tmatch;
2559    /*
2560     * Note that ioff counts (possibly multibyte) characters in the
2561     * character set (Meta's are not included), while l counts characters in
2562     * the metafied string.
2563     *
2564     * umlen is a counter for (unmetafied) byte lengths---neither characters
2565     * nor raw byte indices; this is simply an optimisation for allocation.
2566     * umltot is the full length of the string in this scheme.
2567     *
2568     * l is the raw string length, used together with any pointers into
2569     * the string (typically t).
2570     */
2571    int ioff, l = strlen(*sp), matched = 1, umltot = ztrlen(*sp);
2572    int umlen, nmatches;
2573    /*
2574     * List of bits of matches to concatenate with replacement string.
2575     * The data is a struct repldata.  It is not used in cases like
2576     * ${...//#foo/bar} even though SUB_GLOBAL is set, since the match
2577     * is anchored.  It goes on the heap.
2578     */
2579    LinkList repllist = NULL;
2580
2581    /* perform must-match test for complex closures */
2582    if (p->mustoff)
2583    {
2584	/*
2585	 * Yuk.  Probably we should rewrite this whole function to
2586	 * use an unmetafied test string.
2587	 *
2588	 * Use META_HEAPDUP because we need a terminating NULL.
2589	 */
2590	char *muststr = metafy((char *)p + p->mustoff,
2591			       p->patmlen, META_HEAPDUP);
2592
2593	if (!strstr(s, muststr))
2594	    matched = 0;
2595    }
2596
2597    /* in case we used the prog before... */
2598    p->flags &= ~(PAT_NOTSTART|PAT_NOTEND);
2599
2600    if (fl & SUB_ALL) {
2601	int i = matched && pattry(p, s);
2602	*sp = get_match_ret(*sp, 0, i ? l : 0, fl, i ? replstr : 0, NULL);
2603	if (! **sp && (((fl & SUB_MATCH) && !i) || ((fl & SUB_REST) && i)))
2604	    return 0;
2605	return 1;
2606    }
2607    if (matched) {
2608	/*
2609	 * The default behaviour is to match at the start; this
2610	 * is modified by SUB_END and SUB_SUBSTR.  SUB_END matches
2611	 * at the end of the string instead of the start.  SUB_SUBSTR
2612	 * without SUB_END matches substrings searching from the start;
2613	 * with SUB_END it matches substrings searching from the end.
2614	 *
2615	 * The possibilities are further modified by whether we want the
2616	 * longest (SUB_LONG) or shortest possible match.
2617	 *
2618	 * SUB_START is only used in the case where we are also
2619	 * forcing a match at the end (SUB_END with no SUB_SUBSTR,
2620	 * with or without SUB_LONG), to indicate we should match
2621	 * the entire string.
2622	 */
2623	switch (fl & (SUB_END|SUB_LONG|SUB_SUBSTR)) {
2624	case 0:
2625	case SUB_LONG:
2626	    /*
2627	     * Largest/smallest possible match at head of string.
2628	     * First get the longest match...
2629	     */
2630	    if (pattry(p, s)) {
2631		/* patmatchlen returns metafied length, as we need */
2632	        int mlen = patmatchlen();
2633		if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
2634		    /*
2635		     * ... now we know whether it's worth looking for the
2636		     * shortest, which we do by brute force.
2637		     */
2638		    mb_metacharinit();
2639		    for (t = s, umlen = 0; t < s + mlen; ) {
2640			set_pat_end(p, *t);
2641			if (pattrylen(p, s, t - s, umlen, 0)) {
2642			    mlen = patmatchlen();
2643			    break;
2644			}
2645			umlen += iincchar(&t);
2646		    }
2647		}
2648		*sp = get_match_ret(*sp, 0, mlen, fl, replstr, NULL);
2649		return 1;
2650	    }
2651	    break;
2652
2653	case SUB_END:
2654	    /*
2655	     * Smallest possible match at tail of string.
2656	     * As we can only be sure we've got wide characters right
2657	     * when going forwards, we need to match at every point
2658	     * until we fail and record the last successful match.
2659	     *
2660	     * It's important that we return the last successful match
2661	     * so that match, mbegin, mend and MATCH, MBEGIN, MEND are
2662	     * correct.
2663	     */
2664	    mb_metacharinit();
2665	    tmatch = NULL;
2666	    for (ioff = 0, t = s, umlen = umltot; t < s + l; ioff++) {
2667		set_pat_start(p, t-s);
2668		if (pattrylen(p, t, s + l - t, umlen, ioff))
2669		    tmatch = t;
2670		if (fl & SUB_START)
2671		    break;
2672		umlen -= iincchar(&t);
2673	    }
2674	    if (tmatch) {
2675		*sp = get_match_ret(*sp, tmatch - s, l, fl, replstr, NULL);
2676		return 1;
2677	    }
2678	    if (!(fl & SUB_START) && pattrylen(p, s + l, 0, 0, ioff)) {
2679		*sp = get_match_ret(*sp, l, l, fl, replstr, NULL);
2680		return 1;
2681	    }
2682	    break;
2683
2684	case (SUB_END|SUB_LONG):
2685	    /* Largest possible match at tail of string:       *
2686	     * move forward along string until we get a match. *
2687	     * Again there's no optimisation.                  */
2688	    mb_metacharinit();
2689	    for (ioff = 0, t = s, umlen = umltot; t < s + l; ioff++) {
2690		set_pat_start(p, t-s);
2691		if (pattrylen(p, t, s + l - t, umlen, ioff)) {
2692		    *sp = get_match_ret(*sp, t-s, l, fl, replstr, NULL);
2693		    return 1;
2694		}
2695		if (fl & SUB_START)
2696		    break;
2697		umlen -= iincchar(&t);
2698	    }
2699	    if (!(fl & SUB_START) && pattrylen(p, s + l, 0, 0, ioff)) {
2700		*sp = get_match_ret(*sp, l, l, fl, replstr, NULL);
2701		return 1;
2702	    }
2703	    break;
2704
2705	case SUB_SUBSTR:
2706	    /* Smallest at start, but matching substrings. */
2707	    set_pat_start(p, l);
2708	    if (!(fl & SUB_GLOBAL) && pattry(p, s + l) && !--n) {
2709		*sp = get_match_ret(*sp, 0, 0, fl, replstr, NULL);
2710		return 1;
2711	    } /* fall through */
2712	case (SUB_SUBSTR|SUB_LONG):
2713	    /* longest or smallest at start with substrings */
2714	    t = s;
2715	    if (fl & SUB_GLOBAL) {
2716		repllist = (fl & SUB_LIST) ? znewlinklist() : newlinklist();
2717		if (repllistp)
2718		     *repllistp = repllist;
2719	    }
2720	    ioff = 0;		/* offset into string */
2721	    umlen = umltot;
2722	    mb_metacharinit();
2723	    do {
2724		/* loop over all matches for global substitution */
2725		matched = 0;
2726		for (; t < s + l; ioff++) {
2727		    /* Find the longest match from this position. */
2728		    set_pat_start(p, t-s);
2729		    if (pattrylen(p, t, s + l - t, umlen, ioff)) {
2730			char *mpos = t + patmatchlen();
2731			if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
2732			    char *ptr;
2733			    int umlen2;
2734			    /*
2735			     * If searching for the shortest match,
2736			     * start with a zero length and increase
2737			     * it until we reach the longest possible
2738			     * match, accepting the first successful
2739			     * match.
2740			     */
2741			    for (ptr = t, umlen2 = 0; ptr < mpos;) {
2742				set_pat_end(p, *ptr);
2743				if (pattrylen(p, t, ptr - t, umlen2, ioff)) {
2744				    mpos = t + patmatchlen();
2745				    break;
2746				}
2747				umlen2 += iincchar(&ptr);
2748			    }
2749			}
2750			if (!--n || (n <= 0 && (fl & SUB_GLOBAL))) {
2751			    *sp = get_match_ret(*sp, t-s, mpos-s, fl,
2752						replstr, repllist);
2753			    if (mpos == t)
2754				mpos += mb_metacharlenconv(mpos, NULL);
2755			}
2756			if (!(fl & SUB_GLOBAL)) {
2757			    if (n) {
2758				/*
2759				 * Looking for a later match: in this case,
2760				 * we can continue looking for matches from
2761				 * the next character, even if it overlaps
2762				 * with what we just found.
2763				 */
2764				umlen -= iincchar(&t);
2765				continue;
2766			    } else {
2767				return 1;
2768			    }
2769			}
2770			/*
2771			 * For a global match, we need to skip the stuff
2772			 * which is already marked for replacement.
2773			 */
2774			matched = 1;
2775			while (t < mpos) {
2776			    ioff++;
2777			    umlen -= iincchar(&t);
2778			}
2779			break;
2780		    }
2781		    umlen -= iincchar(&t);
2782		}
2783	    } while (matched);
2784	    /*
2785	     * check if we can match a blank string, if so do it
2786	     * at the start.  Goodness knows if this is a good idea
2787	     * with global substitution, so it doesn't happen.
2788	     */
2789	    set_pat_start(p, l);
2790	    if ((fl & (SUB_LONG|SUB_GLOBAL)) == SUB_LONG &&
2791		pattry(p, s + l) && !--n) {
2792		*sp = get_match_ret(*sp, 0, 0, fl, replstr, repllist);
2793		return 1;
2794	    }
2795	    break;
2796
2797	case (SUB_END|SUB_SUBSTR):
2798	case (SUB_END|SUB_LONG|SUB_SUBSTR):
2799	    /* Longest/shortest at end, matching substrings.       */
2800	    if (!(fl & SUB_LONG)) {
2801		set_pat_start(p, l);
2802		if (pattrylen(p, s + l, 0, 0, umltot) && !--n) {
2803		    *sp = get_match_ret(*sp, l, l, fl, replstr, NULL);
2804		    return 1;
2805		}
2806	    }
2807	    /*
2808	     * If multibyte characters are present we need to start from the
2809	     * beginning.  This is a bit unpleasant because we can't tell in
2810	     * advance how many times it will match and from where, so if n is
2811	     * greater then 1 we will need to count the number of times it
2812	     * matched and then go through again until we reach the right
2813	     * point.  (Either that or record every single match in a list,
2814	     * which isn't stupid; it involves more memory management at this
2815	     * level but less use of the pattern matcher.)
2816	     */
2817	    nmatches = 0;
2818	    tmatch = NULL;
2819	    mb_metacharinit();
2820	    for (ioff = 0, t = s, umlen = umltot; t < s + l; ioff++) {
2821		set_pat_start(p, t-s);
2822		if (pattrylen(p, t, s + l - t, umlen, ioff)) {
2823		    nmatches++;
2824		    tmatch = t;
2825		}
2826		umlen -= iincchar(&t);
2827	    }
2828	    if (nmatches) {
2829		char *mpos;
2830		if (n > 1) {
2831		    /*
2832		     * We need to find the n'th last match.
2833		     */
2834		    n = nmatches - n;
2835		    mb_metacharinit();
2836		    for (ioff = 0, t = s, umlen = umltot; t < s + l; ioff++) {
2837			set_pat_start(p, t-s);
2838			if (pattrylen(p, t, s + l - t, umlen, ioff) &&
2839			    !n--) {
2840			    tmatch = t;
2841			    break;
2842			}
2843			umlen -= iincchar(&t);
2844		    }
2845		}
2846		mpos = tmatch + patmatchlen();
2847		/* Look for the shortest match if necessary */
2848		if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
2849		    for (t = tmatch, umlen = 0; t < mpos; ) {
2850			set_pat_end(p, *t);
2851			if (pattrylen(p, tmatch, t - tmatch, umlen, ioff)) {
2852			    mpos = tmatch + patmatchlen();
2853			    break;
2854			}
2855			umlen += iincchar(&t);
2856		    }
2857		}
2858		*sp = get_match_ret(*sp, tmatch-s, mpos-s, fl,
2859				    replstr, NULL);
2860		return 1;
2861	    }
2862	    set_pat_start(p, l);
2863	    if ((fl & SUB_LONG) && pattrylen(p, s + l, 0, 0, umltot) && !--n) {
2864		*sp = get_match_ret(*sp, l, l, fl, replstr, NULL);
2865		return 1;
2866	    }
2867	    break;
2868	}
2869    }
2870
2871    if (repllist && nonempty(repllist)) {
2872	/* Put all the bits of a global search and replace together. */
2873	LinkNode nd;
2874	Repldata rd;
2875	int lleft;
2876	char *ptr, *start;
2877	int i;
2878
2879	if (!(fl & SUB_LIST)) {
2880	    lleft = 0;		/* size of returned string */
2881	    i = 0;			/* start of last chunk we got from *sp */
2882	    for (nd = firstnode(repllist); nd; incnode(nd)) {
2883		rd = (Repldata) getdata(nd);
2884		lleft += rd->b - i; /* previous chunk of *sp */
2885		lleft += strlen(rd->replstr);	/* the replaced bit */
2886		i = rd->e;		/* start of next chunk of *sp */
2887	    }
2888	    lleft += l - i;	/* final chunk from *sp */
2889	    start = t = zhalloc(lleft+1);
2890	    i = 0;
2891	    for (nd = firstnode(repllist); nd; incnode(nd)) {
2892		rd = (Repldata) getdata(nd);
2893		memcpy(t, s + i, rd->b - i);
2894		t += rd->b - i;
2895		ptr = rd->replstr;
2896		while (*ptr)
2897		    *t++ = *ptr++;
2898		i = rd->e;
2899	    }
2900	    memcpy(t, s + i, l - i);
2901	    start[lleft] = '\0';
2902	    *sp = (char *)start;
2903	}
2904	return 1;
2905    }
2906    if (fl & SUB_LIST)		/* safety: don't think this can happen */
2907	return 0;
2908
2909    /* munge the whole string: no match, so no replstr */
2910    *sp = get_match_ret(*sp, 0, 0, fl, 0, 0);
2911    return (fl & SUB_RETFAIL) ? 0 : 1;
2912}
2913
2914/**/
2915#else
2916
2917/*
2918 * Increment pointer which may be on a Meta (x is a pointer variable),
2919 * returning the incremented value (i.e. like pre-increment).
2920 */
2921#define METAINC(x)	((x) += (*(x) == Meta) ? 2 : 1)
2922
2923/**/
2924static int
2925igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
2926	  LinkList *repllistp)
2927{
2928    char *s = *sp, *t;
2929    /*
2930     * Note that ioff and uml count characters in the character
2931     * set (Meta's are not included), while l counts characters in the
2932     * metafied string.  umlen is a counter for (unmetafied) character
2933     * lengths.
2934     */
2935    int ioff, l = strlen(*sp), uml = ztrlen(*sp), matched = 1, umlen;
2936    /*
2937     * List of bits of matches to concatenate with replacement string.
2938     * The data is a struct repldata.  It is not used in cases like
2939     * ${...//#foo/bar} even though SUB_GLOBAL is set, since the match
2940     * is anchored.  It goes on the heap.
2941     */
2942    LinkList repllist = NULL;
2943
2944    /* perform must-match test for complex closures */
2945    if (p->mustoff)
2946    {
2947	/*
2948	 * Yuk.  Probably we should rewrite this whole function to
2949	 * use an unmetafied test string.
2950	 *
2951	 * Use META_HEAPDUP because we need a terminating NULL.
2952	 */
2953	char *muststr = metafy((char *)p + p->mustoff,
2954			       p->patmlen, META_HEAPDUP);
2955
2956	if (!strstr(s, muststr))
2957	    matched = 0;
2958    }
2959
2960    /* in case we used the prog before... */
2961    p->flags &= ~(PAT_NOTSTART|PAT_NOTEND);
2962
2963    if (fl & SUB_ALL) {
2964	int i = matched && pattry(p, s);
2965	*sp = get_match_ret(*sp, 0, i ? l : 0, fl, i ? replstr : 0, NULL);
2966	if (! **sp && (((fl & SUB_MATCH) && !i) || ((fl & SUB_REST) && i)))
2967	    return 0;
2968	return 1;
2969    }
2970    if (matched) {
2971	switch (fl & (SUB_END|SUB_LONG|SUB_SUBSTR)) {
2972	case 0:
2973	case SUB_LONG:
2974	    /*
2975	     * Largest/smallest possible match at head of string.
2976	     * First get the longest match...
2977	     */
2978	    if (pattry(p, s)) {
2979		/* patmatchlen returns metafied length, as we need */
2980	        int mlen = patmatchlen();
2981		if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
2982		    /*
2983		     * ... now we know whether it's worth looking for the
2984		     * shortest, which we do by brute force.
2985		     */
2986		    for (t = s, umlen = 0; t < s + mlen; METAINC(t), umlen++) {
2987			set_pat_end(p, *t);
2988			if (pattrylen(p, s, t - s, umlen, 0)) {
2989			    mlen = patmatchlen();
2990			    break;
2991			}
2992		    }
2993		}
2994		*sp = get_match_ret(*sp, 0, mlen, fl, replstr, NULL);
2995		return 1;
2996	    }
2997	    break;
2998
2999	case SUB_END:
3000	    /* Smallest possible match at tail of string:  *
3001	     * move back down string until we get a match. *
3002	     * There's no optimization here.               */
3003	    for (ioff = uml, t = s + l, umlen = 0; t >= s;
3004		 t--, ioff--, umlen++) {
3005		if (t > s && t[-1] == Meta)
3006		    t--;
3007		set_pat_start(p, t-s);
3008		if (pattrylen(p, t, s + l - t, umlen, ioff)) {
3009		    *sp = get_match_ret(*sp, t - s, l, fl, replstr, NULL);
3010		    return 1;
3011		}
3012		if (t > s+1 && t[-2] == Meta)
3013		    t--;
3014	    }
3015	    break;
3016
3017	case (SUB_END|SUB_LONG):
3018	    /* Largest possible match at tail of string:       *
3019	     * move forward along string until we get a match. *
3020	     * Again there's no optimisation.                  */
3021	    for (ioff = 0, t = s, umlen = uml; t < s + l;
3022		 ioff++, METAINC(t), umlen--) {
3023		set_pat_start(p, t-s);
3024		if (pattrylen(p, t, s + l - t, umlen, ioff)) {
3025		    *sp = get_match_ret(*sp, t-s, l, fl, replstr, NULL);
3026		    return 1;
3027		}
3028		if (*t == Meta)
3029		    t++;
3030	    }
3031	    break;
3032
3033	case SUB_SUBSTR:
3034	    /* Smallest at start, but matching substrings. */
3035	    set_pat_start(p, l);
3036	    if (!(fl & SUB_GLOBAL) && pattry(p, s + l) && !--n) {
3037		*sp = get_match_ret(*sp, 0, 0, fl, replstr, NULL);
3038		return 1;
3039	    } /* fall through */
3040	case (SUB_SUBSTR|SUB_LONG):
3041	    /* longest or smallest at start with substrings */
3042	    t = s;
3043	    if (fl & SUB_GLOBAL) {
3044		repllist = newlinklist();
3045		if (repllistp)
3046		    *repllistp = repllist;
3047	    }
3048	    ioff = 0;		/* offset into string */
3049	    umlen = uml;
3050	    do {
3051		/* loop over all matches for global substitution */
3052		matched = 0;
3053		for (; t < s + l; METAINC(t), ioff++, umlen--) {
3054		    /* Find the longest match from this position. */
3055		    set_pat_start(p, t-s);
3056		    if (pattrylen(p, t, s + l - t, umlen, ioff)) {
3057			char *mpos = t + patmatchlen();
3058			if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
3059			    char *ptr;
3060			    int umlen2;
3061			    for (ptr = t, umlen2 = 0; ptr < mpos;
3062				 METAINC(ptr), umlen2++) {
3063				set_pat_end(p, *ptr);
3064				if (pattrylen(p, t, ptr - t, umlen2, ioff)) {
3065				    mpos = t + patmatchlen();
3066				    break;
3067				}
3068			    }
3069			}
3070			if (!--n || (n <= 0 && (fl & SUB_GLOBAL))) {
3071			    *sp = get_match_ret(*sp, t-s, mpos-s, fl,
3072						replstr, repllist);
3073			    if (mpos == t)
3074				METAINC(mpos);
3075			}
3076			if (!(fl & SUB_GLOBAL)) {
3077			    if (n) {
3078				/*
3079				 * Looking for a later match: in this case,
3080				 * we can continue looking for matches from
3081				 * the next character, even if it overlaps
3082				 * with what we just found.
3083				 */
3084				continue;
3085			    } else {
3086				return 1;
3087			    }
3088			}
3089			/*
3090			 * For a global match, we need to skip the stuff
3091			 * which is already marked for replacement.
3092			 */
3093			matched = 1;
3094			for ( ; t < mpos; t++, ioff++, umlen--)
3095			    if (*t == Meta)
3096				t++;
3097			break;
3098		    }
3099		    if (*t == Meta)
3100			t++;
3101		}
3102	    } while (matched);
3103	    /*
3104	     * check if we can match a blank string, if so do it
3105	     * at the start.  Goodness knows if this is a good idea
3106	     * with global substitution, so it doesn't happen.
3107	     */
3108	    set_pat_start(p, l);
3109	    if ((fl & (SUB_LONG|SUB_GLOBAL)) == SUB_LONG &&
3110		pattry(p, s + l) && !--n) {
3111		*sp = get_match_ret(*sp, 0, 0, fl, replstr, repllist);
3112		return 1;
3113	    }
3114	    break;
3115
3116	case (SUB_END|SUB_SUBSTR):
3117	case (SUB_END|SUB_LONG|SUB_SUBSTR):
3118	    /* Longest/shortest at end, matching substrings.       */
3119	    if (!(fl & SUB_LONG)) {
3120		set_pat_start(p, l);
3121		if (pattrylen(p, s + l, 0, 0, uml) && !--n) {
3122		    *sp = get_match_ret(*sp, l, l, fl, replstr, NULL);
3123		    return 1;
3124		}
3125	    }
3126	    for (ioff = uml - 1, t = s + l - 1, umlen = 1; t >= s;
3127		 t--, ioff--, umlen++) {
3128		if (t > s && t[-1] == Meta)
3129		    t--;
3130		set_pat_start(p, t-s);
3131		if (pattrylen(p, t, s + l - t, umlen, ioff) && !--n) {
3132		    /* Found the longest match */
3133		    char *mpos = t + patmatchlen();
3134		    if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
3135			char *ptr;
3136			int umlen2;
3137			for (ptr = t, umlen2 = 0; ptr < mpos;
3138			     METAINC(ptr), umlen2++) {
3139			    set_pat_end(p, *ptr);
3140			    if (pattrylen(p, t, ptr - t, umlen2, ioff)) {
3141				mpos = t + patmatchlen();
3142				break;
3143			    }
3144			}
3145		    }
3146		    *sp = get_match_ret(*sp, t-s, mpos-s, fl,
3147					replstr, NULL);
3148		    return 1;
3149		}
3150	    }
3151	    set_pat_start(p, l);
3152	    if ((fl & SUB_LONG) && pattrylen(p, s + l, 0, 0, uml) && !--n) {
3153		*sp = get_match_ret(*sp, l, l, fl, replstr, NULL);
3154		return 1;
3155	    }
3156	    break;
3157	}
3158    }
3159
3160    if (repllist && nonempty(repllist)) {
3161	/* Put all the bits of a global search and replace together. */
3162	LinkNode nd;
3163	Repldata rd;
3164	int lleft = 0;		/* size of returned string */
3165	char *ptr, *start;
3166	int i;
3167
3168	i = 0;			/* start of last chunk we got from *sp */
3169	for (nd = firstnode(repllist); nd; incnode(nd)) {
3170	    rd = (Repldata) getdata(nd);
3171	    lleft += rd->b - i; /* previous chunk of *sp */
3172	    lleft += strlen(rd->replstr);	/* the replaced bit */
3173	    i = rd->e;		/* start of next chunk of *sp */
3174	}
3175	lleft += l - i;	/* final chunk from *sp */
3176	start = t = zhalloc(lleft+1);
3177	i = 0;
3178	for (nd = firstnode(repllist); nd; incnode(nd)) {
3179	    rd = (Repldata) getdata(nd);
3180	    memcpy(t, s + i, rd->b - i);
3181	    t += rd->b - i;
3182	    ptr = rd->replstr;
3183	    while (*ptr)
3184		*t++ = *ptr++;
3185	    i = rd->e;
3186	}
3187	memcpy(t, s + i, l - i);
3188	start[lleft] = '\0';
3189	*sp = (char *)start;
3190	return 1;
3191    }
3192
3193    /* munge the whole string: no match, so no replstr */
3194    *sp = get_match_ret(*sp, 0, 0, fl, 0, 0);
3195    return 1;
3196}
3197
3198/**/
3199#endif /* MULTIBYTE_SUPPORT */
3200
3201/* blindly turn a string into a tokenised expression without lexing */
3202
3203/**/
3204mod_export void
3205tokenize(char *s)
3206{
3207    zshtokenize(s, 0);
3208}
3209
3210/*
3211 * shtokenize is used when we tokenize a string with GLOB_SUBST set.
3212 * In that case we need to retain backslashes when we turn the
3213 * pattern back into a string, so that the string is not
3214 * modified if it failed to match a pattern.
3215 *
3216 * It may be modified by the effect of SH_GLOB which turns off
3217 * various zsh-specific options.
3218 */
3219
3220/**/
3221mod_export void
3222shtokenize(char *s)
3223{
3224    int flags = ZSHTOK_SUBST;
3225    if (isset(SHGLOB))
3226	flags |= ZSHTOK_SHGLOB;
3227    zshtokenize(s, flags);
3228}
3229
3230/**/
3231static void
3232zshtokenize(char *s, int flags)
3233{
3234    char *t;
3235    int bslash = 0;
3236
3237    for (; *s; s++) {
3238      cont:
3239	switch (*s) {
3240	case Bnull:
3241	case Bnullkeep:
3242	case '\\':
3243	    if (bslash) {
3244		s[-1] = (flags & ZSHTOK_SUBST) ? Bnullkeep : Bnull;
3245		break;
3246	    }
3247	    bslash = 1;
3248	    continue;
3249	case '<':
3250	    if (flags & ZSHTOK_SHGLOB)
3251		break;
3252	    if (bslash) {
3253		s[-1] = (flags & ZSHTOK_SUBST) ? Bnullkeep : Bnull;
3254		break;
3255	    }
3256	    t = s;
3257	    while (idigit(*++s));
3258	    if (*s != '-')
3259		goto cont;
3260	    while (idigit(*++s));
3261	    if (*s != '>')
3262		goto cont;
3263	    *t = Inang;
3264	    *s = Outang;
3265	    break;
3266	case '(':
3267	case '|':
3268	case ')':
3269	    if (flags & ZSHTOK_SHGLOB)
3270		break;
3271	case '>':
3272	case '^':
3273	case '#':
3274	case '~':
3275	case '[':
3276	case ']':
3277	case '*':
3278	case '?':
3279	case '=':
3280	    for (t = ztokens; *t; t++)
3281		if (*t == *s) {
3282		    if (bslash)
3283			s[-1] = (flags & ZSHTOK_SUBST) ? Bnullkeep : Bnull;
3284		    else
3285			*s = (t - ztokens) + Pound;
3286		    break;
3287		}
3288	}
3289	bslash = 0;
3290    }
3291}
3292
3293/* remove unnecessary Nulargs */
3294
3295/**/
3296mod_export void
3297remnulargs(char *s)
3298{
3299    if (*s) {
3300	char *o = s, c;
3301
3302	while ((c = *s++))
3303	    if (c == Bnullkeep) {
3304		/*
3305		 * An active backslash that needs to be turned back into
3306		 * a real backslash for output.  However, we don't
3307		 * do that yet since we need to ignore it during
3308		 * pattern matching.
3309		 */
3310		continue;
3311	    } else if (inull(c)) {
3312		char *t = s - 1;
3313
3314		while ((c = *s++)) {
3315		    if (c == Bnullkeep)
3316			*t++ = '\\';
3317		    else if (!inull(c))
3318			*t++ = c;
3319		}
3320		*t = '\0';
3321		if (!*o) {
3322		    o[0] = Nularg;
3323		    o[1] = '\0';
3324		}
3325		break;
3326	    }
3327    }
3328}
3329
3330/* qualifier functions:  mostly self-explanatory, see glob(). */
3331
3332/* device number */
3333
3334/**/
3335static int
3336qualdev(UNUSED(char *name), struct stat *buf, off_t dv, UNUSED(char *dummy))
3337{
3338    return (off_t)buf->st_dev == dv;
3339}
3340
3341/* number of hard links to file */
3342
3343/**/
3344static int
3345qualnlink(UNUSED(char *name), struct stat *buf, off_t ct, UNUSED(char *dummy))
3346{
3347    return (g_range < 0 ? buf->st_nlink < ct :
3348	    g_range > 0 ? buf->st_nlink > ct :
3349	    buf->st_nlink == ct);
3350}
3351
3352/* user ID */
3353
3354/**/
3355static int
3356qualuid(UNUSED(char *name), struct stat *buf, off_t uid, UNUSED(char *dummy))
3357{
3358    return buf->st_uid == uid;
3359}
3360
3361/* group ID */
3362
3363/**/
3364static int
3365qualgid(UNUSED(char *name), struct stat *buf, off_t gid, UNUSED(char *dummy))
3366{
3367    return buf->st_gid == gid;
3368}
3369
3370/* device special file? */
3371
3372/**/
3373static int
3374qualisdev(UNUSED(char *name), struct stat *buf, UNUSED(off_t junk), UNUSED(char *dummy))
3375{
3376    return S_ISBLK(buf->st_mode) || S_ISCHR(buf->st_mode);
3377}
3378
3379/* block special file? */
3380
3381/**/
3382static int
3383qualisblk(UNUSED(char *name), struct stat *buf, UNUSED(off_t junk), UNUSED(char *dummy))
3384{
3385    return S_ISBLK(buf->st_mode);
3386}
3387
3388/* character special file? */
3389
3390/**/
3391static int
3392qualischr(UNUSED(char *name), struct stat *buf, UNUSED(off_t junk), UNUSED(char *dummy))
3393{
3394    return S_ISCHR(buf->st_mode);
3395}
3396
3397/* directory? */
3398
3399/**/
3400static int
3401qualisdir(UNUSED(char *name), struct stat *buf, UNUSED(off_t junk), UNUSED(char *dummy))
3402{
3403    return S_ISDIR(buf->st_mode);
3404}
3405
3406/* FIFO? */
3407
3408/**/
3409static int
3410qualisfifo(UNUSED(char *name), struct stat *buf, UNUSED(off_t junk), UNUSED(char *dummy))
3411{
3412    return S_ISFIFO(buf->st_mode);
3413}
3414
3415/* symbolic link? */
3416
3417/**/
3418static int
3419qualislnk(UNUSED(char *name), struct stat *buf, UNUSED(off_t junk), UNUSED(char *dummy))
3420{
3421    return S_ISLNK(buf->st_mode);
3422}
3423
3424/* regular file? */
3425
3426/**/
3427static int
3428qualisreg(UNUSED(char *name), struct stat *buf, UNUSED(off_t junk), UNUSED(char *dummy))
3429{
3430    return S_ISREG(buf->st_mode);
3431}
3432
3433/* socket? */
3434
3435/**/
3436static int
3437qualissock(UNUSED(char *name), struct stat *buf, UNUSED(off_t junk), UNUSED(char *dummy))
3438{
3439    return S_ISSOCK(buf->st_mode);
3440}
3441
3442/* given flag is set in mode */
3443
3444/**/
3445static int
3446qualflags(UNUSED(char *name), struct stat *buf, off_t mod, UNUSED(char *dummy))
3447{
3448    return mode_to_octal(buf->st_mode) & mod;
3449}
3450
3451/* mode matches specification */
3452
3453/**/
3454static int
3455qualmodeflags(UNUSED(char *name), struct stat *buf, off_t mod, UNUSED(char *dummy))
3456{
3457    long v = mode_to_octal(buf->st_mode), y = mod & 07777, n = mod >> 12;
3458
3459    return ((v & y) == y && !(v & n));
3460}
3461
3462/* regular executable file? */
3463
3464/**/
3465static int
3466qualiscom(UNUSED(char *name), struct stat *buf, UNUSED(off_t mod), UNUSED(char *dummy))
3467{
3468    return S_ISREG(buf->st_mode) && (buf->st_mode & S_IXUGO);
3469}
3470
3471/* size in required range? */
3472
3473/**/
3474static int
3475qualsize(UNUSED(char *name), struct stat *buf, off_t size, UNUSED(char *dummy))
3476{
3477#if defined(LONG_IS_64_BIT) || defined(OFF_T_IS_64_BIT)
3478# define QS_CAST_SIZE()
3479    off_t scaled = buf->st_size;
3480#else
3481# define QS_CAST_SIZE() (unsigned long)
3482    unsigned long scaled = (unsigned long)buf->st_size;
3483#endif
3484
3485    switch (g_units) {
3486    case TT_POSIX_BLOCKS:
3487	scaled += 511l;
3488	scaled /= 512l;
3489	break;
3490    case TT_KILOBYTES:
3491	scaled += 1023l;
3492	scaled /= 1024l;
3493	break;
3494    case TT_MEGABYTES:
3495	scaled += 1048575l;
3496	scaled /= 1048576l;
3497	break;
3498    }
3499
3500    return (g_range < 0 ? scaled < QS_CAST_SIZE() size :
3501	    g_range > 0 ? scaled > QS_CAST_SIZE() size :
3502	    scaled == QS_CAST_SIZE() size);
3503#undef QS_CAST_SIZE
3504}
3505
3506/* time in required range? */
3507
3508/**/
3509static int
3510qualtime(UNUSED(char *name), struct stat *buf, off_t days, UNUSED(char *dummy))
3511{
3512    time_t now, diff;
3513
3514    time(&now);
3515    diff = now - (g_amc == 0 ? buf->st_atime : g_amc == 1 ? buf->st_mtime :
3516		  buf->st_ctime);
3517    /* handle multipliers indicating units */
3518    switch (g_units) {
3519    case TT_DAYS:
3520	diff /= 86400l;
3521	break;
3522    case TT_HOURS:
3523	diff /= 3600l;
3524	break;
3525    case TT_MINS:
3526	diff /= 60l;
3527	break;
3528    case TT_WEEKS:
3529	diff /= 604800l;
3530	break;
3531    case TT_MONTHS:
3532	diff /= 2592000l;
3533	break;
3534    }
3535
3536    return (g_range < 0 ? diff < days :
3537	    g_range > 0 ? diff > days :
3538	    diff == days);
3539}
3540
3541/* evaluate a string */
3542
3543/**/
3544static int
3545qualsheval(char *name, UNUSED(struct stat *buf), UNUSED(off_t days), char *str)
3546{
3547    Eprog prog;
3548
3549    if ((prog = parse_string(str, 0))) {
3550	int ef = errflag, lv = lastval, ret;
3551
3552	unsetparam("reply");
3553	setsparam("REPLY", ztrdup(name));
3554
3555	execode(prog, 1, 0, "globqual");
3556
3557	ret = lastval;
3558	errflag = ef;
3559	lastval = lv;
3560
3561	if (!(inserts = getaparam("reply")) &&
3562	    !(inserts = gethparam("reply"))) {
3563	    char *tmp;
3564
3565	    if ((tmp = getsparam("reply")) || (tmp = getsparam("REPLY"))) {
3566		static char *tmparr[2];
3567
3568		tmparr[0] = tmp;
3569		tmparr[1] = NULL;
3570
3571		inserts = tmparr;
3572	    }
3573	}
3574
3575	return !ret;
3576    }
3577    return 0;
3578}
3579
3580/**/
3581static int
3582qualnonemptydir(char *name, struct stat *buf, UNUSED(off_t days), UNUSED(char *str))
3583{
3584    DIR *dirh;
3585    struct dirent *de;
3586    int unamelen;
3587    char *uname = unmetafy(dupstring(name), &unamelen);
3588
3589    if (!S_ISDIR(buf->st_mode))
3590	return 0;
3591
3592    if (buf->st_nlink > 2)
3593	return 1;
3594
3595    if (!(dirh = opendir(uname)))
3596	return 0;
3597
3598    while ((de = readdir(dirh))) {
3599	if (strcmp(de->d_name, ".") && strcmp(de->d_name, "..")) {
3600	    closedir(dirh);
3601	    return 1;
3602	}
3603    }
3604
3605    closedir(dirh);
3606    return 0;
3607}
3608