1/*
2 * pattern.c - pattern matching
3 *
4 * This file is part of zsh, the Z shell.
5 *
6 * Copyright (c) 1999 Peter Stephenson
7 * All rights reserved.
8 *
9 * Permission is hereby granted, without written agreement and without
10 * license or royalty fees, to use, copy, modify, and distribute this
11 * software and to distribute modified versions of this software for any
12 * purpose, provided that the above copyright notice and the following
13 * two paragraphs appear in all copies of this software.
14 *
15 * In no event shall Peter Stephenson or the Zsh Development Group be liable
16 * to any party for direct, indirect, special, incidental, or consequential
17 * damages arising out of the use of this software and its documentation,
18 * even if Peter Stephenson and the Zsh Development Group have been advised of
19 * the possibility of such damage.
20 *
21 * Peter Stephenson and the Zsh Development Group specifically disclaim any
22 * warranties, including, but not limited to, the implied warranties of
23 * merchantability and fitness for a particular purpose.  The software
24 * provided hereunder is on an "as is" basis, and Peter Stephenson and the
25 * Zsh Development Group have no obligation to provide maintenance,
26 * support, updates, enhancements, or modifications.
27 *
28 * Pattern matching code derived from the regexp library by Henry
29 * Spencer, which has the following copyright.
30 *
31 *	Copyright (c) 1986 by University of Toronto.
32 *	Written by Henry Spencer.  Not derived from licensed software.
33 *
34 *	Permission is granted to anyone to use this software for any
35 *	purpose on any computer system, and to redistribute it freely,
36 *	subject to the following restrictions:
37 *
38 *	1. The author is not responsible for the consequences of use of
39 *		this software, no matter how awful, even if they arise
40 *		from defects in it.
41 *
42 *	2. The origin of this software must not be misrepresented, either
43 *		by explicit claim or by omission.
44 *
45 *	3. Altered versions must be plainly marked as such, and must not
46 *		be misrepresented as being the original software.
47 *
48 * Eagle-eyed readers will notice this is an altered version.  Incredibly
49 * sharp-eyed readers might even find bits that weren't altered.
50 *
51 *
52 *      And I experienced a sense that, like certain regular
53 *      expressions, seemed to match the day from beginning to end, so
54 *      that I did not need to identify the parenthesised subexpression
55 *      that told of dawn, nor the group of characters that indicated
56 *      the moment when my grandfather returned home with news of
57 *      Swann's departure for Paris; and the whole length of the month
58 *      of May, as if matched by a closure, fitted into the buffer of my
59 *      life with no sign of overflowing, turning the days, like a
60 *      procession of insects that could consist of this or that
61 *      species, into a random and unstructured repetition of different
62 *      sequences, anchored from the first day of the month to the last
63 *      in the same fashion as the weeks when I knew I would not see
64 *      Gilberte and would search in vain for any occurrences of the
65 *      string in the avenue of hawthorns by Tansonville, without my
66 *      having to delimit explicitly the start or finish of the pattern.
67 *
68 *                                 M. Proust, "In Search of Lost Files",
69 *                                 bk I, "The Walk by Bourne's Place".
70 */
71
72#include "zsh.mdh"
73
74/*
75 * The following union is used mostly for alignment purposes.
76 * Normal nodes are longs, while certain nodes take a char * as an argument;
77 * here we make sure that they both work out to the same length.
78 * The compiled regexp we construct consists of upats stuck together;
79 * anything else to be added (strings, numbers) is stuck after and
80 * then aligned to a whole number of upat units.
81 *
82 * Note also that offsets are in terms of the sizes of these things.
83 */
84union upat {
85    long l;
86    unsigned char *p;
87};
88
89typedef union upat *Upat;
90
91#include "pattern.pro"
92
93/* Number of active parenthesized expressions allowed in backreferencing */
94#define NSUBEXP  9
95
96/* definition	number	opnd?	meaning */
97#define	P_END	  0x00	/* no	End of program. */
98#define P_EXCSYNC 0x01	/* no   Test if following exclude already failed */
99#define P_EXCEND  0x02	/* no   Test if exclude matched orig branch */
100#define	P_BACK	  0x03	/* no	Match "", "next" ptr points backward. */
101#define	P_EXACTLY 0x04	/* lstr	Match this string. */
102#define	P_NOTHING 0x05	/* no	Match empty string. */
103#define	P_ONEHASH 0x06	/* node	Match this (simple) thing 0 or more times. */
104#define	P_TWOHASH 0x07	/* node	Match this (simple) thing 1 or more times. */
105#define P_GFLAGS  0x08	/* long Match nothing and set globbing flags */
106#define P_ISSTART 0x09  /* no   Match start of string. */
107#define P_ISEND   0x0a  /* no   Match end of string. */
108#define P_COUNTSTART 0x0b /* no Initialise P_COUNT */
109#define P_COUNT   0x0c  /* 3*long uc* node Match a number of repetitions */
110/* numbered so we can test bit 5 for a branch */
111#define	P_BRANCH  0x20	/* node	Match this alternative, or the next... */
112#define	P_WBRANCH 0x21	/* uc* node P_BRANCH, but match at least 1 char */
113/* excludes are also branches, but have bit 4 set, too */
114#define P_EXCLUDE 0x30	/* uc* node Exclude this from previous branch */
115#define P_EXCLUDP 0x31	/* uc* node Exclude, using full file path so far */
116/* numbered so we can test bit 6 so as not to match initial '.' */
117#define	P_ANY	  0x40	/* no	Match any one character. */
118#define	P_ANYOF	  0x41	/* str  Match any character in this string. */
119#define	P_ANYBUT  0x42	/* str  Match any character not in this string. */
120#define P_STAR    0x43	/* no   Match any set of characters. */
121#define P_NUMRNG  0x44	/* zr, zr Match a numeric range. */
122#define P_NUMFROM 0x45	/* zr   Match a number >= X */
123#define P_NUMTO   0x46	/* zr   Match a number <= X */
124#define P_NUMANY  0x47	/* no   Match any set of decimal digits */
125/* spaces left for P_OPEN+n,... for backreferences */
126#define	P_OPEN	  0x80	/* no	Mark this point in input as start of n. */
127#define	P_CLOSE	  0x90	/* no	Analogous to OPEN. */
128/*
129 * no    no argument
130 * zr    the range type zrange_t:  may be zlong or unsigned long
131 * char  a single char
132 * uc*   a pointer to unsigned char, used at run time and initialised
133 *       to NULL.
134 * str   null-terminated, metafied string
135 * lstr  length as long then string, not null-terminated, unmetafied.
136 */
137
138/*
139 * Notes on usage:
140 * P_WBRANCH:  This works like a branch and is used in complex closures,
141 *    to ensure we don't succeed on a zero-length match of the pattern,
142 *    since that would cause an infinite loop.  We do this by recording
143 *    the positions where we have already tried to match.   See the
144 *    P_WBRANCH test in patmatch().
145 *
146 *  P_ANY, P_ANYOF:  the operand is a null terminated
147 *    string.  Normal characters match as expected.  Characters
148 *    in the range Meta+PP_ALPHA..Meta+PP_UNKNWN do the appropriate
149 *    Posix range tests.  This relies on imeta returning true for these
150 *    characters.  We treat unknown POSIX ranges as never matching.
151 *    PP_RANGE means the next two (possibly metafied) characters form
152 *    the limits of a range to test; it's too much like hard work to
153 *    expand the range.
154 *
155 *  P_EXCLUDE, P_EXCSYNC, PEXCEND:  P_EXCLUDE appears in the pattern like
156 *    P_BRANCH, but applies to the immediately preceding branch.  The code in
157 *    the corresponding branch is followed by a P_EXCSYNC, which simply
158 *    acts as a marker that a P_EXCLUDE comes next.  The P_EXCLUDE
159 *    has a pointer to char embeded in it, which works
160 *    like P_WBRANCH:  if we get to the P_EXCSYNC, and we already matched
161 *    up to the same position, fail.  Thus we are forced to backtrack
162 *    on closures in the P_BRANCH if the first attempt was excluded.
163 *    Corresponding to P_EXCSYNC in the original branch, there is a
164 *    P_EXCEND in the exclusion.  If we get to this point, and we did
165 *    *not* match in the original branch, the exclusion itself fails,
166 *    otherwise it succeeds since we know the tail already matches,
167 *    so P_EXCEND is the end of the exclusion test.
168 *    The whole sorry mess looks like this, where the upper lines
169 *    show the linkage of the branches, and the lower shows the linkage
170 *    of their pattern arguments.
171 *
172 *     	        ---------------------      ----------------------
173 *              ^      	       	     v    ^      	         v
174 *      ( <BRANCH>:apat-><EXCSYNC> <EXCLUDE>:excpat-><EXCEND> ) tail
175 *                               	                         ^
176 *		       	  |                                      |
177 *			   --------------------------------------
178 *
179 * P_EXCLUDP: this behaves exactly like P_EXCLUDE, with the sole exception
180 *   that we prepend the path so far to the exclude pattern.   This is
181 *   for top level file globs, e.g. ** / *.c~*foo.c
182 *                                    ^ I had to leave this space
183 * P_NUM*: zl is a zlong if that is 64-bit, else an unsigned long.
184 *
185 * P_COUNTSTART, P_COUNT: a P_COUNTSTART flags the start of a quantified
186 * closure (#cN,M) and is used to initialise the count.  Executing
187 * the pattern leads back to the P_COUNT, while the next links of the
188 * P_COUNTSTART and P_COUNT lead to the tail of the pattern:
189 *
190 *	       	        ----------------
191 *     	       	       v       	        ^
192 *        <COUNTSTART><COUNT>pattern<BACK> tail
193 *	     	    v      v  	  	    ^
194 *	            ------------------------
195 */
196
197#define	P_OP(p)		((p)->l & 0xff)
198#define	P_NEXT(p)	((p)->l >> 8)
199#define	P_OPERAND(p)	((p) + 1)
200#define P_ISBRANCH(p)   ((p)->l & 0x20)
201#define P_ISEXCLUDE(p)	(((p)->l & 0x30) == 0x30)
202#define P_NOTDOT(p)	((p)->l & 0x40)
203
204/* Specific to lstr type, i.e. P_EXACTLY. */
205#define P_LS_LEN(p)	((p)[1].l) /* can be used as lvalue */
206#define P_LS_STR(p)	((char *)((p) + 2))
207
208/* Specific to P_COUNT: arguments as offset in nodes from operator */
209#define P_CT_CURRENT	(1)	/* Current count */
210#define P_CT_MIN	(2)     /* Minimum count */
211#define P_CT_MAX	(3)	/* Maximum count, -1 for none */
212#define P_CT_PTR	(4)	/* Pointer to last match start */
213#define P_CT_OPERAND	(5)	/* Operand of P_COUNT */
214
215/* Flags needed when pattern is executed */
216#define P_SIMPLE        0x01	/* Simple enough to be #/## operand. */
217#define P_HSTART        0x02	/* Starts with # or ##'d pattern. */
218#define P_PURESTR	0x04	/* Can be matched with a strcmp */
219
220#if defined(ZSH_64_BIT_TYPE) || defined(LONG_IS_64_BIT)
221typedef zlong zrange_t;
222#define ZRANGE_T_IS_SIGNED	(1)
223#else
224typedef unsigned long zrange_t;
225#endif
226
227/*
228 * Characters which terminate a pattern segment.  We actually use
229 * a pointer patendseg which skips the first character if we are not
230 * parsing a file pattern.
231 * Note that the size of this and the next array are hard-wired
232 * via the definitions.
233 */
234
235static char endseg[] = {
236    '/',			/* file only */
237    '\0', Bar, Outpar,		/* all patterns */
238    Tilde			/* extended glob only */
239};
240
241#define PATENDSEGLEN_NORM 4
242#define PATENDSEGLEN_EXT  5
243
244/* Characters which terminate a simple string */
245
246static char endstr[] = {
247    '/',			/* file only */
248    '\0', Bar, Outpar, Quest, Star, Inbrack, Inpar, Inang, Bnullkeep,
249				/* all patterns */
250    Tilde, Hat, Pound		/* extended glob only */
251};
252
253#define PATENDSTRLEN_NORM 10
254#define PATENDSTRLEN_EXT  13
255
256
257/* Default size for pattern buffer */
258#define P_DEF_ALLOC 256
259
260/* Flags used in compilation */
261static char *patstart, *patparse;	/* input pointers */
262static int patnpar;		/* () count */
263static char *patcode;		/* point of code emission */
264static long patsize;		/* size of code */
265static char *patout;		/* start of code emission string */
266static long patalloc;		/* size allocated for same */
267static char *patendseg;		/* characters ending segment */
268static int patendseglen;	/* length of same */
269static char *patendstr;		/* characters ending plain string */
270static int patendstrlen;	/* length of sameo */
271
272/* Flags used in both compilation and execution */
273static int patflags;		    /* flags passed down to patcompile */
274static int patglobflags;  /* globbing flags & approx */
275
276/*
277 * Increment pointer to metafied multibyte string.
278 */
279#ifdef MULTIBYTE_SUPPORT
280typedef wint_t patint_t;
281
282#define PEOF WEOF
283
284#define METACHARINC(x) ((void)metacharinc(&x))
285
286/*
287 * TODO: the shiftstate isn't well handled; we don't guarantee
288 * to maintain it properly between characters.  If we don't
289 * need it we should use mbtowc() instead.
290 */
291static mbstate_t shiftstate;
292
293/*
294 * Multibyte version: it's (almost) as easy to return the
295 * value as not, so do so since we sometimes need it..
296 */
297static wchar_t
298metacharinc(char **x)
299{
300    char *inptr = *x;
301    char inchar;
302    size_t ret = MB_INVALID;
303    wchar_t wc;
304
305    /*
306     * Cheat if the top bit isn't set.  This is second-guessing
307     * the library, but we know for sure that if the character
308     * set doesn't have the property that all bytes with the 8th
309     * bit clear are single characters then we are stuffed.
310     */
311    if (!(patglobflags & GF_MULTIBYTE) || !(STOUC(*inptr) & 0x80))
312    {
313	if (itok(*inptr))
314	    inchar = ztokens[*inptr++ - Pound];
315	else if (*inptr == Meta) {
316	    inptr++;
317	    inchar = *inptr++ ^ 32;
318	} else {
319	    inchar = *inptr++;
320	}
321	*x = inptr;
322	return (wchar_t)STOUC(inchar);
323    }
324
325    while (*inptr) {
326	if (itok(*inptr))
327	    inchar = ztokens[*inptr++ - Pound];
328	else if (*inptr == Meta) {
329	    inptr++;
330	    inchar = *inptr++ ^ 32;
331	} else {
332	    inchar = *inptr++;
333	}
334	ret = mbrtowc(&wc, &inchar, 1, &shiftstate);
335
336	if (ret == MB_INVALID)
337	    break;
338	if (ret == MB_INCOMPLETE)
339	    continue;
340	*x = inptr;
341	return wc;
342    }
343
344    /* Error.  Treat as single byte. */
345    /* Reset the shift state for next time. */
346    memset(&shiftstate, 0, sizeof(shiftstate));
347    return (wchar_t) STOUC(*(*x)++);
348}
349
350#else
351typedef int patint_t;
352
353#define PEOF EOF
354
355#define METACHARINC(x)	((void)((x) += (*(x) == Meta) ? 2 : 1))
356#endif
357
358/*
359 * Return unmetafied char from string (x is any char *).
360 * Used with MULTIBYTE_SUPPORT if the GF_MULTIBYTE is not
361 * in effect.
362 */
363#define UNMETA(x)	(*(x) == Meta ? (x)[1] ^ 32 : *(x))
364
365/* Add n more characters, ensuring there is enough space. */
366
367enum {
368    PA_NOALIGN = 1,
369    PA_UNMETA  = 2
370};
371
372/**/
373static void
374patadd(char *add, int ch, long n, int paflags)
375{
376    /* Make sure everything gets aligned unless we get PA_NOALIGN. */
377    long newpatsize = patsize + n;
378    if (!(paflags & PA_NOALIGN))
379	newpatsize = (newpatsize + sizeof(union upat) - 1) &
380		      ~(sizeof(union upat) - 1);
381    if (patalloc < newpatsize) {
382	long newpatalloc =
383	    2*(newpatsize > patalloc ? newpatsize : patalloc);
384	patout = (char *)zrealloc((char *)patout, newpatalloc);
385	patcode = patout + patsize;
386	patalloc = newpatalloc;
387    }
388    patsize = newpatsize;
389    if (add) {
390	if (paflags & PA_UNMETA) {
391	    /*
392	     * Unmetafy and untokenize the string as we go.
393	     * The Meta characters in add aren't counted in n.
394	     */
395	    while (n--) {
396		if (itok(*add))
397		    *patcode++ = ztokens[*add++ - Pound];
398		else if (*add == Meta) {
399		    add++;
400		    *patcode++ = *add++ ^ 32;
401		} else {
402		    *patcode++ = *add++;
403		}
404	    }
405	} else {
406	    while (n--)
407		*patcode++ = *add++;
408	}
409    } else
410	*patcode++ = ch;
411    patcode = patout + patsize;
412}
413
414static long rn_offs;
415/* operates on pointers to union upat, returns a pointer */
416#define PATNEXT(p) ((rn_offs = P_NEXT(p)) ? \
417		    (P_OP(p) == P_BACK) ? \
418		    ((p)-rn_offs) : ((p)+rn_offs) : NULL)
419
420/* Called before parsing a set of file matchs to initialize flags */
421
422/**/
423void
424patcompstart(void)
425{
426    if (isset(CASEGLOB))
427	patglobflags = 0;
428    else
429	patglobflags = GF_IGNCASE;
430    if (isset(MULTIBYTE))
431	patglobflags |= GF_MULTIBYTE;
432}
433
434/*
435 * Top level pattern compilation subroutine
436 * exp is a null-terminated, metafied string.
437 * inflags is an or of some PAT_* flags.
438 * endexp, if non-null, is set to a pointer to the end of the
439 *   part of exp which was compiled.  This is used when
440 *   compiling patterns for directories which must be
441 *   matched recursively.
442 */
443
444/**/
445mod_export Patprog
446patcompile(char *exp, int inflags, char **endexp)
447{
448    int flags = 0;
449    long len = 0;
450    long startoff;
451    Upat pscan;
452    char *lng, *strp = NULL;
453    Patprog p;
454
455    startoff = sizeof(struct patprog);
456    /* Ensure alignment of start of program string */
457    startoff = (startoff + sizeof(union upat) - 1) & ~(sizeof(union upat) - 1);
458
459    /* Allocate reasonable sized chunk if none, reduce size if too big */
460    if (patalloc != P_DEF_ALLOC)
461	patout = (char *)zrealloc(patout, patalloc = P_DEF_ALLOC);
462    patcode = patout + startoff;
463    patsize = patcode - patout;
464    patstart = patparse = exp;
465    /*
466     * Note global patnpar numbers parentheses 1..9, while patnpar
467     * in struct is actual count of parentheses.
468     */
469    patnpar = 1;
470    patflags = inflags & ~(PAT_PURES|PAT_HAS_EXCLUDP);
471
472    patendseg = endseg;
473    patendseglen = isset(EXTENDEDGLOB) ? PATENDSEGLEN_EXT : PATENDSEGLEN_NORM;
474    patendstr = endstr;
475    patendstrlen = isset(EXTENDEDGLOB) ? PATENDSTRLEN_EXT : PATENDSTRLEN_NORM;
476
477    if (!(patflags & PAT_FILE)) {
478	patendseg++;
479	patendstr++;
480	patendseglen--;
481	patendstrlen--;
482	remnulargs(patparse);
483	if (isset(MULTIBYTE))
484	    patglobflags = GF_MULTIBYTE;
485	else
486	    patglobflags = 0;
487    }
488    if (patflags & PAT_LCMATCHUC)
489	patglobflags |= GF_LCMATCHUC;
490    /*
491     * Have to be set now, since they get updated during compilation.
492     */
493    ((Patprog)patout)->globflags = patglobflags;
494
495    if (!(patflags & PAT_ANY)) {
496	/* Look for a really pure string, with no tokens at all. */
497	if (!(patglobflags & ~GF_MULTIBYTE)
498#ifdef __CYGWIN__
499	    /*
500	     * If the OS treats files case-insensitively and we
501	     * are looking at files, we don't need to use pattern
502	     * matching to find the file.
503	     */
504	    || (!(patglobflags & ~GF_IGNCASE) && (patflags & PAT_FILE))
505#endif
506	    )
507	{
508	    /*
509	     * Waah!  I wish I understood this.
510	     * Empty metafied strings have an initial Nularg.
511	     * This never corresponds to a real character in
512	     * a glob pattern or string, so skip it.
513	     */
514	    if (*exp == Nularg)
515		exp++;
516	    for (strp = exp; *strp &&
517		     (!(patflags & PAT_FILE) || *strp != '/') && !itok(*strp);
518		 strp++)
519		;
520	}
521	if (!strp || (*strp && *strp != '/')) {
522	    /* No, do normal compilation. */
523	    strp = NULL;
524	    if (patcompswitch(0, &flags) == 0)
525		return NULL;
526	} else {
527	    /*
528	     * Yes, copy the string, and skip compilation altogether.
529	     * Null terminate for the benefit of globbing.
530	     * Leave metafied both for globbing and for our own
531	     * efficiency.
532	     */
533	    patparse = strp;
534	    len = strp - exp;
535	    patadd(exp, 0, len + 1, 0);
536	    patout[startoff + len] = '\0';
537	    patflags |= PAT_PURES;
538	}
539    }
540
541    /* end of compilation: safe to use pointers */
542    p = (Patprog)patout;
543    p->startoff = startoff;
544    p->patstartch = '\0';
545    p->globend = patglobflags;
546    p->flags = patflags;
547    p->mustoff = 0;
548    p->size = patsize;
549    p->patmlen = len;
550    p->patnpar = patnpar-1;
551
552    if (!strp) {
553	pscan = (Upat)(patout + startoff);
554
555	if (!(patflags & PAT_ANY) && P_OP(PATNEXT(pscan)) == P_END) {
556	    /* only one top level choice */
557	    pscan = P_OPERAND(pscan);
558
559	    if (flags & P_PURESTR) {
560		/*
561		 * The pattern can be matched with a simple strncmp/strcmp.
562		 * Careful in case we've overwritten the node for the next ptr.
563		 */
564		char *dst = patout + startoff;
565		Upat next;
566		p->flags |= PAT_PURES;
567		for (; pscan; pscan = next) {
568		    next = PATNEXT(pscan);
569		    if (P_OP(pscan) == P_EXACTLY) {
570			char *opnd = P_LS_STR(pscan), *mtest;
571			long oplen = P_LS_LEN(pscan), ilen;
572			int nmeta = 0;
573			/*
574			 * Unfortunately we unmetafied the string
575			 * and we need to put any metacharacters
576			 * back now we know it's a pure string.
577			 * This shouldn't happen too often, it's
578			 * just that there are some cases such
579			 * as . and .. in files where we really
580			 * need a pure string even if there are
581			 * pattern characters flying around.
582			 */
583			for (mtest = opnd, ilen = oplen; ilen;
584			     mtest++, ilen--)
585			    if (imeta(*mtest))
586				nmeta++;
587			if (nmeta) {
588			    char *oldpatout = patout;
589			    patadd(NULL, 0, nmeta, 0);
590			    /*
591			     * Yuk.
592			     */
593			    p = (Patprog)patout;
594			    opnd = patout + (opnd - oldpatout);
595			    dst = patout + startoff;
596			}
597
598			while (oplen--) {
599			    if (imeta(*opnd)) {
600				*dst++ = Meta;
601				*dst++ = *opnd++ ^ 32;
602			    } else {
603				*dst++ = *opnd++;
604			    }
605			}
606		    }
607		}
608		p->size = dst - patout;
609		/* patmlen is really strlen.  We don't need a null. */
610		p->patmlen = p->size - startoff;
611	    } else {
612		/* starting point info */
613		if (P_OP(pscan) == P_EXACTLY && !p->globflags &&
614		    P_LS_LEN(pscan))
615		    p->patstartch = *P_LS_STR(pscan);
616		/*
617		 * Find the longest literal string in something expensive.
618		 * This is itself not all that cheap if we have
619		 * case-insensitive matching or approximation, so don't.
620		 */
621		if ((flags & P_HSTART) && !p->globflags) {
622		    lng = NULL;
623		    len = 0;
624		    for (; pscan; pscan = PATNEXT(pscan))
625			if (P_OP(pscan) == P_EXACTLY &&
626			    P_LS_LEN(pscan) >= len) {
627			    lng = P_LS_STR(pscan);
628			    len = P_LS_LEN(pscan);
629			}
630		    if (lng) {
631			p->mustoff = lng - patout;
632			p->patmlen = len;
633		    }
634		}
635	    }
636	}
637    }
638
639    /*
640     * The pattern was compiled in a fixed buffer:  unless told otherwise,
641     * we stick the compiled pattern on the heap.  This is necessary
642     * for files where we will often be compiling multiple segments at once.
643     * But if we get the ZDUP flag we always put it in zalloc()ed memory.
644     */
645    if (patflags & PAT_ZDUP) {
646	Patprog newp = (Patprog)zalloc(patsize);
647	memcpy((char *)newp, (char *)p, patsize);
648	p = newp;
649    } else if (!(patflags & PAT_STATIC)) {
650	Patprog newp = (Patprog)zhalloc(patsize);
651	memcpy((char *)newp, (char *)p, patsize);
652	p = newp;
653    }
654
655    if (endexp)
656	*endexp = patparse;
657    return p;
658}
659
660/*
661 * Main body or parenthesized subexpression in pattern
662 * Parenthesis (and any ksh_glob gubbins) will have been removed.
663 */
664
665/**/
666static long
667patcompswitch(int paren, int *flagp)
668{
669    long starter, br, ender, excsync = 0;
670    int parno = 0;
671    int flags, gfchanged = 0;
672    long savglobflags = (long)patglobflags;
673    Upat ptr;
674
675    *flagp = 0;
676
677    if (paren && (patglobflags & GF_BACKREF) && patnpar <= NSUBEXP) {
678	/*
679	 * parenthesized:  make an open node.
680	 * We can only refer to the first nine parentheses.
681	 * For any others, we just use P_OPEN on its own; there's
682	 * no gain in arbitrarily limiting the number of parentheses.
683	 */
684	parno = patnpar++;
685	starter = patnode(P_OPEN + parno);
686    } else
687	starter = 0;
688
689    br = patnode(P_BRANCH);
690    if (!patcompbranch(&flags))
691	return 0;
692    if (patglobflags != (int)savglobflags)
693	gfchanged++;
694    if (starter)
695	pattail(starter, br);
696    else
697	starter = br;
698
699    *flagp |= flags & (P_HSTART|P_PURESTR);
700
701    while (*patparse == Bar ||
702	   (isset(EXTENDEDGLOB) && *patparse == Tilde &&
703	    (patparse[1] == '/' ||
704	     !memchr(patendseg, patparse[1], patendseglen)))) {
705	int tilde = *patparse++ == Tilde;
706	long gfnode = 0, newbr;
707
708	*flagp &= ~P_PURESTR;
709
710	if (tilde) {
711	    union upat up;
712	    /* excsync remembers the P_EXCSYNC node before a chain of
713	     * exclusions:  all point back to this.  only the
714	     * original (non-excluded) branch gets a trailing P_EXCSYNC.
715	     */
716	    if (!excsync) {
717		excsync = patnode(P_EXCSYNC);
718		patoptail(br, excsync);
719	    }
720	    /*
721	     * By default, approximations are turned off in exclusions:
722	     * we need to do this here as otherwise the code compiling
723	     * the exclusion doesn't know if the flags have really
724	     * changed if the error count gets restored.
725	     */
726	    patglobflags &= ~0xff;
727	    if (!(patflags & PAT_FILET) || paren) {
728		br = patnode(P_EXCLUDE);
729	    } else {
730		/*
731		 * At top level (paren == 0) in a file glob !(patflags
732		 * &PAT_FILET) do the exclusion prepending the file path
733		 * so far.  We need to flag this to avoid unnecessarily
734		 * copying the path.
735		 */
736		br = patnode(P_EXCLUDP);
737		patflags |= PAT_HAS_EXCLUDP;
738	    }
739	    up.p = NULL;
740	    patadd((char *)&up, 0, sizeof(up), 0);
741	    /* / is not treated as special if we are at top level */
742	    if (!paren && *patendseg == '/') {
743		tilde++;
744		patendseg++;
745		patendseglen--;
746		patendstr++;
747		patendstrlen--;
748	    }
749	} else {
750	    excsync = 0;
751	    br = patnode(P_BRANCH);
752	    /*
753	     * The position of the following statements means globflags
754	     * set in the main branch carry over to the exclusion.
755	     */
756	    if (!paren) {
757		patglobflags = 0;
758		if (((Patprog)patout)->globflags) {
759		    /*
760		     * If at top level, we need to reinitialize flags to zero,
761		     * since (#i)foo|bar only applies to foo and we stuck
762		     * the #i into the global flags.
763		     * We could have done it so that they only got set in the
764		     * first branch, but it's quite convenient having any
765		     * global flags set in the header and not buried in the
766		     * pattern.  (Or maybe it isn't and we should
767		     * forget this bit and always stick in an explicit GFLAGS
768		     * statement instead of using the header.)
769		     * Also, this can't happen for file globs where there are
770		     * no top-level |'s.
771		     *
772		     * No gfchanged, as nothing to follow branch at top
773		     * level.
774		     */
775		    union upat up;
776		    gfnode = patnode(P_GFLAGS);
777		    up.l = patglobflags;
778		    patadd((char *)&up, 0, sizeof(union upat), 0);
779		}
780	    } else {
781		patglobflags = (int)savglobflags;
782	    }
783	}
784	newbr = patcompbranch(&flags);
785	if (tilde == 2) {
786	    /* restore special treatment of / */
787	    patendseg--;
788	    patendseglen++;
789	    patendstr--;
790	    patendstrlen++;
791	}
792	if (!newbr)
793	    return 0;
794	if (gfnode)
795	    pattail(gfnode, newbr);
796	if (!tilde && patglobflags != (int)savglobflags)
797	    gfchanged++;
798	pattail(starter, br);
799	if (excsync)
800	    patoptail(br, patnode(P_EXCEND));
801	*flagp |= flags & P_HSTART;
802    }
803
804    /*
805     * Make a closing node, hooking it to the end.
806     * Note that we can't optimize P_NOTHING out here, since another
807     * branch at that point would indicate the current choices continue,
808     * which they don't.
809     */
810    ender = patnode(paren ? parno ? P_CLOSE+parno : P_NOTHING : P_END);
811    pattail(starter, ender);
812
813    /*
814     * Hook the tails of the branches to the closing node,
815     * except for exclusions which terminate where they are.
816     */
817    for (ptr = (Upat)patout + starter; ptr; ptr = PATNEXT(ptr))
818	if (!P_ISEXCLUDE(ptr))
819	    patoptail(ptr-(Upat)patout, ender);
820
821    /* check for proper termination */
822    if ((paren && *patparse++ != Outpar) ||
823	(!paren && *patparse &&
824	 !((patflags & PAT_FILE) && *patparse == '/')))
825	return 0;
826
827    if (paren && gfchanged) {
828	/*
829	 * Restore old values of flags when leaving parentheses.
830	 * gfchanged detects a change in any branch (except exclusions
831	 * which are separate), since we need to emit this even if
832	 * a later branch happened to put the flags back.
833	 */
834	pattail(ender, patnode(P_GFLAGS));
835	patglobflags = (int)savglobflags;
836	patadd((char *)&savglobflags, 0, sizeof(long), 0);
837    }
838
839    return starter;
840}
841
842/*
843 * Compile something ended by Bar, Outpar, Tilde, or end of string.
844 * Note the BRANCH or EXCLUDE tag must already have been omitted:
845 * this returns the position of the operand of that.
846 */
847
848/**/
849static long
850patcompbranch(int *flagp)
851{
852    long chain, latest = 0, starter;
853    int flags = 0;
854
855    *flagp = P_PURESTR;
856
857    starter = chain = 0;
858    while (!memchr(patendseg, *patparse, patendseglen) ||
859	   (*patparse == Tilde && patparse[1] != '/' &&
860	    memchr(patendseg, patparse[1], patendseglen))) {
861	if (isset(EXTENDEDGLOB) &&
862	    ((!isset(SHGLOB) &&
863	      (*patparse == Inpar && patparse[1] == Pound)) ||
864	     (isset(KSHGLOB) && *patparse == '@' && patparse[1] == Inpar &&
865	      patparse[2] == Pound))) {
866	    /* Globbing flags. */
867	    char *pp1 = patparse;
868	    int oldglobflags = patglobflags, ignore;
869	    long assert;
870	    patparse += (*patparse == '@') ? 3 : 2;
871	    if (!patgetglobflags(&patparse, &assert, &ignore))
872		return 0;
873	    if (!ignore) {
874		if (assert) {
875		    /*
876		     * Start/end assertion looking like flags, but
877		     * actually handled as a normal node
878		     */
879		    latest = patnode(assert);
880		    flags = 0;
881		} else {
882		    if (pp1 == patstart) {
883			/* Right at start of pattern, the simplest case.
884			 * Put them into the flags and don't emit anything.
885			 */
886			((Patprog)patout)->globflags = patglobflags;
887			continue;
888		    } else if (!*patparse) {
889			/* Right at the end, so just leave the flags for
890			 * the next Patprog in the chain to pick up.
891			 */
892			break;
893		    }
894		    /*
895		     * Otherwise, we have to stick them in as a pattern
896		     * matching nothing.
897		     */
898		    if (oldglobflags != patglobflags) {
899			/* Flags changed */
900			union upat up;
901			latest = patnode(P_GFLAGS);
902			up.l = patglobflags;
903			patadd((char *)&up, 0, sizeof(union upat), 0);
904		    } else {
905			/* No effect. */
906			continue;
907		    }
908		}
909	    } else if (!*patparse)
910		break;
911	    else
912		continue;
913	} else if (isset(EXTENDEDGLOB) && *patparse == Hat) {
914	    /*
915	     * ^pat:  anything but pat.  For proper backtracking,
916	     * etc., we turn this into (*~pat), except without the
917	     * parentheses.
918	     */
919	    patparse++;
920	    latest = patcompnot(0, &flags);
921	} else
922	    latest = patcomppiece(&flags);
923	if (!latest)
924	    return 0;
925	if (!starter)
926	    starter = latest;
927	if (!(flags & P_PURESTR))
928	    *flagp &= ~P_PURESTR;
929	if (!chain)
930	    *flagp |= flags & P_HSTART;
931	else
932	    pattail(chain, latest);
933	chain = latest;
934    }
935    /* check if there was nothing in the loop, i.e. () */
936    if (!chain)
937	starter = patnode(P_NOTHING);
938
939    return starter;
940}
941
942/* get glob flags, return 1 for success, 0 for failure */
943
944/**/
945int
946patgetglobflags(char **strp, long *assertp, int *ignore)
947{
948    char *nptr, *ptr = *strp;
949    zlong ret;
950
951    *assertp = 0;
952    *ignore = 1;
953    /* (#X): assumes we are still positioned on the first X */
954    for (; *ptr && *ptr != Outpar; ptr++) {
955	if (*ptr == 'q') {
956	    /* Glob qualifiers, ignored in pattern code */
957	    while (*ptr && *ptr != Outpar)
958		ptr++;
959	    break;
960	} else {
961	    *ignore = 0;
962	    switch (*ptr) {
963	    case 'a':
964		/* Approximate matching, max no. of errors follows */
965		ret = zstrtol(++ptr, &nptr, 10);
966		/*
967		 * We can't have more than 254, because we need 255 to
968		 * mark 254 errors in wbranch and exclude sync strings
969		 * (hypothetically --- hope no-one tries it).
970		 */
971		if (ret < 0 || ret > 254 || ptr == nptr)
972		    return 0;
973		patglobflags = (patglobflags & ~0xff) | (ret & 0xff);
974		ptr = nptr-1;
975		break;
976
977	    case 'l':
978		/* Lowercase in pattern matches lower or upper in target */
979		patglobflags = (patglobflags & ~GF_IGNCASE) | GF_LCMATCHUC;
980		break;
981
982	    case 'i':
983		/* Fully case insensitive */
984		patglobflags = (patglobflags & ~GF_LCMATCHUC) | GF_IGNCASE;
985		break;
986
987	    case 'I':
988		/* Restore case sensitivity */
989		patglobflags &= ~(GF_LCMATCHUC|GF_IGNCASE);
990		break;
991
992	    case 'b':
993		/* Make backreferences */
994		patglobflags |= GF_BACKREF;
995		break;
996
997	    case 'B':
998		/* Don't make backreferences */
999		patglobflags &= ~GF_BACKREF;
1000		break;
1001
1002	    case 'm':
1003		/* Make references to complete match */
1004		patglobflags |= GF_MATCHREF;
1005		break;
1006
1007	    case 'M':
1008		/* Don't */
1009		patglobflags &= ~GF_MATCHREF;
1010		break;
1011
1012	    case 's':
1013		*assertp = P_ISSTART;
1014		break;
1015
1016	    case 'e':
1017		*assertp = P_ISEND;
1018		break;
1019
1020	    case 'u':
1021		patglobflags |= GF_MULTIBYTE;
1022		break;
1023
1024	    case 'U':
1025		patglobflags &= ~GF_MULTIBYTE;
1026		break;
1027
1028	    default:
1029		return 0;
1030	    }
1031	}
1032    }
1033    if (*ptr != Outpar)
1034	return 0;
1035    /* Start/end assertions must appear on their own. */
1036    if (*assertp && (*strp)[1] != Outpar)
1037	return 0;
1038    *strp = ptr + 1;
1039    return 1;
1040}
1041
1042
1043static const char *colon_stuffs[]  = {
1044    "alpha", "alnum", "ascii", "blank", "cntrl", "digit", "graph",
1045    "lower", "print", "punct", "space", "upper", "xdigit", "IDENT",
1046    "IFS", "IFSSPACE", "WORD", NULL
1047};
1048
1049/*
1050 * Handle the guts of a [:stuff:] character class element.
1051 * start is the beginning of "stuff" and len is its length.
1052 * This code is exported for the benefit of completion matching.
1053 */
1054
1055/**/
1056mod_export int
1057range_type(char *start, int len)
1058{
1059    const char **csp;
1060
1061    for (csp = colon_stuffs; *csp; csp++) {
1062	if (!strncmp(start, *csp, len))
1063	    return (csp - colon_stuffs) + PP_FIRST;
1064    }
1065
1066    return PP_UNKWN;
1067}
1068
1069
1070/*
1071 * Convert the contents of a [...] or [^...] expression (just the
1072 * ... part) back into a string.  This is used by compfiles -p/-P
1073 * for some reason.  The compiled form (a metafied string) is
1074 * passed in rangestr.
1075 *
1076 * If outstr is non-NULL the compiled form is placed there.  It
1077 * must be sufficiently long.  A terminating NULL is appended.
1078 *
1079 * Return the length required, not including the terminating NULL.
1080 *
1081 * TODO: this is non-multibyte for now.  It will need to be defined
1082 * appropriately with MULTIBYTE_SUPPORT when the completion matching
1083 * code catches up.
1084 */
1085
1086/**/
1087mod_export int
1088pattern_range_to_string(char *rangestr, char *outstr)
1089{
1090    int len = 0;
1091
1092    while (*rangestr) {
1093	if (imeta(STOUC(*rangestr))) {
1094	    int swtype = STOUC(*rangestr) - STOUC(Meta);
1095
1096	    if (swtype == 0) {
1097		/* Ordindary metafied character */
1098		if (outstr)
1099		{
1100		    *outstr++ = Meta;
1101		    *outstr++ = rangestr[1] ^ 32;
1102		}
1103		len += 2;
1104		rangestr += 2;
1105	    } else if (swtype == PP_RANGE) {
1106		/* X-Y range */
1107		int i;
1108
1109		for (i = 0; i < 2; i++) {
1110		    if (*rangestr == Meta) {
1111			if (outstr) {
1112			    *outstr++ = Meta;
1113			    *outstr++ = rangestr[1];
1114			}
1115			len += 2;
1116			rangestr += 2;
1117		    } else {
1118			if (outstr)
1119			    *outstr++ = *rangestr;
1120			len++;
1121			rangestr++;
1122		    }
1123
1124		    if (i == 0) {
1125			if (outstr)
1126			    *outstr++ = '-';
1127			len++;
1128		    }
1129		}
1130	    } else if (swtype >= PP_FIRST && swtype <= PP_LAST) {
1131		/* [:stuff:]; we need to output [: and :] */
1132		const char *found = colon_stuffs[swtype - PP_FIRST];
1133		int newlen = strlen(found);
1134		if (outstr) {
1135		    strcpy(outstr, "[:");
1136		    outstr += 2;
1137		    memcpy(outstr, found, newlen);
1138		    outstr += newlen;
1139		    strcpy(outstr, ":]");
1140		    outstr += 2;
1141		}
1142		len += newlen + 4;
1143		rangestr++;
1144	    } else {
1145		/* shouldn't happen */
1146		DPUTS(1, "BUG: unknown PP_ code in pattern range");
1147		rangestr++;
1148	    }
1149	} else {
1150	    /* ordinary character, guaranteed no Meta handling needed */
1151	    if (outstr)
1152		*outstr++ = *rangestr;
1153	    len++;
1154	    rangestr++;
1155	}
1156    }
1157
1158    if (outstr)
1159	*outstr = '\0';
1160    return len;
1161}
1162
1163/*
1164 * compile a chunk such as a literal string or a [...] followed
1165 * by a possible hash operator
1166 */
1167
1168/**/
1169static long
1170patcomppiece(int *flagp)
1171{
1172    long starter = 0, next, op, opnd;
1173    int flags, flags2, kshchar, len, ch, patch, nmeta;
1174    int pound, count;
1175    union upat up;
1176    char *nptr, *str0, *ptr, *patprev;
1177    zrange_t from, to;
1178    char *charstart;
1179
1180    flags = 0;
1181    str0 = patprev = patparse;
1182    for (;;) {
1183	/*
1184	 * Check if we have a string. First, we need to make sure
1185	 * the string doesn't introduce a ksh-like parenthesized expression.
1186	 */
1187	kshchar = '\0';
1188	if (isset(KSHGLOB) && *patparse && patparse[1] == Inpar) {
1189	    if (strchr("?*+!@", *patparse))
1190		kshchar = STOUC(*patparse);
1191	    else if (*patparse == Star || *patparse == Quest)
1192		kshchar = STOUC(ztokens[*patparse - Pound]);
1193	}
1194
1195	/*
1196	 * End of string (or no string at all) if ksh-type parentheses,
1197	 * or special character, unless that character is a tilde and
1198	 * the character following is an end-of-segment character.  Thus
1199	 * tildes are not special if there is nothing following to
1200	 * be excluded.
1201	 */
1202	if (kshchar || (memchr(patendstr, *patparse, patendstrlen) &&
1203			(*patparse != Tilde ||
1204			 patparse[1] == '/' ||
1205			 !memchr(patendseg, patparse[1], patendseglen))))
1206	    break;
1207
1208	/* Remember the previous character for backtracking */
1209	patprev = patparse;
1210	METACHARINC(patparse);
1211    }
1212
1213    if (patparse > str0) {
1214	long slen = patparse - str0;
1215	int morelen;
1216
1217	/* Ordinary string: cancel kshchar lookahead */
1218	kshchar = '\0';
1219	/*
1220	 * Assume it matches a simple string until we find otherwise.
1221	 */
1222	flags |= P_PURESTR;
1223	DPUTS(patparse == str0, "BUG: matched nothing in patcomppiece.");
1224	/* more than one character matched? */
1225	morelen = (patprev > str0);
1226	/*
1227	 * If we have more than one character, a following hash
1228	 * or (#c...) only applies to the last, so backtrack one character.
1229	 */
1230	if (isset(EXTENDEDGLOB) &&
1231	    (*patparse == Pound ||
1232	     (*patparse == Inpar && patparse[1] == Pound &&
1233	      patparse[2] == 'c')) && morelen)
1234	    patparse = patprev;
1235	/*
1236	 * If len is 1, we can't have an active # following, so doesn't
1237	 * matter that we don't make X in `XX#' simple.
1238	 */
1239	if (!morelen)
1240	    flags |= P_SIMPLE;
1241	starter = patnode(P_EXACTLY);
1242
1243	/* Get length of string without metafication. */
1244	nmeta = 0;
1245	/* inherited from domatch, but why, exactly? */
1246	if (*str0 == Nularg)
1247	    str0++;
1248	for (ptr = str0; ptr < patparse; ptr++) {
1249	    if (*ptr == Meta) {
1250		nmeta++;
1251		ptr++;
1252	    }
1253	}
1254	slen = (patparse - str0) - nmeta;
1255	/* First add length, which is a long */
1256	patadd((char *)&slen, 0, sizeof(long), 0);
1257	/*
1258	 * Then the string, not null terminated.
1259	 * Unmetafy and untokenize; pass the final length,
1260	 * which is what we need to allocate, i.e. not including
1261	 * a count for each Meta in the string.
1262	 */
1263	patadd(str0, 0, slen, PA_UNMETA);
1264	nptr = P_LS_STR((Upat)patout + starter);
1265	/*
1266	 * It's much simpler to turn off pure string mode for
1267	 * any case-insensitive or approximate matching; usually,
1268	 * that is correct, or they wouldn't have been turned on.
1269	 * However, we need to make sure we match a "." or ".."
1270	 * in a file name as a pure string.  There's a minor bug
1271	 * that this will also apply to something like
1272	 * ..(#a1).. (i.e. the (#a1) has no effect), but if you're
1273	 * going to write funny patterns, you get no sympathy from me.
1274	 */
1275	if (patglobflags &
1276#ifdef __CYGWIN__
1277	    /*
1278	     * As above: don't use pattern matching for files
1279	     * just because of case insensitivity if file system
1280	     * is known to be case insensitive.
1281	     *
1282	     * This is known to be necessary in at least one case:
1283	     * if "mount -c /" is in effect, so that drives appear
1284	     * directly under / instead of the usual /cygdrive, they
1285	     * aren't shown by readdir().  So it's vital we don't use
1286	     * globbing to find "/c", since that'll fail.
1287	     */
1288	    ((patflags & PAT_FILE) ?
1289	    (0xFF|GF_LCMATCHUC) :
1290	    (0xFF|GF_LCMATCHUC|GF_IGNCASE))
1291#else
1292	    (0xFF|GF_LCMATCHUC|GF_IGNCASE)
1293#endif
1294	    ) {
1295	    if (!(patflags & PAT_FILE))
1296		flags &= ~P_PURESTR;
1297	    else if (!(nptr[0] == '.' &&
1298		       (slen == 1 || (nptr[1] == '.' && slen == 2))))
1299		flags &= ~P_PURESTR;
1300	}
1301    } else {
1302	if (kshchar)
1303	    patparse++;
1304
1305	patch = *patparse;
1306	METACHARINC(patparse);
1307	switch(patch) {
1308	case Quest:
1309	    flags |= P_SIMPLE;
1310	    starter = patnode(P_ANY);
1311	    break;
1312	case Star:
1313	    /* kshchar is used as a sign that we can't have #'s. */
1314	    kshchar = -1;
1315	    starter = patnode(P_STAR);
1316	    break;
1317	case Inbrack:
1318	    flags |= P_SIMPLE;
1319	    if (*patparse == Hat || *patparse == '^' || *patparse == '!') {
1320		patparse++;
1321		starter = patnode(P_ANYBUT);
1322	    } else
1323		starter = patnode(P_ANYOF);
1324	    if (*patparse == Outbrack) {
1325		patparse++;
1326		patadd(NULL, ']', 1, PA_NOALIGN);
1327	    }
1328	    while (*patparse && *patparse != Outbrack) {
1329		/* Meta is not a token */
1330		if (*patparse == Inbrack && patparse[1] == ':' &&
1331			(nptr = strchr(patparse+2, ':')) &&
1332			nptr[1] == Outbrack) {
1333			/* Posix range. */
1334			patparse += 2;
1335			len = nptr - patparse;
1336			ch = range_type(patparse, len);
1337			patparse = nptr + 2;
1338			if (ch != PP_UNKWN)
1339			    patadd(NULL, STOUC(Meta) + ch, 1, PA_NOALIGN);
1340			continue;
1341		}
1342		charstart = patparse;
1343		METACHARINC(patparse);
1344
1345		if (*patparse == '-' && patparse[1] &&
1346		    patparse[1] != Outbrack) {
1347		    patadd(NULL, STOUC(Meta)+PP_RANGE, 1, PA_NOALIGN);
1348		    if (itok(*charstart)) {
1349			patadd(0, STOUC(ztokens[*charstart - Pound]), 1,
1350			       PA_NOALIGN);
1351		    } else {
1352			patadd(charstart, 0, patparse-charstart, PA_NOALIGN);
1353		    }
1354		    charstart = ++patparse;	/* skip ASCII '-' */
1355		    METACHARINC(patparse);
1356		}
1357		if (itok(*charstart)) {
1358		    patadd(0, STOUC(ztokens[*charstart - Pound]), 1,
1359			   PA_NOALIGN);
1360		} else {
1361		    patadd(charstart, 0, patparse-charstart, PA_NOALIGN);
1362		}
1363	    }
1364	    if (*patparse != Outbrack)
1365		return 0;
1366	    patparse++;
1367	    /* terminate null string and fix alignment */
1368	    patadd(NULL, 0, 1, 0);
1369	    break;
1370	case Inpar:
1371	    /* is this how to treat parentheses in SHGLOB? */
1372	    if (isset(SHGLOB) && !kshchar)
1373		return 0;
1374	    if (kshchar == '!') {
1375		/* This is nasty, we should really either handle all
1376		 * kshglobbing below or here.  But most of the
1377		 * others look like non-ksh patterns, while this one
1378		 * doesn't, so we handle it here and leave the rest.
1379		 * We treat it like an extendedglob ^, except that
1380		 * it goes into parentheses.
1381		 *
1382		 * If we did do kshglob here, we could support
1383		 * the old behaviour that things like !(foo)##
1384		 * work, but it makes the code more complicated at
1385		 * the expense of allowing the user to do things
1386		 * they shouldn't.
1387		 */
1388		if (!(starter = patcompnot(1, &flags2)))
1389		    return 0;
1390	    } else if (!(starter = patcompswitch(1, &flags2)))
1391		return 0;
1392	    flags |= flags2 & P_HSTART;
1393	    break;
1394	case Inang:
1395	    /* Numeric glob */
1396	    len = 0;		/* beginning present 1, end present 2 */
1397	    if (idigit(*patparse)) {
1398		from = (zrange_t) zstrtol((char *)patparse,
1399					 (char **)&nptr, 10);
1400		patparse = nptr;
1401		len |= 1;
1402	    }
1403	    DPUTS(*patparse != '-', "BUG: - missing from numeric glob");
1404	    patparse++;
1405	    if (idigit(*patparse)) {
1406		to = (zrange_t) zstrtol((char *)patparse,
1407					  (char **)&nptr, 10);
1408		patparse = nptr;
1409		len |= 2;
1410	    }
1411	    if (*patparse != Outang)
1412		return 0;
1413	    patparse++;
1414	    switch(len) {
1415	    case 3:
1416		starter = patnode(P_NUMRNG);
1417		patadd((char *)&from, 0, sizeof(from), 0);
1418		patadd((char *)&to, 0, sizeof(to), 0);
1419		break;
1420	    case 2:
1421		starter = patnode(P_NUMTO);
1422		patadd((char *)&to, 0, sizeof(to), 0);
1423		break;
1424	    case 1:
1425		starter = patnode(P_NUMFROM);
1426		patadd((char *)&from, 0, sizeof(from), 0);
1427		break;
1428	    case 0:
1429		starter = patnode(P_NUMANY);
1430		break;
1431	    }
1432	    /* This can't be simple, because it isn't.
1433	     * Mention in manual that matching digits with [...]
1434	     * is more efficient.
1435	     */
1436	    break;
1437	case Pound:
1438	    DPUTS(!isset(EXTENDEDGLOB), "BUG: # not treated as string");
1439	    /*
1440	     * A hash here is an error; it should follow something
1441	     * repeatable.
1442	     */
1443	    return 0;
1444	    break;
1445	case Bnullkeep:
1446	    /*
1447	     * Marker for restoring a backslash in output:
1448	     * does not match a character.
1449	     */
1450	    next = patcomppiece(flagp);
1451	    /*
1452	     * Can't match a pure string since we need to do this
1453	     * as multiple chunks.
1454	     */
1455	    *flagp &= ~P_PURESTR;
1456	    return next;
1457	    break;
1458#ifdef DEBUG
1459	default:
1460	    dputs("BUG: character not handled in patcomppiece");
1461	    return 0;
1462	    break;
1463#endif
1464	}
1465    }
1466
1467    count = 0;
1468    if (!(pound = (*patparse == Pound && isset(EXTENDEDGLOB))) &&
1469	!(count = (isset(EXTENDEDGLOB) && *patparse == Inpar &&
1470		   patparse[1] == Pound && patparse[2] == 'c')) &&
1471	(kshchar <= 0 || kshchar == '@' || kshchar == '!')) {
1472	*flagp = flags;
1473	return starter;
1474    }
1475
1476    /* too much at once doesn't currently work */
1477    if (kshchar && pound)
1478	return 0;
1479
1480    if (kshchar == '*') {
1481	op = P_ONEHASH;
1482	*flagp = P_HSTART;
1483    } else if (kshchar == '+') {
1484	op = P_TWOHASH;
1485	*flagp = P_HSTART;
1486    } else if (kshchar == '?') {
1487	op = 0;
1488	*flagp = 0;
1489    } else if (count) {
1490	op = P_COUNT;
1491	patparse += 3;
1492	*flagp = P_HSTART;
1493    } else if (*++patparse == Pound) {
1494	op = P_TWOHASH;
1495	patparse++;
1496	*flagp = P_HSTART;
1497    } else {
1498	op = P_ONEHASH;
1499	*flagp = P_HSTART;
1500    }
1501
1502    /*
1503     * Note optimizations with pointers into P_NOTHING branches:  some
1504     * should logically point to next node after current piece.
1505     *
1506     * Backtracking is also encoded in a slightly obscure way:  the
1507     * code emitted ensures we test the non-empty branch of complex
1508     * patterns before the empty branch on each repetition.  Hence
1509     * each time we fail on a non-empty branch, we try the empty branch,
1510     * which is equivalent to backtracking.
1511     */
1512    if (op == P_COUNT) {
1513	/* (#cN,M) */
1514	union upat countargs[P_CT_OPERAND];
1515	char *opp = patparse;
1516
1517	countargs[0].l = P_COUNT;
1518	countargs[P_CT_CURRENT].l = 0L;
1519	countargs[P_CT_MIN].l = (long)zstrtol(patparse, &patparse, 10);
1520	if (patparse == opp) {
1521	    /* missing number treated as zero */
1522	    countargs[P_CT_MIN].l = 0L;
1523	}
1524	if (*patparse != ',' && *patparse != Comma) {
1525	    /* either max = min or error */
1526	    if (*patparse != Outpar)
1527		return 0;
1528	    countargs[P_CT_MAX].l = countargs[P_CT_MIN].l;
1529	} else {
1530	    opp = ++patparse;
1531	    countargs[P_CT_MAX].l = (long)zstrtol(patparse, &patparse, 10);
1532	    if (*patparse != Outpar)
1533		return 0;
1534	    if (patparse == opp) {
1535		/* missing number treated as infinity: record as -1 */
1536		countargs[P_CT_MAX].l = -1L;
1537	    }
1538	}
1539	patparse++;
1540	countargs[P_CT_PTR].p = NULL;
1541	/* Mark this chain as a min/max count... */
1542	patinsert(P_COUNTSTART, starter, (char *)countargs, sizeof(countargs));
1543	/*
1544	 * The next of the operand is a loop back to the P_COUNT.  This is
1545	 * how we get recursion for the count.  We don't loop back to
1546	 * the P_COUNTSTART; that's used for initialising the count
1547	 * and saving and restoring the count for any enclosing use
1548	 * of the match.
1549	 */
1550	opnd = P_OPERAND(starter) + P_CT_OPERAND;
1551	pattail(opnd, patnode(P_BACK));
1552	pattail(opnd, P_OPERAND(starter));
1553	/*
1554	 * The next of the counter operators is what follows the
1555	 * closure.
1556	 * This handles matching of the tail.
1557	 */
1558	next = patnode(P_NOTHING);
1559	pattail(starter, next);
1560	pattail(P_OPERAND(starter), next);
1561    } else if ((flags & P_SIMPLE) && (op == P_ONEHASH || op == P_TWOHASH) &&
1562	P_OP((Upat)patout+starter) == P_ANY) {
1563	/* Optimize ?# to *.  Silly thing to do, since who would use
1564	 * use ?# ? But it makes the later code shorter.
1565	 */
1566	Upat uptr = (Upat)patout + starter;
1567	if (op == P_TWOHASH) {
1568	    /* ?## becomes ?* */
1569	    uptr->l = (uptr->l & ~0xff) | P_ANY;
1570	    pattail(starter, patnode(P_STAR));
1571	} else {
1572	    uptr->l = (uptr->l & ~0xff) | P_STAR;
1573	}
1574    } else if ((flags & P_SIMPLE) && op && !(patglobflags & 0xff)) {
1575	/* Simplify, but not if we need to look for approximations. */
1576	patinsert(op, starter, NULL, 0);
1577    } else if (op == P_ONEHASH) {
1578	/* Emit x# as (x&|), where & means "self". */
1579	up.p = NULL;
1580	patinsert(P_WBRANCH, starter, (char *)&up, sizeof(up));
1581	                                      /* Either x */
1582	patoptail(starter, patnode(P_BACK));  /* and loop */
1583	patoptail(starter, starter);	      /* back */
1584	pattail(starter, patnode(P_BRANCH));  /* or */
1585	pattail(starter, patnode(P_NOTHING)); /* null. */
1586    } else if (op == P_TWOHASH) {
1587	/* Emit x## as x(&|) where & means "self". */
1588	next = patnode(P_WBRANCH);	      /* Either */
1589	up.p = NULL;
1590	patadd((char *)&up, 0, sizeof(up), 0);
1591	pattail(starter, next);
1592	pattail(patnode(P_BACK), starter);    /* loop back */
1593	pattail(next, patnode(P_BRANCH));     /* or */
1594	pattail(starter, patnode(P_NOTHING)); /* null. */
1595    } else if (kshchar == '?') {
1596	/* Emit ?(x) as (x|) */
1597	patinsert(P_BRANCH, starter, NULL, 0); /* Either x */
1598	pattail(starter, patnode(P_BRANCH));   /* or */
1599	next = patnode(P_NOTHING);	       /* null */
1600	pattail(starter, next);
1601	patoptail(starter, next);
1602    }
1603    if (*patparse == Pound)
1604	return 0;
1605
1606    return starter;
1607}
1608
1609/*
1610 * Turn a ^foo (paren = 0) or !(foo) (paren = 1) into *~foo with
1611 * parentheses if necessary.   As you see, that's really quite easy.
1612 */
1613
1614/**/
1615static long
1616patcompnot(int paren, int *flagsp)
1617{
1618    union upat up;
1619    long excsync, br, excl, n, starter;
1620    int dummy;
1621
1622    /* Here, we're matching a star at the start. */
1623    *flagsp = P_HSTART;
1624
1625    starter = patnode(P_BRANCH);
1626    br = patnode(P_STAR);
1627    excsync = patnode(P_EXCSYNC);
1628    pattail(br, excsync);
1629    pattail(starter, excl = patnode(P_EXCLUDE));
1630    up.p = NULL;
1631    patadd((char *)&up, 0, sizeof(up), 0);
1632    if (!(br = (paren ? patcompswitch(1, &dummy) : patcompbranch(&dummy))))
1633	return 0;
1634    pattail(br, patnode(P_EXCEND));
1635    n = patnode(P_NOTHING); /* just so much easier */
1636    pattail(excsync, n);
1637    pattail(excl, n);
1638
1639    return starter;
1640}
1641
1642/* Emit a node */
1643
1644/**/
1645static long
1646patnode(long op)
1647{
1648    long starter = (Upat)patcode - (Upat)patout;
1649    union upat up;
1650
1651    up.l = op;
1652    patadd((char *)&up, 0, sizeof(union upat), 0);
1653    return starter;
1654}
1655
1656/*
1657 * insert an operator in front of an already emitted operand:
1658 * we relocate the operand.  there had better be nothing else after.
1659 */
1660
1661/**/
1662static void
1663patinsert(long op, int opnd, char *xtra, int sz)
1664{
1665    char *src, *dst, *opdst;
1666    union upat buf, *lptr;
1667
1668    buf.l = 0;
1669    patadd((char *)&buf, 0, sizeof(buf), 0);
1670    if (sz)
1671	patadd(xtra, 0, sz, 0);
1672    src = patcode - sizeof(union upat) - sz;
1673    dst = patcode;
1674    opdst = patout + opnd * sizeof(union upat);
1675    while (src > opdst)
1676	*--dst = *--src;
1677
1678    /* A cast can't be an lvalue */
1679    lptr = (Upat)opdst;
1680    lptr->l = op;
1681    opdst += sizeof(union upat);
1682    while (sz--)
1683	*opdst++ = *xtra++;
1684}
1685
1686/* set the 'next' pointer at the end of a node chain */
1687
1688/**/
1689static void
1690pattail(long p, long val)
1691{
1692    Upat scan, temp;
1693    long offset;
1694
1695    scan = (Upat)patout + p;
1696    for (;;) {
1697	if (!(temp = PATNEXT(scan)))
1698	    break;
1699	scan = temp;
1700    }
1701
1702    offset = (P_OP(scan) == P_BACK)
1703	? (scan - (Upat)patout) - val : val - (scan - (Upat)patout);
1704
1705    scan->l |= offset << 8;
1706}
1707
1708/* do pattail, but on operand of first argument; nop if operandless */
1709
1710/**/
1711static void patoptail(long p, long val)
1712{
1713    Upat ptr = (Upat)patout + p;
1714    int op = P_OP(ptr);
1715    if (!p || !P_ISBRANCH(ptr))
1716	return;
1717    if (op == P_BRANCH)
1718	pattail(P_OPERAND(p), val);
1719    else
1720	pattail(P_OPERAND(p) + 1, val);
1721}
1722
1723
1724/*
1725 * Run a pattern.
1726 */
1727static char *patinstart;	/* Start of input string */
1728static char *patinend;		/* End of input string */
1729static char *patinput;		/* String input pointer */
1730static char *patinpath;		/* Full path for use with ~ exclusions */
1731static int   patinlen;		/* Length of last successful match.
1732				 * Includes count of Meta characters.
1733				 */
1734
1735static char *patbeginp[NSUBEXP];	/* Pointer to backref beginnings */
1736static char *patendp[NSUBEXP];		/* Pointer to backref ends */
1737static int parsfound;			/* parentheses (with backrefs) found */
1738
1739static int globdots;			/* Glob initial dots? */
1740
1741/*
1742 * Character functions operating on unmetafied strings.
1743 */
1744#ifdef MULTIBYTE_SUPPORT
1745
1746/* Get a character from the start point in a string */
1747#define CHARREF(x, y)	charref((x), (y))
1748static wchar_t
1749charref(char *x, char *y)
1750{
1751    wchar_t wc;
1752    size_t ret;
1753
1754    if (!(patglobflags & GF_MULTIBYTE) || !(STOUC(*x) & 0x80))
1755	return (wchar_t) STOUC(*x);
1756
1757    ret = mbrtowc(&wc, x, y-x, &shiftstate);
1758
1759    if (ret == MB_INVALID || ret == MB_INCOMPLETE) {
1760	/* Error.  Treat as single byte. */
1761	/* Reset the shift state for next time. */
1762	memset(&shiftstate, 0, sizeof(shiftstate));
1763	return (wchar_t) STOUC(*x);
1764    }
1765
1766    return wc;
1767}
1768
1769/* Get  a pointer to the next character */
1770#define CHARNEXT(x, y)	charnext((x), (y))
1771static char *
1772charnext(char *x, char *y)
1773{
1774    wchar_t wc;
1775    size_t ret;
1776
1777    if (!(patglobflags & GF_MULTIBYTE) || !(STOUC(*x) & 0x80))
1778	return x + 1;
1779
1780    ret = mbrtowc(&wc, x, y-x, &shiftstate);
1781
1782    if (ret == MB_INVALID || ret == MB_INCOMPLETE) {
1783	/* Error.  Treat as single byte. */
1784	/* Reset the shift state for next time. */
1785	memset(&shiftstate, 0, sizeof(shiftstate));
1786	return x + 1;
1787    }
1788
1789    /* Nulls here are normal characters */
1790    return x + (ret ? ret : 1);
1791}
1792
1793/* Increment a pointer past the current character. */
1794#define CHARINC(x, y)	((x) = charnext((x), (y)))
1795
1796
1797/* Get a character and increment */
1798#define CHARREFINC(x, y, z)	charrefinc(&(x), (y), (z))
1799static wchar_t
1800charrefinc(char **x, char *y, int *z)
1801{
1802    wchar_t wc;
1803    size_t ret;
1804
1805    if (!(patglobflags & GF_MULTIBYTE) || !(STOUC(**x) & 0x80))
1806	return (wchar_t) STOUC(*(*x)++);
1807
1808    ret = mbrtowc(&wc, *x, y-*x, &shiftstate);
1809
1810    if (ret == MB_INVALID || ret == MB_INCOMPLETE) {
1811	/* Error.  Treat as single byte, but flag. */
1812	*z = 1;
1813	/* Reset the shift state for next time. */
1814	memset(&shiftstate, 0, sizeof(shiftstate));
1815	return (wchar_t) STOUC(*(*x)++);
1816    }
1817
1818    /* Nulls here are normal characters */
1819    *x += ret ? ret : 1;
1820
1821    return wc;
1822}
1823
1824
1825/*
1826 * Counter the number of characters between two pointers, smaller first
1827 *
1828 * This is used when setting values in parameters, so we obey
1829 * the MULTIBYTE option (even if it's been overridden locally).
1830 */
1831#define CHARSUB(x,y)	charsub(x, y)
1832static ptrdiff_t
1833charsub(char *x, char *y)
1834{
1835    ptrdiff_t res = 0;
1836    size_t ret;
1837    wchar_t wc;
1838
1839    if (!isset(MULTIBYTE))
1840	return y - x;
1841
1842    while (x < y) {
1843	ret = mbrtowc(&wc, x, y-x, &shiftstate);
1844
1845	if (ret == MB_INVALID || ret == MB_INCOMPLETE) {
1846	    /* Error.  Treat remainder as single characters */
1847	    return res + (y - x);
1848	}
1849
1850	/* Treat nulls as normal characters */
1851	if (!ret)
1852	    ret = 1;
1853	res++;
1854	x += ret;
1855    }
1856
1857    return res;
1858}
1859
1860#else /* no MULTIBYTE_SUPPORT */
1861
1862/* Get a character from the start point in a string */
1863#define CHARREF(x, y)	(STOUC(*(x)))
1864/* Get  a pointer to the next character */
1865#define CHARNEXT(x, y)	((x)+1)
1866/* Increment a pointer past the current character. */
1867#define CHARINC(x, y)	((x)++)
1868/* Get a character and increment */
1869#define CHARREFINC(x, y, z)	(STOUC(*(x)++))
1870/* Counter the number of characters between two pointers, smaller first */
1871#define CHARSUB(x,y)	((y) - (x))
1872
1873#endif /* MULTIBYTE_SUPPORT */
1874
1875/*
1876 * The following need to be accessed in the globbing scanner for
1877 * a multi-component file path.  See horror story in glob.c.
1878 */
1879/**/
1880int errsfound;				/* Total error count so far */
1881
1882/**/
1883int forceerrs;				/* Forced maximum error count */
1884
1885/**/
1886void
1887pattrystart(void)
1888{
1889    forceerrs = -1;
1890    errsfound = 0;
1891}
1892
1893/*
1894 * Test prog against null-terminated, metafied string.
1895 */
1896
1897/**/
1898mod_export int
1899pattry(Patprog prog, char *string)
1900{
1901    return pattryrefs(prog, string, -1, -1, 0, NULL, NULL, NULL);
1902}
1903
1904/*
1905 * Test prog against string of given length, no null termination
1906 * but still metafied at this point.  offset gives an offset
1907 * to include in reported match indices
1908 */
1909
1910/**/
1911mod_export int
1912pattrylen(Patprog prog, char *string, int len, int unmetalen, int offset)
1913{
1914    return pattryrefs(prog, string, len, unmetalen, offset, NULL, NULL, NULL);
1915}
1916
1917/*
1918 * Test prog against string with given lengths.  The input
1919 * string is metafied; stringlen is the raw string length, and
1920 * unmetalen the number of characters in the original string (some
1921 * of which may now be metafied).  Either value may be -1
1922 * to indicate a null-terminated string which will be counted.  Note
1923 * there may be a severe penalty for this if a lot of matching is done
1924 * on one string.
1925 *
1926 * offset is the position in the original string (not seen by
1927 * the pattern module) at which we are trying to match.
1928 * This is added in to the positions recorded in patbeginp and patendp
1929 * when we are looking for substrings.  Currently this only happens
1930 * in the parameter substitution code.
1931 *
1932 * Note this is a character offset, i.e. a metafied character
1933 * counts as 1.
1934 *
1935 * The last three arguments are used to report the positions for the
1936 * backreferences. On entry, *nump should contain the maximum number
1937 * of positions to report.  In this case the match, mbegin, mend
1938 * arrays are not altered.
1939 *
1940 * If nump is NULL but endp is not NULL, then *endp is set to the
1941 * end position of the match, taking into account patinstart.
1942 */
1943
1944/**/
1945mod_export int
1946pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen,
1947	   int patoffset,
1948	   int *nump, int *begp, int *endp)
1949{
1950    int i, maxnpos = 0, ret, needfullpath, unmetalenp;
1951    int origlen;
1952    char **sp, **ep, *tryalloced, *ptr;
1953    char *progstr = (char *)prog + prog->startoff;
1954
1955    if (nump) {
1956	maxnpos = *nump;
1957	*nump = 0;
1958    }
1959    /* inherited from domatch, but why, exactly? */
1960    if (*string == Nularg) {
1961	string++;
1962	unmetalen--;
1963    }
1964
1965    if (stringlen < 0)
1966	stringlen = strlen(string);
1967    origlen = stringlen;
1968
1969    patflags = prog->flags;
1970    /*
1971     * For a top-level ~-exclusion, we will need the full
1972     * path to exclude, so copy the path so far and append the
1973     * current test string.
1974     */
1975    needfullpath = (patflags & PAT_HAS_EXCLUDP) && pathpos;
1976
1977    /* Get the length of the full string when unmetafied. */
1978    if (unmetalen < 0)
1979	unmetalen = ztrsub(string + stringlen, string);
1980    if (needfullpath)
1981	unmetalenp = ztrsub(pathbuf + pathpos, pathbuf);
1982    else
1983	unmetalenp = 0;
1984
1985    DPUTS(needfullpath && (patflags & (PAT_PURES|PAT_ANY)),
1986	  "rum sort of file exclusion");
1987    /*
1988     * Partly for efficiency, and partly for the convenience of
1989     * globbing, we don't unmetafy pure string patterns, and
1990     * there's no reason to if the pattern is just a *.
1991     */
1992    if (!(patflags & (PAT_PURES|PAT_ANY))
1993	&& (needfullpath || unmetalen != stringlen)) {
1994	/*
1995	 * We need to copy if we need to prepend the path so far
1996	 * (in which case we copy both chunks), or if we have
1997	 * Meta characters.
1998	 */
1999	char *dst;
2000	int icopy, ncopy;
2001
2002	dst = tryalloced = zalloc(unmetalen + unmetalenp);
2003
2004	if (needfullpath) {
2005	    /* loop twice, copy path buffer first time */
2006	    ptr = pathbuf;
2007	    ncopy = unmetalenp;
2008	} else {
2009	    /* just loop once, copy string with unmetafication */
2010	    ptr = string;
2011	    ncopy = unmetalen;
2012	}
2013	for (icopy = 0; icopy < 2; icopy++) {
2014	    for (i = 0; i < ncopy; i++) {
2015		if (*ptr == Meta) {
2016		    ptr++;
2017		    *dst++ = *ptr++ ^ 32;
2018		} else {
2019		    *dst++ = *ptr++;
2020		}
2021	    }
2022	    if (!needfullpath)
2023		break;
2024	    /* next time append test string to path so far */
2025	    ptr = string;
2026	    ncopy = unmetalen;
2027	}
2028
2029	if (needfullpath) {
2030	    patinstart = tryalloced + unmetalenp;
2031	    patinpath = tryalloced;
2032	} else {
2033	    patinstart = tryalloced;
2034	    patinpath = NULL;
2035	}
2036	stringlen = unmetalen;
2037    } else {
2038	patinstart = string;
2039	tryalloced = patinpath = NULL;
2040    }
2041
2042    patinend = patinstart + stringlen;
2043    /*
2044     * From now on we do not require NULL termination of
2045     * the test string.  There should also be no more references
2046     * to the variable string.
2047     */
2048
2049    if (prog->flags & (PAT_PURES|PAT_ANY)) {
2050	/*
2051	 * Either we are testing against a pure string,
2052	 * or we can match anything at all.
2053	 */
2054	int ret;
2055	if (prog->flags & PAT_ANY) {
2056	    /*
2057	     * Optimisation for a single "*": always matches
2058	     * (except for no_glob_dots, see below).
2059	     */
2060	    ret = 1;
2061	} else {
2062	    /*
2063	     * Testing a pure string.  See if initial
2064	     * components match.
2065	     */
2066	    int lendiff = stringlen - prog->patmlen;
2067	    if (lendiff < 0) {
2068		/* No, the pattern string is too long. */
2069		ret = 0;
2070	    } else if (!memcmp(progstr, patinstart, prog->patmlen)) {
2071		/*
2072		 * Initial component matches.  Matches either
2073		 * if lengths are the same or we are not anchored
2074		 * to the end of the string.
2075		 */
2076		ret = !lendiff || (prog->flags & PAT_NOANCH);
2077	    } else {
2078		/* No match. */
2079		ret = 0;
2080	    }
2081	}
2082	if (ret) {
2083	    /*
2084	     * For files, we won't match initial "."s unless
2085	     * glob_dots is set.
2086	     */
2087	    if ((prog->flags & PAT_NOGLD) && *patinstart == '.') {
2088		ret = 0;
2089	    } else {
2090		/*
2091		 * Remember the length in case used for ${..#..} etc.
2092		 * In this case, we didn't unmetafy the string.
2093		 */
2094		patinlen = (int)prog->patmlen;
2095		/* if matching files, must update globbing flags */
2096		patglobflags = prog->globend;
2097
2098		if ((patglobflags & GF_MATCHREF) &&
2099		    !(patflags & PAT_FILE)) {
2100		    char *str = ztrduppfx(patinstart, patinlen);
2101		    char *ptr = patinstart;
2102		    int mlen = 0;
2103
2104		    /*
2105		     * Count the characters.  We're not using CHARSUB()
2106		     * because the string is still metafied.  We're
2107		     * not using mb_metastrlen() because that expects
2108		     * the string to be null terminated.
2109		     */
2110		    MB_METACHARINIT();
2111		    while (ptr < patinstart + patinlen) {
2112			mlen++;
2113			ptr += MB_METACHARLEN(ptr);
2114		    }
2115
2116		    setsparam("MATCH", str);
2117		    setiparam("MBEGIN",
2118			      (zlong)(patoffset + !isset(KSHARRAYS)));
2119		    setiparam("MEND",
2120			      (zlong)(mlen + patoffset +
2121				      !isset(KSHARRAYS) - 1));
2122		}
2123	    }
2124	}
2125
2126	if (tryalloced)
2127	    zfree(tryalloced, unmetalen + unmetalenp);
2128
2129	return ret;
2130    } else {
2131	/*
2132	 * Test for a `must match' string, unless we're scanning for a match
2133	 * in which case we don't need to do this each time.
2134	 */
2135	ret = 1;
2136	if (!(prog->flags & PAT_SCAN) && prog->mustoff)
2137	{
2138	    char *testptr;	/* start pointer into test string */
2139	    char *teststop;	/* last point from which we can match */
2140	    char *patptr = (char *)prog + prog->mustoff;
2141	    int patlen = prog->patmlen;
2142	    int found = 0;
2143
2144	    if (patlen > stringlen) {
2145		/* Too long, can't match. */
2146		ret = 0;
2147	    } else {
2148		teststop = patinend - patlen;
2149
2150		for (testptr = patinstart; testptr <= teststop; testptr++)
2151		{
2152		    if (!memcmp(testptr, patptr, patlen)) {
2153			found = 1;
2154			break;
2155		    }
2156		}
2157
2158		if (!found)
2159		    ret = 0;
2160	    }
2161	}
2162	if (!ret) {
2163	    if (tryalloced)
2164		zfree(tryalloced, unmetalen + unmetalenp);
2165	    return 0;
2166	}
2167
2168	patglobflags = prog->globflags;
2169	if (!(patflags & PAT_FILE)) {
2170	    forceerrs = -1;
2171	    errsfound = 0;
2172	}
2173	globdots = !(patflags & PAT_NOGLD);
2174	parsfound = 0;
2175
2176	patinput = patinstart;
2177
2178	if (patmatch((Upat)progstr)) {
2179	    /*
2180	     * we were lazy and didn't save the globflags if an exclusion
2181	     * failed, so set it now
2182	     */
2183	    patglobflags = prog->globend;
2184
2185	    /*
2186	     * Record length of successful match, including Meta
2187	     * characters.  Do it here so that patmatchlen() can return
2188	     * it even if we delete the pattern strings.
2189	     */
2190	    patinlen = patinput - patinstart;
2191	    /*
2192	     * Optimization: if we didn't find any Meta characters
2193	     * to begin with, we don't need to look for them now.
2194	     */
2195	    if (unmetalen != origlen) {
2196		for (ptr = patinstart; ptr < patinput; ptr++)
2197		    if (imeta(*ptr))
2198			patinlen++;
2199	    }
2200
2201	    /*
2202	     * Should we clear backreferences and matches on a failed
2203	     * match?
2204	     */
2205	    if ((patglobflags & GF_MATCHREF) && !(patflags & PAT_FILE)) {
2206		/*
2207		 * m flag: for global match.  This carries no overhead
2208		 * in the pattern matching part.
2209		 *
2210		 * Remember the test pattern is already unmetafied.
2211		 */
2212		char *str;
2213		int mlen = CHARSUB(patinstart, patinput);
2214
2215		str = metafy(patinstart, patinput - patinstart, META_DUP);
2216		setsparam("MATCH", str);
2217		setiparam("MBEGIN", (zlong)(patoffset + !isset(KSHARRAYS)));
2218		setiparam("MEND",
2219			  (zlong)(mlen + patoffset +
2220				  !isset(KSHARRAYS) - 1));
2221	    }
2222	    if (prog->patnpar && nump) {
2223		/*
2224		 * b flag: for backreferences using parentheses. Reported
2225		 * directly.
2226		 */
2227		*nump = prog->patnpar;
2228
2229		sp = patbeginp;
2230		ep = patendp;
2231
2232		for (i = 0; i < prog->patnpar && i < maxnpos; i++) {
2233		    if (parsfound & (1 << i)) {
2234			if (begp)
2235			    *begp++ = CHARSUB(patinstart, *sp) + patoffset;
2236			if (endp)
2237			    *endp++ = CHARSUB(patinstart, *ep) + patoffset
2238				- 1;
2239		    } else {
2240			if (begp)
2241			    *begp++ = -1;
2242			if (endp)
2243			    *endp++ = -1;
2244		    }
2245
2246		    sp++;
2247		    ep++;
2248		}
2249	    } else if (prog->patnpar && !(patflags & PAT_FILE)) {
2250		/*
2251		 * b flag: for backreferences using parentheses.
2252		 */
2253		int palen = prog->patnpar+1;
2254		char **matcharr, **mbeginarr, **mendarr;
2255		char numbuf[DIGBUFSIZE];
2256
2257		matcharr = zshcalloc(palen*sizeof(char *));
2258		mbeginarr = zshcalloc(palen*sizeof(char *));
2259		mendarr = zshcalloc(palen*sizeof(char *));
2260
2261		sp = patbeginp;
2262		ep = patendp;
2263
2264		for (i = 0; i < prog->patnpar; i++) {
2265		    if (parsfound & (1 << i)) {
2266			matcharr[i] = metafy(*sp, *ep - *sp, META_DUP);
2267			/*
2268			 * mbegin and mend give indexes into the string
2269			 * in the standard notation, i.e. respecting
2270			 * KSHARRAYS, and with the end index giving
2271			 * the last character, not one beyond.
2272			 * For example, foo=foo; [[ $foo = (f)oo ]] gives
2273			 * (without KSHARRAYS) indexes 1 and 1, which
2274			 * corresponds to indexing as ${foo[1,1]}.
2275			 */
2276			sprintf(numbuf, "%ld",
2277				(long)(CHARSUB(patinstart, *sp) +
2278				       patoffset +
2279				       !isset(KSHARRAYS)));
2280			mbeginarr[i] = ztrdup(numbuf);
2281			sprintf(numbuf, "%ld",
2282				(long)(CHARSUB(patinstart, *ep) +
2283				       patoffset +
2284				       !isset(KSHARRAYS) - 1));
2285			mendarr[i] = ztrdup(numbuf);
2286		    } else {
2287			/* Pattern wasn't set: either it was in an
2288			 * unmatched branch, or a hashed parenthesis
2289			 * that didn't match at all.
2290			 */
2291			matcharr[i] = ztrdup("");
2292			mbeginarr[i] = ztrdup("-1");
2293			mendarr[i] = ztrdup("-1");
2294		    }
2295		    sp++;
2296		    ep++;
2297		}
2298		setaparam("match", matcharr);
2299		setaparam("mbegin", mbeginarr);
2300		setaparam("mend", mendarr);
2301	    }
2302
2303	    if (!nump && endp) {
2304		/*
2305		 * We just need the overall end position.
2306		 */
2307		*endp = CHARSUB(patinstart, patinput) + patoffset;
2308	    }
2309
2310	    ret = 1;
2311	} else
2312	    ret = 0;
2313
2314	if (tryalloced)
2315	    zfree(tryalloced, unmetalen + unmetalenp);
2316
2317	return ret;
2318    }
2319}
2320
2321/*
2322 * Return length of previous succesful match.  This is
2323 * in metafied bytes, i.e. includes a count of Meta characters.
2324 * Unusual and futile attempt at modular encapsulation.
2325 */
2326
2327/**/
2328int
2329patmatchlen(void)
2330{
2331    return patinlen;
2332}
2333
2334/*
2335 * Match literal characters with case insensitivity test:  the first
2336 * comes from the input string, the second the current pattern.
2337 */
2338#ifdef MULTIBYTE_SUPPORT
2339#define ISUPPER(x)	iswupper(x)
2340#define ISLOWER(x)	iswlower(x)
2341#define TOUPPER(x)	towupper(x)
2342#define TOLOWER(x)	towlower(x)
2343#define ISDIGIT(x)	iswdigit(x)
2344#else
2345#define ISUPPER(x)	isupper(x)
2346#define ISLOWER(x)	islower(x)
2347#define TOUPPER(x)	toupper(x)
2348#define TOLOWER(x)	tolower(x)
2349#define ISDIGIT(x)	idigit(x)
2350#endif
2351#define CHARMATCH(chin, chpa) (chin == chpa || \
2352        ((patglobflags & GF_IGNCASE) ? \
2353	 ((ISUPPER(chin) ? TOLOWER(chin) : chin) == \
2354	  (ISUPPER(chpa) ? TOLOWER(chpa) : chpa)) : \
2355	 (patglobflags & GF_LCMATCHUC) ? \
2356	 (ISLOWER(chpa) && TOUPPER(chpa) == chin) : 0))
2357
2358/*
2359 * The same but caching an expression from the first argument,
2360 * Requires local charmatch_cache definition.
2361 */
2362#define CHARMATCH_EXPR(expr, chpa) \
2363	(charmatch_cache = (expr), CHARMATCH(charmatch_cache, chpa))
2364
2365/*
2366 * exactpos is used to remember how far down an exact string we have
2367 * matched, if we are doing approximation and can therefore redo from
2368 * the same point; we never need to otherwise.
2369 *
2370 * exactend is a pointer to the end of the string, which isn't
2371 * null-terminated.
2372 */
2373static char *exactpos, *exactend;
2374
2375/*
2376 * Main matching routine.
2377 *
2378 * Testing the tail end of a match is usually done by recursion, but
2379 * we try to eliminate that in favour of looping for simple cases.
2380 */
2381
2382/**/
2383static int
2384patmatch(Upat prog)
2385{
2386    /* Current and next nodes */
2387    Upat scan = prog, next, opnd;
2388    char *start, *save, *chrop, *chrend, *compend;
2389    int savglobflags, op, no, min, fail = 0, saverrsfound;
2390    zrange_t from, to, comp;
2391    patint_t nextch;
2392
2393    while  (scan) {
2394	next = PATNEXT(scan);
2395
2396	if (!globdots && P_NOTDOT(scan) && patinput == patinstart &&
2397	    patinput < patinend && *patinput == '.')
2398	    return 0;
2399
2400	switch (P_OP(scan)) {
2401	case P_ANY:
2402	    if (patinput == patinend)
2403		fail = 1;
2404	    else
2405		CHARINC(patinput, patinend);
2406	    break;
2407	case P_EXACTLY:
2408	    /*
2409	     * acts as nothing if *chrop is null:  this is used by
2410	     * approx code.
2411	     */
2412	    if (exactpos) {
2413		chrop = exactpos;
2414		chrend = exactend;
2415	    } else {
2416		chrop = P_LS_STR(scan);
2417		chrend = chrop + P_LS_LEN(scan);
2418	    }
2419	    exactpos = NULL;
2420	    while (chrop < chrend && patinput < patinend) {
2421		char *savpatinput = patinput;
2422		char *savchrop = chrop;
2423		int badin = 0, badpa = 0;
2424		/*
2425		 * Care with character matching:
2426		 * We do need to convert the character to wide
2427		 * representation if possible, because we may need
2428		 * to do case transformation.  However, we should
2429		 * be careful in case one, but not the other, wasn't
2430		 * representable in the current locale---in that
2431		 * case they don't match even if the returned
2432		 * values (one properly converted, one raw) are
2433		 * the same.
2434		 */
2435		patint_t chin = CHARREFINC(patinput, patinend, &badin);
2436		patint_t chpa = CHARREFINC(chrop, chrend, &badpa);
2437		if (!CHARMATCH(chin, chpa) || badin != badpa) {
2438		    fail = 1;
2439		    patinput = savpatinput;
2440		    chrop = savchrop;
2441		    break;
2442		}
2443	    }
2444	    if (chrop < chrend) {
2445		exactpos = chrop;
2446		exactend = chrend;
2447		fail = 1;
2448	    }
2449	    break;
2450	case P_ANYOF:
2451	case P_ANYBUT:
2452	    if (patinput == patinend)
2453		fail = 1;
2454	    else {
2455#ifdef MULTIBYTE_SUPPORT
2456		wchar_t cr = CHARREF(patinput, patinend);
2457		char *scanop = (char *)P_OPERAND(scan);
2458		if (patglobflags & GF_MULTIBYTE) {
2459		    if (mb_patmatchrange(scanop, cr, NULL, NULL) ^
2460			(P_OP(scan) == P_ANYOF))
2461			fail = 1;
2462		    else
2463			CHARINC(patinput, patinend);
2464		} else if (patmatchrange(scanop, (int)cr, NULL, NULL) ^
2465			   (P_OP(scan) == P_ANYOF))
2466		    fail = 1;
2467		else
2468		    CHARINC(patinput, patinend);
2469#else
2470		if (patmatchrange((char *)P_OPERAND(scan),
2471				  CHARREF(patinput, patinend), NULL, NULL) ^
2472		    (P_OP(scan) == P_ANYOF))
2473		    fail = 1;
2474		else
2475		    CHARINC(patinput, patinend);
2476#endif
2477	    }
2478	    break;
2479	case P_NUMRNG:
2480	case P_NUMFROM:
2481	case P_NUMTO:
2482	    /*
2483	     * To do this properly, we really have to treat numbers as
2484	     * closures:  that's so things like <1-1000>33 will
2485	     * match 633 (they didn't up to 3.1.6).  To avoid making this
2486	     * too inefficient, we see if there's an exact match next:
2487	     * if there is, and it's not a digit, we return 1 after
2488	     * the first attempt.
2489	     */
2490	    op = P_OP(scan);
2491	    start = (char *)P_OPERAND(scan);
2492	    from = to = 0;
2493	    if (op != P_NUMTO) {
2494#ifdef ZSH_64_BIT_TYPE
2495		/* We can't rely on pointer alignment being good enough. */
2496		memcpy((char *)&from, start, sizeof(zrange_t));
2497#else
2498		from = *((zrange_t *) start);
2499#endif
2500		start += sizeof(zrange_t);
2501	    }
2502	    if (op != P_NUMFROM) {
2503#ifdef ZSH_64_BIT_TYPE
2504		memcpy((char *)&to, start, sizeof(zrange_t));
2505#else
2506		to = *((zrange_t *) start);
2507#endif
2508	    }
2509	    start = compend = patinput;
2510	    comp = 0;
2511	    while (patinput < patinend && idigit(*patinput)) {
2512		if (comp)
2513		    comp *= 10;
2514		comp += *patinput - '0';
2515		patinput++;
2516		compend++;
2517
2518		if (comp & ((zrange_t)1 << (sizeof(comp)*8 -
2519#ifdef ZRANGE_T_IS_SIGNED
2520					    2
2521#else
2522					    1
2523#endif
2524				))) {
2525		    /*
2526		     * Out of range (allowing for signedness, which
2527		     * we need if we are using zlongs).
2528		     * This is as far as we can go.
2529		     * If we're doing a range "from", skip all the
2530		     * remaining numbers.  Otherwise, we can't
2531		     * match beyond the previous point anyway.
2532		     * Leave the pointer to the last calculated
2533		     * position (compend) where it was before.
2534		     */
2535		    if (op == P_NUMFROM) {
2536			while (patinput < patinend && idigit(*patinput))
2537			    patinput++;
2538		    }
2539		}
2540	    }
2541	    save = patinput;
2542	    no = 0;
2543	    while (patinput > start) {
2544		/* if already too small, no power on earth can save it */
2545		if (comp < from && patinput <= compend)
2546		    break;
2547		if ((op == P_NUMFROM || comp <= to) && patmatch(next)) {
2548		    return 1;
2549		}
2550		if (!no && P_OP(next) == P_EXACTLY &&
2551		    (!P_LS_LEN(next) ||
2552		     !idigit(STOUC(*P_LS_STR(next)))) &&
2553		    !(patglobflags & 0xff))
2554		    return 0;
2555		patinput = --save;
2556		no++;
2557		/*
2558		 * With a range start and an unrepresentable test
2559		 * number, we just back down the test string without
2560		 * changing the number until we get to a representable
2561		 * one.
2562		 */
2563		if (patinput < compend)
2564		    comp /= 10;
2565	    }
2566	    patinput = start;
2567	    fail = 1;
2568	    break;
2569	case P_NUMANY:
2570	    /* This is <->: any old set of digits, don't bother comparing */
2571	    start = patinput;
2572	    while (patinput < patinend && idigit(*patinput))
2573		patinput++;
2574	    save = patinput;
2575	    no = 0;
2576	    while (patinput > start) {
2577		if (patmatch(next))
2578		    return 1;
2579		if (!no && P_OP(next) == P_EXACTLY &&
2580		    (!P_LS_LEN(next) ||
2581		     !idigit(*P_LS_STR(next))) &&
2582		    !(patglobflags & 0xff))
2583		    return 0;
2584		patinput = --save;
2585		no++;
2586	    }
2587	    patinput = start;
2588	    fail = 1;
2589	    break;
2590	case P_NOTHING:
2591	    break;
2592	case P_BACK:
2593	    break;
2594	case P_GFLAGS:
2595	    patglobflags = P_OPERAND(scan)->l;
2596	    break;
2597	case P_OPEN:
2598	case P_OPEN+1:
2599	case P_OPEN+2:
2600	case P_OPEN+3:
2601	case P_OPEN+4:
2602	case P_OPEN+5:
2603	case P_OPEN+6:
2604	case P_OPEN+7:
2605	case P_OPEN+8:
2606	case P_OPEN+9:
2607	    no = P_OP(scan) - P_OPEN;
2608	    save = patinput;
2609
2610	    if (patmatch(next)) {
2611		/*
2612		 * Don't set patbeginp if some later invocation of
2613		 * the same parentheses already has.
2614		 */
2615		if (no && !(parsfound & (1 << (no - 1)))) {
2616		    patbeginp[no-1] = save;
2617		    parsfound |= 1 << (no - 1);
2618		}
2619		return 1;
2620	    } else
2621		return 0;
2622	    break;
2623	case P_CLOSE:
2624	case P_CLOSE+1:
2625	case P_CLOSE+2:
2626	case P_CLOSE+3:
2627	case P_CLOSE+4:
2628	case P_CLOSE+5:
2629	case P_CLOSE+6:
2630	case P_CLOSE+7:
2631	case P_CLOSE+8:
2632	case P_CLOSE+9:
2633	    no = P_OP(scan) - P_CLOSE;
2634	    save = patinput;
2635
2636	    if (patmatch(next)) {
2637		if (no && !(parsfound & (1 << (no + 15)))) {
2638		    patendp[no-1] = save;
2639		    parsfound |= 1 << (no + 15);
2640		}
2641		return 1;
2642	    } else
2643		return 0;
2644	    break;
2645	case P_EXCSYNC:
2646	    /* See the P_EXCLUDE code below for where syncptr comes from */
2647	    {
2648		unsigned char *syncptr;
2649		Upat after;
2650		after = P_OPERAND(scan);
2651		DPUTS(!P_ISEXCLUDE(after),
2652		      "BUG: EXCSYNC not followed by EXCLUDE.");
2653		DPUTS(!P_OPERAND(after)->p,
2654		      "BUG: EXCSYNC not handled by EXCLUDE");
2655		syncptr = P_OPERAND(after)->p + (patinput - patinstart);
2656		/*
2657		 * If we already matched from here, this time we fail.
2658		 * See WBRANCH code for story about error count.
2659		 */
2660		if (*syncptr && errsfound + 1 >= *syncptr)
2661		    return 0;
2662		/*
2663		 * Else record that we (possibly) matched this time.
2664		 * No harm if we don't:  then the previous test will just
2665		 * short cut the attempted match that is bound to fail.
2666		 * We never try to exclude something that has already
2667		 * failed anyway.
2668		 */
2669		*syncptr = errsfound + 1;
2670	    }
2671	    break;
2672	case P_EXCEND:
2673	    /*
2674	     * This is followed by a P_EXCSYNC, but only in the P_EXCLUDE
2675	     * branch.  Actually, we don't bother following it:  all we
2676	     * need to know is that we successfully matched so far up
2677	     * to the end of the asserted pattern; the endpoint
2678	     * in the target string is nulled out.
2679	     */
2680	    if (!(fail = (patinput < patinend)))
2681		return 1;
2682	    break;
2683	case P_BRANCH:
2684	case P_WBRANCH:
2685	    /* P_EXCLUDE shouldn't occur without a P_BRANCH */
2686	    if (!P_ISBRANCH(next)) {
2687		/* no choice, avoid recursion */
2688		DPUTS(P_OP(scan) == P_WBRANCH,
2689		      "BUG: WBRANCH with no alternative.");
2690		next = P_OPERAND(scan);
2691	    } else {
2692		do {
2693		    save = patinput;
2694		    savglobflags = patglobflags;
2695		    saverrsfound = errsfound;
2696		    if (P_ISEXCLUDE(next)) {
2697			/*
2698			 * The strategy is to test the asserted pattern,
2699			 * recording via P_EXCSYNC how far the part to
2700			 * be excluded matched.  We then set the
2701			 * length of the test string to that
2702			 * point and see if the exclusion as far as
2703			 * P_EXCEND also matches that string.
2704			 * We need to keep testing the asserted pattern
2705			 * by backtracking, since the first attempt
2706			 * may be excluded while a later attempt may not.
2707			 * For this we keep a pointer just after
2708			 * the P_EXCLUDE which is tested by the P_EXCSYNC
2709			 * to see if we matched there last time, in which
2710			 * case we fail.  If there is nothing to backtrack
2711			 * over, that doesn't matter:  we should fail anyway.
2712			 * The pointer also tells us where the asserted
2713			 * pattern matched for use by the exclusion.
2714			 *
2715			 * It's hard to allocate space for this
2716			 * beforehand since we may need to do it
2717			 * recursively.
2718			 *
2719			 * P.S. in case you were wondering, this code
2720			 * is horrible.
2721			 */
2722			Upat syncstrp;
2723			char *origpatinend;
2724			unsigned char *oldsyncstr;
2725			char *matchpt = NULL;
2726			int ret, savglobdots, matchederrs = 0;
2727			int savparsfound = parsfound;
2728			DPUTS(P_OP(scan) == P_WBRANCH,
2729			      "BUG: excluded WBRANCH");
2730			syncstrp = P_OPERAND(next);
2731			/*
2732			 * Unlike WBRANCH, each test at the same exclude
2733			 * sync point (due to an external loop) is separate,
2734			 * i.e testing (foo~bar)# is no different from
2735			 * (foo~bar)(foo~bar)... from the exclusion point
2736			 * of view, so we use a different sync string.
2737			 */
2738			oldsyncstr = syncstrp->p;
2739			syncstrp->p = (unsigned char *)
2740			    zshcalloc((patinend - patinstart) + 1);
2741			origpatinend = patinend;
2742			while ((ret = patmatch(P_OPERAND(scan)))) {
2743			    unsigned char *syncpt;
2744			    char *savpatinstart;
2745			    int savforce = forceerrs;
2746			    int savpatflags = patflags, synclen;
2747			    forceerrs = -1;
2748			    savglobdots = globdots;
2749			    matchederrs = errsfound;
2750			    matchpt = patinput;    /* may not be end */
2751			    globdots = 1;	   /* OK to match . first */
2752			    /* Find the point where the scan
2753			     * matched the part to be excluded: because
2754			     * of backtracking, the one
2755			     * most recently matched will be the first.
2756			     * (Luckily, backtracking is done after all
2757			     * possibilities for approximation have been
2758			     * checked.)
2759			     */
2760			    for (syncpt = syncstrp->p; !*syncpt; syncpt++)
2761				;
2762			    synclen = syncpt - syncstrp->p;
2763			    if (patinstart + synclen != patinend) {
2764				/*
2765				 * Temporarily mark the string as
2766				 * ending at this point.
2767				 */
2768				DPUTS(patinstart + synclen > matchpt,
2769				      "BUG: EXCSYNC failed");
2770
2771				patinend = patinstart + synclen;
2772				/*
2773				 * If this isn't really the end of the string,
2774				 * remember this for the (#e) assertion.
2775				 */
2776				patflags |= PAT_NOTEND;
2777			    }
2778			    savpatinstart = patinstart;
2779			    next = PATNEXT(scan);
2780			    while (next && P_ISEXCLUDE(next)) {
2781				patinput = save;
2782				/*
2783				 * turn off approximations in exclusions:
2784				 * note we keep remaining patglobflags
2785				 * set by asserted branch (or previous
2786				 * excluded branches, for consistency).
2787				 */
2788				patglobflags &= ~0xff;
2789				errsfound = 0;
2790				opnd = P_OPERAND(next) + 1;
2791				if (P_OP(next) == P_EXCLUDP && patinpath) {
2792				    /*
2793				     * Top level exclusion with a file,
2794				     * applies to whole path so add the
2795				     * segments already matched.
2796				     * We copied these in front of the
2797				     * test pattern, so patinend doesn't
2798				     * need moving.
2799				     */
2800				    DPUTS(patinput != patinstart,
2801					  "BUG: not at start excluding path");
2802				    patinput = patinstart = patinpath;
2803				}
2804				if (patmatch(opnd)) {
2805				    ret = 0;
2806				    /*
2807				     * Another subtlety: if we exclude the
2808				     * match, any parentheses just found
2809				     * become invalidated.
2810				     */
2811				    parsfound = savparsfound;
2812				}
2813				if (patinpath) {
2814				    patinput = savpatinstart +
2815					(patinput - patinstart);
2816				    patinstart = savpatinstart;
2817				}
2818				if (!ret)
2819				    break;
2820				next = PATNEXT(next);
2821			    }
2822			    /*
2823			     * Restore original end position.
2824			     */
2825			    patinend = origpatinend;
2826			    patflags = savpatflags;
2827			    globdots = savglobdots;
2828			    forceerrs = savforce;
2829			    if (ret)
2830				break;
2831			    patinput = save;
2832			    patglobflags = savglobflags;
2833			    errsfound = saverrsfound;
2834			}
2835			zfree((char *)syncstrp->p,
2836			      (patinend - patinstart) + 1);
2837			syncstrp->p = oldsyncstr;
2838			if (ret) {
2839			    patinput = matchpt;
2840			    errsfound = matchederrs;
2841			    return 1;
2842			}
2843			while ((scan = PATNEXT(scan)) &&
2844			       P_ISEXCLUDE(scan))
2845			    ;
2846		    } else {
2847			int ret = 1, pfree = 0;
2848			Upat ptrp = NULL;
2849			unsigned char *ptr;
2850			if (P_OP(scan) == P_WBRANCH) {
2851			    /*
2852			     * This is where we make sure that we are not
2853			     * repeatedly matching zero-length strings in
2854			     * a closure, which would cause an infinite loop,
2855			     * and also remove exponential behaviour in
2856			     * backtracking nested closures.
2857			     * The P_WBRANCH operator leaves a space for a
2858			     * uchar *, initialized to NULL, which is
2859			     * turned into a string the same length as the
2860			     * target string.  Every time we match from a
2861			     * particular point in the target string, we
2862			     * stick a 1 at the corresponding point here.
2863			     * If we come round to the same branch again, and
2864			     * there is already a 1, then the test fails.
2865			     */
2866			    opnd = P_OPERAND(scan);
2867			    ptrp = opnd++;
2868			    if (!ptrp->p) {
2869				ptrp->p = (unsigned char *)
2870				    zshcalloc((patinend - patinstart) + 1);
2871				pfree = 1;
2872			    }
2873			    ptr = ptrp->p + (patinput - patinstart);
2874
2875			    /*
2876			     * Without approximation, this is just a
2877			     * single bit test.  With approximation, we
2878			     * need to know how many errors there were
2879			     * last time we made the test.  If errsfound
2880			     * is now smaller than it was, hence we can
2881			     * make more approximations in the remaining
2882			     * code, we continue with the test.
2883			     * (This is why the max number of errors is
2884			     * 254, not 255.)
2885			     */
2886			    if (*ptr && errsfound + 1 >= *ptr)
2887				ret = 0;
2888			    *ptr = errsfound + 1;
2889			} else
2890			    opnd = P_OPERAND(scan);
2891			if (ret)
2892			    ret = patmatch(opnd);
2893			if (pfree) {
2894			    zfree((char *)ptrp->p,
2895				  (patinend - patinstart) + 1);
2896			    ptrp->p = NULL;
2897			}
2898			if (ret)
2899			    return 1;
2900			scan = PATNEXT(scan);
2901		    }
2902		    patinput = save;
2903		    patglobflags = savglobflags;
2904		    errsfound = saverrsfound;
2905		    DPUTS(P_OP(scan) == P_WBRANCH,
2906			  "BUG: WBRANCH not first choice.");
2907		    next = PATNEXT(scan);
2908		} while (scan && P_ISBRANCH(scan));
2909		return 0;
2910	    }
2911	    break;
2912	case P_STAR:
2913	    /* Handle specially for speed, although really P_ONEHASH+P_ANY */
2914	case P_ONEHASH:
2915	case P_TWOHASH:
2916	    /*
2917	     * This is just simple cases, matching one character.
2918	     * With approximations, we still handle * this way, since
2919	     * no approximation is ever necessary, but other closures
2920	     * are handled by the more complicated branching method
2921	     */
2922	    op = P_OP(scan);
2923	    /* Note that no counts possibly metafied characters */
2924	    start = patinput;
2925	    {
2926		char *lastcharstart;
2927		/*
2928		 * Array to record the start of characters for
2929		 * backtracking.
2930		 */
2931		VARARR(char, charstart, patinend-patinput);
2932		memset(charstart, 0, patinend-patinput);
2933
2934		if (op == P_STAR) {
2935		    for (no = 0; patinput < patinend;
2936			 CHARINC(patinput, patinend))
2937		    {
2938			charstart[patinput-start] = 1;
2939			no++;
2940		    }
2941		    /* simple optimization for reasonably common case */
2942		    if (P_OP(next) == P_END)
2943			return 1;
2944		} else {
2945		    DPUTS(patglobflags & 0xff,
2946			  "BUG: wrong backtracking with approximation.");
2947		    if (!globdots && P_NOTDOT(P_OPERAND(scan)) &&
2948			patinput == patinstart && patinput < patinend &&
2949			CHARREF(patinput, patinend) == ZWC('.'))
2950			return 0;
2951		    no = patrepeat(P_OPERAND(scan), charstart);
2952		}
2953		min = (op == P_TWOHASH) ? 1 : 0;
2954		/*
2955		 * Lookahead to avoid useless matches. This is not possible
2956		 * with approximation.
2957		 */
2958		if (P_OP(next) == P_EXACTLY && P_LS_LEN(next) &&
2959		    !(patglobflags & 0xff)) {
2960		    char *nextop = P_LS_STR(next);
2961#ifdef MULTIBYTE_SUPPORT
2962		    /* else second argument of CHARREF isn't used */
2963		    int nextlen = P_LS_LEN(next);
2964#endif
2965		    /*
2966		     * If that P_EXACTLY is last (common in simple patterns,
2967		     * such as *.c), then it can be only be matched at one
2968		     * point in the test string, so record that.
2969		     */
2970		    if (P_OP(PATNEXT(next)) == P_END &&
2971			!(patflags & PAT_NOANCH)) {
2972			int ptlen = patinend - patinput;
2973			int lenmatch = patinend -
2974			    (min ? CHARNEXT(start, patinend) : start);
2975			/* Are we in the right range? */
2976			if (P_LS_LEN(next) > lenmatch ||
2977			    P_LS_LEN(next) < ptlen)
2978			    return 0;
2979			/* Yes, just position appropriately and test. */
2980			patinput += ptlen - P_LS_LEN(next);
2981			/*
2982			 * Here we will need to be careful that patinput is not
2983			 * in the middle of a multibyte character.
2984			 */
2985			/* Continue loop with P_EXACTLY test. */
2986			break;
2987		    }
2988		    nextch = CHARREF(nextop, nextop + nextlen);
2989		} else
2990		    nextch = PEOF;
2991		savglobflags = patglobflags;
2992		saverrsfound = errsfound;
2993		lastcharstart = charstart + (patinput - start);
2994		if (no >= min) {
2995		    for (;;) {
2996			patint_t charmatch_cache;
2997			if (nextch == PEOF ||
2998			    (patinput < patinend &&
2999			     CHARMATCH_EXPR(CHARREF(patinput, patinend),
3000					    nextch))) {
3001			    if (patmatch(next))
3002				return 1;
3003			}
3004			if (--no < min)
3005			    break;
3006			/* find start of previous full character */
3007			while (!*--lastcharstart)
3008			    DPUTS(lastcharstart < charstart,
3009				  "lastcharstart invalid");
3010			patinput = start + (lastcharstart-charstart);
3011			patglobflags = savglobflags;
3012			errsfound = saverrsfound;
3013		    }
3014		}
3015	    }
3016	    /*
3017	     * As with branches, the patmatch(next) stuff for *
3018	     * handles approximation, so we don't need to try
3019	     * anything here.
3020	     */
3021	    return 0;
3022	case P_ISSTART:
3023	    if (patinput != patinstart || (patflags & PAT_NOTSTART))
3024		fail = 1;
3025	    break;
3026	case P_ISEND:
3027	    if (patinput < patinend || (patflags & PAT_NOTEND))
3028		fail = 1;
3029	    break;
3030	case P_COUNTSTART:
3031	    {
3032		/*
3033		 * Save and restore the current count and the
3034		 * start pointer in case the pattern has been
3035		 * executed by a previous repetition of a
3036		 * closure.
3037		 */
3038		long *curptr = &P_OPERAND(scan)[P_CT_CURRENT].l;
3039		long savecount = *curptr;
3040		unsigned char *saveptr = scan[P_CT_PTR].p;
3041		int ret;
3042
3043		*curptr = 0L;
3044		ret = patmatch(P_OPERAND(scan));
3045		*curptr = savecount;
3046		scan[P_CT_PTR].p = saveptr;
3047		return ret;
3048	    }
3049	case P_COUNT:
3050	    {
3051		/* (#cN,M): execution is relatively straightforward */
3052		long cur = scan[P_CT_CURRENT].l;
3053		long min = scan[P_CT_MIN].l;
3054		long max = scan[P_CT_MAX].l;
3055
3056		if (cur && cur >= min &&
3057		    (unsigned char *)patinput == scan[P_CT_PTR].p) {
3058		    /*
3059		     * Not at the first attempt to match so
3060		     * the previous attempt managed zero length.
3061		     * We can do this indefinitely so there's
3062		     * no point in going on.  Simply try to
3063		     * match the remainder of the pattern.
3064		     */
3065		    return patmatch(next);
3066		}
3067		scan[P_CT_PTR].p = (unsigned char *)patinput;
3068
3069		if (max < 0 || cur < max) {
3070		    char *patinput_thistime = patinput;
3071		    scan[P_CT_CURRENT].l = cur + 1;
3072		    if (patmatch(scan + P_CT_OPERAND))
3073			return 1;
3074		    patinput = patinput_thistime;
3075		}
3076		if (cur < min)
3077		    return 0;
3078		return patmatch(next);
3079	    }
3080	case P_END:
3081	    if (!(fail = (patinput < patinend && !(patflags & PAT_NOANCH))))
3082		return 1;
3083	    break;
3084#ifdef DEBUG
3085	default:
3086	    dputs("BUG: bad operand in patmatch.");
3087	    return 0;
3088	    break;
3089#endif
3090	}
3091
3092	if (fail) {
3093	    if (errsfound < (patglobflags & 0xff) &&
3094		(forceerrs == -1 || errsfound < forceerrs)) {
3095		/*
3096		 * Approximation code.  There are four possibilities
3097		 *
3098		 * 1. omit character from input string
3099		 * 2. transpose characters in input and pattern strings
3100		 * 3. omit character in both input and pattern strings
3101		 * 4. omit character from pattern string.
3102		 *
3103		 * which we try in that order.
3104		 *
3105		 * Of these, 2, 3 and 4 require an exact match string
3106		 * (P_EXACTLY) while 1, 2 and 3 require that we not
3107		 * have reached the end of the input string.
3108		 *
3109		 * Note in each case after making the approximation we
3110		 * need to retry the *same* pattern; this is what
3111		 * requires exactpos, a slightly doleful way of
3112		 * communicating with the exact character matcher.
3113		 */
3114		char *savexact = exactpos;
3115		save = patinput;
3116		savglobflags = patglobflags;
3117		saverrsfound = ++errsfound;
3118		fail = 0;
3119
3120		DPUTS(P_OP(scan) != P_EXACTLY && exactpos,
3121		      "BUG: non-exact match has set exactpos");
3122
3123		/* Try omitting a character from the input string */
3124		if (patinput < patinend) {
3125		    CHARINC(patinput, patinend);
3126		    /* If we are not on an exact match, then this is
3127		     * our last gasp effort, so we can optimize out
3128		     * the recursive call.
3129		     */
3130		    if (P_OP(scan) != P_EXACTLY)
3131			continue;
3132		    if (patmatch(scan))
3133			return 1;
3134		}
3135
3136		if (P_OP(scan) == P_EXACTLY) {
3137		    char *nextexact = savexact;
3138		    DPUTS(!savexact,
3139			  "BUG: exact match has not set exactpos");
3140		    CHARINC(nextexact, exactend);
3141
3142		    if (save < patinend) {
3143			char *nextin = save;
3144			CHARINC(nextin, patinend);
3145			patglobflags = savglobflags;
3146			errsfound = saverrsfound;
3147			exactpos = savexact;
3148
3149			/*
3150			 * Try swapping two characters in patinput and
3151			 * exactpos
3152			 */
3153			if (save < patinend && nextin < patinend &&
3154			    nextexact < exactend) {
3155			    patint_t cin0 = CHARREF(save, patinend);
3156			    patint_t cpa0 = CHARREF(exactpos, exactend);
3157			    patint_t cin1 = CHARREF(nextin, patinend);
3158			    patint_t cpa1 = CHARREF(nextexact, exactend);
3159
3160			    if (CHARMATCH(cin0, cpa1) &&
3161				CHARMATCH(cin1, cpa0)) {
3162				patinput = nextin;
3163				CHARINC(patinput, patinend);
3164				exactpos = nextexact;
3165				CHARINC(exactpos, exactend);
3166				if (patmatch(scan))
3167				    return 1;
3168
3169				patglobflags = savglobflags;
3170				errsfound = saverrsfound;
3171			    }
3172			}
3173
3174			/*
3175			 * Try moving up both strings.
3176			 */
3177			patinput = nextin;
3178			exactpos = nextexact;
3179			if (patmatch(scan))
3180			    return 1;
3181
3182			patinput = save;
3183			patglobflags = savglobflags;
3184			errsfound = saverrsfound;
3185			exactpos = savexact;
3186		    }
3187
3188		    DPUTS(exactpos == exactend, "approximating too far");
3189		    /*
3190		     * Try moving up the exact match pattern.
3191		     * This must be the last attempt, so just loop
3192		     * instead of calling recursively.
3193		     */
3194		    CHARINC(exactpos, exactend);
3195		    continue;
3196		}
3197	    }
3198	    exactpos = NULL;
3199	    return 0;
3200	}
3201
3202	scan = next;
3203    }
3204
3205    return 0;
3206}
3207
3208
3209/**/
3210#ifdef MULTIBYTE_SUPPORT
3211
3212/*
3213 * See if character ch matches a pattern range specification.
3214 * The null-terminated specification is in range; the test
3215 * character is in ch.
3216 *
3217 * indptr is used by completion matching, which is why this
3218 * function is exported.  If indptr is not NULL we set *indptr
3219 * to the index of the character in the range string, adjusted
3220 * in the case of "A-B" ranges such that A would count as its
3221 * normal index (say IA), B would count as IA + (B-A), and any
3222 * character within the range as appropriate.  We're not strictly
3223 * guaranteed this fits within a wint_t, but if this is Unicode
3224 * in 32 bits we have a fair amount of distance left over.
3225 *
3226 * mtp is used in the same circumstances.  *mtp returns the match type:
3227 * 0 for a standard character, else the PP_ index.  It's not
3228 * useful if the match failed.
3229 */
3230
3231/**/
3232mod_export int
3233mb_patmatchrange(char *range, wchar_t ch, wint_t *indptr, int *mtp)
3234{
3235    wchar_t r1, r2;
3236
3237    if (indptr)
3238	*indptr = 0;
3239    /*
3240     * Careful here: unlike other strings, range is a NULL-terminated,
3241     * metafied string, because we need to treat the Posix and hyphenated
3242     * ranges specially.
3243     */
3244    while (*range) {
3245	if (imeta(STOUC(*range))) {
3246	    int swtype = STOUC(*range++) - STOUC(Meta);
3247	    if (mtp)
3248		*mtp = swtype;
3249	    switch (swtype) {
3250	    case 0:
3251		/* ordinary metafied character */
3252		range--;
3253		if (metacharinc(&range) == ch)
3254		    return 1;
3255		break;
3256	    case PP_ALPHA:
3257		if (iswalpha(ch))
3258		    return 1;
3259		break;
3260	    case PP_ALNUM:
3261		if (iswalnum(ch))
3262		    return 1;
3263		break;
3264	    case PP_ASCII:
3265		if ((ch & ~0x7f) == 0)
3266		    return 1;
3267		break;
3268	    case PP_BLANK:
3269		if (ch == L' ' || ch == L'\t')
3270		    return 1;
3271		break;
3272	    case PP_CNTRL:
3273		if (iswcntrl(ch))
3274		    return 1;
3275		break;
3276	    case PP_DIGIT:
3277		if (iswdigit(ch))
3278		    return 1;
3279		break;
3280	    case PP_GRAPH:
3281		if (iswgraph(ch))
3282		    return 1;
3283		break;
3284	    case PP_LOWER:
3285		if (iswlower(ch))
3286		    return 1;
3287		break;
3288	    case PP_PRINT:
3289		if (iswprint(ch))
3290		    return 1;
3291		break;
3292	    case PP_PUNCT:
3293		if (iswpunct(ch))
3294		    return 1;
3295		break;
3296	    case PP_SPACE:
3297		if (iswspace(ch))
3298		    return 1;
3299		break;
3300	    case PP_UPPER:
3301		if (iswupper(ch))
3302		    return 1;
3303		break;
3304	    case PP_XDIGIT:
3305		if (iswxdigit(ch))
3306		    return 1;
3307		break;
3308	    case PP_IDENT:
3309		if (wcsitype(ch, IIDENT))
3310		    return 1;
3311		break;
3312	    case PP_IFS:
3313		if (wcsitype(ch, ISEP))
3314		    return 1;
3315		break;
3316	    case PP_IFSSPACE:
3317		/* must be ASCII space character */
3318		if (ch < 128 && iwsep((int)ch))
3319		    return 1;
3320		break;
3321	    case PP_WORD:
3322		if (wcsitype(ch, IWORD))
3323		    return 1;
3324		break;
3325	    case PP_RANGE:
3326		r1 = metacharinc(&range);
3327		r2 = metacharinc(&range);
3328		if (r1 <= ch && ch <= r2) {
3329		    if (indptr)
3330			*indptr += ch - r1;
3331		    return 1;
3332		}
3333		/* Careful not to screw up counting with bogus range */
3334		if (indptr && r1 < r2) {
3335		    /*
3336		     * This gets incremented again below to get
3337		     * us past the range end.  This is correct.
3338		     */
3339		    *indptr += r2 - r1;
3340		}
3341		break;
3342	    case PP_UNKWN:
3343		DPUTS(1, "BUG: unknown posix range passed through.\n");
3344		break;
3345	    default:
3346		DPUTS(1, "BUG: unknown metacharacter in range.");
3347		break;
3348	    }
3349	} else if (metacharinc(&range) == ch) {
3350	    if (mtp)
3351		*mtp = 0;
3352	    return 1;
3353	}
3354	if (indptr)
3355	    (*indptr)++;
3356    }
3357    return 0;
3358}
3359
3360
3361/*
3362 * This is effectively the reverse of mb_patmatchrange().
3363 * Given a range descriptor of the same form, and an index into it,
3364 * try to determine the character that is matched.  If the index
3365 * points to a [:...:] generic style match, set chr to WEOF and
3366 * return the type in mtp instead.  Return 1 if successful, 0 if
3367 * there was no corresponding index.  Note all pointer arguments
3368 * must be non-null.
3369 */
3370
3371/**/
3372mod_export int
3373mb_patmatchindex(char *range, wint_t ind, wint_t *chr, int *mtp)
3374{
3375    wchar_t r1, r2, rchr;
3376    wint_t rdiff;
3377
3378    *chr = WEOF;
3379    *mtp = 0;
3380
3381    while (*range) {
3382	if (imeta(STOUC(*range))) {
3383	    int swtype = STOUC(*range++) - STOUC(Meta);
3384	    switch (swtype) {
3385	    case 0:
3386		range--;
3387		rchr = metacharinc(&range);
3388		if (!ind) {
3389		    *chr = (wint_t) rchr;
3390		    return 1;
3391		}
3392		break;
3393
3394	    case PP_ALPHA:
3395	    case PP_ALNUM:
3396	    case PP_ASCII:
3397	    case PP_BLANK:
3398	    case PP_CNTRL:
3399	    case PP_DIGIT:
3400	    case PP_GRAPH:
3401	    case PP_LOWER:
3402	    case PP_PRINT:
3403	    case PP_PUNCT:
3404	    case PP_SPACE:
3405	    case PP_UPPER:
3406	    case PP_XDIGIT:
3407	    case PP_IDENT:
3408	    case PP_IFS:
3409	    case PP_IFSSPACE:
3410	    case PP_WORD:
3411		if (!ind) {
3412		    *mtp = swtype;
3413		    return 1;
3414		}
3415		break;
3416
3417	    case PP_RANGE:
3418		r1 = metacharinc(&range);
3419		r2 = metacharinc(&range);
3420		rdiff = (wint_t)r2 - (wint_t)r1;
3421		if (rdiff >= ind) {
3422		    *chr = (wint_t)r1 + ind;
3423		    return 1;
3424		}
3425		/* note the extra decrement to ind below */
3426		ind -= rdiff;
3427		break;
3428	    case PP_UNKWN:
3429		DPUTS(1, "BUG: unknown posix range passed through.\n");
3430		break;
3431	    default:
3432		DPUTS(1, "BUG: unknown metacharacter in range.");
3433		break;
3434	    }
3435	} else {
3436	    rchr = metacharinc(&range);
3437	    if (!ind) {
3438		*chr = (wint_t)rchr;
3439		return 1;
3440	    }
3441	}
3442	if (!ind--)
3443	    break;
3444    }
3445
3446    /* No corresponding index. */
3447    return 0;
3448}
3449
3450/**/
3451#endif /* MULTIBYTE_SUPPORT */
3452
3453/*
3454 * Identical function to mb_patmatchrange() above for single-byte
3455 * characters.
3456 */
3457
3458/**/
3459mod_export int
3460patmatchrange(char *range, int ch, int *indptr, int *mtp)
3461{
3462    int r1, r2;
3463
3464    if (indptr)
3465	*indptr = 0;
3466    /*
3467     * Careful here: unlike other strings, range is a NULL-terminated,
3468     * metafied string, because we need to treat the Posix and hyphenated
3469     * ranges specially.
3470     */
3471    for (; *range; range++) {
3472	if (imeta(STOUC(*range))) {
3473	    int swtype = STOUC(*range) - STOUC(Meta);
3474	    if (mtp)
3475		*mtp = swtype;
3476	    switch (swtype) {
3477	    case 0:
3478		if (STOUC(*++range ^ 32) == ch)
3479		    return 1;
3480		break;
3481	    case PP_ALPHA:
3482		if (isalpha(ch))
3483		    return 1;
3484		break;
3485	    case PP_ALNUM:
3486		if (isalnum(ch))
3487		    return 1;
3488		break;
3489	    case PP_ASCII:
3490		if ((ch & ~0x7f) == 0)
3491		    return 1;
3492		break;
3493	    case PP_BLANK:
3494		if (ch == ' ' || ch == '\t')
3495		    return 1;
3496		break;
3497	    case PP_CNTRL:
3498		if (iscntrl(ch))
3499		    return 1;
3500		break;
3501	    case PP_DIGIT:
3502		if (isdigit(ch))
3503		    return 1;
3504		break;
3505	    case PP_GRAPH:
3506		if (isgraph(ch))
3507		    return 1;
3508		break;
3509	    case PP_LOWER:
3510		if (islower(ch))
3511		    return 1;
3512		break;
3513	    case PP_PRINT:
3514		if (isprint(ch))
3515		    return 1;
3516		break;
3517	    case PP_PUNCT:
3518		if (ispunct(ch))
3519		    return 1;
3520		break;
3521	    case PP_SPACE:
3522		if (isspace(ch))
3523		    return 1;
3524		break;
3525	    case PP_UPPER:
3526		if (isupper(ch))
3527		    return 1;
3528		break;
3529	    case PP_XDIGIT:
3530		if (isxdigit(ch))
3531		    return 1;
3532		break;
3533	    case PP_IDENT:
3534		if (iident(ch))
3535		    return 1;
3536		break;
3537	    case PP_IFS:
3538		if (isep(ch))
3539		    return 1;
3540		break;
3541	    case PP_IFSSPACE:
3542		if (iwsep(ch))
3543		    return 1;
3544		break;
3545	    case PP_WORD:
3546		if (iword(ch))
3547		    return 1;
3548		break;
3549	    case PP_RANGE:
3550		range++;
3551		r1 = STOUC(UNMETA(range));
3552		METACHARINC(range);
3553		r2 = STOUC(UNMETA(range));
3554		if (*range == Meta)
3555		    range++;
3556		if (r1 <= ch && ch <= r2) {
3557		    if (indptr)
3558			*indptr += ch - r1;
3559		    return 1;
3560		}
3561		if (indptr && r1 < r2)
3562		    *indptr += r2 - r1;
3563		break;
3564	    case PP_UNKWN:
3565		DPUTS(1, "BUG: unknown posix range passed through.\n");
3566		break;
3567	    default:
3568		DPUTS(1, "BUG: unknown metacharacter in range.");
3569		break;
3570	    }
3571	} else if (STOUC(*range) == ch) {
3572	    if (mtp)
3573		*mtp = 0;
3574	    return 1;
3575	}
3576	if (indptr)
3577	    (*indptr)++;
3578    }
3579    return 0;
3580}
3581
3582
3583/**/
3584#ifndef MULTIBYTE_SUPPORT
3585
3586/*
3587 * Identical function to mb_patmatchindex() above for single-byte
3588 * characters.  Here -1 represents a character that needs a special type.
3589 *
3590 * Unlike patmatchrange, we only need this in ZLE, which always
3591 * uses MULTIBYTE_SUPPORT if compiled in; hence we don't use
3592 * this function in that case.
3593 */
3594
3595/**/
3596mod_export int
3597patmatchindex(char *range, int ind, int *chr, int *mtp)
3598{
3599    int r1, r2, rdiff, rchr;
3600
3601    *chr = -1;
3602    *mtp = 0;
3603
3604    for (; *range; range++) {
3605	if (imeta(STOUC(*range))) {
3606	    int swtype = STOUC(*range) - STOUC(Meta);
3607	    switch (swtype) {
3608	    case 0:
3609		/* ordinary metafied character */
3610		rchr = STOUC(*++range) ^ 32;
3611		if (!ind) {
3612		    *chr = rchr;
3613		    return 1;
3614		}
3615		break;
3616
3617	    case PP_ALPHA:
3618	    case PP_ALNUM:
3619	    case PP_ASCII:
3620	    case PP_BLANK:
3621	    case PP_CNTRL:
3622	    case PP_DIGIT:
3623	    case PP_GRAPH:
3624	    case PP_LOWER:
3625	    case PP_PRINT:
3626	    case PP_PUNCT:
3627	    case PP_SPACE:
3628	    case PP_UPPER:
3629	    case PP_XDIGIT:
3630	    case PP_IDENT:
3631	    case PP_IFS:
3632	    case PP_IFSSPACE:
3633	    case PP_WORD:
3634		if (!ind) {
3635		    *mtp = swtype;
3636		    return 1;
3637		}
3638		break;
3639
3640	    case PP_RANGE:
3641		range++;
3642		r1 = STOUC(UNMETA(range));
3643		METACHARINC(range);
3644		r2 = STOUC(UNMETA(range));
3645		if (*range == Meta)
3646		    range++;
3647		rdiff = r2 - r1;
3648		if (rdiff >= ind) {
3649		    *chr = r1 + ind;
3650		    return 1;
3651		}
3652		/* note the extra decrement to ind below */
3653		ind -= rdiff;
3654		break;
3655	    case PP_UNKWN:
3656		DPUTS(1, "BUG: unknown posix range passed through.\n");
3657		break;
3658	    default:
3659		DPUTS(1, "BUG: unknown metacharacter in range.");
3660		break;
3661	    }
3662	} else {
3663	    if (!ind) {
3664		*chr = STOUC(*range);
3665		return 1;
3666	    }
3667	}
3668	if (!ind--)
3669	    break;
3670    }
3671
3672    /* No corresponding index. */
3673    return 0;
3674}
3675
3676/**/
3677#endif /* MULTIBYTE_SUPPORT */
3678
3679/*
3680 * Repeatedly match something simple and say how many times.
3681 * charstart is an array parallel to that starting at patinput
3682 * and records the start of (possibly multibyte) characters
3683 * to aid in later backtracking.
3684 */
3685
3686/**/
3687static int patrepeat(Upat p, char *charstart)
3688{
3689    int count = 0;
3690    patint_t tch, charmatch_cache;
3691    char *scan, *opnd;
3692
3693    scan = patinput;
3694    opnd = (char *)P_OPERAND(p);
3695
3696    switch(P_OP(p)) {
3697#ifdef DEBUG
3698    case P_ANY:
3699	dputs("BUG: ?# did not get optimized to *");
3700	return 0;
3701	break;
3702#endif
3703    case P_EXACTLY:
3704	DPUTS(P_LS_LEN(p) != 1, "closure following more than one character");
3705	tch = CHARREF(P_LS_STR(p), P_LS_STR(p) + P_LS_LEN(p));
3706	while (scan < patinend &&
3707	       CHARMATCH_EXPR(CHARREF(scan, patinend), tch)) {
3708	    charstart[scan-patinput] = 1;
3709	    count++;
3710	    CHARINC(scan, patinend);
3711	}
3712	break;
3713    case P_ANYOF:
3714    case P_ANYBUT:
3715	while (scan < patinend) {
3716#ifdef MULTIBYTE_SUPPORT
3717	    wchar_t cr = CHARREF(scan, patinend);
3718	    if (patglobflags & GF_MULTIBYTE) {
3719		if (mb_patmatchrange(opnd, cr, NULL, NULL) ^
3720		    (P_OP(p) == P_ANYOF))
3721		    break;
3722	    } else if (patmatchrange(opnd, (int)cr, NULL, NULL) ^
3723		       (P_OP(p) == P_ANYOF))
3724		break;
3725#else
3726	    if (patmatchrange(opnd, CHARREF(scan, patinend), NULL, NULL) ^
3727		(P_OP(p) == P_ANYOF))
3728		break;
3729#endif
3730	    charstart[scan-patinput] = 1;
3731	    count++;
3732	    CHARINC(scan, patinend);
3733	}
3734	break;
3735#ifdef DEBUG
3736    default:
3737	dputs("BUG: something very strange is happening in patrepeat");
3738	return 0;
3739	break;
3740#endif
3741    }
3742
3743    patinput = scan;
3744    return count;
3745}
3746
3747/* Free a patprog. */
3748
3749/**/
3750mod_export void
3751freepatprog(Patprog prog)
3752{
3753    if (prog && prog != dummy_patprog1 && prog != dummy_patprog2)
3754	zfree(prog, prog->size);
3755}
3756