1/*
2 * pattern.c - pattern matching
3 *
4 * This file is part of zsh, the Z shell.
5 *
6 * Copyright (c) 1999 Peter Stephenson
7 * All rights reserved.
8 *
9 * Permission is hereby granted, without written agreement and without
10 * license or royalty fees, to use, copy, modify, and distribute this
11 * software and to distribute modified versions of this software for any
12 * purpose, provided that the above copyright notice and the following
13 * two paragraphs appear in all copies of this software.
14 *
15 * In no event shall Peter Stephenson or the Zsh Development Group be liable
16 * to any party for direct, indirect, special, incidental, or consequential
17 * damages arising out of the use of this software and its documentation,
18 * even if Peter Stephenson and the Zsh Development Group have been advised of
19 * the possibility of such damage.
20 *
21 * Peter Stephenson and the Zsh Development Group specifically disclaim any
22 * warranties, including, but not limited to, the implied warranties of
23 * merchantability and fitness for a particular purpose.  The software
24 * provided hereunder is on an "as is" basis, and Peter Stephenson and the
25 * Zsh Development Group have no obligation to provide maintenance,
26 * support, updates, enhancements, or modifications.
27 *
28 * Pattern matching code derived from the regexp library by Henry
29 * Spencer, which has the following copyright.
30 *
31 *	Copyright (c) 1986 by University of Toronto.
32 *	Written by Henry Spencer.  Not derived from licensed software.
33 *
34 *	Permission is granted to anyone to use this software for any
35 *	purpose on any computer system, and to redistribute it freely,
36 *	subject to the following restrictions:
37 *
38 *	1. The author is not responsible for the consequences of use of
39 *		this software, no matter how awful, even if they arise
40 *		from defects in it.
41 *
42 *	2. The origin of this software must not be misrepresented, either
43 *		by explicit claim or by omission.
44 *
45 *	3. Altered versions must be plainly marked as such, and must not
46 *		be misrepresented as being the original software.
47 *
48 * Eagle-eyed readers will notice this is an altered version.  Incredibly
49 * sharp-eyed readers might even find bits that weren't altered.
50 *
51 *
52 *      And I experienced a sense that, like certain regular
53 *      expressions, seemed to match the day from beginning to end, so
54 *      that I did not need to identify the parenthesised subexpression
55 *      that told of dawn, nor the group of characters that indicated
56 *      the moment when my grandfather returned home with news of
57 *      Swann's departure for Paris; and the whole length of the month
58 *      of May, as if matched by a closure, fitted into the buffer of my
59 *      life with no sign of overflowing, turning the days, like a
60 *      procession of insects that could consist of this or that
61 *      species, into a random and unstructured repetition of different
62 *      sequences, anchored from the first day of the month to the last
63 *      in the same fashion as the weeks when I knew I would not see
64 *      Gilberte and would search in vain for any occurrences of the
65 *      string in the avenue of hawthorns by Tansonville, without my
66 *      having to delimit explicitly the start or finish of the pattern.
67 *
68 *                                 M. Proust, "In Search of Lost Files",
69 *                                 bk I, "The Walk by Bourne's Place".
70 */
71
72#include "zsh.mdh"
73
74/*
75 * The following union is used mostly for alignment purposes.
76 * Normal nodes are longs, while certain nodes take a char * as an argument;
77 * here we make sure that they both work out to the same length.
78 * The compiled regexp we construct consists of upats stuck together;
79 * anything else to be added (strings, numbers) is stuck after and
80 * then aligned to a whole number of upat units.
81 *
82 * Note also that offsets are in terms of the sizes of these things.
83 */
84union upat {
85    long l;
86    unsigned char *p;
87};
88
89typedef union upat *Upat;
90
91#include "pattern.pro"
92
93/* Number of active parenthesized expressions allowed in backreferencing */
94#define NSUBEXP  9
95
96/* definition	number	opnd?	meaning */
97#define	P_END	  0x00	/* no	End of program. */
98#define P_EXCSYNC 0x01	/* no   Test if following exclude already failed */
99#define P_EXCEND  0x02	/* no   Test if exclude matched orig branch */
100#define	P_BACK	  0x03	/* no	Match "", "next" ptr points backward. */
101#define	P_EXACTLY 0x04	/* lstr	Match this string. */
102#define	P_NOTHING 0x05	/* no	Match empty string. */
103#define	P_ONEHASH 0x06	/* node	Match this (simple) thing 0 or more times. */
104#define	P_TWOHASH 0x07	/* node	Match this (simple) thing 1 or more times. */
105#define P_GFLAGS  0x08	/* long Match nothing and set globbing flags */
106#define P_ISSTART 0x09  /* no   Match start of string. */
107#define P_ISEND   0x0a  /* no   Match end of string. */
108#define P_COUNTSTART 0x0b /* no Initialise P_COUNT */
109#define P_COUNT   0x0c  /* 3*long uc* node Match a number of repetitions */
110/* numbered so we can test bit 5 for a branch */
111#define	P_BRANCH  0x20	/* node	Match this alternative, or the next... */
112#define	P_WBRANCH 0x21	/* uc* node P_BRANCH, but match at least 1 char */
113/* excludes are also branches, but have bit 4 set, too */
114#define P_EXCLUDE 0x30	/* uc* node Exclude this from previous branch */
115#define P_EXCLUDP 0x31	/* uc* node Exclude, using full file path so far */
116/* numbered so we can test bit 6 so as not to match initial '.' */
117#define	P_ANY	  0x40	/* no	Match any one character. */
118#define	P_ANYOF	  0x41	/* str  Match any character in this string. */
119#define	P_ANYBUT  0x42	/* str  Match any character not in this string. */
120#define P_STAR    0x43	/* no   Match any set of characters. */
121#define P_NUMRNG  0x44	/* zr, zr Match a numeric range. */
122#define P_NUMFROM 0x45	/* zr   Match a number >= X */
123#define P_NUMTO   0x46	/* zr   Match a number <= X */
124#define P_NUMANY  0x47	/* no   Match any set of decimal digits */
125/* spaces left for P_OPEN+n,... for backreferences */
126#define	P_OPEN	  0x80	/* no	Mark this point in input as start of n. */
127#define	P_CLOSE	  0x90	/* no	Analogous to OPEN. */
128/*
129 * no    no argument
130 * zr    the range type zrange_t:  may be zlong or unsigned long
131 * char  a single char
132 * uc*   a pointer to unsigned char, used at run time and initialised
133 *       to NULL.
134 * str   null-terminated, metafied string
135 * lstr  length as long then string, not null-terminated, unmetafied.
136 */
137
138/*
139 * Notes on usage:
140 * P_WBRANCH:  This works like a branch and is used in complex closures,
141 *    to ensure we don't succeed on a zero-length match of the pattern,
142 *    since that would cause an infinite loop.  We do this by recording
143 *    the positions where we have already tried to match.   See the
144 *    P_WBRANCH test in patmatch().
145 *
146 *  P_ANY, P_ANYOF:  the operand is a null terminated
147 *    string.  Normal characters match as expected.  Characters
148 *    in the range Meta+PP_ALPHA..Meta+PP_UNKNWN do the appropriate
149 *    Posix range tests.  This relies on imeta returning true for these
150 *    characters.  We treat unknown POSIX ranges as never matching.
151 *    PP_RANGE means the next two (possibly metafied) characters form
152 *    the limits of a range to test; it's too much like hard work to
153 *    expand the range.
154 *
155 *  P_EXCLUDE, P_EXCSYNC, PEXCEND:  P_EXCLUDE appears in the pattern like
156 *    P_BRANCH, but applies to the immediately preceding branch.  The code in
157 *    the corresponding branch is followed by a P_EXCSYNC, which simply
158 *    acts as a marker that a P_EXCLUDE comes next.  The P_EXCLUDE
159 *    has a pointer to char embeded in it, which works
160 *    like P_WBRANCH:  if we get to the P_EXCSYNC, and we already matched
161 *    up to the same position, fail.  Thus we are forced to backtrack
162 *    on closures in the P_BRANCH if the first attempt was excluded.
163 *    Corresponding to P_EXCSYNC in the original branch, there is a
164 *    P_EXCEND in the exclusion.  If we get to this point, and we did
165 *    *not* match in the original branch, the exclusion itself fails,
166 *    otherwise it succeeds since we know the tail already matches,
167 *    so P_EXCEND is the end of the exclusion test.
168 *    The whole sorry mess looks like this, where the upper lines
169 *    show the linkage of the branches, and the lower shows the linkage
170 *    of their pattern arguments.
171 *
172 *     	        ---------------------      ----------------------
173 *              ^      	       	     v    ^      	         v
174 *      ( <BRANCH>:apat-><EXCSYNC> <EXCLUDE>:excpat-><EXCEND> ) tail
175 *                               	                         ^
176 *		       	  |                                      |
177 *			   --------------------------------------
178 *
179 * P_EXCLUDP: this behaves exactly like P_EXCLUDE, with the sole exception
180 *   that we prepend the path so far to the exclude pattern.   This is
181 *   for top level file globs, e.g. ** / *.c~*foo.c
182 *                                    ^ I had to leave this space
183 * P_NUM*: zl is a zlong if that is 64-bit, else an unsigned long.
184 *
185 * P_COUNTSTART, P_COUNT: a P_COUNTSTART flags the start of a quantified
186 * closure (#cN,M) and is used to initialise the count.  Executing
187 * the pattern leads back to the P_COUNT, while the next links of the
188 * P_COUNTSTART and P_COUNT lead to the tail of the pattern:
189 *
190 *	       	        ----------------
191 *     	       	       v       	        ^
192 *        <COUNTSTART><COUNT>pattern<BACK> tail
193 *	     	    v      v  	  	    ^
194 *	            ------------------------
195 */
196
197#define	P_OP(p)		((p)->l & 0xff)
198#define	P_NEXT(p)	((p)->l >> 8)
199#define	P_OPERAND(p)	((p) + 1)
200#define P_ISBRANCH(p)   ((p)->l & 0x20)
201#define P_ISEXCLUDE(p)	(((p)->l & 0x30) == 0x30)
202#define P_NOTDOT(p)	((p)->l & 0x40)
203
204/* Specific to lstr type, i.e. P_EXACTLY. */
205#define P_LS_LEN(p)	((p)[1].l) /* can be used as lvalue */
206#define P_LS_STR(p)	((char *)((p) + 2))
207
208/* Specific to P_COUNT: arguments as offset in nodes from operator */
209#define P_CT_CURRENT	(1)	/* Current count */
210#define P_CT_MIN	(2)     /* Minimum count */
211#define P_CT_MAX	(3)	/* Maximum count, -1 for none */
212#define P_CT_PTR	(4)	/* Pointer to last match start */
213#define P_CT_OPERAND	(5)	/* Operand of P_COUNT */
214
215/* Flags needed when pattern is executed */
216#define P_SIMPLE        0x01	/* Simple enough to be #/## operand. */
217#define P_HSTART        0x02	/* Starts with # or ##'d pattern. */
218#define P_PURESTR	0x04	/* Can be matched with a strcmp */
219
220#if defined(ZSH_64_BIT_TYPE) || defined(LONG_IS_64_BIT)
221typedef zlong zrange_t;
222#define ZRANGE_T_IS_SIGNED	(1)
223#else
224typedef unsigned long zrange_t;
225#endif
226
227/*
228 * Array of characters corresponding to zpc_chars enum, which it must match.
229 */
230static const char zpc_chars[ZPC_COUNT] = {
231    '/', '\0', Bar, Outpar, Tilde, Inpar, Quest, Star, Inbrack, Inang,
232    Hat, Pound, Bnullkeep, Quest, Star, '+', '!', '@'
233};
234
235/*
236 * Corresponding strings used in enable/disable -p.
237 * NULL means no way of turning this on or off.
238 */
239/**/
240mod_export const char *zpc_strings[ZPC_COUNT] = {
241   NULL, NULL, "|", NULL, "~", "(", "?", "*", "[", "<",
242   "^", "#", NULL, "?(", "*(", "+(", "!(", "@("
243};
244
245/*
246 * Corresponding array of pattern disables as set by the user
247 * using "disable -p".
248 */
249/**/
250mod_export char zpc_disables[ZPC_COUNT];
251
252/*
253 * Stack of saved (compressed) zpc_disables for function scope.
254 */
255
256static struct zpc_disables_save *zpc_disables_stack;
257
258/*
259 * Characters which terminate a simple string (ZPC_COUNT) or
260 * an entire pattern segment (the first ZPC_SEG_COUNT).
261 * Each entry is either the corresponding character in zpc_chars
262 * or Marker which is guaranteed not to match a character in a
263 * pattern we are compiling.
264 *
265 * The complete list indicates characters that are special, so e.g.
266 * (testchar == special[ZPC_TILDE]) succeeds only if testchar is a Tilde
267 * *and* Tilde is currently special.
268 */
269
270/**/
271char zpc_special[ZPC_COUNT];
272
273/* Default size for pattern buffer */
274#define P_DEF_ALLOC 256
275
276/* Flags used in compilation */
277static char *patstart, *patparse;	/* input pointers */
278static int patnpar;		/* () count */
279static char *patcode;		/* point of code emission */
280static long patsize;		/* size of code */
281static char *patout;		/* start of code emission string */
282static long patalloc;		/* size allocated for same */
283
284/* Flags used in both compilation and execution */
285static int patflags;		    /* flags passed down to patcompile */
286static int patglobflags;  /* globbing flags & approx */
287
288/*
289 * Increment pointer to metafied multibyte string.
290 */
291#ifdef MULTIBYTE_SUPPORT
292typedef wint_t patint_t;
293
294#define PEOF WEOF
295
296#define METACHARINC(x) ((void)metacharinc(&x))
297
298/*
299 * TODO: the shiftstate isn't well handled; we don't guarantee
300 * to maintain it properly between characters.  If we don't
301 * need it we should use mbtowc() instead.
302 */
303static mbstate_t shiftstate;
304
305/*
306 * Multibyte version: it's (almost) as easy to return the
307 * value as not, so do so since we sometimes need it..
308 */
309static wchar_t
310metacharinc(char **x)
311{
312    char *inptr = *x;
313    char inchar;
314    size_t ret = MB_INVALID;
315    wchar_t wc;
316
317    /*
318     * Cheat if the top bit isn't set.  This is second-guessing
319     * the library, but we know for sure that if the character
320     * set doesn't have the property that all bytes with the 8th
321     * bit clear are single characters then we are stuffed.
322     */
323    if (!(patglobflags & GF_MULTIBYTE) || !(STOUC(*inptr) & 0x80))
324    {
325	if (itok(*inptr))
326	    inchar = ztokens[*inptr++ - Pound];
327	else if (*inptr == Meta) {
328	    inptr++;
329	    inchar = *inptr++ ^ 32;
330	} else {
331	    inchar = *inptr++;
332	}
333	*x = inptr;
334	return (wchar_t)STOUC(inchar);
335    }
336
337    while (*inptr) {
338	if (itok(*inptr))
339	    inchar = ztokens[*inptr++ - Pound];
340	else if (*inptr == Meta) {
341	    inptr++;
342	    inchar = *inptr++ ^ 32;
343	} else {
344	    inchar = *inptr++;
345	}
346	ret = mbrtowc(&wc, &inchar, 1, &shiftstate);
347
348	if (ret == MB_INVALID)
349	    break;
350	if (ret == MB_INCOMPLETE)
351	    continue;
352	*x = inptr;
353	return wc;
354    }
355
356    /* Error.  Treat as single byte. */
357    /* Reset the shift state for next time. */
358    memset(&shiftstate, 0, sizeof(shiftstate));
359    return (wchar_t) STOUC(*(*x)++);
360}
361
362#else
363typedef int patint_t;
364
365#define PEOF EOF
366
367#define METACHARINC(x)	((void)((x) += (*(x) == Meta) ? 2 : 1))
368#endif
369
370/*
371 * Return unmetafied char from string (x is any char *).
372 * Used with MULTIBYTE_SUPPORT if the GF_MULTIBYTE is not
373 * in effect.
374 */
375#define UNMETA(x)	(*(x) == Meta ? (x)[1] ^ 32 : *(x))
376
377/* Add n more characters, ensuring there is enough space. */
378
379enum {
380    PA_NOALIGN = 1,
381    PA_UNMETA  = 2
382};
383
384/**/
385static void
386patadd(char *add, int ch, long n, int paflags)
387{
388    /* Make sure everything gets aligned unless we get PA_NOALIGN. */
389    long newpatsize = patsize + n;
390    if (!(paflags & PA_NOALIGN))
391	newpatsize = (newpatsize + sizeof(union upat) - 1) &
392		      ~(sizeof(union upat) - 1);
393    if (patalloc < newpatsize) {
394	long newpatalloc =
395	    2*(newpatsize > patalloc ? newpatsize : patalloc);
396	patout = (char *)zrealloc((char *)patout, newpatalloc);
397	patcode = patout + patsize;
398	patalloc = newpatalloc;
399    }
400    patsize = newpatsize;
401    if (add) {
402	if (paflags & PA_UNMETA) {
403	    /*
404	     * Unmetafy and untokenize the string as we go.
405	     * The Meta characters in add aren't counted in n.
406	     */
407	    while (n--) {
408		if (itok(*add))
409		    *patcode++ = ztokens[*add++ - Pound];
410		else if (*add == Meta) {
411		    add++;
412		    *patcode++ = *add++ ^ 32;
413		} else {
414		    *patcode++ = *add++;
415		}
416	    }
417	} else {
418	    while (n--)
419		*patcode++ = *add++;
420	}
421    } else
422	*patcode++ = ch;
423    patcode = patout + patsize;
424}
425
426static long rn_offs;
427/* operates on pointers to union upat, returns a pointer */
428#define PATNEXT(p) ((rn_offs = P_NEXT(p)) ? \
429		    (P_OP(p) == P_BACK) ? \
430		    ((p)-rn_offs) : ((p)+rn_offs) : NULL)
431
432/*
433 * Set up zpc_special with characters that end a string segment.
434 * "Marker" cannot occur in the pattern we are compiling so
435 * is used to mark "invalid".
436 */
437static void
438patcompcharsset(void)
439{
440    char *spp, *disp;
441    int i;
442
443    /* Initialise enabled special characters */
444    memcpy(zpc_special, zpc_chars, ZPC_COUNT);
445    /* Apply user disables from disable -p */
446    for (i = 0, spp = zpc_special, disp = zpc_disables;
447	 i < ZPC_COUNT;
448	 i++, spp++, disp++) {
449	if (*disp)
450	    *spp = Marker;
451    }
452
453    if (!isset(EXTENDEDGLOB)) {
454	/* Extended glob characters are not active */
455	zpc_special[ZPC_TILDE] = zpc_special[ZPC_HAT] =
456	    zpc_special[ZPC_HASH] = Marker;
457    }
458    if (!isset(KSHGLOB)) {
459	/*
460	 * Ksh glob characters are not active.
461	 * * and ? are shared with normal globbing, but for their
462	 * use here we are looking for a following Inpar.
463	 */
464	zpc_special[ZPC_KSH_QUEST] = zpc_special[ZPC_KSH_STAR] =
465	    zpc_special[ZPC_KSH_PLUS] = zpc_special[ZPC_KSH_BANG] =
466	    zpc_special[ZPC_KSH_AT] = Marker;
467    }
468    /*
469     * Note that if we are using KSHGLOB, then we test for a following
470     * Inpar, not zpc_special[ZPC_INPAR]:  the latter makes an Inpar on
471     * its own active.  The zpc_special[ZPC_KSH_*] followed by any old Inpar
472     * discriminate ksh globbing.
473     */
474    if (isset(SHGLOB)) {
475	/*
476	 * Grouping and numeric ranges are not valid.
477	 * We do allow alternation, however; it's needed for
478	 * "case".  This may not be entirely consistent.
479	 *
480	 * Don't disable Outpar: we may need to match the end of KSHGLOB
481	 * parentheses and it would be difficult to tell them apart.
482	 */
483	zpc_special[ZPC_INPAR] = zpc_special[ZPC_INANG] = Marker;
484    }
485}
486
487/* Called before parsing a set of file matchs to initialize flags */
488
489/**/
490void
491patcompstart(void)
492{
493    patcompcharsset();
494    if (isset(CASEGLOB))
495	patglobflags = 0;
496    else
497	patglobflags = GF_IGNCASE;
498    if (isset(MULTIBYTE))
499	patglobflags |= GF_MULTIBYTE;
500}
501
502/*
503 * Top level pattern compilation subroutine
504 * exp is a null-terminated, metafied string.
505 * inflags is an or of some PAT_* flags.
506 * endexp, if non-null, is set to a pointer to the end of the
507 *   part of exp which was compiled.  This is used when
508 *   compiling patterns for directories which must be
509 *   matched recursively.
510 */
511
512/**/
513mod_export Patprog
514patcompile(char *exp, int inflags, char **endexp)
515{
516    int flags = 0;
517    long len = 0;
518    long startoff;
519    Upat pscan;
520    char *lng, *strp = NULL;
521    Patprog p;
522
523    startoff = sizeof(struct patprog);
524    /* Ensure alignment of start of program string */
525    startoff = (startoff + sizeof(union upat) - 1) & ~(sizeof(union upat) - 1);
526
527    /* Allocate reasonable sized chunk if none, reduce size if too big */
528    if (patalloc != P_DEF_ALLOC)
529	patout = (char *)zrealloc(patout, patalloc = P_DEF_ALLOC);
530    patcode = patout + startoff;
531    patsize = patcode - patout;
532    patstart = patparse = exp;
533    /*
534     * Note global patnpar numbers parentheses 1..9, while patnpar
535     * in struct is actual count of parentheses.
536     */
537    patnpar = 1;
538    patflags = inflags & ~(PAT_PURES|PAT_HAS_EXCLUDP);
539
540    if (!(patflags & PAT_FILE)) {
541	patcompcharsset();
542	zpc_special[ZPC_SLASH] = Marker;
543	remnulargs(patparse);
544	if (isset(MULTIBYTE))
545	    patglobflags = GF_MULTIBYTE;
546	else
547	    patglobflags = 0;
548    }
549    if (patflags & PAT_LCMATCHUC)
550	patglobflags |= GF_LCMATCHUC;
551    /*
552     * Have to be set now, since they get updated during compilation.
553     */
554    ((Patprog)patout)->globflags = patglobflags;
555
556    if (!(patflags & PAT_ANY)) {
557	/* Look for a really pure string, with no tokens at all. */
558	if (!(patglobflags & ~GF_MULTIBYTE)
559#ifdef __CYGWIN__
560	    /*
561	     * If the OS treats files case-insensitively and we
562	     * are looking at files, we don't need to use pattern
563	     * matching to find the file.
564	     */
565	    || (!(patglobflags & ~GF_IGNCASE) && (patflags & PAT_FILE))
566#endif
567	    )
568	{
569	    /*
570	     * Waah!  I wish I understood this.
571	     * Empty metafied strings have an initial Nularg.
572	     * This never corresponds to a real character in
573	     * a glob pattern or string, so skip it.
574	     */
575	    if (*exp == Nularg)
576		exp++;
577	    for (strp = exp; *strp &&
578		     (!(patflags & PAT_FILE) || *strp != '/') && !itok(*strp);
579		 strp++)
580		;
581	}
582	if (!strp || (*strp && *strp != '/')) {
583	    /* No, do normal compilation. */
584	    strp = NULL;
585	    if (patcompswitch(0, &flags) == 0)
586		return NULL;
587	} else {
588	    /*
589	     * Yes, copy the string, and skip compilation altogether.
590	     * Null terminate for the benefit of globbing.
591	     * Leave metafied both for globbing and for our own
592	     * efficiency.
593	     */
594	    patparse = strp;
595	    len = strp - exp;
596	    patadd(exp, 0, len + 1, 0);
597	    patout[startoff + len] = '\0';
598	    patflags |= PAT_PURES;
599	}
600    }
601
602    /* end of compilation: safe to use pointers */
603    p = (Patprog)patout;
604    p->startoff = startoff;
605    p->patstartch = '\0';
606    p->globend = patglobflags;
607    p->flags = patflags;
608    p->mustoff = 0;
609    p->size = patsize;
610    p->patmlen = len;
611    p->patnpar = patnpar-1;
612
613    if (!strp) {
614	pscan = (Upat)(patout + startoff);
615
616	if (!(patflags & PAT_ANY) && P_OP(PATNEXT(pscan)) == P_END) {
617	    /* only one top level choice */
618	    pscan = P_OPERAND(pscan);
619
620	    if (flags & P_PURESTR) {
621		/*
622		 * The pattern can be matched with a simple strncmp/strcmp.
623		 * Careful in case we've overwritten the node for the next ptr.
624		 */
625		char *dst = patout + startoff;
626		Upat next;
627		p->flags |= PAT_PURES;
628		for (; pscan; pscan = next) {
629		    next = PATNEXT(pscan);
630		    if (P_OP(pscan) == P_EXACTLY) {
631			char *opnd = P_LS_STR(pscan), *mtest;
632			long oplen = P_LS_LEN(pscan), ilen;
633			int nmeta = 0;
634			/*
635			 * Unfortunately we unmetafied the string
636			 * and we need to put any metacharacters
637			 * back now we know it's a pure string.
638			 * This shouldn't happen too often, it's
639			 * just that there are some cases such
640			 * as . and .. in files where we really
641			 * need a pure string even if there are
642			 * pattern characters flying around.
643			 */
644			for (mtest = opnd, ilen = oplen; ilen;
645			     mtest++, ilen--)
646			    if (imeta(*mtest))
647				nmeta++;
648			if (nmeta) {
649			    char *oldpatout = patout;
650			    patadd(NULL, 0, nmeta, 0);
651			    /*
652			     * Yuk.
653			     */
654			    p = (Patprog)patout;
655			    opnd = patout + (opnd - oldpatout);
656			    dst = patout + startoff;
657			}
658
659			while (oplen--) {
660			    if (imeta(*opnd)) {
661				*dst++ = Meta;
662				*dst++ = *opnd++ ^ 32;
663			    } else {
664				*dst++ = *opnd++;
665			    }
666			}
667		    }
668		}
669		p->size = dst - patout;
670		/* patmlen is really strlen.  We don't need a null. */
671		p->patmlen = p->size - startoff;
672	    } else {
673		/* starting point info */
674		if (P_OP(pscan) == P_EXACTLY && !p->globflags &&
675		    P_LS_LEN(pscan))
676		    p->patstartch = *P_LS_STR(pscan);
677		/*
678		 * Find the longest literal string in something expensive.
679		 * This is itself not all that cheap if we have
680		 * case-insensitive matching or approximation, so don't.
681		 */
682		if ((flags & P_HSTART) && !p->globflags) {
683		    lng = NULL;
684		    len = 0;
685		    for (; pscan; pscan = PATNEXT(pscan))
686			if (P_OP(pscan) == P_EXACTLY &&
687			    P_LS_LEN(pscan) >= len) {
688			    lng = P_LS_STR(pscan);
689			    len = P_LS_LEN(pscan);
690			}
691		    if (lng) {
692			p->mustoff = lng - patout;
693			p->patmlen = len;
694		    }
695		}
696	    }
697	}
698    }
699
700    /*
701     * The pattern was compiled in a fixed buffer:  unless told otherwise,
702     * we stick the compiled pattern on the heap.  This is necessary
703     * for files where we will often be compiling multiple segments at once.
704     * But if we get the ZDUP flag we always put it in zalloc()ed memory.
705     */
706    if (patflags & PAT_ZDUP) {
707	Patprog newp = (Patprog)zalloc(patsize);
708	memcpy((char *)newp, (char *)p, patsize);
709	p = newp;
710    } else if (!(patflags & PAT_STATIC)) {
711	Patprog newp = (Patprog)zhalloc(patsize);
712	memcpy((char *)newp, (char *)p, patsize);
713	p = newp;
714    }
715
716    if (endexp)
717	*endexp = patparse;
718    return p;
719}
720
721/*
722 * Main body or parenthesized subexpression in pattern
723 * Parenthesis (and any ksh_glob gubbins) will have been removed.
724 */
725
726/**/
727static long
728patcompswitch(int paren, int *flagp)
729{
730    long starter, br, ender, excsync = 0;
731    int parno = 0;
732    int flags, gfchanged = 0;
733    long savglobflags = (long)patglobflags;
734    Upat ptr;
735
736    *flagp = 0;
737
738    if (paren && (patglobflags & GF_BACKREF) && patnpar <= NSUBEXP) {
739	/*
740	 * parenthesized:  make an open node.
741	 * We can only refer to the first nine parentheses.
742	 * For any others, we just use P_OPEN on its own; there's
743	 * no gain in arbitrarily limiting the number of parentheses.
744	 */
745	parno = patnpar++;
746	starter = patnode(P_OPEN + parno);
747    } else
748	starter = 0;
749
750    br = patnode(P_BRANCH);
751    if (!patcompbranch(&flags, paren))
752	return 0;
753    if (patglobflags != (int)savglobflags)
754	gfchanged++;
755    if (starter)
756	pattail(starter, br);
757    else
758	starter = br;
759
760    *flagp |= flags & (P_HSTART|P_PURESTR);
761
762    while (*patparse == zpc_chars[ZPC_BAR] ||
763	   (*patparse == zpc_special[ZPC_TILDE] &&
764	    (patparse[1] == '/' ||
765	     !memchr(zpc_special, patparse[1], ZPC_SEG_COUNT)))) {
766	int tilde = *patparse++ == zpc_special[ZPC_TILDE];
767	long gfnode = 0, newbr;
768
769	*flagp &= ~P_PURESTR;
770
771	if (tilde) {
772	    union upat up;
773	    /* excsync remembers the P_EXCSYNC node before a chain of
774	     * exclusions:  all point back to this.  only the
775	     * original (non-excluded) branch gets a trailing P_EXCSYNC.
776	     */
777	    if (!excsync) {
778		excsync = patnode(P_EXCSYNC);
779		patoptail(br, excsync);
780	    }
781	    /*
782	     * By default, approximations are turned off in exclusions:
783	     * we need to do this here as otherwise the code compiling
784	     * the exclusion doesn't know if the flags have really
785	     * changed if the error count gets restored.
786	     */
787	    patglobflags &= ~0xff;
788	    if (!(patflags & PAT_FILET) || paren) {
789		br = patnode(P_EXCLUDE);
790	    } else {
791		/*
792		 * At top level (paren == 0) in a file glob !(patflags
793		 * &PAT_FILET) do the exclusion prepending the file path
794		 * so far.  We need to flag this to avoid unnecessarily
795		 * copying the path.
796		 */
797		br = patnode(P_EXCLUDP);
798		patflags |= PAT_HAS_EXCLUDP;
799	    }
800	    up.p = NULL;
801	    patadd((char *)&up, 0, sizeof(up), 0);
802	    /* / is not treated as special if we are at top level */
803	    if (!paren && zpc_special[ZPC_SLASH] == '/') {
804		tilde++;
805		zpc_special[ZPC_SLASH] = Marker;
806	    }
807	} else {
808	    excsync = 0;
809	    br = patnode(P_BRANCH);
810	    /*
811	     * The position of the following statements means globflags
812	     * set in the main branch carry over to the exclusion.
813	     */
814	    if (!paren) {
815		patglobflags = 0;
816		if (((Patprog)patout)->globflags) {
817		    /*
818		     * If at top level, we need to reinitialize flags to zero,
819		     * since (#i)foo|bar only applies to foo and we stuck
820		     * the #i into the global flags.
821		     * We could have done it so that they only got set in the
822		     * first branch, but it's quite convenient having any
823		     * global flags set in the header and not buried in the
824		     * pattern.  (Or maybe it isn't and we should
825		     * forget this bit and always stick in an explicit GFLAGS
826		     * statement instead of using the header.)
827		     * Also, this can't happen for file globs where there are
828		     * no top-level |'s.
829		     *
830		     * No gfchanged, as nothing to follow branch at top
831		     * level.
832		     */
833		    union upat up;
834		    gfnode = patnode(P_GFLAGS);
835		    up.l = patglobflags;
836		    patadd((char *)&up, 0, sizeof(union upat), 0);
837		}
838	    } else {
839		patglobflags = (int)savglobflags;
840	    }
841	}
842	newbr = patcompbranch(&flags, paren);
843	if (tilde == 2) {
844	    /* restore special treatment of / */
845	    zpc_special[ZPC_SLASH] = '/';
846	}
847	if (!newbr)
848	    return 0;
849	if (gfnode)
850	    pattail(gfnode, newbr);
851	if (!tilde && patglobflags != (int)savglobflags)
852	    gfchanged++;
853	pattail(starter, br);
854	if (excsync)
855	    patoptail(br, patnode(P_EXCEND));
856	*flagp |= flags & P_HSTART;
857    }
858
859    /*
860     * Make a closing node, hooking it to the end.
861     * Note that we can't optimize P_NOTHING out here, since another
862     * branch at that point would indicate the current choices continue,
863     * which they don't.
864     */
865    ender = patnode(paren ? parno ? P_CLOSE+parno : P_NOTHING : P_END);
866    pattail(starter, ender);
867
868    /*
869     * Hook the tails of the branches to the closing node,
870     * except for exclusions which terminate where they are.
871     */
872    for (ptr = (Upat)patout + starter; ptr; ptr = PATNEXT(ptr))
873	if (!P_ISEXCLUDE(ptr))
874	    patoptail(ptr-(Upat)patout, ender);
875
876    /* check for proper termination */
877    if ((paren && *patparse++ != Outpar) ||
878	(!paren && *patparse &&
879	 !((patflags & PAT_FILE) && *patparse == '/')))
880	return 0;
881
882    if (paren && gfchanged) {
883	/*
884	 * Restore old values of flags when leaving parentheses.
885	 * gfchanged detects a change in any branch (except exclusions
886	 * which are separate), since we need to emit this even if
887	 * a later branch happened to put the flags back.
888	 */
889	pattail(ender, patnode(P_GFLAGS));
890	patglobflags = (int)savglobflags;
891	patadd((char *)&savglobflags, 0, sizeof(long), 0);
892    }
893
894    return starter;
895}
896
897/*
898 * Compile something ended by Bar, Outpar, Tilde, or end of string.
899 * Note the BRANCH or EXCLUDE tag must already have been omitted:
900 * this returns the position of the operand of that.
901 */
902
903/**/
904static long
905patcompbranch(int *flagp, int paren)
906{
907    long chain, latest = 0, starter;
908    int flags = 0;
909
910    *flagp = P_PURESTR;
911
912    starter = chain = 0;
913    while (!memchr(zpc_special, *patparse, ZPC_SEG_COUNT) ||
914	   (*patparse == zpc_special[ZPC_TILDE] && patparse[1] != '/' &&
915	    memchr(zpc_special, patparse[1], ZPC_SEG_COUNT))) {
916	if ((*patparse == zpc_special[ZPC_INPAR] &&
917	     patparse[1] == zpc_special[ZPC_HASH]) ||
918	    (*patparse == zpc_special[ZPC_KSH_AT] && patparse[1] == Inpar &&
919	     patparse[2] == zpc_special[ZPC_HASH])) {
920	    /* Globbing flags. */
921	    char *pp1 = patparse;
922	    int oldglobflags = patglobflags, ignore;
923	    long assert;
924	    patparse += (*patparse == '@') ? 3 : 2;
925	    if (!patgetglobflags(&patparse, &assert, &ignore))
926		return 0;
927	    if (!ignore) {
928		if (assert) {
929		    /*
930		     * Start/end assertion looking like flags, but
931		     * actually handled as a normal node
932		     */
933		    latest = patnode(assert);
934		    flags = 0;
935		} else {
936		    if (pp1 == patstart) {
937			/* Right at start of pattern, the simplest case.
938			 * Put them into the flags and don't emit anything.
939			 */
940			((Patprog)patout)->globflags = patglobflags;
941			continue;
942		    } else if (!*patparse) {
943			/* Right at the end, so just leave the flags for
944			 * the next Patprog in the chain to pick up.
945			 */
946			break;
947		    }
948		    /*
949		     * Otherwise, we have to stick them in as a pattern
950		     * matching nothing.
951		     */
952		    if (oldglobflags != patglobflags) {
953			/* Flags changed */
954			union upat up;
955			latest = patnode(P_GFLAGS);
956			up.l = patglobflags;
957			patadd((char *)&up, 0, sizeof(union upat), 0);
958		    } else {
959			/* No effect. */
960			continue;
961		    }
962		}
963	    } else if (!*patparse)
964		break;
965	    else
966		continue;
967	} else if (*patparse == zpc_special[ZPC_HAT]) {
968	    /*
969	     * ^pat:  anything but pat.  For proper backtracking,
970	     * etc., we turn this into (*~pat), except without the
971	     * parentheses.
972	     */
973	    patparse++;
974	    latest = patcompnot(0, &flags);
975	} else
976	    latest = patcomppiece(&flags, paren);
977	if (!latest)
978	    return 0;
979	if (!starter)
980	    starter = latest;
981	if (!(flags & P_PURESTR))
982	    *flagp &= ~P_PURESTR;
983	if (!chain)
984	    *flagp |= flags & P_HSTART;
985	else
986	    pattail(chain, latest);
987	chain = latest;
988    }
989    /* check if there was nothing in the loop, i.e. () */
990    if (!chain)
991	starter = patnode(P_NOTHING);
992
993    return starter;
994}
995
996/* get glob flags, return 1 for success, 0 for failure */
997
998/**/
999int
1000patgetglobflags(char **strp, long *assertp, int *ignore)
1001{
1002    char *nptr, *ptr = *strp;
1003    zlong ret;
1004
1005    *assertp = 0;
1006    *ignore = 1;
1007    /* (#X): assumes we are still positioned on the first X */
1008    for (; *ptr && *ptr != Outpar; ptr++) {
1009	if (*ptr == 'q') {
1010	    /* Glob qualifiers, ignored in pattern code */
1011	    while (*ptr && *ptr != Outpar)
1012		ptr++;
1013	    break;
1014	} else {
1015	    *ignore = 0;
1016	    switch (*ptr) {
1017	    case 'a':
1018		/* Approximate matching, max no. of errors follows */
1019		ret = zstrtol(++ptr, &nptr, 10);
1020		/*
1021		 * We can't have more than 254, because we need 255 to
1022		 * mark 254 errors in wbranch and exclude sync strings
1023		 * (hypothetically --- hope no-one tries it).
1024		 */
1025		if (ret < 0 || ret > 254 || ptr == nptr)
1026		    return 0;
1027		patglobflags = (patglobflags & ~0xff) | (ret & 0xff);
1028		ptr = nptr-1;
1029		break;
1030
1031	    case 'l':
1032		/* Lowercase in pattern matches lower or upper in target */
1033		patglobflags = (patglobflags & ~GF_IGNCASE) | GF_LCMATCHUC;
1034		break;
1035
1036	    case 'i':
1037		/* Fully case insensitive */
1038		patglobflags = (patglobflags & ~GF_LCMATCHUC) | GF_IGNCASE;
1039		break;
1040
1041	    case 'I':
1042		/* Restore case sensitivity */
1043		patglobflags &= ~(GF_LCMATCHUC|GF_IGNCASE);
1044		break;
1045
1046	    case 'b':
1047		/* Make backreferences */
1048		patglobflags |= GF_BACKREF;
1049		break;
1050
1051	    case 'B':
1052		/* Don't make backreferences */
1053		patglobflags &= ~GF_BACKREF;
1054		break;
1055
1056	    case 'm':
1057		/* Make references to complete match */
1058		patglobflags |= GF_MATCHREF;
1059		break;
1060
1061	    case 'M':
1062		/* Don't */
1063		patglobflags &= ~GF_MATCHREF;
1064		break;
1065
1066	    case 's':
1067		*assertp = P_ISSTART;
1068		break;
1069
1070	    case 'e':
1071		*assertp = P_ISEND;
1072		break;
1073
1074	    case 'u':
1075		patglobflags |= GF_MULTIBYTE;
1076		break;
1077
1078	    case 'U':
1079		patglobflags &= ~GF_MULTIBYTE;
1080		break;
1081
1082	    default:
1083		return 0;
1084	    }
1085	}
1086    }
1087    if (*ptr != Outpar)
1088	return 0;
1089    /* Start/end assertions must appear on their own. */
1090    if (*assertp && (*strp)[1] != Outpar)
1091	return 0;
1092    *strp = ptr + 1;
1093    return 1;
1094}
1095
1096
1097static const char *colon_stuffs[]  = {
1098    "alpha", "alnum", "ascii", "blank", "cntrl", "digit", "graph",
1099    "lower", "print", "punct", "space", "upper", "xdigit", "IDENT",
1100    "IFS", "IFSSPACE", "WORD", NULL
1101};
1102
1103/*
1104 * Handle the guts of a [:stuff:] character class element.
1105 * start is the beginning of "stuff" and len is its length.
1106 * This code is exported for the benefit of completion matching.
1107 */
1108
1109/**/
1110mod_export int
1111range_type(char *start, int len)
1112{
1113    const char **csp;
1114
1115    for (csp = colon_stuffs; *csp; csp++) {
1116	if (!strncmp(start, *csp, len))
1117	    return (csp - colon_stuffs) + PP_FIRST;
1118    }
1119
1120    return PP_UNKWN;
1121}
1122
1123
1124/*
1125 * Convert the contents of a [...] or [^...] expression (just the
1126 * ... part) back into a string.  This is used by compfiles -p/-P
1127 * for some reason.  The compiled form (a metafied string) is
1128 * passed in rangestr.
1129 *
1130 * If outstr is non-NULL the compiled form is placed there.  It
1131 * must be sufficiently long.  A terminating NULL is appended.
1132 *
1133 * Return the length required, not including the terminating NULL.
1134 *
1135 * TODO: this is non-multibyte for now.  It will need to be defined
1136 * appropriately with MULTIBYTE_SUPPORT when the completion matching
1137 * code catches up.
1138 */
1139
1140/**/
1141mod_export int
1142pattern_range_to_string(char *rangestr, char *outstr)
1143{
1144    int len = 0;
1145
1146    while (*rangestr) {
1147	if (imeta(STOUC(*rangestr))) {
1148	    int swtype = STOUC(*rangestr) - STOUC(Meta);
1149
1150	    if (swtype == 0) {
1151		/* Ordindary metafied character */
1152		if (outstr)
1153		{
1154		    *outstr++ = Meta;
1155		    *outstr++ = rangestr[1] ^ 32;
1156		}
1157		len += 2;
1158		rangestr += 2;
1159	    } else if (swtype == PP_RANGE) {
1160		/* X-Y range */
1161		int i;
1162
1163		for (i = 0; i < 2; i++) {
1164		    if (*rangestr == Meta) {
1165			if (outstr) {
1166			    *outstr++ = Meta;
1167			    *outstr++ = rangestr[1];
1168			}
1169			len += 2;
1170			rangestr += 2;
1171		    } else {
1172			if (outstr)
1173			    *outstr++ = *rangestr;
1174			len++;
1175			rangestr++;
1176		    }
1177
1178		    if (i == 0) {
1179			if (outstr)
1180			    *outstr++ = '-';
1181			len++;
1182		    }
1183		}
1184	    } else if (swtype >= PP_FIRST && swtype <= PP_LAST) {
1185		/* [:stuff:]; we need to output [: and :] */
1186		const char *found = colon_stuffs[swtype - PP_FIRST];
1187		int newlen = strlen(found);
1188		if (outstr) {
1189		    strcpy(outstr, "[:");
1190		    outstr += 2;
1191		    memcpy(outstr, found, newlen);
1192		    outstr += newlen;
1193		    strcpy(outstr, ":]");
1194		    outstr += 2;
1195		}
1196		len += newlen + 4;
1197		rangestr++;
1198	    } else {
1199		/* shouldn't happen */
1200		DPUTS(1, "BUG: unknown PP_ code in pattern range");
1201		rangestr++;
1202	    }
1203	} else {
1204	    /* ordinary character, guaranteed no Meta handling needed */
1205	    if (outstr)
1206		*outstr++ = *rangestr;
1207	    len++;
1208	    rangestr++;
1209	}
1210    }
1211
1212    if (outstr)
1213	*outstr = '\0';
1214    return len;
1215}
1216
1217/*
1218 * compile a chunk such as a literal string or a [...] followed
1219 * by a possible hash operator
1220 */
1221
1222/**/
1223static long
1224patcomppiece(int *flagp, int paren)
1225{
1226    long starter = 0, next, op, opnd;
1227    int flags, flags2, kshchar, len, ch, patch, nmeta;
1228    int hash, count;
1229    union upat up;
1230    char *nptr, *str0, *ptr, *patprev;
1231    zrange_t from, to;
1232    char *charstart;
1233
1234    flags = 0;
1235    str0 = patprev = patparse;
1236    for (;;) {
1237	/*
1238	 * Check if we have a string. First, we need to make sure
1239	 * the string doesn't introduce a ksh-like parenthesized expression.
1240	 */
1241	kshchar = '\0';
1242	if (*patparse && patparse[1] == Inpar) {
1243	    if (*patparse == zpc_special[ZPC_KSH_PLUS])
1244		kshchar = STOUC('+');
1245	    else if (*patparse == zpc_special[ZPC_KSH_BANG])
1246		kshchar = STOUC('!');
1247	    else if (*patparse == zpc_special[ZPC_KSH_AT])
1248		kshchar = STOUC('@');
1249	    else if (*patparse == zpc_special[ZPC_KSH_STAR])
1250		kshchar = STOUC('*');
1251	    else if (*patparse == zpc_special[ZPC_KSH_QUEST])
1252		kshchar = STOUC('?');
1253	}
1254
1255	/*
1256	 * If '(' is disabled as a pattern char, allow ')' as
1257	 * an ordinary string character if there are no parentheses to
1258	 * close.  Don't allow it otherwise, it changes the syntax.
1259	 */
1260	if (zpc_special[ZPC_INPAR] != Marker || *patparse != Outpar ||
1261	    paren) {
1262	    /*
1263	     * End of string (or no string at all) if ksh-type parentheses,
1264	     * or special character, unless that character is a tilde and
1265	     * the character following is an end-of-segment character.  Thus
1266	     * tildes are not special if there is nothing following to
1267	     * be excluded.
1268	     *
1269	     * Don't look for X()-style kshglobs at this point; we've
1270	     * checked above for the case with parentheses and we don't
1271	     * want to match without parentheses.
1272	     */
1273	    if (kshchar ||
1274		(memchr(zpc_special, *patparse, ZPC_NO_KSH_GLOB) &&
1275		 (*patparse != zpc_special[ZPC_TILDE] ||
1276		  patparse[1] == '/' ||
1277		  !memchr(zpc_special, patparse[1], ZPC_SEG_COUNT)))) {
1278		break;
1279	    }
1280    	}
1281
1282	/* Remember the previous character for backtracking */
1283	patprev = patparse;
1284	METACHARINC(patparse);
1285    }
1286
1287    if (patparse > str0) {
1288	long slen = patparse - str0;
1289	int morelen;
1290
1291	/* Ordinary string: cancel kshchar lookahead */
1292	kshchar = '\0';
1293	/*
1294	 * Assume it matches a simple string until we find otherwise.
1295	 */
1296	flags |= P_PURESTR;
1297	DPUTS(patparse == str0, "BUG: matched nothing in patcomppiece.");
1298	/* more than one character matched? */
1299	morelen = (patprev > str0);
1300	/*
1301	 * If we have more than one character, a following hash
1302	 * or (#c...) only applies to the last, so backtrack one character.
1303	 */
1304	if ((*patparse == zpc_special[ZPC_HASH] ||
1305	     (*patparse == zpc_special[ZPC_INPAR] &&
1306	      patparse[1] == zpc_special[ZPC_HASH] &&
1307	      patparse[2] == 'c') ||
1308	     (*patparse == zpc_special[ZPC_KSH_AT] &&
1309	      patparse[1] == Inpar &&
1310	      patparse[2] == zpc_special[ZPC_HASH] &&
1311	      patparse[3] == 'c')) && morelen)
1312	    patparse = patprev;
1313	/*
1314	 * If len is 1, we can't have an active # following, so doesn't
1315	 * matter that we don't make X in `XX#' simple.
1316	 */
1317	if (!morelen)
1318	    flags |= P_SIMPLE;
1319	starter = patnode(P_EXACTLY);
1320
1321	/* Get length of string without metafication. */
1322	nmeta = 0;
1323	/* inherited from domatch, but why, exactly? */
1324	if (*str0 == Nularg)
1325	    str0++;
1326	for (ptr = str0; ptr < patparse; ptr++) {
1327	    if (*ptr == Meta) {
1328		nmeta++;
1329		ptr++;
1330	    }
1331	}
1332	slen = (patparse - str0) - nmeta;
1333	/* First add length, which is a long */
1334	patadd((char *)&slen, 0, sizeof(long), 0);
1335	/*
1336	 * Then the string, not null terminated.
1337	 * Unmetafy and untokenize; pass the final length,
1338	 * which is what we need to allocate, i.e. not including
1339	 * a count for each Meta in the string.
1340	 */
1341	patadd(str0, 0, slen, PA_UNMETA);
1342	nptr = P_LS_STR((Upat)patout + starter);
1343	/*
1344	 * It's much simpler to turn off pure string mode for
1345	 * any case-insensitive or approximate matching; usually,
1346	 * that is correct, or they wouldn't have been turned on.
1347	 * However, we need to make sure we match a "." or ".."
1348	 * in a file name as a pure string.  There's a minor bug
1349	 * that this will also apply to something like
1350	 * ..(#a1).. (i.e. the (#a1) has no effect), but if you're
1351	 * going to write funny patterns, you get no sympathy from me.
1352	 */
1353	if (patglobflags &
1354#ifdef __CYGWIN__
1355	    /*
1356	     * As above: don't use pattern matching for files
1357	     * just because of case insensitivity if file system
1358	     * is known to be case insensitive.
1359	     *
1360	     * This is known to be necessary in at least one case:
1361	     * if "mount -c /" is in effect, so that drives appear
1362	     * directly under / instead of the usual /cygdrive, they
1363	     * aren't shown by readdir().  So it's vital we don't use
1364	     * globbing to find "/c", since that'll fail.
1365	     */
1366	    ((patflags & PAT_FILE) ?
1367	    (0xFF|GF_LCMATCHUC) :
1368	    (0xFF|GF_LCMATCHUC|GF_IGNCASE))
1369#else
1370	    (0xFF|GF_LCMATCHUC|GF_IGNCASE)
1371#endif
1372	    ) {
1373	    if (!(patflags & PAT_FILE))
1374		flags &= ~P_PURESTR;
1375	    else if (!(nptr[0] == '.' &&
1376		       (slen == 1 || (nptr[1] == '.' && slen == 2))))
1377		flags &= ~P_PURESTR;
1378	}
1379    } else {
1380	if (kshchar)
1381	    patparse++;
1382
1383	patch = *patparse;
1384	METACHARINC(patparse);
1385	switch(patch) {
1386	case Quest:
1387	    DPUTS(zpc_special[ZPC_QUEST] == Marker,
1388		  "Treating '?' as pattern character although disabled");
1389	    flags |= P_SIMPLE;
1390	    starter = patnode(P_ANY);
1391	    break;
1392	case Star:
1393	    DPUTS(zpc_special[ZPC_STAR] == Marker,
1394		  "Treating '*' as pattern character although disabled");
1395	    /* kshchar is used as a sign that we can't have #'s. */
1396	    kshchar = -1;
1397	    starter = patnode(P_STAR);
1398	    break;
1399	case Inbrack:
1400	    DPUTS(zpc_special[ZPC_INBRACK] == Marker,
1401		  "Treating '[' as pattern character although disabled");
1402	    flags |= P_SIMPLE;
1403	    if (*patparse == Hat || *patparse == '^' || *patparse == '!') {
1404		patparse++;
1405		starter = patnode(P_ANYBUT);
1406	    } else
1407		starter = patnode(P_ANYOF);
1408	    if (*patparse == Outbrack) {
1409		patparse++;
1410		patadd(NULL, ']', 1, PA_NOALIGN);
1411	    }
1412	    while (*patparse && *patparse != Outbrack) {
1413		/* Meta is not a token */
1414		if (*patparse == Inbrack && patparse[1] == ':' &&
1415			(nptr = strchr(patparse+2, ':')) &&
1416			nptr[1] == Outbrack) {
1417			/* Posix range. */
1418			patparse += 2;
1419			len = nptr - patparse;
1420			ch = range_type(patparse, len);
1421			patparse = nptr + 2;
1422			if (ch != PP_UNKWN)
1423			    patadd(NULL, STOUC(Meta) + ch, 1, PA_NOALIGN);
1424			continue;
1425		}
1426		charstart = patparse;
1427		METACHARINC(patparse);
1428
1429		if (*patparse == '-' && patparse[1] &&
1430		    patparse[1] != Outbrack) {
1431		    patadd(NULL, STOUC(Meta)+PP_RANGE, 1, PA_NOALIGN);
1432		    if (itok(*charstart)) {
1433			patadd(0, STOUC(ztokens[*charstart - Pound]), 1,
1434			       PA_NOALIGN);
1435		    } else {
1436			patadd(charstart, 0, patparse-charstart, PA_NOALIGN);
1437		    }
1438		    charstart = ++patparse;	/* skip ASCII '-' */
1439		    METACHARINC(patparse);
1440		}
1441		if (itok(*charstart)) {
1442		    patadd(0, STOUC(ztokens[*charstart - Pound]), 1,
1443			   PA_NOALIGN);
1444		} else {
1445		    patadd(charstart, 0, patparse-charstart, PA_NOALIGN);
1446		}
1447	    }
1448	    if (*patparse != Outbrack)
1449		return 0;
1450	    patparse++;
1451	    /* terminate null string and fix alignment */
1452	    patadd(NULL, 0, 1, 0);
1453	    break;
1454	case Inpar:
1455	    DPUTS(!kshchar && zpc_special[ZPC_INPAR] == Marker,
1456		  "Treating '(' as pattern character although disabled");
1457	    DPUTS(isset(SHGLOB) && !kshchar,
1458		  "Treating bare '(' as pattern character with SHGLOB");
1459	    if (kshchar == '!') {
1460		/* This is nasty, we should really either handle all
1461		 * kshglobbing below or here.  But most of the
1462		 * others look like non-ksh patterns, while this one
1463		 * doesn't, so we handle it here and leave the rest.
1464		 * We treat it like an extendedglob ^, except that
1465		 * it goes into parentheses.
1466		 *
1467		 * If we did do kshglob here, we could support
1468		 * the old behaviour that things like !(foo)##
1469		 * work, but it makes the code more complicated at
1470		 * the expense of allowing the user to do things
1471		 * they shouldn't.
1472		 */
1473		if (!(starter = patcompnot(1, &flags2)))
1474		    return 0;
1475	    } else if (!(starter = patcompswitch(1, &flags2)))
1476		return 0;
1477	    flags |= flags2 & P_HSTART;
1478	    break;
1479	case Inang:
1480	    /* Numeric glob */
1481	    DPUTS(zpc_special[ZPC_INANG] == Marker,
1482		  "Treating '<' as pattern character although disabled");
1483	    DPUTS(isset(SHGLOB), "Treating <..> as numeric range with SHGLOB");
1484	    len = 0;		/* beginning present 1, end present 2 */
1485	    if (idigit(*patparse)) {
1486		from = (zrange_t) zstrtol((char *)patparse,
1487					 (char **)&nptr, 10);
1488		patparse = nptr;
1489		len |= 1;
1490	    }
1491	    DPUTS(*patparse != '-', "BUG: - missing from numeric glob");
1492	    patparse++;
1493	    if (idigit(*patparse)) {
1494		to = (zrange_t) zstrtol((char *)patparse,
1495					  (char **)&nptr, 10);
1496		patparse = nptr;
1497		len |= 2;
1498	    }
1499	    if (*patparse != Outang)
1500		return 0;
1501	    patparse++;
1502	    switch(len) {
1503	    case 3:
1504		starter = patnode(P_NUMRNG);
1505		patadd((char *)&from, 0, sizeof(from), 0);
1506		patadd((char *)&to, 0, sizeof(to), 0);
1507		break;
1508	    case 2:
1509		starter = patnode(P_NUMTO);
1510		patadd((char *)&to, 0, sizeof(to), 0);
1511		break;
1512	    case 1:
1513		starter = patnode(P_NUMFROM);
1514		patadd((char *)&from, 0, sizeof(from), 0);
1515		break;
1516	    case 0:
1517		starter = patnode(P_NUMANY);
1518		break;
1519	    }
1520	    /* This can't be simple, because it isn't.
1521	     * Mention in manual that matching digits with [...]
1522	     * is more efficient.
1523	     */
1524	    break;
1525	case Pound:
1526	    DPUTS(zpc_special[ZPC_HASH] == Marker,
1527		  "Treating '#' as pattern character although disabled");
1528	    DPUTS(!isset(EXTENDEDGLOB), "BUG: # not treated as string");
1529	    /*
1530	     * A hash here is an error; it should follow something
1531	     * repeatable.
1532	     */
1533	    return 0;
1534	    break;
1535	case Bnullkeep:
1536	    /*
1537	     * Marker for restoring a backslash in output:
1538	     * does not match a character.
1539	     */
1540	    next = patcomppiece(flagp, paren);
1541	    /*
1542	     * Can't match a pure string since we need to do this
1543	     * as multiple chunks.
1544	     */
1545	    *flagp &= ~P_PURESTR;
1546	    return next;
1547	    break;
1548#ifdef DEBUG
1549	default:
1550	    dputs("BUG: character not handled in patcomppiece");
1551	    return 0;
1552	    break;
1553#endif
1554	}
1555    }
1556
1557    count = 0;
1558    if (!(hash = (*patparse == zpc_special[ZPC_HASH])) &&
1559	!(count = ((*patparse == zpc_special[ZPC_INPAR] &&
1560		    patparse[1] == zpc_special[ZPC_HASH] &&
1561		    patparse[2] == 'c') ||
1562		   (*patparse == zpc_special[ZPC_KSH_AT] &&
1563		    patparse[1] == Inpar &&
1564		    patparse[2] == zpc_special[ZPC_HASH] &&
1565		    patparse[3] == 'c'))) &&
1566	(kshchar <= 0 || kshchar == '@' || kshchar == '!')) {
1567	*flagp = flags;
1568	return starter;
1569    }
1570
1571    /* too much at once doesn't currently work */
1572    if (kshchar && (hash || count))
1573	return 0;
1574
1575    if (kshchar == '*') {
1576	op = P_ONEHASH;
1577	*flagp = P_HSTART;
1578    } else if (kshchar == '+') {
1579	op = P_TWOHASH;
1580	*flagp = P_HSTART;
1581    } else if (kshchar == '?') {
1582	op = 0;
1583	*flagp = 0;
1584    } else if (count) {
1585	op = P_COUNT;
1586	patparse += 3;
1587	*flagp = P_HSTART;
1588    } else if (*++patparse == zpc_special[ZPC_HASH]) {
1589	op = P_TWOHASH;
1590	patparse++;
1591	*flagp = P_HSTART;
1592    } else {
1593	op = P_ONEHASH;
1594	*flagp = P_HSTART;
1595    }
1596
1597    /*
1598     * Note optimizations with pointers into P_NOTHING branches:  some
1599     * should logically point to next node after current piece.
1600     *
1601     * Backtracking is also encoded in a slightly obscure way:  the
1602     * code emitted ensures we test the non-empty branch of complex
1603     * patterns before the empty branch on each repetition.  Hence
1604     * each time we fail on a non-empty branch, we try the empty branch,
1605     * which is equivalent to backtracking.
1606     */
1607    if (op == P_COUNT) {
1608	/* (#cN,M) */
1609	union upat countargs[P_CT_OPERAND];
1610	char *opp = patparse;
1611
1612	countargs[0].l = P_COUNT;
1613	countargs[P_CT_CURRENT].l = 0L;
1614	countargs[P_CT_MIN].l = (long)zstrtol(patparse, &patparse, 10);
1615	if (patparse == opp) {
1616	    /* missing number treated as zero */
1617	    countargs[P_CT_MIN].l = 0L;
1618	}
1619	if (*patparse != ',' && *patparse != Comma) {
1620	    /* either max = min or error */
1621	    if (*patparse != Outpar)
1622		return 0;
1623	    countargs[P_CT_MAX].l = countargs[P_CT_MIN].l;
1624	} else {
1625	    opp = ++patparse;
1626	    countargs[P_CT_MAX].l = (long)zstrtol(patparse, &patparse, 10);
1627	    if (*patparse != Outpar)
1628		return 0;
1629	    if (patparse == opp) {
1630		/* missing number treated as infinity: record as -1 */
1631		countargs[P_CT_MAX].l = -1L;
1632	    }
1633	}
1634	patparse++;
1635	countargs[P_CT_PTR].p = NULL;
1636	/* Mark this chain as a min/max count... */
1637	patinsert(P_COUNTSTART, starter, (char *)countargs, sizeof(countargs));
1638	/*
1639	 * The next of the operand is a loop back to the P_COUNT.  This is
1640	 * how we get recursion for the count.  We don't loop back to
1641	 * the P_COUNTSTART; that's used for initialising the count
1642	 * and saving and restoring the count for any enclosing use
1643	 * of the match.
1644	 */
1645	opnd = P_OPERAND(starter) + P_CT_OPERAND;
1646	pattail(opnd, patnode(P_BACK));
1647	pattail(opnd, P_OPERAND(starter));
1648	/*
1649	 * The next of the counter operators is what follows the
1650	 * closure.
1651	 * This handles matching of the tail.
1652	 */
1653	next = patnode(P_NOTHING);
1654	pattail(starter, next);
1655	pattail(P_OPERAND(starter), next);
1656    } else if ((flags & P_SIMPLE) && (op == P_ONEHASH || op == P_TWOHASH) &&
1657	P_OP((Upat)patout+starter) == P_ANY) {
1658	/* Optimize ?# to *.  Silly thing to do, since who would use
1659	 * use ?# ? But it makes the later code shorter.
1660	 */
1661	Upat uptr = (Upat)patout + starter;
1662	if (op == P_TWOHASH) {
1663	    /* ?## becomes ?* */
1664	    uptr->l = (uptr->l & ~0xff) | P_ANY;
1665	    pattail(starter, patnode(P_STAR));
1666	} else {
1667	    uptr->l = (uptr->l & ~0xff) | P_STAR;
1668	}
1669    } else if ((flags & P_SIMPLE) && op && !(patglobflags & 0xff)) {
1670	/* Simplify, but not if we need to look for approximations. */
1671	patinsert(op, starter, NULL, 0);
1672    } else if (op == P_ONEHASH) {
1673	/* Emit x# as (x&|), where & means "self". */
1674	up.p = NULL;
1675	patinsert(P_WBRANCH, starter, (char *)&up, sizeof(up));
1676	                                      /* Either x */
1677	patoptail(starter, patnode(P_BACK));  /* and loop */
1678	patoptail(starter, starter);	      /* back */
1679	pattail(starter, patnode(P_BRANCH));  /* or */
1680	pattail(starter, patnode(P_NOTHING)); /* null. */
1681    } else if (op == P_TWOHASH) {
1682	/* Emit x## as x(&|) where & means "self". */
1683	next = patnode(P_WBRANCH);	      /* Either */
1684	up.p = NULL;
1685	patadd((char *)&up, 0, sizeof(up), 0);
1686	pattail(starter, next);
1687	pattail(patnode(P_BACK), starter);    /* loop back */
1688	pattail(next, patnode(P_BRANCH));     /* or */
1689	pattail(starter, patnode(P_NOTHING)); /* null. */
1690    } else if (kshchar == '?') {
1691	/* Emit ?(x) as (x|) */
1692	patinsert(P_BRANCH, starter, NULL, 0); /* Either x */
1693	pattail(starter, patnode(P_BRANCH));   /* or */
1694	next = patnode(P_NOTHING);	       /* null */
1695	pattail(starter, next);
1696	patoptail(starter, next);
1697    }
1698    if (*patparse == zpc_special[ZPC_HASH])
1699	return 0;
1700
1701    return starter;
1702}
1703
1704/*
1705 * Turn a ^foo (paren = 0) or !(foo) (paren = 1) into *~foo with
1706 * parentheses if necessary.   As you see, that's really quite easy.
1707 */
1708
1709/**/
1710static long
1711patcompnot(int paren, int *flagsp)
1712{
1713    union upat up;
1714    long excsync, br, excl, n, starter;
1715    int dummy;
1716
1717    /* Here, we're matching a star at the start. */
1718    *flagsp = P_HSTART;
1719
1720    starter = patnode(P_BRANCH);
1721    br = patnode(P_STAR);
1722    excsync = patnode(P_EXCSYNC);
1723    pattail(br, excsync);
1724    pattail(starter, excl = patnode(P_EXCLUDE));
1725    up.p = NULL;
1726    patadd((char *)&up, 0, sizeof(up), 0);
1727    if (!(br = (paren ? patcompswitch(1, &dummy) : patcompbranch(&dummy, 0))))
1728	return 0;
1729    pattail(br, patnode(P_EXCEND));
1730    n = patnode(P_NOTHING); /* just so much easier */
1731    pattail(excsync, n);
1732    pattail(excl, n);
1733
1734    return starter;
1735}
1736
1737/* Emit a node */
1738
1739/**/
1740static long
1741patnode(long op)
1742{
1743    long starter = (Upat)patcode - (Upat)patout;
1744    union upat up;
1745
1746    up.l = op;
1747    patadd((char *)&up, 0, sizeof(union upat), 0);
1748    return starter;
1749}
1750
1751/*
1752 * insert an operator in front of an already emitted operand:
1753 * we relocate the operand.  there had better be nothing else after.
1754 */
1755
1756/**/
1757static void
1758patinsert(long op, int opnd, char *xtra, int sz)
1759{
1760    char *src, *dst, *opdst;
1761    union upat buf, *lptr;
1762
1763    buf.l = 0;
1764    patadd((char *)&buf, 0, sizeof(buf), 0);
1765    if (sz)
1766	patadd(xtra, 0, sz, 0);
1767    src = patcode - sizeof(union upat) - sz;
1768    dst = patcode;
1769    opdst = patout + opnd * sizeof(union upat);
1770    while (src > opdst)
1771	*--dst = *--src;
1772
1773    /* A cast can't be an lvalue */
1774    lptr = (Upat)opdst;
1775    lptr->l = op;
1776    opdst += sizeof(union upat);
1777    while (sz--)
1778	*opdst++ = *xtra++;
1779}
1780
1781/* set the 'next' pointer at the end of a node chain */
1782
1783/**/
1784static void
1785pattail(long p, long val)
1786{
1787    Upat scan, temp;
1788    long offset;
1789
1790    scan = (Upat)patout + p;
1791    for (;;) {
1792	if (!(temp = PATNEXT(scan)))
1793	    break;
1794	scan = temp;
1795    }
1796
1797    offset = (P_OP(scan) == P_BACK)
1798	? (scan - (Upat)patout) - val : val - (scan - (Upat)patout);
1799
1800    scan->l |= offset << 8;
1801}
1802
1803/* do pattail, but on operand of first argument; nop if operandless */
1804
1805/**/
1806static void patoptail(long p, long val)
1807{
1808    Upat ptr = (Upat)patout + p;
1809    int op = P_OP(ptr);
1810    if (!p || !P_ISBRANCH(ptr))
1811	return;
1812    if (op == P_BRANCH)
1813	pattail(P_OPERAND(p), val);
1814    else
1815	pattail(P_OPERAND(p) + 1, val);
1816}
1817
1818
1819/*
1820 * Run a pattern.
1821 */
1822static char *patinstart;	/* Start of input string */
1823static char *patinend;		/* End of input string */
1824static char *patinput;		/* String input pointer */
1825static char *patinpath;		/* Full path for use with ~ exclusions */
1826static int   patinlen;		/* Length of last successful match.
1827				 * Includes count of Meta characters.
1828				 */
1829
1830static char *patbeginp[NSUBEXP];	/* Pointer to backref beginnings */
1831static char *patendp[NSUBEXP];		/* Pointer to backref ends */
1832static int parsfound;			/* parentheses (with backrefs) found */
1833
1834static int globdots;			/* Glob initial dots? */
1835
1836/*
1837 * Character functions operating on unmetafied strings.
1838 */
1839#ifdef MULTIBYTE_SUPPORT
1840
1841/* Get a character from the start point in a string */
1842#define CHARREF(x, y)	charref((x), (y))
1843static wchar_t
1844charref(char *x, char *y)
1845{
1846    wchar_t wc;
1847    size_t ret;
1848
1849    if (!(patglobflags & GF_MULTIBYTE) || !(STOUC(*x) & 0x80))
1850	return (wchar_t) STOUC(*x);
1851
1852    ret = mbrtowc(&wc, x, y-x, &shiftstate);
1853
1854    if (ret == MB_INVALID || ret == MB_INCOMPLETE) {
1855	/* Error.  Treat as single byte. */
1856	/* Reset the shift state for next time. */
1857	memset(&shiftstate, 0, sizeof(shiftstate));
1858	return (wchar_t) STOUC(*x);
1859    }
1860
1861    return wc;
1862}
1863
1864/* Get  a pointer to the next character */
1865#define CHARNEXT(x, y)	charnext((x), (y))
1866static char *
1867charnext(char *x, char *y)
1868{
1869    wchar_t wc;
1870    size_t ret;
1871
1872    if (!(patglobflags & GF_MULTIBYTE) || !(STOUC(*x) & 0x80))
1873	return x + 1;
1874
1875    ret = mbrtowc(&wc, x, y-x, &shiftstate);
1876
1877    if (ret == MB_INVALID || ret == MB_INCOMPLETE) {
1878	/* Error.  Treat as single byte. */
1879	/* Reset the shift state for next time. */
1880	memset(&shiftstate, 0, sizeof(shiftstate));
1881	return x + 1;
1882    }
1883
1884    /* Nulls here are normal characters */
1885    return x + (ret ? ret : 1);
1886}
1887
1888/* Increment a pointer past the current character. */
1889#define CHARINC(x, y)	((x) = charnext((x), (y)))
1890
1891
1892/* Get a character and increment */
1893#define CHARREFINC(x, y, z)	charrefinc(&(x), (y), (z))
1894static wchar_t
1895charrefinc(char **x, char *y, int *z)
1896{
1897    wchar_t wc;
1898    size_t ret;
1899
1900    if (!(patglobflags & GF_MULTIBYTE) || !(STOUC(**x) & 0x80))
1901	return (wchar_t) STOUC(*(*x)++);
1902
1903    ret = mbrtowc(&wc, *x, y-*x, &shiftstate);
1904
1905    if (ret == MB_INVALID || ret == MB_INCOMPLETE) {
1906	/* Error.  Treat as single byte, but flag. */
1907	*z = 1;
1908	/* Reset the shift state for next time. */
1909	memset(&shiftstate, 0, sizeof(shiftstate));
1910	return (wchar_t) STOUC(*(*x)++);
1911    }
1912
1913    /* Nulls here are normal characters */
1914    *x += ret ? ret : 1;
1915
1916    return wc;
1917}
1918
1919
1920/*
1921 * Counter the number of characters between two pointers, smaller first
1922 *
1923 * This is used when setting values in parameters, so we obey
1924 * the MULTIBYTE option (even if it's been overridden locally).
1925 */
1926#define CHARSUB(x,y)	charsub(x, y)
1927static ptrdiff_t
1928charsub(char *x, char *y)
1929{
1930    ptrdiff_t res = 0;
1931    size_t ret;
1932    wchar_t wc;
1933
1934    if (!isset(MULTIBYTE))
1935	return y - x;
1936
1937    while (x < y) {
1938	ret = mbrtowc(&wc, x, y-x, &shiftstate);
1939
1940	if (ret == MB_INVALID || ret == MB_INCOMPLETE) {
1941	    /* Error.  Treat remainder as single characters */
1942	    return res + (y - x);
1943	}
1944
1945	/* Treat nulls as normal characters */
1946	if (!ret)
1947	    ret = 1;
1948	res++;
1949	x += ret;
1950    }
1951
1952    return res;
1953}
1954
1955#else /* no MULTIBYTE_SUPPORT */
1956
1957/* Get a character from the start point in a string */
1958#define CHARREF(x, y)	(STOUC(*(x)))
1959/* Get  a pointer to the next character */
1960#define CHARNEXT(x, y)	((x)+1)
1961/* Increment a pointer past the current character. */
1962#define CHARINC(x, y)	((x)++)
1963/* Get a character and increment */
1964#define CHARREFINC(x, y, z)	(STOUC(*(x)++))
1965/* Counter the number of characters between two pointers, smaller first */
1966#define CHARSUB(x,y)	((y) - (x))
1967
1968#endif /* MULTIBYTE_SUPPORT */
1969
1970/*
1971 * The following need to be accessed in the globbing scanner for
1972 * a multi-component file path.  See horror story in glob.c.
1973 */
1974/**/
1975int errsfound;				/* Total error count so far */
1976
1977/**/
1978int forceerrs;				/* Forced maximum error count */
1979
1980/**/
1981void
1982pattrystart(void)
1983{
1984    forceerrs = -1;
1985    errsfound = 0;
1986}
1987
1988/*
1989 * Test prog against null-terminated, metafied string.
1990 */
1991
1992/**/
1993mod_export int
1994pattry(Patprog prog, char *string)
1995{
1996    return pattryrefs(prog, string, -1, -1, 0, NULL, NULL, NULL);
1997}
1998
1999/*
2000 * Test prog against string of given length, no null termination
2001 * but still metafied at this point.  offset gives an offset
2002 * to include in reported match indices
2003 */
2004
2005/**/
2006mod_export int
2007pattrylen(Patprog prog, char *string, int len, int unmetalen, int offset)
2008{
2009    return pattryrefs(prog, string, len, unmetalen, offset, NULL, NULL, NULL);
2010}
2011
2012/*
2013 * Test prog against string with given lengths.  The input
2014 * string is metafied; stringlen is the raw string length, and
2015 * unmetalen the number of characters in the original string (some
2016 * of which may now be metafied).  Either value may be -1
2017 * to indicate a null-terminated string which will be counted.  Note
2018 * there may be a severe penalty for this if a lot of matching is done
2019 * on one string.
2020 *
2021 * offset is the position in the original string (not seen by
2022 * the pattern module) at which we are trying to match.
2023 * This is added in to the positions recorded in patbeginp and patendp
2024 * when we are looking for substrings.  Currently this only happens
2025 * in the parameter substitution code.
2026 *
2027 * Note this is a character offset, i.e. a metafied character
2028 * counts as 1.
2029 *
2030 * The last three arguments are used to report the positions for the
2031 * backreferences. On entry, *nump should contain the maximum number
2032 * of positions to report.  In this case the match, mbegin, mend
2033 * arrays are not altered.
2034 *
2035 * If nump is NULL but endp is not NULL, then *endp is set to the
2036 * end position of the match, taking into account patinstart.
2037 */
2038
2039/**/
2040mod_export int
2041pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen,
2042	   int patoffset,
2043	   int *nump, int *begp, int *endp)
2044{
2045    int i, maxnpos = 0, ret, needfullpath, unmetalenp;
2046    int origlen;
2047    char **sp, **ep, *tryalloced, *ptr;
2048    char *progstr = (char *)prog + prog->startoff;
2049
2050    if (nump) {
2051	maxnpos = *nump;
2052	*nump = 0;
2053    }
2054    /* inherited from domatch, but why, exactly? */
2055    if (*string == Nularg) {
2056	string++;
2057	unmetalen--;
2058    }
2059
2060    if (stringlen < 0)
2061	stringlen = strlen(string);
2062    origlen = stringlen;
2063
2064    patflags = prog->flags;
2065    /*
2066     * For a top-level ~-exclusion, we will need the full
2067     * path to exclude, so copy the path so far and append the
2068     * current test string.
2069     */
2070    needfullpath = (patflags & PAT_HAS_EXCLUDP) && pathpos;
2071
2072    /* Get the length of the full string when unmetafied. */
2073    if (unmetalen < 0)
2074	unmetalen = ztrsub(string + stringlen, string);
2075    if (needfullpath)
2076	unmetalenp = ztrsub(pathbuf + pathpos, pathbuf);
2077    else
2078	unmetalenp = 0;
2079
2080    DPUTS(needfullpath && (patflags & (PAT_PURES|PAT_ANY)),
2081	  "rum sort of file exclusion");
2082    /*
2083     * Partly for efficiency, and partly for the convenience of
2084     * globbing, we don't unmetafy pure string patterns, and
2085     * there's no reason to if the pattern is just a *.
2086     */
2087    if (!(patflags & (PAT_PURES|PAT_ANY))
2088	&& (needfullpath || unmetalen != stringlen)) {
2089	/*
2090	 * We need to copy if we need to prepend the path so far
2091	 * (in which case we copy both chunks), or if we have
2092	 * Meta characters.
2093	 */
2094	char *dst;
2095	int icopy, ncopy;
2096
2097	dst = tryalloced = zalloc(unmetalen + unmetalenp);
2098
2099	if (needfullpath) {
2100	    /* loop twice, copy path buffer first time */
2101	    ptr = pathbuf;
2102	    ncopy = unmetalenp;
2103	} else {
2104	    /* just loop once, copy string with unmetafication */
2105	    ptr = string;
2106	    ncopy = unmetalen;
2107	}
2108	for (icopy = 0; icopy < 2; icopy++) {
2109	    for (i = 0; i < ncopy; i++) {
2110		if (*ptr == Meta) {
2111		    ptr++;
2112		    *dst++ = *ptr++ ^ 32;
2113		} else {
2114		    *dst++ = *ptr++;
2115		}
2116	    }
2117	    if (!needfullpath)
2118		break;
2119	    /* next time append test string to path so far */
2120	    ptr = string;
2121	    ncopy = unmetalen;
2122	}
2123
2124	if (needfullpath) {
2125	    patinstart = tryalloced + unmetalenp;
2126	    patinpath = tryalloced;
2127	} else {
2128	    patinstart = tryalloced;
2129	    patinpath = NULL;
2130	}
2131	stringlen = unmetalen;
2132    } else {
2133	patinstart = string;
2134	tryalloced = patinpath = NULL;
2135    }
2136
2137    patinend = patinstart + stringlen;
2138    /*
2139     * From now on we do not require NULL termination of
2140     * the test string.  There should also be no more references
2141     * to the variable string.
2142     */
2143
2144    if (prog->flags & (PAT_PURES|PAT_ANY)) {
2145	/*
2146	 * Either we are testing against a pure string,
2147	 * or we can match anything at all.
2148	 */
2149	int ret;
2150	if (prog->flags & PAT_ANY) {
2151	    /*
2152	     * Optimisation for a single "*": always matches
2153	     * (except for no_glob_dots, see below).
2154	     */
2155	    ret = 1;
2156	} else {
2157	    /*
2158	     * Testing a pure string.  See if initial
2159	     * components match.
2160	     */
2161	    int lendiff = stringlen - prog->patmlen;
2162	    if (lendiff < 0) {
2163		/* No, the pattern string is too long. */
2164		ret = 0;
2165	    } else if (!memcmp(progstr, patinstart, prog->patmlen)) {
2166		/*
2167		 * Initial component matches.  Matches either
2168		 * if lengths are the same or we are not anchored
2169		 * to the end of the string.
2170		 */
2171		ret = !lendiff || (prog->flags & PAT_NOANCH);
2172	    } else {
2173		/* No match. */
2174		ret = 0;
2175	    }
2176	}
2177	if (ret) {
2178	    /*
2179	     * For files, we won't match initial "."s unless
2180	     * glob_dots is set.
2181	     */
2182	    if ((prog->flags & PAT_NOGLD) && *patinstart == '.') {
2183		ret = 0;
2184	    } else {
2185		/*
2186		 * Remember the length in case used for ${..#..} etc.
2187		 * In this case, we didn't unmetafy the string.
2188		 */
2189		patinlen = (int)prog->patmlen;
2190		/* if matching files, must update globbing flags */
2191		patglobflags = prog->globend;
2192
2193		if ((patglobflags & GF_MATCHREF) &&
2194		    !(patflags & PAT_FILE)) {
2195		    char *str = ztrduppfx(patinstart, patinlen);
2196		    char *ptr = patinstart;
2197		    int mlen = 0;
2198
2199		    /*
2200		     * Count the characters.  We're not using CHARSUB()
2201		     * because the string is still metafied.  We're
2202		     * not using mb_metastrlen() because that expects
2203		     * the string to be null terminated.
2204		     */
2205		    MB_METACHARINIT();
2206		    while (ptr < patinstart + patinlen) {
2207			mlen++;
2208			ptr += MB_METACHARLEN(ptr);
2209		    }
2210
2211		    setsparam("MATCH", str);
2212		    setiparam("MBEGIN",
2213			      (zlong)(patoffset + !isset(KSHARRAYS)));
2214		    setiparam("MEND",
2215			      (zlong)(mlen + patoffset +
2216				      !isset(KSHARRAYS) - 1));
2217		}
2218	    }
2219	}
2220
2221	if (tryalloced)
2222	    zfree(tryalloced, unmetalen + unmetalenp);
2223
2224	return ret;
2225    } else {
2226	/*
2227	 * Test for a `must match' string, unless we're scanning for a match
2228	 * in which case we don't need to do this each time.
2229	 */
2230	ret = 1;
2231	if (!(prog->flags & PAT_SCAN) && prog->mustoff)
2232	{
2233	    char *testptr;	/* start pointer into test string */
2234	    char *teststop;	/* last point from which we can match */
2235	    char *patptr = (char *)prog + prog->mustoff;
2236	    int patlen = prog->patmlen;
2237	    int found = 0;
2238
2239	    if (patlen > stringlen) {
2240		/* Too long, can't match. */
2241		ret = 0;
2242	    } else {
2243		teststop = patinend - patlen;
2244
2245		for (testptr = patinstart; testptr <= teststop; testptr++)
2246		{
2247		    if (!memcmp(testptr, patptr, patlen)) {
2248			found = 1;
2249			break;
2250		    }
2251		}
2252
2253		if (!found)
2254		    ret = 0;
2255	    }
2256	}
2257	if (!ret) {
2258	    if (tryalloced)
2259		zfree(tryalloced, unmetalen + unmetalenp);
2260	    return 0;
2261	}
2262
2263	patglobflags = prog->globflags;
2264	if (!(patflags & PAT_FILE)) {
2265	    forceerrs = -1;
2266	    errsfound = 0;
2267	}
2268	globdots = !(patflags & PAT_NOGLD);
2269	parsfound = 0;
2270
2271	patinput = patinstart;
2272
2273	if (patmatch((Upat)progstr)) {
2274	    /*
2275	     * we were lazy and didn't save the globflags if an exclusion
2276	     * failed, so set it now
2277	     */
2278	    patglobflags = prog->globend;
2279
2280	    /*
2281	     * Record length of successful match, including Meta
2282	     * characters.  Do it here so that patmatchlen() can return
2283	     * it even if we delete the pattern strings.
2284	     */
2285	    patinlen = patinput - patinstart;
2286	    /*
2287	     * Optimization: if we didn't find any Meta characters
2288	     * to begin with, we don't need to look for them now.
2289	     */
2290	    if (unmetalen != origlen) {
2291		for (ptr = patinstart; ptr < patinput; ptr++)
2292		    if (imeta(*ptr))
2293			patinlen++;
2294	    }
2295
2296	    /*
2297	     * Should we clear backreferences and matches on a failed
2298	     * match?
2299	     */
2300	    if ((patglobflags & GF_MATCHREF) && !(patflags & PAT_FILE)) {
2301		/*
2302		 * m flag: for global match.  This carries no overhead
2303		 * in the pattern matching part.
2304		 *
2305		 * Remember the test pattern is already unmetafied.
2306		 */
2307		char *str;
2308		int mlen = CHARSUB(patinstart, patinput);
2309
2310		str = metafy(patinstart, patinput - patinstart, META_DUP);
2311		setsparam("MATCH", str);
2312		setiparam("MBEGIN", (zlong)(patoffset + !isset(KSHARRAYS)));
2313		setiparam("MEND",
2314			  (zlong)(mlen + patoffset +
2315				  !isset(KSHARRAYS) - 1));
2316	    }
2317	    if (prog->patnpar && nump) {
2318		/*
2319		 * b flag: for backreferences using parentheses. Reported
2320		 * directly.
2321		 */
2322		*nump = prog->patnpar;
2323
2324		sp = patbeginp;
2325		ep = patendp;
2326
2327		for (i = 0; i < prog->patnpar && i < maxnpos; i++) {
2328		    if (parsfound & (1 << i)) {
2329			if (begp)
2330			    *begp++ = CHARSUB(patinstart, *sp) + patoffset;
2331			if (endp)
2332			    *endp++ = CHARSUB(patinstart, *ep) + patoffset
2333				- 1;
2334		    } else {
2335			if (begp)
2336			    *begp++ = -1;
2337			if (endp)
2338			    *endp++ = -1;
2339		    }
2340
2341		    sp++;
2342		    ep++;
2343		}
2344	    } else if (prog->patnpar && !(patflags & PAT_FILE)) {
2345		/*
2346		 * b flag: for backreferences using parentheses.
2347		 */
2348		int palen = prog->patnpar+1;
2349		char **matcharr, **mbeginarr, **mendarr;
2350		char numbuf[DIGBUFSIZE];
2351
2352		matcharr = zshcalloc(palen*sizeof(char *));
2353		mbeginarr = zshcalloc(palen*sizeof(char *));
2354		mendarr = zshcalloc(palen*sizeof(char *));
2355
2356		sp = patbeginp;
2357		ep = patendp;
2358
2359		for (i = 0; i < prog->patnpar; i++) {
2360		    if (parsfound & (1 << i)) {
2361			matcharr[i] = metafy(*sp, *ep - *sp, META_DUP);
2362			/*
2363			 * mbegin and mend give indexes into the string
2364			 * in the standard notation, i.e. respecting
2365			 * KSHARRAYS, and with the end index giving
2366			 * the last character, not one beyond.
2367			 * For example, foo=foo; [[ $foo = (f)oo ]] gives
2368			 * (without KSHARRAYS) indexes 1 and 1, which
2369			 * corresponds to indexing as ${foo[1,1]}.
2370			 */
2371			sprintf(numbuf, "%ld",
2372				(long)(CHARSUB(patinstart, *sp) +
2373				       patoffset +
2374				       !isset(KSHARRAYS)));
2375			mbeginarr[i] = ztrdup(numbuf);
2376			sprintf(numbuf, "%ld",
2377				(long)(CHARSUB(patinstart, *ep) +
2378				       patoffset +
2379				       !isset(KSHARRAYS) - 1));
2380			mendarr[i] = ztrdup(numbuf);
2381		    } else {
2382			/* Pattern wasn't set: either it was in an
2383			 * unmatched branch, or a hashed parenthesis
2384			 * that didn't match at all.
2385			 */
2386			matcharr[i] = ztrdup("");
2387			mbeginarr[i] = ztrdup("-1");
2388			mendarr[i] = ztrdup("-1");
2389		    }
2390		    sp++;
2391		    ep++;
2392		}
2393		setaparam("match", matcharr);
2394		setaparam("mbegin", mbeginarr);
2395		setaparam("mend", mendarr);
2396	    }
2397
2398	    if (!nump && endp) {
2399		/*
2400		 * We just need the overall end position.
2401		 */
2402		*endp = CHARSUB(patinstart, patinput) + patoffset;
2403	    }
2404
2405	    ret = 1;
2406	} else
2407	    ret = 0;
2408
2409	if (tryalloced)
2410	    zfree(tryalloced, unmetalen + unmetalenp);
2411
2412	return ret;
2413    }
2414}
2415
2416/*
2417 * Return length of previous succesful match.  This is
2418 * in metafied bytes, i.e. includes a count of Meta characters.
2419 * Unusual and futile attempt at modular encapsulation.
2420 */
2421
2422/**/
2423int
2424patmatchlen(void)
2425{
2426    return patinlen;
2427}
2428
2429/*
2430 * Match literal characters with case insensitivity test:  the first
2431 * comes from the input string, the second the current pattern.
2432 */
2433#ifdef MULTIBYTE_SUPPORT
2434#define ISUPPER(x)	iswupper(x)
2435#define ISLOWER(x)	iswlower(x)
2436#define TOUPPER(x)	towupper(x)
2437#define TOLOWER(x)	towlower(x)
2438#define ISDIGIT(x)	iswdigit(x)
2439#else
2440#define ISUPPER(x)	isupper(x)
2441#define ISLOWER(x)	islower(x)
2442#define TOUPPER(x)	toupper(x)
2443#define TOLOWER(x)	tolower(x)
2444#define ISDIGIT(x)	idigit(x)
2445#endif
2446#define CHARMATCH(chin, chpa) (chin == chpa || \
2447        ((patglobflags & GF_IGNCASE) ? \
2448	 ((ISUPPER(chin) ? TOLOWER(chin) : chin) == \
2449	  (ISUPPER(chpa) ? TOLOWER(chpa) : chpa)) : \
2450	 (patglobflags & GF_LCMATCHUC) ? \
2451	 (ISLOWER(chpa) && TOUPPER(chpa) == chin) : 0))
2452
2453/*
2454 * The same but caching an expression from the first argument,
2455 * Requires local charmatch_cache definition.
2456 */
2457#define CHARMATCH_EXPR(expr, chpa) \
2458	(charmatch_cache = (expr), CHARMATCH(charmatch_cache, chpa))
2459
2460/*
2461 * exactpos is used to remember how far down an exact string we have
2462 * matched, if we are doing approximation and can therefore redo from
2463 * the same point; we never need to otherwise.
2464 *
2465 * exactend is a pointer to the end of the string, which isn't
2466 * null-terminated.
2467 */
2468static char *exactpos, *exactend;
2469
2470/*
2471 * Main matching routine.
2472 *
2473 * Testing the tail end of a match is usually done by recursion, but
2474 * we try to eliminate that in favour of looping for simple cases.
2475 */
2476
2477/**/
2478static int
2479patmatch(Upat prog)
2480{
2481    /* Current and next nodes */
2482    Upat scan = prog, next, opnd;
2483    char *start, *save, *chrop, *chrend, *compend;
2484    int savglobflags, op, no, min, fail = 0, saverrsfound;
2485    zrange_t from, to, comp;
2486    patint_t nextch;
2487
2488    while  (scan) {
2489	next = PATNEXT(scan);
2490
2491	if (!globdots && P_NOTDOT(scan) && patinput == patinstart &&
2492	    patinput < patinend && *patinput == '.')
2493	    return 0;
2494
2495	switch (P_OP(scan)) {
2496	case P_ANY:
2497	    if (patinput == patinend)
2498		fail = 1;
2499	    else
2500		CHARINC(patinput, patinend);
2501	    break;
2502	case P_EXACTLY:
2503	    /*
2504	     * acts as nothing if *chrop is null:  this is used by
2505	     * approx code.
2506	     */
2507	    if (exactpos) {
2508		chrop = exactpos;
2509		chrend = exactend;
2510	    } else {
2511		chrop = P_LS_STR(scan);
2512		chrend = chrop + P_LS_LEN(scan);
2513	    }
2514	    exactpos = NULL;
2515	    while (chrop < chrend && patinput < patinend) {
2516		char *savpatinput = patinput;
2517		char *savchrop = chrop;
2518		int badin = 0, badpa = 0;
2519		/*
2520		 * Care with character matching:
2521		 * We do need to convert the character to wide
2522		 * representation if possible, because we may need
2523		 * to do case transformation.  However, we should
2524		 * be careful in case one, but not the other, wasn't
2525		 * representable in the current locale---in that
2526		 * case they don't match even if the returned
2527		 * values (one properly converted, one raw) are
2528		 * the same.
2529		 */
2530		patint_t chin = CHARREFINC(patinput, patinend, &badin);
2531		patint_t chpa = CHARREFINC(chrop, chrend, &badpa);
2532		if (!CHARMATCH(chin, chpa) || badin != badpa) {
2533		    fail = 1;
2534		    patinput = savpatinput;
2535		    chrop = savchrop;
2536		    break;
2537		}
2538	    }
2539	    if (chrop < chrend) {
2540		exactpos = chrop;
2541		exactend = chrend;
2542		fail = 1;
2543	    }
2544	    break;
2545	case P_ANYOF:
2546	case P_ANYBUT:
2547	    if (patinput == patinend)
2548		fail = 1;
2549	    else {
2550#ifdef MULTIBYTE_SUPPORT
2551		wchar_t cr = CHARREF(patinput, patinend);
2552		char *scanop = (char *)P_OPERAND(scan);
2553		if (patglobflags & GF_MULTIBYTE) {
2554		    if (mb_patmatchrange(scanop, cr, NULL, NULL) ^
2555			(P_OP(scan) == P_ANYOF))
2556			fail = 1;
2557		    else
2558			CHARINC(patinput, patinend);
2559		} else if (patmatchrange(scanop, (int)cr, NULL, NULL) ^
2560			   (P_OP(scan) == P_ANYOF))
2561		    fail = 1;
2562		else
2563		    CHARINC(patinput, patinend);
2564#else
2565		if (patmatchrange((char *)P_OPERAND(scan),
2566				  CHARREF(patinput, patinend), NULL, NULL) ^
2567		    (P_OP(scan) == P_ANYOF))
2568		    fail = 1;
2569		else
2570		    CHARINC(patinput, patinend);
2571#endif
2572	    }
2573	    break;
2574	case P_NUMRNG:
2575	case P_NUMFROM:
2576	case P_NUMTO:
2577	    /*
2578	     * To do this properly, we really have to treat numbers as
2579	     * closures:  that's so things like <1-1000>33 will
2580	     * match 633 (they didn't up to 3.1.6).  To avoid making this
2581	     * too inefficient, we see if there's an exact match next:
2582	     * if there is, and it's not a digit, we return 1 after
2583	     * the first attempt.
2584	     */
2585	    op = P_OP(scan);
2586	    start = (char *)P_OPERAND(scan);
2587	    from = to = 0;
2588	    if (op != P_NUMTO) {
2589#ifdef ZSH_64_BIT_TYPE
2590		/* We can't rely on pointer alignment being good enough. */
2591		memcpy((char *)&from, start, sizeof(zrange_t));
2592#else
2593		from = *((zrange_t *) start);
2594#endif
2595		start += sizeof(zrange_t);
2596	    }
2597	    if (op != P_NUMFROM) {
2598#ifdef ZSH_64_BIT_TYPE
2599		memcpy((char *)&to, start, sizeof(zrange_t));
2600#else
2601		to = *((zrange_t *) start);
2602#endif
2603	    }
2604	    start = compend = patinput;
2605	    comp = 0;
2606	    while (patinput < patinend && idigit(*patinput)) {
2607		if (comp)
2608		    comp *= 10;
2609		comp += *patinput - '0';
2610		patinput++;
2611		compend++;
2612
2613		if (comp & ((zrange_t)1 << (sizeof(comp)*8 -
2614#ifdef ZRANGE_T_IS_SIGNED
2615					    2
2616#else
2617					    1
2618#endif
2619				))) {
2620		    /*
2621		     * Out of range (allowing for signedness, which
2622		     * we need if we are using zlongs).
2623		     * This is as far as we can go.
2624		     * If we're doing a range "from", skip all the
2625		     * remaining numbers.  Otherwise, we can't
2626		     * match beyond the previous point anyway.
2627		     * Leave the pointer to the last calculated
2628		     * position (compend) where it was before.
2629		     */
2630		    if (op == P_NUMFROM) {
2631			while (patinput < patinend && idigit(*patinput))
2632			    patinput++;
2633		    }
2634		}
2635	    }
2636	    save = patinput;
2637	    no = 0;
2638	    while (patinput > start) {
2639		/* if already too small, no power on earth can save it */
2640		if (comp < from && patinput <= compend)
2641		    break;
2642		if ((op == P_NUMFROM || comp <= to) && patmatch(next)) {
2643		    return 1;
2644		}
2645		if (!no && P_OP(next) == P_EXACTLY &&
2646		    (!P_LS_LEN(next) ||
2647		     !idigit(STOUC(*P_LS_STR(next)))) &&
2648		    !(patglobflags & 0xff))
2649		    return 0;
2650		patinput = --save;
2651		no++;
2652		/*
2653		 * With a range start and an unrepresentable test
2654		 * number, we just back down the test string without
2655		 * changing the number until we get to a representable
2656		 * one.
2657		 */
2658		if (patinput < compend)
2659		    comp /= 10;
2660	    }
2661	    patinput = start;
2662	    fail = 1;
2663	    break;
2664	case P_NUMANY:
2665	    /* This is <->: any old set of digits, don't bother comparing */
2666	    start = patinput;
2667	    while (patinput < patinend && idigit(*patinput))
2668		patinput++;
2669	    save = patinput;
2670	    no = 0;
2671	    while (patinput > start) {
2672		if (patmatch(next))
2673		    return 1;
2674		if (!no && P_OP(next) == P_EXACTLY &&
2675		    (!P_LS_LEN(next) ||
2676		     !idigit(*P_LS_STR(next))) &&
2677		    !(patglobflags & 0xff))
2678		    return 0;
2679		patinput = --save;
2680		no++;
2681	    }
2682	    patinput = start;
2683	    fail = 1;
2684	    break;
2685	case P_NOTHING:
2686	    break;
2687	case P_BACK:
2688	    break;
2689	case P_GFLAGS:
2690	    patglobflags = P_OPERAND(scan)->l;
2691	    break;
2692	case P_OPEN:
2693	case P_OPEN+1:
2694	case P_OPEN+2:
2695	case P_OPEN+3:
2696	case P_OPEN+4:
2697	case P_OPEN+5:
2698	case P_OPEN+6:
2699	case P_OPEN+7:
2700	case P_OPEN+8:
2701	case P_OPEN+9:
2702	    no = P_OP(scan) - P_OPEN;
2703	    save = patinput;
2704
2705	    if (patmatch(next)) {
2706		/*
2707		 * Don't set patbeginp if some later invocation of
2708		 * the same parentheses already has.
2709		 */
2710		if (no && !(parsfound & (1 << (no - 1)))) {
2711		    patbeginp[no-1] = save;
2712		    parsfound |= 1 << (no - 1);
2713		}
2714		return 1;
2715	    } else
2716		return 0;
2717	    break;
2718	case P_CLOSE:
2719	case P_CLOSE+1:
2720	case P_CLOSE+2:
2721	case P_CLOSE+3:
2722	case P_CLOSE+4:
2723	case P_CLOSE+5:
2724	case P_CLOSE+6:
2725	case P_CLOSE+7:
2726	case P_CLOSE+8:
2727	case P_CLOSE+9:
2728	    no = P_OP(scan) - P_CLOSE;
2729	    save = patinput;
2730
2731	    if (patmatch(next)) {
2732		if (no && !(parsfound & (1 << (no + 15)))) {
2733		    patendp[no-1] = save;
2734		    parsfound |= 1 << (no + 15);
2735		}
2736		return 1;
2737	    } else
2738		return 0;
2739	    break;
2740	case P_EXCSYNC:
2741	    /* See the P_EXCLUDE code below for where syncptr comes from */
2742	    {
2743		unsigned char *syncptr;
2744		Upat after;
2745		after = P_OPERAND(scan);
2746		DPUTS(!P_ISEXCLUDE(after),
2747		      "BUG: EXCSYNC not followed by EXCLUDE.");
2748		DPUTS(!P_OPERAND(after)->p,
2749		      "BUG: EXCSYNC not handled by EXCLUDE");
2750		syncptr = P_OPERAND(after)->p + (patinput - patinstart);
2751		/*
2752		 * If we already matched from here, this time we fail.
2753		 * See WBRANCH code for story about error count.
2754		 */
2755		if (*syncptr && errsfound + 1 >= *syncptr)
2756		    return 0;
2757		/*
2758		 * Else record that we (possibly) matched this time.
2759		 * No harm if we don't:  then the previous test will just
2760		 * short cut the attempted match that is bound to fail.
2761		 * We never try to exclude something that has already
2762		 * failed anyway.
2763		 */
2764		*syncptr = errsfound + 1;
2765	    }
2766	    break;
2767	case P_EXCEND:
2768	    /*
2769	     * This is followed by a P_EXCSYNC, but only in the P_EXCLUDE
2770	     * branch.  Actually, we don't bother following it:  all we
2771	     * need to know is that we successfully matched so far up
2772	     * to the end of the asserted pattern; the endpoint
2773	     * in the target string is nulled out.
2774	     */
2775	    if (!(fail = (patinput < patinend)))
2776		return 1;
2777	    break;
2778	case P_BRANCH:
2779	case P_WBRANCH:
2780	    /* P_EXCLUDE shouldn't occur without a P_BRANCH */
2781	    if (!P_ISBRANCH(next)) {
2782		/* no choice, avoid recursion */
2783		DPUTS(P_OP(scan) == P_WBRANCH,
2784		      "BUG: WBRANCH with no alternative.");
2785		next = P_OPERAND(scan);
2786	    } else {
2787		do {
2788		    save = patinput;
2789		    savglobflags = patglobflags;
2790		    saverrsfound = errsfound;
2791		    if (P_ISEXCLUDE(next)) {
2792			/*
2793			 * The strategy is to test the asserted pattern,
2794			 * recording via P_EXCSYNC how far the part to
2795			 * be excluded matched.  We then set the
2796			 * length of the test string to that
2797			 * point and see if the exclusion as far as
2798			 * P_EXCEND also matches that string.
2799			 * We need to keep testing the asserted pattern
2800			 * by backtracking, since the first attempt
2801			 * may be excluded while a later attempt may not.
2802			 * For this we keep a pointer just after
2803			 * the P_EXCLUDE which is tested by the P_EXCSYNC
2804			 * to see if we matched there last time, in which
2805			 * case we fail.  If there is nothing to backtrack
2806			 * over, that doesn't matter:  we should fail anyway.
2807			 * The pointer also tells us where the asserted
2808			 * pattern matched for use by the exclusion.
2809			 *
2810			 * It's hard to allocate space for this
2811			 * beforehand since we may need to do it
2812			 * recursively.
2813			 *
2814			 * P.S. in case you were wondering, this code
2815			 * is horrible.
2816			 */
2817			Upat syncstrp;
2818			char *origpatinend;
2819			unsigned char *oldsyncstr;
2820			char *matchpt = NULL;
2821			int ret, savglobdots, matchederrs = 0;
2822			int savparsfound = parsfound;
2823			DPUTS(P_OP(scan) == P_WBRANCH,
2824			      "BUG: excluded WBRANCH");
2825			syncstrp = P_OPERAND(next);
2826			/*
2827			 * Unlike WBRANCH, each test at the same exclude
2828			 * sync point (due to an external loop) is separate,
2829			 * i.e testing (foo~bar)# is no different from
2830			 * (foo~bar)(foo~bar)... from the exclusion point
2831			 * of view, so we use a different sync string.
2832			 */
2833			oldsyncstr = syncstrp->p;
2834			syncstrp->p = (unsigned char *)
2835			    zshcalloc((patinend - patinstart) + 1);
2836			origpatinend = patinend;
2837			while ((ret = patmatch(P_OPERAND(scan)))) {
2838			    unsigned char *syncpt;
2839			    char *savpatinstart;
2840			    int savforce = forceerrs;
2841			    int savpatflags = patflags, synclen;
2842			    forceerrs = -1;
2843			    savglobdots = globdots;
2844			    matchederrs = errsfound;
2845			    matchpt = patinput;    /* may not be end */
2846			    globdots = 1;	   /* OK to match . first */
2847			    /* Find the point where the scan
2848			     * matched the part to be excluded: because
2849			     * of backtracking, the one
2850			     * most recently matched will be the first.
2851			     * (Luckily, backtracking is done after all
2852			     * possibilities for approximation have been
2853			     * checked.)
2854			     */
2855			    for (syncpt = syncstrp->p; !*syncpt; syncpt++)
2856				;
2857			    synclen = syncpt - syncstrp->p;
2858			    if (patinstart + synclen != patinend) {
2859				/*
2860				 * Temporarily mark the string as
2861				 * ending at this point.
2862				 */
2863				DPUTS(patinstart + synclen > matchpt,
2864				      "BUG: EXCSYNC failed");
2865
2866				patinend = patinstart + synclen;
2867				/*
2868				 * If this isn't really the end of the string,
2869				 * remember this for the (#e) assertion.
2870				 */
2871				patflags |= PAT_NOTEND;
2872			    }
2873			    savpatinstart = patinstart;
2874			    next = PATNEXT(scan);
2875			    while (next && P_ISEXCLUDE(next)) {
2876				patinput = save;
2877				/*
2878				 * turn off approximations in exclusions:
2879				 * note we keep remaining patglobflags
2880				 * set by asserted branch (or previous
2881				 * excluded branches, for consistency).
2882				 */
2883				patglobflags &= ~0xff;
2884				errsfound = 0;
2885				opnd = P_OPERAND(next) + 1;
2886				if (P_OP(next) == P_EXCLUDP && patinpath) {
2887				    /*
2888				     * Top level exclusion with a file,
2889				     * applies to whole path so add the
2890				     * segments already matched.
2891				     * We copied these in front of the
2892				     * test pattern, so patinend doesn't
2893				     * need moving.
2894				     */
2895				    DPUTS(patinput != patinstart,
2896					  "BUG: not at start excluding path");
2897				    patinput = patinstart = patinpath;
2898				}
2899				if (patmatch(opnd)) {
2900				    ret = 0;
2901				    /*
2902				     * Another subtlety: if we exclude the
2903				     * match, any parentheses just found
2904				     * become invalidated.
2905				     */
2906				    parsfound = savparsfound;
2907				}
2908				if (patinpath) {
2909				    patinput = savpatinstart +
2910					(patinput - patinstart);
2911				    patinstart = savpatinstart;
2912				}
2913				if (!ret)
2914				    break;
2915				next = PATNEXT(next);
2916			    }
2917			    /*
2918			     * Restore original end position.
2919			     */
2920			    patinend = origpatinend;
2921			    patflags = savpatflags;
2922			    globdots = savglobdots;
2923			    forceerrs = savforce;
2924			    if (ret)
2925				break;
2926			    patinput = save;
2927			    patglobflags = savglobflags;
2928			    errsfound = saverrsfound;
2929			}
2930			zfree((char *)syncstrp->p,
2931			      (patinend - patinstart) + 1);
2932			syncstrp->p = oldsyncstr;
2933			if (ret) {
2934			    patinput = matchpt;
2935			    errsfound = matchederrs;
2936			    return 1;
2937			}
2938			while ((scan = PATNEXT(scan)) &&
2939			       P_ISEXCLUDE(scan))
2940			    ;
2941		    } else {
2942			int ret = 1, pfree = 0;
2943			Upat ptrp = NULL;
2944			unsigned char *ptr;
2945			if (P_OP(scan) == P_WBRANCH) {
2946			    /*
2947			     * This is where we make sure that we are not
2948			     * repeatedly matching zero-length strings in
2949			     * a closure, which would cause an infinite loop,
2950			     * and also remove exponential behaviour in
2951			     * backtracking nested closures.
2952			     * The P_WBRANCH operator leaves a space for a
2953			     * uchar *, initialized to NULL, which is
2954			     * turned into a string the same length as the
2955			     * target string.  Every time we match from a
2956			     * particular point in the target string, we
2957			     * stick a 1 at the corresponding point here.
2958			     * If we come round to the same branch again, and
2959			     * there is already a 1, then the test fails.
2960			     */
2961			    opnd = P_OPERAND(scan);
2962			    ptrp = opnd++;
2963			    if (!ptrp->p) {
2964				ptrp->p = (unsigned char *)
2965				    zshcalloc((patinend - patinstart) + 1);
2966				pfree = 1;
2967			    }
2968			    ptr = ptrp->p + (patinput - patinstart);
2969
2970			    /*
2971			     * Without approximation, this is just a
2972			     * single bit test.  With approximation, we
2973			     * need to know how many errors there were
2974			     * last time we made the test.  If errsfound
2975			     * is now smaller than it was, hence we can
2976			     * make more approximations in the remaining
2977			     * code, we continue with the test.
2978			     * (This is why the max number of errors is
2979			     * 254, not 255.)
2980			     */
2981			    if (*ptr && errsfound + 1 >= *ptr)
2982				ret = 0;
2983			    *ptr = errsfound + 1;
2984			} else
2985			    opnd = P_OPERAND(scan);
2986			if (ret)
2987			    ret = patmatch(opnd);
2988			if (pfree) {
2989			    zfree((char *)ptrp->p,
2990				  (patinend - patinstart) + 1);
2991			    ptrp->p = NULL;
2992			}
2993			if (ret)
2994			    return 1;
2995			scan = PATNEXT(scan);
2996		    }
2997		    patinput = save;
2998		    patglobflags = savglobflags;
2999		    errsfound = saverrsfound;
3000		    DPUTS(P_OP(scan) == P_WBRANCH,
3001			  "BUG: WBRANCH not first choice.");
3002		    next = PATNEXT(scan);
3003		} while (scan && P_ISBRANCH(scan));
3004		return 0;
3005	    }
3006	    break;
3007	case P_STAR:
3008	    /* Handle specially for speed, although really P_ONEHASH+P_ANY */
3009	case P_ONEHASH:
3010	case P_TWOHASH:
3011	    /*
3012	     * This is just simple cases, matching one character.
3013	     * With approximations, we still handle * this way, since
3014	     * no approximation is ever necessary, but other closures
3015	     * are handled by the more complicated branching method
3016	     */
3017	    op = P_OP(scan);
3018	    /* Note that no counts possibly metafied characters */
3019	    start = patinput;
3020	    {
3021		char *lastcharstart;
3022		/*
3023		 * Array to record the start of characters for
3024		 * backtracking.
3025		 */
3026		VARARR(char, charstart, patinend-patinput);
3027		memset(charstart, 0, patinend-patinput);
3028
3029		if (op == P_STAR) {
3030		    for (no = 0; patinput < patinend;
3031			 CHARINC(patinput, patinend))
3032		    {
3033			charstart[patinput-start] = 1;
3034			no++;
3035		    }
3036		    /* simple optimization for reasonably common case */
3037		    if (P_OP(next) == P_END)
3038			return 1;
3039		} else {
3040		    DPUTS(patglobflags & 0xff,
3041			  "BUG: wrong backtracking with approximation.");
3042		    if (!globdots && P_NOTDOT(P_OPERAND(scan)) &&
3043			patinput == patinstart && patinput < patinend &&
3044			CHARREF(patinput, patinend) == ZWC('.'))
3045			return 0;
3046		    no = patrepeat(P_OPERAND(scan), charstart);
3047		}
3048		min = (op == P_TWOHASH) ? 1 : 0;
3049		/*
3050		 * Lookahead to avoid useless matches. This is not possible
3051		 * with approximation.
3052		 */
3053		if (P_OP(next) == P_EXACTLY && P_LS_LEN(next) &&
3054		    !(patglobflags & 0xff)) {
3055		    char *nextop = P_LS_STR(next);
3056#ifdef MULTIBYTE_SUPPORT
3057		    /* else second argument of CHARREF isn't used */
3058		    int nextlen = P_LS_LEN(next);
3059#endif
3060		    /*
3061		     * If that P_EXACTLY is last (common in simple patterns,
3062		     * such as *.c), then it can be only be matched at one
3063		     * point in the test string, so record that.
3064		     */
3065		    if (P_OP(PATNEXT(next)) == P_END &&
3066			!(patflags & PAT_NOANCH)) {
3067			int ptlen = patinend - patinput;
3068			int lenmatch = patinend -
3069			    (min ? CHARNEXT(start, patinend) : start);
3070			/* Are we in the right range? */
3071			if (P_LS_LEN(next) > lenmatch ||
3072			    P_LS_LEN(next) < ptlen)
3073			    return 0;
3074			/* Yes, just position appropriately and test. */
3075			patinput += ptlen - P_LS_LEN(next);
3076			/*
3077			 * Here we will need to be careful that patinput is not
3078			 * in the middle of a multibyte character.
3079			 */
3080			/* Continue loop with P_EXACTLY test. */
3081			break;
3082		    }
3083		    nextch = CHARREF(nextop, nextop + nextlen);
3084		} else
3085		    nextch = PEOF;
3086		savglobflags = patglobflags;
3087		saverrsfound = errsfound;
3088		lastcharstart = charstart + (patinput - start);
3089		if (no >= min) {
3090		    for (;;) {
3091			patint_t charmatch_cache;
3092			if (nextch == PEOF ||
3093			    (patinput < patinend &&
3094			     CHARMATCH_EXPR(CHARREF(patinput, patinend),
3095					    nextch))) {
3096			    if (patmatch(next))
3097				return 1;
3098			}
3099			if (--no < min)
3100			    break;
3101			/* find start of previous full character */
3102			while (!*--lastcharstart)
3103			    DPUTS(lastcharstart < charstart,
3104				  "lastcharstart invalid");
3105			patinput = start + (lastcharstart-charstart);
3106			patglobflags = savglobflags;
3107			errsfound = saverrsfound;
3108		    }
3109		}
3110	    }
3111	    /*
3112	     * As with branches, the patmatch(next) stuff for *
3113	     * handles approximation, so we don't need to try
3114	     * anything here.
3115	     */
3116	    return 0;
3117	case P_ISSTART:
3118	    if (patinput != patinstart || (patflags & PAT_NOTSTART))
3119		fail = 1;
3120	    break;
3121	case P_ISEND:
3122	    if (patinput < patinend || (patflags & PAT_NOTEND))
3123		fail = 1;
3124	    break;
3125	case P_COUNTSTART:
3126	    {
3127		/*
3128		 * Save and restore the current count and the
3129		 * start pointer in case the pattern has been
3130		 * executed by a previous repetition of a
3131		 * closure.
3132		 */
3133		long *curptr = &P_OPERAND(scan)[P_CT_CURRENT].l;
3134		long savecount = *curptr;
3135		unsigned char *saveptr = scan[P_CT_PTR].p;
3136		int ret;
3137
3138		*curptr = 0L;
3139		ret = patmatch(P_OPERAND(scan));
3140		*curptr = savecount;
3141		scan[P_CT_PTR].p = saveptr;
3142		return ret;
3143	    }
3144	case P_COUNT:
3145	    {
3146		/* (#cN,M): execution is relatively straightforward */
3147		long cur = scan[P_CT_CURRENT].l;
3148		long min = scan[P_CT_MIN].l;
3149		long max = scan[P_CT_MAX].l;
3150
3151		if (cur && cur >= min &&
3152		    (unsigned char *)patinput == scan[P_CT_PTR].p) {
3153		    /*
3154		     * Not at the first attempt to match so
3155		     * the previous attempt managed zero length.
3156		     * We can do this indefinitely so there's
3157		     * no point in going on.  Simply try to
3158		     * match the remainder of the pattern.
3159		     */
3160		    return patmatch(next);
3161		}
3162		scan[P_CT_PTR].p = (unsigned char *)patinput;
3163
3164		if (max < 0 || cur < max) {
3165		    char *patinput_thistime = patinput;
3166		    scan[P_CT_CURRENT].l = cur + 1;
3167		    if (patmatch(scan + P_CT_OPERAND))
3168			return 1;
3169		    patinput = patinput_thistime;
3170		}
3171		if (cur < min)
3172		    return 0;
3173		return patmatch(next);
3174	    }
3175	case P_END:
3176	    if (!(fail = (patinput < patinend && !(patflags & PAT_NOANCH))))
3177		return 1;
3178	    break;
3179#ifdef DEBUG
3180	default:
3181	    dputs("BUG: bad operand in patmatch.");
3182	    return 0;
3183	    break;
3184#endif
3185	}
3186
3187	if (fail) {
3188	    if (errsfound < (patglobflags & 0xff) &&
3189		(forceerrs == -1 || errsfound < forceerrs)) {
3190		/*
3191		 * Approximation code.  There are four possibilities
3192		 *
3193		 * 1. omit character from input string
3194		 * 2. transpose characters in input and pattern strings
3195		 * 3. omit character in both input and pattern strings
3196		 * 4. omit character from pattern string.
3197		 *
3198		 * which we try in that order.
3199		 *
3200		 * Of these, 2, 3 and 4 require an exact match string
3201		 * (P_EXACTLY) while 1, 2 and 3 require that we not
3202		 * have reached the end of the input string.
3203		 *
3204		 * Note in each case after making the approximation we
3205		 * need to retry the *same* pattern; this is what
3206		 * requires exactpos, a slightly doleful way of
3207		 * communicating with the exact character matcher.
3208		 */
3209		char *savexact = exactpos;
3210		save = patinput;
3211		savglobflags = patglobflags;
3212		saverrsfound = ++errsfound;
3213		fail = 0;
3214
3215		DPUTS(P_OP(scan) != P_EXACTLY && exactpos,
3216		      "BUG: non-exact match has set exactpos");
3217
3218		/* Try omitting a character from the input string */
3219		if (patinput < patinend) {
3220		    CHARINC(patinput, patinend);
3221		    /* If we are not on an exact match, then this is
3222		     * our last gasp effort, so we can optimize out
3223		     * the recursive call.
3224		     */
3225		    if (P_OP(scan) != P_EXACTLY)
3226			continue;
3227		    if (patmatch(scan))
3228			return 1;
3229		}
3230
3231		if (P_OP(scan) == P_EXACTLY) {
3232		    char *nextexact = savexact;
3233		    DPUTS(!savexact,
3234			  "BUG: exact match has not set exactpos");
3235		    CHARINC(nextexact, exactend);
3236
3237		    if (save < patinend) {
3238			char *nextin = save;
3239			CHARINC(nextin, patinend);
3240			patglobflags = savglobflags;
3241			errsfound = saverrsfound;
3242			exactpos = savexact;
3243
3244			/*
3245			 * Try swapping two characters in patinput and
3246			 * exactpos
3247			 */
3248			if (save < patinend && nextin < patinend &&
3249			    nextexact < exactend) {
3250			    patint_t cin0 = CHARREF(save, patinend);
3251			    patint_t cpa0 = CHARREF(exactpos, exactend);
3252			    patint_t cin1 = CHARREF(nextin, patinend);
3253			    patint_t cpa1 = CHARREF(nextexact, exactend);
3254
3255			    if (CHARMATCH(cin0, cpa1) &&
3256				CHARMATCH(cin1, cpa0)) {
3257				patinput = nextin;
3258				CHARINC(patinput, patinend);
3259				exactpos = nextexact;
3260				CHARINC(exactpos, exactend);
3261				if (patmatch(scan))
3262				    return 1;
3263
3264				patglobflags = savglobflags;
3265				errsfound = saverrsfound;
3266			    }
3267			}
3268
3269			/*
3270			 * Try moving up both strings.
3271			 */
3272			patinput = nextin;
3273			exactpos = nextexact;
3274			if (patmatch(scan))
3275			    return 1;
3276
3277			patinput = save;
3278			patglobflags = savglobflags;
3279			errsfound = saverrsfound;
3280			exactpos = savexact;
3281		    }
3282
3283		    DPUTS(exactpos == exactend, "approximating too far");
3284		    /*
3285		     * Try moving up the exact match pattern.
3286		     * This must be the last attempt, so just loop
3287		     * instead of calling recursively.
3288		     */
3289		    CHARINC(exactpos, exactend);
3290		    continue;
3291		}
3292	    }
3293	    exactpos = NULL;
3294	    return 0;
3295	}
3296
3297	scan = next;
3298    }
3299
3300    return 0;
3301}
3302
3303
3304/**/
3305#ifdef MULTIBYTE_SUPPORT
3306
3307/*
3308 * See if character ch matches a pattern range specification.
3309 * The null-terminated specification is in range; the test
3310 * character is in ch.
3311 *
3312 * indptr is used by completion matching, which is why this
3313 * function is exported.  If indptr is not NULL we set *indptr
3314 * to the index of the character in the range string, adjusted
3315 * in the case of "A-B" ranges such that A would count as its
3316 * normal index (say IA), B would count as IA + (B-A), and any
3317 * character within the range as appropriate.  We're not strictly
3318 * guaranteed this fits within a wint_t, but if this is Unicode
3319 * in 32 bits we have a fair amount of distance left over.
3320 *
3321 * mtp is used in the same circumstances.  *mtp returns the match type:
3322 * 0 for a standard character, else the PP_ index.  It's not
3323 * useful if the match failed.
3324 */
3325
3326/**/
3327mod_export int
3328mb_patmatchrange(char *range, wchar_t ch, wint_t *indptr, int *mtp)
3329{
3330    wchar_t r1, r2;
3331
3332    if (indptr)
3333	*indptr = 0;
3334    /*
3335     * Careful here: unlike other strings, range is a NULL-terminated,
3336     * metafied string, because we need to treat the Posix and hyphenated
3337     * ranges specially.
3338     */
3339    while (*range) {
3340	if (imeta(STOUC(*range))) {
3341	    int swtype = STOUC(*range++) - STOUC(Meta);
3342	    if (mtp)
3343		*mtp = swtype;
3344	    switch (swtype) {
3345	    case 0:
3346		/* ordinary metafied character */
3347		range--;
3348		if (metacharinc(&range) == ch)
3349		    return 1;
3350		break;
3351	    case PP_ALPHA:
3352		if (iswalpha(ch))
3353		    return 1;
3354		break;
3355	    case PP_ALNUM:
3356		if (iswalnum(ch))
3357		    return 1;
3358		break;
3359	    case PP_ASCII:
3360		if ((ch & ~0x7f) == 0)
3361		    return 1;
3362		break;
3363	    case PP_BLANK:
3364		if (ch == L' ' || ch == L'\t')
3365		    return 1;
3366		break;
3367	    case PP_CNTRL:
3368		if (iswcntrl(ch))
3369		    return 1;
3370		break;
3371	    case PP_DIGIT:
3372		if (iswdigit(ch))
3373		    return 1;
3374		break;
3375	    case PP_GRAPH:
3376		if (iswgraph(ch))
3377		    return 1;
3378		break;
3379	    case PP_LOWER:
3380		if (iswlower(ch))
3381		    return 1;
3382		break;
3383	    case PP_PRINT:
3384		if (iswprint(ch))
3385		    return 1;
3386		break;
3387	    case PP_PUNCT:
3388		if (iswpunct(ch))
3389		    return 1;
3390		break;
3391	    case PP_SPACE:
3392		if (iswspace(ch))
3393		    return 1;
3394		break;
3395	    case PP_UPPER:
3396		if (iswupper(ch))
3397		    return 1;
3398		break;
3399	    case PP_XDIGIT:
3400		if (iswxdigit(ch))
3401		    return 1;
3402		break;
3403	    case PP_IDENT:
3404		if (wcsitype(ch, IIDENT))
3405		    return 1;
3406		break;
3407	    case PP_IFS:
3408		if (wcsitype(ch, ISEP))
3409		    return 1;
3410		break;
3411	    case PP_IFSSPACE:
3412		/* must be ASCII space character */
3413		if (ch < 128 && iwsep((int)ch))
3414		    return 1;
3415		break;
3416	    case PP_WORD:
3417		if (wcsitype(ch, IWORD))
3418		    return 1;
3419		break;
3420	    case PP_RANGE:
3421		r1 = metacharinc(&range);
3422		r2 = metacharinc(&range);
3423		if (r1 <= ch && ch <= r2) {
3424		    if (indptr)
3425			*indptr += ch - r1;
3426		    return 1;
3427		}
3428		/* Careful not to screw up counting with bogus range */
3429		if (indptr && r1 < r2) {
3430		    /*
3431		     * This gets incremented again below to get
3432		     * us past the range end.  This is correct.
3433		     */
3434		    *indptr += r2 - r1;
3435		}
3436		break;
3437	    case PP_UNKWN:
3438		DPUTS(1, "BUG: unknown posix range passed through.\n");
3439		break;
3440	    default:
3441		DPUTS(1, "BUG: unknown metacharacter in range.");
3442		break;
3443	    }
3444	} else if (metacharinc(&range) == ch) {
3445	    if (mtp)
3446		*mtp = 0;
3447	    return 1;
3448	}
3449	if (indptr)
3450	    (*indptr)++;
3451    }
3452    return 0;
3453}
3454
3455
3456/*
3457 * This is effectively the reverse of mb_patmatchrange().
3458 * Given a range descriptor of the same form, and an index into it,
3459 * try to determine the character that is matched.  If the index
3460 * points to a [:...:] generic style match, set chr to WEOF and
3461 * return the type in mtp instead.  Return 1 if successful, 0 if
3462 * there was no corresponding index.  Note all pointer arguments
3463 * must be non-null.
3464 */
3465
3466/**/
3467mod_export int
3468mb_patmatchindex(char *range, wint_t ind, wint_t *chr, int *mtp)
3469{
3470    wchar_t r1, r2, rchr;
3471    wint_t rdiff;
3472
3473    *chr = WEOF;
3474    *mtp = 0;
3475
3476    while (*range) {
3477	if (imeta(STOUC(*range))) {
3478	    int swtype = STOUC(*range++) - STOUC(Meta);
3479	    switch (swtype) {
3480	    case 0:
3481		range--;
3482		rchr = metacharinc(&range);
3483		if (!ind) {
3484		    *chr = (wint_t) rchr;
3485		    return 1;
3486		}
3487		break;
3488
3489	    case PP_ALPHA:
3490	    case PP_ALNUM:
3491	    case PP_ASCII:
3492	    case PP_BLANK:
3493	    case PP_CNTRL:
3494	    case PP_DIGIT:
3495	    case PP_GRAPH:
3496	    case PP_LOWER:
3497	    case PP_PRINT:
3498	    case PP_PUNCT:
3499	    case PP_SPACE:
3500	    case PP_UPPER:
3501	    case PP_XDIGIT:
3502	    case PP_IDENT:
3503	    case PP_IFS:
3504	    case PP_IFSSPACE:
3505	    case PP_WORD:
3506		if (!ind) {
3507		    *mtp = swtype;
3508		    return 1;
3509		}
3510		break;
3511
3512	    case PP_RANGE:
3513		r1 = metacharinc(&range);
3514		r2 = metacharinc(&range);
3515		rdiff = (wint_t)r2 - (wint_t)r1;
3516		if (rdiff >= ind) {
3517		    *chr = (wint_t)r1 + ind;
3518		    return 1;
3519		}
3520		/* note the extra decrement to ind below */
3521		ind -= rdiff;
3522		break;
3523	    case PP_UNKWN:
3524		DPUTS(1, "BUG: unknown posix range passed through.\n");
3525		break;
3526	    default:
3527		DPUTS(1, "BUG: unknown metacharacter in range.");
3528		break;
3529	    }
3530	} else {
3531	    rchr = metacharinc(&range);
3532	    if (!ind) {
3533		*chr = (wint_t)rchr;
3534		return 1;
3535	    }
3536	}
3537	if (!ind--)
3538	    break;
3539    }
3540
3541    /* No corresponding index. */
3542    return 0;
3543}
3544
3545/**/
3546#endif /* MULTIBYTE_SUPPORT */
3547
3548/*
3549 * Identical function to mb_patmatchrange() above for single-byte
3550 * characters.
3551 */
3552
3553/**/
3554mod_export int
3555patmatchrange(char *range, int ch, int *indptr, int *mtp)
3556{
3557    int r1, r2;
3558
3559    if (indptr)
3560	*indptr = 0;
3561    /*
3562     * Careful here: unlike other strings, range is a NULL-terminated,
3563     * metafied string, because we need to treat the Posix and hyphenated
3564     * ranges specially.
3565     */
3566    for (; *range; range++) {
3567	if (imeta(STOUC(*range))) {
3568	    int swtype = STOUC(*range) - STOUC(Meta);
3569	    if (mtp)
3570		*mtp = swtype;
3571	    switch (swtype) {
3572	    case 0:
3573		if (STOUC(*++range ^ 32) == ch)
3574		    return 1;
3575		break;
3576	    case PP_ALPHA:
3577		if (isalpha(ch))
3578		    return 1;
3579		break;
3580	    case PP_ALNUM:
3581		if (isalnum(ch))
3582		    return 1;
3583		break;
3584	    case PP_ASCII:
3585		if ((ch & ~0x7f) == 0)
3586		    return 1;
3587		break;
3588	    case PP_BLANK:
3589		if (ch == ' ' || ch == '\t')
3590		    return 1;
3591		break;
3592	    case PP_CNTRL:
3593		if (iscntrl(ch))
3594		    return 1;
3595		break;
3596	    case PP_DIGIT:
3597		if (isdigit(ch))
3598		    return 1;
3599		break;
3600	    case PP_GRAPH:
3601		if (isgraph(ch))
3602		    return 1;
3603		break;
3604	    case PP_LOWER:
3605		if (islower(ch))
3606		    return 1;
3607		break;
3608	    case PP_PRINT:
3609		if (isprint(ch))
3610		    return 1;
3611		break;
3612	    case PP_PUNCT:
3613		if (ispunct(ch))
3614		    return 1;
3615		break;
3616	    case PP_SPACE:
3617		if (isspace(ch))
3618		    return 1;
3619		break;
3620	    case PP_UPPER:
3621		if (isupper(ch))
3622		    return 1;
3623		break;
3624	    case PP_XDIGIT:
3625		if (isxdigit(ch))
3626		    return 1;
3627		break;
3628	    case PP_IDENT:
3629		if (iident(ch))
3630		    return 1;
3631		break;
3632	    case PP_IFS:
3633		if (isep(ch))
3634		    return 1;
3635		break;
3636	    case PP_IFSSPACE:
3637		if (iwsep(ch))
3638		    return 1;
3639		break;
3640	    case PP_WORD:
3641		if (iword(ch))
3642		    return 1;
3643		break;
3644	    case PP_RANGE:
3645		range++;
3646		r1 = STOUC(UNMETA(range));
3647		METACHARINC(range);
3648		r2 = STOUC(UNMETA(range));
3649		if (*range == Meta)
3650		    range++;
3651		if (r1 <= ch && ch <= r2) {
3652		    if (indptr)
3653			*indptr += ch - r1;
3654		    return 1;
3655		}
3656		if (indptr && r1 < r2)
3657		    *indptr += r2 - r1;
3658		break;
3659	    case PP_UNKWN:
3660		DPUTS(1, "BUG: unknown posix range passed through.\n");
3661		break;
3662	    default:
3663		DPUTS(1, "BUG: unknown metacharacter in range.");
3664		break;
3665	    }
3666	} else if (STOUC(*range) == ch) {
3667	    if (mtp)
3668		*mtp = 0;
3669	    return 1;
3670	}
3671	if (indptr)
3672	    (*indptr)++;
3673    }
3674    return 0;
3675}
3676
3677
3678/**/
3679#ifndef MULTIBYTE_SUPPORT
3680
3681/*
3682 * Identical function to mb_patmatchindex() above for single-byte
3683 * characters.  Here -1 represents a character that needs a special type.
3684 *
3685 * Unlike patmatchrange, we only need this in ZLE, which always
3686 * uses MULTIBYTE_SUPPORT if compiled in; hence we don't use
3687 * this function in that case.
3688 */
3689
3690/**/
3691mod_export int
3692patmatchindex(char *range, int ind, int *chr, int *mtp)
3693{
3694    int r1, r2, rdiff, rchr;
3695
3696    *chr = -1;
3697    *mtp = 0;
3698
3699    for (; *range; range++) {
3700	if (imeta(STOUC(*range))) {
3701	    int swtype = STOUC(*range) - STOUC(Meta);
3702	    switch (swtype) {
3703	    case 0:
3704		/* ordinary metafied character */
3705		rchr = STOUC(*++range) ^ 32;
3706		if (!ind) {
3707		    *chr = rchr;
3708		    return 1;
3709		}
3710		break;
3711
3712	    case PP_ALPHA:
3713	    case PP_ALNUM:
3714	    case PP_ASCII:
3715	    case PP_BLANK:
3716	    case PP_CNTRL:
3717	    case PP_DIGIT:
3718	    case PP_GRAPH:
3719	    case PP_LOWER:
3720	    case PP_PRINT:
3721	    case PP_PUNCT:
3722	    case PP_SPACE:
3723	    case PP_UPPER:
3724	    case PP_XDIGIT:
3725	    case PP_IDENT:
3726	    case PP_IFS:
3727	    case PP_IFSSPACE:
3728	    case PP_WORD:
3729		if (!ind) {
3730		    *mtp = swtype;
3731		    return 1;
3732		}
3733		break;
3734
3735	    case PP_RANGE:
3736		range++;
3737		r1 = STOUC(UNMETA(range));
3738		METACHARINC(range);
3739		r2 = STOUC(UNMETA(range));
3740		if (*range == Meta)
3741		    range++;
3742		rdiff = r2 - r1;
3743		if (rdiff >= ind) {
3744		    *chr = r1 + ind;
3745		    return 1;
3746		}
3747		/* note the extra decrement to ind below */
3748		ind -= rdiff;
3749		break;
3750	    case PP_UNKWN:
3751		DPUTS(1, "BUG: unknown posix range passed through.\n");
3752		break;
3753	    default:
3754		DPUTS(1, "BUG: unknown metacharacter in range.");
3755		break;
3756	    }
3757	} else {
3758	    if (!ind) {
3759		*chr = STOUC(*range);
3760		return 1;
3761	    }
3762	}
3763	if (!ind--)
3764	    break;
3765    }
3766
3767    /* No corresponding index. */
3768    return 0;
3769}
3770
3771/**/
3772#endif /* MULTIBYTE_SUPPORT */
3773
3774/*
3775 * Repeatedly match something simple and say how many times.
3776 * charstart is an array parallel to that starting at patinput
3777 * and records the start of (possibly multibyte) characters
3778 * to aid in later backtracking.
3779 */
3780
3781/**/
3782static int patrepeat(Upat p, char *charstart)
3783{
3784    int count = 0;
3785    patint_t tch, charmatch_cache;
3786    char *scan, *opnd;
3787
3788    scan = patinput;
3789    opnd = (char *)P_OPERAND(p);
3790
3791    switch(P_OP(p)) {
3792#ifdef DEBUG
3793    case P_ANY:
3794	dputs("BUG: ?# did not get optimized to *");
3795	return 0;
3796	break;
3797#endif
3798    case P_EXACTLY:
3799	DPUTS(P_LS_LEN(p) != 1, "closure following more than one character");
3800	tch = CHARREF(P_LS_STR(p), P_LS_STR(p) + P_LS_LEN(p));
3801	while (scan < patinend &&
3802	       CHARMATCH_EXPR(CHARREF(scan, patinend), tch)) {
3803	    charstart[scan-patinput] = 1;
3804	    count++;
3805	    CHARINC(scan, patinend);
3806	}
3807	break;
3808    case P_ANYOF:
3809    case P_ANYBUT:
3810	while (scan < patinend) {
3811#ifdef MULTIBYTE_SUPPORT
3812	    wchar_t cr = CHARREF(scan, patinend);
3813	    if (patglobflags & GF_MULTIBYTE) {
3814		if (mb_patmatchrange(opnd, cr, NULL, NULL) ^
3815		    (P_OP(p) == P_ANYOF))
3816		    break;
3817	    } else if (patmatchrange(opnd, (int)cr, NULL, NULL) ^
3818		       (P_OP(p) == P_ANYOF))
3819		break;
3820#else
3821	    if (patmatchrange(opnd, CHARREF(scan, patinend), NULL, NULL) ^
3822		(P_OP(p) == P_ANYOF))
3823		break;
3824#endif
3825	    charstart[scan-patinput] = 1;
3826	    count++;
3827	    CHARINC(scan, patinend);
3828	}
3829	break;
3830#ifdef DEBUG
3831    default:
3832	dputs("BUG: something very strange is happening in patrepeat");
3833	return 0;
3834	break;
3835#endif
3836    }
3837
3838    patinput = scan;
3839    return count;
3840}
3841
3842/* Free a patprog. */
3843
3844/**/
3845mod_export void
3846freepatprog(Patprog prog)
3847{
3848    if (prog && prog != dummy_patprog1 && prog != dummy_patprog2)
3849	zfree(prog, prog->size);
3850}
3851
3852/* Disable or reenable a pattern character */
3853
3854/**/
3855int
3856pat_enables(const char *cmd, char **patp, int enable)
3857{
3858    int ret = 0;
3859    const char **stringp;
3860    char *disp;
3861
3862    if (!*patp) {
3863	int done = 0;
3864	for (stringp = zpc_strings, disp = zpc_disables;
3865	     stringp < zpc_strings + ZPC_COUNT;
3866	     stringp++, disp++) {
3867	    if (!*stringp)
3868		continue;
3869	    if (enable ? *disp : !*disp)
3870		continue;
3871	    if (done)
3872		putc(' ', stdout);
3873	    printf("'%s'", *stringp);
3874	    done = 1;
3875	}
3876	if (done)
3877	    putc('\n', stdout);
3878	return 0;
3879    }
3880
3881    for (; *patp; patp++) {
3882	for (stringp = zpc_strings, disp = zpc_disables;
3883	     stringp < zpc_strings + ZPC_COUNT;
3884	     stringp++, disp++) {
3885	    if (*stringp && !strcmp(*stringp, *patp)) {
3886		*disp = (char)!enable;
3887		break;
3888	    }
3889	}
3890	if (stringp == zpc_strings + ZPC_COUNT) {
3891	    zerrnam(cmd, "invalid pattern: %s", *patp);
3892	    ret = 1;
3893	}
3894    }
3895
3896    return ret;
3897}
3898
3899/*
3900 * Save the current state of pattern disables, returning the saved value.
3901 */
3902
3903/**/
3904unsigned int
3905savepatterndisables(void)
3906{
3907    unsigned int disables, bit;
3908    char *disp;
3909
3910    disables = 0;
3911    for (bit = 1, disp = zpc_disables;
3912	 disp < zpc_disables + ZPC_COUNT;
3913	 bit <<= 1, disp++) {
3914	if (*disp)
3915	    disables |= bit;
3916    }
3917    return disables;
3918}
3919
3920/*
3921 * Function scope saving pattern enables.
3922 */
3923
3924/**/
3925void
3926startpatternscope(void)
3927{
3928    Zpc_disables_save newdis;
3929
3930    newdis = (Zpc_disables_save)zalloc(sizeof(*newdis));
3931    newdis->next = zpc_disables_stack;
3932    newdis->disables = savepatterndisables();
3933
3934    zpc_disables_stack = newdis;
3935}
3936
3937/*
3938 * Restore completely the state of pattern disables.
3939 */
3940
3941/**/
3942void
3943restorepatterndisables(unsigned int disables)
3944{
3945    char *disp;
3946    unsigned int bit;
3947
3948    for (bit = 1, disp = zpc_disables;
3949	 disp < zpc_disables + ZPC_COUNT;
3950	 bit <<= 1, disp++) {
3951	if (disables & bit)
3952	    *disp = 1;
3953	else
3954	    *disp = 0;
3955    }
3956}
3957
3958/*
3959 * Function scope to restore pattern enables if localpatterns is turned on.
3960 */
3961
3962/**/
3963void
3964endpatternscope(void)
3965{
3966    Zpc_disables_save olddis;
3967
3968    olddis = zpc_disables_stack;
3969    zpc_disables_stack = olddis->next;
3970
3971    if (isset(LOCALPATTERNS))
3972	restorepatterndisables(olddis->disables);
3973
3974    zfree(olddis, sizeof(*olddis));
3975}
3976
3977/* Reinitialise pattern disables */
3978
3979/**/
3980void
3981clearpatterndisables(void)
3982{
3983    memset(zpc_disables, 0, ZPC_COUNT);
3984}
3985
3986
3987/* Check to see if str is eligible for filename generation. */
3988
3989/**/
3990mod_export int
3991haswilds(char *str)
3992{
3993    char *start;
3994
3995    /* `[' and `]' are legal even if bad patterns are usually not. */
3996    if ((*str == Inbrack || *str == Outbrack) && !str[1])
3997	return 0;
3998
3999    /* If % is immediately followed by ?, then that ? is     *
4000     * not treated as a wildcard.  This is so you don't have *
4001     * to escape job references such as %?foo.               */
4002    if (str[0] == '%' && str[1] == Quest)
4003	str[1] = '?';
4004
4005    /*
4006     * Note that at this point zpc_special has not been set up.
4007     */
4008    start = str;
4009    for (; *str; str++) {
4010	switch (*str) {
4011	    case Inpar:
4012		if ((!isset(SHGLOB) && !zpc_disables[ZPC_INPAR]) ||
4013		    (str > start && isset(KSHGLOB) &&
4014		     ((str[-1] == Quest && !zpc_disables[ZPC_KSH_QUEST]) ||
4015		      (str[-1] == Star && !zpc_disables[ZPC_KSH_STAR]) ||
4016		      (str[-1] == '+' && !zpc_disables[ZPC_KSH_PLUS]) ||
4017		      (str[-1] == '!' && !zpc_disables[ZPC_KSH_BANG]) ||
4018		      (str[-1] == '@' && !zpc_disables[ZPC_KSH_AT]))))
4019		    return 1;
4020		break;
4021
4022	    case Bar:
4023		if (!zpc_disables[ZPC_BAR])
4024		    return 1;
4025		break;
4026
4027	    case Star:
4028		if (!zpc_disables[ZPC_STAR])
4029		    return 1;
4030		break;
4031
4032	    case Inbrack:
4033		if (!zpc_disables[ZPC_INBRACK])
4034		    return 1;
4035		break;
4036
4037	    case Inang:
4038		if (!zpc_disables[ZPC_INANG])
4039		    return 1;
4040		break;
4041
4042	    case Quest:
4043		if (!zpc_disables[ZPC_QUEST])
4044		    return 1;
4045		break;
4046
4047	    case Pound:
4048		if (isset(EXTENDEDGLOB) && !zpc_disables[ZPC_HASH])
4049		    return 1;
4050		break;
4051
4052	    case Hat:
4053		if (isset(EXTENDEDGLOB) && !zpc_disables[ZPC_HAT])
4054		    return 1;
4055		break;
4056	}
4057    }
4058    return 0;
4059}
4060