1/*
2 * compmatch.c - the complete module, completion matching code
3 *
4 * This file is part of zsh, the Z shell.
5 *
6 * Copyright (c) 1999 Sven Wischnowsky
7 * All rights reserved.
8 *
9 * Permission is hereby granted, without written agreement and without
10 * license or royalty fees, to use, copy, modify, and distribute this
11 * software and to distribute modified versions of this software for any
12 * purpose, provided that the above copyright notice and the following
13 * two paragraphs appear in all copies of this software.
14 *
15 * In no event shall Sven Wischnowsky or the Zsh Development Group be liable
16 * to any party for direct, indirect, special, incidental, or consequential
17 * damages arising out of the use of this software and its documentation,
18 * even if Sven Wischnowsky and the Zsh Development Group have been advised of
19 * the possibility of such damage.
20 *
21 * Sven Wischnowsky and the Zsh Development Group specifically disclaim any
22 * warranties, including, but not limited to, the implied warranties of
23 * merchantability and fitness for a particular purpose.  The software
24 * provided hereunder is on an "as is" basis, and Sven Wischnowsky and the
25 * Zsh Development Group have no obligation to provide maintenance,
26 * support, updates, enhancements, or modifications.
27 *
28 */
29
30#include "complete.mdh"
31#include "compmatch.pro"
32
33/*
34 * This compares two cpattern lists and returns non-zero if they are
35 * equal (N.B. opposite sense to usual *cmp()).
36 *
37 * The old version of this didn't worry about whether the lists
38 * were the same length.  This one does.  It's hard to see how
39 * that can be wrong even if it's unnecessary.
40 */
41
42/**/
43static int
44cpatterns_same(Cpattern a, Cpattern b)
45{
46    while (a) {
47	if (!b)
48	    return 0;
49	if (a->tp != b->tp)
50	    return 0;
51	switch (a->tp) {
52	case CPAT_CCLASS:
53	case CPAT_NCLASS:
54	case CPAT_EQUIV:
55	    /*
56	     * Patterns can actually match the same even if
57	     * the range strings don't compare differently, but
58	     * I don't think we need to handle that subtlety.
59	     */
60	    if (strcmp(a->u.str, b->u.str) != 0)
61		return 0;
62	    break;
63
64	case CPAT_CHAR:
65	    if (a->u.chr != b->u.chr)
66		return 0;
67	    break;
68
69	default:
70	    /* here to silence compiler */
71	    break;
72	}
73
74	a = a->next;
75	b = b->next;
76    }
77    return !b;
78}
79
80/* This compares two cmatchers and returns non-zero if they are equal. */
81
82/**/
83static int
84cmatchers_same(Cmatcher a, Cmatcher b)
85{
86    return (a == b ||
87	    (a->flags == b->flags &&
88	     a->llen == b->llen && a->wlen == b->wlen &&
89	     (!a->llen || cpatterns_same(a->line, b->line)) &&
90	     (a->wlen <= 0 || cpatterns_same(a->word, b->word)) &&
91	     (!(a->flags & (CMF_LEFT | CMF_RIGHT)) ||
92	      (a->lalen == b->lalen && a->ralen == b->ralen &&
93	       (!a->lalen || cpatterns_same(a->left, b->left)) &&
94	       (!a->ralen || cpatterns_same(a->right, b->right))))));
95}
96
97/* Add the given matchers to the bmatcher list. */
98
99/**/
100mod_export void
101add_bmatchers(Cmatcher m)
102{
103    Cmlist old = bmatchers, *q = &bmatchers, n;
104
105    for (; m; m = m->next) {
106	if ((!m->flags && m->wlen > 0 && m->llen > 0) ||
107	    (m->flags == CMF_RIGHT && m->wlen < 0 && !m->llen)) {
108	    *q = n = (Cmlist) zhalloc(sizeof(struct cmlist));
109	    n->matcher = m;
110	    q = &(n->next);
111	}
112    }
113    *q = old;
114}
115
116/* This is called when the matchers in the mstack have changed to
117 * ensure that the bmatchers list contains no matchers not in mstack. */
118
119/**/
120mod_export void
121update_bmatchers(void)
122{
123    Cmlist p = bmatchers, ms;
124    Cmatcher mp;
125    int t;
126
127    while (p) {
128	t = 0;
129	for (ms = mstack; ms && !t; ms = ms->next)
130	    for (mp = ms->matcher; mp && !t; mp = mp->next)
131		t = cmatchers_same(mp, p->matcher);
132
133	p = p->next;
134	if (!t) {
135	    bmatchers = p;
136	}
137    }
138}
139
140/* This returns a new Cline structure. */
141
142/**/
143Cline
144get_cline(char *l, int ll, char *w, int wl, char *o, int ol, int fl)
145{
146    Cline r;
147
148    /* Prefer to take it from the buffer list (freecl), if there
149     * is none, allocate a new one. */
150
151    if ((r = freecl))
152	freecl = r->next;
153    else
154	r = (Cline) zhalloc(sizeof(*r));
155
156    r->next = NULL;
157    r->line = l; r->llen = ll;
158    r->word = w; r->wlen = wl;
159    DPUTS(wl > 0 && !*w, "Bad word");
160    r->orig = o; r->olen = ol;
161    r->slen = 0;
162    r->flags = fl;
163    r->prefix = r->suffix = NULL;
164    r->min = r->max = 0;
165    return r;
166}
167
168/* This frees a cline list. */
169
170/**/
171void
172free_cline(Cline l)
173{
174    Cline n;
175
176    while (l) {
177	n = l->next;
178	l->next = freecl;
179	freecl = l;
180	free_cline(l->prefix);
181	free_cline(l->suffix);
182	l = n;
183    }
184}
185
186/* Copy a cline list. */
187
188/**/
189Cline
190cp_cline(Cline l, int deep)
191{
192    Cline r = NULL, *p = &r, t, lp = NULL;
193
194    while (l) {
195	if ((t = freecl))
196	    freecl = t->next;
197	else
198	    t = (Cline) zhalloc(sizeof(*t));
199	memcpy(t, l, sizeof(*t));
200	if (deep) {
201	    if (t->prefix)
202		t->prefix = cp_cline(t->prefix, 0);
203	    if (t->suffix)
204		t->suffix = cp_cline(t->suffix, 0);
205	}
206	*p = lp = t;
207	p = &(t->next);
208	l = l->next;
209    }
210    *p = NULL;
211
212    return r;
213}
214
215/* Calculate the length of a cline and its sub-lists. */
216
217/**/
218int
219cline_sublen(Cline l)
220{
221    int len = ((l->flags & CLF_LINE) ? l->llen : l->wlen);
222
223    if (l->olen && !((l->flags & CLF_SUF) ? l->suffix : l->prefix))
224	len += l->olen;
225    else {
226	Cline p;
227
228	for (p = l->prefix; p; p = p->next)
229	    len += ((p->flags & CLF_LINE) ? p->llen : p->wlen);
230	for (p = l->suffix; p; p = p->next)
231	    len += ((p->flags & CLF_LINE) ? p->llen : p->wlen);
232    }
233    return len;
234}
235
236/* Set the lengths in the cline lists. */
237
238/**/
239void
240cline_setlens(Cline l, int both)
241{
242    while (l) {
243	l->min = cline_sublen(l);
244	if (both)
245	    l->max = l->min;
246	l = l->next;
247    }
248}
249
250/* This sets the CLF_MATCHED flag in the given clines. */
251
252/**/
253void
254cline_matched(Cline p)
255{
256    while (p) {
257	p->flags |= CLF_MATCHED;
258	cline_matched(p->prefix);
259	cline_matched(p->suffix);
260
261	p = p->next;
262    }
263}
264
265/* This reverts the order of the elements of the given cline list and
266 * returns a pointer to the new head. */
267
268/**/
269Cline
270revert_cline(Cline p)
271{
272    Cline r = NULL, n;
273
274    while (p) {
275	n = p->next;
276	p->next = r;
277	r = p;
278	p = n;
279    }
280    return r;
281}
282
283/* Global variables used during matching: a char-buffer for the string to
284 * use for the match, and two cline lists for the two levels we use. */
285
286/**/
287char *matchbuf = NULL;
288/**/
289int matchbuflen = 0, matchbufadded;
290
291/**/
292Cline matchparts, matchlastpart;
293/**/
294Cline matchsubs, matchlastsub;
295
296/* This initialises the variables above. */
297
298/**/
299static void
300start_match(void)
301{
302    if (matchbuf)
303	*matchbuf = '\0';
304    matchbufadded = 0;
305    matchparts = matchlastpart = matchsubs = matchlastsub = NULL;
306}
307
308/* This aborts a matching, freeing the cline lists build. */
309
310/**/
311static void
312abort_match(void)
313{
314    free_cline(matchparts);
315    free_cline(matchsubs);
316    matchparts = matchsubs = NULL;
317}
318
319/* This adds a new string in the static char buffer. The arguments are
320 * the matcher used (if any), the strings from the line and the word
321 * and the length of the string from the word. The last argument is
322 * non-zero if we are matching a suffix (where the given string has to
323 * be prepended to the contents of the buffer). */
324
325/**/
326static void
327add_match_str(Cmatcher m, char *l, char *w, int wl, int sfx)
328{
329    /* Get the string and length to insert: either from the line
330     * or from the match. */
331    if (m && (m->flags & CMF_LINE)) {
332	wl = m->llen; w = l;
333    }
334    if (wl) {
335	/* Probably resize the buffer. */
336	if (matchbuflen - matchbufadded <= wl) {
337	    int blen = matchbuflen + wl + 20;
338	    char *buf;
339
340	    buf = (char *) zalloc(blen);
341	    memcpy(buf, matchbuf, matchbuflen);
342	    zfree(matchbuf, matchbuflen);
343	    matchbuf = buf;
344	    matchbuflen = blen;
345	}
346	/* Insert the string. */
347	if (sfx) {
348	    memmove(matchbuf + wl, matchbuf, matchbufadded + 1);
349	    memcpy(matchbuf, w, wl);
350	} else
351	    memcpy(matchbuf + matchbufadded, w, wl);
352	matchbufadded += wl;
353	matchbuf[matchbufadded] = '\0';
354    }
355}
356
357/* This adds a cline for a word-part during matching. Arguments are the
358 * matcher used, pointers to the line and word strings for the anchor,
359 * a pointer to the original line string for the whole part, the string
360 * before (or after) the anchor that has not yet been added, the length
361 * of the line-string for that, and a flag saying if we are matching a
362 * suffix. */
363
364/**/
365static void
366add_match_part(Cmatcher m, char *l, char *w, int wl,
367	       char *o, int ol, char *s, int sl, int osl, int sfx)
368{
369    Cline p, lp, lprem;
370
371    /* If the anchors are equal, we keep only one. */
372
373    if (l && !strncmp(l, w, wl))
374	l = NULL;
375
376    /*
377     * Split the new part into parts and turn the last one into a
378     * `suffix' if we have a left anchor---don't do this if the last one
379     * came from a right anchor before the end of the part we're
380     * splitting.
381     */
382
383    p = bld_parts(s, sl, osl, &lp, &lprem);
384
385    if (lprem && m && (m->flags & CMF_LEFT)) {
386	lprem->flags |= CLF_SUF;
387	lprem->suffix = lprem->prefix;
388	lprem->prefix = NULL;
389    }
390    /* cline lists for suffixes are sorted from back to front, so we have
391     * to revert the list we got. */
392    if (sfx)
393	p = revert_cline(lp = p);
394    /* Now add the sub-clines we already had. */
395    if (matchsubs) {
396	if (sfx) {
397	    Cline q;
398
399	    if ((q = lp->prefix)) {
400		while (q->next)
401		    q = q->next;
402		q->next = matchsubs;
403	    } else
404		lp->prefix = matchsubs;
405
406	    matchlastsub->next = NULL;
407	} else {
408	    matchlastsub->next = p->prefix;
409	    p->prefix = matchsubs;
410	}
411	matchsubs = matchlastsub = NULL;
412    }
413    /* Store the arguments in the last part-cline. */
414    if (lp->llen || lp->wlen) {
415	lp->next = get_cline(l, wl, w, wl, o, ol, CLF_NEW);
416	lp = lp->next;
417    } else {
418	lp->line = l; lp->llen = wl;
419	lp->word = w; lp->wlen = wl;
420	DPUTS(wl > 0 && !*w, "Bad word");
421	lp->orig = o; lp->olen = ol;
422    }
423    if (o || ol)
424	lp->flags &= ~CLF_NEW;
425
426    /* Finally, put the new parts on the list. */
427    if (matchlastpart)
428	matchlastpart->next = p;
429    else
430	matchparts = p;
431    matchlastpart = lp;
432}
433
434/* This adds a new sub-cline. Arguments are the matcher and the strings from
435 * the line and the word. */
436
437/**/
438static void
439add_match_sub(Cmatcher m, char *l, int ll, char *w, int wl)
440{
441    int flags;
442    Cline n;
443
444    /* Check if we are interested only in the string from the line. */
445    if (m && (m->flags & CMF_LINE)) {
446	w = NULL; wl = 0;
447	flags = CLF_LINE;
448    } else
449	flags = 0;
450
451    /* And add the cline. */
452    if (wl || ll) {
453	Cline p, lp;
454
455	if ((p = n = bld_parts(w, wl, ll, &lp, NULL)) && n != lp) {
456	    for (; p->next != lp; p = p->next);
457
458	    if (matchsubs) {
459		matchlastsub->next = n->prefix;
460		n->prefix = matchsubs;
461	    }
462	    matchsubs = matchlastsub = lp;
463
464	    if (matchlastpart)
465		matchlastpart->next = n;
466	    else
467		matchparts = n;
468	    p->next = 0;
469	    matchlastpart = p;
470	} else {
471	    n = get_cline(l, ll, w, wl, NULL, 0,
472			  flags | ((m && m->wlen == -2) ? CLF_SKIP : 0));
473	    if (matchlastsub)
474		matchlastsub->next = n;
475	    else
476		matchsubs = n;
477	    matchlastsub = n;
478	}
479    }
480}
481
482/* This tests if the string from the line l matches the word w. In *bpp
483 * the offset for the brace is returned, in rwlp the length of the
484 * matched prefix or suffix, not including the stuff before or after
485 * the last anchor is given. When sfx is non-zero matching is done from
486 * the ends of the strings backward, if test is zero, the global variables
487 * above are used to build the string for the match and the cline. If
488 * part is non-zero, we are satisfied if only a part of the line-string
489 * is used (and return the length used). */
490
491/**/
492int
493match_str(char *l, char *w, Brinfo *bpp, int bc, int *rwlp,
494	  int sfx, int test, int part)
495{
496    int ll = strlen(l), lw = strlen(w), oll = ll, olw = lw, exact = 0, wexact = 0;
497    int il = 0, iw = 0, t, ind, add, he = 0, bpc, obc = bc, bslash;
498    char *ow;
499    Cmlist ms;
500    Cmatcher mp, lm = NULL;
501    Brinfo bp = NULL;
502
503    if (!test) {
504	start_match();
505	bp = *bpp;
506    }
507    /* Adjust the pointers and get the values for subscripting and
508     * incrementing. */
509
510    if (sfx) {
511	l += ll; w += lw;
512	ind = -1; add = -1;
513    } else {
514	ind = 0; add = 1;
515    }
516    /* ow will always point to the beginning (or end) of that sub-string
517     * in w that wasn't put in the match-variables yet. */
518
519    ow = w;
520
521    /* If the brace is at the beginning, we have to treat it now. */
522
523    if (!test && bp && bc >= bp->pos) {
524	bp->curpos = bc;
525	bp = bp->next;
526    }
527    /*** This once was: `while (ll && lw)', but then ignored characters at
528     *   the end were not, well, ignored. */
529    while (ll) {
530
531	/* Hm, we unconditionally first tried the matchers for the cases
532	 * where the beginnings of the line and word patterns match the
533	 * same character(s).
534	 * But first testing if the characters are the same is sooo
535	 * much faster...
536	 * Maybe it's OK to make same-character matching be preferred in
537	 * recursive calls. At least, it /seems/ to work.
538	 *
539	 * Let's try.
540	 *
541	 * Update: this once tested `test && ...' to check for exact
542	 * character matches only in recursive calls.  But then one
543	 * can't complete `nom<TAB>' to `nomatch' with a match spec
544	 * of `B:[nN][oO]=' because that will eat the `no'.
545	 * But that would break completion of strings like `nonomatch'
546	 * because the `B:[nN][oO]=' doesn't match the second `no'.
547	 * For this we added the code below that can remove already
548	 * accepted exact characters and try again, preferring match
549	 * specs.
550	 */
551
552	bslash = 0;
553	if (!sfx && lw && (!part || test) &&
554	    (l[ind] == w[ind] ||
555	     (bslash = (lw > 1 && w[ind] == '\\' &&
556			(ind ? (w[0] == l[0]) : (w[1] == l[0])))))) {
557	    /* No matcher could be used, but the strings have the same
558	     * character here, skip over it. */
559	    l += add; w += (bslash ? (add + add) : add);
560	    il++; iw += 1 + bslash;
561	    ll--; lw -= 1 + bslash;
562	    bc++;
563	    exact++;
564	    wexact += 1 + bslash;
565	    if (!test)
566		while (bp && bc >= (useqbr ? bp->qpos : bp->pos)) {
567		    bp->curpos = matchbufadded + (w - ow) + obc;
568		    bp = bp->next;
569		}
570	    lm = NULL;
571	    he = 0;
572
573	    continue;
574	}
575    retry:
576	/* First try the matchers. Err... see above. */
577	for (mp = NULL, ms = mstack; !mp && ms; ms = ms->next) {
578	    for (mp = ms->matcher; mp; mp = mp->next) {
579		t = 1;
580		if ((lm && lm == mp) ||
581		    ((oll == ll || olw == lw) &&
582		     (test == 1 || (test && !mp->left && !mp->right)) &&
583		     mp->wlen < 0))
584		    /* If we were called recursively, don't use `*' patterns
585		     * at the beginning (avoiding infinite recursion). */
586		    continue;
587
588		if (mp->wlen < 0) {
589		    int both, loff, aoff, llen, alen, zoff, moff, ct, ict, aol;
590		    char *tp, savl = '\0', savw;
591		    Cpattern ap, aop;
592
593		    /* This is for `*' patterns, first initialise some
594		     * local variables. */
595		    llen = mp->llen;
596		    if (mp->flags & CMF_LEFT) {
597			alen = mp->lalen; aol = mp->ralen;
598		    } else {
599			alen = mp->ralen; aol = mp->lalen;
600		    }
601		    /* Give up if we don't have enough characters for the
602		     * line-string and the anchor. */
603		    if (ll < llen + alen || lw < alen)
604			continue;
605
606		    if (mp->flags & CMF_LEFT) {
607			ap = mp->left; zoff = 0; moff = alen; aop = mp->right;
608			if (sfx) {
609			    both = 0; loff = -llen; aoff = -(llen + alen);
610			} else {
611			    both = 1; loff = alen; aoff = 0;
612			}
613		    } else {
614			ap = mp->right; zoff = alen; moff = 0; aop = mp->left;
615			if (sfx) {
616			    both = 1; loff = -(llen + alen); aoff = -alen;
617			} else {
618			    both = 0; loff = 0; aoff = llen;
619			}
620		    }
621		    /* Try to match the line pattern and the anchor. */
622		    if (!pattern_match(mp->line, l + loff, NULL, NULL))
623			continue;
624		    if (ap) {
625			if (!pattern_match(ap, l + aoff, NULL, NULL) ||
626			    (both &&
627			     (!pattern_match(ap, w + aoff, NULL, NULL) ||
628			      (aol && aol <= aoff + iw &&
629			       !pattern_match(aop, w + aoff - aol,
630					      NULL, NULL)) ||
631			      !match_parts(l + aoff, w + aoff, alen, part))))
632				continue;
633		    } else if (!both || ((mp->flags & CMF_INTER) ?
634					 ((mp->flags & CMF_LINE) ? iw : il) :
635					 (il || iw)))
636			continue;
637
638		    /* Fine, now we call ourselves recursively to find the
639		     * string matched by the `*'. */
640		    if (sfx && (savl = l[-(llen + zoff)]))
641			l[-(llen + zoff)] = '\0';
642		    for (t = 0, tp = w, ct = 0, ict = lw - alen + 1;
643			 ict;
644			 tp += add, ct++, ict--) {
645			if ((both &&
646			     (!ap || !test ||
647			      !pattern_match(ap, tp + aoff, NULL, NULL) ||
648			      (aol && aol <= aoff + ct + iw &&
649			       !pattern_match(aop, tp + aoff - aol,
650					      NULL, NULL)))) ||
651			    (!both &&
652			     pattern_match(ap, tp - moff, NULL, NULL) &&
653			     (!aol || (aol <= iw + ct - moff &&
654				       pattern_match(aop, tp - moff - aol,
655						     NULL, NULL))) &&
656			     (mp->wlen == -1 ||
657			      match_parts(l + aoff , tp - moff,
658						      alen, part)))) {
659			    if (!both && mp->wlen == -1 &&
660				!match_parts(l + aoff , tp - moff, alen, part))
661				break;
662			    if (sfx) {
663				if ((savw = tp[-zoff]))
664				    tp[-zoff] = '\0';
665				t = match_str(l - ll, w - lw,
666					      NULL, 0, NULL, 1, 2, part);
667				if (savw)
668				    tp[-zoff] = savw;
669			    } else
670				t = match_str(l + llen + moff, tp + moff,
671					      NULL, 0, NULL, 0, 1, part);
672			    if (t || (mp->wlen == -1 && !both))
673				break;
674			}
675		    }
676		    ict = ct;
677		    if (sfx && savl)
678			l[-(llen + zoff)] = savl;
679
680		    /* Have we found a position in w where the rest of l
681		     * matches? */
682		    if (!t)
683			continue;
684
685		    /* Yes, add the strings and clines if this is a
686		     * top-level call. */
687		    if (!test && (!he || (llen + alen))) {
688			char *op, *lp, *map, *wap, *wmp;
689			int ol;
690
691			if (sfx) {
692			    op = w; ol = ow - w; lp = l - (llen + alen);
693			    map = tp - alen;
694			    if (mp->flags & CMF_LEFT) {
695				wap = tp - alen; wmp = tp;
696			    } else {
697				wap = w - alen; wmp = tp - alen;
698			    }
699			} else {
700			    op = ow; ol = w - ow; lp = l;
701			    map = ow;
702			    if (mp->flags & CMF_LEFT) {
703				wap = w; wmp = w + alen;
704			    } else {
705				wap = tp; wmp = ow;
706			    }
707			}
708			/* If the matcher says that we are only interested
709			 * in the line pattern, we just add that and the
710			 * anchor and the string not added yet. Otherwise
711			 * we add a new part. */
712			if (mp->flags & CMF_LINE) {
713			    add_match_str(NULL, NULL, op, ol, sfx);
714			    add_match_str(NULL, NULL, lp, llen + alen, sfx);
715			    add_match_sub(NULL, NULL, ol, op, ol);
716			    add_match_sub(NULL, NULL, llen + alen,
717					  lp, llen + alen);
718			} else if (sfx) {
719			    add_match_str(NULL, NULL,
720					  map, ct + ol + alen, sfx);
721			    add_match_part(mp, l + aoff, wap, alen,
722					   l + loff, llen, op, ol, ol, sfx);
723			    add_match_sub(NULL, NULL, 0, wmp, ct);
724			} else {
725			    add_match_str(NULL, NULL,
726					  map, ct + ol + alen, sfx);
727			    if (both) {
728				add_match_sub(NULL, NULL, ol, op, ol);
729				ol = -1;
730			    } else
731				ct += ol;
732			    add_match_part(mp, l + aoff, wap, alen,
733					   l + loff, llen, wmp, ct, ol, sfx);
734			}
735		    }
736		    /* Now skip over the matched portion and the anchor. */
737		    llen += alen; alen += ict;
738		    if (sfx) {
739			l -= llen; w -= alen;
740		    } else {
741			l += llen; w += alen;
742		    }
743		    ll -= llen; il += llen;
744		    lw -= alen; iw += alen;
745		    bc += llen;
746		    exact = 0;
747
748		    if (!test)
749			while (bp &&
750			       bc >= (bpc = (useqbr ? bp->qpos : bp->pos))) {
751			    bp->curpos = matchbufadded + bpc - bc + obc;
752			    bp = bp->next;
753			}
754		    ow = w;
755
756		    if (!llen && !alen) {
757			lm = mp;
758			if (he)
759			    mp = NULL;
760			else
761			    he = 1;
762		    } else {
763			lm = NULL; he = 0;
764		    }
765		    break;
766		} else if (ll >= mp->llen && lw >= mp->wlen) {
767		    /* Non-`*'-pattern. */
768		    char *tl, *tw;
769		    int tll, tlw, til, tiw;
770
771		    /* We do this only if the line- and word-substrings
772		     * are not equal. */
773		    if (!(mp->flags & (CMF_LEFT | CMF_RIGHT)) &&
774			mp->llen == mp->wlen &&
775			!(sfx ? strncmp(l - mp->llen, w - mp->wlen, mp->llen) :
776			  strncmp(l, w, mp->llen)))
777			continue;
778
779		    /* Using local variables to make the following
780		     * independent of whether we match a prefix or a
781		     * suffix. */
782		    if (sfx) {
783			tl = l - mp->llen; tw = w - mp->wlen;
784			til = ll - mp->llen; tiw = lw - mp->wlen;
785			tll = il + mp->llen; tlw = iw + mp->wlen;
786		    } else {
787			tl = l; tw = w;
788			til = il; tiw = iw;
789			tll = ll; tlw = lw;
790		    }
791		    if (mp->flags & CMF_LEFT) {
792			/* Try to match the left anchor, if any. */
793			if (til < mp->lalen || tiw < mp->lalen + mp->ralen)
794			    continue;
795			else if (mp->left)
796			    t = pattern_match(mp->left, tl - mp->lalen,
797					      NULL, NULL) &&
798				pattern_match(mp->left, tw - mp->lalen,
799					      NULL, NULL) &&
800				(!mp->ralen ||
801				 pattern_match(mp->right,
802					       tw - mp->lalen - mp->ralen,
803					       NULL, NULL));
804			else
805			    t = (!sfx && !((mp->flags & CMF_INTER) ?
806					   ((mp->flags & CMF_LINE) ? iw : il) :
807					   (il || iw)));
808		    }
809		    if (mp->flags & CMF_RIGHT) {
810			/* Try to match the right anchor, if any. */
811			if (tll < mp->llen + mp->ralen ||
812			    tlw < mp->wlen + mp->ralen + mp->lalen)
813			    continue;
814			else if (mp->right)
815			    t = pattern_match(mp->right,
816					      tl + mp->llen - mp->ralen,
817					      NULL, NULL) &&
818				pattern_match(mp->right,
819					      tw + mp->wlen - mp->ralen,
820					      NULL, NULL) &&
821				(!mp->lalen ||
822				 pattern_match(mp->left, tw + mp->wlen -
823					       mp->ralen - mp->lalen,
824					       NULL, NULL));
825			else
826			    t = (sfx && !((mp->flags & CMF_INTER) ?
827					  ((mp->flags & CMF_LINE) ? iw : il) :
828					  (il || iw)));
829		    }
830		    /* Now try to match the line and word patterns. */
831		    if (!t || !pattern_match(mp->line, tl, mp->word, tw))
832			continue;
833
834		    /* Probably add the matched strings. */
835		    if (!test) {
836			if (sfx)
837			{
838			    if (ow >= w)
839				add_match_str(NULL, NULL, w, ow - w, sfx);
840			}
841			else
842			{
843			    if (w >= ow)
844				add_match_str(NULL, NULL, ow, w - ow, sfx);
845			}
846			add_match_str(mp, tl, tw, mp->wlen, sfx);
847			if (sfx)
848			{
849			    if (ow >= w)
850				add_match_sub(NULL, NULL, 0, w, ow - w);
851			}
852			else
853			{
854			    if (w >= ow)
855				add_match_sub(NULL, NULL, 0, ow, w - ow);
856			}
857			add_match_sub(mp, tl, mp->llen, tw, mp->wlen);
858		    }
859		    if (sfx) {
860			l = tl;	w = tw;
861		    } else {
862			l += mp->llen; w += mp->wlen;
863		    }
864		    il += mp->llen; iw += mp->wlen;
865		    ll -= mp->llen; lw -= mp->wlen;
866		    bc += mp->llen;
867		    exact = 0;
868
869		    if (!test)
870			while (bp &&
871			       bc >= (bpc = (useqbr ? bp->qpos : bp->pos))) {
872			    bp->curpos = matchbufadded + bpc - bc + obc;
873			    bp = bp->next;
874			}
875		    ow = w;
876		    lm = NULL;
877		    he = 0;
878		    break;
879		}
880	    }
881	}
882	if (mp)
883	    continue;
884
885	/* Same code as at the beginning, used in top-level calls. */
886
887	bslash = 0;
888	if ((!test || sfx) && lw &&
889	    (l[ind] == w[ind] ||
890	     (bslash = (lw > 1 && w[ind] == '\\' &&
891			(ind ? (w[0] == l[0]) : (w[1] == l[0])))))) {
892	    /* No matcher could be used, but the strings have the same
893	     * character here, skip over it. */
894	    l += add; w += (bslash ? (add + add ) : add);
895	    il++; iw += 1 + bslash;
896	    ll--; lw -= 1 + bslash;
897	    bc++;
898	    if (!test)
899		while (bp && bc >= (useqbr ? bp->qpos : bp->pos)) {
900		    bp->curpos = matchbufadded + (sfx ? (ow - w) : (w - ow)) + obc;
901		    bp = bp->next;
902		}
903	    lm = NULL;
904	    he = 0;
905	} else {
906
907	    if (!lw)
908		break;
909
910	    if (exact && !part) {
911		/* If we just accepted some characters directly (at the
912		 * beginning of the loop) and now can't match any further,
913		 * we go back to before those characters and try again,
914		 * preferring match specs this time. */
915
916		il -= exact; iw -= wexact;
917		ll += exact; lw += wexact;
918		bc -= exact;
919		l -= add * exact; w -= add * wexact;
920
921		exact = wexact = 0;
922
923		goto retry;
924	    }
925	    /* No matcher and different characters: l does not match w. */
926	    if (test)
927		return 0;
928
929	    abort_match();
930
931	    return -1;
932	}
933    }
934    /* If this is a recursive call, we just return if l matched w or not. */
935    if (test)
936	return (part || !ll);
937
938    /* In top-level calls, if ll is non-zero (unmatched portion in l),
939     * we have to free the collected clines. */
940    if (!part && ll) {
941	abort_match();
942
943	return -1;
944    }
945    if (rwlp)
946	*rwlp = iw - (sfx ? ow - w : w - ow);
947
948    /* If we matched a suffix, the anchors stored in the top-clines
949     * will be in the wrong clines: shifted by one. Adjust this. */
950    if (sfx && matchparts) {
951	Cline t, tn, s;
952
953	if (matchparts->prefix || matchparts->suffix) {
954	    t = get_cline(NULL, 0, NULL, 0, NULL, 0, 0);
955	    t->next = matchparts;
956	    if (matchparts->prefix)
957		t->prefix = (Cline) 1;
958	    else
959		t->suffix = (Cline) 1;
960	    matchparts = t;
961	}
962	for (t = matchparts; (tn = t->next); t = tn) {
963	    s = (tn->prefix ? tn->prefix : tn->suffix);
964	    if (t->suffix)
965		t->suffix = s;
966	    else
967		t->prefix = s;
968	}
969	t->prefix = t->suffix = NULL;
970    }
971    /* Finally, return the number of matched characters. */
972
973    *bpp = bp;
974    return (part ? il : iw);
975}
976
977/* Wrapper for match_str(), only for a certain length and only doing
978 * the test. */
979
980/**/
981static int
982match_parts(char *l, char *w, int n, int part)
983{
984    char lsav = l[n], wsav = w[n];
985    int ret;
986
987    if (lsav)
988	l[n] = '\0';
989    if (wsav)
990	w[n] = '\0';
991    ret = match_str(l, w, NULL, 0, NULL, 0, 1, part);
992    if (lsav)
993	l[n] = lsav;
994    if (wsav)
995	w[n] = wsav;
996
997    return ret;
998}
999
1000/* Check if the word w is matched by the strings in pfx and sfx (the prefix
1001 * and the suffix from the line) or the pattern cp. In clp a cline list for
1002 * w is returned.
1003 * qu is non-zero if the words has to be quoted before processed any
1004 * further: the value 2 indicates file quoting.
1005 * bpl and bsl are used to report the positions where the brace-strings in
1006 * the prefix and the suffix have to be re-inserted if this match is inserted
1007 * in the line.
1008 * The return value is the string to use as a completion or NULL if the prefix
1009 * and the suffix don't match the word w. */
1010
1011/**/
1012mod_export char *
1013comp_match(char *pfx, char *sfx, char *w, Patprog cp, Cline *clp, int qu,
1014	   Brinfo *bpl, int bcp, Brinfo *bsl, int bcs, int *exact)
1015{
1016    char *r = NULL;
1017
1018    if (cp) {
1019	/* We have a globcomplete-like pattern, just use that. */
1020	int wl;
1021	char *teststr;
1022
1023	r = w;
1024	if (!qu) {
1025	    /*
1026	     * If we're not quoting the strings, that means they're
1027	     * already quoted (?) and so when we test them against
1028	     * a pattern we have to remove the quotes else we will
1029	     * end up trying to match against the quote characters.
1030	     *
1031	     * Almost certainly this fails in some complicated cases
1032	     * but it should catch the basic ones.
1033	     */
1034	    teststr = dupstring(r);
1035	    tokenize(teststr);
1036	    if (parse_subst_string(teststr))
1037		teststr = r;
1038	    else {
1039		remnulargs(teststr);
1040		untokenize(teststr);
1041	    }
1042	} else
1043	    teststr = r;
1044	if (!pattry(cp, teststr))
1045	    return NULL;
1046
1047	r = (qu == 2 ? tildequote(r, 0) : multiquote(r, !qu));
1048
1049	/* We still break it into parts here, trying to build a sensible
1050	 * cline list for these matches, too. */
1051	w = dupstring(w);
1052	wl = strlen(w);
1053	*clp = bld_parts(w, wl, wl, NULL, NULL);
1054	*exact = 0;
1055    } else {
1056	Cline pli, plil;
1057	int mpl, rpl, wl;
1058
1059	w = (qu == 2 ? tildequote(w, 0) : multiquote(w, !qu));
1060	wl = strlen(w);
1061
1062	/* Always try to match the prefix. */
1063
1064	useqbr = qu;
1065	if ((mpl = match_str(pfx, w, bpl, bcp, &rpl, 0, 0, 0)) < 0)
1066	    return NULL;
1067
1068	if (sfx && *sfx) {
1069	    int wpl = matchbufadded, msl, rsl;
1070	    VARARR(char, wpfx, wpl);
1071	    Cline mli, mlil;
1072
1073	    /* We also have a suffix to match, so first save the
1074	     * contents of the global matching variables. */
1075	    memcpy(wpfx, matchbuf, wpl);
1076	    if (matchsubs) {
1077		Cline tmp = get_cline(NULL, 0, NULL, 0, NULL, 0, 0);
1078
1079		tmp->prefix = matchsubs;
1080		if (matchlastpart)
1081		    matchlastpart->next = tmp;
1082		else
1083		    matchparts = tmp;
1084	    }
1085	    pli = matchparts;
1086	    plil = matchlastpart;
1087
1088	    /* The try to match the suffix. */
1089
1090	    if ((msl = match_str(sfx, w + mpl, bsl, bcs, &rsl, 1, 0, 0)) < 0) {
1091		free_cline(pli);
1092
1093		return NULL;
1094	    }
1095	    /* Matched, so add the string in the middle and the saved
1096	     * string for the prefix, and build a combined cline list
1097	     * for the prefix and the suffix. */
1098	    if (matchsubs) {
1099		Cline tmp = get_cline(NULL, 0, NULL, 0, NULL, 0, CLF_SUF);
1100
1101		tmp->suffix = matchsubs;
1102		if (matchlastpart)
1103		    matchlastpart->next = tmp;
1104		else
1105		    matchparts = tmp;
1106	    }
1107	    add_match_str(NULL, NULL, w + rpl, wl - rpl - rsl, 1);
1108	    add_match_str(NULL, NULL, wpfx, wpl, 1);
1109
1110	    mli = bld_parts(w + rpl, wl - rpl - rsl,
1111			    (mpl - rpl) + (msl - rsl), &mlil, NULL);
1112	    mlil->flags |= CLF_MID;
1113	    mlil->slen = msl - rsl;
1114	    mlil->next = revert_cline(matchparts);
1115
1116	    if (plil)
1117		plil->next = mli;
1118	    else
1119		pli = mli;
1120	} else {
1121	    /* Only a prefix, add the string and a part-cline for it. */
1122	    add_match_str(NULL, NULL, w + rpl, wl - rpl, 0);
1123
1124	    add_match_part(NULL, NULL, NULL, 0, NULL, 0, w + rpl, wl - rpl,
1125			   mpl - rpl, 0);
1126	    pli = matchparts;
1127	}
1128	r = dupstring(matchbuf ? matchbuf : "");
1129
1130	*clp = pli;
1131
1132	/* Test if the string built is equal to the one from the line. */
1133	if (sfx && *sfx) {
1134	    int pl = strlen(pfx);
1135
1136	    *exact = (!strncmp(pfx, w, pl) && !strcmp(sfx, w + pl));
1137	} else
1138	    *exact = !strcmp(pfx, w);
1139    }
1140    if (!qu)
1141	hasunqu = 1;
1142
1143    return r;
1144}
1145
1146
1147/*
1148 * Guts of a single pattern for pattern_match().
1149 * Return non-zero if match successful.
1150 * If the class was an equivalence, return 1 + the index into
1151 * the equivalence class (see pattern.c for how this is calculated).
1152 */
1153
1154/**/
1155mod_export convchar_t
1156pattern_match1(Cpattern p, convchar_t c, int *mtp)
1157{
1158    convchar_t ind;
1159
1160    *mtp = 0;
1161    switch (p->tp) {
1162    case CPAT_CCLASS:
1163	return PATMATCHRANGE(p->u.str, CONVCAST(c), NULL, NULL);
1164
1165    case CPAT_NCLASS:
1166	return !PATMATCHRANGE(p->u.str, CONVCAST(c), NULL, NULL);
1167
1168    case CPAT_EQUIV:
1169	if (PATMATCHRANGE(p->u.str, CONVCAST(c), &ind, mtp))
1170	    return ind + 1;
1171	else
1172	    return 0;
1173
1174    case CPAT_ANY:
1175	return 1;
1176
1177    case CPAT_CHAR:
1178	return (p->u.chr == c);
1179
1180    default:
1181	DPUTS(1, "bad matcher pattern type");
1182	return 0;
1183    }
1184}
1185
1186
1187/*
1188 * Use an equivalence to deduce the line character from the word, or
1189 * vice versa.  (If vice versa, then "line" and "word" are reversed
1190 * in what follows.  The logic is symmetric.)
1191 * lp is the line pattern.
1192 * wind is the index returned by a pattern match on the word pattern,
1193 * with type wmtp.
1194 * wchr is the word character.
1195 * Return CHR_INVALID if no matching character, else the character.
1196 *
1197 * Only makes sense if lp->tp == CPAT_EQUIV and the (unseen) word
1198 * pattern also has that type.
1199 */
1200
1201/**/
1202mod_export convchar_t
1203pattern_match_equivalence(Cpattern lp, convchar_t wind, int wmtp,
1204			  convchar_t wchr)
1205{
1206    convchar_t lchr;
1207    int lmtp;
1208
1209    if (!PATMATCHINDEX(lp->u.str, wind-1, &lchr, &lmtp)) {
1210	/*
1211	 * No equivalent.  No possible match; give up.
1212	 */
1213	return CHR_INVALID;
1214    }
1215    /*
1216     * If we matched an exact character rather than a range
1217     * type, return it.
1218     */
1219    if (lchr != CHR_INVALID)
1220	return lchr;
1221
1222    /*
1223     * Check the match types.  We may want a case-changed
1224     * version of the word character.
1225     */
1226    if (wmtp == PP_UPPER && lmtp == PP_LOWER)
1227	return ZC_tolower(wchr);
1228    else if (wmtp == PP_LOWER && lmtp == PP_UPPER)
1229	return ZC_toupper(wchr);
1230    else if (wmtp == lmtp) {
1231	/*
1232	 * Be lenient and allow identical replacements
1233	 * for character classes, although in fact this
1234	 * doesn't give special functionality for equivalence
1235	 * classes.
1236	 */
1237	return wchr;
1238    } else {
1239	/*
1240	 * Non-matching generic types; this can't work.
1241	 */
1242	return CHR_INVALID;
1243    }
1244}
1245
1246/*
1247 * Check if the given pattern matches the given string.
1248 * p is either an anchor or line pattern and string;
1249 * wp and wsc are word (candidate) pattern and string
1250 *
1251 * Check that characters match for {...} classes by comparing positions in the
1252 * strings.
1253 *
1254 * prestrict is a chain of patterns at least as long
1255 * as the line string.  In this case we are still assembling the line at
1256 * newline (which has been allocated but doesn't yet contain anything useful)
1257 * and must continue to do so as we go along; prestrict gives
1258 * restrictions on the line character to be applied along side the other
1259 * patterns.  In the simple case a restriction is a character to be put
1260 * in place; otherwise it is a set of possible characters and we have to
1261 * deduce an actual matching character.  Note prestrict is never an
1262 * equivalence class.  In extreme cases we can't deduce a unique
1263 * character; then the match fails.
1264 *
1265 * If prestrict is not NULL, s will be NULL.
1266 */
1267
1268/**/
1269static int
1270pattern_match_restrict(Cpattern p, Cpattern wp, convchar_t *wsc, int wsclen,
1271		       Cpattern prestrict, ZLE_STRING_T new_line)
1272{
1273    convchar_t c;
1274    convchar_t ind, wind;
1275    int mt, wmt;
1276
1277    while (p && wp && wsclen && prestrict) {
1278	/* First test the word character */
1279	wind = pattern_match1(wp, *wsc, &wmt);
1280	if (!wind)
1281	    return 0;
1282
1283	/*
1284	 * Now the line character; deal with the case where
1285	 * we don't yet have it, only a restriction on it.
1286	 */
1287	if (prestrict->tp == CPAT_CHAR) {
1288	    /*
1289	     * Easy case: restricted to an exact character on
1290	     * the line.  Procede as normal.
1291	     */
1292	    c = prestrict->u.chr;
1293	} else {
1294	    if (p->tp == CPAT_CHAR) {
1295		/*
1296		 * Normal line pattern is an exact character:  as
1297		 * long as this matches prestrict, we can proceed
1298		 * as usual.
1299		 */
1300		c = p->u.chr;
1301	    } else if (p->tp == CPAT_EQUIV) {
1302		/*
1303		 * An equivalence, so we can deduce the character
1304		 * backwards from the word pattern and see if it
1305		 * matches prestrict.
1306		 */
1307		if ((c = pattern_match_equivalence(p, wind, wmt, *wsc)) ==
1308		    CHR_INVALID)
1309		    return 0;
1310	    } else {
1311		/*
1312		 * Not an equivalence, so that means we must match
1313		 * the word (not just the word pattern), so grab it
1314		 * and make sure it fulfills our needs.  I think.
1315		 * Not 100% sure about that, but what else can
1316		 * we do?  We haven't actually been passed a string
1317		 * from the command line.
1318		 */
1319		c = *wsc;
1320	    }
1321	    /* Character so deduced must match the restriction. */
1322	    if (!pattern_match1(prestrict, c, &mt))
1323		return 0;
1324	}
1325
1326	/*
1327	 * If either is "?", they match each other; no further tests.
1328	 * Apply this even if the character wasn't convertable;
1329	 * there's no point trying to be clever in that case.
1330	 */
1331	if (p->tp != CPAT_ANY || wp->tp != CPAT_ANY)
1332	{
1333	    ind = pattern_match1(p, c, &mt);
1334	    if (!ind)
1335		return 0;
1336	    if (ind != wind)
1337		return 0;
1338	    if (mt != wmt) {
1339		/*
1340		 * Special case if matching lower vs. upper or
1341		 * vice versa.  The transformed characters must match.
1342		 * We don't need to check the transformation is
1343		 * the appropriate one for each character separately,
1344		 * since that was done in pattern_match1(), so just
1345		 * compare lower-cased versions of both.
1346		 */
1347		if ((mt == PP_LOWER || mt == PP_UPPER) &&
1348		    (wmt == PP_LOWER || wmt == PP_UPPER)) {
1349		    if (ZC_tolower(c) != ZC_tolower(*wsc))
1350			return 0;
1351		} else {
1352		    /* Other different classes can't match. */
1353		    return 0;
1354		}
1355	    }
1356	}
1357
1358	/* We need to assemble the line */
1359	*new_line++ = (ZLE_CHAR_T)c;
1360	prestrict = prestrict->next;
1361	wsc++;
1362	wsclen--;
1363	p = p->next;
1364	wp = wp->next;
1365    }
1366
1367    while (p && prestrict) {
1368	/*
1369	 * As above, but with even less info to go on.
1370	 * (Can this happen?)  At least handle the cases where
1371	 * one of our patterns has given us a specific character.
1372	 */
1373	if (prestrict->tp == CPAT_CHAR) {
1374	    c = prestrict->u.chr;
1375	} else {
1376	    if (p->tp == CPAT_CHAR) {
1377		c = p->u.chr;
1378	    } else {
1379		/*
1380		 * OK.  Here we are in a function with just a line
1381		 * pattern and another pattern to restrict the
1382		 * characters that can go on the line, and no actual
1383		 * characters.  We're matching two patterns against
1384		 * one another to generate a character to insert.
1385		 * This is a bit too psychedelic, so I'm going to
1386		 * bale out now.  See you on the ground.
1387		 */
1388		return 0;
1389	    }
1390	    if (!pattern_match1(prestrict, c, &mt))
1391		return 0;
1392	}
1393	if (!pattern_match1(p, c, &mt))
1394	    return 0;
1395	p = p->next;
1396	*new_line++ = (ZLE_CHAR_T)c;
1397	prestrict = prestrict->next;
1398    }
1399
1400    if (prestrict) {
1401	/* Restriction with nothing to match */
1402	return 0;
1403    }
1404
1405    while (wp && wsclen) {
1406	/* No funny business when we only have the word pattern. */
1407	if (!pattern_match1(wp, *wsc, &wmt))
1408	    return 0;
1409	wp = wp->next;
1410	wsc++;
1411	wsclen--;
1412    }
1413
1414    return 1;
1415}
1416
1417
1418/*
1419 * The usual version of pattern matching, without the line string
1420 * being handled by restriction.
1421 *
1422 * Check if the given pattern matches the given string.
1423 * p and  s are either anchor or line pattern and string;
1424 * wp and ws are word (candidate) pattern and string
1425 *
1426 * If only one pattern is given, we just check if characters match.
1427 * If both line and word are given, we check that characters match
1428 * for {...} classes by comparing positions in the strings.
1429 *
1430 * Patterns and strings are always passed in pairs, so it is enough
1431 * to check for non-NULL wp. p should always be present.
1432 */
1433/**/
1434mod_export int
1435pattern_match(Cpattern p, char *s, Cpattern wp, char *ws)
1436{
1437    convchar_t c, wc;
1438    convchar_t ind, wind;
1439    int len = 0, wlen, mt, wmt;
1440#ifdef MULTIBYTE_SUPPORT
1441    mbstate_t lstate, wstate;
1442
1443    memset(&lstate, 0, sizeof(lstate));
1444    memset(&wstate, 0, sizeof(wstate));
1445#endif
1446
1447    while (p && wp && *s && *ws) {
1448	/* First test the word character */
1449#ifdef MULTIBYTE_SUPPORT
1450	wlen = mb_metacharlenconv_r(ws, &wc, &wstate);
1451#else
1452	if (*ws == Meta) {
1453	    wc = STOUC(ws[1]) ^ 32;
1454	    wlen = 2;
1455	} else {
1456	    wc = STOUC(*ws);
1457	    wlen = 1;
1458	}
1459#endif
1460	wind = pattern_match1(wp, wc, &wmt);
1461	if (!wind)
1462	    return 0;
1463
1464	/*
1465	 * Now the line character.
1466	 */
1467#ifdef MULTIBYTE_SUPPORT
1468	len = mb_metacharlenconv_r(s, &c, &lstate);
1469#else
1470	/* We have the character itself. */
1471	if (*s == Meta) {
1472	    c = STOUC(s[1]) ^ 32;
1473	    len = 2;
1474	} else {
1475	    c = STOUC(*s);
1476	    len = 1;
1477	}
1478#endif
1479	/*
1480	 * If either is "?", they match each other; no further tests.
1481	 * Apply this even if the character wasn't convertable;
1482	 * there's no point trying to be clever in that case.
1483	 */
1484	if (p->tp != CPAT_ANY || wp->tp != CPAT_ANY)
1485	{
1486	    ind = pattern_match1(p, c, &mt);
1487	    if (!ind)
1488		return 0;
1489	    if (ind != wind)
1490		return 0;
1491	    if (mt != wmt) {
1492		/*
1493		 * Special case if matching lower vs. upper or
1494		 * vice versa.  The transformed characters must match.
1495		 * We don't need to check the transformation is
1496		 * the appropriate one for each character separately,
1497		 * since that was done in pattern_match1(), so just
1498		 * compare lower-cased versions of both.
1499		 */
1500		if ((mt == PP_LOWER || mt == PP_UPPER) &&
1501		    (wmt == PP_LOWER || wmt == PP_UPPER)) {
1502		    if (ZC_tolower(c) != ZC_tolower(wc))
1503			return 0;
1504		} else {
1505		    /* Other different classes can't match. */
1506		    return 0;
1507		}
1508	    }
1509	}
1510
1511	s += len;
1512	ws += wlen;
1513	p = p->next;
1514	wp = wp->next;
1515    }
1516
1517    while (p && *s) {
1518#ifdef MULTIBYTE_SUPPORT
1519	len = mb_metacharlenconv_r(s, &c, &lstate);
1520#else
1521	if (*s == Meta) {
1522	    c = STOUC(s[1]) ^ 32;
1523	    len = 2;
1524	} else {
1525	    c = STOUC(*s);
1526	    len = 1;
1527	}
1528#endif
1529	if (!pattern_match1(p, c, &mt))
1530	    return 0;
1531	p = p->next;
1532	s += len;
1533    }
1534
1535    while (wp && *ws) {
1536#ifdef MULTIBYTE_SUPPORT
1537	wlen = mb_metacharlenconv_r(ws, &wc, &wstate);
1538#else
1539	if (*ws == Meta) {
1540	    wc = STOUC(ws[1]) ^ 32;
1541	    wlen = 2;
1542	} else {
1543	    wc = STOUC(*ws);
1544	    wlen = 1;
1545	}
1546#endif
1547	if (!pattern_match1(wp, wc, &wmt))
1548	    return 0;
1549	wp = wp->next;
1550	ws += wlen;
1551    }
1552
1553    return 1;
1554}
1555
1556
1557/* This splits the given string into a list of cline structs, separated
1558 * at those places where one of the anchors of an `*' pattern was found.
1559 * plen gives the number of characters on the line that matched this
1560 * string.
1561 *
1562 * In *lp, if lp is not NULL, we return a pointer to the last cline struct we
1563 * build.
1564 *
1565 * In *lprem, if lprem is not NULL, we return a pointer to the last
1566 * cline struct we build if it came from the remainder of the
1567 * line rather than from a right anchor match, else NULL.
1568 */
1569
1570/**/
1571Cline
1572bld_parts(char *str, int len, int plen, Cline *lp, Cline *lprem)
1573{
1574    Cline ret = NULL, *q = &ret, n = NULL;
1575    Cmlist ms;
1576    Cmatcher mp;
1577    int t, op = plen;
1578    char *p = str, *os = str;
1579
1580    while (len) {
1581	for (t = 0, ms = bmatchers; ms && !t; ms = ms->next) {
1582	    mp = ms->matcher;
1583	    if (mp && mp->flags == CMF_RIGHT && mp->wlen < 0 && mp->ralen &&
1584		!mp->llen && len >= mp->ralen && (str - os) >= mp->lalen &&
1585		pattern_match(mp->right, str, NULL, NULL) &&
1586		(!mp->lalen ||
1587		 ((str - os) >= mp->lalen &&
1588		  pattern_match(mp->left, str - mp->lalen, NULL, NULL)))) {
1589		int olen = str - p, llen;
1590
1591		/* We found an anchor, create a new cline. The NEW flag
1592		 * is set if the characters before the anchor were not
1593		 * on the line. */
1594		*q = n = get_cline(NULL, mp->ralen, str, mp->ralen, NULL, 0,
1595				   ((plen <= 0) ? CLF_NEW : 0));
1596
1597		/* If there were any characters before the anchor, add
1598		 * them as a cline struct. */
1599
1600		if (p != str) {
1601		    llen = (op < 0 ? 0 : op);
1602
1603		    if (llen > olen)
1604			llen = olen;
1605		    n->prefix = get_cline(NULL, llen, p, olen, NULL, 0, 0);
1606		}
1607		q = &(n->next);
1608		str += mp->ralen; len -= mp->ralen;
1609		plen -= mp->ralen;
1610		op -= olen;
1611		p = str;
1612		t = 1;
1613	    }
1614	}
1615	if (!t) {
1616	    /* No anchor was found here, skip. */
1617	    str++; len--;
1618	    plen--;
1619	}
1620    }
1621    /* This is the cline struct for the remaining string at the end. */
1622
1623    if (p != str) {
1624	int olen = str - p, llen = (op < 0 ? 0 : op);
1625
1626        *q = n = get_cline(NULL, 0, NULL, 0, NULL, 0, (plen <= 0 ? CLF_NEW : 0));
1627
1628	if (llen > olen)
1629	    llen = olen;
1630	n->prefix = get_cline(NULL, llen, p, olen, NULL, 0, 0);
1631	if (lprem)
1632	    *lprem = n;
1633    }
1634    else if (!ret) {
1635        *q = n =
1636	    get_cline(NULL, 0, NULL, 0, NULL, 0, (plen <= 0 ? CLF_NEW : 0));
1637	if (lprem)
1638	    *lprem = n;
1639    } else if (lprem) {
1640	*lprem = NULL;
1641    }
1642
1643    if (n)
1644        n->next = NULL;
1645
1646    if (lp)
1647	*lp = n;
1648
1649    return ret;
1650}
1651
1652
1653/*
1654 * This builds all the possible line patterns for the pattern pat in the
1655 * buffer line.  Then we test if this line matches the string given by
1656 * wlen and word.
1657 *
1658 * The matcher  ) wpat, containing pattern that matched previously
1659 *   mp gives   ) lpat, containing the pattern for line we build
1660 * line is the line we are assembling; it is initially empty
1661 * mword is a string that matched wpat before
1662 * word is string that we try to match now
1663 *
1664 * The return value is the length of the string matched in the word, it
1665 * is zero if we couldn't build a line that matches the word.
1666 */
1667
1668/**/
1669static int
1670bld_line(Cmatcher mp, ZLE_STRING_T line, char *mword, char *word,
1671	 int wlen, int sfx)
1672{
1673    Cpattern lpat = mp->line;
1674    Cpattern wpat = mp->word;
1675    Cpattern curgenpat;
1676    Cmlist ms;
1677    int llen, rl, l;
1678    convchar_t convchr, *wordcp;
1679    VARARR(convchar_t, wordchars, wlen);
1680    VARARR(struct cpattern, genpatarr, mp->llen);
1681
1682    /*
1683     * We may need to start the "word" array from the end.  This
1684     * is much easier if we convert it to an array of (possibly wide)
1685     * characters.
1686     */
1687    MB_METACHARINIT();
1688    for (l = wlen, wordcp = wordchars; l; l--) {
1689	int charlen = MB_METACHARLENCONV(word, &convchr);
1690#ifdef MULTIBYTE_SUPPORT
1691	if (convchr == WEOF)
1692	    convchr = (*word == Meta) ? word[1] ^ 32 : *word;
1693#endif
1694	*wordcp++ = convchr;
1695	word += charlen;
1696    }
1697
1698    /*
1699     * Loop over all characters.  At this stage, line is an empty
1700     * space of length llen (not counting the null byte) which we assemble as
1701     * we go along.
1702     *
1703     * However, first we need to know what characters can appear at each
1704     * point in the line.  For this we assemble an list genpatarr of the
1705     * same length as the line.  (It's convenient to store this as an
1706     * array but it's linked as a list, too.)  If there are equivalences
1707     * we use mword to derive the equivalent character; when we've
1708     * reached the end of mword, equivalences are treated just like
1709     * ordinary character classes.  For character classes we just attach
1710     * the class to the genpatarr list and apply it as a restriction
1711     * when we finally match the line against the set of matchers.
1712     */
1713    curgenpat = genpatarr;
1714    MB_METACHARINIT();
1715    while (lpat) {
1716	convchar_t wchr, wind;
1717	int wmtp, mwordlen;
1718	/*
1719	 * If the line pattern is an equivalence, query wpat to find the
1720	 * word part of the equivalence.  If we don't find one we don't try
1721	 * equivalencing but use lpat as an ordinary match.  (It's not
1722	 * entirely clear to me this is the correct behaviour on a
1723	 * failed character match within the equivalence, but that was
1724	 * the behaviour of the old logic that this replaces.)
1725	 */
1726	if (lpat->tp == CPAT_EQUIV && wpat && *mword) {
1727	    mwordlen = MB_METACHARLENCONV(mword, &wchr);
1728	    wind = pattern_match1(wpat, wchr, &wmtp);
1729	    wpat = wpat->next;
1730	    mword += mwordlen;
1731	} else
1732	    wind = 0;
1733	if (wind) {
1734	    /*
1735	     * Successful match for word side of equivalence.
1736	     * Find the line equivalent.
1737	     */
1738	    convchar_t lchr;
1739	    if ((lchr = pattern_match_equivalence(lpat, wind, wmtp, wchr))
1740		== CHR_INVALID) {
1741		/*
1742		 * No equivalent.  No possible match; give up.
1743		 */
1744		return 0;
1745	    }
1746	    /*
1747	     * We now have an exact character to match,
1748	     * so make up a pattern element for it.
1749	     */
1750	    curgenpat->tp = CPAT_CHAR;
1751	    curgenpat->u.chr = lchr;
1752	} else {
1753	    /*
1754	     * Not an equivalence class, so we just keep the
1755	     * test in the lpat as it is.
1756	     */
1757	    curgenpat->tp = lpat->tp;
1758	    if (lpat->tp == CPAT_CHAR)
1759		curgenpat->u.chr = lpat->u.chr;
1760	    else if (lpat->tp != CPAT_ANY) {
1761		/*
1762		 * The string isn't modified and is only needed in calls from
1763		 * this function, so we don't even need to copy it.
1764		 */
1765		curgenpat->u.str = lpat->u.str;
1766	    }
1767	}
1768	lpat = lpat->next;
1769	/*
1770	 * This linked list is defined above as an array.
1771	 * We could get away with just keeping it as an array
1772	 * and passing it down as such, but that's a bit icky
1773	 * since the generic linkage of Cpatterns is as a linked
1774	 * list and we should keep our local memory management
1775	 * problems to ourselvess.
1776	 */
1777	if (lpat)
1778	    curgenpat->next = curgenpat+1;
1779	else
1780	    curgenpat->next = NULL;
1781	curgenpat++;
1782    }
1783
1784    /*
1785     * We now know how to match the word with the line patterns; let's
1786     * see if it does.  We will use the information in curgenpat if we
1787     * are successful to work out what character goes on the line.  This
1788     * is a bit hairy, as in "the Yeti is a creature that is a bit
1789     * hairy".
1790     */
1791    llen = mp->llen;
1792    rl = 0;
1793
1794    if (sfx)
1795    {
1796	/*
1797	 * We need to work backwards from the end of both the
1798	 * word and the line strings.
1799	 */
1800	wordcp = wordchars + wlen;
1801
1802	/*
1803	 * We construct the line from the end.
1804	 */
1805	line += llen;
1806	curgenpat = genpatarr + llen;
1807    } else {
1808	wordcp = wordchars;
1809	curgenpat = genpatarr;
1810    }
1811
1812    /* we now reuse mp, lpat, wpat for the global matchers */
1813    MB_METACHARINIT();
1814    while (llen && wlen) {
1815	int wmtp;
1816	convchar_t *wp;
1817	Cpattern tmpgenpat;
1818
1819	if (sfx) {
1820	    wp = wordcp - 1;
1821	    tmpgenpat = curgenpat - 1;
1822	} else {
1823	    tmpgenpat = curgenpat;
1824	    wp = wordcp;
1825	}
1826	if (pattern_match1(tmpgenpat, *wp, &wmtp))
1827	{
1828	    convchar_t lchr;
1829	    /*
1830	     * We can match the line character directly with the word
1831	     * character.  If the line character is a fixed one,
1832	     * keep it, since we went to all that trouble above,
1833	     * else if it's generic, keep the word character,
1834	     * since we have no choice.
1835	     */
1836	    if (tmpgenpat->tp == CPAT_CHAR)
1837		lchr = tmpgenpat->u.chr;
1838	    else
1839		lchr = *wp;
1840
1841	    if (sfx)
1842		*--line = lchr;
1843	    else
1844		*line++ = lchr;
1845
1846	    llen--;
1847	    wlen--;
1848	    rl++;
1849
1850	    if (sfx) {
1851		wordcp = wp;
1852		curgenpat = tmpgenpat;
1853	    } else {
1854		if (llen)
1855		    curgenpat++;
1856		wordcp++;
1857	    }
1858	}
1859	else
1860	{
1861	    ZLE_CHAR_T *lp;
1862	    /*
1863	     * Need to loop over pattern matchers.
1864	     */
1865	    for (ms = bmatchers; ms; ms = ms->next) {
1866		mp = ms->matcher;
1867		/*
1868		 * This is the nightmare case: we have line and
1869		 * and word matchers and some pattern which restricts
1870		 * the value on the line without us knowing exactly
1871		 * what it is.  Despatch to the special function
1872		 * for that.
1873		 */
1874		if (mp && !mp->flags && mp->wlen <= wlen &&
1875		    mp->llen <= llen)
1876		{
1877		    lp = line;
1878		    wp = wordcp;
1879		    tmpgenpat = curgenpat;
1880
1881		    if (sfx) {
1882			lp -= mp->llen;
1883			wp -= mp->wlen;
1884			tmpgenpat -= mp->llen;
1885		    }
1886
1887		    if (pattern_match_restrict(mp->line, mp->word, wp,
1888					       wlen - (wp - wordchars),
1889					       tmpgenpat, lp)) {
1890			/*
1891			 * Matched: advance over as many characters
1892			 * of the patterns and strings as
1893			 * we've done matches.
1894			 */
1895			if (sfx) {
1896			    line = lp;
1897			    wordcp = wp;
1898			    curgenpat = tmpgenpat;
1899			} else {
1900			    line += mp->llen;
1901			    wordcp += mp->wlen;
1902			    curgenpat += mp->llen;
1903			}
1904			llen -= mp->llen;
1905			wlen -= mp->wlen;
1906			rl += mp->wlen;
1907			break;
1908		    }
1909		}
1910	    }
1911	    if (!ms)
1912		return 0;	/* Didn't match, give up */
1913	}
1914    }
1915    if (!llen) {
1916	/* Unmatched portion in the line built, return matched length. */
1917	return rl;
1918    }
1919    return 0;
1920}
1921
1922/* This builds a string that may be put on the line that fully matches the
1923 * given strings. The return value is NULL if no such string could be built
1924 * or that string in local static memory, dup it. */
1925
1926/**/
1927static char *
1928join_strs(int la, char *sa, int lb, char *sb)
1929{
1930    static char *rs = NULL;
1931    static int rl = 0;
1932
1933    Cmlist ms;
1934    Cmatcher mp;
1935    int t, bl;
1936    /** rr is the remaining length already allocated in rs */
1937    int rr = rl;
1938    /*
1939     * convlen is the length we need for the string converted to
1940     * char * (possibly multibyte).
1941     */
1942    int convlen;
1943    char *rp = rs;
1944
1945    while (la && lb) {
1946	if (*sa != *sb) {
1947	    /* Different characters, try the matchers. */
1948	    for (t = 0, ms = bmatchers; ms && !t; ms = ms->next) {
1949		mp = ms->matcher;
1950		if (mp && !mp->flags && mp->wlen > 0 && mp->llen > 0 &&
1951		    mp->wlen <= la && mp->wlen <= lb) {
1952		    /* The pattern has no anchors and the word
1953		     * pattern fits, try it. */
1954		    if ((t = pattern_match(mp->word, sa, NULL, NULL)) ||
1955			pattern_match(mp->word, sb, NULL, NULL)) {
1956			/* It matched one of the strings, t says which one. */
1957			VARARR(ZLE_CHAR_T, line, mp->llen);
1958			char **ap, **bp;
1959			int *alp, *blp;
1960
1961			if (t) {
1962			    ap = &sa;
1963			    alp = &la;
1964
1965			    bp = &sb;
1966			    blp = &lb;
1967			} else {
1968			    ap = &sb;
1969			    alp = &lb;
1970
1971			    bp = &sa;
1972			    blp = &la;
1973			}
1974			/* Now try to build a string that matches the other
1975			 * string. */
1976			if ((bl = bld_line(mp, line, *ap, *bp, *blp, 0))) {
1977			    /* Found one, put it into the return string. */
1978			    char *convstr =
1979				zlelineasstring(line, mp->llen, 0, &convlen,
1980						NULL, 0);
1981			    if (rr <= convlen) {
1982				char *or = rs;
1983				int alloclen = (convlen > 20) ? convlen : 20;
1984
1985				rs = realloc(rs, (rl += alloclen));
1986				rr += alloclen;
1987				rp += rs - or;
1988			    }
1989			    memcpy(rp, convstr, convlen);
1990			    rp += convlen;
1991			    rr -= convlen;
1992			    /* HERE: multibyte chars */
1993			    *ap += mp->wlen;
1994			    *alp -= mp->wlen;
1995
1996			    *bp += bl;
1997			    *blp -= bl;
1998			    t = 1;
1999			    free(convstr);
2000			} else
2001			    t = 0;
2002		    }
2003		}
2004	    }
2005	    if (!t)
2006		break;
2007	} else {
2008	    /* Same character, just take it. */
2009	    if (rr <= 1 /* HERE charlen */) {
2010		char *or = rs;
2011
2012		rs = realloc(rs, (rl += 20));
2013		rr += 20;
2014		rp += rs - or;
2015	    }
2016	    /* HERE: multibyte char */
2017	    *rp++ = *sa;
2018	    rr--;
2019	    sa++;
2020	    sb++;
2021	    la--;
2022	    lb--;
2023	}
2024    }
2025    if (la || lb)
2026	return NULL;
2027
2028    *rp = '\0';
2029
2030    return rs;
2031}
2032
2033/*
2034 * This compares the anchors stored in two top-level clines.
2035 * It returns 1 if the anchors are the same, 2 if they are
2036 * compatible (and have been combined in "o"), 0 otherwise.
2037 */
2038
2039/**/
2040static int
2041cmp_anchors(Cline o, Cline n, int join)
2042{
2043    int line = 0;
2044    char *j;
2045
2046    /* First try the exact strings. */
2047    if ((!(o->flags & CLF_LINE) && o->wlen == n->wlen &&
2048	 (!o->word || !strncmp(o->word, n->word, o->wlen))) ||
2049	(line = ((!o->line && !n->line && !o->wlen && !n->wlen) ||
2050		 (o->llen == n->llen && o->line && n->line &&
2051		  !strncmp(o->line, n->line, o->llen))))) {
2052	if (line) {
2053	    o->flags |= CLF_LINE;
2054	    o->word = NULL;
2055	    o->wlen = 0;
2056	}
2057	return 1;
2058    }
2059    /* Didn't work, try to build a string matching both anchors. */
2060    if (join && !(o->flags & CLF_JOIN) && o->word && n->word &&
2061	(j = join_strs(o->wlen, o->word, n->wlen, n->word))) {
2062	o->flags |= CLF_JOIN;
2063	o->wlen = strlen(j);
2064	o->word = dupstring(j);
2065
2066	return 2;
2067    }
2068    return 0;
2069}
2070
2071/* Below is the code to join two cline lists. This struct is used to walk
2072 * through a sub-list. */
2073
2074typedef struct cmdata *Cmdata;
2075
2076struct cmdata {
2077    Cline cl, pcl;
2078    char *str, *astr;
2079    int len, alen, olen, line;
2080};
2081
2082/* This is used to ensure that a cmdata struct contains usable data.
2083 * The return value is non-zero if we reached the end. */
2084
2085static int
2086check_cmdata(Cmdata md, int sfx)
2087{
2088    /* We will use the str and len fields to contain the next sub-string
2089     * in the list. If len is zero, we have to use the next cline. */
2090    if (!md->len) {
2091	/* If there is none, we reached the end. */
2092	if (!md->cl)
2093	    return 1;
2094
2095	/* Otherwise, get the string. Only the line-string or both.
2096	 * We also have to adjust the pointer if this is for a suffix. */
2097	if (md->cl->flags & CLF_LINE) {
2098	    md->line = 1;
2099	    md->len = md->cl->llen;
2100	    md->str = md->cl->line;
2101	} else {
2102	    md->line = 0;
2103	    md->len = md->olen = md->cl->wlen;
2104	    /* HERE: multibyte */
2105	    if ((md->str = md->cl->word) && sfx)
2106		md->str += md->len;
2107	    md->alen = md->cl->llen;
2108	    /* HERE: multibyte */
2109	    if ((md->astr = md->cl->line) && sfx)
2110		md->astr += md->alen;
2111	}
2112	md->pcl = md->cl;
2113	md->cl = md->cl->next;
2114    }
2115    return 0;
2116}
2117
2118/* This puts the not-yet-matched portion back into the last cline and
2119 * returns that. */
2120
2121static Cline
2122undo_cmdata(Cmdata md, int sfx)
2123{
2124    Cline r = md->pcl;
2125
2126    if (md->line) {
2127	r->word = NULL;
2128	r->wlen = 0;
2129	r->flags |= CLF_LINE;
2130	r->llen = md->len;
2131	/* HERE: multibyte */
2132	r->line = md->str - (sfx ? md->len : 0);
2133    } else if (md->len != md->olen) {
2134	r->wlen = md->len;
2135	/* HERE: multibyte */
2136	r->word = md->str - (sfx ? md->len : 0);
2137	DPUTS(r->wlen > 0 && !*r->word, "Bad word");
2138    }
2139    return r;
2140}
2141
2142/* This tries to build a string matching a sub-string in a sub-cline
2143 * that could not be matched otherwise. */
2144
2145static Cline
2146join_sub(Cmdata md, char *str, int len, int *mlen, int sfx, int join)
2147{
2148    if (!check_cmdata(md, sfx)) {
2149	char *ow = str, *nw = md->str;
2150	int ol = len, nl = md->len;
2151	Cmlist ms;
2152	Cmatcher mp;
2153	int t;
2154
2155	if (sfx) {
2156	    ow += ol; nw += nl;
2157	}
2158	for (t = 0, ms = bmatchers; ms && !t; ms = ms->next) {
2159	    mp = ms->matcher;
2160	    /* We use only those patterns that match a non-empty
2161	     * string in both the line and the word and that have
2162	     * no anchors. */
2163	    if (mp && !mp->flags && mp->wlen > 0 && mp->llen > 0) {
2164		/* We first test, if the old string matches already the
2165		 * new one. */
2166		if (mp->llen <= ol && mp->wlen <= nl &&
2167		    pattern_match(mp->line, ow - (sfx ? mp->llen : 0),
2168				  mp->word, nw - (sfx ? mp->wlen : 0))) {
2169		    /* It did, update the contents of the cmdata struct
2170		     * and return a cline for the matched part. */
2171		    if (sfx)
2172			md->str -= mp->wlen;
2173		    else
2174			md->str += mp->wlen;
2175		    md->len -= mp->wlen;
2176		    *mlen = mp->llen;
2177
2178		    return get_cline(NULL, 0, ow - (sfx ? mp->llen : 0),
2179				     mp->llen, NULL, 0, 0);
2180		}
2181		/* Otherwise we will try to build a string that matches
2182		 * both strings. But try the pattern only if the word-
2183		 * pattern matches one of the strings. */
2184		if (join && mp->wlen <= ol && mp->wlen <= nl &&
2185		    ((t = pattern_match(mp->word, ow - (sfx ? mp->wlen : 0),
2186				       NULL, NULL)) ||
2187		     pattern_match(mp->word, nw - (sfx ? mp->wlen : 0),
2188				   NULL, NULL))) {
2189		    VARARR(ZLE_CHAR_T, line, mp->llen);
2190		    int bl;
2191		    char *mw;
2192
2193		    /* Then build all the possible lines and see
2194		     * if one of them matches the other string. */
2195		    /* HERE: they're multibyte */
2196		    if (t)
2197			mw = ow - (sfx ? mp->wlen : 0);
2198		    else
2199			mw = nw - (sfx ? mp->wlen : 0);
2200
2201		    if ((bl = bld_line(mp, line, mw, (t ? nw : ow),
2202				       (t ? nl : ol), sfx)))  {
2203			/* Yep, one of the lines matched the other
2204			 * string. */
2205
2206			/* HERE: multibyte characters */
2207			if (t) {
2208			    ol = mp->wlen; nl = bl;
2209			} else {
2210			    ol = bl; nl = mp->wlen;
2211			}
2212			if (sfx)
2213			    md->str -= nl;
2214			else
2215			    md->str += nl;
2216			md->len -= nl;
2217			*mlen = ol;
2218
2219			return get_cline(NULL, 0,
2220					 zlelineasstring(line, mp->llen,
2221							 0, NULL, NULL, 1),
2222					 mp->llen, NULL, 0, CLF_JOIN);
2223		    }
2224		}
2225	    }
2226	}
2227    }
2228    return NULL;
2229}
2230
2231/* This is used to match a sub-string in a sub-cline. The length of the
2232 * matched portion is returned. This tests only for exact equality. */
2233
2234static int
2235sub_match(Cmdata md, char *str, int len, int sfx)
2236{
2237    int ret = 0, l, ind, add;
2238    char *p, *q;
2239#ifdef MULTIBYTE_SUPPORT
2240    int fulllen = len;
2241    char *fullstr = str;
2242    mbstate_t mbs;
2243#endif
2244
2245    if (sfx) {
2246	str += len;
2247	ind = -1; add = -1;
2248    } else {
2249	ind = 0; add = 1;
2250    }
2251    /* str and len describe the old string, in md we have the new one. */
2252    while (len) {
2253	if (check_cmdata(md, sfx))
2254	    return ret;
2255
2256	/*
2257	 * Look for a common prefix.  If we do include metafied
2258	 * characters, at this stage we still need the overall length
2259	 * including Meta's as separate characters.
2260	 */
2261	for (l = 0, p = str, q = md->str;
2262	     l < len && l < md->len && p[ind] == q[ind];
2263	     l++, p += add, q += add) {}
2264
2265	/* Make sure we don't end in the middle of a Meta sequence. */
2266	if (add == 1) {
2267	    if (l && p[-1] == Meta)
2268		l--;
2269	} else {
2270	    if (l && ((l < len && p[-1] == Meta)
2271		   || (l < md->len && q[-1] == Meta)))
2272		l--;
2273	}
2274#ifdef MULTIBYTE_SUPPORT
2275	/*
2276	 * Make sure we don't end in the middle of a multibyte character.
2277	 * Don't need to do this if the match ended at the start
2278	 * of the original string.
2279	 *
2280	 * Let q be the match point we've found.
2281	 */
2282	q = sfx ? str - l : str + l;
2283	if (q != fullstr) {
2284	    memset(&mbs, 0, sizeof mbs);
2285	    /*
2286	     * Otherwise read characters from the start of the original
2287	     * string until we reach or pass the match point.  This
2288	     * is rather inefficient, but in general only reading
2289	     * the full string can keep track of where we are in
2290	     * a character.  With a prefix we could be more efficient,
2291	     * but it's difficult with a suffix where the match point
2292	     * moves backwards.
2293	     */
2294	    for (p = fullstr; p < fullstr + fulllen; ) {
2295		wchar_t wc;
2296		/*
2297		 * ret must, in fact, be set by the current logic,
2298		 * but gcc doesn't realise (at least some versions don't).
2299		 */
2300		size_t cnt = MB_INVALID;
2301		int diff;
2302		char *p2;
2303
2304		/*
2305		 * Because the string is metafied, we need to
2306		 * assembled wide characters a byte at a time.
2307		 */
2308		for (p2 = p; p2 < fullstr + fulllen; p2++) {
2309		    char curchar = (*p2 == Meta) ? (*++p2 ^ 32) : *p2;
2310		    cnt = mbrtowc(&wc, &curchar, 1, &mbs);
2311		    /* Continue while character is incomplete. */
2312		    if (cnt != MB_INCOMPLETE)
2313			break;
2314		}
2315		if (cnt == MB_INVALID || cnt == MB_INCOMPLETE) {
2316		    /* not a valid character, give up test */
2317		    break;
2318		}
2319		/* increment p2 for last byte read */
2320		diff = ++p2 - q;
2321		if (diff == 0) {
2322		    /*
2323		     * Prefix or suffix matches at end of multbyte character,
2324		     * so OK.
2325		     */
2326		    break;
2327		} else if (diff > 0) {
2328		    /*
2329		     * The prefix or suffix finishes in the middle
2330		     * of a character.  Shorten it until it doesn't.
2331		     */
2332		    if (sfx) {
2333			/*
2334			 * We need to remove the trailing part of
2335			 * the character from the suffix.
2336			 */
2337			l -= diff;
2338		    } else {
2339			/*
2340			 * We need to remove the initial part of
2341			 * the character from the prefix.
2342			 */
2343			l -= (q - p);
2344		    }
2345		    break;
2346		}
2347		/* Advance over full character */
2348		p = p2;
2349	    }
2350	}
2351#endif
2352	if (l) {
2353	    /* There was a common prefix, use it. */
2354	    md->len -= l; len -= l;
2355	    if (sfx) {
2356		md->str -= l; str -= l;
2357	    } else {
2358		md->str += l; str += l;
2359	    }
2360	    ret += l;
2361	} else if (md->line || md->len != md->olen || !md->astr)
2362	    return ret;
2363	else {
2364	    /* We still have the line string to try. */
2365	    md->line = 1;
2366	    md->len = md->alen;
2367	    md->str = md->astr;
2368	}
2369    }
2370    return ret;
2371}
2372
2373/* This is used to build a common prefix or suffix sub-list. If requested
2374 * it returns the unmatched cline lists in orest and nrest. */
2375
2376/**/
2377static void
2378join_psfx(Cline ot, Cline nt, Cline *orest, Cline *nrest, int sfx)
2379{
2380    Cline p = NULL, o, n;
2381    struct cmdata md, omd;
2382    char **sstr = NULL;
2383    int len, join = 0, line = 0, *slen = NULL;
2384
2385    if (sfx) {
2386	o = ot->suffix; n = nt->suffix;
2387    } else {
2388	o = ot->prefix;	n = nt->prefix;
2389    }
2390    if (!o) {
2391	if (orest)
2392	    *orest = NULL;
2393	if (nrest)
2394	    *nrest = n;
2395	if (n && n->wlen)
2396	    ot->flags |= CLF_MISS;
2397
2398	return;
2399    }
2400    if (!n) {
2401	if (sfx)
2402	    ot->suffix = NULL;
2403	else
2404	    ot->prefix = NULL;
2405
2406	if (orest)
2407	    *orest = o;
2408	else
2409	    free_cline(o);
2410	if (nrest)
2411	    *nrest = NULL;
2412	return;
2413    }
2414    md.cl = n;
2415    md.len = 0;
2416
2417    /* Walk through the old list. */
2418    while (o) {
2419	join = 0;
2420	memcpy(&omd, &md, sizeof(struct cmdata));
2421
2422	/* We first get the length of the prefix equal in both strings. */
2423	if (o->flags & CLF_LINE) {
2424	    if ((len = sub_match(&md, o->line, o->llen, sfx)) != o->llen) {
2425		join = 1; line = 1; slen = &(o->llen); sstr = &(o->line);
2426	    }
2427	} else if ((len = sub_match(&md, o->word, o->wlen, sfx)) != o->wlen) {
2428	    if (o->line) {
2429		memcpy(&md, &omd, sizeof(struct cmdata));
2430		o->flags |= CLF_LINE | CLF_DIFF;
2431
2432		continue;
2433	    }
2434	    o->llen = o->llen - ot->slen;
2435	    join = 1; line = 0; slen = &(o->wlen); sstr = &(o->word);
2436	}
2437	if (join) {
2438	    /* There is a rest that is different in the two lists,
2439	     * we try to build a new cline matching both strings. */
2440	    Cline joinl;
2441	    int jlen;
2442
2443	    if ((joinl = join_sub(&md, *sstr + len, *slen - len,
2444				  &jlen, sfx, !(o->flags & CLF_JOIN)))) {
2445		/* We have one, insert it into the list. */
2446		joinl->flags |= CLF_DIFF;
2447		if (len + jlen != *slen) {
2448		    Cline rest;
2449
2450		    rest = get_cline(NULL, 0, *sstr + (sfx ? 0 : len + jlen),
2451				     *slen - len - jlen, NULL, 0, 0);
2452
2453		    rest->next = o->next;
2454		    joinl->next = rest;
2455		} else
2456		    joinl->next = o->next;
2457
2458		if (len) {
2459		    if (sfx)
2460			*sstr += *slen - len;
2461		    *slen = len;
2462		    o->next = joinl;
2463		} else {
2464		    o->next = NULL;
2465		    free_cline(o);
2466		    if (p)
2467			p->next = joinl;
2468		    else if (sfx)
2469			ot->suffix = joinl;
2470		    else
2471			ot->prefix = joinl;
2472		}
2473		o = joinl;
2474		join = 0;
2475	    }
2476	}
2477	if (join) {
2478	    /* We couldn't build a cline for a common string, so we
2479	     * cut the list here. */
2480	    if (len) {
2481		Cline r;
2482
2483		if (orest) {
2484		    if (line)
2485			r = get_cline(o->line + len, *slen - len,
2486				      NULL, 0, NULL, 0, o->flags);
2487		    else
2488			r = get_cline(NULL, 0, o->word + len, *slen - len,
2489				      NULL, 0, o->flags);
2490
2491		    r->next = o->next;
2492		    *orest = r;
2493
2494		    *slen = len;
2495		    o->next = NULL;
2496		} else {
2497		    if (sfx)
2498			*sstr += *slen - len;
2499		    *slen = len;
2500		    free_cline(o->next);
2501		    o->next = NULL;
2502		}
2503	    } else {
2504		if (p)
2505		    p->next = NULL;
2506		else if (sfx)
2507		    ot->suffix = NULL;
2508		else
2509		    ot->prefix = NULL;
2510
2511		if (orest)
2512		    *orest = o;
2513		else
2514		    free_cline(o);
2515	    }
2516	    if (!orest || !nrest)
2517		ot->flags |= CLF_MISS;
2518
2519	    if (nrest)
2520		*nrest = undo_cmdata(&md, sfx);
2521
2522	    return;
2523	}
2524	p = o;
2525	o = o->next;
2526    }
2527    if (md.len || md.cl)
2528	ot->flags |= CLF_MISS;
2529    if (orest)
2530	*orest = NULL;
2531    if (nrest)
2532	*nrest = undo_cmdata(&md, sfx);
2533}
2534
2535/* This builds the common prefix and suffix for a mid-cline -- the one
2536 * describing the place where the prefix and the suffix meet. */
2537
2538/**/
2539static void
2540join_mid(Cline o, Cline n)
2541{
2542    if (o->flags & CLF_JOIN) {
2543	/* The JOIN flag is set in the old cline struct if it was
2544	 * already joined with another one. In this case the suffix
2545	 * field contains the suffix from previous calls. */
2546	Cline nr;
2547
2548	join_psfx(o, n, NULL, &nr, 0);
2549
2550	n->suffix = revert_cline(nr);
2551
2552	join_psfx(o, n, NULL, NULL, 1);
2553    } else {
2554	/* This is the first time for both structs, so the prefix field
2555	 * contains the whole sub-list. */
2556	Cline or, nr;
2557
2558	o->flags |= CLF_JOIN;
2559
2560	/* We let us give both rests and use them as the suffixes. */
2561	join_psfx(o, n, &or, &nr, 0);
2562
2563	if (or)
2564	    or->llen = (o->slen > or->wlen ? or->wlen : o->slen);
2565	o->suffix = revert_cline(or);
2566	n->suffix = revert_cline(nr);
2567
2568	join_psfx(o, n, NULL, NULL, 1);
2569    }
2570    n->suffix = NULL;
2571}
2572
2573/* This turns the sequence of anchor cline structs from b to e into a
2574 * prefix sequence, puts it before the prefix of e and then tries to
2575 * join that with the prefix of a.
2576 * This is needed if some matches had a anchor match spec and others
2577 * didn't. */
2578
2579/**/
2580static int
2581sub_join(Cline a, Cline b, Cline e, int anew)
2582{
2583    if (!e->suffix && a->prefix) {
2584	Cline op = e->prefix, n = NULL, *p = &n, t, ca;
2585	int min = 0, max = 0;
2586
2587	for (; b != e; b = b->next) {
2588	    if ((*p = t = b->prefix)) {
2589		while (t->next)
2590		    t = t->next;
2591		p = &(t->next);
2592	    }
2593	    b->suffix = b->prefix = NULL;
2594	    b->flags &= ~CLF_SUF;
2595	    min += b->min;
2596	    max += b->max;
2597	    *p = b;
2598	    p = &(b->next);
2599	}
2600	*p = e->prefix;
2601	ca = a->prefix;
2602
2603	while (n) {
2604	    e->prefix = cp_cline(n, 1);
2605	    a->prefix = cp_cline(ca, 1);
2606
2607	    if (anew) {
2608		int f = e->flags;
2609
2610		join_psfx(e, a, NULL, NULL, 0);
2611		e->flags = f;
2612		if (e->prefix)
2613		    return max - min;
2614	    } else {
2615		int f = e->flags;
2616
2617		join_psfx(a, e, NULL, NULL, 0);
2618		e->flags = f;
2619		if (a->prefix)
2620		    return max - min;
2621	    }
2622	    min -= n->min;
2623
2624	    if (n == op)
2625		break;
2626	    n = n->next;
2627	}
2628	return max - min;
2629    }
2630    return 0;
2631}
2632
2633/* This simplifies the cline list given as the first argument so that
2634 * it also matches the second list. */
2635
2636/**/
2637Cline
2638join_clines(Cline o, Cline n)
2639{
2640    cline_setlens(n, 1);
2641
2642    /* First time called, just return the new list. On further invocations
2643     * we will get it as the first argument. */
2644    if (!o)
2645	return n;
2646    else {
2647	Cline oo = o, nn = n, po = NULL, pn = NULL, x;
2648	int diff;
2649
2650	/* Walk through the lists. */
2651	while (o && n) {
2652	    /* If one of them describes a new part and the other one does
2653	     * not, synchronise them by searching an old part in the
2654	     * other list. */
2655	    if ((o->flags & CLF_NEW) && !(n->flags & CLF_NEW)) {
2656		Cline t, tn;
2657
2658		for (t = o; (tn = t->next) &&
2659			 ((tn->flags & CLF_NEW) || !cmp_anchors(tn, n, 0));
2660		     t = tn);
2661		if (tn) {
2662		    diff = sub_join(n, o, tn, 1);
2663
2664		    if (po)
2665			po->next = tn;
2666		    else
2667			oo = tn;
2668
2669		    t->next = NULL;
2670		    free_cline(o);
2671		    x = o;
2672		    o = tn;
2673
2674		    if (po && po->prefix && cmp_anchors(x, po, 0)) {
2675			po->flags |= CLF_MISS;
2676			po->max += diff;
2677		    } else {
2678			o->flags |= CLF_MISS;
2679			o->max += diff;
2680		    }
2681		    continue;
2682		}
2683	    }
2684	    if (!(o->flags & CLF_NEW) && (n->flags & CLF_NEW)) {
2685		Cline t, tn;
2686
2687		for (t = n; (tn = t->next) &&
2688			 ((tn->flags & CLF_NEW) || !cmp_anchors(o, tn, 0));
2689		     t = tn);
2690		if (tn) {
2691		    int of = o->flags & CLF_MISS;
2692
2693		    diff = sub_join(o, n, tn, 0);
2694		    o->flags = (o->flags & ~CLF_MISS) | of;
2695
2696		    if (po && po->prefix && cmp_anchors(n, pn, 0)) {
2697			po->flags |= CLF_MISS;
2698			po->max += diff;
2699		    } else {
2700			o->flags |= CLF_MISS;
2701			o->max += diff;
2702		    }
2703		    n = tn;
2704		    continue;
2705		}
2706	    }
2707	    /* Almost the same as above, but for the case that they
2708	     * describe different types of parts (prefix, suffix, or mid). */
2709	    if ((o->flags & (CLF_SUF | CLF_MID)) !=
2710		(n->flags & (CLF_SUF | CLF_MID))) {
2711		Cline t, tn;
2712
2713		for (t = n;
2714		     (tn = t->next) &&
2715			 (tn->flags & (CLF_SUF | CLF_MID)) !=
2716			 (o->flags  & (CLF_SUF | CLF_MID));
2717		     t = tn);
2718		if (tn && cmp_anchors(o, tn, 1)) {
2719		    sub_join(o, n, tn, 0);
2720
2721		    n = tn;
2722		    continue;
2723		}
2724		for (t = o;
2725		     (tn = t->next) &&
2726			 (tn->flags & (CLF_SUF | CLF_MID)) !=
2727			 (n->flags  & (CLF_SUF | CLF_MID));
2728		     t = tn);
2729		if (tn && cmp_anchors(tn, n, 1)) {
2730		    sub_join(n, o, tn, 1);
2731
2732		    if (po)
2733			po->next = tn;
2734		    else
2735			oo = tn;
2736		    t->next = NULL;
2737		    free_cline(o);
2738		    o = tn;
2739		    continue;
2740		}
2741		if (o->flags & CLF_MID) {
2742		    o->flags = (o->flags & ~CLF_MID) | (n->flags & CLF_SUF);
2743		    if (n->flags & CLF_SUF) {
2744			free_cline(o->prefix);
2745			o->prefix = NULL;
2746		    } else {
2747			free_cline(o->suffix);
2748			o->suffix = NULL;
2749		    }
2750		}
2751		break;
2752	    }
2753	    /* Now see if they have matching anchors. If not, cut the list. */
2754	    if (!(o->flags & CLF_MID) && !cmp_anchors(o, n, 1)) {
2755		Cline t, tn, tt, to = NULL;
2756
2757		for (t = n; (tn = t->next); t = tn)
2758		    if (!(tn->flags & CLF_NEW) && (tn->flags & CLF_SKIP)) {
2759			for (tt = o; (to = tt->next); tt = to)
2760			    if (!(to->flags & CLF_NEW) && (to->flags & CLF_SKIP) &&
2761				cmp_anchors(tn, to, 1))
2762				break;
2763			if (to)
2764			    break;
2765		    }
2766		if (tn) {
2767		    if (po)
2768			po->next = to;
2769		    else
2770			oo = to;
2771		    o = to;
2772
2773		    diff = sub_join(o, n, tn, 0);
2774
2775		    o->flags |= CLF_MISS;
2776		    o->max += diff;
2777
2778		    n = tn;
2779		    po = o;
2780		    o = o->next;
2781		    pn = n;
2782		    n = n->next;
2783		    continue;
2784		} else {
2785		    for (t = o; (to = t->next); t = to)
2786			if ((to->flags & CLF_SKIP) && cmp_anchors(n, to, 1))
2787			    break;
2788
2789		    if (to) {
2790			diff = sub_join(n, o, to, 1);
2791
2792			if (po)
2793			    po->next = to;
2794			else
2795			    oo = to;
2796			x = o;
2797			o = to;
2798			if (po && po->prefix && cmp_anchors(x, po, 0)) {
2799			    po->flags |= CLF_MISS;
2800			    po->max += diff;
2801			} else {
2802			    o->flags |= CLF_MISS;
2803			    o->max += diff;
2804			}
2805			continue;
2806		    } else {
2807			for (tt = NULL, t = n; (tn = t->next); t = tn) {
2808			    if (tn->flags & CLF_SKIP)
2809				for (tt = o; (to = tt->next); tt = to)
2810				    if ((to->flags & CLF_SKIP) &&
2811					cmp_anchors(tn, to, 1))
2812					break;
2813			    if (to)
2814				break;
2815			}
2816			if (to) {
2817			    diff = sub_join(n, o, to, 1);
2818
2819			    if (po)
2820				po->next = to;
2821			    else
2822				oo = to;
2823			    x = o;
2824			    o = to;
2825			    if (po && po->prefix && cmp_anchors(x, po, 0)) {
2826				po->flags |= CLF_MISS;
2827				po->max += diff;
2828			    } else {
2829				o->flags |= CLF_MISS;
2830				o->max += diff;
2831			    }
2832			    continue;
2833			} else {
2834			    for (tn = n; tn; tn = tn->next)
2835				if ((tn->flags & CLF_NEW) ==
2836				    (o->flags & CLF_NEW) &&
2837				    cmp_anchors(tn, o, 1)) break;
2838
2839			    if (tn) {
2840				int of = o->flags & CLF_MISS;
2841
2842				if ((diff = sub_join(o, n, tn, 0))) {
2843				    o->flags = (o->flags & ~CLF_MISS) | of;
2844				    if (po && po->prefix) {
2845					po->flags |= CLF_MISS;
2846					po->max += diff;
2847				    }
2848				    else {
2849					o->flags |= CLF_MISS;
2850					o->max += diff;
2851				    }
2852				}
2853				n = tn;
2854				po = o;
2855				o = o->next;
2856				pn = n;
2857				n = n->next;
2858				continue;
2859			    }
2860			    if (o->flags & CLF_SUF)
2861				break;
2862
2863			    o->word = o->line = o->orig = NULL;
2864			    o->wlen = 0;
2865			    free_cline(o->next);
2866			    o->next = NULL;
2867			    o->flags |= CLF_MISS;
2868			}
2869		    }
2870		}
2871	    }
2872	    /* Ok, they are equal, now copy the information about the
2873             * original string if needed, calculate minimum and maximum
2874	     * lengths, and join the sub-lists. */
2875	    if (!o->orig && !o->olen) {
2876		o->orig = n->orig;
2877		o->olen = n->olen;
2878	    }
2879	    if (n->min < o->min)
2880		o->min = n->min;
2881	    if (n->max > o->max)
2882		o->max = n->max;
2883	    if (o->flags & CLF_MID)
2884		join_mid(o, n);
2885	    else
2886		join_psfx(o, n, NULL, NULL, (o->flags & CLF_SUF));
2887
2888	    po = o;
2889	    o = o->next;
2890	    pn = n;
2891	    n = n->next;
2892	}
2893	/* Free the rest of the old list. */
2894	if (o) {
2895	    if (po)
2896		po->next = NULL;
2897	    else
2898		oo = NULL;
2899
2900	    free_cline(o);
2901	}
2902	free_cline(nn);
2903
2904	return oo;
2905    }
2906}
2907