1/* 2 * compmatch.c - the complete module, completion matching code 3 * 4 * This file is part of zsh, the Z shell. 5 * 6 * Copyright (c) 1999 Sven Wischnowsky 7 * All rights reserved. 8 * 9 * Permission is hereby granted, without written agreement and without 10 * license or royalty fees, to use, copy, modify, and distribute this 11 * software and to distribute modified versions of this software for any 12 * purpose, provided that the above copyright notice and the following 13 * two paragraphs appear in all copies of this software. 14 * 15 * In no event shall Sven Wischnowsky or the Zsh Development Group be liable 16 * to any party for direct, indirect, special, incidental, or consequential 17 * damages arising out of the use of this software and its documentation, 18 * even if Sven Wischnowsky and the Zsh Development Group have been advised of 19 * the possibility of such damage. 20 * 21 * Sven Wischnowsky and the Zsh Development Group specifically disclaim any 22 * warranties, including, but not limited to, the implied warranties of 23 * merchantability and fitness for a particular purpose. The software 24 * provided hereunder is on an "as is" basis, and Sven Wischnowsky and the 25 * Zsh Development Group have no obligation to provide maintenance, 26 * support, updates, enhancements, or modifications. 27 * 28 */ 29 30#include "complete.mdh" 31#include "compmatch.pro" 32 33/* 34 * This compares two cpattern lists and returns non-zero if they are 35 * equal (N.B. opposite sense to usual *cmp()). 36 * 37 * The old version of this didn't worry about whether the lists 38 * were the same length. This one does. It's hard to see how 39 * that can be wrong even if it's unnecessary. 40 */ 41 42/**/ 43static int 44cpatterns_same(Cpattern a, Cpattern b) 45{ 46 while (a) { 47 if (!b) 48 return 0; 49 if (a->tp != b->tp) 50 return 0; 51 switch (a->tp) { 52 case CPAT_CCLASS: 53 case CPAT_NCLASS: 54 case CPAT_EQUIV: 55 /* 56 * Patterns can actually match the same even if 57 * the range strings don't compare differently, but 58 * I don't think we need to handle that subtlety. 59 */ 60 if (strcmp(a->u.str, b->u.str) != 0) 61 return 0; 62 break; 63 64 case CPAT_CHAR: 65 if (a->u.chr != b->u.chr) 66 return 0; 67 break; 68 69 default: 70 /* here to silence compiler */ 71 break; 72 } 73 74 a = a->next; 75 b = b->next; 76 } 77 return !b; 78} 79 80/* This compares two cmatchers and returns non-zero if they are equal. */ 81 82/**/ 83static int 84cmatchers_same(Cmatcher a, Cmatcher b) 85{ 86 return (a == b || 87 (a->flags == b->flags && 88 a->llen == b->llen && a->wlen == b->wlen && 89 (!a->llen || cpatterns_same(a->line, b->line)) && 90 (a->wlen <= 0 || cpatterns_same(a->word, b->word)) && 91 (!(a->flags & (CMF_LEFT | CMF_RIGHT)) || 92 (a->lalen == b->lalen && a->ralen == b->ralen && 93 (!a->lalen || cpatterns_same(a->left, b->left)) && 94 (!a->ralen || cpatterns_same(a->right, b->right)))))); 95} 96 97/* Add the given matchers to the bmatcher list. */ 98 99/**/ 100mod_export void 101add_bmatchers(Cmatcher m) 102{ 103 Cmlist old = bmatchers, *q = &bmatchers, n; 104 105 for (; m; m = m->next) { 106 if ((!m->flags && m->wlen > 0 && m->llen > 0) || 107 (m->flags == CMF_RIGHT && m->wlen < 0 && !m->llen)) { 108 *q = n = (Cmlist) zhalloc(sizeof(struct cmlist)); 109 n->matcher = m; 110 q = &(n->next); 111 } 112 } 113 *q = old; 114} 115 116/* This is called when the matchers in the mstack have changed to 117 * ensure that the bmatchers list contains no matchers not in mstack. */ 118 119/**/ 120mod_export void 121update_bmatchers(void) 122{ 123 Cmlist p = bmatchers, ms; 124 Cmatcher mp; 125 int t; 126 127 while (p) { 128 t = 0; 129 for (ms = mstack; ms && !t; ms = ms->next) 130 for (mp = ms->matcher; mp && !t; mp = mp->next) 131 t = cmatchers_same(mp, p->matcher); 132 133 p = p->next; 134 if (!t) { 135 bmatchers = p; 136 } 137 } 138} 139 140/* This returns a new Cline structure. */ 141 142/**/ 143Cline 144get_cline(char *l, int ll, char *w, int wl, char *o, int ol, int fl) 145{ 146 Cline r; 147 148 /* Prefer to take it from the buffer list (freecl), if there 149 * is none, allocate a new one. */ 150 151 if ((r = freecl)) 152 freecl = r->next; 153 else 154 r = (Cline) zhalloc(sizeof(*r)); 155 156 r->next = NULL; 157 r->line = l; r->llen = ll; 158 r->word = w; r->wlen = wl; 159 DPUTS(wl > 0 && !*w, "Bad word"); 160 r->orig = o; r->olen = ol; 161 r->slen = 0; 162 r->flags = fl; 163 r->prefix = r->suffix = NULL; 164 r->min = r->max = 0; 165 return r; 166} 167 168/* This frees a cline list. */ 169 170/**/ 171void 172free_cline(Cline l) 173{ 174 Cline n; 175 176 while (l) { 177 n = l->next; 178 l->next = freecl; 179 freecl = l; 180 free_cline(l->prefix); 181 free_cline(l->suffix); 182 l = n; 183 } 184} 185 186/* Copy a cline list. */ 187 188/**/ 189Cline 190cp_cline(Cline l, int deep) 191{ 192 Cline r = NULL, *p = &r, t, lp = NULL; 193 194 while (l) { 195 if ((t = freecl)) 196 freecl = t->next; 197 else 198 t = (Cline) zhalloc(sizeof(*t)); 199 memcpy(t, l, sizeof(*t)); 200 if (deep) { 201 if (t->prefix) 202 t->prefix = cp_cline(t->prefix, 0); 203 if (t->suffix) 204 t->suffix = cp_cline(t->suffix, 0); 205 } 206 *p = lp = t; 207 p = &(t->next); 208 l = l->next; 209 } 210 *p = NULL; 211 212 return r; 213} 214 215/* Calculate the length of a cline and its sub-lists. */ 216 217/**/ 218int 219cline_sublen(Cline l) 220{ 221 int len = ((l->flags & CLF_LINE) ? l->llen : l->wlen); 222 223 if (l->olen && !((l->flags & CLF_SUF) ? l->suffix : l->prefix)) 224 len += l->olen; 225 else { 226 Cline p; 227 228 for (p = l->prefix; p; p = p->next) 229 len += ((p->flags & CLF_LINE) ? p->llen : p->wlen); 230 for (p = l->suffix; p; p = p->next) 231 len += ((p->flags & CLF_LINE) ? p->llen : p->wlen); 232 } 233 return len; 234} 235 236/* Set the lengths in the cline lists. */ 237 238/**/ 239void 240cline_setlens(Cline l, int both) 241{ 242 while (l) { 243 l->min = cline_sublen(l); 244 if (both) 245 l->max = l->min; 246 l = l->next; 247 } 248} 249 250/* This sets the CLF_MATCHED flag in the given clines. */ 251 252/**/ 253void 254cline_matched(Cline p) 255{ 256 while (p) { 257 p->flags |= CLF_MATCHED; 258 cline_matched(p->prefix); 259 cline_matched(p->suffix); 260 261 p = p->next; 262 } 263} 264 265/* This reverts the order of the elements of the given cline list and 266 * returns a pointer to the new head. */ 267 268/**/ 269Cline 270revert_cline(Cline p) 271{ 272 Cline r = NULL, n; 273 274 while (p) { 275 n = p->next; 276 p->next = r; 277 r = p; 278 p = n; 279 } 280 return r; 281} 282 283/* Global variables used during matching: a char-buffer for the string to 284 * use for the match, and two cline lists for the two levels we use. */ 285 286/**/ 287char *matchbuf = NULL; 288/**/ 289int matchbuflen = 0, matchbufadded; 290 291/**/ 292Cline matchparts, matchlastpart; 293/**/ 294Cline matchsubs, matchlastsub; 295 296/* This initialises the variables above. */ 297 298/**/ 299static void 300start_match(void) 301{ 302 if (matchbuf) 303 *matchbuf = '\0'; 304 matchbufadded = 0; 305 matchparts = matchlastpart = matchsubs = matchlastsub = NULL; 306} 307 308/* This aborts a matching, freeing the cline lists build. */ 309 310/**/ 311static void 312abort_match(void) 313{ 314 free_cline(matchparts); 315 free_cline(matchsubs); 316 matchparts = matchsubs = NULL; 317} 318 319/* This adds a new string in the static char buffer. The arguments are 320 * the matcher used (if any), the strings from the line and the word 321 * and the length of the string from the word. The last argument is 322 * non-zero if we are matching a suffix (where the given string has to 323 * be prepended to the contents of the buffer). */ 324 325/**/ 326static void 327add_match_str(Cmatcher m, char *l, char *w, int wl, int sfx) 328{ 329 /* Get the string and length to insert: either from the line 330 * or from the match. */ 331 if (m && (m->flags & CMF_LINE)) { 332 wl = m->llen; w = l; 333 } 334 if (wl) { 335 /* Probably resize the buffer. */ 336 if (matchbuflen - matchbufadded <= wl) { 337 int blen = matchbuflen + wl + 20; 338 char *buf; 339 340 buf = (char *) zalloc(blen); 341 memcpy(buf, matchbuf, matchbuflen); 342 zfree(matchbuf, matchbuflen); 343 matchbuf = buf; 344 matchbuflen = blen; 345 } 346 /* Insert the string. */ 347 if (sfx) { 348 memmove(matchbuf + wl, matchbuf, matchbufadded + 1); 349 memcpy(matchbuf, w, wl); 350 } else 351 memcpy(matchbuf + matchbufadded, w, wl); 352 matchbufadded += wl; 353 matchbuf[matchbufadded] = '\0'; 354 } 355} 356 357/* This adds a cline for a word-part during matching. Arguments are the 358 * matcher used, pointers to the line and word strings for the anchor, 359 * a pointer to the original line string for the whole part, the string 360 * before (or after) the anchor that has not yet been added, the length 361 * of the line-string for that, and a flag saying if we are matching a 362 * suffix. */ 363 364/**/ 365static void 366add_match_part(Cmatcher m, char *l, char *w, int wl, 367 char *o, int ol, char *s, int sl, int osl, int sfx) 368{ 369 Cline p, lp, lprem; 370 371 /* If the anchors are equal, we keep only one. */ 372 373 if (l && !strncmp(l, w, wl)) 374 l = NULL; 375 376 /* 377 * Split the new part into parts and turn the last one into a 378 * `suffix' if we have a left anchor---don't do this if the last one 379 * came from a right anchor before the end of the part we're 380 * splitting. 381 */ 382 383 p = bld_parts(s, sl, osl, &lp, &lprem); 384 385 if (lprem && m && (m->flags & CMF_LEFT)) { 386 lprem->flags |= CLF_SUF; 387 lprem->suffix = lprem->prefix; 388 lprem->prefix = NULL; 389 } 390 /* cline lists for suffixes are sorted from back to front, so we have 391 * to revert the list we got. */ 392 if (sfx) 393 p = revert_cline(lp = p); 394 /* Now add the sub-clines we already had. */ 395 if (matchsubs) { 396 if (sfx) { 397 Cline q; 398 399 if ((q = lp->prefix)) { 400 while (q->next) 401 q = q->next; 402 q->next = matchsubs; 403 } else 404 lp->prefix = matchsubs; 405 406 matchlastsub->next = NULL; 407 } else { 408 matchlastsub->next = p->prefix; 409 p->prefix = matchsubs; 410 } 411 matchsubs = matchlastsub = NULL; 412 } 413 /* Store the arguments in the last part-cline. */ 414 if (lp->llen || lp->wlen) { 415 lp->next = get_cline(l, wl, w, wl, o, ol, CLF_NEW); 416 lp = lp->next; 417 } else { 418 lp->line = l; lp->llen = wl; 419 lp->word = w; lp->wlen = wl; 420 DPUTS(wl > 0 && !*w, "Bad word"); 421 lp->orig = o; lp->olen = ol; 422 } 423 if (o || ol) 424 lp->flags &= ~CLF_NEW; 425 426 /* Finally, put the new parts on the list. */ 427 if (matchlastpart) 428 matchlastpart->next = p; 429 else 430 matchparts = p; 431 matchlastpart = lp; 432} 433 434/* This adds a new sub-cline. Arguments are the matcher and the strings from 435 * the line and the word. */ 436 437/**/ 438static void 439add_match_sub(Cmatcher m, char *l, int ll, char *w, int wl) 440{ 441 int flags; 442 Cline n; 443 444 /* Check if we are interested only in the string from the line. */ 445 if (m && (m->flags & CMF_LINE)) { 446 w = NULL; wl = 0; 447 flags = CLF_LINE; 448 } else 449 flags = 0; 450 451 /* And add the cline. */ 452 if (wl || ll) { 453 Cline p, lp; 454 455 if ((p = n = bld_parts(w, wl, ll, &lp, NULL)) && n != lp) { 456 for (; p->next != lp; p = p->next); 457 458 if (matchsubs) { 459 matchlastsub->next = n->prefix; 460 n->prefix = matchsubs; 461 } 462 matchsubs = matchlastsub = lp; 463 464 if (matchlastpart) 465 matchlastpart->next = n; 466 else 467 matchparts = n; 468 p->next = 0; 469 matchlastpart = p; 470 } else { 471 n = get_cline(l, ll, w, wl, NULL, 0, 472 flags | ((m && m->wlen == -2) ? CLF_SKIP : 0)); 473 if (matchlastsub) 474 matchlastsub->next = n; 475 else 476 matchsubs = n; 477 matchlastsub = n; 478 } 479 } 480} 481 482/* This tests if the string from the line l matches the word w. In *bpp 483 * the offset for the brace is returned, in rwlp the length of the 484 * matched prefix or suffix, not including the stuff before or after 485 * the last anchor is given. When sfx is non-zero matching is done from 486 * the ends of the strings backward, if test is zero, the global variables 487 * above are used to build the string for the match and the cline. If 488 * part is non-zero, we are satisfied if only a part of the line-string 489 * is used (and return the length used). */ 490 491/**/ 492int 493match_str(char *l, char *w, Brinfo *bpp, int bc, int *rwlp, 494 int sfx, int test, int part) 495{ 496 int ll = strlen(l), lw = strlen(w), oll = ll, olw = lw, exact = 0, wexact = 0; 497 int il = 0, iw = 0, t, ind, add, he = 0, bpc, obc = bc, bslash; 498 char *ow; 499 Cmlist ms; 500 Cmatcher mp, lm = NULL; 501 Brinfo bp = NULL; 502 503 if (!test) { 504 start_match(); 505 bp = *bpp; 506 } 507 /* Adjust the pointers and get the values for subscripting and 508 * incrementing. */ 509 510 if (sfx) { 511 l += ll; w += lw; 512 ind = -1; add = -1; 513 } else { 514 ind = 0; add = 1; 515 } 516 /* ow will always point to the beginning (or end) of that sub-string 517 * in w that wasn't put in the match-variables yet. */ 518 519 ow = w; 520 521 /* If the brace is at the beginning, we have to treat it now. */ 522 523 if (!test && bp && bc >= bp->pos) { 524 bp->curpos = bc; 525 bp = bp->next; 526 } 527 /*** This once was: `while (ll && lw)', but then ignored characters at 528 * the end were not, well, ignored. */ 529 while (ll) { 530 531 /* Hm, we unconditionally first tried the matchers for the cases 532 * where the beginnings of the line and word patterns match the 533 * same character(s). 534 * But first testing if the characters are the same is sooo 535 * much faster... 536 * Maybe it's OK to make same-character matching be preferred in 537 * recursive calls. At least, it /seems/ to work. 538 * 539 * Let's try. 540 * 541 * Update: this once tested `test && ...' to check for exact 542 * character matches only in recursive calls. But then one 543 * can't complete `nom<TAB>' to `nomatch' with a match spec 544 * of `B:[nN][oO]=' because that will eat the `no'. 545 * But that would break completion of strings like `nonomatch' 546 * because the `B:[nN][oO]=' doesn't match the second `no'. 547 * For this we added the code below that can remove already 548 * accepted exact characters and try again, preferring match 549 * specs. 550 */ 551 552 bslash = 0; 553 if (!sfx && lw && (!part || test) && 554 (l[ind] == w[ind] || 555 (bslash = (lw > 1 && w[ind] == '\\' && 556 (ind ? (w[0] == l[0]) : (w[1] == l[0])))))) { 557 /* No matcher could be used, but the strings have the same 558 * character here, skip over it. */ 559 l += add; w += (bslash ? (add + add) : add); 560 il++; iw += 1 + bslash; 561 ll--; lw -= 1 + bslash; 562 bc++; 563 exact++; 564 wexact += 1 + bslash; 565 if (!test) 566 while (bp && bc >= (useqbr ? bp->qpos : bp->pos)) { 567 bp->curpos = matchbufadded + (w - ow) + obc; 568 bp = bp->next; 569 } 570 lm = NULL; 571 he = 0; 572 573 continue; 574 } 575 retry: 576 /* First try the matchers. Err... see above. */ 577 for (mp = NULL, ms = mstack; !mp && ms; ms = ms->next) { 578 for (mp = ms->matcher; mp; mp = mp->next) { 579 t = 1; 580 if ((lm && lm == mp) || 581 ((oll == ll || olw == lw) && 582 (test == 1 || (test && !mp->left && !mp->right)) && 583 mp->wlen < 0)) 584 /* If we were called recursively, don't use `*' patterns 585 * at the beginning (avoiding infinite recursion). */ 586 continue; 587 588 if (mp->wlen < 0) { 589 int both, loff, aoff, llen, alen, zoff, moff, ct, ict, aol; 590 char *tp, savl = '\0', savw; 591 Cpattern ap, aop; 592 593 /* This is for `*' patterns, first initialise some 594 * local variables. */ 595 llen = mp->llen; 596 if (mp->flags & CMF_LEFT) { 597 alen = mp->lalen; aol = mp->ralen; 598 } else { 599 alen = mp->ralen; aol = mp->lalen; 600 } 601 /* Give up if we don't have enough characters for the 602 * line-string and the anchor. */ 603 if (ll < llen + alen || lw < alen) 604 continue; 605 606 if (mp->flags & CMF_LEFT) { 607 ap = mp->left; zoff = 0; moff = alen; aop = mp->right; 608 if (sfx) { 609 both = 0; loff = -llen; aoff = -(llen + alen); 610 } else { 611 both = 1; loff = alen; aoff = 0; 612 } 613 } else { 614 ap = mp->right; zoff = alen; moff = 0; aop = mp->left; 615 if (sfx) { 616 both = 1; loff = -(llen + alen); aoff = -alen; 617 } else { 618 both = 0; loff = 0; aoff = llen; 619 } 620 } 621 /* Try to match the line pattern and the anchor. */ 622 if (!pattern_match(mp->line, l + loff, NULL, NULL)) 623 continue; 624 if (ap) { 625 if (!pattern_match(ap, l + aoff, NULL, NULL) || 626 (both && 627 (!pattern_match(ap, w + aoff, NULL, NULL) || 628 (aol && aol <= aoff + iw && 629 !pattern_match(aop, w + aoff - aol, 630 NULL, NULL)) || 631 !match_parts(l + aoff, w + aoff, alen, part)))) 632 continue; 633 } else if (!both || ((mp->flags & CMF_INTER) ? 634 ((mp->flags & CMF_LINE) ? iw : il) : 635 (il || iw))) 636 continue; 637 638 /* Fine, now we call ourselves recursively to find the 639 * string matched by the `*'. */ 640 if (sfx && (savl = l[-(llen + zoff)])) 641 l[-(llen + zoff)] = '\0'; 642 for (t = 0, tp = w, ct = 0, ict = lw - alen + 1; 643 ict; 644 tp += add, ct++, ict--) { 645 if ((both && 646 (!ap || !test || 647 !pattern_match(ap, tp + aoff, NULL, NULL) || 648 (aol && aol <= aoff + ct + iw && 649 !pattern_match(aop, tp + aoff - aol, 650 NULL, NULL)))) || 651 (!both && 652 pattern_match(ap, tp - moff, NULL, NULL) && 653 (!aol || (aol <= iw + ct - moff && 654 pattern_match(aop, tp - moff - aol, 655 NULL, NULL))) && 656 (mp->wlen == -1 || 657 match_parts(l + aoff , tp - moff, 658 alen, part)))) { 659 if (!both && mp->wlen == -1 && 660 !match_parts(l + aoff , tp - moff, alen, part)) 661 break; 662 if (sfx) { 663 if ((savw = tp[-zoff])) 664 tp[-zoff] = '\0'; 665 t = match_str(l - ll, w - lw, 666 NULL, 0, NULL, 1, 2, part); 667 if (savw) 668 tp[-zoff] = savw; 669 } else 670 t = match_str(l + llen + moff, tp + moff, 671 NULL, 0, NULL, 0, 1, part); 672 if (t || (mp->wlen == -1 && !both)) 673 break; 674 } 675 } 676 ict = ct; 677 if (sfx && savl) 678 l[-(llen + zoff)] = savl; 679 680 /* Have we found a position in w where the rest of l 681 * matches? */ 682 if (!t) 683 continue; 684 685 /* Yes, add the strings and clines if this is a 686 * top-level call. */ 687 if (!test && (!he || (llen + alen))) { 688 char *op, *lp, *map, *wap, *wmp; 689 int ol; 690 691 if (sfx) { 692 op = w; ol = ow - w; lp = l - (llen + alen); 693 map = tp - alen; 694 if (mp->flags & CMF_LEFT) { 695 wap = tp - alen; wmp = tp; 696 } else { 697 wap = w - alen; wmp = tp - alen; 698 } 699 } else { 700 op = ow; ol = w - ow; lp = l; 701 map = ow; 702 if (mp->flags & CMF_LEFT) { 703 wap = w; wmp = w + alen; 704 } else { 705 wap = tp; wmp = ow; 706 } 707 } 708 /* If the matcher says that we are only interested 709 * in the line pattern, we just add that and the 710 * anchor and the string not added yet. Otherwise 711 * we add a new part. */ 712 if (mp->flags & CMF_LINE) { 713 add_match_str(NULL, NULL, op, ol, sfx); 714 add_match_str(NULL, NULL, lp, llen + alen, sfx); 715 add_match_sub(NULL, NULL, ol, op, ol); 716 add_match_sub(NULL, NULL, llen + alen, 717 lp, llen + alen); 718 } else if (sfx) { 719 add_match_str(NULL, NULL, 720 map, ct + ol + alen, sfx); 721 add_match_part(mp, l + aoff, wap, alen, 722 l + loff, llen, op, ol, ol, sfx); 723 add_match_sub(NULL, NULL, 0, wmp, ct); 724 } else { 725 add_match_str(NULL, NULL, 726 map, ct + ol + alen, sfx); 727 if (both) { 728 add_match_sub(NULL, NULL, ol, op, ol); 729 ol = -1; 730 } else 731 ct += ol; 732 add_match_part(mp, l + aoff, wap, alen, 733 l + loff, llen, wmp, ct, ol, sfx); 734 } 735 } 736 /* Now skip over the matched portion and the anchor. */ 737 llen += alen; alen += ict; 738 if (sfx) { 739 l -= llen; w -= alen; 740 } else { 741 l += llen; w += alen; 742 } 743 ll -= llen; il += llen; 744 lw -= alen; iw += alen; 745 bc += llen; 746 exact = 0; 747 748 if (!test) 749 while (bp && 750 bc >= (bpc = (useqbr ? bp->qpos : bp->pos))) { 751 bp->curpos = matchbufadded + bpc - bc + obc; 752 bp = bp->next; 753 } 754 ow = w; 755 756 if (!llen && !alen) { 757 lm = mp; 758 if (he) 759 mp = NULL; 760 else 761 he = 1; 762 } else { 763 lm = NULL; he = 0; 764 } 765 break; 766 } else if (ll >= mp->llen && lw >= mp->wlen) { 767 /* Non-`*'-pattern. */ 768 char *tl, *tw; 769 int tll, tlw, til, tiw; 770 771 /* We do this only if the line- and word-substrings 772 * are not equal. */ 773 if (!(mp->flags & (CMF_LEFT | CMF_RIGHT)) && 774 mp->llen == mp->wlen && 775 !(sfx ? strncmp(l - mp->llen, w - mp->wlen, mp->llen) : 776 strncmp(l, w, mp->llen))) 777 continue; 778 779 /* Using local variables to make the following 780 * independent of whether we match a prefix or a 781 * suffix. */ 782 if (sfx) { 783 tl = l - mp->llen; tw = w - mp->wlen; 784 til = ll - mp->llen; tiw = lw - mp->wlen; 785 tll = il + mp->llen; tlw = iw + mp->wlen; 786 } else { 787 tl = l; tw = w; 788 til = il; tiw = iw; 789 tll = ll; tlw = lw; 790 } 791 if (mp->flags & CMF_LEFT) { 792 /* Try to match the left anchor, if any. */ 793 if (til < mp->lalen || tiw < mp->lalen + mp->ralen) 794 continue; 795 else if (mp->left) 796 t = pattern_match(mp->left, tl - mp->lalen, 797 NULL, NULL) && 798 pattern_match(mp->left, tw - mp->lalen, 799 NULL, NULL) && 800 (!mp->ralen || 801 pattern_match(mp->right, 802 tw - mp->lalen - mp->ralen, 803 NULL, NULL)); 804 else 805 t = (!sfx && !((mp->flags & CMF_INTER) ? 806 ((mp->flags & CMF_LINE) ? iw : il) : 807 (il || iw))); 808 } 809 if (mp->flags & CMF_RIGHT) { 810 /* Try to match the right anchor, if any. */ 811 if (tll < mp->llen + mp->ralen || 812 tlw < mp->wlen + mp->ralen + mp->lalen) 813 continue; 814 else if (mp->right) 815 t = pattern_match(mp->right, 816 tl + mp->llen - mp->ralen, 817 NULL, NULL) && 818 pattern_match(mp->right, 819 tw + mp->wlen - mp->ralen, 820 NULL, NULL) && 821 (!mp->lalen || 822 pattern_match(mp->left, tw + mp->wlen - 823 mp->ralen - mp->lalen, 824 NULL, NULL)); 825 else 826 t = (sfx && !((mp->flags & CMF_INTER) ? 827 ((mp->flags & CMF_LINE) ? iw : il) : 828 (il || iw))); 829 } 830 /* Now try to match the line and word patterns. */ 831 if (!t || !pattern_match(mp->line, tl, mp->word, tw)) 832 continue; 833 834 /* Probably add the matched strings. */ 835 if (!test) { 836 if (sfx) 837 { 838 if (ow >= w) 839 add_match_str(NULL, NULL, w, ow - w, sfx); 840 } 841 else 842 { 843 if (w >= ow) 844 add_match_str(NULL, NULL, ow, w - ow, sfx); 845 } 846 add_match_str(mp, tl, tw, mp->wlen, sfx); 847 if (sfx) 848 { 849 if (ow >= w) 850 add_match_sub(NULL, NULL, 0, w, ow - w); 851 } 852 else 853 { 854 if (w >= ow) 855 add_match_sub(NULL, NULL, 0, ow, w - ow); 856 } 857 add_match_sub(mp, tl, mp->llen, tw, mp->wlen); 858 } 859 if (sfx) { 860 l = tl; w = tw; 861 } else { 862 l += mp->llen; w += mp->wlen; 863 } 864 il += mp->llen; iw += mp->wlen; 865 ll -= mp->llen; lw -= mp->wlen; 866 bc += mp->llen; 867 exact = 0; 868 869 if (!test) 870 while (bp && 871 bc >= (bpc = (useqbr ? bp->qpos : bp->pos))) { 872 bp->curpos = matchbufadded + bpc - bc + obc; 873 bp = bp->next; 874 } 875 ow = w; 876 lm = NULL; 877 he = 0; 878 break; 879 } 880 } 881 } 882 if (mp) 883 continue; 884 885 /* Same code as at the beginning, used in top-level calls. */ 886 887 bslash = 0; 888 if ((!test || sfx) && lw && 889 (l[ind] == w[ind] || 890 (bslash = (lw > 1 && w[ind] == '\\' && 891 (ind ? (w[0] == l[0]) : (w[1] == l[0])))))) { 892 /* No matcher could be used, but the strings have the same 893 * character here, skip over it. */ 894 l += add; w += (bslash ? (add + add ) : add); 895 il++; iw += 1 + bslash; 896 ll--; lw -= 1 + bslash; 897 bc++; 898 if (!test) 899 while (bp && bc >= (useqbr ? bp->qpos : bp->pos)) { 900 bp->curpos = matchbufadded + (sfx ? (ow - w) : (w - ow)) + obc; 901 bp = bp->next; 902 } 903 lm = NULL; 904 he = 0; 905 } else { 906 907 if (!lw) 908 break; 909 910 if (exact && !part) { 911 /* If we just accepted some characters directly (at the 912 * beginning of the loop) and now can't match any further, 913 * we go back to before those characters and try again, 914 * preferring match specs this time. */ 915 916 il -= exact; iw -= wexact; 917 ll += exact; lw += wexact; 918 bc -= exact; 919 l -= add * exact; w -= add * wexact; 920 921 exact = wexact = 0; 922 923 goto retry; 924 } 925 /* No matcher and different characters: l does not match w. */ 926 if (test) 927 return 0; 928 929 abort_match(); 930 931 return -1; 932 } 933 } 934 /* If this is a recursive call, we just return if l matched w or not. */ 935 if (test) 936 return (part || !ll); 937 938 /* In top-level calls, if ll is non-zero (unmatched portion in l), 939 * we have to free the collected clines. */ 940 if (!part && ll) { 941 abort_match(); 942 943 return -1; 944 } 945 if (rwlp) 946 *rwlp = iw - (sfx ? ow - w : w - ow); 947 948 /* If we matched a suffix, the anchors stored in the top-clines 949 * will be in the wrong clines: shifted by one. Adjust this. */ 950 if (sfx && matchparts) { 951 Cline t, tn, s; 952 953 if (matchparts->prefix || matchparts->suffix) { 954 t = get_cline(NULL, 0, NULL, 0, NULL, 0, 0); 955 t->next = matchparts; 956 if (matchparts->prefix) 957 t->prefix = (Cline) 1; 958 else 959 t->suffix = (Cline) 1; 960 matchparts = t; 961 } 962 for (t = matchparts; (tn = t->next); t = tn) { 963 s = (tn->prefix ? tn->prefix : tn->suffix); 964 if (t->suffix) 965 t->suffix = s; 966 else 967 t->prefix = s; 968 } 969 t->prefix = t->suffix = NULL; 970 } 971 /* Finally, return the number of matched characters. */ 972 973 *bpp = bp; 974 return (part ? il : iw); 975} 976 977/* Wrapper for match_str(), only for a certain length and only doing 978 * the test. */ 979 980/**/ 981static int 982match_parts(char *l, char *w, int n, int part) 983{ 984 char lsav = l[n], wsav = w[n]; 985 int ret; 986 987 if (lsav) 988 l[n] = '\0'; 989 if (wsav) 990 w[n] = '\0'; 991 ret = match_str(l, w, NULL, 0, NULL, 0, 1, part); 992 if (lsav) 993 l[n] = lsav; 994 if (wsav) 995 w[n] = wsav; 996 997 return ret; 998} 999 1000/* Check if the word w is matched by the strings in pfx and sfx (the prefix 1001 * and the suffix from the line) or the pattern cp. In clp a cline list for 1002 * w is returned. 1003 * qu is non-zero if the words has to be quoted before processed any 1004 * further: the value 2 indicates file quoting. 1005 * bpl and bsl are used to report the positions where the brace-strings in 1006 * the prefix and the suffix have to be re-inserted if this match is inserted 1007 * in the line. 1008 * The return value is the string to use as a completion or NULL if the prefix 1009 * and the suffix don't match the word w. */ 1010 1011/**/ 1012mod_export char * 1013comp_match(char *pfx, char *sfx, char *w, Patprog cp, Cline *clp, int qu, 1014 Brinfo *bpl, int bcp, Brinfo *bsl, int bcs, int *exact) 1015{ 1016 char *r = NULL; 1017 1018 if (cp) { 1019 /* We have a globcomplete-like pattern, just use that. */ 1020 int wl; 1021 char *teststr; 1022 1023 r = w; 1024 if (!qu) { 1025 /* 1026 * If we're not quoting the strings, that means they're 1027 * already quoted (?) and so when we test them against 1028 * a pattern we have to remove the quotes else we will 1029 * end up trying to match against the quote characters. 1030 * 1031 * Almost certainly this fails in some complicated cases 1032 * but it should catch the basic ones. 1033 */ 1034 teststr = dupstring(r); 1035 tokenize(teststr); 1036 if (parse_subst_string(teststr)) 1037 teststr = r; 1038 else { 1039 remnulargs(teststr); 1040 untokenize(teststr); 1041 } 1042 } else 1043 teststr = r; 1044 if (!pattry(cp, teststr)) 1045 return NULL; 1046 1047 r = (qu == 2 ? tildequote(r, 0) : multiquote(r, !qu)); 1048 1049 /* We still break it into parts here, trying to build a sensible 1050 * cline list for these matches, too. */ 1051 w = dupstring(w); 1052 wl = strlen(w); 1053 *clp = bld_parts(w, wl, wl, NULL, NULL); 1054 *exact = 0; 1055 } else { 1056 Cline pli, plil; 1057 int mpl, rpl, wl; 1058 1059 w = (qu == 2 ? tildequote(w, 0) : multiquote(w, !qu)); 1060 wl = strlen(w); 1061 1062 /* Always try to match the prefix. */ 1063 1064 useqbr = qu; 1065 if ((mpl = match_str(pfx, w, bpl, bcp, &rpl, 0, 0, 0)) < 0) 1066 return NULL; 1067 1068 if (sfx && *sfx) { 1069 int wpl = matchbufadded, msl, rsl; 1070 VARARR(char, wpfx, wpl); 1071 Cline mli, mlil; 1072 1073 /* We also have a suffix to match, so first save the 1074 * contents of the global matching variables. */ 1075 memcpy(wpfx, matchbuf, wpl); 1076 if (matchsubs) { 1077 Cline tmp = get_cline(NULL, 0, NULL, 0, NULL, 0, 0); 1078 1079 tmp->prefix = matchsubs; 1080 if (matchlastpart) 1081 matchlastpart->next = tmp; 1082 else 1083 matchparts = tmp; 1084 } 1085 pli = matchparts; 1086 plil = matchlastpart; 1087 1088 /* The try to match the suffix. */ 1089 1090 if ((msl = match_str(sfx, w + mpl, bsl, bcs, &rsl, 1, 0, 0)) < 0) { 1091 free_cline(pli); 1092 1093 return NULL; 1094 } 1095 /* Matched, so add the string in the middle and the saved 1096 * string for the prefix, and build a combined cline list 1097 * for the prefix and the suffix. */ 1098 if (matchsubs) { 1099 Cline tmp = get_cline(NULL, 0, NULL, 0, NULL, 0, CLF_SUF); 1100 1101 tmp->suffix = matchsubs; 1102 if (matchlastpart) 1103 matchlastpart->next = tmp; 1104 else 1105 matchparts = tmp; 1106 } 1107 add_match_str(NULL, NULL, w + rpl, wl - rpl - rsl, 1); 1108 add_match_str(NULL, NULL, wpfx, wpl, 1); 1109 1110 mli = bld_parts(w + rpl, wl - rpl - rsl, 1111 (mpl - rpl) + (msl - rsl), &mlil, NULL); 1112 mlil->flags |= CLF_MID; 1113 mlil->slen = msl - rsl; 1114 mlil->next = revert_cline(matchparts); 1115 1116 if (plil) 1117 plil->next = mli; 1118 else 1119 pli = mli; 1120 } else { 1121 /* Only a prefix, add the string and a part-cline for it. */ 1122 add_match_str(NULL, NULL, w + rpl, wl - rpl, 0); 1123 1124 add_match_part(NULL, NULL, NULL, 0, NULL, 0, w + rpl, wl - rpl, 1125 mpl - rpl, 0); 1126 pli = matchparts; 1127 } 1128 r = dupstring(matchbuf ? matchbuf : ""); 1129 1130 *clp = pli; 1131 1132 /* Test if the string built is equal to the one from the line. */ 1133 if (sfx && *sfx) { 1134 int pl = strlen(pfx); 1135 1136 *exact = (!strncmp(pfx, w, pl) && !strcmp(sfx, w + pl)); 1137 } else 1138 *exact = !strcmp(pfx, w); 1139 } 1140 if (!qu) 1141 hasunqu = 1; 1142 1143 return r; 1144} 1145 1146 1147/* 1148 * Guts of a single pattern for pattern_match(). 1149 * Return non-zero if match successful. 1150 * If the class was an equivalence, return 1 + the index into 1151 * the equivalence class (see pattern.c for how this is calculated). 1152 */ 1153 1154/**/ 1155mod_export convchar_t 1156pattern_match1(Cpattern p, convchar_t c, int *mtp) 1157{ 1158 convchar_t ind; 1159 1160 *mtp = 0; 1161 switch (p->tp) { 1162 case CPAT_CCLASS: 1163 return PATMATCHRANGE(p->u.str, CONVCAST(c), NULL, NULL); 1164 1165 case CPAT_NCLASS: 1166 return !PATMATCHRANGE(p->u.str, CONVCAST(c), NULL, NULL); 1167 1168 case CPAT_EQUIV: 1169 if (PATMATCHRANGE(p->u.str, CONVCAST(c), &ind, mtp)) 1170 return ind + 1; 1171 else 1172 return 0; 1173 1174 case CPAT_ANY: 1175 return 1; 1176 1177 case CPAT_CHAR: 1178 return (p->u.chr == c); 1179 1180 default: 1181 DPUTS(1, "bad matcher pattern type"); 1182 return 0; 1183 } 1184} 1185 1186 1187/* 1188 * Use an equivalence to deduce the line character from the word, or 1189 * vice versa. (If vice versa, then "line" and "word" are reversed 1190 * in what follows. The logic is symmetric.) 1191 * lp is the line pattern. 1192 * wind is the index returned by a pattern match on the word pattern, 1193 * with type wmtp. 1194 * wchr is the word character. 1195 * Return CHR_INVALID if no matching character, else the character. 1196 * 1197 * Only makes sense if lp->tp == CPAT_EQUIV and the (unseen) word 1198 * pattern also has that type. 1199 */ 1200 1201/**/ 1202mod_export convchar_t 1203pattern_match_equivalence(Cpattern lp, convchar_t wind, int wmtp, 1204 convchar_t wchr) 1205{ 1206 convchar_t lchr; 1207 int lmtp; 1208 1209 if (!PATMATCHINDEX(lp->u.str, wind-1, &lchr, &lmtp)) { 1210 /* 1211 * No equivalent. No possible match; give up. 1212 */ 1213 return CHR_INVALID; 1214 } 1215 /* 1216 * If we matched an exact character rather than a range 1217 * type, return it. 1218 */ 1219 if (lchr != CHR_INVALID) 1220 return lchr; 1221 1222 /* 1223 * Check the match types. We may want a case-changed 1224 * version of the word character. 1225 */ 1226 if (wmtp == PP_UPPER && lmtp == PP_LOWER) 1227 return ZC_tolower(wchr); 1228 else if (wmtp == PP_LOWER && lmtp == PP_UPPER) 1229 return ZC_toupper(wchr); 1230 else if (wmtp == lmtp) { 1231 /* 1232 * Be lenient and allow identical replacements 1233 * for character classes, although in fact this 1234 * doesn't give special functionality for equivalence 1235 * classes. 1236 */ 1237 return wchr; 1238 } else { 1239 /* 1240 * Non-matching generic types; this can't work. 1241 */ 1242 return CHR_INVALID; 1243 } 1244} 1245 1246/* 1247 * Check if the given pattern matches the given string. 1248 * p is either an anchor or line pattern and string; 1249 * wp and wsc are word (candidate) pattern and string 1250 * 1251 * Check that characters match for {...} classes by comparing positions in the 1252 * strings. 1253 * 1254 * prestrict is a chain of patterns at least as long 1255 * as the line string. In this case we are still assembling the line at 1256 * newline (which has been allocated but doesn't yet contain anything useful) 1257 * and must continue to do so as we go along; prestrict gives 1258 * restrictions on the line character to be applied along side the other 1259 * patterns. In the simple case a restriction is a character to be put 1260 * in place; otherwise it is a set of possible characters and we have to 1261 * deduce an actual matching character. Note prestrict is never an 1262 * equivalence class. In extreme cases we can't deduce a unique 1263 * character; then the match fails. 1264 * 1265 * If prestrict is not NULL, s will be NULL. 1266 */ 1267 1268/**/ 1269static int 1270pattern_match_restrict(Cpattern p, Cpattern wp, convchar_t *wsc, int wsclen, 1271 Cpattern prestrict, ZLE_STRING_T new_line) 1272{ 1273 convchar_t c; 1274 convchar_t ind, wind; 1275 int mt, wmt; 1276 1277 while (p && wp && wsclen && prestrict) { 1278 /* First test the word character */ 1279 wind = pattern_match1(wp, *wsc, &wmt); 1280 if (!wind) 1281 return 0; 1282 1283 /* 1284 * Now the line character; deal with the case where 1285 * we don't yet have it, only a restriction on it. 1286 */ 1287 if (prestrict->tp == CPAT_CHAR) { 1288 /* 1289 * Easy case: restricted to an exact character on 1290 * the line. Procede as normal. 1291 */ 1292 c = prestrict->u.chr; 1293 } else { 1294 if (p->tp == CPAT_CHAR) { 1295 /* 1296 * Normal line pattern is an exact character: as 1297 * long as this matches prestrict, we can proceed 1298 * as usual. 1299 */ 1300 c = p->u.chr; 1301 } else if (p->tp == CPAT_EQUIV) { 1302 /* 1303 * An equivalence, so we can deduce the character 1304 * backwards from the word pattern and see if it 1305 * matches prestrict. 1306 */ 1307 if ((c = pattern_match_equivalence(p, wind, wmt, *wsc)) == 1308 CHR_INVALID) 1309 return 0; 1310 } else { 1311 /* 1312 * Not an equivalence, so that means we must match 1313 * the word (not just the word pattern), so grab it 1314 * and make sure it fulfills our needs. I think. 1315 * Not 100% sure about that, but what else can 1316 * we do? We haven't actually been passed a string 1317 * from the command line. 1318 */ 1319 c = *wsc; 1320 } 1321 /* Character so deduced must match the restriction. */ 1322 if (!pattern_match1(prestrict, c, &mt)) 1323 return 0; 1324 } 1325 1326 /* 1327 * If either is "?", they match each other; no further tests. 1328 * Apply this even if the character wasn't convertable; 1329 * there's no point trying to be clever in that case. 1330 */ 1331 if (p->tp != CPAT_ANY || wp->tp != CPAT_ANY) 1332 { 1333 ind = pattern_match1(p, c, &mt); 1334 if (!ind) 1335 return 0; 1336 if (ind != wind) 1337 return 0; 1338 if (mt != wmt) { 1339 /* 1340 * Special case if matching lower vs. upper or 1341 * vice versa. The transformed characters must match. 1342 * We don't need to check the transformation is 1343 * the appropriate one for each character separately, 1344 * since that was done in pattern_match1(), so just 1345 * compare lower-cased versions of both. 1346 */ 1347 if ((mt == PP_LOWER || mt == PP_UPPER) && 1348 (wmt == PP_LOWER || wmt == PP_UPPER)) { 1349 if (ZC_tolower(c) != ZC_tolower(*wsc)) 1350 return 0; 1351 } else { 1352 /* Other different classes can't match. */ 1353 return 0; 1354 } 1355 } 1356 } 1357 1358 /* We need to assemble the line */ 1359 *new_line++ = (ZLE_CHAR_T)c; 1360 prestrict = prestrict->next; 1361 wsc++; 1362 wsclen--; 1363 p = p->next; 1364 wp = wp->next; 1365 } 1366 1367 while (p && prestrict) { 1368 /* 1369 * As above, but with even less info to go on. 1370 * (Can this happen?) At least handle the cases where 1371 * one of our patterns has given us a specific character. 1372 */ 1373 if (prestrict->tp == CPAT_CHAR) { 1374 c = prestrict->u.chr; 1375 } else { 1376 if (p->tp == CPAT_CHAR) { 1377 c = p->u.chr; 1378 } else { 1379 /* 1380 * OK. Here we are in a function with just a line 1381 * pattern and another pattern to restrict the 1382 * characters that can go on the line, and no actual 1383 * characters. We're matching two patterns against 1384 * one another to generate a character to insert. 1385 * This is a bit too psychedelic, so I'm going to 1386 * bale out now. See you on the ground. 1387 */ 1388 return 0; 1389 } 1390 if (!pattern_match1(prestrict, c, &mt)) 1391 return 0; 1392 } 1393 if (!pattern_match1(p, c, &mt)) 1394 return 0; 1395 p = p->next; 1396 *new_line++ = (ZLE_CHAR_T)c; 1397 prestrict = prestrict->next; 1398 } 1399 1400 if (prestrict) { 1401 /* Restriction with nothing to match */ 1402 return 0; 1403 } 1404 1405 while (wp && wsclen) { 1406 /* No funny business when we only have the word pattern. */ 1407 if (!pattern_match1(wp, *wsc, &wmt)) 1408 return 0; 1409 wp = wp->next; 1410 wsc++; 1411 wsclen--; 1412 } 1413 1414 return 1; 1415} 1416 1417 1418/* 1419 * The usual version of pattern matching, without the line string 1420 * being handled by restriction. 1421 * 1422 * Check if the given pattern matches the given string. 1423 * p and s are either anchor or line pattern and string; 1424 * wp and ws are word (candidate) pattern and string 1425 * 1426 * If only one pattern is given, we just check if characters match. 1427 * If both line and word are given, we check that characters match 1428 * for {...} classes by comparing positions in the strings. 1429 * 1430 * Patterns and strings are always passed in pairs, so it is enough 1431 * to check for non-NULL wp. p should always be present. 1432 */ 1433/**/ 1434mod_export int 1435pattern_match(Cpattern p, char *s, Cpattern wp, char *ws) 1436{ 1437 convchar_t c, wc; 1438 convchar_t ind, wind; 1439 int len = 0, wlen, mt, wmt; 1440#ifdef MULTIBYTE_SUPPORT 1441 mbstate_t lstate, wstate; 1442 1443 memset(&lstate, 0, sizeof(lstate)); 1444 memset(&wstate, 0, sizeof(wstate)); 1445#endif 1446 1447 while (p && wp && *s && *ws) { 1448 /* First test the word character */ 1449#ifdef MULTIBYTE_SUPPORT 1450 wlen = mb_metacharlenconv_r(ws, &wc, &wstate); 1451#else 1452 if (*ws == Meta) { 1453 wc = STOUC(ws[1]) ^ 32; 1454 wlen = 2; 1455 } else { 1456 wc = STOUC(*ws); 1457 wlen = 1; 1458 } 1459#endif 1460 wind = pattern_match1(wp, wc, &wmt); 1461 if (!wind) 1462 return 0; 1463 1464 /* 1465 * Now the line character. 1466 */ 1467#ifdef MULTIBYTE_SUPPORT 1468 len = mb_metacharlenconv_r(s, &c, &lstate); 1469#else 1470 /* We have the character itself. */ 1471 if (*s == Meta) { 1472 c = STOUC(s[1]) ^ 32; 1473 len = 2; 1474 } else { 1475 c = STOUC(*s); 1476 len = 1; 1477 } 1478#endif 1479 /* 1480 * If either is "?", they match each other; no further tests. 1481 * Apply this even if the character wasn't convertable; 1482 * there's no point trying to be clever in that case. 1483 */ 1484 if (p->tp != CPAT_ANY || wp->tp != CPAT_ANY) 1485 { 1486 ind = pattern_match1(p, c, &mt); 1487 if (!ind) 1488 return 0; 1489 if (ind != wind) 1490 return 0; 1491 if (mt != wmt) { 1492 /* 1493 * Special case if matching lower vs. upper or 1494 * vice versa. The transformed characters must match. 1495 * We don't need to check the transformation is 1496 * the appropriate one for each character separately, 1497 * since that was done in pattern_match1(), so just 1498 * compare lower-cased versions of both. 1499 */ 1500 if ((mt == PP_LOWER || mt == PP_UPPER) && 1501 (wmt == PP_LOWER || wmt == PP_UPPER)) { 1502 if (ZC_tolower(c) != ZC_tolower(wc)) 1503 return 0; 1504 } else { 1505 /* Other different classes can't match. */ 1506 return 0; 1507 } 1508 } 1509 } 1510 1511 s += len; 1512 ws += wlen; 1513 p = p->next; 1514 wp = wp->next; 1515 } 1516 1517 while (p && *s) { 1518#ifdef MULTIBYTE_SUPPORT 1519 len = mb_metacharlenconv_r(s, &c, &lstate); 1520#else 1521 if (*s == Meta) { 1522 c = STOUC(s[1]) ^ 32; 1523 len = 2; 1524 } else { 1525 c = STOUC(*s); 1526 len = 1; 1527 } 1528#endif 1529 if (!pattern_match1(p, c, &mt)) 1530 return 0; 1531 p = p->next; 1532 s += len; 1533 } 1534 1535 while (wp && *ws) { 1536#ifdef MULTIBYTE_SUPPORT 1537 wlen = mb_metacharlenconv_r(ws, &wc, &wstate); 1538#else 1539 if (*ws == Meta) { 1540 wc = STOUC(ws[1]) ^ 32; 1541 wlen = 2; 1542 } else { 1543 wc = STOUC(*ws); 1544 wlen = 1; 1545 } 1546#endif 1547 if (!pattern_match1(wp, wc, &wmt)) 1548 return 0; 1549 wp = wp->next; 1550 ws += wlen; 1551 } 1552 1553 return 1; 1554} 1555 1556 1557/* This splits the given string into a list of cline structs, separated 1558 * at those places where one of the anchors of an `*' pattern was found. 1559 * plen gives the number of characters on the line that matched this 1560 * string. 1561 * 1562 * In *lp, if lp is not NULL, we return a pointer to the last cline struct we 1563 * build. 1564 * 1565 * In *lprem, if lprem is not NULL, we return a pointer to the last 1566 * cline struct we build if it came from the remainder of the 1567 * line rather than from a right anchor match, else NULL. 1568 */ 1569 1570/**/ 1571Cline 1572bld_parts(char *str, int len, int plen, Cline *lp, Cline *lprem) 1573{ 1574 Cline ret = NULL, *q = &ret, n = NULL; 1575 Cmlist ms; 1576 Cmatcher mp; 1577 int t, op = plen; 1578 char *p = str, *os = str; 1579 1580 while (len) { 1581 for (t = 0, ms = bmatchers; ms && !t; ms = ms->next) { 1582 mp = ms->matcher; 1583 if (mp && mp->flags == CMF_RIGHT && mp->wlen < 0 && mp->ralen && 1584 !mp->llen && len >= mp->ralen && (str - os) >= mp->lalen && 1585 pattern_match(mp->right, str, NULL, NULL) && 1586 (!mp->lalen || 1587 ((str - os) >= mp->lalen && 1588 pattern_match(mp->left, str - mp->lalen, NULL, NULL)))) { 1589 int olen = str - p, llen; 1590 1591 /* We found an anchor, create a new cline. The NEW flag 1592 * is set if the characters before the anchor were not 1593 * on the line. */ 1594 *q = n = get_cline(NULL, mp->ralen, str, mp->ralen, NULL, 0, 1595 ((plen <= 0) ? CLF_NEW : 0)); 1596 1597 /* If there were any characters before the anchor, add 1598 * them as a cline struct. */ 1599 1600 if (p != str) { 1601 llen = (op < 0 ? 0 : op); 1602 1603 if (llen > olen) 1604 llen = olen; 1605 n->prefix = get_cline(NULL, llen, p, olen, NULL, 0, 0); 1606 } 1607 q = &(n->next); 1608 str += mp->ralen; len -= mp->ralen; 1609 plen -= mp->ralen; 1610 op -= olen; 1611 p = str; 1612 t = 1; 1613 } 1614 } 1615 if (!t) { 1616 /* No anchor was found here, skip. */ 1617 str++; len--; 1618 plen--; 1619 } 1620 } 1621 /* This is the cline struct for the remaining string at the end. */ 1622 1623 if (p != str) { 1624 int olen = str - p, llen = (op < 0 ? 0 : op); 1625 1626 *q = n = get_cline(NULL, 0, NULL, 0, NULL, 0, (plen <= 0 ? CLF_NEW : 0)); 1627 1628 if (llen > olen) 1629 llen = olen; 1630 n->prefix = get_cline(NULL, llen, p, olen, NULL, 0, 0); 1631 if (lprem) 1632 *lprem = n; 1633 } 1634 else if (!ret) { 1635 *q = n = 1636 get_cline(NULL, 0, NULL, 0, NULL, 0, (plen <= 0 ? CLF_NEW : 0)); 1637 if (lprem) 1638 *lprem = n; 1639 } else if (lprem) { 1640 *lprem = NULL; 1641 } 1642 1643 if (n) 1644 n->next = NULL; 1645 1646 if (lp) 1647 *lp = n; 1648 1649 return ret; 1650} 1651 1652 1653/* 1654 * This builds all the possible line patterns for the pattern pat in the 1655 * buffer line. Then we test if this line matches the string given by 1656 * wlen and word. 1657 * 1658 * The matcher ) wpat, containing pattern that matched previously 1659 * mp gives ) lpat, containing the pattern for line we build 1660 * line is the line we are assembling; it is initially empty 1661 * mword is a string that matched wpat before 1662 * word is string that we try to match now 1663 * 1664 * The return value is the length of the string matched in the word, it 1665 * is zero if we couldn't build a line that matches the word. 1666 */ 1667 1668/**/ 1669static int 1670bld_line(Cmatcher mp, ZLE_STRING_T line, char *mword, char *word, 1671 int wlen, int sfx) 1672{ 1673 Cpattern lpat = mp->line; 1674 Cpattern wpat = mp->word; 1675 Cpattern curgenpat; 1676 Cmlist ms; 1677 int llen, rl, l; 1678 convchar_t convchr, *wordcp; 1679 VARARR(convchar_t, wordchars, wlen); 1680 VARARR(struct cpattern, genpatarr, mp->llen); 1681 1682 /* 1683 * We may need to start the "word" array from the end. This 1684 * is much easier if we convert it to an array of (possibly wide) 1685 * characters. 1686 */ 1687 MB_METACHARINIT(); 1688 for (l = wlen, wordcp = wordchars; l; l--) { 1689 int charlen = MB_METACHARLENCONV(word, &convchr); 1690#ifdef MULTIBYTE_SUPPORT 1691 if (convchr == WEOF) 1692 convchr = (*word == Meta) ? word[1] ^ 32 : *word; 1693#endif 1694 *wordcp++ = convchr; 1695 word += charlen; 1696 } 1697 1698 /* 1699 * Loop over all characters. At this stage, line is an empty 1700 * space of length llen (not counting the null byte) which we assemble as 1701 * we go along. 1702 * 1703 * However, first we need to know what characters can appear at each 1704 * point in the line. For this we assemble an list genpatarr of the 1705 * same length as the line. (It's convenient to store this as an 1706 * array but it's linked as a list, too.) If there are equivalences 1707 * we use mword to derive the equivalent character; when we've 1708 * reached the end of mword, equivalences are treated just like 1709 * ordinary character classes. For character classes we just attach 1710 * the class to the genpatarr list and apply it as a restriction 1711 * when we finally match the line against the set of matchers. 1712 */ 1713 curgenpat = genpatarr; 1714 MB_METACHARINIT(); 1715 while (lpat) { 1716 convchar_t wchr, wind; 1717 int wmtp, mwordlen; 1718 /* 1719 * If the line pattern is an equivalence, query wpat to find the 1720 * word part of the equivalence. If we don't find one we don't try 1721 * equivalencing but use lpat as an ordinary match. (It's not 1722 * entirely clear to me this is the correct behaviour on a 1723 * failed character match within the equivalence, but that was 1724 * the behaviour of the old logic that this replaces.) 1725 */ 1726 if (lpat->tp == CPAT_EQUIV && wpat && *mword) { 1727 mwordlen = MB_METACHARLENCONV(mword, &wchr); 1728 wind = pattern_match1(wpat, wchr, &wmtp); 1729 wpat = wpat->next; 1730 mword += mwordlen; 1731 } else 1732 wind = 0; 1733 if (wind) { 1734 /* 1735 * Successful match for word side of equivalence. 1736 * Find the line equivalent. 1737 */ 1738 convchar_t lchr; 1739 if ((lchr = pattern_match_equivalence(lpat, wind, wmtp, wchr)) 1740 == CHR_INVALID) { 1741 /* 1742 * No equivalent. No possible match; give up. 1743 */ 1744 return 0; 1745 } 1746 /* 1747 * We now have an exact character to match, 1748 * so make up a pattern element for it. 1749 */ 1750 curgenpat->tp = CPAT_CHAR; 1751 curgenpat->u.chr = lchr; 1752 } else { 1753 /* 1754 * Not an equivalence class, so we just keep the 1755 * test in the lpat as it is. 1756 */ 1757 curgenpat->tp = lpat->tp; 1758 if (lpat->tp == CPAT_CHAR) 1759 curgenpat->u.chr = lpat->u.chr; 1760 else if (lpat->tp != CPAT_ANY) { 1761 /* 1762 * The string isn't modified and is only needed in calls from 1763 * this function, so we don't even need to copy it. 1764 */ 1765 curgenpat->u.str = lpat->u.str; 1766 } 1767 } 1768 lpat = lpat->next; 1769 /* 1770 * This linked list is defined above as an array. 1771 * We could get away with just keeping it as an array 1772 * and passing it down as such, but that's a bit icky 1773 * since the generic linkage of Cpatterns is as a linked 1774 * list and we should keep our local memory management 1775 * problems to ourselvess. 1776 */ 1777 if (lpat) 1778 curgenpat->next = curgenpat+1; 1779 else 1780 curgenpat->next = NULL; 1781 curgenpat++; 1782 } 1783 1784 /* 1785 * We now know how to match the word with the line patterns; let's 1786 * see if it does. We will use the information in curgenpat if we 1787 * are successful to work out what character goes on the line. This 1788 * is a bit hairy, as in "the Yeti is a creature that is a bit 1789 * hairy". 1790 */ 1791 llen = mp->llen; 1792 rl = 0; 1793 1794 if (sfx) 1795 { 1796 /* 1797 * We need to work backwards from the end of both the 1798 * word and the line strings. 1799 */ 1800 wordcp = wordchars + wlen; 1801 1802 /* 1803 * We construct the line from the end. 1804 */ 1805 line += llen; 1806 curgenpat = genpatarr + llen; 1807 } else { 1808 wordcp = wordchars; 1809 curgenpat = genpatarr; 1810 } 1811 1812 /* we now reuse mp, lpat, wpat for the global matchers */ 1813 MB_METACHARINIT(); 1814 while (llen && wlen) { 1815 int wmtp; 1816 convchar_t *wp; 1817 Cpattern tmpgenpat; 1818 1819 if (sfx) { 1820 wp = wordcp - 1; 1821 tmpgenpat = curgenpat - 1; 1822 } else { 1823 tmpgenpat = curgenpat; 1824 wp = wordcp; 1825 } 1826 if (pattern_match1(tmpgenpat, *wp, &wmtp)) 1827 { 1828 convchar_t lchr; 1829 /* 1830 * We can match the line character directly with the word 1831 * character. If the line character is a fixed one, 1832 * keep it, since we went to all that trouble above, 1833 * else if it's generic, keep the word character, 1834 * since we have no choice. 1835 */ 1836 if (tmpgenpat->tp == CPAT_CHAR) 1837 lchr = tmpgenpat->u.chr; 1838 else 1839 lchr = *wp; 1840 1841 if (sfx) 1842 *--line = lchr; 1843 else 1844 *line++ = lchr; 1845 1846 llen--; 1847 wlen--; 1848 rl++; 1849 1850 if (sfx) { 1851 wordcp = wp; 1852 curgenpat = tmpgenpat; 1853 } else { 1854 if (llen) 1855 curgenpat++; 1856 wordcp++; 1857 } 1858 } 1859 else 1860 { 1861 ZLE_CHAR_T *lp; 1862 /* 1863 * Need to loop over pattern matchers. 1864 */ 1865 for (ms = bmatchers; ms; ms = ms->next) { 1866 mp = ms->matcher; 1867 /* 1868 * This is the nightmare case: we have line and 1869 * and word matchers and some pattern which restricts 1870 * the value on the line without us knowing exactly 1871 * what it is. Despatch to the special function 1872 * for that. 1873 */ 1874 if (mp && !mp->flags && mp->wlen <= wlen && 1875 mp->llen <= llen) 1876 { 1877 lp = line; 1878 wp = wordcp; 1879 tmpgenpat = curgenpat; 1880 1881 if (sfx) { 1882 lp -= mp->llen; 1883 wp -= mp->wlen; 1884 tmpgenpat -= mp->llen; 1885 } 1886 1887 if (pattern_match_restrict(mp->line, mp->word, wp, 1888 wlen - (wp - wordchars), 1889 tmpgenpat, lp)) { 1890 /* 1891 * Matched: advance over as many characters 1892 * of the patterns and strings as 1893 * we've done matches. 1894 */ 1895 if (sfx) { 1896 line = lp; 1897 wordcp = wp; 1898 curgenpat = tmpgenpat; 1899 } else { 1900 line += mp->llen; 1901 wordcp += mp->wlen; 1902 curgenpat += mp->llen; 1903 } 1904 llen -= mp->llen; 1905 wlen -= mp->wlen; 1906 rl += mp->wlen; 1907 break; 1908 } 1909 } 1910 } 1911 if (!ms) 1912 return 0; /* Didn't match, give up */ 1913 } 1914 } 1915 if (!llen) { 1916 /* Unmatched portion in the line built, return matched length. */ 1917 return rl; 1918 } 1919 return 0; 1920} 1921 1922/* This builds a string that may be put on the line that fully matches the 1923 * given strings. The return value is NULL if no such string could be built 1924 * or that string in local static memory, dup it. */ 1925 1926/**/ 1927static char * 1928join_strs(int la, char *sa, int lb, char *sb) 1929{ 1930 static char *rs = NULL; 1931 static int rl = 0; 1932 1933 Cmlist ms; 1934 Cmatcher mp; 1935 int t, bl; 1936 /** rr is the remaining length already allocated in rs */ 1937 int rr = rl; 1938 /* 1939 * convlen is the length we need for the string converted to 1940 * char * (possibly multibyte). 1941 */ 1942 int convlen; 1943 char *rp = rs; 1944 1945 while (la && lb) { 1946 if (*sa != *sb) { 1947 /* Different characters, try the matchers. */ 1948 for (t = 0, ms = bmatchers; ms && !t; ms = ms->next) { 1949 mp = ms->matcher; 1950 if (mp && !mp->flags && mp->wlen > 0 && mp->llen > 0 && 1951 mp->wlen <= la && mp->wlen <= lb) { 1952 /* The pattern has no anchors and the word 1953 * pattern fits, try it. */ 1954 if ((t = pattern_match(mp->word, sa, NULL, NULL)) || 1955 pattern_match(mp->word, sb, NULL, NULL)) { 1956 /* It matched one of the strings, t says which one. */ 1957 VARARR(ZLE_CHAR_T, line, mp->llen); 1958 char **ap, **bp; 1959 int *alp, *blp; 1960 1961 if (t) { 1962 ap = &sa; 1963 alp = &la; 1964 1965 bp = &sb; 1966 blp = &lb; 1967 } else { 1968 ap = &sb; 1969 alp = &lb; 1970 1971 bp = &sa; 1972 blp = &la; 1973 } 1974 /* Now try to build a string that matches the other 1975 * string. */ 1976 if ((bl = bld_line(mp, line, *ap, *bp, *blp, 0))) { 1977 /* Found one, put it into the return string. */ 1978 char *convstr = 1979 zlelineasstring(line, mp->llen, 0, &convlen, 1980 NULL, 0); 1981 if (rr <= convlen) { 1982 char *or = rs; 1983 int alloclen = (convlen > 20) ? convlen : 20; 1984 1985 rs = realloc(rs, (rl += alloclen)); 1986 rr += alloclen; 1987 rp += rs - or; 1988 } 1989 memcpy(rp, convstr, convlen); 1990 rp += convlen; 1991 rr -= convlen; 1992 /* HERE: multibyte chars */ 1993 *ap += mp->wlen; 1994 *alp -= mp->wlen; 1995 1996 *bp += bl; 1997 *blp -= bl; 1998 t = 1; 1999 free(convstr); 2000 } else 2001 t = 0; 2002 } 2003 } 2004 } 2005 if (!t) 2006 break; 2007 } else { 2008 /* Same character, just take it. */ 2009 if (rr <= 1 /* HERE charlen */) { 2010 char *or = rs; 2011 2012 rs = realloc(rs, (rl += 20)); 2013 rr += 20; 2014 rp += rs - or; 2015 } 2016 /* HERE: multibyte char */ 2017 *rp++ = *sa; 2018 rr--; 2019 sa++; 2020 sb++; 2021 la--; 2022 lb--; 2023 } 2024 } 2025 if (la || lb) 2026 return NULL; 2027 2028 *rp = '\0'; 2029 2030 return rs; 2031} 2032 2033/* 2034 * This compares the anchors stored in two top-level clines. 2035 * It returns 1 if the anchors are the same, 2 if they are 2036 * compatible (and have been combined in "o"), 0 otherwise. 2037 */ 2038 2039/**/ 2040static int 2041cmp_anchors(Cline o, Cline n, int join) 2042{ 2043 int line = 0; 2044 char *j; 2045 2046 /* First try the exact strings. */ 2047 if ((!(o->flags & CLF_LINE) && o->wlen == n->wlen && 2048 (!o->word || !strncmp(o->word, n->word, o->wlen))) || 2049 (line = ((!o->line && !n->line && !o->wlen && !n->wlen) || 2050 (o->llen == n->llen && o->line && n->line && 2051 !strncmp(o->line, n->line, o->llen))))) { 2052 if (line) { 2053 o->flags |= CLF_LINE; 2054 o->word = NULL; 2055 o->wlen = 0; 2056 } 2057 return 1; 2058 } 2059 /* Didn't work, try to build a string matching both anchors. */ 2060 if (join && !(o->flags & CLF_JOIN) && o->word && n->word && 2061 (j = join_strs(o->wlen, o->word, n->wlen, n->word))) { 2062 o->flags |= CLF_JOIN; 2063 o->wlen = strlen(j); 2064 o->word = dupstring(j); 2065 2066 return 2; 2067 } 2068 return 0; 2069} 2070 2071/* Below is the code to join two cline lists. This struct is used to walk 2072 * through a sub-list. */ 2073 2074typedef struct cmdata *Cmdata; 2075 2076struct cmdata { 2077 Cline cl, pcl; 2078 char *str, *astr; 2079 int len, alen, olen, line; 2080}; 2081 2082/* This is used to ensure that a cmdata struct contains usable data. 2083 * The return value is non-zero if we reached the end. */ 2084 2085static int 2086check_cmdata(Cmdata md, int sfx) 2087{ 2088 /* We will use the str and len fields to contain the next sub-string 2089 * in the list. If len is zero, we have to use the next cline. */ 2090 if (!md->len) { 2091 /* If there is none, we reached the end. */ 2092 if (!md->cl) 2093 return 1; 2094 2095 /* Otherwise, get the string. Only the line-string or both. 2096 * We also have to adjust the pointer if this is for a suffix. */ 2097 if (md->cl->flags & CLF_LINE) { 2098 md->line = 1; 2099 md->len = md->cl->llen; 2100 md->str = md->cl->line; 2101 } else { 2102 md->line = 0; 2103 md->len = md->olen = md->cl->wlen; 2104 /* HERE: multibyte */ 2105 if ((md->str = md->cl->word) && sfx) 2106 md->str += md->len; 2107 md->alen = md->cl->llen; 2108 /* HERE: multibyte */ 2109 if ((md->astr = md->cl->line) && sfx) 2110 md->astr += md->alen; 2111 } 2112 md->pcl = md->cl; 2113 md->cl = md->cl->next; 2114 } 2115 return 0; 2116} 2117 2118/* This puts the not-yet-matched portion back into the last cline and 2119 * returns that. */ 2120 2121static Cline 2122undo_cmdata(Cmdata md, int sfx) 2123{ 2124 Cline r = md->pcl; 2125 2126 if (md->line) { 2127 r->word = NULL; 2128 r->wlen = 0; 2129 r->flags |= CLF_LINE; 2130 r->llen = md->len; 2131 /* HERE: multibyte */ 2132 r->line = md->str - (sfx ? md->len : 0); 2133 } else if (md->len != md->olen) { 2134 r->wlen = md->len; 2135 /* HERE: multibyte */ 2136 r->word = md->str - (sfx ? md->len : 0); 2137 DPUTS(r->wlen > 0 && !*r->word, "Bad word"); 2138 } 2139 return r; 2140} 2141 2142/* This tries to build a string matching a sub-string in a sub-cline 2143 * that could not be matched otherwise. */ 2144 2145static Cline 2146join_sub(Cmdata md, char *str, int len, int *mlen, int sfx, int join) 2147{ 2148 if (!check_cmdata(md, sfx)) { 2149 char *ow = str, *nw = md->str; 2150 int ol = len, nl = md->len; 2151 Cmlist ms; 2152 Cmatcher mp; 2153 int t; 2154 2155 if (sfx) { 2156 ow += ol; nw += nl; 2157 } 2158 for (t = 0, ms = bmatchers; ms && !t; ms = ms->next) { 2159 mp = ms->matcher; 2160 /* We use only those patterns that match a non-empty 2161 * string in both the line and the word and that have 2162 * no anchors. */ 2163 if (mp && !mp->flags && mp->wlen > 0 && mp->llen > 0) { 2164 /* We first test, if the old string matches already the 2165 * new one. */ 2166 if (mp->llen <= ol && mp->wlen <= nl && 2167 pattern_match(mp->line, ow - (sfx ? mp->llen : 0), 2168 mp->word, nw - (sfx ? mp->wlen : 0))) { 2169 /* It did, update the contents of the cmdata struct 2170 * and return a cline for the matched part. */ 2171 if (sfx) 2172 md->str -= mp->wlen; 2173 else 2174 md->str += mp->wlen; 2175 md->len -= mp->wlen; 2176 *mlen = mp->llen; 2177 2178 return get_cline(NULL, 0, ow - (sfx ? mp->llen : 0), 2179 mp->llen, NULL, 0, 0); 2180 } 2181 /* Otherwise we will try to build a string that matches 2182 * both strings. But try the pattern only if the word- 2183 * pattern matches one of the strings. */ 2184 if (join && mp->wlen <= ol && mp->wlen <= nl && 2185 ((t = pattern_match(mp->word, ow - (sfx ? mp->wlen : 0), 2186 NULL, NULL)) || 2187 pattern_match(mp->word, nw - (sfx ? mp->wlen : 0), 2188 NULL, NULL))) { 2189 VARARR(ZLE_CHAR_T, line, mp->llen); 2190 int bl; 2191 char *mw; 2192 2193 /* Then build all the possible lines and see 2194 * if one of them matches the other string. */ 2195 /* HERE: they're multibyte */ 2196 if (t) 2197 mw = ow - (sfx ? mp->wlen : 0); 2198 else 2199 mw = nw - (sfx ? mp->wlen : 0); 2200 2201 if ((bl = bld_line(mp, line, mw, (t ? nw : ow), 2202 (t ? nl : ol), sfx))) { 2203 /* Yep, one of the lines matched the other 2204 * string. */ 2205 2206 /* HERE: multibyte characters */ 2207 if (t) { 2208 ol = mp->wlen; nl = bl; 2209 } else { 2210 ol = bl; nl = mp->wlen; 2211 } 2212 if (sfx) 2213 md->str -= nl; 2214 else 2215 md->str += nl; 2216 md->len -= nl; 2217 *mlen = ol; 2218 2219 return get_cline(NULL, 0, 2220 zlelineasstring(line, mp->llen, 2221 0, NULL, NULL, 1), 2222 mp->llen, NULL, 0, CLF_JOIN); 2223 } 2224 } 2225 } 2226 } 2227 } 2228 return NULL; 2229} 2230 2231/* This is used to match a sub-string in a sub-cline. The length of the 2232 * matched portion is returned. This tests only for exact equality. */ 2233 2234static int 2235sub_match(Cmdata md, char *str, int len, int sfx) 2236{ 2237 int ret = 0, l, ind, add; 2238 char *p, *q; 2239#ifdef MULTIBYTE_SUPPORT 2240 int fulllen = len; 2241 char *fullstr = str; 2242 mbstate_t mbs; 2243#endif 2244 2245 if (sfx) { 2246 str += len; 2247 ind = -1; add = -1; 2248 } else { 2249 ind = 0; add = 1; 2250 } 2251 /* str and len describe the old string, in md we have the new one. */ 2252 while (len) { 2253 if (check_cmdata(md, sfx)) 2254 return ret; 2255 2256 /* 2257 * Look for a common prefix. If we do include metafied 2258 * characters, at this stage we still need the overall length 2259 * including Meta's as separate characters. 2260 */ 2261 for (l = 0, p = str, q = md->str; 2262 l < len && l < md->len && p[ind] == q[ind]; 2263 l++, p += add, q += add) {} 2264 2265 /* Make sure we don't end in the middle of a Meta sequence. */ 2266 if (add == 1) { 2267 if (l && p[-1] == Meta) 2268 l--; 2269 } else { 2270 if (l && ((l < len && p[-1] == Meta) 2271 || (l < md->len && q[-1] == Meta))) 2272 l--; 2273 } 2274#ifdef MULTIBYTE_SUPPORT 2275 /* 2276 * Make sure we don't end in the middle of a multibyte character. 2277 * Don't need to do this if the match ended at the start 2278 * of the original string. 2279 * 2280 * Let q be the match point we've found. 2281 */ 2282 q = sfx ? str - l : str + l; 2283 if (q != fullstr) { 2284 memset(&mbs, 0, sizeof mbs); 2285 /* 2286 * Otherwise read characters from the start of the original 2287 * string until we reach or pass the match point. This 2288 * is rather inefficient, but in general only reading 2289 * the full string can keep track of where we are in 2290 * a character. With a prefix we could be more efficient, 2291 * but it's difficult with a suffix where the match point 2292 * moves backwards. 2293 */ 2294 for (p = fullstr; p < fullstr + fulllen; ) { 2295 wchar_t wc; 2296 /* 2297 * ret must, in fact, be set by the current logic, 2298 * but gcc doesn't realise (at least some versions don't). 2299 */ 2300 size_t cnt = MB_INVALID; 2301 int diff; 2302 char *p2; 2303 2304 /* 2305 * Because the string is metafied, we need to 2306 * assembled wide characters a byte at a time. 2307 */ 2308 for (p2 = p; p2 < fullstr + fulllen; p2++) { 2309 char curchar = (*p2 == Meta) ? (*++p2 ^ 32) : *p2; 2310 cnt = mbrtowc(&wc, &curchar, 1, &mbs); 2311 /* Continue while character is incomplete. */ 2312 if (cnt != MB_INCOMPLETE) 2313 break; 2314 } 2315 if (cnt == MB_INVALID || cnt == MB_INCOMPLETE) { 2316 /* not a valid character, give up test */ 2317 break; 2318 } 2319 /* increment p2 for last byte read */ 2320 diff = ++p2 - q; 2321 if (diff == 0) { 2322 /* 2323 * Prefix or suffix matches at end of multbyte character, 2324 * so OK. 2325 */ 2326 break; 2327 } else if (diff > 0) { 2328 /* 2329 * The prefix or suffix finishes in the middle 2330 * of a character. Shorten it until it doesn't. 2331 */ 2332 if (sfx) { 2333 /* 2334 * We need to remove the trailing part of 2335 * the character from the suffix. 2336 */ 2337 l -= diff; 2338 } else { 2339 /* 2340 * We need to remove the initial part of 2341 * the character from the prefix. 2342 */ 2343 l -= (q - p); 2344 } 2345 break; 2346 } 2347 /* Advance over full character */ 2348 p = p2; 2349 } 2350 } 2351#endif 2352 if (l) { 2353 /* There was a common prefix, use it. */ 2354 md->len -= l; len -= l; 2355 if (sfx) { 2356 md->str -= l; str -= l; 2357 } else { 2358 md->str += l; str += l; 2359 } 2360 ret += l; 2361 } else if (md->line || md->len != md->olen || !md->astr) 2362 return ret; 2363 else { 2364 /* We still have the line string to try. */ 2365 md->line = 1; 2366 md->len = md->alen; 2367 md->str = md->astr; 2368 } 2369 } 2370 return ret; 2371} 2372 2373/* This is used to build a common prefix or suffix sub-list. If requested 2374 * it returns the unmatched cline lists in orest and nrest. */ 2375 2376/**/ 2377static void 2378join_psfx(Cline ot, Cline nt, Cline *orest, Cline *nrest, int sfx) 2379{ 2380 Cline p = NULL, o, n; 2381 struct cmdata md, omd; 2382 char **sstr = NULL; 2383 int len, join = 0, line = 0, *slen = NULL; 2384 2385 if (sfx) { 2386 o = ot->suffix; n = nt->suffix; 2387 } else { 2388 o = ot->prefix; n = nt->prefix; 2389 } 2390 if (!o) { 2391 if (orest) 2392 *orest = NULL; 2393 if (nrest) 2394 *nrest = n; 2395 if (n && n->wlen) 2396 ot->flags |= CLF_MISS; 2397 2398 return; 2399 } 2400 if (!n) { 2401 if (sfx) 2402 ot->suffix = NULL; 2403 else 2404 ot->prefix = NULL; 2405 2406 if (orest) 2407 *orest = o; 2408 else 2409 free_cline(o); 2410 if (nrest) 2411 *nrest = NULL; 2412 return; 2413 } 2414 md.cl = n; 2415 md.len = 0; 2416 2417 /* Walk through the old list. */ 2418 while (o) { 2419 join = 0; 2420 memcpy(&omd, &md, sizeof(struct cmdata)); 2421 2422 /* We first get the length of the prefix equal in both strings. */ 2423 if (o->flags & CLF_LINE) { 2424 if ((len = sub_match(&md, o->line, o->llen, sfx)) != o->llen) { 2425 join = 1; line = 1; slen = &(o->llen); sstr = &(o->line); 2426 } 2427 } else if ((len = sub_match(&md, o->word, o->wlen, sfx)) != o->wlen) { 2428 if (o->line) { 2429 memcpy(&md, &omd, sizeof(struct cmdata)); 2430 o->flags |= CLF_LINE | CLF_DIFF; 2431 2432 continue; 2433 } 2434 o->llen = o->llen - ot->slen; 2435 join = 1; line = 0; slen = &(o->wlen); sstr = &(o->word); 2436 } 2437 if (join) { 2438 /* There is a rest that is different in the two lists, 2439 * we try to build a new cline matching both strings. */ 2440 Cline joinl; 2441 int jlen; 2442 2443 if ((joinl = join_sub(&md, *sstr + len, *slen - len, 2444 &jlen, sfx, !(o->flags & CLF_JOIN)))) { 2445 /* We have one, insert it into the list. */ 2446 joinl->flags |= CLF_DIFF; 2447 if (len + jlen != *slen) { 2448 Cline rest; 2449 2450 rest = get_cline(NULL, 0, *sstr + (sfx ? 0 : len + jlen), 2451 *slen - len - jlen, NULL, 0, 0); 2452 2453 rest->next = o->next; 2454 joinl->next = rest; 2455 } else 2456 joinl->next = o->next; 2457 2458 if (len) { 2459 if (sfx) 2460 *sstr += *slen - len; 2461 *slen = len; 2462 o->next = joinl; 2463 } else { 2464 o->next = NULL; 2465 free_cline(o); 2466 if (p) 2467 p->next = joinl; 2468 else if (sfx) 2469 ot->suffix = joinl; 2470 else 2471 ot->prefix = joinl; 2472 } 2473 o = joinl; 2474 join = 0; 2475 } 2476 } 2477 if (join) { 2478 /* We couldn't build a cline for a common string, so we 2479 * cut the list here. */ 2480 if (len) { 2481 Cline r; 2482 2483 if (orest) { 2484 if (line) 2485 r = get_cline(o->line + len, *slen - len, 2486 NULL, 0, NULL, 0, o->flags); 2487 else 2488 r = get_cline(NULL, 0, o->word + len, *slen - len, 2489 NULL, 0, o->flags); 2490 2491 r->next = o->next; 2492 *orest = r; 2493 2494 *slen = len; 2495 o->next = NULL; 2496 } else { 2497 if (sfx) 2498 *sstr += *slen - len; 2499 *slen = len; 2500 free_cline(o->next); 2501 o->next = NULL; 2502 } 2503 } else { 2504 if (p) 2505 p->next = NULL; 2506 else if (sfx) 2507 ot->suffix = NULL; 2508 else 2509 ot->prefix = NULL; 2510 2511 if (orest) 2512 *orest = o; 2513 else 2514 free_cline(o); 2515 } 2516 if (!orest || !nrest) 2517 ot->flags |= CLF_MISS; 2518 2519 if (nrest) 2520 *nrest = undo_cmdata(&md, sfx); 2521 2522 return; 2523 } 2524 p = o; 2525 o = o->next; 2526 } 2527 if (md.len || md.cl) 2528 ot->flags |= CLF_MISS; 2529 if (orest) 2530 *orest = NULL; 2531 if (nrest) 2532 *nrest = undo_cmdata(&md, sfx); 2533} 2534 2535/* This builds the common prefix and suffix for a mid-cline -- the one 2536 * describing the place where the prefix and the suffix meet. */ 2537 2538/**/ 2539static void 2540join_mid(Cline o, Cline n) 2541{ 2542 if (o->flags & CLF_JOIN) { 2543 /* The JOIN flag is set in the old cline struct if it was 2544 * already joined with another one. In this case the suffix 2545 * field contains the suffix from previous calls. */ 2546 Cline nr; 2547 2548 join_psfx(o, n, NULL, &nr, 0); 2549 2550 n->suffix = revert_cline(nr); 2551 2552 join_psfx(o, n, NULL, NULL, 1); 2553 } else { 2554 /* This is the first time for both structs, so the prefix field 2555 * contains the whole sub-list. */ 2556 Cline or, nr; 2557 2558 o->flags |= CLF_JOIN; 2559 2560 /* We let us give both rests and use them as the suffixes. */ 2561 join_psfx(o, n, &or, &nr, 0); 2562 2563 if (or) 2564 or->llen = (o->slen > or->wlen ? or->wlen : o->slen); 2565 o->suffix = revert_cline(or); 2566 n->suffix = revert_cline(nr); 2567 2568 join_psfx(o, n, NULL, NULL, 1); 2569 } 2570 n->suffix = NULL; 2571} 2572 2573/* This turns the sequence of anchor cline structs from b to e into a 2574 * prefix sequence, puts it before the prefix of e and then tries to 2575 * join that with the prefix of a. 2576 * This is needed if some matches had a anchor match spec and others 2577 * didn't. */ 2578 2579/**/ 2580static int 2581sub_join(Cline a, Cline b, Cline e, int anew) 2582{ 2583 if (!e->suffix && a->prefix) { 2584 Cline op = e->prefix, n = NULL, *p = &n, t, ca; 2585 int min = 0, max = 0; 2586 2587 for (; b != e; b = b->next) { 2588 if ((*p = t = b->prefix)) { 2589 while (t->next) 2590 t = t->next; 2591 p = &(t->next); 2592 } 2593 b->suffix = b->prefix = NULL; 2594 b->flags &= ~CLF_SUF; 2595 min += b->min; 2596 max += b->max; 2597 *p = b; 2598 p = &(b->next); 2599 } 2600 *p = e->prefix; 2601 ca = a->prefix; 2602 2603 while (n) { 2604 e->prefix = cp_cline(n, 1); 2605 a->prefix = cp_cline(ca, 1); 2606 2607 if (anew) { 2608 int f = e->flags; 2609 2610 join_psfx(e, a, NULL, NULL, 0); 2611 e->flags = f; 2612 if (e->prefix) 2613 return max - min; 2614 } else { 2615 int f = e->flags; 2616 2617 join_psfx(a, e, NULL, NULL, 0); 2618 e->flags = f; 2619 if (a->prefix) 2620 return max - min; 2621 } 2622 min -= n->min; 2623 2624 if (n == op) 2625 break; 2626 n = n->next; 2627 } 2628 return max - min; 2629 } 2630 return 0; 2631} 2632 2633/* This simplifies the cline list given as the first argument so that 2634 * it also matches the second list. */ 2635 2636/**/ 2637Cline 2638join_clines(Cline o, Cline n) 2639{ 2640 cline_setlens(n, 1); 2641 2642 /* First time called, just return the new list. On further invocations 2643 * we will get it as the first argument. */ 2644 if (!o) 2645 return n; 2646 else { 2647 Cline oo = o, nn = n, po = NULL, pn = NULL, x; 2648 int diff; 2649 2650 /* Walk through the lists. */ 2651 while (o && n) { 2652 /* If one of them describes a new part and the other one does 2653 * not, synchronise them by searching an old part in the 2654 * other list. */ 2655 if ((o->flags & CLF_NEW) && !(n->flags & CLF_NEW)) { 2656 Cline t, tn; 2657 2658 for (t = o; (tn = t->next) && 2659 ((tn->flags & CLF_NEW) || !cmp_anchors(tn, n, 0)); 2660 t = tn); 2661 if (tn) { 2662 diff = sub_join(n, o, tn, 1); 2663 2664 if (po) 2665 po->next = tn; 2666 else 2667 oo = tn; 2668 2669 t->next = NULL; 2670 free_cline(o); 2671 x = o; 2672 o = tn; 2673 2674 if (po && po->prefix && cmp_anchors(x, po, 0)) { 2675 po->flags |= CLF_MISS; 2676 po->max += diff; 2677 } else { 2678 o->flags |= CLF_MISS; 2679 o->max += diff; 2680 } 2681 continue; 2682 } 2683 } 2684 if (!(o->flags & CLF_NEW) && (n->flags & CLF_NEW)) { 2685 Cline t, tn; 2686 2687 for (t = n; (tn = t->next) && 2688 ((tn->flags & CLF_NEW) || !cmp_anchors(o, tn, 0)); 2689 t = tn); 2690 if (tn) { 2691 int of = o->flags & CLF_MISS; 2692 2693 diff = sub_join(o, n, tn, 0); 2694 o->flags = (o->flags & ~CLF_MISS) | of; 2695 2696 if (po && po->prefix && cmp_anchors(n, pn, 0)) { 2697 po->flags |= CLF_MISS; 2698 po->max += diff; 2699 } else { 2700 o->flags |= CLF_MISS; 2701 o->max += diff; 2702 } 2703 n = tn; 2704 continue; 2705 } 2706 } 2707 /* Almost the same as above, but for the case that they 2708 * describe different types of parts (prefix, suffix, or mid). */ 2709 if ((o->flags & (CLF_SUF | CLF_MID)) != 2710 (n->flags & (CLF_SUF | CLF_MID))) { 2711 Cline t, tn; 2712 2713 for (t = n; 2714 (tn = t->next) && 2715 (tn->flags & (CLF_SUF | CLF_MID)) != 2716 (o->flags & (CLF_SUF | CLF_MID)); 2717 t = tn); 2718 if (tn && cmp_anchors(o, tn, 1)) { 2719 sub_join(o, n, tn, 0); 2720 2721 n = tn; 2722 continue; 2723 } 2724 for (t = o; 2725 (tn = t->next) && 2726 (tn->flags & (CLF_SUF | CLF_MID)) != 2727 (n->flags & (CLF_SUF | CLF_MID)); 2728 t = tn); 2729 if (tn && cmp_anchors(tn, n, 1)) { 2730 sub_join(n, o, tn, 1); 2731 2732 if (po) 2733 po->next = tn; 2734 else 2735 oo = tn; 2736 t->next = NULL; 2737 free_cline(o); 2738 o = tn; 2739 continue; 2740 } 2741 if (o->flags & CLF_MID) { 2742 o->flags = (o->flags & ~CLF_MID) | (n->flags & CLF_SUF); 2743 if (n->flags & CLF_SUF) { 2744 free_cline(o->prefix); 2745 o->prefix = NULL; 2746 } else { 2747 free_cline(o->suffix); 2748 o->suffix = NULL; 2749 } 2750 } 2751 break; 2752 } 2753 /* Now see if they have matching anchors. If not, cut the list. */ 2754 if (!(o->flags & CLF_MID) && !cmp_anchors(o, n, 1)) { 2755 Cline t, tn, tt, to = NULL; 2756 2757 for (t = n; (tn = t->next); t = tn) 2758 if (!(tn->flags & CLF_NEW) && (tn->flags & CLF_SKIP)) { 2759 for (tt = o; (to = tt->next); tt = to) 2760 if (!(to->flags & CLF_NEW) && (to->flags & CLF_SKIP) && 2761 cmp_anchors(tn, to, 1)) 2762 break; 2763 if (to) 2764 break; 2765 } 2766 if (tn) { 2767 if (po) 2768 po->next = to; 2769 else 2770 oo = to; 2771 o = to; 2772 2773 diff = sub_join(o, n, tn, 0); 2774 2775 o->flags |= CLF_MISS; 2776 o->max += diff; 2777 2778 n = tn; 2779 po = o; 2780 o = o->next; 2781 pn = n; 2782 n = n->next; 2783 continue; 2784 } else { 2785 for (t = o; (to = t->next); t = to) 2786 if ((to->flags & CLF_SKIP) && cmp_anchors(n, to, 1)) 2787 break; 2788 2789 if (to) { 2790 diff = sub_join(n, o, to, 1); 2791 2792 if (po) 2793 po->next = to; 2794 else 2795 oo = to; 2796 x = o; 2797 o = to; 2798 if (po && po->prefix && cmp_anchors(x, po, 0)) { 2799 po->flags |= CLF_MISS; 2800 po->max += diff; 2801 } else { 2802 o->flags |= CLF_MISS; 2803 o->max += diff; 2804 } 2805 continue; 2806 } else { 2807 for (tt = NULL, t = n; (tn = t->next); t = tn) { 2808 if (tn->flags & CLF_SKIP) 2809 for (tt = o; (to = tt->next); tt = to) 2810 if ((to->flags & CLF_SKIP) && 2811 cmp_anchors(tn, to, 1)) 2812 break; 2813 if (to) 2814 break; 2815 } 2816 if (to) { 2817 diff = sub_join(n, o, to, 1); 2818 2819 if (po) 2820 po->next = to; 2821 else 2822 oo = to; 2823 x = o; 2824 o = to; 2825 if (po && po->prefix && cmp_anchors(x, po, 0)) { 2826 po->flags |= CLF_MISS; 2827 po->max += diff; 2828 } else { 2829 o->flags |= CLF_MISS; 2830 o->max += diff; 2831 } 2832 continue; 2833 } else { 2834 for (tn = n; tn; tn = tn->next) 2835 if ((tn->flags & CLF_NEW) == 2836 (o->flags & CLF_NEW) && 2837 cmp_anchors(tn, o, 1)) break; 2838 2839 if (tn) { 2840 int of = o->flags & CLF_MISS; 2841 2842 if ((diff = sub_join(o, n, tn, 0))) { 2843 o->flags = (o->flags & ~CLF_MISS) | of; 2844 if (po && po->prefix) { 2845 po->flags |= CLF_MISS; 2846 po->max += diff; 2847 } 2848 else { 2849 o->flags |= CLF_MISS; 2850 o->max += diff; 2851 } 2852 } 2853 n = tn; 2854 po = o; 2855 o = o->next; 2856 pn = n; 2857 n = n->next; 2858 continue; 2859 } 2860 if (o->flags & CLF_SUF) 2861 break; 2862 2863 o->word = o->line = o->orig = NULL; 2864 o->wlen = 0; 2865 free_cline(o->next); 2866 o->next = NULL; 2867 o->flags |= CLF_MISS; 2868 } 2869 } 2870 } 2871 } 2872 /* Ok, they are equal, now copy the information about the 2873 * original string if needed, calculate minimum and maximum 2874 * lengths, and join the sub-lists. */ 2875 if (!o->orig && !o->olen) { 2876 o->orig = n->orig; 2877 o->olen = n->olen; 2878 } 2879 if (n->min < o->min) 2880 o->min = n->min; 2881 if (n->max > o->max) 2882 o->max = n->max; 2883 if (o->flags & CLF_MID) 2884 join_mid(o, n); 2885 else 2886 join_psfx(o, n, NULL, NULL, (o->flags & CLF_SUF)); 2887 2888 po = o; 2889 o = o->next; 2890 pn = n; 2891 n = n->next; 2892 } 2893 /* Free the rest of the old list. */ 2894 if (o) { 2895 if (po) 2896 po->next = NULL; 2897 else 2898 oo = NULL; 2899 2900 free_cline(o); 2901 } 2902 free_cline(nn); 2903 2904 return oo; 2905 } 2906} 2907