1/* 2 * pattern.c - pattern matching 3 * 4 * This file is part of zsh, the Z shell. 5 * 6 * Copyright (c) 1999 Peter Stephenson 7 * All rights reserved. 8 * 9 * Permission is hereby granted, without written agreement and without 10 * license or royalty fees, to use, copy, modify, and distribute this 11 * software and to distribute modified versions of this software for any 12 * purpose, provided that the above copyright notice and the following 13 * two paragraphs appear in all copies of this software. 14 * 15 * In no event shall Peter Stephenson or the Zsh Development Group be liable 16 * to any party for direct, indirect, special, incidental, or consequential 17 * damages arising out of the use of this software and its documentation, 18 * even if Peter Stephenson and the Zsh Development Group have been advised of 19 * the possibility of such damage. 20 * 21 * Peter Stephenson and the Zsh Development Group specifically disclaim any 22 * warranties, including, but not limited to, the implied warranties of 23 * merchantability and fitness for a particular purpose. The software 24 * provided hereunder is on an "as is" basis, and Peter Stephenson and the 25 * Zsh Development Group have no obligation to provide maintenance, 26 * support, updates, enhancements, or modifications. 27 * 28 * Pattern matching code derived from the regexp library by Henry 29 * Spencer, which has the following copyright. 30 * 31 * Copyright (c) 1986 by University of Toronto. 32 * Written by Henry Spencer. Not derived from licensed software. 33 * 34 * Permission is granted to anyone to use this software for any 35 * purpose on any computer system, and to redistribute it freely, 36 * subject to the following restrictions: 37 * 38 * 1. The author is not responsible for the consequences of use of 39 * this software, no matter how awful, even if they arise 40 * from defects in it. 41 * 42 * 2. The origin of this software must not be misrepresented, either 43 * by explicit claim or by omission. 44 * 45 * 3. Altered versions must be plainly marked as such, and must not 46 * be misrepresented as being the original software. 47 * 48 * Eagle-eyed readers will notice this is an altered version. Incredibly 49 * sharp-eyed readers might even find bits that weren't altered. 50 * 51 * 52 * And I experienced a sense that, like certain regular 53 * expressions, seemed to match the day from beginning to end, so 54 * that I did not need to identify the parenthesised subexpression 55 * that told of dawn, nor the group of characters that indicated 56 * the moment when my grandfather returned home with news of 57 * Swann's departure for Paris; and the whole length of the month 58 * of May, as if matched by a closure, fitted into the buffer of my 59 * life with no sign of overflowing, turning the days, like a 60 * procession of insects that could consist of this or that 61 * species, into a random and unstructured repetition of different 62 * sequences, anchored from the first day of the month to the last 63 * in the same fashion as the weeks when I knew I would not see 64 * Gilberte and would search in vain for any occurrences of the 65 * string in the avenue of hawthorns by Tansonville, without my 66 * having to delimit explicitly the start or finish of the pattern. 67 * 68 * M. Proust, "In Search of Lost Files", 69 * bk I, "The Walk by Bourne's Place". 70 */ 71 72#include "zsh.mdh" 73 74/* 75 * The following union is used mostly for alignment purposes. 76 * Normal nodes are longs, while certain nodes take a char * as an argument; 77 * here we make sure that they both work out to the same length. 78 * The compiled regexp we construct consists of upats stuck together; 79 * anything else to be added (strings, numbers) is stuck after and 80 * then aligned to a whole number of upat units. 81 * 82 * Note also that offsets are in terms of the sizes of these things. 83 */ 84union upat { 85 long l; 86 unsigned char *p; 87}; 88 89typedef union upat *Upat; 90 91#include "pattern.pro" 92 93/* Number of active parenthesized expressions allowed in backreferencing */ 94#define NSUBEXP 9 95 96/* definition number opnd? meaning */ 97#define P_END 0x00 /* no End of program. */ 98#define P_EXCSYNC 0x01 /* no Test if following exclude already failed */ 99#define P_EXCEND 0x02 /* no Test if exclude matched orig branch */ 100#define P_BACK 0x03 /* no Match "", "next" ptr points backward. */ 101#define P_EXACTLY 0x04 /* lstr Match this string. */ 102#define P_NOTHING 0x05 /* no Match empty string. */ 103#define P_ONEHASH 0x06 /* node Match this (simple) thing 0 or more times. */ 104#define P_TWOHASH 0x07 /* node Match this (simple) thing 1 or more times. */ 105#define P_GFLAGS 0x08 /* long Match nothing and set globbing flags */ 106#define P_ISSTART 0x09 /* no Match start of string. */ 107#define P_ISEND 0x0a /* no Match end of string. */ 108#define P_COUNTSTART 0x0b /* no Initialise P_COUNT */ 109#define P_COUNT 0x0c /* 3*long uc* node Match a number of repetitions */ 110/* numbered so we can test bit 5 for a branch */ 111#define P_BRANCH 0x20 /* node Match this alternative, or the next... */ 112#define P_WBRANCH 0x21 /* uc* node P_BRANCH, but match at least 1 char */ 113/* excludes are also branches, but have bit 4 set, too */ 114#define P_EXCLUDE 0x30 /* uc* node Exclude this from previous branch */ 115#define P_EXCLUDP 0x31 /* uc* node Exclude, using full file path so far */ 116/* numbered so we can test bit 6 so as not to match initial '.' */ 117#define P_ANY 0x40 /* no Match any one character. */ 118#define P_ANYOF 0x41 /* str Match any character in this string. */ 119#define P_ANYBUT 0x42 /* str Match any character not in this string. */ 120#define P_STAR 0x43 /* no Match any set of characters. */ 121#define P_NUMRNG 0x44 /* zr, zr Match a numeric range. */ 122#define P_NUMFROM 0x45 /* zr Match a number >= X */ 123#define P_NUMTO 0x46 /* zr Match a number <= X */ 124#define P_NUMANY 0x47 /* no Match any set of decimal digits */ 125/* spaces left for P_OPEN+n,... for backreferences */ 126#define P_OPEN 0x80 /* no Mark this point in input as start of n. */ 127#define P_CLOSE 0x90 /* no Analogous to OPEN. */ 128/* 129 * no no argument 130 * zr the range type zrange_t: may be zlong or unsigned long 131 * char a single char 132 * uc* a pointer to unsigned char, used at run time and initialised 133 * to NULL. 134 * str null-terminated, metafied string 135 * lstr length as long then string, not null-terminated, unmetafied. 136 */ 137 138/* 139 * Notes on usage: 140 * P_WBRANCH: This works like a branch and is used in complex closures, 141 * to ensure we don't succeed on a zero-length match of the pattern, 142 * since that would cause an infinite loop. We do this by recording 143 * the positions where we have already tried to match. See the 144 * P_WBRANCH test in patmatch(). 145 * 146 * P_ANY, P_ANYOF: the operand is a null terminated 147 * string. Normal characters match as expected. Characters 148 * in the range Meta+PP_ALPHA..Meta+PP_UNKNWN do the appropriate 149 * Posix range tests. This relies on imeta returning true for these 150 * characters. We treat unknown POSIX ranges as never matching. 151 * PP_RANGE means the next two (possibly metafied) characters form 152 * the limits of a range to test; it's too much like hard work to 153 * expand the range. 154 * 155 * P_EXCLUDE, P_EXCSYNC, PEXCEND: P_EXCLUDE appears in the pattern like 156 * P_BRANCH, but applies to the immediately preceding branch. The code in 157 * the corresponding branch is followed by a P_EXCSYNC, which simply 158 * acts as a marker that a P_EXCLUDE comes next. The P_EXCLUDE 159 * has a pointer to char embeded in it, which works 160 * like P_WBRANCH: if we get to the P_EXCSYNC, and we already matched 161 * up to the same position, fail. Thus we are forced to backtrack 162 * on closures in the P_BRANCH if the first attempt was excluded. 163 * Corresponding to P_EXCSYNC in the original branch, there is a 164 * P_EXCEND in the exclusion. If we get to this point, and we did 165 * *not* match in the original branch, the exclusion itself fails, 166 * otherwise it succeeds since we know the tail already matches, 167 * so P_EXCEND is the end of the exclusion test. 168 * The whole sorry mess looks like this, where the upper lines 169 * show the linkage of the branches, and the lower shows the linkage 170 * of their pattern arguments. 171 * 172 * --------------------- ---------------------- 173 * ^ v ^ v 174 * ( <BRANCH>:apat-><EXCSYNC> <EXCLUDE>:excpat-><EXCEND> ) tail 175 * ^ 176 * | | 177 * -------------------------------------- 178 * 179 * P_EXCLUDP: this behaves exactly like P_EXCLUDE, with the sole exception 180 * that we prepend the path so far to the exclude pattern. This is 181 * for top level file globs, e.g. ** / *.c~*foo.c 182 * ^ I had to leave this space 183 * P_NUM*: zl is a zlong if that is 64-bit, else an unsigned long. 184 * 185 * P_COUNTSTART, P_COUNT: a P_COUNTSTART flags the start of a quantified 186 * closure (#cN,M) and is used to initialise the count. Executing 187 * the pattern leads back to the P_COUNT, while the next links of the 188 * P_COUNTSTART and P_COUNT lead to the tail of the pattern: 189 * 190 * ---------------- 191 * v ^ 192 * <COUNTSTART><COUNT>pattern<BACK> tail 193 * v v ^ 194 * ------------------------ 195 */ 196 197#define P_OP(p) ((p)->l & 0xff) 198#define P_NEXT(p) ((p)->l >> 8) 199#define P_OPERAND(p) ((p) + 1) 200#define P_ISBRANCH(p) ((p)->l & 0x20) 201#define P_ISEXCLUDE(p) (((p)->l & 0x30) == 0x30) 202#define P_NOTDOT(p) ((p)->l & 0x40) 203 204/* Specific to lstr type, i.e. P_EXACTLY. */ 205#define P_LS_LEN(p) ((p)[1].l) /* can be used as lvalue */ 206#define P_LS_STR(p) ((char *)((p) + 2)) 207 208/* Specific to P_COUNT: arguments as offset in nodes from operator */ 209#define P_CT_CURRENT (1) /* Current count */ 210#define P_CT_MIN (2) /* Minimum count */ 211#define P_CT_MAX (3) /* Maximum count, -1 for none */ 212#define P_CT_PTR (4) /* Pointer to last match start */ 213#define P_CT_OPERAND (5) /* Operand of P_COUNT */ 214 215/* Flags needed when pattern is executed */ 216#define P_SIMPLE 0x01 /* Simple enough to be #/## operand. */ 217#define P_HSTART 0x02 /* Starts with # or ##'d pattern. */ 218#define P_PURESTR 0x04 /* Can be matched with a strcmp */ 219 220#if defined(ZSH_64_BIT_TYPE) || defined(LONG_IS_64_BIT) 221typedef zlong zrange_t; 222#define ZRANGE_T_IS_SIGNED (1) 223#else 224typedef unsigned long zrange_t; 225#endif 226 227/* 228 * Characters which terminate a pattern segment. We actually use 229 * a pointer patendseg which skips the first character if we are not 230 * parsing a file pattern. 231 * Note that the size of this and the next array are hard-wired 232 * via the definitions. 233 */ 234 235static char endseg[] = { 236 '/', /* file only */ 237 '\0', Bar, Outpar, /* all patterns */ 238 Tilde /* extended glob only */ 239}; 240 241#define PATENDSEGLEN_NORM 4 242#define PATENDSEGLEN_EXT 5 243 244/* Characters which terminate a simple string */ 245 246static char endstr[] = { 247 '/', /* file only */ 248 '\0', Bar, Outpar, Quest, Star, Inbrack, Inpar, Inang, Bnullkeep, 249 /* all patterns */ 250 Tilde, Hat, Pound /* extended glob only */ 251}; 252 253#define PATENDSTRLEN_NORM 10 254#define PATENDSTRLEN_EXT 13 255 256 257/* Default size for pattern buffer */ 258#define P_DEF_ALLOC 256 259 260/* Flags used in compilation */ 261static char *patstart, *patparse; /* input pointers */ 262static int patnpar; /* () count */ 263static char *patcode; /* point of code emission */ 264static long patsize; /* size of code */ 265static char *patout; /* start of code emission string */ 266static long patalloc; /* size allocated for same */ 267static char *patendseg; /* characters ending segment */ 268static int patendseglen; /* length of same */ 269static char *patendstr; /* characters ending plain string */ 270static int patendstrlen; /* length of sameo */ 271 272/* Flags used in both compilation and execution */ 273static int patflags; /* flags passed down to patcompile */ 274static int patglobflags; /* globbing flags & approx */ 275 276/* 277 * Increment pointer to metafied multibyte string. 278 */ 279#ifdef MULTIBYTE_SUPPORT 280typedef wint_t patint_t; 281 282#define PEOF WEOF 283 284#define METACHARINC(x) ((void)metacharinc(&x)) 285 286/* 287 * TODO: the shiftstate isn't well handled; we don't guarantee 288 * to maintain it properly between characters. If we don't 289 * need it we should use mbtowc() instead. 290 */ 291static mbstate_t shiftstate; 292 293/* 294 * Multibyte version: it's (almost) as easy to return the 295 * value as not, so do so since we sometimes need it.. 296 */ 297static wchar_t 298metacharinc(char **x) 299{ 300 char *inptr = *x; 301 char inchar; 302 size_t ret = MB_INVALID; 303 wchar_t wc; 304 305 /* 306 * Cheat if the top bit isn't set. This is second-guessing 307 * the library, but we know for sure that if the character 308 * set doesn't have the property that all bytes with the 8th 309 * bit clear are single characters then we are stuffed. 310 */ 311 if (!(patglobflags & GF_MULTIBYTE) || !(STOUC(*inptr) & 0x80)) 312 { 313 if (itok(*inptr)) 314 inchar = ztokens[*inptr++ - Pound]; 315 else if (*inptr == Meta) { 316 inptr++; 317 inchar = *inptr++ ^ 32; 318 } else { 319 inchar = *inptr++; 320 } 321 *x = inptr; 322 return (wchar_t)STOUC(inchar); 323 } 324 325 while (*inptr) { 326 if (itok(*inptr)) 327 inchar = ztokens[*inptr++ - Pound]; 328 else if (*inptr == Meta) { 329 inptr++; 330 inchar = *inptr++ ^ 32; 331 } else { 332 inchar = *inptr++; 333 } 334 ret = mbrtowc(&wc, &inchar, 1, &shiftstate); 335 336 if (ret == MB_INVALID) 337 break; 338 if (ret == MB_INCOMPLETE) 339 continue; 340 *x = inptr; 341 return wc; 342 } 343 344 /* Error. Treat as single byte. */ 345 /* Reset the shift state for next time. */ 346 memset(&shiftstate, 0, sizeof(shiftstate)); 347 return (wchar_t) STOUC(*(*x)++); 348} 349 350#else 351typedef int patint_t; 352 353#define PEOF EOF 354 355#define METACHARINC(x) ((void)((x) += (*(x) == Meta) ? 2 : 1)) 356#endif 357 358/* 359 * Return unmetafied char from string (x is any char *). 360 * Used with MULTIBYTE_SUPPORT if the GF_MULTIBYTE is not 361 * in effect. 362 */ 363#define UNMETA(x) (*(x) == Meta ? (x)[1] ^ 32 : *(x)) 364 365/* Add n more characters, ensuring there is enough space. */ 366 367enum { 368 PA_NOALIGN = 1, 369 PA_UNMETA = 2 370}; 371 372/**/ 373static void 374patadd(char *add, int ch, long n, int paflags) 375{ 376 /* Make sure everything gets aligned unless we get PA_NOALIGN. */ 377 long newpatsize = patsize + n; 378 if (!(paflags & PA_NOALIGN)) 379 newpatsize = (newpatsize + sizeof(union upat) - 1) & 380 ~(sizeof(union upat) - 1); 381 if (patalloc < newpatsize) { 382 long newpatalloc = 383 2*(newpatsize > patalloc ? newpatsize : patalloc); 384 patout = (char *)zrealloc((char *)patout, newpatalloc); 385 patcode = patout + patsize; 386 patalloc = newpatalloc; 387 } 388 patsize = newpatsize; 389 if (add) { 390 if (paflags & PA_UNMETA) { 391 /* 392 * Unmetafy and untokenize the string as we go. 393 * The Meta characters in add aren't counted in n. 394 */ 395 while (n--) { 396 if (itok(*add)) 397 *patcode++ = ztokens[*add++ - Pound]; 398 else if (*add == Meta) { 399 add++; 400 *patcode++ = *add++ ^ 32; 401 } else { 402 *patcode++ = *add++; 403 } 404 } 405 } else { 406 while (n--) 407 *patcode++ = *add++; 408 } 409 } else 410 *patcode++ = ch; 411 patcode = patout + patsize; 412} 413 414static long rn_offs; 415/* operates on pointers to union upat, returns a pointer */ 416#define PATNEXT(p) ((rn_offs = P_NEXT(p)) ? \ 417 (P_OP(p) == P_BACK) ? \ 418 ((p)-rn_offs) : ((p)+rn_offs) : NULL) 419 420/* Called before parsing a set of file matchs to initialize flags */ 421 422/**/ 423void 424patcompstart(void) 425{ 426 if (isset(CASEGLOB)) 427 patglobflags = 0; 428 else 429 patglobflags = GF_IGNCASE; 430 if (isset(MULTIBYTE)) 431 patglobflags |= GF_MULTIBYTE; 432} 433 434/* 435 * Top level pattern compilation subroutine 436 * exp is a null-terminated, metafied string. 437 * inflags is an or of some PAT_* flags. 438 * endexp, if non-null, is set to a pointer to the end of the 439 * part of exp which was compiled. This is used when 440 * compiling patterns for directories which must be 441 * matched recursively. 442 */ 443 444/**/ 445mod_export Patprog 446patcompile(char *exp, int inflags, char **endexp) 447{ 448 int flags = 0; 449 long len = 0; 450 long startoff; 451 Upat pscan; 452 char *lng, *strp = NULL; 453 Patprog p; 454 455 startoff = sizeof(struct patprog); 456 /* Ensure alignment of start of program string */ 457 startoff = (startoff + sizeof(union upat) - 1) & ~(sizeof(union upat) - 1); 458 459 /* Allocate reasonable sized chunk if none, reduce size if too big */ 460 if (patalloc != P_DEF_ALLOC) 461 patout = (char *)zrealloc(patout, patalloc = P_DEF_ALLOC); 462 patcode = patout + startoff; 463 patsize = patcode - patout; 464 patstart = patparse = exp; 465 /* 466 * Note global patnpar numbers parentheses 1..9, while patnpar 467 * in struct is actual count of parentheses. 468 */ 469 patnpar = 1; 470 patflags = inflags & ~(PAT_PURES|PAT_HAS_EXCLUDP); 471 472 patendseg = endseg; 473 patendseglen = isset(EXTENDEDGLOB) ? PATENDSEGLEN_EXT : PATENDSEGLEN_NORM; 474 patendstr = endstr; 475 patendstrlen = isset(EXTENDEDGLOB) ? PATENDSTRLEN_EXT : PATENDSTRLEN_NORM; 476 477 if (!(patflags & PAT_FILE)) { 478 patendseg++; 479 patendstr++; 480 patendseglen--; 481 patendstrlen--; 482 remnulargs(patparse); 483 if (isset(MULTIBYTE)) 484 patglobflags = GF_MULTIBYTE; 485 else 486 patglobflags = 0; 487 } 488 if (patflags & PAT_LCMATCHUC) 489 patglobflags |= GF_LCMATCHUC; 490 /* 491 * Have to be set now, since they get updated during compilation. 492 */ 493 ((Patprog)patout)->globflags = patglobflags; 494 495 if (!(patflags & PAT_ANY)) { 496 /* Look for a really pure string, with no tokens at all. */ 497 if (!(patglobflags & ~GF_MULTIBYTE) 498#ifdef __CYGWIN__ 499 /* 500 * If the OS treats files case-insensitively and we 501 * are looking at files, we don't need to use pattern 502 * matching to find the file. 503 */ 504 || (!(patglobflags & ~GF_IGNCASE) && (patflags & PAT_FILE)) 505#endif 506 ) 507 { 508 /* 509 * Waah! I wish I understood this. 510 * Empty metafied strings have an initial Nularg. 511 * This never corresponds to a real character in 512 * a glob pattern or string, so skip it. 513 */ 514 if (*exp == Nularg) 515 exp++; 516 for (strp = exp; *strp && 517 (!(patflags & PAT_FILE) || *strp != '/') && !itok(*strp); 518 strp++) 519 ; 520 } 521 if (!strp || (*strp && *strp != '/')) { 522 /* No, do normal compilation. */ 523 strp = NULL; 524 if (patcompswitch(0, &flags) == 0) 525 return NULL; 526 } else { 527 /* 528 * Yes, copy the string, and skip compilation altogether. 529 * Null terminate for the benefit of globbing. 530 * Leave metafied both for globbing and for our own 531 * efficiency. 532 */ 533 patparse = strp; 534 len = strp - exp; 535 patadd(exp, 0, len + 1, 0); 536 patout[startoff + len] = '\0'; 537 patflags |= PAT_PURES; 538 } 539 } 540 541 /* end of compilation: safe to use pointers */ 542 p = (Patprog)patout; 543 p->startoff = startoff; 544 p->patstartch = '\0'; 545 p->globend = patglobflags; 546 p->flags = patflags; 547 p->mustoff = 0; 548 p->size = patsize; 549 p->patmlen = len; 550 p->patnpar = patnpar-1; 551 552 if (!strp) { 553 pscan = (Upat)(patout + startoff); 554 555 if (!(patflags & PAT_ANY) && P_OP(PATNEXT(pscan)) == P_END) { 556 /* only one top level choice */ 557 pscan = P_OPERAND(pscan); 558 559 if (flags & P_PURESTR) { 560 /* 561 * The pattern can be matched with a simple strncmp/strcmp. 562 * Careful in case we've overwritten the node for the next ptr. 563 */ 564 char *dst = patout + startoff; 565 Upat next; 566 p->flags |= PAT_PURES; 567 for (; pscan; pscan = next) { 568 next = PATNEXT(pscan); 569 if (P_OP(pscan) == P_EXACTLY) { 570 char *opnd = P_LS_STR(pscan), *mtest; 571 long oplen = P_LS_LEN(pscan), ilen; 572 int nmeta = 0; 573 /* 574 * Unfortunately we unmetafied the string 575 * and we need to put any metacharacters 576 * back now we know it's a pure string. 577 * This shouldn't happen too often, it's 578 * just that there are some cases such 579 * as . and .. in files where we really 580 * need a pure string even if there are 581 * pattern characters flying around. 582 */ 583 for (mtest = opnd, ilen = oplen; ilen; 584 mtest++, ilen--) 585 if (imeta(*mtest)) 586 nmeta++; 587 if (nmeta) { 588 char *oldpatout = patout; 589 patadd(NULL, 0, nmeta, 0); 590 /* 591 * Yuk. 592 */ 593 p = (Patprog)patout; 594 opnd = patout + (opnd - oldpatout); 595 dst = patout + startoff; 596 } 597 598 while (oplen--) { 599 if (imeta(*opnd)) { 600 *dst++ = Meta; 601 *dst++ = *opnd++ ^ 32; 602 } else { 603 *dst++ = *opnd++; 604 } 605 } 606 } 607 } 608 p->size = dst - patout; 609 /* patmlen is really strlen. We don't need a null. */ 610 p->patmlen = p->size - startoff; 611 } else { 612 /* starting point info */ 613 if (P_OP(pscan) == P_EXACTLY && !p->globflags && 614 P_LS_LEN(pscan)) 615 p->patstartch = *P_LS_STR(pscan); 616 /* 617 * Find the longest literal string in something expensive. 618 * This is itself not all that cheap if we have 619 * case-insensitive matching or approximation, so don't. 620 */ 621 if ((flags & P_HSTART) && !p->globflags) { 622 lng = NULL; 623 len = 0; 624 for (; pscan; pscan = PATNEXT(pscan)) 625 if (P_OP(pscan) == P_EXACTLY && 626 P_LS_LEN(pscan) >= len) { 627 lng = P_LS_STR(pscan); 628 len = P_LS_LEN(pscan); 629 } 630 if (lng) { 631 p->mustoff = lng - patout; 632 p->patmlen = len; 633 } 634 } 635 } 636 } 637 } 638 639 /* 640 * The pattern was compiled in a fixed buffer: unless told otherwise, 641 * we stick the compiled pattern on the heap. This is necessary 642 * for files where we will often be compiling multiple segments at once. 643 * But if we get the ZDUP flag we always put it in zalloc()ed memory. 644 */ 645 if (patflags & PAT_ZDUP) { 646 Patprog newp = (Patprog)zalloc(patsize); 647 memcpy((char *)newp, (char *)p, patsize); 648 p = newp; 649 } else if (!(patflags & PAT_STATIC)) { 650 Patprog newp = (Patprog)zhalloc(patsize); 651 memcpy((char *)newp, (char *)p, patsize); 652 p = newp; 653 } 654 655 if (endexp) 656 *endexp = patparse; 657 return p; 658} 659 660/* 661 * Main body or parenthesized subexpression in pattern 662 * Parenthesis (and any ksh_glob gubbins) will have been removed. 663 */ 664 665/**/ 666static long 667patcompswitch(int paren, int *flagp) 668{ 669 long starter, br, ender, excsync = 0; 670 int parno = 0; 671 int flags, gfchanged = 0; 672 long savglobflags = (long)patglobflags; 673 Upat ptr; 674 675 *flagp = 0; 676 677 if (paren && (patglobflags & GF_BACKREF) && patnpar <= NSUBEXP) { 678 /* 679 * parenthesized: make an open node. 680 * We can only refer to the first nine parentheses. 681 * For any others, we just use P_OPEN on its own; there's 682 * no gain in arbitrarily limiting the number of parentheses. 683 */ 684 parno = patnpar++; 685 starter = patnode(P_OPEN + parno); 686 } else 687 starter = 0; 688 689 br = patnode(P_BRANCH); 690 if (!patcompbranch(&flags)) 691 return 0; 692 if (patglobflags != (int)savglobflags) 693 gfchanged++; 694 if (starter) 695 pattail(starter, br); 696 else 697 starter = br; 698 699 *flagp |= flags & (P_HSTART|P_PURESTR); 700 701 while (*patparse == Bar || 702 (isset(EXTENDEDGLOB) && *patparse == Tilde && 703 (patparse[1] == '/' || 704 !memchr(patendseg, patparse[1], patendseglen)))) { 705 int tilde = *patparse++ == Tilde; 706 long gfnode = 0, newbr; 707 708 *flagp &= ~P_PURESTR; 709 710 if (tilde) { 711 union upat up; 712 /* excsync remembers the P_EXCSYNC node before a chain of 713 * exclusions: all point back to this. only the 714 * original (non-excluded) branch gets a trailing P_EXCSYNC. 715 */ 716 if (!excsync) { 717 excsync = patnode(P_EXCSYNC); 718 patoptail(br, excsync); 719 } 720 /* 721 * By default, approximations are turned off in exclusions: 722 * we need to do this here as otherwise the code compiling 723 * the exclusion doesn't know if the flags have really 724 * changed if the error count gets restored. 725 */ 726 patglobflags &= ~0xff; 727 if (!(patflags & PAT_FILET) || paren) { 728 br = patnode(P_EXCLUDE); 729 } else { 730 /* 731 * At top level (paren == 0) in a file glob !(patflags 732 * &PAT_FILET) do the exclusion prepending the file path 733 * so far. We need to flag this to avoid unnecessarily 734 * copying the path. 735 */ 736 br = patnode(P_EXCLUDP); 737 patflags |= PAT_HAS_EXCLUDP; 738 } 739 up.p = NULL; 740 patadd((char *)&up, 0, sizeof(up), 0); 741 /* / is not treated as special if we are at top level */ 742 if (!paren && *patendseg == '/') { 743 tilde++; 744 patendseg++; 745 patendseglen--; 746 patendstr++; 747 patendstrlen--; 748 } 749 } else { 750 excsync = 0; 751 br = patnode(P_BRANCH); 752 /* 753 * The position of the following statements means globflags 754 * set in the main branch carry over to the exclusion. 755 */ 756 if (!paren) { 757 patglobflags = 0; 758 if (((Patprog)patout)->globflags) { 759 /* 760 * If at top level, we need to reinitialize flags to zero, 761 * since (#i)foo|bar only applies to foo and we stuck 762 * the #i into the global flags. 763 * We could have done it so that they only got set in the 764 * first branch, but it's quite convenient having any 765 * global flags set in the header and not buried in the 766 * pattern. (Or maybe it isn't and we should 767 * forget this bit and always stick in an explicit GFLAGS 768 * statement instead of using the header.) 769 * Also, this can't happen for file globs where there are 770 * no top-level |'s. 771 * 772 * No gfchanged, as nothing to follow branch at top 773 * level. 774 */ 775 union upat up; 776 gfnode = patnode(P_GFLAGS); 777 up.l = patglobflags; 778 patadd((char *)&up, 0, sizeof(union upat), 0); 779 } 780 } else { 781 patglobflags = (int)savglobflags; 782 } 783 } 784 newbr = patcompbranch(&flags); 785 if (tilde == 2) { 786 /* restore special treatment of / */ 787 patendseg--; 788 patendseglen++; 789 patendstr--; 790 patendstrlen++; 791 } 792 if (!newbr) 793 return 0; 794 if (gfnode) 795 pattail(gfnode, newbr); 796 if (!tilde && patglobflags != (int)savglobflags) 797 gfchanged++; 798 pattail(starter, br); 799 if (excsync) 800 patoptail(br, patnode(P_EXCEND)); 801 *flagp |= flags & P_HSTART; 802 } 803 804 /* 805 * Make a closing node, hooking it to the end. 806 * Note that we can't optimize P_NOTHING out here, since another 807 * branch at that point would indicate the current choices continue, 808 * which they don't. 809 */ 810 ender = patnode(paren ? parno ? P_CLOSE+parno : P_NOTHING : P_END); 811 pattail(starter, ender); 812 813 /* 814 * Hook the tails of the branches to the closing node, 815 * except for exclusions which terminate where they are. 816 */ 817 for (ptr = (Upat)patout + starter; ptr; ptr = PATNEXT(ptr)) 818 if (!P_ISEXCLUDE(ptr)) 819 patoptail(ptr-(Upat)patout, ender); 820 821 /* check for proper termination */ 822 if ((paren && *patparse++ != Outpar) || 823 (!paren && *patparse && 824 !((patflags & PAT_FILE) && *patparse == '/'))) 825 return 0; 826 827 if (paren && gfchanged) { 828 /* 829 * Restore old values of flags when leaving parentheses. 830 * gfchanged detects a change in any branch (except exclusions 831 * which are separate), since we need to emit this even if 832 * a later branch happened to put the flags back. 833 */ 834 pattail(ender, patnode(P_GFLAGS)); 835 patglobflags = (int)savglobflags; 836 patadd((char *)&savglobflags, 0, sizeof(long), 0); 837 } 838 839 return starter; 840} 841 842/* 843 * Compile something ended by Bar, Outpar, Tilde, or end of string. 844 * Note the BRANCH or EXCLUDE tag must already have been omitted: 845 * this returns the position of the operand of that. 846 */ 847 848/**/ 849static long 850patcompbranch(int *flagp) 851{ 852 long chain, latest = 0, starter; 853 int flags = 0; 854 855 *flagp = P_PURESTR; 856 857 starter = chain = 0; 858 while (!memchr(patendseg, *patparse, patendseglen) || 859 (*patparse == Tilde && patparse[1] != '/' && 860 memchr(patendseg, patparse[1], patendseglen))) { 861 if (isset(EXTENDEDGLOB) && 862 ((!isset(SHGLOB) && 863 (*patparse == Inpar && patparse[1] == Pound)) || 864 (isset(KSHGLOB) && *patparse == '@' && patparse[1] == Inpar && 865 patparse[2] == Pound))) { 866 /* Globbing flags. */ 867 char *pp1 = patparse; 868 int oldglobflags = patglobflags, ignore; 869 long assert; 870 patparse += (*patparse == '@') ? 3 : 2; 871 if (!patgetglobflags(&patparse, &assert, &ignore)) 872 return 0; 873 if (!ignore) { 874 if (assert) { 875 /* 876 * Start/end assertion looking like flags, but 877 * actually handled as a normal node 878 */ 879 latest = patnode(assert); 880 flags = 0; 881 } else { 882 if (pp1 == patstart) { 883 /* Right at start of pattern, the simplest case. 884 * Put them into the flags and don't emit anything. 885 */ 886 ((Patprog)patout)->globflags = patglobflags; 887 continue; 888 } else if (!*patparse) { 889 /* Right at the end, so just leave the flags for 890 * the next Patprog in the chain to pick up. 891 */ 892 break; 893 } 894 /* 895 * Otherwise, we have to stick them in as a pattern 896 * matching nothing. 897 */ 898 if (oldglobflags != patglobflags) { 899 /* Flags changed */ 900 union upat up; 901 latest = patnode(P_GFLAGS); 902 up.l = patglobflags; 903 patadd((char *)&up, 0, sizeof(union upat), 0); 904 } else { 905 /* No effect. */ 906 continue; 907 } 908 } 909 } else if (!*patparse) 910 break; 911 else 912 continue; 913 } else if (isset(EXTENDEDGLOB) && *patparse == Hat) { 914 /* 915 * ^pat: anything but pat. For proper backtracking, 916 * etc., we turn this into (*~pat), except without the 917 * parentheses. 918 */ 919 patparse++; 920 latest = patcompnot(0, &flags); 921 } else 922 latest = patcomppiece(&flags); 923 if (!latest) 924 return 0; 925 if (!starter) 926 starter = latest; 927 if (!(flags & P_PURESTR)) 928 *flagp &= ~P_PURESTR; 929 if (!chain) 930 *flagp |= flags & P_HSTART; 931 else 932 pattail(chain, latest); 933 chain = latest; 934 } 935 /* check if there was nothing in the loop, i.e. () */ 936 if (!chain) 937 starter = patnode(P_NOTHING); 938 939 return starter; 940} 941 942/* get glob flags, return 1 for success, 0 for failure */ 943 944/**/ 945int 946patgetglobflags(char **strp, long *assertp, int *ignore) 947{ 948 char *nptr, *ptr = *strp; 949 zlong ret; 950 951 *assertp = 0; 952 *ignore = 1; 953 /* (#X): assumes we are still positioned on the first X */ 954 for (; *ptr && *ptr != Outpar; ptr++) { 955 if (*ptr == 'q') { 956 /* Glob qualifiers, ignored in pattern code */ 957 while (*ptr && *ptr != Outpar) 958 ptr++; 959 break; 960 } else { 961 *ignore = 0; 962 switch (*ptr) { 963 case 'a': 964 /* Approximate matching, max no. of errors follows */ 965 ret = zstrtol(++ptr, &nptr, 10); 966 /* 967 * We can't have more than 254, because we need 255 to 968 * mark 254 errors in wbranch and exclude sync strings 969 * (hypothetically --- hope no-one tries it). 970 */ 971 if (ret < 0 || ret > 254 || ptr == nptr) 972 return 0; 973 patglobflags = (patglobflags & ~0xff) | (ret & 0xff); 974 ptr = nptr-1; 975 break; 976 977 case 'l': 978 /* Lowercase in pattern matches lower or upper in target */ 979 patglobflags = (patglobflags & ~GF_IGNCASE) | GF_LCMATCHUC; 980 break; 981 982 case 'i': 983 /* Fully case insensitive */ 984 patglobflags = (patglobflags & ~GF_LCMATCHUC) | GF_IGNCASE; 985 break; 986 987 case 'I': 988 /* Restore case sensitivity */ 989 patglobflags &= ~(GF_LCMATCHUC|GF_IGNCASE); 990 break; 991 992 case 'b': 993 /* Make backreferences */ 994 patglobflags |= GF_BACKREF; 995 break; 996 997 case 'B': 998 /* Don't make backreferences */ 999 patglobflags &= ~GF_BACKREF; 1000 break; 1001 1002 case 'm': 1003 /* Make references to complete match */ 1004 patglobflags |= GF_MATCHREF; 1005 break; 1006 1007 case 'M': 1008 /* Don't */ 1009 patglobflags &= ~GF_MATCHREF; 1010 break; 1011 1012 case 's': 1013 *assertp = P_ISSTART; 1014 break; 1015 1016 case 'e': 1017 *assertp = P_ISEND; 1018 break; 1019 1020 case 'u': 1021 patglobflags |= GF_MULTIBYTE; 1022 break; 1023 1024 case 'U': 1025 patglobflags &= ~GF_MULTIBYTE; 1026 break; 1027 1028 default: 1029 return 0; 1030 } 1031 } 1032 } 1033 if (*ptr != Outpar) 1034 return 0; 1035 /* Start/end assertions must appear on their own. */ 1036 if (*assertp && (*strp)[1] != Outpar) 1037 return 0; 1038 *strp = ptr + 1; 1039 return 1; 1040} 1041 1042 1043static const char *colon_stuffs[] = { 1044 "alpha", "alnum", "ascii", "blank", "cntrl", "digit", "graph", 1045 "lower", "print", "punct", "space", "upper", "xdigit", "IDENT", 1046 "IFS", "IFSSPACE", "WORD", NULL 1047}; 1048 1049/* 1050 * Handle the guts of a [:stuff:] character class element. 1051 * start is the beginning of "stuff" and len is its length. 1052 * This code is exported for the benefit of completion matching. 1053 */ 1054 1055/**/ 1056mod_export int 1057range_type(char *start, int len) 1058{ 1059 const char **csp; 1060 1061 for (csp = colon_stuffs; *csp; csp++) { 1062 if (!strncmp(start, *csp, len)) 1063 return (csp - colon_stuffs) + PP_FIRST; 1064 } 1065 1066 return PP_UNKWN; 1067} 1068 1069 1070/* 1071 * Convert the contents of a [...] or [^...] expression (just the 1072 * ... part) back into a string. This is used by compfiles -p/-P 1073 * for some reason. The compiled form (a metafied string) is 1074 * passed in rangestr. 1075 * 1076 * If outstr is non-NULL the compiled form is placed there. It 1077 * must be sufficiently long. A terminating NULL is appended. 1078 * 1079 * Return the length required, not including the terminating NULL. 1080 * 1081 * TODO: this is non-multibyte for now. It will need to be defined 1082 * appropriately with MULTIBYTE_SUPPORT when the completion matching 1083 * code catches up. 1084 */ 1085 1086/**/ 1087mod_export int 1088pattern_range_to_string(char *rangestr, char *outstr) 1089{ 1090 int len = 0; 1091 1092 while (*rangestr) { 1093 if (imeta(STOUC(*rangestr))) { 1094 int swtype = STOUC(*rangestr) - STOUC(Meta); 1095 1096 if (swtype == 0) { 1097 /* Ordindary metafied character */ 1098 if (outstr) 1099 { 1100 *outstr++ = Meta; 1101 *outstr++ = rangestr[1] ^ 32; 1102 } 1103 len += 2; 1104 rangestr += 2; 1105 } else if (swtype == PP_RANGE) { 1106 /* X-Y range */ 1107 int i; 1108 1109 for (i = 0; i < 2; i++) { 1110 if (*rangestr == Meta) { 1111 if (outstr) { 1112 *outstr++ = Meta; 1113 *outstr++ = rangestr[1]; 1114 } 1115 len += 2; 1116 rangestr += 2; 1117 } else { 1118 if (outstr) 1119 *outstr++ = *rangestr; 1120 len++; 1121 rangestr++; 1122 } 1123 1124 if (i == 0) { 1125 if (outstr) 1126 *outstr++ = '-'; 1127 len++; 1128 } 1129 } 1130 } else if (swtype >= PP_FIRST && swtype <= PP_LAST) { 1131 /* [:stuff:]; we need to output [: and :] */ 1132 const char *found = colon_stuffs[swtype - PP_FIRST]; 1133 int newlen = strlen(found); 1134 if (outstr) { 1135 strcpy(outstr, "[:"); 1136 outstr += 2; 1137 memcpy(outstr, found, newlen); 1138 outstr += newlen; 1139 strcpy(outstr, ":]"); 1140 outstr += 2; 1141 } 1142 len += newlen + 4; 1143 rangestr++; 1144 } else { 1145 /* shouldn't happen */ 1146 DPUTS(1, "BUG: unknown PP_ code in pattern range"); 1147 rangestr++; 1148 } 1149 } else { 1150 /* ordinary character, guaranteed no Meta handling needed */ 1151 if (outstr) 1152 *outstr++ = *rangestr; 1153 len++; 1154 rangestr++; 1155 } 1156 } 1157 1158 if (outstr) 1159 *outstr = '\0'; 1160 return len; 1161} 1162 1163/* 1164 * compile a chunk such as a literal string or a [...] followed 1165 * by a possible hash operator 1166 */ 1167 1168/**/ 1169static long 1170patcomppiece(int *flagp) 1171{ 1172 long starter = 0, next, op, opnd; 1173 int flags, flags2, kshchar, len, ch, patch, nmeta; 1174 int pound, count; 1175 union upat up; 1176 char *nptr, *str0, *ptr, *patprev; 1177 zrange_t from, to; 1178 char *charstart; 1179 1180 flags = 0; 1181 str0 = patprev = patparse; 1182 for (;;) { 1183 /* 1184 * Check if we have a string. First, we need to make sure 1185 * the string doesn't introduce a ksh-like parenthesized expression. 1186 */ 1187 kshchar = '\0'; 1188 if (isset(KSHGLOB) && *patparse && patparse[1] == Inpar) { 1189 if (strchr("?*+!@", *patparse)) 1190 kshchar = STOUC(*patparse); 1191 else if (*patparse == Star || *patparse == Quest) 1192 kshchar = STOUC(ztokens[*patparse - Pound]); 1193 } 1194 1195 /* 1196 * End of string (or no string at all) if ksh-type parentheses, 1197 * or special character, unless that character is a tilde and 1198 * the character following is an end-of-segment character. Thus 1199 * tildes are not special if there is nothing following to 1200 * be excluded. 1201 */ 1202 if (kshchar || (memchr(patendstr, *patparse, patendstrlen) && 1203 (*patparse != Tilde || 1204 patparse[1] == '/' || 1205 !memchr(patendseg, patparse[1], patendseglen)))) 1206 break; 1207 1208 /* Remember the previous character for backtracking */ 1209 patprev = patparse; 1210 METACHARINC(patparse); 1211 } 1212 1213 if (patparse > str0) { 1214 long slen = patparse - str0; 1215 int morelen; 1216 1217 /* Ordinary string: cancel kshchar lookahead */ 1218 kshchar = '\0'; 1219 /* 1220 * Assume it matches a simple string until we find otherwise. 1221 */ 1222 flags |= P_PURESTR; 1223 DPUTS(patparse == str0, "BUG: matched nothing in patcomppiece."); 1224 /* more than one character matched? */ 1225 morelen = (patprev > str0); 1226 /* 1227 * If we have more than one character, a following hash 1228 * or (#c...) only applies to the last, so backtrack one character. 1229 */ 1230 if (isset(EXTENDEDGLOB) && 1231 (*patparse == Pound || 1232 (*patparse == Inpar && patparse[1] == Pound && 1233 patparse[2] == 'c')) && morelen) 1234 patparse = patprev; 1235 /* 1236 * If len is 1, we can't have an active # following, so doesn't 1237 * matter that we don't make X in `XX#' simple. 1238 */ 1239 if (!morelen) 1240 flags |= P_SIMPLE; 1241 starter = patnode(P_EXACTLY); 1242 1243 /* Get length of string without metafication. */ 1244 nmeta = 0; 1245 /* inherited from domatch, but why, exactly? */ 1246 if (*str0 == Nularg) 1247 str0++; 1248 for (ptr = str0; ptr < patparse; ptr++) { 1249 if (*ptr == Meta) { 1250 nmeta++; 1251 ptr++; 1252 } 1253 } 1254 slen = (patparse - str0) - nmeta; 1255 /* First add length, which is a long */ 1256 patadd((char *)&slen, 0, sizeof(long), 0); 1257 /* 1258 * Then the string, not null terminated. 1259 * Unmetafy and untokenize; pass the final length, 1260 * which is what we need to allocate, i.e. not including 1261 * a count for each Meta in the string. 1262 */ 1263 patadd(str0, 0, slen, PA_UNMETA); 1264 nptr = P_LS_STR((Upat)patout + starter); 1265 /* 1266 * It's much simpler to turn off pure string mode for 1267 * any case-insensitive or approximate matching; usually, 1268 * that is correct, or they wouldn't have been turned on. 1269 * However, we need to make sure we match a "." or ".." 1270 * in a file name as a pure string. There's a minor bug 1271 * that this will also apply to something like 1272 * ..(#a1).. (i.e. the (#a1) has no effect), but if you're 1273 * going to write funny patterns, you get no sympathy from me. 1274 */ 1275 if (patglobflags & 1276#ifdef __CYGWIN__ 1277 /* 1278 * As above: don't use pattern matching for files 1279 * just because of case insensitivity if file system 1280 * is known to be case insensitive. 1281 * 1282 * This is known to be necessary in at least one case: 1283 * if "mount -c /" is in effect, so that drives appear 1284 * directly under / instead of the usual /cygdrive, they 1285 * aren't shown by readdir(). So it's vital we don't use 1286 * globbing to find "/c", since that'll fail. 1287 */ 1288 ((patflags & PAT_FILE) ? 1289 (0xFF|GF_LCMATCHUC) : 1290 (0xFF|GF_LCMATCHUC|GF_IGNCASE)) 1291#else 1292 (0xFF|GF_LCMATCHUC|GF_IGNCASE) 1293#endif 1294 ) { 1295 if (!(patflags & PAT_FILE)) 1296 flags &= ~P_PURESTR; 1297 else if (!(nptr[0] == '.' && 1298 (slen == 1 || (nptr[1] == '.' && slen == 2)))) 1299 flags &= ~P_PURESTR; 1300 } 1301 } else { 1302 if (kshchar) 1303 patparse++; 1304 1305 patch = *patparse; 1306 METACHARINC(patparse); 1307 switch(patch) { 1308 case Quest: 1309 flags |= P_SIMPLE; 1310 starter = patnode(P_ANY); 1311 break; 1312 case Star: 1313 /* kshchar is used as a sign that we can't have #'s. */ 1314 kshchar = -1; 1315 starter = patnode(P_STAR); 1316 break; 1317 case Inbrack: 1318 flags |= P_SIMPLE; 1319 if (*patparse == Hat || *patparse == '^' || *patparse == '!') { 1320 patparse++; 1321 starter = patnode(P_ANYBUT); 1322 } else 1323 starter = patnode(P_ANYOF); 1324 if (*patparse == Outbrack) { 1325 patparse++; 1326 patadd(NULL, ']', 1, PA_NOALIGN); 1327 } 1328 while (*patparse && *patparse != Outbrack) { 1329 /* Meta is not a token */ 1330 if (*patparse == Inbrack && patparse[1] == ':' && 1331 (nptr = strchr(patparse+2, ':')) && 1332 nptr[1] == Outbrack) { 1333 /* Posix range. */ 1334 patparse += 2; 1335 len = nptr - patparse; 1336 ch = range_type(patparse, len); 1337 patparse = nptr + 2; 1338 if (ch != PP_UNKWN) 1339 patadd(NULL, STOUC(Meta) + ch, 1, PA_NOALIGN); 1340 continue; 1341 } 1342 charstart = patparse; 1343 METACHARINC(patparse); 1344 1345 if (*patparse == '-' && patparse[1] && 1346 patparse[1] != Outbrack) { 1347 patadd(NULL, STOUC(Meta)+PP_RANGE, 1, PA_NOALIGN); 1348 if (itok(*charstart)) { 1349 patadd(0, STOUC(ztokens[*charstart - Pound]), 1, 1350 PA_NOALIGN); 1351 } else { 1352 patadd(charstart, 0, patparse-charstart, PA_NOALIGN); 1353 } 1354 charstart = ++patparse; /* skip ASCII '-' */ 1355 METACHARINC(patparse); 1356 } 1357 if (itok(*charstart)) { 1358 patadd(0, STOUC(ztokens[*charstart - Pound]), 1, 1359 PA_NOALIGN); 1360 } else { 1361 patadd(charstart, 0, patparse-charstart, PA_NOALIGN); 1362 } 1363 } 1364 if (*patparse != Outbrack) 1365 return 0; 1366 patparse++; 1367 /* terminate null string and fix alignment */ 1368 patadd(NULL, 0, 1, 0); 1369 break; 1370 case Inpar: 1371 /* is this how to treat parentheses in SHGLOB? */ 1372 if (isset(SHGLOB) && !kshchar) 1373 return 0; 1374 if (kshchar == '!') { 1375 /* This is nasty, we should really either handle all 1376 * kshglobbing below or here. But most of the 1377 * others look like non-ksh patterns, while this one 1378 * doesn't, so we handle it here and leave the rest. 1379 * We treat it like an extendedglob ^, except that 1380 * it goes into parentheses. 1381 * 1382 * If we did do kshglob here, we could support 1383 * the old behaviour that things like !(foo)## 1384 * work, but it makes the code more complicated at 1385 * the expense of allowing the user to do things 1386 * they shouldn't. 1387 */ 1388 if (!(starter = patcompnot(1, &flags2))) 1389 return 0; 1390 } else if (!(starter = patcompswitch(1, &flags2))) 1391 return 0; 1392 flags |= flags2 & P_HSTART; 1393 break; 1394 case Inang: 1395 /* Numeric glob */ 1396 len = 0; /* beginning present 1, end present 2 */ 1397 if (idigit(*patparse)) { 1398 from = (zrange_t) zstrtol((char *)patparse, 1399 (char **)&nptr, 10); 1400 patparse = nptr; 1401 len |= 1; 1402 } 1403 DPUTS(*patparse != '-', "BUG: - missing from numeric glob"); 1404 patparse++; 1405 if (idigit(*patparse)) { 1406 to = (zrange_t) zstrtol((char *)patparse, 1407 (char **)&nptr, 10); 1408 patparse = nptr; 1409 len |= 2; 1410 } 1411 if (*patparse != Outang) 1412 return 0; 1413 patparse++; 1414 switch(len) { 1415 case 3: 1416 starter = patnode(P_NUMRNG); 1417 patadd((char *)&from, 0, sizeof(from), 0); 1418 patadd((char *)&to, 0, sizeof(to), 0); 1419 break; 1420 case 2: 1421 starter = patnode(P_NUMTO); 1422 patadd((char *)&to, 0, sizeof(to), 0); 1423 break; 1424 case 1: 1425 starter = patnode(P_NUMFROM); 1426 patadd((char *)&from, 0, sizeof(from), 0); 1427 break; 1428 case 0: 1429 starter = patnode(P_NUMANY); 1430 break; 1431 } 1432 /* This can't be simple, because it isn't. 1433 * Mention in manual that matching digits with [...] 1434 * is more efficient. 1435 */ 1436 break; 1437 case Pound: 1438 DPUTS(!isset(EXTENDEDGLOB), "BUG: # not treated as string"); 1439 /* 1440 * A hash here is an error; it should follow something 1441 * repeatable. 1442 */ 1443 return 0; 1444 break; 1445 case Bnullkeep: 1446 /* 1447 * Marker for restoring a backslash in output: 1448 * does not match a character. 1449 */ 1450 next = patcomppiece(flagp); 1451 /* 1452 * Can't match a pure string since we need to do this 1453 * as multiple chunks. 1454 */ 1455 *flagp &= ~P_PURESTR; 1456 return next; 1457 break; 1458#ifdef DEBUG 1459 default: 1460 dputs("BUG: character not handled in patcomppiece"); 1461 return 0; 1462 break; 1463#endif 1464 } 1465 } 1466 1467 count = 0; 1468 if (!(pound = (*patparse == Pound && isset(EXTENDEDGLOB))) && 1469 !(count = (isset(EXTENDEDGLOB) && *patparse == Inpar && 1470 patparse[1] == Pound && patparse[2] == 'c')) && 1471 (kshchar <= 0 || kshchar == '@' || kshchar == '!')) { 1472 *flagp = flags; 1473 return starter; 1474 } 1475 1476 /* too much at once doesn't currently work */ 1477 if (kshchar && pound) 1478 return 0; 1479 1480 if (kshchar == '*') { 1481 op = P_ONEHASH; 1482 *flagp = P_HSTART; 1483 } else if (kshchar == '+') { 1484 op = P_TWOHASH; 1485 *flagp = P_HSTART; 1486 } else if (kshchar == '?') { 1487 op = 0; 1488 *flagp = 0; 1489 } else if (count) { 1490 op = P_COUNT; 1491 patparse += 3; 1492 *flagp = P_HSTART; 1493 } else if (*++patparse == Pound) { 1494 op = P_TWOHASH; 1495 patparse++; 1496 *flagp = P_HSTART; 1497 } else { 1498 op = P_ONEHASH; 1499 *flagp = P_HSTART; 1500 } 1501 1502 /* 1503 * Note optimizations with pointers into P_NOTHING branches: some 1504 * should logically point to next node after current piece. 1505 * 1506 * Backtracking is also encoded in a slightly obscure way: the 1507 * code emitted ensures we test the non-empty branch of complex 1508 * patterns before the empty branch on each repetition. Hence 1509 * each time we fail on a non-empty branch, we try the empty branch, 1510 * which is equivalent to backtracking. 1511 */ 1512 if (op == P_COUNT) { 1513 /* (#cN,M) */ 1514 union upat countargs[P_CT_OPERAND]; 1515 char *opp = patparse; 1516 1517 countargs[0].l = P_COUNT; 1518 countargs[P_CT_CURRENT].l = 0L; 1519 countargs[P_CT_MIN].l = (long)zstrtol(patparse, &patparse, 10); 1520 if (patparse == opp) { 1521 /* missing number treated as zero */ 1522 countargs[P_CT_MIN].l = 0L; 1523 } 1524 if (*patparse != ',' && *patparse != Comma) { 1525 /* either max = min or error */ 1526 if (*patparse != Outpar) 1527 return 0; 1528 countargs[P_CT_MAX].l = countargs[P_CT_MIN].l; 1529 } else { 1530 opp = ++patparse; 1531 countargs[P_CT_MAX].l = (long)zstrtol(patparse, &patparse, 10); 1532 if (*patparse != Outpar) 1533 return 0; 1534 if (patparse == opp) { 1535 /* missing number treated as infinity: record as -1 */ 1536 countargs[P_CT_MAX].l = -1L; 1537 } 1538 } 1539 patparse++; 1540 countargs[P_CT_PTR].p = NULL; 1541 /* Mark this chain as a min/max count... */ 1542 patinsert(P_COUNTSTART, starter, (char *)countargs, sizeof(countargs)); 1543 /* 1544 * The next of the operand is a loop back to the P_COUNT. This is 1545 * how we get recursion for the count. We don't loop back to 1546 * the P_COUNTSTART; that's used for initialising the count 1547 * and saving and restoring the count for any enclosing use 1548 * of the match. 1549 */ 1550 opnd = P_OPERAND(starter) + P_CT_OPERAND; 1551 pattail(opnd, patnode(P_BACK)); 1552 pattail(opnd, P_OPERAND(starter)); 1553 /* 1554 * The next of the counter operators is what follows the 1555 * closure. 1556 * This handles matching of the tail. 1557 */ 1558 next = patnode(P_NOTHING); 1559 pattail(starter, next); 1560 pattail(P_OPERAND(starter), next); 1561 } else if ((flags & P_SIMPLE) && (op == P_ONEHASH || op == P_TWOHASH) && 1562 P_OP((Upat)patout+starter) == P_ANY) { 1563 /* Optimize ?# to *. Silly thing to do, since who would use 1564 * use ?# ? But it makes the later code shorter. 1565 */ 1566 Upat uptr = (Upat)patout + starter; 1567 if (op == P_TWOHASH) { 1568 /* ?## becomes ?* */ 1569 uptr->l = (uptr->l & ~0xff) | P_ANY; 1570 pattail(starter, patnode(P_STAR)); 1571 } else { 1572 uptr->l = (uptr->l & ~0xff) | P_STAR; 1573 } 1574 } else if ((flags & P_SIMPLE) && op && !(patglobflags & 0xff)) { 1575 /* Simplify, but not if we need to look for approximations. */ 1576 patinsert(op, starter, NULL, 0); 1577 } else if (op == P_ONEHASH) { 1578 /* Emit x# as (x&|), where & means "self". */ 1579 up.p = NULL; 1580 patinsert(P_WBRANCH, starter, (char *)&up, sizeof(up)); 1581 /* Either x */ 1582 patoptail(starter, patnode(P_BACK)); /* and loop */ 1583 patoptail(starter, starter); /* back */ 1584 pattail(starter, patnode(P_BRANCH)); /* or */ 1585 pattail(starter, patnode(P_NOTHING)); /* null. */ 1586 } else if (op == P_TWOHASH) { 1587 /* Emit x## as x(&|) where & means "self". */ 1588 next = patnode(P_WBRANCH); /* Either */ 1589 up.p = NULL; 1590 patadd((char *)&up, 0, sizeof(up), 0); 1591 pattail(starter, next); 1592 pattail(patnode(P_BACK), starter); /* loop back */ 1593 pattail(next, patnode(P_BRANCH)); /* or */ 1594 pattail(starter, patnode(P_NOTHING)); /* null. */ 1595 } else if (kshchar == '?') { 1596 /* Emit ?(x) as (x|) */ 1597 patinsert(P_BRANCH, starter, NULL, 0); /* Either x */ 1598 pattail(starter, patnode(P_BRANCH)); /* or */ 1599 next = patnode(P_NOTHING); /* null */ 1600 pattail(starter, next); 1601 patoptail(starter, next); 1602 } 1603 if (*patparse == Pound) 1604 return 0; 1605 1606 return starter; 1607} 1608 1609/* 1610 * Turn a ^foo (paren = 0) or !(foo) (paren = 1) into *~foo with 1611 * parentheses if necessary. As you see, that's really quite easy. 1612 */ 1613 1614/**/ 1615static long 1616patcompnot(int paren, int *flagsp) 1617{ 1618 union upat up; 1619 long excsync, br, excl, n, starter; 1620 int dummy; 1621 1622 /* Here, we're matching a star at the start. */ 1623 *flagsp = P_HSTART; 1624 1625 starter = patnode(P_BRANCH); 1626 br = patnode(P_STAR); 1627 excsync = patnode(P_EXCSYNC); 1628 pattail(br, excsync); 1629 pattail(starter, excl = patnode(P_EXCLUDE)); 1630 up.p = NULL; 1631 patadd((char *)&up, 0, sizeof(up), 0); 1632 if (!(br = (paren ? patcompswitch(1, &dummy) : patcompbranch(&dummy)))) 1633 return 0; 1634 pattail(br, patnode(P_EXCEND)); 1635 n = patnode(P_NOTHING); /* just so much easier */ 1636 pattail(excsync, n); 1637 pattail(excl, n); 1638 1639 return starter; 1640} 1641 1642/* Emit a node */ 1643 1644/**/ 1645static long 1646patnode(long op) 1647{ 1648 long starter = (Upat)patcode - (Upat)patout; 1649 union upat up; 1650 1651 up.l = op; 1652 patadd((char *)&up, 0, sizeof(union upat), 0); 1653 return starter; 1654} 1655 1656/* 1657 * insert an operator in front of an already emitted operand: 1658 * we relocate the operand. there had better be nothing else after. 1659 */ 1660 1661/**/ 1662static void 1663patinsert(long op, int opnd, char *xtra, int sz) 1664{ 1665 char *src, *dst, *opdst; 1666 union upat buf, *lptr; 1667 1668 buf.l = 0; 1669 patadd((char *)&buf, 0, sizeof(buf), 0); 1670 if (sz) 1671 patadd(xtra, 0, sz, 0); 1672 src = patcode - sizeof(union upat) - sz; 1673 dst = patcode; 1674 opdst = patout + opnd * sizeof(union upat); 1675 while (src > opdst) 1676 *--dst = *--src; 1677 1678 /* A cast can't be an lvalue */ 1679 lptr = (Upat)opdst; 1680 lptr->l = op; 1681 opdst += sizeof(union upat); 1682 while (sz--) 1683 *opdst++ = *xtra++; 1684} 1685 1686/* set the 'next' pointer at the end of a node chain */ 1687 1688/**/ 1689static void 1690pattail(long p, long val) 1691{ 1692 Upat scan, temp; 1693 long offset; 1694 1695 scan = (Upat)patout + p; 1696 for (;;) { 1697 if (!(temp = PATNEXT(scan))) 1698 break; 1699 scan = temp; 1700 } 1701 1702 offset = (P_OP(scan) == P_BACK) 1703 ? (scan - (Upat)patout) - val : val - (scan - (Upat)patout); 1704 1705 scan->l |= offset << 8; 1706} 1707 1708/* do pattail, but on operand of first argument; nop if operandless */ 1709 1710/**/ 1711static void patoptail(long p, long val) 1712{ 1713 Upat ptr = (Upat)patout + p; 1714 int op = P_OP(ptr); 1715 if (!p || !P_ISBRANCH(ptr)) 1716 return; 1717 if (op == P_BRANCH) 1718 pattail(P_OPERAND(p), val); 1719 else 1720 pattail(P_OPERAND(p) + 1, val); 1721} 1722 1723 1724/* 1725 * Run a pattern. 1726 */ 1727static char *patinstart; /* Start of input string */ 1728static char *patinend; /* End of input string */ 1729static char *patinput; /* String input pointer */ 1730static char *patinpath; /* Full path for use with ~ exclusions */ 1731static int patinlen; /* Length of last successful match. 1732 * Includes count of Meta characters. 1733 */ 1734 1735static char *patbeginp[NSUBEXP]; /* Pointer to backref beginnings */ 1736static char *patendp[NSUBEXP]; /* Pointer to backref ends */ 1737static int parsfound; /* parentheses (with backrefs) found */ 1738 1739static int globdots; /* Glob initial dots? */ 1740 1741/* 1742 * Character functions operating on unmetafied strings. 1743 */ 1744#ifdef MULTIBYTE_SUPPORT 1745 1746/* Get a character from the start point in a string */ 1747#define CHARREF(x, y) charref((x), (y)) 1748static wchar_t 1749charref(char *x, char *y) 1750{ 1751 wchar_t wc; 1752 size_t ret; 1753 1754 if (!(patglobflags & GF_MULTIBYTE) || !(STOUC(*x) & 0x80)) 1755 return (wchar_t) STOUC(*x); 1756 1757 ret = mbrtowc(&wc, x, y-x, &shiftstate); 1758 1759 if (ret == MB_INVALID || ret == MB_INCOMPLETE) { 1760 /* Error. Treat as single byte. */ 1761 /* Reset the shift state for next time. */ 1762 memset(&shiftstate, 0, sizeof(shiftstate)); 1763 return (wchar_t) STOUC(*x); 1764 } 1765 1766 return wc; 1767} 1768 1769/* Get a pointer to the next character */ 1770#define CHARNEXT(x, y) charnext((x), (y)) 1771static char * 1772charnext(char *x, char *y) 1773{ 1774 wchar_t wc; 1775 size_t ret; 1776 1777 if (!(patglobflags & GF_MULTIBYTE) || !(STOUC(*x) & 0x80)) 1778 return x + 1; 1779 1780 ret = mbrtowc(&wc, x, y-x, &shiftstate); 1781 1782 if (ret == MB_INVALID || ret == MB_INCOMPLETE) { 1783 /* Error. Treat as single byte. */ 1784 /* Reset the shift state for next time. */ 1785 memset(&shiftstate, 0, sizeof(shiftstate)); 1786 return x + 1; 1787 } 1788 1789 /* Nulls here are normal characters */ 1790 return x + (ret ? ret : 1); 1791} 1792 1793/* Increment a pointer past the current character. */ 1794#define CHARINC(x, y) ((x) = charnext((x), (y))) 1795 1796 1797/* Get a character and increment */ 1798#define CHARREFINC(x, y, z) charrefinc(&(x), (y), (z)) 1799static wchar_t 1800charrefinc(char **x, char *y, int *z) 1801{ 1802 wchar_t wc; 1803 size_t ret; 1804 1805 if (!(patglobflags & GF_MULTIBYTE) || !(STOUC(**x) & 0x80)) 1806 return (wchar_t) STOUC(*(*x)++); 1807 1808 ret = mbrtowc(&wc, *x, y-*x, &shiftstate); 1809 1810 if (ret == MB_INVALID || ret == MB_INCOMPLETE) { 1811 /* Error. Treat as single byte, but flag. */ 1812 *z = 1; 1813 /* Reset the shift state for next time. */ 1814 memset(&shiftstate, 0, sizeof(shiftstate)); 1815 return (wchar_t) STOUC(*(*x)++); 1816 } 1817 1818 /* Nulls here are normal characters */ 1819 *x += ret ? ret : 1; 1820 1821 return wc; 1822} 1823 1824 1825/* 1826 * Counter the number of characters between two pointers, smaller first 1827 * 1828 * This is used when setting values in parameters, so we obey 1829 * the MULTIBYTE option (even if it's been overridden locally). 1830 */ 1831#define CHARSUB(x,y) charsub(x, y) 1832static ptrdiff_t 1833charsub(char *x, char *y) 1834{ 1835 ptrdiff_t res = 0; 1836 size_t ret; 1837 wchar_t wc; 1838 1839 if (!isset(MULTIBYTE)) 1840 return y - x; 1841 1842 while (x < y) { 1843 ret = mbrtowc(&wc, x, y-x, &shiftstate); 1844 1845 if (ret == MB_INVALID || ret == MB_INCOMPLETE) { 1846 /* Error. Treat remainder as single characters */ 1847 return res + (y - x); 1848 } 1849 1850 /* Treat nulls as normal characters */ 1851 if (!ret) 1852 ret = 1; 1853 res++; 1854 x += ret; 1855 } 1856 1857 return res; 1858} 1859 1860#else /* no MULTIBYTE_SUPPORT */ 1861 1862/* Get a character from the start point in a string */ 1863#define CHARREF(x, y) (STOUC(*(x))) 1864/* Get a pointer to the next character */ 1865#define CHARNEXT(x, y) ((x)+1) 1866/* Increment a pointer past the current character. */ 1867#define CHARINC(x, y) ((x)++) 1868/* Get a character and increment */ 1869#define CHARREFINC(x, y, z) (STOUC(*(x)++)) 1870/* Counter the number of characters between two pointers, smaller first */ 1871#define CHARSUB(x,y) ((y) - (x)) 1872 1873#endif /* MULTIBYTE_SUPPORT */ 1874 1875/* 1876 * The following need to be accessed in the globbing scanner for 1877 * a multi-component file path. See horror story in glob.c. 1878 */ 1879/**/ 1880int errsfound; /* Total error count so far */ 1881 1882/**/ 1883int forceerrs; /* Forced maximum error count */ 1884 1885/**/ 1886void 1887pattrystart(void) 1888{ 1889 forceerrs = -1; 1890 errsfound = 0; 1891} 1892 1893/* 1894 * Test prog against null-terminated, metafied string. 1895 */ 1896 1897/**/ 1898mod_export int 1899pattry(Patprog prog, char *string) 1900{ 1901 return pattryrefs(prog, string, -1, -1, 0, NULL, NULL, NULL); 1902} 1903 1904/* 1905 * Test prog against string of given length, no null termination 1906 * but still metafied at this point. offset gives an offset 1907 * to include in reported match indices 1908 */ 1909 1910/**/ 1911mod_export int 1912pattrylen(Patprog prog, char *string, int len, int unmetalen, int offset) 1913{ 1914 return pattryrefs(prog, string, len, unmetalen, offset, NULL, NULL, NULL); 1915} 1916 1917/* 1918 * Test prog against string with given lengths. The input 1919 * string is metafied; stringlen is the raw string length, and 1920 * unmetalen the number of characters in the original string (some 1921 * of which may now be metafied). Either value may be -1 1922 * to indicate a null-terminated string which will be counted. Note 1923 * there may be a severe penalty for this if a lot of matching is done 1924 * on one string. 1925 * 1926 * offset is the position in the original string (not seen by 1927 * the pattern module) at which we are trying to match. 1928 * This is added in to the positions recorded in patbeginp and patendp 1929 * when we are looking for substrings. Currently this only happens 1930 * in the parameter substitution code. 1931 * 1932 * Note this is a character offset, i.e. a metafied character 1933 * counts as 1. 1934 * 1935 * The last three arguments are used to report the positions for the 1936 * backreferences. On entry, *nump should contain the maximum number 1937 * of positions to report. In this case the match, mbegin, mend 1938 * arrays are not altered. 1939 * 1940 * If nump is NULL but endp is not NULL, then *endp is set to the 1941 * end position of the match, taking into account patinstart. 1942 */ 1943 1944/**/ 1945mod_export int 1946pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen, 1947 int patoffset, 1948 int *nump, int *begp, int *endp) 1949{ 1950 int i, maxnpos = 0, ret, needfullpath, unmetalenp; 1951 int origlen; 1952 char **sp, **ep, *tryalloced, *ptr; 1953 char *progstr = (char *)prog + prog->startoff; 1954 1955 if (nump) { 1956 maxnpos = *nump; 1957 *nump = 0; 1958 } 1959 /* inherited from domatch, but why, exactly? */ 1960 if (*string == Nularg) { 1961 string++; 1962 unmetalen--; 1963 } 1964 1965 if (stringlen < 0) 1966 stringlen = strlen(string); 1967 origlen = stringlen; 1968 1969 patflags = prog->flags; 1970 /* 1971 * For a top-level ~-exclusion, we will need the full 1972 * path to exclude, so copy the path so far and append the 1973 * current test string. 1974 */ 1975 needfullpath = (patflags & PAT_HAS_EXCLUDP) && pathpos; 1976 1977 /* Get the length of the full string when unmetafied. */ 1978 if (unmetalen < 0) 1979 unmetalen = ztrsub(string + stringlen, string); 1980 if (needfullpath) 1981 unmetalenp = ztrsub(pathbuf + pathpos, pathbuf); 1982 else 1983 unmetalenp = 0; 1984 1985 DPUTS(needfullpath && (patflags & (PAT_PURES|PAT_ANY)), 1986 "rum sort of file exclusion"); 1987 /* 1988 * Partly for efficiency, and partly for the convenience of 1989 * globbing, we don't unmetafy pure string patterns, and 1990 * there's no reason to if the pattern is just a *. 1991 */ 1992 if (!(patflags & (PAT_PURES|PAT_ANY)) 1993 && (needfullpath || unmetalen != stringlen)) { 1994 /* 1995 * We need to copy if we need to prepend the path so far 1996 * (in which case we copy both chunks), or if we have 1997 * Meta characters. 1998 */ 1999 char *dst; 2000 int icopy, ncopy; 2001 2002 dst = tryalloced = zalloc(unmetalen + unmetalenp); 2003 2004 if (needfullpath) { 2005 /* loop twice, copy path buffer first time */ 2006 ptr = pathbuf; 2007 ncopy = unmetalenp; 2008 } else { 2009 /* just loop once, copy string with unmetafication */ 2010 ptr = string; 2011 ncopy = unmetalen; 2012 } 2013 for (icopy = 0; icopy < 2; icopy++) { 2014 for (i = 0; i < ncopy; i++) { 2015 if (*ptr == Meta) { 2016 ptr++; 2017 *dst++ = *ptr++ ^ 32; 2018 } else { 2019 *dst++ = *ptr++; 2020 } 2021 } 2022 if (!needfullpath) 2023 break; 2024 /* next time append test string to path so far */ 2025 ptr = string; 2026 ncopy = unmetalen; 2027 } 2028 2029 if (needfullpath) { 2030 patinstart = tryalloced + unmetalenp; 2031 patinpath = tryalloced; 2032 } else { 2033 patinstart = tryalloced; 2034 patinpath = NULL; 2035 } 2036 stringlen = unmetalen; 2037 } else { 2038 patinstart = string; 2039 tryalloced = patinpath = NULL; 2040 } 2041 2042 patinend = patinstart + stringlen; 2043 /* 2044 * From now on we do not require NULL termination of 2045 * the test string. There should also be no more references 2046 * to the variable string. 2047 */ 2048 2049 if (prog->flags & (PAT_PURES|PAT_ANY)) { 2050 /* 2051 * Either we are testing against a pure string, 2052 * or we can match anything at all. 2053 */ 2054 int ret; 2055 if (prog->flags & PAT_ANY) { 2056 /* 2057 * Optimisation for a single "*": always matches 2058 * (except for no_glob_dots, see below). 2059 */ 2060 ret = 1; 2061 } else { 2062 /* 2063 * Testing a pure string. See if initial 2064 * components match. 2065 */ 2066 int lendiff = stringlen - prog->patmlen; 2067 if (lendiff < 0) { 2068 /* No, the pattern string is too long. */ 2069 ret = 0; 2070 } else if (!memcmp(progstr, patinstart, prog->patmlen)) { 2071 /* 2072 * Initial component matches. Matches either 2073 * if lengths are the same or we are not anchored 2074 * to the end of the string. 2075 */ 2076 ret = !lendiff || (prog->flags & PAT_NOANCH); 2077 } else { 2078 /* No match. */ 2079 ret = 0; 2080 } 2081 } 2082 if (ret) { 2083 /* 2084 * For files, we won't match initial "."s unless 2085 * glob_dots is set. 2086 */ 2087 if ((prog->flags & PAT_NOGLD) && *patinstart == '.') { 2088 ret = 0; 2089 } else { 2090 /* 2091 * Remember the length in case used for ${..#..} etc. 2092 * In this case, we didn't unmetafy the string. 2093 */ 2094 patinlen = (int)prog->patmlen; 2095 /* if matching files, must update globbing flags */ 2096 patglobflags = prog->globend; 2097 2098 if ((patglobflags & GF_MATCHREF) && 2099 !(patflags & PAT_FILE)) { 2100 char *str = ztrduppfx(patinstart, patinlen); 2101 char *ptr = patinstart; 2102 int mlen = 0; 2103 2104 /* 2105 * Count the characters. We're not using CHARSUB() 2106 * because the string is still metafied. We're 2107 * not using mb_metastrlen() because that expects 2108 * the string to be null terminated. 2109 */ 2110 MB_METACHARINIT(); 2111 while (ptr < patinstart + patinlen) { 2112 mlen++; 2113 ptr += MB_METACHARLEN(ptr); 2114 } 2115 2116 setsparam("MATCH", str); 2117 setiparam("MBEGIN", 2118 (zlong)(patoffset + !isset(KSHARRAYS))); 2119 setiparam("MEND", 2120 (zlong)(mlen + patoffset + 2121 !isset(KSHARRAYS) - 1)); 2122 } 2123 } 2124 } 2125 2126 if (tryalloced) 2127 zfree(tryalloced, unmetalen + unmetalenp); 2128 2129 return ret; 2130 } else { 2131 /* 2132 * Test for a `must match' string, unless we're scanning for a match 2133 * in which case we don't need to do this each time. 2134 */ 2135 ret = 1; 2136 if (!(prog->flags & PAT_SCAN) && prog->mustoff) 2137 { 2138 char *testptr; /* start pointer into test string */ 2139 char *teststop; /* last point from which we can match */ 2140 char *patptr = (char *)prog + prog->mustoff; 2141 int patlen = prog->patmlen; 2142 int found = 0; 2143 2144 if (patlen > stringlen) { 2145 /* Too long, can't match. */ 2146 ret = 0; 2147 } else { 2148 teststop = patinend - patlen; 2149 2150 for (testptr = patinstart; testptr <= teststop; testptr++) 2151 { 2152 if (!memcmp(testptr, patptr, patlen)) { 2153 found = 1; 2154 break; 2155 } 2156 } 2157 2158 if (!found) 2159 ret = 0; 2160 } 2161 } 2162 if (!ret) { 2163 if (tryalloced) 2164 zfree(tryalloced, unmetalen + unmetalenp); 2165 return 0; 2166 } 2167 2168 patglobflags = prog->globflags; 2169 if (!(patflags & PAT_FILE)) { 2170 forceerrs = -1; 2171 errsfound = 0; 2172 } 2173 globdots = !(patflags & PAT_NOGLD); 2174 parsfound = 0; 2175 2176 patinput = patinstart; 2177 2178 if (patmatch((Upat)progstr)) { 2179 /* 2180 * we were lazy and didn't save the globflags if an exclusion 2181 * failed, so set it now 2182 */ 2183 patglobflags = prog->globend; 2184 2185 /* 2186 * Record length of successful match, including Meta 2187 * characters. Do it here so that patmatchlen() can return 2188 * it even if we delete the pattern strings. 2189 */ 2190 patinlen = patinput - patinstart; 2191 /* 2192 * Optimization: if we didn't find any Meta characters 2193 * to begin with, we don't need to look for them now. 2194 */ 2195 if (unmetalen != origlen) { 2196 for (ptr = patinstart; ptr < patinput; ptr++) 2197 if (imeta(*ptr)) 2198 patinlen++; 2199 } 2200 2201 /* 2202 * Should we clear backreferences and matches on a failed 2203 * match? 2204 */ 2205 if ((patglobflags & GF_MATCHREF) && !(patflags & PAT_FILE)) { 2206 /* 2207 * m flag: for global match. This carries no overhead 2208 * in the pattern matching part. 2209 * 2210 * Remember the test pattern is already unmetafied. 2211 */ 2212 char *str; 2213 int mlen = CHARSUB(patinstart, patinput); 2214 2215 str = metafy(patinstart, patinput - patinstart, META_DUP); 2216 setsparam("MATCH", str); 2217 setiparam("MBEGIN", (zlong)(patoffset + !isset(KSHARRAYS))); 2218 setiparam("MEND", 2219 (zlong)(mlen + patoffset + 2220 !isset(KSHARRAYS) - 1)); 2221 } 2222 if (prog->patnpar && nump) { 2223 /* 2224 * b flag: for backreferences using parentheses. Reported 2225 * directly. 2226 */ 2227 *nump = prog->patnpar; 2228 2229 sp = patbeginp; 2230 ep = patendp; 2231 2232 for (i = 0; i < prog->patnpar && i < maxnpos; i++) { 2233 if (parsfound & (1 << i)) { 2234 if (begp) 2235 *begp++ = CHARSUB(patinstart, *sp) + patoffset; 2236 if (endp) 2237 *endp++ = CHARSUB(patinstart, *ep) + patoffset 2238 - 1; 2239 } else { 2240 if (begp) 2241 *begp++ = -1; 2242 if (endp) 2243 *endp++ = -1; 2244 } 2245 2246 sp++; 2247 ep++; 2248 } 2249 } else if (prog->patnpar && !(patflags & PAT_FILE)) { 2250 /* 2251 * b flag: for backreferences using parentheses. 2252 */ 2253 int palen = prog->patnpar+1; 2254 char **matcharr, **mbeginarr, **mendarr; 2255 char numbuf[DIGBUFSIZE]; 2256 2257 matcharr = zshcalloc(palen*sizeof(char *)); 2258 mbeginarr = zshcalloc(palen*sizeof(char *)); 2259 mendarr = zshcalloc(palen*sizeof(char *)); 2260 2261 sp = patbeginp; 2262 ep = patendp; 2263 2264 for (i = 0; i < prog->patnpar; i++) { 2265 if (parsfound & (1 << i)) { 2266 matcharr[i] = metafy(*sp, *ep - *sp, META_DUP); 2267 /* 2268 * mbegin and mend give indexes into the string 2269 * in the standard notation, i.e. respecting 2270 * KSHARRAYS, and with the end index giving 2271 * the last character, not one beyond. 2272 * For example, foo=foo; [[ $foo = (f)oo ]] gives 2273 * (without KSHARRAYS) indexes 1 and 1, which 2274 * corresponds to indexing as ${foo[1,1]}. 2275 */ 2276 sprintf(numbuf, "%ld", 2277 (long)(CHARSUB(patinstart, *sp) + 2278 patoffset + 2279 !isset(KSHARRAYS))); 2280 mbeginarr[i] = ztrdup(numbuf); 2281 sprintf(numbuf, "%ld", 2282 (long)(CHARSUB(patinstart, *ep) + 2283 patoffset + 2284 !isset(KSHARRAYS) - 1)); 2285 mendarr[i] = ztrdup(numbuf); 2286 } else { 2287 /* Pattern wasn't set: either it was in an 2288 * unmatched branch, or a hashed parenthesis 2289 * that didn't match at all. 2290 */ 2291 matcharr[i] = ztrdup(""); 2292 mbeginarr[i] = ztrdup("-1"); 2293 mendarr[i] = ztrdup("-1"); 2294 } 2295 sp++; 2296 ep++; 2297 } 2298 setaparam("match", matcharr); 2299 setaparam("mbegin", mbeginarr); 2300 setaparam("mend", mendarr); 2301 } 2302 2303 if (!nump && endp) { 2304 /* 2305 * We just need the overall end position. 2306 */ 2307 *endp = CHARSUB(patinstart, patinput) + patoffset; 2308 } 2309 2310 ret = 1; 2311 } else 2312 ret = 0; 2313 2314 if (tryalloced) 2315 zfree(tryalloced, unmetalen + unmetalenp); 2316 2317 return ret; 2318 } 2319} 2320 2321/* 2322 * Return length of previous succesful match. This is 2323 * in metafied bytes, i.e. includes a count of Meta characters. 2324 * Unusual and futile attempt at modular encapsulation. 2325 */ 2326 2327/**/ 2328int 2329patmatchlen(void) 2330{ 2331 return patinlen; 2332} 2333 2334/* 2335 * Match literal characters with case insensitivity test: the first 2336 * comes from the input string, the second the current pattern. 2337 */ 2338#ifdef MULTIBYTE_SUPPORT 2339#define ISUPPER(x) iswupper(x) 2340#define ISLOWER(x) iswlower(x) 2341#define TOUPPER(x) towupper(x) 2342#define TOLOWER(x) towlower(x) 2343#define ISDIGIT(x) iswdigit(x) 2344#else 2345#define ISUPPER(x) isupper(x) 2346#define ISLOWER(x) islower(x) 2347#define TOUPPER(x) toupper(x) 2348#define TOLOWER(x) tolower(x) 2349#define ISDIGIT(x) idigit(x) 2350#endif 2351#define CHARMATCH(chin, chpa) (chin == chpa || \ 2352 ((patglobflags & GF_IGNCASE) ? \ 2353 ((ISUPPER(chin) ? TOLOWER(chin) : chin) == \ 2354 (ISUPPER(chpa) ? TOLOWER(chpa) : chpa)) : \ 2355 (patglobflags & GF_LCMATCHUC) ? \ 2356 (ISLOWER(chpa) && TOUPPER(chpa) == chin) : 0)) 2357 2358/* 2359 * The same but caching an expression from the first argument, 2360 * Requires local charmatch_cache definition. 2361 */ 2362#define CHARMATCH_EXPR(expr, chpa) \ 2363 (charmatch_cache = (expr), CHARMATCH(charmatch_cache, chpa)) 2364 2365/* 2366 * exactpos is used to remember how far down an exact string we have 2367 * matched, if we are doing approximation and can therefore redo from 2368 * the same point; we never need to otherwise. 2369 * 2370 * exactend is a pointer to the end of the string, which isn't 2371 * null-terminated. 2372 */ 2373static char *exactpos, *exactend; 2374 2375/* 2376 * Main matching routine. 2377 * 2378 * Testing the tail end of a match is usually done by recursion, but 2379 * we try to eliminate that in favour of looping for simple cases. 2380 */ 2381 2382/**/ 2383static int 2384patmatch(Upat prog) 2385{ 2386 /* Current and next nodes */ 2387 Upat scan = prog, next, opnd; 2388 char *start, *save, *chrop, *chrend, *compend; 2389 int savglobflags, op, no, min, fail = 0, saverrsfound; 2390 zrange_t from, to, comp; 2391 patint_t nextch; 2392 2393 while (scan) { 2394 next = PATNEXT(scan); 2395 2396 if (!globdots && P_NOTDOT(scan) && patinput == patinstart && 2397 patinput < patinend && *patinput == '.') 2398 return 0; 2399 2400 switch (P_OP(scan)) { 2401 case P_ANY: 2402 if (patinput == patinend) 2403 fail = 1; 2404 else 2405 CHARINC(patinput, patinend); 2406 break; 2407 case P_EXACTLY: 2408 /* 2409 * acts as nothing if *chrop is null: this is used by 2410 * approx code. 2411 */ 2412 if (exactpos) { 2413 chrop = exactpos; 2414 chrend = exactend; 2415 } else { 2416 chrop = P_LS_STR(scan); 2417 chrend = chrop + P_LS_LEN(scan); 2418 } 2419 exactpos = NULL; 2420 while (chrop < chrend && patinput < patinend) { 2421 char *savpatinput = patinput; 2422 char *savchrop = chrop; 2423 int badin = 0, badpa = 0; 2424 /* 2425 * Care with character matching: 2426 * We do need to convert the character to wide 2427 * representation if possible, because we may need 2428 * to do case transformation. However, we should 2429 * be careful in case one, but not the other, wasn't 2430 * representable in the current locale---in that 2431 * case they don't match even if the returned 2432 * values (one properly converted, one raw) are 2433 * the same. 2434 */ 2435 patint_t chin = CHARREFINC(patinput, patinend, &badin); 2436 patint_t chpa = CHARREFINC(chrop, chrend, &badpa); 2437 if (!CHARMATCH(chin, chpa) || badin != badpa) { 2438 fail = 1; 2439 patinput = savpatinput; 2440 chrop = savchrop; 2441 break; 2442 } 2443 } 2444 if (chrop < chrend) { 2445 exactpos = chrop; 2446 exactend = chrend; 2447 fail = 1; 2448 } 2449 break; 2450 case P_ANYOF: 2451 case P_ANYBUT: 2452 if (patinput == patinend) 2453 fail = 1; 2454 else { 2455#ifdef MULTIBYTE_SUPPORT 2456 wchar_t cr = CHARREF(patinput, patinend); 2457 char *scanop = (char *)P_OPERAND(scan); 2458 if (patglobflags & GF_MULTIBYTE) { 2459 if (mb_patmatchrange(scanop, cr, NULL, NULL) ^ 2460 (P_OP(scan) == P_ANYOF)) 2461 fail = 1; 2462 else 2463 CHARINC(patinput, patinend); 2464 } else if (patmatchrange(scanop, (int)cr, NULL, NULL) ^ 2465 (P_OP(scan) == P_ANYOF)) 2466 fail = 1; 2467 else 2468 CHARINC(patinput, patinend); 2469#else 2470 if (patmatchrange((char *)P_OPERAND(scan), 2471 CHARREF(patinput, patinend), NULL, NULL) ^ 2472 (P_OP(scan) == P_ANYOF)) 2473 fail = 1; 2474 else 2475 CHARINC(patinput, patinend); 2476#endif 2477 } 2478 break; 2479 case P_NUMRNG: 2480 case P_NUMFROM: 2481 case P_NUMTO: 2482 /* 2483 * To do this properly, we really have to treat numbers as 2484 * closures: that's so things like <1-1000>33 will 2485 * match 633 (they didn't up to 3.1.6). To avoid making this 2486 * too inefficient, we see if there's an exact match next: 2487 * if there is, and it's not a digit, we return 1 after 2488 * the first attempt. 2489 */ 2490 op = P_OP(scan); 2491 start = (char *)P_OPERAND(scan); 2492 from = to = 0; 2493 if (op != P_NUMTO) { 2494#ifdef ZSH_64_BIT_TYPE 2495 /* We can't rely on pointer alignment being good enough. */ 2496 memcpy((char *)&from, start, sizeof(zrange_t)); 2497#else 2498 from = *((zrange_t *) start); 2499#endif 2500 start += sizeof(zrange_t); 2501 } 2502 if (op != P_NUMFROM) { 2503#ifdef ZSH_64_BIT_TYPE 2504 memcpy((char *)&to, start, sizeof(zrange_t)); 2505#else 2506 to = *((zrange_t *) start); 2507#endif 2508 } 2509 start = compend = patinput; 2510 comp = 0; 2511 while (patinput < patinend && idigit(*patinput)) { 2512 if (comp) 2513 comp *= 10; 2514 comp += *patinput - '0'; 2515 patinput++; 2516 compend++; 2517 2518 if (comp & ((zrange_t)1 << (sizeof(comp)*8 - 2519#ifdef ZRANGE_T_IS_SIGNED 2520 2 2521#else 2522 1 2523#endif 2524 ))) { 2525 /* 2526 * Out of range (allowing for signedness, which 2527 * we need if we are using zlongs). 2528 * This is as far as we can go. 2529 * If we're doing a range "from", skip all the 2530 * remaining numbers. Otherwise, we can't 2531 * match beyond the previous point anyway. 2532 * Leave the pointer to the last calculated 2533 * position (compend) where it was before. 2534 */ 2535 if (op == P_NUMFROM) { 2536 while (patinput < patinend && idigit(*patinput)) 2537 patinput++; 2538 } 2539 } 2540 } 2541 save = patinput; 2542 no = 0; 2543 while (patinput > start) { 2544 /* if already too small, no power on earth can save it */ 2545 if (comp < from && patinput <= compend) 2546 break; 2547 if ((op == P_NUMFROM || comp <= to) && patmatch(next)) { 2548 return 1; 2549 } 2550 if (!no && P_OP(next) == P_EXACTLY && 2551 (!P_LS_LEN(next) || 2552 !idigit(STOUC(*P_LS_STR(next)))) && 2553 !(patglobflags & 0xff)) 2554 return 0; 2555 patinput = --save; 2556 no++; 2557 /* 2558 * With a range start and an unrepresentable test 2559 * number, we just back down the test string without 2560 * changing the number until we get to a representable 2561 * one. 2562 */ 2563 if (patinput < compend) 2564 comp /= 10; 2565 } 2566 patinput = start; 2567 fail = 1; 2568 break; 2569 case P_NUMANY: 2570 /* This is <->: any old set of digits, don't bother comparing */ 2571 start = patinput; 2572 while (patinput < patinend && idigit(*patinput)) 2573 patinput++; 2574 save = patinput; 2575 no = 0; 2576 while (patinput > start) { 2577 if (patmatch(next)) 2578 return 1; 2579 if (!no && P_OP(next) == P_EXACTLY && 2580 (!P_LS_LEN(next) || 2581 !idigit(*P_LS_STR(next))) && 2582 !(patglobflags & 0xff)) 2583 return 0; 2584 patinput = --save; 2585 no++; 2586 } 2587 patinput = start; 2588 fail = 1; 2589 break; 2590 case P_NOTHING: 2591 break; 2592 case P_BACK: 2593 break; 2594 case P_GFLAGS: 2595 patglobflags = P_OPERAND(scan)->l; 2596 break; 2597 case P_OPEN: 2598 case P_OPEN+1: 2599 case P_OPEN+2: 2600 case P_OPEN+3: 2601 case P_OPEN+4: 2602 case P_OPEN+5: 2603 case P_OPEN+6: 2604 case P_OPEN+7: 2605 case P_OPEN+8: 2606 case P_OPEN+9: 2607 no = P_OP(scan) - P_OPEN; 2608 save = patinput; 2609 2610 if (patmatch(next)) { 2611 /* 2612 * Don't set patbeginp if some later invocation of 2613 * the same parentheses already has. 2614 */ 2615 if (no && !(parsfound & (1 << (no - 1)))) { 2616 patbeginp[no-1] = save; 2617 parsfound |= 1 << (no - 1); 2618 } 2619 return 1; 2620 } else 2621 return 0; 2622 break; 2623 case P_CLOSE: 2624 case P_CLOSE+1: 2625 case P_CLOSE+2: 2626 case P_CLOSE+3: 2627 case P_CLOSE+4: 2628 case P_CLOSE+5: 2629 case P_CLOSE+6: 2630 case P_CLOSE+7: 2631 case P_CLOSE+8: 2632 case P_CLOSE+9: 2633 no = P_OP(scan) - P_CLOSE; 2634 save = patinput; 2635 2636 if (patmatch(next)) { 2637 if (no && !(parsfound & (1 << (no + 15)))) { 2638 patendp[no-1] = save; 2639 parsfound |= 1 << (no + 15); 2640 } 2641 return 1; 2642 } else 2643 return 0; 2644 break; 2645 case P_EXCSYNC: 2646 /* See the P_EXCLUDE code below for where syncptr comes from */ 2647 { 2648 unsigned char *syncptr; 2649 Upat after; 2650 after = P_OPERAND(scan); 2651 DPUTS(!P_ISEXCLUDE(after), 2652 "BUG: EXCSYNC not followed by EXCLUDE."); 2653 DPUTS(!P_OPERAND(after)->p, 2654 "BUG: EXCSYNC not handled by EXCLUDE"); 2655 syncptr = P_OPERAND(after)->p + (patinput - patinstart); 2656 /* 2657 * If we already matched from here, this time we fail. 2658 * See WBRANCH code for story about error count. 2659 */ 2660 if (*syncptr && errsfound + 1 >= *syncptr) 2661 return 0; 2662 /* 2663 * Else record that we (possibly) matched this time. 2664 * No harm if we don't: then the previous test will just 2665 * short cut the attempted match that is bound to fail. 2666 * We never try to exclude something that has already 2667 * failed anyway. 2668 */ 2669 *syncptr = errsfound + 1; 2670 } 2671 break; 2672 case P_EXCEND: 2673 /* 2674 * This is followed by a P_EXCSYNC, but only in the P_EXCLUDE 2675 * branch. Actually, we don't bother following it: all we 2676 * need to know is that we successfully matched so far up 2677 * to the end of the asserted pattern; the endpoint 2678 * in the target string is nulled out. 2679 */ 2680 if (!(fail = (patinput < patinend))) 2681 return 1; 2682 break; 2683 case P_BRANCH: 2684 case P_WBRANCH: 2685 /* P_EXCLUDE shouldn't occur without a P_BRANCH */ 2686 if (!P_ISBRANCH(next)) { 2687 /* no choice, avoid recursion */ 2688 DPUTS(P_OP(scan) == P_WBRANCH, 2689 "BUG: WBRANCH with no alternative."); 2690 next = P_OPERAND(scan); 2691 } else { 2692 do { 2693 save = patinput; 2694 savglobflags = patglobflags; 2695 saverrsfound = errsfound; 2696 if (P_ISEXCLUDE(next)) { 2697 /* 2698 * The strategy is to test the asserted pattern, 2699 * recording via P_EXCSYNC how far the part to 2700 * be excluded matched. We then set the 2701 * length of the test string to that 2702 * point and see if the exclusion as far as 2703 * P_EXCEND also matches that string. 2704 * We need to keep testing the asserted pattern 2705 * by backtracking, since the first attempt 2706 * may be excluded while a later attempt may not. 2707 * For this we keep a pointer just after 2708 * the P_EXCLUDE which is tested by the P_EXCSYNC 2709 * to see if we matched there last time, in which 2710 * case we fail. If there is nothing to backtrack 2711 * over, that doesn't matter: we should fail anyway. 2712 * The pointer also tells us where the asserted 2713 * pattern matched for use by the exclusion. 2714 * 2715 * It's hard to allocate space for this 2716 * beforehand since we may need to do it 2717 * recursively. 2718 * 2719 * P.S. in case you were wondering, this code 2720 * is horrible. 2721 */ 2722 Upat syncstrp; 2723 char *origpatinend; 2724 unsigned char *oldsyncstr; 2725 char *matchpt = NULL; 2726 int ret, savglobdots, matchederrs = 0; 2727 int savparsfound = parsfound; 2728 DPUTS(P_OP(scan) == P_WBRANCH, 2729 "BUG: excluded WBRANCH"); 2730 syncstrp = P_OPERAND(next); 2731 /* 2732 * Unlike WBRANCH, each test at the same exclude 2733 * sync point (due to an external loop) is separate, 2734 * i.e testing (foo~bar)# is no different from 2735 * (foo~bar)(foo~bar)... from the exclusion point 2736 * of view, so we use a different sync string. 2737 */ 2738 oldsyncstr = syncstrp->p; 2739 syncstrp->p = (unsigned char *) 2740 zshcalloc((patinend - patinstart) + 1); 2741 origpatinend = patinend; 2742 while ((ret = patmatch(P_OPERAND(scan)))) { 2743 unsigned char *syncpt; 2744 char *savpatinstart; 2745 int savforce = forceerrs; 2746 int savpatflags = patflags, synclen; 2747 forceerrs = -1; 2748 savglobdots = globdots; 2749 matchederrs = errsfound; 2750 matchpt = patinput; /* may not be end */ 2751 globdots = 1; /* OK to match . first */ 2752 /* Find the point where the scan 2753 * matched the part to be excluded: because 2754 * of backtracking, the one 2755 * most recently matched will be the first. 2756 * (Luckily, backtracking is done after all 2757 * possibilities for approximation have been 2758 * checked.) 2759 */ 2760 for (syncpt = syncstrp->p; !*syncpt; syncpt++) 2761 ; 2762 synclen = syncpt - syncstrp->p; 2763 if (patinstart + synclen != patinend) { 2764 /* 2765 * Temporarily mark the string as 2766 * ending at this point. 2767 */ 2768 DPUTS(patinstart + synclen > matchpt, 2769 "BUG: EXCSYNC failed"); 2770 2771 patinend = patinstart + synclen; 2772 /* 2773 * If this isn't really the end of the string, 2774 * remember this for the (#e) assertion. 2775 */ 2776 patflags |= PAT_NOTEND; 2777 } 2778 savpatinstart = patinstart; 2779 next = PATNEXT(scan); 2780 while (next && P_ISEXCLUDE(next)) { 2781 patinput = save; 2782 /* 2783 * turn off approximations in exclusions: 2784 * note we keep remaining patglobflags 2785 * set by asserted branch (or previous 2786 * excluded branches, for consistency). 2787 */ 2788 patglobflags &= ~0xff; 2789 errsfound = 0; 2790 opnd = P_OPERAND(next) + 1; 2791 if (P_OP(next) == P_EXCLUDP && patinpath) { 2792 /* 2793 * Top level exclusion with a file, 2794 * applies to whole path so add the 2795 * segments already matched. 2796 * We copied these in front of the 2797 * test pattern, so patinend doesn't 2798 * need moving. 2799 */ 2800 DPUTS(patinput != patinstart, 2801 "BUG: not at start excluding path"); 2802 patinput = patinstart = patinpath; 2803 } 2804 if (patmatch(opnd)) { 2805 ret = 0; 2806 /* 2807 * Another subtlety: if we exclude the 2808 * match, any parentheses just found 2809 * become invalidated. 2810 */ 2811 parsfound = savparsfound; 2812 } 2813 if (patinpath) { 2814 patinput = savpatinstart + 2815 (patinput - patinstart); 2816 patinstart = savpatinstart; 2817 } 2818 if (!ret) 2819 break; 2820 next = PATNEXT(next); 2821 } 2822 /* 2823 * Restore original end position. 2824 */ 2825 patinend = origpatinend; 2826 patflags = savpatflags; 2827 globdots = savglobdots; 2828 forceerrs = savforce; 2829 if (ret) 2830 break; 2831 patinput = save; 2832 patglobflags = savglobflags; 2833 errsfound = saverrsfound; 2834 } 2835 zfree((char *)syncstrp->p, 2836 (patinend - patinstart) + 1); 2837 syncstrp->p = oldsyncstr; 2838 if (ret) { 2839 patinput = matchpt; 2840 errsfound = matchederrs; 2841 return 1; 2842 } 2843 while ((scan = PATNEXT(scan)) && 2844 P_ISEXCLUDE(scan)) 2845 ; 2846 } else { 2847 int ret = 1, pfree = 0; 2848 Upat ptrp = NULL; 2849 unsigned char *ptr; 2850 if (P_OP(scan) == P_WBRANCH) { 2851 /* 2852 * This is where we make sure that we are not 2853 * repeatedly matching zero-length strings in 2854 * a closure, which would cause an infinite loop, 2855 * and also remove exponential behaviour in 2856 * backtracking nested closures. 2857 * The P_WBRANCH operator leaves a space for a 2858 * uchar *, initialized to NULL, which is 2859 * turned into a string the same length as the 2860 * target string. Every time we match from a 2861 * particular point in the target string, we 2862 * stick a 1 at the corresponding point here. 2863 * If we come round to the same branch again, and 2864 * there is already a 1, then the test fails. 2865 */ 2866 opnd = P_OPERAND(scan); 2867 ptrp = opnd++; 2868 if (!ptrp->p) { 2869 ptrp->p = (unsigned char *) 2870 zshcalloc((patinend - patinstart) + 1); 2871 pfree = 1; 2872 } 2873 ptr = ptrp->p + (patinput - patinstart); 2874 2875 /* 2876 * Without approximation, this is just a 2877 * single bit test. With approximation, we 2878 * need to know how many errors there were 2879 * last time we made the test. If errsfound 2880 * is now smaller than it was, hence we can 2881 * make more approximations in the remaining 2882 * code, we continue with the test. 2883 * (This is why the max number of errors is 2884 * 254, not 255.) 2885 */ 2886 if (*ptr && errsfound + 1 >= *ptr) 2887 ret = 0; 2888 *ptr = errsfound + 1; 2889 } else 2890 opnd = P_OPERAND(scan); 2891 if (ret) 2892 ret = patmatch(opnd); 2893 if (pfree) { 2894 zfree((char *)ptrp->p, 2895 (patinend - patinstart) + 1); 2896 ptrp->p = NULL; 2897 } 2898 if (ret) 2899 return 1; 2900 scan = PATNEXT(scan); 2901 } 2902 patinput = save; 2903 patglobflags = savglobflags; 2904 errsfound = saverrsfound; 2905 DPUTS(P_OP(scan) == P_WBRANCH, 2906 "BUG: WBRANCH not first choice."); 2907 next = PATNEXT(scan); 2908 } while (scan && P_ISBRANCH(scan)); 2909 return 0; 2910 } 2911 break; 2912 case P_STAR: 2913 /* Handle specially for speed, although really P_ONEHASH+P_ANY */ 2914 case P_ONEHASH: 2915 case P_TWOHASH: 2916 /* 2917 * This is just simple cases, matching one character. 2918 * With approximations, we still handle * this way, since 2919 * no approximation is ever necessary, but other closures 2920 * are handled by the more complicated branching method 2921 */ 2922 op = P_OP(scan); 2923 /* Note that no counts possibly metafied characters */ 2924 start = patinput; 2925 { 2926 char *lastcharstart; 2927 /* 2928 * Array to record the start of characters for 2929 * backtracking. 2930 */ 2931 VARARR(char, charstart, patinend-patinput); 2932 memset(charstart, 0, patinend-patinput); 2933 2934 if (op == P_STAR) { 2935 for (no = 0; patinput < patinend; 2936 CHARINC(patinput, patinend)) 2937 { 2938 charstart[patinput-start] = 1; 2939 no++; 2940 } 2941 /* simple optimization for reasonably common case */ 2942 if (P_OP(next) == P_END) 2943 return 1; 2944 } else { 2945 DPUTS(patglobflags & 0xff, 2946 "BUG: wrong backtracking with approximation."); 2947 if (!globdots && P_NOTDOT(P_OPERAND(scan)) && 2948 patinput == patinstart && patinput < patinend && 2949 CHARREF(patinput, patinend) == ZWC('.')) 2950 return 0; 2951 no = patrepeat(P_OPERAND(scan), charstart); 2952 } 2953 min = (op == P_TWOHASH) ? 1 : 0; 2954 /* 2955 * Lookahead to avoid useless matches. This is not possible 2956 * with approximation. 2957 */ 2958 if (P_OP(next) == P_EXACTLY && P_LS_LEN(next) && 2959 !(patglobflags & 0xff)) { 2960 char *nextop = P_LS_STR(next); 2961#ifdef MULTIBYTE_SUPPORT 2962 /* else second argument of CHARREF isn't used */ 2963 int nextlen = P_LS_LEN(next); 2964#endif 2965 /* 2966 * If that P_EXACTLY is last (common in simple patterns, 2967 * such as *.c), then it can be only be matched at one 2968 * point in the test string, so record that. 2969 */ 2970 if (P_OP(PATNEXT(next)) == P_END && 2971 !(patflags & PAT_NOANCH)) { 2972 int ptlen = patinend - patinput; 2973 int lenmatch = patinend - 2974 (min ? CHARNEXT(start, patinend) : start); 2975 /* Are we in the right range? */ 2976 if (P_LS_LEN(next) > lenmatch || 2977 P_LS_LEN(next) < ptlen) 2978 return 0; 2979 /* Yes, just position appropriately and test. */ 2980 patinput += ptlen - P_LS_LEN(next); 2981 /* 2982 * Here we will need to be careful that patinput is not 2983 * in the middle of a multibyte character. 2984 */ 2985 /* Continue loop with P_EXACTLY test. */ 2986 break; 2987 } 2988 nextch = CHARREF(nextop, nextop + nextlen); 2989 } else 2990 nextch = PEOF; 2991 savglobflags = patglobflags; 2992 saverrsfound = errsfound; 2993 lastcharstart = charstart + (patinput - start); 2994 if (no >= min) { 2995 for (;;) { 2996 patint_t charmatch_cache; 2997 if (nextch == PEOF || 2998 (patinput < patinend && 2999 CHARMATCH_EXPR(CHARREF(patinput, patinend), 3000 nextch))) { 3001 if (patmatch(next)) 3002 return 1; 3003 } 3004 if (--no < min) 3005 break; 3006 /* find start of previous full character */ 3007 while (!*--lastcharstart) 3008 DPUTS(lastcharstart < charstart, 3009 "lastcharstart invalid"); 3010 patinput = start + (lastcharstart-charstart); 3011 patglobflags = savglobflags; 3012 errsfound = saverrsfound; 3013 } 3014 } 3015 } 3016 /* 3017 * As with branches, the patmatch(next) stuff for * 3018 * handles approximation, so we don't need to try 3019 * anything here. 3020 */ 3021 return 0; 3022 case P_ISSTART: 3023 if (patinput != patinstart || (patflags & PAT_NOTSTART)) 3024 fail = 1; 3025 break; 3026 case P_ISEND: 3027 if (patinput < patinend || (patflags & PAT_NOTEND)) 3028 fail = 1; 3029 break; 3030 case P_COUNTSTART: 3031 { 3032 /* 3033 * Save and restore the current count and the 3034 * start pointer in case the pattern has been 3035 * executed by a previous repetition of a 3036 * closure. 3037 */ 3038 long *curptr = &P_OPERAND(scan)[P_CT_CURRENT].l; 3039 long savecount = *curptr; 3040 unsigned char *saveptr = scan[P_CT_PTR].p; 3041 int ret; 3042 3043 *curptr = 0L; 3044 ret = patmatch(P_OPERAND(scan)); 3045 *curptr = savecount; 3046 scan[P_CT_PTR].p = saveptr; 3047 return ret; 3048 } 3049 case P_COUNT: 3050 { 3051 /* (#cN,M): execution is relatively straightforward */ 3052 long cur = scan[P_CT_CURRENT].l; 3053 long min = scan[P_CT_MIN].l; 3054 long max = scan[P_CT_MAX].l; 3055 3056 if (cur && cur >= min && 3057 (unsigned char *)patinput == scan[P_CT_PTR].p) { 3058 /* 3059 * Not at the first attempt to match so 3060 * the previous attempt managed zero length. 3061 * We can do this indefinitely so there's 3062 * no point in going on. Simply try to 3063 * match the remainder of the pattern. 3064 */ 3065 return patmatch(next); 3066 } 3067 scan[P_CT_PTR].p = (unsigned char *)patinput; 3068 3069 if (max < 0 || cur < max) { 3070 char *patinput_thistime = patinput; 3071 scan[P_CT_CURRENT].l = cur + 1; 3072 if (patmatch(scan + P_CT_OPERAND)) 3073 return 1; 3074 patinput = patinput_thistime; 3075 } 3076 if (cur < min) 3077 return 0; 3078 return patmatch(next); 3079 } 3080 case P_END: 3081 if (!(fail = (patinput < patinend && !(patflags & PAT_NOANCH)))) 3082 return 1; 3083 break; 3084#ifdef DEBUG 3085 default: 3086 dputs("BUG: bad operand in patmatch."); 3087 return 0; 3088 break; 3089#endif 3090 } 3091 3092 if (fail) { 3093 if (errsfound < (patglobflags & 0xff) && 3094 (forceerrs == -1 || errsfound < forceerrs)) { 3095 /* 3096 * Approximation code. There are four possibilities 3097 * 3098 * 1. omit character from input string 3099 * 2. transpose characters in input and pattern strings 3100 * 3. omit character in both input and pattern strings 3101 * 4. omit character from pattern string. 3102 * 3103 * which we try in that order. 3104 * 3105 * Of these, 2, 3 and 4 require an exact match string 3106 * (P_EXACTLY) while 1, 2 and 3 require that we not 3107 * have reached the end of the input string. 3108 * 3109 * Note in each case after making the approximation we 3110 * need to retry the *same* pattern; this is what 3111 * requires exactpos, a slightly doleful way of 3112 * communicating with the exact character matcher. 3113 */ 3114 char *savexact = exactpos; 3115 save = patinput; 3116 savglobflags = patglobflags; 3117 saverrsfound = ++errsfound; 3118 fail = 0; 3119 3120 DPUTS(P_OP(scan) != P_EXACTLY && exactpos, 3121 "BUG: non-exact match has set exactpos"); 3122 3123 /* Try omitting a character from the input string */ 3124 if (patinput < patinend) { 3125 CHARINC(patinput, patinend); 3126 /* If we are not on an exact match, then this is 3127 * our last gasp effort, so we can optimize out 3128 * the recursive call. 3129 */ 3130 if (P_OP(scan) != P_EXACTLY) 3131 continue; 3132 if (patmatch(scan)) 3133 return 1; 3134 } 3135 3136 if (P_OP(scan) == P_EXACTLY) { 3137 char *nextexact = savexact; 3138 DPUTS(!savexact, 3139 "BUG: exact match has not set exactpos"); 3140 CHARINC(nextexact, exactend); 3141 3142 if (save < patinend) { 3143 char *nextin = save; 3144 CHARINC(nextin, patinend); 3145 patglobflags = savglobflags; 3146 errsfound = saverrsfound; 3147 exactpos = savexact; 3148 3149 /* 3150 * Try swapping two characters in patinput and 3151 * exactpos 3152 */ 3153 if (save < patinend && nextin < patinend && 3154 nextexact < exactend) { 3155 patint_t cin0 = CHARREF(save, patinend); 3156 patint_t cpa0 = CHARREF(exactpos, exactend); 3157 patint_t cin1 = CHARREF(nextin, patinend); 3158 patint_t cpa1 = CHARREF(nextexact, exactend); 3159 3160 if (CHARMATCH(cin0, cpa1) && 3161 CHARMATCH(cin1, cpa0)) { 3162 patinput = nextin; 3163 CHARINC(patinput, patinend); 3164 exactpos = nextexact; 3165 CHARINC(exactpos, exactend); 3166 if (patmatch(scan)) 3167 return 1; 3168 3169 patglobflags = savglobflags; 3170 errsfound = saverrsfound; 3171 } 3172 } 3173 3174 /* 3175 * Try moving up both strings. 3176 */ 3177 patinput = nextin; 3178 exactpos = nextexact; 3179 if (patmatch(scan)) 3180 return 1; 3181 3182 patinput = save; 3183 patglobflags = savglobflags; 3184 errsfound = saverrsfound; 3185 exactpos = savexact; 3186 } 3187 3188 DPUTS(exactpos == exactend, "approximating too far"); 3189 /* 3190 * Try moving up the exact match pattern. 3191 * This must be the last attempt, so just loop 3192 * instead of calling recursively. 3193 */ 3194 CHARINC(exactpos, exactend); 3195 continue; 3196 } 3197 } 3198 exactpos = NULL; 3199 return 0; 3200 } 3201 3202 scan = next; 3203 } 3204 3205 return 0; 3206} 3207 3208 3209/**/ 3210#ifdef MULTIBYTE_SUPPORT 3211 3212/* 3213 * See if character ch matches a pattern range specification. 3214 * The null-terminated specification is in range; the test 3215 * character is in ch. 3216 * 3217 * indptr is used by completion matching, which is why this 3218 * function is exported. If indptr is not NULL we set *indptr 3219 * to the index of the character in the range string, adjusted 3220 * in the case of "A-B" ranges such that A would count as its 3221 * normal index (say IA), B would count as IA + (B-A), and any 3222 * character within the range as appropriate. We're not strictly 3223 * guaranteed this fits within a wint_t, but if this is Unicode 3224 * in 32 bits we have a fair amount of distance left over. 3225 * 3226 * mtp is used in the same circumstances. *mtp returns the match type: 3227 * 0 for a standard character, else the PP_ index. It's not 3228 * useful if the match failed. 3229 */ 3230 3231/**/ 3232mod_export int 3233mb_patmatchrange(char *range, wchar_t ch, wint_t *indptr, int *mtp) 3234{ 3235 wchar_t r1, r2; 3236 3237 if (indptr) 3238 *indptr = 0; 3239 /* 3240 * Careful here: unlike other strings, range is a NULL-terminated, 3241 * metafied string, because we need to treat the Posix and hyphenated 3242 * ranges specially. 3243 */ 3244 while (*range) { 3245 if (imeta(STOUC(*range))) { 3246 int swtype = STOUC(*range++) - STOUC(Meta); 3247 if (mtp) 3248 *mtp = swtype; 3249 switch (swtype) { 3250 case 0: 3251 /* ordinary metafied character */ 3252 range--; 3253 if (metacharinc(&range) == ch) 3254 return 1; 3255 break; 3256 case PP_ALPHA: 3257 if (iswalpha(ch)) 3258 return 1; 3259 break; 3260 case PP_ALNUM: 3261 if (iswalnum(ch)) 3262 return 1; 3263 break; 3264 case PP_ASCII: 3265 if ((ch & ~0x7f) == 0) 3266 return 1; 3267 break; 3268 case PP_BLANK: 3269 if (ch == L' ' || ch == L'\t') 3270 return 1; 3271 break; 3272 case PP_CNTRL: 3273 if (iswcntrl(ch)) 3274 return 1; 3275 break; 3276 case PP_DIGIT: 3277 if (iswdigit(ch)) 3278 return 1; 3279 break; 3280 case PP_GRAPH: 3281 if (iswgraph(ch)) 3282 return 1; 3283 break; 3284 case PP_LOWER: 3285 if (iswlower(ch)) 3286 return 1; 3287 break; 3288 case PP_PRINT: 3289 if (iswprint(ch)) 3290 return 1; 3291 break; 3292 case PP_PUNCT: 3293 if (iswpunct(ch)) 3294 return 1; 3295 break; 3296 case PP_SPACE: 3297 if (iswspace(ch)) 3298 return 1; 3299 break; 3300 case PP_UPPER: 3301 if (iswupper(ch)) 3302 return 1; 3303 break; 3304 case PP_XDIGIT: 3305 if (iswxdigit(ch)) 3306 return 1; 3307 break; 3308 case PP_IDENT: 3309 if (wcsitype(ch, IIDENT)) 3310 return 1; 3311 break; 3312 case PP_IFS: 3313 if (wcsitype(ch, ISEP)) 3314 return 1; 3315 break; 3316 case PP_IFSSPACE: 3317 /* must be ASCII space character */ 3318 if (ch < 128 && iwsep((int)ch)) 3319 return 1; 3320 break; 3321 case PP_WORD: 3322 if (wcsitype(ch, IWORD)) 3323 return 1; 3324 break; 3325 case PP_RANGE: 3326 r1 = metacharinc(&range); 3327 r2 = metacharinc(&range); 3328 if (r1 <= ch && ch <= r2) { 3329 if (indptr) 3330 *indptr += ch - r1; 3331 return 1; 3332 } 3333 /* Careful not to screw up counting with bogus range */ 3334 if (indptr && r1 < r2) { 3335 /* 3336 * This gets incremented again below to get 3337 * us past the range end. This is correct. 3338 */ 3339 *indptr += r2 - r1; 3340 } 3341 break; 3342 case PP_UNKWN: 3343 DPUTS(1, "BUG: unknown posix range passed through.\n"); 3344 break; 3345 default: 3346 DPUTS(1, "BUG: unknown metacharacter in range."); 3347 break; 3348 } 3349 } else if (metacharinc(&range) == ch) { 3350 if (mtp) 3351 *mtp = 0; 3352 return 1; 3353 } 3354 if (indptr) 3355 (*indptr)++; 3356 } 3357 return 0; 3358} 3359 3360 3361/* 3362 * This is effectively the reverse of mb_patmatchrange(). 3363 * Given a range descriptor of the same form, and an index into it, 3364 * try to determine the character that is matched. If the index 3365 * points to a [:...:] generic style match, set chr to WEOF and 3366 * return the type in mtp instead. Return 1 if successful, 0 if 3367 * there was no corresponding index. Note all pointer arguments 3368 * must be non-null. 3369 */ 3370 3371/**/ 3372mod_export int 3373mb_patmatchindex(char *range, wint_t ind, wint_t *chr, int *mtp) 3374{ 3375 wchar_t r1, r2, rchr; 3376 wint_t rdiff; 3377 3378 *chr = WEOF; 3379 *mtp = 0; 3380 3381 while (*range) { 3382 if (imeta(STOUC(*range))) { 3383 int swtype = STOUC(*range++) - STOUC(Meta); 3384 switch (swtype) { 3385 case 0: 3386 range--; 3387 rchr = metacharinc(&range); 3388 if (!ind) { 3389 *chr = (wint_t) rchr; 3390 return 1; 3391 } 3392 break; 3393 3394 case PP_ALPHA: 3395 case PP_ALNUM: 3396 case PP_ASCII: 3397 case PP_BLANK: 3398 case PP_CNTRL: 3399 case PP_DIGIT: 3400 case PP_GRAPH: 3401 case PP_LOWER: 3402 case PP_PRINT: 3403 case PP_PUNCT: 3404 case PP_SPACE: 3405 case PP_UPPER: 3406 case PP_XDIGIT: 3407 case PP_IDENT: 3408 case PP_IFS: 3409 case PP_IFSSPACE: 3410 case PP_WORD: 3411 if (!ind) { 3412 *mtp = swtype; 3413 return 1; 3414 } 3415 break; 3416 3417 case PP_RANGE: 3418 r1 = metacharinc(&range); 3419 r2 = metacharinc(&range); 3420 rdiff = (wint_t)r2 - (wint_t)r1; 3421 if (rdiff >= ind) { 3422 *chr = (wint_t)r1 + ind; 3423 return 1; 3424 } 3425 /* note the extra decrement to ind below */ 3426 ind -= rdiff; 3427 break; 3428 case PP_UNKWN: 3429 DPUTS(1, "BUG: unknown posix range passed through.\n"); 3430 break; 3431 default: 3432 DPUTS(1, "BUG: unknown metacharacter in range."); 3433 break; 3434 } 3435 } else { 3436 rchr = metacharinc(&range); 3437 if (!ind) { 3438 *chr = (wint_t)rchr; 3439 return 1; 3440 } 3441 } 3442 if (!ind--) 3443 break; 3444 } 3445 3446 /* No corresponding index. */ 3447 return 0; 3448} 3449 3450/**/ 3451#endif /* MULTIBYTE_SUPPORT */ 3452 3453/* 3454 * Identical function to mb_patmatchrange() above for single-byte 3455 * characters. 3456 */ 3457 3458/**/ 3459mod_export int 3460patmatchrange(char *range, int ch, int *indptr, int *mtp) 3461{ 3462 int r1, r2; 3463 3464 if (indptr) 3465 *indptr = 0; 3466 /* 3467 * Careful here: unlike other strings, range is a NULL-terminated, 3468 * metafied string, because we need to treat the Posix and hyphenated 3469 * ranges specially. 3470 */ 3471 for (; *range; range++) { 3472 if (imeta(STOUC(*range))) { 3473 int swtype = STOUC(*range) - STOUC(Meta); 3474 if (mtp) 3475 *mtp = swtype; 3476 switch (swtype) { 3477 case 0: 3478 if (STOUC(*++range ^ 32) == ch) 3479 return 1; 3480 break; 3481 case PP_ALPHA: 3482 if (isalpha(ch)) 3483 return 1; 3484 break; 3485 case PP_ALNUM: 3486 if (isalnum(ch)) 3487 return 1; 3488 break; 3489 case PP_ASCII: 3490 if ((ch & ~0x7f) == 0) 3491 return 1; 3492 break; 3493 case PP_BLANK: 3494 if (ch == ' ' || ch == '\t') 3495 return 1; 3496 break; 3497 case PP_CNTRL: 3498 if (iscntrl(ch)) 3499 return 1; 3500 break; 3501 case PP_DIGIT: 3502 if (isdigit(ch)) 3503 return 1; 3504 break; 3505 case PP_GRAPH: 3506 if (isgraph(ch)) 3507 return 1; 3508 break; 3509 case PP_LOWER: 3510 if (islower(ch)) 3511 return 1; 3512 break; 3513 case PP_PRINT: 3514 if (isprint(ch)) 3515 return 1; 3516 break; 3517 case PP_PUNCT: 3518 if (ispunct(ch)) 3519 return 1; 3520 break; 3521 case PP_SPACE: 3522 if (isspace(ch)) 3523 return 1; 3524 break; 3525 case PP_UPPER: 3526 if (isupper(ch)) 3527 return 1; 3528 break; 3529 case PP_XDIGIT: 3530 if (isxdigit(ch)) 3531 return 1; 3532 break; 3533 case PP_IDENT: 3534 if (iident(ch)) 3535 return 1; 3536 break; 3537 case PP_IFS: 3538 if (isep(ch)) 3539 return 1; 3540 break; 3541 case PP_IFSSPACE: 3542 if (iwsep(ch)) 3543 return 1; 3544 break; 3545 case PP_WORD: 3546 if (iword(ch)) 3547 return 1; 3548 break; 3549 case PP_RANGE: 3550 range++; 3551 r1 = STOUC(UNMETA(range)); 3552 METACHARINC(range); 3553 r2 = STOUC(UNMETA(range)); 3554 if (*range == Meta) 3555 range++; 3556 if (r1 <= ch && ch <= r2) { 3557 if (indptr) 3558 *indptr += ch - r1; 3559 return 1; 3560 } 3561 if (indptr && r1 < r2) 3562 *indptr += r2 - r1; 3563 break; 3564 case PP_UNKWN: 3565 DPUTS(1, "BUG: unknown posix range passed through.\n"); 3566 break; 3567 default: 3568 DPUTS(1, "BUG: unknown metacharacter in range."); 3569 break; 3570 } 3571 } else if (STOUC(*range) == ch) { 3572 if (mtp) 3573 *mtp = 0; 3574 return 1; 3575 } 3576 if (indptr) 3577 (*indptr)++; 3578 } 3579 return 0; 3580} 3581 3582 3583/**/ 3584#ifndef MULTIBYTE_SUPPORT 3585 3586/* 3587 * Identical function to mb_patmatchindex() above for single-byte 3588 * characters. Here -1 represents a character that needs a special type. 3589 * 3590 * Unlike patmatchrange, we only need this in ZLE, which always 3591 * uses MULTIBYTE_SUPPORT if compiled in; hence we don't use 3592 * this function in that case. 3593 */ 3594 3595/**/ 3596mod_export int 3597patmatchindex(char *range, int ind, int *chr, int *mtp) 3598{ 3599 int r1, r2, rdiff, rchr; 3600 3601 *chr = -1; 3602 *mtp = 0; 3603 3604 for (; *range; range++) { 3605 if (imeta(STOUC(*range))) { 3606 int swtype = STOUC(*range) - STOUC(Meta); 3607 switch (swtype) { 3608 case 0: 3609 /* ordinary metafied character */ 3610 rchr = STOUC(*++range) ^ 32; 3611 if (!ind) { 3612 *chr = rchr; 3613 return 1; 3614 } 3615 break; 3616 3617 case PP_ALPHA: 3618 case PP_ALNUM: 3619 case PP_ASCII: 3620 case PP_BLANK: 3621 case PP_CNTRL: 3622 case PP_DIGIT: 3623 case PP_GRAPH: 3624 case PP_LOWER: 3625 case PP_PRINT: 3626 case PP_PUNCT: 3627 case PP_SPACE: 3628 case PP_UPPER: 3629 case PP_XDIGIT: 3630 case PP_IDENT: 3631 case PP_IFS: 3632 case PP_IFSSPACE: 3633 case PP_WORD: 3634 if (!ind) { 3635 *mtp = swtype; 3636 return 1; 3637 } 3638 break; 3639 3640 case PP_RANGE: 3641 range++; 3642 r1 = STOUC(UNMETA(range)); 3643 METACHARINC(range); 3644 r2 = STOUC(UNMETA(range)); 3645 if (*range == Meta) 3646 range++; 3647 rdiff = r2 - r1; 3648 if (rdiff >= ind) { 3649 *chr = r1 + ind; 3650 return 1; 3651 } 3652 /* note the extra decrement to ind below */ 3653 ind -= rdiff; 3654 break; 3655 case PP_UNKWN: 3656 DPUTS(1, "BUG: unknown posix range passed through.\n"); 3657 break; 3658 default: 3659 DPUTS(1, "BUG: unknown metacharacter in range."); 3660 break; 3661 } 3662 } else { 3663 if (!ind) { 3664 *chr = STOUC(*range); 3665 return 1; 3666 } 3667 } 3668 if (!ind--) 3669 break; 3670 } 3671 3672 /* No corresponding index. */ 3673 return 0; 3674} 3675 3676/**/ 3677#endif /* MULTIBYTE_SUPPORT */ 3678 3679/* 3680 * Repeatedly match something simple and say how many times. 3681 * charstart is an array parallel to that starting at patinput 3682 * and records the start of (possibly multibyte) characters 3683 * to aid in later backtracking. 3684 */ 3685 3686/**/ 3687static int patrepeat(Upat p, char *charstart) 3688{ 3689 int count = 0; 3690 patint_t tch, charmatch_cache; 3691 char *scan, *opnd; 3692 3693 scan = patinput; 3694 opnd = (char *)P_OPERAND(p); 3695 3696 switch(P_OP(p)) { 3697#ifdef DEBUG 3698 case P_ANY: 3699 dputs("BUG: ?# did not get optimized to *"); 3700 return 0; 3701 break; 3702#endif 3703 case P_EXACTLY: 3704 DPUTS(P_LS_LEN(p) != 1, "closure following more than one character"); 3705 tch = CHARREF(P_LS_STR(p), P_LS_STR(p) + P_LS_LEN(p)); 3706 while (scan < patinend && 3707 CHARMATCH_EXPR(CHARREF(scan, patinend), tch)) { 3708 charstart[scan-patinput] = 1; 3709 count++; 3710 CHARINC(scan, patinend); 3711 } 3712 break; 3713 case P_ANYOF: 3714 case P_ANYBUT: 3715 while (scan < patinend) { 3716#ifdef MULTIBYTE_SUPPORT 3717 wchar_t cr = CHARREF(scan, patinend); 3718 if (patglobflags & GF_MULTIBYTE) { 3719 if (mb_patmatchrange(opnd, cr, NULL, NULL) ^ 3720 (P_OP(p) == P_ANYOF)) 3721 break; 3722 } else if (patmatchrange(opnd, (int)cr, NULL, NULL) ^ 3723 (P_OP(p) == P_ANYOF)) 3724 break; 3725#else 3726 if (patmatchrange(opnd, CHARREF(scan, patinend), NULL, NULL) ^ 3727 (P_OP(p) == P_ANYOF)) 3728 break; 3729#endif 3730 charstart[scan-patinput] = 1; 3731 count++; 3732 CHARINC(scan, patinend); 3733 } 3734 break; 3735#ifdef DEBUG 3736 default: 3737 dputs("BUG: something very strange is happening in patrepeat"); 3738 return 0; 3739 break; 3740#endif 3741 } 3742 3743 patinput = scan; 3744 return count; 3745} 3746 3747/* Free a patprog. */ 3748 3749/**/ 3750mod_export void 3751freepatprog(Patprog prog) 3752{ 3753 if (prog && prog != dummy_patprog1 && prog != dummy_patprog2) 3754 zfree(prog, prog->size); 3755} 3756