1/* 2 * re_*exec and friends - match REs 3 * 4 * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. 5 * 6 * Development of this software was funded, in part, by Cray Research Inc., 7 * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics 8 * Corporation, none of whom are responsible for the results. The author 9 * thanks all of them. 10 * 11 * Redistribution and use in source and binary forms -- with or without 12 * modification -- are permitted for any purpose, provided that 13 * redistributions in source form retain this entire copyright notice and 14 * indicate the origin and nature of any modifications. 15 * 16 * I'd appreciate being given credit for this package in the documentation of 17 * software which uses it, but that is not a requirement. 18 * 19 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, 20 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY 21 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 22 * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 23 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 24 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 25 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 26 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 27 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 28 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31#include "regguts.h" 32 33/* 34 * Lazy-DFA representation. 35 */ 36 37struct arcp { /* "pointer" to an outarc */ 38 struct sset *ss; 39 color co; 40}; 41 42struct sset { /* state set */ 43 unsigned *states; /* pointer to bitvector */ 44 unsigned hash; /* hash of bitvector */ 45#define HASH(bv, nw) (((nw) == 1) ? *(bv) : hash(bv, nw)) 46#define HIT(h,bv,ss,nw) ((ss)->hash == (h) && ((nw) == 1 || \ 47 memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0)) 48 int flags; 49#define STARTER 01 /* the initial state set */ 50#define POSTSTATE 02 /* includes the goal state */ 51#define LOCKED 04 /* locked in cache */ 52#define NOPROGRESS 010 /* zero-progress state set */ 53 struct arcp ins; /* chain of inarcs pointing here */ 54 chr *lastseen; /* last entered on arrival here */ 55 struct sset **outs; /* outarc vector indexed by color */ 56 struct arcp *inchain; /* chain-pointer vector for outarcs */ 57}; 58 59struct dfa { 60 int nssets; /* size of cache */ 61 int nssused; /* how many entries occupied yet */ 62 int nstates; /* number of states */ 63 int ncolors; /* length of outarc and inchain vectors */ 64 int wordsper; /* length of state-set bitvectors */ 65 struct sset *ssets; /* state-set cache */ 66 unsigned *statesarea; /* bitvector storage */ 67 unsigned *work; /* pointer to work area within statesarea */ 68 struct sset **outsarea; /* outarc-vector storage */ 69 struct arcp *incarea; /* inchain storage */ 70 struct cnfa *cnfa; 71 struct colormap *cm; 72 chr *lastpost; /* location of last cache-flushed success */ 73 chr *lastnopr; /* location of last cache-flushed NOPROGRESS */ 74 struct sset *search; /* replacement-search-pointer memory */ 75 int cptsmalloced; /* were the areas individually malloced? */ 76 char *mallocarea; /* self, or master malloced area, or NULL */ 77}; 78 79#define WORK 1 /* number of work bitvectors needed */ 80 81/* 82 * Setup for non-malloc allocation for small cases. 83 */ 84 85#define FEWSTATES 20 /* must be less than UBITS */ 86#define FEWCOLORS 15 87struct smalldfa { 88 struct dfa dfa; 89 struct sset ssets[FEWSTATES*2]; 90 unsigned statesarea[FEWSTATES*2 + WORK]; 91 struct sset *outsarea[FEWSTATES*2 * FEWCOLORS]; 92 struct arcp incarea[FEWSTATES*2 * FEWCOLORS]; 93}; 94#define DOMALLOC ((struct smalldfa *)NULL) /* force malloc */ 95 96/* 97 * Internal variables, bundled for easy passing around. 98 */ 99 100struct vars { 101 regex_t *re; 102 struct guts *g; 103 int eflags; /* copies of arguments */ 104 size_t nmatch; 105 regmatch_t *pmatch; 106 rm_detail_t *details; 107 chr *start; /* start of string */ 108 chr *stop; /* just past end of string */ 109 int err; /* error code if any (0 none) */ 110 regoff_t *mem; /* memory vector for backtracking */ 111 struct smalldfa dfa1; 112 struct smalldfa dfa2; 113}; 114#define VISERR(vv) ((vv)->err != 0) /* have we seen an error yet? */ 115#define ISERR() VISERR(v) 116#define VERR(vv,e) (((vv)->err) ? (vv)->err : ((vv)->err = (e))) 117#define ERR(e) VERR(v, e) /* record an error */ 118#define NOERR() {if (ISERR()) return v->err;} /* if error seen, return it */ 119#define OFF(p) ((p) - v->start) 120#define LOFF(p) ((long)OFF(p)) 121 122/* 123 * forward declarations 124 */ 125/* =====^!^===== begin forwards =====^!^===== */ 126/* automatically gathered by fwd; do not hand-edit */ 127/* === regexec.c === */ 128int exec(regex_t *, CONST chr *, size_t, rm_detail_t *, size_t, regmatch_t [], int); 129static int find(struct vars *, struct cnfa *, struct colormap *); 130static int cfind(struct vars *, struct cnfa *, struct colormap *); 131static int cfindloop(struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **); 132static VOID zapsubs(regmatch_t *, size_t); 133static VOID zapmem(struct vars *, struct subre *); 134static VOID subset(struct vars *, struct subre *, chr *, chr *); 135static int dissect(struct vars *, struct subre *, chr *, chr *); 136static int condissect(struct vars *, struct subre *, chr *, chr *); 137static int altdissect(struct vars *, struct subre *, chr *, chr *); 138static int cdissect(struct vars *, struct subre *, chr *, chr *); 139static int ccondissect(struct vars *, struct subre *, chr *, chr *); 140static int crevdissect(struct vars *, struct subre *, chr *, chr *); 141static int cbrdissect(struct vars *, struct subre *, chr *, chr *); 142static int caltdissect(struct vars *, struct subre *, chr *, chr *); 143/* === rege_dfa.c === */ 144static chr *longest(struct vars *, struct dfa *, chr *, chr *, int *); 145static chr *shortest(struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *); 146static chr *lastcold(struct vars *, struct dfa *); 147static struct dfa *newdfa(struct vars *, struct cnfa *, struct colormap *, struct smalldfa *); 148static VOID freedfa(struct dfa *); 149static unsigned hash(unsigned *, int); 150static struct sset *initialize(struct vars *, struct dfa *, chr *); 151static struct sset *miss(struct vars *, struct dfa *, struct sset *, pcolor, chr *, chr *); 152static int lacon(struct vars *, struct cnfa *, chr *, pcolor); 153static struct sset *getvacant(struct vars *, struct dfa *, chr *, chr *); 154static struct sset *pickss(struct vars *, struct dfa *, chr *, chr *); 155/* automatically gathered by fwd; do not hand-edit */ 156/* =====^!^===== end forwards =====^!^===== */ 157 158/* 159 - exec - match regular expression 160 ^ int exec(regex_t *, CONST chr *, size_t, rm_detail_t *, 161 ^ size_t, regmatch_t [], int); 162 */ 163int 164exec( 165 regex_t *re, 166 CONST chr *string, 167 size_t len, 168 rm_detail_t *details, 169 size_t nmatch, 170 regmatch_t pmatch[], 171 int flags) 172{ 173 AllocVars(v); 174 int st; 175 size_t n; 176 int backref; 177#define LOCALMAT 20 178 regmatch_t mat[LOCALMAT]; 179#define LOCALMEM 40 180 regoff_t mem[LOCALMEM]; 181 182 /* 183 * Sanity checks. 184 */ 185 186 if (re == NULL || string == NULL || re->re_magic != REMAGIC) { 187 FreeVars(v); 188 return REG_INVARG; 189 } 190 if (re->re_csize != sizeof(chr)) { 191 FreeVars(v); 192 return REG_MIXED; 193 } 194 195 /* 196 * Setup. 197 */ 198 199 v->re = re; 200 v->g = (struct guts *)re->re_guts; 201 if ((v->g->cflags®_EXPECT) && details == NULL) { 202 FreeVars(v); 203 return REG_INVARG; 204 } 205 if (v->g->info®_UIMPOSSIBLE) { 206 FreeVars(v); 207 return REG_NOMATCH; 208 } 209 backref = (v->g->info®_UBACKREF) ? 1 : 0; 210 v->eflags = flags; 211 if (v->g->cflags®_NOSUB) { 212 nmatch = 0; /* override client */ 213 } 214 v->nmatch = nmatch; 215 if (backref) { 216 /* 217 * Need work area. 218 */ 219 220 if (v->g->nsub + 1 <= LOCALMAT) { 221 v->pmatch = mat; 222 } else { 223 v->pmatch = (regmatch_t *) 224 MALLOC((v->g->nsub + 1) * sizeof(regmatch_t)); 225 } 226 if (v->pmatch == NULL) { 227 FreeVars(v); 228 return REG_ESPACE; 229 } 230 v->nmatch = v->g->nsub + 1; 231 } else { 232 v->pmatch = pmatch; 233 } 234 v->details = details; 235 v->start = (chr *)string; 236 v->stop = (chr *)string + len; 237 v->err = 0; 238 if (backref) { 239 /* 240 * Need retry memory. 241 */ 242 243 assert(v->g->ntree >= 0); 244 n = (size_t)v->g->ntree; 245 if (n <= LOCALMEM) { 246 v->mem = mem; 247 } else { 248 v->mem = (regoff_t *) MALLOC(n*sizeof(regoff_t)); 249 } 250 if (v->mem == NULL) { 251 if (v->pmatch != pmatch && v->pmatch != mat) { 252 FREE(v->pmatch); 253 } 254 FreeVars(v); 255 return REG_ESPACE; 256 } 257 } else { 258 v->mem = NULL; 259 } 260 261 /* 262 * Do it. 263 */ 264 265 assert(v->g->tree != NULL); 266 if (backref) { 267 st = cfind(v, &v->g->tree->cnfa, &v->g->cmap); 268 } else { 269 st = find(v, &v->g->tree->cnfa, &v->g->cmap); 270 } 271 272 /* 273 * Copy (portion of) match vector over if necessary. 274 */ 275 276 if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0) { 277 zapsubs(pmatch, nmatch); 278 n = (nmatch < v->nmatch) ? nmatch : v->nmatch; 279 memcpy(VS(pmatch), VS(v->pmatch), n*sizeof(regmatch_t)); 280 } 281 282 /* 283 * Clean up. 284 */ 285 286 if (v->pmatch != pmatch && v->pmatch != mat) { 287 FREE(v->pmatch); 288 } 289 if (v->mem != NULL && v->mem != mem) { 290 FREE(v->mem); 291 } 292 FreeVars(v); 293 return st; 294} 295 296/* 297 - find - find a match for the main NFA (no-complications case) 298 ^ static int find(struct vars *, struct cnfa *, struct colormap *); 299 */ 300static int 301find( 302 struct vars *v, 303 struct cnfa *cnfa, 304 struct colormap *cm) 305{ 306 struct dfa *s; 307 struct dfa *d; 308 chr *begin; 309 chr *end = NULL; 310 chr *cold; 311 chr *open; /* Open and close of range of possible 312 * starts */ 313 chr *close; 314 int hitend; 315 int shorter = (v->g->tree->flags&SHORTER) ? 1 : 0; 316 317 /* 318 * First, a shot with the search RE. 319 */ 320 321 s = newdfa(v, &v->g->search, cm, &v->dfa1); 322 assert(!(ISERR() && s != NULL)); 323 NOERR(); 324 MDEBUG(("\nsearch at %ld\n", LOFF(v->start))); 325 cold = NULL; 326 close = shortest(v, s, v->start, v->start, v->stop, &cold, NULL); 327 freedfa(s); 328 NOERR(); 329 if (v->g->cflags®_EXPECT) { 330 assert(v->details != NULL); 331 if (cold != NULL) { 332 v->details->rm_extend.rm_so = OFF(cold); 333 } else { 334 v->details->rm_extend.rm_so = OFF(v->stop); 335 } 336 v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ 337 } 338 if (close == NULL) { /* not found */ 339 return REG_NOMATCH; 340 } 341 if (v->nmatch == 0) { /* found, don't need exact location */ 342 return REG_OKAY; 343 } 344 345 /* 346 * Find starting point and match. 347 */ 348 349 assert(cold != NULL); 350 open = cold; 351 cold = NULL; 352 MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close))); 353 d = newdfa(v, cnfa, cm, &v->dfa1); 354 assert(!(ISERR() && d != NULL)); 355 NOERR(); 356 for (begin = open; begin <= close; begin++) { 357 MDEBUG(("\nfind trying at %ld\n", LOFF(begin))); 358 if (shorter) { 359 end = shortest(v, d, begin, begin, v->stop, NULL, &hitend); 360 } else { 361 end = longest(v, d, begin, v->stop, &hitend); 362 } 363 NOERR(); 364 if (hitend && cold == NULL) { 365 cold = begin; 366 } 367 if (end != NULL) { 368 break; /* NOTE BREAK OUT */ 369 } 370 } 371 assert(end != NULL); /* search RE succeeded so loop should */ 372 freedfa(d); 373 374 /* 375 * And pin down details. 376 */ 377 378 assert(v->nmatch > 0); 379 v->pmatch[0].rm_so = OFF(begin); 380 v->pmatch[0].rm_eo = OFF(end); 381 if (v->g->cflags®_EXPECT) { 382 if (cold != NULL) { 383 v->details->rm_extend.rm_so = OFF(cold); 384 } else { 385 v->details->rm_extend.rm_so = OFF(v->stop); 386 } 387 v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ 388 } 389 if (v->nmatch == 1) { /* no need for submatches */ 390 return REG_OKAY; 391 } 392 393 /* 394 * Submatches. 395 */ 396 397 zapsubs(v->pmatch, v->nmatch); 398 return dissect(v, v->g->tree, begin, end); 399} 400 401/* 402 - cfind - find a match for the main NFA (with complications) 403 ^ static int cfind(struct vars *, struct cnfa *, struct colormap *); 404 */ 405static int 406cfind( 407 struct vars *v, 408 struct cnfa *cnfa, 409 struct colormap *cm) 410{ 411 struct dfa *s; 412 struct dfa *d; 413 chr *cold = NULL; /* silence gcc 4 warning */ 414 int ret; 415 416 s = newdfa(v, &v->g->search, cm, &v->dfa1); 417 NOERR(); 418 d = newdfa(v, cnfa, cm, &v->dfa2); 419 if (ISERR()) { 420 assert(d == NULL); 421 freedfa(s); 422 return v->err; 423 } 424 425 ret = cfindloop(v, cnfa, cm, d, s, &cold); 426 427 freedfa(d); 428 freedfa(s); 429 NOERR(); 430 if (v->g->cflags®_EXPECT) { 431 assert(v->details != NULL); 432 if (cold != NULL) { 433 v->details->rm_extend.rm_so = OFF(cold); 434 } else { 435 v->details->rm_extend.rm_so = OFF(v->stop); 436 } 437 v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ 438 } 439 return ret; 440} 441 442/* 443 - cfindloop - the heart of cfind 444 ^ static int cfindloop(struct vars *, struct cnfa *, struct colormap *, 445 ^ struct dfa *, struct dfa *, chr **); 446 */ 447static int 448cfindloop( 449 struct vars *v, 450 struct cnfa *cnfa, 451 struct colormap *cm, 452 struct dfa *d, 453 struct dfa *s, 454 chr **coldp) /* where to put coldstart pointer */ 455{ 456 chr *begin; 457 chr *end; 458 chr *cold; 459 chr *open; /* Open and close of range of possible 460 * starts */ 461 chr *close; 462 chr *estart; 463 chr *estop; 464 int er; 465 int shorter = v->g->tree->flags&SHORTER; 466 int hitend; 467 468 assert(d != NULL && s != NULL); 469 cold = NULL; 470 close = v->start; 471 do { 472 MDEBUG(("\ncsearch at %ld\n", LOFF(close))); 473 close = shortest(v, s, close, close, v->stop, &cold, NULL); 474 if (close == NULL) { 475 break; /* NOTE BREAK */ 476 } 477 assert(cold != NULL); 478 open = cold; 479 cold = NULL; 480 MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close))); 481 for (begin = open; begin <= close; begin++) { 482 MDEBUG(("\ncfind trying at %ld\n", LOFF(begin))); 483 estart = begin; 484 estop = v->stop; 485 for (;;) { 486 if (shorter) { 487 end = shortest(v, d, begin, estart, estop, NULL, &hitend); 488 } else { 489 end = longest(v, d, begin, estop, &hitend); 490 } 491 if (hitend && cold == NULL) { 492 cold = begin; 493 } 494 if (end == NULL) { 495 break; /* NOTE BREAK OUT */ 496 } 497 498 MDEBUG(("tentative end %ld\n", LOFF(end))); 499 zapsubs(v->pmatch, v->nmatch); 500 zapmem(v, v->g->tree); 501 er = cdissect(v, v->g->tree, begin, end); 502 if (er == REG_OKAY) { 503 if (v->nmatch > 0) { 504 v->pmatch[0].rm_so = OFF(begin); 505 v->pmatch[0].rm_eo = OFF(end); 506 } 507 *coldp = cold; 508 return REG_OKAY; 509 } 510 if (er != REG_NOMATCH) { 511 ERR(er); 512 return er; 513 } 514 if ((shorter) ? end == estop : end == begin) { 515 /* 516 * No point in trying again. 517 */ 518 519 *coldp = cold; 520 return REG_NOMATCH; 521 } 522 523 /* 524 * Go around and try again 525 */ 526 527 if (shorter) { 528 estart = end + 1; 529 } else { 530 estop = end - 1; 531 } 532 } 533 } 534 } while (close < v->stop); 535 536 *coldp = cold; 537 return REG_NOMATCH; 538} 539 540/* 541 - zapsubs - initialize the subexpression matches to "no match" 542 ^ static VOID zapsubs(regmatch_t *, size_t); 543 */ 544static void 545zapsubs( 546 regmatch_t *p, 547 size_t n) 548{ 549 size_t i; 550 551 for (i = n-1; i > 0; i--) { 552 p[i].rm_so = -1; 553 p[i].rm_eo = -1; 554 } 555} 556 557/* 558 - zapmem - initialize the retry memory of a subtree to zeros 559 ^ static VOID zapmem(struct vars *, struct subre *); 560 */ 561static void 562zapmem( 563 struct vars *v, 564 struct subre *t) 565{ 566 if (t == NULL) { 567 return; 568 } 569 570 assert(v->mem != NULL); 571 v->mem[t->retry] = 0; 572 if (t->op == '(') { 573 assert(t->subno > 0); 574 v->pmatch[t->subno].rm_so = -1; 575 v->pmatch[t->subno].rm_eo = -1; 576 } 577 578 if (t->left != NULL) { 579 zapmem(v, t->left); 580 } 581 if (t->right != NULL) { 582 zapmem(v, t->right); 583 } 584} 585 586/* 587 - subset - set any subexpression relevant to a successful subre 588 ^ static VOID subset(struct vars *, struct subre *, chr *, chr *); 589 */ 590static void 591subset( 592 struct vars *v, 593 struct subre *sub, 594 chr *begin, 595 chr *end) 596{ 597 int n = sub->subno; 598 599 assert(n > 0); 600 if ((size_t)n >= v->nmatch) { 601 return; 602 } 603 604 MDEBUG(("setting %d\n", n)); 605 v->pmatch[n].rm_so = OFF(begin); 606 v->pmatch[n].rm_eo = OFF(end); 607} 608 609/* 610 - dissect - determine subexpression matches (uncomplicated case) 611 ^ static int dissect(struct vars *, struct subre *, chr *, chr *); 612 */ 613static int /* regexec return code */ 614dissect( 615 struct vars *v, 616 struct subre *t, 617 chr *begin, /* beginning of relevant substring */ 618 chr *end) /* end of same */ 619{ 620 assert(t != NULL); 621 MDEBUG(("dissect %ld-%ld\n", LOFF(begin), LOFF(end))); 622 623 switch (t->op) { 624 case '=': /* terminal node */ 625 assert(t->left == NULL && t->right == NULL); 626 return REG_OKAY; /* no action, parent did the work */ 627 case '|': /* alternation */ 628 assert(t->left != NULL); 629 return altdissect(v, t, begin, end); 630 case 'b': /* back ref -- shouldn't be calling us! */ 631 return REG_ASSERT; 632 case '.': /* concatenation */ 633 assert(t->left != NULL && t->right != NULL); 634 return condissect(v, t, begin, end); 635 case '(': /* capturing */ 636 assert(t->left != NULL && t->right == NULL); 637 assert(t->subno > 0); 638 subset(v, t, begin, end); 639 return dissect(v, t->left, begin, end); 640 default: 641 return REG_ASSERT; 642 } 643} 644 645/* 646 - condissect - determine concatenation subexpression matches (uncomplicated) 647 ^ static int condissect(struct vars *, struct subre *, chr *, chr *); 648 */ 649static int /* regexec return code */ 650condissect( 651 struct vars *v, 652 struct subre *t, 653 chr *begin, /* beginning of relevant substring */ 654 chr *end) /* end of same */ 655{ 656 struct dfa *d; 657 struct dfa *d2; 658 chr *mid; 659 int i; 660 int shorter = (t->left->flags&SHORTER) ? 1 : 0; 661 chr *stop = (shorter) ? end : begin; 662 663 assert(t->op == '.'); 664 assert(t->left != NULL && t->left->cnfa.nstates > 0); 665 assert(t->right != NULL && t->right->cnfa.nstates > 0); 666 667 d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1); 668 NOERR(); 669 d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, &v->dfa2); 670 if (ISERR()) { 671 assert(d2 == NULL); 672 freedfa(d); 673 return v->err; 674 } 675 676 /* 677 * Pick a tentative midpoint. 678 */ 679 680 if (shorter) { 681 mid = shortest(v, d, begin, begin, end, NULL, NULL); 682 } else { 683 mid = longest(v, d, begin, end, NULL); 684 } 685 if (mid == NULL) { 686 freedfa(d); 687 freedfa(d2); 688 return REG_ASSERT; 689 } 690 MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); 691 692 /* 693 * Iterate until satisfaction or failure. 694 */ 695 696 while (longest(v, d2, mid, end, NULL) != end) { 697 /* 698 * That midpoint didn't work, find a new one. 699 */ 700 701 if (mid == stop) { 702 /* 703 * All possibilities exhausted! 704 */ 705 706 MDEBUG(("no midpoint!\n")); 707 freedfa(d); 708 freedfa(d2); 709 return REG_ASSERT; 710 } 711 if (shorter) { 712 mid = shortest(v, d, begin, mid+1, end, NULL, NULL); 713 } else { 714 mid = longest(v, d, begin, mid-1, NULL); 715 } 716 if (mid == NULL) { 717 /* 718 * Failed to find a new one! 719 */ 720 721 MDEBUG(("failed midpoint!\n")); 722 freedfa(d); 723 freedfa(d2); 724 return REG_ASSERT; 725 } 726 MDEBUG(("new midpoint %ld\n", LOFF(mid))); 727 } 728 729 /* 730 * Satisfaction. 731 */ 732 733 MDEBUG(("successful\n")); 734 freedfa(d); 735 freedfa(d2); 736 i = dissect(v, t->left, begin, mid); 737 if (i != REG_OKAY) { 738 return i; 739 } 740 return dissect(v, t->right, mid, end); 741} 742 743/* 744 - altdissect - determine alternative subexpression matches (uncomplicated) 745 ^ static int altdissect(struct vars *, struct subre *, chr *, chr *); 746 */ 747static int /* regexec return code */ 748altdissect( 749 struct vars *v, 750 struct subre *t, 751 chr *begin, /* beginning of relevant substring */ 752 chr *end) /* end of same */ 753{ 754 struct dfa *d; 755 int i; 756 757 assert(t != NULL); 758 assert(t->op == '|'); 759 760 for (i = 0; t != NULL; t = t->right, i++) { 761 MDEBUG(("trying %dth\n", i)); 762 assert(t->left != NULL && t->left->cnfa.nstates > 0); 763 d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1); 764 if (ISERR()) { 765 return v->err; 766 } 767 if (longest(v, d, begin, end, NULL) == end) { 768 MDEBUG(("success\n")); 769 freedfa(d); 770 return dissect(v, t->left, begin, end); 771 } 772 freedfa(d); 773 } 774 return REG_ASSERT; /* none of them matched?!? */ 775} 776 777/* 778 - cdissect - determine subexpression matches (with complications) 779 * The retry memory stores the offset of the trial midpoint from begin, plus 1 780 * so that 0 uniquely means "clean slate". 781 ^ static int cdissect(struct vars *, struct subre *, chr *, chr *); 782 */ 783static int /* regexec return code */ 784cdissect( 785 struct vars *v, 786 struct subre *t, 787 chr *begin, /* beginning of relevant substring */ 788 chr *end) /* end of same */ 789{ 790 int er; 791 792 assert(t != NULL); 793 MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin), LOFF(end), t->op)); 794 795 switch (t->op) { 796 case '=': /* terminal node */ 797 assert(t->left == NULL && t->right == NULL); 798 return REG_OKAY; /* no action, parent did the work */ 799 case '|': /* alternation */ 800 assert(t->left != NULL); 801 return caltdissect(v, t, begin, end); 802 case 'b': /* back ref -- shouldn't be calling us! */ 803 assert(t->left == NULL && t->right == NULL); 804 return cbrdissect(v, t, begin, end); 805 case '.': /* concatenation */ 806 assert(t->left != NULL && t->right != NULL); 807 return ccondissect(v, t, begin, end); 808 case '(': /* capturing */ 809 assert(t->left != NULL && t->right == NULL); 810 assert(t->subno > 0); 811 er = cdissect(v, t->left, begin, end); 812 if (er == REG_OKAY) { 813 subset(v, t, begin, end); 814 } 815 return er; 816 default: 817 return REG_ASSERT; 818 } 819} 820 821/* 822 - ccondissect - concatenation subexpression matches (with complications) 823 * The retry memory stores the offset of the trial midpoint from begin, plus 1 824 * so that 0 uniquely means "clean slate". 825 ^ static int ccondissect(struct vars *, struct subre *, chr *, chr *); 826 */ 827static int /* regexec return code */ 828ccondissect( 829 struct vars *v, 830 struct subre *t, 831 chr *begin, /* beginning of relevant substring */ 832 chr *end) /* end of same */ 833{ 834 struct dfa *d, *d2; 835 chr *mid; 836 int er; 837 838 assert(t->op == '.'); 839 assert(t->left != NULL && t->left->cnfa.nstates > 0); 840 assert(t->right != NULL && t->right->cnfa.nstates > 0); 841 842 if (t->left->flags&SHORTER) { /* reverse scan */ 843 return crevdissect(v, t, begin, end); 844 } 845 846 d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC); 847 if (ISERR()) { 848 return v->err; 849 } 850 d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC); 851 if (ISERR()) { 852 freedfa(d); 853 return v->err; 854 } 855 MDEBUG(("cconcat %d\n", t->retry)); 856 857 /* 858 * Pick a tentative midpoint. 859 */ 860 861 if (v->mem[t->retry] == 0) { 862 mid = longest(v, d, begin, end, NULL); 863 if (mid == NULL) { 864 freedfa(d); 865 freedfa(d2); 866 return REG_NOMATCH; 867 } 868 MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); 869 v->mem[t->retry] = (mid - begin) + 1; 870 } else { 871 mid = begin + (v->mem[t->retry] - 1); 872 MDEBUG(("working midpoint %ld\n", LOFF(mid))); 873 } 874 875 /* 876 * Iterate until satisfaction or failure. 877 */ 878 879 for (;;) { 880 /* 881 * Try this midpoint on for size. 882 */ 883 884 if (longest(v, d2, mid, end, NULL) == end) { 885 er = cdissect(v, t->left, begin, mid); 886 if (er == REG_OKAY) { 887 er = cdissect(v, t->right, mid, end); 888 if (er == REG_OKAY) { 889 /* 890 * Satisfaction. 891 */ 892 893 MDEBUG(("successful\n")); 894 freedfa(d); 895 freedfa(d2); 896 return REG_OKAY; 897 } 898 } 899 if ((er != REG_OKAY) && (er != REG_NOMATCH)) { 900 freedfa(d); 901 freedfa(d2); 902 return er; 903 } 904 } 905 906 /* 907 * That midpoint didn't work, find a new one. 908 */ 909 910 if (mid == begin) { 911 /* 912 * All possibilities exhausted. 913 */ 914 915 MDEBUG(("%d no midpoint\n", t->retry)); 916 freedfa(d); 917 freedfa(d2); 918 return REG_NOMATCH; 919 } 920 mid = longest(v, d, begin, mid-1, NULL); 921 if (mid == NULL) { 922 /* 923 * Failed to find a new one. 924 */ 925 926 MDEBUG(("%d failed midpoint\n", t->retry)); 927 freedfa(d); 928 freedfa(d2); 929 return REG_NOMATCH; 930 } 931 MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid))); 932 v->mem[t->retry] = (mid - begin) + 1; 933 zapmem(v, t->left); 934 zapmem(v, t->right); 935 } 936} 937 938/* 939 - crevdissect - determine backref shortest-first subexpression matches 940 * The retry memory stores the offset of the trial midpoint from begin, plus 1 941 * so that 0 uniquely means "clean slate". 942 ^ static int crevdissect(struct vars *, struct subre *, chr *, chr *); 943 */ 944static int /* regexec return code */ 945crevdissect( 946 struct vars *v, 947 struct subre *t, 948 chr *begin, /* beginning of relevant substring */ 949 chr *end) /* end of same */ 950{ 951 struct dfa *d; 952 struct dfa *d2; 953 chr *mid; 954 int er; 955 956 assert(t->op == '.'); 957 assert(t->left != NULL && t->left->cnfa.nstates > 0); 958 assert(t->right != NULL && t->right->cnfa.nstates > 0); 959 assert(t->left->flags&SHORTER); 960 961 /* 962 * Concatenation -- need to split the substring between parts. 963 */ 964 965 d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC); 966 if (ISERR()) { 967 return v->err; 968 } 969 d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC); 970 if (ISERR()) { 971 freedfa(d); 972 return v->err; 973 } 974 MDEBUG(("crev %d\n", t->retry)); 975 976 /* 977 * Pick a tentative midpoint. 978 */ 979 980 if (v->mem[t->retry] == 0) { 981 mid = shortest(v, d, begin, begin, end, NULL, NULL); 982 if (mid == NULL) { 983 freedfa(d); 984 freedfa(d2); 985 return REG_NOMATCH; 986 } 987 MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); 988 v->mem[t->retry] = (mid - begin) + 1; 989 } else { 990 mid = begin + (v->mem[t->retry] - 1); 991 MDEBUG(("working midpoint %ld\n", LOFF(mid))); 992 } 993 994 /* 995 * Iterate until satisfaction or failure. 996 */ 997 998 for (;;) { 999 /* 1000 * Try this midpoint on for size. 1001 */ 1002 1003 if (longest(v, d2, mid, end, NULL) == end) { 1004 er = cdissect(v, t->left, begin, mid); 1005 if (er == REG_OKAY) { 1006 er = cdissect(v, t->right, mid, end); 1007 if (er == REG_OKAY) { 1008 /* 1009 * Satisfaction. 1010 */ 1011 1012 MDEBUG(("successful\n")); 1013 freedfa(d); 1014 freedfa(d2); 1015 return REG_OKAY; 1016 } 1017 } 1018 if (er != REG_OKAY && er != REG_NOMATCH) { 1019 freedfa(d); 1020 freedfa(d2); 1021 return er; 1022 } 1023 } 1024 1025 /* 1026 * That midpoint didn't work, find a new one. 1027 */ 1028 1029 if (mid == end) { 1030 /* 1031 * All possibilities exhausted. 1032 */ 1033 1034 MDEBUG(("%d no midpoint\n", t->retry)); 1035 freedfa(d); 1036 freedfa(d2); 1037 return REG_NOMATCH; 1038 } 1039 mid = shortest(v, d, begin, mid+1, end, NULL, NULL); 1040 if (mid == NULL) { 1041 /* 1042 * Failed to find a new one. 1043 */ 1044 1045 MDEBUG(("%d failed midpoint\n", t->retry)); 1046 freedfa(d); 1047 freedfa(d2); 1048 return REG_NOMATCH; 1049 } 1050 MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid))); 1051 v->mem[t->retry] = (mid - begin) + 1; 1052 zapmem(v, t->left); 1053 zapmem(v, t->right); 1054 } 1055} 1056 1057/* 1058 - cbrdissect - determine backref subexpression matches 1059 ^ static int cbrdissect(struct vars *, struct subre *, chr *, chr *); 1060 */ 1061static int /* regexec return code */ 1062cbrdissect( 1063 struct vars *v, 1064 struct subre *t, 1065 chr *begin, /* beginning of relevant substring */ 1066 chr *end) /* end of same */ 1067{ 1068 int i; 1069 int n = t->subno; 1070 size_t len; 1071 chr *paren; 1072 chr *p; 1073 chr *stop; 1074 int min = t->min; 1075 int max = t->max; 1076 1077 assert(t != NULL); 1078 assert(t->op == 'b'); 1079 assert(n >= 0); 1080 assert((size_t)n < v->nmatch); 1081 1082 MDEBUG(("cbackref n%d %d{%d-%d}\n", t->retry, n, min, max)); 1083 1084 if (v->pmatch[n].rm_so == -1) { 1085 return REG_NOMATCH; 1086 } 1087 paren = v->start + v->pmatch[n].rm_so; 1088 len = v->pmatch[n].rm_eo - v->pmatch[n].rm_so; 1089 1090 /* 1091 * No room to maneuver -- retries are pointless. 1092 */ 1093 1094 if (v->mem[t->retry]) { 1095 return REG_NOMATCH; 1096 } 1097 v->mem[t->retry] = 1; 1098 1099 /* 1100 * Special-case zero-length string. 1101 */ 1102 1103 if (len == 0) { 1104 if (begin == end) { 1105 return REG_OKAY; 1106 } 1107 return REG_NOMATCH; 1108 } 1109 1110 /* 1111 * And too-short string. 1112 */ 1113 1114 assert(end >= begin); 1115 if ((size_t)(end - begin) < len) { 1116 return REG_NOMATCH; 1117 } 1118 stop = end - len; 1119 1120 /* 1121 * Count occurrences. 1122 */ 1123 1124 i = 0; 1125 for (p = begin; p <= stop && (i < max || max == INFINITY); p += len) { 1126 if ((*v->g->compare)(paren, p, len) != 0) { 1127 break; 1128 } 1129 i++; 1130 } 1131 MDEBUG(("cbackref found %d\n", i)); 1132 1133 /* 1134 * And sort it out. 1135 */ 1136 1137 if (p != end) { /* didn't consume all of it */ 1138 return REG_NOMATCH; 1139 } 1140 if (min <= i && (i <= max || max == INFINITY)) { 1141 return REG_OKAY; 1142 } 1143 return REG_NOMATCH; /* out of range */ 1144} 1145 1146/* 1147 - caltdissect - determine alternative subexpression matches (w. complications) 1148 ^ static int caltdissect(struct vars *, struct subre *, chr *, chr *); 1149 */ 1150static int /* regexec return code */ 1151caltdissect( 1152 struct vars *v, 1153 struct subre *t, 1154 chr *begin, /* beginning of relevant substring */ 1155 chr *end) /* end of same */ 1156{ 1157 struct dfa *d; 1158 int er; 1159#define UNTRIED 0 /* not yet tried at all */ 1160#define TRYING 1 /* top matched, trying submatches */ 1161#define TRIED 2 /* top didn't match or submatches exhausted */ 1162 1163 if (t == NULL) { 1164 return REG_NOMATCH; 1165 } 1166 assert(t->op == '|'); 1167 if (v->mem[t->retry] == TRIED) { 1168 return caltdissect(v, t->right, begin, end); 1169 } 1170 1171 MDEBUG(("calt n%d\n", t->retry)); 1172 assert(t->left != NULL); 1173 1174 if (v->mem[t->retry] == UNTRIED) { 1175 d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC); 1176 if (ISERR()) { 1177 return v->err; 1178 } 1179 if (longest(v, d, begin, end, NULL) != end) { 1180 freedfa(d); 1181 v->mem[t->retry] = TRIED; 1182 return caltdissect(v, t->right, begin, end); 1183 } 1184 freedfa(d); 1185 MDEBUG(("calt matched\n")); 1186 v->mem[t->retry] = TRYING; 1187 } 1188 1189 er = cdissect(v, t->left, begin, end); 1190 if (er != REG_NOMATCH) { 1191 return er; 1192 } 1193 1194 v->mem[t->retry] = TRIED; 1195 return caltdissect(v, t->right, begin, end); 1196} 1197 1198#include "rege_dfa.c" 1199 1200/* 1201 * Local Variables: 1202 * mode: c 1203 * c-basic-offset: 4 1204 * fill-column: 78 1205 * End: 1206 */ 1207