1/* 2 * The matching engine and friends. This file is #included by regexec.c 3 * after suitable #defines of a variety of macros used herein, so that 4 * different state representations can be used without duplicating masses 5 * of code. 6 */ 7 8#ifdef SNAMES 9#define matcher smatcher 10#define fast sfast 11#define slow sslow 12#define dissect sdissect 13#define backref sbackref 14#define step sstep 15#define print sprint 16#define at sat 17#define match smat 18#endif 19#ifdef LNAMES 20#define matcher lmatcher 21#define fast lfast 22#define slow lslow 23#define dissect ldissect 24#define backref lbackref 25#define step lstep 26#define print lprint 27#define at lat 28#define match lmat 29#endif 30 31/* another structure passed up and down to avoid zillions of parameters */ 32struct match { 33 struct re_guts *g; 34 int eflags; 35 regmatch_t *pmatch; /* [nsub+1] (0 element unused) */ 36 char *offp; /* offsets work from here */ 37 char *beginp; /* start of string -- virtual NUL precedes */ 38 char *endp; /* end of string -- virtual NUL here */ 39 char *coldp; /* can be no match starting before here */ 40 char **lastpos; /* [nplus+1] */ 41 STATEVARS; 42 states st; /* current states */ 43 states fresh; /* states for a fresh start */ 44 states tmp; /* temporary */ 45 states empty; /* empty set of states */ 46}; 47 48#include "engine.ih" 49 50#ifdef REDEBUG 51#define SP(t, s, c) print(m, t, s, c, stdout) 52#define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2) 53#define NOTE(str) { if (m->eflags®_TRACE) printf("=%s\n", (str)); } 54#else 55#define SP(t, s, c) /* nothing */ 56#define AT(t, p1, p2, s1, s2) /* nothing */ 57#define NOTE(s) /* nothing */ 58#endif 59 60/* 61 - matcher - the actual matching engine 62 == static int matcher(register struct re_guts *g, char *string, \ 63 == size_t nmatch, regmatch_t pmatch[], int eflags); 64 */ 65static int /* 0 success, REG_NOMATCH failure */ 66matcher(g, string, nmatch, pmatch, eflags) 67register struct re_guts *g; 68char *string; 69size_t nmatch; 70regmatch_t pmatch[]; 71int eflags; 72{ 73 register char *endp; 74 register size_t i; 75 struct match mv; 76 register struct match *m = &mv; 77 register char *dp; 78 const register sopno gf = g->firststate+1; /* +1 for OEND */ 79 const register sopno gl = g->laststate; 80 char *start; 81 char *stop; 82 83 /* simplify the situation where possible */ 84 if (g->cflags®_NOSUB) 85 nmatch = 0; 86 if (eflags®_STARTEND) { 87 start = string + pmatch[0].rm_so; 88 stop = string + pmatch[0].rm_eo; 89 } else { 90 start = string; 91 stop = start + strlen(start); 92 } 93 if (stop < start) 94 return(REG_INVARG); 95 96 /* prescreening; this does wonders for this rather slow code */ 97 if (g->must != NULL) { 98 for (dp = start; dp < stop; dp++) 99 if (*dp == g->must[0] && stop - dp >= g->mlen && 100 memcmp(dp, g->must, (size_t)g->mlen) == 0) 101 break; 102 if (dp == stop) /* we didn't find g->must */ 103 return(REG_NOMATCH); 104 } 105 106 /* match struct setup */ 107 m->g = g; 108 m->eflags = eflags; 109 m->pmatch = NULL; 110 m->lastpos = NULL; 111 m->offp = string; 112 m->beginp = start; 113 m->endp = stop; 114 STATESETUP(m, 4); 115 SETUP(m->st); 116 SETUP(m->fresh); 117 SETUP(m->tmp); 118 SETUP(m->empty); 119 CLEAR(m->empty); 120 121 /* this loop does only one repetition except for backrefs */ 122 for (;;) { 123 endp = fast(m, start, stop, gf, gl); 124 if (endp == NULL) { /* a miss */ 125 STATETEARDOWN(m); 126 return(REG_NOMATCH); 127 } 128 if (nmatch == 0 && !g->backrefs) 129 break; /* no further info needed */ 130 131 /* where? */ 132 assert(m->coldp != NULL); 133 for (;;) { 134 NOTE("finding start"); 135 endp = slow(m, m->coldp, stop, gf, gl); 136 if (endp != NULL) 137 break; 138 assert(m->coldp < m->endp); 139 m->coldp++; 140 } 141 if (nmatch == 1 && !g->backrefs) 142 break; /* no further info needed */ 143 144 /* oh my, he wants the subexpressions... */ 145 if (m->pmatch == NULL) 146 m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) * 147 sizeof(regmatch_t)); 148 if (m->pmatch == NULL) { 149 STATETEARDOWN(m); 150 return(REG_ESPACE); 151 } 152 for (i = 1; i <= m->g->nsub; i++) 153 m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1; 154 if (!g->backrefs && !(m->eflags®_BACKR)) { 155 NOTE("dissecting"); 156 dp = dissect(m, m->coldp, endp, gf, gl); 157 } else { 158 if (g->nplus > 0 && m->lastpos == NULL) 159 m->lastpos = (char **)malloc((g->nplus+1) * 160 sizeof(char *)); 161 if (g->nplus > 0 && m->lastpos == NULL) { 162 free(m->pmatch); 163 STATETEARDOWN(m); 164 return(REG_ESPACE); 165 } 166 NOTE("backref dissect"); 167 dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); 168 } 169 if (dp != NULL) 170 break; 171 172 /* uh-oh... we couldn't find a subexpression-level match */ 173 assert(g->backrefs); /* must be back references doing it */ 174 assert(g->nplus == 0 || m->lastpos != NULL); 175 for (;;) { 176 if (dp != NULL || endp <= m->coldp) 177 break; /* defeat */ 178 NOTE("backoff"); 179 endp = slow(m, m->coldp, endp-1, gf, gl); 180 if (endp == NULL) 181 break; /* defeat */ 182 /* try it on a shorter possibility */ 183#ifndef NDEBUG 184 for (i = 1; i <= m->g->nsub; i++) { 185 assert(m->pmatch[i].rm_so == -1); 186 assert(m->pmatch[i].rm_eo == -1); 187 } 188#endif 189 NOTE("backoff dissect"); 190 dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); 191 } 192 assert(dp == NULL || dp == endp); 193 if (dp != NULL) /* found a shorter one */ 194 break; 195 196 /* despite initial appearances, there is no match here */ 197 NOTE("false alarm"); 198 start = m->coldp + 1; /* recycle starting later */ 199 assert(start <= stop); 200 } 201 202 /* fill in the details if requested */ 203 if (nmatch > 0) { 204 pmatch[0].rm_so = m->coldp - m->offp; 205 pmatch[0].rm_eo = endp - m->offp; 206 } 207 if (nmatch > 1) { 208 assert(m->pmatch != NULL); 209 for (i = 1; i < nmatch; i++) 210 if (i <= m->g->nsub) 211 pmatch[i] = m->pmatch[i]; 212 else { 213 pmatch[i].rm_so = -1; 214 pmatch[i].rm_eo = -1; 215 } 216 } 217 218 if (m->pmatch != NULL) 219 free((char *)m->pmatch); 220 if (m->lastpos != NULL) 221 free((char *)m->lastpos); 222 STATETEARDOWN(m); 223 return(0); 224} 225 226/* 227 - dissect - figure out what matched what, no back references 228 == static char *dissect(register struct match *m, char *start, \ 229 == char *stop, sopno startst, sopno stopst); 230 */ 231static char * /* == stop (success) always */ 232dissect(m, start, stop, startst, stopst) 233register struct match *m; 234char *start; 235char *stop; 236sopno startst; 237sopno stopst; 238{ 239 register int i; 240 register sopno ss; /* start sop of current subRE */ 241 register sopno es; /* end sop of current subRE */ 242 register char *sp; /* start of string matched by it */ 243 register char *stp; /* string matched by it cannot pass here */ 244 register char *rest; /* start of rest of string */ 245 register char *tail; /* string unmatched by rest of RE */ 246 register sopno ssub; /* start sop of subsubRE */ 247 register sopno esub; /* end sop of subsubRE */ 248 register char *ssp; /* start of string matched by subsubRE */ 249 register char *sep; /* end of string matched by subsubRE */ 250 register char *oldssp; /* previous ssp */ 251 register char *dp; 252 253 AT("diss", start, stop, startst, stopst); 254 sp = start; 255 for (ss = startst; ss < stopst; ss = es) { 256 /* identify end of subRE */ 257 es = ss; 258 switch (OP(m->g->strip[es])) { 259 case OPLUS_: 260 case OQUEST_: 261 es += OPND(m->g->strip[es]); 262 break; 263 case OCH_: 264 while (OP(m->g->strip[es]) != O_CH) 265 es += OPND(m->g->strip[es]); 266 break; 267 } 268 es++; 269 270 /* figure out what it matched */ 271 switch (OP(m->g->strip[ss])) { 272 case OEND: 273 assert(nope); 274 break; 275 case OCHAR: 276 sp++; 277 break; 278 case OBOL: 279 case OEOL: 280 case OBOW: 281 case OEOW: 282 break; 283 case OANY: 284 case OANYOF: 285 sp++; 286 break; 287 case OBACK_: 288 case O_BACK: 289 assert(nope); 290 break; 291 /* cases where length of match is hard to find */ 292 case OQUEST_: 293 stp = stop; 294 for (;;) { 295 /* how long could this one be? */ 296 rest = slow(m, sp, stp, ss, es); 297 assert(rest != NULL); /* it did match */ 298 /* could the rest match the rest? */ 299 tail = slow(m, rest, stop, es, stopst); 300 if (tail == stop) 301 break; /* yes! */ 302 /* no -- try a shorter match for this one */ 303 stp = rest - 1; 304 assert(stp >= sp); /* it did work */ 305 } 306 ssub = ss + 1; 307 esub = es - 1; 308 /* did innards match? */ 309 if (slow(m, sp, rest, ssub, esub) != NULL) { 310 dp = dissect(m, sp, rest, ssub, esub); 311 assert(dp == rest); 312 } else /* no */ 313 assert(sp == rest); 314 sp = rest; 315 break; 316 case OPLUS_: 317 stp = stop; 318 for (;;) { 319 /* how long could this one be? */ 320 rest = slow(m, sp, stp, ss, es); 321 assert(rest != NULL); /* it did match */ 322 /* could the rest match the rest? */ 323 tail = slow(m, rest, stop, es, stopst); 324 if (tail == stop) 325 break; /* yes! */ 326 /* no -- try a shorter match for this one */ 327 stp = rest - 1; 328 assert(stp >= sp); /* it did work */ 329 } 330 ssub = ss + 1; 331 esub = es - 1; 332 ssp = sp; 333 oldssp = ssp; 334 for (;;) { /* find last match of innards */ 335 sep = slow(m, ssp, rest, ssub, esub); 336 if (sep == NULL || sep == ssp) 337 break; /* failed or matched null */ 338 oldssp = ssp; /* on to next try */ 339 ssp = sep; 340 } 341 if (sep == NULL) { 342 /* last successful match */ 343 sep = ssp; 344 ssp = oldssp; 345 } 346 assert(sep == rest); /* must exhaust substring */ 347 assert(slow(m, ssp, sep, ssub, esub) == rest); 348 dp = dissect(m, ssp, sep, ssub, esub); 349 assert(dp == sep); 350 sp = rest; 351 break; 352 case OCH_: 353 stp = stop; 354 for (;;) { 355 /* how long could this one be? */ 356 rest = slow(m, sp, stp, ss, es); 357 assert(rest != NULL); /* it did match */ 358 /* could the rest match the rest? */ 359 tail = slow(m, rest, stop, es, stopst); 360 if (tail == stop) 361 break; /* yes! */ 362 /* no -- try a shorter match for this one */ 363 stp = rest - 1; 364 assert(stp >= sp); /* it did work */ 365 } 366 ssub = ss + 1; 367 esub = ss + OPND(m->g->strip[ss]) - 1; 368 assert(OP(m->g->strip[esub]) == OOR1); 369 for (;;) { /* find first matching branch */ 370 if (slow(m, sp, rest, ssub, esub) == rest) 371 break; /* it matched all of it */ 372 /* that one missed, try next one */ 373 assert(OP(m->g->strip[esub]) == OOR1); 374 esub++; 375 assert(OP(m->g->strip[esub]) == OOR2); 376 ssub = esub + 1; 377 esub += OPND(m->g->strip[esub]); 378 if (OP(m->g->strip[esub]) == OOR2) 379 esub--; 380 else 381 assert(OP(m->g->strip[esub]) == O_CH); 382 } 383 dp = dissect(m, sp, rest, ssub, esub); 384 assert(dp == rest); 385 sp = rest; 386 break; 387 case O_PLUS: 388 case O_QUEST: 389 case OOR1: 390 case OOR2: 391 case O_CH: 392 assert(nope); 393 break; 394 case OLPAREN: 395 i = OPND(m->g->strip[ss]); 396 assert(0 < i && i <= m->g->nsub); 397 m->pmatch[i].rm_so = sp - m->offp; 398 break; 399 case ORPAREN: 400 i = OPND(m->g->strip[ss]); 401 assert(0 < i && i <= m->g->nsub); 402 m->pmatch[i].rm_eo = sp - m->offp; 403 break; 404 default: /* uh oh */ 405 assert(nope); 406 break; 407 } 408 } 409 410 assert(sp == stop); 411 return(sp); 412} 413 414/* 415 - backref - figure out what matched what, figuring in back references 416 == static char *backref(register struct match *m, char *start, \ 417 == char *stop, sopno startst, sopno stopst, sopno lev); 418 */ 419static char * /* == stop (success) or NULL (failure) */ 420backref(m, start, stop, startst, stopst, lev) 421register struct match *m; 422char *start; 423char *stop; 424sopno startst; 425sopno stopst; 426sopno lev; /* PLUS nesting level */ 427{ 428 register int i; 429 register sopno ss; /* start sop of current subRE */ 430 register char *sp; /* start of string matched by it */ 431 register sopno ssub; /* start sop of subsubRE */ 432 register sopno esub; /* end sop of subsubRE */ 433 register char *ssp; /* start of string matched by subsubRE */ 434 register char *dp; 435 register size_t len; 436 register int hard; 437 register sop s; 438 register regoff_t offsave; 439 register cset *cs; 440 441 AT("back", start, stop, startst, stopst); 442 sp = start; 443 444 /* get as far as we can with easy stuff */ 445 hard = 0; 446 for (ss = startst; !hard && ss < stopst; ss++) 447 switch (OP(s = m->g->strip[ss])) { 448 case OCHAR: 449 if (sp == stop || *sp++ != (char)OPND(s)) 450 return(NULL); 451 break; 452 case OANY: 453 if (sp == stop) 454 return(NULL); 455 sp++; 456 break; 457 case OANYOF: 458 cs = &m->g->sets[OPND(s)]; 459 if (sp == stop || !CHIN(cs, *sp++)) 460 return(NULL); 461 break; 462 case OBOL: 463 if ( (sp == m->beginp && !(m->eflags®_NOTBOL)) || 464 (sp < m->endp && *(sp-1) == '\n' && 465 (m->g->cflags®_NEWLINE)) ) 466 { /* yes */ } 467 else 468 return(NULL); 469 break; 470 case OEOL: 471 if ( (sp == m->endp && !(m->eflags®_NOTEOL)) || 472 (sp < m->endp && *sp == '\n' && 473 (m->g->cflags®_NEWLINE)) ) 474 { /* yes */ } 475 else 476 return(NULL); 477 break; 478 case OBOW: 479 if (( (sp == m->beginp && !(m->eflags®_NOTBOL)) || 480 (sp < m->endp && *(sp-1) == '\n' && 481 (m->g->cflags®_NEWLINE)) || 482 (sp > m->beginp && 483 !ISWORD(*(sp-1))) ) && 484 (sp < m->endp && ISWORD(*sp)) ) 485 { /* yes */ } 486 else 487 return(NULL); 488 break; 489 case OEOW: 490 if (( (sp == m->endp && !(m->eflags®_NOTEOL)) || 491 (sp < m->endp && *sp == '\n' && 492 (m->g->cflags®_NEWLINE)) || 493 (sp < m->endp && !ISWORD(*sp)) ) && 494 (sp > m->beginp && ISWORD(*(sp-1))) ) 495 { /* yes */ } 496 else 497 return(NULL); 498 break; 499 case O_QUEST: 500 break; 501 case OOR1: /* matches null but needs to skip */ 502 ss++; 503 s = m->g->strip[ss]; 504 do { 505 assert(OP(s) == OOR2); 506 ss += OPND(s); 507 } while (OP(s = m->g->strip[ss]) != O_CH); 508 /* note that the ss++ gets us past the O_CH */ 509 break; 510 default: /* have to make a choice */ 511 hard = 1; 512 break; 513 } 514 if (!hard) { /* that was it! */ 515 if (sp != stop) 516 return(NULL); 517 return(sp); 518 } 519 ss--; /* adjust for the for's final increment */ 520 521 /* the hard stuff */ 522 AT("hard", sp, stop, ss, stopst); 523 s = m->g->strip[ss]; 524 switch (OP(s)) { 525 case OBACK_: /* the vilest depths */ 526 i = OPND(s); 527 assert(0 < i && i <= m->g->nsub); 528 if (m->pmatch[i].rm_eo == -1) 529 return(NULL); 530 assert(m->pmatch[i].rm_so != -1); 531 len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so; 532 assert(stop - m->beginp >= len); 533 if (sp > stop - len) 534 return(NULL); /* not enough left to match */ 535 ssp = m->offp + m->pmatch[i].rm_so; 536 if (memcmp(sp, ssp, len) != 0) 537 return(NULL); 538 while (m->g->strip[ss] != SOP(O_BACK, i)) 539 ss++; 540 return(backref(m, sp+len, stop, ss+1, stopst, lev)); 541 break; 542 case OQUEST_: /* to null or not */ 543 dp = backref(m, sp, stop, ss+1, stopst, lev); 544 if (dp != NULL) 545 return(dp); /* not */ 546 return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev)); 547 break; 548 case OPLUS_: 549 assert(m->lastpos != NULL); 550 assert(lev+1 <= m->g->nplus); 551 m->lastpos[lev+1] = sp; 552 return(backref(m, sp, stop, ss+1, stopst, lev+1)); 553 break; 554 case O_PLUS: 555 if (sp == m->lastpos[lev]) /* last pass matched null */ 556 return(backref(m, sp, stop, ss+1, stopst, lev-1)); 557 /* try another pass */ 558 m->lastpos[lev] = sp; 559 dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev); 560 if (dp == NULL) 561 return(backref(m, sp, stop, ss+1, stopst, lev-1)); 562 else 563 return(dp); 564 break; 565 case OCH_: /* find the right one, if any */ 566 ssub = ss + 1; 567 esub = ss + OPND(s) - 1; 568 assert(OP(m->g->strip[esub]) == OOR1); 569 for (;;) { /* find first matching branch */ 570 dp = backref(m, sp, stop, ssub, esub, lev); 571 if (dp != NULL) 572 return(dp); 573 /* that one missed, try next one */ 574 if (OP(m->g->strip[esub]) == O_CH) 575 return(NULL); /* there is none */ 576 esub++; 577 assert(OP(m->g->strip[esub]) == OOR2); 578 ssub = esub + 1; 579 esub += OPND(m->g->strip[esub]); 580 if (OP(m->g->strip[esub]) == OOR2) 581 esub--; 582 else 583 assert(OP(m->g->strip[esub]) == O_CH); 584 } 585 break; 586 case OLPAREN: /* must undo assignment if rest fails */ 587 i = OPND(s); 588 assert(0 < i && i <= m->g->nsub); 589 offsave = m->pmatch[i].rm_so; 590 m->pmatch[i].rm_so = sp - m->offp; 591 dp = backref(m, sp, stop, ss+1, stopst, lev); 592 if (dp != NULL) 593 return(dp); 594 m->pmatch[i].rm_so = offsave; 595 return(NULL); 596 break; 597 case ORPAREN: /* must undo assignment if rest fails */ 598 i = OPND(s); 599 assert(0 < i && i <= m->g->nsub); 600 offsave = m->pmatch[i].rm_eo; 601 m->pmatch[i].rm_eo = sp - m->offp; 602 dp = backref(m, sp, stop, ss+1, stopst, lev); 603 if (dp != NULL) 604 return(dp); 605 m->pmatch[i].rm_eo = offsave; 606 return(NULL); 607 break; 608 default: /* uh oh */ 609 assert(nope); 610 break; 611 } 612 613 /* "can't happen" */ 614 assert(nope); 615 /* NOTREACHED */ 616 return((char *)NULL); /* dummy */ 617} 618 619/* 620 - fast - step through the string at top speed 621 == static char *fast(register struct match *m, char *start, \ 622 == char *stop, sopno startst, sopno stopst); 623 */ 624static char * /* where tentative match ended, or NULL */ 625fast(m, start, stop, startst, stopst) 626register struct match *m; 627char *start; 628char *stop; 629sopno startst; 630sopno stopst; 631{ 632 register states st = m->st; 633 register states fresh = m->fresh; 634 register states tmp = m->tmp; 635 register char *p = start; 636 register int c = (start == m->beginp) ? OUT : *(start-1); 637 register int lastc; /* previous c */ 638 register int flagch; 639 register int i; 640 register char *coldp; /* last p after which no match was underway */ 641 642 CLEAR(st); 643 SET1(st, startst); 644 st = step(m->g, startst, stopst, st, NOTHING, st); 645 ASSIGN(fresh, st); 646 SP("start", st, *p); 647 coldp = NULL; 648 for (;;) { 649 /* next character */ 650 lastc = c; 651 c = (p == m->endp) ? OUT : *p; 652 if (EQ(st, fresh)) 653 coldp = p; 654 655 /* is there an EOL and/or BOL between lastc and c? */ 656 flagch = '\0'; 657 i = 0; 658 if ( (lastc == '\n' && m->g->cflags®_NEWLINE) || 659 (lastc == OUT && !(m->eflags®_NOTBOL)) ) { 660 flagch = BOL; 661 i = m->g->nbol; 662 } 663 if ( (c == '\n' && m->g->cflags®_NEWLINE) || 664 (c == OUT && !(m->eflags®_NOTEOL)) ) { 665 flagch = (flagch == BOL) ? BOLEOL : EOL; 666 i += m->g->neol; 667 } 668 if (i != 0) { 669 for (; i > 0; i--) 670 st = step(m->g, startst, stopst, st, flagch, st); 671 SP("boleol", st, c); 672 } 673 674 /* how about a word boundary? */ 675 if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && 676 (c != OUT && ISWORD(c)) ) { 677 flagch = BOW; 678 } 679 if ( (lastc != OUT && ISWORD(lastc)) && 680 (flagch == EOL || (c != OUT && !ISWORD(c))) ) { 681 flagch = EOW; 682 } 683 if (flagch == BOW || flagch == EOW) { 684 st = step(m->g, startst, stopst, st, flagch, st); 685 SP("boweow", st, c); 686 } 687 688 /* are we done? */ 689 if (ISSET(st, stopst) || p == stop) 690 break; /* NOTE BREAK OUT */ 691 692 /* no, we must deal with this character */ 693 ASSIGN(tmp, st); 694 ASSIGN(st, fresh); 695 assert(c != OUT); 696 st = step(m->g, startst, stopst, tmp, c, st); 697 SP("aft", st, c); 698 assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); 699 p++; 700 } 701 702 assert(coldp != NULL); 703 m->coldp = coldp; 704 if (ISSET(st, stopst)) 705 return(p+1); 706 else 707 return(NULL); 708} 709 710/* 711 - slow - step through the string more deliberately 712 == static char *slow(register struct match *m, char *start, \ 713 == char *stop, sopno startst, sopno stopst); 714 */ 715static char * /* where it ended */ 716slow(m, start, stop, startst, stopst) 717register struct match *m; 718char *start; 719char *stop; 720sopno startst; 721sopno stopst; 722{ 723 register states st = m->st; 724 register states empty = m->empty; 725 register states tmp = m->tmp; 726 register char *p = start; 727 register int c = (start == m->beginp) ? OUT : *(start-1); 728 register int lastc; /* previous c */ 729 register int flagch; 730 register int i; 731 register char *matchp; /* last p at which a match ended */ 732 733 AT("slow", start, stop, startst, stopst); 734 CLEAR(st); 735 SET1(st, startst); 736 SP("sstart", st, *p); 737 st = step(m->g, startst, stopst, st, NOTHING, st); 738 matchp = NULL; 739 for (;;) { 740 /* next character */ 741 lastc = c; 742 c = (p == m->endp) ? OUT : *p; 743 744 /* is there an EOL and/or BOL between lastc and c? */ 745 flagch = '\0'; 746 i = 0; 747 if ( (lastc == '\n' && m->g->cflags®_NEWLINE) || 748 (lastc == OUT && !(m->eflags®_NOTBOL)) ) { 749 flagch = BOL; 750 i = m->g->nbol; 751 } 752 if ( (c == '\n' && m->g->cflags®_NEWLINE) || 753 (c == OUT && !(m->eflags®_NOTEOL)) ) { 754 flagch = (flagch == BOL) ? BOLEOL : EOL; 755 i += m->g->neol; 756 } 757 if (i != 0) { 758 for (; i > 0; i--) 759 st = step(m->g, startst, stopst, st, flagch, st); 760 SP("sboleol", st, c); 761 } 762 763 /* how about a word boundary? */ 764 if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && 765 (c != OUT && ISWORD(c)) ) { 766 flagch = BOW; 767 } 768 if ( (lastc != OUT && ISWORD(lastc)) && 769 (flagch == EOL || (c != OUT && !ISWORD(c))) ) { 770 flagch = EOW; 771 } 772 if (flagch == BOW || flagch == EOW) { 773 st = step(m->g, startst, stopst, st, flagch, st); 774 SP("sboweow", st, c); 775 } 776 777 /* are we done? */ 778 if (ISSET(st, stopst)) 779 matchp = p; 780 if (EQ(st, empty) || p == stop) 781 break; /* NOTE BREAK OUT */ 782 783 /* no, we must deal with this character */ 784 ASSIGN(tmp, st); 785 ASSIGN(st, empty); 786 assert(c != OUT); 787 st = step(m->g, startst, stopst, tmp, c, st); 788 SP("saft", st, c); 789 assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); 790 p++; 791 } 792 793 return(matchp); 794} 795 796 797/* 798 - step - map set of states reachable before char to set reachable after 799 == static states step(register struct re_guts *g, sopno start, sopno stop, \ 800 == register states bef, int ch, register states aft); 801 == #define BOL (OUT+1) 802 == #define EOL (BOL+1) 803 == #define BOLEOL (BOL+2) 804 == #define NOTHING (BOL+3) 805 == #define BOW (BOL+4) 806 == #define EOW (BOL+5) 807 == #define CODEMAX (BOL+5) // highest code used 808 == #define NONCHAR(c) ((c) > CHAR_MAX) 809 == #define NNONCHAR (CODEMAX-CHAR_MAX) 810 */ 811static states 812step(g, start, stop, bef, ch, aft) 813register struct re_guts *g; 814sopno start; /* start state within strip */ 815sopno stop; /* state after stop state within strip */ 816register states bef; /* states reachable before */ 817int ch; /* character or NONCHAR code */ 818register states aft; /* states already known reachable after */ 819{ 820 register cset *cs; 821 register sop s; 822 register sopno pc; 823 register onestate here; /* note, macros know this name */ 824 register sopno look; 825 register long i; 826 827 for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { 828 s = g->strip[pc]; 829 switch (OP(s)) { 830 case OEND: 831 assert(pc == stop-1); 832 break; 833 case OCHAR: 834 /* only characters can match */ 835 assert(!NONCHAR(ch) || ch != (char)OPND(s)); 836 if (ch == (char)OPND(s)) 837 FWD(aft, bef, 1); 838 break; 839 case OBOL: 840 if (ch == BOL || ch == BOLEOL) 841 FWD(aft, bef, 1); 842 break; 843 case OEOL: 844 if (ch == EOL || ch == BOLEOL) 845 FWD(aft, bef, 1); 846 break; 847 case OBOW: 848 if (ch == BOW) 849 FWD(aft, bef, 1); 850 break; 851 case OEOW: 852 if (ch == EOW) 853 FWD(aft, bef, 1); 854 break; 855 case OANY: 856 if (!NONCHAR(ch)) 857 FWD(aft, bef, 1); 858 break; 859 case OANYOF: 860 cs = &g->sets[OPND(s)]; 861 if (!NONCHAR(ch) && CHIN(cs, ch)) 862 FWD(aft, bef, 1); 863 break; 864 case OBACK_: /* ignored here */ 865 case O_BACK: 866 FWD(aft, aft, 1); 867 break; 868 case OPLUS_: /* forward, this is just an empty */ 869 FWD(aft, aft, 1); 870 break; 871 case O_PLUS: /* both forward and back */ 872 FWD(aft, aft, 1); 873 i = ISSETBACK(aft, OPND(s)); 874 BACK(aft, aft, OPND(s)); 875 if (!i && ISSETBACK(aft, OPND(s))) { 876 /* oho, must reconsider loop body */ 877 pc -= OPND(s) + 1; 878 INIT(here, pc); 879 } 880 break; 881 case OQUEST_: /* two branches, both forward */ 882 FWD(aft, aft, 1); 883 FWD(aft, aft, OPND(s)); 884 break; 885 case O_QUEST: /* just an empty */ 886 FWD(aft, aft, 1); 887 break; 888 case OLPAREN: /* not significant here */ 889 case ORPAREN: 890 FWD(aft, aft, 1); 891 break; 892 case OCH_: /* mark the first two branches */ 893 FWD(aft, aft, 1); 894 assert(OP(g->strip[pc+OPND(s)]) == OOR2); 895 FWD(aft, aft, OPND(s)); 896 break; 897 case OOR1: /* done a branch, find the O_CH */ 898 if (ISSTATEIN(aft, here)) { 899 for (look = 1; 900 OP(s = g->strip[pc+look]) != O_CH; 901 look += OPND(s)) 902 assert(OP(s) == OOR2); 903 FWD(aft, aft, look); 904 } 905 break; 906 case OOR2: /* propagate OCH_'s marking */ 907 FWD(aft, aft, 1); 908 if (OP(g->strip[pc+OPND(s)]) != O_CH) { 909 assert(OP(g->strip[pc+OPND(s)]) == OOR2); 910 FWD(aft, aft, OPND(s)); 911 } 912 break; 913 case O_CH: /* just empty */ 914 FWD(aft, aft, 1); 915 break; 916 default: /* ooooops... */ 917 assert(nope); 918 break; 919 } 920 } 921 922 return(aft); 923} 924 925#ifdef REDEBUG 926/* 927 - print - print a set of states 928 == #ifdef REDEBUG 929 == static void print(struct match *m, char *caption, states st, \ 930 == int ch, FILE *d); 931 == #endif 932 */ 933static void 934print(m, caption, st, ch, d) 935struct match *m; 936char *caption; 937states st; 938int ch; 939FILE *d; 940{ 941 register struct re_guts *g = m->g; 942 register int i; 943 register int first = 1; 944 945 if (!(m->eflags®_TRACE)) 946 return; 947 948 fprintf(d, "%s", caption); 949 if (ch != '\0') 950 fprintf(d, " %s", pchar(ch)); 951 for (i = 0; i < g->nstates; i++) 952 if (ISSET(st, i)) { 953 fprintf(d, "%s%d", (first) ? "\t" : ", ", i); 954 first = 0; 955 } 956 fprintf(d, "\n"); 957} 958 959/* 960 - at - print current situation 961 == #ifdef REDEBUG 962 == static void at(struct match *m, char *title, char *start, char *stop, \ 963 == sopno startst, sopno stopst); 964 == #endif 965 */ 966static void 967at(m, title, start, stop, startst, stopst) 968struct match *m; 969char *title; 970char *start; 971char *stop; 972sopno startst; 973sopno stopst; 974{ 975 if (!(m->eflags®_TRACE)) 976 return; 977 978 printf("%s %s-", title, pchar(*start)); 979 printf("%s ", pchar(*stop)); 980 printf("%ld-%ld\n", (long)startst, (long)stopst); 981} 982 983#ifndef PCHARDONE 984#define PCHARDONE /* never again */ 985/* 986 - pchar - make a character printable 987 == #ifdef REDEBUG 988 == static char *pchar(int ch); 989 == #endif 990 * 991 * Is this identical to regchar() over in debug.c? Well, yes. But a 992 * duplicate here avoids having a debugging-capable regexec.o tied to 993 * a matching debug.o, and this is convenient. It all disappears in 994 * the non-debug compilation anyway, so it doesn't matter much. 995 */ 996static char * /* -> representation */ 997pchar(ch) 998int ch; 999{ 1000 static char pbuf[10]; 1001 1002 if (isprint(ch) || ch == ' ') 1003 sprintf(pbuf, "%c", ch); 1004 else 1005 sprintf(pbuf, "\\%o", ch); 1006 return(pbuf); 1007} 1008#endif 1009#endif 1010 1011#undef matcher 1012#undef fast 1013#undef slow 1014#undef dissect 1015#undef backref 1016#undef step 1017#undef print 1018#undef at 1019#undef match 1020