engine.c revision 1.4
1/* $OpenBSD: engine.c,v 1.4 1997/04/28 20:44:57 millert Exp $ */ 2 3/*- 4 * Copyright (c) 1992, 1993, 1994 Henry Spencer. 5 * Copyright (c) 1992, 1993, 1994 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Henry Spencer. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the University of 22 * California, Berkeley and its contributors. 23 * 4. Neither the name of the University nor the names of its contributors 24 * may be used to endorse or promote products derived from this software 25 * without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37 * SUCH DAMAGE. 38 * 39 * @(#)engine.c 8.5 (Berkeley) 3/20/94 40 */ 41 42#if defined(SNAMES) && defined(LIBC_SCCS) && !defined(lint) 43static char enginercsid[] = "$OpenBSD: engine.c,v 1.4 1997/04/28 20:44:57 millert Exp $"; 44#endif /* SNAMES and LIBC_SCCS and not lint */ 45 46/* 47 * The matching engine and friends. This file is #included by regexec.c 48 * after suitable #defines of a variety of macros used herein, so that 49 * different state representations can be used without duplicating masses 50 * of code. 51 */ 52 53#ifdef SNAMES 54#define matcher smatcher 55#define fast sfast 56#define slow sslow 57#define dissect sdissect 58#define backref sbackref 59#define step sstep 60#define print sprint 61#define at sat 62#define match smat 63#endif 64#ifdef LNAMES 65#define matcher lmatcher 66#define fast lfast 67#define slow lslow 68#define dissect ldissect 69#define backref lbackref 70#define step lstep 71#define print lprint 72#define at lat 73#define match lmat 74#endif 75 76/* another structure passed up and down to avoid zillions of parameters */ 77struct match { 78 struct re_guts *g; 79 int eflags; 80 regmatch_t *pmatch; /* [nsub+1] (0 element unused) */ 81 char *offp; /* offsets work from here */ 82 char *beginp; /* start of string -- virtual NUL precedes */ 83 char *endp; /* end of string -- virtual NUL here */ 84 char *coldp; /* can be no match starting before here */ 85 char **lastpos; /* [nplus+1] */ 86 STATEVARS; 87 states st; /* current states */ 88 states fresh; /* states for a fresh start */ 89 states tmp; /* temporary */ 90 states empty; /* empty set of states */ 91}; 92 93/* ========= begin header generated by ./mkh ========= */ 94#ifdef __cplusplus 95extern "C" { 96#endif 97 98/* === engine.c === */ 99static int matcher __P((struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], int eflags)); 100static char *dissect __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst)); 101static char *backref __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst, sopno lev)); 102static char *fast __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst)); 103static char *slow __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst)); 104static states step __P((struct re_guts *g, sopno start, sopno stop, states bef, int ch, states aft)); 105#define BOL (OUT+1) 106#define EOL (BOL+1) 107#define BOLEOL (BOL+2) 108#define NOTHING (BOL+3) 109#define BOW (BOL+4) 110#define EOW (BOL+5) 111#define CODEMAX (BOL+5) /* highest code used */ 112#define NONCHAR(c) ((c) > CHAR_MAX) 113#define NNONCHAR (CODEMAX-CHAR_MAX) 114#ifdef REDEBUG 115static void print __P((struct match *m, char *caption, states st, int ch, FILE *d)); 116#endif 117#ifdef REDEBUG 118static void at __P((struct match *m, char *title, char *start, char *stop, sopno startst, sopno stopst)); 119#endif 120#ifdef REDEBUG 121static char *pchar __P((int ch)); 122#endif 123 124#ifdef __cplusplus 125} 126#endif 127/* ========= end header generated by ./mkh ========= */ 128 129#ifdef REDEBUG 130#define SP(t, s, c) print(m, t, s, c, stdout) 131#define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2) 132#define NOTE(str) { if (m->eflags®_TRACE) (void)printf("=%s\n", (str)); } 133#else 134#define SP(t, s, c) /* nothing */ 135#define AT(t, p1, p2, s1, s2) /* nothing */ 136#define NOTE(s) /* nothing */ 137#endif 138 139/* 140 - matcher - the actual matching engine 141 == static int matcher(register struct re_guts *g, char *string, \ 142 == size_t nmatch, regmatch_t pmatch[], int eflags); 143 */ 144static int /* 0 success, REG_NOMATCH failure */ 145matcher(g, string, nmatch, pmatch, eflags) 146register struct re_guts *g; 147char *string; 148size_t nmatch; 149regmatch_t pmatch[]; 150int eflags; 151{ 152 register char *endp; 153 register int i; 154 struct match mv; 155 register struct match *m = &mv; 156 register char *dp; 157 const register sopno gf = g->firststate+1; /* +1 for OEND */ 158 const register sopno gl = g->laststate; 159 char *start; 160 char *stop; 161 162 /* simplify the situation where possible */ 163 if (g->cflags®_NOSUB) 164 nmatch = 0; 165 if (eflags®_STARTEND) { 166 start = string + pmatch[0].rm_so; 167 stop = string + pmatch[0].rm_eo; 168 } else { 169 start = string; 170 stop = start + strlen(start); 171 } 172 if (stop < start) 173 return(REG_INVARG); 174 175 /* prescreening; this does wonders for this rather slow code */ 176 if (g->must != NULL) { 177 for (dp = start; dp < stop; dp++) 178 if (*dp == g->must[0] && stop - dp >= g->mlen && 179 memcmp(dp, g->must, (size_t)g->mlen) == 0) 180 break; 181 if (dp == stop) /* we didn't find g->must */ 182 return(REG_NOMATCH); 183 } 184 185 /* match struct setup */ 186 m->g = g; 187 m->eflags = eflags; 188 m->pmatch = NULL; 189 m->lastpos = NULL; 190 m->offp = string; 191 m->beginp = start; 192 m->endp = stop; 193 STATESETUP(m, 4); 194 SETUP(m->st); 195 SETUP(m->fresh); 196 SETUP(m->tmp); 197 SETUP(m->empty); 198 CLEAR(m->empty); 199 200 /* this loop does only one repetition except for backrefs */ 201 for (;;) { 202 endp = fast(m, start, stop, gf, gl); 203 if (endp == NULL) { /* a miss */ 204 STATETEARDOWN(m); 205 return(REG_NOMATCH); 206 } 207 if (nmatch == 0 && !g->backrefs) 208 break; /* no further info needed */ 209 210 /* where? */ 211 assert(m->coldp != NULL); 212 for (;;) { 213 NOTE("finding start"); 214 endp = slow(m, m->coldp, stop, gf, gl); 215 if (endp != NULL) 216 break; 217 assert(m->coldp < m->endp); 218 m->coldp++; 219 } 220 if (nmatch == 1 && !g->backrefs) 221 break; /* no further info needed */ 222 223 /* oh my, he wants the subexpressions... */ 224 if (m->pmatch == NULL) 225 m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) * 226 sizeof(regmatch_t)); 227 if (m->pmatch == NULL) { 228 STATETEARDOWN(m); 229 return(REG_ESPACE); 230 } 231 for (i = 1; i <= m->g->nsub; i++) 232 m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1; 233 if (!g->backrefs && !(m->eflags®_BACKR)) { 234 NOTE("dissecting"); 235 dp = dissect(m, m->coldp, endp, gf, gl); 236 } else { 237 if (g->nplus > 0 && m->lastpos == NULL) 238 m->lastpos = (char **)malloc((g->nplus+1) * 239 sizeof(char *)); 240 if (g->nplus > 0 && m->lastpos == NULL) { 241 free(m->pmatch); 242 STATETEARDOWN(m); 243 return(REG_ESPACE); 244 } 245 NOTE("backref dissect"); 246 dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); 247 } 248 if (dp != NULL) 249 break; 250 251 /* uh-oh... we couldn't find a subexpression-level match */ 252 assert(g->backrefs); /* must be back references doing it */ 253 assert(g->nplus == 0 || m->lastpos != NULL); 254 for (;;) { 255 if (dp != NULL || endp <= m->coldp) 256 break; /* defeat */ 257 NOTE("backoff"); 258 endp = slow(m, m->coldp, endp-1, gf, gl); 259 if (endp == NULL) 260 break; /* defeat */ 261 /* try it on a shorter possibility */ 262#ifndef NDEBUG 263 for (i = 1; i <= m->g->nsub; i++) { 264 assert(m->pmatch[i].rm_so == -1); 265 assert(m->pmatch[i].rm_eo == -1); 266 } 267#endif 268 NOTE("backoff dissect"); 269 dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); 270 } 271 assert(dp == NULL || dp == endp); 272 if (dp != NULL) /* found a shorter one */ 273 break; 274 275 /* despite initial appearances, there is no match here */ 276 NOTE("false alarm"); 277 start = m->coldp + 1; /* recycle starting later */ 278 assert(start <= stop); 279 } 280 281 /* fill in the details if requested */ 282 if (nmatch > 0) { 283 pmatch[0].rm_so = m->coldp - m->offp; 284 pmatch[0].rm_eo = endp - m->offp; 285 } 286 if (nmatch > 1) { 287 assert(m->pmatch != NULL); 288 for (i = 1; i < nmatch; i++) 289 if (i <= m->g->nsub) 290 pmatch[i] = m->pmatch[i]; 291 else { 292 pmatch[i].rm_so = -1; 293 pmatch[i].rm_eo = -1; 294 } 295 } 296 297 if (m->pmatch != NULL) 298 free((char *)m->pmatch); 299 if (m->lastpos != NULL) 300 free((char *)m->lastpos); 301 STATETEARDOWN(m); 302 return(0); 303} 304 305/* 306 - dissect - figure out what matched what, no back references 307 == static char *dissect(register struct match *m, char *start, \ 308 == char *stop, sopno startst, sopno stopst); 309 */ 310static char * /* == stop (success) always */ 311dissect(m, start, stop, startst, stopst) 312register struct match *m; 313char *start; 314char *stop; 315sopno startst; 316sopno stopst; 317{ 318 register int i; 319 register sopno ss; /* start sop of current subRE */ 320 register sopno es; /* end sop of current subRE */ 321 register char *sp; /* start of string matched by it */ 322 register char *stp; /* string matched by it cannot pass here */ 323 register char *rest; /* start of rest of string */ 324 register char *tail; /* string unmatched by rest of RE */ 325 register sopno ssub; /* start sop of subsubRE */ 326 register sopno esub; /* end sop of subsubRE */ 327 register char *ssp; /* start of string matched by subsubRE */ 328 register char *sep; /* end of string matched by subsubRE */ 329 register char *oldssp; /* previous ssp */ 330 register char *dp; 331 332 AT("diss", start, stop, startst, stopst); 333 sp = start; 334 for (ss = startst; ss < stopst; ss = es) { 335 /* identify end of subRE */ 336 es = ss; 337 switch (OP(m->g->strip[es])) { 338 case OPLUS_: 339 case OQUEST_: 340 es += OPND(m->g->strip[es]); 341 break; 342 case OCH_: 343 while (OP(m->g->strip[es]) != O_CH) 344 es += OPND(m->g->strip[es]); 345 break; 346 } 347 es++; 348 349 /* figure out what it matched */ 350 switch (OP(m->g->strip[ss])) { 351 case OEND: 352 assert(nope); 353 break; 354 case OCHAR: 355 sp++; 356 break; 357 case OBOL: 358 case OEOL: 359 case OBOW: 360 case OEOW: 361 break; 362 case OANY: 363 case OANYOF: 364 sp++; 365 break; 366 case OBACK_: 367 case O_BACK: 368 assert(nope); 369 break; 370 /* cases where length of match is hard to find */ 371 case OQUEST_: 372 stp = stop; 373 for (;;) { 374 /* how long could this one be? */ 375 rest = slow(m, sp, stp, ss, es); 376 assert(rest != NULL); /* it did match */ 377 /* could the rest match the rest? */ 378 tail = slow(m, rest, stop, es, stopst); 379 if (tail == stop) 380 break; /* yes! */ 381 /* no -- try a shorter match for this one */ 382 stp = rest - 1; 383 assert(stp >= sp); /* it did work */ 384 } 385 ssub = ss + 1; 386 esub = es - 1; 387 /* did innards match? */ 388 if (slow(m, sp, rest, ssub, esub) != NULL) { 389 dp = dissect(m, sp, rest, ssub, esub); 390 assert(dp == rest); 391 } else /* no */ 392 assert(sp == rest); 393 sp = rest; 394 break; 395 case OPLUS_: 396 stp = stop; 397 for (;;) { 398 /* how long could this one be? */ 399 rest = slow(m, sp, stp, ss, es); 400 assert(rest != NULL); /* it did match */ 401 /* could the rest match the rest? */ 402 tail = slow(m, rest, stop, es, stopst); 403 if (tail == stop) 404 break; /* yes! */ 405 /* no -- try a shorter match for this one */ 406 stp = rest - 1; 407 assert(stp >= sp); /* it did work */ 408 } 409 ssub = ss + 1; 410 esub = es - 1; 411 ssp = sp; 412 oldssp = ssp; 413 for (;;) { /* find last match of innards */ 414 sep = slow(m, ssp, rest, ssub, esub); 415 if (sep == NULL || sep == ssp) 416 break; /* failed or matched null */ 417 oldssp = ssp; /* on to next try */ 418 ssp = sep; 419 } 420 if (sep == NULL) { 421 /* last successful match */ 422 sep = ssp; 423 ssp = oldssp; 424 } 425 assert(sep == rest); /* must exhaust substring */ 426 assert(slow(m, ssp, sep, ssub, esub) == rest); 427 dp = dissect(m, ssp, sep, ssub, esub); 428 assert(dp == sep); 429 sp = rest; 430 break; 431 case OCH_: 432 stp = stop; 433 for (;;) { 434 /* how long could this one be? */ 435 rest = slow(m, sp, stp, ss, es); 436 assert(rest != NULL); /* it did match */ 437 /* could the rest match the rest? */ 438 tail = slow(m, rest, stop, es, stopst); 439 if (tail == stop) 440 break; /* yes! */ 441 /* no -- try a shorter match for this one */ 442 stp = rest - 1; 443 assert(stp >= sp); /* it did work */ 444 } 445 ssub = ss + 1; 446 esub = ss + OPND(m->g->strip[ss]) - 1; 447 assert(OP(m->g->strip[esub]) == OOR1); 448 for (;;) { /* find first matching branch */ 449 if (slow(m, sp, rest, ssub, esub) == rest) 450 break; /* it matched all of it */ 451 /* that one missed, try next one */ 452 assert(OP(m->g->strip[esub]) == OOR1); 453 esub++; 454 assert(OP(m->g->strip[esub]) == OOR2); 455 ssub = esub + 1; 456 esub += OPND(m->g->strip[esub]); 457 if (OP(m->g->strip[esub]) == OOR2) 458 esub--; 459 else 460 assert(OP(m->g->strip[esub]) == O_CH); 461 } 462 dp = dissect(m, sp, rest, ssub, esub); 463 assert(dp == rest); 464 sp = rest; 465 break; 466 case O_PLUS: 467 case O_QUEST: 468 case OOR1: 469 case OOR2: 470 case O_CH: 471 assert(nope); 472 break; 473 case OLPAREN: 474 i = OPND(m->g->strip[ss]); 475 assert(0 < i && i <= m->g->nsub); 476 m->pmatch[i].rm_so = sp - m->offp; 477 break; 478 case ORPAREN: 479 i = OPND(m->g->strip[ss]); 480 assert(0 < i && i <= m->g->nsub); 481 m->pmatch[i].rm_eo = sp - m->offp; 482 break; 483 default: /* uh oh */ 484 assert(nope); 485 break; 486 } 487 } 488 489 assert(sp == stop); 490 return(sp); 491} 492 493/* 494 - backref - figure out what matched what, figuring in back references 495 == static char *backref(register struct match *m, char *start, \ 496 == char *stop, sopno startst, sopno stopst, sopno lev); 497 */ 498static char * /* == stop (success) or NULL (failure) */ 499backref(m, start, stop, startst, stopst, lev) 500register struct match *m; 501char *start; 502char *stop; 503sopno startst; 504sopno stopst; 505sopno lev; /* PLUS nesting level */ 506{ 507 register int i; 508 register sopno ss; /* start sop of current subRE */ 509 register char *sp; /* start of string matched by it */ 510 register sopno ssub; /* start sop of subsubRE */ 511 register sopno esub; /* end sop of subsubRE */ 512 register char *ssp; /* start of string matched by subsubRE */ 513 register char *dp; 514 register size_t len; 515 register int hard; 516 register sop s; 517 register regoff_t offsave; 518 register cset *cs; 519 520 AT("back", start, stop, startst, stopst); 521 sp = start; 522 523 /* get as far as we can with easy stuff */ 524 hard = 0; 525 for (ss = startst; !hard && ss < stopst; ss++) 526 switch (OP(s = m->g->strip[ss])) { 527 case OCHAR: 528 if (sp == stop || *sp++ != (char)OPND(s)) 529 return(NULL); 530 break; 531 case OANY: 532 if (sp == stop) 533 return(NULL); 534 sp++; 535 break; 536 case OANYOF: 537 cs = &m->g->sets[OPND(s)]; 538 if (sp == stop || !CHIN(cs, *sp++)) 539 return(NULL); 540 break; 541 case OBOL: 542 if ( (sp == m->beginp && !(m->eflags®_NOTBOL)) || 543 (sp < m->endp && *(sp-1) == '\n' && 544 (m->g->cflags®_NEWLINE)) ) 545 { /* yes */ } 546 else 547 return(NULL); 548 break; 549 case OEOL: 550 if ( (sp == m->endp && !(m->eflags®_NOTEOL)) || 551 (sp < m->endp && *sp == '\n' && 552 (m->g->cflags®_NEWLINE)) ) 553 { /* yes */ } 554 else 555 return(NULL); 556 break; 557 case OBOW: 558 if (( (sp == m->beginp && !(m->eflags®_NOTBOL)) || 559 (sp < m->endp && *(sp-1) == '\n' && 560 (m->g->cflags®_NEWLINE)) || 561 (sp > m->beginp && 562 !ISWORD(*(sp-1))) ) && 563 (sp < m->endp && ISWORD(*sp)) ) 564 { /* yes */ } 565 else 566 return(NULL); 567 break; 568 case OEOW: 569 if (( (sp == m->endp && !(m->eflags®_NOTEOL)) || 570 (sp < m->endp && *sp == '\n' && 571 (m->g->cflags®_NEWLINE)) || 572 (sp < m->endp && !ISWORD(*sp)) ) && 573 (sp > m->beginp && ISWORD(*(sp-1))) ) 574 { /* yes */ } 575 else 576 return(NULL); 577 break; 578 case O_QUEST: 579 break; 580 case OOR1: /* matches null but needs to skip */ 581 ss++; 582 s = m->g->strip[ss]; 583 do { 584 assert(OP(s) == OOR2); 585 ss += OPND(s); 586 } while (OP(s = m->g->strip[ss]) != O_CH); 587 /* note that the ss++ gets us past the O_CH */ 588 break; 589 default: /* have to make a choice */ 590 hard = 1; 591 break; 592 } 593 if (!hard) { /* that was it! */ 594 if (sp != stop) 595 return(NULL); 596 return(sp); 597 } 598 ss--; /* adjust for the for's final increment */ 599 600 /* the hard stuff */ 601 AT("hard", sp, stop, ss, stopst); 602 s = m->g->strip[ss]; 603 switch (OP(s)) { 604 case OBACK_: /* the vilest depths */ 605 i = OPND(s); 606 assert(0 < i && i <= m->g->nsub); 607 if (m->pmatch[i].rm_eo == -1) 608 return(NULL); 609 assert(m->pmatch[i].rm_so != -1); 610 len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so; 611 assert(stop - m->beginp >= len); 612 if (sp > stop - len) 613 return(NULL); /* not enough left to match */ 614 ssp = m->offp + m->pmatch[i].rm_so; 615 if (memcmp(sp, ssp, len) != 0) 616 return(NULL); 617 while (m->g->strip[ss] != SOP(O_BACK, i)) 618 ss++; 619 return(backref(m, sp+len, stop, ss+1, stopst, lev)); 620 break; 621 case OQUEST_: /* to null or not */ 622 dp = backref(m, sp, stop, ss+1, stopst, lev); 623 if (dp != NULL) 624 return(dp); /* not */ 625 return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev)); 626 break; 627 case OPLUS_: 628 assert(m->lastpos != NULL); 629 assert(lev+1 <= m->g->nplus); 630 m->lastpos[lev+1] = sp; 631 return(backref(m, sp, stop, ss+1, stopst, lev+1)); 632 break; 633 case O_PLUS: 634 if (sp == m->lastpos[lev]) /* last pass matched null */ 635 return(backref(m, sp, stop, ss+1, stopst, lev-1)); 636 /* try another pass */ 637 m->lastpos[lev] = sp; 638 dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev); 639 if (dp == NULL) 640 return(backref(m, sp, stop, ss+1, stopst, lev-1)); 641 else 642 return(dp); 643 break; 644 case OCH_: /* find the right one, if any */ 645 ssub = ss + 1; 646 esub = ss + OPND(s) - 1; 647 assert(OP(m->g->strip[esub]) == OOR1); 648 for (;;) { /* find first matching branch */ 649 dp = backref(m, sp, stop, ssub, esub, lev); 650 if (dp != NULL) 651 return(dp); 652 /* that one missed, try next one */ 653 if (OP(m->g->strip[esub]) == O_CH) 654 return(NULL); /* there is none */ 655 esub++; 656 assert(OP(m->g->strip[esub]) == OOR2); 657 ssub = esub + 1; 658 esub += OPND(m->g->strip[esub]); 659 if (OP(m->g->strip[esub]) == OOR2) 660 esub--; 661 else 662 assert(OP(m->g->strip[esub]) == O_CH); 663 } 664 break; 665 case OLPAREN: /* must undo assignment if rest fails */ 666 i = OPND(s); 667 assert(0 < i && i <= m->g->nsub); 668 offsave = m->pmatch[i].rm_so; 669 m->pmatch[i].rm_so = sp - m->offp; 670 dp = backref(m, sp, stop, ss+1, stopst, lev); 671 if (dp != NULL) 672 return(dp); 673 m->pmatch[i].rm_so = offsave; 674 return(NULL); 675 break; 676 case ORPAREN: /* must undo assignment if rest fails */ 677 i = OPND(s); 678 assert(0 < i && i <= m->g->nsub); 679 offsave = m->pmatch[i].rm_eo; 680 m->pmatch[i].rm_eo = sp - m->offp; 681 dp = backref(m, sp, stop, ss+1, stopst, lev); 682 if (dp != NULL) 683 return(dp); 684 m->pmatch[i].rm_eo = offsave; 685 return(NULL); 686 break; 687 default: /* uh oh */ 688 assert(nope); 689 break; 690 } 691 692 /* "can't happen" */ 693 assert(nope); 694 /* NOTREACHED */ 695} 696 697/* 698 - fast - step through the string at top speed 699 == static char *fast(register struct match *m, char *start, \ 700 == char *stop, sopno startst, sopno stopst); 701 */ 702static char * /* where tentative match ended, or NULL */ 703fast(m, start, stop, startst, stopst) 704register struct match *m; 705char *start; 706char *stop; 707sopno startst; 708sopno stopst; 709{ 710 register states st = m->st; 711 register states fresh = m->fresh; 712 register states tmp = m->tmp; 713 register char *p = start; 714 register int c = (start == m->beginp) ? OUT : *(start-1); 715 register int lastc; /* previous c */ 716 register int flagch; 717 register int i; 718 register char *coldp; /* last p after which no match was underway */ 719 720 CLEAR(st); 721 SET1(st, startst); 722 st = step(m->g, startst, stopst, st, NOTHING, st); 723 ASSIGN(fresh, st); 724 SP("start", st, *p); 725 coldp = NULL; 726 for (;;) { 727 /* next character */ 728 lastc = c; 729 c = (p == m->endp) ? OUT : *p; 730 if (EQ(st, fresh)) 731 coldp = p; 732 733 /* is there an EOL and/or BOL between lastc and c? */ 734 flagch = '\0'; 735 i = 0; 736 if ( (lastc == '\n' && m->g->cflags®_NEWLINE) || 737 (lastc == OUT && !(m->eflags®_NOTBOL)) ) { 738 flagch = BOL; 739 i = m->g->nbol; 740 } 741 if ( (c == '\n' && m->g->cflags®_NEWLINE) || 742 (c == OUT && !(m->eflags®_NOTEOL)) ) { 743 flagch = (flagch == BOL) ? BOLEOL : EOL; 744 i += m->g->neol; 745 } 746 if (i != 0) { 747 for (; i > 0; i--) 748 st = step(m->g, startst, stopst, st, flagch, st); 749 SP("boleol", st, c); 750 } 751 752 /* how about a word boundary? */ 753 if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && 754 (c != OUT && ISWORD(c)) ) { 755 flagch = BOW; 756 } 757 if ( (lastc != OUT && ISWORD(lastc)) && 758 (flagch == EOL || (c != OUT && !ISWORD(c))) ) { 759 flagch = EOW; 760 } 761 if (flagch == BOW || flagch == EOW) { 762 st = step(m->g, startst, stopst, st, flagch, st); 763 SP("boweow", st, c); 764 } 765 766 /* are we done? */ 767 if (ISSET(st, stopst) || p == stop) 768 break; /* NOTE BREAK OUT */ 769 770 /* no, we must deal with this character */ 771 ASSIGN(tmp, st); 772 ASSIGN(st, fresh); 773 assert(c != OUT); 774 st = step(m->g, startst, stopst, tmp, c, st); 775 SP("aft", st, c); 776 assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); 777 p++; 778 } 779 780 assert(coldp != NULL); 781 m->coldp = coldp; 782 if (ISSET(st, stopst)) 783 return(p+1); 784 else 785 return(NULL); 786} 787 788/* 789 - slow - step through the string more deliberately 790 == static char *slow(register struct match *m, char *start, \ 791 == char *stop, sopno startst, sopno stopst); 792 */ 793static char * /* where it ended */ 794slow(m, start, stop, startst, stopst) 795register struct match *m; 796char *start; 797char *stop; 798sopno startst; 799sopno stopst; 800{ 801 register states st = m->st; 802 register states empty = m->empty; 803 register states tmp = m->tmp; 804 register char *p = start; 805 register int c = (start == m->beginp) ? OUT : *(start-1); 806 register int lastc; /* previous c */ 807 register int flagch; 808 register int i; 809 register char *matchp; /* last p at which a match ended */ 810 811 AT("slow", start, stop, startst, stopst); 812 CLEAR(st); 813 SET1(st, startst); 814 SP("sstart", st, *p); 815 st = step(m->g, startst, stopst, st, NOTHING, st); 816 matchp = NULL; 817 for (;;) { 818 /* next character */ 819 lastc = c; 820 c = (p == m->endp) ? OUT : *p; 821 822 /* is there an EOL and/or BOL between lastc and c? */ 823 flagch = '\0'; 824 i = 0; 825 if ( (lastc == '\n' && m->g->cflags®_NEWLINE) || 826 (lastc == OUT && !(m->eflags®_NOTBOL)) ) { 827 flagch = BOL; 828 i = m->g->nbol; 829 } 830 if ( (c == '\n' && m->g->cflags®_NEWLINE) || 831 (c == OUT && !(m->eflags®_NOTEOL)) ) { 832 flagch = (flagch == BOL) ? BOLEOL : EOL; 833 i += m->g->neol; 834 } 835 if (i != 0) { 836 for (; i > 0; i--) 837 st = step(m->g, startst, stopst, st, flagch, st); 838 SP("sboleol", st, c); 839 } 840 841 /* how about a word boundary? */ 842 if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && 843 (c != OUT && ISWORD(c)) ) { 844 flagch = BOW; 845 } 846 if ( (lastc != OUT && ISWORD(lastc)) && 847 (flagch == EOL || (c != OUT && !ISWORD(c))) ) { 848 flagch = EOW; 849 } 850 if (flagch == BOW || flagch == EOW) { 851 st = step(m->g, startst, stopst, st, flagch, st); 852 SP("sboweow", st, c); 853 } 854 855 /* are we done? */ 856 if (ISSET(st, stopst)) 857 matchp = p; 858 if (EQ(st, empty) || p == stop) 859 break; /* NOTE BREAK OUT */ 860 861 /* no, we must deal with this character */ 862 ASSIGN(tmp, st); 863 ASSIGN(st, empty); 864 assert(c != OUT); 865 st = step(m->g, startst, stopst, tmp, c, st); 866 SP("saft", st, c); 867 assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); 868 p++; 869 } 870 871 return(matchp); 872} 873 874 875/* 876 - step - map set of states reachable before char to set reachable after 877 == static states step(register struct re_guts *g, sopno start, sopno stop, \ 878 == register states bef, int ch, register states aft); 879 == #define BOL (OUT+1) 880 == #define EOL (BOL+1) 881 == #define BOLEOL (BOL+2) 882 == #define NOTHING (BOL+3) 883 == #define BOW (BOL+4) 884 == #define EOW (BOL+5) 885 == #define CODEMAX (BOL+5) // highest code used 886 == #define NONCHAR(c) ((c) > CHAR_MAX) 887 == #define NNONCHAR (CODEMAX-CHAR_MAX) 888 */ 889static states 890step(g, start, stop, bef, ch, aft) 891register struct re_guts *g; 892sopno start; /* start state within strip */ 893sopno stop; /* state after stop state within strip */ 894register states bef; /* states reachable before */ 895int ch; /* character or NONCHAR code */ 896register states aft; /* states already known reachable after */ 897{ 898 register cset *cs; 899 register sop s; 900 register sopno pc; 901 register onestate here; /* note, macros know this name */ 902 register sopno look; 903 register int i; 904 905 for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { 906 s = g->strip[pc]; 907 switch (OP(s)) { 908 case OEND: 909 assert(pc == stop-1); 910 break; 911 case OCHAR: 912 /* only characters can match */ 913 assert(!NONCHAR(ch) || ch != (char)OPND(s)); 914 if (ch == (char)OPND(s)) 915 FWD(aft, bef, 1); 916 break; 917 case OBOL: 918 if (ch == BOL || ch == BOLEOL) 919 FWD(aft, bef, 1); 920 break; 921 case OEOL: 922 if (ch == EOL || ch == BOLEOL) 923 FWD(aft, bef, 1); 924 break; 925 case OBOW: 926 if (ch == BOW) 927 FWD(aft, bef, 1); 928 break; 929 case OEOW: 930 if (ch == EOW) 931 FWD(aft, bef, 1); 932 break; 933 case OANY: 934 if (!NONCHAR(ch)) 935 FWD(aft, bef, 1); 936 break; 937 case OANYOF: 938 cs = &g->sets[OPND(s)]; 939 if (!NONCHAR(ch) && CHIN(cs, ch)) 940 FWD(aft, bef, 1); 941 break; 942 case OBACK_: /* ignored here */ 943 case O_BACK: 944 FWD(aft, aft, 1); 945 break; 946 case OPLUS_: /* forward, this is just an empty */ 947 FWD(aft, aft, 1); 948 break; 949 case O_PLUS: /* both forward and back */ 950 FWD(aft, aft, 1); 951 i = ISSETBACK(aft, OPND(s)); 952 BACK(aft, aft, OPND(s)); 953 if (!i && ISSETBACK(aft, OPND(s))) { 954 /* oho, must reconsider loop body */ 955 pc -= OPND(s) + 1; 956 INIT(here, pc); 957 } 958 break; 959 case OQUEST_: /* two branches, both forward */ 960 FWD(aft, aft, 1); 961 FWD(aft, aft, OPND(s)); 962 break; 963 case O_QUEST: /* just an empty */ 964 FWD(aft, aft, 1); 965 break; 966 case OLPAREN: /* not significant here */ 967 case ORPAREN: 968 FWD(aft, aft, 1); 969 break; 970 case OCH_: /* mark the first two branches */ 971 FWD(aft, aft, 1); 972 assert(OP(g->strip[pc+OPND(s)]) == OOR2); 973 FWD(aft, aft, OPND(s)); 974 break; 975 case OOR1: /* done a branch, find the O_CH */ 976 if (ISSTATEIN(aft, here)) { 977 for (look = 1; 978 OP(s = g->strip[pc+look]) != O_CH; 979 look += OPND(s)) 980 assert(OP(s) == OOR2); 981 FWD(aft, aft, look); 982 } 983 break; 984 case OOR2: /* propagate OCH_'s marking */ 985 FWD(aft, aft, 1); 986 if (OP(g->strip[pc+OPND(s)]) != O_CH) { 987 assert(OP(g->strip[pc+OPND(s)]) == OOR2); 988 FWD(aft, aft, OPND(s)); 989 } 990 break; 991 case O_CH: /* just empty */ 992 FWD(aft, aft, 1); 993 break; 994 default: /* ooooops... */ 995 assert(nope); 996 break; 997 } 998 } 999 1000 return(aft); 1001} 1002 1003#ifdef REDEBUG 1004/* 1005 - print - print a set of states 1006 == #ifdef REDEBUG 1007 == static void print(struct match *m, char *caption, states st, \ 1008 == int ch, FILE *d); 1009 == #endif 1010 */ 1011static void 1012print(m, caption, st, ch, d) 1013struct match *m; 1014char *caption; 1015states st; 1016int ch; 1017FILE *d; 1018{ 1019 register struct re_guts *g = m->g; 1020 register int i; 1021 register int first = 1; 1022 1023 if (!(m->eflags®_TRACE)) 1024 return; 1025 1026 (void)fprintf(d, "%s", caption); 1027 if (ch != '\0') 1028 (void)fprintf(d, " %s", pchar(ch)); 1029 for (i = 0; i < g->nstates; i++) 1030 if (ISSET(st, i)) { 1031 (void)fprintf(d, "%s%d", (first) ? "\t" : ", ", i); 1032 first = 0; 1033 } 1034 (void)fprintf(d, "\n"); 1035} 1036 1037/* 1038 - at - print current situation 1039 == #ifdef REDEBUG 1040 == static void at(struct match *m, char *title, char *start, char *stop, \ 1041 == sopno startst, sopno stopst); 1042 == #endif 1043 */ 1044static void 1045at(m, title, start, stop, startst, stopst) 1046struct match *m; 1047char *title; 1048char *start; 1049char *stop; 1050sopno startst; 1051sopno stopst; 1052{ 1053 if (!(m->eflags®_TRACE)) 1054 return; 1055 1056 (void)printf("%s %s-", title, pchar(*start)); 1057 (void)printf("%s ", pchar(*stop)); 1058 (void)printf("%ld-%ld\n", (long)startst, (long)stopst); 1059} 1060 1061#ifndef PCHARDONE 1062#define PCHARDONE /* never again */ 1063/* 1064 - pchar - make a character printable 1065 == #ifdef REDEBUG 1066 == static char *pchar(int ch); 1067 == #endif 1068 * 1069 * Is this identical to regchar() over in debug.c? Well, yes. But a 1070 * duplicate here avoids having a debugging-capable regexec.o tied to 1071 * a matching debug.o, and this is convenient. It all disappears in 1072 * the non-debug compilation anyway, so it doesn't matter much. 1073 */ 1074static char * /* -> representation */ 1075pchar(ch) 1076int ch; 1077{ 1078 static char pbuf[10]; 1079 1080 if (isprint(ch) || ch == ' ') 1081 (void)sprintf(pbuf, "%c", ch); 1082 else 1083 (void)sprintf(pbuf, "\\%o", ch); 1084 return(pbuf); 1085} 1086#endif 1087#endif 1088 1089#undef matcher 1090#undef fast 1091#undef slow 1092#undef dissect 1093#undef backref 1094#undef step 1095#undef print 1096#undef at 1097#undef match 1098