engine.c revision 62232
11573Srgrimes/*- 21573Srgrimes * Copyright (c) 1992, 1993, 1994 Henry Spencer. 31573Srgrimes * Copyright (c) 1992, 1993, 1994 41573Srgrimes * The Regents of the University of California. All rights reserved. 51573Srgrimes * 61573Srgrimes * This code is derived from software contributed to Berkeley by 71573Srgrimes * Henry Spencer. 81573Srgrimes * 91573Srgrimes * Redistribution and use in source and binary forms, with or without 101573Srgrimes * modification, are permitted provided that the following conditions 111573Srgrimes * are met: 121573Srgrimes * 1. Redistributions of source code must retain the above copyright 131573Srgrimes * notice, this list of conditions and the following disclaimer. 141573Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 151573Srgrimes * notice, this list of conditions and the following disclaimer in the 161573Srgrimes * documentation and/or other materials provided with the distribution. 171573Srgrimes * 3. All advertising materials mentioning features or use of this software 181573Srgrimes * must display the following acknowledgement: 191573Srgrimes * This product includes software developed by the University of 201573Srgrimes * California, Berkeley and its contributors. 211573Srgrimes * 4. Neither the name of the University nor the names of its contributors 221573Srgrimes * may be used to endorse or promote products derived from this software 231573Srgrimes * without specific prior written permission. 241573Srgrimes * 251573Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 261573Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 271573Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 281573Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 291573Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 301573Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 311573Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 321573Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 331573Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 341573Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 351573Srgrimes * SUCH DAMAGE. 361573Srgrimes * 371573Srgrimes * @(#)engine.c 8.5 (Berkeley) 3/20/94 3862232Sdcs * 3962232Sdcs * $FreeBSD: head/lib/libc/regex/engine.c 62232 2000-06-29 04:48:34Z dcs $ 401573Srgrimes */ 411573Srgrimes 421573Srgrimes/* 431573Srgrimes * The matching engine and friends. This file is #included by regexec.c 441573Srgrimes * after suitable #defines of a variety of macros used herein, so that 451573Srgrimes * different state representations can be used without duplicating masses 461573Srgrimes * of code. 471573Srgrimes */ 481573Srgrimes 491573Srgrimes#ifdef SNAMES 501573Srgrimes#define matcher smatcher 511573Srgrimes#define fast sfast 521573Srgrimes#define slow sslow 531573Srgrimes#define dissect sdissect 541573Srgrimes#define backref sbackref 551573Srgrimes#define step sstep 561573Srgrimes#define print sprint 571573Srgrimes#define at sat 581573Srgrimes#define match smat 591573Srgrimes#endif 601573Srgrimes#ifdef LNAMES 611573Srgrimes#define matcher lmatcher 621573Srgrimes#define fast lfast 631573Srgrimes#define slow lslow 641573Srgrimes#define dissect ldissect 651573Srgrimes#define backref lbackref 661573Srgrimes#define step lstep 671573Srgrimes#define print lprint 681573Srgrimes#define at lat 691573Srgrimes#define match lmat 701573Srgrimes#endif 711573Srgrimes 721573Srgrimes/* another structure passed up and down to avoid zillions of parameters */ 731573Srgrimesstruct match { 741573Srgrimes struct re_guts *g; 751573Srgrimes int eflags; 761573Srgrimes regmatch_t *pmatch; /* [nsub+1] (0 element unused) */ 771573Srgrimes char *offp; /* offsets work from here */ 781573Srgrimes char *beginp; /* start of string -- virtual NUL precedes */ 791573Srgrimes char *endp; /* end of string -- virtual NUL here */ 801573Srgrimes char *coldp; /* can be no match starting before here */ 811573Srgrimes char **lastpos; /* [nplus+1] */ 821573Srgrimes STATEVARS; 831573Srgrimes states st; /* current states */ 841573Srgrimes states fresh; /* states for a fresh start */ 851573Srgrimes states tmp; /* temporary */ 861573Srgrimes states empty; /* empty set of states */ 871573Srgrimes}; 881573Srgrimes 891573Srgrimes/* ========= begin header generated by ./mkh ========= */ 901573Srgrimes#ifdef __cplusplus 911573Srgrimesextern "C" { 921573Srgrimes#endif 931573Srgrimes 941573Srgrimes/* === engine.c === */ 951573Srgrimesstatic int matcher __P((struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], int eflags)); 961573Srgrimesstatic char *dissect __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst)); 971573Srgrimesstatic char *backref __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst, sopno lev)); 981573Srgrimesstatic char *fast __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst)); 991573Srgrimesstatic char *slow __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst)); 1001573Srgrimesstatic states step __P((struct re_guts *g, sopno start, sopno stop, states bef, int ch, states aft)); 1011573Srgrimes#define BOL (OUT+1) 1021573Srgrimes#define EOL (BOL+1) 1031573Srgrimes#define BOLEOL (BOL+2) 1041573Srgrimes#define NOTHING (BOL+3) 1051573Srgrimes#define BOW (BOL+4) 1061573Srgrimes#define EOW (BOL+5) 1071573Srgrimes#define CODEMAX (BOL+5) /* highest code used */ 1081573Srgrimes#define NONCHAR(c) ((c) > CHAR_MAX) 1091573Srgrimes#define NNONCHAR (CODEMAX-CHAR_MAX) 1101573Srgrimes#ifdef REDEBUG 1111573Srgrimesstatic void print __P((struct match *m, char *caption, states st, int ch, FILE *d)); 1121573Srgrimes#endif 1131573Srgrimes#ifdef REDEBUG 1141573Srgrimesstatic void at __P((struct match *m, char *title, char *start, char *stop, sopno startst, sopno stopst)); 1151573Srgrimes#endif 1161573Srgrimes#ifdef REDEBUG 1171573Srgrimesstatic char *pchar __P((int ch)); 1181573Srgrimes#endif 1191573Srgrimes 1201573Srgrimes#ifdef __cplusplus 1211573Srgrimes} 1221573Srgrimes#endif 1231573Srgrimes/* ========= end header generated by ./mkh ========= */ 1241573Srgrimes 1251573Srgrimes#ifdef REDEBUG 1261573Srgrimes#define SP(t, s, c) print(m, t, s, c, stdout) 1271573Srgrimes#define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2) 1281573Srgrimes#define NOTE(str) { if (m->eflags®_TRACE) printf("=%s\n", (str)); } 1291573Srgrimes#else 1301573Srgrimes#define SP(t, s, c) /* nothing */ 1311573Srgrimes#define AT(t, p1, p2, s1, s2) /* nothing */ 1321573Srgrimes#define NOTE(s) /* nothing */ 1331573Srgrimes#endif 1341573Srgrimes 1351573Srgrimes/* 1361573Srgrimes - matcher - the actual matching engine 1371573Srgrimes == static int matcher(register struct re_guts *g, char *string, \ 1381573Srgrimes == size_t nmatch, regmatch_t pmatch[], int eflags); 1391573Srgrimes */ 1401573Srgrimesstatic int /* 0 success, REG_NOMATCH failure */ 1411573Srgrimesmatcher(g, string, nmatch, pmatch, eflags) 1421573Srgrimesregister struct re_guts *g; 1431573Srgrimeschar *string; 1441573Srgrimessize_t nmatch; 1451573Srgrimesregmatch_t pmatch[]; 1461573Srgrimesint eflags; 1471573Srgrimes{ 1481573Srgrimes register char *endp; 1491573Srgrimes register int i; 1501573Srgrimes struct match mv; 1511573Srgrimes register struct match *m = &mv; 1521573Srgrimes register char *dp; 15317141Sjkh register const sopno gf = g->firststate+1; /* +1 for OEND */ 15417141Sjkh register const sopno gl = g->laststate; 1551573Srgrimes char *start; 1561573Srgrimes char *stop; 15762232Sdcs /* Boyer-Moore algorithms variables */ 15862232Sdcs register unsigned char *pp; 15962232Sdcs int cj, mj; 16062232Sdcs register unsigned char *mustfirst; 16162232Sdcs register unsigned char *mustlast; 16262232Sdcs register int mustlen; 16362232Sdcs register int *matchjump; 16462232Sdcs register int *charjump; 16562232Sdcs register unsigned char *bmp; 16662232Sdcs register unsigned char *bmps; 1671573Srgrimes 1681573Srgrimes /* simplify the situation where possible */ 1691573Srgrimes if (g->cflags®_NOSUB) 1701573Srgrimes nmatch = 0; 1711573Srgrimes if (eflags®_STARTEND) { 1721573Srgrimes start = string + pmatch[0].rm_so; 1731573Srgrimes stop = string + pmatch[0].rm_eo; 1741573Srgrimes } else { 1751573Srgrimes start = string; 1761573Srgrimes stop = start + strlen(start); 1771573Srgrimes } 1781573Srgrimes if (stop < start) 1791573Srgrimes return(REG_INVARG); 1801573Srgrimes 1811573Srgrimes /* prescreening; this does wonders for this rather slow code */ 1821573Srgrimes if (g->must != NULL) { 18362232Sdcs if(g->charjump != NULL && g->matchjump != NULL) { 18462232Sdcs mustlen = -g->mlen; 18562232Sdcs mustfirst = g->must; 18662232Sdcs mustlast = g->must + g->mlen - 1; 18762232Sdcs charjump = g->charjump; 18862232Sdcs matchjump = g->matchjump; 18962232Sdcs bmps = stop; 19062232Sdcs pp = mustlast; 19162232Sdcs for(bmp = start+g->mlen-1; bmp < bmps;) { 19262232Sdcs /* Fast skip non-matches */ 19362232Sdcs while(bmp < bmps && charjump[*bmp]) 19462232Sdcs bmp += charjump[*bmp]; 19562232Sdcs 19662232Sdcs if(bmp >= bmps) 19762232Sdcs break; 19862232Sdcs 19962232Sdcs /* Greedy matcher */ 20062232Sdcs /* We depend on not being used for 20162232Sdcs * for strings of length 1 20262232Sdcs */ 20362232Sdcs while(*--bmp == *--pp && pp != mustfirst); 20462232Sdcs 20562232Sdcs if(*bmp == *pp) 20662232Sdcs break; 20762232Sdcs 20862232Sdcs /* Jump to next possible match */ 20962232Sdcs mj = matchjump[pp - mustfirst]; 21062232Sdcs cj = charjump[*bmp]; 21162232Sdcs bmp += (cj < mj ? mj : cj); 21262232Sdcs pp = mustlast; 21362232Sdcs } 21462232Sdcs if(pp != mustfirst) 21562232Sdcs return(REG_NOMATCH); 21662232Sdcs } else { 21762232Sdcs for (dp = start; dp < stop; dp++) 21862232Sdcs if (*dp == g->must[0] && 21962232Sdcs stop - dp >= g->mlen && 22062232Sdcs memcmp(dp, g->must, (size_t)g->mlen) == 0) 22162232Sdcs break; 22262232Sdcs if (dp == stop) /* we didn't find g->must */ 22362232Sdcs return(REG_NOMATCH); 22462232Sdcs } 2251573Srgrimes } 2261573Srgrimes 2271573Srgrimes /* match struct setup */ 2281573Srgrimes m->g = g; 2291573Srgrimes m->eflags = eflags; 2301573Srgrimes m->pmatch = NULL; 2311573Srgrimes m->lastpos = NULL; 2321573Srgrimes m->offp = string; 2331573Srgrimes m->beginp = start; 2341573Srgrimes m->endp = stop; 2351573Srgrimes STATESETUP(m, 4); 2361573Srgrimes SETUP(m->st); 2371573Srgrimes SETUP(m->fresh); 2381573Srgrimes SETUP(m->tmp); 2391573Srgrimes SETUP(m->empty); 2401573Srgrimes CLEAR(m->empty); 2411573Srgrimes 2421573Srgrimes /* this loop does only one repetition except for backrefs */ 2431573Srgrimes for (;;) { 2441573Srgrimes endp = fast(m, start, stop, gf, gl); 2451573Srgrimes if (endp == NULL) { /* a miss */ 2461573Srgrimes STATETEARDOWN(m); 2471573Srgrimes return(REG_NOMATCH); 2481573Srgrimes } 2491573Srgrimes if (nmatch == 0 && !g->backrefs) 2501573Srgrimes break; /* no further info needed */ 2511573Srgrimes 2521573Srgrimes /* where? */ 2531573Srgrimes assert(m->coldp != NULL); 2541573Srgrimes for (;;) { 2551573Srgrimes NOTE("finding start"); 2561573Srgrimes endp = slow(m, m->coldp, stop, gf, gl); 2571573Srgrimes if (endp != NULL) 2581573Srgrimes break; 2591573Srgrimes assert(m->coldp < m->endp); 2601573Srgrimes m->coldp++; 2611573Srgrimes } 2621573Srgrimes if (nmatch == 1 && !g->backrefs) 2631573Srgrimes break; /* no further info needed */ 2641573Srgrimes 2651573Srgrimes /* oh my, he wants the subexpressions... */ 2661573Srgrimes if (m->pmatch == NULL) 2671573Srgrimes m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) * 2681573Srgrimes sizeof(regmatch_t)); 2691573Srgrimes if (m->pmatch == NULL) { 2701573Srgrimes STATETEARDOWN(m); 2711573Srgrimes return(REG_ESPACE); 2721573Srgrimes } 2731573Srgrimes for (i = 1; i <= m->g->nsub; i++) 2741573Srgrimes m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1; 2751573Srgrimes if (!g->backrefs && !(m->eflags®_BACKR)) { 2761573Srgrimes NOTE("dissecting"); 2771573Srgrimes dp = dissect(m, m->coldp, endp, gf, gl); 2781573Srgrimes } else { 2791573Srgrimes if (g->nplus > 0 && m->lastpos == NULL) 2801573Srgrimes m->lastpos = (char **)malloc((g->nplus+1) * 2811573Srgrimes sizeof(char *)); 2821573Srgrimes if (g->nplus > 0 && m->lastpos == NULL) { 2831573Srgrimes free(m->pmatch); 2841573Srgrimes STATETEARDOWN(m); 2851573Srgrimes return(REG_ESPACE); 2861573Srgrimes } 2871573Srgrimes NOTE("backref dissect"); 2881573Srgrimes dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); 2891573Srgrimes } 2901573Srgrimes if (dp != NULL) 2911573Srgrimes break; 2921573Srgrimes 2931573Srgrimes /* uh-oh... we couldn't find a subexpression-level match */ 2941573Srgrimes assert(g->backrefs); /* must be back references doing it */ 2951573Srgrimes assert(g->nplus == 0 || m->lastpos != NULL); 2961573Srgrimes for (;;) { 2971573Srgrimes if (dp != NULL || endp <= m->coldp) 2981573Srgrimes break; /* defeat */ 2991573Srgrimes NOTE("backoff"); 3001573Srgrimes endp = slow(m, m->coldp, endp-1, gf, gl); 3011573Srgrimes if (endp == NULL) 3021573Srgrimes break; /* defeat */ 3031573Srgrimes /* try it on a shorter possibility */ 3041573Srgrimes#ifndef NDEBUG 3051573Srgrimes for (i = 1; i <= m->g->nsub; i++) { 3061573Srgrimes assert(m->pmatch[i].rm_so == -1); 3071573Srgrimes assert(m->pmatch[i].rm_eo == -1); 3081573Srgrimes } 3091573Srgrimes#endif 3101573Srgrimes NOTE("backoff dissect"); 3111573Srgrimes dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); 3121573Srgrimes } 3131573Srgrimes assert(dp == NULL || dp == endp); 3141573Srgrimes if (dp != NULL) /* found a shorter one */ 3151573Srgrimes break; 3161573Srgrimes 3171573Srgrimes /* despite initial appearances, there is no match here */ 3181573Srgrimes NOTE("false alarm"); 3191573Srgrimes start = m->coldp + 1; /* recycle starting later */ 3201573Srgrimes assert(start <= stop); 3211573Srgrimes } 3221573Srgrimes 3231573Srgrimes /* fill in the details if requested */ 3241573Srgrimes if (nmatch > 0) { 3251573Srgrimes pmatch[0].rm_so = m->coldp - m->offp; 3261573Srgrimes pmatch[0].rm_eo = endp - m->offp; 3271573Srgrimes } 3281573Srgrimes if (nmatch > 1) { 3291573Srgrimes assert(m->pmatch != NULL); 3301573Srgrimes for (i = 1; i < nmatch; i++) 3311573Srgrimes if (i <= m->g->nsub) 3321573Srgrimes pmatch[i] = m->pmatch[i]; 3331573Srgrimes else { 3341573Srgrimes pmatch[i].rm_so = -1; 3351573Srgrimes pmatch[i].rm_eo = -1; 3361573Srgrimes } 3371573Srgrimes } 3381573Srgrimes 3391573Srgrimes if (m->pmatch != NULL) 3401573Srgrimes free((char *)m->pmatch); 3411573Srgrimes if (m->lastpos != NULL) 3421573Srgrimes free((char *)m->lastpos); 3431573Srgrimes STATETEARDOWN(m); 3441573Srgrimes return(0); 3451573Srgrimes} 3461573Srgrimes 3471573Srgrimes/* 3481573Srgrimes - dissect - figure out what matched what, no back references 3491573Srgrimes == static char *dissect(register struct match *m, char *start, \ 3501573Srgrimes == char *stop, sopno startst, sopno stopst); 3511573Srgrimes */ 3521573Srgrimesstatic char * /* == stop (success) always */ 3531573Srgrimesdissect(m, start, stop, startst, stopst) 3541573Srgrimesregister struct match *m; 3551573Srgrimeschar *start; 3561573Srgrimeschar *stop; 3571573Srgrimessopno startst; 3581573Srgrimessopno stopst; 3591573Srgrimes{ 3601573Srgrimes register int i; 3611573Srgrimes register sopno ss; /* start sop of current subRE */ 3621573Srgrimes register sopno es; /* end sop of current subRE */ 3631573Srgrimes register char *sp; /* start of string matched by it */ 3641573Srgrimes register char *stp; /* string matched by it cannot pass here */ 3651573Srgrimes register char *rest; /* start of rest of string */ 3661573Srgrimes register char *tail; /* string unmatched by rest of RE */ 3671573Srgrimes register sopno ssub; /* start sop of subsubRE */ 3681573Srgrimes register sopno esub; /* end sop of subsubRE */ 3691573Srgrimes register char *ssp; /* start of string matched by subsubRE */ 3701573Srgrimes register char *sep; /* end of string matched by subsubRE */ 3711573Srgrimes register char *oldssp; /* previous ssp */ 3721573Srgrimes register char *dp; 3731573Srgrimes 3741573Srgrimes AT("diss", start, stop, startst, stopst); 3751573Srgrimes sp = start; 3761573Srgrimes for (ss = startst; ss < stopst; ss = es) { 3771573Srgrimes /* identify end of subRE */ 3781573Srgrimes es = ss; 3791573Srgrimes switch (OP(m->g->strip[es])) { 3801573Srgrimes case OPLUS_: 3811573Srgrimes case OQUEST_: 3821573Srgrimes es += OPND(m->g->strip[es]); 3831573Srgrimes break; 3841573Srgrimes case OCH_: 3851573Srgrimes while (OP(m->g->strip[es]) != O_CH) 3861573Srgrimes es += OPND(m->g->strip[es]); 3871573Srgrimes break; 3881573Srgrimes } 3891573Srgrimes es++; 3901573Srgrimes 3911573Srgrimes /* figure out what it matched */ 3921573Srgrimes switch (OP(m->g->strip[ss])) { 3931573Srgrimes case OEND: 3941573Srgrimes assert(nope); 3951573Srgrimes break; 3961573Srgrimes case OCHAR: 3971573Srgrimes sp++; 3981573Srgrimes break; 3991573Srgrimes case OBOL: 4001573Srgrimes case OEOL: 4011573Srgrimes case OBOW: 4021573Srgrimes case OEOW: 4031573Srgrimes break; 4041573Srgrimes case OANY: 4051573Srgrimes case OANYOF: 4061573Srgrimes sp++; 4071573Srgrimes break; 4081573Srgrimes case OBACK_: 4091573Srgrimes case O_BACK: 4101573Srgrimes assert(nope); 4111573Srgrimes break; 4121573Srgrimes /* cases where length of match is hard to find */ 4131573Srgrimes case OQUEST_: 4141573Srgrimes stp = stop; 4151573Srgrimes for (;;) { 4161573Srgrimes /* how long could this one be? */ 4171573Srgrimes rest = slow(m, sp, stp, ss, es); 4181573Srgrimes assert(rest != NULL); /* it did match */ 4191573Srgrimes /* could the rest match the rest? */ 4201573Srgrimes tail = slow(m, rest, stop, es, stopst); 4211573Srgrimes if (tail == stop) 4221573Srgrimes break; /* yes! */ 4231573Srgrimes /* no -- try a shorter match for this one */ 4241573Srgrimes stp = rest - 1; 4251573Srgrimes assert(stp >= sp); /* it did work */ 4261573Srgrimes } 4271573Srgrimes ssub = ss + 1; 4281573Srgrimes esub = es - 1; 4291573Srgrimes /* did innards match? */ 4301573Srgrimes if (slow(m, sp, rest, ssub, esub) != NULL) { 4311573Srgrimes dp = dissect(m, sp, rest, ssub, esub); 4321573Srgrimes assert(dp == rest); 4331573Srgrimes } else /* no */ 4341573Srgrimes assert(sp == rest); 4351573Srgrimes sp = rest; 4361573Srgrimes break; 4371573Srgrimes case OPLUS_: 4381573Srgrimes stp = stop; 4391573Srgrimes for (;;) { 4401573Srgrimes /* how long could this one be? */ 4411573Srgrimes rest = slow(m, sp, stp, ss, es); 4421573Srgrimes assert(rest != NULL); /* it did match */ 4431573Srgrimes /* could the rest match the rest? */ 4441573Srgrimes tail = slow(m, rest, stop, es, stopst); 4451573Srgrimes if (tail == stop) 4461573Srgrimes break; /* yes! */ 4471573Srgrimes /* no -- try a shorter match for this one */ 4481573Srgrimes stp = rest - 1; 4491573Srgrimes assert(stp >= sp); /* it did work */ 4501573Srgrimes } 4511573Srgrimes ssub = ss + 1; 4521573Srgrimes esub = es - 1; 4531573Srgrimes ssp = sp; 4541573Srgrimes oldssp = ssp; 4551573Srgrimes for (;;) { /* find last match of innards */ 4561573Srgrimes sep = slow(m, ssp, rest, ssub, esub); 4571573Srgrimes if (sep == NULL || sep == ssp) 4581573Srgrimes break; /* failed or matched null */ 4591573Srgrimes oldssp = ssp; /* on to next try */ 4601573Srgrimes ssp = sep; 4611573Srgrimes } 4621573Srgrimes if (sep == NULL) { 4631573Srgrimes /* last successful match */ 4641573Srgrimes sep = ssp; 4651573Srgrimes ssp = oldssp; 4661573Srgrimes } 4671573Srgrimes assert(sep == rest); /* must exhaust substring */ 4681573Srgrimes assert(slow(m, ssp, sep, ssub, esub) == rest); 4691573Srgrimes dp = dissect(m, ssp, sep, ssub, esub); 4701573Srgrimes assert(dp == sep); 4711573Srgrimes sp = rest; 4721573Srgrimes break; 4731573Srgrimes case OCH_: 4741573Srgrimes stp = stop; 4751573Srgrimes for (;;) { 4761573Srgrimes /* how long could this one be? */ 4771573Srgrimes rest = slow(m, sp, stp, ss, es); 4781573Srgrimes assert(rest != NULL); /* it did match */ 4791573Srgrimes /* could the rest match the rest? */ 4801573Srgrimes tail = slow(m, rest, stop, es, stopst); 4811573Srgrimes if (tail == stop) 4821573Srgrimes break; /* yes! */ 4831573Srgrimes /* no -- try a shorter match for this one */ 4841573Srgrimes stp = rest - 1; 4851573Srgrimes assert(stp >= sp); /* it did work */ 4861573Srgrimes } 4871573Srgrimes ssub = ss + 1; 4881573Srgrimes esub = ss + OPND(m->g->strip[ss]) - 1; 4891573Srgrimes assert(OP(m->g->strip[esub]) == OOR1); 4901573Srgrimes for (;;) { /* find first matching branch */ 4911573Srgrimes if (slow(m, sp, rest, ssub, esub) == rest) 4921573Srgrimes break; /* it matched all of it */ 4931573Srgrimes /* that one missed, try next one */ 4941573Srgrimes assert(OP(m->g->strip[esub]) == OOR1); 4951573Srgrimes esub++; 4961573Srgrimes assert(OP(m->g->strip[esub]) == OOR2); 4971573Srgrimes ssub = esub + 1; 4981573Srgrimes esub += OPND(m->g->strip[esub]); 4991573Srgrimes if (OP(m->g->strip[esub]) == OOR2) 5001573Srgrimes esub--; 5011573Srgrimes else 5021573Srgrimes assert(OP(m->g->strip[esub]) == O_CH); 5031573Srgrimes } 5041573Srgrimes dp = dissect(m, sp, rest, ssub, esub); 5051573Srgrimes assert(dp == rest); 5061573Srgrimes sp = rest; 5071573Srgrimes break; 5081573Srgrimes case O_PLUS: 5091573Srgrimes case O_QUEST: 5101573Srgrimes case OOR1: 5111573Srgrimes case OOR2: 5121573Srgrimes case O_CH: 5131573Srgrimes assert(nope); 5141573Srgrimes break; 5151573Srgrimes case OLPAREN: 5161573Srgrimes i = OPND(m->g->strip[ss]); 5171573Srgrimes assert(0 < i && i <= m->g->nsub); 5181573Srgrimes m->pmatch[i].rm_so = sp - m->offp; 5191573Srgrimes break; 5201573Srgrimes case ORPAREN: 5211573Srgrimes i = OPND(m->g->strip[ss]); 5221573Srgrimes assert(0 < i && i <= m->g->nsub); 5231573Srgrimes m->pmatch[i].rm_eo = sp - m->offp; 5241573Srgrimes break; 5251573Srgrimes default: /* uh oh */ 5261573Srgrimes assert(nope); 5271573Srgrimes break; 5281573Srgrimes } 5291573Srgrimes } 5301573Srgrimes 5311573Srgrimes assert(sp == stop); 5321573Srgrimes return(sp); 5331573Srgrimes} 5341573Srgrimes 5351573Srgrimes/* 5361573Srgrimes - backref - figure out what matched what, figuring in back references 5371573Srgrimes == static char *backref(register struct match *m, char *start, \ 5381573Srgrimes == char *stop, sopno startst, sopno stopst, sopno lev); 5391573Srgrimes */ 5401573Srgrimesstatic char * /* == stop (success) or NULL (failure) */ 5411573Srgrimesbackref(m, start, stop, startst, stopst, lev) 5421573Srgrimesregister struct match *m; 5431573Srgrimeschar *start; 5441573Srgrimeschar *stop; 5451573Srgrimessopno startst; 5461573Srgrimessopno stopst; 5471573Srgrimessopno lev; /* PLUS nesting level */ 5481573Srgrimes{ 5491573Srgrimes register int i; 5501573Srgrimes register sopno ss; /* start sop of current subRE */ 5511573Srgrimes register char *sp; /* start of string matched by it */ 5521573Srgrimes register sopno ssub; /* start sop of subsubRE */ 5531573Srgrimes register sopno esub; /* end sop of subsubRE */ 5541573Srgrimes register char *ssp; /* start of string matched by subsubRE */ 5551573Srgrimes register char *dp; 5561573Srgrimes register size_t len; 5571573Srgrimes register int hard; 5581573Srgrimes register sop s; 5591573Srgrimes register regoff_t offsave; 5601573Srgrimes register cset *cs; 5611573Srgrimes 5621573Srgrimes AT("back", start, stop, startst, stopst); 5631573Srgrimes sp = start; 5641573Srgrimes 5651573Srgrimes /* get as far as we can with easy stuff */ 5661573Srgrimes hard = 0; 5671573Srgrimes for (ss = startst; !hard && ss < stopst; ss++) 5681573Srgrimes switch (OP(s = m->g->strip[ss])) { 5691573Srgrimes case OCHAR: 5701573Srgrimes if (sp == stop || *sp++ != (char)OPND(s)) 5711573Srgrimes return(NULL); 5721573Srgrimes break; 5731573Srgrimes case OANY: 5741573Srgrimes if (sp == stop) 5751573Srgrimes return(NULL); 5761573Srgrimes sp++; 5771573Srgrimes break; 5781573Srgrimes case OANYOF: 5791573Srgrimes cs = &m->g->sets[OPND(s)]; 5801573Srgrimes if (sp == stop || !CHIN(cs, *sp++)) 5811573Srgrimes return(NULL); 5821573Srgrimes break; 5831573Srgrimes case OBOL: 5841573Srgrimes if ( (sp == m->beginp && !(m->eflags®_NOTBOL)) || 5851573Srgrimes (sp < m->endp && *(sp-1) == '\n' && 5861573Srgrimes (m->g->cflags®_NEWLINE)) ) 5871573Srgrimes { /* yes */ } 5881573Srgrimes else 5891573Srgrimes return(NULL); 5901573Srgrimes break; 5911573Srgrimes case OEOL: 5921573Srgrimes if ( (sp == m->endp && !(m->eflags®_NOTEOL)) || 5931573Srgrimes (sp < m->endp && *sp == '\n' && 5941573Srgrimes (m->g->cflags®_NEWLINE)) ) 5951573Srgrimes { /* yes */ } 5961573Srgrimes else 5971573Srgrimes return(NULL); 5981573Srgrimes break; 5991573Srgrimes case OBOW: 6001573Srgrimes if (( (sp == m->beginp && !(m->eflags®_NOTBOL)) || 6011573Srgrimes (sp < m->endp && *(sp-1) == '\n' && 6021573Srgrimes (m->g->cflags®_NEWLINE)) || 6031573Srgrimes (sp > m->beginp && 6041573Srgrimes !ISWORD(*(sp-1))) ) && 6051573Srgrimes (sp < m->endp && ISWORD(*sp)) ) 6061573Srgrimes { /* yes */ } 6071573Srgrimes else 6081573Srgrimes return(NULL); 6091573Srgrimes break; 6101573Srgrimes case OEOW: 6111573Srgrimes if (( (sp == m->endp && !(m->eflags®_NOTEOL)) || 6121573Srgrimes (sp < m->endp && *sp == '\n' && 6131573Srgrimes (m->g->cflags®_NEWLINE)) || 6141573Srgrimes (sp < m->endp && !ISWORD(*sp)) ) && 6151573Srgrimes (sp > m->beginp && ISWORD(*(sp-1))) ) 6161573Srgrimes { /* yes */ } 6171573Srgrimes else 6181573Srgrimes return(NULL); 6191573Srgrimes break; 6201573Srgrimes case O_QUEST: 6211573Srgrimes break; 6221573Srgrimes case OOR1: /* matches null but needs to skip */ 6231573Srgrimes ss++; 6241573Srgrimes s = m->g->strip[ss]; 6251573Srgrimes do { 6261573Srgrimes assert(OP(s) == OOR2); 6271573Srgrimes ss += OPND(s); 6281573Srgrimes } while (OP(s = m->g->strip[ss]) != O_CH); 6291573Srgrimes /* note that the ss++ gets us past the O_CH */ 6301573Srgrimes break; 6311573Srgrimes default: /* have to make a choice */ 6321573Srgrimes hard = 1; 6331573Srgrimes break; 6341573Srgrimes } 6351573Srgrimes if (!hard) { /* that was it! */ 6361573Srgrimes if (sp != stop) 6371573Srgrimes return(NULL); 6381573Srgrimes return(sp); 6391573Srgrimes } 6401573Srgrimes ss--; /* adjust for the for's final increment */ 6411573Srgrimes 6421573Srgrimes /* the hard stuff */ 6431573Srgrimes AT("hard", sp, stop, ss, stopst); 6441573Srgrimes s = m->g->strip[ss]; 6451573Srgrimes switch (OP(s)) { 6461573Srgrimes case OBACK_: /* the vilest depths */ 6471573Srgrimes i = OPND(s); 6481573Srgrimes assert(0 < i && i <= m->g->nsub); 6491573Srgrimes if (m->pmatch[i].rm_eo == -1) 6501573Srgrimes return(NULL); 6511573Srgrimes assert(m->pmatch[i].rm_so != -1); 6521573Srgrimes len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so; 6531573Srgrimes assert(stop - m->beginp >= len); 6541573Srgrimes if (sp > stop - len) 6551573Srgrimes return(NULL); /* not enough left to match */ 6561573Srgrimes ssp = m->offp + m->pmatch[i].rm_so; 6571573Srgrimes if (memcmp(sp, ssp, len) != 0) 6581573Srgrimes return(NULL); 6591573Srgrimes while (m->g->strip[ss] != SOP(O_BACK, i)) 6601573Srgrimes ss++; 6611573Srgrimes return(backref(m, sp+len, stop, ss+1, stopst, lev)); 6621573Srgrimes break; 6631573Srgrimes case OQUEST_: /* to null or not */ 6641573Srgrimes dp = backref(m, sp, stop, ss+1, stopst, lev); 6651573Srgrimes if (dp != NULL) 6661573Srgrimes return(dp); /* not */ 6671573Srgrimes return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev)); 6681573Srgrimes break; 6691573Srgrimes case OPLUS_: 6701573Srgrimes assert(m->lastpos != NULL); 6711573Srgrimes assert(lev+1 <= m->g->nplus); 6721573Srgrimes m->lastpos[lev+1] = sp; 6731573Srgrimes return(backref(m, sp, stop, ss+1, stopst, lev+1)); 6741573Srgrimes break; 6751573Srgrimes case O_PLUS: 6761573Srgrimes if (sp == m->lastpos[lev]) /* last pass matched null */ 6771573Srgrimes return(backref(m, sp, stop, ss+1, stopst, lev-1)); 6781573Srgrimes /* try another pass */ 6791573Srgrimes m->lastpos[lev] = sp; 6801573Srgrimes dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev); 6811573Srgrimes if (dp == NULL) 6821573Srgrimes return(backref(m, sp, stop, ss+1, stopst, lev-1)); 6831573Srgrimes else 6841573Srgrimes return(dp); 6851573Srgrimes break; 6861573Srgrimes case OCH_: /* find the right one, if any */ 6871573Srgrimes ssub = ss + 1; 6881573Srgrimes esub = ss + OPND(s) - 1; 6891573Srgrimes assert(OP(m->g->strip[esub]) == OOR1); 6901573Srgrimes for (;;) { /* find first matching branch */ 6911573Srgrimes dp = backref(m, sp, stop, ssub, esub, lev); 6921573Srgrimes if (dp != NULL) 6931573Srgrimes return(dp); 6941573Srgrimes /* that one missed, try next one */ 6951573Srgrimes if (OP(m->g->strip[esub]) == O_CH) 6961573Srgrimes return(NULL); /* there is none */ 6971573Srgrimes esub++; 6981573Srgrimes assert(OP(m->g->strip[esub]) == OOR2); 6991573Srgrimes ssub = esub + 1; 7001573Srgrimes esub += OPND(m->g->strip[esub]); 7011573Srgrimes if (OP(m->g->strip[esub]) == OOR2) 7021573Srgrimes esub--; 7031573Srgrimes else 7041573Srgrimes assert(OP(m->g->strip[esub]) == O_CH); 7051573Srgrimes } 7061573Srgrimes break; 7071573Srgrimes case OLPAREN: /* must undo assignment if rest fails */ 7081573Srgrimes i = OPND(s); 7091573Srgrimes assert(0 < i && i <= m->g->nsub); 7101573Srgrimes offsave = m->pmatch[i].rm_so; 7111573Srgrimes m->pmatch[i].rm_so = sp - m->offp; 7121573Srgrimes dp = backref(m, sp, stop, ss+1, stopst, lev); 7131573Srgrimes if (dp != NULL) 7141573Srgrimes return(dp); 7151573Srgrimes m->pmatch[i].rm_so = offsave; 7161573Srgrimes return(NULL); 7171573Srgrimes break; 7181573Srgrimes case ORPAREN: /* must undo assignment if rest fails */ 7191573Srgrimes i = OPND(s); 7201573Srgrimes assert(0 < i && i <= m->g->nsub); 7211573Srgrimes offsave = m->pmatch[i].rm_eo; 7221573Srgrimes m->pmatch[i].rm_eo = sp - m->offp; 7231573Srgrimes dp = backref(m, sp, stop, ss+1, stopst, lev); 7241573Srgrimes if (dp != NULL) 7251573Srgrimes return(dp); 7261573Srgrimes m->pmatch[i].rm_eo = offsave; 7271573Srgrimes return(NULL); 7281573Srgrimes break; 7291573Srgrimes default: /* uh oh */ 7301573Srgrimes assert(nope); 7311573Srgrimes break; 7321573Srgrimes } 7331573Srgrimes 7341573Srgrimes /* "can't happen" */ 7351573Srgrimes assert(nope); 7361573Srgrimes /* NOTREACHED */ 73711664Sphk return "shut up gcc"; 7381573Srgrimes} 7391573Srgrimes 7401573Srgrimes/* 7411573Srgrimes - fast - step through the string at top speed 7421573Srgrimes == static char *fast(register struct match *m, char *start, \ 7431573Srgrimes == char *stop, sopno startst, sopno stopst); 7441573Srgrimes */ 7451573Srgrimesstatic char * /* where tentative match ended, or NULL */ 7461573Srgrimesfast(m, start, stop, startst, stopst) 7471573Srgrimesregister struct match *m; 7481573Srgrimeschar *start; 7491573Srgrimeschar *stop; 7501573Srgrimessopno startst; 7511573Srgrimessopno stopst; 7521573Srgrimes{ 7531573Srgrimes register states st = m->st; 7541573Srgrimes register states fresh = m->fresh; 7551573Srgrimes register states tmp = m->tmp; 7561573Srgrimes register char *p = start; 7571573Srgrimes register int c = (start == m->beginp) ? OUT : *(start-1); 7581573Srgrimes register int lastc; /* previous c */ 7591573Srgrimes register int flagch; 7601573Srgrimes register int i; 7611573Srgrimes register char *coldp; /* last p after which no match was underway */ 7621573Srgrimes 7631573Srgrimes CLEAR(st); 7641573Srgrimes SET1(st, startst); 7651573Srgrimes st = step(m->g, startst, stopst, st, NOTHING, st); 7661573Srgrimes ASSIGN(fresh, st); 7671573Srgrimes SP("start", st, *p); 7681573Srgrimes coldp = NULL; 7691573Srgrimes for (;;) { 7701573Srgrimes /* next character */ 7711573Srgrimes lastc = c; 7721573Srgrimes c = (p == m->endp) ? OUT : *p; 7731573Srgrimes if (EQ(st, fresh)) 7741573Srgrimes coldp = p; 7751573Srgrimes 7761573Srgrimes /* is there an EOL and/or BOL between lastc and c? */ 7771573Srgrimes flagch = '\0'; 7781573Srgrimes i = 0; 7791573Srgrimes if ( (lastc == '\n' && m->g->cflags®_NEWLINE) || 7801573Srgrimes (lastc == OUT && !(m->eflags®_NOTBOL)) ) { 7811573Srgrimes flagch = BOL; 7821573Srgrimes i = m->g->nbol; 7831573Srgrimes } 7841573Srgrimes if ( (c == '\n' && m->g->cflags®_NEWLINE) || 7851573Srgrimes (c == OUT && !(m->eflags®_NOTEOL)) ) { 7861573Srgrimes flagch = (flagch == BOL) ? BOLEOL : EOL; 7871573Srgrimes i += m->g->neol; 7881573Srgrimes } 7891573Srgrimes if (i != 0) { 7901573Srgrimes for (; i > 0; i--) 7911573Srgrimes st = step(m->g, startst, stopst, st, flagch, st); 7921573Srgrimes SP("boleol", st, c); 7931573Srgrimes } 7941573Srgrimes 7951573Srgrimes /* how about a word boundary? */ 7961573Srgrimes if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && 7971573Srgrimes (c != OUT && ISWORD(c)) ) { 7981573Srgrimes flagch = BOW; 7991573Srgrimes } 8001573Srgrimes if ( (lastc != OUT && ISWORD(lastc)) && 8011573Srgrimes (flagch == EOL || (c != OUT && !ISWORD(c))) ) { 8021573Srgrimes flagch = EOW; 8031573Srgrimes } 8041573Srgrimes if (flagch == BOW || flagch == EOW) { 8051573Srgrimes st = step(m->g, startst, stopst, st, flagch, st); 8061573Srgrimes SP("boweow", st, c); 8071573Srgrimes } 8081573Srgrimes 8091573Srgrimes /* are we done? */ 8101573Srgrimes if (ISSET(st, stopst) || p == stop) 8111573Srgrimes break; /* NOTE BREAK OUT */ 8121573Srgrimes 8131573Srgrimes /* no, we must deal with this character */ 8141573Srgrimes ASSIGN(tmp, st); 8151573Srgrimes ASSIGN(st, fresh); 8161573Srgrimes assert(c != OUT); 8171573Srgrimes st = step(m->g, startst, stopst, tmp, c, st); 8181573Srgrimes SP("aft", st, c); 8191573Srgrimes assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); 8201573Srgrimes p++; 8211573Srgrimes } 8221573Srgrimes 8231573Srgrimes assert(coldp != NULL); 8241573Srgrimes m->coldp = coldp; 8251573Srgrimes if (ISSET(st, stopst)) 8261573Srgrimes return(p+1); 8271573Srgrimes else 8281573Srgrimes return(NULL); 8291573Srgrimes} 8301573Srgrimes 8311573Srgrimes/* 8321573Srgrimes - slow - step through the string more deliberately 8331573Srgrimes == static char *slow(register struct match *m, char *start, \ 8341573Srgrimes == char *stop, sopno startst, sopno stopst); 8351573Srgrimes */ 8361573Srgrimesstatic char * /* where it ended */ 8371573Srgrimesslow(m, start, stop, startst, stopst) 8381573Srgrimesregister struct match *m; 8391573Srgrimeschar *start; 8401573Srgrimeschar *stop; 8411573Srgrimessopno startst; 8421573Srgrimessopno stopst; 8431573Srgrimes{ 8441573Srgrimes register states st = m->st; 8451573Srgrimes register states empty = m->empty; 8461573Srgrimes register states tmp = m->tmp; 8471573Srgrimes register char *p = start; 8481573Srgrimes register int c = (start == m->beginp) ? OUT : *(start-1); 8491573Srgrimes register int lastc; /* previous c */ 8501573Srgrimes register int flagch; 8511573Srgrimes register int i; 8521573Srgrimes register char *matchp; /* last p at which a match ended */ 8531573Srgrimes 8541573Srgrimes AT("slow", start, stop, startst, stopst); 8551573Srgrimes CLEAR(st); 8561573Srgrimes SET1(st, startst); 8571573Srgrimes SP("sstart", st, *p); 8581573Srgrimes st = step(m->g, startst, stopst, st, NOTHING, st); 8591573Srgrimes matchp = NULL; 8601573Srgrimes for (;;) { 8611573Srgrimes /* next character */ 8621573Srgrimes lastc = c; 8631573Srgrimes c = (p == m->endp) ? OUT : *p; 8641573Srgrimes 8651573Srgrimes /* is there an EOL and/or BOL between lastc and c? */ 8661573Srgrimes flagch = '\0'; 8671573Srgrimes i = 0; 8681573Srgrimes if ( (lastc == '\n' && m->g->cflags®_NEWLINE) || 8691573Srgrimes (lastc == OUT && !(m->eflags®_NOTBOL)) ) { 8701573Srgrimes flagch = BOL; 8711573Srgrimes i = m->g->nbol; 8721573Srgrimes } 8731573Srgrimes if ( (c == '\n' && m->g->cflags®_NEWLINE) || 8741573Srgrimes (c == OUT && !(m->eflags®_NOTEOL)) ) { 8751573Srgrimes flagch = (flagch == BOL) ? BOLEOL : EOL; 8761573Srgrimes i += m->g->neol; 8771573Srgrimes } 8781573Srgrimes if (i != 0) { 8791573Srgrimes for (; i > 0; i--) 8801573Srgrimes st = step(m->g, startst, stopst, st, flagch, st); 8811573Srgrimes SP("sboleol", st, c); 8821573Srgrimes } 8831573Srgrimes 8841573Srgrimes /* how about a word boundary? */ 8851573Srgrimes if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && 8861573Srgrimes (c != OUT && ISWORD(c)) ) { 8871573Srgrimes flagch = BOW; 8881573Srgrimes } 8891573Srgrimes if ( (lastc != OUT && ISWORD(lastc)) && 8901573Srgrimes (flagch == EOL || (c != OUT && !ISWORD(c))) ) { 8911573Srgrimes flagch = EOW; 8921573Srgrimes } 8931573Srgrimes if (flagch == BOW || flagch == EOW) { 8941573Srgrimes st = step(m->g, startst, stopst, st, flagch, st); 8951573Srgrimes SP("sboweow", st, c); 8961573Srgrimes } 8971573Srgrimes 8981573Srgrimes /* are we done? */ 8991573Srgrimes if (ISSET(st, stopst)) 9001573Srgrimes matchp = p; 9011573Srgrimes if (EQ(st, empty) || p == stop) 9021573Srgrimes break; /* NOTE BREAK OUT */ 9031573Srgrimes 9041573Srgrimes /* no, we must deal with this character */ 9051573Srgrimes ASSIGN(tmp, st); 9061573Srgrimes ASSIGN(st, empty); 9071573Srgrimes assert(c != OUT); 9081573Srgrimes st = step(m->g, startst, stopst, tmp, c, st); 9091573Srgrimes SP("saft", st, c); 9101573Srgrimes assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); 9111573Srgrimes p++; 9121573Srgrimes } 9131573Srgrimes 9141573Srgrimes return(matchp); 9151573Srgrimes} 9161573Srgrimes 9171573Srgrimes 9181573Srgrimes/* 9191573Srgrimes - step - map set of states reachable before char to set reachable after 9201573Srgrimes == static states step(register struct re_guts *g, sopno start, sopno stop, \ 9211573Srgrimes == register states bef, int ch, register states aft); 9221573Srgrimes == #define BOL (OUT+1) 9231573Srgrimes == #define EOL (BOL+1) 9241573Srgrimes == #define BOLEOL (BOL+2) 9251573Srgrimes == #define NOTHING (BOL+3) 9261573Srgrimes == #define BOW (BOL+4) 9271573Srgrimes == #define EOW (BOL+5) 9281573Srgrimes == #define CODEMAX (BOL+5) // highest code used 9291573Srgrimes == #define NONCHAR(c) ((c) > CHAR_MAX) 9301573Srgrimes == #define NNONCHAR (CODEMAX-CHAR_MAX) 9311573Srgrimes */ 9321573Srgrimesstatic states 9331573Srgrimesstep(g, start, stop, bef, ch, aft) 9341573Srgrimesregister struct re_guts *g; 9351573Srgrimessopno start; /* start state within strip */ 9361573Srgrimessopno stop; /* state after stop state within strip */ 9371573Srgrimesregister states bef; /* states reachable before */ 9381573Srgrimesint ch; /* character or NONCHAR code */ 9391573Srgrimesregister states aft; /* states already known reachable after */ 9401573Srgrimes{ 9411573Srgrimes register cset *cs; 9421573Srgrimes register sop s; 9431573Srgrimes register sopno pc; 9441573Srgrimes register onestate here; /* note, macros know this name */ 9451573Srgrimes register sopno look; 9461573Srgrimes register int i; 9471573Srgrimes 9481573Srgrimes for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { 9491573Srgrimes s = g->strip[pc]; 9501573Srgrimes switch (OP(s)) { 9511573Srgrimes case OEND: 9521573Srgrimes assert(pc == stop-1); 9531573Srgrimes break; 9541573Srgrimes case OCHAR: 9551573Srgrimes /* only characters can match */ 9561573Srgrimes assert(!NONCHAR(ch) || ch != (char)OPND(s)); 9571573Srgrimes if (ch == (char)OPND(s)) 9581573Srgrimes FWD(aft, bef, 1); 9591573Srgrimes break; 9601573Srgrimes case OBOL: 9611573Srgrimes if (ch == BOL || ch == BOLEOL) 9621573Srgrimes FWD(aft, bef, 1); 9631573Srgrimes break; 9641573Srgrimes case OEOL: 9651573Srgrimes if (ch == EOL || ch == BOLEOL) 9661573Srgrimes FWD(aft, bef, 1); 9671573Srgrimes break; 9681573Srgrimes case OBOW: 9691573Srgrimes if (ch == BOW) 9701573Srgrimes FWD(aft, bef, 1); 9711573Srgrimes break; 9721573Srgrimes case OEOW: 9731573Srgrimes if (ch == EOW) 9741573Srgrimes FWD(aft, bef, 1); 9751573Srgrimes break; 9761573Srgrimes case OANY: 9771573Srgrimes if (!NONCHAR(ch)) 9781573Srgrimes FWD(aft, bef, 1); 9791573Srgrimes break; 9801573Srgrimes case OANYOF: 9811573Srgrimes cs = &g->sets[OPND(s)]; 9821573Srgrimes if (!NONCHAR(ch) && CHIN(cs, ch)) 9831573Srgrimes FWD(aft, bef, 1); 9841573Srgrimes break; 9851573Srgrimes case OBACK_: /* ignored here */ 9861573Srgrimes case O_BACK: 9871573Srgrimes FWD(aft, aft, 1); 9881573Srgrimes break; 9891573Srgrimes case OPLUS_: /* forward, this is just an empty */ 9901573Srgrimes FWD(aft, aft, 1); 9911573Srgrimes break; 9921573Srgrimes case O_PLUS: /* both forward and back */ 9931573Srgrimes FWD(aft, aft, 1); 9941573Srgrimes i = ISSETBACK(aft, OPND(s)); 9951573Srgrimes BACK(aft, aft, OPND(s)); 9961573Srgrimes if (!i && ISSETBACK(aft, OPND(s))) { 9971573Srgrimes /* oho, must reconsider loop body */ 9981573Srgrimes pc -= OPND(s) + 1; 9991573Srgrimes INIT(here, pc); 10001573Srgrimes } 10011573Srgrimes break; 10021573Srgrimes case OQUEST_: /* two branches, both forward */ 10031573Srgrimes FWD(aft, aft, 1); 10041573Srgrimes FWD(aft, aft, OPND(s)); 10051573Srgrimes break; 10061573Srgrimes case O_QUEST: /* just an empty */ 10071573Srgrimes FWD(aft, aft, 1); 10081573Srgrimes break; 10091573Srgrimes case OLPAREN: /* not significant here */ 10101573Srgrimes case ORPAREN: 10111573Srgrimes FWD(aft, aft, 1); 10121573Srgrimes break; 10131573Srgrimes case OCH_: /* mark the first two branches */ 10141573Srgrimes FWD(aft, aft, 1); 10151573Srgrimes assert(OP(g->strip[pc+OPND(s)]) == OOR2); 10161573Srgrimes FWD(aft, aft, OPND(s)); 10171573Srgrimes break; 10181573Srgrimes case OOR1: /* done a branch, find the O_CH */ 10191573Srgrimes if (ISSTATEIN(aft, here)) { 10201573Srgrimes for (look = 1; 10211573Srgrimes OP(s = g->strip[pc+look]) != O_CH; 10221573Srgrimes look += OPND(s)) 10231573Srgrimes assert(OP(s) == OOR2); 10241573Srgrimes FWD(aft, aft, look); 10251573Srgrimes } 10261573Srgrimes break; 10271573Srgrimes case OOR2: /* propagate OCH_'s marking */ 10281573Srgrimes FWD(aft, aft, 1); 10291573Srgrimes if (OP(g->strip[pc+OPND(s)]) != O_CH) { 10301573Srgrimes assert(OP(g->strip[pc+OPND(s)]) == OOR2); 10311573Srgrimes FWD(aft, aft, OPND(s)); 10321573Srgrimes } 10331573Srgrimes break; 10341573Srgrimes case O_CH: /* just empty */ 10351573Srgrimes FWD(aft, aft, 1); 10361573Srgrimes break; 10371573Srgrimes default: /* ooooops... */ 10381573Srgrimes assert(nope); 10391573Srgrimes break; 10401573Srgrimes } 10411573Srgrimes } 10421573Srgrimes 10431573Srgrimes return(aft); 10441573Srgrimes} 10451573Srgrimes 10461573Srgrimes#ifdef REDEBUG 10471573Srgrimes/* 10481573Srgrimes - print - print a set of states 10491573Srgrimes == #ifdef REDEBUG 10501573Srgrimes == static void print(struct match *m, char *caption, states st, \ 10511573Srgrimes == int ch, FILE *d); 10521573Srgrimes == #endif 10531573Srgrimes */ 10541573Srgrimesstatic void 10551573Srgrimesprint(m, caption, st, ch, d) 10561573Srgrimesstruct match *m; 10571573Srgrimeschar *caption; 10581573Srgrimesstates st; 10591573Srgrimesint ch; 10601573SrgrimesFILE *d; 10611573Srgrimes{ 10621573Srgrimes register struct re_guts *g = m->g; 10631573Srgrimes register int i; 10641573Srgrimes register int first = 1; 10651573Srgrimes 10661573Srgrimes if (!(m->eflags®_TRACE)) 10671573Srgrimes return; 10681573Srgrimes 10691573Srgrimes fprintf(d, "%s", caption); 10701573Srgrimes if (ch != '\0') 10711573Srgrimes fprintf(d, " %s", pchar(ch)); 10721573Srgrimes for (i = 0; i < g->nstates; i++) 10731573Srgrimes if (ISSET(st, i)) { 10741573Srgrimes fprintf(d, "%s%d", (first) ? "\t" : ", ", i); 10751573Srgrimes first = 0; 10761573Srgrimes } 10771573Srgrimes fprintf(d, "\n"); 10781573Srgrimes} 10791573Srgrimes 10808870Srgrimes/* 10811573Srgrimes - at - print current situation 10821573Srgrimes == #ifdef REDEBUG 10831573Srgrimes == static void at(struct match *m, char *title, char *start, char *stop, \ 10841573Srgrimes == sopno startst, sopno stopst); 10851573Srgrimes == #endif 10861573Srgrimes */ 10871573Srgrimesstatic void 10881573Srgrimesat(m, title, start, stop, startst, stopst) 10891573Srgrimesstruct match *m; 10901573Srgrimeschar *title; 10911573Srgrimeschar *start; 10921573Srgrimeschar *stop; 10931573Srgrimessopno startst; 10941573Srgrimessopno stopst; 10951573Srgrimes{ 10961573Srgrimes if (!(m->eflags®_TRACE)) 10971573Srgrimes return; 10981573Srgrimes 10991573Srgrimes printf("%s %s-", title, pchar(*start)); 11001573Srgrimes printf("%s ", pchar(*stop)); 11011573Srgrimes printf("%ld-%ld\n", (long)startst, (long)stopst); 11021573Srgrimes} 11031573Srgrimes 11041573Srgrimes#ifndef PCHARDONE 11051573Srgrimes#define PCHARDONE /* never again */ 11061573Srgrimes/* 11071573Srgrimes - pchar - make a character printable 11081573Srgrimes == #ifdef REDEBUG 11091573Srgrimes == static char *pchar(int ch); 11101573Srgrimes == #endif 11111573Srgrimes * 11121573Srgrimes * Is this identical to regchar() over in debug.c? Well, yes. But a 11131573Srgrimes * duplicate here avoids having a debugging-capable regexec.o tied to 11141573Srgrimes * a matching debug.o, and this is convenient. It all disappears in 11151573Srgrimes * the non-debug compilation anyway, so it doesn't matter much. 11161573Srgrimes */ 11171573Srgrimesstatic char * /* -> representation */ 11181573Srgrimespchar(ch) 11191573Srgrimesint ch; 11201573Srgrimes{ 11211573Srgrimes static char pbuf[10]; 11221573Srgrimes 112317508Sache if (isprint((uch)ch) || ch == ' ') 11241573Srgrimes sprintf(pbuf, "%c", ch); 11251573Srgrimes else 11261573Srgrimes sprintf(pbuf, "\\%o", ch); 11271573Srgrimes return(pbuf); 11281573Srgrimes} 11291573Srgrimes#endif 11301573Srgrimes#endif 11311573Srgrimes 11321573Srgrimes#undef matcher 11331573Srgrimes#undef fast 11341573Srgrimes#undef slow 11351573Srgrimes#undef dissect 11361573Srgrimes#undef backref 11371573Srgrimes#undef step 11381573Srgrimes#undef print 11391573Srgrimes#undef at 11401573Srgrimes#undef match 1141