engine.c revision 92889
11573Srgrimes/*- 21573Srgrimes * Copyright (c) 1992, 1993, 1994 Henry Spencer. 31573Srgrimes * Copyright (c) 1992, 1993, 1994 41573Srgrimes * The Regents of the University of California. All rights reserved. 51573Srgrimes * 61573Srgrimes * This code is derived from software contributed to Berkeley by 71573Srgrimes * Henry Spencer. 81573Srgrimes * 91573Srgrimes * Redistribution and use in source and binary forms, with or without 101573Srgrimes * modification, are permitted provided that the following conditions 111573Srgrimes * are met: 121573Srgrimes * 1. Redistributions of source code must retain the above copyright 131573Srgrimes * notice, this list of conditions and the following disclaimer. 141573Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 151573Srgrimes * notice, this list of conditions and the following disclaimer in the 161573Srgrimes * documentation and/or other materials provided with the distribution. 171573Srgrimes * 3. All advertising materials mentioning features or use of this software 181573Srgrimes * must display the following acknowledgement: 191573Srgrimes * This product includes software developed by the University of 201573Srgrimes * California, Berkeley and its contributors. 211573Srgrimes * 4. Neither the name of the University nor the names of its contributors 221573Srgrimes * may be used to endorse or promote products derived from this software 231573Srgrimes * without specific prior written permission. 241573Srgrimes * 251573Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 261573Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 271573Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 281573Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 291573Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 301573Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 311573Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 321573Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 331573Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 341573Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 351573Srgrimes * SUCH DAMAGE. 361573Srgrimes * 371573Srgrimes * @(#)engine.c 8.5 (Berkeley) 3/20/94 3862232Sdcs * 3962232Sdcs * $FreeBSD: head/lib/libc/regex/engine.c 92889 2002-03-21 18:49:23Z obrien $ 401573Srgrimes */ 411573Srgrimes 421573Srgrimes/* 431573Srgrimes * The matching engine and friends. This file is #included by regexec.c 441573Srgrimes * after suitable #defines of a variety of macros used herein, so that 451573Srgrimes * different state representations can be used without duplicating masses 461573Srgrimes * of code. 471573Srgrimes */ 481573Srgrimes 491573Srgrimes#ifdef SNAMES 501573Srgrimes#define matcher smatcher 511573Srgrimes#define fast sfast 521573Srgrimes#define slow sslow 531573Srgrimes#define dissect sdissect 541573Srgrimes#define backref sbackref 551573Srgrimes#define step sstep 561573Srgrimes#define print sprint 571573Srgrimes#define at sat 581573Srgrimes#define match smat 591573Srgrimes#endif 601573Srgrimes#ifdef LNAMES 611573Srgrimes#define matcher lmatcher 621573Srgrimes#define fast lfast 631573Srgrimes#define slow lslow 641573Srgrimes#define dissect ldissect 651573Srgrimes#define backref lbackref 661573Srgrimes#define step lstep 671573Srgrimes#define print lprint 681573Srgrimes#define at lat 691573Srgrimes#define match lmat 701573Srgrimes#endif 711573Srgrimes 721573Srgrimes/* another structure passed up and down to avoid zillions of parameters */ 731573Srgrimesstruct match { 741573Srgrimes struct re_guts *g; 751573Srgrimes int eflags; 761573Srgrimes regmatch_t *pmatch; /* [nsub+1] (0 element unused) */ 771573Srgrimes char *offp; /* offsets work from here */ 781573Srgrimes char *beginp; /* start of string -- virtual NUL precedes */ 791573Srgrimes char *endp; /* end of string -- virtual NUL here */ 801573Srgrimes char *coldp; /* can be no match starting before here */ 811573Srgrimes char **lastpos; /* [nplus+1] */ 821573Srgrimes STATEVARS; 831573Srgrimes states st; /* current states */ 841573Srgrimes states fresh; /* states for a fresh start */ 851573Srgrimes states tmp; /* temporary */ 861573Srgrimes states empty; /* empty set of states */ 871573Srgrimes}; 881573Srgrimes 891573Srgrimes/* ========= begin header generated by ./mkh ========= */ 901573Srgrimes#ifdef __cplusplus 911573Srgrimesextern "C" { 921573Srgrimes#endif 931573Srgrimes 941573Srgrimes/* === engine.c === */ 951573Srgrimesstatic int matcher __P((struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], int eflags)); 961573Srgrimesstatic char *dissect __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst)); 971573Srgrimesstatic char *backref __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst, sopno lev)); 981573Srgrimesstatic char *fast __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst)); 991573Srgrimesstatic char *slow __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst)); 1001573Srgrimesstatic states step __P((struct re_guts *g, sopno start, sopno stop, states bef, int ch, states aft)); 1011573Srgrimes#define BOL (OUT+1) 1021573Srgrimes#define EOL (BOL+1) 1031573Srgrimes#define BOLEOL (BOL+2) 1041573Srgrimes#define NOTHING (BOL+3) 1051573Srgrimes#define BOW (BOL+4) 1061573Srgrimes#define EOW (BOL+5) 1071573Srgrimes#define CODEMAX (BOL+5) /* highest code used */ 1081573Srgrimes#define NONCHAR(c) ((c) > CHAR_MAX) 1091573Srgrimes#define NNONCHAR (CODEMAX-CHAR_MAX) 1101573Srgrimes#ifdef REDEBUG 1111573Srgrimesstatic void print __P((struct match *m, char *caption, states st, int ch, FILE *d)); 1121573Srgrimes#endif 1131573Srgrimes#ifdef REDEBUG 1141573Srgrimesstatic void at __P((struct match *m, char *title, char *start, char *stop, sopno startst, sopno stopst)); 1151573Srgrimes#endif 1161573Srgrimes#ifdef REDEBUG 1171573Srgrimesstatic char *pchar __P((int ch)); 1181573Srgrimes#endif 1191573Srgrimes 1201573Srgrimes#ifdef __cplusplus 1211573Srgrimes} 1221573Srgrimes#endif 1231573Srgrimes/* ========= end header generated by ./mkh ========= */ 1241573Srgrimes 1251573Srgrimes#ifdef REDEBUG 1261573Srgrimes#define SP(t, s, c) print(m, t, s, c, stdout) 1271573Srgrimes#define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2) 1281573Srgrimes#define NOTE(str) { if (m->eflags®_TRACE) printf("=%s\n", (str)); } 1291573Srgrimes#else 1301573Srgrimes#define SP(t, s, c) /* nothing */ 1311573Srgrimes#define AT(t, p1, p2, s1, s2) /* nothing */ 1321573Srgrimes#define NOTE(s) /* nothing */ 1331573Srgrimes#endif 1341573Srgrimes 1351573Srgrimes/* 1361573Srgrimes - matcher - the actual matching engine 13792889Sobrien == static int matcher(struct re_guts *g, char *string, \ 1381573Srgrimes == size_t nmatch, regmatch_t pmatch[], int eflags); 1391573Srgrimes */ 1401573Srgrimesstatic int /* 0 success, REG_NOMATCH failure */ 1411573Srgrimesmatcher(g, string, nmatch, pmatch, eflags) 14292889Sobrienstruct re_guts *g; 1431573Srgrimeschar *string; 1441573Srgrimessize_t nmatch; 1451573Srgrimesregmatch_t pmatch[]; 1461573Srgrimesint eflags; 1471573Srgrimes{ 14892889Sobrien char *endp; 14992889Sobrien int i; 1501573Srgrimes struct match mv; 15192889Sobrien struct match *m = &mv; 15292889Sobrien char *dp; 15392889Sobrien const sopno gf = g->firststate+1; /* +1 for OEND */ 15492889Sobrien const sopno gl = g->laststate; 1551573Srgrimes char *start; 1561573Srgrimes char *stop; 15762232Sdcs /* Boyer-Moore algorithms variables */ 15892889Sobrien char *pp; 15962232Sdcs int cj, mj; 16092889Sobrien char *mustfirst; 16192889Sobrien char *mustlast; 16292889Sobrien int *matchjump; 16392889Sobrien int *charjump; 1641573Srgrimes 1651573Srgrimes /* simplify the situation where possible */ 1661573Srgrimes if (g->cflags®_NOSUB) 1671573Srgrimes nmatch = 0; 1681573Srgrimes if (eflags®_STARTEND) { 1691573Srgrimes start = string + pmatch[0].rm_so; 1701573Srgrimes stop = string + pmatch[0].rm_eo; 1711573Srgrimes } else { 1721573Srgrimes start = string; 1731573Srgrimes stop = start + strlen(start); 1741573Srgrimes } 1751573Srgrimes if (stop < start) 1761573Srgrimes return(REG_INVARG); 1771573Srgrimes 1781573Srgrimes /* prescreening; this does wonders for this rather slow code */ 1791573Srgrimes if (g->must != NULL) { 18062391Sdcs if (g->charjump != NULL && g->matchjump != NULL) { 18162232Sdcs mustfirst = g->must; 18262232Sdcs mustlast = g->must + g->mlen - 1; 18362232Sdcs charjump = g->charjump; 18462232Sdcs matchjump = g->matchjump; 18562232Sdcs pp = mustlast; 18662754Sdcs for (dp = start+g->mlen-1; dp < stop;) { 18762232Sdcs /* Fast skip non-matches */ 18862754Sdcs while (dp < stop && charjump[*dp]) 18962754Sdcs dp += charjump[*dp]; 19062232Sdcs 19162754Sdcs if (dp >= stop) 19262232Sdcs break; 19362232Sdcs 19462232Sdcs /* Greedy matcher */ 19562232Sdcs /* We depend on not being used for 19662232Sdcs * for strings of length 1 19762232Sdcs */ 19862754Sdcs while (*--dp == *--pp && pp != mustfirst); 19962232Sdcs 20062754Sdcs if (*dp == *pp) 20162232Sdcs break; 20262232Sdcs 20362232Sdcs /* Jump to next possible match */ 20462232Sdcs mj = matchjump[pp - mustfirst]; 20562754Sdcs cj = charjump[*dp]; 20662754Sdcs dp += (cj < mj ? mj : cj); 20762754Sdcs pp = mustlast; 20862232Sdcs } 20962391Sdcs if (pp != mustfirst) 21062232Sdcs return(REG_NOMATCH); 21162232Sdcs } else { 21262232Sdcs for (dp = start; dp < stop; dp++) 21362232Sdcs if (*dp == g->must[0] && 21462232Sdcs stop - dp >= g->mlen && 21562232Sdcs memcmp(dp, g->must, (size_t)g->mlen) == 0) 21662232Sdcs break; 21762232Sdcs if (dp == stop) /* we didn't find g->must */ 21862232Sdcs return(REG_NOMATCH); 21962232Sdcs } 2201573Srgrimes } 2211573Srgrimes 2221573Srgrimes /* match struct setup */ 2231573Srgrimes m->g = g; 2241573Srgrimes m->eflags = eflags; 2251573Srgrimes m->pmatch = NULL; 2261573Srgrimes m->lastpos = NULL; 2271573Srgrimes m->offp = string; 2281573Srgrimes m->beginp = start; 2291573Srgrimes m->endp = stop; 2301573Srgrimes STATESETUP(m, 4); 2311573Srgrimes SETUP(m->st); 2321573Srgrimes SETUP(m->fresh); 2331573Srgrimes SETUP(m->tmp); 2341573Srgrimes SETUP(m->empty); 2351573Srgrimes CLEAR(m->empty); 2361573Srgrimes 23762391Sdcs /* Adjust start according to moffset, to speed things up */ 23862391Sdcs if (g->moffset > -1) 23962854Sdcs start = ((dp - g->moffset) < start) ? start : dp - g->moffset; 24062391Sdcs 2411573Srgrimes /* this loop does only one repetition except for backrefs */ 2421573Srgrimes for (;;) { 2431573Srgrimes endp = fast(m, start, stop, gf, gl); 2441573Srgrimes if (endp == NULL) { /* a miss */ 2451573Srgrimes STATETEARDOWN(m); 2461573Srgrimes return(REG_NOMATCH); 2471573Srgrimes } 2481573Srgrimes if (nmatch == 0 && !g->backrefs) 2491573Srgrimes break; /* no further info needed */ 2501573Srgrimes 2511573Srgrimes /* where? */ 2521573Srgrimes assert(m->coldp != NULL); 2531573Srgrimes for (;;) { 2541573Srgrimes NOTE("finding start"); 2551573Srgrimes endp = slow(m, m->coldp, stop, gf, gl); 2561573Srgrimes if (endp != NULL) 2571573Srgrimes break; 2581573Srgrimes assert(m->coldp < m->endp); 2591573Srgrimes m->coldp++; 2601573Srgrimes } 2611573Srgrimes if (nmatch == 1 && !g->backrefs) 2621573Srgrimes break; /* no further info needed */ 2631573Srgrimes 2641573Srgrimes /* oh my, he wants the subexpressions... */ 2651573Srgrimes if (m->pmatch == NULL) 2661573Srgrimes m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) * 2671573Srgrimes sizeof(regmatch_t)); 2681573Srgrimes if (m->pmatch == NULL) { 2691573Srgrimes STATETEARDOWN(m); 2701573Srgrimes return(REG_ESPACE); 2711573Srgrimes } 2721573Srgrimes for (i = 1; i <= m->g->nsub; i++) 2731573Srgrimes m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1; 2741573Srgrimes if (!g->backrefs && !(m->eflags®_BACKR)) { 2751573Srgrimes NOTE("dissecting"); 2761573Srgrimes dp = dissect(m, m->coldp, endp, gf, gl); 2771573Srgrimes } else { 2781573Srgrimes if (g->nplus > 0 && m->lastpos == NULL) 2791573Srgrimes m->lastpos = (char **)malloc((g->nplus+1) * 2801573Srgrimes sizeof(char *)); 2811573Srgrimes if (g->nplus > 0 && m->lastpos == NULL) { 2821573Srgrimes free(m->pmatch); 2831573Srgrimes STATETEARDOWN(m); 2841573Srgrimes return(REG_ESPACE); 2851573Srgrimes } 2861573Srgrimes NOTE("backref dissect"); 2871573Srgrimes dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); 2881573Srgrimes } 2891573Srgrimes if (dp != NULL) 2901573Srgrimes break; 2911573Srgrimes 2921573Srgrimes /* uh-oh... we couldn't find a subexpression-level match */ 2931573Srgrimes assert(g->backrefs); /* must be back references doing it */ 2941573Srgrimes assert(g->nplus == 0 || m->lastpos != NULL); 2951573Srgrimes for (;;) { 2961573Srgrimes if (dp != NULL || endp <= m->coldp) 2971573Srgrimes break; /* defeat */ 2981573Srgrimes NOTE("backoff"); 2991573Srgrimes endp = slow(m, m->coldp, endp-1, gf, gl); 3001573Srgrimes if (endp == NULL) 3011573Srgrimes break; /* defeat */ 3021573Srgrimes /* try it on a shorter possibility */ 3031573Srgrimes#ifndef NDEBUG 3041573Srgrimes for (i = 1; i <= m->g->nsub; i++) { 3051573Srgrimes assert(m->pmatch[i].rm_so == -1); 3061573Srgrimes assert(m->pmatch[i].rm_eo == -1); 3071573Srgrimes } 3081573Srgrimes#endif 3091573Srgrimes NOTE("backoff dissect"); 3101573Srgrimes dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); 3111573Srgrimes } 3121573Srgrimes assert(dp == NULL || dp == endp); 3131573Srgrimes if (dp != NULL) /* found a shorter one */ 3141573Srgrimes break; 3151573Srgrimes 3161573Srgrimes /* despite initial appearances, there is no match here */ 3171573Srgrimes NOTE("false alarm"); 3181573Srgrimes start = m->coldp + 1; /* recycle starting later */ 3191573Srgrimes assert(start <= stop); 3201573Srgrimes } 3211573Srgrimes 3221573Srgrimes /* fill in the details if requested */ 3231573Srgrimes if (nmatch > 0) { 3241573Srgrimes pmatch[0].rm_so = m->coldp - m->offp; 3251573Srgrimes pmatch[0].rm_eo = endp - m->offp; 3261573Srgrimes } 3271573Srgrimes if (nmatch > 1) { 3281573Srgrimes assert(m->pmatch != NULL); 3291573Srgrimes for (i = 1; i < nmatch; i++) 3301573Srgrimes if (i <= m->g->nsub) 3311573Srgrimes pmatch[i] = m->pmatch[i]; 3321573Srgrimes else { 3331573Srgrimes pmatch[i].rm_so = -1; 3341573Srgrimes pmatch[i].rm_eo = -1; 3351573Srgrimes } 3361573Srgrimes } 3371573Srgrimes 3381573Srgrimes if (m->pmatch != NULL) 3391573Srgrimes free((char *)m->pmatch); 3401573Srgrimes if (m->lastpos != NULL) 3411573Srgrimes free((char *)m->lastpos); 3421573Srgrimes STATETEARDOWN(m); 3431573Srgrimes return(0); 3441573Srgrimes} 3451573Srgrimes 3461573Srgrimes/* 3471573Srgrimes - dissect - figure out what matched what, no back references 34892889Sobrien == static char *dissect(struct match *m, char *start, \ 3491573Srgrimes == char *stop, sopno startst, sopno stopst); 3501573Srgrimes */ 3511573Srgrimesstatic char * /* == stop (success) always */ 3521573Srgrimesdissect(m, start, stop, startst, stopst) 35392889Sobrienstruct match *m; 3541573Srgrimeschar *start; 3551573Srgrimeschar *stop; 3561573Srgrimessopno startst; 3571573Srgrimessopno stopst; 3581573Srgrimes{ 35992889Sobrien int i; 36092889Sobrien sopno ss; /* start sop of current subRE */ 36192889Sobrien sopno es; /* end sop of current subRE */ 36292889Sobrien char *sp; /* start of string matched by it */ 36392889Sobrien char *stp; /* string matched by it cannot pass here */ 36492889Sobrien char *rest; /* start of rest of string */ 36592889Sobrien char *tail; /* string unmatched by rest of RE */ 36692889Sobrien sopno ssub; /* start sop of subsubRE */ 36792889Sobrien sopno esub; /* end sop of subsubRE */ 36892889Sobrien char *ssp; /* start of string matched by subsubRE */ 36992889Sobrien char *sep; /* end of string matched by subsubRE */ 37092889Sobrien char *oldssp; /* previous ssp */ 37192889Sobrien char *dp; 3721573Srgrimes 3731573Srgrimes AT("diss", start, stop, startst, stopst); 3741573Srgrimes sp = start; 3751573Srgrimes for (ss = startst; ss < stopst; ss = es) { 3761573Srgrimes /* identify end of subRE */ 3771573Srgrimes es = ss; 3781573Srgrimes switch (OP(m->g->strip[es])) { 3791573Srgrimes case OPLUS_: 3801573Srgrimes case OQUEST_: 3811573Srgrimes es += OPND(m->g->strip[es]); 3821573Srgrimes break; 3831573Srgrimes case OCH_: 3841573Srgrimes while (OP(m->g->strip[es]) != O_CH) 3851573Srgrimes es += OPND(m->g->strip[es]); 3861573Srgrimes break; 3871573Srgrimes } 3881573Srgrimes es++; 3891573Srgrimes 3901573Srgrimes /* figure out what it matched */ 3911573Srgrimes switch (OP(m->g->strip[ss])) { 3921573Srgrimes case OEND: 3931573Srgrimes assert(nope); 3941573Srgrimes break; 3951573Srgrimes case OCHAR: 3961573Srgrimes sp++; 3971573Srgrimes break; 3981573Srgrimes case OBOL: 3991573Srgrimes case OEOL: 4001573Srgrimes case OBOW: 4011573Srgrimes case OEOW: 4021573Srgrimes break; 4031573Srgrimes case OANY: 4041573Srgrimes case OANYOF: 4051573Srgrimes sp++; 4061573Srgrimes break; 4071573Srgrimes case OBACK_: 4081573Srgrimes case O_BACK: 4091573Srgrimes assert(nope); 4101573Srgrimes break; 4111573Srgrimes /* cases where length of match is hard to find */ 4121573Srgrimes case OQUEST_: 4131573Srgrimes stp = stop; 4141573Srgrimes for (;;) { 4151573Srgrimes /* how long could this one be? */ 4161573Srgrimes rest = slow(m, sp, stp, ss, es); 4171573Srgrimes assert(rest != NULL); /* it did match */ 4181573Srgrimes /* could the rest match the rest? */ 4191573Srgrimes tail = slow(m, rest, stop, es, stopst); 4201573Srgrimes if (tail == stop) 4211573Srgrimes break; /* yes! */ 4221573Srgrimes /* no -- try a shorter match for this one */ 4231573Srgrimes stp = rest - 1; 4241573Srgrimes assert(stp >= sp); /* it did work */ 4251573Srgrimes } 4261573Srgrimes ssub = ss + 1; 4271573Srgrimes esub = es - 1; 4281573Srgrimes /* did innards match? */ 4291573Srgrimes if (slow(m, sp, rest, ssub, esub) != NULL) { 4301573Srgrimes dp = dissect(m, sp, rest, ssub, esub); 4311573Srgrimes assert(dp == rest); 4321573Srgrimes } else /* no */ 4331573Srgrimes assert(sp == rest); 4341573Srgrimes sp = rest; 4351573Srgrimes break; 4361573Srgrimes case OPLUS_: 4371573Srgrimes stp = stop; 4381573Srgrimes for (;;) { 4391573Srgrimes /* how long could this one be? */ 4401573Srgrimes rest = slow(m, sp, stp, ss, es); 4411573Srgrimes assert(rest != NULL); /* it did match */ 4421573Srgrimes /* could the rest match the rest? */ 4431573Srgrimes tail = slow(m, rest, stop, es, stopst); 4441573Srgrimes if (tail == stop) 4451573Srgrimes break; /* yes! */ 4461573Srgrimes /* no -- try a shorter match for this one */ 4471573Srgrimes stp = rest - 1; 4481573Srgrimes assert(stp >= sp); /* it did work */ 4491573Srgrimes } 4501573Srgrimes ssub = ss + 1; 4511573Srgrimes esub = es - 1; 4521573Srgrimes ssp = sp; 4531573Srgrimes oldssp = ssp; 4541573Srgrimes for (;;) { /* find last match of innards */ 4551573Srgrimes sep = slow(m, ssp, rest, ssub, esub); 4561573Srgrimes if (sep == NULL || sep == ssp) 4571573Srgrimes break; /* failed or matched null */ 4581573Srgrimes oldssp = ssp; /* on to next try */ 4591573Srgrimes ssp = sep; 4601573Srgrimes } 4611573Srgrimes if (sep == NULL) { 4621573Srgrimes /* last successful match */ 4631573Srgrimes sep = ssp; 4641573Srgrimes ssp = oldssp; 4651573Srgrimes } 4661573Srgrimes assert(sep == rest); /* must exhaust substring */ 4671573Srgrimes assert(slow(m, ssp, sep, ssub, esub) == rest); 4681573Srgrimes dp = dissect(m, ssp, sep, ssub, esub); 4691573Srgrimes assert(dp == sep); 4701573Srgrimes sp = rest; 4711573Srgrimes break; 4721573Srgrimes case OCH_: 4731573Srgrimes stp = stop; 4741573Srgrimes for (;;) { 4751573Srgrimes /* how long could this one be? */ 4761573Srgrimes rest = slow(m, sp, stp, ss, es); 4771573Srgrimes assert(rest != NULL); /* it did match */ 4781573Srgrimes /* could the rest match the rest? */ 4791573Srgrimes tail = slow(m, rest, stop, es, stopst); 4801573Srgrimes if (tail == stop) 4811573Srgrimes break; /* yes! */ 4821573Srgrimes /* no -- try a shorter match for this one */ 4831573Srgrimes stp = rest - 1; 4841573Srgrimes assert(stp >= sp); /* it did work */ 4851573Srgrimes } 4861573Srgrimes ssub = ss + 1; 4871573Srgrimes esub = ss + OPND(m->g->strip[ss]) - 1; 4881573Srgrimes assert(OP(m->g->strip[esub]) == OOR1); 4891573Srgrimes for (;;) { /* find first matching branch */ 4901573Srgrimes if (slow(m, sp, rest, ssub, esub) == rest) 4911573Srgrimes break; /* it matched all of it */ 4921573Srgrimes /* that one missed, try next one */ 4931573Srgrimes assert(OP(m->g->strip[esub]) == OOR1); 4941573Srgrimes esub++; 4951573Srgrimes assert(OP(m->g->strip[esub]) == OOR2); 4961573Srgrimes ssub = esub + 1; 4971573Srgrimes esub += OPND(m->g->strip[esub]); 4981573Srgrimes if (OP(m->g->strip[esub]) == OOR2) 4991573Srgrimes esub--; 5001573Srgrimes else 5011573Srgrimes assert(OP(m->g->strip[esub]) == O_CH); 5021573Srgrimes } 5031573Srgrimes dp = dissect(m, sp, rest, ssub, esub); 5041573Srgrimes assert(dp == rest); 5051573Srgrimes sp = rest; 5061573Srgrimes break; 5071573Srgrimes case O_PLUS: 5081573Srgrimes case O_QUEST: 5091573Srgrimes case OOR1: 5101573Srgrimes case OOR2: 5111573Srgrimes case O_CH: 5121573Srgrimes assert(nope); 5131573Srgrimes break; 5141573Srgrimes case OLPAREN: 5151573Srgrimes i = OPND(m->g->strip[ss]); 5161573Srgrimes assert(0 < i && i <= m->g->nsub); 5171573Srgrimes m->pmatch[i].rm_so = sp - m->offp; 5181573Srgrimes break; 5191573Srgrimes case ORPAREN: 5201573Srgrimes i = OPND(m->g->strip[ss]); 5211573Srgrimes assert(0 < i && i <= m->g->nsub); 5221573Srgrimes m->pmatch[i].rm_eo = sp - m->offp; 5231573Srgrimes break; 5241573Srgrimes default: /* uh oh */ 5251573Srgrimes assert(nope); 5261573Srgrimes break; 5271573Srgrimes } 5281573Srgrimes } 5291573Srgrimes 5301573Srgrimes assert(sp == stop); 5311573Srgrimes return(sp); 5321573Srgrimes} 5331573Srgrimes 5341573Srgrimes/* 5351573Srgrimes - backref - figure out what matched what, figuring in back references 53692889Sobrien == static char *backref(struct match *m, char *start, \ 5371573Srgrimes == char *stop, sopno startst, sopno stopst, sopno lev); 5381573Srgrimes */ 5391573Srgrimesstatic char * /* == stop (success) or NULL (failure) */ 5401573Srgrimesbackref(m, start, stop, startst, stopst, lev) 54192889Sobrienstruct match *m; 5421573Srgrimeschar *start; 5431573Srgrimeschar *stop; 5441573Srgrimessopno startst; 5451573Srgrimessopno stopst; 5461573Srgrimessopno lev; /* PLUS nesting level */ 5471573Srgrimes{ 54892889Sobrien int i; 54992889Sobrien sopno ss; /* start sop of current subRE */ 55092889Sobrien char *sp; /* start of string matched by it */ 55192889Sobrien sopno ssub; /* start sop of subsubRE */ 55292889Sobrien sopno esub; /* end sop of subsubRE */ 55392889Sobrien char *ssp; /* start of string matched by subsubRE */ 55492889Sobrien char *dp; 55592889Sobrien size_t len; 55692889Sobrien int hard; 55792889Sobrien sop s; 55892889Sobrien regoff_t offsave; 55992889Sobrien cset *cs; 5601573Srgrimes 5611573Srgrimes AT("back", start, stop, startst, stopst); 5621573Srgrimes sp = start; 5631573Srgrimes 5641573Srgrimes /* get as far as we can with easy stuff */ 5651573Srgrimes hard = 0; 5661573Srgrimes for (ss = startst; !hard && ss < stopst; ss++) 5671573Srgrimes switch (OP(s = m->g->strip[ss])) { 5681573Srgrimes case OCHAR: 5691573Srgrimes if (sp == stop || *sp++ != (char)OPND(s)) 5701573Srgrimes return(NULL); 5711573Srgrimes break; 5721573Srgrimes case OANY: 5731573Srgrimes if (sp == stop) 5741573Srgrimes return(NULL); 5751573Srgrimes sp++; 5761573Srgrimes break; 5771573Srgrimes case OANYOF: 5781573Srgrimes cs = &m->g->sets[OPND(s)]; 5791573Srgrimes if (sp == stop || !CHIN(cs, *sp++)) 5801573Srgrimes return(NULL); 5811573Srgrimes break; 5821573Srgrimes case OBOL: 5831573Srgrimes if ( (sp == m->beginp && !(m->eflags®_NOTBOL)) || 5841573Srgrimes (sp < m->endp && *(sp-1) == '\n' && 5851573Srgrimes (m->g->cflags®_NEWLINE)) ) 5861573Srgrimes { /* yes */ } 5871573Srgrimes else 5881573Srgrimes return(NULL); 5891573Srgrimes break; 5901573Srgrimes case OEOL: 5911573Srgrimes if ( (sp == m->endp && !(m->eflags®_NOTEOL)) || 5921573Srgrimes (sp < m->endp && *sp == '\n' && 5931573Srgrimes (m->g->cflags®_NEWLINE)) ) 5941573Srgrimes { /* yes */ } 5951573Srgrimes else 5961573Srgrimes return(NULL); 5971573Srgrimes break; 5981573Srgrimes case OBOW: 5991573Srgrimes if (( (sp == m->beginp && !(m->eflags®_NOTBOL)) || 6001573Srgrimes (sp < m->endp && *(sp-1) == '\n' && 6011573Srgrimes (m->g->cflags®_NEWLINE)) || 6021573Srgrimes (sp > m->beginp && 6031573Srgrimes !ISWORD(*(sp-1))) ) && 6041573Srgrimes (sp < m->endp && ISWORD(*sp)) ) 6051573Srgrimes { /* yes */ } 6061573Srgrimes else 6071573Srgrimes return(NULL); 6081573Srgrimes break; 6091573Srgrimes case OEOW: 6101573Srgrimes if (( (sp == m->endp && !(m->eflags®_NOTEOL)) || 6111573Srgrimes (sp < m->endp && *sp == '\n' && 6121573Srgrimes (m->g->cflags®_NEWLINE)) || 6131573Srgrimes (sp < m->endp && !ISWORD(*sp)) ) && 6141573Srgrimes (sp > m->beginp && ISWORD(*(sp-1))) ) 6151573Srgrimes { /* yes */ } 6161573Srgrimes else 6171573Srgrimes return(NULL); 6181573Srgrimes break; 6191573Srgrimes case O_QUEST: 6201573Srgrimes break; 6211573Srgrimes case OOR1: /* matches null but needs to skip */ 6221573Srgrimes ss++; 6231573Srgrimes s = m->g->strip[ss]; 6241573Srgrimes do { 6251573Srgrimes assert(OP(s) == OOR2); 6261573Srgrimes ss += OPND(s); 6271573Srgrimes } while (OP(s = m->g->strip[ss]) != O_CH); 6281573Srgrimes /* note that the ss++ gets us past the O_CH */ 6291573Srgrimes break; 6301573Srgrimes default: /* have to make a choice */ 6311573Srgrimes hard = 1; 6321573Srgrimes break; 6331573Srgrimes } 6341573Srgrimes if (!hard) { /* that was it! */ 6351573Srgrimes if (sp != stop) 6361573Srgrimes return(NULL); 6371573Srgrimes return(sp); 6381573Srgrimes } 6391573Srgrimes ss--; /* adjust for the for's final increment */ 6401573Srgrimes 6411573Srgrimes /* the hard stuff */ 6421573Srgrimes AT("hard", sp, stop, ss, stopst); 6431573Srgrimes s = m->g->strip[ss]; 6441573Srgrimes switch (OP(s)) { 6451573Srgrimes case OBACK_: /* the vilest depths */ 6461573Srgrimes i = OPND(s); 6471573Srgrimes assert(0 < i && i <= m->g->nsub); 6481573Srgrimes if (m->pmatch[i].rm_eo == -1) 6491573Srgrimes return(NULL); 6501573Srgrimes assert(m->pmatch[i].rm_so != -1); 6511573Srgrimes len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so; 6521573Srgrimes assert(stop - m->beginp >= len); 6531573Srgrimes if (sp > stop - len) 6541573Srgrimes return(NULL); /* not enough left to match */ 6551573Srgrimes ssp = m->offp + m->pmatch[i].rm_so; 6561573Srgrimes if (memcmp(sp, ssp, len) != 0) 6571573Srgrimes return(NULL); 6581573Srgrimes while (m->g->strip[ss] != SOP(O_BACK, i)) 6591573Srgrimes ss++; 6601573Srgrimes return(backref(m, sp+len, stop, ss+1, stopst, lev)); 6611573Srgrimes break; 6621573Srgrimes case OQUEST_: /* to null or not */ 6631573Srgrimes dp = backref(m, sp, stop, ss+1, stopst, lev); 6641573Srgrimes if (dp != NULL) 6651573Srgrimes return(dp); /* not */ 6661573Srgrimes return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev)); 6671573Srgrimes break; 6681573Srgrimes case OPLUS_: 6691573Srgrimes assert(m->lastpos != NULL); 6701573Srgrimes assert(lev+1 <= m->g->nplus); 6711573Srgrimes m->lastpos[lev+1] = sp; 6721573Srgrimes return(backref(m, sp, stop, ss+1, stopst, lev+1)); 6731573Srgrimes break; 6741573Srgrimes case O_PLUS: 6751573Srgrimes if (sp == m->lastpos[lev]) /* last pass matched null */ 6761573Srgrimes return(backref(m, sp, stop, ss+1, stopst, lev-1)); 6771573Srgrimes /* try another pass */ 6781573Srgrimes m->lastpos[lev] = sp; 6791573Srgrimes dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev); 6801573Srgrimes if (dp == NULL) 6811573Srgrimes return(backref(m, sp, stop, ss+1, stopst, lev-1)); 6821573Srgrimes else 6831573Srgrimes return(dp); 6841573Srgrimes break; 6851573Srgrimes case OCH_: /* find the right one, if any */ 6861573Srgrimes ssub = ss + 1; 6871573Srgrimes esub = ss + OPND(s) - 1; 6881573Srgrimes assert(OP(m->g->strip[esub]) == OOR1); 6891573Srgrimes for (;;) { /* find first matching branch */ 6901573Srgrimes dp = backref(m, sp, stop, ssub, esub, lev); 6911573Srgrimes if (dp != NULL) 6921573Srgrimes return(dp); 6931573Srgrimes /* that one missed, try next one */ 6941573Srgrimes if (OP(m->g->strip[esub]) == O_CH) 6951573Srgrimes return(NULL); /* there is none */ 6961573Srgrimes esub++; 6971573Srgrimes assert(OP(m->g->strip[esub]) == OOR2); 6981573Srgrimes ssub = esub + 1; 6991573Srgrimes esub += OPND(m->g->strip[esub]); 7001573Srgrimes if (OP(m->g->strip[esub]) == OOR2) 7011573Srgrimes esub--; 7021573Srgrimes else 7031573Srgrimes assert(OP(m->g->strip[esub]) == O_CH); 7041573Srgrimes } 7051573Srgrimes break; 7061573Srgrimes case OLPAREN: /* must undo assignment if rest fails */ 7071573Srgrimes i = OPND(s); 7081573Srgrimes assert(0 < i && i <= m->g->nsub); 7091573Srgrimes offsave = m->pmatch[i].rm_so; 7101573Srgrimes m->pmatch[i].rm_so = sp - m->offp; 7111573Srgrimes dp = backref(m, sp, stop, ss+1, stopst, lev); 7121573Srgrimes if (dp != NULL) 7131573Srgrimes return(dp); 7141573Srgrimes m->pmatch[i].rm_so = offsave; 7151573Srgrimes return(NULL); 7161573Srgrimes break; 7171573Srgrimes case ORPAREN: /* must undo assignment if rest fails */ 7181573Srgrimes i = OPND(s); 7191573Srgrimes assert(0 < i && i <= m->g->nsub); 7201573Srgrimes offsave = m->pmatch[i].rm_eo; 7211573Srgrimes m->pmatch[i].rm_eo = sp - m->offp; 7221573Srgrimes dp = backref(m, sp, stop, ss+1, stopst, lev); 7231573Srgrimes if (dp != NULL) 7241573Srgrimes return(dp); 7251573Srgrimes m->pmatch[i].rm_eo = offsave; 7261573Srgrimes return(NULL); 7271573Srgrimes break; 7281573Srgrimes default: /* uh oh */ 7291573Srgrimes assert(nope); 7301573Srgrimes break; 7311573Srgrimes } 7321573Srgrimes 7331573Srgrimes /* "can't happen" */ 7341573Srgrimes assert(nope); 7351573Srgrimes /* NOTREACHED */ 73611664Sphk return "shut up gcc"; 7371573Srgrimes} 7381573Srgrimes 7391573Srgrimes/* 7401573Srgrimes - fast - step through the string at top speed 74192889Sobrien == static char *fast(struct match *m, char *start, \ 7421573Srgrimes == char *stop, sopno startst, sopno stopst); 7431573Srgrimes */ 7441573Srgrimesstatic char * /* where tentative match ended, or NULL */ 7451573Srgrimesfast(m, start, stop, startst, stopst) 74692889Sobrienstruct match *m; 7471573Srgrimeschar *start; 7481573Srgrimeschar *stop; 7491573Srgrimessopno startst; 7501573Srgrimessopno stopst; 7511573Srgrimes{ 75292889Sobrien states st = m->st; 75392889Sobrien states fresh = m->fresh; 75492889Sobrien states tmp = m->tmp; 75592889Sobrien char *p = start; 75692889Sobrien int c = (start == m->beginp) ? OUT : *(start-1); 75792889Sobrien int lastc; /* previous c */ 75892889Sobrien int flagch; 75992889Sobrien int i; 76092889Sobrien char *coldp; /* last p after which no match was underway */ 7611573Srgrimes 7621573Srgrimes CLEAR(st); 7631573Srgrimes SET1(st, startst); 7641573Srgrimes st = step(m->g, startst, stopst, st, NOTHING, st); 7651573Srgrimes ASSIGN(fresh, st); 7661573Srgrimes SP("start", st, *p); 7671573Srgrimes coldp = NULL; 7681573Srgrimes for (;;) { 7691573Srgrimes /* next character */ 7701573Srgrimes lastc = c; 7711573Srgrimes c = (p == m->endp) ? OUT : *p; 7721573Srgrimes if (EQ(st, fresh)) 7731573Srgrimes coldp = p; 7741573Srgrimes 7751573Srgrimes /* is there an EOL and/or BOL between lastc and c? */ 7761573Srgrimes flagch = '\0'; 7771573Srgrimes i = 0; 7781573Srgrimes if ( (lastc == '\n' && m->g->cflags®_NEWLINE) || 7791573Srgrimes (lastc == OUT && !(m->eflags®_NOTBOL)) ) { 7801573Srgrimes flagch = BOL; 7811573Srgrimes i = m->g->nbol; 7821573Srgrimes } 7831573Srgrimes if ( (c == '\n' && m->g->cflags®_NEWLINE) || 7841573Srgrimes (c == OUT && !(m->eflags®_NOTEOL)) ) { 7851573Srgrimes flagch = (flagch == BOL) ? BOLEOL : EOL; 7861573Srgrimes i += m->g->neol; 7871573Srgrimes } 7881573Srgrimes if (i != 0) { 7891573Srgrimes for (; i > 0; i--) 7901573Srgrimes st = step(m->g, startst, stopst, st, flagch, st); 7911573Srgrimes SP("boleol", st, c); 7921573Srgrimes } 7931573Srgrimes 7941573Srgrimes /* how about a word boundary? */ 7951573Srgrimes if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && 7961573Srgrimes (c != OUT && ISWORD(c)) ) { 7971573Srgrimes flagch = BOW; 7981573Srgrimes } 7991573Srgrimes if ( (lastc != OUT && ISWORD(lastc)) && 8001573Srgrimes (flagch == EOL || (c != OUT && !ISWORD(c))) ) { 8011573Srgrimes flagch = EOW; 8021573Srgrimes } 8031573Srgrimes if (flagch == BOW || flagch == EOW) { 8041573Srgrimes st = step(m->g, startst, stopst, st, flagch, st); 8051573Srgrimes SP("boweow", st, c); 8061573Srgrimes } 8071573Srgrimes 8081573Srgrimes /* are we done? */ 8091573Srgrimes if (ISSET(st, stopst) || p == stop) 8101573Srgrimes break; /* NOTE BREAK OUT */ 8111573Srgrimes 8121573Srgrimes /* no, we must deal with this character */ 8131573Srgrimes ASSIGN(tmp, st); 8141573Srgrimes ASSIGN(st, fresh); 8151573Srgrimes assert(c != OUT); 8161573Srgrimes st = step(m->g, startst, stopst, tmp, c, st); 8171573Srgrimes SP("aft", st, c); 8181573Srgrimes assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); 8191573Srgrimes p++; 8201573Srgrimes } 8211573Srgrimes 8221573Srgrimes assert(coldp != NULL); 8231573Srgrimes m->coldp = coldp; 8241573Srgrimes if (ISSET(st, stopst)) 8251573Srgrimes return(p+1); 8261573Srgrimes else 8271573Srgrimes return(NULL); 8281573Srgrimes} 8291573Srgrimes 8301573Srgrimes/* 8311573Srgrimes - slow - step through the string more deliberately 83292889Sobrien == static char *slow(struct match *m, char *start, \ 8331573Srgrimes == char *stop, sopno startst, sopno stopst); 8341573Srgrimes */ 8351573Srgrimesstatic char * /* where it ended */ 8361573Srgrimesslow(m, start, stop, startst, stopst) 83792889Sobrienstruct match *m; 8381573Srgrimeschar *start; 8391573Srgrimeschar *stop; 8401573Srgrimessopno startst; 8411573Srgrimessopno stopst; 8421573Srgrimes{ 84392889Sobrien states st = m->st; 84492889Sobrien states empty = m->empty; 84592889Sobrien states tmp = m->tmp; 84692889Sobrien char *p = start; 84792889Sobrien int c = (start == m->beginp) ? OUT : *(start-1); 84892889Sobrien int lastc; /* previous c */ 84992889Sobrien int flagch; 85092889Sobrien int i; 85192889Sobrien char *matchp; /* last p at which a match ended */ 8521573Srgrimes 8531573Srgrimes AT("slow", start, stop, startst, stopst); 8541573Srgrimes CLEAR(st); 8551573Srgrimes SET1(st, startst); 8561573Srgrimes SP("sstart", st, *p); 8571573Srgrimes st = step(m->g, startst, stopst, st, NOTHING, st); 8581573Srgrimes matchp = NULL; 8591573Srgrimes for (;;) { 8601573Srgrimes /* next character */ 8611573Srgrimes lastc = c; 8621573Srgrimes c = (p == m->endp) ? OUT : *p; 8631573Srgrimes 8641573Srgrimes /* is there an EOL and/or BOL between lastc and c? */ 8651573Srgrimes flagch = '\0'; 8661573Srgrimes i = 0; 8671573Srgrimes if ( (lastc == '\n' && m->g->cflags®_NEWLINE) || 8681573Srgrimes (lastc == OUT && !(m->eflags®_NOTBOL)) ) { 8691573Srgrimes flagch = BOL; 8701573Srgrimes i = m->g->nbol; 8711573Srgrimes } 8721573Srgrimes if ( (c == '\n' && m->g->cflags®_NEWLINE) || 8731573Srgrimes (c == OUT && !(m->eflags®_NOTEOL)) ) { 8741573Srgrimes flagch = (flagch == BOL) ? BOLEOL : EOL; 8751573Srgrimes i += m->g->neol; 8761573Srgrimes } 8771573Srgrimes if (i != 0) { 8781573Srgrimes for (; i > 0; i--) 8791573Srgrimes st = step(m->g, startst, stopst, st, flagch, st); 8801573Srgrimes SP("sboleol", st, c); 8811573Srgrimes } 8821573Srgrimes 8831573Srgrimes /* how about a word boundary? */ 8841573Srgrimes if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && 8851573Srgrimes (c != OUT && ISWORD(c)) ) { 8861573Srgrimes flagch = BOW; 8871573Srgrimes } 8881573Srgrimes if ( (lastc != OUT && ISWORD(lastc)) && 8891573Srgrimes (flagch == EOL || (c != OUT && !ISWORD(c))) ) { 8901573Srgrimes flagch = EOW; 8911573Srgrimes } 8921573Srgrimes if (flagch == BOW || flagch == EOW) { 8931573Srgrimes st = step(m->g, startst, stopst, st, flagch, st); 8941573Srgrimes SP("sboweow", st, c); 8951573Srgrimes } 8961573Srgrimes 8971573Srgrimes /* are we done? */ 8981573Srgrimes if (ISSET(st, stopst)) 8991573Srgrimes matchp = p; 9001573Srgrimes if (EQ(st, empty) || p == stop) 9011573Srgrimes break; /* NOTE BREAK OUT */ 9021573Srgrimes 9031573Srgrimes /* no, we must deal with this character */ 9041573Srgrimes ASSIGN(tmp, st); 9051573Srgrimes ASSIGN(st, empty); 9061573Srgrimes assert(c != OUT); 9071573Srgrimes st = step(m->g, startst, stopst, tmp, c, st); 9081573Srgrimes SP("saft", st, c); 9091573Srgrimes assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); 9101573Srgrimes p++; 9111573Srgrimes } 9121573Srgrimes 9131573Srgrimes return(matchp); 9141573Srgrimes} 9151573Srgrimes 9161573Srgrimes 9171573Srgrimes/* 9181573Srgrimes - step - map set of states reachable before char to set reachable after 91992889Sobrien == static states step(struct re_guts *g, sopno start, sopno stop, \ 92092889Sobrien == states bef, int ch, states aft); 9211573Srgrimes == #define BOL (OUT+1) 9221573Srgrimes == #define EOL (BOL+1) 9231573Srgrimes == #define BOLEOL (BOL+2) 9241573Srgrimes == #define NOTHING (BOL+3) 9251573Srgrimes == #define BOW (BOL+4) 9261573Srgrimes == #define EOW (BOL+5) 9271573Srgrimes == #define CODEMAX (BOL+5) // highest code used 9281573Srgrimes == #define NONCHAR(c) ((c) > CHAR_MAX) 9291573Srgrimes == #define NNONCHAR (CODEMAX-CHAR_MAX) 9301573Srgrimes */ 9311573Srgrimesstatic states 9321573Srgrimesstep(g, start, stop, bef, ch, aft) 93392889Sobrienstruct re_guts *g; 9341573Srgrimessopno start; /* start state within strip */ 9351573Srgrimessopno stop; /* state after stop state within strip */ 93692889Sobrienstates bef; /* states reachable before */ 9371573Srgrimesint ch; /* character or NONCHAR code */ 93892889Sobrienstates aft; /* states already known reachable after */ 9391573Srgrimes{ 94092889Sobrien cset *cs; 94192889Sobrien sop s; 94292889Sobrien sopno pc; 94392889Sobrien onestate here; /* note, macros know this name */ 94492889Sobrien sopno look; 94592889Sobrien int i; 9461573Srgrimes 9471573Srgrimes for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { 9481573Srgrimes s = g->strip[pc]; 9491573Srgrimes switch (OP(s)) { 9501573Srgrimes case OEND: 9511573Srgrimes assert(pc == stop-1); 9521573Srgrimes break; 9531573Srgrimes case OCHAR: 9541573Srgrimes /* only characters can match */ 9551573Srgrimes assert(!NONCHAR(ch) || ch != (char)OPND(s)); 9561573Srgrimes if (ch == (char)OPND(s)) 9571573Srgrimes FWD(aft, bef, 1); 9581573Srgrimes break; 9591573Srgrimes case OBOL: 9601573Srgrimes if (ch == BOL || ch == BOLEOL) 9611573Srgrimes FWD(aft, bef, 1); 9621573Srgrimes break; 9631573Srgrimes case OEOL: 9641573Srgrimes if (ch == EOL || ch == BOLEOL) 9651573Srgrimes FWD(aft, bef, 1); 9661573Srgrimes break; 9671573Srgrimes case OBOW: 9681573Srgrimes if (ch == BOW) 9691573Srgrimes FWD(aft, bef, 1); 9701573Srgrimes break; 9711573Srgrimes case OEOW: 9721573Srgrimes if (ch == EOW) 9731573Srgrimes FWD(aft, bef, 1); 9741573Srgrimes break; 9751573Srgrimes case OANY: 9761573Srgrimes if (!NONCHAR(ch)) 9771573Srgrimes FWD(aft, bef, 1); 9781573Srgrimes break; 9791573Srgrimes case OANYOF: 9801573Srgrimes cs = &g->sets[OPND(s)]; 9811573Srgrimes if (!NONCHAR(ch) && CHIN(cs, ch)) 9821573Srgrimes FWD(aft, bef, 1); 9831573Srgrimes break; 9841573Srgrimes case OBACK_: /* ignored here */ 9851573Srgrimes case O_BACK: 9861573Srgrimes FWD(aft, aft, 1); 9871573Srgrimes break; 9881573Srgrimes case OPLUS_: /* forward, this is just an empty */ 9891573Srgrimes FWD(aft, aft, 1); 9901573Srgrimes break; 9911573Srgrimes case O_PLUS: /* both forward and back */ 9921573Srgrimes FWD(aft, aft, 1); 9931573Srgrimes i = ISSETBACK(aft, OPND(s)); 9941573Srgrimes BACK(aft, aft, OPND(s)); 9951573Srgrimes if (!i && ISSETBACK(aft, OPND(s))) { 9961573Srgrimes /* oho, must reconsider loop body */ 9971573Srgrimes pc -= OPND(s) + 1; 9981573Srgrimes INIT(here, pc); 9991573Srgrimes } 10001573Srgrimes break; 10011573Srgrimes case OQUEST_: /* two branches, both forward */ 10021573Srgrimes FWD(aft, aft, 1); 10031573Srgrimes FWD(aft, aft, OPND(s)); 10041573Srgrimes break; 10051573Srgrimes case O_QUEST: /* just an empty */ 10061573Srgrimes FWD(aft, aft, 1); 10071573Srgrimes break; 10081573Srgrimes case OLPAREN: /* not significant here */ 10091573Srgrimes case ORPAREN: 10101573Srgrimes FWD(aft, aft, 1); 10111573Srgrimes break; 10121573Srgrimes case OCH_: /* mark the first two branches */ 10131573Srgrimes FWD(aft, aft, 1); 10141573Srgrimes assert(OP(g->strip[pc+OPND(s)]) == OOR2); 10151573Srgrimes FWD(aft, aft, OPND(s)); 10161573Srgrimes break; 10171573Srgrimes case OOR1: /* done a branch, find the O_CH */ 10181573Srgrimes if (ISSTATEIN(aft, here)) { 10191573Srgrimes for (look = 1; 10201573Srgrimes OP(s = g->strip[pc+look]) != O_CH; 10211573Srgrimes look += OPND(s)) 10221573Srgrimes assert(OP(s) == OOR2); 10231573Srgrimes FWD(aft, aft, look); 10241573Srgrimes } 10251573Srgrimes break; 10261573Srgrimes case OOR2: /* propagate OCH_'s marking */ 10271573Srgrimes FWD(aft, aft, 1); 10281573Srgrimes if (OP(g->strip[pc+OPND(s)]) != O_CH) { 10291573Srgrimes assert(OP(g->strip[pc+OPND(s)]) == OOR2); 10301573Srgrimes FWD(aft, aft, OPND(s)); 10311573Srgrimes } 10321573Srgrimes break; 10331573Srgrimes case O_CH: /* just empty */ 10341573Srgrimes FWD(aft, aft, 1); 10351573Srgrimes break; 10361573Srgrimes default: /* ooooops... */ 10371573Srgrimes assert(nope); 10381573Srgrimes break; 10391573Srgrimes } 10401573Srgrimes } 10411573Srgrimes 10421573Srgrimes return(aft); 10431573Srgrimes} 10441573Srgrimes 10451573Srgrimes#ifdef REDEBUG 10461573Srgrimes/* 10471573Srgrimes - print - print a set of states 10481573Srgrimes == #ifdef REDEBUG 10491573Srgrimes == static void print(struct match *m, char *caption, states st, \ 10501573Srgrimes == int ch, FILE *d); 10511573Srgrimes == #endif 10521573Srgrimes */ 10531573Srgrimesstatic void 10541573Srgrimesprint(m, caption, st, ch, d) 10551573Srgrimesstruct match *m; 10561573Srgrimeschar *caption; 10571573Srgrimesstates st; 10581573Srgrimesint ch; 10591573SrgrimesFILE *d; 10601573Srgrimes{ 106192889Sobrien struct re_guts *g = m->g; 106292889Sobrien int i; 106392889Sobrien int first = 1; 10641573Srgrimes 10651573Srgrimes if (!(m->eflags®_TRACE)) 10661573Srgrimes return; 10671573Srgrimes 10681573Srgrimes fprintf(d, "%s", caption); 10691573Srgrimes if (ch != '\0') 10701573Srgrimes fprintf(d, " %s", pchar(ch)); 10711573Srgrimes for (i = 0; i < g->nstates; i++) 10721573Srgrimes if (ISSET(st, i)) { 10731573Srgrimes fprintf(d, "%s%d", (first) ? "\t" : ", ", i); 10741573Srgrimes first = 0; 10751573Srgrimes } 10761573Srgrimes fprintf(d, "\n"); 10771573Srgrimes} 10781573Srgrimes 10798870Srgrimes/* 10801573Srgrimes - at - print current situation 10811573Srgrimes == #ifdef REDEBUG 10821573Srgrimes == static void at(struct match *m, char *title, char *start, char *stop, \ 10831573Srgrimes == sopno startst, sopno stopst); 10841573Srgrimes == #endif 10851573Srgrimes */ 10861573Srgrimesstatic void 10871573Srgrimesat(m, title, start, stop, startst, stopst) 10881573Srgrimesstruct match *m; 10891573Srgrimeschar *title; 10901573Srgrimeschar *start; 10911573Srgrimeschar *stop; 10921573Srgrimessopno startst; 10931573Srgrimessopno stopst; 10941573Srgrimes{ 10951573Srgrimes if (!(m->eflags®_TRACE)) 10961573Srgrimes return; 10971573Srgrimes 10981573Srgrimes printf("%s %s-", title, pchar(*start)); 10991573Srgrimes printf("%s ", pchar(*stop)); 11001573Srgrimes printf("%ld-%ld\n", (long)startst, (long)stopst); 11011573Srgrimes} 11021573Srgrimes 11031573Srgrimes#ifndef PCHARDONE 11041573Srgrimes#define PCHARDONE /* never again */ 11051573Srgrimes/* 11061573Srgrimes - pchar - make a character printable 11071573Srgrimes == #ifdef REDEBUG 11081573Srgrimes == static char *pchar(int ch); 11091573Srgrimes == #endif 11101573Srgrimes * 11111573Srgrimes * Is this identical to regchar() over in debug.c? Well, yes. But a 11121573Srgrimes * duplicate here avoids having a debugging-capable regexec.o tied to 11131573Srgrimes * a matching debug.o, and this is convenient. It all disappears in 11141573Srgrimes * the non-debug compilation anyway, so it doesn't matter much. 11151573Srgrimes */ 11161573Srgrimesstatic char * /* -> representation */ 11171573Srgrimespchar(ch) 11181573Srgrimesint ch; 11191573Srgrimes{ 11201573Srgrimes static char pbuf[10]; 11211573Srgrimes 112217508Sache if (isprint((uch)ch) || ch == ' ') 11231573Srgrimes sprintf(pbuf, "%c", ch); 11241573Srgrimes else 11251573Srgrimes sprintf(pbuf, "\\%o", ch); 11261573Srgrimes return(pbuf); 11271573Srgrimes} 11281573Srgrimes#endif 11291573Srgrimes#endif 11301573Srgrimes 11311573Srgrimes#undef matcher 11321573Srgrimes#undef fast 11331573Srgrimes#undef slow 11341573Srgrimes#undef dissect 11351573Srgrimes#undef backref 11361573Srgrimes#undef step 11371573Srgrimes#undef print 11381573Srgrimes#undef at 11391573Srgrimes#undef match 1140