engine.c revision 167216
11573Srgrimes/*- 21573Srgrimes * Copyright (c) 1992, 1993, 1994 Henry Spencer. 31573Srgrimes * Copyright (c) 1992, 1993, 1994 41573Srgrimes * The Regents of the University of California. All rights reserved. 51573Srgrimes * 61573Srgrimes * This code is derived from software contributed to Berkeley by 71573Srgrimes * Henry Spencer. 81573Srgrimes * 91573Srgrimes * Redistribution and use in source and binary forms, with or without 101573Srgrimes * modification, are permitted provided that the following conditions 111573Srgrimes * are met: 121573Srgrimes * 1. Redistributions of source code must retain the above copyright 131573Srgrimes * notice, this list of conditions and the following disclaimer. 141573Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 151573Srgrimes * notice, this list of conditions and the following disclaimer in the 161573Srgrimes * documentation and/or other materials provided with the distribution. 171573Srgrimes * 4. Neither the name of the University nor the names of its contributors 181573Srgrimes * may be used to endorse or promote products derived from this software 191573Srgrimes * without specific prior written permission. 201573Srgrimes * 211573Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 221573Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 231573Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 241573Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 251573Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 261573Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 271573Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 281573Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 291573Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 301573Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 311573Srgrimes * SUCH DAMAGE. 321573Srgrimes * 331573Srgrimes * @(#)engine.c 8.5 (Berkeley) 3/20/94 341573Srgrimes */ 351573Srgrimes 3692986Sobrien#include <sys/cdefs.h> 3792986Sobrien__FBSDID("$FreeBSD: head/lib/libc/regex/engine.c 167216 2007-03-05 03:07:36Z delphij $"); 3892986Sobrien 391573Srgrimes/* 401573Srgrimes * The matching engine and friends. This file is #included by regexec.c 411573Srgrimes * after suitable #defines of a variety of macros used herein, so that 421573Srgrimes * different state representations can be used without duplicating masses 431573Srgrimes * of code. 441573Srgrimes */ 451573Srgrimes 461573Srgrimes#ifdef SNAMES 471573Srgrimes#define matcher smatcher 481573Srgrimes#define fast sfast 491573Srgrimes#define slow sslow 501573Srgrimes#define dissect sdissect 511573Srgrimes#define backref sbackref 521573Srgrimes#define step sstep 531573Srgrimes#define print sprint 541573Srgrimes#define at sat 551573Srgrimes#define match smat 561573Srgrimes#endif 571573Srgrimes#ifdef LNAMES 581573Srgrimes#define matcher lmatcher 591573Srgrimes#define fast lfast 601573Srgrimes#define slow lslow 611573Srgrimes#define dissect ldissect 621573Srgrimes#define backref lbackref 631573Srgrimes#define step lstep 641573Srgrimes#define print lprint 651573Srgrimes#define at lat 661573Srgrimes#define match lmat 671573Srgrimes#endif 68132019Stjr#ifdef MNAMES 69132019Stjr#define matcher mmatcher 70132019Stjr#define fast mfast 71132019Stjr#define slow mslow 72132019Stjr#define dissect mdissect 73132019Stjr#define backref mbackref 74132019Stjr#define step mstep 75132019Stjr#define print mprint 76132019Stjr#define at mat 77132019Stjr#define match mmat 78132019Stjr#endif 791573Srgrimes 801573Srgrimes/* another structure passed up and down to avoid zillions of parameters */ 811573Srgrimesstruct match { 821573Srgrimes struct re_guts *g; 831573Srgrimes int eflags; 841573Srgrimes regmatch_t *pmatch; /* [nsub+1] (0 element unused) */ 851573Srgrimes char *offp; /* offsets work from here */ 861573Srgrimes char *beginp; /* start of string -- virtual NUL precedes */ 871573Srgrimes char *endp; /* end of string -- virtual NUL here */ 881573Srgrimes char *coldp; /* can be no match starting before here */ 891573Srgrimes char **lastpos; /* [nplus+1] */ 901573Srgrimes STATEVARS; 911573Srgrimes states st; /* current states */ 921573Srgrimes states fresh; /* states for a fresh start */ 931573Srgrimes states tmp; /* temporary */ 941573Srgrimes states empty; /* empty set of states */ 95132019Stjr mbstate_t mbs; /* multibyte conversion state */ 961573Srgrimes}; 971573Srgrimes 981573Srgrimes/* ========= begin header generated by ./mkh ========= */ 991573Srgrimes#ifdef __cplusplus 1001573Srgrimesextern "C" { 1011573Srgrimes#endif 1021573Srgrimes 1031573Srgrimes/* === engine.c === */ 10492905Sobrienstatic int matcher(struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], int eflags); 10592905Sobrienstatic char *dissect(struct match *m, char *start, char *stop, sopno startst, sopno stopst); 10692905Sobrienstatic char *backref(struct match *m, char *start, char *stop, sopno startst, sopno stopst, sopno lev); 10792905Sobrienstatic char *fast(struct match *m, char *start, char *stop, sopno startst, sopno stopst); 10892905Sobrienstatic char *slow(struct match *m, char *start, char *stop, sopno startst, sopno stopst); 109132019Stjrstatic states step(struct re_guts *g, sopno start, sopno stop, states bef, wint_t ch, states aft); 110132019Stjr#define BOL (OUT-1) 111132019Stjr#define EOL (BOL-1) 112132019Stjr#define BOLEOL (BOL-2) 113132019Stjr#define NOTHING (BOL-3) 114132019Stjr#define BOW (BOL-4) 115132019Stjr#define EOW (BOL-5) 116132019Stjr#define BADCHAR (BOL-6) 117132019Stjr#define NONCHAR(c) ((c) <= OUT) 1181573Srgrimes#ifdef REDEBUG 11992905Sobrienstatic void print(struct match *m, char *caption, states st, int ch, FILE *d); 1201573Srgrimes#endif 1211573Srgrimes#ifdef REDEBUG 12292905Sobrienstatic void at(struct match *m, char *title, char *start, char *stop, sopno startst, sopno stopst); 1231573Srgrimes#endif 1241573Srgrimes#ifdef REDEBUG 12592905Sobrienstatic char *pchar(int ch); 1261573Srgrimes#endif 1271573Srgrimes 1281573Srgrimes#ifdef __cplusplus 1291573Srgrimes} 1301573Srgrimes#endif 1311573Srgrimes/* ========= end header generated by ./mkh ========= */ 1321573Srgrimes 1331573Srgrimes#ifdef REDEBUG 1341573Srgrimes#define SP(t, s, c) print(m, t, s, c, stdout) 1351573Srgrimes#define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2) 1361573Srgrimes#define NOTE(str) { if (m->eflags®_TRACE) printf("=%s\n", (str)); } 1371573Srgrimes#else 1381573Srgrimes#define SP(t, s, c) /* nothing */ 1391573Srgrimes#define AT(t, p1, p2, s1, s2) /* nothing */ 1401573Srgrimes#define NOTE(s) /* nothing */ 1411573Srgrimes#endif 1421573Srgrimes 1431573Srgrimes/* 1441573Srgrimes - matcher - the actual matching engine 14592889Sobrien == static int matcher(struct re_guts *g, char *string, \ 1461573Srgrimes == size_t nmatch, regmatch_t pmatch[], int eflags); 1471573Srgrimes */ 1481573Srgrimesstatic int /* 0 success, REG_NOMATCH failure */ 1491573Srgrimesmatcher(g, string, nmatch, pmatch, eflags) 15092889Sobrienstruct re_guts *g; 1511573Srgrimeschar *string; 1521573Srgrimessize_t nmatch; 1531573Srgrimesregmatch_t pmatch[]; 1541573Srgrimesint eflags; 1551573Srgrimes{ 15692889Sobrien char *endp; 15792889Sobrien int i; 1581573Srgrimes struct match mv; 15992889Sobrien struct match *m = &mv; 16092889Sobrien char *dp; 16192889Sobrien const sopno gf = g->firststate+1; /* +1 for OEND */ 16292889Sobrien const sopno gl = g->laststate; 1631573Srgrimes char *start; 1641573Srgrimes char *stop; 16562232Sdcs /* Boyer-Moore algorithms variables */ 16692889Sobrien char *pp; 16762232Sdcs int cj, mj; 16892889Sobrien char *mustfirst; 16992889Sobrien char *mustlast; 17092889Sobrien int *matchjump; 17192889Sobrien int *charjump; 1721573Srgrimes 1731573Srgrimes /* simplify the situation where possible */ 1741573Srgrimes if (g->cflags®_NOSUB) 1751573Srgrimes nmatch = 0; 1761573Srgrimes if (eflags®_STARTEND) { 1771573Srgrimes start = string + pmatch[0].rm_so; 1781573Srgrimes stop = string + pmatch[0].rm_eo; 1791573Srgrimes } else { 1801573Srgrimes start = string; 1811573Srgrimes stop = start + strlen(start); 1821573Srgrimes } 1831573Srgrimes if (stop < start) 1841573Srgrimes return(REG_INVARG); 1851573Srgrimes 1861573Srgrimes /* prescreening; this does wonders for this rather slow code */ 1871573Srgrimes if (g->must != NULL) { 18862391Sdcs if (g->charjump != NULL && g->matchjump != NULL) { 18962232Sdcs mustfirst = g->must; 19062232Sdcs mustlast = g->must + g->mlen - 1; 19162232Sdcs charjump = g->charjump; 19262232Sdcs matchjump = g->matchjump; 19362232Sdcs pp = mustlast; 19462754Sdcs for (dp = start+g->mlen-1; dp < stop;) { 19562232Sdcs /* Fast skip non-matches */ 196111010Snectar while (dp < stop && charjump[(int)*dp]) 197111010Snectar dp += charjump[(int)*dp]; 19862232Sdcs 19962754Sdcs if (dp >= stop) 20062232Sdcs break; 20162232Sdcs 20262232Sdcs /* Greedy matcher */ 20362232Sdcs /* We depend on not being used for 20462232Sdcs * for strings of length 1 20562232Sdcs */ 20662754Sdcs while (*--dp == *--pp && pp != mustfirst); 20762232Sdcs 20862754Sdcs if (*dp == *pp) 20962232Sdcs break; 21062232Sdcs 21162232Sdcs /* Jump to next possible match */ 21262232Sdcs mj = matchjump[pp - mustfirst]; 213111010Snectar cj = charjump[(int)*dp]; 21462754Sdcs dp += (cj < mj ? mj : cj); 21562754Sdcs pp = mustlast; 21662232Sdcs } 21762391Sdcs if (pp != mustfirst) 21862232Sdcs return(REG_NOMATCH); 21962232Sdcs } else { 22062232Sdcs for (dp = start; dp < stop; dp++) 22162232Sdcs if (*dp == g->must[0] && 22262232Sdcs stop - dp >= g->mlen && 22362232Sdcs memcmp(dp, g->must, (size_t)g->mlen) == 0) 22462232Sdcs break; 22562232Sdcs if (dp == stop) /* we didn't find g->must */ 22662232Sdcs return(REG_NOMATCH); 22762232Sdcs } 2281573Srgrimes } 2291573Srgrimes 2301573Srgrimes /* match struct setup */ 2311573Srgrimes m->g = g; 2321573Srgrimes m->eflags = eflags; 2331573Srgrimes m->pmatch = NULL; 2341573Srgrimes m->lastpos = NULL; 2351573Srgrimes m->offp = string; 2361573Srgrimes m->beginp = start; 2371573Srgrimes m->endp = stop; 2381573Srgrimes STATESETUP(m, 4); 2391573Srgrimes SETUP(m->st); 2401573Srgrimes SETUP(m->fresh); 2411573Srgrimes SETUP(m->tmp); 2421573Srgrimes SETUP(m->empty); 2431573Srgrimes CLEAR(m->empty); 244132019Stjr ZAPSTATE(&m->mbs); 2451573Srgrimes 24662391Sdcs /* Adjust start according to moffset, to speed things up */ 24762391Sdcs if (g->moffset > -1) 24862854Sdcs start = ((dp - g->moffset) < start) ? start : dp - g->moffset; 24962391Sdcs 2501573Srgrimes /* this loop does only one repetition except for backrefs */ 2511573Srgrimes for (;;) { 2521573Srgrimes endp = fast(m, start, stop, gf, gl); 2531573Srgrimes if (endp == NULL) { /* a miss */ 254139437Sdds if (m->pmatch != NULL) 255139437Sdds free((char *)m->pmatch); 256139437Sdds if (m->lastpos != NULL) 257139437Sdds free((char *)m->lastpos); 2581573Srgrimes STATETEARDOWN(m); 2591573Srgrimes return(REG_NOMATCH); 2601573Srgrimes } 2611573Srgrimes if (nmatch == 0 && !g->backrefs) 2621573Srgrimes break; /* no further info needed */ 2631573Srgrimes 2641573Srgrimes /* where? */ 2651573Srgrimes assert(m->coldp != NULL); 2661573Srgrimes for (;;) { 2671573Srgrimes NOTE("finding start"); 2681573Srgrimes endp = slow(m, m->coldp, stop, gf, gl); 2691573Srgrimes if (endp != NULL) 2701573Srgrimes break; 2711573Srgrimes assert(m->coldp < m->endp); 272132019Stjr m->coldp += XMBRTOWC(NULL, m->coldp, 273132019Stjr m->endp - m->coldp, &m->mbs, 0); 2741573Srgrimes } 2751573Srgrimes if (nmatch == 1 && !g->backrefs) 2761573Srgrimes break; /* no further info needed */ 2771573Srgrimes 2781573Srgrimes /* oh my, he wants the subexpressions... */ 2791573Srgrimes if (m->pmatch == NULL) 2801573Srgrimes m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) * 2811573Srgrimes sizeof(regmatch_t)); 2821573Srgrimes if (m->pmatch == NULL) { 2831573Srgrimes STATETEARDOWN(m); 2841573Srgrimes return(REG_ESPACE); 2851573Srgrimes } 2861573Srgrimes for (i = 1; i <= m->g->nsub; i++) 2871573Srgrimes m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1; 2881573Srgrimes if (!g->backrefs && !(m->eflags®_BACKR)) { 2891573Srgrimes NOTE("dissecting"); 2901573Srgrimes dp = dissect(m, m->coldp, endp, gf, gl); 2911573Srgrimes } else { 2921573Srgrimes if (g->nplus > 0 && m->lastpos == NULL) 2931573Srgrimes m->lastpos = (char **)malloc((g->nplus+1) * 2941573Srgrimes sizeof(char *)); 2951573Srgrimes if (g->nplus > 0 && m->lastpos == NULL) { 2961573Srgrimes free(m->pmatch); 2971573Srgrimes STATETEARDOWN(m); 2981573Srgrimes return(REG_ESPACE); 2991573Srgrimes } 3001573Srgrimes NOTE("backref dissect"); 3011573Srgrimes dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); 3021573Srgrimes } 3031573Srgrimes if (dp != NULL) 3041573Srgrimes break; 3051573Srgrimes 3061573Srgrimes /* uh-oh... we couldn't find a subexpression-level match */ 3071573Srgrimes assert(g->backrefs); /* must be back references doing it */ 3081573Srgrimes assert(g->nplus == 0 || m->lastpos != NULL); 3091573Srgrimes for (;;) { 3101573Srgrimes if (dp != NULL || endp <= m->coldp) 3111573Srgrimes break; /* defeat */ 3121573Srgrimes NOTE("backoff"); 3131573Srgrimes endp = slow(m, m->coldp, endp-1, gf, gl); 3141573Srgrimes if (endp == NULL) 3151573Srgrimes break; /* defeat */ 3161573Srgrimes /* try it on a shorter possibility */ 3171573Srgrimes#ifndef NDEBUG 3181573Srgrimes for (i = 1; i <= m->g->nsub; i++) { 3191573Srgrimes assert(m->pmatch[i].rm_so == -1); 3201573Srgrimes assert(m->pmatch[i].rm_eo == -1); 3211573Srgrimes } 3221573Srgrimes#endif 3231573Srgrimes NOTE("backoff dissect"); 3241573Srgrimes dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); 3251573Srgrimes } 3261573Srgrimes assert(dp == NULL || dp == endp); 3271573Srgrimes if (dp != NULL) /* found a shorter one */ 3281573Srgrimes break; 3291573Srgrimes 3301573Srgrimes /* despite initial appearances, there is no match here */ 3311573Srgrimes NOTE("false alarm"); 332132019Stjr /* recycle starting later */ 333132019Stjr start = m->coldp + XMBRTOWC(NULL, m->coldp, 334137959Stjr stop - m->coldp, &m->mbs, 0); 3351573Srgrimes assert(start <= stop); 3361573Srgrimes } 3371573Srgrimes 3381573Srgrimes /* fill in the details if requested */ 3391573Srgrimes if (nmatch > 0) { 3401573Srgrimes pmatch[0].rm_so = m->coldp - m->offp; 3411573Srgrimes pmatch[0].rm_eo = endp - m->offp; 3421573Srgrimes } 3431573Srgrimes if (nmatch > 1) { 3441573Srgrimes assert(m->pmatch != NULL); 3451573Srgrimes for (i = 1; i < nmatch; i++) 3461573Srgrimes if (i <= m->g->nsub) 3471573Srgrimes pmatch[i] = m->pmatch[i]; 3481573Srgrimes else { 3491573Srgrimes pmatch[i].rm_so = -1; 3501573Srgrimes pmatch[i].rm_eo = -1; 3511573Srgrimes } 3521573Srgrimes } 3531573Srgrimes 3541573Srgrimes if (m->pmatch != NULL) 3551573Srgrimes free((char *)m->pmatch); 3561573Srgrimes if (m->lastpos != NULL) 3571573Srgrimes free((char *)m->lastpos); 3581573Srgrimes STATETEARDOWN(m); 3591573Srgrimes return(0); 3601573Srgrimes} 3611573Srgrimes 3621573Srgrimes/* 3631573Srgrimes - dissect - figure out what matched what, no back references 36492889Sobrien == static char *dissect(struct match *m, char *start, \ 3651573Srgrimes == char *stop, sopno startst, sopno stopst); 3661573Srgrimes */ 3671573Srgrimesstatic char * /* == stop (success) always */ 3681573Srgrimesdissect(m, start, stop, startst, stopst) 36992889Sobrienstruct match *m; 3701573Srgrimeschar *start; 3711573Srgrimeschar *stop; 3721573Srgrimessopno startst; 3731573Srgrimessopno stopst; 3741573Srgrimes{ 37592889Sobrien int i; 37692889Sobrien sopno ss; /* start sop of current subRE */ 37792889Sobrien sopno es; /* end sop of current subRE */ 37892889Sobrien char *sp; /* start of string matched by it */ 37992889Sobrien char *stp; /* string matched by it cannot pass here */ 38092889Sobrien char *rest; /* start of rest of string */ 38192889Sobrien char *tail; /* string unmatched by rest of RE */ 38292889Sobrien sopno ssub; /* start sop of subsubRE */ 38392889Sobrien sopno esub; /* end sop of subsubRE */ 38492889Sobrien char *ssp; /* start of string matched by subsubRE */ 38592889Sobrien char *sep; /* end of string matched by subsubRE */ 38692889Sobrien char *oldssp; /* previous ssp */ 38792889Sobrien char *dp; 3881573Srgrimes 3891573Srgrimes AT("diss", start, stop, startst, stopst); 3901573Srgrimes sp = start; 3911573Srgrimes for (ss = startst; ss < stopst; ss = es) { 3921573Srgrimes /* identify end of subRE */ 3931573Srgrimes es = ss; 3941573Srgrimes switch (OP(m->g->strip[es])) { 3951573Srgrimes case OPLUS_: 3961573Srgrimes case OQUEST_: 3971573Srgrimes es += OPND(m->g->strip[es]); 3981573Srgrimes break; 3991573Srgrimes case OCH_: 4001573Srgrimes while (OP(m->g->strip[es]) != O_CH) 4011573Srgrimes es += OPND(m->g->strip[es]); 4021573Srgrimes break; 4031573Srgrimes } 4041573Srgrimes es++; 4051573Srgrimes 4061573Srgrimes /* figure out what it matched */ 4071573Srgrimes switch (OP(m->g->strip[ss])) { 4081573Srgrimes case OEND: 4091573Srgrimes assert(nope); 4101573Srgrimes break; 4111573Srgrimes case OCHAR: 412132019Stjr sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0); 4131573Srgrimes break; 4141573Srgrimes case OBOL: 4151573Srgrimes case OEOL: 4161573Srgrimes case OBOW: 4171573Srgrimes case OEOW: 4181573Srgrimes break; 4191573Srgrimes case OANY: 4201573Srgrimes case OANYOF: 421132019Stjr sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0); 4221573Srgrimes break; 4231573Srgrimes case OBACK_: 4241573Srgrimes case O_BACK: 4251573Srgrimes assert(nope); 4261573Srgrimes break; 4271573Srgrimes /* cases where length of match is hard to find */ 4281573Srgrimes case OQUEST_: 4291573Srgrimes stp = stop; 4301573Srgrimes for (;;) { 4311573Srgrimes /* how long could this one be? */ 4321573Srgrimes rest = slow(m, sp, stp, ss, es); 4331573Srgrimes assert(rest != NULL); /* it did match */ 4341573Srgrimes /* could the rest match the rest? */ 4351573Srgrimes tail = slow(m, rest, stop, es, stopst); 4361573Srgrimes if (tail == stop) 4371573Srgrimes break; /* yes! */ 4381573Srgrimes /* no -- try a shorter match for this one */ 4391573Srgrimes stp = rest - 1; 4401573Srgrimes assert(stp >= sp); /* it did work */ 4411573Srgrimes } 4421573Srgrimes ssub = ss + 1; 4431573Srgrimes esub = es - 1; 4441573Srgrimes /* did innards match? */ 4451573Srgrimes if (slow(m, sp, rest, ssub, esub) != NULL) { 4461573Srgrimes dp = dissect(m, sp, rest, ssub, esub); 4471573Srgrimes assert(dp == rest); 4481573Srgrimes } else /* no */ 4491573Srgrimes assert(sp == rest); 4501573Srgrimes sp = rest; 4511573Srgrimes break; 4521573Srgrimes case OPLUS_: 4531573Srgrimes stp = stop; 4541573Srgrimes for (;;) { 4551573Srgrimes /* how long could this one be? */ 4561573Srgrimes rest = slow(m, sp, stp, ss, es); 4571573Srgrimes assert(rest != NULL); /* it did match */ 4581573Srgrimes /* could the rest match the rest? */ 4591573Srgrimes tail = slow(m, rest, stop, es, stopst); 4601573Srgrimes if (tail == stop) 4611573Srgrimes break; /* yes! */ 4621573Srgrimes /* no -- try a shorter match for this one */ 4631573Srgrimes stp = rest - 1; 4641573Srgrimes assert(stp >= sp); /* it did work */ 4651573Srgrimes } 4661573Srgrimes ssub = ss + 1; 4671573Srgrimes esub = es - 1; 4681573Srgrimes ssp = sp; 4691573Srgrimes oldssp = ssp; 4701573Srgrimes for (;;) { /* find last match of innards */ 4711573Srgrimes sep = slow(m, ssp, rest, ssub, esub); 4721573Srgrimes if (sep == NULL || sep == ssp) 4731573Srgrimes break; /* failed or matched null */ 4741573Srgrimes oldssp = ssp; /* on to next try */ 4751573Srgrimes ssp = sep; 4761573Srgrimes } 4771573Srgrimes if (sep == NULL) { 4781573Srgrimes /* last successful match */ 4791573Srgrimes sep = ssp; 4801573Srgrimes ssp = oldssp; 4811573Srgrimes } 4821573Srgrimes assert(sep == rest); /* must exhaust substring */ 4831573Srgrimes assert(slow(m, ssp, sep, ssub, esub) == rest); 4841573Srgrimes dp = dissect(m, ssp, sep, ssub, esub); 4851573Srgrimes assert(dp == sep); 4861573Srgrimes sp = rest; 4871573Srgrimes break; 4881573Srgrimes case OCH_: 4891573Srgrimes stp = stop; 4901573Srgrimes for (;;) { 4911573Srgrimes /* how long could this one be? */ 4921573Srgrimes rest = slow(m, sp, stp, ss, es); 4931573Srgrimes assert(rest != NULL); /* it did match */ 4941573Srgrimes /* could the rest match the rest? */ 4951573Srgrimes tail = slow(m, rest, stop, es, stopst); 4961573Srgrimes if (tail == stop) 4971573Srgrimes break; /* yes! */ 4981573Srgrimes /* no -- try a shorter match for this one */ 4991573Srgrimes stp = rest - 1; 5001573Srgrimes assert(stp >= sp); /* it did work */ 5011573Srgrimes } 5021573Srgrimes ssub = ss + 1; 5031573Srgrimes esub = ss + OPND(m->g->strip[ss]) - 1; 5041573Srgrimes assert(OP(m->g->strip[esub]) == OOR1); 5051573Srgrimes for (;;) { /* find first matching branch */ 5061573Srgrimes if (slow(m, sp, rest, ssub, esub) == rest) 5071573Srgrimes break; /* it matched all of it */ 5081573Srgrimes /* that one missed, try next one */ 5091573Srgrimes assert(OP(m->g->strip[esub]) == OOR1); 5101573Srgrimes esub++; 5111573Srgrimes assert(OP(m->g->strip[esub]) == OOR2); 5121573Srgrimes ssub = esub + 1; 5131573Srgrimes esub += OPND(m->g->strip[esub]); 5141573Srgrimes if (OP(m->g->strip[esub]) == OOR2) 5151573Srgrimes esub--; 5161573Srgrimes else 5171573Srgrimes assert(OP(m->g->strip[esub]) == O_CH); 5181573Srgrimes } 5191573Srgrimes dp = dissect(m, sp, rest, ssub, esub); 5201573Srgrimes assert(dp == rest); 5211573Srgrimes sp = rest; 5221573Srgrimes break; 5231573Srgrimes case O_PLUS: 5241573Srgrimes case O_QUEST: 5251573Srgrimes case OOR1: 5261573Srgrimes case OOR2: 5271573Srgrimes case O_CH: 5281573Srgrimes assert(nope); 5291573Srgrimes break; 5301573Srgrimes case OLPAREN: 5311573Srgrimes i = OPND(m->g->strip[ss]); 5321573Srgrimes assert(0 < i && i <= m->g->nsub); 5331573Srgrimes m->pmatch[i].rm_so = sp - m->offp; 5341573Srgrimes break; 5351573Srgrimes case ORPAREN: 5361573Srgrimes i = OPND(m->g->strip[ss]); 5371573Srgrimes assert(0 < i && i <= m->g->nsub); 5381573Srgrimes m->pmatch[i].rm_eo = sp - m->offp; 5391573Srgrimes break; 5401573Srgrimes default: /* uh oh */ 5411573Srgrimes assert(nope); 5421573Srgrimes break; 5431573Srgrimes } 5441573Srgrimes } 5451573Srgrimes 5461573Srgrimes assert(sp == stop); 5471573Srgrimes return(sp); 5481573Srgrimes} 5491573Srgrimes 5501573Srgrimes/* 5511573Srgrimes - backref - figure out what matched what, figuring in back references 55292889Sobrien == static char *backref(struct match *m, char *start, \ 5531573Srgrimes == char *stop, sopno startst, sopno stopst, sopno lev); 5541573Srgrimes */ 5551573Srgrimesstatic char * /* == stop (success) or NULL (failure) */ 5561573Srgrimesbackref(m, start, stop, startst, stopst, lev) 55792889Sobrienstruct match *m; 5581573Srgrimeschar *start; 5591573Srgrimeschar *stop; 5601573Srgrimessopno startst; 5611573Srgrimessopno stopst; 5621573Srgrimessopno lev; /* PLUS nesting level */ 5631573Srgrimes{ 56492889Sobrien int i; 56592889Sobrien sopno ss; /* start sop of current subRE */ 56692889Sobrien char *sp; /* start of string matched by it */ 56792889Sobrien sopno ssub; /* start sop of subsubRE */ 56892889Sobrien sopno esub; /* end sop of subsubRE */ 56992889Sobrien char *ssp; /* start of string matched by subsubRE */ 57092889Sobrien char *dp; 57192889Sobrien size_t len; 57292889Sobrien int hard; 57392889Sobrien sop s; 57492889Sobrien regoff_t offsave; 57592889Sobrien cset *cs; 576132019Stjr wint_t wc; 5771573Srgrimes 5781573Srgrimes AT("back", start, stop, startst, stopst); 5791573Srgrimes sp = start; 5801573Srgrimes 5811573Srgrimes /* get as far as we can with easy stuff */ 5821573Srgrimes hard = 0; 5831573Srgrimes for (ss = startst; !hard && ss < stopst; ss++) 5841573Srgrimes switch (OP(s = m->g->strip[ss])) { 5851573Srgrimes case OCHAR: 586132019Stjr if (sp == stop) 5871573Srgrimes return(NULL); 588132019Stjr sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR); 589132019Stjr if (wc != OPND(s)) 590132019Stjr return(NULL); 5911573Srgrimes break; 5921573Srgrimes case OANY: 5931573Srgrimes if (sp == stop) 5941573Srgrimes return(NULL); 595132019Stjr sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR); 596132019Stjr if (wc == BADCHAR) 597132019Stjr return (NULL); 5981573Srgrimes break; 5991573Srgrimes case OANYOF: 600132019Stjr if (sp == stop) 601132019Stjr return (NULL); 6021573Srgrimes cs = &m->g->sets[OPND(s)]; 603132019Stjr sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR); 604132019Stjr if (wc == BADCHAR || !CHIN(cs, wc)) 6051573Srgrimes return(NULL); 6061573Srgrimes break; 6071573Srgrimes case OBOL: 6081573Srgrimes if ( (sp == m->beginp && !(m->eflags®_NOTBOL)) || 6091573Srgrimes (sp < m->endp && *(sp-1) == '\n' && 6101573Srgrimes (m->g->cflags®_NEWLINE)) ) 6111573Srgrimes { /* yes */ } 6121573Srgrimes else 6131573Srgrimes return(NULL); 6141573Srgrimes break; 6151573Srgrimes case OEOL: 6161573Srgrimes if ( (sp == m->endp && !(m->eflags®_NOTEOL)) || 6171573Srgrimes (sp < m->endp && *sp == '\n' && 6181573Srgrimes (m->g->cflags®_NEWLINE)) ) 6191573Srgrimes { /* yes */ } 6201573Srgrimes else 6211573Srgrimes return(NULL); 6221573Srgrimes break; 6231573Srgrimes case OBOW: 6241573Srgrimes if (( (sp == m->beginp && !(m->eflags®_NOTBOL)) || 6251573Srgrimes (sp < m->endp && *(sp-1) == '\n' && 6261573Srgrimes (m->g->cflags®_NEWLINE)) || 6271573Srgrimes (sp > m->beginp && 6281573Srgrimes !ISWORD(*(sp-1))) ) && 6291573Srgrimes (sp < m->endp && ISWORD(*sp)) ) 6301573Srgrimes { /* yes */ } 6311573Srgrimes else 6321573Srgrimes return(NULL); 6331573Srgrimes break; 6341573Srgrimes case OEOW: 6351573Srgrimes if (( (sp == m->endp && !(m->eflags®_NOTEOL)) || 6361573Srgrimes (sp < m->endp && *sp == '\n' && 6371573Srgrimes (m->g->cflags®_NEWLINE)) || 6381573Srgrimes (sp < m->endp && !ISWORD(*sp)) ) && 6391573Srgrimes (sp > m->beginp && ISWORD(*(sp-1))) ) 6401573Srgrimes { /* yes */ } 6411573Srgrimes else 6421573Srgrimes return(NULL); 6431573Srgrimes break; 6441573Srgrimes case O_QUEST: 6451573Srgrimes break; 6461573Srgrimes case OOR1: /* matches null but needs to skip */ 6471573Srgrimes ss++; 6481573Srgrimes s = m->g->strip[ss]; 6491573Srgrimes do { 6501573Srgrimes assert(OP(s) == OOR2); 6511573Srgrimes ss += OPND(s); 6521573Srgrimes } while (OP(s = m->g->strip[ss]) != O_CH); 6531573Srgrimes /* note that the ss++ gets us past the O_CH */ 6541573Srgrimes break; 6551573Srgrimes default: /* have to make a choice */ 6561573Srgrimes hard = 1; 6571573Srgrimes break; 6581573Srgrimes } 6591573Srgrimes if (!hard) { /* that was it! */ 6601573Srgrimes if (sp != stop) 6611573Srgrimes return(NULL); 6621573Srgrimes return(sp); 6631573Srgrimes } 6641573Srgrimes ss--; /* adjust for the for's final increment */ 6651573Srgrimes 6661573Srgrimes /* the hard stuff */ 6671573Srgrimes AT("hard", sp, stop, ss, stopst); 6681573Srgrimes s = m->g->strip[ss]; 6691573Srgrimes switch (OP(s)) { 6701573Srgrimes case OBACK_: /* the vilest depths */ 6711573Srgrimes i = OPND(s); 6721573Srgrimes assert(0 < i && i <= m->g->nsub); 6731573Srgrimes if (m->pmatch[i].rm_eo == -1) 6741573Srgrimes return(NULL); 6751573Srgrimes assert(m->pmatch[i].rm_so != -1); 6761573Srgrimes len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so; 677167216Sdelphij if (len == 0) 678167216Sdelphij return(NULL); 6791573Srgrimes assert(stop - m->beginp >= len); 6801573Srgrimes if (sp > stop - len) 6811573Srgrimes return(NULL); /* not enough left to match */ 6821573Srgrimes ssp = m->offp + m->pmatch[i].rm_so; 6831573Srgrimes if (memcmp(sp, ssp, len) != 0) 6841573Srgrimes return(NULL); 6851573Srgrimes while (m->g->strip[ss] != SOP(O_BACK, i)) 6861573Srgrimes ss++; 6871573Srgrimes return(backref(m, sp+len, stop, ss+1, stopst, lev)); 6881573Srgrimes break; 6891573Srgrimes case OQUEST_: /* to null or not */ 6901573Srgrimes dp = backref(m, sp, stop, ss+1, stopst, lev); 6911573Srgrimes if (dp != NULL) 6921573Srgrimes return(dp); /* not */ 6931573Srgrimes return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev)); 6941573Srgrimes break; 6951573Srgrimes case OPLUS_: 6961573Srgrimes assert(m->lastpos != NULL); 6971573Srgrimes assert(lev+1 <= m->g->nplus); 6981573Srgrimes m->lastpos[lev+1] = sp; 6991573Srgrimes return(backref(m, sp, stop, ss+1, stopst, lev+1)); 7001573Srgrimes break; 7011573Srgrimes case O_PLUS: 7021573Srgrimes if (sp == m->lastpos[lev]) /* last pass matched null */ 7031573Srgrimes return(backref(m, sp, stop, ss+1, stopst, lev-1)); 7041573Srgrimes /* try another pass */ 7051573Srgrimes m->lastpos[lev] = sp; 7061573Srgrimes dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev); 7071573Srgrimes if (dp == NULL) 7081573Srgrimes return(backref(m, sp, stop, ss+1, stopst, lev-1)); 7091573Srgrimes else 7101573Srgrimes return(dp); 7111573Srgrimes break; 7121573Srgrimes case OCH_: /* find the right one, if any */ 7131573Srgrimes ssub = ss + 1; 7141573Srgrimes esub = ss + OPND(s) - 1; 7151573Srgrimes assert(OP(m->g->strip[esub]) == OOR1); 7161573Srgrimes for (;;) { /* find first matching branch */ 7171573Srgrimes dp = backref(m, sp, stop, ssub, esub, lev); 7181573Srgrimes if (dp != NULL) 7191573Srgrimes return(dp); 7201573Srgrimes /* that one missed, try next one */ 7211573Srgrimes if (OP(m->g->strip[esub]) == O_CH) 7221573Srgrimes return(NULL); /* there is none */ 7231573Srgrimes esub++; 7241573Srgrimes assert(OP(m->g->strip[esub]) == OOR2); 7251573Srgrimes ssub = esub + 1; 7261573Srgrimes esub += OPND(m->g->strip[esub]); 7271573Srgrimes if (OP(m->g->strip[esub]) == OOR2) 7281573Srgrimes esub--; 7291573Srgrimes else 7301573Srgrimes assert(OP(m->g->strip[esub]) == O_CH); 7311573Srgrimes } 7321573Srgrimes break; 7331573Srgrimes case OLPAREN: /* must undo assignment if rest fails */ 7341573Srgrimes i = OPND(s); 7351573Srgrimes assert(0 < i && i <= m->g->nsub); 7361573Srgrimes offsave = m->pmatch[i].rm_so; 7371573Srgrimes m->pmatch[i].rm_so = sp - m->offp; 7381573Srgrimes dp = backref(m, sp, stop, ss+1, stopst, lev); 7391573Srgrimes if (dp != NULL) 7401573Srgrimes return(dp); 7411573Srgrimes m->pmatch[i].rm_so = offsave; 7421573Srgrimes return(NULL); 7431573Srgrimes break; 7441573Srgrimes case ORPAREN: /* must undo assignment if rest fails */ 7451573Srgrimes i = OPND(s); 7461573Srgrimes assert(0 < i && i <= m->g->nsub); 7471573Srgrimes offsave = m->pmatch[i].rm_eo; 7481573Srgrimes m->pmatch[i].rm_eo = sp - m->offp; 7491573Srgrimes dp = backref(m, sp, stop, ss+1, stopst, lev); 7501573Srgrimes if (dp != NULL) 7511573Srgrimes return(dp); 7521573Srgrimes m->pmatch[i].rm_eo = offsave; 7531573Srgrimes return(NULL); 7541573Srgrimes break; 7551573Srgrimes default: /* uh oh */ 7561573Srgrimes assert(nope); 7571573Srgrimes break; 7581573Srgrimes } 7591573Srgrimes 7601573Srgrimes /* "can't happen" */ 7611573Srgrimes assert(nope); 7621573Srgrimes /* NOTREACHED */ 76311664Sphk return "shut up gcc"; 7641573Srgrimes} 7651573Srgrimes 7661573Srgrimes/* 7671573Srgrimes - fast - step through the string at top speed 76892889Sobrien == static char *fast(struct match *m, char *start, \ 7691573Srgrimes == char *stop, sopno startst, sopno stopst); 7701573Srgrimes */ 7711573Srgrimesstatic char * /* where tentative match ended, or NULL */ 7721573Srgrimesfast(m, start, stop, startst, stopst) 77392889Sobrienstruct match *m; 7741573Srgrimeschar *start; 7751573Srgrimeschar *stop; 7761573Srgrimessopno startst; 7771573Srgrimessopno stopst; 7781573Srgrimes{ 77992889Sobrien states st = m->st; 78092889Sobrien states fresh = m->fresh; 78192889Sobrien states tmp = m->tmp; 78292889Sobrien char *p = start; 783132019Stjr wint_t c; 784132019Stjr wint_t lastc; /* previous c */ 785132019Stjr wint_t flagch; 78692889Sobrien int i; 78792889Sobrien char *coldp; /* last p after which no match was underway */ 788132019Stjr size_t clen; 7891573Srgrimes 7901573Srgrimes CLEAR(st); 7911573Srgrimes SET1(st, startst); 7921573Srgrimes st = step(m->g, startst, stopst, st, NOTHING, st); 7931573Srgrimes ASSIGN(fresh, st); 7941573Srgrimes SP("start", st, *p); 7951573Srgrimes coldp = NULL; 796132019Stjr if (start == m->beginp) 797132019Stjr c = OUT; 798132019Stjr else { 799132019Stjr /* 800132019Stjr * XXX Wrong if the previous character was multi-byte. 801132019Stjr * Newline never is (in encodings supported by FreeBSD), 802132019Stjr * so this only breaks the ISWORD tests below. 803132019Stjr */ 804132019Stjr c = (uch)*(start - 1); 805132019Stjr } 8061573Srgrimes for (;;) { 8071573Srgrimes /* next character */ 8081573Srgrimes lastc = c; 809149180Stjr if (p == m->endp) { 810149180Stjr clen = 0; 811132019Stjr c = OUT; 812149180Stjr } else 813149180Stjr clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR); 8141573Srgrimes if (EQ(st, fresh)) 8151573Srgrimes coldp = p; 8161573Srgrimes 8171573Srgrimes /* is there an EOL and/or BOL between lastc and c? */ 8181573Srgrimes flagch = '\0'; 8191573Srgrimes i = 0; 8201573Srgrimes if ( (lastc == '\n' && m->g->cflags®_NEWLINE) || 8211573Srgrimes (lastc == OUT && !(m->eflags®_NOTBOL)) ) { 8221573Srgrimes flagch = BOL; 8231573Srgrimes i = m->g->nbol; 8241573Srgrimes } 8251573Srgrimes if ( (c == '\n' && m->g->cflags®_NEWLINE) || 8261573Srgrimes (c == OUT && !(m->eflags®_NOTEOL)) ) { 8271573Srgrimes flagch = (flagch == BOL) ? BOLEOL : EOL; 8281573Srgrimes i += m->g->neol; 8291573Srgrimes } 8301573Srgrimes if (i != 0) { 8311573Srgrimes for (; i > 0; i--) 8321573Srgrimes st = step(m->g, startst, stopst, st, flagch, st); 8331573Srgrimes SP("boleol", st, c); 8341573Srgrimes } 8351573Srgrimes 8361573Srgrimes /* how about a word boundary? */ 8371573Srgrimes if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && 8381573Srgrimes (c != OUT && ISWORD(c)) ) { 8391573Srgrimes flagch = BOW; 8401573Srgrimes } 8411573Srgrimes if ( (lastc != OUT && ISWORD(lastc)) && 8421573Srgrimes (flagch == EOL || (c != OUT && !ISWORD(c))) ) { 8431573Srgrimes flagch = EOW; 8441573Srgrimes } 8451573Srgrimes if (flagch == BOW || flagch == EOW) { 8461573Srgrimes st = step(m->g, startst, stopst, st, flagch, st); 8471573Srgrimes SP("boweow", st, c); 8481573Srgrimes } 8491573Srgrimes 8501573Srgrimes /* are we done? */ 851149180Stjr if (ISSET(st, stopst) || p == stop || clen > stop - p) 8521573Srgrimes break; /* NOTE BREAK OUT */ 8531573Srgrimes 8541573Srgrimes /* no, we must deal with this character */ 8551573Srgrimes ASSIGN(tmp, st); 8561573Srgrimes ASSIGN(st, fresh); 8571573Srgrimes assert(c != OUT); 8581573Srgrimes st = step(m->g, startst, stopst, tmp, c, st); 8591573Srgrimes SP("aft", st, c); 8601573Srgrimes assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); 861132019Stjr p += clen; 8621573Srgrimes } 8631573Srgrimes 8641573Srgrimes assert(coldp != NULL); 8651573Srgrimes m->coldp = coldp; 8661573Srgrimes if (ISSET(st, stopst)) 867137959Stjr return(p+XMBRTOWC(NULL, p, stop - p, &m->mbs, 0)); 8681573Srgrimes else 8691573Srgrimes return(NULL); 8701573Srgrimes} 8711573Srgrimes 8721573Srgrimes/* 8731573Srgrimes - slow - step through the string more deliberately 87492889Sobrien == static char *slow(struct match *m, char *start, \ 8751573Srgrimes == char *stop, sopno startst, sopno stopst); 8761573Srgrimes */ 8771573Srgrimesstatic char * /* where it ended */ 8781573Srgrimesslow(m, start, stop, startst, stopst) 87992889Sobrienstruct match *m; 8801573Srgrimeschar *start; 8811573Srgrimeschar *stop; 8821573Srgrimessopno startst; 8831573Srgrimessopno stopst; 8841573Srgrimes{ 88592889Sobrien states st = m->st; 88692889Sobrien states empty = m->empty; 88792889Sobrien states tmp = m->tmp; 88892889Sobrien char *p = start; 889132019Stjr wint_t c; 890132019Stjr wint_t lastc; /* previous c */ 891132019Stjr wint_t flagch; 89292889Sobrien int i; 89392889Sobrien char *matchp; /* last p at which a match ended */ 894132019Stjr size_t clen; 8951573Srgrimes 8961573Srgrimes AT("slow", start, stop, startst, stopst); 8971573Srgrimes CLEAR(st); 8981573Srgrimes SET1(st, startst); 8991573Srgrimes SP("sstart", st, *p); 9001573Srgrimes st = step(m->g, startst, stopst, st, NOTHING, st); 9011573Srgrimes matchp = NULL; 902132019Stjr if (start == m->beginp) 903132019Stjr c = OUT; 904132019Stjr else { 905132019Stjr /* 906132019Stjr * XXX Wrong if the previous character was multi-byte. 907132019Stjr * Newline never is (in encodings supported by FreeBSD), 908132019Stjr * so this only breaks the ISWORD tests below. 909132019Stjr */ 910132019Stjr c = (uch)*(start - 1); 911132019Stjr } 9121573Srgrimes for (;;) { 9131573Srgrimes /* next character */ 9141573Srgrimes lastc = c; 915132019Stjr if (p == m->endp) { 916132019Stjr c = OUT; 917132019Stjr clen = 0; 918132019Stjr } else 919149180Stjr clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR); 9201573Srgrimes 9211573Srgrimes /* is there an EOL and/or BOL between lastc and c? */ 9221573Srgrimes flagch = '\0'; 9231573Srgrimes i = 0; 9241573Srgrimes if ( (lastc == '\n' && m->g->cflags®_NEWLINE) || 9251573Srgrimes (lastc == OUT && !(m->eflags®_NOTBOL)) ) { 9261573Srgrimes flagch = BOL; 9271573Srgrimes i = m->g->nbol; 9281573Srgrimes } 9291573Srgrimes if ( (c == '\n' && m->g->cflags®_NEWLINE) || 9301573Srgrimes (c == OUT && !(m->eflags®_NOTEOL)) ) { 9311573Srgrimes flagch = (flagch == BOL) ? BOLEOL : EOL; 9321573Srgrimes i += m->g->neol; 9331573Srgrimes } 9341573Srgrimes if (i != 0) { 9351573Srgrimes for (; i > 0; i--) 9361573Srgrimes st = step(m->g, startst, stopst, st, flagch, st); 9371573Srgrimes SP("sboleol", st, c); 9381573Srgrimes } 9391573Srgrimes 9401573Srgrimes /* how about a word boundary? */ 9411573Srgrimes if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && 9421573Srgrimes (c != OUT && ISWORD(c)) ) { 9431573Srgrimes flagch = BOW; 9441573Srgrimes } 9451573Srgrimes if ( (lastc != OUT && ISWORD(lastc)) && 9461573Srgrimes (flagch == EOL || (c != OUT && !ISWORD(c))) ) { 9471573Srgrimes flagch = EOW; 9481573Srgrimes } 9491573Srgrimes if (flagch == BOW || flagch == EOW) { 9501573Srgrimes st = step(m->g, startst, stopst, st, flagch, st); 9511573Srgrimes SP("sboweow", st, c); 9521573Srgrimes } 9531573Srgrimes 9541573Srgrimes /* are we done? */ 9551573Srgrimes if (ISSET(st, stopst)) 9561573Srgrimes matchp = p; 957149180Stjr if (EQ(st, empty) || p == stop || clen > stop - p) 9581573Srgrimes break; /* NOTE BREAK OUT */ 9591573Srgrimes 9601573Srgrimes /* no, we must deal with this character */ 9611573Srgrimes ASSIGN(tmp, st); 9621573Srgrimes ASSIGN(st, empty); 9631573Srgrimes assert(c != OUT); 9641573Srgrimes st = step(m->g, startst, stopst, tmp, c, st); 9651573Srgrimes SP("saft", st, c); 9661573Srgrimes assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); 967132019Stjr p += clen; 9681573Srgrimes } 9691573Srgrimes 9701573Srgrimes return(matchp); 9711573Srgrimes} 9721573Srgrimes 9731573Srgrimes 9741573Srgrimes/* 9751573Srgrimes - step - map set of states reachable before char to set reachable after 97692889Sobrien == static states step(struct re_guts *g, sopno start, sopno stop, \ 97792889Sobrien == states bef, int ch, states aft); 978132019Stjr == #define BOL (OUT-1) 979132019Stjr == #define EOL (BOL-1) 980132019Stjr == #define BOLEOL (BOL-2) 981132019Stjr == #define NOTHING (BOL-3) 982132019Stjr == #define BOW (BOL-4) 983132019Stjr == #define EOW (BOL-5) 984132019Stjr == #define BADCHAR (BOL-6) 985132019Stjr == #define NONCHAR(c) ((c) <= OUT) 9861573Srgrimes */ 9871573Srgrimesstatic states 9881573Srgrimesstep(g, start, stop, bef, ch, aft) 98992889Sobrienstruct re_guts *g; 9901573Srgrimessopno start; /* start state within strip */ 9911573Srgrimessopno stop; /* state after stop state within strip */ 99292889Sobrienstates bef; /* states reachable before */ 993132019Stjrwint_t ch; /* character or NONCHAR code */ 99492889Sobrienstates aft; /* states already known reachable after */ 9951573Srgrimes{ 99692889Sobrien cset *cs; 99792889Sobrien sop s; 99892889Sobrien sopno pc; 99992889Sobrien onestate here; /* note, macros know this name */ 100092889Sobrien sopno look; 100192889Sobrien int i; 10021573Srgrimes 10031573Srgrimes for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { 10041573Srgrimes s = g->strip[pc]; 10051573Srgrimes switch (OP(s)) { 10061573Srgrimes case OEND: 10071573Srgrimes assert(pc == stop-1); 10081573Srgrimes break; 10091573Srgrimes case OCHAR: 10101573Srgrimes /* only characters can match */ 1011132019Stjr assert(!NONCHAR(ch) || ch != OPND(s)); 1012132019Stjr if (ch == OPND(s)) 10131573Srgrimes FWD(aft, bef, 1); 10141573Srgrimes break; 10151573Srgrimes case OBOL: 10161573Srgrimes if (ch == BOL || ch == BOLEOL) 10171573Srgrimes FWD(aft, bef, 1); 10181573Srgrimes break; 10191573Srgrimes case OEOL: 10201573Srgrimes if (ch == EOL || ch == BOLEOL) 10211573Srgrimes FWD(aft, bef, 1); 10221573Srgrimes break; 10231573Srgrimes case OBOW: 10241573Srgrimes if (ch == BOW) 10251573Srgrimes FWD(aft, bef, 1); 10261573Srgrimes break; 10271573Srgrimes case OEOW: 10281573Srgrimes if (ch == EOW) 10291573Srgrimes FWD(aft, bef, 1); 10301573Srgrimes break; 10311573Srgrimes case OANY: 10321573Srgrimes if (!NONCHAR(ch)) 10331573Srgrimes FWD(aft, bef, 1); 10341573Srgrimes break; 10351573Srgrimes case OANYOF: 10361573Srgrimes cs = &g->sets[OPND(s)]; 10371573Srgrimes if (!NONCHAR(ch) && CHIN(cs, ch)) 10381573Srgrimes FWD(aft, bef, 1); 10391573Srgrimes break; 10401573Srgrimes case OBACK_: /* ignored here */ 10411573Srgrimes case O_BACK: 10421573Srgrimes FWD(aft, aft, 1); 10431573Srgrimes break; 10441573Srgrimes case OPLUS_: /* forward, this is just an empty */ 10451573Srgrimes FWD(aft, aft, 1); 10461573Srgrimes break; 10471573Srgrimes case O_PLUS: /* both forward and back */ 10481573Srgrimes FWD(aft, aft, 1); 10491573Srgrimes i = ISSETBACK(aft, OPND(s)); 10501573Srgrimes BACK(aft, aft, OPND(s)); 10511573Srgrimes if (!i && ISSETBACK(aft, OPND(s))) { 10521573Srgrimes /* oho, must reconsider loop body */ 10531573Srgrimes pc -= OPND(s) + 1; 10541573Srgrimes INIT(here, pc); 10551573Srgrimes } 10561573Srgrimes break; 10571573Srgrimes case OQUEST_: /* two branches, both forward */ 10581573Srgrimes FWD(aft, aft, 1); 10591573Srgrimes FWD(aft, aft, OPND(s)); 10601573Srgrimes break; 10611573Srgrimes case O_QUEST: /* just an empty */ 10621573Srgrimes FWD(aft, aft, 1); 10631573Srgrimes break; 10641573Srgrimes case OLPAREN: /* not significant here */ 10651573Srgrimes case ORPAREN: 10661573Srgrimes FWD(aft, aft, 1); 10671573Srgrimes break; 10681573Srgrimes case OCH_: /* mark the first two branches */ 10691573Srgrimes FWD(aft, aft, 1); 10701573Srgrimes assert(OP(g->strip[pc+OPND(s)]) == OOR2); 10711573Srgrimes FWD(aft, aft, OPND(s)); 10721573Srgrimes break; 10731573Srgrimes case OOR1: /* done a branch, find the O_CH */ 10741573Srgrimes if (ISSTATEIN(aft, here)) { 10751573Srgrimes for (look = 1; 10761573Srgrimes OP(s = g->strip[pc+look]) != O_CH; 10771573Srgrimes look += OPND(s)) 10781573Srgrimes assert(OP(s) == OOR2); 10791573Srgrimes FWD(aft, aft, look); 10801573Srgrimes } 10811573Srgrimes break; 10821573Srgrimes case OOR2: /* propagate OCH_'s marking */ 10831573Srgrimes FWD(aft, aft, 1); 10841573Srgrimes if (OP(g->strip[pc+OPND(s)]) != O_CH) { 10851573Srgrimes assert(OP(g->strip[pc+OPND(s)]) == OOR2); 10861573Srgrimes FWD(aft, aft, OPND(s)); 10871573Srgrimes } 10881573Srgrimes break; 10891573Srgrimes case O_CH: /* just empty */ 10901573Srgrimes FWD(aft, aft, 1); 10911573Srgrimes break; 10921573Srgrimes default: /* ooooops... */ 10931573Srgrimes assert(nope); 10941573Srgrimes break; 10951573Srgrimes } 10961573Srgrimes } 10971573Srgrimes 10981573Srgrimes return(aft); 10991573Srgrimes} 11001573Srgrimes 11011573Srgrimes#ifdef REDEBUG 11021573Srgrimes/* 11031573Srgrimes - print - print a set of states 11041573Srgrimes == #ifdef REDEBUG 11051573Srgrimes == static void print(struct match *m, char *caption, states st, \ 11061573Srgrimes == int ch, FILE *d); 11071573Srgrimes == #endif 11081573Srgrimes */ 11091573Srgrimesstatic void 11101573Srgrimesprint(m, caption, st, ch, d) 11111573Srgrimesstruct match *m; 11121573Srgrimeschar *caption; 11131573Srgrimesstates st; 11141573Srgrimesint ch; 11151573SrgrimesFILE *d; 11161573Srgrimes{ 111792889Sobrien struct re_guts *g = m->g; 111892889Sobrien int i; 111992889Sobrien int first = 1; 11201573Srgrimes 11211573Srgrimes if (!(m->eflags®_TRACE)) 11221573Srgrimes return; 11231573Srgrimes 11241573Srgrimes fprintf(d, "%s", caption); 11251573Srgrimes if (ch != '\0') 11261573Srgrimes fprintf(d, " %s", pchar(ch)); 11271573Srgrimes for (i = 0; i < g->nstates; i++) 11281573Srgrimes if (ISSET(st, i)) { 11291573Srgrimes fprintf(d, "%s%d", (first) ? "\t" : ", ", i); 11301573Srgrimes first = 0; 11311573Srgrimes } 11321573Srgrimes fprintf(d, "\n"); 11331573Srgrimes} 11341573Srgrimes 11358870Srgrimes/* 11361573Srgrimes - at - print current situation 11371573Srgrimes == #ifdef REDEBUG 11381573Srgrimes == static void at(struct match *m, char *title, char *start, char *stop, \ 11391573Srgrimes == sopno startst, sopno stopst); 11401573Srgrimes == #endif 11411573Srgrimes */ 11421573Srgrimesstatic void 11431573Srgrimesat(m, title, start, stop, startst, stopst) 11441573Srgrimesstruct match *m; 11451573Srgrimeschar *title; 11461573Srgrimeschar *start; 11471573Srgrimeschar *stop; 11481573Srgrimessopno startst; 11491573Srgrimessopno stopst; 11501573Srgrimes{ 11511573Srgrimes if (!(m->eflags®_TRACE)) 11521573Srgrimes return; 11531573Srgrimes 11541573Srgrimes printf("%s %s-", title, pchar(*start)); 11551573Srgrimes printf("%s ", pchar(*stop)); 11561573Srgrimes printf("%ld-%ld\n", (long)startst, (long)stopst); 11571573Srgrimes} 11581573Srgrimes 11591573Srgrimes#ifndef PCHARDONE 11601573Srgrimes#define PCHARDONE /* never again */ 11611573Srgrimes/* 11621573Srgrimes - pchar - make a character printable 11631573Srgrimes == #ifdef REDEBUG 11641573Srgrimes == static char *pchar(int ch); 11651573Srgrimes == #endif 11661573Srgrimes * 11671573Srgrimes * Is this identical to regchar() over in debug.c? Well, yes. But a 11681573Srgrimes * duplicate here avoids having a debugging-capable regexec.o tied to 11691573Srgrimes * a matching debug.o, and this is convenient. It all disappears in 11701573Srgrimes * the non-debug compilation anyway, so it doesn't matter much. 11711573Srgrimes */ 11721573Srgrimesstatic char * /* -> representation */ 11731573Srgrimespchar(ch) 11741573Srgrimesint ch; 11751573Srgrimes{ 11761573Srgrimes static char pbuf[10]; 11771573Srgrimes 117817508Sache if (isprint((uch)ch) || ch == ' ') 11791573Srgrimes sprintf(pbuf, "%c", ch); 11801573Srgrimes else 11811573Srgrimes sprintf(pbuf, "\\%o", ch); 11821573Srgrimes return(pbuf); 11831573Srgrimes} 11841573Srgrimes#endif 11851573Srgrimes#endif 11861573Srgrimes 11871573Srgrimes#undef matcher 11881573Srgrimes#undef fast 11891573Srgrimes#undef slow 11901573Srgrimes#undef dissect 11911573Srgrimes#undef backref 11921573Srgrimes#undef step 11931573Srgrimes#undef print 11941573Srgrimes#undef at 11951573Srgrimes#undef match 1196