engine.c revision 137959
11573Srgrimes/*- 21573Srgrimes * Copyright (c) 1992, 1993, 1994 Henry Spencer. 31573Srgrimes * Copyright (c) 1992, 1993, 1994 41573Srgrimes * The Regents of the University of California. All rights reserved. 51573Srgrimes * 61573Srgrimes * This code is derived from software contributed to Berkeley by 71573Srgrimes * Henry Spencer. 81573Srgrimes * 91573Srgrimes * Redistribution and use in source and binary forms, with or without 101573Srgrimes * modification, are permitted provided that the following conditions 111573Srgrimes * are met: 121573Srgrimes * 1. Redistributions of source code must retain the above copyright 131573Srgrimes * notice, this list of conditions and the following disclaimer. 141573Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 151573Srgrimes * notice, this list of conditions and the following disclaimer in the 161573Srgrimes * documentation and/or other materials provided with the distribution. 171573Srgrimes * 3. All advertising materials mentioning features or use of this software 181573Srgrimes * must display the following acknowledgement: 191573Srgrimes * This product includes software developed by the University of 201573Srgrimes * California, Berkeley and its contributors. 211573Srgrimes * 4. Neither the name of the University nor the names of its contributors 221573Srgrimes * may be used to endorse or promote products derived from this software 231573Srgrimes * without specific prior written permission. 241573Srgrimes * 251573Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 261573Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 271573Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 281573Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 291573Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 301573Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 311573Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 321573Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 331573Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 341573Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 351573Srgrimes * SUCH DAMAGE. 361573Srgrimes * 371573Srgrimes * @(#)engine.c 8.5 (Berkeley) 3/20/94 381573Srgrimes */ 391573Srgrimes 4092986Sobrien#include <sys/cdefs.h> 4192986Sobrien__FBSDID("$FreeBSD: head/lib/libc/regex/engine.c 137959 2004-11-21 03:14:32Z tjr $"); 4292986Sobrien 431573Srgrimes/* 441573Srgrimes * The matching engine and friends. This file is #included by regexec.c 451573Srgrimes * after suitable #defines of a variety of macros used herein, so that 461573Srgrimes * different state representations can be used without duplicating masses 471573Srgrimes * of code. 481573Srgrimes */ 491573Srgrimes 501573Srgrimes#ifdef SNAMES 511573Srgrimes#define matcher smatcher 521573Srgrimes#define fast sfast 531573Srgrimes#define slow sslow 541573Srgrimes#define dissect sdissect 551573Srgrimes#define backref sbackref 561573Srgrimes#define step sstep 571573Srgrimes#define print sprint 581573Srgrimes#define at sat 591573Srgrimes#define match smat 601573Srgrimes#endif 611573Srgrimes#ifdef LNAMES 621573Srgrimes#define matcher lmatcher 631573Srgrimes#define fast lfast 641573Srgrimes#define slow lslow 651573Srgrimes#define dissect ldissect 661573Srgrimes#define backref lbackref 671573Srgrimes#define step lstep 681573Srgrimes#define print lprint 691573Srgrimes#define at lat 701573Srgrimes#define match lmat 711573Srgrimes#endif 72132019Stjr#ifdef MNAMES 73132019Stjr#define matcher mmatcher 74132019Stjr#define fast mfast 75132019Stjr#define slow mslow 76132019Stjr#define dissect mdissect 77132019Stjr#define backref mbackref 78132019Stjr#define step mstep 79132019Stjr#define print mprint 80132019Stjr#define at mat 81132019Stjr#define match mmat 82132019Stjr#endif 831573Srgrimes 841573Srgrimes/* another structure passed up and down to avoid zillions of parameters */ 851573Srgrimesstruct match { 861573Srgrimes struct re_guts *g; 871573Srgrimes int eflags; 881573Srgrimes regmatch_t *pmatch; /* [nsub+1] (0 element unused) */ 891573Srgrimes char *offp; /* offsets work from here */ 901573Srgrimes char *beginp; /* start of string -- virtual NUL precedes */ 911573Srgrimes char *endp; /* end of string -- virtual NUL here */ 921573Srgrimes char *coldp; /* can be no match starting before here */ 931573Srgrimes char **lastpos; /* [nplus+1] */ 941573Srgrimes STATEVARS; 951573Srgrimes states st; /* current states */ 961573Srgrimes states fresh; /* states for a fresh start */ 971573Srgrimes states tmp; /* temporary */ 981573Srgrimes states empty; /* empty set of states */ 99132019Stjr mbstate_t mbs; /* multibyte conversion state */ 1001573Srgrimes}; 1011573Srgrimes 1021573Srgrimes/* ========= begin header generated by ./mkh ========= */ 1031573Srgrimes#ifdef __cplusplus 1041573Srgrimesextern "C" { 1051573Srgrimes#endif 1061573Srgrimes 1071573Srgrimes/* === engine.c === */ 10892905Sobrienstatic int matcher(struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], int eflags); 10992905Sobrienstatic char *dissect(struct match *m, char *start, char *stop, sopno startst, sopno stopst); 11092905Sobrienstatic char *backref(struct match *m, char *start, char *stop, sopno startst, sopno stopst, sopno lev); 11192905Sobrienstatic char *fast(struct match *m, char *start, char *stop, sopno startst, sopno stopst); 11292905Sobrienstatic char *slow(struct match *m, char *start, char *stop, sopno startst, sopno stopst); 113132019Stjrstatic states step(struct re_guts *g, sopno start, sopno stop, states bef, wint_t ch, states aft); 114132019Stjr#define BOL (OUT-1) 115132019Stjr#define EOL (BOL-1) 116132019Stjr#define BOLEOL (BOL-2) 117132019Stjr#define NOTHING (BOL-3) 118132019Stjr#define BOW (BOL-4) 119132019Stjr#define EOW (BOL-5) 120132019Stjr#define BADCHAR (BOL-6) 121132019Stjr#define NONCHAR(c) ((c) <= OUT) 1221573Srgrimes#ifdef REDEBUG 12392905Sobrienstatic void print(struct match *m, char *caption, states st, int ch, FILE *d); 1241573Srgrimes#endif 1251573Srgrimes#ifdef REDEBUG 12692905Sobrienstatic void at(struct match *m, char *title, char *start, char *stop, sopno startst, sopno stopst); 1271573Srgrimes#endif 1281573Srgrimes#ifdef REDEBUG 12992905Sobrienstatic char *pchar(int ch); 1301573Srgrimes#endif 1311573Srgrimes 1321573Srgrimes#ifdef __cplusplus 1331573Srgrimes} 1341573Srgrimes#endif 1351573Srgrimes/* ========= end header generated by ./mkh ========= */ 1361573Srgrimes 1371573Srgrimes#ifdef REDEBUG 1381573Srgrimes#define SP(t, s, c) print(m, t, s, c, stdout) 1391573Srgrimes#define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2) 1401573Srgrimes#define NOTE(str) { if (m->eflags®_TRACE) printf("=%s\n", (str)); } 1411573Srgrimes#else 1421573Srgrimes#define SP(t, s, c) /* nothing */ 1431573Srgrimes#define AT(t, p1, p2, s1, s2) /* nothing */ 1441573Srgrimes#define NOTE(s) /* nothing */ 1451573Srgrimes#endif 1461573Srgrimes 1471573Srgrimes/* 1481573Srgrimes - matcher - the actual matching engine 14992889Sobrien == static int matcher(struct re_guts *g, char *string, \ 1501573Srgrimes == size_t nmatch, regmatch_t pmatch[], int eflags); 1511573Srgrimes */ 1521573Srgrimesstatic int /* 0 success, REG_NOMATCH failure */ 1531573Srgrimesmatcher(g, string, nmatch, pmatch, eflags) 15492889Sobrienstruct re_guts *g; 1551573Srgrimeschar *string; 1561573Srgrimessize_t nmatch; 1571573Srgrimesregmatch_t pmatch[]; 1581573Srgrimesint eflags; 1591573Srgrimes{ 16092889Sobrien char *endp; 16192889Sobrien int i; 1621573Srgrimes struct match mv; 16392889Sobrien struct match *m = &mv; 16492889Sobrien char *dp; 16592889Sobrien const sopno gf = g->firststate+1; /* +1 for OEND */ 16692889Sobrien const sopno gl = g->laststate; 1671573Srgrimes char *start; 1681573Srgrimes char *stop; 16962232Sdcs /* Boyer-Moore algorithms variables */ 17092889Sobrien char *pp; 17162232Sdcs int cj, mj; 17292889Sobrien char *mustfirst; 17392889Sobrien char *mustlast; 17492889Sobrien int *matchjump; 17592889Sobrien int *charjump; 1761573Srgrimes 1771573Srgrimes /* simplify the situation where possible */ 1781573Srgrimes if (g->cflags®_NOSUB) 1791573Srgrimes nmatch = 0; 1801573Srgrimes if (eflags®_STARTEND) { 1811573Srgrimes start = string + pmatch[0].rm_so; 1821573Srgrimes stop = string + pmatch[0].rm_eo; 1831573Srgrimes } else { 1841573Srgrimes start = string; 1851573Srgrimes stop = start + strlen(start); 1861573Srgrimes } 1871573Srgrimes if (stop < start) 1881573Srgrimes return(REG_INVARG); 1891573Srgrimes 1901573Srgrimes /* prescreening; this does wonders for this rather slow code */ 1911573Srgrimes if (g->must != NULL) { 19262391Sdcs if (g->charjump != NULL && g->matchjump != NULL) { 19362232Sdcs mustfirst = g->must; 19462232Sdcs mustlast = g->must + g->mlen - 1; 19562232Sdcs charjump = g->charjump; 19662232Sdcs matchjump = g->matchjump; 19762232Sdcs pp = mustlast; 19862754Sdcs for (dp = start+g->mlen-1; dp < stop;) { 19962232Sdcs /* Fast skip non-matches */ 200111010Snectar while (dp < stop && charjump[(int)*dp]) 201111010Snectar dp += charjump[(int)*dp]; 20262232Sdcs 20362754Sdcs if (dp >= stop) 20462232Sdcs break; 20562232Sdcs 20662232Sdcs /* Greedy matcher */ 20762232Sdcs /* We depend on not being used for 20862232Sdcs * for strings of length 1 20962232Sdcs */ 21062754Sdcs while (*--dp == *--pp && pp != mustfirst); 21162232Sdcs 21262754Sdcs if (*dp == *pp) 21362232Sdcs break; 21462232Sdcs 21562232Sdcs /* Jump to next possible match */ 21662232Sdcs mj = matchjump[pp - mustfirst]; 217111010Snectar cj = charjump[(int)*dp]; 21862754Sdcs dp += (cj < mj ? mj : cj); 21962754Sdcs pp = mustlast; 22062232Sdcs } 22162391Sdcs if (pp != mustfirst) 22262232Sdcs return(REG_NOMATCH); 22362232Sdcs } else { 22462232Sdcs for (dp = start; dp < stop; dp++) 22562232Sdcs if (*dp == g->must[0] && 22662232Sdcs stop - dp >= g->mlen && 22762232Sdcs memcmp(dp, g->must, (size_t)g->mlen) == 0) 22862232Sdcs break; 22962232Sdcs if (dp == stop) /* we didn't find g->must */ 23062232Sdcs return(REG_NOMATCH); 23162232Sdcs } 2321573Srgrimes } 2331573Srgrimes 2341573Srgrimes /* match struct setup */ 2351573Srgrimes m->g = g; 2361573Srgrimes m->eflags = eflags; 2371573Srgrimes m->pmatch = NULL; 2381573Srgrimes m->lastpos = NULL; 2391573Srgrimes m->offp = string; 2401573Srgrimes m->beginp = start; 2411573Srgrimes m->endp = stop; 2421573Srgrimes STATESETUP(m, 4); 2431573Srgrimes SETUP(m->st); 2441573Srgrimes SETUP(m->fresh); 2451573Srgrimes SETUP(m->tmp); 2461573Srgrimes SETUP(m->empty); 2471573Srgrimes CLEAR(m->empty); 248132019Stjr ZAPSTATE(&m->mbs); 2491573Srgrimes 25062391Sdcs /* Adjust start according to moffset, to speed things up */ 25162391Sdcs if (g->moffset > -1) 25262854Sdcs start = ((dp - g->moffset) < start) ? start : dp - g->moffset; 25362391Sdcs 2541573Srgrimes /* this loop does only one repetition except for backrefs */ 2551573Srgrimes for (;;) { 2561573Srgrimes endp = fast(m, start, stop, gf, gl); 2571573Srgrimes if (endp == NULL) { /* a miss */ 2581573Srgrimes STATETEARDOWN(m); 2591573Srgrimes return(REG_NOMATCH); 2601573Srgrimes } 2611573Srgrimes if (nmatch == 0 && !g->backrefs) 2621573Srgrimes break; /* no further info needed */ 2631573Srgrimes 2641573Srgrimes /* where? */ 2651573Srgrimes assert(m->coldp != NULL); 2661573Srgrimes for (;;) { 2671573Srgrimes NOTE("finding start"); 2681573Srgrimes endp = slow(m, m->coldp, stop, gf, gl); 2691573Srgrimes if (endp != NULL) 2701573Srgrimes break; 2711573Srgrimes assert(m->coldp < m->endp); 272132019Stjr m->coldp += XMBRTOWC(NULL, m->coldp, 273132019Stjr m->endp - m->coldp, &m->mbs, 0); 2741573Srgrimes } 2751573Srgrimes if (nmatch == 1 && !g->backrefs) 2761573Srgrimes break; /* no further info needed */ 2771573Srgrimes 2781573Srgrimes /* oh my, he wants the subexpressions... */ 2791573Srgrimes if (m->pmatch == NULL) 2801573Srgrimes m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) * 2811573Srgrimes sizeof(regmatch_t)); 2821573Srgrimes if (m->pmatch == NULL) { 2831573Srgrimes STATETEARDOWN(m); 2841573Srgrimes return(REG_ESPACE); 2851573Srgrimes } 2861573Srgrimes for (i = 1; i <= m->g->nsub; i++) 2871573Srgrimes m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1; 2881573Srgrimes if (!g->backrefs && !(m->eflags®_BACKR)) { 2891573Srgrimes NOTE("dissecting"); 2901573Srgrimes dp = dissect(m, m->coldp, endp, gf, gl); 2911573Srgrimes } else { 2921573Srgrimes if (g->nplus > 0 && m->lastpos == NULL) 2931573Srgrimes m->lastpos = (char **)malloc((g->nplus+1) * 2941573Srgrimes sizeof(char *)); 2951573Srgrimes if (g->nplus > 0 && m->lastpos == NULL) { 2961573Srgrimes free(m->pmatch); 2971573Srgrimes STATETEARDOWN(m); 2981573Srgrimes return(REG_ESPACE); 2991573Srgrimes } 3001573Srgrimes NOTE("backref dissect"); 3011573Srgrimes dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); 3021573Srgrimes } 3031573Srgrimes if (dp != NULL) 3041573Srgrimes break; 3051573Srgrimes 3061573Srgrimes /* uh-oh... we couldn't find a subexpression-level match */ 3071573Srgrimes assert(g->backrefs); /* must be back references doing it */ 3081573Srgrimes assert(g->nplus == 0 || m->lastpos != NULL); 3091573Srgrimes for (;;) { 3101573Srgrimes if (dp != NULL || endp <= m->coldp) 3111573Srgrimes break; /* defeat */ 3121573Srgrimes NOTE("backoff"); 3131573Srgrimes endp = slow(m, m->coldp, endp-1, gf, gl); 3141573Srgrimes if (endp == NULL) 3151573Srgrimes break; /* defeat */ 3161573Srgrimes /* try it on a shorter possibility */ 3171573Srgrimes#ifndef NDEBUG 3181573Srgrimes for (i = 1; i <= m->g->nsub; i++) { 3191573Srgrimes assert(m->pmatch[i].rm_so == -1); 3201573Srgrimes assert(m->pmatch[i].rm_eo == -1); 3211573Srgrimes } 3221573Srgrimes#endif 3231573Srgrimes NOTE("backoff dissect"); 3241573Srgrimes dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); 3251573Srgrimes } 3261573Srgrimes assert(dp == NULL || dp == endp); 3271573Srgrimes if (dp != NULL) /* found a shorter one */ 3281573Srgrimes break; 3291573Srgrimes 3301573Srgrimes /* despite initial appearances, there is no match here */ 3311573Srgrimes NOTE("false alarm"); 332132019Stjr /* recycle starting later */ 333132019Stjr start = m->coldp + XMBRTOWC(NULL, m->coldp, 334137959Stjr stop - m->coldp, &m->mbs, 0); 3351573Srgrimes assert(start <= stop); 3361573Srgrimes } 3371573Srgrimes 3381573Srgrimes /* fill in the details if requested */ 3391573Srgrimes if (nmatch > 0) { 3401573Srgrimes pmatch[0].rm_so = m->coldp - m->offp; 3411573Srgrimes pmatch[0].rm_eo = endp - m->offp; 3421573Srgrimes } 3431573Srgrimes if (nmatch > 1) { 3441573Srgrimes assert(m->pmatch != NULL); 3451573Srgrimes for (i = 1; i < nmatch; i++) 3461573Srgrimes if (i <= m->g->nsub) 3471573Srgrimes pmatch[i] = m->pmatch[i]; 3481573Srgrimes else { 3491573Srgrimes pmatch[i].rm_so = -1; 3501573Srgrimes pmatch[i].rm_eo = -1; 3511573Srgrimes } 3521573Srgrimes } 3531573Srgrimes 3541573Srgrimes if (m->pmatch != NULL) 3551573Srgrimes free((char *)m->pmatch); 3561573Srgrimes if (m->lastpos != NULL) 3571573Srgrimes free((char *)m->lastpos); 3581573Srgrimes STATETEARDOWN(m); 3591573Srgrimes return(0); 3601573Srgrimes} 3611573Srgrimes 3621573Srgrimes/* 3631573Srgrimes - dissect - figure out what matched what, no back references 36492889Sobrien == static char *dissect(struct match *m, char *start, \ 3651573Srgrimes == char *stop, sopno startst, sopno stopst); 3661573Srgrimes */ 3671573Srgrimesstatic char * /* == stop (success) always */ 3681573Srgrimesdissect(m, start, stop, startst, stopst) 36992889Sobrienstruct match *m; 3701573Srgrimeschar *start; 3711573Srgrimeschar *stop; 3721573Srgrimessopno startst; 3731573Srgrimessopno stopst; 3741573Srgrimes{ 37592889Sobrien int i; 37692889Sobrien sopno ss; /* start sop of current subRE */ 37792889Sobrien sopno es; /* end sop of current subRE */ 37892889Sobrien char *sp; /* start of string matched by it */ 37992889Sobrien char *stp; /* string matched by it cannot pass here */ 38092889Sobrien char *rest; /* start of rest of string */ 38192889Sobrien char *tail; /* string unmatched by rest of RE */ 38292889Sobrien sopno ssub; /* start sop of subsubRE */ 38392889Sobrien sopno esub; /* end sop of subsubRE */ 38492889Sobrien char *ssp; /* start of string matched by subsubRE */ 38592889Sobrien char *sep; /* end of string matched by subsubRE */ 38692889Sobrien char *oldssp; /* previous ssp */ 38792889Sobrien char *dp; 3881573Srgrimes 3891573Srgrimes AT("diss", start, stop, startst, stopst); 3901573Srgrimes sp = start; 3911573Srgrimes for (ss = startst; ss < stopst; ss = es) { 3921573Srgrimes /* identify end of subRE */ 3931573Srgrimes es = ss; 3941573Srgrimes switch (OP(m->g->strip[es])) { 3951573Srgrimes case OPLUS_: 3961573Srgrimes case OQUEST_: 3971573Srgrimes es += OPND(m->g->strip[es]); 3981573Srgrimes break; 3991573Srgrimes case OCH_: 4001573Srgrimes while (OP(m->g->strip[es]) != O_CH) 4011573Srgrimes es += OPND(m->g->strip[es]); 4021573Srgrimes break; 4031573Srgrimes } 4041573Srgrimes es++; 4051573Srgrimes 4061573Srgrimes /* figure out what it matched */ 4071573Srgrimes switch (OP(m->g->strip[ss])) { 4081573Srgrimes case OEND: 4091573Srgrimes assert(nope); 4101573Srgrimes break; 4111573Srgrimes case OCHAR: 412132019Stjr sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0); 4131573Srgrimes break; 4141573Srgrimes case OBOL: 4151573Srgrimes case OEOL: 4161573Srgrimes case OBOW: 4171573Srgrimes case OEOW: 4181573Srgrimes break; 4191573Srgrimes case OANY: 4201573Srgrimes case OANYOF: 421132019Stjr sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0); 4221573Srgrimes break; 4231573Srgrimes case OBACK_: 4241573Srgrimes case O_BACK: 4251573Srgrimes assert(nope); 4261573Srgrimes break; 4271573Srgrimes /* cases where length of match is hard to find */ 4281573Srgrimes case OQUEST_: 4291573Srgrimes stp = stop; 4301573Srgrimes for (;;) { 4311573Srgrimes /* how long could this one be? */ 4321573Srgrimes rest = slow(m, sp, stp, ss, es); 4331573Srgrimes assert(rest != NULL); /* it did match */ 4341573Srgrimes /* could the rest match the rest? */ 4351573Srgrimes tail = slow(m, rest, stop, es, stopst); 4361573Srgrimes if (tail == stop) 4371573Srgrimes break; /* yes! */ 4381573Srgrimes /* no -- try a shorter match for this one */ 4391573Srgrimes stp = rest - 1; 4401573Srgrimes assert(stp >= sp); /* it did work */ 4411573Srgrimes } 4421573Srgrimes ssub = ss + 1; 4431573Srgrimes esub = es - 1; 4441573Srgrimes /* did innards match? */ 4451573Srgrimes if (slow(m, sp, rest, ssub, esub) != NULL) { 4461573Srgrimes dp = dissect(m, sp, rest, ssub, esub); 4471573Srgrimes assert(dp == rest); 4481573Srgrimes } else /* no */ 4491573Srgrimes assert(sp == rest); 4501573Srgrimes sp = rest; 4511573Srgrimes break; 4521573Srgrimes case OPLUS_: 4531573Srgrimes stp = stop; 4541573Srgrimes for (;;) { 4551573Srgrimes /* how long could this one be? */ 4561573Srgrimes rest = slow(m, sp, stp, ss, es); 4571573Srgrimes assert(rest != NULL); /* it did match */ 4581573Srgrimes /* could the rest match the rest? */ 4591573Srgrimes tail = slow(m, rest, stop, es, stopst); 4601573Srgrimes if (tail == stop) 4611573Srgrimes break; /* yes! */ 4621573Srgrimes /* no -- try a shorter match for this one */ 4631573Srgrimes stp = rest - 1; 4641573Srgrimes assert(stp >= sp); /* it did work */ 4651573Srgrimes } 4661573Srgrimes ssub = ss + 1; 4671573Srgrimes esub = es - 1; 4681573Srgrimes ssp = sp; 4691573Srgrimes oldssp = ssp; 4701573Srgrimes for (;;) { /* find last match of innards */ 4711573Srgrimes sep = slow(m, ssp, rest, ssub, esub); 4721573Srgrimes if (sep == NULL || sep == ssp) 4731573Srgrimes break; /* failed or matched null */ 4741573Srgrimes oldssp = ssp; /* on to next try */ 4751573Srgrimes ssp = sep; 4761573Srgrimes } 4771573Srgrimes if (sep == NULL) { 4781573Srgrimes /* last successful match */ 4791573Srgrimes sep = ssp; 4801573Srgrimes ssp = oldssp; 4811573Srgrimes } 4821573Srgrimes assert(sep == rest); /* must exhaust substring */ 4831573Srgrimes assert(slow(m, ssp, sep, ssub, esub) == rest); 4841573Srgrimes dp = dissect(m, ssp, sep, ssub, esub); 4851573Srgrimes assert(dp == sep); 4861573Srgrimes sp = rest; 4871573Srgrimes break; 4881573Srgrimes case OCH_: 4891573Srgrimes stp = stop; 4901573Srgrimes for (;;) { 4911573Srgrimes /* how long could this one be? */ 4921573Srgrimes rest = slow(m, sp, stp, ss, es); 4931573Srgrimes assert(rest != NULL); /* it did match */ 4941573Srgrimes /* could the rest match the rest? */ 4951573Srgrimes tail = slow(m, rest, stop, es, stopst); 4961573Srgrimes if (tail == stop) 4971573Srgrimes break; /* yes! */ 4981573Srgrimes /* no -- try a shorter match for this one */ 4991573Srgrimes stp = rest - 1; 5001573Srgrimes assert(stp >= sp); /* it did work */ 5011573Srgrimes } 5021573Srgrimes ssub = ss + 1; 5031573Srgrimes esub = ss + OPND(m->g->strip[ss]) - 1; 5041573Srgrimes assert(OP(m->g->strip[esub]) == OOR1); 5051573Srgrimes for (;;) { /* find first matching branch */ 5061573Srgrimes if (slow(m, sp, rest, ssub, esub) == rest) 5071573Srgrimes break; /* it matched all of it */ 5081573Srgrimes /* that one missed, try next one */ 5091573Srgrimes assert(OP(m->g->strip[esub]) == OOR1); 5101573Srgrimes esub++; 5111573Srgrimes assert(OP(m->g->strip[esub]) == OOR2); 5121573Srgrimes ssub = esub + 1; 5131573Srgrimes esub += OPND(m->g->strip[esub]); 5141573Srgrimes if (OP(m->g->strip[esub]) == OOR2) 5151573Srgrimes esub--; 5161573Srgrimes else 5171573Srgrimes assert(OP(m->g->strip[esub]) == O_CH); 5181573Srgrimes } 5191573Srgrimes dp = dissect(m, sp, rest, ssub, esub); 5201573Srgrimes assert(dp == rest); 5211573Srgrimes sp = rest; 5221573Srgrimes break; 5231573Srgrimes case O_PLUS: 5241573Srgrimes case O_QUEST: 5251573Srgrimes case OOR1: 5261573Srgrimes case OOR2: 5271573Srgrimes case O_CH: 5281573Srgrimes assert(nope); 5291573Srgrimes break; 5301573Srgrimes case OLPAREN: 5311573Srgrimes i = OPND(m->g->strip[ss]); 5321573Srgrimes assert(0 < i && i <= m->g->nsub); 5331573Srgrimes m->pmatch[i].rm_so = sp - m->offp; 5341573Srgrimes break; 5351573Srgrimes case ORPAREN: 5361573Srgrimes i = OPND(m->g->strip[ss]); 5371573Srgrimes assert(0 < i && i <= m->g->nsub); 5381573Srgrimes m->pmatch[i].rm_eo = sp - m->offp; 5391573Srgrimes break; 5401573Srgrimes default: /* uh oh */ 5411573Srgrimes assert(nope); 5421573Srgrimes break; 5431573Srgrimes } 5441573Srgrimes } 5451573Srgrimes 5461573Srgrimes assert(sp == stop); 5471573Srgrimes return(sp); 5481573Srgrimes} 5491573Srgrimes 5501573Srgrimes/* 5511573Srgrimes - backref - figure out what matched what, figuring in back references 55292889Sobrien == static char *backref(struct match *m, char *start, \ 5531573Srgrimes == char *stop, sopno startst, sopno stopst, sopno lev); 5541573Srgrimes */ 5551573Srgrimesstatic char * /* == stop (success) or NULL (failure) */ 5561573Srgrimesbackref(m, start, stop, startst, stopst, lev) 55792889Sobrienstruct match *m; 5581573Srgrimeschar *start; 5591573Srgrimeschar *stop; 5601573Srgrimessopno startst; 5611573Srgrimessopno stopst; 5621573Srgrimessopno lev; /* PLUS nesting level */ 5631573Srgrimes{ 56492889Sobrien int i; 56592889Sobrien sopno ss; /* start sop of current subRE */ 56692889Sobrien char *sp; /* start of string matched by it */ 56792889Sobrien sopno ssub; /* start sop of subsubRE */ 56892889Sobrien sopno esub; /* end sop of subsubRE */ 56992889Sobrien char *ssp; /* start of string matched by subsubRE */ 57092889Sobrien char *dp; 57192889Sobrien size_t len; 57292889Sobrien int hard; 57392889Sobrien sop s; 57492889Sobrien regoff_t offsave; 57592889Sobrien cset *cs; 576132019Stjr wint_t wc; 5771573Srgrimes 5781573Srgrimes AT("back", start, stop, startst, stopst); 5791573Srgrimes sp = start; 5801573Srgrimes 5811573Srgrimes /* get as far as we can with easy stuff */ 5821573Srgrimes hard = 0; 5831573Srgrimes for (ss = startst; !hard && ss < stopst; ss++) 5841573Srgrimes switch (OP(s = m->g->strip[ss])) { 5851573Srgrimes case OCHAR: 586132019Stjr if (sp == stop) 5871573Srgrimes return(NULL); 588132019Stjr sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR); 589132019Stjr if (wc != OPND(s)) 590132019Stjr return(NULL); 5911573Srgrimes break; 5921573Srgrimes case OANY: 5931573Srgrimes if (sp == stop) 5941573Srgrimes return(NULL); 595132019Stjr sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR); 596132019Stjr if (wc == BADCHAR) 597132019Stjr return (NULL); 5981573Srgrimes break; 5991573Srgrimes case OANYOF: 600132019Stjr if (sp == stop) 601132019Stjr return (NULL); 6021573Srgrimes cs = &m->g->sets[OPND(s)]; 603132019Stjr sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR); 604132019Stjr if (wc == BADCHAR || !CHIN(cs, wc)) 6051573Srgrimes return(NULL); 6061573Srgrimes break; 6071573Srgrimes case OBOL: 6081573Srgrimes if ( (sp == m->beginp && !(m->eflags®_NOTBOL)) || 6091573Srgrimes (sp < m->endp && *(sp-1) == '\n' && 6101573Srgrimes (m->g->cflags®_NEWLINE)) ) 6111573Srgrimes { /* yes */ } 6121573Srgrimes else 6131573Srgrimes return(NULL); 6141573Srgrimes break; 6151573Srgrimes case OEOL: 6161573Srgrimes if ( (sp == m->endp && !(m->eflags®_NOTEOL)) || 6171573Srgrimes (sp < m->endp && *sp == '\n' && 6181573Srgrimes (m->g->cflags®_NEWLINE)) ) 6191573Srgrimes { /* yes */ } 6201573Srgrimes else 6211573Srgrimes return(NULL); 6221573Srgrimes break; 6231573Srgrimes case OBOW: 6241573Srgrimes if (( (sp == m->beginp && !(m->eflags®_NOTBOL)) || 6251573Srgrimes (sp < m->endp && *(sp-1) == '\n' && 6261573Srgrimes (m->g->cflags®_NEWLINE)) || 6271573Srgrimes (sp > m->beginp && 6281573Srgrimes !ISWORD(*(sp-1))) ) && 6291573Srgrimes (sp < m->endp && ISWORD(*sp)) ) 6301573Srgrimes { /* yes */ } 6311573Srgrimes else 6321573Srgrimes return(NULL); 6331573Srgrimes break; 6341573Srgrimes case OEOW: 6351573Srgrimes if (( (sp == m->endp && !(m->eflags®_NOTEOL)) || 6361573Srgrimes (sp < m->endp && *sp == '\n' && 6371573Srgrimes (m->g->cflags®_NEWLINE)) || 6381573Srgrimes (sp < m->endp && !ISWORD(*sp)) ) && 6391573Srgrimes (sp > m->beginp && ISWORD(*(sp-1))) ) 6401573Srgrimes { /* yes */ } 6411573Srgrimes else 6421573Srgrimes return(NULL); 6431573Srgrimes break; 6441573Srgrimes case O_QUEST: 6451573Srgrimes break; 6461573Srgrimes case OOR1: /* matches null but needs to skip */ 6471573Srgrimes ss++; 6481573Srgrimes s = m->g->strip[ss]; 6491573Srgrimes do { 6501573Srgrimes assert(OP(s) == OOR2); 6511573Srgrimes ss += OPND(s); 6521573Srgrimes } while (OP(s = m->g->strip[ss]) != O_CH); 6531573Srgrimes /* note that the ss++ gets us past the O_CH */ 6541573Srgrimes break; 6551573Srgrimes default: /* have to make a choice */ 6561573Srgrimes hard = 1; 6571573Srgrimes break; 6581573Srgrimes } 6591573Srgrimes if (!hard) { /* that was it! */ 6601573Srgrimes if (sp != stop) 6611573Srgrimes return(NULL); 6621573Srgrimes return(sp); 6631573Srgrimes } 6641573Srgrimes ss--; /* adjust for the for's final increment */ 6651573Srgrimes 6661573Srgrimes /* the hard stuff */ 6671573Srgrimes AT("hard", sp, stop, ss, stopst); 6681573Srgrimes s = m->g->strip[ss]; 6691573Srgrimes switch (OP(s)) { 6701573Srgrimes case OBACK_: /* the vilest depths */ 6711573Srgrimes i = OPND(s); 6721573Srgrimes assert(0 < i && i <= m->g->nsub); 6731573Srgrimes if (m->pmatch[i].rm_eo == -1) 6741573Srgrimes return(NULL); 6751573Srgrimes assert(m->pmatch[i].rm_so != -1); 6761573Srgrimes len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so; 6771573Srgrimes assert(stop - m->beginp >= len); 6781573Srgrimes if (sp > stop - len) 6791573Srgrimes return(NULL); /* not enough left to match */ 6801573Srgrimes ssp = m->offp + m->pmatch[i].rm_so; 6811573Srgrimes if (memcmp(sp, ssp, len) != 0) 6821573Srgrimes return(NULL); 6831573Srgrimes while (m->g->strip[ss] != SOP(O_BACK, i)) 6841573Srgrimes ss++; 6851573Srgrimes return(backref(m, sp+len, stop, ss+1, stopst, lev)); 6861573Srgrimes break; 6871573Srgrimes case OQUEST_: /* to null or not */ 6881573Srgrimes dp = backref(m, sp, stop, ss+1, stopst, lev); 6891573Srgrimes if (dp != NULL) 6901573Srgrimes return(dp); /* not */ 6911573Srgrimes return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev)); 6921573Srgrimes break; 6931573Srgrimes case OPLUS_: 6941573Srgrimes assert(m->lastpos != NULL); 6951573Srgrimes assert(lev+1 <= m->g->nplus); 6961573Srgrimes m->lastpos[lev+1] = sp; 6971573Srgrimes return(backref(m, sp, stop, ss+1, stopst, lev+1)); 6981573Srgrimes break; 6991573Srgrimes case O_PLUS: 7001573Srgrimes if (sp == m->lastpos[lev]) /* last pass matched null */ 7011573Srgrimes return(backref(m, sp, stop, ss+1, stopst, lev-1)); 7021573Srgrimes /* try another pass */ 7031573Srgrimes m->lastpos[lev] = sp; 7041573Srgrimes dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev); 7051573Srgrimes if (dp == NULL) 7061573Srgrimes return(backref(m, sp, stop, ss+1, stopst, lev-1)); 7071573Srgrimes else 7081573Srgrimes return(dp); 7091573Srgrimes break; 7101573Srgrimes case OCH_: /* find the right one, if any */ 7111573Srgrimes ssub = ss + 1; 7121573Srgrimes esub = ss + OPND(s) - 1; 7131573Srgrimes assert(OP(m->g->strip[esub]) == OOR1); 7141573Srgrimes for (;;) { /* find first matching branch */ 7151573Srgrimes dp = backref(m, sp, stop, ssub, esub, lev); 7161573Srgrimes if (dp != NULL) 7171573Srgrimes return(dp); 7181573Srgrimes /* that one missed, try next one */ 7191573Srgrimes if (OP(m->g->strip[esub]) == O_CH) 7201573Srgrimes return(NULL); /* there is none */ 7211573Srgrimes esub++; 7221573Srgrimes assert(OP(m->g->strip[esub]) == OOR2); 7231573Srgrimes ssub = esub + 1; 7241573Srgrimes esub += OPND(m->g->strip[esub]); 7251573Srgrimes if (OP(m->g->strip[esub]) == OOR2) 7261573Srgrimes esub--; 7271573Srgrimes else 7281573Srgrimes assert(OP(m->g->strip[esub]) == O_CH); 7291573Srgrimes } 7301573Srgrimes break; 7311573Srgrimes case OLPAREN: /* must undo assignment if rest fails */ 7321573Srgrimes i = OPND(s); 7331573Srgrimes assert(0 < i && i <= m->g->nsub); 7341573Srgrimes offsave = m->pmatch[i].rm_so; 7351573Srgrimes m->pmatch[i].rm_so = sp - m->offp; 7361573Srgrimes dp = backref(m, sp, stop, ss+1, stopst, lev); 7371573Srgrimes if (dp != NULL) 7381573Srgrimes return(dp); 7391573Srgrimes m->pmatch[i].rm_so = offsave; 7401573Srgrimes return(NULL); 7411573Srgrimes break; 7421573Srgrimes case ORPAREN: /* must undo assignment if rest fails */ 7431573Srgrimes i = OPND(s); 7441573Srgrimes assert(0 < i && i <= m->g->nsub); 7451573Srgrimes offsave = m->pmatch[i].rm_eo; 7461573Srgrimes m->pmatch[i].rm_eo = sp - m->offp; 7471573Srgrimes dp = backref(m, sp, stop, ss+1, stopst, lev); 7481573Srgrimes if (dp != NULL) 7491573Srgrimes return(dp); 7501573Srgrimes m->pmatch[i].rm_eo = offsave; 7511573Srgrimes return(NULL); 7521573Srgrimes break; 7531573Srgrimes default: /* uh oh */ 7541573Srgrimes assert(nope); 7551573Srgrimes break; 7561573Srgrimes } 7571573Srgrimes 7581573Srgrimes /* "can't happen" */ 7591573Srgrimes assert(nope); 7601573Srgrimes /* NOTREACHED */ 76111664Sphk return "shut up gcc"; 7621573Srgrimes} 7631573Srgrimes 7641573Srgrimes/* 7651573Srgrimes - fast - step through the string at top speed 76692889Sobrien == static char *fast(struct match *m, char *start, \ 7671573Srgrimes == char *stop, sopno startst, sopno stopst); 7681573Srgrimes */ 7691573Srgrimesstatic char * /* where tentative match ended, or NULL */ 7701573Srgrimesfast(m, start, stop, startst, stopst) 77192889Sobrienstruct match *m; 7721573Srgrimeschar *start; 7731573Srgrimeschar *stop; 7741573Srgrimessopno startst; 7751573Srgrimessopno stopst; 7761573Srgrimes{ 77792889Sobrien states st = m->st; 77892889Sobrien states fresh = m->fresh; 77992889Sobrien states tmp = m->tmp; 78092889Sobrien char *p = start; 781132019Stjr wint_t c; 782132019Stjr wint_t lastc; /* previous c */ 783132019Stjr wint_t flagch; 78492889Sobrien int i; 78592889Sobrien char *coldp; /* last p after which no match was underway */ 786132019Stjr size_t clen; 7871573Srgrimes 7881573Srgrimes CLEAR(st); 7891573Srgrimes SET1(st, startst); 7901573Srgrimes st = step(m->g, startst, stopst, st, NOTHING, st); 7911573Srgrimes ASSIGN(fresh, st); 7921573Srgrimes SP("start", st, *p); 7931573Srgrimes coldp = NULL; 794132019Stjr if (start == m->beginp) 795132019Stjr c = OUT; 796132019Stjr else { 797132019Stjr /* 798132019Stjr * XXX Wrong if the previous character was multi-byte. 799132019Stjr * Newline never is (in encodings supported by FreeBSD), 800132019Stjr * so this only breaks the ISWORD tests below. 801132019Stjr */ 802132019Stjr c = (uch)*(start - 1); 803132019Stjr } 8041573Srgrimes for (;;) { 8051573Srgrimes /* next character */ 8061573Srgrimes lastc = c; 807132019Stjr if (p == m->endp) 808132019Stjr c = OUT; 809132019Stjr else 810137959Stjr clen = XMBRTOWC(&c, p, stop - p, &m->mbs, BADCHAR); 8111573Srgrimes if (EQ(st, fresh)) 8121573Srgrimes coldp = p; 8131573Srgrimes 8141573Srgrimes /* is there an EOL and/or BOL between lastc and c? */ 8151573Srgrimes flagch = '\0'; 8161573Srgrimes i = 0; 8171573Srgrimes if ( (lastc == '\n' && m->g->cflags®_NEWLINE) || 8181573Srgrimes (lastc == OUT && !(m->eflags®_NOTBOL)) ) { 8191573Srgrimes flagch = BOL; 8201573Srgrimes i = m->g->nbol; 8211573Srgrimes } 8221573Srgrimes if ( (c == '\n' && m->g->cflags®_NEWLINE) || 8231573Srgrimes (c == OUT && !(m->eflags®_NOTEOL)) ) { 8241573Srgrimes flagch = (flagch == BOL) ? BOLEOL : EOL; 8251573Srgrimes i += m->g->neol; 8261573Srgrimes } 8271573Srgrimes if (i != 0) { 8281573Srgrimes for (; i > 0; i--) 8291573Srgrimes st = step(m->g, startst, stopst, st, flagch, st); 8301573Srgrimes SP("boleol", st, c); 8311573Srgrimes } 8321573Srgrimes 8331573Srgrimes /* how about a word boundary? */ 8341573Srgrimes if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && 8351573Srgrimes (c != OUT && ISWORD(c)) ) { 8361573Srgrimes flagch = BOW; 8371573Srgrimes } 8381573Srgrimes if ( (lastc != OUT && ISWORD(lastc)) && 8391573Srgrimes (flagch == EOL || (c != OUT && !ISWORD(c))) ) { 8401573Srgrimes flagch = EOW; 8411573Srgrimes } 8421573Srgrimes if (flagch == BOW || flagch == EOW) { 8431573Srgrimes st = step(m->g, startst, stopst, st, flagch, st); 8441573Srgrimes SP("boweow", st, c); 8451573Srgrimes } 8461573Srgrimes 8471573Srgrimes /* are we done? */ 8481573Srgrimes if (ISSET(st, stopst) || p == stop) 8491573Srgrimes break; /* NOTE BREAK OUT */ 8501573Srgrimes 8511573Srgrimes /* no, we must deal with this character */ 8521573Srgrimes ASSIGN(tmp, st); 8531573Srgrimes ASSIGN(st, fresh); 8541573Srgrimes assert(c != OUT); 8551573Srgrimes st = step(m->g, startst, stopst, tmp, c, st); 8561573Srgrimes SP("aft", st, c); 8571573Srgrimes assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); 858132019Stjr p += clen; 8591573Srgrimes } 8601573Srgrimes 8611573Srgrimes assert(coldp != NULL); 8621573Srgrimes m->coldp = coldp; 8631573Srgrimes if (ISSET(st, stopst)) 864137959Stjr return(p+XMBRTOWC(NULL, p, stop - p, &m->mbs, 0)); 8651573Srgrimes else 8661573Srgrimes return(NULL); 8671573Srgrimes} 8681573Srgrimes 8691573Srgrimes/* 8701573Srgrimes - slow - step through the string more deliberately 87192889Sobrien == static char *slow(struct match *m, char *start, \ 8721573Srgrimes == char *stop, sopno startst, sopno stopst); 8731573Srgrimes */ 8741573Srgrimesstatic char * /* where it ended */ 8751573Srgrimesslow(m, start, stop, startst, stopst) 87692889Sobrienstruct match *m; 8771573Srgrimeschar *start; 8781573Srgrimeschar *stop; 8791573Srgrimessopno startst; 8801573Srgrimessopno stopst; 8811573Srgrimes{ 88292889Sobrien states st = m->st; 88392889Sobrien states empty = m->empty; 88492889Sobrien states tmp = m->tmp; 88592889Sobrien char *p = start; 886132019Stjr wint_t c; 887132019Stjr wint_t lastc; /* previous c */ 888132019Stjr wint_t flagch; 88992889Sobrien int i; 89092889Sobrien char *matchp; /* last p at which a match ended */ 891132019Stjr size_t clen; 8921573Srgrimes 8931573Srgrimes AT("slow", start, stop, startst, stopst); 8941573Srgrimes CLEAR(st); 8951573Srgrimes SET1(st, startst); 8961573Srgrimes SP("sstart", st, *p); 8971573Srgrimes st = step(m->g, startst, stopst, st, NOTHING, st); 8981573Srgrimes matchp = NULL; 899132019Stjr if (start == m->beginp) 900132019Stjr c = OUT; 901132019Stjr else { 902132019Stjr /* 903132019Stjr * XXX Wrong if the previous character was multi-byte. 904132019Stjr * Newline never is (in encodings supported by FreeBSD), 905132019Stjr * so this only breaks the ISWORD tests below. 906132019Stjr */ 907132019Stjr c = (uch)*(start - 1); 908132019Stjr } 9091573Srgrimes for (;;) { 9101573Srgrimes /* next character */ 9111573Srgrimes lastc = c; 912132019Stjr if (p == m->endp) { 913132019Stjr c = OUT; 914132019Stjr clen = 0; 915132019Stjr } else 916137959Stjr clen = XMBRTOWC(&c, p, stop - p, &m->mbs, BADCHAR); 9171573Srgrimes 9181573Srgrimes /* is there an EOL and/or BOL between lastc and c? */ 9191573Srgrimes flagch = '\0'; 9201573Srgrimes i = 0; 9211573Srgrimes if ( (lastc == '\n' && m->g->cflags®_NEWLINE) || 9221573Srgrimes (lastc == OUT && !(m->eflags®_NOTBOL)) ) { 9231573Srgrimes flagch = BOL; 9241573Srgrimes i = m->g->nbol; 9251573Srgrimes } 9261573Srgrimes if ( (c == '\n' && m->g->cflags®_NEWLINE) || 9271573Srgrimes (c == OUT && !(m->eflags®_NOTEOL)) ) { 9281573Srgrimes flagch = (flagch == BOL) ? BOLEOL : EOL; 9291573Srgrimes i += m->g->neol; 9301573Srgrimes } 9311573Srgrimes if (i != 0) { 9321573Srgrimes for (; i > 0; i--) 9331573Srgrimes st = step(m->g, startst, stopst, st, flagch, st); 9341573Srgrimes SP("sboleol", st, c); 9351573Srgrimes } 9361573Srgrimes 9371573Srgrimes /* how about a word boundary? */ 9381573Srgrimes if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && 9391573Srgrimes (c != OUT && ISWORD(c)) ) { 9401573Srgrimes flagch = BOW; 9411573Srgrimes } 9421573Srgrimes if ( (lastc != OUT && ISWORD(lastc)) && 9431573Srgrimes (flagch == EOL || (c != OUT && !ISWORD(c))) ) { 9441573Srgrimes flagch = EOW; 9451573Srgrimes } 9461573Srgrimes if (flagch == BOW || flagch == EOW) { 9471573Srgrimes st = step(m->g, startst, stopst, st, flagch, st); 9481573Srgrimes SP("sboweow", st, c); 9491573Srgrimes } 9501573Srgrimes 9511573Srgrimes /* are we done? */ 9521573Srgrimes if (ISSET(st, stopst)) 9531573Srgrimes matchp = p; 9541573Srgrimes if (EQ(st, empty) || p == stop) 9551573Srgrimes break; /* NOTE BREAK OUT */ 9561573Srgrimes 9571573Srgrimes /* no, we must deal with this character */ 9581573Srgrimes ASSIGN(tmp, st); 9591573Srgrimes ASSIGN(st, empty); 9601573Srgrimes assert(c != OUT); 9611573Srgrimes st = step(m->g, startst, stopst, tmp, c, st); 9621573Srgrimes SP("saft", st, c); 9631573Srgrimes assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); 964132019Stjr p += clen; 9651573Srgrimes } 9661573Srgrimes 9671573Srgrimes return(matchp); 9681573Srgrimes} 9691573Srgrimes 9701573Srgrimes 9711573Srgrimes/* 9721573Srgrimes - step - map set of states reachable before char to set reachable after 97392889Sobrien == static states step(struct re_guts *g, sopno start, sopno stop, \ 97492889Sobrien == states bef, int ch, states aft); 975132019Stjr == #define BOL (OUT-1) 976132019Stjr == #define EOL (BOL-1) 977132019Stjr == #define BOLEOL (BOL-2) 978132019Stjr == #define NOTHING (BOL-3) 979132019Stjr == #define BOW (BOL-4) 980132019Stjr == #define EOW (BOL-5) 981132019Stjr == #define BADCHAR (BOL-6) 982132019Stjr == #define NONCHAR(c) ((c) <= OUT) 9831573Srgrimes */ 9841573Srgrimesstatic states 9851573Srgrimesstep(g, start, stop, bef, ch, aft) 98692889Sobrienstruct re_guts *g; 9871573Srgrimessopno start; /* start state within strip */ 9881573Srgrimessopno stop; /* state after stop state within strip */ 98992889Sobrienstates bef; /* states reachable before */ 990132019Stjrwint_t ch; /* character or NONCHAR code */ 99192889Sobrienstates aft; /* states already known reachable after */ 9921573Srgrimes{ 99392889Sobrien cset *cs; 99492889Sobrien sop s; 99592889Sobrien sopno pc; 99692889Sobrien onestate here; /* note, macros know this name */ 99792889Sobrien sopno look; 99892889Sobrien int i; 9991573Srgrimes 10001573Srgrimes for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { 10011573Srgrimes s = g->strip[pc]; 10021573Srgrimes switch (OP(s)) { 10031573Srgrimes case OEND: 10041573Srgrimes assert(pc == stop-1); 10051573Srgrimes break; 10061573Srgrimes case OCHAR: 10071573Srgrimes /* only characters can match */ 1008132019Stjr assert(!NONCHAR(ch) || ch != OPND(s)); 1009132019Stjr if (ch == OPND(s)) 10101573Srgrimes FWD(aft, bef, 1); 10111573Srgrimes break; 10121573Srgrimes case OBOL: 10131573Srgrimes if (ch == BOL || ch == BOLEOL) 10141573Srgrimes FWD(aft, bef, 1); 10151573Srgrimes break; 10161573Srgrimes case OEOL: 10171573Srgrimes if (ch == EOL || ch == BOLEOL) 10181573Srgrimes FWD(aft, bef, 1); 10191573Srgrimes break; 10201573Srgrimes case OBOW: 10211573Srgrimes if (ch == BOW) 10221573Srgrimes FWD(aft, bef, 1); 10231573Srgrimes break; 10241573Srgrimes case OEOW: 10251573Srgrimes if (ch == EOW) 10261573Srgrimes FWD(aft, bef, 1); 10271573Srgrimes break; 10281573Srgrimes case OANY: 10291573Srgrimes if (!NONCHAR(ch)) 10301573Srgrimes FWD(aft, bef, 1); 10311573Srgrimes break; 10321573Srgrimes case OANYOF: 10331573Srgrimes cs = &g->sets[OPND(s)]; 10341573Srgrimes if (!NONCHAR(ch) && CHIN(cs, ch)) 10351573Srgrimes FWD(aft, bef, 1); 10361573Srgrimes break; 10371573Srgrimes case OBACK_: /* ignored here */ 10381573Srgrimes case O_BACK: 10391573Srgrimes FWD(aft, aft, 1); 10401573Srgrimes break; 10411573Srgrimes case OPLUS_: /* forward, this is just an empty */ 10421573Srgrimes FWD(aft, aft, 1); 10431573Srgrimes break; 10441573Srgrimes case O_PLUS: /* both forward and back */ 10451573Srgrimes FWD(aft, aft, 1); 10461573Srgrimes i = ISSETBACK(aft, OPND(s)); 10471573Srgrimes BACK(aft, aft, OPND(s)); 10481573Srgrimes if (!i && ISSETBACK(aft, OPND(s))) { 10491573Srgrimes /* oho, must reconsider loop body */ 10501573Srgrimes pc -= OPND(s) + 1; 10511573Srgrimes INIT(here, pc); 10521573Srgrimes } 10531573Srgrimes break; 10541573Srgrimes case OQUEST_: /* two branches, both forward */ 10551573Srgrimes FWD(aft, aft, 1); 10561573Srgrimes FWD(aft, aft, OPND(s)); 10571573Srgrimes break; 10581573Srgrimes case O_QUEST: /* just an empty */ 10591573Srgrimes FWD(aft, aft, 1); 10601573Srgrimes break; 10611573Srgrimes case OLPAREN: /* not significant here */ 10621573Srgrimes case ORPAREN: 10631573Srgrimes FWD(aft, aft, 1); 10641573Srgrimes break; 10651573Srgrimes case OCH_: /* mark the first two branches */ 10661573Srgrimes FWD(aft, aft, 1); 10671573Srgrimes assert(OP(g->strip[pc+OPND(s)]) == OOR2); 10681573Srgrimes FWD(aft, aft, OPND(s)); 10691573Srgrimes break; 10701573Srgrimes case OOR1: /* done a branch, find the O_CH */ 10711573Srgrimes if (ISSTATEIN(aft, here)) { 10721573Srgrimes for (look = 1; 10731573Srgrimes OP(s = g->strip[pc+look]) != O_CH; 10741573Srgrimes look += OPND(s)) 10751573Srgrimes assert(OP(s) == OOR2); 10761573Srgrimes FWD(aft, aft, look); 10771573Srgrimes } 10781573Srgrimes break; 10791573Srgrimes case OOR2: /* propagate OCH_'s marking */ 10801573Srgrimes FWD(aft, aft, 1); 10811573Srgrimes if (OP(g->strip[pc+OPND(s)]) != O_CH) { 10821573Srgrimes assert(OP(g->strip[pc+OPND(s)]) == OOR2); 10831573Srgrimes FWD(aft, aft, OPND(s)); 10841573Srgrimes } 10851573Srgrimes break; 10861573Srgrimes case O_CH: /* just empty */ 10871573Srgrimes FWD(aft, aft, 1); 10881573Srgrimes break; 10891573Srgrimes default: /* ooooops... */ 10901573Srgrimes assert(nope); 10911573Srgrimes break; 10921573Srgrimes } 10931573Srgrimes } 10941573Srgrimes 10951573Srgrimes return(aft); 10961573Srgrimes} 10971573Srgrimes 10981573Srgrimes#ifdef REDEBUG 10991573Srgrimes/* 11001573Srgrimes - print - print a set of states 11011573Srgrimes == #ifdef REDEBUG 11021573Srgrimes == static void print(struct match *m, char *caption, states st, \ 11031573Srgrimes == int ch, FILE *d); 11041573Srgrimes == #endif 11051573Srgrimes */ 11061573Srgrimesstatic void 11071573Srgrimesprint(m, caption, st, ch, d) 11081573Srgrimesstruct match *m; 11091573Srgrimeschar *caption; 11101573Srgrimesstates st; 11111573Srgrimesint ch; 11121573SrgrimesFILE *d; 11131573Srgrimes{ 111492889Sobrien struct re_guts *g = m->g; 111592889Sobrien int i; 111692889Sobrien int first = 1; 11171573Srgrimes 11181573Srgrimes if (!(m->eflags®_TRACE)) 11191573Srgrimes return; 11201573Srgrimes 11211573Srgrimes fprintf(d, "%s", caption); 11221573Srgrimes if (ch != '\0') 11231573Srgrimes fprintf(d, " %s", pchar(ch)); 11241573Srgrimes for (i = 0; i < g->nstates; i++) 11251573Srgrimes if (ISSET(st, i)) { 11261573Srgrimes fprintf(d, "%s%d", (first) ? "\t" : ", ", i); 11271573Srgrimes first = 0; 11281573Srgrimes } 11291573Srgrimes fprintf(d, "\n"); 11301573Srgrimes} 11311573Srgrimes 11328870Srgrimes/* 11331573Srgrimes - at - print current situation 11341573Srgrimes == #ifdef REDEBUG 11351573Srgrimes == static void at(struct match *m, char *title, char *start, char *stop, \ 11361573Srgrimes == sopno startst, sopno stopst); 11371573Srgrimes == #endif 11381573Srgrimes */ 11391573Srgrimesstatic void 11401573Srgrimesat(m, title, start, stop, startst, stopst) 11411573Srgrimesstruct match *m; 11421573Srgrimeschar *title; 11431573Srgrimeschar *start; 11441573Srgrimeschar *stop; 11451573Srgrimessopno startst; 11461573Srgrimessopno stopst; 11471573Srgrimes{ 11481573Srgrimes if (!(m->eflags®_TRACE)) 11491573Srgrimes return; 11501573Srgrimes 11511573Srgrimes printf("%s %s-", title, pchar(*start)); 11521573Srgrimes printf("%s ", pchar(*stop)); 11531573Srgrimes printf("%ld-%ld\n", (long)startst, (long)stopst); 11541573Srgrimes} 11551573Srgrimes 11561573Srgrimes#ifndef PCHARDONE 11571573Srgrimes#define PCHARDONE /* never again */ 11581573Srgrimes/* 11591573Srgrimes - pchar - make a character printable 11601573Srgrimes == #ifdef REDEBUG 11611573Srgrimes == static char *pchar(int ch); 11621573Srgrimes == #endif 11631573Srgrimes * 11641573Srgrimes * Is this identical to regchar() over in debug.c? Well, yes. But a 11651573Srgrimes * duplicate here avoids having a debugging-capable regexec.o tied to 11661573Srgrimes * a matching debug.o, and this is convenient. It all disappears in 11671573Srgrimes * the non-debug compilation anyway, so it doesn't matter much. 11681573Srgrimes */ 11691573Srgrimesstatic char * /* -> representation */ 11701573Srgrimespchar(ch) 11711573Srgrimesint ch; 11721573Srgrimes{ 11731573Srgrimes static char pbuf[10]; 11741573Srgrimes 117517508Sache if (isprint((uch)ch) || ch == ' ') 11761573Srgrimes sprintf(pbuf, "%c", ch); 11771573Srgrimes else 11781573Srgrimes sprintf(pbuf, "\\%o", ch); 11791573Srgrimes return(pbuf); 11801573Srgrimes} 11811573Srgrimes#endif 11821573Srgrimes#endif 11831573Srgrimes 11841573Srgrimes#undef matcher 11851573Srgrimes#undef fast 11861573Srgrimes#undef slow 11871573Srgrimes#undef dissect 11881573Srgrimes#undef backref 11891573Srgrimes#undef step 11901573Srgrimes#undef print 11911573Srgrimes#undef at 11921573Srgrimes#undef match 1193