11573Srgrimes/*- 21573Srgrimes * Copyright (c) 1992, 1993, 1994 Henry Spencer. 31573Srgrimes * Copyright (c) 1992, 1993, 1994 41573Srgrimes * The Regents of the University of California. All rights reserved. 51573Srgrimes * 61573Srgrimes * This code is derived from software contributed to Berkeley by 71573Srgrimes * Henry Spencer. 81573Srgrimes * 91573Srgrimes * Redistribution and use in source and binary forms, with or without 101573Srgrimes * modification, are permitted provided that the following conditions 111573Srgrimes * are met: 121573Srgrimes * 1. Redistributions of source code must retain the above copyright 131573Srgrimes * notice, this list of conditions and the following disclaimer. 141573Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 151573Srgrimes * notice, this list of conditions and the following disclaimer in the 161573Srgrimes * documentation and/or other materials provided with the distribution. 171573Srgrimes * 4. Neither the name of the University nor the names of its contributors 181573Srgrimes * may be used to endorse or promote products derived from this software 191573Srgrimes * without specific prior written permission. 201573Srgrimes * 211573Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 221573Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 231573Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 241573Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 251573Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 261573Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 271573Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 281573Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 291573Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 301573Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 311573Srgrimes * SUCH DAMAGE. 321573Srgrimes * 331573Srgrimes * @(#)engine.c 8.5 (Berkeley) 3/20/94 341573Srgrimes */ 351573Srgrimes 3692986Sobrien#include <sys/cdefs.h> 3792986Sobrien__FBSDID("$FreeBSD: releng/10.3/lib/libc/regex/engine.c 265726 2014-05-09 01:20:39Z pfg $"); 3892986Sobrien 391573Srgrimes/* 401573Srgrimes * The matching engine and friends. This file is #included by regexec.c 411573Srgrimes * after suitable #defines of a variety of macros used herein, so that 421573Srgrimes * different state representations can be used without duplicating masses 431573Srgrimes * of code. 441573Srgrimes */ 451573Srgrimes 461573Srgrimes#ifdef SNAMES 471573Srgrimes#define matcher smatcher 481573Srgrimes#define fast sfast 491573Srgrimes#define slow sslow 501573Srgrimes#define dissect sdissect 511573Srgrimes#define backref sbackref 521573Srgrimes#define step sstep 531573Srgrimes#define print sprint 541573Srgrimes#define at sat 551573Srgrimes#define match smat 561573Srgrimes#endif 571573Srgrimes#ifdef LNAMES 581573Srgrimes#define matcher lmatcher 591573Srgrimes#define fast lfast 601573Srgrimes#define slow lslow 611573Srgrimes#define dissect ldissect 621573Srgrimes#define backref lbackref 631573Srgrimes#define step lstep 641573Srgrimes#define print lprint 651573Srgrimes#define at lat 661573Srgrimes#define match lmat 671573Srgrimes#endif 68132019Stjr#ifdef MNAMES 69132019Stjr#define matcher mmatcher 70132019Stjr#define fast mfast 71132019Stjr#define slow mslow 72132019Stjr#define dissect mdissect 73132019Stjr#define backref mbackref 74132019Stjr#define step mstep 75132019Stjr#define print mprint 76132019Stjr#define at mat 77132019Stjr#define match mmat 78132019Stjr#endif 791573Srgrimes 801573Srgrimes/* another structure passed up and down to avoid zillions of parameters */ 811573Srgrimesstruct match { 821573Srgrimes struct re_guts *g; 831573Srgrimes int eflags; 841573Srgrimes regmatch_t *pmatch; /* [nsub+1] (0 element unused) */ 85169982Sdelphij const char *offp; /* offsets work from here */ 86169982Sdelphij const char *beginp; /* start of string -- virtual NUL precedes */ 87169982Sdelphij const char *endp; /* end of string -- virtual NUL here */ 88169982Sdelphij const char *coldp; /* can be no match starting before here */ 89169982Sdelphij const char **lastpos; /* [nplus+1] */ 901573Srgrimes STATEVARS; 911573Srgrimes states st; /* current states */ 921573Srgrimes states fresh; /* states for a fresh start */ 931573Srgrimes states tmp; /* temporary */ 941573Srgrimes states empty; /* empty set of states */ 95132019Stjr mbstate_t mbs; /* multibyte conversion state */ 961573Srgrimes}; 971573Srgrimes 981573Srgrimes/* ========= begin header generated by ./mkh ========= */ 991573Srgrimes#ifdef __cplusplus 1001573Srgrimesextern "C" { 1011573Srgrimes#endif 1021573Srgrimes 1031573Srgrimes/* === engine.c === */ 104169982Sdelphijstatic int matcher(struct re_guts *g, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags); 105169982Sdelphijstatic const char *dissect(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst); 106169982Sdelphijstatic const char *backref(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst, sopno lev, int); 107169982Sdelphijstatic const char *fast(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst); 108169982Sdelphijstatic const char *slow(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst); 109132019Stjrstatic states step(struct re_guts *g, sopno start, sopno stop, states bef, wint_t ch, states aft); 110167222Sdelphij#define MAX_RECURSION 100 111132019Stjr#define BOL (OUT-1) 112132019Stjr#define EOL (BOL-1) 113132019Stjr#define BOLEOL (BOL-2) 114132019Stjr#define NOTHING (BOL-3) 115132019Stjr#define BOW (BOL-4) 116132019Stjr#define EOW (BOL-5) 117132019Stjr#define BADCHAR (BOL-6) 118132019Stjr#define NONCHAR(c) ((c) <= OUT) 1191573Srgrimes#ifdef REDEBUG 120169982Sdelphijstatic void print(struct match *m, const char *caption, states st, int ch, FILE *d); 1211573Srgrimes#endif 1221573Srgrimes#ifdef REDEBUG 123169982Sdelphijstatic void at(struct match *m, const char *title, const char *start, const char *stop, sopno startst, sopno stopst); 1241573Srgrimes#endif 1251573Srgrimes#ifdef REDEBUG 126169982Sdelphijstatic const char *pchar(int ch); 1271573Srgrimes#endif 1281573Srgrimes 1291573Srgrimes#ifdef __cplusplus 1301573Srgrimes} 1311573Srgrimes#endif 1321573Srgrimes/* ========= end header generated by ./mkh ========= */ 1331573Srgrimes 1341573Srgrimes#ifdef REDEBUG 1351573Srgrimes#define SP(t, s, c) print(m, t, s, c, stdout) 1361573Srgrimes#define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2) 1371573Srgrimes#define NOTE(str) { if (m->eflags®_TRACE) printf("=%s\n", (str)); } 1381573Srgrimes#else 1391573Srgrimes#define SP(t, s, c) /* nothing */ 1401573Srgrimes#define AT(t, p1, p2, s1, s2) /* nothing */ 1411573Srgrimes#define NOTE(s) /* nothing */ 1421573Srgrimes#endif 1431573Srgrimes 1441573Srgrimes/* 1451573Srgrimes - matcher - the actual matching engine 146169982Sdelphij == static int matcher(struct re_guts *g, const char *string, \ 1471573Srgrimes == size_t nmatch, regmatch_t pmatch[], int eflags); 1481573Srgrimes */ 1491573Srgrimesstatic int /* 0 success, REG_NOMATCH failure */ 150169982Sdelphijmatcher(struct re_guts *g, 151169982Sdelphij const char *string, 152169982Sdelphij size_t nmatch, 153169982Sdelphij regmatch_t pmatch[], 154169982Sdelphij int eflags) 1551573Srgrimes{ 156169982Sdelphij const char *endp; 15792889Sobrien int i; 1581573Srgrimes struct match mv; 15992889Sobrien struct match *m = &mv; 160169982Sdelphij const char *dp; 16192889Sobrien const sopno gf = g->firststate+1; /* +1 for OEND */ 16292889Sobrien const sopno gl = g->laststate; 163169982Sdelphij const char *start; 164169982Sdelphij const char *stop; 16562232Sdcs /* Boyer-Moore algorithms variables */ 166169982Sdelphij const char *pp; 16762232Sdcs int cj, mj; 168169982Sdelphij const char *mustfirst; 169169982Sdelphij const char *mustlast; 17092889Sobrien int *matchjump; 17192889Sobrien int *charjump; 1721573Srgrimes 1731573Srgrimes /* simplify the situation where possible */ 1741573Srgrimes if (g->cflags®_NOSUB) 1751573Srgrimes nmatch = 0; 1761573Srgrimes if (eflags®_STARTEND) { 1771573Srgrimes start = string + pmatch[0].rm_so; 1781573Srgrimes stop = string + pmatch[0].rm_eo; 1791573Srgrimes } else { 1801573Srgrimes start = string; 1811573Srgrimes stop = start + strlen(start); 1821573Srgrimes } 1831573Srgrimes if (stop < start) 1841573Srgrimes return(REG_INVARG); 1851573Srgrimes 1861573Srgrimes /* prescreening; this does wonders for this rather slow code */ 1871573Srgrimes if (g->must != NULL) { 18862391Sdcs if (g->charjump != NULL && g->matchjump != NULL) { 18962232Sdcs mustfirst = g->must; 19062232Sdcs mustlast = g->must + g->mlen - 1; 19162232Sdcs charjump = g->charjump; 19262232Sdcs matchjump = g->matchjump; 19362232Sdcs pp = mustlast; 19462754Sdcs for (dp = start+g->mlen-1; dp < stop;) { 19562232Sdcs /* Fast skip non-matches */ 196111010Snectar while (dp < stop && charjump[(int)*dp]) 197111010Snectar dp += charjump[(int)*dp]; 19862232Sdcs 19962754Sdcs if (dp >= stop) 20062232Sdcs break; 20162232Sdcs 20262232Sdcs /* Greedy matcher */ 20362232Sdcs /* We depend on not being used for 20462232Sdcs * for strings of length 1 20562232Sdcs */ 20662754Sdcs while (*--dp == *--pp && pp != mustfirst); 20762232Sdcs 20862754Sdcs if (*dp == *pp) 20962232Sdcs break; 21062232Sdcs 21162232Sdcs /* Jump to next possible match */ 21262232Sdcs mj = matchjump[pp - mustfirst]; 213111010Snectar cj = charjump[(int)*dp]; 21462754Sdcs dp += (cj < mj ? mj : cj); 21562754Sdcs pp = mustlast; 21662232Sdcs } 21762391Sdcs if (pp != mustfirst) 21862232Sdcs return(REG_NOMATCH); 21962232Sdcs } else { 22062232Sdcs for (dp = start; dp < stop; dp++) 22162232Sdcs if (*dp == g->must[0] && 22262232Sdcs stop - dp >= g->mlen && 22362232Sdcs memcmp(dp, g->must, (size_t)g->mlen) == 0) 22462232Sdcs break; 22562232Sdcs if (dp == stop) /* we didn't find g->must */ 22662232Sdcs return(REG_NOMATCH); 22762232Sdcs } 2281573Srgrimes } 2291573Srgrimes 2301573Srgrimes /* match struct setup */ 2311573Srgrimes m->g = g; 2321573Srgrimes m->eflags = eflags; 2331573Srgrimes m->pmatch = NULL; 2341573Srgrimes m->lastpos = NULL; 2351573Srgrimes m->offp = string; 2361573Srgrimes m->beginp = start; 2371573Srgrimes m->endp = stop; 2381573Srgrimes STATESETUP(m, 4); 2391573Srgrimes SETUP(m->st); 2401573Srgrimes SETUP(m->fresh); 2411573Srgrimes SETUP(m->tmp); 2421573Srgrimes SETUP(m->empty); 2431573Srgrimes CLEAR(m->empty); 244132019Stjr ZAPSTATE(&m->mbs); 2451573Srgrimes 24662391Sdcs /* Adjust start according to moffset, to speed things up */ 24762391Sdcs if (g->moffset > -1) 24862854Sdcs start = ((dp - g->moffset) < start) ? start : dp - g->moffset; 24962391Sdcs 250197245Sdds SP("mloop", m->st, *start); 251197245Sdds 2521573Srgrimes /* this loop does only one repetition except for backrefs */ 2531573Srgrimes for (;;) { 2541573Srgrimes endp = fast(m, start, stop, gf, gl); 2551573Srgrimes if (endp == NULL) { /* a miss */ 256139437Sdds if (m->pmatch != NULL) 257139437Sdds free((char *)m->pmatch); 258139437Sdds if (m->lastpos != NULL) 259139437Sdds free((char *)m->lastpos); 2601573Srgrimes STATETEARDOWN(m); 2611573Srgrimes return(REG_NOMATCH); 2621573Srgrimes } 2631573Srgrimes if (nmatch == 0 && !g->backrefs) 2641573Srgrimes break; /* no further info needed */ 2651573Srgrimes 2661573Srgrimes /* where? */ 2671573Srgrimes assert(m->coldp != NULL); 2681573Srgrimes for (;;) { 2691573Srgrimes NOTE("finding start"); 2701573Srgrimes endp = slow(m, m->coldp, stop, gf, gl); 2711573Srgrimes if (endp != NULL) 2721573Srgrimes break; 2731573Srgrimes assert(m->coldp < m->endp); 274132019Stjr m->coldp += XMBRTOWC(NULL, m->coldp, 275132019Stjr m->endp - m->coldp, &m->mbs, 0); 2761573Srgrimes } 2771573Srgrimes if (nmatch == 1 && !g->backrefs) 2781573Srgrimes break; /* no further info needed */ 2791573Srgrimes 2801573Srgrimes /* oh my, he wants the subexpressions... */ 2811573Srgrimes if (m->pmatch == NULL) 2821573Srgrimes m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) * 2831573Srgrimes sizeof(regmatch_t)); 2841573Srgrimes if (m->pmatch == NULL) { 2851573Srgrimes STATETEARDOWN(m); 2861573Srgrimes return(REG_ESPACE); 2871573Srgrimes } 2881573Srgrimes for (i = 1; i <= m->g->nsub; i++) 2891573Srgrimes m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1; 2901573Srgrimes if (!g->backrefs && !(m->eflags®_BACKR)) { 2911573Srgrimes NOTE("dissecting"); 2921573Srgrimes dp = dissect(m, m->coldp, endp, gf, gl); 2931573Srgrimes } else { 2941573Srgrimes if (g->nplus > 0 && m->lastpos == NULL) 295169982Sdelphij m->lastpos = malloc((g->nplus+1) * 296169982Sdelphij sizeof(const char *)); 2971573Srgrimes if (g->nplus > 0 && m->lastpos == NULL) { 2981573Srgrimes free(m->pmatch); 2991573Srgrimes STATETEARDOWN(m); 3001573Srgrimes return(REG_ESPACE); 3011573Srgrimes } 3021573Srgrimes NOTE("backref dissect"); 303167222Sdelphij dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0); 3041573Srgrimes } 3051573Srgrimes if (dp != NULL) 3061573Srgrimes break; 3071573Srgrimes 3081573Srgrimes /* uh-oh... we couldn't find a subexpression-level match */ 3091573Srgrimes assert(g->backrefs); /* must be back references doing it */ 3101573Srgrimes assert(g->nplus == 0 || m->lastpos != NULL); 3111573Srgrimes for (;;) { 3121573Srgrimes if (dp != NULL || endp <= m->coldp) 3131573Srgrimes break; /* defeat */ 3141573Srgrimes NOTE("backoff"); 3151573Srgrimes endp = slow(m, m->coldp, endp-1, gf, gl); 3161573Srgrimes if (endp == NULL) 3171573Srgrimes break; /* defeat */ 3181573Srgrimes /* try it on a shorter possibility */ 3191573Srgrimes#ifndef NDEBUG 3201573Srgrimes for (i = 1; i <= m->g->nsub; i++) { 3211573Srgrimes assert(m->pmatch[i].rm_so == -1); 3221573Srgrimes assert(m->pmatch[i].rm_eo == -1); 3231573Srgrimes } 3241573Srgrimes#endif 3251573Srgrimes NOTE("backoff dissect"); 326167222Sdelphij dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0); 3271573Srgrimes } 3281573Srgrimes assert(dp == NULL || dp == endp); 3291573Srgrimes if (dp != NULL) /* found a shorter one */ 3301573Srgrimes break; 3311573Srgrimes 3321573Srgrimes /* despite initial appearances, there is no match here */ 3331573Srgrimes NOTE("false alarm"); 334132019Stjr /* recycle starting later */ 335132019Stjr start = m->coldp + XMBRTOWC(NULL, m->coldp, 336137959Stjr stop - m->coldp, &m->mbs, 0); 3371573Srgrimes assert(start <= stop); 3381573Srgrimes } 3391573Srgrimes 3401573Srgrimes /* fill in the details if requested */ 3411573Srgrimes if (nmatch > 0) { 3421573Srgrimes pmatch[0].rm_so = m->coldp - m->offp; 3431573Srgrimes pmatch[0].rm_eo = endp - m->offp; 3441573Srgrimes } 3451573Srgrimes if (nmatch > 1) { 3461573Srgrimes assert(m->pmatch != NULL); 3471573Srgrimes for (i = 1; i < nmatch; i++) 3481573Srgrimes if (i <= m->g->nsub) 3491573Srgrimes pmatch[i] = m->pmatch[i]; 3501573Srgrimes else { 3511573Srgrimes pmatch[i].rm_so = -1; 3521573Srgrimes pmatch[i].rm_eo = -1; 3531573Srgrimes } 3541573Srgrimes } 3551573Srgrimes 3561573Srgrimes if (m->pmatch != NULL) 3571573Srgrimes free((char *)m->pmatch); 3581573Srgrimes if (m->lastpos != NULL) 3591573Srgrimes free((char *)m->lastpos); 3601573Srgrimes STATETEARDOWN(m); 3611573Srgrimes return(0); 3621573Srgrimes} 3631573Srgrimes 3641573Srgrimes/* 3651573Srgrimes - dissect - figure out what matched what, no back references 366169982Sdelphij == static const char *dissect(struct match *m, const char *start, \ 367169982Sdelphij == const char *stop, sopno startst, sopno stopst); 3681573Srgrimes */ 369169982Sdelphijstatic const char * /* == stop (success) always */ 370169982Sdelphijdissect(struct match *m, 371169982Sdelphij const char *start, 372169982Sdelphij const char *stop, 373169982Sdelphij sopno startst, 374169982Sdelphij sopno stopst) 3751573Srgrimes{ 37692889Sobrien int i; 37792889Sobrien sopno ss; /* start sop of current subRE */ 37892889Sobrien sopno es; /* end sop of current subRE */ 379169982Sdelphij const char *sp; /* start of string matched by it */ 380169982Sdelphij const char *stp; /* string matched by it cannot pass here */ 381169982Sdelphij const char *rest; /* start of rest of string */ 382169982Sdelphij const char *tail; /* string unmatched by rest of RE */ 38392889Sobrien sopno ssub; /* start sop of subsubRE */ 38492889Sobrien sopno esub; /* end sop of subsubRE */ 385169982Sdelphij const char *ssp; /* start of string matched by subsubRE */ 386169982Sdelphij const char *sep; /* end of string matched by subsubRE */ 387169982Sdelphij const char *oldssp; /* previous ssp */ 388169982Sdelphij const char *dp; 3891573Srgrimes 3901573Srgrimes AT("diss", start, stop, startst, stopst); 3911573Srgrimes sp = start; 3921573Srgrimes for (ss = startst; ss < stopst; ss = es) { 3931573Srgrimes /* identify end of subRE */ 3941573Srgrimes es = ss; 3951573Srgrimes switch (OP(m->g->strip[es])) { 3961573Srgrimes case OPLUS_: 3971573Srgrimes case OQUEST_: 3981573Srgrimes es += OPND(m->g->strip[es]); 3991573Srgrimes break; 4001573Srgrimes case OCH_: 4011573Srgrimes while (OP(m->g->strip[es]) != O_CH) 4021573Srgrimes es += OPND(m->g->strip[es]); 4031573Srgrimes break; 4041573Srgrimes } 4051573Srgrimes es++; 4061573Srgrimes 4071573Srgrimes /* figure out what it matched */ 4081573Srgrimes switch (OP(m->g->strip[ss])) { 4091573Srgrimes case OEND: 4101573Srgrimes assert(nope); 4111573Srgrimes break; 4121573Srgrimes case OCHAR: 413132019Stjr sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0); 4141573Srgrimes break; 4151573Srgrimes case OBOL: 4161573Srgrimes case OEOL: 4171573Srgrimes case OBOW: 4181573Srgrimes case OEOW: 4191573Srgrimes break; 4201573Srgrimes case OANY: 4211573Srgrimes case OANYOF: 422132019Stjr sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0); 4231573Srgrimes break; 4241573Srgrimes case OBACK_: 4251573Srgrimes case O_BACK: 4261573Srgrimes assert(nope); 4271573Srgrimes break; 4281573Srgrimes /* cases where length of match is hard to find */ 4291573Srgrimes case OQUEST_: 4301573Srgrimes stp = stop; 4311573Srgrimes for (;;) { 4321573Srgrimes /* how long could this one be? */ 4331573Srgrimes rest = slow(m, sp, stp, ss, es); 4341573Srgrimes assert(rest != NULL); /* it did match */ 4351573Srgrimes /* could the rest match the rest? */ 4361573Srgrimes tail = slow(m, rest, stop, es, stopst); 4371573Srgrimes if (tail == stop) 4381573Srgrimes break; /* yes! */ 4391573Srgrimes /* no -- try a shorter match for this one */ 4401573Srgrimes stp = rest - 1; 4411573Srgrimes assert(stp >= sp); /* it did work */ 4421573Srgrimes } 4431573Srgrimes ssub = ss + 1; 4441573Srgrimes esub = es - 1; 4451573Srgrimes /* did innards match? */ 4461573Srgrimes if (slow(m, sp, rest, ssub, esub) != NULL) { 4471573Srgrimes dp = dissect(m, sp, rest, ssub, esub); 4481573Srgrimes assert(dp == rest); 4491573Srgrimes } else /* no */ 4501573Srgrimes assert(sp == rest); 4511573Srgrimes sp = rest; 4521573Srgrimes break; 4531573Srgrimes case OPLUS_: 4541573Srgrimes stp = stop; 4551573Srgrimes for (;;) { 4561573Srgrimes /* how long could this one be? */ 4571573Srgrimes rest = slow(m, sp, stp, ss, es); 4581573Srgrimes assert(rest != NULL); /* it did match */ 4591573Srgrimes /* could the rest match the rest? */ 4601573Srgrimes tail = slow(m, rest, stop, es, stopst); 4611573Srgrimes if (tail == stop) 4621573Srgrimes break; /* yes! */ 4631573Srgrimes /* no -- try a shorter match for this one */ 4641573Srgrimes stp = rest - 1; 4651573Srgrimes assert(stp >= sp); /* it did work */ 4661573Srgrimes } 4671573Srgrimes ssub = ss + 1; 4681573Srgrimes esub = es - 1; 4691573Srgrimes ssp = sp; 4701573Srgrimes oldssp = ssp; 4711573Srgrimes for (;;) { /* find last match of innards */ 4721573Srgrimes sep = slow(m, ssp, rest, ssub, esub); 4731573Srgrimes if (sep == NULL || sep == ssp) 4741573Srgrimes break; /* failed or matched null */ 4751573Srgrimes oldssp = ssp; /* on to next try */ 4761573Srgrimes ssp = sep; 4771573Srgrimes } 4781573Srgrimes if (sep == NULL) { 4791573Srgrimes /* last successful match */ 4801573Srgrimes sep = ssp; 4811573Srgrimes ssp = oldssp; 4821573Srgrimes } 4831573Srgrimes assert(sep == rest); /* must exhaust substring */ 4841573Srgrimes assert(slow(m, ssp, sep, ssub, esub) == rest); 4851573Srgrimes dp = dissect(m, ssp, sep, ssub, esub); 4861573Srgrimes assert(dp == sep); 4871573Srgrimes sp = rest; 4881573Srgrimes break; 4891573Srgrimes case OCH_: 4901573Srgrimes stp = stop; 4911573Srgrimes for (;;) { 4921573Srgrimes /* how long could this one be? */ 4931573Srgrimes rest = slow(m, sp, stp, ss, es); 4941573Srgrimes assert(rest != NULL); /* it did match */ 4951573Srgrimes /* could the rest match the rest? */ 4961573Srgrimes tail = slow(m, rest, stop, es, stopst); 4971573Srgrimes if (tail == stop) 4981573Srgrimes break; /* yes! */ 4991573Srgrimes /* no -- try a shorter match for this one */ 5001573Srgrimes stp = rest - 1; 5011573Srgrimes assert(stp >= sp); /* it did work */ 5021573Srgrimes } 5031573Srgrimes ssub = ss + 1; 5041573Srgrimes esub = ss + OPND(m->g->strip[ss]) - 1; 5051573Srgrimes assert(OP(m->g->strip[esub]) == OOR1); 5061573Srgrimes for (;;) { /* find first matching branch */ 5071573Srgrimes if (slow(m, sp, rest, ssub, esub) == rest) 5081573Srgrimes break; /* it matched all of it */ 5091573Srgrimes /* that one missed, try next one */ 5101573Srgrimes assert(OP(m->g->strip[esub]) == OOR1); 5111573Srgrimes esub++; 5121573Srgrimes assert(OP(m->g->strip[esub]) == OOR2); 5131573Srgrimes ssub = esub + 1; 5141573Srgrimes esub += OPND(m->g->strip[esub]); 5151573Srgrimes if (OP(m->g->strip[esub]) == OOR2) 5161573Srgrimes esub--; 5171573Srgrimes else 5181573Srgrimes assert(OP(m->g->strip[esub]) == O_CH); 5191573Srgrimes } 5201573Srgrimes dp = dissect(m, sp, rest, ssub, esub); 5211573Srgrimes assert(dp == rest); 5221573Srgrimes sp = rest; 5231573Srgrimes break; 5241573Srgrimes case O_PLUS: 5251573Srgrimes case O_QUEST: 5261573Srgrimes case OOR1: 5271573Srgrimes case OOR2: 5281573Srgrimes case O_CH: 5291573Srgrimes assert(nope); 5301573Srgrimes break; 5311573Srgrimes case OLPAREN: 5321573Srgrimes i = OPND(m->g->strip[ss]); 5331573Srgrimes assert(0 < i && i <= m->g->nsub); 5341573Srgrimes m->pmatch[i].rm_so = sp - m->offp; 5351573Srgrimes break; 5361573Srgrimes case ORPAREN: 5371573Srgrimes i = OPND(m->g->strip[ss]); 5381573Srgrimes assert(0 < i && i <= m->g->nsub); 5391573Srgrimes m->pmatch[i].rm_eo = sp - m->offp; 5401573Srgrimes break; 5411573Srgrimes default: /* uh oh */ 5421573Srgrimes assert(nope); 5431573Srgrimes break; 5441573Srgrimes } 5451573Srgrimes } 5461573Srgrimes 5471573Srgrimes assert(sp == stop); 5481573Srgrimes return(sp); 5491573Srgrimes} 5501573Srgrimes 5511573Srgrimes/* 5521573Srgrimes - backref - figure out what matched what, figuring in back references 553169982Sdelphij == static const char *backref(struct match *m, const char *start, \ 554169982Sdelphij == const char *stop, sopno startst, sopno stopst, sopno lev); 5551573Srgrimes */ 556169982Sdelphijstatic const char * /* == stop (success) or NULL (failure) */ 557169982Sdelphijbackref(struct match *m, 558169982Sdelphij const char *start, 559169982Sdelphij const char *stop, 560169982Sdelphij sopno startst, 561169982Sdelphij sopno stopst, 562169982Sdelphij sopno lev, /* PLUS nesting level */ 563169982Sdelphij int rec) 5641573Srgrimes{ 56592889Sobrien int i; 56692889Sobrien sopno ss; /* start sop of current subRE */ 567169982Sdelphij const char *sp; /* start of string matched by it */ 56892889Sobrien sopno ssub; /* start sop of subsubRE */ 56992889Sobrien sopno esub; /* end sop of subsubRE */ 570169982Sdelphij const char *ssp; /* start of string matched by subsubRE */ 571169982Sdelphij const char *dp; 57292889Sobrien size_t len; 57392889Sobrien int hard; 57492889Sobrien sop s; 57592889Sobrien regoff_t offsave; 57692889Sobrien cset *cs; 577132019Stjr wint_t wc; 5781573Srgrimes 5791573Srgrimes AT("back", start, stop, startst, stopst); 5801573Srgrimes sp = start; 5811573Srgrimes 5821573Srgrimes /* get as far as we can with easy stuff */ 5831573Srgrimes hard = 0; 5841573Srgrimes for (ss = startst; !hard && ss < stopst; ss++) 5851573Srgrimes switch (OP(s = m->g->strip[ss])) { 5861573Srgrimes case OCHAR: 587132019Stjr if (sp == stop) 5881573Srgrimes return(NULL); 589132019Stjr sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR); 590132019Stjr if (wc != OPND(s)) 591132019Stjr return(NULL); 5921573Srgrimes break; 5931573Srgrimes case OANY: 5941573Srgrimes if (sp == stop) 5951573Srgrimes return(NULL); 596132019Stjr sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR); 597132019Stjr if (wc == BADCHAR) 598132019Stjr return (NULL); 5991573Srgrimes break; 6001573Srgrimes case OANYOF: 601132019Stjr if (sp == stop) 602132019Stjr return (NULL); 6031573Srgrimes cs = &m->g->sets[OPND(s)]; 604132019Stjr sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR); 605132019Stjr if (wc == BADCHAR || !CHIN(cs, wc)) 6061573Srgrimes return(NULL); 6071573Srgrimes break; 6081573Srgrimes case OBOL: 6091573Srgrimes if ( (sp == m->beginp && !(m->eflags®_NOTBOL)) || 6101573Srgrimes (sp < m->endp && *(sp-1) == '\n' && 6111573Srgrimes (m->g->cflags®_NEWLINE)) ) 6121573Srgrimes { /* yes */ } 6131573Srgrimes else 6141573Srgrimes return(NULL); 6151573Srgrimes break; 6161573Srgrimes case OEOL: 6171573Srgrimes if ( (sp == m->endp && !(m->eflags®_NOTEOL)) || 6181573Srgrimes (sp < m->endp && *sp == '\n' && 6191573Srgrimes (m->g->cflags®_NEWLINE)) ) 6201573Srgrimes { /* yes */ } 6211573Srgrimes else 6221573Srgrimes return(NULL); 6231573Srgrimes break; 6241573Srgrimes case OBOW: 6251573Srgrimes if (( (sp == m->beginp && !(m->eflags®_NOTBOL)) || 6261573Srgrimes (sp < m->endp && *(sp-1) == '\n' && 6271573Srgrimes (m->g->cflags®_NEWLINE)) || 6281573Srgrimes (sp > m->beginp && 6291573Srgrimes !ISWORD(*(sp-1))) ) && 6301573Srgrimes (sp < m->endp && ISWORD(*sp)) ) 6311573Srgrimes { /* yes */ } 6321573Srgrimes else 6331573Srgrimes return(NULL); 6341573Srgrimes break; 6351573Srgrimes case OEOW: 6361573Srgrimes if (( (sp == m->endp && !(m->eflags®_NOTEOL)) || 6371573Srgrimes (sp < m->endp && *sp == '\n' && 6381573Srgrimes (m->g->cflags®_NEWLINE)) || 6391573Srgrimes (sp < m->endp && !ISWORD(*sp)) ) && 6401573Srgrimes (sp > m->beginp && ISWORD(*(sp-1))) ) 6411573Srgrimes { /* yes */ } 6421573Srgrimes else 6431573Srgrimes return(NULL); 6441573Srgrimes break; 6451573Srgrimes case O_QUEST: 6461573Srgrimes break; 6471573Srgrimes case OOR1: /* matches null but needs to skip */ 6481573Srgrimes ss++; 6491573Srgrimes s = m->g->strip[ss]; 6501573Srgrimes do { 6511573Srgrimes assert(OP(s) == OOR2); 6521573Srgrimes ss += OPND(s); 6531573Srgrimes } while (OP(s = m->g->strip[ss]) != O_CH); 6541573Srgrimes /* note that the ss++ gets us past the O_CH */ 6551573Srgrimes break; 6561573Srgrimes default: /* have to make a choice */ 6571573Srgrimes hard = 1; 6581573Srgrimes break; 6591573Srgrimes } 6601573Srgrimes if (!hard) { /* that was it! */ 6611573Srgrimes if (sp != stop) 6621573Srgrimes return(NULL); 6631573Srgrimes return(sp); 6641573Srgrimes } 6651573Srgrimes ss--; /* adjust for the for's final increment */ 6661573Srgrimes 6671573Srgrimes /* the hard stuff */ 6681573Srgrimes AT("hard", sp, stop, ss, stopst); 6691573Srgrimes s = m->g->strip[ss]; 6701573Srgrimes switch (OP(s)) { 6711573Srgrimes case OBACK_: /* the vilest depths */ 6721573Srgrimes i = OPND(s); 6731573Srgrimes assert(0 < i && i <= m->g->nsub); 6741573Srgrimes if (m->pmatch[i].rm_eo == -1) 6751573Srgrimes return(NULL); 6761573Srgrimes assert(m->pmatch[i].rm_so != -1); 6771573Srgrimes len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so; 678167222Sdelphij if (len == 0 && rec++ > MAX_RECURSION) 679167216Sdelphij return(NULL); 6801573Srgrimes assert(stop - m->beginp >= len); 6811573Srgrimes if (sp > stop - len) 6821573Srgrimes return(NULL); /* not enough left to match */ 6831573Srgrimes ssp = m->offp + m->pmatch[i].rm_so; 6841573Srgrimes if (memcmp(sp, ssp, len) != 0) 6851573Srgrimes return(NULL); 6861573Srgrimes while (m->g->strip[ss] != SOP(O_BACK, i)) 6871573Srgrimes ss++; 688167222Sdelphij return(backref(m, sp+len, stop, ss+1, stopst, lev, rec)); 6891573Srgrimes case OQUEST_: /* to null or not */ 690167222Sdelphij dp = backref(m, sp, stop, ss+1, stopst, lev, rec); 6911573Srgrimes if (dp != NULL) 6921573Srgrimes return(dp); /* not */ 693167222Sdelphij return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev, rec)); 6941573Srgrimes case OPLUS_: 6951573Srgrimes assert(m->lastpos != NULL); 6961573Srgrimes assert(lev+1 <= m->g->nplus); 6971573Srgrimes m->lastpos[lev+1] = sp; 698167222Sdelphij return(backref(m, sp, stop, ss+1, stopst, lev+1, rec)); 6991573Srgrimes case O_PLUS: 7001573Srgrimes if (sp == m->lastpos[lev]) /* last pass matched null */ 701167222Sdelphij return(backref(m, sp, stop, ss+1, stopst, lev-1, rec)); 7021573Srgrimes /* try another pass */ 7031573Srgrimes m->lastpos[lev] = sp; 704167222Sdelphij dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev, rec); 7051573Srgrimes if (dp == NULL) 706167222Sdelphij return(backref(m, sp, stop, ss+1, stopst, lev-1, rec)); 7071573Srgrimes else 7081573Srgrimes return(dp); 7091573Srgrimes case OCH_: /* find the right one, if any */ 7101573Srgrimes ssub = ss + 1; 7111573Srgrimes esub = ss + OPND(s) - 1; 7121573Srgrimes assert(OP(m->g->strip[esub]) == OOR1); 7131573Srgrimes for (;;) { /* find first matching branch */ 714167222Sdelphij dp = backref(m, sp, stop, ssub, esub, lev, rec); 7151573Srgrimes if (dp != NULL) 7161573Srgrimes return(dp); 7171573Srgrimes /* that one missed, try next one */ 7181573Srgrimes if (OP(m->g->strip[esub]) == O_CH) 7191573Srgrimes return(NULL); /* there is none */ 7201573Srgrimes esub++; 7211573Srgrimes assert(OP(m->g->strip[esub]) == OOR2); 7221573Srgrimes ssub = esub + 1; 7231573Srgrimes esub += OPND(m->g->strip[esub]); 7241573Srgrimes if (OP(m->g->strip[esub]) == OOR2) 7251573Srgrimes esub--; 7261573Srgrimes else 7271573Srgrimes assert(OP(m->g->strip[esub]) == O_CH); 7281573Srgrimes } 729265726Spfg /* NOTREACHED */ 7301573Srgrimes break; 7311573Srgrimes case OLPAREN: /* must undo assignment if rest fails */ 7321573Srgrimes i = OPND(s); 7331573Srgrimes assert(0 < i && i <= m->g->nsub); 7341573Srgrimes offsave = m->pmatch[i].rm_so; 7351573Srgrimes m->pmatch[i].rm_so = sp - m->offp; 736167222Sdelphij dp = backref(m, sp, stop, ss+1, stopst, lev, rec); 7371573Srgrimes if (dp != NULL) 7381573Srgrimes return(dp); 7391573Srgrimes m->pmatch[i].rm_so = offsave; 7401573Srgrimes return(NULL); 7411573Srgrimes case ORPAREN: /* must undo assignment if rest fails */ 7421573Srgrimes i = OPND(s); 7431573Srgrimes assert(0 < i && i <= m->g->nsub); 7441573Srgrimes offsave = m->pmatch[i].rm_eo; 7451573Srgrimes m->pmatch[i].rm_eo = sp - m->offp; 746167222Sdelphij dp = backref(m, sp, stop, ss+1, stopst, lev, rec); 7471573Srgrimes if (dp != NULL) 7481573Srgrimes return(dp); 7491573Srgrimes m->pmatch[i].rm_eo = offsave; 7501573Srgrimes return(NULL); 7511573Srgrimes default: /* uh oh */ 7521573Srgrimes assert(nope); 7531573Srgrimes break; 7541573Srgrimes } 7551573Srgrimes 7561573Srgrimes /* "can't happen" */ 7571573Srgrimes assert(nope); 7581573Srgrimes /* NOTREACHED */ 75911664Sphk return "shut up gcc"; 7601573Srgrimes} 7611573Srgrimes 7621573Srgrimes/* 7631573Srgrimes - fast - step through the string at top speed 764169982Sdelphij == static const char *fast(struct match *m, const char *start, \ 765169982Sdelphij == const char *stop, sopno startst, sopno stopst); 7661573Srgrimes */ 767169982Sdelphijstatic const char * /* where tentative match ended, or NULL */ 768169982Sdelphijfast( struct match *m, 769169982Sdelphij const char *start, 770169982Sdelphij const char *stop, 771169982Sdelphij sopno startst, 772169982Sdelphij sopno stopst) 7731573Srgrimes{ 77492889Sobrien states st = m->st; 77592889Sobrien states fresh = m->fresh; 77692889Sobrien states tmp = m->tmp; 777169982Sdelphij const char *p = start; 778132019Stjr wint_t c; 779132019Stjr wint_t lastc; /* previous c */ 780132019Stjr wint_t flagch; 78192889Sobrien int i; 782169982Sdelphij const char *coldp; /* last p after which no match was underway */ 783132019Stjr size_t clen; 7841573Srgrimes 7851573Srgrimes CLEAR(st); 7861573Srgrimes SET1(st, startst); 787197245Sdds SP("fast", st, *p); 7881573Srgrimes st = step(m->g, startst, stopst, st, NOTHING, st); 7891573Srgrimes ASSIGN(fresh, st); 7901573Srgrimes SP("start", st, *p); 7911573Srgrimes coldp = NULL; 792132019Stjr if (start == m->beginp) 793132019Stjr c = OUT; 794132019Stjr else { 795132019Stjr /* 796132019Stjr * XXX Wrong if the previous character was multi-byte. 797132019Stjr * Newline never is (in encodings supported by FreeBSD), 798132019Stjr * so this only breaks the ISWORD tests below. 799132019Stjr */ 800132019Stjr c = (uch)*(start - 1); 801132019Stjr } 8021573Srgrimes for (;;) { 8031573Srgrimes /* next character */ 8041573Srgrimes lastc = c; 805149180Stjr if (p == m->endp) { 806149180Stjr clen = 0; 807132019Stjr c = OUT; 808149180Stjr } else 809149180Stjr clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR); 8101573Srgrimes if (EQ(st, fresh)) 8111573Srgrimes coldp = p; 8121573Srgrimes 8131573Srgrimes /* is there an EOL and/or BOL between lastc and c? */ 8141573Srgrimes flagch = '\0'; 8151573Srgrimes i = 0; 8161573Srgrimes if ( (lastc == '\n' && m->g->cflags®_NEWLINE) || 8171573Srgrimes (lastc == OUT && !(m->eflags®_NOTBOL)) ) { 8181573Srgrimes flagch = BOL; 8191573Srgrimes i = m->g->nbol; 8201573Srgrimes } 8211573Srgrimes if ( (c == '\n' && m->g->cflags®_NEWLINE) || 8221573Srgrimes (c == OUT && !(m->eflags®_NOTEOL)) ) { 8231573Srgrimes flagch = (flagch == BOL) ? BOLEOL : EOL; 8241573Srgrimes i += m->g->neol; 8251573Srgrimes } 8261573Srgrimes if (i != 0) { 8271573Srgrimes for (; i > 0; i--) 8281573Srgrimes st = step(m->g, startst, stopst, st, flagch, st); 8291573Srgrimes SP("boleol", st, c); 8301573Srgrimes } 8311573Srgrimes 8321573Srgrimes /* how about a word boundary? */ 8331573Srgrimes if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && 8341573Srgrimes (c != OUT && ISWORD(c)) ) { 8351573Srgrimes flagch = BOW; 8361573Srgrimes } 8371573Srgrimes if ( (lastc != OUT && ISWORD(lastc)) && 8381573Srgrimes (flagch == EOL || (c != OUT && !ISWORD(c))) ) { 8391573Srgrimes flagch = EOW; 8401573Srgrimes } 8411573Srgrimes if (flagch == BOW || flagch == EOW) { 8421573Srgrimes st = step(m->g, startst, stopst, st, flagch, st); 8431573Srgrimes SP("boweow", st, c); 8441573Srgrimes } 8451573Srgrimes 8461573Srgrimes /* are we done? */ 847149180Stjr if (ISSET(st, stopst) || p == stop || clen > stop - p) 8481573Srgrimes break; /* NOTE BREAK OUT */ 8491573Srgrimes 8501573Srgrimes /* no, we must deal with this character */ 8511573Srgrimes ASSIGN(tmp, st); 8521573Srgrimes ASSIGN(st, fresh); 8531573Srgrimes assert(c != OUT); 8541573Srgrimes st = step(m->g, startst, stopst, tmp, c, st); 8551573Srgrimes SP("aft", st, c); 8561573Srgrimes assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); 857132019Stjr p += clen; 8581573Srgrimes } 8591573Srgrimes 8601573Srgrimes assert(coldp != NULL); 8611573Srgrimes m->coldp = coldp; 8621573Srgrimes if (ISSET(st, stopst)) 863137959Stjr return(p+XMBRTOWC(NULL, p, stop - p, &m->mbs, 0)); 8641573Srgrimes else 8651573Srgrimes return(NULL); 8661573Srgrimes} 8671573Srgrimes 8681573Srgrimes/* 8691573Srgrimes - slow - step through the string more deliberately 870169982Sdelphij == static const char *slow(struct match *m, const char *start, \ 871169982Sdelphij == const char *stop, sopno startst, sopno stopst); 8721573Srgrimes */ 873169982Sdelphijstatic const char * /* where it ended */ 874169982Sdelphijslow( struct match *m, 875169982Sdelphij const char *start, 876169982Sdelphij const char *stop, 877169982Sdelphij sopno startst, 878169982Sdelphij sopno stopst) 8791573Srgrimes{ 88092889Sobrien states st = m->st; 88192889Sobrien states empty = m->empty; 88292889Sobrien states tmp = m->tmp; 883169982Sdelphij const char *p = start; 884132019Stjr wint_t c; 885132019Stjr wint_t lastc; /* previous c */ 886132019Stjr wint_t flagch; 88792889Sobrien int i; 888169982Sdelphij const char *matchp; /* last p at which a match ended */ 889132019Stjr size_t clen; 8901573Srgrimes 8911573Srgrimes AT("slow", start, stop, startst, stopst); 8921573Srgrimes CLEAR(st); 8931573Srgrimes SET1(st, startst); 8941573Srgrimes SP("sstart", st, *p); 8951573Srgrimes st = step(m->g, startst, stopst, st, NOTHING, st); 8961573Srgrimes matchp = NULL; 897132019Stjr if (start == m->beginp) 898132019Stjr c = OUT; 899132019Stjr else { 900132019Stjr /* 901132019Stjr * XXX Wrong if the previous character was multi-byte. 902132019Stjr * Newline never is (in encodings supported by FreeBSD), 903132019Stjr * so this only breaks the ISWORD tests below. 904132019Stjr */ 905132019Stjr c = (uch)*(start - 1); 906132019Stjr } 9071573Srgrimes for (;;) { 9081573Srgrimes /* next character */ 9091573Srgrimes lastc = c; 910132019Stjr if (p == m->endp) { 911132019Stjr c = OUT; 912132019Stjr clen = 0; 913132019Stjr } else 914149180Stjr clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR); 9151573Srgrimes 9161573Srgrimes /* is there an EOL and/or BOL between lastc and c? */ 9171573Srgrimes flagch = '\0'; 9181573Srgrimes i = 0; 9191573Srgrimes if ( (lastc == '\n' && m->g->cflags®_NEWLINE) || 9201573Srgrimes (lastc == OUT && !(m->eflags®_NOTBOL)) ) { 9211573Srgrimes flagch = BOL; 9221573Srgrimes i = m->g->nbol; 9231573Srgrimes } 9241573Srgrimes if ( (c == '\n' && m->g->cflags®_NEWLINE) || 9251573Srgrimes (c == OUT && !(m->eflags®_NOTEOL)) ) { 9261573Srgrimes flagch = (flagch == BOL) ? BOLEOL : EOL; 9271573Srgrimes i += m->g->neol; 9281573Srgrimes } 9291573Srgrimes if (i != 0) { 9301573Srgrimes for (; i > 0; i--) 9311573Srgrimes st = step(m->g, startst, stopst, st, flagch, st); 9321573Srgrimes SP("sboleol", st, c); 9331573Srgrimes } 9341573Srgrimes 9351573Srgrimes /* how about a word boundary? */ 9361573Srgrimes if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && 9371573Srgrimes (c != OUT && ISWORD(c)) ) { 9381573Srgrimes flagch = BOW; 9391573Srgrimes } 9401573Srgrimes if ( (lastc != OUT && ISWORD(lastc)) && 9411573Srgrimes (flagch == EOL || (c != OUT && !ISWORD(c))) ) { 9421573Srgrimes flagch = EOW; 9431573Srgrimes } 9441573Srgrimes if (flagch == BOW || flagch == EOW) { 9451573Srgrimes st = step(m->g, startst, stopst, st, flagch, st); 9461573Srgrimes SP("sboweow", st, c); 9471573Srgrimes } 9481573Srgrimes 9491573Srgrimes /* are we done? */ 9501573Srgrimes if (ISSET(st, stopst)) 9511573Srgrimes matchp = p; 952149180Stjr if (EQ(st, empty) || p == stop || clen > stop - p) 9531573Srgrimes break; /* NOTE BREAK OUT */ 9541573Srgrimes 9551573Srgrimes /* no, we must deal with this character */ 9561573Srgrimes ASSIGN(tmp, st); 9571573Srgrimes ASSIGN(st, empty); 9581573Srgrimes assert(c != OUT); 9591573Srgrimes st = step(m->g, startst, stopst, tmp, c, st); 9601573Srgrimes SP("saft", st, c); 9611573Srgrimes assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); 962132019Stjr p += clen; 9631573Srgrimes } 9641573Srgrimes 9651573Srgrimes return(matchp); 9661573Srgrimes} 9671573Srgrimes 9681573Srgrimes 9691573Srgrimes/* 9701573Srgrimes - step - map set of states reachable before char to set reachable after 97192889Sobrien == static states step(struct re_guts *g, sopno start, sopno stop, \ 97292889Sobrien == states bef, int ch, states aft); 973132019Stjr == #define BOL (OUT-1) 974132019Stjr == #define EOL (BOL-1) 975132019Stjr == #define BOLEOL (BOL-2) 976132019Stjr == #define NOTHING (BOL-3) 977132019Stjr == #define BOW (BOL-4) 978132019Stjr == #define EOW (BOL-5) 979132019Stjr == #define BADCHAR (BOL-6) 980132019Stjr == #define NONCHAR(c) ((c) <= OUT) 9811573Srgrimes */ 9821573Srgrimesstatic states 983169982Sdelphijstep(struct re_guts *g, 984169982Sdelphij sopno start, /* start state within strip */ 985169982Sdelphij sopno stop, /* state after stop state within strip */ 986169982Sdelphij states bef, /* states reachable before */ 987169982Sdelphij wint_t ch, /* character or NONCHAR code */ 988169982Sdelphij states aft) /* states already known reachable after */ 9891573Srgrimes{ 99092889Sobrien cset *cs; 99192889Sobrien sop s; 99292889Sobrien sopno pc; 99392889Sobrien onestate here; /* note, macros know this name */ 99492889Sobrien sopno look; 99592889Sobrien int i; 9961573Srgrimes 9971573Srgrimes for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { 9981573Srgrimes s = g->strip[pc]; 9991573Srgrimes switch (OP(s)) { 10001573Srgrimes case OEND: 10011573Srgrimes assert(pc == stop-1); 10021573Srgrimes break; 10031573Srgrimes case OCHAR: 10041573Srgrimes /* only characters can match */ 1005132019Stjr assert(!NONCHAR(ch) || ch != OPND(s)); 1006132019Stjr if (ch == OPND(s)) 10071573Srgrimes FWD(aft, bef, 1); 10081573Srgrimes break; 10091573Srgrimes case OBOL: 10101573Srgrimes if (ch == BOL || ch == BOLEOL) 10111573Srgrimes FWD(aft, bef, 1); 10121573Srgrimes break; 10131573Srgrimes case OEOL: 10141573Srgrimes if (ch == EOL || ch == BOLEOL) 10151573Srgrimes FWD(aft, bef, 1); 10161573Srgrimes break; 10171573Srgrimes case OBOW: 10181573Srgrimes if (ch == BOW) 10191573Srgrimes FWD(aft, bef, 1); 10201573Srgrimes break; 10211573Srgrimes case OEOW: 10221573Srgrimes if (ch == EOW) 10231573Srgrimes FWD(aft, bef, 1); 10241573Srgrimes break; 10251573Srgrimes case OANY: 10261573Srgrimes if (!NONCHAR(ch)) 10271573Srgrimes FWD(aft, bef, 1); 10281573Srgrimes break; 10291573Srgrimes case OANYOF: 10301573Srgrimes cs = &g->sets[OPND(s)]; 10311573Srgrimes if (!NONCHAR(ch) && CHIN(cs, ch)) 10321573Srgrimes FWD(aft, bef, 1); 10331573Srgrimes break; 10341573Srgrimes case OBACK_: /* ignored here */ 10351573Srgrimes case O_BACK: 10361573Srgrimes FWD(aft, aft, 1); 10371573Srgrimes break; 10381573Srgrimes case OPLUS_: /* forward, this is just an empty */ 10391573Srgrimes FWD(aft, aft, 1); 10401573Srgrimes break; 10411573Srgrimes case O_PLUS: /* both forward and back */ 10421573Srgrimes FWD(aft, aft, 1); 10431573Srgrimes i = ISSETBACK(aft, OPND(s)); 10441573Srgrimes BACK(aft, aft, OPND(s)); 10451573Srgrimes if (!i && ISSETBACK(aft, OPND(s))) { 10461573Srgrimes /* oho, must reconsider loop body */ 10471573Srgrimes pc -= OPND(s) + 1; 10481573Srgrimes INIT(here, pc); 10491573Srgrimes } 10501573Srgrimes break; 10511573Srgrimes case OQUEST_: /* two branches, both forward */ 10521573Srgrimes FWD(aft, aft, 1); 10531573Srgrimes FWD(aft, aft, OPND(s)); 10541573Srgrimes break; 10551573Srgrimes case O_QUEST: /* just an empty */ 10561573Srgrimes FWD(aft, aft, 1); 10571573Srgrimes break; 10581573Srgrimes case OLPAREN: /* not significant here */ 10591573Srgrimes case ORPAREN: 10601573Srgrimes FWD(aft, aft, 1); 10611573Srgrimes break; 10621573Srgrimes case OCH_: /* mark the first two branches */ 10631573Srgrimes FWD(aft, aft, 1); 10641573Srgrimes assert(OP(g->strip[pc+OPND(s)]) == OOR2); 10651573Srgrimes FWD(aft, aft, OPND(s)); 10661573Srgrimes break; 10671573Srgrimes case OOR1: /* done a branch, find the O_CH */ 10681573Srgrimes if (ISSTATEIN(aft, here)) { 10691573Srgrimes for (look = 1; 10701573Srgrimes OP(s = g->strip[pc+look]) != O_CH; 10711573Srgrimes look += OPND(s)) 10721573Srgrimes assert(OP(s) == OOR2); 1073197246Sdds FWD(aft, aft, look + 1); 10741573Srgrimes } 10751573Srgrimes break; 10761573Srgrimes case OOR2: /* propagate OCH_'s marking */ 10771573Srgrimes FWD(aft, aft, 1); 10781573Srgrimes if (OP(g->strip[pc+OPND(s)]) != O_CH) { 10791573Srgrimes assert(OP(g->strip[pc+OPND(s)]) == OOR2); 10801573Srgrimes FWD(aft, aft, OPND(s)); 10811573Srgrimes } 10821573Srgrimes break; 10831573Srgrimes case O_CH: /* just empty */ 10841573Srgrimes FWD(aft, aft, 1); 10851573Srgrimes break; 10861573Srgrimes default: /* ooooops... */ 10871573Srgrimes assert(nope); 10881573Srgrimes break; 10891573Srgrimes } 10901573Srgrimes } 10911573Srgrimes 10921573Srgrimes return(aft); 10931573Srgrimes} 10941573Srgrimes 10951573Srgrimes#ifdef REDEBUG 10961573Srgrimes/* 10971573Srgrimes - print - print a set of states 10981573Srgrimes == #ifdef REDEBUG 1099169982Sdelphij == static void print(struct match *m, const char *caption, states st, \ 11001573Srgrimes == int ch, FILE *d); 11011573Srgrimes == #endif 11021573Srgrimes */ 11031573Srgrimesstatic void 1104169982Sdelphijprint(struct match *m, 1105169982Sdelphij const char *caption, 1106169982Sdelphij states st, 1107169982Sdelphij int ch, 1108169982Sdelphij FILE *d) 11091573Srgrimes{ 111092889Sobrien struct re_guts *g = m->g; 111192889Sobrien int i; 111292889Sobrien int first = 1; 11131573Srgrimes 11141573Srgrimes if (!(m->eflags®_TRACE)) 11151573Srgrimes return; 11161573Srgrimes 11171573Srgrimes fprintf(d, "%s", caption); 11181573Srgrimes if (ch != '\0') 11191573Srgrimes fprintf(d, " %s", pchar(ch)); 11201573Srgrimes for (i = 0; i < g->nstates; i++) 11211573Srgrimes if (ISSET(st, i)) { 11221573Srgrimes fprintf(d, "%s%d", (first) ? "\t" : ", ", i); 11231573Srgrimes first = 0; 11241573Srgrimes } 11251573Srgrimes fprintf(d, "\n"); 11261573Srgrimes} 11271573Srgrimes 11288870Srgrimes/* 11291573Srgrimes - at - print current situation 11301573Srgrimes == #ifdef REDEBUG 1131169982Sdelphij == static void at(struct match *m, const char *title, const char *start, \ 1132169982Sdelphij == const char *stop, sopno startst, sopno stopst); 11331573Srgrimes == #endif 11341573Srgrimes */ 11351573Srgrimesstatic void 1136169982Sdelphijat( struct match *m, 1137169982Sdelphij const char *title, 1138169982Sdelphij const char *start, 1139169982Sdelphij const char *stop, 1140169982Sdelphij sopno startst, 1141169982Sdelphij sopno stopst) 11421573Srgrimes{ 11431573Srgrimes if (!(m->eflags®_TRACE)) 11441573Srgrimes return; 11451573Srgrimes 11461573Srgrimes printf("%s %s-", title, pchar(*start)); 11471573Srgrimes printf("%s ", pchar(*stop)); 11481573Srgrimes printf("%ld-%ld\n", (long)startst, (long)stopst); 11491573Srgrimes} 11501573Srgrimes 11511573Srgrimes#ifndef PCHARDONE 11521573Srgrimes#define PCHARDONE /* never again */ 11531573Srgrimes/* 11541573Srgrimes - pchar - make a character printable 11551573Srgrimes == #ifdef REDEBUG 1156169982Sdelphij == static const char *pchar(int ch); 11571573Srgrimes == #endif 11581573Srgrimes * 11591573Srgrimes * Is this identical to regchar() over in debug.c? Well, yes. But a 11601573Srgrimes * duplicate here avoids having a debugging-capable regexec.o tied to 11611573Srgrimes * a matching debug.o, and this is convenient. It all disappears in 11621573Srgrimes * the non-debug compilation anyway, so it doesn't matter much. 11631573Srgrimes */ 1164169982Sdelphijstatic const char * /* -> representation */ 1165169982Sdelphijpchar(int ch) 11661573Srgrimes{ 11671573Srgrimes static char pbuf[10]; 11681573Srgrimes 116917508Sache if (isprint((uch)ch) || ch == ' ') 11701573Srgrimes sprintf(pbuf, "%c", ch); 11711573Srgrimes else 11721573Srgrimes sprintf(pbuf, "\\%o", ch); 11731573Srgrimes return(pbuf); 11741573Srgrimes} 11751573Srgrimes#endif 11761573Srgrimes#endif 11771573Srgrimes 11781573Srgrimes#undef matcher 11791573Srgrimes#undef fast 11801573Srgrimes#undef slow 11811573Srgrimes#undef dissect 11821573Srgrimes#undef backref 11831573Srgrimes#undef step 11841573Srgrimes#undef print 11851573Srgrimes#undef at 11861573Srgrimes#undef match 1187