1254225Speter/* $NetBSD: regcomp.c,v 1.7 2011/11/19 17:45:11 tnozaki Exp $ */ 2254225Speter 3254225Speter/*- 4254225Speter * Copyright (c) 1992, 1993, 1994 Henry Spencer. 5254225Speter * Copyright (c) 1992, 1993, 1994 6254225Speter * The Regents of the University of California. All rights reserved. 7254225Speter * 8254225Speter * This code is derived from software contributed to Berkeley by 9254225Speter * Henry Spencer of the University of Toronto. 10254225Speter * 11254225Speter * Redistribution and use in source and binary forms, with or without 12254225Speter * modification, are permitted provided that the following conditions 13254225Speter * are met: 14254225Speter * 1. Redistributions of source code must retain the above copyright 15254225Speter * notice, this list of conditions and the following disclaimer. 16254225Speter * 2. Redistributions in binary form must reproduce the above copyright 17254225Speter * notice, this list of conditions and the following disclaimer in the 18254225Speter * documentation and/or other materials provided with the distribution. 19254225Speter * 3. All advertising materials mentioning features or use of this software 20254225Speter * must display the following acknowledgement: 21254225Speter * This product includes software developed by the University of 22254225Speter * California, Berkeley and its contributors. 23254225Speter * 4. Neither the name of the University nor the names of its contributors 24254225Speter * may be used to endorse or promote products derived from this software 25254225Speter * without specific prior written permission. 26254225Speter * 27254225Speter * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28254225Speter * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29254225Speter * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30254225Speter * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31254225Speter * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32254225Speter * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33254225Speter * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34254225Speter * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35254225Speter * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36254225Speter * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37254225Speter * SUCH DAMAGE. 38254225Speter * 39254225Speter * @(#)regcomp.c 8.4 (Berkeley) 3/19/94 40254225Speter */ 41254225Speter 42254225Speter#if defined(LIBC_SCCS) && !defined(lint) 43254225Speterstatic char sccsid[] = "@(#)regcomp.c 8.4 (Berkeley) 3/19/94"; 44254225Speter#endif /* LIBC_SCCS and not lint */ 45254225Speter 46254225Speter#include <sys/types.h> 47254225Speter#include <stdio.h> 48254225Speter#include <string.h> 49254225Speter#include <ctype.h> 50254225Speter#include <limits.h> 51254225Speter#include <stdlib.h> 52254225Speter#include <regex.h> 53254225Speter 54254225Speter#include "utils.h" 55254225Speter#include "regex2.h" 56254225Speter 57254225Speter#include "cclass.h" 58254225Speter#include "cname.h" 59254225Speter 60254225Speter/* 61254225Speter * parse structure, passed up and down to avoid global variables and 62254225Speter * other clumsinesses 63254225Speter */ 64254225Speterstruct parse { 65254225Speter RCHAR_T *next; /* next character in RE */ 66254225Speter RCHAR_T *end; /* end of string (-> NUL normally) */ 67254225Speter int error; /* has an error been seen? */ 68254225Speter sop *strip; /* malloced strip */ 69254225Speter RCHAR_T *stripdata; /* malloced stripdata */ 70254225Speter sopno ssize; /* malloced strip size (allocated) */ 71254225Speter sopno slen; /* malloced strip length (used) */ 72254225Speter int ncsalloc; /* number of csets allocated */ 73254225Speter struct re_guts *g; 74254225Speter# define NPAREN 10 /* we need to remember () 1-9 for back refs */ 75254225Speter sopno pbegin[NPAREN]; /* -> ( ([0] unused) */ 76254225Speter sopno pend[NPAREN]; /* -> ) ([0] unused) */ 77254225Speter}; 78254225Speter 79254225Speter/* ========= begin header generated by ./mkh ========= */ 80254225Speter#ifdef __cplusplus 81254225Speterextern "C" { 82254225Speter#endif 83254225Speter 84254225Speter/* === regcomp.c === */ 85254225Speterstatic void p_ere __P((struct parse *p, int stop, size_t reclimit)); 86254225Speterstatic void p_ere_exp __P((struct parse *p, size_t reclimit)); 87254225Speterstatic void p_str __P((struct parse *p)); 88254225Speterstatic void p_bre __P((struct parse *p, int end1, int end2, size_t reclimit)); 89254225Speterstatic int p_simp_re __P((struct parse *p, int starordinary, size_t reclimit)); 90254225Speterstatic int p_count __P((struct parse *p)); 91254225Speterstatic void p_bracket __P((struct parse *p)); 92254225Speterstatic void p_b_term __P((struct parse *p, cset *cs)); 93254225Speterstatic void p_b_cclass __P((struct parse *p, cset *cs)); 94254225Speterstatic void p_b_eclass __P((struct parse *p, cset *cs)); 95254225Speterstatic char p_b_symbol __P((struct parse *p)); 96254225Speterstatic char p_b_coll_elem __P((struct parse *p, int endc)); 97254225Speterstatic char othercase __P((int ch)); 98254225Speterstatic void bothcases __P((struct parse *p, int ch)); 99254225Speterstatic void ordinary __P((struct parse *p, int ch)); 100254225Speterstatic void nonnewline __P((struct parse *p)); 101254225Speterstatic void repeat __P((struct parse *p, sopno start, int from, int to, size_t reclimit)); 102254225Speterstatic int seterr __P((struct parse *p, int e)); 103254225Speterstatic cset *allocset __P((struct parse *p)); 104254225Speterstatic void freeset __P((struct parse *p, cset *cs)); 105254225Speterstatic int freezeset __P((struct parse *p, cset *cs)); 106254225Speterstatic int firstch __P((struct parse *p, cset *cs)); 107254225Speterstatic int nch __P((struct parse *p, cset *cs)); 108254225Speterstatic void mcadd __P((struct parse *p, cset *cs, const char *cp)); 109254225Speter#ifdef notdef 110254225Speterstatic void mcsub __P((cset *cs, char *cp)); 111254225Speterstatic int mcin __P((cset *cs, char *cp)); 112254225Speterstatic char *mcfind __P((cset *cs, char *cp)); 113254225Speter#endif 114254225Speterstatic void mcinvert __P((struct parse *p, cset *cs)); 115254225Speterstatic void mccase __P((struct parse *p, cset *cs)); 116254225Speter#ifdef notdef 117254225Speterstatic int isinsets __P((struct re_guts *g, int c)); 118254225Speterstatic int samesets __P((struct re_guts *g, int c1, int c2)); 119254225Speter#endif 120254225Speterstatic void categorize __P((struct parse *p, struct re_guts *g)); 121254225Speterstatic sopno dupl __P((struct parse *p, sopno start, sopno finish)); 122254225Speterstatic void doemit __P((struct parse *p, sop op, size_t opnd)); 123254225Speterstatic void doinsert __P((struct parse *p, sop op, size_t opnd, sopno pos)); 124254225Speterstatic void dofwd __P((struct parse *p, sopno pos, sop value)); 125254225Speterstatic int enlarge __P((struct parse *p, sopno size)); 126254225Speterstatic void stripsnug __P((struct parse *p, struct re_guts *g)); 127254225Speterstatic void findmust __P((struct parse *p, struct re_guts *g)); 128254225Speterstatic sopno pluscount __P((struct parse *p, struct re_guts *g)); 129254225Speter 130254225Speter#ifdef __cplusplus 131254225Speter} 132254225Speter#endif 133254225Speter/* ========= end header generated by ./mkh ========= */ 134254225Speter 135254225Speterstatic RCHAR_T nuls[10]; /* place to point scanner in event of error */ 136254225Speter 137254225Speter/* 138254225Speter * macros for use with parse structure 139254225Speter * BEWARE: these know that the parse structure is named `p' !!! 140254225Speter */ 141254225Speter#define PEEK() (*p->next) 142254225Speter#define PEEK2() (*(p->next+1)) 143254225Speter#define MORE() (p->next < p->end) 144254225Speter#define MORE2() (p->next+1 < p->end) 145254225Speter#define SEE(c) (MORE() && PEEK() == (c)) 146254225Speter#define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b)) 147254225Speter#define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0) 148254225Speter#define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0) 149254225Speter#define NEXT() (p->next++) 150254225Speter#define NEXT2() (p->next += 2) 151254225Speter#define NEXTn(n) (p->next += (n)) 152254225Speter#define GETNEXT() (*p->next++) 153254225Speter#define SETERROR(e) seterr(p, (e)) 154254225Speter#define REQUIRE(co, e) ((co) || SETERROR(e)) 155254225Speter#define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e)) 156254225Speter#define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e)) 157254225Speter#define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e)) 158254225Speter#define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd)) 159254225Speter#define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos) 160254225Speter#define AHEAD(pos) dofwd(p, pos, HERE()-(pos)) 161254225Speter#define ASTERN(sop, pos) EMIT(sop, HERE()-pos) 162254225Speter#define HERE() (p->slen) 163254225Speter#define THERE() (p->slen - 1) 164254225Speter#define THERETHERE() (p->slen - 2) 165254225Speter#define DROP(n) (p->slen -= (n)) 166254225Speter 167254225Speter#ifndef NDEBUG 168254225Speterstatic int never = 0; /* for use in asserts; shuts lint up */ 169254225Speter#else 170254225Speter#define never 0 /* some <assert.h>s have bugs too */ 171254225Speter#endif 172254225Speter 173254225Speter#define MEMLIMIT 0x8000000 174254225Speter#define MEMSIZE(p) \ 175254225Speter ((p)->ncsalloc / CHAR_BIT * (p)->g->csetsize + \ 176254225Speter (p)->ncsalloc * sizeof(cset) + \ 177254225Speter (p)->ssize * sizeof(sop)) 178254225Speter#define RECLIMIT 256 179254225Speter 180254225Speter/* 181254225Speter - regcomp - interface for parser and compilation 182254225Speter = extern int regcomp(regex_t *, const RCHAR_T *, int); 183254225Speter = #define REG_BASIC 0000 184254225Speter = #define REG_EXTENDED 0001 185254225Speter = #define REG_ICASE 0002 186254225Speter = #define REG_NOSUB 0004 187254225Speter = #define REG_NEWLINE 0010 188254225Speter = #define REG_NOSPEC 0020 189254225Speter = #define REG_PEND 0040 190254225Speter = #define REG_DUMP 0200 191254225Speter */ 192254225Speterint /* 0 success, otherwise REG_something */ 193254225Speterregcomp(regex_t *preg, const RCHAR_T *pattern, int cflags) 194254225Speter{ 195254225Speter struct parse pa; 196254225Speter register struct re_guts *g; 197254225Speter register struct parse *p = &pa; 198254225Speter register int i; 199254225Speter register size_t len; 200254225Speter#ifdef REDEBUG 201254225Speter# define GOODFLAGS(f) (f) 202254225Speter#else 203254225Speter# define GOODFLAGS(f) ((f)&~REG_DUMP) 204254225Speter#endif 205254225Speter 206254225Speter cflags = GOODFLAGS(cflags); 207254225Speter if ((cflags®_EXTENDED) && (cflags®_NOSPEC)) 208254225Speter return(REG_INVARG); 209254225Speter 210254225Speter if (cflags®_PEND) { 211254225Speter if (preg->re_endp < pattern) 212254225Speter return(REG_INVARG); 213254225Speter len = preg->re_endp - pattern; 214254225Speter } else 215254225Speter len = STRLEN(pattern); 216254225Speter 217254225Speter /* do the mallocs early so failure handling is easy */ 218254225Speter g = (struct re_guts *)malloc(sizeof(struct re_guts) + 219254225Speter (NC-1)*sizeof(cat_t)); 220254225Speter if (g == NULL) 221254225Speter return(REG_ESPACE); 222254225Speter p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */ 223254225Speter p->strip = (sop *)malloc(p->ssize * sizeof(sop)); 224254225Speter if (p->strip == NULL) { 225254225Speter free((char *)g); 226254225Speter return(REG_ESPACE); 227254225Speter } 228254225Speter p->stripdata = (RCHAR_T *)malloc(p->ssize * sizeof(RCHAR_T)); 229254225Speter if (p->stripdata == NULL) { 230254225Speter free((char *)p->strip); 231254225Speter free((char *)g); 232254225Speter return(REG_ESPACE); 233254225Speter } 234254225Speter p->slen = 0; 235254225Speter 236254225Speter /* set things up */ 237254225Speter p->g = g; 238254225Speter p->next = (RCHAR_T *)pattern; /* convenience; we do not modify it */ 239254225Speter p->end = p->next + len; 240254225Speter p->error = 0; 241254225Speter p->ncsalloc = 0; 242254225Speter for (i = 0; i < NPAREN; i++) { 243254225Speter p->pbegin[i] = 0; 244254225Speter p->pend[i] = 0; 245254225Speter } 246254225Speter g->csetsize = NC; 247254225Speter g->sets = NULL; 248254225Speter g->setbits = NULL; 249254225Speter g->ncsets = 0; 250254225Speter g->cflags = cflags; 251254225Speter g->iflags = 0; 252254225Speter g->nbol = 0; 253254225Speter g->neol = 0; 254254225Speter g->must = NULL; 255254225Speter g->mlen = 0; 256254225Speter g->nsub = 0; 257254225Speter#if 0 258254225Speter g->ncategories = 1; /* category 0 is "everything else" */ 259254225Speter g->categories = &g->catspace[-(CHAR_MIN)]; 260254225Speter (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t)); 261254225Speter#endif 262254225Speter g->backrefs = 0; 263254225Speter 264254225Speter /* do it */ 265254225Speter EMIT(OEND, 0); 266254225Speter g->firststate = THERE(); 267254225Speter if (cflags®_EXTENDED) 268254225Speter p_ere(p, OUT, 0); 269254225Speter else if (cflags®_NOSPEC) 270254225Speter p_str(p); 271254225Speter else 272254225Speter p_bre(p, OUT, OUT, 0); 273254225Speter EMIT(OEND, 0); 274254225Speter g->laststate = THERE(); 275254225Speter 276254225Speter /* tidy up loose ends and fill things in */ 277254225Speter categorize(p, g); 278254225Speter stripsnug(p, g); 279254225Speter findmust(p, g); 280254225Speter g->nplus = pluscount(p, g); 281254225Speter g->magic = MAGIC2; 282254225Speter preg->re_nsub = g->nsub; 283254225Speter preg->re_g = g; 284254225Speter preg->re_magic = MAGIC1; 285254225Speter#ifndef REDEBUG 286254225Speter /* not debugging, so can't rely on the assert() in regexec() */ 287254225Speter if (g->iflags&BAD) 288254225Speter SETERROR(REG_ASSERT); 289254225Speter#endif 290254225Speter 291254225Speter /* win or lose, we're done */ 292254225Speter if (p->error != 0) /* lose */ 293254225Speter regfree(preg); 294254225Speter return(p->error); 295254225Speter} 296254225Speter 297254225Speter/* 298254225Speter - p_ere - ERE parser top level, concatenation and alternation 299254225Speter == static void p_ere(register struct parse *p, int stop, size_t reclimit); 300254225Speter */ 301254225Speterstatic void 302254225Speterp_ere(register struct parse *p, int stop, size_t reclimit) 303254225Speter 304254225Speter /* character this ERE should end at */ 305254225Speter{ 306254225Speter register char c; 307254225Speter register sopno prevback = 0; 308254225Speter register sopno prevfwd = 0; 309254225Speter register sopno conc; 310254225Speter register int first = 1; /* is this the first alternative? */ 311254225Speter 312254225Speter if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) { 313254225Speter p->error = REG_ESPACE; 314254225Speter return; 315254225Speter } 316254225Speter 317254225Speter for (;;) { 318254225Speter /* do a bunch of concatenated expressions */ 319254225Speter conc = HERE(); 320254225Speter while (MORE() && (c = PEEK()) != '|' && c != stop) 321254225Speter p_ere_exp(p, reclimit); 322254225Speter (void)REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */ 323254225Speter 324254225Speter if (!EAT('|')) 325254225Speter break; /* NOTE BREAK OUT */ 326254225Speter 327254225Speter if (first) { 328254225Speter INSERT(OCH_, conc); /* offset is wrong */ 329254225Speter prevfwd = conc; 330254225Speter prevback = conc; 331254225Speter first = 0; 332254225Speter } 333254225Speter ASTERN(OOR1, prevback); 334254225Speter prevback = THERE(); 335254225Speter AHEAD(prevfwd); /* fix previous offset */ 336254225Speter prevfwd = HERE(); 337254225Speter EMIT(OOR2, 0); /* offset is very wrong */ 338254225Speter } 339254225Speter 340254225Speter if (!first) { /* tail-end fixups */ 341254225Speter AHEAD(prevfwd); 342254225Speter ASTERN(O_CH, prevback); 343254225Speter } 344254225Speter 345254225Speter assert(!MORE() || SEE(stop)); 346254225Speter} 347254225Speter 348254225Speter/* 349254225Speter - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op 350254225Speter == static void p_ere_exp(register struct parse *p); 351254225Speter */ 352254225Speterstatic void 353254225Speterp_ere_exp(register struct parse *p, size_t reclimit) 354254225Speter{ 355254225Speter register char c; 356254225Speter register sopno pos; 357254225Speter register int count; 358254225Speter register int count2; 359254225Speter register sopno subno; 360254225Speter int wascaret = 0; 361254225Speter 362254225Speter assert(MORE()); /* caller should have ensured this */ 363254225Speter c = GETNEXT(); 364254225Speter 365254225Speter pos = HERE(); 366254225Speter switch (c) { 367254225Speter case '(': 368254225Speter (void)REQUIRE(MORE(), REG_EPAREN); 369254225Speter p->g->nsub++; 370254225Speter subno = p->g->nsub; 371254225Speter if (subno < NPAREN) 372254225Speter p->pbegin[subno] = HERE(); 373254225Speter EMIT(OLPAREN, subno); 374254225Speter if (!SEE(')')) 375254225Speter p_ere(p, ')', reclimit); 376254225Speter if (subno < NPAREN) { 377254225Speter p->pend[subno] = HERE(); 378254225Speter assert(p->pend[subno] != 0); 379254225Speter } 380254225Speter EMIT(ORPAREN, subno); 381254225Speter (void)MUSTEAT(')', REG_EPAREN); 382254225Speter break; 383254225Speter#ifndef POSIX_MISTAKE 384254225Speter case ')': /* happens only if no current unmatched ( */ 385254225Speter /* 386254225Speter * You may ask, why the ifndef? Because I didn't notice 387254225Speter * this until slightly too late for 1003.2, and none of the 388254225Speter * other 1003.2 regular-expression reviewers noticed it at 389254225Speter * all. So an unmatched ) is legal POSIX, at least until 390254225Speter * we can get it fixed. 391254225Speter */ 392254225Speter SETERROR(REG_EPAREN); 393254225Speter break; 394254225Speter#endif 395254225Speter case '^': 396254225Speter EMIT(OBOL, 0); 397254225Speter p->g->iflags |= USEBOL; 398254225Speter p->g->nbol++; 399254225Speter wascaret = 1; 400254225Speter break; 401254225Speter case '$': 402254225Speter EMIT(OEOL, 0); 403254225Speter p->g->iflags |= USEEOL; 404254225Speter p->g->neol++; 405254225Speter break; 406254225Speter case '|': 407254225Speter SETERROR(REG_EMPTY); 408254225Speter break; 409254225Speter case '*': 410254225Speter case '+': 411254225Speter case '?': 412254225Speter SETERROR(REG_BADRPT); 413254225Speter break; 414254225Speter case '.': 415254225Speter if (p->g->cflags®_NEWLINE) 416254225Speter nonnewline(p); 417254225Speter else 418254225Speter EMIT(OANY, 0); 419254225Speter break; 420254225Speter case '[': 421254225Speter p_bracket(p); 422254225Speter break; 423254225Speter case '\\': 424254225Speter (void)REQUIRE(MORE(), REG_EESCAPE); 425254225Speter c = GETNEXT(); 426254225Speter ordinary(p, c); 427254225Speter break; 428254225Speter case '{': /* okay as ordinary except if digit follows */ 429254225Speter (void)REQUIRE(!MORE() || !ISDIGIT((UCHAR_T)PEEK()), REG_BADRPT); 430254225Speter /* FALLTHROUGH */ 431254225Speter default: 432254225Speter ordinary(p, c); 433254225Speter break; 434254225Speter } 435254225Speter 436254225Speter if (!MORE()) 437254225Speter return; 438254225Speter c = PEEK(); 439254225Speter /* we call { a repetition if followed by a digit */ 440254225Speter if (!( c == '*' || c == '+' || c == '?' || 441254225Speter (c == '{' && MORE2() && ISDIGIT((UCHAR_T)PEEK2())) )) 442254225Speter return; /* no repetition, we're done */ 443254225Speter NEXT(); 444254225Speter 445254225Speter (void)REQUIRE(!wascaret, REG_BADRPT); 446254225Speter switch (c) { 447254225Speter case '*': /* implemented as +? */ 448254225Speter /* this case does not require the (y|) trick, noKLUDGE */ 449254225Speter INSERT(OPLUS_, pos); 450254225Speter ASTERN(O_PLUS, pos); 451254225Speter INSERT(OQUEST_, pos); 452254225Speter ASTERN(O_QUEST, pos); 453254225Speter break; 454254225Speter case '+': 455254225Speter INSERT(OPLUS_, pos); 456254225Speter ASTERN(O_PLUS, pos); 457254225Speter break; 458254225Speter case '?': 459254225Speter /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ 460254225Speter INSERT(OCH_, pos); /* offset slightly wrong */ 461254225Speter ASTERN(OOR1, pos); /* this one's right */ 462254225Speter AHEAD(pos); /* fix the OCH_ */ 463254225Speter EMIT(OOR2, 0); /* offset very wrong... */ 464254225Speter AHEAD(THERE()); /* ...so fix it */ 465254225Speter ASTERN(O_CH, THERETHERE()); 466254225Speter break; 467254225Speter case '{': 468254225Speter count = p_count(p); 469254225Speter if (EAT(',')) { 470254225Speter if (ISDIGIT((UCHAR_T)PEEK())) { 471254225Speter count2 = p_count(p); 472254225Speter (void)REQUIRE(count <= count2, REG_BADBR); 473254225Speter } else /* single number with comma */ 474254225Speter count2 = INFINITY; 475254225Speter } else /* just a single number */ 476254225Speter count2 = count; 477254225Speter repeat(p, pos, count, count2, 0); 478254225Speter if (!EAT('}')) { /* error heuristics */ 479254225Speter while (MORE() && PEEK() != '}') 480254225Speter NEXT(); 481254225Speter (void)REQUIRE(MORE(), REG_EBRACE); 482254225Speter SETERROR(REG_BADBR); 483254225Speter } 484254225Speter break; 485254225Speter } 486254225Speter 487254225Speter if (!MORE()) 488254225Speter return; 489254225Speter c = PEEK(); 490254225Speter if (!( c == '*' || c == '+' || c == '?' || 491254225Speter (c == '{' && MORE2() && ISDIGIT((UCHAR_T)PEEK2())) ) ) 492254225Speter return; 493254225Speter SETERROR(REG_BADRPT); 494254225Speter} 495254225Speter 496254225Speter/* 497254225Speter - p_str - string (no metacharacters) "parser" 498254225Speter == static void p_str(register struct parse *p); 499254225Speter */ 500254225Speterstatic void 501254225Speterp_str(register struct parse *p) 502254225Speter{ 503254225Speter (void)REQUIRE(MORE(), REG_EMPTY); 504254225Speter while (MORE()) 505254225Speter ordinary(p, GETNEXT()); 506254225Speter} 507254225Speter 508254225Speter/* 509254225Speter - p_bre - BRE parser top level, anchoring and concatenation 510254225Speter == static void p_bre(register struct parse *p, register int end1, \ 511254225Speter == register int end2, size_t reclimit); 512254225Speter * Giving end1 as OUT essentially eliminates the end1/end2 check. 513254225Speter * 514254225Speter * This implementation is a bit of a kludge, in that a trailing $ is first 515254225Speter * taken as an ordinary character and then revised to be an anchor. The 516254225Speter * only undesirable side effect is that '$' gets included as a character 517254225Speter * category in such cases. This is fairly harmless; not worth fixing. 518254225Speter * The amount of lookahead needed to avoid this kludge is excessive. 519254225Speter */ 520254225Speterstatic void 521254225Speterp_bre(register struct parse *p, register int end1, register int end2, size_t reclimit) 522254225Speter 523254225Speter /* first terminating character */ 524254225Speter /* second terminating character */ 525254225Speter{ 526254225Speter register sopno start; 527254225Speter register int first = 1; /* first subexpression? */ 528254225Speter register int wasdollar = 0; 529254225Speter 530254225Speter if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) { 531254225Speter p->error = REG_ESPACE; 532254225Speter return; 533254225Speter } 534254225Speter 535254225Speter start = HERE(); 536254225Speter 537254225Speter if (EAT('^')) { 538254225Speter EMIT(OBOL, 0); 539254225Speter p->g->iflags |= USEBOL; 540254225Speter p->g->nbol++; 541254225Speter } 542254225Speter while (MORE() && !SEETWO(end1, end2)) { 543254225Speter wasdollar = p_simp_re(p, first, reclimit); 544254225Speter first = 0; 545254225Speter } 546254225Speter if (wasdollar) { /* oops, that was a trailing anchor */ 547254225Speter DROP(1); 548254225Speter EMIT(OEOL, 0); 549254225Speter p->g->iflags |= USEEOL; 550254225Speter p->g->neol++; 551254225Speter } 552254225Speter 553254225Speter (void)REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */ 554254225Speter} 555254225Speter 556254225Speter/* 557254225Speter - p_simp_re - parse a simple RE, an atom possibly followed by a repetition 558254225Speter == static int p_simp_re(register struct parse *p, int starordinary, size_t reclimit); 559254225Speter */ 560254225Speterstatic int /* was the simple RE an unbackslashed $? */ 561254225Speterp_simp_re(register struct parse *p, int starordinary, size_t reclimit) 562254225Speter 563254225Speter /* is a leading * an ordinary character? */ 564254225Speter{ 565254225Speter register int c; 566254225Speter register int count; 567254225Speter register int count2; 568254225Speter register sopno pos; 569254225Speter register int i; 570254225Speter register sopno subno; 571254225Speter int backsl; 572254225Speter 573254225Speter pos = HERE(); /* repetion op, if any, covers from here */ 574254225Speter 575254225Speter assert(MORE()); /* caller should have ensured this */ 576254225Speter c = GETNEXT(); 577254225Speter backsl = c == '\\'; 578254225Speter if (backsl) { 579254225Speter (void)REQUIRE(MORE(), REG_EESCAPE); 580254225Speter c = (unsigned char)GETNEXT(); 581254225Speter switch (c) { 582254225Speter case '{': 583254225Speter SETERROR(REG_BADRPT); 584254225Speter break; 585254225Speter case '(': 586254225Speter p->g->nsub++; 587254225Speter subno = p->g->nsub; 588254225Speter if (subno < NPAREN) 589254225Speter p->pbegin[subno] = HERE(); 590254225Speter EMIT(OLPAREN, subno); 591254225Speter /* the MORE here is an error heuristic */ 592254225Speter if (MORE() && !SEETWO('\\', ')')) 593254225Speter p_bre(p, '\\', ')', reclimit); 594254225Speter if (subno < NPAREN) { 595254225Speter p->pend[subno] = HERE(); 596254225Speter assert(p->pend[subno] != 0); 597254225Speter } 598254225Speter EMIT(ORPAREN, subno); 599254225Speter (void)REQUIRE(EATTWO('\\', ')'), REG_EPAREN); 600254225Speter break; 601254225Speter case ')': /* should not get here -- must be user */ 602254225Speter case '}': 603254225Speter SETERROR(REG_EPAREN); 604254225Speter break; 605254225Speter case '1': 606254225Speter case '2': 607254225Speter case '3': 608254225Speter case '4': 609254225Speter case '5': 610254225Speter case '6': 611254225Speter case '7': 612254225Speter case '8': 613254225Speter case '9': 614254225Speter i = c - '0'; 615254225Speter assert(i < NPAREN); 616254225Speter if (p->pend[i] != 0) { 617254225Speter assert(i <= p->g->nsub); 618254225Speter EMIT(OBACK_, i); 619254225Speter assert(p->pbegin[i] != 0); 620254225Speter assert(p->strip[p->pbegin[i]] == OLPAREN); 621254225Speter assert(p->strip[p->pend[i]] == ORPAREN); 622254225Speter (void) dupl(p, p->pbegin[i]+1, p->pend[i]); 623254225Speter EMIT(O_BACK, i); 624254225Speter } else 625254225Speter SETERROR(REG_ESUBREG); 626254225Speter p->g->backrefs = 1; 627254225Speter break; 628254225Speter default: 629254225Speter ordinary(p, c); 630254225Speter break; 631254225Speter } 632254225Speter } else { 633254225Speter switch (c) { 634254225Speter case '.': 635254225Speter if (p->g->cflags®_NEWLINE) 636254225Speter nonnewline(p); 637254225Speter else 638254225Speter EMIT(OANY, 0); 639254225Speter break; 640254225Speter case '[': 641254225Speter p_bracket(p); 642254225Speter break; 643254225Speter case '*': 644254225Speter (void)REQUIRE(starordinary, REG_BADRPT); 645254225Speter /* FALLTHROUGH */ 646254225Speter default: 647254225Speter ordinary(p, c); 648254225Speter break; 649254225Speter } 650254225Speter } 651254225Speter 652254225Speter if (EAT('*')) { /* implemented as +? */ 653254225Speter /* this case does not require the (y|) trick, noKLUDGE */ 654254225Speter INSERT(OPLUS_, pos); 655254225Speter ASTERN(O_PLUS, pos); 656254225Speter INSERT(OQUEST_, pos); 657254225Speter ASTERN(O_QUEST, pos); 658254225Speter } else if (EATTWO('\\', '{')) { 659254225Speter count = p_count(p); 660254225Speter if (EAT(',')) { 661254225Speter if (MORE() && ISDIGIT((UCHAR_T)PEEK())) { 662254225Speter count2 = p_count(p); 663254225Speter (void)REQUIRE(count <= count2, REG_BADBR); 664254225Speter } else /* single number with comma */ 665254225Speter count2 = INFINITY; 666254225Speter } else /* just a single number */ 667254225Speter count2 = count; 668254225Speter repeat(p, pos, count, count2, reclimit); 669254225Speter if (!EATTWO('\\', '}')) { /* error heuristics */ 670254225Speter while (MORE() && !SEETWO('\\', '}')) 671254225Speter NEXT(); 672254225Speter (void)REQUIRE(MORE(), REG_EBRACE); 673254225Speter SETERROR(REG_BADBR); 674254225Speter } 675254225Speter } else if (!backsl && c == (unsigned char)'$') /* $ (but not \$) ends it */ 676254225Speter return(1); 677254225Speter 678254225Speter return(0); 679254225Speter} 680254225Speter 681254225Speter/* 682254225Speter - p_count - parse a repetition count 683254225Speter == static int p_count(register struct parse *p); 684254225Speter */ 685254225Speterstatic int /* the value */ 686254225Speterp_count(register struct parse *p) 687254225Speter{ 688254225Speter register int count = 0; 689254225Speter register int ndigits = 0; 690254225Speter 691254225Speter while (MORE() && ISDIGIT((UCHAR_T)PEEK()) && count <= DUPMAX) { 692254225Speter count = count*10 + (GETNEXT() - '0'); 693254225Speter ndigits++; 694254225Speter } 695254225Speter 696254225Speter (void)REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR); 697254225Speter return(count); 698254225Speter} 699254225Speter 700254225Speter/* 701254225Speter - p_bracket - parse a bracketed character list 702254225Speter == static void p_bracket(register struct parse *p); 703254225Speter * 704254225Speter * Note a significant property of this code: if the allocset() did SETERROR, 705254225Speter * no set operations are done. 706254225Speter */ 707254225Speterstatic void 708254225Speterp_bracket(register struct parse *p) 709254225Speter{ 710254225Speter register cset *cs; 711254225Speter register int invert = 0; 712254225Speter static RCHAR_T bow[] = { '[', ':', '<', ':', ']', ']' }; 713254225Speter static RCHAR_T eow[] = { '[', ':', '>', ':', ']', ']' }; 714254225Speter 715254225Speter cs = allocset(p); 716254225Speter if (cs == NULL) 717254225Speter return; 718254225Speter 719254225Speter /* Dept of Truly Sickening Special-Case Kludges */ 720254225Speter if (p->next + 5 < p->end && MEMCMP(p->next, bow, 6) == 0) { 721254225Speter EMIT(OBOW, 0); 722254225Speter NEXTn(6); 723254225Speter return; 724254225Speter } 725254225Speter if (p->next + 5 < p->end && MEMCMP(p->next, eow, 6) == 0) { 726254225Speter EMIT(OEOW, 0); 727254225Speter NEXTn(6); 728254225Speter return; 729254225Speter } 730254225Speter 731254225Speter if (EAT('^')) 732254225Speter invert++; /* make note to invert set at end */ 733254225Speter if (EAT(']')) 734254225Speter CHadd(cs, ']'); 735254225Speter else if (EAT('-')) 736254225Speter CHadd(cs, '-'); 737254225Speter while (MORE() && PEEK() != ']' && !SEETWO('-', ']')) 738254225Speter p_b_term(p, cs); 739254225Speter if (EAT('-')) 740254225Speter CHadd(cs, '-'); 741254225Speter (void)MUSTEAT(']', REG_EBRACK); 742254225Speter 743254225Speter if (p->error != 0) /* don't mess things up further */ 744254225Speter return; 745254225Speter 746254225Speter if (p->g->cflags®_ICASE) { 747254225Speter register int i; 748254225Speter register int ci; 749254225Speter 750254225Speter for (i = p->g->csetsize - 1; i >= 0; i--) 751254225Speter if (CHIN(cs, i) && isalpha(i)) { 752254225Speter ci = othercase(i); 753254225Speter if (ci != i) 754254225Speter CHadd(cs, ci); 755254225Speter } 756254225Speter if (cs->multis != NULL) 757254225Speter mccase(p, cs); 758254225Speter } 759254225Speter if (invert) { 760254225Speter register int i; 761254225Speter 762254225Speter for (i = p->g->csetsize - 1; i >= 0; i--) 763254225Speter if (CHIN(cs, i)) 764254225Speter CHsub(cs, i); 765254225Speter else 766254225Speter CHadd(cs, i); 767254225Speter if (p->g->cflags®_NEWLINE) 768254225Speter CHsub(cs, '\n'); 769254225Speter if (cs->multis != NULL) 770254225Speter mcinvert(p, cs); 771254225Speter } 772254225Speter 773254225Speter assert(cs->multis == NULL); /* xxx */ 774254225Speter 775254225Speter if (nch(p, cs) == 1) { /* optimize singleton sets */ 776254225Speter ordinary(p, firstch(p, cs)); 777254225Speter freeset(p, cs); 778254225Speter } else 779254225Speter EMIT(OANYOF, freezeset(p, cs)); 780254225Speter} 781254225Speter 782254225Speter/* 783254225Speter - p_b_term - parse one term of a bracketed character list 784254225Speter == static void p_b_term(register struct parse *p, register cset *cs); 785254225Speter */ 786254225Speterstatic void 787254225Speterp_b_term(register struct parse *p, register cset *cs) 788254225Speter{ 789254225Speter register char c; 790254225Speter register char start, finish; 791254225Speter register int i; 792254225Speter 793254225Speter /* classify what we've got */ 794254225Speter switch ((MORE()) ? PEEK() : '\0') { 795254225Speter case '[': 796254225Speter c = (MORE2()) ? PEEK2() : '\0'; 797254225Speter break; 798254225Speter case '-': 799254225Speter SETERROR(REG_ERANGE); 800254225Speter return; /* NOTE RETURN */ 801254225Speter break; 802254225Speter default: 803254225Speter c = '\0'; 804254225Speter break; 805254225Speter } 806254225Speter 807254225Speter switch (c) { 808254225Speter case ':': /* character class */ 809254225Speter NEXT2(); 810254225Speter (void)REQUIRE(MORE(), REG_EBRACK); 811254225Speter c = PEEK(); 812254225Speter (void)REQUIRE(c != '-' && c != ']', REG_ECTYPE); 813254225Speter p_b_cclass(p, cs); 814254225Speter (void)REQUIRE(MORE(), REG_EBRACK); 815254225Speter (void)REQUIRE(EATTWO(':', ']'), REG_ECTYPE); 816254225Speter break; 817254225Speter case '=': /* equivalence class */ 818254225Speter NEXT2(); 819254225Speter (void)REQUIRE(MORE(), REG_EBRACK); 820254225Speter c = PEEK(); 821254225Speter (void)REQUIRE(c != '-' && c != ']', REG_ECOLLATE); 822254225Speter p_b_eclass(p, cs); 823254225Speter (void)REQUIRE(MORE(), REG_EBRACK); 824254225Speter (void)REQUIRE(EATTWO('=', ']'), REG_ECOLLATE); 825254225Speter break; 826254225Speter default: /* symbol, ordinary character, or range */ 827254225Speter/* xxx revision needed for multichar stuff */ 828254225Speter start = p_b_symbol(p); 829254225Speter if (SEE('-') && MORE2() && PEEK2() != ']') { 830254225Speter /* range */ 831254225Speter NEXT(); 832254225Speter if (EAT('-')) 833254225Speter finish = '-'; 834254225Speter else 835254225Speter finish = p_b_symbol(p); 836254225Speter } else 837254225Speter finish = start; 838254225Speter/* xxx what about signed chars here... */ 839254225Speter (void)REQUIRE(start <= finish, REG_ERANGE); 840254225Speter for (i = start; i <= finish; i++) 841254225Speter CHadd(cs, i); 842254225Speter break; 843254225Speter } 844254225Speter} 845254225Speter 846254225Speter/* 847254225Speter - p_b_cclass - parse a character-class name and deal with it 848254225Speter == static void p_b_cclass(register struct parse *p, register cset *cs); 849254225Speter */ 850254225Speterstatic void 851254225Speterp_b_cclass(register struct parse *p, register cset *cs) 852254225Speter{ 853254225Speter register RCHAR_T *sp = p->next; 854254225Speter register struct cclass *cp; 855254225Speter register size_t len; 856254225Speter register const char *u; 857254225Speter register char c; 858254225Speter 859254225Speter while (MORE() && isalpha(PEEK())) 860254225Speter NEXT(); 861254225Speter len = p->next - sp; 862254225Speter for (cp = cclasses; cp->name != NULL; cp++) 863254225Speter if (STRLEN(cp->name) == len && !MEMCMP(cp->name, sp, len)) 864254225Speter break; 865254225Speter if (cp->name == NULL) { 866254225Speter /* oops, didn't find it */ 867254225Speter SETERROR(REG_ECTYPE); 868254225Speter return; 869254225Speter } 870254225Speter 871254225Speter u = cp->chars; 872254225Speter while ((c = *u++) != '\0') 873254225Speter CHadd(cs, c); 874254225Speter for (u = cp->multis; *u != '\0'; u += strlen(u) + 1) 875254225Speter MCadd(p, cs, u); 876254225Speter} 877254225Speter 878254225Speter/* 879254225Speter - p_b_eclass - parse an equivalence-class name and deal with it 880254225Speter == static void p_b_eclass(register struct parse *p, register cset *cs); 881254225Speter * 882254225Speter * This implementation is incomplete. xxx 883254225Speter */ 884254225Speterstatic void 885254225Speterp_b_eclass(register struct parse *p, register cset *cs) 886254225Speter{ 887254225Speter register char c; 888254225Speter 889254225Speter c = p_b_coll_elem(p, '='); 890254225Speter CHadd(cs, c); 891254225Speter} 892254225Speter 893254225Speter/* 894254225Speter - p_b_symbol - parse a character or [..]ed multicharacter collating symbol 895254225Speter == static char p_b_symbol(register struct parse *p); 896254225Speter */ 897254225Speterstatic char /* value of symbol */ 898254225Speterp_b_symbol(register struct parse *p) 899254225Speter{ 900254225Speter register char value; 901254225Speter 902254225Speter (void)REQUIRE(MORE(), REG_EBRACK); 903254225Speter if (!EATTWO('[', '.')) 904254225Speter return(GETNEXT()); 905254225Speter 906254225Speter /* collating symbol */ 907254225Speter value = p_b_coll_elem(p, '.'); 908254225Speter (void)REQUIRE(EATTWO('.', ']'), REG_ECOLLATE); 909254225Speter return(value); 910254225Speter} 911254225Speter 912254225Speter/* 913254225Speter - p_b_coll_elem - parse a collating-element name and look it up 914254225Speter == static char p_b_coll_elem(register struct parse *p, int endc); 915254225Speter */ 916254225Speterstatic char /* value of collating element */ 917254225Speterp_b_coll_elem(register struct parse *p, int endc) 918254225Speter 919254225Speter /* name ended by endc,']' */ 920254225Speter{ 921254225Speter register RCHAR_T *sp = p->next; 922254225Speter register struct cname *cp; 923254225Speter register size_t len; 924254225Speter 925254225Speter while (MORE() && !SEETWO(endc, ']')) 926254225Speter NEXT(); 927254225Speter if (!MORE()) { 928254225Speter SETERROR(REG_EBRACK); 929254225Speter return(0); 930254225Speter } 931254225Speter len = p->next - sp; 932254225Speter for (cp = cnames; cp->name != NULL; cp++) 933254225Speter if (STRLEN(cp->name) == len && MEMCMP(cp->name, sp, len)) 934254225Speter return(cp->code); /* known name */ 935254225Speter if (len == 1) 936254225Speter return(*sp); /* single character */ 937254225Speter SETERROR(REG_ECOLLATE); /* neither */ 938254225Speter return(0); 939254225Speter} 940254225Speter 941254225Speter/* 942254225Speter - othercase - return the case counterpart of an alphabetic 943254225Speter == static char othercase(int ch); 944254225Speter */ 945254225Speterstatic char /* if no counterpart, return ch */ 946254225Speterothercase(int ch) 947254225Speter{ 948254225Speter assert(isalpha(ch)); 949254225Speter if (isupper(ch)) 950254225Speter return(tolower(ch)); 951254225Speter else if (islower(ch)) 952254225Speter return(toupper(ch)); 953254225Speter else /* peculiar, but could happen */ 954254225Speter return(ch); 955254225Speter} 956254225Speter 957254225Speter/* 958254225Speter - bothcases - emit a dualcase version of a two-case character 959254225Speter == static void bothcases(register struct parse *p, int ch); 960254225Speter * 961254225Speter * Boy, is this implementation ever a kludge... 962254225Speter */ 963254225Speterstatic void 964254225Speterbothcases(register struct parse *p, int ch) 965254225Speter{ 966254225Speter register RCHAR_T *oldnext = p->next; 967254225Speter register RCHAR_T *oldend = p->end; 968254225Speter RCHAR_T bracket[3]; 969254225Speter 970254225Speter assert(othercase(ch) != ch); /* p_bracket() would recurse */ 971254225Speter p->next = bracket; 972254225Speter p->end = bracket+2; 973254225Speter bracket[0] = ch; 974254225Speter bracket[1] = ']'; 975254225Speter bracket[2] = '\0'; 976254225Speter p_bracket(p); 977254225Speter assert(p->next == bracket+2); 978254225Speter p->next = oldnext; 979254225Speter p->end = oldend; 980254225Speter} 981254225Speter 982254225Speter/* 983254225Speter - ordinary - emit an ordinary character 984254225Speter == static void ordinary(register struct parse *p, register int ch); 985254225Speter */ 986254225Speterstatic void 987254225Speterordinary(register struct parse *p, register int ch) 988254225Speter{ 989254225Speter/* 990254225Speter register cat_t *cap = p->g->categories; 991254225Speter*/ 992254225Speter 993254225Speter if ((p->g->cflags®_ICASE) && isalpha(ch) && othercase(ch) != ch) 994254225Speter bothcases(p, ch); 995254225Speter else { 996254225Speter EMIT(OCHAR, (UCHAR_T)ch); 997254225Speter/* 998254225Speter if (cap[ch] == 0) 999254225Speter cap[ch] = p->g->ncategories++; 1000254225Speter*/ 1001254225Speter } 1002254225Speter} 1003254225Speter 1004254225Speter/* 1005254225Speter - nonnewline - emit REG_NEWLINE version of OANY 1006254225Speter == static void nonnewline(register struct parse *p); 1007254225Speter * 1008254225Speter * Boy, is this implementation ever a kludge... 1009254225Speter */ 1010254225Speterstatic void 1011254225Speternonnewline(register struct parse *p) 1012254225Speter{ 1013254225Speter register RCHAR_T *oldnext = p->next; 1014254225Speter register RCHAR_T *oldend = p->end; 1015254225Speter RCHAR_T bracket[4]; 1016254225Speter 1017254225Speter p->next = bracket; 1018254225Speter p->end = bracket+3; 1019254225Speter bracket[0] = '^'; 1020254225Speter bracket[1] = '\n'; 1021254225Speter bracket[2] = ']'; 1022254225Speter bracket[3] = '\0'; 1023254225Speter p_bracket(p); 1024254225Speter assert(p->next == bracket+3); 1025254225Speter p->next = oldnext; 1026254225Speter p->end = oldend; 1027254225Speter} 1028254225Speter 1029254225Speter/* 1030254225Speter - repeat - generate code for a bounded repetition, recursively if needed 1031254225Speter == static void repeat(register struct parse *p, sopno start, int from, int to, size_t reclimit); 1032254225Speter */ 1033254225Speterstatic void 1034254225Speterrepeat(register struct parse *p, sopno start, int from, int to, size_t reclimit) 1035254225Speter 1036254225Speter /* operand from here to end of strip */ 1037254225Speter /* repeated from this number */ 1038254225Speter /* to this number of times (maybe INFINITY) */ 1039254225Speter{ 1040254225Speter register sopno finish; 1041254225Speter# define N 2 1042254225Speter# define INF 3 1043254225Speter# define REP(f, t) ((f)*8 + (t)) 1044254225Speter# define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N) 1045254225Speter register sopno copy; 1046254225Speter 1047254225Speter if (reclimit++ > RECLIMIT) 1048254225Speter p->error = REG_ESPACE; 1049254225Speter if (p->error) 1050254225Speter return; 1051254225Speter 1052254225Speter finish = HERE(); 1053254225Speter 1054254225Speter assert(from <= to); 1055254225Speter 1056254225Speter switch (REP(MAP(from), MAP(to))) { 1057254225Speter case REP(0, 0): /* must be user doing this */ 1058254225Speter DROP(finish-start); /* drop the operand */ 1059254225Speter break; 1060254225Speter case REP(0, 1): /* as x{1,1}? */ 1061254225Speter case REP(0, N): /* as x{1,n}? */ 1062254225Speter case REP(0, INF): /* as x{1,}? */ 1063254225Speter /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ 1064254225Speter INSERT(OCH_, start); /* offset is wrong... */ 1065254225Speter repeat(p, start+1, 1, to, reclimit); 1066254225Speter ASTERN(OOR1, start); 1067254225Speter AHEAD(start); /* ... fix it */ 1068254225Speter EMIT(OOR2, 0); 1069254225Speter AHEAD(THERE()); 1070254225Speter ASTERN(O_CH, THERETHERE()); 1071254225Speter break; 1072254225Speter case REP(1, 1): /* trivial case */ 1073254225Speter /* done */ 1074254225Speter break; 1075254225Speter case REP(1, N): /* as x?x{1,n-1} */ 1076254225Speter /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ 1077254225Speter INSERT(OCH_, start); 1078254225Speter ASTERN(OOR1, start); 1079254225Speter AHEAD(start); 1080254225Speter EMIT(OOR2, 0); /* offset very wrong... */ 1081254225Speter AHEAD(THERE()); /* ...so fix it */ 1082254225Speter ASTERN(O_CH, THERETHERE()); 1083254225Speter copy = dupl(p, start+1, finish+1); 1084254225Speter assert(copy == finish+4); 1085254225Speter repeat(p, copy, 1, to-1, reclimit); 1086254225Speter break; 1087254225Speter case REP(1, INF): /* as x+ */ 1088254225Speter INSERT(OPLUS_, start); 1089254225Speter ASTERN(O_PLUS, start); 1090254225Speter break; 1091254225Speter case REP(N, N): /* as xx{m-1,n-1} */ 1092254225Speter copy = dupl(p, start, finish); 1093254225Speter repeat(p, copy, from-1, to-1, reclimit); 1094254225Speter break; 1095254225Speter case REP(N, INF): /* as xx{n-1,INF} */ 1096254225Speter copy = dupl(p, start, finish); 1097254225Speter repeat(p, copy, from-1, to, reclimit); 1098254225Speter break; 1099254225Speter default: /* "can't happen" */ 1100254225Speter SETERROR(REG_ASSERT); /* just in case */ 1101254225Speter break; 1102254225Speter } 1103254225Speter} 1104254225Speter 1105254225Speter/* 1106254225Speter - seterr - set an error condition 1107254225Speter == static int seterr(register struct parse *p, int e); 1108254225Speter */ 1109254225Speterstatic int /* useless but makes type checking happy */ 1110254225Speterseterr(register struct parse *p, int e) 1111254225Speter{ 1112254225Speter if (p->error == 0) /* keep earliest error condition */ 1113254225Speter p->error = e; 1114254225Speter p->next = nuls; /* try to bring things to a halt */ 1115254225Speter p->end = nuls; 1116254225Speter return(0); /* make the return value well-defined */ 1117254225Speter} 1118254225Speter 1119254225Speter/* 1120254225Speter - allocset - allocate a set of characters for [] 1121254225Speter == static cset *allocset(register struct parse *p); 1122254225Speter */ 1123254225Speterstatic cset * 1124254225Speterallocset(register struct parse *p) 1125254225Speter{ 1126254225Speter register int no = p->g->ncsets++; 1127254225Speter register size_t nc; 1128254225Speter register size_t nbytes; 1129254225Speter register cset *cs; 1130254225Speter register size_t css = (size_t)p->g->csetsize; 1131254225Speter register int i; 1132254225Speter 1133254225Speter if (no >= p->ncsalloc) { /* need another column of space */ 1134254225Speter p->ncsalloc += CHAR_BIT; 1135254225Speter nc = p->ncsalloc; 1136254225Speter assert(nc % CHAR_BIT == 0); 1137254225Speter nbytes = nc / CHAR_BIT * css; 1138254225Speter if (MEMSIZE(p) > MEMLIMIT) 1139254225Speter goto oomem; 1140254225Speter if (p->g->sets == NULL) 1141254225Speter p->g->sets = (cset *)malloc(nc * sizeof(cset)); 1142254225Speter else 1143254225Speter p->g->sets = (cset *)realloc((char *)p->g->sets, 1144254225Speter nc * sizeof(cset)); 1145254225Speter if (p->g->setbits == NULL) 1146254225Speter p->g->setbits = (uch *)malloc(nbytes); 1147254225Speter else { 1148254225Speter p->g->setbits = (uch *)realloc((char *)p->g->setbits, 1149254225Speter nbytes); 1150254225Speter /* xxx this isn't right if setbits is now NULL */ 1151254225Speter for (i = 0; i < no; i++) 1152254225Speter p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT); 1153254225Speter } 1154254225Speter if (p->g->sets != NULL && p->g->setbits != NULL) 1155254225Speter (void) memset((char *)p->g->setbits + (nbytes - css), 1156254225Speter 0, css); 1157254225Speter else { 1158254225Speteroomem: 1159254225Speter no = 0; 1160254225Speter SETERROR(REG_ESPACE); 1161254225Speter /* caller's responsibility not to do set ops */ 1162254225Speter return NULL; 1163254225Speter } 1164254225Speter } 1165254225Speter 1166254225Speter cs = &p->g->sets[no]; 1167254225Speter cs->ptr = p->g->setbits + css*((no)/CHAR_BIT); 1168254225Speter cs->mask = 1 << ((no) % CHAR_BIT); 1169254225Speter cs->hash = 0; 1170254225Speter cs->smultis = 0; 1171254225Speter cs->multis = NULL; 1172254225Speter 1173254225Speter return(cs); 1174254225Speter} 1175254225Speter 1176254225Speter/* 1177254225Speter - freeset - free a now-unused set 1178254225Speter == static void freeset(register struct parse *p, register cset *cs); 1179254225Speter */ 1180254225Speterstatic void 1181254225Speterfreeset(register struct parse *p, register cset *cs) 1182254225Speter{ 1183254225Speter register size_t i; 1184254225Speter register cset *top = &p->g->sets[p->g->ncsets]; 1185254225Speter register size_t css = (size_t)p->g->csetsize; 1186254225Speter 1187254225Speter for (i = 0; i < css; i++) 1188254225Speter CHsub(cs, i); 1189254225Speter if (cs == top-1) /* recover only the easy case */ 1190254225Speter p->g->ncsets--; 1191254225Speter} 1192254225Speter 1193254225Speter/* 1194254225Speter - freezeset - final processing on a set of characters 1195254225Speter == static int freezeset(register struct parse *p, register cset *cs); 1196254225Speter * 1197254225Speter * The main task here is merging identical sets. This is usually a waste 1198254225Speter * of time (although the hash code minimizes the overhead), but can win 1199254225Speter * big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash 1200254225Speter * is done using addition rather than xor -- all ASCII [aA] sets xor to 1201254225Speter * the same value! 1202254225Speter */ 1203254225Speterstatic int /* set number */ 1204254225Speterfreezeset(register struct parse *p, register cset *cs) 1205254225Speter{ 1206254225Speter register uch h = cs->hash; 1207254225Speter register size_t i; 1208254225Speter register cset *top = &p->g->sets[p->g->ncsets]; 1209254225Speter register cset *cs2; 1210254225Speter register size_t css = (size_t)p->g->csetsize; 1211254225Speter 1212254225Speter /* look for an earlier one which is the same */ 1213254225Speter for (cs2 = &p->g->sets[0]; cs2 < top; cs2++) 1214254225Speter if (cs2->hash == h && cs2 != cs) { 1215254225Speter /* maybe */ 1216254225Speter for (i = 0; i < css; i++) 1217254225Speter if (!!CHIN(cs2, i) != !!CHIN(cs, i)) 1218254225Speter break; /* no */ 1219254225Speter if (i == css) 1220254225Speter break; /* yes */ 1221254225Speter } 1222254225Speter 1223254225Speter if (cs2 < top) { /* found one */ 1224254225Speter freeset(p, cs); 1225254225Speter cs = cs2; 1226254225Speter } 1227254225Speter 1228254225Speter return((int)(cs - p->g->sets)); 1229254225Speter} 1230254225Speter 1231254225Speter/* 1232254225Speter - firstch - return first character in a set (which must have at least one) 1233254225Speter == static int firstch(register struct parse *p, register cset *cs); 1234254225Speter */ 1235254225Speterstatic int /* character; there is no "none" value */ 1236254225Speterfirstch(register struct parse *p, register cset *cs) 1237254225Speter{ 1238254225Speter register size_t i; 1239254225Speter register size_t css = (size_t)p->g->csetsize; 1240254225Speter 1241254225Speter for (i = 0; i < css; i++) 1242254225Speter if (CHIN(cs, i)) 1243254225Speter return((char)i); 1244254225Speter assert(never); 1245254225Speter return(0); /* arbitrary */ 1246254225Speter} 1247254225Speter 1248254225Speter/* 1249254225Speter - nch - number of characters in a set 1250254225Speter == static int nch(register struct parse *p, register cset *cs); 1251254225Speter */ 1252254225Speterstatic int 1253254225Speternch(register struct parse *p, register cset *cs) 1254254225Speter{ 1255254225Speter register size_t i; 1256254225Speter register size_t css = (size_t)p->g->csetsize; 1257254225Speter register int n = 0; 1258254225Speter 1259254225Speter for (i = 0; i < css; i++) 1260254225Speter if (CHIN(cs, i)) 1261254225Speter n++; 1262254225Speter return(n); 1263254225Speter} 1264254225Speter 1265254225Speter/* 1266254225Speter - mcadd - add a collating element to a cset 1267254225Speter == static void mcadd(register struct parse *p, register cset *cs, \ 1268254225Speter == register char *cp); 1269254225Speter */ 1270254225Speterstatic void 1271254225Spetermcadd(register struct parse *p, register cset *cs, register const char *cp) 1272254225Speter{ 1273254225Speter register size_t oldend = cs->smultis; 1274254225Speter void *np; 1275254225Speter 1276254225Speter cs->smultis += strlen(cp) + 1; 1277254225Speter np = realloc(cs->multis, cs->smultis); 1278254225Speter if (np == NULL) { 1279254225Speter if (cs->multis) 1280254225Speter free(cs->multis); 1281254225Speter cs->multis = NULL; 1282254225Speter SETERROR(REG_ESPACE); 1283254225Speter return; 1284254225Speter } 1285254225Speter cs->multis = np; 1286254225Speter 1287254225Speter (void) strlcpy(cs->multis + oldend - 1, cp, cs->smultis - oldend + 1); 1288254225Speter} 1289254225Speter 1290254225Speter#ifdef notdef 1291254225Speter/* 1292254225Speter - mcsub - subtract a collating element from a cset 1293254225Speter == static void mcsub(register cset *cs, register char *cp); 1294254225Speter */ 1295254225Speterstatic void 1296254225Spetermcsub(register cset *cs, register char *cp) 1297254225Speter{ 1298254225Speter register char *fp = mcfind(cs, cp); 1299254225Speter register size_t len = strlen(fp); 1300254225Speter 1301254225Speter assert(fp != NULL); 1302254225Speter (void) memmove(fp, fp + len + 1, 1303254225Speter cs->smultis - (fp + len + 1 - cs->multis)); 1304254225Speter cs->smultis -= len; 1305254225Speter 1306254225Speter if (cs->smultis == 0) { 1307254225Speter free(cs->multis); 1308254225Speter cs->multis = NULL; 1309254225Speter return; 1310254225Speter } 1311254225Speter 1312254225Speter cs->multis = realloc(cs->multis, cs->smultis); 1313254225Speter assert(cs->multis != NULL); 1314254225Speter} 1315254225Speter 1316254225Speter/* 1317254225Speter - mcin - is a collating element in a cset? 1318254225Speter == static int mcin(register cset *cs, register char *cp); 1319254225Speter */ 1320254225Speterstatic int 1321254225Spetermcin(register cset *cs, register char *cp) 1322254225Speter{ 1323254225Speter return(mcfind(cs, cp) != NULL); 1324254225Speter} 1325254225Speter 1326254225Speter/* 1327254225Speter - mcfind - find a collating element in a cset 1328254225Speter == static char *mcfind(register cset *cs, register char *cp); 1329254225Speter */ 1330254225Speterstatic char * 1331254225Spetermcfind(register cset *cs, register char *cp) 1332254225Speter{ 1333254225Speter register char *p; 1334254225Speter 1335254225Speter if (cs->multis == NULL) 1336254225Speter return(NULL); 1337254225Speter for (p = cs->multis; *p != '\0'; p += strlen(p) + 1) 1338254225Speter if (strcmp(cp, p) == 0) 1339254225Speter return(p); 1340254225Speter return(NULL); 1341254225Speter} 1342254225Speter#endif 1343254225Speter 1344254225Speter/* 1345254225Speter - mcinvert - invert the list of collating elements in a cset 1346254225Speter == static void mcinvert(register struct parse *p, register cset *cs); 1347254225Speter * 1348254225Speter * This would have to know the set of possibilities. Implementation 1349254225Speter * is deferred. 1350254225Speter */ 1351254225Speterstatic void 1352254225Spetermcinvert(register struct parse *p, register cset *cs) 1353254225Speter{ 1354254225Speter assert(cs->multis == NULL); /* xxx */ 1355254225Speter} 1356254225Speter 1357254225Speter/* 1358254225Speter - mccase - add case counterparts of the list of collating elements in a cset 1359254225Speter == static void mccase(register struct parse *p, register cset *cs); 1360254225Speter * 1361254225Speter * This would have to know the set of possibilities. Implementation 1362254225Speter * is deferred. 1363254225Speter */ 1364254225Speterstatic void 1365254225Spetermccase(register struct parse *p, register cset *cs) 1366254225Speter{ 1367254225Speter assert(cs->multis == NULL); /* xxx */ 1368254225Speter} 1369254225Speter 1370254225Speter#ifdef notdef 1371254225Speter/* 1372254225Speter - isinsets - is this character in any sets? 1373254225Speter == static int isinsets(register struct re_guts *g, int c); 1374254225Speter */ 1375254225Speterstatic int /* predicate */ 1376254225Speterisinsets(register struct re_guts *g, int c) 1377254225Speter{ 1378254225Speter register uch *col; 1379254225Speter register int i; 1380254225Speter register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; 1381254225Speter register unsigned uc = (unsigned char)c; 1382254225Speter 1383254225Speter for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) 1384254225Speter if (col[uc] != 0) 1385254225Speter return(1); 1386254225Speter return(0); 1387254225Speter} 1388254225Speter 1389254225Speter/* 1390254225Speter - samesets - are these two characters in exactly the same sets? 1391254225Speter == static int samesets(register struct re_guts *g, int c1, int c2); 1392254225Speter */ 1393254225Speterstatic int /* predicate */ 1394254225Spetersamesets(register struct re_guts *g, int c1, int c2) 1395254225Speter{ 1396254225Speter register uch *col; 1397254225Speter register int i; 1398254225Speter register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; 1399254225Speter register unsigned uc1 = (unsigned char)c1; 1400254225Speter register unsigned uc2 = (unsigned char)c2; 1401254225Speter 1402254225Speter for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) 1403254225Speter if (col[uc1] != col[uc2]) 1404254225Speter return(0); 1405254225Speter return(1); 1406254225Speter} 1407254225Speter#endif 1408254225Speter 1409254225Speter/* 1410254225Speter - categorize - sort out character categories 1411254225Speter == static void categorize(struct parse *p, register struct re_guts *g); 1412254225Speter */ 1413254225Speterstatic void 1414254225Spetercategorize(struct parse *p, register struct re_guts *g) 1415254225Speter{ 1416254225Speter#ifdef notdef 1417254225Speter register cat_t *cats = g->categories; 1418254225Speter register int c; 1419254225Speter register int c2; 1420254225Speter register cat_t cat; 1421254225Speter 1422254225Speter /* avoid making error situations worse */ 1423254225Speter if (p->error != 0) 1424254225Speter return; 1425254225Speter 1426254225Speter for (c = CHAR_MIN; c <= CHAR_MAX; c++) 1427254225Speter if (cats[c] == 0 && isinsets(g, c)) { 1428254225Speter cat = g->ncategories++; 1429254225Speter cats[c] = cat; 1430254225Speter for (c2 = c+1; c2 <= CHAR_MAX; c2++) 1431254225Speter if (cats[c2] == 0 && samesets(g, c, c2)) 1432254225Speter cats[c2] = cat; 1433254225Speter } 1434254225Speter#endif 1435254225Speter} 1436254225Speter 1437254225Speter/* 1438254225Speter - dupl - emit a duplicate of a bunch of sops 1439254225Speter == static sopno dupl(register struct parse *p, sopno start, sopno finish); 1440254225Speter */ 1441254225Speterstatic sopno /* start of duplicate */ 1442254225Speterdupl(register struct parse *p, sopno start, sopno finish) 1443254225Speter 1444254225Speter /* from here */ 1445254225Speter /* to this less one */ 1446254225Speter{ 1447254225Speter register sopno ret = HERE(); 1448254225Speter register sopno len = finish - start; 1449254225Speter 1450254225Speter assert(finish >= start); 1451254225Speter if (len == 0) 1452254225Speter return(ret); 1453254225Speter if (!enlarge(p, p->ssize + len)) /* this many unexpected additions */ 1454254225Speter return ret; 1455254225Speter assert(p->ssize >= p->slen + len); 1456254225Speter (void) memcpy((char *)(p->strip + p->slen), 1457254225Speter (char *)(p->strip + start), (size_t)len*sizeof(sop)); 1458254225Speter (void) memcpy((char *)(p->stripdata + p->slen), 1459254225Speter (char *)(p->stripdata + start), (size_t)len*sizeof(RCHAR_T)); 1460254225Speter p->slen += len; 1461254225Speter return(ret); 1462254225Speter} 1463254225Speter 1464254225Speter/* 1465254225Speter - doemit - emit a strip operator 1466254225Speter == static void doemit(register struct parse *p, sop op, size_t opnd); 1467254225Speter * 1468254225Speter * It might seem better to implement this as a macro with a function as 1469254225Speter * hard-case backup, but it's just too big and messy unless there are 1470254225Speter * some changes to the data structures. Maybe later. 1471254225Speter */ 1472254225Speterstatic void 1473254225Speterdoemit(register struct parse *p, sop op, size_t opnd) 1474254225Speter{ 1475254225Speter /* avoid making error situations worse */ 1476254225Speter if (p->error != 0) 1477254225Speter return; 1478254225Speter 1479254225Speter /* deal with oversize operands ("can't happen", more or less) */ 1480254225Speter assert(opnd < 1); 1481254225Speter 1482254225Speter /* deal with undersized strip */ 1483254225Speter if (p->slen >= p->ssize) 1484254225Speter if (!enlarge(p, (p->ssize+1) / 2 * 3)) /* +50% */ 1485254225Speter return; 1486254225Speter 1487254225Speter /* finally, it's all reduced to the easy case */ 1488254225Speter p->strip[p->slen] = op; 1489254225Speter p->stripdata[p->slen] = opnd; 1490254225Speter p->slen++; 1491254225Speter} 1492254225Speter 1493254225Speter/* 1494254225Speter - doinsert - insert a sop into the strip 1495254225Speter == static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos); 1496254225Speter */ 1497254225Speterstatic void 1498254225Speterdoinsert(register struct parse *p, sop op, size_t opnd, sopno pos) 1499254225Speter{ 1500254225Speter register sopno sn; 1501254225Speter register sop s; 1502254225Speter register RCHAR_T d; 1503254225Speter register int i; 1504254225Speter 1505254225Speter /* avoid making error situations worse */ 1506254225Speter if (p->error != 0) 1507254225Speter return; 1508254225Speter 1509254225Speter sn = HERE(); 1510254225Speter EMIT(op, opnd); /* do checks, ensure space */ 1511254225Speter assert(HERE() == sn+1); 1512254225Speter s = p->strip[sn]; 1513254225Speter d = p->stripdata[sn]; 1514254225Speter 1515254225Speter /* adjust paren pointers */ 1516254225Speter assert(pos > 0); 1517254225Speter for (i = 1; i < NPAREN; i++) { 1518254225Speter if (p->pbegin[i] >= pos) { 1519254225Speter p->pbegin[i]++; 1520254225Speter } 1521254225Speter if (p->pend[i] >= pos) { 1522254225Speter p->pend[i]++; 1523254225Speter } 1524254225Speter } 1525254225Speter 1526254225Speter memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos], 1527254225Speter (HERE()-pos-1)*sizeof(sop)); 1528254225Speter memmove((char *)&p->stripdata[pos+1], (char *)&p->stripdata[pos], 1529254225Speter (HERE()-pos-1)*sizeof(RCHAR_T)); 1530254225Speter p->strip[pos] = s; 1531254225Speter p->stripdata[pos] = d; 1532254225Speter} 1533254225Speter 1534254225Speter/* 1535254225Speter - dofwd - complete a forward reference 1536254225Speter == static void dofwd(register struct parse *p, sopno pos, sop value); 1537254225Speter */ 1538254225Speterstatic void 1539254225Speterdofwd(register struct parse *p, register sopno pos, sop value) 1540254225Speter{ 1541254225Speter /* avoid making error situations worse */ 1542254225Speter if (p->error != 0) 1543254225Speter return; 1544254225Speter 1545254225Speter assert(value < 1); 1546254225Speter p->stripdata[pos] = value; 1547254225Speter} 1548254225Speter 1549254225Speter/* 1550254225Speter - enlarge - enlarge the strip 1551254225Speter == static int enlarge(register struct parse *p, sopno size); 1552254225Speter */ 1553254225Speterstatic int 1554254225Speterenlarge(register struct parse *p, register sopno size) 1555254225Speter{ 1556254225Speter register sop *sp; 1557254225Speter register RCHAR_T *dp; 1558254225Speter sopno osize; 1559254225Speter 1560254225Speter if (p->ssize >= size) 1561254225Speter return 1; 1562254225Speter 1563254225Speter osize = p->ssize; 1564254225Speter p->ssize = size; 1565254225Speter if (MEMSIZE(p) > MEMLIMIT) 1566254225Speter goto oomem; 1567254225Speter sp = realloc(p->strip, p->ssize * sizeof(sop)); 1568254225Speter if (sp == NULL) 1569254225Speter goto oomem; 1570254225Speter p->strip = sp; 1571254225Speter dp = realloc(p->stripdata, p->ssize * sizeof(RCHAR_T)); 1572254225Speter if (dp == NULL) { 1573254225Speteroomem: 1574254225Speter p->ssize = osize; 1575254225Speter SETERROR(REG_ESPACE); 1576254225Speter return 0; 1577254225Speter } 1578254225Speter p->stripdata = dp; 1579254225Speter return 1; 1580254225Speter} 1581254225Speter 1582254225Speter/* 1583254225Speter - stripsnug - compact the strip 1584254225Speter == static void stripsnug(register struct parse *p, register struct re_guts *g); 1585254225Speter */ 1586254225Speterstatic void 1587254225Speterstripsnug(register struct parse *p, register struct re_guts *g) 1588254225Speter{ 1589254225Speter g->nstates = p->slen; 1590254225Speter g->strip = (sop *)realloc((char *)p->strip, 1591254225Speter p->slen * sizeof(sop)); 1592254225Speter if (g->strip == NULL) { 1593254225Speter SETERROR(REG_ESPACE); 1594254225Speter g->strip = p->strip; 1595254225Speter } 1596254225Speter g->stripdata = (RCHAR_T *)realloc((char *)p->stripdata, 1597254225Speter p->slen * sizeof(RCHAR_T)); 1598254225Speter if (g->stripdata == NULL) { 1599254225Speter SETERROR(REG_ESPACE); 1600254225Speter g->stripdata = p->stripdata; 1601254225Speter } 1602254225Speter} 1603254225Speter 1604254225Speter/* 1605254225Speter - findmust - fill in must and mlen with longest mandatory literal string 1606254225Speter == static void findmust(register struct parse *p, register struct re_guts *g); 1607254225Speter * 1608254225Speter * This algorithm could do fancy things like analyzing the operands of | 1609254225Speter * for common subsequences. Someday. This code is simple and finds most 1610254225Speter * of the interesting cases. 1611254225Speter * 1612254225Speter * Note that must and mlen got initialized during setup. 1613254225Speter */ 1614254225Speterstatic void 1615254225Speterfindmust(struct parse *p, register struct re_guts *g) 1616254225Speter{ 1617254225Speter register sop *scans; 1618254225Speter register RCHAR_T *scand; 1619254225Speter sop *starts = 0; 1620254225Speter RCHAR_T *startd = NULL; 1621254225Speter register sop *newstarts = 0; 1622254225Speter register RCHAR_T *newstartd = NULL; 1623254225Speter register sopno newlen; 1624254225Speter register sop s; 1625254225Speter register RCHAR_T d; 1626254225Speter register RCHAR_T *cp; 1627254225Speter register sopno i; 1628254225Speter 1629254225Speter /* avoid making error situations worse */ 1630254225Speter if (p->error != 0) 1631254225Speter return; 1632254225Speter 1633254225Speter /* find the longest OCHAR sequence in strip */ 1634254225Speter newlen = 0; 1635254225Speter scans = g->strip + 1; 1636254225Speter scand = g->stripdata + 1; 1637254225Speter do { 1638254225Speter s = *scans++; 1639254225Speter d = *scand++; 1640254225Speter switch (s) { 1641254225Speter case OCHAR: /* sequence member */ 1642254225Speter if (newlen == 0) { /* new sequence */ 1643254225Speter newstarts = scans - 1; 1644254225Speter newstartd = scand - 1; 1645254225Speter } 1646254225Speter newlen++; 1647254225Speter break; 1648254225Speter case OPLUS_: /* things that don't break one */ 1649254225Speter case OLPAREN: 1650254225Speter case ORPAREN: 1651254225Speter break; 1652254225Speter case OQUEST_: /* things that must be skipped */ 1653254225Speter case OCH_: 1654254225Speter scans--; 1655254225Speter scand--; 1656254225Speter do { 1657254225Speter scans += d; 1658254225Speter scand += d; 1659254225Speter s = *scans; 1660254225Speter d = *scand; 1661254225Speter /* assert() interferes w debug printouts */ 1662254225Speter if (s != O_QUEST && s != O_CH && s != OOR2) { 1663254225Speter g->iflags |= BAD; 1664254225Speter return; 1665254225Speter } 1666254225Speter } while (s != O_QUEST && s != O_CH); 1667254225Speter /* fallthrough */ 1668254225Speter default: /* things that break a sequence */ 1669254225Speter if (newlen > g->mlen) { /* ends one */ 1670254225Speter starts = newstarts; 1671254225Speter startd = newstartd; 1672254225Speter g->mlen = newlen; 1673254225Speter } 1674254225Speter newlen = 0; 1675254225Speter break; 1676254225Speter } 1677254225Speter } while (s != OEND); 1678254225Speter 1679254225Speter if (g->mlen == 0) /* there isn't one */ 1680254225Speter return; 1681254225Speter 1682254225Speter /* turn it into a character string */ 1683254225Speter g->must = malloc(((size_t)g->mlen + 1) * sizeof(RCHAR_T)); 1684254225Speter if (g->must == NULL) { /* argh; just forget it */ 1685254225Speter g->mlen = 0; 1686254225Speter return; 1687254225Speter } 1688254225Speter cp = g->must; 1689254225Speter scans = starts; 1690254225Speter scand = startd; 1691254225Speter for (i = g->mlen; i > 0; i--) { 1692254225Speter for (;;) { 1693254225Speter s = *scans++; 1694254225Speter d = *scand++; 1695254225Speter if (s == OCHAR) 1696254225Speter break; 1697254225Speter } 1698254225Speter assert(cp < g->must + g->mlen); 1699254225Speter *cp++ = d; 1700254225Speter } 1701254225Speter assert(cp == g->must + g->mlen); 1702254225Speter *cp++ = '\0'; /* just on general principles */ 1703254225Speter} 1704254225Speter 1705254225Speter/* 1706254225Speter - pluscount - count + nesting 1707254225Speter == static sopno pluscount(register struct parse *p, register struct re_guts *g); 1708254225Speter */ 1709254225Speterstatic sopno /* nesting depth */ 1710254225Speterpluscount(struct parse *p, register struct re_guts *g) 1711254225Speter{ 1712254225Speter register sop *scan; 1713254225Speter register sop s; 1714254225Speter register sopno plusnest = 0; 1715254225Speter register sopno maxnest = 0; 1716254225Speter 1717254225Speter if (p->error != 0) 1718254225Speter return(0); /* there may not be an OEND */ 1719254225Speter 1720254225Speter scan = g->strip + 1; 1721254225Speter do { 1722254225Speter s = *scan++; 1723254225Speter switch (s) { 1724254225Speter case OPLUS_: 1725254225Speter plusnest++; 1726254225Speter break; 1727254225Speter case O_PLUS: 1728254225Speter if (plusnest > maxnest) 1729254225Speter maxnest = plusnest; 1730254225Speter plusnest--; 1731254225Speter break; 1732254225Speter } 1733254225Speter } while (s != OEND); 1734254225Speter if (plusnest != 0) 1735254225Speter g->iflags |= BAD; 1736254225Speter return(maxnest); 1737254225Speter} 1738