regcomp.c revision 62391
11573Srgrimes/*-
21573Srgrimes * Copyright (c) 1992, 1993, 1994 Henry Spencer.
31573Srgrimes * Copyright (c) 1992, 1993, 1994
41573Srgrimes *	The Regents of the University of California.  All rights reserved.
51573Srgrimes *
61573Srgrimes * This code is derived from software contributed to Berkeley by
71573Srgrimes * Henry Spencer.
81573Srgrimes *
91573Srgrimes * Redistribution and use in source and binary forms, with or without
101573Srgrimes * modification, are permitted provided that the following conditions
111573Srgrimes * are met:
121573Srgrimes * 1. Redistributions of source code must retain the above copyright
131573Srgrimes *    notice, this list of conditions and the following disclaimer.
141573Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
151573Srgrimes *    notice, this list of conditions and the following disclaimer in the
161573Srgrimes *    documentation and/or other materials provided with the distribution.
171573Srgrimes * 3. All advertising materials mentioning features or use of this software
181573Srgrimes *    must display the following acknowledgement:
191573Srgrimes *	This product includes software developed by the University of
201573Srgrimes *	California, Berkeley and its contributors.
211573Srgrimes * 4. Neither the name of the University nor the names of its contributors
221573Srgrimes *    may be used to endorse or promote products derived from this software
231573Srgrimes *    without specific prior written permission.
241573Srgrimes *
251573Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
261573Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
271573Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
281573Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
291573Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
301573Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
311573Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
321573Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
331573Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
341573Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
351573Srgrimes * SUCH DAMAGE.
361573Srgrimes *
371573Srgrimes *	@(#)regcomp.c	8.5 (Berkeley) 3/20/94
3862232Sdcs *
3962232Sdcs * $FreeBSD: head/lib/libc/regex/regcomp.c 62391 2000-07-02 10:58:07Z dcs $
401573Srgrimes */
411573Srgrimes
421573Srgrimes#if defined(LIBC_SCCS) && !defined(lint)
431573Srgrimesstatic char sccsid[] = "@(#)regcomp.c	8.5 (Berkeley) 3/20/94";
441573Srgrimes#endif /* LIBC_SCCS and not lint */
451573Srgrimes
461573Srgrimes#include <sys/types.h>
471573Srgrimes#include <stdio.h>
481573Srgrimes#include <string.h>
491573Srgrimes#include <ctype.h>
501573Srgrimes#include <limits.h>
511573Srgrimes#include <stdlib.h>
521573Srgrimes#include <regex.h>
531573Srgrimes
5419277Sache#include "collate.h"
5519277Sache
561573Srgrimes#include "utils.h"
571573Srgrimes#include "regex2.h"
581573Srgrimes
591573Srgrimes#include "cclass.h"
601573Srgrimes#include "cname.h"
611573Srgrimes
621573Srgrimes/*
631573Srgrimes * parse structure, passed up and down to avoid global variables and
641573Srgrimes * other clumsinesses
651573Srgrimes */
661573Srgrimesstruct parse {
671573Srgrimes	char *next;		/* next character in RE */
681573Srgrimes	char *end;		/* end of string (-> NUL normally) */
691573Srgrimes	int error;		/* has an error been seen? */
701573Srgrimes	sop *strip;		/* malloced strip */
711573Srgrimes	sopno ssize;		/* malloced strip size (allocated) */
721573Srgrimes	sopno slen;		/* malloced strip length (used) */
731573Srgrimes	int ncsalloc;		/* number of csets allocated */
741573Srgrimes	struct re_guts *g;
751573Srgrimes#	define	NPAREN	10	/* we need to remember () 1-9 for back refs */
761573Srgrimes	sopno pbegin[NPAREN];	/* -> ( ([0] unused) */
771573Srgrimes	sopno pend[NPAREN];	/* -> ) ([0] unused) */
781573Srgrimes};
791573Srgrimes
801573Srgrimes/* ========= begin header generated by ./mkh ========= */
811573Srgrimes#ifdef __cplusplus
821573Srgrimesextern "C" {
831573Srgrimes#endif
841573Srgrimes
851573Srgrimes/* === regcomp.c === */
861573Srgrimesstatic void p_ere __P((struct parse *p, int stop));
871573Srgrimesstatic void p_ere_exp __P((struct parse *p));
881573Srgrimesstatic void p_str __P((struct parse *p));
891573Srgrimesstatic void p_bre __P((struct parse *p, int end1, int end2));
901573Srgrimesstatic int p_simp_re __P((struct parse *p, int starordinary));
911573Srgrimesstatic int p_count __P((struct parse *p));
921573Srgrimesstatic void p_bracket __P((struct parse *p));
931573Srgrimesstatic void p_b_term __P((struct parse *p, cset *cs));
941573Srgrimesstatic void p_b_cclass __P((struct parse *p, cset *cs));
951573Srgrimesstatic void p_b_eclass __P((struct parse *p, cset *cs));
961573Srgrimesstatic char p_b_symbol __P((struct parse *p));
971573Srgrimesstatic char p_b_coll_elem __P((struct parse *p, int endc));
981573Srgrimesstatic char othercase __P((int ch));
991573Srgrimesstatic void bothcases __P((struct parse *p, int ch));
1001573Srgrimesstatic void ordinary __P((struct parse *p, int ch));
1011573Srgrimesstatic void nonnewline __P((struct parse *p));
1021573Srgrimesstatic void repeat __P((struct parse *p, sopno start, int from, int to));
1031573Srgrimesstatic int seterr __P((struct parse *p, int e));
1041573Srgrimesstatic cset *allocset __P((struct parse *p));
1051573Srgrimesstatic void freeset __P((struct parse *p, cset *cs));
1061573Srgrimesstatic int freezeset __P((struct parse *p, cset *cs));
1071573Srgrimesstatic int firstch __P((struct parse *p, cset *cs));
1081573Srgrimesstatic int nch __P((struct parse *p, cset *cs));
1091573Srgrimesstatic void mcadd __P((struct parse *p, cset *cs, char *cp));
11017141Sjkh#if used
1111573Srgrimesstatic void mcsub __P((cset *cs, char *cp));
1121573Srgrimesstatic int mcin __P((cset *cs, char *cp));
1131573Srgrimesstatic char *mcfind __P((cset *cs, char *cp));
11417141Sjkh#endif
1151573Srgrimesstatic void mcinvert __P((struct parse *p, cset *cs));
1161573Srgrimesstatic void mccase __P((struct parse *p, cset *cs));
1171573Srgrimesstatic int isinsets __P((struct re_guts *g, int c));
1181573Srgrimesstatic int samesets __P((struct re_guts *g, int c1, int c2));
1191573Srgrimesstatic void categorize __P((struct parse *p, struct re_guts *g));
1201573Srgrimesstatic sopno dupl __P((struct parse *p, sopno start, sopno finish));
1211573Srgrimesstatic void doemit __P((struct parse *p, sop op, size_t opnd));
1221573Srgrimesstatic void doinsert __P((struct parse *p, sop op, size_t opnd, sopno pos));
1231573Srgrimesstatic void dofwd __P((struct parse *p, sopno pos, sop value));
1241573Srgrimesstatic void enlarge __P((struct parse *p, sopno size));
1251573Srgrimesstatic void stripsnug __P((struct parse *p, struct re_guts *g));
1261573Srgrimesstatic void findmust __P((struct parse *p, struct re_guts *g));
12762391Sdcsstatic int altoffset __P((sop *scan, int offset, int mccs));
12862232Sdcsstatic void computejumps __P((struct parse *p, struct re_guts *g));
12962232Sdcsstatic void computematchjumps __P((struct parse *p, struct re_guts *g));
1301573Srgrimesstatic sopno pluscount __P((struct parse *p, struct re_guts *g));
1311573Srgrimes
1321573Srgrimes#ifdef __cplusplus
1331573Srgrimes}
1341573Srgrimes#endif
1351573Srgrimes/* ========= end header generated by ./mkh ========= */
1361573Srgrimes
1371573Srgrimesstatic char nuls[10];		/* place to point scanner in event of error */
1381573Srgrimes
1391573Srgrimes/*
1401573Srgrimes * macros for use with parse structure
1411573Srgrimes * BEWARE:  these know that the parse structure is named `p' !!!
1421573Srgrimes */
1431573Srgrimes#define	PEEK()	(*p->next)
1441573Srgrimes#define	PEEK2()	(*(p->next+1))
1451573Srgrimes#define	MORE()	(p->next < p->end)
1461573Srgrimes#define	MORE2()	(p->next+1 < p->end)
1471573Srgrimes#define	SEE(c)	(MORE() && PEEK() == (c))
1481573Srgrimes#define	SEETWO(a, b)	(MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
1491573Srgrimes#define	EAT(c)	((SEE(c)) ? (NEXT(), 1) : 0)
1501573Srgrimes#define	EATTWO(a, b)	((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
1511573Srgrimes#define	NEXT()	(p->next++)
1521573Srgrimes#define	NEXT2()	(p->next += 2)
1531573Srgrimes#define	NEXTn(n)	(p->next += (n))
1541573Srgrimes#define	GETNEXT()	(*p->next++)
1551573Srgrimes#define	SETERROR(e)	seterr(p, (e))
1561573Srgrimes#define	REQUIRE(co, e)	((co) || SETERROR(e))
1571573Srgrimes#define	MUSTSEE(c, e)	(REQUIRE(MORE() && PEEK() == (c), e))
1581573Srgrimes#define	MUSTEAT(c, e)	(REQUIRE(MORE() && GETNEXT() == (c), e))
1591573Srgrimes#define	MUSTNOTSEE(c, e)	(REQUIRE(!MORE() || PEEK() != (c), e))
1601573Srgrimes#define	EMIT(op, sopnd)	doemit(p, (sop)(op), (size_t)(sopnd))
1611573Srgrimes#define	INSERT(op, pos)	doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
1621573Srgrimes#define	AHEAD(pos)		dofwd(p, pos, HERE()-(pos))
1631573Srgrimes#define	ASTERN(sop, pos)	EMIT(sop, HERE()-pos)
1641573Srgrimes#define	HERE()		(p->slen)
1651573Srgrimes#define	THERE()		(p->slen - 1)
1661573Srgrimes#define	THERETHERE()	(p->slen - 2)
1671573Srgrimes#define	DROP(n)	(p->slen -= (n))
1681573Srgrimes
1691573Srgrimes#ifndef NDEBUG
1701573Srgrimesstatic int never = 0;		/* for use in asserts; shuts lint up */
1711573Srgrimes#else
1721573Srgrimes#define	never	0		/* some <assert.h>s have bugs too */
1731573Srgrimes#endif
1741573Srgrimes
17562232Sdcs/* Macro used by computejump()/computematchjump() */
17662232Sdcs#define MIN(a,b)	((a)<(b)?(a):(b))
17762232Sdcs
1781573Srgrimes/*
1791573Srgrimes - regcomp - interface for parser and compilation
1801573Srgrimes = extern int regcomp(regex_t *, const char *, int);
1811573Srgrimes = #define	REG_BASIC	0000
1821573Srgrimes = #define	REG_EXTENDED	0001
1831573Srgrimes = #define	REG_ICASE	0002
1841573Srgrimes = #define	REG_NOSUB	0004
1851573Srgrimes = #define	REG_NEWLINE	0010
1861573Srgrimes = #define	REG_NOSPEC	0020
1871573Srgrimes = #define	REG_PEND	0040
1881573Srgrimes = #define	REG_DUMP	0200
1891573Srgrimes */
1901573Srgrimesint				/* 0 success, otherwise REG_something */
1911573Srgrimesregcomp(preg, pattern, cflags)
1921573Srgrimesregex_t *preg;
1931573Srgrimesconst char *pattern;
1941573Srgrimesint cflags;
1951573Srgrimes{
1961573Srgrimes	struct parse pa;
1971573Srgrimes	register struct re_guts *g;
1981573Srgrimes	register struct parse *p = &pa;
1991573Srgrimes	register int i;
2001573Srgrimes	register size_t len;
2011573Srgrimes#ifdef REDEBUG
2021573Srgrimes#	define	GOODFLAGS(f)	(f)
2031573Srgrimes#else
2041573Srgrimes#	define	GOODFLAGS(f)	((f)&~REG_DUMP)
2051573Srgrimes#endif
2061573Srgrimes
2071573Srgrimes	cflags = GOODFLAGS(cflags);
2081573Srgrimes	if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
2091573Srgrimes		return(REG_INVARG);
2101573Srgrimes
2111573Srgrimes	if (cflags&REG_PEND) {
2121573Srgrimes		if (preg->re_endp < pattern)
2131573Srgrimes			return(REG_INVARG);
2141573Srgrimes		len = preg->re_endp - pattern;
2151573Srgrimes	} else
2161573Srgrimes		len = strlen((char *)pattern);
2171573Srgrimes
2181573Srgrimes	/* do the mallocs early so failure handling is easy */
2191573Srgrimes	g = (struct re_guts *)malloc(sizeof(struct re_guts) +
2201573Srgrimes							(NC-1)*sizeof(cat_t));
2211573Srgrimes	if (g == NULL)
2221573Srgrimes		return(REG_ESPACE);
2231573Srgrimes	p->ssize = len/(size_t)2*(size_t)3 + (size_t)1;	/* ugh */
2241573Srgrimes	p->strip = (sop *)malloc(p->ssize * sizeof(sop));
2251573Srgrimes	p->slen = 0;
2261573Srgrimes	if (p->strip == NULL) {
2271573Srgrimes		free((char *)g);
2281573Srgrimes		return(REG_ESPACE);
2291573Srgrimes	}
2301573Srgrimes
2311573Srgrimes	/* set things up */
2321573Srgrimes	p->g = g;
2331573Srgrimes	p->next = (char *)pattern;	/* convenience; we do not modify it */
2341573Srgrimes	p->end = p->next + len;
2351573Srgrimes	p->error = 0;
2361573Srgrimes	p->ncsalloc = 0;
2371573Srgrimes	for (i = 0; i < NPAREN; i++) {
2381573Srgrimes		p->pbegin[i] = 0;
2391573Srgrimes		p->pend[i] = 0;
2401573Srgrimes	}
2411573Srgrimes	g->csetsize = NC;
2421573Srgrimes	g->sets = NULL;
2431573Srgrimes	g->setbits = NULL;
2441573Srgrimes	g->ncsets = 0;
2451573Srgrimes	g->cflags = cflags;
2461573Srgrimes	g->iflags = 0;
2471573Srgrimes	g->nbol = 0;
2481573Srgrimes	g->neol = 0;
2491573Srgrimes	g->must = NULL;
25062391Sdcs	g->moffset = -1;
25162263Sdcs	g->charjump = NULL;
25262263Sdcs	g->matchjump = NULL;
2531573Srgrimes	g->mlen = 0;
2541573Srgrimes	g->nsub = 0;
2551573Srgrimes	g->ncategories = 1;	/* category 0 is "everything else" */
2561573Srgrimes	g->categories = &g->catspace[-(CHAR_MIN)];
2571573Srgrimes	(void) memset((char *)g->catspace, 0, NC*sizeof(cat_t));
2581573Srgrimes	g->backrefs = 0;
2591573Srgrimes
2601573Srgrimes	/* do it */
2611573Srgrimes	EMIT(OEND, 0);
2621573Srgrimes	g->firststate = THERE();
2631573Srgrimes	if (cflags&REG_EXTENDED)
2641573Srgrimes		p_ere(p, OUT);
2651573Srgrimes	else if (cflags&REG_NOSPEC)
2661573Srgrimes		p_str(p);
2671573Srgrimes	else
2681573Srgrimes		p_bre(p, OUT, OUT);
2691573Srgrimes	EMIT(OEND, 0);
2701573Srgrimes	g->laststate = THERE();
2711573Srgrimes
2721573Srgrimes	/* tidy up loose ends and fill things in */
2731573Srgrimes	categorize(p, g);
2741573Srgrimes	stripsnug(p, g);
2751573Srgrimes	findmust(p, g);
27662232Sdcs	/* only use Boyer-Moore algorithm if the pattern is bigger
27762232Sdcs	 * than three characters
27862232Sdcs	 */
27962232Sdcs	if(g->mlen > 3) {
28062232Sdcs		computejumps(p, g);
28162232Sdcs		computematchjumps(p, g);
28262232Sdcs		if(g->matchjump == NULL) {
28362232Sdcs			free(g->charjump);
28462232Sdcs			g->charjump = NULL;
28562232Sdcs		}
28662232Sdcs	}
2871573Srgrimes	g->nplus = pluscount(p, g);
2881573Srgrimes	g->magic = MAGIC2;
2891573Srgrimes	preg->re_nsub = g->nsub;
2901573Srgrimes	preg->re_g = g;
2911573Srgrimes	preg->re_magic = MAGIC1;
2921573Srgrimes#ifndef REDEBUG
2931573Srgrimes	/* not debugging, so can't rely on the assert() in regexec() */
2941573Srgrimes	if (g->iflags&BAD)
2951573Srgrimes		SETERROR(REG_ASSERT);
2961573Srgrimes#endif
2971573Srgrimes
2981573Srgrimes	/* win or lose, we're done */
2991573Srgrimes	if (p->error != 0)	/* lose */
3001573Srgrimes		regfree(preg);
3011573Srgrimes	return(p->error);
3021573Srgrimes}
3031573Srgrimes
3041573Srgrimes/*
3051573Srgrimes - p_ere - ERE parser top level, concatenation and alternation
3061573Srgrimes == static void p_ere(register struct parse *p, int stop);
3071573Srgrimes */
3081573Srgrimesstatic void
3091573Srgrimesp_ere(p, stop)
3101573Srgrimesregister struct parse *p;
3111573Srgrimesint stop;			/* character this ERE should end at */
3121573Srgrimes{
3131573Srgrimes	register char c;
3141573Srgrimes	register sopno prevback;
3151573Srgrimes	register sopno prevfwd;
3161573Srgrimes	register sopno conc;
3171573Srgrimes	register int first = 1;		/* is this the first alternative? */
3181573Srgrimes
3191573Srgrimes	for (;;) {
3201573Srgrimes		/* do a bunch of concatenated expressions */
3211573Srgrimes		conc = HERE();
3221573Srgrimes		while (MORE() && (c = PEEK()) != '|' && c != stop)
3231573Srgrimes			p_ere_exp(p);
32417141Sjkh		(void)REQUIRE(HERE() != conc, REG_EMPTY);	/* require nonempty */
3251573Srgrimes
3261573Srgrimes		if (!EAT('|'))
3271573Srgrimes			break;		/* NOTE BREAK OUT */
3281573Srgrimes
3291573Srgrimes		if (first) {
3301573Srgrimes			INSERT(OCH_, conc);	/* offset is wrong */
3311573Srgrimes			prevfwd = conc;
3321573Srgrimes			prevback = conc;
3331573Srgrimes			first = 0;
3341573Srgrimes		}
3351573Srgrimes		ASTERN(OOR1, prevback);
3361573Srgrimes		prevback = THERE();
3371573Srgrimes		AHEAD(prevfwd);			/* fix previous offset */
3381573Srgrimes		prevfwd = HERE();
3391573Srgrimes		EMIT(OOR2, 0);			/* offset is very wrong */
3401573Srgrimes	}
3411573Srgrimes
3421573Srgrimes	if (!first) {		/* tail-end fixups */
3431573Srgrimes		AHEAD(prevfwd);
3441573Srgrimes		ASTERN(O_CH, prevback);
3451573Srgrimes	}
3461573Srgrimes
3471573Srgrimes	assert(!MORE() || SEE(stop));
3481573Srgrimes}
3491573Srgrimes
3501573Srgrimes/*
3511573Srgrimes - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
3521573Srgrimes == static void p_ere_exp(register struct parse *p);
3531573Srgrimes */
3541573Srgrimesstatic void
3551573Srgrimesp_ere_exp(p)
3561573Srgrimesregister struct parse *p;
3571573Srgrimes{
3581573Srgrimes	register char c;
3591573Srgrimes	register sopno pos;
3601573Srgrimes	register int count;
3611573Srgrimes	register int count2;
3621573Srgrimes	register sopno subno;
3631573Srgrimes	int wascaret = 0;
3641573Srgrimes
3651573Srgrimes	assert(MORE());		/* caller should have ensured this */
3661573Srgrimes	c = GETNEXT();
3671573Srgrimes
3681573Srgrimes	pos = HERE();
3691573Srgrimes	switch (c) {
3701573Srgrimes	case '(':
37117141Sjkh		(void)REQUIRE(MORE(), REG_EPAREN);
3721573Srgrimes		p->g->nsub++;
3731573Srgrimes		subno = p->g->nsub;
3741573Srgrimes		if (subno < NPAREN)
3751573Srgrimes			p->pbegin[subno] = HERE();
3761573Srgrimes		EMIT(OLPAREN, subno);
3771573Srgrimes		if (!SEE(')'))
3781573Srgrimes			p_ere(p, ')');
3791573Srgrimes		if (subno < NPAREN) {
3801573Srgrimes			p->pend[subno] = HERE();
3811573Srgrimes			assert(p->pend[subno] != 0);
3821573Srgrimes		}
3831573Srgrimes		EMIT(ORPAREN, subno);
38417141Sjkh		(void)MUSTEAT(')', REG_EPAREN);
3851573Srgrimes		break;
3861573Srgrimes#ifndef POSIX_MISTAKE
3871573Srgrimes	case ')':		/* happens only if no current unmatched ( */
3881573Srgrimes		/*
3891573Srgrimes		 * You may ask, why the ifndef?  Because I didn't notice
3901573Srgrimes		 * this until slightly too late for 1003.2, and none of the
3911573Srgrimes		 * other 1003.2 regular-expression reviewers noticed it at
3921573Srgrimes		 * all.  So an unmatched ) is legal POSIX, at least until
3931573Srgrimes		 * we can get it fixed.
3941573Srgrimes		 */
3951573Srgrimes		SETERROR(REG_EPAREN);
3961573Srgrimes		break;
3971573Srgrimes#endif
3981573Srgrimes	case '^':
3991573Srgrimes		EMIT(OBOL, 0);
4001573Srgrimes		p->g->iflags |= USEBOL;
4011573Srgrimes		p->g->nbol++;
4021573Srgrimes		wascaret = 1;
4031573Srgrimes		break;
4041573Srgrimes	case '$':
4051573Srgrimes		EMIT(OEOL, 0);
4061573Srgrimes		p->g->iflags |= USEEOL;
4071573Srgrimes		p->g->neol++;
4081573Srgrimes		break;
4091573Srgrimes	case '|':
4101573Srgrimes		SETERROR(REG_EMPTY);
4111573Srgrimes		break;
4121573Srgrimes	case '*':
4131573Srgrimes	case '+':
4141573Srgrimes	case '?':
4151573Srgrimes		SETERROR(REG_BADRPT);
4161573Srgrimes		break;
4171573Srgrimes	case '.':
4181573Srgrimes		if (p->g->cflags&REG_NEWLINE)
4191573Srgrimes			nonnewline(p);
4201573Srgrimes		else
4211573Srgrimes			EMIT(OANY, 0);
4221573Srgrimes		break;
4231573Srgrimes	case '[':
4241573Srgrimes		p_bracket(p);
4251573Srgrimes		break;
4261573Srgrimes	case '\\':
42717141Sjkh		(void)REQUIRE(MORE(), REG_EESCAPE);
4281573Srgrimes		c = GETNEXT();
4291573Srgrimes		ordinary(p, c);
4301573Srgrimes		break;
4311573Srgrimes	case '{':		/* okay as ordinary except if digit follows */
43249094Sache		(void)REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT);
4331573Srgrimes		/* FALLTHROUGH */
4341573Srgrimes	default:
4351573Srgrimes		ordinary(p, c);
4361573Srgrimes		break;
4371573Srgrimes	}
4381573Srgrimes
4391573Srgrimes	if (!MORE())
4401573Srgrimes		return;
4411573Srgrimes	c = PEEK();
4421573Srgrimes	/* we call { a repetition if followed by a digit */
4431573Srgrimes	if (!( c == '*' || c == '+' || c == '?' ||
44449094Sache				(c == '{' && MORE2() && isdigit((uch)PEEK2())) ))
4451573Srgrimes		return;		/* no repetition, we're done */
4461573Srgrimes	NEXT();
4471573Srgrimes
44817141Sjkh	(void)REQUIRE(!wascaret, REG_BADRPT);
4491573Srgrimes	switch (c) {
4501573Srgrimes	case '*':	/* implemented as +? */
4511573Srgrimes		/* this case does not require the (y|) trick, noKLUDGE */
4521573Srgrimes		INSERT(OPLUS_, pos);
4531573Srgrimes		ASTERN(O_PLUS, pos);
4541573Srgrimes		INSERT(OQUEST_, pos);
4551573Srgrimes		ASTERN(O_QUEST, pos);
4561573Srgrimes		break;
4571573Srgrimes	case '+':
4581573Srgrimes		INSERT(OPLUS_, pos);
4591573Srgrimes		ASTERN(O_PLUS, pos);
4601573Srgrimes		break;
4611573Srgrimes	case '?':
4621573Srgrimes		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
4631573Srgrimes		INSERT(OCH_, pos);		/* offset slightly wrong */
4641573Srgrimes		ASTERN(OOR1, pos);		/* this one's right */
4651573Srgrimes		AHEAD(pos);			/* fix the OCH_ */
4661573Srgrimes		EMIT(OOR2, 0);			/* offset very wrong... */
4671573Srgrimes		AHEAD(THERE());			/* ...so fix it */
4681573Srgrimes		ASTERN(O_CH, THERETHERE());
4691573Srgrimes		break;
4701573Srgrimes	case '{':
4711573Srgrimes		count = p_count(p);
4721573Srgrimes		if (EAT(',')) {
47349094Sache			if (isdigit((uch)PEEK())) {
4741573Srgrimes				count2 = p_count(p);
47517141Sjkh				(void)REQUIRE(count <= count2, REG_BADBR);
4761573Srgrimes			} else		/* single number with comma */
4771573Srgrimes				count2 = INFINITY;
4781573Srgrimes		} else		/* just a single number */
4791573Srgrimes			count2 = count;
4801573Srgrimes		repeat(p, pos, count, count2);
4811573Srgrimes		if (!EAT('}')) {	/* error heuristics */
4821573Srgrimes			while (MORE() && PEEK() != '}')
4831573Srgrimes				NEXT();
48417141Sjkh			(void)REQUIRE(MORE(), REG_EBRACE);
4851573Srgrimes			SETERROR(REG_BADBR);
4861573Srgrimes		}
4871573Srgrimes		break;
4881573Srgrimes	}
4891573Srgrimes
4901573Srgrimes	if (!MORE())
4911573Srgrimes		return;
4921573Srgrimes	c = PEEK();
4931573Srgrimes	if (!( c == '*' || c == '+' || c == '?' ||
49449094Sache				(c == '{' && MORE2() && isdigit((uch)PEEK2())) ) )
4951573Srgrimes		return;
4961573Srgrimes	SETERROR(REG_BADRPT);
4971573Srgrimes}
4981573Srgrimes
4991573Srgrimes/*
5001573Srgrimes - p_str - string (no metacharacters) "parser"
5011573Srgrimes == static void p_str(register struct parse *p);
5021573Srgrimes */
5031573Srgrimesstatic void
5041573Srgrimesp_str(p)
5051573Srgrimesregister struct parse *p;
5061573Srgrimes{
50717141Sjkh	(void)REQUIRE(MORE(), REG_EMPTY);
5081573Srgrimes	while (MORE())
5091573Srgrimes		ordinary(p, GETNEXT());
5101573Srgrimes}
5111573Srgrimes
5121573Srgrimes/*
5131573Srgrimes - p_bre - BRE parser top level, anchoring and concatenation
5141573Srgrimes == static void p_bre(register struct parse *p, register int end1, \
5151573Srgrimes ==	register int end2);
5161573Srgrimes * Giving end1 as OUT essentially eliminates the end1/end2 check.
5171573Srgrimes *
5181573Srgrimes * This implementation is a bit of a kludge, in that a trailing $ is first
5191573Srgrimes * taken as an ordinary character and then revised to be an anchor.  The
5201573Srgrimes * only undesirable side effect is that '$' gets included as a character
5211573Srgrimes * category in such cases.  This is fairly harmless; not worth fixing.
5221573Srgrimes * The amount of lookahead needed to avoid this kludge is excessive.
5231573Srgrimes */
5241573Srgrimesstatic void
5251573Srgrimesp_bre(p, end1, end2)
5261573Srgrimesregister struct parse *p;
5271573Srgrimesregister int end1;		/* first terminating character */
5281573Srgrimesregister int end2;		/* second terminating character */
5291573Srgrimes{
5301573Srgrimes	register sopno start = HERE();
5311573Srgrimes	register int first = 1;			/* first subexpression? */
5321573Srgrimes	register int wasdollar = 0;
5331573Srgrimes
5341573Srgrimes	if (EAT('^')) {
5351573Srgrimes		EMIT(OBOL, 0);
5361573Srgrimes		p->g->iflags |= USEBOL;
5371573Srgrimes		p->g->nbol++;
5381573Srgrimes	}
5391573Srgrimes	while (MORE() && !SEETWO(end1, end2)) {
5401573Srgrimes		wasdollar = p_simp_re(p, first);
5411573Srgrimes		first = 0;
5421573Srgrimes	}
5431573Srgrimes	if (wasdollar) {	/* oops, that was a trailing anchor */
5441573Srgrimes		DROP(1);
5451573Srgrimes		EMIT(OEOL, 0);
5461573Srgrimes		p->g->iflags |= USEEOL;
5471573Srgrimes		p->g->neol++;
5481573Srgrimes	}
5491573Srgrimes
55017141Sjkh	(void)REQUIRE(HERE() != start, REG_EMPTY);	/* require nonempty */
5511573Srgrimes}
5521573Srgrimes
5531573Srgrimes/*
5541573Srgrimes - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
5551573Srgrimes == static int p_simp_re(register struct parse *p, int starordinary);
5561573Srgrimes */
5571573Srgrimesstatic int			/* was the simple RE an unbackslashed $? */
5581573Srgrimesp_simp_re(p, starordinary)
5591573Srgrimesregister struct parse *p;
5601573Srgrimesint starordinary;		/* is a leading * an ordinary character? */
5611573Srgrimes{
5621573Srgrimes	register int c;
5631573Srgrimes	register int count;
5641573Srgrimes	register int count2;
5651573Srgrimes	register sopno pos;
5661573Srgrimes	register int i;
5671573Srgrimes	register sopno subno;
5681573Srgrimes#	define	BACKSL	(1<<CHAR_BIT)
5691573Srgrimes
5701573Srgrimes	pos = HERE();		/* repetion op, if any, covers from here */
5711573Srgrimes
5721573Srgrimes	assert(MORE());		/* caller should have ensured this */
5731573Srgrimes	c = GETNEXT();
5741573Srgrimes	if (c == '\\') {
57517141Sjkh		(void)REQUIRE(MORE(), REG_EESCAPE);
57649094Sache		c = BACKSL | GETNEXT();
5771573Srgrimes	}
5781573Srgrimes	switch (c) {
5791573Srgrimes	case '.':
5801573Srgrimes		if (p->g->cflags&REG_NEWLINE)
5811573Srgrimes			nonnewline(p);
5821573Srgrimes		else
5831573Srgrimes			EMIT(OANY, 0);
5841573Srgrimes		break;
5851573Srgrimes	case '[':
5861573Srgrimes		p_bracket(p);
5871573Srgrimes		break;
5881573Srgrimes	case BACKSL|'{':
5891573Srgrimes		SETERROR(REG_BADRPT);
5901573Srgrimes		break;
5911573Srgrimes	case BACKSL|'(':
5921573Srgrimes		p->g->nsub++;
5931573Srgrimes		subno = p->g->nsub;
5941573Srgrimes		if (subno < NPAREN)
5951573Srgrimes			p->pbegin[subno] = HERE();
5961573Srgrimes		EMIT(OLPAREN, subno);
5971573Srgrimes		/* the MORE here is an error heuristic */
5981573Srgrimes		if (MORE() && !SEETWO('\\', ')'))
5991573Srgrimes			p_bre(p, '\\', ')');
6001573Srgrimes		if (subno < NPAREN) {
6011573Srgrimes			p->pend[subno] = HERE();
6021573Srgrimes			assert(p->pend[subno] != 0);
6031573Srgrimes		}
6041573Srgrimes		EMIT(ORPAREN, subno);
60517141Sjkh		(void)REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
6061573Srgrimes		break;
6071573Srgrimes	case BACKSL|')':	/* should not get here -- must be user */
6081573Srgrimes	case BACKSL|'}':
6091573Srgrimes		SETERROR(REG_EPAREN);
6101573Srgrimes		break;
6111573Srgrimes	case BACKSL|'1':
6121573Srgrimes	case BACKSL|'2':
6131573Srgrimes	case BACKSL|'3':
6141573Srgrimes	case BACKSL|'4':
6151573Srgrimes	case BACKSL|'5':
6161573Srgrimes	case BACKSL|'6':
6171573Srgrimes	case BACKSL|'7':
6181573Srgrimes	case BACKSL|'8':
6191573Srgrimes	case BACKSL|'9':
6201573Srgrimes		i = (c&~BACKSL) - '0';
6211573Srgrimes		assert(i < NPAREN);
6221573Srgrimes		if (p->pend[i] != 0) {
6231573Srgrimes			assert(i <= p->g->nsub);
6241573Srgrimes			EMIT(OBACK_, i);
6251573Srgrimes			assert(p->pbegin[i] != 0);
6261573Srgrimes			assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
6271573Srgrimes			assert(OP(p->strip[p->pend[i]]) == ORPAREN);
6281573Srgrimes			(void) dupl(p, p->pbegin[i]+1, p->pend[i]);
6291573Srgrimes			EMIT(O_BACK, i);
6301573Srgrimes		} else
6311573Srgrimes			SETERROR(REG_ESUBREG);
6321573Srgrimes		p->g->backrefs = 1;
6331573Srgrimes		break;
6341573Srgrimes	case '*':
63517141Sjkh		(void)REQUIRE(starordinary, REG_BADRPT);
6361573Srgrimes		/* FALLTHROUGH */
6371573Srgrimes	default:
63849094Sache		ordinary(p, (char)c);
6391573Srgrimes		break;
6401573Srgrimes	}
6411573Srgrimes
6421573Srgrimes	if (EAT('*')) {		/* implemented as +? */
6431573Srgrimes		/* this case does not require the (y|) trick, noKLUDGE */
6441573Srgrimes		INSERT(OPLUS_, pos);
6451573Srgrimes		ASTERN(O_PLUS, pos);
6461573Srgrimes		INSERT(OQUEST_, pos);
6471573Srgrimes		ASTERN(O_QUEST, pos);
6481573Srgrimes	} else if (EATTWO('\\', '{')) {
6491573Srgrimes		count = p_count(p);
6501573Srgrimes		if (EAT(',')) {
65149094Sache			if (MORE() && isdigit((uch)PEEK())) {
6521573Srgrimes				count2 = p_count(p);
65317141Sjkh				(void)REQUIRE(count <= count2, REG_BADBR);
6541573Srgrimes			} else		/* single number with comma */
6551573Srgrimes				count2 = INFINITY;
6561573Srgrimes		} else		/* just a single number */
6571573Srgrimes			count2 = count;
6581573Srgrimes		repeat(p, pos, count, count2);
6591573Srgrimes		if (!EATTWO('\\', '}')) {	/* error heuristics */
6601573Srgrimes			while (MORE() && !SEETWO('\\', '}'))
6611573Srgrimes				NEXT();
66217141Sjkh			(void)REQUIRE(MORE(), REG_EBRACE);
6631573Srgrimes			SETERROR(REG_BADBR);
6641573Srgrimes		}
66549094Sache	} else if (c == '$')     /* $ (but not \$) ends it */
6661573Srgrimes		return(1);
6671573Srgrimes
6681573Srgrimes	return(0);
6691573Srgrimes}
6701573Srgrimes
6711573Srgrimes/*
6721573Srgrimes - p_count - parse a repetition count
6731573Srgrimes == static int p_count(register struct parse *p);
6741573Srgrimes */
6751573Srgrimesstatic int			/* the value */
6761573Srgrimesp_count(p)
6771573Srgrimesregister struct parse *p;
6781573Srgrimes{
6791573Srgrimes	register int count = 0;
6801573Srgrimes	register int ndigits = 0;
6811573Srgrimes
68249094Sache	while (MORE() && isdigit((uch)PEEK()) && count <= DUPMAX) {
6831573Srgrimes		count = count*10 + (GETNEXT() - '0');
6841573Srgrimes		ndigits++;
6851573Srgrimes	}
6861573Srgrimes
68717141Sjkh	(void)REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
6881573Srgrimes	return(count);
6891573Srgrimes}
6901573Srgrimes
6911573Srgrimes/*
6921573Srgrimes - p_bracket - parse a bracketed character list
6931573Srgrimes == static void p_bracket(register struct parse *p);
6941573Srgrimes *
6951573Srgrimes * Note a significant property of this code:  if the allocset() did SETERROR,
6961573Srgrimes * no set operations are done.
6971573Srgrimes */
6981573Srgrimesstatic void
6991573Srgrimesp_bracket(p)
7001573Srgrimesregister struct parse *p;
7011573Srgrimes{
7021573Srgrimes	register cset *cs = allocset(p);
7031573Srgrimes	register int invert = 0;
7041573Srgrimes
7051573Srgrimes	/* Dept of Truly Sickening Special-Case Kludges */
7061573Srgrimes	if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
7071573Srgrimes		EMIT(OBOW, 0);
7081573Srgrimes		NEXTn(6);
7091573Srgrimes		return;
7101573Srgrimes	}
7111573Srgrimes	if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
7121573Srgrimes		EMIT(OEOW, 0);
7131573Srgrimes		NEXTn(6);
7141573Srgrimes		return;
7151573Srgrimes	}
7161573Srgrimes
7171573Srgrimes	if (EAT('^'))
7181573Srgrimes		invert++;	/* make note to invert set at end */
7191573Srgrimes	if (EAT(']'))
7201573Srgrimes		CHadd(cs, ']');
7211573Srgrimes	else if (EAT('-'))
7221573Srgrimes		CHadd(cs, '-');
7231573Srgrimes	while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
7241573Srgrimes		p_b_term(p, cs);
7251573Srgrimes	if (EAT('-'))
7261573Srgrimes		CHadd(cs, '-');
72717141Sjkh	(void)MUSTEAT(']', REG_EBRACK);
7281573Srgrimes
7291573Srgrimes	if (p->error != 0)	/* don't mess things up further */
7301573Srgrimes		return;
7311573Srgrimes
7321573Srgrimes	if (p->g->cflags&REG_ICASE) {
7331573Srgrimes		register int i;
7341573Srgrimes		register int ci;
7351573Srgrimes
7361573Srgrimes		for (i = p->g->csetsize - 1; i >= 0; i--)
7371573Srgrimes			if (CHIN(cs, i) && isalpha(i)) {
7381573Srgrimes				ci = othercase(i);
7391573Srgrimes				if (ci != i)
7401573Srgrimes					CHadd(cs, ci);
7411573Srgrimes			}
7421573Srgrimes		if (cs->multis != NULL)
7431573Srgrimes			mccase(p, cs);
7441573Srgrimes	}
7451573Srgrimes	if (invert) {
7461573Srgrimes		register int i;
7471573Srgrimes
7481573Srgrimes		for (i = p->g->csetsize - 1; i >= 0; i--)
7491573Srgrimes			if (CHIN(cs, i))
7501573Srgrimes				CHsub(cs, i);
7511573Srgrimes			else
7521573Srgrimes				CHadd(cs, i);
7531573Srgrimes		if (p->g->cflags&REG_NEWLINE)
7541573Srgrimes			CHsub(cs, '\n');
7551573Srgrimes		if (cs->multis != NULL)
7561573Srgrimes			mcinvert(p, cs);
7571573Srgrimes	}
7581573Srgrimes
7591573Srgrimes	assert(cs->multis == NULL);		/* xxx */
7601573Srgrimes
7611573Srgrimes	if (nch(p, cs) == 1) {		/* optimize singleton sets */
7621573Srgrimes		ordinary(p, firstch(p, cs));
7631573Srgrimes		freeset(p, cs);
7641573Srgrimes	} else
7651573Srgrimes		EMIT(OANYOF, freezeset(p, cs));
7661573Srgrimes}
7671573Srgrimes
7681573Srgrimes/*
7691573Srgrimes - p_b_term - parse one term of a bracketed character list
7701573Srgrimes == static void p_b_term(register struct parse *p, register cset *cs);
7711573Srgrimes */
7721573Srgrimesstatic void
7731573Srgrimesp_b_term(p, cs)
7741573Srgrimesregister struct parse *p;
7751573Srgrimesregister cset *cs;
7761573Srgrimes{
7771573Srgrimes	register char c;
7781573Srgrimes	register char start, finish;
7791573Srgrimes	register int i;
7801573Srgrimes
7811573Srgrimes	/* classify what we've got */
7821573Srgrimes	switch ((MORE()) ? PEEK() : '\0') {
7831573Srgrimes	case '[':
7841573Srgrimes		c = (MORE2()) ? PEEK2() : '\0';
7851573Srgrimes		break;
7861573Srgrimes	case '-':
7871573Srgrimes		SETERROR(REG_ERANGE);
7881573Srgrimes		return;			/* NOTE RETURN */
7891573Srgrimes		break;
7901573Srgrimes	default:
7911573Srgrimes		c = '\0';
7921573Srgrimes		break;
7931573Srgrimes	}
7941573Srgrimes
7951573Srgrimes	switch (c) {
7961573Srgrimes	case ':':		/* character class */
7971573Srgrimes		NEXT2();
79817141Sjkh		(void)REQUIRE(MORE(), REG_EBRACK);
7991573Srgrimes		c = PEEK();
80017141Sjkh		(void)REQUIRE(c != '-' && c != ']', REG_ECTYPE);
8011573Srgrimes		p_b_cclass(p, cs);
80217141Sjkh		(void)REQUIRE(MORE(), REG_EBRACK);
80317141Sjkh		(void)REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
8041573Srgrimes		break;
8051573Srgrimes	case '=':		/* equivalence class */
8061573Srgrimes		NEXT2();
80717141Sjkh		(void)REQUIRE(MORE(), REG_EBRACK);
8081573Srgrimes		c = PEEK();
80917141Sjkh		(void)REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
8101573Srgrimes		p_b_eclass(p, cs);
81117141Sjkh		(void)REQUIRE(MORE(), REG_EBRACK);
81217141Sjkh		(void)REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
8131573Srgrimes		break;
8141573Srgrimes	default:		/* symbol, ordinary character, or range */
8151573Srgrimes/* xxx revision needed for multichar stuff */
8161573Srgrimes		start = p_b_symbol(p);
8171573Srgrimes		if (SEE('-') && MORE2() && PEEK2() != ']') {
8181573Srgrimes			/* range */
8191573Srgrimes			NEXT();
8201573Srgrimes			if (EAT('-'))
8211573Srgrimes				finish = '-';
8221573Srgrimes			else
8231573Srgrimes				finish = p_b_symbol(p);
8241573Srgrimes		} else
8251573Srgrimes			finish = start;
82617514Sache		if (start == finish)
82717514Sache			CHadd(cs, start);
82817514Sache		else {
82924637Sache			if (__collate_load_error) {
83024637Sache				(void)REQUIRE((uch)start <= (uch)finish, REG_ERANGE);
83124637Sache				for (i = (uch)start; i <= (uch)finish; i++)
83217514Sache					CHadd(cs, i);
83324637Sache			} else {
83424637Sache				(void)REQUIRE(__collate_range_cmp(start, finish) <= 0, REG_ERANGE);
83524637Sache				for (i = CHAR_MIN; i <= CHAR_MAX; i++) {
83624637Sache					if (   __collate_range_cmp(start, i) <= 0
83724637Sache					    && __collate_range_cmp(i, finish) <= 0
83824637Sache					   )
83924637Sache						CHadd(cs, i);
84024637Sache				}
84117514Sache			}
84217514Sache		}
8431573Srgrimes		break;
8441573Srgrimes	}
8451573Srgrimes}
8461573Srgrimes
8471573Srgrimes/*
8481573Srgrimes - p_b_cclass - parse a character-class name and deal with it
8491573Srgrimes == static void p_b_cclass(register struct parse *p, register cset *cs);
8501573Srgrimes */
8511573Srgrimesstatic void
8521573Srgrimesp_b_cclass(p, cs)
8531573Srgrimesregister struct parse *p;
8541573Srgrimesregister cset *cs;
8551573Srgrimes{
85617508Sache	register int c;
8571573Srgrimes	register char *sp = p->next;
8581573Srgrimes	register struct cclass *cp;
8591573Srgrimes	register size_t len;
8601573Srgrimes
86117508Sache	while (MORE() && isalpha((uch)PEEK()))
8621573Srgrimes		NEXT();
8631573Srgrimes	len = p->next - sp;
8641573Srgrimes	for (cp = cclasses; cp->name != NULL; cp++)
8651573Srgrimes		if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
8661573Srgrimes			break;
8671573Srgrimes	if (cp->name == NULL) {
8681573Srgrimes		/* oops, didn't find it */
8691573Srgrimes		SETERROR(REG_ECTYPE);
8701573Srgrimes		return;
8711573Srgrimes	}
8721573Srgrimes
87317508Sache	switch (cp->fidx) {
87417508Sache	case CALNUM:
87517508Sache		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
87617508Sache			if (isalnum((uch)c))
87717508Sache				CHadd(cs, c);
87817508Sache		break;
87917508Sache	case CALPHA:
88017508Sache		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
88117508Sache			if (isalpha((uch)c))
88217508Sache				CHadd(cs, c);
88317508Sache		break;
88417508Sache	case CBLANK:
88517508Sache		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
88617508Sache			if (isblank((uch)c))
88717508Sache				CHadd(cs, c);
88817508Sache		break;
88917508Sache	case CCNTRL:
89017508Sache		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
89117508Sache			if (iscntrl((uch)c))
89217508Sache				CHadd(cs, c);
89317508Sache		break;
89417508Sache	case CDIGIT:
89517508Sache		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
89617508Sache			if (isdigit((uch)c))
89717508Sache				CHadd(cs, c);
89817508Sache		break;
89917508Sache	case CGRAPH:
90017508Sache		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
90117508Sache			if (isgraph((uch)c))
90217508Sache				CHadd(cs, c);
90317508Sache		break;
90417508Sache	case CLOWER:
90517508Sache		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
90617508Sache			if (islower((uch)c))
90717508Sache				CHadd(cs, c);
90817508Sache		break;
90917508Sache	case CPRINT:
91017508Sache		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
91117508Sache			if (isprint((uch)c))
91217508Sache				CHadd(cs, c);
91317508Sache		break;
91417508Sache	case CPUNCT:
91517508Sache		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
91617508Sache			if (ispunct((uch)c))
91717508Sache				CHadd(cs, c);
91817508Sache		break;
91917508Sache	case CSPACE:
92017508Sache		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
92117508Sache			if (isspace((uch)c))
92217508Sache				CHadd(cs, c);
92317508Sache		break;
92417508Sache	case CUPPER:
92517508Sache		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
92617508Sache			if (isupper((uch)c))
92717508Sache				CHadd(cs, c);
92817508Sache		break;
92917508Sache	case CXDIGIT:
93017508Sache		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
93117508Sache			if (isxdigit((uch)c))
93217508Sache				CHadd(cs, c);
93317508Sache		break;
93417508Sache	}
93517508Sache#if 0
9361573Srgrimes	for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
9371573Srgrimes		MCadd(p, cs, u);
93817508Sache#endif
9391573Srgrimes}
9401573Srgrimes
9411573Srgrimes/*
9421573Srgrimes - p_b_eclass - parse an equivalence-class name and deal with it
9431573Srgrimes == static void p_b_eclass(register struct parse *p, register cset *cs);
9441573Srgrimes *
9451573Srgrimes * This implementation is incomplete. xxx
9461573Srgrimes */
9471573Srgrimesstatic void
9481573Srgrimesp_b_eclass(p, cs)
9491573Srgrimesregister struct parse *p;
9501573Srgrimesregister cset *cs;
9511573Srgrimes{
9521573Srgrimes	register char c;
9531573Srgrimes
9541573Srgrimes	c = p_b_coll_elem(p, '=');
9551573Srgrimes	CHadd(cs, c);
9561573Srgrimes}
9571573Srgrimes
9581573Srgrimes/*
9591573Srgrimes - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
9601573Srgrimes == static char p_b_symbol(register struct parse *p);
9611573Srgrimes */
9621573Srgrimesstatic char			/* value of symbol */
9631573Srgrimesp_b_symbol(p)
9641573Srgrimesregister struct parse *p;
9651573Srgrimes{
9661573Srgrimes	register char value;
9671573Srgrimes
96817141Sjkh	(void)REQUIRE(MORE(), REG_EBRACK);
9691573Srgrimes	if (!EATTWO('[', '.'))
9701573Srgrimes		return(GETNEXT());
9711573Srgrimes
9721573Srgrimes	/* collating symbol */
9731573Srgrimes	value = p_b_coll_elem(p, '.');
97417141Sjkh	(void)REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
9751573Srgrimes	return(value);
9761573Srgrimes}
9771573Srgrimes
9781573Srgrimes/*
9791573Srgrimes - p_b_coll_elem - parse a collating-element name and look it up
9801573Srgrimes == static char p_b_coll_elem(register struct parse *p, int endc);
9811573Srgrimes */
9821573Srgrimesstatic char			/* value of collating element */
9831573Srgrimesp_b_coll_elem(p, endc)
9841573Srgrimesregister struct parse *p;
9851573Srgrimesint endc;			/* name ended by endc,']' */
9861573Srgrimes{
9871573Srgrimes	register char *sp = p->next;
9881573Srgrimes	register struct cname *cp;
9891573Srgrimes	register int len;
9901573Srgrimes
9911573Srgrimes	while (MORE() && !SEETWO(endc, ']'))
9921573Srgrimes		NEXT();
9931573Srgrimes	if (!MORE()) {
9941573Srgrimes		SETERROR(REG_EBRACK);
9951573Srgrimes		return(0);
9961573Srgrimes	}
9971573Srgrimes	len = p->next - sp;
9981573Srgrimes	for (cp = cnames; cp->name != NULL; cp++)
9991573Srgrimes		if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
10001573Srgrimes			return(cp->code);	/* known name */
10011573Srgrimes	if (len == 1)
10021573Srgrimes		return(*sp);	/* single character */
10031573Srgrimes	SETERROR(REG_ECOLLATE);			/* neither */
10041573Srgrimes	return(0);
10051573Srgrimes}
10061573Srgrimes
10071573Srgrimes/*
10081573Srgrimes - othercase - return the case counterpart of an alphabetic
10091573Srgrimes == static char othercase(int ch);
10101573Srgrimes */
10111573Srgrimesstatic char			/* if no counterpart, return ch */
10121573Srgrimesothercase(ch)
10131573Srgrimesint ch;
10141573Srgrimes{
101549094Sache	ch = (uch)ch;
10161573Srgrimes	assert(isalpha(ch));
10171573Srgrimes	if (isupper(ch))
10181573Srgrimes		return(tolower(ch));
10191573Srgrimes	else if (islower(ch))
10201573Srgrimes		return(toupper(ch));
10211573Srgrimes	else			/* peculiar, but could happen */
10221573Srgrimes		return(ch);
10231573Srgrimes}
10241573Srgrimes
10251573Srgrimes/*
10261573Srgrimes - bothcases - emit a dualcase version of a two-case character
10271573Srgrimes == static void bothcases(register struct parse *p, int ch);
10281573Srgrimes *
10291573Srgrimes * Boy, is this implementation ever a kludge...
10301573Srgrimes */
10311573Srgrimesstatic void
10321573Srgrimesbothcases(p, ch)
10331573Srgrimesregister struct parse *p;
10341573Srgrimesint ch;
10351573Srgrimes{
10361573Srgrimes	register char *oldnext = p->next;
10371573Srgrimes	register char *oldend = p->end;
10381573Srgrimes	char bracket[3];
10391573Srgrimes
104049094Sache	ch = (uch)ch;
10411573Srgrimes	assert(othercase(ch) != ch);	/* p_bracket() would recurse */
10421573Srgrimes	p->next = bracket;
10431573Srgrimes	p->end = bracket+2;
10441573Srgrimes	bracket[0] = ch;
10451573Srgrimes	bracket[1] = ']';
10461573Srgrimes	bracket[2] = '\0';
10471573Srgrimes	p_bracket(p);
10481573Srgrimes	assert(p->next == bracket+2);
10491573Srgrimes	p->next = oldnext;
10501573Srgrimes	p->end = oldend;
10511573Srgrimes}
10521573Srgrimes
10531573Srgrimes/*
10541573Srgrimes - ordinary - emit an ordinary character
10551573Srgrimes == static void ordinary(register struct parse *p, register int ch);
10561573Srgrimes */
10571573Srgrimesstatic void
10581573Srgrimesordinary(p, ch)
10591573Srgrimesregister struct parse *p;
10601573Srgrimesregister int ch;
10611573Srgrimes{
10621573Srgrimes	register cat_t *cap = p->g->categories;
10631573Srgrimes
106449094Sache	if ((p->g->cflags&REG_ICASE) && isalpha((uch)ch) && othercase(ch) != ch)
10651573Srgrimes		bothcases(p, ch);
10661573Srgrimes	else {
106749094Sache		EMIT(OCHAR, (uch)ch);
10681573Srgrimes		if (cap[ch] == 0)
10691573Srgrimes			cap[ch] = p->g->ncategories++;
10701573Srgrimes	}
10711573Srgrimes}
10721573Srgrimes
10731573Srgrimes/*
10741573Srgrimes - nonnewline - emit REG_NEWLINE version of OANY
10751573Srgrimes == static void nonnewline(register struct parse *p);
10761573Srgrimes *
10771573Srgrimes * Boy, is this implementation ever a kludge...
10781573Srgrimes */
10791573Srgrimesstatic void
10801573Srgrimesnonnewline(p)
10811573Srgrimesregister struct parse *p;
10821573Srgrimes{
10831573Srgrimes	register char *oldnext = p->next;
10841573Srgrimes	register char *oldend = p->end;
10851573Srgrimes	char bracket[4];
10861573Srgrimes
10871573Srgrimes	p->next = bracket;
10881573Srgrimes	p->end = bracket+3;
10891573Srgrimes	bracket[0] = '^';
10901573Srgrimes	bracket[1] = '\n';
10911573Srgrimes	bracket[2] = ']';
10921573Srgrimes	bracket[3] = '\0';
10931573Srgrimes	p_bracket(p);
10941573Srgrimes	assert(p->next == bracket+3);
10951573Srgrimes	p->next = oldnext;
10961573Srgrimes	p->end = oldend;
10971573Srgrimes}
10981573Srgrimes
10991573Srgrimes/*
11001573Srgrimes - repeat - generate code for a bounded repetition, recursively if needed
11011573Srgrimes == static void repeat(register struct parse *p, sopno start, int from, int to);
11021573Srgrimes */
11031573Srgrimesstatic void
11041573Srgrimesrepeat(p, start, from, to)
11051573Srgrimesregister struct parse *p;
11061573Srgrimessopno start;			/* operand from here to end of strip */
11071573Srgrimesint from;			/* repeated from this number */
11081573Srgrimesint to;				/* to this number of times (maybe INFINITY) */
11091573Srgrimes{
11101573Srgrimes	register sopno finish = HERE();
11111573Srgrimes#	define	N	2
11121573Srgrimes#	define	INF	3
11131573Srgrimes#	define	REP(f, t)	((f)*8 + (t))
11141573Srgrimes#	define	MAP(n)	(((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
11151573Srgrimes	register sopno copy;
11161573Srgrimes
11171573Srgrimes	if (p->error != 0)	/* head off possible runaway recursion */
11181573Srgrimes		return;
11191573Srgrimes
11201573Srgrimes	assert(from <= to);
11211573Srgrimes
11221573Srgrimes	switch (REP(MAP(from), MAP(to))) {
11231573Srgrimes	case REP(0, 0):			/* must be user doing this */
11241573Srgrimes		DROP(finish-start);	/* drop the operand */
11251573Srgrimes		break;
11261573Srgrimes	case REP(0, 1):			/* as x{1,1}? */
11271573Srgrimes	case REP(0, N):			/* as x{1,n}? */
11281573Srgrimes	case REP(0, INF):		/* as x{1,}? */
11291573Srgrimes		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
11301573Srgrimes		INSERT(OCH_, start);		/* offset is wrong... */
11311573Srgrimes		repeat(p, start+1, 1, to);
11321573Srgrimes		ASTERN(OOR1, start);
11331573Srgrimes		AHEAD(start);			/* ... fix it */
11341573Srgrimes		EMIT(OOR2, 0);
11351573Srgrimes		AHEAD(THERE());
11361573Srgrimes		ASTERN(O_CH, THERETHERE());
11371573Srgrimes		break;
11381573Srgrimes	case REP(1, 1):			/* trivial case */
11391573Srgrimes		/* done */
11401573Srgrimes		break;
11411573Srgrimes	case REP(1, N):			/* as x?x{1,n-1} */
11421573Srgrimes		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
11431573Srgrimes		INSERT(OCH_, start);
11441573Srgrimes		ASTERN(OOR1, start);
11451573Srgrimes		AHEAD(start);
11461573Srgrimes		EMIT(OOR2, 0);			/* offset very wrong... */
11471573Srgrimes		AHEAD(THERE());			/* ...so fix it */
11481573Srgrimes		ASTERN(O_CH, THERETHERE());
11491573Srgrimes		copy = dupl(p, start+1, finish+1);
11501573Srgrimes		assert(copy == finish+4);
11511573Srgrimes		repeat(p, copy, 1, to-1);
11521573Srgrimes		break;
11531573Srgrimes	case REP(1, INF):		/* as x+ */
11541573Srgrimes		INSERT(OPLUS_, start);
11551573Srgrimes		ASTERN(O_PLUS, start);
11561573Srgrimes		break;
11571573Srgrimes	case REP(N, N):			/* as xx{m-1,n-1} */
11581573Srgrimes		copy = dupl(p, start, finish);
11591573Srgrimes		repeat(p, copy, from-1, to-1);
11601573Srgrimes		break;
11611573Srgrimes	case REP(N, INF):		/* as xx{n-1,INF} */
11621573Srgrimes		copy = dupl(p, start, finish);
11631573Srgrimes		repeat(p, copy, from-1, to);
11641573Srgrimes		break;
11651573Srgrimes	default:			/* "can't happen" */
11661573Srgrimes		SETERROR(REG_ASSERT);	/* just in case */
11671573Srgrimes		break;
11681573Srgrimes	}
11691573Srgrimes}
11701573Srgrimes
11711573Srgrimes/*
11721573Srgrimes - seterr - set an error condition
11731573Srgrimes == static int seterr(register struct parse *p, int e);
11741573Srgrimes */
11751573Srgrimesstatic int			/* useless but makes type checking happy */
11761573Srgrimesseterr(p, e)
11771573Srgrimesregister struct parse *p;
11781573Srgrimesint e;
11791573Srgrimes{
11801573Srgrimes	if (p->error == 0)	/* keep earliest error condition */
11811573Srgrimes		p->error = e;
11821573Srgrimes	p->next = nuls;		/* try to bring things to a halt */
11831573Srgrimes	p->end = nuls;
11841573Srgrimes	return(0);		/* make the return value well-defined */
11851573Srgrimes}
11861573Srgrimes
11871573Srgrimes/*
11881573Srgrimes - allocset - allocate a set of characters for []
11891573Srgrimes == static cset *allocset(register struct parse *p);
11901573Srgrimes */
11911573Srgrimesstatic cset *
11921573Srgrimesallocset(p)
11931573Srgrimesregister struct parse *p;
11941573Srgrimes{
11951573Srgrimes	register int no = p->g->ncsets++;
11961573Srgrimes	register size_t nc;
11971573Srgrimes	register size_t nbytes;
11981573Srgrimes	register cset *cs;
11991573Srgrimes	register size_t css = (size_t)p->g->csetsize;
12001573Srgrimes	register int i;
12011573Srgrimes
12021573Srgrimes	if (no >= p->ncsalloc) {	/* need another column of space */
12031573Srgrimes		p->ncsalloc += CHAR_BIT;
12041573Srgrimes		nc = p->ncsalloc;
12051573Srgrimes		assert(nc % CHAR_BIT == 0);
12061573Srgrimes		nbytes = nc / CHAR_BIT * css;
12071573Srgrimes		if (p->g->sets == NULL)
12081573Srgrimes			p->g->sets = (cset *)malloc(nc * sizeof(cset));
12091573Srgrimes		else
121039327Simp			p->g->sets = (cset *)reallocf((char *)p->g->sets,
12111573Srgrimes							nc * sizeof(cset));
12121573Srgrimes		if (p->g->setbits == NULL)
12131573Srgrimes			p->g->setbits = (uch *)malloc(nbytes);
12141573Srgrimes		else {
121539327Simp			p->g->setbits = (uch *)reallocf((char *)p->g->setbits,
12161573Srgrimes								nbytes);
12171573Srgrimes			/* xxx this isn't right if setbits is now NULL */
12181573Srgrimes			for (i = 0; i < no; i++)
12191573Srgrimes				p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
12201573Srgrimes		}
12211573Srgrimes		if (p->g->sets != NULL && p->g->setbits != NULL)
12221573Srgrimes			(void) memset((char *)p->g->setbits + (nbytes - css),
12231573Srgrimes								0, css);
12241573Srgrimes		else {
12251573Srgrimes			no = 0;
12261573Srgrimes			SETERROR(REG_ESPACE);
12271573Srgrimes			/* caller's responsibility not to do set ops */
12281573Srgrimes		}
12291573Srgrimes	}
12301573Srgrimes
12311573Srgrimes	assert(p->g->sets != NULL);	/* xxx */
12321573Srgrimes	cs = &p->g->sets[no];
12331573Srgrimes	cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
12341573Srgrimes	cs->mask = 1 << ((no) % CHAR_BIT);
12351573Srgrimes	cs->hash = 0;
12361573Srgrimes	cs->smultis = 0;
12371573Srgrimes	cs->multis = NULL;
12381573Srgrimes
12391573Srgrimes	return(cs);
12401573Srgrimes}
12411573Srgrimes
12421573Srgrimes/*
12431573Srgrimes - freeset - free a now-unused set
12441573Srgrimes == static void freeset(register struct parse *p, register cset *cs);
12451573Srgrimes */
12461573Srgrimesstatic void
12471573Srgrimesfreeset(p, cs)
12481573Srgrimesregister struct parse *p;
12491573Srgrimesregister cset *cs;
12501573Srgrimes{
12511573Srgrimes	register int i;
12521573Srgrimes	register cset *top = &p->g->sets[p->g->ncsets];
12531573Srgrimes	register size_t css = (size_t)p->g->csetsize;
12541573Srgrimes
12551573Srgrimes	for (i = 0; i < css; i++)
12561573Srgrimes		CHsub(cs, i);
12571573Srgrimes	if (cs == top-1)	/* recover only the easy case */
12581573Srgrimes		p->g->ncsets--;
12591573Srgrimes}
12601573Srgrimes
12611573Srgrimes/*
12621573Srgrimes - freezeset - final processing on a set of characters
12631573Srgrimes == static int freezeset(register struct parse *p, register cset *cs);
12641573Srgrimes *
12651573Srgrimes * The main task here is merging identical sets.  This is usually a waste
12661573Srgrimes * of time (although the hash code minimizes the overhead), but can win
12671573Srgrimes * big if REG_ICASE is being used.  REG_ICASE, by the way, is why the hash
12681573Srgrimes * is done using addition rather than xor -- all ASCII [aA] sets xor to
12691573Srgrimes * the same value!
12701573Srgrimes */
12711573Srgrimesstatic int			/* set number */
12721573Srgrimesfreezeset(p, cs)
12731573Srgrimesregister struct parse *p;
12741573Srgrimesregister cset *cs;
12751573Srgrimes{
127617509Sache	register short h = cs->hash;
12771573Srgrimes	register int i;
12781573Srgrimes	register cset *top = &p->g->sets[p->g->ncsets];
12791573Srgrimes	register cset *cs2;
12801573Srgrimes	register size_t css = (size_t)p->g->csetsize;
12811573Srgrimes
12821573Srgrimes	/* look for an earlier one which is the same */
12831573Srgrimes	for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
12841573Srgrimes		if (cs2->hash == h && cs2 != cs) {
12851573Srgrimes			/* maybe */
12861573Srgrimes			for (i = 0; i < css; i++)
12871573Srgrimes				if (!!CHIN(cs2, i) != !!CHIN(cs, i))
12881573Srgrimes					break;		/* no */
12891573Srgrimes			if (i == css)
12901573Srgrimes				break;			/* yes */
12911573Srgrimes		}
12921573Srgrimes
12931573Srgrimes	if (cs2 < top) {	/* found one */
12941573Srgrimes		freeset(p, cs);
12951573Srgrimes		cs = cs2;
12961573Srgrimes	}
12971573Srgrimes
12981573Srgrimes	return((int)(cs - p->g->sets));
12991573Srgrimes}
13001573Srgrimes
13011573Srgrimes/*
13021573Srgrimes - firstch - return first character in a set (which must have at least one)
13031573Srgrimes == static int firstch(register struct parse *p, register cset *cs);
13041573Srgrimes */
13051573Srgrimesstatic int			/* character; there is no "none" value */
13061573Srgrimesfirstch(p, cs)
13071573Srgrimesregister struct parse *p;
13081573Srgrimesregister cset *cs;
13091573Srgrimes{
13101573Srgrimes	register int i;
13111573Srgrimes	register size_t css = (size_t)p->g->csetsize;
13121573Srgrimes
13131573Srgrimes	for (i = 0; i < css; i++)
13141573Srgrimes		if (CHIN(cs, i))
131549094Sache			return((char)i);
13161573Srgrimes	assert(never);
13171573Srgrimes	return(0);		/* arbitrary */
13181573Srgrimes}
13191573Srgrimes
13201573Srgrimes/*
13211573Srgrimes - nch - number of characters in a set
13221573Srgrimes == static int nch(register struct parse *p, register cset *cs);
13231573Srgrimes */
13241573Srgrimesstatic int
13251573Srgrimesnch(p, cs)
13261573Srgrimesregister struct parse *p;
13271573Srgrimesregister cset *cs;
13281573Srgrimes{
13291573Srgrimes	register int i;
13301573Srgrimes	register size_t css = (size_t)p->g->csetsize;
13311573Srgrimes	register int n = 0;
13321573Srgrimes
13331573Srgrimes	for (i = 0; i < css; i++)
13341573Srgrimes		if (CHIN(cs, i))
13351573Srgrimes			n++;
13361573Srgrimes	return(n);
13371573Srgrimes}
13381573Srgrimes
13391573Srgrimes/*
13401573Srgrimes - mcadd - add a collating element to a cset
13411573Srgrimes == static void mcadd(register struct parse *p, register cset *cs, \
13421573Srgrimes ==	register char *cp);
13431573Srgrimes */
13441573Srgrimesstatic void
13451573Srgrimesmcadd(p, cs, cp)
13461573Srgrimesregister struct parse *p;
13471573Srgrimesregister cset *cs;
13481573Srgrimesregister char *cp;
13491573Srgrimes{
13501573Srgrimes	register size_t oldend = cs->smultis;
13511573Srgrimes
13521573Srgrimes	cs->smultis += strlen(cp) + 1;
13531573Srgrimes	if (cs->multis == NULL)
13541573Srgrimes		cs->multis = malloc(cs->smultis);
13551573Srgrimes	else
135639327Simp		cs->multis = reallocf(cs->multis, cs->smultis);
13571573Srgrimes	if (cs->multis == NULL) {
13581573Srgrimes		SETERROR(REG_ESPACE);
13591573Srgrimes		return;
13601573Srgrimes	}
13611573Srgrimes
13621573Srgrimes	(void) strcpy(cs->multis + oldend - 1, cp);
13631573Srgrimes	cs->multis[cs->smultis - 1] = '\0';
13641573Srgrimes}
13651573Srgrimes
136617141Sjkh#if used
13671573Srgrimes/*
13681573Srgrimes - mcsub - subtract a collating element from a cset
13691573Srgrimes == static void mcsub(register cset *cs, register char *cp);
13701573Srgrimes */
13711573Srgrimesstatic void
13721573Srgrimesmcsub(cs, cp)
13731573Srgrimesregister cset *cs;
13741573Srgrimesregister char *cp;
13751573Srgrimes{
13761573Srgrimes	register char *fp = mcfind(cs, cp);
13771573Srgrimes	register size_t len = strlen(fp);
13781573Srgrimes
13791573Srgrimes	assert(fp != NULL);
13801573Srgrimes	(void) memmove(fp, fp + len + 1,
13811573Srgrimes				cs->smultis - (fp + len + 1 - cs->multis));
13821573Srgrimes	cs->smultis -= len;
13831573Srgrimes
13841573Srgrimes	if (cs->smultis == 0) {
13851573Srgrimes		free(cs->multis);
13861573Srgrimes		cs->multis = NULL;
13871573Srgrimes		return;
13881573Srgrimes	}
13891573Srgrimes
139039327Simp	cs->multis = reallocf(cs->multis, cs->smultis);
13911573Srgrimes	assert(cs->multis != NULL);
13921573Srgrimes}
13931573Srgrimes
13941573Srgrimes/*
13951573Srgrimes - mcin - is a collating element in a cset?
13961573Srgrimes == static int mcin(register cset *cs, register char *cp);
13971573Srgrimes */
13981573Srgrimesstatic int
13991573Srgrimesmcin(cs, cp)
14001573Srgrimesregister cset *cs;
14011573Srgrimesregister char *cp;
14021573Srgrimes{
14031573Srgrimes	return(mcfind(cs, cp) != NULL);
14041573Srgrimes}
14051573Srgrimes
14061573Srgrimes/*
14071573Srgrimes - mcfind - find a collating element in a cset
14081573Srgrimes == static char *mcfind(register cset *cs, register char *cp);
14091573Srgrimes */
14101573Srgrimesstatic char *
14111573Srgrimesmcfind(cs, cp)
14121573Srgrimesregister cset *cs;
14131573Srgrimesregister char *cp;
14141573Srgrimes{
14151573Srgrimes	register char *p;
14161573Srgrimes
14171573Srgrimes	if (cs->multis == NULL)
14181573Srgrimes		return(NULL);
14191573Srgrimes	for (p = cs->multis; *p != '\0'; p += strlen(p) + 1)
14201573Srgrimes		if (strcmp(cp, p) == 0)
14211573Srgrimes			return(p);
14221573Srgrimes	return(NULL);
14231573Srgrimes}
142417141Sjkh#endif
14251573Srgrimes
14261573Srgrimes/*
14271573Srgrimes - mcinvert - invert the list of collating elements in a cset
14281573Srgrimes == static void mcinvert(register struct parse *p, register cset *cs);
14291573Srgrimes *
14301573Srgrimes * This would have to know the set of possibilities.  Implementation
14311573Srgrimes * is deferred.
14321573Srgrimes */
14331573Srgrimesstatic void
14341573Srgrimesmcinvert(p, cs)
14351573Srgrimesregister struct parse *p;
14361573Srgrimesregister cset *cs;
14371573Srgrimes{
14381573Srgrimes	assert(cs->multis == NULL);	/* xxx */
14391573Srgrimes}
14401573Srgrimes
14411573Srgrimes/*
14421573Srgrimes - mccase - add case counterparts of the list of collating elements in a cset
14431573Srgrimes == static void mccase(register struct parse *p, register cset *cs);
14441573Srgrimes *
14451573Srgrimes * This would have to know the set of possibilities.  Implementation
14461573Srgrimes * is deferred.
14471573Srgrimes */
14481573Srgrimesstatic void
14491573Srgrimesmccase(p, cs)
14501573Srgrimesregister struct parse *p;
14511573Srgrimesregister cset *cs;
14521573Srgrimes{
14531573Srgrimes	assert(cs->multis == NULL);	/* xxx */
14541573Srgrimes}
14551573Srgrimes
14561573Srgrimes/*
14571573Srgrimes - isinsets - is this character in any sets?
14581573Srgrimes == static int isinsets(register struct re_guts *g, int c);
14591573Srgrimes */
14601573Srgrimesstatic int			/* predicate */
14611573Srgrimesisinsets(g, c)
14621573Srgrimesregister struct re_guts *g;
14631573Srgrimesint c;
14641573Srgrimes{
14651573Srgrimes	register uch *col;
14661573Srgrimes	register int i;
14671573Srgrimes	register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
146849094Sache	register unsigned uc = (uch)c;
14691573Srgrimes
14701573Srgrimes	for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
14711573Srgrimes		if (col[uc] != 0)
14721573Srgrimes			return(1);
14731573Srgrimes	return(0);
14741573Srgrimes}
14751573Srgrimes
14761573Srgrimes/*
14771573Srgrimes - samesets - are these two characters in exactly the same sets?
14781573Srgrimes == static int samesets(register struct re_guts *g, int c1, int c2);
14791573Srgrimes */
14801573Srgrimesstatic int			/* predicate */
14811573Srgrimessamesets(g, c1, c2)
14821573Srgrimesregister struct re_guts *g;
14831573Srgrimesint c1;
14841573Srgrimesint c2;
14851573Srgrimes{
14861573Srgrimes	register uch *col;
14871573Srgrimes	register int i;
14881573Srgrimes	register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
148949094Sache	register unsigned uc1 = (uch)c1;
149049094Sache	register unsigned uc2 = (uch)c2;
14911573Srgrimes
14921573Srgrimes	for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
14931573Srgrimes		if (col[uc1] != col[uc2])
14941573Srgrimes			return(0);
14951573Srgrimes	return(1);
14961573Srgrimes}
14971573Srgrimes
14981573Srgrimes/*
14991573Srgrimes - categorize - sort out character categories
15001573Srgrimes == static void categorize(struct parse *p, register struct re_guts *g);
15011573Srgrimes */
15021573Srgrimesstatic void
15031573Srgrimescategorize(p, g)
15041573Srgrimesstruct parse *p;
15051573Srgrimesregister struct re_guts *g;
15061573Srgrimes{
15071573Srgrimes	register cat_t *cats = g->categories;
15081573Srgrimes	register int c;
15091573Srgrimes	register int c2;
15101573Srgrimes	register cat_t cat;
15111573Srgrimes
15121573Srgrimes	/* avoid making error situations worse */
15131573Srgrimes	if (p->error != 0)
15141573Srgrimes		return;
15151573Srgrimes
15161573Srgrimes	for (c = CHAR_MIN; c <= CHAR_MAX; c++)
15171573Srgrimes		if (cats[c] == 0 && isinsets(g, c)) {
15181573Srgrimes			cat = g->ncategories++;
15191573Srgrimes			cats[c] = cat;
15201573Srgrimes			for (c2 = c+1; c2 <= CHAR_MAX; c2++)
15211573Srgrimes				if (cats[c2] == 0 && samesets(g, c, c2))
15221573Srgrimes					cats[c2] = cat;
15231573Srgrimes		}
15241573Srgrimes}
15251573Srgrimes
15261573Srgrimes/*
15271573Srgrimes - dupl - emit a duplicate of a bunch of sops
15281573Srgrimes == static sopno dupl(register struct parse *p, sopno start, sopno finish);
15291573Srgrimes */
15301573Srgrimesstatic sopno			/* start of duplicate */
15311573Srgrimesdupl(p, start, finish)
15321573Srgrimesregister struct parse *p;
15331573Srgrimessopno start;			/* from here */
15341573Srgrimessopno finish;			/* to this less one */
15351573Srgrimes{
15361573Srgrimes	register sopno ret = HERE();
15371573Srgrimes	register sopno len = finish - start;
15381573Srgrimes
15391573Srgrimes	assert(finish >= start);
15401573Srgrimes	if (len == 0)
15411573Srgrimes		return(ret);
15421573Srgrimes	enlarge(p, p->ssize + len);	/* this many unexpected additions */
15431573Srgrimes	assert(p->ssize >= p->slen + len);
15441573Srgrimes	(void) memcpy((char *)(p->strip + p->slen),
15451573Srgrimes		(char *)(p->strip + start), (size_t)len*sizeof(sop));
15461573Srgrimes	p->slen += len;
15471573Srgrimes	return(ret);
15481573Srgrimes}
15491573Srgrimes
15501573Srgrimes/*
15511573Srgrimes - doemit - emit a strip operator
15521573Srgrimes == static void doemit(register struct parse *p, sop op, size_t opnd);
15531573Srgrimes *
15541573Srgrimes * It might seem better to implement this as a macro with a function as
15551573Srgrimes * hard-case backup, but it's just too big and messy unless there are
15561573Srgrimes * some changes to the data structures.  Maybe later.
15571573Srgrimes */
15581573Srgrimesstatic void
15591573Srgrimesdoemit(p, op, opnd)
15601573Srgrimesregister struct parse *p;
15611573Srgrimessop op;
15621573Srgrimessize_t opnd;
15631573Srgrimes{
15641573Srgrimes	/* avoid making error situations worse */
15651573Srgrimes	if (p->error != 0)
15661573Srgrimes		return;
15671573Srgrimes
15681573Srgrimes	/* deal with oversize operands ("can't happen", more or less) */
15691573Srgrimes	assert(opnd < 1<<OPSHIFT);
15701573Srgrimes
15711573Srgrimes	/* deal with undersized strip */
15721573Srgrimes	if (p->slen >= p->ssize)
15731573Srgrimes		enlarge(p, (p->ssize+1) / 2 * 3);	/* +50% */
15741573Srgrimes	assert(p->slen < p->ssize);
15751573Srgrimes
15761573Srgrimes	/* finally, it's all reduced to the easy case */
15771573Srgrimes	p->strip[p->slen++] = SOP(op, opnd);
15781573Srgrimes}
15791573Srgrimes
15801573Srgrimes/*
15811573Srgrimes - doinsert - insert a sop into the strip
15821573Srgrimes == static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos);
15831573Srgrimes */
15841573Srgrimesstatic void
15851573Srgrimesdoinsert(p, op, opnd, pos)
15861573Srgrimesregister struct parse *p;
15871573Srgrimessop op;
15881573Srgrimessize_t opnd;
15891573Srgrimessopno pos;
15901573Srgrimes{
15911573Srgrimes	register sopno sn;
15921573Srgrimes	register sop s;
15931573Srgrimes	register int i;
15941573Srgrimes
15951573Srgrimes	/* avoid making error situations worse */
15961573Srgrimes	if (p->error != 0)
15971573Srgrimes		return;
15981573Srgrimes
15991573Srgrimes	sn = HERE();
16001573Srgrimes	EMIT(op, opnd);		/* do checks, ensure space */
16011573Srgrimes	assert(HERE() == sn+1);
16021573Srgrimes	s = p->strip[sn];
16031573Srgrimes
16041573Srgrimes	/* adjust paren pointers */
16051573Srgrimes	assert(pos > 0);
16061573Srgrimes	for (i = 1; i < NPAREN; i++) {
16071573Srgrimes		if (p->pbegin[i] >= pos) {
16081573Srgrimes			p->pbegin[i]++;
16091573Srgrimes		}
16101573Srgrimes		if (p->pend[i] >= pos) {
16111573Srgrimes			p->pend[i]++;
16121573Srgrimes		}
16131573Srgrimes	}
16141573Srgrimes
16151573Srgrimes	memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
16161573Srgrimes						(HERE()-pos-1)*sizeof(sop));
16171573Srgrimes	p->strip[pos] = s;
16181573Srgrimes}
16191573Srgrimes
16201573Srgrimes/*
16211573Srgrimes - dofwd - complete a forward reference
16221573Srgrimes == static void dofwd(register struct parse *p, sopno pos, sop value);
16231573Srgrimes */
16241573Srgrimesstatic void
16251573Srgrimesdofwd(p, pos, value)
16261573Srgrimesregister struct parse *p;
16271573Srgrimesregister sopno pos;
16281573Srgrimessop value;
16291573Srgrimes{
16301573Srgrimes	/* avoid making error situations worse */
16311573Srgrimes	if (p->error != 0)
16321573Srgrimes		return;
16331573Srgrimes
16341573Srgrimes	assert(value < 1<<OPSHIFT);
16351573Srgrimes	p->strip[pos] = OP(p->strip[pos]) | value;
16361573Srgrimes}
16371573Srgrimes
16381573Srgrimes/*
16391573Srgrimes - enlarge - enlarge the strip
16401573Srgrimes == static void enlarge(register struct parse *p, sopno size);
16411573Srgrimes */
16421573Srgrimesstatic void
16431573Srgrimesenlarge(p, size)
16441573Srgrimesregister struct parse *p;
16451573Srgrimesregister sopno size;
16461573Srgrimes{
16471573Srgrimes	register sop *sp;
16481573Srgrimes
16491573Srgrimes	if (p->ssize >= size)
16501573Srgrimes		return;
16511573Srgrimes
16521573Srgrimes	sp = (sop *)realloc(p->strip, size*sizeof(sop));
16531573Srgrimes	if (sp == NULL) {
16541573Srgrimes		SETERROR(REG_ESPACE);
16551573Srgrimes		return;
16561573Srgrimes	}
16571573Srgrimes	p->strip = sp;
16581573Srgrimes	p->ssize = size;
16591573Srgrimes}
16601573Srgrimes
16611573Srgrimes/*
16621573Srgrimes - stripsnug - compact the strip
16631573Srgrimes == static void stripsnug(register struct parse *p, register struct re_guts *g);
16641573Srgrimes */
16651573Srgrimesstatic void
16661573Srgrimesstripsnug(p, g)
16671573Srgrimesregister struct parse *p;
16681573Srgrimesregister struct re_guts *g;
16691573Srgrimes{
16701573Srgrimes	g->nstates = p->slen;
16711573Srgrimes	g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop));
16721573Srgrimes	if (g->strip == NULL) {
16731573Srgrimes		SETERROR(REG_ESPACE);
16741573Srgrimes		g->strip = p->strip;
16751573Srgrimes	}
16761573Srgrimes}
16771573Srgrimes
16781573Srgrimes/*
16791573Srgrimes - findmust - fill in must and mlen with longest mandatory literal string
16801573Srgrimes == static void findmust(register struct parse *p, register struct re_guts *g);
16811573Srgrimes *
16821573Srgrimes * This algorithm could do fancy things like analyzing the operands of |
16831573Srgrimes * for common subsequences.  Someday.  This code is simple and finds most
16841573Srgrimes * of the interesting cases.
16851573Srgrimes *
16861573Srgrimes * Note that must and mlen got initialized during setup.
16871573Srgrimes */
16881573Srgrimesstatic void
16891573Srgrimesfindmust(p, g)
16901573Srgrimesstruct parse *p;
16911573Srgrimesregister struct re_guts *g;
16921573Srgrimes{
16931573Srgrimes	register sop *scan;
16941573Srgrimes	sop *start;
16951573Srgrimes	register sop *newstart;
16961573Srgrimes	register sopno newlen;
16971573Srgrimes	register sop s;
16981573Srgrimes	register char *cp;
16991573Srgrimes	register sopno i;
170062391Sdcs	int offset;
170162391Sdcs	int cs, mccs;
17021573Srgrimes
17031573Srgrimes	/* avoid making error situations worse */
17041573Srgrimes	if (p->error != 0)
17051573Srgrimes		return;
17061573Srgrimes
170762391Sdcs	/* Find out if we can handle OANYOF or not */
170862391Sdcs	mccs = 0;
170962391Sdcs	for (cs = 0; cs < g->ncsets; cs++)
171062391Sdcs		if (g->sets[cs].multis != NULL)
171162391Sdcs			mccs = 1;
171262391Sdcs
17131573Srgrimes	/* find the longest OCHAR sequence in strip */
17141573Srgrimes	newlen = 0;
171562391Sdcs	offset = 0;
171662391Sdcs	g->moffset = 0;
17171573Srgrimes	scan = g->strip + 1;
17181573Srgrimes	do {
17191573Srgrimes		s = *scan++;
17201573Srgrimes		switch (OP(s)) {
17211573Srgrimes		case OCHAR:		/* sequence member */
17221573Srgrimes			if (newlen == 0)		/* new sequence */
17231573Srgrimes				newstart = scan - 1;
17241573Srgrimes			newlen++;
17251573Srgrimes			break;
17261573Srgrimes		case OPLUS_:		/* things that don't break one */
17271573Srgrimes		case OLPAREN:
17281573Srgrimes		case ORPAREN:
17291573Srgrimes			break;
17301573Srgrimes		case OQUEST_:		/* things that must be skipped */
17311573Srgrimes		case OCH_:
173262391Sdcs			offset = altoffset(scan, offset, mccs);
17331573Srgrimes			scan--;
17341573Srgrimes			do {
17351573Srgrimes				scan += OPND(s);
17361573Srgrimes				s = *scan;
17371573Srgrimes				/* assert() interferes w debug printouts */
17381573Srgrimes				if (OP(s) != O_QUEST && OP(s) != O_CH &&
17391573Srgrimes							OP(s) != OOR2) {
17401573Srgrimes					g->iflags |= BAD;
17411573Srgrimes					return;
17421573Srgrimes				}
17431573Srgrimes			} while (OP(s) != O_QUEST && OP(s) != O_CH);
17441573Srgrimes			/* fallthrough */
174562391Sdcs		case OBOW:		/* things that break a sequence */
174662391Sdcs		case OEOW:
174762391Sdcs		case OBOL:
174862391Sdcs		case OEOL:
174962391Sdcs		case O_QUEST:
175062391Sdcs		case O_CH:
175162391Sdcs		case OEND:
17521573Srgrimes			if (newlen > g->mlen) {		/* ends one */
17531573Srgrimes				start = newstart;
17541573Srgrimes				g->mlen = newlen;
175562391Sdcs				if (offset > -1) {
175662391Sdcs					g->moffset += offset;
175762391Sdcs					offset = newlen;
175862391Sdcs				} else
175962391Sdcs					g->moffset = offset;
176062391Sdcs			} else {
176162391Sdcs				if (offset > -1)
176262391Sdcs					offset += newlen;
17631573Srgrimes			}
17641573Srgrimes			newlen = 0;
17651573Srgrimes			break;
176662391Sdcs		case OANY:
176762391Sdcs			if (newlen > g->mlen) {		/* ends one */
176862391Sdcs				start = newstart;
176962391Sdcs				g->mlen = newlen;
177062391Sdcs				if (offset > -1) {
177162391Sdcs					g->moffset += offset;
177262391Sdcs					offset = newlen;
177362391Sdcs				} else
177462391Sdcs					g->moffset = offset;
177562391Sdcs			} else {
177662391Sdcs				if (offset > -1)
177762391Sdcs					offset += newlen;
177862391Sdcs			}
177962391Sdcs			if (offset > -1)
178062391Sdcs				offset++;
178162391Sdcs			newlen = 0;
178262391Sdcs			break;
178362391Sdcs		case OANYOF:		/* may or may not invalidate offset */
178462391Sdcs			/* First, everything as OANY */
178562391Sdcs			if (newlen > g->mlen) {		/* ends one */
178662391Sdcs				start = newstart;
178762391Sdcs				g->mlen = newlen;
178862391Sdcs				if (offset > -1) {
178962391Sdcs					g->moffset += offset;
179062391Sdcs					offset = newlen;
179162391Sdcs				} else
179262391Sdcs					g->moffset = offset;
179362391Sdcs			} else {
179462391Sdcs				if (offset > -1)
179562391Sdcs					offset += newlen;
179662391Sdcs			}
179762391Sdcs			if (offset > -1)
179862391Sdcs				offset++;
179962391Sdcs			newlen = 0;
180062391Sdcs			/* And, now, if we found out we can't deal with
180162391Sdcs			 * it, make offset = -1.
180262391Sdcs			 */
180362391Sdcs			if (mccs)
180462391Sdcs				offset = -1;
180562391Sdcs			break;
180662391Sdcs		default:
180762391Sdcs			/* Anything here makes it impossible or too hard
180862391Sdcs			 * to calculate the offset -- so we give up;
180962391Sdcs			 * save the last known good offset, in case the
181062391Sdcs			 * must sequence doesn't occur later.
181162391Sdcs			 */
181262391Sdcs			if (newlen > g->mlen) {		/* ends one */
181362391Sdcs				start = newstart;
181462391Sdcs				g->mlen = newlen;
181562391Sdcs				if (offset > -1)
181662391Sdcs					g->moffset += offset;
181762391Sdcs				else
181862391Sdcs					g->moffset = offset;
181962391Sdcs			}
182062391Sdcs			offset = -1;
182162391Sdcs			newlen = 0;
182262391Sdcs			break;
18231573Srgrimes		}
18241573Srgrimes	} while (OP(s) != OEND);
18251573Srgrimes
182662391Sdcs	if (g->mlen == 0) {		/* there isn't one */
182762391Sdcs		g->moffset = -1;
18281573Srgrimes		return;
182962391Sdcs	}
18301573Srgrimes
18311573Srgrimes	/* turn it into a character string */
18321573Srgrimes	g->must = malloc((size_t)g->mlen + 1);
18331573Srgrimes	if (g->must == NULL) {		/* argh; just forget it */
18341573Srgrimes		g->mlen = 0;
183562391Sdcs		g->moffset = -1;
18361573Srgrimes		return;
18371573Srgrimes	}
18381573Srgrimes	cp = g->must;
18391573Srgrimes	scan = start;
18401573Srgrimes	for (i = g->mlen; i > 0; i--) {
18411573Srgrimes		while (OP(s = *scan++) != OCHAR)
18421573Srgrimes			continue;
18431573Srgrimes		assert(cp < g->must + g->mlen);
18441573Srgrimes		*cp++ = (char)OPND(s);
18451573Srgrimes	}
18461573Srgrimes	assert(cp == g->must + g->mlen);
18471573Srgrimes	*cp++ = '\0';		/* just on general principles */
18481573Srgrimes}
18491573Srgrimes
18501573Srgrimes/*
185162391Sdcs - altoffset - choose biggest offset among multiple choices
185262391Sdcs = static int altoffset(sop *scan, int offset, int mccs);
185362391Sdcs *
185462391Sdcs * Compute, recursively if necessary, the largest offset among multiple
185562391Sdcs * re paths.
185662391Sdcs */
185762391Sdcsstatic int
185862391Sdcsaltoffset(scan, offset, mccs)
185962391Sdcssop *scan;
186062391Sdcsint offset;
186162391Sdcsint mccs;
186262391Sdcs{
186362391Sdcs	int largest;
186462391Sdcs	int try;
186562391Sdcs	sop s;
186662391Sdcs
186762391Sdcs	/* If we gave up already on offsets, return */
186862391Sdcs	if (offset == -1)
186962391Sdcs		return -1;
187062391Sdcs
187162391Sdcs	largest = 0;
187262391Sdcs	try = 0;
187362391Sdcs	s = *scan++;
187462391Sdcs	while (OP(s) != O_QUEST && OP(s) != O_CH) {
187562391Sdcs		switch (OP(s)) {
187662391Sdcs		case OOR1:
187762391Sdcs			if (try > largest)
187862391Sdcs				largest = try;
187962391Sdcs			try = 0;
188062391Sdcs			break;
188162391Sdcs		case OQUEST_:
188262391Sdcs		case OCH_:
188362391Sdcs			try = altoffset(scan, try, mccs);
188462391Sdcs			if (try == -1)
188562391Sdcs				return -1;
188662391Sdcs			scan--;
188762391Sdcs			do {
188862391Sdcs				scan += OPND(s);
188962391Sdcs				s = *scan;
189062391Sdcs				if (OP(s) != O_QUEST && OP(s) != O_CH &&
189162391Sdcs							OP(s) != OOR2)
189262391Sdcs					return -1;
189362391Sdcs			} while (OP(s) != O_QUEST && OP(s) != O_CH);
189462391Sdcs			break;
189562391Sdcs		case OANYOF:
189662391Sdcs			if (mccs)
189762391Sdcs				return -1;
189862391Sdcs		case OCHAR:
189962391Sdcs		case OANY:
190062391Sdcs			try++;
190162391Sdcs		case OBOW:
190262391Sdcs		case OEOW:
190362391Sdcs		case OLPAREN:
190462391Sdcs		case ORPAREN:
190562391Sdcs		case OOR2:
190662391Sdcs			break;
190762391Sdcs		default:
190862391Sdcs			try = -1;
190962391Sdcs			break;
191062391Sdcs		}
191162391Sdcs		if (try == -1)
191262391Sdcs			return -1;
191362391Sdcs		s = *scan++;
191462391Sdcs	}
191562391Sdcs
191662391Sdcs	if (try > largest)
191762391Sdcs		largest = try;
191862391Sdcs
191962391Sdcs	return largest+offset;
192062391Sdcs}
192162391Sdcs
192262391Sdcs/*
192362232Sdcs - computejumps - compute char jumps for BM scan
192462232Sdcs == static void computejumps(register struct parse *p, register struct re_guts *g);
192562232Sdcs *
192662232Sdcs * This algorithm assumes g->must exists and is has size greater than
192762232Sdcs * zero. It's based on the algorithm found on Computer Algorithms by
192862232Sdcs * Sara Baase.
192962232Sdcs *
193062232Sdcs * A char jump is the number of characters one needs to jump based on
193162232Sdcs * the value of the character from the text that was mismatched.
193262232Sdcs */
193362232Sdcsstatic void
193462232Sdcscomputejumps(p, g)
193562232Sdcsstruct parse *p;
193662232Sdcsstruct re_guts *g;
193762232Sdcs{
193862232Sdcs	int ch;
193962232Sdcs	int mindex;
194062232Sdcs
194162232Sdcs	/* Avoid making errors worse */
194262232Sdcs	if (p->error != 0)
194362232Sdcs		return;
194462232Sdcs
194562391Sdcs	g->charjump = malloc((UCHAR_MAX+1) * sizeof(int));
194662232Sdcs	if (g->charjump == NULL)	/* Not a fatal error */
194762232Sdcs		return;
194862232Sdcs
194962232Sdcs	/* If the character does not exist in the pattern, the jump
195062232Sdcs	 * is equal to the number of characters in the pattern.
195162232Sdcs	 */
195262232Sdcs	for (ch = 0; ch < 256; ch++)
195362232Sdcs		g->charjump[ch] = g->mlen;
195462232Sdcs
195562232Sdcs	/* If the character does exist, compute the jump that would
195662232Sdcs	 * take us to the last character in the pattern equal to it
195762232Sdcs	 * (notice that we match right to left, so that last character
195862232Sdcs	 * is the first one that would be matched).
195962232Sdcs	 */
196062232Sdcs	for (mindex = 0; mindex < g->mlen; mindex++)
196162232Sdcs		g->charjump[g->must[mindex]] = g->mlen - mindex - 1;
196262232Sdcs}
196362232Sdcs
196462232Sdcs/*
196562232Sdcs - computematchjumps - compute match jumps for BM scan
196662232Sdcs == static void computematchjumps(register struct parse *p, register struct re_guts *g);
196762232Sdcs *
196862232Sdcs * This algorithm assumes g->must exists and is has size greater than
196962232Sdcs * zero. It's based on the algorithm found on Computer Algorithms by
197062232Sdcs * Sara Baase.
197162232Sdcs *
197262232Sdcs * A match jump is the number of characters one needs to advance based
197362232Sdcs * on the already-matched suffix.
197462232Sdcs * Notice that all values here are minus (g->mlen-1), because of the way
197562232Sdcs * the search algorithm works.
197662232Sdcs */
197762232Sdcsstatic void
197862232Sdcscomputematchjumps(p, g)
197962232Sdcsstruct parse *p;
198062232Sdcsstruct re_guts *g;
198162232Sdcs{
198262232Sdcs	int mindex;		/* General "must" iterator */
198362232Sdcs	int suffix;		/* Keeps track of matching suffix */
198462232Sdcs	int ssuffix;		/* Keeps track of suffixes' suffix */
198562232Sdcs	int* pmatches;		/* pmatches[k] points to the next i
198662232Sdcs				 * such that i+1...mlen is a substring
198762232Sdcs				 * of k+1...k+mlen-i-1
198862232Sdcs				 */
198962232Sdcs
199062232Sdcs	/* Avoid making errors worse */
199162232Sdcs	if (p->error != 0)
199262232Sdcs		return;
199362232Sdcs
199462232Sdcs	pmatches = malloc(g->mlen * sizeof(unsigned int));
199562232Sdcs	if (pmatches == NULL) {
199662232Sdcs		g->matchjump = NULL;
199762232Sdcs		return;
199862232Sdcs	}
199962232Sdcs
200062232Sdcs	g->matchjump = malloc(g->mlen * sizeof(unsigned int));
200162232Sdcs	if (g->matchjump == NULL)	/* Not a fatal error */
200262232Sdcs		return;
200362232Sdcs
200462232Sdcs	/* Set maximum possible jump for each character in the pattern */
200562232Sdcs	for (mindex = 0; mindex < g->mlen; mindex++)
200662232Sdcs		g->matchjump[mindex] = 2*g->mlen - mindex - 1;
200762232Sdcs
200862232Sdcs	/* Compute pmatches[] */
200962232Sdcs	for (mindex = g->mlen - 1, suffix = g->mlen; mindex >= 0;
201062232Sdcs	    mindex--, suffix--) {
201162232Sdcs		pmatches[mindex] = suffix;
201262232Sdcs
201362232Sdcs		/* If a mismatch is found, interrupting the substring,
201462232Sdcs		 * compute the matchjump for that position. If no
201562232Sdcs		 * mismatch is found, then a text substring mismatched
201662232Sdcs		 * against the suffix will also mismatch against the
201762232Sdcs		 * substring.
201862232Sdcs		 */
201962232Sdcs		while (suffix < g->mlen
202062232Sdcs		    && g->must[mindex] != g->must[suffix]) {
202162232Sdcs			g->matchjump[suffix] = MIN(g->matchjump[suffix],
202262232Sdcs			    g->mlen - mindex - 1);
202362232Sdcs			suffix = pmatches[suffix];
202462232Sdcs		}
202562232Sdcs	}
202662232Sdcs
202762232Sdcs	/* Compute the matchjump up to the last substring found to jump
202862232Sdcs	 * to the beginning of the largest must pattern prefix matching
202962232Sdcs	 * it's own suffix.
203062232Sdcs	 */
203162232Sdcs	for (mindex = 0; mindex <= suffix; mindex++)
203262232Sdcs		g->matchjump[mindex] = MIN(g->matchjump[mindex],
203362232Sdcs		    g->mlen + suffix - mindex);
203462232Sdcs
203562232Sdcs        ssuffix = pmatches[suffix];
203662391Sdcs        while (suffix < g->mlen) {
203762391Sdcs                while (suffix <= ssuffix) {
203862232Sdcs                        g->matchjump[suffix] = MIN(g->matchjump[suffix],
203962232Sdcs			    g->mlen + ssuffix - suffix);
204062232Sdcs                        suffix++;
204162232Sdcs                }
204262232Sdcs                ssuffix = pmatches[ssuffix];
204362232Sdcs        }
204462232Sdcs
204562232Sdcs	free(pmatches);
204662232Sdcs}
204762232Sdcs
204862232Sdcs/*
20491573Srgrimes - pluscount - count + nesting
20501573Srgrimes == static sopno pluscount(register struct parse *p, register struct re_guts *g);
20511573Srgrimes */
20521573Srgrimesstatic sopno			/* nesting depth */
20531573Srgrimespluscount(p, g)
20541573Srgrimesstruct parse *p;
20551573Srgrimesregister struct re_guts *g;
20561573Srgrimes{
20571573Srgrimes	register sop *scan;
20581573Srgrimes	register sop s;
20591573Srgrimes	register sopno plusnest = 0;
20601573Srgrimes	register sopno maxnest = 0;
20611573Srgrimes
20621573Srgrimes	if (p->error != 0)
20631573Srgrimes		return(0);	/* there may not be an OEND */
20641573Srgrimes
20651573Srgrimes	scan = g->strip + 1;
20661573Srgrimes	do {
20671573Srgrimes		s = *scan++;
20681573Srgrimes		switch (OP(s)) {
20691573Srgrimes		case OPLUS_:
20701573Srgrimes			plusnest++;
20711573Srgrimes			break;
20721573Srgrimes		case O_PLUS:
20731573Srgrimes			if (plusnest > maxnest)
20741573Srgrimes				maxnest = plusnest;
20751573Srgrimes			plusnest--;
20761573Srgrimes			break;
20771573Srgrimes		}
20781573Srgrimes	} while (OP(s) != OEND);
20791573Srgrimes	if (plusnest != 0)
20801573Srgrimes		g->iflags |= BAD;
20811573Srgrimes	return(maxnest);
20821573Srgrimes}
2083