regcomp.c revision 17514
11573Srgrimes/*-
21573Srgrimes * Copyright (c) 1992, 1993, 1994 Henry Spencer.
31573Srgrimes * Copyright (c) 1992, 1993, 1994
41573Srgrimes *	The Regents of the University of California.  All rights reserved.
51573Srgrimes *
61573Srgrimes * This code is derived from software contributed to Berkeley by
71573Srgrimes * Henry Spencer.
81573Srgrimes *
91573Srgrimes * Redistribution and use in source and binary forms, with or without
101573Srgrimes * modification, are permitted provided that the following conditions
111573Srgrimes * are met:
121573Srgrimes * 1. Redistributions of source code must retain the above copyright
131573Srgrimes *    notice, this list of conditions and the following disclaimer.
141573Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
151573Srgrimes *    notice, this list of conditions and the following disclaimer in the
161573Srgrimes *    documentation and/or other materials provided with the distribution.
171573Srgrimes * 3. All advertising materials mentioning features or use of this software
181573Srgrimes *    must display the following acknowledgement:
191573Srgrimes *	This product includes software developed by the University of
201573Srgrimes *	California, Berkeley and its contributors.
211573Srgrimes * 4. Neither the name of the University nor the names of its contributors
221573Srgrimes *    may be used to endorse or promote products derived from this software
231573Srgrimes *    without specific prior written permission.
241573Srgrimes *
251573Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
261573Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
271573Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
281573Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
291573Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
301573Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
311573Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
321573Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
331573Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
341573Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
351573Srgrimes * SUCH DAMAGE.
361573Srgrimes *
371573Srgrimes *	@(#)regcomp.c	8.5 (Berkeley) 3/20/94
381573Srgrimes */
391573Srgrimes
401573Srgrimes#if defined(LIBC_SCCS) && !defined(lint)
411573Srgrimesstatic char sccsid[] = "@(#)regcomp.c	8.5 (Berkeley) 3/20/94";
421573Srgrimes#endif /* LIBC_SCCS and not lint */
431573Srgrimes
441573Srgrimes#include <sys/types.h>
451573Srgrimes#include <stdio.h>
461573Srgrimes#include <string.h>
471573Srgrimes#include <ctype.h>
481573Srgrimes#include <limits.h>
491573Srgrimes#include <stdlib.h>
501573Srgrimes#include <regex.h>
511573Srgrimes
521573Srgrimes#include "utils.h"
531573Srgrimes#include "regex2.h"
541573Srgrimes
551573Srgrimes#include "cclass.h"
561573Srgrimes#include "cname.h"
571573Srgrimes
581573Srgrimes/*
591573Srgrimes * parse structure, passed up and down to avoid global variables and
601573Srgrimes * other clumsinesses
611573Srgrimes */
621573Srgrimesstruct parse {
631573Srgrimes	char *next;		/* next character in RE */
641573Srgrimes	char *end;		/* end of string (-> NUL normally) */
651573Srgrimes	int error;		/* has an error been seen? */
661573Srgrimes	sop *strip;		/* malloced strip */
671573Srgrimes	sopno ssize;		/* malloced strip size (allocated) */
681573Srgrimes	sopno slen;		/* malloced strip length (used) */
691573Srgrimes	int ncsalloc;		/* number of csets allocated */
701573Srgrimes	struct re_guts *g;
711573Srgrimes#	define	NPAREN	10	/* we need to remember () 1-9 for back refs */
721573Srgrimes	sopno pbegin[NPAREN];	/* -> ( ([0] unused) */
731573Srgrimes	sopno pend[NPAREN];	/* -> ) ([0] unused) */
741573Srgrimes};
751573Srgrimes
761573Srgrimes/* ========= begin header generated by ./mkh ========= */
771573Srgrimes#ifdef __cplusplus
781573Srgrimesextern "C" {
791573Srgrimes#endif
801573Srgrimes
811573Srgrimes/* === regcomp.c === */
821573Srgrimesstatic void p_ere __P((struct parse *p, int stop));
831573Srgrimesstatic void p_ere_exp __P((struct parse *p));
841573Srgrimesstatic void p_str __P((struct parse *p));
851573Srgrimesstatic void p_bre __P((struct parse *p, int end1, int end2));
861573Srgrimesstatic int p_simp_re __P((struct parse *p, int starordinary));
871573Srgrimesstatic int p_count __P((struct parse *p));
881573Srgrimesstatic void p_bracket __P((struct parse *p));
891573Srgrimesstatic void p_b_term __P((struct parse *p, cset *cs));
901573Srgrimesstatic void p_b_cclass __P((struct parse *p, cset *cs));
911573Srgrimesstatic void p_b_eclass __P((struct parse *p, cset *cs));
921573Srgrimesstatic char p_b_symbol __P((struct parse *p));
931573Srgrimesstatic char p_b_coll_elem __P((struct parse *p, int endc));
941573Srgrimesstatic char othercase __P((int ch));
951573Srgrimesstatic void bothcases __P((struct parse *p, int ch));
961573Srgrimesstatic void ordinary __P((struct parse *p, int ch));
971573Srgrimesstatic void nonnewline __P((struct parse *p));
981573Srgrimesstatic void repeat __P((struct parse *p, sopno start, int from, int to));
991573Srgrimesstatic int seterr __P((struct parse *p, int e));
1001573Srgrimesstatic cset *allocset __P((struct parse *p));
1011573Srgrimesstatic void freeset __P((struct parse *p, cset *cs));
1021573Srgrimesstatic int freezeset __P((struct parse *p, cset *cs));
1031573Srgrimesstatic int firstch __P((struct parse *p, cset *cs));
1041573Srgrimesstatic int nch __P((struct parse *p, cset *cs));
1051573Srgrimesstatic void mcadd __P((struct parse *p, cset *cs, char *cp));
10617141Sjkh#if used
1071573Srgrimesstatic void mcsub __P((cset *cs, char *cp));
1081573Srgrimesstatic int mcin __P((cset *cs, char *cp));
1091573Srgrimesstatic char *mcfind __P((cset *cs, char *cp));
11017141Sjkh#endif
1111573Srgrimesstatic void mcinvert __P((struct parse *p, cset *cs));
1121573Srgrimesstatic void mccase __P((struct parse *p, cset *cs));
1131573Srgrimesstatic int isinsets __P((struct re_guts *g, int c));
1141573Srgrimesstatic int samesets __P((struct re_guts *g, int c1, int c2));
1151573Srgrimesstatic void categorize __P((struct parse *p, struct re_guts *g));
1161573Srgrimesstatic sopno dupl __P((struct parse *p, sopno start, sopno finish));
1171573Srgrimesstatic void doemit __P((struct parse *p, sop op, size_t opnd));
1181573Srgrimesstatic void doinsert __P((struct parse *p, sop op, size_t opnd, sopno pos));
1191573Srgrimesstatic void dofwd __P((struct parse *p, sopno pos, sop value));
1201573Srgrimesstatic void enlarge __P((struct parse *p, sopno size));
1211573Srgrimesstatic void stripsnug __P((struct parse *p, struct re_guts *g));
1221573Srgrimesstatic void findmust __P((struct parse *p, struct re_guts *g));
1231573Srgrimesstatic sopno pluscount __P((struct parse *p, struct re_guts *g));
12417514Sachestatic int collcmp __P((int c1, int c2));
1251573Srgrimes
1261573Srgrimes#ifdef __cplusplus
1271573Srgrimes}
1281573Srgrimes#endif
1291573Srgrimes/* ========= end header generated by ./mkh ========= */
1301573Srgrimes
1311573Srgrimesstatic char nuls[10];		/* place to point scanner in event of error */
1321573Srgrimes
1331573Srgrimes/*
1341573Srgrimes * macros for use with parse structure
1351573Srgrimes * BEWARE:  these know that the parse structure is named `p' !!!
1361573Srgrimes */
1371573Srgrimes#define	PEEK()	(*p->next)
1381573Srgrimes#define	PEEK2()	(*(p->next+1))
1391573Srgrimes#define	MORE()	(p->next < p->end)
1401573Srgrimes#define	MORE2()	(p->next+1 < p->end)
1411573Srgrimes#define	SEE(c)	(MORE() && PEEK() == (c))
1421573Srgrimes#define	SEETWO(a, b)	(MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
1431573Srgrimes#define	EAT(c)	((SEE(c)) ? (NEXT(), 1) : 0)
1441573Srgrimes#define	EATTWO(a, b)	((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
1451573Srgrimes#define	NEXT()	(p->next++)
1461573Srgrimes#define	NEXT2()	(p->next += 2)
1471573Srgrimes#define	NEXTn(n)	(p->next += (n))
1481573Srgrimes#define	GETNEXT()	(*p->next++)
1491573Srgrimes#define	SETERROR(e)	seterr(p, (e))
1501573Srgrimes#define	REQUIRE(co, e)	((co) || SETERROR(e))
1511573Srgrimes#define	MUSTSEE(c, e)	(REQUIRE(MORE() && PEEK() == (c), e))
1521573Srgrimes#define	MUSTEAT(c, e)	(REQUIRE(MORE() && GETNEXT() == (c), e))
1531573Srgrimes#define	MUSTNOTSEE(c, e)	(REQUIRE(!MORE() || PEEK() != (c), e))
1541573Srgrimes#define	EMIT(op, sopnd)	doemit(p, (sop)(op), (size_t)(sopnd))
1551573Srgrimes#define	INSERT(op, pos)	doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
1561573Srgrimes#define	AHEAD(pos)		dofwd(p, pos, HERE()-(pos))
1571573Srgrimes#define	ASTERN(sop, pos)	EMIT(sop, HERE()-pos)
1581573Srgrimes#define	HERE()		(p->slen)
1591573Srgrimes#define	THERE()		(p->slen - 1)
1601573Srgrimes#define	THERETHERE()	(p->slen - 2)
1611573Srgrimes#define	DROP(n)	(p->slen -= (n))
1621573Srgrimes
1631573Srgrimes#ifndef NDEBUG
1641573Srgrimesstatic int never = 0;		/* for use in asserts; shuts lint up */
1651573Srgrimes#else
1661573Srgrimes#define	never	0		/* some <assert.h>s have bugs too */
1671573Srgrimes#endif
1681573Srgrimes
1691573Srgrimes/*
1701573Srgrimes - regcomp - interface for parser and compilation
1711573Srgrimes = extern int regcomp(regex_t *, const char *, int);
1721573Srgrimes = #define	REG_BASIC	0000
1731573Srgrimes = #define	REG_EXTENDED	0001
1741573Srgrimes = #define	REG_ICASE	0002
1751573Srgrimes = #define	REG_NOSUB	0004
1761573Srgrimes = #define	REG_NEWLINE	0010
1771573Srgrimes = #define	REG_NOSPEC	0020
1781573Srgrimes = #define	REG_PEND	0040
1791573Srgrimes = #define	REG_DUMP	0200
1801573Srgrimes */
1811573Srgrimesint				/* 0 success, otherwise REG_something */
1821573Srgrimesregcomp(preg, pattern, cflags)
1831573Srgrimesregex_t *preg;
1841573Srgrimesconst char *pattern;
1851573Srgrimesint cflags;
1861573Srgrimes{
1871573Srgrimes	struct parse pa;
1881573Srgrimes	register struct re_guts *g;
1891573Srgrimes	register struct parse *p = &pa;
1901573Srgrimes	register int i;
1911573Srgrimes	register size_t len;
1921573Srgrimes#ifdef REDEBUG
1931573Srgrimes#	define	GOODFLAGS(f)	(f)
1941573Srgrimes#else
1951573Srgrimes#	define	GOODFLAGS(f)	((f)&~REG_DUMP)
1961573Srgrimes#endif
1971573Srgrimes
1981573Srgrimes	cflags = GOODFLAGS(cflags);
1991573Srgrimes	if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
2001573Srgrimes		return(REG_INVARG);
2011573Srgrimes
2021573Srgrimes	if (cflags&REG_PEND) {
2031573Srgrimes		if (preg->re_endp < pattern)
2041573Srgrimes			return(REG_INVARG);
2051573Srgrimes		len = preg->re_endp - pattern;
2061573Srgrimes	} else
2071573Srgrimes		len = strlen((char *)pattern);
2081573Srgrimes
2091573Srgrimes	/* do the mallocs early so failure handling is easy */
2101573Srgrimes	g = (struct re_guts *)malloc(sizeof(struct re_guts) +
2111573Srgrimes							(NC-1)*sizeof(cat_t));
2121573Srgrimes	if (g == NULL)
2131573Srgrimes		return(REG_ESPACE);
2141573Srgrimes	p->ssize = len/(size_t)2*(size_t)3 + (size_t)1;	/* ugh */
2151573Srgrimes	p->strip = (sop *)malloc(p->ssize * sizeof(sop));
2161573Srgrimes	p->slen = 0;
2171573Srgrimes	if (p->strip == NULL) {
2181573Srgrimes		free((char *)g);
2191573Srgrimes		return(REG_ESPACE);
2201573Srgrimes	}
2211573Srgrimes
2221573Srgrimes	/* set things up */
2231573Srgrimes	p->g = g;
2241573Srgrimes	p->next = (char *)pattern;	/* convenience; we do not modify it */
2251573Srgrimes	p->end = p->next + len;
2261573Srgrimes	p->error = 0;
2271573Srgrimes	p->ncsalloc = 0;
2281573Srgrimes	for (i = 0; i < NPAREN; i++) {
2291573Srgrimes		p->pbegin[i] = 0;
2301573Srgrimes		p->pend[i] = 0;
2311573Srgrimes	}
2321573Srgrimes	g->csetsize = NC;
2331573Srgrimes	g->sets = NULL;
2341573Srgrimes	g->setbits = NULL;
2351573Srgrimes	g->ncsets = 0;
2361573Srgrimes	g->cflags = cflags;
2371573Srgrimes	g->iflags = 0;
2381573Srgrimes	g->nbol = 0;
2391573Srgrimes	g->neol = 0;
2401573Srgrimes	g->must = NULL;
2411573Srgrimes	g->mlen = 0;
2421573Srgrimes	g->nsub = 0;
2431573Srgrimes	g->ncategories = 1;	/* category 0 is "everything else" */
2441573Srgrimes	g->categories = &g->catspace[-(CHAR_MIN)];
2451573Srgrimes	(void) memset((char *)g->catspace, 0, NC*sizeof(cat_t));
2461573Srgrimes	g->backrefs = 0;
2471573Srgrimes
2481573Srgrimes	/* do it */
2491573Srgrimes	EMIT(OEND, 0);
2501573Srgrimes	g->firststate = THERE();
2511573Srgrimes	if (cflags&REG_EXTENDED)
2521573Srgrimes		p_ere(p, OUT);
2531573Srgrimes	else if (cflags&REG_NOSPEC)
2541573Srgrimes		p_str(p);
2551573Srgrimes	else
2561573Srgrimes		p_bre(p, OUT, OUT);
2571573Srgrimes	EMIT(OEND, 0);
2581573Srgrimes	g->laststate = THERE();
2591573Srgrimes
2601573Srgrimes	/* tidy up loose ends and fill things in */
2611573Srgrimes	categorize(p, g);
2621573Srgrimes	stripsnug(p, g);
2631573Srgrimes	findmust(p, g);
2641573Srgrimes	g->nplus = pluscount(p, g);
2651573Srgrimes	g->magic = MAGIC2;
2661573Srgrimes	preg->re_nsub = g->nsub;
2671573Srgrimes	preg->re_g = g;
2681573Srgrimes	preg->re_magic = MAGIC1;
2691573Srgrimes#ifndef REDEBUG
2701573Srgrimes	/* not debugging, so can't rely on the assert() in regexec() */
2711573Srgrimes	if (g->iflags&BAD)
2721573Srgrimes		SETERROR(REG_ASSERT);
2731573Srgrimes#endif
2741573Srgrimes
2751573Srgrimes	/* win or lose, we're done */
2761573Srgrimes	if (p->error != 0)	/* lose */
2771573Srgrimes		regfree(preg);
2781573Srgrimes	return(p->error);
2791573Srgrimes}
2801573Srgrimes
2811573Srgrimes/*
2821573Srgrimes - p_ere - ERE parser top level, concatenation and alternation
2831573Srgrimes == static void p_ere(register struct parse *p, int stop);
2841573Srgrimes */
2851573Srgrimesstatic void
2861573Srgrimesp_ere(p, stop)
2871573Srgrimesregister struct parse *p;
2881573Srgrimesint stop;			/* character this ERE should end at */
2891573Srgrimes{
2901573Srgrimes	register char c;
2911573Srgrimes	register sopno prevback;
2921573Srgrimes	register sopno prevfwd;
2931573Srgrimes	register sopno conc;
2941573Srgrimes	register int first = 1;		/* is this the first alternative? */
2951573Srgrimes
2961573Srgrimes	for (;;) {
2971573Srgrimes		/* do a bunch of concatenated expressions */
2981573Srgrimes		conc = HERE();
2991573Srgrimes		while (MORE() && (c = PEEK()) != '|' && c != stop)
3001573Srgrimes			p_ere_exp(p);
30117141Sjkh		(void)REQUIRE(HERE() != conc, REG_EMPTY);	/* require nonempty */
3021573Srgrimes
3031573Srgrimes		if (!EAT('|'))
3041573Srgrimes			break;		/* NOTE BREAK OUT */
3051573Srgrimes
3061573Srgrimes		if (first) {
3071573Srgrimes			INSERT(OCH_, conc);	/* offset is wrong */
3081573Srgrimes			prevfwd = conc;
3091573Srgrimes			prevback = conc;
3101573Srgrimes			first = 0;
3111573Srgrimes		}
3121573Srgrimes		ASTERN(OOR1, prevback);
3131573Srgrimes		prevback = THERE();
3141573Srgrimes		AHEAD(prevfwd);			/* fix previous offset */
3151573Srgrimes		prevfwd = HERE();
3161573Srgrimes		EMIT(OOR2, 0);			/* offset is very wrong */
3171573Srgrimes	}
3181573Srgrimes
3191573Srgrimes	if (!first) {		/* tail-end fixups */
3201573Srgrimes		AHEAD(prevfwd);
3211573Srgrimes		ASTERN(O_CH, prevback);
3221573Srgrimes	}
3231573Srgrimes
3241573Srgrimes	assert(!MORE() || SEE(stop));
3251573Srgrimes}
3261573Srgrimes
3271573Srgrimes/*
3281573Srgrimes - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
3291573Srgrimes == static void p_ere_exp(register struct parse *p);
3301573Srgrimes */
3311573Srgrimesstatic void
3321573Srgrimesp_ere_exp(p)
3331573Srgrimesregister struct parse *p;
3341573Srgrimes{
3351573Srgrimes	register char c;
3361573Srgrimes	register sopno pos;
3371573Srgrimes	register int count;
3381573Srgrimes	register int count2;
3391573Srgrimes	register sopno subno;
3401573Srgrimes	int wascaret = 0;
3411573Srgrimes
3421573Srgrimes	assert(MORE());		/* caller should have ensured this */
3431573Srgrimes	c = GETNEXT();
3441573Srgrimes
3451573Srgrimes	pos = HERE();
3461573Srgrimes	switch (c) {
3471573Srgrimes	case '(':
34817141Sjkh		(void)REQUIRE(MORE(), REG_EPAREN);
3491573Srgrimes		p->g->nsub++;
3501573Srgrimes		subno = p->g->nsub;
3511573Srgrimes		if (subno < NPAREN)
3521573Srgrimes			p->pbegin[subno] = HERE();
3531573Srgrimes		EMIT(OLPAREN, subno);
3541573Srgrimes		if (!SEE(')'))
3551573Srgrimes			p_ere(p, ')');
3561573Srgrimes		if (subno < NPAREN) {
3571573Srgrimes			p->pend[subno] = HERE();
3581573Srgrimes			assert(p->pend[subno] != 0);
3591573Srgrimes		}
3601573Srgrimes		EMIT(ORPAREN, subno);
36117141Sjkh		(void)MUSTEAT(')', REG_EPAREN);
3621573Srgrimes		break;
3631573Srgrimes#ifndef POSIX_MISTAKE
3641573Srgrimes	case ')':		/* happens only if no current unmatched ( */
3651573Srgrimes		/*
3661573Srgrimes		 * You may ask, why the ifndef?  Because I didn't notice
3671573Srgrimes		 * this until slightly too late for 1003.2, and none of the
3681573Srgrimes		 * other 1003.2 regular-expression reviewers noticed it at
3691573Srgrimes		 * all.  So an unmatched ) is legal POSIX, at least until
3701573Srgrimes		 * we can get it fixed.
3711573Srgrimes		 */
3721573Srgrimes		SETERROR(REG_EPAREN);
3731573Srgrimes		break;
3741573Srgrimes#endif
3751573Srgrimes	case '^':
3761573Srgrimes		EMIT(OBOL, 0);
3771573Srgrimes		p->g->iflags |= USEBOL;
3781573Srgrimes		p->g->nbol++;
3791573Srgrimes		wascaret = 1;
3801573Srgrimes		break;
3811573Srgrimes	case '$':
3821573Srgrimes		EMIT(OEOL, 0);
3831573Srgrimes		p->g->iflags |= USEEOL;
3841573Srgrimes		p->g->neol++;
3851573Srgrimes		break;
3861573Srgrimes	case '|':
3871573Srgrimes		SETERROR(REG_EMPTY);
3881573Srgrimes		break;
3891573Srgrimes	case '*':
3901573Srgrimes	case '+':
3911573Srgrimes	case '?':
3921573Srgrimes		SETERROR(REG_BADRPT);
3931573Srgrimes		break;
3941573Srgrimes	case '.':
3951573Srgrimes		if (p->g->cflags&REG_NEWLINE)
3961573Srgrimes			nonnewline(p);
3971573Srgrimes		else
3981573Srgrimes			EMIT(OANY, 0);
3991573Srgrimes		break;
4001573Srgrimes	case '[':
4011573Srgrimes		p_bracket(p);
4021573Srgrimes		break;
4031573Srgrimes	case '\\':
40417141Sjkh		(void)REQUIRE(MORE(), REG_EESCAPE);
4051573Srgrimes		c = GETNEXT();
4061573Srgrimes		ordinary(p, c);
4071573Srgrimes		break;
4081573Srgrimes	case '{':		/* okay as ordinary except if digit follows */
40917141Sjkh		(void)REQUIRE(!MORE() || !isdigit(PEEK()), REG_BADRPT);
4101573Srgrimes		/* FALLTHROUGH */
4111573Srgrimes	default:
4121573Srgrimes		ordinary(p, c);
4131573Srgrimes		break;
4141573Srgrimes	}
4151573Srgrimes
4161573Srgrimes	if (!MORE())
4171573Srgrimes		return;
4181573Srgrimes	c = PEEK();
4191573Srgrimes	/* we call { a repetition if followed by a digit */
4201573Srgrimes	if (!( c == '*' || c == '+' || c == '?' ||
4211573Srgrimes				(c == '{' && MORE2() && isdigit(PEEK2())) ))
4221573Srgrimes		return;		/* no repetition, we're done */
4231573Srgrimes	NEXT();
4241573Srgrimes
42517141Sjkh	(void)REQUIRE(!wascaret, REG_BADRPT);
4261573Srgrimes	switch (c) {
4271573Srgrimes	case '*':	/* implemented as +? */
4281573Srgrimes		/* this case does not require the (y|) trick, noKLUDGE */
4291573Srgrimes		INSERT(OPLUS_, pos);
4301573Srgrimes		ASTERN(O_PLUS, pos);
4311573Srgrimes		INSERT(OQUEST_, pos);
4321573Srgrimes		ASTERN(O_QUEST, pos);
4331573Srgrimes		break;
4341573Srgrimes	case '+':
4351573Srgrimes		INSERT(OPLUS_, pos);
4361573Srgrimes		ASTERN(O_PLUS, pos);
4371573Srgrimes		break;
4381573Srgrimes	case '?':
4391573Srgrimes		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
4401573Srgrimes		INSERT(OCH_, pos);		/* offset slightly wrong */
4411573Srgrimes		ASTERN(OOR1, pos);		/* this one's right */
4421573Srgrimes		AHEAD(pos);			/* fix the OCH_ */
4431573Srgrimes		EMIT(OOR2, 0);			/* offset very wrong... */
4441573Srgrimes		AHEAD(THERE());			/* ...so fix it */
4451573Srgrimes		ASTERN(O_CH, THERETHERE());
4461573Srgrimes		break;
4471573Srgrimes	case '{':
4481573Srgrimes		count = p_count(p);
4491573Srgrimes		if (EAT(',')) {
4501573Srgrimes			if (isdigit(PEEK())) {
4511573Srgrimes				count2 = p_count(p);
45217141Sjkh				(void)REQUIRE(count <= count2, REG_BADBR);
4531573Srgrimes			} else		/* single number with comma */
4541573Srgrimes				count2 = INFINITY;
4551573Srgrimes		} else		/* just a single number */
4561573Srgrimes			count2 = count;
4571573Srgrimes		repeat(p, pos, count, count2);
4581573Srgrimes		if (!EAT('}')) {	/* error heuristics */
4591573Srgrimes			while (MORE() && PEEK() != '}')
4601573Srgrimes				NEXT();
46117141Sjkh			(void)REQUIRE(MORE(), REG_EBRACE);
4621573Srgrimes			SETERROR(REG_BADBR);
4631573Srgrimes		}
4641573Srgrimes		break;
4651573Srgrimes	}
4661573Srgrimes
4671573Srgrimes	if (!MORE())
4681573Srgrimes		return;
4691573Srgrimes	c = PEEK();
4701573Srgrimes	if (!( c == '*' || c == '+' || c == '?' ||
4711573Srgrimes				(c == '{' && MORE2() && isdigit(PEEK2())) ) )
4721573Srgrimes		return;
4731573Srgrimes	SETERROR(REG_BADRPT);
4741573Srgrimes}
4751573Srgrimes
4761573Srgrimes/*
4771573Srgrimes - p_str - string (no metacharacters) "parser"
4781573Srgrimes == static void p_str(register struct parse *p);
4791573Srgrimes */
4801573Srgrimesstatic void
4811573Srgrimesp_str(p)
4821573Srgrimesregister struct parse *p;
4831573Srgrimes{
48417141Sjkh	(void)REQUIRE(MORE(), REG_EMPTY);
4851573Srgrimes	while (MORE())
4861573Srgrimes		ordinary(p, GETNEXT());
4871573Srgrimes}
4881573Srgrimes
4891573Srgrimes/*
4901573Srgrimes - p_bre - BRE parser top level, anchoring and concatenation
4911573Srgrimes == static void p_bre(register struct parse *p, register int end1, \
4921573Srgrimes ==	register int end2);
4931573Srgrimes * Giving end1 as OUT essentially eliminates the end1/end2 check.
4941573Srgrimes *
4951573Srgrimes * This implementation is a bit of a kludge, in that a trailing $ is first
4961573Srgrimes * taken as an ordinary character and then revised to be an anchor.  The
4971573Srgrimes * only undesirable side effect is that '$' gets included as a character
4981573Srgrimes * category in such cases.  This is fairly harmless; not worth fixing.
4991573Srgrimes * The amount of lookahead needed to avoid this kludge is excessive.
5001573Srgrimes */
5011573Srgrimesstatic void
5021573Srgrimesp_bre(p, end1, end2)
5031573Srgrimesregister struct parse *p;
5041573Srgrimesregister int end1;		/* first terminating character */
5051573Srgrimesregister int end2;		/* second terminating character */
5061573Srgrimes{
5071573Srgrimes	register sopno start = HERE();
5081573Srgrimes	register int first = 1;			/* first subexpression? */
5091573Srgrimes	register int wasdollar = 0;
5101573Srgrimes
5111573Srgrimes	if (EAT('^')) {
5121573Srgrimes		EMIT(OBOL, 0);
5131573Srgrimes		p->g->iflags |= USEBOL;
5141573Srgrimes		p->g->nbol++;
5151573Srgrimes	}
5161573Srgrimes	while (MORE() && !SEETWO(end1, end2)) {
5171573Srgrimes		wasdollar = p_simp_re(p, first);
5181573Srgrimes		first = 0;
5191573Srgrimes	}
5201573Srgrimes	if (wasdollar) {	/* oops, that was a trailing anchor */
5211573Srgrimes		DROP(1);
5221573Srgrimes		EMIT(OEOL, 0);
5231573Srgrimes		p->g->iflags |= USEEOL;
5241573Srgrimes		p->g->neol++;
5251573Srgrimes	}
5261573Srgrimes
52717141Sjkh	(void)REQUIRE(HERE() != start, REG_EMPTY);	/* require nonempty */
5281573Srgrimes}
5291573Srgrimes
5301573Srgrimes/*
5311573Srgrimes - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
5321573Srgrimes == static int p_simp_re(register struct parse *p, int starordinary);
5331573Srgrimes */
5341573Srgrimesstatic int			/* was the simple RE an unbackslashed $? */
5351573Srgrimesp_simp_re(p, starordinary)
5361573Srgrimesregister struct parse *p;
5371573Srgrimesint starordinary;		/* is a leading * an ordinary character? */
5381573Srgrimes{
5391573Srgrimes	register int c;
5401573Srgrimes	register int count;
5411573Srgrimes	register int count2;
5421573Srgrimes	register sopno pos;
5431573Srgrimes	register int i;
5441573Srgrimes	register sopno subno;
5451573Srgrimes#	define	BACKSL	(1<<CHAR_BIT)
5461573Srgrimes
5471573Srgrimes	pos = HERE();		/* repetion op, if any, covers from here */
5481573Srgrimes
5491573Srgrimes	assert(MORE());		/* caller should have ensured this */
5501573Srgrimes	c = GETNEXT();
5511573Srgrimes	if (c == '\\') {
55217141Sjkh		(void)REQUIRE(MORE(), REG_EESCAPE);
5531573Srgrimes		c = BACKSL | (unsigned char)GETNEXT();
5541573Srgrimes	}
5551573Srgrimes	switch (c) {
5561573Srgrimes	case '.':
5571573Srgrimes		if (p->g->cflags&REG_NEWLINE)
5581573Srgrimes			nonnewline(p);
5591573Srgrimes		else
5601573Srgrimes			EMIT(OANY, 0);
5611573Srgrimes		break;
5621573Srgrimes	case '[':
5631573Srgrimes		p_bracket(p);
5641573Srgrimes		break;
5651573Srgrimes	case BACKSL|'{':
5661573Srgrimes		SETERROR(REG_BADRPT);
5671573Srgrimes		break;
5681573Srgrimes	case BACKSL|'(':
5691573Srgrimes		p->g->nsub++;
5701573Srgrimes		subno = p->g->nsub;
5711573Srgrimes		if (subno < NPAREN)
5721573Srgrimes			p->pbegin[subno] = HERE();
5731573Srgrimes		EMIT(OLPAREN, subno);
5741573Srgrimes		/* the MORE here is an error heuristic */
5751573Srgrimes		if (MORE() && !SEETWO('\\', ')'))
5761573Srgrimes			p_bre(p, '\\', ')');
5771573Srgrimes		if (subno < NPAREN) {
5781573Srgrimes			p->pend[subno] = HERE();
5791573Srgrimes			assert(p->pend[subno] != 0);
5801573Srgrimes		}
5811573Srgrimes		EMIT(ORPAREN, subno);
58217141Sjkh		(void)REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
5831573Srgrimes		break;
5841573Srgrimes	case BACKSL|')':	/* should not get here -- must be user */
5851573Srgrimes	case BACKSL|'}':
5861573Srgrimes		SETERROR(REG_EPAREN);
5871573Srgrimes		break;
5881573Srgrimes	case BACKSL|'1':
5891573Srgrimes	case BACKSL|'2':
5901573Srgrimes	case BACKSL|'3':
5911573Srgrimes	case BACKSL|'4':
5921573Srgrimes	case BACKSL|'5':
5931573Srgrimes	case BACKSL|'6':
5941573Srgrimes	case BACKSL|'7':
5951573Srgrimes	case BACKSL|'8':
5961573Srgrimes	case BACKSL|'9':
5971573Srgrimes		i = (c&~BACKSL) - '0';
5981573Srgrimes		assert(i < NPAREN);
5991573Srgrimes		if (p->pend[i] != 0) {
6001573Srgrimes			assert(i <= p->g->nsub);
6011573Srgrimes			EMIT(OBACK_, i);
6021573Srgrimes			assert(p->pbegin[i] != 0);
6031573Srgrimes			assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
6041573Srgrimes			assert(OP(p->strip[p->pend[i]]) == ORPAREN);
6051573Srgrimes			(void) dupl(p, p->pbegin[i]+1, p->pend[i]);
6061573Srgrimes			EMIT(O_BACK, i);
6071573Srgrimes		} else
6081573Srgrimes			SETERROR(REG_ESUBREG);
6091573Srgrimes		p->g->backrefs = 1;
6101573Srgrimes		break;
6111573Srgrimes	case '*':
61217141Sjkh		(void)REQUIRE(starordinary, REG_BADRPT);
6131573Srgrimes		/* FALLTHROUGH */
6141573Srgrimes	default:
6151573Srgrimes		ordinary(p, c &~ BACKSL);
6161573Srgrimes		break;
6171573Srgrimes	}
6181573Srgrimes
6191573Srgrimes	if (EAT('*')) {		/* implemented as +? */
6201573Srgrimes		/* this case does not require the (y|) trick, noKLUDGE */
6211573Srgrimes		INSERT(OPLUS_, pos);
6221573Srgrimes		ASTERN(O_PLUS, pos);
6231573Srgrimes		INSERT(OQUEST_, pos);
6241573Srgrimes		ASTERN(O_QUEST, pos);
6251573Srgrimes	} else if (EATTWO('\\', '{')) {
6261573Srgrimes		count = p_count(p);
6271573Srgrimes		if (EAT(',')) {
6281573Srgrimes			if (MORE() && isdigit(PEEK())) {
6291573Srgrimes				count2 = p_count(p);
63017141Sjkh				(void)REQUIRE(count <= count2, REG_BADBR);
6311573Srgrimes			} else		/* single number with comma */
6321573Srgrimes				count2 = INFINITY;
6331573Srgrimes		} else		/* just a single number */
6341573Srgrimes			count2 = count;
6351573Srgrimes		repeat(p, pos, count, count2);
6361573Srgrimes		if (!EATTWO('\\', '}')) {	/* error heuristics */
6371573Srgrimes			while (MORE() && !SEETWO('\\', '}'))
6381573Srgrimes				NEXT();
63917141Sjkh			(void)REQUIRE(MORE(), REG_EBRACE);
6401573Srgrimes			SETERROR(REG_BADBR);
6411573Srgrimes		}
6421573Srgrimes	} else if (c == (unsigned char)'$')	/* $ (but not \$) ends it */
6431573Srgrimes		return(1);
6441573Srgrimes
6451573Srgrimes	return(0);
6461573Srgrimes}
6471573Srgrimes
6481573Srgrimes/*
6491573Srgrimes - p_count - parse a repetition count
6501573Srgrimes == static int p_count(register struct parse *p);
6511573Srgrimes */
6521573Srgrimesstatic int			/* the value */
6531573Srgrimesp_count(p)
6541573Srgrimesregister struct parse *p;
6551573Srgrimes{
6561573Srgrimes	register int count = 0;
6571573Srgrimes	register int ndigits = 0;
6581573Srgrimes
6591573Srgrimes	while (MORE() && isdigit(PEEK()) && count <= DUPMAX) {
6601573Srgrimes		count = count*10 + (GETNEXT() - '0');
6611573Srgrimes		ndigits++;
6621573Srgrimes	}
6631573Srgrimes
66417141Sjkh	(void)REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
6651573Srgrimes	return(count);
6661573Srgrimes}
6671573Srgrimes
6681573Srgrimes/*
6691573Srgrimes - p_bracket - parse a bracketed character list
6701573Srgrimes == static void p_bracket(register struct parse *p);
6711573Srgrimes *
6721573Srgrimes * Note a significant property of this code:  if the allocset() did SETERROR,
6731573Srgrimes * no set operations are done.
6741573Srgrimes */
6751573Srgrimesstatic void
6761573Srgrimesp_bracket(p)
6771573Srgrimesregister struct parse *p;
6781573Srgrimes{
6791573Srgrimes	register cset *cs = allocset(p);
6801573Srgrimes	register int invert = 0;
6811573Srgrimes
6821573Srgrimes	/* Dept of Truly Sickening Special-Case Kludges */
6831573Srgrimes	if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
6841573Srgrimes		EMIT(OBOW, 0);
6851573Srgrimes		NEXTn(6);
6861573Srgrimes		return;
6871573Srgrimes	}
6881573Srgrimes	if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
6891573Srgrimes		EMIT(OEOW, 0);
6901573Srgrimes		NEXTn(6);
6911573Srgrimes		return;
6921573Srgrimes	}
6931573Srgrimes
6941573Srgrimes	if (EAT('^'))
6951573Srgrimes		invert++;	/* make note to invert set at end */
6961573Srgrimes	if (EAT(']'))
6971573Srgrimes		CHadd(cs, ']');
6981573Srgrimes	else if (EAT('-'))
6991573Srgrimes		CHadd(cs, '-');
7001573Srgrimes	while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
7011573Srgrimes		p_b_term(p, cs);
7021573Srgrimes	if (EAT('-'))
7031573Srgrimes		CHadd(cs, '-');
70417141Sjkh	(void)MUSTEAT(']', REG_EBRACK);
7051573Srgrimes
7061573Srgrimes	if (p->error != 0)	/* don't mess things up further */
7071573Srgrimes		return;
7081573Srgrimes
7091573Srgrimes	if (p->g->cflags&REG_ICASE) {
7101573Srgrimes		register int i;
7111573Srgrimes		register int ci;
7121573Srgrimes
7131573Srgrimes		for (i = p->g->csetsize - 1; i >= 0; i--)
7141573Srgrimes			if (CHIN(cs, i) && isalpha(i)) {
7151573Srgrimes				ci = othercase(i);
7161573Srgrimes				if (ci != i)
7171573Srgrimes					CHadd(cs, ci);
7181573Srgrimes			}
7191573Srgrimes		if (cs->multis != NULL)
7201573Srgrimes			mccase(p, cs);
7211573Srgrimes	}
7221573Srgrimes	if (invert) {
7231573Srgrimes		register int i;
7241573Srgrimes
7251573Srgrimes		for (i = p->g->csetsize - 1; i >= 0; i--)
7261573Srgrimes			if (CHIN(cs, i))
7271573Srgrimes				CHsub(cs, i);
7281573Srgrimes			else
7291573Srgrimes				CHadd(cs, i);
7301573Srgrimes		if (p->g->cflags&REG_NEWLINE)
7311573Srgrimes			CHsub(cs, '\n');
7321573Srgrimes		if (cs->multis != NULL)
7331573Srgrimes			mcinvert(p, cs);
7341573Srgrimes	}
7351573Srgrimes
7361573Srgrimes	assert(cs->multis == NULL);		/* xxx */
7371573Srgrimes
7381573Srgrimes	if (nch(p, cs) == 1) {		/* optimize singleton sets */
7391573Srgrimes		ordinary(p, firstch(p, cs));
7401573Srgrimes		freeset(p, cs);
7411573Srgrimes	} else
7421573Srgrimes		EMIT(OANYOF, freezeset(p, cs));
7431573Srgrimes}
7441573Srgrimes
74517514Sachestatic int collcmp (c1, c2)
74617514Sacheint c1, c2;
74717514Sache{
74817514Sache	static char s1[2], s2[2];
74917514Sache
75017514Sache	if (c1 == c2)
75117514Sache		return (0);
75217514Sache	if (   (isascii(c1) && isascii(c2))
75317514Sache	    || (!isalpha(c1) && !isalpha(c2))
75417514Sache	   )
75517514Sache		return (c1 - c2);
75617514Sache	if (isalpha(c1) && !isalpha(c2)) {
75717514Sache		if (isupper(c1))
75817514Sache			return ('A' - c2);
75917514Sache		else
76017514Sache			return ('a' - c2);
76117514Sache	} else if (isalpha(c2) && !isalpha(c1)) {
76217514Sache		if (isupper(c2))
76317514Sache			return (c1 - 'A');
76417514Sache		else
76517514Sache			return (c1 - 'a');
76617514Sache	}
76717514Sache	if (isupper(c1) && islower(c2))
76817514Sache		return (-1);
76917514Sache	else if (islower(c1) && isupper(c2))
77017514Sache		return (1);
77117514Sache	s1[0] = c1;
77217514Sache	s2[0] = c2;
77317514Sache	return strcoll(s1, s2);
77417514Sache}
77517514Sache
7761573Srgrimes/*
7771573Srgrimes - p_b_term - parse one term of a bracketed character list
7781573Srgrimes == static void p_b_term(register struct parse *p, register cset *cs);
7791573Srgrimes */
7801573Srgrimesstatic void
7811573Srgrimesp_b_term(p, cs)
7821573Srgrimesregister struct parse *p;
7831573Srgrimesregister cset *cs;
7841573Srgrimes{
7851573Srgrimes	register char c;
7861573Srgrimes	register char start, finish;
7871573Srgrimes	register int i;
7881573Srgrimes
7891573Srgrimes	/* classify what we've got */
7901573Srgrimes	switch ((MORE()) ? PEEK() : '\0') {
7911573Srgrimes	case '[':
7921573Srgrimes		c = (MORE2()) ? PEEK2() : '\0';
7931573Srgrimes		break;
7941573Srgrimes	case '-':
7951573Srgrimes		SETERROR(REG_ERANGE);
7961573Srgrimes		return;			/* NOTE RETURN */
7971573Srgrimes		break;
7981573Srgrimes	default:
7991573Srgrimes		c = '\0';
8001573Srgrimes		break;
8011573Srgrimes	}
8021573Srgrimes
8031573Srgrimes	switch (c) {
8041573Srgrimes	case ':':		/* character class */
8051573Srgrimes		NEXT2();
80617141Sjkh		(void)REQUIRE(MORE(), REG_EBRACK);
8071573Srgrimes		c = PEEK();
80817141Sjkh		(void)REQUIRE(c != '-' && c != ']', REG_ECTYPE);
8091573Srgrimes		p_b_cclass(p, cs);
81017141Sjkh		(void)REQUIRE(MORE(), REG_EBRACK);
81117141Sjkh		(void)REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
8121573Srgrimes		break;
8131573Srgrimes	case '=':		/* equivalence class */
8141573Srgrimes		NEXT2();
81517141Sjkh		(void)REQUIRE(MORE(), REG_EBRACK);
8161573Srgrimes		c = PEEK();
81717141Sjkh		(void)REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
8181573Srgrimes		p_b_eclass(p, cs);
81917141Sjkh		(void)REQUIRE(MORE(), REG_EBRACK);
82017141Sjkh		(void)REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
8211573Srgrimes		break;
8221573Srgrimes	default:		/* symbol, ordinary character, or range */
8231573Srgrimes/* xxx revision needed for multichar stuff */
8241573Srgrimes		start = p_b_symbol(p);
8251573Srgrimes		if (SEE('-') && MORE2() && PEEK2() != ']') {
8261573Srgrimes			/* range */
8271573Srgrimes			NEXT();
8281573Srgrimes			if (EAT('-'))
8291573Srgrimes				finish = '-';
8301573Srgrimes			else
8311573Srgrimes				finish = p_b_symbol(p);
8321573Srgrimes		} else
8331573Srgrimes			finish = start;
83417514Sache		if (start == finish)
83517514Sache			CHadd(cs, start);
83617514Sache		else {
83717514Sache			uch s = (uch)start, f = (uch)finish;
83817514Sache
83917514Sache			(void)REQUIRE(collcmp(s, f) <= 0, REG_ERANGE);
84017514Sache			for (i = CHAR_MIN; i <= CHAR_MAX; i++) {
84117514Sache				if (   collcmp(s, (uch)i) <= 0
84217514Sache				    && collcmp((uch)i, f) <= 0
84317514Sache				   )
84417514Sache					CHadd(cs, i);
84517514Sache			}
84617514Sache		}
8471573Srgrimes		break;
8481573Srgrimes	}
8491573Srgrimes}
8501573Srgrimes
8511573Srgrimes/*
8521573Srgrimes - p_b_cclass - parse a character-class name and deal with it
8531573Srgrimes == static void p_b_cclass(register struct parse *p, register cset *cs);
8541573Srgrimes */
8551573Srgrimesstatic void
8561573Srgrimesp_b_cclass(p, cs)
8571573Srgrimesregister struct parse *p;
8581573Srgrimesregister cset *cs;
8591573Srgrimes{
86017508Sache	register int c;
8611573Srgrimes	register char *sp = p->next;
8621573Srgrimes	register struct cclass *cp;
8631573Srgrimes	register size_t len;
8641573Srgrimes
86517508Sache	while (MORE() && isalpha((uch)PEEK()))
8661573Srgrimes		NEXT();
8671573Srgrimes	len = p->next - sp;
8681573Srgrimes	for (cp = cclasses; cp->name != NULL; cp++)
8691573Srgrimes		if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
8701573Srgrimes			break;
8711573Srgrimes	if (cp->name == NULL) {
8721573Srgrimes		/* oops, didn't find it */
8731573Srgrimes		SETERROR(REG_ECTYPE);
8741573Srgrimes		return;
8751573Srgrimes	}
8761573Srgrimes
87717508Sache	switch (cp->fidx) {
87817508Sache	case CALNUM:
87917508Sache		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
88017508Sache			if (isalnum((uch)c))
88117508Sache				CHadd(cs, c);
88217508Sache		break;
88317508Sache	case CALPHA:
88417508Sache		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
88517508Sache			if (isalpha((uch)c))
88617508Sache				CHadd(cs, c);
88717508Sache		break;
88817508Sache	case CBLANK:
88917508Sache		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
89017508Sache			if (isblank((uch)c))
89117508Sache				CHadd(cs, c);
89217508Sache		break;
89317508Sache	case CCNTRL:
89417508Sache		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
89517508Sache			if (iscntrl((uch)c))
89617508Sache				CHadd(cs, c);
89717508Sache		break;
89817508Sache	case CDIGIT:
89917508Sache		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
90017508Sache			if (isdigit((uch)c))
90117508Sache				CHadd(cs, c);
90217508Sache		break;
90317508Sache	case CGRAPH:
90417508Sache		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
90517508Sache			if (isgraph((uch)c))
90617508Sache				CHadd(cs, c);
90717508Sache		break;
90817508Sache	case CLOWER:
90917508Sache		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
91017508Sache			if (islower((uch)c))
91117508Sache				CHadd(cs, c);
91217508Sache		break;
91317508Sache	case CPRINT:
91417508Sache		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
91517508Sache			if (isprint((uch)c))
91617508Sache				CHadd(cs, c);
91717508Sache		break;
91817508Sache	case CPUNCT:
91917508Sache		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
92017508Sache			if (ispunct((uch)c))
92117508Sache				CHadd(cs, c);
92217508Sache		break;
92317508Sache	case CSPACE:
92417508Sache		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
92517508Sache			if (isspace((uch)c))
92617508Sache				CHadd(cs, c);
92717508Sache		break;
92817508Sache	case CUPPER:
92917508Sache		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
93017508Sache			if (isupper((uch)c))
93117508Sache				CHadd(cs, c);
93217508Sache		break;
93317508Sache	case CXDIGIT:
93417508Sache		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
93517508Sache			if (isxdigit((uch)c))
93617508Sache				CHadd(cs, c);
93717508Sache		break;
93817508Sache	}
93917508Sache#if 0
9401573Srgrimes	for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
9411573Srgrimes		MCadd(p, cs, u);
94217508Sache#endif
9431573Srgrimes}
9441573Srgrimes
9451573Srgrimes/*
9461573Srgrimes - p_b_eclass - parse an equivalence-class name and deal with it
9471573Srgrimes == static void p_b_eclass(register struct parse *p, register cset *cs);
9481573Srgrimes *
9491573Srgrimes * This implementation is incomplete. xxx
9501573Srgrimes */
9511573Srgrimesstatic void
9521573Srgrimesp_b_eclass(p, cs)
9531573Srgrimesregister struct parse *p;
9541573Srgrimesregister cset *cs;
9551573Srgrimes{
9561573Srgrimes	register char c;
9571573Srgrimes
9581573Srgrimes	c = p_b_coll_elem(p, '=');
9591573Srgrimes	CHadd(cs, c);
9601573Srgrimes}
9611573Srgrimes
9621573Srgrimes/*
9631573Srgrimes - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
9641573Srgrimes == static char p_b_symbol(register struct parse *p);
9651573Srgrimes */
9661573Srgrimesstatic char			/* value of symbol */
9671573Srgrimesp_b_symbol(p)
9681573Srgrimesregister struct parse *p;
9691573Srgrimes{
9701573Srgrimes	register char value;
9711573Srgrimes
97217141Sjkh	(void)REQUIRE(MORE(), REG_EBRACK);
9731573Srgrimes	if (!EATTWO('[', '.'))
9741573Srgrimes		return(GETNEXT());
9751573Srgrimes
9761573Srgrimes	/* collating symbol */
9771573Srgrimes	value = p_b_coll_elem(p, '.');
97817141Sjkh	(void)REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
9791573Srgrimes	return(value);
9801573Srgrimes}
9811573Srgrimes
9821573Srgrimes/*
9831573Srgrimes - p_b_coll_elem - parse a collating-element name and look it up
9841573Srgrimes == static char p_b_coll_elem(register struct parse *p, int endc);
9851573Srgrimes */
9861573Srgrimesstatic char			/* value of collating element */
9871573Srgrimesp_b_coll_elem(p, endc)
9881573Srgrimesregister struct parse *p;
9891573Srgrimesint endc;			/* name ended by endc,']' */
9901573Srgrimes{
9911573Srgrimes	register char *sp = p->next;
9921573Srgrimes	register struct cname *cp;
9931573Srgrimes	register int len;
9941573Srgrimes
9951573Srgrimes	while (MORE() && !SEETWO(endc, ']'))
9961573Srgrimes		NEXT();
9971573Srgrimes	if (!MORE()) {
9981573Srgrimes		SETERROR(REG_EBRACK);
9991573Srgrimes		return(0);
10001573Srgrimes	}
10011573Srgrimes	len = p->next - sp;
10021573Srgrimes	for (cp = cnames; cp->name != NULL; cp++)
10031573Srgrimes		if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
10041573Srgrimes			return(cp->code);	/* known name */
10051573Srgrimes	if (len == 1)
10061573Srgrimes		return(*sp);	/* single character */
10071573Srgrimes	SETERROR(REG_ECOLLATE);			/* neither */
10081573Srgrimes	return(0);
10091573Srgrimes}
10101573Srgrimes
10111573Srgrimes/*
10121573Srgrimes - othercase - return the case counterpart of an alphabetic
10131573Srgrimes == static char othercase(int ch);
10141573Srgrimes */
10151573Srgrimesstatic char			/* if no counterpart, return ch */
10161573Srgrimesothercase(ch)
10171573Srgrimesint ch;
10181573Srgrimes{
101914815Sache	ch = (unsigned char)ch;
10201573Srgrimes	assert(isalpha(ch));
10211573Srgrimes	if (isupper(ch))
10221573Srgrimes		return(tolower(ch));
10231573Srgrimes	else if (islower(ch))
10241573Srgrimes		return(toupper(ch));
10251573Srgrimes	else			/* peculiar, but could happen */
10261573Srgrimes		return(ch);
10271573Srgrimes}
10281573Srgrimes
10291573Srgrimes/*
10301573Srgrimes - bothcases - emit a dualcase version of a two-case character
10311573Srgrimes == static void bothcases(register struct parse *p, int ch);
10321573Srgrimes *
10331573Srgrimes * Boy, is this implementation ever a kludge...
10341573Srgrimes */
10351573Srgrimesstatic void
10361573Srgrimesbothcases(p, ch)
10371573Srgrimesregister struct parse *p;
10381573Srgrimesint ch;
10391573Srgrimes{
10401573Srgrimes	register char *oldnext = p->next;
10411573Srgrimes	register char *oldend = p->end;
10421573Srgrimes	char bracket[3];
10431573Srgrimes
104414815Sache	ch = (unsigned char)ch;
10451573Srgrimes	assert(othercase(ch) != ch);	/* p_bracket() would recurse */
10461573Srgrimes	p->next = bracket;
10471573Srgrimes	p->end = bracket+2;
10481573Srgrimes	bracket[0] = ch;
10491573Srgrimes	bracket[1] = ']';
10501573Srgrimes	bracket[2] = '\0';
10511573Srgrimes	p_bracket(p);
10521573Srgrimes	assert(p->next == bracket+2);
10531573Srgrimes	p->next = oldnext;
10541573Srgrimes	p->end = oldend;
10551573Srgrimes}
10561573Srgrimes
10571573Srgrimes/*
10581573Srgrimes - ordinary - emit an ordinary character
10591573Srgrimes == static void ordinary(register struct parse *p, register int ch);
10601573Srgrimes */
10611573Srgrimesstatic void
10621573Srgrimesordinary(p, ch)
10631573Srgrimesregister struct parse *p;
10641573Srgrimesregister int ch;
10651573Srgrimes{
10661573Srgrimes	register cat_t *cap = p->g->categories;
10671573Srgrimes
106814815Sache	if ((p->g->cflags&REG_ICASE) && isalpha((unsigned char)ch) && othercase(ch) != ch)
10691573Srgrimes		bothcases(p, ch);
10701573Srgrimes	else {
10711573Srgrimes		EMIT(OCHAR, (unsigned char)ch);
10721573Srgrimes		if (cap[ch] == 0)
10731573Srgrimes			cap[ch] = p->g->ncategories++;
10741573Srgrimes	}
10751573Srgrimes}
10761573Srgrimes
10771573Srgrimes/*
10781573Srgrimes - nonnewline - emit REG_NEWLINE version of OANY
10791573Srgrimes == static void nonnewline(register struct parse *p);
10801573Srgrimes *
10811573Srgrimes * Boy, is this implementation ever a kludge...
10821573Srgrimes */
10831573Srgrimesstatic void
10841573Srgrimesnonnewline(p)
10851573Srgrimesregister struct parse *p;
10861573Srgrimes{
10871573Srgrimes	register char *oldnext = p->next;
10881573Srgrimes	register char *oldend = p->end;
10891573Srgrimes	char bracket[4];
10901573Srgrimes
10911573Srgrimes	p->next = bracket;
10921573Srgrimes	p->end = bracket+3;
10931573Srgrimes	bracket[0] = '^';
10941573Srgrimes	bracket[1] = '\n';
10951573Srgrimes	bracket[2] = ']';
10961573Srgrimes	bracket[3] = '\0';
10971573Srgrimes	p_bracket(p);
10981573Srgrimes	assert(p->next == bracket+3);
10991573Srgrimes	p->next = oldnext;
11001573Srgrimes	p->end = oldend;
11011573Srgrimes}
11021573Srgrimes
11031573Srgrimes/*
11041573Srgrimes - repeat - generate code for a bounded repetition, recursively if needed
11051573Srgrimes == static void repeat(register struct parse *p, sopno start, int from, int to);
11061573Srgrimes */
11071573Srgrimesstatic void
11081573Srgrimesrepeat(p, start, from, to)
11091573Srgrimesregister struct parse *p;
11101573Srgrimessopno start;			/* operand from here to end of strip */
11111573Srgrimesint from;			/* repeated from this number */
11121573Srgrimesint to;				/* to this number of times (maybe INFINITY) */
11131573Srgrimes{
11141573Srgrimes	register sopno finish = HERE();
11151573Srgrimes#	define	N	2
11161573Srgrimes#	define	INF	3
11171573Srgrimes#	define	REP(f, t)	((f)*8 + (t))
11181573Srgrimes#	define	MAP(n)	(((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
11191573Srgrimes	register sopno copy;
11201573Srgrimes
11211573Srgrimes	if (p->error != 0)	/* head off possible runaway recursion */
11221573Srgrimes		return;
11231573Srgrimes
11241573Srgrimes	assert(from <= to);
11251573Srgrimes
11261573Srgrimes	switch (REP(MAP(from), MAP(to))) {
11271573Srgrimes	case REP(0, 0):			/* must be user doing this */
11281573Srgrimes		DROP(finish-start);	/* drop the operand */
11291573Srgrimes		break;
11301573Srgrimes	case REP(0, 1):			/* as x{1,1}? */
11311573Srgrimes	case REP(0, N):			/* as x{1,n}? */
11321573Srgrimes	case REP(0, INF):		/* as x{1,}? */
11331573Srgrimes		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
11341573Srgrimes		INSERT(OCH_, start);		/* offset is wrong... */
11351573Srgrimes		repeat(p, start+1, 1, to);
11361573Srgrimes		ASTERN(OOR1, start);
11371573Srgrimes		AHEAD(start);			/* ... fix it */
11381573Srgrimes		EMIT(OOR2, 0);
11391573Srgrimes		AHEAD(THERE());
11401573Srgrimes		ASTERN(O_CH, THERETHERE());
11411573Srgrimes		break;
11421573Srgrimes	case REP(1, 1):			/* trivial case */
11431573Srgrimes		/* done */
11441573Srgrimes		break;
11451573Srgrimes	case REP(1, N):			/* as x?x{1,n-1} */
11461573Srgrimes		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
11471573Srgrimes		INSERT(OCH_, start);
11481573Srgrimes		ASTERN(OOR1, start);
11491573Srgrimes		AHEAD(start);
11501573Srgrimes		EMIT(OOR2, 0);			/* offset very wrong... */
11511573Srgrimes		AHEAD(THERE());			/* ...so fix it */
11521573Srgrimes		ASTERN(O_CH, THERETHERE());
11531573Srgrimes		copy = dupl(p, start+1, finish+1);
11541573Srgrimes		assert(copy == finish+4);
11551573Srgrimes		repeat(p, copy, 1, to-1);
11561573Srgrimes		break;
11571573Srgrimes	case REP(1, INF):		/* as x+ */
11581573Srgrimes		INSERT(OPLUS_, start);
11591573Srgrimes		ASTERN(O_PLUS, start);
11601573Srgrimes		break;
11611573Srgrimes	case REP(N, N):			/* as xx{m-1,n-1} */
11621573Srgrimes		copy = dupl(p, start, finish);
11631573Srgrimes		repeat(p, copy, from-1, to-1);
11641573Srgrimes		break;
11651573Srgrimes	case REP(N, INF):		/* as xx{n-1,INF} */
11661573Srgrimes		copy = dupl(p, start, finish);
11671573Srgrimes		repeat(p, copy, from-1, to);
11681573Srgrimes		break;
11691573Srgrimes	default:			/* "can't happen" */
11701573Srgrimes		SETERROR(REG_ASSERT);	/* just in case */
11711573Srgrimes		break;
11721573Srgrimes	}
11731573Srgrimes}
11741573Srgrimes
11751573Srgrimes/*
11761573Srgrimes - seterr - set an error condition
11771573Srgrimes == static int seterr(register struct parse *p, int e);
11781573Srgrimes */
11791573Srgrimesstatic int			/* useless but makes type checking happy */
11801573Srgrimesseterr(p, e)
11811573Srgrimesregister struct parse *p;
11821573Srgrimesint e;
11831573Srgrimes{
11841573Srgrimes	if (p->error == 0)	/* keep earliest error condition */
11851573Srgrimes		p->error = e;
11861573Srgrimes	p->next = nuls;		/* try to bring things to a halt */
11871573Srgrimes	p->end = nuls;
11881573Srgrimes	return(0);		/* make the return value well-defined */
11891573Srgrimes}
11901573Srgrimes
11911573Srgrimes/*
11921573Srgrimes - allocset - allocate a set of characters for []
11931573Srgrimes == static cset *allocset(register struct parse *p);
11941573Srgrimes */
11951573Srgrimesstatic cset *
11961573Srgrimesallocset(p)
11971573Srgrimesregister struct parse *p;
11981573Srgrimes{
11991573Srgrimes	register int no = p->g->ncsets++;
12001573Srgrimes	register size_t nc;
12011573Srgrimes	register size_t nbytes;
12021573Srgrimes	register cset *cs;
12031573Srgrimes	register size_t css = (size_t)p->g->csetsize;
12041573Srgrimes	register int i;
12051573Srgrimes
12061573Srgrimes	if (no >= p->ncsalloc) {	/* need another column of space */
12071573Srgrimes		p->ncsalloc += CHAR_BIT;
12081573Srgrimes		nc = p->ncsalloc;
12091573Srgrimes		assert(nc % CHAR_BIT == 0);
12101573Srgrimes		nbytes = nc / CHAR_BIT * css;
12111573Srgrimes		if (p->g->sets == NULL)
12121573Srgrimes			p->g->sets = (cset *)malloc(nc * sizeof(cset));
12131573Srgrimes		else
12141573Srgrimes			p->g->sets = (cset *)realloc((char *)p->g->sets,
12151573Srgrimes							nc * sizeof(cset));
12161573Srgrimes		if (p->g->setbits == NULL)
12171573Srgrimes			p->g->setbits = (uch *)malloc(nbytes);
12181573Srgrimes		else {
12191573Srgrimes			p->g->setbits = (uch *)realloc((char *)p->g->setbits,
12201573Srgrimes								nbytes);
12211573Srgrimes			/* xxx this isn't right if setbits is now NULL */
12221573Srgrimes			for (i = 0; i < no; i++)
12231573Srgrimes				p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
12241573Srgrimes		}
12251573Srgrimes		if (p->g->sets != NULL && p->g->setbits != NULL)
12261573Srgrimes			(void) memset((char *)p->g->setbits + (nbytes - css),
12271573Srgrimes								0, css);
12281573Srgrimes		else {
12291573Srgrimes			no = 0;
12301573Srgrimes			SETERROR(REG_ESPACE);
12311573Srgrimes			/* caller's responsibility not to do set ops */
12321573Srgrimes		}
12331573Srgrimes	}
12341573Srgrimes
12351573Srgrimes	assert(p->g->sets != NULL);	/* xxx */
12361573Srgrimes	cs = &p->g->sets[no];
12371573Srgrimes	cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
12381573Srgrimes	cs->mask = 1 << ((no) % CHAR_BIT);
12391573Srgrimes	cs->hash = 0;
12401573Srgrimes	cs->smultis = 0;
12411573Srgrimes	cs->multis = NULL;
12421573Srgrimes
12431573Srgrimes	return(cs);
12441573Srgrimes}
12451573Srgrimes
12461573Srgrimes/*
12471573Srgrimes - freeset - free a now-unused set
12481573Srgrimes == static void freeset(register struct parse *p, register cset *cs);
12491573Srgrimes */
12501573Srgrimesstatic void
12511573Srgrimesfreeset(p, cs)
12521573Srgrimesregister struct parse *p;
12531573Srgrimesregister cset *cs;
12541573Srgrimes{
12551573Srgrimes	register int i;
12561573Srgrimes	register cset *top = &p->g->sets[p->g->ncsets];
12571573Srgrimes	register size_t css = (size_t)p->g->csetsize;
12581573Srgrimes
12591573Srgrimes	for (i = 0; i < css; i++)
12601573Srgrimes		CHsub(cs, i);
12611573Srgrimes	if (cs == top-1)	/* recover only the easy case */
12621573Srgrimes		p->g->ncsets--;
12631573Srgrimes}
12641573Srgrimes
12651573Srgrimes/*
12661573Srgrimes - freezeset - final processing on a set of characters
12671573Srgrimes == static int freezeset(register struct parse *p, register cset *cs);
12681573Srgrimes *
12691573Srgrimes * The main task here is merging identical sets.  This is usually a waste
12701573Srgrimes * of time (although the hash code minimizes the overhead), but can win
12711573Srgrimes * big if REG_ICASE is being used.  REG_ICASE, by the way, is why the hash
12721573Srgrimes * is done using addition rather than xor -- all ASCII [aA] sets xor to
12731573Srgrimes * the same value!
12741573Srgrimes */
12751573Srgrimesstatic int			/* set number */
12761573Srgrimesfreezeset(p, cs)
12771573Srgrimesregister struct parse *p;
12781573Srgrimesregister cset *cs;
12791573Srgrimes{
128017509Sache	register short h = cs->hash;
12811573Srgrimes	register int i;
12821573Srgrimes	register cset *top = &p->g->sets[p->g->ncsets];
12831573Srgrimes	register cset *cs2;
12841573Srgrimes	register size_t css = (size_t)p->g->csetsize;
12851573Srgrimes
12861573Srgrimes	/* look for an earlier one which is the same */
12871573Srgrimes	for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
12881573Srgrimes		if (cs2->hash == h && cs2 != cs) {
12891573Srgrimes			/* maybe */
12901573Srgrimes			for (i = 0; i < css; i++)
12911573Srgrimes				if (!!CHIN(cs2, i) != !!CHIN(cs, i))
12921573Srgrimes					break;		/* no */
12931573Srgrimes			if (i == css)
12941573Srgrimes				break;			/* yes */
12951573Srgrimes		}
12961573Srgrimes
12971573Srgrimes	if (cs2 < top) {	/* found one */
12981573Srgrimes		freeset(p, cs);
12991573Srgrimes		cs = cs2;
13001573Srgrimes	}
13011573Srgrimes
13021573Srgrimes	return((int)(cs - p->g->sets));
13031573Srgrimes}
13041573Srgrimes
13051573Srgrimes/*
13061573Srgrimes - firstch - return first character in a set (which must have at least one)
13071573Srgrimes == static int firstch(register struct parse *p, register cset *cs);
13081573Srgrimes */
13091573Srgrimesstatic int			/* character; there is no "none" value */
13101573Srgrimesfirstch(p, cs)
13111573Srgrimesregister struct parse *p;
13121573Srgrimesregister cset *cs;
13131573Srgrimes{
13141573Srgrimes	register int i;
13151573Srgrimes	register size_t css = (size_t)p->g->csetsize;
13161573Srgrimes
13171573Srgrimes	for (i = 0; i < css; i++)
13181573Srgrimes		if (CHIN(cs, i))
131914815Sache			return((unsigned char)i);
13201573Srgrimes	assert(never);
13211573Srgrimes	return(0);		/* arbitrary */
13221573Srgrimes}
13231573Srgrimes
13241573Srgrimes/*
13251573Srgrimes - nch - number of characters in a set
13261573Srgrimes == static int nch(register struct parse *p, register cset *cs);
13271573Srgrimes */
13281573Srgrimesstatic int
13291573Srgrimesnch(p, cs)
13301573Srgrimesregister struct parse *p;
13311573Srgrimesregister cset *cs;
13321573Srgrimes{
13331573Srgrimes	register int i;
13341573Srgrimes	register size_t css = (size_t)p->g->csetsize;
13351573Srgrimes	register int n = 0;
13361573Srgrimes
13371573Srgrimes	for (i = 0; i < css; i++)
13381573Srgrimes		if (CHIN(cs, i))
13391573Srgrimes			n++;
13401573Srgrimes	return(n);
13411573Srgrimes}
13421573Srgrimes
13431573Srgrimes/*
13441573Srgrimes - mcadd - add a collating element to a cset
13451573Srgrimes == static void mcadd(register struct parse *p, register cset *cs, \
13461573Srgrimes ==	register char *cp);
13471573Srgrimes */
13481573Srgrimesstatic void
13491573Srgrimesmcadd(p, cs, cp)
13501573Srgrimesregister struct parse *p;
13511573Srgrimesregister cset *cs;
13521573Srgrimesregister char *cp;
13531573Srgrimes{
13541573Srgrimes	register size_t oldend = cs->smultis;
13551573Srgrimes
13561573Srgrimes	cs->smultis += strlen(cp) + 1;
13571573Srgrimes	if (cs->multis == NULL)
13581573Srgrimes		cs->multis = malloc(cs->smultis);
13591573Srgrimes	else
13601573Srgrimes		cs->multis = realloc(cs->multis, cs->smultis);
13611573Srgrimes	if (cs->multis == NULL) {
13621573Srgrimes		SETERROR(REG_ESPACE);
13631573Srgrimes		return;
13641573Srgrimes	}
13651573Srgrimes
13661573Srgrimes	(void) strcpy(cs->multis + oldend - 1, cp);
13671573Srgrimes	cs->multis[cs->smultis - 1] = '\0';
13681573Srgrimes}
13691573Srgrimes
137017141Sjkh#if used
13711573Srgrimes/*
13721573Srgrimes - mcsub - subtract a collating element from a cset
13731573Srgrimes == static void mcsub(register cset *cs, register char *cp);
13741573Srgrimes */
13751573Srgrimesstatic void
13761573Srgrimesmcsub(cs, cp)
13771573Srgrimesregister cset *cs;
13781573Srgrimesregister char *cp;
13791573Srgrimes{
13801573Srgrimes	register char *fp = mcfind(cs, cp);
13811573Srgrimes	register size_t len = strlen(fp);
13821573Srgrimes
13831573Srgrimes	assert(fp != NULL);
13841573Srgrimes	(void) memmove(fp, fp + len + 1,
13851573Srgrimes				cs->smultis - (fp + len + 1 - cs->multis));
13861573Srgrimes	cs->smultis -= len;
13871573Srgrimes
13881573Srgrimes	if (cs->smultis == 0) {
13891573Srgrimes		free(cs->multis);
13901573Srgrimes		cs->multis = NULL;
13911573Srgrimes		return;
13921573Srgrimes	}
13931573Srgrimes
13941573Srgrimes	cs->multis = realloc(cs->multis, cs->smultis);
13951573Srgrimes	assert(cs->multis != NULL);
13961573Srgrimes}
13971573Srgrimes
13981573Srgrimes/*
13991573Srgrimes - mcin - is a collating element in a cset?
14001573Srgrimes == static int mcin(register cset *cs, register char *cp);
14011573Srgrimes */
14021573Srgrimesstatic int
14031573Srgrimesmcin(cs, cp)
14041573Srgrimesregister cset *cs;
14051573Srgrimesregister char *cp;
14061573Srgrimes{
14071573Srgrimes	return(mcfind(cs, cp) != NULL);
14081573Srgrimes}
14091573Srgrimes
14101573Srgrimes/*
14111573Srgrimes - mcfind - find a collating element in a cset
14121573Srgrimes == static char *mcfind(register cset *cs, register char *cp);
14131573Srgrimes */
14141573Srgrimesstatic char *
14151573Srgrimesmcfind(cs, cp)
14161573Srgrimesregister cset *cs;
14171573Srgrimesregister char *cp;
14181573Srgrimes{
14191573Srgrimes	register char *p;
14201573Srgrimes
14211573Srgrimes	if (cs->multis == NULL)
14221573Srgrimes		return(NULL);
14231573Srgrimes	for (p = cs->multis; *p != '\0'; p += strlen(p) + 1)
14241573Srgrimes		if (strcmp(cp, p) == 0)
14251573Srgrimes			return(p);
14261573Srgrimes	return(NULL);
14271573Srgrimes}
142817141Sjkh#endif
14291573Srgrimes
14301573Srgrimes/*
14311573Srgrimes - mcinvert - invert the list of collating elements in a cset
14321573Srgrimes == static void mcinvert(register struct parse *p, register cset *cs);
14331573Srgrimes *
14341573Srgrimes * This would have to know the set of possibilities.  Implementation
14351573Srgrimes * is deferred.
14361573Srgrimes */
14371573Srgrimesstatic void
14381573Srgrimesmcinvert(p, cs)
14391573Srgrimesregister struct parse *p;
14401573Srgrimesregister cset *cs;
14411573Srgrimes{
14421573Srgrimes	assert(cs->multis == NULL);	/* xxx */
14431573Srgrimes}
14441573Srgrimes
14451573Srgrimes/*
14461573Srgrimes - mccase - add case counterparts of the list of collating elements in a cset
14471573Srgrimes == static void mccase(register struct parse *p, register cset *cs);
14481573Srgrimes *
14491573Srgrimes * This would have to know the set of possibilities.  Implementation
14501573Srgrimes * is deferred.
14511573Srgrimes */
14521573Srgrimesstatic void
14531573Srgrimesmccase(p, cs)
14541573Srgrimesregister struct parse *p;
14551573Srgrimesregister cset *cs;
14561573Srgrimes{
14571573Srgrimes	assert(cs->multis == NULL);	/* xxx */
14581573Srgrimes}
14591573Srgrimes
14601573Srgrimes/*
14611573Srgrimes - isinsets - is this character in any sets?
14621573Srgrimes == static int isinsets(register struct re_guts *g, int c);
14631573Srgrimes */
14641573Srgrimesstatic int			/* predicate */
14651573Srgrimesisinsets(g, c)
14661573Srgrimesregister struct re_guts *g;
14671573Srgrimesint c;
14681573Srgrimes{
14691573Srgrimes	register uch *col;
14701573Srgrimes	register int i;
14711573Srgrimes	register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
14721573Srgrimes	register unsigned uc = (unsigned char)c;
14731573Srgrimes
14741573Srgrimes	for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
14751573Srgrimes		if (col[uc] != 0)
14761573Srgrimes			return(1);
14771573Srgrimes	return(0);
14781573Srgrimes}
14791573Srgrimes
14801573Srgrimes/*
14811573Srgrimes - samesets - are these two characters in exactly the same sets?
14821573Srgrimes == static int samesets(register struct re_guts *g, int c1, int c2);
14831573Srgrimes */
14841573Srgrimesstatic int			/* predicate */
14851573Srgrimessamesets(g, c1, c2)
14861573Srgrimesregister struct re_guts *g;
14871573Srgrimesint c1;
14881573Srgrimesint c2;
14891573Srgrimes{
14901573Srgrimes	register uch *col;
14911573Srgrimes	register int i;
14921573Srgrimes	register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
14931573Srgrimes	register unsigned uc1 = (unsigned char)c1;
14941573Srgrimes	register unsigned uc2 = (unsigned char)c2;
14951573Srgrimes
14961573Srgrimes	for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
14971573Srgrimes		if (col[uc1] != col[uc2])
14981573Srgrimes			return(0);
14991573Srgrimes	return(1);
15001573Srgrimes}
15011573Srgrimes
15021573Srgrimes/*
15031573Srgrimes - categorize - sort out character categories
15041573Srgrimes == static void categorize(struct parse *p, register struct re_guts *g);
15051573Srgrimes */
15061573Srgrimesstatic void
15071573Srgrimescategorize(p, g)
15081573Srgrimesstruct parse *p;
15091573Srgrimesregister struct re_guts *g;
15101573Srgrimes{
15111573Srgrimes	register cat_t *cats = g->categories;
15121573Srgrimes	register int c;
15131573Srgrimes	register int c2;
15141573Srgrimes	register cat_t cat;
15151573Srgrimes
15161573Srgrimes	/* avoid making error situations worse */
15171573Srgrimes	if (p->error != 0)
15181573Srgrimes		return;
15191573Srgrimes
15201573Srgrimes	for (c = CHAR_MIN; c <= CHAR_MAX; c++)
15211573Srgrimes		if (cats[c] == 0 && isinsets(g, c)) {
15221573Srgrimes			cat = g->ncategories++;
15231573Srgrimes			cats[c] = cat;
15241573Srgrimes			for (c2 = c+1; c2 <= CHAR_MAX; c2++)
15251573Srgrimes				if (cats[c2] == 0 && samesets(g, c, c2))
15261573Srgrimes					cats[c2] = cat;
15271573Srgrimes		}
15281573Srgrimes}
15291573Srgrimes
15301573Srgrimes/*
15311573Srgrimes - dupl - emit a duplicate of a bunch of sops
15321573Srgrimes == static sopno dupl(register struct parse *p, sopno start, sopno finish);
15331573Srgrimes */
15341573Srgrimesstatic sopno			/* start of duplicate */
15351573Srgrimesdupl(p, start, finish)
15361573Srgrimesregister struct parse *p;
15371573Srgrimessopno start;			/* from here */
15381573Srgrimessopno finish;			/* to this less one */
15391573Srgrimes{
15401573Srgrimes	register sopno ret = HERE();
15411573Srgrimes	register sopno len = finish - start;
15421573Srgrimes
15431573Srgrimes	assert(finish >= start);
15441573Srgrimes	if (len == 0)
15451573Srgrimes		return(ret);
15461573Srgrimes	enlarge(p, p->ssize + len);	/* this many unexpected additions */
15471573Srgrimes	assert(p->ssize >= p->slen + len);
15481573Srgrimes	(void) memcpy((char *)(p->strip + p->slen),
15491573Srgrimes		(char *)(p->strip + start), (size_t)len*sizeof(sop));
15501573Srgrimes	p->slen += len;
15511573Srgrimes	return(ret);
15521573Srgrimes}
15531573Srgrimes
15541573Srgrimes/*
15551573Srgrimes - doemit - emit a strip operator
15561573Srgrimes == static void doemit(register struct parse *p, sop op, size_t opnd);
15571573Srgrimes *
15581573Srgrimes * It might seem better to implement this as a macro with a function as
15591573Srgrimes * hard-case backup, but it's just too big and messy unless there are
15601573Srgrimes * some changes to the data structures.  Maybe later.
15611573Srgrimes */
15621573Srgrimesstatic void
15631573Srgrimesdoemit(p, op, opnd)
15641573Srgrimesregister struct parse *p;
15651573Srgrimessop op;
15661573Srgrimessize_t opnd;
15671573Srgrimes{
15681573Srgrimes	/* avoid making error situations worse */
15691573Srgrimes	if (p->error != 0)
15701573Srgrimes		return;
15711573Srgrimes
15721573Srgrimes	/* deal with oversize operands ("can't happen", more or less) */
15731573Srgrimes	assert(opnd < 1<<OPSHIFT);
15741573Srgrimes
15751573Srgrimes	/* deal with undersized strip */
15761573Srgrimes	if (p->slen >= p->ssize)
15771573Srgrimes		enlarge(p, (p->ssize+1) / 2 * 3);	/* +50% */
15781573Srgrimes	assert(p->slen < p->ssize);
15791573Srgrimes
15801573Srgrimes	/* finally, it's all reduced to the easy case */
15811573Srgrimes	p->strip[p->slen++] = SOP(op, opnd);
15821573Srgrimes}
15831573Srgrimes
15841573Srgrimes/*
15851573Srgrimes - doinsert - insert a sop into the strip
15861573Srgrimes == static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos);
15871573Srgrimes */
15881573Srgrimesstatic void
15891573Srgrimesdoinsert(p, op, opnd, pos)
15901573Srgrimesregister struct parse *p;
15911573Srgrimessop op;
15921573Srgrimessize_t opnd;
15931573Srgrimessopno pos;
15941573Srgrimes{
15951573Srgrimes	register sopno sn;
15961573Srgrimes	register sop s;
15971573Srgrimes	register int i;
15981573Srgrimes
15991573Srgrimes	/* avoid making error situations worse */
16001573Srgrimes	if (p->error != 0)
16011573Srgrimes		return;
16021573Srgrimes
16031573Srgrimes	sn = HERE();
16041573Srgrimes	EMIT(op, opnd);		/* do checks, ensure space */
16051573Srgrimes	assert(HERE() == sn+1);
16061573Srgrimes	s = p->strip[sn];
16071573Srgrimes
16081573Srgrimes	/* adjust paren pointers */
16091573Srgrimes	assert(pos > 0);
16101573Srgrimes	for (i = 1; i < NPAREN; i++) {
16111573Srgrimes		if (p->pbegin[i] >= pos) {
16121573Srgrimes			p->pbegin[i]++;
16131573Srgrimes		}
16141573Srgrimes		if (p->pend[i] >= pos) {
16151573Srgrimes			p->pend[i]++;
16161573Srgrimes		}
16171573Srgrimes	}
16181573Srgrimes
16191573Srgrimes	memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
16201573Srgrimes						(HERE()-pos-1)*sizeof(sop));
16211573Srgrimes	p->strip[pos] = s;
16221573Srgrimes}
16231573Srgrimes
16241573Srgrimes/*
16251573Srgrimes - dofwd - complete a forward reference
16261573Srgrimes == static void dofwd(register struct parse *p, sopno pos, sop value);
16271573Srgrimes */
16281573Srgrimesstatic void
16291573Srgrimesdofwd(p, pos, value)
16301573Srgrimesregister struct parse *p;
16311573Srgrimesregister sopno pos;
16321573Srgrimessop value;
16331573Srgrimes{
16341573Srgrimes	/* avoid making error situations worse */
16351573Srgrimes	if (p->error != 0)
16361573Srgrimes		return;
16371573Srgrimes
16381573Srgrimes	assert(value < 1<<OPSHIFT);
16391573Srgrimes	p->strip[pos] = OP(p->strip[pos]) | value;
16401573Srgrimes}
16411573Srgrimes
16421573Srgrimes/*
16431573Srgrimes - enlarge - enlarge the strip
16441573Srgrimes == static void enlarge(register struct parse *p, sopno size);
16451573Srgrimes */
16461573Srgrimesstatic void
16471573Srgrimesenlarge(p, size)
16481573Srgrimesregister struct parse *p;
16491573Srgrimesregister sopno size;
16501573Srgrimes{
16511573Srgrimes	register sop *sp;
16521573Srgrimes
16531573Srgrimes	if (p->ssize >= size)
16541573Srgrimes		return;
16551573Srgrimes
16561573Srgrimes	sp = (sop *)realloc(p->strip, size*sizeof(sop));
16571573Srgrimes	if (sp == NULL) {
16581573Srgrimes		SETERROR(REG_ESPACE);
16591573Srgrimes		return;
16601573Srgrimes	}
16611573Srgrimes	p->strip = sp;
16621573Srgrimes	p->ssize = size;
16631573Srgrimes}
16641573Srgrimes
16651573Srgrimes/*
16661573Srgrimes - stripsnug - compact the strip
16671573Srgrimes == static void stripsnug(register struct parse *p, register struct re_guts *g);
16681573Srgrimes */
16691573Srgrimesstatic void
16701573Srgrimesstripsnug(p, g)
16711573Srgrimesregister struct parse *p;
16721573Srgrimesregister struct re_guts *g;
16731573Srgrimes{
16741573Srgrimes	g->nstates = p->slen;
16751573Srgrimes	g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop));
16761573Srgrimes	if (g->strip == NULL) {
16771573Srgrimes		SETERROR(REG_ESPACE);
16781573Srgrimes		g->strip = p->strip;
16791573Srgrimes	}
16801573Srgrimes}
16811573Srgrimes
16821573Srgrimes/*
16831573Srgrimes - findmust - fill in must and mlen with longest mandatory literal string
16841573Srgrimes == static void findmust(register struct parse *p, register struct re_guts *g);
16851573Srgrimes *
16861573Srgrimes * This algorithm could do fancy things like analyzing the operands of |
16871573Srgrimes * for common subsequences.  Someday.  This code is simple and finds most
16881573Srgrimes * of the interesting cases.
16891573Srgrimes *
16901573Srgrimes * Note that must and mlen got initialized during setup.
16911573Srgrimes */
16921573Srgrimesstatic void
16931573Srgrimesfindmust(p, g)
16941573Srgrimesstruct parse *p;
16951573Srgrimesregister struct re_guts *g;
16961573Srgrimes{
16971573Srgrimes	register sop *scan;
16981573Srgrimes	sop *start;
16991573Srgrimes	register sop *newstart;
17001573Srgrimes	register sopno newlen;
17011573Srgrimes	register sop s;
17021573Srgrimes	register char *cp;
17031573Srgrimes	register sopno i;
17041573Srgrimes
17051573Srgrimes	/* avoid making error situations worse */
17061573Srgrimes	if (p->error != 0)
17071573Srgrimes		return;
17081573Srgrimes
17091573Srgrimes	/* find the longest OCHAR sequence in strip */
17101573Srgrimes	newlen = 0;
17111573Srgrimes	scan = g->strip + 1;
17121573Srgrimes	do {
17131573Srgrimes		s = *scan++;
17141573Srgrimes		switch (OP(s)) {
17151573Srgrimes		case OCHAR:		/* sequence member */
17161573Srgrimes			if (newlen == 0)		/* new sequence */
17171573Srgrimes				newstart = scan - 1;
17181573Srgrimes			newlen++;
17191573Srgrimes			break;
17201573Srgrimes		case OPLUS_:		/* things that don't break one */
17211573Srgrimes		case OLPAREN:
17221573Srgrimes		case ORPAREN:
17231573Srgrimes			break;
17241573Srgrimes		case OQUEST_:		/* things that must be skipped */
17251573Srgrimes		case OCH_:
17261573Srgrimes			scan--;
17271573Srgrimes			do {
17281573Srgrimes				scan += OPND(s);
17291573Srgrimes				s = *scan;
17301573Srgrimes				/* assert() interferes w debug printouts */
17311573Srgrimes				if (OP(s) != O_QUEST && OP(s) != O_CH &&
17321573Srgrimes							OP(s) != OOR2) {
17331573Srgrimes					g->iflags |= BAD;
17341573Srgrimes					return;
17351573Srgrimes				}
17361573Srgrimes			} while (OP(s) != O_QUEST && OP(s) != O_CH);
17371573Srgrimes			/* fallthrough */
17381573Srgrimes		default:		/* things that break a sequence */
17391573Srgrimes			if (newlen > g->mlen) {		/* ends one */
17401573Srgrimes				start = newstart;
17411573Srgrimes				g->mlen = newlen;
17421573Srgrimes			}
17431573Srgrimes			newlen = 0;
17441573Srgrimes			break;
17451573Srgrimes		}
17461573Srgrimes	} while (OP(s) != OEND);
17471573Srgrimes
17481573Srgrimes	if (g->mlen == 0)		/* there isn't one */
17491573Srgrimes		return;
17501573Srgrimes
17511573Srgrimes	/* turn it into a character string */
17521573Srgrimes	g->must = malloc((size_t)g->mlen + 1);
17531573Srgrimes	if (g->must == NULL) {		/* argh; just forget it */
17541573Srgrimes		g->mlen = 0;
17551573Srgrimes		return;
17561573Srgrimes	}
17571573Srgrimes	cp = g->must;
17581573Srgrimes	scan = start;
17591573Srgrimes	for (i = g->mlen; i > 0; i--) {
17601573Srgrimes		while (OP(s = *scan++) != OCHAR)
17611573Srgrimes			continue;
17621573Srgrimes		assert(cp < g->must + g->mlen);
17631573Srgrimes		*cp++ = (char)OPND(s);
17641573Srgrimes	}
17651573Srgrimes	assert(cp == g->must + g->mlen);
17661573Srgrimes	*cp++ = '\0';		/* just on general principles */
17671573Srgrimes}
17681573Srgrimes
17691573Srgrimes/*
17701573Srgrimes - pluscount - count + nesting
17711573Srgrimes == static sopno pluscount(register struct parse *p, register struct re_guts *g);
17721573Srgrimes */
17731573Srgrimesstatic sopno			/* nesting depth */
17741573Srgrimespluscount(p, g)
17751573Srgrimesstruct parse *p;
17761573Srgrimesregister struct re_guts *g;
17771573Srgrimes{
17781573Srgrimes	register sop *scan;
17791573Srgrimes	register sop s;
17801573Srgrimes	register sopno plusnest = 0;
17811573Srgrimes	register sopno maxnest = 0;
17821573Srgrimes
17831573Srgrimes	if (p->error != 0)
17841573Srgrimes		return(0);	/* there may not be an OEND */
17851573Srgrimes
17861573Srgrimes	scan = g->strip + 1;
17871573Srgrimes	do {
17881573Srgrimes		s = *scan++;
17891573Srgrimes		switch (OP(s)) {
17901573Srgrimes		case OPLUS_:
17911573Srgrimes			plusnest++;
17921573Srgrimes			break;
17931573Srgrimes		case O_PLUS:
17941573Srgrimes			if (plusnest > maxnest)
17951573Srgrimes				maxnest = plusnest;
17961573Srgrimes			plusnest--;
17971573Srgrimes			break;
17981573Srgrimes		}
17991573Srgrimes	} while (OP(s) != OEND);
18001573Srgrimes	if (plusnest != 0)
18011573Srgrimes		g->iflags |= BAD;
18021573Srgrimes	return(maxnest);
18031573Srgrimes}
1804