145386Swpaul/*-
245386Swpaul * Copyright (c) 1992, 1993, 1994 Henry Spencer.
345386Swpaul * Copyright (c) 1992, 1993, 1994
445386Swpaul *	The Regents of the University of California.  All rights reserved.
545386Swpaul *
645386Swpaul * This code is derived from software contributed to Berkeley by
745386Swpaul * Henry Spencer.
845386Swpaul *
945386Swpaul * Redistribution and use in source and binary forms, with or without
1045386Swpaul * modification, are permitted provided that the following conditions
1145386Swpaul * are met:
1245386Swpaul * 1. Redistributions of source code must retain the above copyright
1345386Swpaul *    notice, this list of conditions and the following disclaimer.
1445386Swpaul * 2. Redistributions in binary form must reproduce the above copyright
1545386Swpaul *    notice, this list of conditions and the following disclaimer in the
1645386Swpaul *    documentation and/or other materials provided with the distribution.
1745386Swpaul * 4. Neither the name of the University nor the names of its contributors
1845386Swpaul *    may be used to endorse or promote products derived from this software
1945386Swpaul *    without specific prior written permission.
2045386Swpaul *
2145386Swpaul * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
2245386Swpaul * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2345386Swpaul * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2445386Swpaul * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
2545386Swpaul * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2645386Swpaul * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2745386Swpaul * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2845386Swpaul * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2945386Swpaul * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
3045386Swpaul * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3145386Swpaul * SUCH DAMAGE.
3250477Speter *
3345386Swpaul *	@(#)regexec.c	8.3 (Berkeley) 3/20/94
3445386Swpaul */
3545386Swpaul
3645386Swpaul#if defined(LIBC_SCCS) && !defined(lint)
3745386Swpaulstatic char sccsid[] = "@(#)regexec.c	8.3 (Berkeley) 3/20/94";
3845386Swpaul#endif /* LIBC_SCCS and not lint */
3945386Swpaul#include <sys/cdefs.h>
4045386Swpaul__FBSDID("$FreeBSD$");
4145386Swpaul
4245386Swpaul/*
4345386Swpaul * the outer shell of regexec()
4445386Swpaul *
4545386Swpaul * This file includes engine.c three times, after muchos fiddling with the
4645386Swpaul * macros that code uses.  This lets the same code operate on two different
4745386Swpaul * representations for state sets and characters.
4845386Swpaul */
4945386Swpaul#include <sys/types.h>
5045386Swpaul#include <stdio.h>
5145386Swpaul#include <stdlib.h>
5245386Swpaul#include <string.h>
5345386Swpaul#include <limits.h>
5445386Swpaul#include <ctype.h>
5545386Swpaul#include <regex.h>
5645386Swpaul#include <wchar.h>
5745386Swpaul#include <wctype.h>
5845386Swpaul
5945386Swpaul#include "utils.h"
6045386Swpaul#include "regex2.h"
6145386Swpaul
6245386Swpaulstatic int nope __unused = 0;	/* for use in asserts; shuts lint up */
6345386Swpaul
/*
 * xmbrtowc - mbrtowc() wrapper that always consumes at least one byte
 *
 * Decodes the next multibyte character from at most n bytes of s using
 * the conversion state *mbs.
 *
 *   wi     if non-NULL, receives the decoded wide character, or `dummy'
 *          when the bytes cannot be decoded
 *   s, n   input bytes and how many of them may be examined
 *   mbs    conversion state; reset to all zeroes on a decoding failure
 *   dummy  placeholder stored through wi on a decoding failure
 *
 * Returns the number of bytes to step over: the value reported by
 * mbrtowc() on success, and 1 both for a decoded NUL (mbrtowc() reports 0)
 * and for an invalid or incomplete sequence.
 */
static __inline size_t
xmbrtowc(wint_t *wi, const char *s, size_t n, mbstate_t *mbs, wint_t dummy)
{
	size_t nr;
	wchar_t wc = L'\0';

	nr = mbrtowc(&wc, s, n, mbs);
	if (nr == (size_t)-1 || nr == (size_t)-2) {
		/*
		 * mbrtowc() stores nothing through its first argument on
		 * failure, so wc must not be handed to the caller here.
		 * Clear the state and substitute the placeholder instead.
		 */
		memset(mbs, 0, sizeof(*mbs));
		if (wi != NULL)
			*wi = dummy;
		return (1);
	}

	if (wi != NULL)
		*wi = wc;
	/* A decoded NUL is reported as length 0; count it as one byte. */
	return (nr == 0 ? 1 : nr);
}
8345386Swpaul
/*
 * xmbrtowc_dummy - single-byte stand-in with the same signature as xmbrtowc()
 *
 * Treats the byte at *s as one complete character.  If wi is non-NULL it
 * receives that byte's value as an unsigned char.  The length, conversion
 * state and placeholder arguments are accepted but ignored.
 *
 * Always returns 1 (one byte consumed).
 */
static __inline size_t
xmbrtowc_dummy(wint_t *wi,
		const char *s,
		size_t n __unused,
		mbstate_t *mbs __unused,
		wint_t dummy __unused)
{

	if (wi == NULL)
		return (1);

	/* Go through unsigned char so bytes >= 0x80 never come out negative. */
	*wi = (unsigned char)*s;
	return (1);
}
9683115Sbrooks
/*
 * Macros for manipulating states, small version.
 *
 * A set of states is one long used as a bit vector: bit n stands for
 * state n.  engine.c is written purely in terms of these macros, so the
 * definitions in force when it is included select the representation.
 */
#define	states	long
/*
 * Width probe for the regexec() decision.  This is spelled as the concrete
 * type rather than as `states': a macro body is expanded where the macro
 * is used, and by the time regexec() uses states1 the name `states' has
 * been redefined for the large representation.
 */
#define	states1	long		/* for later use in regexec() decision */
#define	CLEAR(v)	((v) = 0)
#define	SET0(v, n)	((v) &= ~((unsigned long)1 << (n)))
#define	SET1(v, n)	((v) |= (unsigned long)1 << (n))
#define	ISSET(v, n)	(((v) & ((unsigned long)1 << (n))) != 0)
#define	ASSIGN(d, s)	((d) = (s))
#define	EQ(a, b)	((a) == (b))
/* this representation needs no per-match storage */
#define	STATEVARS	long dummy	/* dummy version */
#define	STATESETUP(m, n)	/* nothing */
#define	STATETEARDOWN(m)	/* nothing */
#define	SETUP(v)	((v) = 0)
/* a single state is a long with exactly one bit set */
#define	onestate	long
#define	INIT(o, n)	((o) = (unsigned long)1 << (n))
#define	INC(o)	((o) <<= 1)
#define	ISSTATEIN(v, o)	(((v) & (o)) != 0)
/* some abbreviations; note that some of these know variable names! */
/* (they read a variable called `here' from the scope where they are used) */
/* do "if I'm here, I can also be there" etc without branches */
#define	FWD(dst, src, n)	((dst) |= ((unsigned long)(src)&(here)) << (n))
#define	BACK(dst, src, n)	((dst) |= ((unsigned long)(src)&(here)) >> (n))
#define	ISSETBACK(v, n)	(((v) & ((unsigned long)here >> (n))) != 0)
/* no multibyte support */
#define	XMBRTOWC	xmbrtowc_dummy
#define	ZAPSTATE(mbs)	((void)(mbs))
/* function names */
#define SNAMES			/* engine.c looks after details */
12445386Swpaul
12545386Swpaul#include "engine.c"
12645386Swpaul
/*
 * Now undo things: retract every small-version definition so that the
 * next set of definitions starts from a clean slate.  states1 is not
 * retracted; regexec() still refers to it.
 */
/* state-set type and operations */
#undef	states
#undef	CLEAR
#undef	SET0
#undef	SET1
#undef	ISSET
#undef	ASSIGN
#undef	EQ
/* per-match storage hooks */
#undef	STATEVARS
#undef	STATESETUP
#undef	STATETEARDOWN
#undef	SETUP
/* single-state type and operations */
#undef	onestate
#undef	INIT
#undef	INC
#undef	ISSTATEIN
/* abbreviations built around `here' */
#undef	FWD
#undef	BACK
#undef	ISSETBACK
/* function-name selector and character decoding hooks */
#undef	SNAMES
#undef	XMBRTOWC
#undef	ZAPSTATE
14949011Swpaul
/*
 * Macros for manipulating states, large version.
 *
 * A set of states is an array of m->g->nstates chars, one per state, each
 * holding 0 or 1.  Several of these macros read a variable called `m'
 * from the scope in which they are used.
 */
#define	states		char *
#define	CLEAR(v)	memset((v), 0, m->g->nstates)
#define	SET0(v, n)	((v)[(n)] = 0)
#define	SET1(v, n)	((v)[(n)] = 1)
#define	ISSET(v, n)	((v)[(n)])
#define	ASSIGN(d, s)	memcpy((d), (s), m->g->nstates)
#define	EQ(a, b)	(memcmp((a), (b), m->g->nstates) == 0)
/* extra members: the backing store and how many vectors were handed out */
#define	STATEVARS	long vn; char *space
/* allocate room for nv vectors; leaves the calling function on failure */
#define	STATESETUP(m, nv)	{ \
		(m)->space = malloc((nv) * (m)->g->nstates); \
		if ((m)->space == NULL) \
			return (REG_ESPACE); \
		(m)->vn = 0; \
	}
#define	STATETEARDOWN(m)	{ free((m)->space); }
/* point v at the next unused vector inside the backing store */
#define	SETUP(v)	((v) = &m->space[m->vn++ * m->g->nstates])
/* a single state is simply its index */
#define	onestate	long
#define	INIT(o, n)	((o) = (n))
#define	INC(o)		((o)++)
#define	ISSTATEIN(v, o)	((v)[(o)])
/* some abbreviations; note that some of these know variable names! */
/* (they read a variable called `here' from the scope where they are used) */
/* do "if I'm here, I can also be there" etc without branches */
#define	FWD(dst, src, n)	((dst)[here + (n)] |= (src)[here])
#define	BACK(dst, src, n)	((dst)[here - (n)] |= (src)[here])
#define	ISSETBACK(v, n)	((v)[here - (n)])
/* no multibyte support */
#define	XMBRTOWC	xmbrtowc_dummy
#define	ZAPSTATE(mbs)	((void)(mbs))
/* function names */
#define	LNAMES			/* flag */
17845386Swpaul
17945386Swpaul#include "engine.c"
18045386Swpaul
/*
 * Multibyte character & large states version.
 *
 * The state-set macros stay as they are; only the function-name selector
 * and the two character-decoding hooks are swapped before engine.c is
 * included again.
 */
#undef	LNAMES
#define	MNAMES

/* decode with the real multibyte converter */
#undef	XMBRTOWC
#define	XMBRTOWC	xmbrtowc

/* clearing the conversion state now actually zeroes it */
#undef	ZAPSTATE
#define	ZAPSTATE(mbs)	memset((mbs), 0, sizeof(*(mbs)))
18845386Swpaul
18945386Swpaul#include "engine.c"
19045386Swpaul
19145386Swpaul/*
19245386Swpaul - regexec - interface for matching
19345386Swpaul = extern int regexec(const regex_t *, const char *, size_t, \
19445386Swpaul =					regmatch_t [], int);
19545386Swpaul = #define	REG_NOTBOL	00001
19645386Swpaul = #define	REG_NOTEOL	00002
19745386Swpaul = #define	REG_STARTEND	00004
19845386Swpaul = #define	REG_TRACE	00400	// tracing of execution
19945386Swpaul = #define	REG_LARGE	01000	// force large representation
20045386Swpaul = #define	REG_BACKR	02000	// force use of backref code
20145386Swpaul *
20245386Swpaul * We put this here so we can exploit knowledge of the state representation
20345386Swpaul * when choosing which matcher to call.  Also, by this point the matchers
20449011Swpaul * have been prototyped.
20549011Swpaul */
20649011Swpaulint				/* 0 success, REG_NOMATCH failure */
20749011Swpaulregexec(const regex_t * __restrict preg,
20849011Swpaul	const char * __restrict string,
20949011Swpaul	size_t nmatch,
21049011Swpaul	regmatch_t pmatch[__restrict],
21149011Swpaul	int eflags)
21249011Swpaul{
21349011Swpaul	struct re_guts *g = preg->re_g;
21451455Swpaul#ifdef REDEBUG
21549011Swpaul#	define	GOODFLAGS(f)	(f)
21649011Swpaul#else
21749011Swpaul#	define	GOODFLAGS(f)	((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND))
21849011Swpaul#endif
21949011Swpaul
22049011Swpaul	if (preg->re_magic != MAGIC1 || g->magic != MAGIC2)
22151533Swpaul		return(REG_BADPAT);
22249011Swpaul	assert(!(g->iflags&BAD));
22345386Swpaul	if (g->iflags&BAD)		/* backstop for no-debug case */
22445386Swpaul		return(REG_BADPAT);
22545386Swpaul	eflags = GOODFLAGS(eflags);
22645386Swpaul
22745386Swpaul	if (MB_CUR_MAX > 1)
22845386Swpaul		return(mmatcher(g, (char *)string, nmatch, pmatch, eflags));
22945386Swpaul	else if (g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags&REG_LARGE))
23045386Swpaul		return(smatcher(g, (char *)string, nmatch, pmatch, eflags));
23145386Swpaul	else
23245386Swpaul		return(lmatcher(g, (char *)string, nmatch, pmatch, eflags));
23345386Swpaul}
23445386Swpaul