11573Srgrimes/*-
21573Srgrimes * Copyright (c) 1992, 1993, 1994 Henry Spencer.
31573Srgrimes * Copyright (c) 1992, 1993, 1994
41573Srgrimes *	The Regents of the University of California.  All rights reserved.
51573Srgrimes *
61573Srgrimes * This code is derived from software contributed to Berkeley by
71573Srgrimes * Henry Spencer.
81573Srgrimes *
91573Srgrimes * Redistribution and use in source and binary forms, with or without
101573Srgrimes * modification, are permitted provided that the following conditions
111573Srgrimes * are met:
121573Srgrimes * 1. Redistributions of source code must retain the above copyright
131573Srgrimes *    notice, this list of conditions and the following disclaimer.
141573Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
151573Srgrimes *    notice, this list of conditions and the following disclaimer in the
161573Srgrimes *    documentation and/or other materials provided with the distribution.
171573Srgrimes * 4. Neither the name of the University nor the names of its contributors
181573Srgrimes *    may be used to endorse or promote products derived from this software
191573Srgrimes *    without specific prior written permission.
201573Srgrimes *
211573Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
221573Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
231573Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
241573Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
251573Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
261573Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
271573Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
281573Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
291573Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
301573Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
311573Srgrimes * SUCH DAMAGE.
321573Srgrimes *
331573Srgrimes *	@(#)regexec.c	8.3 (Berkeley) 3/20/94
341573Srgrimes */
351573Srgrimes
361573Srgrimes#if defined(LIBC_SCCS) && !defined(lint)
371573Srgrimesstatic char sccsid[] = "@(#)regexec.c	8.3 (Berkeley) 3/20/94";
381573Srgrimes#endif /* LIBC_SCCS and not lint */
3992889Sobrien#include <sys/cdefs.h>
4092889Sobrien__FBSDID("$FreeBSD$");
411573Srgrimes
/*
 * the outer shell of regexec()
 *
 * This file includes engine.c three times, after much fiddling with the
 * macros that code uses.  This lets the same code operate on two different
 * representations for state sets, and on both single-byte and multibyte
 * characters.
 */
491573Srgrimes#include <sys/types.h>
501573Srgrimes#include <stdio.h>
511573Srgrimes#include <stdlib.h>
521573Srgrimes#include <string.h>
531573Srgrimes#include <limits.h>
541573Srgrimes#include <ctype.h>
551573Srgrimes#include <regex.h>
56132019Stjr#include <wchar.h>
57132019Stjr#include <wctype.h>
581573Srgrimes
591573Srgrimes#include "utils.h"
601573Srgrimes#include "regex2.h"
611573Srgrimes
62111010Snectarstatic int nope __unused = 0;	/* for use in asserts; shuts lint up */
631573Srgrimes
/*
 * xmbrtowc - mbrtowc() wrapper that always consumes at least one byte
 *
 * Converts the multibyte character starting at s (looking at no more than
 * n bytes) using the conversion state *mbs.  When wi is not NULL the
 * resulting wide character is stored through it.
 *
 * A null character is reported as being one byte long.  If the sequence is
 * invalid or incomplete, *mbs is reset to all zeroes, dummy is stored
 * through wi in place of a converted character, and a length of 1 is
 * returned.
 *
 * Returns the number of bytes consumed; never 0.
 */
static __inline size_t
xmbrtowc(wint_t *wi, const char *s, size_t n, mbstate_t *mbs, wint_t dummy)
{
	size_t nr;
	wchar_t wc = L'\0';	/* mbrtowc() stores nothing on failure */

	nr = mbrtowc(&wc, s, n, mbs);
	if (nr == (size_t)-1 || nr == (size_t)-2) {
		/* Bad or truncated sequence: start over with a clean state. */
		memset(mbs, 0, sizeof(*mbs));
		if (wi != NULL)
			*wi = dummy;
		return (1);
	}
	if (wi != NULL)
		*wi = wc;
	/* A converted null character still occupies one byte of input. */
	return (nr == 0 ? 1 : nr);
}
83132019Stjr
/*
 * xmbrtowc_dummy - single-byte stand-in with the same signature as xmbrtowc()
 *
 * Treats the byte at s as one complete character: when wi is not NULL the
 * byte, taken as an unsigned char, is stored through it.  n, mbs and dummy
 * are accepted only so the two functions are interchangeable and are never
 * examined.
 *
 * Always returns 1.
 */
static __inline size_t
xmbrtowc_dummy(wint_t *wi, const char *s, size_t n, mbstate_t *mbs,
    wint_t dummy)
{

	/* Deliberately ignored; see above. */
	(void)n;
	(void)mbs;
	(void)dummy;

	if (wi != NULL)
		*wi = (unsigned char)*s;
	return (1);
}
96132019Stjr
/*
 * macros for manipulating states, small version
 *
 * Here a set of states is the bits of a single long (state n is bit n), and
 * a onestate is the one-bit mask for a state.  No storage has to be
 * allocated, so STATESETUP/STATETEARDOWN expand to nothing and STATEVARS
 * only declares a placeholder.
 */
#define	states	long
/*
 * For later use in the regexec() decision.  Spelled as long rather than as
 * `states': macros expand lazily and `states' is redefined before regexec()
 * is compiled, so an indirect definition would measure the wrong type there.
 */
#define	states1	long
#define	CLEAR(v)	((v) = 0)
#define	SET0(v, n)	((v) &= ~((unsigned long)1 << (n)))
#define	SET1(v, n)	((v) |= (unsigned long)1 << (n))
#define	ISSET(v, n)	(((v) & ((unsigned long)1 << (n))) != 0)
#define	ASSIGN(d, s)	((d) = (s))
#define	EQ(a, b)	((a) == (b))
#define	STATEVARS	long dummy	/* dummy version */
#define	STATESETUP(m, n)	/* nothing */
#define	STATETEARDOWN(m)	/* nothing */
#define	SETUP(v)	((v) = 0)
#define	onestate	long
#define	INIT(o, n)	((o) = (unsigned long)1 << (n))
#define	INC(o)	((o) <<= 1)
#define	ISSTATEIN(v, o)	(((v) & (o)) != 0)
/* some abbreviations; note that some of these know variable names! */
/* (they all refer to a onestate called `here' in the expanding scope) */
/* do "if I'm here, I can also be there" etc without branches */
#define	FWD(dst, src, n)	((dst) |= ((unsigned long)(src)&(here)) << (n))
#define	BACK(dst, src, n)	((dst) |= ((unsigned long)(src)&(here)) >> (n))
#define	ISSETBACK(v, n)	(((v) & ((unsigned long)here >> (n))) != 0)
/* no multibyte support */
#define	XMBRTOWC	xmbrtowc_dummy
#define	ZAPSTATE(mbs)	((void)(mbs))
/* function names */
#define SNAMES			/* engine.c looks after details */
1241573Srgrimes
1251573Srgrimes#include "engine.c"
1261573Srgrimes
/* now undo things, so the state macros can be defined afresh below */
#undef	states
#undef	CLEAR
#undef	SET0
#undef	SET1
#undef	ISSET
#undef	ASSIGN
#undef	EQ
#undef	STATEVARS
#undef	STATESETUP
#undef	STATETEARDOWN
#undef	SETUP
#undef	onestate
#undef	INIT
#undef	INC
#undef	ISSTATEIN
#undef	FWD
#undef	BACK
#undef	ISSETBACK
#undef	SNAMES
#undef	XMBRTOWC
#undef	ZAPSTATE

/*
 * macros for manipulating states, large version
 *
 * Here a set of states is an array of m->g->nstates chars, one per state,
 * and a onestate is simply a state's index.  Most of these macros expect a
 * variable named `m' (with g->nstates, space and vn reachable through it)
 * to be in scope where they are expanded.
 *
 * STATESETUP obtains room for nv sets with one malloc() and makes the
 * enclosing function return REG_ESPACE if that fails; each SETUP hands out
 * the next unused set from that area; STATETEARDOWN frees the area.
 */
#define	states	char *
#define	CLEAR(v)	memset(v, 0, m->g->nstates)
#define	SET0(v, n)	((v)[n] = 0)
#define	SET1(v, n)	((v)[n] = 1)
#define	ISSET(v, n)	((v)[n])
#define	ASSIGN(d, s)	memcpy(d, s, m->g->nstates)
#define	EQ(a, b)	(memcmp(a, b, m->g->nstates) == 0)
#define	STATEVARS	long vn; char *space
#define	STATESETUP(m, nv)	{ (m)->space = malloc((nv)*(m)->g->nstates); \
				if ((m)->space == NULL) return(REG_ESPACE); \
				(m)->vn = 0; }
#define	STATETEARDOWN(m)	{ free((m)->space); }
#define	SETUP(v)	((v) = &m->space[m->vn++ * m->g->nstates])
#define	onestate	long
#define	INIT(o, n)	((o) = (n))
#define	INC(o)	((o)++)
#define	ISSTATEIN(v, o)	((v)[o])
/* some abbreviations; note that some of these know variable names! */
/* (they all refer to a onestate called `here' in the expanding scope) */
/* do "if I'm here, I can also be there" etc without branches */
#define	FWD(dst, src, n)	((dst)[here+(n)] |= (src)[here])
#define	BACK(dst, src, n)	((dst)[here-(n)] |= (src)[here])
#define	ISSETBACK(v, n)	((v)[here - (n)])
/* no multibyte support */
#define	XMBRTOWC	xmbrtowc_dummy
#define	ZAPSTATE(mbs)	((void)(mbs))
/* function names */
#define	LNAMES			/* flag */
1781573Srgrimes
1791573Srgrimes#include "engine.c"
1801573Srgrimes
/*
 * multibyte character & large states version
 *
 * The large-version state macros stay exactly as defined above; only the
 * character decoding hooks and the name-selection flag are switched, so the
 * next inclusion of engine.c decodes input with xmbrtowc() and clears the
 * conversion state for real in ZAPSTATE.
 */
#undef	LNAMES
#undef	XMBRTOWC
#undef	ZAPSTATE
#define	XMBRTOWC	xmbrtowc
#define	ZAPSTATE(mbs)	memset((mbs), 0, sizeof(*(mbs)))
#define	MNAMES
188132019Stjr
189132019Stjr#include "engine.c"
190132019Stjr
1911573Srgrimes/*
1921573Srgrimes - regexec - interface for matching
1931573Srgrimes = extern int regexec(const regex_t *, const char *, size_t, \
1941573Srgrimes =					regmatch_t [], int);
1951573Srgrimes = #define	REG_NOTBOL	00001
1961573Srgrimes = #define	REG_NOTEOL	00002
1971573Srgrimes = #define	REG_STARTEND	00004
1981573Srgrimes = #define	REG_TRACE	00400	// tracing of execution
1991573Srgrimes = #define	REG_LARGE	01000	// force large representation
2001573Srgrimes = #define	REG_BACKR	02000	// force use of backref code
2011573Srgrimes *
2021573Srgrimes * We put this here so we can exploit knowledge of the state representation
2031573Srgrimes * when choosing which matcher to call.  Also, by this point the matchers
2041573Srgrimes * have been prototyped.
2051573Srgrimes */
2061573Srgrimesint				/* 0 success, REG_NOMATCH failure */
207170528Sdelphijregexec(const regex_t * __restrict preg,
208170528Sdelphij	const char * __restrict string,
209170528Sdelphij	size_t nmatch,
210170528Sdelphij	regmatch_t pmatch[__restrict],
211170528Sdelphij	int eflags)
2121573Srgrimes{
21392889Sobrien	struct re_guts *g = preg->re_g;
2141573Srgrimes#ifdef REDEBUG
2151573Srgrimes#	define	GOODFLAGS(f)	(f)
2161573Srgrimes#else
2171573Srgrimes#	define	GOODFLAGS(f)	((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND))
2181573Srgrimes#endif
2191573Srgrimes
2201573Srgrimes	if (preg->re_magic != MAGIC1 || g->magic != MAGIC2)
2211573Srgrimes		return(REG_BADPAT);
2221573Srgrimes	assert(!(g->iflags&BAD));
2231573Srgrimes	if (g->iflags&BAD)		/* backstop for no-debug case */
2241573Srgrimes		return(REG_BADPAT);
2251573Srgrimes	eflags = GOODFLAGS(eflags);
2261573Srgrimes
227132019Stjr	if (MB_CUR_MAX > 1)
228132019Stjr		return(mmatcher(g, (char *)string, nmatch, pmatch, eflags));
229132019Stjr	else if (g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags&REG_LARGE))
2301573Srgrimes		return(smatcher(g, (char *)string, nmatch, pmatch, eflags));
2311573Srgrimes	else
2321573Srgrimes		return(lmatcher(g, (char *)string, nmatch, pmatch, eflags));
2331573Srgrimes}
234