145386Swpaul/*- 245386Swpaul * Copyright (c) 1992, 1993, 1994 Henry Spencer. 345386Swpaul * Copyright (c) 1992, 1993, 1994 445386Swpaul * The Regents of the University of California. All rights reserved. 545386Swpaul * 645386Swpaul * This code is derived from software contributed to Berkeley by 745386Swpaul * Henry Spencer. 845386Swpaul * 945386Swpaul * Redistribution and use in source and binary forms, with or without 1045386Swpaul * modification, are permitted provided that the following conditions 1145386Swpaul * are met: 1245386Swpaul * 1. Redistributions of source code must retain the above copyright 1345386Swpaul * notice, this list of conditions and the following disclaimer. 1445386Swpaul * 2. Redistributions in binary form must reproduce the above copyright 1545386Swpaul * notice, this list of conditions and the following disclaimer in the 1645386Swpaul * documentation and/or other materials provided with the distribution. 1745386Swpaul * 4. Neither the name of the University nor the names of its contributors 1845386Swpaul * may be used to endorse or promote products derived from this software 1945386Swpaul * without specific prior written permission. 2045386Swpaul * 2145386Swpaul * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 2245386Swpaul * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2345386Swpaul * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2445386Swpaul * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 2545386Swpaul * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2645386Swpaul * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2745386Swpaul * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2845386Swpaul * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2945386Swpaul * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 3045386Swpaul * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 3145386Swpaul * SUCH DAMAGE. 3250477Speter * 3345386Swpaul * @(#)regexec.c 8.3 (Berkeley) 3/20/94 3445386Swpaul */ 3545386Swpaul 3645386Swpaul#if defined(LIBC_SCCS) && !defined(lint) 3745386Swpaulstatic char sccsid[] = "@(#)regexec.c 8.3 (Berkeley) 3/20/94"; 3845386Swpaul#endif /* LIBC_SCCS and not lint */ 3945386Swpaul#include <sys/cdefs.h> 4045386Swpaul__FBSDID("$FreeBSD$"); 4145386Swpaul 4245386Swpaul/* 4345386Swpaul * the outer shell of regexec() 4445386Swpaul * 4545386Swpaul * This file includes engine.c three times, after muchos fiddling with the 4645386Swpaul * macros that code uses. This lets the same code operate on two different 4745386Swpaul * representations for state sets and characters. 4845386Swpaul */ 4945386Swpaul#include <sys/types.h> 5045386Swpaul#include <stdio.h> 5145386Swpaul#include <stdlib.h> 5245386Swpaul#include <string.h> 5345386Swpaul#include <limits.h> 5445386Swpaul#include <ctype.h> 5545386Swpaul#include <regex.h> 5645386Swpaul#include <wchar.h> 5745386Swpaul#include <wctype.h> 5845386Swpaul 5945386Swpaul#include "utils.h" 6045386Swpaul#include "regex2.h" 6145386Swpaul 6245386Swpaulstatic int nope __unused = 0; /* for use in asserts; shuts lint up */ 6345386Swpaul 6445386Swpaulstatic __inline size_t 6545386Swpaulxmbrtowc(wint_t *wi, const char *s, size_t n, mbstate_t *mbs, wint_t dummy) 6645386Swpaul{ 6745386Swpaul size_t nr; 6845386Swpaul wchar_t wc; 6945386Swpaul 7045386Swpaul nr = mbrtowc(&wc, s, n, mbs); 7145386Swpaul if (wi != NULL) 7245386Swpaul *wi = wc; 7345386Swpaul if (nr == 0) 7445386Swpaul return (1); 7545386Swpaul else if (nr == (size_t)-1 || nr == (size_t)-2) { 7645386Swpaul memset(mbs, 0, sizeof(*mbs)); 7745386Swpaul if (wi != NULL) 7845386Swpaul *wi = dummy; 7945386Swpaul return (1); 8045386Swpaul } else 8145386Swpaul return (nr); 8245386Swpaul} 8345386Swpaul 8445386Swpaulstatic __inline size_t 8545386Swpaulxmbrtowc_dummy(wint_t *wi, 8645386Swpaul const char *s, 8745386Swpaul size_t n __unused, 8845386Swpaul mbstate_t *mbs __unused, 8945386Swpaul wint_t dummy __unused) 9045386Swpaul{ 9145386Swpaul 9245386Swpaul if (wi != NULL) 9345386Swpaul *wi = (unsigned char)*s; 9445386Swpaul return (1); 9583115Sbrooks} 9683115Sbrooks 9745386Swpaul/* macros for manipulating states, small version */ 9845386Swpaul#define states long 9945386Swpaul#define states1 states /* for later use in regexec() decision */ 10045386Swpaul#define CLEAR(v) ((v) = 0) 10145386Swpaul#define SET0(v, n) ((v) &= ~((unsigned long)1 << (n))) 10245386Swpaul#define SET1(v, n) ((v) |= (unsigned long)1 << (n)) 10345386Swpaul#define ISSET(v, n) (((v) & ((unsigned long)1 << (n))) != 0) 10445386Swpaul#define ASSIGN(d, s) ((d) = (s)) 10545386Swpaul#define EQ(a, b) ((a) == (b)) 10645386Swpaul#define STATEVARS long dummy /* dummy version */ 10745386Swpaul#define STATESETUP(m, n) /* nothing */ 10849011Swpaul#define STATETEARDOWN(m) /* nothing */ 10949011Swpaul#define SETUP(v) ((v) = 0) 11049011Swpaul#define onestate long 11145386Swpaul#define INIT(o, n) ((o) = (unsigned long)1 << (n)) 11245386Swpaul#define INC(o) ((o) <<= 1) 11345386Swpaul#define ISSTATEIN(v, o) (((v) & (o)) != 0) 11445386Swpaul/* some abbreviations; note that some of these know variable names! */ 11545386Swpaul/* do "if I'm here, I can also be there" etc without branches */ 11645386Swpaul#define FWD(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) << (n)) 11745386Swpaul#define BACK(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) >> (n)) 11845386Swpaul#define ISSETBACK(v, n) (((v) & ((unsigned long)here >> (n))) != 0) 11958698Sjlemon/* no multibyte support */ 12045386Swpaul#define XMBRTOWC xmbrtowc_dummy 12145386Swpaul#define ZAPSTATE(mbs) ((void)(mbs)) 12245386Swpaul/* function names */ 12350477Speter#define SNAMES /* engine.c looks after details */ 12445386Swpaul 12545386Swpaul#include "engine.c" 12645386Swpaul 12745386Swpaul/* now undo things */ 12845386Swpaul#undef states 12945386Swpaul#undef CLEAR 13045386Swpaul#undef SET0 13145386Swpaul#undef SET1 13263702Swpaul#undef ISSET 13363699Swpaul#undef ASSIGN 13463702Swpaul#undef EQ 13545386Swpaul#undef STATEVARS 13645386Swpaul#undef STATESETUP 13745386Swpaul#undef STATETEARDOWN 13864139Swpaul#undef SETUP 13964139Swpaul#undef onestate 14064139Swpaul#undef INIT 14145386Swpaul#undef INC 14245386Swpaul#undef ISSTATEIN 14356206Swpaul#undef FWD 14456206Swpaul#undef BACK 14545386Swpaul#undef ISSETBACK 14645386Swpaul#undef SNAMES 14745386Swpaul#undef XMBRTOWC 14849011Swpaul#undef ZAPSTATE 14949011Swpaul 15049011Swpaul/* macros for manipulating states, large version */ 15145386Swpaul#define states char * 15245386Swpaul#define CLEAR(v) memset(v, 0, m->g->nstates) 15345386Swpaul#define SET0(v, n) ((v)[n] = 0) 15445386Swpaul#define SET1(v, n) ((v)[n] = 1) 15545386Swpaul#define ISSET(v, n) ((v)[n]) 15645386Swpaul#define ASSIGN(d, s) memcpy(d, s, m->g->nstates) 15745386Swpaul#define EQ(a, b) (memcmp(a, b, m->g->nstates) == 0) 15845386Swpaul#define STATEVARS long vn; char *space 15945386Swpaul#define STATESETUP(m, nv) { (m)->space = malloc((nv)*(m)->g->nstates); \ 16045386Swpaul if ((m)->space == NULL) return(REG_ESPACE); \ 16145386Swpaul (m)->vn = 0; } 16245386Swpaul#define STATETEARDOWN(m) { free((m)->space); } 16345386Swpaul#define SETUP(v) ((v) = &m->space[m->vn++ * m->g->nstates]) 16445386Swpaul#define onestate long 16549011Swpaul#define INIT(o, n) ((o) = (n)) 16645386Swpaul#define INC(o) ((o)++) 16745386Swpaul#define ISSTATEIN(v, o) ((v)[o]) 16845386Swpaul/* some abbreviations; note that some of these know variable names! */ 16945386Swpaul/* do "if I'm here, I can also be there" etc without branches */ 17045386Swpaul#define FWD(dst, src, n) ((dst)[here+(n)] |= (src)[here]) 17145386Swpaul#define BACK(dst, src, n) ((dst)[here-(n)] |= (src)[here]) 17245386Swpaul#define ISSETBACK(v, n) ((v)[here - (n)]) 17345386Swpaul/* no multibyte support */ 17445386Swpaul#define XMBRTOWC xmbrtowc_dummy 17545386Swpaul#define ZAPSTATE(mbs) ((void)(mbs)) 17645386Swpaul/* function names */ 17745386Swpaul#define LNAMES /* flag */ 17845386Swpaul 17945386Swpaul#include "engine.c" 18045386Swpaul 18145386Swpaul/* multibyte character & large states version */ 18245386Swpaul#undef LNAMES 18345386Swpaul#undef XMBRTOWC 18445386Swpaul#undef ZAPSTATE 18545386Swpaul#define XMBRTOWC xmbrtowc 18645386Swpaul#define ZAPSTATE(mbs) memset((mbs), 0, sizeof(*(mbs))) 18764837Sdwmalone#define MNAMES 18845386Swpaul 18945386Swpaul#include "engine.c" 19045386Swpaul 19145386Swpaul/* 19245386Swpaul - regexec - interface for matching 19345386Swpaul = extern int regexec(const regex_t *, const char *, size_t, \ 19445386Swpaul = regmatch_t [], int); 19545386Swpaul = #define REG_NOTBOL 00001 19645386Swpaul = #define REG_NOTEOL 00002 19745386Swpaul = #define REG_STARTEND 00004 19845386Swpaul = #define REG_TRACE 00400 // tracing of execution 19945386Swpaul = #define REG_LARGE 01000 // force large representation 20045386Swpaul = #define REG_BACKR 02000 // force use of backref code 20145386Swpaul * 20245386Swpaul * We put this here so we can exploit knowledge of the state representation 20345386Swpaul * when choosing which matcher to call. Also, by this point the matchers 20449011Swpaul * have been prototyped. 20549011Swpaul */ 20649011Swpaulint /* 0 success, REG_NOMATCH failure */ 20749011Swpaulregexec(const regex_t * __restrict preg, 20849011Swpaul const char * __restrict string, 20949011Swpaul size_t nmatch, 21049011Swpaul regmatch_t pmatch[__restrict], 21149011Swpaul int eflags) 21249011Swpaul{ 21349011Swpaul struct re_guts *g = preg->re_g; 21451455Swpaul#ifdef REDEBUG 21549011Swpaul# define GOODFLAGS(f) (f) 21649011Swpaul#else 21749011Swpaul# define GOODFLAGS(f) ((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND)) 21849011Swpaul#endif 21949011Swpaul 22049011Swpaul if (preg->re_magic != MAGIC1 || g->magic != MAGIC2) 22151533Swpaul return(REG_BADPAT); 22249011Swpaul assert(!(g->iflags&BAD)); 22345386Swpaul if (g->iflags&BAD) /* backstop for no-debug case */ 22445386Swpaul return(REG_BADPAT); 22545386Swpaul eflags = GOODFLAGS(eflags); 22645386Swpaul 22745386Swpaul if (MB_CUR_MAX > 1) 22845386Swpaul return(mmatcher(g, (char *)string, nmatch, pmatch, eflags)); 22945386Swpaul else if (g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags®_LARGE)) 23045386Swpaul return(smatcher(g, (char *)string, nmatch, pmatch, eflags)); 23145386Swpaul else 23245386Swpaul return(lmatcher(g, (char *)string, nmatch, pmatch, eflags)); 23345386Swpaul} 23445386Swpaul