1/***********************************************************************
2*                                                                      *
3*               This software is part of the ast package               *
4*          Copyright (c) 1985-2011 AT&T Intellectual Property          *
5*                      and is licensed under the                       *
6*                 Eclipse Public License, Version 1.0                  *
7*                    by AT&T Intellectual Property                     *
8*                                                                      *
9*                A copy of the License is available at                 *
10*          http://www.eclipse.org/org/documents/epl-v10.html           *
11*         (with md5 checksum b35adb5213ca9657e911e9befb180842)         *
12*                                                                      *
13*              Information and Software Systems Research               *
14*                            AT&T Research                             *
15*                           Florham Park NJ                            *
16*                                                                      *
17*                 Glenn Fowler <gsf@research.att.com>                  *
18*                  David Korn <dgk@research.att.com>                   *
19*                   Phong Vo <kpv@research.att.com>                    *
20*                                                                      *
21***********************************************************************/
22#pragma prototyped
23/*
24 * regcmp implementation
25 */
26
27#include <ast.h>
28#include <libgen.h>
29#include <regex.h>
30#include <align.h>
31
32#define INC		(2*1024)
33#define TOT		(16*1024)
34#define SUB		10
35
36typedef struct
37{
38	char*		cur;
39	regex_t		re;
40	unsigned char	sub[SUB];
41	int		nsub;
42	size_t		size;
43	char		buf[ALIGN_BOUND2];
44} Regex_t;
45
/*
 * __loc1 is assigned by regex() after a successful match and then
 * points at the first matched character of the subject
 *
 * NOTE(review): __DEFINE__ is not declared in this file -- presumably it
 * defines the exported variable with the given initial value; confirm
 */
__DEFINE__(char*, __loc1, 0);
47
48static void*
49block(void* handle, void* data, size_t size)
50{
51	register Regex_t*	re = (Regex_t*)handle;
52
53	if (data || (size = roundof(size, ALIGN_BOUND2)) > (re->buf + re->size - re->cur))
54		return 0;
55	data = (void*)re->cur;
56	re->cur += size;
57	return data;
58}
59
60char*
61regcmp(const char* pattern, ...)
62{
63	register char*		s;
64	register Regex_t*	re;
65	register size_t		n;
66	register int		c;
67	register int		p;
68	int			b;
69	int			e;
70	int			i;
71	int			j;
72	int			nsub;
73	register Sfio_t*	sp;
74	unsigned char		paren[128];
75	unsigned char		sub[SUB];
76	va_list			ap;
77
78	va_start(ap, pattern);
79	if (pattern || !*pattern || !(sp = sfstropen()))
80		e = 1;
81	else
82	{
83		e = 0;
84		memset(paren, 0, sizeof(paren));
85		n = 0;
86		p = -1;
87		b = 0;
88		nsub = 0;
89		s = (char*)pattern;
90		do
91		{
92			while (c = *s++)
93			{
94				if (c == '\\')
95				{
96					sfputc(sp, c);
97					if (!(c = *s++))
98						break;
99				}
100				else if (b)
101				{
102					if (c == ']')
103						b = 0;
104				}
105				else if (c == '[')
106				{
107					b = 1;
108					if (*s == '^')
109					{
110						sfputc(sp, c);
111						c = *s++;
112					}
113					if (*s == ']')
114					{
115						sfputc(sp, c);
116						c = *s++;
117					}
118				}
119				else if (c == '(')
120				{
121					/*
122					 * someone explain in one sentence why
123					 * a cast is needed to make this work
124					 */
125
126					if (p < (int)(elementsof(paren) - 1))
127						p++;
128					paren[p] = ++n;
129				}
130				else if (c == ')' && p >= 0)
131				{
132					for (i = p; i > 0; i--)
133						if (paren[i])
134							break;
135					if (*s == '$' && (j = *(s + 1)) >= '0' && j <= '9')
136					{
137						s += 2;
138						j -= '0';
139						if (nsub <= j)
140						{
141							if (!nsub)
142								memset(sub, 0, sizeof(sub));
143							nsub = j + 1;
144						}
145						sub[j] = paren[i] + 1;
146					}
147					paren[i] = 0;
148				}
149				sfputc(sp, c);
150			}
151		} while (s = va_arg(ap, char*));
152	}
153	va_end(ap);
154	if (e)
155		return 0;
156	if (!(s = sfstruse(sp)))
157	{
158		sfstrclose(sp);
159		return 0;
160	}
161	re = 0;
162	n = 0;
163	do
164	{
165		if ((n += INC) > TOT || !(re = newof(re, Regex_t, 0, n)))
166		{
167			if (re)
168				free(re);
169			sfstrclose(sp);
170			return 0;
171		}
172		re->cur = re->buf;
173		re->size = n + ALIGN_BOUND2 - sizeof(Regex_t);
174		regalloc(re, block, REG_NOFREE);
175		c = regcomp(&re->re, s, REG_EXTENDED|REG_LENIENT|REG_NULL);
176		regalloc(NiL, NiL, 0);
177	} while (c == REG_ESPACE);
178	sfstrclose(sp);
179	if (c)
180	{
181		free(re);
182		return 0;
183	}
184	if (re->nsub = nsub)
185		memcpy(re->sub, sub, (nsub + 1) * sizeof(sub[0]));
186	return (char*)re;
187}
188
189char*
190regex(const char* handle, const char* subject, ...)
191{
192	register Regex_t*	re;
193	register int		n;
194	register int		i;
195	register int		k;
196	char*			sub[SUB + 1];
197	regmatch_t		match[SUB + 1];
198	va_list			ap;
199
200	va_start(ap, subject);
201	if (!(re = (Regex_t*)handle) || !subject)
202		k = 1;
203	else
204	{
205		k = 0;
206		for (n = 0; n < re->nsub; n++)
207			sub[n] = va_arg(ap, char*);
208	}
209	va_end(ap);
210	if (k)
211		return 0;
212	if (regexec(&re->re, subject, SUB + 1, match, 0))
213		return 0;
214	for (n = 0; n < re->nsub; n++)
215		if (i = re->sub[n])
216		{
217			i--;
218			k = match[i].rm_eo - match[i].rm_so;
219			strlcpy(sub[n], subject + match[i].rm_so, k);
220			*(sub[n] + k) = 0;
221		}
222	__loc1 = (char*)subject + match[0].rm_so;
223	return (char*)subject + match[0].rm_eo;
224}
225