1/***********************************************************************
2*                                                                      *
3*               This software is part of the ast package               *
4*          Copyright (c) 1985-2011 AT&T Intellectual Property          *
5*                      and is licensed under the                       *
6*                 Eclipse Public License, Version 1.0                  *
7*                    by AT&T Intellectual Property                     *
8*                                                                      *
9*                A copy of the License is available at                 *
10*          http://www.eclipse.org/org/documents/epl-v10.html           *
11*         (with md5 checksum b35adb5213ca9657e911e9befb180842)         *
12*                                                                      *
13*              Information and Software Systems Research               *
14*                            AT&T Research                             *
15*                           Florham Park NJ                            *
16*                                                                      *
17*                 Glenn Fowler <gsf@research.att.com>                  *
18*                  David Korn <dgk@research.att.com>                   *
19*                   Phong Vo <kpv@research.att.com>                    *
20*                                                                      *
21***********************************************************************/
22#pragma prototyped
23
24/*
25 * posix regex state and alloc
26 */
27
28#include "reglib.h"
29
30#if _PACKAGE_ast
31
32#include <ccode.h>
33
34#else
35
36#define CC_bel		'\a'
37#define CC_esc		'\033'
38#define CC_vt		'\v'
39
40#endif
41
42/*
43 * state shared by all threads
44 */
45
46State_t		state =
47{
	/*
	 * first member: a pair of -1 values
	 * NOTE(review): presumably the "no match" offsets whose type is
	 * declared in reglib.h -- confirm there
	 */
48	{ -1, -1 },
49
50	/*
	 * escape code table
	 * the "funny" things get special treatment at ends of BRE
	 *
	 * each entry is a key character followed by 15 values, written
	 * below as 5 rows of 3: one row per pattern flavor in the order
	 * BRE, ERE, ARE, SRE, KRE, and within a row the columns
	 * normal, escaped, escaped-char-class
	 * the resulting value indices are
	 *
	 *	BRE  0:normal  1:escaped  2:escaped-char-class
	 *	ERE  3:normal  4:escaped  5:escaped-char-class
	 *	ARE  6:normal  7:escaped  8:escaped-char-class
	 *	SRE  9:normal 10:escaped 11:escaped-char-class
	 *	KRE 12:normal 13:escaped 14:escaped-char-class
	 *
	 * a value is either a plain character constant or one of the
	 * T_* codes (T_BAD, T_ESCAPE, T_BACK+n, T_STAR, ...)
	 * NOTE(review): the T_* codes and the State_t member layout are
	 * not visible in this file, presumably declared in reglib.h --
	 * confirm there before adding, removing or reordering entries
	 *
	 * there are 52 entries in all: 20 punctuation keys, 10 digits,
	 * 11 perl style letters and 11 C escape letters
	 */
60
	/* operators and other punctuation */
61	'\\',
62		'\\',		'\\',		'\\',
63		'\\',		'\\',		'\\',
64		'\\',		'\\',		'\\',
65		'\\',		'\\',		'\\',
66		'\\',		'\\',		'\\',
67	'^',	/* funny */
68		'^',		'^',		'^',
69		T_CFLX,		'^',		'^',
70		T_CFLX,		'^',		'^',
71		'^',		'^',		'^',
72		'^',		'^',		'^',
73	'.',
74		T_DOT,		'.',		T_BAD,
75		T_DOT, 		'.',		T_BAD,
76		T_DOT, 		'.',		T_BAD,
77		'.',		'.',		T_BAD,
78		'.',		'.',		T_BAD,
79	'$',	/* funny */
80		'$',		'$',		T_BAD,
81		T_DOLL, 	'$',		T_BAD,
82		T_DOLL, 	'$',		T_BAD,
83		'$',		'$',		T_BAD,
84		'$',		'$',		T_BAD,
85	'*',
86		T_STAR,		'*',		T_BAD,
87		T_STAR, 	'*',		T_BAD,
88		T_STAR, 	'*',		T_BAD,
89		T_STAR, 	'*',		'*',
90		T_STAR, 	'*',		'*',
91	'[',
92		T_BRA,		'[',		'[',
93		T_BRA,		'[',		'[',
94		T_BRA,		'[',		'[',
95		T_BRA,		'[',		'[',
96		T_BRA,		'[',		'[',
97	'|',
98		'|',		T_BAD,		T_BAD,
99		T_BAR,		'|',		T_BAD,
100		T_BAR,		'|',		T_BAD,
101		'|',		'|',		T_BAD,
102		T_BAR,		'|',		T_BAD,
103	'+',
104		'+',		T_BAD,		T_BAD,
105		T_PLUS,		'+',		T_BAD,
106		T_PLUS,		'+',		T_BAD,
107		'+',		'+',		T_BAD,
108		T_PLUS,		'+',		T_BAD,
109	'?',
110		'?',		T_BAD,		T_BAD,
111		T_QUES, 	'?',		T_BAD,
112		T_QUES, 	'?',		T_BAD,
113		T_QUES,		'?',		'?',
114		T_QUES,		'?',		'?',
115	'(',
116		'(',		T_OPEN,		T_BAD,
117		T_OPEN, 	'(',		T_BAD,
118		T_OPEN, 	'(',		T_BAD,
119		'(',		'(',		'(',
120		T_OPEN,		'(',		'(',
121	')',
122		')',		T_CLOSE,	T_BAD,
123		T_CLOSE,	')',		T_BAD,
124		T_CLOSE,	')',		T_BAD,
125		')',		')',		')',
126		T_CLOSE,	')',		')',
127	'{',
128		'{',		T_LEFT,		T_BAD,
129		T_LEFT,		'{',		T_BAD,
130		T_LEFT,		'{',		T_BAD,
131		'{',		'{',		'{',
132		T_LEFT,		'{',		'{',
133	'}',
134		'}',		T_RIGHT,	T_BAD,
135		'}',		T_BAD,		T_BAD,
136		'}',		T_BAD,		T_BAD,
137		'}',		'}',		'}',
138		'}',		'}',		'}',
139	'&',
140		'&',		T_BAD,		T_BAD,
141		'&',		T_AND,		T_BAD,
142		T_AND,		'&',		T_BAD,
143		'&',		'&',		T_BAD,
144		T_AND,		'&',		T_BAD,
145	'!',
146		'!',		T_BAD,		T_BAD,
147		'!',		T_BANG,		T_BAD,
148		T_BANG, 	'!',		T_BAD,
149		'!',		'!',		T_BAD,
150		T_BANG,		'!',		T_BAD,
151	'@',
152		'@',		T_BAD,		T_BAD,
153		'@',		T_BAD,		T_BAD,
154		'@',		T_BAD,		T_BAD,
155		'@',		'@',		T_BAD,
156		T_AT,		'@',		T_BAD,
157	'~',
158		'~',		T_BAD,		T_BAD,
159		'~',		T_BAD,		T_BAD,
160		'~',		T_BAD,		T_BAD,
161		'~',		'~',		T_BAD,
162		T_TILDE,	'~',		T_BAD,
163	'%',
164		'%',		T_BAD,		T_BAD,
165		'%',		T_BAD,		T_BAD,
166		'%',		T_BAD,		T_BAD,
167		'%',		'%',		T_BAD,
168		T_PERCENT,	'%',		T_BAD,
169	'<',
170		'<',		T_LT,		T_BAD,
171		'<',		T_LT,		T_BAD,
172		T_LT,   	'<',		T_BAD,
173		'<',		'<',		T_BAD,
174		'<',		'<',		T_BAD,
175	'>',
176		'>',		T_GT,		T_BAD,
177		'>',		T_GT,		T_BAD,
178		T_GT,   	'>',		T_BAD,
179		'>',		'>',		T_BAD,
180		'>',		'>',		T_BAD,
181
182	/* backrefs */
183
184	'0',
185		'0',		T_BACK+0,	T_ESCAPE,
186		'0',		T_BACK+0,	T_ESCAPE,
187		'0',		T_BACK+0,	T_ESCAPE,
188		'0',		T_BACK+0,	T_ESCAPE,
189		'0',		T_BACK+0,	T_ESCAPE,
190	'1',
191		'1',		T_BACK+1,	T_ESCAPE,
192		'1',		T_BACK+1,	T_ESCAPE,
193		'1',		T_BACK+1,	T_ESCAPE,
194		'1',		T_BACK+1,	T_ESCAPE,
195		'1',		T_BACK+1,	T_ESCAPE,
196	'2',
197		'2',		T_BACK+2,	T_ESCAPE,
198		'2',		T_BACK+2,	T_ESCAPE,
199		'2',		T_BACK+2,	T_ESCAPE,
200		'2',		T_BACK+2,	T_ESCAPE,
201		'2',		T_BACK+2,	T_ESCAPE,
202	'3',
203		'3',		T_BACK+3,	T_ESCAPE,
204		'3',		T_BACK+3,	T_ESCAPE,
205		'3',		T_BACK+3,	T_ESCAPE,
206		'3',		T_BACK+3,	T_ESCAPE,
207		'3',		T_BACK+3,	T_ESCAPE,
208	'4',
209		'4',		T_BACK+4,	T_ESCAPE,
210		'4',		T_BACK+4,	T_ESCAPE,
211		'4',		T_BACK+4,	T_ESCAPE,
212		'4',		T_BACK+4,	T_ESCAPE,
213		'4',		T_BACK+4,	T_ESCAPE,
214	'5',
215		'5',		T_BACK+5,	T_ESCAPE,
216		'5',		T_BACK+5,	T_ESCAPE,
217		'5',		T_BACK+5,	T_ESCAPE,
218		'5',		T_BACK+5,	T_ESCAPE,
219		'5',		T_BACK+5,	T_ESCAPE,
220	'6',
221		'6',		T_BACK+6,	T_ESCAPE,
222		'6',		T_BACK+6,	T_ESCAPE,
223		'6',		T_BACK+6,	T_ESCAPE,
224		'6',		T_BACK+6,	T_ESCAPE,
225		'6',		T_BACK+6,	T_ESCAPE,
226	'7',
227		'7',		T_BACK+7,	T_ESCAPE,
228		'7',		T_BACK+7,	T_ESCAPE,
229		'7',		T_BACK+7,	T_ESCAPE,
230		'7',		T_BACK+7,	T_ESCAPE,
231		'7',		T_BACK+7,	T_ESCAPE,
232	'8',
233		'8',		T_BACK+8,	T_ESCAPE,
234		'8',		T_BACK+8,	T_ESCAPE,
235		'8',		T_BACK+8,	T_ESCAPE,
236		'8',		'8',		T_ESCAPE,	/* SRE row: escaped 8 stays the literal '8', not T_BACK+8 */
237		'8',		T_BACK+8,	T_ESCAPE,
238	'9',
239		'9',		T_BACK+9,	T_ESCAPE,
240		'9',		T_BACK+9,	T_ESCAPE,
241		'9',		T_BACK+9,	T_ESCAPE,
242		'9',		'9',		T_ESCAPE,	/* SRE row: escaped 9 stays the literal '9', not T_BACK+9 */
243		'9',		T_BACK+9,	T_ESCAPE,
244
245	/* perl */
246
247	'A',
248		'A',		T_BEG_STR,	T_BAD,
249		'A',		T_BEG_STR,	T_BAD,
250		'A',		T_BEG_STR,	T_BAD,
251		'A',		T_BEG_STR,	T_BAD,
252		'A',		T_BEG_STR,	T_BAD,
253	'b',
254		'b',		T_WORD,		'\b',
255		'b',		T_WORD,		'\b',
256		'b',		T_WORD,		'\b',
257		'b',		T_WORD,		'\b',
258		'b',		T_WORD,		'\b',
259	'B',
260		'B',		T_WORD_NOT,	T_BAD,
261		'B',		T_WORD_NOT,	T_BAD,
262		'B',		T_WORD_NOT,	T_BAD,
263		'B',		T_WORD_NOT,	T_BAD,
264		'B',		T_WORD_NOT,	T_BAD,
265	'd',
266		'd',		T_DIGIT,	T_DIGIT,
267		'd',		T_DIGIT,	T_DIGIT,
268		'd',		T_DIGIT,	T_DIGIT,
269		'd',		T_DIGIT,	T_DIGIT,
270		'd',		T_DIGIT,	T_DIGIT,
271	'D',
272		'D',		T_DIGIT_NOT,	T_DIGIT_NOT,
273		'D',		T_DIGIT_NOT,	T_DIGIT_NOT,
274		'D',		T_DIGIT_NOT,	T_DIGIT_NOT,
275		'D',		T_DIGIT_NOT,	T_DIGIT_NOT,
276		'D',		T_DIGIT_NOT,	T_DIGIT_NOT,
277	's',
278		's',		T_SPACE,	T_SPACE,
279		's',		T_SPACE,	T_SPACE,
280		's',		T_SPACE,	T_SPACE,
281		's',		T_SPACE,	T_SPACE,
282		's',		T_SPACE,	T_SPACE,
283	'S',
284		'S',		T_SPACE_NOT,	T_SPACE_NOT,
285		'S',		T_SPACE_NOT,	T_SPACE_NOT,
286		'S',		T_SPACE_NOT,	T_SPACE_NOT,
287		'S',		T_SPACE_NOT,	T_SPACE_NOT,
288		'S',		T_SPACE_NOT,	T_SPACE_NOT,
289	'w',
290		'w',		T_ALNUM,	T_ALNUM,
291		'w',		T_ALNUM,	T_ALNUM,
292		'w',		T_ALNUM,	T_ALNUM,
293		'w',		T_ALNUM,	T_ALNUM,
294		'w',		T_ALNUM,	T_ALNUM,
295	'W',
296		'W',		T_ALNUM_NOT,	T_ALNUM_NOT,
297		'W',		T_ALNUM_NOT,	T_ALNUM_NOT,
298		'W',		T_ALNUM_NOT,	T_ALNUM_NOT,
299		'W',		T_ALNUM_NOT,	T_ALNUM_NOT,
300		'W',		T_ALNUM_NOT,	T_ALNUM_NOT,
301	'z',
302		'z',		T_FIN_STR,	T_BAD,
303		'z',		T_FIN_STR,	T_BAD,
304		'z',		T_FIN_STR,	T_BAD,
305		'z',		T_FIN_STR,	T_BAD,
306		'z',		T_FIN_STR,	T_BAD,
307	'Z',
308		'Z',		T_END_STR,	T_BAD,
309		'Z',		T_END_STR,	T_BAD,
310		'Z',		T_END_STR,	T_BAD,
311		'Z',		T_END_STR,	T_BAD,
312		'Z',		T_END_STR,	T_BAD,
313
314	/* C escapes */
315
316	'a',
317		'a',		CC_bel,		CC_bel,
318		'a',		CC_bel,		CC_bel,
319		'a',		CC_bel,		CC_bel,
320		'a',		CC_bel,		CC_bel,
321		'a',		CC_bel,		CC_bel,
322	'c',
323		'c',		T_ESCAPE,	T_ESCAPE,
324		'c',		T_ESCAPE,	T_ESCAPE,
325		'c',		T_ESCAPE,	T_ESCAPE,
326		'c',		T_ESCAPE,	T_ESCAPE,
327		'c',		T_ESCAPE,	T_ESCAPE,
328	'C',
329		'C',		T_ESCAPE,	T_ESCAPE,
330		'C',		T_ESCAPE,	T_ESCAPE,
331		'C',		T_ESCAPE,	T_ESCAPE,
332		'C',		T_ESCAPE,	T_ESCAPE,
333		'C',		T_ESCAPE,	T_ESCAPE,
334	'e',
335		'e',		CC_esc,		CC_esc,
336		'e',		CC_esc,		CC_esc,
337		'e',		CC_esc,		CC_esc,
338		'e',		CC_esc,		CC_esc,
339		'e',		CC_esc,		CC_esc,
340	'E',
341		'E',		CC_esc,		CC_esc,
342		'E',		CC_esc,		CC_esc,
343		'E',		CC_esc,		CC_esc,
344		'E',		CC_esc,		CC_esc,
345		'E',		CC_esc,		CC_esc,
346	'f',
347		'f',		'\f',		'\f',
348		'f',		'\f',		'\f',
349		'f',		'\f',		'\f',
350		'f',		'\f',		'\f',
351		'f',		'\f',		'\f',
352	'n',
353		'n',		'\n',		'\n',
354		'n',		'\n',		'\n',
355		'n',		'\n',		'\n',
356		'n',		'\n',		'\n',
357		'n',		'\n',		'\n',
358	'r',
359		'r',		'\r',		'\r',
360		'r',		'\r',		'\r',
361		'r',		'\r',		'\r',
362		'r',		'\r',		'\r',
363		'r',		'\r',		'\r',
364	't',
365		't',		'\t',		'\t',
366		't',		'\t',		'\t',
367		't',		'\t',		'\t',
368		't',		'\t',		'\t',
369		't',		'\t',		'\t',
370	'v',
371		'v',		CC_vt,		CC_vt,
372		'v',		CC_vt,		CC_vt,
373		'v',		CC_vt,		CC_vt,
374		'v',		CC_vt,		CC_vt,
375		'v',		CC_vt,		CC_vt,
376	'x',
377		'x',		T_ESCAPE,	T_ESCAPE,
378		'x',		T_ESCAPE,	T_ESCAPE,
379		'x',		T_ESCAPE,	T_ESCAPE,
380		'x',		T_ESCAPE,	T_ESCAPE,
381		'x',		T_ESCAPE,	T_ESCAPE,
382};
383
384/*
385 * all allocation/free done here
386 * interface compatible with vmresize()
387 *
388 *	malloc(n)	alloc(0,n)
389 *	realloc(p,n)	alloc(p,n)
390 *	free(p)		alloc(p,0)
391 */
392
393void*
394alloc(register regdisc_t* disc, void* p, size_t n)
395{
396	if (disc->re_resizef)
397	{
398		if (!n && (disc->re_flags & REG_NOFREE))
399			return 0;
400		return (*disc->re_resizef)(disc->re_resizehandle, p, n);
401	}
402	else if (!n)
403	{
404		if (!(disc->re_flags & REG_NOFREE))
405			free(p);
406		return 0;
407	}
408	else if (p)
409		return realloc(p, n);
410	else
411		return malloc(n);
412}
413