regexp.h revision 2712:f74a135872bc
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*	Copyright (c) 1988 AT&T	*/
22/*	  All Rights Reserved  	*/
23
24
25/*
26 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
27 * Use is subject to license terms.
28 */
29
30#ifndef _REGEXP_H
31#define	_REGEXP_H
32
33#pragma ident	"%Z%%M%	%I%	%E% SMI"	/* SVr4.0 1.9	*/
34
35#include <string.h>
36
37#ifdef	__cplusplus
38extern "C" {
39#endif
40
41#define	CBRA	2
42#define	CCHR	4
43#define	CDOT	8
44#define	CCL	12
45#define	CXCL	16
46#define	CDOL	20
47#define	CCEOF	22
48#define	CKET	24
49#define	CBACK	36
50#define	NCCL	40
51
52#define	STAR	01
53#define	RNGE	03
54
55#define	NBRA	9
56
57#define	PLACE(c)	ep[c >> 3] |= bittab[c & 07]
58#define	ISTHERE(c)	(ep[c >> 3] & bittab[c & 07])
59#define	ecmp(s1, s2, n)	(strncmp(s1, s2, n) == 0)
60
61static char	*braslist[NBRA];
62static char	*braelist[NBRA];
63int	sed, nbra;
64char	*loc1, *loc2, *locs;
65static int	nodelim;
66
67int	circf;
68static int	low;
69static int	size;
70
71static unsigned char	bittab[] = { 1, 2, 4, 8, 16, 32, 64, 128 };
72
73#ifdef	__STDC__
74int advance(const char *lp, const char *ep);
75static void getrnge(const char *str);
76#else
77int advance();
78static void getrnge();
79#endif
80
81char *
82#ifdef	__STDC__
83compile(char *instring, char *ep, const char *endbuf, int seof)
84#else
85compile(instring, ep, endbuf, seof)
86register char *ep;
87char *instring, *endbuf;
88int seof;
89#endif
90{
91	INIT	/* Dependent declarations and initializations */
92	register int c;
93	register int eof = seof;
94	char *lastep;
95	int cclcnt;
96	char bracket[NBRA], *bracketp;
97	int closed;
98	int neg;
99	int lc;
100	int i, cflg;
101	int iflag; /* used for non-ascii characters in brackets */
102
103#ifdef __lint
104	/* make lint happy */
105	c = nodelim;
106#endif
107
108	lastep = NULL;
109	if ((c = GETC()) == eof || c == '\n') {
110		if (c == '\n') {
111			UNGETC(c);
112			nodelim = 1;
113		}
114		if (*ep == 0 && !sed)
115			ERROR(41);
116		RETURN(ep);
117	}
118	bracketp = bracket;
119	circf = closed = nbra = 0;
120	if (c == '^')
121		circf++;
122	else
123		UNGETC(c);
124	for (;;) {
125		if (ep >= endbuf)
126			ERROR(50);
127		c = GETC();
128		if (c != '*' && ((c != '\\') || (PEEKC() != '{')))
129			lastep = ep;
130		if (c == eof) {
131			*ep++ = CCEOF;
132			if (bracketp != bracket)
133				ERROR(42);
134			RETURN(ep);
135		}
136		switch (c) {
137
138		case '.':
139			*ep++ = CDOT;
140			continue;
141
142		case '\n':
143			if (!sed) {
144				UNGETC(c);
145				*ep++ = CCEOF;
146				nodelim = 1;
147				if (bracketp != bracket)
148					ERROR(42);
149				RETURN(ep);
150			} else ERROR(36);
151		case '*':
152			if (lastep == NULL || *lastep == CBRA ||
153			    *lastep == CKET)
154				goto defchar;
155			*lastep |= STAR;
156			continue;
157
158		case '$':
159			if (PEEKC() != eof && PEEKC() != '\n')
160				goto defchar;
161			*ep++ = CDOL;
162			continue;
163
164		case '[':
165			if (&ep[17] >= endbuf)
166				ERROR(50);
167
168			*ep++ = CCL;
169			lc = 0;
170			for (i = 0; i < 16; i++)
171				ep[i] = 0;
172
173			neg = 0;
174			if ((c = GETC()) == '^') {
175				neg = 1;
176				c = GETC();
177			}
178			iflag = 1;
179			do {
180				c &= 0377;
181				if (c == '\0' || c == '\n')
182					ERROR(49);
183				if ((c & 0200) && iflag) {
184					iflag = 0;
185					if (&ep[32] >= endbuf)
186						ERROR(50);
187					ep[-1] = CXCL;
188					for (i = 16; i < 32; i++)
189						ep[i] = 0;
190				}
191				if (c == '-' && lc != 0) {
192					if ((c = GETC()) == ']') {
193						PLACE('-');
194						break;
195					}
196					if ((c & 0200) && iflag) {
197						iflag = 0;
198						if (&ep[32] >= endbuf)
199							ERROR(50);
200						ep[-1] = CXCL;
201						for (i = 16; i < 32; i++)
202							ep[i] = 0;
203					}
204					while (lc < c) {
205						PLACE(lc);
206						lc++;
207					}
208				}
209				lc = c;
210				PLACE(c);
211			} while ((c = GETC()) != ']');
212
213			if (iflag)
214				iflag = 16;
215			else
216				iflag = 32;
217
218			if (neg) {
219				if (iflag == 32) {
220					for (cclcnt = 0; cclcnt < iflag;
221					    cclcnt++)
222						ep[cclcnt] ^= 0377;
223					ep[0] &= 0376;
224				} else {
225					ep[-1] = NCCL;
226					/* make nulls match so test fails */
227					ep[0] |= 01;
228				}
229			}
230
231			ep += iflag;
232
233			continue;
234
235		case '\\':
236			switch (c = GETC()) {
237
238			case '(':
239				if (nbra >= NBRA)
240					ERROR(43);
241				*bracketp++ = (char)nbra;
242				*ep++ = CBRA;
243				*ep++ = (char)nbra++;
244				continue;
245
246			case ')':
247				if (bracketp <= bracket)
248					ERROR(42);
249				*ep++ = CKET;
250				*ep++ = *--bracketp;
251				closed++;
252				continue;
253
254			case '{':
255				if (lastep == NULL)
256					goto defchar;
257				*lastep |= RNGE;
258				cflg = 0;
259			nlim:
260				c = GETC();
261				i = 0;
262				do {
263					if ('0' <= c && c <= '9')
264						i = 10 * i + c - '0';
265					else
266						ERROR(16);
267				} while (((c = GETC()) != '\\') && (c != ','));
268				if (i >= 255)
269					ERROR(11);
270				*ep++ = (char)i;
271				if (c == ',') {
272					if (cflg++)
273						ERROR(44);
274					if ((c = GETC()) == '\\')
275						*ep++ = (char)255;
276					else {
277						UNGETC(c);
278						goto nlim;
279						/* get 2'nd number */
280					}
281				}
282				if (GETC() != '}')
283					ERROR(45);
284				if (!cflg)	/* one number */
285					*ep++ = (char)i;
286				else if ((ep[-1] & 0377) < (ep[-2] & 0377))
287					ERROR(46);
288				continue;
289
290			case '\n':
291				ERROR(36);
292
293			case 'n':
294				c = '\n';
295				goto defchar;
296
297			default:
298				if (c >= '1' && c <= '9') {
299					if ((c -= '1') >= closed)
300						ERROR(25);
301					*ep++ = CBACK;
302					*ep++ = (char)c;
303					continue;
304				}
305			}
306	/* Drop through to default to use \ to turn off special chars */
307
308		defchar:
309		default:
310			lastep = ep;
311			*ep++ = CCHR;
312			*ep++ = (char)c;
313		}
314	}
315	/*NOTREACHED*/
316}
317
318#ifdef	__STDC__
319int
320step(const char *p1, const char *p2)
321#else
322int
323step(p1, p2)
324register char *p1, *p2;
325#endif
326{
327	char c;
328
329
330	if (circf) {
331		loc1 = (char *)p1;
332		return (advance(p1, p2));
333	}
334	/* fast check for first character */
335	if (*p2 == CCHR) {
336		c = p2[1];
337		do {
338			if (*p1 != c)
339				continue;
340			if (advance(p1, p2)) {
341				loc1 = (char *)p1;
342				return (1);
343			}
344		} while (*p1++);
345		return (0);
346	}
347		/* regular algorithm */
348	do {
349		if (advance(p1, p2)) {
350			loc1 = (char *)p1;
351			return (1);
352		}
353	} while (*p1++);
354	return (0);
355}
356
357int
358#ifdef	__STDC__
359advance(const char *lp, const char *ep)
360#else
361advance(lp, ep)
362register char *lp, *ep;
363#endif
364{
365#ifdef	__STDC__
366	const char *curlp;
367#else
368	register char *curlp;
369#endif
370	int c;
371	char *bbeg;
372	register char neg;
373	size_t ct;
374
375	for (;;) {
376		neg = 0;
377		switch (*ep++) {
378
379		case CCHR:
380			if (*ep++ == *lp++)
381				continue;
382			return (0);
383			/*FALLTHRU*/
384
385		case CDOT:
386			if (*lp++)
387				continue;
388			return (0);
389			/*FALLTHRU*/
390
391		case CDOL:
392			if (*lp == 0)
393				continue;
394			return (0);
395			/*FALLTHRU*/
396
397		case CCEOF:
398			loc2 = (char *)lp;
399			return (1);
400			/*FALLTHRU*/
401
402		case CXCL:
403			c = (unsigned char)*lp++;
404			if (ISTHERE(c)) {
405				ep += 32;
406				continue;
407			}
408			return (0);
409			/*FALLTHRU*/
410
411		case NCCL:
412			neg = 1;
413			/*FALLTHRU*/
414
415		case CCL:
416			c = *lp++;
417			if (((c & 0200) == 0 && ISTHERE(c)) ^ neg) {
418				ep += 16;
419				continue;
420			}
421			return (0);
422			/*FALLTHRU*/
423
424		case CBRA:
425			braslist[*ep++] = (char *)lp;
426			continue;
427			/*FALLTHRU*/
428
429		case CKET:
430			braelist[*ep++] = (char *)lp;
431			continue;
432			/*FALLTHRU*/
433
434		case CCHR | RNGE:
435			c = *ep++;
436			getrnge(ep);
437			while (low--)
438				if (*lp++ != c)
439					return (0);
440			curlp = lp;
441			while (size--)
442				if (*lp++ != c)
443					break;
444			if (size < 0)
445				lp++;
446			ep += 2;
447			goto star;
448			/*FALLTHRU*/
449
450		case CDOT | RNGE:
451			getrnge(ep);
452			while (low--)
453				if (*lp++ == '\0')
454					return (0);
455			curlp = lp;
456			while (size--)
457				if (*lp++ == '\0')
458					break;
459			if (size < 0)
460				lp++;
461			ep += 2;
462			goto star;
463			/*FALLTHRU*/
464
465		case CXCL | RNGE:
466			getrnge(ep + 32);
467			while (low--) {
468				c = (unsigned char)*lp++;
469				if (!ISTHERE(c))
470					return (0);
471			}
472			curlp = lp;
473			while (size--) {
474				c = (unsigned char)*lp++;
475				if (!ISTHERE(c))
476					break;
477			}
478			if (size < 0)
479				lp++;
480			ep += 34;		/* 32 + 2 */
481			goto star;
482			/*FALLTHRU*/
483
484		case NCCL | RNGE:
485			neg = 1;
486			/*FALLTHRU*/
487
488		case CCL | RNGE:
489			getrnge(ep + 16);
490			while (low--) {
491				c = *lp++;
492				if (((c & 0200) || !ISTHERE(c)) ^ neg)
493					return (0);
494			}
495			curlp = lp;
496			while (size--) {
497				c = *lp++;
498				if (((c & 0200) || !ISTHERE(c)) ^ neg)
499					break;
500			}
501			if (size < 0)
502				lp++;
503			ep += 18; 		/* 16 + 2 */
504			goto star;
505			/*FALLTHRU*/
506
507		case CBACK:
508			bbeg = braslist[*ep];
509			ct = braelist[*ep++] - bbeg;
510
511			if (ecmp(bbeg, lp, ct)) {
512				lp += ct;
513				continue;
514			}
515			return (0);
516			/*FALLTHRU*/
517
518		case CBACK | STAR:
519			bbeg = braslist[*ep];
520			ct = braelist[*ep++] - bbeg;
521			curlp = lp;
522			while (ecmp(bbeg, lp, ct))
523				lp += ct;
524
525			while (lp >= curlp) {
526				if (advance(lp, ep))
527					return (1);
528				lp -= ct;
529			}
530			return (0);
531			/*FALLTHRU*/
532
533		case CDOT | STAR:
534			curlp = lp;
535			while (*lp++);
536			goto star;
537			/*FALLTHRU*/
538
539		case CCHR | STAR:
540			curlp = lp;
541			while (*lp++ == *ep);
542			ep++;
543			goto star;
544			/*FALLTHRU*/
545
546		case CXCL | STAR:
547			curlp = lp;
548			do {
549				c = (unsigned char)*lp++;
550			} while (ISTHERE(c));
551			ep += 32;
552			goto star;
553			/*FALLTHRU*/
554
555		case NCCL | STAR:
556			neg = 1;
557			/*FALLTHRU*/
558
559		case CCL | STAR:
560			curlp = lp;
561			do {
562				c = *lp++;
563			} while (((c & 0200) == 0 && ISTHERE(c)) ^ neg);
564			ep += 16;
565			goto star;
566			/*FALLTHRU*/
567
568		star:
569			do {
570				if (--lp == locs)
571					break;
572				if (advance(lp, ep))
573					return (1);
574			} while (lp > curlp);
575			return (0);
576
577		}
578	}
579	/*NOTREACHED*/
580}
581
582static void
583#ifdef	__STDC__
584getrnge(const char *str)
585#else
586getrnge(str)
587register char *str;
588#endif
589{
590	low = *str++ & 0377;
591	size = ((*str & 0377) == 255)? 20000: (*str &0377) - low;
592}
593
594#ifdef	__cplusplus
595}
596#endif
597
598#endif	/* _REGEXP_H */
599