regex.c revision 1219:f89f56c2d9ac
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22
23/*
24 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
25 * Use is subject to license terms.
26 */
27
28/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
29/*	  All Rights Reserved	*/
30
31#pragma ident	"%Z%%M%	%I%	%E% SMI"
32
33/*
34 * IMPORTANT NOTE:
35 *
36 * regex() WORKS **ONLY** WITH THE ASCII AND THE Solaris EUC CHARACTER SETS.
37 * IT IS **NOT** CHARACTER SET INDEPENDENT.
38 *
39 */
40
41#pragma weak regex = _regex
42
43/* CONSTANTS SHARED WITH regcmp() */
44#include "regex.h"
45
46#include "synonyms.h"
47#include "mtlib.h"
48#include <limits.h>
49#include <stdarg.h>
50#include <stdlib.h>
51#include <thread.h>
52#include <widec.h>
53#include "tsd.h"
54
55
56/* PRIVATE CONSTANTS */
57
/*
 * NOTE(review): the ADD_*_TO_GROUP_LENGTH values and ADDED_LENGTH_BITS are
 * not used in the part of the file reviewed here.  Judging by their names
 * they flag an extra 256/512/768 to be added to a compiled group length,
 * with ADDED_LENGTH_BITS as the mask selecting those flag bits -- confirm
 * against test_string() and regcmp().
 */
#define	ADD_256_TO_GROUP_LENGTH	0x1
#define	ADD_512_TO_GROUP_LENGTH	0x2
#define	ADD_768_TO_GROUP_LENGTH	0x3
#define	ADDED_LENGTH_BITS	0x3
/* keeps only the low-order 8 bits of a value (see get_match_counts()) */
#define	SINGLE_BYTE_MASK	0xff
/* number of entries in stringp_stack[] */
#define	STRINGP_STACK_SIZE	50
64
65
66/* PRIVATE TYPE DEFINITIONS */
67
/*
 * The condition a character class test is asked to verify:
 * that the character is, or is not, a member of the class.
 */
typedef enum {
	NOT_IN_CLASS = 0,
	IN_CLASS
} char_test_condition_t;

/*
 * The outcome of a character class test.  CONDITION_TRUE and
 * CONDITION_FALSE say whether the requested condition held;
 * CHAR_TEST_ERROR is returned by the multibyte class test when a
 * character of the compiled class cannot be decoded.
 * NOTE(review): TESTING_CHAR is not returned by any function in the
 * part of the file reviewed here.
 */
typedef enum {
	TESTING_CHAR = 0,
	CONDITION_TRUE,
	CONDITION_FALSE,
	CHAR_TEST_ERROR
} char_test_result_t;
79
80
81/* PRIVATE GLOBAL VARIABLES */
82
/* held by regex() from just after its entry check until it returns */
static mutex_t		regex_lock = DEFAULTMUTEX;
/* per substring: index of the regex() argument that receives it, or -1 */
static int		return_arg_number[NSUBSTRINGS];
/* per substring: end of the text recorded by test_string() */
static const char	*substring_endp[NSUBSTRINGS];
/* per substring: start of the text recorded by test_string(), or null */
static const char	*substring_startp[NSUBSTRINGS];
/*
 * stack of string pointers used by push_stringp()/pop_stringp();
 * it grows downward from the end of the array, and regex() resets
 * stringp_stackp to one past the last entry (empty stack) on each call
 */
static const char	*stringp_stack[STRINGP_STACK_SIZE];
static const char	**stringp_stackp;
89
90
91/* DECLARATIONS OF PRIVATE FUNCTIONS */
92
/* decodes the character at stringp into *wcharp; returns its byte count */
static int
get_wchar(wchar_t *wcharp,
	const char *stringp);

/* reads the <minimum_match_count><maximum_match_count> bytes of a regex */
static void
get_match_counts(int *nmust_matchp,
	int *nextra_matches_allowedp,
	const char *count_stringp);

/* tests whether test_char lies in the range lower_char..upper_char */
static boolean_t
in_wchar_range(wchar_t test_char,
	wchar_t lower_char,
	wchar_t upper_char);

/* removes and returns the top of stringp_stack[]; null if it is empty */
static const char *
pop_stringp(void);

/* returns the pointer to the previous character in a multibyte string */
static const char *
previous_charp(const char *current_charp);

/* saves stringp on stringp_stack[]; returns null if the stack is full */
static const char *
push_stringp(const char *stringp);

/* class membership test for classes encoded as aTHRUz (single bytes) */
static char_test_result_t
test_char_against_ascii_class(char test_char,
	const char *classp,
	char_test_condition_t test_condition);

/* class membership test for classes holding multibyte characters */
static char_test_result_t
test_char_against_multibyte_class(wchar_t test_char,
	const char *classp,
	char_test_condition_t test_condition);


/* FOR COMPATIBILITY WITH PREVIOUS ASCII VERSIONS OF regcmp() */

/* class membership test for classes encoded as THRUaz */
static char_test_result_t
test_char_against_old_ascii_class(char test_char,
	const char *classp,
	char_test_condition_t test_condition);

/* retries test_string(), backing up one byte at a time to repeat_startp */
static const char *
test_repeated_ascii_char(const char *repeat_startp,
	const char *stringp,
	const char *regexp);

/* as above, but backs up one multibyte character at a time */
static const char *
test_repeated_multibyte_char(const char *repeat_startp,
	const char *stringp,
	const char *regexp);

/* as above, but backs up to positions popped from stringp_stack[] */
static const char *
test_repeated_group(const char *repeat_startp,
	const char *stringp,
	const char *regexp);

/* matches the string at stringp against the compiled expression regexp */
static const char *
test_string(const char *stringp,
	const char *regexp);
152
153
154/* DEFINITIONS OF PUBLIC VARIABLES */
155
156char *__loc1;
157
158/*
159 * reserve thread-specific storage for __loc1
160 */
161char **
162____loc1(void)
163{
164	if (_thr_main())
165		return (&__loc1);
166	return ((char **)tsdalloc(_T_REGEX_LOC1, sizeof (char *), NULL));
167}
168
169#define	__loc1 (*(____loc1()))
170
171/* DEFINITION OF regex() */
172
173extern char *
174_regex(const char *regexp,
175	const char *stringp, ...)
176{
177	va_list		arg_listp;
178	int		char_size;
179	const char	*end_of_matchp;
180	wchar_t		regex_wchar;
181	char		*return_argp[NSUBSTRINGS];
182	char		*returned_substringp;
183	int		substringn;
184	const char	*substringp;
185	wchar_t		string_wchar;
186
187	if (____loc1() == (char **)0) {
188	    return ((char *)0);
189	} else {
190	    lmutex_lock(&regex_lock);
191	    __loc1 = (char *)0;
192	}
193
194	if ((stringp == (char *)0) || (regexp == (char *)0)) {
195	    lmutex_unlock(&regex_lock);
196	return ((char *)0);
197	}
198
199
200	/* INITIALIZE SUBSTRINGS THAT MIGHT BE RETURNED IN VARARGS  */
201
202	substringn = 0;
203	va_start(arg_listp, stringp);
204	while (substringn < NSUBSTRINGS) {
205	    return_argp[substringn] = va_arg(arg_listp, char *);
206	    substring_startp[substringn] = (char *)0;
207	    return_arg_number[substringn] = -1;
208	    substringn++;
209	}
210	va_end(arg_listp);
211
212
213	/* TEST THE STRING AGAINST THE REGULAR EXPRESSION */
214
215	end_of_matchp = (char *)0;
216	stringp_stackp = &stringp_stack[STRINGP_STACK_SIZE];
217
218	if ((int)*regexp == (int)START_OF_STRING_MARK) {
219
220	/*
221	 * the match must start at the beginning of the string
222	 */
223
224	    __loc1 = (char *)stringp;
225	    regexp++;
226	    end_of_matchp = test_string(stringp, regexp);
227
228	} else if ((int)*regexp == (int)ASCII_CHAR) {
229
230	/*
231	 * test a string against a regular expression
232	 * that starts with a single ASCII character:
233	 *
234	 * move to each character in the string that matches
235	 * the first character in the regular expression
236	 * and test the remaining string
237	 */
238
239	    while ((*stringp != *(regexp + 1)) && (*stringp != '\0')) {
240		stringp++;
241	    }
242	    while ((end_of_matchp == (char *)0) && (*stringp != '\0')) {
243		end_of_matchp = test_string(stringp, regexp);
244		if (end_of_matchp != (char *)0) {
245		    __loc1 = (char *)stringp;
246		} else {
247		    stringp++;
248		    while ((*stringp != *(regexp + 1)) && (*stringp != '\0')) {
249			stringp++;
250		    }
251		}
252	    }
253
254	} else if (!multibyte) {
255
256	/*
257	 * if the value of the "multibyte" macro defined in <euc.h>
258	 * is false, regex() is running in an ASCII locale;
259	 * test an ASCII string against an ASCII regular expression
260	 * that doesn't start with a single ASCII character:
261	 *
262	 * move forward in the string one byte at a time, testing
263	 * the remaining string against the regular expression
264	 */
265
266	    end_of_matchp = test_string(stringp, regexp);
267	    while ((end_of_matchp == (char *)0) && (*stringp != '\0')) {
268		stringp++;
269		end_of_matchp = test_string(stringp, regexp);
270	    }
271	    if (end_of_matchp != (char *)0) {
272		__loc1 = (char *)stringp;
273	    }
274
275	} else if ((int)*regexp == (int)MULTIBYTE_CHAR) {
276
277	/*
278	 * test a multibyte string against a multibyte regular expression
279	 * that starts with a single multibyte character:
280	 *
281	 * move to each character in the string that matches
282	 * the first character in the regular expression
283	 * and test the remaining string
284	 */
285
286	    (void) get_wchar(&regex_wchar, regexp + 1);
287	    char_size = get_wchar(&string_wchar, stringp);
288	    while ((string_wchar != regex_wchar) && (char_size > 0)) {
289		stringp += char_size;
290		char_size = get_wchar(&string_wchar, stringp);
291	    }
292	    while ((end_of_matchp == (char *)0) && (char_size > 0)) {
293		end_of_matchp = test_string(stringp, regexp);
294		if (end_of_matchp != (char *)0) {
295		    __loc1 = (char *)stringp;
296		} else {
297		    stringp += char_size;
298		    char_size = get_wchar(&string_wchar, stringp);
299		    while ((string_wchar != regex_wchar) && (char_size > 0)) {
300			stringp += char_size;
301			char_size = get_wchar(&string_wchar, stringp);
302		    }
303		}
304	    }
305
306	} else {
307
308	/*
309	 * test a multibyte string against a multibyte regular expression
310	 * that doesn't start with a single multibyte character
311	 *
312	 * move forward in the string one multibyte character at a time,
313	 * testing the remaining string against the regular expression
314	 */
315
316	    end_of_matchp = test_string(stringp, regexp);
317	    char_size = get_wchar(&string_wchar, stringp);
318	    while ((end_of_matchp == (char *)0) && (char_size > 0)) {
319		stringp += char_size;
320		end_of_matchp = test_string(stringp, regexp);
321		char_size = get_wchar(&string_wchar, stringp);
322	    }
323	    if (end_of_matchp != (char *)0) {
324		__loc1 = (char *)stringp;
325	    }
326	}
327
328	/*
329	 * Return substrings that matched subexpressions for which
330	 * matching substrings are to be returned.
331	 *
332	 * NOTE:
333	 *
334	 * According to manual page regcmp(3G), regex() returns substrings
335	 * that match subexpressions even when no substring matches the
336	 * entire regular expression.
337	 */
338
339	substringn = 0;
340	while (substringn < NSUBSTRINGS) {
341	    substringp = substring_startp[substringn];
342	    if ((substringp != (char *)0) &&
343		(return_arg_number[substringn] >= 0)) {
344		returned_substringp =
345		    return_argp[return_arg_number[substringn]];
346		if (returned_substringp != (char *)0) {
347		    while (substringp < substring_endp[substringn]) {
348			*returned_substringp = (char)*substringp;
349			returned_substringp++;
350			substringp++;
351		    }
352		    *returned_substringp = '\0';
353		}
354	    }
355	    substringn++;
356	}
357	lmutex_unlock(&regex_lock);
358	return ((char *)end_of_matchp);
359}  /* regex() */
360
361
362/* DEFINITIONS OF PRIVATE FUNCTIONS */
363
/*
 * get_wchar() - decode the character at the front of a string
 *
 * wcharp	receives the decoded character; it is always assigned
 * stringp	the string to decode from; may be null
 *
 * Returns the number of bytes occupied by the character:
 *	0	stringp is null or addresses the terminating null byte
 *		(*wcharp is set to the null wide character)
 *	1	the byte is in the range 0x01-0x7f
 *	n	the value returned by mbtowc() for any other byte
 *
 * When mbtowc() rejects the bytes its return value (not positive) is
 * passed back and *wcharp is set to the null wide character, so that
 * callers which look at the character before checking the size never
 * read an indeterminate value.
 */
static int
get_wchar(wchar_t *wcharp,
	const char *stringp)
{
	int char_size;

	if ((stringp == NULL) || (*stringp == '\0')) {
		*wcharp = (wchar_t)0;
		return (0);
	}

	if ((unsigned char)*stringp <= (unsigned char)0x7f) {
		*wcharp = (wchar_t)((unsigned int)*stringp);
		return (1);
	}

	char_size = mbtowc(wcharp, stringp, MB_LEN_MAX);
	if (char_size <= 0) {
		/* mbtowc() is not required to store anything on failure */
		*wcharp = (wchar_t)0;
	}
	return (char_size);
}
384
385static void
386get_match_counts(int *nmust_matchp,
387	int *nextra_matches_allowedp,
388	const char *count_stringp)
389{
390	int minimum_match_count;
391	int maximum_match_count;
392
393	minimum_match_count =
394	    (int)((unsigned int)*count_stringp & SINGLE_BYTE_MASK);
395	*nmust_matchp = minimum_match_count;
396
397	count_stringp++;
398	maximum_match_count =
399	    (int)((unsigned int)*count_stringp & SINGLE_BYTE_MASK);
400	if (maximum_match_count == (int)UNLIMITED) {
401	    *nextra_matches_allowedp = (int)UNLIMITED;
402	} else {
403	    *nextra_matches_allowedp =
404		maximum_match_count - minimum_match_count;
405	}
406	return;
407
408} /* get_match_counts() */
409
410static boolean_t
411in_wchar_range(wchar_t test_char,
412	wchar_t lower_char,
413	wchar_t upper_char)
414{
415	return (((lower_char <= 0x7f) && (upper_char <= 0x7f) &&
416	    (lower_char <= test_char) && (test_char <= upper_char)) ||
417	    (((test_char & WCHAR_CSMASK) == (lower_char & WCHAR_CSMASK)) &&
418	    ((test_char & WCHAR_CSMASK) == (upper_char & WCHAR_CSMASK)) &&
419	    (lower_char <= test_char) && (test_char <= upper_char)));
420
421} /* in_wchar_range() */
422
423static const char *
424pop_stringp(void)
425{
426	const char *stringp;
427
428	if (stringp_stackp >= &stringp_stack[STRINGP_STACK_SIZE]) {
429	    return ((char *)0);
430	} else {
431	    stringp = *stringp_stackp;
432	    stringp_stackp++;
433	    return (stringp);
434	}
435}
436
437
438static const char *
439previous_charp(const char *current_charp)
440{
441	/*
442	 * returns the pointer to the previous character in
443	 * a string of multibyte characters
444	 */
445
446	const char *prev_cs0 = current_charp - 1;
447	const char *prev_cs1 = current_charp - eucw1;
448	const char *prev_cs2 = current_charp - eucw2 - 1;
449	const char *prev_cs3 = current_charp - eucw3 - 1;
450	const char *prev_charp;
451
452	if ((unsigned char)*prev_cs0 <= 0x7f) {
453	    prev_charp = prev_cs0;
454	} else if ((unsigned char)*prev_cs2 == SS2) {
455	    prev_charp = prev_cs2;
456	} else if ((unsigned char)*prev_cs3 == SS3) {
457	    prev_charp = prev_cs3;
458	} else {
459	    prev_charp = prev_cs1;
460	}
461	return (prev_charp);
462
463} /* previous_charp() */
464
465static const char *
466push_stringp(const char *stringp)
467{
468	if (stringp_stackp <= &stringp_stack[0]) {
469	    return ((char *)0);
470	} else {
471	    stringp_stackp--;
472	    *stringp_stackp = stringp;
473	    return (stringp);
474	}
475}
476
477
478static char_test_result_t
479test_char_against_ascii_class(char test_char,
480	const char *classp,
481	char_test_condition_t test_condition)
482{
483	/*
484	 * tests a character for membership in an ASCII character class compiled
485	 * by the internationalized version of regcmp();
486	 *
487	 * NOTE: The internationalized version of regcmp() compiles
488	 * 	the range a-z in an ASCII character class to aTHRUz.
489	 */
490
491	int	nbytes_to_check;
492
493	nbytes_to_check = (int)*classp;
494	classp++;
495	nbytes_to_check--;
496
497	while (nbytes_to_check > 0) {
498	    if (test_char == *classp) {
499		if (test_condition == IN_CLASS)
500		    return (CONDITION_TRUE);
501		else
502		    return (CONDITION_FALSE);
503	    } else if (*classp == THRU) {
504		if ((*(classp - 1) <= test_char) &&
505		    (test_char <= *(classp + 1))) {
506		    if (test_condition == IN_CLASS)
507			return (CONDITION_TRUE);
508		    else
509			return (CONDITION_FALSE);
510		} else {
511		    classp += 2;
512		    nbytes_to_check -= 2;
513		}
514	    } else {
515		classp++;
516		nbytes_to_check--;
517	    }
518	}
519	if (test_condition == NOT_IN_CLASS) {
520	    return (CONDITION_TRUE);
521	} else {
522	    return (CONDITION_FALSE);
523	}
524} /* test_char_against_ascii_class() */
525
526static char_test_result_t
527test_char_against_multibyte_class(wchar_t test_char,
528	const char *classp,
529	char_test_condition_t test_condition)
530{
531	/*
532	 * tests a character for membership in a multibyte character class;
533	 *
534	 * NOTE: The range a-z in a multibyte character class compiles to
535	 * 	aTHRUz.
536	 */
537
538	int		char_size;
539	wchar_t		current_char;
540	int		nbytes_to_check;
541	wchar_t		previous_char;
542
543	nbytes_to_check = (int)*classp;
544	classp++;
545	nbytes_to_check--;
546
547	char_size = get_wchar(&current_char, classp);
548	if (char_size <= 0) {
549	    return (CHAR_TEST_ERROR);
550	} else if (test_char == current_char) {
551	    if (test_condition == IN_CLASS) {
552		return (CONDITION_TRUE);
553	    } else {
554		return (CONDITION_FALSE);
555	    }
556	} else {
557	    classp += char_size;
558	    nbytes_to_check -= char_size;
559	}
560
561	while (nbytes_to_check > 0) {
562	    previous_char = current_char;
563	    char_size = get_wchar(&current_char, classp);
564	    if (char_size <= 0) {
565		return (CHAR_TEST_ERROR);
566	    } else if (test_char == current_char) {
567		if (test_condition == IN_CLASS) {
568		    return (CONDITION_TRUE);
569		} else {
570		    return (CONDITION_FALSE);
571		}
572	    } else if (current_char == THRU) {
573		classp += char_size;
574		nbytes_to_check -= char_size;
575		char_size = get_wchar(&current_char, classp);
576		if (char_size <= 0) {
577		    return (CHAR_TEST_ERROR);
578		} else if (in_wchar_range(test_char, previous_char,
579		    current_char)) {
580		    if (test_condition == IN_CLASS) {
581			return (CONDITION_TRUE);
582		    } else {
583			return (CONDITION_FALSE);
584		    }
585		} else {
586		    classp += char_size;
587		    nbytes_to_check -= char_size;
588		}
589	    } else {
590		classp += char_size;
591		nbytes_to_check -= char_size;
592	    }
593	}
594	if (test_condition == NOT_IN_CLASS) {
595	    return (CONDITION_TRUE);
596	} else {
597	    return (CONDITION_FALSE);
598	}
599} /* test_char_against_multibyte_class() */
600
601
602/* FOR COMPATIBILITY WITH PREVIOUS ASCII VERSIONS OF regcmp() */
603
604static char_test_result_t
605test_char_against_old_ascii_class(char test_char,
606	const char *classp,
607	char_test_condition_t test_condition)
608{
609	/*
610	 * tests a character for membership in an ASCII character class compiled
611	 * by the ASCII version of regcmp();
612	 *
613	 * NOTE: ASCII versions of regcmp() compile the range a-z in an
614	 *	ASCII character class to THRUaz.  The internationalized
615	 *	version compiles the same range to aTHRUz.
616	 */
617
618	int	nbytes_to_check;
619
620	nbytes_to_check = (int)*classp;
621	classp++;
622	nbytes_to_check--;
623
624	while (nbytes_to_check > 0) {
625	    if (test_char == *classp) {
626		if (test_condition == IN_CLASS) {
627		    return (CONDITION_TRUE);
628		} else {
629		    return (CONDITION_FALSE);
630		}
631	    } else if (*classp == THRU) {
632		if ((*(classp + 1) <= test_char) &&
633		    (test_char <= *(classp + 2))) {
634		    if (test_condition == IN_CLASS) {
635			return (CONDITION_TRUE);
636		    } else {
637			return (CONDITION_FALSE);
638		    }
639		} else {
640		    classp += 3;
641		    nbytes_to_check -= 3;
642		}
643	    } else {
644		classp++;
645		nbytes_to_check--;
646	    }
647	}
648	if (test_condition == NOT_IN_CLASS) {
649	    return (CONDITION_TRUE);
650	} else {
651	    return (CONDITION_FALSE);
652	}
653} /* test_char_against_old_ascii_class() */
654
/*
 * test_repeated_ascii_char() - match what follows a repeated
 * single-byte character
 *
 * The rest of the expression (regexp) is tried at stringp first;
 * after each failure stringp is moved back one byte and the test is
 * repeated, until stringp has reached repeat_startp.
 *
 * Returns the result of the first successful test_string() call, or
 * null if every position fails.
 */
static const char *
test_repeated_ascii_char(const char *repeat_startp,
	const char *stringp,
	const char *regexp)
{
	const char *matchendp;

	for (;;) {
		matchendp = test_string(stringp, regexp);
		if ((matchendp != NULL) || (stringp <= repeat_startp))
			break;
		stringp--;
	}
	return (matchendp);
}
670
/*
 * test_repeated_multibyte_char() - match what follows a repeated
 * multibyte character
 *
 * The rest of the expression (regexp) is tried at stringp first;
 * after each failure stringp is moved back one character with
 * previous_charp() and the test is repeated, until stringp has
 * reached repeat_startp.
 *
 * Returns the result of the first successful test_string() call, or
 * null if every position fails.
 */
static const char *
test_repeated_multibyte_char(const char *repeat_startp,
	const char *stringp,
	const char *regexp)
{
	const char *matchendp;

	for (;;) {
		matchendp = test_string(stringp, regexp);
		if ((matchendp != NULL) || (stringp <= repeat_startp))
			break;
		stringp = previous_charp(stringp);
	}
	return (matchendp);
}
686
/*
 * test_repeated_group() - match what follows a repeated group
 *
 * The rest of the expression (regexp) is tried at stringp first;
 * after each failure the next position to try is taken from
 * stringp_stack[] with pop_stringp(), until stringp has reached
 * repeat_startp.
 *
 * Returns the result of the first successful test_string() call, or
 * null if every position fails or the stack runs empty.
 */
static const char *
test_repeated_group(const char *repeat_startp,
	const char *stringp,
	const char *regexp)
{
	const char *matchendp;

	for (;;) {
		matchendp = test_string(stringp, regexp);
		if ((matchendp != NULL) || (stringp <= repeat_startp))
			return (matchendp);

		stringp = pop_stringp();
		if (stringp == NULL)
			return (NULL);
	}
}
705
706static const char *
707test_string(const char *stringp,
708	const char *regexp)
709{
710	/*
711	 * returns a pointer to the first character following the first
712	 * substring of the string addressed by stringp that matches
713	 * the compiled regular expression addressed by regexp
714	 */
715
716	unsigned int		group_length;
717	int			nextra_matches_allowed;
718	int			nmust_match;
719	wchar_t			regex_wchar;
720	int			regex_char_size;
721	const char		*repeat_startp;
722	unsigned int		return_argn;
723	wchar_t			string_wchar;
724	int			string_char_size;
725	unsigned int		substringn;
726	char_test_condition_t	test_condition;
727	const char		*test_stringp;
728
729	for (;;) {
730
731		/*
732		 * Exit the loop via a return whenever there's a match
733		 * or it's clear that there can be no match.
734		 */
735
736	    switch ((int)*regexp) {
737
738		/*
739		 * No fall-through.
740		 * Each case ends with either a return or with stringp
741		 * addressing the next character to be tested and regexp
742		 * addressing the next compiled regular expression
743		 *
744		 * NOTE: The comments for each case give the meaning
745		 *	of the compiled regular expression decoded by the case
746		 *	and the character string that the compiled regular
747		 *	expression uses to encode the case.  Each single
748		 *	character encoded in the compiled regular expression
749		 *	is shown enclosed in angle brackets (<>).  Each
750		 *	compiled regular expression begins with a marker
751		 *	character which is shown as a named constant
752		 *	(e.g. <ASCII_CHAR>). Character constants are shown
753		 *	enclosed in single quotes (e.g. <'$'>).  All other
754		 *	single characters encoded in the compiled regular
755		 *	expression are shown as lower case variable names
756		 *	(e.g. <ascii_char> or <multibyte_char>). Multicharacter
757		 *	strings encoded in the compiled regular expression
		 *	are shown as variable names followed by ellipses
759		 *	(e.g. <compiled_regex...>).
760		 */
761
762	    case ASCII_CHAR: /* single ASCII char */
763
764		/* encoded as <ASCII_CHAR><ascii_char> */
765
766		regexp++;
767		if (*regexp == *stringp) {
768		    regexp++;
769		    stringp++;
770		} else {
771		    return ((char *)0);
772		}
773		break;		/* end case ASCII_CHAR */
774
775	    case MULTIBYTE_CHAR: /* single multibyte char */
776
777		/* encoded as <MULTIBYTE_CHAR><multibyte_char> */
778
779		regexp++;
780		regex_char_size = get_wchar(&regex_wchar, regexp);
781		string_char_size = get_wchar(&string_wchar, stringp);
782		if ((string_char_size <= 0) || (string_wchar != regex_wchar)) {
783		    return ((char *)0);
784		} else {
785		    regexp += regex_char_size;
786		    stringp += string_char_size;
787		}
788		break;		/* end case MULTIBYTE_CHAR */
789
790	    case ANY_CHAR: /* any single ASCII or multibyte char */
791
792		/* encoded as <ANY_CHAR> */
793
794		if (!multibyte) {
795		    if (*stringp == '\0') {
796			return ((char *)0);
797		    } else {
798			regexp++;
799			stringp++;
800		    }
801		} else {
802		    string_char_size = get_wchar(&string_wchar, stringp);
803		    if (string_char_size <= 0) {
804			return ((char *)0);
805		    } else {
806			regexp++;
807			stringp += string_char_size;
808		    }
809		}
810		break;	/* end case ANY_CHAR */
811
812	    case IN_ASCII_CHAR_CLASS:		/* [.....] */
813	    case NOT_IN_ASCII_CHAR_CLASS:
814
815		/*
816		 * encoded as <IN_ASCII_CHAR_CLASS><class_length><class...>
817		 *	or <NOT_IN_ASCII_CHAR_CLASS><class_length><class...>
818		 *
819		 * NOTE: <class_length> includes the <class_length> byte
820		 */
821
822		if ((int)*regexp == (int)IN_ASCII_CHAR_CLASS) {
823		    test_condition = IN_CLASS;
824		} else {
825		    test_condition = NOT_IN_CLASS;
826		}
827		regexp++; /* point to the <class_length> byte */
828
829		if ((*stringp != '\0') &&
830		    (test_char_against_ascii_class(*stringp, regexp,
831		    test_condition) == CONDITION_TRUE)) {
832		    regexp += (int)*regexp; /* add the class length to regexp */
833		    stringp++;
834		} else {
835		    return ((char *)0);
836		}
837		break; /* end case IN_ASCII_CHAR_CLASS */
838
839	    case IN_MULTIBYTE_CHAR_CLASS:	/* [....] */
840	    case NOT_IN_MULTIBYTE_CHAR_CLASS:
841
842		/*
843		 * encoded as <IN_MULTIBYTE_CHAR_CLASS><class_length><class...>
844		 * 	or <NOT_IN_MULTIBYTE_CHAR_CLASS><class_length><class...>
845		 *
846		 * NOTE: <class_length> includes the <class_length> byte
847		 */
848
849		if ((int)*regexp == (int)IN_MULTIBYTE_CHAR_CLASS) {
850		    test_condition = IN_CLASS;
851		} else {
852		    test_condition = NOT_IN_CLASS;
853		}
854		regexp++; /* point to the <class_length> byte */
855
856		string_char_size = get_wchar(&string_wchar, stringp);
857		if ((string_char_size > 0) &&
858		    (test_char_against_multibyte_class(string_wchar, regexp,
859		    test_condition) == CONDITION_TRUE)) {
860		    regexp += (int)*regexp; /* add the class length to regexp */
861		    stringp += string_char_size;
862		} else {
863		    return ((char *)0);
864		}
865		break; /* end case IN_MULTIBYTE_CHAR_CLASS */
866
867	    case IN_OLD_ASCII_CHAR_CLASS:	/* [...] */
868	    case NOT_IN_OLD_ASCII_CHAR_CLASS:
869
870		/*
871		 * encoded as <IN_OLD_ASCII_CHAR_CLASS><class_length><class...>
872		 *	or <NOT_IN_OLD_ASCII_CHAR_CLASS><class_length><class...>
873		 *
874		 * NOTE: <class_length> includes the <class_length> byte
875		 */
876
877		if ((int)*regexp == (int)IN_OLD_ASCII_CHAR_CLASS) {
878		    test_condition = IN_CLASS;
879		} else {
880		    test_condition = NOT_IN_CLASS;
881		}
882		regexp++; /* point to the <class_length> byte */
883
884		if ((*stringp != '\0') &&
885		    (test_char_against_old_ascii_class(*stringp, regexp,
886		    test_condition) == CONDITION_TRUE)) {
887		    regexp += (int)*regexp; /* add the class length to regexp */
888		    stringp++;
889		} else {
890		    return ((char *)0);
891		}
892		break; /* end case [NOT_]IN_OLD_ASCII_CHAR_CLASS */
893
894	    case SIMPLE_GROUP: /* (.....) */
895
896		/* encoded as <SIMPLE_GROUP><group_length> */
897
898		regexp += 2;
899		break;		/* end case SIMPLE_GROUP */
900
901	    case END_GROUP:	/* (.....) */
902
903		/* encoded as <END_GROUP><groupn> */
904
905		regexp += 2;
906		break;		/* end case END_GROUP */
907
908	    case SAVED_GROUP:	/* (.....)$0-9 */
909
910		/* encoded as <SAVED_GROUP><substringn> */
911
912		regexp++;
913		substringn = (unsigned int)*regexp;
914		if (substringn >= NSUBSTRINGS)
915		    return ((char *)0);
916		substring_startp[substringn] = stringp;
917		regexp++;
918		break;		/* end case SAVED_GROUP */
919
920	    case END_SAVED_GROUP:	/* (.....)$0-9 */
921
922		/*
923		 * encoded as <END_SAVED_GROUP><substringn>\
924		 *	<return_arg_number[substringn]>
925		 */
926
927		regexp++;
928		substringn = (unsigned int)*regexp;
929		if (substringn >= NSUBSTRINGS)
930		    return ((char *)0);
931		substring_endp[substringn] = stringp;
932		regexp++;
933		return_argn = (unsigned int)*regexp;
934		if (return_argn >= NSUBSTRINGS)
935		    return ((char *)0);
936		return_arg_number[substringn] = return_argn;
937		regexp++;
938		break;		/* end case END_SAVED_GROUP */
939
940	    case ASCII_CHAR|ZERO_OR_MORE:  /* char* */
941
942		/* encoded as <ASCII_CHAR|ZERO_OR_MORE><ascii_char> */
943
944		regexp++;
945		repeat_startp = stringp;
946		while (*stringp == *regexp) {
947		    stringp++;
948		}
949		regexp++;
950		return (test_repeated_ascii_char(repeat_startp,
951		    stringp, regexp));
952
953		/* end case ASCII_CHAR|ZERO_OR_MORE */
954
955	    case ASCII_CHAR|ONE_OR_MORE:   /* char+ */
956
957		/* encoded as <ASCII_CHAR|ONE_OR_MORE><ascii_char> */
958
959		regexp++;
960		if (*stringp != *regexp) {
961		    return ((char *)0);
962		} else {
963		    stringp++;
964		    repeat_startp = stringp;
965		    while (*stringp == *regexp) {
966			stringp++;
967		    }
968		    regexp++;
969		    return (test_repeated_ascii_char(repeat_startp, stringp,
970			regexp));
971		}
972		/* end case ASCII_CHAR|ONE_OR_MORE */
973
974	    case ASCII_CHAR|COUNT:	/* char{min_count,max_count} */
975
976		/*
977		 * encoded as <ASCII_CHAR|COUNT><ascii_char>\
978		 *	<minimum_match_count><maximum_match_count>
979		 */
980
981		regexp++;
982		get_match_counts(&nmust_match, &nextra_matches_allowed,
983		    regexp + 1);
984		while ((*stringp == *regexp) && (nmust_match > 0)) {
985		    nmust_match--;
986		    stringp++;
987		}
988		if (nmust_match > 0) {
989		    return ((char *)0);
990		} else if (nextra_matches_allowed == UNLIMITED) {
991		    repeat_startp = stringp;
992		    while (*stringp == *regexp) {
993			stringp++;
994		    }
995		    regexp += 3;
996		    return (test_repeated_ascii_char(repeat_startp, stringp,
997			regexp));
998		} else {
999		    repeat_startp = stringp;
1000		    while ((*stringp == *regexp) &&
1001			(nextra_matches_allowed > 0)) {
1002			nextra_matches_allowed--;
1003			stringp++;
1004		    }
1005		    regexp += 3;
1006		    return (test_repeated_ascii_char(repeat_startp, stringp,
1007			regexp));
1008		}
1009		/* end case ASCII_CHAR|COUNT */
1010
1011	    case MULTIBYTE_CHAR|ZERO_OR_MORE:   /* char* */
1012
1013		/* encoded as <MULTIBYTE_CHAR|ZERO_OR_MORE><multibyte_char> */
1014
1015		regexp++;
1016		regex_char_size = get_wchar(&regex_wchar, regexp);
1017		repeat_startp = stringp;
1018		string_char_size = get_wchar(&string_wchar, stringp);
1019		while ((string_char_size > 0) &&
1020		    (string_wchar == regex_wchar)) {
1021		    stringp += string_char_size;
1022		    string_char_size = get_wchar(&string_wchar, stringp);
1023		}
1024		regexp += regex_char_size;
1025		return (test_repeated_multibyte_char(repeat_startp, stringp,
1026		    regexp));
1027
1028		/* end case MULTIBYTE_CHAR|ZERO_OR_MORE */
1029
1030	    case MULTIBYTE_CHAR|ONE_OR_MORE:    /* char+ */
1031
1032		/* encoded as <MULTIBYTE_CHAR|ONE_OR_MORE><multibyte_char> */
1033
1034		regexp++;
1035		regex_char_size = get_wchar(&regex_wchar, regexp);
1036		string_char_size = get_wchar(&string_wchar, stringp);
1037		if ((string_char_size <= 0) || (string_wchar != regex_wchar)) {
1038		    return ((char *)0);
1039		} else {
1040		    stringp += string_char_size;
1041		    repeat_startp = stringp;
1042		    string_char_size = get_wchar(&string_wchar, stringp);
1043		    while ((string_char_size > 0) &&
1044			(string_wchar == regex_wchar)) {
1045			stringp += string_char_size;
1046			string_char_size = get_wchar(&string_wchar, stringp);
1047		    }
1048		    regexp += regex_char_size;
1049		    return (test_repeated_multibyte_char(repeat_startp, stringp,
1050			regexp));
1051		}
1052		/* end case MULTIBYTE_CHAR|ONE_OR_MORE */
1053
1054	    case MULTIBYTE_CHAR|COUNT:		/* char{min_count,max_count} */
1055
1056		/*
1057		 * encoded as <MULTIBYTE_CHAR|COUNT><multibyte_char>\
1058		 * 	<minimum_match_count><maximum_match_count>
1059		 */
1060
1061		regexp++;
1062		regex_char_size = get_wchar(&regex_wchar, regexp);
1063		get_match_counts(&nmust_match, &nextra_matches_allowed,
1064		    regexp + regex_char_size);
1065		string_char_size = get_wchar(&string_wchar, stringp);
1066		while ((string_char_size > 0) &&
1067		    (string_wchar == regex_wchar) &&
1068		    (nmust_match > 0)) {
1069
1070		    nmust_match--;
1071		    stringp += string_char_size;
1072		    string_char_size = get_wchar(&string_wchar, stringp);
1073		}
1074		if (nmust_match > 0) {
1075		    return ((char *)0);
1076		} else if (nextra_matches_allowed == UNLIMITED) {
1077		    repeat_startp = stringp;
1078		    while ((string_char_size > 0) &&
1079			(string_wchar == regex_wchar)) {
1080			stringp += string_char_size;
1081			string_char_size = get_wchar(&string_wchar, stringp);
1082		    }
1083		    regexp += regex_char_size + 2;
1084		    return (test_repeated_multibyte_char(repeat_startp, stringp,
1085			regexp));
1086		} else {
1087		    repeat_startp = stringp;
1088		    while ((string_char_size > 0) &&
1089			(string_wchar == regex_wchar) &&
1090			(nextra_matches_allowed > 0)) {
1091			nextra_matches_allowed--;
1092			stringp += string_char_size;
1093			string_char_size = get_wchar(&string_wchar, stringp);
1094		    }
1095		    regexp += regex_char_size + 2;
1096		    return (test_repeated_multibyte_char(repeat_startp, stringp,
1097			regexp));
1098		}
1099		/* end case MULTIBYTE_CHAR|COUNT */
1100
1101	    case ANY_CHAR|ZERO_OR_MORE:		/* .* */
1102
1103		/* encoded as <ANY_CHAR|ZERO_OR_MORE> */
1104
1105		repeat_startp = stringp;
1106		if (!multibyte) {
1107		    while (*stringp != '\0') {
1108			stringp++;
1109		    }
1110		    regexp++;
1111		    return (test_repeated_ascii_char(repeat_startp, stringp,
1112			regexp));
1113		} else {
1114		    string_char_size = get_wchar(&string_wchar, stringp);
1115		    while (string_char_size > 0) {
1116			stringp += string_char_size;
1117			string_char_size = get_wchar(&string_wchar, stringp);
1118		    }
1119		    regexp++;
1120		    return (test_repeated_multibyte_char(repeat_startp, stringp,
1121			regexp));
1122		}
1123		/* end case <ANY_CHAR|ZERO_OR_MORE> */
1124
1125	    case ANY_CHAR|ONE_OR_MORE:		/* .+ */
1126
1127		/* encoded as <ANY_CHAR|ONE_OR_MORE> */
1128
1129		if (!multibyte) {
1130		    if (*stringp == '\0') {
1131			return ((char *)0);
1132		    } else {
1133			stringp++;
1134			repeat_startp = stringp;
1135			while (*stringp != '\0') {
1136			    stringp++;
1137			}
1138			regexp++;
1139			return (test_repeated_ascii_char(repeat_startp, stringp,
1140			    regexp));
1141		    }
1142		} else {
1143		    string_char_size = get_wchar(&string_wchar, stringp);
1144		    if (string_char_size <= 0) {
1145			return ((char *)0);
1146		    } else {
1147			stringp += string_char_size;
1148			repeat_startp = stringp;
1149			string_char_size = get_wchar(&string_wchar, stringp);
1150			while (string_char_size > 0) {
1151			    stringp += string_char_size;
1152			    string_char_size =
1153				get_wchar(&string_wchar, stringp);
1154			}
1155			regexp++;
1156			return (test_repeated_multibyte_char(repeat_startp,
1157			    stringp, regexp));
1158		    }
1159		}
1160		/* end case <ANY_CHAR|ONE_OR_MORE> */
1161
1162	    case ANY_CHAR|COUNT:	/* .{min_count,max_count} */
1163
1164		/*
1165		 * encoded as	<ANY_CHAR|COUNT>\
1166		 *		<minimum_match_count><maximum_match_count>
1167		 */
1168
1169		get_match_counts(&nmust_match, &nextra_matches_allowed,
1170		    regexp + 1);
1171		if (!multibyte) {
1172		    while ((*stringp != '\0') && (nmust_match > 0)) {
1173			nmust_match--;
1174			stringp++;
1175		    }
1176		    if (nmust_match > 0) {
1177			return ((char *)0);
1178		    } else if (nextra_matches_allowed == UNLIMITED) {
1179			repeat_startp = stringp;
1180			while (*stringp != '\0') {
1181			    stringp++;
1182			}
1183			regexp += 3;
1184			return (test_repeated_ascii_char(repeat_startp, stringp,
1185			    regexp));
1186		    } else {
1187			repeat_startp = stringp;
1188			while ((*stringp != '\0') &&
1189			    (nextra_matches_allowed > 0)) {
1190			    nextra_matches_allowed--;
1191			    stringp++;
1192			}
1193			regexp += 3;
1194			return (test_repeated_ascii_char(repeat_startp, stringp,
1195			    regexp));
1196		    }
1197		} else { /* multibyte character */
1198
1199		    string_char_size = get_wchar(&string_wchar, stringp);
1200		    while ((string_char_size > 0) && (nmust_match > 0)) {
1201			nmust_match--;
1202			stringp += string_char_size;
1203			string_char_size = get_wchar(&string_wchar, stringp);
1204		    }
1205		    if (nmust_match > 0) {
1206			return ((char *)0);
1207		    } else if (nextra_matches_allowed == UNLIMITED) {
1208			repeat_startp = stringp;
1209			while (string_char_size > 0) {
1210			    stringp += string_char_size;
1211			    string_char_size =
1212				get_wchar(&string_wchar, stringp);
1213			}
1214			regexp += 3;
1215			return (test_repeated_multibyte_char(repeat_startp,
1216			    stringp, regexp));
1217		    } else {
1218			repeat_startp = stringp;
1219			while ((string_char_size > 0) &&
1220			    (nextra_matches_allowed > 0)) {
1221			    nextra_matches_allowed--;
1222			    stringp += string_char_size;
1223			    string_char_size =
1224				get_wchar(&string_wchar, stringp);
1225			}
1226			regexp += 3;
1227			return (test_repeated_multibyte_char(repeat_startp,
1228			    stringp, regexp));
1229		    }
1230		} /* end case ANY_CHAR|COUNT */
1231
1232	    case IN_ASCII_CHAR_CLASS|ZERO_OR_MORE:	/* [.....]* */
1233	    case NOT_IN_ASCII_CHAR_CLASS|ZERO_OR_MORE:
1234
1235		/*
1236		 * encoded as	<IN_ASCII_CHAR_CLASS|ZERO_OR_MORE>\
1237		 *		<class_length><class ...>
1238		 *	or	<NOT_IN_ASCII_CHAR_CLASS|ZERO_OR_MORE>\
1239		 *		<class_length><class ...>
1240		 *
1241		 * NOTE: <class_length> includes the <class_length> byte
1242		 */
1243
1244		if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|ZERO_OR_MORE)) {
1245		    test_condition = IN_CLASS;
1246		} else {
1247		    test_condition = NOT_IN_CLASS;
1248		}
1249		regexp++; /* point to the <class_length> byte */
1250
1251		repeat_startp = stringp;
1252		while ((*stringp != '\0') &&
1253		    (test_char_against_ascii_class(*stringp, regexp,
1254		    test_condition) == CONDITION_TRUE)) {
1255		    stringp++;
1256		}
1257		regexp += (int)*regexp; /* add the class length to regexp */
1258		return (test_repeated_ascii_char(repeat_startp, stringp,
1259		    regexp));
1260
1261		/* end case IN_ASCII_CHAR_CLASS|ZERO_OR_MORE */
1262
1263	    case IN_ASCII_CHAR_CLASS|ONE_OR_MORE:	/* [.....]+ */
1264	    case NOT_IN_ASCII_CHAR_CLASS|ONE_OR_MORE:
1265
1266		/*
1267		 * encoded as	<IN_ASCII_CHAR_CLASS|ONE_OR_MORE>\
1268		 *		<class_length><class ...>
1269		 *	or	<NOT_IN_ASCII_CHAR_CLASS|ONE_OR_MORE>\
1270		 * 		<class_length><class ...>
1271		 *
1272		 * NOTE: <class_length> includes the <class_length> byte
1273		 */
1274
1275		if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|ONE_OR_MORE)) {
1276		    test_condition = IN_CLASS;
1277		} else {
1278		    test_condition = NOT_IN_CLASS;
1279		}
1280		regexp++; /* point to the <class_length> byte */
1281
1282		if ((*stringp == '\0') ||
1283		    (test_char_against_ascii_class(*stringp, regexp,
1284		    test_condition) != CONDITION_TRUE)) {
1285		    return ((char *)0);
1286		} else {
1287		    stringp++;
1288		    repeat_startp = stringp;
1289		    while ((*stringp != '\0') &&
1290			(test_char_against_ascii_class(*stringp, regexp,
1291			test_condition) == CONDITION_TRUE)) {
1292			stringp++;
1293		    }
1294		    regexp += (int)*regexp; /* add the class length to regexp */
1295		    return (test_repeated_ascii_char(repeat_startp, stringp,
1296			regexp));
1297		}
1298		/* end case IN_ASCII_CHAR_CLASS|ONE_OR_MORE */
1299
1300	    case IN_ASCII_CHAR_CLASS | COUNT: /* [.....]{min_count,max_count} */
1301	    case NOT_IN_ASCII_CHAR_CLASS | COUNT:
1302
1303		/*
1304		 * encoded as	<IN_ASCII_CHAR_CLASS|COUNT><class_length>\
1305		 * 		<class ...><minimum_match_count>\
1306		 *		<maximum_match_count>
1307		 *	or	<NOT_IN_ASCII_CHAR_CLASS|COUNT><class_length>\
1308		 *		<class ...><minimum_match_count>\
1309		 *		<maximum_match_count>
1310		 *
1311		 * NOTE: <class_length> includes the <class_length> byte,
1312		 *	but not the <minimum_match_count> or
1313		 *	<maximum_match_count> bytes
1314		 */
1315
1316		if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|COUNT)) {
1317		    test_condition = IN_CLASS;
1318		} else {
1319		    test_condition = NOT_IN_CLASS;
1320		}
1321		regexp++; /* point to the <class_length> byte */
1322
1323		get_match_counts(&nmust_match, &nextra_matches_allowed,
1324		    regexp + (int)*regexp);
1325		while ((*stringp != '\0') &&
1326		    (test_char_against_ascii_class(*stringp, regexp,
1327		    test_condition) == CONDITION_TRUE) &&
1328		    (nmust_match > 0)) {
1329		    nmust_match--;
1330		    stringp++;
1331		}
1332		if (nmust_match > 0) {
1333		    return ((char *)0);
1334		} else if (nextra_matches_allowed == UNLIMITED) {
1335		    repeat_startp = stringp;
1336		    while ((*stringp != '\0') &&
1337			(test_char_against_ascii_class(*stringp, regexp,
1338			test_condition) == CONDITION_TRUE)) {
1339			stringp++;
1340		    }
1341		    regexp += (int)*regexp + 2;
1342		    return (test_repeated_ascii_char(repeat_startp, stringp,
1343			regexp));
1344		} else {
1345		    repeat_startp = stringp;
1346		    while ((*stringp != '\0') &&
1347			(test_char_against_ascii_class(*stringp, regexp,
1348			test_condition) == CONDITION_TRUE) &&
1349			(nextra_matches_allowed > 0)) {
1350			nextra_matches_allowed--;
1351			stringp++;
1352		    }
1353		    regexp += (int)*regexp + 2;
1354		    return (test_repeated_ascii_char(repeat_startp, stringp,
1355			regexp));
1356		}
1357		/* end case IN_ASCII_CHAR_CLASS|COUNT */
1358
1359	    case IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE:	/* [.....]* */
1360	    case NOT_IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE:
1361
1362		/*
1363		 * encoded as	<IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE>\
1364		 *		<class_length><class ...>
1365		 *	or	<NOT_IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE>\
1366		 *		<class_length><class ...>
1367		 *
1368		 * NOTE: <class_length> includes the <class_length> byte
1369		 */
1370
1371		if ((int)*regexp ==
1372		    (int)(IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE)) {
1373		    test_condition = IN_CLASS;
1374		} else {
1375		    test_condition = NOT_IN_CLASS;
1376		}
1377		regexp++; /* point to the <class_length> byte */
1378
1379		repeat_startp = stringp;
1380		string_char_size = get_wchar(&string_wchar, stringp);
1381		while ((string_char_size > 0) &&
1382		    (test_char_against_multibyte_class(string_wchar, regexp,
1383		    test_condition) == CONDITION_TRUE)) {
1384		    stringp += string_char_size;
1385		    string_char_size = get_wchar(&string_wchar, stringp);
1386		}
1387		regexp += (int)*regexp; /* add the class length to regexp */
1388		return (test_repeated_multibyte_char(repeat_startp, stringp,
1389		    regexp));
1390
1391		/* end case IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE */
1392
1393	    case IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE:	/* [.....]+ */
1394	    case NOT_IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE:
1395
1396		/*
1397		 * encoded as	<IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE>\
1398		 *		<class_length><class ...>
1399		 *	or	<NOT_IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE>\
1400		 *		<class_length><class ...>
1401		 *
1402		 * NOTE: <class_length> includes the <class_length> byte
1403		 */
1404
1405		if ((int)*regexp ==
1406		    (int)(IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE)) {
1407		    test_condition = IN_CLASS;
1408		} else {
1409		    test_condition = NOT_IN_CLASS;
1410		}
1411		regexp++; /* point to the <class_length> byte */
1412
1413		string_char_size = get_wchar(&string_wchar, stringp);
1414		if ((string_char_size <= 0) ||
1415		    (test_char_against_multibyte_class(string_wchar, regexp,
1416		    test_condition) != CONDITION_TRUE)) {
1417		    return ((char *)0);
1418		} else {
1419		    stringp += string_char_size;
1420		    repeat_startp = stringp;
1421		    string_char_size = get_wchar(&string_wchar, stringp);
1422		    while ((string_char_size > 0) &&
1423			(test_char_against_multibyte_class(string_wchar,
1424			regexp, test_condition) == CONDITION_TRUE)) {
1425			stringp += string_char_size;
1426			string_char_size = get_wchar(&string_wchar, stringp);
1427		    }
1428		    regexp += (int)*regexp; /* add the class length to regexp */
1429		    return (test_repeated_multibyte_char(repeat_startp, stringp,
1430			regexp));
1431		}
1432		/* end case IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE */
1433
1434	    case IN_MULTIBYTE_CHAR_CLASS|COUNT: /* [...]{min_count,max_count} */
1435	    case NOT_IN_MULTIBYTE_CHAR_CLASS|COUNT:
1436
1437		/*
1438		 * encoded as	<IN_MULTIBYTE_CHAR_CLASS|COUNT>\
1439		 *		<class_length><class ...><min_count><max_count>
1440		 *	or	<NOT_IN_MULTIBYTE_CHAR_CLASS|COUNT>\
1441		 *		<class_length><class ...><min_count><max_count>
1442		 *
1443		 * NOTE: <class_length> includes the <class_length> byte
1444		 *	but not the <minimum_match_count> or
1445		 *	<maximum_match_count> bytes
1446		 */
1447
1448		if ((int)*regexp == (int)(IN_MULTIBYTE_CHAR_CLASS|COUNT)) {
1449		    test_condition = IN_CLASS;
1450		} else {
1451		    test_condition = NOT_IN_CLASS;
1452		}
1453		regexp++; /* point to the <class_length> byte */
1454
1455		get_match_counts(&nmust_match, &nextra_matches_allowed,
1456		    regexp + (int)*regexp);
1457		string_char_size = get_wchar(&string_wchar, stringp);
1458		while ((string_char_size > 0) &&
1459		    (test_char_against_multibyte_class(string_wchar, regexp,
1460		    test_condition) == CONDITION_TRUE) &&
1461		    (nmust_match > 0)) {
1462		    nmust_match--;
1463		    stringp += string_char_size;
1464		    string_char_size = get_wchar(&string_wchar, stringp);
1465		}
1466		if (nmust_match > 0) {
1467		    return ((char *)0);
1468		} else if (nextra_matches_allowed == UNLIMITED) {
1469		    repeat_startp = stringp;
1470		    while ((string_char_size > 0) &&
1471			(test_char_against_multibyte_class(string_wchar,
1472			regexp, test_condition) == CONDITION_TRUE)) {
1473			stringp += string_char_size;
1474			string_char_size = get_wchar(&string_wchar, stringp);
1475		    }
1476		    regexp += (int)*regexp + 2;
1477		    return (test_repeated_multibyte_char(repeat_startp, stringp,
1478			regexp));
1479		} else {
1480		    repeat_startp = stringp;
1481		    while ((string_char_size > 0) &&
1482			(test_char_against_multibyte_class(string_wchar,
1483			regexp, test_condition) == CONDITION_TRUE) &&
1484			(nextra_matches_allowed > 0)) {
1485			nextra_matches_allowed--;
1486			stringp += string_char_size;
1487			string_char_size = get_wchar(&string_wchar, stringp);
1488		    }
1489		    regexp += (int)*regexp + 2;
1490		    return (test_repeated_multibyte_char(repeat_startp, stringp,
1491			regexp));
1492		}
1493		/* end case IN_MULTIBYTE_CHAR_CLASS|COUNT */
1494
1495	    case IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE:	/* [.....]* */
1496	    case NOT_IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE:
1497
1498		/*
1499		 * encoded as	<IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE>\
1500		 *		<class_length><class ...>
1501		 *	or	<NOT_IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE>\
1502		 *		<class_length><class ...>
1503		 *
1504		 * NOTE: <class_length> includes the <class_length> byte
1505		 */
1506
1507		if ((int)*regexp ==
1508		    (int)(IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE)) {
1509		    test_condition = IN_CLASS;
1510		} else {
1511		    test_condition = NOT_IN_CLASS;
1512		}
1513		regexp++; /* point to the <class_length> byte */
1514
1515		repeat_startp = stringp;
1516		while ((*stringp != '\0') &&
1517		    (test_char_against_old_ascii_class(*stringp, regexp,
1518		    test_condition) == CONDITION_TRUE)) {
1519		    stringp++;
1520		}
1521		regexp += (int)*regexp; /* add the class length to regexp */
1522		return (test_repeated_ascii_char(repeat_startp, stringp,
1523		    regexp));
1524
1525		/* end case IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE */
1526
1527	    case IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE:	/* [.....]+ */
1528	    case NOT_IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE:
1529
1530		/*
1531		 * encoded as	<IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE>\
1532		 *		<class_length><class ...>
1533		 *	or	<NOT_IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE>\
1534		 *		<class_length><class ...>
1535		 *
1536		 * NOTE: <class_length> includes the <class_length> byte
1537		 */
1538
1539		if ((int)*regexp ==
1540		    (int)(IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE)) {
1541		    test_condition = IN_CLASS;
1542		} else {
1543		    test_condition = NOT_IN_CLASS;
1544		}
1545		regexp++; /* point to the <class_length> byte */
1546
1547		if ((*stringp == '\0') ||
1548		    (test_char_against_old_ascii_class(*stringp, regexp,
1549		    test_condition) != CONDITION_TRUE)) {
1550		    return ((char *)0);
1551		} else {
1552		    stringp++;
1553		    repeat_startp = stringp;
1554		    while ((*stringp != '\0') &&
1555			(test_char_against_old_ascii_class(*stringp, regexp,
1556			test_condition) == CONDITION_TRUE)) {
1557			stringp++;
1558		    }
1559		    regexp += (int)*regexp; /* add the class length to regexp */
1560		    return (test_repeated_ascii_char(repeat_startp, stringp,
1561			regexp));
1562		}
1563		/* end case IN_OLD_ASCII_CHAR_CLASS | ONE_OR_MORE */
1564
1565	    case IN_OLD_ASCII_CHAR_CLASS|COUNT:	/* [...]{min_count,max_count} */
1566	    case NOT_IN_OLD_ASCII_CHAR_CLASS|COUNT:
1567
1568		/*
1569		 * encoded as	<IN_OLD_ASCII_CHAR_CLASS|COUNT><class_length>\
1570		 *		<class ...><minimum_match_count>\
1571		 *		<maximum_match_count>
1572		 *	or	<NOT_IN_OLD_ASCII_CHAR_CLASS|COUNT>\
1573		 *		<class_length><class ...><minimum_match_count>\
1574		 *		<maximum_match_count>
1575		 *
1576		 * NOTE: <class_length> includes the <class_length> byte
1577		 *	but not the <minimum_match_count> or
1578		 *	<maximum_match_count> bytes
1579		 */
1580
1581		if ((int)*regexp == (int)(IN_OLD_ASCII_CHAR_CLASS|COUNT)) {
1582		    test_condition = IN_CLASS;
1583		} else {
1584		    test_condition = NOT_IN_CLASS;
1585		}
1586		regexp++; /* point to the <class_length> byte */
1587
1588		get_match_counts(&nmust_match, &nextra_matches_allowed,
1589		    regexp + (int)*regexp);
1590		while ((*stringp != '\0') &&
1591		    (test_char_against_old_ascii_class(*stringp, regexp,
1592		    test_condition) == CONDITION_TRUE) &&
1593		    (nmust_match > 0)) {
1594		    nmust_match--;
1595		    stringp++;
1596		}
1597		if (nmust_match > 0) {
1598		    return ((char *)0);
1599		} else if (nextra_matches_allowed == UNLIMITED) {
1600		    repeat_startp = stringp;
1601		    while ((*stringp != '\0') &&
1602			(test_char_against_old_ascii_class(*stringp, regexp,
1603			test_condition) == CONDITION_TRUE)) {
1604			stringp++;
1605		    }
1606		    regexp += (int)*regexp + 2;
1607		    return (test_repeated_ascii_char(repeat_startp, stringp,
1608			regexp));
1609		} else {
1610		    repeat_startp = stringp;
1611		    while ((*stringp != '\0') &&
1612			(test_char_against_old_ascii_class(*stringp, regexp,
1613			test_condition) == CONDITION_TRUE) &&
1614			(nextra_matches_allowed > 0)) {
1615			nextra_matches_allowed--;
1616			stringp++;
1617		    }
1618		    regexp += (int)*regexp + 2;
1619		    return (test_repeated_ascii_char(repeat_startp, stringp,
1620			regexp));
1621		}
1622		/* end case IN_OLD_ASCII_CHAR_CLASS|COUNT */
1623
1624	    case ZERO_OR_MORE_GROUP:		/* (.....)* */
1625	    case ZERO_OR_MORE_GROUP|ADD_256_TO_GROUP_LENGTH:
1626	    case ZERO_OR_MORE_GROUP|ADD_512_TO_GROUP_LENGTH:
1627	    case ZERO_OR_MORE_GROUP|ADD_768_TO_GROUP_LENGTH:
1628
1629		/*
1630		 * encoded as	<ZERO_OR_MORE_GROUP|ADDED_LENGTH_BITS>\
1631		 *		<group_length><compiled_regex...>\
1632		 *		<END_GROUP|ZERO_OR_MORE><groupn>
1633		 *
1634		 * NOTE:
1635		 *
1636		 * group_length + (256 * ADDED_LENGTH_BITS) ==
1637		 *	length_of(<compiled_regex...><END_GROUP|ZERO_OR_MORE>\
1638		 *		<groupn>)
1639		 *
1640		 */
1641
1642		group_length =
1643		    (((unsigned int)*regexp & ADDED_LENGTH_BITS) <<
1644		    TIMES_256_SHIFT);
1645		regexp++;
1646		group_length += (unsigned int)*regexp;
1647		regexp++;
1648		repeat_startp = stringp;
1649		test_stringp = test_string(stringp, regexp);
1650		while (test_stringp != (char *)0) {
1651		    if (push_stringp(stringp) == (char *)0)
1652			return ((char *)0);
1653		    stringp = test_stringp;
1654		    test_stringp = test_string(stringp, regexp);
1655		}
1656		regexp += group_length;
1657		return (test_repeated_group(repeat_startp, stringp, regexp));
1658
1659		/* end case ZERO_OR_MORE_GROUP */
1660
1661	    case END_GROUP|ZERO_OR_MORE:	/* (.....)* */
1662
1663		/* encoded as <END_GROUP|ZERO_OR_MORE> */
1664
1665		/* return from recursive call to test_string() */
1666
1667		return ((char *)stringp);
1668
1669		/* end case END_GROUP|ZERO_OR_MORE */
1670
1671	    case ONE_OR_MORE_GROUP:		/* (.....)+ */
1672	    case ONE_OR_MORE_GROUP|ADD_256_TO_GROUP_LENGTH:
1673	    case ONE_OR_MORE_GROUP|ADD_512_TO_GROUP_LENGTH:
1674	    case ONE_OR_MORE_GROUP|ADD_768_TO_GROUP_LENGTH:
1675
1676		/*
1677		 * encoded as	<ONE_OR_MORE_GROUP|ADDED_LENGTH_BITS>\
1678		 *		<group_length><compiled_regex...>\
1679		 *		<END_GROUP|ONE_OR_MORE><groupn>
1680		 *
1681		 * NOTE:
1682		 *
1683		 * group_length + (256 * ADDED_LENGTH_BITS) ==
1684		 * 	length_of(<compiled_regex...><END_GROUP|ONE_OR_MORE>\
1685		 *		<groupn>)
1686		 */
1687
1688		group_length =
1689		    (((unsigned int)*regexp & ADDED_LENGTH_BITS) <<
1690		    TIMES_256_SHIFT);
1691		regexp++;
1692		group_length += (unsigned int)*regexp;
1693		regexp++;
1694		stringp = test_string(stringp, regexp);
1695		if (stringp == (char *)0)
1696		    return ((char *)0);
1697		repeat_startp = stringp;
1698		test_stringp = test_string(stringp, regexp);
1699		while (test_stringp != (char *)0) {
1700		    if (push_stringp(stringp) == (char *)0)
1701			return ((char *)0);
1702		    stringp = test_stringp;
1703		    test_stringp = test_string(stringp, regexp);
1704		}
1705		regexp += group_length;
1706		return (test_repeated_group(repeat_startp, stringp, regexp));
1707
1708		/* end case ONE_OR_MORE_GROUP */
1709
1710	    case END_GROUP|ONE_OR_MORE:		/* (.....)+ */
1711
1712		/* encoded as <END_GROUP|ONE_OR_MORE><groupn> */
1713
1714		/* return from recursive call to test_string() */
1715
1716		return ((char *)stringp);
1717
1718		/* end case END_GROUP|ONE_OR_MORE */
1719
1720	    case COUNTED_GROUP:		/* (.....){min_count,max_count} */
1721	    case COUNTED_GROUP|ADD_256_TO_GROUP_LENGTH:
1722	    case COUNTED_GROUP|ADD_512_TO_GROUP_LENGTH:
1723	    case COUNTED_GROUP|ADD_768_TO_GROUP_LENGTH:
1724
1725		/*
1726		 * encoded as	<COUNTED_GROUP|ADDED_LENGTH_BITS><group_length>\
1727		 *		<compiled_regex...>\<END_GROUP|COUNT><groupn>\
1728		 *		<minimum_match_count><maximum_match_count>
1729		 *
1730		 * NOTE:
1731		 *
1732		 * group_length + (256 * ADDED_LENGTH_BITS) ==
1733		 *	length_of(<compiled_regex...><END_GROUP|COUNT><groupn>)
1734		 *
1735		 * but does not include the <minimum_match_count> or
1736		 *	<maximum_match_count> bytes
1737		 */
1738
1739		group_length =
1740		    (((unsigned int)*regexp & ADDED_LENGTH_BITS) <<
1741		    TIMES_256_SHIFT);
1742		regexp++;
1743		group_length += (unsigned int)*regexp;
1744		regexp++;
1745		get_match_counts(&nmust_match, &nextra_matches_allowed,
1746		    regexp + group_length);
1747		test_stringp = test_string(stringp, regexp);
1748		while ((test_stringp != (char *)0) && (nmust_match > 0)) {
1749		    stringp = test_stringp;
1750		    nmust_match--;
1751		    test_stringp = test_string(stringp, regexp);
1752		}
1753		if (nmust_match > 0) {
1754		    return ((char *)0);
1755		} else if (nextra_matches_allowed == UNLIMITED) {
1756		    repeat_startp = stringp;
1757		    while (test_stringp != (char *)0) {
1758			if (push_stringp(stringp) == (char *)0)
1759			    return ((char *)0);
1760			stringp = test_stringp;
1761			test_stringp = test_string(stringp, regexp);
1762		}
1763		    regexp += group_length + 2;
1764		    return (test_repeated_group(repeat_startp, stringp,
1765			regexp));
1766		} else {
1767		    repeat_startp = stringp;
1768		    while ((test_stringp != (char *)0) &&
1769			(nextra_matches_allowed > 0)) {
1770			nextra_matches_allowed--;
1771			if (push_stringp(stringp) == (char *)0)
1772			    return ((char *)0);
1773			stringp = test_stringp;
1774			test_stringp = test_string(stringp, regexp);
1775		}
1776		    regexp += group_length + 2;
1777		    return (test_repeated_group(repeat_startp, stringp,
1778			regexp));
1779		}
1780		/* end case COUNTED_GROUP */
1781
1782	    case END_GROUP|COUNT:	/* (.....){min_count,max_count} */
1783
1784		/* encoded as <END_GROUP|COUNT> */
1785
1786		/* return from recursive call to test_string() */
1787
1788		return (stringp);
1789
1790		/* end case END_GROUP|COUNT */
1791
1792	    case END_OF_STRING_MARK:
1793
1794		/* encoded as <END_OF_STRING_MARK><END_REGEX> */
1795
1796		if (*stringp == '\0') {
1797		    regexp++;
1798		} else {
1799		    return ((char *)0);
1800		}
1801		break; /* end case END_OF_STRING_MARK */
1802
1803	    case END_REGEX: /* end of the compiled regular expression */
1804
1805		/* encoded as <END_REGEX> */
1806
1807		return (stringp);
1808
1809		/* end case END_REGEX */
1810
1811	    default:
1812
1813		return ((char *)0);
1814
1815	    } /* end switch (*regexp) */
1816
1817	} /* end for (;;) */
1818
1819} /* test_string() */
1820