1/*	$NetBSD: main.c,v 1.1 2011/01/08 18:10:31 pgoyette Exp $	*/
2
3/*-
4 * Copyright (c) 1993 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <assert.h>
30#include <regex.h>
31#include <stdio.h>
32#include <stdlib.h>
33#include <string.h>
34#include <unistd.h>
35
36#include <sys/types.h>
37
38#include "test_regex.h"
39
char *progname;			/* argv[0]; printed in the usage message */
int debug = 0;			/* incremented by each -x; nonzero makes regress() echo line numbers */
int line = 0;			/* count of script lines regress() has read, used in diagnostics */
int status = 0;			/* value passed to exit(); set to 1 whenever a check fails */

int copts = REG_EXTENDED;	/* starting regcomp() flags; options('c', ...) builds on these */
int eopts = 0;			/* starting regexec() flags; options('e', ...) builds on these */
regoff_t startoff = 0;		/* -S: start offset used by main() under REG_STARTEND */
regoff_t endoff = 0;		/* -E: subtracted from the string length to get the end offset */

/* Stand-in for a script field written as "" and for expected null matches. */
static char empty = '\0';

/* Helpers private to this file, defined after their first use. */
static char *eprint(int);
static int efind(char *);
54
55/*
56 * main - do the simple case, hand off to regress() for regression
57 */
58int
59main(int argc, char *argv[])
60{
61	regex_t re;
62#	define	NS	10
63	regmatch_t subs[NS];
64	char erbuf[100];
65	int err;
66	size_t len;
67	int c;
68	int errflg = 0;
69	int i;
70	extern int optind;
71	extern char *optarg;
72
73	progname = argv[0];
74
75	while ((c = getopt(argc, argv, "c:e:S:E:x")) != -1)
76		switch (c) {
77		case 'c':	/* compile options */
78			copts = options('c', optarg);
79			break;
80		case 'e':	/* execute options */
81			eopts = options('e', optarg);
82			break;
83		case 'S':	/* start offset */
84			startoff = (regoff_t)atoi(optarg);
85			break;
86		case 'E':	/* end offset */
87			endoff = (regoff_t)atoi(optarg);
88			break;
89		case 'x':	/* Debugging. */
90			debug++;
91			break;
92		case '?':
93		default:
94			errflg++;
95			break;
96		}
97	if (errflg) {
98		fprintf(stderr, "usage: %s ", progname);
99		fprintf(stderr, "[-c copt][-C][-d] [re]\n");
100		exit(2);
101	}
102
103	if (optind >= argc) {
104		regress(stdin);
105		exit(status);
106	}
107
108	err = regcomp(&re, argv[optind++], copts);
109	if (err) {
110		len = regerror(err, &re, erbuf, sizeof(erbuf));
111		fprintf(stderr, "error %s, %zd/%zd `%s'\n",
112			eprint(err), len, (size_t)sizeof(erbuf), erbuf);
113		exit(status);
114	}
115	regprint(&re, stdout);
116
117	if (optind >= argc) {
118		regfree(&re);
119		exit(status);
120	}
121
122	if (eopts&REG_STARTEND) {
123		subs[0].rm_so = startoff;
124		subs[0].rm_eo = strlen(argv[optind]) - endoff;
125	}
126	err = regexec(&re, argv[optind], (size_t)NS, subs, eopts);
127	if (err) {
128		len = regerror(err, &re, erbuf, sizeof(erbuf));
129		fprintf(stderr, "error %s, %zd/%zd `%s'\n",
130			eprint(err), len, (size_t)sizeof(erbuf), erbuf);
131		exit(status);
132	}
133	if (!(copts&REG_NOSUB)) {
134		len = (int)(subs[0].rm_eo - subs[0].rm_so);
135		if (subs[0].rm_so != -1) {
136			if (len != 0)
137				printf("match `%.*s'\n", (int)len,
138					argv[optind] + subs[0].rm_so);
139			else
140				printf("match `'@%.1s\n",
141					argv[optind] + subs[0].rm_so);
142		}
143		for (i = 1; i < NS; i++)
144			if (subs[i].rm_so != -1)
145				printf("(%d) `%.*s'\n", i,
146					(int)(subs[i].rm_eo - subs[i].rm_so),
147					argv[optind] + subs[i].rm_so);
148	}
149	exit(status);
150}
151
152/*
153 * regress - main loop of regression test
154 */
155void
156regress(FILE *in)
157{
158	char inbuf[1000];
159#	define	MAXF	10
160	char *f[MAXF];
161	int nf;
162	int i;
163	char erbuf[100];
164	size_t ne;
165	const char *badpat = "invalid regular expression";
166#	define	SHORT	10
167	const char *bpname = "REG_BADPAT";
168	regex_t re;
169
170	while (fgets(inbuf, sizeof(inbuf), in) != NULL) {
171		line++;
172		if (inbuf[0] == '#' || inbuf[0] == '\n')
173			continue;			/* NOTE CONTINUE */
174		inbuf[strlen(inbuf)-1] = '\0';	/* get rid of stupid \n */
175		if (debug)
176			fprintf(stdout, "%d:\n", line);
177		nf = split(inbuf, f, MAXF, "\t\t");
178		if (nf < 3) {
179			fprintf(stderr, "bad input, line %d\n", line);
180			exit(1);
181		}
182		for (i = 0; i < nf; i++)
183			if (strcmp(f[i], "\"\"") == 0)
184				f[i] = &empty;
185		if (nf <= 3)
186			f[3] = NULL;
187		if (nf <= 4)
188			f[4] = NULL;
189		try(f[0], f[1], f[2], f[3], f[4], options('c', f[1]));
190		if (opt('&', f[1]))	/* try with either type of RE */
191			try(f[0], f[1], f[2], f[3], f[4],
192					options('c', f[1]) &~ REG_EXTENDED);
193	}
194
195	ne = regerror(REG_BADPAT, NULL, erbuf, sizeof(erbuf));
196	if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) {
197		fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n",
198							erbuf, badpat);
199		status = 1;
200	}
201	ne = regerror(REG_BADPAT, NULL, erbuf, (size_t)SHORT);
202	if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' ||
203						ne != strlen(badpat)+1) {
204		fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n",
205						erbuf, SHORT-1, badpat);
206		status = 1;
207	}
208	ne = regerror(REG_ITOA|REG_BADPAT, NULL, erbuf, sizeof(erbuf));
209	if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname)+1) {
210		fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n",
211						erbuf, bpname);
212		status = 1;
213	}
214	re.re_endp = bpname;
215	ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf));
216	if (atoi(erbuf) != (int)REG_BADPAT) {
217		fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n",
218						erbuf, (long)REG_BADPAT);
219		status = 1;
220	} else if (ne != strlen(erbuf)+1) {
221		fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n",
222						erbuf, (long)REG_BADPAT);
223		status = 1;
224	}
225}
226
227/*
228 - try - try it, and report on problems
229 == void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts);
230 */
231void
232try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts)
233{
234	regex_t re;
235#	define	NSUBS	10
236	regmatch_t subs[NSUBS];
237#	define	NSHOULD	15
238	char *should[NSHOULD];
239	int nshould;
240	char erbuf[100];
241	int err;
242	int len;
243	const char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE";
244	int i;
245	char *grump;
246	char f0copy[1000];
247	char f2copy[1000];
248
249	strcpy(f0copy, f0);
250	re.re_endp = (opts&REG_PEND) ? f0copy + strlen(f0copy) : NULL;
251	fixstr(f0copy);
252	err = regcomp(&re, f0copy, opts);
253	if (err != 0 && (!opt('C', f1) || err != efind(f2))) {
254		/* unexpected error or wrong error */
255		len = regerror(err, &re, erbuf, sizeof(erbuf));
256		fprintf(stderr, "%d: %s error %s, %d/%d `%s'\n",
257					line, type, eprint(err), len,
258					(int)sizeof(erbuf), erbuf);
259		status = 1;
260	} else if (err == 0 && opt('C', f1)) {
261		/* unexpected success */
262		fprintf(stderr, "%d: %s should have given REG_%s\n",
263						line, type, f2);
264		status = 1;
265		err = 1;	/* so we won't try regexec */
266	}
267
268	if (err != 0) {
269		regfree(&re);
270		return;
271	}
272
273	strcpy(f2copy, f2);
274	fixstr(f2copy);
275
276	if (options('e', f1)&REG_STARTEND) {
277		if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL)
278			fprintf(stderr, "%d: bad STARTEND syntax\n", line);
279		subs[0].rm_so = strchr(f2, '(') - f2 + 1;
280		subs[0].rm_eo = strchr(f2, ')') - f2;
281	}
282	err = regexec(&re, f2copy, NSUBS, subs, options('e', f1));
283
284	if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) {
285		/* unexpected error or wrong error */
286		len = regerror(err, &re, erbuf, sizeof(erbuf));
287		fprintf(stderr, "%d: %s exec error %s, %d/%d `%s'\n",
288					line, type, eprint(err), len,
289					(int)sizeof(erbuf), erbuf);
290		status = 1;
291	} else if (err != 0) {
292		/* nothing more to check */
293	} else if (f3 == NULL) {
294		/* unexpected success */
295		fprintf(stderr, "%d: %s exec should have failed\n",
296						line, type);
297		status = 1;
298		err = 1;		/* just on principle */
299	} else if (opts&REG_NOSUB) {
300		/* nothing more to check */
301	} else if ((grump = check(f2, subs[0], f3)) != NULL) {
302		fprintf(stderr, "%d: %s %s\n", line, type, grump);
303		status = 1;
304		err = 1;
305	}
306
307	if (err != 0 || f4 == NULL) {
308		regfree(&re);
309		return;
310	}
311
312	for (i = 1; i < NSHOULD; i++)
313		should[i] = NULL;
314	nshould = split(f4, &should[1], NSHOULD-1, ",");
315	if (nshould == 0) {
316		nshould = 1;
317		should[1] = &empty;
318	}
319	for (i = 1; i < NSUBS; i++) {
320		grump = check(f2, subs[i], should[i]);
321		if (grump != NULL) {
322			fprintf(stderr, "%d: %s $%d %s\n", line,
323							type, i, grump);
324			status = 1;
325			err = 1;
326		}
327	}
328
329	regfree(&re);
330}
331
332/*
333 - options - pick options out of a regression-test string
334 == int options(int type, char *s);
335 */
336int
337options(int type, char *s)
338{
339	char *p;
340	int o = (type == 'c') ? copts : eopts;
341	const char *legal = (type == 'c') ? "bisnmp" : "^$#tl";
342
343	for (p = s; *p != '\0'; p++)
344		if (strchr(legal, *p) != NULL)
345			switch (*p) {
346			case 'b':
347				o &= ~REG_EXTENDED;
348				break;
349			case 'i':
350				o |= REG_ICASE;
351				break;
352			case 's':
353				o |= REG_NOSUB;
354				break;
355			case 'n':
356				o |= REG_NEWLINE;
357				break;
358			case 'm':
359				o &= ~REG_EXTENDED;
360				o |= REG_NOSPEC;
361				break;
362			case 'p':
363				o |= REG_PEND;
364				break;
365			case '^':
366				o |= REG_NOTBOL;
367				break;
368			case '$':
369				o |= REG_NOTEOL;
370				break;
371			case '#':
372				o |= REG_STARTEND;
373				break;
374			case 't':	/* trace */
375				o |= REG_TRACE;
376				break;
377			case 'l':	/* force long representation */
378				o |= REG_LARGE;
379				break;
380			case 'r':	/* force backref use */
381				o |= REG_BACKR;
382				break;
383			}
384	return(o);
385}
386
/*
 - opt - is a particular option in a regression string?
 == int opt(int c, char *s);
 *
 * Yields 1 when the character c appears somewhere in s, 0 when it
 * does not.
 */
int				/* predicate */
opt(int c, char *s)
{
	const char *hit = strchr(s, c);

	if (hit == NULL)
		return(0);
	return(1);
}
396
/*
 - fixstr - transform magic characters in strings
 == void fixstr(char *p);
 *
 * Rewrites the string in place: N becomes a newline, T a tab, S a space
 * and Z a NUL byte.  Scanning does not stop at a NUL produced from Z; it
 * goes on until the NUL that was already in the string.  A NULL argument
 * is accepted and ignored.
 */
void
fixstr(char *p)
{
	char *cur = p;

	if (cur == NULL)
		return;

	while (*cur != '\0') {
		switch (*cur) {
		case 'N':
			*cur = '\n';
			break;
		case 'T':
			*cur = '\t';
			break;
		case 'S':
			*cur = ' ';
			break;
		case 'Z':
			*cur = '\0';
			break;
		default:
			break;
		}
		cur++;
	}
}
417
/*
 * check - check a substring match
 *
 * str is the subject string and sub the offsets reported for one
 * (sub)match.  should describes what was expected:
 *   NULL or "-"   no match at all
 *   "@text"       a null match sitting immediately before `text'
 *                 (a bare "@" means a null match at the end of str)
 *   anything else exactly that text
 *
 * Returns NULL when the result is as expected, otherwise a complaint
 * held in a static buffer that is overwritten by the next call.  The
 * complaint is truncated to fit, however long the matched text is.
 */
char *				/* NULL or complaint */
check(char *str, regmatch_t sub, char *should)
{
	int len;
	int shlen;
	char *p;
	static char grump[500];
	static char nothing = '\0';	/* expected text of a null match */
	char *at = NULL;

	if (should != NULL && strcmp(should, "-") == 0)
		should = NULL;
	if (should != NULL && should[0] == '@') {
		at = should + 1;
		should = &nothing;
	}

	/* check rm_so and rm_eo for consistency */
	if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) ||
				(sub.rm_so != -1 && sub.rm_eo == -1) ||
				(sub.rm_so != -1 && sub.rm_so < 0) ||
				(sub.rm_eo != -1 && sub.rm_eo < 0) ) {
		snprintf(grump, sizeof(grump), "start %ld end %ld",
					(long)sub.rm_so, (long)sub.rm_eo);
		return(grump);
	}

	/* check for no match */
	if (sub.rm_so == -1) {
		if (should == NULL)
			return(NULL);
		snprintf(grump, sizeof(grump), "did not match");
		return(grump);
	}

	/* check for in range */
	if (sub.rm_eo > (ssize_t)strlen(str)) {
		snprintf(grump, sizeof(grump),
					"start %ld end %ld, past end of string",
					(long)sub.rm_so, (long)sub.rm_eo);
		return(grump);
	}

	len = (int)(sub.rm_eo - sub.rm_so);
	p = str + sub.rm_so;

	/* check for not supposed to match */
	if (should == NULL) {
		/* The match can be longer than grump; snprintf keeps it in bounds. */
		snprintf(grump, sizeof(grump), "matched `%.*s'", len, p);
		return(grump);
	}

	/* check for wrong match */
	shlen = (int)strlen(should);
	if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) {
		snprintf(grump, sizeof(grump), "matched `%.*s' instead",
					len, p);
		return(grump);
	}
	if (shlen > 0)
		return(NULL);

	/* check null match in right place */
	if (at == NULL)
		return(NULL);
	shlen = (int)strlen(at);
	if (shlen == 0)
		shlen = 1;	/* force check for end-of-string */
	if (strncmp(p, at, (size_t)shlen) != 0) {
		snprintf(grump, sizeof(grump), "matched null at `%.20s'", p);
		return(grump);
	}
	return(NULL);
}
494
495/*
496 * eprint - convert error number to name
497 */
498static char *
499eprint(int err)
500{
501	static char epbuf[100];
502	size_t len;
503
504	len = regerror(REG_ITOA|err, NULL, epbuf, sizeof(epbuf));
505	assert(len <= sizeof(epbuf));
506	return(epbuf);
507}
508
509/*
510 * efind - convert error name to number
511 */
512static int
513efind(char *name)
514{
515	static char efbuf[100];
516	regex_t re;
517
518	sprintf(efbuf, "REG_%s", name);
519	assert(strlen(efbuf) < sizeof(efbuf));
520	re.re_endp = efbuf;
521	(void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf));
522	return(atoi(efbuf));
523}
524