1#include <sys/cdefs.h>
2__FBSDID("$FreeBSD$");
3
4#include <sys/types.h>
5#include <assert.h>
6#include <regex.h>
7#include <stdio.h>
8#include <stdlib.h>
9#include <string.h>
10#include <unistd.h>
11
12#include "debug.ih"
13#include "main.ih"
14#include "split.ih"
15
char *progname;			/* argv[0]; printed in the usage message */
int debug = 0;			/* bumped once per -x; regress() echoes line numbers when set */
int line = 0;			/* number of the input line regress() is working on */
int status = 0;			/* value passed to exit(); set to 1 when a check fails */

int copts = REG_EXTENDED;	/* regcomp() flags; options('c', ...) starts from these */
int eopts = 0;			/* regexec() flags; options('e', ...) starts from these */
regoff_t startoff = 0;		/* -S: start of the REG_STARTEND span in main() */
regoff_t endoff = 0;		/* -E: subtracted from the string length for the span end */
25
26
27/*
28 - main - do the simple case, hand off to regress() for regression
29 */
30int
31main(int argc, char **argv)
32{
33	regex_t re;
34#	define	NS	10
35	regmatch_t subs[NS];
36	char erbuf[100];
37	int err;
38	size_t len;
39	int c;
40	int errflg = 0;
41	int i;
42	extern int optind;
43	extern char *optarg;
44
45	progname = argv[0];
46
47	while ((c = getopt(argc, argv, "c:e:S:E:x")) != -1)
48		switch (c) {
49		case 'c':	/* compile options */
50			copts = options('c', optarg);
51			break;
52		case 'e':	/* execute options */
53			eopts = options('e', optarg);
54			break;
55		case 'S':	/* start offset */
56			startoff = (regoff_t)atoi(optarg);
57			break;
58		case 'E':	/* end offset */
59			endoff = (regoff_t)atoi(optarg);
60			break;
61		case 'x':	/* Debugging. */
62			debug++;
63			break;
64		case '?':
65		default:
66			errflg++;
67			break;
68		}
69	if (errflg) {
70		fprintf(stderr, "usage: %s ", progname);
71		fprintf(stderr, "[-c copt][-C][-d] [re]\n");
72		exit(2);
73	}
74
75	if (optind >= argc) {
76		regress(stdin);
77		exit(status);
78	}
79
80	err = regcomp(&re, argv[optind++], copts);
81	if (err) {
82		len = regerror(err, &re, erbuf, sizeof(erbuf));
83		fprintf(stderr, "error %s, %zu/%zu `%s'\n",
84		    eprint(err), len, sizeof(erbuf), erbuf);
85		exit(status);
86	}
87	regprint(&re, stdout);
88
89	if (optind >= argc) {
90		regfree(&re);
91		exit(status);
92	}
93
94	if ((eopts & REG_STARTEND) != 0) {
95		subs[0].rm_so = startoff;
96		subs[0].rm_eo = strlen(argv[optind]) - endoff;
97	}
98	err = regexec(&re, argv[optind], (size_t)NS, subs, eopts);
99	if (err) {
100		len = regerror(err, &re, erbuf, sizeof(erbuf));
101		fprintf(stderr, "error %s, %zu/%zu `%s'\n",
102		    eprint(err), len, sizeof(erbuf), erbuf);
103		exit(status);
104	}
105	if ((copts & REG_NOSUB) == 0) {
106		len = (int)(subs[0].rm_eo - subs[0].rm_so);
107		if (subs[0].rm_so != -1) {
108			if (len != 0)
109				printf("match `%.*s'\n", (int)len,
110				    argv[optind] + subs[0].rm_so);
111			else
112				printf("match `'@%.1s\n",
113				    argv[optind] + subs[0].rm_so);
114		}
115		for (i = 1; i < NS; i++)
116			if (subs[i].rm_so != -1)
117				printf("(%d) `%.*s'\n", i,
118				    (int)(subs[i].rm_eo - subs[i].rm_so),
119				    argv[optind] + subs[i].rm_so);
120	}
121	exit(status);
122}
123
124/*
125 - regress - main loop of regression test
126 == void regress(FILE *in);
127 */
128void
129regress(FILE *in)
130{
131	char inbuf[1000];
132#	define	MAXF	10
133	char *f[MAXF];
134	int nf;
135	int i;
136	char erbuf[100];
137	size_t ne;
138	char *badpat = "invalid regular expression";
139#	define	SHORT	10
140	char *bpname = "REG_BADPAT";
141	regex_t re;
142
143	while (fgets(inbuf, sizeof(inbuf), in) != NULL) {
144		line++;
145		if (inbuf[0] == '#' || inbuf[0] == '\n')
146			continue;			/* NOTE CONTINUE */
147		inbuf[strlen(inbuf)-1] = '\0';	/* get rid of stupid \n */
148		if (debug)
149			fprintf(stdout, "%d:\n", line);
150		nf = split(inbuf, f, MAXF, "\t\t");
151		if (nf < 3) {
152			fprintf(stderr, "bad input, line %d\n", line);
153			exit(1);
154		}
155		for (i = 0; i < nf; i++)
156			if (strcmp(f[i], "\"\"") == 0)
157				f[i] = "";
158		if (nf <= 3)
159			f[3] = NULL;
160		if (nf <= 4)
161			f[4] = NULL;
162		try(f[0], f[1], f[2], f[3], f[4], options('c', f[1]));
163		if (opt('&', f[1]))	/* try with either type of RE */
164			try(f[0], f[1], f[2], f[3], f[4],
165					options('c', f[1]) &~ REG_EXTENDED);
166	}
167
168	ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
169	if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) {
170		fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n",
171							erbuf, badpat);
172		status = 1;
173	}
174	ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, (size_t)SHORT);
175	if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' ||
176	    ne != strlen(badpat)+1) {
177		fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n",
178						erbuf, SHORT-1, badpat);
179		status = 1;
180	}
181	ne = regerror(REG_ITOA|REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
182	if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname) + 1) {
183		fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n",
184						erbuf, bpname);
185		status = 1;
186	}
187	re.re_endp = bpname;
188	ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf));
189	if (atoi(erbuf) != (int)REG_BADPAT) {
190		fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n",
191						erbuf, (long)REG_BADPAT);
192		status = 1;
193	} else if (ne != strlen(erbuf) + 1) {
194		fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n",
195						erbuf, (long)REG_BADPAT);
196		status = 1;
197	}
198}
199
200/*
201 - try - try it, and report on problems
202 == void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts);
203 - opts: may not match f1
204 */
205void
206try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts)
207{
208	regex_t re;
209#	define	NSUBS	10
210	regmatch_t subs[NSUBS];
211#	define	NSHOULD	15
212	char *should[NSHOULD];
213	char erbuf[100];
214	size_t len;
215	int err, i, nshould;
216	char *grump;
217	char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE";
218	char f0copy[1000];
219	char f2copy[1000];
220
221	strcpy(f0copy, f0);
222	re.re_endp = (opts&REG_PEND) ? f0copy + strlen(f0copy) : NULL;
223	fixstr(f0copy);
224	err = regcomp(&re, f0copy, opts);
225	if (err != 0 && (!opt('C', f1) || err != efind(f2))) {
226		/* unexpected error or wrong error */
227		len = regerror(err, &re, erbuf, sizeof(erbuf));
228		fprintf(stderr, "%d: %s error %s, %zu/%zu `%s'\n",
229		    line, type, eprint(err), len, sizeof(erbuf), erbuf);
230		status = 1;
231	} else if (err == 0 && opt('C', f1)) {
232		/* unexpected success */
233		fprintf(stderr, "%d: %s should have given REG_%s\n",
234						line, type, f2);
235		status = 1;
236		err = 1;	/* so we won't try regexec */
237	}
238
239	if (err != 0) {
240		regfree(&re);
241		return;
242	}
243
244	strcpy(f2copy, f2);
245	fixstr(f2copy);
246
247	if (options('e', f1)&REG_STARTEND) {
248		if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL)
249			fprintf(stderr, "%d: bad STARTEND syntax\n", line);
250		subs[0].rm_so = strchr(f2, '(') - f2 + 1;
251		subs[0].rm_eo = strchr(f2, ')') - f2;
252	}
253	err = regexec(&re, f2copy, NSUBS, subs, options('e', f1));
254
255	if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) {
256		/* unexpected error or wrong error */
257		len = regerror(err, &re, erbuf, sizeof(erbuf));
258		fprintf(stderr, "%d: %s exec error %s, %zu/%zu `%s'\n",
259		    line, type, eprint(err), len, sizeof(erbuf), erbuf);
260		status = 1;
261	} else if (err != 0) {
262		/* nothing more to check */
263	} else if (f3 == NULL) {
264		/* unexpected success */
265		fprintf(stderr, "%d: %s exec should have failed\n",
266		    line, type);
267		status = 1;
268		err = 1;		/* just on principle */
269	} else if (opts&REG_NOSUB) {
270		/* nothing more to check */
271	} else if ((grump = check(f2, subs[0], f3)) != NULL) {
272		fprintf(stderr, "%d: %s %s\n", line, type, grump);
273		status = 1;
274		err = 1;
275	}
276
277	if (err != 0 || f4 == NULL) {
278		regfree(&re);
279		return;
280	}
281
282	for (i = 1; i < NSHOULD; i++)
283		should[i] = NULL;
284	nshould = split(f4, should+1, NSHOULD-1, ",");
285	if (nshould == 0) {
286		nshould = 1;
287		should[1] = "";
288	}
289	for (i = 1; i < NSUBS; i++) {
290		grump = check(f2, subs[i], should[i]);
291		if (grump != NULL) {
292			fprintf(stderr, "%d: %s $%d %s\n", line,
293			    type, i, grump);
294			status = 1;
295			err = 1;
296		}
297	}
298
299	regfree(&re);
300}
301
302/*
303 - options - pick options out of a regression-test string
304 - type: 'c' - compile, 'e' - exec
305 == int options(int type, char *s);
306 */
307int
308options(int type, char *s)
309{
310	char *p;
311	int o = (type == 'c') ? copts : eopts;
312	char *legal = (type == 'c') ? "bisnmp" : "^$#tl";
313
314	for (p = s; *p != '\0'; p++)
315		if (strchr(legal, *p) != NULL)
316			switch (*p) {
317			case 'b':
318				o &= ~REG_EXTENDED;
319				break;
320			case 'i':
321				o |= REG_ICASE;
322				break;
323			case 's':
324				o |= REG_NOSUB;
325				break;
326			case 'n':
327				o |= REG_NEWLINE;
328				break;
329			case 'm':
330				o &= ~REG_EXTENDED;
331				o |= REG_NOSPEC;
332				break;
333			case 'p':
334				o |= REG_PEND;
335				break;
336			case '^':
337				o |= REG_NOTBOL;
338				break;
339			case '$':
340				o |= REG_NOTEOL;
341				break;
342			case '#':
343				o |= REG_STARTEND;
344				break;
345			case 't':	/* trace */
346				o |= REG_TRACE;
347				break;
348			case 'l':	/* force long representation */
349				o |= REG_LARGE;
350				break;
351			case 'r':	/* force backref use */
352				o |= REG_BACKR;
353				break;
354			}
355	return(o);
356}
357
/*
 - opt - is a particular option in a regression string?
 == int opt(int c, char *s);
 - yields 1 when strchr() finds c in s, 0 when it does not
 */
int				/* predicate */
opt(int c, char *s)
{
	if (strchr(s, c) == NULL)
		return(0);
	return(1);
}
367
/*
 - fixstr - transform magic characters in strings
 == void fixstr(char *p);
 - rewrites p in place: N becomes newline, T tab, S space, Z NUL
 - a NULL p is accepted and ignored
 */
void
fixstr(char *p)
{
	char *cur;

	if (p == NULL)
		return;

	/*
	 * The loop test looks at the character after the one just handled,
	 * so a NUL written for 'Z' does not end the scan; only a NUL that
	 * was already in the input does.
	 */
	for (cur = p; *cur != '\0'; cur++) {
		switch (*cur) {
		case 'N':
			*cur = '\n';
			break;
		case 'T':
			*cur = '\t';
			break;
		case 'S':
			*cur = ' ';
			break;
		case 'Z':
			*cur = '\0';
			break;
		default:
			break;
		}
	}
}
388
/*
 - check - check a substring match
 == char *check(char *str, regmatch_t sub, char *should);
 - str: the string that was matched against
 - sub: the offsets reported for one (sub)match
 - should: the expected text; NULL or "-" means no match is expected,
 -	"@text" means a null match located just before text
 - returns NULL if all is well, otherwise a complaint; the complaint may
 -	live in a static buffer that the next call overwrites
 */
char *				/* NULL or complaint */
check(char *str, regmatch_t sub, char *should)
{
	int len;
	int shlen;
	char *p;
	static char grump[500];
	char *at = NULL;

	if (should != NULL && strcmp(should, "-") == 0)
		should = NULL;
	if (should != NULL && should[0] == '@') {
		at = should + 1;
		should = "";
	}

	/* check rm_so and rm_eo for consistency */
	if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) ||
				(sub.rm_so != -1 && sub.rm_eo == -1) ||
				(sub.rm_so != -1 && sub.rm_so < 0) ||
				(sub.rm_eo != -1 && sub.rm_eo < 0) ) {
		snprintf(grump, sizeof(grump), "start %ld end %ld",
		    (long)sub.rm_so, (long)sub.rm_eo);
		return(grump);
	}

	/* check for no match */
	if (sub.rm_so == -1 && should == NULL)
		return(NULL);
	if (sub.rm_so == -1)
		return("did not match");

	/* check for in range; rm_eo is known to be non-negative here */
	if ((size_t)sub.rm_eo > strlen(str)) {
		snprintf(grump, sizeof(grump),
		    "start %ld end %ld, past end of string",
		    (long)sub.rm_so, (long)sub.rm_eo);
		return(grump);
	}

	len = (int)(sub.rm_eo - sub.rm_so);
	p = str + sub.rm_so;

	/*
	 * check for not supposed to match; this has to come before
	 * anything that looks inside should
	 */
	if (should == NULL) {
		snprintf(grump, sizeof(grump), "matched `%.*s'", len, p);
		return(grump);
	}

	/* check for wrong match */
	shlen = (int)strlen(should);
	if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) {
		snprintf(grump, sizeof(grump), "matched `%.*s' instead",
		    len, p);
		return(grump);
	}
	if (shlen > 0)
		return(NULL);

	/* check null match in right place */
	if (at == NULL)
		return(NULL);
	shlen = (int)strlen(at);
	if (shlen == 0)
		shlen = 1;	/* force check for end-of-string */
	if (strncmp(p, at, (size_t)shlen) != 0) {
		snprintf(grump, sizeof(grump), "matched null at `%.20s'", p);
		return(grump);
	}
	return(NULL);
}
462
463/*
464 - eprint - convert error number to name
465 == static char *eprint(int err);
466 */
467static char *
468eprint(int err)
469{
470	static char epbuf[100];
471	size_t len;
472
473	len = regerror(REG_ITOA|err, (regex_t *)NULL, epbuf, sizeof(epbuf));
474	assert(len <= sizeof(epbuf));
475	return(epbuf);
476}
477
478/*
479 - efind - convert error name to number
480 == static int efind(char *name);
481 */
482static int
483efind(char *name)
484{
485	static char efbuf[100];
486	size_t n;
487	regex_t re;
488
489	sprintf(efbuf, "REG_%s", name);
490	assert(strlen(efbuf) < sizeof(efbuf));
491	re.re_endp = efbuf;
492	(void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf));
493	return(atoi(efbuf));
494}
495