main.c revision 92889
1178825Sdfr/* $FreeBSD: head/lib/libc/regex/grot/main.c 92889 2002-03-21 18:49:23Z obrien $
2233294Sstas */
#include <sys/types.h>

#include <assert.h>
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "main.ih"
10233294Sstas
char *progname;			/* argv[0], set by main(); used in the usage message */
int debug = 0;			/* bumped once per -x; regress() echoes line numbers when set */
int line = 0;			/* current input line, counted by regress() */
int status = 0;			/* value passed to exit(); set to 1 when a check fails */

int copts = REG_EXTENDED;	/* base regcomp() flags; replaced by -c, read by options() */
int eopts = 0;			/* base regexec() flags; replaced by -e, read by options() */
regoff_t startoff = 0;		/* -S: start offset main() uses with REG_STARTEND */
regoff_t endoff = 0;		/* -E: subtracted from the string length for REG_STARTEND */


/* Helpers defined outside this file; declared here without prototypes. */
extern int split();
extern void regprint();
24233294Sstas
25233294Sstas/*
26233294Sstas - main - do the simple case, hand off to regress() for regression
27233294Sstas */
28233294Sstasmain(argc, argv)
29233294Sstasint argc;
30233294Sstaschar *argv[];
31233294Sstas{
32178825Sdfr	regex_t re;
33178825Sdfr#	define	NS	10
34178825Sdfr	regmatch_t subs[NS];
35178825Sdfr	char erbuf[100];
36178825Sdfr	int err;
37178825Sdfr	size_t len;
38178825Sdfr	int c;
39178825Sdfr	int errflg = 0;
40178825Sdfr	int i;
41178825Sdfr	extern int optind;
42178825Sdfr	extern char *optarg;
43178825Sdfr
44178825Sdfr	progname = argv[0];
45178825Sdfr
46178825Sdfr	while ((c = getopt(argc, argv, "c:e:S:E:x")) != EOF)
47178825Sdfr		switch (c) {
48178825Sdfr		case 'c':	/* compile options */
49178825Sdfr			copts = options('c', optarg);
50178825Sdfr			break;
51178825Sdfr		case 'e':	/* execute options */
52178825Sdfr			eopts = options('e', optarg);
53178825Sdfr			break;
54178825Sdfr		case 'S':	/* start offset */
55178825Sdfr			startoff = (regoff_t)atoi(optarg);
56178825Sdfr			break;
57178825Sdfr		case 'E':	/* end offset */
58178825Sdfr			endoff = (regoff_t)atoi(optarg);
59178825Sdfr			break;
60178825Sdfr		case 'x':	/* Debugging. */
61178825Sdfr			debug++;
62178825Sdfr			break;
63233294Sstas		case '?':
64178825Sdfr		default:
65178825Sdfr			errflg++;
66178825Sdfr			break;
67178825Sdfr		}
68178825Sdfr	if (errflg) {
69233294Sstas		fprintf(stderr, "usage: %s ", progname);
70178825Sdfr		fprintf(stderr, "[-c copt][-C][-d] [re]\n");
71178825Sdfr		exit(2);
72178825Sdfr	}
73178825Sdfr
74178825Sdfr	if (optind >= argc) {
75178825Sdfr		regress(stdin);
76178825Sdfr		exit(status);
77178825Sdfr	}
78178825Sdfr
79178825Sdfr	err = regcomp(&re, argv[optind++], copts);
80178825Sdfr	if (err) {
81178825Sdfr		len = regerror(err, &re, erbuf, sizeof(erbuf));
82178825Sdfr		fprintf(stderr, "error %s, %d/%d `%s'\n",
83233294Sstas			eprint(err), len, sizeof(erbuf), erbuf);
84178825Sdfr		exit(status);
85178825Sdfr	}
86178825Sdfr	regprint(&re, stdout);
87178825Sdfr
88178825Sdfr	if (optind >= argc) {
89233294Sstas		regfree(&re);
90233294Sstas		exit(status);
91233294Sstas	}
92233294Sstas
93233294Sstas	if (eopts&REG_STARTEND) {
94233294Sstas		subs[0].rm_so = startoff;
95233294Sstas		subs[0].rm_eo = strlen(argv[optind]) - endoff;
96233294Sstas	}
97233294Sstas	err = regexec(&re, argv[optind], (size_t)NS, subs, eopts);
98233294Sstas	if (err) {
99233294Sstas		len = regerror(err, &re, erbuf, sizeof(erbuf));
100233294Sstas		fprintf(stderr, "error %s, %d/%d `%s'\n",
101233294Sstas			eprint(err), len, sizeof(erbuf), erbuf);
102233294Sstas		exit(status);
103233294Sstas	}
104233294Sstas	if (!(copts&REG_NOSUB)) {
105233294Sstas		len = (int)(subs[0].rm_eo - subs[0].rm_so);
106178825Sdfr		if (subs[0].rm_so != -1) {
107178825Sdfr			if (len != 0)
108178825Sdfr				printf("match `%.*s'\n", len,
109178825Sdfr					argv[optind] + subs[0].rm_so);
110178825Sdfr			else
111178825Sdfr				printf("match `'@%.1s\n",
112178825Sdfr					argv[optind] + subs[0].rm_so);
113178825Sdfr		}
114178825Sdfr		for (i = 1; i < NS; i++)
115178825Sdfr			if (subs[i].rm_so != -1)
116178825Sdfr				printf("(%d) `%.*s'\n", i,
117178825Sdfr					(int)(subs[i].rm_eo - subs[i].rm_so),
118178825Sdfr					argv[optind] + subs[i].rm_so);
119178825Sdfr	}
120178825Sdfr	exit(status);
121178825Sdfr}
122178825Sdfr
123178825Sdfr/*
124178825Sdfr - regress - main loop of regression test
125178825Sdfr == void regress(FILE *in);
126178825Sdfr */
127178825Sdfrvoid
128178825Sdfrregress(in)
129178825SdfrFILE *in;
130178825Sdfr{
131178825Sdfr	char inbuf[1000];
132178825Sdfr#	define	MAXF	10
133178825Sdfr	char *f[MAXF];
134178825Sdfr	int nf;
135178825Sdfr	int i;
136178825Sdfr	char erbuf[100];
137178825Sdfr	size_t ne;
138178825Sdfr	char *badpat = "invalid regular expression";
139178825Sdfr#	define	SHORT	10
140178825Sdfr	char *bpname = "REG_BADPAT";
141178825Sdfr	regex_t re;
142178825Sdfr
143178825Sdfr	while (fgets(inbuf, sizeof(inbuf), in) != NULL) {
144178825Sdfr		line++;
145178825Sdfr		if (inbuf[0] == '#' || inbuf[0] == '\n')
146178825Sdfr			continue;			/* NOTE CONTINUE */
147178825Sdfr		inbuf[strlen(inbuf)-1] = '\0';	/* get rid of stupid \n */
148178825Sdfr		if (debug)
149178825Sdfr			fprintf(stdout, "%d:\n", line);
150178825Sdfr		nf = split(inbuf, f, MAXF, "\t\t");
151178825Sdfr		if (nf < 3) {
152178825Sdfr			fprintf(stderr, "bad input, line %d\n", line);
153178825Sdfr			exit(1);
154178825Sdfr		}
155178825Sdfr		for (i = 0; i < nf; i++)
156178825Sdfr			if (strcmp(f[i], "\"\"") == 0)
157178825Sdfr				f[i] = "";
158178825Sdfr		if (nf <= 3)
159178825Sdfr			f[3] = NULL;
160178825Sdfr		if (nf <= 4)
161178825Sdfr			f[4] = NULL;
162178825Sdfr		try(f[0], f[1], f[2], f[3], f[4], options('c', f[1]));
163233294Sstas		if (opt('&', f[1]))	/* try with either type of RE */
164178825Sdfr			try(f[0], f[1], f[2], f[3], f[4],
165178825Sdfr					options('c', f[1]) &~ REG_EXTENDED);
166178825Sdfr	}
167178825Sdfr
168178825Sdfr	ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
169178825Sdfr	if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) {
170178825Sdfr		fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n",
171178825Sdfr							erbuf, badpat);
172178825Sdfr		status = 1;
173178825Sdfr	}
174178825Sdfr	ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, (size_t)SHORT);
175178825Sdfr	if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' ||
176178825Sdfr						ne != strlen(badpat)+1) {
177178825Sdfr		fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n",
178178825Sdfr						erbuf, SHORT-1, badpat);
179178825Sdfr		status = 1;
180178825Sdfr	}
181178825Sdfr	ne = regerror(REG_ITOA|REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
182178825Sdfr	if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname)+1) {
183178825Sdfr		fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n",
184178825Sdfr						erbuf, bpname);
185178825Sdfr		status = 1;
186178825Sdfr	}
187178825Sdfr	re.re_endp = bpname;
188178825Sdfr	ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf));
189178825Sdfr	if (atoi(erbuf) != (int)REG_BADPAT) {
190178825Sdfr		fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n",
191178825Sdfr						erbuf, (long)REG_BADPAT);
192178825Sdfr		status = 1;
193178825Sdfr	} else if (ne != strlen(erbuf)+1) {
194178825Sdfr		fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n",
195178825Sdfr						erbuf, (long)REG_BADPAT);
196178825Sdfr		status = 1;
197178825Sdfr	}
198178825Sdfr}
199178825Sdfr
200178825Sdfr/*
201178825Sdfr - try - try it, and report on problems
202178825Sdfr == void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts);
203178825Sdfr */
204178825Sdfrvoid
205178825Sdfrtry(f0, f1, f2, f3, f4, opts)
206178825Sdfrchar *f0;
207178825Sdfrchar *f1;
208178825Sdfrchar *f2;
209178825Sdfrchar *f3;
210178825Sdfrchar *f4;
211178825Sdfrint opts;			/* may not match f1 */
212178825Sdfr{
213	regex_t re;
214#	define	NSUBS	10
215	regmatch_t subs[NSUBS];
216#	define	NSHOULD	15
217	char *should[NSHOULD];
218	int nshould;
219	char erbuf[100];
220	int err;
221	int len;
222	char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE";
223	int i;
224	char *grump;
225	char f0copy[1000];
226	char f2copy[1000];
227
228	strcpy(f0copy, f0);
229	re.re_endp = (opts&REG_PEND) ? f0copy + strlen(f0copy) : NULL;
230	fixstr(f0copy);
231	err = regcomp(&re, f0copy, opts);
232	if (err != 0 && (!opt('C', f1) || err != efind(f2))) {
233		/* unexpected error or wrong error */
234		len = regerror(err, &re, erbuf, sizeof(erbuf));
235		fprintf(stderr, "%d: %s error %s, %d/%d `%s'\n",
236					line, type, eprint(err), len,
237					sizeof(erbuf), erbuf);
238		status = 1;
239	} else if (err == 0 && opt('C', f1)) {
240		/* unexpected success */
241		fprintf(stderr, "%d: %s should have given REG_%s\n",
242						line, type, f2);
243		status = 1;
244		err = 1;	/* so we won't try regexec */
245	}
246
247	if (err != 0) {
248		regfree(&re);
249		return;
250	}
251
252	strcpy(f2copy, f2);
253	fixstr(f2copy);
254
255	if (options('e', f1)&REG_STARTEND) {
256		if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL)
257			fprintf(stderr, "%d: bad STARTEND syntax\n", line);
258		subs[0].rm_so = strchr(f2, '(') - f2 + 1;
259		subs[0].rm_eo = strchr(f2, ')') - f2;
260	}
261	err = regexec(&re, f2copy, NSUBS, subs, options('e', f1));
262
263	if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) {
264		/* unexpected error or wrong error */
265		len = regerror(err, &re, erbuf, sizeof(erbuf));
266		fprintf(stderr, "%d: %s exec error %s, %d/%d `%s'\n",
267					line, type, eprint(err), len,
268					sizeof(erbuf), erbuf);
269		status = 1;
270	} else if (err != 0) {
271		/* nothing more to check */
272	} else if (f3 == NULL) {
273		/* unexpected success */
274		fprintf(stderr, "%d: %s exec should have failed\n",
275						line, type);
276		status = 1;
277		err = 1;		/* just on principle */
278	} else if (opts&REG_NOSUB) {
279		/* nothing more to check */
280	} else if ((grump = check(f2, subs[0], f3)) != NULL) {
281		fprintf(stderr, "%d: %s %s\n", line, type, grump);
282		status = 1;
283		err = 1;
284	}
285
286	if (err != 0 || f4 == NULL) {
287		regfree(&re);
288		return;
289	}
290
291	for (i = 1; i < NSHOULD; i++)
292		should[i] = NULL;
293	nshould = split(f4, should+1, NSHOULD-1, ",");
294	if (nshould == 0) {
295		nshould = 1;
296		should[1] = "";
297	}
298	for (i = 1; i < NSUBS; i++) {
299		grump = check(f2, subs[i], should[i]);
300		if (grump != NULL) {
301			fprintf(stderr, "%d: %s $%d %s\n", line,
302							type, i, grump);
303			status = 1;
304			err = 1;
305		}
306	}
307
308	regfree(&re);
309}
310
311/*
312 - options - pick options out of a regression-test string
313 == int options(int type, char *s);
314 */
315int
316options(type, s)
317int type;			/* 'c' compile, 'e' exec */
318char *s;
319{
320	char *p;
321	int o = (type == 'c') ? copts : eopts;
322	char *legal = (type == 'c') ? "bisnmp" : "^$#tl";
323
324	for (p = s; *p != '\0'; p++)
325		if (strchr(legal, *p) != NULL)
326			switch (*p) {
327			case 'b':
328				o &= ~REG_EXTENDED;
329				break;
330			case 'i':
331				o |= REG_ICASE;
332				break;
333			case 's':
334				o |= REG_NOSUB;
335				break;
336			case 'n':
337				o |= REG_NEWLINE;
338				break;
339			case 'm':
340				o &= ~REG_EXTENDED;
341				o |= REG_NOSPEC;
342				break;
343			case 'p':
344				o |= REG_PEND;
345				break;
346			case '^':
347				o |= REG_NOTBOL;
348				break;
349			case '$':
350				o |= REG_NOTEOL;
351				break;
352			case '#':
353				o |= REG_STARTEND;
354				break;
355			case 't':	/* trace */
356				o |= REG_TRACE;
357				break;
358			case 'l':	/* force long representation */
359				o |= REG_LARGE;
360				break;
361			case 'r':	/* force backref use */
362				o |= REG_BACKR;
363				break;
364			}
365	return(o);
366}
367
/*
 - opt - is a particular option in a regression string?
 == int opt(int c, char *s);
 *
 * Returns 1 when character `c' occurs in `s' and 0 otherwise.  The
 * lookup is a plain strchr(), so the terminating NUL counts as present.
 */
int				/* predicate */
opt(int c, char *s)
{
	char *hit = strchr(s, c);

	return((hit == NULL) ? 0 : 1);
}
379
/*
 - fixstr - transform magic characters in strings
 == void fixstr(char *p);
 *
 * Rewrites the string in place: N becomes newline, T becomes tab,
 * S becomes space and Z becomes NUL.  A NULL pointer is accepted and
 * ignored.  The scan ends at the first NUL that was already in the
 * string; a NUL produced from Z does not end it, because the cursor
 * has moved on before the loop condition is tested again.
 */
void
fixstr(char *p)
{
	char *cursor;

	if (p == NULL)
		return;

	for (cursor = p; *cursor != '\0'; cursor++) {
		switch (*cursor) {
		case 'N':
			*cursor = '\n';
			break;
		case 'T':
			*cursor = '\t';
			break;
		case 'S':
			*cursor = ' ';
			break;
		case 'Z':
			*cursor = '\0';
			break;
		default:
			break;
		}
	}
}
401
/*
 - check - check a substring match
 == char *check(char *str, regmatch_t sub, char *should);
 *
 * Compares the match described by `sub' (offsets into `str') with the
 * expectation in `should':
 *	NULL or "-"	no match is expected
 *	"@text"		a null match is expected, located just before `text'
 *			("@" alone means at the end of the string)
 *	anything else	the exact matched text
 * Returns NULL when the match is as expected, otherwise a complaint.
 * The complaint may live in a static buffer that the next call
 * overwrites.
 */
char *				/* NULL or complaint */
check(char *str, regmatch_t sub, char *should)
{
	int len;
	int shlen;
	char *p;
	static char grump[500];
	char *at = NULL;

	if (should != NULL && strcmp(should, "-") == 0)
		should = NULL;
	if (should != NULL && should[0] == '@') {
		at = should + 1;
		should = "";
	}

	/* check rm_so and rm_eo for consistency */
	if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) ||
				(sub.rm_so != -1 && sub.rm_eo == -1) ||
				(sub.rm_so != -1 && sub.rm_so < 0) ||
				(sub.rm_eo != -1 && sub.rm_eo < 0) ) {
		snprintf(grump, sizeof(grump), "start %ld end %ld",
				(long)sub.rm_so, (long)sub.rm_eo);
		return(grump);
	}

	/* check for no match */
	if (sub.rm_so == -1 && should == NULL)
		return(NULL);
	if (sub.rm_so == -1)
		return("did not match");

	/* check for in range; rm_eo is known to be non-negative here */
	if ((size_t)sub.rm_eo > strlen(str)) {
		snprintf(grump, sizeof(grump),
				"start %ld end %ld, past end of string",
				(long)sub.rm_so, (long)sub.rm_eo);
		return(grump);
	}

	len = (int)(sub.rm_eo - sub.rm_so);
	p = str + sub.rm_so;

	/*
	 * check for not supposed to match -- this has to come before
	 * anything that looks at the contents of `should'
	 */
	if (should == NULL) {
		snprintf(grump, sizeof(grump), "matched `%.*s'", len, p);
		return(grump);
	}
	shlen = (int)strlen(should);

	/* check for wrong match */
	if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) {
		snprintf(grump, sizeof(grump), "matched `%.*s' instead",
				len, p);
		return(grump);
	}
	if (shlen > 0)
		return(NULL);

	/* check null match in right place */
	if (at == NULL)
		return(NULL);
	shlen = (int)strlen(at);
	if (shlen == 0)
		shlen = 1;	/* force check for end-of-string */
	if (strncmp(p, at, (size_t)shlen) != 0) {
		snprintf(grump, sizeof(grump), "matched null at `%.20s'", p);
		return(grump);
	}
	return(NULL);
}
478
479/*
480 - eprint - convert error number to name
481 == static char *eprint(int err);
482 */
483static char *
484eprint(err)
485int err;
486{
487	static char epbuf[100];
488	size_t len;
489
490	len = regerror(REG_ITOA|err, (regex_t *)NULL, epbuf, sizeof(epbuf));
491	assert(len <= sizeof(epbuf));
492	return(epbuf);
493}
494
495/*
496 - efind - convert error name to number
497 == static int efind(char *name);
498 */
499static int
500efind(name)
501char *name;
502{
503	static char efbuf[100];
504	size_t n;
505	regex_t re;
506
507	sprintf(efbuf, "REG_%s", name);
508	assert(strlen(efbuf) < sizeof(efbuf));
509	re.re_endp = efbuf;
510	(void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf));
511	return(atoi(efbuf));
512}
513