/* lexer.c revision 170268 */
/*	$FreeBSD: head/contrib/ipfilter/tools/lexer.c 170268 2007-06-04 02:54:36Z darrenr $	*/

/*
 * Copyright (C) 2002-2006 by Darren Reed.
 *
 * See the IPFILTER.LICENCE file for details on licencing.
 */
8#include <ctype.h>
9#include "ipf.h"
10#ifdef	IPFILTER_SCAN
11# include "netinet/ip_scan.h"
12#endif
13#include <sys/ioctl.h>
14#include <syslog.h>
15#ifdef	TEST_LEXER
16# define	NO_YACC
17union	{
18	int		num;
19	char		*str;
20	struct in_addr	ipa;
21	i6addr_t	ip6;
22} yylval;
23#endif
24#include "lexer.h"
25#include "y.tab.h"
26
27FILE *yyin;
28
29#define	ishex(c)	(ISDIGIT(c) || ((c) >= 'a' && (c) <= 'f') || \
30			 ((c) >= 'A' && (c) <= 'F'))
31#define	TOOLONG		-3
32
33extern int	string_start;
34extern int	string_end;
35extern char	*string_val;
36extern int	pos;
37extern int	yydebug;
38
39char		*yystr = NULL;
40int		yytext[YYBUFSIZ+1];
41int		yylineNum = 1;
42int		yypos = 0;
43int		yylast = -1;
44int		yyexpectaddr = 0;
45int		yybreakondot = 0;
46int		yyvarnext = 0;
47int		yytokentype = 0;
48wordtab_t	*yywordtab = NULL;
49int		yysavedepth = 0;
50wordtab_t	*yysavewords[30];
51
52
53static	wordtab_t	*yyfindkey __P((char *));
54static	int		yygetc __P((void));
55static	void		yyunputc __P((int));
56static	int		yyswallow __P((int));
57static	char		*yytexttostr __P((int, int));
58static	void		yystrtotext __P((char *));
59
60static int yygetc()
61{
62	int c;
63
64	if (yypos < yylast) {
65		c = yytext[yypos++];
66		if (c == '\n')
67			yylineNum++;
68		return c;
69	}
70
71	if (yypos == YYBUFSIZ)
72		return TOOLONG;
73
74	if (pos >= string_start && pos <= string_end) {
75		c = string_val[pos - string_start];
76		yypos++;
77	} else {
78		c = fgetc(yyin);
79	}
80	if (c == '\n')
81		yylineNum++;
82	yytext[yypos++] = c;
83	yylast = yypos;
84	yytext[yypos] = '\0';
85
86	return c;
87}
88
89
90static void yyunputc(c)
91int c;
92{
93	if (c == '\n')
94		yylineNum--;
95	yytext[--yypos] = c;
96}
97
98
99static int yyswallow(last)
100int last;
101{
102	int c;
103
104	while (((c = yygetc()) > '\0') && (c != last))
105		;
106
107	if (c != EOF)
108		yyunputc(c);
109	if (c == last)
110		return 0;
111	return -1;
112}
113
114
115static void yystrtotext(str)
116char *str;
117{
118	int len;
119	char *s;
120
121	len = strlen(str);
122	if (len > YYBUFSIZ)
123		len = YYBUFSIZ;
124
125	for (s = str; *s != '\0' && len > 0; s++, len--)
126		yytext[yylast++] = *s;
127	yytext[yylast] = '\0';
128}
129
130
131static char *yytexttostr(offset, max)
132int offset, max;
133{
134	char *str;
135	int i;
136
137	if ((yytext[offset] == '\'' || yytext[offset] == '"') &&
138	    (yytext[offset] == yytext[offset + max - 1])) {
139		offset++;
140		max--;
141	}
142
143	if (max > yylast)
144		max = yylast;
145	str = malloc(max + 1);
146	if (str != NULL) {
147		for (i = offset; i < max; i++)
148			str[i - offset] = (char)(yytext[i] & 0xff);
149		str[i - offset] = '\0';
150	}
151	return str;
152}
153
154
155int yylex()
156{
157	int c, n, isbuilding, rval, lnext, nokey = 0;
158	char *name;
159
160	isbuilding = 0;
161	lnext = 0;
162	rval = 0;
163
164	if (yystr != NULL) {
165		free(yystr);
166		yystr = NULL;
167	}
168
169nextchar:
170	c = yygetc();
171
172	switch (c)
173	{
174	case '\n' :
175		lnext = 0;
176		nokey = 0;
177	case '\t' :
178	case '\r' :
179	case ' ' :
180		if (isbuilding == 1) {
181			yyunputc(c);
182			goto done;
183		}
184		if (yylast > yypos) {
185			bcopy(yytext + yypos, yytext,
186			      sizeof(yytext[0]) * (yylast - yypos + 1));
187		}
188		yylast -= yypos;
189		yypos = 0;
190		lnext = 0;
191		nokey = 0;
192		goto nextchar;
193
194	case '\\' :
195		if (lnext == 0) {
196			lnext = 1;
197			if (yylast == yypos) {
198				yylast--;
199				yypos--;
200			} else
201				yypos--;
202			if (yypos == 0)
203				nokey = 1;
204			goto nextchar;
205		}
206		break;
207	}
208
209	if (lnext == 1) {
210		lnext = 0;
211		if ((isbuilding == 0) && !ISALNUM(c)) {
212			return c;
213		}
214		goto nextchar;
215	}
216
217	switch (c)
218	{
219	case '#' :
220		if (isbuilding == 1) {
221			yyunputc(c);
222			goto done;
223		}
224		yyswallow('\n');
225		rval = YY_COMMENT;
226		goto nextchar;
227
228	case '$' :
229		if (isbuilding == 1) {
230			yyunputc(c);
231			goto done;
232		}
233		n = yygetc();
234		if (n == '{') {
235			if (yyswallow('}') == -1) {
236				rval = -2;
237				goto done;
238			}
239			(void) yygetc();
240		} else {
241			if (!ISALPHA(n)) {
242				yyunputc(n);
243				break;
244			}
245			do {
246				n = yygetc();
247			} while (ISALPHA(n) || ISDIGIT(n) || n == '_');
248			yyunputc(n);
249		}
250
251		name = yytexttostr(1, yypos);		/* skip $ */
252
253		if (name != NULL) {
254			string_val = get_variable(name, NULL, yylineNum);
255			free(name);
256			if (string_val != NULL) {
257				name = yytexttostr(yypos, yylast);
258				if (name != NULL) {
259					yypos = 0;
260					yylast = 0;
261					yystrtotext(string_val);
262					yystrtotext(name);
263					free(string_val);
264					free(name);
265					goto nextchar;
266				}
267				free(string_val);
268			}
269		}
270		break;
271
272	case '\'':
273	case '"' :
274		if (isbuilding == 1) {
275			goto done;
276		}
277		do {
278			n = yygetc();
279			if (n == EOF || n == TOOLONG) {
280				rval = -2;
281				goto done;
282			}
283			if (n == '\n') {
284				yyunputc(' ');
285				yypos++;
286			}
287		} while (n != c);
288		rval = YY_STR;
289		goto done;
290		/* NOTREACHED */
291
292	case EOF :
293		yylineNum = 1;
294		yypos = 0;
295		yylast = -1;
296		yyexpectaddr = 0;
297		yybreakondot = 0;
298		yyvarnext = 0;
299		yytokentype = 0;
300		return 0;
301	}
302
303	if (strchr("=,/;{}()@", c) != NULL) {
304		if (isbuilding == 1) {
305			yyunputc(c);
306			goto done;
307		}
308		rval = c;
309		goto done;
310	} else if (c == '.') {
311		if (isbuilding == 0) {
312			rval = c;
313			goto done;
314		}
315		if (yybreakondot != 0) {
316			yyunputc(c);
317			goto done;
318		}
319	}
320
321	switch (c)
322	{
323	case '-' :
324		if (yyexpectaddr)
325			break;
326		if (isbuilding == 1)
327			break;
328		n = yygetc();
329		if (n == '>') {
330			isbuilding = 1;
331			goto done;
332		}
333		yyunputc(n);
334		rval = '-';
335		goto done;
336
337	case '!' :
338		if (isbuilding == 1) {
339			yyunputc(c);
340			goto done;
341		}
342		n = yygetc();
343		if (n == '=') {
344			rval = YY_CMP_NE;
345			goto done;
346		}
347		yyunputc(n);
348		rval = '!';
349		goto done;
350
351	case '<' :
352		if (yyexpectaddr)
353			break;
354		if (isbuilding == 1) {
355			yyunputc(c);
356			goto done;
357		}
358		n = yygetc();
359		if (n == '=') {
360			rval = YY_CMP_LE;
361			goto done;
362		}
363		if (n == '>') {
364			rval = YY_RANGE_OUT;
365			goto done;
366		}
367		yyunputc(n);
368		rval = YY_CMP_LT;
369		goto done;
370
371	case '>' :
372		if (yyexpectaddr)
373			break;
374		if (isbuilding == 1) {
375			yyunputc(c);
376			goto done;
377		}
378		n = yygetc();
379		if (n == '=') {
380			rval = YY_CMP_GE;
381			goto done;
382		}
383		if (n == '<') {
384			rval = YY_RANGE_IN;
385			goto done;
386		}
387		yyunputc(n);
388		rval = YY_CMP_GT;
389		goto done;
390	}
391
392	/*
393	 * Now for the reason this is here...IPv6 address parsing.
394	 * The longest string we can expect is of this form:
395	 * 0000:0000:0000:0000:0000:0000:000.000.000.000
396	 * not:
397	 * 0000:0000:0000:0000:0000:0000:0000:0000
398	 */
399#ifdef	USE_INET6
400	if (yyexpectaddr == 1 && isbuilding == 0 && (ishex(c) || c == ':')) {
401		char ipv6buf[45 + 1], *s, oc;
402		int start;
403
404		start = yypos;
405		s = ipv6buf;
406		oc = c;
407
408		/*
409		 * Perhaps we should implement stricter controls on what we
410		 * swallow up here, but surely it would just be duplicating
411		 * the code in inet_pton() anyway.
412		 */
413		do {
414			*s++ = c;
415			c = yygetc();
416		} while ((ishex(c) || c == ':' || c == '.') &&
417			 (s - ipv6buf < 46));
418		yyunputc(c);
419		*s = '\0';
420
421		if (inet_pton(AF_INET6, ipv6buf, &yylval.ip6) == 1) {
422			rval = YY_IPV6;
423			yyexpectaddr = 0;
424			goto done;
425		}
426		yypos = start;
427		c = oc;
428	}
429#endif
430
431	if (c == ':') {
432		if (isbuilding == 1) {
433			yyunputc(c);
434			goto done;
435		}
436		rval = ':';
437		goto done;
438	}
439
440	if (isbuilding == 0 && c == '0') {
441		n = yygetc();
442		if (n == 'x') {
443			do {
444				n = yygetc();
445			} while (ishex(n));
446			yyunputc(n);
447			rval = YY_HEX;
448			goto done;
449		}
450		yyunputc(n);
451	}
452
453	/*
454	 * No negative numbers with leading - sign..
455	 */
456	if (isbuilding == 0 && ISDIGIT(c)) {
457		do {
458			n = yygetc();
459		} while (ISDIGIT(n));
460		yyunputc(n);
461		rval = YY_NUMBER;
462		goto done;
463	}
464
465	isbuilding = 1;
466	goto nextchar;
467
468done:
469	yystr = yytexttostr(0, yypos);
470
471	if (yydebug)
472		printf("isbuilding %d yyvarnext %d nokey %d\n",
473		       isbuilding, yyvarnext, nokey);
474	if (isbuilding == 1) {
475		wordtab_t *w;
476
477		w = NULL;
478		isbuilding = 0;
479
480		if ((yyvarnext == 0) && (nokey == 0)) {
481			w = yyfindkey(yystr);
482			if (w == NULL && yywordtab != NULL) {
483				yyresetdict();
484				w = yyfindkey(yystr);
485			}
486		} else
487			yyvarnext = 0;
488		if (w != NULL)
489			rval = w->w_value;
490		else
491			rval = YY_STR;
492	}
493
494	if (rval == YY_STR && yysavedepth > 0)
495		yyresetdict();
496
497	yytokentype = rval;
498
499	if (yydebug)
500		printf("lexed(%s) [%d,%d,%d] => %d @%d\n", yystr, string_start,
501			string_end, pos, rval, yysavedepth);
502
503	switch (rval)
504	{
505	case YY_NUMBER :
506		sscanf(yystr, "%u", &yylval.num);
507		break;
508
509	case YY_HEX :
510		sscanf(yystr, "0x%x", (u_int *)&yylval.num);
511		break;
512
513	case YY_STR :
514		yylval.str = strdup(yystr);
515		break;
516
517	default :
518		break;
519	}
520
521	if (yylast > 0) {
522		bcopy(yytext + yypos, yytext,
523		      sizeof(yytext[0]) * (yylast - yypos + 1));
524		yylast -= yypos;
525		yypos = 0;
526	}
527
528	return rval;
529}
530
531
532static wordtab_t *yyfindkey(key)
533char *key;
534{
535	wordtab_t *w;
536
537	if (yywordtab == NULL)
538		return NULL;
539
540	for (w = yywordtab; w->w_word != 0; w++)
541		if (strcasecmp(key, w->w_word) == 0)
542			return w;
543	return NULL;
544}
545
546
547char *yykeytostr(num)
548int num;
549{
550	wordtab_t *w;
551
552	if (yywordtab == NULL)
553		return "<unknown>";
554
555	for (w = yywordtab; w->w_word; w++)
556		if (w->w_value == num)
557			return w->w_word;
558	return "<unknown>";
559}
560
561
562wordtab_t *yysettab(words)
563wordtab_t *words;
564{
565	wordtab_t *save;
566
567	save = yywordtab;
568	yywordtab = words;
569	return save;
570}
571
572
573void yyerror(msg)
574char *msg;
575{
576	char *txt, letter[2];
577	int freetxt = 0;
578
579	if (yytokentype < 256) {
580		letter[0] = yytokentype;
581		letter[1] = '\0';
582		txt =  letter;
583	} else if (yytokentype == YY_STR || yytokentype == YY_HEX ||
584		   yytokentype == YY_NUMBER) {
585		if (yystr == NULL) {
586			txt = yytexttostr(yypos, YYBUFSIZ);
587			freetxt = 1;
588		} else
589			txt = yystr;
590	} else {
591		txt = yykeytostr(yytokentype);
592	}
593	fprintf(stderr, "%s error at \"%s\", line %d\n", msg, txt, yylineNum);
594	if (freetxt == 1)
595		free(txt);
596	exit(1);
597}
598
599
600void yysetdict(newdict)
601wordtab_t *newdict;
602{
603	if (yysavedepth == sizeof(yysavewords)/sizeof(yysavewords[0])) {
604		fprintf(stderr, "%d: at maximum dictionary depth\n",
605			yylineNum);
606		return;
607	}
608
609	yysavewords[yysavedepth++] = yysettab(newdict);
610	if (yydebug)
611		printf("yysavedepth++ => %d\n", yysavedepth);
612}
613
614void yyresetdict()
615{
616	if (yydebug)
617		printf("yyresetdict(%d)\n", yysavedepth);
618	if (yysavedepth > 0) {
619		yysettab(yysavewords[--yysavedepth]);
620		if (yydebug)
621			printf("yysavedepth-- => %d\n", yysavedepth);
622	}
623}
624
625
626
#ifdef	TEST_LEXER
/*
 * Stand-alone test driver: tokenise standard input and print one line per
 * token returned by yylex() until it returns 0.
 */
int main(argc, argv)
int argc;
char *argv[];
{
	int n;

	yyin = stdin;

	while ((n = yylex()) != 0)
		printf("%d.n = %d [%s] %d %d\n",
			yylineNum, n, yystr, yypos, yylast);

	/* Give the caller a defined exit status. */
	return 0;
}
#endif
641