lexer.c revision 1.1.1.1
1/*	$NetBSD: lexer.c,v 1.1.1.1 2012/03/23 21:20:26 christos Exp $	*/
2
3/*
4 * Copyright (C) 2009 by Darren Reed.
5 *
6 * See the IPFILTER.LICENCE file for details on licencing.
7 */
8#include <ctype.h>
9#include "ipf.h"
10#ifdef	IPFILTER_SCAN
11# include "netinet/ip_scan.h"
12#endif
13#include <sys/ioctl.h>
14#include <syslog.h>
15#ifdef	TEST_LEXER
16# define	NO_YACC
17union	{
18	int		num;
19	char		*str;
20	struct in_addr	ipa;
21	i6addr_t	ip6;
22} yylval;
23#endif
24#include "lexer.h"
25#include "y.tab.h"
26
27FILE *yyin;
28
29#define	ishex(c)	(ISDIGIT(c) || ((c) >= 'a' && (c) <= 'f') || \
30			 ((c) >= 'A' && (c) <= 'F'))
31#define	TOOLONG		-3
32
33extern int	string_start;
34extern int	string_end;
35extern char	*string_val;
36extern int	pos;
37extern int	yydebug;
38
39char		*yystr = NULL;
40int		yytext[YYBUFSIZ+1];
41char		yychars[YYBUFSIZ+1];
42int		yylineNum = 1;
43int		yypos = 0;
44int		yylast = -1;
45int		yydictfixed = 0;
46int		yyexpectaddr = 0;
47int		yybreakondot = 0;
48int		yyvarnext = 0;
49int		yytokentype = 0;
50wordtab_t	*yywordtab = NULL;
51int		yysavedepth = 0;
52wordtab_t	*yysavewords[30];
53
54
55static	wordtab_t	*yyfindkey __P((char *));
56static	int		yygetc __P((int));
57static	void		yyunputc __P((int));
58static	int		yyswallow __P((int));
59static	char		*yytexttostr __P((int, int));
60static	void		yystrtotext __P((char *));
61static	char		*yytexttochar __P((void));
62
63static int yygetc(docont)
64	int docont;
65{
66	int c;
67
68	if (yypos < yylast) {
69		c = yytext[yypos++];
70		if (c == '\n')
71			yylineNum++;
72		return c;
73	}
74
75	if (yypos == YYBUFSIZ)
76		return TOOLONG;
77
78	if (pos >= string_start && pos <= string_end) {
79		c = string_val[pos - string_start];
80		yypos++;
81	} else {
82		c = fgetc(yyin);
83		if (docont && (c == '\\')) {
84			c = fgetc(yyin);
85			if (c == '\n') {
86				yylineNum++;
87				c = fgetc(yyin);
88			}
89		}
90	}
91	if (c == '\n')
92		yylineNum++;
93	yytext[yypos++] = c;
94	yylast = yypos;
95	yytext[yypos] = '\0';
96
97	return c;
98}
99
100
101static void yyunputc(c)
102	int c;
103{
104	if (c == '\n')
105		yylineNum--;
106	yytext[--yypos] = c;
107}
108
109
110static int yyswallow(last)
111	int last;
112{
113	int c;
114
115	while (((c = yygetc(0)) > '\0') && (c != last))
116		;
117
118	if (c != EOF)
119		yyunputc(c);
120	if (c == last)
121		return 0;
122	return -1;
123}
124
125
126static char *yytexttochar()
127{
128	int i;
129
130	for (i = 0; i < yypos; i++)
131		yychars[i] = (char)(yytext[i] & 0xff);
132	yychars[i] = '\0';
133	return yychars;
134}
135
136
137static void yystrtotext(str)
138	char *str;
139{
140	int len;
141	char *s;
142
143	len = strlen(str);
144	if (len > YYBUFSIZ)
145		len = YYBUFSIZ;
146
147	for (s = str; *s != '\0' && len > 0; s++, len--)
148		yytext[yylast++] = *s;
149	yytext[yylast] = '\0';
150}
151
152
153static char *yytexttostr(offset, max)
154	int offset, max;
155{
156	char *str;
157	int i;
158
159	if ((yytext[offset] == '\'' || yytext[offset] == '"') &&
160	    (yytext[offset] == yytext[offset + max - 1])) {
161		offset++;
162		max--;
163	}
164
165	if (max > yylast)
166		max = yylast;
167	str = malloc(max + 1);
168	if (str != NULL) {
169		for (i = offset; i < max; i++)
170			str[i - offset] = (char)(yytext[i] & 0xff);
171		str[i - offset] = '\0';
172	}
173	return str;
174}
175
176
177int yylex()
178{
179	static int prior = 0;
180	static int priornum = 0;
181	int c, n, isbuilding, rval, lnext, nokey = 0;
182	char *name;
183	int triedv6 = 0;
184
185	isbuilding = 0;
186	lnext = 0;
187	rval = 0;
188
189	if (yystr != NULL) {
190		free(yystr);
191		yystr = NULL;
192	}
193
194nextchar:
195	c = yygetc(0);
196	if (yydebug > 1)
197		printf("yygetc = (%x) %c [%*.*s]\n", c, c, yypos, yypos, yytexttochar());
198
199	switch (c)
200	{
201	case '\n' :
202		lnext = 0;
203		nokey = 0;
204	case '\t' :
205	case '\r' :
206	case ' ' :
207		if (isbuilding == 1) {
208			yyunputc(c);
209			goto done;
210		}
211		if (yylast > yypos) {
212			bcopy(yytext + yypos, yytext,
213			      sizeof(yytext[0]) * (yylast - yypos + 1));
214		}
215		yylast -= yypos;
216		yypos = 0;
217		lnext = 0;
218		nokey = 0;
219		goto nextchar;
220
221	case '\\' :
222		if (lnext == 0) {
223			lnext = 1;
224			if (yylast == yypos) {
225				yylast--;
226				yypos--;
227			} else
228				yypos--;
229			if (yypos == 0)
230				nokey = 1;
231			goto nextchar;
232		}
233		break;
234	}
235
236	if (lnext == 1) {
237		lnext = 0;
238		if ((isbuilding == 0) && !ISALNUM(c)) {
239			prior = c;
240			return c;
241		}
242		goto nextchar;
243	}
244
245	switch (c)
246	{
247	case '#' :
248		if (isbuilding == 1) {
249			yyunputc(c);
250			goto done;
251		}
252		yyswallow('\n');
253		rval = YY_COMMENT;
254		goto done;
255
256	case '$' :
257		if (isbuilding == 1) {
258			yyunputc(c);
259			goto done;
260		}
261		n = yygetc(0);
262		if (n == '{') {
263			if (yyswallow('}') == -1) {
264				rval = -2;
265				goto done;
266			}
267			(void) yygetc(0);
268		} else {
269			if (!ISALPHA(n)) {
270				yyunputc(n);
271				break;
272			}
273			do {
274				n = yygetc(1);
275			} while (ISALPHA(n) || ISDIGIT(n) || n == '_');
276			yyunputc(n);
277		}
278
279		name = yytexttostr(1, yypos);		/* skip $ */
280
281		if (name != NULL) {
282			string_val = get_variable(name, NULL, yylineNum);
283			free(name);
284			if (string_val != NULL) {
285				name = yytexttostr(yypos, yylast);
286				if (name != NULL) {
287					yypos = 0;
288					yylast = 0;
289					yystrtotext(string_val);
290					yystrtotext(name);
291					free(string_val);
292					free(name);
293					goto nextchar;
294				}
295				free(string_val);
296			}
297		}
298		break;
299
300	case '\'':
301	case '"' :
302		if (isbuilding == 1) {
303			goto done;
304		}
305		do {
306			n = yygetc(1);
307			if (n == EOF || n == TOOLONG) {
308				rval = -2;
309				goto done;
310			}
311			if (n == '\n') {
312				yyunputc(' ');
313				yypos++;
314			}
315		} while (n != c);
316		rval = YY_STR;
317		goto done;
318		/* NOTREACHED */
319
320	case EOF :
321		yylineNum = 1;
322		yypos = 0;
323		yylast = -1;
324		yyexpectaddr = 0;
325		yybreakondot = 0;
326		yyvarnext = 0;
327		yytokentype = 0;
328		if (yydebug)
329			fprintf(stderr, "reset at EOF\n");
330		prior = 0;
331		return 0;
332	}
333
334	if (strchr("=,/;{}()@", c) != NULL) {
335		if (isbuilding == 1) {
336			yyunputc(c);
337			goto done;
338		}
339		rval = c;
340		goto done;
341	} else if (c == '.') {
342		if (isbuilding == 0) {
343			rval = c;
344			goto done;
345		}
346		if (yybreakondot != 0) {
347			yyunputc(c);
348			goto done;
349		}
350	}
351
352	switch (c)
353	{
354	case '-' :
355		n = yygetc(0);
356		if (n == '>') {
357			isbuilding = 1;
358			goto done;
359		}
360		yyunputc(n);
361		if (yyexpectaddr) {
362			if (isbuilding == 1)
363				yyunputc(c);
364			else
365				rval = '-';
366			goto done;
367		}
368		if (isbuilding == 1)
369			break;
370		rval = '-';
371		goto done;
372
373	case '!' :
374		if (isbuilding == 1) {
375			yyunputc(c);
376			goto done;
377		}
378		n = yygetc(0);
379		if (n == '=') {
380			rval = YY_CMP_NE;
381			goto done;
382		}
383		yyunputc(n);
384		rval = '!';
385		goto done;
386
387	case '<' :
388		if (yyexpectaddr)
389			break;
390		if (isbuilding == 1) {
391			yyunputc(c);
392			goto done;
393		}
394		n = yygetc(0);
395		if (n == '=') {
396			rval = YY_CMP_LE;
397			goto done;
398		}
399		if (n == '>') {
400			rval = YY_RANGE_OUT;
401			goto done;
402		}
403		yyunputc(n);
404		rval = YY_CMP_LT;
405		goto done;
406
407	case '>' :
408		if (yyexpectaddr)
409			break;
410		if (isbuilding == 1) {
411			yyunputc(c);
412			goto done;
413		}
414		n = yygetc(0);
415		if (n == '=') {
416			rval = YY_CMP_GE;
417			goto done;
418		}
419		if (n == '<') {
420			rval = YY_RANGE_IN;
421			goto done;
422		}
423		yyunputc(n);
424		rval = YY_CMP_GT;
425		goto done;
426	}
427
428	/*
429	 * Now for the reason this is here...IPv6 address parsing.
430	 * The longest string we can expect is of this form:
431	 * 0000:0000:0000:0000:0000:0000:000.000.000.000
432	 * not:
433	 * 0000:0000:0000:0000:0000:0000:0000:0000
434	 */
435#ifdef	USE_INET6
436	if (yyexpectaddr == 1 && isbuilding == 0 && (ishex(c) || isdigit(c) || c == ':')) {
437		char ipv6buf[45 + 1], *s, oc;
438		int start;
439
440buildipv6:
441		start = yypos;
442		s = ipv6buf;
443		oc = c;
444
445		if (prior == YY_NUMBER && c == ':') {
446			sprintf(s, "%d", priornum);
447			s += strlen(s);
448		}
449
450		/*
451		 * Perhaps we should implement stricter controls on what we
452		 * swallow up here, but surely it would just be duplicating
453		 * the code in inet_pton() anyway.
454		 */
455		do {
456			*s++ = c;
457			c = yygetc(1);
458		} while ((ishex(c) || c == ':' || c == '.') &&
459			 (s - ipv6buf < 46));
460		yyunputc(c);
461		*s = '\0';
462
463		if (inet_pton(AF_INET6, ipv6buf, &yylval.ip6) == 1) {
464			rval = YY_IPV6;
465			yyexpectaddr = 0;
466			goto done;
467		}
468		yypos = start;
469		c = oc;
470	}
471#endif
472
473	if ((c == ':') && (rval != YY_IPV6) && (triedv6 == 0)) {
474#ifdef	USE_INET6
475		yystr = yytexttostr(0, yypos - 1);
476		if (yystr != NULL) {
477			char *s;
478
479			for (s = yystr; *s && ishex(*s); s++)
480				;
481			if (!*s && *yystr) {
482				isbuilding = 0;
483				c = *yystr;
484				free(yystr);
485				triedv6 = 1;
486				yypos = 1;
487				goto buildipv6;
488			}
489			free(yystr);
490		}
491#endif
492		if (isbuilding == 1) {
493			yyunputc(c);
494			goto done;
495		}
496		rval = ':';
497		goto done;
498	}
499
500	if (isbuilding == 0 && c == '0') {
501		n = yygetc(0);
502		if (n == 'x') {
503			do {
504				n = yygetc(1);
505			} while (ishex(n));
506			yyunputc(n);
507			rval = YY_HEX;
508			goto done;
509		}
510		yyunputc(n);
511	}
512
513	/*
514	 * No negative numbers with leading - sign..
515	 */
516	if (isbuilding == 0 && ISDIGIT(c)) {
517		do {
518			n = yygetc(1);
519		} while (ISDIGIT(n));
520		yyunputc(n);
521		rval = YY_NUMBER;
522		goto done;
523	}
524
525	isbuilding = 1;
526	goto nextchar;
527
528done:
529	yystr = yytexttostr(0, yypos);
530
531	if (yydebug)
532		printf("isbuilding %d yyvarnext %d nokey %d fixed %d addr %d\n",
533		       isbuilding, yyvarnext, nokey, yydictfixed, yyexpectaddr);
534	if (isbuilding == 1) {
535		wordtab_t *w;
536
537		w = NULL;
538		isbuilding = 0;
539
540		if ((yyvarnext == 0) && (nokey == 0)) {
541			w = yyfindkey(yystr);
542			if (w == NULL && yywordtab != NULL && !yydictfixed) {
543				yyresetdict();
544				w = yyfindkey(yystr);
545			}
546		} else
547			yyvarnext = 0;
548		if (w != NULL)
549			rval = w->w_value;
550		else
551			rval = YY_STR;
552	}
553
554	if (rval == YY_STR) {
555		if (yysavedepth > 0 && !yydictfixed)
556			yyresetdict();
557		if (yyexpectaddr == 1)
558			yyexpectaddr = 0;
559	}
560
561	yytokentype = rval;
562
563	if (yydebug)
564		printf("lexed(%s) [%d,%d,%d] => %d @%d\n", yystr, string_start,
565			string_end, pos, rval, yysavedepth);
566
567	switch (rval)
568	{
569	case YY_NUMBER :
570		sscanf(yystr, "%u", &yylval.num);
571		break;
572
573	case YY_HEX :
574		sscanf(yystr, "0x%x", (u_int *)&yylval.num);
575		break;
576
577	case YY_STR :
578		yylval.str = strdup(yystr);
579		break;
580
581	default :
582		break;
583	}
584
585	if (yylast > 0) {
586		bcopy(yytext + yypos, yytext,
587		      sizeof(yytext[0]) * (yylast - yypos + 1));
588		yylast -= yypos;
589		yypos = 0;
590	}
591
592	if (rval == YY_NUMBER)
593		priornum = yylval.num;
594	prior = rval;
595	return rval;
596}
597
598
599static wordtab_t *yyfindkey(key)
600	char *key;
601{
602	wordtab_t *w;
603
604	if (yywordtab == NULL)
605		return NULL;
606
607	for (w = yywordtab; w->w_word != 0; w++)
608		if (strcasecmp(key, w->w_word) == 0)
609			return w;
610	return NULL;
611}
612
613
614char *yykeytostr(num)
615	int num;
616{
617	wordtab_t *w;
618
619	if (yywordtab == NULL)
620		return "<unknown>";
621
622	for (w = yywordtab; w->w_word; w++)
623		if (w->w_value == num)
624			return w->w_word;
625	return "<unknown>";
626}
627
628
629wordtab_t *yysettab(words)
630	wordtab_t *words;
631{
632	wordtab_t *save;
633
634	save = yywordtab;
635	yywordtab = words;
636	return save;
637}
638
639
640void yyerror(msg)
641	char *msg;
642{
643	char *txt, letter[2];
644	int freetxt = 0;
645
646	if (yytokentype < 256) {
647		letter[0] = yytokentype;
648		letter[1] = '\0';
649		txt =  letter;
650	} else if (yytokentype == YY_STR || yytokentype == YY_HEX ||
651		   yytokentype == YY_NUMBER) {
652		if (yystr == NULL) {
653			txt = yytexttostr(yypos, YYBUFSIZ);
654			freetxt = 1;
655		} else
656			txt = yystr;
657	} else {
658		txt = yykeytostr(yytokentype);
659	}
660	fprintf(stderr, "%s error at \"%s\", line %d\n", msg, txt, yylineNum);
661	if (freetxt == 1)
662		free(txt);
663	exit(1);
664}
665
666
667void yysetfixeddict(newdict)
668	wordtab_t *newdict;
669{
670	if (yydebug)
671		printf("yysetfixeddict(%lx)\n", (u_long)newdict);
672
673	if (yysavedepth == sizeof(yysavewords)/sizeof(yysavewords[0])) {
674		fprintf(stderr, "%d: at maximum dictionary depth\n",
675			yylineNum);
676		return;
677	}
678
679	yysavewords[yysavedepth++] = yysettab(newdict);
680	if (yydebug)
681		printf("yysavedepth++ => %d\n", yysavedepth);
682	yydictfixed = 1;
683}
684
685
686void yysetdict(newdict)
687	wordtab_t *newdict;
688{
689	if (yydebug)
690		printf("yysetdict(%lx)\n", (u_long)newdict);
691
692	if (yysavedepth == sizeof(yysavewords)/sizeof(yysavewords[0])) {
693		fprintf(stderr, "%d: at maximum dictionary depth\n",
694			yylineNum);
695		return;
696	}
697
698	yysavewords[yysavedepth++] = yysettab(newdict);
699	if (yydebug)
700		printf("yysavedepth++ => %d\n", yysavedepth);
701}
702
703void yyresetdict()
704{
705	if (yydebug)
706		printf("yyresetdict(%d)\n", yysavedepth);
707	if (yysavedepth > 0) {
708		yysettab(yysavewords[--yysavedepth]);
709		if (yydebug)
710			printf("yysavedepth-- => %d\n", yysavedepth);
711	}
712	yydictfixed = 0;
713}
714
715
716
#ifdef	TEST_LEXER
/*
 * Stand-alone test driver: lex standard input and print, for every token,
 * the line number, token value, token text and buffer positions, until
 * yylex() returns 0.
 */
int main(argc, argv)
	int argc;
	char *argv[];
{
	int n;

	yyin = stdin;

	/* yystr is NULL for tokens yylex() returns without building text. */
	while ((n = yylex()) != 0)
		printf("%d.n = %d [%s] %d %d\n",
			yylineNum, n, yystr != NULL ? yystr : "(null)",
			yypos, yylast);
	return 0;
}
#endif
731