/* lexer.c revision 145510 */
/*	$NetBSD$	*/

/*
 * Copyright (C) 2003 by Darren Reed.
 *
 * See the IPFILTER.LICENCE file for details on licencing.
 */
8#include <ctype.h>
9#include "ipf.h"
10#ifdef	IPFILTER_SCAN
11# include "netinet/ip_scan.h"
12#endif
13#include <sys/ioctl.h>
14#include <syslog.h>
15#ifdef	TEST_LEXER
16# define	NO_YACC
17union	{
18	int		num;
19	char		*str;
20	struct in_addr	ipa;
21	i6addr_t	ip6;
22} yylval;
23#endif
24#include "lexer.h"
25#include "y.tab.h"
26
27FILE *yyin;
28
29#define	ishex(c)	(ISDIGIT(c) || ((c) >= 'a' && (c) <= 'f') || \
30			 ((c) >= 'A' && (c) <= 'F'))
31#define	TOOLONG		-3
32
33extern int	string_start;
34extern int	string_end;
35extern char	*string_val;
36extern int	pos;
37extern int	yydebug;
38
39char		*yystr = NULL;
40int		yytext[YYBUFSIZ+1];
41int		yylineNum = 1;
42int		yypos = 0;
43int		yylast = -1;
44int		yyexpectaddr = 0;
45int		yybreakondot = 0;
46int		yyvarnext = 0;
47int		yytokentype = 0;
48wordtab_t	*yywordtab = NULL;
49int		yysavedepth = 0;
50wordtab_t	*yysavewords[30];
51
52
53static	wordtab_t	*yyfindkey __P((char *));
54static	int		yygetc __P((void));
55static	void		yyunputc __P((int));
56static	int		yyswallow __P((int));
57static	char		*yytexttostr __P((int, int));
58static	void		yystrtotext __P((char *));
59
60static int yygetc()
61{
62	int c;
63
64	if (yypos < yylast) {
65		c = yytext[yypos++];
66		if (c == '\n')
67			yylineNum++;
68		return c;
69	}
70
71	if (yypos == YYBUFSIZ)
72		return TOOLONG;
73
74	if (pos >= string_start && pos <= string_end) {
75		c = string_val[pos - string_start];
76		yypos++;
77	} else {
78		c = fgetc(yyin);
79	}
80	if (c == '\n')
81		yylineNum++;
82	yytext[yypos++] = c;
83	yylast = yypos;
84	yytext[yypos] = '\0';
85
86	return c;
87}
88
89
90static void yyunputc(c)
91int c;
92{
93	if (c == '\n')
94		yylineNum--;
95	yytext[--yypos] = c;
96}
97
98
99static int yyswallow(last)
100int last;
101{
102	int c;
103
104	while (((c = yygetc()) > '\0') && (c != last))
105		;
106
107	if (c != EOF)
108		yyunputc(c);
109	if (c == last)
110		return 0;
111	return -1;
112}
113
114
115static void yystrtotext(str)
116char *str;
117{
118	int len;
119	char *s;
120
121	len = strlen(str);
122	if (len > YYBUFSIZ)
123		len = YYBUFSIZ;
124
125	for (s = str; *s != '\0' && len > 0; s++, len--)
126		yytext[yylast++] = *s;
127	yytext[yylast] = '\0';
128}
129
130
131static char *yytexttostr(offset, max)
132int offset, max;
133{
134	char *str;
135	int i;
136
137	if ((yytext[offset] == '\'' || yytext[offset] == '"') &&
138	    (yytext[offset] == yytext[offset + max - 1])) {
139		offset++;
140		max--;
141	}
142
143	if (max > yylast)
144		max = yylast;
145	str = malloc(max + 1);
146	if (str != NULL) {
147		for (i = offset; i < max; i++)
148			str[i - offset] = (char)(yytext[i] & 0xff);
149		str[i - offset] = '\0';
150	}
151	return str;
152}
153
154
155int yylex()
156{
157	int c, n, isbuilding, rval, lnext, nokey = 0;
158	char *name;
159
160	isbuilding = 0;
161	lnext = 0;
162	rval = 0;
163
164	if (yystr != NULL) {
165		free(yystr);
166		yystr = NULL;
167	}
168
169nextchar:
170	c = yygetc();
171
172	switch (c)
173	{
174	case '\n' :
175	case '\t' :
176	case '\r' :
177	case ' ' :
178		if (isbuilding == 1) {
179			yyunputc(c);
180			goto done;
181		}
182		if (yylast > yypos) {
183			bcopy(yytext + yypos, yytext,
184			      sizeof(yytext[0]) * (yylast - yypos + 1));
185		}
186		yylast -= yypos;
187		yypos = 0;
188		lnext = 0;
189		nokey = 0;
190		goto nextchar;
191
192	case '\\' :
193		if (lnext == 0) {
194			lnext = 1;
195			if (yylast == yypos) {
196				yylast--;
197				yypos--;
198			} else
199				yypos--;
200			if (yypos == 0)
201				nokey = 1;
202			goto nextchar;
203		}
204		break;
205	}
206
207	if (lnext == 1) {
208		lnext = 0;
209		if ((isbuilding == 0) && !ISALNUM(c)) {
210			return c;
211		}
212		goto nextchar;
213	}
214
215	switch (c)
216	{
217	case '#' :
218		if (isbuilding == 1) {
219			yyunputc(c);
220			goto done;
221		}
222		yyswallow('\n');
223		rval = YY_COMMENT;
224		goto nextchar;
225
226	case '$' :
227		if (isbuilding == 1) {
228			yyunputc(c);
229			goto done;
230		}
231		n = yygetc();
232		if (n == '{') {
233			if (yyswallow('}') == -1) {
234				rval = -2;
235				goto done;
236			}
237			(void) yygetc();
238		} else {
239			if (!ISALPHA(n)) {
240				yyunputc(n);
241				break;
242			}
243			do {
244				n = yygetc();
245			} while (ISALPHA(n) || ISDIGIT(n) || n == '_');
246			yyunputc(n);
247		}
248
249		name = yytexttostr(1, yypos);		/* skip $ */
250
251		if (name != NULL) {
252			string_val = get_variable(name, NULL, yylineNum);
253			free(name);
254			if (string_val != NULL) {
255				name = yytexttostr(yypos, yylast);
256				if (name != NULL) {
257					yypos = 0;
258					yylast = 0;
259					yystrtotext(string_val);
260					yystrtotext(name);
261					free(string_val);
262					free(name);
263					goto nextchar;
264				}
265				free(string_val);
266			}
267		}
268		break;
269
270	case '\'':
271	case '"' :
272		if (isbuilding == 1) {
273			goto done;
274		}
275		do {
276			n = yygetc();
277			if (n == EOF || n == TOOLONG) {
278				rval = -2;
279				goto done;
280			}
281			if (n == '\n') {
282				yyunputc(' ');
283				yypos++;
284			}
285		} while (n != c);
286		yyunputc(n);
287		break;
288
289	case EOF :
290		yylineNum = 1;
291		yypos = 0;
292		yylast = -1;
293		yyexpectaddr = 0;
294		yybreakondot = 0;
295		yyvarnext = 0;
296		yytokentype = 0;
297		return 0;
298	}
299
300	if (strchr("=,/;{}()@", c) != NULL) {
301		if (isbuilding == 1) {
302			yyunputc(c);
303			goto done;
304		}
305		rval = c;
306		goto done;
307	} else if (c == '.') {
308		if (isbuilding == 0) {
309			rval = c;
310			goto done;
311		}
312		if (yybreakondot != 0) {
313			yyunputc(c);
314			goto done;
315		}
316	}
317
318	switch (c)
319	{
320	case '-' :
321		if (yyexpectaddr)
322			break;
323		if (isbuilding == 1)
324			break;
325		n = yygetc();
326		if (n == '>') {
327			isbuilding = 1;
328			goto done;
329		}
330		yyunputc(n);
331		rval = '-';
332		goto done;
333
334	case '!' :
335		if (isbuilding == 1) {
336			yyunputc(c);
337			goto done;
338		}
339		n = yygetc();
340		if (n == '=') {
341			rval = YY_CMP_NE;
342			goto done;
343		}
344		yyunputc(n);
345		rval = '!';
346		goto done;
347
348	case '<' :
349		if (yyexpectaddr)
350			break;
351		if (isbuilding == 1) {
352			yyunputc(c);
353			goto done;
354		}
355		n = yygetc();
356		if (n == '=') {
357			rval = YY_CMP_LE;
358			goto done;
359		}
360		if (n == '>') {
361			rval = YY_RANGE_OUT;
362			goto done;
363		}
364		yyunputc(n);
365		rval = YY_CMP_LT;
366		goto done;
367
368	case '>' :
369		if (yyexpectaddr)
370			break;
371		if (isbuilding == 1) {
372			yyunputc(c);
373			goto done;
374		}
375		n = yygetc();
376		if (n == '=') {
377			rval = YY_CMP_GE;
378			goto done;
379		}
380		if (n == '<') {
381			rval = YY_RANGE_IN;
382			goto done;
383		}
384		yyunputc(n);
385		rval = YY_CMP_GT;
386		goto done;
387	}
388
389	/*
390	 * Now for the reason this is here...IPv6 address parsing.
391	 * The longest string we can expect is of this form:
392	 * 0000:0000:0000:0000:0000:0000:000.000.000.000
393	 * not:
394	 * 0000:0000:0000:0000:0000:0000:0000:0000
395	 */
396#ifdef	USE_INET6
397	if (yyexpectaddr == 1 && isbuilding == 0 && (ishex(c) || c == ':')) {
398		char ipv6buf[45 + 1], *s, oc;
399		int start;
400
401		start = yypos;
402		s = ipv6buf;
403		oc = c;
404
405		/*
406		 * Perhaps we should implement stricter controls on what we
407		 * swallow up here, but surely it would just be duplicating
408		 * the code in inet_pton() anyway.
409		 */
410		do {
411			*s++ = c;
412			c = yygetc();
413		} while ((ishex(c) || c == ':' || c == '.') &&
414			 (s - ipv6buf < 46));
415		yyunputc(c);
416		*s = '\0';
417
418		if (inet_pton(AF_INET6, ipv6buf, &yylval.ip6) == 1) {
419			rval = YY_IPV6;
420			yyexpectaddr = 0;
421			goto done;
422		}
423		yypos = start;
424		c = oc;
425	}
426#endif
427
428	if (c == ':') {
429		if (isbuilding == 1) {
430			yyunputc(c);
431			goto done;
432		}
433		rval = ':';
434		goto done;
435	}
436
437	if (isbuilding == 0 && c == '0') {
438		n = yygetc();
439		if (n == 'x') {
440			do {
441				n = yygetc();
442			} while (ishex(n));
443			yyunputc(n);
444			rval = YY_HEX;
445			goto done;
446		}
447		yyunputc(n);
448	}
449
450	/*
451	 * No negative numbers with leading - sign..
452	 */
453	if (isbuilding == 0 && ISDIGIT(c)) {
454		do {
455			n = yygetc();
456		} while (ISDIGIT(n));
457		yyunputc(n);
458		rval = YY_NUMBER;
459		goto done;
460	}
461
462	isbuilding = 1;
463	goto nextchar;
464
465done:
466	yystr = yytexttostr(0, yypos);
467
468	if (isbuilding == 1) {
469		wordtab_t *w;
470
471		w = NULL;
472		isbuilding = 0;
473
474		if ((yyvarnext == 0) && (nokey == 0)) {
475			w = yyfindkey(yystr);
476			if (w == NULL && yywordtab != NULL) {
477				yyresetdict();
478				w = yyfindkey(yystr);
479			}
480		} else
481			yyvarnext = 0;
482		if (w != NULL)
483			rval = w->w_value;
484		else
485			rval = YY_STR;
486	}
487
488	if (rval == YY_STR && yysavedepth > 0)
489		yyresetdict();
490
491	yytokentype = rval;
492
493	if (yydebug)
494		printf("lexed(%s) [%d,%d,%d] => %d\n", yystr, string_start,
495			string_end, pos, rval);
496
497	switch (rval)
498	{
499	case YY_NUMBER :
500		sscanf(yystr, "%u", &yylval.num);
501		break;
502
503	case YY_HEX :
504		sscanf(yystr, "0x%x", (u_int *)&yylval.num);
505		break;
506
507	case YY_STR :
508		yylval.str = strdup(yystr);
509		break;
510
511	default :
512		break;
513	}
514
515	if (yylast > 0) {
516		bcopy(yytext + yypos, yytext,
517		      sizeof(yytext[0]) * (yylast - yypos + 1));
518		yylast -= yypos;
519		yypos = 0;
520	}
521
522	return rval;
523}
524
525
526static wordtab_t *yyfindkey(key)
527char *key;
528{
529	wordtab_t *w;
530
531	if (yywordtab == NULL)
532		return NULL;
533
534	for (w = yywordtab; w->w_word != 0; w++)
535		if (strcasecmp(key, w->w_word) == 0)
536			return w;
537	return NULL;
538}
539
540
541char *yykeytostr(num)
542int num;
543{
544	wordtab_t *w;
545
546	if (yywordtab == NULL)
547		return "<unknown>";
548
549	for (w = yywordtab; w->w_word; w++)
550		if (w->w_value == num)
551			return w->w_word;
552	return "<unknown>";
553}
554
555
556wordtab_t *yysettab(words)
557wordtab_t *words;
558{
559	wordtab_t *save;
560
561	save = yywordtab;
562	yywordtab = words;
563	return save;
564}
565
566
567void yyerror(msg)
568char *msg;
569{
570	char *txt, letter[2];
571	int freetxt = 0;
572
573	if (yytokentype < 256) {
574		letter[0] = yytokentype;
575		letter[1] = '\0';
576		txt =  letter;
577	} else if (yytokentype == YY_STR || yytokentype == YY_HEX ||
578		   yytokentype == YY_NUMBER) {
579		if (yystr == NULL) {
580			txt = yytexttostr(yypos, YYBUFSIZ);
581			freetxt = 1;
582		} else
583			txt = yystr;
584	} else {
585		txt = yykeytostr(yytokentype);
586	}
587	fprintf(stderr, "%s error at \"%s\", line %d\n", msg, txt, yylineNum);
588	if (freetxt == 1)
589		free(txt);
590	exit(1);
591}
592
593
594void yysetdict(newdict)
595wordtab_t *newdict;
596{
597	if (yysavedepth == sizeof(yysavewords)/sizeof(yysavewords[0])) {
598		fprintf(stderr, "%d: at maximum dictionary depth\n",
599			yylineNum);
600		return;
601	}
602
603	yysavewords[yysavedepth++] = yysettab(newdict);
604	if (yydebug)
605		printf("yysavedepth++ => %d\n", yysavedepth);
606}
607
608void yyresetdict()
609{
610	if (yysavedepth > 0) {
611		yysettab(yysavewords[--yysavedepth]);
612		if (yydebug)
613			printf("yysavedepth-- => %d\n", yysavedepth);
614	}
615}
616
617
618
#ifdef	TEST_LEXER
/*
 * Stand-alone test driver: lex standard input and print one line per token
 * (line number, token code, token text, buffer positions) until yylex()
 * returns 0.
 */
int main(argc, argv)
int argc;
char *argv[];
{
	int n;

	(void)argc;
	(void)argv;

	yyin = stdin;

	/*
	 * yystr is NULL when yylex() returns an escaped character directly,
	 * so do not pass it to %s unchecked.
	 */
	while ((n = yylex()) != 0)
		printf("%d.n = %d [%s] %d %d\n",
			yylineNum, n, (yystr != NULL) ? yystr : "(null)",
			yypos, yylast);

	return 0;
}
#endif