zlexer.lex revision 1.3
1%{
2/*
3 * zlexer.lex - lexical analyzer for (DNS) zone files
4 *
5 * Copyright (c) 2001-2006, NLnet Labs. All rights reserved
6 *
7 * See LICENSE for the license.
8 *
9 */
10/* because flex keeps having sign-unsigned compare problems that are unfixed*/
11#pragma GCC diagnostic ignored "-Wsign-compare"
12
13#include "config.h"
14
15#include <ctype.h>
16#include <errno.h>
17#include <string.h>
18#include <strings.h>
19
20#include "zonec.h"
21#include "dname.h"
22#include "zparser.h"
23
24#if 0
25#define LEXOUT(s)  printf s /* used ONLY when debugging */
26#else
27#define LEXOUT(s)
28#endif
29
/*
 * Which part of a zone file entry the lexer is currently scanning.
 * The rules and parse_token() use it to decide how to classify the
 * next word.
 */
enum lexer_state {
	/* Start of an entry (initial state, and after each newline that
	 * is not inside parentheses).  */
	EXPECT_OWNER,
	/* A token was returned while in EXPECT_OWNER; it is taken to be
	 * the owner.  */
	PARSING_OWNER,
	/* Whitespace was seen after the owner, or at the very start of an
	 * entry; words are tried as RR type, class and TTL in turn.  */
	PARSING_TTL_CLASS_TYPE,
	/* An RR type, $TTL or $ORIGIN was recognized; remaining words are
	 * returned as plain data.  */
	PARSING_RDATA
};
36
37static int parse_token(int token, char *yytext, enum lexer_state *lexer_state);
38
39static YY_BUFFER_STATE include_stack[MAXINCLUDES];
40static zparser_type zparser_stack[MAXINCLUDES];
41static int include_stack_ptr = 0;
42
43/*
44 * Saves the file specific variables on the include stack.
45 */
46static void
47push_parser_state(FILE *input)
48{
49	zparser_stack[include_stack_ptr].filename = parser->filename;
50	zparser_stack[include_stack_ptr].line = parser->line;
51	zparser_stack[include_stack_ptr].origin = parser->origin;
52	include_stack[include_stack_ptr] = YY_CURRENT_BUFFER;
53	yy_switch_to_buffer(yy_create_buffer(input, YY_BUF_SIZE));
54	++include_stack_ptr;
55}
56
57/*
58 * Restores the file specific variables from the include stack.
59 */
60static void
61pop_parser_state(void)
62{
63	--include_stack_ptr;
64	parser->filename = zparser_stack[include_stack_ptr].filename;
65	parser->line = zparser_stack[include_stack_ptr].line;
66	parser->origin = zparser_stack[include_stack_ptr].origin;
67	yy_delete_buffer(YY_CURRENT_BUFFER);
68	yy_switch_to_buffer(include_stack[include_stack_ptr]);
69}
70
71static YY_BUFFER_STATE oldstate;
72/* Start string scan */
73void
74parser_push_stringbuf(char* str)
75{
76	oldstate = YY_CURRENT_BUFFER;
77	yy_switch_to_buffer(yy_scan_string(str));
78}
79
80void
81parser_pop_stringbuf(void)
82{
83	yy_delete_buffer(YY_CURRENT_BUFFER);
84	yy_switch_to_buffer(oldstate);
85	oldstate = NULL;
86}
87
#ifndef yy_set_bol /* compat definition, for flex 2.4.6 */
/*
 * Fallback for flex versions that do not provide yy_set_bol().  Makes
 * sure a current buffer exists and stores '\n' (at_bol true) or ' '
 * (at_bol false) in the first byte of its character buffer.
 *
 * Wrapped in do { } while (0) so that "yy_set_bol(x);" is a single
 * statement and stays correct inside an unbraced if/else.
 */
#define yy_set_bol(at_bol) \
	do { \
		if ( ! yy_current_buffer ) \
			yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); \
		yy_current_buffer->yy_ch_buf[0] = ((at_bol)?'\n':' '); \
	} while (0)
#endif
96
97%}
98%option noinput
99%option nounput
100%{
101#ifndef YY_NO_UNPUT
102#define YY_NO_UNPUT 1
103#endif
104#ifndef YY_NO_INPUT
105#define YY_NO_INPUT 1
106#endif
107%}
108
109SPACE   [ \t]
110LETTER  [a-zA-Z]
111NEWLINE [\n\r]
112ZONESTR [^ \t\n\r();.\"\$]|\\.|\\\n
113CHARSTR [^ \t\n\r();.]|\\.|\\\n
114QUOTE   \"
115DOLLAR  \$
116COMMENT ;
117DOT     \.
118BIT	[^\]\n]|\\.
119ANY     [^\"\n\\]|\\.
120
121%x	incl bitlabel quotedstring
122
123%%
	/*
	 * Scanner-local state.  Both variables are static, so they keep
	 * their values between calls: paren_open is non-zero while an
	 * opening parenthesis has not been closed yet, lexer_state records
	 * which part of an entry is being scanned.
	 */
	static int paren_open = 0;
	static enum lexer_state lexer_state = EXPECT_OWNER;
{SPACE}*{COMMENT}.*	/* ignore */
^{DOLLAR}TTL            { lexer_state = PARSING_RDATA; return DOLLAR_TTL; }
^{DOLLAR}ORIGIN         { lexer_state = PARSING_RDATA; return DOLLAR_ORIGIN; }

	/*
	 * Handle $INCLUDE directives.  See
	 * http://dinosaur.compilertools.net/flex/flex_12.html#SEC12.
	 * The rest of the line is scanned in the exclusive "incl" start
	 * condition.
	 */
^{DOLLAR}INCLUDE        {
	BEGIN(incl);
}
<incl>\n 		|
<incl><<EOF>>		{
	/*
	 * $INCLUDE immediately followed by a newline or end of file.
	 * parser->error_occurred is put back to the value it had on
	 * entry once the error has been reported.
	 */
	int error_occurred = parser->error_occurred;
	BEGIN(INITIAL);
	zc_error("missing file name in $INCLUDE directive");
	yy_set_bol(1); /* Set beginning of line, so "^" rules match.  */
	++parser->line;
	parser->error_occurred = error_occurred;
}
<incl>.+ 		{
	/*
	 * Remainder of a $INCLUDE line: a file name, optionally followed
	 * by an origin and/or a ';' comment.  On success the current
	 * parser state is pushed and scanning continues in the included
	 * file.  parser->error_occurred is put back to the value it had
	 * on entry before the action ends.
	 */
	char *tmp;
	domain_type *origin = parser->origin;
	int error_occurred = parser->error_occurred;

	BEGIN(INITIAL);
	if (include_stack_ptr >= MAXINCLUDES ) {
		zc_error("includes nested too deeply, skipped (>%d)",
			 (int) MAXINCLUDES);
	} else {
		FILE *input;

		/*
		 * Remove trailing comment.  The comment starts at the
		 * FIRST ';' so that comments which themselves contain a
		 * ';' are removed completely.
		 */
		tmp = strchr(yytext, ';');
		if (tmp) {
			*tmp = '\0';
		}
		strip_string(yytext);

		/*
		 * Parse origin for include file: it is whatever follows
		 * the last blank (space or tab) on the line.  Walk back
		 * from the end until a blank precedes tmp.
		 */
		tmp = yytext + strlen(yytext);
		while (tmp > yytext && tmp[-1] != ' ' && tmp[-1] != '\t') {
			--tmp;
		}
		if (tmp > yytext) {
			const dname_type *dname;
			char *origin_text = tmp;
			size_t origin_len;

			/* split the original yytext */
			tmp[-1] = '\0';
			strip_string(yytext);

			origin_len = strlen(origin_text);
			dname = dname_parse(parser->region, origin_text);
			if (!dname) {
				zc_error("incorrect include origin '%s'",
					 origin_text);
			} else if (origin_len == 0
				   || origin_text[origin_len - 1] != '.') {
				zc_error("$INCLUDE directive requires absolute domain name");
			} else {
				origin = domain_table_insert(
					parser->db->domains, dname);
			}
		}

		if (yytext[0] == '\0') {
			zc_error("missing file name in $INCLUDE directive");
		} else if (!(input = fopen(yytext, "r"))) {
			zc_error("cannot open include file '%s': %s",
				 yytext, strerror(errno));
		} else {
			/* Initialize parser for include file.  */
			char *filename = region_strdup(parser->region, yytext);
			push_parser_state(input); /* Destroys yytext.  */
			parser->filename = filename;
			parser->line = 1;
			parser->origin = origin;
			lexer_state = EXPECT_OWNER;
		}
	}

	parser->error_occurred = error_occurred;
}
<INITIAL><<EOF>>	{
	yy_set_bol(1); /* Set beginning of line, so "^" rules match.  */
	if (include_stack_ptr == 0) {
		/* Nothing on the include stack: scanning is finished.  */
		yyterminate();
	} else {
		/* End of an included file: close it and resume the file
		 * that was saved on the include stack.  */
		fclose(yyin);
		pop_parser_state();
	}
}
^{DOLLAR}{LETTER}+	{ zc_warning("Unknown directive: %s", yytext); }
{DOT}	{
	/* A single dot is handed to parse_token() as token '.'.  */
	LEXOUT((". "));
	return parse_token('.', yytext, &lexer_state);
}
@	{
	/* '@' is handed to parse_token() as token '@'.  */
	LEXOUT(("@ "));
	return parse_token('@', yytext, &lexer_state);
}
\\#	{
	/* The two characters "\#" are returned as the URR token.  */
	LEXOUT(("\\# "));
	return parse_token(URR, yytext, &lexer_state);
}
{NEWLINE}	{
	/*
	 * Count the line.  Inside parentheses a newline is just
	 * whitespace (SP); otherwise it ends the entry (NL) and the next
	 * token is expected to be an owner.
	 */
	++parser->line;
	if (paren_open) {
		LEXOUT(("SP "));
		return SP;
	}
	lexer_state = EXPECT_OWNER;
	LEXOUT(("NL\n"));
	return NL;
}
\(	{
	/* Opening parenthesis: reported as SP; nesting is an error.  */
	if (!paren_open) {
		LEXOUT(("( "));
		paren_open = 1;
		return SP;
	}
	zc_error("nested parentheses");
	yyterminate();
}
\)	{
	/* Closing parenthesis: reported as SP; must match an open one.  */
	if (paren_open) {
		LEXOUT((") "));
		paren_open = 0;
		return SP;
	}
	zc_error("closing parentheses without opening parentheses");
	yyterminate();
}
{SPACE}+	{
	/*
	 * Blanks at the start of an entry (outside parentheses) yield
	 * PREV; any other run of blanks yields SP.  In both cases, and
	 * also after an owner, TTL/class/type parsing comes next.
	 */
	const int at_entry_start =
		!paren_open && lexer_state == EXPECT_OWNER;

	if (at_entry_start || lexer_state == PARSING_OWNER) {
		lexer_state = PARSING_TTL_CLASS_TYPE;
	}
	if (at_entry_start) {
		LEXOUT(("PREV "));
		return PREV;
	}
	LEXOUT(("SP "));
	return SP;
}
270
	/*
	 * Bitlabels.  Strip leading and ending brackets.  The text between
	 * the brackets is gathered with yymore() in the exclusive
	 * "bitlabel" start condition.
	 */
\\\[			{ BEGIN(bitlabel); }
<bitlabel><<EOF>>	{
	zc_error("EOF inside bitlabel");
	BEGIN(INITIAL);
	yyrestart(yyin); /* this is so that lex does not give an internal err */
	yyterminate();
}
<bitlabel>{BIT}*	{ yymore(); }
<bitlabel>\n		{ ++parser->line; yymore(); }
<bitlabel>\]		{
	BEGIN(INITIAL);
	/* Overwrite the closing bracket, the last character matched.  */
	yytext[yyleng - 1] = '\0';
	return parse_token(BITLAB, yytext, &lexer_state);
}

	/*
	 * Quoted strings.  Strip leading and ending quotes.  The text
	 * between the quotes is gathered with yymore() in the exclusive
	 * "quotedstring" start condition; embedded newlines are kept and
	 * counted.
	 */
{QUOTE}			{ BEGIN(quotedstring); LEXOUT(("\" ")); }
<quotedstring><<EOF>> 	{
	zc_error("EOF inside quoted string");
	BEGIN(INITIAL);
	yyrestart(yyin); /* this is so that lex does not give an internal err */
	yyterminate();
}
<quotedstring>{ANY}*	{ LEXOUT(("STR ")); yymore(); }
<quotedstring>\n 	{ ++parser->line; yymore(); }
<quotedstring>{QUOTE} {
	LEXOUT(("\" "));
	BEGIN(INITIAL);
	/* Overwrite the closing quote, the last character matched.  */
	yytext[yyleng - 1] = '\0';
	return parse_token(STR, yytext, &lexer_state);
}

{ZONESTR}({CHARSTR})* {
	/* Any allowed word.  */
	return parse_token(STR, yytext, &lexer_state);
}
. {
	/*
	 * Anything no other rule matched.  The byte is converted through
	 * unsigned char so that values of 0x80 and above are reported
	 * with their real octet value instead of a negative number on
	 * platforms where plain char is signed.
	 */
	zc_error("unknown character '%c' (\\%03d) seen - is this a zonefile?",
		 (int) (unsigned char) yytext[0],
		 (int) (unsigned char) yytext[0]);
}
312%%
313
314/*
315 * Analyze "word" to see if it matches an RR type, possibly by using
316 * the "TYPExxx" notation.  If it matches, the corresponding token is
317 * returned and the TYPE parameter is set to the RR type value.
318 */
319static int
320rrtype_to_token(const char *word, uint16_t *type)
321{
322	uint16_t t = rrtype_from_string(word);
323	if (t != 0) {
324		rrtype_descriptor_type *entry = rrtype_descriptor_by_type(t);
325		*type = t;
326		return entry->token;
327	}
328
329	return 0;
330}
331
332
/*
 * Remove escape constructs from TEXT in place. See RFC 1035, section 5.1.
 *
 *   \DDD  three decimal digits with a value of at most 255 become the
 *         single octet with that value; for larger values a warning is
 *         issued, the backslash is dropped and the digits are kept.
 *   \X    any other character X is kept without the backslash.
 *   \     a backslash at the very end is dropped with a warning.
 *
 * The result is NUL terminated.  Returns its length, which may include
 * embedded zero octets produced by "\000".
 */
static size_t
zoctet(char *text)
{
	/*
	 * src reads the original string, dst rebuilds the new one behind
	 * it.  Both cursors only ever move forward, so neither can end up
	 * before the start of the buffer.
	 */
	const char *src = text;
	char *dst = text;

	while (*src != '\0') {
		assert(dst <= src);
		if (src[0] != '\\') {
			/* Ordinary character.  */
			*dst++ = *src++;
		} else if (isdigit((unsigned char)src[1])
			   && isdigit((unsigned char)src[2])
			   && isdigit((unsigned char)src[3])) {
			/* \DDD escape.  */
			int val = ((src[1] - '0') * 100 +
				   (src[2] - '0') * 10 +
				   (src[3] - '0'));
			if (val <= 255) {
				*dst++ = (char)val;
				src += 4;
			} else {
				/*
				 * Keep the first digit here; the other two
				 * are copied as ordinary characters by the
				 * next iterations.
				 */
				zc_warning("text escape \\DDD overflow");
				*dst++ = src[1];
				src += 2;
			}
		} else if (src[1] != '\0') {
			/* \X where X is any character, keep X.  */
			*dst++ = src[1];
			src += 2;
		} else {
			/* Trailing backslash, ignore it.  */
			zc_warning("trailing backslash ignored");
			src += 1;
		}
	}
	*dst = '\0';
	return (size_t)(dst - text);
}
375
376static int
377parse_token(int token, char *yytext, enum lexer_state *lexer_state)
378{
379	size_t len;
380	char *str;
381
382	if (*lexer_state == EXPECT_OWNER) {
383		*lexer_state = PARSING_OWNER;
384	} else if (*lexer_state == PARSING_TTL_CLASS_TYPE) {
385		const char *t;
386		int token;
387		uint16_t rrclass;
388
389		/* type */
390		token = rrtype_to_token(yytext, &yylval.type);
391		if (token != 0) {
392			*lexer_state = PARSING_RDATA;
393			LEXOUT(("%d[%s] ", token, yytext));
394			return token;
395		}
396
397		/* class */
398		rrclass = rrclass_from_string(yytext);
399		if (rrclass != 0) {
400			yylval.klass = rrclass;
401			LEXOUT(("CLASS "));
402			return T_RRCLASS;
403		}
404
405		/* ttl */
406		yylval.ttl = strtottl(yytext, &t);
407		if (*t == '\0') {
408			LEXOUT(("TTL "));
409			return T_TTL;
410		}
411	}
412
413	str = region_strdup(parser->rr_region, yytext);
414	len = zoctet(str);
415
416	yylval.data.str = str;
417	yylval.data.len = len;
418
419	LEXOUT(("%d[%s] ", token, yytext));
420	return token;
421}
422