1%{
2/*
3 * zlexer.lex - lexical analyzer for (DNS) zone files
4 *
5 * Copyright (c) 2001-2006, NLnet Labs. All rights reserved
6 *
7 * See LICENSE for the license.
8 *
9 */
10/* because flex keeps having sign-unsigned compare problems that are unfixed*/
11#if defined(__clang__)||(defined(__GNUC__)&&((__GNUC__ >4)||(defined(__GNUC_MINOR__)&&(__GNUC__ ==4)&&(__GNUC_MINOR__ >=2))))
12#pragma GCC diagnostic ignored "-Wsign-compare"
13#endif
14/* ignore fallthrough warnings in the generated parse code case statements */
15#if defined(__clang__)||(defined(__GNUC__)&&(__GNUC__ >=7))
16#pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
17#endif
18
19#include "config.h"
20
21#include <ctype.h>
22#include <errno.h>
23#include <string.h>
24#include <strings.h>
25
26#include "zonec.h"
27#include "dname.h"
28#include "zparser.h"
29
30#if 0
31#define LEXOUT(s)  printf s /* used ONLY when debugging */
32#else
33#define LEXOUT(s)
34#endif
35
36enum lexer_state {
37	EXPECT_OWNER,
38	PARSING_OWNER,
39	PARSING_TTL_CLASS_TYPE,
40	PARSING_RDATA
41};
42
43static int parse_token(int token, char *yytext, enum lexer_state *lexer_state);
44
45static YY_BUFFER_STATE include_stack[MAXINCLUDES];
46static zparser_type zparser_stack[MAXINCLUDES];
47static int include_stack_ptr = 0;
48
49/*
50 * Saves the file specific variables on the include stack.
51 */
52static void
53push_parser_state(FILE *input)
54{
55	zparser_stack[include_stack_ptr].filename = parser->filename;
56	zparser_stack[include_stack_ptr].line = parser->line;
57	zparser_stack[include_stack_ptr].origin = parser->origin;
58	include_stack[include_stack_ptr] = YY_CURRENT_BUFFER;
59	yy_switch_to_buffer(yy_create_buffer(input, YY_BUF_SIZE));
60	++include_stack_ptr;
61}
62
63/*
64 * Restores the file specific variables from the include stack.
65 */
66static void
67pop_parser_state(void)
68{
69	if (parser->filename)
70		region_recycle(parser->region, (void *)parser->filename,
71			strlen(parser->filename)+1);
72
73	--include_stack_ptr;
74	parser->filename = zparser_stack[include_stack_ptr].filename;
75	parser->line = zparser_stack[include_stack_ptr].line;
76	parser->origin = zparser_stack[include_stack_ptr].origin;
77	yy_delete_buffer(YY_CURRENT_BUFFER);
78	yy_switch_to_buffer(include_stack[include_stack_ptr]);
79}
80
81static YY_BUFFER_STATE oldstate;
82/* Start string scan */
83void
84parser_push_stringbuf(char* str)
85{
86	oldstate = YY_CURRENT_BUFFER;
87	yy_switch_to_buffer(yy_scan_string(str));
88}
89
90void
91parser_pop_stringbuf(void)
92{
93	yy_delete_buffer(YY_CURRENT_BUFFER);
94	yy_switch_to_buffer(oldstate);
95	oldstate = NULL;
96}
97
98	static int paren_open = 0;
99	static enum lexer_state lexer_state = EXPECT_OWNER;
100void
101parser_flush(void)
102{
103	YY_FLUSH_BUFFER;
104	paren_open = 0;
105	lexer_state = EXPECT_OWNER;
106}
107
108int at_eof(void)
109{
110	static int once = 1;
111	return (once = !once) ? 0 : NL;
112}
113
#ifndef yy_set_bol /* compat definition, for flex 2.4.6 */
/*
 * Fallback for scanners generated by a flex that does not provide
 * yy_set_bol().  Creates the current buffer when there is none yet and
 * stores '\n' (at_bol true) or ' ' (at_bol false) in the first byte of
 * the buffer.
 *
 * Wrapped in do { } while (0) so that the expansion is a single
 * statement and "if (c) yy_set_bol(1); else ..." compiles.
 */
#define yy_set_bol(at_bol) \
	do { \
		if ( ! yy_current_buffer ) \
			yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); \
		yy_current_buffer->yy_ch_buf[0] = ((at_bol)?'\n':' '); \
	} while (0)
#endif
122
123%}
124%option noinput
125%option nounput
126%{
127#ifndef YY_NO_UNPUT
128#define YY_NO_UNPUT 1
129#endif
130#ifndef YY_NO_INPUT
131#define YY_NO_INPUT 1
132#endif
133%}
134
135SPACE   [ \t]
136LETTER  [a-zA-Z]
137NEWLINE [\n\r]
138ZONESTR [^ \t\n\r();.\"\$]|\\.|\\\n
139CHARSTR [^ \t\n\r();.\"]|\\.|\\\n
140QUOTE   \"
141DOLLAR  \$
142COMMENT ;
143DOT     \.
144BIT	[^\]\n]|\\.
145ANY     [^\"\n\\]|\\.
146
147%x	incl bitlabel quotedstring
148
149%%
{SPACE}*{COMMENT}.*	{ /* comment up to the end of the line: ignore */ }
^{DOLLAR}TTL            {
	/* The words after the directive are not tried as type, class or TTL. */
	lexer_state = PARSING_RDATA;
	return DOLLAR_TTL;
}
^{DOLLAR}ORIGIN         {
	/* Same treatment as for the TTL directive. */
	lexer_state = PARSING_RDATA;
	return DOLLAR_ORIGIN;
}
153
154	/*
155	 * Handle $INCLUDE directives.  See
156	 * http://dinosaur.compilertools.net/flex/flex_12.html#SEC12.
157	 */
^{DOLLAR}INCLUDE        {
	/* The remainder of the line is scanned in the incl start condition. */
	BEGIN(incl);
	/* ignore case statement fallthrough on incl<EOF> flex rule */
}
<incl>\n		|
<incl><<EOF>>		{
	/*
	 * Nothing followed the directive.  The error is reported, but
	 * parser->error_occurred is put back to the value it had before.
	 */
	int error_occurred = parser->error_occurred;
	BEGIN(INITIAL);
	zc_error("missing file name in $INCLUDE directive");
	yy_set_bol(1); /* Set beginning of line, so "^" rules match.  */
	++parser->line;
	parser->error_occurred = error_occurred;
}
<incl>.+ 		{
	/*
	 * The text after the directive: a file name, optionally followed
	 * by an origin and a comment.  yytext is edited in place.
	 * As above, parser->error_occurred is restored at the end.
	 */
	char *tmp;
	domain_type *origin = parser->origin;
	int error_occurred = parser->error_occurred;

	BEGIN(INITIAL);
	if (include_stack_ptr >= MAXINCLUDES ) {
		zc_error("includes nested too deeply, skipped (>%d)",
			 MAXINCLUDES);
	} else {
		FILE *input;
		char *last_space;
		char *last_tab;

		/*
		 * Remove trailing comment.  A comment starts at the first
		 * ';' on the line, so cut there and not at the last one.
		 */
		tmp = strchr(yytext, ';');
		if (tmp) {
			*tmp = '\0';
		}
		strip_string(yytext);

		/*
		 * Parse origin for include file.  The origin is the word
		 * after the last blank, where a blank is a space or a tab,
		 * whichever of the two occurs later in the text.
		 */
		last_space = strrchr(yytext, ' ');
		last_tab = strrchr(yytext, '\t');
		if (last_space && last_tab) {
			tmp = (last_space > last_tab) ? last_space : last_tab;
		} else {
			tmp = last_space ? last_space : last_tab;
		}
		if (tmp) {
			const dname_type *dname;
			const char *name = tmp + 1;
			size_t name_len;

			/* split the original yytext */
			*tmp = '\0';
			strip_string(yytext);

			name_len = strlen(name);
			dname = dname_parse(parser->region, name);
			if (!dname) {
				zc_error("incorrect include origin '%s'",
					 name);
			} else if (name_len == 0 || name[name_len - 1] != '.') {
				/* Only a name ending in a dot is accepted. */
				zc_error("$INCLUDE directive requires absolute domain name");
			} else {
				origin = domain_table_insert(
					parser->db->domains, dname);
			}
		}

		if (strlen(yytext) == 0) {
			zc_error("missing file name in $INCLUDE directive");
		} else if (!(input = fopen(yytext, "r"))) {
			zc_error("cannot open include file '%s': %s",
				 yytext, strerror(errno));
		} else {
			/* Initialize parser for include file.  */
			char *filename = region_strdup(parser->region, yytext);
			push_parser_state(input); /* Destroys yytext.  */
			parser->filename = filename;
			parser->line = 1;
			parser->origin = origin;
			lexer_state = EXPECT_OWNER;
		}
	}

	parser->error_occurred = error_occurred;
}
<INITIAL><<EOF>>	{
	/*
	 * End of the current input.  at_eof() alternates between NL and
	 * 0, so NL is handed to the parser on every other end of file.
	 * At the outermost level scanning stops once no NL is due; for
	 * an included file the stream is closed and the including file
	 * is resumed.
	 */
	int eo = at_eof();
	yy_set_bol(1); /* Set beginning of line, so "^" rules match.  */
	if (include_stack_ptr == 0) {
		if(eo == NL)
			return eo;
		yyterminate();
	} else {
		fclose(yyin);
		pop_parser_state();
		if(eo == NL)
			return eo;
	}
}
^{DOLLAR}{LETTER}+	{
	/* A directive-like word at the start of a line that no rule above took. */
	zc_warning("Unknown directive: %s", yytext);
}
{DOT}	{
	LEXOUT((". "));
	return parse_token('.', yytext, &lexer_state);
}
@	{
	LEXOUT(("@ "));
	return parse_token('@', yytext, &lexer_state);
}
\\#	{
	LEXOUT(("\\# "));
	return parse_token(URR, yytext, &lexer_state);
}
{NEWLINE}	{
	++parser->line;
	if (paren_open) {
		/* Inside parentheses a line break only separates words. */
		LEXOUT(("SP "));
		return SP;
	}
	/* Otherwise the line ends here and the next one starts with an owner. */
	lexer_state = EXPECT_OWNER;
	LEXOUT(("NL\n"));
	return NL;
}
\(	{
	/* Only one level of parentheses is accepted. */
	if (paren_open) {
		zc_error("nested parentheses");
		yyterminate();
	}
	LEXOUT(("( "));
	paren_open = 1;
	return SP;
}
\)	{
	if (!paren_open) {
		zc_error("closing parentheses without opening parentheses");
		yyterminate();
	}
	LEXOUT((") "));
	paren_open = 0;
	return SP;
}
{SPACE}+	{
	/*
	 * Blanks where an owner is expected and no parenthesis is open
	 * yield PREV instead of SP.
	 */
	if (lexer_state == EXPECT_OWNER && !paren_open) {
		lexer_state = PARSING_TTL_CLASS_TYPE;
		LEXOUT(("PREV "));
		return PREV;
	}
	/* Blanks after the owner: TTL, class or type comes next. */
	if (lexer_state == PARSING_OWNER) {
		lexer_state = PARSING_TTL_CLASS_TYPE;
	}
	LEXOUT(("SP "));
	return SP;
}
300
301	/* Bitlabels.  Strip leading and ending brackets.  */
\\\[			{
	/* The opening bracket is consumed here and is not part of the token text. */
	BEGIN(bitlabel);
}
<bitlabel><<EOF>>	{
	zc_error("EOF inside bitlabel");
	BEGIN(INITIAL);
	yyrestart(yyin); /* this is so that lex does not give an internal err */
	yyterminate();
}
<bitlabel>{BIT}*	{
	/* Keep collecting: the next match is appended to yytext. */
	yymore();
}
<bitlabel>\n		{
	++parser->line;
	yymore();
}
<bitlabel>\]		{
	/* Overwrite the closing bracket with the string terminator. */
	yytext[yyleng - 1] = '\0';
	BEGIN(INITIAL);
	return parse_token(BITLAB, yytext, &lexer_state);
}
316
317	/* Quoted strings.  Strip leading and ending quotes.  */
{QUOTE}			{
	/* The opening quote is consumed here and is not part of the token text. */
	BEGIN(quotedstring);
	LEXOUT(("\" "));
}
<quotedstring><<EOF>> 	{
	zc_error("EOF inside quoted string");
	BEGIN(INITIAL);
	yyrestart(yyin); /* this is so that lex does not give an internal err */
	yyterminate();
}
<quotedstring>{ANY}*	{
	/* Keep collecting: the next match is appended to yytext. */
	LEXOUT(("QSTR "));
	yymore();
}
<quotedstring>\n 	{
	++parser->line;
	yymore();
}
<quotedstring>{QUOTE} {
	LEXOUT(("\" "));
	/* Overwrite the closing quote with the string terminator. */
	yytext[yyleng - 1] = '\0';
	BEGIN(INITIAL);
	return parse_token(QSTR, yytext, &lexer_state);
}
333
{ZONESTR}({CHARSTR})* {
	/* Any allowed word; parse_token() decides what kind of token it is. */
	return parse_token(STR, yytext, &lexer_state);
}
. {
	/* A single character that none of the rules above accepted. */
	int ch = (int) yytext[0];

	zc_error("unknown character '%c' (\\%03d) seen - is this a zonefile?",
		 ch, ch);
}
342%%
343
344/*
345 * Analyze "word" to see if it matches an RR type, possibly by using
346 * the "TYPExxx" notation.  If it matches, the corresponding token is
347 * returned and the TYPE parameter is set to the RR type value.
348 */
349static int
350rrtype_to_token(const char *word, uint16_t *type)
351{
352	uint16_t t = rrtype_from_string(word);
353	if (t != 0) {
354		rrtype_descriptor_type *entry = rrtype_descriptor_by_type(t);
355		*type = t;
356		return entry->token;
357	}
358
359	return 0;
360}
361
362
/*
 * Resolve backslash escapes in TEXT in place.  See RFC 1035,
 * section 5.1.
 *
 *   \DDD  three decimal digits with a value of 0..255 become that byte;
 *         a larger value gives a warning, the backslash is dropped and
 *         the digits stay as ordinary characters
 *   \X    any other character X is kept, the backslash is dropped
 *   \     a backslash at the very end gives a warning and is dropped
 *
 * The result is NUL terminated and never longer than the input.
 * Returns the length of the result, not counting the terminator.
 */
static size_t
zoctet(char *text)
{
	char *src = text;	/* next character to read */
	char *dst = text;	/* next position to write, never ahead of src */

	while (*src != '\0') {
		assert(dst <= src);

		if (src[0] != '\\') {
			/* Ordinary character.  */
			*dst++ = *src++;
			continue;
		}

		if (isdigit((unsigned char)src[1])
		    && isdigit((unsigned char)src[2])
		    && isdigit((unsigned char)src[3])) {
			/* \DDD escape.  */
			int val = hexdigit_to_int(src[1]) * 100
				+ hexdigit_to_int(src[2]) * 10
				+ hexdigit_to_int(src[3]);

			if (0 <= val && val <= 255) {
				*dst++ = val;
				src += 4;
			} else {
				zc_warning("text escape \\DDD overflow");
				*dst++ = src[1];
				src += 2;
			}
		} else if (src[1] != '\0') {
			/* \X where X is any character, keep X.  */
			*dst++ = src[1];
			src += 2;
		} else {
			/* Trailing backslash, ignore it.  */
			zc_warning("trailing backslash ignored");
			src += 1;
		}
	}

	*dst = '\0';
	return dst - text;
}
405
406static int
407parse_token(int token, char *yytext, enum lexer_state *lexer_state)
408{
409	size_t len;
410	char *str;
411
412	if (*lexer_state == EXPECT_OWNER) {
413		*lexer_state = PARSING_OWNER;
414	} else if (*lexer_state == PARSING_TTL_CLASS_TYPE) {
415		const char *t;
416		int token;
417		uint16_t rrclass;
418
419		/* type */
420		token = rrtype_to_token(yytext, &yylval.type);
421		if (token != 0) {
422			*lexer_state = PARSING_RDATA;
423			LEXOUT(("%d[%s] ", token, yytext));
424			return token;
425		}
426
427		/* class */
428		rrclass = rrclass_from_string(yytext);
429		if (rrclass != 0) {
430			yylval.klass = rrclass;
431			LEXOUT(("CLASS "));
432			return T_RRCLASS;
433		}
434
435		/* ttl */
436		yylval.ttl = strtottl(yytext, &t);
437		if (*t == '\0') {
438			LEXOUT(("TTL "));
439			return T_TTL;
440		}
441	}
442
443	str = region_strdup(parser->rr_region, yytext);
444	len = zoctet(str);
445
446	yylval.data.str = str;
447	yylval.data.len = len;
448
449	LEXOUT(("%d[%s] ", token, yytext));
450	return token;
451}
452