zlexer.lex revision 1.3
%{
/*
 * zlexer.lex - lexical analyzer for (DNS) zone files
 *
 * Copyright (c) 2001-2006, NLnet Labs. All rights reserved
 *
 * See LICENSE for the license.
 *
 */
/* because flex keeps having sign-unsigned compare problems that are unfixed*/
#pragma GCC diagnostic ignored "-Wsign-compare"

#include "config.h"

#include <ctype.h>
#include <errno.h>
#include <string.h>
#include <strings.h>

#include "zonec.h"
#include "dname.h"
#include "zparser.h"

#if 0
#define LEXOUT(s)  printf s /* used ONLY when debugging */
#else
#define LEXOUT(s)
#endif

/*
 * Position of the scanner within a resource record line.  The rules and
 * parse_token() below move between these states:
 *   EXPECT_OWNER            at the start of a line (set after each NL);
 *   PARSING_OWNER           the first token of the line has been seen;
 *   PARSING_TTL_CLASS_TYPE  blanks followed the owner (or started the
 *                           line), so the next words are tried as type,
 *                           class and TTL;
 *   PARSING_RDATA           a type, $TTL or $ORIGIN has been returned.
 */
enum lexer_state {
	EXPECT_OWNER,
	PARSING_OWNER,
	PARSING_TTL_CLASS_TYPE,
	PARSING_RDATA
};

static int parse_token(int token, char *yytext, enum lexer_state *lexer_state);

/*
 * $INCLUDE nesting: slot i holds the flex buffer and the file specific
 * parser fields of the file that was being read when include level i+1
 * was entered.  include_stack_ptr is the number of slots in use.
 */
static YY_BUFFER_STATE include_stack[MAXINCLUDES];
static zparser_type zparser_stack[MAXINCLUDES];
static int include_stack_ptr = 0;

/*
 * Saves the file specific variables on the include stack.
 *
 * The filename, line and origin of the global parser and the current
 * flex buffer are stored in slot include_stack_ptr, scanning is switched
 * to a new buffer reading from INPUT, and include_stack_ptr is
 * incremented.  No bounds check is done here; the $INCLUDE rule compares
 * include_stack_ptr with MAXINCLUDES before calling.
 */
static void
push_parser_state(FILE *input)
{
	zparser_type *slot = &zparser_stack[include_stack_ptr];
	YY_BUFFER_STATE fresh;

	slot->filename = parser->filename;
	slot->line = parser->line;
	slot->origin = parser->origin;
	include_stack[include_stack_ptr] = YY_CURRENT_BUFFER;

	fresh = yy_create_buffer(input, YY_BUF_SIZE);
	yy_switch_to_buffer(fresh);
	++include_stack_ptr;
}

/*
 * Restores the file specific variables from the include stack.
*/
/*
 * pop_parser_state() is the inverse of push_parser_state(): it
 * decrements include_stack_ptr, copies filename, line and origin from
 * that slot back into the global parser, deletes the buffer that was
 * being scanned and switches back to the saved one.  There is no
 * underflow check; the end-of-file rule only calls it when
 * include_stack_ptr is non-zero.
 */
static void
pop_parser_state(void)
{
	const zparser_type *slot;

	--include_stack_ptr;
	slot = &zparser_stack[include_stack_ptr];

	parser->filename = slot->filename;
	parser->line = slot->line;
	parser->origin = slot->origin;

	yy_delete_buffer(YY_CURRENT_BUFFER);
	yy_switch_to_buffer(include_stack[include_stack_ptr]);
}

/*
 * Buffer that was current when parser_push_stringbuf() was called.
 * There is a single slot, so a second push before the matching pop
 * overwrites it.
 */
static YY_BUFFER_STATE oldstate;

/*
 * Start string scan: remember the current buffer in oldstate and make
 * the scanner read from a buffer created by yy_scan_string(STR).
 */
void
parser_push_stringbuf(char* str)
{
	YY_BUFFER_STATE strbuf;

	oldstate = YY_CURRENT_BUFFER;
	strbuf = yy_scan_string(str);
	yy_switch_to_buffer(strbuf);
}

/*
 * End string scan: delete the current buffer, switch back to the buffer
 * remembered in oldstate and clear oldstate.
 */
void
parser_pop_stringbuf(void)
{
	yy_delete_buffer(YY_CURRENT_BUFFER);
	yy_switch_to_buffer(oldstate);
	oldstate = NULL;
}

#ifndef yy_set_bol /* compat definition, for flex 2.4.6 */
#define yy_set_bol(at_bol) \
	{ \
		if ( ! yy_current_buffer ) \
			yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); \
		yy_current_buffer->yy_ch_buf[0] = ((at_bol)?'\n':' '); \
	}
#endif

%}
%option noinput
%option nounput
%{
#ifndef YY_NO_UNPUT
#define YY_NO_UNPUT 1
#endif
#ifndef YY_NO_INPUT
#define YY_NO_INPUT 1
#endif
%}

SPACE   [ \t]
LETTER  [a-zA-Z]
NEWLINE [\n\r]
ZONESTR [^ \t\n\r();.\"\$]|\\.|\\\n
CHARSTR [^ \t\n\r();.]|\\.|\\\n
QUOTE   \"
DOLLAR  \$
COMMENT ;
DOT     \.
BIT	[^\]\n]|\\.
ANY     [^\"\n\\]|\\.

%x	incl bitlabel quotedstring

%%
	/* Scanner state; static, so it survives between yylex calls. */
	static int paren_open = 0;
	static enum lexer_state lexer_state = EXPECT_OWNER;
{SPACE}*{COMMENT}.*	/* ignore */
^{DOLLAR}TTL            { lexer_state = PARSING_RDATA; return DOLLAR_TTL; }
^{DOLLAR}ORIGIN         { lexer_state = PARSING_RDATA; return DOLLAR_ORIGIN; }

	/*
	 * Handle $INCLUDE directives.  See
	 * http://dinosaur.compilertools.net/flex/flex_12.html#SEC12.
*/
^{DOLLAR}INCLUDE        {
	/* The rest of the line is handled in the incl start condition. */
	BEGIN(incl);
}
<incl>\n 		|
<incl><<EOF>>		{
	/*
	 * $INCLUDE with nothing after it.  parser->error_occurred is
	 * saved before zc_error() and put back afterwards, so this
	 * rule leaves that field as it found it.
	 */
	int error_occurred = parser->error_occurred;
	BEGIN(INITIAL);
	zc_error("missing file name in $INCLUDE directive");
	yy_set_bol(1); /* Set beginning of line, so "^" rules match.  */
	++parser->line;
	parser->error_occurred = error_occurred;
}
<incl>.+ 		{
	/*
	 * yytext is everything after $INCLUDE up to the end of the line:
	 * a file name, optionally an origin, optionally a comment.  On
	 * success the current file is pushed on the include stack and
	 * scanning continues in the named file.  As above,
	 * parser->error_occurred is restored on the way out.
	 */
	char *tmp;
	domain_type *origin = parser->origin;
	int error_occurred = parser->error_occurred;

	BEGIN(INITIAL);
	if (include_stack_ptr >= MAXINCLUDES ) {
		zc_error("includes nested too deeply, skipped (>%d)",
			 MAXINCLUDES);
	} else {
		FILE *input;
		char *last_tab;

		/*
		 * Remove trailing comment.  The comment starts at the
		 * FIRST semicolon; cutting at the last one would leave
		 * part of a comment that itself contains a semicolon.
		 */
		tmp = strchr(yytext, ';');
		if (tmp) {
			*tmp = '\0';
		}
		/* strip_string is defined elsewhere; presumably it trims blanks */
		strip_string(yytext);

		/*
		 * Parse origin for include file.  The origin is the text
		 * after the last blank, whether that blank is a space or
		 * a tab; taking the last space even when a tab follows
		 * it would hand dname_parse a name with a leading tab.
		 */
		tmp = strrchr(yytext, ' ');
		last_tab = strrchr(yytext, '\t');
		if (last_tab && (!tmp || last_tab > tmp)) {
			tmp = last_tab;
		}
		if (tmp) {
			const dname_type *dname;

			/* split the original yytext */
			*tmp = '\0';
			strip_string(yytext);

			dname = dname_parse(parser->region, tmp + 1);
			if (!dname) {
				zc_error("incorrect include origin '%s'",
					 tmp + 1);
			} else if (tmp[strlen(tmp + 1)] != '.') {
				/* last character of the origin is not a dot */
				zc_error("$INCLUDE directive requires absolute domain name");
			} else {
				origin = domain_table_insert(
					parser->db->domains, dname);
			}
		}

		if (strlen(yytext) == 0) {
			zc_error("missing file name in $INCLUDE directive");
		} else if (!(input = fopen(yytext, "r"))) {
			zc_error("cannot open include file '%s': %s",
				 yytext, strerror(errno));
		} else {
			/* Initialize parser for include file.  */
			char *filename = region_strdup(parser->region, yytext);
			push_parser_state(input); /* Destroys yytext.  */
			parser->filename = filename;
			parser->line = 1;
			parser->origin = origin;
			lexer_state = EXPECT_OWNER;
		}
	}

	parser->error_occurred = error_occurred;
}
<INITIAL><<EOF>>	{
	yy_set_bol(1); /* Set beginning of line, so "^" rules match.  */
	if (include_stack_ptr == 0) {
		/*
		 * End of the outermost input.  paren_open and lexer_state
		 * are static, so clear them here; otherwise an input that
		 * ends inside parentheses or without a final newline
		 * would leave stale state for the next input.
		 */
		paren_open = 0;
		lexer_state = EXPECT_OWNER;
		yyterminate();
	} else {
		/* End of an included file: close it, resume the includer. */
		fclose(yyin);
		pop_parser_state();
	}
}
^{DOLLAR}{LETTER}+	{ zc_warning("Unknown directive: %s", yytext); }
{DOT}	{
	LEXOUT((". "));
	return parse_token('.', yytext, &lexer_state);
}
@	{
	LEXOUT(("@ "));
	return parse_token('@', yytext, &lexer_state);
}
\\#	{
	LEXOUT(("\\# "));
	return parse_token(URR, yytext, &lexer_state);
}
{NEWLINE}	{
	/* Inside parentheses a newline only separates tokens. */
	++parser->line;
	if (!paren_open) {
		lexer_state = EXPECT_OWNER;
		LEXOUT(("NL\n"));
		return NL;
	} else {
		LEXOUT(("SP "));
		return SP;
	}
}
\(	{
	if (paren_open) {
		zc_error("nested parentheses");
		/* Scanning stops here; do not leak state into the next input. */
		paren_open = 0;
		lexer_state = EXPECT_OWNER;
		yyterminate();
	}
	LEXOUT(("( "));
	paren_open = 1;
	return SP;
}
\)	{
	if (!paren_open) {
		zc_error("closing parentheses without opening parentheses");
		/* paren_open is already 0 here; reset the line state as well. */
		lexer_state = EXPECT_OWNER;
		yyterminate();
	}
	LEXOUT((") "));
	paren_open = 0;
	return SP;
}
{SPACE}+	{
	/*
	 * Blanks at the start of a line, outside parentheses, mean the
	 * owner is omitted: report PREV.  Blanks after the owner start
	 * the ttl/class/type part.  Otherwise they are a separator.
	 */
	if (!paren_open && lexer_state == EXPECT_OWNER) {
		lexer_state = PARSING_TTL_CLASS_TYPE;
		LEXOUT(("PREV "));
		return PREV;
	}
	if (lexer_state == PARSING_OWNER) {
		lexer_state = PARSING_TTL_CLASS_TYPE;
	}
	LEXOUT(("SP "));
	return SP;
}

	/* Bitlabels.  Strip leading and ending brackets.
*/
\\\[			{ BEGIN(bitlabel); }
<bitlabel><<EOF>>	{
	zc_error("EOF inside bitlabel");
	BEGIN(INITIAL);
	yyrestart(yyin); /* this is so that lex does not give an internal err */
	yyterminate();
}
<bitlabel>{BIT}*	{ yymore(); }
<bitlabel>\n		{ ++parser->line; yymore(); }
<bitlabel>\]		{
	/*
	 * The yymore calls above keep the label text in yytext, so the
	 * last character is now the closing bracket: overwrite it.
	 */
	BEGIN(INITIAL);
	yytext[yyleng - 1] = '\0';
	return parse_token(BITLAB, yytext, &lexer_state);
}

	/* Quoted strings.  Strip leading and ending quotes.  */
{QUOTE}			{ BEGIN(quotedstring); LEXOUT(("\" ")); }
<quotedstring><<EOF>>	{
	zc_error("EOF inside quoted string");
	BEGIN(INITIAL);
	yyrestart(yyin); /* this is so that lex does not give an internal err */
	yyterminate();
}
<quotedstring>{ANY}*	{ LEXOUT(("STR ")); yymore(); }
<quotedstring>\n 	{ ++parser->line; yymore(); }
<quotedstring>{QUOTE} {
	/* As for bitlabels: drop the closing quote from the text. */
	LEXOUT(("\" "));
	BEGIN(INITIAL);
	yytext[yyleng - 1] = '\0';
	return parse_token(STR, yytext, &lexer_state);
}

{ZONESTR}({CHARSTR})* {
	/* Any allowed word.  */
	return parse_token(STR, yytext, &lexer_state);
}
. {
	/*
	 * Convert through unsigned char first: where char is signed, a
	 * byte of 0x80 or above would otherwise be printed by %03d as a
	 * negative number instead of 128..255.
	 */
	zc_error("unknown character '%c' (\\%03d) seen - is this a zonefile?",
		 (int) (unsigned char) yytext[0],
		 (int) (unsigned char) yytext[0]);
}
%%

/*
 * Analyze "word" to see if it matches an RR type, possibly by using
 * the "TYPExxx" notation.  If it matches, the corresponding token is
 * returned and the TYPE parameter is set to the RR type value.
 *
 * When rrtype_from_string() yields 0 the function returns 0 and does
 * not touch *type.
 */
static int
rrtype_to_token(const char *word, uint16_t *type)
{
	rrtype_descriptor_type *entry;
	uint16_t t = rrtype_from_string(word);

	if (t == 0) {
		return 0;
	}

	entry = rrtype_descriptor_by_type(t);
	*type = t;
	return entry->token;
}


/*
 * Remove \DDD constructs from the input. See RFC 1035, section 5.1.
*/
/*
 * zoctet() rewrites TEXT in place and returns the length of the result
 * (the terminating NUL is not counted):
 *   - "\DDD" with three decimal digits and a value of at most 255
 *     becomes the single byte with that value;
 *   - "\DDD" with a larger value produces a warning, the backslash is
 *     dropped and the digits are kept as ordinary characters;
 *   - "\X" for any other character X becomes X;
 *   - a backslash at the very end produces a warning and is dropped.
 * Because "\000" yields a NUL byte inside the string, callers must use
 * the returned length rather than strlen().
 */
static size_t
zoctet(char *text)
{
	/*
	 * s follows the string, p lags behind and rebuilds the new
	 * string
	 */
	char *s;
	char *p;

	for (s = p = text; *s; ++s, ++p) {
		assert(p <= s);
		if (s[0] != '\\') {
			/* Ordinary character.  */
			*p = *s;
			continue;
		}

		/* && stops at the first non-digit, so the NUL is never passed. */
		if (isdigit((unsigned char)s[1])
		    && isdigit((unsigned char)s[2])
		    && isdigit((unsigned char)s[3])) {
			/* \DDD escape; three digits give 0..999. */
			int val = (hexdigit_to_int(s[1]) * 100 +
				   hexdigit_to_int(s[2]) * 10 +
				   hexdigit_to_int(s[3]));
			if (val <= 255) {
				s += 3;
				*p = val;
			} else {
				zc_warning("text escape \\DDD overflow");
				*p = *++s;
			}
		} else if (s[1] != '\0') {
			/* \X where X is any character, keep X.  */
			*p = *++s;
		} else {
			/* Trailing backslash, ignore it.  */
			zc_warning("trailing backslash ignored");
			--p;	/* cancels the ++p of the loop step */
		}
	}
	*p = '\0';
	return p - text;
}

/*
 * Turn the text of a scanned word into a parser token.
 *
 * token        token to return when the word is plain data;
 * yytext       NUL terminated text of the word;
 * lexer_state  position within the line, updated here.
 *
 * In state EXPECT_OWNER the state becomes PARSING_OWNER.  In state
 * PARSING_TTL_CLASS_TYPE the word is tried, in this order, as an RR
 * type (the type token is returned and the state becomes
 * PARSING_RDATA), as a class (T_RRCLASS) and as a TTL (T_TTL), with
 * the value stored in yylval.  In every other case the word is copied
 * into parser->rr_region, unescaped with zoctet(), stored with its
 * length in yylval.data, and TOKEN is returned.
 */
static int
parse_token(int token, char *yytext, enum lexer_state *lexer_state)
{
	size_t len;
	char *str;

	if (*lexer_state == EXPECT_OWNER) {
		*lexer_state = PARSING_OWNER;
	} else if (*lexer_state == PARSING_TTL_CLASS_TYPE) {
		const char *t;
		int rrtoken;	/* was named token, hiding the parameter */
		uint16_t rrclass;

		/* type */
		rrtoken = rrtype_to_token(yytext, &yylval.type);
		if (rrtoken != 0) {
			*lexer_state = PARSING_RDATA;
			LEXOUT(("%d[%s] ", rrtoken, yytext));
			return rrtoken;
		}

		/* class */
		rrclass = rrclass_from_string(yytext);
		if (rrclass != 0) {
			yylval.klass = rrclass;
			LEXOUT(("CLASS "));
			return T_RRCLASS;
		}

		/*
		 * ttl: the whole word must have been consumed, and the
		 * word must not be empty.  An empty word (for instance
		 * from an empty quoted string) also ends with t at a NUL
		 * if strtottl() leaves t at the start of the text, and
		 * must not be taken for a TTL.
		 */
		yylval.ttl = strtottl(yytext, &t);
		if (*t == '\0' && t != yytext) {
			LEXOUT(("TTL "));
			return T_TTL;
		}
	}

	str = region_strdup(parser->rr_region, yytext);
	len = zoctet(str);

	yylval.data.str = str;
	yylval.data.len = len;

	LEXOUT(("%d[%s] ", token, yytext));
	return token;
}