parse.c revision 287915
1266077Sdes/* 2266077Sdes * a generic (simple) parser. Use to parse rr's, private key 3266077Sdes * information and /etc/resolv.conf files 4266077Sdes * 5266077Sdes * a Net::DNS like library for C 6266077Sdes * LibDNS Team @ NLnet Labs 7266077Sdes * (c) NLnet Labs, 2005-2006 8266077Sdes * See the file LICENSE for the license 9266077Sdes */ 10266077Sdes#include "config.h" 11287915Sdes#include "sldns/parse.h" 12287915Sdes#include "sldns/parseutil.h" 13287915Sdes#include "sldns/sbuffer.h" 14266077Sdes 15266077Sdes#include <limits.h> 16266077Sdes#include <strings.h> 17266077Sdes 18266077Sdessldns_lookup_table sldns_directive_types[] = { 19266077Sdes { LDNS_DIR_TTL, "$TTL" }, 20266077Sdes { LDNS_DIR_ORIGIN, "$ORIGIN" }, 21266077Sdes { LDNS_DIR_INCLUDE, "$INCLUDE" }, 22266077Sdes { 0, NULL } 23266077Sdes}; 24266077Sdes 25266077Sdes/* add max_limit here? */ 26266077Sdesssize_t 27266077Sdessldns_fget_token(FILE *f, char *token, const char *delim, size_t limit) 28266077Sdes{ 29266077Sdes return sldns_fget_token_l(f, token, delim, limit, NULL); 30266077Sdes} 31266077Sdes 32266077Sdesssize_t 33266077Sdessldns_fget_token_l(FILE *f, char *token, const char *delim, size_t limit, int *line_nr) 34266077Sdes{ 35266077Sdes int c, prev_c; 36266077Sdes int p; /* 0 -> no parenthese seen, >0 nr of ( seen */ 37266077Sdes int com, quoted; 38266077Sdes char *t; 39266077Sdes size_t i; 40266077Sdes const char *d; 41266077Sdes const char *del; 42266077Sdes 43266077Sdes /* standard delimeters */ 44266077Sdes if (!delim) { 45266077Sdes /* from isspace(3) */ 46266077Sdes del = LDNS_PARSE_NORMAL; 47266077Sdes } else { 48266077Sdes del = delim; 49266077Sdes } 50266077Sdes 51266077Sdes p = 0; 52266077Sdes i = 0; 53266077Sdes com = 0; 54266077Sdes quoted = 0; 55266077Sdes prev_c = 0; 56266077Sdes t = token; 57266077Sdes if (del[0] == '"') { 58266077Sdes quoted = 1; 59266077Sdes } 60266077Sdes while ((c = getc(f)) != EOF) { 61266077Sdes if (c == '\r') /* carriage return */ 62266077Sdes c = ' '; 63266077Sdes if (c == '(' && prev_c != '\\' && !quoted) { 64266077Sdes /* this only counts for non-comments */ 65266077Sdes if (com == 0) { 66266077Sdes p++; 67266077Sdes } 68266077Sdes prev_c = c; 69266077Sdes continue; 70266077Sdes } 71266077Sdes 72266077Sdes if (c == ')' && prev_c != '\\' && !quoted) { 73266077Sdes /* this only counts for non-comments */ 74266077Sdes if (com == 0) { 75266077Sdes p--; 76266077Sdes } 77266077Sdes prev_c = c; 78266077Sdes continue; 79266077Sdes } 80266077Sdes 81266077Sdes if (p < 0) { 82266077Sdes /* more ) then ( - close off the string */ 83266077Sdes *t = '\0'; 84266077Sdes return 0; 85266077Sdes } 86266077Sdes 87266077Sdes /* do something with comments ; */ 88266077Sdes if (c == ';' && quoted == 0) { 89266077Sdes if (prev_c != '\\') { 90266077Sdes com = 1; 91266077Sdes } 92266077Sdes } 93266077Sdes if (c == '\"' && com == 0 && prev_c != '\\') { 94266077Sdes quoted = 1 - quoted; 95266077Sdes } 96266077Sdes 97266077Sdes if (c == '\n' && com != 0) { 98266077Sdes /* comments */ 99266077Sdes com = 0; 100266077Sdes *t = ' '; 101266077Sdes if (line_nr) { 102266077Sdes *line_nr = *line_nr + 1; 103266077Sdes } 104266077Sdes if (p == 0 && i > 0) { 105266077Sdes goto tokenread; 106266077Sdes } else { 107266077Sdes prev_c = c; 108266077Sdes continue; 109266077Sdes } 110266077Sdes } 111266077Sdes 112266077Sdes if (com == 1) { 113266077Sdes *t = ' '; 114266077Sdes prev_c = c; 115266077Sdes continue; 116266077Sdes } 117266077Sdes 118266077Sdes if (c == '\n' && p != 0 && t > token) { 119266077Sdes /* in parentheses */ 120266077Sdes if (line_nr) { 121266077Sdes *line_nr = *line_nr + 1; 122266077Sdes } 123266077Sdes *t++ = ' '; 124266077Sdes prev_c = c; 125266077Sdes continue; 126266077Sdes } 127266077Sdes 128266077Sdes /* check if we hit the delim */ 129266077Sdes for (d = del; *d; d++) { 130266077Sdes if (c == *d && i > 0 && prev_c != '\\' && p == 0) { 131266077Sdes if (c == '\n' && line_nr) { 132266077Sdes *line_nr = *line_nr + 1; 133266077Sdes } 134266077Sdes goto tokenread; 135266077Sdes } 136266077Sdes } 137266077Sdes if (c != '\0' && c != '\n') { 138266077Sdes i++; 139266077Sdes } 140266077Sdes if (limit > 0 && (i >= limit || (size_t)(t-token) >= limit)) { 141266077Sdes *t = '\0'; 142266077Sdes return -1; 143266077Sdes } 144266077Sdes if (c != '\0' && c != '\n') { 145266077Sdes *t++ = c; 146266077Sdes } 147266077Sdes if (c == '\\' && prev_c == '\\') 148266077Sdes prev_c = 0; 149266077Sdes else prev_c = c; 150266077Sdes } 151266077Sdes *t = '\0'; 152266077Sdes if (c == EOF) { 153266077Sdes return (ssize_t)i; 154266077Sdes } 155266077Sdes 156266077Sdes if (i == 0) { 157266077Sdes /* nothing read */ 158266077Sdes return -1; 159266077Sdes } 160266077Sdes if (p != 0) { 161266077Sdes return -1; 162266077Sdes } 163266077Sdes return (ssize_t)i; 164266077Sdes 165266077Sdestokenread: 166266077Sdes if(*del == '"') 167266077Sdes /* do not skip over quotes after the string, they are part 168266077Sdes * of the next string. But skip over whitespace (if needed)*/ 169266077Sdes sldns_fskipcs_l(f, del+1, line_nr); 170266077Sdes else sldns_fskipcs_l(f, del, line_nr); 171266077Sdes *t = '\0'; 172266077Sdes if (p != 0) { 173266077Sdes return -1; 174266077Sdes } 175266077Sdes 176266077Sdes return (ssize_t)i; 177266077Sdes} 178266077Sdes 179266077Sdesssize_t 180266077Sdessldns_fget_keyword_data(FILE *f, const char *keyword, const char *k_del, char *data, 181266077Sdes const char *d_del, size_t data_limit) 182266077Sdes{ 183266077Sdes return sldns_fget_keyword_data_l(f, keyword, k_del, data, d_del, 184266077Sdes data_limit, NULL); 185266077Sdes} 186266077Sdes 187266077Sdesssize_t 188266077Sdessldns_fget_keyword_data_l(FILE *f, const char *keyword, const char *k_del, char *data, 189266077Sdes const char *d_del, size_t data_limit, int *line_nr) 190266077Sdes{ 191266077Sdes /* we assume: keyword|sep|data */ 192266077Sdes char *fkeyword; 193266077Sdes ssize_t i; 194266077Sdes 195266077Sdes if(strlen(keyword) >= LDNS_MAX_KEYWORDLEN) 196266077Sdes return -1; 197266077Sdes fkeyword = (char*)malloc(LDNS_MAX_KEYWORDLEN); 198266077Sdes if(!fkeyword) 199266077Sdes return -1; 200266077Sdes 201266077Sdes i = sldns_fget_token(f, fkeyword, k_del, LDNS_MAX_KEYWORDLEN); 202266077Sdes if(i==0 || i==-1) { 203266077Sdes free(fkeyword); 204266077Sdes return -1; 205266077Sdes } 206266077Sdes 207266077Sdes /* case??? i instead of strlen? */ 208266077Sdes if (strncmp(fkeyword, keyword, LDNS_MAX_KEYWORDLEN - 1) == 0) { 209266077Sdes /* whee! */ 210266077Sdes /* printf("%s\n%s\n", "Matching keyword", fkeyword); */ 211266077Sdes i = sldns_fget_token_l(f, data, d_del, data_limit, line_nr); 212266077Sdes free(fkeyword); 213266077Sdes return i; 214266077Sdes } else { 215266077Sdes /*printf("no match for %s (read: %s)\n", keyword, fkeyword);*/ 216266077Sdes free(fkeyword); 217266077Sdes return -1; 218266077Sdes } 219266077Sdes} 220266077Sdes 221276541Sdesint 222276541Sdessldns_bgetc(sldns_buffer *buffer) 223276541Sdes{ 224276541Sdes if (!sldns_buffer_available_at(buffer, buffer->_position, sizeof(uint8_t))) { 225276541Sdes sldns_buffer_set_position(buffer, sldns_buffer_limit(buffer)); 226276541Sdes /* sldns_buffer_rewind(buffer);*/ 227276541Sdes return EOF; 228276541Sdes } 229276541Sdes return (int)sldns_buffer_read_u8(buffer); 230276541Sdes} 231276541Sdes 232266077Sdesssize_t 233266077Sdessldns_bget_token(sldns_buffer *b, char *token, const char *delim, size_t limit) 234266077Sdes{ 235266077Sdes return sldns_bget_token_par(b, token, delim, limit, NULL, NULL); 236266077Sdes} 237266077Sdes 238266077Sdesssize_t 239266077Sdessldns_bget_token_par(sldns_buffer *b, char *token, const char *delim, 240266077Sdes size_t limit, int* par, const char* skipw) 241266077Sdes{ 242266077Sdes int c, lc; 243266077Sdes int p; /* 0 -> no parenthese seen, >0 nr of ( seen */ 244266077Sdes int com, quoted; 245266077Sdes char *t; 246266077Sdes size_t i; 247266077Sdes const char *d; 248266077Sdes const char *del; 249266077Sdes 250266077Sdes /* standard delimiters */ 251266077Sdes if (!delim) { 252266077Sdes /* from isspace(3) */ 253266077Sdes del = LDNS_PARSE_NORMAL; 254266077Sdes } else { 255266077Sdes del = delim; 256266077Sdes } 257266077Sdes 258266077Sdes p = (par?*par:0); 259266077Sdes i = 0; 260266077Sdes com = 0; 261266077Sdes quoted = 0; 262266077Sdes t = token; 263266077Sdes lc = 0; 264266077Sdes if (del[0] == '"') { 265266077Sdes quoted = 1; 266266077Sdes } 267266077Sdes 268266077Sdes while ((c = sldns_bgetc(b)) != EOF) { 269266077Sdes if (c == '\r') /* carriage return */ 270266077Sdes c = ' '; 271266077Sdes if (c == '(' && lc != '\\' && !quoted) { 272266077Sdes /* this only counts for non-comments */ 273266077Sdes if (com == 0) { 274266077Sdes if(par) (*par)++; 275266077Sdes p++; 276266077Sdes } 277266077Sdes lc = c; 278266077Sdes continue; 279266077Sdes } 280266077Sdes 281266077Sdes if (c == ')' && lc != '\\' && !quoted) { 282266077Sdes /* this only counts for non-comments */ 283266077Sdes if (com == 0) { 284266077Sdes if(par) (*par)--; 285266077Sdes p--; 286266077Sdes } 287266077Sdes lc = c; 288266077Sdes continue; 289266077Sdes } 290266077Sdes 291266077Sdes if (p < 0) { 292266077Sdes /* more ) then ( */ 293266077Sdes *t = '\0'; 294266077Sdes return 0; 295266077Sdes } 296266077Sdes 297266077Sdes /* do something with comments ; */ 298266077Sdes if (c == ';' && quoted == 0) { 299266077Sdes if (lc != '\\') { 300266077Sdes com = 1; 301266077Sdes } 302266077Sdes } 303266077Sdes if (c == '"' && com == 0 && lc != '\\') { 304266077Sdes quoted = 1 - quoted; 305266077Sdes } 306266077Sdes 307266077Sdes if (c == '\n' && com != 0) { 308266077Sdes /* comments */ 309266077Sdes com = 0; 310266077Sdes *t = ' '; 311266077Sdes lc = c; 312266077Sdes continue; 313266077Sdes } 314266077Sdes 315266077Sdes if (com == 1) { 316266077Sdes *t = ' '; 317266077Sdes lc = c; 318266077Sdes continue; 319266077Sdes } 320266077Sdes 321266077Sdes if (c == '\n' && p != 0) { 322266077Sdes /* in parentheses */ 323266077Sdes /* do not write ' ' if we want to skip spaces */ 324266077Sdes if(!(skipw && (strchr(skipw, c)||strchr(skipw, ' ')))) 325266077Sdes *t++ = ' '; 326266077Sdes lc = c; 327266077Sdes continue; 328266077Sdes } 329266077Sdes 330266077Sdes /* check to skip whitespace at start, but also after ( */ 331266077Sdes if(skipw && i==0 && !com && !quoted && lc != '\\') { 332266077Sdes if(strchr(skipw, c)) { 333266077Sdes lc = c; 334266077Sdes continue; 335266077Sdes } 336266077Sdes } 337266077Sdes 338266077Sdes /* check if we hit the delim */ 339266077Sdes for (d = del; *d; d++) { 340266077Sdes /* we can only exit if no parens or user tracks them */ 341266077Sdes if (c == *d && lc != '\\' && (p == 0 || par)) { 342266077Sdes goto tokenread; 343266077Sdes } 344266077Sdes } 345266077Sdes 346266077Sdes i++; 347266077Sdes if (limit > 0 && (i >= limit || (size_t)(t-token) >= limit)) { 348266077Sdes *t = '\0'; 349266077Sdes return -1; 350266077Sdes } 351266077Sdes *t++ = c; 352266077Sdes 353266077Sdes if (c == '\\' && lc == '\\') { 354266077Sdes lc = 0; 355266077Sdes } else { 356266077Sdes lc = c; 357266077Sdes } 358266077Sdes } 359266077Sdes *t = '\0'; 360266077Sdes if (i == 0) { 361266077Sdes /* nothing read */ 362266077Sdes return -1; 363266077Sdes } 364266077Sdes if (!par && p != 0) { 365266077Sdes return -1; 366266077Sdes } 367266077Sdes return (ssize_t)i; 368266077Sdes 369266077Sdestokenread: 370266077Sdes if(*del == '"') 371266077Sdes /* do not skip over quotes after the string, they are part 372266077Sdes * of the next string. But skip over whitespace (if needed)*/ 373266077Sdes sldns_bskipcs(b, del+1); 374266077Sdes else sldns_bskipcs(b, del); 375266077Sdes *t = '\0'; 376266077Sdes 377266077Sdes if (!par && p != 0) { 378266077Sdes return -1; 379266077Sdes } 380266077Sdes return (ssize_t)i; 381266077Sdes} 382266077Sdes 383266077Sdes 384266077Sdesvoid 385266077Sdessldns_bskipcs(sldns_buffer *buffer, const char *s) 386266077Sdes{ 387266077Sdes int found; 388266077Sdes char c; 389266077Sdes const char *d; 390266077Sdes 391266077Sdes while(sldns_buffer_available_at(buffer, buffer->_position, sizeof(char))) { 392266077Sdes c = (char) sldns_buffer_read_u8_at(buffer, buffer->_position); 393266077Sdes found = 0; 394266077Sdes for (d = s; *d; d++) { 395266077Sdes if (*d == c) { 396266077Sdes found = 1; 397266077Sdes } 398266077Sdes } 399266077Sdes if (found && buffer->_limit > buffer->_position) { 400266077Sdes buffer->_position += sizeof(char); 401266077Sdes } else { 402266077Sdes return; 403266077Sdes } 404266077Sdes } 405266077Sdes} 406266077Sdes 407266077Sdesvoid 408266077Sdessldns_fskipcs(FILE *fp, const char *s) 409266077Sdes{ 410266077Sdes sldns_fskipcs_l(fp, s, NULL); 411266077Sdes} 412266077Sdes 413266077Sdesvoid 414266077Sdessldns_fskipcs_l(FILE *fp, const char *s, int *line_nr) 415266077Sdes{ 416266077Sdes int found; 417266077Sdes int c; 418266077Sdes const char *d; 419266077Sdes 420266077Sdes while ((c = fgetc(fp)) != EOF) { 421266077Sdes if (line_nr && c == '\n') { 422266077Sdes *line_nr = *line_nr + 1; 423266077Sdes } 424266077Sdes found = 0; 425266077Sdes for (d = s; *d; d++) { 426266077Sdes if (*d == c) { 427266077Sdes found = 1; 428266077Sdes } 429266077Sdes } 430266077Sdes if (!found) { 431266077Sdes /* with getc, we've read too far */ 432266077Sdes ungetc(c, fp); 433266077Sdes return; 434266077Sdes } 435266077Sdes } 436266077Sdes} 437266077Sdes 438266077Sdesssize_t 439266077Sdessldns_bget_keyword_data(sldns_buffer *b, const char *keyword, const char *k_del, char 440266077Sdes*data, const char *d_del, size_t data_limit) 441266077Sdes{ 442266077Sdes /* we assume: keyword|sep|data */ 443266077Sdes char *fkeyword; 444266077Sdes ssize_t i; 445266077Sdes 446266077Sdes if(strlen(keyword) >= LDNS_MAX_KEYWORDLEN) 447266077Sdes return -1; 448266077Sdes fkeyword = (char*)malloc(LDNS_MAX_KEYWORDLEN); 449266077Sdes if(!fkeyword) 450266077Sdes return -1; /* out of memory */ 451266077Sdes 452266077Sdes i = sldns_bget_token(b, fkeyword, k_del, data_limit); 453266077Sdes if(i==0 || i==-1) { 454266077Sdes free(fkeyword); 455266077Sdes return -1; /* nothing read */ 456266077Sdes } 457266077Sdes 458266077Sdes /* case??? */ 459266077Sdes if (strncmp(fkeyword, keyword, strlen(keyword)) == 0) { 460266077Sdes free(fkeyword); 461266077Sdes /* whee, the match! */ 462266077Sdes /* retrieve it's data */ 463266077Sdes i = sldns_bget_token(b, data, d_del, 0); 464266077Sdes return i; 465266077Sdes } else { 466266077Sdes free(fkeyword); 467266077Sdes return -1; 468266077Sdes } 469266077Sdes} 470266077Sdes 471