/* parse.c revision 266077 */
1266077Sdes/* 2266077Sdes * a generic (simple) parser. Use to parse rr's, private key 3266077Sdes * information and /etc/resolv.conf files 4266077Sdes * 5266077Sdes * a Net::DNS like library for C 6266077Sdes * LibDNS Team @ NLnet Labs 7266077Sdes * (c) NLnet Labs, 2005-2006 8266077Sdes * See the file LICENSE for the license 9266077Sdes */ 10266077Sdes#include "config.h" 11266077Sdes#include "ldns/parse.h" 12266077Sdes#include "ldns/parseutil.h" 13266077Sdes#include "ldns/sbuffer.h" 14266077Sdes 15266077Sdes#include <limits.h> 16266077Sdes#include <strings.h> 17266077Sdes 18266077Sdessldns_lookup_table sldns_directive_types[] = { 19266077Sdes { LDNS_DIR_TTL, "$TTL" }, 20266077Sdes { LDNS_DIR_ORIGIN, "$ORIGIN" }, 21266077Sdes { LDNS_DIR_INCLUDE, "$INCLUDE" }, 22266077Sdes { 0, NULL } 23266077Sdes}; 24266077Sdes 25266077Sdes/* add max_limit here? */ 26266077Sdesssize_t 27266077Sdessldns_fget_token(FILE *f, char *token, const char *delim, size_t limit) 28266077Sdes{ 29266077Sdes return sldns_fget_token_l(f, token, delim, limit, NULL); 30266077Sdes} 31266077Sdes 32266077Sdesssize_t 33266077Sdessldns_fget_token_l(FILE *f, char *token, const char *delim, size_t limit, int *line_nr) 34266077Sdes{ 35266077Sdes int c, prev_c; 36266077Sdes int p; /* 0 -> no parenthese seen, >0 nr of ( seen */ 37266077Sdes int com, quoted; 38266077Sdes char *t; 39266077Sdes size_t i; 40266077Sdes const char *d; 41266077Sdes const char *del; 42266077Sdes 43266077Sdes /* standard delimeters */ 44266077Sdes if (!delim) { 45266077Sdes /* from isspace(3) */ 46266077Sdes del = LDNS_PARSE_NORMAL; 47266077Sdes } else { 48266077Sdes del = delim; 49266077Sdes } 50266077Sdes 51266077Sdes p = 0; 52266077Sdes i = 0; 53266077Sdes com = 0; 54266077Sdes quoted = 0; 55266077Sdes prev_c = 0; 56266077Sdes t = token; 57266077Sdes if (del[0] == '"') { 58266077Sdes quoted = 1; 59266077Sdes } 60266077Sdes while ((c = getc(f)) != EOF) { 61266077Sdes if (c == '\r') /* carriage return */ 62266077Sdes c = ' '; 
63266077Sdes if (c == '(' && prev_c != '\\' && !quoted) { 64266077Sdes /* this only counts for non-comments */ 65266077Sdes if (com == 0) { 66266077Sdes p++; 67266077Sdes } 68266077Sdes prev_c = c; 69266077Sdes continue; 70266077Sdes } 71266077Sdes 72266077Sdes if (c == ')' && prev_c != '\\' && !quoted) { 73266077Sdes /* this only counts for non-comments */ 74266077Sdes if (com == 0) { 75266077Sdes p--; 76266077Sdes } 77266077Sdes prev_c = c; 78266077Sdes continue; 79266077Sdes } 80266077Sdes 81266077Sdes if (p < 0) { 82266077Sdes /* more ) then ( - close off the string */ 83266077Sdes *t = '\0'; 84266077Sdes return 0; 85266077Sdes } 86266077Sdes 87266077Sdes /* do something with comments ; */ 88266077Sdes if (c == ';' && quoted == 0) { 89266077Sdes if (prev_c != '\\') { 90266077Sdes com = 1; 91266077Sdes } 92266077Sdes } 93266077Sdes if (c == '\"' && com == 0 && prev_c != '\\') { 94266077Sdes quoted = 1 - quoted; 95266077Sdes } 96266077Sdes 97266077Sdes if (c == '\n' && com != 0) { 98266077Sdes /* comments */ 99266077Sdes com = 0; 100266077Sdes *t = ' '; 101266077Sdes if (line_nr) { 102266077Sdes *line_nr = *line_nr + 1; 103266077Sdes } 104266077Sdes if (p == 0 && i > 0) { 105266077Sdes goto tokenread; 106266077Sdes } else { 107266077Sdes prev_c = c; 108266077Sdes continue; 109266077Sdes } 110266077Sdes } 111266077Sdes 112266077Sdes if (com == 1) { 113266077Sdes *t = ' '; 114266077Sdes prev_c = c; 115266077Sdes continue; 116266077Sdes } 117266077Sdes 118266077Sdes if (c == '\n' && p != 0 && t > token) { 119266077Sdes /* in parentheses */ 120266077Sdes if (line_nr) { 121266077Sdes *line_nr = *line_nr + 1; 122266077Sdes } 123266077Sdes *t++ = ' '; 124266077Sdes prev_c = c; 125266077Sdes continue; 126266077Sdes } 127266077Sdes 128266077Sdes /* check if we hit the delim */ 129266077Sdes for (d = del; *d; d++) { 130266077Sdes if (c == *d && i > 0 && prev_c != '\\' && p == 0) { 131266077Sdes if (c == '\n' && line_nr) { 132266077Sdes *line_nr = *line_nr + 1; 
133266077Sdes } 134266077Sdes goto tokenread; 135266077Sdes } 136266077Sdes } 137266077Sdes if (c != '\0' && c != '\n') { 138266077Sdes i++; 139266077Sdes } 140266077Sdes if (limit > 0 && (i >= limit || (size_t)(t-token) >= limit)) { 141266077Sdes *t = '\0'; 142266077Sdes return -1; 143266077Sdes } 144266077Sdes if (c != '\0' && c != '\n') { 145266077Sdes *t++ = c; 146266077Sdes } 147266077Sdes if (c == '\\' && prev_c == '\\') 148266077Sdes prev_c = 0; 149266077Sdes else prev_c = c; 150266077Sdes } 151266077Sdes *t = '\0'; 152266077Sdes if (c == EOF) { 153266077Sdes return (ssize_t)i; 154266077Sdes } 155266077Sdes 156266077Sdes if (i == 0) { 157266077Sdes /* nothing read */ 158266077Sdes return -1; 159266077Sdes } 160266077Sdes if (p != 0) { 161266077Sdes return -1; 162266077Sdes } 163266077Sdes return (ssize_t)i; 164266077Sdes 165266077Sdestokenread: 166266077Sdes if(*del == '"') 167266077Sdes /* do not skip over quotes after the string, they are part 168266077Sdes * of the next string. 
But skip over whitespace (if needed)*/ 169266077Sdes sldns_fskipcs_l(f, del+1, line_nr); 170266077Sdes else sldns_fskipcs_l(f, del, line_nr); 171266077Sdes *t = '\0'; 172266077Sdes if (p != 0) { 173266077Sdes return -1; 174266077Sdes } 175266077Sdes 176266077Sdes return (ssize_t)i; 177266077Sdes} 178266077Sdes 179266077Sdesssize_t 180266077Sdessldns_fget_keyword_data(FILE *f, const char *keyword, const char *k_del, char *data, 181266077Sdes const char *d_del, size_t data_limit) 182266077Sdes{ 183266077Sdes return sldns_fget_keyword_data_l(f, keyword, k_del, data, d_del, 184266077Sdes data_limit, NULL); 185266077Sdes} 186266077Sdes 187266077Sdesssize_t 188266077Sdessldns_fget_keyword_data_l(FILE *f, const char *keyword, const char *k_del, char *data, 189266077Sdes const char *d_del, size_t data_limit, int *line_nr) 190266077Sdes{ 191266077Sdes /* we assume: keyword|sep|data */ 192266077Sdes char *fkeyword; 193266077Sdes ssize_t i; 194266077Sdes 195266077Sdes if(strlen(keyword) >= LDNS_MAX_KEYWORDLEN) 196266077Sdes return -1; 197266077Sdes fkeyword = (char*)malloc(LDNS_MAX_KEYWORDLEN); 198266077Sdes if(!fkeyword) 199266077Sdes return -1; 200266077Sdes 201266077Sdes i = sldns_fget_token(f, fkeyword, k_del, LDNS_MAX_KEYWORDLEN); 202266077Sdes if(i==0 || i==-1) { 203266077Sdes free(fkeyword); 204266077Sdes return -1; 205266077Sdes } 206266077Sdes 207266077Sdes /* case??? i instead of strlen? */ 208266077Sdes if (strncmp(fkeyword, keyword, LDNS_MAX_KEYWORDLEN - 1) == 0) { 209266077Sdes /* whee! 
*/ 210266077Sdes /* printf("%s\n%s\n", "Matching keyword", fkeyword); */ 211266077Sdes i = sldns_fget_token_l(f, data, d_del, data_limit, line_nr); 212266077Sdes free(fkeyword); 213266077Sdes return i; 214266077Sdes } else { 215266077Sdes /*printf("no match for %s (read: %s)\n", keyword, fkeyword);*/ 216266077Sdes free(fkeyword); 217266077Sdes return -1; 218266077Sdes } 219266077Sdes} 220266077Sdes 221266077Sdesssize_t 222266077Sdessldns_bget_token(sldns_buffer *b, char *token, const char *delim, size_t limit) 223266077Sdes{ 224266077Sdes return sldns_bget_token_par(b, token, delim, limit, NULL, NULL); 225266077Sdes} 226266077Sdes 227266077Sdesssize_t 228266077Sdessldns_bget_token_par(sldns_buffer *b, char *token, const char *delim, 229266077Sdes size_t limit, int* par, const char* skipw) 230266077Sdes{ 231266077Sdes int c, lc; 232266077Sdes int p; /* 0 -> no parenthese seen, >0 nr of ( seen */ 233266077Sdes int com, quoted; 234266077Sdes char *t; 235266077Sdes size_t i; 236266077Sdes const char *d; 237266077Sdes const char *del; 238266077Sdes 239266077Sdes /* standard delimiters */ 240266077Sdes if (!delim) { 241266077Sdes /* from isspace(3) */ 242266077Sdes del = LDNS_PARSE_NORMAL; 243266077Sdes } else { 244266077Sdes del = delim; 245266077Sdes } 246266077Sdes 247266077Sdes p = (par?*par:0); 248266077Sdes i = 0; 249266077Sdes com = 0; 250266077Sdes quoted = 0; 251266077Sdes t = token; 252266077Sdes lc = 0; 253266077Sdes if (del[0] == '"') { 254266077Sdes quoted = 1; 255266077Sdes } 256266077Sdes 257266077Sdes while ((c = sldns_bgetc(b)) != EOF) { 258266077Sdes if (c == '\r') /* carriage return */ 259266077Sdes c = ' '; 260266077Sdes if (c == '(' && lc != '\\' && !quoted) { 261266077Sdes /* this only counts for non-comments */ 262266077Sdes if (com == 0) { 263266077Sdes if(par) (*par)++; 264266077Sdes p++; 265266077Sdes } 266266077Sdes lc = c; 267266077Sdes continue; 268266077Sdes } 269266077Sdes 270266077Sdes if (c == ')' && lc != '\\' && !quoted) { 271266077Sdes 
/* this only counts for non-comments */ 272266077Sdes if (com == 0) { 273266077Sdes if(par) (*par)--; 274266077Sdes p--; 275266077Sdes } 276266077Sdes lc = c; 277266077Sdes continue; 278266077Sdes } 279266077Sdes 280266077Sdes if (p < 0) { 281266077Sdes /* more ) then ( */ 282266077Sdes *t = '\0'; 283266077Sdes return 0; 284266077Sdes } 285266077Sdes 286266077Sdes /* do something with comments ; */ 287266077Sdes if (c == ';' && quoted == 0) { 288266077Sdes if (lc != '\\') { 289266077Sdes com = 1; 290266077Sdes } 291266077Sdes } 292266077Sdes if (c == '"' && com == 0 && lc != '\\') { 293266077Sdes quoted = 1 - quoted; 294266077Sdes } 295266077Sdes 296266077Sdes if (c == '\n' && com != 0) { 297266077Sdes /* comments */ 298266077Sdes com = 0; 299266077Sdes *t = ' '; 300266077Sdes lc = c; 301266077Sdes continue; 302266077Sdes } 303266077Sdes 304266077Sdes if (com == 1) { 305266077Sdes *t = ' '; 306266077Sdes lc = c; 307266077Sdes continue; 308266077Sdes } 309266077Sdes 310266077Sdes if (c == '\n' && p != 0) { 311266077Sdes /* in parentheses */ 312266077Sdes /* do not write ' ' if we want to skip spaces */ 313266077Sdes if(!(skipw && (strchr(skipw, c)||strchr(skipw, ' ')))) 314266077Sdes *t++ = ' '; 315266077Sdes lc = c; 316266077Sdes continue; 317266077Sdes } 318266077Sdes 319266077Sdes /* check to skip whitespace at start, but also after ( */ 320266077Sdes if(skipw && i==0 && !com && !quoted && lc != '\\') { 321266077Sdes if(strchr(skipw, c)) { 322266077Sdes lc = c; 323266077Sdes continue; 324266077Sdes } 325266077Sdes } 326266077Sdes 327266077Sdes /* check if we hit the delim */ 328266077Sdes for (d = del; *d; d++) { 329266077Sdes /* we can only exit if no parens or user tracks them */ 330266077Sdes if (c == *d && lc != '\\' && (p == 0 || par)) { 331266077Sdes goto tokenread; 332266077Sdes } 333266077Sdes } 334266077Sdes 335266077Sdes i++; 336266077Sdes if (limit > 0 && (i >= limit || (size_t)(t-token) >= limit)) { 337266077Sdes *t = '\0'; 338266077Sdes return -1; 
339266077Sdes } 340266077Sdes *t++ = c; 341266077Sdes 342266077Sdes if (c == '\\' && lc == '\\') { 343266077Sdes lc = 0; 344266077Sdes } else { 345266077Sdes lc = c; 346266077Sdes } 347266077Sdes } 348266077Sdes *t = '\0'; 349266077Sdes if (i == 0) { 350266077Sdes /* nothing read */ 351266077Sdes return -1; 352266077Sdes } 353266077Sdes if (!par && p != 0) { 354266077Sdes return -1; 355266077Sdes } 356266077Sdes return (ssize_t)i; 357266077Sdes 358266077Sdestokenread: 359266077Sdes if(*del == '"') 360266077Sdes /* do not skip over quotes after the string, they are part 361266077Sdes * of the next string. But skip over whitespace (if needed)*/ 362266077Sdes sldns_bskipcs(b, del+1); 363266077Sdes else sldns_bskipcs(b, del); 364266077Sdes *t = '\0'; 365266077Sdes 366266077Sdes if (!par && p != 0) { 367266077Sdes return -1; 368266077Sdes } 369266077Sdes return (ssize_t)i; 370266077Sdes} 371266077Sdes 372266077Sdes 373266077Sdesvoid 374266077Sdessldns_bskipcs(sldns_buffer *buffer, const char *s) 375266077Sdes{ 376266077Sdes int found; 377266077Sdes char c; 378266077Sdes const char *d; 379266077Sdes 380266077Sdes while(sldns_buffer_available_at(buffer, buffer->_position, sizeof(char))) { 381266077Sdes c = (char) sldns_buffer_read_u8_at(buffer, buffer->_position); 382266077Sdes found = 0; 383266077Sdes for (d = s; *d; d++) { 384266077Sdes if (*d == c) { 385266077Sdes found = 1; 386266077Sdes } 387266077Sdes } 388266077Sdes if (found && buffer->_limit > buffer->_position) { 389266077Sdes buffer->_position += sizeof(char); 390266077Sdes } else { 391266077Sdes return; 392266077Sdes } 393266077Sdes } 394266077Sdes} 395266077Sdes 396266077Sdesvoid 397266077Sdessldns_fskipcs(FILE *fp, const char *s) 398266077Sdes{ 399266077Sdes sldns_fskipcs_l(fp, s, NULL); 400266077Sdes} 401266077Sdes 402266077Sdesvoid 403266077Sdessldns_fskipcs_l(FILE *fp, const char *s, int *line_nr) 404266077Sdes{ 405266077Sdes int found; 406266077Sdes int c; 407266077Sdes const char *d; 408266077Sdes 
409266077Sdes while ((c = fgetc(fp)) != EOF) { 410266077Sdes if (line_nr && c == '\n') { 411266077Sdes *line_nr = *line_nr + 1; 412266077Sdes } 413266077Sdes found = 0; 414266077Sdes for (d = s; *d; d++) { 415266077Sdes if (*d == c) { 416266077Sdes found = 1; 417266077Sdes } 418266077Sdes } 419266077Sdes if (!found) { 420266077Sdes /* with getc, we've read too far */ 421266077Sdes ungetc(c, fp); 422266077Sdes return; 423266077Sdes } 424266077Sdes } 425266077Sdes} 426266077Sdes 427266077Sdesssize_t 428266077Sdessldns_bget_keyword_data(sldns_buffer *b, const char *keyword, const char *k_del, char 429266077Sdes*data, const char *d_del, size_t data_limit) 430266077Sdes{ 431266077Sdes /* we assume: keyword|sep|data */ 432266077Sdes char *fkeyword; 433266077Sdes ssize_t i; 434266077Sdes 435266077Sdes if(strlen(keyword) >= LDNS_MAX_KEYWORDLEN) 436266077Sdes return -1; 437266077Sdes fkeyword = (char*)malloc(LDNS_MAX_KEYWORDLEN); 438266077Sdes if(!fkeyword) 439266077Sdes return -1; /* out of memory */ 440266077Sdes 441266077Sdes i = sldns_bget_token(b, fkeyword, k_del, data_limit); 442266077Sdes if(i==0 || i==-1) { 443266077Sdes free(fkeyword); 444266077Sdes return -1; /* nothing read */ 445266077Sdes } 446266077Sdes 447266077Sdes /* case??? */ 448266077Sdes if (strncmp(fkeyword, keyword, strlen(keyword)) == 0) { 449266077Sdes free(fkeyword); 450266077Sdes /* whee, the match! */ 451266077Sdes /* retrieve it's data */ 452266077Sdes i = sldns_bget_token(b, data, d_del, 0); 453266077Sdes return i; 454266077Sdes } else { 455266077Sdes free(fkeyword); 456266077Sdes return -1; 457266077Sdes } 458266077Sdes} 459266077Sdes 460