parse.c revision 287915
1266077Sdes/*
2266077Sdes * a generic (simple) parser. Use to parse rr's, private key
3266077Sdes * information and /etc/resolv.conf files
4266077Sdes *
5266077Sdes * a Net::DNS like library for C
6266077Sdes * LibDNS Team @ NLnet Labs
7266077Sdes * (c) NLnet Labs, 2005-2006
8266077Sdes * See the file LICENSE for the license
9266077Sdes */
10266077Sdes#include "config.h"
11287915Sdes#include "sldns/parse.h"
12287915Sdes#include "sldns/parseutil.h"
13287915Sdes#include "sldns/sbuffer.h"
14266077Sdes
15266077Sdes#include <limits.h>
16266077Sdes#include <strings.h>
17266077Sdes
18266077Sdessldns_lookup_table sldns_directive_types[] = {
19266077Sdes        { LDNS_DIR_TTL, "$TTL" },
20266077Sdes        { LDNS_DIR_ORIGIN, "$ORIGIN" },
21266077Sdes        { LDNS_DIR_INCLUDE, "$INCLUDE" },
22266077Sdes        { 0, NULL }
23266077Sdes};
24266077Sdes
/*
 * Read one token from stream f into token.
 *
 * Convenience form of sldns_fget_token_l() for callers that do not keep a
 * line counter: the arguments are forwarded unchanged and NULL is passed
 * for line_nr.  The return value is that of sldns_fget_token_l().
 */
/* add max_limit here? */
ssize_t
sldns_fget_token(FILE *f, char *token, const char *delim, size_t limit)
{
	int *no_line_counter = NULL;

	return sldns_fget_token_l(f, token, delim, limit, no_line_counter);
}
31266077Sdes
32266077Sdesssize_t
33266077Sdessldns_fget_token_l(FILE *f, char *token, const char *delim, size_t limit, int *line_nr)
34266077Sdes{
35266077Sdes	int c, prev_c;
36266077Sdes	int p; /* 0 -> no parenthese seen, >0 nr of ( seen */
37266077Sdes	int com, quoted;
38266077Sdes	char *t;
39266077Sdes	size_t i;
40266077Sdes	const char *d;
41266077Sdes	const char *del;
42266077Sdes
43266077Sdes	/* standard delimeters */
44266077Sdes	if (!delim) {
45266077Sdes		/* from isspace(3) */
46266077Sdes		del = LDNS_PARSE_NORMAL;
47266077Sdes	} else {
48266077Sdes		del = delim;
49266077Sdes	}
50266077Sdes
51266077Sdes	p = 0;
52266077Sdes	i = 0;
53266077Sdes	com = 0;
54266077Sdes	quoted = 0;
55266077Sdes	prev_c = 0;
56266077Sdes	t = token;
57266077Sdes	if (del[0] == '"') {
58266077Sdes		quoted = 1;
59266077Sdes	}
60266077Sdes	while ((c = getc(f)) != EOF) {
61266077Sdes		if (c == '\r') /* carriage return */
62266077Sdes			c = ' ';
63266077Sdes		if (c == '(' && prev_c != '\\' && !quoted) {
64266077Sdes			/* this only counts for non-comments */
65266077Sdes			if (com == 0) {
66266077Sdes				p++;
67266077Sdes			}
68266077Sdes			prev_c = c;
69266077Sdes			continue;
70266077Sdes		}
71266077Sdes
72266077Sdes		if (c == ')' && prev_c != '\\' && !quoted) {
73266077Sdes			/* this only counts for non-comments */
74266077Sdes			if (com == 0) {
75266077Sdes				p--;
76266077Sdes			}
77266077Sdes			prev_c = c;
78266077Sdes			continue;
79266077Sdes		}
80266077Sdes
81266077Sdes		if (p < 0) {
82266077Sdes			/* more ) then ( - close off the string */
83266077Sdes			*t = '\0';
84266077Sdes			return 0;
85266077Sdes		}
86266077Sdes
87266077Sdes		/* do something with comments ; */
88266077Sdes		if (c == ';' && quoted == 0) {
89266077Sdes			if (prev_c != '\\') {
90266077Sdes				com = 1;
91266077Sdes			}
92266077Sdes		}
93266077Sdes		if (c == '\"' && com == 0 && prev_c != '\\') {
94266077Sdes			quoted = 1 - quoted;
95266077Sdes		}
96266077Sdes
97266077Sdes		if (c == '\n' && com != 0) {
98266077Sdes			/* comments */
99266077Sdes			com = 0;
100266077Sdes			*t = ' ';
101266077Sdes			if (line_nr) {
102266077Sdes				*line_nr = *line_nr + 1;
103266077Sdes			}
104266077Sdes			if (p == 0 && i > 0) {
105266077Sdes				goto tokenread;
106266077Sdes			} else {
107266077Sdes				prev_c = c;
108266077Sdes				continue;
109266077Sdes			}
110266077Sdes		}
111266077Sdes
112266077Sdes		if (com == 1) {
113266077Sdes			*t = ' ';
114266077Sdes			prev_c = c;
115266077Sdes			continue;
116266077Sdes		}
117266077Sdes
118266077Sdes		if (c == '\n' && p != 0 && t > token) {
119266077Sdes			/* in parentheses */
120266077Sdes			if (line_nr) {
121266077Sdes				*line_nr = *line_nr + 1;
122266077Sdes			}
123266077Sdes			*t++ = ' ';
124266077Sdes			prev_c = c;
125266077Sdes			continue;
126266077Sdes		}
127266077Sdes
128266077Sdes		/* check if we hit the delim */
129266077Sdes		for (d = del; *d; d++) {
130266077Sdes			if (c == *d && i > 0 && prev_c != '\\' && p == 0) {
131266077Sdes				if (c == '\n' && line_nr) {
132266077Sdes					*line_nr = *line_nr + 1;
133266077Sdes				}
134266077Sdes				goto tokenread;
135266077Sdes			}
136266077Sdes		}
137266077Sdes		if (c != '\0' && c != '\n') {
138266077Sdes			i++;
139266077Sdes		}
140266077Sdes		if (limit > 0 && (i >= limit || (size_t)(t-token) >= limit)) {
141266077Sdes			*t = '\0';
142266077Sdes			return -1;
143266077Sdes		}
144266077Sdes		if (c != '\0' && c != '\n') {
145266077Sdes			*t++ = c;
146266077Sdes		}
147266077Sdes		if (c == '\\' && prev_c == '\\')
148266077Sdes			prev_c = 0;
149266077Sdes		else	prev_c = c;
150266077Sdes	}
151266077Sdes	*t = '\0';
152266077Sdes	if (c == EOF) {
153266077Sdes		return (ssize_t)i;
154266077Sdes	}
155266077Sdes
156266077Sdes	if (i == 0) {
157266077Sdes		/* nothing read */
158266077Sdes		return -1;
159266077Sdes	}
160266077Sdes	if (p != 0) {
161266077Sdes		return -1;
162266077Sdes	}
163266077Sdes	return (ssize_t)i;
164266077Sdes
165266077Sdestokenread:
166266077Sdes	if(*del == '"')
167266077Sdes		/* do not skip over quotes after the string, they are part
168266077Sdes		 * of the next string.  But skip over whitespace (if needed)*/
169266077Sdes		sldns_fskipcs_l(f, del+1, line_nr);
170266077Sdes	else	sldns_fskipcs_l(f, del, line_nr);
171266077Sdes	*t = '\0';
172266077Sdes	if (p != 0) {
173266077Sdes		return -1;
174266077Sdes	}
175266077Sdes
176266077Sdes	return (ssize_t)i;
177266077Sdes}
178266077Sdes
/*
 * Read "keyword <k_del> data <d_del>" from stream f without tracking line
 * numbers.  All arguments are forwarded to sldns_fget_keyword_data_l()
 * with a NULL line counter; its return value is passed back unchanged.
 */
ssize_t
sldns_fget_keyword_data(FILE *f, const char *keyword, const char *k_del, char *data,
               const char *d_del, size_t data_limit)
{
	int *no_line_counter = NULL;

	return sldns_fget_keyword_data_l(f, keyword, k_del, data, d_del,
		data_limit, no_line_counter);
}
186266077Sdes
187266077Sdesssize_t
188266077Sdessldns_fget_keyword_data_l(FILE *f, const char *keyword, const char *k_del, char *data,
189266077Sdes               const char *d_del, size_t data_limit, int *line_nr)
190266077Sdes{
191266077Sdes       /* we assume: keyword|sep|data */
192266077Sdes       char *fkeyword;
193266077Sdes       ssize_t i;
194266077Sdes
195266077Sdes       if(strlen(keyword) >= LDNS_MAX_KEYWORDLEN)
196266077Sdes               return -1;
197266077Sdes       fkeyword = (char*)malloc(LDNS_MAX_KEYWORDLEN);
198266077Sdes       if(!fkeyword)
199266077Sdes               return -1;
200266077Sdes
201266077Sdes       i = sldns_fget_token(f, fkeyword, k_del, LDNS_MAX_KEYWORDLEN);
202266077Sdes       if(i==0 || i==-1) {
203266077Sdes               free(fkeyword);
204266077Sdes               return -1;
205266077Sdes       }
206266077Sdes
207266077Sdes       /* case??? i instead of strlen? */
208266077Sdes       if (strncmp(fkeyword, keyword, LDNS_MAX_KEYWORDLEN - 1) == 0) {
209266077Sdes               /* whee! */
210266077Sdes               /* printf("%s\n%s\n", "Matching keyword", fkeyword); */
211266077Sdes               i = sldns_fget_token_l(f, data, d_del, data_limit, line_nr);
212266077Sdes               free(fkeyword);
213266077Sdes               return i;
214266077Sdes       } else {
215266077Sdes               /*printf("no match for %s (read: %s)\n", keyword, fkeyword);*/
216266077Sdes               free(fkeyword);
217266077Sdes               return -1;
218266077Sdes       }
219266077Sdes}
220266077Sdes
221276541Sdesint
222276541Sdessldns_bgetc(sldns_buffer *buffer)
223276541Sdes{
224276541Sdes	if (!sldns_buffer_available_at(buffer, buffer->_position, sizeof(uint8_t))) {
225276541Sdes		sldns_buffer_set_position(buffer, sldns_buffer_limit(buffer));
226276541Sdes		/* sldns_buffer_rewind(buffer);*/
227276541Sdes		return EOF;
228276541Sdes	}
229276541Sdes	return (int)sldns_buffer_read_u8(buffer);
230276541Sdes}
231276541Sdes
232266077Sdesssize_t
233266077Sdessldns_bget_token(sldns_buffer *b, char *token, const char *delim, size_t limit)
234266077Sdes{
235266077Sdes	return sldns_bget_token_par(b, token, delim, limit, NULL, NULL);
236266077Sdes}
237266077Sdes
238266077Sdesssize_t
239266077Sdessldns_bget_token_par(sldns_buffer *b, char *token, const char *delim,
240266077Sdes	size_t limit, int* par, const char* skipw)
241266077Sdes{
242266077Sdes	int c, lc;
243266077Sdes	int p; /* 0 -> no parenthese seen, >0 nr of ( seen */
244266077Sdes	int com, quoted;
245266077Sdes	char *t;
246266077Sdes	size_t i;
247266077Sdes	const char *d;
248266077Sdes	const char *del;
249266077Sdes
250266077Sdes	/* standard delimiters */
251266077Sdes	if (!delim) {
252266077Sdes		/* from isspace(3) */
253266077Sdes		del = LDNS_PARSE_NORMAL;
254266077Sdes	} else {
255266077Sdes		del = delim;
256266077Sdes	}
257266077Sdes
258266077Sdes	p = (par?*par:0);
259266077Sdes	i = 0;
260266077Sdes	com = 0;
261266077Sdes	quoted = 0;
262266077Sdes	t = token;
263266077Sdes	lc = 0;
264266077Sdes	if (del[0] == '"') {
265266077Sdes		quoted = 1;
266266077Sdes	}
267266077Sdes
268266077Sdes	while ((c = sldns_bgetc(b)) != EOF) {
269266077Sdes		if (c == '\r') /* carriage return */
270266077Sdes			c = ' ';
271266077Sdes		if (c == '(' && lc != '\\' && !quoted) {
272266077Sdes			/* this only counts for non-comments */
273266077Sdes			if (com == 0) {
274266077Sdes				if(par) (*par)++;
275266077Sdes				p++;
276266077Sdes			}
277266077Sdes			lc = c;
278266077Sdes			continue;
279266077Sdes		}
280266077Sdes
281266077Sdes		if (c == ')' && lc != '\\' && !quoted) {
282266077Sdes			/* this only counts for non-comments */
283266077Sdes			if (com == 0) {
284266077Sdes				if(par) (*par)--;
285266077Sdes				p--;
286266077Sdes			}
287266077Sdes			lc = c;
288266077Sdes			continue;
289266077Sdes		}
290266077Sdes
291266077Sdes		if (p < 0) {
292266077Sdes			/* more ) then ( */
293266077Sdes			*t = '\0';
294266077Sdes			return 0;
295266077Sdes		}
296266077Sdes
297266077Sdes		/* do something with comments ; */
298266077Sdes		if (c == ';' && quoted == 0) {
299266077Sdes			if (lc != '\\') {
300266077Sdes				com = 1;
301266077Sdes			}
302266077Sdes		}
303266077Sdes		if (c == '"' && com == 0 && lc != '\\') {
304266077Sdes			quoted = 1 - quoted;
305266077Sdes		}
306266077Sdes
307266077Sdes		if (c == '\n' && com != 0) {
308266077Sdes			/* comments */
309266077Sdes			com = 0;
310266077Sdes			*t = ' ';
311266077Sdes			lc = c;
312266077Sdes			continue;
313266077Sdes		}
314266077Sdes
315266077Sdes		if (com == 1) {
316266077Sdes			*t = ' ';
317266077Sdes			lc = c;
318266077Sdes			continue;
319266077Sdes		}
320266077Sdes
321266077Sdes		if (c == '\n' && p != 0) {
322266077Sdes			/* in parentheses */
323266077Sdes			/* do not write ' ' if we want to skip spaces */
324266077Sdes			if(!(skipw && (strchr(skipw, c)||strchr(skipw, ' '))))
325266077Sdes				*t++ = ' ';
326266077Sdes			lc = c;
327266077Sdes			continue;
328266077Sdes		}
329266077Sdes
330266077Sdes		/* check to skip whitespace at start, but also after ( */
331266077Sdes		if(skipw && i==0 && !com && !quoted && lc != '\\') {
332266077Sdes			if(strchr(skipw, c)) {
333266077Sdes				lc = c;
334266077Sdes				continue;
335266077Sdes			}
336266077Sdes		}
337266077Sdes
338266077Sdes		/* check if we hit the delim */
339266077Sdes		for (d = del; *d; d++) {
340266077Sdes			/* we can only exit if no parens or user tracks them */
341266077Sdes                        if (c == *d && lc != '\\' && (p == 0 || par)) {
342266077Sdes				goto tokenread;
343266077Sdes                        }
344266077Sdes		}
345266077Sdes
346266077Sdes		i++;
347266077Sdes		if (limit > 0 && (i >= limit || (size_t)(t-token) >= limit)) {
348266077Sdes			*t = '\0';
349266077Sdes			return -1;
350266077Sdes		}
351266077Sdes		*t++ = c;
352266077Sdes
353266077Sdes		if (c == '\\' && lc == '\\') {
354266077Sdes			lc = 0;
355266077Sdes		} else {
356266077Sdes			lc = c;
357266077Sdes		}
358266077Sdes	}
359266077Sdes	*t = '\0';
360266077Sdes	if (i == 0) {
361266077Sdes		/* nothing read */
362266077Sdes		return -1;
363266077Sdes	}
364266077Sdes	if (!par && p != 0) {
365266077Sdes		return -1;
366266077Sdes	}
367266077Sdes	return (ssize_t)i;
368266077Sdes
369266077Sdestokenread:
370266077Sdes	if(*del == '"')
371266077Sdes		/* do not skip over quotes after the string, they are part
372266077Sdes		 * of the next string.  But skip over whitespace (if needed)*/
373266077Sdes		sldns_bskipcs(b, del+1);
374266077Sdes	else 	sldns_bskipcs(b, del);
375266077Sdes	*t = '\0';
376266077Sdes
377266077Sdes	if (!par && p != 0) {
378266077Sdes		return -1;
379266077Sdes	}
380266077Sdes	return (ssize_t)i;
381266077Sdes}
382266077Sdes
383266077Sdes
384266077Sdesvoid
385266077Sdessldns_bskipcs(sldns_buffer *buffer, const char *s)
386266077Sdes{
387266077Sdes        int found;
388266077Sdes        char c;
389266077Sdes        const char *d;
390266077Sdes
391266077Sdes        while(sldns_buffer_available_at(buffer, buffer->_position, sizeof(char))) {
392266077Sdes                c = (char) sldns_buffer_read_u8_at(buffer, buffer->_position);
393266077Sdes                found = 0;
394266077Sdes                for (d = s; *d; d++) {
395266077Sdes                        if (*d == c) {
396266077Sdes                                found = 1;
397266077Sdes                        }
398266077Sdes                }
399266077Sdes                if (found && buffer->_limit > buffer->_position) {
400266077Sdes                        buffer->_position += sizeof(char);
401266077Sdes                } else {
402266077Sdes                        return;
403266077Sdes                }
404266077Sdes        }
405266077Sdes}
406266077Sdes
/*
 * Skip over the leading characters of stream fp that occur in s, without
 * keeping a line count: sldns_fskipcs_l() is called with a NULL counter.
 */
void
sldns_fskipcs(FILE *fp, const char *s)
{
	int *no_line_counter = NULL;

	sldns_fskipcs_l(fp, s, no_line_counter);
}
412266077Sdes
/*
 * Consume characters from stream fp for as long as they occur in the set
 * s.  The first character that is not in s is pushed back with ungetc()
 * so the next read sees it again.  A NUL byte in the stream never matches.
 *
 * When line_nr is not NULL it is incremented for every '\n' that is read;
 * this includes a '\n' that turns out not to be in s and is pushed back.
 */
void
sldns_fskipcs_l(FILE *fp, const char *s, int *line_nr)
{
	const char *probe;
	int ch;

	for (;;) {
		ch = fgetc(fp);
		if (ch == EOF)
			return;

		if (ch == '\n' && line_nr)
			*line_nr = *line_nr + 1;

		/* probe ends on the string terminator when ch is not in s */
		for (probe = s; *probe; probe++) {
			if (*probe == ch)
				break;
		}
		if (*probe == '\0') {
			/* with getc, we've read too far */
			ungetc(ch, fp);
			return;
		}
	}
}
437266077Sdes
438266077Sdesssize_t
439266077Sdessldns_bget_keyword_data(sldns_buffer *b, const char *keyword, const char *k_del, char
440266077Sdes*data, const char *d_del, size_t data_limit)
441266077Sdes{
442266077Sdes       /* we assume: keyword|sep|data */
443266077Sdes       char *fkeyword;
444266077Sdes       ssize_t i;
445266077Sdes
446266077Sdes       if(strlen(keyword) >= LDNS_MAX_KEYWORDLEN)
447266077Sdes               return -1;
448266077Sdes       fkeyword = (char*)malloc(LDNS_MAX_KEYWORDLEN);
449266077Sdes       if(!fkeyword)
450266077Sdes               return -1; /* out of memory */
451266077Sdes
452266077Sdes       i = sldns_bget_token(b, fkeyword, k_del, data_limit);
453266077Sdes       if(i==0 || i==-1) {
454266077Sdes               free(fkeyword);
455266077Sdes               return -1; /* nothing read */
456266077Sdes       }
457266077Sdes
458266077Sdes       /* case??? */
459266077Sdes       if (strncmp(fkeyword, keyword, strlen(keyword)) == 0) {
460266077Sdes               free(fkeyword);
461266077Sdes               /* whee, the match! */
462266077Sdes               /* retrieve it's data */
463266077Sdes               i = sldns_bget_token(b, data, d_del, 0);
464266077Sdes               return i;
465266077Sdes       } else {
466266077Sdes               free(fkeyword);
467266077Sdes               return -1;
468266077Sdes       }
469266077Sdes}
470266077Sdes
471