parse.c revision 266077
/*
 * A generic (simple) parser. Used to parse RRs, private key
 * information and /etc/resolv.conf files.
 *
 * A Net::DNS-like library for C.
 * LibDNS Team @ NLnet Labs
 * (c) NLnet Labs, 2005-2006
 * See the file LICENSE for the license.
 */
10266077Sdes#include "config.h"
11266077Sdes#include "ldns/parse.h"
12266077Sdes#include "ldns/parseutil.h"
13266077Sdes#include "ldns/sbuffer.h"
14266077Sdes
15266077Sdes#include <limits.h>
16266077Sdes#include <strings.h>
17266077Sdes
18266077Sdessldns_lookup_table sldns_directive_types[] = {
19266077Sdes        { LDNS_DIR_TTL, "$TTL" },
20266077Sdes        { LDNS_DIR_ORIGIN, "$ORIGIN" },
21266077Sdes        { LDNS_DIR_INCLUDE, "$INCLUDE" },
22266077Sdes        { 0, NULL }
23266077Sdes};
24266077Sdes
/*
 * Read one token from stream f without keeping a line count.
 *
 * Convenience wrapper: hands f, token, delim and limit unchanged to
 * sldns_fget_token_l() together with a NULL line counter and returns
 * whatever that call returns.
 *
 * Open question carried over from the original: add max_limit here?
 */
ssize_t
sldns_fget_token(FILE *f, char *token, const char *delim, size_t limit)
{
	int *no_line_counter = NULL;

	return sldns_fget_token_l(f, token, delim, limit, no_line_counter);
}
31266077Sdes
32266077Sdesssize_t
33266077Sdessldns_fget_token_l(FILE *f, char *token, const char *delim, size_t limit, int *line_nr)
34266077Sdes{
35266077Sdes	int c, prev_c;
36266077Sdes	int p; /* 0 -> no parenthese seen, >0 nr of ( seen */
37266077Sdes	int com, quoted;
38266077Sdes	char *t;
39266077Sdes	size_t i;
40266077Sdes	const char *d;
41266077Sdes	const char *del;
42266077Sdes
43266077Sdes	/* standard delimeters */
44266077Sdes	if (!delim) {
45266077Sdes		/* from isspace(3) */
46266077Sdes		del = LDNS_PARSE_NORMAL;
47266077Sdes	} else {
48266077Sdes		del = delim;
49266077Sdes	}
50266077Sdes
51266077Sdes	p = 0;
52266077Sdes	i = 0;
53266077Sdes	com = 0;
54266077Sdes	quoted = 0;
55266077Sdes	prev_c = 0;
56266077Sdes	t = token;
57266077Sdes	if (del[0] == '"') {
58266077Sdes		quoted = 1;
59266077Sdes	}
60266077Sdes	while ((c = getc(f)) != EOF) {
61266077Sdes		if (c == '\r') /* carriage return */
62266077Sdes			c = ' ';
63266077Sdes		if (c == '(' && prev_c != '\\' && !quoted) {
64266077Sdes			/* this only counts for non-comments */
65266077Sdes			if (com == 0) {
66266077Sdes				p++;
67266077Sdes			}
68266077Sdes			prev_c = c;
69266077Sdes			continue;
70266077Sdes		}
71266077Sdes
72266077Sdes		if (c == ')' && prev_c != '\\' && !quoted) {
73266077Sdes			/* this only counts for non-comments */
74266077Sdes			if (com == 0) {
75266077Sdes				p--;
76266077Sdes			}
77266077Sdes			prev_c = c;
78266077Sdes			continue;
79266077Sdes		}
80266077Sdes
81266077Sdes		if (p < 0) {
82266077Sdes			/* more ) then ( - close off the string */
83266077Sdes			*t = '\0';
84266077Sdes			return 0;
85266077Sdes		}
86266077Sdes
87266077Sdes		/* do something with comments ; */
88266077Sdes		if (c == ';' && quoted == 0) {
89266077Sdes			if (prev_c != '\\') {
90266077Sdes				com = 1;
91266077Sdes			}
92266077Sdes		}
93266077Sdes		if (c == '\"' && com == 0 && prev_c != '\\') {
94266077Sdes			quoted = 1 - quoted;
95266077Sdes		}
96266077Sdes
97266077Sdes		if (c == '\n' && com != 0) {
98266077Sdes			/* comments */
99266077Sdes			com = 0;
100266077Sdes			*t = ' ';
101266077Sdes			if (line_nr) {
102266077Sdes				*line_nr = *line_nr + 1;
103266077Sdes			}
104266077Sdes			if (p == 0 && i > 0) {
105266077Sdes				goto tokenread;
106266077Sdes			} else {
107266077Sdes				prev_c = c;
108266077Sdes				continue;
109266077Sdes			}
110266077Sdes		}
111266077Sdes
112266077Sdes		if (com == 1) {
113266077Sdes			*t = ' ';
114266077Sdes			prev_c = c;
115266077Sdes			continue;
116266077Sdes		}
117266077Sdes
118266077Sdes		if (c == '\n' && p != 0 && t > token) {
119266077Sdes			/* in parentheses */
120266077Sdes			if (line_nr) {
121266077Sdes				*line_nr = *line_nr + 1;
122266077Sdes			}
123266077Sdes			*t++ = ' ';
124266077Sdes			prev_c = c;
125266077Sdes			continue;
126266077Sdes		}
127266077Sdes
128266077Sdes		/* check if we hit the delim */
129266077Sdes		for (d = del; *d; d++) {
130266077Sdes			if (c == *d && i > 0 && prev_c != '\\' && p == 0) {
131266077Sdes				if (c == '\n' && line_nr) {
132266077Sdes					*line_nr = *line_nr + 1;
133266077Sdes				}
134266077Sdes				goto tokenread;
135266077Sdes			}
136266077Sdes		}
137266077Sdes		if (c != '\0' && c != '\n') {
138266077Sdes			i++;
139266077Sdes		}
140266077Sdes		if (limit > 0 && (i >= limit || (size_t)(t-token) >= limit)) {
141266077Sdes			*t = '\0';
142266077Sdes			return -1;
143266077Sdes		}
144266077Sdes		if (c != '\0' && c != '\n') {
145266077Sdes			*t++ = c;
146266077Sdes		}
147266077Sdes		if (c == '\\' && prev_c == '\\')
148266077Sdes			prev_c = 0;
149266077Sdes		else	prev_c = c;
150266077Sdes	}
151266077Sdes	*t = '\0';
152266077Sdes	if (c == EOF) {
153266077Sdes		return (ssize_t)i;
154266077Sdes	}
155266077Sdes
156266077Sdes	if (i == 0) {
157266077Sdes		/* nothing read */
158266077Sdes		return -1;
159266077Sdes	}
160266077Sdes	if (p != 0) {
161266077Sdes		return -1;
162266077Sdes	}
163266077Sdes	return (ssize_t)i;
164266077Sdes
165266077Sdestokenread:
166266077Sdes	if(*del == '"')
167266077Sdes		/* do not skip over quotes after the string, they are part
168266077Sdes		 * of the next string.  But skip over whitespace (if needed)*/
169266077Sdes		sldns_fskipcs_l(f, del+1, line_nr);
170266077Sdes	else	sldns_fskipcs_l(f, del, line_nr);
171266077Sdes	*t = '\0';
172266077Sdes	if (p != 0) {
173266077Sdes		return -1;
174266077Sdes	}
175266077Sdes
176266077Sdes	return (ssize_t)i;
177266077Sdes}
178266077Sdes
/*
 * Read a "keyword<sep>data" pair from stream f without line counting.
 *
 * Convenience wrapper: passes all arguments unchanged to
 * sldns_fget_keyword_data_l() with a NULL line counter and returns that
 * call's result.
 */
ssize_t
sldns_fget_keyword_data(FILE *f, const char *keyword, const char *k_del, char *data,
               const char *d_del, size_t data_limit)
{
       int *no_line_counter = NULL;

       return sldns_fget_keyword_data_l(f, keyword, k_del, data, d_del,
		       data_limit, no_line_counter);
}
186266077Sdes
187266077Sdesssize_t
188266077Sdessldns_fget_keyword_data_l(FILE *f, const char *keyword, const char *k_del, char *data,
189266077Sdes               const char *d_del, size_t data_limit, int *line_nr)
190266077Sdes{
191266077Sdes       /* we assume: keyword|sep|data */
192266077Sdes       char *fkeyword;
193266077Sdes       ssize_t i;
194266077Sdes
195266077Sdes       if(strlen(keyword) >= LDNS_MAX_KEYWORDLEN)
196266077Sdes               return -1;
197266077Sdes       fkeyword = (char*)malloc(LDNS_MAX_KEYWORDLEN);
198266077Sdes       if(!fkeyword)
199266077Sdes               return -1;
200266077Sdes
201266077Sdes       i = sldns_fget_token(f, fkeyword, k_del, LDNS_MAX_KEYWORDLEN);
202266077Sdes       if(i==0 || i==-1) {
203266077Sdes               free(fkeyword);
204266077Sdes               return -1;
205266077Sdes       }
206266077Sdes
207266077Sdes       /* case??? i instead of strlen? */
208266077Sdes       if (strncmp(fkeyword, keyword, LDNS_MAX_KEYWORDLEN - 1) == 0) {
209266077Sdes               /* whee! */
210266077Sdes               /* printf("%s\n%s\n", "Matching keyword", fkeyword); */
211266077Sdes               i = sldns_fget_token_l(f, data, d_del, data_limit, line_nr);
212266077Sdes               free(fkeyword);
213266077Sdes               return i;
214266077Sdes       } else {
215266077Sdes               /*printf("no match for %s (read: %s)\n", keyword, fkeyword);*/
216266077Sdes               free(fkeyword);
217266077Sdes               return -1;
218266077Sdes       }
219266077Sdes}
220266077Sdes
221266077Sdesssize_t
222266077Sdessldns_bget_token(sldns_buffer *b, char *token, const char *delim, size_t limit)
223266077Sdes{
224266077Sdes	return sldns_bget_token_par(b, token, delim, limit, NULL, NULL);
225266077Sdes}
226266077Sdes
227266077Sdesssize_t
228266077Sdessldns_bget_token_par(sldns_buffer *b, char *token, const char *delim,
229266077Sdes	size_t limit, int* par, const char* skipw)
230266077Sdes{
231266077Sdes	int c, lc;
232266077Sdes	int p; /* 0 -> no parenthese seen, >0 nr of ( seen */
233266077Sdes	int com, quoted;
234266077Sdes	char *t;
235266077Sdes	size_t i;
236266077Sdes	const char *d;
237266077Sdes	const char *del;
238266077Sdes
239266077Sdes	/* standard delimiters */
240266077Sdes	if (!delim) {
241266077Sdes		/* from isspace(3) */
242266077Sdes		del = LDNS_PARSE_NORMAL;
243266077Sdes	} else {
244266077Sdes		del = delim;
245266077Sdes	}
246266077Sdes
247266077Sdes	p = (par?*par:0);
248266077Sdes	i = 0;
249266077Sdes	com = 0;
250266077Sdes	quoted = 0;
251266077Sdes	t = token;
252266077Sdes	lc = 0;
253266077Sdes	if (del[0] == '"') {
254266077Sdes		quoted = 1;
255266077Sdes	}
256266077Sdes
257266077Sdes	while ((c = sldns_bgetc(b)) != EOF) {
258266077Sdes		if (c == '\r') /* carriage return */
259266077Sdes			c = ' ';
260266077Sdes		if (c == '(' && lc != '\\' && !quoted) {
261266077Sdes			/* this only counts for non-comments */
262266077Sdes			if (com == 0) {
263266077Sdes				if(par) (*par)++;
264266077Sdes				p++;
265266077Sdes			}
266266077Sdes			lc = c;
267266077Sdes			continue;
268266077Sdes		}
269266077Sdes
270266077Sdes		if (c == ')' && lc != '\\' && !quoted) {
271266077Sdes			/* this only counts for non-comments */
272266077Sdes			if (com == 0) {
273266077Sdes				if(par) (*par)--;
274266077Sdes				p--;
275266077Sdes			}
276266077Sdes			lc = c;
277266077Sdes			continue;
278266077Sdes		}
279266077Sdes
280266077Sdes		if (p < 0) {
281266077Sdes			/* more ) then ( */
282266077Sdes			*t = '\0';
283266077Sdes			return 0;
284266077Sdes		}
285266077Sdes
286266077Sdes		/* do something with comments ; */
287266077Sdes		if (c == ';' && quoted == 0) {
288266077Sdes			if (lc != '\\') {
289266077Sdes				com = 1;
290266077Sdes			}
291266077Sdes		}
292266077Sdes		if (c == '"' && com == 0 && lc != '\\') {
293266077Sdes			quoted = 1 - quoted;
294266077Sdes		}
295266077Sdes
296266077Sdes		if (c == '\n' && com != 0) {
297266077Sdes			/* comments */
298266077Sdes			com = 0;
299266077Sdes			*t = ' ';
300266077Sdes			lc = c;
301266077Sdes			continue;
302266077Sdes		}
303266077Sdes
304266077Sdes		if (com == 1) {
305266077Sdes			*t = ' ';
306266077Sdes			lc = c;
307266077Sdes			continue;
308266077Sdes		}
309266077Sdes
310266077Sdes		if (c == '\n' && p != 0) {
311266077Sdes			/* in parentheses */
312266077Sdes			/* do not write ' ' if we want to skip spaces */
313266077Sdes			if(!(skipw && (strchr(skipw, c)||strchr(skipw, ' '))))
314266077Sdes				*t++ = ' ';
315266077Sdes			lc = c;
316266077Sdes			continue;
317266077Sdes		}
318266077Sdes
319266077Sdes		/* check to skip whitespace at start, but also after ( */
320266077Sdes		if(skipw && i==0 && !com && !quoted && lc != '\\') {
321266077Sdes			if(strchr(skipw, c)) {
322266077Sdes				lc = c;
323266077Sdes				continue;
324266077Sdes			}
325266077Sdes		}
326266077Sdes
327266077Sdes		/* check if we hit the delim */
328266077Sdes		for (d = del; *d; d++) {
329266077Sdes			/* we can only exit if no parens or user tracks them */
330266077Sdes                        if (c == *d && lc != '\\' && (p == 0 || par)) {
331266077Sdes				goto tokenread;
332266077Sdes                        }
333266077Sdes		}
334266077Sdes
335266077Sdes		i++;
336266077Sdes		if (limit > 0 && (i >= limit || (size_t)(t-token) >= limit)) {
337266077Sdes			*t = '\0';
338266077Sdes			return -1;
339266077Sdes		}
340266077Sdes		*t++ = c;
341266077Sdes
342266077Sdes		if (c == '\\' && lc == '\\') {
343266077Sdes			lc = 0;
344266077Sdes		} else {
345266077Sdes			lc = c;
346266077Sdes		}
347266077Sdes	}
348266077Sdes	*t = '\0';
349266077Sdes	if (i == 0) {
350266077Sdes		/* nothing read */
351266077Sdes		return -1;
352266077Sdes	}
353266077Sdes	if (!par && p != 0) {
354266077Sdes		return -1;
355266077Sdes	}
356266077Sdes	return (ssize_t)i;
357266077Sdes
358266077Sdestokenread:
359266077Sdes	if(*del == '"')
360266077Sdes		/* do not skip over quotes after the string, they are part
361266077Sdes		 * of the next string.  But skip over whitespace (if needed)*/
362266077Sdes		sldns_bskipcs(b, del+1);
363266077Sdes	else 	sldns_bskipcs(b, del);
364266077Sdes	*t = '\0';
365266077Sdes
366266077Sdes	if (!par && p != 0) {
367266077Sdes		return -1;
368266077Sdes	}
369266077Sdes	return (ssize_t)i;
370266077Sdes}
371266077Sdes
372266077Sdes
373266077Sdesvoid
374266077Sdessldns_bskipcs(sldns_buffer *buffer, const char *s)
375266077Sdes{
376266077Sdes        int found;
377266077Sdes        char c;
378266077Sdes        const char *d;
379266077Sdes
380266077Sdes        while(sldns_buffer_available_at(buffer, buffer->_position, sizeof(char))) {
381266077Sdes                c = (char) sldns_buffer_read_u8_at(buffer, buffer->_position);
382266077Sdes                found = 0;
383266077Sdes                for (d = s; *d; d++) {
384266077Sdes                        if (*d == c) {
385266077Sdes                                found = 1;
386266077Sdes                        }
387266077Sdes                }
388266077Sdes                if (found && buffer->_limit > buffer->_position) {
389266077Sdes                        buffer->_position += sizeof(char);
390266077Sdes                } else {
391266077Sdes                        return;
392266077Sdes                }
393266077Sdes        }
394266077Sdes}
395266077Sdes
/*
 * Skip the leading characters of stream fp that occur in the set s,
 * without keeping a line count.
 *
 * Convenience wrapper: calls sldns_fskipcs_l() with a NULL line counter.
 */
void
sldns_fskipcs(FILE *fp, const char *s)
{
	int *no_line_counter = NULL;

	sldns_fskipcs_l(fp, s, no_line_counter);
}
401266077Sdes
/*
 * Skip the leading characters of stream fp that occur in the set s.
 *
 * fp:      stream to read from.
 * s:       NUL-terminated set of characters to skip.
 * line_nr: optional line counter (may be NULL); incremented for every
 *          '\n' read, including one that turns out not to be in s and is
 *          pushed back.
 *
 * The first character that is not in s is pushed back with ungetc(), so
 * the next read returns it.  Reading simply stops at end of input.
 */
void
sldns_fskipcs_l(FILE *fp, const char *s, int *line_nr)
{
	int ch;

	for (ch = fgetc(fp); ch != EOF; ch = fgetc(fp)) {
		const char *member = s;

		if (ch == '\n' && line_nr) {
			*line_nr += 1;
		}

		/* walk the set until ch is found or the set is exhausted */
		while (*member && *member != ch) {
			member++;
		}

		if (*member == '\0') {
			/* not in the set: we have read one character too far */
			ungetc(ch, fp);
			return;
		}
	}
}
426266077Sdes
427266077Sdesssize_t
428266077Sdessldns_bget_keyword_data(sldns_buffer *b, const char *keyword, const char *k_del, char
429266077Sdes*data, const char *d_del, size_t data_limit)
430266077Sdes{
431266077Sdes       /* we assume: keyword|sep|data */
432266077Sdes       char *fkeyword;
433266077Sdes       ssize_t i;
434266077Sdes
435266077Sdes       if(strlen(keyword) >= LDNS_MAX_KEYWORDLEN)
436266077Sdes               return -1;
437266077Sdes       fkeyword = (char*)malloc(LDNS_MAX_KEYWORDLEN);
438266077Sdes       if(!fkeyword)
439266077Sdes               return -1; /* out of memory */
440266077Sdes
441266077Sdes       i = sldns_bget_token(b, fkeyword, k_del, data_limit);
442266077Sdes       if(i==0 || i==-1) {
443266077Sdes               free(fkeyword);
444266077Sdes               return -1; /* nothing read */
445266077Sdes       }
446266077Sdes
447266077Sdes       /* case??? */
448266077Sdes       if (strncmp(fkeyword, keyword, strlen(keyword)) == 0) {
449266077Sdes               free(fkeyword);
450266077Sdes               /* whee, the match! */
451266077Sdes               /* retrieve it's data */
452266077Sdes               i = sldns_bget_token(b, data, d_del, 0);
453266077Sdes               return i;
454266077Sdes       } else {
455266077Sdes               free(fkeyword);
456266077Sdes               return -1;
457266077Sdes       }
458266077Sdes}
459266077Sdes
460