/*
 * A generic (simple) parser. Used to parse RRs, private key
 * information and /etc/resolv.conf files.
 *
 * a Net::DNS like library for C
 * LibDNS Team @ NLnet Labs
 * (c) NLnet Labs, 2005-2006
 * See the file LICENSE for the license
 */
10266077Sdes#include "config.h"
11287915Sdes#include "sldns/parse.h"
12287915Sdes#include "sldns/parseutil.h"
13287915Sdes#include "sldns/sbuffer.h"
14266077Sdes
15266077Sdes#include <limits.h>
16266077Sdes#include <strings.h>
17266077Sdes
18266077Sdessldns_lookup_table sldns_directive_types[] = {
19266077Sdes        { LDNS_DIR_TTL, "$TTL" },
20266077Sdes        { LDNS_DIR_ORIGIN, "$ORIGIN" },
21266077Sdes        { LDNS_DIR_INCLUDE, "$INCLUDE" },
22266077Sdes        { 0, NULL }
23266077Sdes};
24266077Sdes
/*
 * Read one token from a stream without keeping a line count.
 *
 * Convenience form of sldns_fget_token_l(): the arguments are forwarded
 * unchanged and a NULL line counter is supplied, so the return value is
 * whatever sldns_fget_token_l() returns.
 *
 * Open question kept from the original author: add max_limit here?
 */
ssize_t
sldns_fget_token(FILE *f, char *token, const char *delim, size_t limit)
{
	int *no_line_counter = NULL;

	return sldns_fget_token_l(f, token, delim, limit, no_line_counter);
}
31266077Sdes
32266077Sdesssize_t
33266077Sdessldns_fget_token_l(FILE *f, char *token, const char *delim, size_t limit, int *line_nr)
34266077Sdes{
35266077Sdes	int c, prev_c;
36356345Scy	int p; /* 0 -> no parentheses seen, >0 nr of ( seen */
37266077Sdes	int com, quoted;
38266077Sdes	char *t;
39266077Sdes	size_t i;
40266077Sdes	const char *d;
41266077Sdes	const char *del;
42266077Sdes
43356345Scy	/* standard delimiters */
44266077Sdes	if (!delim) {
45266077Sdes		/* from isspace(3) */
46266077Sdes		del = LDNS_PARSE_NORMAL;
47266077Sdes	} else {
48266077Sdes		del = delim;
49266077Sdes	}
50266077Sdes
51266077Sdes	p = 0;
52266077Sdes	i = 0;
53266077Sdes	com = 0;
54266077Sdes	quoted = 0;
55266077Sdes	prev_c = 0;
56266077Sdes	t = token;
57266077Sdes	if (del[0] == '"') {
58266077Sdes		quoted = 1;
59266077Sdes	}
60266077Sdes	while ((c = getc(f)) != EOF) {
61266077Sdes		if (c == '\r') /* carriage return */
62266077Sdes			c = ' ';
63266077Sdes		if (c == '(' && prev_c != '\\' && !quoted) {
64266077Sdes			/* this only counts for non-comments */
65266077Sdes			if (com == 0) {
66266077Sdes				p++;
67266077Sdes			}
68266077Sdes			prev_c = c;
69266077Sdes			continue;
70266077Sdes		}
71266077Sdes
72266077Sdes		if (c == ')' && prev_c != '\\' && !quoted) {
73266077Sdes			/* this only counts for non-comments */
74266077Sdes			if (com == 0) {
75266077Sdes				p--;
76266077Sdes			}
77266077Sdes			prev_c = c;
78266077Sdes			continue;
79266077Sdes		}
80266077Sdes
81266077Sdes		if (p < 0) {
82266077Sdes			/* more ) then ( - close off the string */
83266077Sdes			*t = '\0';
84266077Sdes			return 0;
85266077Sdes		}
86266077Sdes
87266077Sdes		/* do something with comments ; */
88266077Sdes		if (c == ';' && quoted == 0) {
89266077Sdes			if (prev_c != '\\') {
90266077Sdes				com = 1;
91266077Sdes			}
92266077Sdes		}
93266077Sdes		if (c == '\"' && com == 0 && prev_c != '\\') {
94266077Sdes			quoted = 1 - quoted;
95266077Sdes		}
96266077Sdes
97266077Sdes		if (c == '\n' && com != 0) {
98266077Sdes			/* comments */
99266077Sdes			com = 0;
100266077Sdes			*t = ' ';
101266077Sdes			if (line_nr) {
102266077Sdes				*line_nr = *line_nr + 1;
103266077Sdes			}
104266077Sdes			if (p == 0 && i > 0) {
105266077Sdes				goto tokenread;
106266077Sdes			} else {
107266077Sdes				prev_c = c;
108266077Sdes				continue;
109266077Sdes			}
110266077Sdes		}
111266077Sdes
112266077Sdes		if (com == 1) {
113266077Sdes			*t = ' ';
114266077Sdes			prev_c = c;
115266077Sdes			continue;
116266077Sdes		}
117266077Sdes
118266077Sdes		if (c == '\n' && p != 0 && t > token) {
119266077Sdes			/* in parentheses */
120266077Sdes			if (line_nr) {
121266077Sdes				*line_nr = *line_nr + 1;
122266077Sdes			}
123361435Scy			if (limit > 0 && (i+1 >= limit || (size_t)(t-token)+1 >= limit)) {
124356345Scy				*t = '\0';
125356345Scy				return -1;
126356345Scy			}
127266077Sdes			*t++ = ' ';
128266077Sdes			prev_c = c;
129266077Sdes			continue;
130266077Sdes		}
131266077Sdes
132266077Sdes		/* check if we hit the delim */
133266077Sdes		for (d = del; *d; d++) {
134266077Sdes			if (c == *d && i > 0 && prev_c != '\\' && p == 0) {
135266077Sdes				if (c == '\n' && line_nr) {
136266077Sdes					*line_nr = *line_nr + 1;
137266077Sdes				}
138266077Sdes				goto tokenread;
139266077Sdes			}
140266077Sdes		}
141266077Sdes		if (c != '\0' && c != '\n') {
142266077Sdes			i++;
143266077Sdes		}
144361435Scy		/* is there space for the character and the zero after it */
145361435Scy		if (limit > 0 && (i+1 >= limit || (size_t)(t-token)+1 >= limit)) {
146266077Sdes			*t = '\0';
147266077Sdes			return -1;
148266077Sdes		}
149266077Sdes		if (c != '\0' && c != '\n') {
150266077Sdes			*t++ = c;
151266077Sdes		}
152266077Sdes		if (c == '\\' && prev_c == '\\')
153266077Sdes			prev_c = 0;
154266077Sdes		else	prev_c = c;
155266077Sdes	}
156266077Sdes	*t = '\0';
157266077Sdes	if (c == EOF) {
158266077Sdes		return (ssize_t)i;
159266077Sdes	}
160266077Sdes
161266077Sdes	if (i == 0) {
162266077Sdes		/* nothing read */
163266077Sdes		return -1;
164266077Sdes	}
165266077Sdes	if (p != 0) {
166266077Sdes		return -1;
167266077Sdes	}
168266077Sdes	return (ssize_t)i;
169266077Sdes
170266077Sdestokenread:
171266077Sdes	if(*del == '"')
172266077Sdes		/* do not skip over quotes after the string, they are part
173266077Sdes		 * of the next string.  But skip over whitespace (if needed)*/
174266077Sdes		sldns_fskipcs_l(f, del+1, line_nr);
175266077Sdes	else	sldns_fskipcs_l(f, del, line_nr);
176266077Sdes	*t = '\0';
177266077Sdes	if (p != 0) {
178266077Sdes		return -1;
179266077Sdes	}
180266077Sdes
181266077Sdes	return (ssize_t)i;
182266077Sdes}
183266077Sdes
/*
 * Read a "keyword<sep>data" pair from a stream without keeping a line
 * count.
 *
 * Forwards every argument to sldns_fget_keyword_data_l() together with a
 * NULL line counter and returns that function's result.
 */
ssize_t
sldns_fget_keyword_data(FILE *f, const char *keyword, const char *k_del, char *data,
	const char *d_del, size_t data_limit)
{
	int *no_line_counter = NULL;

	return sldns_fget_keyword_data_l(f, keyword, k_del, data, d_del,
		data_limit, no_line_counter);
}
191266077Sdes
192266077Sdesssize_t
193266077Sdessldns_fget_keyword_data_l(FILE *f, const char *keyword, const char *k_del, char *data,
194266077Sdes               const char *d_del, size_t data_limit, int *line_nr)
195266077Sdes{
196266077Sdes       /* we assume: keyword|sep|data */
197266077Sdes       char *fkeyword;
198266077Sdes       ssize_t i;
199266077Sdes
200266077Sdes       if(strlen(keyword) >= LDNS_MAX_KEYWORDLEN)
201266077Sdes               return -1;
202266077Sdes       fkeyword = (char*)malloc(LDNS_MAX_KEYWORDLEN);
203266077Sdes       if(!fkeyword)
204266077Sdes               return -1;
205266077Sdes
206266077Sdes       i = sldns_fget_token(f, fkeyword, k_del, LDNS_MAX_KEYWORDLEN);
207266077Sdes       if(i==0 || i==-1) {
208266077Sdes               free(fkeyword);
209266077Sdes               return -1;
210266077Sdes       }
211266077Sdes
212266077Sdes       /* case??? i instead of strlen? */
213266077Sdes       if (strncmp(fkeyword, keyword, LDNS_MAX_KEYWORDLEN - 1) == 0) {
214266077Sdes               /* whee! */
215266077Sdes               /* printf("%s\n%s\n", "Matching keyword", fkeyword); */
216266077Sdes               i = sldns_fget_token_l(f, data, d_del, data_limit, line_nr);
217266077Sdes               free(fkeyword);
218266077Sdes               return i;
219266077Sdes       } else {
220266077Sdes               /*printf("no match for %s (read: %s)\n", keyword, fkeyword);*/
221266077Sdes               free(fkeyword);
222266077Sdes               return -1;
223266077Sdes       }
224266077Sdes}
225266077Sdes
226276541Sdesint
227276541Sdessldns_bgetc(sldns_buffer *buffer)
228276541Sdes{
229276541Sdes	if (!sldns_buffer_available_at(buffer, buffer->_position, sizeof(uint8_t))) {
230276541Sdes		sldns_buffer_set_position(buffer, sldns_buffer_limit(buffer));
231276541Sdes		/* sldns_buffer_rewind(buffer);*/
232276541Sdes		return EOF;
233276541Sdes	}
234276541Sdes	return (int)sldns_buffer_read_u8(buffer);
235276541Sdes}
236276541Sdes
237266077Sdesssize_t
238266077Sdessldns_bget_token(sldns_buffer *b, char *token, const char *delim, size_t limit)
239266077Sdes{
240266077Sdes	return sldns_bget_token_par(b, token, delim, limit, NULL, NULL);
241266077Sdes}
242266077Sdes
243266077Sdesssize_t
244266077Sdessldns_bget_token_par(sldns_buffer *b, char *token, const char *delim,
245266077Sdes	size_t limit, int* par, const char* skipw)
246266077Sdes{
247266077Sdes	int c, lc;
248356345Scy	int p; /* 0 -> no parentheses seen, >0 nr of ( seen */
249266077Sdes	int com, quoted;
250266077Sdes	char *t;
251266077Sdes	size_t i;
252266077Sdes	const char *d;
253266077Sdes	const char *del;
254266077Sdes
255266077Sdes	/* standard delimiters */
256266077Sdes	if (!delim) {
257266077Sdes		/* from isspace(3) */
258266077Sdes		del = LDNS_PARSE_NORMAL;
259266077Sdes	} else {
260266077Sdes		del = delim;
261266077Sdes	}
262266077Sdes
263266077Sdes	p = (par?*par:0);
264266077Sdes	i = 0;
265266077Sdes	com = 0;
266266077Sdes	quoted = 0;
267266077Sdes	t = token;
268266077Sdes	lc = 0;
269266077Sdes	if (del[0] == '"') {
270266077Sdes		quoted = 1;
271266077Sdes	}
272266077Sdes
273266077Sdes	while ((c = sldns_bgetc(b)) != EOF) {
274266077Sdes		if (c == '\r') /* carriage return */
275266077Sdes			c = ' ';
276266077Sdes		if (c == '(' && lc != '\\' && !quoted) {
277266077Sdes			/* this only counts for non-comments */
278266077Sdes			if (com == 0) {
279266077Sdes				if(par) (*par)++;
280266077Sdes				p++;
281266077Sdes			}
282266077Sdes			lc = c;
283266077Sdes			continue;
284266077Sdes		}
285266077Sdes
286266077Sdes		if (c == ')' && lc != '\\' && !quoted) {
287266077Sdes			/* this only counts for non-comments */
288266077Sdes			if (com == 0) {
289266077Sdes				if(par) (*par)--;
290266077Sdes				p--;
291266077Sdes			}
292266077Sdes			lc = c;
293266077Sdes			continue;
294266077Sdes		}
295266077Sdes
296266077Sdes		if (p < 0) {
297266077Sdes			/* more ) then ( */
298266077Sdes			*t = '\0';
299266077Sdes			return 0;
300266077Sdes		}
301266077Sdes
302266077Sdes		/* do something with comments ; */
303266077Sdes		if (c == ';' && quoted == 0) {
304266077Sdes			if (lc != '\\') {
305266077Sdes				com = 1;
306266077Sdes			}
307266077Sdes		}
308266077Sdes		if (c == '"' && com == 0 && lc != '\\') {
309266077Sdes			quoted = 1 - quoted;
310266077Sdes		}
311266077Sdes
312266077Sdes		if (c == '\n' && com != 0) {
313266077Sdes			/* comments */
314266077Sdes			com = 0;
315266077Sdes			*t = ' ';
316266077Sdes			lc = c;
317266077Sdes			continue;
318266077Sdes		}
319266077Sdes
320266077Sdes		if (com == 1) {
321266077Sdes			*t = ' ';
322266077Sdes			lc = c;
323266077Sdes			continue;
324266077Sdes		}
325266077Sdes
326266077Sdes		if (c == '\n' && p != 0) {
327266077Sdes			/* in parentheses */
328266077Sdes			/* do not write ' ' if we want to skip spaces */
329356345Scy			if(!(skipw && (strchr(skipw, c)||strchr(skipw, ' ')))) {
330361435Scy				/* check for space for the space character and a zero delimiter after that. */
331361435Scy				if (limit > 0 && (i+1 >= limit || (size_t)(t-token)+1 >= limit)) {
332356345Scy					*t = '\0';
333356345Scy					return -1;
334356345Scy				}
335266077Sdes				*t++ = ' ';
336356345Scy			}
337266077Sdes			lc = c;
338266077Sdes			continue;
339266077Sdes		}
340266077Sdes
341266077Sdes		/* check to skip whitespace at start, but also after ( */
342266077Sdes		if(skipw && i==0 && !com && !quoted && lc != '\\') {
343266077Sdes			if(strchr(skipw, c)) {
344266077Sdes				lc = c;
345266077Sdes				continue;
346266077Sdes			}
347266077Sdes		}
348266077Sdes
349266077Sdes		/* check if we hit the delim */
350266077Sdes		for (d = del; *d; d++) {
351266077Sdes			/* we can only exit if no parens or user tracks them */
352266077Sdes                        if (c == *d && lc != '\\' && (p == 0 || par)) {
353266077Sdes				goto tokenread;
354266077Sdes                        }
355266077Sdes		}
356266077Sdes
357266077Sdes		i++;
358361435Scy		if (limit > 0 && (i+1 >= limit || (size_t)(t-token)+1 >= limit)) {
359266077Sdes			*t = '\0';
360266077Sdes			return -1;
361266077Sdes		}
362266077Sdes		*t++ = c;
363266077Sdes
364266077Sdes		if (c == '\\' && lc == '\\') {
365266077Sdes			lc = 0;
366266077Sdes		} else {
367266077Sdes			lc = c;
368266077Sdes		}
369266077Sdes	}
370266077Sdes	*t = '\0';
371266077Sdes	if (i == 0) {
372266077Sdes		/* nothing read */
373266077Sdes		return -1;
374266077Sdes	}
375266077Sdes	if (!par && p != 0) {
376266077Sdes		return -1;
377266077Sdes	}
378266077Sdes	return (ssize_t)i;
379266077Sdes
380266077Sdestokenread:
381266077Sdes	if(*del == '"')
382266077Sdes		/* do not skip over quotes after the string, they are part
383266077Sdes		 * of the next string.  But skip over whitespace (if needed)*/
384266077Sdes		sldns_bskipcs(b, del+1);
385266077Sdes	else 	sldns_bskipcs(b, del);
386266077Sdes	*t = '\0';
387266077Sdes
388266077Sdes	if (!par && p != 0) {
389266077Sdes		return -1;
390266077Sdes	}
391266077Sdes	return (ssize_t)i;
392266077Sdes}
393266077Sdes
394266077Sdes
395266077Sdesvoid
396266077Sdessldns_bskipcs(sldns_buffer *buffer, const char *s)
397266077Sdes{
398266077Sdes        int found;
399266077Sdes        char c;
400266077Sdes        const char *d;
401266077Sdes
402266077Sdes        while(sldns_buffer_available_at(buffer, buffer->_position, sizeof(char))) {
403266077Sdes                c = (char) sldns_buffer_read_u8_at(buffer, buffer->_position);
404266077Sdes                found = 0;
405266077Sdes                for (d = s; *d; d++) {
406266077Sdes                        if (*d == c) {
407266077Sdes                                found = 1;
408266077Sdes                        }
409266077Sdes                }
410266077Sdes                if (found && buffer->_limit > buffer->_position) {
411266077Sdes                        buffer->_position += sizeof(char);
412266077Sdes                } else {
413266077Sdes                        return;
414266077Sdes                }
415266077Sdes        }
416266077Sdes}
417266077Sdes
/*
 * Skip the leading characters of fp that occur in the set s, without
 * keeping a line count: calls sldns_fskipcs_l() with a NULL line counter.
 */
void
sldns_fskipcs(FILE *fp, const char *s)
{
	int *no_line_counter = NULL;

	sldns_fskipcs_l(fp, s, no_line_counter);
}
423266077Sdes
/*
 * Consume the leading characters of fp that occur in the set s.
 *
 * Reading stops at end of file or at the first character that is not in
 * s; that character is pushed back with ungetc() so the next read sees it.
 * If line_nr is not NULL it is incremented for every '\n' read, including
 * a '\n' that is then pushed back because it is not in s.
 */
void
sldns_fskipcs_l(FILE *fp, const char *s, int *line_nr)
{
	for (;;) {
		const char *probe;
		int ch = fgetc(fp);

		if (ch == EOF) {
			return;
		}
		if (ch == '\n' && line_nr) {
			(*line_nr)++;
		}

		/* walk the set until a match or its terminating NUL */
		for (probe = s; *probe != '\0' && *probe != ch; probe++) {
			;
		}

		if (*probe == '\0') {
			/* with getc, we've read too far */
			ungetc(ch, fp);
			return;
		}
	}
}
448266077Sdes
449266077Sdesssize_t
450266077Sdessldns_bget_keyword_data(sldns_buffer *b, const char *keyword, const char *k_del, char
451266077Sdes*data, const char *d_del, size_t data_limit)
452266077Sdes{
453266077Sdes       /* we assume: keyword|sep|data */
454266077Sdes       char *fkeyword;
455266077Sdes       ssize_t i;
456266077Sdes
457266077Sdes       if(strlen(keyword) >= LDNS_MAX_KEYWORDLEN)
458266077Sdes               return -1;
459266077Sdes       fkeyword = (char*)malloc(LDNS_MAX_KEYWORDLEN);
460266077Sdes       if(!fkeyword)
461266077Sdes               return -1; /* out of memory */
462266077Sdes
463266077Sdes       i = sldns_bget_token(b, fkeyword, k_del, data_limit);
464266077Sdes       if(i==0 || i==-1) {
465266077Sdes               free(fkeyword);
466266077Sdes               return -1; /* nothing read */
467266077Sdes       }
468266077Sdes
469266077Sdes       /* case??? */
470266077Sdes       if (strncmp(fkeyword, keyword, strlen(keyword)) == 0) {
471266077Sdes               free(fkeyword);
472266077Sdes               /* whee, the match! */
473266077Sdes               /* retrieve it's data */
474266077Sdes               i = sldns_bget_token(b, data, d_del, 0);
475266077Sdes               return i;
476266077Sdes       } else {
477266077Sdes               free(fkeyword);
478266077Sdes               return -1;
479266077Sdes       }
480266077Sdes}
481266077Sdes
482