parse.c revision 356345
1/*
2 * a generic (simple) parser. Use to parse rr's, private key
3 * information and /etc/resolv.conf files
4 *
5 * a Net::DNS like library for C
6 * LibDNS Team @ NLnet Labs
7 * (c) NLnet Labs, 2005-2006
8 * See the file LICENSE for the license
9 */
10#include "config.h"
11#include "sldns/parse.h"
12#include "sldns/parseutil.h"
13#include "sldns/sbuffer.h"
14
15#include <limits.h>
16#include <strings.h>
17
18sldns_lookup_table sldns_directive_types[] = {
19        { LDNS_DIR_TTL, "$TTL" },
20        { LDNS_DIR_ORIGIN, "$ORIGIN" },
21        { LDNS_DIR_INCLUDE, "$INCLUDE" },
22        { 0, NULL }
23};
24
/*
 * Read one token from stream f into token, without line counting.
 *
 * Forwards to sldns_fget_token_l() with a NULL line counter and returns
 * its result unchanged.
 *
 * TODO (carried over from the original): add max_limit here?
 */
ssize_t
sldns_fget_token(FILE *f, char *token, const char *delim, size_t limit)
{
	int *no_line_counter = NULL;

	return sldns_fget_token_l(f, token, delim, limit, no_line_counter);
}
31
32ssize_t
33sldns_fget_token_l(FILE *f, char *token, const char *delim, size_t limit, int *line_nr)
34{
35	int c, prev_c;
36	int p; /* 0 -> no parentheses seen, >0 nr of ( seen */
37	int com, quoted;
38	char *t;
39	size_t i;
40	const char *d;
41	const char *del;
42
43	/* standard delimiters */
44	if (!delim) {
45		/* from isspace(3) */
46		del = LDNS_PARSE_NORMAL;
47	} else {
48		del = delim;
49	}
50
51	p = 0;
52	i = 0;
53	com = 0;
54	quoted = 0;
55	prev_c = 0;
56	t = token;
57	if (del[0] == '"') {
58		quoted = 1;
59	}
60	while ((c = getc(f)) != EOF) {
61		if (c == '\r') /* carriage return */
62			c = ' ';
63		if (c == '(' && prev_c != '\\' && !quoted) {
64			/* this only counts for non-comments */
65			if (com == 0) {
66				p++;
67			}
68			prev_c = c;
69			continue;
70		}
71
72		if (c == ')' && prev_c != '\\' && !quoted) {
73			/* this only counts for non-comments */
74			if (com == 0) {
75				p--;
76			}
77			prev_c = c;
78			continue;
79		}
80
81		if (p < 0) {
82			/* more ) then ( - close off the string */
83			*t = '\0';
84			return 0;
85		}
86
87		/* do something with comments ; */
88		if (c == ';' && quoted == 0) {
89			if (prev_c != '\\') {
90				com = 1;
91			}
92		}
93		if (c == '\"' && com == 0 && prev_c != '\\') {
94			quoted = 1 - quoted;
95		}
96
97		if (c == '\n' && com != 0) {
98			/* comments */
99			com = 0;
100			*t = ' ';
101			if (line_nr) {
102				*line_nr = *line_nr + 1;
103			}
104			if (p == 0 && i > 0) {
105				goto tokenread;
106			} else {
107				prev_c = c;
108				continue;
109			}
110		}
111
112		if (com == 1) {
113			*t = ' ';
114			prev_c = c;
115			continue;
116		}
117
118		if (c == '\n' && p != 0 && t > token) {
119			/* in parentheses */
120			if (line_nr) {
121				*line_nr = *line_nr + 1;
122			}
123			if (limit > 0 && (i >= limit || (size_t)(t-token) >= limit)) {
124				*t = '\0';
125				return -1;
126			}
127			*t++ = ' ';
128			prev_c = c;
129			continue;
130		}
131
132		/* check if we hit the delim */
133		for (d = del; *d; d++) {
134			if (c == *d && i > 0 && prev_c != '\\' && p == 0) {
135				if (c == '\n' && line_nr) {
136					*line_nr = *line_nr + 1;
137				}
138				goto tokenread;
139			}
140		}
141		if (c != '\0' && c != '\n') {
142			i++;
143		}
144		if (limit > 0 && (i >= limit || (size_t)(t-token) >= limit)) {
145			*t = '\0';
146			return -1;
147		}
148		if (c != '\0' && c != '\n') {
149			*t++ = c;
150		}
151		if (c == '\\' && prev_c == '\\')
152			prev_c = 0;
153		else	prev_c = c;
154	}
155	*t = '\0';
156	if (c == EOF) {
157		return (ssize_t)i;
158	}
159
160	if (i == 0) {
161		/* nothing read */
162		return -1;
163	}
164	if (p != 0) {
165		return -1;
166	}
167	return (ssize_t)i;
168
169tokenread:
170	if(*del == '"')
171		/* do not skip over quotes after the string, they are part
172		 * of the next string.  But skip over whitespace (if needed)*/
173		sldns_fskipcs_l(f, del+1, line_nr);
174	else	sldns_fskipcs_l(f, del, line_nr);
175	*t = '\0';
176	if (p != 0) {
177		return -1;
178	}
179
180	return (ssize_t)i;
181}
182
/*
 * Read "keyword|sep|data" from stream f without line counting.
 *
 * Forwards to sldns_fget_keyword_data_l() with a NULL line counter and
 * returns its result unchanged.
 */
ssize_t
sldns_fget_keyword_data(FILE *f, const char *keyword, const char *k_del, char *data,
               const char *d_del, size_t data_limit)
{
	int *no_line_counter = NULL;

	return sldns_fget_keyword_data_l(f, keyword, k_del, data, d_del,
		data_limit, no_line_counter);
}
190
191ssize_t
192sldns_fget_keyword_data_l(FILE *f, const char *keyword, const char *k_del, char *data,
193               const char *d_del, size_t data_limit, int *line_nr)
194{
195       /* we assume: keyword|sep|data */
196       char *fkeyword;
197       ssize_t i;
198
199       if(strlen(keyword) >= LDNS_MAX_KEYWORDLEN)
200               return -1;
201       fkeyword = (char*)malloc(LDNS_MAX_KEYWORDLEN);
202       if(!fkeyword)
203               return -1;
204
205       i = sldns_fget_token(f, fkeyword, k_del, LDNS_MAX_KEYWORDLEN);
206       if(i==0 || i==-1) {
207               free(fkeyword);
208               return -1;
209       }
210
211       /* case??? i instead of strlen? */
212       if (strncmp(fkeyword, keyword, LDNS_MAX_KEYWORDLEN - 1) == 0) {
213               /* whee! */
214               /* printf("%s\n%s\n", "Matching keyword", fkeyword); */
215               i = sldns_fget_token_l(f, data, d_del, data_limit, line_nr);
216               free(fkeyword);
217               return i;
218       } else {
219               /*printf("no match for %s (read: %s)\n", keyword, fkeyword);*/
220               free(fkeyword);
221               return -1;
222       }
223}
224
225int
226sldns_bgetc(sldns_buffer *buffer)
227{
228	if (!sldns_buffer_available_at(buffer, buffer->_position, sizeof(uint8_t))) {
229		sldns_buffer_set_position(buffer, sldns_buffer_limit(buffer));
230		/* sldns_buffer_rewind(buffer);*/
231		return EOF;
232	}
233	return (int)sldns_buffer_read_u8(buffer);
234}
235
236ssize_t
237sldns_bget_token(sldns_buffer *b, char *token, const char *delim, size_t limit)
238{
239	return sldns_bget_token_par(b, token, delim, limit, NULL, NULL);
240}
241
242ssize_t
243sldns_bget_token_par(sldns_buffer *b, char *token, const char *delim,
244	size_t limit, int* par, const char* skipw)
245{
246	int c, lc;
247	int p; /* 0 -> no parentheses seen, >0 nr of ( seen */
248	int com, quoted;
249	char *t;
250	size_t i;
251	const char *d;
252	const char *del;
253
254	/* standard delimiters */
255	if (!delim) {
256		/* from isspace(3) */
257		del = LDNS_PARSE_NORMAL;
258	} else {
259		del = delim;
260	}
261
262	p = (par?*par:0);
263	i = 0;
264	com = 0;
265	quoted = 0;
266	t = token;
267	lc = 0;
268	if (del[0] == '"') {
269		quoted = 1;
270	}
271
272	while ((c = sldns_bgetc(b)) != EOF) {
273		if (c == '\r') /* carriage return */
274			c = ' ';
275		if (c == '(' && lc != '\\' && !quoted) {
276			/* this only counts for non-comments */
277			if (com == 0) {
278				if(par) (*par)++;
279				p++;
280			}
281			lc = c;
282			continue;
283		}
284
285		if (c == ')' && lc != '\\' && !quoted) {
286			/* this only counts for non-comments */
287			if (com == 0) {
288				if(par) (*par)--;
289				p--;
290			}
291			lc = c;
292			continue;
293		}
294
295		if (p < 0) {
296			/* more ) then ( */
297			*t = '\0';
298			return 0;
299		}
300
301		/* do something with comments ; */
302		if (c == ';' && quoted == 0) {
303			if (lc != '\\') {
304				com = 1;
305			}
306		}
307		if (c == '"' && com == 0 && lc != '\\') {
308			quoted = 1 - quoted;
309		}
310
311		if (c == '\n' && com != 0) {
312			/* comments */
313			com = 0;
314			*t = ' ';
315			lc = c;
316			continue;
317		}
318
319		if (com == 1) {
320			*t = ' ';
321			lc = c;
322			continue;
323		}
324
325		if (c == '\n' && p != 0) {
326			/* in parentheses */
327			/* do not write ' ' if we want to skip spaces */
328			if(!(skipw && (strchr(skipw, c)||strchr(skipw, ' ')))) {
329				/* check for space for the space character */
330				if (limit > 0 && (i >= limit || (size_t)(t-token) >= limit)) {
331					*t = '\0';
332					return -1;
333				}
334				*t++ = ' ';
335			}
336			lc = c;
337			continue;
338		}
339
340		/* check to skip whitespace at start, but also after ( */
341		if(skipw && i==0 && !com && !quoted && lc != '\\') {
342			if(strchr(skipw, c)) {
343				lc = c;
344				continue;
345			}
346		}
347
348		/* check if we hit the delim */
349		for (d = del; *d; d++) {
350			/* we can only exit if no parens or user tracks them */
351                        if (c == *d && lc != '\\' && (p == 0 || par)) {
352				goto tokenread;
353                        }
354		}
355
356		i++;
357		if (limit > 0 && (i >= limit || (size_t)(t-token) >= limit)) {
358			*t = '\0';
359			return -1;
360		}
361		*t++ = c;
362
363		if (c == '\\' && lc == '\\') {
364			lc = 0;
365		} else {
366			lc = c;
367		}
368	}
369	*t = '\0';
370	if (i == 0) {
371		/* nothing read */
372		return -1;
373	}
374	if (!par && p != 0) {
375		return -1;
376	}
377	return (ssize_t)i;
378
379tokenread:
380	if(*del == '"')
381		/* do not skip over quotes after the string, they are part
382		 * of the next string.  But skip over whitespace (if needed)*/
383		sldns_bskipcs(b, del+1);
384	else 	sldns_bskipcs(b, del);
385	*t = '\0';
386
387	if (!par && p != 0) {
388		return -1;
389	}
390	return (ssize_t)i;
391}
392
393
394void
395sldns_bskipcs(sldns_buffer *buffer, const char *s)
396{
397        int found;
398        char c;
399        const char *d;
400
401        while(sldns_buffer_available_at(buffer, buffer->_position, sizeof(char))) {
402                c = (char) sldns_buffer_read_u8_at(buffer, buffer->_position);
403                found = 0;
404                for (d = s; *d; d++) {
405                        if (*d == c) {
406                                found = 1;
407                        }
408                }
409                if (found && buffer->_limit > buffer->_position) {
410                        buffer->_position += sizeof(char);
411                } else {
412                        return;
413                }
414        }
415}
416
/*
 * Skip the characters of set s at the current position of stream fp,
 * without line counting.  Forwards to sldns_fskipcs_l() with a NULL
 * line counter.
 */
void
sldns_fskipcs(FILE *fp, const char *s)
{
	int *no_line_counter = NULL;

	sldns_fskipcs_l(fp, s, no_line_counter);
}
422
/*
 * Consume characters from stream fp for as long as they occur in the
 * set s.  The first character that is not in s is pushed back with
 * ungetc(), so the next read sees it again.
 *
 * line_nr: when non-NULL, incremented for every '\n' that is read.  This
 *          includes a '\n' that is not in s and is pushed back.
 */
void
sldns_fskipcs_l(FILE *fp, const char *s, int *line_nr)
{
	int ch;

	for (;;) {
		const char *m;

		ch = fgetc(fp);
		if (ch == EOF) {
			return;
		}
		if (ch == '\n' && line_nr) {
			(*line_nr)++;
		}

		/* look for ch among the characters of s */
		for (m = s; *m && *m != ch; m++) {
			/* keep scanning */
		}
		if (*m == '\0') {
			/* not a skip character: we have read one too far */
			ungetc(ch, fp);
			return;
		}
	}
}
447
448ssize_t
449sldns_bget_keyword_data(sldns_buffer *b, const char *keyword, const char *k_del, char
450*data, const char *d_del, size_t data_limit)
451{
452       /* we assume: keyword|sep|data */
453       char *fkeyword;
454       ssize_t i;
455
456       if(strlen(keyword) >= LDNS_MAX_KEYWORDLEN)
457               return -1;
458       fkeyword = (char*)malloc(LDNS_MAX_KEYWORDLEN);
459       if(!fkeyword)
460               return -1; /* out of memory */
461
462       i = sldns_bget_token(b, fkeyword, k_del, data_limit);
463       if(i==0 || i==-1) {
464               free(fkeyword);
465               return -1; /* nothing read */
466       }
467
468       /* case??? */
469       if (strncmp(fkeyword, keyword, strlen(keyword)) == 0) {
470               free(fkeyword);
471               /* whee, the match! */
472               /* retrieve it's data */
473               i = sldns_bget_token(b, data, d_del, 0);
474               return i;
475       } else {
476               free(fkeyword);
477               return -1;
478       }
479}
480
481