1/*
2 * a generic (simple) parser. Use to parse rr's, private key
3 * information and /etc/resolv.conf files
4 *
5 * a Net::DNS like library for C
6 * LibDNS Team @ NLnet Labs
7 * (c) NLnet Labs, 2005-2006
8 * See the file LICENSE for the license
9 */
10#include "config.h"
11#include "sldns/parse.h"
12#include "sldns/parseutil.h"
13#include "sldns/sbuffer.h"
14
15#include <limits.h>
16#include <strings.h>
17
18sldns_lookup_table sldns_directive_types[] = {
19        { LDNS_DIR_TTL, "$TTL" },
20        { LDNS_DIR_ORIGIN, "$ORIGIN" },
21        { LDNS_DIR_INCLUDE, "$INCLUDE" },
22        { 0, NULL }
23};
24
25/* add max_limit here? */
/*
 * Read one token from a file without keeping track of line numbers.
 * Identical to sldns_fget_token_l() called with a NULL line counter;
 * the return value of that call is passed through unchanged.
 */
ssize_t
sldns_fget_token(FILE *f, char *token, const char *delim, size_t limit)
{
	int *no_line_counter = NULL;

	return sldns_fget_token_l(f, token, delim, limit, no_line_counter);
}
31
32ssize_t
33sldns_fget_token_l(FILE *f, char *token, const char *delim, size_t limit, int *line_nr)
34{
35	int c, prev_c;
36	int p; /* 0 -> no parentheses seen, >0 nr of ( seen */
37	int com, quoted, only_blank;
38	char *t;
39	size_t i;
40	const char *d;
41	const char *del;
42
43	/* standard delimiters */
44	if (!delim) {
45		/* from isspace(3) */
46		del = LDNS_PARSE_NORMAL;
47	} else {
48		del = delim;
49	}
50
51	p = 0;
52	i = 0;
53	com = 0;
54	quoted = 0;
55	prev_c = 0;
56	only_blank = 1;	/* Assume we got only <blank> until now */
57	t = token;
58	if (del[0] == '"') {
59		quoted = 1;
60	}
61	while ((c = getc(f)) != EOF) {
62		if (c == '\r') /* carriage return */
63			c = ' ';
64		if (c == '(' && prev_c != '\\' && !quoted) {
65			/* this only counts for non-comments */
66			if (com == 0) {
67				p++;
68			}
69			prev_c = c;
70			continue;
71		}
72
73		if (c == ')' && prev_c != '\\' && !quoted) {
74			/* this only counts for non-comments */
75			if (com == 0) {
76				p--;
77			}
78			prev_c = c;
79			continue;
80		}
81
82		if (p < 0) {
83			/* more ) then ( - close off the string */
84			*t = '\0';
85			return 0;
86		}
87
88		/* do something with comments ; */
89		if (c == ';' && quoted == 0) {
90			if (prev_c != '\\') {
91				com = 1;
92			}
93		}
94		if (c == '\"' && com == 0 && prev_c != '\\') {
95			quoted = 1 - quoted;
96		}
97
98		if (c == '\n' && com != 0) {
99			/* comments */
100			com = 0;
101			*t = ' ';
102			if (line_nr) {
103				*line_nr = *line_nr + 1;
104			}
105			if (only_blank && i > 0) {
106				/* Got only <blank> so far. Reset and try
107				 * again with the next line.
108				 */
109				i = 0;
110				t = token;
111			}
112			if (p == 0) {
113				/* If p != 0 then the next line is a continuation. So
114				 * we assume that the next line starts with a blank only
115				 * if it is actually a new line.
116				 */
117				only_blank = 1;	/* Assume next line starts with
118						 * <blank>.
119						 */
120			}
121			if (p == 0 && i > 0) {
122				goto tokenread;
123			} else {
124				prev_c = c;
125				continue;
126			}
127		}
128
129		if (com == 1) {
130			*t = ' ';
131			prev_c = c;
132			continue;
133		}
134
135		if (c == '\n' && p != 0 && t > token) {
136			/* in parentheses */
137			if (line_nr) {
138				*line_nr = *line_nr + 1;
139			}
140			if (limit > 0 && (i+1 >= limit || (size_t)(t-token)+1 >= limit)) {
141				*t = '\0';
142				return -1;
143			}
144			*t++ = ' ';
145			prev_c = c;
146			continue;
147		}
148
149		/* check if we hit the delim */
150		for (d = del; *d; d++) {
151			if (c == *d)
152				break;
153		}
154
155		if (c == *d && i > 0 && prev_c != '\\' && p == 0) {
156			if (c == '\n' && line_nr) {
157				*line_nr = *line_nr + 1;
158			}
159			if (only_blank) {
160				/* Got only <blank> so far. Reset and
161				 * try again with the next line.
162				 */
163				i = 0;
164				t = token;
165				only_blank = 1;
166				prev_c = c;
167				continue;
168			}
169			goto tokenread;
170		}
171		if (c != ' ' && c != '\t') {
172			/* Found something that is not <blank> */
173			only_blank= 0;
174		}
175		if (c != '\0' && c != '\n') {
176			i++;
177		}
178		/* is there space for the character and the zero after it */
179		if (limit > 0 && (i+1 >= limit || (size_t)(t-token)+1 >= limit)) {
180			*t = '\0';
181			return -1;
182		}
183		if (c != '\0' && c != '\n') {
184			*t++ = c;
185		}
186		if (c == '\n') {
187			if (line_nr) {
188				*line_nr = *line_nr + 1;
189			}
190			only_blank = 1;	/* Assume next line starts with
191					 * <blank>.
192					 */
193		}
194		if (c == '\\' && prev_c == '\\')
195			prev_c = 0;
196		else	prev_c = c;
197	}
198	*t = '\0';
199	if (c == EOF) {
200		return (ssize_t)i;
201	}
202
203	if (i == 0) {
204		/* nothing read */
205		return -1;
206	}
207	if (p != 0) {
208		return -1;
209	}
210	return (ssize_t)i;
211
212tokenread:
213	if(*del == '"')
214		/* do not skip over quotes after the string, they are part
215		 * of the next string.  But skip over whitespace (if needed)*/
216		sldns_fskipcs_l(f, del+1, line_nr);
217	else	sldns_fskipcs_l(f, del, line_nr);
218	*t = '\0';
219	if (p != 0) {
220		return -1;
221	}
222
223	return (ssize_t)i;
224}
225
/*
 * Read "keyword|sep|data" from a file without keeping track of line
 * numbers. Identical to sldns_fget_keyword_data_l() called with a NULL
 * line counter; the return value of that call is passed through unchanged.
 */
ssize_t
sldns_fget_keyword_data(FILE *f, const char *keyword, const char *k_del, char *data,
               const char *d_del, size_t data_limit)
{
	int *no_line_counter = NULL;

	return sldns_fget_keyword_data_l(f, keyword, k_del, data, d_del,
		data_limit, no_line_counter);
}
233
234ssize_t
235sldns_fget_keyword_data_l(FILE *f, const char *keyword, const char *k_del, char *data,
236               const char *d_del, size_t data_limit, int *line_nr)
237{
238       /* we assume: keyword|sep|data */
239       char *fkeyword;
240       ssize_t i;
241
242       if(strlen(keyword) >= LDNS_MAX_KEYWORDLEN)
243               return -1;
244       fkeyword = (char*)malloc(LDNS_MAX_KEYWORDLEN);
245       if(!fkeyword)
246               return -1;
247
248       i = sldns_fget_token(f, fkeyword, k_del, LDNS_MAX_KEYWORDLEN);
249       if(i==0 || i==-1) {
250               free(fkeyword);
251               return -1;
252       }
253
254       /* case??? i instead of strlen? */
255       if (strncmp(fkeyword, keyword, LDNS_MAX_KEYWORDLEN - 1) == 0) {
256               /* whee! */
257               /* printf("%s\n%s\n", "Matching keyword", fkeyword); */
258               i = sldns_fget_token_l(f, data, d_del, data_limit, line_nr);
259               free(fkeyword);
260               return i;
261       } else {
262               /*printf("no match for %s (read: %s)\n", keyword, fkeyword);*/
263               free(fkeyword);
264               return -1;
265       }
266}
267
268int
269sldns_bgetc(sldns_buffer *buffer)
270{
271	if (!sldns_buffer_available_at(buffer, buffer->_position, sizeof(uint8_t))) {
272		sldns_buffer_set_position(buffer, sldns_buffer_limit(buffer));
273		/* sldns_buffer_rewind(buffer);*/
274		return EOF;
275	}
276	return (int)sldns_buffer_read_u8(buffer);
277}
278
279ssize_t
280sldns_bget_token(sldns_buffer *b, char *token, const char *delim, size_t limit)
281{
282	return sldns_bget_token_par(b, token, delim, limit, NULL, NULL);
283}
284
285ssize_t
286sldns_bget_token_par(sldns_buffer *b, char *token, const char *delim,
287	size_t limit, int* par, const char* skipw)
288{
289	int c, lc;
290	int p; /* 0 -> no parentheses seen, >0 nr of ( seen */
291	int com, quoted;
292	char *t;
293	size_t i;
294	const char *d;
295	const char *del;
296
297	/* standard delimiters */
298	if (!delim) {
299		/* from isspace(3) */
300		del = LDNS_PARSE_NORMAL;
301	} else {
302		del = delim;
303	}
304
305	p = (par?*par:0);
306	i = 0;
307	com = 0;
308	quoted = 0;
309	t = token;
310	lc = 0;
311	if (del[0] == '"') {
312		quoted = 1;
313	}
314
315	while ((c = sldns_bgetc(b)) != EOF) {
316		if (c == '\r') /* carriage return */
317			c = ' ';
318		if (c == '(' && lc != '\\' && !quoted) {
319			/* this only counts for non-comments */
320			if (com == 0) {
321				if(par) (*par)++;
322				p++;
323			}
324			lc = c;
325			continue;
326		}
327
328		if (c == ')' && lc != '\\' && !quoted) {
329			/* this only counts for non-comments */
330			if (com == 0) {
331				if(par) (*par)--;
332				p--;
333			}
334			lc = c;
335			continue;
336		}
337
338		if (p < 0) {
339			/* more ) then ( */
340			*t = '\0';
341			return 0;
342		}
343
344		/* do something with comments ; */
345		if (c == ';' && quoted == 0) {
346			if (lc != '\\') {
347				com = 1;
348			}
349		}
350		if (c == '"' && com == 0 && lc != '\\') {
351			quoted = 1 - quoted;
352		}
353
354		if (c == '\n' && com != 0) {
355			/* comments */
356			com = 0;
357			*t = ' ';
358			lc = c;
359			continue;
360		}
361
362		if (com == 1) {
363			*t = ' ';
364			lc = c;
365			continue;
366		}
367
368		if (c == '\n' && p != 0) {
369			/* in parentheses */
370			/* do not write ' ' if we want to skip spaces */
371			if(!(skipw && (strchr(skipw, c)||strchr(skipw, ' ')))) {
372				/* check for space for the space character and a zero delimiter after that. */
373				if (limit > 0 && (i+1 >= limit || (size_t)(t-token)+1 >= limit)) {
374					*t = '\0';
375					return -1;
376				}
377				*t++ = ' ';
378			}
379			lc = c;
380			continue;
381		}
382
383		/* check to skip whitespace at start, but also after ( */
384		if(skipw && i==0 && !com && !quoted && lc != '\\') {
385			if(strchr(skipw, c)) {
386				lc = c;
387				continue;
388			}
389		}
390
391		/* check if we hit the delim */
392		for (d = del; *d; d++) {
393			/* we can only exit if no parens or user tracks them */
394                        if (c == *d && lc != '\\' && (p == 0 || par)) {
395				goto tokenread;
396                        }
397		}
398
399		i++;
400		if (limit > 0 && (i+1 >= limit || (size_t)(t-token)+1 >= limit)) {
401			*t = '\0';
402			return -1;
403		}
404		*t++ = c;
405
406		if (c == '\\' && lc == '\\') {
407			lc = 0;
408		} else {
409			lc = c;
410		}
411	}
412	*t = '\0';
413	if (i == 0) {
414		/* nothing read */
415		return -1;
416	}
417	if (!par && p != 0) {
418		return -1;
419	}
420	return (ssize_t)i;
421
422tokenread:
423	if(*del == '"')
424		/* do not skip over quotes after the string, they are part
425		 * of the next string.  But skip over whitespace (if needed)*/
426		sldns_bskipcs(b, del+1);
427	else 	sldns_bskipcs(b, del);
428	*t = '\0';
429
430	if (!par && p != 0) {
431		return -1;
432	}
433	return (ssize_t)i;
434}
435
436
437void
438sldns_bskipcs(sldns_buffer *buffer, const char *s)
439{
440        int found;
441        char c;
442        const char *d;
443
444        while(sldns_buffer_available_at(buffer, buffer->_position, sizeof(char))) {
445                c = (char) sldns_buffer_read_u8_at(buffer, buffer->_position);
446                found = 0;
447                for (d = s; *d; d++) {
448                        if (*d == c) {
449                                found = 1;
450                        }
451                }
452                if (found && buffer->_limit > buffer->_position) {
453                        buffer->_position += sizeof(char);
454                } else {
455                        return;
456                }
457        }
458}
459
/*
 * Skip over a run of characters in a file without keeping track of line
 * numbers. Identical to sldns_fskipcs_l() called with a NULL line counter.
 */
void
sldns_fskipcs(FILE *fp, const char *s)
{
	int *no_line_counter = NULL;

	sldns_fskipcs_l(fp, s, no_line_counter);
}
465
/*
 * Skip over a run of characters in a file.
 * Characters are read from fp for as long as they occur in s. The first
 * character that is not in s is pushed back with ungetc(), so that the
 * next read sees it again. Reading also stops at EOF.
 *
 * fp:      file to read from.
 * s:       NUL terminated set of characters to skip.
 * line_nr: if not NULL, incremented once for every newline that is
 *          skipped. A newline that ends the skip is pushed back and is
 *          not counted here, so that it is counted only once, by whoever
 *          reads it next.
 */
void
sldns_fskipcs_l(FILE *fp, const char *s, int *line_nr)
{
	int c;
	const char *d;

	while ((c = fgetc(fp)) != EOF) {
		/* look the character up in the skip set */
		for (d = s; *d; d++) {
			if (*d == c)
				break;
		}
		if (!*d) {
			/* not a skip character; with getc, we've read too far */
			ungetc(c, fp);
			return;
		}
		/* only count the newlines that are really consumed */
		if (line_nr && c == '\n') {
			*line_nr = *line_nr + 1;
		}
	}
}
490
491ssize_t
492sldns_bget_keyword_data(sldns_buffer *b, const char *keyword, const char *k_del, char
493*data, const char *d_del, size_t data_limit)
494{
495       /* we assume: keyword|sep|data */
496       char *fkeyword;
497       ssize_t i;
498
499       if(strlen(keyword) >= LDNS_MAX_KEYWORDLEN)
500               return -1;
501       fkeyword = (char*)malloc(LDNS_MAX_KEYWORDLEN);
502       if(!fkeyword)
503               return -1; /* out of memory */
504
505       i = sldns_bget_token(b, fkeyword, k_del, data_limit);
506       if(i==0 || i==-1) {
507               free(fkeyword);
508               return -1; /* nothing read */
509       }
510
511       /* case??? */
512       if (strncmp(fkeyword, keyword, strlen(keyword)) == 0) {
513               free(fkeyword);
514               /* whee, the match! */
515               /* retrieve it's data */
516               i = sldns_bget_token(b, data, d_del, 0);
517               return i;
518       } else {
519               free(fkeyword);
520               return -1;
521       }
522}
523
524