parseutil.c revision 266077
1/*
2 * parseutil.c - parse utilities for string and wire conversion
3 *
4 * (c) NLnet Labs, 2004-2006
5 *
6 * See the file LICENSE for the license
7 */
8/**
9 * \file
10 *
11 * Utility functions for parsing, base32(DNS variant) and base64 encoding
12 * and decoding, Hex, Time units, Escape codes.
13 */
14
15#include "config.h"
16#include "ldns/parseutil.h"
17#include <sys/time.h>
18#include <time.h>
19#include <ctype.h>
20
21sldns_lookup_table *
22sldns_lookup_by_name(sldns_lookup_table *table, const char *name)
23{
24        while (table->name != NULL) {
25                if (strcasecmp(name, table->name) == 0)
26                        return table;
27                table++;
28        }
29        return NULL;
30}
31
32sldns_lookup_table *
33sldns_lookup_by_id(sldns_lookup_table *table, int id)
34{
35        while (table->name != NULL) {
36                if (table->id == id)
37                        return table;
38                table++;
39        }
40        return NULL;
41}
42
43/* Number of days per month (except for February in leap years). */
44static const int mdays[] = {
45	31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
46};
47
48#define LDNS_MOD(x,y) (((x) % (y) < 0) ? ((x) % (y) + (y)) : ((x) % (y)))
49#define LDNS_DIV(x,y) (((x) % (y) < 0) ? ((x) / (y) -  1 ) : ((x) / (y)))
50
51static int
52is_leap_year(int year)
53{
54	return LDNS_MOD(year,   4) == 0 && (LDNS_MOD(year, 100) != 0
55	    || LDNS_MOD(year, 400) == 0);
56}
57
58static int
59leap_days(int y1, int y2)
60{
61	--y1;
62	--y2;
63	return (LDNS_DIV(y2,   4) - LDNS_DIV(y1,   4)) -
64	       (LDNS_DIV(y2, 100) - LDNS_DIV(y1, 100)) +
65	       (LDNS_DIV(y2, 400) - LDNS_DIV(y1, 400));
66}
67
68/*
69 * Code adapted from Python 2.4.1 sources (Lib/calendar.py).
70 */
71time_t
72sldns_mktime_from_utc(const struct tm *tm)
73{
74	int year = 1900 + tm->tm_year;
75	time_t days = 365 * ((time_t) year - 1970) + leap_days(1970, year);
76	time_t hours;
77	time_t minutes;
78	time_t seconds;
79	int i;
80
81	for (i = 0; i < tm->tm_mon; ++i) {
82		days += mdays[i];
83	}
84	if (tm->tm_mon > 1 && is_leap_year(year)) {
85		++days;
86	}
87	days += tm->tm_mday - 1;
88
89	hours = days * 24 + tm->tm_hour;
90	minutes = hours * 60 + tm->tm_min;
91	seconds = minutes * 60 + tm->tm_sec;
92
93	return seconds;
94}
95
96#if SIZEOF_TIME_T <= 4
97
98static void
99sldns_year_and_yday_from_days_since_epoch(int64_t days, struct tm *result)
100{
101	int year = 1970;
102	int new_year;
103
104	while (days < 0 || days >= (int64_t) (is_leap_year(year) ? 366 : 365)) {
105		new_year = year + (int) LDNS_DIV(days, 365);
106		days -= (new_year - year) * 365;
107		days -= leap_days(year, new_year);
108		year  = new_year;
109	}
110	result->tm_year = year;
111	result->tm_yday = (int) days;
112}
113
114/* Number of days per month in a leap year. */
115static const int leap_year_mdays[] = {
116	31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
117};
118
119static void
120sldns_mon_and_mday_from_year_and_yday(struct tm *result)
121{
122	int idays = result->tm_yday;
123	const int *mon_lengths = is_leap_year(result->tm_year) ?
124					leap_year_mdays : mdays;
125
126	result->tm_mon = 0;
127	while  (idays >= mon_lengths[result->tm_mon]) {
128		idays -= mon_lengths[result->tm_mon++];
129	}
130	result->tm_mday = idays + 1;
131}
132
133static void
134sldns_wday_from_year_and_yday(struct tm *result)
135{
136	result->tm_wday = 4 /* 1-1-1970 was a thursday */
137			+ LDNS_MOD((result->tm_year - 1970), 7) * LDNS_MOD(365, 7)
138			+ leap_days(1970, result->tm_year)
139			+ result->tm_yday;
140	result->tm_wday = LDNS_MOD(result->tm_wday, 7);
141	if (result->tm_wday < 0) {
142		result->tm_wday += 7;
143	}
144}
145
146static struct tm *
147sldns_gmtime64_r(int64_t clock, struct tm *result)
148{
149	result->tm_isdst = 0;
150	result->tm_sec   = (int) LDNS_MOD(clock, 60);
151	clock            =       LDNS_DIV(clock, 60);
152	result->tm_min   = (int) LDNS_MOD(clock, 60);
153	clock            =       LDNS_DIV(clock, 60);
154	result->tm_hour  = (int) LDNS_MOD(clock, 24);
155	clock            =       LDNS_DIV(clock, 24);
156
157	sldns_year_and_yday_from_days_since_epoch(clock, result);
158	sldns_mon_and_mday_from_year_and_yday(result);
159	sldns_wday_from_year_and_yday(result);
160	result->tm_year -= 1900;
161
162	return result;
163}
164
165#endif /* SIZEOF_TIME_T <= 4 */
166
167static int64_t
168sldns_serial_arithmitics_time(int32_t time, time_t now)
169{
170	int32_t offset = time - (int32_t) now;
171	return (int64_t) now + offset;
172}
173
174struct tm *
175sldns_serial_arithmitics_gmtime_r(int32_t time, time_t now, struct tm *result)
176{
177#if SIZEOF_TIME_T <= 4
178	int64_t secs_since_epoch = sldns_serial_arithmitics_time(time, now);
179	return  sldns_gmtime64_r(secs_since_epoch, result);
180#else
181	time_t  secs_since_epoch = sldns_serial_arithmitics_time(time, now);
182	return  gmtime_r(&secs_since_epoch, result);
183#endif
184}
185
186int
187sldns_hexdigit_to_int(char ch)
188{
189	switch (ch) {
190	case '0': return 0;
191	case '1': return 1;
192	case '2': return 2;
193	case '3': return 3;
194	case '4': return 4;
195	case '5': return 5;
196	case '6': return 6;
197	case '7': return 7;
198	case '8': return 8;
199	case '9': return 9;
200	case 'a': case 'A': return 10;
201	case 'b': case 'B': return 11;
202	case 'c': case 'C': return 12;
203	case 'd': case 'D': return 13;
204	case 'e': case 'E': return 14;
205	case 'f': case 'F': return 15;
206	default:
207		return -1;
208	}
209}
210
211uint32_t
212sldns_str2period(const char *nptr, const char **endptr)
213{
214	int sign = 0;
215	uint32_t i = 0;
216	uint32_t seconds = 0;
217
218	for(*endptr = nptr; **endptr; (*endptr)++) {
219		switch (**endptr) {
220			case ' ':
221			case '\t':
222				break;
223			case '-':
224				if(sign == 0) {
225					sign = -1;
226				} else {
227					return seconds;
228				}
229				break;
230			case '+':
231				if(sign == 0) {
232					sign = 1;
233				} else {
234					return seconds;
235				}
236				break;
237			case 's':
238			case 'S':
239				seconds += i;
240				i = 0;
241				break;
242			case 'm':
243			case 'M':
244				seconds += i * 60;
245				i = 0;
246				break;
247			case 'h':
248			case 'H':
249				seconds += i * 60 * 60;
250				i = 0;
251				break;
252			case 'd':
253			case 'D':
254				seconds += i * 60 * 60 * 24;
255				i = 0;
256				break;
257			case 'w':
258			case 'W':
259				seconds += i * 60 * 60 * 24 * 7;
260				i = 0;
261				break;
262			case '0':
263			case '1':
264			case '2':
265			case '3':
266			case '4':
267			case '5':
268			case '6':
269			case '7':
270			case '8':
271			case '9':
272				i *= 10;
273				i += (**endptr - '0');
274				break;
275			default:
276				seconds += i;
277				/* disregard signedness */
278				return seconds;
279		}
280	}
281	seconds += i;
282	/* disregard signedness */
283	return seconds;
284}
285
286int
287sldns_parse_escape(uint8_t *ch_p, const char** str_p)
288{
289	uint16_t val;
290
291	if ((*str_p)[0] && isdigit((*str_p)[0])  &&
292	    (*str_p)[1] && isdigit((*str_p)[1])  &&
293	    (*str_p)[2] && isdigit((*str_p)[2]))  {
294
295		val = (uint16_t)(((*str_p)[0] - '0') * 100 +
296				 ((*str_p)[1] - '0') *  10 +
297				 ((*str_p)[2] - '0'));
298
299		if (val > 255) {
300			goto error;
301		}
302		*ch_p = (uint8_t)val;
303		*str_p += 3;
304		return 1;
305
306	} else if ((*str_p)[0] && !isdigit((*str_p)[0])) {
307
308		*ch_p = (uint8_t)*(*str_p)++;
309		return 1;
310	}
311error:
312	*str_p = NULL;
313	return 0; /* LDNS_WIREPARSE_ERR_SYNTAX_BAD_ESCAPE */
314}
315
316/** parse one character, with escape codes */
317int
318sldns_parse_char(uint8_t *ch_p, const char** str_p)
319{
320	switch (**str_p) {
321
322	case '\0':	return 0;
323
324	case '\\':	*str_p += 1;
325			return sldns_parse_escape(ch_p, str_p);
326
327	default:	*ch_p = (uint8_t)*(*str_p)++;
328			return 1;
329	}
330}
331
332size_t sldns_b32_ntop_calculate_size(size_t src_data_length)
333{
334	return src_data_length == 0 ? 0 : ((src_data_length - 1) / 5 + 1) * 8;
335}
336
337size_t sldns_b32_ntop_calculate_size_no_padding(size_t src_data_length)
338{
339	return ((src_data_length + 3) * 8 / 5) - 4;
340}
341
342static int
343sldns_b32_ntop_base(const uint8_t* src, size_t src_sz, char* dst, size_t dst_sz,
344	int extended_hex, int add_padding)
345{
346	size_t ret_sz;
347	const char* b32 = extended_hex ?  "0123456789abcdefghijklmnopqrstuv"
348					: "abcdefghijklmnopqrstuvwxyz234567";
349
350	size_t c = 0; /* c is used to carry partial base32 character over
351		       * byte boundaries for sizes with a remainder.
352		       * (i.e. src_sz % 5 != 0)
353		       */
354
355	ret_sz = add_padding ? sldns_b32_ntop_calculate_size(src_sz)
356			     : sldns_b32_ntop_calculate_size_no_padding(src_sz);
357
358	/* Do we have enough space? */
359	if (dst_sz < ret_sz + 1)
360		return -1;
361
362	/* We know the size; terminate the string */
363	dst[ret_sz] = '\0';
364
365	/* First process all chunks of five */
366	while (src_sz >= 5) {
367		/* 00000... ........ ........ ........ ........ */
368		dst[0] = b32[(src[0]       ) >> 3];
369
370		/* .....111 11...... ........ ........ ........ */
371		dst[1] = b32[(src[0] & 0x07) << 2 | src[1] >> 6];
372
373		/* ........ ..22222. ........ ........ ........ */
374		dst[2] = b32[(src[1] & 0x3e) >> 1];
375
376		/* ........ .......3 3333.... ........ ........ */
377		dst[3] = b32[(src[1] & 0x01) << 4 | src[2] >> 4];
378
379		/* ........ ........ ....4444 4....... ........ */
380		dst[4] = b32[(src[2] & 0x0f) << 1 | src[3] >> 7];
381
382		/* ........ ........ ........ .55555.. ........ */
383		dst[5] = b32[(src[3] & 0x7c) >> 2];
384
385		/* ........ ........ ........ ......66 666..... */
386		dst[6] = b32[(src[3] & 0x03) << 3 | src[4] >> 5];
387
388		/* ........ ........ ........ ........ ...77777 */
389		dst[7] = b32[(src[4] & 0x1f)     ];
390
391		src_sz -= 5;
392		src    += 5;
393		dst    += 8;
394	}
395	/* Process what remains */
396	switch (src_sz) {
397	case 4: /* ........ ........ ........ ......66 666..... */
398		dst[6] = b32[(src[3] & 0x03) << 3];
399
400		/* ........ ........ ........ .55555.. ........ */
401		dst[5] = b32[(src[3] & 0x7c) >> 2];
402
403		/* ........ ........ ....4444 4....... ........ */
404			 c =  src[3]         >> 7 ;
405	case 3: dst[4] = b32[(src[2] & 0x0f) << 1 | c];
406
407		/* ........ .......3 3333.... ........ ........ */
408			 c =  src[2]         >> 4 ;
409	case 2:	dst[3] = b32[(src[1] & 0x01) << 4 | c];
410
411		/* ........ ..22222. ........ ........ ........ */
412		dst[2] = b32[(src[1] & 0x3e) >> 1];
413
414		/* .....111 11...... ........ ........ ........ */
415			 c =  src[1]         >> 6 ;
416	case 1:	dst[1] = b32[(src[0] & 0x07) << 2 | c];
417
418		/* 00000... ........ ........ ........ ........ */
419		dst[0] = b32[ src[0]         >> 3];
420	}
421	/* Add padding */
422	if (add_padding) {
423		switch (src_sz) {
424			case 1: dst[2] = '=';
425				dst[3] = '=';
426			case 2: dst[4] = '=';
427			case 3: dst[5] = '=';
428				dst[6] = '=';
429			case 4: dst[7] = '=';
430		}
431	}
432	return (int)ret_sz;
433}
434
435int
436sldns_b32_ntop(const uint8_t* src, size_t src_sz, char* dst, size_t dst_sz)
437{
438	return sldns_b32_ntop_base(src, src_sz, dst, dst_sz, 0, 1);
439}
440
441int
442sldns_b32_ntop_extended_hex(const uint8_t* src, size_t src_sz,
443		char* dst, size_t dst_sz)
444{
445	return sldns_b32_ntop_base(src, src_sz, dst, dst_sz, 1, 1);
446}
447
448size_t sldns_b32_pton_calculate_size(size_t src_text_length)
449{
450	return src_text_length * 5 / 8;
451}
452
453static int
454sldns_b32_pton_base(const char* src, size_t src_sz, uint8_t* dst, size_t dst_sz,
455	int extended_hex, int check_padding)
456{
457	size_t i = 0;
458	char ch = '\0';
459	uint8_t buf[8];
460	uint8_t* start = dst;
461
462	while (src_sz) {
463		/* Collect 8 characters in buf (if possible) */
464		for (i = 0; i < 8; i++) {
465
466			do {
467				ch = *src++;
468				--src_sz;
469
470			} while (isspace(ch) && src_sz > 0);
471
472			if (ch == '=' || ch == '\0')
473				break;
474
475			else if (extended_hex)
476
477				if (ch >= '0' && ch <= '9')
478					buf[i] = (uint8_t)ch - '0';
479				else if (ch >= 'a' && ch <= 'v')
480					buf[i] = (uint8_t)ch - 'a' + 10;
481				else if (ch >= 'A' && ch <= 'V')
482					buf[i] = (uint8_t)ch - 'A' + 10;
483				else
484					return -1;
485
486			else if (ch >= 'a' && ch <= 'z')
487				buf[i] = (uint8_t)ch - 'a';
488			else if (ch >= 'A' && ch <= 'Z')
489				buf[i] = (uint8_t)ch - 'A';
490			else if (ch >= '2' && ch <= '7')
491				buf[i] = (uint8_t)ch - '2' + 26;
492			else
493				return -1;
494		}
495		/* Less that 8 characters. We're done. */
496		if (i < 8)
497			break;
498
499		/* Enough space available at the destination? */
500		if (dst_sz < 5)
501			return -1;
502
503		/* 00000... ........ ........ ........ ........ */
504		/* .....111 11...... ........ ........ ........ */
505		dst[0] = buf[0] << 3 | buf[1] >> 2;
506
507		/* .....111 11...... ........ ........ ........ */
508		/* ........ ..22222. ........ ........ ........ */
509		/* ........ .......3 3333.... ........ ........ */
510		dst[1] = buf[1] << 6 | buf[2] << 1 | buf[3] >> 4;
511
512		/* ........ .......3 3333.... ........ ........ */
513		/* ........ ........ ....4444 4....... ........ */
514		dst[2] = buf[3] << 4 | buf[4] >> 1;
515
516		/* ........ ........ ....4444 4....... ........ */
517		/* ........ ........ ........ .55555.. ........ */
518		/* ........ ........ ........ ......66 666..... */
519		dst[3] = buf[4] << 7 | buf[5] << 2 | buf[6] >> 3;
520
521		/* ........ ........ ........ ......66 666..... */
522		/* ........ ........ ........ ........ ...77777 */
523		dst[4] = buf[6] << 5 | buf[7];
524
525		dst += 5;
526		dst_sz -= 5;
527	}
528	/* Not ending on a eight byte boundary? */
529	if (i > 0 && i < 8) {
530
531		/* Enough space available at the destination? */
532		if (dst_sz < (i + 1) / 2)
533			return -1;
534
535		switch (i) {
536		case 7: /* ........ ........ ........ ......66 666..... */
537			/* ........ ........ ........ .55555.. ........ */
538			/* ........ ........ ....4444 4....... ........ */
539			dst[3] = buf[4] << 7 | buf[5] << 2 | buf[6] >> 3;
540
541		case 5: /* ........ ........ ....4444 4....... ........ */
542			/* ........ .......3 3333.... ........ ........ */
543			dst[2] = buf[3] << 4 | buf[4] >> 1;
544
545		case 4: /* ........ .......3 3333.... ........ ........ */
546			/* ........ ..22222. ........ ........ ........ */
547			/* .....111 11...... ........ ........ ........ */
548			dst[1] = buf[1] << 6 | buf[2] << 1 | buf[3] >> 4;
549
550		case 2: /* .....111 11...... ........ ........ ........ */
551			/* 00000... ........ ........ ........ ........ */
552			dst[0] = buf[0] << 3 | buf[1] >> 2;
553
554			break;
555
556		default:
557			return -1;
558		}
559		dst += (i + 1) / 2;
560
561		if (check_padding) {
562			/* Check remaining padding characters */
563			if (ch != '=')
564				return -1;
565
566			/* One down, 8 - i - 1 more to come... */
567			for (i = 8 - i - 1; i > 0; i--) {
568
569				do {
570					if (src_sz == 0)
571						return -1;
572					ch = *src++;
573					src_sz--;
574
575				} while (isspace(ch));
576
577				if (ch != '=')
578					return -1;
579			}
580		}
581	}
582	return dst - start;
583}
584
585int
586sldns_b32_pton(const char* src, size_t src_sz, uint8_t* dst, size_t dst_sz)
587{
588	return sldns_b32_pton_base(src, src_sz, dst, dst_sz, 0, 1);
589}
590
591int
592sldns_b32_pton_extended_hex(const char* src, size_t src_sz,
593		uint8_t* dst, size_t dst_sz)
594{
595	return sldns_b32_pton_base(src, src_sz, dst, dst_sz, 1, 1);
596}
597
598size_t sldns_b64_ntop_calculate_size(size_t srcsize)
599{
600	return ((((srcsize + 2) / 3) * 4) + 1);
601}
602
603/* RFC 1521, section 5.2.
604 *
605 * The encoding process represents 24-bit groups of input bits as output
606 * strings of 4 encoded characters. Proceeding from left to right, a
607 * 24-bit input group is formed by concatenating 3 8-bit input groups.
608 * These 24 bits are then treated as 4 concatenated 6-bit groups, each
609 * of which is translated into a single digit in the base64 alphabet.
610 *
611 * This routine does not insert spaces or linebreaks after 76 characters.
612 */
613int sldns_b64_ntop(uint8_t const *src, size_t srclength,
614	char *target, size_t targsize)
615{
616	const char* b64 =
617	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
618	const char pad64 = '=';
619	size_t i = 0, o = 0;
620	if(targsize < sldns_b64_ntop_calculate_size(srclength))
621		return -1;
622	/* whole chunks: xxxxxxyy yyyyzzzz zzwwwwww */
623	while(i+3 <= srclength) {
624		if(o+4 > targsize) return -1;
625		target[o] = b64[src[i] >> 2];
626		target[o+1] = b64[ ((src[i]&0x03)<<4) | (src[i+1]>>4) ];
627		target[o+2] = b64[ ((src[i+1]&0x0f)<<2) | (src[i+2]>>6) ];
628		target[o+3] = b64[ (src[i+2]&0x3f) ];
629		i += 3;
630		o += 4;
631	}
632	/* remainder */
633	switch(srclength - i) {
634	case 2:
635		/* two at end, converted into A B C = */
636		target[o] = b64[src[i] >> 2];
637		target[o+1] = b64[ ((src[i]&0x03)<<4) | (src[i+1]>>4) ];
638		target[o+2] = b64[ ((src[i+1]&0x0f)<<2) ];
639		target[o+3] = pad64;
640		i += 2;
641		o += 4;
642		break;
643	case 1:
644		/* one at end, converted into A B = = */
645		target[o] = b64[src[i] >> 2];
646		target[o+1] = b64[ ((src[i]&0x03)<<4) ];
647		target[o+2] = pad64;
648		target[o+3] = pad64;
649		i += 1;
650		o += 4;
651		break;
652	case 0:
653	default:
654		/* nothing */
655		break;
656	}
657	/* assert: i == srclength */
658	if(o+1 > targsize) return -1;
659	target[o] = 0;
660	return (int)o;
661}
662
663size_t sldns_b64_pton_calculate_size(size_t srcsize)
664{
665	return (((((srcsize + 3) / 4) * 3)) + 1);
666}
667
668int sldns_b64_pton(char const *src, uint8_t *target, size_t targsize)
669{
670	const uint8_t pad64 = 64; /* is 64th in the b64 array */
671	const char* s = src;
672	uint8_t in[4];
673	size_t o = 0, incount = 0;
674
675	while(*s) {
676		/* skip any character that is not base64 */
677		/* conceptually we do:
678		const char* b64 =      pad'=' is appended to array
679		"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";
680		const char* d = strchr(b64, *s++);
681		and use d-b64;
682		*/
683		char d = *s++;
684		if(d <= 'Z' && d >= 'A')
685			d -= 'A';
686		else if(d <= 'z' && d >= 'a')
687			d = d - 'a' + 26;
688		else if(d <= '9' && d >= '0')
689			d = d - '0' + 52;
690		else if(d == '+')
691			d = 62;
692		else if(d == '/')
693			d = 63;
694		else if(d == '=')
695			d = 64;
696		else	continue;
697		in[incount++] = (uint8_t)d;
698		if(incount != 4)
699			continue;
700		/* process whole block of 4 characters into 3 output bytes */
701		if(in[3] == pad64 && in[2] == pad64) { /* A B = = */
702			if(o+1 > targsize)
703				return -1;
704			target[o] = (in[0]<<2) | ((in[1]&0x30)>>4);
705			o += 1;
706			break; /* we are done */
707		} else if(in[3] == pad64) { /* A B C = */
708			if(o+2 > targsize)
709				return -1;
710			target[o] = (in[0]<<2) | ((in[1]&0x30)>>4);
711			target[o+1]= ((in[1]&0x0f)<<4) | ((in[2]&0x3c)>>2);
712			o += 2;
713			break; /* we are done */
714		} else {
715			if(o+3 > targsize)
716				return -1;
717			/* write xxxxxxyy yyyyzzzz zzwwwwww */
718			target[o] = (in[0]<<2) | ((in[1]&0x30)>>4);
719			target[o+1]= ((in[1]&0x0f)<<4) | ((in[2]&0x3c)>>2);
720			target[o+2]= ((in[2]&0x03)<<6) | in[3];
721			o += 3;
722		}
723		incount = 0;
724	}
725	return (int)o;
726}
727