1/*
2 * Copyright (c) 2011-2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_APACHE_LICENSE_HEADER_START@
5 *
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 *     http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 *
18 * @APPLE_APACHE_LICENSE_HEADER_END@
19 */
20
21#include "internal.h"
22
23#include <libkern/OSByteOrder.h>
24
/*
 * Map "host order" and "reversed order" UTF-16 onto the concrete LE/BE format
 * types. The byte order is taken from __LITTLE_ENDIAN__/__BIG_ENDIAN__ when
 * the compiler provides them, and from __BYTE_ORDER__ otherwise. If neither is
 * available the build stops here instead of failing later on an undefined
 * macro.
 */
#if defined(__LITTLE_ENDIAN__) || (defined(__BYTE_ORDER__) && \
		defined(__ORDER_LITTLE_ENDIAN__) && \
		__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#define DISPATCH_DATA_FORMAT_TYPE_UTF16_HOST DISPATCH_DATA_FORMAT_TYPE_UTF16LE
#define DISPATCH_DATA_FORMAT_TYPE_UTF16_REV DISPATCH_DATA_FORMAT_TYPE_UTF16BE
#elif defined(__BIG_ENDIAN__) || (defined(__BYTE_ORDER__) && \
		defined(__ORDER_BIG_ENDIAN__) && \
		__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#define DISPATCH_DATA_FORMAT_TYPE_UTF16_HOST DISPATCH_DATA_FORMAT_TYPE_UTF16BE
#define DISPATCH_DATA_FORMAT_TYPE_UTF16_REV DISPATCH_DATA_FORMAT_TYPE_UTF16LE
#else
#error Unable to determine the host byte order
#endif
32
/*
 * One bit per data format. The bits are OR-ed together to build the
 * input_mask/output_mask fields of the format descriptors in this file.
 */
enum {
	_DISPATCH_DATA_FORMAT_NONE      = 1 << 0,
	_DISPATCH_DATA_FORMAT_UTF8      = 1 << 1,
	_DISPATCH_DATA_FORMAT_UTF16LE   = 1 << 2,
	_DISPATCH_DATA_FORMAT_UTF16BE   = 1 << 3,
	_DISPATCH_DATA_FORMAT_UTF_ANY   = 1 << 4,
	_DISPATCH_DATA_FORMAT_BASE32    = 1 << 5,
	_DISPATCH_DATA_FORMAT_BASE32HEX = 1 << 6,
	_DISPATCH_DATA_FORMAT_BASE64    = 1 << 7,
};
43
44#pragma mark -
45#pragma mark baseXX tables
46
/* Base32 alphabet: the character emitted for each 5-bit value. */
static const unsigned char base32_encode_table[] =
		"ABCDEFGHIJKLMNOP"
		"QRSTUVWXYZ234567";

/*
 * Reverse mapping, indexed by character code (0 .. 'Z').
 * -1 marks a character outside the alphabet, -2 marks the '=' pad character.
 */
static const char base32_decode_table[] = {
	/* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x20 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x30 */ -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, -2, -1, -1,
	/* 0x40 */ -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
	/* 0x50 */ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25
};

/* Number of entries in base32_decode_table; larger character codes are invalid. */
static const ssize_t base32_decode_table_size =
		sizeof(base32_decode_table) / sizeof(base32_decode_table[0]);
60
/* Base32hex alphabet: the character emitted for each 5-bit value. */
static const unsigned char base32hex_encode_table[] =
		"0123456789ABCDEFGHIJKLMNOPQRSTUV";

/*
 * Reverse mapping, indexed by character code (0 .. 'V').
 * -1 marks a character outside the alphabet, -2 marks the '=' pad character.
 */
static const char base32hex_decode_table[] = {
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  0,  1,  2,
	 3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -2, -1, -1, -1, 10, 11, 12,
	13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
	30, 31
};

/*
 * Number of entries in base32hex_decode_table. This must be derived from the
 * DECODE table: sizing it from the encode table (33 bytes) makes the bounds
 * check in the decoder reject every digit, since '0' is already code 48.
 */
static const ssize_t base32hex_decode_table_size =
		sizeof(base32hex_decode_table) / sizeof(*base32hex_decode_table);
74
/* Base64 alphabet: the character emitted for each 6-bit value. */
static const unsigned char base64_encode_table[] =
		"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
		"abcdefghijklmnopqrstuvwxyz"
		"0123456789+/";

/*
 * Reverse mapping, indexed by character code (0 .. 'z').
 * -1 marks a character outside the alphabet, -2 marks the '=' pad character.
 */
static const char base64_decode_table[] = {
	/* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x20 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
	/* 0x30 */ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -2, -1, -1,
	/* 0x40 */ -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
	/* 0x50 */ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
	/* 0x60 */ -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
	/* 0x70 */ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51
};

/* Number of entries in base64_decode_table; larger character codes are invalid. */
static const ssize_t base64_decode_table_size =
		sizeof(base64_decode_table) / sizeof(base64_decode_table[0]);
92
93#pragma mark -
94#pragma mark dispatch_transform_buffer
95
96typedef struct dispatch_transform_buffer_s {
97	dispatch_data_t data;
98	uint8_t *start;
99	union {
100		uint8_t *u8;
101		uint16_t *u16;
102	} ptr;
103	size_t size;
104} dispatch_transform_buffer_s;
105
/*
 * Saturating size_t multiplication: returns a * b, or SIZE_MAX when the
 * product does not fit in a size_t.
 */
static size_t
_dispatch_transform_sizet_mul(size_t a, size_t b)
{
	// A zero factor cannot overflow, and must not be used as a divisor.
	if (a == 0) {
		return 0;
	}
	// The product fits exactly when b does not exceed SIZE_MAX / a.
	return (b <= SIZE_MAX / a) ? a * b : SIZE_MAX;
}
115
116#define BUFFER_MALLOC_MAX (100*1024*1024)
117
118static bool
119_dispatch_transform_buffer_new(dispatch_transform_buffer_s *buffer,
120		size_t required, size_t size)
121{
122	size_t remaining = buffer->size - (size_t)(buffer->ptr.u8 - buffer->start);
123	if (required == 0 || remaining < required) {
124		if (buffer->start) {
125			if (buffer->ptr.u8 > buffer->start) {
126				dispatch_data_t _new = dispatch_data_create(buffer->start,
127						(size_t)(buffer->ptr.u8 - buffer->start), NULL,
128						DISPATCH_DATA_DESTRUCTOR_FREE);
129				dispatch_data_t _concat = dispatch_data_create_concat(
130						buffer->data, _new);
131				dispatch_release(_new);
132				dispatch_release(buffer->data);
133				buffer->data = _concat;
134			} else {
135				free(buffer->start);
136			}
137		}
138		buffer->size = required + size;
139		buffer->start = NULL;
140		if (buffer->size > 0) {
141			if (buffer->size > BUFFER_MALLOC_MAX) {
142				return false;
143			}
144			buffer->start = (uint8_t*)malloc(buffer->size);
145			if (buffer->start == NULL) {
146				return false;
147			}
148		}
149		buffer->ptr.u8 = buffer->start;
150	}
151	return true;
152}
153
154#pragma mark -
155#pragma mark dispatch_transform_helpers
156
157static dispatch_data_t
158_dispatch_data_subrange_map(dispatch_data_t data, const void **ptr,
159		size_t offset, size_t size)
160{
161	dispatch_data_t subrange, map = NULL;
162
163	subrange = dispatch_data_create_subrange(data, offset, size);
164	if (dispatch_data_get_size(subrange) == size) {
165		map = dispatch_data_create_map(subrange, ptr, NULL);
166	}
167	dispatch_release(subrange);
168	return map;
169}
170
171static dispatch_data_format_type_t
172_dispatch_transform_detect_utf(dispatch_data_t data)
173{
174	const void *p;
175	dispatch_data_t subrange = _dispatch_data_subrange_map(data, &p, 0, 2);
176
177	if (subrange == NULL) {
178		return NULL;
179	}
180
181	const uint16_t ch = *(const uint16_t *)p;
182	dispatch_data_format_type_t type = DISPATCH_DATA_FORMAT_TYPE_UTF8;
183
184	if (ch == 0xfeff) {
185		type = DISPATCH_DATA_FORMAT_TYPE_UTF16_HOST;
186	} else if (ch == 0xfffe) {
187		type = DISPATCH_DATA_FORMAT_TYPE_UTF16_REV;
188	}
189
190	dispatch_release(subrange);
191
192	return type;
193}
194
195static uint16_t
196_dispatch_transform_swap_to_host(uint16_t x, int32_t byteOrder)
197{
198	if (byteOrder == OSLittleEndian) {
199		return OSSwapLittleToHostInt16(x);
200	}
201	return OSSwapBigToHostInt16(x);
202}
203
204static uint16_t
205_dispatch_transform_swap_from_host(uint16_t x, int32_t byteOrder)
206{
207	if (byteOrder == OSLittleEndian) {
208		return OSSwapHostToLittleInt16(x);
209	}
210	return OSSwapHostToBigInt16(x);
211}
212
213#pragma mark -
214#pragma mark UTF-8
215
/*
 * Length in bytes (1-4) of the UTF-8 sequence introduced by lead byte `byte`,
 * or 0 when `byte` cannot start a sequence (a continuation byte, or a byte
 * with five or more leading one bits).
 */
static uint8_t
_dispatch_transform_utf8_length(uint8_t byte)
{
	// Count the leading one bits; five or more are all equally invalid.
	uint8_t ones = 0;
	while (ones < 5 && (byte & (0x80 >> ones)) != 0) {
		ones++;
	}

	switch (ones) {
	case 0:
		return 1;	// 0xxxxxxx
	case 2:
		return 2;	// 110xxxxx
	case 3:
		return 3;	// 1110xxxx
	case 4:
		return 4;	// 11110xxx
	default:
		return 0;	// 10xxxxxx or 11111xxx
	}
}
230
/*
 * Decode the UTF-8 sequence starting at `bytes` into a code point.
 *
 * The sequence length is taken from the lead byte, and the caller must make
 * sure that many bytes are readable. Continuation bytes are not validated;
 * only their low six bits are used. Returns 0 without reading further when
 * the lead byte cannot start a sequence (previously the length counter
 * wrapped from 0 to 255 and the loop read far past the input).
 */
static uint32_t
_dispatch_transform_read_utf8_sequence(const uint8_t *bytes)
{
	// Payload bits carried by the lead byte, indexed by sequence length.
	static const uint8_t lead_mask[] = { 0x00, 0x7f, 0x1f, 0x0f, 0x07 };

	uint8_t seq_length = _dispatch_transform_utf8_length(*bytes);
	if (seq_length == 0) {
		// Not a utf-8 sequence
		return 0;
	}

	uint32_t wch = (uint32_t)(*bytes & lead_mask[seq_length]);
	for (uint8_t k = 1; k < seq_length; k++) {
		// Each continuation byte contributes six more bits.
		wch = (wch << 6) | (uint32_t)(bytes[k] & 0x3f);
	}
	return wch;
}
272
273#pragma mark -
274#pragma mark UTF-16
275
276static dispatch_data_t
277_dispatch_transform_to_utf16(dispatch_data_t data, int32_t byteOrder)
278{
279	__block size_t skip = 0;
280
281	__block dispatch_transform_buffer_s buffer = {
282		.data = dispatch_data_empty,
283	};
284
285	bool success = dispatch_data_apply(data, ^(
286			DISPATCH_UNUSED dispatch_data_t region,
287			size_t offset, const void *_buffer, size_t size) {
288		const uint8_t *src = _buffer;
289		size_t i;
290
291		if (offset == 0) {
292			size_t dest_size = 2 + _dispatch_transform_sizet_mul(size,
293					sizeof(uint16_t));
294			if (!_dispatch_transform_buffer_new(&buffer, dest_size, 0)) {
295				return (bool)false;
296			}
297			// Insert BOM
298			*(buffer.ptr.u16)++ = _dispatch_transform_swap_from_host(0xfeff,
299					byteOrder);
300		}
301
302		// Skip is incremented if the previous block read-ahead into our block
303		if (skip >= size) {
304			skip -= size;
305			return (bool)true;
306		} else if (skip > 0) {
307			src += skip;
308			size -= skip;
309			skip = 0;
310		}
311
312		for (i = 0; i < size;) {
313			uint32_t wch = 0;
314			uint8_t byte_size = _dispatch_transform_utf8_length(*src);
315
316			if (byte_size == 0) {
317				return (bool)false;
318			} else if (byte_size + i > size) {
319				// UTF-8 byte sequence spans over into the next block(s)
320				const void *p;
321				dispatch_data_t subrange = _dispatch_data_subrange_map(data, &p,
322						offset + i, byte_size);
323				if (subrange == NULL) {
324					return (bool)false;
325				}
326
327				wch = _dispatch_transform_read_utf8_sequence(p);
328				skip += byte_size - (size - i);
329				src += byte_size;
330				i = size;
331
332				dispatch_release(subrange);
333			} else {
334				wch = _dispatch_transform_read_utf8_sequence(src);
335				src += byte_size;
336				i += byte_size;
337			}
338
339			size_t next = _dispatch_transform_sizet_mul(size - i, sizeof(uint16_t));
340			if (wch >= 0xd800 && wch < 0xdfff) {
341				// Illegal range (surrogate pair)
342				return (bool)false;
343			} else if (wch >= 0x10000) {
344				// Surrogate pair
345				if (!_dispatch_transform_buffer_new(&buffer, 2 *
346						sizeof(uint16_t), next)) {
347					return (bool)false;
348				}
349				wch -= 0x10000;
350				*(buffer.ptr.u16)++ = _dispatch_transform_swap_from_host(
351						((wch >> 10) & 0x3ff) + 0xd800, byteOrder);
352				*(buffer.ptr.u16)++ = _dispatch_transform_swap_from_host(
353						(wch & 0x3ff) + 0xdc00, byteOrder);
354			} else {
355				if (!_dispatch_transform_buffer_new(&buffer, 1 *
356						sizeof(uint16_t), next)) {
357					return (bool)false;
358				}
359				*(buffer.ptr.u16)++ = _dispatch_transform_swap_from_host(
360						(wch & 0xffff), byteOrder);
361			}
362		}
363
364		(void)_dispatch_transform_buffer_new(&buffer, 0, 0);
365
366		return (bool)true;
367	});
368
369	if (!success) {
370		(void)_dispatch_transform_buffer_new(&buffer, 0, 0);
371		dispatch_release(buffer.data);
372		return NULL;
373	}
374
375	return buffer.data;
376}
377
378static dispatch_data_t
379_dispatch_transform_from_utf16(dispatch_data_t data, int32_t byteOrder)
380{
381	__block size_t skip = 0;
382
383	__block dispatch_transform_buffer_s buffer = {
384		.data = dispatch_data_empty,
385	};
386
387	bool success = dispatch_data_apply(data, ^(
388			DISPATCH_UNUSED dispatch_data_t region, size_t offset,
389			const void *_buffer, size_t size) {
390		const uint16_t *src = _buffer;
391
392		if (offset == 0) {
393			// Assume first buffer will be mostly single-byte UTF-8 sequences
394			size_t dest_size = _dispatch_transform_sizet_mul(size, 2) / 3;
395			if (!_dispatch_transform_buffer_new(&buffer, dest_size, 0)) {
396				return (bool)false;
397			}
398		}
399
400		size_t i = 0, max = size / 2;
401
402		// Skip is incremented if the previous block read-ahead into our block
403		if (skip >= size) {
404			skip -= size;
405			return (bool)true;
406		} else if (skip > 0) {
407			src = (uint16_t *)(((uint8_t *)src) + skip);
408			size -= skip;
409			max = (size / 2);
410			skip = 0;
411		}
412
413		// If the buffer is an odd size, allow read ahead into the next region
414		if ((size % 2) != 0) {
415			max += 1;
416		}
417
418		for (i = 0; i < max; i++) {
419			uint32_t wch = 0;
420			uint16_t ch;
421
422			if ((i == (max - 1)) && (max > (size / 2))) {
423				// Last byte of an odd sized range
424				const void *p;
425				dispatch_data_t range = _dispatch_data_subrange_map(data, &p,
426						offset + (i * 2), 2);
427				if (range == NULL) {
428					return (bool)false;
429				}
430				ch = _dispatch_transform_swap_to_host((uint16_t)*(uint64_t*)p,
431						byteOrder);
432				dispatch_release(range);
433				skip += 1;
434			} else {
435				ch =  _dispatch_transform_swap_to_host(src[i], byteOrder);
436			}
437
438			if (ch == 0xfffe && offset == 0 && i == 0) {
439				// Wrong-endian BOM at beginning of data
440				return (bool)false;
441			} else if (ch == 0xfeff && offset == 0 && i == 0) {
442				// Correct-endian BOM, skip it
443				continue;
444			}
445
446			if ((ch >= 0xd800) && (ch <= 0xdbff)) {
447				// Surrogate pair
448				wch = ((ch - 0xd800u) << 10);
449				if (++i >= max) {
450					// Surrogate byte isn't in this block
451					const void *p;
452					dispatch_data_t range = _dispatch_data_subrange_map(data,
453							&p, offset + (i * 2), 2);
454					if (range == NULL) {
455						return (bool)false;
456					}
457					ch = _dispatch_transform_swap_to_host(*(uint16_t *)p,
458							byteOrder);
459					dispatch_release(range);
460					skip += 2;
461				} else {
462					ch = _dispatch_transform_swap_to_host(src[i], byteOrder);
463				}
464				if (!((ch >= 0xdc00) && (ch <= 0xdfff))) {
465					return (bool)false;
466				}
467				wch = (wch | (ch & 0x3ff));
468				wch += 0x10000;
469			} else if ((ch >= 0xdc00) && (ch <= 0xdfff)) {
470				return (bool)false;
471			} else {
472				wch = ch;
473			}
474
475			size_t next = _dispatch_transform_sizet_mul(max - i, 2);
476			if (wch < 0x80) {
477				if (!_dispatch_transform_buffer_new(&buffer, 1, next)) {
478					return (bool)false;
479				}
480				*(buffer.ptr.u8)++ = (uint8_t)(wch & 0xff);
481			} else if (wch < 0x800) {
482				if (!_dispatch_transform_buffer_new(&buffer, 2, next)) {
483					return (bool)false;
484				}
485				*(buffer.ptr.u8)++ = (uint8_t)(0xc0 | (wch >> 6));
486				*(buffer.ptr.u8)++ = (uint8_t)(0x80 | (wch & 0x3f));
487			} else if (wch < 0x10000) {
488				if (!_dispatch_transform_buffer_new(&buffer, 3, next)) {
489					return (bool)false;
490				}
491				*(buffer.ptr.u8)++ = (uint8_t)(0xe0 | (wch >> 12));
492				*(buffer.ptr.u8)++ = (uint8_t)(0x80 | ((wch >> 6) & 0x3f));
493				*(buffer.ptr.u8)++ = (uint8_t)(0x80 | (wch & 0x3f));
494			} else if (wch < 0x200000) {
495				if (!_dispatch_transform_buffer_new(&buffer, 4, next)) {
496					return (bool)false;
497				}
498				*(buffer.ptr.u8)++ = (uint8_t)(0xf0 | (wch >> 18));
499				*(buffer.ptr.u8)++ = (uint8_t)(0x80 | ((wch >> 12) & 0x3f));
500				*(buffer.ptr.u8)++ = (uint8_t)(0x80 | ((wch >> 6) & 0x3f));
501				*(buffer.ptr.u8)++ = (uint8_t)(0x80 | (wch & 0x3f));
502			}
503		}
504
505		(void)_dispatch_transform_buffer_new(&buffer, 0, 0);
506
507		return (bool)true;
508	});
509
510	if (!success) {
511		(void)_dispatch_transform_buffer_new(&buffer, 0, 0);
512		dispatch_release(buffer.data);
513		return NULL;
514	}
515
516	return buffer.data;
517}
518
519static dispatch_data_t
520_dispatch_transform_from_utf16le(dispatch_data_t data)
521{
522	return _dispatch_transform_from_utf16(data, OSLittleEndian);
523}
524
525static dispatch_data_t
526_dispatch_transform_from_utf16be(dispatch_data_t data)
527{
528	return _dispatch_transform_from_utf16(data, OSBigEndian);
529}
530
531static dispatch_data_t
532_dispatch_transform_to_utf16le(dispatch_data_t data)
533{
534	return _dispatch_transform_to_utf16(data, OSLittleEndian);
535}
536
537static dispatch_data_t
538_dispatch_transform_to_utf16be(dispatch_data_t data)
539{
540	return _dispatch_transform_to_utf16(data, OSBigEndian);
541}
542
543#pragma mark -
544#pragma mark base32
545
546static dispatch_data_t
547_dispatch_transform_from_base32_with_table(dispatch_data_t data,
548		const char* table, ssize_t table_size)
549{
550	__block uint64_t x = 0, count = 0, pad = 0;
551
552	__block dispatch_data_t rv = dispatch_data_empty;
553
554	bool success = dispatch_data_apply(data, ^(
555			DISPATCH_UNUSED dispatch_data_t region,
556			DISPATCH_UNUSED size_t offset, const void *buffer, size_t size) {
557		size_t i, dest_size = (size * 5) / 8;
558
559		uint8_t *dest = (uint8_t*)malloc(dest_size * sizeof(uint8_t));
560		uint8_t *ptr = dest;
561		if (dest == NULL) {
562			return (bool)false;
563		}
564
565		const uint8_t *bytes = buffer;
566
567		for (i = 0; i < size; i++) {
568			if (bytes[i] == '\n' || bytes[i] == '\t' || bytes[i] == ' ') {
569				continue;
570			}
571
572			ssize_t index = bytes[i];
573			if (index >= table_size || table[index] == -1) {
574				free(dest);
575				return (bool)false;
576			}
577			count++;
578
579			char value = table[index];
580			if (value == -2) {
581				value = 0;
582				pad++;
583			}
584
585			x <<= 5;
586			x += (uint64_t)value;
587
588			if ((count & 0x7) == 0) {
589				*ptr++ = (x >> 32) & 0xff;
590				*ptr++ = (x >> 24) & 0xff;
591				*ptr++ = (x >> 16) & 0xff;
592				*ptr++ = (x >> 8) & 0xff;
593				*ptr++ = x & 0xff;
594			}
595		}
596
597		size_t final = (size_t)(ptr - dest);
598		switch (pad) {
599		case 1:
600			final -= 1;
601			break;
602		case 3:
603			final -= 2;
604			break;
605		case 4:
606			final -= 3;
607			break;
608		case 6:
609			final -= 4;
610			break;
611		}
612
613		dispatch_data_t val = dispatch_data_create(dest, final, NULL,
614				DISPATCH_DATA_DESTRUCTOR_FREE);
615		dispatch_data_t concat = dispatch_data_create_concat(rv, val);
616
617		dispatch_release(val);
618		dispatch_release(rv);
619		rv = concat;
620
621		return (bool)true;
622	});
623
624	if (!success) {
625		dispatch_release(rv);
626		return NULL;
627	}
628
629	return rv;
630}
631
632static dispatch_data_t
633_dispatch_transform_to_base32_with_table(dispatch_data_t data, const unsigned char* table)
634{
635	size_t total = dispatch_data_get_size(data);
636	__block size_t count = 0;
637
638	if (total > SIZE_T_MAX-4 || ((total+4)/5 > SIZE_T_MAX/8)) {
639		/* We can't hold larger than size_t in a dispatch_data_t
640		 * and we want to avoid an integer overflow in the next
641		 * calculation.
642		 */
643		return NULL;
644	}
645
646	size_t dest_size = (total + 4) / 5 * 8;
647	uint8_t *dest = (uint8_t*)malloc(dest_size);
648	if (dest == NULL) {
649		return NULL;
650	}
651
652	__block uint8_t *ptr = dest;
653
654	/*
655					    0        1        2        3        4
656	 8-bit bytes:   xxxxxxxx yyyyyyyy zzzzzzzz xxxxxxxx yyyyyyyy
657	 5-bit chunks:  aaaaabbb bbcccccd ddddeeee efffffgg ggghhhhh
658	 */
659
660	bool success = dispatch_data_apply(data, ^(
661			DISPATCH_UNUSED dispatch_data_t region,
662			size_t offset, const void *buffer, size_t size) {
663		const uint8_t *bytes = buffer;
664		size_t i;
665
666		for (i = 0; i < size; i++, count++) {
667			uint8_t curr = bytes[i], last = 0;
668
669			if ((count % 5) != 0) {
670				if (i == 0) {
671					const void *p;
672					dispatch_data_t subrange = _dispatch_data_subrange_map(data,
673							&p, offset - 1, 1);
674					if (subrange == NULL) {
675						return (bool)false;
676					}
677					last = *(uint8_t*)p;
678					dispatch_release(subrange);
679				} else {
680					last = bytes[i - 1];
681				}
682			}
683
684			switch (count % 5) {
685			case 0:
686				// a
687				*ptr++ = table[(curr >> 3) & 0x1fu];
688				break;
689			case 1:
690				// b + c
691				*ptr++ = table[((last << 2)|(curr >> 6)) & 0x1f];
692				*ptr++ = table[(curr >> 1) & 0x1f];
693				break;
694			case 2:
695				// d
696				*ptr++ = table[((last << 4)|(curr >> 4)) & 0x1f];
697				break;
698			case 3:
699				// e + f
700				*ptr++ = table[((last << 1)|(curr >> 7)) & 0x1f];
701				*ptr++ = table[(curr >> 2) & 0x1f];
702				break;
703			case 4:
704				// g + h
705				*ptr++ = table[((last << 3)|(curr >> 5)) & 0x1f];
706				*ptr++ = table[curr & 0x1f];
707				break;
708			}
709		}
710
711		// Last region, insert padding bytes, if needed
712		if (offset + size == total) {
713			switch (count % 5) {
714			case 0:
715				break;
716			case 1:
717				// b[4:2]
718				*ptr++ = table[(bytes[size-1] << 2) & 0x1c];
719				break;
720			case 2:
721				// d[4]
722				*ptr++ = table[(bytes[size-1] << 4) & 0x10];
723				break;
724			case 3:
725				// e[4:1]
726				*ptr++ = table[(bytes[size-1] << 1) & 0x1e];
727				break;
728			case 4:
729				// g[2:3]
730				*ptr++ = table[(bytes[size-1] << 3) & 0x18];
731				break;
732			}
733			switch (count % 5) {
734			case 0:
735				break;
736			case 1:
737				*ptr++ = '='; // c
738				*ptr++ = '='; // d
739			case 2:
740				*ptr++ = '='; // e
741			case 3:
742				*ptr++ = '='; // f
743				*ptr++ = '='; // g
744			case 4:
745				*ptr++ = '='; // h
746				break;
747			}
748		}
749
750		return (bool)true;
751	});
752
753	if (!success) {
754		free(dest);
755		return NULL;
756	}
757	return dispatch_data_create(dest, dest_size, NULL,
758			DISPATCH_DATA_DESTRUCTOR_FREE);
759}
760
761static dispatch_data_t
762_dispatch_transform_from_base32(dispatch_data_t data)
763{
764	return _dispatch_transform_from_base32_with_table(data, base32_decode_table,
765			base32_decode_table_size);
766}
767
768static dispatch_data_t
769_dispatch_transform_to_base32(dispatch_data_t data)
770{
771	return _dispatch_transform_to_base32_with_table(data, base32_encode_table);
772}
773
774static dispatch_data_t
775_dispatch_transform_from_base32hex(dispatch_data_t data)
776{
777	return _dispatch_transform_from_base32_with_table(data,
778			base32hex_decode_table, base32hex_decode_table_size);
779}
780
781static dispatch_data_t
782_dispatch_transform_to_base32hex(dispatch_data_t data)
783{
784	return _dispatch_transform_to_base32_with_table(data,
785			base32hex_encode_table);
786}
787
788#pragma mark -
789#pragma mark base64
790
791static dispatch_data_t
792_dispatch_transform_from_base64(dispatch_data_t data)
793{
794	__block uint64_t x = 0, count = 0;
795	__block size_t pad = 0;
796
797	__block dispatch_data_t rv = dispatch_data_empty;
798
799	bool success = dispatch_data_apply(data, ^(
800			DISPATCH_UNUSED dispatch_data_t region,
801			DISPATCH_UNUSED size_t offset, const void *buffer, size_t size) {
802		size_t i, dest_size = (size * 3) / 4;
803
804		uint8_t *dest = (uint8_t*)malloc(dest_size * sizeof(uint8_t));
805		uint8_t *ptr = dest;
806		if (dest == NULL) {
807			return (bool)false;
808		}
809
810		const uint8_t *bytes = buffer;
811
812		for (i = 0; i < size; i++) {
813			if (bytes[i] == '\n' || bytes[i] == '\t' || bytes[i] == ' ') {
814				continue;
815			}
816
817			ssize_t index = bytes[i];
818			if (index >= base64_decode_table_size ||
819					base64_decode_table[index] == -1) {
820				free(dest);
821				return (bool)false;
822			}
823			count++;
824
825			char value = base64_decode_table[index];
826			if (value == -2) {
827				value = 0;
828				pad++;
829			}
830
831			x <<= 6;
832			x += (uint64_t)value;
833
834			if ((count & 0x3) == 0) {
835				*ptr++ = (x >> 16) & 0xff;
836				*ptr++ = (x >> 8) & 0xff;
837				*ptr++ = x & 0xff;
838			}
839		}
840
841		size_t final = (size_t)(ptr - dest);
842		if (pad > 0) {
843			// 2 bytes of pad means only had one char in final group
844			final -= pad;
845		}
846
847		dispatch_data_t val = dispatch_data_create(dest, final, NULL,
848				DISPATCH_DATA_DESTRUCTOR_FREE);
849		dispatch_data_t concat = dispatch_data_create_concat(rv, val);
850
851		dispatch_release(val);
852		dispatch_release(rv);
853		rv = concat;
854
855		return (bool)true;
856	});
857
858	if (!success) {
859		dispatch_release(rv);
860		return NULL;
861	}
862
863	return rv;
864}
865
866static dispatch_data_t
867_dispatch_transform_to_base64(dispatch_data_t data)
868{
869	// RFC 4648 states that we should not linebreak
870	// http://tools.ietf.org/html/rfc4648
871	size_t total = dispatch_data_get_size(data);
872	__block size_t count = 0;
873
874	if (total > SIZE_T_MAX-2 || ((total+2)/3> SIZE_T_MAX/4)) {
875		/* We can't hold larger than size_t in a dispatch_data_t
876		 * and we want to avoid an integer overflow in the next
877		 * calculation.
878		 */
879		return NULL;
880	}
881
882	size_t dest_size = (total + 2) / 3 * 4;
883	uint8_t *dest = (uint8_t*)malloc(dest_size);
884	if (dest == NULL) {
885		return NULL;
886	}
887
888	__block uint8_t *ptr = dest;
889
890	/*
891	 * 3 8-bit bytes:	xxxxxxxx yyyyyyyy zzzzzzzz
892	 * 4 6-bit chunks:	aaaaaabb bbbbcccc ccdddddd
893	 */
894
895	bool success = dispatch_data_apply(data, ^(
896			DISPATCH_UNUSED dispatch_data_t region,
897			size_t offset, const void *buffer, size_t size) {
898		const uint8_t *bytes = buffer;
899		size_t i;
900
901		for (i = 0; i < size; i++, count++) {
902			uint8_t curr = bytes[i], last = 0;
903
904			if ((count % 3) != 0) {
905				if (i == 0) {
906					const void *p;
907					dispatch_data_t subrange = _dispatch_data_subrange_map(data,
908						&p, offset - 1, 1);
909					if (subrange == NULL) {
910						return (bool)false;
911					}
912					last = *(uint8_t*)p;
913					dispatch_release(subrange);
914				} else {
915					last = bytes[i - 1];
916				}
917			}
918
919			switch (count % 3) {
920			case 0:
921				*ptr++ = base64_encode_table[(curr >> 2) & 0x3f];
922				break;
923			case 1:
924				*ptr++ = base64_encode_table[((last << 4)|(curr >> 4)) & 0x3f];
925				break;
926			case 2:
927				*ptr++ = base64_encode_table[((last << 2)|(curr >> 6)) & 0x3f];
928				*ptr++ = base64_encode_table[(curr & 0x3f)];
929				break;
930			}
931		}
932
933		// Last region, insert padding bytes, if needed
934		if (offset + size == total) {
935			switch (count % 3) {
936			case 0:
937				break;
938			case 1:
939				*ptr++ = base64_encode_table[(bytes[size-1] << 4) & 0x30];
940				*ptr++ = '=';
941				*ptr++ = '=';
942				break;
943			case 2:
944				*ptr++ = base64_encode_table[(bytes[size-1] << 2) & 0x3c];
945				*ptr++ = '=';
946				break;
947			}
948		}
949
950		return (bool)true;
951	});
952
953	if (!success) {
954		free(dest);
955		return NULL;
956	}
957	return dispatch_data_create(dest, dest_size, NULL,
958			DISPATCH_DATA_DESTRUCTOR_FREE);
959}
960
961#pragma mark -
962#pragma mark dispatch_data_transform
963
964dispatch_data_t
965dispatch_data_create_with_transform(dispatch_data_t data,
966		dispatch_data_format_type_t input, dispatch_data_format_type_t output)
967{
968	if (input->type == _DISPATCH_DATA_FORMAT_UTF_ANY) {
969		input = _dispatch_transform_detect_utf(data);
970		if (input == NULL) {
971			return NULL;
972		}
973	}
974
975	if ((input->type & ~output->input_mask) != 0) {
976		return NULL;
977	}
978
979	if ((output->type & ~input->output_mask) != 0) {
980		return NULL;
981	}
982
983	if (dispatch_data_get_size(data) == 0) {
984		return data;
985	}
986
987	dispatch_data_t temp1;
988	if (input->decode) {
989		temp1 = input->decode(data);
990	} else {
991		dispatch_retain(data);
992		temp1 = data;
993	}
994
995	if (!temp1) {
996		return NULL;
997	}
998
999	dispatch_data_t temp2;
1000	if (output->encode) {
1001		temp2 = output->encode(temp1);
1002	} else {
1003		dispatch_retain(temp1);
1004		temp2 = temp1;
1005	}
1006
1007	dispatch_release(temp1);
1008	return temp2;
1009}
1010
1011const struct dispatch_data_format_type_s _dispatch_data_format_type_none = {
1012	.type = _DISPATCH_DATA_FORMAT_NONE,
1013	.input_mask = ~0u,
1014	.output_mask = ~0u,
1015	.decode = NULL,
1016	.encode = NULL,
1017};
1018
1019const struct dispatch_data_format_type_s _dispatch_data_format_type_base32 = {
1020	.type = _DISPATCH_DATA_FORMAT_BASE32,
1021	.input_mask = (_DISPATCH_DATA_FORMAT_NONE | _DISPATCH_DATA_FORMAT_BASE32 |
1022			_DISPATCH_DATA_FORMAT_BASE32HEX | _DISPATCH_DATA_FORMAT_BASE64),
1023	.output_mask = (_DISPATCH_DATA_FORMAT_NONE | _DISPATCH_DATA_FORMAT_BASE32 |
1024			_DISPATCH_DATA_FORMAT_BASE32HEX | _DISPATCH_DATA_FORMAT_BASE64),
1025	.decode = _dispatch_transform_from_base32,
1026	.encode = _dispatch_transform_to_base32,
1027};
1028
1029const struct dispatch_data_format_type_s _dispatch_data_format_type_base32hex =
1030{
1031	.type = _DISPATCH_DATA_FORMAT_BASE32HEX,
1032	.input_mask = (_DISPATCH_DATA_FORMAT_NONE | _DISPATCH_DATA_FORMAT_BASE32 |
1033			_DISPATCH_DATA_FORMAT_BASE32HEX | _DISPATCH_DATA_FORMAT_BASE64),
1034	.output_mask = (_DISPATCH_DATA_FORMAT_NONE | _DISPATCH_DATA_FORMAT_BASE32 |
1035			_DISPATCH_DATA_FORMAT_BASE32HEX | _DISPATCH_DATA_FORMAT_BASE64),
1036	.decode = _dispatch_transform_from_base32hex,
1037	.encode = _dispatch_transform_to_base32hex,
1038};
1039
1040const struct dispatch_data_format_type_s _dispatch_data_format_type_base64 = {
1041	.type = _DISPATCH_DATA_FORMAT_BASE64,
1042	.input_mask = (_DISPATCH_DATA_FORMAT_NONE | _DISPATCH_DATA_FORMAT_BASE32 |
1043			_DISPATCH_DATA_FORMAT_BASE32HEX | _DISPATCH_DATA_FORMAT_BASE64),
1044	.output_mask = (_DISPATCH_DATA_FORMAT_NONE | _DISPATCH_DATA_FORMAT_BASE32 |
1045			_DISPATCH_DATA_FORMAT_BASE32HEX | _DISPATCH_DATA_FORMAT_BASE64),
1046	.decode = _dispatch_transform_from_base64,
1047	.encode = _dispatch_transform_to_base64,
1048};
1049
1050const struct dispatch_data_format_type_s _dispatch_data_format_type_utf16le = {
1051	.type = _DISPATCH_DATA_FORMAT_UTF16LE,
1052	.input_mask = (_DISPATCH_DATA_FORMAT_UTF8 | _DISPATCH_DATA_FORMAT_UTF16BE |
1053			_DISPATCH_DATA_FORMAT_UTF16LE),
1054	.output_mask = (_DISPATCH_DATA_FORMAT_UTF8 | _DISPATCH_DATA_FORMAT_UTF16BE |
1055			_DISPATCH_DATA_FORMAT_UTF16LE),
1056	.decode = _dispatch_transform_from_utf16le,
1057	.encode = _dispatch_transform_to_utf16le,
1058};
1059
1060const struct dispatch_data_format_type_s _dispatch_data_format_type_utf16be = {
1061	.type = _DISPATCH_DATA_FORMAT_UTF16BE,
1062	.input_mask = (_DISPATCH_DATA_FORMAT_UTF8 | _DISPATCH_DATA_FORMAT_UTF16BE |
1063			_DISPATCH_DATA_FORMAT_UTF16LE),
1064	.output_mask = (_DISPATCH_DATA_FORMAT_UTF8 | _DISPATCH_DATA_FORMAT_UTF16BE |
1065			_DISPATCH_DATA_FORMAT_UTF16LE),
1066	.decode = _dispatch_transform_from_utf16be,
1067	.encode = _dispatch_transform_to_utf16be,
1068};
1069
1070const struct dispatch_data_format_type_s _dispatch_data_format_type_utf8 = {
1071	.type = _DISPATCH_DATA_FORMAT_UTF8,
1072	.input_mask = (_DISPATCH_DATA_FORMAT_UTF8 | _DISPATCH_DATA_FORMAT_UTF16BE |
1073			_DISPATCH_DATA_FORMAT_UTF16LE),
1074	.output_mask = (_DISPATCH_DATA_FORMAT_UTF8 | _DISPATCH_DATA_FORMAT_UTF16BE |
1075			_DISPATCH_DATA_FORMAT_UTF16LE),
1076	.decode = NULL,
1077	.encode = NULL,
1078};
1079
1080const struct dispatch_data_format_type_s _dispatch_data_format_type_utf_any = {
1081	.type = _DISPATCH_DATA_FORMAT_UTF_ANY,
1082	.input_mask = 0,
1083	.output_mask = 0,
1084	.decode = NULL,
1085	.encode = NULL,
1086};
1087