1/* 2 * Copyright (c) 2011-2013 Apple Inc. All rights reserved. 3 * 4 * @APPLE_APACHE_LICENSE_HEADER_START@ 5 * 6 * Licensed under the Apache License, Version 2.0 (the "License"); 7 * you may not use this file except in compliance with the License. 8 * You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 * 18 * @APPLE_APACHE_LICENSE_HEADER_END@ 19 */ 20 21#include "internal.h" 22 23#include <libkern/OSByteOrder.h> 24 25#if defined(__LITTLE_ENDIAN__) 26#define DISPATCH_DATA_FORMAT_TYPE_UTF16_HOST DISPATCH_DATA_FORMAT_TYPE_UTF16LE 27#define DISPATCH_DATA_FORMAT_TYPE_UTF16_REV DISPATCH_DATA_FORMAT_TYPE_UTF16BE 28#elif defined(__BIG_ENDIAN__) 29#define DISPATCH_DATA_FORMAT_TYPE_UTF16_HOST DISPATCH_DATA_FORMAT_TYPE_UTF16BE 30#define DISPATCH_DATA_FORMAT_TYPE_UTF16_REV DISPATCH_DATA_FORMAT_TYPE_UTF16LE 31#endif 32 33enum { 34 _DISPATCH_DATA_FORMAT_NONE = 0x1, 35 _DISPATCH_DATA_FORMAT_UTF8 = 0x2, 36 _DISPATCH_DATA_FORMAT_UTF16LE = 0x4, 37 _DISPATCH_DATA_FORMAT_UTF16BE = 0x8, 38 _DISPATCH_DATA_FORMAT_UTF_ANY = 0x10, 39 _DISPATCH_DATA_FORMAT_BASE32 = 0x20, 40 _DISPATCH_DATA_FORMAT_BASE32HEX = 0x40, 41 _DISPATCH_DATA_FORMAT_BASE64 = 0x80, 42}; 43 44#pragma mark - 45#pragma mark baseXX tables 46 47static const unsigned char base32_encode_table[] = 48 "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567"; 49 50static const char base32_decode_table[] = { 51 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 52 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 53 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 26, 54 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, -2, -1, -1, -1, 0, 1, 2, 55 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 56 20, 21, 22, 23, 24, 25 57}; 58static const ssize_t base32_decode_table_size = sizeof(base32_decode_table) 59 / sizeof(*base32_decode_table); 60 61static const unsigned char base32hex_encode_table[] = 62 "0123456789ABCDEFGHIJKLMNOPQRSTUV"; 63 64static const char base32hex_decode_table[] = { 65 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 66 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 67 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 68 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -2, -1, -1, -1, 10, 11, 12, 69 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 70 30, 31 71}; 72static const ssize_t base32hex_decode_table_size = 73 sizeof(base32hex_encode_table) / sizeof(*base32hex_encode_table); 74 75static const unsigned char base64_encode_table[] = 76 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; 77 78static const char base64_decode_table[] = { 79 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 80 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 81 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 82 -1, 62, -1, -1, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 83 60, 61, -1, -1, -1, -2, -1, -1, -1, 0, 1, 2, 3, 4, 84 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 85 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 26, 86 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 87 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51 88}; 89 90static const ssize_t base64_decode_table_size = sizeof(base64_decode_table) 91 / sizeof(*base64_decode_table); 92 93#pragma mark - 94#pragma mark dispatch_transform_buffer 95 96typedef struct dispatch_transform_buffer_s { 97 dispatch_data_t data; 98 uint8_t *start; 99 union { 100 uint8_t *u8; 101 uint16_t *u16; 102 } ptr; 103 size_t size; 104} dispatch_transform_buffer_s; 105 106static size_t 107_dispatch_transform_sizet_mul(size_t a, size_t b) 108{ 109 size_t rv = SIZE_MAX; 110 if (a == 0 || rv/a >= b) { 111 rv = a * b; 112 } 113 return rv; 114} 115 116#define BUFFER_MALLOC_MAX (100*1024*1024) 117 118static bool 119_dispatch_transform_buffer_new(dispatch_transform_buffer_s *buffer, 120 size_t required, size_t size) 121{ 122 size_t remaining = buffer->size - (size_t)(buffer->ptr.u8 - buffer->start); 123 if (required == 0 || remaining < required) { 124 if (buffer->start) { 125 if (buffer->ptr.u8 > buffer->start) { 126 dispatch_data_t _new = dispatch_data_create(buffer->start, 127 (size_t)(buffer->ptr.u8 - buffer->start), NULL, 128 DISPATCH_DATA_DESTRUCTOR_FREE); 129 dispatch_data_t _concat = dispatch_data_create_concat( 130 buffer->data, _new); 131 dispatch_release(_new); 132 dispatch_release(buffer->data); 133 buffer->data = _concat; 134 } else { 135 free(buffer->start); 136 } 137 } 138 buffer->size = required + size; 139 buffer->start = NULL; 140 if (buffer->size > 0) { 141 if (buffer->size > BUFFER_MALLOC_MAX) { 142 return false; 143 } 144 buffer->start = (uint8_t*)malloc(buffer->size); 145 if (buffer->start == NULL) { 146 return false; 147 } 148 } 149 buffer->ptr.u8 = buffer->start; 150 } 151 return true; 152} 153 154#pragma mark - 155#pragma mark dispatch_transform_helpers 156 157static dispatch_data_t 158_dispatch_data_subrange_map(dispatch_data_t data, const void **ptr, 159 size_t offset, size_t size) 160{ 161 dispatch_data_t subrange, map = NULL; 162 163 subrange = dispatch_data_create_subrange(data, offset, size); 164 if (dispatch_data_get_size(subrange) == size) { 165 map = dispatch_data_create_map(subrange, ptr, NULL); 166 } 167 dispatch_release(subrange); 168 return map; 169} 170 171static dispatch_data_format_type_t 172_dispatch_transform_detect_utf(dispatch_data_t data) 173{ 174 const void *p; 175 dispatch_data_t subrange = _dispatch_data_subrange_map(data, &p, 0, 2); 176 177 if (subrange == NULL) { 178 return NULL; 179 } 180 181 const uint16_t ch = *(const uint16_t *)p; 182 dispatch_data_format_type_t type = DISPATCH_DATA_FORMAT_TYPE_UTF8; 183 184 if (ch == 0xfeff) { 185 type = DISPATCH_DATA_FORMAT_TYPE_UTF16_HOST; 186 } else if (ch == 0xfffe) { 187 type = DISPATCH_DATA_FORMAT_TYPE_UTF16_REV; 188 } 189 190 dispatch_release(subrange); 191 192 return type; 193} 194 195static uint16_t 196_dispatch_transform_swap_to_host(uint16_t x, int32_t byteOrder) 197{ 198 if (byteOrder == OSLittleEndian) { 199 return OSSwapLittleToHostInt16(x); 200 } 201 return OSSwapBigToHostInt16(x); 202} 203 204static uint16_t 205_dispatch_transform_swap_from_host(uint16_t x, int32_t byteOrder) 206{ 207 if (byteOrder == OSLittleEndian) { 208 return OSSwapHostToLittleInt16(x); 209 } 210 return OSSwapHostToBigInt16(x); 211} 212 213#pragma mark - 214#pragma mark UTF-8 215 216static uint8_t 217_dispatch_transform_utf8_length(uint8_t byte) 218{ 219 if ((byte & 0x80) == 0) { 220 return 1; 221 } else if ((byte & 0xe0) == 0xc0) { 222 return 2; 223 } else if ((byte & 0xf0) == 0xe0) { 224 return 3; 225 } else if ((byte & 0xf8) == 0xf0) { 226 return 4; 227 } 228 return 0; 229} 230 231static uint32_t 232_dispatch_transform_read_utf8_sequence(const uint8_t *bytes) 233{ 234 uint32_t wch = 0; 235 uint8_t seq_length = _dispatch_transform_utf8_length(*bytes); 236 237 switch (seq_length) { 238 case 4: 239 wch |= (*bytes & 0x7); 240 wch <<= 6; 241 break; 242 case 3: 243 wch |= (*bytes & 0xf); 244 wch <<= 6; 245 break; 246 case 2: 247 wch |= (*bytes & 0x1f); 248 wch <<= 6; 249 break; 250 case 1: 251 wch = (*bytes & 0x7f); 252 break; 253 default: 254 // Not a utf-8 sequence 255 break; 256 } 257 258 bytes++; 259 seq_length--; 260 261 while (seq_length > 0) { 262 wch |= (*bytes & 0x3f); 263 bytes++; 264 seq_length--; 265 266 if (seq_length > 0) { 267 wch <<= 6; 268 } 269 } 270 return wch; 271} 272 273#pragma mark - 274#pragma mark UTF-16 275 276static dispatch_data_t 277_dispatch_transform_to_utf16(dispatch_data_t data, int32_t byteOrder) 278{ 279 __block size_t skip = 0; 280 281 __block dispatch_transform_buffer_s buffer = { 282 .data = dispatch_data_empty, 283 }; 284 285 bool success = dispatch_data_apply(data, ^( 286 DISPATCH_UNUSED dispatch_data_t region, 287 size_t offset, const void *_buffer, size_t size) { 288 const uint8_t *src = _buffer; 289 size_t i; 290 291 if (offset == 0) { 292 size_t dest_size = 2 + _dispatch_transform_sizet_mul(size, 293 sizeof(uint16_t)); 294 if (!_dispatch_transform_buffer_new(&buffer, dest_size, 0)) { 295 return (bool)false; 296 } 297 // Insert BOM 298 *(buffer.ptr.u16)++ = _dispatch_transform_swap_from_host(0xfeff, 299 byteOrder); 300 } 301 302 // Skip is incremented if the previous block read-ahead into our block 303 if (skip >= size) { 304 skip -= size; 305 return (bool)true; 306 } else if (skip > 0) { 307 src += skip; 308 size -= skip; 309 skip = 0; 310 } 311 312 for (i = 0; i < size;) { 313 uint32_t wch = 0; 314 uint8_t byte_size = _dispatch_transform_utf8_length(*src); 315 316 if (byte_size == 0) { 317 return (bool)false; 318 } else if (byte_size + i > size) { 319 // UTF-8 byte sequence spans over into the next block(s) 320 const void *p; 321 dispatch_data_t subrange = _dispatch_data_subrange_map(data, &p, 322 offset + i, byte_size); 323 if (subrange == NULL) { 324 return (bool)false; 325 } 326 327 wch = _dispatch_transform_read_utf8_sequence(p); 328 skip += byte_size - (size - i); 329 src += byte_size; 330 i = size; 331 332 dispatch_release(subrange); 333 } else { 334 wch = _dispatch_transform_read_utf8_sequence(src); 335 src += byte_size; 336 i += byte_size; 337 } 338 339 size_t next = _dispatch_transform_sizet_mul(size - i, sizeof(uint16_t)); 340 if (wch >= 0xd800 && wch < 0xdfff) { 341 // Illegal range (surrogate pair) 342 return (bool)false; 343 } else if (wch >= 0x10000) { 344 // Surrogate pair 345 if (!_dispatch_transform_buffer_new(&buffer, 2 * 346 sizeof(uint16_t), next)) { 347 return (bool)false; 348 } 349 wch -= 0x10000; 350 *(buffer.ptr.u16)++ = _dispatch_transform_swap_from_host( 351 ((wch >> 10) & 0x3ff) + 0xd800, byteOrder); 352 *(buffer.ptr.u16)++ = _dispatch_transform_swap_from_host( 353 (wch & 0x3ff) + 0xdc00, byteOrder); 354 } else { 355 if (!_dispatch_transform_buffer_new(&buffer, 1 * 356 sizeof(uint16_t), next)) { 357 return (bool)false; 358 } 359 *(buffer.ptr.u16)++ = _dispatch_transform_swap_from_host( 360 (wch & 0xffff), byteOrder); 361 } 362 } 363 364 (void)_dispatch_transform_buffer_new(&buffer, 0, 0); 365 366 return (bool)true; 367 }); 368 369 if (!success) { 370 (void)_dispatch_transform_buffer_new(&buffer, 0, 0); 371 dispatch_release(buffer.data); 372 return NULL; 373 } 374 375 return buffer.data; 376} 377 378static dispatch_data_t 379_dispatch_transform_from_utf16(dispatch_data_t data, int32_t byteOrder) 380{ 381 __block size_t skip = 0; 382 383 __block dispatch_transform_buffer_s buffer = { 384 .data = dispatch_data_empty, 385 }; 386 387 bool success = dispatch_data_apply(data, ^( 388 DISPATCH_UNUSED dispatch_data_t region, size_t offset, 389 const void *_buffer, size_t size) { 390 const uint16_t *src = _buffer; 391 392 if (offset == 0) { 393 // Assume first buffer will be mostly single-byte UTF-8 sequences 394 size_t dest_size = _dispatch_transform_sizet_mul(size, 2) / 3; 395 if (!_dispatch_transform_buffer_new(&buffer, dest_size, 0)) { 396 return (bool)false; 397 } 398 } 399 400 size_t i = 0, max = size / 2; 401 402 // Skip is incremented if the previous block read-ahead into our block 403 if (skip >= size) { 404 skip -= size; 405 return (bool)true; 406 } else if (skip > 0) { 407 src = (uint16_t *)(((uint8_t *)src) + skip); 408 size -= skip; 409 max = (size / 2); 410 skip = 0; 411 } 412 413 // If the buffer is an odd size, allow read ahead into the next region 414 if ((size % 2) != 0) { 415 max += 1; 416 } 417 418 for (i = 0; i < max; i++) { 419 uint32_t wch = 0; 420 uint16_t ch; 421 422 if ((i == (max - 1)) && (max > (size / 2))) { 423 // Last byte of an odd sized range 424 const void *p; 425 dispatch_data_t range = _dispatch_data_subrange_map(data, &p, 426 offset + (i * 2), 2); 427 if (range == NULL) { 428 return (bool)false; 429 } 430 ch = _dispatch_transform_swap_to_host((uint16_t)*(uint64_t*)p, 431 byteOrder); 432 dispatch_release(range); 433 skip += 1; 434 } else { 435 ch = _dispatch_transform_swap_to_host(src[i], byteOrder); 436 } 437 438 if (ch == 0xfffe && offset == 0 && i == 0) { 439 // Wrong-endian BOM at beginning of data 440 return (bool)false; 441 } else if (ch == 0xfeff && offset == 0 && i == 0) { 442 // Correct-endian BOM, skip it 443 continue; 444 } 445 446 if ((ch >= 0xd800) && (ch <= 0xdbff)) { 447 // Surrogate pair 448 wch = ((ch - 0xd800u) << 10); 449 if (++i >= max) { 450 // Surrogate byte isn't in this block 451 const void *p; 452 dispatch_data_t range = _dispatch_data_subrange_map(data, 453 &p, offset + (i * 2), 2); 454 if (range == NULL) { 455 return (bool)false; 456 } 457 ch = _dispatch_transform_swap_to_host(*(uint16_t *)p, 458 byteOrder); 459 dispatch_release(range); 460 skip += 2; 461 } else { 462 ch = _dispatch_transform_swap_to_host(src[i], byteOrder); 463 } 464 if (!((ch >= 0xdc00) && (ch <= 0xdfff))) { 465 return (bool)false; 466 } 467 wch = (wch | (ch & 0x3ff)); 468 wch += 0x10000; 469 } else if ((ch >= 0xdc00) && (ch <= 0xdfff)) { 470 return (bool)false; 471 } else { 472 wch = ch; 473 } 474 475 size_t next = _dispatch_transform_sizet_mul(max - i, 2); 476 if (wch < 0x80) { 477 if (!_dispatch_transform_buffer_new(&buffer, 1, next)) { 478 return (bool)false; 479 } 480 *(buffer.ptr.u8)++ = (uint8_t)(wch & 0xff); 481 } else if (wch < 0x800) { 482 if (!_dispatch_transform_buffer_new(&buffer, 2, next)) { 483 return (bool)false; 484 } 485 *(buffer.ptr.u8)++ = (uint8_t)(0xc0 | (wch >> 6)); 486 *(buffer.ptr.u8)++ = (uint8_t)(0x80 | (wch & 0x3f)); 487 } else if (wch < 0x10000) { 488 if (!_dispatch_transform_buffer_new(&buffer, 3, next)) { 489 return (bool)false; 490 } 491 *(buffer.ptr.u8)++ = (uint8_t)(0xe0 | (wch >> 12)); 492 *(buffer.ptr.u8)++ = (uint8_t)(0x80 | ((wch >> 6) & 0x3f)); 493 *(buffer.ptr.u8)++ = (uint8_t)(0x80 | (wch & 0x3f)); 494 } else if (wch < 0x200000) { 495 if (!_dispatch_transform_buffer_new(&buffer, 4, next)) { 496 return (bool)false; 497 } 498 *(buffer.ptr.u8)++ = (uint8_t)(0xf0 | (wch >> 18)); 499 *(buffer.ptr.u8)++ = (uint8_t)(0x80 | ((wch >> 12) & 0x3f)); 500 *(buffer.ptr.u8)++ = (uint8_t)(0x80 | ((wch >> 6) & 0x3f)); 501 *(buffer.ptr.u8)++ = (uint8_t)(0x80 | (wch & 0x3f)); 502 } 503 } 504 505 (void)_dispatch_transform_buffer_new(&buffer, 0, 0); 506 507 return (bool)true; 508 }); 509 510 if (!success) { 511 (void)_dispatch_transform_buffer_new(&buffer, 0, 0); 512 dispatch_release(buffer.data); 513 return NULL; 514 } 515 516 return buffer.data; 517} 518 519static dispatch_data_t 520_dispatch_transform_from_utf16le(dispatch_data_t data) 521{ 522 return _dispatch_transform_from_utf16(data, OSLittleEndian); 523} 524 525static dispatch_data_t 526_dispatch_transform_from_utf16be(dispatch_data_t data) 527{ 528 return _dispatch_transform_from_utf16(data, OSBigEndian); 529} 530 531static dispatch_data_t 532_dispatch_transform_to_utf16le(dispatch_data_t data) 533{ 534 return _dispatch_transform_to_utf16(data, OSLittleEndian); 535} 536 537static dispatch_data_t 538_dispatch_transform_to_utf16be(dispatch_data_t data) 539{ 540 return _dispatch_transform_to_utf16(data, OSBigEndian); 541} 542 543#pragma mark - 544#pragma mark base32 545 546static dispatch_data_t 547_dispatch_transform_from_base32_with_table(dispatch_data_t data, 548 const char* table, ssize_t table_size) 549{ 550 __block uint64_t x = 0, count = 0, pad = 0; 551 552 __block dispatch_data_t rv = dispatch_data_empty; 553 554 bool success = dispatch_data_apply(data, ^( 555 DISPATCH_UNUSED dispatch_data_t region, 556 DISPATCH_UNUSED size_t offset, const void *buffer, size_t size) { 557 size_t i, dest_size = (size * 5) / 8; 558 559 uint8_t *dest = (uint8_t*)malloc(dest_size * sizeof(uint8_t)); 560 uint8_t *ptr = dest; 561 if (dest == NULL) { 562 return (bool)false; 563 } 564 565 const uint8_t *bytes = buffer; 566 567 for (i = 0; i < size; i++) { 568 if (bytes[i] == '\n' || bytes[i] == '\t' || bytes[i] == ' ') { 569 continue; 570 } 571 572 ssize_t index = bytes[i]; 573 if (index >= table_size || table[index] == -1) { 574 free(dest); 575 return (bool)false; 576 } 577 count++; 578 579 char value = table[index]; 580 if (value == -2) { 581 value = 0; 582 pad++; 583 } 584 585 x <<= 5; 586 x += (uint64_t)value; 587 588 if ((count & 0x7) == 0) { 589 *ptr++ = (x >> 32) & 0xff; 590 *ptr++ = (x >> 24) & 0xff; 591 *ptr++ = (x >> 16) & 0xff; 592 *ptr++ = (x >> 8) & 0xff; 593 *ptr++ = x & 0xff; 594 } 595 } 596 597 size_t final = (size_t)(ptr - dest); 598 switch (pad) { 599 case 1: 600 final -= 1; 601 break; 602 case 3: 603 final -= 2; 604 break; 605 case 4: 606 final -= 3; 607 break; 608 case 6: 609 final -= 4; 610 break; 611 } 612 613 dispatch_data_t val = dispatch_data_create(dest, final, NULL, 614 DISPATCH_DATA_DESTRUCTOR_FREE); 615 dispatch_data_t concat = dispatch_data_create_concat(rv, val); 616 617 dispatch_release(val); 618 dispatch_release(rv); 619 rv = concat; 620 621 return (bool)true; 622 }); 623 624 if (!success) { 625 dispatch_release(rv); 626 return NULL; 627 } 628 629 return rv; 630} 631 632static dispatch_data_t 633_dispatch_transform_to_base32_with_table(dispatch_data_t data, const unsigned char* table) 634{ 635 size_t total = dispatch_data_get_size(data); 636 __block size_t count = 0; 637 638 if (total > SIZE_T_MAX-4 || ((total+4)/5 > SIZE_T_MAX/8)) { 639 /* We can't hold larger than size_t in a dispatch_data_t 640 * and we want to avoid an integer overflow in the next 641 * calculation. 642 */ 643 return NULL; 644 } 645 646 size_t dest_size = (total + 4) / 5 * 8; 647 uint8_t *dest = (uint8_t*)malloc(dest_size); 648 if (dest == NULL) { 649 return NULL; 650 } 651 652 __block uint8_t *ptr = dest; 653 654 /* 655 0 1 2 3 4 656 8-bit bytes: xxxxxxxx yyyyyyyy zzzzzzzz xxxxxxxx yyyyyyyy 657 5-bit chunks: aaaaabbb bbcccccd ddddeeee efffffgg ggghhhhh 658 */ 659 660 bool success = dispatch_data_apply(data, ^( 661 DISPATCH_UNUSED dispatch_data_t region, 662 size_t offset, const void *buffer, size_t size) { 663 const uint8_t *bytes = buffer; 664 size_t i; 665 666 for (i = 0; i < size; i++, count++) { 667 uint8_t curr = bytes[i], last = 0; 668 669 if ((count % 5) != 0) { 670 if (i == 0) { 671 const void *p; 672 dispatch_data_t subrange = _dispatch_data_subrange_map(data, 673 &p, offset - 1, 1); 674 if (subrange == NULL) { 675 return (bool)false; 676 } 677 last = *(uint8_t*)p; 678 dispatch_release(subrange); 679 } else { 680 last = bytes[i - 1]; 681 } 682 } 683 684 switch (count % 5) { 685 case 0: 686 // a 687 *ptr++ = table[(curr >> 3) & 0x1fu]; 688 break; 689 case 1: 690 // b + c 691 *ptr++ = table[((last << 2)|(curr >> 6)) & 0x1f]; 692 *ptr++ = table[(curr >> 1) & 0x1f]; 693 break; 694 case 2: 695 // d 696 *ptr++ = table[((last << 4)|(curr >> 4)) & 0x1f]; 697 break; 698 case 3: 699 // e + f 700 *ptr++ = table[((last << 1)|(curr >> 7)) & 0x1f]; 701 *ptr++ = table[(curr >> 2) & 0x1f]; 702 break; 703 case 4: 704 // g + h 705 *ptr++ = table[((last << 3)|(curr >> 5)) & 0x1f]; 706 *ptr++ = table[curr & 0x1f]; 707 break; 708 } 709 } 710 711 // Last region, insert padding bytes, if needed 712 if (offset + size == total) { 713 switch (count % 5) { 714 case 0: 715 break; 716 case 1: 717 // b[4:2] 718 *ptr++ = table[(bytes[size-1] << 2) & 0x1c]; 719 break; 720 case 2: 721 // d[4] 722 *ptr++ = table[(bytes[size-1] << 4) & 0x10]; 723 break; 724 case 3: 725 // e[4:1] 726 *ptr++ = table[(bytes[size-1] << 1) & 0x1e]; 727 break; 728 case 4: 729 // g[2:3] 730 *ptr++ = table[(bytes[size-1] << 3) & 0x18]; 731 break; 732 } 733 switch (count % 5) { 734 case 0: 735 break; 736 case 1: 737 *ptr++ = '='; // c 738 *ptr++ = '='; // d 739 case 2: 740 *ptr++ = '='; // e 741 case 3: 742 *ptr++ = '='; // f 743 *ptr++ = '='; // g 744 case 4: 745 *ptr++ = '='; // h 746 break; 747 } 748 } 749 750 return (bool)true; 751 }); 752 753 if (!success) { 754 free(dest); 755 return NULL; 756 } 757 return dispatch_data_create(dest, dest_size, NULL, 758 DISPATCH_DATA_DESTRUCTOR_FREE); 759} 760 761static dispatch_data_t 762_dispatch_transform_from_base32(dispatch_data_t data) 763{ 764 return _dispatch_transform_from_base32_with_table(data, base32_decode_table, 765 base32_decode_table_size); 766} 767 768static dispatch_data_t 769_dispatch_transform_to_base32(dispatch_data_t data) 770{ 771 return _dispatch_transform_to_base32_with_table(data, base32_encode_table); 772} 773 774static dispatch_data_t 775_dispatch_transform_from_base32hex(dispatch_data_t data) 776{ 777 return _dispatch_transform_from_base32_with_table(data, 778 base32hex_decode_table, base32hex_decode_table_size); 779} 780 781static dispatch_data_t 782_dispatch_transform_to_base32hex(dispatch_data_t data) 783{ 784 return _dispatch_transform_to_base32_with_table(data, 785 base32hex_encode_table); 786} 787 788#pragma mark - 789#pragma mark base64 790 791static dispatch_data_t 792_dispatch_transform_from_base64(dispatch_data_t data) 793{ 794 __block uint64_t x = 0, count = 0; 795 __block size_t pad = 0; 796 797 __block dispatch_data_t rv = dispatch_data_empty; 798 799 bool success = dispatch_data_apply(data, ^( 800 DISPATCH_UNUSED dispatch_data_t region, 801 DISPATCH_UNUSED size_t offset, const void *buffer, size_t size) { 802 size_t i, dest_size = (size * 3) / 4; 803 804 uint8_t *dest = (uint8_t*)malloc(dest_size * sizeof(uint8_t)); 805 uint8_t *ptr = dest; 806 if (dest == NULL) { 807 return (bool)false; 808 } 809 810 const uint8_t *bytes = buffer; 811 812 for (i = 0; i < size; i++) { 813 if (bytes[i] == '\n' || bytes[i] == '\t' || bytes[i] == ' ') { 814 continue; 815 } 816 817 ssize_t index = bytes[i]; 818 if (index >= base64_decode_table_size || 819 base64_decode_table[index] == -1) { 820 free(dest); 821 return (bool)false; 822 } 823 count++; 824 825 char value = base64_decode_table[index]; 826 if (value == -2) { 827 value = 0; 828 pad++; 829 } 830 831 x <<= 6; 832 x += (uint64_t)value; 833 834 if ((count & 0x3) == 0) { 835 *ptr++ = (x >> 16) & 0xff; 836 *ptr++ = (x >> 8) & 0xff; 837 *ptr++ = x & 0xff; 838 } 839 } 840 841 size_t final = (size_t)(ptr - dest); 842 if (pad > 0) { 843 // 2 bytes of pad means only had one char in final group 844 final -= pad; 845 } 846 847 dispatch_data_t val = dispatch_data_create(dest, final, NULL, 848 DISPATCH_DATA_DESTRUCTOR_FREE); 849 dispatch_data_t concat = dispatch_data_create_concat(rv, val); 850 851 dispatch_release(val); 852 dispatch_release(rv); 853 rv = concat; 854 855 return (bool)true; 856 }); 857 858 if (!success) { 859 dispatch_release(rv); 860 return NULL; 861 } 862 863 return rv; 864} 865 866static dispatch_data_t 867_dispatch_transform_to_base64(dispatch_data_t data) 868{ 869 // RFC 4648 states that we should not linebreak 870 // http://tools.ietf.org/html/rfc4648 871 size_t total = dispatch_data_get_size(data); 872 __block size_t count = 0; 873 874 if (total > SIZE_T_MAX-2 || ((total+2)/3> SIZE_T_MAX/4)) { 875 /* We can't hold larger than size_t in a dispatch_data_t 876 * and we want to avoid an integer overflow in the next 877 * calculation. 878 */ 879 return NULL; 880 } 881 882 size_t dest_size = (total + 2) / 3 * 4; 883 uint8_t *dest = (uint8_t*)malloc(dest_size); 884 if (dest == NULL) { 885 return NULL; 886 } 887 888 __block uint8_t *ptr = dest; 889 890 /* 891 * 3 8-bit bytes: xxxxxxxx yyyyyyyy zzzzzzzz 892 * 4 6-bit chunks: aaaaaabb bbbbcccc ccdddddd 893 */ 894 895 bool success = dispatch_data_apply(data, ^( 896 DISPATCH_UNUSED dispatch_data_t region, 897 size_t offset, const void *buffer, size_t size) { 898 const uint8_t *bytes = buffer; 899 size_t i; 900 901 for (i = 0; i < size; i++, count++) { 902 uint8_t curr = bytes[i], last = 0; 903 904 if ((count % 3) != 0) { 905 if (i == 0) { 906 const void *p; 907 dispatch_data_t subrange = _dispatch_data_subrange_map(data, 908 &p, offset - 1, 1); 909 if (subrange == NULL) { 910 return (bool)false; 911 } 912 last = *(uint8_t*)p; 913 dispatch_release(subrange); 914 } else { 915 last = bytes[i - 1]; 916 } 917 } 918 919 switch (count % 3) { 920 case 0: 921 *ptr++ = base64_encode_table[(curr >> 2) & 0x3f]; 922 break; 923 case 1: 924 *ptr++ = base64_encode_table[((last << 4)|(curr >> 4)) & 0x3f]; 925 break; 926 case 2: 927 *ptr++ = base64_encode_table[((last << 2)|(curr >> 6)) & 0x3f]; 928 *ptr++ = base64_encode_table[(curr & 0x3f)]; 929 break; 930 } 931 } 932 933 // Last region, insert padding bytes, if needed 934 if (offset + size == total) { 935 switch (count % 3) { 936 case 0: 937 break; 938 case 1: 939 *ptr++ = base64_encode_table[(bytes[size-1] << 4) & 0x30]; 940 *ptr++ = '='; 941 *ptr++ = '='; 942 break; 943 case 2: 944 *ptr++ = base64_encode_table[(bytes[size-1] << 2) & 0x3c]; 945 *ptr++ = '='; 946 break; 947 } 948 } 949 950 return (bool)true; 951 }); 952 953 if (!success) { 954 free(dest); 955 return NULL; 956 } 957 return dispatch_data_create(dest, dest_size, NULL, 958 DISPATCH_DATA_DESTRUCTOR_FREE); 959} 960 961#pragma mark - 962#pragma mark dispatch_data_transform 963 964dispatch_data_t 965dispatch_data_create_with_transform(dispatch_data_t data, 966 dispatch_data_format_type_t input, dispatch_data_format_type_t output) 967{ 968 if (input->type == _DISPATCH_DATA_FORMAT_UTF_ANY) { 969 input = _dispatch_transform_detect_utf(data); 970 if (input == NULL) { 971 return NULL; 972 } 973 } 974 975 if ((input->type & ~output->input_mask) != 0) { 976 return NULL; 977 } 978 979 if ((output->type & ~input->output_mask) != 0) { 980 return NULL; 981 } 982 983 if (dispatch_data_get_size(data) == 0) { 984 return data; 985 } 986 987 dispatch_data_t temp1; 988 if (input->decode) { 989 temp1 = input->decode(data); 990 } else { 991 dispatch_retain(data); 992 temp1 = data; 993 } 994 995 if (!temp1) { 996 return NULL; 997 } 998 999 dispatch_data_t temp2; 1000 if (output->encode) { 1001 temp2 = output->encode(temp1); 1002 } else { 1003 dispatch_retain(temp1); 1004 temp2 = temp1; 1005 } 1006 1007 dispatch_release(temp1); 1008 return temp2; 1009} 1010 1011const struct dispatch_data_format_type_s _dispatch_data_format_type_none = { 1012 .type = _DISPATCH_DATA_FORMAT_NONE, 1013 .input_mask = ~0u, 1014 .output_mask = ~0u, 1015 .decode = NULL, 1016 .encode = NULL, 1017}; 1018 1019const struct dispatch_data_format_type_s _dispatch_data_format_type_base32 = { 1020 .type = _DISPATCH_DATA_FORMAT_BASE32, 1021 .input_mask = (_DISPATCH_DATA_FORMAT_NONE | _DISPATCH_DATA_FORMAT_BASE32 | 1022 _DISPATCH_DATA_FORMAT_BASE32HEX | _DISPATCH_DATA_FORMAT_BASE64), 1023 .output_mask = (_DISPATCH_DATA_FORMAT_NONE | _DISPATCH_DATA_FORMAT_BASE32 | 1024 _DISPATCH_DATA_FORMAT_BASE32HEX | _DISPATCH_DATA_FORMAT_BASE64), 1025 .decode = _dispatch_transform_from_base32, 1026 .encode = _dispatch_transform_to_base32, 1027}; 1028 1029const struct dispatch_data_format_type_s _dispatch_data_format_type_base32hex = 1030{ 1031 .type = _DISPATCH_DATA_FORMAT_BASE32HEX, 1032 .input_mask = (_DISPATCH_DATA_FORMAT_NONE | _DISPATCH_DATA_FORMAT_BASE32 | 1033 _DISPATCH_DATA_FORMAT_BASE32HEX | _DISPATCH_DATA_FORMAT_BASE64), 1034 .output_mask = (_DISPATCH_DATA_FORMAT_NONE | _DISPATCH_DATA_FORMAT_BASE32 | 1035 _DISPATCH_DATA_FORMAT_BASE32HEX | _DISPATCH_DATA_FORMAT_BASE64), 1036 .decode = _dispatch_transform_from_base32hex, 1037 .encode = _dispatch_transform_to_base32hex, 1038}; 1039 1040const struct dispatch_data_format_type_s _dispatch_data_format_type_base64 = { 1041 .type = _DISPATCH_DATA_FORMAT_BASE64, 1042 .input_mask = (_DISPATCH_DATA_FORMAT_NONE | _DISPATCH_DATA_FORMAT_BASE32 | 1043 _DISPATCH_DATA_FORMAT_BASE32HEX | _DISPATCH_DATA_FORMAT_BASE64), 1044 .output_mask = (_DISPATCH_DATA_FORMAT_NONE | _DISPATCH_DATA_FORMAT_BASE32 | 1045 _DISPATCH_DATA_FORMAT_BASE32HEX | _DISPATCH_DATA_FORMAT_BASE64), 1046 .decode = _dispatch_transform_from_base64, 1047 .encode = _dispatch_transform_to_base64, 1048}; 1049 1050const struct dispatch_data_format_type_s _dispatch_data_format_type_utf16le = { 1051 .type = _DISPATCH_DATA_FORMAT_UTF16LE, 1052 .input_mask = (_DISPATCH_DATA_FORMAT_UTF8 | _DISPATCH_DATA_FORMAT_UTF16BE | 1053 _DISPATCH_DATA_FORMAT_UTF16LE), 1054 .output_mask = (_DISPATCH_DATA_FORMAT_UTF8 | _DISPATCH_DATA_FORMAT_UTF16BE | 1055 _DISPATCH_DATA_FORMAT_UTF16LE), 1056 .decode = _dispatch_transform_from_utf16le, 1057 .encode = _dispatch_transform_to_utf16le, 1058}; 1059 1060const struct dispatch_data_format_type_s _dispatch_data_format_type_utf16be = { 1061 .type = _DISPATCH_DATA_FORMAT_UTF16BE, 1062 .input_mask = (_DISPATCH_DATA_FORMAT_UTF8 | _DISPATCH_DATA_FORMAT_UTF16BE | 1063 _DISPATCH_DATA_FORMAT_UTF16LE), 1064 .output_mask = (_DISPATCH_DATA_FORMAT_UTF8 | _DISPATCH_DATA_FORMAT_UTF16BE | 1065 _DISPATCH_DATA_FORMAT_UTF16LE), 1066 .decode = _dispatch_transform_from_utf16be, 1067 .encode = _dispatch_transform_to_utf16be, 1068}; 1069 1070const struct dispatch_data_format_type_s _dispatch_data_format_type_utf8 = { 1071 .type = _DISPATCH_DATA_FORMAT_UTF8, 1072 .input_mask = (_DISPATCH_DATA_FORMAT_UTF8 | _DISPATCH_DATA_FORMAT_UTF16BE | 1073 _DISPATCH_DATA_FORMAT_UTF16LE), 1074 .output_mask = (_DISPATCH_DATA_FORMAT_UTF8 | _DISPATCH_DATA_FORMAT_UTF16BE | 1075 _DISPATCH_DATA_FORMAT_UTF16LE), 1076 .decode = NULL, 1077 .encode = NULL, 1078}; 1079 1080const struct dispatch_data_format_type_s _dispatch_data_format_type_utf_any = { 1081 .type = _DISPATCH_DATA_FORMAT_UTF_ANY, 1082 .input_mask = 0, 1083 .output_mask = 0, 1084 .decode = NULL, 1085 .encode = NULL, 1086}; 1087