1/* 2 Unix SMB/CIFS implementation. 3 minimal iconv implementation 4 Copyright (C) Andrew Tridgell 2001 5 Copyright (C) Jelmer Vernooij 2002 6 7 This program is free software; you can redistribute it and/or modify 8 it under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 3 of the License, or 10 (at your option) any later version. 11 12 This program is distributed in the hope that it will be useful, 13 but WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 GNU General Public License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with this program. If not, see <http://www.gnu.org/licenses/>. 19*/ 20 21#include "includes.h" 22#include "../lib/util/dlinklist.h" 23#include "system/iconv.h" 24#include "system/filesys.h" 25 26 27/** 28 * @file 29 * 30 * @brief Samba wrapper/stub for iconv character set conversion. 31 * 32 * iconv is the XPG2 interface for converting between character 33 * encodings. This file provides a Samba wrapper around it, and also 34 * a simple reimplementation that is used if the system does not 35 * implement iconv. 36 * 37 * Samba only works with encodings that are supersets of ASCII: ascii 38 * characters like whitespace can be tested for directly, multibyte 39 * sequences start with a byte with the high bit set, and strings are 40 * terminated by a nul byte. 41 * 42 * Note that the only function provided by iconv is conversion between 43 * characters. It doesn't directly support operations like 44 * uppercasing or comparison. We have to convert to UTF-16LE and 45 * compare there. 46 * 47 * @sa Samba Developers Guide 48 **/ 49 50static size_t ascii_pull (void *,const char **, size_t *, char **, size_t *); 51static size_t ascii_push (void *,const char **, size_t *, char **, size_t *); 52static size_t utf8_pull (void *,const char **, size_t *, char **, size_t *); 53static size_t utf8_push (void *,const char **, size_t *, char **, size_t *); 54static size_t utf16_munged_pull(void *,const char **, size_t *, char **, size_t *); 55static size_t ucs2hex_pull(void *,const char **, size_t *, char **, size_t *); 56static size_t ucs2hex_push(void *,const char **, size_t *, char **, size_t *); 57static size_t iconv_copy (void *,const char **, size_t *, char **, size_t *); 58static size_t iconv_swab (void *,const char **, size_t *, char **, size_t *); 59 60static const struct charset_functions builtin_functions[] = { 61 /* windows is closest to UTF-16 */ 62 {"UCS-2LE", iconv_copy, iconv_copy}, 63 {"UTF-16LE", iconv_copy, iconv_copy}, 64 {"UCS-2BE", iconv_swab, iconv_swab}, 65 {"UTF-16BE", iconv_swab, iconv_swab}, 66 67 /* we include the UTF-8 alias to cope with differing locale settings */ 68 {"UTF8", utf8_pull, utf8_push}, 69 {"UTF-8", utf8_pull, utf8_push}, 70 71 /* this handles the munging needed for String2Key */ 72 {"UTF16_MUNGED", utf16_munged_pull, iconv_copy}, 73 74 {"ASCII", ascii_pull, ascii_push}, 75 {"UCS2-HEX", ucs2hex_pull, ucs2hex_push} 76}; 77 78static struct charset_functions *charsets = NULL; 79 80bool charset_register_backend(const void *_funcs) 81{ 82 struct charset_functions *funcs = (struct charset_functions *)memdup(_funcs,sizeof(struct charset_functions)); 83 struct charset_functions *c; 84 85 /* Check whether we already have this charset... */ 86 for (c = charsets; c != NULL; c = c->next) { 87 if(!strcasecmp(c->name, funcs->name)) { 88 DEBUG(2, ("Duplicate charset %s, not registering\n", funcs->name)); 89 return false; 90 } 91 } 92 93 funcs->next = funcs->prev = NULL; 94 DLIST_ADD(charsets, funcs); 95 return true; 96} 97 98#ifdef HAVE_NATIVE_ICONV 99/* if there was an error then reset the internal state, 100 this ensures that we don't have a shift state remaining for 101 character sets like SJIS */ 102static size_t sys_iconv(void *cd, 103 const char **inbuf, size_t *inbytesleft, 104 char **outbuf, size_t *outbytesleft) 105{ 106 size_t ret = iconv((iconv_t)cd, 107 discard_const_p(char *, inbuf), inbytesleft, 108 outbuf, outbytesleft); 109 if (ret == (size_t)-1) iconv(cd, NULL, NULL, NULL, NULL); 110 return ret; 111} 112#endif 113 114/** 115 * This is a simple portable iconv() implementaion. 116 * 117 * It only knows about a very small number of character sets - just 118 * enough that Samba works on systems that don't have iconv. 119 **/ 120_PUBLIC_ size_t smb_iconv(smb_iconv_t cd, 121 const char **inbuf, size_t *inbytesleft, 122 char **outbuf, size_t *outbytesleft) 123{ 124 char cvtbuf[2048]; 125 size_t bufsize; 126 127 /* in many cases we can go direct */ 128 if (cd->direct) { 129 return cd->direct(cd->cd_direct, 130 inbuf, inbytesleft, outbuf, outbytesleft); 131 } 132 133 134 /* otherwise we have to do it chunks at a time */ 135 while (*inbytesleft > 0) { 136 char *bufp1 = cvtbuf; 137 const char *bufp2 = cvtbuf; 138 139 bufsize = sizeof(cvtbuf); 140 141 if (cd->pull(cd->cd_pull, 142 inbuf, inbytesleft, &bufp1, &bufsize) == -1 143 && errno != E2BIG) return -1; 144 145 bufsize = sizeof(cvtbuf) - bufsize; 146 147 if (cd->push(cd->cd_push, 148 &bufp2, &bufsize, 149 outbuf, outbytesleft) == -1) return -1; 150 } 151 152 return 0; 153} 154 155static bool is_utf16(const char *name) 156{ 157 return strcasecmp(name, "UCS-2LE") == 0 || 158 strcasecmp(name, "UTF-16LE") == 0; 159} 160 161int smb_iconv_t_destructor(smb_iconv_t hwd) 162{ 163#ifdef HAVE_NATIVE_ICONV 164 if (hwd->cd_pull != NULL && hwd->cd_pull != (iconv_t)-1) 165 iconv_close(hwd->cd_pull); 166 if (hwd->cd_push != NULL && hwd->cd_push != (iconv_t)-1) 167 iconv_close(hwd->cd_push); 168 if (hwd->cd_direct != NULL && hwd->cd_direct != (iconv_t)-1) 169 iconv_close(hwd->cd_direct); 170#endif 171 172 return 0; 173} 174 175_PUBLIC_ smb_iconv_t smb_iconv_open_ex(TALLOC_CTX *mem_ctx, const char *tocode, 176 const char *fromcode, bool native_iconv) 177{ 178 smb_iconv_t ret; 179 const struct charset_functions *from=NULL, *to=NULL; 180 int i; 181 182 ret = (smb_iconv_t)talloc_named(mem_ctx, 183 sizeof(*ret), 184 "iconv(%s,%s)", tocode, fromcode); 185 if (!ret) { 186 errno = ENOMEM; 187 return (smb_iconv_t)-1; 188 } 189 memset(ret, 0, sizeof(*ret)); 190 talloc_set_destructor(ret, smb_iconv_t_destructor); 191 192 /* check for the simplest null conversion */ 193 if (strcmp(fromcode, tocode) == 0) { 194 ret->direct = iconv_copy; 195 return ret; 196 } 197 198 for (i=0;i<ARRAY_SIZE(builtin_functions);i++) { 199 if (strcasecmp(fromcode, builtin_functions[i].name) == 0) { 200 from = &builtin_functions[i]; 201 } 202 if (strcasecmp(tocode, builtin_functions[i].name) == 0) { 203 to = &builtin_functions[i]; 204 } 205 } 206 207 if (from == NULL) { 208 for (from=charsets; from; from=from->next) { 209 if (strcasecmp(from->name, fromcode) == 0) break; 210 } 211 } 212 213 if (to == NULL) { 214 for (to=charsets; to; to=to->next) { 215 if (strcasecmp(to->name, tocode) == 0) break; 216 } 217 } 218 219#ifdef HAVE_NATIVE_ICONV 220 if ((!from || !to) && !native_iconv) { 221 goto failed; 222 } 223 if (!from) { 224 ret->pull = sys_iconv; 225 ret->cd_pull = iconv_open("UTF-16LE", fromcode); 226 if (ret->cd_pull == (iconv_t)-1) 227 ret->cd_pull = iconv_open("UCS-2LE", fromcode); 228 if (ret->cd_pull == (iconv_t)-1) goto failed; 229 } 230 231 if (!to) { 232 ret->push = sys_iconv; 233 ret->cd_push = iconv_open(tocode, "UTF-16LE"); 234 if (ret->cd_push == (iconv_t)-1) 235 ret->cd_push = iconv_open(tocode, "UCS-2LE"); 236 if (ret->cd_push == (iconv_t)-1) goto failed; 237 } 238#else 239 if (!from || !to) { 240 goto failed; 241 } 242#endif 243 244 /* check for conversion to/from ucs2 */ 245 if (is_utf16(fromcode) && to) { 246 ret->direct = to->push; 247 return ret; 248 } 249 if (is_utf16(tocode) && from) { 250 ret->direct = from->pull; 251 return ret; 252 } 253 254#ifdef HAVE_NATIVE_ICONV 255 if (is_utf16(fromcode)) { 256 ret->direct = sys_iconv; 257 ret->cd_direct = ret->cd_push; 258 ret->cd_push = NULL; 259 return ret; 260 } 261 if (is_utf16(tocode)) { 262 ret->direct = sys_iconv; 263 /* could be set just above - so we need to close iconv */ 264 if (ret->cd_direct != NULL && ret->cd_direct != (iconv_t)-1) 265 iconv_close(ret->cd_direct); 266 ret->cd_direct = ret->cd_pull; 267 ret->cd_pull = NULL; 268 return ret; 269 } 270#endif 271 272 /* the general case has to go via a buffer */ 273 if (!ret->pull) ret->pull = from->pull; 274 if (!ret->push) ret->push = to->push; 275 return ret; 276 277failed: 278 talloc_free(ret); 279 errno = EINVAL; 280 return (smb_iconv_t)-1; 281} 282 283/* 284 simple iconv_open() wrapper 285 */ 286_PUBLIC_ smb_iconv_t smb_iconv_open(const char *tocode, const char *fromcode) 287{ 288 return smb_iconv_open_ex(talloc_autofree_context(), tocode, fromcode, true); 289} 290 291/* 292 simple iconv_close() wrapper 293*/ 294_PUBLIC_ int smb_iconv_close(smb_iconv_t cd) 295{ 296 talloc_free(cd); 297 return 0; 298} 299 300 301/********************************************************************** 302 the following functions implement the builtin character sets in Samba 303 and also the "test" character sets that are designed to test 304 multi-byte character set support for english users 305***********************************************************************/ 306static size_t ascii_pull(void *cd, const char **inbuf, size_t *inbytesleft, 307 char **outbuf, size_t *outbytesleft) 308{ 309 while (*inbytesleft >= 1 && *outbytesleft >= 2) { 310 (*outbuf)[0] = (*inbuf)[0]; 311 (*outbuf)[1] = 0; 312 (*inbytesleft) -= 1; 313 (*outbytesleft) -= 2; 314 (*inbuf) += 1; 315 (*outbuf) += 2; 316 } 317 318 if (*inbytesleft > 0) { 319 errno = E2BIG; 320 return -1; 321 } 322 323 return 0; 324} 325 326static size_t ascii_push(void *cd, const char **inbuf, size_t *inbytesleft, 327 char **outbuf, size_t *outbytesleft) 328{ 329 int ir_count=0; 330 331 while (*inbytesleft >= 2 && *outbytesleft >= 1) { 332 (*outbuf)[0] = (*inbuf)[0] & 0x7F; 333 if ((*inbuf)[1]) ir_count++; 334 (*inbytesleft) -= 2; 335 (*outbytesleft) -= 1; 336 (*inbuf) += 2; 337 (*outbuf) += 1; 338 } 339 340 if (*inbytesleft == 1) { 341 errno = EINVAL; 342 return -1; 343 } 344 345 if (*inbytesleft > 1) { 346 errno = E2BIG; 347 return -1; 348 } 349 350 return ir_count; 351} 352 353 354static size_t ucs2hex_pull(void *cd, const char **inbuf, size_t *inbytesleft, 355 char **outbuf, size_t *outbytesleft) 356{ 357 while (*inbytesleft >= 1 && *outbytesleft >= 2) { 358 uint_t v; 359 360 if ((*inbuf)[0] != '@') { 361 /* seven bit ascii case */ 362 (*outbuf)[0] = (*inbuf)[0]; 363 (*outbuf)[1] = 0; 364 (*inbytesleft) -= 1; 365 (*outbytesleft) -= 2; 366 (*inbuf) += 1; 367 (*outbuf) += 2; 368 continue; 369 } 370 /* it's a hex character */ 371 if (*inbytesleft < 5) { 372 errno = EINVAL; 373 return -1; 374 } 375 376 if (sscanf(&(*inbuf)[1], "%04x", &v) != 1) { 377 errno = EILSEQ; 378 return -1; 379 } 380 381 (*outbuf)[0] = v&0xff; 382 (*outbuf)[1] = v>>8; 383 (*inbytesleft) -= 5; 384 (*outbytesleft) -= 2; 385 (*inbuf) += 5; 386 (*outbuf) += 2; 387 } 388 389 if (*inbytesleft > 0) { 390 errno = E2BIG; 391 return -1; 392 } 393 394 return 0; 395} 396 397static size_t ucs2hex_push(void *cd, const char **inbuf, size_t *inbytesleft, 398 char **outbuf, size_t *outbytesleft) 399{ 400 while (*inbytesleft >= 2 && *outbytesleft >= 1) { 401 char buf[6]; 402 403 if ((*inbuf)[1] == 0 && 404 ((*inbuf)[0] & 0x80) == 0 && 405 (*inbuf)[0] != '@') { 406 (*outbuf)[0] = (*inbuf)[0]; 407 (*inbytesleft) -= 2; 408 (*outbytesleft) -= 1; 409 (*inbuf) += 2; 410 (*outbuf) += 1; 411 continue; 412 } 413 if (*outbytesleft < 5) { 414 errno = E2BIG; 415 return -1; 416 } 417 snprintf(buf, 6, "@%04x", SVAL(*inbuf, 0)); 418 memcpy(*outbuf, buf, 5); 419 (*inbytesleft) -= 2; 420 (*outbytesleft) -= 5; 421 (*inbuf) += 2; 422 (*outbuf) += 5; 423 } 424 425 if (*inbytesleft == 1) { 426 errno = EINVAL; 427 return -1; 428 } 429 430 if (*inbytesleft > 1) { 431 errno = E2BIG; 432 return -1; 433 } 434 435 return 0; 436} 437 438static size_t iconv_swab(void *cd, const char **inbuf, size_t *inbytesleft, 439 char **outbuf, size_t *outbytesleft) 440{ 441 int n; 442 443 n = MIN(*inbytesleft, *outbytesleft); 444 445 swab(*inbuf, *outbuf, (n&~1)); 446 if (n&1) { 447 (*outbuf)[n-1] = 0; 448 } 449 450 (*inbytesleft) -= n; 451 (*outbytesleft) -= n; 452 (*inbuf) += n; 453 (*outbuf) += n; 454 455 if (*inbytesleft > 0) { 456 errno = E2BIG; 457 return -1; 458 } 459 460 return 0; 461} 462 463 464static size_t iconv_copy(void *cd, const char **inbuf, size_t *inbytesleft, 465 char **outbuf, size_t *outbytesleft) 466{ 467 int n; 468 469 n = MIN(*inbytesleft, *outbytesleft); 470 471 memmove(*outbuf, *inbuf, n); 472 473 (*inbytesleft) -= n; 474 (*outbytesleft) -= n; 475 (*inbuf) += n; 476 (*outbuf) += n; 477 478 if (*inbytesleft > 0) { 479 errno = E2BIG; 480 return -1; 481 } 482 483 return 0; 484} 485 486/* 487 this takes a UTF8 sequence and produces a UTF16 sequence 488 */ 489static size_t utf8_pull(void *cd, const char **inbuf, size_t *inbytesleft, 490 char **outbuf, size_t *outbytesleft) 491{ 492 size_t in_left=*inbytesleft, out_left=*outbytesleft; 493 const uint8_t *c = (const uint8_t *)*inbuf; 494 uint8_t *uc = (uint8_t *)*outbuf; 495 496 while (in_left >= 1 && out_left >= 2) { 497 if ((c[0] & 0x80) == 0) { 498 uc[0] = c[0]; 499 uc[1] = 0; 500 c += 1; 501 in_left -= 1; 502 out_left -= 2; 503 uc += 2; 504 continue; 505 } 506 507 if ((c[0] & 0xe0) == 0xc0) { 508 if (in_left < 2 || 509 (c[1] & 0xc0) != 0x80) { 510 errno = EILSEQ; 511 goto error; 512 } 513 uc[1] = (c[0]>>2) & 0x7; 514 uc[0] = (c[0]<<6) | (c[1]&0x3f); 515 c += 2; 516 in_left -= 2; 517 out_left -= 2; 518 uc += 2; 519 continue; 520 } 521 522 if ((c[0] & 0xf0) == 0xe0) { 523 if (in_left < 3 || 524 (c[1] & 0xc0) != 0x80 || 525 (c[2] & 0xc0) != 0x80) { 526 errno = EILSEQ; 527 goto error; 528 } 529 uc[1] = ((c[0]&0xF)<<4) | ((c[1]>>2)&0xF); 530 uc[0] = (c[1]<<6) | (c[2]&0x3f); 531 c += 3; 532 in_left -= 3; 533 out_left -= 2; 534 uc += 2; 535 continue; 536 } 537 538 if ((c[0] & 0xf8) == 0xf0) { 539 unsigned int codepoint; 540 if (in_left < 4 || 541 (c[1] & 0xc0) != 0x80 || 542 (c[2] & 0xc0) != 0x80 || 543 (c[3] & 0xc0) != 0x80) { 544 errno = EILSEQ; 545 goto error; 546 } 547 codepoint = 548 (c[3]&0x3f) | 549 ((c[2]&0x3f)<<6) | 550 ((c[1]&0x3f)<<12) | 551 ((c[0]&0x7)<<18); 552 if (codepoint < 0x10000) { 553 /* accept UTF-8 characters that are not 554 minimally packed, but pack the result */ 555 uc[0] = (codepoint & 0xFF); 556 uc[1] = (codepoint >> 8); 557 c += 4; 558 in_left -= 4; 559 out_left -= 2; 560 uc += 2; 561 continue; 562 } 563 564 codepoint -= 0x10000; 565 566 if (out_left < 4) { 567 errno = E2BIG; 568 goto error; 569 } 570 571 uc[0] = (codepoint>>10) & 0xFF; 572 uc[1] = (codepoint>>18) | 0xd8; 573 uc[2] = codepoint & 0xFF; 574 uc[3] = ((codepoint>>8) & 0x3) | 0xdc; 575 c += 4; 576 in_left -= 4; 577 out_left -= 4; 578 uc += 4; 579 continue; 580 } 581 582 /* we don't handle 5 byte sequences */ 583 errno = EINVAL; 584 goto error; 585 } 586 587 if (in_left > 0) { 588 errno = E2BIG; 589 goto error; 590 } 591 592 *inbytesleft = in_left; 593 *outbytesleft = out_left; 594 *inbuf = (const char *)c; 595 *outbuf = (char *)uc; 596 return 0; 597 598error: 599 *inbytesleft = in_left; 600 *outbytesleft = out_left; 601 *inbuf = (const char *)c; 602 *outbuf = (char *)uc; 603 return -1; 604} 605 606 607/* 608 this takes a UTF16 sequence and produces a UTF8 sequence 609 */ 610static size_t utf8_push(void *cd, const char **inbuf, size_t *inbytesleft, 611 char **outbuf, size_t *outbytesleft) 612{ 613 size_t in_left=*inbytesleft, out_left=*outbytesleft; 614 uint8_t *c = (uint8_t *)*outbuf; 615 const uint8_t *uc = (const uint8_t *)*inbuf; 616 617 while (in_left >= 2 && out_left >= 1) { 618 unsigned int codepoint; 619 620 if (uc[1] == 0 && !(uc[0] & 0x80)) { 621 /* simplest case */ 622 c[0] = uc[0]; 623 in_left -= 2; 624 out_left -= 1; 625 uc += 2; 626 c += 1; 627 continue; 628 } 629 630 if ((uc[1]&0xf8) == 0) { 631 /* next simplest case */ 632 if (out_left < 2) { 633 errno = E2BIG; 634 goto error; 635 } 636 c[0] = 0xc0 | (uc[0]>>6) | (uc[1]<<2); 637 c[1] = 0x80 | (uc[0] & 0x3f); 638 in_left -= 2; 639 out_left -= 2; 640 uc += 2; 641 c += 2; 642 continue; 643 } 644 645 if ((uc[1] & 0xfc) == 0xdc) { 646 /* its the second part of a 4 byte sequence. Illegal */ 647 if (in_left < 4) { 648 errno = EINVAL; 649 } else { 650 errno = EILSEQ; 651 } 652 goto error; 653 } 654 655 if ((uc[1] & 0xfc) != 0xd8) { 656 codepoint = uc[0] | (uc[1]<<8); 657 if (out_left < 3) { 658 errno = E2BIG; 659 goto error; 660 } 661 c[0] = 0xe0 | (codepoint >> 12); 662 c[1] = 0x80 | ((codepoint >> 6) & 0x3f); 663 c[2] = 0x80 | (codepoint & 0x3f); 664 665 in_left -= 2; 666 out_left -= 3; 667 uc += 2; 668 c += 3; 669 continue; 670 } 671 672 /* its the first part of a 4 byte sequence */ 673 if (in_left < 4) { 674 errno = EINVAL; 675 goto error; 676 } 677 if ((uc[3] & 0xfc) != 0xdc) { 678 errno = EILSEQ; 679 goto error; 680 } 681 codepoint = 0x10000 + (uc[2] | ((uc[3] & 0x3)<<8) | 682 (uc[0]<<10) | ((uc[1] & 0x3)<<18)); 683 684 if (out_left < 4) { 685 errno = E2BIG; 686 goto error; 687 } 688 c[0] = 0xf0 | (codepoint >> 18); 689 c[1] = 0x80 | ((codepoint >> 12) & 0x3f); 690 c[2] = 0x80 | ((codepoint >> 6) & 0x3f); 691 c[3] = 0x80 | (codepoint & 0x3f); 692 693 in_left -= 4; 694 out_left -= 4; 695 uc += 4; 696 c += 4; 697 } 698 699 if (in_left == 1) { 700 errno = EINVAL; 701 goto error; 702 } 703 704 if (in_left > 1) { 705 errno = E2BIG; 706 goto error; 707 } 708 709 *inbytesleft = in_left; 710 *outbytesleft = out_left; 711 *inbuf = (const char *)uc; 712 *outbuf = (char *)c; 713 714 return 0; 715 716error: 717 *inbytesleft = in_left; 718 *outbytesleft = out_left; 719 *inbuf = (const char *)uc; 720 *outbuf = (char *)c; 721 return -1; 722} 723 724 725/* 726 this takes a UTF16 munged sequence, modifies it according to the 727 string2key rules, and produces a UTF16 sequence 728 729The rules are: 730 731 1) any 0x0000 characters are mapped to 0x0001 732 733 2) convert any instance of 0xD800 - 0xDBFF (high surrogate) 734 without an immediately following 0xDC00 - 0x0xDFFF (low surrogate) to 735 U+FFFD (OBJECT REPLACEMENT CHARACTER). 736 737 3) the same for any low surrogate that was not preceded by a high surrogate. 738 739 */ 740static size_t utf16_munged_pull(void *cd, const char **inbuf, size_t *inbytesleft, 741 char **outbuf, size_t *outbytesleft) 742{ 743 size_t in_left=*inbytesleft, out_left=*outbytesleft; 744 uint8_t *c = (uint8_t *)*outbuf; 745 const uint8_t *uc = (const uint8_t *)*inbuf; 746 747 while (in_left >= 2 && out_left >= 2) { 748 unsigned int codepoint = uc[0] | (uc[1]<<8); 749 750 if (codepoint == 0) { 751 codepoint = 1; 752 } 753 754 if ((codepoint & 0xfc00) == 0xd800) { 755 /* a high surrogate */ 756 unsigned int codepoint2; 757 if (in_left < 4) { 758 codepoint = 0xfffd; 759 goto codepoint16; 760 } 761 codepoint2 = uc[2] | (uc[3]<<8); 762 if ((codepoint2 & 0xfc00) != 0xdc00) { 763 /* high surrogate not followed by low 764 surrogate: convert to 0xfffd */ 765 codepoint = 0xfffd; 766 goto codepoint16; 767 } 768 if (out_left < 4) { 769 errno = E2BIG; 770 goto error; 771 } 772 memcpy(c, uc, 4); 773 in_left -= 4; 774 out_left -= 4; 775 uc += 4; 776 c += 4; 777 continue; 778 } 779 780 if ((codepoint & 0xfc00) == 0xdc00) { 781 /* low surrogate not preceded by high 782 surrogate: convert to 0xfffd */ 783 codepoint = 0xfffd; 784 } 785 786 codepoint16: 787 c[0] = codepoint & 0xFF; 788 c[1] = (codepoint>>8) & 0xFF; 789 790 in_left -= 2; 791 out_left -= 2; 792 uc += 2; 793 c += 2; 794 continue; 795 } 796 797 if (in_left == 1) { 798 errno = EINVAL; 799 goto error; 800 } 801 802 if (in_left > 1) { 803 errno = E2BIG; 804 goto error; 805 } 806 807 *inbytesleft = in_left; 808 *outbytesleft = out_left; 809 *inbuf = (const char *)uc; 810 *outbuf = (char *)c; 811 812 return 0; 813 814error: 815 *inbytesleft = in_left; 816 *outbytesleft = out_left; 817 *inbuf = (const char *)uc; 818 *outbuf = (char *)c; 819 return -1; 820} 821 822 823 824