#ifndef lint
static char *rcsid = "$Id: converter.c,v 1.1 2003/06/04 00:25:51 marka Exp $";
#endif

/*
 * Copyright (c) 2000,2002 Japan Network Information Center.
 * All rights reserved.
 *
 * By using this file, you agree to the terms and conditions set forth bellow.
 *
 *			LICENSE TERMS AND CONDITIONS
 *
 * The following License Terms and Conditions apply, unless a different
 * license is obtained from Japan Network Information Center ("JPNIC"),
 * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
 * Chiyoda-ku, Tokyo 101-0047, Japan.
 *
 * 1. Use, Modification and Redistribution (including distribution of any
 *    modified or derived work) in source and/or binary forms is permitted
 *    under this License Terms and Conditions.
 *
 * 2. Redistribution of source code must retain the copyright notices as they
 *    appear in each source code file, this License Terms and Conditions.
 *
 * 3. Redistribution in binary form must reproduce the Copyright Notice,
 *    this License Terms and Conditions, in the documentation and/or other
 *    materials provided with the distribution.  For the purposes of binary
 *    distribution the "Copyright Notice" refers to the following language:
 *    "Copyright (c) 2000-2002 Japan Network Information Center.  All rights reserved."
 *
 * 4. The name of JPNIC may not be used to endorse or promote products
 *    derived from this Software without specific prior written approval of
 *    JPNIC.
 *
 * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
 *    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 *    PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL JPNIC BE LIABLE
 *    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 *    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 *    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 *    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 *    WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 *    OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 *    ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
 */

#include <config.h>

#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
#ifndef WITHOUT_ICONV
#include <iconv.h>
#endif

#include <idn/result.h>
#include <idn/assert.h>
#include <idn/logmacro.h>
#include <idn/converter.h>
#include <idn/aliaslist.h>
#include <idn/strhash.h>
#include <idn/debug.h>
#include <idn/ucs4.h>
#include <idn/punycode.h>
#include <idn/race.h>
#include <idn/util.h>

/*
 * Names under which the built-in encodings are known.  Each one can be
 * overridden at build time by defining the macro beforehand.
 */
#ifndef IDN_UTF8_ENCODING_NAME
#define IDN_UTF8_ENCODING_NAME "UTF-8"		/* by IANA */
#endif
#ifndef IDN_RACE_ENCODING_NAME
#define IDN_RACE_ENCODING_NAME "RACE"
#endif
#ifndef IDN_AMCACEZ_ENCODING_NAME
#define IDN_AMCACEZ_ENCODING_NAME "AMC-ACE-Z"
#endif
#ifndef IDN_PUNYCODE_ENCODING_NAME
#define IDN_PUNYCODE_ENCODING_NAME "Punycode"
#endif

/* NOTE(review): not referenced anywhere in this file -- confirm still needed. */
#define MAX_RECURSE	20

#ifdef WIN32

/*
 * Registry key / value naming the encoding alias file on Windows.
 * NOTE(review): only IDNVAL_ALIASFILE is used in this file.
 */
#define IDNKEY_IDNKIT		"Software\\JPNIC\\IDN"
#define IDNVAL_ALIASFILE	"AliasFile"

#else /* WIN32 */

/* Location of the system-wide encoding alias file on non-Windows hosts. */
#ifndef IDN_RESCONF_DIR
#define IDN_RESCONF_DIR		"/etc"
#endif
#define IDN_ALIAS_FILE		IDN_RESCONF_DIR "/idnalias.conf"

#endif /* WIN32 */

/*
 * Table of operations implementing one encoding.  A converter context
 * points at exactly one of these tables.
 */
typedef struct {
	/* Prepares the UCS4 -> local direction; run before its first use. */
	idn_converter_openproc_t openfromucs4;
	/* Prepares the local -> UCS4 direction; run before its first use. */
	idn_converter_openproc_t opentoucs4;
	/* Converts a UCS4 string to the local encoding. */
	idn_converter_convfromucs4proc_t convfromucs4;
	/* Converts a local-encoding string to UCS4. */
	idn_converter_convtoucs4proc_t convtoucs4;
	/* Releases whatever the open procedures stored as private data. */
	idn_converter_closeproc_t close;
	/* IDN_NONACE, or one of the IDN_ACE_* kinds for ACE encodings. */
	int encoding_type;
} converter_ops_t;
109 110struct idn_converter { 111 char *local_encoding_name; 112 converter_ops_t *ops; 113 int flags; 114 int opened_convfromucs4; 115 int opened_convtoucs4; 116 int reference_count; 117 void *private_data; 118}; 119 120static idn__strhash_t encoding_name_hash; 121static idn__aliaslist_t encoding_alias_list; 122 123static idn_result_t register_standard_encoding(void); 124static idn_result_t roundtrip_check(idn_converter_t ctx, 125 const unsigned long *from, 126 const char *to); 127 128static idn_result_t 129 converter_none_open(idn_converter_t ctx, void **privdata); 130static idn_result_t 131 converter_none_close(idn_converter_t ctx, void *privdata); 132static idn_result_t 133 converter_none_convfromucs4(idn_converter_t ctx, 134 void *privdata, 135 const unsigned long *from, 136 char *to, size_t tolen); 137static idn_result_t 138 converter_none_convtoucs4(idn_converter_t ctx, 139 void *privdata, const char *from, 140 unsigned long *to, size_t tolen); 141 142#ifndef WITHOUT_ICONV 143static idn_result_t 144 converter_iconv_openfromucs4(idn_converter_t ctx, void **privdata); 145static idn_result_t 146 converter_iconv_opentoucs4(idn_converter_t ctx, void **privdata); 147static idn_result_t 148 converter_iconv_close(idn_converter_t ctx, void *privdata); 149static idn_result_t 150 converter_iconv_convfromucs4(idn_converter_t ctx, 151 void *privdata, 152 const unsigned long *from, 153 char *to, size_t tolen); 154static idn_result_t 155 converter_iconv_convtoucs4(idn_converter_t ctx, 156 void *privdata, 157 const char *from, 158 unsigned long *to, size_t tolen); 159 160static idn_result_t 161iconv_initialize_privdata(void **privdata); 162static void 163iconv_finalize_privdata(void *privdata); 164 165static char * get_system_aliasfile(void); 166static int file_exist(const char *filename); 167 168#endif /* !WITHOUT_ICONV */ 169 170#ifdef DEBUG 171static idn_result_t 172 converter_uescape_convfromucs4(idn_converter_t ctx, 173 void *privdata, 174 const unsigned long *from, 175 
char *to, size_t tolen); 176static idn_result_t 177 converter_uescape_convtoucs4(idn_converter_t ctx, 178 void *privdata, 179 const char *from, 180 unsigned long *to, 181 size_t tolen); 182#endif /* DEBUG */ 183 184static converter_ops_t none_converter_ops = { 185 converter_none_open, 186 converter_none_open, 187 converter_none_convfromucs4, 188 converter_none_convtoucs4, 189 converter_none_close, 190 IDN_NONACE, 191}; 192 193#ifndef WITHOUT_ICONV 194static converter_ops_t iconv_converter_ops = { 195 converter_iconv_openfromucs4, 196 converter_iconv_opentoucs4, 197 converter_iconv_convfromucs4, 198 converter_iconv_convtoucs4, 199 converter_iconv_close, 200 IDN_NONACE, 201}; 202#endif 203 204/* 205 * Initialize. 206 */ 207 208idn_result_t 209idn_converter_initialize(void) { 210 idn_result_t r; 211 idn__strhash_t hash; 212 idn__aliaslist_t list; 213#ifndef WITHOUT_ICONV 214 const char *fname; 215#endif 216 217 TRACE(("idn_converter_initialize()\n")); 218 219 if (encoding_name_hash == NULL) { 220 if ((r = idn__strhash_create(&hash)) != idn_success) 221 goto ret; 222 encoding_name_hash = hash; 223 r = register_standard_encoding(); 224 } 225 if (encoding_alias_list == NULL) { 226 if ((r = idn__aliaslist_create(&list)) != idn_success) 227 goto ret; 228 encoding_alias_list = list; 229#ifndef WITHOUT_ICONV 230 fname = get_system_aliasfile(); 231 if (fname != NULL && file_exist(fname)) 232 idn_converter_aliasfile(fname); 233#endif 234 } 235 236 r = idn_success; 237ret: 238 TRACE(("idn_converter_initialize(): %s\n", idn_result_tostring(r))); 239 return (r); 240} 241 242#ifndef WITHOUT_ICONV 243static char * 244get_system_aliasfile() { 245#ifdef WIN32 246 static char alias_path[500]; /* a good longer than MAX_PATH */ 247 248 if (idn__util_getregistrystring(idn__util_hkey_localmachine, 249 IDNVAL_ALIASFILE, 250 alias_path, sizeof(alias_path))) { 251 return (alias_path); 252 } else { 253 return (NULL); 254 } 255#else 256 return (IDN_ALIAS_FILE); 257#endif 258} 259 260static int 
261file_exist(const char *filename) { 262 FILE *fp; 263 264 if ((fp = fopen(filename, "r")) == NULL) 265 return (0); 266 fclose(fp); 267 return (1); 268} 269#endif 270 271idn_result_t 272idn_converter_create(const char *name, idn_converter_t *ctxp, int flags) { 273 const char *realname; 274 idn_converter_t ctx; 275 idn_result_t r; 276 void *v; 277 278 assert(name != NULL && ctxp != NULL); 279 280 TRACE(("idn_converter_create(%s)\n", name)); 281 282 realname = idn_converter_getrealname(name); 283#ifdef DEBUG 284 if (strcmp(name, realname) != 0) { 285 TRACE(("idn_converter_create: realname=%s\n", realname)); 286 } 287#endif 288 289 *ctxp = NULL; 290 291 /* Allocate memory for a converter context and the name. */ 292 ctx = malloc(sizeof(struct idn_converter) + strlen(realname) + 1); 293 if (ctx == NULL) { 294 r = idn_nomemory; 295 goto ret; 296 } 297 298 ctx->local_encoding_name = (char *)(ctx + 1); 299 (void)strcpy(ctx->local_encoding_name, realname); 300 ctx->flags = flags; 301 ctx->reference_count = 1; 302 ctx->opened_convfromucs4 = 0; 303 ctx->opened_convtoucs4 = 0; 304 ctx->private_data = NULL; 305 306 assert(encoding_name_hash != NULL); 307 308 if (strcmp(realname, IDN_UTF8_ENCODING_NAME) == 0) { 309 /* No conversion needed */ 310 ctx->ops = &none_converter_ops; 311 } else if ((r = idn__strhash_get(encoding_name_hash, realname, &v)) 312 == idn_success) { 313 /* Special converter found */ 314 ctx->ops = (converter_ops_t *)v; 315 } else { 316 /* General case */ 317#ifdef WITHOUT_ICONV 318 free(ctx); 319 *ctxp = NULL; 320 r = idn_invalid_name; 321 goto ret; 322#else 323 ctx->ops = &iconv_converter_ops; 324#endif 325 } 326 327 if ((flags & IDN_CONVERTER_DELAYEDOPEN) == 0) { 328 r = (ctx->ops->openfromucs4)(ctx, &(ctx->private_data)); 329 if (r != idn_success) { 330 WARNING(("idn_converter_create(): open failed " 331 "(ucs4->local)\n")); 332 free(ctx); 333 *ctxp = NULL; 334 goto ret; 335 } 336 ctx->opened_convfromucs4 = 1; 337 338 r = (*ctx->ops->opentoucs4)(ctx, 
&(ctx->private_data)); 339 if (r != idn_success) { 340 WARNING(("idn_converter_create(): open failed " 341 "(local->ucs4)\n")); 342 free(ctx); 343 *ctxp = NULL; 344 goto ret; 345 } 346 ctx->opened_convtoucs4 = 1; 347 } 348 349 *ctxp = ctx; 350 r = idn_success; 351ret: 352 TRACE(("idn_converter_create(): %s\n", idn_result_tostring(r))); 353 return (r); 354} 355 356void 357idn_converter_destroy(idn_converter_t ctx) { 358 assert(ctx != NULL); 359 360 TRACE(("idn_converter_destroy(ctx=%s)\n", ctx->local_encoding_name)); 361 362 ctx->reference_count--; 363 if (ctx->reference_count <= 0) { 364 TRACE(("idn_converter_destroy(): the object is destroyed\n")); 365 (void)(*ctx->ops->close)(ctx, ctx->private_data); 366 free(ctx); 367 } else { 368 TRACE(("idn_converter_destroy(): " 369 "update reference count (%d->%d)\n", 370 ctx->reference_count + 1, ctx->reference_count)); 371 } 372} 373 374void 375idn_converter_incrref(idn_converter_t ctx) { 376 assert(ctx != NULL); 377 378 TRACE(("idn_converter_incrref(ctx=%s)\n", ctx->local_encoding_name)); 379 TRACE(("idn_converter_incrref: update reference count (%d->%d)\n", 380 ctx->reference_count, ctx->reference_count + 1)); 381 382 ctx->reference_count++; 383} 384 385char * 386idn_converter_localencoding(idn_converter_t ctx) { 387 assert(ctx != NULL); 388 TRACE(("idn_converter_localencoding(ctx=%s)\n", 389 ctx->local_encoding_name)); 390 return (ctx->local_encoding_name); 391} 392 393int 394idn_converter_encodingtype(idn_converter_t ctx) { 395 int encoding_type; 396 397 assert(ctx != NULL); 398 TRACE(("idn_converter_encodingtype(ctx=%s)\n", 399 ctx->local_encoding_name)); 400 401 encoding_type = ctx->ops->encoding_type; 402 TRACE(("idn_converter_encodingtype(): %d\n", encoding_type)); 403 return (encoding_type); 404} 405 406int 407idn_converter_isasciicompatible(idn_converter_t ctx) { 408 int iscompat; 409 410 assert(ctx != NULL); 411 TRACE(("idn_converter_isasciicompatible(ctx=%s)\n", 412 ctx->local_encoding_name)); 413 414 iscompat 
= (ctx->ops->encoding_type != IDN_NONACE); 415 TRACE(("idn_converter_isasciicompatible(): %d\n", iscompat)); 416 return (iscompat); 417} 418 419idn_result_t 420idn_converter_convfromucs4(idn_converter_t ctx, const unsigned long *from, 421 char *to, size_t tolen) { 422 idn_result_t r; 423 424 assert(ctx != NULL && from != NULL && to != NULL); 425 426 TRACE(("idn_converter_convfromucs4(ctx=%s, from=\"%s\", tolen=%d)\n", 427 ctx->local_encoding_name, idn__debug_ucs4xstring(from, 50), 428 (int)tolen)); 429 430 if (!ctx->opened_convfromucs4) { 431 r = (*ctx->ops->openfromucs4)(ctx, &(ctx->private_data)); 432 if (r != idn_success) 433 goto ret; 434 ctx->opened_convfromucs4 = 1; 435 } 436 437 r = (*ctx->ops->convfromucs4)(ctx, ctx->private_data, from, to, tolen); 438 if (r != idn_success) 439 goto ret; 440 if ((ctx->flags & IDN_CONVERTER_RTCHECK) != 0) { 441 r = roundtrip_check(ctx, from, to); 442 if (r != idn_success) 443 goto ret; 444 } 445 446 r = idn_success; 447ret: 448 if (r == idn_success) { 449 TRACE(("idn_converter_convfromucs4(): success (to=\"%s\")\n", 450 idn__debug_xstring(to, 50))); 451 } else { 452 TRACE(("idn_converter_convfromucs4(): %s\n", 453 idn_result_tostring(r))); 454 } 455 return (r); 456} 457 458idn_result_t 459idn_converter_convtoucs4(idn_converter_t ctx, const char *from, 460 unsigned long *to, size_t tolen) { 461 idn_result_t r; 462 463 assert(ctx != NULL && from != NULL && to != NULL); 464 465 TRACE(("idn_converter_convtoucs4(ctx=%s, from=\"%s\", tolen=%d)\n", 466 ctx->local_encoding_name, idn__debug_xstring(from, 50), 467 (int)tolen)); 468 469 if (!ctx->opened_convtoucs4) { 470 r = (*ctx->ops->opentoucs4)(ctx, &(ctx->private_data)); 471 if (r != idn_success) 472 goto ret; 473 ctx->opened_convtoucs4 = 1; 474 } 475 476 r = (*ctx->ops->convtoucs4)(ctx, ctx->private_data, from, to, tolen); 477ret: 478 if (r == idn_success) { 479 TRACE(("idn_converter_convtoucs4(): success (to=\"%s\")\n", 480 idn__debug_ucs4xstring(to, 50))); 481 } else { 482 
TRACE(("idn_converter_convtoucs4(): %s\n", 483 idn_result_tostring(r))); 484 } 485 return (r); 486} 487 488/* 489 * Encoding registration. 490 */ 491 492idn_result_t 493idn_converter_register(const char *name, 494 idn_converter_openproc_t openfromucs4, 495 idn_converter_openproc_t opentoucs4, 496 idn_converter_convfromucs4proc_t convfromucs4, 497 idn_converter_convtoucs4proc_t convtoucs4, 498 idn_converter_closeproc_t close, 499 int encoding_type) { 500 converter_ops_t *ops; 501 idn_result_t r; 502 503 assert(name != NULL && convfromucs4 != NULL && convtoucs4 != NULL); 504 505 TRACE(("idn_converter_register(name=%s)\n", name)); 506 507 if ((ops = malloc(sizeof(*ops))) == NULL) { 508 r = idn_nomemory; 509 goto ret; 510 } 511 512 if (openfromucs4 == NULL) 513 openfromucs4 = converter_none_open; 514 if (opentoucs4 == NULL) 515 opentoucs4 = converter_none_open; 516 if (close == NULL) 517 close = converter_none_close; 518 519 ops->openfromucs4 = openfromucs4; 520 ops->opentoucs4 = opentoucs4; 521 ops->convfromucs4 = convfromucs4; 522 ops->convtoucs4 = convtoucs4; 523 ops->close = close; 524 ops->encoding_type = encoding_type; 525 526 r = idn__strhash_put(encoding_name_hash, name, ops); 527 if (r != idn_success) { 528 free(ops); 529 goto ret; 530 } 531 532 r = idn_success; 533ret: 534 TRACE(("idn_converter_register(): %s\n", idn_result_tostring(r))); 535 return (r); 536} 537 538static idn_result_t 539register_standard_encoding(void) { 540 idn_result_t r; 541 542 r = idn_converter_register(IDN_PUNYCODE_ENCODING_NAME, 543 NULL, 544 NULL, 545 idn__punycode_encode, 546 idn__punycode_decode, 547 converter_none_close, 548 IDN_ACE_STRICTCASE); 549 if (r != idn_success) 550 return (r); 551 552#ifdef IDN_EXTRA_ACE 553 r = idn_converter_register(IDN_AMCACEZ_ENCODING_NAME, 554 NULL, 555 NULL, 556 idn__punycode_encode, 557 idn__punycode_decode, 558 converter_none_close, 559 IDN_ACE_STRICTCASE); 560 if (r != idn_success) 561 return (r); 562 563 r = 
idn_converter_register(IDN_RACE_ENCODING_NAME, 564 NULL, 565 NULL, 566 idn__race_encode, 567 idn__race_decode, 568 converter_none_close, 569 IDN_ACE_LOOSECASE); 570 if (r != idn_success) 571 return (r); 572#endif /* IDN_EXTRA_ACE */ 573 574#ifdef DEBUG 575 /* This is convenient for debug. Not useful for other purposes. */ 576 r = idn_converter_register("U-escape", 577 NULL, 578 NULL, 579 converter_uescape_convfromucs4, 580 converter_uescape_convtoucs4, 581 NULL, 582 IDN_NONACE); 583 if (r != idn_success) 584 return (r); 585#endif /* DEBUG */ 586 587 return (r); 588} 589 590/* 591 * Encoding alias support. 592 */ 593idn_result_t 594idn_converter_addalias(const char *alias_name, const char *real_name, 595 int first_item) { 596 idn_result_t r; 597 598 assert(alias_name != NULL && real_name != NULL); 599 600 TRACE(("idn_converter_addalias(alias_name=%s,real_name=%s)\n", 601 alias_name, real_name)); 602 603 if (strlen(alias_name) == 0 || strlen(real_name) == 0) { 604 return idn_invalid_syntax; 605 } 606 607 if (strcmp(alias_name, real_name) == 0) { 608 r = idn_success; 609 goto ret; 610 } 611 612 if (encoding_alias_list == NULL) { 613 WARNING(("idn_converter_addalias(): the module is not " 614 "initialized\n")); 615 r = idn_failure; 616 goto ret; 617 } 618 619 r = idn__aliaslist_additem(encoding_alias_list, alias_name, real_name, 620 first_item); 621ret: 622 TRACE(("idn_converter_addalias(): %s\n", idn_result_tostring(r))); 623 return (r); 624} 625 626idn_result_t 627idn_converter_aliasfile(const char *path) { 628 idn_result_t r; 629 630 assert(path != NULL); 631 632 TRACE(("idn_converter_aliasfile(path=%s)\n", path)); 633 634 if (encoding_alias_list == NULL) { 635 WARNING(("idn_converter_aliasfile(): the module is not " 636 "initialized\n")); 637 return (idn_failure); 638 } 639 640 r = idn__aliaslist_aliasfile(encoding_alias_list, path); 641 642 TRACE(("idn_converter_aliasfile(): %s\n", idn_result_tostring(r))); 643 return (r); 644} 645 646idn_result_t 
647idn_converter_resetalias(void) { 648 idn__aliaslist_t list; 649 idn_result_t r; 650 651 TRACE(("idn_converter_resetalias()\n")); 652 653 if (encoding_alias_list == NULL) { 654 WARNING(("idn_converter_resetalias(): the module is not " 655 "initialized\n")); 656 return (idn_failure); 657 } 658 659 list = encoding_alias_list; 660 encoding_alias_list = NULL; 661 idn__aliaslist_destroy(list); 662 list = NULL; 663 r = idn__aliaslist_create(&list); 664 encoding_alias_list = list; 665 666 TRACE(("idn_converter_resetalias(): %s\n", idn_result_tostring(r))); 667 return (r); 668} 669 670const char * 671idn_converter_getrealname(const char *name) { 672 char *realname; 673 idn_result_t r; 674 675 TRACE(("idn_converter_getrealname()\n")); 676 677 assert(name != NULL); 678 679 if (encoding_alias_list == NULL) { 680 WARNING(("idn_converter_getrealname(): the module is not " 681 "initialized\n")); 682 return (name); 683 } 684 685 r = idn__aliaslist_find(encoding_alias_list, name, &realname); 686 if (r != idn_success) { 687 return (name); 688 } 689 return (realname); 690} 691 692/* 693 * Round trip check. 694 */ 695 696static idn_result_t 697roundtrip_check(idn_converter_t ctx, const unsigned long *from, const char *to) 698{ 699 /* 700 * One problem with iconv() convertion is that 701 * iconv() doesn't signal an error if the input 702 * string contains characters which are valid but 703 * do not have mapping to the output codeset. 704 * (the behavior of iconv() for that case is defined as 705 * `implementation dependent') 706 * One way to check this case is to perform round-trip 707 * conversion and see if it is same as the original string. 708 */ 709 idn_result_t r; 710 unsigned long *back; 711 unsigned long backbuf[256]; 712 size_t fromlen; 713 size_t backlen; 714 715 TRACE(("idn_converter_convert: round-trip checking (from=\"%s\")\n", 716 idn__debug_ucs4xstring(from, 50))); 717 718 /* Allocate enough buffer. 
*/ 719 fromlen = idn_ucs4_strlen(from) + 1; 720 if (fromlen * sizeof(*back) <= sizeof(backbuf)) { 721 backlen = sizeof(backbuf); 722 back = backbuf; 723 } else { 724 backlen = fromlen; 725 back = (unsigned long *)malloc(backlen * sizeof(*back)); 726 if (back == NULL) 727 return (idn_nomemory); 728 } 729 730 /* 731 * Perform backward conversion. 732 */ 733 r = idn_converter_convtoucs4(ctx, to, back, backlen); 734 switch (r) { 735 case idn_success: 736 if (memcmp(back, from, sizeof(*from) * fromlen) != 0) 737 r = idn_nomapping; 738 break; 739 case idn_invalid_encoding: 740 case idn_buffer_overflow: 741 r = idn_nomapping; 742 break; 743 default: 744 break; 745 } 746 747 if (back != backbuf) 748 free(back); 749 750 if (r != idn_success) { 751 TRACE(("round-trip check failed: %s\n", 752 idn_result_tostring(r))); 753 } 754 755 return (r); 756} 757 758/* 759 * Identity conversion (or, no conversion at all). 760 */ 761 762static idn_result_t 763converter_none_open(idn_converter_t ctx, void **privdata) { 764 assert(ctx != NULL); 765 766 return (idn_success); 767} 768 769static idn_result_t 770converter_none_close(idn_converter_t ctx, void *privdata) { 771 assert(ctx != NULL); 772 773 return (idn_success); 774} 775 776static idn_result_t 777converter_none_convfromucs4(idn_converter_t ctx, void *privdata, 778 const unsigned long *from, char *to, size_t tolen) { 779 assert(ctx != NULL && from != NULL && to != NULL); 780 781 return idn_ucs4_ucs4toutf8(from, to, tolen); 782} 783 784static idn_result_t 785converter_none_convtoucs4(idn_converter_t ctx, void *privdata, 786 const char *from, unsigned long *to, size_t tolen) { 787 assert(ctx != NULL && from != NULL && to != NULL); 788 789 return idn_ucs4_utf8toucs4(from, to, tolen); 790} 791 792#ifndef WITHOUT_ICONV 793 794/* 795 * Conversion using iconv() interface. 
796 */ 797 798static idn_result_t 799converter_iconv_openfromucs4(idn_converter_t ctx, void **privdata) { 800 iconv_t *ictxp; 801 idn_result_t r; 802 803 assert(ctx != NULL); 804 805 r = iconv_initialize_privdata(privdata); 806 if (r != idn_success) 807 return (r); 808 809 ictxp = (iconv_t *)*privdata; 810 *ictxp = iconv_open(ctx->local_encoding_name, IDN_UTF8_ENCODING_NAME); 811 if (*ictxp == (iconv_t)(-1)) { 812 free(*privdata); 813 *privdata = NULL; 814 switch (errno) { 815 case ENOMEM: 816 return (idn_nomemory); 817 case EINVAL: 818 return (idn_invalid_name); 819 default: 820 WARNING(("iconv_open failed with errno %d\n", errno)); 821 return (idn_failure); 822 } 823 } 824 825 return (idn_success); 826} 827 828static idn_result_t 829converter_iconv_opentoucs4(idn_converter_t ctx, void **privdata) { 830 iconv_t *ictxp; 831 idn_result_t r; 832 833 assert(ctx != NULL); 834 835 r = iconv_initialize_privdata(privdata); 836 if (r != idn_success) 837 return (r); 838 839 ictxp = (iconv_t *)*privdata + 1; 840 *ictxp = iconv_open(IDN_UTF8_ENCODING_NAME, ctx->local_encoding_name); 841 if (*ictxp == (iconv_t)(-1)) { 842 free(*privdata); 843 *privdata = NULL; 844 switch (errno) { 845 case ENOMEM: 846 return (idn_nomemory); 847 case EINVAL: 848 return (idn_invalid_name); 849 default: 850 WARNING(("iconv_open failed with errno %d\n", errno)); 851 return (idn_failure); 852 } 853 } 854 855 return (idn_success); 856} 857 858static idn_result_t 859iconv_initialize_privdata(void **privdata) { 860 if (*privdata == NULL) { 861 *privdata = malloc(sizeof(iconv_t) * 2); 862 if (*privdata == NULL) 863 return (idn_nomemory); 864 *((iconv_t *)*privdata) = (iconv_t)(-1); 865 *((iconv_t *)*privdata + 1) = (iconv_t)(-1); 866 } 867 868 return (idn_success); 869} 870 871static void 872iconv_finalize_privdata(void *privdata) { 873 iconv_t *ictxp; 874 875 if (privdata != NULL) { 876 ictxp = (iconv_t *)privdata; 877 if (*ictxp != (iconv_t)(-1)) 878 iconv_close(*ictxp); 879 880 ictxp++; 881 if 
(*ictxp != (iconv_t)(-1)) 882 iconv_close(*ictxp); 883 free(privdata); 884 } 885} 886 887static idn_result_t 888converter_iconv_close(idn_converter_t ctx, void *privdata) { 889 assert(ctx != NULL); 890 891 iconv_finalize_privdata(privdata); 892 893 return (idn_success); 894} 895 896static idn_result_t 897converter_iconv_convfromucs4(idn_converter_t ctx, void *privdata, 898 const unsigned long *from, char *to, 899 size_t tolen) { 900 iconv_t ictx; 901 char *utf8 = NULL; 902 size_t utf8size = 256; /* large enough */ 903 idn_result_t r; 904 size_t sz; 905 size_t inleft; 906 size_t outleft; 907 char *inbuf, *outbuf; 908 909 assert(ctx != NULL && from != NULL && to != NULL); 910 911 if (tolen <= 0) { 912 r = idn_buffer_overflow; /* need space for NUL */ 913 goto ret; 914 } 915 916 /* 917 * UCS4 -> UTF-8 conversion. 918 */ 919 utf8 = (char *)malloc(utf8size); 920 if (utf8 == NULL) { 921 r = idn_nomemory; 922 goto ret; 923 } 924 925try_again: 926 r = idn_ucs4_ucs4toutf8(from, utf8, utf8size); 927 if (r == idn_buffer_overflow) { 928 char *new_utf8; 929 930 utf8size *= 2; 931 new_utf8 = (char *)realloc(utf8, utf8size); 932 if (new_utf8 == NULL) { 933 r = idn_nomemory; 934 goto ret; 935 } 936 utf8 = new_utf8; 937 goto try_again; 938 } else if (r != idn_success) { 939 goto ret; 940 } 941 942 ictx = ((iconv_t *)privdata)[0]; 943 944 /* 945 * Reset internal state. 946 * 947 * The following code should work according to the SUSv2 spec, 948 * but causes segmentation fault with Solaris 2.6. 949 * So.. a work-around. 
950 * 951 * (void)iconv(ictx, (const char **)NULL, (size_t *)NULL, 952 * (char **)NULL, (size_t *)NULL); 953 */ 954 inleft = 0; 955 outbuf = NULL; 956 outleft = 0; 957 (void)iconv(ictx, (const char **)NULL, &inleft, &outbuf, &outleft); 958 959 inleft = strlen(utf8); 960 inbuf = utf8; 961 outleft = tolen - 1; /* reserve space for terminating NUL */ 962 sz = iconv(ictx, (const char **)&inbuf, &inleft, &to, &outleft); 963 964 if (sz == (size_t)(-1) || inleft > 0) { 965 switch (errno) { 966 case EILSEQ: 967 case EINVAL: 968 /* 969 * We already checked the validity of the input 970 * string. So we assume a mapping error. 971 */ 972 r = idn_nomapping; 973 goto ret; 974 case E2BIG: 975 r = idn_buffer_overflow; 976 goto ret; 977 default: 978 WARNING(("iconv failed with errno %d\n", errno)); 979 r = idn_failure; 980 goto ret; 981 } 982 } 983 984 /* 985 * For UTF-8 -> local conversion, append a sequence of 986 * state reset. 987 */ 988 inleft = 0; 989 sz = iconv(ictx, (const char **)NULL, &inleft, &to, &outleft); 990 if (sz == (size_t)(-1)) { 991 switch (errno) { 992 case EILSEQ: 993 case EINVAL: 994 r = idn_invalid_encoding; 995 goto ret; 996 case E2BIG: 997 r = idn_buffer_overflow; 998 goto ret; 999 default: 1000 WARNING(("iconv failed with errno %d\n", errno)); 1001 r = idn_failure; 1002 goto ret; 1003 } 1004 } 1005 *to = '\0'; 1006 r = idn_success; 1007 1008ret: 1009 free(utf8); 1010 return (r); 1011 1012} 1013 1014static idn_result_t 1015converter_iconv_convtoucs4(idn_converter_t ctx, void *privdata, 1016 const char *from, unsigned long *to, size_t tolen) { 1017 iconv_t ictx; 1018 char *utf8 = NULL; 1019 size_t utf8size = 256; /* large enough */ 1020 idn_result_t r; 1021 size_t sz; 1022 size_t inleft; 1023 size_t outleft; 1024 const char *from_ptr; 1025 char *outbuf; 1026 1027 assert(ctx != NULL && from != NULL && to != NULL); 1028 1029 if (tolen <= 0) { 1030 r = idn_buffer_overflow; /* need space for NUL */ 1031 goto ret; 1032 } 1033 ictx = ((iconv_t *)privdata)[1]; 
1034 utf8 = (char *)malloc(utf8size); 1035 if (utf8 == NULL) { 1036 r = idn_nomemory; 1037 goto ret; 1038 } 1039 1040try_again: 1041 /* 1042 * Reset internal state. 1043 */ 1044 inleft = 0; 1045 outbuf = NULL; 1046 outleft = 0; 1047 (void)iconv(ictx, (const char **)NULL, &inleft, &outbuf, &outleft); 1048 1049 from_ptr = from; 1050 inleft = strlen(from); 1051 outbuf = utf8; 1052 outleft = utf8size - 1; /* reserve space for terminating NUL */ 1053 sz = iconv(ictx, (const char **)&from_ptr, &inleft, &outbuf, &outleft); 1054 1055 if (sz == (size_t)(-1) || inleft > 0) { 1056 char *new_utf8; 1057 1058 switch (errno) { 1059 case EILSEQ: 1060 case EINVAL: 1061 /* 1062 * We assume all the characters in the local 1063 * codeset are included in UCS. This means mapping 1064 * error is not possible, so the input string must 1065 * have some problem. 1066 */ 1067 r = idn_invalid_encoding; 1068 goto ret; 1069 case E2BIG: 1070 utf8size *= 2; 1071 new_utf8 = (char *)realloc(utf8, utf8size); 1072 if (new_utf8 == NULL) { 1073 r = idn_nomemory; 1074 goto ret; 1075 } 1076 utf8 = new_utf8; 1077 goto try_again; 1078 default: 1079 WARNING(("iconv failed with errno %d\n", errno)); 1080 r = idn_failure; 1081 goto ret; 1082 } 1083 } 1084 *outbuf = '\0'; 1085 1086 /* 1087 * UTF-8 -> UCS4 conversion. 1088 */ 1089 r = idn_ucs4_utf8toucs4(utf8, to, tolen); 1090 1091ret: 1092 free(utf8); 1093 return (r); 1094} 1095 1096#endif /* !WITHOUT_ICONV */ 1097 1098#ifdef DEBUG 1099/* 1100 * Conversion to/from unicode escape string. 1101 * Arbitrary UCS-4 character can be specified by a special sequence 1102 * \u{XXXXXX} 1103 * where XXXXX denotes any hexadecimal string up to FFFFFFFF. 1104 * This is designed for debugging. 
1105 */ 1106 1107static idn_result_t 1108converter_uescape_convfromucs4(idn_converter_t ctx, void *privdata, 1109 const unsigned long *from, char *to, 1110 size_t tolen) { 1111 idn_result_t r; 1112 unsigned long v; 1113 1114 while (*from != '\0') { 1115 v = *from++; 1116 1117 if (v <= 0x7f) { 1118 if (tolen < 1) { 1119 r = idn_buffer_overflow; 1120 goto failure; 1121 } 1122 *to++ = v; 1123 tolen--; 1124 } else if (v <= 0xffffffff) { 1125 char tmp[20]; 1126 int len; 1127 1128 (void)sprintf(tmp, "\\u{%lx}", v); 1129 len = strlen(tmp); 1130 if (tolen < len) { 1131 r = idn_buffer_overflow; 1132 goto failure; 1133 } 1134 (void)memcpy(to, tmp, len); 1135 to += len; 1136 tolen -= len; 1137 } else { 1138 r = idn_invalid_encoding; 1139 goto failure; 1140 } 1141 } 1142 1143 if (tolen <= 0) { 1144 r = idn_buffer_overflow; 1145 goto failure; 1146 } 1147 *to = '\0'; 1148 1149 return (idn_success); 1150 1151failure: 1152 if (r != idn_buffer_overflow) { 1153 WARNING(("idn_uescape_convfromucs4(): %s\n", 1154 idn_result_tostring(r))); 1155 } 1156 return (r); 1157} 1158 1159static idn_result_t 1160converter_uescape_convtoucs4(idn_converter_t ctx, void *privdata, 1161 const char *from, unsigned long *to, size_t tolen) 1162{ 1163 idn_result_t r; 1164 size_t fromlen = strlen(from); 1165 1166 while (*from != '\0') { 1167 if (tolen <= 0) { 1168 r = idn_buffer_overflow; 1169 goto failure; 1170 } 1171 if (strncmp(from, "\\u{", 3) == 0 || 1172 strncmp(from, "\\U{", 3) == 0) { 1173 size_t ullen; 1174 unsigned long v; 1175 char *end; 1176 1177 v = strtoul(from + 3, &end, 16); 1178 ullen = end - (from + 3); 1179 if (*end == '}' && ullen > 1 && ullen < 8) { 1180 *to = v; 1181 from = end + 1; 1182 fromlen -= ullen; 1183 } else { 1184 *to = '\\'; 1185 from++; 1186 fromlen--; 1187 } 1188 } else { 1189 int c = *(unsigned char *)from; 1190 size_t width; 1191 char buf[8]; 1192 1193 if (c < 0x80) 1194 width = 1; 1195 else if (c < 0xc0) 1196 width = 0; 1197 else if (c < 0xe0) 1198 width = 2; 1199 else 
if (c < 0xf0) 1200 width = 3; 1201 else if (c < 0xf8) 1202 width = 4; 1203 else if (c < 0xfc) 1204 width = 5; 1205 else if (c < 0xfe) 1206 width = 6; 1207 else 1208 width = 0; 1209 if (width == 0 || width > fromlen) { 1210 r = idn_invalid_encoding; 1211 goto failure; 1212 } 1213 1214 memcpy(buf, from, width); 1215 buf[width] = '\0'; 1216 r = idn_ucs4_utf8toucs4(buf, to, tolen); 1217 if (r != idn_success) { 1218 r = idn_invalid_encoding; 1219 goto failure; 1220 } 1221 from += width; 1222 fromlen -= width; 1223 } 1224 to++; 1225 tolen--; 1226 } 1227 1228 if (tolen <= 0) { 1229 r = idn_buffer_overflow; 1230 goto failure; 1231 } 1232 *to = '\0'; 1233 1234 return (idn_success); 1235 1236failure: 1237 if (r != idn_buffer_overflow) { 1238 WARNING(("idn_uescape_convtoucs4(): %s\n", 1239 idn_result_tostring(r))); 1240 } 1241 return (r); 1242} 1243 1244#endif 1245