1/* 2 * Copyright (C) 1999-2008, 2011 Free Software Foundation, Inc. 3 * This file is part of the GNU LIBICONV Library. 4 * 5 * The GNU LIBICONV Library is free software; you can redistribute it 6 * and/or modify it under the terms of the GNU Library General Public 7 * License as published by the Free Software Foundation; either version 2 8 * of the License, or (at your option) any later version. 9 * 10 * The GNU LIBICONV Library is distributed in the hope that it will be 11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * Library General Public License for more details. 14 * 15 * You should have received a copy of the GNU Library General Public 16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB. 17 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street, 18 * Fifth Floor, Boston, MA 02110-1301, USA. 19 */ 20 21#include <iconv.h> 22 23#include <stdlib.h> 24#include <string.h> 25#include "config.h" 26#include "localcharset.h" 27 28#ifdef __CYGWIN__ 29#include <cygwin/version.h> 30#endif 31 32#if ENABLE_EXTRA 33/* 34 * Consider all system dependent encodings, for any system, 35 * and the extra encodings. 36 */ 37#define USE_AIX 38#define USE_OSF1 39#define USE_DOS 40#define USE_EXTRA 41#else 42/* 43 * Consider those system dependent encodings that are needed for the 44 * current system. 45 */ 46#ifdef _AIX 47#define USE_AIX 48#endif 49#if defined(__osf__) || defined(VMS) 50#define USE_OSF1 51#endif 52#if defined(__DJGPP__) || (defined(_WIN32) && (defined(_MSC_VER) || defined(__MINGW32__))) 53#define USE_DOS 54#endif 55#endif 56 57/* 58 * Data type for general conversion loop. 59 */ 60struct loop_funcs { 61 size_t (*loop_convert) (iconv_t icd, 62 const char* * inbuf, size_t *inbytesleft, 63 char* * outbuf, size_t *outbytesleft); 64 size_t (*loop_reset) (iconv_t icd, 65 char* * outbuf, size_t *outbytesleft); 66}; 67 68/* 69 * Converters. 70 */ 71#include "converters.h" 72 73/* 74 * Transliteration tables. 75 */ 76#include "cjk_variants.h" 77#include "translit.h" 78 79/* 80 * Table of all supported encodings. 81 */ 82struct encoding { 83 struct mbtowc_funcs ifuncs; /* conversion multibyte -> unicode */ 84 struct wctomb_funcs ofuncs; /* conversion unicode -> multibyte */ 85 int oflags; /* flags for unicode -> multibyte conversion */ 86}; 87#define DEFALIAS(xxx_alias,xxx) /* nothing */ 88enum { 89#define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \ 90 ei_##xxx , 91#include "encodings.def" 92#ifdef USE_AIX 93# include "encodings_aix.def" 94#endif 95#ifdef USE_OSF1 96# include "encodings_osf1.def" 97#endif 98#ifdef USE_DOS 99# include "encodings_dos.def" 100#endif 101#ifdef USE_EXTRA 102# include "encodings_extra.def" 103#endif 104#include "encodings_local.def" 105#undef DEFENCODING 106ei_for_broken_compilers_that_dont_like_trailing_commas 107}; 108#include "flags.h" 109static struct encoding const all_encodings[] = { 110#define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \ 111 { xxx_ifuncs1,xxx_ifuncs2, xxx_ofuncs1,xxx_ofuncs2, ei_##xxx##_oflags }, 112#include "encodings.def" 113#ifdef USE_AIX 114# include "encodings_aix.def" 115#endif 116#ifdef USE_OSF1 117# include "encodings_osf1.def" 118#endif 119#ifdef USE_DOS 120# include "encodings_dos.def" 121#endif 122#ifdef USE_EXTRA 123# include "encodings_extra.def" 124#endif 125#undef DEFENCODING 126#define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \ 127 { xxx_ifuncs1,xxx_ifuncs2, xxx_ofuncs1,xxx_ofuncs2, 0 }, 128#include "encodings_local.def" 129#undef DEFENCODING 130}; 131#undef DEFALIAS 132 133/* 134 * Conversion loops. 135 */ 136#include "loops.h" 137 138/* 139 * Alias lookup function. 140 * Defines 141 * struct alias { int name; unsigned int encoding_index; }; 142 * const struct alias * aliases_lookup (const char *str, unsigned int len); 143 * #define MAX_WORD_LENGTH ... 144 */ 145#if defined _AIX 146# include "aliases_sysaix.h" 147#elif defined hpux || defined __hpux 148# include "aliases_syshpux.h" 149#elif defined __osf__ 150# include "aliases_sysosf1.h" 151#elif defined __sun 152# include "aliases_syssolaris.h" 153#else 154# include "aliases.h" 155#endif 156 157/* 158 * System dependent alias lookup function. 159 * Defines 160 * const struct alias * aliases2_lookup (const char *str); 161 */ 162#if defined(USE_AIX) || defined(USE_OSF1) || defined(USE_DOS) || defined(USE_EXTRA) /* || ... */ 163struct stringpool2_t { 164#define S(tag,name,encoding_index) char stringpool_##tag[sizeof(name)]; 165#include "aliases2.h" 166#undef S 167}; 168static const struct stringpool2_t stringpool2_contents = { 169#define S(tag,name,encoding_index) name, 170#include "aliases2.h" 171#undef S 172}; 173#define stringpool2 ((const char *) &stringpool2_contents) 174static const struct alias sysdep_aliases[] = { 175#define S(tag,name,encoding_index) { (int)(long)&((struct stringpool2_t *)0)->stringpool_##tag, encoding_index }, 176#include "aliases2.h" 177#undef S 178}; 179#ifdef __GNUC__ 180__inline 181#endif 182const struct alias * 183aliases2_lookup (register const char *str) 184{ 185 const struct alias * ptr; 186 unsigned int count; 187 for (ptr = sysdep_aliases, count = sizeof(sysdep_aliases)/sizeof(sysdep_aliases[0]); count > 0; ptr++, count--) 188 if (!strcmp(str, stringpool2 + ptr->name)) 189 return ptr; 190 return NULL; 191} 192#else 193#define aliases2_lookup(str) NULL 194#define stringpool2 NULL 195#endif 196 197#if 0 198/* Like !strcasecmp, except that the both strings can be assumed to be ASCII 199 and the first string can be assumed to be in uppercase. */ 200static int strequal (const char* str1, const char* str2) 201{ 202 unsigned char c1; 203 unsigned char c2; 204 for (;;) { 205 c1 = * (unsigned char *) str1++; 206 c2 = * (unsigned char *) str2++; 207 if (c1 == 0) 208 break; 209 if (c2 >= 'a' && c2 <= 'z') 210 c2 -= 'a'-'A'; 211 if (c1 != c2) 212 break; 213 } 214 return (c1 == c2); 215} 216#endif 217 218iconv_t iconv_open (const char* tocode, const char* fromcode) 219{ 220 struct conv_struct * cd; 221 unsigned int from_index; 222 int from_wchar; 223 unsigned int to_index; 224 int to_wchar; 225 int transliterate; 226 int discard_ilseq; 227 228#include "iconv_open1.h" 229 230 cd = (struct conv_struct *) malloc(from_wchar != to_wchar 231 ? sizeof(struct wchar_conv_struct) 232 : sizeof(struct conv_struct)); 233 if (cd == NULL) { 234 errno = ENOMEM; 235 return (iconv_t)(-1); 236 } 237 238#include "iconv_open2.h" 239 240 return (iconv_t)cd; 241invalid: 242 errno = EINVAL; 243 return (iconv_t)(-1); 244} 245 246size_t iconv (iconv_t icd, 247 ICONV_CONST char* * inbuf, size_t *inbytesleft, 248 char* * outbuf, size_t *outbytesleft) 249{ 250 conv_t cd = (conv_t) icd; 251 if (inbuf == NULL || *inbuf == NULL) 252 return cd->lfuncs.loop_reset(icd,outbuf,outbytesleft); 253 else 254 return cd->lfuncs.loop_convert(icd, 255 (const char* *)inbuf,inbytesleft, 256 outbuf,outbytesleft); 257} 258 259int iconv_close (iconv_t icd) 260{ 261 conv_t cd = (conv_t) icd; 262 free(cd); 263 return 0; 264} 265 266#ifndef LIBICONV_PLUG 267 268/* 269 * Verify that a 'struct conv_struct' and a 'struct wchar_conv_struct' each 270 * fit in an iconv_allocation_t. 271 * If this verification fails, iconv_allocation_t must be made larger and 272 * the major version in LIBICONV_VERSION_INFO must be bumped. 273 * Currently 'struct conv_struct' has 21 integer/pointer fields, and 274 * 'struct wchar_conv_struct' additionally has an 'mbstate_t' field. 275 */ 276typedef int verify_size_1[2 * (sizeof (struct conv_struct) <= sizeof (iconv_allocation_t)) - 1]; 277typedef int verify_size_2[2 * (sizeof (struct wchar_conv_struct) <= sizeof (iconv_allocation_t)) - 1]; 278 279int iconv_open_into (const char* tocode, const char* fromcode, 280 iconv_allocation_t* resultp) 281{ 282 struct conv_struct * cd; 283 unsigned int from_index; 284 int from_wchar; 285 unsigned int to_index; 286 int to_wchar; 287 int transliterate; 288 int discard_ilseq; 289 290#include "iconv_open1.h" 291 292 cd = (struct conv_struct *) resultp; 293 294#include "iconv_open2.h" 295 296 return 0; 297invalid: 298 errno = EINVAL; 299 return -1; 300} 301 302int iconvctl (iconv_t icd, int request, void* argument) 303{ 304 conv_t cd = (conv_t) icd; 305 switch (request) { 306 case ICONV_TRIVIALP: 307 *(int *)argument = 308 ((cd->lfuncs.loop_convert == unicode_loop_convert 309 && cd->iindex == cd->oindex) 310 || cd->lfuncs.loop_convert == wchar_id_loop_convert 311 ? 1 : 0); 312 return 0; 313 case ICONV_GET_TRANSLITERATE: 314 *(int *)argument = cd->transliterate; 315 return 0; 316 case ICONV_SET_TRANSLITERATE: 317 cd->transliterate = (*(const int *)argument ? 1 : 0); 318 return 0; 319 case ICONV_GET_DISCARD_ILSEQ: 320 *(int *)argument = cd->discard_ilseq; 321 return 0; 322 case ICONV_SET_DISCARD_ILSEQ: 323 cd->discard_ilseq = (*(const int *)argument ? 1 : 0); 324 return 0; 325 case ICONV_SET_HOOKS: 326 if (argument != NULL) { 327 cd->hooks = *(const struct iconv_hooks *)argument; 328 } else { 329 cd->hooks.uc_hook = NULL; 330 cd->hooks.wc_hook = NULL; 331 cd->hooks.data = NULL; 332 } 333 return 0; 334 case ICONV_SET_FALLBACKS: 335 if (argument != NULL) { 336 cd->fallbacks = *(const struct iconv_fallbacks *)argument; 337 } else { 338 cd->fallbacks.mb_to_uc_fallback = NULL; 339 cd->fallbacks.uc_to_mb_fallback = NULL; 340 cd->fallbacks.mb_to_wc_fallback = NULL; 341 cd->fallbacks.wc_to_mb_fallback = NULL; 342 cd->fallbacks.data = NULL; 343 } 344 return 0; 345 default: 346 errno = EINVAL; 347 return -1; 348 } 349} 350 351/* An alias after its name has been converted from 'int' to 'const char*'. */ 352struct nalias { const char* name; unsigned int encoding_index; }; 353 354static int compare_by_index (const void * arg1, const void * arg2) 355{ 356 const struct nalias * alias1 = (const struct nalias *) arg1; 357 const struct nalias * alias2 = (const struct nalias *) arg2; 358 return (int)alias1->encoding_index - (int)alias2->encoding_index; 359} 360 361static int compare_by_name (const void * arg1, const void * arg2) 362{ 363 const char * name1 = *(const char **)arg1; 364 const char * name2 = *(const char **)arg2; 365 /* Compare alphabetically, but put "CS" names at the end. */ 366 int sign = strcmp(name1,name2); 367 if (sign != 0) { 368 sign = ((name1[0]=='C' && name1[1]=='S') - (name2[0]=='C' && name2[1]=='S')) 369 * 4 + (sign >= 0 ? 1 : -1); 370 } 371 return sign; 372} 373 374void iconvlist (int (*do_one) (unsigned int namescount, 375 const char * const * names, 376 void* data), 377 void* data) 378{ 379#define aliascount1 sizeof(aliases)/sizeof(aliases[0]) 380#ifndef aliases2_lookup 381#define aliascount2 sizeof(sysdep_aliases)/sizeof(sysdep_aliases[0]) 382#else 383#define aliascount2 0 384#endif 385#define aliascount (aliascount1+aliascount2) 386 struct nalias aliasbuf[aliascount]; 387 const char * namesbuf[aliascount]; 388 size_t num_aliases; 389 { 390 /* Put all existing aliases into a buffer. */ 391 size_t i; 392 size_t j; 393 j = 0; 394 for (i = 0; i < aliascount1; i++) { 395 const struct alias * p = &aliases[i]; 396 if (p->name >= 0 397 && p->encoding_index != ei_local_char 398 && p->encoding_index != ei_local_wchar_t) { 399 aliasbuf[j].name = stringpool + p->name; 400 aliasbuf[j].encoding_index = p->encoding_index; 401 j++; 402 } 403 } 404#ifndef aliases2_lookup 405 for (i = 0; i < aliascount2; i++) { 406 aliasbuf[j].name = stringpool2 + sysdep_aliases[i].name; 407 aliasbuf[j].encoding_index = sysdep_aliases[i].encoding_index; 408 j++; 409 } 410#endif 411 num_aliases = j; 412 } 413 /* Sort by encoding_index. */ 414 if (num_aliases > 1) 415 qsort(aliasbuf, num_aliases, sizeof(struct nalias), compare_by_index); 416 { 417 /* Process all aliases with the same encoding_index together. */ 418 size_t j; 419 j = 0; 420 while (j < num_aliases) { 421 unsigned int ei = aliasbuf[j].encoding_index; 422 size_t i = 0; 423 do 424 namesbuf[i++] = aliasbuf[j++].name; 425 while (j < num_aliases && aliasbuf[j].encoding_index == ei); 426 if (i > 1) 427 qsort(namesbuf, i, sizeof(const char *), compare_by_name); 428 /* Call the callback. */ 429 if (do_one(i,namesbuf,data)) 430 break; 431 } 432 } 433#undef aliascount 434#undef aliascount2 435#undef aliascount1 436} 437 438/* 439 * Table of canonical names of encodings. 440 * Instead of strings, it contains offsets into stringpool and stringpool2. 441 */ 442static const unsigned short all_canonical[] = { 443#if defined _AIX 444# include "canonical_sysaix.h" 445#elif defined hpux || defined __hpux 446# include "canonical_syshpux.h" 447#elif defined __osf__ 448# include "canonical_sysosf1.h" 449#elif defined __sun 450# include "canonical_syssolaris.h" 451#else 452# include "canonical.h" 453#endif 454#ifdef USE_AIX 455# if defined _AIX 456# include "canonical_aix_sysaix.h" 457# else 458# include "canonical_aix.h" 459# endif 460#endif 461#ifdef USE_OSF1 462# if defined __osf__ 463# include "canonical_osf1_sysosf1.h" 464# else 465# include "canonical_osf1.h" 466# endif 467#endif 468#ifdef USE_DOS 469# include "canonical_dos.h" 470#endif 471#ifdef USE_EXTRA 472# include "canonical_extra.h" 473#endif 474#if defined _AIX 475# include "canonical_local_sysaix.h" 476#elif defined hpux || defined __hpux 477# include "canonical_local_syshpux.h" 478#elif defined __osf__ 479# include "canonical_local_sysosf1.h" 480#elif defined __sun 481# include "canonical_local_syssolaris.h" 482#else 483# include "canonical_local.h" 484#endif 485}; 486 487const char * iconv_canonicalize (const char * name) 488{ 489 const char* code; 490 char buf[MAX_WORD_LENGTH+10+1]; 491 const char* cp; 492 char* bp; 493 const struct alias * ap; 494 unsigned int count; 495 unsigned int index; 496 const char* pool; 497 498 /* Before calling aliases_lookup, convert the input string to upper case, 499 * and check whether it's entirely ASCII (we call gperf with option "-7" 500 * to achieve a smaller table) and non-empty. If it's not entirely ASCII, 501 * or if it's too long, it is not a valid encoding name. 502 */ 503 for (code = name;;) { 504 /* Search code in the table. */ 505 for (cp = code, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) { 506 unsigned char c = * (unsigned char *) cp; 507 if (c >= 0x80) 508 goto invalid; 509 if (c >= 'a' && c <= 'z') 510 c -= 'a'-'A'; 511 *bp = c; 512 if (c == '\0') 513 break; 514 if (--count == 0) 515 goto invalid; 516 } 517 for (;;) { 518 if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) { 519 bp -= 10; 520 *bp = '\0'; 521 continue; 522 } 523 if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) { 524 bp -= 8; 525 *bp = '\0'; 526 continue; 527 } 528 break; 529 } 530 if (buf[0] == '\0') { 531 code = locale_charset(); 532 /* Avoid an endless loop that could occur when using an older version 533 of localcharset.c. */ 534 if (code[0] == '\0') 535 goto invalid; 536 continue; 537 } 538 pool = stringpool; 539 ap = aliases_lookup(buf,bp-buf); 540 if (ap == NULL) { 541 pool = stringpool2; 542 ap = aliases2_lookup(buf); 543 if (ap == NULL) 544 goto invalid; 545 } 546 if (ap->encoding_index == ei_local_char) { 547 code = locale_charset(); 548 /* Avoid an endless loop that could occur when using an older version 549 of localcharset.c. */ 550 if (code[0] == '\0') 551 goto invalid; 552 continue; 553 } 554 if (ap->encoding_index == ei_local_wchar_t) { 555 /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode. 556 This is also the case on native Woe32 systems and Cygwin >= 1.7, where 557 we know that it is UTF-16. */ 558#if ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007) 559 if (sizeof(wchar_t) == 4) { 560 index = ei_ucs4internal; 561 break; 562 } 563 if (sizeof(wchar_t) == 2) { 564# if WORDS_LITTLEENDIAN 565 index = ei_utf16le; 566# else 567 index = ei_utf16be; 568# endif 569 break; 570 } 571#elif __STDC_ISO_10646__ 572 if (sizeof(wchar_t) == 4) { 573 index = ei_ucs4internal; 574 break; 575 } 576 if (sizeof(wchar_t) == 2) { 577 index = ei_ucs2internal; 578 break; 579 } 580 if (sizeof(wchar_t) == 1) { 581 index = ei_iso8859_1; 582 break; 583 } 584#endif 585 } 586 index = ap->encoding_index; 587 break; 588 } 589 return all_canonical[index] + pool; 590 invalid: 591 return name; 592} 593 594int _libiconv_version = _LIBICONV_VERSION; 595 596#if defined __FreeBSD__ && !defined __gnu_freebsd__ 597/* GNU libiconv is the native FreeBSD iconv implementation since 2002. 598 It wants to define the symbols 'iconv_open', 'iconv', 'iconv_close'. */ 599#define strong_alias(name, aliasname) _strong_alias(name, aliasname) 600#define _strong_alias(name, aliasname) \ 601 extern __typeof (name) aliasname __attribute__ ((alias (#name))); 602#undef iconv_open 603#undef iconv 604#undef iconv_close 605strong_alias (libiconv_open, iconv_open) 606strong_alias (libiconv, iconv) 607strong_alias (libiconv_close, iconv_close) 608#endif 609 610#endif 611