1/* 2 * Copyright (C) 1999-2002 Free Software Foundation, Inc. 3 * This file is part of the GNU LIBICONV Library. 4 * 5 * The GNU LIBICONV Library is free software; you can redistribute it 6 * and/or modify it under the terms of the GNU Library General Public 7 * License as published by the Free Software Foundation; either version 2 8 * of the License, or (at your option) any later version. 9 * 10 * The GNU LIBICONV Library is distributed in the hope that it will be 11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * Library General Public License for more details. 14 * 15 * You should have received a copy of the GNU Library General Public 16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB. 17 * If not, write to the Free Software Foundation, Inc., 59 Temple Place - 18 * Suite 330, Boston, MA 02111-1307, USA. 19 */ 20 21#include <iconv.h> 22 23#include <stdlib.h> 24#include <string.h> 25#include "config.h" 26#include "libcharset.h" 27 28#if ENABLE_EXTRA 29/* 30 * Consider all system dependent encodings, for any system, 31 * and the extra encodings. 32 */ 33#define USE_AIX 34#define USE_OSF1 35#define USE_DOS 36#define USE_EXTRA 37#else 38/* 39 * Consider those system dependent encodings that are needed for the 40 * current system. 41 */ 42#ifdef _AIX 43#define USE_AIX 44#endif 45#ifdef __osf__ 46#define USE_OSF1 47#endif 48#if defined(__DJGPP__) || (defined(_WIN32) && (defined(_MSC_VER) || defined(__MINGW32__))) 49#define USE_DOS 50#endif 51#endif 52 53/* 54 * Data type for general conversion loop. 55 */ 56struct loop_funcs { 57 size_t (*loop_convert) (iconv_t icd, 58 const char* * inbuf, size_t *inbytesleft, 59 char* * outbuf, size_t *outbytesleft); 60 size_t (*loop_reset) (iconv_t icd, 61 char* * outbuf, size_t *outbytesleft); 62}; 63 64/* 65 * Converters. 66 */ 67#include "converters.h" 68 69/* 70 * Transliteration tables. 71 */ 72#include "cjk_variants.h" 73#include "translit.h" 74 75/* 76 * Table of all supported encodings. 77 */ 78struct encoding { 79 struct mbtowc_funcs ifuncs; /* conversion multibyte -> unicode */ 80 struct wctomb_funcs ofuncs; /* conversion unicode -> multibyte */ 81 int oflags; /* flags for unicode -> multibyte conversion */ 82}; 83enum { 84#define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \ 85 ei_##xxx , 86#include "encodings.def" 87#ifdef USE_AIX 88#include "encodings_aix.def" 89#endif 90#ifdef USE_OSF1 91#include "encodings_osf1.def" 92#endif 93#ifdef USE_DOS 94#include "encodings_dos.def" 95#endif 96#ifdef USE_EXTRA 97#include "encodings_extra.def" 98#endif 99#include "encodings_local.def" 100#undef DEFENCODING 101ei_for_broken_compilers_that_dont_like_trailing_commas 102}; 103#include "flags.h" 104static struct encoding const all_encodings[] = { 105#define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \ 106 { xxx_ifuncs1,xxx_ifuncs2, xxx_ofuncs1,xxx_ofuncs2, ei_##xxx##_oflags }, 107#include "encodings.def" 108#ifdef USE_AIX 109#include "encodings_aix.def" 110#endif 111#ifdef USE_OSF1 112#include "encodings_osf1.def" 113#endif 114#ifdef USE_DOS 115#include "encodings_dos.def" 116#endif 117#ifdef USE_EXTRA 118#include "encodings_extra.def" 119#endif 120#undef DEFENCODING 121#define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \ 122 { xxx_ifuncs1,xxx_ifuncs2, xxx_ofuncs1,xxx_ofuncs2, 0 }, 123#include "encodings_local.def" 124#undef DEFENCODING 125}; 126 127/* 128 * Conversion loops. 129 */ 130#include "loops.h" 131 132/* 133 * Alias lookup function. 134 * Defines 135 * struct alias { const char* name; unsigned int encoding_index; }; 136 * const struct alias * aliases_lookup (const char *str, unsigned int len); 137 * #define MAX_WORD_LENGTH ... 138 */ 139#include "aliases.h" 140 141/* 142 * System dependent alias lookup function. 143 * Defines 144 * const struct alias * aliases2_lookup (const char *str); 145 */ 146#if defined(USE_AIX) || defined(USE_OSF1) || defined(USE_DOS) || defined(USE_EXTRA) /* || ... */ 147static struct alias sysdep_aliases[] = { 148#ifdef USE_AIX 149#include "aliases_aix.h" 150#endif 151#ifdef USE_OSF1 152#include "aliases_osf1.h" 153#endif 154#ifdef USE_DOS 155#include "aliases_dos.h" 156#endif 157#ifdef USE_EXTRA 158#include "aliases_extra.h" 159#endif 160}; 161#ifdef __GNUC__ 162__inline 163#endif 164const struct alias * 165aliases2_lookup (register const char *str) 166{ 167 struct alias * ptr; 168 unsigned int count; 169 for (ptr = sysdep_aliases, count = sizeof(sysdep_aliases)/sizeof(sysdep_aliases[0]); count > 0; ptr++, count--) 170 if (!strcmp(str,ptr->name)) 171 return ptr; 172 return NULL; 173} 174#else 175#define aliases2_lookup(str) NULL 176#endif 177 178#if 0 179/* Like !strcasecmp, except that the both strings can be assumed to be ASCII 180 and the first string can be assumed to be in uppercase. */ 181static int strequal (const char* str1, const char* str2) 182{ 183 unsigned char c1; 184 unsigned char c2; 185 for (;;) { 186 c1 = * (unsigned char *) str1++; 187 c2 = * (unsigned char *) str2++; 188 if (c1 == 0) 189 break; 190 if (c2 >= 'a' && c2 <= 'z') 191 c2 -= 'a'-'A'; 192 if (c1 != c2) 193 break; 194 } 195 return (c1 == c2); 196} 197#endif 198 199iconv_t iconv_open (const char* tocode, const char* fromcode) 200{ 201 struct conv_struct * cd; 202 char buf[MAX_WORD_LENGTH+10+1]; 203 const char* cp; 204 char* bp; 205 const struct alias * ap; 206 unsigned int count; 207 unsigned int from_index; 208 int from_wchar; 209 unsigned int to_index; 210 int to_wchar; 211 int transliterate = 0; 212 int discard_ilseq = 0; 213 214 /* Before calling aliases_lookup, convert the input string to upper case, 215 * and check whether it's entirely ASCII (we call gperf with option "-7" 216 * to achieve a smaller table) and non-empty. If it's not entirely ASCII, 217 * or if it's too long, it is not a valid encoding name. 218 */ 219 for (to_wchar = 0;;) { 220 /* Search tocode in the table. */ 221 for (cp = tocode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) { 222 unsigned char c = * (unsigned char *) cp; 223 if (c >= 0x80) 224 goto invalid; 225 if (c >= 'a' && c <= 'z') 226 c -= 'a'-'A'; 227 *bp = c; 228 if (c == '\0') 229 break; 230 if (--count == 0) 231 goto invalid; 232 } 233 if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) { 234 bp -= 10; 235 *bp = '\0'; 236 transliterate = 1; 237 } 238 if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) { 239 bp -= 8; 240 *bp = '\0'; 241 discard_ilseq = 1; 242 } 243 if (buf[0] == '\0') { 244 tocode = locale_charset(); 245 /* Avoid an endless loop that could occur when using an older version 246 of localcharset.c. */ 247 if (tocode[0] == '\0') 248 goto invalid; 249 continue; 250 } 251 ap = aliases_lookup(buf,bp-buf); 252 if (ap == NULL) { 253 ap = aliases2_lookup(buf); 254 if (ap == NULL) 255 goto invalid; 256 } 257 if (ap->encoding_index == ei_local_char) { 258 tocode = locale_charset(); 259 /* Avoid an endless loop that could occur when using an older version 260 of localcharset.c. */ 261 if (tocode[0] == '\0') 262 goto invalid; 263 continue; 264 } 265 if (ap->encoding_index == ei_local_wchar_t) { 266#if __STDC_ISO_10646__ 267 if (sizeof(wchar_t) == 4) { 268 to_index = ei_ucs4internal; 269 break; 270 } 271 if (sizeof(wchar_t) == 2) { 272 to_index = ei_ucs2internal; 273 break; 274 } 275 if (sizeof(wchar_t) == 1) { 276 to_index = ei_iso8859_1; 277 break; 278 } 279#endif 280#if HAVE_MBRTOWC 281 to_wchar = 1; 282 tocode = locale_charset(); 283 continue; 284#endif 285 goto invalid; 286 } 287 to_index = ap->encoding_index; 288 break; 289 } 290 for (from_wchar = 0;;) { 291 /* Search fromcode in the table. */ 292 for (cp = fromcode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) { 293 unsigned char c = * (unsigned char *) cp; 294 if (c >= 0x80) 295 goto invalid; 296 if (c >= 'a' && c <= 'z') 297 c -= 'a'-'A'; 298 *bp = c; 299 if (c == '\0') 300 break; 301 if (--count == 0) 302 goto invalid; 303 } 304 if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) { 305 bp -= 10; 306 *bp = '\0'; 307 } 308 if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) { 309 bp -= 8; 310 *bp = '\0'; 311 } 312 if (buf[0] == '\0') { 313 fromcode = locale_charset(); 314 /* Avoid an endless loop that could occur when using an older version 315 of localcharset.c. */ 316 if (fromcode[0] == '\0') 317 goto invalid; 318 continue; 319 } 320 ap = aliases_lookup(buf,bp-buf); 321 if (ap == NULL) { 322 ap = aliases2_lookup(buf); 323 if (ap == NULL) 324 goto invalid; 325 } 326 if (ap->encoding_index == ei_local_char) { 327 fromcode = locale_charset(); 328 /* Avoid an endless loop that could occur when using an older version 329 of localcharset.c. */ 330 if (fromcode[0] == '\0') 331 goto invalid; 332 continue; 333 } 334 if (ap->encoding_index == ei_local_wchar_t) { 335#if __STDC_ISO_10646__ 336 if (sizeof(wchar_t) == 4) { 337 from_index = ei_ucs4internal; 338 break; 339 } 340 if (sizeof(wchar_t) == 2) { 341 from_index = ei_ucs2internal; 342 break; 343 } 344 if (sizeof(wchar_t) == 1) { 345 from_index = ei_iso8859_1; 346 break; 347 } 348#endif 349#if HAVE_WCRTOMB 350 from_wchar = 1; 351 fromcode = locale_charset(); 352 continue; 353#endif 354 goto invalid; 355 } 356 from_index = ap->encoding_index; 357 break; 358 } 359 cd = (struct conv_struct *) malloc(from_wchar != to_wchar 360 ? sizeof(struct wchar_conv_struct) 361 : sizeof(struct conv_struct)); 362 if (cd == NULL) { 363 errno = ENOMEM; 364 return (iconv_t)(-1); 365 } 366 cd->iindex = from_index; 367 cd->ifuncs = all_encodings[from_index].ifuncs; 368 cd->oindex = to_index; 369 cd->ofuncs = all_encodings[to_index].ofuncs; 370 cd->oflags = all_encodings[to_index].oflags; 371 /* Initialize the loop functions. */ 372#if HAVE_MBRTOWC 373 if (to_wchar) { 374#if HAVE_WCRTOMB 375 if (from_wchar) { 376 cd->lfuncs.loop_convert = wchar_id_loop_convert; 377 cd->lfuncs.loop_reset = wchar_id_loop_reset; 378 } else 379#endif 380 { 381 cd->lfuncs.loop_convert = wchar_to_loop_convert; 382 cd->lfuncs.loop_reset = wchar_to_loop_reset; 383 } 384 } else 385#endif 386 { 387#if HAVE_WCRTOMB 388 if (from_wchar) { 389 cd->lfuncs.loop_convert = wchar_from_loop_convert; 390 cd->lfuncs.loop_reset = wchar_from_loop_reset; 391 } else 392#endif 393 { 394 cd->lfuncs.loop_convert = unicode_loop_convert; 395 cd->lfuncs.loop_reset = unicode_loop_reset; 396 } 397 } 398 /* Initialize the states. */ 399 memset(&cd->istate,'\0',sizeof(state_t)); 400 memset(&cd->ostate,'\0',sizeof(state_t)); 401 /* Initialize the operation flags. */ 402 cd->transliterate = transliterate; 403 cd->discard_ilseq = discard_ilseq; 404 /* Initialize additional fields. */ 405 if (from_wchar != to_wchar) { 406 struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) cd; 407 memset(&wcd->state,'\0',sizeof(mbstate_t)); 408 } 409 /* Done. */ 410 return (iconv_t)cd; 411invalid: 412 errno = EINVAL; 413 return (iconv_t)(-1); 414} 415 416size_t iconv (iconv_t icd, 417 ICONV_CONST char* * inbuf, size_t *inbytesleft, 418 char* * outbuf, size_t *outbytesleft) 419{ 420 conv_t cd = (conv_t) icd; 421 if (inbuf == NULL || *inbuf == NULL) 422 return cd->lfuncs.loop_reset(icd,outbuf,outbytesleft); 423 else 424 return cd->lfuncs.loop_convert(icd, 425 (const char* *)inbuf,inbytesleft, 426 outbuf,outbytesleft); 427} 428 429int iconv_close (iconv_t icd) 430{ 431 conv_t cd = (conv_t) icd; 432 free(cd); 433 return 0; 434} 435 436#ifndef LIBICONV_PLUG 437 438int iconvctl (iconv_t icd, int request, void* argument) 439{ 440 conv_t cd = (conv_t) icd; 441 switch (request) { 442 case ICONV_TRIVIALP: 443 *(int *)argument = 444 ((cd->lfuncs.loop_convert == unicode_loop_convert 445 && cd->iindex == cd->oindex) 446 || cd->lfuncs.loop_convert == wchar_id_loop_convert 447 ? 1 : 0); 448 return 0; 449 case ICONV_GET_TRANSLITERATE: 450 *(int *)argument = cd->transliterate; 451 return 0; 452 case ICONV_SET_TRANSLITERATE: 453 cd->transliterate = (*(const int *)argument ? 1 : 0); 454 return 0; 455 case ICONV_GET_DISCARD_ILSEQ: 456 *(int *)argument = cd->discard_ilseq; 457 return 0; 458 case ICONV_SET_DISCARD_ILSEQ: 459 cd->discard_ilseq = (*(const int *)argument ? 1 : 0); 460 return 0; 461 default: 462 errno = EINVAL; 463 return -1; 464 } 465} 466 467static int compare_by_index (const void * arg1, const void * arg2) 468{ 469 const struct alias * alias1 = (const struct alias *) arg1; 470 const struct alias * alias2 = (const struct alias *) arg2; 471 return (int)alias1->encoding_index - (int)alias2->encoding_index; 472} 473 474static int compare_by_name (const void * arg1, const void * arg2) 475{ 476 const char * name1 = *(const char **)arg1; 477 const char * name2 = *(const char **)arg2; 478 /* Compare alphabetically, but put "CS" names at the end. */ 479 int sign = strcmp(name1,name2); 480 if (sign != 0) { 481 sign = ((name1[0]=='C' && name1[1]=='S') - (name2[0]=='C' && name2[1]=='S')) 482 * 4 + (sign >= 0 ? 1 : -1); 483 } 484 return sign; 485} 486 487void iconvlist (int (*do_one) (unsigned int namescount, 488 const char * const * names, 489 void* data), 490 void* data) 491{ 492#define aliascount1 sizeof(aliases)/sizeof(aliases[0]) 493#ifndef aliases2_lookup 494#define aliascount2 sizeof(sysdep_aliases)/sizeof(sysdep_aliases[0]) 495#else 496#define aliascount2 0 497#endif 498#define aliascount (aliascount1+aliascount2) 499 struct alias aliasbuf[aliascount]; 500 const char * namesbuf[aliascount]; 501 size_t num_aliases; 502 { 503 /* Put all existing aliases into a buffer. */ 504 size_t i; 505 size_t j; 506 j = 0; 507 for (i = 0; i < aliascount1; i++) { 508 const struct alias * p = &aliases[i]; 509 if (p->name[0] != '\0' 510 && p->encoding_index != ei_local_char 511 && p->encoding_index != ei_local_wchar_t) 512 aliasbuf[j++] = *p; 513 } 514#ifndef aliases2_lookup 515 for (i = 0; i < aliascount2; i++) 516 aliasbuf[j++] = sysdep_aliases[i]; 517#endif 518 num_aliases = j; 519 } 520 /* Sort by encoding_index. */ 521 if (num_aliases > 1) 522 qsort(aliasbuf, num_aliases, sizeof(struct alias), compare_by_index); 523 { 524 /* Process all aliases with the same encoding_index together. */ 525 size_t j; 526 j = 0; 527 while (j < num_aliases) { 528 unsigned int ei = aliasbuf[j].encoding_index; 529 size_t i = 0; 530 do 531 namesbuf[i++] = aliasbuf[j++].name; 532 while (j < num_aliases && aliasbuf[j].encoding_index == ei); 533 if (i > 1) 534 qsort(namesbuf, i, sizeof(const char *), compare_by_name); 535 /* Call the callback. */ 536 if (do_one(i,namesbuf,data)) 537 break; 538 } 539 } 540#undef aliascount 541#undef aliascount2 542#undef aliascount1 543} 544 545int _libiconv_version = _LIBICONV_VERSION; 546 547#endif 548