1/* 2 * Copyright (C) 1999-2002 Free Software Foundation, Inc. 3 * This file is part of the GNU LIBICONV Library. 4 * 5 * The GNU LIBICONV Library is free software; you can redistribute it 6 * and/or modify it under the terms of the GNU Library General Public 7 * License as published by the Free Software Foundation; either version 2 8 * of the License, or (at your option) any later version. 9 * 10 * The GNU LIBICONV Library is distributed in the hope that it will be 11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * Library General Public License for more details. 14 * 15 * You should have received a copy of the GNU Library General Public 16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB. 17 * If not, write to the Free Software Foundation, Inc., 59 Temple Place - 18 * Suite 330, Boston, MA 02111-1307, USA. 19 */ 20 21#include "../include/iconv.h" 22 23#include <stdlib.h> 24#include <string.h> 25#include "config.h" 26#include "libcharset.h" 27#include <errno.h> 28#if ENABLE_EXTRA 29/* 30 * Consider all system dependent encodings, for any system, 31 * and the extra encodings. 32 */ 33#define USE_AIX 34#define USE_OSF1 35#define USE_DOS 36#define USE_EXTRA 37#else 38/* 39 * Consider those system dependent encodings that are needed for the 40 * current system. 41 */ 42#ifdef _AIX 43#define USE_AIX 44#endif 45#ifdef __osf__ 46#define USE_OSF1 47#endif 48#if defined(__DJGPP__) || (defined(_WIN32) && (defined(_MSC_VER) || defined(__MINGW32__))) 49#define USE_DOS 50#endif 51#endif 52 53/* 54 * Data type for general conversion loop. 55 */ 56struct loop_funcs { 57 size_t (*loop_convert) (iconv_t icd, 58 const char* * inbuf, size_t *inbytesleft, 59 char* * outbuf, size_t *outbytesleft); 60 size_t (*loop_reset) (iconv_t icd, 61 char* * outbuf, size_t *outbytesleft); 62}; 63 64/* 65 * Converters. 66 */ 67#include "converters.h" 68 69/* 70 * Transliteration tables. 71 */ 72#include "cjk_variants.h" 73#include "translit.h" 74 75/* 76 * Table of all supported encodings. 77 */ 78struct encoding { 79 struct mbtowc_funcs ifuncs; /* conversion multibyte -> unicode */ 80 struct wctomb_funcs ofuncs; /* conversion unicode -> multibyte */ 81 int oflags; /* flags for unicode -> multibyte conversion */ 82}; 83enum { 84#define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \ 85 ei_##xxx , 86#include "encodings.def" 87#ifdef USE_AIX 88#include "encodings_aix.def" 89#endif 90#ifdef USE_OSF1 91#include "encodings_osf1.def" 92#endif 93#ifdef USE_DOS 94#include "encodings_dos.def" 95#endif 96#ifdef USE_EXTRA 97#include "encodings_extra.def" 98#endif 99#include "encodings_local.def" 100#undef DEFENCODING 101ei_for_broken_compilers_that_dont_like_trailing_commas 102}; 103#include "flags.h" 104static struct encoding const all_encodings[] = { 105#define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \ 106 { xxx_ifuncs1,xxx_ifuncs2, xxx_ofuncs1,xxx_ofuncs2, ei_##xxx##_oflags }, 107#include "encodings.def" 108#ifdef USE_AIX 109#include "encodings_aix.def" 110#endif 111#ifdef USE_OSF1 112#include "encodings_osf1.def" 113#endif 114#ifdef USE_DOS 115#include "encodings_dos.def" 116#endif 117#ifdef USE_EXTRA 118#include "encodings_extra.def" 119#endif 120#undef DEFENCODING 121#define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \ 122 { xxx_ifuncs1,xxx_ifuncs2, xxx_ofuncs1,xxx_ofuncs2, 0 }, 123#include "encodings_local.def" 124#undef DEFENCODING 125}; 126 127/* 128 * Conversion loops. 129 */ 130#include "loops.h" 131 132/* 133 * Alias lookup function. 134 * Defines 135 * struct alias { const char* name; unsigned int encoding_index; }; 136 * const struct alias * aliases_lookup (const char *str, unsigned int len); 137 * #define MAX_WORD_LENGTH ... 138 */ 139#include "aliases.h" 140 141/* 142 * System dependent alias lookup function. 143 * Defines 144 * const struct alias * aliases2_lookup (const char *str); 145 */ 146#if defined(USE_AIX) || defined(USE_OSF1) || defined(USE_DOS) || defined(USE_EXTRA) /* || ... */ 147static struct alias sysdep_aliases[] = { 148#ifdef USE_AIX 149#include "aliases_aix.h" 150#endif 151#ifdef USE_OSF1 152#include "aliases_osf1.h" 153#endif 154#ifdef USE_DOS 155#include "aliases_dos.h" 156#endif 157#ifdef USE_EXTRA 158#include "aliases_extra.h" 159#endif 160}; 161#ifdef __GNUC__ 162__inline 163#endif 164const struct alias * 165aliases2_lookup (register const char *str) 166{ 167 struct alias * ptr; 168 unsigned int count; 169 for (ptr = sysdep_aliases, count = sizeof(sysdep_aliases)/sizeof(sysdep_aliases[0]); count > 0; ptr++, count--) 170 if (!strcmp(str,ptr->name)) 171 return ptr; 172 return NULL; 173} 174#else 175#define aliases2_lookup(str) NULL 176#endif 177 178#if 0 179/* Like !strcasecmp, except that the both strings can be assumed to be ASCII 180 and the first string can be assumed to be in uppercase. */ 181static int strequal (const char* str1, const char* str2) 182{ 183 unsigned char c1; 184 unsigned char c2; 185 for (;;) { 186 c1 = * (unsigned char *) str1++; 187 c2 = * (unsigned char *) str2++; 188 if (c1 == 0) 189 break; 190 if (c2 >= 'a' && c2 <= 'z') 191 c2 -= 'a'-'A'; 192 if (c1 != c2) 193 break; 194 } 195 return (c1 == c2); 196} 197#endif 198 199iconv_t libiconv_open (const char* tocode, const char* fromcode) 200{ 201 struct conv_struct * cd; 202 char buf[MAX_WORD_LENGTH+10+1]; 203 const char* cp; 204 char* bp; 205 const struct alias * ap; 206 unsigned int count; 207 unsigned int from_index; 208 int from_wchar; 209 unsigned int to_index; 210 int to_wchar; 211 int transliterate = 0; 212 int discard_ilseq = 0; 213 /* Before calling aliases_lookup, convert the input string to upper case, 214 * and check whether it's entirely ASCII (we call gperf with option "-7" 215 * to achieve a smaller table) and non-empty. If it's not entirely ASCII, 216 * or if it's too long, it is not a valid encoding name. 217 */ 218 for (to_wchar = 0;;) { 219 /* Search tocode in the table. */ 220 for (cp = tocode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) { 221 unsigned char c = * (unsigned char *) cp; 222 if (c >= 0x80) 223 goto invalid; 224 if (c >= 'a' && c <= 'z') 225 c -= 'a'-'A'; 226 *bp = c; 227 if (c == '\0') 228 break; 229 if (--count == 0) 230 goto invalid; 231 } 232 if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) { 233 bp -= 10; 234 *bp = '\0'; 235 transliterate = 1; 236 } 237 if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) { 238 bp -= 8; 239 *bp = '\0'; 240 discard_ilseq = 1; 241 } 242 if (buf[0] == '\0') { 243 tocode = locale_charset(); 244 /* Avoid an endless loop that could occur when using an older version 245 of localcharset.c. */ 246 if (tocode[0] == '\0') 247 goto invalid; 248 continue; 249 } 250 ap = aliases_lookup(buf,bp-buf); 251 if (ap == NULL) { 252 ap = aliases2_lookup(buf); 253 if (ap == NULL) 254 goto invalid; 255 } 256 if (ap->encoding_index == ei_local_char) { 257 tocode = locale_charset(); 258 /* Avoid an endless loop that could occur when using an older version 259 of localcharset.c. */ 260 if (tocode[0] == '\0') 261 goto invalid; 262 continue; 263 } 264 if (ap->encoding_index == ei_local_wchar_t) { 265#if __STDC_ISO_10646__ 266 if (sizeof(wchar_t) == 4) { 267 to_index = ei_ucs4internal; 268 break; 269 } 270 if (sizeof(wchar_t) == 2) { 271 to_index = ei_ucs2internal; 272 break; 273 } 274 if (sizeof(wchar_t) == 1) { 275 to_index = ei_iso8859_1; 276 break; 277 } 278#endif 279#if HAVE_MBRTOWC 280 to_wchar = 1; 281 tocode = locale_charset(); 282 continue; 283#endif 284 goto invalid; 285 } 286 to_index = ap->encoding_index; 287 break; 288 } 289 for (from_wchar = 0;;) { 290 /* Search fromcode in the table. */ 291 for (cp = fromcode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) { 292 unsigned char c = * (unsigned char *) cp; 293 if (c >= 0x80) 294 goto invalid; 295 if (c >= 'a' && c <= 'z') 296 c -= 'a'-'A'; 297 *bp = c; 298 if (c == '\0') 299 break; 300 if (--count == 0) 301 goto invalid; 302 } 303 if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) { 304 bp -= 10; 305 *bp = '\0'; 306 } 307 if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) { 308 bp -= 8; 309 *bp = '\0'; 310 } 311 if (buf[0] == '\0') { 312 fromcode = locale_charset(); 313 /* Avoid an endless loop that could occur when using an older version 314 of localcharset.c. */ 315 if (fromcode[0] == '\0') 316 goto invalid; 317 continue; 318 } 319 ap = aliases_lookup(buf,bp-buf); 320 if (ap == NULL) { 321 ap = aliases2_lookup(buf); 322 if (ap == NULL) 323 goto invalid; 324 } 325 if (ap->encoding_index == ei_local_char) { 326 fromcode = locale_charset(); 327 /* Avoid an endless loop that could occur when using an older version 328 of localcharset.c. */ 329 if (fromcode[0] == '\0') 330 goto invalid; 331 continue; 332 } 333 if (ap->encoding_index == ei_local_wchar_t) { 334#if __STDC_ISO_10646__ 335 if (sizeof(wchar_t) == 4) { 336 from_index = ei_ucs4internal; 337 break; 338 } 339 if (sizeof(wchar_t) == 2) { 340 from_index = ei_ucs2internal; 341 break; 342 } 343 if (sizeof(wchar_t) == 1) { 344 from_index = ei_iso8859_1; 345 break; 346 } 347#endif 348#if HAVE_WCRTOMB 349 from_wchar = 1; 350 fromcode = locale_charset(); 351 continue; 352#endif 353 goto invalid; 354 } 355 from_index = ap->encoding_index; 356 break; 357 } 358 cd = (struct conv_struct *) malloc(from_wchar != to_wchar 359 ? sizeof(struct wchar_conv_struct) 360 : sizeof(struct conv_struct)); 361 if (cd == NULL) { 362 errno = ENOMEM; 363 return (iconv_t)(-1); 364 } 365 cd->iindex = from_index; 366 cd->ifuncs = all_encodings[from_index].ifuncs; 367 cd->oindex = to_index; 368 cd->ofuncs = all_encodings[to_index].ofuncs; 369 cd->oflags = all_encodings[to_index].oflags; 370 /* Initialize the loop functions. */ 371#if HAVE_MBRTOWC 372 if (to_wchar) { 373#if HAVE_WCRTOMB 374 if (from_wchar) { 375 cd->lfuncs.loop_convert = wchar_id_loop_convert; 376 cd->lfuncs.loop_reset = wchar_id_loop_reset; 377 } else 378#endif 379 { 380 cd->lfuncs.loop_convert = wchar_to_loop_convert; 381 cd->lfuncs.loop_reset = wchar_to_loop_reset; 382 } 383 } else 384#endif 385 { 386#if HAVE_WCRTOMB 387 if (from_wchar) { 388 cd->lfuncs.loop_convert = wchar_from_loop_convert; 389 cd->lfuncs.loop_reset = wchar_from_loop_reset; 390 } else 391#endif 392 { 393 cd->lfuncs.loop_convert = unicode_loop_convert; 394 cd->lfuncs.loop_reset = unicode_loop_reset; 395 } 396 } 397 /* Initialize the states. */ 398 memset(&cd->istate,'\0',sizeof(state_t)); 399 memset(&cd->ostate,'\0',sizeof(state_t)); 400 /* Initialize the operation flags. */ 401 cd->transliterate = transliterate; 402 cd->discard_ilseq = discard_ilseq; 403 /* Initialize additional fields. */ 404 if (from_wchar != to_wchar) { 405 struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) cd; 406 memset(&wcd->state,'\0',sizeof(mbstate_t)); 407 } 408 /* Done. */ 409 return (iconv_t)cd; 410invalid: 411 errno = EINVAL; 412 return (iconv_t)(-1); 413} 414 415size_t libiconv (iconv_t icd, 416 ICONV_CONST char* * inbuf, size_t *inbytesleft, 417 char* * outbuf, size_t *outbytesleft) 418{ 419 conv_t cd = (conv_t) icd; 420 if (inbuf == NULL || *inbuf == NULL) 421 return cd->lfuncs.loop_reset(icd,outbuf,outbytesleft); 422 else 423 return cd->lfuncs.loop_convert(icd, 424 (const char* *)inbuf,inbytesleft, 425 outbuf,outbytesleft); 426} 427 428int libiconv_close (iconv_t icd) 429{ 430 conv_t cd = (conv_t) icd; 431 free(cd); 432 return 0; 433} 434 435#ifndef LIBICONV_PLUG 436 437int iconvctl (iconv_t icd, int request, void* argument) 438{ 439 conv_t cd = (conv_t) icd; 440 switch (request) { 441 case ICONV_TRIVIALP: 442 *(int *)argument = 443 ((cd->lfuncs.loop_convert == unicode_loop_convert 444 && cd->iindex == cd->oindex) 445 || cd->lfuncs.loop_convert == wchar_id_loop_convert 446 ? 1 : 0); 447 return 0; 448 case ICONV_GET_TRANSLITERATE: 449 *(int *)argument = cd->transliterate; 450 return 0; 451 case ICONV_SET_TRANSLITERATE: 452 cd->transliterate = (*(const int *)argument ? 1 : 0); 453 return 0; 454 case ICONV_GET_DISCARD_ILSEQ: 455 *(int *)argument = cd->discard_ilseq; 456 return 0; 457 case ICONV_SET_DISCARD_ILSEQ: 458 cd->discard_ilseq = (*(const int *)argument ? 1 : 0); 459 return 0; 460 default: 461 errno = EINVAL; 462 return -1; 463 } 464} 465 466static int compare_by_index (const void * arg1, const void * arg2) 467{ 468 const struct alias * alias1 = (const struct alias *) arg1; 469 const struct alias * alias2 = (const struct alias *) arg2; 470 return (int)alias1->encoding_index - (int)alias2->encoding_index; 471} 472 473static int compare_by_name (const void * arg1, const void * arg2) 474{ 475 const char * name1 = *(const char **)arg1; 476 const char * name2 = *(const char **)arg2; 477 /* Compare alphabetically, but put "CS" names at the end. */ 478 int sign = strcmp(name1,name2); 479 if (sign != 0) { 480 sign = ((name1[0]=='C' && name1[1]=='S') - (name2[0]=='C' && name2[1]=='S')) 481 * 4 + (sign >= 0 ? 1 : -1); 482 } 483 return sign; 484} 485 486void iconvlist (int (*do_one) (unsigned int namescount, 487 const char * const * names, 488 void* data), 489 void* data) 490{ 491#define aliascount1 sizeof(aliases)/sizeof(aliases[0]) 492#ifndef aliases2_lookup 493#define aliascount2 sizeof(sysdep_aliases)/sizeof(sysdep_aliases[0]) 494#else 495#define aliascount2 0 496#endif 497#define aliascount (aliascount1+aliascount2) 498 struct alias aliasbuf[aliascount]; 499 const char * namesbuf[aliascount]; 500 size_t num_aliases; 501 { 502 /* Put all existing aliases into a buffer. */ 503 size_t i; 504 size_t j; 505 j = 0; 506 for (i = 0; i < aliascount1; i++) { 507 const struct alias * p = &aliases[i]; 508 if (p->name[0] != '\0' 509 && p->encoding_index != ei_local_char 510 && p->encoding_index != ei_local_wchar_t) 511 aliasbuf[j++] = *p; 512 } 513#ifndef aliases2_lookup 514 for (i = 0; i < aliascount2; i++) 515 aliasbuf[j++] = sysdep_aliases[i]; 516#endif 517 num_aliases = j; 518 } 519 /* Sort by encoding_index. */ 520 if (num_aliases > 1) 521 qsort(aliasbuf, num_aliases, sizeof(struct alias), compare_by_index); 522 { 523 /* Process all aliases with the same encoding_index together. */ 524 size_t j; 525 j = 0; 526 while (j < num_aliases) { 527 unsigned int ei = aliasbuf[j].encoding_index; 528 size_t i = 0; 529 do 530 namesbuf[i++] = aliasbuf[j++].name; 531 while (j < num_aliases && aliasbuf[j].encoding_index == ei); 532 if (i > 1) 533 qsort(namesbuf, i, sizeof(const char *), compare_by_name); 534 /* Call the callback. */ 535 if (do_one(i,namesbuf,data)) 536 break; 537 } 538 } 539#undef aliascount 540#undef aliascount2 541#undef aliascount1 542} 543 544int _libiconv_version = _LIBICONV_VERSION; 545 546#endif 547