/*
 * Copyright (C) 1999-2002 Free Software Foundation, Inc.
 * This file is part of the GNU LIBICONV Library.
 *
 * The GNU LIBICONV Library is free software; you can redistribute it
 * and/or modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * The GNU LIBICONV Library is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
 * If not, write to the Free Software Foundation, Inc., 59 Temple Place -
 * Suite 330, Boston, MA 02111-1307, USA.
 */
20
#include <iconv.h>

#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include "config.h"
#include "libcharset.h"
27
/*
 * Select which groups of system dependent encodings are compiled in.
 * Each USE_xxx macro defined here enables the matching
 * encodings_xxx.def and aliases_xxx.h includes further down in this file.
 */
#if ENABLE_EXTRA
/*
 * Consider all system dependent encodings, for any system,
 * and the extra encodings.
 */
#define USE_AIX
#define USE_OSF1
#define USE_DOS
#define USE_EXTRA
#else
/*
 * Consider those system dependent encodings that are needed for the
 * current system.
 */
#ifdef _AIX
#define USE_AIX
#endif
#ifdef __osf__
#define USE_OSF1
#endif
#if defined(__DJGPP__) || (defined(_WIN32) && (defined(_MSC_VER) || defined(__MINGW32__)))
#define USE_DOS
#endif
#endif
52
/*
 * Data type for general conversion loop.
 *
 * loop_convert is the function iconv() calls when it is handed input to
 * convert; loop_reset is the function iconv() calls when inbuf or *inbuf
 * is NULL.
 */
struct loop_funcs {
  size_t (*loop_convert) (iconv_t icd,
                          const char* * inbuf, size_t *inbytesleft,
                          char* * outbuf, size_t *outbytesleft);
  size_t (*loop_reset) (iconv_t icd,
                        char* * outbuf, size_t *outbytesleft);
};
63
64/*
65 * Converters.
66 */
67#include "converters.h"
68
69/*
70 * Transliteration tables.
71 */
72#include "cjk_variants.h"
73#include "translit.h"
74
75/*
76 * Table of all supported encodings.
77 */
78struct encoding {
79  struct mbtowc_funcs ifuncs; /* conversion multibyte -> unicode */
80  struct wctomb_funcs ofuncs; /* conversion unicode -> multibyte */
81  int oflags;                 /* flags for unicode -> multibyte conversion */
82};
83enum {
84#define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
85  ei_##xxx ,
86#include "encodings.def"
87#ifdef USE_AIX
88#include "encodings_aix.def"
89#endif
90#ifdef USE_OSF1
91#include "encodings_osf1.def"
92#endif
93#ifdef USE_DOS
94#include "encodings_dos.def"
95#endif
96#ifdef USE_EXTRA
97#include "encodings_extra.def"
98#endif
99#include "encodings_local.def"
100#undef DEFENCODING
101ei_for_broken_compilers_that_dont_like_trailing_commas
102};
103#include "flags.h"
104static struct encoding const all_encodings[] = {
105#define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
106  { xxx_ifuncs1,xxx_ifuncs2, xxx_ofuncs1,xxx_ofuncs2, ei_##xxx##_oflags },
107#include "encodings.def"
108#ifdef USE_AIX
109#include "encodings_aix.def"
110#endif
111#ifdef USE_OSF1
112#include "encodings_osf1.def"
113#endif
114#ifdef USE_DOS
115#include "encodings_dos.def"
116#endif
117#ifdef USE_EXTRA
118#include "encodings_extra.def"
119#endif
120#undef DEFENCODING
121#define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
122  { xxx_ifuncs1,xxx_ifuncs2, xxx_ofuncs1,xxx_ofuncs2, 0 },
123#include "encodings_local.def"
124#undef DEFENCODING
125};
126
127/*
128 * Conversion loops.
129 */
130#include "loops.h"
131
132/*
133 * Alias lookup function.
134 * Defines
135 *   struct alias { const char* name; unsigned int encoding_index; };
136 *   const struct alias * aliases_lookup (const char *str, unsigned int len);
137 *   #define MAX_WORD_LENGTH ...
138 */
139#include "aliases.h"
140
/*
 * System dependent alias lookup function.
 * Defines
 *   const struct alias * aliases2_lookup (const char *str);
 *
 * When at least one USE_xxx group is enabled, aliases2_lookup is a function
 * that searches sysdep_aliases linearly for an entry whose name is exactly
 * STR and returns it, or NULL if there is none.  Otherwise aliases2_lookup
 * is a macro that always yields NULL; iconvlist() tests for that macro with
 * #ifndef to find out whether sysdep_aliases exists.
 *
 * The function is static: with C99 inline semantics a non-static __inline
 * definition provides no external definition, so a call that is not inlined
 * would fail to link.
 */
#if defined(USE_AIX) || defined(USE_OSF1) || defined(USE_DOS) || defined(USE_EXTRA) /* || ... */
static const struct alias sysdep_aliases[] = {
#ifdef USE_AIX
#include "aliases_aix.h"
#endif
#ifdef USE_OSF1
#include "aliases_osf1.h"
#endif
#ifdef USE_DOS
#include "aliases_dos.h"
#endif
#ifdef USE_EXTRA
#include "aliases_extra.h"
#endif
};
static
#ifdef __GNUC__
__inline
#endif
const struct alias *
aliases2_lookup (register const char *str)
{
  const struct alias * const end =
    sysdep_aliases + sizeof(sysdep_aliases)/sizeof(sysdep_aliases[0]);
  const struct alias * ptr;
  for (ptr = sysdep_aliases; ptr < end; ptr++)
    if (strcmp(str,ptr->name) == 0)
      return ptr;
  return NULL;
}
#else
#define aliases2_lookup(str)  NULL
#endif
177
#if 0
/* Like !strcasecmp, except that the both strings can be assumed to be ASCII
   and the first string can be assumed to be in uppercase.
   Returns 1 if the strings are equal under that rule, 0 otherwise.
   Currently compiled out. */
static int strequal (const char* str1, const char* str2)
{
  unsigned char c1;
  unsigned char c2;
  for (;;) {
    c1 = * (const unsigned char *) str1++;
    c2 = * (const unsigned char *) str2++;
    /* End of str1: equal only if str2 ends here too. */
    if (c1 == 0)
      break;
    /* Only str2 needs folding; str1 is already upper case. */
    if (c2 >= 'a' && c2 <= 'z')
      c2 -= 'a'-'A';
    if (c1 != c2)
      break;
  }
  return (c1 == c2);
}
#endif
198
199iconv_t iconv_open (const char* tocode, const char* fromcode)
200{
201  struct conv_struct * cd;
202  char buf[MAX_WORD_LENGTH+10+1];
203  const char* cp;
204  char* bp;
205  const struct alias * ap;
206  unsigned int count;
207  unsigned int from_index;
208  int from_wchar;
209  unsigned int to_index;
210  int to_wchar;
211  int transliterate = 0;
212  int discard_ilseq = 0;
213
214  /* Before calling aliases_lookup, convert the input string to upper case,
215   * and check whether it's entirely ASCII (we call gperf with option "-7"
216   * to achieve a smaller table) and non-empty. If it's not entirely ASCII,
217   * or if it's too long, it is not a valid encoding name.
218   */
219  for (to_wchar = 0;;) {
220    /* Search tocode in the table. */
221    for (cp = tocode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
222      unsigned char c = * (unsigned char *) cp;
223      if (c >= 0x80)
224        goto invalid;
225      if (c >= 'a' && c <= 'z')
226        c -= 'a'-'A';
227      *bp = c;
228      if (c == '\0')
229        break;
230      if (--count == 0)
231        goto invalid;
232    }
233    if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
234      bp -= 10;
235      *bp = '\0';
236      transliterate = 1;
237    }
238    if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) {
239      bp -= 8;
240      *bp = '\0';
241      discard_ilseq = 1;
242    }
243    if (buf[0] == '\0') {
244      tocode = locale_charset();
245      /* Avoid an endless loop that could occur when using an older version
246         of localcharset.c. */
247      if (tocode[0] == '\0')
248        goto invalid;
249      continue;
250    }
251    ap = aliases_lookup(buf,bp-buf);
252    if (ap == NULL) {
253      ap = aliases2_lookup(buf);
254      if (ap == NULL)
255        goto invalid;
256    }
257    if (ap->encoding_index == ei_local_char) {
258      tocode = locale_charset();
259      /* Avoid an endless loop that could occur when using an older version
260         of localcharset.c. */
261      if (tocode[0] == '\0')
262        goto invalid;
263      continue;
264    }
265    if (ap->encoding_index == ei_local_wchar_t) {
266#if __STDC_ISO_10646__
267      if (sizeof(wchar_t) == 4) {
268        to_index = ei_ucs4internal;
269        break;
270      }
271      if (sizeof(wchar_t) == 2) {
272        to_index = ei_ucs2internal;
273        break;
274      }
275      if (sizeof(wchar_t) == 1) {
276        to_index = ei_iso8859_1;
277        break;
278      }
279#endif
280#if HAVE_MBRTOWC
281      to_wchar = 1;
282      tocode = locale_charset();
283      continue;
284#endif
285      goto invalid;
286    }
287    to_index = ap->encoding_index;
288    break;
289  }
290  for (from_wchar = 0;;) {
291    /* Search fromcode in the table. */
292    for (cp = fromcode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
293      unsigned char c = * (unsigned char *) cp;
294      if (c >= 0x80)
295        goto invalid;
296      if (c >= 'a' && c <= 'z')
297        c -= 'a'-'A';
298      *bp = c;
299      if (c == '\0')
300        break;
301      if (--count == 0)
302        goto invalid;
303    }
304    if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
305      bp -= 10;
306      *bp = '\0';
307    }
308    if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) {
309      bp -= 8;
310      *bp = '\0';
311    }
312    if (buf[0] == '\0') {
313      fromcode = locale_charset();
314      /* Avoid an endless loop that could occur when using an older version
315         of localcharset.c. */
316      if (fromcode[0] == '\0')
317        goto invalid;
318      continue;
319    }
320    ap = aliases_lookup(buf,bp-buf);
321    if (ap == NULL) {
322      ap = aliases2_lookup(buf);
323      if (ap == NULL)
324        goto invalid;
325    }
326    if (ap->encoding_index == ei_local_char) {
327      fromcode = locale_charset();
328      /* Avoid an endless loop that could occur when using an older version
329         of localcharset.c. */
330      if (fromcode[0] == '\0')
331        goto invalid;
332      continue;
333    }
334    if (ap->encoding_index == ei_local_wchar_t) {
335#if __STDC_ISO_10646__
336      if (sizeof(wchar_t) == 4) {
337        from_index = ei_ucs4internal;
338        break;
339      }
340      if (sizeof(wchar_t) == 2) {
341        from_index = ei_ucs2internal;
342        break;
343      }
344      if (sizeof(wchar_t) == 1) {
345        from_index = ei_iso8859_1;
346        break;
347      }
348#endif
349#if HAVE_WCRTOMB
350      from_wchar = 1;
351      fromcode = locale_charset();
352      continue;
353#endif
354      goto invalid;
355    }
356    from_index = ap->encoding_index;
357    break;
358  }
359  cd = (struct conv_struct *) malloc(from_wchar != to_wchar
360                                     ? sizeof(struct wchar_conv_struct)
361                                     : sizeof(struct conv_struct));
362  if (cd == NULL) {
363    errno = ENOMEM;
364    return (iconv_t)(-1);
365  }
366  cd->iindex = from_index;
367  cd->ifuncs = all_encodings[from_index].ifuncs;
368  cd->oindex = to_index;
369  cd->ofuncs = all_encodings[to_index].ofuncs;
370  cd->oflags = all_encodings[to_index].oflags;
371  /* Initialize the loop functions. */
372#if HAVE_MBRTOWC
373  if (to_wchar) {
374#if HAVE_WCRTOMB
375    if (from_wchar) {
376      cd->lfuncs.loop_convert = wchar_id_loop_convert;
377      cd->lfuncs.loop_reset = wchar_id_loop_reset;
378    } else
379#endif
380    {
381      cd->lfuncs.loop_convert = wchar_to_loop_convert;
382      cd->lfuncs.loop_reset = wchar_to_loop_reset;
383    }
384  } else
385#endif
386  {
387#if HAVE_WCRTOMB
388    if (from_wchar) {
389      cd->lfuncs.loop_convert = wchar_from_loop_convert;
390      cd->lfuncs.loop_reset = wchar_from_loop_reset;
391    } else
392#endif
393    {
394      cd->lfuncs.loop_convert = unicode_loop_convert;
395      cd->lfuncs.loop_reset = unicode_loop_reset;
396    }
397  }
398  /* Initialize the states. */
399  memset(&cd->istate,'\0',sizeof(state_t));
400  memset(&cd->ostate,'\0',sizeof(state_t));
401  /* Initialize the operation flags. */
402  cd->transliterate = transliterate;
403  cd->discard_ilseq = discard_ilseq;
404  /* Initialize additional fields. */
405  if (from_wchar != to_wchar) {
406    struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) cd;
407    memset(&wcd->state,'\0',sizeof(mbstate_t));
408  }
409  /* Done. */
410  return (iconv_t)cd;
411invalid:
412  errno = EINVAL;
413  return (iconv_t)(-1);
414}
415
416size_t iconv (iconv_t icd,
417              ICONV_CONST char* * inbuf, size_t *inbytesleft,
418              char* * outbuf, size_t *outbytesleft)
419{
420  conv_t cd = (conv_t) icd;
421  if (inbuf == NULL || *inbuf == NULL)
422    return cd->lfuncs.loop_reset(icd,outbuf,outbytesleft);
423  else
424    return cd->lfuncs.loop_convert(icd,
425                                   (const char* *)inbuf,inbytesleft,
426                                   outbuf,outbytesleft);
427}
428
/*
 * Releases the conversion descriptor ICD.
 *
 * Returns 0 on success.  If ICD is (iconv_t)(-1), the value iconv_open()
 * returns on failure, nothing is freed and -1 is returned with errno set
 * to EBADF.
 */
int iconv_close (iconv_t icd)
{
  if (icd == (iconv_t)(-1)) {
    errno = EBADF;
    return -1;
  }
  free((void *) icd);
  return 0;
}
435
436#ifndef LIBICONV_PLUG
437
438int iconvctl (iconv_t icd, int request, void* argument)
439{
440  conv_t cd = (conv_t) icd;
441  switch (request) {
442    case ICONV_TRIVIALP:
443      *(int *)argument =
444        ((cd->lfuncs.loop_convert == unicode_loop_convert
445          && cd->iindex == cd->oindex)
446         || cd->lfuncs.loop_convert == wchar_id_loop_convert
447         ? 1 : 0);
448      return 0;
449    case ICONV_GET_TRANSLITERATE:
450      *(int *)argument = cd->transliterate;
451      return 0;
452    case ICONV_SET_TRANSLITERATE:
453      cd->transliterate = (*(const int *)argument ? 1 : 0);
454      return 0;
455    case ICONV_GET_DISCARD_ILSEQ:
456      *(int *)argument = cd->discard_ilseq;
457      return 0;
458    case ICONV_SET_DISCARD_ILSEQ:
459      cd->discard_ilseq = (*(const int *)argument ? 1 : 0);
460      return 0;
461    default:
462      errno = EINVAL;
463      return -1;
464  }
465}
466
467static int compare_by_index (const void * arg1, const void * arg2)
468{
469  const struct alias * alias1 = (const struct alias *) arg1;
470  const struct alias * alias2 = (const struct alias *) arg2;
471  return (int)alias1->encoding_index - (int)alias2->encoding_index;
472}
473
/*
 * qsort comparator for an array of C strings (elements are const char *).
 * Orders names alphabetically by strcmp, except that names starting with
 * "CS" sort after all names that do not.
 * Returns 0 for equal strings; otherwise a negative or positive value whose
 * magnitude is dominated by the "CS" rule (+-4) with strcmp as tie-break
 * (+-1).
 */
static int compare_by_name (const void * arg1, const void * arg2)
{
  const char * name1 = *(const char * const *)arg1;
  const char * name2 = *(const char * const *)arg2;
  int order = strcmp(name1,name2);
  int is_cs1;
  int is_cs2;
  if (order == 0)
    return 0;
  /* Compare alphabetically, but put "CS" names at the end. */
  is_cs1 = (name1[0]=='C' && name1[1]=='S');
  is_cs2 = (name2[0]=='C' && name2[1]=='S');
  return (is_cs1 - is_cs2) * 4 + (order > 0 ? 1 : -1);
}
486
487void iconvlist (int (*do_one) (unsigned int namescount,
488                               const char * const * names,
489                               void* data),
490                void* data)
491{
492#define aliascount1  sizeof(aliases)/sizeof(aliases[0])
493#ifndef aliases2_lookup
494#define aliascount2  sizeof(sysdep_aliases)/sizeof(sysdep_aliases[0])
495#else
496#define aliascount2  0
497#endif
498#define aliascount  (aliascount1+aliascount2)
499  struct alias aliasbuf[aliascount];
500  const char * namesbuf[aliascount];
501  size_t num_aliases;
502  {
503    /* Put all existing aliases into a buffer. */
504    size_t i;
505    size_t j;
506    j = 0;
507    for (i = 0; i < aliascount1; i++) {
508      const struct alias * p = &aliases[i];
509      if (p->name[0] != '\0'
510          && p->encoding_index != ei_local_char
511          && p->encoding_index != ei_local_wchar_t)
512        aliasbuf[j++] = *p;
513    }
514#ifndef aliases2_lookup
515    for (i = 0; i < aliascount2; i++)
516      aliasbuf[j++] = sysdep_aliases[i];
517#endif
518    num_aliases = j;
519  }
520  /* Sort by encoding_index. */
521  if (num_aliases > 1)
522    qsort(aliasbuf, num_aliases, sizeof(struct alias), compare_by_index);
523  {
524    /* Process all aliases with the same encoding_index together. */
525    size_t j;
526    j = 0;
527    while (j < num_aliases) {
528      unsigned int ei = aliasbuf[j].encoding_index;
529      size_t i = 0;
530      do
531        namesbuf[i++] = aliasbuf[j++].name;
532      while (j < num_aliases && aliasbuf[j].encoding_index == ei);
533      if (i > 1)
534        qsort(namesbuf, i, sizeof(const char *), compare_by_name);
535      /* Call the callback. */
536      if (do_one(i,namesbuf,data))
537        break;
538    }
539  }
540#undef aliascount
541#undef aliascount2
542#undef aliascount1
543}
544
545int _libiconv_version = _LIBICONV_VERSION;
546
547#endif
548