1/*
2 * Copyright (C) 1999-2002 Free Software Foundation, Inc.
3 * This file is part of the GNU LIBICONV Library.
4 *
5 * The GNU LIBICONV Library is free software; you can redistribute it
6 * and/or modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
9 *
10 * The GNU LIBICONV Library is distributed in the hope that it will be
11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 * Library General Public License for more details.
14 *
15 * You should have received a copy of the GNU Library General Public
16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17 * If not, write to the Free Software Foundation, Inc., 59 Temple Place -
18 * Suite 330, Boston, MA 02111-1307, USA.
19 */
20
21#include "../include/iconv.h"
22
23#include <stdlib.h>
24#include <string.h>
25#include "config.h"
26#include "libcharset.h"
27#include <errno.h>
/*
 * Selection of the optional encoding groups.
 *
 * When ENABLE_EXTRA is set, every system dependent group (AIX, OSF/1, DOS)
 * and the extra encodings are compiled in, whatever the build system is.
 * Otherwise only the group matching the current system is enabled, and the
 * extra encodings are left out.
 */
#if ENABLE_EXTRA || defined(_AIX)
#define USE_AIX
#endif
#if ENABLE_EXTRA || defined(__osf__)
#define USE_OSF1
#endif
#if ENABLE_EXTRA || defined(__DJGPP__) || (defined(_WIN32) && (defined(_MSC_VER) || defined(__MINGW32__)))
#define USE_DOS
#endif
#if ENABLE_EXTRA
#define USE_EXTRA
#endif
52
53/*
54 * Data type for general conversion loop.
55 */
56struct loop_funcs {
57  size_t (*loop_convert) (iconv_t icd,
58                          const char* * inbuf, size_t *inbytesleft,
59                          char* * outbuf, size_t *outbytesleft);
60  size_t (*loop_reset) (iconv_t icd,
61                        char* * outbuf, size_t *outbytesleft);
62};
63
64/*
65 * Converters.
66 */
67#include "converters.h"
68
69/*
70 * Transliteration tables.
71 */
72#include "cjk_variants.h"
73#include "translit.h"
74
75/*
76 * Table of all supported encodings.
77 */
78struct encoding {
79  struct mbtowc_funcs ifuncs; /* conversion multibyte -> unicode */
80  struct wctomb_funcs ofuncs; /* conversion unicode -> multibyte */
81  int oflags;                 /* flags for unicode -> multibyte conversion */
82};
83enum {
84#define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
85  ei_##xxx ,
86#include "encodings.def"
87#ifdef USE_AIX
88#include "encodings_aix.def"
89#endif
90#ifdef USE_OSF1
91#include "encodings_osf1.def"
92#endif
93#ifdef USE_DOS
94#include "encodings_dos.def"
95#endif
96#ifdef USE_EXTRA
97#include "encodings_extra.def"
98#endif
99#include "encodings_local.def"
100#undef DEFENCODING
101ei_for_broken_compilers_that_dont_like_trailing_commas
102};
103#include "flags.h"
104static struct encoding const all_encodings[] = {
105#define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
106  { xxx_ifuncs1,xxx_ifuncs2, xxx_ofuncs1,xxx_ofuncs2, ei_##xxx##_oflags },
107#include "encodings.def"
108#ifdef USE_AIX
109#include "encodings_aix.def"
110#endif
111#ifdef USE_OSF1
112#include "encodings_osf1.def"
113#endif
114#ifdef USE_DOS
115#include "encodings_dos.def"
116#endif
117#ifdef USE_EXTRA
118#include "encodings_extra.def"
119#endif
120#undef DEFENCODING
121#define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
122  { xxx_ifuncs1,xxx_ifuncs2, xxx_ofuncs1,xxx_ofuncs2, 0 },
123#include "encodings_local.def"
124#undef DEFENCODING
125};
126
127/*
128 * Conversion loops.
129 */
130#include "loops.h"
131
132/*
133 * Alias lookup function.
134 * Defines
135 *   struct alias { const char* name; unsigned int encoding_index; };
136 *   const struct alias * aliases_lookup (const char *str, unsigned int len);
137 *   #define MAX_WORD_LENGTH ...
138 */
139#include "aliases.h"
140
141/*
142 * System dependent alias lookup function.
143 * Defines
144 *   const struct alias * aliases2_lookup (const char *str);
145 */
#if defined(USE_AIX) || defined(USE_OSF1) || defined(USE_DOS) || defined(USE_EXTRA) /* || ... */
/* Aliases of the system dependent and extra encodings that are enabled. */
static struct alias sysdep_aliases[] = {
#ifdef USE_AIX
#include "aliases_aix.h"
#endif
#ifdef USE_OSF1
#include "aliases_osf1.h"
#endif
#ifdef USE_DOS
#include "aliases_dos.h"
#endif
#ifdef USE_EXTRA
#include "aliases_extra.h"
#endif
};
/*
 * Linear search of sysdep_aliases[] for an entry whose name is exactly
 * equal to str (strcmp, hence case sensitive).
 * Returns the matching entry, or NULL if there is none.
 */
#ifdef __GNUC__
__inline
#endif
const struct alias *
aliases2_lookup (register const char *str)
{
  const size_t total = sizeof(sysdep_aliases)/sizeof(sysdep_aliases[0]);
  size_t k;
  for (k = 0; k < total; k++) {
    if (strcmp(str,sysdep_aliases[k].name) == 0)
      return &sysdep_aliases[k];
  }
  return NULL;
}
#else
/* No system dependent aliases are compiled in: the lookup never matches. */
#define aliases2_lookup(str)  NULL
#endif
177
178#if 0
/* Like !strcasecmp, except that both strings can be assumed to be ASCII
   and the first string can be assumed to be in upper case.
   Returns 1 if str2, with its lower case letters folded to upper case,
   equals str1, and 0 otherwise. */
static int strequal (const char* str1, const char* str2)
{
  const unsigned char* upper = (const unsigned char *) str1;
  const unsigned char* other = (const unsigned char *) str2;
  for (; *upper != 0; upper++, other++) {
    unsigned char folded = *other;
    /* Only the second string is case folded. */
    if (folded >= 'a' && folded <= 'z')
      folded = (unsigned char) (folded - ('a'-'A'));
    if (*upper != folded)
      return 0;
  }
  /* str1 is exhausted: equal only if str2 ends here as well. */
  return (*other == 0);
}
197#endif
198
199iconv_t libiconv_open (const char* tocode, const char* fromcode)
200{
201  struct conv_struct * cd;
202  char buf[MAX_WORD_LENGTH+10+1];
203  const char* cp;
204  char* bp;
205  const struct alias * ap;
206  unsigned int count;
207  unsigned int from_index;
208  int from_wchar;
209  unsigned int to_index;
210  int to_wchar;
211  int transliterate = 0;
212  int discard_ilseq = 0;
213  /* Before calling aliases_lookup, convert the input string to upper case,
214   * and check whether it's entirely ASCII (we call gperf with option "-7"
215   * to achieve a smaller table) and non-empty. If it's not entirely ASCII,
216   * or if it's too long, it is not a valid encoding name.
217   */
218  for (to_wchar = 0;;) {
219    /* Search tocode in the table. */
220    for (cp = tocode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
221      unsigned char c = * (unsigned char *) cp;
222      if (c >= 0x80)
223        goto invalid;
224      if (c >= 'a' && c <= 'z')
225        c -= 'a'-'A';
226      *bp = c;
227      if (c == '\0')
228        break;
229      if (--count == 0)
230        goto invalid;
231    }
232    if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
233      bp -= 10;
234      *bp = '\0';
235      transliterate = 1;
236    }
237    if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) {
238      bp -= 8;
239      *bp = '\0';
240      discard_ilseq = 1;
241    }
242    if (buf[0] == '\0') {
243      tocode = locale_charset();
244      /* Avoid an endless loop that could occur when using an older version
245         of localcharset.c. */
246      if (tocode[0] == '\0')
247        goto invalid;
248      continue;
249    }
250    ap = aliases_lookup(buf,bp-buf);
251    if (ap == NULL) {
252      ap = aliases2_lookup(buf);
253      if (ap == NULL)
254        goto invalid;
255    }
256    if (ap->encoding_index == ei_local_char) {
257      tocode = locale_charset();
258      /* Avoid an endless loop that could occur when using an older version
259         of localcharset.c. */
260      if (tocode[0] == '\0')
261        goto invalid;
262      continue;
263    }
264    if (ap->encoding_index == ei_local_wchar_t) {
265#if __STDC_ISO_10646__
266      if (sizeof(wchar_t) == 4) {
267        to_index = ei_ucs4internal;
268        break;
269      }
270      if (sizeof(wchar_t) == 2) {
271        to_index = ei_ucs2internal;
272        break;
273      }
274      if (sizeof(wchar_t) == 1) {
275        to_index = ei_iso8859_1;
276        break;
277      }
278#endif
279#if HAVE_MBRTOWC
280      to_wchar = 1;
281      tocode = locale_charset();
282      continue;
283#endif
284      goto invalid;
285    }
286    to_index = ap->encoding_index;
287    break;
288  }
289  for (from_wchar = 0;;) {
290    /* Search fromcode in the table. */
291    for (cp = fromcode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
292      unsigned char c = * (unsigned char *) cp;
293      if (c >= 0x80)
294        goto invalid;
295      if (c >= 'a' && c <= 'z')
296        c -= 'a'-'A';
297      *bp = c;
298      if (c == '\0')
299        break;
300      if (--count == 0)
301        goto invalid;
302    }
303    if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
304      bp -= 10;
305      *bp = '\0';
306    }
307    if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) {
308      bp -= 8;
309      *bp = '\0';
310    }
311    if (buf[0] == '\0') {
312      fromcode = locale_charset();
313      /* Avoid an endless loop that could occur when using an older version
314         of localcharset.c. */
315      if (fromcode[0] == '\0')
316        goto invalid;
317      continue;
318    }
319    ap = aliases_lookup(buf,bp-buf);
320    if (ap == NULL) {
321      ap = aliases2_lookup(buf);
322      if (ap == NULL)
323        goto invalid;
324    }
325    if (ap->encoding_index == ei_local_char) {
326      fromcode = locale_charset();
327      /* Avoid an endless loop that could occur when using an older version
328         of localcharset.c. */
329      if (fromcode[0] == '\0')
330        goto invalid;
331      continue;
332    }
333    if (ap->encoding_index == ei_local_wchar_t) {
334#if __STDC_ISO_10646__
335      if (sizeof(wchar_t) == 4) {
336        from_index = ei_ucs4internal;
337        break;
338      }
339      if (sizeof(wchar_t) == 2) {
340        from_index = ei_ucs2internal;
341        break;
342      }
343      if (sizeof(wchar_t) == 1) {
344        from_index = ei_iso8859_1;
345        break;
346      }
347#endif
348#if HAVE_WCRTOMB
349      from_wchar = 1;
350      fromcode = locale_charset();
351      continue;
352#endif
353      goto invalid;
354    }
355    from_index = ap->encoding_index;
356    break;
357  }
358  cd = (struct conv_struct *) malloc(from_wchar != to_wchar
359                                     ? sizeof(struct wchar_conv_struct)
360                                     : sizeof(struct conv_struct));
361  if (cd == NULL) {
362    errno = ENOMEM;
363    return (iconv_t)(-1);
364  }
365  cd->iindex = from_index;
366  cd->ifuncs = all_encodings[from_index].ifuncs;
367  cd->oindex = to_index;
368  cd->ofuncs = all_encodings[to_index].ofuncs;
369  cd->oflags = all_encodings[to_index].oflags;
370  /* Initialize the loop functions. */
371#if HAVE_MBRTOWC
372  if (to_wchar) {
373#if HAVE_WCRTOMB
374    if (from_wchar) {
375      cd->lfuncs.loop_convert = wchar_id_loop_convert;
376      cd->lfuncs.loop_reset = wchar_id_loop_reset;
377    } else
378#endif
379    {
380      cd->lfuncs.loop_convert = wchar_to_loop_convert;
381      cd->lfuncs.loop_reset = wchar_to_loop_reset;
382    }
383  } else
384#endif
385  {
386#if HAVE_WCRTOMB
387    if (from_wchar) {
388      cd->lfuncs.loop_convert = wchar_from_loop_convert;
389      cd->lfuncs.loop_reset = wchar_from_loop_reset;
390    } else
391#endif
392    {
393      cd->lfuncs.loop_convert = unicode_loop_convert;
394      cd->lfuncs.loop_reset = unicode_loop_reset;
395    }
396  }
397  /* Initialize the states. */
398  memset(&cd->istate,'\0',sizeof(state_t));
399  memset(&cd->ostate,'\0',sizeof(state_t));
400  /* Initialize the operation flags. */
401  cd->transliterate = transliterate;
402  cd->discard_ilseq = discard_ilseq;
403  /* Initialize additional fields. */
404  if (from_wchar != to_wchar) {
405    struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) cd;
406    memset(&wcd->state,'\0',sizeof(mbstate_t));
407  }
408  /* Done. */
409  return (iconv_t)cd;
410invalid:
411  errno = EINVAL;
412  return (iconv_t)(-1);
413}
414
415size_t libiconv (iconv_t icd,
416              ICONV_CONST char* * inbuf, size_t *inbytesleft,
417              char* * outbuf, size_t *outbytesleft)
418{
419  conv_t cd = (conv_t) icd;
420  if (inbuf == NULL || *inbuf == NULL)
421    return cd->lfuncs.loop_reset(icd,outbuf,outbytesleft);
422  else
423    return cd->lfuncs.loop_convert(icd,
424                                   (const char* *)inbuf,inbytesleft,
425                                   outbuf,outbytesleft);
426}
427
428int libiconv_close (iconv_t icd)
429{
430  conv_t cd = (conv_t) icd;
431  free(cd);
432  return 0;
433}
434
435#ifndef LIBICONV_PLUG
436
437int iconvctl (iconv_t icd, int request, void* argument)
438{
439  conv_t cd = (conv_t) icd;
440  switch (request) {
441    case ICONV_TRIVIALP:
442      *(int *)argument =
443        ((cd->lfuncs.loop_convert == unicode_loop_convert
444          && cd->iindex == cd->oindex)
445         || cd->lfuncs.loop_convert == wchar_id_loop_convert
446         ? 1 : 0);
447      return 0;
448    case ICONV_GET_TRANSLITERATE:
449      *(int *)argument = cd->transliterate;
450      return 0;
451    case ICONV_SET_TRANSLITERATE:
452      cd->transliterate = (*(const int *)argument ? 1 : 0);
453      return 0;
454    case ICONV_GET_DISCARD_ILSEQ:
455      *(int *)argument = cd->discard_ilseq;
456      return 0;
457    case ICONV_SET_DISCARD_ILSEQ:
458      cd->discard_ilseq = (*(const int *)argument ? 1 : 0);
459      return 0;
460    default:
461      errno = EINVAL;
462      return -1;
463  }
464}
465
466static int compare_by_index (const void * arg1, const void * arg2)
467{
468  const struct alias * alias1 = (const struct alias *) arg1;
469  const struct alias * alias2 = (const struct alias *) arg2;
470  return (int)alias1->encoding_index - (int)alias2->encoding_index;
471}
472
/*
 * qsort() comparison function for arrays of strings (const char *).
 * Orders alphabetically (strcmp order), except that names starting with
 * "CS" are put after all other names.  Returns 0 for equal strings.
 */
static int compare_by_name (const void * arg1, const void * arg2)
{
  const char * const * lhs = (const char * const *) arg1;
  const char * const * rhs = (const char * const *) arg2;
  int order = strcmp(*lhs,*rhs);
  int lhs_is_cs;
  int rhs_is_cs;

  if (order == 0)
    return 0;
  lhs_is_cs = ((*lhs)[0] == 'C' && (*lhs)[1] == 'S');
  rhs_is_cs = ((*rhs)[0] == 'C' && (*rhs)[1] == 'S');
  /* The "CS" difference has weight 4 and thus dominates the alphabetical
     order, which only contributes +1 or -1. */
  return 4 * (lhs_is_cs - rhs_is_cs) + (order > 0 ? 1 : -1);
}
485
486void iconvlist (int (*do_one) (unsigned int namescount,
487                               const char * const * names,
488                               void* data),
489                void* data)
490{
491#define aliascount1  sizeof(aliases)/sizeof(aliases[0])
492#ifndef aliases2_lookup
493#define aliascount2  sizeof(sysdep_aliases)/sizeof(sysdep_aliases[0])
494#else
495#define aliascount2  0
496#endif
497#define aliascount  (aliascount1+aliascount2)
498  struct alias aliasbuf[aliascount];
499  const char * namesbuf[aliascount];
500  size_t num_aliases;
501  {
502    /* Put all existing aliases into a buffer. */
503    size_t i;
504    size_t j;
505    j = 0;
506    for (i = 0; i < aliascount1; i++) {
507      const struct alias * p = &aliases[i];
508      if (p->name[0] != '\0'
509          && p->encoding_index != ei_local_char
510          && p->encoding_index != ei_local_wchar_t)
511        aliasbuf[j++] = *p;
512    }
513#ifndef aliases2_lookup
514    for (i = 0; i < aliascount2; i++)
515      aliasbuf[j++] = sysdep_aliases[i];
516#endif
517    num_aliases = j;
518  }
519  /* Sort by encoding_index. */
520  if (num_aliases > 1)
521    qsort(aliasbuf, num_aliases, sizeof(struct alias), compare_by_index);
522  {
523    /* Process all aliases with the same encoding_index together. */
524    size_t j;
525    j = 0;
526    while (j < num_aliases) {
527      unsigned int ei = aliasbuf[j].encoding_index;
528      size_t i = 0;
529      do
530        namesbuf[i++] = aliasbuf[j++].name;
531      while (j < num_aliases && aliasbuf[j].encoding_index == ei);
532      if (i > 1)
533        qsort(namesbuf, i, sizeof(const char *), compare_by_name);
534      /* Call the callback. */
535      if (do_one(i,namesbuf,data))
536        break;
537    }
538  }
539#undef aliascount
540#undef aliascount2
541#undef aliascount1
542}
543
544int _libiconv_version = _LIBICONV_VERSION;
545
546#endif
547