1/*
2 * Copyright (C) 1999-2008, 2011 Free Software Foundation, Inc.
3 * This file is part of the GNU LIBICONV Library.
4 *
5 * The GNU LIBICONV Library is free software; you can redistribute it
6 * and/or modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
9 *
10 * The GNU LIBICONV Library is distributed in the hope that it will be
11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 * Library General Public License for more details.
14 *
15 * You should have received a copy of the GNU Library General Public
16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
18 * Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20
21#include <iconv.h>
22
23#include <stdlib.h>
24#include <string.h>
25#include "config.h"
26#include "localcharset.h"
27
28#ifdef __CYGWIN__
29#include <cygwin/version.h>
30#endif
31
#if ENABLE_EXTRA
/*
 * Consider all system dependent encodings, for any system,
 * and the extra encodings.
 * Each USE_xxx macro defined here makes the corresponding
 * encodings_xxx.def and canonical_xxx.h files get included into the
 * encoding tables further down in this file.
 */
#define USE_AIX
#define USE_OSF1
#define USE_DOS
#define USE_EXTRA
#else
/*
 * Consider those system dependent encodings that are needed for the
 * current system.
 * USE_EXTRA is never defined in this branch.
 */
/* AIX. */
#ifdef _AIX
#define USE_AIX
#endif
/* OSF/1 and VMS. */
#if defined(__osf__) || defined(VMS)
#define USE_OSF1
#endif
/* DJGPP, or _WIN32 when compiled with MSVC or MinGW. */
#if defined(__DJGPP__) || (defined(_WIN32) && (defined(_MSC_VER) || defined(__MINGW32__)))
#define USE_DOS
#endif
#endif
56
/*
 * Data type for general conversion loop.
 * A conversion descriptor stores one such pair of function pointers;
 * iconv() dispatches through it.
 */
struct loop_funcs {
  /* Called by iconv() when an input buffer is supplied.  Takes the same
     arguments as iconv() itself, with inbuf treated as 'const char* *'. */
  size_t (*loop_convert) (iconv_t icd,
                          const char* * inbuf, size_t *inbytesleft,
                          char* * outbuf, size_t *outbytesleft);
  /* Called by iconv() when inbuf or *inbuf is NULL. */
  size_t (*loop_reset) (iconv_t icd,
                        char* * outbuf, size_t *outbytesleft);
};
67
68/*
69 * Converters.
70 */
71#include "converters.h"
72
73/*
74 * Transliteration tables.
75 */
76#include "cjk_variants.h"
77#include "translit.h"
78
/*
 * Table of all supported encodings.
 */
struct encoding {
  struct mbtowc_funcs ifuncs; /* conversion multibyte -> unicode */
  struct wctomb_funcs ofuncs; /* conversion unicode -> multibyte */
  int oflags;                 /* flags for unicode -> multibyte conversion */
};
/*
 * The encodings*.def files are included twice below, each time with a
 * different definition of DEFENCODING: first to generate the enumeration
 * constants ei_<xxx>, then to generate the rows of all_encodings[].
 * Both passes include the files in the same order, so ei_<xxx> is the
 * index of the matching row in all_encodings[].
 * DEFALIAS is defined to expand to nothing during both passes.
 */
#define DEFALIAS(xxx_alias,xxx) /* nothing */
/* First pass: one enumeration constant per encoding. */
enum {
#define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
  ei_##xxx ,
#include "encodings.def"
#ifdef USE_AIX
# include "encodings_aix.def"
#endif
#ifdef USE_OSF1
# include "encodings_osf1.def"
#endif
#ifdef USE_DOS
# include "encodings_dos.def"
#endif
#ifdef USE_EXTRA
# include "encodings_extra.def"
#endif
#include "encodings_local.def"
#undef DEFENCODING
/* Dummy last enumerator, so that the list does not end in a comma. */
ei_for_broken_compilers_that_dont_like_trailing_commas
};
#include "flags.h"
/* Second pass: one table row per encoding.  The oflags value of a row is
   the constant ei_<xxx>_oflags (presumably provided by flags.h). */
static struct encoding const all_encodings[] = {
#define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
  { xxx_ifuncs1,xxx_ifuncs2, xxx_ofuncs1,xxx_ofuncs2, ei_##xxx##_oflags },
#include "encodings.def"
#ifdef USE_AIX
# include "encodings_aix.def"
#endif
#ifdef USE_OSF1
# include "encodings_osf1.def"
#endif
#ifdef USE_DOS
# include "encodings_dos.def"
#endif
#ifdef USE_EXTRA
# include "encodings_extra.def"
#endif
#undef DEFENCODING
/* The entries of encodings_local.def get an oflags value of 0 instead of
   an ei_<xxx>_oflags constant. */
#define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
  { xxx_ifuncs1,xxx_ifuncs2, xxx_ofuncs1,xxx_ofuncs2, 0 },
#include "encodings_local.def"
#undef DEFENCODING
};
#undef DEFALIAS
132
133/*
134 * Conversion loops.
135 */
136#include "loops.h"
137
138/*
139 * Alias lookup function.
140 * Defines
141 *   struct alias { int name; unsigned int encoding_index; };
142 *   const struct alias * aliases_lookup (const char *str, unsigned int len);
143 *   #define MAX_WORD_LENGTH ...
144 */
145#if defined _AIX
146# include "aliases_sysaix.h"
147#elif defined hpux || defined __hpux
148# include "aliases_syshpux.h"
149#elif defined __osf__
150# include "aliases_sysosf1.h"
151#elif defined __sun
152# include "aliases_syssolaris.h"
153#else
154# include "aliases.h"
155#endif
156
/*
 * System dependent alias lookup function.
 * Defines
 *   const struct alias * aliases2_lookup (const char *str);
 * The tables below are generated by including "aliases2.h" three times,
 * each time with a different definition of the macro
 * S(tag,name,encoding_index).
 */
#if defined(USE_AIX) || defined(USE_OSF1) || defined(USE_DOS) || defined(USE_EXTRA) /* || ... */
/* One char array member per alias, sized to hold exactly the alias name
   including its terminating NUL. */
struct stringpool2_t {
#define S(tag,name,encoding_index) char stringpool_##tag[sizeof(name)];
#include "aliases2.h"
#undef S
};
/* The alias names themselves, one initializer per member. */
static const struct stringpool2_t stringpool2_contents = {
#define S(tag,name,encoding_index) name,
#include "aliases2.h"
#undef S
};
/* The string pool viewed as a flat character array, so that an alias name
   can be addressed as stringpool2 + offset. */
#define stringpool2 ((const char *) &stringpool2_contents)
/* For each alias: the offset of its name member within struct stringpool2_t
   (computed by taking the member's address relative to a null pointer),
   and its encoding index. */
static const struct alias sysdep_aliases[] = {
#define S(tag,name,encoding_index) { (int)(long)&((struct stringpool2_t *)0)->stringpool_##tag, encoding_index },
#include "aliases2.h"
#undef S
};
179#ifdef __GNUC__
180__inline
181#endif
182const struct alias *
183aliases2_lookup (register const char *str)
184{
185  const struct alias * ptr;
186  unsigned int count;
187  for (ptr = sysdep_aliases, count = sizeof(sysdep_aliases)/sizeof(sysdep_aliases[0]); count > 0; ptr++, count--)
188    if (!strcmp(str, stringpool2 + ptr->name))
189      return ptr;
190  return NULL;
191}
192#else
193#define aliases2_lookup(str)  NULL
194#define stringpool2  NULL
195#endif
196
#if 0
/* Like !strcasecmp, except that both strings can be assumed to be ASCII
   and the first string can be assumed to be in uppercase.
   Returns 1 if the strings are equal in that sense, 0 otherwise.
   This function is currently compiled out. */
static int strequal (const char* str1, const char* str2)
{
  unsigned char c1;
  unsigned char c2;
  for (;;) {
    c1 = * (unsigned char *) str1++;
    c2 = * (unsigned char *) str2++;
    /* End of str1: the strings are equal iff str2 ends here too. */
    if (c1 == 0)
      break;
    /* Only str2 needs to be upcased. */
    if (c2 >= 'a' && c2 <= 'z')
      c2 -= 'a'-'A';
    if (c1 != c2)
      break;
  }
  return (c1 == c2);
}
#endif
217
/*
 * Allocates a conversion descriptor for converting from the encoding
 * named by fromcode to the encoding named by tocode.
 * Returns the new descriptor.  On failure returns (iconv_t)(-1) with errno
 * set to ENOMEM if the allocation failed, or to EINVAL if control reached
 * the 'invalid' label.
 * The descriptor is obtained from malloc(); iconv_close() frees it.
 */
iconv_t iconv_open (const char* tocode, const char* fromcode)
{
  struct conv_struct * cd;
  unsigned int from_index;
  int from_wchar;
  unsigned int to_index;
  int to_wchar;
  int transliterate;
  int discard_ilseq;

  /* Textually included code fragment.  Presumably it parses tocode and
     fromcode, sets the local variables declared above, and jumps to
     'invalid' on an unknown name -- TODO confirm in iconv_open1.h. */
#include "iconv_open1.h"

  /* Allocate a 'struct wchar_conv_struct' when from_wchar and to_wchar
     differ, otherwise a plain 'struct conv_struct'. */
  cd = (struct conv_struct *) malloc(from_wchar != to_wchar
                                     ? sizeof(struct wchar_conv_struct)
                                     : sizeof(struct conv_struct));
  if (cd == NULL) {
    errno = ENOMEM;
    return (iconv_t)(-1);
  }

  /* Textually included code fragment.  Presumably it initializes *cd from
     the local variables above -- TODO confirm in iconv_open2.h. */
#include "iconv_open2.h"

  return (iconv_t)cd;
invalid:
  errno = EINVAL;
  return (iconv_t)(-1);
}
245
246size_t iconv (iconv_t icd,
247              ICONV_CONST char* * inbuf, size_t *inbytesleft,
248              char* * outbuf, size_t *outbytesleft)
249{
250  conv_t cd = (conv_t) icd;
251  if (inbuf == NULL || *inbuf == NULL)
252    return cd->lfuncs.loop_reset(icd,outbuf,outbytesleft);
253  else
254    return cd->lfuncs.loop_convert(icd,
255                                   (const char* *)inbuf,inbytesleft,
256                                   outbuf,outbytesleft);
257}
258
/*
 * Releases a conversion descriptor that was returned by iconv_open().
 * Returns 0 on success.
 * If icd is (iconv_t)(-1), the value iconv_open() returns on failure, the
 * call fails with -1 and errno set to EBADF instead of handing that value
 * to free(), which would be undefined behaviour.
 */
int iconv_close (iconv_t icd)
{
  if (icd == (iconv_t)(-1)) {
    errno = EBADF;
    return -1;
  }
  /* The descriptor is the block that iconv_open() obtained from malloc(). */
  free((void *) icd);
  return 0;
}
265
266#ifndef LIBICONV_PLUG
267
/*
 * Verify that a 'struct conv_struct' and a 'struct wchar_conv_struct' each
 * fit in an iconv_allocation_t.
 * If this verification fails, iconv_allocation_t must be made larger and
 * the major version in LIBICONV_VERSION_INFO must be bumped.
 * Currently 'struct conv_struct' has 21 integer/pointer fields, and
 * 'struct wchar_conv_struct' additionally has an 'mbstate_t' field.
 *
 * The check is done at compile time: each array type below has size 1 if
 * the condition holds and size -1 (an invalid array size) if it does not.
 */
typedef int verify_size_1[2 * (sizeof (struct conv_struct) <= sizeof (iconv_allocation_t)) - 1];
typedef int verify_size_2[2 * (sizeof (struct wchar_conv_struct) <= sizeof (iconv_allocation_t)) - 1];
278
/*
 * Variant of iconv_open() that does not allocate memory: the conversion
 * descriptor is set up in the caller-provided storage *resultp.
 * Returns 0 on success.  Returns -1 with errno set to EINVAL if control
 * reaches the 'invalid' label.
 */
int iconv_open_into (const char* tocode, const char* fromcode,
                     iconv_allocation_t* resultp)
{
  struct conv_struct * cd;
  unsigned int from_index;
  int from_wchar;
  unsigned int to_index;
  int to_wchar;
  int transliterate;
  int discard_ilseq;

  /* Textually included code fragment.  Presumably it parses tocode and
     fromcode, sets the local variables declared above, and jumps to
     'invalid' on an unknown name -- TODO confirm in iconv_open1.h. */
#include "iconv_open1.h"

  /* Use the caller's storage as the descriptor. */
  cd = (struct conv_struct *) resultp;

  /* Textually included code fragment.  Presumably it initializes *cd from
     the local variables above -- TODO confirm in iconv_open2.h. */
#include "iconv_open2.h"

  return 0;
invalid:
  errno = EINVAL;
  return -1;
}
301
302int iconvctl (iconv_t icd, int request, void* argument)
303{
304  conv_t cd = (conv_t) icd;
305  switch (request) {
306    case ICONV_TRIVIALP:
307      *(int *)argument =
308        ((cd->lfuncs.loop_convert == unicode_loop_convert
309          && cd->iindex == cd->oindex)
310         || cd->lfuncs.loop_convert == wchar_id_loop_convert
311         ? 1 : 0);
312      return 0;
313    case ICONV_GET_TRANSLITERATE:
314      *(int *)argument = cd->transliterate;
315      return 0;
316    case ICONV_SET_TRANSLITERATE:
317      cd->transliterate = (*(const int *)argument ? 1 : 0);
318      return 0;
319    case ICONV_GET_DISCARD_ILSEQ:
320      *(int *)argument = cd->discard_ilseq;
321      return 0;
322    case ICONV_SET_DISCARD_ILSEQ:
323      cd->discard_ilseq = (*(const int *)argument ? 1 : 0);
324      return 0;
325    case ICONV_SET_HOOKS:
326      if (argument != NULL) {
327        cd->hooks = *(const struct iconv_hooks *)argument;
328      } else {
329        cd->hooks.uc_hook = NULL;
330        cd->hooks.wc_hook = NULL;
331        cd->hooks.data = NULL;
332      }
333      return 0;
334    case ICONV_SET_FALLBACKS:
335      if (argument != NULL) {
336        cd->fallbacks = *(const struct iconv_fallbacks *)argument;
337      } else {
338        cd->fallbacks.mb_to_uc_fallback = NULL;
339        cd->fallbacks.uc_to_mb_fallback = NULL;
340        cd->fallbacks.mb_to_wc_fallback = NULL;
341        cd->fallbacks.wc_to_mb_fallback = NULL;
342        cd->fallbacks.data = NULL;
343      }
344      return 0;
345    default:
346      errno = EINVAL;
347      return -1;
348  }
349}
350
/* An alias after its name has been converted from 'int' to 'const char*'. */
struct nalias { const char* name; unsigned int encoding_index; };

/*
 * qsort() comparison function: orders 'struct nalias' elements by
 * ascending encoding_index.
 * Returns -1, 0 or 1.  The result is computed by comparing the two
 * unsigned indices rather than by subtracting them as 'int', so it has the
 * correct sign for every pair of values and cannot overflow.
 */
static int compare_by_index (const void * arg1, const void * arg2)
{
  const struct nalias * alias1 = (const struct nalias *) arg1;
  const struct nalias * alias2 = (const struct nalias *) arg2;
  return (alias1->encoding_index > alias2->encoding_index)
         - (alias1->encoding_index < alias2->encoding_index);
}
360
/*
 * qsort() comparison function for an array of 'const char *' names.
 * Names are ordered as by strcmp(), except that every name starting with
 * "CS" sorts after every name that does not.
 * Returns 0 for equal names; otherwise -1 or 1 when both names are of the
 * same kind, and -5, -3, 3 or 5 when exactly one of them starts with "CS".
 */
static int compare_by_name (const void * arg1, const void * arg2)
{
  const char * const lhs = *(const char **)arg1;
  const char * const rhs = *(const char **)arg2;
  const int order = strcmp(lhs,rhs);
  int lhs_is_cs;
  int rhs_is_cs;

  if (order == 0)
    return 0;

  lhs_is_cs = (strncmp(lhs,"CS",2) == 0);
  rhs_is_cs = (strncmp(rhs,"CS",2) == 0);

  /* The "CS" term has weight 4, so it always outweighs the +-1 that
     comes from the alphabetical order. */
  if (order > 0)
    return (lhs_is_cs - rhs_is_cs) * 4 + 1;
  return (lhs_is_cs - rhs_is_cs) * 4 - 1;
}
373
374void iconvlist (int (*do_one) (unsigned int namescount,
375                               const char * const * names,
376                               void* data),
377                void* data)
378{
379#define aliascount1  sizeof(aliases)/sizeof(aliases[0])
380#ifndef aliases2_lookup
381#define aliascount2  sizeof(sysdep_aliases)/sizeof(sysdep_aliases[0])
382#else
383#define aliascount2  0
384#endif
385#define aliascount  (aliascount1+aliascount2)
386  struct nalias aliasbuf[aliascount];
387  const char * namesbuf[aliascount];
388  size_t num_aliases;
389  {
390    /* Put all existing aliases into a buffer. */
391    size_t i;
392    size_t j;
393    j = 0;
394    for (i = 0; i < aliascount1; i++) {
395      const struct alias * p = &aliases[i];
396      if (p->name >= 0
397          && p->encoding_index != ei_local_char
398          && p->encoding_index != ei_local_wchar_t) {
399        aliasbuf[j].name = stringpool + p->name;
400        aliasbuf[j].encoding_index = p->encoding_index;
401        j++;
402      }
403    }
404#ifndef aliases2_lookup
405    for (i = 0; i < aliascount2; i++) {
406      aliasbuf[j].name = stringpool2 + sysdep_aliases[i].name;
407      aliasbuf[j].encoding_index = sysdep_aliases[i].encoding_index;
408      j++;
409    }
410#endif
411    num_aliases = j;
412  }
413  /* Sort by encoding_index. */
414  if (num_aliases > 1)
415    qsort(aliasbuf, num_aliases, sizeof(struct nalias), compare_by_index);
416  {
417    /* Process all aliases with the same encoding_index together. */
418    size_t j;
419    j = 0;
420    while (j < num_aliases) {
421      unsigned int ei = aliasbuf[j].encoding_index;
422      size_t i = 0;
423      do
424        namesbuf[i++] = aliasbuf[j++].name;
425      while (j < num_aliases && aliasbuf[j].encoding_index == ei);
426      if (i > 1)
427        qsort(namesbuf, i, sizeof(const char *), compare_by_name);
428      /* Call the callback. */
429      if (do_one(i,namesbuf,data))
430        break;
431    }
432  }
433#undef aliascount
434#undef aliascount2
435#undef aliascount1
436}
437
/*
 * Table of canonical names of encodings.
 * Instead of strings, it contains offsets into stringpool and stringpool2.
 * iconv_canonicalize() indexes this table with an encoding index.
 * The canonical*.h files are included in the same order in which the
 * encodings*.def files are included into the encoding enumeration: main
 * table, AIX, OSF/1, DOS, extra, local.
 */
static const unsigned short all_canonical[] = {
/* Main table; the variant matches the aliases*.h file chosen above. */
#if defined _AIX
# include "canonical_sysaix.h"
#elif defined hpux || defined __hpux
# include "canonical_syshpux.h"
#elif defined __osf__
# include "canonical_sysosf1.h"
#elif defined __sun
# include "canonical_syssolaris.h"
#else
# include "canonical.h"
#endif
/* System dependent encodings. */
#ifdef USE_AIX
# if defined _AIX
#  include "canonical_aix_sysaix.h"
# else
#  include "canonical_aix.h"
# endif
#endif
#ifdef USE_OSF1
# if defined __osf__
#  include "canonical_osf1_sysosf1.h"
# else
#  include "canonical_osf1.h"
# endif
#endif
#ifdef USE_DOS
# include "canonical_dos.h"
#endif
#ifdef USE_EXTRA
# include "canonical_extra.h"
#endif
/* Local encodings. */
#if defined _AIX
# include "canonical_local_sysaix.h"
#elif defined hpux || defined __hpux
# include "canonical_local_syshpux.h"
#elif defined __osf__
# include "canonical_local_sysosf1.h"
#elif defined __sun
# include "canonical_local_syssolaris.h"
#else
# include "canonical_local.h"
#endif
};
486
/*
 * Returns the canonical name of the encoding designated by name.
 * The lookup ignores the case of ASCII letters and ignores trailing
 * "//TRANSLIT" and "//IGNORE" suffixes.  An empty name, and a name that
 * designates the ei_local_char encoding, are replaced by the result of
 * locale_charset() and looked up again.
 * If name is not recognized (not entirely ASCII, too long, or not a known
 * alias), name itself is returned.
 * The result is either name or a pointer into a static string pool.
 */
const char * iconv_canonicalize (const char * name)
{
  const char* code;
  /* Upper-cased copy of the name being looked up. */
  char buf[MAX_WORD_LENGTH+10+1];
  const char* cp;
  char* bp;
  const struct alias * ap;
  unsigned int count;
  unsigned int index;
  /* The string pool to which the offsets of the alias found refer. */
  const char* pool;

  /* Before calling aliases_lookup, convert the input string to upper case,
   * and check whether it's entirely ASCII (we call gperf with option "-7"
   * to achieve a smaller table) and non-empty. If it's not entirely ASCII,
   * or if it's too long, it is not a valid encoding name.
   */
  /* Each iteration looks up one candidate name; 'continue' restarts the
     lookup with a new value of code, 'break' ends it with index set. */
  for (code = name;;) {
    /* Search code in the table. */
    /* count is the number of bytes still free in buf. */
    for (cp = code, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
      unsigned char c = * (unsigned char *) cp;
      if (c >= 0x80)
        goto invalid;
      if (c >= 'a' && c <= 'z')
        c -= 'a'-'A';
      *bp = c;
      if (c == '\0')
        break;
      /* No room left for the terminating NUL: the name is too long. */
      if (--count == 0)
        goto invalid;
    }
    /* Here bp points to the terminating NUL in buf.  Strip the known
       suffixes, repeatedly, in any order. */
    for (;;) {
      if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
        bp -= 10;
        *bp = '\0';
        continue;
      }
      if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) {
        bp -= 8;
        *bp = '\0';
        continue;
      }
      break;
    }
    /* An empty name stands for the encoding reported by locale_charset(). */
    if (buf[0] == '\0') {
      code = locale_charset();
      /* Avoid an endless loop that could occur when using an older version
         of localcharset.c. */
      if (code[0] == '\0')
        goto invalid;
      continue;
    }
    /* Look in the main alias table first, then in the system dependent
       one; remember which string pool the hit belongs to. */
    pool = stringpool;
    ap = aliases_lookup(buf,bp-buf);
    if (ap == NULL) {
      pool = stringpool2;
      ap = aliases2_lookup(buf);
      if (ap == NULL)
        goto invalid;
    }
    /* ei_local_char is resolved through locale_charset() as well. */
    if (ap->encoding_index == ei_local_char) {
      code = locale_charset();
      /* Avoid an endless loop that could occur when using an older version
         of localcharset.c. */
      if (code[0] == '\0')
        goto invalid;
      continue;
    }
    if (ap->encoding_index == ei_local_wchar_t) {
      /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
         This is also the case on native Woe32 systems and Cygwin >= 1.7, where
         we know that it is UTF-16.  */
      /* Where one of these conditions holds, report the Unicode encoding
         that matches sizeof(wchar_t) instead of ei_local_wchar_t. */
#if ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007)
      if (sizeof(wchar_t) == 4) {
        index = ei_ucs4internal;
        break;
      }
      if (sizeof(wchar_t) == 2) {
# if WORDS_LITTLEENDIAN
        index = ei_utf16le;
# else
        index = ei_utf16be;
# endif
        break;
      }
#elif __STDC_ISO_10646__
      if (sizeof(wchar_t) == 4) {
        index = ei_ucs4internal;
        break;
      }
      if (sizeof(wchar_t) == 2) {
        index = ei_ucs2internal;
        break;
      }
      if (sizeof(wchar_t) == 1) {
        index = ei_iso8859_1;
        break;
      }
#endif
    }
    /* Ordinary case: the encoding of the alias that was found. */
    index = ap->encoding_index;
    break;
  }
  /* all_canonical[] holds the offset of the canonical name in the pool. */
  return all_canonical[index] + pool;
 invalid:
  /* Unrecognized names are returned unchanged. */
  return name;
}
593
/* Global variable holding the value of the compile-time constant
   _LIBICONV_VERSION.  Presumably exported so that programs can check at
   run time which version of the library they are linked with -- confirm
   against the declaration in iconv.h. */
int _libiconv_version = _LIBICONV_VERSION;

#if defined __FreeBSD__ && !defined __gnu_freebsd__
/* GNU libiconv is the native FreeBSD iconv implementation since 2002.
   It wants to define the symbols 'iconv_open', 'iconv', 'iconv_close'.  */
/* strong_alias(name, aliasname) declares aliasname as a second symbol for
   the definition of name, using the 'alias' attribute.  The two-level
   macro makes sure that both arguments are macro-expanded before the name
   is stringified. */
#define strong_alias(name, aliasname) _strong_alias(name, aliasname)
#define _strong_alias(name, aliasname) \
  extern __typeof (name) aliasname __attribute__ ((alias (#name)));
/* Remove the macro definitions of these names, so that the unprefixed
   symbols themselves can be declared below. */
#undef iconv_open
#undef iconv
#undef iconv_close
strong_alias (libiconv_open, iconv_open)
strong_alias (libiconv, iconv)
strong_alias (libiconv_close, iconv_close)
#endif
609
610#endif
611