1/*
2 * Copyright (C) 1999-2008, 2011 Free Software Foundation, Inc.
3 * This file is part of the GNU LIBICONV Library.
4 *
5 * The GNU LIBICONV Library is free software; you can redistribute it
6 * and/or modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
9 *
10 * The GNU LIBICONV Library is distributed in the hope that it will be
11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 * Library General Public License for more details.
14 *
15 * You should have received a copy of the GNU Library General Public
16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
18 * Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20
21/* Part 1 of iconv_open.
22   Input: const char* tocode, const char* fromcode.
23   Output:
24     unsigned int from_index;
25     int from_wchar;
26     unsigned int to_index;
27     int to_wchar;
28     int transliterate;
29     int discard_ilseq;
   Jumps to 'invalid' in case of error.
31 */
32{
33  char buf[MAX_WORD_LENGTH+10+1];
34  const char* cp;
35  char* bp;
36  const struct alias * ap;
37  unsigned int count;
38
39  transliterate = 0;
40  discard_ilseq = 0;
41
42  /* Before calling aliases_lookup, convert the input string to upper case,
43   * and check whether it's entirely ASCII (we call gperf with option "-7"
44   * to achieve a smaller table) and non-empty. If it's not entirely ASCII,
45   * or if it's too long, it is not a valid encoding name.
46   */
47  for (to_wchar = 0;;) {
48    /* Search tocode in the table. */
49    for (cp = tocode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
50      unsigned char c = * (unsigned char *) cp;
51      if (c >= 0x80)
52        goto invalid;
53      if (c >= 'a' && c <= 'z')
54        c -= 'a'-'A';
55      *bp = c;
56      if (c == '\0')
57        break;
58      if (--count == 0)
59        goto invalid;
60    }
61    for (;;) {
62      if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
63        bp -= 10;
64        *bp = '\0';
65        transliterate = 1;
66        continue;
67      }
68      if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) {
69        bp -= 8;
70        *bp = '\0';
71        discard_ilseq = 1;
72        continue;
73      }
74      break;
75    }
76    if (buf[0] == '\0') {
77      tocode = locale_charset();
78      /* Avoid an endless loop that could occur when using an older version
79         of localcharset.c. */
80      if (tocode[0] == '\0')
81        goto invalid;
82      continue;
83    }
84    ap = aliases_lookup(buf,bp-buf);
85    if (ap == NULL) {
86      ap = aliases2_lookup(buf);
87      if (ap == NULL)
88        goto invalid;
89    }
90    if (ap->encoding_index == ei_local_char) {
91      tocode = locale_charset();
92      /* Avoid an endless loop that could occur when using an older version
93         of localcharset.c. */
94      if (tocode[0] == '\0')
95        goto invalid;
96      continue;
97    }
98    if (ap->encoding_index == ei_local_wchar_t) {
99      /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
100         This is also the case on native Woe32 systems and Cygwin >= 1.7, where
101         we know that it is UTF-16.  */
102#if ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007)
103      if (sizeof(wchar_t) == 4) {
104        to_index = ei_ucs4internal;
105        break;
106      }
107      if (sizeof(wchar_t) == 2) {
108# if WORDS_LITTLEENDIAN
109        to_index = ei_utf16le;
110# else
111        to_index = ei_utf16be;
112# endif
113        break;
114      }
115#elif __STDC_ISO_10646__
116      if (sizeof(wchar_t) == 4) {
117        to_index = ei_ucs4internal;
118        break;
119      }
120      if (sizeof(wchar_t) == 2) {
121        to_index = ei_ucs2internal;
122        break;
123      }
124      if (sizeof(wchar_t) == 1) {
125        to_index = ei_iso8859_1;
126        break;
127      }
128#endif
129#if HAVE_MBRTOWC
130      to_wchar = 1;
131      tocode = locale_charset();
132      continue;
133#endif
134      goto invalid;
135    }
136    to_index = ap->encoding_index;
137    break;
138  }
139  for (from_wchar = 0;;) {
140    /* Search fromcode in the table. */
141    for (cp = fromcode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
142      unsigned char c = * (unsigned char *) cp;
143      if (c >= 0x80)
144        goto invalid;
145      if (c >= 'a' && c <= 'z')
146        c -= 'a'-'A';
147      *bp = c;
148      if (c == '\0')
149        break;
150      if (--count == 0)
151        goto invalid;
152    }
153    for (;;) {
154      if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
155        bp -= 10;
156        *bp = '\0';
157        continue;
158      }
159      if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) {
160        bp -= 8;
161        *bp = '\0';
162        continue;
163      }
164      break;
165    }
166    if (buf[0] == '\0') {
167      fromcode = locale_charset();
168      /* Avoid an endless loop that could occur when using an older version
169         of localcharset.c. */
170      if (fromcode[0] == '\0')
171        goto invalid;
172      continue;
173    }
174    ap = aliases_lookup(buf,bp-buf);
175    if (ap == NULL) {
176      ap = aliases2_lookup(buf);
177      if (ap == NULL)
178        goto invalid;
179    }
180    if (ap->encoding_index == ei_local_char) {
181      fromcode = locale_charset();
182      /* Avoid an endless loop that could occur when using an older version
183         of localcharset.c. */
184      if (fromcode[0] == '\0')
185        goto invalid;
186      continue;
187    }
188    if (ap->encoding_index == ei_local_wchar_t) {
189      /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
190         This is also the case on native Woe32 systems and Cygwin >= 1.7, where
191         we know that it is UTF-16.  */
192#if ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007)
193      if (sizeof(wchar_t) == 4) {
194        from_index = ei_ucs4internal;
195        break;
196      }
197      if (sizeof(wchar_t) == 2) {
198# if WORDS_LITTLEENDIAN
199        from_index = ei_utf16le;
200# else
201        from_index = ei_utf16be;
202# endif
203        break;
204      }
205#elif __STDC_ISO_10646__
206      if (sizeof(wchar_t) == 4) {
207        from_index = ei_ucs4internal;
208        break;
209      }
210      if (sizeof(wchar_t) == 2) {
211        from_index = ei_ucs2internal;
212        break;
213      }
214      if (sizeof(wchar_t) == 1) {
215        from_index = ei_iso8859_1;
216        break;
217      }
218#endif
219#if HAVE_WCRTOMB
220      from_wchar = 1;
221      fromcode = locale_charset();
222      continue;
223#endif
224      goto invalid;
225    }
226    from_index = ap->encoding_index;
227    break;
228  }
229}
230