1/* 2 * Copyright (C) 1999-2008, 2011 Free Software Foundation, Inc. 3 * This file is part of the GNU LIBICONV Library. 4 * 5 * The GNU LIBICONV Library is free software; you can redistribute it 6 * and/or modify it under the terms of the GNU Library General Public 7 * License as published by the Free Software Foundation; either version 2 8 * of the License, or (at your option) any later version. 9 * 10 * The GNU LIBICONV Library is distributed in the hope that it will be 11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * Library General Public License for more details. 14 * 15 * You should have received a copy of the GNU Library General Public 16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB. 17 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street, 18 * Fifth Floor, Boston, MA 02110-1301, USA. 19 */ 20 21/* Part 1 of iconv_open. 22 Input: const char* tocode, const char* fromcode. 23 Output: 24 unsigned int from_index; 25 int from_wchar; 26 unsigned int to_index; 27 int to_wchar; 28 int transliterate; 29 int discard_ilseq; 30 Jumps to 'invalid' in case of errror. 31 */ 32{ 33 char buf[MAX_WORD_LENGTH+10+1]; 34 const char* cp; 35 char* bp; 36 const struct alias * ap; 37 unsigned int count; 38 39 transliterate = 0; 40 discard_ilseq = 0; 41 42 /* Before calling aliases_lookup, convert the input string to upper case, 43 * and check whether it's entirely ASCII (we call gperf with option "-7" 44 * to achieve a smaller table) and non-empty. If it's not entirely ASCII, 45 * or if it's too long, it is not a valid encoding name. 46 */ 47 for (to_wchar = 0;;) { 48 /* Search tocode in the table. */ 49 for (cp = tocode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) { 50 unsigned char c = * (unsigned char *) cp; 51 if (c >= 0x80) 52 goto invalid; 53 if (c >= 'a' && c <= 'z') 54 c -= 'a'-'A'; 55 *bp = c; 56 if (c == '\0') 57 break; 58 if (--count == 0) 59 goto invalid; 60 } 61 for (;;) { 62 if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) { 63 bp -= 10; 64 *bp = '\0'; 65 transliterate = 1; 66 continue; 67 } 68 if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) { 69 bp -= 8; 70 *bp = '\0'; 71 discard_ilseq = 1; 72 continue; 73 } 74 break; 75 } 76 if (buf[0] == '\0') { 77 tocode = locale_charset(); 78 /* Avoid an endless loop that could occur when using an older version 79 of localcharset.c. */ 80 if (tocode[0] == '\0') 81 goto invalid; 82 continue; 83 } 84 ap = aliases_lookup(buf,bp-buf); 85 if (ap == NULL) { 86 ap = aliases2_lookup(buf); 87 if (ap == NULL) 88 goto invalid; 89 } 90 if (ap->encoding_index == ei_local_char) { 91 tocode = locale_charset(); 92 /* Avoid an endless loop that could occur when using an older version 93 of localcharset.c. */ 94 if (tocode[0] == '\0') 95 goto invalid; 96 continue; 97 } 98 if (ap->encoding_index == ei_local_wchar_t) { 99 /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode. 100 This is also the case on native Woe32 systems and Cygwin >= 1.7, where 101 we know that it is UTF-16. */ 102#if ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007) 103 if (sizeof(wchar_t) == 4) { 104 to_index = ei_ucs4internal; 105 break; 106 } 107 if (sizeof(wchar_t) == 2) { 108# if WORDS_LITTLEENDIAN 109 to_index = ei_utf16le; 110# else 111 to_index = ei_utf16be; 112# endif 113 break; 114 } 115#elif __STDC_ISO_10646__ 116 if (sizeof(wchar_t) == 4) { 117 to_index = ei_ucs4internal; 118 break; 119 } 120 if (sizeof(wchar_t) == 2) { 121 to_index = ei_ucs2internal; 122 break; 123 } 124 if (sizeof(wchar_t) == 1) { 125 to_index = ei_iso8859_1; 126 break; 127 } 128#endif 129#if HAVE_MBRTOWC 130 to_wchar = 1; 131 tocode = locale_charset(); 132 continue; 133#endif 134 goto invalid; 135 } 136 to_index = ap->encoding_index; 137 break; 138 } 139 for (from_wchar = 0;;) { 140 /* Search fromcode in the table. */ 141 for (cp = fromcode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) { 142 unsigned char c = * (unsigned char *) cp; 143 if (c >= 0x80) 144 goto invalid; 145 if (c >= 'a' && c <= 'z') 146 c -= 'a'-'A'; 147 *bp = c; 148 if (c == '\0') 149 break; 150 if (--count == 0) 151 goto invalid; 152 } 153 for (;;) { 154 if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) { 155 bp -= 10; 156 *bp = '\0'; 157 continue; 158 } 159 if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) { 160 bp -= 8; 161 *bp = '\0'; 162 continue; 163 } 164 break; 165 } 166 if (buf[0] == '\0') { 167 fromcode = locale_charset(); 168 /* Avoid an endless loop that could occur when using an older version 169 of localcharset.c. */ 170 if (fromcode[0] == '\0') 171 goto invalid; 172 continue; 173 } 174 ap = aliases_lookup(buf,bp-buf); 175 if (ap == NULL) { 176 ap = aliases2_lookup(buf); 177 if (ap == NULL) 178 goto invalid; 179 } 180 if (ap->encoding_index == ei_local_char) { 181 fromcode = locale_charset(); 182 /* Avoid an endless loop that could occur when using an older version 183 of localcharset.c. */ 184 if (fromcode[0] == '\0') 185 goto invalid; 186 continue; 187 } 188 if (ap->encoding_index == ei_local_wchar_t) { 189 /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode. 190 This is also the case on native Woe32 systems and Cygwin >= 1.7, where 191 we know that it is UTF-16. */ 192#if ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007) 193 if (sizeof(wchar_t) == 4) { 194 from_index = ei_ucs4internal; 195 break; 196 } 197 if (sizeof(wchar_t) == 2) { 198# if WORDS_LITTLEENDIAN 199 from_index = ei_utf16le; 200# else 201 from_index = ei_utf16be; 202# endif 203 break; 204 } 205#elif __STDC_ISO_10646__ 206 if (sizeof(wchar_t) == 4) { 207 from_index = ei_ucs4internal; 208 break; 209 } 210 if (sizeof(wchar_t) == 2) { 211 from_index = ei_ucs2internal; 212 break; 213 } 214 if (sizeof(wchar_t) == 1) { 215 from_index = ei_iso8859_1; 216 break; 217 } 218#endif 219#if HAVE_WCRTOMB 220 from_wchar = 1; 221 fromcode = locale_charset(); 222 continue; 223#endif 224 goto invalid; 225 } 226 from_index = ap->encoding_index; 227 break; 228 } 229} 230