1/* Charset conversion. 2 Copyright (C) 2001-2006 Free Software Foundation, Inc. 3 Written by Bruno Haible and Simon Josefsson. 4 5 This program is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 2, or (at your option) 8 any later version. 9 10 This program is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program; if not, write to the Free Software Foundation, 17 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 18 19#include <config.h> 20 21/* Specification. */ 22#include "striconv.h" 23 24#include <errno.h> 25#include <stdlib.h> 26#include <string.h> 27 28#if HAVE_ICONV 29# include <iconv.h> 30/* Get MB_LEN_MAX, CHAR_BIT. */ 31# include <limits.h> 32#endif 33 34#include "strdup.h" 35#include "c-strcase.h" 36 37#ifndef SIZE_MAX 38# define SIZE_MAX ((size_t) -1) 39#endif 40 41 42#if HAVE_ICONV 43 44int 45mem_cd_iconv (const char *src, size_t srclen, iconv_t cd, 46 char **resultp, size_t *lengthp) 47{ 48# define tmpbufsize 4096 49 size_t length; 50 char *result; 51 52 /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug. */ 53# if defined _LIBICONV_VERSION \ 54 || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun) 55 /* Set to the initial state. */ 56 iconv (cd, NULL, NULL, NULL, NULL); 57# endif 58 59 /* Determine the length we need. */ 60 { 61 size_t count = 0; 62 char tmpbuf[tmpbufsize]; 63 const char *inptr = src; 64 size_t insize = srclen; 65 66 while (insize > 0) 67 { 68 char *outptr = tmpbuf; 69 size_t outsize = tmpbufsize; 70 size_t res = iconv (cd, 71 (ICONV_CONST char **) &inptr, &insize, 72 &outptr, &outsize); 73 74 if (res == (size_t)(-1)) 75 { 76 if (errno == E2BIG) 77 ; 78 else if (errno == EINVAL) 79 break; 80 else 81 return -1; 82 } 83# if !defined _LIBICONV_VERSION && !defined __GLIBC__ 84 /* Irix iconv() inserts a NUL byte if it cannot convert. 85 NetBSD iconv() inserts a question mark if it cannot convert. 86 Only GNU libiconv and GNU libc are known to prefer to fail rather 87 than doing a lossy conversion. */ 88 else if (res > 0) 89 { 90 errno = EILSEQ; 91 return -1; 92 } 93# endif 94 count += outptr - tmpbuf; 95 } 96 /* Avoid glibc-2.1 bug and Solaris 2.7 bug. */ 97# if defined _LIBICONV_VERSION \ 98 || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun) 99 { 100 char *outptr = tmpbuf; 101 size_t outsize = tmpbufsize; 102 size_t res = iconv (cd, NULL, NULL, &outptr, &outsize); 103 104 if (res == (size_t)(-1)) 105 return -1; 106 count += outptr - tmpbuf; 107 } 108# endif 109 length = count; 110 } 111 112 if (length == 0) 113 { 114 *lengthp = 0; 115 return 0; 116 } 117 result = (*resultp != NULL ? realloc (*resultp, length) : malloc (length)); 118 if (result == NULL) 119 { 120 errno = ENOMEM; 121 return -1; 122 } 123 *resultp = result; 124 *lengthp = length; 125 126 /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug. */ 127# if defined _LIBICONV_VERSION \ 128 || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun) 129 /* Return to the initial state. */ 130 iconv (cd, NULL, NULL, NULL, NULL); 131# endif 132 133 /* Do the conversion for real. */ 134 { 135 const char *inptr = src; 136 size_t insize = srclen; 137 char *outptr = result; 138 size_t outsize = length; 139 140 while (insize > 0) 141 { 142 size_t res = iconv (cd, 143 (ICONV_CONST char **) &inptr, &insize, 144 &outptr, &outsize); 145 146 if (res == (size_t)(-1)) 147 { 148 if (errno == EINVAL) 149 break; 150 else 151 return -1; 152 } 153# if !defined _LIBICONV_VERSION && !defined __GLIBC__ 154 /* Irix iconv() inserts a NUL byte if it cannot convert. 155 NetBSD iconv() inserts a question mark if it cannot convert. 156 Only GNU libiconv and GNU libc are known to prefer to fail rather 157 than doing a lossy conversion. */ 158 else if (res > 0) 159 { 160 errno = EILSEQ; 161 return -1; 162 } 163# endif 164 } 165 /* Avoid glibc-2.1 bug and Solaris 2.7 bug. */ 166# if defined _LIBICONV_VERSION \ 167 || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun) 168 { 169 size_t res = iconv (cd, NULL, NULL, &outptr, &outsize); 170 171 if (res == (size_t)(-1)) 172 return -1; 173 } 174# endif 175 if (outsize != 0) 176 abort (); 177 } 178 179 return 0; 180# undef tmpbufsize 181} 182 183char * 184str_cd_iconv (const char *src, iconv_t cd) 185{ 186 /* For most encodings, a trailing NUL byte in the input will be converted 187 to a trailing NUL byte in the output. But not for UTF-7. So that this 188 function is usable for UTF-7, we have to exclude the NUL byte from the 189 conversion and add it by hand afterwards. */ 190# if PROBABLY_SLOWER 191 192 char *result = NULL; 193 size_t length; 194 int retval = mem_cd_iconv (src, strlen (src), cd, &result, &length); 195 char *final_result; 196 197 if (retval < 0) 198 { 199 if (result != NULL) 200 { 201 int saved_errno = errno; 202 free (result); 203 errno = saved_errno; 204 } 205 return NULL; 206 } 207 208 /* Add the terminating NUL byte. */ 209 final_result = 210 (result != NULL ? realloc (result, length + 1) : malloc (length + 1)); 211 if (final_result == NULL) 212 { 213 if (result != NULL) 214 free (result); 215 errno = ENOMEM; 216 return NULL; 217 } 218 final_result[length] = '\0'; 219 220 return final_result; 221 222# else 223 224 char *result; 225 size_t result_size; 226 size_t length; 227 const char *inptr = src; 228 size_t inbytes_remaining = strlen (src); 229 230 /* Make a guess for the worst-case output size, in order to avoid a 231 realloc. It's OK if the guess is wrong as long as it is not zero and 232 doesn't lead to an integer overflow. */ 233 result_size = inbytes_remaining; 234 { 235 size_t approx_sqrt_SIZE_MAX = SIZE_MAX >> (sizeof (size_t) * CHAR_BIT / 2); 236 if (result_size <= approx_sqrt_SIZE_MAX / MB_LEN_MAX) 237 result_size *= MB_LEN_MAX; 238 } 239 result_size += 1; /* for the terminating NUL */ 240 241 result = (char *) malloc (result_size); 242 if (result == NULL) 243 { 244 errno = ENOMEM; 245 return NULL; 246 } 247 248 /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug. */ 249# if defined _LIBICONV_VERSION \ 250 || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun) 251 /* Set to the initial state. */ 252 iconv (cd, NULL, NULL, NULL, NULL); 253# endif 254 255 /* Do the conversion. */ 256 { 257 char *outptr = result; 258 size_t outbytes_remaining = result_size - 1; 259 260 for (;;) 261 { 262 /* Here inptr + inbytes_remaining = src + strlen (src), 263 outptr + outbytes_remaining = result + result_size - 1. */ 264 size_t res = iconv (cd, 265 (ICONV_CONST char **) &inptr, &inbytes_remaining, 266 &outptr, &outbytes_remaining); 267 268 if (res == (size_t)(-1)) 269 { 270 if (errno == EINVAL) 271 break; 272 else if (errno == E2BIG) 273 { 274 size_t used = outptr - result; 275 size_t newsize = result_size * 2; 276 char *newresult; 277 278 if (!(newsize > result_size)) 279 { 280 errno = ENOMEM; 281 goto failed; 282 } 283 newresult = (char *) realloc (result, newsize); 284 if (newresult == NULL) 285 { 286 errno = ENOMEM; 287 goto failed; 288 } 289 result = newresult; 290 result_size = newsize; 291 outptr = result + used; 292 outbytes_remaining = result_size - 1 - used; 293 } 294 else 295 goto failed; 296 } 297# if !defined _LIBICONV_VERSION && !defined __GLIBC__ 298 /* Irix iconv() inserts a NUL byte if it cannot convert. 299 NetBSD iconv() inserts a question mark if it cannot convert. 300 Only GNU libiconv and GNU libc are known to prefer to fail rather 301 than doing a lossy conversion. */ 302 else if (res > 0) 303 { 304 errno = EILSEQ; 305 goto failed; 306 } 307# endif 308 else 309 break; 310 } 311 /* Avoid glibc-2.1 bug and Solaris 2.7 bug. */ 312# if defined _LIBICONV_VERSION \ 313 || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun) 314 for (;;) 315 { 316 /* Here outptr + outbytes_remaining = result + result_size - 1. */ 317 size_t res = iconv (cd, NULL, NULL, &outptr, &outbytes_remaining); 318 319 if (res == (size_t)(-1)) 320 { 321 if (errno == E2BIG) 322 { 323 size_t used = outptr - result; 324 size_t newsize = result_size * 2; 325 char *newresult; 326 327 if (!(newsize > result_size)) 328 { 329 errno = ENOMEM; 330 goto failed; 331 } 332 newresult = (char *) realloc (result, newsize); 333 if (newresult == NULL) 334 { 335 errno = ENOMEM; 336 goto failed; 337 } 338 result = newresult; 339 result_size = newsize; 340 outptr = result + used; 341 outbytes_remaining = result_size - 1 - used; 342 } 343 else 344 goto failed; 345 } 346 else 347 break; 348 } 349# endif 350 351 /* Add the terminating NUL byte. */ 352 *outptr++ = '\0'; 353 354 length = outptr - result; 355 } 356 357 /* Give away unused memory. */ 358 if (length < result_size) 359 { 360 char *smaller_result = (char *) realloc (result, length); 361 362 if (smaller_result != NULL) 363 result = smaller_result; 364 } 365 366 return result; 367 368 failed: 369 { 370 int saved_errno = errno; 371 free (result); 372 errno = saved_errno; 373 return NULL; 374 } 375 376# endif 377} 378 379#endif 380 381char * 382str_iconv (const char *src, const char *from_codeset, const char *to_codeset) 383{ 384 if (c_strcasecmp (from_codeset, to_codeset) == 0) 385 return strdup (src); 386 else 387 { 388#if HAVE_ICONV 389 iconv_t cd; 390 char *result; 391 392 /* Avoid glibc-2.1 bug with EUC-KR. */ 393# if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION 394 if (c_strcasecmp (from_codeset, "EUC-KR") == 0 395 || c_strcasecmp (to_codeset, "EUC-KR") == 0) 396 { 397 errno = EINVAL; 398 return NULL; 399 } 400# endif 401 cd = iconv_open (to_codeset, from_codeset); 402 if (cd == (iconv_t) -1) 403 return NULL; 404 405 result = str_cd_iconv (src, cd); 406 407 if (result == NULL) 408 { 409 /* Close cd, but preserve the errno from str_cd_iconv. */ 410 int saved_errno = errno; 411 iconv_close (cd); 412 errno = saved_errno; 413 } 414 else 415 { 416 if (iconv_close (cd) < 0) 417 { 418 /* Return NULL, but free the allocated memory, and while doing 419 that, preserve the errno from iconv_close. */ 420 int saved_errno = errno; 421 free (result); 422 errno = saved_errno; 423 return NULL; 424 } 425 } 426 return result; 427#else 428 /* This is a different error code than if iconv_open existed but didn't 429 support from_codeset and to_codeset, so that the caller can emit 430 an error message such as 431 "iconv() is not supported. Installing GNU libiconv and 432 then reinstalling this package would fix this." */ 433 errno = ENOSYS; 434 return NULL; 435#endif 436 } 437} 438