1/* Charset conversion.
2   Copyright (C) 2001-2006 Free Software Foundation, Inc.
3   Written by Bruno Haible and Simon Josefsson.
4
5   This program is free software; you can redistribute it and/or modify
6   it under the terms of the GNU General Public License as published by
7   the Free Software Foundation; either version 2, or (at your option)
8   any later version.
9
10   This program is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   GNU General Public License for more details.
14
15   You should have received a copy of the GNU General Public License
16   along with this program; if not, write to the Free Software Foundation,
17   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
18
19#include <config.h>
20
21/* Specification.  */
22#include "striconv.h"
23
24#include <errno.h>
25#include <stdlib.h>
26#include <string.h>
27
28#if HAVE_ICONV
29# include <iconv.h>
30/* Get MB_LEN_MAX, CHAR_BIT.  */
31# include <limits.h>
32#endif
33
34#include "strdup.h"
35#include "c-strcase.h"
36
37#ifndef SIZE_MAX
38# define SIZE_MAX ((size_t) -1)
39#endif
40
41
42#if HAVE_ICONV
43
44int
45mem_cd_iconv (const char *src, size_t srclen, iconv_t cd,
46	      char **resultp, size_t *lengthp)
47{
48# define tmpbufsize 4096
49  size_t length;
50  char *result;
51
52  /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
53# if defined _LIBICONV_VERSION \
54    || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
55  /* Set to the initial state.  */
56  iconv (cd, NULL, NULL, NULL, NULL);
57# endif
58
59  /* Determine the length we need.  */
60  {
61    size_t count = 0;
62    char tmpbuf[tmpbufsize];
63    const char *inptr = src;
64    size_t insize = srclen;
65
66    while (insize > 0)
67      {
68	char *outptr = tmpbuf;
69	size_t outsize = tmpbufsize;
70	size_t res = iconv (cd,
71			    (ICONV_CONST char **) &inptr, &insize,
72			    &outptr, &outsize);
73
74	if (res == (size_t)(-1))
75	  {
76	    if (errno == E2BIG)
77	      ;
78	    else if (errno == EINVAL)
79	      break;
80	    else
81	      return -1;
82	  }
83# if !defined _LIBICONV_VERSION && !defined __GLIBC__
84	/* Irix iconv() inserts a NUL byte if it cannot convert.
85	   NetBSD iconv() inserts a question mark if it cannot convert.
86	   Only GNU libiconv and GNU libc are known to prefer to fail rather
87	   than doing a lossy conversion.  */
88	else if (res > 0)
89	  {
90	    errno = EILSEQ;
91	    return -1;
92	  }
93# endif
94	count += outptr - tmpbuf;
95      }
96    /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
97# if defined _LIBICONV_VERSION \
98    || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
99    {
100      char *outptr = tmpbuf;
101      size_t outsize = tmpbufsize;
102      size_t res = iconv (cd, NULL, NULL, &outptr, &outsize);
103
104      if (res == (size_t)(-1))
105	return -1;
106      count += outptr - tmpbuf;
107    }
108# endif
109    length = count;
110  }
111
112  if (length == 0)
113    {
114      *lengthp = 0;
115      return 0;
116    }
117  result = (*resultp != NULL ? realloc (*resultp, length) : malloc (length));
118  if (result == NULL)
119    {
120      errno = ENOMEM;
121      return -1;
122    }
123  *resultp = result;
124  *lengthp = length;
125
126  /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
127# if defined _LIBICONV_VERSION \
128    || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
129  /* Return to the initial state.  */
130  iconv (cd, NULL, NULL, NULL, NULL);
131# endif
132
133  /* Do the conversion for real.  */
134  {
135    const char *inptr = src;
136    size_t insize = srclen;
137    char *outptr = result;
138    size_t outsize = length;
139
140    while (insize > 0)
141      {
142	size_t res = iconv (cd,
143			    (ICONV_CONST char **) &inptr, &insize,
144			    &outptr, &outsize);
145
146	if (res == (size_t)(-1))
147	  {
148	    if (errno == EINVAL)
149	      break;
150	    else
151	      return -1;
152	  }
153# if !defined _LIBICONV_VERSION && !defined __GLIBC__
154	/* Irix iconv() inserts a NUL byte if it cannot convert.
155	   NetBSD iconv() inserts a question mark if it cannot convert.
156	   Only GNU libiconv and GNU libc are known to prefer to fail rather
157	   than doing a lossy conversion.  */
158	else if (res > 0)
159	  {
160	    errno = EILSEQ;
161	    return -1;
162	  }
163# endif
164      }
165    /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
166# if defined _LIBICONV_VERSION \
167    || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
168    {
169      size_t res = iconv (cd, NULL, NULL, &outptr, &outsize);
170
171      if (res == (size_t)(-1))
172	return -1;
173    }
174# endif
175    if (outsize != 0)
176      abort ();
177  }
178
179  return 0;
180# undef tmpbufsize
181}
182
183char *
184str_cd_iconv (const char *src, iconv_t cd)
185{
186  /* For most encodings, a trailing NUL byte in the input will be converted
187     to a trailing NUL byte in the output.  But not for UTF-7.  So that this
188     function is usable for UTF-7, we have to exclude the NUL byte from the
189     conversion and add it by hand afterwards.  */
190# if PROBABLY_SLOWER
191
192  char *result = NULL;
193  size_t length;
194  int retval = mem_cd_iconv (src, strlen (src), cd, &result, &length);
195  char *final_result;
196
197  if (retval < 0)
198    {
199      if (result != NULL)
200	{
201	  int saved_errno = errno;
202	  free (result);
203	  errno = saved_errno;
204	}
205      return NULL;
206    }
207
208  /* Add the terminating NUL byte.  */
209  final_result =
210    (result != NULL ? realloc (result, length + 1) : malloc (length + 1));
211  if (final_result == NULL)
212    {
213      if (result != NULL)
214	free (result);
215      errno = ENOMEM;
216      return NULL;
217    }
218  final_result[length] = '\0';
219
220  return final_result;
221
222# else
223
224  char *result;
225  size_t result_size;
226  size_t length;
227  const char *inptr = src;
228  size_t inbytes_remaining = strlen (src);
229
230  /* Make a guess for the worst-case output size, in order to avoid a
231     realloc.  It's OK if the guess is wrong as long as it is not zero and
232     doesn't lead to an integer overflow.  */
233  result_size = inbytes_remaining;
234  {
235    size_t approx_sqrt_SIZE_MAX = SIZE_MAX >> (sizeof (size_t) * CHAR_BIT / 2);
236    if (result_size <= approx_sqrt_SIZE_MAX / MB_LEN_MAX)
237      result_size *= MB_LEN_MAX;
238  }
239  result_size += 1; /* for the terminating NUL */
240
241  result = (char *) malloc (result_size);
242  if (result == NULL)
243    {
244      errno = ENOMEM;
245      return NULL;
246    }
247
248  /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
249# if defined _LIBICONV_VERSION \
250    || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
251  /* Set to the initial state.  */
252  iconv (cd, NULL, NULL, NULL, NULL);
253# endif
254
255  /* Do the conversion.  */
256  {
257    char *outptr = result;
258    size_t outbytes_remaining = result_size - 1;
259
260    for (;;)
261      {
262	/* Here inptr + inbytes_remaining = src + strlen (src),
263		outptr + outbytes_remaining = result + result_size - 1.  */
264	size_t res = iconv (cd,
265			    (ICONV_CONST char **) &inptr, &inbytes_remaining,
266			    &outptr, &outbytes_remaining);
267
268	if (res == (size_t)(-1))
269	  {
270	    if (errno == EINVAL)
271	      break;
272	    else if (errno == E2BIG)
273	      {
274		size_t used = outptr - result;
275		size_t newsize = result_size * 2;
276		char *newresult;
277
278		if (!(newsize > result_size))
279		  {
280		    errno = ENOMEM;
281		    goto failed;
282		  }
283		newresult = (char *) realloc (result, newsize);
284		if (newresult == NULL)
285		  {
286		    errno = ENOMEM;
287		    goto failed;
288		  }
289		result = newresult;
290		result_size = newsize;
291		outptr = result + used;
292		outbytes_remaining = result_size - 1 - used;
293	      }
294	    else
295	      goto failed;
296	  }
297# if !defined _LIBICONV_VERSION && !defined __GLIBC__
298	/* Irix iconv() inserts a NUL byte if it cannot convert.
299	   NetBSD iconv() inserts a question mark if it cannot convert.
300	   Only GNU libiconv and GNU libc are known to prefer to fail rather
301	   than doing a lossy conversion.  */
302	else if (res > 0)
303	  {
304	    errno = EILSEQ;
305	    goto failed;
306	  }
307# endif
308	else
309	  break;
310      }
311    /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
312# if defined _LIBICONV_VERSION \
313    || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
314    for (;;)
315      {
316	/* Here outptr + outbytes_remaining = result + result_size - 1.  */
317	size_t res = iconv (cd, NULL, NULL, &outptr, &outbytes_remaining);
318
319	if (res == (size_t)(-1))
320	  {
321	    if (errno == E2BIG)
322	      {
323		size_t used = outptr - result;
324		size_t newsize = result_size * 2;
325		char *newresult;
326
327		if (!(newsize > result_size))
328		  {
329		    errno = ENOMEM;
330		    goto failed;
331		  }
332		newresult = (char *) realloc (result, newsize);
333		if (newresult == NULL)
334		  {
335		    errno = ENOMEM;
336		    goto failed;
337		  }
338		result = newresult;
339		result_size = newsize;
340		outptr = result + used;
341		outbytes_remaining = result_size - 1 - used;
342	      }
343	    else
344	      goto failed;
345	  }
346	else
347	  break;
348      }
349# endif
350
351    /* Add the terminating NUL byte.  */
352    *outptr++ = '\0';
353
354    length = outptr - result;
355  }
356
357  /* Give away unused memory.  */
358  if (length < result_size)
359    {
360      char *smaller_result = (char *) realloc (result, length);
361
362      if (smaller_result != NULL)
363	result = smaller_result;
364    }
365
366  return result;
367
368 failed:
369  {
370    int saved_errno = errno;
371    free (result);
372    errno = saved_errno;
373    return NULL;
374  }
375
376# endif
377}
378
379#endif
380
/* Convert the NUL-terminated string SRC from the encoding FROM_CODESET to
   the encoding TO_CODESET and return the result as a freshly allocated,
   NUL-terminated string.

   If the two codeset names are equal according to c_strcasecmp, the
   string is merely duplicated and no converter is opened.

   Return value: the converted string, or NULL with errno set:
     - ENOMEM if the copy of SRC could not be allocated,
     - ENOSYS if this file was built without iconv support
       (HAVE_ICONV unset),
     - otherwise the errno left by iconv_open, str_cd_iconv or
       iconv_close.  */
char *
str_iconv (const char *src, const char *from_codeset, const char *to_codeset)
{
  if (c_strcasecmp (from_codeset, to_codeset) == 0)
    {
      char *result = strdup (src);

      /* Report the failure the same way as the other allocation failures
         in this file; not every strdup implementation sets errno.  */
      if (result == NULL)
        errno = ENOMEM;
      return result;
    }
  else
    {
#if HAVE_ICONV
      iconv_t cd;
      char *result;

      /* Avoid glibc-2.1 bug with EUC-KR.  */
# if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
      if (c_strcasecmp (from_codeset, "EUC-KR") == 0
          || c_strcasecmp (to_codeset, "EUC-KR") == 0)
        {
          errno = EINVAL;
          return NULL;
        }
# endif
      cd = iconv_open (to_codeset, from_codeset);
      if (cd == (iconv_t) -1)
        return NULL;

      result = str_cd_iconv (src, cd);

      if (result == NULL)
        {
          /* Close cd, but preserve the errno from str_cd_iconv.  */
          int saved_errno = errno;
          iconv_close (cd);
          errno = saved_errno;
        }
      else
        {
          if (iconv_close (cd) < 0)
            {
              /* Return NULL, but free the allocated memory, and while doing
                 that, preserve the errno from iconv_close.  */
              int saved_errno = errno;
              free (result);
              errno = saved_errno;
              return NULL;
            }
        }
      return result;
#else
      /* This is a different error code than if iconv_open existed but didn't
         support from_codeset and to_codeset, so that the caller can emit
         an error message such as
           "iconv() is not supported. Installing GNU libiconv and
            then reinstalling this package would fix this."  */
      errno = ENOSYS;
      return NULL;
#endif
    }
}
438