1/* Copyright (C) 1999-2001, 2003 Bruno Haible.
2   This file is not part of the GNU LIBICONV Library.
3   This file is put into the public domain.  */
4
5#include "iconv_string.h"
6#include <iconv.h>
7#include <errno.h>
8#include <stdlib.h>
9#include <string.h>
10
/* Size in bytes of the on-stack scratch buffer that receives the output of
   the measuring pass.  */
#define tmpbufsize 4096

/* Close CD while preserving the errno value of the failure being reported.
   Always returns -1, so callers can write `return iconv_string_fail(cd);'.  */
static int iconv_string_fail (iconv_t cd)
{
  int saved_errno = errno;
  iconv_close(cd);
  errno = saved_errno;
  return -1;
}

/* Convert the bytes in [start,end) from encoding FROMCODE to encoding TOCODE.

   The conversion is done in two passes: the first one converts into a
   scratch buffer only to measure the output, the second one converts into
   a heap block of exactly that size.

   fromcode  Source encoding.  If iconv_open() rejects the pair with EINVAL,
             the pseudo-encodings "autodetect_utf8", "autodetect_jp" and
             "autodetect_kr" are recognized: a fixed list of real encodings
             is tried in order, moving on to the next one only when the
             previous attempt failed with EILSEQ.
   resultp   If NULL, only the output length is computed.  Otherwise
             *resultp must be NULL or a block obtained from malloc(); it is
             resized with realloc() and receives the converted bytes.  The
             output is not NUL-terminated.  On failure *resultp is left
             untouched, so the caller still owns the original block.  On
             success *resultp is never NULL, even for empty output.
   lengthp   If not NULL, *lengthp receives the output length in bytes.

   An incomplete multibyte sequence at the very end of the input (iconv()
   failing with EINVAL) ends the conversion without error; everything
   converted before it is returned.

   Returns 0 on success, or -1 with errno set (EINVAL for an unsupported
   encoding, EILSEQ for invalid input, ENOMEM if allocation fails).  */
int iconv_string (const char* tocode, const char* fromcode,
                  const char* start, const char* end,
                  char** resultp, size_t* lengthp)
{
  iconv_t cd = iconv_open(tocode,fromcode);
  size_t length;
  char* result;
  if (cd == (iconv_t)(-1)) {
    /* Try UTF-8 first. There are very few ISO-8859-1 inputs that would
       be valid UTF-8, but many UTF-8 inputs are valid ISO-8859-1. */
    static const char* const utf8_candidates[] =
      { "UTF-8", "ISO-8859-1", NULL };
    /* Try the 7-bit encoding first; it fails if the input contains bytes
       >= 0x80.  EUC-JP is tried before SHIFT_JIS, so short SHIFT_JIS inputs
       may come out wrong; with the opposite order short EUC-JP inputs would
       come out wrong instead. This is unavoidable. */
    static const char* const jp_candidates[] =
      { "ISO-2022-JP-2", "EUC-JP", "SHIFT_JIS", NULL };
    /* Again the 7-bit encoding first, then EUC-KR. */
    static const char* const kr_candidates[] =
      { "ISO-2022-KR", "EUC-KR", NULL };
    const char* const* candidate;
    if (errno != EINVAL)
      return -1;
    /* Unsupported fromcode or tocode. Check whether the caller requested
       autodetection. */
    if (strcmp(fromcode,"autodetect_utf8") == 0)
      candidate = utf8_candidates;
    else if (strcmp(fromcode,"autodetect_jp") == 0)
      candidate = jp_candidates;
    else if (strcmp(fromcode,"autodetect_kr") == 0)
      candidate = kr_candidates;
    else {
      errno = EINVAL;
      return -1;
    }
    for (;;) {
      int ret = iconv_string(tocode,*candidate,start,end,resultp,lengthp);
      candidate++;
      /* Only an EILSEQ failure justifies trying the next encoding; the
         outcome of the last candidate is returned whatever it is. */
      if (*candidate == NULL || !(ret < 0 && errno == EILSEQ))
        return ret;
    }
  }
  /* Determine the length we need. */
  {
    size_t count = 0;
    char tmpbuf[tmpbufsize];
    const char* inptr = start;
    size_t insize = (size_t)(end-start);
    while (insize > 0) {
      char* outptr = tmpbuf;
      size_t outsize = tmpbufsize;
      /* POSIX declares the input argument as `char **', some older
         implementations as `const char **'. Going through `void *'
         converts implicitly to either one. */
      size_t res = iconv(cd,(void*)&inptr,&insize,&outptr,&outsize);
      /* Count what was produced before looking at the status: a call that
         stops on an incomplete trailing sequence (EINVAL) or on a full
         scratch buffer (E2BIG) has usually written output already. */
      count += (size_t)(outptr-tmpbuf);
      if (res == (size_t)(-1)) {
        if (errno == EINVAL)
          break;
        if (errno != E2BIG)
          return iconv_string_fail(cd);
      }
    }
    {
      /* Account for whatever is emitted to return to the initial shift
         state. */
      char* outptr = tmpbuf;
      size_t outsize = tmpbufsize;
      size_t res = iconv(cd,NULL,NULL,&outptr,&outsize);
      if (res == (size_t)(-1))
        return iconv_string_fail(cd);
      count += (size_t)(outptr-tmpbuf);
    }
    length = count;
  }
  if (lengthp != NULL)
    *lengthp = length;
  if (resultp == NULL) {
    iconv_close(cd);
    return 0;
  }
  /* realloc(NULL,n) behaves like malloc(n). Never request zero bytes: the
     result of a zero-sized allocation is implementation-defined. */
  result = realloc(*resultp, length > 0 ? length : 1);
  if (result == NULL) {
    /* *resultp is deliberately not overwritten: the old block is still
       valid and still owned by the caller. */
    iconv_close(cd);
    errno = ENOMEM;
    return -1;
  }
  *resultp = result;
  if (length == 0) {
    iconv_close(cd);
    return 0;
  }
  iconv(cd,NULL,NULL,NULL,NULL); /* return to the initial state */
  /* Do the conversion for real. */
  {
    const char* inptr = start;
    size_t insize = (size_t)(end-start);
    char* outptr = result;
    size_t outsize = length;
    while (insize > 0) {
      size_t res = iconv(cd,(void*)&inptr,&insize,&outptr,&outsize);
      if (res == (size_t)(-1)) {
        if (errno == EINVAL)
          break;
        /* E2BIG cannot legitimately happen here, the buffer was sized by
           the first pass; it is reported like any other failure. */
        return iconv_string_fail(cd);
      }
    }
    {
      size_t res = iconv(cd,NULL,NULL,&outptr,&outsize);
      if (res == (size_t)(-1))
        return iconv_string_fail(cd);
    }
    /* Both passes must have produced exactly the same number of bytes. */
    if (outsize != 0) abort();
  }
  iconv_close(cd);
  return 0;
}
155