1/*
2 * Copyright (C) 2000-2002 Free Software Foundation, Inc.
3 * This file is part of the GNU LIBICONV Library.
4 *
5 * The GNU LIBICONV Library is free software; you can redistribute it
6 * and/or modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
9 *
10 * The GNU LIBICONV Library is distributed in the hope that it will be
11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 * Library General Public License for more details.
14 *
15 * You should have received a copy of the GNU Library General Public
16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17 * If not, write to the Free Software Foundation, Inc., 59 Temple Place -
18 * Suite 330, Boston, MA 02111-1307, USA.
19 */
20
21/* This file defines three conversion loops:
22     - from wchar_t to anything else,
23     - from anything else to wchar_t,
24     - from wchar_t to wchar_t.
25 */
26
/*
 * Platform setup for the wchar_t loops.
 *
 * When wcrtomb() or mbrtowc() is available, <wchar.h> is pulled in and
 * BUF_SIZE is defined.  Otherwise only a stand-in mbstate_t is provided so
 * that struct wchar_conv_struct can still be declared.
 */
#if HAVE_WCRTOMB || HAVE_MBRTOWC
# include <wchar.h>
  /* Size of the intermediate multibyte buffers; assumes MB_LEN_MAX <= 64. */
# define BUF_SIZE 64
  /* Some systems, like BeOS, have multibyte encodings but lack mbstate_t.  */
  extern size_t mbrtowc ();
  /* If mbstate_t is a macro (presumably a substitute supplied by the build
     configuration - confirm), call the real mbrtowc with a null state
     pointer and treat every state as initial. */
# if defined mbstate_t
#  define mbrtowc(pwc, s, n, ps) (mbrtowc)(pwc, s, n, 0)
#  define mbsinit(ps) 1
# endif
  /* No mbsinit macro and no mbsinit function: every state counts as
     initial. */
# if !defined mbsinit && !HAVE_MBSINIT
#  define mbsinit(ps) 1
# endif
#else
  /* Neither conversion function exists; mbstate_t is only a placeholder. */
# if !defined mbstate_t
   typedef int mbstate_t;
# endif
#endif
46
47/*
48 * The first two conversion loops have an extended conversion descriptor.
49 */
50struct wchar_conv_struct {
51  struct conv_struct parent;
52  mbstate_t state;
53};
54
55
56#if HAVE_WCRTOMB
57
58/* From wchar_t to anything else. */
59
60static size_t wchar_from_loop_convert (iconv_t icd,
61                                       const char* * inbuf, size_t *inbytesleft,
62                                       char* * outbuf, size_t *outbytesleft)
63{
64  struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
65  size_t result = 0;
66  while (*inbytesleft >= sizeof(wchar_t)) {
67    const wchar_t * inptr = (const wchar_t *) *inbuf;
68    size_t inleft = *inbytesleft;
69    char buf[BUF_SIZE];
70    mbstate_t state = wcd->state;
71    size_t bufcount = 0;
72    while (inleft >= sizeof(wchar_t)) {
73      /* Convert one wchar_t to multibyte representation. */
74      size_t count = wcrtomb(buf+bufcount,*inptr,&state);
75      if (count == (size_t)(-1)) {
76        /* Invalid input. */
77        if (!wcd->parent.discard_ilseq) {
78          errno = EILSEQ;
79          return -1;
80        }
81        count = 0;
82      }
83      inptr++;
84      inleft -= sizeof(wchar_t);
85      bufcount += count;
86      if (count == 0) {
87        /* Continue, append next wchar_t. */
88      } else {
89        /* Attempt to convert the accumulated multibyte representations
90           to the target encoding. */
91        const char* bufptr = buf;
92        size_t bufleft = bufcount;
93        char* outptr = *outbuf;
94        size_t outleft = *outbytesleft;
95        size_t res = unicode_loop_convert(&wcd->parent,
96                                          &bufptr,&bufleft,
97                                          &outptr,&outleft);
98        if (res == (size_t)(-1)) {
99          if (errno == EILSEQ)
100            /* Invalid input. */
101            return -1;
102          else if (errno == E2BIG)
103            /* Output buffer too small. */
104            return -1;
105          else if (errno == EINVAL) {
106            /* Continue, append next wchar_t, but avoid buffer overrun. */
107            if (bufcount + MB_CUR_MAX > BUF_SIZE)
108              abort();
109          } else
110            abort();
111        } else {
112          /* Successful conversion. */
113          wcd->state = state;
114          *inbuf = (const char *) inptr;
115          *inbytesleft = inleft;
116          *outbuf = outptr;
117          *outbytesleft = outleft;
118          result += res;
119          break;
120        }
121      }
122    }
123  }
124  return result;
125}
126
127static size_t wchar_from_loop_reset (iconv_t icd,
128                                     char* * outbuf, size_t *outbytesleft)
129{
130  struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
131  if (outbuf == NULL || *outbuf == NULL) {
132    /* Reset the states. */
133    memset(&wcd->state,'\0',sizeof(mbstate_t));
134    return unicode_loop_reset(&wcd->parent,NULL,NULL);
135  } else {
136    if (!mbsinit(&wcd->state)) {
137      mbstate_t state = wcd->state;
138      char buf[BUF_SIZE];
139      size_t bufcount = wcrtomb(buf,(wchar_t)0,&state);
140      if (bufcount == (size_t)(-1) || bufcount == 0 || buf[bufcount-1] != '\0')
141        abort();
142      else {
143        const char* bufptr = buf;
144        size_t bufleft = bufcount-1;
145        char* outptr = *outbuf;
146        size_t outleft = *outbytesleft;
147        size_t res = unicode_loop_convert(&wcd->parent,
148                                          &bufptr,&bufleft,
149                                          &outptr,&outleft);
150        if (res == (size_t)(-1)) {
151          if (errno == E2BIG)
152            return -1;
153          else
154            abort();
155        } else {
156          res = unicode_loop_reset(&wcd->parent,&outptr,&outleft);
157          if (res == (size_t)(-1))
158            return res;
159          else {
160            /* Successful. */
161            wcd->state = state;
162            *outbuf = outptr;
163            *outbytesleft = outleft;
164            return 0;
165          }
166        }
167      }
168    } else
169      return unicode_loop_reset(&wcd->parent,outbuf,outbytesleft);
170  }
171}
172
173#endif
174
175
176#if HAVE_MBRTOWC
177
178/* From anything else to wchar_t. */
179
180static size_t wchar_to_loop_convert (iconv_t icd,
181                                     const char* * inbuf, size_t *inbytesleft,
182                                     char* * outbuf, size_t *outbytesleft)
183{
184  struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
185  size_t result = 0;
186  while (*inbytesleft > 0) {
187    size_t incount;
188    for (incount = 1; incount <= *inbytesleft; incount++) {
189      char buf[BUF_SIZE];
190      const char* inptr = *inbuf;
191      size_t inleft = incount;
192      char* bufptr = buf;
193      size_t bufleft = BUF_SIZE;
194      size_t res = unicode_loop_convert(&wcd->parent,
195                                        &inptr,&inleft,
196                                        &bufptr,&bufleft);
197      if (res == (size_t)(-1)) {
198        if (errno == EILSEQ)
199          /* Invalid input. */
200          return -1;
201        else if (errno == EINVAL) {
202          /* Incomplete input. Next try with one more input byte. */
203        } else
204          /* E2BIG shouldn't occur. */
205          abort();
206      } else {
207        /* Successful conversion. */
208        size_t bufcount = bufptr-buf; /* = BUF_SIZE-bufleft */
209        mbstate_t state = wcd->state;
210        wchar_t wc;
211        res = mbrtowc(&wc,buf,bufcount,&state);
212        if (res == (size_t)(-2)) {
213          /* Next try with one more input byte. */
214        } else {
215          if (res == (size_t)(-1)) {
216            /* Invalid input. */
217            if (!wcd->parent.discard_ilseq)
218              return -1;
219          } else {
220            if (*outbytesleft < sizeof(wchar_t)) {
221              errno = E2BIG;
222              return -1;
223            }
224            *(wchar_t*) *outbuf = wc;
225            /* Restoring the state is not needed because it is the initial
226               state anyway: For all known locale encodings, the multibyte
227               to wchar_t conversion doesn't have shift state, and we have
228               excluded partial accumulated characters. */
229            /* wcd->state = state; */
230            *outbuf += sizeof(wchar_t);
231            *outbytesleft -= sizeof(wchar_t);
232          }
233          *inbuf += incount;
234          *inbytesleft -= incount;
235          result += res;
236          break;
237        }
238      }
239    }
240  }
241  return result;
242}
243
244static size_t wchar_to_loop_reset (iconv_t icd,
245                                   char* * outbuf, size_t *outbytesleft)
246{
247  struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
248  size_t res = unicode_loop_reset(&wcd->parent,outbuf,outbytesleft);
249  if (res == (size_t)(-1))
250    return res;
251  memset(&wcd->state,0,sizeof(mbstate_t));
252  return 0;
253}
254
255#endif
256
257
258/* From wchar_t to wchar_t. */
259
/*
 * Identity conversion: copies whole wchar_t units from the input to the
 * output.
 *
 * The number of units copied is the smaller of the complete units
 * available in the input and the complete units that fit in the output.
 * Both pointer/size pairs are advanced by exactly that amount; if nothing
 * can be copied they are left untouched.  icd is not used.
 *
 * Always returns 0.
 */
static size_t wchar_id_loop_convert (iconv_t icd,
                                     const char* * inbuf, size_t *inbytesleft,
                                     char* * outbuf, size_t *outbytesleft)
{
  const wchar_t* src = (const wchar_t*) *inbuf;
  wchar_t* dst = (wchar_t*) *outbuf;
  size_t avail = *inbytesleft / sizeof(wchar_t);
  size_t room = *outbytesleft / sizeof(wchar_t);
  size_t ncopy = avail;
  size_t i;

  if (room < ncopy)
    ncopy = room;
  if (ncopy == 0)
    return 0;

  *inbytesleft -= ncopy * sizeof(wchar_t);
  *outbytesleft -= ncopy * sizeof(wchar_t);
  /* Copy front to back, one unit at a time. */
  for (i = 0; i < ncopy; i++)
    dst[i] = src[i];
  *inbuf = (const char*) (src + ncopy);
  *outbuf = (char*) (dst + ncopy);
  return 0;
}
280
/*
 * Reset function for the wchar_t -> wchar_t loop.
 *
 * None of the arguments is used and nothing is written; the function
 * always returns 0.
 */
static size_t wchar_id_loop_reset (iconv_t icd,
                                   char* * outbuf, size_t *outbytesleft)
{
  (void) icd;
  (void) outbuf;
  (void) outbytesleft;
  return (size_t) 0;
}
286