1/* Copyright (C) 1999, 2000 Free Software Foundation, Inc.
2   This file is part of the GNU IO Library.
3
4   This library is free software; you can redistribute it and/or
5   modify it under the terms of the GNU General Public License as
6   published by the Free Software Foundation; either version 2, or (at
7   your option) any later version.
8
9   This library is distributed in the hope that it will be useful, but
10   WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12   General Public License for more details.
13
14   You should have received a copy of the GNU General Public License
15   along with this library; see the file COPYING.  If not, write to
16   the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
17   MA 02111-1307, USA.
18
19   As a special exception, if you link this library with files
20   compiled with a GNU compiler to produce an executable, this does
21   not cause the resulting executable to be covered by the GNU General
22   Public License.  This exception does not however invalidate any
23   other reasons why the executable file might be covered by the GNU
24   General Public License.  */
25
26#include <libioP.h>
27#ifdef _LIBC
28# include <dlfcn.h>
29# include <wchar.h>
30# include <locale/localeinfo.h>
31# include <wcsmbs/wcsmbsload.h>
32# include <iconv/gconv_int.h>
33#endif
34#include <stdlib.h>
35#include <string.h>
36
37#if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T || defined(_GLIBCPP_USE_TYPE_WCHAR_T)
38# include <langinfo.h>
39#endif
40
41#if defined(_GLIBCPP_USE_WCHAR_T) || defined(_GLIBCPP_USE_TYPE_WCHAR_T)
42/* Prototypes of libio's codecvt functions.  */
43static enum __codecvt_result do_out (struct _IO_codecvt *codecvt,
44				     __c_mbstate_t *statep,
45				     const wchar_t *from_start,
46				     const wchar_t *from_end,
47				     const wchar_t **from_stop, char *to_start,
48				     char *to_end, char **to_stop);
49static enum __codecvt_result do_unshift (struct _IO_codecvt *codecvt,
50					 __c_mbstate_t *statep, char *to_start,
51					 char *to_end, char **to_stop);
52static enum __codecvt_result do_in (struct _IO_codecvt *codecvt,
53				    __c_mbstate_t *statep,
54				    const char *from_start,
55				    const char *from_end,
56				    const char **from_stop, wchar_t *to_start,
57				    wchar_t *to_end, wchar_t **to_stop);
58static int do_encoding (struct _IO_codecvt *codecvt);
59static int do_length (struct _IO_codecvt *codecvt, __c_mbstate_t *statep,
60		      const char *from_start,
61		      const char *from_end, _IO_size_t max);
62static int do_max_length (struct _IO_codecvt *codecvt);
63static int do_always_noconv (struct _IO_codecvt *codecvt);
64
65
66/* The functions used in `codecvt' for libio are always the same.  */
67struct _IO_codecvt __libio_codecvt =
68{
69  .__codecvt_destr = NULL,		/* Destructor, never used.  */
70  .__codecvt_do_out = do_out,
71  .__codecvt_do_unshift = do_unshift,
72  .__codecvt_do_in = do_in,
73  .__codecvt_do_encoding = do_encoding,
74  .__codecvt_do_always_noconv = do_always_noconv,
75  .__codecvt_do_length = do_length,
76  .__codecvt_do_max_length = do_max_length
77};
78
79
80#ifdef _LIBC
81static struct __gconv_trans_data libio_translit =
82{
83  .__trans_fct = __gconv_transliterate
84};
85#endif
86#endif /* defined(GLIBCPP_USE_WCHAR_T) */
87
88/* Return orientation of stream.  If mode is nonzero try to change
89   the orientation first.  */
90#undef _IO_fwide
91int
92_IO_fwide (fp, mode)
93     _IO_FILE *fp;
94     int mode;
95{
96  /* Normalize the value.  */
97  mode = mode < 0 ? -1 : (mode == 0 ? 0 : 1);
98
99  if (mode == 0 || fp->_mode != 0)
100    /* The caller simply wants to know about the current orientation
101       or the orientation already has been determined.  */
102    return fp->_mode;
103
104  /* Set the orientation appropriately.  */
105  if (mode > 0)
106    {
107#if defined(_GLIBCPP_USE_WCHAR_T) || defined(_GLIBCPP_USE_TYPE_WCHAR_T)
108      struct _IO_codecvt *cc = fp->_codecvt;
109
110      fp->_wide_data->_IO_read_ptr = fp->_wide_data->_IO_read_end;
111      fp->_wide_data->_IO_write_ptr = fp->_wide_data->_IO_write_base;
112
113#ifdef _LIBC
114      /* Get the character conversion functions based on the currently
115	 selected locale for LC_CTYPE.  */
116      {
117	struct gconv_fcts fcts;
118
119	/* Clear the state.  We start all over again.  */
120	memset (&fp->_wide_data->_IO_state, '\0', sizeof (__c_mbstate_t));
121	memset (&fp->_wide_data->_IO_last_state, '\0', sizeof (__c_mbstate_t));
122
123	__wcsmbs_clone_conv (&fcts);
124
125	/* The functions are always the same.  */
126	*cc = __libio_codecvt;
127
128	cc->__cd_in.__cd.__nsteps = 1; /* Only one step allowed.  */
129	cc->__cd_in.__cd.__steps = fcts.towc;
130
131	cc->__cd_in.__cd.__data[0].__invocation_counter = 0;
132	cc->__cd_in.__cd.__data[0].__internal_use = 1;
133	cc->__cd_in.__cd.__data[0].__flags = __GCONV_IS_LAST;
134	cc->__cd_in.__cd.__data[0].__statep = &fp->_wide_data->_IO_state;
135
136	/* XXX For now no transliteration.  */
137	cc->__cd_in.__cd.__data[0].__trans = NULL;
138
139	cc->__cd_out.__cd.__nsteps = 1; /* Only one step allowed.  */
140	cc->__cd_out.__cd.__steps = fcts.tomb;
141
142	cc->__cd_out.__cd.__data[0].__invocation_counter = 0;
143	cc->__cd_out.__cd.__data[0].__internal_use = 1;
144	cc->__cd_out.__cd.__data[0].__flags = __GCONV_IS_LAST;
145	cc->__cd_out.__cd.__data[0].__statep = &fp->_wide_data->_IO_state;
146
147	/* And now the transliteration.  */
148	cc->__cd_out.__cd.__data[0].__trans = &libio_translit;
149      }
150#else
151# if defined(_GLIBCPP_USE_WCHAR_T) || defined(_GLIBCPP_USE_TYPE_WCHAR_T)
152      {
153	/* Determine internal and external character sets.
154	   XXX For now we make our life easy: we assume a fixed internal
155	   encoding (as most sane systems have; hi HP/UX!).  If somebody
156	   cares about systems which changing internal charsets they
157	   should come up with a solution for the determination of the
158	   currently used internal character set.  */
159#if 0
160	const char *internal_ccs = _G_INTERNAL_CCS;
161	const char *external_ccs = nl_langinfo(CODESET);
162
163	if (external_ccs == NULL)
164	  external_ccs = "ISO-8859-1";
165
166	cc->__cd_in = iconv_open (internal_ccs, external_ccs);
167	if (cc->__cd_in != (iconv_t) -1)
168	  cc->__cd_out = iconv_open (external_ccs, internal_ccs);
169#endif
170      }
171# else
172#  error "somehow determine this from LC_CTYPE"
173# endif
174#endif
175
176      /* From now on use the wide character callback functions.  */
177      ((struct _IO_FILE_plus *) fp)->vtable = fp->_wide_data->_wide_vtable;
178#else /* !defined(_GLIBCPP_USE_WCHAR_T) */
179      mode = fp->_mode;
180#endif /* !defined(_GLIBCPP_USE_WCHAR_T) */
181    }
182
183  /* Set the mode now.  */
184  fp->_mode = mode;
185
186  return mode;
187}
188
189#ifdef weak_alias
190weak_alias (_IO_fwide, fwide)
191#endif
192
193#if defined(_GLIBCPP_USE_WCHAR_T) || defined(_GLIBCPP_USE_TYPE_WCHAR_T)
194
195static enum __codecvt_result
196do_out (struct _IO_codecvt *codecvt, __c_mbstate_t *statep,
197	const wchar_t *from_start, const wchar_t *from_end,
198	const wchar_t **from_stop, char *to_start, char *to_end,
199	char **to_stop)
200{
201  enum __codecvt_result result;
202
203#ifdef _LIBC
204  struct __gconv_step *gs = codecvt->__cd_out.__cd.__steps;
205  int status;
206  size_t dummy;
207  const unsigned char *from_start_copy = (unsigned char *) from_start;
208
209  codecvt->__cd_out.__cd.__data[0].__outbuf = to_start;
210  codecvt->__cd_out.__cd.__data[0].__outbufend = to_end;
211  codecvt->__cd_out.__cd.__data[0].__statep = statep;
212
213  status = DL_CALL_FCT (gs->__fct,
214			(gs, codecvt->__cd_out.__cd.__data, &from_start_copy,
215			 (const unsigned char *) from_end, NULL,
216			 &dummy, 0, 0));
217
218  *from_stop = (wchar_t *) from_start_copy;
219  *to_stop = codecvt->__cd_out.__cd.__data[0].__outbuf;
220
221  switch (status)
222    {
223    case __GCONV_OK:
224    case __GCONV_EMPTY_INPUT:
225      result = __codecvt_ok;
226      break;
227
228    case __GCONV_FULL_OUTPUT:
229    case __GCONV_INCOMPLETE_INPUT:
230      result = __codecvt_partial;
231      break;
232
233    default:
234      result = __codecvt_error;
235      break;
236    }
237#else
238# if defined(_GLIBCPP_USE_WCHAR_T) || defined(_GLIBCPP_USE_TYPE_WCHAR_T)
239  size_t res;
240  const char *from_start_copy = (const char *) from_start;
241  size_t from_len = from_end - from_start;
242  char *to_start_copy = to_start;
243  size_t to_len = to_end - to_start;
244  res = iconv (codecvt->__cd_out, &from_start_copy, &from_len,
245	       &to_start_copy, &to_len);
246
247  if (res == 0 || from_len == 0)
248    result = __codecvt_ok;
249  else if (to_len < codecvt->__codecvt_do_max_length (codecvt))
250    result = __codecvt_partial;
251  else
252    result = __codecvt_error;
253
254# else
255  /* Decide what to do.  */
256  result = __codecvt_error;
257# endif
258#endif
259
260  return result;
261}
262
263
264static enum __codecvt_result
265do_unshift (struct _IO_codecvt *codecvt, __c_mbstate_t *statep,
266	    char *to_start, char *to_end, char **to_stop)
267{
268  enum __codecvt_result result;
269
270#ifdef _LIBC
271  struct __gconv_step *gs = codecvt->__cd_out.__cd.__steps;
272  int status;
273  size_t dummy;
274
275  codecvt->__cd_out.__cd.__data[0].__outbuf = to_start;
276  codecvt->__cd_out.__cd.__data[0].__outbufend = to_end;
277  codecvt->__cd_out.__cd.__data[0].__statep = statep;
278
279  status = DL_CALL_FCT (gs->__fct,
280			(gs, codecvt->__cd_out.__cd.__data, NULL, NULL,
281			 NULL, &dummy, 1, 0));
282
283  *to_stop = codecvt->__cd_out.__cd.__data[0].__outbuf;
284
285  switch (status)
286    {
287    case __GCONV_OK:
288    case __GCONV_EMPTY_INPUT:
289      result = __codecvt_ok;
290      break;
291
292    case __GCONV_FULL_OUTPUT:
293    case __GCONV_INCOMPLETE_INPUT:
294      result = __codecvt_partial;
295      break;
296
297    default:
298      result = __codecvt_error;
299      break;
300    }
301#else
302# if defined(_GLIBCPP_USE_WCHAR_T) || defined(_GLIBCPP_USE_TYPE_WCHAR_T)
303  size_t res;
304  char *to_start_copy = (char *) to_start;
305  size_t to_len = to_end - to_start;
306
307  res = iconv (codecvt->__cd_out, NULL, NULL, &to_start_copy, &to_len);
308
309  if (res == 0)
310    result = __codecvt_ok;
311  else if (to_len < codecvt->__codecvt_do_max_length (codecvt))
312    result = __codecvt_partial;
313  else
314    result = __codecvt_error;
315# else
316  /* Decide what to do.  */
317  result = __codecvt_error;
318# endif
319#endif
320
321  return result;
322}
323
324
325static enum __codecvt_result
326do_in (struct _IO_codecvt *codecvt, __c_mbstate_t *statep,
327       const char *from_start, const char *from_end, const char **from_stop,
328       wchar_t *to_start, wchar_t *to_end, wchar_t **to_stop)
329{
330  enum __codecvt_result result;
331
332#ifdef _LIBC
333  struct __gconv_step *gs = codecvt->__cd_in.__cd.__steps;
334  int status;
335  size_t dummy;
336  const unsigned char *from_start_copy = (unsigned char *) from_start;
337
338  codecvt->__cd_in.__cd.__data[0].__outbuf = (char *) to_start;
339  codecvt->__cd_in.__cd.__data[0].__outbufend = (char *) to_end;
340  codecvt->__cd_in.__cd.__data[0].__statep = statep;
341
342  status = DL_CALL_FCT (gs->__fct,
343			(gs, codecvt->__cd_in.__cd.__data, &from_start_copy,
344			 from_end, NULL, &dummy, 0, 0));
345
346  *from_stop = from_start_copy;
347  *to_stop = (wchar_t *) codecvt->__cd_in.__cd.__data[0].__outbuf;
348
349  switch (status)
350    {
351    case __GCONV_OK:
352    case __GCONV_EMPTY_INPUT:
353      result = __codecvt_ok;
354      break;
355
356    case __GCONV_FULL_OUTPUT:
357    case __GCONV_INCOMPLETE_INPUT:
358      result = __codecvt_partial;
359      break;
360
361    default:
362      result = __codecvt_error;
363      break;
364    }
365#else
366# if defined(_GLIBCPP_USE_WCHAR_T) || defined(_GLIBCPP_USE_TYPE_WCHAR_T)
367  size_t res;
368  const char *from_start_copy = (const char *) from_start;
369  size_t from_len = from_end - from_start;
370  char *to_start_copy = (char *) from_start;
371  size_t to_len = to_end - to_start;
372
373  res = iconv (codecvt->__cd_in, &from_start_copy, &from_len,
374	       &to_start_copy, &to_len);
375
376  if (res == 0)
377    result = __codecvt_ok;
378  else if (to_len == 0)
379    result = __codecvt_partial;
380  else if (from_len < codecvt->__codecvt_do_max_length (codecvt))
381    result = __codecvt_partial;
382  else
383    result = __codecvt_error;
384# else
385  /* Decide what to do.  */
386  result = __codecvt_error;
387# endif
388#endif
389
390  return result;
391}
392
393
/* Describe the external encoding: -1 if it is stateful (or, without
   _LIBC, unknown), 0 if the number of bytes per wide character varies,
   otherwise that constant number of bytes.  */
static int
do_encoding (struct _IO_codecvt *codecvt)
{
#ifdef _LIBC
  const struct __gconv_step *step = &codecvt->__cd_in.__cd.__steps[0];

  /* A stateful encoding cannot be described by a byte count.  */
  if (step->__stateful)
    return -1;

  /* Stateless: report the input width if it is the same for every
     wide character, zero if it is not a constant value.  */
  return (step->__min_needed_from == step->__max_needed_from
	  ? step->__min_needed_from : 0);
#else
  /* Nothing is known here, so assume the worst case.  */
  (void) codecvt;
  return -1;
#endif
}
414
415
/* Report whether conversion can always be skipped.  The answer is
   unconditionally zero; CODECVT is not examined.  */
static int
do_always_noconv (struct _IO_codecvt *codecvt)
{
  enum { never_noconv = 0 };

  (void) codecvt;
  return never_noconv;
}
421
422
423static int
424do_length (struct _IO_codecvt *codecvt, __c_mbstate_t *statep,
425	   const char *from_start, const char *from_end, _IO_size_t max)
426{
427  int result;
428#ifdef _LIBC
429  const unsigned char *cp = (const unsigned char *) from_start;
430  wchar_t to_buf[max];
431  struct __gconv_step *gs = codecvt->__cd_in.__cd.__steps;
432  int status;
433  size_t dummy;
434
435  codecvt->__cd_in.__cd.__data[0].__outbuf = (char *) to_buf;
436  codecvt->__cd_in.__cd.__data[0].__outbufend = (char *) &to_buf[max];
437  codecvt->__cd_in.__cd.__data[0].__statep = statep;
438
439  status = DL_CALL_FCT (gs->__fct,
440			(gs, codecvt->__cd_in.__cd.__data, &cp, from_end,
441			 NULL, &dummy, 0, 0));
442
443  result = cp - (const unsigned char *) from_start;
444#else
445# if defined(_GLIBCPP_USE_WCHAR_T) || defined(_GLIBCPP_USE_TYPE_WCHAR_T)
446  const char *from_start_copy = (const char *) from_start;
447  size_t from_len = from_end - from_start;
448  wchar_t to_buf[max];
449  size_t res;
450  char *to_start = (char *) to_buf;
451
452  res = iconv (codecvt->__cd_in, &from_start_copy, &from_len,
453	       &to_start, &max);
454
455  result = from_start_copy - (char *) from_start;
456# else
457  /* Decide what to do.  */
458  result = 0;
459# endif
460#endif
461
462  return result;
463}
464
465
/* Return the largest number of bytes needed for a single character:
   the input step's __max_needed_from under _LIBC, MB_CUR_MAX
   otherwise.  */
static int
do_max_length (struct _IO_codecvt *codecvt)
{
  int max_bytes;

#ifdef _LIBC
  max_bytes = codecvt->__cd_in.__cd.__steps[0].__max_needed_from;
#else
  (void) codecvt;
  max_bytes = MB_CUR_MAX;
#endif

  return max_bytes;
}
475
476#endif /* defined(_GLIBCPP_USE_WCHAR_T) */
477