1/* Copyright (C) 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
2   This file is part of the GNU C Library.
3
4   The GNU C Library is free software; you can redistribute it and/or
5   modify it under the terms of the GNU Lesser General Public
6   License as published by the Free Software Foundation; either
7   version 2.1 of the License, or (at your option) any later version.
8
9   The GNU C Library is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12   Lesser General Public License for more details.
13
14   You should have received a copy of the GNU Lesser General Public
15   License along with the GNU C Library; if not, write to the Free
16   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
17   02111-1307 USA.
18
19   As a special exception, if you link the code in this file with
20   files compiled with a GNU compiler to produce an executable,
21   that does not cause the resulting executable to be covered by
22   the GNU Lesser General Public License.  This exception does not
23   however invalidate any other reasons why the executable file
24   might be covered by the GNU Lesser General Public License.
25   This exception applies to code released by its copyright holders
26   in files containing the exception.  */
27
28#include <libioP.h>
29#ifdef _LIBC
30# include <dlfcn.h>
31# include <wchar.h>
32#endif
33#include <assert.h>
34#include <stdlib.h>
35#include <string.h>
36
37#ifdef _LIBC
38# include <langinfo.h>
39# include <locale/localeinfo.h>
40# include <wcsmbs/wcsmbsload.h>
41# include <iconv/gconv_int.h>
42# include <shlib-compat.h>
43#endif
44
45/* Prototypes of libio's codecvt functions.  */
46static enum __codecvt_result do_out (struct _IO_codecvt *codecvt,
47				     __mbstate_t *statep,
48				     const wchar_t *from_start,
49				     const wchar_t *from_end,
50				     const wchar_t **from_stop, char *to_start,
51				     char *to_end, char **to_stop);
52static enum __codecvt_result do_unshift (struct _IO_codecvt *codecvt,
53					 __mbstate_t *statep, char *to_start,
54					 char *to_end, char **to_stop);
55static enum __codecvt_result do_in (struct _IO_codecvt *codecvt,
56				    __mbstate_t *statep,
57				    const char *from_start,
58				    const char *from_end,
59				    const char **from_stop, wchar_t *to_start,
60				    wchar_t *to_end, wchar_t **to_stop);
61static int do_encoding (struct _IO_codecvt *codecvt);
62static int do_length (struct _IO_codecvt *codecvt, __mbstate_t *statep,
63		      const char *from_start,
64		      const char *from_end, _IO_size_t max);
65static int do_max_length (struct _IO_codecvt *codecvt);
66static int do_always_noconv (struct _IO_codecvt *codecvt);
67
68
69/* The functions used in `codecvt' for libio are always the same.  */
70struct _IO_codecvt __libio_codecvt =
71{
72  .__codecvt_destr = NULL,		/* Destructor, never used.  */
73  .__codecvt_do_out = do_out,
74  .__codecvt_do_unshift = do_unshift,
75  .__codecvt_do_in = do_in,
76  .__codecvt_do_encoding = do_encoding,
77  .__codecvt_do_always_noconv = do_always_noconv,
78  .__codecvt_do_length = do_length,
79  .__codecvt_do_max_length = do_max_length
80};
81
82
#ifdef _LIBC
/* Transliteration data attached to the output conversion step of a
   wide-oriented stream.  No transliteration function is set here.  */
struct __gconv_trans_data __libio_translit attribute_hidden =
  { .__trans_fct = NULL };
#endif
89
90/* Return orientation of stream.  If mode is nonzero try to change
91 * the orientation first.
92 */
93
94#undef _IO_fwide
95
96int
97_IO_fwide(_IO_FILE *fp, int mode)
98{
99	/* Normalize the value.  */
100	mode = mode < 0 ? -1 : (mode == 0 ? 0 : 1);
101
102	if (mode == 0) {
103		/* The caller simply wants to know about the current orientation. */
104		return fp->_mode;
105	}
106
107#if defined SHARED && defined _LIBC \
108    && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_1)
109  if (__builtin_expect (&_IO_stdin_used == NULL, 0)
110      && (fp == _IO_stdin ||  fp == _IO_stdout || fp == _IO_stderr))
111    /* This is for a stream in the glibc 2.0 format.  */
112    return -1;
113#endif
114
115	if (fp->_mode != 0) {
116		/* The orientation already has been determined.  */
117		return fp->_mode;
118	}
119
120	{
121		struct _IO_codecvt *cc = fp->_codecvt = &fp->_wide_data->_codecvt;
122
123		fp->_wide_data->_IO_read_ptr = fp->_wide_data->_IO_read_end;
124		fp->_wide_data->_IO_write_ptr = fp->_wide_data->_IO_write_base;
125
126		/* Get the character conversion functions based on the currently
127		 * selected locale for LC_CTYPE.
128		 */
129#ifdef _LIBC
130      {
131	struct gconv_fcts fcts;
132
133	/* Clear the state.  We start all over again.  */
134	memset (&fp->_wide_data->_IO_state, '\0', sizeof (__mbstate_t));
135	memset (&fp->_wide_data->_IO_last_state, '\0', sizeof (__mbstate_t));
136
137	__wcsmbs_clone_conv (&fcts);
138	assert (fcts.towc_nsteps == 1);
139	assert (fcts.tomb_nsteps == 1);
140
141	/* The functions are always the same.  */
142	*cc = __libio_codecvt;
143
144	cc->__cd_in.__cd.__nsteps = fcts.towc_nsteps;
145	cc->__cd_in.__cd.__steps = fcts.towc;
146
147	cc->__cd_in.__cd.__data[0].__invocation_counter = 0;
148	cc->__cd_in.__cd.__data[0].__internal_use = 1;
149	cc->__cd_in.__cd.__data[0].__flags = __GCONV_IS_LAST;
150	cc->__cd_in.__cd.__data[0].__statep = &fp->_wide_data->_IO_state;
151
152	/* XXX For now no transliteration.  */
153	cc->__cd_in.__cd.__data[0].__trans = NULL;
154
155	cc->__cd_out.__cd.__nsteps = fcts.tomb_nsteps;
156	cc->__cd_out.__cd.__steps = fcts.tomb;
157
158	cc->__cd_out.__cd.__data[0].__invocation_counter = 0;
159	cc->__cd_out.__cd.__data[0].__internal_use = 1;
160	cc->__cd_out.__cd.__data[0].__flags = __GCONV_IS_LAST;
161	cc->__cd_out.__cd.__data[0].__statep = &fp->_wide_data->_IO_state;
162
163	/* And now the transliteration.  */
164	cc->__cd_out.__cd.__data[0].__trans = &__libio_translit;
165      }
166#else
167# ifdef _GLIBCPP_USE_WCHAR_T
168      {
169	/* Determine internal and external character sets.
170
171	   XXX For now we make our life easy: we assume a fixed internal
172	   encoding (as most sane systems have; hi HP/UX!).  If somebody
173	   cares about systems which changing internal charsets they
174	   should come up with a solution for the determination of the
175	   currently used internal character set.  */
176	const char *internal_ccs = _G_INTERNAL_CCS;
177	const char *external_ccs = NULL;
178
179#  ifdef HAVE_NL_LANGINFO
180	external_ccs = nl_langinfo (CODESET);
181#  endif
182	if (external_ccs == NULL)
183	  external_ccs = "ISO-8859-1";
184
185	cc->__cd_in = iconv_open (internal_ccs, external_ccs);
186	if (cc->__cd_in != (iconv_t) -1)
187	  cc->__cd_out = iconv_open (external_ccs, internal_ccs);
188
189	if (cc->__cd_in == (iconv_t) -1 || cc->__cd_out == (iconv_t) -1)
190	  {
191	    if (cc->__cd_in != (iconv_t) -1)
192	      iconv_close (cc->__cd_in);
193	    /* XXX */
194	    abort ();
195	  }
196      }
197# else
198#  error "somehow determine this from LC_CTYPE"
199# endif
200#endif
201
202      /* From now on use the wide character callback functions.  */
203      ((struct _IO_FILE_plus *) fp)->vtable = fp->_wide_data->_wide_vtable;
204
205      /* One last twist: we get the current stream position.  The wide
206	 char streams have much more problems with not knowing the
207	 current position and so we should disable the optimization
208	 which allows the functions without knowing the position.  */
209      fp->_offset = _IO_SYSSEEK (fp, 0, _IO_seek_cur);
210    }
211	/* Set the mode now.  */
212	fp->_mode = mode;
213
214	return mode;
215}
216
217static enum __codecvt_result
218do_out (struct _IO_codecvt *codecvt, __mbstate_t *statep,
219	const wchar_t *from_start, const wchar_t *from_end,
220	const wchar_t **from_stop, char *to_start, char *to_end,
221	char **to_stop)
222{
223  enum __codecvt_result result;
224
225#ifdef _LIBC
226  struct __gconv_step *gs = codecvt->__cd_out.__cd.__steps;
227  int status;
228  size_t dummy;
229  const unsigned char *from_start_copy = (unsigned char *) from_start;
230
231  codecvt->__cd_out.__cd.__data[0].__outbuf = to_start;
232  codecvt->__cd_out.__cd.__data[0].__outbufend = to_end;
233  codecvt->__cd_out.__cd.__data[0].__statep = statep;
234
235  status = DL_CALL_FCT (gs->__fct,
236			(gs, codecvt->__cd_out.__cd.__data, &from_start_copy,
237			 (const unsigned char *) from_end, NULL,
238			 &dummy, 0, 0));
239
240  *from_stop = (wchar_t *) from_start_copy;
241  *to_stop = codecvt->__cd_out.__cd.__data[0].__outbuf;
242
243  switch (status)
244    {
245    case __GCONV_OK:
246    case __GCONV_EMPTY_INPUT:
247      result = __codecvt_ok;
248      break;
249
250    case __GCONV_FULL_OUTPUT:
251    case __GCONV_INCOMPLETE_INPUT:
252      result = __codecvt_partial;
253      break;
254
255    default:
256      result = __codecvt_error;
257      break;
258    }
259#else
260# ifdef _GLIBCPP_USE_WCHAR_T
261  size_t res;
262  const char *from_start_copy = (const char *) from_start;
263  size_t from_len = from_end - from_start;
264  char *to_start_copy = to_start;
265  size_t to_len = to_end - to_start;
266  res = iconv (codecvt->__cd_out, &from_start_copy, &from_len,
267	       &to_start_copy, &to_len);
268
269  if (res == 0 || from_len == 0)
270    result = __codecvt_ok;
271  else if (to_len < codecvt->__codecvt_do_max_length (codecvt))
272    result = __codecvt_partial;
273  else
274    result = __codecvt_error;
275
276# else
277  /* Decide what to do.  */
278  result = __codecvt_error;
279# endif
280#endif
281
282  return result;
283}
284
285
286static enum __codecvt_result
287do_unshift (struct _IO_codecvt *codecvt, __mbstate_t *statep,
288	    char *to_start, char *to_end, char **to_stop)
289{
290  enum __codecvt_result result;
291
292#ifdef _LIBC
293  struct __gconv_step *gs = codecvt->__cd_out.__cd.__steps;
294  int status;
295  size_t dummy;
296
297  codecvt->__cd_out.__cd.__data[0].__outbuf = to_start;
298  codecvt->__cd_out.__cd.__data[0].__outbufend = to_end;
299  codecvt->__cd_out.__cd.__data[0].__statep = statep;
300
301  status = DL_CALL_FCT (gs->__fct,
302			(gs, codecvt->__cd_out.__cd.__data, NULL, NULL,
303			 NULL, &dummy, 1, 0));
304
305  *to_stop = codecvt->__cd_out.__cd.__data[0].__outbuf;
306
307  switch (status)
308    {
309    case __GCONV_OK:
310    case __GCONV_EMPTY_INPUT:
311      result = __codecvt_ok;
312      break;
313
314    case __GCONV_FULL_OUTPUT:
315    case __GCONV_INCOMPLETE_INPUT:
316      result = __codecvt_partial;
317      break;
318
319    default:
320      result = __codecvt_error;
321      break;
322    }
323#else
324# ifdef _GLIBCPP_USE_WCHAR_T
325  size_t res;
326  char *to_start_copy = (char *) to_start;
327  size_t to_len = to_end - to_start;
328
329  res = iconv (codecvt->__cd_out, NULL, NULL, &to_start_copy, &to_len);
330
331  if (res == 0)
332    result = __codecvt_ok;
333  else if (to_len < codecvt->__codecvt_do_max_length (codecvt))
334    result = __codecvt_partial;
335  else
336    result = __codecvt_error;
337# else
338  /* Decide what to do.  */
339  result = __codecvt_error;
340# endif
341#endif
342
343  return result;
344}
345
346
347static enum __codecvt_result
348do_in (struct _IO_codecvt *codecvt, __mbstate_t *statep,
349       const char *from_start, const char *from_end, const char **from_stop,
350       wchar_t *to_start, wchar_t *to_end, wchar_t **to_stop)
351{
352  enum __codecvt_result result;
353
354#ifdef _LIBC
355  struct __gconv_step *gs = codecvt->__cd_in.__cd.__steps;
356  int status;
357  size_t dummy;
358  const unsigned char *from_start_copy = (unsigned char *) from_start;
359
360  codecvt->__cd_in.__cd.__data[0].__outbuf = (char *) to_start;
361  codecvt->__cd_in.__cd.__data[0].__outbufend = (char *) to_end;
362  codecvt->__cd_in.__cd.__data[0].__statep = statep;
363
364  status = DL_CALL_FCT (gs->__fct,
365			(gs, codecvt->__cd_in.__cd.__data, &from_start_copy,
366			 from_end, NULL, &dummy, 0, 0));
367
368  *from_stop = from_start_copy;
369  *to_stop = (wchar_t *) codecvt->__cd_in.__cd.__data[0].__outbuf;
370
371  switch (status)
372    {
373    case __GCONV_OK:
374    case __GCONV_EMPTY_INPUT:
375      result = __codecvt_ok;
376      break;
377
378    case __GCONV_FULL_OUTPUT:
379    case __GCONV_INCOMPLETE_INPUT:
380      result = __codecvt_partial;
381      break;
382
383    default:
384      result = __codecvt_error;
385      break;
386    }
387#else
388# ifdef _GLIBCPP_USE_WCHAR_T
389  size_t res;
390  const char *from_start_copy = (const char *) from_start;
391  size_t from_len = from_end - from_start;
392  char *to_start_copy = (char *) from_start;
393  size_t to_len = to_end - to_start;
394
395  res = iconv (codecvt->__cd_in, &from_start_copy, &from_len,
396	       &to_start_copy, &to_len);
397
398  if (res == 0)
399    result = __codecvt_ok;
400  else if (to_len == 0)
401    result = __codecvt_partial;
402  else if (from_len < codecvt->__codecvt_do_max_length (codecvt))
403    result = __codecvt_partial;
404  else
405    result = __codecvt_error;
406# else
407  /* Decide what to do.  */
408  result = __codecvt_error;
409# endif
410#endif
411
412  return result;
413}
414
415
/* Describe the external encoding: -1 if it is stateful (or cannot be
   determined), 0 if the number of bytes per wide character varies,
   otherwise the constant number of bytes per wide character.  */
static int
do_encoding (struct _IO_codecvt *codecvt)
{
#ifdef _LIBC
  const struct __gconv_step *step = &codecvt->__cd_in.__cd.__steps[0];

  /* A stateful encoding has no fixed width to report.  */
  if (step->__stateful)
    return -1;

  /* Stateless: a single width exists only if the minimum and maximum
     number of input bytes agree.  */
  return (step->__min_needed_from == step->__max_needed_from
	  ? step->__min_needed_from
	  : 0);
#else
  /* Nothing is known about the encoding; assume the worst case.  */
  return -1;
#endif
}
436
437
/* Tell whether the conversion is an identity mapping.  The answer is
   always zero; CODECVT is not examined.  */
static int
do_always_noconv (struct _IO_codecvt *codecvt)
{
  const int noconv = 0;

  return noconv;
}
443
444
445static int
446do_length (struct _IO_codecvt *codecvt, __mbstate_t *statep,
447	   const char *from_start, const char *from_end, _IO_size_t max)
448{
449  int result;
450#ifdef _LIBC
451  const unsigned char *cp = (const unsigned char *) from_start;
452  wchar_t to_buf[max];
453  struct __gconv_step *gs = codecvt->__cd_in.__cd.__steps;
454  int status;
455  size_t dummy;
456
457  codecvt->__cd_in.__cd.__data[0].__outbuf = (char *) to_buf;
458  codecvt->__cd_in.__cd.__data[0].__outbufend = (char *) &to_buf[max];
459  codecvt->__cd_in.__cd.__data[0].__statep = statep;
460
461  status = DL_CALL_FCT (gs->__fct,
462			(gs, codecvt->__cd_in.__cd.__data, &cp, from_end,
463			 NULL, &dummy, 0, 0));
464
465  result = cp - (const unsigned char *) from_start;
466#else
467# ifdef _GLIBCPP_USE_WCHAR_T
468  const char *from_start_copy = (const char *) from_start;
469  size_t from_len = from_end - from_start;
470  wchar_t to_buf[max];
471  size_t res;
472  char *to_start = (char *) to_buf;
473
474  res = iconv (codecvt->__cd_in, &from_start_copy, &from_len,
475	       &to_start, &max);
476
477  result = from_start_copy - (char *) from_start;
478# else
479  /* Decide what to do.  */
480  result = 0;
481# endif
482#endif
483
484  return result;
485}
486
487
/* Return the maximum number of bytes of the external encoding needed
   for a single wide character.  */
static int
do_max_length (struct _IO_codecvt *codecvt)
{
  int longest;

#ifdef _LIBC
  /* Taken from the first step of the input conversion.  */
  longest = codecvt->__cd_in.__cd.__steps[0].__max_needed_from;
#else
  longest = MB_CUR_MAX;
#endif

  return longest;
}
497