/* iofwide.c revision 1.2 */
1/* Copyright (C) 1999, 2000 Free Software Foundation, Inc. 2 This file is part of the GNU IO Library. 3 4 This library is free software; you can redistribute it and/or 5 modify it under the terms of the GNU General Public License as 6 published by the Free Software Foundation; either version 2, or (at 7 your option) any later version. 8 9 This library is distributed in the hope that it will be useful, but 10 WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 General Public License for more details. 13 14 You should have received a copy of the GNU General Public License 15 along with this library; see the file COPYING. If not, write to 16 the Free Software Foundation, 59 Temple Place - Suite 330, Boston, 17 MA 02111-1307, USA. 18 19 As a special exception, if you link this library with files 20 compiled with a GNU compiler to produce an executable, this does 21 not cause the resulting executable to be covered by the GNU General 22 Public License. This exception does not however invalidate any 23 other reasons why the executable file might be covered by the GNU 24 General Public License. */ 25 26#include <libioP.h> 27#ifdef _LIBC 28# include <dlfcn.h> 29# include <wchar.h> 30# include <locale/localeinfo.h> 31# include <wcsmbs/wcsmbsload.h> 32# include <iconv/gconv_int.h> 33#endif 34#include <stdlib.h> 35#include <string.h> 36 37#if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T || defined(_GLIBCPP_USE_TYPE_WCHAR_T) 38# include <langinfo.h> 39#endif 40 41#if defined(_GLIBCPP_USE_WCHAR_T) || defined(_GLIBCPP_USE_TYPE_WCHAR_T) 42/* Prototypes of libio's codecvt functions. 
*/ 43static enum __codecvt_result do_out (struct _IO_codecvt *codecvt, 44 __c_mbstate_t *statep, 45 const wchar_t *from_start, 46 const wchar_t *from_end, 47 const wchar_t **from_stop, char *to_start, 48 char *to_end, char **to_stop); 49static enum __codecvt_result do_unshift (struct _IO_codecvt *codecvt, 50 __c_mbstate_t *statep, char *to_start, 51 char *to_end, char **to_stop); 52static enum __codecvt_result do_in (struct _IO_codecvt *codecvt, 53 __c_mbstate_t *statep, 54 const char *from_start, 55 const char *from_end, 56 const char **from_stop, wchar_t *to_start, 57 wchar_t *to_end, wchar_t **to_stop); 58static int do_encoding (struct _IO_codecvt *codecvt); 59static int do_length (struct _IO_codecvt *codecvt, __c_mbstate_t *statep, 60 const char *from_start, 61 const char *from_end, _IO_size_t max); 62static int do_max_length (struct _IO_codecvt *codecvt); 63static int do_always_noconv (struct _IO_codecvt *codecvt); 64 65 66/* The functions used in `codecvt' for libio are always the same. */ 67struct _IO_codecvt __libio_codecvt = 68{ 69 .__codecvt_destr = NULL, /* Destructor, never used. */ 70 .__codecvt_do_out = do_out, 71 .__codecvt_do_unshift = do_unshift, 72 .__codecvt_do_in = do_in, 73 .__codecvt_do_encoding = do_encoding, 74 .__codecvt_do_always_noconv = do_always_noconv, 75 .__codecvt_do_length = do_length, 76 .__codecvt_do_max_length = do_max_length 77}; 78 79 80#ifdef _LIBC 81static struct __gconv_trans_data libio_translit = 82{ 83 .__trans_fct = __gconv_transliterate 84}; 85#endif 86#endif /* defined(GLIBCPP_USE_WCHAR_T) */ 87 88/* Return orientation of stream. If mode is nonzero try to change 89 the orientation first. */ 90#undef _IO_fwide 91int 92_IO_fwide (fp, mode) 93 _IO_FILE *fp; 94 int mode; 95{ 96 /* Normalize the value. */ 97 mode = mode < 0 ? -1 : (mode == 0 ? 0 : 1); 98 99 if (mode == 0 || fp->_mode != 0) 100 /* The caller simply wants to know about the current orientation 101 or the orientation already has been determined. 
*/ 102 return fp->_mode; 103 104 /* Set the orientation appropriately. */ 105 if (mode > 0) 106 { 107#if defined(_GLIBCPP_USE_WCHAR_T) || defined(_GLIBCPP_USE_TYPE_WCHAR_T) 108 struct _IO_codecvt *cc = fp->_codecvt; 109 110 fp->_wide_data->_IO_read_ptr = fp->_wide_data->_IO_read_end; 111 fp->_wide_data->_IO_write_ptr = fp->_wide_data->_IO_write_base; 112 113#ifdef _LIBC 114 /* Get the character conversion functions based on the currently 115 selected locale for LC_CTYPE. */ 116 { 117 struct gconv_fcts fcts; 118 119 /* Clear the state. We start all over again. */ 120 memset (&fp->_wide_data->_IO_state, '\0', sizeof (__c_mbstate_t)); 121 memset (&fp->_wide_data->_IO_last_state, '\0', sizeof (__c_mbstate_t)); 122 123 __wcsmbs_clone_conv (&fcts); 124 125 /* The functions are always the same. */ 126 *cc = __libio_codecvt; 127 128 cc->__cd_in.__cd.__nsteps = 1; /* Only one step allowed. */ 129 cc->__cd_in.__cd.__steps = fcts.towc; 130 131 cc->__cd_in.__cd.__data[0].__invocation_counter = 0; 132 cc->__cd_in.__cd.__data[0].__internal_use = 1; 133 cc->__cd_in.__cd.__data[0].__flags = __GCONV_IS_LAST; 134 cc->__cd_in.__cd.__data[0].__statep = &fp->_wide_data->_IO_state; 135 136 /* XXX For now no transliteration. */ 137 cc->__cd_in.__cd.__data[0].__trans = NULL; 138 139 cc->__cd_out.__cd.__nsteps = 1; /* Only one step allowed. */ 140 cc->__cd_out.__cd.__steps = fcts.tomb; 141 142 cc->__cd_out.__cd.__data[0].__invocation_counter = 0; 143 cc->__cd_out.__cd.__data[0].__internal_use = 1; 144 cc->__cd_out.__cd.__data[0].__flags = __GCONV_IS_LAST; 145 cc->__cd_out.__cd.__data[0].__statep = &fp->_wide_data->_IO_state; 146 147 /* And now the transliteration. */ 148 cc->__cd_out.__cd.__data[0].__trans = &libio_translit; 149 } 150#else 151# if defined(_GLIBCPP_USE_WCHAR_T) || defined(_GLIBCPP_USE_TYPE_WCHAR_T) 152 { 153 /* Determine internal and external character sets. 
154 XXX For now we make our life easy: we assume a fixed internal 155 encoding (as most sane systems have; hi HP/UX!). If somebody 156 cares about systems which changing internal charsets they 157 should come up with a solution for the determination of the 158 currently used internal character set. */ 159#if 0 160 const char *internal_ccs = _G_INTERNAL_CCS; 161 const char *external_ccs = nl_langinfo(CODESET); 162 163 if (external_ccs == NULL) 164 external_ccs = "ISO-8859-1"; 165 166 cc->__cd_in = iconv_open (internal_ccs, external_ccs); 167 if (cc->__cd_in != (iconv_t) -1) 168 cc->__cd_out = iconv_open (external_ccs, internal_ccs); 169#endif 170 } 171# else 172# error "somehow determine this from LC_CTYPE" 173# endif 174#endif 175 176 /* From now on use the wide character callback functions. */ 177 ((struct _IO_FILE_plus *) fp)->vtable = fp->_wide_data->_wide_vtable; 178#else /* !defined(_GLIBCPP_USE_WCHAR_T) */ 179 mode = fp->_mode; 180#endif /* !defined(_GLIBCPP_USE_WCHAR_T) */ 181 } 182 183 /* Set the mode now. 
*/ 184 fp->_mode = mode; 185 186 return mode; 187} 188 189#ifdef weak_alias 190weak_alias (_IO_fwide, fwide) 191#endif 192 193#if defined(_GLIBCPP_USE_WCHAR_T) || defined(_GLIBCPP_USE_TYPE_WCHAR_T) 194 195static enum __codecvt_result 196do_out (struct _IO_codecvt *codecvt, __c_mbstate_t *statep, 197 const wchar_t *from_start, const wchar_t *from_end, 198 const wchar_t **from_stop, char *to_start, char *to_end, 199 char **to_stop) 200{ 201 enum __codecvt_result result; 202 203#ifdef _LIBC 204 struct __gconv_step *gs = codecvt->__cd_out.__cd.__steps; 205 int status; 206 size_t dummy; 207 const unsigned char *from_start_copy = (unsigned char *) from_start; 208 209 codecvt->__cd_out.__cd.__data[0].__outbuf = to_start; 210 codecvt->__cd_out.__cd.__data[0].__outbufend = to_end; 211 codecvt->__cd_out.__cd.__data[0].__statep = statep; 212 213 status = DL_CALL_FCT (gs->__fct, 214 (gs, codecvt->__cd_out.__cd.__data, &from_start_copy, 215 (const unsigned char *) from_end, NULL, 216 &dummy, 0, 0)); 217 218 *from_stop = (wchar_t *) from_start_copy; 219 *to_stop = codecvt->__cd_out.__cd.__data[0].__outbuf; 220 221 switch (status) 222 { 223 case __GCONV_OK: 224 case __GCONV_EMPTY_INPUT: 225 result = __codecvt_ok; 226 break; 227 228 case __GCONV_FULL_OUTPUT: 229 case __GCONV_INCOMPLETE_INPUT: 230 result = __codecvt_partial; 231 break; 232 233 default: 234 result = __codecvt_error; 235 break; 236 } 237#else 238# if defined(_GLIBCPP_USE_WCHAR_T) || defined(_GLIBCPP_USE_TYPE_WCHAR_T) 239 size_t res; 240 const char *from_start_copy = (const char *) from_start; 241 size_t from_len = from_end - from_start; 242 char *to_start_copy = to_start; 243 size_t to_len = to_end - to_start; 244 res = iconv (codecvt->__cd_out, &from_start_copy, &from_len, 245 &to_start_copy, &to_len); 246 247 if (res == 0 || from_len == 0) 248 result = __codecvt_ok; 249 else if (to_len < codecvt->__codecvt_do_max_length (codecvt)) 250 result = __codecvt_partial; 251 else 252 result = __codecvt_error; 253 254# else 
255 /* Decide what to do. */ 256 result = __codecvt_error; 257# endif 258#endif 259 260 return result; 261} 262 263 264static enum __codecvt_result 265do_unshift (struct _IO_codecvt *codecvt, __c_mbstate_t *statep, 266 char *to_start, char *to_end, char **to_stop) 267{ 268 enum __codecvt_result result; 269 270#ifdef _LIBC 271 struct __gconv_step *gs = codecvt->__cd_out.__cd.__steps; 272 int status; 273 size_t dummy; 274 275 codecvt->__cd_out.__cd.__data[0].__outbuf = to_start; 276 codecvt->__cd_out.__cd.__data[0].__outbufend = to_end; 277 codecvt->__cd_out.__cd.__data[0].__statep = statep; 278 279 status = DL_CALL_FCT (gs->__fct, 280 (gs, codecvt->__cd_out.__cd.__data, NULL, NULL, 281 NULL, &dummy, 1, 0)); 282 283 *to_stop = codecvt->__cd_out.__cd.__data[0].__outbuf; 284 285 switch (status) 286 { 287 case __GCONV_OK: 288 case __GCONV_EMPTY_INPUT: 289 result = __codecvt_ok; 290 break; 291 292 case __GCONV_FULL_OUTPUT: 293 case __GCONV_INCOMPLETE_INPUT: 294 result = __codecvt_partial; 295 break; 296 297 default: 298 result = __codecvt_error; 299 break; 300 } 301#else 302# if defined(_GLIBCPP_USE_WCHAR_T) || defined(_GLIBCPP_USE_TYPE_WCHAR_T) 303 size_t res; 304 char *to_start_copy = (char *) to_start; 305 size_t to_len = to_end - to_start; 306 307 res = iconv (codecvt->__cd_out, NULL, NULL, &to_start_copy, &to_len); 308 309 if (res == 0) 310 result = __codecvt_ok; 311 else if (to_len < codecvt->__codecvt_do_max_length (codecvt)) 312 result = __codecvt_partial; 313 else 314 result = __codecvt_error; 315# else 316 /* Decide what to do. 
*/ 317 result = __codecvt_error; 318# endif 319#endif 320 321 return result; 322} 323 324 325static enum __codecvt_result 326do_in (struct _IO_codecvt *codecvt, __c_mbstate_t *statep, 327 const char *from_start, const char *from_end, const char **from_stop, 328 wchar_t *to_start, wchar_t *to_end, wchar_t **to_stop) 329{ 330 enum __codecvt_result result; 331 332#ifdef _LIBC 333 struct __gconv_step *gs = codecvt->__cd_in.__cd.__steps; 334 int status; 335 size_t dummy; 336 const unsigned char *from_start_copy = (unsigned char *) from_start; 337 338 codecvt->__cd_in.__cd.__data[0].__outbuf = (char *) to_start; 339 codecvt->__cd_in.__cd.__data[0].__outbufend = (char *) to_end; 340 codecvt->__cd_in.__cd.__data[0].__statep = statep; 341 342 status = DL_CALL_FCT (gs->__fct, 343 (gs, codecvt->__cd_in.__cd.__data, &from_start_copy, 344 from_end, NULL, &dummy, 0, 0)); 345 346 *from_stop = from_start_copy; 347 *to_stop = (wchar_t *) codecvt->__cd_in.__cd.__data[0].__outbuf; 348 349 switch (status) 350 { 351 case __GCONV_OK: 352 case __GCONV_EMPTY_INPUT: 353 result = __codecvt_ok; 354 break; 355 356 case __GCONV_FULL_OUTPUT: 357 case __GCONV_INCOMPLETE_INPUT: 358 result = __codecvt_partial; 359 break; 360 361 default: 362 result = __codecvt_error; 363 break; 364 } 365#else 366# if defined(_GLIBCPP_USE_WCHAR_T) || defined(_GLIBCPP_USE_TYPE_WCHAR_T) 367 size_t res; 368 const char *from_start_copy = (const char *) from_start; 369 size_t from_len = from_end - from_start; 370 char *to_start_copy = (char *) from_start; 371 size_t to_len = to_end - to_start; 372 373 res = iconv (codecvt->__cd_in, &from_start_copy, &from_len, 374 &to_start_copy, &to_len); 375 376 if (res == 0) 377 result = __codecvt_ok; 378 else if (to_len == 0) 379 result = __codecvt_partial; 380 else if (from_len < codecvt->__codecvt_do_max_length (codecvt)) 381 result = __codecvt_partial; 382 else 383 result = __codecvt_error; 384# else 385 /* Decide what to do. 
*/ 386 result = __codecvt_error; 387# endif 388#endif 389 390 return result; 391} 392 393 394static int 395do_encoding (struct _IO_codecvt *codecvt) 396{ 397#ifdef _LIBC 398 /* See whether the encoding is stateful. */ 399 if (codecvt->__cd_in.__cd.__steps[0].__stateful) 400 return -1; 401 /* Fortunately not. Now determine the input bytes for the conversion 402 necessary for each wide character. */ 403 if (codecvt->__cd_in.__cd.__steps[0].__min_needed_from 404 != codecvt->__cd_in.__cd.__steps[0].__max_needed_from) 405 /* Not a constant value. */ 406 return 0; 407 408 return codecvt->__cd_in.__cd.__steps[0].__min_needed_from; 409#else 410 /* Worst case scenario. */ 411 return -1; 412#endif 413} 414 415 416static int 417do_always_noconv (struct _IO_codecvt *codecvt) 418{ 419 return 0; 420} 421 422 423static int 424do_length (struct _IO_codecvt *codecvt, __c_mbstate_t *statep, 425 const char *from_start, const char *from_end, _IO_size_t max) 426{ 427 int result; 428#ifdef _LIBC 429 const unsigned char *cp = (const unsigned char *) from_start; 430 wchar_t to_buf[max]; 431 struct __gconv_step *gs = codecvt->__cd_in.__cd.__steps; 432 int status; 433 size_t dummy; 434 435 codecvt->__cd_in.__cd.__data[0].__outbuf = (char *) to_buf; 436 codecvt->__cd_in.__cd.__data[0].__outbufend = (char *) &to_buf[max]; 437 codecvt->__cd_in.__cd.__data[0].__statep = statep; 438 439 status = DL_CALL_FCT (gs->__fct, 440 (gs, codecvt->__cd_in.__cd.__data, &cp, from_end, 441 NULL, &dummy, 0, 0)); 442 443 result = cp - (const unsigned char *) from_start; 444#else 445# if defined(_GLIBCPP_USE_WCHAR_T) || defined(_GLIBCPP_USE_TYPE_WCHAR_T) 446 const char *from_start_copy = (const char *) from_start; 447 size_t from_len = from_end - from_start; 448 wchar_t to_buf[max]; 449 size_t res; 450 char *to_start = (char *) to_buf; 451 452 res = iconv (codecvt->__cd_in, &from_start_copy, &from_len, 453 &to_start, &max); 454 455 result = from_start_copy - (char *) from_start; 456# else 457 /* Decide what to 
do. */ 458 result = 0; 459# endif 460#endif 461 462 return result; 463} 464 465 466static int 467do_max_length (struct _IO_codecvt *codecvt) 468{ 469#ifdef _LIBC 470 return codecvt->__cd_in.__cd.__steps[0].__max_needed_from; 471#else 472 return MB_CUR_MAX; 473#endif 474} 475 476#endif /* defined(_GLIBCPP_USE_WCHAR_T) */ 477