// Locale support (codecvt) -*- C++ -*-

// Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006
// Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 2, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING.  If not, write to the Free
// Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
// USA.

// As a special exception, you may use this file as part of a free software
// library without restriction.  Specifically, if other files instantiate
// templates or use macros or inline functions from this file, or you compile
// this file and link it with other files to produce an executable, this
// file does not by itself cause the resulting executable to be covered by
// the GNU General Public License.  This exception does not however
// invalidate any other reasons why the executable file might be covered by
// the GNU General Public License.
30169691Skan 31169691Skan// 32169691Skan// ISO C++ 14882: 22.2.1.5 Template class codecvt 33169691Skan// 34169691Skan 35169691Skan// Written by Benjamin Kosnik <bkoz@redhat.com> 36169691Skan 37169691Skan/** @file ext/codecvt_specializations.h 38169691Skan * This file is a GNU extension to the Standard C++ Library. 39169691Skan */ 40169691Skan 41169691Skan#ifndef _EXT_CODECVT_SPECIALIZATIONS_H 42169691Skan#define _EXT_CODECVT_SPECIALIZATIONS_H 1 43169691Skan 44169691Skan#include <bits/c++config.h> 45169691Skan 46169691Skan#ifdef _GLIBCXX_USE_ICONV 47169691Skan 48169691Skan#include <locale> 49169691Skan#include <iconv.h> 50169691Skan 51169691Skan // XXX 52169691Skan // Define this here so codecvt.cc can have _S_max_size definition. 53169691Skan#define _GLIBCXX_USE_ENCODING_STATE 1 54169691Skan 55169691Skan_GLIBCXX_BEGIN_NAMESPACE(__gnu_cxx) 56169691Skan 57169691Skan /// @brief Extension to use icov for dealing with character encodings. 58169691Skan // This includes conversions and comparisons between various character 59169691Skan // sets. This object encapsulates data that may need to be shared between 60169691Skan // char_traits, codecvt and ctype. 61169691Skan class encoding_state 62169691Skan { 63169691Skan public: 64169691Skan // Types: 65169691Skan // NB: A conversion descriptor subsumes and enhances the 66169691Skan // functionality of a simple state type such as mbstate_t. 67169691Skan typedef iconv_t descriptor_type; 68169691Skan 69169691Skan protected: 70169691Skan // Name of internal character set encoding. 71169691Skan std::string _M_int_enc; 72169691Skan 73169691Skan // Name of external character set encoding. 74169691Skan std::string _M_ext_enc; 75169691Skan 76169691Skan // Conversion descriptor between external encoding to internal encoding. 77169691Skan descriptor_type _M_in_desc; 78169691Skan 79169691Skan // Conversion descriptor between internal encoding to external encoding. 
80169691Skan descriptor_type _M_out_desc; 81169691Skan 82169691Skan // The byte-order marker for the external encoding, if necessary. 83169691Skan int _M_ext_bom; 84169691Skan 85169691Skan // The byte-order marker for the internal encoding, if necessary. 86169691Skan int _M_int_bom; 87169691Skan 88169691Skan // Number of external bytes needed to construct one complete 89169691Skan // character in the internal encoding. 90169691Skan // NB: -1 indicates variable, or stateful, encodings. 91169691Skan int _M_bytes; 92169691Skan 93169691Skan public: 94169691Skan explicit 95169691Skan encoding_state() 96169691Skan : _M_in_desc(0), _M_out_desc(0), _M_ext_bom(0), _M_int_bom(0), _M_bytes(0) 97169691Skan { } 98169691Skan 99169691Skan explicit 100169691Skan encoding_state(const char* __int, const char* __ext, 101169691Skan int __ibom = 0, int __ebom = 0, int __bytes = 1) 102169691Skan : _M_int_enc(__int), _M_ext_enc(__ext), _M_in_desc(0), _M_out_desc(0), 103169691Skan _M_ext_bom(__ebom), _M_int_bom(__ibom), _M_bytes(__bytes) 104169691Skan { init(); } 105169691Skan 106169691Skan // 21.1.2 traits typedefs 107169691Skan // p4 108169691Skan // typedef STATE_T state_type 109169691Skan // requires: state_type shall meet the requirements of 110169691Skan // CopyConstructible types (20.1.3) 111169691Skan // NB: This does not preseve the actual state of the conversion 112169691Skan // descriptor member, but it does duplicate the encoding 113169691Skan // information. 114169691Skan encoding_state(const encoding_state& __obj) : _M_in_desc(0), _M_out_desc(0) 115169691Skan { construct(__obj); } 116169691Skan 117169691Skan // Need assignment operator as well. 
118169691Skan encoding_state& 119169691Skan operator=(const encoding_state& __obj) 120169691Skan { 121169691Skan construct(__obj); 122169691Skan return *this; 123169691Skan } 124169691Skan 125169691Skan ~encoding_state() 126169691Skan { destroy(); } 127169691Skan 128169691Skan bool 129169691Skan good() const throw() 130169691Skan { 131229551Spfg const descriptor_type __err = (iconv_t)(-1); 132169691Skan bool __test = _M_in_desc && _M_in_desc != __err; 133169691Skan __test &= _M_out_desc && _M_out_desc != __err; 134169691Skan return __test; 135169691Skan } 136169691Skan 137169691Skan int 138169691Skan character_ratio() const 139169691Skan { return _M_bytes; } 140169691Skan 141169691Skan const std::string 142169691Skan internal_encoding() const 143169691Skan { return _M_int_enc; } 144169691Skan 145169691Skan int 146169691Skan internal_bom() const 147169691Skan { return _M_int_bom; } 148169691Skan 149169691Skan const std::string 150169691Skan external_encoding() const 151169691Skan { return _M_ext_enc; } 152169691Skan 153169691Skan int 154169691Skan external_bom() const 155169691Skan { return _M_ext_bom; } 156169691Skan 157169691Skan const descriptor_type& 158169691Skan in_descriptor() const 159169691Skan { return _M_in_desc; } 160169691Skan 161169691Skan const descriptor_type& 162169691Skan out_descriptor() const 163169691Skan { return _M_out_desc; } 164169691Skan 165169691Skan protected: 166169691Skan void 167169691Skan init() 168169691Skan { 169229551Spfg const descriptor_type __err = (iconv_t)(-1); 170169691Skan const bool __have_encodings = _M_int_enc.size() && _M_ext_enc.size(); 171169691Skan if (!_M_in_desc && __have_encodings) 172169691Skan { 173169691Skan _M_in_desc = iconv_open(_M_int_enc.c_str(), _M_ext_enc.c_str()); 174169691Skan if (_M_in_desc == __err) 175169691Skan std::__throw_runtime_error(__N("encoding_state::_M_init " 176169691Skan "creating iconv input descriptor failed")); 177169691Skan } 178169691Skan if (!_M_out_desc && __have_encodings) 
179169691Skan { 180169691Skan _M_out_desc = iconv_open(_M_ext_enc.c_str(), _M_int_enc.c_str()); 181169691Skan if (_M_out_desc == __err) 182169691Skan std::__throw_runtime_error(__N("encoding_state::_M_init " 183169691Skan "creating iconv output descriptor failed")); 184169691Skan } 185169691Skan } 186169691Skan 187169691Skan void 188169691Skan construct(const encoding_state& __obj) 189169691Skan { 190169691Skan destroy(); 191169691Skan _M_int_enc = __obj._M_int_enc; 192169691Skan _M_ext_enc = __obj._M_ext_enc; 193169691Skan _M_ext_bom = __obj._M_ext_bom; 194169691Skan _M_int_bom = __obj._M_int_bom; 195169691Skan _M_bytes = __obj._M_bytes; 196169691Skan init(); 197169691Skan } 198169691Skan 199169691Skan void 200169691Skan destroy() throw() 201169691Skan { 202229551Spfg const descriptor_type __err = (iconv_t)(-1); 203169691Skan if (_M_in_desc && _M_in_desc != __err) 204169691Skan { 205169691Skan iconv_close(_M_in_desc); 206169691Skan _M_in_desc = 0; 207169691Skan } 208169691Skan if (_M_out_desc && _M_out_desc != __err) 209169691Skan { 210169691Skan iconv_close(_M_out_desc); 211169691Skan _M_out_desc = 0; 212169691Skan } 213169691Skan } 214169691Skan }; 215169691Skan 216169691Skan /// @brief encoding_char_traits. 217169691Skan // Custom traits type with encoding_state for the state type, and the 218169691Skan // associated fpos<encoding_state> for the position type, all other 219169691Skan // bits equivalent to the required char_traits instantiations. 
220169691Skan template<typename _CharT> 221169691Skan struct encoding_char_traits : public std::char_traits<_CharT> 222169691Skan { 223169691Skan typedef encoding_state state_type; 224169691Skan typedef typename std::fpos<state_type> pos_type; 225169691Skan }; 226169691Skan 227169691Skan_GLIBCXX_END_NAMESPACE 228169691Skan 229169691Skan 230169691Skan_GLIBCXX_BEGIN_NAMESPACE(std) 231169691Skan 232169691Skan using __gnu_cxx::encoding_state; 233169691Skan 234169691Skan /// @brief codecvt<InternT, _ExternT, encoding_state> specialization. 235169691Skan // This partial specialization takes advantage of iconv to provide 236169691Skan // code conversions between a large number of character encodings. 237169691Skan template<typename _InternT, typename _ExternT> 238169691Skan class codecvt<_InternT, _ExternT, encoding_state> 239169691Skan : public __codecvt_abstract_base<_InternT, _ExternT, encoding_state> 240169691Skan { 241169691Skan public: 242169691Skan // Types: 243169691Skan typedef codecvt_base::result result; 244169691Skan typedef _InternT intern_type; 245169691Skan typedef _ExternT extern_type; 246169691Skan typedef __gnu_cxx::encoding_state state_type; 247169691Skan typedef state_type::descriptor_type descriptor_type; 248169691Skan 249169691Skan // Data Members: 250169691Skan static locale::id id; 251169691Skan 252169691Skan explicit 253169691Skan codecvt(size_t __refs = 0) 254169691Skan : __codecvt_abstract_base<intern_type, extern_type, state_type>(__refs) 255169691Skan { } 256169691Skan 257169691Skan explicit 258169691Skan codecvt(state_type& __enc, size_t __refs = 0) 259169691Skan : __codecvt_abstract_base<intern_type, extern_type, state_type>(__refs) 260169691Skan { } 261169691Skan 262169691Skan protected: 263169691Skan virtual 264169691Skan ~codecvt() { } 265169691Skan 266169691Skan virtual result 267169691Skan do_out(state_type& __state, const intern_type* __from, 268169691Skan const intern_type* __from_end, const intern_type*& __from_next, 269169691Skan 
extern_type* __to, extern_type* __to_end, 270169691Skan extern_type*& __to_next) const; 271169691Skan 272169691Skan virtual result 273169691Skan do_unshift(state_type& __state, extern_type* __to, 274169691Skan extern_type* __to_end, extern_type*& __to_next) const; 275169691Skan 276169691Skan virtual result 277169691Skan do_in(state_type& __state, const extern_type* __from, 278169691Skan const extern_type* __from_end, const extern_type*& __from_next, 279169691Skan intern_type* __to, intern_type* __to_end, 280169691Skan intern_type*& __to_next) const; 281169691Skan 282169691Skan virtual int 283169691Skan do_encoding() const throw(); 284169691Skan 285169691Skan virtual bool 286169691Skan do_always_noconv() const throw(); 287169691Skan 288169691Skan virtual int 289169691Skan do_length(state_type&, const extern_type* __from, 290169691Skan const extern_type* __end, size_t __max) const; 291169691Skan 292169691Skan virtual int 293169691Skan do_max_length() const throw(); 294169691Skan }; 295169691Skan 296169691Skan template<typename _InternT, typename _ExternT> 297169691Skan locale::id 298169691Skan codecvt<_InternT, _ExternT, encoding_state>::id; 299169691Skan 300169691Skan // This adaptor works around the signature problems of the second 301169691Skan // argument to iconv(): SUSv2 and others use 'const char**', but glibc 2.2 302169691Skan // uses 'char**', which matches the POSIX 1003.1-2001 standard. 303169691Skan // Using this adaptor, g++ will do the work for us. 
304169691Skan template<typename _Tp> 305169691Skan inline size_t 306169691Skan __iconv_adaptor(size_t(*__func)(iconv_t, _Tp, size_t*, char**, size_t*), 307169691Skan iconv_t __cd, char** __inbuf, size_t* __inbytes, 308169691Skan char** __outbuf, size_t* __outbytes) 309169691Skan { return __func(__cd, (_Tp)__inbuf, __inbytes, __outbuf, __outbytes); } 310169691Skan 311169691Skan template<typename _InternT, typename _ExternT> 312169691Skan codecvt_base::result 313169691Skan codecvt<_InternT, _ExternT, encoding_state>:: 314169691Skan do_out(state_type& __state, const intern_type* __from, 315169691Skan const intern_type* __from_end, const intern_type*& __from_next, 316169691Skan extern_type* __to, extern_type* __to_end, 317169691Skan extern_type*& __to_next) const 318169691Skan { 319169691Skan result __ret = codecvt_base::error; 320169691Skan if (__state.good()) 321169691Skan { 322169691Skan const descriptor_type& __desc = __state.out_descriptor(); 323169691Skan const size_t __fmultiple = sizeof(intern_type); 324169691Skan size_t __fbytes = __fmultiple * (__from_end - __from); 325169691Skan const size_t __tmultiple = sizeof(extern_type); 326169691Skan size_t __tbytes = __tmultiple * (__to_end - __to); 327169691Skan 328169691Skan // Argument list for iconv specifies a byte sequence. Thus, 329169691Skan // all to/from arrays must be brutally casted to char*. 330169691Skan char* __cto = reinterpret_cast<char*>(__to); 331169691Skan char* __cfrom; 332169691Skan size_t __conv; 333169691Skan 334169691Skan // Some encodings need a byte order marker as the first item 335169691Skan // in the byte stream, to designate endian-ness. The default 336169691Skan // value for the byte order marker is NULL, so if this is 337169691Skan // the case, it's not necessary and we can just go on our 338169691Skan // merry way. 
339169691Skan int __int_bom = __state.internal_bom(); 340169691Skan if (__int_bom) 341169691Skan { 342169691Skan size_t __size = __from_end - __from; 343169691Skan intern_type* __cfixed = static_cast<intern_type*> 344169691Skan (__builtin_alloca(sizeof(intern_type) * (__size + 1))); 345169691Skan __cfixed[0] = static_cast<intern_type>(__int_bom); 346169691Skan char_traits<intern_type>::copy(__cfixed + 1, __from, __size); 347169691Skan __cfrom = reinterpret_cast<char*>(__cfixed); 348169691Skan __conv = __iconv_adaptor(iconv, __desc, &__cfrom, 349169691Skan &__fbytes, &__cto, &__tbytes); 350169691Skan } 351169691Skan else 352169691Skan { 353169691Skan intern_type* __cfixed = const_cast<intern_type*>(__from); 354169691Skan __cfrom = reinterpret_cast<char*>(__cfixed); 355169691Skan __conv = __iconv_adaptor(iconv, __desc, &__cfrom, &__fbytes, 356169691Skan &__cto, &__tbytes); 357169691Skan } 358169691Skan 359169691Skan if (__conv != size_t(-1)) 360169691Skan { 361169691Skan __from_next = reinterpret_cast<const intern_type*>(__cfrom); 362169691Skan __to_next = reinterpret_cast<extern_type*>(__cto); 363169691Skan __ret = codecvt_base::ok; 364169691Skan } 365169691Skan else 366169691Skan { 367169691Skan if (__fbytes < __fmultiple * (__from_end - __from)) 368169691Skan { 369169691Skan __from_next = reinterpret_cast<const intern_type*>(__cfrom); 370169691Skan __to_next = reinterpret_cast<extern_type*>(__cto); 371169691Skan __ret = codecvt_base::partial; 372169691Skan } 373169691Skan else 374169691Skan __ret = codecvt_base::error; 375169691Skan } 376169691Skan } 377169691Skan return __ret; 378169691Skan } 379169691Skan 380169691Skan template<typename _InternT, typename _ExternT> 381169691Skan codecvt_base::result 382169691Skan codecvt<_InternT, _ExternT, encoding_state>:: 383169691Skan do_unshift(state_type& __state, extern_type* __to, 384169691Skan extern_type* __to_end, extern_type*& __to_next) const 385169691Skan { 386169691Skan result __ret = codecvt_base::error; 
387169691Skan if (__state.good()) 388169691Skan { 389169691Skan const descriptor_type& __desc = __state.in_descriptor(); 390169691Skan const size_t __tmultiple = sizeof(intern_type); 391169691Skan size_t __tlen = __tmultiple * (__to_end - __to); 392169691Skan 393169691Skan // Argument list for iconv specifies a byte sequence. Thus, 394169691Skan // all to/from arrays must be brutally casted to char*. 395169691Skan char* __cto = reinterpret_cast<char*>(__to); 396169691Skan size_t __conv = __iconv_adaptor(iconv,__desc, NULL, NULL, 397169691Skan &__cto, &__tlen); 398169691Skan 399169691Skan if (__conv != size_t(-1)) 400169691Skan { 401169691Skan __to_next = reinterpret_cast<extern_type*>(__cto); 402169691Skan if (__tlen == __tmultiple * (__to_end - __to)) 403169691Skan __ret = codecvt_base::noconv; 404169691Skan else if (__tlen == 0) 405169691Skan __ret = codecvt_base::ok; 406169691Skan else 407169691Skan __ret = codecvt_base::partial; 408169691Skan } 409169691Skan else 410169691Skan __ret = codecvt_base::error; 411169691Skan } 412169691Skan return __ret; 413169691Skan } 414169691Skan 415169691Skan template<typename _InternT, typename _ExternT> 416169691Skan codecvt_base::result 417169691Skan codecvt<_InternT, _ExternT, encoding_state>:: 418169691Skan do_in(state_type& __state, const extern_type* __from, 419169691Skan const extern_type* __from_end, const extern_type*& __from_next, 420169691Skan intern_type* __to, intern_type* __to_end, 421169691Skan intern_type*& __to_next) const 422169691Skan { 423169691Skan result __ret = codecvt_base::error; 424169691Skan if (__state.good()) 425169691Skan { 426169691Skan const descriptor_type& __desc = __state.in_descriptor(); 427169691Skan const size_t __fmultiple = sizeof(extern_type); 428169691Skan size_t __flen = __fmultiple * (__from_end - __from); 429169691Skan const size_t __tmultiple = sizeof(intern_type); 430169691Skan size_t __tlen = __tmultiple * (__to_end - __to); 431169691Skan 432169691Skan // Argument list for iconv 
specifies a byte sequence. Thus, 433169691Skan // all to/from arrays must be brutally casted to char*. 434169691Skan char* __cto = reinterpret_cast<char*>(__to); 435169691Skan char* __cfrom; 436169691Skan size_t __conv; 437169691Skan 438169691Skan // Some encodings need a byte order marker as the first item 439169691Skan // in the byte stream, to designate endian-ness. The default 440169691Skan // value for the byte order marker is NULL, so if this is 441169691Skan // the case, it's not necessary and we can just go on our 442169691Skan // merry way. 443169691Skan int __ext_bom = __state.external_bom(); 444169691Skan if (__ext_bom) 445169691Skan { 446169691Skan size_t __size = __from_end - __from; 447169691Skan extern_type* __cfixed = static_cast<extern_type*> 448169691Skan (__builtin_alloca(sizeof(extern_type) * (__size + 1))); 449169691Skan __cfixed[0] = static_cast<extern_type>(__ext_bom); 450169691Skan char_traits<extern_type>::copy(__cfixed + 1, __from, __size); 451169691Skan __cfrom = reinterpret_cast<char*>(__cfixed); 452169691Skan __conv = __iconv_adaptor(iconv, __desc, &__cfrom, 453169691Skan &__flen, &__cto, &__tlen); 454169691Skan } 455169691Skan else 456169691Skan { 457169691Skan extern_type* __cfixed = const_cast<extern_type*>(__from); 458169691Skan __cfrom = reinterpret_cast<char*>(__cfixed); 459169691Skan __conv = __iconv_adaptor(iconv, __desc, &__cfrom, 460169691Skan &__flen, &__cto, &__tlen); 461169691Skan } 462169691Skan 463169691Skan 464169691Skan if (__conv != size_t(-1)) 465169691Skan { 466169691Skan __from_next = reinterpret_cast<const extern_type*>(__cfrom); 467169691Skan __to_next = reinterpret_cast<intern_type*>(__cto); 468169691Skan __ret = codecvt_base::ok; 469169691Skan } 470169691Skan else 471169691Skan { 472169691Skan if (__flen < static_cast<size_t>(__from_end - __from)) 473169691Skan { 474169691Skan __from_next = reinterpret_cast<const extern_type*>(__cfrom); 475169691Skan __to_next = reinterpret_cast<intern_type*>(__cto); 
476169691Skan __ret = codecvt_base::partial; 477169691Skan } 478169691Skan else 479169691Skan __ret = codecvt_base::error; 480169691Skan } 481169691Skan } 482169691Skan return __ret; 483169691Skan } 484169691Skan 485169691Skan template<typename _InternT, typename _ExternT> 486169691Skan int 487169691Skan codecvt<_InternT, _ExternT, encoding_state>:: 488169691Skan do_encoding() const throw() 489169691Skan { 490169691Skan int __ret = 0; 491169691Skan if (sizeof(_ExternT) <= sizeof(_InternT)) 492169691Skan __ret = sizeof(_InternT) / sizeof(_ExternT); 493169691Skan return __ret; 494169691Skan } 495169691Skan 496169691Skan template<typename _InternT, typename _ExternT> 497169691Skan bool 498169691Skan codecvt<_InternT, _ExternT, encoding_state>:: 499169691Skan do_always_noconv() const throw() 500169691Skan { return false; } 501169691Skan 502169691Skan template<typename _InternT, typename _ExternT> 503169691Skan int 504169691Skan codecvt<_InternT, _ExternT, encoding_state>:: 505169691Skan do_length(state_type&, const extern_type* __from, 506169691Skan const extern_type* __end, size_t __max) const 507169691Skan { return std::min(__max, static_cast<size_t>(__end - __from)); } 508169691Skan 509169691Skan // _GLIBCXX_RESOLVE_LIB_DEFECTS 510169691Skan // 74. Garbled text for codecvt::do_max_length 511169691Skan template<typename _InternT, typename _ExternT> 512169691Skan int 513169691Skan codecvt<_InternT, _ExternT, encoding_state>:: 514169691Skan do_max_length() const throw() 515169691Skan { return 1; } 516169691Skan 517169691Skan_GLIBCXX_END_NAMESPACE 518169691Skan 519169691Skan#endif 520169691Skan 521169691Skan#endif 522