// Locale support (codecvt) -*- C++ -*- // Copyright (C) 2000-2020 Free Software Foundation, Inc. // // This file is part of the GNU ISO C++ Library. This library is free // software; you can redistribute it and/or modify it under the // terms of the GNU General Public License as published by the // Free Software Foundation; either version 3, or (at your option) // any later version. // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // Under Section 7 of GPL version 3, you are granted additional // permissions described in the GCC Runtime Library Exception, version // 3.1, as published by the Free Software Foundation. // You should have received a copy of the GNU General Public License and // a copy of the GCC Runtime Library Exception along with this program; // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see // . // // ISO C++ 14882: 22.2.1.5 Template class codecvt // // Written by Benjamin Kosnik /** @file ext/codecvt_specializations.h * This file is a GNU extension to the Standard C++ Library. */ #ifndef _EXT_CODECVT_SPECIALIZATIONS_H #define _EXT_CODECVT_SPECIALIZATIONS_H 1 #include #include #include namespace __gnu_cxx _GLIBCXX_VISIBILITY(default) { _GLIBCXX_BEGIN_NAMESPACE_VERSION _GLIBCXX_BEGIN_NAMESPACE_CXX11 /// Extension to use iconv for dealing with character encodings. // This includes conversions and comparisons between various character // sets. This object encapsulates data that may need to be shared between // char_traits, codecvt and ctype. class encoding_state { public: // Types: // NB: A conversion descriptor subsumes and enhances the // functionality of a simple state type such as mbstate_t. typedef iconv_t descriptor_type; protected: // Name of internal character set encoding. std::string _M_int_enc; // Name of external character set encoding. std::string _M_ext_enc; // Conversion descriptor between external encoding to internal encoding. descriptor_type _M_in_desc; // Conversion descriptor between internal encoding to external encoding. descriptor_type _M_out_desc; // The byte-order marker for the external encoding, if necessary. int _M_ext_bom; // The byte-order marker for the internal encoding, if necessary. int _M_int_bom; // Number of external bytes needed to construct one complete // character in the internal encoding. // NB: -1 indicates variable, or stateful, encodings. int _M_bytes; public: explicit encoding_state() : _M_in_desc(0), _M_out_desc(0), _M_ext_bom(0), _M_int_bom(0), _M_bytes(0) { } explicit encoding_state(const char* __int, const char* __ext, int __ibom = 0, int __ebom = 0, int __bytes = 1) : _M_int_enc(__int), _M_ext_enc(__ext), _M_in_desc(0), _M_out_desc(0), _M_ext_bom(__ebom), _M_int_bom(__ibom), _M_bytes(__bytes) { init(); } // 21.1.2 traits typedefs // p4 // typedef STATE_T state_type // requires: state_type shall meet the requirements of // CopyConstructible types (20.1.3) // NB: This does not preserve the actual state of the conversion // descriptor member, but it does duplicate the encoding // information. encoding_state(const encoding_state& __obj) : _M_in_desc(0), _M_out_desc(0) { construct(__obj); } // Need assignment operator as well. encoding_state& operator=(const encoding_state& __obj) { construct(__obj); return *this; } ~encoding_state() { destroy(); } bool good() const throw() { const descriptor_type __err = (iconv_t)(-1); bool __test = _M_in_desc && _M_in_desc != __err; __test &= _M_out_desc && _M_out_desc != __err; return __test; } int character_ratio() const { return _M_bytes; } const std::string internal_encoding() const { return _M_int_enc; } int internal_bom() const { return _M_int_bom; } const std::string external_encoding() const { return _M_ext_enc; } int external_bom() const { return _M_ext_bom; } const descriptor_type& in_descriptor() const { return _M_in_desc; } const descriptor_type& out_descriptor() const { return _M_out_desc; } protected: void init() { const descriptor_type __err = (iconv_t)(-1); const bool __have_encodings = _M_int_enc.size() && _M_ext_enc.size(); if (!_M_in_desc && __have_encodings) { _M_in_desc = iconv_open(_M_int_enc.c_str(), _M_ext_enc.c_str()); if (_M_in_desc == __err) std::__throw_runtime_error(__N("encoding_state::_M_init " "creating iconv input descriptor failed")); } if (!_M_out_desc && __have_encodings) { _M_out_desc = iconv_open(_M_ext_enc.c_str(), _M_int_enc.c_str()); if (_M_out_desc == __err) std::__throw_runtime_error(__N("encoding_state::_M_init " "creating iconv output descriptor failed")); } } void construct(const encoding_state& __obj) { destroy(); _M_int_enc = __obj._M_int_enc; _M_ext_enc = __obj._M_ext_enc; _M_ext_bom = __obj._M_ext_bom; _M_int_bom = __obj._M_int_bom; _M_bytes = __obj._M_bytes; init(); } void destroy() throw() { const descriptor_type __err = (iconv_t)(-1); if (_M_in_desc && _M_in_desc != __err) { iconv_close(_M_in_desc); _M_in_desc = 0; } if (_M_out_desc && _M_out_desc != __err) { iconv_close(_M_out_desc); _M_out_desc = 0; } } }; /// encoding_char_traits // Custom traits type with encoding_state for the state type, and the // associated fpos for the position type, all other // bits equivalent to the required char_traits instantiations. template struct encoding_char_traits : public std::char_traits<_CharT> { typedef encoding_state state_type; typedef typename std::fpos pos_type; }; _GLIBCXX_END_NAMESPACE_CXX11 _GLIBCXX_END_NAMESPACE_VERSION } // namespace namespace std _GLIBCXX_VISIBILITY(default) { _GLIBCXX_BEGIN_NAMESPACE_VERSION using __gnu_cxx::encoding_state; /// codecvt specialization. // This partial specialization takes advantage of iconv to provide // code conversions between a large number of character encodings. template class codecvt<_InternT, _ExternT, encoding_state> : public __codecvt_abstract_base<_InternT, _ExternT, encoding_state> { public: // Types: typedef codecvt_base::result result; typedef _InternT intern_type; typedef _ExternT extern_type; typedef __gnu_cxx::encoding_state state_type; typedef state_type::descriptor_type descriptor_type; // Data Members: static locale::id id; explicit codecvt(size_t __refs = 0) : __codecvt_abstract_base(__refs) { } explicit codecvt(state_type& __enc, size_t __refs = 0) : __codecvt_abstract_base(__refs) { } protected: virtual ~codecvt() { } virtual result do_out(state_type& __state, const intern_type* __from, const intern_type* __from_end, const intern_type*& __from_next, extern_type* __to, extern_type* __to_end, extern_type*& __to_next) const; virtual result do_unshift(state_type& __state, extern_type* __to, extern_type* __to_end, extern_type*& __to_next) const; virtual result do_in(state_type& __state, const extern_type* __from, const extern_type* __from_end, const extern_type*& __from_next, intern_type* __to, intern_type* __to_end, intern_type*& __to_next) const; virtual int do_encoding() const throw(); virtual bool do_always_noconv() const throw(); virtual int do_length(state_type&, const extern_type* __from, const extern_type* __end, size_t __max) const; virtual int do_max_length() const throw(); }; template locale::id codecvt<_InternT, _ExternT, encoding_state>::id; // This adaptor works around the signature problems of the second // argument to iconv(): SUSv2 and others use 'const char**', but glibc 2.2 // uses 'char**', which matches the POSIX 1003.1-2001 standard. // Using this adaptor, g++ will do the work for us. template inline size_t __iconv_adaptor(size_t(*__func)(iconv_t, _Tp, size_t*, char**, size_t*), iconv_t __cd, char** __inbuf, size_t* __inbytes, char** __outbuf, size_t* __outbytes) { return __func(__cd, (_Tp)__inbuf, __inbytes, __outbuf, __outbytes); } template codecvt_base::result codecvt<_InternT, _ExternT, encoding_state>:: do_out(state_type& __state, const intern_type* __from, const intern_type* __from_end, const intern_type*& __from_next, extern_type* __to, extern_type* __to_end, extern_type*& __to_next) const { result __ret = codecvt_base::error; if (__state.good()) { const descriptor_type& __desc = __state.out_descriptor(); const size_t __fmultiple = sizeof(intern_type); size_t __fbytes = __fmultiple * (__from_end - __from); const size_t __tmultiple = sizeof(extern_type); size_t __tbytes = __tmultiple * (__to_end - __to); // Argument list for iconv specifies a byte sequence. Thus, // all to/from arrays must be brutally casted to char*. char* __cto = reinterpret_cast(__to); char* __cfrom; size_t __conv; // Some encodings need a byte order marker as the first item // in the byte stream, to designate endian-ness. The default // value for the byte order marker is NULL, so if this is // the case, it's not necessary and we can just go on our // merry way. int __int_bom = __state.internal_bom(); if (__int_bom) { size_t __size = __from_end - __from; intern_type* __cfixed = static_cast (__builtin_alloca(sizeof(intern_type) * (__size + 1))); __cfixed[0] = static_cast(__int_bom); char_traits::copy(__cfixed + 1, __from, __size); __cfrom = reinterpret_cast(__cfixed); __conv = __iconv_adaptor(iconv, __desc, &__cfrom, &__fbytes, &__cto, &__tbytes); } else { intern_type* __cfixed = const_cast(__from); __cfrom = reinterpret_cast(__cfixed); __conv = __iconv_adaptor(iconv, __desc, &__cfrom, &__fbytes, &__cto, &__tbytes); } if (__conv != size_t(-1)) { __from_next = reinterpret_cast(__cfrom); __to_next = reinterpret_cast(__cto); __ret = codecvt_base::ok; } else { if (__fbytes < __fmultiple * (__from_end - __from)) { __from_next = reinterpret_cast(__cfrom); __to_next = reinterpret_cast(__cto); __ret = codecvt_base::partial; } else __ret = codecvt_base::error; } } return __ret; } template codecvt_base::result codecvt<_InternT, _ExternT, encoding_state>:: do_unshift(state_type& __state, extern_type* __to, extern_type* __to_end, extern_type*& __to_next) const { result __ret = codecvt_base::error; if (__state.good()) { const descriptor_type& __desc = __state.in_descriptor(); const size_t __tmultiple = sizeof(intern_type); size_t __tlen = __tmultiple * (__to_end - __to); // Argument list for iconv specifies a byte sequence. Thus, // all to/from arrays must be brutally casted to char*. char* __cto = reinterpret_cast(__to); size_t __conv = __iconv_adaptor(iconv,__desc, 0, 0, &__cto, &__tlen); if (__conv != size_t(-1)) { __to_next = reinterpret_cast(__cto); if (__tlen == __tmultiple * (__to_end - __to)) __ret = codecvt_base::noconv; else if (__tlen == 0) __ret = codecvt_base::ok; else __ret = codecvt_base::partial; } else __ret = codecvt_base::error; } return __ret; } template codecvt_base::result codecvt<_InternT, _ExternT, encoding_state>:: do_in(state_type& __state, const extern_type* __from, const extern_type* __from_end, const extern_type*& __from_next, intern_type* __to, intern_type* __to_end, intern_type*& __to_next) const { result __ret = codecvt_base::error; if (__state.good()) { const descriptor_type& __desc = __state.in_descriptor(); const size_t __fmultiple = sizeof(extern_type); size_t __flen = __fmultiple * (__from_end - __from); const size_t __tmultiple = sizeof(intern_type); size_t __tlen = __tmultiple * (__to_end - __to); // Argument list for iconv specifies a byte sequence. Thus, // all to/from arrays must be brutally casted to char*. char* __cto = reinterpret_cast(__to); char* __cfrom; size_t __conv; // Some encodings need a byte order marker as the first item // in the byte stream, to designate endian-ness. The default // value for the byte order marker is NULL, so if this is // the case, it's not necessary and we can just go on our // merry way. int __ext_bom = __state.external_bom(); if (__ext_bom) { size_t __size = __from_end - __from; extern_type* __cfixed = static_cast (__builtin_alloca(sizeof(extern_type) * (__size + 1))); __cfixed[0] = static_cast(__ext_bom); char_traits::copy(__cfixed + 1, __from, __size); __cfrom = reinterpret_cast(__cfixed); __conv = __iconv_adaptor(iconv, __desc, &__cfrom, &__flen, &__cto, &__tlen); } else { extern_type* __cfixed = const_cast(__from); __cfrom = reinterpret_cast(__cfixed); __conv = __iconv_adaptor(iconv, __desc, &__cfrom, &__flen, &__cto, &__tlen); } if (__conv != size_t(-1)) { __from_next = reinterpret_cast(__cfrom); __to_next = reinterpret_cast(__cto); __ret = codecvt_base::ok; } else { if (__flen < static_cast(__from_end - __from)) { __from_next = reinterpret_cast(__cfrom); __to_next = reinterpret_cast(__cto); __ret = codecvt_base::partial; } else __ret = codecvt_base::error; } } return __ret; } template int codecvt<_InternT, _ExternT, encoding_state>:: do_encoding() const throw() { int __ret = 0; if (sizeof(_ExternT) <= sizeof(_InternT)) __ret = sizeof(_InternT) / sizeof(_ExternT); return __ret; } template bool codecvt<_InternT, _ExternT, encoding_state>:: do_always_noconv() const throw() { return false; } template int codecvt<_InternT, _ExternT, encoding_state>:: do_length(state_type&, const extern_type* __from, const extern_type* __end, size_t __max) const { return std::min(__max, static_cast(__end - __from)); } // _GLIBCXX_RESOLVE_LIB_DEFECTS // 74. Garbled text for codecvt::do_max_length template int codecvt<_InternT, _ExternT, encoding_state>:: do_max_length() const throw() { return 1; } _GLIBCXX_END_NAMESPACE_VERSION } // namespace #endif