// std::codecvt implementation details, generic version -*- C++ -*-

// Copyright (C) 2002, 2005 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 2, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING.  If not, write to the Free
// Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
// USA.

// As a special exception, you may use this file as part of a free software
// library without restriction.  Specifically, if other files instantiate
// templates or use macros or inline functions from this file, or you compile
// this file and link it with other files to produce an executable, this
// file does not by itself cause the resulting executable to be covered by
// the GNU General Public License.  This exception does not however
// invalidate any other reasons why the executable file might be covered by
// the GNU General Public License.
29103447Skan 30103447Skan// 31103447Skan// ISO C++ 14882: 22.2.1.5 - Template class codecvt 32103447Skan// 33103447Skan 34103447Skan// Written by Benjamin Kosnik <bkoz@redhat.com> 35103447Skan 36103447Skan#include <locale> 37103447Skan 38169691Skan_GLIBCXX_BEGIN_NAMESPACE(std) 39169691Skan 40103447Skan // Specializations. 41132720Skan#ifdef _GLIBCXX_USE_WCHAR_T 42103447Skan codecvt_base::result 43103447Skan codecvt<wchar_t, char, mbstate_t>:: 44103447Skan do_out(state_type& __state, const intern_type* __from, 45103447Skan const intern_type* __from_end, const intern_type*& __from_next, 46103447Skan extern_type* __to, extern_type* __to_end, 47103447Skan extern_type*& __to_next) const 48103447Skan { 49132720Skan result __ret = ok; 50132720Skan // The conversion must be done using a temporary destination buffer 51132720Skan // since it is not possible to pass the size of the buffer to wcrtomb 52132720Skan state_type __tmp_state(__state); 53103447Skan 54132720Skan // The conversion must be done by calling wcrtomb in a loop rather 55132720Skan // than using wcsrtombs because wcsrtombs assumes that the input is 56132720Skan // zero-terminated. 
57132720Skan 58132720Skan // Either we can upper bound the total number of external characters to 59132720Skan // something smaller than __to_end - __to or the conversion must be done 60132720Skan // using a temporary destination buffer since it is not possible to 61132720Skan // pass the size of the buffer to wcrtomb 62132720Skan if (MB_CUR_MAX * (__from_end - __from) - (__to_end - __to) <= 0) 63132720Skan while (__from < __from_end) 64132720Skan { 65132720Skan const size_t __conv = wcrtomb(__to, *__from, &__tmp_state); 66132720Skan if (__conv == static_cast<size_t>(-1)) 67132720Skan { 68132720Skan __ret = error; 69132720Skan break; 70132720Skan } 71132720Skan __state = __tmp_state; 72132720Skan __to += __conv; 73132720Skan __from++; 74132720Skan } 75132720Skan else 76103447Skan { 77132720Skan extern_type __buf[MB_LEN_MAX]; 78132720Skan while (__from < __from_end && __to < __to_end) 79132720Skan { 80132720Skan const size_t __conv = wcrtomb(__buf, *__from, &__tmp_state); 81132720Skan if (__conv == static_cast<size_t>(-1)) 82132720Skan { 83132720Skan __ret = error; 84132720Skan break; 85132720Skan } 86132720Skan else if (__conv > static_cast<size_t>(__to_end - __to)) 87132720Skan { 88132720Skan __ret = partial; 89132720Skan break; 90132720Skan } 91132720Skan 92132720Skan memcpy(__to, __buf, __conv); 93132720Skan __state = __tmp_state; 94132720Skan __to += __conv; 95132720Skan __from++; 96132720Skan } 97103447Skan } 98132720Skan 99132720Skan if (__ret == ok && __from < __from_end) 100132720Skan __ret = partial; 101132720Skan 102132720Skan __from_next = __from; 103132720Skan __to_next = __to; 104103447Skan return __ret; 105103447Skan } 106103447Skan 107103447Skan codecvt_base::result 108103447Skan codecvt<wchar_t, char, mbstate_t>:: 109103447Skan do_in(state_type& __state, const extern_type* __from, 110103447Skan const extern_type* __from_end, const extern_type*& __from_next, 111103447Skan intern_type* __to, intern_type* __to_end, 112103447Skan intern_type*& 
__to_next) const 113103447Skan { 114132720Skan result __ret = ok; 115132720Skan // This temporary state object is neccessary so __state won't be modified 116132720Skan // if [__from, __from_end) is a partial multibyte character. 117132720Skan state_type __tmp_state(__state); 118103447Skan 119132720Skan // Conversion must be done by calling mbrtowc in a loop rather than 120132720Skan // by calling mbsrtowcs because mbsrtowcs assumes that the input 121132720Skan // sequence is zero-terminated. 122132720Skan while (__from < __from_end && __to < __to_end) 123103447Skan { 124132720Skan size_t __conv = mbrtowc(__to, __from, __from_end - __from, 125132720Skan &__tmp_state); 126132720Skan if (__conv == static_cast<size_t>(-1)) 127132720Skan { 128132720Skan __ret = error; 129132720Skan break; 130132720Skan } 131132720Skan else if (__conv == static_cast<size_t>(-2)) 132132720Skan { 133132720Skan // It is unclear what to return in this case (see DR 382). 134132720Skan __ret = partial; 135132720Skan break; 136132720Skan } 137132720Skan else if (__conv == 0) 138132720Skan { 139132720Skan // XXX Probably wrong for stateful encodings 140132720Skan __conv = 1; 141132720Skan *__to = L'\0'; 142132720Skan } 143132720Skan 144132720Skan __state = __tmp_state; 145132720Skan __to++; 146132720Skan __from += __conv; 147103447Skan } 148132720Skan 149132720Skan // It is not clear that __from < __from_end implies __ret != ok 150132720Skan // (see DR 382). 151132720Skan if (__ret == ok && __from < __from_end) 152132720Skan __ret = partial; 153132720Skan 154132720Skan __from_next = __from; 155132720Skan __to_next = __to; 156132720Skan return __ret; 157132720Skan } 158132720Skan 159132720Skan int 160132720Skan codecvt<wchar_t, char, mbstate_t>:: 161132720Skan do_encoding() const throw() 162132720Skan { 163132720Skan // XXX This implementation assumes that the encoding is 164132720Skan // stateless and is either single-byte or variable-width. 
165132720Skan int __ret = 0; 166132720Skan if (MB_CUR_MAX == 1) 167132720Skan __ret = 1; 168132720Skan return __ret; 169132720Skan } 170132720Skan 171132720Skan int 172132720Skan codecvt<wchar_t, char, mbstate_t>:: 173132720Skan do_max_length() const throw() 174132720Skan { 175132720Skan // XXX Probably wrong for stateful encodings. 176132720Skan int __ret = MB_CUR_MAX; 177132720Skan return __ret; 178132720Skan } 179132720Skan 180132720Skan int 181132720Skan codecvt<wchar_t, char, mbstate_t>:: 182132720Skan do_length(state_type& __state, const extern_type* __from, 183132720Skan const extern_type* __end, size_t __max) const 184132720Skan { 185132720Skan int __ret = 0; 186132720Skan state_type __tmp_state(__state); 187132720Skan 188132720Skan while (__from < __end && __max) 189103447Skan { 190132720Skan size_t __conv = mbrtowc(NULL, __from, __end - __from, &__tmp_state); 191132720Skan if (__conv == static_cast<size_t>(-1)) 192132720Skan { 193132720Skan // Invalid source character 194132720Skan break; 195132720Skan } 196132720Skan else if (__conv == static_cast<size_t>(-2)) 197132720Skan { 198132720Skan // Remainder of input does not form a complete destination 199132720Skan // character. 200132720Skan break; 201132720Skan } 202132720Skan else if (__conv == 0) 203132720Skan { 204132720Skan // XXX Probably wrong for stateful encodings 205132720Skan __conv = 1; 206132720Skan } 207132720Skan 208132720Skan __state = __tmp_state; 209132720Skan __from += __conv; 210132720Skan __ret += __conv; 211132720Skan __max--; 212103447Skan } 213132720Skan 214103447Skan return __ret; 215103447Skan } 216103447Skan#endif 217169691Skan 218169691Skan_GLIBCXX_END_NAMESPACE 219