1// std::codecvt implementation details, GNU version -*- C++ -*- 2 3// Copyright (C) 2002, 2003, 2005, 2006 Free Software Foundation, Inc. 4// 5// This file is part of the GNU ISO C++ Library. This library is free 6// software; you can redistribute it and/or modify it under the 7// terms of the GNU General Public License as published by the 8// Free Software Foundation; either version 2, or (at your option) 9// any later version. 10 11// This library is distributed in the hope that it will be useful, 12// but WITHOUT ANY WARRANTY; without even the implied warranty of 13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14// GNU General Public License for more details. 15 16// You should have received a copy of the GNU General Public License along 17// with this library; see the file COPYING. If not, write to the Free 18// Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, 19// USA. 20 21// As a special exception, you may use this file as part of a free software 22// library without restriction. Specifically, if other files instantiate 23// templates or use macros or inline functions from this file, or you compile 24// this file and link it with other files to produce an executable, this 25// file does not by itself cause the resulting executable to be covered by 26// the GNU General Public License. This exception does not however 27// invalidate any other reasons why the executable file might be covered by 28// the GNU General Public License. 29 30// 31// ISO C++ 14882: 22.2.1.5 - Template class codecvt 32// 33 34// Written by Benjamin Kosnik <bkoz@redhat.com> 35 36#include <locale> 37#include <bits/c++locale_internal.h> 38 39_GLIBCXX_BEGIN_NAMESPACE(std) 40 41 // Specializations. 42#ifdef _GLIBCXX_USE_WCHAR_T 43 codecvt_base::result 44 codecvt<wchar_t, char, mbstate_t>:: 45 do_out(state_type& __state, const intern_type* __from, 46 const intern_type* __from_end, const intern_type*& __from_next, 47 extern_type* __to, extern_type* __to_end, 48 extern_type*& __to_next) const 49 { 50 result __ret = ok; 51 state_type __tmp_state(__state); 52 53#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2) 54 __c_locale __old = __uselocale(_M_c_locale_codecvt); 55#endif 56 57 // wcsnrtombs is *very* fast but stops if encounters NUL characters: 58 // in case we fall back to wcrtomb and then continue, in a loop. 59 // NB: wcsnrtombs is a GNU extension 60 for (__from_next = __from, __to_next = __to; 61 __from_next < __from_end && __to_next < __to_end 62 && __ret == ok;) 63 { 64 const intern_type* __from_chunk_end = wmemchr(__from_next, L'\0', 65 __from_end - __from_next); 66 if (!__from_chunk_end) 67 __from_chunk_end = __from_end; 68 69 __from = __from_next; 70 const size_t __conv = wcsnrtombs(__to_next, &__from_next, 71 __from_chunk_end - __from_next, 72 __to_end - __to_next, &__state); 73 if (__conv == static_cast<size_t>(-1)) 74 { 75 // In case of error, in order to stop at the exact place we 76 // have to start again from the beginning with a series of 77 // wcrtomb. 78 for (; __from < __from_next; ++__from) 79 __to_next += wcrtomb(__to_next, *__from, &__tmp_state); 80 __state = __tmp_state; 81 __ret = error; 82 } 83 else if (__from_next && __from_next < __from_chunk_end) 84 { 85 __to_next += __conv; 86 __ret = partial; 87 } 88 else 89 { 90 __from_next = __from_chunk_end; 91 __to_next += __conv; 92 } 93 94 if (__from_next < __from_end && __ret == ok) 95 { 96 extern_type __buf[MB_LEN_MAX]; 97 __tmp_state = __state; 98 const size_t __conv2 = wcrtomb(__buf, *__from_next, &__tmp_state); 99 if (__conv2 > static_cast<size_t>(__to_end - __to_next)) 100 __ret = partial; 101 else 102 { 103 memcpy(__to_next, __buf, __conv2); 104 __state = __tmp_state; 105 __to_next += __conv2; 106 ++__from_next; 107 } 108 } 109 } 110 111#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2) 112 __uselocale(__old); 113#endif 114 115 return __ret; 116 } 117 118 codecvt_base::result 119 codecvt<wchar_t, char, mbstate_t>:: 120 do_in(state_type& __state, const extern_type* __from, 121 const extern_type* __from_end, const extern_type*& __from_next, 122 intern_type* __to, intern_type* __to_end, 123 intern_type*& __to_next) const 124 { 125 result __ret = ok; 126 state_type __tmp_state(__state); 127 128#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2) 129 __c_locale __old = __uselocale(_M_c_locale_codecvt); 130#endif 131 132 // mbsnrtowcs is *very* fast but stops if encounters NUL characters: 133 // in case we store a L'\0' and then continue, in a loop. 134 // NB: mbsnrtowcs is a GNU extension 135 for (__from_next = __from, __to_next = __to; 136 __from_next < __from_end && __to_next < __to_end 137 && __ret == ok;) 138 { 139 const extern_type* __from_chunk_end; 140 __from_chunk_end = static_cast<const extern_type*>(memchr(__from_next, '\0', 141 __from_end 142 - __from_next)); 143 if (!__from_chunk_end) 144 __from_chunk_end = __from_end; 145 146 __from = __from_next; 147 size_t __conv = mbsnrtowcs(__to_next, &__from_next, 148 __from_chunk_end - __from_next, 149 __to_end - __to_next, &__state); 150 if (__conv == static_cast<size_t>(-1)) 151 { 152 // In case of error, in order to stop at the exact place we 153 // have to start again from the beginning with a series of 154 // mbrtowc. 155 for (;; ++__to_next, __from += __conv) 156 { 157 __conv = mbrtowc(__to_next, __from, __from_end - __from, 158 &__tmp_state); 159 if (__conv == static_cast<size_t>(-1) 160 || __conv == static_cast<size_t>(-2)) 161 break; 162 } 163 __from_next = __from; 164 __state = __tmp_state; 165 __ret = error; 166 } 167 else if (__from_next && __from_next < __from_chunk_end) 168 { 169 // It is unclear what to return in this case (see DR 382). 170 __to_next += __conv; 171 __ret = partial; 172 } 173 else 174 { 175 __from_next = __from_chunk_end; 176 __to_next += __conv; 177 } 178 179 if (__from_next < __from_end && __ret == ok) 180 { 181 if (__to_next < __to_end) 182 { 183 // XXX Probably wrong for stateful encodings 184 __tmp_state = __state; 185 ++__from_next; 186 *__to_next++ = L'\0'; 187 } 188 else 189 __ret = partial; 190 } 191 } 192 193#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2) 194 __uselocale(__old); 195#endif 196 197 return __ret; 198 } 199 200 int 201 codecvt<wchar_t, char, mbstate_t>:: 202 do_encoding() const throw() 203 { 204 // XXX This implementation assumes that the encoding is 205 // stateless and is either single-byte or variable-width. 206 int __ret = 0; 207#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2) 208 __c_locale __old = __uselocale(_M_c_locale_codecvt); 209#endif 210 if (MB_CUR_MAX == 1) 211 __ret = 1; 212#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2) 213 __uselocale(__old); 214#endif 215 return __ret; 216 } 217 218 int 219 codecvt<wchar_t, char, mbstate_t>:: 220 do_max_length() const throw() 221 { 222#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2) 223 __c_locale __old = __uselocale(_M_c_locale_codecvt); 224#endif 225 // XXX Probably wrong for stateful encodings. 226 int __ret = MB_CUR_MAX; 227#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2) 228 __uselocale(__old); 229#endif 230 return __ret; 231 } 232 233 int 234 codecvt<wchar_t, char, mbstate_t>:: 235 do_length(state_type& __state, const extern_type* __from, 236 const extern_type* __end, size_t __max) const 237 { 238 int __ret = 0; 239 state_type __tmp_state(__state); 240 241#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2) 242 __c_locale __old = __uselocale(_M_c_locale_codecvt); 243#endif 244 245 // mbsnrtowcs is *very* fast but stops if encounters NUL characters: 246 // in case we advance past it and then continue, in a loop. 247 // NB: mbsnrtowcs is a GNU extension 248 249 // A dummy internal buffer is needed in order for mbsnrtocws to consider 250 // its fourth parameter (it wouldn't with NULL as first parameter). 251 wchar_t* __to = static_cast<wchar_t*>(__builtin_alloca(sizeof(wchar_t) 252 * __max)); 253 while (__from < __end && __max) 254 { 255 const extern_type* __from_chunk_end; 256 __from_chunk_end = static_cast<const extern_type*>(memchr(__from, '\0', 257 __end 258 - __from)); 259 if (!__from_chunk_end) 260 __from_chunk_end = __end; 261 262 const extern_type* __tmp_from = __from; 263 size_t __conv = mbsnrtowcs(__to, &__from, 264 __from_chunk_end - __from, 265 __max, &__state); 266 if (__conv == static_cast<size_t>(-1)) 267 { 268 // In case of error, in order to stop at the exact place we 269 // have to start again from the beginning with a series of 270 // mbrtowc. 271 for (__from = __tmp_from;; __from += __conv) 272 { 273 __conv = mbrtowc(NULL, __from, __end - __from, 274 &__tmp_state); 275 if (__conv == static_cast<size_t>(-1) 276 || __conv == static_cast<size_t>(-2)) 277 break; 278 } 279 __state = __tmp_state; 280 __ret += __from - __tmp_from; 281 break; 282 } 283 if (!__from) 284 __from = __from_chunk_end; 285 286 __ret += __from - __tmp_from; 287 __max -= __conv; 288 289 if (__from < __end && __max) 290 { 291 // XXX Probably wrong for stateful encodings 292 __tmp_state = __state; 293 ++__from; 294 ++__ret; 295 --__max; 296 } 297 } 298 299#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2) 300 __uselocale(__old); 301#endif 302 303 return __ret; 304 } 305#endif 306 307_GLIBCXX_END_NAMESPACE 308