1// std::codecvt implementation details, GNU version -*- C++ -*- 2 3// Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2009 4// Free Software Foundation, Inc. 5// 6// This file is part of the GNU ISO C++ Library. This library is free 7// software; you can redistribute it and/or modify it under the 8// terms of the GNU General Public License as published by the 9// Free Software Foundation; either version 3, or (at your option) 10// any later version. 11 12// This library is distributed in the hope that it will be useful, 13// but WITHOUT ANY WARRANTY; without even the implied warranty of 14// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15// GNU General Public License for more details. 16 17// Under Section 7 of GPL version 3, you are granted additional 18// permissions described in the GCC Runtime Library Exception, version 19// 3.1, as published by the Free Software Foundation. 20 21// You should have received a copy of the GNU General Public License and 22// a copy of the GCC Runtime Library Exception along with this program; 23// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 24// <http://www.gnu.org/licenses/>. 25 26// 27// ISO C++ 14882: 22.2.1.5 - Template class codecvt 28// 29 30// Written by Benjamin Kosnik <bkoz@redhat.com> 31 32#include <locale> 33#include <cstdlib> // For MB_CUR_MAX 34#include <climits> // For MB_LEN_MAX 35#include <bits/c++locale_internal.h> 36 37_GLIBCXX_BEGIN_NAMESPACE(std) 38 39 // Specializations. 40#ifdef _GLIBCXX_USE_WCHAR_T 41 codecvt_base::result 42 codecvt<wchar_t, char, mbstate_t>:: 43 do_out(state_type& __state, const intern_type* __from, 44 const intern_type* __from_end, const intern_type*& __from_next, 45 extern_type* __to, extern_type* __to_end, 46 extern_type*& __to_next) const 47 { 48 result __ret = ok; 49 state_type __tmp_state(__state); 50 51#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2) 52 __c_locale __old = __uselocale(_M_c_locale_codecvt); 53#endif 54 55 // wcsnrtombs is *very* fast but stops if encounters NUL characters: 56 // in case we fall back to wcrtomb and then continue, in a loop. 57 // NB: wcsnrtombs is a GNU extension 58 for (__from_next = __from, __to_next = __to; 59 __from_next < __from_end && __to_next < __to_end 60 && __ret == ok;) 61 { 62 const intern_type* __from_chunk_end = wmemchr(__from_next, L'\0', 63 __from_end - __from_next); 64 if (!__from_chunk_end) 65 __from_chunk_end = __from_end; 66 67 __from = __from_next; 68 const size_t __conv = wcsnrtombs(__to_next, &__from_next, 69 __from_chunk_end - __from_next, 70 __to_end - __to_next, &__state); 71 if (__conv == static_cast<size_t>(-1)) 72 { 73 // In case of error, in order to stop at the exact place we 74 // have to start again from the beginning with a series of 75 // wcrtomb. 76 for (; __from < __from_next; ++__from) 77 __to_next += wcrtomb(__to_next, *__from, &__tmp_state); 78 __state = __tmp_state; 79 __ret = error; 80 } 81 else if (__from_next && __from_next < __from_chunk_end) 82 { 83 __to_next += __conv; 84 __ret = partial; 85 } 86 else 87 { 88 __from_next = __from_chunk_end; 89 __to_next += __conv; 90 } 91 92 if (__from_next < __from_end && __ret == ok) 93 { 94 extern_type __buf[MB_LEN_MAX]; 95 __tmp_state = __state; 96 const size_t __conv2 = wcrtomb(__buf, *__from_next, &__tmp_state); 97 if (__conv2 > static_cast<size_t>(__to_end - __to_next)) 98 __ret = partial; 99 else 100 { 101 memcpy(__to_next, __buf, __conv2); 102 __state = __tmp_state; 103 __to_next += __conv2; 104 ++__from_next; 105 } 106 } 107 } 108 109#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2) 110 __uselocale(__old); 111#endif 112 113 return __ret; 114 } 115 116 codecvt_base::result 117 codecvt<wchar_t, char, mbstate_t>:: 118 do_in(state_type& __state, const extern_type* __from, 119 const extern_type* __from_end, const extern_type*& __from_next, 120 intern_type* __to, intern_type* __to_end, 121 intern_type*& __to_next) const 122 { 123 result __ret = ok; 124 state_type __tmp_state(__state); 125 126#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2) 127 __c_locale __old = __uselocale(_M_c_locale_codecvt); 128#endif 129 130 // mbsnrtowcs is *very* fast but stops if encounters NUL characters: 131 // in case we store a L'\0' and then continue, in a loop. 132 // NB: mbsnrtowcs is a GNU extension 133 for (__from_next = __from, __to_next = __to; 134 __from_next < __from_end && __to_next < __to_end 135 && __ret == ok;) 136 { 137 const extern_type* __from_chunk_end; 138 __from_chunk_end = static_cast<const extern_type*>(memchr(__from_next, '\0', 139 __from_end 140 - __from_next)); 141 if (!__from_chunk_end) 142 __from_chunk_end = __from_end; 143 144 __from = __from_next; 145 size_t __conv = mbsnrtowcs(__to_next, &__from_next, 146 __from_chunk_end - __from_next, 147 __to_end - __to_next, &__state); 148 if (__conv == static_cast<size_t>(-1)) 149 { 150 // In case of error, in order to stop at the exact place we 151 // have to start again from the beginning with a series of 152 // mbrtowc. 153 for (;; ++__to_next, __from += __conv) 154 { 155 __conv = mbrtowc(__to_next, __from, __from_end - __from, 156 &__tmp_state); 157 if (__conv == static_cast<size_t>(-1) 158 || __conv == static_cast<size_t>(-2)) 159 break; 160 } 161 __from_next = __from; 162 __state = __tmp_state; 163 __ret = error; 164 } 165 else if (__from_next && __from_next < __from_chunk_end) 166 { 167 // It is unclear what to return in this case (see DR 382). 168 __to_next += __conv; 169 __ret = partial; 170 } 171 else 172 { 173 __from_next = __from_chunk_end; 174 __to_next += __conv; 175 } 176 177 if (__from_next < __from_end && __ret == ok) 178 { 179 if (__to_next < __to_end) 180 { 181 // XXX Probably wrong for stateful encodings 182 __tmp_state = __state; 183 ++__from_next; 184 *__to_next++ = L'\0'; 185 } 186 else 187 __ret = partial; 188 } 189 } 190 191#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2) 192 __uselocale(__old); 193#endif 194 195 return __ret; 196 } 197 198 int 199 codecvt<wchar_t, char, mbstate_t>:: 200 do_encoding() const throw() 201 { 202 // XXX This implementation assumes that the encoding is 203 // stateless and is either single-byte or variable-width. 204 int __ret = 0; 205#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2) 206 __c_locale __old = __uselocale(_M_c_locale_codecvt); 207#endif 208 if (MB_CUR_MAX == 1) 209 __ret = 1; 210#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2) 211 __uselocale(__old); 212#endif 213 return __ret; 214 } 215 216 int 217 codecvt<wchar_t, char, mbstate_t>:: 218 do_max_length() const throw() 219 { 220#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2) 221 __c_locale __old = __uselocale(_M_c_locale_codecvt); 222#endif 223 // XXX Probably wrong for stateful encodings. 224 int __ret = MB_CUR_MAX; 225#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2) 226 __uselocale(__old); 227#endif 228 return __ret; 229 } 230 231 int 232 codecvt<wchar_t, char, mbstate_t>:: 233 do_length(state_type& __state, const extern_type* __from, 234 const extern_type* __end, size_t __max) const 235 { 236 int __ret = 0; 237 state_type __tmp_state(__state); 238 239#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2) 240 __c_locale __old = __uselocale(_M_c_locale_codecvt); 241#endif 242 243 // mbsnrtowcs is *very* fast but stops if encounters NUL characters: 244 // in case we advance past it and then continue, in a loop. 245 // NB: mbsnrtowcs is a GNU extension 246 247 // A dummy internal buffer is needed in order for mbsnrtocws to consider 248 // its fourth parameter (it wouldn't with NULL as first parameter). 249 wchar_t* __to = static_cast<wchar_t*>(__builtin_alloca(sizeof(wchar_t) 250 * __max)); 251 while (__from < __end && __max) 252 { 253 const extern_type* __from_chunk_end; 254 __from_chunk_end = static_cast<const extern_type*>(memchr(__from, '\0', 255 __end 256 - __from)); 257 if (!__from_chunk_end) 258 __from_chunk_end = __end; 259 260 const extern_type* __tmp_from = __from; 261 size_t __conv = mbsnrtowcs(__to, &__from, 262 __from_chunk_end - __from, 263 __max, &__state); 264 if (__conv == static_cast<size_t>(-1)) 265 { 266 // In case of error, in order to stop at the exact place we 267 // have to start again from the beginning with a series of 268 // mbrtowc. 269 for (__from = __tmp_from;; __from += __conv) 270 { 271 __conv = mbrtowc(NULL, __from, __end - __from, 272 &__tmp_state); 273 if (__conv == static_cast<size_t>(-1) 274 || __conv == static_cast<size_t>(-2)) 275 break; 276 } 277 __state = __tmp_state; 278 __ret += __from - __tmp_from; 279 break; 280 } 281 if (!__from) 282 __from = __from_chunk_end; 283 284 __ret += __from - __tmp_from; 285 __max -= __conv; 286 287 if (__from < __end && __max) 288 { 289 // XXX Probably wrong for stateful encodings 290 __tmp_state = __state; 291 ++__from; 292 ++__ret; 293 --__max; 294 } 295 } 296 297#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2) 298 __uselocale(__old); 299#endif 300 301 return __ret; 302 } 303#endif 304 305_GLIBCXX_END_NAMESPACE 306