1// std::codecvt implementation details, DragonFly version -*- C++ -*- 2 3// Copyright (C) 2015-2020 Free Software Foundation, Inc. 4// 5// This file is part of the GNU ISO C++ Library. This library is free 6// software; you can redistribute it and/or modify it under the 7// terms of the GNU General Public License as published by the 8// Free Software Foundation; either version 3, or (at your option) 9// any later version. 10 11// This library is distributed in the hope that it will be useful, 12// but WITHOUT ANY WARRANTY; without even the implied warranty of 13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14// GNU General Public License for more details. 15 16// Under Section 7 of GPL version 3, you are granted additional 17// permissions described in the GCC Runtime Library Exception, version 18// 3.1, as published by the Free Software Foundation. 19 20// You should have received a copy of the GNU General Public License and 21// a copy of the GCC Runtime Library Exception along with this program; 22// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 23// <http://www.gnu.org/licenses/>. 24 25// 26// ISO C++ 14882: 22.2.1.5 - Template class codecvt 27// 28 29// Written by Benjamin Kosnik <bkoz@redhat.com> 30// Modified for DragonFly by John Marino <gnugcc@marino.st> 31 32#include <locale> 33#include <cstring> 34#include <cstdlib> // For MB_CUR_MAX 35#include <climits> // For MB_LEN_MAX 36 37#include "xlocale_port.h" 38 39namespace std _GLIBCXX_VISIBILITY(default) 40{ 41_GLIBCXX_BEGIN_NAMESPACE_VERSION 42 43 // Specializations. 44#ifdef _GLIBCXX_USE_WCHAR_T 45 codecvt_base::result 46 codecvt<wchar_t, char, mbstate_t>:: 47 do_out(state_type& __state, const intern_type* __from, 48 const intern_type* __from_end, const intern_type*& __from_next, 49 extern_type* __to, extern_type* __to_end, 50 extern_type*& __to_next) const 51 { 52 result __ret = ok; 53 state_type __tmp_state(__state); 54 55 // wcsnrtombs is *very* fast but stops if encounters NUL characters: 56 // in case we fall back to wcrtomb and then continue, in a loop. 57 // NB: wcsnrtombs is a GNU extension 58 for (__from_next = __from, __to_next = __to; 59 __from_next < __from_end && __to_next < __to_end 60 && __ret == ok;) 61 { 62 const intern_type* __from_chunk_end = wmemchr(__from_next, L'\0', 63 __from_end - __from_next); 64 if (!__from_chunk_end) 65 __from_chunk_end = __from_end; 66 67 __from = __from_next; 68 const size_t __conv = wcsnrtombs_l(__to_next, &__from_next, 69 __from_chunk_end - __from_next, 70 __to_end - __to_next, &__state, 71 (locale_t)_M_c_locale_codecvt); 72 if (__conv == static_cast<size_t>(-1)) 73 { 74 // In case of error, in order to stop at the exact place we 75 // have to start again from the beginning with a series of 76 // wcrtomb. 77 for (; __from < __from_next; ++__from) 78 __to_next += wcrtomb_l(__to_next, *__from, &__tmp_state, 79 (locale_t)_M_c_locale_codecvt); 80 __state = __tmp_state; 81 __ret = error; 82 } 83 else if (__from_next && __from_next < __from_chunk_end) 84 { 85 __to_next += __conv; 86 __ret = partial; 87 } 88 else 89 { 90 __from_next = __from_chunk_end; 91 __to_next += __conv; 92 } 93 94 if (__from_next < __from_end && __ret == ok) 95 { 96 extern_type __buf[MB_LEN_MAX]; 97 __tmp_state = __state; 98 const size_t __conv2 = wcrtomb_l(__buf, *__from_next, &__tmp_state, 99 (locale_t)_M_c_locale_codecvt); 100 if (__conv2 > static_cast<size_t>(__to_end - __to_next)) 101 __ret = partial; 102 else 103 { 104 memcpy(__to_next, __buf, __conv2); 105 __state = __tmp_state; 106 __to_next += __conv2; 107 ++__from_next; 108 } 109 } 110 } 111 112 return __ret; 113 } 114 115 codecvt_base::result 116 codecvt<wchar_t, char, mbstate_t>:: 117 do_in(state_type& __state, const extern_type* __from, 118 const extern_type* __from_end, const extern_type*& __from_next, 119 intern_type* __to, intern_type* __to_end, 120 intern_type*& __to_next) const 121 { 122 result __ret = ok; 123 state_type __tmp_state(__state); 124 125 // mbsnrtowcs is *very* fast but stops if encounters NUL characters: 126 // in case we store a L'\0' and then continue, in a loop. 127 // NB: mbsnrtowcs is a GNU extension 128 for (__from_next = __from, __to_next = __to; 129 __from_next < __from_end && __to_next < __to_end 130 && __ret == ok;) 131 { 132 const extern_type* __from_chunk_end; 133 __from_chunk_end = static_cast<const extern_type*>(memchr(__from_next, '\0', 134 __from_end 135 - __from_next)); 136 if (!__from_chunk_end) 137 __from_chunk_end = __from_end; 138 139 __from = __from_next; 140 size_t __conv = mbsnrtowcs_l(__to_next, &__from_next, 141 __from_chunk_end - __from_next, 142 __to_end - __to_next, &__state, 143 (locale_t)_M_c_locale_codecvt); 144 if (__conv == static_cast<size_t>(-1)) 145 { 146 // In case of error, in order to stop at the exact place we 147 // have to start again from the beginning with a series of 148 // mbrtowc. 149 for (;; ++__to_next, __from += __conv) 150 { 151 __conv = mbrtowc_l(__to_next, __from, __from_end - __from, 152 &__tmp_state, (locale_t)_M_c_locale_codecvt); 153 if (__conv == static_cast<size_t>(-1) 154 || __conv == static_cast<size_t>(-2)) 155 break; 156 } 157 __from_next = __from; 158 __state = __tmp_state; 159 __ret = error; 160 } 161 else if (__from_next && __from_next < __from_chunk_end) 162 { 163 // It is unclear what to return in this case (see DR 382). 164 __to_next += __conv; 165 __ret = partial; 166 } 167 else 168 { 169 __from_next = __from_chunk_end; 170 __to_next += __conv; 171 } 172 173 if (__from_next < __from_end && __ret == ok) 174 { 175 if (__to_next < __to_end) 176 { 177 // XXX Probably wrong for stateful encodings 178 __tmp_state = __state; 179 ++__from_next; 180 *__to_next++ = L'\0'; 181 } 182 else 183 __ret = partial; 184 } 185 } 186 187 return __ret; 188 } 189 190 int 191 codecvt<wchar_t, char, mbstate_t>:: 192 do_encoding() const throw() 193 { 194 // XXX This implementation assumes that the encoding is 195 // stateless and is either single-byte or variable-width. 196 return MB_CUR_MAX_L((locale_t)_M_c_locale_codecvt) == 1 ? 1 : 0; 197 } 198 199 int 200 codecvt<wchar_t, char, mbstate_t>:: 201 do_max_length() const throw() 202 { 203 // XXX Probably wrong for stateful encodings. 204 return MB_CUR_MAX_L((locale_t)_M_c_locale_codecvt); 205 } 206 207 int 208 codecvt<wchar_t, char, mbstate_t>:: 209 do_length(state_type& __state, const extern_type* __from, 210 const extern_type* __end, size_t __max) const 211 { 212 int __ret = 0; 213 state_type __tmp_state(__state); 214 215 // mbsnrtowcs is *very* fast but stops if encounters NUL characters: 216 // in case we advance past it and then continue, in a loop. 217 // NB: mbsnrtowcs is a GNU extension 218 219 // A dummy internal buffer is needed in order for mbsnrtocws to consider 220 // its fourth parameter (it wouldn't with NULL as first parameter). 221 wchar_t* __to = static_cast<wchar_t*>(__builtin_alloca(sizeof(wchar_t) 222 * __max)); 223 while (__from < __end && __max) 224 { 225 const extern_type* __from_chunk_end; 226 __from_chunk_end = static_cast<const extern_type*>(memchr(__from, '\0', 227 __end 228 - __from)); 229 if (!__from_chunk_end) 230 __from_chunk_end = __end; 231 232 const extern_type* __tmp_from = __from; 233 size_t __conv = mbsnrtowcs_l(__to, &__from, 234 __from_chunk_end - __from, 235 __max, &__state, 236 (locale_t)_M_c_locale_codecvt); 237 if (__conv == static_cast<size_t>(-1)) 238 { 239 // In case of error, in order to stop at the exact place we 240 // have to start again from the beginning with a series of 241 // mbrtowc. 242 for (__from = __tmp_from;; __from += __conv) 243 { 244 __conv = mbrtowc_l(0, __from, __end - __from, 245 &__tmp_state, (locale_t)_M_c_locale_codecvt); 246 if (__conv == static_cast<size_t>(-1) 247 || __conv == static_cast<size_t>(-2)) 248 break; 249 } 250 __state = __tmp_state; 251 __ret += __from - __tmp_from; 252 break; 253 } 254 if (!__from) 255 __from = __from_chunk_end; 256 257 __ret += __from - __tmp_from; 258 __max -= __conv; 259 260 if (__from < __end && __max) 261 { 262 // XXX Probably wrong for stateful encodings 263 __tmp_state = __state; 264 ++__from; 265 ++__ret; 266 --__max; 267 } 268 } 269 270 return __ret; 271 } 272#endif 273 274_GLIBCXX_END_NAMESPACE_VERSION 275} // namespace 276