1// std::codecvt implementation details, DragonFly version -*- C++ -*-
2
3// Copyright (C) 2015-2022 Free Software Foundation, Inc.
4//
5// This file is part of the GNU ISO C++ Library.  This library is free
6// software; you can redistribute it and/or modify it under the
7// terms of the GNU General Public License as published by the
8// Free Software Foundation; either version 3, or (at your option)
9// any later version.
10
11// This library is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14// GNU General Public License for more details.
15
16// Under Section 7 of GPL version 3, you are granted additional
17// permissions described in the GCC Runtime Library Exception, version
18// 3.1, as published by the Free Software Foundation.
19
20// You should have received a copy of the GNU General Public License and
21// a copy of the GCC Runtime Library Exception along with this program;
22// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23// <http://www.gnu.org/licenses/>.
24
25//
26// ISO C++ 14882: 22.2.1.5 - Template class codecvt
27//
28
29// Written by Benjamin Kosnik <bkoz@redhat.com>
30// Modified for DragonFly by John Marino <gnugcc@marino.st>
31
32#include <locale>
33#include <cstring>
34#include <cstdlib>  // For MB_CUR_MAX
35#include <climits>  // For MB_LEN_MAX
36
37#include "xlocale_port.h"
38
39namespace std _GLIBCXX_VISIBILITY(default)
40{
41_GLIBCXX_BEGIN_NAMESPACE_VERSION
42
43  // Specializations.
44#ifdef _GLIBCXX_USE_WCHAR_T
45  codecvt_base::result
46  codecvt<wchar_t, char, mbstate_t>::
47  do_out(state_type& __state, const intern_type* __from,
48	 const intern_type* __from_end, const intern_type*& __from_next,
49	 extern_type* __to, extern_type* __to_end,
50	 extern_type*& __to_next) const
51  {
52    result __ret = ok;
53    state_type __tmp_state(__state);
54
55    // wcsnrtombs is *very* fast but stops if encounters NUL characters:
56    // in case we fall back to wcrtomb and then continue, in a loop.
57    // NB: wcsnrtombs is a GNU extension
58    for (__from_next = __from, __to_next = __to;
59	 __from_next < __from_end && __to_next < __to_end
60	 && __ret == ok;)
61      {
62	const intern_type* __from_chunk_end = wmemchr(__from_next, L'\0',
63						      __from_end - __from_next);
64	if (!__from_chunk_end)
65	  __from_chunk_end = __from_end;
66
67	__from = __from_next;
68	const size_t __conv = wcsnrtombs_l(__to_next, &__from_next,
69					 __from_chunk_end - __from_next,
70					 __to_end - __to_next, &__state,
71					 (locale_t)_M_c_locale_codecvt);
72	if (__conv == static_cast<size_t>(-1))
73	  {
74	    // In case of error, in order to stop at the exact place we
75	    // have to start again from the beginning with a series of
76	    // wcrtomb.
77	    for (; __from < __from_next; ++__from)
78	      __to_next += wcrtomb_l(__to_next, *__from, &__tmp_state,
79		(locale_t)_M_c_locale_codecvt);
80	    __state = __tmp_state;
81	    __ret = error;
82	  }
83	else if (__from_next && __from_next < __from_chunk_end)
84	  {
85	    __to_next += __conv;
86	    __ret = partial;
87	  }
88	else
89	  {
90	    __from_next = __from_chunk_end;
91	    __to_next += __conv;
92	  }
93
94	if (__from_next < __from_end && __ret == ok)
95	  {
96	    extern_type __buf[MB_LEN_MAX];
97	    __tmp_state = __state;
98	    const size_t __conv2 = wcrtomb_l(__buf, *__from_next, &__tmp_state,
99		(locale_t)_M_c_locale_codecvt);
100	    if (__conv2 > static_cast<size_t>(__to_end - __to_next))
101	      __ret = partial;
102	    else
103	      {
104		memcpy(__to_next, __buf, __conv2);
105		__state = __tmp_state;
106		__to_next += __conv2;
107		++__from_next;
108	      }
109	  }
110      }
111
112    return __ret;
113  }
114
115  codecvt_base::result
116  codecvt<wchar_t, char, mbstate_t>::
117  do_in(state_type& __state, const extern_type* __from,
118	const extern_type* __from_end, const extern_type*& __from_next,
119	intern_type* __to, intern_type* __to_end,
120	intern_type*& __to_next) const
121  {
122    result __ret = ok;
123    state_type __tmp_state(__state);
124
125    // mbsnrtowcs is *very* fast but stops if encounters NUL characters:
126    // in case we store a L'\0' and then continue, in a loop.
127    // NB: mbsnrtowcs is a GNU extension
128    for (__from_next = __from, __to_next = __to;
129	 __from_next < __from_end && __to_next < __to_end
130	 && __ret == ok;)
131      {
132	const extern_type* __from_chunk_end;
133	__from_chunk_end = static_cast<const extern_type*>(memchr(__from_next, '\0',
134								  __from_end
135								  - __from_next));
136	if (!__from_chunk_end)
137	  __from_chunk_end = __from_end;
138
139	__from = __from_next;
140	size_t __conv = mbsnrtowcs_l(__to_next, &__from_next,
141				   __from_chunk_end - __from_next,
142				   __to_end - __to_next, &__state,
143				   (locale_t)_M_c_locale_codecvt);
144	if (__conv == static_cast<size_t>(-1))
145	  {
146	    // In case of error, in order to stop at the exact place we
147	    // have to start again from the beginning with a series of
148	    // mbrtowc.
149	    for (;; ++__to_next, __from += __conv)
150	      {
151		__conv = mbrtowc_l(__to_next, __from, __from_end - __from,
152				 &__tmp_state, (locale_t)_M_c_locale_codecvt);
153		if (__conv == static_cast<size_t>(-1)
154		    || __conv == static_cast<size_t>(-2))
155		  break;
156	      }
157	    __from_next = __from;
158	    __state = __tmp_state;
159	    __ret = error;
160	  }
161	else if (__from_next && __from_next < __from_chunk_end)
162	  {
163	    // It is unclear what to return in this case (see DR 382).
164	    __to_next += __conv;
165	    __ret = partial;
166	  }
167	else
168	  {
169	    __from_next = __from_chunk_end;
170	    __to_next += __conv;
171	  }
172
173	if (__from_next < __from_end && __ret == ok)
174	  {
175	    if (__to_next < __to_end)
176	      {
177		// XXX Probably wrong for stateful encodings
178		__tmp_state = __state;
179		++__from_next;
180		*__to_next++ = L'\0';
181	      }
182	    else
183	      __ret = partial;
184	  }
185      }
186
187    return __ret;
188  }
189
190  int
191  codecvt<wchar_t, char, mbstate_t>::
192  do_encoding() const throw()
193  {
194    // XXX This implementation assumes that the encoding is
195    // stateless and is either single-byte or variable-width.
196    return MB_CUR_MAX_L((locale_t)_M_c_locale_codecvt) == 1 ? 1 : 0;
197  }
198
199  int
200  codecvt<wchar_t, char, mbstate_t>::
201  do_max_length() const throw()
202  {
203    // XXX Probably wrong for stateful encodings.
204    return MB_CUR_MAX_L((locale_t)_M_c_locale_codecvt);
205  }
206
207  int
208  codecvt<wchar_t, char, mbstate_t>::
209  do_length(state_type& __state, const extern_type* __from,
210	    const extern_type* __end, size_t __max) const
211  {
212    int __ret = 0;
213    state_type __tmp_state(__state);
214
215    // mbsnrtowcs is *very* fast but stops if encounters NUL characters:
216    // in case we advance past it and then continue, in a loop.
217    // NB: mbsnrtowcs is a GNU extension
218
219    // A dummy internal buffer is needed in order for mbsnrtocws to consider
220    // its fourth parameter (it wouldn't with NULL as first parameter).
221    wchar_t* __to = static_cast<wchar_t*>(__builtin_alloca(sizeof(wchar_t)
222							   * __max));
223    while (__from < __end && __max)
224      {
225	const extern_type* __from_chunk_end;
226	__from_chunk_end = static_cast<const extern_type*>(memchr(__from, '\0',
227								  __end
228								  - __from));
229	if (!__from_chunk_end)
230	  __from_chunk_end = __end;
231
232	const extern_type* __tmp_from = __from;
233	size_t __conv = mbsnrtowcs_l(__to, &__from,
234				   __from_chunk_end - __from,
235				   __max, &__state,
236				   (locale_t)_M_c_locale_codecvt);
237	if (__conv == static_cast<size_t>(-1))
238	  {
239	    // In case of error, in order to stop at the exact place we
240	    // have to start again from the beginning with a series of
241	    // mbrtowc.
242	    for (__from = __tmp_from;; __from += __conv)
243	      {
244		__conv = mbrtowc_l(0, __from, __end - __from,
245				 &__tmp_state, (locale_t)_M_c_locale_codecvt);
246		if (__conv == static_cast<size_t>(-1)
247		    || __conv == static_cast<size_t>(-2))
248		  break;
249	      }
250	    __state = __tmp_state;
251	    __ret += __from - __tmp_from;
252	    break;
253	  }
254	if (!__from)
255	  __from = __from_chunk_end;
256
257	__ret += __from - __tmp_from;
258	__max -= __conv;
259
260	if (__from < __end && __max)
261	  {
262	    // XXX Probably wrong for stateful encodings
263	    __tmp_state = __state;
264	    ++__from;
265	    ++__ret;
266	    --__max;
267	  }
268      }
269
270    return __ret;
271  }
272#endif
273
274_GLIBCXX_END_NAMESPACE_VERSION
275} // namespace
276