1// std::codecvt implementation details, GNU version -*- C++ -*-
2
3// Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2009
4// Free Software Foundation, Inc.
5//
6// This file is part of the GNU ISO C++ Library.  This library is free
7// software; you can redistribute it and/or modify it under the
8// terms of the GNU General Public License as published by the
9// Free Software Foundation; either version 3, or (at your option)
10// any later version.
11
12// This library is distributed in the hope that it will be useful,
13// but WITHOUT ANY WARRANTY; without even the implied warranty of
14// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15// GNU General Public License for more details.
16
17// Under Section 7 of GPL version 3, you are granted additional
18// permissions described in the GCC Runtime Library Exception, version
19// 3.1, as published by the Free Software Foundation.
20
21// You should have received a copy of the GNU General Public License and
22// a copy of the GCC Runtime Library Exception along with this program;
23// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24// <http://www.gnu.org/licenses/>.
25
26//
27// ISO C++ 14882: 22.2.1.5 - Template class codecvt
28//
29
30// Written by Benjamin Kosnik <bkoz@redhat.com>
31
32#include <locale>
33#include <cstdlib>  // For MB_CUR_MAX
34#include <climits>  // For MB_LEN_MAX
35#include <bits/c++locale_internal.h>
36
37_GLIBCXX_BEGIN_NAMESPACE(std)
38
39  // Specializations.
40#ifdef _GLIBCXX_USE_WCHAR_T
  // Converts the wide characters in [__from, __from_end) to the external
  // multibyte encoding, storing the bytes in [__to, __to_end).
  // __from_next and __to_next are reset to __from and __to and then
  // advanced as input is consumed and output is produced; __state is
  // updated along the way.  The result is ok unless one of the branches
  // below sets partial (conversion stopped inside a chunk, or the
  // encoding of a NUL does not fit in the remaining output) or error
  // (wcsnrtombs reported a failure).
  codecvt_base::result
  codecvt<wchar_t, char, mbstate_t>::
  do_out(state_type& __state, const intern_type* __from,
	 const intern_type* __from_end, const intern_type*& __from_next,
	 extern_type* __to, extern_type* __to_end,
	 extern_type*& __to_next) const
  {
    result __ret = ok;
    // Copy of the state used for the character-by-character replay in the
    // error path and for the tentative conversion of a NUL.
    state_type __tmp_state(__state);

#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
    // Switch to the locale of this facet; restored before returning.
    __c_locale __old = __uselocale(_M_c_locale_codecvt);
#endif

    // wcsnrtombs is *very* fast but stops if it encounters NUL characters:
    // in that case we fall back to wcrtomb for the NUL and then continue,
    // in a loop.
    // NB: wcsnrtombs is a GNU extension
    for (__from_next = __from, __to_next = __to;
	 __from_next < __from_end && __to_next < __to_end
	 && __ret == ok;)
      {
	// The current chunk runs up to the next NUL, or up to the end of
	// the input when no NUL is left.
	const intern_type* __from_chunk_end = wmemchr(__from_next, L'\0',
						      __from_end - __from_next);
	if (!__from_chunk_end)
	  __from_chunk_end = __from_end;

	// Remember where this chunk starts: the error path replays from here.
	__from = __from_next;
	const size_t __conv = wcsnrtombs(__to_next, &__from_next,
					 __from_chunk_end - __from_next,
					 __to_end - __to_next, &__state);
	if (__conv == static_cast<size_t>(-1))
	  {
	    // In case of error, in order to stop at the exact place we
	    // have to start again from the beginning with a series of
	    // wcrtomb.
	    // NOTE(review): the wcrtomb results are added unchecked;
	    // presumably every character before __from_next is known to
	    // convert -- confirm.
	    for (; __from < __from_next; ++__from)
	      __to_next += wcrtomb(__to_next, *__from, &__tmp_state);
	    __state = __tmp_state;
	    __ret = error;
	  }
	else if (__from_next && __from_next < __from_chunk_end)
	  {
	    // Conversion stopped before the end of the chunk: account for
	    // the bytes written and report partial.
	    __to_next += __conv;
	    __ret = partial;
	  }
	else
	  {
	    // The whole chunk was converted.
	    __from_next = __from_chunk_end;
	    __to_next += __conv;
	  }

	if (__from_next < __from_end && __ret == ok)
	  {
	    // Input remains after the chunk.  Convert the character at
	    // __from_next into a local buffer with a copy of the state, and
	    // commit only if the result fits in the remaining output.
	    extern_type __buf[MB_LEN_MAX];
	    __tmp_state = __state;
	    const size_t __conv2 = wcrtomb(__buf, *__from_next, &__tmp_state);
	    if (__conv2 > static_cast<size_t>(__to_end - __to_next))
	      __ret = partial;
	    else
	      {
		memcpy(__to_next, __buf, __conv2);
		__state = __tmp_state;
		__to_next += __conv2;
		++__from_next;
	      }
	  }
      }

#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
    __uselocale(__old);
#endif

    return __ret;
  }
115
  // Converts the external multibyte bytes in [__from, __from_end) to wide
  // characters, storing them in [__to, __to_end).  __from_next and
  // __to_next are reset to __from and __to and then advanced as input is
  // consumed and output is produced; __state is updated along the way.
  // The result is ok unless one of the branches below sets partial
  // (conversion stopped inside a chunk, or no room is left to store a
  // L'\0') or error (mbsnrtowcs reported a failure).
  codecvt_base::result
  codecvt<wchar_t, char, mbstate_t>::
  do_in(state_type& __state, const extern_type* __from,
	const extern_type* __from_end, const extern_type*& __from_next,
	intern_type* __to, intern_type* __to_end,
	intern_type*& __to_next) const
  {
    result __ret = ok;
    // Copy of the state used for the character-by-character replay in the
    // error path; refreshed from __state each time a NUL is stepped over.
    state_type __tmp_state(__state);

#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
    // Switch to the locale of this facet; restored before returning.
    __c_locale __old = __uselocale(_M_c_locale_codecvt);
#endif

    // mbsnrtowcs is *very* fast but stops if it encounters NUL characters:
    // in that case we store a L'\0' and then continue, in a loop.
    // NB: mbsnrtowcs is a GNU extension
    for (__from_next = __from, __to_next = __to;
	 __from_next < __from_end && __to_next < __to_end
	 && __ret == ok;)
      {
	// The current chunk runs up to the next NUL byte, or up to the end
	// of the input when no NUL is left.
	const extern_type* __from_chunk_end;
	__from_chunk_end = static_cast<const extern_type*>(memchr(__from_next, '\0',
								  __from_end
								  - __from_next));
	if (!__from_chunk_end)
	  __from_chunk_end = __from_end;

	// Remember where this chunk starts: the error path replays from here.
	__from = __from_next;
	size_t __conv = mbsnrtowcs(__to_next, &__from_next,
				   __from_chunk_end - __from_next,
				   __to_end - __to_next, &__state);
	if (__conv == static_cast<size_t>(-1))
	  {
	    // In case of error, in order to stop at the exact place we
	    // have to start again from the beginning with a series of
	    // mbrtowc.
	    // The loop ends at the first sequence mbrtowc reports as
	    // invalid (-1) or incomplete (-2); until then each call stores
	    // one wide character and __from skips the bytes it consumed.
	    for (;; ++__to_next, __from += __conv)
	      {
		__conv = mbrtowc(__to_next, __from, __from_end - __from,
				 &__tmp_state);
		if (__conv == static_cast<size_t>(-1)
		    || __conv == static_cast<size_t>(-2))
		  break;
	      }
	    __from_next = __from;
	    __state = __tmp_state;
	    __ret = error;
	  }
	else if (__from_next && __from_next < __from_chunk_end)
	  {
	    // It is unclear what to return in this case (see DR 382).
	    __to_next += __conv;
	    __ret = partial;
	  }
	else
	  {
	    // The whole chunk was converted.
	    __from_next = __from_chunk_end;
	    __to_next += __conv;
	  }

	if (__from_next < __from_end && __ret == ok)
	  {
	    // Input remains after the chunk: consume one byte and store a
	    // L'\0' for it, provided there is room in the output.
	    if (__to_next < __to_end)
	      {
		// XXX Probably wrong for stateful encodings
		__tmp_state = __state;
		++__from_next;
		*__to_next++ = L'\0';
	      }
	    else
	      __ret = partial;
	  }
      }

#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
    __uselocale(__old);
#endif

    return __ret;
  }
197
198  int
199  codecvt<wchar_t, char, mbstate_t>::
200  do_encoding() const throw()
201  {
202    // XXX This implementation assumes that the encoding is
203    // stateless and is either single-byte or variable-width.
204    int __ret = 0;
205#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
206    __c_locale __old = __uselocale(_M_c_locale_codecvt);
207#endif
208    if (MB_CUR_MAX == 1)
209      __ret = 1;
210#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
211    __uselocale(__old);
212#endif
213    return __ret;
214  }
215
216  int
217  codecvt<wchar_t, char, mbstate_t>::
218  do_max_length() const throw()
219  {
220#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
221    __c_locale __old = __uselocale(_M_c_locale_codecvt);
222#endif
223    // XXX Probably wrong for stateful encodings.
224    int __ret = MB_CUR_MAX;
225#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
226    __uselocale(__old);
227#endif
228    return __ret;
229  }
230
231  int
232  codecvt<wchar_t, char, mbstate_t>::
233  do_length(state_type& __state, const extern_type* __from,
234	    const extern_type* __end, size_t __max) const
235  {
236    int __ret = 0;
237    state_type __tmp_state(__state);
238
239#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
240    __c_locale __old = __uselocale(_M_c_locale_codecvt);
241#endif
242
243    // mbsnrtowcs is *very* fast but stops if encounters NUL characters:
244    // in case we advance past it and then continue, in a loop.
245    // NB: mbsnrtowcs is a GNU extension
246
247    // A dummy internal buffer is needed in order for mbsnrtocws to consider
248    // its fourth parameter (it wouldn't with NULL as first parameter).
249    wchar_t* __to = static_cast<wchar_t*>(__builtin_alloca(sizeof(wchar_t)
250							   * __max));
251    while (__from < __end && __max)
252      {
253	const extern_type* __from_chunk_end;
254	__from_chunk_end = static_cast<const extern_type*>(memchr(__from, '\0',
255								  __end
256								  - __from));
257	if (!__from_chunk_end)
258	  __from_chunk_end = __end;
259
260	const extern_type* __tmp_from = __from;
261	size_t __conv = mbsnrtowcs(__to, &__from,
262				   __from_chunk_end - __from,
263				   __max, &__state);
264	if (__conv == static_cast<size_t>(-1))
265	  {
266	    // In case of error, in order to stop at the exact place we
267	    // have to start again from the beginning with a series of
268	    // mbrtowc.
269	    for (__from = __tmp_from;; __from += __conv)
270	      {
271		__conv = mbrtowc(NULL, __from, __end - __from,
272				 &__tmp_state);
273		if (__conv == static_cast<size_t>(-1)
274		    || __conv == static_cast<size_t>(-2))
275		  break;
276	      }
277	    __state = __tmp_state;
278	    __ret += __from - __tmp_from;
279	    break;
280	  }
281	if (!__from)
282	  __from = __from_chunk_end;
283
284	__ret += __from - __tmp_from;
285	__max -= __conv;
286
287	if (__from < __end && __max)
288	  {
289	    // XXX Probably wrong for stateful encodings
290	    __tmp_state = __state;
291	    ++__from;
292	    ++__ret;
293	    --__max;
294	  }
295      }
296
297#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
298    __uselocale(__old);
299#endif
300
301    return __ret;
302  }
303#endif
304
305_GLIBCXX_END_NAMESPACE
306