1// std::codecvt implementation details, GNU version -*- C++ -*-
2
3// Copyright (C) 2002, 2003, 2005, 2006 Free Software Foundation, Inc.
4//
5// This file is part of the GNU ISO C++ Library.  This library is free
6// software; you can redistribute it and/or modify it under the
7// terms of the GNU General Public License as published by the
8// Free Software Foundation; either version 2, or (at your option)
9// any later version.
10
11// This library is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14// GNU General Public License for more details.
15
16// You should have received a copy of the GNU General Public License along
17// with this library; see the file COPYING.  If not, write to the Free
18// Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
19// USA.
20
21// As a special exception, you may use this file as part of a free software
22// library without restriction.  Specifically, if other files instantiate
23// templates or use macros or inline functions from this file, or you compile
24// this file and link it with other files to produce an executable, this
25// file does not by itself cause the resulting executable to be covered by
26// the GNU General Public License.  This exception does not however
27// invalidate any other reasons why the executable file might be covered by
28// the GNU General Public License.
29
30//
31// ISO C++ 14882: 22.2.1.5 - Template class codecvt
32//
33
34// Written by Benjamin Kosnik <bkoz@redhat.com>
35
36#include <locale>
37#include <bits/c++locale_internal.h>
38
39_GLIBCXX_BEGIN_NAMESPACE(std)
40
41  // Specializations.
42#ifdef _GLIBCXX_USE_WCHAR_T
43  codecvt_base::result
44  codecvt<wchar_t, char, mbstate_t>::
45  do_out(state_type& __state, const intern_type* __from,
46	 const intern_type* __from_end, const intern_type*& __from_next,
47	 extern_type* __to, extern_type* __to_end,
48	 extern_type*& __to_next) const
49  {
50    result __ret = ok;
51    state_type __tmp_state(__state);
52
53#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
54    __c_locale __old = __uselocale(_M_c_locale_codecvt);
55#endif
56
57    // wcsnrtombs is *very* fast but stops if encounters NUL characters:
58    // in case we fall back to wcrtomb and then continue, in a loop.
59    // NB: wcsnrtombs is a GNU extension
60    for (__from_next = __from, __to_next = __to;
61	 __from_next < __from_end && __to_next < __to_end
62	 && __ret == ok;)
63      {
64	const intern_type* __from_chunk_end = wmemchr(__from_next, L'\0',
65						      __from_end - __from_next);
66	if (!__from_chunk_end)
67	  __from_chunk_end = __from_end;
68
69	__from = __from_next;
70	const size_t __conv = wcsnrtombs(__to_next, &__from_next,
71					 __from_chunk_end - __from_next,
72					 __to_end - __to_next, &__state);
73	if (__conv == static_cast<size_t>(-1))
74	  {
75	    // In case of error, in order to stop at the exact place we
76	    // have to start again from the beginning with a series of
77	    // wcrtomb.
78	    for (; __from < __from_next; ++__from)
79	      __to_next += wcrtomb(__to_next, *__from, &__tmp_state);
80	    __state = __tmp_state;
81	    __ret = error;
82	  }
83	else if (__from_next && __from_next < __from_chunk_end)
84	  {
85	    __to_next += __conv;
86	    __ret = partial;
87	  }
88	else
89	  {
90	    __from_next = __from_chunk_end;
91	    __to_next += __conv;
92	  }
93
94	if (__from_next < __from_end && __ret == ok)
95	  {
96	    extern_type __buf[MB_LEN_MAX];
97	    __tmp_state = __state;
98	    const size_t __conv2 = wcrtomb(__buf, *__from_next, &__tmp_state);
99	    if (__conv2 > static_cast<size_t>(__to_end - __to_next))
100	      __ret = partial;
101	    else
102	      {
103		memcpy(__to_next, __buf, __conv2);
104		__state = __tmp_state;
105		__to_next += __conv2;
106		++__from_next;
107	      }
108	  }
109      }
110
111#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
112    __uselocale(__old);
113#endif
114
115    return __ret;
116  }
117
118  codecvt_base::result
119  codecvt<wchar_t, char, mbstate_t>::
120  do_in(state_type& __state, const extern_type* __from,
121	const extern_type* __from_end, const extern_type*& __from_next,
122	intern_type* __to, intern_type* __to_end,
123	intern_type*& __to_next) const
124  {
125    result __ret = ok;
126    state_type __tmp_state(__state);
127
128#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
129    __c_locale __old = __uselocale(_M_c_locale_codecvt);
130#endif
131
132    // mbsnrtowcs is *very* fast but stops if encounters NUL characters:
133    // in case we store a L'\0' and then continue, in a loop.
134    // NB: mbsnrtowcs is a GNU extension
135    for (__from_next = __from, __to_next = __to;
136	 __from_next < __from_end && __to_next < __to_end
137	 && __ret == ok;)
138      {
139	const extern_type* __from_chunk_end;
140	__from_chunk_end = static_cast<const extern_type*>(memchr(__from_next, '\0',
141								  __from_end
142								  - __from_next));
143	if (!__from_chunk_end)
144	  __from_chunk_end = __from_end;
145
146	__from = __from_next;
147	size_t __conv = mbsnrtowcs(__to_next, &__from_next,
148				   __from_chunk_end - __from_next,
149				   __to_end - __to_next, &__state);
150	if (__conv == static_cast<size_t>(-1))
151	  {
152	    // In case of error, in order to stop at the exact place we
153	    // have to start again from the beginning with a series of
154	    // mbrtowc.
155	    for (;; ++__to_next, __from += __conv)
156	      {
157		__conv = mbrtowc(__to_next, __from, __from_end - __from,
158				 &__tmp_state);
159		if (__conv == static_cast<size_t>(-1)
160		    || __conv == static_cast<size_t>(-2))
161		  break;
162	      }
163	    __from_next = __from;
164	    __state = __tmp_state;
165	    __ret = error;
166	  }
167	else if (__from_next && __from_next < __from_chunk_end)
168	  {
169	    // It is unclear what to return in this case (see DR 382).
170	    __to_next += __conv;
171	    __ret = partial;
172	  }
173	else
174	  {
175	    __from_next = __from_chunk_end;
176	    __to_next += __conv;
177	  }
178
179	if (__from_next < __from_end && __ret == ok)
180	  {
181	    if (__to_next < __to_end)
182	      {
183		// XXX Probably wrong for stateful encodings
184		__tmp_state = __state;
185		++__from_next;
186		*__to_next++ = L'\0';
187	      }
188	    else
189	      __ret = partial;
190	  }
191      }
192
193#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
194    __uselocale(__old);
195#endif
196
197    return __ret;
198  }
199
200  int
201  codecvt<wchar_t, char, mbstate_t>::
202  do_encoding() const throw()
203  {
204    // XXX This implementation assumes that the encoding is
205    // stateless and is either single-byte or variable-width.
206    int __ret = 0;
207#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
208    __c_locale __old = __uselocale(_M_c_locale_codecvt);
209#endif
210    if (MB_CUR_MAX == 1)
211      __ret = 1;
212#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
213    __uselocale(__old);
214#endif
215    return __ret;
216  }
217
218  int
219  codecvt<wchar_t, char, mbstate_t>::
220  do_max_length() const throw()
221  {
222#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
223    __c_locale __old = __uselocale(_M_c_locale_codecvt);
224#endif
225    // XXX Probably wrong for stateful encodings.
226    int __ret = MB_CUR_MAX;
227#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
228    __uselocale(__old);
229#endif
230    return __ret;
231  }
232
233  int
234  codecvt<wchar_t, char, mbstate_t>::
235  do_length(state_type& __state, const extern_type* __from,
236	    const extern_type* __end, size_t __max) const
237  {
238    int __ret = 0;
239    state_type __tmp_state(__state);
240
241#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
242    __c_locale __old = __uselocale(_M_c_locale_codecvt);
243#endif
244
245    // mbsnrtowcs is *very* fast but stops if encounters NUL characters:
246    // in case we advance past it and then continue, in a loop.
247    // NB: mbsnrtowcs is a GNU extension
248
249    // A dummy internal buffer is needed in order for mbsnrtocws to consider
250    // its fourth parameter (it wouldn't with NULL as first parameter).
251    wchar_t* __to = static_cast<wchar_t*>(__builtin_alloca(sizeof(wchar_t)
252							   * __max));
253    while (__from < __end && __max)
254      {
255	const extern_type* __from_chunk_end;
256	__from_chunk_end = static_cast<const extern_type*>(memchr(__from, '\0',
257								  __end
258								  - __from));
259	if (!__from_chunk_end)
260	  __from_chunk_end = __end;
261
262	const extern_type* __tmp_from = __from;
263	size_t __conv = mbsnrtowcs(__to, &__from,
264				   __from_chunk_end - __from,
265				   __max, &__state);
266	if (__conv == static_cast<size_t>(-1))
267	  {
268	    // In case of error, in order to stop at the exact place we
269	    // have to start again from the beginning with a series of
270	    // mbrtowc.
271	    for (__from = __tmp_from;; __from += __conv)
272	      {
273		__conv = mbrtowc(NULL, __from, __end - __from,
274				 &__tmp_state);
275		if (__conv == static_cast<size_t>(-1)
276		    || __conv == static_cast<size_t>(-2))
277		  break;
278	      }
279	    __state = __tmp_state;
280	    __ret += __from - __tmp_from;
281	    break;
282	  }
283	if (!__from)
284	  __from = __from_chunk_end;
285
286	__ret += __from - __tmp_from;
287	__max -= __conv;
288
289	if (__from < __end && __max)
290	  {
291	    // XXX Probably wrong for stateful encodings
292	    __tmp_state = __state;
293	    ++__from;
294	    ++__ret;
295	    --__max;
296	  }
297      }
298
299#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
300    __uselocale(__old);
301#endif
302
303    return __ret;
304  }
305#endif
306
307_GLIBCXX_END_NAMESPACE
308