1103447Skan// std::codecvt implementation details, generic version -*- C++ -*-
2103447Skan
3169691Skan// Copyright (C) 2002, 2005 Free Software Foundation, Inc.
4103447Skan//
5103447Skan// This file is part of the GNU ISO C++ Library.  This library is free
6103447Skan// software; you can redistribute it and/or modify it under the
7103447Skan// terms of the GNU General Public License as published by the
8103447Skan// Free Software Foundation; either version 2, or (at your option)
9103447Skan// any later version.
10103447Skan
11103447Skan// This library is distributed in the hope that it will be useful,
12103447Skan// but WITHOUT ANY WARRANTY; without even the implied warranty of
13103447Skan// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14103447Skan// GNU General Public License for more details.
15103447Skan
16103447Skan// You should have received a copy of the GNU General Public License along
17103447Skan// with this library; see the file COPYING.  If not, write to the Free
18169691Skan// Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
19103447Skan// USA.
20103447Skan
21103447Skan// As a special exception, you may use this file as part of a free software
22103447Skan// library without restriction.  Specifically, if other files instantiate
23103447Skan// templates or use macros or inline functions from this file, or you compile
24103447Skan// this file and link it with other files to produce an executable, this
25103447Skan// file does not by itself cause the resulting executable to be covered by
26103447Skan// the GNU General Public License.  This exception does not however
27103447Skan// invalidate any other reasons why the executable file might be covered by
28103447Skan// the GNU General Public License.
29103447Skan
30103447Skan//
31103447Skan// ISO C++ 14882: 22.2.1.5 - Template class codecvt
32103447Skan//
33103447Skan
34103447Skan// Written by Benjamin Kosnik <bkoz@redhat.com>
35103447Skan
36103447Skan#include <locale>
37103447Skan
38169691Skan_GLIBCXX_BEGIN_NAMESPACE(std)
39169691Skan
40103447Skan  // Specializations.
41132720Skan#ifdef _GLIBCXX_USE_WCHAR_T
42103447Skan  codecvt_base::result
43103447Skan  codecvt<wchar_t, char, mbstate_t>::
44103447Skan  do_out(state_type& __state, const intern_type* __from,
45103447Skan	 const intern_type* __from_end, const intern_type*& __from_next,
46103447Skan	 extern_type* __to, extern_type* __to_end,
47103447Skan	 extern_type*& __to_next) const
48103447Skan  {
49132720Skan    result __ret = ok;
50132720Skan    // The conversion must be done using a temporary destination buffer
51132720Skan    // since it is not possible to pass the size of the buffer to wcrtomb
52132720Skan    state_type __tmp_state(__state);
53103447Skan
54132720Skan    // The conversion must be done by calling wcrtomb in a loop rather
55132720Skan    // than using wcsrtombs because wcsrtombs assumes that the input is
56132720Skan    // zero-terminated.
57132720Skan
58132720Skan    // Either we can upper bound the total number of external characters to
59132720Skan    // something smaller than __to_end - __to or the conversion must be done
60132720Skan    // using a temporary destination buffer since it is not possible to
61132720Skan    // pass the size of the buffer to wcrtomb
62132720Skan    if (MB_CUR_MAX * (__from_end - __from) - (__to_end - __to) <= 0)
63132720Skan      while (__from < __from_end)
64132720Skan	{
65132720Skan	  const size_t __conv = wcrtomb(__to, *__from, &__tmp_state);
66132720Skan	  if (__conv == static_cast<size_t>(-1))
67132720Skan	    {
68132720Skan	      __ret = error;
69132720Skan	      break;
70132720Skan	    }
71132720Skan	  __state = __tmp_state;
72132720Skan	  __to += __conv;
73132720Skan	  __from++;
74132720Skan	}
75132720Skan    else
76103447Skan      {
77132720Skan	extern_type __buf[MB_LEN_MAX];
78132720Skan	while (__from < __from_end && __to < __to_end)
79132720Skan	  {
80132720Skan	    const size_t __conv = wcrtomb(__buf, *__from, &__tmp_state);
81132720Skan	    if (__conv == static_cast<size_t>(-1))
82132720Skan	      {
83132720Skan		__ret = error;
84132720Skan		break;
85132720Skan	      }
86132720Skan	    else if (__conv > static_cast<size_t>(__to_end - __to))
87132720Skan	      {
88132720Skan		__ret = partial;
89132720Skan		break;
90132720Skan	      }
91132720Skan
92132720Skan	    memcpy(__to, __buf, __conv);
93132720Skan	    __state = __tmp_state;
94132720Skan	    __to += __conv;
95132720Skan	    __from++;
96132720Skan	  }
97103447Skan      }
98132720Skan
99132720Skan    if (__ret == ok && __from < __from_end)
100132720Skan      __ret = partial;
101132720Skan
102132720Skan    __from_next = __from;
103132720Skan    __to_next = __to;
104103447Skan    return __ret;
105103447Skan  }
106103447Skan
107103447Skan  codecvt_base::result
108103447Skan  codecvt<wchar_t, char, mbstate_t>::
109103447Skan  do_in(state_type& __state, const extern_type* __from,
110103447Skan	const extern_type* __from_end, const extern_type*& __from_next,
111103447Skan	intern_type* __to, intern_type* __to_end,
112103447Skan	intern_type*& __to_next) const
113103447Skan  {
114132720Skan    result __ret = ok;
115132720Skan    // This temporary state object is neccessary so __state won't be modified
116132720Skan    // if [__from, __from_end) is a partial multibyte character.
117132720Skan    state_type __tmp_state(__state);
118103447Skan
119132720Skan    // Conversion must be done by calling mbrtowc in a loop rather than
120132720Skan    // by calling mbsrtowcs because mbsrtowcs assumes that the input
121132720Skan    // sequence is zero-terminated.
122132720Skan    while (__from < __from_end && __to < __to_end)
123103447Skan      {
124132720Skan	size_t __conv = mbrtowc(__to, __from, __from_end - __from,
125132720Skan				&__tmp_state);
126132720Skan	if (__conv == static_cast<size_t>(-1))
127132720Skan	  {
128132720Skan	    __ret = error;
129132720Skan	    break;
130132720Skan	  }
131132720Skan	else if (__conv == static_cast<size_t>(-2))
132132720Skan	  {
133132720Skan	    // It is unclear what to return in this case (see DR 382).
134132720Skan	    __ret = partial;
135132720Skan	    break;
136132720Skan	  }
137132720Skan	else if (__conv == 0)
138132720Skan	  {
139132720Skan	    // XXX Probably wrong for stateful encodings
140132720Skan	    __conv = 1;
141132720Skan	    *__to = L'\0';
142132720Skan	  }
143132720Skan
144132720Skan	__state = __tmp_state;
145132720Skan	__to++;
146132720Skan	__from += __conv;
147103447Skan      }
148132720Skan
149132720Skan    // It is not clear that __from < __from_end implies __ret != ok
150132720Skan    // (see DR 382).
151132720Skan    if (__ret == ok && __from < __from_end)
152132720Skan      __ret = partial;
153132720Skan
154132720Skan    __from_next = __from;
155132720Skan    __to_next = __to;
156132720Skan    return __ret;
157132720Skan  }
158132720Skan
159132720Skan  int
160132720Skan  codecvt<wchar_t, char, mbstate_t>::
161132720Skan  do_encoding() const throw()
162132720Skan  {
163132720Skan    // XXX This implementation assumes that the encoding is
164132720Skan    // stateless and is either single-byte or variable-width.
165132720Skan    int __ret = 0;
166132720Skan    if (MB_CUR_MAX == 1)
167132720Skan      __ret = 1;
168132720Skan    return __ret;
169132720Skan  }
170132720Skan
171132720Skan  int
172132720Skan  codecvt<wchar_t, char, mbstate_t>::
173132720Skan  do_max_length() const throw()
174132720Skan  {
175132720Skan    // XXX Probably wrong for stateful encodings.
176132720Skan    int __ret = MB_CUR_MAX;
177132720Skan    return __ret;
178132720Skan  }
179132720Skan
180132720Skan  int
181132720Skan  codecvt<wchar_t, char, mbstate_t>::
182132720Skan  do_length(state_type& __state, const extern_type* __from,
183132720Skan	    const extern_type* __end, size_t __max) const
184132720Skan  {
185132720Skan    int __ret = 0;
186132720Skan    state_type __tmp_state(__state);
187132720Skan
188132720Skan    while (__from < __end && __max)
189103447Skan      {
190132720Skan	size_t __conv = mbrtowc(NULL, __from, __end - __from, &__tmp_state);
191132720Skan	if (__conv == static_cast<size_t>(-1))
192132720Skan	  {
193132720Skan	    // Invalid source character
194132720Skan	    break;
195132720Skan	  }
196132720Skan	else if (__conv == static_cast<size_t>(-2))
197132720Skan	  {
198132720Skan	    // Remainder of input does not form a complete destination
199132720Skan	    // character.
200132720Skan	    break;
201132720Skan	  }
202132720Skan	else if (__conv == 0)
203132720Skan	  {
204132720Skan	    // XXX Probably wrong for stateful encodings
205132720Skan	    __conv = 1;
206132720Skan	  }
207132720Skan
208132720Skan	__state = __tmp_state;
209132720Skan	__from += __conv;
210132720Skan	__ret += __conv;
211132720Skan	__max--;
212103447Skan      }
213132720Skan
214103447Skan    return __ret;
215103447Skan  }
216103447Skan#endif
217169691Skan
218169691Skan_GLIBCXX_END_NAMESPACE
219