1// Locale support (codecvt) -*- C++ -*-
2
3// Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
4// 2009, 2010  Free Software Foundation, Inc.
5//
6// This file is part of the GNU ISO C++ Library.  This library is free
7// software; you can redistribute it and/or modify it under the
8// terms of the GNU General Public License as published by the
9// Free Software Foundation; either version 3, or (at your option)
10// any later version.
11
12// This library is distributed in the hope that it will be useful,
13// but WITHOUT ANY WARRANTY; without even the implied warranty of
14// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15// GNU General Public License for more details.
16
17// Under Section 7 of GPL version 3, you are granted additional
18// permissions described in the GCC Runtime Library Exception, version
19// 3.1, as published by the Free Software Foundation.
20
21// You should have received a copy of the GNU General Public License and
22// a copy of the GCC Runtime Library Exception along with this program;
23// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24// <http://www.gnu.org/licenses/>.
25
26/** @file bits/codecvt.h
27 *  This is an internal header file, included by other library headers.
28 *  You should not attempt to use it directly.
29 */
30
31//
32// ISO C++ 14882: 22.2.1.5 Template class codecvt
33//
34
35// Written by Benjamin Kosnik <bkoz@redhat.com>
36
37#ifndef _CODECVT_H
38#define _CODECVT_H 1
39
40#pragma GCC system_header
41
42_GLIBCXX_BEGIN_NAMESPACE(std)
43
44  /// Empty base class for codecvt facet [22.2.1.5].
45  class codecvt_base
46  {
47  public:
48    enum result
49    {
50      ok,
51      partial,
52      error,
53      noconv
54    };
55  };
56
57  /**
58   *  @brief  Common base for codecvt functions.
59   *
60   *  This template class provides implementations of the public functions
61   *  that forward to the protected virtual functions.
62   *
63   *  This template also provides abstract stubs for the protected virtual
64   *  functions.
65  */
66  template<typename _InternT, typename _ExternT, typename _StateT>
67    class __codecvt_abstract_base
68    : public locale::facet, public codecvt_base
69    {
70    public:
71      // Types:
72      typedef codecvt_base::result	result;
73      typedef _InternT			intern_type;
74      typedef _ExternT			extern_type;
75      typedef _StateT			state_type;
76
77      // 22.2.1.5.1 codecvt members
78      /**
79       *  @brief  Convert from internal to external character set.
80       *
81       *  Converts input string of intern_type to output string of
82       *  extern_type.  This is analogous to wcsrtombs.  It does this by
83       *  calling codecvt::do_out.
84       *
85       *  The source and destination character sets are determined by the
86       *  facet's locale, internal and external types.
87       *
88       *  The characters in [from,from_end) are converted and written to
89       *  [to,to_end).  from_next and to_next are set to point to the
90       *  character following the last successfully converted character,
91       *  respectively.  If the result needed no conversion, from_next and
92       *  to_next are not affected.
93       *
94       *  The @a state argument should be initialized if the input is at the
95       *  beginning and carried from a previous call if continuing
96       *  conversion.  There are no guarantees about how @a state is used.
97       *
98       *  The result returned is a member of codecvt_base::result.  If
99       *  all the input is converted, returns codecvt_base::ok.  If no
100       *  conversion is necessary, returns codecvt_base::noconv.  If
101       *  the input ends early or there is insufficient space in the
102       *  output, returns codecvt_base::partial.  Otherwise the
103       *  conversion failed and codecvt_base::error is returned.
104       *
105       *  @param  state  Persistent conversion state data.
106       *  @param  from  Start of input.
107       *  @param  from_end  End of input.
108       *  @param  from_next  Returns start of unconverted data.
109       *  @param  to  Start of output buffer.
110       *  @param  to_end  End of output buffer.
111       *  @param  to_next  Returns start of unused output area.
112       *  @return  codecvt_base::result.
113      */
114      result
115      out(state_type& __state, const intern_type* __from,
116	  const intern_type* __from_end, const intern_type*& __from_next,
117	  extern_type* __to, extern_type* __to_end,
118	  extern_type*& __to_next) const
119      {
120	return this->do_out(__state, __from, __from_end, __from_next,
121			    __to, __to_end, __to_next);
122      }
123
124      /**
125       *  @brief  Reset conversion state.
126       *
127       *  Writes characters to output that would restore @a state to initial
128       *  conditions.  The idea is that if a partial conversion occurs, then
129       *  the converting the characters written by this function would leave
130       *  the state in initial conditions, rather than partial conversion
131       *  state.  It does this by calling codecvt::do_unshift().
132       *
133       *  For example, if 4 external characters always converted to 1 internal
134       *  character, and input to in() had 6 external characters with state
135       *  saved, this function would write two characters to the output and
136       *  set the state to initialized conditions.
137       *
138       *  The source and destination character sets are determined by the
139       *  facet's locale, internal and external types.
140       *
141       *  The result returned is a member of codecvt_base::result.  If the
142       *  state could be reset and data written, returns codecvt_base::ok.  If
143       *  no conversion is necessary, returns codecvt_base::noconv.  If the
144       *  output has insufficient space, returns codecvt_base::partial.
145       *  Otherwise the reset failed and codecvt_base::error is returned.
146       *
147       *  @param  state  Persistent conversion state data.
148       *  @param  to  Start of output buffer.
149       *  @param  to_end  End of output buffer.
150       *  @param  to_next  Returns start of unused output area.
151       *  @return  codecvt_base::result.
152      */
153      result
154      unshift(state_type& __state, extern_type* __to, extern_type* __to_end,
155	      extern_type*& __to_next) const
156      { return this->do_unshift(__state, __to,__to_end,__to_next); }
157
158      /**
159       *  @brief  Convert from external to internal character set.
160       *
161       *  Converts input string of extern_type to output string of
162       *  intern_type.  This is analogous to mbsrtowcs.  It does this by
163       *  calling codecvt::do_in.
164       *
165       *  The source and destination character sets are determined by the
166       *  facet's locale, internal and external types.
167       *
168       *  The characters in [from,from_end) are converted and written to
169       *  [to,to_end).  from_next and to_next are set to point to the
170       *  character following the last successfully converted character,
171       *  respectively.  If the result needed no conversion, from_next and
172       *  to_next are not affected.
173       *
174       *  The @a state argument should be initialized if the input is at the
175       *  beginning and carried from a previous call if continuing
176       *  conversion.  There are no guarantees about how @a state is used.
177       *
178       *  The result returned is a member of codecvt_base::result.  If
179       *  all the input is converted, returns codecvt_base::ok.  If no
180       *  conversion is necessary, returns codecvt_base::noconv.  If
181       *  the input ends early or there is insufficient space in the
182       *  output, returns codecvt_base::partial.  Otherwise the
183       *  conversion failed and codecvt_base::error is returned.
184       *
185       *  @param  state  Persistent conversion state data.
186       *  @param  from  Start of input.
187       *  @param  from_end  End of input.
188       *  @param  from_next  Returns start of unconverted data.
189       *  @param  to  Start of output buffer.
190       *  @param  to_end  End of output buffer.
191       *  @param  to_next  Returns start of unused output area.
192       *  @return  codecvt_base::result.
193      */
194      result
195      in(state_type& __state, const extern_type* __from,
196	 const extern_type* __from_end, const extern_type*& __from_next,
197	 intern_type* __to, intern_type* __to_end,
198	 intern_type*& __to_next) const
199      {
200	return this->do_in(__state, __from, __from_end, __from_next,
201			   __to, __to_end, __to_next);
202      }
203
204      int
205      encoding() const throw()
206      { return this->do_encoding(); }
207
208      bool
209      always_noconv() const throw()
210      { return this->do_always_noconv(); }
211
212      int
213      length(state_type& __state, const extern_type* __from,
214	     const extern_type* __end, size_t __max) const
215      { return this->do_length(__state, __from, __end, __max); }
216
217      int
218      max_length() const throw()
219      { return this->do_max_length(); }
220
221    protected:
222      explicit
223      __codecvt_abstract_base(size_t __refs = 0) : locale::facet(__refs) { }
224
225      virtual
226      ~__codecvt_abstract_base() { }
227
228      /**
229       *  @brief  Convert from internal to external character set.
230       *
231       *  Converts input string of intern_type to output string of
232       *  extern_type.  This function is a hook for derived classes to change
233       *  the value returned.  @see out for more information.
234      */
235      virtual result
236      do_out(state_type& __state, const intern_type* __from,
237	     const intern_type* __from_end, const intern_type*& __from_next,
238	     extern_type* __to, extern_type* __to_end,
239	     extern_type*& __to_next) const = 0;
240
241      virtual result
242      do_unshift(state_type& __state, extern_type* __to,
243		 extern_type* __to_end, extern_type*& __to_next) const = 0;
244
245      virtual result
246      do_in(state_type& __state, const extern_type* __from,
247	    const extern_type* __from_end, const extern_type*& __from_next,
248	    intern_type* __to, intern_type* __to_end,
249	    intern_type*& __to_next) const = 0;
250
251      virtual int
252      do_encoding() const throw() = 0;
253
254      virtual bool
255      do_always_noconv() const throw() = 0;
256
257      virtual int
258      do_length(state_type&, const extern_type* __from,
259		const extern_type* __end, size_t __max) const = 0;
260
261      virtual int
262      do_max_length() const throw() = 0;
263    };
264
265
266
267  /**
268   *  @brief  Primary class template codecvt.
269   *  @ingroup locales
270   *
271   *  NB: Generic, mostly useless implementation.
272   *
273  */
274   template<typename _InternT, typename _ExternT, typename _StateT>
275    class codecvt
276    : public __codecvt_abstract_base<_InternT, _ExternT, _StateT>
277    {
278    public:
279      // Types:
280      typedef codecvt_base::result	result;
281      typedef _InternT			intern_type;
282      typedef _ExternT			extern_type;
283      typedef _StateT			state_type;
284
285    protected:
286      __c_locale			_M_c_locale_codecvt;
287
288    public:
289      static locale::id			id;
290
291      explicit
292      codecvt(size_t __refs = 0)
293      : __codecvt_abstract_base<_InternT, _ExternT, _StateT> (__refs) { }
294
295      explicit
296      codecvt(__c_locale __cloc, size_t __refs = 0);
297
298    protected:
299      virtual
300      ~codecvt() { }
301
302      virtual result
303      do_out(state_type& __state, const intern_type* __from,
304	     const intern_type* __from_end, const intern_type*& __from_next,
305	     extern_type* __to, extern_type* __to_end,
306	     extern_type*& __to_next) const;
307
308      virtual result
309      do_unshift(state_type& __state, extern_type* __to,
310		 extern_type* __to_end, extern_type*& __to_next) const;
311
312      virtual result
313      do_in(state_type& __state, const extern_type* __from,
314	    const extern_type* __from_end, const extern_type*& __from_next,
315	    intern_type* __to, intern_type* __to_end,
316	    intern_type*& __to_next) const;
317
318      virtual int
319      do_encoding() const throw();
320
321      virtual bool
322      do_always_noconv() const throw();
323
324      virtual int
325      do_length(state_type&, const extern_type* __from,
326		const extern_type* __end, size_t __max) const;
327
328      virtual int
329      do_max_length() const throw();
330    };
331
332  template<typename _InternT, typename _ExternT, typename _StateT>
333    locale::id codecvt<_InternT, _ExternT, _StateT>::id;
334
335  /// class codecvt<char, char, mbstate_t> specialization.
336  template<>
337    class codecvt<char, char, mbstate_t>
338    : public __codecvt_abstract_base<char, char, mbstate_t>
339    {
340    public:
341      // Types:
342      typedef char			intern_type;
343      typedef char			extern_type;
344      typedef mbstate_t			state_type;
345
346    protected:
347      __c_locale			_M_c_locale_codecvt;
348
349    public:
350      static locale::id id;
351
352      explicit
353      codecvt(size_t __refs = 0);
354
355      explicit
356      codecvt(__c_locale __cloc, size_t __refs = 0);
357
358    protected:
359      virtual
360      ~codecvt();
361
362      virtual result
363      do_out(state_type& __state, const intern_type* __from,
364	     const intern_type* __from_end, const intern_type*& __from_next,
365	     extern_type* __to, extern_type* __to_end,
366	     extern_type*& __to_next) const;
367
368      virtual result
369      do_unshift(state_type& __state, extern_type* __to,
370		 extern_type* __to_end, extern_type*& __to_next) const;
371
372      virtual result
373      do_in(state_type& __state, const extern_type* __from,
374	    const extern_type* __from_end, const extern_type*& __from_next,
375	    intern_type* __to, intern_type* __to_end,
376	    intern_type*& __to_next) const;
377
378      virtual int
379      do_encoding() const throw();
380
381      virtual bool
382      do_always_noconv() const throw();
383
384      virtual int
385      do_length(state_type&, const extern_type* __from,
386		const extern_type* __end, size_t __max) const;
387
388      virtual int
389      do_max_length() const throw();
390  };
391
392#ifdef _GLIBCXX_USE_WCHAR_T
393  /// class codecvt<wchar_t, char, mbstate_t> specialization.
394  template<>
395    class codecvt<wchar_t, char, mbstate_t>
396    : public __codecvt_abstract_base<wchar_t, char, mbstate_t>
397    {
398    public:
399      // Types:
400      typedef wchar_t			intern_type;
401      typedef char			extern_type;
402      typedef mbstate_t			state_type;
403
404    protected:
405      __c_locale			_M_c_locale_codecvt;
406
407    public:
408      static locale::id			id;
409
410      explicit
411      codecvt(size_t __refs = 0);
412
413      explicit
414      codecvt(__c_locale __cloc, size_t __refs = 0);
415
416    protected:
417      virtual
418      ~codecvt();
419
420      virtual result
421      do_out(state_type& __state, const intern_type* __from,
422	     const intern_type* __from_end, const intern_type*& __from_next,
423	     extern_type* __to, extern_type* __to_end,
424	     extern_type*& __to_next) const;
425
426      virtual result
427      do_unshift(state_type& __state,
428		 extern_type* __to, extern_type* __to_end,
429		 extern_type*& __to_next) const;
430
431      virtual result
432      do_in(state_type& __state,
433	     const extern_type* __from, const extern_type* __from_end,
434	     const extern_type*& __from_next,
435	     intern_type* __to, intern_type* __to_end,
436	     intern_type*& __to_next) const;
437
438      virtual
439      int do_encoding() const throw();
440
441      virtual
442      bool do_always_noconv() const throw();
443
444      virtual
445      int do_length(state_type&, const extern_type* __from,
446		    const extern_type* __end, size_t __max) const;
447
448      virtual int
449      do_max_length() const throw();
450    };
451#endif //_GLIBCXX_USE_WCHAR_T
452
453  /// class codecvt_byname [22.2.1.6].
454  template<typename _InternT, typename _ExternT, typename _StateT>
455    class codecvt_byname : public codecvt<_InternT, _ExternT, _StateT>
456    {
457    public:
458      explicit
459      codecvt_byname(const char* __s, size_t __refs = 0)
460      : codecvt<_InternT, _ExternT, _StateT>(__refs)
461      {
462	if (__builtin_strcmp(__s, "C") != 0
463	    && __builtin_strcmp(__s, "POSIX") != 0)
464	  {
465	    this->_S_destroy_c_locale(this->_M_c_locale_codecvt);
466	    this->_S_create_c_locale(this->_M_c_locale_codecvt, __s);
467	  }
468      }
469
470    protected:
471      virtual
472      ~codecvt_byname() { }
473    };
474
475  // Inhibit implicit instantiations for required instantiations,
476  // which are defined via explicit instantiations elsewhere.
477  // NB: This syntax is a GNU extension.
478#if _GLIBCXX_EXTERN_TEMPLATE
479  extern template class codecvt_byname<char, char, mbstate_t>;
480
481  extern template
482    const codecvt<char, char, mbstate_t>&
483    use_facet<codecvt<char, char, mbstate_t> >(const locale&);
484
485  extern template
486    bool
487    has_facet<codecvt<char, char, mbstate_t> >(const locale&);
488
489#ifdef _GLIBCXX_USE_WCHAR_T
490  extern template class codecvt_byname<wchar_t, char, mbstate_t>;
491
492  extern template
493    const codecvt<wchar_t, char, mbstate_t>&
494    use_facet<codecvt<wchar_t, char, mbstate_t> >(const locale&);
495
496  extern template
497    bool
498    has_facet<codecvt<wchar_t, char, mbstate_t> >(const locale&);
499#endif
500#endif
501
502_GLIBCXX_END_NAMESPACE
503
504#endif // _CODECVT_H
505