codecvt.h revision 132720
1// Locale support (codecvt) -*- C++ -*-
2
3// Copyright (C) 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
4//
5// This file is part of the GNU ISO C++ Library.  This library is free
6// software; you can redistribute it and/or modify it under the
7// terms of the GNU General Public License as published by the
8// Free Software Foundation; either version 2, or (at your option)
9// any later version.
10
11// This library is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14// GNU General Public License for more details.
15
16// You should have received a copy of the GNU General Public License along
17// with this library; see the file COPYING.  If not, write to the Free
18// Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307,
19// USA.
20
21// As a special exception, you may use this file as part of a free software
22// library without restriction.  Specifically, if other files instantiate
23// templates or use macros or inline functions from this file, or you compile
24// this file and link it with other files to produce an executable, this
25// file does not by itself cause the resulting executable to be covered by
26// the GNU General Public License.  This exception does not however
27// invalidate any other reasons why the executable file might be covered by
28// the GNU General Public License.
29
30//
31// ISO C++ 14882: 22.2.1.5 Template class codecvt
32//
33
34// Written by Benjamin Kosnik <bkoz@cygnus.com>
35
36/** @file codecvt.h
37 *  This is an internal header file, included by other library headers.
38 *  You should not attempt to use it directly.
39 */
40
41#ifndef _CODECVT_H
42#define _CODECVT_H 1
43
44#pragma GCC system_header
45
46  //  22.2.1.5  Template class codecvt
47  /// Base class for codecvt facet providing conversion result enum.
48  class codecvt_base
49  {
50  public:
51    enum result
52    {
53      ok,
54      partial,
55      error,
56      noconv
57    };
58  };
59
60  // Template class __codecvt_abstract_base
61  // NB: An abstract base class that fills in the public inlines, so
62  // that the specializations don't have to re-copy the public
63  // interface.
64  /**
65   *  @brief  Common base for codecvt facet
66   *
67   *  This template class provides implementations of the public functions
68   *  that forward to the protected virtual functions.
69   *
70   *  This template also provides abstract stubs for the protected virtual
71   *  functions.
72  */
73  template<typename _InternT, typename _ExternT, typename _StateT>
74    class __codecvt_abstract_base
75    : public locale::facet, public codecvt_base
76    {
77    public:
78      // Types:
79      typedef codecvt_base::result	result;
80      typedef _InternT			intern_type;
81      typedef _ExternT			extern_type;
82      typedef _StateT			state_type;
83
84      // 22.2.1.5.1 codecvt members
85      /**
86       *  @brief  Convert from internal to external character set.
87       *
88       *  Converts input string of intern_type to output string of
89       *  extern_type.  This is analogous to wcsrtombs.  It does this by
90       *  calling codecvt::do_out.
91       *
92       *  The source and destination character sets are determined by the
93       *  facet's locale, internal and external types.
94       *
95       *  The characters in [from,from_end) are converted and written to
96       *  [to,to_end).  from_next and to_next are set to point to the
97       *  character following the last successfully converted character,
98       *  respectively.  If the result needed no conversion, from_next and
99       *  to_next are not affected.
100       *
101       *  The @a state argument should be intialized if the input is at the
102       *  beginning and carried from a previous call if continuing
103       *  conversion.  There are no guarantees about how @a state is used.
104       *
105       *  The result returned is a member of codecvt_base::result.  If all the
106       *  input is converted, returns codecvt_base::ok.  If no conversion is
107       *  necessary, returns codecvt_base::noconv.  If the input ends early or
108       *  there is insufficient space in the output, returns codecvt_base::partial.
109       *  Otherwise the conversion failed and codecvt_base::error is returned.
110       *
111       *  @param  state  Persistent conversion state data.
112       *  @param  from  Start of input.
113       *  @param  from_end  End of input.
114       *  @param  from_next  Returns start of unconverted data.
115       *  @param  to  Start of output buffer.
116       *  @param  to_end  End of output buffer.
117       *  @param  to_next  Returns start of unused output area.
118       *  @return  codecvt_base::result.
119      */
120      result
121      out(state_type& __state, const intern_type* __from,
122	  const intern_type* __from_end, const intern_type*& __from_next,
123	  extern_type* __to, extern_type* __to_end,
124	  extern_type*& __to_next) const
125      {
126	return this->do_out(__state, __from, __from_end, __from_next,
127			    __to, __to_end, __to_next);
128      }
129
130      /**
131       *  @brief  Reset conversion state.
132       *
133       *  Writes characters to output that would restore @a state to initial
134       *  conditions.  The idea is that if a partial conversion occurs, then
135       *  the converting the characters written by this function would leave
136       *  the state in initial conditions, rather than partial conversion
137       *  state.  It does this by calling codecvt::do_unshift().
138       *
139       *  For example, if 4 external characters always converted to 1 internal
140       *  character, and input to in() had 6 external characters with state
141       *  saved, this function would write two characters to the output and
142       *  set the state to initialized conditions.
143       *
144       *  The source and destination character sets are determined by the
145       *  facet's locale, internal and external types.
146       *
147       *  The result returned is a member of codecvt_base::result.  If the
148       *  state could be reset and data written, returns codecvt_base::ok.  If
149       *  no conversion is necessary, returns codecvt_base::noconv.  If the
150       *  output has insufficient space, returns codecvt_base::partial.
151       *  Otherwise the reset failed and codecvt_base::error is returned.
152       *
153       *  @param  state  Persistent conversion state data.
154       *  @param  to  Start of output buffer.
155       *  @param  to_end  End of output buffer.
156       *  @param  to_next  Returns start of unused output area.
157       *  @return  codecvt_base::result.
158      */
159      result
160      unshift(state_type& __state, extern_type* __to, extern_type* __to_end,
161	      extern_type*& __to_next) const
162      { return this->do_unshift(__state, __to,__to_end,__to_next); }
163
164      /**
165       *  @brief  Convert from external to internal character set.
166       *
167       *  Converts input string of extern_type to output string of
168       *  intern_type.  This is analogous to mbsrtowcs.  It does this by
169       *  calling codecvt::do_in.
170       *
171       *  The source and destination character sets are determined by the
172       *  facet's locale, internal and external types.
173       *
174       *  The characters in [from,from_end) are converted and written to
175       *  [to,to_end).  from_next and to_next are set to point to the
176       *  character following the last successfully converted character,
177       *  respectively.  If the result needed no conversion, from_next and
178       *  to_next are not affected.
179       *
180       *  The @a state argument should be intialized if the input is at the
181       *  beginning and carried from a previous call if continuing
182       *  conversion.  There are no guarantees about how @a state is used.
183       *
184       *  The result returned is a member of codecvt_base::result.  If all the
185       *  input is converted, returns codecvt_base::ok.  If no conversion is
186       *  necessary, returns codecvt_base::noconv.  If the input ends early or
187       *  there is insufficient space in the output, returns codecvt_base::partial.
188       *  Otherwise the conversion failed and codecvt_base::error is returned.
189       *
190       *  @param  state  Persistent conversion state data.
191       *  @param  from  Start of input.
192       *  @param  from_end  End of input.
193       *  @param  from_next  Returns start of unconverted data.
194       *  @param  to  Start of output buffer.
195       *  @param  to_end  End of output buffer.
196       *  @param  to_next  Returns start of unused output area.
197       *  @return  codecvt_base::result.
198      */
199      result
200      in(state_type& __state, const extern_type* __from,
201	 const extern_type* __from_end, const extern_type*& __from_next,
202	 intern_type* __to, intern_type* __to_end,
203	 intern_type*& __to_next) const
204      {
205	return this->do_in(__state, __from, __from_end, __from_next,
206			   __to, __to_end, __to_next);
207      }
208
209      int
210      encoding() const throw()
211      { return this->do_encoding(); }
212
213      bool
214      always_noconv() const throw()
215      { return this->do_always_noconv(); }
216
217      int
218      length(state_type& __state, const extern_type* __from,
219	     const extern_type* __end, size_t __max) const
220      { return this->do_length(__state, __from, __end, __max); }
221
222      int
223      max_length() const throw()
224      { return this->do_max_length(); }
225
226    protected:
227      explicit
228      __codecvt_abstract_base(size_t __refs = 0) : locale::facet(__refs) { }
229
230      virtual
231      ~__codecvt_abstract_base() { }
232
233      /**
234       *  @brief  Convert from internal to external character set.
235       *
236       *  Converts input string of intern_type to output string of
237       *  extern_type.  This function is a hook for derived classes to change
238       *  the value returned.  @see out for more information.
239      */
240      virtual result
241      do_out(state_type& __state, const intern_type* __from,
242	     const intern_type* __from_end, const intern_type*& __from_next,
243	     extern_type* __to, extern_type* __to_end,
244	     extern_type*& __to_next) const = 0;
245
246      virtual result
247      do_unshift(state_type& __state, extern_type* __to,
248		 extern_type* __to_end, extern_type*& __to_next) const = 0;
249
250      virtual result
251      do_in(state_type& __state, const extern_type* __from,
252	    const extern_type* __from_end, const extern_type*& __from_next,
253	    intern_type* __to, intern_type* __to_end,
254	    intern_type*& __to_next) const = 0;
255
256      virtual int
257      do_encoding() const throw() = 0;
258
259      virtual bool
260      do_always_noconv() const throw() = 0;
261
262      virtual int
263      do_length(state_type&, const extern_type* __from,
264		const extern_type* __end, size_t __max) const = 0;
265
266      virtual int
267      do_max_length() const throw() = 0;
268    };
269
270  // 22.2.1.5 Template class codecvt
271  // NB: Generic, mostly useless implementation.
272  template<typename _InternT, typename _ExternT, typename _StateT>
273    class codecvt
274    : public __codecvt_abstract_base<_InternT, _ExternT, _StateT>
275    {
276    public:
277      // Types:
278      typedef codecvt_base::result	result;
279      typedef _InternT			intern_type;
280      typedef _ExternT			extern_type;
281      typedef _StateT			state_type;
282
283    protected:
284      __c_locale			_M_c_locale_codecvt;
285
286    public:
287      static locale::id			id;
288
289      explicit
290      codecvt(size_t __refs = 0)
291      : __codecvt_abstract_base<_InternT, _ExternT, _StateT> (__refs) { }
292
293      explicit
294      codecvt(__c_locale __cloc, size_t __refs = 0);
295
296    protected:
297      virtual
298      ~codecvt() { }
299
300      virtual result
301      do_out(state_type& __state, const intern_type* __from,
302	     const intern_type* __from_end, const intern_type*& __from_next,
303	     extern_type* __to, extern_type* __to_end,
304	     extern_type*& __to_next) const;
305
306      virtual result
307      do_unshift(state_type& __state, extern_type* __to,
308		 extern_type* __to_end, extern_type*& __to_next) const;
309
310      virtual result
311      do_in(state_type& __state, const extern_type* __from,
312	    const extern_type* __from_end, const extern_type*& __from_next,
313	    intern_type* __to, intern_type* __to_end,
314	    intern_type*& __to_next) const;
315
316      virtual int
317      do_encoding() const throw();
318
319      virtual bool
320      do_always_noconv() const throw();
321
322      virtual int
323      do_length(state_type&, const extern_type* __from,
324		const extern_type* __end, size_t __max) const;
325
326      virtual int
327      do_max_length() const throw();
328    };
329
330  template<typename _InternT, typename _ExternT, typename _StateT>
331    locale::id codecvt<_InternT, _ExternT, _StateT>::id;
332
333  // codecvt<char, char, mbstate_t> required specialization
334  template<>
335    class codecvt<char, char, mbstate_t>
336    : public __codecvt_abstract_base<char, char, mbstate_t>
337    {
338    public:
339      // Types:
340      typedef char			intern_type;
341      typedef char			extern_type;
342      typedef mbstate_t			state_type;
343
344    protected:
345      __c_locale			_M_c_locale_codecvt;
346
347    public:
348      static locale::id id;
349
350      explicit
351      codecvt(size_t __refs = 0);
352
353      explicit
354      codecvt(__c_locale __cloc, size_t __refs = 0);
355
356    protected:
357      virtual
358      ~codecvt();
359
360      virtual result
361      do_out(state_type& __state, const intern_type* __from,
362	     const intern_type* __from_end, const intern_type*& __from_next,
363	     extern_type* __to, extern_type* __to_end,
364	     extern_type*& __to_next) const;
365
366      virtual result
367      do_unshift(state_type& __state, extern_type* __to,
368		 extern_type* __to_end, extern_type*& __to_next) const;
369
370      virtual result
371      do_in(state_type& __state, const extern_type* __from,
372	    const extern_type* __from_end, const extern_type*& __from_next,
373	    intern_type* __to, intern_type* __to_end,
374	    intern_type*& __to_next) const;
375
376      virtual int
377      do_encoding() const throw();
378
379      virtual bool
380      do_always_noconv() const throw();
381
382      virtual int
383      do_length(state_type&, const extern_type* __from,
384		const extern_type* __end, size_t __max) const;
385
386      virtual int
387      do_max_length() const throw();
388  };
389
390#ifdef _GLIBCXX_USE_WCHAR_T
391  // codecvt<wchar_t, char, mbstate_t> required specialization
392  template<>
393    class codecvt<wchar_t, char, mbstate_t>
394    : public __codecvt_abstract_base<wchar_t, char, mbstate_t>
395    {
396    public:
397      // Types:
398      typedef wchar_t			intern_type;
399      typedef char			extern_type;
400      typedef mbstate_t			state_type;
401
402    protected:
403      __c_locale			_M_c_locale_codecvt;
404
405    public:
406      static locale::id			id;
407
408      explicit
409      codecvt(size_t __refs = 0);
410
411      explicit
412      codecvt(__c_locale __cloc, size_t __refs = 0);
413
414    protected:
415      virtual
416      ~codecvt();
417
418      virtual result
419      do_out(state_type& __state, const intern_type* __from,
420	     const intern_type* __from_end, const intern_type*& __from_next,
421	     extern_type* __to, extern_type* __to_end,
422	     extern_type*& __to_next) const;
423
424      virtual result
425      do_unshift(state_type& __state,
426		 extern_type* __to, extern_type* __to_end,
427		 extern_type*& __to_next) const;
428
429      virtual result
430      do_in(state_type& __state,
431	     const extern_type* __from, const extern_type* __from_end,
432	     const extern_type*& __from_next,
433	     intern_type* __to, intern_type* __to_end,
434	     intern_type*& __to_next) const;
435
436      virtual
437      int do_encoding() const throw();
438
439      virtual
440      bool do_always_noconv() const throw();
441
442      virtual
443      int do_length(state_type&, const extern_type* __from,
444		    const extern_type* __end, size_t __max) const;
445
446      virtual int
447      do_max_length() const throw();
448    };
449#endif //_GLIBCXX_USE_WCHAR_T
450
451  // 22.2.1.6  Template class codecvt_byname
452  template<typename _InternT, typename _ExternT, typename _StateT>
453    class codecvt_byname : public codecvt<_InternT, _ExternT, _StateT>
454    {
455    public:
456      explicit
457      codecvt_byname(const char* __s, size_t __refs = 0)
458      : codecvt<_InternT, _ExternT, _StateT>(__refs)
459      {
460	if (std::strcmp(__s, "C") != 0 && std::strcmp(__s, "POSIX") != 0)
461	  {
462	    this->_S_destroy_c_locale(this->_M_c_locale_codecvt);
463	    this->_S_create_c_locale(this->_M_c_locale_codecvt, __s);
464	  }
465      }
466
467    protected:
468      virtual
469      ~codecvt_byname() { }
470    };
471
472  // Include host and configuration specific partial specializations
473  // with additional functionality, if possible.
474#ifdef _GLIBCXX_USE_WCHAR_T
475  #include <bits/codecvt_specializations.h>
476#endif
477
478#endif // _CODECVT_H
479