// codecvt.h revision 132720
// Locale support (codecvt) -*- C++ -*-

// Copyright (C) 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 2, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING. If not, write to the Free
// Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307,
// USA.

// As a special exception, you may use this file as part of a free software
// library without restriction. Specifically, if other files instantiate
// templates or use macros or inline functions from this file, or you compile
// this file and link it with other files to produce an executable, this
// file does not by itself cause the resulting executable to be covered by
// the GNU General Public License. This exception does not however
// invalidate any other reasons why the executable file might be covered by
// the GNU General Public License.

//
// ISO C++ 14882: 22.2.1.5 Template class codecvt
//

// Written by Benjamin Kosnik <bkoz@cygnus.com>

/** @file codecvt.h
 *  This is an internal header file, included by other library headers.
 *  You should not attempt to use it directly.
 */

#ifndef _CODECVT_H
#define _CODECVT_H 1

#pragma GCC system_header

// 22.2.1.5 Template class codecvt
/// Base class for codecvt facet providing conversion result enum.
class codecvt_base
{
public:
  // Outcome reported by the conversion functions of a codecvt facet.
  enum result
  {
    ok,       // All of the input was converted.
    partial,  // Input ended early or the output buffer was too small.
    error,    // The conversion failed.
    noconv    // No conversion was necessary.
  };
};

// Template class __codecvt_abstract_base
// NB: An abstract base class that fills in the public inlines, so
// that the specializations don't have to re-copy the public
// interface.
/**
 *  @brief  Common base for codecvt facet
 *
 *  This template class provides implementations of the public functions
 *  that forward to the protected virtual functions.
 *
 *  This template also provides abstract stubs for the protected virtual
 *  functions.
 */
template<typename _InternT, typename _ExternT, typename _StateT>
  class __codecvt_abstract_base
  : public locale::facet, public codecvt_base
  {
  public:
    // Types:
    typedef codecvt_base::result result;
    typedef _InternT intern_type;
    typedef _ExternT extern_type;
    typedef _StateT state_type;

    // 22.2.1.5.1 codecvt members
    /**
     *  @brief  Convert from internal to external character set.
     *
     *  Converts input string of intern_type to output string of
     *  extern_type. This is analogous to wcsrtombs. It does this by
     *  calling codecvt::do_out.
     *
     *  The source and destination character sets are determined by the
     *  facet's locale, internal and external types.
     *
     *  The characters in [from,from_end) are converted and written to
     *  [to,to_end). from_next and to_next are set to point to the
     *  character following the last successfully converted character,
     *  respectively. If the result needed no conversion, from_next and
     *  to_next are not affected.
     *
     *  The @a state argument should be initialized if the input is at the
     *  beginning and carried from a previous call if continuing
     *  conversion. There are no guarantees about how @a state is used.
     *
     *  The result returned is a member of codecvt_base::result. If all the
     *  input is converted, returns codecvt_base::ok. If no conversion is
     *  necessary, returns codecvt_base::noconv. If the input ends early or
     *  there is insufficient space in the output, returns codecvt_base::partial.
     *  Otherwise the conversion failed and codecvt_base::error is returned.
     *
     *  @param  state  Persistent conversion state data.
     *  @param  from  Start of input.
     *  @param  from_end  End of input.
     *  @param  from_next  Returns start of unconverted data.
     *  @param  to  Start of output buffer.
     *  @param  to_end  End of output buffer.
     *  @param  to_next  Returns start of unused output area.
     *  @return  codecvt_base::result.
     */
    result
    out(state_type& __state, const intern_type* __from,
        const intern_type* __from_end, const intern_type*& __from_next,
        extern_type* __to, extern_type* __to_end,
        extern_type*& __to_next) const
    {
      return this->do_out(__state, __from, __from_end, __from_next,
                          __to, __to_end, __to_next);
    }

    /**
     *  @brief  Reset conversion state.
     *
     *  Writes characters to output that would restore @a state to initial
     *  conditions. The idea is that if a partial conversion occurs, then
     *  converting the characters written by this function would leave
     *  the state in initial conditions, rather than partial conversion
     *  state. It does this by calling codecvt::do_unshift().
     *
     *  For example, if 4 external characters always converted to 1 internal
     *  character, and input to in() had 6 external characters with state
     *  saved, this function would write two characters to the output and
     *  set the state to initialized conditions.
     *
     *  The source and destination character sets are determined by the
     *  facet's locale, internal and external types.
     *
     *  The result returned is a member of codecvt_base::result. If the
     *  state could be reset and data written, returns codecvt_base::ok. If
     *  no conversion is necessary, returns codecvt_base::noconv. If the
     *  output has insufficient space, returns codecvt_base::partial.
     *  Otherwise the reset failed and codecvt_base::error is returned.
     *
     *  @param  state  Persistent conversion state data.
     *  @param  to  Start of output buffer.
     *  @param  to_end  End of output buffer.
     *  @param  to_next  Returns start of unused output area.
     *  @return  codecvt_base::result.
     */
    result
    unshift(state_type& __state, extern_type* __to, extern_type* __to_end,
            extern_type*& __to_next) const
    { return this->do_unshift(__state, __to,__to_end,__to_next); }

    /**
     *  @brief  Convert from external to internal character set.
     *
     *  Converts input string of extern_type to output string of
     *  intern_type. This is analogous to mbsrtowcs. It does this by
     *  calling codecvt::do_in.
     *
     *  The source and destination character sets are determined by the
     *  facet's locale, internal and external types.
     *
     *  The characters in [from,from_end) are converted and written to
     *  [to,to_end). from_next and to_next are set to point to the
     *  character following the last successfully converted character,
     *  respectively. If the result needed no conversion, from_next and
     *  to_next are not affected.
     *
     *  The @a state argument should be initialized if the input is at the
     *  beginning and carried from a previous call if continuing
     *  conversion. There are no guarantees about how @a state is used.
     *
     *  The result returned is a member of codecvt_base::result. If all the
     *  input is converted, returns codecvt_base::ok. If no conversion is
     *  necessary, returns codecvt_base::noconv. If the input ends early or
     *  there is insufficient space in the output, returns codecvt_base::partial.
     *  Otherwise the conversion failed and codecvt_base::error is returned.
     *
     *  @param  state  Persistent conversion state data.
     *  @param  from  Start of input.
     *  @param  from_end  End of input.
     *  @param  from_next  Returns start of unconverted data.
     *  @param  to  Start of output buffer.
     *  @param  to_end  End of output buffer.
     *  @param  to_next  Returns start of unused output area.
     *  @return  codecvt_base::result.
     */
    result
    in(state_type& __state, const extern_type* __from,
       const extern_type* __from_end, const extern_type*& __from_next,
       intern_type* __to, intern_type* __to_end,
       intern_type*& __to_next) const
    {
      return this->do_in(__state, __from, __from_end, __from_next,
                         __to, __to_end, __to_next);
    }

    /// Public entry point that forwards to the virtual do_encoding().
    int
    encoding() const throw()
    { return this->do_encoding(); }

    /// Public entry point that forwards to the virtual do_always_noconv().
    bool
    always_noconv() const throw()
    { return this->do_always_noconv(); }

    /// Public entry point that forwards all arguments to the virtual
    /// do_length().
    int
    length(state_type& __state, const extern_type* __from,
           const extern_type* __end, size_t __max) const
    { return this->do_length(__state, __from, __end, __max); }

    /// Public entry point that forwards to the virtual do_max_length().
    int
    max_length() const throw()
    { return this->do_max_length(); }

  protected:
    /// Passes the initial reference count @a __refs on to locale::facet.
    explicit
    __codecvt_abstract_base(size_t __refs = 0) : locale::facet(__refs) { }

    virtual
    ~__codecvt_abstract_base() { }

    /**
     *  @brief  Convert from internal to external character set.
     *
     *  Converts input string of intern_type to output string of
     *  extern_type. This function is a hook for derived classes to change
     *  the value returned. @see out for more information.
     */
    virtual result
    do_out(state_type& __state, const intern_type* __from,
           const intern_type* __from_end, const intern_type*& __from_next,
           extern_type* __to, extern_type* __to_end,
           extern_type*& __to_next) const = 0;

    /// Hook called by unshift(); must be supplied by derived classes.
    virtual result
    do_unshift(state_type& __state, extern_type* __to,
               extern_type* __to_end, extern_type*& __to_next) const = 0;

    /// Hook called by in(); must be supplied by derived classes.
    virtual result
    do_in(state_type& __state, const extern_type* __from,
          const extern_type* __from_end, const extern_type*& __from_next,
          intern_type* __to, intern_type* __to_end,
          intern_type*& __to_next) const = 0;

    /// Hook called by encoding(); must be supplied by derived classes.
    virtual int
    do_encoding() const throw() = 0;

    /// Hook called by always_noconv(); must be supplied by derived classes.
    virtual bool
    do_always_noconv() const throw() = 0;

    /// Hook called by length(); must be supplied by derived classes.
    virtual int
    do_length(state_type&, const extern_type* __from,
              const extern_type* __end, size_t __max) const = 0;

    /// Hook called by max_length(); must be supplied by derived classes.
    virtual int
    do_max_length() const throw() = 0;
  };

// 22.2.1.5 Template class codecvt
// NB: Generic, mostly useless implementation.
272 template<typename _InternT, typename _ExternT, typename _StateT> 273 class codecvt 274 : public __codecvt_abstract_base<_InternT, _ExternT, _StateT> 275 { 276 public: 277 // Types: 278 typedef codecvt_base::result result; 279 typedef _InternT intern_type; 280 typedef _ExternT extern_type; 281 typedef _StateT state_type; 282 283 protected: 284 __c_locale _M_c_locale_codecvt; 285 286 public: 287 static locale::id id; 288 289 explicit 290 codecvt(size_t __refs = 0) 291 : __codecvt_abstract_base<_InternT, _ExternT, _StateT> (__refs) { } 292 293 explicit 294 codecvt(__c_locale __cloc, size_t __refs = 0); 295 296 protected: 297 virtual 298 ~codecvt() { } 299 300 virtual result 301 do_out(state_type& __state, const intern_type* __from, 302 const intern_type* __from_end, const intern_type*& __from_next, 303 extern_type* __to, extern_type* __to_end, 304 extern_type*& __to_next) const; 305 306 virtual result 307 do_unshift(state_type& __state, extern_type* __to, 308 extern_type* __to_end, extern_type*& __to_next) const; 309 310 virtual result 311 do_in(state_type& __state, const extern_type* __from, 312 const extern_type* __from_end, const extern_type*& __from_next, 313 intern_type* __to, intern_type* __to_end, 314 intern_type*& __to_next) const; 315 316 virtual int 317 do_encoding() const throw(); 318 319 virtual bool 320 do_always_noconv() const throw(); 321 322 virtual int 323 do_length(state_type&, const extern_type* __from, 324 const extern_type* __end, size_t __max) const; 325 326 virtual int 327 do_max_length() const throw(); 328 }; 329 330 template<typename _InternT, typename _ExternT, typename _StateT> 331 locale::id codecvt<_InternT, _ExternT, _StateT>::id; 332 333 // codecvt<char, char, mbstate_t> required specialization 334 template<> 335 class codecvt<char, char, mbstate_t> 336 : public __codecvt_abstract_base<char, char, mbstate_t> 337 { 338 public: 339 // Types: 340 typedef char intern_type; 341 typedef char extern_type; 342 typedef mbstate_t state_type; 
343 344 protected: 345 __c_locale _M_c_locale_codecvt; 346 347 public: 348 static locale::id id; 349 350 explicit 351 codecvt(size_t __refs = 0); 352 353 explicit 354 codecvt(__c_locale __cloc, size_t __refs = 0); 355 356 protected: 357 virtual 358 ~codecvt(); 359 360 virtual result 361 do_out(state_type& __state, const intern_type* __from, 362 const intern_type* __from_end, const intern_type*& __from_next, 363 extern_type* __to, extern_type* __to_end, 364 extern_type*& __to_next) const; 365 366 virtual result 367 do_unshift(state_type& __state, extern_type* __to, 368 extern_type* __to_end, extern_type*& __to_next) const; 369 370 virtual result 371 do_in(state_type& __state, const extern_type* __from, 372 const extern_type* __from_end, const extern_type*& __from_next, 373 intern_type* __to, intern_type* __to_end, 374 intern_type*& __to_next) const; 375 376 virtual int 377 do_encoding() const throw(); 378 379 virtual bool 380 do_always_noconv() const throw(); 381 382 virtual int 383 do_length(state_type&, const extern_type* __from, 384 const extern_type* __end, size_t __max) const; 385 386 virtual int 387 do_max_length() const throw(); 388 }; 389 390#ifdef _GLIBCXX_USE_WCHAR_T 391 // codecvt<wchar_t, char, mbstate_t> required specialization 392 template<> 393 class codecvt<wchar_t, char, mbstate_t> 394 : public __codecvt_abstract_base<wchar_t, char, mbstate_t> 395 { 396 public: 397 // Types: 398 typedef wchar_t intern_type; 399 typedef char extern_type; 400 typedef mbstate_t state_type; 401 402 protected: 403 __c_locale _M_c_locale_codecvt; 404 405 public: 406 static locale::id id; 407 408 explicit 409 codecvt(size_t __refs = 0); 410 411 explicit 412 codecvt(__c_locale __cloc, size_t __refs = 0); 413 414 protected: 415 virtual 416 ~codecvt(); 417 418 virtual result 419 do_out(state_type& __state, const intern_type* __from, 420 const intern_type* __from_end, const intern_type*& __from_next, 421 extern_type* __to, extern_type* __to_end, 422 extern_type*& __to_next) 
const; 423 424 virtual result 425 do_unshift(state_type& __state, 426 extern_type* __to, extern_type* __to_end, 427 extern_type*& __to_next) const; 428 429 virtual result 430 do_in(state_type& __state, 431 const extern_type* __from, const extern_type* __from_end, 432 const extern_type*& __from_next, 433 intern_type* __to, intern_type* __to_end, 434 intern_type*& __to_next) const; 435 436 virtual 437 int do_encoding() const throw(); 438 439 virtual 440 bool do_always_noconv() const throw(); 441 442 virtual 443 int do_length(state_type&, const extern_type* __from, 444 const extern_type* __end, size_t __max) const; 445 446 virtual int 447 do_max_length() const throw(); 448 }; 449#endif //_GLIBCXX_USE_WCHAR_T 450 451 // 22.2.1.6 Template class codecvt_byname 452 template<typename _InternT, typename _ExternT, typename _StateT> 453 class codecvt_byname : public codecvt<_InternT, _ExternT, _StateT> 454 { 455 public: 456 explicit 457 codecvt_byname(const char* __s, size_t __refs = 0) 458 : codecvt<_InternT, _ExternT, _StateT>(__refs) 459 { 460 if (std::strcmp(__s, "C") != 0 && std::strcmp(__s, "POSIX") != 0) 461 { 462 this->_S_destroy_c_locale(this->_M_c_locale_codecvt); 463 this->_S_create_c_locale(this->_M_c_locale_codecvt, __s); 464 } 465 } 466 467 protected: 468 virtual 469 ~codecvt_byname() { } 470 }; 471 472 // Include host and configuration specific partial specializations 473 // with additional functionality, if possible. 474#ifdef _GLIBCXX_USE_WCHAR_T 475 #include <bits/codecvt_specializations.h> 476#endif 477 478#endif // _CODECVT_H 479