// Locale support (codecvt) -*- C++ -*-

// Copyright (C) 2015 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

#include <codecvt>
#include <cstring>              // std::memcpy, std::memcmp
#include <bits/stl_algobase.h>  // std::min, std::max

#ifdef _GLIBCXX_USE_C99_STDINT_TR1
namespace std _GLIBCXX_VISIBILITY(default)
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION

namespace
{
  // Largest code point that fits in a single UTF-16 code unit.
  const char32_t max_single_utf16_unit = 0xFFFF;

  // Default upper bound on the code points the helpers below accept
  // (U+10FFFF); used as the default value of their maxcode parameter.
  const char32_t max_code_point = 0x10FFFF;

  // The functions below rely on maxcode < incomplete_mb_character
  // (which is enforced by the codecvt_utf* classes on construction).
43 const char32_t incomplete_mb_character = char32_t(-2); 44 const char32_t invalid_mb_sequence = char32_t(-1); 45 46 template<typename Elem> 47 struct range 48 { 49 Elem* next; 50 Elem* end; 51 52 Elem operator*() const { return *next; } 53 54 range& operator++() { ++next; return *this; } 55 56 size_t size() const { return end - next; } 57 }; 58 59 // Multibyte sequences can have "header" consisting of Byte Order Mark 60 const unsigned char utf8_bom[3] = { 0xEF, 0xBB, 0xBF }; 61 const unsigned char utf16_bom[4] = { 0xFE, 0xFF }; 62 const unsigned char utf16le_bom[4] = { 0xFF, 0xFE }; 63 64 template<size_t N> 65 inline bool 66 write_bom(range<char>& to, const unsigned char (&bom)[N]) 67 { 68 if (to.size() < N) 69 return false; 70 memcpy(to.next, bom, N); 71 to.next += N; 72 return true; 73 } 74 75 // If generate_header is set in mode write out UTF-8 BOM. 76 bool 77 write_utf8_bom(range<char>& to, codecvt_mode mode) 78 { 79 if (mode & generate_header) 80 return write_bom(to, utf8_bom); 81 return true; 82 } 83 84 // If generate_header is set in mode write out the UTF-16 BOM indicated 85 // by whether little_endian is set in mode. 86 bool 87 write_utf16_bom(range<char16_t>& to, codecvt_mode mode) 88 { 89 if (mode & generate_header) 90 { 91 if (!to.size()) 92 return false; 93 auto* bom = (mode & little_endian) ? utf16le_bom : utf16_bom; 94 std::memcpy(to.next, bom, 2); 95 ++to.next; 96 } 97 return true; 98 } 99 100 template<size_t N> 101 inline bool 102 read_bom(range<const char>& from, const unsigned char (&bom)[N]) 103 { 104 if (from.size() >= N && !memcmp(from.next, bom, N)) 105 { 106 from.next += N; 107 return true; 108 } 109 return false; 110 } 111 112 // If consume_header is set in mode update from.next to after any BOM. 113 void 114 read_utf8_bom(range<const char>& from, codecvt_mode mode) 115 { 116 if (mode & consume_header) 117 read_bom(from, utf8_bom); 118 } 119 120 // If consume_header is set in mode update from.next to after any BOM. 
121 // Return little_endian iff the UTF-16LE BOM was present. 122 codecvt_mode 123 read_utf16_bom(range<const char16_t>& from, codecvt_mode mode) 124 { 125 if (mode & consume_header && from.size()) 126 { 127 if (*from.next == 0xFEFF) 128 ++from.next; 129 else if (*from.next == 0xFFFE) 130 { 131 ++from.next; 132 return little_endian; 133 } 134 } 135 return {}; 136 } 137 138 // Read a codepoint from a UTF-8 multibyte sequence. 139 // Updates from.next if the codepoint is not greater than maxcode. 140 // Returns invalid_mb_sequence, incomplete_mb_character or the code point. 141 char32_t 142 read_utf8_code_point(range<const char>& from, unsigned long maxcode) 143 { 144 const size_t avail = from.size(); 145 if (avail == 0) 146 return incomplete_mb_character; 147 unsigned char c1 = from.next[0]; 148 // https://en.wikipedia.org/wiki/UTF-8#Sample_code 149 if (c1 < 0x80) 150 { 151 ++from.next; 152 return c1; 153 } 154 else if (c1 < 0xC2) // continuation or overlong 2-byte sequence 155 return invalid_mb_sequence; 156 else if (c1 < 0xE0) // 2-byte sequence 157 { 158 if (avail < 2) 159 return incomplete_mb_character; 160 unsigned char c2 = from.next[1]; 161 if ((c2 & 0xC0) != 0x80) 162 return invalid_mb_sequence; 163 char32_t c = (c1 << 6) + c2 - 0x3080; 164 if (c <= maxcode) 165 from.next += 2; 166 return c; 167 } 168 else if (c1 < 0xF0) // 3-byte sequence 169 { 170 if (avail < 3) 171 return incomplete_mb_character; 172 unsigned char c2 = from.next[1]; 173 if ((c2 & 0xC0) != 0x80) 174 return invalid_mb_sequence; 175 if (c1 == 0xE0 && c2 < 0xA0) // overlong 176 return invalid_mb_sequence; 177 unsigned char c3 = from.next[2]; 178 if ((c3 & 0xC0) != 0x80) 179 return invalid_mb_sequence; 180 char32_t c = (c1 << 12) + (c2 << 6) + c3 - 0xE2080; 181 if (c <= maxcode) 182 from.next += 3; 183 return c; 184 } 185 else if (c1 < 0xF5) // 4-byte sequence 186 { 187 if (avail < 4) 188 return incomplete_mb_character; 189 unsigned char c2 = from.next[1]; 190 if ((c2 & 0xC0) != 0x80) 191 
return invalid_mb_sequence; 192 if (c1 == 0xF0 && c2 < 0x90) // overlong 193 return invalid_mb_sequence; 194 if (c1 == 0xF4 && c2 >= 0x90) // > U+10FFFF 195 return invalid_mb_sequence; 196 unsigned char c3 = from.next[2]; 197 if ((c3 & 0xC0) != 0x80) 198 return invalid_mb_sequence; 199 unsigned char c4 = from.next[3]; 200 if ((c4 & 0xC0) != 0x80) 201 return invalid_mb_sequence; 202 char32_t c = (c1 << 18) + (c2 << 12) + (c3 << 6) + c4 - 0x3C82080; 203 if (c <= maxcode) 204 from.next += 4; 205 return c; 206 } 207 else // > U+10FFFF 208 return invalid_mb_sequence; 209 } 210 211 bool 212 write_utf8_code_point(range<char>& to, char32_t code_point) 213 { 214 if (code_point < 0x80) 215 { 216 if (to.size() < 1) 217 return false; 218 *to.next++ = code_point; 219 } 220 else if (code_point <= 0x7FF) 221 { 222 if (to.size() < 2) 223 return false; 224 *to.next++ = (code_point >> 6) + 0xC0; 225 *to.next++ = (code_point & 0x3F) + 0x80; 226 } 227 else if (code_point <= 0xFFFF) 228 { 229 if (to.size() < 3) 230 return false; 231 *to.next++ = (code_point >> 12) + 0xE0; 232 *to.next++ = ((code_point >> 6) & 0x3F) + 0x80; 233 *to.next++ = (code_point & 0x3F) + 0x80; 234 } 235 else if (code_point <= 0x10FFFF) 236 { 237 if (to.size() < 4) 238 return false; 239 *to.next++ = (code_point >> 18) + 0xF0; 240 *to.next++ = ((code_point >> 12) & 0x3F) + 0x80; 241 *to.next++ = ((code_point >> 6) & 0x3F) + 0x80; 242 *to.next++ = (code_point & 0x3F) + 0x80; 243 } 244 else 245 return false; 246 return true; 247 } 248 249 inline char16_t 250 adjust_byte_order(char16_t c, codecvt_mode mode) 251 { 252#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ 253 return (mode & little_endian) ? __builtin_bswap16(c) : c; 254#else 255 return (mode & little_endian) ? c : __builtin_bswap16(c); 256#endif 257 } 258 259 // Return true if c is a high-surrogate (aka leading) code point. 
260 inline bool 261 is_high_surrogate(char32_t c) 262 { 263 return c >= 0xD800 && c <= 0xDBFF; 264 } 265 266 // Return true if c is a low-surrogate (aka trailing) code point. 267 inline bool 268 is_low_surrogate(char32_t c) 269 { 270 return c >= 0xDC00 && c <= 0xDFFF; 271 } 272 273 inline char32_t 274 surrogate_pair_to_code_point(char32_t high, char32_t low) 275 { 276 return (high << 10) + low - 0x35FDC00; 277 } 278 279 // Read a codepoint from a UTF-16 multibyte sequence. 280 // The sequence's endianness is indicated by (mode & little_endian). 281 // Updates from.next if the codepoint is not greater than maxcode. 282 // Returns invalid_mb_sequence, incomplete_mb_character or the code point. 283 char32_t 284 read_utf16_code_point(range<const char16_t>& from, unsigned long maxcode, 285 codecvt_mode mode) 286 { 287 const size_t avail = from.size(); 288 if (avail == 0) 289 return incomplete_mb_character; 290 int inc = 1; 291 char32_t c = adjust_byte_order(from.next[0], mode); 292 if (is_high_surrogate(c)) 293 { 294 if (avail < 2) 295 return incomplete_mb_character; 296 const char16_t c2 = adjust_byte_order(from.next[1], mode); 297 if (is_low_surrogate(c2)) 298 { 299 c = surrogate_pair_to_code_point(c, c2); 300 inc = 2; 301 } 302 else 303 return invalid_mb_sequence; 304 } 305 else if (is_low_surrogate(c)) 306 return invalid_mb_sequence; 307 if (c <= maxcode) 308 from.next += inc; 309 return c; 310 } 311 312 template<typename C> 313 bool 314 write_utf16_code_point(range<C>& to, char32_t codepoint, codecvt_mode mode) 315 { 316 static_assert(sizeof(C) >= 2, "a code unit must be at least 16-bit"); 317 318 if (codepoint < max_single_utf16_unit) 319 { 320 if (to.size() > 0) 321 { 322 *to.next = adjust_byte_order(codepoint, mode); 323 ++to.next; 324 return true; 325 } 326 } 327 else if (to.size() > 1) 328 { 329 // Algorithm from http://www.unicode.org/faq/utf_bom.html#utf16-4 330 const char32_t LEAD_OFFSET = 0xD800 - (0x10000 >> 10); 331 char16_t lead = LEAD_OFFSET + 
(codepoint >> 10); 332 char16_t trail = 0xDC00 + (codepoint & 0x3FF); 333 to.next[0] = adjust_byte_order(lead, mode); 334 to.next[1] = adjust_byte_order(trail, mode); 335 to.next += 2; 336 return true; 337 } 338 return false; 339 } 340 341 // utf8 -> ucs4 342 codecvt_base::result 343 ucs4_in(range<const char>& from, range<char32_t>& to, 344 unsigned long maxcode = max_code_point, codecvt_mode mode = {}) 345 { 346 read_utf8_bom(from, mode); 347 while (from.size() && to.size()) 348 { 349 const char32_t codepoint = read_utf8_code_point(from, maxcode); 350 if (codepoint == incomplete_mb_character) 351 return codecvt_base::partial; 352 if (codepoint > maxcode) 353 return codecvt_base::error; 354 *to.next++ = codepoint; 355 } 356 return from.size() ? codecvt_base::partial : codecvt_base::ok; 357 } 358 359 // ucs4 -> utf8 360 codecvt_base::result 361 ucs4_out(range<const char32_t>& from, range<char>& to, 362 unsigned long maxcode = max_code_point, codecvt_mode mode = {}) 363 { 364 if (!write_utf8_bom(to, mode)) 365 return codecvt_base::partial; 366 while (from.size()) 367 { 368 const char32_t c = from.next[0]; 369 if (c > maxcode) 370 return codecvt_base::error; 371 if (!write_utf8_code_point(to, c)) 372 return codecvt_base::partial; 373 ++from.next; 374 } 375 return codecvt_base::ok; 376 } 377 378 // utf16 -> ucs4 379 codecvt_base::result 380 ucs4_in(range<const char16_t>& from, range<char32_t>& to, 381 unsigned long maxcode = max_code_point, codecvt_mode mode = {}) 382 { 383 if (read_utf16_bom(from, mode) == little_endian) 384 mode = codecvt_mode(mode & little_endian); 385 while (from.size() && to.size()) 386 { 387 const char32_t codepoint = read_utf16_code_point(from, maxcode, mode); 388 if (codepoint == incomplete_mb_character) 389 return codecvt_base::partial; 390 if (codepoint > maxcode) 391 return codecvt_base::error; 392 *to.next++ = codepoint; 393 } 394 return from.size() ? 
codecvt_base::partial : codecvt_base::ok; 395 } 396 397 // ucs4 -> utf16 398 codecvt_base::result 399 ucs4_out(range<const char32_t>& from, range<char16_t>& to, 400 unsigned long maxcode = max_code_point, codecvt_mode mode = {}) 401 { 402 if (!write_utf16_bom(to, mode)) 403 return codecvt_base::partial; 404 while (from.size()) 405 { 406 const char32_t c = from.next[0]; 407 if (c > maxcode) 408 return codecvt_base::error; 409 if (!write_utf16_code_point(to, c, mode)) 410 return codecvt_base::partial; 411 ++from.next; 412 } 413 return codecvt_base::ok; 414 } 415 416 // utf8 -> utf16 417 template<typename C> 418 codecvt_base::result 419 utf16_in(range<const char>& from, range<C>& to, 420 unsigned long maxcode = max_code_point, codecvt_mode mode = {}) 421 { 422 read_utf8_bom(from, mode); 423 while (from.size() && to.size()) 424 { 425 const char* const first = from.next; 426 const char32_t codepoint = read_utf8_code_point(from, maxcode); 427 if (codepoint == incomplete_mb_character) 428 return codecvt_base::partial; 429 if (codepoint > maxcode) 430 return codecvt_base::error; 431 if (!write_utf16_code_point(to, codepoint, mode)) 432 { 433 from.next = first; 434 return codecvt_base::partial; 435 } 436 } 437 return codecvt_base::ok; 438 } 439 440 // utf16 -> utf8 441 template<typename C> 442 codecvt_base::result 443 utf16_out(range<const C>& from, range<char>& to, 444 unsigned long maxcode = max_code_point, codecvt_mode mode = {}) 445 { 446 if (!write_utf8_bom(to, mode)) 447 return codecvt_base::partial; 448 while (from.size()) 449 { 450 char32_t c = from.next[0]; 451 int inc = 1; 452 if (is_high_surrogate(c)) 453 { 454 if (from.size() < 2) 455 return codecvt_base::ok; // stop converting at this point 456 457 const char32_t c2 = from.next[1]; 458 if (is_low_surrogate(c2)) 459 { 460 c = surrogate_pair_to_code_point(c, c2); 461 inc = 2; 462 } 463 else 464 return codecvt_base::error; 465 } 466 else if (is_low_surrogate(c)) 467 return codecvt_base::error; 468 if (c > maxcode) 
469 return codecvt_base::error; 470 if (!write_utf8_code_point(to, c)) 471 return codecvt_base::partial; 472 from.next += inc; 473 } 474 return codecvt_base::ok; 475 } 476 477 // return pos such that [begin,pos) is valid UTF-16 string no longer than max 478 const char* 479 utf16_span(const char* begin, const char* end, size_t max, 480 char32_t maxcode = max_code_point, codecvt_mode mode = {}) 481 { 482 range<const char> from{ begin, end }; 483 read_utf8_bom(from, mode); 484 size_t count = 0; 485 while (count+1 < max) 486 { 487 char32_t c = read_utf8_code_point(from, maxcode); 488 if (c > maxcode) 489 return from.next; 490 else if (c > max_single_utf16_unit) 491 ++count; 492 ++count; 493 } 494 if (count+1 == max) // take one more character if it fits in a single unit 495 read_utf8_code_point(from, std::max(max_single_utf16_unit, maxcode)); 496 return from.next; 497 } 498 499 // utf8 -> ucs2 500 codecvt_base::result 501 ucs2_in(range<const char>& from, range<char16_t>& to, 502 char32_t maxcode = max_code_point, codecvt_mode mode = {}) 503 { 504 return utf16_in(from, to, std::max(max_single_utf16_unit, maxcode), mode); 505 } 506 507 // ucs2 -> utf8 508 codecvt_base::result 509 ucs2_out(range<const char16_t>& from, range<char>& to, 510 char32_t maxcode = max_code_point, codecvt_mode mode = {}) 511 { 512 return utf16_out(from, to, std::max(max_single_utf16_unit, maxcode), mode); 513 } 514 515 // ucs2 -> utf16 516 codecvt_base::result 517 ucs2_out(range<const char16_t>& from, range<char16_t>& to, 518 char32_t maxcode = max_code_point, codecvt_mode mode = {}) 519 { 520 if (!write_utf16_bom(to, mode)) 521 return codecvt_base::partial; 522 while (from.size() && to.size()) 523 { 524 char16_t c = from.next[0]; 525 if (is_high_surrogate(c)) 526 return codecvt_base::error; 527 if (c > maxcode) 528 return codecvt_base::error; 529 *to.next++ = adjust_byte_order(c, mode); 530 ++from.next; 531 } 532 return from.size() == 0 ? 
codecvt_base::ok : codecvt_base::partial; 533 } 534 535 // utf16 -> ucs2 536 codecvt_base::result 537 ucs2_in(range<const char16_t>& from, range<char16_t>& to, 538 char32_t maxcode = max_code_point, codecvt_mode mode = {}) 539 { 540 if (read_utf16_bom(from, mode) == little_endian) 541 mode = codecvt_mode(mode & little_endian); 542 maxcode = std::max(max_single_utf16_unit, maxcode); 543 while (from.size() && to.size()) 544 { 545 const char32_t c = read_utf16_code_point(from, maxcode, mode); 546 if (c == incomplete_mb_character) 547 return codecvt_base::partial; 548 if (c > maxcode) 549 return codecvt_base::error; 550 *to.next++ = c; 551 } 552 return from.size() == 0 ? codecvt_base::ok : codecvt_base::partial; 553 } 554 555 const char16_t* 556 ucs2_span(const char16_t* begin, const char16_t* end, size_t max, 557 char32_t maxcode, codecvt_mode mode) 558 { 559 range<const char16_t> from{ begin, end }; 560 if (read_utf16_bom(from, mode) == little_endian) 561 mode = codecvt_mode(mode & little_endian); 562 maxcode = std::max(max_single_utf16_unit, maxcode); 563 char32_t c = 0; 564 while (max-- && c <= maxcode) 565 c = read_utf16_code_point(from, maxcode, mode); 566 return from.next; 567 } 568 569 const char* 570 ucs2_span(const char* begin, const char* end, size_t max, 571 char32_t maxcode, codecvt_mode mode) 572 { 573 range<const char> from{ begin, end }; 574 read_utf8_bom(from, mode); 575 maxcode = std::max(max_single_utf16_unit, maxcode); 576 char32_t c = 0; 577 while (max-- && c <= maxcode) 578 c = read_utf8_code_point(from, maxcode); 579 return from.next; 580 } 581 582 // return pos such that [begin,pos) is valid UCS-4 string no longer than max 583 const char* 584 ucs4_span(const char* begin, const char* end, size_t max, 585 char32_t maxcode = max_code_point, codecvt_mode mode = {}) 586 { 587 range<const char> from{ begin, end }; 588 read_utf8_bom(from, mode); 589 char32_t c = 0; 590 while (max-- && c <= maxcode) 591 c = read_utf8_code_point(from, maxcode); 592 
return from.next; 593 } 594 595 // return pos such that [begin,pos) is valid UCS-4 string no longer than max 596 const char16_t* 597 ucs4_span(const char16_t* begin, const char16_t* end, size_t max, 598 char32_t maxcode = max_code_point, codecvt_mode mode = {}) 599 { 600 range<const char16_t> from{ begin, end }; 601 if (read_utf16_bom(from, mode) == little_endian) 602 mode = codecvt_mode(mode & little_endian); 603 char32_t c = 0; 604 while (max-- && c <= maxcode) 605 c = read_utf16_code_point(from, maxcode, mode); 606 return from.next; 607 } 608} 609 610// Define members of codecvt<char16_t, char, mbstate_t> specialization. 611// Converts from UTF-8 to UTF-16. 612 613locale::id codecvt<char16_t, char, mbstate_t>::id; 614 615codecvt<char16_t, char, mbstate_t>::~codecvt() { } 616 617codecvt_base::result 618codecvt<char16_t, char, mbstate_t>:: 619do_out(state_type&, 620 const intern_type* __from, 621 const intern_type* __from_end, const intern_type*& __from_next, 622 extern_type* __to, extern_type* __to_end, 623 extern_type*& __to_next) const 624{ 625 range<const char16_t> from{ __from, __from_end }; 626 range<char> to{ __to, __to_end }; 627 auto res = utf16_out(from, to); 628 __from_next = from.next; 629 __to_next = to.next; 630 return res; 631} 632 633codecvt_base::result 634codecvt<char16_t, char, mbstate_t>:: 635do_unshift(state_type&, extern_type* __to, extern_type*, 636 extern_type*& __to_next) const 637{ 638 __to_next = __to; 639 return noconv; // we don't use mbstate_t for the unicode facets 640} 641 642codecvt_base::result 643codecvt<char16_t, char, mbstate_t>:: 644do_in(state_type&, const extern_type* __from, const extern_type* __from_end, 645 const extern_type*& __from_next, 646 intern_type* __to, intern_type* __to_end, 647 intern_type*& __to_next) const 648{ 649 range<const char> from{ __from, __from_end }; 650 range<char16_t> to{ __to, __to_end }; 651#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ 652 codecvt_mode mode = {}; 653#else 654 codecvt_mode mode = 
little_endian; 655#endif 656 auto res = utf16_in(from, to, max_code_point, mode); 657 __from_next = from.next; 658 __to_next = to.next; 659 return res; 660} 661 662int 663codecvt<char16_t, char, mbstate_t>::do_encoding() const throw() 664{ return 0; } 665 666bool 667codecvt<char16_t, char, mbstate_t>::do_always_noconv() const throw() 668{ return false; } 669 670int 671codecvt<char16_t, char, mbstate_t>:: 672do_length(state_type&, const extern_type* __from, 673 const extern_type* __end, size_t __max) const 674{ 675 __end = utf16_span(__from, __end, __max); 676 return __end - __from; 677} 678 679int 680codecvt<char16_t, char, mbstate_t>::do_max_length() const throw() 681{ 682 // Any valid UTF-8 sequence of 3 bytes fits in a single 16-bit code unit, 683 // whereas 4 byte sequences require two 16-bit code units. 684 return 3; 685} 686 687// Define members of codecvt<char32_t, char, mbstate_t> specialization. 688// Converts from UTF-8 to UTF-32 (aka UCS-4). 689 690locale::id codecvt<char32_t, char, mbstate_t>::id; 691 692codecvt<char32_t, char, mbstate_t>::~codecvt() { } 693 694codecvt_base::result 695codecvt<char32_t, char, mbstate_t>:: 696do_out(state_type&, const intern_type* __from, const intern_type* __from_end, 697 const intern_type*& __from_next, 698 extern_type* __to, extern_type* __to_end, 699 extern_type*& __to_next) const 700{ 701 range<const char32_t> from{ __from, __from_end }; 702 range<char> to{ __to, __to_end }; 703 auto res = ucs4_out(from, to); 704 __from_next = from.next; 705 __to_next = to.next; 706 return res; 707} 708 709codecvt_base::result 710codecvt<char32_t, char, mbstate_t>:: 711do_unshift(state_type&, extern_type* __to, extern_type*, 712 extern_type*& __to_next) const 713{ 714 __to_next = __to; 715 return noconv; 716} 717 718codecvt_base::result 719codecvt<char32_t, char, mbstate_t>:: 720do_in(state_type&, const extern_type* __from, const extern_type* __from_end, 721 const extern_type*& __from_next, 722 intern_type* __to, intern_type* 
__to_end, 723 intern_type*& __to_next) const 724{ 725 range<const char> from{ __from, __from_end }; 726 range<char32_t> to{ __to, __to_end }; 727 auto res = ucs4_in(from, to); 728 __from_next = from.next; 729 __to_next = to.next; 730 return res; 731} 732 733int 734codecvt<char32_t, char, mbstate_t>::do_encoding() const throw() 735{ return 0; } 736 737bool 738codecvt<char32_t, char, mbstate_t>::do_always_noconv() const throw() 739{ return false; } 740 741int 742codecvt<char32_t, char, mbstate_t>:: 743do_length(state_type&, const extern_type* __from, 744 const extern_type* __end, size_t __max) const 745{ 746 __end = ucs4_span(__from, __end, __max); 747 return __end - __from; 748} 749 750int 751codecvt<char32_t, char, mbstate_t>::do_max_length() const throw() 752{ return 4; } 753 754// Define members of codecvt_utf8<char16_t> base class implementation. 755// Converts from UTF-8 to UCS-2. 756 757__codecvt_utf8_base<char16_t>::~__codecvt_utf8_base() { } 758 759codecvt_base::result 760__codecvt_utf8_base<char16_t>:: 761do_out(state_type&, const intern_type* __from, const intern_type* __from_end, 762 const intern_type*& __from_next, 763 extern_type* __to, extern_type* __to_end, 764 extern_type*& __to_next) const 765{ 766 range<const char16_t> from{ __from, __from_end }; 767 range<char> to{ __to, __to_end }; 768 auto res = ucs2_out(from, to, _M_maxcode, _M_mode); 769 __from_next = from.next; 770 __to_next = to.next; 771 return res; 772} 773 774codecvt_base::result 775__codecvt_utf8_base<char16_t>:: 776do_unshift(state_type&, extern_type* __to, extern_type*, 777 extern_type*& __to_next) const 778{ 779 __to_next = __to; 780 return noconv; 781} 782 783codecvt_base::result 784__codecvt_utf8_base<char16_t>:: 785do_in(state_type&, const extern_type* __from, const extern_type* __from_end, 786 const extern_type*& __from_next, 787 intern_type* __to, intern_type* __to_end, 788 intern_type*& __to_next) const 789{ 790 range<const char> from{ __from, __from_end }; 791 range<char16_t> 
to{ __to, __to_end }; 792 codecvt_mode mode = codecvt_mode(_M_mode & (consume_header|generate_header)); 793#if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__ 794 mode = codecvt_mode(mode | little_endian); 795#endif 796 auto res = ucs2_in(from, to, _M_maxcode, mode); 797 __from_next = from.next; 798 __to_next = to.next; 799 return res; 800} 801 802int 803__codecvt_utf8_base<char16_t>::do_encoding() const throw() 804{ return 0; } 805 806bool 807__codecvt_utf8_base<char16_t>::do_always_noconv() const throw() 808{ return false; } 809 810int 811__codecvt_utf8_base<char16_t>:: 812do_length(state_type&, const extern_type* __from, 813 const extern_type* __end, size_t __max) const 814{ 815 __end = ucs2_span(__from, __end, __max, _M_maxcode, _M_mode); 816 return __end - __from; 817} 818 819int 820__codecvt_utf8_base<char16_t>::do_max_length() const throw() 821{ return 3; } 822 823// Define members of codecvt_utf8<char32_t> base class implementation. 824// Converts from UTF-8 to UTF-32 (aka UCS-4). 825 826__codecvt_utf8_base<char32_t>::~__codecvt_utf8_base() { } 827 828codecvt_base::result 829__codecvt_utf8_base<char32_t>:: 830do_out(state_type&, const intern_type* __from, const intern_type* __from_end, 831 const intern_type*& __from_next, 832 extern_type* __to, extern_type* __to_end, 833 extern_type*& __to_next) const 834{ 835 range<const char32_t> from{ __from, __from_end }; 836 range<char> to{ __to, __to_end }; 837 auto res = ucs4_out(from, to, _M_maxcode, _M_mode); 838 __from_next = from.next; 839 __to_next = to.next; 840 return res; 841} 842 843codecvt_base::result 844__codecvt_utf8_base<char32_t>:: 845do_unshift(state_type&, extern_type* __to, extern_type*, 846 extern_type*& __to_next) const 847{ 848 __to_next = __to; 849 return noconv; 850} 851 852codecvt_base::result 853__codecvt_utf8_base<char32_t>:: 854do_in(state_type&, const extern_type* __from, const extern_type* __from_end, 855 const extern_type*& __from_next, 856 intern_type* __to, intern_type* __to_end, 857 
intern_type*& __to_next) const 858{ 859 range<const char> from{ __from, __from_end }; 860 range<char32_t> to{ __to, __to_end }; 861 auto res = ucs4_in(from, to, _M_maxcode, _M_mode); 862 __from_next = from.next; 863 __to_next = to.next; 864 return res; 865} 866 867int 868__codecvt_utf8_base<char32_t>::do_encoding() const throw() 869{ return 0; } 870 871bool 872__codecvt_utf8_base<char32_t>::do_always_noconv() const throw() 873{ return false; } 874 875int 876__codecvt_utf8_base<char32_t>:: 877do_length(state_type&, const extern_type* __from, 878 const extern_type* __end, size_t __max) const 879{ 880 __end = ucs4_span(__from, __end, __max, _M_maxcode, _M_mode); 881 return __end - __from; 882} 883 884int 885__codecvt_utf8_base<char32_t>::do_max_length() const throw() 886{ return 4; } 887 888#ifdef _GLIBCXX_USE_WCHAR_T 889// Define members of codecvt_utf8<wchar_t> base class implementation. 890// Converts from UTF-8 to UCS-2 or UCS-4 depending on sizeof(wchar_t). 891 892__codecvt_utf8_base<wchar_t>::~__codecvt_utf8_base() { } 893 894codecvt_base::result 895__codecvt_utf8_base<wchar_t>:: 896do_out(state_type&, const intern_type* __from, const intern_type* __from_end, 897 const intern_type*& __from_next, 898 extern_type* __to, extern_type* __to_end, 899 extern_type*& __to_next) const 900{ 901 range<char> to{ __to, __to_end }; 902#if __SIZEOF_WCHAR_T__ == 2 903 range<const char16_t> from{ 904 reinterpret_cast<const char16_t*>(__from), 905 reinterpret_cast<const char16_t*>(__from_end) 906 }; 907 auto res = ucs2_out(from, to, _M_maxcode, _M_mode); 908#elif __SIZEOF_WCHAR_T__ == 4 909 range<const char32_t> from{ 910 reinterpret_cast<const char32_t*>(__from), 911 reinterpret_cast<const char32_t*>(__from_end) 912 }; 913 auto res = ucs4_out(from, to, _M_maxcode, _M_mode); 914#else 915 return codecvt_base::error; 916#endif 917 __from_next = reinterpret_cast<const wchar_t*>(from.next); 918 __to_next = to.next; 919 return res; 920} 921 922codecvt_base::result 
923__codecvt_utf8_base<wchar_t>:: 924do_unshift(state_type&, extern_type* __to, extern_type*, 925 extern_type*& __to_next) const 926{ 927 __to_next = __to; 928 return noconv; 929} 930 931codecvt_base::result 932__codecvt_utf8_base<wchar_t>:: 933do_in(state_type&, const extern_type* __from, const extern_type* __from_end, 934 const extern_type*& __from_next, 935 intern_type* __to, intern_type* __to_end, 936 intern_type*& __to_next) const 937{ 938 range<const char> from{ __from, __from_end }; 939#if __SIZEOF_WCHAR_T__ == 2 940 range<char16_t> to{ 941 reinterpret_cast<char16_t*>(__to), 942 reinterpret_cast<char16_t*>(__to_end) 943 }; 944 auto res = ucs2_in(from, to, _M_maxcode, _M_mode); 945#elif __SIZEOF_WCHAR_T__ == 4 946 range<char32_t> to{ 947 reinterpret_cast<char32_t*>(__to), 948 reinterpret_cast<char32_t*>(__to_end) 949 }; 950 auto res = ucs4_in(from, to, _M_maxcode, _M_mode); 951#else 952 return codecvt_base::error; 953#endif 954 __from_next = from.next; 955 __to_next = reinterpret_cast<wchar_t*>(to.next); 956 return res; 957} 958 959int 960__codecvt_utf8_base<wchar_t>::do_encoding() const throw() 961{ return 0; } 962 963bool 964__codecvt_utf8_base<wchar_t>::do_always_noconv() const throw() 965{ return false; } 966 967int 968__codecvt_utf8_base<wchar_t>:: 969do_length(state_type&, const extern_type* __from, 970 const extern_type* __end, size_t __max) const 971{ 972#if __SIZEOF_WCHAR_T__ == 2 973 __end = ucs2_span(__from, __end, __max, _M_maxcode, _M_mode); 974#elif __SIZEOF_WCHAR_T__ == 4 975 __end = ucs4_span(__from, __end, __max, _M_maxcode, _M_mode); 976#else 977 __end = __from; 978#endif 979 return __end - __from; 980} 981 982int 983__codecvt_utf8_base<wchar_t>::do_max_length() const throw() 984{ return 4; } 985#endif 986 987// Define members of codecvt_utf16<char16_t> base class implementation. 988// Converts from UTF-16 to UCS-2. 
989 990__codecvt_utf16_base<char16_t>::~__codecvt_utf16_base() { } 991 992codecvt_base::result 993__codecvt_utf16_base<char16_t>:: 994do_out(state_type&, const intern_type* __from, const intern_type* __from_end, 995 const intern_type*& __from_next, 996 extern_type* __to, extern_type* __to_end, 997 extern_type*& __to_next) const 998{ 999 range<const char16_t> from{ __from, __from_end }; 1000 range<char16_t> to{ 1001 reinterpret_cast<char16_t*>(__to), 1002 reinterpret_cast<char16_t*>(__to_end) 1003 }; 1004 auto res = ucs2_out(from, to, _M_maxcode, _M_mode); 1005 __from_next = from.next; 1006 __to_next = reinterpret_cast<char*>(to.next); 1007 return res; 1008} 1009 1010codecvt_base::result 1011__codecvt_utf16_base<char16_t>:: 1012do_unshift(state_type&, extern_type* __to, extern_type*, 1013 extern_type*& __to_next) const 1014{ 1015 __to_next = __to; 1016 return noconv; 1017} 1018 1019codecvt_base::result 1020__codecvt_utf16_base<char16_t>:: 1021do_in(state_type&, const extern_type* __from, const extern_type* __from_end, 1022 const extern_type*& __from_next, 1023 intern_type* __to, intern_type* __to_end, 1024 intern_type*& __to_next) const 1025{ 1026 range<const char16_t> from{ 1027 reinterpret_cast<const char16_t*>(__from), 1028 reinterpret_cast<const char16_t*>(__from_end) 1029 }; 1030 range<char16_t> to{ __to, __to_end }; 1031 auto res = ucs2_in(from, to, _M_maxcode, _M_mode); 1032 __from_next = reinterpret_cast<const char*>(from.next); 1033 __to_next = to.next; 1034 return res; 1035} 1036 1037int 1038__codecvt_utf16_base<char16_t>::do_encoding() const throw() 1039{ return 1; } 1040 1041bool 1042__codecvt_utf16_base<char16_t>::do_always_noconv() const throw() 1043{ return false; } 1044 1045int 1046__codecvt_utf16_base<char16_t>:: 1047do_length(state_type&, const extern_type* __from, 1048 const extern_type* __end, size_t __max) const 1049{ 1050 auto next = reinterpret_cast<const char16_t*>(__from); 1051 next = ucs2_span(next, reinterpret_cast<const char16_t*>(__end), 
__max, 1052 _M_maxcode, _M_mode); 1053 return reinterpret_cast<const char*>(next) - __from; 1054} 1055 1056int 1057__codecvt_utf16_base<char16_t>::do_max_length() const throw() 1058{ return 3; } 1059 1060// Define members of codecvt_utf16<char32_t> base class implementation. 1061// Converts from UTF-16 to UTF-32 (aka UCS-4). 1062 1063__codecvt_utf16_base<char32_t>::~__codecvt_utf16_base() { } 1064 1065codecvt_base::result 1066__codecvt_utf16_base<char32_t>:: 1067do_out(state_type&, const intern_type* __from, const intern_type* __from_end, 1068 const intern_type*& __from_next, 1069 extern_type* __to, extern_type* __to_end, 1070 extern_type*& __to_next) const 1071{ 1072 range<const char32_t> from{ __from, __from_end }; 1073 range<char16_t> to{ 1074 reinterpret_cast<char16_t*>(__to), 1075 reinterpret_cast<char16_t*>(__to_end) 1076 }; 1077 auto res = ucs4_out(from, to, _M_maxcode, _M_mode); 1078 __from_next = from.next; 1079 __to_next = reinterpret_cast<char*>(to.next); 1080 return res; 1081} 1082 1083codecvt_base::result 1084__codecvt_utf16_base<char32_t>:: 1085do_unshift(state_type&, extern_type* __to, extern_type*, 1086 extern_type*& __to_next) const 1087{ 1088 __to_next = __to; 1089 return noconv; 1090} 1091 1092codecvt_base::result 1093__codecvt_utf16_base<char32_t>:: 1094do_in(state_type&, const extern_type* __from, const extern_type* __from_end, 1095 const extern_type*& __from_next, 1096 intern_type* __to, intern_type* __to_end, 1097 intern_type*& __to_next) const 1098{ 1099 range<const char16_t> from{ 1100 reinterpret_cast<const char16_t*>(__from), 1101 reinterpret_cast<const char16_t*>(__from_end) 1102 }; 1103 range<char32_t> to{ __to, __to_end }; 1104 auto res = ucs4_in(from, to, _M_maxcode, _M_mode); 1105 __from_next = reinterpret_cast<const char*>(from.next); 1106 __to_next = to.next; 1107 return res; 1108} 1109 1110int 1111__codecvt_utf16_base<char32_t>::do_encoding() const throw() 1112{ return 0; } 1113 1114bool 
1115__codecvt_utf16_base<char32_t>::do_always_noconv() const throw() 1116{ return false; } 1117 1118int 1119__codecvt_utf16_base<char32_t>:: 1120do_length(state_type&, const extern_type* __from, 1121 const extern_type* __end, size_t __max) const 1122{ 1123 auto next = reinterpret_cast<const char16_t*>(__from); 1124 next = ucs4_span(next, reinterpret_cast<const char16_t*>(__end), __max, 1125 _M_maxcode, _M_mode); 1126 return reinterpret_cast<const char*>(next) - __from; 1127} 1128 1129int 1130__codecvt_utf16_base<char32_t>::do_max_length() const throw() 1131{ return 4; } 1132 1133#ifdef _GLIBCXX_USE_WCHAR_T 1134// Define members of codecvt_utf16<wchar_t> base class implementation. 1135// Converts from UTF-8 to UCS-2 or UCS-4 depending on sizeof(wchar_t). 1136 1137__codecvt_utf16_base<wchar_t>::~__codecvt_utf16_base() { } 1138 1139codecvt_base::result 1140__codecvt_utf16_base<wchar_t>:: 1141do_out(state_type&, const intern_type* __from, const intern_type* __from_end, 1142 const intern_type*& __from_next, 1143 extern_type* __to, extern_type* __to_end, 1144 extern_type*& __to_next) const 1145{ 1146 range<char> to{ __to, __to_end }; 1147#if __SIZEOF_WCHAR_T__ == 2 1148 range<const char16_t> from{ 1149 reinterpret_cast<const char16_t*>(__from), 1150 reinterpret_cast<const char16_t*>(__from_end) 1151 }; 1152 auto res = ucs2_out(from, to, _M_maxcode, _M_mode); 1153#elif __SIZEOF_WCHAR_T__ == 4 1154 range<const char32_t> from{ 1155 reinterpret_cast<const char32_t*>(__from), 1156 reinterpret_cast<const char32_t*>(__from_end) 1157 }; 1158 auto res = ucs4_out(from, to, _M_maxcode, _M_mode); 1159#else 1160 return codecvt_base::error; 1161#endif 1162 __from_next = reinterpret_cast<const wchar_t*>(from.next); 1163 __to_next = to.next; 1164 return res; 1165} 1166 1167codecvt_base::result 1168__codecvt_utf16_base<wchar_t>:: 1169do_unshift(state_type&, extern_type* __to, extern_type*, 1170 extern_type*& __to_next) const 1171{ 1172 __to_next = __to; 1173 return noconv; 1174} 1175 
1176codecvt_base::result 1177__codecvt_utf16_base<wchar_t>:: 1178do_in(state_type&, const extern_type* __from, const extern_type* __from_end, 1179 const extern_type*& __from_next, 1180 intern_type* __to, intern_type* __to_end, 1181 intern_type*& __to_next) const 1182{ 1183 range<const char> from{ __from, __from_end }; 1184#if __SIZEOF_WCHAR_T__ == 2 1185 range<char16_t> to{ 1186 reinterpret_cast<char16_t*>(__to), 1187 reinterpret_cast<char16_t*>(__to_end) 1188 }; 1189 auto res = ucs2_in(from, to, _M_maxcode, _M_mode); 1190#elif __SIZEOF_WCHAR_T__ == 4 1191 range<char32_t> to{ 1192 reinterpret_cast<char32_t*>(__to), 1193 reinterpret_cast<char32_t*>(__to_end) 1194 }; 1195 auto res = ucs4_in(from, to, _M_maxcode, _M_mode); 1196#else 1197 return codecvt_base::error; 1198#endif 1199 __from_next = from.next; 1200 __to_next = reinterpret_cast<wchar_t*>(to.next); 1201 return res; 1202} 1203 1204int 1205__codecvt_utf16_base<wchar_t>::do_encoding() const throw() 1206{ return 0; } 1207 1208bool 1209__codecvt_utf16_base<wchar_t>::do_always_noconv() const throw() 1210{ return false; } 1211 1212int 1213__codecvt_utf16_base<wchar_t>:: 1214do_length(state_type&, const extern_type* __from, 1215 const extern_type* __end, size_t __max) const 1216{ 1217 auto next = reinterpret_cast<const char16_t*>(__from); 1218#if __SIZEOF_WCHAR_T__ == 2 1219 next = ucs2_span(next, reinterpret_cast<const char16_t*>(__end), __max, 1220 _M_maxcode, _M_mode); 1221#elif __SIZEOF_WCHAR_T__ == 4 1222 next = ucs4_span(next, reinterpret_cast<const char16_t*>(__end), __max, 1223 _M_maxcode, _M_mode); 1224#endif 1225 return reinterpret_cast<const char*>(next) - __from; 1226} 1227 1228int 1229__codecvt_utf16_base<wchar_t>::do_max_length() const throw() 1230{ return 4; } 1231#endif 1232 1233// Define members of codecvt_utf8_utf16<char16_t> base class implementation. 1234// Converts from UTF-8 to UTF-16. 
1235 1236__codecvt_utf8_utf16_base<char16_t>::~__codecvt_utf8_utf16_base() { } 1237 1238codecvt_base::result 1239__codecvt_utf8_utf16_base<char16_t>:: 1240do_out(state_type&, const intern_type* __from, const intern_type* __from_end, 1241 const intern_type*& __from_next, 1242 extern_type* __to, extern_type* __to_end, 1243 extern_type*& __to_next) const 1244{ 1245 range<const char16_t> from{ __from, __from_end }; 1246 range<char> to{ __to, __to_end }; 1247 auto res = utf16_out(from, to, _M_maxcode, _M_mode); 1248 __from_next = from.next; 1249 __to_next = to.next; 1250 return res; 1251} 1252 1253codecvt_base::result 1254__codecvt_utf8_utf16_base<char16_t>:: 1255do_unshift(state_type&, extern_type* __to, extern_type*, 1256 extern_type*& __to_next) const 1257{ 1258 __to_next = __to; 1259 return noconv; 1260} 1261 1262codecvt_base::result 1263__codecvt_utf8_utf16_base<char16_t>:: 1264do_in(state_type&, const extern_type* __from, const extern_type* __from_end, 1265 const extern_type*& __from_next, 1266 intern_type* __to, intern_type* __to_end, 1267 intern_type*& __to_next) const 1268{ 1269 range<const char> from{ __from, __from_end }; 1270 range<char16_t> to{ __to, __to_end }; 1271 codecvt_mode mode = codecvt_mode(_M_mode & (consume_header|generate_header)); 1272#if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__ 1273 mode = codecvt_mode(mode | little_endian); 1274#endif 1275 auto res = utf16_in(from, to, _M_maxcode, mode); 1276 __from_next = from.next; 1277 __to_next = to.next; 1278 return res; 1279} 1280 1281int 1282__codecvt_utf8_utf16_base<char16_t>::do_encoding() const throw() 1283{ return 0; } 1284 1285bool 1286__codecvt_utf8_utf16_base<char16_t>::do_always_noconv() const throw() 1287{ return false; } 1288 1289int 1290__codecvt_utf8_utf16_base<char16_t>:: 1291do_length(state_type&, const extern_type* __from, 1292 const extern_type* __end, size_t __max) const 1293{ 1294 __end = utf16_span(__from, __end, __max, _M_maxcode, _M_mode); 1295 return __end - __from; 1296} 1297 
1298int 1299__codecvt_utf8_utf16_base<char16_t>::do_max_length() const throw() 1300{ 1301 // Any valid UTF-8 sequence of 3 bytes fits in a single 16-bit code unit, 1302 // whereas 4 byte sequences require two 16-bit code units. 1303 return 3; 1304} 1305 1306// Define members of codecvt_utf8_utf16<char32_t> base class implementation. 1307// Converts from UTF-8 to UTF-16. 1308 1309__codecvt_utf8_utf16_base<char32_t>::~__codecvt_utf8_utf16_base() { } 1310 1311codecvt_base::result 1312__codecvt_utf8_utf16_base<char32_t>:: 1313do_out(state_type&, const intern_type* __from, const intern_type* __from_end, 1314 const intern_type*& __from_next, 1315 extern_type* __to, extern_type* __to_end, 1316 extern_type*& __to_next) const 1317{ 1318 range<const char32_t> from{ __from, __from_end }; 1319 range<char> to{ __to, __to_end }; 1320 auto res = utf16_out(from, to, _M_maxcode, _M_mode); 1321 __from_next = from.next; 1322 __to_next = to.next; 1323 return res; 1324} 1325 1326codecvt_base::result 1327__codecvt_utf8_utf16_base<char32_t>:: 1328do_unshift(state_type&, extern_type* __to, extern_type*, 1329 extern_type*& __to_next) const 1330{ 1331 __to_next = __to; 1332 return noconv; 1333} 1334 1335codecvt_base::result 1336__codecvt_utf8_utf16_base<char32_t>:: 1337do_in(state_type&, const extern_type* __from, const extern_type* __from_end, 1338 const extern_type*& __from_next, 1339 intern_type* __to, intern_type* __to_end, 1340 intern_type*& __to_next) const 1341{ 1342 range<const char> from{ __from, __from_end }; 1343 range<char32_t> to{ __to, __to_end }; 1344 auto res = utf16_in(from, to, _M_maxcode, _M_mode); 1345 __from_next = from.next; 1346 __to_next = to.next; 1347 return res; 1348} 1349 1350int 1351__codecvt_utf8_utf16_base<char32_t>::do_encoding() const throw() 1352{ return 0; } 1353 1354bool 1355__codecvt_utf8_utf16_base<char32_t>::do_always_noconv() const throw() 1356{ return false; } 1357 1358int 1359__codecvt_utf8_utf16_base<char32_t>:: 1360do_length(state_type&, const 
extern_type* __from, 1361 const extern_type* __end, size_t __max) const 1362{ 1363 __end = utf16_span(__from, __end, __max, _M_maxcode, _M_mode); 1364 return __end - __from; 1365} 1366 1367int 1368__codecvt_utf8_utf16_base<char32_t>::do_max_length() const throw() 1369{ 1370 // Any valid UTF-8 sequence of 3 bytes fits in a single 16-bit code unit, 1371 // whereas 4 byte sequences require two 16-bit code units. 1372 return 3; 1373} 1374 1375#ifdef _GLIBCXX_USE_WCHAR_T 1376// Define members of codecvt_utf8_utf16<wchar_t> base class implementation. 1377// Converts from UTF-8 to UTF-16. 1378 1379__codecvt_utf8_utf16_base<wchar_t>::~__codecvt_utf8_utf16_base() { } 1380 1381codecvt_base::result 1382__codecvt_utf8_utf16_base<wchar_t>:: 1383do_out(state_type&, const intern_type* __from, const intern_type* __from_end, 1384 const intern_type*& __from_next, 1385 extern_type* __to, extern_type* __to_end, 1386 extern_type*& __to_next) const 1387{ 1388 range<const wchar_t> from{ __from, __from_end }; 1389 range<char> to{ __to, __to_end }; 1390 auto res = utf16_out(from, to, _M_maxcode, _M_mode); 1391 __from_next = from.next; 1392 __to_next = to.next; 1393 return res; 1394} 1395 1396codecvt_base::result 1397__codecvt_utf8_utf16_base<wchar_t>:: 1398do_unshift(state_type&, extern_type* __to, extern_type*, 1399 extern_type*& __to_next) const 1400{ 1401 __to_next = __to; 1402 return noconv; 1403} 1404 1405codecvt_base::result 1406__codecvt_utf8_utf16_base<wchar_t>:: 1407do_in(state_type&, const extern_type* __from, const extern_type* __from_end, 1408 const extern_type*& __from_next, 1409 intern_type* __to, intern_type* __to_end, 1410 intern_type*& __to_next) const 1411{ 1412 range<const char> from{ __from, __from_end }; 1413 range<wchar_t> to{ __to, __to_end }; 1414 auto res = utf16_in(from, to, _M_maxcode, _M_mode); 1415 __from_next = from.next; 1416 __to_next = to.next; 1417 return res; 1418} 1419 1420int 1421__codecvt_utf8_utf16_base<wchar_t>::do_encoding() const throw() 1422{ 
return 0; } 1423 1424bool 1425__codecvt_utf8_utf16_base<wchar_t>::do_always_noconv() const throw() 1426{ return false; } 1427 1428int 1429__codecvt_utf8_utf16_base<wchar_t>:: 1430do_length(state_type&, const extern_type* __from, 1431 const extern_type* __end, size_t __max) const 1432{ 1433 __end = utf16_span(__from, __end, __max, _M_maxcode, _M_mode); 1434 return __end - __from; 1435} 1436 1437int 1438__codecvt_utf8_utf16_base<wchar_t>::do_max_length() const throw() 1439{ 1440 // Any valid UTF-8 sequence of 3 bytes fits in a single 16-bit code unit, 1441 // whereas 4 byte sequences require two 16-bit code units. 1442 return 3; 1443} 1444#endif 1445 1446inline template class __codecvt_abstract_base<char16_t, char, mbstate_t>; 1447inline template class __codecvt_abstract_base<char32_t, char, mbstate_t>; 1448template class codecvt_byname<char16_t, char, mbstate_t>; 1449template class codecvt_byname<char32_t, char, mbstate_t>; 1450 1451_GLIBCXX_END_NAMESPACE_VERSION 1452} 1453#endif // _GLIBCXX_USE_C99_STDINT_TR1 1454