1// Locale support (codecvt) -*- C++ -*-
2
3// Copyright (C) 2015 Free Software Foundation, Inc.
4//
5// This file is part of the GNU ISO C++ Library.  This library is free
6// software; you can redistribute it and/or modify it under the
7// terms of the GNU General Public License as published by the
8// Free Software Foundation; either version 3, or (at your option)
9// any later version.
10
11// This library is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14// GNU General Public License for more details.
15
16// Under Section 7 of GPL version 3, you are granted additional
17// permissions described in the GCC Runtime Library Exception, version
18// 3.1, as published by the Free Software Foundation.
19
20// You should have received a copy of the GNU General Public License and
21// a copy of the GCC Runtime Library Exception along with this program;
22// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23// <http://www.gnu.org/licenses/>.
24
25#include <codecvt>
26#include <cstring>		// std::memcpy, std::memcmp
27#include <bits/stl_algobase.h>	// std::max
28
29#ifdef _GLIBCXX_USE_C99_STDINT_TR1
30namespace std _GLIBCXX_VISIBILITY(default)
31{
32_GLIBCXX_BEGIN_NAMESPACE_VERSION
33
34namespace
35{
36  // Largest code point that fits in a single UTF-16 code unit.
37  const char32_t max_single_utf16_unit = 0xFFFF;
38
39  const char32_t max_code_point = 0x10FFFF;
40
41  // The functions below rely on maxcode < incomplete_mb_character
42  // (which is enforced by the codecvt_utf* classes on construction).
43  const char32_t incomplete_mb_character = char32_t(-2);
44  const char32_t invalid_mb_sequence = char32_t(-1);
45
46  template<typename Elem>
47    struct range
48    {
49      Elem* next;
50      Elem* end;
51
52      Elem operator*() const { return *next; }
53
54      range& operator++() { ++next; return *this; }
55
56      size_t size() const { return end - next; }
57    };
58
59  // Multibyte sequences can have "header" consisting of Byte Order Mark
60  const unsigned char utf8_bom[3] = { 0xEF, 0xBB, 0xBF };
61  const unsigned char utf16_bom[4] = { 0xFE, 0xFF };
62  const unsigned char utf16le_bom[4] = { 0xFF, 0xFE };
63
64  template<size_t N>
65    inline bool
66    write_bom(range<char>& to, const unsigned char (&bom)[N])
67    {
68      if (to.size() < N)
69	return false;
70      memcpy(to.next, bom, N);
71      to.next += N;
72      return true;
73    }
74
75  // If generate_header is set in mode write out UTF-8 BOM.
76  bool
77  write_utf8_bom(range<char>& to, codecvt_mode mode)
78  {
79    if (mode & generate_header)
80      return write_bom(to, utf8_bom);
81    return true;
82  }
83
84  // If generate_header is set in mode write out the UTF-16 BOM indicated
85  // by whether little_endian is set in mode.
86  bool
87  write_utf16_bom(range<char16_t>& to, codecvt_mode mode)
88  {
89    if (mode & generate_header)
90    {
91      if (!to.size())
92	return false;
93      auto* bom = (mode & little_endian) ? utf16le_bom : utf16_bom;
94      std::memcpy(to.next, bom, 2);
95      ++to.next;
96    }
97    return true;
98  }
99
100  template<size_t N>
101    inline bool
102    read_bom(range<const char>& from, const unsigned char (&bom)[N])
103    {
104      if (from.size() >= N && !memcmp(from.next, bom, N))
105	{
106	  from.next += N;
107	  return true;
108	}
109      return false;
110    }
111
112  // If consume_header is set in mode update from.next to after any BOM.
113  void
114  read_utf8_bom(range<const char>& from, codecvt_mode mode)
115  {
116    if (mode & consume_header)
117      read_bom(from, utf8_bom);
118  }
119
120  // If consume_header is set in mode update from.next to after any BOM.
121  // Return little_endian iff the UTF-16LE BOM was present.
122  codecvt_mode
123  read_utf16_bom(range<const char16_t>& from, codecvt_mode mode)
124  {
125    if (mode & consume_header && from.size())
126      {
127	if (*from.next == 0xFEFF)
128	  ++from.next;
129	else if (*from.next == 0xFFFE)
130	  {
131	    ++from.next;
132	    return little_endian;
133	  }
134      }
135    return {};
136  }
137
138  // Read a codepoint from a UTF-8 multibyte sequence.
139  // Updates from.next if the codepoint is not greater than maxcode.
140  // Returns invalid_mb_sequence, incomplete_mb_character or the code point.
141  char32_t
142  read_utf8_code_point(range<const char>& from, unsigned long maxcode)
143  {
144    const size_t avail = from.size();
145    if (avail == 0)
146      return incomplete_mb_character;
147    unsigned char c1 = from.next[0];
148    // https://en.wikipedia.org/wiki/UTF-8#Sample_code
149    if (c1 < 0x80)
150    {
151      ++from.next;
152      return c1;
153    }
154    else if (c1 < 0xC2) // continuation or overlong 2-byte sequence
155      return invalid_mb_sequence;
156    else if (c1 < 0xE0) // 2-byte sequence
157    {
158      if (avail < 2)
159	return incomplete_mb_character;
160      unsigned char c2 = from.next[1];
161      if ((c2 & 0xC0) != 0x80)
162	return invalid_mb_sequence;
163      char32_t c = (c1 << 6) + c2 - 0x3080;
164      if (c <= maxcode)
165	from.next += 2;
166      return c;
167    }
168    else if (c1 < 0xF0) // 3-byte sequence
169    {
170      if (avail < 3)
171	return incomplete_mb_character;
172      unsigned char c2 = from.next[1];
173      if ((c2 & 0xC0) != 0x80)
174	return invalid_mb_sequence;
175      if (c1 == 0xE0 && c2 < 0xA0) // overlong
176	return invalid_mb_sequence;
177      unsigned char c3 = from.next[2];
178      if ((c3 & 0xC0) != 0x80)
179	return invalid_mb_sequence;
180      char32_t c = (c1 << 12) + (c2 << 6) + c3 - 0xE2080;
181      if (c <= maxcode)
182	from.next += 3;
183      return c;
184    }
185    else if (c1 < 0xF5) // 4-byte sequence
186    {
187      if (avail < 4)
188	return incomplete_mb_character;
189      unsigned char c2 = from.next[1];
190      if ((c2 & 0xC0) != 0x80)
191	return invalid_mb_sequence;
192      if (c1 == 0xF0 && c2 < 0x90) // overlong
193	return invalid_mb_sequence;
194      if (c1 == 0xF4 && c2 >= 0x90) // > U+10FFFF
195      return invalid_mb_sequence;
196      unsigned char c3 = from.next[2];
197      if ((c3 & 0xC0) != 0x80)
198	return invalid_mb_sequence;
199      unsigned char c4 = from.next[3];
200      if ((c4 & 0xC0) != 0x80)
201	return invalid_mb_sequence;
202      char32_t c = (c1 << 18) + (c2 << 12) + (c3 << 6) + c4 - 0x3C82080;
203      if (c <= maxcode)
204	from.next += 4;
205      return c;
206    }
207    else // > U+10FFFF
208      return invalid_mb_sequence;
209  }
210
211  bool
212  write_utf8_code_point(range<char>& to, char32_t code_point)
213  {
214    if (code_point < 0x80)
215      {
216	if (to.size() < 1)
217	  return false;
218	*to.next++ = code_point;
219      }
220    else if (code_point <= 0x7FF)
221      {
222	if (to.size() < 2)
223	  return false;
224	*to.next++ = (code_point >> 6) + 0xC0;
225	*to.next++ = (code_point & 0x3F) + 0x80;
226      }
227    else if (code_point <= 0xFFFF)
228      {
229	if (to.size() < 3)
230	  return false;
231	*to.next++ = (code_point >> 12) + 0xE0;
232	*to.next++ = ((code_point >> 6) & 0x3F) + 0x80;
233	*to.next++ = (code_point & 0x3F) + 0x80;
234      }
235    else if (code_point <= 0x10FFFF)
236      {
237	if (to.size() < 4)
238	  return false;
239	*to.next++ = (code_point >> 18) + 0xF0;
240	*to.next++ = ((code_point >> 12) & 0x3F) + 0x80;
241	*to.next++ = ((code_point >> 6) & 0x3F) + 0x80;
242	*to.next++ = (code_point & 0x3F) + 0x80;
243      }
244    else
245      return false;
246    return true;
247  }
248
249  inline char16_t
250  adjust_byte_order(char16_t c, codecvt_mode mode)
251  {
252#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
253    return (mode & little_endian) ? __builtin_bswap16(c) : c;
254#else
255    return (mode & little_endian) ? c : __builtin_bswap16(c);
256#endif
257  }
258
259  // Return true if c is a high-surrogate (aka leading) code point.
260  inline bool
261  is_high_surrogate(char32_t c)
262  {
263    return c >= 0xD800 && c <= 0xDBFF;
264  }
265
266  // Return true if c is a low-surrogate (aka trailing) code point.
267  inline bool
268  is_low_surrogate(char32_t c)
269  {
270    return c >= 0xDC00 && c <= 0xDFFF;
271  }
272
273  inline char32_t
274  surrogate_pair_to_code_point(char32_t high, char32_t low)
275  {
276    return (high << 10) + low - 0x35FDC00;
277  }
278
279  // Read a codepoint from a UTF-16 multibyte sequence.
280  // The sequence's endianness is indicated by (mode & little_endian).
281  // Updates from.next if the codepoint is not greater than maxcode.
282  // Returns invalid_mb_sequence, incomplete_mb_character or the code point.
283  char32_t
284  read_utf16_code_point(range<const char16_t>& from, unsigned long maxcode,
285			codecvt_mode mode)
286  {
287    const size_t avail = from.size();
288    if (avail == 0)
289      return incomplete_mb_character;
290    int inc = 1;
291    char32_t c = adjust_byte_order(from.next[0], mode);
292    if (is_high_surrogate(c))
293      {
294	if (avail < 2)
295	  return incomplete_mb_character;
296	const char16_t c2 = adjust_byte_order(from.next[1], mode);
297	if (is_low_surrogate(c2))
298	  {
299	    c = surrogate_pair_to_code_point(c, c2);
300	    inc = 2;
301	  }
302	else
303	  return invalid_mb_sequence;
304      }
305    else if (is_low_surrogate(c))
306      return invalid_mb_sequence;
307    if (c <= maxcode)
308      from.next += inc;
309    return c;
310  }
311
312  template<typename C>
313  bool
314  write_utf16_code_point(range<C>& to, char32_t codepoint, codecvt_mode mode)
315  {
316    static_assert(sizeof(C) >= 2, "a code unit must be at least 16-bit");
317
318    if (codepoint < max_single_utf16_unit)
319      {
320	if (to.size() > 0)
321	  {
322	    *to.next = adjust_byte_order(codepoint, mode);
323	    ++to.next;
324	    return true;
325	  }
326      }
327    else if (to.size() > 1)
328      {
329	// Algorithm from http://www.unicode.org/faq/utf_bom.html#utf16-4
330	const char32_t LEAD_OFFSET = 0xD800 - (0x10000 >> 10);
331	char16_t lead = LEAD_OFFSET + (codepoint >> 10);
332	char16_t trail = 0xDC00 + (codepoint & 0x3FF);
333	to.next[0] = adjust_byte_order(lead, mode);
334	to.next[1] = adjust_byte_order(trail, mode);
335	to.next += 2;
336	return true;
337      }
338    return false;
339  }
340
341  // utf8 -> ucs4
342  codecvt_base::result
343  ucs4_in(range<const char>& from, range<char32_t>& to,
344          unsigned long maxcode = max_code_point, codecvt_mode mode = {})
345  {
346    read_utf8_bom(from, mode);
347    while (from.size() && to.size())
348      {
349	const char32_t codepoint = read_utf8_code_point(from, maxcode);
350	if (codepoint == incomplete_mb_character)
351	  return codecvt_base::partial;
352	if (codepoint > maxcode)
353	  return codecvt_base::error;
354	*to.next++ = codepoint;
355      }
356    return from.size() ? codecvt_base::partial : codecvt_base::ok;
357  }
358
359  // ucs4 -> utf8
360  codecvt_base::result
361  ucs4_out(range<const char32_t>& from, range<char>& to,
362           unsigned long maxcode = max_code_point, codecvt_mode mode = {})
363  {
364    if (!write_utf8_bom(to, mode))
365      return codecvt_base::partial;
366    while (from.size())
367      {
368	const char32_t c = from.next[0];
369	if (c > maxcode)
370	  return codecvt_base::error;
371	if (!write_utf8_code_point(to, c))
372	  return codecvt_base::partial;
373	++from.next;
374      }
375    return codecvt_base::ok;
376  }
377
378  // utf16 -> ucs4
379  codecvt_base::result
380  ucs4_in(range<const char16_t>& from, range<char32_t>& to,
381          unsigned long maxcode = max_code_point, codecvt_mode mode = {})
382  {
383    if (read_utf16_bom(from, mode) == little_endian)
384      mode = codecvt_mode(mode & little_endian);
385    while (from.size() && to.size())
386      {
387	const char32_t codepoint = read_utf16_code_point(from, maxcode, mode);
388	if (codepoint == incomplete_mb_character)
389	  return codecvt_base::partial;
390	if (codepoint > maxcode)
391	  return codecvt_base::error;
392	*to.next++ = codepoint;
393      }
394    return from.size() ? codecvt_base::partial : codecvt_base::ok;
395  }
396
397  // ucs4 -> utf16
398  codecvt_base::result
399  ucs4_out(range<const char32_t>& from, range<char16_t>& to,
400           unsigned long maxcode = max_code_point, codecvt_mode mode = {})
401  {
402    if (!write_utf16_bom(to, mode))
403      return codecvt_base::partial;
404    while (from.size())
405      {
406	const char32_t c = from.next[0];
407	if (c > maxcode)
408	  return codecvt_base::error;
409	if (!write_utf16_code_point(to, c, mode))
410	  return codecvt_base::partial;
411	++from.next;
412      }
413    return codecvt_base::ok;
414  }
415
416  // utf8 -> utf16
417  template<typename C>
418  codecvt_base::result
419  utf16_in(range<const char>& from, range<C>& to,
420           unsigned long maxcode = max_code_point, codecvt_mode mode = {})
421  {
422    read_utf8_bom(from, mode);
423    while (from.size() && to.size())
424      {
425	const char* const first = from.next;
426	const char32_t codepoint = read_utf8_code_point(from, maxcode);
427	if (codepoint == incomplete_mb_character)
428	  return codecvt_base::partial;
429	if (codepoint > maxcode)
430	  return codecvt_base::error;
431	if (!write_utf16_code_point(to, codepoint, mode))
432	  {
433	    from.next = first;
434	    return codecvt_base::partial;
435	  }
436      }
437    return codecvt_base::ok;
438  }
439
440  // utf16 -> utf8
441  template<typename C>
442  codecvt_base::result
443  utf16_out(range<const C>& from, range<char>& to,
444            unsigned long maxcode = max_code_point, codecvt_mode mode = {})
445  {
446    if (!write_utf8_bom(to, mode))
447      return codecvt_base::partial;
448    while (from.size())
449      {
450	char32_t c = from.next[0];
451	int inc = 1;
452	if (is_high_surrogate(c))
453	  {
454	    if (from.size() < 2)
455	      return codecvt_base::ok; // stop converting at this point
456
457	    const char32_t c2 = from.next[1];
458	    if (is_low_surrogate(c2))
459	      {
460		c = surrogate_pair_to_code_point(c, c2);
461		inc = 2;
462	      }
463	    else
464	      return codecvt_base::error;
465	  }
466	else if (is_low_surrogate(c))
467	  return codecvt_base::error;
468	if (c > maxcode)
469	  return codecvt_base::error;
470	if (!write_utf8_code_point(to, c))
471	  return codecvt_base::partial;
472	from.next += inc;
473      }
474    return codecvt_base::ok;
475  }
476
477  // return pos such that [begin,pos) is valid UTF-16 string no longer than max
478  const char*
479  utf16_span(const char* begin, const char* end, size_t max,
480	     char32_t maxcode = max_code_point, codecvt_mode mode = {})
481  {
482    range<const char> from{ begin, end };
483    read_utf8_bom(from, mode);
484    size_t count = 0;
485    while (count+1 < max)
486      {
487	char32_t c = read_utf8_code_point(from, maxcode);
488	if (c > maxcode)
489	  return from.next;
490	else if (c > max_single_utf16_unit)
491	  ++count;
492	++count;
493      }
494    if (count+1 == max) // take one more character if it fits in a single unit
495      read_utf8_code_point(from, std::max(max_single_utf16_unit, maxcode));
496    return from.next;
497  }
498
499  // utf8 -> ucs2
500  codecvt_base::result
501  ucs2_in(range<const char>& from, range<char16_t>& to,
502	  char32_t maxcode = max_code_point, codecvt_mode mode = {})
503  {
504    return utf16_in(from, to, std::max(max_single_utf16_unit, maxcode), mode);
505  }
506
507  // ucs2 -> utf8
508  codecvt_base::result
509  ucs2_out(range<const char16_t>& from, range<char>& to,
510	   char32_t maxcode = max_code_point, codecvt_mode mode = {})
511  {
512    return utf16_out(from, to, std::max(max_single_utf16_unit, maxcode), mode);
513  }
514
515  // ucs2 -> utf16
516  codecvt_base::result
517  ucs2_out(range<const char16_t>& from, range<char16_t>& to,
518	   char32_t maxcode = max_code_point, codecvt_mode mode = {})
519  {
520    if (!write_utf16_bom(to, mode))
521      return codecvt_base::partial;
522    while (from.size() && to.size())
523      {
524	char16_t c = from.next[0];
525	if (is_high_surrogate(c))
526	  return codecvt_base::error;
527	if (c > maxcode)
528	  return codecvt_base::error;
529	*to.next++ = adjust_byte_order(c, mode);
530	++from.next;
531      }
532    return from.size() == 0 ? codecvt_base::ok : codecvt_base::partial;
533  }
534
535  // utf16 -> ucs2
536  codecvt_base::result
537  ucs2_in(range<const char16_t>& from, range<char16_t>& to,
538	  char32_t maxcode = max_code_point, codecvt_mode mode = {})
539  {
540    if (read_utf16_bom(from, mode) == little_endian)
541      mode = codecvt_mode(mode & little_endian);
542    maxcode = std::max(max_single_utf16_unit, maxcode);
543    while (from.size() && to.size())
544      {
545	const char32_t c = read_utf16_code_point(from, maxcode, mode);
546	if (c == incomplete_mb_character)
547	  return codecvt_base::partial;
548	if (c > maxcode)
549	  return codecvt_base::error;
550	*to.next++ = c;
551      }
552    return from.size() == 0 ? codecvt_base::ok : codecvt_base::partial;
553  }
554
555  const char16_t*
556  ucs2_span(const char16_t* begin, const char16_t* end, size_t max,
557            char32_t maxcode, codecvt_mode mode)
558  {
559    range<const char16_t> from{ begin, end };
560    if (read_utf16_bom(from, mode) == little_endian)
561      mode = codecvt_mode(mode & little_endian);
562    maxcode = std::max(max_single_utf16_unit, maxcode);
563    char32_t c = 0;
564    while (max-- && c <= maxcode)
565      c = read_utf16_code_point(from, maxcode, mode);
566    return from.next;
567  }
568
569  const char*
570  ucs2_span(const char* begin, const char* end, size_t max,
571            char32_t maxcode, codecvt_mode mode)
572  {
573    range<const char> from{ begin, end };
574    read_utf8_bom(from, mode);
575    maxcode = std::max(max_single_utf16_unit, maxcode);
576    char32_t c = 0;
577    while (max-- && c <= maxcode)
578      c = read_utf8_code_point(from, maxcode);
579    return from.next;
580  }
581
582  // return pos such that [begin,pos) is valid UCS-4 string no longer than max
583  const char*
584  ucs4_span(const char* begin, const char* end, size_t max,
585            char32_t maxcode = max_code_point, codecvt_mode mode = {})
586  {
587    range<const char> from{ begin, end };
588    read_utf8_bom(from, mode);
589    char32_t c = 0;
590    while (max-- && c <= maxcode)
591      c = read_utf8_code_point(from, maxcode);
592    return from.next;
593  }
594
595  // return pos such that [begin,pos) is valid UCS-4 string no longer than max
596  const char16_t*
597  ucs4_span(const char16_t* begin, const char16_t* end, size_t max,
598            char32_t maxcode = max_code_point, codecvt_mode mode = {})
599  {
600    range<const char16_t> from{ begin, end };
601    if (read_utf16_bom(from, mode) == little_endian)
602      mode = codecvt_mode(mode & little_endian);
603    char32_t c = 0;
604    while (max-- && c <= maxcode)
605      c = read_utf16_code_point(from, maxcode, mode);
606    return from.next;
607  }
608}
609
// Define members of codecvt<char16_t, char, mbstate_t> specialization.
// Converts from UTF-8 to UTF-16.

// Definition of the facet's static locale::id member.
locale::id codecvt<char16_t, char, mbstate_t>::id;

// Out-of-line destructor; it has nothing to release.
codecvt<char16_t, char, mbstate_t>::~codecvt() { }
616
617codecvt_base::result
618codecvt<char16_t, char, mbstate_t>::
619do_out(state_type&,
620       const intern_type* __from,
621       const intern_type* __from_end, const intern_type*& __from_next,
622       extern_type* __to, extern_type* __to_end,
623       extern_type*& __to_next) const
624{
625  range<const char16_t> from{ __from, __from_end };
626  range<char> to{ __to, __to_end };
627  auto res = utf16_out(from, to);
628  __from_next = from.next;
629  __to_next = to.next;
630  return res;
631}
632
// There is no shift state to terminate: writes nothing, sets __to_next to
// __to and returns noconv.
codecvt_base::result
codecvt<char16_t, char, mbstate_t>::
do_unshift(state_type&, extern_type* __to, extern_type*,
	   extern_type*& __to_next) const
{
  __to_next = __to;
  return noconv; // we don't use mbstate_t for the unicode facets
}
641
642codecvt_base::result
643codecvt<char16_t, char, mbstate_t>::
644do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
645      const extern_type*& __from_next,
646      intern_type* __to, intern_type* __to_end,
647      intern_type*& __to_next) const
648{
649  range<const char> from{ __from, __from_end };
650  range<char16_t> to{ __to, __to_end };
651#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
652  codecvt_mode mode = {};
653#else
654  codecvt_mode mode = little_endian;
655#endif
656  auto res = utf16_in(from, to, max_code_point, mode);
657  __from_next = from.next;
658  __to_next = to.next;
659  return res;
660}
661
// Always reports 0, the value std::codecvt::encoding uses for a variable
// number of external characters per internal character.
int
codecvt<char16_t, char, mbstate_t>::do_encoding() const throw()
{ return 0; }
665
// Always false: do_in and do_out of this facet perform a real conversion.
bool
codecvt<char16_t, char, mbstate_t>::do_always_noconv() const throw()
{ return false; }
669
670int
671codecvt<char16_t, char, mbstate_t>::
672do_length(state_type&, const extern_type* __from,
673	  const extern_type* __end, size_t __max) const
674{
675  __end = utf16_span(__from, __end, __max);
676  return __end - __from;
677}
678
// Reports 3 as the largest number of UTF-8 bytes consumed per char16_t
// produced.
int
codecvt<char16_t, char, mbstate_t>::do_max_length() const throw()
{
  // Any valid UTF-8 sequence of 3 bytes fits in a single 16-bit code unit,
  // whereas 4 byte sequences require two 16-bit code units.
  return 3;
}
686
// Define members of codecvt<char32_t, char, mbstate_t> specialization.
// Converts from UTF-8 to UTF-32 (aka UCS-4).

// Definition of the facet's static locale::id member.
locale::id codecvt<char32_t, char, mbstate_t>::id;

// Out-of-line destructor; it has nothing to release.
codecvt<char32_t, char, mbstate_t>::~codecvt() { }
693
694codecvt_base::result
695codecvt<char32_t, char, mbstate_t>::
696do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
697       const intern_type*& __from_next,
698       extern_type* __to, extern_type* __to_end,
699       extern_type*& __to_next) const
700{
701  range<const char32_t> from{ __from, __from_end };
702  range<char> to{ __to, __to_end };
703  auto res = ucs4_out(from, to);
704  __from_next = from.next;
705  __to_next = to.next;
706  return res;
707}
708
// There is no shift state to terminate: writes nothing, sets __to_next to
// __to and returns noconv.
codecvt_base::result
codecvt<char32_t, char, mbstate_t>::
do_unshift(state_type&, extern_type* __to, extern_type*,
	   extern_type*& __to_next) const
{
  __to_next = __to;
  return noconv;
}
717
718codecvt_base::result
719codecvt<char32_t, char, mbstate_t>::
720do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
721      const extern_type*& __from_next,
722      intern_type* __to, intern_type* __to_end,
723      intern_type*& __to_next) const
724{
725  range<const char> from{ __from, __from_end };
726  range<char32_t> to{ __to, __to_end };
727  auto res = ucs4_in(from, to);
728  __from_next = from.next;
729  __to_next = to.next;
730  return res;
731}
732
// Always reports 0, the value std::codecvt::encoding uses for a variable
// number of external characters per internal character.
int
codecvt<char32_t, char, mbstate_t>::do_encoding() const throw()
{ return 0; }
736
// Always false: do_in and do_out of this facet perform a real conversion.
bool
codecvt<char32_t, char, mbstate_t>::do_always_noconv() const throw()
{ return false; }
740
741int
742codecvt<char32_t, char, mbstate_t>::
743do_length(state_type&, const extern_type* __from,
744	  const extern_type* __end, size_t __max) const
745{
746  __end = ucs4_span(__from, __end, __max);
747  return __end - __from;
748}
749
// Reports 4: write_utf8_code_point emits at most four bytes per code point.
int
codecvt<char32_t, char, mbstate_t>::do_max_length() const throw()
{ return 4; }
753
// Define members of codecvt_utf8<char16_t> base class implementation.
// Converts from UTF-8 to UCS-2.

// Out-of-line destructor; it has nothing to release.
__codecvt_utf8_base<char16_t>::~__codecvt_utf8_base() { }
758
759codecvt_base::result
760__codecvt_utf8_base<char16_t>::
761do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
762       const intern_type*& __from_next,
763       extern_type* __to, extern_type* __to_end,
764       extern_type*& __to_next) const
765{
766  range<const char16_t> from{ __from, __from_end };
767  range<char> to{ __to, __to_end };
768  auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
769  __from_next = from.next;
770  __to_next = to.next;
771  return res;
772}
773
// There is no shift state to terminate: writes nothing, sets __to_next to
// __to and returns noconv.
codecvt_base::result
__codecvt_utf8_base<char16_t>::
do_unshift(state_type&, extern_type* __to, extern_type*,
	   extern_type*& __to_next) const
{
  __to_next = __to;
  return noconv;
}
782
783codecvt_base::result
784__codecvt_utf8_base<char16_t>::
785do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
786      const extern_type*& __from_next,
787      intern_type* __to, intern_type* __to_end,
788      intern_type*& __to_next) const
789{
790  range<const char> from{ __from, __from_end };
791  range<char16_t> to{ __to, __to_end };
792  codecvt_mode mode = codecvt_mode(_M_mode & (consume_header|generate_header));
793#if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
794  mode = codecvt_mode(mode | little_endian);
795#endif
796  auto res = ucs2_in(from, to, _M_maxcode, mode);
797  __from_next = from.next;
798  __to_next = to.next;
799  return res;
800}
801
// Always reports 0, the value std::codecvt::encoding uses for a variable
// number of external characters per internal character.
int
__codecvt_utf8_base<char16_t>::do_encoding() const throw()
{ return 0; }
805
// Always false: do_in and do_out of this facet perform a real conversion.
bool
__codecvt_utf8_base<char16_t>::do_always_noconv() const throw()
{ return false; }
809
810int
811__codecvt_utf8_base<char16_t>::
812do_length(state_type&, const extern_type* __from,
813	  const extern_type* __end, size_t __max) const
814{
815  __end = ucs2_span(__from, __end, __max, _M_maxcode, _M_mode);
816  return __end - __from;
817}
818
// Reports 3: write_utf8_code_point emits at most three bytes for a code
// point that fits in a single 16-bit unit (<= U+FFFF).
int
__codecvt_utf8_base<char16_t>::do_max_length() const throw()
{ return 3; }
822
// Define members of codecvt_utf8<char32_t> base class implementation.
// Converts from UTF-8 to UTF-32 (aka UCS-4).

// Out-of-line destructor; it has nothing to release.
__codecvt_utf8_base<char32_t>::~__codecvt_utf8_base() { }
827
828codecvt_base::result
829__codecvt_utf8_base<char32_t>::
830do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
831       const intern_type*& __from_next,
832       extern_type* __to, extern_type* __to_end,
833       extern_type*& __to_next) const
834{
835  range<const char32_t> from{ __from, __from_end };
836  range<char> to{ __to, __to_end };
837  auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
838  __from_next = from.next;
839  __to_next = to.next;
840  return res;
841}
842
// There is no shift state to terminate: writes nothing, sets __to_next to
// __to and returns noconv.
codecvt_base::result
__codecvt_utf8_base<char32_t>::
do_unshift(state_type&, extern_type* __to, extern_type*,
	   extern_type*& __to_next) const
{
  __to_next = __to;
  return noconv;
}
851
852codecvt_base::result
853__codecvt_utf8_base<char32_t>::
854do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
855      const extern_type*& __from_next,
856      intern_type* __to, intern_type* __to_end,
857      intern_type*& __to_next) const
858{
859  range<const char> from{ __from, __from_end };
860  range<char32_t> to{ __to, __to_end };
861  auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
862  __from_next = from.next;
863  __to_next = to.next;
864  return res;
865}
866
// Always reports 0, the value std::codecvt::encoding uses for a variable
// number of external characters per internal character.
int
__codecvt_utf8_base<char32_t>::do_encoding() const throw()
{ return 0; }
870
// Always false: do_in and do_out of this facet perform a real conversion.
bool
__codecvt_utf8_base<char32_t>::do_always_noconv() const throw()
{ return false; }
874
875int
876__codecvt_utf8_base<char32_t>::
877do_length(state_type&, const extern_type* __from,
878	  const extern_type* __end, size_t __max) const
879{
880  __end = ucs4_span(__from, __end, __max, _M_maxcode, _M_mode);
881  return __end - __from;
882}
883
// Reports 4: write_utf8_code_point emits at most four bytes per code point.
int
__codecvt_utf8_base<char32_t>::do_max_length() const throw()
{ return 4; }
887
888#ifdef _GLIBCXX_USE_WCHAR_T
// Define members of codecvt_utf8<wchar_t> base class implementation.
// Converts from UTF-8 to UCS-2 or UCS-4 depending on sizeof(wchar_t).

// Out-of-line destructor; it has nothing to release.
__codecvt_utf8_base<wchar_t>::~__codecvt_utf8_base() { }
893
894codecvt_base::result
895__codecvt_utf8_base<wchar_t>::
896do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
897       const intern_type*& __from_next,
898       extern_type* __to, extern_type* __to_end,
899       extern_type*& __to_next) const
900{
901  range<char> to{ __to, __to_end };
902#if __SIZEOF_WCHAR_T__ == 2
903  range<const char16_t> from{
904    reinterpret_cast<const char16_t*>(__from),
905    reinterpret_cast<const char16_t*>(__from_end)
906  };
907  auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
908#elif __SIZEOF_WCHAR_T__ == 4
909  range<const char32_t> from{
910    reinterpret_cast<const char32_t*>(__from),
911    reinterpret_cast<const char32_t*>(__from_end)
912  };
913  auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
914#else
915  return codecvt_base::error;
916#endif
917  __from_next = reinterpret_cast<const wchar_t*>(from.next);
918  __to_next = to.next;
919  return res;
920}
921
// There is no shift state to terminate: writes nothing, sets __to_next to
// __to and returns noconv.
codecvt_base::result
__codecvt_utf8_base<wchar_t>::
do_unshift(state_type&, extern_type* __to, extern_type*,
	   extern_type*& __to_next) const
{
  __to_next = __to;
  return noconv;
}
930
931codecvt_base::result
932__codecvt_utf8_base<wchar_t>::
933do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
934      const extern_type*& __from_next,
935      intern_type* __to, intern_type* __to_end,
936      intern_type*& __to_next) const
937{
938  range<const char> from{ __from, __from_end };
939#if __SIZEOF_WCHAR_T__ == 2
940  range<char16_t> to{
941    reinterpret_cast<char16_t*>(__to),
942    reinterpret_cast<char16_t*>(__to_end)
943  };
944  auto res = ucs2_in(from, to, _M_maxcode, _M_mode);
945#elif __SIZEOF_WCHAR_T__ == 4
946  range<char32_t> to{
947    reinterpret_cast<char32_t*>(__to),
948    reinterpret_cast<char32_t*>(__to_end)
949  };
950  auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
951#else
952  return codecvt_base::error;
953#endif
954  __from_next = from.next;
955  __to_next = reinterpret_cast<wchar_t*>(to.next);
956  return res;
957}
958
// Always reports 0, the value std::codecvt::encoding uses for a variable
// number of external characters per internal character.
int
__codecvt_utf8_base<wchar_t>::do_encoding() const throw()
{ return 0; }
962
// Always false: do_in and do_out of this facet perform a real conversion.
bool
__codecvt_utf8_base<wchar_t>::do_always_noconv() const throw()
{ return false; }
966
967int
968__codecvt_utf8_base<wchar_t>::
969do_length(state_type&, const extern_type* __from,
970	  const extern_type* __end, size_t __max) const
971{
972#if __SIZEOF_WCHAR_T__ == 2
973  __end = ucs2_span(__from, __end, __max, _M_maxcode, _M_mode);
974#elif __SIZEOF_WCHAR_T__ == 4
975  __end = ucs4_span(__from, __end, __max, _M_maxcode, _M_mode);
976#else
977  __end = __from;
978#endif
979  return __end - __from;
980}
981
// Reports 4 regardless of sizeof(wchar_t); four is the most bytes that
// write_utf8_code_point emits for one code point.
int
__codecvt_utf8_base<wchar_t>::do_max_length() const throw()
{ return 4; }
985#endif
986
// Define members of codecvt_utf16<char16_t> base class implementation.
// Converts from UTF-16 to UCS-2.

// Out-of-line destructor; it has nothing to release.
__codecvt_utf16_base<char16_t>::~__codecvt_utf16_base() { }
991
992codecvt_base::result
993__codecvt_utf16_base<char16_t>::
994do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
995       const intern_type*& __from_next,
996       extern_type* __to, extern_type* __to_end,
997       extern_type*& __to_next) const
998{
999  range<const char16_t> from{ __from, __from_end };
1000  range<char16_t> to{
1001    reinterpret_cast<char16_t*>(__to),
1002    reinterpret_cast<char16_t*>(__to_end)
1003  };
1004  auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
1005  __from_next = from.next;
1006  __to_next = reinterpret_cast<char*>(to.next);
1007  return res;
1008}
1009
// There is no shift state to terminate: writes nothing, sets __to_next to
// __to and returns noconv.
codecvt_base::result
__codecvt_utf16_base<char16_t>::
do_unshift(state_type&, extern_type* __to, extern_type*,
	   extern_type*& __to_next) const
{
  __to_next = __to;
  return noconv;
}
1018
1019codecvt_base::result
1020__codecvt_utf16_base<char16_t>::
1021do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1022      const extern_type*& __from_next,
1023      intern_type* __to, intern_type* __to_end,
1024      intern_type*& __to_next) const
1025{
1026  range<const char16_t> from{
1027    reinterpret_cast<const char16_t*>(__from),
1028    reinterpret_cast<const char16_t*>(__from_end)
1029  };
1030  range<char16_t> to{ __to, __to_end };
1031  auto res = ucs2_in(from, to, _M_maxcode, _M_mode);
1032  __from_next = reinterpret_cast<const char*>(from.next);
1033  __to_next = to.next;
1034  return res;
1035}
1036
1037int
1038__codecvt_utf16_base<char16_t>::do_encoding() const throw()
1039{ return 1; }
1040
1041bool
1042__codecvt_utf16_base<char16_t>::do_always_noconv() const throw()
1043{ return false; }
1044
1045int
1046__codecvt_utf16_base<char16_t>::
1047do_length(state_type&, const extern_type* __from,
1048	  const extern_type* __end, size_t __max) const
1049{
1050  auto next = reinterpret_cast<const char16_t*>(__from);
1051  next = ucs2_span(next, reinterpret_cast<const char16_t*>(__end), __max,
1052		   _M_maxcode, _M_mode);
1053  return reinterpret_cast<const char*>(next) - __from;
1054}
1055
1056int
1057__codecvt_utf16_base<char16_t>::do_max_length() const throw()
1058{ return 3; }
1059
1060// Define members of codecvt_utf16<char32_t> base class implementation.
1061// Converts from UTF-16 to UTF-32 (aka UCS-4).
1062
1063__codecvt_utf16_base<char32_t>::~__codecvt_utf16_base() { }
1064
1065codecvt_base::result
1066__codecvt_utf16_base<char32_t>::
1067do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
1068       const intern_type*& __from_next,
1069       extern_type* __to, extern_type* __to_end,
1070       extern_type*& __to_next) const
1071{
1072  range<const char32_t> from{ __from, __from_end };
1073  range<char16_t> to{
1074    reinterpret_cast<char16_t*>(__to),
1075    reinterpret_cast<char16_t*>(__to_end)
1076  };
1077  auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
1078  __from_next = from.next;
1079  __to_next = reinterpret_cast<char*>(to.next);
1080  return res;
1081}
1082
1083codecvt_base::result
1084__codecvt_utf16_base<char32_t>::
1085do_unshift(state_type&, extern_type* __to, extern_type*,
1086	   extern_type*& __to_next) const
1087{
1088  __to_next = __to;
1089  return noconv;
1090}
1091
1092codecvt_base::result
1093__codecvt_utf16_base<char32_t>::
1094do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1095      const extern_type*& __from_next,
1096      intern_type* __to, intern_type* __to_end,
1097      intern_type*& __to_next) const
1098{
1099  range<const char16_t> from{
1100    reinterpret_cast<const char16_t*>(__from),
1101    reinterpret_cast<const char16_t*>(__from_end)
1102  };
1103  range<char32_t> to{ __to, __to_end };
1104  auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
1105  __from_next = reinterpret_cast<const char*>(from.next);
1106  __to_next = to.next;
1107  return res;
1108}
1109
1110int
1111__codecvt_utf16_base<char32_t>::do_encoding() const throw()
1112{ return 0; }
1113
1114bool
1115__codecvt_utf16_base<char32_t>::do_always_noconv() const throw()
1116{ return false; }
1117
1118int
1119__codecvt_utf16_base<char32_t>::
1120do_length(state_type&, const extern_type* __from,
1121	  const extern_type* __end, size_t __max) const
1122{
1123  auto next = reinterpret_cast<const char16_t*>(__from);
1124  next = ucs4_span(next, reinterpret_cast<const char16_t*>(__end), __max,
1125		   _M_maxcode, _M_mode);
1126  return reinterpret_cast<const char*>(next) - __from;
1127}
1128
1129int
1130__codecvt_utf16_base<char32_t>::do_max_length() const throw()
1131{ return 4; }
1132
1133#ifdef _GLIBCXX_USE_WCHAR_T
1134// Define members of codecvt_utf16<wchar_t> base class implementation.
// Converts from UTF-16 to UCS-2 or UCS-4 depending on sizeof(wchar_t).
1136
1137__codecvt_utf16_base<wchar_t>::~__codecvt_utf16_base() { }
1138
1139codecvt_base::result
1140__codecvt_utf16_base<wchar_t>::
1141do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
1142       const intern_type*& __from_next,
1143       extern_type* __to, extern_type* __to_end,
1144       extern_type*& __to_next) const
1145{
1146  range<char> to{ __to, __to_end };
1147#if __SIZEOF_WCHAR_T__ == 2
1148  range<const char16_t> from{
1149    reinterpret_cast<const char16_t*>(__from),
1150    reinterpret_cast<const char16_t*>(__from_end)
1151  };
1152  auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
1153#elif __SIZEOF_WCHAR_T__ == 4
1154  range<const char32_t> from{
1155    reinterpret_cast<const char32_t*>(__from),
1156    reinterpret_cast<const char32_t*>(__from_end)
1157  };
1158  auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
1159#else
1160  return codecvt_base::error;
1161#endif
1162  __from_next = reinterpret_cast<const wchar_t*>(from.next);
1163  __to_next = to.next;
1164  return res;
1165}
1166
1167codecvt_base::result
1168__codecvt_utf16_base<wchar_t>::
1169do_unshift(state_type&, extern_type* __to, extern_type*,
1170	   extern_type*& __to_next) const
1171{
1172  __to_next = __to;
1173  return noconv;
1174}
1175
1176codecvt_base::result
1177__codecvt_utf16_base<wchar_t>::
1178do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1179      const extern_type*& __from_next,
1180      intern_type* __to, intern_type* __to_end,
1181      intern_type*& __to_next) const
1182{
1183  range<const char> from{ __from, __from_end };
1184#if __SIZEOF_WCHAR_T__ == 2
1185  range<char16_t> to{
1186    reinterpret_cast<char16_t*>(__to),
1187    reinterpret_cast<char16_t*>(__to_end)
1188  };
1189  auto res = ucs2_in(from, to, _M_maxcode, _M_mode);
1190#elif __SIZEOF_WCHAR_T__ == 4
1191  range<char32_t> to{
1192    reinterpret_cast<char32_t*>(__to),
1193    reinterpret_cast<char32_t*>(__to_end)
1194  };
1195  auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
1196#else
1197  return codecvt_base::error;
1198#endif
1199  __from_next = from.next;
1200  __to_next = reinterpret_cast<wchar_t*>(to.next);
1201  return res;
1202}
1203
1204int
1205__codecvt_utf16_base<wchar_t>::do_encoding() const throw()
1206{ return 0; }
1207
1208bool
1209__codecvt_utf16_base<wchar_t>::do_always_noconv() const throw()
1210{ return false; }
1211
1212int
1213__codecvt_utf16_base<wchar_t>::
1214do_length(state_type&, const extern_type* __from,
1215	  const extern_type* __end, size_t __max) const
1216{
1217  auto next = reinterpret_cast<const char16_t*>(__from);
1218#if __SIZEOF_WCHAR_T__ == 2
1219  next = ucs2_span(next, reinterpret_cast<const char16_t*>(__end), __max,
1220		   _M_maxcode, _M_mode);
1221#elif __SIZEOF_WCHAR_T__ == 4
1222  next = ucs4_span(next, reinterpret_cast<const char16_t*>(__end), __max,
1223		   _M_maxcode, _M_mode);
1224#endif
1225  return reinterpret_cast<const char*>(next) - __from;
1226}
1227
1228int
1229__codecvt_utf16_base<wchar_t>::do_max_length() const throw()
1230{ return 4; }
1231#endif
1232
1233// Define members of codecvt_utf8_utf16<char16_t> base class implementation.
1234// Converts from UTF-8 to UTF-16.
1235
1236__codecvt_utf8_utf16_base<char16_t>::~__codecvt_utf8_utf16_base() { }
1237
1238codecvt_base::result
1239__codecvt_utf8_utf16_base<char16_t>::
1240do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
1241       const intern_type*& __from_next,
1242       extern_type* __to, extern_type* __to_end,
1243       extern_type*& __to_next) const
1244{
1245  range<const char16_t> from{ __from, __from_end };
1246  range<char> to{ __to, __to_end };
1247  auto res = utf16_out(from, to, _M_maxcode, _M_mode);
1248  __from_next = from.next;
1249  __to_next = to.next;
1250  return res;
1251}
1252
1253codecvt_base::result
1254__codecvt_utf8_utf16_base<char16_t>::
1255do_unshift(state_type&, extern_type* __to, extern_type*,
1256	   extern_type*& __to_next) const
1257{
1258  __to_next = __to;
1259  return noconv;
1260}
1261
1262codecvt_base::result
1263__codecvt_utf8_utf16_base<char16_t>::
1264do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1265      const extern_type*& __from_next,
1266      intern_type* __to, intern_type* __to_end,
1267      intern_type*& __to_next) const
1268{
1269  range<const char> from{ __from, __from_end };
1270  range<char16_t> to{ __to, __to_end };
1271  codecvt_mode mode = codecvt_mode(_M_mode & (consume_header|generate_header));
1272#if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
1273  mode = codecvt_mode(mode | little_endian);
1274#endif
1275  auto res = utf16_in(from, to, _M_maxcode, mode);
1276  __from_next = from.next;
1277  __to_next = to.next;
1278  return res;
1279}
1280
1281int
1282__codecvt_utf8_utf16_base<char16_t>::do_encoding() const throw()
1283{ return 0; }
1284
1285bool
1286__codecvt_utf8_utf16_base<char16_t>::do_always_noconv() const throw()
1287{ return false; }
1288
1289int
1290__codecvt_utf8_utf16_base<char16_t>::
1291do_length(state_type&, const extern_type* __from,
1292	  const extern_type* __end, size_t __max) const
1293{
1294  __end = utf16_span(__from, __end, __max, _M_maxcode, _M_mode);
1295  return __end - __from;
1296}
1297
1298int
1299__codecvt_utf8_utf16_base<char16_t>::do_max_length() const throw()
1300{
1301  // Any valid UTF-8 sequence of 3 bytes fits in a single 16-bit code unit,
1302  // whereas 4 byte sequences require two 16-bit code units.
1303  return 3;
1304}
1305
1306// Define members of codecvt_utf8_utf16<char32_t> base class implementation.
1307// Converts from UTF-8 to UTF-16.
1308
1309__codecvt_utf8_utf16_base<char32_t>::~__codecvt_utf8_utf16_base() { }
1310
1311codecvt_base::result
1312__codecvt_utf8_utf16_base<char32_t>::
1313do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
1314       const intern_type*& __from_next,
1315       extern_type* __to, extern_type* __to_end,
1316       extern_type*& __to_next) const
1317{
1318  range<const char32_t> from{ __from, __from_end };
1319  range<char> to{ __to, __to_end };
1320  auto res = utf16_out(from, to, _M_maxcode, _M_mode);
1321  __from_next = from.next;
1322  __to_next = to.next;
1323  return res;
1324}
1325
1326codecvt_base::result
1327__codecvt_utf8_utf16_base<char32_t>::
1328do_unshift(state_type&, extern_type* __to, extern_type*,
1329	   extern_type*& __to_next) const
1330{
1331  __to_next = __to;
1332  return noconv;
1333}
1334
1335codecvt_base::result
1336__codecvt_utf8_utf16_base<char32_t>::
1337do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1338      const extern_type*& __from_next,
1339      intern_type* __to, intern_type* __to_end,
1340      intern_type*& __to_next) const
1341{
1342  range<const char> from{ __from, __from_end };
1343  range<char32_t> to{ __to, __to_end };
1344  auto res = utf16_in(from, to, _M_maxcode, _M_mode);
1345  __from_next = from.next;
1346  __to_next = to.next;
1347  return res;
1348}
1349
1350int
1351__codecvt_utf8_utf16_base<char32_t>::do_encoding() const throw()
1352{ return 0; }
1353
1354bool
1355__codecvt_utf8_utf16_base<char32_t>::do_always_noconv() const throw()
1356{ return false; }
1357
1358int
1359__codecvt_utf8_utf16_base<char32_t>::
1360do_length(state_type&, const extern_type* __from,
1361	  const extern_type* __end, size_t __max) const
1362{
1363  __end = utf16_span(__from, __end, __max, _M_maxcode, _M_mode);
1364  return __end - __from;
1365}
1366
1367int
1368__codecvt_utf8_utf16_base<char32_t>::do_max_length() const throw()
1369{
1370  // Any valid UTF-8 sequence of 3 bytes fits in a single 16-bit code unit,
1371  // whereas 4 byte sequences require two 16-bit code units.
1372  return 3;
1373}
1374
1375#ifdef _GLIBCXX_USE_WCHAR_T
1376// Define members of codecvt_utf8_utf16<wchar_t> base class implementation.
1377// Converts from UTF-8 to UTF-16.
1378
1379__codecvt_utf8_utf16_base<wchar_t>::~__codecvt_utf8_utf16_base() { }
1380
1381codecvt_base::result
1382__codecvt_utf8_utf16_base<wchar_t>::
1383do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
1384       const intern_type*& __from_next,
1385       extern_type* __to, extern_type* __to_end,
1386       extern_type*& __to_next) const
1387{
1388  range<const wchar_t> from{ __from, __from_end };
1389  range<char> to{ __to, __to_end };
1390  auto res = utf16_out(from, to, _M_maxcode, _M_mode);
1391  __from_next = from.next;
1392  __to_next = to.next;
1393  return res;
1394}
1395
1396codecvt_base::result
1397__codecvt_utf8_utf16_base<wchar_t>::
1398do_unshift(state_type&, extern_type* __to, extern_type*,
1399	   extern_type*& __to_next) const
1400{
1401  __to_next = __to;
1402  return noconv;
1403}
1404
1405codecvt_base::result
1406__codecvt_utf8_utf16_base<wchar_t>::
1407do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1408      const extern_type*& __from_next,
1409      intern_type* __to, intern_type* __to_end,
1410      intern_type*& __to_next) const
1411{
1412  range<const char> from{ __from, __from_end };
1413  range<wchar_t> to{ __to, __to_end };
1414  auto res = utf16_in(from, to, _M_maxcode, _M_mode);
1415  __from_next = from.next;
1416  __to_next = to.next;
1417  return res;
1418}
1419
1420int
1421__codecvt_utf8_utf16_base<wchar_t>::do_encoding() const throw()
1422{ return 0; }
1423
1424bool
1425__codecvt_utf8_utf16_base<wchar_t>::do_always_noconv() const throw()
1426{ return false; }
1427
1428int
1429__codecvt_utf8_utf16_base<wchar_t>::
1430do_length(state_type&, const extern_type* __from,
1431	  const extern_type* __end, size_t __max) const
1432{
1433  __end = utf16_span(__from, __end, __max, _M_maxcode, _M_mode);
1434  return __end - __from;
1435}
1436
1437int
1438__codecvt_utf8_utf16_base<wchar_t>::do_max_length() const throw()
1439{
1440  // Any valid UTF-8 sequence of 3 bytes fits in a single 16-bit code unit,
1441  // whereas 4 byte sequences require two 16-bit code units.
1442  return 3;
1443}
1444#endif
1445
// Explicit instantiations for the char16_t and char32_t facets.
//
// "inline template" is a GNU extension: per the GCC manual it instantiates
// only the compiler support data for the class (i.e. the vtable) without
// instantiating its members.
inline template class __codecvt_abstract_base<char16_t, char, mbstate_t>;
inline template class __codecvt_abstract_base<char32_t, char, mbstate_t>;
// Ordinary explicit instantiations of codecvt_byname.
template class codecvt_byname<char16_t, char, mbstate_t>;
template class codecvt_byname<char32_t, char, mbstate_t>;
1450
1451_GLIBCXX_END_NAMESPACE_VERSION
1452}
1453#endif // _GLIBCXX_USE_C99_STDINT_TR1
1454