1/* Conversion UTF-8 to UCS-4.
2   Copyright (C) 2001-2002, 2006-2007, 2009-2010 Free Software Foundation, Inc.
3   Written by Bruno Haible <bruno@clisp.org>, 2001.
4
5   This program is free software: you can redistribute it and/or modify it
6   under the terms of the GNU Lesser General Public License as published
7   by the Free Software Foundation; either version 3 of the License, or
8   (at your option) any later version.
9
10   This program is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13   Lesser General Public License for more details.
14
15   You should have received a copy of the GNU Lesser General Public License
16   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
17
18#include <config.h>
19
20/* Specification.  */
21#include "unistr.h"
22
23#if defined IN_LIBUNISTRING || HAVE_INLINE
24
/* Decode the multibyte UTF-8 character that starts at S, of which N bytes
   are available, and store the result in *PUC.

   Return value and *PUC:
     - For a well-formed 2-, 3- or 4-byte sequence that is not overlong,
       does not encode a surrogate (U+D800..U+DFFF) and does not exceed
       U+10FFFF: *PUC is the code point, the return value is 2, 3 or 4.
     - For an invalid lead byte, or a sequence of sufficient length that is
       not valid: *PUC is U+FFFD, the return value is 1.
     - For a sequence cut short by N: *PUC is U+FFFD, the return value is
       the lead byte plus the continuation bytes (0x80..0xBF) that directly
       follow it, which is at most N.  A following byte that is not a
       continuation byte is never consumed, because it starts a character
       of its own.

   S[0] is read unconditionally, so N must be at least 1.

   Every first byte below 0xc2 - including plain ASCII - is reported as
   invalid by this function; presumably the caller handles single-byte
   characters itself before delegating here (NOTE(review): confirm against
   the inline u8_mbtouc in unistr.h).

   Throughout, "(b ^ 0x80) < 0x40" tests whether B is a continuation byte,
   i.e. lies in 0x80..0xBF; "b ^ 0x80" is then its 6-bit payload.  */
int
u8_mbtouc_aux (ucs4_t *puc, const uint8_t *s, size_t n)
{
  uint8_t c = *s;

  if (c >= 0xc2)
    {
      if (c < 0xe0)
        {
          /* Lead byte of a 2-byte sequence.  */
          if (n >= 2)
            {
              if ((s[1] ^ 0x80) < 0x40)
                {
                  *puc = ((unsigned int) (c & 0x1f) << 6)
                         | (unsigned int) (s[1] ^ 0x80);
                  return 2;
                }
              /* invalid multibyte character */
            }
          else
            {
              /* incomplete multibyte character: only the lead byte is
                 available, so there is nothing else that could be
                 consumed by mistake.  */
              *puc = 0xfffd;
              return n;
            }
        }
      else if (c < 0xf0)
        {
          /* Lead byte of a 3-byte sequence.  */
          if (n >= 3)
            {
              if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
                  /* Reject overlong encodings of U+0000..U+07FF.  */
                  && (c >= 0xe1 || s[1] >= 0xa0)
                  /* Reject the surrogate range U+D800..U+DFFF.  */
                  && (c != 0xed || s[1] < 0xa0))
                {
                  *puc = ((unsigned int) (c & 0x0f) << 12)
                         | ((unsigned int) (s[1] ^ 0x80) << 6)
                         | (unsigned int) (s[2] ^ 0x80);
                  return 3;
                }
              /* invalid multibyte character */
            }
          else
            {
              /* incomplete multibyte character */
              *puc = 0xfffd;
              /* Do not swallow a second byte that is not a continuation
                 byte: it begins the next character.  */
              if (n == 2 && (s[1] ^ 0x80) >= 0x40)
                return 1;
              return n;
            }
        }
      else if (c < 0xf8)
        {
          /* Lead byte of a 4-byte sequence.  */
          if (n >= 4)
            {
              if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
                  && (s[3] ^ 0x80) < 0x40
                  /* Reject overlong encodings of U+0000..U+FFFF.  */
                  && (c >= 0xf1 || s[1] >= 0x90)
                  /* Reject code points above U+10FFFF.  */
                  && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90)))
                {
                  *puc = ((unsigned int) (c & 0x07) << 18)
                         | ((unsigned int) (s[1] ^ 0x80) << 12)
                         | ((unsigned int) (s[2] ^ 0x80) << 6)
                         | (unsigned int) (s[3] ^ 0x80);
                  return 4;
                }
              /* invalid multibyte character */
            }
          else
            {
              /* incomplete multibyte character */
              *puc = 0xfffd;
              /* Consume only the lead byte and the continuation bytes that
                 directly follow it; stop in front of the first byte that
                 begins another character.  Here n is 1, 2 or 3.  */
              if (n >= 2 && (s[1] ^ 0x80) >= 0x40)
                return 1;
              if (n == 3 && (s[2] ^ 0x80) >= 0x40)
                return 2;
              return n;
            }
        }
      /* Lead bytes 0xf8..0xff (the historical 5- and 6-byte forms, and
         0xfe/0xff) are not decoded; they fall through as invalid.  */
    }
  /* invalid multibyte character */
  *puc = 0xfffd;
  return 1;
}
157
158#endif
159