1/* Conversion UTF-8 to UCS-4.
2   Copyright (C) 2001-2002, 2006-2007, 2009-2010 Free Software Foundation, Inc.
3   Written by Bruno Haible <bruno@clisp.org>, 2001.
4
5   This program is free software: you can redistribute it and/or modify it
6   under the terms of the GNU Lesser General Public License as published
7   by the Free Software Foundation; either version 3 of the License, or
8   (at your option) any later version.
9
10   This program is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13   Lesser General Public License for more details.
14
15   You should have received a copy of the GNU Lesser General Public License
16   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
17
18#include <config.h>
19
20/* Specification.  */
21#include "unistr.h"
22
23#if defined IN_LIBUNISTRING || HAVE_INLINE
24
25int
26u8_mbtouc_unsafe_aux (ucs4_t *puc, const uint8_t *s, size_t n)
27{
28  uint8_t c = *s;
29
30  if (c >= 0xc2)
31    {
32      if (c < 0xe0)
33        {
34          if (n >= 2)
35            {
36#if CONFIG_UNICODE_SAFETY
37              if ((s[1] ^ 0x80) < 0x40)
38#endif
39                {
40                  *puc = ((unsigned int) (c & 0x1f) << 6)
41                         | (unsigned int) (s[1] ^ 0x80);
42                  return 2;
43                }
44              /* invalid multibyte character */
45            }
46          else
47            {
48              /* incomplete multibyte character */
49              *puc = 0xfffd;
50              return n;
51            }
52        }
53      else if (c < 0xf0)
54        {
55          if (n >= 3)
56            {
57#if CONFIG_UNICODE_SAFETY
58              if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
59                  && (c >= 0xe1 || s[1] >= 0xa0)
60                  && (c != 0xed || s[1] < 0xa0))
61#endif
62                {
63                  *puc = ((unsigned int) (c & 0x0f) << 12)
64                         | ((unsigned int) (s[1] ^ 0x80) << 6)
65                         | (unsigned int) (s[2] ^ 0x80);
66                  return 3;
67                }
68              /* invalid multibyte character */
69            }
70          else
71            {
72              /* incomplete multibyte character */
73              *puc = 0xfffd;
74              return n;
75            }
76        }
77      else if (c < 0xf8)
78        {
79          if (n >= 4)
80            {
81#if CONFIG_UNICODE_SAFETY
82              if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
83                  && (s[3] ^ 0x80) < 0x40
84                  && (c >= 0xf1 || s[1] >= 0x90)
85#if 1
86                  && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90))
87#endif
88                 )
89#endif
90                {
91                  *puc = ((unsigned int) (c & 0x07) << 18)
92                         | ((unsigned int) (s[1] ^ 0x80) << 12)
93                         | ((unsigned int) (s[2] ^ 0x80) << 6)
94                         | (unsigned int) (s[3] ^ 0x80);
95                  return 4;
96                }
97              /* invalid multibyte character */
98            }
99          else
100            {
101              /* incomplete multibyte character */
102              *puc = 0xfffd;
103              return n;
104            }
105        }
106#if 0
107      else if (c < 0xfc)
108        {
109          if (n >= 5)
110            {
111#if CONFIG_UNICODE_SAFETY
112              if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
113                  && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
114                  && (c >= 0xf9 || s[1] >= 0x88))
115#endif
116                {
117                  *puc = ((unsigned int) (c & 0x03) << 24)
118                         | ((unsigned int) (s[1] ^ 0x80) << 18)
119                         | ((unsigned int) (s[2] ^ 0x80) << 12)
120                         | ((unsigned int) (s[3] ^ 0x80) << 6)
121                         | (unsigned int) (s[4] ^ 0x80);
122                  return 5;
123                }
124              /* invalid multibyte character */
125            }
126          else
127            {
128              /* incomplete multibyte character */
129              *puc = 0xfffd;
130              return n;
131            }
132        }
133      else if (c < 0xfe)
134        {
135          if (n >= 6)
136            {
137#if CONFIG_UNICODE_SAFETY
138              if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
139                  && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
140                  && (s[5] ^ 0x80) < 0x40
141                  && (c >= 0xfd || s[1] >= 0x84))
142#endif
143                {
144                  *puc = ((unsigned int) (c & 0x01) << 30)
145                         | ((unsigned int) (s[1] ^ 0x80) << 24)
146                         | ((unsigned int) (s[2] ^ 0x80) << 18)
147                         | ((unsigned int) (s[3] ^ 0x80) << 12)
148                         | ((unsigned int) (s[4] ^ 0x80) << 6)
149                         | (unsigned int) (s[5] ^ 0x80);
150                  return 6;
151                }
152              /* invalid multibyte character */
153            }
154          else
155            {
156              /* incomplete multibyte character */
157              *puc = 0xfffd;
158              return n;
159            }
160        }
161#endif
162    }
163  /* invalid multibyte character */
164  *puc = 0xfffd;
165  return 1;
166}
167
168#endif
169