1/* Look at first character in UTF-8 string.
2   Copyright (C) 1999-2002, 2006-2007, 2009-2010 Free Software Foundation, Inc.
3   Written by Bruno Haible <bruno@clisp.org>, 2001.
4
5   This program is free software: you can redistribute it and/or modify it
6   under the terms of the GNU Lesser General Public License as published
7   by the Free Software Foundation; either version 3 of the License, or
8   (at your option) any later version.
9
10   This program is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13   Lesser General Public License for more details.
14
15   You should have received a copy of the GNU Lesser General Public License
16   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
17
18#include <config.h>
19
20#if defined IN_LIBUNISTRING
21/* Tell unistr.h to declare u8_mbtouc_unsafe as 'extern', not
22   'static inline'.  */
23# include "unistring-notinline.h"
24#endif
25
26/* Specification.  */
27#include "unistr.h"
28
29#if !HAVE_INLINE
30
/* Decode the first character of the UTF-8 string S, looking at no more
   than N units, and store the result in *PUC.

   Return value:
     - the number of units consumed (1 to 4) when a character was decoded;
     - N, with *PUC set to 0xfffd, when the lead byte announces a sequence
       longer than the N units available (incomplete character);
     - 1, with *PUC set to 0xfffd, when the sequence is rejected as invalid.

   The continuation bytes and the range of the result are validated only
   when CONFIG_UNICODE_SAFETY is nonzero; otherwise the bytes are merged
   into the result unchecked.

   S[0] is read unconditionally, so N is expected to be at least 1.  */
int
u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n)
{
  const uint8_t lead = s[0];

  if (lead < 0x80)
    {
      /* Single unit, taken verbatim.  */
      *puc = lead;
      return 1;
    }

  /* Lead bytes below 0xc2 or from 0xf8 upwards are rejected outright;
     they skip this block and end up in the "invalid" tail below.  */
  if (lead >= 0xc2 && lead < 0xf8)
    {
      /* Total sequence length announced by the lead byte.  */
      const size_t need = (lead < 0xe0 ? 2 : lead < 0xf0 ? 3 : 4);

      if (n < need)
        {
          /* incomplete multibyte character */
          *puc = 0xfffd;
          return n;
        }

      switch (need)
        {
        case 2:
          {
            /* A continuation byte 0x80..0xbf maps to 0x00..0x3f.  */
            const unsigned int t1 = s[1] ^ 0x80;

#if CONFIG_UNICODE_SAFETY
            if (t1 < 0x40)
#endif
              {
                *puc = ((unsigned int) (lead & 0x1f) << 6) | t1;
                return 2;
              }
          }
          break;

        case 3:
          {
            const unsigned int t1 = s[1] ^ 0x80;
            const unsigned int t2 = s[2] ^ 0x80;

#if CONFIG_UNICODE_SAFETY
            /* Besides checking the continuation bytes, refuse results
               below 0x800 (lead 0xe0, second byte < 0xa0) and results
               in 0xd800..0xdfff (lead 0xed, second byte >= 0xa0).  */
            if (t1 < 0x40 && t2 < 0x40
                && !(lead == 0xe0 && s[1] < 0xa0)
                && !(lead == 0xed && s[1] >= 0xa0))
#endif
              {
                *puc = ((unsigned int) (lead & 0x0f) << 12)
                       | (t1 << 6)
                       | t2;
                return 3;
              }
          }
          break;

        case 4:
          {
            const unsigned int t1 = s[1] ^ 0x80;
            const unsigned int t2 = s[2] ^ 0x80;
            const unsigned int t3 = s[3] ^ 0x80;

#if CONFIG_UNICODE_SAFETY
            /* Besides checking the continuation bytes, refuse results
               below 0x10000 (lead 0xf0, second byte < 0x90) and results
               above 0x10ffff (lead > 0xf4, or lead 0xf4 with second
               byte >= 0x90).  */
            if (t1 < 0x40 && t2 < 0x40 && t3 < 0x40
                && !(lead == 0xf0 && s[1] < 0x90)
                && (lead < 0xf4 || (lead == 0xf4 && s[1] < 0x90)))
#endif
              {
                *puc = ((unsigned int) (lead & 0x07) << 18)
                       | (t1 << 12)
                       | (t2 << 6)
                       | t3;
                return 4;
              }
          }
          break;
        }
    }

  /* invalid multibyte character */
  *puc = 0xfffd;
  return 1;
}
178
179#endif
180