1/* Look at first character in UTF-8 string.
2   Copyright (C) 1999-2002, 2006-2007, 2009-2010 Free Software Foundation, Inc.
3   Written by Bruno Haible <bruno@clisp.org>, 2001.
4
5   This program is free software: you can redistribute it and/or modify it
6   under the terms of the GNU Lesser General Public License as published
7   by the Free Software Foundation; either version 3 of the License, or
8   (at your option) any later version.
9
10   This program is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13   Lesser General Public License for more details.
14
15   You should have received a copy of the GNU Lesser General Public License
16   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
17
18#include <config.h>
19
20#if defined IN_LIBUNISTRING
21/* Tell unistr.h to declare u8_mbtouc as 'extern', not 'static inline'.  */
22# include "unistring-notinline.h"
23#endif
24
25/* Specification.  */
26#include "unistr.h"
27
28#if !HAVE_INLINE
29
/* Decode the first character of the UTF-8 string S, which has N units.

   On success, store the code point in *PUC and return the number of
   units (1 to 4) that encode it.

   On failure (invalid or truncated sequence), store 0xfffd in *PUC and
   return the length of the longest prefix of S that could still begin a
   well-formed sequence.  That length is always at least 1, and a unit
   that is not an acceptable trailing byte is never consumed, so a
   caller that advances by the return value resumes exactly at the
   offending unit.

   Overlong encodings, surrogates (0xd800..0xdfff), values above
   0x10ffff and the obsolete 5- and 6-byte forms are all rejected.

   The first unit is read unconditionally, so this assumes N >= 1.  */
int
u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n)
{
  uint8_t c = *s;
  /* Inclusive range accepted for the next trailing byte.  Only the byte
     right after the lead byte can have a range narrower than
     0x80..0xbf; see the per-lead-byte adjustments below.  */
  uint8_t lo = 0x80;
  uint8_t hi = 0xbf;
  ucs4_t uc;
  int len;
  int i;

  if (c < 0x80)
    {
      *puc = c;
      return 1;
    }

  if (c < 0xc2 || c > 0xf4)
    {
      /* 0x80..0xbf are stray trailing bytes, 0xc0..0xc1 could only
         start overlong 2-byte forms, and 0xf5..0xff could only start
         values above 0x10ffff or the obsolete 5- and 6-byte forms.  */
      *puc = 0xfffd;
      return 1;
    }
  else if (c < 0xe0)
    {
      len = 2;
      uc = c & 0x1f;
    }
  else if (c < 0xf0)
    {
      len = 3;
      uc = c & 0x0f;
      if (c == 0xe0)
        lo = 0xa0;              /* below this: overlong */
      else if (c == 0xed)
        hi = 0x9f;              /* above this: surrogate */
    }
  else
    {
      len = 4;
      uc = c & 0x07;
      if (c == 0xf0)
        lo = 0x90;              /* below this: overlong */
      else if (c == 0xf4)
        hi = 0x8f;              /* above this: > 0x10ffff */
    }

  for (i = 1; i < len; i++)
    {
      uint8_t b;

      if ((size_t) i >= n)
        break;                  /* incomplete multibyte character */
      b = s[i];
      if (b < lo || b > hi)
        break;                  /* invalid multibyte character */
      uc = (uc << 6) | (ucs4_t) (b & 0x3f);
      /* Every later trailing byte may use the full range.  */
      lo = 0x80;
      hi = 0xbf;
    }

  if (i == len)
    {
      *puc = uc;
      return len;
    }

  /* I counts the lead byte plus the trailing bytes that were accepted.  */
  *puc = 0xfffd;
  return i;
}
167
168#endif
169