1/*-
2 * Copyright (c) 2002-2004 Tim J. Robbins
3 * All rights reserved.
4 *
5 * Copyright (c) 2011 The FreeBSD Foundation
6 * All rights reserved.
7 * Portions of this software were developed by David Chisnall
8 * under sponsorship from the FreeBSD Foundation.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31/*
32 * PRC National Standard GB 18030-2000 encoding of Chinese text.
33 *
34 * See gb18030(5) for details.
35 */
36
37#include <sys/param.h>
38__FBSDID("$FreeBSD$");
39
40#include <errno.h>
41#include <runetype.h>
42#include <stdlib.h>
43#include <string.h>
44#include <wchar.h>
45#include "mblocal.h"
46
/*
 * Forward declarations of the file-local GB 18030 conversion routines.
 * _GB18030_init() stores their addresses in the locale it initialises, so
 * they must be declared before that function.
 */
static size_t	_GB18030_mbrtowc(wchar_t * __restrict, const char * __restrict,
		    size_t, mbstate_t * __restrict);
static int	_GB18030_mbsinit(const mbstate_t *);
static size_t	_GB18030_wcrtomb(char * __restrict, wchar_t,
		    mbstate_t * __restrict);
52
/*
 * Conversion state carried between calls.  The converters in this file
 * obtain it by casting the caller's mbstate_t pointer to this type.
 */
typedef struct {
	int	count;		/* number of bytes currently held in bytes[] */
	u_char	bytes[4];	/* bytes of a not-yet-complete character */
} _GB18030State;
57
58int
59_GB18030_init(struct xlocale_ctype *l, _RuneLocale *rl)
60{
61
62	l->__mbrtowc = _GB18030_mbrtowc;
63	l->__wcrtomb = _GB18030_wcrtomb;
64	l->__mbsinit = _GB18030_mbsinit;
65	l->runes = rl;
66	l->__mb_cur_max = 4;
67	l->__mb_sb_limit = 128;
68
69	return (0);
70}
71
72static int
73_GB18030_mbsinit(const mbstate_t *ps)
74{
75
76	return (ps == NULL || ((const _GB18030State *)ps)->count == 0);
77}
78
79static size_t
80_GB18030_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s,
81    size_t n, mbstate_t * __restrict ps)
82{
83	_GB18030State *gs;
84	wchar_t wch;
85	int ch, len, ocount;
86	size_t ncopy;
87
88	gs = (_GB18030State *)ps;
89
90	if (gs->count < 0 || gs->count > sizeof(gs->bytes)) {
91		errno = EINVAL;
92		return ((size_t)-1);
93	}
94
95	if (s == NULL) {
96		s = "";
97		n = 1;
98		pwc = NULL;
99	}
100
101	ncopy = MIN(MIN(n, MB_CUR_MAX), sizeof(gs->bytes) - gs->count);
102	memcpy(gs->bytes + gs->count, s, ncopy);
103	ocount = gs->count;
104	gs->count += ncopy;
105	s = (char *)gs->bytes;
106	n = gs->count;
107
108	if (n == 0)
109		/* Incomplete multibyte sequence */
110		return ((size_t)-2);
111
112	/*
113	 * Single byte:		[00-7f]
114	 * Two byte:		[81-fe][40-7e,80-fe]
115	 * Four byte:		[81-fe][30-39][81-fe][30-39]
116	 */
117	ch = (unsigned char)*s++;
118	if (ch <= 0x7f) {
119		len = 1;
120		wch = ch;
121	} else if (ch >= 0x81 && ch <= 0xfe) {
122		wch = ch;
123		if (n < 2)
124			return ((size_t)-2);
125		ch = (unsigned char)*s++;
126		if ((ch >= 0x40 && ch <= 0x7e) || (ch >= 0x80 && ch <= 0xfe)) {
127			wch = (wch << 8) | ch;
128			len = 2;
129		} else if (ch >= 0x30 && ch <= 0x39) {
130			/*
131			 * Strip high bit off the wide character we will
132			 * eventually output so that it is positive when
133			 * cast to wint_t on 32-bit twos-complement machines.
134			 */
135			wch = ((wch & 0x7f) << 8) | ch;
136			if (n < 3)
137				return ((size_t)-2);
138			ch = (unsigned char)*s++;
139			if (ch < 0x81 || ch > 0xfe)
140				goto ilseq;
141			wch = (wch << 8) | ch;
142			if (n < 4)
143				return ((size_t)-2);
144			ch = (unsigned char)*s++;
145			if (ch < 0x30 || ch > 0x39)
146				goto ilseq;
147			wch = (wch << 8) | ch;
148			len = 4;
149		} else
150			goto ilseq;
151	} else
152		goto ilseq;
153
154	if (pwc != NULL)
155		*pwc = wch;
156	gs->count = 0;
157	return (wch == L'\0' ? 0 : len - ocount);
158ilseq:
159	errno = EILSEQ;
160	return ((size_t)-1);
161}
162
163static size_t
164_GB18030_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps)
165{
166	_GB18030State *gs;
167	size_t len;
168	int c;
169
170	gs = (_GB18030State *)ps;
171
172	if (gs->count != 0) {
173		errno = EINVAL;
174		return ((size_t)-1);
175	}
176
177	if (s == NULL)
178		/* Reset to initial shift state (no-op) */
179		return (1);
180	if ((wc & ~0x7fffffff) != 0)
181		goto ilseq;
182	if (wc & 0x7f000000) {
183		/* Replace high bit that mbrtowc() removed. */
184		wc |= 0x80000000;
185		c = (wc >> 24) & 0xff;
186		if (c < 0x81 || c > 0xfe)
187			goto ilseq;
188		*s++ = c;
189		c = (wc >> 16) & 0xff;
190		if (c < 0x30 || c > 0x39)
191			goto ilseq;
192		*s++ = c;
193		c = (wc >> 8) & 0xff;
194		if (c < 0x81 || c > 0xfe)
195			goto ilseq;
196		*s++ = c;
197		c = wc & 0xff;
198		if (c < 0x30 || c > 0x39)
199			goto ilseq;
200		*s++ = c;
201		len = 4;
202	} else if (wc & 0x00ff0000)
203		goto ilseq;
204	else if (wc & 0x0000ff00) {
205		c = (wc >> 8) & 0xff;
206		if (c < 0x81 || c > 0xfe)
207			goto ilseq;
208		*s++ = c;
209		c = wc & 0xff;
210		if (c < 0x40 || c == 0x7f || c == 0xff)
211			goto ilseq;
212		*s++ = c;
213		len = 2;
214	} else if (wc <= 0x7f) {
215		*s++ = wc;
216		len = 1;
217	} else
218		goto ilseq;
219
220	return (len);
221ilseq:
222	errno = EILSEQ;
223	return ((size_t)-1);
224}
225