1127834Stjr/*-
2127834Stjr * Copyright (c) 2002-2004 Tim J. Robbins
3127834Stjr * All rights reserved.
4118146Sache *
5235785Stheraven * Copyright (c) 2011 The FreeBSD Foundation
6235785Stheraven * All rights reserved.
7235785Stheraven * Portions of this software were developed by David Chisnall
8235785Stheraven * under sponsorship from the FreeBSD Foundation.
9235785Stheraven *
10118146Sache * Redistribution and use in source and binary forms, with or without
11118146Sache * modification, are permitted provided that the following conditions
12118146Sache * are met:
13118146Sache * 1. Redistributions of source code must retain the above copyright
14118146Sache *    notice, this list of conditions and the following disclaimer.
15118146Sache * 2. Redistributions in binary form must reproduce the above copyright
16118146Sache *    notice, this list of conditions and the following disclaimer in the
17118146Sache *    documentation and/or other materials provided with the distribution.
18118146Sache *
19127834Stjr * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20118146Sache * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21118146Sache * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22127834Stjr * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23118146Sache * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24118146Sache * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25118146Sache * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26118146Sache * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27118146Sache * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28118146Sache * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29118146Sache * SUCH DAMAGE.
30118146Sache */
31127834Stjr/*
32127834Stjr * PRC National Standard GB 18030-2000 encoding of Chinese text.
33127834Stjr *
34127834Stjr * See gb18030(5) for details.
35127834Stjr */
36118146Sache
37128004Stjr#include <sys/param.h>
38118146Sache__FBSDID("$FreeBSD$");
39118146Sache
40127834Stjr#include <errno.h>
41127834Stjr#include <runetype.h>
42118146Sache#include <stdlib.h>
43128004Stjr#include <string.h>
44127834Stjr#include <wchar.h>
45129153Stjr#include "mblocal.h"
46118146Sache
/*
 * Conversion routines private to this file; _GB18030_init() stores
 * pointers to them in the locale it is given.
 */
static int	_GB18030_mbsinit(const mbstate_t *);
static size_t	_GB18030_mbrtowc(wchar_t * __restrict,
		    const char * __restrict, size_t,
		    mbstate_t * __restrict);
static size_t	_GB18030_wcrtomb(char * __restrict, wchar_t,
		    mbstate_t * __restrict);
52127834Stjr
/*
 * Conversion state, overlaid on the caller's mbstate_t.  It holds the
 * bytes of a multibyte character that has been only partially supplied
 * so far; count == 0 is the initial state.
 */
typedef struct {
	int		count;		/* number of bytes held in bytes[] */
	unsigned char	bytes[4];	/* buffered bytes, oldest first */
} _GB18030State;
57128004Stjr
58118146Sacheint
59235785Stheraven_GB18030_init(struct xlocale_ctype *l, _RuneLocale *rl)
60118146Sache{
61127834Stjr
62235785Stheraven	l->__mbrtowc = _GB18030_mbrtowc;
63235785Stheraven	l->__wcrtomb = _GB18030_wcrtomb;
64235785Stheraven	l->__mbsinit = _GB18030_mbsinit;
65235785Stheraven	l->runes = rl;
66235785Stheraven	l->__mb_cur_max = 4;
67235785Stheraven	l->__mb_sb_limit = 128;
68127834Stjr
69118146Sache	return (0);
70118146Sache}
71118146Sache
72142654Sphantomstatic int
73128004Stjr_GB18030_mbsinit(const mbstate_t *ps)
74128004Stjr{
75128004Stjr
76128081Stjr	return (ps == NULL || ((const _GB18030State *)ps)->count == 0);
77128004Stjr}
78128004Stjr
79142654Sphantomstatic size_t
80127834Stjr_GB18030_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s,
81128004Stjr    size_t n, mbstate_t * __restrict ps)
82118146Sache{
83128004Stjr	_GB18030State *gs;
84127834Stjr	wchar_t wch;
85128004Stjr	int ch, len, ocount;
86128004Stjr	size_t ncopy;
87118146Sache
88128004Stjr	gs = (_GB18030State *)ps;
89128004Stjr
90128155Stjr	if (gs->count < 0 || gs->count > sizeof(gs->bytes)) {
91128155Stjr		errno = EINVAL;
92128155Stjr		return ((size_t)-1);
93128155Stjr	}
94128155Stjr
95128004Stjr	if (s == NULL) {
96128004Stjr		s = "";
97128004Stjr		n = 1;
98128004Stjr		pwc = NULL;
99128004Stjr	}
100128004Stjr
101128004Stjr	ncopy = MIN(MIN(n, MB_CUR_MAX), sizeof(gs->bytes) - gs->count);
102128004Stjr	memcpy(gs->bytes + gs->count, s, ncopy);
103128004Stjr	ocount = gs->count;
104128004Stjr	gs->count += ncopy;
105128004Stjr	s = (char *)gs->bytes;
106128004Stjr	n = gs->count;
107128004Stjr
108127834Stjr	if (n == 0)
109127834Stjr		/* Incomplete multibyte sequence */
110127834Stjr		return ((size_t)-2);
111118146Sache
112127834Stjr	/*
113127834Stjr	 * Single byte:		[00-7f]
114127834Stjr	 * Two byte:		[81-fe][40-7e,80-fe]
115127834Stjr	 * Four byte:		[81-fe][30-39][81-fe][30-39]
116127834Stjr	 */
117127834Stjr	ch = (unsigned char)*s++;
118127834Stjr	if (ch <= 0x7f) {
119127834Stjr		len = 1;
120127834Stjr		wch = ch;
121127834Stjr	} else if (ch >= 0x81 && ch <= 0xfe) {
122127834Stjr		wch = ch;
123127834Stjr		if (n < 2)
124127834Stjr			return ((size_t)-2);
125127834Stjr		ch = (unsigned char)*s++;
126127834Stjr		if ((ch >= 0x40 && ch <= 0x7e) || (ch >= 0x80 && ch <= 0xfe)) {
127127834Stjr			wch = (wch << 8) | ch;
128127834Stjr			len = 2;
129127834Stjr		} else if (ch >= 0x30 && ch <= 0x39) {
130127834Stjr			/*
131127834Stjr			 * Strip high bit off the wide character we will
132127834Stjr			 * eventually output so that it is positive when
133127834Stjr			 * cast to wint_t on 32-bit twos-complement machines.
134127834Stjr			 */
135127834Stjr			wch = ((wch & 0x7f) << 8) | ch;
136127834Stjr			if (n < 3)
137127834Stjr				return ((size_t)-2);
138127834Stjr			ch = (unsigned char)*s++;
139127834Stjr			if (ch < 0x81 || ch > 0xfe)
140127834Stjr				goto ilseq;
141127834Stjr			wch = (wch << 8) | ch;
142127834Stjr			if (n < 4)
143127834Stjr				return ((size_t)-2);
144127834Stjr			ch = (unsigned char)*s++;
145127834Stjr			if (ch < 0x30 || ch > 0x39)
146127834Stjr				goto ilseq;
147127834Stjr			wch = (wch << 8) | ch;
148127834Stjr			len = 4;
149127834Stjr		} else
150127834Stjr			goto ilseq;
151127834Stjr	} else
152127834Stjr		goto ilseq;
153118146Sache
154127834Stjr	if (pwc != NULL)
155127834Stjr		*pwc = wch;
156128004Stjr	gs->count = 0;
157128004Stjr	return (wch == L'\0' ? 0 : len - ocount);
158127834Stjrilseq:
159127834Stjr	errno = EILSEQ;
160127834Stjr	return ((size_t)-1);
161118146Sache}
162118146Sache
163142654Sphantomstatic size_t
164128155Stjr_GB18030_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps)
165118146Sache{
166128155Stjr	_GB18030State *gs;
167127834Stjr	size_t len;
168127834Stjr	int c;
169118146Sache
170128155Stjr	gs = (_GB18030State *)ps;
171128155Stjr
172128155Stjr	if (gs->count != 0) {
173128155Stjr		errno = EINVAL;
174128155Stjr		return ((size_t)-1);
175128155Stjr	}
176128155Stjr
177127834Stjr	if (s == NULL)
178127834Stjr		/* Reset to initial shift state (no-op) */
179127834Stjr		return (1);
180127834Stjr	if ((wc & ~0x7fffffff) != 0)
181127834Stjr		goto ilseq;
182127834Stjr	if (wc & 0x7f000000) {
183127834Stjr		/* Replace high bit that mbrtowc() removed. */
184127834Stjr		wc |= 0x80000000;
185127834Stjr		c = (wc >> 24) & 0xff;
186127834Stjr		if (c < 0x81 || c > 0xfe)
187127834Stjr			goto ilseq;
188127834Stjr		*s++ = c;
189127834Stjr		c = (wc >> 16) & 0xff;
190127834Stjr		if (c < 0x30 || c > 0x39)
191127834Stjr			goto ilseq;
192127834Stjr		*s++ = c;
193127834Stjr		c = (wc >> 8) & 0xff;
194127834Stjr		if (c < 0x81 || c > 0xfe)
195127834Stjr			goto ilseq;
196127834Stjr		*s++ = c;
197127834Stjr		c = wc & 0xff;
198127834Stjr		if (c < 0x30 || c > 0x39)
199127834Stjr			goto ilseq;
200127834Stjr		*s++ = c;
201127834Stjr		len = 4;
202127834Stjr	} else if (wc & 0x00ff0000)
203127834Stjr		goto ilseq;
204127834Stjr	else if (wc & 0x0000ff00) {
205127834Stjr		c = (wc >> 8) & 0xff;
206127834Stjr		if (c < 0x81 || c > 0xfe)
207127834Stjr			goto ilseq;
208127834Stjr		*s++ = c;
209127834Stjr		c = wc & 0xff;
210127834Stjr		if (c < 0x40 || c == 0x7f || c == 0xff)
211127834Stjr			goto ilseq;
212127834Stjr		*s++ = c;
213127834Stjr		len = 2;
214127834Stjr	} else if (wc <= 0x7f) {
215127834Stjr		*s++ = wc;
216127834Stjr		len = 1;
217127834Stjr	} else
218127834Stjr		goto ilseq;
219127834Stjr
220127834Stjr	return (len);
221127834Stjrilseq:
222127834Stjr	errno = EILSEQ;
223127834Stjr	return ((size_t)-1);
224118146Sache}
225