gb2312.c revision 128004
1/*-
2 * Copyright (c) 2004 Tim J. Robbins. All rights reserved.
3 * Copyright (c) 2003 David Xu <davidxu@freebsd.org>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/param.h>
29__FBSDID("$FreeBSD: head/lib/libc/locale/gb2312.c 128004 2004-04-07 10:48:19Z tjr $");
30
31#include <runetype.h>
32#include <stdlib.h>
33#include <string.h>
34#include <wchar.h>
35
/*
 * Conversion hooks defined outside this file.  _GB2312_init() below points
 * them at the GB 2312 implementations.
 */
extern size_t (*__mbrtowc)(wchar_t * __restrict, const char * __restrict,
    size_t, mbstate_t * __restrict);
extern int (*__mbsinit)(const mbstate_t *);
extern size_t (*__wcrtomb)(char * __restrict, wchar_t, mbstate_t * __restrict);

/* Entry points implemented in this file. */
int	_GB2312_init(_RuneLocale *);
size_t	_GB2312_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t,
	    mbstate_t * __restrict);
int	_GB2312_mbsinit(const mbstate_t *);
size_t	_GB2312_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict);
46
/*
 * Conversion state carried between _GB2312_mbrtowc() calls.  The functions
 * in this file reach it by casting the caller's mbstate_t pointer to this
 * type.
 */
typedef struct {
	int	count;		/* number of bytes currently saved in bytes[] */
	u_char	bytes[2];	/* bytes of a character that is not yet complete */
} _GB2312State;
51
52int
53_GB2312_init(_RuneLocale *rl)
54{
55
56	_CurrentRuneLocale = rl;
57	__mbrtowc = _GB2312_mbrtowc;
58	__wcrtomb = _GB2312_wcrtomb;
59	__mbsinit = _GB2312_mbsinit;
60	__mb_cur_max = 2;
61	return (0);
62}
63
64int
65_GB2312_mbsinit(const mbstate_t *ps)
66{
67
68	return (ps == NULL || ((_GB2312State *)ps)->count == 0);
69}
70
/*
 * Classify the character that starts at `str', looking at no more than `n'
 * bytes.
 *
 * Returns:
 *    1  single byte with the high bit clear
 *    2  lead byte and trail byte both within 0xa1..0xfe
 *   -1  invalid multibyte sequence
 *   -2  incomplete multibyte sequence (more bytes are needed)
 */
static __inline int
_GB2312_check(const char *str, size_t n)
{
	const unsigned char *bytes = (const unsigned char *)str;
	unsigned char lead;

	/* Nothing to look at yet. */
	if (n == 0)
		return (-2);

	lead = bytes[0];

	/* High bit clear: the byte stands for itself. */
	if (lead < 0x80)
		return (1);

	/* High bit set but outside the lead-byte range. */
	if (lead < 0xa1 || lead > 0xfe)
		return (-1);

	/* Valid lead byte; the trail byte has not arrived yet. */
	if (n < 2)
		return (-2);

	/* The trail byte must fall in the same range as the lead byte. */
	if (bytes[1] >= 0xa1 && bytes[1] <= 0xfe)
		return (2);
	return (-1);
}
93
94size_t
95_GB2312_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n,
96    mbstate_t * __restrict ps)
97{
98	_GB2312State *gs;
99	wchar_t wc;
100	int i, len, ocount;
101	size_t ncopy;
102
103	gs = (_GB2312State *)gs;
104
105	if (s == NULL) {
106		s = "";
107		n = 1;
108		pwc = NULL;
109	}
110
111	ncopy = MIN(MIN(n, MB_CUR_MAX), sizeof(gs->bytes) - gs->count);
112	memcpy(gs->bytes + gs->count, s, ncopy);
113	ocount = gs->count;
114	gs->count += ncopy;
115	s = (char *)gs->bytes;
116	n = gs->count;
117
118	if ((len = _GB2312_check(s, n)) < 0)
119		return ((size_t)len);
120	wc = 0;
121	i = len;
122	while (i-- > 0)
123		wc = (wc << 8) | (unsigned char)*s++;
124	if (pwc != NULL)
125		*pwc = wc;
126	gs->count = 0;
127	return (wc == L'\0' ? 0 : len - ocount);
128}
129
/*
 * Convert the wide character `wc' to its byte sequence in `s'.
 *
 * When bit 15 of `wc' is set, two bytes are written (bits 8..15 first, then
 * bits 0..7); otherwise only the low byte is written.  A NULL `s' stores
 * nothing and returns 1.  The conversion state `ps' is not used.
 *
 * Returns the number of bytes written (1 or 2), or 1 when `s' is NULL.
 */
size_t
_GB2312_wcrtomb(char * __restrict s, wchar_t wc,
    mbstate_t * __restrict ps __unused)
{
	size_t nbytes;

	/* No output buffer: nothing to reset, report one byte. */
	if (s == NULL)
		return (1);

	nbytes = 0;
	/* Two-byte characters carry their lead byte in the upper half. */
	if (wc & 0x8000)
		s[nbytes++] = (wc >> 8) & 0xff;
	s[nbytes++] = wc & 0xff;
	return (nbytes);
}
146