/* gb2312.c revision 129153 */
1122145Sdavidxu/*- 2128004Stjr * Copyright (c) 2004 Tim J. Robbins. All rights reserved. 3122145Sdavidxu * Copyright (c) 2003 David Xu <davidxu@freebsd.org> 4122145Sdavidxu * All rights reserved. 5122145Sdavidxu * 6122145Sdavidxu * Redistribution and use in source and binary forms, with or without 7122145Sdavidxu * modification, are permitted provided that the following conditions 8122145Sdavidxu * are met: 9122145Sdavidxu * 1. Redistributions of source code must retain the above copyright 10122145Sdavidxu * notice, this list of conditions and the following disclaimer. 11122145Sdavidxu * 2. Redistributions in binary form must reproduce the above copyright 12122145Sdavidxu * notice, this list of conditions and the following disclaimer in the 13122145Sdavidxu * documentation and/or other materials provided with the distribution. 14122145Sdavidxu * 15122145Sdavidxu * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16122145Sdavidxu * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17122145Sdavidxu * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18122145Sdavidxu * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19122145Sdavidxu * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20122145Sdavidxu * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21122145Sdavidxu * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22122145Sdavidxu * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23122145Sdavidxu * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24122145Sdavidxu * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25122145Sdavidxu * SUCH DAMAGE. 
26122145Sdavidxu */ 27122145Sdavidxu 28128004Stjr#include <sys/param.h> 29122145Sdavidxu__FBSDID("$FreeBSD: head/lib/libc/locale/gb2312.c 129153 2004-05-12 14:09:04Z tjr $"); 30122145Sdavidxu 31128155Stjr#include <errno.h> 32122145Sdavidxu#include <runetype.h> 33122145Sdavidxu#include <stdlib.h> 34128004Stjr#include <string.h> 35122145Sdavidxu#include <wchar.h> 36129153Stjr#include "mblocal.h" 37122145Sdavidxu 38122145Sdavidxuint _GB2312_init(_RuneLocale *); 39122145Sdavidxusize_t _GB2312_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, 40122145Sdavidxu mbstate_t * __restrict); 41128004Stjrint _GB2312_mbsinit(const mbstate_t *); 42122145Sdavidxusize_t _GB2312_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); 43122145Sdavidxu 44128004Stjrtypedef struct { 45128004Stjr int count; 46128004Stjr u_char bytes[2]; 47128004Stjr} _GB2312State; 48128004Stjr 49122145Sdavidxuint 50122145Sdavidxu_GB2312_init(_RuneLocale *rl) 51122145Sdavidxu{ 52122145Sdavidxu 53122145Sdavidxu _CurrentRuneLocale = rl; 54122145Sdavidxu __mbrtowc = _GB2312_mbrtowc; 55122145Sdavidxu __wcrtomb = _GB2312_wcrtomb; 56128004Stjr __mbsinit = _GB2312_mbsinit; 57122145Sdavidxu __mb_cur_max = 2; 58122145Sdavidxu return (0); 59122145Sdavidxu} 60122145Sdavidxu 61128004Stjrint 62128004Stjr_GB2312_mbsinit(const mbstate_t *ps) 63128004Stjr{ 64128004Stjr 65128081Stjr return (ps == NULL || ((const _GB2312State *)ps)->count == 0); 66128004Stjr} 67128004Stjr 68122282Stjrstatic __inline int 69122145Sdavidxu_GB2312_check(const char *str, size_t n) 70122145Sdavidxu{ 71122145Sdavidxu const u_char *s = (const u_char *)str; 72122145Sdavidxu 73122145Sdavidxu if (n == 0) 74122145Sdavidxu /* Incomplete multibyte sequence */ 75122145Sdavidxu return (-2); 76122145Sdavidxu if (s[0] >= 0xa1 && s[0] <= 0xfe) { 77122145Sdavidxu if (n < 2) 78122145Sdavidxu /* Incomplete multibyte sequence */ 79122145Sdavidxu return (-2); 80122145Sdavidxu if (s[1] < 0xa1 || s[1] > 0xfe) 81122145Sdavidxu /* Invalid multibyte 
sequence */ 82122145Sdavidxu return (-1); 83122145Sdavidxu return (2); 84122145Sdavidxu } else if (s[0] & 0x80) { 85122145Sdavidxu /* Invalid multibyte sequence */ 86122145Sdavidxu return (-1); 87122145Sdavidxu } 88122145Sdavidxu return (1); 89122145Sdavidxu} 90122145Sdavidxu 91122145Sdavidxusize_t 92122145Sdavidxu_GB2312_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, 93128004Stjr mbstate_t * __restrict ps) 94122145Sdavidxu{ 95128004Stjr _GB2312State *gs; 96122145Sdavidxu wchar_t wc; 97128004Stjr int i, len, ocount; 98128004Stjr size_t ncopy; 99122145Sdavidxu 100128089Sdavidxu gs = (_GB2312State *)ps; 101128004Stjr 102128155Stjr if (gs->count < 0 || gs->count > sizeof(gs->bytes)) { 103128155Stjr errno = EINVAL; 104128155Stjr return ((size_t)-1); 105128155Stjr } 106128155Stjr 107128004Stjr if (s == NULL) { 108128004Stjr s = ""; 109128004Stjr n = 1; 110128004Stjr pwc = NULL; 111128004Stjr } 112128004Stjr 113128004Stjr ncopy = MIN(MIN(n, MB_CUR_MAX), sizeof(gs->bytes) - gs->count); 114128004Stjr memcpy(gs->bytes + gs->count, s, ncopy); 115128004Stjr ocount = gs->count; 116128004Stjr gs->count += ncopy; 117128004Stjr s = (char *)gs->bytes; 118128004Stjr n = gs->count; 119128004Stjr 120122145Sdavidxu if ((len = _GB2312_check(s, n)) < 0) 121122145Sdavidxu return ((size_t)len); 122122145Sdavidxu wc = 0; 123122145Sdavidxu i = len; 124122145Sdavidxu while (i-- > 0) 125122145Sdavidxu wc = (wc << 8) | (unsigned char)*s++; 126122145Sdavidxu if (pwc != NULL) 127122145Sdavidxu *pwc = wc; 128128004Stjr gs->count = 0; 129128004Stjr return (wc == L'\0' ? 
0 : len - ocount); 130122145Sdavidxu} 131122145Sdavidxu 132122145Sdavidxusize_t 133128155Stjr_GB2312_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps) 134122145Sdavidxu{ 135128155Stjr _GB2312State *gs; 136122145Sdavidxu 137128155Stjr gs = (_GB2312State *)ps; 138128155Stjr 139128155Stjr if (gs->count != 0) { 140128155Stjr errno = EINVAL; 141128155Stjr return ((size_t)-1); 142128155Stjr } 143128155Stjr 144122145Sdavidxu if (s == NULL) 145122145Sdavidxu /* Reset to initial shift state (no-op) */ 146122145Sdavidxu return (1); 147122145Sdavidxu if (wc & 0x8000) { 148122145Sdavidxu *s++ = (wc >> 8) & 0xff; 149122145Sdavidxu *s = wc & 0xff; 150122145Sdavidxu return (2); 151122145Sdavidxu } 152122145Sdavidxu *s = wc & 0xff; 153122145Sdavidxu return (1); 154122145Sdavidxu} 155