gb2312.c revision 128004
1122145Sdavidxu/*- 2128004Stjr * Copyright (c) 2004 Tim J. Robbins. All rights reserved. 3122145Sdavidxu * Copyright (c) 2003 David Xu <davidxu@freebsd.org> 4122145Sdavidxu * All rights reserved. 5122145Sdavidxu * 6122145Sdavidxu * Redistribution and use in source and binary forms, with or without 7122145Sdavidxu * modification, are permitted provided that the following conditions 8122145Sdavidxu * are met: 9122145Sdavidxu * 1. Redistributions of source code must retain the above copyright 10122145Sdavidxu * notice, this list of conditions and the following disclaimer. 11122145Sdavidxu * 2. Redistributions in binary form must reproduce the above copyright 12122145Sdavidxu * notice, this list of conditions and the following disclaimer in the 13122145Sdavidxu * documentation and/or other materials provided with the distribution. 14122145Sdavidxu * 15122145Sdavidxu * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16122145Sdavidxu * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17122145Sdavidxu * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18122145Sdavidxu * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19122145Sdavidxu * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20122145Sdavidxu * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21122145Sdavidxu * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22122145Sdavidxu * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23122145Sdavidxu * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24122145Sdavidxu * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25122145Sdavidxu * SUCH DAMAGE. 26122145Sdavidxu */ 27122145Sdavidxu 28128004Stjr#include <sys/param.h> 29122145Sdavidxu__FBSDID("$FreeBSD: head/lib/libc/locale/gb2312.c 128004 2004-04-07 10:48:19Z tjr $"); 30122145Sdavidxu 31122145Sdavidxu#include <runetype.h> 32122145Sdavidxu#include <stdlib.h> 33128004Stjr#include <string.h> 34122145Sdavidxu#include <wchar.h> 35122145Sdavidxu 36122145Sdavidxuextern size_t (*__mbrtowc)(wchar_t * __restrict, const char * __restrict, 37122145Sdavidxu size_t, mbstate_t * __restrict); 38128004Stjrextern int (*__mbsinit)(const mbstate_t *); 39122145Sdavidxuextern size_t (*__wcrtomb)(char * __restrict, wchar_t, mbstate_t * __restrict); 40122145Sdavidxu 41122145Sdavidxuint _GB2312_init(_RuneLocale *); 42122145Sdavidxusize_t _GB2312_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, 43122145Sdavidxu mbstate_t * __restrict); 44128004Stjrint _GB2312_mbsinit(const mbstate_t *); 45122145Sdavidxusize_t _GB2312_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); 46122145Sdavidxu 47128004Stjrtypedef struct { 48128004Stjr int count; 49128004Stjr u_char bytes[2]; 50128004Stjr} _GB2312State; 51128004Stjr 52122145Sdavidxuint 53122145Sdavidxu_GB2312_init(_RuneLocale *rl) 54122145Sdavidxu{ 55122145Sdavidxu 56122145Sdavidxu _CurrentRuneLocale = rl; 57122145Sdavidxu __mbrtowc = _GB2312_mbrtowc; 58122145Sdavidxu __wcrtomb = _GB2312_wcrtomb; 59128004Stjr __mbsinit = _GB2312_mbsinit; 60122145Sdavidxu __mb_cur_max = 2; 61122145Sdavidxu return (0); 62122145Sdavidxu} 63122145Sdavidxu 64128004Stjrint 65128004Stjr_GB2312_mbsinit(const mbstate_t *ps) 66128004Stjr{ 67128004Stjr 68128004Stjr return (ps == NULL || ((_GB2312State *)ps)->count == 0); 69128004Stjr} 70128004Stjr 71122282Stjrstatic __inline int 72122145Sdavidxu_GB2312_check(const char *str, size_t n) 73122145Sdavidxu{ 74122145Sdavidxu const u_char *s = (const u_char *)str; 75122145Sdavidxu 76122145Sdavidxu if (n == 0) 77122145Sdavidxu /* Incomplete multibyte sequence */ 78122145Sdavidxu return (-2); 79122145Sdavidxu if (s[0] >= 0xa1 && s[0] <= 0xfe) { 80122145Sdavidxu if (n < 2) 81122145Sdavidxu /* Incomplete multibyte sequence */ 82122145Sdavidxu return (-2); 83122145Sdavidxu if (s[1] < 0xa1 || s[1] > 0xfe) 84122145Sdavidxu /* Invalid multibyte sequence */ 85122145Sdavidxu return (-1); 86122145Sdavidxu return (2); 87122145Sdavidxu } else if (s[0] & 0x80) { 88122145Sdavidxu /* Invalid multibyte sequence */ 89122145Sdavidxu return (-1); 90122145Sdavidxu } 91122145Sdavidxu return (1); 92122145Sdavidxu} 93122145Sdavidxu 94122145Sdavidxusize_t 95122145Sdavidxu_GB2312_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, 96128004Stjr mbstate_t * __restrict ps) 97122145Sdavidxu{ 98128004Stjr _GB2312State *gs; 99122145Sdavidxu wchar_t wc; 100128004Stjr int i, len, ocount; 101128004Stjr size_t ncopy; 102122145Sdavidxu 103128004Stjr gs = (_GB2312State *)gs; 104128004Stjr 105128004Stjr if (s == NULL) { 106128004Stjr s = ""; 107128004Stjr n = 1; 108128004Stjr pwc = NULL; 109128004Stjr } 110128004Stjr 111128004Stjr ncopy = MIN(MIN(n, MB_CUR_MAX), sizeof(gs->bytes) - gs->count); 112128004Stjr memcpy(gs->bytes + gs->count, s, ncopy); 113128004Stjr ocount = gs->count; 114128004Stjr gs->count += ncopy; 115128004Stjr s = (char *)gs->bytes; 116128004Stjr n = gs->count; 117128004Stjr 118122145Sdavidxu if ((len = _GB2312_check(s, n)) < 0) 119122145Sdavidxu return ((size_t)len); 120122145Sdavidxu wc = 0; 121122145Sdavidxu i = len; 122122145Sdavidxu while (i-- > 0) 123122145Sdavidxu wc = (wc << 8) | (unsigned char)*s++; 124122145Sdavidxu if (pwc != NULL) 125122145Sdavidxu *pwc = wc; 126128004Stjr gs->count = 0; 127128004Stjr return (wc == L'\0' ? 0 : len - ocount); 128122145Sdavidxu} 129122145Sdavidxu 130122145Sdavidxusize_t 131122145Sdavidxu_GB2312_wcrtomb(char * __restrict s, wchar_t wc, 132122145Sdavidxu mbstate_t * __restrict ps __unused) 133122145Sdavidxu{ 134122145Sdavidxu 135122145Sdavidxu if (s == NULL) 136122145Sdavidxu /* Reset to initial shift state (no-op) */ 137122145Sdavidxu return (1); 138122145Sdavidxu if (wc & 0x8000) { 139122145Sdavidxu *s++ = (wc >> 8) & 0xff; 140122145Sdavidxu *s = wc & 0xff; 141122145Sdavidxu return (2); 142122145Sdavidxu } 143122145Sdavidxu *s = wc & 0xff; 144122145Sdavidxu return (1); 145122145Sdavidxu} 146