/* gb2312.c revision 290494 */
1122145Sdavidxu/*- 2290494Sbapt * Copyright 2013 Garrett D'Amore <garrett@damore.org> 3290494Sbapt * Copyright 2010 Nexenta Systems, Inc. All rights reserved. 4128004Stjr * Copyright (c) 2004 Tim J. Robbins. All rights reserved. 5122145Sdavidxu * Copyright (c) 2003 David Xu <davidxu@freebsd.org> 6122145Sdavidxu * All rights reserved. 7122145Sdavidxu * 8227753Stheraven * Copyright (c) 2011 The FreeBSD Foundation 9227753Stheraven * All rights reserved. 10227753Stheraven * Portions of this software were developed by David Chisnall 11227753Stheraven * under sponsorship from the FreeBSD Foundation. 12227753Stheraven * 13122145Sdavidxu * Redistribution and use in source and binary forms, with or without 14122145Sdavidxu * modification, are permitted provided that the following conditions 15122145Sdavidxu * are met: 16122145Sdavidxu * 1. Redistributions of source code must retain the above copyright 17122145Sdavidxu * notice, this list of conditions and the following disclaimer. 18122145Sdavidxu * 2. Redistributions in binary form must reproduce the above copyright 19122145Sdavidxu * notice, this list of conditions and the following disclaimer in the 20122145Sdavidxu * documentation and/or other materials provided with the distribution. 21122145Sdavidxu * 22122145Sdavidxu * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 23122145Sdavidxu * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24122145Sdavidxu * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25122145Sdavidxu * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 26122145Sdavidxu * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27122145Sdavidxu * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28122145Sdavidxu * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29122145Sdavidxu * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30122145Sdavidxu * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31122145Sdavidxu * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32122145Sdavidxu * SUCH DAMAGE. 33122145Sdavidxu */ 34122145Sdavidxu 35128004Stjr#include <sys/param.h> 36122145Sdavidxu__FBSDID("$FreeBSD: head/lib/libc/locale/gb2312.c 290494 2015-11-07 12:43:35Z bapt $"); 37122145Sdavidxu 38128155Stjr#include <errno.h> 39122145Sdavidxu#include <runetype.h> 40122145Sdavidxu#include <stdlib.h> 41128004Stjr#include <string.h> 42122145Sdavidxu#include <wchar.h> 43129153Stjr#include "mblocal.h" 44122145Sdavidxu 45142654Sphantomstatic size_t _GB2312_mbrtowc(wchar_t * __restrict, const char * __restrict, 46142654Sphantom size_t, mbstate_t * __restrict); 47142654Sphantomstatic int _GB2312_mbsinit(const mbstate_t *); 48142654Sphantomstatic size_t _GB2312_wcrtomb(char * __restrict, wchar_t, 49142654Sphantom mbstate_t * __restrict); 50290494Sbaptstatic size_t _GB2312_mbsnrtowcs(wchar_t * __restrict, 51290494Sbapt const char ** __restrict, size_t, size_t, 52290494Sbapt mbstate_t * __restrict); 53290494Sbaptstatic size_t _GB2312_wcsnrtombs(char * __restrict, 54290494Sbapt const wchar_t ** __restrict, size_t, size_t, 55290494Sbapt mbstate_t * __restrict); 56122145Sdavidxu 57290494Sbapt 58128004Stjrtypedef struct { 59128004Stjr int count; 60128004Stjr u_char bytes[2]; 61128004Stjr} _GB2312State; 62128004Stjr 63122145Sdavidxuint 64227753Stheraven_GB2312_init(struct xlocale_ctype *l, _RuneLocale *rl) 65122145Sdavidxu{ 66122145Sdavidxu 
67227753Stheraven l->runes = rl; 68227753Stheraven l->__mbrtowc = _GB2312_mbrtowc; 69227753Stheraven l->__wcrtomb = _GB2312_wcrtomb; 70227753Stheraven l->__mbsinit = _GB2312_mbsinit; 71290494Sbapt l->__mbsnrtowcs = _GB2312_mbsnrtowcs; 72290494Sbapt l->__wcsnrtombs = _GB2312_wcsnrtombs; 73227753Stheraven l->__mb_cur_max = 2; 74227753Stheraven l->__mb_sb_limit = 128; 75122145Sdavidxu return (0); 76122145Sdavidxu} 77122145Sdavidxu 78142654Sphantomstatic int 79128004Stjr_GB2312_mbsinit(const mbstate_t *ps) 80128004Stjr{ 81128004Stjr 82128081Stjr return (ps == NULL || ((const _GB2312State *)ps)->count == 0); 83128004Stjr} 84128004Stjr 85290494Sbaptstatic int 86122145Sdavidxu_GB2312_check(const char *str, size_t n) 87122145Sdavidxu{ 88122145Sdavidxu const u_char *s = (const u_char *)str; 89122145Sdavidxu 90122145Sdavidxu if (n == 0) 91122145Sdavidxu /* Incomplete multibyte sequence */ 92122145Sdavidxu return (-2); 93122145Sdavidxu if (s[0] >= 0xa1 && s[0] <= 0xfe) { 94122145Sdavidxu if (n < 2) 95122145Sdavidxu /* Incomplete multibyte sequence */ 96122145Sdavidxu return (-2); 97122145Sdavidxu if (s[1] < 0xa1 || s[1] > 0xfe) 98122145Sdavidxu /* Invalid multibyte sequence */ 99122145Sdavidxu return (-1); 100122145Sdavidxu return (2); 101122145Sdavidxu } else if (s[0] & 0x80) { 102122145Sdavidxu /* Invalid multibyte sequence */ 103122145Sdavidxu return (-1); 104290494Sbapt } 105122145Sdavidxu return (1); 106122145Sdavidxu} 107122145Sdavidxu 108142654Sphantomstatic size_t 109122145Sdavidxu_GB2312_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, 110128004Stjr mbstate_t * __restrict ps) 111122145Sdavidxu{ 112128004Stjr _GB2312State *gs; 113122145Sdavidxu wchar_t wc; 114128004Stjr int i, len, ocount; 115128004Stjr size_t ncopy; 116122145Sdavidxu 117128089Sdavidxu gs = (_GB2312State *)ps; 118128004Stjr 119128155Stjr if (gs->count < 0 || gs->count > sizeof(gs->bytes)) { 120128155Stjr errno = EINVAL; 121128155Stjr return ((size_t)-1); 122128155Stjr } 
123128155Stjr 124128004Stjr if (s == NULL) { 125128004Stjr s = ""; 126128004Stjr n = 1; 127128004Stjr pwc = NULL; 128128004Stjr } 129128004Stjr 130128004Stjr ncopy = MIN(MIN(n, MB_CUR_MAX), sizeof(gs->bytes) - gs->count); 131128004Stjr memcpy(gs->bytes + gs->count, s, ncopy); 132128004Stjr ocount = gs->count; 133128004Stjr gs->count += ncopy; 134128004Stjr s = (char *)gs->bytes; 135128004Stjr n = gs->count; 136128004Stjr 137122145Sdavidxu if ((len = _GB2312_check(s, n)) < 0) 138122145Sdavidxu return ((size_t)len); 139122145Sdavidxu wc = 0; 140122145Sdavidxu i = len; 141122145Sdavidxu while (i-- > 0) 142122145Sdavidxu wc = (wc << 8) | (unsigned char)*s++; 143122145Sdavidxu if (pwc != NULL) 144122145Sdavidxu *pwc = wc; 145128004Stjr gs->count = 0; 146128004Stjr return (wc == L'\0' ? 0 : len - ocount); 147122145Sdavidxu} 148122145Sdavidxu 149142654Sphantomstatic size_t 150128155Stjr_GB2312_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps) 151122145Sdavidxu{ 152128155Stjr _GB2312State *gs; 153122145Sdavidxu 154128155Stjr gs = (_GB2312State *)ps; 155128155Stjr 156128155Stjr if (gs->count != 0) { 157128155Stjr errno = EINVAL; 158128155Stjr return ((size_t)-1); 159128155Stjr } 160128155Stjr 161122145Sdavidxu if (s == NULL) 162122145Sdavidxu /* Reset to initial shift state (no-op) */ 163122145Sdavidxu return (1); 164122145Sdavidxu if (wc & 0x8000) { 165122145Sdavidxu *s++ = (wc >> 8) & 0xff; 166122145Sdavidxu *s = wc & 0xff; 167122145Sdavidxu return (2); 168122145Sdavidxu } 169122145Sdavidxu *s = wc & 0xff; 170122145Sdavidxu return (1); 171122145Sdavidxu} 172290494Sbapt 173290494Sbaptstatic size_t 174290494Sbapt_GB2312_mbsnrtowcs(wchar_t * __restrict dst, 175290494Sbapt const char ** __restrict src, size_t nms, size_t len, 176290494Sbapt mbstate_t * __restrict ps) 177290494Sbapt{ 178290494Sbapt return (__mbsnrtowcs_std(dst, src, nms, len, ps, _GB2312_mbrtowc)); 179290494Sbapt} 180290494Sbapt 181290494Sbaptstatic size_t 
182290494Sbapt_GB2312_wcsnrtombs(char * __restrict dst, 183290494Sbapt const wchar_t ** __restrict src, size_t nwc, size_t len, 184290494Sbapt mbstate_t * __restrict ps) 185290494Sbapt{ 186290494Sbapt return (__wcsnrtombs_std(dst, src, nwc, len, ps, _GB2312_wcrtomb)); 187290494Sbapt} 188