1127834Stjr/*- 2127834Stjr * Copyright (c) 2002-2004 Tim J. Robbins 3127834Stjr * All rights reserved. 4118146Sache * 5235785Stheraven * Copyright (c) 2011 The FreeBSD Foundation 6235785Stheraven * All rights reserved. 7235785Stheraven * Portions of this software were developed by David Chisnall 8235785Stheraven * under sponsorship from the FreeBSD Foundation. 9235785Stheraven * 10118146Sache * Redistribution and use in source and binary forms, with or without 11118146Sache * modification, are permitted provided that the following conditions 12118146Sache * are met: 13118146Sache * 1. Redistributions of source code must retain the above copyright 14118146Sache * notice, this list of conditions and the following disclaimer. 15118146Sache * 2. Redistributions in binary form must reproduce the above copyright 16118146Sache * notice, this list of conditions and the following disclaimer in the 17118146Sache * documentation and/or other materials provided with the distribution. 18118146Sache * 19127834Stjr * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20118146Sache * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21118146Sache * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22127834Stjr * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23118146Sache * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24118146Sache * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25118146Sache * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26118146Sache * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27118146Sache * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28118146Sache * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29118146Sache * SUCH DAMAGE. 30118146Sache */ 31127834Stjr/* 32127834Stjr * PRC National Standard GB 18030-2000 encoding of Chinese text. 33127834Stjr * 34127834Stjr * See gb18030(5) for details. 35127834Stjr */ 36118146Sache 37128004Stjr#include <sys/param.h> 38118146Sache__FBSDID("$FreeBSD$"); 39118146Sache 40127834Stjr#include <errno.h> 41127834Stjr#include <runetype.h> 42118146Sache#include <stdlib.h> 43128004Stjr#include <string.h> 44127834Stjr#include <wchar.h> 45129153Stjr#include "mblocal.h" 46118146Sache 47142654Sphantomstatic size_t _GB18030_mbrtowc(wchar_t * __restrict, const char * __restrict, 48142654Sphantom size_t, mbstate_t * __restrict); 49142654Sphantomstatic int _GB18030_mbsinit(const mbstate_t *); 50142654Sphantomstatic size_t _GB18030_wcrtomb(char * __restrict, wchar_t, 51142654Sphantom mbstate_t * __restrict); 52127834Stjr 53128004Stjrtypedef struct { 54128004Stjr int count; 55128004Stjr u_char bytes[4]; 56128004Stjr} _GB18030State; 57128004Stjr 58118146Sacheint 59235785Stheraven_GB18030_init(struct xlocale_ctype *l, _RuneLocale *rl) 60118146Sache{ 61127834Stjr 62235785Stheraven l->__mbrtowc = _GB18030_mbrtowc; 63235785Stheraven l->__wcrtomb = _GB18030_wcrtomb; 64235785Stheraven l->__mbsinit = _GB18030_mbsinit; 65235785Stheraven l->runes = rl; 66235785Stheraven l->__mb_cur_max = 4; 67235785Stheraven l->__mb_sb_limit = 128; 68127834Stjr 69118146Sache return (0); 70118146Sache} 71118146Sache 72142654Sphantomstatic int 73128004Stjr_GB18030_mbsinit(const mbstate_t *ps) 74128004Stjr{ 75128004Stjr 76128081Stjr return (ps == NULL || ((const _GB18030State *)ps)->count == 0); 77128004Stjr} 78128004Stjr 79142654Sphantomstatic size_t 80127834Stjr_GB18030_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, 81128004Stjr size_t n, mbstate_t * __restrict ps) 82118146Sache{ 83128004Stjr _GB18030State *gs; 84127834Stjr wchar_t wch; 85128004Stjr int ch, len, ocount; 86128004Stjr size_t ncopy; 87118146Sache 88128004Stjr gs = (_GB18030State *)ps; 89128004Stjr 90128155Stjr if (gs->count < 0 || gs->count > sizeof(gs->bytes)) { 91128155Stjr errno = EINVAL; 92128155Stjr return ((size_t)-1); 93128155Stjr } 94128155Stjr 95128004Stjr if (s == NULL) { 96128004Stjr s = ""; 97128004Stjr n = 1; 98128004Stjr pwc = NULL; 99128004Stjr } 100128004Stjr 101128004Stjr ncopy = MIN(MIN(n, MB_CUR_MAX), sizeof(gs->bytes) - gs->count); 102128004Stjr memcpy(gs->bytes + gs->count, s, ncopy); 103128004Stjr ocount = gs->count; 104128004Stjr gs->count += ncopy; 105128004Stjr s = (char *)gs->bytes; 106128004Stjr n = gs->count; 107128004Stjr 108127834Stjr if (n == 0) 109127834Stjr /* Incomplete multibyte sequence */ 110127834Stjr return ((size_t)-2); 111118146Sache 112127834Stjr /* 113127834Stjr * Single byte: [00-7f] 114127834Stjr * Two byte: [81-fe][40-7e,80-fe] 115127834Stjr * Four byte: [81-fe][30-39][81-fe][30-39] 116127834Stjr */ 117127834Stjr ch = (unsigned char)*s++; 118127834Stjr if (ch <= 0x7f) { 119127834Stjr len = 1; 120127834Stjr wch = ch; 121127834Stjr } else if (ch >= 0x81 && ch <= 0xfe) { 122127834Stjr wch = ch; 123127834Stjr if (n < 2) 124127834Stjr return ((size_t)-2); 125127834Stjr ch = (unsigned char)*s++; 126127834Stjr if ((ch >= 0x40 && ch <= 0x7e) || (ch >= 0x80 && ch <= 0xfe)) { 127127834Stjr wch = (wch << 8) | ch; 128127834Stjr len = 2; 129127834Stjr } else if (ch >= 0x30 && ch <= 0x39) { 130127834Stjr /* 131127834Stjr * Strip high bit off the wide character we will 132127834Stjr * eventually output so that it is positive when 133127834Stjr * cast to wint_t on 32-bit twos-complement machines. 134127834Stjr */ 135127834Stjr wch = ((wch & 0x7f) << 8) | ch; 136127834Stjr if (n < 3) 137127834Stjr return ((size_t)-2); 138127834Stjr ch = (unsigned char)*s++; 139127834Stjr if (ch < 0x81 || ch > 0xfe) 140127834Stjr goto ilseq; 141127834Stjr wch = (wch << 8) | ch; 142127834Stjr if (n < 4) 143127834Stjr return ((size_t)-2); 144127834Stjr ch = (unsigned char)*s++; 145127834Stjr if (ch < 0x30 || ch > 0x39) 146127834Stjr goto ilseq; 147127834Stjr wch = (wch << 8) | ch; 148127834Stjr len = 4; 149127834Stjr } else 150127834Stjr goto ilseq; 151127834Stjr } else 152127834Stjr goto ilseq; 153118146Sache 154127834Stjr if (pwc != NULL) 155127834Stjr *pwc = wch; 156128004Stjr gs->count = 0; 157128004Stjr return (wch == L'\0' ? 0 : len - ocount); 158127834Stjrilseq: 159127834Stjr errno = EILSEQ; 160127834Stjr return ((size_t)-1); 161118146Sache} 162118146Sache 163142654Sphantomstatic size_t 164128155Stjr_GB18030_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps) 165118146Sache{ 166128155Stjr _GB18030State *gs; 167127834Stjr size_t len; 168127834Stjr int c; 169118146Sache 170128155Stjr gs = (_GB18030State *)ps; 171128155Stjr 172128155Stjr if (gs->count != 0) { 173128155Stjr errno = EINVAL; 174128155Stjr return ((size_t)-1); 175128155Stjr } 176128155Stjr 177127834Stjr if (s == NULL) 178127834Stjr /* Reset to initial shift state (no-op) */ 179127834Stjr return (1); 180127834Stjr if ((wc & ~0x7fffffff) != 0) 181127834Stjr goto ilseq; 182127834Stjr if (wc & 0x7f000000) { 183127834Stjr /* Replace high bit that mbrtowc() removed. */ 184127834Stjr wc |= 0x80000000; 185127834Stjr c = (wc >> 24) & 0xff; 186127834Stjr if (c < 0x81 || c > 0xfe) 187127834Stjr goto ilseq; 188127834Stjr *s++ = c; 189127834Stjr c = (wc >> 16) & 0xff; 190127834Stjr if (c < 0x30 || c > 0x39) 191127834Stjr goto ilseq; 192127834Stjr *s++ = c; 193127834Stjr c = (wc >> 8) & 0xff; 194127834Stjr if (c < 0x81 || c > 0xfe) 195127834Stjr goto ilseq; 196127834Stjr *s++ = c; 197127834Stjr c = wc & 0xff; 198127834Stjr if (c < 0x30 || c > 0x39) 199127834Stjr goto ilseq; 200127834Stjr *s++ = c; 201127834Stjr len = 4; 202127834Stjr } else if (wc & 0x00ff0000) 203127834Stjr goto ilseq; 204127834Stjr else if (wc & 0x0000ff00) { 205127834Stjr c = (wc >> 8) & 0xff; 206127834Stjr if (c < 0x81 || c > 0xfe) 207127834Stjr goto ilseq; 208127834Stjr *s++ = c; 209127834Stjr c = wc & 0xff; 210127834Stjr if (c < 0x40 || c == 0x7f || c == 0xff) 211127834Stjr goto ilseq; 212127834Stjr *s++ = c; 213127834Stjr len = 2; 214127834Stjr } else if (wc <= 0x7f) { 215127834Stjr *s++ = wc; 216127834Stjr len = 1; 217127834Stjr } else 218127834Stjr goto ilseq; 219127834Stjr 220127834Stjr return (len); 221127834Stjrilseq: 222127834Stjr errno = EILSEQ; 223127834Stjr return ((size_t)-1); 224118146Sache} 225