gb18030.c revision 127834
1127834Stjr/*- 2127834Stjr * Copyright (c) 2002-2004 Tim J. Robbins 3127834Stjr * All rights reserved. 4118146Sache * 5118146Sache * Redistribution and use in source and binary forms, with or without 6118146Sache * modification, are permitted provided that the following conditions 7118146Sache * are met: 8118146Sache * 1. Redistributions of source code must retain the above copyright 9118146Sache * notice, this list of conditions and the following disclaimer. 10118146Sache * 2. Redistributions in binary form must reproduce the above copyright 11118146Sache * notice, this list of conditions and the following disclaimer in the 12118146Sache * documentation and/or other materials provided with the distribution. 13118146Sache * 14127834Stjr * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15118146Sache * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16118146Sache * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17127834Stjr * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18118146Sache * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19118146Sache * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20118146Sache * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21118146Sache * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22118146Sache * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23118146Sache * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24118146Sache * SUCH DAMAGE. 25118146Sache */ 26127834Stjr/* 27127834Stjr * PRC National Standard GB 18030-2000 encoding of Chinese text. 28127834Stjr * 29127834Stjr * See gb18030(5) for details. 30127834Stjr */ 31118146Sache 32118146Sache#include <sys/cdefs.h> 33118146Sache__FBSDID("$FreeBSD: head/lib/libc/locale/gb18030.c 127834 2004-04-04 11:00:42Z tjr $"); 34118146Sache 35127834Stjr#include <errno.h> 36127834Stjr#include <runetype.h> 37118146Sache#include <stdlib.h> 38127834Stjr#include <wchar.h> 39118146Sache 40127834Stjrextern size_t (*__mbrtowc)(wchar_t * __restrict, const char * __restrict, 41127834Stjr size_t, mbstate_t * __restrict); 42127834Stjrextern size_t (*__wcrtomb)(char * __restrict, wchar_t, mbstate_t * __restrict); 43118146Sache 44127834Stjrint _GB18030_init(_RuneLocale *); 45127834Stjrsize_t _GB18030_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, 46127834Stjr mbstate_t * __restrict); 47127834Stjrsize_t _GB18030_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); 48127834Stjr 49118146Sacheint 50127834Stjr_GB18030_init(_RuneLocale *rl) 51118146Sache{ 52127834Stjr 53127834Stjr __mbrtowc = _GB18030_mbrtowc; 54127834Stjr __wcrtomb = _GB18030_wcrtomb; 55118146Sache _CurrentRuneLocale = rl; 56118146Sache __mb_cur_max = 4; 57127834Stjr 58118146Sache return (0); 59118146Sache} 60118146Sache 61127834Stjrsize_t 62127834Stjr_GB18030_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, 63127834Stjr size_t n, mbstate_t * __restrict ps __unused) 64118146Sache{ 65127834Stjr wchar_t wch; 66127834Stjr int ch, len; 67118146Sache 68127834Stjr if (s == NULL) 69127834Stjr /* Reset to initial shift state (no-op) */ 70127834Stjr return (0); 71127834Stjr if (n == 0) 72127834Stjr /* Incomplete multibyte sequence */ 73127834Stjr return ((size_t)-2); 74118146Sache 75127834Stjr /* 76127834Stjr * Single byte: [00-7f] 77127834Stjr * Two byte: [81-fe][40-7e,80-fe] 78127834Stjr * Four byte: [81-fe][30-39][81-fe][30-39] 79127834Stjr */ 80127834Stjr ch = (unsigned char)*s++; 81127834Stjr if (ch <= 0x7f) { 82127834Stjr len = 1; 83127834Stjr wch = ch; 84127834Stjr } else if (ch >= 0x81 && ch <= 0xfe) { 85127834Stjr wch = ch; 86127834Stjr if (n < 2) 87127834Stjr return ((size_t)-2); 88127834Stjr ch = (unsigned char)*s++; 89127834Stjr if ((ch >= 0x40 && ch <= 0x7e) || (ch >= 0x80 && ch <= 0xfe)) { 90127834Stjr wch = (wch << 8) | ch; 91127834Stjr len = 2; 92127834Stjr } else if (ch >= 0x30 && ch <= 0x39) { 93127834Stjr /* 94127834Stjr * Strip high bit off the wide character we will 95127834Stjr * eventually output so that it is positive when 96127834Stjr * cast to wint_t on 32-bit twos-complement machines. 97127834Stjr */ 98127834Stjr wch = ((wch & 0x7f) << 8) | ch; 99127834Stjr if (n < 3) 100127834Stjr return ((size_t)-2); 101127834Stjr ch = (unsigned char)*s++; 102127834Stjr if (ch < 0x81 || ch > 0xfe) 103127834Stjr goto ilseq; 104127834Stjr wch = (wch << 8) | ch; 105127834Stjr if (n < 4) 106127834Stjr return ((size_t)-2); 107127834Stjr ch = (unsigned char)*s++; 108127834Stjr if (ch < 0x30 || ch > 0x39) 109127834Stjr goto ilseq; 110127834Stjr wch = (wch << 8) | ch; 111127834Stjr len = 4; 112127834Stjr } else 113127834Stjr goto ilseq; 114127834Stjr } else 115127834Stjr goto ilseq; 116118146Sache 117127834Stjr if (pwc != NULL) 118127834Stjr *pwc = wch; 119127834Stjr return (wch == L'\0' ? 0 : len); 120127834Stjrilseq: 121127834Stjr errno = EILSEQ; 122127834Stjr return ((size_t)-1); 123118146Sache} 124118146Sache 125127834Stjrsize_t 126127834Stjr_GB18030_wcrtomb(char * __restrict s, wchar_t wc, 127127834Stjr mbstate_t * __restrict ps __unused) 128118146Sache{ 129127834Stjr size_t len; 130127834Stjr int c; 131118146Sache 132127834Stjr if (s == NULL) 133127834Stjr /* Reset to initial shift state (no-op) */ 134127834Stjr return (1); 135127834Stjr 136127834Stjr if ((wc & ~0x7fffffff) != 0) 137127834Stjr goto ilseq; 138127834Stjr if (wc & 0x7f000000) { 139127834Stjr /* Replace high bit that mbrtowc() removed. */ 140127834Stjr wc |= 0x80000000; 141127834Stjr c = (wc >> 24) & 0xff; 142127834Stjr if (c < 0x81 || c > 0xfe) 143127834Stjr goto ilseq; 144127834Stjr *s++ = c; 145127834Stjr c = (wc >> 16) & 0xff; 146127834Stjr if (c < 0x30 || c > 0x39) 147127834Stjr goto ilseq; 148127834Stjr *s++ = c; 149127834Stjr c = (wc >> 8) & 0xff; 150127834Stjr if (c < 0x81 || c > 0xfe) 151127834Stjr goto ilseq; 152127834Stjr *s++ = c; 153127834Stjr c = wc & 0xff; 154127834Stjr if (c < 0x30 || c > 0x39) 155127834Stjr goto ilseq; 156127834Stjr *s++ = c; 157127834Stjr len = 4; 158127834Stjr } else if (wc & 0x00ff0000) 159127834Stjr goto ilseq; 160127834Stjr else if (wc & 0x0000ff00) { 161127834Stjr c = (wc >> 8) & 0xff; 162127834Stjr if (c < 0x81 || c > 0xfe) 163127834Stjr goto ilseq; 164127834Stjr *s++ = c; 165127834Stjr c = wc & 0xff; 166127834Stjr if (c < 0x40 || c == 0x7f || c == 0xff) 167127834Stjr goto ilseq; 168127834Stjr *s++ = c; 169127834Stjr len = 2; 170127834Stjr } else if (wc <= 0x7f) { 171127834Stjr *s++ = wc; 172127834Stjr len = 1; 173127834Stjr } else 174127834Stjr goto ilseq; 175127834Stjr 176127834Stjr return (len); 177127834Stjrilseq: 178127834Stjr errno = EILSEQ; 179127834Stjr return ((size_t)-1); 180118146Sache} 181