1139823Simp/* $NetBSD: citrus_gbk2k.c,v 1.9 2022/04/19 20:32:14 rillig Exp $ */ 21541Srgrimes 31541Srgrimes/*- 41541Srgrimes * Copyright (c)2003 Citrus Project, 51541Srgrimes * All rights reserved. 61541Srgrimes * 71541Srgrimes * Redistribution and use in source and binary forms, with or without 81541Srgrimes * modification, are permitted provided that the following conditions 91541Srgrimes * are met: 101541Srgrimes * 1. Redistributions of source code must retain the above copyright 111541Srgrimes * notice, this list of conditions and the following disclaimer. 121541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 131541Srgrimes * notice, this list of conditions and the following disclaimer in the 141541Srgrimes * documentation and/or other materials provided with the distribution. 151541Srgrimes * 161541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 171541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 181541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 191541Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 201541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 211541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 221541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 231541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 241541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 251541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 261541Srgrimes * SUCH DAMAGE. 271541Srgrimes */ 281541Srgrimes 291541Srgrimes#include <sys/cdefs.h> 301541Srgrimes#if defined(LIBC_SCCS) && !defined(lint) 311541Srgrimes__RCSID("$NetBSD: citrus_gbk2k.c,v 1.9 2022/04/19 20:32:14 rillig Exp $"); 3236503Speter#endif /* LIBC_SCCS and not lint */ 331541Srgrimes 341541Srgrimes#include <assert.h> 3583651Speter#include <errno.h> 3683651Speter#include <string.h> 3783651Speter#include <stdio.h> 381541Srgrimes#include <stdlib.h> 391541Srgrimes#include <stddef.h> 401541Srgrimes#include <wchar.h> 411541Srgrimes#include <sys/types.h> 421541Srgrimes#include <limits.h> 4383651Speter 44190380Srwatson#include "citrus_namespace.h" 45190380Srwatson#include "citrus_types.h" 461541Srgrimes#include "citrus_bcs.h" 4748274Speter#include "citrus_module.h" 4848274Speter#include "citrus_ctype.h" 4960041Sphk#include "citrus_stdenc.h" 5031886Sbde#include "citrus_gbk2k.h" 511541Srgrimes 521541Srgrimes 531541Srgrimes/* ---------------------------------------------------------------------- 541541Srgrimes * private stuffs used by templates 551541Srgrimes */ 561541Srgrimes 571541Srgrimestypedef struct _GBK2KState { 589336Sdfr char ch[4]; 592997Swollman int chlen; 602997Swollman} _GBK2KState; 6183651Speter 621541Srgrimestypedef struct { 633305Sphk int mb_cur_max; 6412662Sdg} _GBK2KEncodingInfo; 6512662Sdg 6692783Sjefftypedef struct { 673305Sphk _GBK2KEncodingInfo ei; 68122698Salfred struct { 69122698Salfred /* for future multi-locale facility */ 701541Srgrimes _GBK2KState s_mblen; 719336Sdfr _GBK2KState s_mbrlen; 7283651Speter _GBK2KState s_mbrtowc; 7383651Speter _GBK2KState s_mbtowc; 74190380Srwatson _GBK2KState s_mbsrtowcs; 751541Srgrimes _GBK2KState s_mbsnrtowcs; 7683651Speter _GBK2KState s_wcrtomb; 7783651Speter _GBK2KState s_wcsrtombs; 781541Srgrimes _GBK2KState s_wcsnrtombs; 791541Srgrimes _GBK2KState s_wctomb; 801541Srgrimes } states; 811541Srgrimes} _GBK2KCTypeInfo; 82158739Smohans 83158739Smohans#define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 84158739Smohans#define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 85158739Smohans 86158739Smohans#define _FUNCNAME(m) _citrus_GBK2K_##m 87190380Srwatson#define _ENCODING_INFO _GBK2KEncodingInfo 88190380Srwatson#define _CTYPE_INFO _GBK2KCTypeInfo 89190380Srwatson#define _ENCODING_STATE _GBK2KState 90190380Srwatson#define _ENCODING_MB_CUR_MAX(_ei_) (_ei_)->mb_cur_max 91190380Srwatson#define _ENCODING_IS_STATE_DEPENDENT 0 92190380Srwatson#define _STATE_NEEDS_EXPLICIT_INIT(_ps_) 0 93190380Srwatson 94190380Srwatsonstatic __inline void 95190380Srwatson/*ARGSUSED*/ 96190380Srwatson_citrus_GBK2K_init_state(_GBK2KEncodingInfo * __restrict ei, 97190380Srwatson _GBK2KState * __restrict s) 98190380Srwatson{ 99190380Srwatson memset(s, 0, sizeof(*s)); 100190380Srwatson} 101190380Srwatson 102190380Srwatsonstatic __inline void 103190380Srwatson/*ARGSUSED*/ 104190380Srwatson_citrus_GBK2K_pack_state(_GBK2KEncodingInfo * __restrict ei, 105158739Smohans void * __restrict pspriv, 1061541Srgrimes const _GBK2KState * __restrict s) 1071541Srgrimes{ 1081541Srgrimes memcpy(pspriv, (const void *)s, sizeof(*s)); 10983651Speter} 11083651Speter 11183651Speterstatic __inline void 11283651Speter/*ARGSUSED*/ 1131541Srgrimes_citrus_GBK2K_unpack_state(_GBK2KEncodingInfo * __restrict ei, 1141541Srgrimes _GBK2KState * __restrict s, 115176224Sjhb const void * __restrict pspriv) 11612911Sphk{ 11783651Speter memcpy((void *)s, pspriv, sizeof(*s)); 11812911Sphk} 11912911Sphk 12083651Speterstatic __inline int 12183651Speter_mb_singlebyte(int c) 1229336Sdfr{ 123184588Sdfr c &= 0xff; 12483651Speter return (c <= 0x7f); 125138496Sps} 126184588Sdfr 12783651Speterstatic __inline int 128176224Sjhb_mb_leadbyte(int c) 1299759Sbde{ 1309336Sdfr c &= 0xff; 1319336Sdfr return (0x81 <= c && c <= 0xfe); 1329336Sdfr} 1339336Sdfr 1349336Sdfrstatic __inline int 1359336Sdfr_mb_trailbyte(int c) 1369336Sdfr{ 1379336Sdfr c &= 0xff; 1389336Sdfr return ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfe)); 1399336Sdfr} 1409336Sdfr 1419336Sdfrstatic __inline int 1429336Sdfr_mb_surrogate(int c) 1439336Sdfr{ 1449336Sdfr c &= 0xff; 1459336Sdfr return (0x30 <= c && c <= 0x39); 1469336Sdfr} 1479336Sdfr 1489336Sdfrstatic __inline int 1499336Sdfr_mb_count(wchar_t v) 1509336Sdfr{ 1519336Sdfr u_int32_t c; 1529336Sdfr 1539336Sdfr c = (u_int32_t)v; /* XXX */ 1549336Sdfr if (!(c & 0xffffff00)) 1559336Sdfr return (1); 1569336Sdfr if (!(c & 0xffff0000)) 1579336Sdfr return (2); 1589336Sdfr return (4); 15960938Sjake} 1603664Sphk 161176224Sjhb#define _PSENC (psenc->ch[psenc->chlen - 1]) 162176224Sjhb#define _PUSH_PSENC(c) (psenc->ch[psenc->chlen++] = (c)) 163176224Sjhb 164176224Sjhbstatic int 165176224Sjhb_citrus_GBK2K_mbrtowc_priv(_GBK2KEncodingInfo * __restrict ei, 166176224Sjhb wchar_t * __restrict pwc, 167176224Sjhb const char ** __restrict s, size_t n, 168176224Sjhb _GBK2KState * __restrict psenc, 169176224Sjhb size_t * __restrict nresult) 170176224Sjhb{ 171176224Sjhb int chlenbak, len; 172176224Sjhb const char *s0, *s1; 173176224Sjhb wchar_t wc; 174176224Sjhb 175176224Sjhb _DIAGASSERT(ei != NULL); 176176224Sjhb /* pwc may be NULL */ 177176224Sjhb _DIAGASSERT(s != NULL); 178176224Sjhb _DIAGASSERT(psenc != NULL); 179176224Sjhb 180176224Sjhb s0 = *s; 1811541Srgrimes 1821541Srgrimes if (s0 == NULL) { 1831541Srgrimes /* _citrus_GBK2K_init_state(ei, psenc); */ 1841541Srgrimes psenc->chlen = 0; 1851541Srgrimes *nresult = 0; 1861541Srgrimes return (0); 18783651Speter } 1881541Srgrimes 18983651Speter chlenbak = psenc->chlen; 1901541Srgrimes 191177599Sru switch (psenc->chlen) { 1921541Srgrimes case 3: 193177599Sru if (!_mb_leadbyte (_PSENC)) 1941541Srgrimes goto invalid; 1951541Srgrimes /* FALLTHROUGH */ 1961541Srgrimes case 2: 1971541Srgrimes if (!_mb_surrogate(_PSENC) || _mb_trailbyte(_PSENC)) 1981541Srgrimes goto invalid; 1991541Srgrimes /* FALLTHROUGH */ 2001541Srgrimes case 1: 2011541Srgrimes if (!_mb_leadbyte (_PSENC)) 2021541Srgrimes goto invalid; 2031541Srgrimes /* FALLTHOROUGH */ 2041541Srgrimes case 0: 20583651Speter break; 20683651Speter default: 207152652Srees goto invalid; 2081541Srgrimes } 20983651Speter 21083651Speter for (;;) { 21183651Speter if (n-- < 1) 21283651Speter goto restart; 21383651Speter 21483651Speter _PUSH_PSENC(*s0++); 2151541Srgrimes 2161541Srgrimes switch (psenc->chlen) { 217177599Sru case 1: 2189336Sdfr if (_mb_singlebyte(_PSENC)) 219177599Sru goto convert; 2209336Sdfr if (_mb_leadbyte (_PSENC)) 2219336Sdfr continue; 2221541Srgrimes goto ilseq; 2239336Sdfr case 2: 2241541Srgrimes if (_mb_trailbyte (_PSENC)) 2251541Srgrimes goto convert; 2261541Srgrimes if (ei->mb_cur_max == 4 && 2271541Srgrimes _mb_surrogate (_PSENC)) 2281541Srgrimes continue; 2291541Srgrimes goto ilseq; 2301541Srgrimes case 3: 2311541Srgrimes if (_mb_leadbyte (_PSENC)) 23284002Speter continue; 23317186Sdfr goto ilseq; 234152652Srees case 4: 235176224Sjhb if (_mb_surrogate (_PSENC)) 2361541Srgrimes goto convert; 2371541Srgrimes goto ilseq; 23883651Speter } 23983651Speter } 24083651Speter 24183651Speterconvert: 2421541Srgrimes len = psenc->chlen; 24383651Speter s1 = &psenc->ch[0]; 24483651Speter wc = 0; 2451541Srgrimes while (len-- > 0) 2461541Srgrimes wc = (wc << 8) | (*s1++ & 0xff); 2471541Srgrimes 2481541Srgrimes if (pwc != NULL) 2491541Srgrimes *pwc = wc; 2501541Srgrimes *s = s0; 2511541Srgrimes *nresult = (wc == 0) ? 0 : psenc->chlen - chlenbak; 2521541Srgrimes /* _citrus_GBK2K_init_state(ei, psenc); */ 2531541Srgrimes psenc->chlen = 0; 25484002Speter 2551541Srgrimes return (0); 2561541Srgrimes 2571541Srgrimesrestart: 2581541Srgrimes *s = s0; 2591541Srgrimes *nresult = (size_t)-2; 2601541Srgrimes 2611541Srgrimes return (0); 2621541Srgrimes 2631541Srgrimesinvalid: 26483651Speter return (EINVAL); 2659336Sdfr 2669336Sdfrilseq: 2679336Sdfr *nresult = (size_t)-1; 2689336Sdfr return (EILSEQ); 26984002Speter} 27083651Speter 27183651Speterstatic int 2721541Srgrimes_citrus_GBK2K_wcrtomb_priv(_GBK2KEncodingInfo * __restrict ei, 2739336Sdfr char * __restrict s, size_t n, wchar_t wc, 27499797Sdillon _GBK2KState * __restrict psenc, 2751541Srgrimes size_t * __restrict nresult) 2761541Srgrimes{ 2771541Srgrimes int len, ret; 2781541Srgrimes 2791541Srgrimes _DIAGASSERT(ei != NULL); 28017186Sdfr _DIAGASSERT(s != NULL); 28117186Sdfr _DIAGASSERT(psenc != NULL); 2821541Srgrimes 2831549Srgrimes if (psenc->chlen != 0) { 28483651Speter ret = EINVAL; 2851541Srgrimes goto err; 28683651Speter } 28783651Speter 28883651Speter len = _mb_count(wc); 2891541Srgrimes if (n < len) { 2901541Srgrimes ret = E2BIG; 2911541Srgrimes goto err; 29236519Speter } 29317186Sdfr 29417186Sdfr switch (len) { 29536519Speter case 1: 29617186Sdfr if (!_mb_singlebyte(_PUSH_PSENC(wc ))) { 2971541Srgrimes ret = EILSEQ; 2981541Srgrimes goto err; 2991541Srgrimes } 3001541Srgrimes break; 3011541Srgrimes case 2: 3021541Srgrimes if (!_mb_leadbyte (_PUSH_PSENC(wc >> 8)) || 3031541Srgrimes !_mb_trailbyte (_PUSH_PSENC(wc ))) { 3041541Srgrimes ret = EILSEQ; 3051541Srgrimes goto err; 3061541Srgrimes } 3071541Srgrimes break; 3081541Srgrimes case 4: 3091541Srgrimes if (ei->mb_cur_max != 4 || 3101541Srgrimes !_mb_leadbyte (_PUSH_PSENC(wc >> 24)) || 3111541Srgrimes !_mb_surrogate (_PUSH_PSENC(wc >> 16)) || 312177599Sru !_mb_leadbyte (_PUSH_PSENC(wc >> 8)) || 3131541Srgrimes !_mb_surrogate (_PUSH_PSENC(wc ))) { 314177599Sru ret = EILSEQ; 3151541Srgrimes goto err; 3161541Srgrimes } 3171541Srgrimes break; 3181541Srgrimes } 3191541Srgrimes 3201541Srgrimes _DIAGASSERT(len == psenc->chlen); 3211541Srgrimes 3221541Srgrimes memcpy(s, psenc->ch, psenc->chlen); 3231541Srgrimes *nresult = psenc->chlen; 3241541Srgrimes /* _citrus_GBK2K_init_state(ei, psenc); */ 3251541Srgrimes psenc->chlen = 0; 3261541Srgrimes 3271541Srgrimes return (0); 3281541Srgrimes 3291541Srgrimeserr: 3301541Srgrimes *nresult = (size_t)-1; 3311541Srgrimes return ret; 3321541Srgrimes} 3331541Srgrimes 3341541Srgrimesstatic __inline int 3351541Srgrimes/*ARGSUSED*/ 3361541Srgrimes_citrus_GBK2K_stdenc_wctocs(_GBK2KEncodingInfo * __restrict ei, 3371541Srgrimes _csid_t * __restrict csid, 338104908Smike _index_t * __restrict idx, wchar_t wc) 339104908Smike{ 34017186Sdfr u_int8_t ch, cl; 3411541Srgrimes 3421541Srgrimes _DIAGASSERT(csid != NULL && idx != NULL); 3431541Srgrimes 3441541Srgrimes if ((u_int32_t)wc<0x80) { 345177599Sru /* ISO646 */ 3461541Srgrimes *csid = 0; 3471541Srgrimes *idx = (_index_t)wc; 3481541Srgrimes } else if ((u_int32_t)wc>=0x10000) { 3491541Srgrimes /* GBKUCS : XXX */ 3501541Srgrimes *csid = 3; 3511541Srgrimes *idx = (_index_t)wc; 3521541Srgrimes } else { 3531541Srgrimes ch = (u_int8_t)(wc >> 8); 3541541Srgrimes cl = (u_int8_t)wc; 3551541Srgrimes if (ch>=0xA1 && cl>=0xA1) { 3561541Srgrimes /* EUC G1 */ 3571541Srgrimes *csid = 1; 3581541Srgrimes *idx = (_index_t)wc & 0x7F7FU; 3591541Srgrimes } else { 3601541Srgrimes /* extended area (0x8140-) */ 3611541Srgrimes *csid = 2; 3621541Srgrimes *idx = (_index_t)wc; 3631549Srgrimes } 36483651Speter } 3651541Srgrimes 36683651Speter return 0; 3671541Srgrimes} 36836541Speter 3691541Srgrimesstatic __inline int 3701541Srgrimes/*ARGSUSED*/ 3711541Srgrimes_citrus_GBK2K_stdenc_cstowc(_GBK2KEncodingInfo * __restrict ei, 3721541Srgrimes wchar_t * __restrict wc, 3731541Srgrimes _csid_t csid, _index_t idx) 3741541Srgrimes{ 37536541Speter 3761541Srgrimes _DIAGASSERT(wc != NULL); 3771541Srgrimes 3781541Srgrimes switch (csid) { 3791541Srgrimes case 0: 3801541Srgrimes /* ISO646 */ 3811541Srgrimes *wc = (wchar_t)idx; 3821541Srgrimes break; 3831541Srgrimes case 1: 3841541Srgrimes /* EUC G1 */ 3851541Srgrimes *wc = (wchar_t)idx | 0x8080U; 3861541Srgrimes break; 3871541Srgrimes case 2: 3881541Srgrimes /* extended area */ 3891541Srgrimes *wc = (wchar_t)idx; 390177599Sru break; 3911541Srgrimes case 3: 392177599Sru /* GBKUCS : XXX */ 3931541Srgrimes if (ei->mb_cur_max != 4) 3941541Srgrimes return EINVAL; 3951541Srgrimes *wc = (wchar_t)idx; 39636541Speter break; 3971541Srgrimes default: 3981541Srgrimes return EILSEQ; 3991541Srgrimes } 4001541Srgrimes 4011541Srgrimes return 0; 4021541Srgrimes} 4031541Srgrimes 4041541Srgrimesstatic __inline int 4051541Srgrimes/*ARGSUSED*/ 4061541Srgrimes_citrus_GBK2K_stdenc_get_state_desc_generic(_GBK2KEncodingInfo * __restrict ei, 4071541Srgrimes _GBK2KState * __restrict psenc, 4081541Srgrimes int * __restrict rstate) 4091541Srgrimes{ 4101541Srgrimes 4111541Srgrimes if (psenc->chlen == 0) 4121541Srgrimes *rstate = _STDENC_SDGEN_INITIAL; 4131541Srgrimes else 4141541Srgrimes *rstate = _STDENC_SDGEN_INCOMPLETE_CHAR; 4151541Srgrimes 4161541Srgrimes return 0; 4171541Srgrimes} 4181541Srgrimes 4191541Srgrimesstatic int 4201541Srgrimes/*ARGSUSED*/ 4211541Srgrimes_citrus_GBK2K_encoding_module_init(_GBK2KEncodingInfo * __restrict ei, 4221541Srgrimes const void * __restrict var, size_t lenvar) 4231541Srgrimes{ 4241541Srgrimes const char *p; 4251549Srgrimes 42683651Speter _DIAGASSERT(ei != NULL); 4271541Srgrimes 42883651Speter p = var; 4291541Srgrimes#define MATCH(x, act) \ 43092783Sjeffdo { \ 43192783Sjeff if (lenvar >= (sizeof(#x)-1) && \ 4321541Srgrimes _bcs_strncasecmp(p, #x, sizeof(#x)-1) == 0) { \ 4331541Srgrimes act; \ 4341541Srgrimes lenvar -= sizeof(#x)-1; \ 4351541Srgrimes p += sizeof(#x)-1; \ 4361541Srgrimes } \ 4371541Srgrimes} while (0) 4381541Srgrimes memset((void *)ei, 0, sizeof(*ei)); 4391541Srgrimes ei->mb_cur_max = 4; 4401541Srgrimes while (lenvar>0) { 4411541Srgrimes switch (_bcs_tolower(*p)) { 4423664Sphk case '2': 4439336Sdfr MATCH("2byte", ei->mb_cur_max = 2); 4449336Sdfr break; 4459336Sdfr } 4461541Srgrimes p++; 44719449Sdfr lenvar--; 44899797Sdillon } 44999797Sdillon 45019449Sdfr return (0); 4511541Srgrimes} 4521541Srgrimes 4531541Srgrimesstatic void 4541541Srgrimes/*ARGSUSED*/ 4551541Srgrimes_citrus_GBK2K_encoding_module_uninit(_GBK2KEncodingInfo *ei) 456184588Sdfr{ 4573664Sphk} 458184588Sdfr 459148162Sps 460184588Sdfr/* ---------------------------------------------------------------------- 461158739Smohans * public interface for ctype 462172600Smohans */ 46316365Sphk 46442957Sdillon_CITRUS_CTYPE_DECLS(GBK2K); 46542957Sdillon_CITRUS_CTYPE_DEF_OPS(GBK2K); 4661549Srgrimes 4671541Srgrimes#include "citrus_ctype_template.h" 4681541Srgrimes 46938894Sbde/* ---------------------------------------------------------------------- 47083651Speter * public interface for stdenc 47138894Sbde */ 472128111Speadar 47338894Sbde_CITRUS_STDENC_DECLS(GBK2K); 474184588Sdfr_CITRUS_STDENC_DEF_OPS(GBK2K); 475127421Srees 476128111Speadar#include "citrus_stdenc_template.h" 477128126Smarcel