1219019Sgabor/* $FreeBSD$ */ 2219019Sgabor/* $NetBSD: citrus_zw.c,v 1.4 2008/06/14 16:01:08 tnozaki Exp $ */ 3219019Sgabor 4219019Sgabor/*- 5219019Sgabor * Copyright (c)2004, 2006 Citrus Project, 6219019Sgabor * All rights reserved. 7219019Sgabor * 8219019Sgabor * Redistribution and use in source and binary forms, with or without 9219019Sgabor * modification, are permitted provided that the following conditions 10219019Sgabor * are met: 11219019Sgabor * 1. Redistributions of source code must retain the above copyright 12219019Sgabor * notice, this list of conditions and the following disclaimer. 13219019Sgabor * 2. Redistributions in binary form must reproduce the above copyright 14219019Sgabor * notice, this list of conditions and the following disclaimer in the 15219019Sgabor * documentation and/or other materials provided with the distribution. 16219019Sgabor * 17219019Sgabor * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18219019Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19219019Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20219019Sgabor * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21219019Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22219019Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23219019Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24219019Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25219019Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26219019Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27219019Sgabor * SUCH DAMAGE. 28219019Sgabor * 29219019Sgabor */ 30219019Sgabor 31219019Sgabor#include <sys/cdefs.h> 32219019Sgabor#include <sys/types.h> 33219019Sgabor 34219019Sgabor#include <assert.h> 35219019Sgabor#include <errno.h> 36219019Sgabor#include <limits.h> 37219019Sgabor#include <stddef.h> 38219019Sgabor#include <stdio.h> 39219019Sgabor#include <stdint.h> 40219019Sgabor#include <stdlib.h> 41219019Sgabor#include <string.h> 42219019Sgabor#include <wchar.h> 43219019Sgabor 44219019Sgabor#include "citrus_namespace.h" 45219019Sgabor#include "citrus_types.h" 46219019Sgabor#include "citrus_module.h" 47219019Sgabor#include "citrus_stdenc.h" 48219019Sgabor#include "citrus_zw.h" 49219019Sgabor 50219019Sgabor/* ---------------------------------------------------------------------- 51219019Sgabor * private stuffs used by templates 52219019Sgabor */ 53219019Sgabor 54219019Sgabortypedef struct { 55219019Sgabor int dummy; 56219019Sgabor} _ZWEncodingInfo; 57219019Sgabor 58219019Sgabortypedef enum { 59219019Sgabor NONE, AMBIGIOUS, ASCII, GB2312 60219019Sgabor} _ZWCharset; 61219019Sgabor 62219019Sgabortypedef struct { 63219019Sgabor _ZWCharset charset; 64219019Sgabor int chlen; 65219019Sgabor char ch[4]; 66219019Sgabor} _ZWState; 67219019Sgabor 68219019Sgabor#define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 69219019Sgabor#define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 70219019Sgabor 71219019Sgabor#define _FUNCNAME(m) _citrus_ZW_##m 72219019Sgabor#define _ENCODING_INFO _ZWEncodingInfo 73219019Sgabor#define _ENCODING_STATE _ZWState 74219019Sgabor#define _ENCODING_MB_CUR_MAX(_ei_) MB_LEN_MAX 75219019Sgabor#define _ENCODING_IS_STATE_DEPENDENT 1 76219019Sgabor#define _STATE_NEEDS_EXPLICIT_INIT(_ps_) ((_ps_)->charset != NONE) 77219019Sgabor 78219019Sgaborstatic __inline void 79219019Sgabor/*ARGSUSED*/ 80219019Sgabor_citrus_ZW_init_state(_ZWEncodingInfo * __restrict ei __unused, 81219019Sgabor _ZWState * __restrict psenc) 82219019Sgabor{ 83219019Sgabor 84219019Sgabor psenc->chlen = 0; 85219019Sgabor psenc->charset = NONE; 86219019Sgabor} 87219019Sgabor 88260264Sdim#if 0 89219019Sgaborstatic __inline void 90219019Sgabor/*ARGSUSED*/ 91219019Sgabor_citrus_ZW_pack_state(_ZWEncodingInfo * __restrict ei __unused, 92219019Sgabor void *__restrict pspriv, const _ZWState * __restrict psenc) 93219019Sgabor{ 94219019Sgabor 95219019Sgabor memcpy(pspriv, (const void *)psenc, sizeof(*psenc)); 96219019Sgabor} 97219019Sgabor 98219019Sgaborstatic __inline void 99219019Sgabor/*ARGSUSED*/ 100219019Sgabor_citrus_ZW_unpack_state(_ZWEncodingInfo * __restrict ei __unused, 101219019Sgabor _ZWState * __restrict psenc, const void * __restrict pspriv) 102219019Sgabor{ 103219019Sgabor 104219019Sgabor memcpy((void *)psenc, pspriv, sizeof(*psenc)); 105219019Sgabor} 106260264Sdim#endif 107219019Sgabor 108219019Sgaborstatic int 109219019Sgabor_citrus_ZW_mbrtowc_priv(_ZWEncodingInfo * __restrict ei, 110219019Sgabor wchar_t * __restrict pwc, char **__restrict s, size_t n, 111219019Sgabor _ZWState * __restrict psenc, size_t * __restrict nresult) 112219019Sgabor{ 113219019Sgabor char *s0; 114219019Sgabor wchar_t wc; 115219019Sgabor int ch, len; 116219019Sgabor 117219019Sgabor if (*s == NULL) { 118219019Sgabor _citrus_ZW_init_state(ei, psenc); 119219019Sgabor *nresult = (size_t)_ENCODING_IS_STATE_DEPENDENT; 120219019Sgabor return (0); 121219019Sgabor } 122219019Sgabor s0 = *s; 123219019Sgabor len = 0; 124219019Sgabor 125219019Sgabor#define STORE \ 126219019Sgabordo { \ 127219019Sgabor if (n-- < 1) { \ 128219019Sgabor *nresult = (size_t)-2; \ 129219019Sgabor *s = s0; \ 130219019Sgabor return (0); \ 131219019Sgabor } \ 132219019Sgabor ch = (unsigned char)*s0++; \ 133219019Sgabor if (len++ > MB_LEN_MAX || ch > 0x7F)\ 134219019Sgabor goto ilseq; \ 135219019Sgabor psenc->ch[psenc->chlen++] = ch; \ 136219019Sgabor} while (/*CONSTCOND*/0) 137219019Sgabor 138219019Sgaborloop: 139219019Sgabor switch (psenc->charset) { 140219019Sgabor case ASCII: 141219019Sgabor switch (psenc->chlen) { 142219019Sgabor case 0: 143219019Sgabor STORE; 144219019Sgabor switch (psenc->ch[0]) { 145219019Sgabor case '\0': case '\n': 146219019Sgabor psenc->charset = NONE; 147219019Sgabor } 148219019Sgabor /*FALLTHROUGH*/ 149219019Sgabor case 1: 150219019Sgabor break; 151219019Sgabor default: 152219019Sgabor return (EINVAL); 153219019Sgabor } 154219019Sgabor ch = (unsigned char)psenc->ch[0]; 155219019Sgabor if (ch > 0x7F) 156219019Sgabor goto ilseq; 157219019Sgabor wc = (wchar_t)ch; 158219019Sgabor psenc->chlen = 0; 159219019Sgabor break; 160219019Sgabor case NONE: 161219019Sgabor if (psenc->chlen != 0) 162219019Sgabor return (EINVAL); 163219019Sgabor STORE; 164219019Sgabor ch = (unsigned char)psenc->ch[0]; 165219019Sgabor if (ch != 'z') { 166219019Sgabor if (ch != '\n' && ch != '\0') 167219019Sgabor psenc->charset = ASCII; 168219019Sgabor wc = (wchar_t)ch; 169219019Sgabor psenc->chlen = 0; 170219019Sgabor break; 171219019Sgabor } 172219019Sgabor psenc->charset = AMBIGIOUS; 173219019Sgabor psenc->chlen = 0; 174219019Sgabor /* FALLTHROUGH */ 175219019Sgabor case AMBIGIOUS: 176219019Sgabor if (psenc->chlen != 0) 177219019Sgabor return (EINVAL); 178219019Sgabor STORE; 179219019Sgabor if (psenc->ch[0] != 'W') { 180219019Sgabor psenc->charset = ASCII; 181219019Sgabor wc = L'z'; 182219019Sgabor break; 183219019Sgabor } 184219019Sgabor psenc->charset = GB2312; 185219019Sgabor psenc->chlen = 0; 186219019Sgabor /* FALLTHROUGH */ 187219019Sgabor case GB2312: 188219019Sgabor switch (psenc->chlen) { 189219019Sgabor case 0: 190219019Sgabor STORE; 191219019Sgabor ch = (unsigned char)psenc->ch[0]; 192219019Sgabor if (ch == '\0') { 193219019Sgabor psenc->charset = NONE; 194219019Sgabor wc = (wchar_t)ch; 195219019Sgabor psenc->chlen = 0; 196219019Sgabor break; 197219019Sgabor } else if (ch == '\n') { 198219019Sgabor psenc->charset = NONE; 199219019Sgabor psenc->chlen = 0; 200219019Sgabor goto loop; 201219019Sgabor } 202219019Sgabor /*FALLTHROUGH*/ 203219019Sgabor case 1: 204219019Sgabor STORE; 205219019Sgabor if (psenc->ch[0] == ' ') { 206219019Sgabor ch = (unsigned char)psenc->ch[1]; 207219019Sgabor wc = (wchar_t)ch; 208219019Sgabor psenc->chlen = 0; 209219019Sgabor break; 210219019Sgabor } else if (psenc->ch[0] == '#') { 211219019Sgabor ch = (unsigned char)psenc->ch[1]; 212219019Sgabor if (ch == '\n') { 213219019Sgabor psenc->charset = NONE; 214219019Sgabor wc = (wchar_t)ch; 215219019Sgabor psenc->chlen = 0; 216219019Sgabor break; 217219019Sgabor } else if (ch == ' ') { 218219019Sgabor wc = (wchar_t)ch; 219219019Sgabor psenc->chlen = 0; 220219019Sgabor break; 221219019Sgabor } 222219019Sgabor } 223219019Sgabor ch = (unsigned char)psenc->ch[0]; 224219019Sgabor if (ch < 0x21 || ch > 0x7E) 225219019Sgabor goto ilseq; 226219019Sgabor wc = (wchar_t)(ch << 8); 227219019Sgabor ch = (unsigned char)psenc->ch[1]; 228219019Sgabor if (ch < 0x21 || ch > 0x7E) { 229219019Sgaborilseq: 230219019Sgabor *nresult = (size_t)-1; 231219019Sgabor return (EILSEQ); 232219019Sgabor } 233219019Sgabor wc |= (wchar_t)ch; 234219019Sgabor psenc->chlen = 0; 235219019Sgabor break; 236219019Sgabor default: 237219019Sgabor return (EINVAL); 238219019Sgabor } 239219019Sgabor break; 240219019Sgabor default: 241219019Sgabor return (EINVAL); 242219019Sgabor } 243219019Sgabor if (pwc != NULL) 244219019Sgabor *pwc = wc; 245219019Sgabor 246219019Sgabor *nresult = (size_t)(wc == 0 ? 0 : len); 247219019Sgabor *s = s0; 248219019Sgabor 249219019Sgabor return (0); 250219019Sgabor} 251219019Sgabor 252219019Sgaborstatic int 253219019Sgabor/*ARGSUSED*/ 254219019Sgabor_citrus_ZW_wcrtomb_priv(_ZWEncodingInfo * __restrict ei __unused, 255219019Sgabor char *__restrict s, size_t n, wchar_t wc, 256219019Sgabor _ZWState * __restrict psenc, size_t * __restrict nresult) 257219019Sgabor{ 258219019Sgabor int ch; 259219019Sgabor 260219019Sgabor if (psenc->chlen != 0) 261219019Sgabor return (EINVAL); 262219019Sgabor if ((uint32_t)wc <= 0x7F) { 263219019Sgabor ch = (unsigned char)wc; 264219019Sgabor switch (psenc->charset) { 265219019Sgabor case NONE: 266219019Sgabor if (ch == '\0' || ch == '\n') 267219019Sgabor psenc->ch[psenc->chlen++] = ch; 268219019Sgabor else { 269219019Sgabor if (n < 4) 270219019Sgabor return (E2BIG); 271219019Sgabor n -= 4; 272219019Sgabor psenc->ch[psenc->chlen++] = 'z'; 273219019Sgabor psenc->ch[psenc->chlen++] = 'W'; 274219019Sgabor psenc->ch[psenc->chlen++] = ' '; 275219019Sgabor psenc->ch[psenc->chlen++] = ch; 276219019Sgabor psenc->charset = GB2312; 277219019Sgabor } 278219019Sgabor break; 279219019Sgabor case GB2312: 280219019Sgabor if (n < 2) 281219019Sgabor return (E2BIG); 282219019Sgabor n -= 2; 283219019Sgabor if (ch == '\0') { 284219019Sgabor psenc->ch[psenc->chlen++] = '\n'; 285219019Sgabor psenc->ch[psenc->chlen++] = '\0'; 286219019Sgabor psenc->charset = NONE; 287219019Sgabor } else if (ch == '\n') { 288219019Sgabor psenc->ch[psenc->chlen++] = '#'; 289219019Sgabor psenc->ch[psenc->chlen++] = '\n'; 290219019Sgabor psenc->charset = NONE; 291219019Sgabor } else { 292219019Sgabor psenc->ch[psenc->chlen++] = ' '; 293219019Sgabor psenc->ch[psenc->chlen++] = ch; 294219019Sgabor } 295219019Sgabor break; 296219019Sgabor default: 297219019Sgabor return (EINVAL); 298219019Sgabor } 299219019Sgabor } else if ((uint32_t)wc <= 0x7E7E) { 300219019Sgabor switch (psenc->charset) { 301219019Sgabor case NONE: 302219019Sgabor if (n < 2) 303219019Sgabor return (E2BIG); 304219019Sgabor n -= 2; 305219019Sgabor psenc->ch[psenc->chlen++] = 'z'; 306219019Sgabor psenc->ch[psenc->chlen++] = 'W'; 307219019Sgabor psenc->charset = GB2312; 308219019Sgabor /* FALLTHROUGH*/ 309219019Sgabor case GB2312: 310219019Sgabor if (n < 2) 311219019Sgabor return (E2BIG); 312219019Sgabor n -= 2; 313219019Sgabor ch = (wc >> 8) & 0xFF; 314219019Sgabor if (ch < 0x21 || ch > 0x7E) 315219019Sgabor goto ilseq; 316219019Sgabor psenc->ch[psenc->chlen++] = ch; 317219019Sgabor ch = wc & 0xFF; 318219019Sgabor if (ch < 0x21 || ch > 0x7E) 319219019Sgabor goto ilseq; 320219019Sgabor psenc->ch[psenc->chlen++] = ch; 321219019Sgabor break; 322219019Sgabor default: 323219019Sgabor return (EINVAL); 324219019Sgabor } 325219019Sgabor } else { 326219019Sgaborilseq: 327219019Sgabor *nresult = (size_t)-1; 328219019Sgabor return (EILSEQ); 329219019Sgabor } 330219019Sgabor memcpy(s, psenc->ch, psenc->chlen); 331219019Sgabor *nresult = psenc->chlen; 332219019Sgabor psenc->chlen = 0; 333219019Sgabor 334219019Sgabor return (0); 335219019Sgabor} 336219019Sgabor 337219019Sgaborstatic int 338219019Sgabor/*ARGSUSED*/ 339219019Sgabor_citrus_ZW_put_state_reset(_ZWEncodingInfo * __restrict ei __unused, 340219019Sgabor char * __restrict s, size_t n, _ZWState * __restrict psenc, 341219019Sgabor size_t * __restrict nresult) 342219019Sgabor{ 343219019Sgabor 344219019Sgabor if (psenc->chlen != 0) 345219019Sgabor return (EINVAL); 346219019Sgabor switch (psenc->charset) { 347219019Sgabor case GB2312: 348219019Sgabor if (n-- < 1) 349219019Sgabor return (E2BIG); 350219019Sgabor psenc->ch[psenc->chlen++] = '\n'; 351219019Sgabor psenc->charset = NONE; 352219019Sgabor /*FALLTHROUGH*/ 353219019Sgabor case NONE: 354219019Sgabor *nresult = psenc->chlen; 355219019Sgabor if (psenc->chlen > 0) { 356219019Sgabor memcpy(s, psenc->ch, psenc->chlen); 357219019Sgabor psenc->chlen = 0; 358219019Sgabor } 359219019Sgabor break; 360219019Sgabor default: 361219019Sgabor return (EINVAL); 362219019Sgabor } 363219019Sgabor 364219019Sgabor return (0); 365219019Sgabor} 366219019Sgabor 367219019Sgaborstatic __inline int 368219019Sgabor/*ARGSUSED*/ 369219019Sgabor_citrus_ZW_stdenc_get_state_desc_generic(_ZWEncodingInfo * __restrict ei __unused, 370219019Sgabor _ZWState * __restrict psenc, int * __restrict rstate) 371219019Sgabor{ 372219019Sgabor 373219019Sgabor switch (psenc->charset) { 374219019Sgabor case NONE: 375219019Sgabor if (psenc->chlen != 0) 376219019Sgabor return (EINVAL); 377219019Sgabor *rstate = _STDENC_SDGEN_INITIAL; 378219019Sgabor break; 379219019Sgabor case AMBIGIOUS: 380219019Sgabor if (psenc->chlen != 0) 381219019Sgabor return (EINVAL); 382219019Sgabor *rstate = _STDENC_SDGEN_INCOMPLETE_SHIFT; 383219019Sgabor break; 384219019Sgabor case ASCII: 385219019Sgabor case GB2312: 386219019Sgabor switch (psenc->chlen) { 387219019Sgabor case 0: 388219019Sgabor *rstate = _STDENC_SDGEN_STABLE; 389219019Sgabor break; 390219019Sgabor case 1: 391219019Sgabor *rstate = (psenc->ch[0] == '#') ? 392219019Sgabor _STDENC_SDGEN_INCOMPLETE_SHIFT : 393219019Sgabor _STDENC_SDGEN_INCOMPLETE_CHAR; 394219019Sgabor break; 395219019Sgabor default: 396219019Sgabor return (EINVAL); 397219019Sgabor } 398219019Sgabor break; 399219019Sgabor default: 400219019Sgabor return (EINVAL); 401219019Sgabor } 402219019Sgabor return (0); 403219019Sgabor} 404219019Sgabor 405219019Sgaborstatic __inline int 406219019Sgabor/*ARGSUSED*/ 407219019Sgabor_citrus_ZW_stdenc_wctocs(_ZWEncodingInfo * __restrict ei __unused, 408219019Sgabor _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc) 409219019Sgabor{ 410219019Sgabor 411219019Sgabor *csid = (_csid_t)(wc <= (wchar_t)0x7FU) ? 0 : 1; 412219019Sgabor *idx = (_index_t)wc; 413219019Sgabor 414219019Sgabor return (0); 415219019Sgabor} 416219019Sgabor 417219019Sgaborstatic __inline int 418219019Sgabor/*ARGSUSED*/ 419219019Sgabor_citrus_ZW_stdenc_cstowc(_ZWEncodingInfo * __restrict ei __unused, 420219019Sgabor wchar_t * __restrict wc, _csid_t csid, _index_t idx) 421219019Sgabor{ 422219019Sgabor 423219019Sgabor switch (csid) { 424219019Sgabor case 0: case 1: 425219019Sgabor break; 426219019Sgabor default: 427219019Sgabor return (EINVAL); 428219019Sgabor } 429219019Sgabor *wc = (wchar_t)idx; 430219019Sgabor 431219019Sgabor return (0); 432219019Sgabor} 433219019Sgabor 434219019Sgaborstatic void 435219019Sgabor/*ARGSUSED*/ 436219019Sgabor_citrus_ZW_encoding_module_uninit(_ZWEncodingInfo *ei __unused) 437219019Sgabor{ 438219019Sgabor 439219019Sgabor} 440219019Sgabor 441219019Sgaborstatic int 442219019Sgabor/*ARGSUSED*/ 443219019Sgabor_citrus_ZW_encoding_module_init(_ZWEncodingInfo * __restrict ei __unused, 444219019Sgabor const void *__restrict var __unused, size_t lenvar __unused) 445219019Sgabor{ 446219019Sgabor 447219019Sgabor return (0); 448219019Sgabor} 449219019Sgabor 450219019Sgabor/* ---------------------------------------------------------------------- 451219019Sgabor * public interface for stdenc 452219019Sgabor */ 453219019Sgabor 454219019Sgabor_CITRUS_STDENC_DECLS(ZW); 455219019Sgabor_CITRUS_STDENC_DEF_OPS(ZW); 456219019Sgabor 457219019Sgabor#include "citrus_stdenc_template.h" 458