1219019Sgabor/* $FreeBSD$ */ 2219019Sgabor/* $NetBSD: citrus_mskanji.c,v 1.13 2008/06/14 16:01:08 tnozaki Exp $ */ 3219019Sgabor 4219019Sgabor/*- 5219019Sgabor * Copyright (c)2002 Citrus Project, 6219019Sgabor * All rights reserved. 7219019Sgabor * 8219019Sgabor * Redistribution and use in source and binary forms, with or without 9219019Sgabor * modification, are permitted provided that the following conditions 10219019Sgabor * are met: 11219019Sgabor * 1. Redistributions of source code must retain the above copyright 12219019Sgabor * notice, this list of conditions and the following disclaimer. 13219019Sgabor * 2. Redistributions in binary form must reproduce the above copyright 14219019Sgabor * notice, this list of conditions and the following disclaimer in the 15219019Sgabor * documentation and/or other materials provided with the distribution. 16219019Sgabor * 17219019Sgabor * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18219019Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19219019Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20219019Sgabor * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21219019Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22219019Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23219019Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24219019Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25219019Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26219019Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27219019Sgabor * SUCH DAMAGE. 28219019Sgabor */ 29219019Sgabor 30219019Sgabor/* 31219019Sgabor * ja_JP.SJIS locale table for BSD4.4/rune 32219019Sgabor * version 1.0 33219019Sgabor * (C) Sin'ichiro MIYATANI / Phase One, Inc 34219019Sgabor * May 12, 1995 35219019Sgabor * 36219019Sgabor * Redistribution and use in source and binary forms, with or without 37219019Sgabor * modification, are permitted provided that the following conditions 38219019Sgabor * are met: 39219019Sgabor * 1. Redistributions of source code must retain the above copyright 40219019Sgabor * notice, this list of conditions and the following disclaimer. 41219019Sgabor * 2. Redistributions in binary form must reproduce the above copyright 42219019Sgabor * notice, this list of conditions and the following disclaimer in the 43219019Sgabor * documentation and/or other materials provided with the distribution. 44219019Sgabor * 3. All advertising materials mentioning features or use of this software 45219019Sgabor * must display the following acknowledgement: 46219019Sgabor * This product includes software developed by Phase One, Inc. 47219019Sgabor * 4. The name of Phase One, Inc. may be used to endorse or promote products 48219019Sgabor * derived from this software without specific prior written permission. 49219019Sgabor * 50219019Sgabor * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 51219019Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52219019Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53219019Sgabor * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 54219019Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55219019Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56219019Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57219019Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58219019Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59219019Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60219019Sgabor * SUCH DAMAGE. 61219019Sgabor */ 62219019Sgabor 63219019Sgabor 64219019Sgabor#include <sys/cdefs.h> 65219019Sgabor#include <sys/types.h> 66219019Sgabor 67219019Sgabor#include <assert.h> 68219019Sgabor#include <errno.h> 69219019Sgabor#include <limits.h> 70219019Sgabor#include <stdbool.h> 71219019Sgabor#include <stddef.h> 72219019Sgabor#include <stdio.h> 73219019Sgabor#include <stdlib.h> 74219019Sgabor#include <string.h> 75219019Sgabor#include <wchar.h> 76219019Sgabor 77219019Sgabor#include "citrus_namespace.h" 78219019Sgabor#include "citrus_types.h" 79219019Sgabor#include "citrus_bcs.h" 80219019Sgabor#include "citrus_module.h" 81219019Sgabor#include "citrus_stdenc.h" 82219019Sgabor#include "citrus_mskanji.h" 83219019Sgabor 84219019Sgabor 85219019Sgabor/* ---------------------------------------------------------------------- 86219019Sgabor * private stuffs used by templates 87219019Sgabor */ 88219019Sgabor 89219019Sgabortypedef struct _MSKanjiState { 90219019Sgabor int chlen; 91219019Sgabor char ch[2]; 92219019Sgabor} _MSKanjiState; 93219019Sgabor 94219019Sgabortypedef struct { 95219019Sgabor int mode; 96219019Sgabor#define MODE_JIS2004 1 97219019Sgabor} _MSKanjiEncodingInfo; 98219019Sgabor 99219019Sgabor#define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 100219019Sgabor#define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 101219019Sgabor 102219019Sgabor#define _FUNCNAME(m) _citrus_MSKanji_##m 103219019Sgabor#define _ENCODING_INFO _MSKanjiEncodingInfo 104219019Sgabor#define _ENCODING_STATE _MSKanjiState 105219019Sgabor#define _ENCODING_MB_CUR_MAX(_ei_) 2 106219019Sgabor#define _ENCODING_IS_STATE_DEPENDENT 0 107219019Sgabor#define _STATE_NEEDS_EXPLICIT_INIT(_ps_) 0 108219019Sgabor 109219019Sgabor 110219019Sgaborstatic bool 111219019Sgabor_mskanji1(int c) 112219019Sgabor{ 113219019Sgabor 114219019Sgabor return ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xfc)); 115219019Sgabor} 116219019Sgabor 117219019Sgaborstatic bool 118219019Sgabor_mskanji2(int c) 119219019Sgabor{ 120219019Sgabor 121219019Sgabor return ((c >= 0x40 && c <= 0x7e) || (c >= 0x80 && c <= 0xfc)); 122219019Sgabor} 123219019Sgabor 124219019Sgaborstatic __inline void 125219019Sgabor/*ARGSUSED*/ 126219019Sgabor_citrus_MSKanji_init_state(_MSKanjiEncodingInfo * __restrict ei __unused, 127219019Sgabor _MSKanjiState * __restrict s) 128219019Sgabor{ 129219019Sgabor 130219019Sgabor s->chlen = 0; 131219019Sgabor} 132219019Sgabor 133260264Sdim#if 0 134219019Sgaborstatic __inline void 135219019Sgabor/*ARGSUSED*/ 136219019Sgabor_citrus_MSKanji_pack_state(_MSKanjiEncodingInfo * __restrict ei __unused, 137219019Sgabor void * __restrict pspriv, const _MSKanjiState * __restrict s) 138219019Sgabor{ 139219019Sgabor 140219019Sgabor memcpy(pspriv, (const void *)s, sizeof(*s)); 141219019Sgabor} 142219019Sgabor 143219019Sgaborstatic __inline void 144219019Sgabor/*ARGSUSED*/ 145219019Sgabor_citrus_MSKanji_unpack_state(_MSKanjiEncodingInfo * __restrict ei __unused, 146219019Sgabor _MSKanjiState * __restrict s, const void * __restrict pspriv) 147219019Sgabor{ 148219019Sgabor 149219019Sgabor memcpy((void *)s, pspriv, sizeof(*s)); 150219019Sgabor} 151260264Sdim#endif 152219019Sgabor 153219019Sgaborstatic int 154219019Sgabor/*ARGSUSED*/ 155219019Sgabor_citrus_MSKanji_mbrtowc_priv(_MSKanjiEncodingInfo * __restrict ei, 156219019Sgabor wchar_t * __restrict pwc, char ** __restrict s, size_t n, 157219019Sgabor _MSKanjiState * __restrict psenc, size_t * __restrict nresult) 158219019Sgabor{ 159219019Sgabor char *s0; 160219019Sgabor wchar_t wchar; 161219019Sgabor int chlenbak, len; 162219019Sgabor 163219019Sgabor s0 = *s; 164219019Sgabor 165219019Sgabor if (s0 == NULL) { 166219019Sgabor _citrus_MSKanji_init_state(ei, psenc); 167219019Sgabor *nresult = 0; /* state independent */ 168219019Sgabor return (0); 169219019Sgabor } 170219019Sgabor 171219019Sgabor chlenbak = psenc->chlen; 172219019Sgabor 173219019Sgabor /* make sure we have the first byte in the buffer */ 174219019Sgabor switch (psenc->chlen) { 175219019Sgabor case 0: 176219019Sgabor if (n < 1) 177219019Sgabor goto restart; 178219019Sgabor psenc->ch[0] = *s0++; 179219019Sgabor psenc->chlen = 1; 180219019Sgabor n--; 181219019Sgabor break; 182219019Sgabor case 1: 183219019Sgabor break; 184219019Sgabor default: 185219019Sgabor /* illegal state */ 186219019Sgabor goto encoding_error; 187219019Sgabor } 188219019Sgabor 189219019Sgabor len = _mskanji1(psenc->ch[0] & 0xff) ? 2 : 1; 190219019Sgabor while (psenc->chlen < len) { 191219019Sgabor if (n < 1) 192219019Sgabor goto restart; 193219019Sgabor psenc->ch[psenc->chlen] = *s0++; 194219019Sgabor psenc->chlen++; 195219019Sgabor n--; 196219019Sgabor } 197219019Sgabor 198219019Sgabor *s = s0; 199219019Sgabor 200219019Sgabor switch (len) { 201219019Sgabor case 1: 202219019Sgabor wchar = psenc->ch[0] & 0xff; 203219019Sgabor break; 204219019Sgabor case 2: 205219019Sgabor if (!_mskanji2(psenc->ch[1] & 0xff)) 206219019Sgabor goto encoding_error; 207219019Sgabor wchar = ((psenc->ch[0] & 0xff) << 8) | (psenc->ch[1] & 0xff); 208219019Sgabor break; 209219019Sgabor default: 210219019Sgabor /* illegal state */ 211219019Sgabor goto encoding_error; 212219019Sgabor } 213219019Sgabor 214219019Sgabor psenc->chlen = 0; 215219019Sgabor 216219019Sgabor if (pwc) 217219019Sgabor *pwc = wchar; 218219019Sgabor *nresult = wchar ? len - chlenbak : 0; 219219019Sgabor return (0); 220219019Sgabor 221219019Sgaborencoding_error: 222219019Sgabor psenc->chlen = 0; 223219019Sgabor *nresult = (size_t)-1; 224219019Sgabor return (EILSEQ); 225219019Sgabor 226219019Sgaborrestart: 227219019Sgabor *nresult = (size_t)-2; 228219019Sgabor *s = s0; 229219019Sgabor return (0); 230219019Sgabor} 231219019Sgabor 232219019Sgabor 233219019Sgaborstatic int 234219019Sgabor_citrus_MSKanji_wcrtomb_priv(_MSKanjiEncodingInfo * __restrict ei __unused, 235219019Sgabor char * __restrict s, size_t n, wchar_t wc, 236219019Sgabor _MSKanjiState * __restrict psenc __unused, size_t * __restrict nresult) 237219019Sgabor{ 238219019Sgabor int ret; 239219019Sgabor 240219019Sgabor /* check invalid sequence */ 241219019Sgabor if (wc & ~0xffff) { 242219019Sgabor ret = EILSEQ; 243219019Sgabor goto err; 244219019Sgabor } 245219019Sgabor 246219019Sgabor if (wc & 0xff00) { 247219019Sgabor if (n < 2) { 248219019Sgabor ret = E2BIG; 249219019Sgabor goto err; 250219019Sgabor } 251219019Sgabor 252219019Sgabor s[0] = (wc >> 8) & 0xff; 253219019Sgabor s[1] = wc & 0xff; 254219019Sgabor if (!_mskanji1(s[0] & 0xff) || !_mskanji2(s[1] & 0xff)) { 255219019Sgabor ret = EILSEQ; 256219019Sgabor goto err; 257219019Sgabor } 258219019Sgabor 259219019Sgabor *nresult = 2; 260219019Sgabor return (0); 261219019Sgabor } else { 262219019Sgabor if (n < 1) { 263219019Sgabor ret = E2BIG; 264219019Sgabor goto err; 265219019Sgabor } 266219019Sgabor 267219019Sgabor s[0] = wc & 0xff; 268219019Sgabor if (_mskanji1(s[0] & 0xff)) { 269219019Sgabor ret = EILSEQ; 270219019Sgabor goto err; 271219019Sgabor } 272219019Sgabor 273219019Sgabor *nresult = 1; 274219019Sgabor return (0); 275219019Sgabor } 276219019Sgabor 277219019Sgaborerr: 278219019Sgabor *nresult = (size_t)-1; 279219019Sgabor return (ret); 280219019Sgabor} 281219019Sgabor 282219019Sgabor 283219019Sgaborstatic __inline int 284219019Sgabor/*ARGSUSED*/ 285219019Sgabor_citrus_MSKanji_stdenc_wctocs(_MSKanjiEncodingInfo * __restrict ei, 286219019Sgabor _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc) 287219019Sgabor{ 288219019Sgabor _index_t col, row; 289219019Sgabor int offset; 290219019Sgabor 291219019Sgabor if ((_wc_t)wc < 0x80) { 292219019Sgabor /* ISO-646 */ 293219019Sgabor *csid = 0; 294219019Sgabor *idx = (_index_t)wc; 295219019Sgabor } else if ((_wc_t)wc < 0x100) { 296219019Sgabor /* KANA */ 297219019Sgabor *csid = 1; 298219019Sgabor *idx = (_index_t)wc & 0x7F; 299219019Sgabor } else { 300219019Sgabor /* Kanji (containing Gaiji zone) */ 301219019Sgabor /* 302219019Sgabor * 94^2 zone (contains a part of Gaiji (0xED40 - 0xEEFC)): 303219019Sgabor * 0x8140 - 0x817E -> 0x2121 - 0x215F 304219019Sgabor * 0x8180 - 0x819E -> 0x2160 - 0x217E 305219019Sgabor * 0x819F - 0x81FC -> 0x2221 - 0x227E 306219019Sgabor * 307219019Sgabor * 0x8240 - 0x827E -> 0x2321 - 0x235F 308219019Sgabor * ... 309219019Sgabor * 0x9F9F - 0x9FFc -> 0x5E21 - 0x5E7E 310219019Sgabor * 311219019Sgabor * 0xE040 - 0xE07E -> 0x5F21 - 0x5F5F 312219019Sgabor * ... 313219019Sgabor * 0xEF9F - 0xEFFC -> 0x7E21 - 0x7E7E 314219019Sgabor * 315219019Sgabor * extended Gaiji zone: 316219019Sgabor * 0xF040 - 0xFCFC 317219019Sgabor * 318219019Sgabor * JIS X0213-plane2: 319219019Sgabor * 0xF040 - 0xF09E -> 0x2121 - 0x217E 320219019Sgabor * 0xF140 - 0xF19E -> 0x2321 - 0x237E 321219019Sgabor * ... 322219019Sgabor * 0xF240 - 0xF29E -> 0x2521 - 0x257E 323219019Sgabor * 324219019Sgabor * 0xF09F - 0xF0FC -> 0x2821 - 0x287E 325219019Sgabor * 0xF29F - 0xF2FC -> 0x2C21 - 0x2C7E 326219019Sgabor * ... 327219019Sgabor * 0xF44F - 0xF49E -> 0x2F21 - 0x2F7E 328219019Sgabor * 329219019Sgabor * 0xF49F - 0xF4FC -> 0x6E21 - 0x6E7E 330219019Sgabor * ... 331219019Sgabor * 0xFC9F - 0xFCFC -> 0x7E21 - 0x7E7E 332219019Sgabor */ 333219019Sgabor row = ((_wc_t)wc >> 8) & 0xFF; 334219019Sgabor col = (_wc_t)wc & 0xFF; 335219019Sgabor if (!_mskanji1(row) || !_mskanji2(col)) 336219019Sgabor return (EILSEQ); 337219019Sgabor if ((ei->mode & MODE_JIS2004) == 0 || row < 0xF0) { 338219019Sgabor *csid = 2; 339219019Sgabor offset = 0x81; 340219019Sgabor } else { 341219019Sgabor *csid = 3; 342219019Sgabor if ((_wc_t)wc <= 0xF49E) { 343219019Sgabor offset = (_wc_t)wc >= 0xF29F || 344219019Sgabor ((_wc_t)wc >= 0xF09F && 345219019Sgabor (_wc_t)wc <= 0xF0FC) ? 0xED : 0xF0; 346219019Sgabor } else 347219019Sgabor offset = 0xCE; 348219019Sgabor } 349219019Sgabor row -= offset; 350219019Sgabor if (row >= 0x5F) 351219019Sgabor row -= 0x40; 352219019Sgabor row = row * 2 + 0x21; 353219019Sgabor col -= 0x1F; 354219019Sgabor if (col >= 0x61) 355219019Sgabor col -= 1; 356219019Sgabor if (col > 0x7E) { 357219019Sgabor row += 1; 358219019Sgabor col -= 0x5E; 359219019Sgabor } 360219019Sgabor *idx = ((_index_t)row << 8) | col; 361219019Sgabor } 362219019Sgabor 363219019Sgabor return (0); 364219019Sgabor} 365219019Sgabor 366219019Sgaborstatic __inline int 367219019Sgabor/*ARGSUSED*/ 368219019Sgabor_citrus_MSKanji_stdenc_cstowc(_MSKanjiEncodingInfo * __restrict ei, 369219019Sgabor wchar_t * __restrict wc, _csid_t csid, _index_t idx) 370219019Sgabor{ 371219019Sgabor uint32_t col, row; 372219019Sgabor int offset; 373219019Sgabor 374219019Sgabor switch (csid) { 375219019Sgabor case 0: 376219019Sgabor /* ISO-646 */ 377219019Sgabor if (idx >= 0x80) 378219019Sgabor return (EILSEQ); 379219019Sgabor *wc = (wchar_t)idx; 380219019Sgabor break; 381219019Sgabor case 1: 382219019Sgabor /* kana */ 383219019Sgabor if (idx >= 0x80) 384219019Sgabor return (EILSEQ); 385219019Sgabor *wc = (wchar_t)idx + 0x80; 386219019Sgabor break; 387219019Sgabor case 3: 388219019Sgabor if ((ei->mode & MODE_JIS2004) == 0) 389219019Sgabor return (EILSEQ); 390219019Sgabor /*FALLTHROUGH*/ 391219019Sgabor case 2: 392219019Sgabor /* kanji */ 393219019Sgabor row = (idx >> 8); 394219019Sgabor if (row < 0x21) 395219019Sgabor return (EILSEQ); 396219019Sgabor if (csid == 3) { 397219019Sgabor if (row <= 0x2F) 398219019Sgabor offset = (row == 0x22 || row >= 0x26) ? 399219019Sgabor 0xED : 0xF0; 400219019Sgabor else if (row >= 0x4D && row <= 0x7E) 401219019Sgabor offset = 0xCE; 402219019Sgabor else 403219019Sgabor return (EILSEQ); 404219019Sgabor } else { 405219019Sgabor if (row > 0x97) 406219019Sgabor return (EILSEQ); 407219019Sgabor offset = (row < 0x5F) ? 0x81 : 0xC1; 408219019Sgabor } 409219019Sgabor col = idx & 0xFF; 410219019Sgabor if (col < 0x21 || col > 0x7E) 411219019Sgabor return (EILSEQ); 412219019Sgabor row -= 0x21; col -= 0x21; 413219019Sgabor if ((row & 1) == 0) { 414219019Sgabor col += 0x40; 415219019Sgabor if (col >= 0x7F) 416219019Sgabor col += 1; 417219019Sgabor } else 418219019Sgabor col += 0x9F; 419219019Sgabor row = row / 2 + offset; 420219019Sgabor *wc = ((wchar_t)row << 8) | col; 421219019Sgabor break; 422219019Sgabor default: 423219019Sgabor return (EILSEQ); 424219019Sgabor } 425219019Sgabor 426219019Sgabor return (0); 427219019Sgabor} 428219019Sgabor 429219019Sgaborstatic __inline int 430219019Sgabor/*ARGSUSED*/ 431219019Sgabor_citrus_MSKanji_stdenc_get_state_desc_generic(_MSKanjiEncodingInfo * __restrict ei __unused, 432219019Sgabor _MSKanjiState * __restrict psenc, int * __restrict rstate) 433219019Sgabor{ 434219019Sgabor 435219019Sgabor *rstate = (psenc->chlen == 0) ? _STDENC_SDGEN_INITIAL : 436219019Sgabor _STDENC_SDGEN_INCOMPLETE_CHAR; 437219019Sgabor return (0); 438219019Sgabor} 439219019Sgabor 440219019Sgaborstatic int 441219019Sgabor/*ARGSUSED*/ 442219019Sgabor_citrus_MSKanji_encoding_module_init(_MSKanjiEncodingInfo * __restrict ei, 443219019Sgabor const void * __restrict var, size_t lenvar) 444219019Sgabor{ 445219019Sgabor const char *p; 446219019Sgabor 447219019Sgabor p = var; 448219019Sgabor memset((void *)ei, 0, sizeof(*ei)); 449219019Sgabor while (lenvar > 0) { 450219019Sgabor switch (_bcs_toupper(*p)) { 451219019Sgabor case 'J': 452219019Sgabor MATCH(JIS2004, ei->mode |= MODE_JIS2004); 453219019Sgabor break; 454219019Sgabor } 455219019Sgabor ++p; 456219019Sgabor --lenvar; 457219019Sgabor } 458219019Sgabor 459219019Sgabor return (0); 460219019Sgabor} 461219019Sgabor 462219019Sgaborstatic void 463219019Sgabor_citrus_MSKanji_encoding_module_uninit(_MSKanjiEncodingInfo *ei __unused) 464219019Sgabor{ 465219019Sgabor 466219019Sgabor} 467219019Sgabor 468219019Sgabor/* ---------------------------------------------------------------------- 469219019Sgabor * public interface for stdenc 470219019Sgabor */ 471219019Sgabor 472219019Sgabor_CITRUS_STDENC_DECLS(MSKanji); 473219019Sgabor_CITRUS_STDENC_DEF_OPS(MSKanji); 474219019Sgabor 475219019Sgabor#include "citrus_stdenc_template.h" 476