1219019Sgabor/* $FreeBSD$ */ 2219019Sgabor/* $NetBSD: citrus_mskanji.c,v 1.13 2008/06/14 16:01:08 tnozaki Exp $ */ 3219019Sgabor 4219019Sgabor/*- 5219019Sgabor * Copyright (c)2002 Citrus Project, 6219019Sgabor * All rights reserved. 7219019Sgabor * 8219019Sgabor * Redistribution and use in source and binary forms, with or without 9219019Sgabor * modification, are permitted provided that the following conditions 10219019Sgabor * are met: 11219019Sgabor * 1. Redistributions of source code must retain the above copyright 12219019Sgabor * notice, this list of conditions and the following disclaimer. 13219019Sgabor * 2. Redistributions in binary form must reproduce the above copyright 14219019Sgabor * notice, this list of conditions and the following disclaimer in the 15219019Sgabor * documentation and/or other materials provided with the distribution. 16219019Sgabor * 17219019Sgabor * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18219019Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19219019Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20219019Sgabor * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21219019Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22219019Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23219019Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24219019Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25219019Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26219019Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27219019Sgabor * SUCH DAMAGE. 28219019Sgabor */ 29219019Sgabor 30219019Sgabor/* 31219019Sgabor * ja_JP.SJIS locale table for BSD4.4/rune 32219019Sgabor * version 1.0 33219019Sgabor * (C) Sin'ichiro MIYATANI / Phase One, Inc 34219019Sgabor * May 12, 1995 35219019Sgabor * 36219019Sgabor * Redistribution and use in source and binary forms, with or without 37219019Sgabor * modification, are permitted provided that the following conditions 38219019Sgabor * are met: 39219019Sgabor * 1. Redistributions of source code must retain the above copyright 40219019Sgabor * notice, this list of conditions and the following disclaimer. 41219019Sgabor * 2. Redistributions in binary form must reproduce the above copyright 42219019Sgabor * notice, this list of conditions and the following disclaimer in the 43219019Sgabor * documentation and/or other materials provided with the distribution. 44219019Sgabor * 3. All advertising materials mentioning features or use of this software 45219019Sgabor * must display the following acknowledgement: 46219019Sgabor * This product includes software developed by Phase One, Inc. 47219019Sgabor * 4. The name of Phase One, Inc. may be used to endorse or promote products 48219019Sgabor * derived from this software without specific prior written permission. 49219019Sgabor * 50219019Sgabor * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 51219019Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52219019Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53219019Sgabor * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 54219019Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55219019Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56219019Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57219019Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58219019Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59219019Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60219019Sgabor * SUCH DAMAGE. 61219019Sgabor */ 62219019Sgabor 63219019Sgabor 64219019Sgabor#include <sys/cdefs.h> 65219019Sgabor#include <sys/types.h> 66219019Sgabor 67219019Sgabor#include <assert.h> 68219019Sgabor#include <errno.h> 69219019Sgabor#include <limits.h> 70219019Sgabor#include <stdbool.h> 71219019Sgabor#include <stddef.h> 72219019Sgabor#include <stdio.h> 73219019Sgabor#include <stdlib.h> 74219019Sgabor#include <string.h> 75219019Sgabor#include <wchar.h> 76219019Sgabor 77219019Sgabor#include "citrus_namespace.h" 78219019Sgabor#include "citrus_types.h" 79219019Sgabor#include "citrus_bcs.h" 80219019Sgabor#include "citrus_module.h" 81219019Sgabor#include "citrus_stdenc.h" 82219019Sgabor#include "citrus_mskanji.h" 83219019Sgabor 84219019Sgabor 85219019Sgabor/* ---------------------------------------------------------------------- 86219019Sgabor * private stuffs used by templates 87219019Sgabor */ 88219019Sgabor 89219019Sgabortypedef struct _MSKanjiState { 90219019Sgabor int chlen; 91219019Sgabor char ch[2]; 92219019Sgabor} _MSKanjiState; 93219019Sgabor 94219019Sgabortypedef struct { 95219019Sgabor int mode; 96219019Sgabor#define MODE_JIS2004 1 97219019Sgabor} _MSKanjiEncodingInfo; 98219019Sgabor 99219019Sgabor#define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 100219019Sgabor#define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 101219019Sgabor 102219019Sgabor#define _FUNCNAME(m) _citrus_MSKanji_##m 103219019Sgabor#define _ENCODING_INFO _MSKanjiEncodingInfo 104219019Sgabor#define _ENCODING_STATE _MSKanjiState 105219019Sgabor#define _ENCODING_MB_CUR_MAX(_ei_) 2 106219019Sgabor#define _ENCODING_IS_STATE_DEPENDENT 0 107219019Sgabor#define _STATE_NEEDS_EXPLICIT_INIT(_ps_) 0 108219019Sgabor 109219019Sgabor 110219019Sgaborstatic bool 111219019Sgabor_mskanji1(int c) 112219019Sgabor{ 113219019Sgabor 114219019Sgabor return ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xfc)); 115219019Sgabor} 116219019Sgabor 117219019Sgaborstatic bool 118219019Sgabor_mskanji2(int c) 119219019Sgabor{ 120219019Sgabor 121219019Sgabor return ((c >= 0x40 && c <= 0x7e) || (c >= 0x80 && c <= 0xfc)); 122219019Sgabor} 123219019Sgabor 124219019Sgaborstatic __inline void 125219019Sgabor/*ARGSUSED*/ 126219019Sgabor_citrus_MSKanji_init_state(_MSKanjiEncodingInfo * __restrict ei __unused, 127219019Sgabor _MSKanjiState * __restrict s) 128219019Sgabor{ 129219019Sgabor 130219019Sgabor s->chlen = 0; 131219019Sgabor} 132219019Sgabor 133219019Sgaborstatic __inline void 134219019Sgabor/*ARGSUSED*/ 135219019Sgabor_citrus_MSKanji_pack_state(_MSKanjiEncodingInfo * __restrict ei __unused, 136219019Sgabor void * __restrict pspriv, const _MSKanjiState * __restrict s) 137219019Sgabor{ 138219019Sgabor 139219019Sgabor memcpy(pspriv, (const void *)s, sizeof(*s)); 140219019Sgabor} 141219019Sgabor 142219019Sgaborstatic __inline void 143219019Sgabor/*ARGSUSED*/ 144219019Sgabor_citrus_MSKanji_unpack_state(_MSKanjiEncodingInfo * __restrict ei __unused, 145219019Sgabor _MSKanjiState * __restrict s, const void * __restrict pspriv) 146219019Sgabor{ 147219019Sgabor 148219019Sgabor memcpy((void *)s, pspriv, sizeof(*s)); 149219019Sgabor} 150219019Sgabor 151219019Sgaborstatic int 152219019Sgabor/*ARGSUSED*/ 153219019Sgabor_citrus_MSKanji_mbrtowc_priv(_MSKanjiEncodingInfo * __restrict ei, 154252583Speter wchar_t * __restrict pwc, const char ** __restrict s, size_t n, 155219019Sgabor _MSKanjiState * __restrict psenc, size_t * __restrict nresult) 156219019Sgabor{ 157252583Speter const char *s0; 158219019Sgabor wchar_t wchar; 159219019Sgabor int chlenbak, len; 160219019Sgabor 161219019Sgabor s0 = *s; 162219019Sgabor 163219019Sgabor if (s0 == NULL) { 164219019Sgabor _citrus_MSKanji_init_state(ei, psenc); 165219019Sgabor *nresult = 0; /* state independent */ 166219019Sgabor return (0); 167219019Sgabor } 168219019Sgabor 169219019Sgabor chlenbak = psenc->chlen; 170219019Sgabor 171219019Sgabor /* make sure we have the first byte in the buffer */ 172219019Sgabor switch (psenc->chlen) { 173219019Sgabor case 0: 174219019Sgabor if (n < 1) 175219019Sgabor goto restart; 176219019Sgabor psenc->ch[0] = *s0++; 177219019Sgabor psenc->chlen = 1; 178219019Sgabor n--; 179219019Sgabor break; 180219019Sgabor case 1: 181219019Sgabor break; 182219019Sgabor default: 183219019Sgabor /* illegal state */ 184219019Sgabor goto encoding_error; 185219019Sgabor } 186219019Sgabor 187219019Sgabor len = _mskanji1(psenc->ch[0] & 0xff) ? 2 : 1; 188219019Sgabor while (psenc->chlen < len) { 189219019Sgabor if (n < 1) 190219019Sgabor goto restart; 191219019Sgabor psenc->ch[psenc->chlen] = *s0++; 192219019Sgabor psenc->chlen++; 193219019Sgabor n--; 194219019Sgabor } 195219019Sgabor 196219019Sgabor *s = s0; 197219019Sgabor 198219019Sgabor switch (len) { 199219019Sgabor case 1: 200219019Sgabor wchar = psenc->ch[0] & 0xff; 201219019Sgabor break; 202219019Sgabor case 2: 203219019Sgabor if (!_mskanji2(psenc->ch[1] & 0xff)) 204219019Sgabor goto encoding_error; 205219019Sgabor wchar = ((psenc->ch[0] & 0xff) << 8) | (psenc->ch[1] & 0xff); 206219019Sgabor break; 207219019Sgabor default: 208219019Sgabor /* illegal state */ 209219019Sgabor goto encoding_error; 210219019Sgabor } 211219019Sgabor 212219019Sgabor psenc->chlen = 0; 213219019Sgabor 214219019Sgabor if (pwc) 215219019Sgabor *pwc = wchar; 216219019Sgabor *nresult = wchar ? len - chlenbak : 0; 217219019Sgabor return (0); 218219019Sgabor 219219019Sgaborencoding_error: 220219019Sgabor psenc->chlen = 0; 221219019Sgabor *nresult = (size_t)-1; 222219019Sgabor return (EILSEQ); 223219019Sgabor 224219019Sgaborrestart: 225219019Sgabor *nresult = (size_t)-2; 226219019Sgabor *s = s0; 227219019Sgabor return (0); 228219019Sgabor} 229219019Sgabor 230219019Sgabor 231219019Sgaborstatic int 232219019Sgabor_citrus_MSKanji_wcrtomb_priv(_MSKanjiEncodingInfo * __restrict ei __unused, 233219019Sgabor char * __restrict s, size_t n, wchar_t wc, 234219019Sgabor _MSKanjiState * __restrict psenc __unused, size_t * __restrict nresult) 235219019Sgabor{ 236219019Sgabor int ret; 237219019Sgabor 238219019Sgabor /* check invalid sequence */ 239219019Sgabor if (wc & ~0xffff) { 240219019Sgabor ret = EILSEQ; 241219019Sgabor goto err; 242219019Sgabor } 243219019Sgabor 244219019Sgabor if (wc & 0xff00) { 245219019Sgabor if (n < 2) { 246219019Sgabor ret = E2BIG; 247219019Sgabor goto err; 248219019Sgabor } 249219019Sgabor 250219019Sgabor s[0] = (wc >> 8) & 0xff; 251219019Sgabor s[1] = wc & 0xff; 252219019Sgabor if (!_mskanji1(s[0] & 0xff) || !_mskanji2(s[1] & 0xff)) { 253219019Sgabor ret = EILSEQ; 254219019Sgabor goto err; 255219019Sgabor } 256219019Sgabor 257219019Sgabor *nresult = 2; 258219019Sgabor return (0); 259219019Sgabor } else { 260219019Sgabor if (n < 1) { 261219019Sgabor ret = E2BIG; 262219019Sgabor goto err; 263219019Sgabor } 264219019Sgabor 265219019Sgabor s[0] = wc & 0xff; 266219019Sgabor if (_mskanji1(s[0] & 0xff)) { 267219019Sgabor ret = EILSEQ; 268219019Sgabor goto err; 269219019Sgabor } 270219019Sgabor 271219019Sgabor *nresult = 1; 272219019Sgabor return (0); 273219019Sgabor } 274219019Sgabor 275219019Sgaborerr: 276219019Sgabor *nresult = (size_t)-1; 277219019Sgabor return (ret); 278219019Sgabor} 279219019Sgabor 280219019Sgabor 281219019Sgaborstatic __inline int 282219019Sgabor/*ARGSUSED*/ 283219019Sgabor_citrus_MSKanji_stdenc_wctocs(_MSKanjiEncodingInfo * __restrict ei, 284219019Sgabor _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc) 285219019Sgabor{ 286219019Sgabor _index_t col, row; 287219019Sgabor int offset; 288219019Sgabor 289219019Sgabor if ((_wc_t)wc < 0x80) { 290219019Sgabor /* ISO-646 */ 291219019Sgabor *csid = 0; 292219019Sgabor *idx = (_index_t)wc; 293219019Sgabor } else if ((_wc_t)wc < 0x100) { 294219019Sgabor /* KANA */ 295219019Sgabor *csid = 1; 296219019Sgabor *idx = (_index_t)wc & 0x7F; 297219019Sgabor } else { 298219019Sgabor /* Kanji (containing Gaiji zone) */ 299219019Sgabor /* 300219019Sgabor * 94^2 zone (contains a part of Gaiji (0xED40 - 0xEEFC)): 301219019Sgabor * 0x8140 - 0x817E -> 0x2121 - 0x215F 302219019Sgabor * 0x8180 - 0x819E -> 0x2160 - 0x217E 303219019Sgabor * 0x819F - 0x81FC -> 0x2221 - 0x227E 304219019Sgabor * 305219019Sgabor * 0x8240 - 0x827E -> 0x2321 - 0x235F 306219019Sgabor * ... 307219019Sgabor * 0x9F9F - 0x9FFc -> 0x5E21 - 0x5E7E 308219019Sgabor * 309219019Sgabor * 0xE040 - 0xE07E -> 0x5F21 - 0x5F5F 310219019Sgabor * ... 311219019Sgabor * 0xEF9F - 0xEFFC -> 0x7E21 - 0x7E7E 312219019Sgabor * 313219019Sgabor * extended Gaiji zone: 314219019Sgabor * 0xF040 - 0xFCFC 315219019Sgabor * 316219019Sgabor * JIS X0213-plane2: 317219019Sgabor * 0xF040 - 0xF09E -> 0x2121 - 0x217E 318219019Sgabor * 0xF140 - 0xF19E -> 0x2321 - 0x237E 319219019Sgabor * ... 320219019Sgabor * 0xF240 - 0xF29E -> 0x2521 - 0x257E 321219019Sgabor * 322219019Sgabor * 0xF09F - 0xF0FC -> 0x2821 - 0x287E 323219019Sgabor * 0xF29F - 0xF2FC -> 0x2C21 - 0x2C7E 324219019Sgabor * ... 325219019Sgabor * 0xF44F - 0xF49E -> 0x2F21 - 0x2F7E 326219019Sgabor * 327219019Sgabor * 0xF49F - 0xF4FC -> 0x6E21 - 0x6E7E 328219019Sgabor * ... 329219019Sgabor * 0xFC9F - 0xFCFC -> 0x7E21 - 0x7E7E 330219019Sgabor */ 331219019Sgabor row = ((_wc_t)wc >> 8) & 0xFF; 332219019Sgabor col = (_wc_t)wc & 0xFF; 333219019Sgabor if (!_mskanji1(row) || !_mskanji2(col)) 334219019Sgabor return (EILSEQ); 335219019Sgabor if ((ei->mode & MODE_JIS2004) == 0 || row < 0xF0) { 336219019Sgabor *csid = 2; 337219019Sgabor offset = 0x81; 338219019Sgabor } else { 339219019Sgabor *csid = 3; 340219019Sgabor if ((_wc_t)wc <= 0xF49E) { 341219019Sgabor offset = (_wc_t)wc >= 0xF29F || 342219019Sgabor ((_wc_t)wc >= 0xF09F && 343219019Sgabor (_wc_t)wc <= 0xF0FC) ? 0xED : 0xF0; 344219019Sgabor } else 345219019Sgabor offset = 0xCE; 346219019Sgabor } 347219019Sgabor row -= offset; 348219019Sgabor if (row >= 0x5F) 349219019Sgabor row -= 0x40; 350219019Sgabor row = row * 2 + 0x21; 351219019Sgabor col -= 0x1F; 352219019Sgabor if (col >= 0x61) 353219019Sgabor col -= 1; 354219019Sgabor if (col > 0x7E) { 355219019Sgabor row += 1; 356219019Sgabor col -= 0x5E; 357219019Sgabor } 358219019Sgabor *idx = ((_index_t)row << 8) | col; 359219019Sgabor } 360219019Sgabor 361219019Sgabor return (0); 362219019Sgabor} 363219019Sgabor 364219019Sgaborstatic __inline int 365219019Sgabor/*ARGSUSED*/ 366219019Sgabor_citrus_MSKanji_stdenc_cstowc(_MSKanjiEncodingInfo * __restrict ei, 367219019Sgabor wchar_t * __restrict wc, _csid_t csid, _index_t idx) 368219019Sgabor{ 369219019Sgabor uint32_t col, row; 370219019Sgabor int offset; 371219019Sgabor 372219019Sgabor switch (csid) { 373219019Sgabor case 0: 374219019Sgabor /* ISO-646 */ 375219019Sgabor if (idx >= 0x80) 376219019Sgabor return (EILSEQ); 377219019Sgabor *wc = (wchar_t)idx; 378219019Sgabor break; 379219019Sgabor case 1: 380219019Sgabor /* kana */ 381219019Sgabor if (idx >= 0x80) 382219019Sgabor return (EILSEQ); 383219019Sgabor *wc = (wchar_t)idx + 0x80; 384219019Sgabor break; 385219019Sgabor case 3: 386219019Sgabor if ((ei->mode & MODE_JIS2004) == 0) 387219019Sgabor return (EILSEQ); 388219019Sgabor /*FALLTHROUGH*/ 389219019Sgabor case 2: 390219019Sgabor /* kanji */ 391219019Sgabor row = (idx >> 8); 392219019Sgabor if (row < 0x21) 393219019Sgabor return (EILSEQ); 394219019Sgabor if (csid == 3) { 395219019Sgabor if (row <= 0x2F) 396219019Sgabor offset = (row == 0x22 || row >= 0x26) ? 397219019Sgabor 0xED : 0xF0; 398219019Sgabor else if (row >= 0x4D && row <= 0x7E) 399219019Sgabor offset = 0xCE; 400219019Sgabor else 401219019Sgabor return (EILSEQ); 402219019Sgabor } else { 403219019Sgabor if (row > 0x97) 404219019Sgabor return (EILSEQ); 405219019Sgabor offset = (row < 0x5F) ? 0x81 : 0xC1; 406219019Sgabor } 407219019Sgabor col = idx & 0xFF; 408219019Sgabor if (col < 0x21 || col > 0x7E) 409219019Sgabor return (EILSEQ); 410219019Sgabor row -= 0x21; col -= 0x21; 411219019Sgabor if ((row & 1) == 0) { 412219019Sgabor col += 0x40; 413219019Sgabor if (col >= 0x7F) 414219019Sgabor col += 1; 415219019Sgabor } else 416219019Sgabor col += 0x9F; 417219019Sgabor row = row / 2 + offset; 418219019Sgabor *wc = ((wchar_t)row << 8) | col; 419219019Sgabor break; 420219019Sgabor default: 421219019Sgabor return (EILSEQ); 422219019Sgabor } 423219019Sgabor 424219019Sgabor return (0); 425219019Sgabor} 426219019Sgabor 427219019Sgaborstatic __inline int 428219019Sgabor/*ARGSUSED*/ 429219019Sgabor_citrus_MSKanji_stdenc_get_state_desc_generic(_MSKanjiEncodingInfo * __restrict ei __unused, 430219019Sgabor _MSKanjiState * __restrict psenc, int * __restrict rstate) 431219019Sgabor{ 432219019Sgabor 433219019Sgabor *rstate = (psenc->chlen == 0) ? _STDENC_SDGEN_INITIAL : 434219019Sgabor _STDENC_SDGEN_INCOMPLETE_CHAR; 435219019Sgabor return (0); 436219019Sgabor} 437219019Sgabor 438219019Sgaborstatic int 439219019Sgabor/*ARGSUSED*/ 440219019Sgabor_citrus_MSKanji_encoding_module_init(_MSKanjiEncodingInfo * __restrict ei, 441219019Sgabor const void * __restrict var, size_t lenvar) 442219019Sgabor{ 443219019Sgabor const char *p; 444219019Sgabor 445219019Sgabor p = var; 446219019Sgabor memset((void *)ei, 0, sizeof(*ei)); 447219019Sgabor while (lenvar > 0) { 448219019Sgabor switch (_bcs_toupper(*p)) { 449219019Sgabor case 'J': 450219019Sgabor MATCH(JIS2004, ei->mode |= MODE_JIS2004); 451219019Sgabor break; 452219019Sgabor } 453219019Sgabor ++p; 454219019Sgabor --lenvar; 455219019Sgabor } 456219019Sgabor 457219019Sgabor return (0); 458219019Sgabor} 459219019Sgabor 460219019Sgaborstatic void 461219019Sgabor_citrus_MSKanji_encoding_module_uninit(_MSKanjiEncodingInfo *ei __unused) 462219019Sgabor{ 463219019Sgabor 464219019Sgabor} 465219019Sgabor 466219019Sgabor/* ---------------------------------------------------------------------- 467219019Sgabor * public interface for stdenc 468219019Sgabor */ 469219019Sgabor 470219019Sgabor_CITRUS_STDENC_DECLS(MSKanji); 471219019Sgabor_CITRUS_STDENC_DEF_OPS(MSKanji); 472219019Sgabor 473219019Sgabor#include "citrus_stdenc_template.h" 474