1/* $NetBSD: citrus_big5.c,v 1.12 2008/06/14 16:01:07 tnozaki Exp $ */ 2 3/*- 4 * Copyright (c)2002, 2006 Citrus Project, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29/*- 30 * Copyright (c) 1993 31 * The Regents of the University of California. All rights reserved. 32 * 33 * This code is derived from software contributed to Berkeley by 34 * Paul Borman at Krystal Technologies. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 3. Neither the name of the University nor the names of its contributors 45 * may be used to endorse or promote products derived from this software 46 * without specific prior written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 58 * SUCH DAMAGE. 59 */ 60 61#include <sys/cdefs.h> 62#if defined(LIBC_SCCS) && !defined(lint) 63__RCSID("$NetBSD: citrus_big5.c,v 1.12 2008/06/14 16:01:07 tnozaki Exp $"); 64#endif /* LIBC_SCCS and not lint */ 65 66#include <sys/queue.h> 67#include <sys/types.h> 68#include <assert.h> 69#include <errno.h> 70#include <string.h> 71#include <stdint.h> 72#include <stdio.h> 73#include <stdlib.h> 74#include <stddef.h> 75#include <wchar.h> 76#include <limits.h> 77 78#include "citrus_namespace.h" 79#include "citrus_types.h" 80#include "citrus_bcs.h" 81#include "citrus_module.h" 82#include "citrus_ctype.h" 83#include "citrus_stdenc.h" 84#include "citrus_big5.h" 85 86#include "citrus_prop.h" 87 88/* ---------------------------------------------------------------------- 89 * private stuffs used by templates 90 */ 91 92typedef struct { 93 char ch[2]; 94 int chlen; 95} _BIG5State; 96 97typedef struct _BIG5Exclude { 98 TAILQ_ENTRY(_BIG5Exclude) entry; 99 wint_t start, end; 100} _BIG5Exclude; 101 102typedef TAILQ_HEAD(_BIG5ExcludeList, _BIG5Exclude) _BIG5ExcludeList; 103 104typedef struct { 105 int cell[0x100]; 106 _BIG5ExcludeList excludes; 107} _BIG5EncodingInfo; 108 109typedef struct { 110 _BIG5EncodingInfo ei; 111 struct { 112 /* for future multi-locale facility */ 113 _BIG5State s_mblen; 114 _BIG5State s_mbrlen; 115 _BIG5State s_mbrtowc; 116 _BIG5State s_mbtowc; 117 _BIG5State s_mbsrtowcs; 118 _BIG5State s_wcrtomb; 119 _BIG5State s_wcsrtombs; 120 _BIG5State s_wctomb; 121 } states; 122} _BIG5CTypeInfo; 123 124#define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 125#define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 126 127#define _FUNCNAME(m) _citrus_BIG5_##m 128#define _ENCODING_INFO _BIG5EncodingInfo 129#define _CTYPE_INFO _BIG5CTypeInfo 130#define _ENCODING_STATE _BIG5State 131#define _ENCODING_MB_CUR_MAX(_ei_) 2 132#define _ENCODING_IS_STATE_DEPENDENT 0 133#define _STATE_NEEDS_EXPLICIT_INIT(_ps_) 0 134 135 136static __inline void 137/*ARGSUSED*/ 138_citrus_BIG5_init_state(_BIG5EncodingInfo * __restrict ei, 139 _BIG5State * __restrict s) 140{ 141 memset(s, 0, sizeof(*s)); 142} 143 144static __inline void 145/*ARGSUSED*/ 146_citrus_BIG5_pack_state(_BIG5EncodingInfo * __restrict ei, 147 void * __restrict pspriv, 148 const _BIG5State * __restrict s) 149{ 150 memcpy(pspriv, (const void *)s, sizeof(*s)); 151} 152 153static __inline void 154/*ARGSUSED*/ 155_citrus_BIG5_unpack_state(_BIG5EncodingInfo * __restrict ei, 156 _BIG5State * __restrict s, 157 const void * __restrict pspriv) 158{ 159 memcpy((void *)s, pspriv, sizeof(*s)); 160} 161 162static __inline int 163_citrus_BIG5_check(_BIG5EncodingInfo *ei, u_int c) 164{ 165 _DIAGASSERT(ei != NULL); 166 167 return (ei->cell[c & 0xFF] & 0x1) ? 2 : 1; 168} 169 170static __inline int 171_citrus_BIG5_check2(_BIG5EncodingInfo *ei, u_int c) 172{ 173 _DIAGASSERT(ei != NULL); 174 175 return (ei->cell[c & 0xFF] & 0x2) ? 1 : 0; 176} 177 178static __inline int 179_citrus_BIG5_check_excludes(_BIG5EncodingInfo *ei, wint_t c) 180{ 181 _BIG5Exclude *exclude; 182 183 _DIAGASSERT(ei != NULL); 184 185 TAILQ_FOREACH(exclude, &ei->excludes, entry) { 186 if (c >= exclude->start && c <= exclude->end) 187 return EILSEQ; 188 } 189 return 0; 190} 191 192static int 193_citrus_BIG5_fill_rowcol(void ** __restrict ctx, const char * __restrict s, 194 uint64_t start, uint64_t end) 195{ 196 _BIG5EncodingInfo *ei; 197 int i; 198 uint64_t n; 199 200 _DIAGASSERT(ctx != NULL && *ctx != NULL); 201 202 if (start > 0xFF || end > 0xFF) 203 return EINVAL; 204 ei = (_BIG5EncodingInfo *)*ctx; 205 i = strcmp("row", s) ? 1 : 0; 206 i = 1 << i; 207 for (n = start; n <= end; ++n) 208 ei->cell[n & 0xFF] |= i; 209 return 0; 210} 211 212static int 213/*ARGSUSED*/ 214_citrus_BIG5_fill_excludes(void ** __restrict ctx, const char * __restrict s, 215 uint64_t start, uint64_t end) 216{ 217 _BIG5EncodingInfo *ei; 218 _BIG5Exclude *exclude; 219 220 _DIAGASSERT(ctx != NULL && *ctx != NULL); 221 222 if (start > 0xFFFF || end > 0xFFFF) 223 return EINVAL; 224 ei = (_BIG5EncodingInfo *)*ctx; 225 exclude = TAILQ_LAST(&ei->excludes, _BIG5ExcludeList); 226 if (exclude != NULL && (wint_t)start <= exclude->end) 227 return EINVAL; 228 exclude = (void *)malloc(sizeof(*exclude)); 229 if (exclude == NULL) 230 return ENOMEM; 231 exclude->start = (wint_t)start; 232 exclude->end = (wint_t)end; 233 TAILQ_INSERT_TAIL(&ei->excludes, exclude, entry); 234 235 return 0; 236} 237 238static const _citrus_prop_hint_t root_hints[] = { 239 _CITRUS_PROP_HINT_NUM("row", &_citrus_BIG5_fill_rowcol), 240 _CITRUS_PROP_HINT_NUM("col", &_citrus_BIG5_fill_rowcol), 241 _CITRUS_PROP_HINT_NUM("excludes", &_citrus_BIG5_fill_excludes), 242 _CITRUS_PROP_HINT_END 243}; 244 245static void 246/*ARGSUSED*/ 247_citrus_BIG5_encoding_module_uninit(_BIG5EncodingInfo *ei) 248{ 249 _BIG5Exclude *exclude; 250 251 _DIAGASSERT(ei != NULL); 252 253 while ((exclude = TAILQ_FIRST(&ei->excludes)) != NULL) { 254 TAILQ_REMOVE(&ei->excludes, exclude, entry); 255 free(exclude); 256 } 257} 258 259static int 260/*ARGSUSED*/ 261_citrus_BIG5_encoding_module_init(_BIG5EncodingInfo * __restrict ei, 262 const void * __restrict var, size_t lenvar) 263{ 264 int err; 265 const char *s; 266 267 _DIAGASSERT(ei != NULL); 268 269 memset((void *)ei, 0, sizeof(*ei)); 270 TAILQ_INIT(&ei->excludes); 271 272 if (lenvar > 0 && var != NULL) { 273 s = _bcs_skip_ws_len((const char *)var, &lenvar); 274 if (lenvar > 0 && *s != '\0') { 275 err = _citrus_prop_parse_variable( 276 root_hints, (void *)ei, s, lenvar); 277 if (err == 0) 278 return 0; 279 280 _citrus_BIG5_encoding_module_uninit(ei); 281 memset((void *)ei, 0, sizeof(*ei)); 282 TAILQ_INIT(&ei->excludes); 283 } 284 } 285 286 /* fallback Big5-1984, for backward compatibility. */ 287 _citrus_BIG5_fill_rowcol((void **)&ei, "row", 0xA1, 0xFE); 288 _citrus_BIG5_fill_rowcol((void **)&ei, "col", 0x40, 0x7E); 289 _citrus_BIG5_fill_rowcol((void **)&ei, "col", 0xA1, 0xFE); 290 291 return 0; 292} 293 294static int 295/*ARGSUSED*/ 296_citrus_BIG5_mbrtowc_priv(_BIG5EncodingInfo * __restrict ei, 297 wchar_t * __restrict pwc, 298 const char ** __restrict s, size_t n, 299 _BIG5State * __restrict psenc, 300 size_t * __restrict nresult) 301{ 302 wchar_t wchar; 303 int c; 304 int chlenbak; 305 const char *s0; 306 307 _DIAGASSERT(nresult != 0); 308 _DIAGASSERT(ei != NULL); 309 _DIAGASSERT(psenc != NULL); 310 _DIAGASSERT(s != NULL && *s != NULL); 311 312 s0 = *s; 313 314 if (s0 == NULL) { 315 _citrus_BIG5_init_state(ei, psenc); 316 *nresult = 0; 317 return (0); 318 } 319 320 chlenbak = psenc->chlen; 321 322 /* make sure we have the first byte in the buffer */ 323 switch (psenc->chlen) { 324 case 0: 325 if (n < 1) 326 goto restart; 327 psenc->ch[0] = *s0++; 328 psenc->chlen = 1; 329 n--; 330 break; 331 case 1: 332 break; 333 default: 334 /* illegal state */ 335 goto ilseq; 336 } 337 338 c = _citrus_BIG5_check(ei, psenc->ch[0] & 0xff); 339 if (c == 0) 340 goto ilseq; 341 while (psenc->chlen < c) { 342 if (n < 1) { 343 goto restart; 344 } 345 psenc->ch[psenc->chlen] = *s0++; 346 psenc->chlen++; 347 n--; 348 } 349 350 switch (c) { 351 case 1: 352 wchar = psenc->ch[0] & 0xff; 353 break; 354 case 2: 355 if (!_citrus_BIG5_check2(ei, psenc->ch[1] & 0xff)) 356 goto ilseq; 357 wchar = ((psenc->ch[0] & 0xff) << 8) | (psenc->ch[1] & 0xff); 358 break; 359 default: 360 /* illegal state */ 361 goto ilseq; 362 } 363 364 if (_citrus_BIG5_check_excludes(ei, (wint_t)wchar) != 0) 365 goto ilseq; 366 367 *s = s0; 368 psenc->chlen = 0; 369 if (pwc) 370 *pwc = wchar; 371 if (!wchar) 372 *nresult = 0; 373 else 374 *nresult = c - chlenbak; 375 376 return (0); 377 378ilseq: 379 psenc->chlen = 0; 380 *nresult = (size_t)-1; 381 return (EILSEQ); 382 383restart: 384 *s = s0; 385 *nresult = (size_t)-2; 386 return (0); 387} 388 389static int 390/*ARGSUSED*/ 391_citrus_BIG5_wcrtomb_priv(_BIG5EncodingInfo * __restrict ei, 392 char * __restrict s, 393 size_t n, wchar_t wc, _BIG5State * __restrict psenc, 394 size_t * __restrict nresult) 395{ 396 size_t l, ret; 397 398 _DIAGASSERT(ei != NULL); 399 _DIAGASSERT(nresult != 0); 400 _DIAGASSERT(s != NULL); 401 402 /* check invalid sequence */ 403 if (wc & ~0xffff || 404 _citrus_BIG5_check_excludes(ei, (wint_t)wc) != 0) { 405 ret = EILSEQ; 406 goto err; 407 } 408 409 if (wc & 0x8000) { 410 if (_citrus_BIG5_check(ei, (wc >> 8) & 0xff) != 2 || 411 !_citrus_BIG5_check2(ei, wc & 0xff)) { 412 ret = EILSEQ; 413 goto err; 414 } 415 l = 2; 416 } else { 417 if (wc & ~0xff || !_citrus_BIG5_check(ei, wc & 0xff)) { 418 ret = EILSEQ; 419 goto err; 420 } 421 l = 1; 422 } 423 424 if (n < l) { 425 /* bound check failure */ 426 ret = E2BIG; 427 goto err; 428 } 429 430 if (l == 2) { 431 s[0] = (wc >> 8) & 0xff; 432 s[1] = wc & 0xff; 433 } else 434 s[0] = wc & 0xff; 435 436 *nresult = l; 437 438 return 0; 439 440err: 441 *nresult = (size_t)-1; 442 return ret; 443} 444 445static __inline int 446/*ARGSUSED*/ 447_citrus_BIG5_stdenc_wctocs(_BIG5EncodingInfo * __restrict ei, 448 _csid_t * __restrict csid, 449 _index_t * __restrict idx, wchar_t wc) 450{ 451 452 _DIAGASSERT(csid != NULL && idx != NULL); 453 454 *csid = (wc < 0x100) ? 0 : 1; 455 *idx = (_index_t)wc; 456 457 return 0; 458} 459 460static __inline int 461/*ARGSUSED*/ 462_citrus_BIG5_stdenc_cstowc(_BIG5EncodingInfo * __restrict ei, 463 wchar_t * __restrict wc, 464 _csid_t csid, _index_t idx) 465{ 466 _DIAGASSERT(wc != NULL); 467 468 switch (csid) { 469 case 0: 470 case 1: 471 *wc = (wchar_t)idx; 472 break; 473 default: 474 return EILSEQ; 475 } 476 477 return 0; 478} 479 480static __inline int 481/*ARGSUSED*/ 482_citrus_BIG5_stdenc_get_state_desc_generic(_BIG5EncodingInfo * __restrict ei, 483 _BIG5State * __restrict psenc, 484 int * __restrict rstate) 485{ 486 487 if (psenc->chlen == 0) 488 *rstate = _STDENC_SDGEN_INITIAL; 489 else 490 *rstate = _STDENC_SDGEN_INCOMPLETE_CHAR; 491 492 return 0; 493} 494 495/* ---------------------------------------------------------------------- 496 * public interface for ctype 497 */ 498 499_CITRUS_CTYPE_DECLS(BIG5); 500_CITRUS_CTYPE_DEF_OPS(BIG5); 501 502#include "citrus_ctype_template.h" 503 504 505/* ---------------------------------------------------------------------- 506 * public interface for stdenc 507 */ 508 509_CITRUS_STDENC_DECLS(BIG5); 510_CITRUS_STDENC_DEF_OPS(BIG5); 511 512#include "citrus_stdenc_template.h" 513