1/* $NetBSD: citrus_dechanyu.c,v 1.4 2011/11/19 18:20:13 tnozaki Exp $ */ 2 3/*- 4 * Copyright (c)2007 Citrus Project, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28#include <sys/cdefs.h> 29#if defined(LIBC_SCCS) && !defined(lint) 30__RCSID("$NetBSD: citrus_dechanyu.c,v 1.4 2011/11/19 18:20:13 tnozaki Exp $"); 31#endif /* LIBC_SCCS and not lint */ 32 33#include <sys/types.h> 34#include <assert.h> 35#include <errno.h> 36#include <string.h> 37#include <stdint.h> 38#include <stdio.h> 39#include <stdlib.h> 40#include <stddef.h> 41#include <wchar.h> 42#include <limits.h> 43 44#include "citrus_namespace.h" 45#include "citrus_types.h" 46#include "citrus_bcs.h" 47#include "citrus_module.h" 48#include "citrus_ctype.h" 49#include "citrus_stdenc.h" 50#include "citrus_dechanyu.h" 51 52/* ---------------------------------------------------------------------- 53 * private stuffs used by templates 54 */ 55 56typedef struct { 57 int chlen; 58 char ch[4]; 59} _DECHanyuState; 60 61typedef struct { 62 int dummy; 63} _DECHanyuEncodingInfo; 64 65typedef struct { 66 _DECHanyuEncodingInfo ei; 67 struct { 68 /* for future multi-locale facility */ 69 _DECHanyuState s_mblen; 70 _DECHanyuState s_mbrlen; 71 _DECHanyuState s_mbrtowc; 72 _DECHanyuState s_mbtowc; 73 _DECHanyuState s_mbsrtowcs; 74 _DECHanyuState s_mbsnrtowcs; 75 _DECHanyuState s_wcrtomb; 76 _DECHanyuState s_wcsrtombs; 77 _DECHanyuState s_wcsnrtombs; 78 _DECHanyuState s_wctomb; 79 } states; 80} _DECHanyuCTypeInfo; 81 82#define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 83#define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.__CONCAT(s_,_func_) 84 85#define _FUNCNAME(m) __CONCAT(_citrus_DECHanyu_,m) 86#define _ENCODING_INFO _DECHanyuEncodingInfo 87#define _CTYPE_INFO _DECHanyuCTypeInfo 88#define _ENCODING_STATE _DECHanyuState 89#define _ENCODING_MB_CUR_MAX(_ei_) 4 90#define _ENCODING_IS_STATE_DEPENDENT 0 91#define _STATE_NEEDS_EXPLICIT_INIT(_ps_) 0 92 93static __inline void 94/*ARGSUSED*/ 95_citrus_DECHanyu_init_state(_DECHanyuEncodingInfo * __restrict ei, 96 _DECHanyuState * __restrict psenc) 97{ 98 /* ei may be null */ 99 _DIAGASSERT(psenc != NULL); 100 101 psenc->chlen = 0; 102} 103 104static __inline void 105/*ARGSUSED*/ 106_citrus_DECHanyu_pack_state(_DECHanyuEncodingInfo * __restrict ei, 107 void * __restrict pspriv, 108 const _DECHanyuState * __restrict psenc) 109{ 110 /* ei may be null */ 111 _DIAGASSERT(pspriv != NULL); 112 _DIAGASSERT(psenc != NULL); 113 114 memcpy(pspriv, (const void *)psenc, sizeof(*psenc)); 115} 116 117static __inline void 118/*ARGSUSED*/ 119_citrus_DECHanyu_unpack_state(_DECHanyuEncodingInfo * __restrict ei, 120 _DECHanyuState * __restrict psenc, 121 const void * __restrict pspriv) 122{ 123 /* ei may be null */ 124 _DIAGASSERT(psenc != NULL); 125 _DIAGASSERT(pspriv != NULL); 126 127 memcpy((void *)psenc, pspriv, sizeof(*psenc)); 128} 129 130static void 131/*ARGSUSED*/ 132_citrus_DECHanyu_encoding_module_uninit(_DECHanyuEncodingInfo *ei) 133{ 134 /* ei may be null */ 135} 136 137static int 138/*ARGSUSED*/ 139_citrus_DECHanyu_encoding_module_init(_DECHanyuEncodingInfo * __restrict ei, 140 const void * __restrict var, size_t lenvar) 141{ 142 /* ei may be null */ 143 return 0; 144} 145 146static __inline int 147is_singlebyte(int c) 148{ 149 return c <= 0x7F; 150} 151 152static __inline int 153is_leadbyte(int c) 154{ 155 return c >= 0xA1 && c <= 0xFE; 156} 157 158static __inline int 159is_trailbyte(int c) 160{ 161 c &= ~0x80; 162 return c >= 0x21 && c <= 0x7E; 163} 164 165static __inline int 166is_hanyu1(int c) 167{ 168 return c == 0xC2; 169} 170 171static __inline int 172is_hanyu2(int c) 173{ 174 return c == 0xCB; 175} 176 177#define HANYUBIT 0xC2CB0000 178 179static __inline int 180is_94charset(int c) 181{ 182 return c >= 0x21 && c <= 0x7E; 183} 184 185static int 186/*ARGSUSED*/ 187_citrus_DECHanyu_mbrtowc_priv(_DECHanyuEncodingInfo * __restrict ei, 188 wchar_t * __restrict pwc, const char ** __restrict s, size_t n, 189 _DECHanyuState * __restrict psenc, size_t * __restrict nresult) 190{ 191 const char *s0; 192 int ch; 193 wchar_t wc; 194 195 /* ei may be unused */ 196 _DIAGASSERT(s != NULL); 197 _DIAGASSERT(psenc != NULL); 198 _DIAGASSERT(nresult != NULL); 199 200 if (*s == NULL) { 201 _citrus_DECHanyu_init_state(ei, psenc); 202 *nresult = _ENCODING_IS_STATE_DEPENDENT; 203 return 0; 204 } 205 s0 = *s; 206 207 wc = (wchar_t)0; 208 switch (psenc->chlen) { 209 case 0: 210 if (n-- < 1) 211 goto restart; 212 ch = *s0++ & 0xFF; 213 if (is_singlebyte(ch) != 0) { 214 if (pwc != NULL) 215 *pwc = (wchar_t)ch; 216 *nresult = (size_t)((ch == 0) ? 0 : 1); 217 *s = s0; 218 return 0; 219 } 220 if (is_leadbyte(ch) == 0) 221 goto ilseq; 222 psenc->ch[psenc->chlen++] = ch; 223 break; 224 case 1: 225 ch = psenc->ch[0] & 0xFF; 226 if (is_leadbyte(ch) == 0) 227 return EINVAL; 228 break; 229 case 2: case 3: 230 ch = psenc->ch[0] & 0xFF; 231 if (is_hanyu1(ch) != 0) { 232 ch = psenc->ch[1] & 0xFF; 233 if (is_hanyu2(ch) != 0) { 234 wc |= (wchar_t)HANYUBIT; 235 break; 236 } 237 } 238 /*FALLTHROUGH*/ 239 default: 240 return EINVAL; 241 } 242 243 switch (psenc->chlen) { 244 case 1: 245 if (is_hanyu1(ch) != 0) { 246 if (n-- < 1) 247 goto restart; 248 ch = *s0++ & 0xFF; 249 if (is_hanyu2(ch) == 0) 250 goto ilseq; 251 psenc->ch[psenc->chlen++] = ch; 252 wc |= (wchar_t)HANYUBIT; 253 if (n-- < 1) 254 goto restart; 255 ch = *s0++ & 0xFF; 256 if (is_leadbyte(ch) == 0) 257 goto ilseq; 258 psenc->ch[psenc->chlen++] = ch; 259 } 260 break; 261 case 2: 262 if (n-- < 1) 263 goto restart; 264 ch = *s0++ & 0xFF; 265 if (is_leadbyte(ch) == 0) 266 goto ilseq; 267 psenc->ch[psenc->chlen++] = ch; 268 break; 269 case 3: 270 ch = psenc->ch[2] & 0xFF; 271 if (is_leadbyte(ch) == 0) 272 return EINVAL; 273 } 274 if (n-- < 1) 275 goto restart; 276 wc |= (wchar_t)(ch << 8); 277 ch = *s0++ & 0xFF; 278 if (is_trailbyte(ch) == 0) 279 goto ilseq; 280 wc |= (wchar_t)ch; 281 if (pwc != NULL) 282 *pwc = wc; 283 *nresult = (size_t)(s0 - *s); 284 *s = s0; 285 psenc->chlen = 0; 286 287 return 0; 288 289restart: 290 *nresult = (size_t)-2; 291 *s = s0; 292 return 0; 293 294ilseq: 295 *nresult = (size_t)-1; 296 return EILSEQ; 297} 298 299static int 300/*ARGSUSED*/ 301_citrus_DECHanyu_wcrtomb_priv(_DECHanyuEncodingInfo * __restrict ei, 302 char * __restrict s, size_t n, wchar_t wc, 303 _DECHanyuState * __restrict psenc, size_t * __restrict nresult) 304{ 305 int ch; 306 307 /* ei may be unused */ 308 _DIAGASSERT(s != NULL); 309 _DIAGASSERT(psenc != NULL); 310 _DIAGASSERT(nresult != NULL); 311 312 if (psenc->chlen != 0) 313 return EINVAL; 314 315 /* XXX: assume wchar_t as int */ 316 if ((uint32_t)wc <= 0x7F) { 317 ch = wc & 0xFF; 318 } else { 319 if ((uint32_t)wc > 0xFFFF) { 320 if ((wc & ~0xFFFF) != HANYUBIT) 321 goto ilseq; 322 psenc->ch[psenc->chlen++] = (wc >> 24) & 0xFF; 323 psenc->ch[psenc->chlen++] = (wc >> 16) & 0xFF; 324 wc &= 0xFFFF; 325 } 326 ch = (wc >> 8) & 0xFF; 327 if (!is_leadbyte(ch)) 328 goto ilseq; 329 psenc->ch[psenc->chlen++] = ch; 330 ch = wc & 0xFF; 331 if (is_trailbyte(ch) == 0) 332 goto ilseq; 333 } 334 psenc->ch[psenc->chlen++] = ch; 335 if (n < psenc->chlen) { 336 *nresult = (size_t)-1; 337 return E2BIG; 338 } 339 memcpy(s, psenc->ch, psenc->chlen); 340 *nresult = psenc->chlen; 341 psenc->chlen = 0; 342 343 return 0; 344 345ilseq: 346 *nresult = (size_t)-1; 347 return EILSEQ; 348} 349 350static __inline int 351/*ARGSUSED*/ 352_citrus_DECHanyu_stdenc_wctocs(_DECHanyuEncodingInfo * __restrict ei, 353 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc) 354{ 355 int plane; 356 wchar_t mask; 357 358 /* ei may be unused */ 359 _DIAGASSERT(csid != NULL); 360 _DIAGASSERT(idx != NULL); 361 362 plane = 0; 363 mask = 0x7F; 364 /* XXX: assume wchar_t as int */ 365 if ((uint32_t)wc > 0x7F) { 366 if ((uint32_t)wc > 0xFFFF) { 367 if ((wc & ~0xFFFF) != HANYUBIT) 368 return EILSEQ; 369 plane += 2; 370 } 371 if (is_leadbyte((wc >> 8) & 0xFF) == 0 || 372 is_trailbyte(wc & 0xFF) == 0) 373 return EILSEQ; 374 plane += (wc & 0x80) ? 1 : 2; 375 mask |= 0x7F00; 376 } 377 *csid = plane; 378 *idx = (_index_t)(wc & mask); 379 380 return 0; 381} 382 383static __inline int 384/*ARGSUSED*/ 385_citrus_DECHanyu_stdenc_cstowc(_DECHanyuEncodingInfo * __restrict ei, 386 wchar_t * __restrict wc, _csid_t csid, _index_t idx) 387{ 388 /* ei may be unused */ 389 _DIAGASSERT(wc != NULL); 390 391 if (csid == 0) { 392 if (idx > 0x7F) 393 return EILSEQ; 394 } else if (csid <= 4) { 395 if (is_94charset(idx >> 8) == 0) 396 return EILSEQ; 397 if (is_94charset(idx & 0xFF) == 0) 398 return EILSEQ; 399 if (csid % 2) 400 idx |= 0x80; 401 idx |= 0x8000; 402 if (csid > 2) 403 idx |= HANYUBIT; 404 } else 405 return EILSEQ; 406 *wc = (wchar_t)idx; 407 return 0; 408} 409 410static __inline int 411/*ARGSUSED*/ 412_citrus_DECHanyu_stdenc_get_state_desc_generic( 413 _DECHanyuEncodingInfo * __restrict ei, 414 _DECHanyuState * __restrict psenc, int * __restrict rstate) 415{ 416 /* ei may be unused */ 417 _DIAGASSERT(psenc != NULL); 418 _DIAGASSERT(rstate != NULL); 419 420 *rstate = (psenc->chlen == 0) 421 ? _STDENC_SDGEN_INITIAL 422 : _STDENC_SDGEN_INCOMPLETE_CHAR; 423 return 0; 424} 425 426/* ---------------------------------------------------------------------- 427 * public interface for ctype 428 */ 429 430_CITRUS_CTYPE_DECLS(DECHanyu); 431_CITRUS_CTYPE_DEF_OPS(DECHanyu); 432 433#include "citrus_ctype_template.h" 434 435 436/* ---------------------------------------------------------------------- 437 * public interface for stdenc 438 */ 439 440_CITRUS_STDENC_DECLS(DECHanyu); 441_CITRUS_STDENC_DEF_OPS(DECHanyu); 442 443#include "citrus_stdenc_template.h" 444