1/* $NetBSD: citrus_dechanyu.c,v 1.4 2011/11/19 18:20:13 tnozaki Exp $ */ 2 3/*- 4 * SPDX-License-Identifier: BSD-2-Clause 5 * 6 * Copyright (c)2007 Citrus Project, 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31#include <sys/types.h> 32 33#include <assert.h> 34#include <errno.h> 35#include <limits.h> 36#include <stddef.h> 37#include <stdint.h> 38#include <stdio.h> 39#include <stdlib.h> 40#include <string.h> 41#include <wchar.h> 42 43#include "citrus_namespace.h" 44#include "citrus_types.h" 45#include "citrus_bcs.h" 46#include "citrus_module.h" 47#include "citrus_stdenc.h" 48#include "citrus_dechanyu.h" 49 50/* ---------------------------------------------------------------------- 51 * private stuffs used by templates 52 */ 53 54typedef struct { 55 size_t chlen; 56 char ch[4]; 57} _DECHanyuState; 58 59typedef struct { 60 int dummy; 61} _DECHanyuEncodingInfo; 62 63#define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 64#define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.__CONCAT(s_,_func_) 65 66#define _FUNCNAME(m) __CONCAT(_citrus_DECHanyu_,m) 67#define _ENCODING_INFO _DECHanyuEncodingInfo 68#define _ENCODING_STATE _DECHanyuState 69#define _ENCODING_MB_CUR_MAX(_ei_) 4 70#define _ENCODING_IS_STATE_DEPENDENT 0 71#define _STATE_NEEDS_EXPLICIT_INIT(_ps_) 0 72 73static __inline void 74/*ARGSUSED*/ 75_citrus_DECHanyu_init_state(_DECHanyuEncodingInfo * __restrict ei __unused, 76 _DECHanyuState * __restrict psenc) 77{ 78 79 psenc->chlen = 0; 80} 81 82#if 0 83static __inline void 84/*ARGSUSED*/ 85_citrus_DECHanyu_pack_state(_DECHanyuEncodingInfo * __restrict ei __unused, 86 void * __restrict pspriv, const _DECHanyuState * __restrict psenc) 87{ 88 89 memcpy(pspriv, (const void *)psenc, sizeof(*psenc)); 90} 91 92static __inline void 93/*ARGSUSED*/ 94_citrus_DECHanyu_unpack_state(_DECHanyuEncodingInfo * __restrict ei __unused, 95 _DECHanyuState * __restrict psenc, 96 const void * __restrict pspriv) 97{ 98 99 memcpy((void *)psenc, pspriv, sizeof(*psenc)); 100} 101#endif 102 103static void 104/*ARGSUSED*/ 105_citrus_DECHanyu_encoding_module_uninit(_DECHanyuEncodingInfo *ei __unused) 106{ 107 108 /* ei may be null */ 109} 110 111static int 112/*ARGSUSED*/ 113_citrus_DECHanyu_encoding_module_init(_DECHanyuEncodingInfo * __restrict ei __unused, 114 const void * __restrict var __unused, size_t lenvar __unused) 115{ 116 117 /* ei may be null */ 118 return (0); 119} 120 121static __inline bool 122is_singlebyte(int c) 123{ 124 125 return (c <= 0x7F); 126} 127 128static __inline bool 129is_leadbyte(int c) 130{ 131 132 return (c >= 0xA1 && c <= 0xFE); 133} 134 135static __inline bool 136is_trailbyte(int c) 137{ 138 139 c &= ~0x80; 140 return (c >= 0x21 && c <= 0x7E); 141} 142 143static __inline bool 144is_hanyu1(int c) 145{ 146 147 return (c == 0xC2); 148} 149 150static __inline bool 151is_hanyu2(int c) 152{ 153 154 return (c == 0xCB); 155} 156 157#define HANYUBIT 0xC2CB0000 158 159static __inline bool 160is_94charset(int c) 161{ 162 163 return (c >= 0x21 && c <= 0x7E); 164} 165 166static int 167/*ARGSUSED*/ 168_citrus_DECHanyu_mbrtowc_priv(_DECHanyuEncodingInfo * __restrict ei, 169 wchar_t * __restrict pwc, char ** __restrict s, size_t n, 170 _DECHanyuState * __restrict psenc, size_t * __restrict nresult) 171{ 172 char *s0; 173 wchar_t wc; 174 int ch; 175 176 if (*s == NULL) { 177 _citrus_DECHanyu_init_state(ei, psenc); 178 *nresult = _ENCODING_IS_STATE_DEPENDENT; 179 return (0); 180 } 181 s0 = *s; 182 183 wc = (wchar_t)0; 184 switch (psenc->chlen) { 185 case 0: 186 if (n-- < 1) 187 goto restart; 188 ch = *s0++ & 0xFF; 189 if (is_singlebyte(ch)) { 190 if (pwc != NULL) 191 *pwc = (wchar_t)ch; 192 *nresult = (size_t)((ch == 0) ? 0 : 1); 193 *s = s0; 194 return (0); 195 } 196 if (!is_leadbyte(ch)) 197 goto ilseq; 198 psenc->ch[psenc->chlen++] = ch; 199 break; 200 case 1: 201 ch = psenc->ch[0] & 0xFF; 202 if (!is_leadbyte(ch)) 203 return (EINVAL); 204 break; 205 case 2: case 3: 206 ch = psenc->ch[0] & 0xFF; 207 if (is_hanyu1(ch)) { 208 ch = psenc->ch[1] & 0xFF; 209 if (is_hanyu2(ch)) { 210 wc |= (wchar_t)HANYUBIT; 211 break; 212 } 213 } 214 /*FALLTHROUGH*/ 215 default: 216 return (EINVAL); 217 } 218 219 switch (psenc->chlen) { 220 case 1: 221 if (is_hanyu1(ch)) { 222 if (n-- < 1) 223 goto restart; 224 ch = *s0++ & 0xFF; 225 if (!is_hanyu2(ch)) 226 goto ilseq; 227 psenc->ch[psenc->chlen++] = ch; 228 wc |= (wchar_t)HANYUBIT; 229 if (n-- < 1) 230 goto restart; 231 ch = *s0++ & 0xFF; 232 if (!is_leadbyte(ch)) 233 goto ilseq; 234 psenc->ch[psenc->chlen++] = ch; 235 } 236 break; 237 case 2: 238 if (n-- < 1) 239 goto restart; 240 ch = *s0++ & 0xFF; 241 if (!is_leadbyte(ch)) 242 goto ilseq; 243 psenc->ch[psenc->chlen++] = ch; 244 break; 245 case 3: 246 ch = psenc->ch[2] & 0xFF; 247 if (!is_leadbyte(ch)) 248 return (EINVAL); 249 } 250 if (n-- < 1) 251 goto restart; 252 wc |= (wchar_t)(ch << 8); 253 ch = *s0++ & 0xFF; 254 if (!is_trailbyte(ch)) 255 goto ilseq; 256 wc |= (wchar_t)ch; 257 if (pwc != NULL) 258 *pwc = wc; 259 *nresult = (size_t)(s0 - *s); 260 *s = s0; 261 psenc->chlen = 0; 262 263 return (0); 264 265restart: 266 *nresult = (size_t)-2; 267 *s = s0; 268 return (0); 269 270ilseq: 271 *nresult = (size_t)-1; 272 return (EILSEQ); 273} 274 275static int 276/*ARGSUSED*/ 277_citrus_DECHanyu_wcrtomb_priv(_DECHanyuEncodingInfo * __restrict ei __unused, 278 char * __restrict s, size_t n, wchar_t wc, 279 _DECHanyuState * __restrict psenc, size_t * __restrict nresult) 280{ 281 int ch; 282 283 if (psenc->chlen != 0) 284 return (EINVAL); 285 286 /* XXX: assume wchar_t as int */ 287 if ((uint32_t)wc <= 0x7F) { 288 ch = wc & 0xFF; 289 } else { 290 if ((uint32_t)wc > 0xFFFF) { 291 if ((wc & ~0xFFFF) != (wchar_t)HANYUBIT) 292 goto ilseq; 293 psenc->ch[psenc->chlen++] = (wc >> 24) & 0xFF; 294 psenc->ch[psenc->chlen++] = (wc >> 16) & 0xFF; 295 wc &= 0xFFFF; 296 } 297 ch = (wc >> 8) & 0xFF; 298 if (!is_leadbyte(ch)) 299 goto ilseq; 300 psenc->ch[psenc->chlen++] = ch; 301 ch = wc & 0xFF; 302 if (!is_trailbyte(ch)) 303 goto ilseq; 304 } 305 psenc->ch[psenc->chlen++] = ch; 306 if (n < psenc->chlen) { 307 *nresult = (size_t)-1; 308 return (E2BIG); 309 } 310 memcpy(s, psenc->ch, psenc->chlen); 311 *nresult = psenc->chlen; 312 psenc->chlen = 0; 313 314 return (0); 315 316ilseq: 317 *nresult = (size_t)-1; 318 return (EILSEQ); 319} 320 321static __inline int 322/*ARGSUSED*/ 323_citrus_DECHanyu_stdenc_wctocs(_DECHanyuEncodingInfo * __restrict ei __unused, 324 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc) 325{ 326 wchar_t mask; 327 int plane; 328 329 plane = 0; 330 mask = 0x7F; 331 /* XXX: assume wchar_t as int */ 332 if ((uint32_t)wc > 0x7F) { 333 if ((uint32_t)wc > 0xFFFF) { 334 if ((wc & ~0xFFFF) != (wchar_t)HANYUBIT) 335 return (EILSEQ); 336 plane += 2; 337 } 338 if (!is_leadbyte((wc >> 8) & 0xFF) || 339 !is_trailbyte(wc & 0xFF)) 340 return (EILSEQ); 341 plane += (wc & 0x80) ? 1 : 2; 342 mask |= 0x7F00; 343 } 344 *csid = plane; 345 *idx = (_index_t)(wc & mask); 346 347 return (0); 348} 349 350static __inline int 351/*ARGSUSED*/ 352_citrus_DECHanyu_stdenc_cstowc(_DECHanyuEncodingInfo * __restrict ei __unused, 353 wchar_t * __restrict wc, _csid_t csid, _index_t idx) 354{ 355 356 if (csid == 0) { 357 if (idx > 0x7F) 358 return (EILSEQ); 359 } else if (csid <= 4) { 360 if (!is_94charset(idx >> 8)) 361 return (EILSEQ); 362 if (!is_94charset(idx & 0xFF)) 363 return (EILSEQ); 364 if (csid % 2) 365 idx |= 0x80; 366 idx |= 0x8000; 367 if (csid > 2) 368 idx |= HANYUBIT; 369 } else 370 return (EILSEQ); 371 *wc = (wchar_t)idx; 372 return (0); 373} 374 375static __inline int 376/*ARGSUSED*/ 377_citrus_DECHanyu_stdenc_get_state_desc_generic( 378 _DECHanyuEncodingInfo * __restrict ei __unused, 379 _DECHanyuState * __restrict psenc, int * __restrict rstate) 380{ 381 382 *rstate = (psenc->chlen == 0) 383 ? _STDENC_SDGEN_INITIAL 384 : _STDENC_SDGEN_INCOMPLETE_CHAR; 385 return (0); 386} 387 388/* ---------------------------------------------------------------------- 389 * public interface for stdenc 390 */ 391 392_CITRUS_STDENC_DECLS(DECHanyu); 393_CITRUS_STDENC_DEF_OPS(DECHanyu); 394 395#include "citrus_stdenc_template.h" 396