1/* $FreeBSD$ */ 2/* $NetBSD: citrus_dechanyu.c,v 1.4 2011/11/19 18:20:13 tnozaki Exp $ */ 3 4/*- 5 * SPDX-License-Identifier: BSD-2-Clause 6 * 7 * Copyright (c)2007 Citrus Project, 8 * All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31#include <sys/cdefs.h> 32#include <sys/types.h> 33 34#include <assert.h> 35#include <errno.h> 36#include <limits.h> 37#include <stddef.h> 38#include <stdint.h> 39#include <stdio.h> 40#include <stdlib.h> 41#include <string.h> 42#include <wchar.h> 43 44#include "citrus_namespace.h" 45#include "citrus_types.h" 46#include "citrus_bcs.h" 47#include "citrus_module.h" 48#include "citrus_stdenc.h" 49#include "citrus_dechanyu.h" 50 51/* ---------------------------------------------------------------------- 52 * private stuffs used by templates 53 */ 54 55typedef struct { 56 size_t chlen; 57 char ch[4]; 58} _DECHanyuState; 59 60typedef struct { 61 int dummy; 62} _DECHanyuEncodingInfo; 63 64#define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 65#define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.__CONCAT(s_,_func_) 66 67#define _FUNCNAME(m) __CONCAT(_citrus_DECHanyu_,m) 68#define _ENCODING_INFO _DECHanyuEncodingInfo 69#define _ENCODING_STATE _DECHanyuState 70#define _ENCODING_MB_CUR_MAX(_ei_) 4 71#define _ENCODING_IS_STATE_DEPENDENT 0 72#define _STATE_NEEDS_EXPLICIT_INIT(_ps_) 0 73 74static __inline void 75/*ARGSUSED*/ 76_citrus_DECHanyu_init_state(_DECHanyuEncodingInfo * __restrict ei __unused, 77 _DECHanyuState * __restrict psenc) 78{ 79 80 psenc->chlen = 0; 81} 82 83#if 0 84static __inline void 85/*ARGSUSED*/ 86_citrus_DECHanyu_pack_state(_DECHanyuEncodingInfo * __restrict ei __unused, 87 void * __restrict pspriv, const _DECHanyuState * __restrict psenc) 88{ 89 90 memcpy(pspriv, (const void *)psenc, sizeof(*psenc)); 91} 92 93static __inline void 94/*ARGSUSED*/ 95_citrus_DECHanyu_unpack_state(_DECHanyuEncodingInfo * __restrict ei __unused, 96 _DECHanyuState * __restrict psenc, 97 const void * __restrict pspriv) 98{ 99 100 memcpy((void *)psenc, pspriv, sizeof(*psenc)); 101} 102#endif 103 104static void 105/*ARGSUSED*/ 106_citrus_DECHanyu_encoding_module_uninit(_DECHanyuEncodingInfo *ei __unused) 107{ 108 109 /* ei may be null */ 110} 111 112static int 113/*ARGSUSED*/ 114_citrus_DECHanyu_encoding_module_init(_DECHanyuEncodingInfo * __restrict ei __unused, 115 const void * __restrict var __unused, size_t lenvar __unused) 116{ 117 118 /* ei may be null */ 119 return (0); 120} 121 122static __inline bool 123is_singlebyte(int c) 124{ 125 126 return (c <= 0x7F); 127} 128 129static __inline bool 130is_leadbyte(int c) 131{ 132 133 return (c >= 0xA1 && c <= 0xFE); 134} 135 136static __inline bool 137is_trailbyte(int c) 138{ 139 140 c &= ~0x80; 141 return (c >= 0x21 && c <= 0x7E); 142} 143 144static __inline bool 145is_hanyu1(int c) 146{ 147 148 return (c == 0xC2); 149} 150 151static __inline bool 152is_hanyu2(int c) 153{ 154 155 return (c == 0xCB); 156} 157 158#define HANYUBIT 0xC2CB0000 159 160static __inline bool 161is_94charset(int c) 162{ 163 164 return (c >= 0x21 && c <= 0x7E); 165} 166 167static int 168/*ARGSUSED*/ 169_citrus_DECHanyu_mbrtowc_priv(_DECHanyuEncodingInfo * __restrict ei, 170 wchar_t * __restrict pwc, char ** __restrict s, size_t n, 171 _DECHanyuState * __restrict psenc, size_t * __restrict nresult) 172{ 173 char *s0; 174 wchar_t wc; 175 int ch; 176 177 if (*s == NULL) { 178 _citrus_DECHanyu_init_state(ei, psenc); 179 *nresult = _ENCODING_IS_STATE_DEPENDENT; 180 return (0); 181 } 182 s0 = *s; 183 184 wc = (wchar_t)0; 185 switch (psenc->chlen) { 186 case 0: 187 if (n-- < 1) 188 goto restart; 189 ch = *s0++ & 0xFF; 190 if (is_singlebyte(ch)) { 191 if (pwc != NULL) 192 *pwc = (wchar_t)ch; 193 *nresult = (size_t)((ch == 0) ? 0 : 1); 194 *s = s0; 195 return (0); 196 } 197 if (!is_leadbyte(ch)) 198 goto ilseq; 199 psenc->ch[psenc->chlen++] = ch; 200 break; 201 case 1: 202 ch = psenc->ch[0] & 0xFF; 203 if (!is_leadbyte(ch)) 204 return (EINVAL); 205 break; 206 case 2: case 3: 207 ch = psenc->ch[0] & 0xFF; 208 if (is_hanyu1(ch)) { 209 ch = psenc->ch[1] & 0xFF; 210 if (is_hanyu2(ch)) { 211 wc |= (wchar_t)HANYUBIT; 212 break; 213 } 214 } 215 /*FALLTHROUGH*/ 216 default: 217 return (EINVAL); 218 } 219 220 switch (psenc->chlen) { 221 case 1: 222 if (is_hanyu1(ch)) { 223 if (n-- < 1) 224 goto restart; 225 ch = *s0++ & 0xFF; 226 if (!is_hanyu2(ch)) 227 goto ilseq; 228 psenc->ch[psenc->chlen++] = ch; 229 wc |= (wchar_t)HANYUBIT; 230 if (n-- < 1) 231 goto restart; 232 ch = *s0++ & 0xFF; 233 if (!is_leadbyte(ch)) 234 goto ilseq; 235 psenc->ch[psenc->chlen++] = ch; 236 } 237 break; 238 case 2: 239 if (n-- < 1) 240 goto restart; 241 ch = *s0++ & 0xFF; 242 if (!is_leadbyte(ch)) 243 goto ilseq; 244 psenc->ch[psenc->chlen++] = ch; 245 break; 246 case 3: 247 ch = psenc->ch[2] & 0xFF; 248 if (!is_leadbyte(ch)) 249 return (EINVAL); 250 } 251 if (n-- < 1) 252 goto restart; 253 wc |= (wchar_t)(ch << 8); 254 ch = *s0++ & 0xFF; 255 if (!is_trailbyte(ch)) 256 goto ilseq; 257 wc |= (wchar_t)ch; 258 if (pwc != NULL) 259 *pwc = wc; 260 *nresult = (size_t)(s0 - *s); 261 *s = s0; 262 psenc->chlen = 0; 263 264 return (0); 265 266restart: 267 *nresult = (size_t)-2; 268 *s = s0; 269 return (0); 270 271ilseq: 272 *nresult = (size_t)-1; 273 return (EILSEQ); 274} 275 276static int 277/*ARGSUSED*/ 278_citrus_DECHanyu_wcrtomb_priv(_DECHanyuEncodingInfo * __restrict ei __unused, 279 char * __restrict s, size_t n, wchar_t wc, 280 _DECHanyuState * __restrict psenc, size_t * __restrict nresult) 281{ 282 int ch; 283 284 if (psenc->chlen != 0) 285 return (EINVAL); 286 287 /* XXX: assume wchar_t as int */ 288 if ((uint32_t)wc <= 0x7F) { 289 ch = wc & 0xFF; 290 } else { 291 if ((uint32_t)wc > 0xFFFF) { 292 if ((wc & ~0xFFFF) != (wchar_t)HANYUBIT) 293 goto ilseq; 294 psenc->ch[psenc->chlen++] = (wc >> 24) & 0xFF; 295 psenc->ch[psenc->chlen++] = (wc >> 16) & 0xFF; 296 wc &= 0xFFFF; 297 } 298 ch = (wc >> 8) & 0xFF; 299 if (!is_leadbyte(ch)) 300 goto ilseq; 301 psenc->ch[psenc->chlen++] = ch; 302 ch = wc & 0xFF; 303 if (!is_trailbyte(ch)) 304 goto ilseq; 305 } 306 psenc->ch[psenc->chlen++] = ch; 307 if (n < psenc->chlen) { 308 *nresult = (size_t)-1; 309 return (E2BIG); 310 } 311 memcpy(s, psenc->ch, psenc->chlen); 312 *nresult = psenc->chlen; 313 psenc->chlen = 0; 314 315 return (0); 316 317ilseq: 318 *nresult = (size_t)-1; 319 return (EILSEQ); 320} 321 322static __inline int 323/*ARGSUSED*/ 324_citrus_DECHanyu_stdenc_wctocs(_DECHanyuEncodingInfo * __restrict ei __unused, 325 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc) 326{ 327 wchar_t mask; 328 int plane; 329 330 plane = 0; 331 mask = 0x7F; 332 /* XXX: assume wchar_t as int */ 333 if ((uint32_t)wc > 0x7F) { 334 if ((uint32_t)wc > 0xFFFF) { 335 if ((wc & ~0xFFFF) != (wchar_t)HANYUBIT) 336 return (EILSEQ); 337 plane += 2; 338 } 339 if (!is_leadbyte((wc >> 8) & 0xFF) || 340 !is_trailbyte(wc & 0xFF)) 341 return (EILSEQ); 342 plane += (wc & 0x80) ? 1 : 2; 343 mask |= 0x7F00; 344 } 345 *csid = plane; 346 *idx = (_index_t)(wc & mask); 347 348 return (0); 349} 350 351static __inline int 352/*ARGSUSED*/ 353_citrus_DECHanyu_stdenc_cstowc(_DECHanyuEncodingInfo * __restrict ei __unused, 354 wchar_t * __restrict wc, _csid_t csid, _index_t idx) 355{ 356 357 if (csid == 0) { 358 if (idx > 0x7F) 359 return (EILSEQ); 360 } else if (csid <= 4) { 361 if (!is_94charset(idx >> 8)) 362 return (EILSEQ); 363 if (!is_94charset(idx & 0xFF)) 364 return (EILSEQ); 365 if (csid % 2) 366 idx |= 0x80; 367 idx |= 0x8000; 368 if (csid > 2) 369 idx |= HANYUBIT; 370 } else 371 return (EILSEQ); 372 *wc = (wchar_t)idx; 373 return (0); 374} 375 376static __inline int 377/*ARGSUSED*/ 378_citrus_DECHanyu_stdenc_get_state_desc_generic( 379 _DECHanyuEncodingInfo * __restrict ei __unused, 380 _DECHanyuState * __restrict psenc, int * __restrict rstate) 381{ 382 383 *rstate = (psenc->chlen == 0) 384 ? _STDENC_SDGEN_INITIAL 385 : _STDENC_SDGEN_INCOMPLETE_CHAR; 386 return (0); 387} 388 389/* ---------------------------------------------------------------------- 390 * public interface for stdenc 391 */ 392 393_CITRUS_STDENC_DECLS(DECHanyu); 394_CITRUS_STDENC_DEF_OPS(DECHanyu); 395 396#include "citrus_stdenc_template.h" 397