1/* $FreeBSD$ */ 2/* $NetBSD: citrus_gbk2k.c,v 1.7 2008/06/14 16:01:07 tnozaki Exp $ */ 3 4/*- 5 * SPDX-License-Identifier: BSD-2-Clause 6 * 7 * Copyright (c)2003 Citrus Project, 8 * All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32#include <sys/cdefs.h> 33#include <sys/types.h> 34 35#include <assert.h> 36#include <errno.h> 37#include <limits.h> 38#include <stdbool.h> 39#include <stddef.h> 40#include <stdio.h> 41#include <stdlib.h> 42#include <string.h> 43#include <wchar.h> 44 45#include "citrus_namespace.h" 46#include "citrus_types.h" 47#include "citrus_bcs.h" 48#include "citrus_module.h" 49#include "citrus_stdenc.h" 50#include "citrus_gbk2k.h" 51 52 53/* ---------------------------------------------------------------------- 54 * private stuffs used by templates 55 */ 56 57typedef struct _GBK2KState { 58 int chlen; 59 char ch[4]; 60} _GBK2KState; 61 62typedef struct { 63 int mb_cur_max; 64} _GBK2KEncodingInfo; 65 66#define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 67#define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 68 69#define _FUNCNAME(m) _citrus_GBK2K_##m 70#define _ENCODING_INFO _GBK2KEncodingInfo 71#define _ENCODING_STATE _GBK2KState 72#define _ENCODING_MB_CUR_MAX(_ei_) (_ei_)->mb_cur_max 73#define _ENCODING_IS_STATE_DEPENDENT 0 74#define _STATE_NEEDS_EXPLICIT_INIT(_ps_) 0 75 76static __inline void 77/*ARGSUSED*/ 78_citrus_GBK2K_init_state(_GBK2KEncodingInfo * __restrict ei __unused, 79 _GBK2KState * __restrict s) 80{ 81 82 memset(s, 0, sizeof(*s)); 83} 84 85#if 0 86static __inline void 87/*ARGSUSED*/ 88_citrus_GBK2K_pack_state(_GBK2KEncodingInfo * __restrict ei __unused, 89 void * __restrict pspriv, const _GBK2KState * __restrict s) 90{ 91 92 memcpy(pspriv, (const void *)s, sizeof(*s)); 93} 94 95static __inline void 96/*ARGSUSED*/ 97_citrus_GBK2K_unpack_state(_GBK2KEncodingInfo * __restrict ei __unused, 98 _GBK2KState * __restrict s, const void * __restrict pspriv) 99{ 100 101 memcpy((void *)s, pspriv, sizeof(*s)); 102} 103#endif 104 105static __inline bool 106_mb_singlebyte(int c) 107{ 108 109 return ((c & 0xff) <= 0x7f); 110} 111 112static __inline bool 113_mb_leadbyte(int c) 114{ 115 116 c &= 0xff; 117 return (0x81 <= c && c <= 0xfe); 118} 119 120static __inline bool 121_mb_trailbyte(int c) 122{ 123 124 c &= 0xff; 125 return ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfe)); 126} 127 128static __inline bool 129_mb_surrogate(int c) 130{ 131 132 c &= 0xff; 133 return (0x30 <= c && c <= 0x39); 134} 135 136static __inline int 137_mb_count(wchar_t v) 138{ 139 uint32_t c; 140 141 c = (uint32_t)v; /* XXX */ 142 if (!(c & 0xffffff00)) 143 return (1); 144 if (!(c & 0xffff0000)) 145 return (2); 146 return (4); 147} 148 149#define _PSENC (psenc->ch[psenc->chlen - 1]) 150#define _PUSH_PSENC(c) (psenc->ch[psenc->chlen++] = (c)) 151 152static int 153_citrus_GBK2K_mbrtowc_priv(_GBK2KEncodingInfo * __restrict ei, 154 wchar_t * __restrict pwc, char ** __restrict s, size_t n, 155 _GBK2KState * __restrict psenc, size_t * __restrict nresult) 156{ 157 char *s0, *s1; 158 wchar_t wc; 159 int chlenbak, len; 160 161 s0 = *s; 162 163 if (s0 == NULL) { 164 /* _citrus_GBK2K_init_state(ei, psenc); */ 165 psenc->chlen = 0; 166 *nresult = 0; 167 return (0); 168 } 169 170 chlenbak = psenc->chlen; 171 172 switch (psenc->chlen) { 173 case 3: 174 if (!_mb_leadbyte (_PSENC)) 175 goto invalid; 176 /* FALLTHROUGH */ 177 case 2: 178 if (!_mb_surrogate(_PSENC) || _mb_trailbyte(_PSENC)) 179 goto invalid; 180 /* FALLTHROUGH */ 181 case 1: 182 if (!_mb_leadbyte (_PSENC)) 183 goto invalid; 184 /* FALLTHOROUGH */ 185 case 0: 186 break; 187 default: 188 goto invalid; 189 } 190 191 for (;;) { 192 if (n-- < 1) 193 goto restart; 194 195 _PUSH_PSENC(*s0++); 196 197 switch (psenc->chlen) { 198 case 1: 199 if (_mb_singlebyte(_PSENC)) 200 goto convert; 201 if (_mb_leadbyte (_PSENC)) 202 continue; 203 goto ilseq; 204 case 2: 205 if (_mb_trailbyte (_PSENC)) 206 goto convert; 207 if (ei->mb_cur_max == 4 && 208 _mb_surrogate (_PSENC)) 209 continue; 210 goto ilseq; 211 case 3: 212 if (_mb_leadbyte (_PSENC)) 213 continue; 214 goto ilseq; 215 case 4: 216 if (_mb_surrogate (_PSENC)) 217 goto convert; 218 goto ilseq; 219 } 220 } 221 222convert: 223 len = psenc->chlen; 224 s1 = &psenc->ch[0]; 225 wc = 0; 226 while (len-- > 0) 227 wc = (wc << 8) | (*s1++ & 0xff); 228 229 if (pwc != NULL) 230 *pwc = wc; 231 *s = s0; 232 *nresult = (wc == 0) ? 0 : psenc->chlen - chlenbak; 233 /* _citrus_GBK2K_init_state(ei, psenc); */ 234 psenc->chlen = 0; 235 236 return (0); 237 238restart: 239 *s = s0; 240 *nresult = (size_t)-2; 241 242 return (0); 243 244invalid: 245 return (EINVAL); 246 247ilseq: 248 *nresult = (size_t)-1; 249 return (EILSEQ); 250} 251 252static int 253_citrus_GBK2K_wcrtomb_priv(_GBK2KEncodingInfo * __restrict ei, 254 char * __restrict s, size_t n, wchar_t wc, _GBK2KState * __restrict psenc, 255 size_t * __restrict nresult) 256{ 257 size_t len; 258 int ret; 259 260 if (psenc->chlen != 0) { 261 ret = EINVAL; 262 goto err; 263 } 264 265 len = _mb_count(wc); 266 if (n < len) { 267 ret = E2BIG; 268 goto err; 269 } 270 271 switch (len) { 272 case 1: 273 if (!_mb_singlebyte(_PUSH_PSENC(wc ))) { 274 ret = EILSEQ; 275 goto err; 276 } 277 break; 278 case 2: 279 if (!_mb_leadbyte (_PUSH_PSENC(wc >> 8)) || 280 !_mb_trailbyte (_PUSH_PSENC(wc))) { 281 ret = EILSEQ; 282 goto err; 283 } 284 break; 285 case 4: 286 if (ei->mb_cur_max != 4 || 287 !_mb_leadbyte (_PUSH_PSENC(wc >> 24)) || 288 !_mb_surrogate (_PUSH_PSENC(wc >> 16)) || 289 !_mb_leadbyte (_PUSH_PSENC(wc >> 8)) || 290 !_mb_surrogate (_PUSH_PSENC(wc))) { 291 ret = EILSEQ; 292 goto err; 293 } 294 break; 295 } 296 297 memcpy(s, psenc->ch, psenc->chlen); 298 *nresult = psenc->chlen; 299 /* _citrus_GBK2K_init_state(ei, psenc); */ 300 psenc->chlen = 0; 301 302 return (0); 303 304err: 305 *nresult = (size_t)-1; 306 return (ret); 307} 308 309static __inline int 310/*ARGSUSED*/ 311_citrus_GBK2K_stdenc_wctocs(_GBK2KEncodingInfo * __restrict ei __unused, 312 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc) 313{ 314 uint8_t ch, cl; 315 316 if ((uint32_t)wc < 0x80) { 317 /* ISO646 */ 318 *csid = 0; 319 *idx = (_index_t)wc; 320 } else if ((uint32_t)wc >= 0x10000) { 321 /* GBKUCS : XXX */ 322 *csid = 3; 323 *idx = (_index_t)wc; 324 } else { 325 ch = (uint8_t)(wc >> 8); 326 cl = (uint8_t)wc; 327 if (ch >= 0xA1 && cl >= 0xA1) { 328 /* EUC G1 */ 329 *csid = 1; 330 *idx = (_index_t)wc & 0x7F7FU; 331 } else { 332 /* extended area (0x8140-) */ 333 *csid = 2; 334 *idx = (_index_t)wc; 335 } 336 } 337 338 return (0); 339} 340 341static __inline int 342/*ARGSUSED*/ 343_citrus_GBK2K_stdenc_cstowc(_GBK2KEncodingInfo * __restrict ei, 344 wchar_t * __restrict wc, _csid_t csid, _index_t idx) 345{ 346 347 switch (csid) { 348 case 0: 349 /* ISO646 */ 350 *wc = (wchar_t)idx; 351 break; 352 case 1: 353 /* EUC G1 */ 354 *wc = (wchar_t)idx | 0x8080U; 355 break; 356 case 2: 357 /* extended area */ 358 *wc = (wchar_t)idx; 359 break; 360 case 3: 361 /* GBKUCS : XXX */ 362 if (ei->mb_cur_max != 4) 363 return (EINVAL); 364 *wc = (wchar_t)idx; 365 break; 366 default: 367 return (EILSEQ); 368 } 369 370 return (0); 371} 372 373static __inline int 374/*ARGSUSED*/ 375_citrus_GBK2K_stdenc_get_state_desc_generic(_GBK2KEncodingInfo * __restrict ei __unused, 376 _GBK2KState * __restrict psenc, int * __restrict rstate) 377{ 378 379 *rstate = (psenc->chlen == 0) ? _STDENC_SDGEN_INITIAL : 380 _STDENC_SDGEN_INCOMPLETE_CHAR; 381 return (0); 382} 383 384static int 385/*ARGSUSED*/ 386_citrus_GBK2K_encoding_module_init(_GBK2KEncodingInfo * __restrict ei, 387 const void * __restrict var, size_t lenvar) 388{ 389 const char *p; 390 391 p = var; 392 memset((void *)ei, 0, sizeof(*ei)); 393 ei->mb_cur_max = 4; 394 while (lenvar > 0) { 395 switch (_bcs_tolower(*p)) { 396 case '2': 397 MATCH("2byte", ei->mb_cur_max = 2); 398 break; 399 } 400 p++; 401 lenvar--; 402 } 403 404 return (0); 405} 406 407static void 408/*ARGSUSED*/ 409_citrus_GBK2K_encoding_module_uninit(_GBK2KEncodingInfo *ei __unused) 410{ 411 412} 413 414/* ---------------------------------------------------------------------- 415 * public interface for stdenc 416 */ 417 418_CITRUS_STDENC_DECLS(GBK2K); 419_CITRUS_STDENC_DEF_OPS(GBK2K); 420 421#include "citrus_stdenc_template.h" 422