1/* $NetBSD: citrus_euc.c,v 1.12.6.1 2009/01/04 17:02:19 christos Exp $ */ 2 3/*- 4 * Copyright (c)2002 Citrus Project, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29/*- 30 * Copyright (c) 1993 31 * The Regents of the University of California. All rights reserved. 32 * 33 * This code is derived from software contributed to Berkeley by 34 * Paul Borman at Krystal Technologies. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 3. Neither the name of the University nor the names of its contributors 45 * may be used to endorse or promote products derived from this software 46 * without specific prior written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 58 * SUCH DAMAGE. 59 */ 60 61#include <sys/cdefs.h> 62#if defined(LIBC_SCCS) && !defined(lint) 63__RCSID("$NetBSD: citrus_euc.c,v 1.12.6.1 2009/01/04 17:02:19 christos Exp $"); 64#endif /* LIBC_SCCS and not lint */ 65 66#include <assert.h> 67#include <errno.h> 68#include <string.h> 69#include <stdio.h> 70#include <stdlib.h> 71#include <stddef.h> 72#include <wchar.h> 73#include <sys/types.h> 74#include <limits.h> 75 76#include "citrus_namespace.h" 77#include "citrus_bcs.h" 78#include "citrus_types.h" 79#include "citrus_module.h" 80#include "citrus_ctype.h" 81#include "citrus_stdenc.h" 82#include "citrus_euc.h" 83 84 85/* ---------------------------------------------------------------------- 86 * private stuffs used by templates 87 */ 88 89typedef struct { 90 char ch[3]; 91 int chlen; 92} _EUCState; 93 94typedef struct { 95 unsigned count[4]; 96 wchar_t bits[4]; 97 wchar_t mask; 98 unsigned mb_cur_max; 99} _EUCEncodingInfo; 100 101typedef struct { 102 _EUCEncodingInfo ei; 103 struct { 104 /* for future multi-locale facility */ 105 _EUCState s_mblen; 106 _EUCState s_mbrlen; 107 _EUCState s_mbrtowc; 108 _EUCState s_mbtowc; 109 _EUCState s_mbsrtowcs; 110 _EUCState s_wcrtomb; 111 _EUCState s_wcsrtombs; 112 _EUCState s_wctomb; 113 } states; 114} _EUCCTypeInfo; 115 116#define _SS2 0x008e 117#define _SS3 0x008f 118 119#define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 120#define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 121 122#define _FUNCNAME(m) _citrus_EUC_##m 123#define _ENCODING_INFO _EUCEncodingInfo 124#define _CTYPE_INFO _EUCCTypeInfo 125#define _ENCODING_STATE _EUCState 126#define _ENCODING_MB_CUR_MAX(_ei_) (_ei_)->mb_cur_max 127#define _ENCODING_IS_STATE_DEPENDENT 0 128#define _STATE_NEEDS_EXPLICIT_INIT(_ps_) 0 129 130 131static __inline int 132_citrus_EUC_cs(unsigned int c) 133{ 134 c &= 0xff; 135 136 return ((c & 0x80) ? c == _SS3 ? 3 : c == _SS2 ? 2 : 1 : 0); 137} 138 139static __inline int 140_citrus_EUC_parse_variable(_EUCEncodingInfo *ei, 141 const void *var, size_t lenvar) 142{ 143 const char *v, *e; 144 int x; 145 146 /* parse variable string */ 147 if (!var) 148 return (EFTYPE); 149 150 v = (const char *) var; 151 152 while (*v == ' ' || *v == '\t') 153 ++v; 154 155 ei->mb_cur_max = 1; 156 for (x = 0; x < 4; ++x) { 157 ei->count[x] = (int)_bcs_strtol(v, (char **)&e, 0); 158 if (v == e || !(v = e) || ei->count[x]<1 || ei->count[x]>4) { 159 return (EFTYPE); 160 } 161 if (ei->mb_cur_max < ei->count[x]) 162 ei->mb_cur_max = ei->count[x]; 163 while (*v == ' ' || *v == '\t') 164 ++v; 165 ei->bits[x] = (int)_bcs_strtol(v, (char **)&e, 0); 166 if (v == e || !(v = e)) { 167 return (EFTYPE); 168 } 169 while (*v == ' ' || *v == '\t') 170 ++v; 171 } 172 ei->mask = (int)_bcs_strtol(v, (char **)&e, 0); 173 if (v == e || !(v = e)) { 174 return (EFTYPE); 175 } 176 177 return 0; 178} 179 180 181static __inline void 182/*ARGSUSED*/ 183_citrus_EUC_init_state(_EUCEncodingInfo *ei, _EUCState *s) 184{ 185 memset(s, 0, sizeof(*s)); 186} 187 188static __inline void 189/*ARGSUSED*/ 190_citrus_EUC_pack_state(_EUCEncodingInfo *ei, void *pspriv, const _EUCState *s) 191{ 192 memcpy(pspriv, (const void *)s, sizeof(*s)); 193} 194 195static __inline void 196/*ARGSUSED*/ 197_citrus_EUC_unpack_state(_EUCEncodingInfo *ei, _EUCState *s, 198 const void *pspriv) 199{ 200 memcpy((void *)s, pspriv, sizeof(*s)); 201} 202 203static int 204_citrus_EUC_mbrtowc_priv(_EUCEncodingInfo *ei, wchar_t *pwc, const char **s, 205 size_t n, _EUCState *psenc, size_t *nresult) 206{ 207 wchar_t wchar; 208 int c, cs, len; 209 int chlenbak; 210 const char *s0, *s1 = NULL; 211 212 _DIAGASSERT(nresult != 0); 213 _DIAGASSERT(ei != NULL); 214 _DIAGASSERT(psenc != NULL); 215 _DIAGASSERT(s != NULL); 216 217 s0 = *s; 218 219 if (s0 == NULL) { 220 _citrus_EUC_init_state(ei, psenc); 221 *nresult = 0; /* state independent */ 222 return (0); 223 } 224 225 chlenbak = psenc->chlen; 226 227 /* make sure we have the first byte in the buffer */ 228 switch (psenc->chlen) { 229 case 0: 230 if (n < 1) 231 goto restart; 232 psenc->ch[0] = *s0++; 233 psenc->chlen = 1; 234 n--; 235 break; 236 case 1: 237 case 2: 238 break; 239 default: 240 /* illgeal state */ 241 goto encoding_error; 242 } 243 244 c = ei->count[cs = _citrus_EUC_cs(psenc->ch[0] & 0xff)]; 245 if (c == 0) 246 goto encoding_error; 247 while (psenc->chlen < c) { 248 if (n < 1) 249 goto restart; 250 psenc->ch[psenc->chlen] = *s0++; 251 psenc->chlen++; 252 n--; 253 } 254 *s = s0; 255 256 switch (cs) { 257 case 3: 258 case 2: 259 /* skip SS2/SS3 */ 260 len = c - 1; 261 s1 = &psenc->ch[1]; 262 break; 263 case 1: 264 case 0: 265 len = c; 266 s1 = &psenc->ch[0]; 267 break; 268 default: 269 goto encoding_error; 270 } 271 wchar = 0; 272 while (len-- > 0) 273 wchar = (wchar << 8) | (*s1++ & 0xff); 274 wchar = (wchar & ~ei->mask) | ei->bits[cs]; 275 276 psenc->chlen = 0; 277 if (pwc) 278 *pwc = wchar; 279 280 if (!wchar) { 281 *nresult = 0; 282 } else { 283 *nresult = (size_t)(c - chlenbak); 284 } 285 286 return 0; 287 288encoding_error: 289 psenc->chlen = 0; 290 *nresult = (size_t)-1; 291 return (EILSEQ); 292 293restart: 294 *nresult = (size_t)-2; 295 *s = s0; 296 return (0); 297} 298 299static int 300_citrus_EUC_wcrtomb_priv(_EUCEncodingInfo *ei, char *s, size_t n, wchar_t wc, 301 _EUCState *psenc, size_t *nresult) 302{ 303 wchar_t m, nm; 304 int cs, i, ret; 305 306 _DIAGASSERT(ei != NULL); 307 _DIAGASSERT(nresult != 0); 308 _DIAGASSERT(s != NULL); 309 310 m = wc & ei->mask; 311 nm = wc & ~m; 312 313 for (cs = 0; 314 cs < sizeof(ei->count)/sizeof(ei->count[0]); 315 cs++) { 316 if (m == ei->bits[cs]) 317 break; 318 } 319 /* fallback case - not sure if it is necessary */ 320 if (cs == sizeof(ei->count)/sizeof(ei->count[0])) 321 cs = 1; 322 323 i = ei->count[cs]; 324 if (n < i) { 325 ret = E2BIG; 326 goto err; 327 } 328 m = (cs) ? 0x80 : 0x00; 329 switch (cs) { 330 case 2: 331 *s++ = _SS2; 332 i--; 333 break; 334 case 3: 335 *s++ = _SS3; 336 i--; 337 break; 338 } 339 340 while (i-- > 0) 341 *s++ = ((nm >> (i << 3)) & 0xff) | m; 342 343 *nresult = (size_t)ei->count[cs]; 344 return 0; 345 346err: 347 *nresult = (size_t)-1; 348 return ret; 349} 350 351static __inline int 352/*ARGSUSED*/ 353_citrus_EUC_stdenc_wctocs(_EUCEncodingInfo * __restrict ei, 354 _csid_t * __restrict csid, 355 _index_t * __restrict idx, wchar_t wc) 356{ 357 wchar_t m, nm; 358 359 _DIAGASSERT(ei != NULL && csid != NULL && idx != NULL); 360 361 m = wc & ei->mask; 362 nm = wc & ~m; 363 364 *csid = (_citrus_csid_t)m; 365 *idx = (_citrus_index_t)nm; 366 367 return (0); 368} 369 370static __inline int 371/*ARGSUSED*/ 372_citrus_EUC_stdenc_cstowc(_EUCEncodingInfo * __restrict ei, 373 wchar_t * __restrict wc, 374 _csid_t csid, _index_t idx) 375{ 376 377 _DIAGASSERT(ei != NULL && wc != NULL); 378 379 if ((csid & ~ei->mask) != 0 || (idx & ei->mask) != 0) 380 return (EINVAL); 381 382 *wc = (wchar_t)csid | (wchar_t)idx; 383 384 return (0); 385} 386 387static __inline int 388/*ARGSUSED*/ 389_citrus_EUC_stdenc_get_state_desc_generic(_EUCEncodingInfo * __restrict ei, 390 _EUCState * __restrict psenc, 391 int * __restrict rstate) 392{ 393 394 if (psenc->chlen == 0) 395 *rstate = _STDENC_SDGEN_INITIAL; 396 else 397 *rstate = _STDENC_SDGEN_INCOMPLETE_CHAR; 398 399 return 0; 400} 401 402static int 403/*ARGSUSED*/ 404_citrus_EUC_encoding_module_init(_EUCEncodingInfo * __restrict ei, 405 const void * __restrict var, size_t lenvar) 406{ 407 408 _DIAGASSERT(ei != NULL); 409 410 return (_citrus_EUC_parse_variable(ei, var, lenvar)); 411} 412 413static void 414/*ARGSUSED*/ 415_citrus_EUC_encoding_module_uninit(_EUCEncodingInfo * __restrict ei) 416{ 417} 418 419/* ---------------------------------------------------------------------- 420 * public interface for ctype 421 */ 422 423_CITRUS_CTYPE_DECLS(EUC); 424_CITRUS_CTYPE_DEF_OPS(EUC); 425 426#include "citrus_ctype_template.h" 427 428/* ---------------------------------------------------------------------- 429 * public interface for stdenc 430 */ 431 432_CITRUS_STDENC_DECLS(EUC); 433_CITRUS_STDENC_DEF_OPS(EUC); 434 435#include "citrus_stdenc_template.h" 436