/* citrus_euc.c revision 219019 */
1219019Sgabor/* $FreeBSD: head/lib/libiconv_modules/EUC/citrus_euc.c 219019 2011-02-25 00:04:39Z gabor $ */ 2219019Sgabor/* $NetBSD: citrus_euc.c,v 1.14 2009/01/11 02:46:24 christos Exp $ */ 3219019Sgabor 4219019Sgabor/*- 5219019Sgabor * Copyright (c)2002 Citrus Project, 6219019Sgabor * All rights reserved. 7219019Sgabor * 8219019Sgabor * Redistribution and use in source and binary forms, with or without 9219019Sgabor * modification, are permitted provided that the following conditions 10219019Sgabor * are met: 11219019Sgabor * 1. Redistributions of source code must retain the above copyright 12219019Sgabor * notice, this list of conditions and the following disclaimer. 13219019Sgabor * 2. Redistributions in binary form must reproduce the above copyright 14219019Sgabor * notice, this list of conditions and the following disclaimer in the 15219019Sgabor * documentation and/or other materials provided with the distribution. 16219019Sgabor * 17219019Sgabor * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18219019Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19219019Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20219019Sgabor * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21219019Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22219019Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23219019Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24219019Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25219019Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26219019Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27219019Sgabor * SUCH DAMAGE. 
28219019Sgabor */ 29219019Sgabor 30219019Sgabor/*- 31219019Sgabor * Copyright (c) 1993 32219019Sgabor * The Regents of the University of California. All rights reserved. 33219019Sgabor * 34219019Sgabor * This code is derived from software contributed to Berkeley by 35219019Sgabor * Paul Borman at Krystal Technologies. 36219019Sgabor * 37219019Sgabor * Redistribution and use in source and binary forms, with or without 38219019Sgabor * modification, are permitted provided that the following conditions 39219019Sgabor * are met: 40219019Sgabor * 1. Redistributions of source code must retain the above copyright 41219019Sgabor * notice, this list of conditions and the following disclaimer. 42219019Sgabor * 2. Redistributions in binary form must reproduce the above copyright 43219019Sgabor * notice, this list of conditions and the following disclaimer in the 44219019Sgabor * documentation and/or other materials provided with the distribution. 45219019Sgabor * 3. Neither the name of the University nor the names of its contributors 46219019Sgabor * may be used to endorse or promote products derived from this software 47219019Sgabor * without specific prior written permission. 48219019Sgabor * 49219019Sgabor * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 50219019Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51219019Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52219019Sgabor * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 53219019Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54219019Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55219019Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56219019Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57219019Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58219019Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59219019Sgabor * SUCH DAMAGE. 60219019Sgabor */ 61219019Sgabor 62219019Sgabor#include <sys/cdefs.h> 63219019Sgabor#include <sys/types.h> 64219019Sgabor 65219019Sgabor#include <assert.h> 66219019Sgabor#include <errno.h> 67219019Sgabor#include <limits.h> 68219019Sgabor#include <stddef.h> 69219019Sgabor#include <stdio.h> 70219019Sgabor#include <stdlib.h> 71219019Sgabor#include <string.h> 72219019Sgabor#include <wchar.h> 73219019Sgabor 74219019Sgabor#include "citrus_namespace.h" 75219019Sgabor#include "citrus_bcs.h" 76219019Sgabor#include "citrus_types.h" 77219019Sgabor#include "citrus_module.h" 78219019Sgabor#include "citrus_stdenc.h" 79219019Sgabor#include "citrus_euc.h" 80219019Sgabor 81219019Sgabor 82219019Sgabor/* ---------------------------------------------------------------------- 83219019Sgabor * private stuffs used by templates 84219019Sgabor */ 85219019Sgabor 86219019Sgabortypedef struct { 87219019Sgabor int chlen; 88219019Sgabor char ch[3]; 89219019Sgabor} _EUCState; 90219019Sgabor 91219019Sgabortypedef struct { 92219019Sgabor wchar_t bits[4]; 93219019Sgabor wchar_t mask; 94219019Sgabor unsigned count[4]; 95219019Sgabor unsigned mb_cur_max; 96219019Sgabor} _EUCEncodingInfo; 97219019Sgabor 98219019Sgabor#define _SS2 0x008e 99219019Sgabor#define _SS3 0x008f 100219019Sgabor 101219019Sgabor#define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 102219019Sgabor#define _CEI_TO_STATE(_cei_, 
_func_) (_cei_)->states.s_##_func_ 103219019Sgabor 104219019Sgabor#define _FUNCNAME(m) _citrus_EUC_##m 105219019Sgabor#define _ENCODING_INFO _EUCEncodingInfo 106219019Sgabor#define _ENCODING_STATE _EUCState 107219019Sgabor#define _ENCODING_MB_CUR_MAX(_ei_) (_ei_)->mb_cur_max 108219019Sgabor#define _ENCODING_IS_STATE_DEPENDENT 0 109219019Sgabor#define _STATE_NEEDS_EXPLICIT_INIT(_ps_) 0 110219019Sgabor 111219019Sgabor 112219019Sgaborstatic __inline int 113219019Sgabor_citrus_EUC_cs(unsigned int c) 114219019Sgabor{ 115219019Sgabor 116219019Sgabor c &= 0xff; 117219019Sgabor 118219019Sgabor return ((c & 0x80) ? c == _SS3 ? 3 : c == _SS2 ? 2 : 1 : 0); 119219019Sgabor} 120219019Sgabor 121219019Sgaborstatic __inline int 122219019Sgabor_citrus_EUC_parse_variable(_EUCEncodingInfo *ei, const void *var, 123219019Sgabor size_t lenvar __unused) 124219019Sgabor{ 125219019Sgabor char *e; 126219019Sgabor const char *v; 127219019Sgabor int x; 128219019Sgabor 129219019Sgabor /* parse variable string */ 130219019Sgabor if (!var) 131219019Sgabor return (EFTYPE); 132219019Sgabor 133219019Sgabor v = (const char *)var; 134219019Sgabor 135219019Sgabor while (*v == ' ' || *v == '\t') 136219019Sgabor ++v; 137219019Sgabor 138219019Sgabor ei->mb_cur_max = 1; 139219019Sgabor for (x = 0; x < 4; ++x) { 140219019Sgabor ei->count[x] = (int)_bcs_strtol(v, (char **)&e, 0); 141219019Sgabor if (v == e || !(v = e) || ei->count[x] < 1 || ei->count[x] > 4) { 142219019Sgabor return (EFTYPE); 143219019Sgabor } 144219019Sgabor if (ei->mb_cur_max < ei->count[x]) 145219019Sgabor ei->mb_cur_max = ei->count[x]; 146219019Sgabor while (*v == ' ' || *v == '\t') 147219019Sgabor ++v; 148219019Sgabor ei->bits[x] = (int)_bcs_strtol(v, (char **)&e, 0); 149219019Sgabor if (v == e || !(v = e)) { 150219019Sgabor return (EFTYPE); 151219019Sgabor } 152219019Sgabor while (*v == ' ' || *v == '\t') 153219019Sgabor ++v; 154219019Sgabor } 155219019Sgabor ei->mask = (int)_bcs_strtol(v, (char **)&e, 0); 156219019Sgabor if (v == e 
|| !(v = e)) { 157219019Sgabor return (EFTYPE); 158219019Sgabor } 159219019Sgabor 160219019Sgabor return (0); 161219019Sgabor} 162219019Sgabor 163219019Sgabor 164219019Sgaborstatic __inline void 165219019Sgabor/*ARGSUSED*/ 166219019Sgabor_citrus_EUC_init_state(_EUCEncodingInfo *ei __unused, _EUCState *s) 167219019Sgabor{ 168219019Sgabor 169219019Sgabor memset(s, 0, sizeof(*s)); 170219019Sgabor} 171219019Sgabor 172219019Sgaborstatic __inline void 173219019Sgabor/*ARGSUSED*/ 174219019Sgabor_citrus_EUC_pack_state(_EUCEncodingInfo *ei __unused, void *pspriv, 175219019Sgabor const _EUCState *s) 176219019Sgabor{ 177219019Sgabor 178219019Sgabor memcpy(pspriv, (const void *)s, sizeof(*s)); 179219019Sgabor} 180219019Sgabor 181219019Sgaborstatic __inline void 182219019Sgabor/*ARGSUSED*/ 183219019Sgabor_citrus_EUC_unpack_state(_EUCEncodingInfo *ei __unused, _EUCState *s, 184219019Sgabor const void *pspriv) 185219019Sgabor{ 186219019Sgabor 187219019Sgabor memcpy((void *)s, pspriv, sizeof(*s)); 188219019Sgabor} 189219019Sgabor 190219019Sgaborstatic int 191219019Sgabor_citrus_EUC_mbrtowc_priv(_EUCEncodingInfo *ei, wchar_t *pwc, char **s, 192219019Sgabor size_t n, _EUCState *psenc, size_t *nresult) 193219019Sgabor{ 194219019Sgabor wchar_t wchar; 195219019Sgabor int c, chlenbak, cs, len; 196219019Sgabor char *s0, *s1 = NULL; 197219019Sgabor 198219019Sgabor s0 = *s; 199219019Sgabor 200219019Sgabor if (s0 == NULL) { 201219019Sgabor _citrus_EUC_init_state(ei, psenc); 202219019Sgabor *nresult = 0; /* state independent */ 203219019Sgabor return (0); 204219019Sgabor } 205219019Sgabor 206219019Sgabor chlenbak = psenc->chlen; 207219019Sgabor 208219019Sgabor /* make sure we have the first byte in the buffer */ 209219019Sgabor switch (psenc->chlen) { 210219019Sgabor case 0: 211219019Sgabor if (n < 1) 212219019Sgabor goto restart; 213219019Sgabor psenc->ch[0] = *s0++; 214219019Sgabor psenc->chlen = 1; 215219019Sgabor n--; 216219019Sgabor break; 217219019Sgabor case 1: 218219019Sgabor case 2: 
219219019Sgabor break; 220219019Sgabor default: 221219019Sgabor /* illgeal state */ 222219019Sgabor goto encoding_error; 223219019Sgabor } 224219019Sgabor 225219019Sgabor c = ei->count[cs = _citrus_EUC_cs(psenc->ch[0] & 0xff)]; 226219019Sgabor if (c == 0) 227219019Sgabor goto encoding_error; 228219019Sgabor while (psenc->chlen < c) { 229219019Sgabor if (n < 1) 230219019Sgabor goto restart; 231219019Sgabor psenc->ch[psenc->chlen] = *s0++; 232219019Sgabor psenc->chlen++; 233219019Sgabor n--; 234219019Sgabor } 235219019Sgabor *s = s0; 236219019Sgabor 237219019Sgabor switch (cs) { 238219019Sgabor case 3: 239219019Sgabor case 2: 240219019Sgabor /* skip SS2/SS3 */ 241219019Sgabor len = c - 1; 242219019Sgabor s1 = &psenc->ch[1]; 243219019Sgabor break; 244219019Sgabor case 1: 245219019Sgabor case 0: 246219019Sgabor len = c; 247219019Sgabor s1 = &psenc->ch[0]; 248219019Sgabor break; 249219019Sgabor default: 250219019Sgabor goto encoding_error; 251219019Sgabor } 252219019Sgabor wchar = 0; 253219019Sgabor while (len-- > 0) 254219019Sgabor wchar = (wchar << 8) | (*s1++ & 0xff); 255219019Sgabor wchar = (wchar & ~ei->mask) | ei->bits[cs]; 256219019Sgabor 257219019Sgabor psenc->chlen = 0; 258219019Sgabor if (pwc) 259219019Sgabor *pwc = wchar; 260219019Sgabor *nresult = wchar ? 
(size_t)(c - chlenbak) : 0; 261219019Sgabor return (0); 262219019Sgabor 263219019Sgaborencoding_error: 264219019Sgabor psenc->chlen = 0; 265219019Sgabor *nresult = (size_t)-1; 266219019Sgabor return (EILSEQ); 267219019Sgabor 268219019Sgaborrestart: 269219019Sgabor *nresult = (size_t)-2; 270219019Sgabor *s = s0; 271219019Sgabor return (0); 272219019Sgabor} 273219019Sgabor 274219019Sgaborstatic int 275219019Sgabor_citrus_EUC_wcrtomb_priv(_EUCEncodingInfo *ei, char *s, size_t n, wchar_t wc, 276219019Sgabor _EUCState *psenc __unused, size_t *nresult) 277219019Sgabor{ 278219019Sgabor wchar_t m, nm; 279219019Sgabor unsigned int cs; 280219019Sgabor int ret; 281219019Sgabor short i; 282219019Sgabor 283219019Sgabor m = wc & ei->mask; 284219019Sgabor nm = wc & ~m; 285219019Sgabor 286219019Sgabor for (cs = 0; cs < sizeof(ei->count) / sizeof(ei->count[0]); cs++) 287219019Sgabor if (m == ei->bits[cs]) 288219019Sgabor break; 289219019Sgabor /* fallback case - not sure if it is necessary */ 290219019Sgabor if (cs == sizeof(ei->count) / sizeof(ei->count[0])) 291219019Sgabor cs = 1; 292219019Sgabor 293219019Sgabor i = ei->count[cs]; 294219019Sgabor if (n < (unsigned)i) { 295219019Sgabor ret = E2BIG; 296219019Sgabor goto err; 297219019Sgabor } 298219019Sgabor m = (cs) ? 
0x80 : 0x00; 299219019Sgabor switch (cs) { 300219019Sgabor case 2: 301219019Sgabor *s++ = _SS2; 302219019Sgabor i--; 303219019Sgabor break; 304219019Sgabor case 3: 305219019Sgabor *s++ = _SS3; 306219019Sgabor i--; 307219019Sgabor break; 308219019Sgabor } 309219019Sgabor 310219019Sgabor while (i-- > 0) 311219019Sgabor *s++ = ((nm >> (i << 3)) & 0xff) | m; 312219019Sgabor 313219019Sgabor *nresult = (size_t)ei->count[cs]; 314219019Sgabor return (0); 315219019Sgabor 316219019Sgaborerr: 317219019Sgabor *nresult = (size_t)-1; 318219019Sgabor return (ret); 319219019Sgabor} 320219019Sgabor 321219019Sgaborstatic __inline int 322219019Sgabor/*ARGSUSED*/ 323219019Sgabor_citrus_EUC_stdenc_wctocs(_EUCEncodingInfo * __restrict ei, 324219019Sgabor _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc) 325219019Sgabor{ 326219019Sgabor wchar_t m, nm; 327219019Sgabor 328219019Sgabor m = wc & ei->mask; 329219019Sgabor nm = wc & ~m; 330219019Sgabor 331219019Sgabor *csid = (_citrus_csid_t)m; 332219019Sgabor *idx = (_citrus_index_t)nm; 333219019Sgabor 334219019Sgabor return (0); 335219019Sgabor} 336219019Sgabor 337219019Sgaborstatic __inline int 338219019Sgabor/*ARGSUSED*/ 339219019Sgabor_citrus_EUC_stdenc_cstowc(_EUCEncodingInfo * __restrict ei, 340219019Sgabor wchar_t * __restrict wc, _csid_t csid, _index_t idx) 341219019Sgabor{ 342219019Sgabor 343219019Sgabor if ((csid & ~ei->mask) != 0 || (idx & ei->mask) != 0) 344219019Sgabor return (EINVAL); 345219019Sgabor 346219019Sgabor *wc = (wchar_t)csid | (wchar_t)idx; 347219019Sgabor 348219019Sgabor return (0); 349219019Sgabor} 350219019Sgabor 351219019Sgaborstatic __inline int 352219019Sgabor/*ARGSUSED*/ 353219019Sgabor_citrus_EUC_stdenc_get_state_desc_generic(_EUCEncodingInfo * __restrict ei __unused, 354219019Sgabor _EUCState * __restrict psenc, int * __restrict rstate) 355219019Sgabor{ 356219019Sgabor 357219019Sgabor *rstate = (psenc->chlen == 0) ? 
_STDENC_SDGEN_INITIAL : 358219019Sgabor _STDENC_SDGEN_INCOMPLETE_CHAR; 359219019Sgabor return (0); 360219019Sgabor} 361219019Sgabor 362219019Sgaborstatic int 363219019Sgabor/*ARGSUSED*/ 364219019Sgabor_citrus_EUC_encoding_module_init(_EUCEncodingInfo * __restrict ei, 365219019Sgabor const void * __restrict var, size_t lenvar) 366219019Sgabor{ 367219019Sgabor 368219019Sgabor return (_citrus_EUC_parse_variable(ei, var, lenvar)); 369219019Sgabor} 370219019Sgabor 371219019Sgaborstatic void 372219019Sgabor/*ARGSUSED*/ 373219019Sgabor_citrus_EUC_encoding_module_uninit(_EUCEncodingInfo * __restrict ei __unused) 374219019Sgabor{ 375219019Sgabor 376219019Sgabor} 377219019Sgabor 378219019Sgabor/* ---------------------------------------------------------------------- 379219019Sgabor * public interface for stdenc 380219019Sgabor */ 381219019Sgabor 382219019Sgabor_CITRUS_STDENC_DECLS(EUC); 383219019Sgabor_CITRUS_STDENC_DEF_OPS(EUC); 384219019Sgabor 385219019Sgabor#include "citrus_stdenc_template.h" 386