/* citrus_ues.c revision 219019 */
1219019Sgabor/* $FreeBSD: head/lib/libiconv_modules/UES/citrus_ues.c 219019 2011-02-25 00:04:39Z gabor $ */ 2219019Sgabor/* $NetBSD: citrus_ues.c,v 1.1 2006/11/13 15:16:31 tnozaki Exp $ */ 3219019Sgabor 4219019Sgabor/*- 5219019Sgabor * Copyright (c)2006 Citrus Project, 6219019Sgabor * All rights reserved. 7219019Sgabor * 8219019Sgabor * Redistribution and use in source and binary forms, with or without 9219019Sgabor * modification, are permitted provided that the following conditions 10219019Sgabor * are met: 11219019Sgabor * 1. Redistributions of source code must retain the above copyright 12219019Sgabor * notice, this list of conditions and the following disclaimer. 13219019Sgabor * 2. Redistributions in binary form must reproduce the above copyright 14219019Sgabor * notice, this list of conditions and the following disclaimer in the 15219019Sgabor * documentation and/or other materials provided with the distribution. 16219019Sgabor * 17219019Sgabor * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18219019Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19219019Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20219019Sgabor * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21219019Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22219019Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23219019Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24219019Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25219019Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26219019Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27219019Sgabor * SUCH DAMAGE. 
28219019Sgabor */ 29219019Sgabor 30219019Sgabor#include <sys/cdefs.h> 31219019Sgabor 32219019Sgabor#include <assert.h> 33219019Sgabor#include <errno.h> 34219019Sgabor#include <limits.h> 35219019Sgabor#include <stdio.h> 36219019Sgabor#include <stdint.h> 37219019Sgabor#include <stdlib.h> 38219019Sgabor#include <string.h> 39219019Sgabor#include <wchar.h> 40219019Sgabor 41219019Sgabor#include "citrus_namespace.h" 42219019Sgabor#include "citrus_types.h" 43219019Sgabor#include "citrus_bcs.h" 44219019Sgabor#include "citrus_module.h" 45219019Sgabor#include "citrus_stdenc.h" 46219019Sgabor#include "citrus_ues.h" 47219019Sgabor 48219019Sgabortypedef struct { 49219019Sgabor size_t mb_cur_max; 50219019Sgabor int mode; 51219019Sgabor#define MODE_C99 1 52219019Sgabor} _UESEncodingInfo; 53219019Sgabor 54219019Sgabortypedef struct { 55219019Sgabor int chlen; 56219019Sgabor char ch[12]; 57219019Sgabor} _UESState; 58219019Sgabor 59219019Sgabor#define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 60219019Sgabor#define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 61219019Sgabor 62219019Sgabor#define _FUNCNAME(m) _citrus_UES_##m 63219019Sgabor#define _ENCODING_INFO _UESEncodingInfo 64219019Sgabor#define _ENCODING_STATE _UESState 65219019Sgabor#define _ENCODING_MB_CUR_MAX(_ei_) (_ei_)->mb_cur_max 66219019Sgabor#define _ENCODING_IS_STATE_DEPENDENT 0 67219019Sgabor#define _STATE_NEEDS_EXPLICIT_INIT(_ps_) 0 68219019Sgabor 69219019Sgaborstatic __inline void 70219019Sgabor/*ARGSUSED*/ 71219019Sgabor_citrus_UES_init_state(_UESEncodingInfo * __restrict ei __unused, 72219019Sgabor _UESState * __restrict psenc) 73219019Sgabor{ 74219019Sgabor 75219019Sgabor psenc->chlen = 0; 76219019Sgabor} 77219019Sgabor 78219019Sgaborstatic __inline void 79219019Sgabor/*ARGSUSED*/ 80219019Sgabor_citrus_UES_pack_state(_UESEncodingInfo * __restrict ei __unused, 81219019Sgabor void *__restrict pspriv, const _UESState * __restrict psenc) 82219019Sgabor{ 83219019Sgabor 84219019Sgabor memcpy(pspriv, (const void 
*)psenc, sizeof(*psenc)); 85219019Sgabor} 86219019Sgabor 87219019Sgaborstatic __inline void 88219019Sgabor/*ARGSUSED*/ 89219019Sgabor_citrus_UES_unpack_state(_UESEncodingInfo * __restrict ei __unused, 90219019Sgabor _UESState * __restrict psenc, const void * __restrict pspriv) 91219019Sgabor{ 92219019Sgabor 93219019Sgabor memcpy((void *)psenc, pspriv, sizeof(*psenc)); 94219019Sgabor} 95219019Sgabor 96219019Sgaborstatic __inline int 97219019Sgaborto_int(int ch) 98219019Sgabor{ 99219019Sgabor 100219019Sgabor if (ch >= '0' && ch <= '9') 101219019Sgabor return (ch - '0'); 102219019Sgabor else if (ch >= 'A' && ch <= 'F') 103219019Sgabor return ((ch - 'A') + 10); 104219019Sgabor else if (ch >= 'a' && ch <= 'f') 105219019Sgabor return ((ch - 'a') + 10); 106219019Sgabor return (-1); 107219019Sgabor} 108219019Sgabor 109219019Sgabor#define ESCAPE '\\' 110219019Sgabor#define UCS2_ESC 'u' 111219019Sgabor#define UCS4_ESC 'U' 112219019Sgabor 113219019Sgabor#define UCS2_BIT 16 114219019Sgabor#define UCS4_BIT 32 115219019Sgabor#define BMP_MAX UINT32_C(0xFFFF) 116219019Sgabor#define UCS2_MAX UINT32_C(0x10FFFF) 117219019Sgabor#define UCS4_MAX UINT32_C(0x7FFFFFFF) 118219019Sgabor 119219019Sgaborstatic const char *xdig = "0123456789abcdef"; 120219019Sgabor 121219019Sgaborstatic __inline int 122219019Sgaborto_str(char *s, wchar_t wc, int bit) 123219019Sgabor{ 124219019Sgabor char *p; 125219019Sgabor 126219019Sgabor p = s; 127219019Sgabor *p++ = ESCAPE; 128219019Sgabor switch (bit) { 129219019Sgabor case UCS2_BIT: 130219019Sgabor *p++ = UCS2_ESC; 131219019Sgabor break; 132219019Sgabor case UCS4_BIT: 133219019Sgabor *p++ = UCS4_ESC; 134219019Sgabor break; 135219019Sgabor default: 136219019Sgabor abort(); 137219019Sgabor } 138219019Sgabor do { 139219019Sgabor *p++ = xdig[(wc >> (bit -= 4)) & 0xF]; 140219019Sgabor } while (bit > 0); 141219019Sgabor return (p - s); 142219019Sgabor} 143219019Sgabor 144219019Sgaborstatic __inline bool 145219019Sgaboris_hi_surrogate(wchar_t wc) 
146219019Sgabor{ 147219019Sgabor 148219019Sgabor return (wc >= 0xD800 && wc <= 0xDBFF); 149219019Sgabor} 150219019Sgabor 151219019Sgaborstatic __inline bool 152219019Sgaboris_lo_surrogate(wchar_t wc) 153219019Sgabor{ 154219019Sgabor 155219019Sgabor return (wc >= 0xDC00 && wc <= 0xDFFF); 156219019Sgabor} 157219019Sgabor 158219019Sgaborstatic __inline wchar_t 159219019Sgaborsurrogate_to_ucs(wchar_t hi, wchar_t lo) 160219019Sgabor{ 161219019Sgabor 162219019Sgabor hi -= 0xD800; 163219019Sgabor lo -= 0xDC00; 164219019Sgabor return ((hi << 10 | lo) + 0x10000); 165219019Sgabor} 166219019Sgabor 167219019Sgaborstatic __inline void 168219019Sgaborucs_to_surrogate(wchar_t wc, wchar_t * __restrict hi, wchar_t * __restrict lo) 169219019Sgabor{ 170219019Sgabor 171219019Sgabor wc -= 0x10000; 172219019Sgabor *hi = (wc >> 10) + 0xD800; 173219019Sgabor *lo = (wc & 0x3FF) + 0xDC00; 174219019Sgabor} 175219019Sgabor 176219019Sgaborstatic __inline bool 177219019Sgaboris_basic(wchar_t wc) 178219019Sgabor{ 179219019Sgabor 180219019Sgabor return ((uint32_t)wc <= 0x9F && wc != 0x24 && wc != 0x40 && 181219019Sgabor wc != 0x60); 182219019Sgabor} 183219019Sgabor 184219019Sgaborstatic int 185219019Sgabor_citrus_UES_mbrtowc_priv(_UESEncodingInfo * __restrict ei, 186219019Sgabor wchar_t * __restrict pwc, char ** __restrict s, size_t n, 187219019Sgabor _UESState * __restrict psenc, size_t * __restrict nresult) 188219019Sgabor{ 189219019Sgabor char *s0; 190219019Sgabor int ch, head, num, tail; 191219019Sgabor wchar_t hi, wc; 192219019Sgabor 193219019Sgabor if (*s == NULL) { 194219019Sgabor _citrus_UES_init_state(ei, psenc); 195219019Sgabor *nresult = 0; 196219019Sgabor return (0); 197219019Sgabor } 198219019Sgabor s0 = *s; 199219019Sgabor 200219019Sgabor hi = (wchar_t)0; 201219019Sgabor tail = 0; 202219019Sgabor 203219019Sgaborsurrogate: 204219019Sgabor wc = (wchar_t)0; 205219019Sgabor head = tail; 206219019Sgabor if (psenc->chlen == head) { 207219019Sgabor if (n-- < 1) 208219019Sgabor goto 
restart; 209219019Sgabor psenc->ch[psenc->chlen++] = *s0++; 210219019Sgabor } 211219019Sgabor ch = (unsigned char)psenc->ch[head++]; 212219019Sgabor if (ch == ESCAPE) { 213219019Sgabor if (psenc->chlen == head) { 214219019Sgabor if (n-- < 1) 215219019Sgabor goto restart; 216219019Sgabor psenc->ch[psenc->chlen++] = *s0++; 217219019Sgabor } 218219019Sgabor switch (psenc->ch[head]) { 219219019Sgabor case UCS2_ESC: 220219019Sgabor tail += 6; 221219019Sgabor break; 222219019Sgabor case UCS4_ESC: 223219019Sgabor if (ei->mode & MODE_C99) { 224219019Sgabor tail = 10; 225219019Sgabor break; 226219019Sgabor } 227219019Sgabor /*FALLTHROUGH*/ 228219019Sgabor default: 229219019Sgabor tail = 0; 230219019Sgabor } 231219019Sgabor ++head; 232219019Sgabor } 233219019Sgabor for (; head < tail; ++head) { 234219019Sgabor if (psenc->chlen == head) { 235219019Sgabor if (n-- < 1) { 236219019Sgaborrestart: 237219019Sgabor *s = s0; 238219019Sgabor *nresult = (size_t)-2; 239219019Sgabor return (0); 240219019Sgabor } 241219019Sgabor psenc->ch[psenc->chlen++] = *s0++; 242219019Sgabor } 243219019Sgabor num = to_int((int)(unsigned char)psenc->ch[head]); 244219019Sgabor if (num < 0) { 245219019Sgabor tail = 0; 246219019Sgabor break; 247219019Sgabor } 248219019Sgabor wc = (wc << 4) | num; 249219019Sgabor } 250219019Sgabor head = 0; 251219019Sgabor switch (tail) { 252219019Sgabor case 0: 253219019Sgabor break; 254219019Sgabor case 6: 255219019Sgabor if (hi != (wchar_t)0) 256219019Sgabor break; 257219019Sgabor if ((ei->mode & MODE_C99) == 0) { 258219019Sgabor if (is_hi_surrogate(wc) != 0) { 259219019Sgabor hi = wc; 260219019Sgabor goto surrogate; 261219019Sgabor } 262219019Sgabor if ((uint32_t)wc <= 0x7F /* XXX */ || 263219019Sgabor is_lo_surrogate(wc) != 0) 264219019Sgabor break; 265219019Sgabor goto done; 266219019Sgabor } 267219019Sgabor /*FALLTHROUGH*/ 268219019Sgabor case 10: 269219019Sgabor if (is_basic(wc) == 0 && (uint32_t)wc <= UCS4_MAX && 270219019Sgabor is_hi_surrogate(wc) == 0 && 
is_lo_surrogate(wc) == 0) 271219019Sgabor goto done; 272219019Sgabor *nresult = (size_t)-1; 273219019Sgabor return (EILSEQ); 274219019Sgabor case 12: 275219019Sgabor if (is_lo_surrogate(wc) == 0) 276219019Sgabor break; 277219019Sgabor wc = surrogate_to_ucs(hi, wc); 278219019Sgabor goto done; 279219019Sgabor } 280219019Sgabor ch = (unsigned char)psenc->ch[0]; 281219019Sgabor head = psenc->chlen; 282219019Sgabor if (--head > 0) 283219019Sgabor memmove(&psenc->ch[0], &psenc->ch[1], head); 284219019Sgabor wc = (wchar_t)ch; 285219019Sgabordone: 286219019Sgabor psenc->chlen = head; 287219019Sgabor if (pwc != NULL) 288219019Sgabor *pwc = wc; 289219019Sgabor *nresult = (size_t)((wc == 0) ? 0 : (s0 - *s)); 290219019Sgabor *s = s0; 291219019Sgabor 292219019Sgabor return (0); 293219019Sgabor} 294219019Sgabor 295219019Sgaborstatic int 296219019Sgabor_citrus_UES_wcrtomb_priv(_UESEncodingInfo * __restrict ei, 297219019Sgabor char * __restrict s, size_t n, wchar_t wc, 298219019Sgabor _UESState * __restrict psenc, size_t * __restrict nresult) 299219019Sgabor{ 300219019Sgabor wchar_t hi, lo; 301219019Sgabor 302219019Sgabor if (psenc->chlen != 0) 303219019Sgabor return (EINVAL); 304219019Sgabor 305219019Sgabor if ((ei->mode & MODE_C99) ? 
is_basic(wc) : (uint32_t)wc <= 0x7F) { 306219019Sgabor if (n-- < 1) 307219019Sgabor goto e2big; 308219019Sgabor psenc->ch[psenc->chlen++] = (char)wc; 309219019Sgabor } else if ((uint32_t)wc <= BMP_MAX) { 310219019Sgabor if (n < 6) 311219019Sgabor goto e2big; 312219019Sgabor psenc->chlen = to_str(&psenc->ch[0], wc, UCS2_BIT); 313219019Sgabor } else if ((ei->mode & MODE_C99) == 0 && (uint32_t)wc <= UCS2_MAX) { 314219019Sgabor if (n < 12) 315219019Sgabor goto e2big; 316219019Sgabor ucs_to_surrogate(wc, &hi, &lo); 317219019Sgabor psenc->chlen += to_str(&psenc->ch[0], hi, UCS2_BIT); 318219019Sgabor psenc->chlen += to_str(&psenc->ch[6], lo, UCS2_BIT); 319219019Sgabor } else if ((ei->mode & MODE_C99) && (uint32_t)wc <= UCS4_MAX) { 320219019Sgabor if (n < 10) 321219019Sgabor goto e2big; 322219019Sgabor psenc->chlen = to_str(&psenc->ch[0], wc, UCS4_BIT); 323219019Sgabor } else { 324219019Sgabor *nresult = (size_t)-1; 325219019Sgabor return (EILSEQ); 326219019Sgabor } 327219019Sgabor memcpy(s, psenc->ch, psenc->chlen); 328219019Sgabor *nresult = psenc->chlen; 329219019Sgabor psenc->chlen = 0; 330219019Sgabor 331219019Sgabor return (0); 332219019Sgabor 333219019Sgabore2big: 334219019Sgabor *nresult = (size_t)-1; 335219019Sgabor return (E2BIG); 336219019Sgabor} 337219019Sgabor 338219019Sgabor/*ARGSUSED*/ 339219019Sgaborstatic int 340219019Sgabor_citrus_UES_stdenc_wctocs(_UESEncodingInfo * __restrict ei __unused, 341219019Sgabor _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc) 342219019Sgabor{ 343219019Sgabor 344219019Sgabor *csid = 0; 345219019Sgabor *idx = (_index_t)wc; 346219019Sgabor 347219019Sgabor return (0); 348219019Sgabor} 349219019Sgabor 350219019Sgaborstatic __inline int 351219019Sgabor/*ARGSUSED*/ 352219019Sgabor_citrus_UES_stdenc_cstowc(_UESEncodingInfo * __restrict ei __unused, 353219019Sgabor wchar_t * __restrict wc, _csid_t csid, _index_t idx) 354219019Sgabor{ 355219019Sgabor 356219019Sgabor if (csid != 0) 357219019Sgabor return (EILSEQ); 
358219019Sgabor *wc = (wchar_t)idx; 359219019Sgabor 360219019Sgabor return (0); 361219019Sgabor} 362219019Sgabor 363219019Sgaborstatic __inline int 364219019Sgabor/*ARGSUSED*/ 365219019Sgabor_citrus_UES_stdenc_get_state_desc_generic(_UESEncodingInfo * __restrict ei __unused, 366219019Sgabor _UESState * __restrict psenc, int * __restrict rstate) 367219019Sgabor{ 368219019Sgabor 369219019Sgabor *rstate = (psenc->chlen == 0) ? _STDENC_SDGEN_INITIAL : 370219019Sgabor _STDENC_SDGEN_INCOMPLETE_CHAR; 371219019Sgabor return (0); 372219019Sgabor} 373219019Sgabor 374219019Sgaborstatic void 375219019Sgabor/*ARGSUSED*/ 376219019Sgabor_citrus_UES_encoding_module_uninit(_UESEncodingInfo *ei __unused) 377219019Sgabor{ 378219019Sgabor 379219019Sgabor /* ei seems to be unused */ 380219019Sgabor} 381219019Sgabor 382219019Sgaborstatic int 383219019Sgabor/*ARGSUSED*/ 384219019Sgabor_citrus_UES_encoding_module_init(_UESEncodingInfo * __restrict ei, 385219019Sgabor const void * __restrict var, size_t lenvar) 386219019Sgabor{ 387219019Sgabor const char *p; 388219019Sgabor 389219019Sgabor p = var; 390219019Sgabor memset((void *)ei, 0, sizeof(*ei)); 391219019Sgabor while (lenvar > 0) { 392219019Sgabor switch (_bcs_toupper(*p)) { 393219019Sgabor case 'C': 394219019Sgabor MATCH(C99, ei->mode |= MODE_C99); 395219019Sgabor break; 396219019Sgabor } 397219019Sgabor ++p; 398219019Sgabor --lenvar; 399219019Sgabor } 400219019Sgabor ei->mb_cur_max = (ei->mode & MODE_C99) ? 10 : 12; 401219019Sgabor 402219019Sgabor return (0); 403219019Sgabor} 404219019Sgabor 405219019Sgabor/* ---------------------------------------------------------------------- 406219019Sgabor * public interface for stdenc 407219019Sgabor */ 408219019Sgabor 409219019Sgabor_CITRUS_STDENC_DECLS(UES); 410219019Sgabor_CITRUS_STDENC_DEF_OPS(UES); 411219019Sgabor 412219019Sgabor#include "citrus_stdenc_template.h" 413