1219019Sgabor/* $FreeBSD$ */ 2219019Sgabor/* $NetBSD: citrus_ues.c,v 1.1 2006/11/13 15:16:31 tnozaki Exp $ */ 3219019Sgabor 4219019Sgabor/*- 5219019Sgabor * Copyright (c)2006 Citrus Project, 6219019Sgabor * All rights reserved. 7219019Sgabor * 8219019Sgabor * Redistribution and use in source and binary forms, with or without 9219019Sgabor * modification, are permitted provided that the following conditions 10219019Sgabor * are met: 11219019Sgabor * 1. Redistributions of source code must retain the above copyright 12219019Sgabor * notice, this list of conditions and the following disclaimer. 13219019Sgabor * 2. Redistributions in binary form must reproduce the above copyright 14219019Sgabor * notice, this list of conditions and the following disclaimer in the 15219019Sgabor * documentation and/or other materials provided with the distribution. 16219019Sgabor * 17219019Sgabor * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18219019Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19219019Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20219019Sgabor * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21219019Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22219019Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23219019Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24219019Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25219019Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26219019Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27219019Sgabor * SUCH DAMAGE. 28219019Sgabor */ 29219019Sgabor 30219019Sgabor#include <sys/cdefs.h> 31219019Sgabor 32219019Sgabor#include <assert.h> 33219019Sgabor#include <errno.h> 34219019Sgabor#include <limits.h> 35219019Sgabor#include <stdio.h> 36219019Sgabor#include <stdint.h> 37219019Sgabor#include <stdlib.h> 38219019Sgabor#include <string.h> 39219019Sgabor#include <wchar.h> 40219019Sgabor 41219019Sgabor#include "citrus_namespace.h" 42219019Sgabor#include "citrus_types.h" 43219019Sgabor#include "citrus_bcs.h" 44219019Sgabor#include "citrus_module.h" 45219019Sgabor#include "citrus_stdenc.h" 46219019Sgabor#include "citrus_ues.h" 47219019Sgabor 48219019Sgabortypedef struct { 49219019Sgabor size_t mb_cur_max; 50219019Sgabor int mode; 51219019Sgabor#define MODE_C99 1 52219019Sgabor} _UESEncodingInfo; 53219019Sgabor 54219019Sgabortypedef struct { 55219019Sgabor int chlen; 56219019Sgabor char ch[12]; 57219019Sgabor} _UESState; 58219019Sgabor 59219019Sgabor#define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 60219019Sgabor#define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 61219019Sgabor 62219019Sgabor#define _FUNCNAME(m) _citrus_UES_##m 63219019Sgabor#define _ENCODING_INFO _UESEncodingInfo 64219019Sgabor#define _ENCODING_STATE _UESState 65219019Sgabor#define _ENCODING_MB_CUR_MAX(_ei_) (_ei_)->mb_cur_max 66219019Sgabor#define _ENCODING_IS_STATE_DEPENDENT 0 67219019Sgabor#define _STATE_NEEDS_EXPLICIT_INIT(_ps_) 0 68219019Sgabor 69219019Sgaborstatic __inline void 70219019Sgabor/*ARGSUSED*/ 71219019Sgabor_citrus_UES_init_state(_UESEncodingInfo * __restrict ei __unused, 72219019Sgabor _UESState * __restrict psenc) 73219019Sgabor{ 74219019Sgabor 75219019Sgabor psenc->chlen = 0; 76219019Sgabor} 77219019Sgabor 78260264Sdim#if 0 79219019Sgaborstatic __inline void 80219019Sgabor/*ARGSUSED*/ 81219019Sgabor_citrus_UES_pack_state(_UESEncodingInfo * __restrict ei __unused, 82219019Sgabor void *__restrict pspriv, const _UESState * __restrict psenc) 83219019Sgabor{ 84219019Sgabor 85219019Sgabor memcpy(pspriv, (const void *)psenc, sizeof(*psenc)); 86219019Sgabor} 87219019Sgabor 88219019Sgaborstatic __inline void 89219019Sgabor/*ARGSUSED*/ 90219019Sgabor_citrus_UES_unpack_state(_UESEncodingInfo * __restrict ei __unused, 91219019Sgabor _UESState * __restrict psenc, const void * __restrict pspriv) 92219019Sgabor{ 93219019Sgabor 94219019Sgabor memcpy((void *)psenc, pspriv, sizeof(*psenc)); 95219019Sgabor} 96260264Sdim#endif 97219019Sgabor 98219019Sgaborstatic __inline int 99219019Sgaborto_int(int ch) 100219019Sgabor{ 101219019Sgabor 102219019Sgabor if (ch >= '0' && ch <= '9') 103219019Sgabor return (ch - '0'); 104219019Sgabor else if (ch >= 'A' && ch <= 'F') 105219019Sgabor return ((ch - 'A') + 10); 106219019Sgabor else if (ch >= 'a' && ch <= 'f') 107219019Sgabor return ((ch - 'a') + 10); 108219019Sgabor return (-1); 109219019Sgabor} 110219019Sgabor 111219019Sgabor#define ESCAPE '\\' 112219019Sgabor#define UCS2_ESC 'u' 113219019Sgabor#define UCS4_ESC 'U' 114219019Sgabor 115219019Sgabor#define UCS2_BIT 16 116219019Sgabor#define UCS4_BIT 32 117219019Sgabor#define BMP_MAX UINT32_C(0xFFFF) 118219019Sgabor#define UCS2_MAX UINT32_C(0x10FFFF) 119219019Sgabor#define UCS4_MAX UINT32_C(0x7FFFFFFF) 120219019Sgabor 121219019Sgaborstatic const char *xdig = "0123456789abcdef"; 122219019Sgabor 123219019Sgaborstatic __inline int 124219019Sgaborto_str(char *s, wchar_t wc, int bit) 125219019Sgabor{ 126219019Sgabor char *p; 127219019Sgabor 128219019Sgabor p = s; 129219019Sgabor *p++ = ESCAPE; 130219019Sgabor switch (bit) { 131219019Sgabor case UCS2_BIT: 132219019Sgabor *p++ = UCS2_ESC; 133219019Sgabor break; 134219019Sgabor case UCS4_BIT: 135219019Sgabor *p++ = UCS4_ESC; 136219019Sgabor break; 137219019Sgabor default: 138219019Sgabor abort(); 139219019Sgabor } 140219019Sgabor do { 141219019Sgabor *p++ = xdig[(wc >> (bit -= 4)) & 0xF]; 142219019Sgabor } while (bit > 0); 143219019Sgabor return (p - s); 144219019Sgabor} 145219019Sgabor 146219019Sgaborstatic __inline bool 147219019Sgaboris_hi_surrogate(wchar_t wc) 148219019Sgabor{ 149219019Sgabor 150219019Sgabor return (wc >= 0xD800 && wc <= 0xDBFF); 151219019Sgabor} 152219019Sgabor 153219019Sgaborstatic __inline bool 154219019Sgaboris_lo_surrogate(wchar_t wc) 155219019Sgabor{ 156219019Sgabor 157219019Sgabor return (wc >= 0xDC00 && wc <= 0xDFFF); 158219019Sgabor} 159219019Sgabor 160219019Sgaborstatic __inline wchar_t 161219019Sgaborsurrogate_to_ucs(wchar_t hi, wchar_t lo) 162219019Sgabor{ 163219019Sgabor 164219019Sgabor hi -= 0xD800; 165219019Sgabor lo -= 0xDC00; 166219019Sgabor return ((hi << 10 | lo) + 0x10000); 167219019Sgabor} 168219019Sgabor 169219019Sgaborstatic __inline void 170219019Sgaborucs_to_surrogate(wchar_t wc, wchar_t * __restrict hi, wchar_t * __restrict lo) 171219019Sgabor{ 172219019Sgabor 173219019Sgabor wc -= 0x10000; 174219019Sgabor *hi = (wc >> 10) + 0xD800; 175219019Sgabor *lo = (wc & 0x3FF) + 0xDC00; 176219019Sgabor} 177219019Sgabor 178219019Sgaborstatic __inline bool 179219019Sgaboris_basic(wchar_t wc) 180219019Sgabor{ 181219019Sgabor 182219019Sgabor return ((uint32_t)wc <= 0x9F && wc != 0x24 && wc != 0x40 && 183219019Sgabor wc != 0x60); 184219019Sgabor} 185219019Sgabor 186219019Sgaborstatic int 187219019Sgabor_citrus_UES_mbrtowc_priv(_UESEncodingInfo * __restrict ei, 188219019Sgabor wchar_t * __restrict pwc, char ** __restrict s, size_t n, 189219019Sgabor _UESState * __restrict psenc, size_t * __restrict nresult) 190219019Sgabor{ 191219019Sgabor char *s0; 192219019Sgabor int ch, head, num, tail; 193219019Sgabor wchar_t hi, wc; 194219019Sgabor 195219019Sgabor if (*s == NULL) { 196219019Sgabor _citrus_UES_init_state(ei, psenc); 197219019Sgabor *nresult = 0; 198219019Sgabor return (0); 199219019Sgabor } 200219019Sgabor s0 = *s; 201219019Sgabor 202219019Sgabor hi = (wchar_t)0; 203219019Sgabor tail = 0; 204219019Sgabor 205219019Sgaborsurrogate: 206219019Sgabor wc = (wchar_t)0; 207219019Sgabor head = tail; 208219019Sgabor if (psenc->chlen == head) { 209219019Sgabor if (n-- < 1) 210219019Sgabor goto restart; 211219019Sgabor psenc->ch[psenc->chlen++] = *s0++; 212219019Sgabor } 213219019Sgabor ch = (unsigned char)psenc->ch[head++]; 214219019Sgabor if (ch == ESCAPE) { 215219019Sgabor if (psenc->chlen == head) { 216219019Sgabor if (n-- < 1) 217219019Sgabor goto restart; 218219019Sgabor psenc->ch[psenc->chlen++] = *s0++; 219219019Sgabor } 220219019Sgabor switch (psenc->ch[head]) { 221219019Sgabor case UCS2_ESC: 222219019Sgabor tail += 6; 223219019Sgabor break; 224219019Sgabor case UCS4_ESC: 225219019Sgabor if (ei->mode & MODE_C99) { 226219019Sgabor tail = 10; 227219019Sgabor break; 228219019Sgabor } 229219019Sgabor /*FALLTHROUGH*/ 230219019Sgabor default: 231219019Sgabor tail = 0; 232219019Sgabor } 233219019Sgabor ++head; 234219019Sgabor } 235219019Sgabor for (; head < tail; ++head) { 236219019Sgabor if (psenc->chlen == head) { 237219019Sgabor if (n-- < 1) { 238219019Sgaborrestart: 239219019Sgabor *s = s0; 240219019Sgabor *nresult = (size_t)-2; 241219019Sgabor return (0); 242219019Sgabor } 243219019Sgabor psenc->ch[psenc->chlen++] = *s0++; 244219019Sgabor } 245219019Sgabor num = to_int((int)(unsigned char)psenc->ch[head]); 246219019Sgabor if (num < 0) { 247219019Sgabor tail = 0; 248219019Sgabor break; 249219019Sgabor } 250219019Sgabor wc = (wc << 4) | num; 251219019Sgabor } 252219019Sgabor head = 0; 253219019Sgabor switch (tail) { 254219019Sgabor case 0: 255219019Sgabor break; 256219019Sgabor case 6: 257219019Sgabor if (hi != (wchar_t)0) 258219019Sgabor break; 259219019Sgabor if ((ei->mode & MODE_C99) == 0) { 260219019Sgabor if (is_hi_surrogate(wc) != 0) { 261219019Sgabor hi = wc; 262219019Sgabor goto surrogate; 263219019Sgabor } 264219019Sgabor if ((uint32_t)wc <= 0x7F /* XXX */ || 265219019Sgabor is_lo_surrogate(wc) != 0) 266219019Sgabor break; 267219019Sgabor goto done; 268219019Sgabor } 269219019Sgabor /*FALLTHROUGH*/ 270219019Sgabor case 10: 271219019Sgabor if (is_basic(wc) == 0 && (uint32_t)wc <= UCS4_MAX && 272219019Sgabor is_hi_surrogate(wc) == 0 && is_lo_surrogate(wc) == 0) 273219019Sgabor goto done; 274219019Sgabor *nresult = (size_t)-1; 275219019Sgabor return (EILSEQ); 276219019Sgabor case 12: 277219019Sgabor if (is_lo_surrogate(wc) == 0) 278219019Sgabor break; 279219019Sgabor wc = surrogate_to_ucs(hi, wc); 280219019Sgabor goto done; 281219019Sgabor } 282219019Sgabor ch = (unsigned char)psenc->ch[0]; 283219019Sgabor head = psenc->chlen; 284219019Sgabor if (--head > 0) 285219019Sgabor memmove(&psenc->ch[0], &psenc->ch[1], head); 286219019Sgabor wc = (wchar_t)ch; 287219019Sgabordone: 288219019Sgabor psenc->chlen = head; 289219019Sgabor if (pwc != NULL) 290219019Sgabor *pwc = wc; 291219019Sgabor *nresult = (size_t)((wc == 0) ? 0 : (s0 - *s)); 292219019Sgabor *s = s0; 293219019Sgabor 294219019Sgabor return (0); 295219019Sgabor} 296219019Sgabor 297219019Sgaborstatic int 298219019Sgabor_citrus_UES_wcrtomb_priv(_UESEncodingInfo * __restrict ei, 299219019Sgabor char * __restrict s, size_t n, wchar_t wc, 300219019Sgabor _UESState * __restrict psenc, size_t * __restrict nresult) 301219019Sgabor{ 302219019Sgabor wchar_t hi, lo; 303219019Sgabor 304219019Sgabor if (psenc->chlen != 0) 305219019Sgabor return (EINVAL); 306219019Sgabor 307219019Sgabor if ((ei->mode & MODE_C99) ? is_basic(wc) : (uint32_t)wc <= 0x7F) { 308219019Sgabor if (n-- < 1) 309219019Sgabor goto e2big; 310219019Sgabor psenc->ch[psenc->chlen++] = (char)wc; 311219019Sgabor } else if ((uint32_t)wc <= BMP_MAX) { 312219019Sgabor if (n < 6) 313219019Sgabor goto e2big; 314219019Sgabor psenc->chlen = to_str(&psenc->ch[0], wc, UCS2_BIT); 315219019Sgabor } else if ((ei->mode & MODE_C99) == 0 && (uint32_t)wc <= UCS2_MAX) { 316219019Sgabor if (n < 12) 317219019Sgabor goto e2big; 318219019Sgabor ucs_to_surrogate(wc, &hi, &lo); 319219019Sgabor psenc->chlen += to_str(&psenc->ch[0], hi, UCS2_BIT); 320219019Sgabor psenc->chlen += to_str(&psenc->ch[6], lo, UCS2_BIT); 321219019Sgabor } else if ((ei->mode & MODE_C99) && (uint32_t)wc <= UCS4_MAX) { 322219019Sgabor if (n < 10) 323219019Sgabor goto e2big; 324219019Sgabor psenc->chlen = to_str(&psenc->ch[0], wc, UCS4_BIT); 325219019Sgabor } else { 326219019Sgabor *nresult = (size_t)-1; 327219019Sgabor return (EILSEQ); 328219019Sgabor } 329219019Sgabor memcpy(s, psenc->ch, psenc->chlen); 330219019Sgabor *nresult = psenc->chlen; 331219019Sgabor psenc->chlen = 0; 332219019Sgabor 333219019Sgabor return (0); 334219019Sgabor 335219019Sgabore2big: 336219019Sgabor *nresult = (size_t)-1; 337219019Sgabor return (E2BIG); 338219019Sgabor} 339219019Sgabor 340219019Sgabor/*ARGSUSED*/ 341219019Sgaborstatic int 342219019Sgabor_citrus_UES_stdenc_wctocs(_UESEncodingInfo * __restrict ei __unused, 343219019Sgabor _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc) 344219019Sgabor{ 345219019Sgabor 346219019Sgabor *csid = 0; 347219019Sgabor *idx = (_index_t)wc; 348219019Sgabor 349219019Sgabor return (0); 350219019Sgabor} 351219019Sgabor 352219019Sgaborstatic __inline int 353219019Sgabor/*ARGSUSED*/ 354219019Sgabor_citrus_UES_stdenc_cstowc(_UESEncodingInfo * __restrict ei __unused, 355219019Sgabor wchar_t * __restrict wc, _csid_t csid, _index_t idx) 356219019Sgabor{ 357219019Sgabor 358219019Sgabor if (csid != 0) 359219019Sgabor return (EILSEQ); 360219019Sgabor *wc = (wchar_t)idx; 361219019Sgabor 362219019Sgabor return (0); 363219019Sgabor} 364219019Sgabor 365219019Sgaborstatic __inline int 366219019Sgabor/*ARGSUSED*/ 367219019Sgabor_citrus_UES_stdenc_get_state_desc_generic(_UESEncodingInfo * __restrict ei __unused, 368219019Sgabor _UESState * __restrict psenc, int * __restrict rstate) 369219019Sgabor{ 370219019Sgabor 371219019Sgabor *rstate = (psenc->chlen == 0) ? _STDENC_SDGEN_INITIAL : 372219019Sgabor _STDENC_SDGEN_INCOMPLETE_CHAR; 373219019Sgabor return (0); 374219019Sgabor} 375219019Sgabor 376219019Sgaborstatic void 377219019Sgabor/*ARGSUSED*/ 378219019Sgabor_citrus_UES_encoding_module_uninit(_UESEncodingInfo *ei __unused) 379219019Sgabor{ 380219019Sgabor 381219019Sgabor /* ei seems to be unused */ 382219019Sgabor} 383219019Sgabor 384219019Sgaborstatic int 385219019Sgabor/*ARGSUSED*/ 386219019Sgabor_citrus_UES_encoding_module_init(_UESEncodingInfo * __restrict ei, 387219019Sgabor const void * __restrict var, size_t lenvar) 388219019Sgabor{ 389219019Sgabor const char *p; 390219019Sgabor 391219019Sgabor p = var; 392219019Sgabor memset((void *)ei, 0, sizeof(*ei)); 393219019Sgabor while (lenvar > 0) { 394219019Sgabor switch (_bcs_toupper(*p)) { 395219019Sgabor case 'C': 396219019Sgabor MATCH(C99, ei->mode |= MODE_C99); 397219019Sgabor break; 398219019Sgabor } 399219019Sgabor ++p; 400219019Sgabor --lenvar; 401219019Sgabor } 402219019Sgabor ei->mb_cur_max = (ei->mode & MODE_C99) ? 10 : 12; 403219019Sgabor 404219019Sgabor return (0); 405219019Sgabor} 406219019Sgabor 407219019Sgabor/* ---------------------------------------------------------------------- 408219019Sgabor * public interface for stdenc 409219019Sgabor */ 410219019Sgabor 411219019Sgabor_CITRUS_STDENC_DECLS(UES); 412219019Sgabor_CITRUS_STDENC_DEF_OPS(UES); 413219019Sgabor 414219019Sgabor#include "citrus_stdenc_template.h" 415