1/* $NetBSD: citrus_ues.c,v 1.2 2010/12/07 22:01:22 joerg Exp $ */ 2 3/*- 4 * Copyright (c)2006 Citrus Project, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29#include <sys/cdefs.h> 30#if defined(LIBC_SCCS) && !defined(lint) 31__RCSID("$NetBSD: citrus_ues.c,v 1.2 2010/12/07 22:01:22 joerg Exp $"); 32#endif /* LIBC_SCCS and not lint */ 33 34#include <assert.h> 35#include <errno.h> 36#include <string.h> 37#include <stdio.h> 38#include <stdint.h> 39#include <stdlib.h> 40#include <limits.h> 41#include <wchar.h> 42 43#include "citrus_namespace.h" 44#include "citrus_types.h" 45#include "citrus_bcs.h" 46#include "citrus_module.h" 47#include "citrus_ctype.h" 48#include "citrus_stdenc.h" 49#include "citrus_ues.h" 50 51typedef struct { 52 int mode; 53#define MODE_C99 1 54 size_t mb_cur_max; 55} _UESEncodingInfo; 56 57typedef struct { 58 int chlen; 59 char ch[12]; 60} _UESState; 61 62typedef struct { 63 _UESEncodingInfo ei; 64 struct { 65 /* for future multi-locale facility */ 66 _UESState s_mblen; 67 _UESState s_mbrlen; 68 _UESState s_mbrtowc; 69 _UESState s_mbtowc; 70 _UESState s_mbsrtowcs; 71 _UESState s_wcrtomb; 72 _UESState s_wcsrtombs; 73 _UESState s_wctomb; 74 } states; 75} _UESCTypeInfo; 76 77#define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 78#define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 79 80#define _FUNCNAME(m) _citrus_UES_##m 81#define _ENCODING_INFO _UESEncodingInfo 82#define _CTYPE_INFO _UESCTypeInfo 83#define _ENCODING_STATE _UESState 84#define _ENCODING_MB_CUR_MAX(_ei_) (_ei_)->mb_cur_max 85#define _ENCODING_IS_STATE_DEPENDENT 0 86#define _STATE_NEEDS_EXPLICIT_INIT(_ps_) 0 87 88static __inline void 89/*ARGSUSED*/ 90_citrus_UES_init_state(_UESEncodingInfo * __restrict ei, 91 _UESState * __restrict psenc) 92{ 93 psenc->chlen = 0; 94} 95 96static __inline void 97/*ARGSUSED*/ 98_citrus_UES_pack_state(_UESEncodingInfo * __restrict ei, 99 void *__restrict pspriv, const _UESState * __restrict psenc) 100{ 101 /* ei seem to be unused */ 102 _DIAGASSERT(pspriv != NULL); 103 _DIAGASSERT(psenc != NULL); 104 105 memcpy(pspriv, (const void *)psenc, sizeof(*psenc)); 106} 107 108static __inline void 109/*ARGSUSED*/ 110_citrus_UES_unpack_state(_UESEncodingInfo * __restrict ei, 111 _UESState * __restrict psenc, const void * __restrict pspriv) 112{ 113 /* ei seem to be unused */ 114 _DIAGASSERT(psenc != NULL); 115 _DIAGASSERT(pspriv != NULL); 116 117 memcpy((void *)psenc, pspriv, sizeof(*psenc)); 118} 119 120static __inline int 121to_int(int ch) 122{ 123 if (ch >= '0' && ch <= '9') 124 return ch - '0'; 125 else if (ch >= 'A' && ch <= 'F') 126 return (ch - 'A') + 10; 127 else if (ch >= 'a' && ch <= 'f') 128 return (ch - 'a') + 10; 129 return -1; 130} 131 132#define ESCAPE '\\' 133#define UCS2_ESC 'u' 134#define UCS4_ESC 'U' 135 136#define UCS2_BIT 16 137#define UCS4_BIT 32 138#define BMP_MAX UINT32_C(0xFFFF) 139#define UCS2_MAX UINT32_C(0x10FFFF) 140#define UCS4_MAX UINT32_C(0x7FFFFFFF) 141 142static const char *xdig = "0123456789abcdef"; 143 144static __inline int 145to_str(char *s, wchar_t wc, int bit) 146{ 147 char *p; 148 149 p = s; 150 *p++ = ESCAPE; 151 switch (bit) { 152 case UCS2_BIT: 153 *p++ = UCS2_ESC; 154 break; 155 case UCS4_BIT: 156 *p++ = UCS4_ESC; 157 break; 158 default: 159 abort(); 160 } 161 do { 162 *p++ = xdig[(wc >> (bit -= 4)) & 0xF]; 163 } while (bit > 0); 164 return p - s; 165} 166 167static __inline int 168is_hi_surrogate(wchar_t wc) 169{ 170 return wc >= 0xD800 && wc <= 0xDBFF; 171} 172 173static __inline int 174is_lo_surrogate(wchar_t wc) 175{ 176 return wc >= 0xDC00 && wc <= 0xDFFF; 177} 178 179static __inline wchar_t 180surrogate_to_ucs(wchar_t hi, wchar_t lo) 181{ 182 _DIAGASSERT(is_hi_surrogate(hi)); 183 _DIAGASSERT(is_lo_surrogate(lo)); 184 185 hi -= 0xD800; 186 lo -= 0xDC00; 187 return (hi << 10 | lo) + 0x10000; 188} 189 190static __inline void 191ucs_to_surrogate(wchar_t wc, wchar_t * __restrict hi, wchar_t * __restrict lo) 192{ 193 _DIAGASSERT(hi != NULL); 194 _DIAGASSERT(lo != NULL); 195 _DIAGASSERT(wc >= 0x10000); 196 197 wc -= 0x10000; 198 *hi = (wc >> 10) + 0xD800; 199 *lo = (wc & 0x3FF) + 0xDC00; 200} 201 202static __inline int 203is_basic(wchar_t wc) 204{ 205 return (uint32_t)wc <= 0x9F && 206 wc != 0x24 && wc != 0x40 && wc != 0x60; 207} 208 209static int 210_citrus_UES_mbrtowc_priv(_UESEncodingInfo * __restrict ei, 211 wchar_t * __restrict pwc, const char ** __restrict s, size_t n, 212 _UESState * __restrict psenc, size_t * __restrict nresult) 213{ 214 const char *s0; 215 int ch, head, tail, num; 216 wchar_t hi, wc; 217 218 _DIAGASSERT(ei != NULL); 219 /* pwc may be null */ 220 _DIAGASSERT(s != NULL); 221 _DIAGASSERT(psenc != NULL); 222 _DIAGASSERT(nresult != NULL); 223 224 if (*s == NULL) { 225 _citrus_UES_init_state(ei, psenc); 226 *nresult = 0; 227 return 0; 228 } 229 s0 = *s; 230 231 hi = (wchar_t)0; 232 tail = 0; 233 234surrogate: 235 wc = (wchar_t)0; 236 head = tail; 237 if (psenc->chlen == head) { 238 if (n-- < 1) 239 goto restart; 240 psenc->ch[psenc->chlen++] = *s0++; 241 } 242 ch = (unsigned char)psenc->ch[head++]; 243 if (ch == ESCAPE) { 244 if (psenc->chlen == head) { 245 if (n-- < 1) 246 goto restart; 247 psenc->ch[psenc->chlen++] = *s0++; 248 } 249 switch (psenc->ch[head]) { 250 case UCS2_ESC: 251 tail += 6; 252 break; 253 case UCS4_ESC: 254 if (ei->mode & MODE_C99) { 255 tail = 10; 256 break; 257 } 258 /*FALLTHROUGH*/ 259 default: 260 tail = 0; 261 } 262 ++head; 263 } 264 for (; head < tail; ++head) { 265 if (psenc->chlen == head) { 266 if (n-- < 1) { 267restart: 268 *s = s0; 269 *nresult = (size_t)-2; 270 return 0; 271 } 272 psenc->ch[psenc->chlen++] = *s0++; 273 } 274 num = to_int((int)(unsigned char)psenc->ch[head]); 275 if (num < 0) { 276 tail = 0; 277 break; 278 } 279 wc = (wc << 4) | num; 280 } 281 head = 0; 282 switch (tail) { 283 case 0: 284 break; 285 case 6: 286 if (hi != (wchar_t)0) 287 break; 288 if ((ei->mode & MODE_C99) == 0) { 289 if (is_hi_surrogate(wc) != 0) { 290 hi = wc; 291 goto surrogate; 292 } 293 if ((uint32_t)wc <= 0x7F /* XXX */ || 294 is_lo_surrogate(wc) != 0) 295 break; 296 goto done; 297 } 298 /*FALLTHROUGH*/ 299 case 10: 300 if (is_basic(wc) == 0 && (uint32_t)wc <= UCS4_MAX && 301 is_hi_surrogate(wc) == 0 && is_lo_surrogate(wc) == 0) 302 goto done; 303 *nresult = (size_t)-1; 304 return EILSEQ; 305 case 12: 306 if (is_lo_surrogate(wc) == 0) 307 break; 308 wc = surrogate_to_ucs(hi, wc); 309 goto done; 310 } 311 ch = (unsigned char)psenc->ch[0]; 312 head = psenc->chlen; 313 if (--head > 0) 314 memmove(&psenc->ch[0], &psenc->ch[1], head); 315 wc = (wchar_t)ch; 316done: 317 psenc->chlen = head; 318 if (pwc != NULL) 319 *pwc = wc; 320 *nresult = (size_t)((wc == 0) ? 0 : (s0 - *s)); 321 *s = s0; 322 323 return 0; 324} 325 326static int 327_citrus_UES_wcrtomb_priv(_UESEncodingInfo * __restrict ei, 328 char * __restrict s, size_t n, wchar_t wc, 329 _UESState * __restrict psenc, size_t * __restrict nresult) 330{ 331 wchar_t hi, lo; 332 333 if (psenc->chlen != 0) 334 return EINVAL; 335 336 if ((ei->mode & MODE_C99) ? is_basic(wc) : (uint32_t)wc <= 0x7F) { 337 if (n-- < 1) 338 goto e2big; 339 psenc->ch[psenc->chlen++] = (char)wc; 340 } else if ((uint32_t)wc <= BMP_MAX) { 341 if (n < 6) 342 goto e2big; 343 psenc->chlen = to_str(&psenc->ch[0], wc, UCS2_BIT); 344 } else if ((ei->mode & MODE_C99) == 0 && (uint32_t)wc <= UCS2_MAX) { 345 if (n < 12) 346 goto e2big; 347 ucs_to_surrogate(wc, &hi, &lo); 348 psenc->chlen += to_str(&psenc->ch[0], hi, UCS2_BIT); 349 psenc->chlen += to_str(&psenc->ch[6], lo, UCS2_BIT); 350 } else if ((ei->mode & MODE_C99) && (uint32_t)wc <= UCS4_MAX) { 351 if (n < 10) 352 goto e2big; 353 psenc->chlen = to_str(&psenc->ch[0], wc, UCS4_BIT); 354 } else { 355 *nresult = (size_t)-1; 356 return EILSEQ; 357 } 358 memcpy(s, psenc->ch, psenc->chlen); 359 *nresult = psenc->chlen; 360 psenc->chlen = 0; 361 362 return 0; 363 364e2big: 365 *nresult = (size_t)-1; 366 return E2BIG; 367} 368 369/*ARGSUSED*/ 370static int 371_citrus_UES_stdenc_wctocs(_UESEncodingInfo * __restrict ei, 372 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc) 373{ 374 /* ei seem to be unused */ 375 _DIAGASSERT(csid != NULL); 376 _DIAGASSERT(idx != NULL); 377 378 *csid = 0; 379 *idx = (_index_t)wc; 380 381 return 0; 382} 383 384static __inline int 385/*ARGSUSED*/ 386_citrus_UES_stdenc_cstowc(_UESEncodingInfo * __restrict ei, 387 wchar_t * __restrict wc, _csid_t csid, _index_t idx) 388{ 389 /* ei seem to be unused */ 390 _DIAGASSERT(wc != NULL); 391 392 if (csid != 0) 393 return EILSEQ; 394 *wc = (wchar_t)idx; 395 396 return 0; 397} 398 399static __inline int 400/*ARGSUSED*/ 401_citrus_UES_stdenc_get_state_desc_generic(_UESEncodingInfo * __restrict ei, 402 _UESState * __restrict psenc, int * __restrict rstate) 403{ 404 _DIAGASSERT(psenc != NULL); 405 _DIAGASSERT(rstate != NULL); 406 407 if (psenc->chlen == 0) 408 *rstate = _STDENC_SDGEN_INITIAL; 409 else 410 *rstate = _STDENC_SDGEN_INCOMPLETE_CHAR; /* XXX */ 411 412 return 0; 413} 414 415static void 416/*ARGSUSED*/ 417_citrus_UES_encoding_module_uninit(_UESEncodingInfo *ei) 418{ 419 /* ei seems to be unused */ 420} 421 422static int 423/*ARGSUSED*/ 424_citrus_UES_encoding_module_init(_UESEncodingInfo * __restrict ei, 425 const void * __restrict var, size_t lenvar) 426{ 427 const char *p; 428 429 _DIAGASSERT(ei != NULL); 430 431 p = var; 432#define MATCH(x, act) \ 433do { \ 434 if (lenvar >= (sizeof(#x)-1) && \ 435 _bcs_strncasecmp(p, #x, sizeof(#x)-1) == 0) { \ 436 act; \ 437 lenvar -= sizeof(#x)-1; \ 438 p += sizeof(#x)-1; \ 439 } \ 440} while (/*CONSTCOND*/0) 441 memset((void *)ei, 0, sizeof(*ei)); 442 while (lenvar > 0) { 443 switch (_bcs_toupper(*p)) { 444 case 'C': 445 MATCH(C99, ei->mode |= MODE_C99); 446 break; 447 } 448 ++p; 449 --lenvar; 450 } 451 ei->mb_cur_max = (ei->mode & MODE_C99) ? 10 : 12; 452 453 return 0; 454} 455 456/* ---------------------------------------------------------------------- 457 * public interface for ctype 458 */ 459 460_CITRUS_CTYPE_DECLS(UES); 461_CITRUS_CTYPE_DEF_OPS(UES); 462 463#include "citrus_ctype_template.h" 464 465/* ---------------------------------------------------------------------- 466 * public interface for stdenc 467 */ 468 469_CITRUS_STDENC_DECLS(UES); 470_CITRUS_STDENC_DEF_OPS(UES); 471 472#include "citrus_stdenc_template.h" 473