citrus_ues.c revision 260264
196643Sobrien/* $FreeBSD: stable/9/lib/libiconv_modules/UES/citrus_ues.c 260264 2014-01-04 17:27:43Z dim $ */ 296643Sobrien/* $NetBSD: citrus_ues.c,v 1.1 2006/11/13 15:16:31 tnozaki Exp $ */ 396643Sobrien 496643Sobrien/*- 596643Sobrien * Copyright (c)2006 Citrus Project, 6160819Ssimon * All rights reserved. 7160819Ssimon * 8162915Ssimon * Redistribution and use in source and binary forms, with or without 9162915Ssimon * modification, are permitted provided that the following conditions 10162915Ssimon * are met: 11162915Ssimon * 1. Redistributions of source code must retain the above copyright 12160819Ssimon * notice, this list of conditions and the following disclaimer. 13160819Ssimon * 2. Redistributions in binary form must reproduce the above copyright 14160819Ssimon * notice, this list of conditions and the following disclaimer in the 15160819Ssimon * documentation and/or other materials provided with the distribution. 16160819Ssimon * 17160819Ssimon * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18160819Ssimon * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19160819Ssimon * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20160819Ssimon * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21160819Ssimon * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22160819Ssimon * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23160819Ssimon * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24160819Ssimon * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25160819Ssimon * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26160819Ssimon * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27160819Ssimon * SUCH DAMAGE. 28160819Ssimon */ 29160819Ssimon 30160819Ssimon#include <sys/cdefs.h> 31160819Ssimon 32160819Ssimon#include <assert.h> 3396643Sobrien#include <errno.h> 34160819Ssimon#include <limits.h> 35160819Ssimon#include <stdio.h> 3696643Sobrien#include <stdint.h> 37160819Ssimon#include <stdlib.h> 38160819Ssimon#include <string.h> 39160819Ssimon#include <wchar.h> 40160819Ssimon 4196643Sobrien#include "citrus_namespace.h" 4296643Sobrien#include "citrus_types.h" 4396643Sobrien#include "citrus_bcs.h" 4496643Sobrien#include "citrus_module.h" 4596643Sobrien#include "citrus_stdenc.h" 4696643Sobrien#include "citrus_ues.h" 4796643Sobrien 4896643Sobrientypedef struct { 4996643Sobrien size_t mb_cur_max; 5096643Sobrien int mode; 5196643Sobrien#define MODE_C99 1 5296643Sobrien} _UESEncodingInfo; 5396643Sobrien 54160819Ssimontypedef struct { 5596643Sobrien int chlen; 5696643Sobrien char ch[12]; 5796643Sobrien} _UESState; 5896643Sobrien 59160819Ssimon#define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 6096643Sobrien#define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 6196643Sobrien 62160819Ssimon#define _FUNCNAME(m) _citrus_UES_##m 63160819Ssimon#define _ENCODING_INFO _UESEncodingInfo 6496643Sobrien#define _ENCODING_STATE _UESState 6596643Sobrien#define _ENCODING_MB_CUR_MAX(_ei_) (_ei_)->mb_cur_max 6696643Sobrien#define _ENCODING_IS_STATE_DEPENDENT 0 6796643Sobrien#define _STATE_NEEDS_EXPLICIT_INIT(_ps_) 0 6896643Sobrien 6996643Sobrienstatic __inline void 7096643Sobrien/*ARGSUSED*/ 7196643Sobrien_citrus_UES_init_state(_UESEncodingInfo * __restrict ei __unused, 7296643Sobrien _UESState * __restrict psenc) 7396643Sobrien{ 7496643Sobrien 7596643Sobrien psenc->chlen = 0; 7696643Sobrien} 7796643Sobrien 7896643Sobrien#if 0 7996643Sobrienstatic __inline void 8096643Sobrien/*ARGSUSED*/ 8196643Sobrien_citrus_UES_pack_state(_UESEncodingInfo * __restrict ei __unused, 8296643Sobrien void *__restrict pspriv, const _UESState * __restrict psenc) 8396643Sobrien{ 8496643Sobrien 8596643Sobrien memcpy(pspriv, (const void *)psenc, sizeof(*psenc)); 8696643Sobrien} 8796643Sobrien 8896643Sobrienstatic __inline void 8996643Sobrien/*ARGSUSED*/ 9096643Sobrien_citrus_UES_unpack_state(_UESEncodingInfo * __restrict ei __unused, 9196643Sobrien _UESState * __restrict psenc, const void * __restrict pspriv) 9296643Sobrien{ 9396643Sobrien 9496643Sobrien memcpy((void *)psenc, pspriv, sizeof(*psenc)); 9596643Sobrien} 9696643Sobrien#endif 97160819Ssimon 9896643Sobrienstatic __inline int 9996643Sobriento_int(int ch) 10096643Sobrien{ 10196643Sobrien 10296643Sobrien if (ch >= '0' && ch <= '9') 10396643Sobrien return (ch - '0'); 10496643Sobrien else if (ch >= 'A' && ch <= 'F') 10596643Sobrien return ((ch - 'A') + 10); 10696643Sobrien else if (ch >= 'a' && ch <= 'f') 10796643Sobrien return ((ch - 'a') + 10); 10896643Sobrien return (-1); 10996643Sobrien} 11096643Sobrien 11196643Sobrien#define ESCAPE '\\' 11296643Sobrien#define UCS2_ESC 'u' 11396643Sobrien#define UCS4_ESC 'U' 11496643Sobrien 11596643Sobrien#define UCS2_BIT 16 11696643Sobrien#define UCS4_BIT 32 11796643Sobrien#define BMP_MAX UINT32_C(0xFFFF) 11896643Sobrien#define UCS2_MAX UINT32_C(0x10FFFF) 11996643Sobrien#define UCS4_MAX UINT32_C(0x7FFFFFFF) 12096643Sobrien 12196643Sobrienstatic const char *xdig = "0123456789abcdef"; 12296643Sobrien 12396643Sobrienstatic __inline int 12496643Sobriento_str(char *s, wchar_t wc, int bit) 12596643Sobrien{ 12696643Sobrien char *p; 12796643Sobrien 12896643Sobrien p = s; 12996643Sobrien *p++ = ESCAPE; 13096643Sobrien switch (bit) { 13196643Sobrien case UCS2_BIT: 13296643Sobrien *p++ = UCS2_ESC; 13396643Sobrien break; 13496643Sobrien case UCS4_BIT: 13596643Sobrien *p++ = UCS4_ESC; 13696643Sobrien break; 13796643Sobrien default: 13896643Sobrien abort(); 13996643Sobrien } 14096643Sobrien do { 14196643Sobrien *p++ = xdig[(wc >> (bit -= 4)) & 0xF]; 14296643Sobrien } while (bit > 0); 14396643Sobrien return (p - s); 14496643Sobrien} 14596643Sobrien 14696643Sobrienstatic __inline bool 14796643Sobrienis_hi_surrogate(wchar_t wc) 14896643Sobrien{ 14996643Sobrien 15096643Sobrien return (wc >= 0xD800 && wc <= 0xDBFF); 15196643Sobrien} 15296643Sobrien 15396643Sobrienstatic __inline bool 15496643Sobrienis_lo_surrogate(wchar_t wc) 15596643Sobrien{ 15696643Sobrien 15796643Sobrien return (wc >= 0xDC00 && wc <= 0xDFFF); 15896643Sobrien} 15996643Sobrien 16096643Sobrienstatic __inline wchar_t 16196643Sobriensurrogate_to_ucs(wchar_t hi, wchar_t lo) 16296643Sobrien{ 16396643Sobrien 16496643Sobrien hi -= 0xD800; 16596643Sobrien lo -= 0xDC00; 16696643Sobrien return ((hi << 10 | lo) + 0x10000); 16796643Sobrien} 16896643Sobrien 16996643Sobrienstatic __inline void 17096643Sobrienucs_to_surrogate(wchar_t wc, wchar_t * __restrict hi, wchar_t * __restrict lo) 17196643Sobrien{ 17296643Sobrien 17396643Sobrien wc -= 0x10000; 17496643Sobrien *hi = (wc >> 10) + 0xD800; 17596643Sobrien *lo = (wc & 0x3FF) + 0xDC00; 17696643Sobrien} 17796643Sobrien 17896643Sobrienstatic __inline bool 17996643Sobrienis_basic(wchar_t wc) 18096643Sobrien{ 18196643Sobrien 18296643Sobrien return ((uint32_t)wc <= 0x9F && wc != 0x24 && wc != 0x40 && 18396643Sobrien wc != 0x60); 18496643Sobrien} 18596643Sobrien 18696643Sobrienstatic int 18796643Sobrien_citrus_UES_mbrtowc_priv(_UESEncodingInfo * __restrict ei, 18896643Sobrien wchar_t * __restrict pwc, char ** __restrict s, size_t n, 18996643Sobrien _UESState * __restrict psenc, size_t * __restrict nresult) 19096643Sobrien{ 19196643Sobrien char *s0; 19296643Sobrien int ch, head, num, tail; 19396643Sobrien wchar_t hi, wc; 19496643Sobrien 19596643Sobrien if (*s == NULL) { 19696643Sobrien _citrus_UES_init_state(ei, psenc); 19796643Sobrien *nresult = 0; 19896643Sobrien return (0); 19996643Sobrien } 20096643Sobrien s0 = *s; 20196643Sobrien 20296643Sobrien hi = (wchar_t)0; 20396643Sobrien tail = 0; 20496643Sobrien 20596643Sobriensurrogate: 206 wc = (wchar_t)0; 207 head = tail; 208 if (psenc->chlen == head) { 209 if (n-- < 1) 210 goto restart; 211 psenc->ch[psenc->chlen++] = *s0++; 212 } 213 ch = (unsigned char)psenc->ch[head++]; 214 if (ch == ESCAPE) { 215 if (psenc->chlen == head) { 216 if (n-- < 1) 217 goto restart; 218 psenc->ch[psenc->chlen++] = *s0++; 219 } 220 switch (psenc->ch[head]) { 221 case UCS2_ESC: 222 tail += 6; 223 break; 224 case UCS4_ESC: 225 if (ei->mode & MODE_C99) { 226 tail = 10; 227 break; 228 } 229 /*FALLTHROUGH*/ 230 default: 231 tail = 0; 232 } 233 ++head; 234 } 235 for (; head < tail; ++head) { 236 if (psenc->chlen == head) { 237 if (n-- < 1) { 238restart: 239 *s = s0; 240 *nresult = (size_t)-2; 241 return (0); 242 } 243 psenc->ch[psenc->chlen++] = *s0++; 244 } 245 num = to_int((int)(unsigned char)psenc->ch[head]); 246 if (num < 0) { 247 tail = 0; 248 break; 249 } 250 wc = (wc << 4) | num; 251 } 252 head = 0; 253 switch (tail) { 254 case 0: 255 break; 256 case 6: 257 if (hi != (wchar_t)0) 258 break; 259 if ((ei->mode & MODE_C99) == 0) { 260 if (is_hi_surrogate(wc) != 0) { 261 hi = wc; 262 goto surrogate; 263 } 264 if ((uint32_t)wc <= 0x7F /* XXX */ || 265 is_lo_surrogate(wc) != 0) 266 break; 267 goto done; 268 } 269 /*FALLTHROUGH*/ 270 case 10: 271 if (is_basic(wc) == 0 && (uint32_t)wc <= UCS4_MAX && 272 is_hi_surrogate(wc) == 0 && is_lo_surrogate(wc) == 0) 273 goto done; 274 *nresult = (size_t)-1; 275 return (EILSEQ); 276 case 12: 277 if (is_lo_surrogate(wc) == 0) 278 break; 279 wc = surrogate_to_ucs(hi, wc); 280 goto done; 281 } 282 ch = (unsigned char)psenc->ch[0]; 283 head = psenc->chlen; 284 if (--head > 0) 285 memmove(&psenc->ch[0], &psenc->ch[1], head); 286 wc = (wchar_t)ch; 287done: 288 psenc->chlen = head; 289 if (pwc != NULL) 290 *pwc = wc; 291 *nresult = (size_t)((wc == 0) ? 0 : (s0 - *s)); 292 *s = s0; 293 294 return (0); 295} 296 297static int 298_citrus_UES_wcrtomb_priv(_UESEncodingInfo * __restrict ei, 299 char * __restrict s, size_t n, wchar_t wc, 300 _UESState * __restrict psenc, size_t * __restrict nresult) 301{ 302 wchar_t hi, lo; 303 304 if (psenc->chlen != 0) 305 return (EINVAL); 306 307 if ((ei->mode & MODE_C99) ? is_basic(wc) : (uint32_t)wc <= 0x7F) { 308 if (n-- < 1) 309 goto e2big; 310 psenc->ch[psenc->chlen++] = (char)wc; 311 } else if ((uint32_t)wc <= BMP_MAX) { 312 if (n < 6) 313 goto e2big; 314 psenc->chlen = to_str(&psenc->ch[0], wc, UCS2_BIT); 315 } else if ((ei->mode & MODE_C99) == 0 && (uint32_t)wc <= UCS2_MAX) { 316 if (n < 12) 317 goto e2big; 318 ucs_to_surrogate(wc, &hi, &lo); 319 psenc->chlen += to_str(&psenc->ch[0], hi, UCS2_BIT); 320 psenc->chlen += to_str(&psenc->ch[6], lo, UCS2_BIT); 321 } else if ((ei->mode & MODE_C99) && (uint32_t)wc <= UCS4_MAX) { 322 if (n < 10) 323 goto e2big; 324 psenc->chlen = to_str(&psenc->ch[0], wc, UCS4_BIT); 325 } else { 326 *nresult = (size_t)-1; 327 return (EILSEQ); 328 } 329 memcpy(s, psenc->ch, psenc->chlen); 330 *nresult = psenc->chlen; 331 psenc->chlen = 0; 332 333 return (0); 334 335e2big: 336 *nresult = (size_t)-1; 337 return (E2BIG); 338} 339 340/*ARGSUSED*/ 341static int 342_citrus_UES_stdenc_wctocs(_UESEncodingInfo * __restrict ei __unused, 343 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc) 344{ 345 346 *csid = 0; 347 *idx = (_index_t)wc; 348 349 return (0); 350} 351 352static __inline int 353/*ARGSUSED*/ 354_citrus_UES_stdenc_cstowc(_UESEncodingInfo * __restrict ei __unused, 355 wchar_t * __restrict wc, _csid_t csid, _index_t idx) 356{ 357 358 if (csid != 0) 359 return (EILSEQ); 360 *wc = (wchar_t)idx; 361 362 return (0); 363} 364 365static __inline int 366/*ARGSUSED*/ 367_citrus_UES_stdenc_get_state_desc_generic(_UESEncodingInfo * __restrict ei __unused, 368 _UESState * __restrict psenc, int * __restrict rstate) 369{ 370 371 *rstate = (psenc->chlen == 0) ? _STDENC_SDGEN_INITIAL : 372 _STDENC_SDGEN_INCOMPLETE_CHAR; 373 return (0); 374} 375 376static void 377/*ARGSUSED*/ 378_citrus_UES_encoding_module_uninit(_UESEncodingInfo *ei __unused) 379{ 380 381 /* ei seems to be unused */ 382} 383 384static int 385/*ARGSUSED*/ 386_citrus_UES_encoding_module_init(_UESEncodingInfo * __restrict ei, 387 const void * __restrict var, size_t lenvar) 388{ 389 const char *p; 390 391 p = var; 392 memset((void *)ei, 0, sizeof(*ei)); 393 while (lenvar > 0) { 394 switch (_bcs_toupper(*p)) { 395 case 'C': 396 MATCH(C99, ei->mode |= MODE_C99); 397 break; 398 } 399 ++p; 400 --lenvar; 401 } 402 ei->mb_cur_max = (ei->mode & MODE_C99) ? 10 : 12; 403 404 return (0); 405} 406 407/* ---------------------------------------------------------------------- 408 * public interface for stdenc 409 */ 410 411_CITRUS_STDENC_DECLS(UES); 412_CITRUS_STDENC_DEF_OPS(UES); 413 414#include "citrus_stdenc_template.h" 415