citrus_ues.c revision 219019
1219019Sgabor/* $FreeBSD: head/lib/libiconv_modules/UES/citrus_ues.c 219019 2011-02-25 00:04:39Z gabor $ */
2219019Sgabor/* $NetBSD: citrus_ues.c,v 1.1 2006/11/13 15:16:31 tnozaki Exp $ */
3219019Sgabor
4219019Sgabor/*-
5219019Sgabor * Copyright (c)2006 Citrus Project,
6219019Sgabor * All rights reserved.
7219019Sgabor *
8219019Sgabor * Redistribution and use in source and binary forms, with or without
9219019Sgabor * modification, are permitted provided that the following conditions
10219019Sgabor * are met:
11219019Sgabor * 1. Redistributions of source code must retain the above copyright
12219019Sgabor *    notice, this list of conditions and the following disclaimer.
13219019Sgabor * 2. Redistributions in binary form must reproduce the above copyright
14219019Sgabor *    notice, this list of conditions and the following disclaimer in the
15219019Sgabor *    documentation and/or other materials provided with the distribution.
16219019Sgabor *
17219019Sgabor * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18219019Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19219019Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20219019Sgabor * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21219019Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22219019Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23219019Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24219019Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25219019Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26219019Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27219019Sgabor * SUCH DAMAGE.
28219019Sgabor */
29219019Sgabor
30219019Sgabor#include <sys/cdefs.h>
31219019Sgabor
32219019Sgabor#include <assert.h>
33219019Sgabor#include <errno.h>
34219019Sgabor#include <limits.h>
35219019Sgabor#include <stdio.h>
36219019Sgabor#include <stdint.h>
37219019Sgabor#include <stdlib.h>
38219019Sgabor#include <string.h>
39219019Sgabor#include <wchar.h>
40219019Sgabor
41219019Sgabor#include "citrus_namespace.h"
42219019Sgabor#include "citrus_types.h"
43219019Sgabor#include "citrus_bcs.h"
44219019Sgabor#include "citrus_module.h"
45219019Sgabor#include "citrus_stdenc.h"
46219019Sgabor#include "citrus_ues.h"
47219019Sgabor
/*
 * Per-encoding configuration, filled in by
 * _citrus_UES_encoding_module_init().
 */
typedef struct {
	size_t	 mb_cur_max;	/* longest output per character: 10 or 12 bytes */
	int	 mode;		/* bit mask of MODE_* flags */
#define MODE_C99	1	/* accept/emit "\U" + 8 hex digits; no surrogate pairs */
} _UESEncodingInfo;
53219019Sgabor
/*
 * Conversion state: bytes that have been taken from the input but do not
 * yet form a complete character.
 */
typedef struct {
	int	 chlen;		/* number of valid bytes in ch[] */
	char	 ch[12];	/* pending bytes; room for two "\uXXXX" escapes */
} _UESState;
58219019Sgabor
/*
 * None of the names below is referenced directly in this file.
 * NOTE(review): presumably they parameterize "citrus_stdenc_template.h",
 * which is included at the end of the file -- confirm against that header.
 */
#define _CEI_TO_EI(_cei_)               (&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)    (_cei_)->states.s_##_func_

/* Prefix every generated function name with _citrus_UES_. */
#define _FUNCNAME(m)			_citrus_UES_##m
#define _ENCODING_INFO			_UESEncodingInfo
#define _ENCODING_STATE			_UESState
#define _ENCODING_MB_CUR_MAX(_ei_)	(_ei_)->mb_cur_max
#define _ENCODING_IS_STATE_DEPENDENT		0
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	0
68219019Sgabor
/*
 * Reset a conversion state so that no bytes are pending.  ch[] is left
 * untouched; only chlen is cleared.
 */
static __inline void
/*ARGSUSED*/
_citrus_UES_init_state(_UESEncodingInfo * __restrict ei __unused,
    _UESState * __restrict psenc)
{

	psenc->chlen = 0;
}
77219019Sgabor
78219019Sgaborstatic __inline void
79219019Sgabor/*ARGSUSED*/
80219019Sgabor_citrus_UES_pack_state(_UESEncodingInfo * __restrict ei __unused,
81219019Sgabor    void *__restrict pspriv, const _UESState * __restrict psenc)
82219019Sgabor{
83219019Sgabor
84219019Sgabor	memcpy(pspriv, (const void *)psenc, sizeof(*psenc));
85219019Sgabor}
86219019Sgabor
87219019Sgaborstatic __inline void
88219019Sgabor/*ARGSUSED*/
89219019Sgabor_citrus_UES_unpack_state(_UESEncodingInfo * __restrict ei __unused,
90219019Sgabor    _UESState * __restrict psenc, const void * __restrict pspriv)
91219019Sgabor{
92219019Sgabor
93219019Sgabor	memcpy((void *)psenc, pspriv, sizeof(*psenc));
94219019Sgabor}
95219019Sgabor
/*
 * Value of the hexadecimal digit ch (either letter case), or -1 when ch
 * is not a hexadecimal digit.
 */
static __inline int
to_int(int ch)
{

	if ('0' <= ch && ch <= '9')
		return (ch - '0');
	if ('a' <= ch && ch <= 'f')
		return (10 + (ch - 'a'));
	if ('A' <= ch && ch <= 'F')
		return (10 + (ch - 'A'));
	return (-1);
}
108219019Sgabor
/* Bytes that introduce an escape: ESCAPE, then one of the two markers. */
#define ESCAPE		'\\'
#define UCS2_ESC	'u'	/* followed by 4 hex digits */
#define UCS4_ESC	'U'	/* followed by 8 hex digits */

/* Number of value bits carried by each escape form. */
#define UCS2_BIT	16
#define UCS4_BIT	32
/* Inclusive upper bounds used when picking the output form. */
#define BMP_MAX		UINT32_C(0xFFFF)	/* one "\uXXXX" */
#define UCS2_MAX	UINT32_C(0x10FFFF)	/* two "\uXXXX" (surrogate pair) */
#define UCS4_MAX	UINT32_C(0x7FFFFFFF)	/* one "\UXXXXXXXX" */
118219019Sgabor
119219019Sgaborstatic const char *xdig = "0123456789abcdef";
120219019Sgabor
121219019Sgaborstatic __inline int
122219019Sgaborto_str(char *s, wchar_t wc, int bit)
123219019Sgabor{
124219019Sgabor	char *p;
125219019Sgabor
126219019Sgabor	p = s;
127219019Sgabor	*p++ = ESCAPE;
128219019Sgabor	switch (bit) {
129219019Sgabor	case UCS2_BIT:
130219019Sgabor		*p++ = UCS2_ESC;
131219019Sgabor		break;
132219019Sgabor	case UCS4_BIT:
133219019Sgabor		*p++ = UCS4_ESC;
134219019Sgabor		break;
135219019Sgabor	default:
136219019Sgabor		abort();
137219019Sgabor	}
138219019Sgabor	do {
139219019Sgabor		*p++ = xdig[(wc >> (bit -= 4)) & 0xF];
140219019Sgabor	} while (bit > 0);
141219019Sgabor	return (p - s);
142219019Sgabor}
143219019Sgabor
/*
 * True when wc is a UTF-16 high (leading) surrogate, 0xD800..0xDBFF.
 */
static __inline bool
is_hi_surrogate(wchar_t wc)
{

	/* Exactly the values whose bits above the low ten equal 0xD800. */
	return ((wc & ~0x3FF) == 0xD800);
}
150219019Sgabor
/*
 * True when wc is a UTF-16 low (trailing) surrogate, 0xDC00..0xDFFF.
 */
static __inline bool
is_lo_surrogate(wchar_t wc)
{

	/* Exactly the values whose bits above the low ten equal 0xDC00. */
	return ((wc & ~0x3FF) == 0xDC00);
}
157219019Sgabor
/*
 * Combine a high/low surrogate pair into the code point it encodes.
 * The arguments are not validated here.
 */
static __inline wchar_t
surrogate_to_ucs(wchar_t hi, wchar_t lo)
{
	const wchar_t upper = hi - 0xD800;	/* top ten payload bits */
	const wchar_t lower = lo - 0xDC00;	/* bottom ten payload bits */

	return (((upper << 10) | lower) + 0x10000);
}
166219019Sgabor
/*
 * Split wc into a surrogate pair: 0x10000 is subtracted, then the upper
 * bits go to *hi (based at 0xD800) and the low ten bits to *lo (based at
 * 0xDC00).  The range of wc is not checked here.
 */
static __inline void
ucs_to_surrogate(wchar_t wc, wchar_t * __restrict hi, wchar_t * __restrict lo)
{
	const wchar_t offset = wc - 0x10000;

	*hi = 0xD800 + (offset >> 10);
	*lo = 0xDC00 + (offset & 0x3FF);
}
175219019Sgabor
/*
 * True when wc is at most 0x9F and is not one of 0x24 ('$'), 0x40 ('@')
 * or 0x60 ('`').  In C99 mode the callers write such characters as a
 * single raw byte and refuse escapes that name them.
 */
static __inline bool
is_basic(wchar_t wc)
{

	if ((uint32_t)wc > 0x9F)
		return (0);
	return (!(wc == 0x24 || wc == 0x40 || wc == 0x60));
}
183219019Sgabor
184219019Sgaborstatic int
185219019Sgabor_citrus_UES_mbrtowc_priv(_UESEncodingInfo * __restrict ei,
186219019Sgabor    wchar_t * __restrict pwc, char ** __restrict s, size_t n,
187219019Sgabor    _UESState * __restrict psenc, size_t * __restrict nresult)
188219019Sgabor{
189219019Sgabor	char *s0;
190219019Sgabor	int ch, head, num, tail;
191219019Sgabor	wchar_t hi, wc;
192219019Sgabor
193219019Sgabor	if (*s == NULL) {
194219019Sgabor		_citrus_UES_init_state(ei, psenc);
195219019Sgabor		*nresult = 0;
196219019Sgabor		return (0);
197219019Sgabor	}
198219019Sgabor	s0 = *s;
199219019Sgabor
200219019Sgabor	hi = (wchar_t)0;
201219019Sgabor	tail = 0;
202219019Sgabor
203219019Sgaborsurrogate:
204219019Sgabor	wc = (wchar_t)0;
205219019Sgabor	head = tail;
206219019Sgabor	if (psenc->chlen == head) {
207219019Sgabor		if (n-- < 1)
208219019Sgabor			goto restart;
209219019Sgabor		psenc->ch[psenc->chlen++] = *s0++;
210219019Sgabor	}
211219019Sgabor	ch = (unsigned char)psenc->ch[head++];
212219019Sgabor	if (ch == ESCAPE) {
213219019Sgabor		if (psenc->chlen == head) {
214219019Sgabor			if (n-- < 1)
215219019Sgabor				goto restart;
216219019Sgabor			psenc->ch[psenc->chlen++] = *s0++;
217219019Sgabor		}
218219019Sgabor		switch (psenc->ch[head]) {
219219019Sgabor		case UCS2_ESC:
220219019Sgabor			tail += 6;
221219019Sgabor			break;
222219019Sgabor		case UCS4_ESC:
223219019Sgabor			if (ei->mode & MODE_C99) {
224219019Sgabor				tail = 10;
225219019Sgabor				break;
226219019Sgabor			}
227219019Sgabor		/*FALLTHROUGH*/
228219019Sgabor		default:
229219019Sgabor			tail = 0;
230219019Sgabor		}
231219019Sgabor		++head;
232219019Sgabor	}
233219019Sgabor	for (; head < tail; ++head) {
234219019Sgabor		if (psenc->chlen == head) {
235219019Sgabor			if (n-- < 1) {
236219019Sgaborrestart:
237219019Sgabor				*s = s0;
238219019Sgabor				*nresult = (size_t)-2;
239219019Sgabor				return (0);
240219019Sgabor			}
241219019Sgabor			psenc->ch[psenc->chlen++] = *s0++;
242219019Sgabor		}
243219019Sgabor		num = to_int((int)(unsigned char)psenc->ch[head]);
244219019Sgabor		if (num < 0) {
245219019Sgabor			tail = 0;
246219019Sgabor			break;
247219019Sgabor		}
248219019Sgabor		wc = (wc << 4) | num;
249219019Sgabor	}
250219019Sgabor	head = 0;
251219019Sgabor	switch (tail) {
252219019Sgabor	case 0:
253219019Sgabor		break;
254219019Sgabor	case 6:
255219019Sgabor		if (hi != (wchar_t)0)
256219019Sgabor			break;
257219019Sgabor		if ((ei->mode & MODE_C99) == 0) {
258219019Sgabor			if (is_hi_surrogate(wc) != 0) {
259219019Sgabor				hi = wc;
260219019Sgabor				goto surrogate;
261219019Sgabor			}
262219019Sgabor			if ((uint32_t)wc <= 0x7F /* XXX */ ||
263219019Sgabor			    is_lo_surrogate(wc) != 0)
264219019Sgabor				break;
265219019Sgabor			goto done;
266219019Sgabor		}
267219019Sgabor	/*FALLTHROUGH*/
268219019Sgabor	case 10:
269219019Sgabor		if (is_basic(wc) == 0 && (uint32_t)wc <= UCS4_MAX &&
270219019Sgabor		    is_hi_surrogate(wc) == 0 && is_lo_surrogate(wc) == 0)
271219019Sgabor			goto done;
272219019Sgabor		*nresult = (size_t)-1;
273219019Sgabor		return (EILSEQ);
274219019Sgabor	case 12:
275219019Sgabor		if (is_lo_surrogate(wc) == 0)
276219019Sgabor			break;
277219019Sgabor		wc = surrogate_to_ucs(hi, wc);
278219019Sgabor		goto done;
279219019Sgabor	}
280219019Sgabor	ch = (unsigned char)psenc->ch[0];
281219019Sgabor	head = psenc->chlen;
282219019Sgabor	if (--head > 0)
283219019Sgabor		memmove(&psenc->ch[0], &psenc->ch[1], head);
284219019Sgabor	wc = (wchar_t)ch;
285219019Sgabordone:
286219019Sgabor	psenc->chlen = head;
287219019Sgabor	if (pwc != NULL)
288219019Sgabor		*pwc = wc;
289219019Sgabor	*nresult = (size_t)((wc == 0) ? 0 : (s0 - *s));
290219019Sgabor	*s = s0;
291219019Sgabor
292219019Sgabor	return (0);
293219019Sgabor}
294219019Sgabor
295219019Sgaborstatic int
296219019Sgabor_citrus_UES_wcrtomb_priv(_UESEncodingInfo * __restrict ei,
297219019Sgabor    char * __restrict s, size_t n, wchar_t wc,
298219019Sgabor    _UESState * __restrict psenc, size_t * __restrict nresult)
299219019Sgabor{
300219019Sgabor	wchar_t hi, lo;
301219019Sgabor
302219019Sgabor	if (psenc->chlen != 0)
303219019Sgabor		return (EINVAL);
304219019Sgabor
305219019Sgabor	if ((ei->mode & MODE_C99) ? is_basic(wc) : (uint32_t)wc <= 0x7F) {
306219019Sgabor		if (n-- < 1)
307219019Sgabor			goto e2big;
308219019Sgabor		psenc->ch[psenc->chlen++] = (char)wc;
309219019Sgabor	} else if ((uint32_t)wc <= BMP_MAX) {
310219019Sgabor		if (n < 6)
311219019Sgabor			goto e2big;
312219019Sgabor		psenc->chlen = to_str(&psenc->ch[0], wc, UCS2_BIT);
313219019Sgabor	} else if ((ei->mode & MODE_C99) == 0 && (uint32_t)wc <= UCS2_MAX) {
314219019Sgabor		if (n < 12)
315219019Sgabor			goto e2big;
316219019Sgabor		ucs_to_surrogate(wc, &hi, &lo);
317219019Sgabor		psenc->chlen += to_str(&psenc->ch[0], hi, UCS2_BIT);
318219019Sgabor		psenc->chlen += to_str(&psenc->ch[6], lo, UCS2_BIT);
319219019Sgabor	} else if ((ei->mode & MODE_C99) && (uint32_t)wc <= UCS4_MAX) {
320219019Sgabor		if (n < 10)
321219019Sgabor			goto e2big;
322219019Sgabor		psenc->chlen = to_str(&psenc->ch[0], wc, UCS4_BIT);
323219019Sgabor	} else {
324219019Sgabor		*nresult = (size_t)-1;
325219019Sgabor		return (EILSEQ);
326219019Sgabor	}
327219019Sgabor	memcpy(s, psenc->ch, psenc->chlen);
328219019Sgabor	*nresult = psenc->chlen;
329219019Sgabor	psenc->chlen = 0;
330219019Sgabor
331219019Sgabor	return (0);
332219019Sgabor
333219019Sgabore2big:
334219019Sgabor	*nresult = (size_t)-1;
335219019Sgabor	return (E2BIG);
336219019Sgabor}
337219019Sgabor
338219019Sgabor/*ARGSUSED*/
339219019Sgaborstatic int
340219019Sgabor_citrus_UES_stdenc_wctocs(_UESEncodingInfo * __restrict ei __unused,
341219019Sgabor    _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
342219019Sgabor{
343219019Sgabor
344219019Sgabor	*csid = 0;
345219019Sgabor	*idx = (_index_t)wc;
346219019Sgabor
347219019Sgabor	return (0);
348219019Sgabor}
349219019Sgabor
350219019Sgaborstatic __inline int
351219019Sgabor/*ARGSUSED*/
352219019Sgabor_citrus_UES_stdenc_cstowc(_UESEncodingInfo * __restrict ei __unused,
353219019Sgabor    wchar_t * __restrict wc, _csid_t csid, _index_t idx)
354219019Sgabor{
355219019Sgabor
356219019Sgabor	if (csid != 0)
357219019Sgabor		return (EILSEQ);
358219019Sgabor	*wc = (wchar_t)idx;
359219019Sgabor
360219019Sgabor	return (0);
361219019Sgabor}
362219019Sgabor
363219019Sgaborstatic __inline int
364219019Sgabor/*ARGSUSED*/
365219019Sgabor_citrus_UES_stdenc_get_state_desc_generic(_UESEncodingInfo * __restrict ei __unused,
366219019Sgabor    _UESState * __restrict psenc, int * __restrict rstate)
367219019Sgabor{
368219019Sgabor
369219019Sgabor	*rstate = (psenc->chlen == 0) ? _STDENC_SDGEN_INITIAL :
370219019Sgabor	    _STDENC_SDGEN_INCOMPLETE_CHAR;
371219019Sgabor	return (0);
372219019Sgabor}
373219019Sgabor
/*
 * Counterpart of _citrus_UES_encoding_module_init().  That function only
 * stores plain values in *ei, so there is nothing to release here.
 */
static void
/*ARGSUSED*/
_citrus_UES_encoding_module_uninit(_UESEncodingInfo *ei __unused)
{

	/* Nothing to do; ei is intentionally unused. */
}
381219019Sgabor
382219019Sgaborstatic int
383219019Sgabor/*ARGSUSED*/
384219019Sgabor_citrus_UES_encoding_module_init(_UESEncodingInfo * __restrict ei,
385219019Sgabor    const void * __restrict var, size_t lenvar)
386219019Sgabor{
387219019Sgabor	const char *p;
388219019Sgabor
389219019Sgabor	p = var;
390219019Sgabor	memset((void *)ei, 0, sizeof(*ei));
391219019Sgabor	while (lenvar > 0) {
392219019Sgabor		switch (_bcs_toupper(*p)) {
393219019Sgabor		case 'C':
394219019Sgabor			MATCH(C99, ei->mode |= MODE_C99);
395219019Sgabor			break;
396219019Sgabor		}
397219019Sgabor		++p;
398219019Sgabor		--lenvar;
399219019Sgabor	}
400219019Sgabor	ei->mb_cur_max = (ei->mode & MODE_C99) ? 10 : 12;
401219019Sgabor
402219019Sgabor	return (0);
403219019Sgabor}
404219019Sgabor
/* ----------------------------------------------------------------------
 * public interface for stdenc
 *
 * NOTE(review): both macros below are defined in headers outside this
 * file; presumably they declare and define the stdenc operations for the
 * "UES" module in terms of the _citrus_UES_* functions above -- confirm
 * against their definitions.
 */

_CITRUS_STDENC_DECLS(UES);
_CITRUS_STDENC_DEF_OPS(UES);
411219019Sgabor
412219019Sgabor#include "citrus_stdenc_template.h"
413