1219019Sgabor/* $FreeBSD$ */
2219019Sgabor/* $NetBSD: citrus_ues.c,v 1.1 2006/11/13 15:16:31 tnozaki Exp $ */
3219019Sgabor
4219019Sgabor/*-
5219019Sgabor * Copyright (c)2006 Citrus Project,
6219019Sgabor * All rights reserved.
7219019Sgabor *
8219019Sgabor * Redistribution and use in source and binary forms, with or without
9219019Sgabor * modification, are permitted provided that the following conditions
10219019Sgabor * are met:
11219019Sgabor * 1. Redistributions of source code must retain the above copyright
12219019Sgabor *    notice, this list of conditions and the following disclaimer.
13219019Sgabor * 2. Redistributions in binary form must reproduce the above copyright
14219019Sgabor *    notice, this list of conditions and the following disclaimer in the
15219019Sgabor *    documentation and/or other materials provided with the distribution.
16219019Sgabor *
17219019Sgabor * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18219019Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19219019Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20219019Sgabor * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21219019Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22219019Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23219019Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24219019Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25219019Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26219019Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27219019Sgabor * SUCH DAMAGE.
28219019Sgabor */
29219019Sgabor
30219019Sgabor#include <sys/cdefs.h>
31219019Sgabor
32219019Sgabor#include <assert.h>
33219019Sgabor#include <errno.h>
34219019Sgabor#include <limits.h>
35219019Sgabor#include <stdio.h>
36219019Sgabor#include <stdint.h>
37219019Sgabor#include <stdlib.h>
38219019Sgabor#include <string.h>
39219019Sgabor#include <wchar.h>
40219019Sgabor
41219019Sgabor#include "citrus_namespace.h"
42219019Sgabor#include "citrus_types.h"
43219019Sgabor#include "citrus_bcs.h"
44219019Sgabor#include "citrus_module.h"
45219019Sgabor#include "citrus_stdenc.h"
46219019Sgabor#include "citrus_ues.h"
47219019Sgabor
/* Flag bit stored in _UESEncodingInfo.mode. */
#define MODE_C99	1

/*
 * Settings of one UES encoding instance.
 */
typedef struct {
	size_t	 mb_cur_max;	/* largest number of bytes one character takes */
	int	 mode;		/* bitwise OR of MODE_* flags */
} _UESEncodingInfo;
53219019Sgabor
/*
 * Conversion state: the bytes of a character that has been read only in
 * part so far.
 */
typedef struct {
	int	 chlen;		/* number of bytes currently held in ch[] */
	char	 ch[12];	/* pending bytes; 12 fits two 6-byte "\uXXXX" escapes */
} _UESState;
58219019Sgabor
/*
 * Accessors for a composite object: the encoding info is its `ei' member
 * and the state belonging to function <func> is `states.s_<func>'.
 */
#define _CEI_TO_EI(_cei_)               (&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)    (_cei_)->states.s_##_func_

/*
 * Names and properties of this encoding.
 * NOTE(review): presumably these are consumed by citrus_stdenc_template.h,
 * which is included at the end of this file -- confirm against that header.
 */
#define _FUNCNAME(m)			_citrus_UES_##m
#define _ENCODING_INFO			_UESEncodingInfo
#define _ENCODING_STATE			_UESState
#define _ENCODING_MB_CUR_MAX(_ei_)	(_ei_)->mb_cur_max
#define _ENCODING_IS_STATE_DEPENDENT		0
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	0
68219019Sgabor
/*
 * Reset a conversion state so that it holds no pending bytes.  Only the
 * byte count is cleared; the contents of the buffer are left as they are.
 * The encoding info argument is not used.
 */
static __inline void
/*ARGSUSED*/
_citrus_UES_init_state(_UESEncodingInfo * __restrict ei __unused,
    _UESState * __restrict psenc)
{

	psenc->chlen = 0;
}
77219019Sgabor
/*
 * The two helpers below are excluded from compilation by `#if 0'.
 * They copy a complete _UESState to or from an opaque buffer with memcpy().
 */
#if 0
static __inline void
/*ARGSUSED*/
_citrus_UES_pack_state(_UESEncodingInfo * __restrict ei __unused,
    void *__restrict pspriv, const _UESState * __restrict psenc)
{

	memcpy(pspriv, (const void *)psenc, sizeof(*psenc));
}

static __inline void
/*ARGSUSED*/
_citrus_UES_unpack_state(_UESEncodingInfo * __restrict ei __unused,
    _UESState * __restrict psenc, const void * __restrict pspriv)
{

	memcpy((void *)psenc, pspriv, sizeof(*psenc));
}
#endif
97219019Sgabor
/*
 * Map one hexadecimal digit character (either case) to its value 0..15.
 * Any other input yields -1.
 */
static __inline int
to_int(int ch)
{
	int digit;

	digit = -1;
	if (ch >= 'a' && ch <= 'f')
		digit = 10 + (ch - 'a');
	else if (ch >= 'A' && ch <= 'F')
		digit = 10 + (ch - 'A');
	else if (ch >= '0' && ch <= '9')
		digit = ch - '0';
	return (digit);
}
110219019Sgabor
/* Characters that make up the start of an escape sequence. */
#define ESCAPE		'\\'
#define UCS2_ESC	'u'
#define UCS4_ESC	'U'

/* Number of value bits carried by each escape form. */
#define UCS2_BIT	16
#define UCS4_BIT	32

/* Upper limits used to pick the form a character is written in. */
#define BMP_MAX		UINT32_C(0xFFFF)
#define UCS2_MAX	UINT32_C(0x10FFFF)
#define UCS4_MAX	UINT32_C(0x7FFFFFFF)

/* Lower-case hexadecimal digits, indexed by nibble value. */
static const char *xdig = "0123456789abcdef";

/*
 * Write the escape sequence for wc to s and return the number of bytes
 * written.  bit selects the form: UCS2_BIT gives a backslash, 'u' and 4 hex
 * digits (6 bytes); UCS4_BIT gives a backslash, 'U' and 8 hex digits
 * (10 bytes).  Any other bit value calls abort().  No terminating NUL is
 * written.
 */
static __inline int
to_str(char *s, wchar_t wc, int bit)
{
	char *out;
	int shift;

	out = s;
	*out++ = ESCAPE;
	if (bit == UCS2_BIT)
		*out++ = UCS2_ESC;
	else if (bit == UCS4_BIT)
		*out++ = UCS4_ESC;
	else
		abort();
	/* Emit one digit per nibble, most significant nibble first. */
	for (shift = bit - 4; shift >= 0; shift -= 4)
		*out++ = xdig[(wc >> shift) & 0xF];
	return (out - s);
}
145219019Sgabor
/*
 * Tell whether wc lies in the high (leading) surrogate range
 * 0xD800..0xDBFF.
 */
static __inline bool
is_hi_surrogate(wchar_t wc)
{

	return (!(wc < 0xD800 || wc > 0xDBFF));
}
152219019Sgabor
/*
 * Tell whether wc lies in the low (trailing) surrogate range
 * 0xDC00..0xDFFF.
 */
static __inline bool
is_lo_surrogate(wchar_t wc)
{

	return (!(wc < 0xDC00 || wc > 0xDFFF));
}
159219019Sgabor
/*
 * Combine a high and a low surrogate into the single code point they
 * stand for.  The arguments are not range-checked.
 */
static __inline wchar_t
surrogate_to_ucs(wchar_t hi, wchar_t lo)
{
	wchar_t bottom, top;

	top = (hi - 0xD800) << 10;	/* upper 10 bits of the offset */
	bottom = lo - 0xDC00;		/* lower 10 bits of the offset */
	return (0x10000 + (top | bottom));
}
168219019Sgabor
/*
 * Split wc into a surrogate pair: the value minus 0x10000 is divided into
 * its upper bits (stored, plus 0xD800, in *hi) and its lower 10 bits
 * (stored, plus 0xDC00, in *lo).  wc is not range-checked.
 */
static __inline void
ucs_to_surrogate(wchar_t wc, wchar_t * __restrict hi, wchar_t * __restrict lo)
{
	wchar_t offset;

	offset = wc - 0x10000;
	*lo = 0xDC00 + (offset & 0x3FF);
	*hi = 0xD800 + (offset >> 10);
}
177219019Sgabor
/*
 * Tell whether wc is a "basic" character: a value of at most 0x9F that is
 * not one of 0x24 ('$'), 0x40 ('@') or 0x60 ('`').
 */
static __inline bool
is_basic(wchar_t wc)
{

	switch (wc) {
	case 0x24:	/* '$' */
	case 0x40:	/* '@' */
	case 0x60:	/* '`' */
		return (0);
	default:
		/* The unsigned comparison also rejects negative values. */
		return ((uint32_t)wc <= 0x9F);
	}
}
185219019Sgabor
186219019Sgaborstatic int
187219019Sgabor_citrus_UES_mbrtowc_priv(_UESEncodingInfo * __restrict ei,
188219019Sgabor    wchar_t * __restrict pwc, char ** __restrict s, size_t n,
189219019Sgabor    _UESState * __restrict psenc, size_t * __restrict nresult)
190219019Sgabor{
191219019Sgabor	char *s0;
192219019Sgabor	int ch, head, num, tail;
193219019Sgabor	wchar_t hi, wc;
194219019Sgabor
195219019Sgabor	if (*s == NULL) {
196219019Sgabor		_citrus_UES_init_state(ei, psenc);
197219019Sgabor		*nresult = 0;
198219019Sgabor		return (0);
199219019Sgabor	}
200219019Sgabor	s0 = *s;
201219019Sgabor
202219019Sgabor	hi = (wchar_t)0;
203219019Sgabor	tail = 0;
204219019Sgabor
205219019Sgaborsurrogate:
206219019Sgabor	wc = (wchar_t)0;
207219019Sgabor	head = tail;
208219019Sgabor	if (psenc->chlen == head) {
209219019Sgabor		if (n-- < 1)
210219019Sgabor			goto restart;
211219019Sgabor		psenc->ch[psenc->chlen++] = *s0++;
212219019Sgabor	}
213219019Sgabor	ch = (unsigned char)psenc->ch[head++];
214219019Sgabor	if (ch == ESCAPE) {
215219019Sgabor		if (psenc->chlen == head) {
216219019Sgabor			if (n-- < 1)
217219019Sgabor				goto restart;
218219019Sgabor			psenc->ch[psenc->chlen++] = *s0++;
219219019Sgabor		}
220219019Sgabor		switch (psenc->ch[head]) {
221219019Sgabor		case UCS2_ESC:
222219019Sgabor			tail += 6;
223219019Sgabor			break;
224219019Sgabor		case UCS4_ESC:
225219019Sgabor			if (ei->mode & MODE_C99) {
226219019Sgabor				tail = 10;
227219019Sgabor				break;
228219019Sgabor			}
229219019Sgabor		/*FALLTHROUGH*/
230219019Sgabor		default:
231219019Sgabor			tail = 0;
232219019Sgabor		}
233219019Sgabor		++head;
234219019Sgabor	}
235219019Sgabor	for (; head < tail; ++head) {
236219019Sgabor		if (psenc->chlen == head) {
237219019Sgabor			if (n-- < 1) {
238219019Sgaborrestart:
239219019Sgabor				*s = s0;
240219019Sgabor				*nresult = (size_t)-2;
241219019Sgabor				return (0);
242219019Sgabor			}
243219019Sgabor			psenc->ch[psenc->chlen++] = *s0++;
244219019Sgabor		}
245219019Sgabor		num = to_int((int)(unsigned char)psenc->ch[head]);
246219019Sgabor		if (num < 0) {
247219019Sgabor			tail = 0;
248219019Sgabor			break;
249219019Sgabor		}
250219019Sgabor		wc = (wc << 4) | num;
251219019Sgabor	}
252219019Sgabor	head = 0;
253219019Sgabor	switch (tail) {
254219019Sgabor	case 0:
255219019Sgabor		break;
256219019Sgabor	case 6:
257219019Sgabor		if (hi != (wchar_t)0)
258219019Sgabor			break;
259219019Sgabor		if ((ei->mode & MODE_C99) == 0) {
260219019Sgabor			if (is_hi_surrogate(wc) != 0) {
261219019Sgabor				hi = wc;
262219019Sgabor				goto surrogate;
263219019Sgabor			}
264219019Sgabor			if ((uint32_t)wc <= 0x7F /* XXX */ ||
265219019Sgabor			    is_lo_surrogate(wc) != 0)
266219019Sgabor				break;
267219019Sgabor			goto done;
268219019Sgabor		}
269219019Sgabor	/*FALLTHROUGH*/
270219019Sgabor	case 10:
271219019Sgabor		if (is_basic(wc) == 0 && (uint32_t)wc <= UCS4_MAX &&
272219019Sgabor		    is_hi_surrogate(wc) == 0 && is_lo_surrogate(wc) == 0)
273219019Sgabor			goto done;
274219019Sgabor		*nresult = (size_t)-1;
275219019Sgabor		return (EILSEQ);
276219019Sgabor	case 12:
277219019Sgabor		if (is_lo_surrogate(wc) == 0)
278219019Sgabor			break;
279219019Sgabor		wc = surrogate_to_ucs(hi, wc);
280219019Sgabor		goto done;
281219019Sgabor	}
282219019Sgabor	ch = (unsigned char)psenc->ch[0];
283219019Sgabor	head = psenc->chlen;
284219019Sgabor	if (--head > 0)
285219019Sgabor		memmove(&psenc->ch[0], &psenc->ch[1], head);
286219019Sgabor	wc = (wchar_t)ch;
287219019Sgabordone:
288219019Sgabor	psenc->chlen = head;
289219019Sgabor	if (pwc != NULL)
290219019Sgabor		*pwc = wc;
291219019Sgabor	*nresult = (size_t)((wc == 0) ? 0 : (s0 - *s));
292219019Sgabor	*s = s0;
293219019Sgabor
294219019Sgabor	return (0);
295219019Sgabor}
296219019Sgabor
297219019Sgaborstatic int
298219019Sgabor_citrus_UES_wcrtomb_priv(_UESEncodingInfo * __restrict ei,
299219019Sgabor    char * __restrict s, size_t n, wchar_t wc,
300219019Sgabor    _UESState * __restrict psenc, size_t * __restrict nresult)
301219019Sgabor{
302219019Sgabor	wchar_t hi, lo;
303219019Sgabor
304219019Sgabor	if (psenc->chlen != 0)
305219019Sgabor		return (EINVAL);
306219019Sgabor
307219019Sgabor	if ((ei->mode & MODE_C99) ? is_basic(wc) : (uint32_t)wc <= 0x7F) {
308219019Sgabor		if (n-- < 1)
309219019Sgabor			goto e2big;
310219019Sgabor		psenc->ch[psenc->chlen++] = (char)wc;
311219019Sgabor	} else if ((uint32_t)wc <= BMP_MAX) {
312219019Sgabor		if (n < 6)
313219019Sgabor			goto e2big;
314219019Sgabor		psenc->chlen = to_str(&psenc->ch[0], wc, UCS2_BIT);
315219019Sgabor	} else if ((ei->mode & MODE_C99) == 0 && (uint32_t)wc <= UCS2_MAX) {
316219019Sgabor		if (n < 12)
317219019Sgabor			goto e2big;
318219019Sgabor		ucs_to_surrogate(wc, &hi, &lo);
319219019Sgabor		psenc->chlen += to_str(&psenc->ch[0], hi, UCS2_BIT);
320219019Sgabor		psenc->chlen += to_str(&psenc->ch[6], lo, UCS2_BIT);
321219019Sgabor	} else if ((ei->mode & MODE_C99) && (uint32_t)wc <= UCS4_MAX) {
322219019Sgabor		if (n < 10)
323219019Sgabor			goto e2big;
324219019Sgabor		psenc->chlen = to_str(&psenc->ch[0], wc, UCS4_BIT);
325219019Sgabor	} else {
326219019Sgabor		*nresult = (size_t)-1;
327219019Sgabor		return (EILSEQ);
328219019Sgabor	}
329219019Sgabor	memcpy(s, psenc->ch, psenc->chlen);
330219019Sgabor	*nresult = psenc->chlen;
331219019Sgabor	psenc->chlen = 0;
332219019Sgabor
333219019Sgabor	return (0);
334219019Sgabor
335219019Sgabore2big:
336219019Sgabor	*nresult = (size_t)-1;
337219019Sgabor	return (E2BIG);
338219019Sgabor}
339219019Sgabor
340219019Sgabor/*ARGSUSED*/
341219019Sgaborstatic int
342219019Sgabor_citrus_UES_stdenc_wctocs(_UESEncodingInfo * __restrict ei __unused,
343219019Sgabor    _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
344219019Sgabor{
345219019Sgabor
346219019Sgabor	*csid = 0;
347219019Sgabor	*idx = (_index_t)wc;
348219019Sgabor
349219019Sgabor	return (0);
350219019Sgabor}
351219019Sgabor
352219019Sgaborstatic __inline int
353219019Sgabor/*ARGSUSED*/
354219019Sgabor_citrus_UES_stdenc_cstowc(_UESEncodingInfo * __restrict ei __unused,
355219019Sgabor    wchar_t * __restrict wc, _csid_t csid, _index_t idx)
356219019Sgabor{
357219019Sgabor
358219019Sgabor	if (csid != 0)
359219019Sgabor		return (EILSEQ);
360219019Sgabor	*wc = (wchar_t)idx;
361219019Sgabor
362219019Sgabor	return (0);
363219019Sgabor}
364219019Sgabor
365219019Sgaborstatic __inline int
366219019Sgabor/*ARGSUSED*/
367219019Sgabor_citrus_UES_stdenc_get_state_desc_generic(_UESEncodingInfo * __restrict ei __unused,
368219019Sgabor    _UESState * __restrict psenc, int * __restrict rstate)
369219019Sgabor{
370219019Sgabor
371219019Sgabor	*rstate = (psenc->chlen == 0) ? _STDENC_SDGEN_INITIAL :
372219019Sgabor	    _STDENC_SDGEN_INCOMPLETE_CHAR;
373219019Sgabor	return (0);
374219019Sgabor}
375219019Sgabor
/*
 * Tear-down counterpart of _citrus_UES_encoding_module_init().  The body is
 * empty: this function releases nothing and does not touch ei.
 */
static void
/*ARGSUSED*/
_citrus_UES_encoding_module_uninit(_UESEncodingInfo *ei __unused)
{

	/* Nothing to do; ei is not used. */
}
383219019Sgabor
384219019Sgaborstatic int
385219019Sgabor/*ARGSUSED*/
386219019Sgabor_citrus_UES_encoding_module_init(_UESEncodingInfo * __restrict ei,
387219019Sgabor    const void * __restrict var, size_t lenvar)
388219019Sgabor{
389219019Sgabor	const char *p;
390219019Sgabor
391219019Sgabor	p = var;
392219019Sgabor	memset((void *)ei, 0, sizeof(*ei));
393219019Sgabor	while (lenvar > 0) {
394219019Sgabor		switch (_bcs_toupper(*p)) {
395219019Sgabor		case 'C':
396219019Sgabor			MATCH(C99, ei->mode |= MODE_C99);
397219019Sgabor			break;
398219019Sgabor		}
399219019Sgabor		++p;
400219019Sgabor		--lenvar;
401219019Sgabor	}
402219019Sgabor	ei->mb_cur_max = (ei->mode & MODE_C99) ? 10 : 12;
403219019Sgabor
404219019Sgabor	return (0);
405219019Sgabor}
406219019Sgabor
/* ----------------------------------------------------------------------
 * public interface for stdenc
 *
 * NOTE(review): the two macros below and citrus_stdenc_template.h are
 * defined outside this file.  Presumably they declare and define the stdenc
 * operations of this module in terms of the _citrus_UES_* functions and the
 * _ENCODING_* macros above -- confirm against those headers.
 */

_CITRUS_STDENC_DECLS(UES);
_CITRUS_STDENC_DEF_OPS(UES);

#include "citrus_stdenc_template.h"
415