1/* $NetBSD: citrus_ues.c,v 1.2 2010/12/07 22:01:22 joerg Exp $ */
2
3/*-
4 * Copyright (c)2006 Citrus Project,
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30#if defined(LIBC_SCCS) && !defined(lint)
31__RCSID("$NetBSD: citrus_ues.c,v 1.2 2010/12/07 22:01:22 joerg Exp $");
32#endif /* LIBC_SCCS and not lint */
33
34#include <assert.h>
35#include <errno.h>
36#include <string.h>
37#include <stdio.h>
38#include <stdint.h>
39#include <stdlib.h>
40#include <limits.h>
41#include <wchar.h>
42
43#include "citrus_namespace.h"
44#include "citrus_types.h"
45#include "citrus_bcs.h"
46#include "citrus_module.h"
47#include "citrus_ctype.h"
48#include "citrus_stdenc.h"
49#include "citrus_ues.h"
50
/*
 * Per-encoding configuration, filled in by
 * _citrus_UES_encoding_module_init().
 */
typedef struct {
	/* bit flags; MODE_C99 is the only flag defined in this file */
	int mode;
	/* set when the encoding variable string contains the token "C99" */
#define MODE_C99	1
	/* longest encoded character: 10 with MODE_C99, 12 without */
	size_t mb_cur_max;
} _UESEncodingInfo;
56
/*
 * Conversion state: input bytes that have been read but not yet turned
 * into a wide character.
 */
typedef struct {
	/* number of valid bytes in ch[]; 0 is the initial state */
	int chlen;
	/* room for two 6-byte \uXXXX escapes (a surrogate pair) */
	char ch[12];
} _UESState;
61
/*
 * ctype-side private data: the encoding configuration plus one
 * conversion state per function name, selected with _CEI_TO_STATE().
 */
typedef struct {
	_UESEncodingInfo	ei;
	struct {
		/* for future multi-locale facility */
		_UESState	s_mblen;
		_UESState	s_mbrlen;
		_UESState	s_mbrtowc;
		_UESState	s_mbtowc;
		_UESState	s_mbsrtowcs;
		_UESState	s_wcrtomb;
		_UESState	s_wcsrtombs;
		_UESState	s_wctomb;
	} states;
} _UESCTypeInfo;
76
/* Accessors into _UESCTypeInfo: the encoding info and one named state. */
#define _CEI_TO_EI(_cei_)               (&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)    (_cei_)->states.s_##_func_

/*
 * Names and properties of this encoding.
 * NOTE(review): presumably read by the template headers included at the
 * end of this file -- confirm against those headers.
 */
#define _FUNCNAME(m)			_citrus_UES_##m
#define _ENCODING_INFO			_UESEncodingInfo
#define _CTYPE_INFO			_UESCTypeInfo
#define _ENCODING_STATE			_UESState
#define _ENCODING_MB_CUR_MAX(_ei_)	(_ei_)->mb_cur_max
#define _ENCODING_IS_STATE_DEPENDENT		0
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	0
87
88static __inline void
89/*ARGSUSED*/
90_citrus_UES_init_state(_UESEncodingInfo * __restrict ei,
91	_UESState * __restrict psenc)
92{
93	psenc->chlen = 0;
94}
95
96static __inline void
97/*ARGSUSED*/
98_citrus_UES_pack_state(_UESEncodingInfo * __restrict ei,
99	void *__restrict pspriv, const _UESState * __restrict psenc)
100{
101	/* ei seem to be unused */
102	_DIAGASSERT(pspriv != NULL);
103	_DIAGASSERT(psenc != NULL);
104
105	memcpy(pspriv, (const void *)psenc, sizeof(*psenc));
106}
107
108static __inline void
109/*ARGSUSED*/
110_citrus_UES_unpack_state(_UESEncodingInfo * __restrict ei,
111	_UESState * __restrict psenc, const void * __restrict pspriv)
112{
113	/* ei seem to be unused */
114	_DIAGASSERT(psenc != NULL);
115	_DIAGASSERT(pspriv != NULL);
116
117	memcpy((void *)psenc, pspriv, sizeof(*psenc));
118}
119
/*
 * Value of the hexadecimal digit ch (either letter case), or -1 if ch is
 * not a hexadecimal digit.
 */
static __inline int
to_int(int ch)
{
	int digit = -1;

	if ('0' <= ch && ch <= '9')
		digit = ch - '0';
	else if ('A' <= ch && ch <= 'F')
		digit = 10 + (ch - 'A');
	else if ('a' <= ch && ch <= 'f')
		digit = 10 + (ch - 'a');

	return digit;
}
131
/* An escape is ESCAPE followed by UCS2_ESC or UCS4_ESC and hex digits. */
#define ESCAPE		'\\'
#define UCS2_ESC	'u'
#define UCS4_ESC	'U'

/* Number of value bits carried by each escape form (4 per hex digit). */
#define UCS2_BIT	16
#define UCS4_BIT	32
/* Largest value written as a single \u escape. */
#define BMP_MAX		UINT32_C(0xFFFF)
/* Largest value written as a pair of \u escapes when MODE_C99 is off. */
#define UCS2_MAX	UINT32_C(0x10FFFF)
/* Largest value handled at all; written as \U when MODE_C99 is on. */
#define UCS4_MAX	UINT32_C(0x7FFFFFFF)

/* Digits emitted by to_str(); the output is always lower case. */
static const char *xdig = "0123456789abcdef";
143
144static __inline int
145to_str(char *s, wchar_t wc, int bit)
146{
147	char *p;
148
149	p = s;
150	*p++ = ESCAPE;
151	switch (bit) {
152	case UCS2_BIT:
153		*p++ = UCS2_ESC;
154		break;
155	case UCS4_BIT:
156		*p++ = UCS4_ESC;
157		break;
158	default:
159		abort();
160	}
161	do {
162		*p++ = xdig[(wc >> (bit -= 4)) & 0xF];
163	} while (bit > 0);
164	return p - s;
165}
166
/* Non-zero iff wc lies in the high (leading) surrogate range D800..DBFF. */
static __inline int
is_hi_surrogate(wchar_t wc)
{
	if (wc < 0xD800)
		return 0;
	return wc <= 0xDBFF;
}
172
/* Non-zero iff wc lies in the low (trailing) surrogate range DC00..DFFF. */
static __inline int
is_lo_surrogate(wchar_t wc)
{
	if (wc < 0xDC00)
		return 0;
	return wc <= 0xDFFF;
}
178
/*
 * Combine a high/low surrogate pair into the code point it stands for:
 * the 10 payload bits of hi above the 10 payload bits of lo, plus 0x10000.
 * Both arguments are expected to be in their surrogate ranges (asserted).
 */
static __inline wchar_t
surrogate_to_ucs(wchar_t hi, wchar_t lo)
{
	wchar_t upper, lower;

	_DIAGASSERT(is_hi_surrogate(hi));
	_DIAGASSERT(is_lo_surrogate(lo));

	upper = hi - 0xD800;
	lower = lo - 0xDC00;

	return ((upper << 10) | lower) + 0x10000;
}
189
/*
 * Split a code point wc >= 0x10000 (asserted) into a surrogate pair:
 * after subtracting 0x10000, the bits above the low 10 go to *hi (based
 * at 0xD800) and the low 10 bits go to *lo (based at 0xDC00).
 */
static __inline void
ucs_to_surrogate(wchar_t wc, wchar_t * __restrict hi, wchar_t * __restrict lo)
{
	wchar_t offset;

	_DIAGASSERT(hi != NULL);
	_DIAGASSERT(lo != NULL);
	_DIAGASSERT(wc >= 0x10000);

	offset = wc - 0x10000;
	*hi = 0xD800 + (offset >> 10);
	*lo = 0xDC00 + (offset & 0x3FF);
}
201
/*
 * Non-zero iff wc is at most 0x9F and is none of 0x24 ('$'), 0x40 ('@')
 * or 0x60 ('`').  In MODE_C99 such values are written as a plain byte
 * and are rejected when they appear inside an escape.
 */
static __inline int
is_basic(wchar_t wc)
{
	if ((uint32_t)wc > 0x9F)
		return 0;

	switch (wc) {
	case 0x24:
	case 0x40:
	case 0x60:
		return 0;
	default:
		return 1;
	}
}
208
209static int
210_citrus_UES_mbrtowc_priv(_UESEncodingInfo * __restrict ei,
211	wchar_t * __restrict pwc, const char ** __restrict s, size_t n,
212	_UESState * __restrict psenc, size_t * __restrict nresult)
213{
214	const char *s0;
215	int ch, head, tail, num;
216	wchar_t hi, wc;
217
218	_DIAGASSERT(ei != NULL);
219	/* pwc may be null */
220	_DIAGASSERT(s != NULL);
221	_DIAGASSERT(psenc != NULL);
222	_DIAGASSERT(nresult != NULL);
223
224	if (*s == NULL) {
225		_citrus_UES_init_state(ei, psenc);
226		*nresult = 0;
227		return 0;
228	}
229	s0 = *s;
230
231	hi = (wchar_t)0;
232	tail = 0;
233
234surrogate:
235	wc = (wchar_t)0;
236	head = tail;
237	if (psenc->chlen == head) {
238		if (n-- < 1)
239			goto restart;
240		psenc->ch[psenc->chlen++] = *s0++;
241	}
242	ch = (unsigned char)psenc->ch[head++];
243	if (ch == ESCAPE) {
244		if (psenc->chlen == head) {
245			if (n-- < 1)
246				goto restart;
247			psenc->ch[psenc->chlen++] = *s0++;
248		}
249		switch (psenc->ch[head]) {
250		case UCS2_ESC:
251			tail += 6;
252			break;
253		case UCS4_ESC:
254			if (ei->mode & MODE_C99) {
255				tail = 10;
256				break;
257			}
258		/*FALLTHROUGH*/
259		default:
260			tail = 0;
261		}
262		++head;
263	}
264	for (; head < tail; ++head) {
265		if (psenc->chlen == head) {
266			if (n-- < 1) {
267restart:
268				*s = s0;
269				*nresult = (size_t)-2;
270				return 0;
271			}
272			psenc->ch[psenc->chlen++] = *s0++;
273		}
274		num = to_int((int)(unsigned char)psenc->ch[head]);
275		if (num < 0) {
276			tail = 0;
277			break;
278		}
279		wc = (wc << 4) | num;
280	}
281	head = 0;
282	switch (tail) {
283	case 0:
284		break;
285	case 6:
286		if (hi != (wchar_t)0)
287			break;
288		if ((ei->mode & MODE_C99) == 0) {
289			if (is_hi_surrogate(wc) != 0) {
290				hi = wc;
291				goto surrogate;
292			}
293			if ((uint32_t)wc <= 0x7F /* XXX */ ||
294			    is_lo_surrogate(wc) != 0)
295				break;
296			goto done;
297		}
298	/*FALLTHROUGH*/
299	case 10:
300		if (is_basic(wc) == 0 && (uint32_t)wc <= UCS4_MAX &&
301		    is_hi_surrogate(wc) == 0 && is_lo_surrogate(wc) == 0)
302			goto done;
303		*nresult = (size_t)-1;
304		return EILSEQ;
305	case 12:
306		if (is_lo_surrogate(wc) == 0)
307			break;
308		wc = surrogate_to_ucs(hi, wc);
309		goto done;
310	}
311	ch = (unsigned char)psenc->ch[0];
312	head = psenc->chlen;
313	if (--head > 0)
314		memmove(&psenc->ch[0], &psenc->ch[1], head);
315	wc = (wchar_t)ch;
316done:
317	psenc->chlen = head;
318	if (pwc != NULL)
319		*pwc = wc;
320	*nresult = (size_t)((wc == 0) ? 0 : (s0 - *s));
321	*s = s0;
322
323	return 0;
324}
325
326static int
327_citrus_UES_wcrtomb_priv(_UESEncodingInfo * __restrict ei,
328	char * __restrict s, size_t n, wchar_t wc,
329	_UESState * __restrict psenc, size_t * __restrict nresult)
330{
331	wchar_t hi, lo;
332
333	if (psenc->chlen != 0)
334		return EINVAL;
335
336	if ((ei->mode & MODE_C99) ? is_basic(wc) : (uint32_t)wc <= 0x7F) {
337		if (n-- < 1)
338			goto e2big;
339		psenc->ch[psenc->chlen++] = (char)wc;
340	} else if ((uint32_t)wc <= BMP_MAX) {
341		if (n < 6)
342			goto e2big;
343		psenc->chlen = to_str(&psenc->ch[0], wc, UCS2_BIT);
344	} else if ((ei->mode & MODE_C99) == 0 && (uint32_t)wc <= UCS2_MAX) {
345		if (n < 12)
346			goto e2big;
347		ucs_to_surrogate(wc, &hi, &lo);
348		psenc->chlen += to_str(&psenc->ch[0], hi, UCS2_BIT);
349		psenc->chlen += to_str(&psenc->ch[6], lo, UCS2_BIT);
350	} else if ((ei->mode & MODE_C99) && (uint32_t)wc <= UCS4_MAX) {
351		if (n < 10)
352			goto e2big;
353		psenc->chlen = to_str(&psenc->ch[0], wc, UCS4_BIT);
354	} else {
355		*nresult = (size_t)-1;
356		return EILSEQ;
357	}
358	memcpy(s, psenc->ch, psenc->chlen);
359	*nresult = psenc->chlen;
360	psenc->chlen = 0;
361
362	return 0;
363
364e2big:
365	*nresult = (size_t)-1;
366	return E2BIG;
367}
368
369/*ARGSUSED*/
370static int
371_citrus_UES_stdenc_wctocs(_UESEncodingInfo * __restrict ei,
372	_csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
373{
374	/* ei seem to be unused */
375	_DIAGASSERT(csid != NULL);
376	_DIAGASSERT(idx != NULL);
377
378	*csid = 0;
379	*idx = (_index_t)wc;
380
381	return 0;
382}
383
384static __inline int
385/*ARGSUSED*/
386_citrus_UES_stdenc_cstowc(_UESEncodingInfo * __restrict ei,
387	wchar_t * __restrict wc, _csid_t csid, _index_t idx)
388{
389	/* ei seem to be unused */
390	_DIAGASSERT(wc != NULL);
391
392	if (csid != 0)
393		return EILSEQ;
394	*wc = (wchar_t)idx;
395
396	return 0;
397}
398
399static __inline int
400/*ARGSUSED*/
401_citrus_UES_stdenc_get_state_desc_generic(_UESEncodingInfo * __restrict ei,
402	_UESState * __restrict psenc, int * __restrict rstate)
403{
404	_DIAGASSERT(psenc != NULL);
405	_DIAGASSERT(rstate != NULL);
406
407	if (psenc->chlen == 0)
408		*rstate = _STDENC_SDGEN_INITIAL;
409	else
410		*rstate = _STDENC_SDGEN_INCOMPLETE_CHAR; /* XXX */
411
412	return 0;
413}
414
415static void
416/*ARGSUSED*/
417_citrus_UES_encoding_module_uninit(_UESEncodingInfo *ei)
418{
419	/* ei seems to be unused */
420}
421
422static int
423/*ARGSUSED*/
424_citrus_UES_encoding_module_init(_UESEncodingInfo * __restrict ei,
425	const void * __restrict var, size_t lenvar)
426{
427	const char *p;
428
429	_DIAGASSERT(ei != NULL);
430
431	p = var;
432#define MATCH(x, act)						\
433do {								\
434        if (lenvar >= (sizeof(#x)-1) &&				\
435            _bcs_strncasecmp(p, #x, sizeof(#x)-1) == 0) {	\
436                act;						\
437                lenvar -= sizeof(#x)-1;				\
438                p += sizeof(#x)-1;				\
439        }							\
440} while (/*CONSTCOND*/0)
441	memset((void *)ei, 0, sizeof(*ei));
442	while (lenvar > 0) {
443		switch (_bcs_toupper(*p)) {
444		case 'C':
445			MATCH(C99, ei->mode |= MODE_C99);
446			break;
447		}
448		++p;
449		--lenvar;
450	}
451	ei->mb_cur_max = (ei->mode & MODE_C99) ? 10 : 12;
452
453	return 0;
454}
455
/* ----------------------------------------------------------------------
 * public interface for ctype
 *
 * NOTE(review): the template header is not visible from this file; it
 * presumably builds the ctype entry points from the _FUNCNAME() and
 * _ENCODING_* macros and the static functions defined above -- confirm.
 */

_CITRUS_CTYPE_DECLS(UES);
_CITRUS_CTYPE_DEF_OPS(UES);

#include "citrus_ctype_template.h"

/* ----------------------------------------------------------------------
 * public interface for stdenc
 *
 * NOTE(review): as for ctype, the stdenc entry points presumably come
 * from the template header included below -- confirm.
 */

_CITRUS_STDENC_DECLS(UES);
_CITRUS_STDENC_DEF_OPS(UES);

#include "citrus_stdenc_template.h"
473