1/* $NetBSD: citrus_dechanyu.c,v 1.4 2011/11/19 18:20:13 tnozaki Exp $ */
2
3/*-
4 * Copyright (c)2007 Citrus Project,
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28#include <sys/cdefs.h>
29#if defined(LIBC_SCCS) && !defined(lint)
30__RCSID("$NetBSD: citrus_dechanyu.c,v 1.4 2011/11/19 18:20:13 tnozaki Exp $");
31#endif /* LIBC_SCCS and not lint */
32
33#include <sys/types.h>
34#include <assert.h>
35#include <errno.h>
36#include <string.h>
37#include <stdint.h>
38#include <stdio.h>
39#include <stdlib.h>
40#include <stddef.h>
41#include <wchar.h>
42#include <limits.h>
43
44#include "citrus_namespace.h"
45#include "citrus_types.h"
46#include "citrus_bcs.h"
47#include "citrus_module.h"
48#include "citrus_ctype.h"
49#include "citrus_stdenc.h"
50#include "citrus_dechanyu.h"
51
/* ----------------------------------------------------------------------
 * private definitions used by the templates
 */
55
/*
 * Conversion state for one character in progress.
 * chlen == 0 is the initial state (see _citrus_DECHanyu_init_state).
 */
typedef struct {
	int chlen;	/* number of bytes of ch[] currently in use */
	char ch[4];	/* bytes of the character being processed */
} _DECHanyuState;
60
/*
 * Per-encoding information.  Nothing in this file reads or writes the
 * single member; it only keeps the structure non-empty.
 */
typedef struct {
	int dummy;	/* placeholder, unused */
} _DECHanyuEncodingInfo;
64
/*
 * ctype-level information: the encoding info plus one conversion state
 * per function name.  A slot is selected by name through _CEI_TO_STATE().
 */
typedef struct {
	_DECHanyuEncodingInfo	ei;	/* reached through _CEI_TO_EI() */
	struct {
		/* for future multi-locale facility */
		_DECHanyuState	s_mblen;
		_DECHanyuState	s_mbrlen;
		_DECHanyuState	s_mbrtowc;
		_DECHanyuState	s_mbtowc;
		_DECHanyuState	s_mbsrtowcs;
		_DECHanyuState	s_mbsnrtowcs;
		_DECHanyuState	s_wcrtomb;
		_DECHanyuState	s_wcsrtombs;
		_DECHanyuState	s_wcsnrtombs;
		_DECHanyuState	s_wctomb;
	} states;
} _DECHanyuCTypeInfo;
81
/* Address of the encoding info embedded in a ctype info. */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
/* Per-function state slot: expands to (_cei_)->states.s_<_func_>. */
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.__CONCAT(s_,_func_)

/* Symbol name builder: _FUNCNAME(m) expands to _citrus_DECHanyu_<m>. */
#define _FUNCNAME(m)			__CONCAT(_citrus_DECHanyu_,m)
/* Generic names for this module's concrete types. */
#define _ENCODING_INFO			_DECHanyuEncodingInfo
#define _CTYPE_INFO			_DECHanyuCTypeInfo
#define _ENCODING_STATE			_DECHanyuState
/* Longest byte sequence for one character: 0xC2 0xCB + lead + trail. */
#define _ENCODING_MB_CUR_MAX(_ei_)		4
/* Value _citrus_DECHanyu_mbrtowc_priv reports when called with *s == NULL. */
#define _ENCODING_IS_STATE_DEPENDENT		0
/* NOTE(review): presumably consulted by the included templates -- confirm. */
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	0
92
93static __inline void
94/*ARGSUSED*/
95_citrus_DECHanyu_init_state(_DECHanyuEncodingInfo * __restrict ei,
96	_DECHanyuState * __restrict psenc)
97{
98	/* ei may be null */
99	_DIAGASSERT(psenc != NULL);
100
101	psenc->chlen = 0;
102}
103
104static __inline void
105/*ARGSUSED*/
106_citrus_DECHanyu_pack_state(_DECHanyuEncodingInfo * __restrict ei,
107	void * __restrict pspriv,
108	const _DECHanyuState * __restrict psenc)
109{
110	/* ei may be null */
111	_DIAGASSERT(pspriv != NULL);
112	_DIAGASSERT(psenc != NULL);
113
114	memcpy(pspriv, (const void *)psenc, sizeof(*psenc));
115}
116
117static __inline void
118/*ARGSUSED*/
119_citrus_DECHanyu_unpack_state(_DECHanyuEncodingInfo * __restrict ei,
120	_DECHanyuState * __restrict psenc,
121	const void * __restrict pspriv)
122{
123	/* ei may be null */
124	_DIAGASSERT(psenc != NULL);
125	_DIAGASSERT(pspriv != NULL);
126
127	memcpy((void *)psenc, pspriv, sizeof(*psenc));
128}
129
130static void
131/*ARGSUSED*/
132_citrus_DECHanyu_encoding_module_uninit(_DECHanyuEncodingInfo *ei)
133{
134	/* ei may be null */
135}
136
137static int
138/*ARGSUSED*/
139_citrus_DECHanyu_encoding_module_init(_DECHanyuEncodingInfo * __restrict ei,
140	const void * __restrict var, size_t lenvar)
141{
142	/* ei may be null */
143	return 0;
144}
145
/*
 * Return 1 when c is at most 0x7F, i.e. a byte that stands for itself,
 * and 0 otherwise.  Callers pass bytes already masked with 0xFF.
 */
static __inline int
is_singlebyte(int c)
{
	return !(c > 0x7F);
}
151
/*
 * Return 1 when c lies in 0xA1..0xFE, the range this file accepts for the
 * first byte of a two-byte pair, and 0 otherwise.
 */
static __inline int
is_leadbyte(int c)
{
	return !(c < 0xA1 || c > 0xFE);
}
157
/*
 * Return 1 when c, with bit 0x80 cleared, lies in 0x21..0x7E, and 0
 * otherwise.  For byte values this accepts 0x21..0x7E and 0xA1..0xFE.
 */
static __inline int
is_trailbyte(int c)
{
	const int stripped = c & ~0x80;

	return !(stripped < 0x21 || stripped > 0x7E);
}
164
/*
 * Return 1 when c is 0xC2, the first byte of the two-byte prefix that
 * introduces a four-byte sequence, and 0 otherwise.
 */
static __inline int
is_hanyu1(int c)
{
	return (c != 0xC2) ? 0 : 1;
}
170
/*
 * Return 1 when c is 0xCB, the second byte of the two-byte prefix that
 * introduces a four-byte sequence, and 0 otherwise.
 */
static __inline int
is_hanyu2(int c)
{
	return (c != 0xCB) ? 0 : 1;
}
176
/*
 * The prefix bytes 0xC2 0xCB placed in the upper 16 bits of a wide
 * character; marks wide characters that use the four-byte sequence.
 */
#define HANYUBIT	0xC2CB0000
178
/*
 * Return 1 when c lies in 0x21..0x7E (94 values) and 0 otherwise.
 * Used to validate each byte of a character set index.
 */
static __inline int
is_94charset(int c)
{
	return !(c < 0x21 || c > 0x7E);
}
184
/*
 * Decode one character from the bytes at *s (at most n of them), resuming
 * from any partial sequence saved in psenc.
 *
 * Sequences accepted by the code below:
 *   - one byte <= 0x7F, returned unchanged;
 *   - lead + trail, returned as (lead << 8) | trail;
 *   - 0xC2 0xCB + lead + trail, returned as
 *     HANYUBIT | (lead << 8) | trail.
 * A first byte of 0xC2 is always taken as the start of the four-byte form,
 * so it must be followed by 0xCB.
 *
 * Outcomes:
 *   - *s == NULL: psenc is reset, *nresult is set to
 *     _ENCODING_IS_STATE_DEPENDENT, returns 0.
 *   - complete character: stored through pwc when pwc is non-NULL,
 *     *nresult is the number of bytes consumed by this call (0 for the NUL
 *     character), *s is advanced, psenc->chlen is 0; returns 0.
 *   - input runs out mid-character: *nresult is (size_t)-2, *s is advanced
 *     past the bytes consumed, which stay saved in psenc; returns 0.
 *   - invalid byte: *nresult is (size_t)-1 and EILSEQ is returned; *s is
 *     not advanced and psenc->chlen is not reset.
 *   - saved state is inconsistent: EINVAL is returned and *nresult is not
 *     written.
 */
static int
/*ARGSUSED*/
_citrus_DECHanyu_mbrtowc_priv(_DECHanyuEncodingInfo * __restrict ei,
	wchar_t * __restrict pwc, const char ** __restrict s, size_t n,
	_DECHanyuState * __restrict psenc, size_t * __restrict nresult)
{
	const char *s0;
	int ch;
	wchar_t wc;

	/* ei may be unused */
	_DIAGASSERT(s != NULL);
	_DIAGASSERT(psenc != NULL);
	_DIAGASSERT(nresult != NULL);

	/* A NULL input string only asks for the state to be reset. */
	if (*s == NULL) {
		_citrus_DECHanyu_init_state(ei, psenc);
		*nresult = _ENCODING_IS_STATE_DEPENDENT;
		return 0;
	}
	s0 = *s;

	/*
	 * Stage 1: load ch from the input (fresh start) or from the saved
	 * bytes (resume), validating whatever was saved.
	 */
	wc = (wchar_t)0;
	switch (psenc->chlen) {
	case 0:
		/* fresh character: fetch its first byte */
		if (n-- < 1)
			goto restart;
		ch = *s0++ & 0xFF;
		if (is_singlebyte(ch) != 0) {
			if (pwc != NULL)
				*pwc = (wchar_t)ch;
			/* the NUL character reports a length of 0 */
			*nresult = (size_t)((ch == 0) ? 0 : 1);
			*s = s0;
			return 0;
		}
		if (is_leadbyte(ch) == 0)
			goto ilseq;
		psenc->ch[psenc->chlen++] = ch;
		break;
	case 1:
		/* resuming after one saved byte, which must be a lead byte */
		ch = psenc->ch[0] & 0xFF;
		if (is_leadbyte(ch) == 0)
			return EINVAL;
		break;
	case 2: case 3:
		/* resuming a four-byte form: saved prefix must be 0xC2 0xCB */
		ch = psenc->ch[0] & 0xFF;
		if (is_hanyu1(ch) != 0) {
			ch = psenc->ch[1] & 0xFF;
			if (is_hanyu2(ch) != 0) {
				wc |= (wchar_t)HANYUBIT;
				break;
			}
		}
	/*FALLTHROUGH*/
	default:
		return EINVAL;
	}

	/*
	 * Stage 2: consume input until ch holds the lead byte of the final
	 * lead/trail pair.  psenc->chlen is 1, 2 or 3 here.
	 */
	switch (psenc->chlen) {
	case 1:
		/*
		 * 0xC2 opens the four-byte form: require 0xCB, then a lead
		 * byte.  Any other first byte already is the lead byte.
		 */
		if (is_hanyu1(ch) != 0) {
			if (n-- < 1)
				goto restart;
			ch = *s0++ & 0xFF;
			if (is_hanyu2(ch) == 0)
				goto ilseq;
			psenc->ch[psenc->chlen++] = ch;
			wc |= (wchar_t)HANYUBIT;
			if (n-- < 1)
				goto restart;
			ch = *s0++ & 0xFF;
			if (is_leadbyte(ch) == 0)
				goto ilseq;
			psenc->ch[psenc->chlen++] = ch;
		}
		break;
	case 2:
		/* prefix already saved: the next byte is the lead byte */
		if (n-- < 1)
			goto restart;
		ch = *s0++ & 0xFF;
		if (is_leadbyte(ch) == 0)
			goto ilseq;
		psenc->ch[psenc->chlen++] = ch;
		break;
	case 3:
		/* prefix and lead byte already saved */
		ch = psenc->ch[2] & 0xFF;
		if (is_leadbyte(ch) == 0)
			return EINVAL;
	}
	/* Stage 3: read the trail byte and assemble the wide character. */
	if (n-- < 1)
		goto restart;
	wc |= (wchar_t)(ch << 8);
	ch = *s0++ & 0xFF;
	if (is_trailbyte(ch) == 0)
		goto ilseq;
	wc |= (wchar_t)ch;
	if (pwc != NULL)
		*pwc = wc;
	/* only the bytes consumed by this call are counted */
	*nresult = (size_t)(s0 - *s);
	*s = s0;
	psenc->chlen = 0;

	return 0;

restart:
	/* out of input: the bytes consumed so far remain saved in psenc */
	*nresult = (size_t)-2;
	*s = s0;
	return 0;

ilseq:
	/* invalid byte: *s and psenc are left as they are */
	*nresult = (size_t)-1;
	return EILSEQ;
}
298
299static int
300/*ARGSUSED*/
301_citrus_DECHanyu_wcrtomb_priv(_DECHanyuEncodingInfo * __restrict ei,
302	char * __restrict s, size_t n, wchar_t wc,
303	_DECHanyuState * __restrict psenc, size_t * __restrict nresult)
304{
305	int ch;
306
307	/* ei may be unused */
308	_DIAGASSERT(s != NULL);
309	_DIAGASSERT(psenc != NULL);
310	_DIAGASSERT(nresult != NULL);
311
312	if (psenc->chlen != 0)
313		return EINVAL;
314
315	/* XXX: assume wchar_t as int */
316	if ((uint32_t)wc <= 0x7F) {
317		ch = wc & 0xFF;
318	} else {
319		if ((uint32_t)wc > 0xFFFF) {
320			if ((wc & ~0xFFFF) != HANYUBIT)
321				goto ilseq;
322			psenc->ch[psenc->chlen++] = (wc >> 24) & 0xFF;
323			psenc->ch[psenc->chlen++] = (wc >> 16) & 0xFF;
324			wc &= 0xFFFF;
325		}
326		ch = (wc >> 8) & 0xFF;
327		if (!is_leadbyte(ch))
328			goto ilseq;
329		psenc->ch[psenc->chlen++] = ch;
330		ch = wc & 0xFF;
331		if (is_trailbyte(ch) == 0)
332			goto ilseq;
333	}
334	psenc->ch[psenc->chlen++] = ch;
335	if (n < psenc->chlen) {
336		*nresult = (size_t)-1;
337		return E2BIG;
338	}
339	memcpy(s, psenc->ch, psenc->chlen);
340	*nresult = psenc->chlen;
341	psenc->chlen = 0;
342
343	return 0;
344
345ilseq:
346	*nresult = (size_t)-1;
347	return EILSEQ;
348}
349
350static __inline int
351/*ARGSUSED*/
352_citrus_DECHanyu_stdenc_wctocs(_DECHanyuEncodingInfo * __restrict ei,
353	_csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
354{
355	int plane;
356	wchar_t mask;
357
358	/* ei may be unused */
359	_DIAGASSERT(csid != NULL);
360	_DIAGASSERT(idx != NULL);
361
362	plane = 0;
363	mask = 0x7F;
364	/* XXX: assume wchar_t as int */
365	if ((uint32_t)wc > 0x7F) {
366		if ((uint32_t)wc > 0xFFFF) {
367			if ((wc & ~0xFFFF) != HANYUBIT)
368				return EILSEQ;
369			plane += 2;
370		}
371		if (is_leadbyte((wc >> 8) & 0xFF) == 0 ||
372		    is_trailbyte(wc & 0xFF) == 0)
373			return EILSEQ;
374		plane += (wc & 0x80) ? 1 : 2;
375		mask |= 0x7F00;
376	}
377	*csid = plane;
378	*idx = (_index_t)(wc & mask);
379
380	return 0;
381}
382
383static __inline int
384/*ARGSUSED*/
385_citrus_DECHanyu_stdenc_cstowc(_DECHanyuEncodingInfo * __restrict ei,
386	wchar_t * __restrict wc, _csid_t csid, _index_t idx)
387{
388	/* ei may be unused */
389	_DIAGASSERT(wc != NULL);
390
391	if (csid == 0) {
392		if (idx > 0x7F)
393			return EILSEQ;
394	} else if (csid <= 4) {
395		if (is_94charset(idx >> 8) == 0)
396			return EILSEQ;
397		if (is_94charset(idx & 0xFF) == 0)
398			return EILSEQ;
399		if (csid % 2)
400			idx |= 0x80;
401		idx |= 0x8000;
402		if (csid > 2)
403			idx |= HANYUBIT;
404	} else
405		return EILSEQ;
406	*wc = (wchar_t)idx;
407	return 0;
408}
409
410static __inline int
411/*ARGSUSED*/
412_citrus_DECHanyu_stdenc_get_state_desc_generic(
413	_DECHanyuEncodingInfo * __restrict ei,
414	_DECHanyuState * __restrict psenc, int * __restrict rstate)
415{
416	/* ei may be unused */
417	_DIAGASSERT(psenc != NULL);
418	_DIAGASSERT(rstate != NULL);
419
420	*rstate = (psenc->chlen == 0)
421	    ? _STDENC_SDGEN_INITIAL
422	    : _STDENC_SDGEN_INCOMPLETE_CHAR;
423	return 0;
424}
425
426/* ----------------------------------------------------------------------
427 * public interface for ctype
428 */
429
/*
 * Expand the ctype declaration and ops macros for DECHanyu, then include
 * the shared ctype template.
 * NOTE(review): the template presumably builds the public entry points from
 * the _FUNCNAME()/_ENCODING_* macros and the static functions defined above
 * -- confirm in citrus_ctype_template.h.
 */
_CITRUS_CTYPE_DECLS(DECHanyu);
_CITRUS_CTYPE_DEF_OPS(DECHanyu);

#include "citrus_ctype_template.h"
434
435
436/* ----------------------------------------------------------------------
437 * public interface for stdenc
438 */
439
/*
 * Expand the stdenc declaration and ops macros for DECHanyu, then include
 * the shared stdenc template.
 * NOTE(review): the template presumably builds the public entry points from
 * the _FUNCNAME()/_ENCODING_* macros and the static functions defined above
 * -- confirm in citrus_stdenc_template.h.
 */
_CITRUS_STDENC_DECLS(DECHanyu);
_CITRUS_STDENC_DEF_OPS(DECHanyu);

#include "citrus_stdenc_template.h"
444