/* $FreeBSD$ */
/* $NetBSD: citrus_dechanyu.c,v 1.3 2008/06/14 16:01:07 tnozaki Exp $ */

/*-
 * Copyright (c)2007 Citrus Project,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
29219019Sgabor#include <sys/cdefs.h>
30219019Sgabor#include <sys/types.h>
31219019Sgabor
32219019Sgabor#include <assert.h>
33219019Sgabor#include <errno.h>
34219019Sgabor#include <limits.h>
35219019Sgabor#include <stddef.h>
36219019Sgabor#include <stdint.h>
37219019Sgabor#include <stdio.h>
38219019Sgabor#include <stdlib.h>
39219019Sgabor#include <string.h>
40219019Sgabor#include <wchar.h>
41219019Sgabor
42219019Sgabor#include "citrus_namespace.h"
43219019Sgabor#include "citrus_types.h"
44219019Sgabor#include "citrus_bcs.h"
45219019Sgabor#include "citrus_module.h"
46219019Sgabor#include "citrus_stdenc.h"
47219019Sgabor#include "citrus_dechanyu.h"
48219019Sgabor
/* ----------------------------------------------------------------------
 * private definitions used by the stdenc templates
 */
52219019Sgabor
/*
 * Conversion state for one direction of the encoding: the bytes of a
 * multibyte character that have been gathered so far.  A chlen of zero
 * means no character is pending (the initial state).
 */
typedef struct {
	size_t	 chlen;		/* number of valid bytes held in ch[] */
	char	 ch[4];		/* pending bytes; a character is at most 4 bytes */
} _DECHanyuState;
57219019Sgabor
/*
 * Per-encoding configuration.  Nothing in this file reads or writes it;
 * the single placeholder member only keeps the structure non-empty.
 */
typedef struct {
	int	 dummy;		/* unused placeholder */
} _DECHanyuEncodingInfo;
61219019Sgabor
/*
 * Accessors for the "common encoding info" wrapper: the embedded
 * encoding info and the per-function saved state.
 * NOTE(review): the wrapper type is not declared in this file;
 * presumably it comes from citrus_stdenc_template.h -- confirm.
 */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.__CONCAT(s_,_func_)

/* Name mangling and type bindings used by the template code. */
#define _FUNCNAME(m)			__CONCAT(_citrus_DECHanyu_,m)
#define _ENCODING_INFO			_DECHanyuEncodingInfo
#define _ENCODING_STATE			_DECHanyuState

/* Encoding properties: at most 4 bytes per character, stateless. */
#define _ENCODING_MB_CUR_MAX(_ei_)		4
#define _ENCODING_IS_STATE_DEPENDENT		0
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	0
71219019Sgabor
72219019Sgaborstatic __inline void
73219019Sgabor/*ARGSUSED*/
74219019Sgabor_citrus_DECHanyu_init_state(_DECHanyuEncodingInfo * __restrict ei __unused,
75219019Sgabor    _DECHanyuState * __restrict psenc)
76219019Sgabor{
77219019Sgabor
78219019Sgabor	psenc->chlen = 0;
79219019Sgabor}
80219019Sgabor
81219019Sgaborstatic __inline void
82219019Sgabor/*ARGSUSED*/
83219019Sgabor_citrus_DECHanyu_pack_state(_DECHanyuEncodingInfo * __restrict ei __unused,
84219019Sgabor    void * __restrict pspriv, const _DECHanyuState * __restrict psenc)
85219019Sgabor{
86219019Sgabor
87219019Sgabor	memcpy(pspriv, (const void *)psenc, sizeof(*psenc));
88219019Sgabor}
89219019Sgabor
90219019Sgaborstatic __inline void
91219019Sgabor/*ARGSUSED*/
92219019Sgabor_citrus_DECHanyu_unpack_state(_DECHanyuEncodingInfo * __restrict ei __unused,
93219019Sgabor    _DECHanyuState * __restrict psenc,
94219019Sgabor    const void * __restrict pspriv)
95219019Sgabor{
96219019Sgabor
97219019Sgabor	memcpy((void *)psenc, pspriv, sizeof(*psenc));
98219019Sgabor}
99219019Sgabor
100219019Sgaborstatic void
101219019Sgabor/*ARGSUSED*/
102219019Sgabor_citrus_DECHanyu_encoding_module_uninit(_DECHanyuEncodingInfo *ei __unused)
103219019Sgabor{
104219019Sgabor
105219019Sgabor	/* ei may be null */
106219019Sgabor}
107219019Sgabor
108219019Sgaborstatic int
109219019Sgabor/*ARGSUSED*/
110219019Sgabor_citrus_DECHanyu_encoding_module_init(_DECHanyuEncodingInfo * __restrict ei __unused,
111219019Sgabor    const void * __restrict var __unused, size_t lenvar __unused)
112219019Sgabor{
113219019Sgabor
114219019Sgabor	/* ei may be null */
115219019Sgabor	return (0);
116219019Sgabor}
117219019Sgabor
/*
 * True when c encodes a character on its own, i.e. it does not exceed
 * 0x7F.  Callers pass a byte value already masked to 0..0xFF.
 */
static __inline bool
is_singlebyte(int c)
{

	return (c < 0x80);
}
124219019Sgabor
/*
 * True when c can start a two-byte character: 0xA1 through 0xFE
 * inclusive.
 */
static __inline bool
is_leadbyte(int c)
{

	return (!(c < 0xA1 || c > 0xFE));
}
131219019Sgabor
/*
 * True when c can be the final byte of a two-byte character.  Bit 7 is
 * ignored, so both 0x21..0x7E and 0xA1..0xFE are accepted.
 */
static __inline bool
is_trailbyte(int c)
{
	/* Clear only bit 7; any higher bits still disqualify the value. */
	const int low = c & ~0x80;

	return (low > 0x20 && low < 0x7F);
}
139219019Sgabor
/*
 * True when c is 0xC2, the first byte of the two-byte 0xC2 0xCB prefix
 * that introduces a four-byte character.
 */
static __inline bool
is_hanyu1(int c)
{

	return (!(c != 0xC2));
}
146219019Sgabor
/*
 * True when c is 0xCB, the second byte of the two-byte 0xC2 0xCB prefix
 * that introduces a four-byte character.
 */
static __inline bool
is_hanyu2(int c)
{

	return (!(c != 0xCB));
}
153219019Sgabor
/*
 * Upper 16 bits of the wide-character value of a four-byte character:
 * the 0xC2 0xCB prefix bytes placed above the lead/trail byte pair.
 */
#define HANYUBIT	0xC2CB0000U
155219019Sgabor
/*
 * True when c lies in the 94-character graphic range 0x21..0x7E used
 * for each byte of a character-set index.
 */
static __inline bool
is_94charset(int c)
{

	return (!(c < 0x21 || c > 0x7E));
}
162219019Sgabor
163219019Sgaborstatic int
164219019Sgabor/*ARGSUSED*/
165219019Sgabor_citrus_DECHanyu_mbrtowc_priv(_DECHanyuEncodingInfo * __restrict ei,
166252583Speter    wchar_t * __restrict pwc, const char ** __restrict s, size_t n,
167219019Sgabor    _DECHanyuState * __restrict psenc, size_t * __restrict nresult)
168219019Sgabor{
169252583Speter	const char *s0;
170219019Sgabor	wchar_t wc;
171219019Sgabor	int ch;
172219019Sgabor
173219019Sgabor	if (*s == NULL) {
174219019Sgabor		_citrus_DECHanyu_init_state(ei, psenc);
175219019Sgabor		*nresult = _ENCODING_IS_STATE_DEPENDENT;
176219019Sgabor		return (0);
177219019Sgabor	}
178219019Sgabor	s0 = *s;
179219019Sgabor
180219019Sgabor	wc = (wchar_t)0;
181219019Sgabor	switch (psenc->chlen) {
182219019Sgabor	case 0:
183219019Sgabor		if (n-- < 1)
184219019Sgabor			goto restart;
185219019Sgabor		ch = *s0++ & 0xFF;
186219019Sgabor		if (is_singlebyte(ch)) {
187219019Sgabor			if (pwc != NULL)
188219019Sgabor				*pwc = (wchar_t)ch;
189219019Sgabor			*nresult = (size_t)((ch == 0) ? 0 : 1);
190219019Sgabor			*s = s0;
191219019Sgabor			return (0);
192219019Sgabor		}
193219019Sgabor		if (!is_leadbyte(ch))
194219019Sgabor			goto ilseq;
195219019Sgabor		psenc->ch[psenc->chlen++] = ch;
196219019Sgabor		break;
197219019Sgabor	case 1:
198219019Sgabor		ch = psenc->ch[0] & 0xFF;
199219019Sgabor		if (!is_leadbyte(ch))
200219019Sgabor			return (EINVAL);
201219019Sgabor		break;
202219019Sgabor	case 2: case 3:
203219019Sgabor		ch = psenc->ch[0] & 0xFF;
204219019Sgabor		if (is_hanyu1(ch)) {
205219019Sgabor			ch = psenc->ch[1] & 0xFF;
206219019Sgabor			if (is_hanyu2(ch)) {
207219019Sgabor				wc |= (wchar_t)HANYUBIT;
208219019Sgabor				break;
209219019Sgabor			}
210219019Sgabor		}
211219019Sgabor	/*FALLTHROUGH*/
212219019Sgabor	default:
213219019Sgabor		return (EINVAL);
214219019Sgabor	}
215219019Sgabor
216219019Sgabor	switch (psenc->chlen) {
217219019Sgabor	case 1:
218219019Sgabor		if (is_hanyu1(ch)) {
219219019Sgabor			if (n-- < 1)
220219019Sgabor				goto restart;
221219019Sgabor			ch = *s0++ & 0xFF;
222219019Sgabor			if (!is_hanyu2(ch))
223219019Sgabor				goto ilseq;
224219019Sgabor			psenc->ch[psenc->chlen++] = ch;
225219019Sgabor			wc |= (wchar_t)HANYUBIT;
226219019Sgabor			if (n-- < 1)
227219019Sgabor				goto restart;
228219019Sgabor			ch = *s0++ & 0xFF;
229219019Sgabor			if (!is_leadbyte(ch))
230219019Sgabor				goto ilseq;
231219019Sgabor			psenc->ch[psenc->chlen++] = ch;
232219019Sgabor		}
233219019Sgabor		break;
234219019Sgabor	case 2:
235219019Sgabor		if (n-- < 1)
236219019Sgabor			goto restart;
237219019Sgabor		ch = *s0++ & 0xFF;
238219019Sgabor		if (!is_leadbyte(ch))
239219019Sgabor			goto ilseq;
240219019Sgabor		psenc->ch[psenc->chlen++] = ch;
241219019Sgabor		break;
242219019Sgabor	case 3:
243219019Sgabor		ch = psenc->ch[2] & 0xFF;
244219019Sgabor		if (!is_leadbyte(ch))
245219019Sgabor			return (EINVAL);
246219019Sgabor	}
247219019Sgabor	if (n-- < 1)
248219019Sgabor		goto restart;
249219019Sgabor	wc |= (wchar_t)(ch << 8);
250219019Sgabor	ch = *s0++ & 0xFF;
251219019Sgabor	if (!is_trailbyte(ch))
252219019Sgabor		goto ilseq;
253219019Sgabor	wc |= (wchar_t)ch;
254219019Sgabor	if (pwc != NULL)
255219019Sgabor		*pwc = wc;
256219019Sgabor	*nresult = (size_t)(s0 - *s);
257219019Sgabor	*s = s0;
258219019Sgabor	psenc->chlen = 0;
259219019Sgabor
260219019Sgabor	return (0);
261219019Sgabor
262219019Sgaborrestart:
263219019Sgabor	*nresult = (size_t)-2;
264219019Sgabor	*s = s0;
265219019Sgabor	return (0);
266219019Sgabor
267219019Sgaborilseq:
268219019Sgabor	*nresult = (size_t)-1;
269219019Sgabor	return (EILSEQ);
270219019Sgabor}
271219019Sgabor
272219019Sgaborstatic int
273219019Sgabor/*ARGSUSED*/
274219019Sgabor_citrus_DECHanyu_wcrtomb_priv(_DECHanyuEncodingInfo * __restrict ei __unused,
275219019Sgabor    char * __restrict s, size_t n, wchar_t wc,
276219019Sgabor    _DECHanyuState * __restrict psenc, size_t * __restrict nresult)
277219019Sgabor{
278219019Sgabor	int ch;
279219019Sgabor
280219019Sgabor	if (psenc->chlen != 0)
281219019Sgabor		return (EINVAL);
282219019Sgabor
283219019Sgabor	/* XXX: assume wchar_t as int */
284219019Sgabor	if ((uint32_t)wc <= 0x7F) {
285219019Sgabor		ch = wc & 0xFF;
286219019Sgabor	} else {
287219019Sgabor		if ((uint32_t)wc > 0xFFFF) {
288219019Sgabor			if ((wc & ~0xFFFF) != (wchar_t)HANYUBIT)
289219019Sgabor				goto ilseq;
290219019Sgabor			psenc->ch[psenc->chlen++] = (wc >> 24) & 0xFF;
291219019Sgabor			psenc->ch[psenc->chlen++] = (wc >> 16) & 0xFF;
292219019Sgabor			wc &= 0xFFFF;
293219019Sgabor		}
294219019Sgabor		ch = (wc >> 8) & 0xFF;
295219019Sgabor		if (!is_leadbyte(ch))
296219019Sgabor			goto ilseq;
297219019Sgabor		psenc->ch[psenc->chlen++] = ch;
298219019Sgabor		ch = wc & 0xFF;
299219019Sgabor		if (!is_trailbyte(ch))
300219019Sgabor			goto ilseq;
301219019Sgabor	}
302219019Sgabor	psenc->ch[psenc->chlen++] = ch;
303219019Sgabor	if (n < psenc->chlen) {
304219019Sgabor		*nresult = (size_t)-1;
305219019Sgabor		return (E2BIG);
306219019Sgabor	}
307219019Sgabor	memcpy(s, psenc->ch, psenc->chlen);
308219019Sgabor	*nresult = psenc->chlen;
309219019Sgabor	psenc->chlen = 0;
310219019Sgabor
311219019Sgabor	return (0);
312219019Sgabor
313219019Sgaborilseq:
314219019Sgabor	*nresult = (size_t)-1;
315219019Sgabor	return (EILSEQ);
316219019Sgabor}
317219019Sgabor
318219019Sgaborstatic __inline int
319219019Sgabor/*ARGSUSED*/
320219019Sgabor_citrus_DECHanyu_stdenc_wctocs(_DECHanyuEncodingInfo * __restrict ei __unused,
321219019Sgabor    _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
322219019Sgabor{
323219019Sgabor	wchar_t mask;
324219019Sgabor	int plane;
325219019Sgabor
326219019Sgabor	plane = 0;
327219019Sgabor	mask = 0x7F;
328219019Sgabor	/* XXX: assume wchar_t as int */
329219019Sgabor	if ((uint32_t)wc > 0x7F) {
330219019Sgabor		if ((uint32_t)wc > 0xFFFF) {
331219019Sgabor			if ((wc & ~0xFFFF) != (wchar_t)HANYUBIT)
332219019Sgabor				return (EILSEQ);
333219019Sgabor			plane += 2;
334219019Sgabor		}
335219019Sgabor		if (!is_leadbyte((wc >> 8) & 0xFF) ||
336219019Sgabor		    !is_trailbyte(wc & 0xFF))
337219019Sgabor			return (EILSEQ);
338219019Sgabor		plane += (wc & 0x80) ? 1 : 2;
339219019Sgabor		mask |= 0x7F00;
340219019Sgabor	}
341219019Sgabor	*csid = plane;
342219019Sgabor	*idx = (_index_t)(wc & mask);
343219019Sgabor
344219019Sgabor	return (0);
345219019Sgabor}
346219019Sgabor
347219019Sgaborstatic __inline int
348219019Sgabor/*ARGSUSED*/
349219019Sgabor_citrus_DECHanyu_stdenc_cstowc(_DECHanyuEncodingInfo * __restrict ei __unused,
350219019Sgabor    wchar_t * __restrict wc, _csid_t csid, _index_t idx)
351219019Sgabor{
352219019Sgabor
353219019Sgabor	if (csid == 0) {
354219019Sgabor		if (idx > 0x7F)
355219019Sgabor			return (EILSEQ);
356219019Sgabor	} else if (csid <= 4) {
357219019Sgabor		if (!is_94charset(idx >> 8))
358219019Sgabor			return (EILSEQ);
359219019Sgabor		if (!is_94charset(idx & 0xFF))
360219019Sgabor			return (EILSEQ);
361219019Sgabor		if (csid % 2)
362219019Sgabor			idx |= 0x80;
363219019Sgabor		idx |= 0x8000;
364219019Sgabor		if (csid > 2)
365219019Sgabor			idx |= HANYUBIT;
366219019Sgabor	} else
367219019Sgabor		return (EILSEQ);
368219019Sgabor	*wc = (wchar_t)idx;
369219019Sgabor	return (0);
370219019Sgabor}
371219019Sgabor
372219019Sgaborstatic __inline int
373219019Sgabor/*ARGSUSED*/
374219019Sgabor_citrus_DECHanyu_stdenc_get_state_desc_generic(
375219019Sgabor    _DECHanyuEncodingInfo * __restrict ei __unused,
376219019Sgabor    _DECHanyuState * __restrict psenc, int * __restrict rstate)
377219019Sgabor{
378219019Sgabor
379219019Sgabor	*rstate = (psenc->chlen == 0)
380219019Sgabor	    ? _STDENC_SDGEN_INITIAL
381219019Sgabor	    : _STDENC_SDGEN_INCOMPLETE_CHAR;
382219019Sgabor	return (0);
383219019Sgabor}
384219019Sgabor
/* ----------------------------------------------------------------------
 * public interface for stdenc
 */
388219019Sgabor
389219019Sgabor_CITRUS_STDENC_DECLS(DECHanyu);
390219019Sgabor_CITRUS_STDENC_DEF_OPS(DECHanyu);
391219019Sgabor
392219019Sgabor#include "citrus_stdenc_template.h"
393