1/* $NetBSD: citrus_dechanyu.c,v 1.4 2011/11/19 18:20:13 tnozaki Exp $ */
2
3/*-
4 * SPDX-License-Identifier: BSD-2-Clause
5 *
6 * Copyright (c)2007 Citrus Project,
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#include <sys/types.h>
32
33#include <assert.h>
34#include <errno.h>
35#include <limits.h>
36#include <stddef.h>
37#include <stdint.h>
38#include <stdio.h>
39#include <stdlib.h>
40#include <string.h>
41#include <wchar.h>
42
43#include "citrus_namespace.h"
44#include "citrus_types.h"
45#include "citrus_bcs.h"
46#include "citrus_module.h"
47#include "citrus_stdenc.h"
48#include "citrus_dechanyu.h"
49
/* ----------------------------------------------------------------------
 * private stuff used by templates
 */
53
/*
 * Conversion state: the bytes of a multibyte character that have been
 * consumed so far but do not yet form a complete character.
 */
typedef struct {
	size_t	 chlen;		/* number of valid bytes in ch[]; 0 = initial */
	char	 ch[4];		/* buffered bytes of the pending character */
} _DECHanyuState;
58
/*
 * Per-encoding information.  Nothing in this file reads the member; it
 * only keeps the structure non-empty.
 */
typedef struct {
	int	 dummy;		/* placeholder, never read here */
} _DECHanyuEncodingInfo;
62
/* Accessors for the encoding info and a per-function state slot. */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.__CONCAT(s_,_func_)

/* Name mangling and the types this encoding uses for info and state. */
#define _FUNCNAME(m)			__CONCAT(_citrus_DECHanyu_,m)
#define _ENCODING_INFO			_DECHanyuEncodingInfo
#define _ENCODING_STATE			_DECHanyuState

/*
 * Encoding properties: a character is at most 4 bytes long (the two-byte
 * 0xC2 0xCB prefix plus a lead/trail pair), and the encoding is declared
 * as not state dependent.
 */
#define _ENCODING_MB_CUR_MAX(_ei_)		4
#define _ENCODING_IS_STATE_DEPENDENT		0
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	0
72
73static __inline void
74/*ARGSUSED*/
75_citrus_DECHanyu_init_state(_DECHanyuEncodingInfo * __restrict ei __unused,
76    _DECHanyuState * __restrict psenc)
77{
78
79	psenc->chlen = 0;
80}
81
82#if 0
83static __inline void
84/*ARGSUSED*/
85_citrus_DECHanyu_pack_state(_DECHanyuEncodingInfo * __restrict ei __unused,
86    void * __restrict pspriv, const _DECHanyuState * __restrict psenc)
87{
88
89	memcpy(pspriv, (const void *)psenc, sizeof(*psenc));
90}
91
92static __inline void
93/*ARGSUSED*/
94_citrus_DECHanyu_unpack_state(_DECHanyuEncodingInfo * __restrict ei __unused,
95    _DECHanyuState * __restrict psenc,
96    const void * __restrict pspriv)
97{
98
99	memcpy((void *)psenc, pspriv, sizeof(*psenc));
100}
101#endif
102
103static void
104/*ARGSUSED*/
105_citrus_DECHanyu_encoding_module_uninit(_DECHanyuEncodingInfo *ei __unused)
106{
107
108	/* ei may be null */
109}
110
111static int
112/*ARGSUSED*/
113_citrus_DECHanyu_encoding_module_init(_DECHanyuEncodingInfo * __restrict ei __unused,
114    const void * __restrict var __unused, size_t lenvar __unused)
115{
116
117	/* ei may be null */
118	return (0);
119}
120
/*
 * True when c is at most 0x7F, i.e. a character encoded as a single byte.
 * There is no lower bound check; callers pass values masked with 0xFF.
 */
static __inline bool
is_singlebyte(int c)
{
	const int last_single = 0x7F;

	return (!(c > last_single));
}
127
/*
 * True when c can start a two-byte character: 0xA1 through 0xFE inclusive.
 */
static __inline bool
is_leadbyte(int c)
{
	if (c < 0xA1)
		return (false);
	return (c <= 0xFE);
}
134
/*
 * True when c can be the second byte of a two-byte character.  Bit 7 is
 * ignored, so both 0x21-0x7E and 0xA1-0xFE qualify.  Only bit 7 is
 * cleared: any bits above it still make the value fall out of range.
 */
static __inline bool
is_trailbyte(int c)
{
	const int stripped = c & ~0x80;

	if (stripped < 0x21)
		return (false);
	return (stripped <= 0x7E);
}
142
/*
 * True when c is 0xC2, the first byte of the two-byte prefix whose value
 * forms the upper half of HANYUBIT.
 */
static __inline bool
is_hanyu1(int c)
{
	const int prefix_first = 0xC2;

	return (prefix_first == c);
}
149
/*
 * True when c is 0xCB, the second byte of the two-byte prefix whose value
 * forms the upper half of HANYUBIT.
 */
static __inline bool
is_hanyu2(int c)
{
	const int prefix_second = 0xCB;

	return (prefix_second == c);
}
156
/*
 * Marker placed in the upper 16 bits of a wide character that was encoded
 * with the two-byte prefix 0xC2 0xCB (the prefix bytes themselves, shifted
 * into bits 31..16).
 */
#define HANYUBIT	0xC2CB0000
158
/*
 * True when c lies in the 94-character graphic range 0x21 through 0x7E.
 */
static __inline bool
is_94charset(int c)
{
	if (c < 0x21)
		return (false);
	return (c <= 0x7E);
}
165
166static int
167/*ARGSUSED*/
168_citrus_DECHanyu_mbrtowc_priv(_DECHanyuEncodingInfo * __restrict ei,
169    wchar_t * __restrict pwc, char ** __restrict s, size_t n,
170    _DECHanyuState * __restrict psenc, size_t * __restrict nresult)
171{
172	char *s0;
173	wchar_t wc;
174	int ch;
175
176	if (*s == NULL) {
177		_citrus_DECHanyu_init_state(ei, psenc);
178		*nresult = _ENCODING_IS_STATE_DEPENDENT;
179		return (0);
180	}
181	s0 = *s;
182
183	wc = (wchar_t)0;
184	switch (psenc->chlen) {
185	case 0:
186		if (n-- < 1)
187			goto restart;
188		ch = *s0++ & 0xFF;
189		if (is_singlebyte(ch)) {
190			if (pwc != NULL)
191				*pwc = (wchar_t)ch;
192			*nresult = (size_t)((ch == 0) ? 0 : 1);
193			*s = s0;
194			return (0);
195		}
196		if (!is_leadbyte(ch))
197			goto ilseq;
198		psenc->ch[psenc->chlen++] = ch;
199		break;
200	case 1:
201		ch = psenc->ch[0] & 0xFF;
202		if (!is_leadbyte(ch))
203			return (EINVAL);
204		break;
205	case 2: case 3:
206		ch = psenc->ch[0] & 0xFF;
207		if (is_hanyu1(ch)) {
208			ch = psenc->ch[1] & 0xFF;
209			if (is_hanyu2(ch)) {
210				wc |= (wchar_t)HANYUBIT;
211				break;
212			}
213		}
214	/*FALLTHROUGH*/
215	default:
216		return (EINVAL);
217	}
218
219	switch (psenc->chlen) {
220	case 1:
221		if (is_hanyu1(ch)) {
222			if (n-- < 1)
223				goto restart;
224			ch = *s0++ & 0xFF;
225			if (!is_hanyu2(ch))
226				goto ilseq;
227			psenc->ch[psenc->chlen++] = ch;
228			wc |= (wchar_t)HANYUBIT;
229			if (n-- < 1)
230				goto restart;
231			ch = *s0++ & 0xFF;
232			if (!is_leadbyte(ch))
233				goto ilseq;
234			psenc->ch[psenc->chlen++] = ch;
235		}
236		break;
237	case 2:
238		if (n-- < 1)
239			goto restart;
240		ch = *s0++ & 0xFF;
241		if (!is_leadbyte(ch))
242			goto ilseq;
243		psenc->ch[psenc->chlen++] = ch;
244		break;
245	case 3:
246		ch = psenc->ch[2] & 0xFF;
247		if (!is_leadbyte(ch))
248			return (EINVAL);
249	}
250	if (n-- < 1)
251		goto restart;
252	wc |= (wchar_t)(ch << 8);
253	ch = *s0++ & 0xFF;
254	if (!is_trailbyte(ch))
255		goto ilseq;
256	wc |= (wchar_t)ch;
257	if (pwc != NULL)
258		*pwc = wc;
259	*nresult = (size_t)(s0 - *s);
260	*s = s0;
261	psenc->chlen = 0;
262
263	return (0);
264
265restart:
266	*nresult = (size_t)-2;
267	*s = s0;
268	return (0);
269
270ilseq:
271	*nresult = (size_t)-1;
272	return (EILSEQ);
273}
274
275static int
276/*ARGSUSED*/
277_citrus_DECHanyu_wcrtomb_priv(_DECHanyuEncodingInfo * __restrict ei __unused,
278    char * __restrict s, size_t n, wchar_t wc,
279    _DECHanyuState * __restrict psenc, size_t * __restrict nresult)
280{
281	int ch;
282
283	if (psenc->chlen != 0)
284		return (EINVAL);
285
286	/* XXX: assume wchar_t as int */
287	if ((uint32_t)wc <= 0x7F) {
288		ch = wc & 0xFF;
289	} else {
290		if ((uint32_t)wc > 0xFFFF) {
291			if ((wc & ~0xFFFF) != (wchar_t)HANYUBIT)
292				goto ilseq;
293			psenc->ch[psenc->chlen++] = (wc >> 24) & 0xFF;
294			psenc->ch[psenc->chlen++] = (wc >> 16) & 0xFF;
295			wc &= 0xFFFF;
296		}
297		ch = (wc >> 8) & 0xFF;
298		if (!is_leadbyte(ch))
299			goto ilseq;
300		psenc->ch[psenc->chlen++] = ch;
301		ch = wc & 0xFF;
302		if (!is_trailbyte(ch))
303			goto ilseq;
304	}
305	psenc->ch[psenc->chlen++] = ch;
306	if (n < psenc->chlen) {
307		*nresult = (size_t)-1;
308		return (E2BIG);
309	}
310	memcpy(s, psenc->ch, psenc->chlen);
311	*nresult = psenc->chlen;
312	psenc->chlen = 0;
313
314	return (0);
315
316ilseq:
317	*nresult = (size_t)-1;
318	return (EILSEQ);
319}
320
321static __inline int
322/*ARGSUSED*/
323_citrus_DECHanyu_stdenc_wctocs(_DECHanyuEncodingInfo * __restrict ei __unused,
324    _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
325{
326	wchar_t mask;
327	int plane;
328
329	plane = 0;
330	mask = 0x7F;
331	/* XXX: assume wchar_t as int */
332	if ((uint32_t)wc > 0x7F) {
333		if ((uint32_t)wc > 0xFFFF) {
334			if ((wc & ~0xFFFF) != (wchar_t)HANYUBIT)
335				return (EILSEQ);
336			plane += 2;
337		}
338		if (!is_leadbyte((wc >> 8) & 0xFF) ||
339		    !is_trailbyte(wc & 0xFF))
340			return (EILSEQ);
341		plane += (wc & 0x80) ? 1 : 2;
342		mask |= 0x7F00;
343	}
344	*csid = plane;
345	*idx = (_index_t)(wc & mask);
346
347	return (0);
348}
349
350static __inline int
351/*ARGSUSED*/
352_citrus_DECHanyu_stdenc_cstowc(_DECHanyuEncodingInfo * __restrict ei __unused,
353    wchar_t * __restrict wc, _csid_t csid, _index_t idx)
354{
355
356	if (csid == 0) {
357		if (idx > 0x7F)
358			return (EILSEQ);
359	} else if (csid <= 4) {
360		if (!is_94charset(idx >> 8))
361			return (EILSEQ);
362		if (!is_94charset(idx & 0xFF))
363			return (EILSEQ);
364		if (csid % 2)
365			idx |= 0x80;
366		idx |= 0x8000;
367		if (csid > 2)
368			idx |= HANYUBIT;
369	} else
370		return (EILSEQ);
371	*wc = (wchar_t)idx;
372	return (0);
373}
374
375static __inline int
376/*ARGSUSED*/
377_citrus_DECHanyu_stdenc_get_state_desc_generic(
378    _DECHanyuEncodingInfo * __restrict ei __unused,
379    _DECHanyuState * __restrict psenc, int * __restrict rstate)
380{
381
382	*rstate = (psenc->chlen == 0)
383	    ? _STDENC_SDGEN_INITIAL
384	    : _STDENC_SDGEN_INCOMPLETE_CHAR;
385	return (0);
386}
387
388/* ----------------------------------------------------------------------
389 * public interface for stdenc
390 */
391
/* Declarations and operations table for the DECHanyu stdenc module. */
_CITRUS_STDENC_DECLS(DECHanyu);
_CITRUS_STDENC_DEF_OPS(DECHanyu);
394
395#include "citrus_stdenc_template.h"
396