1/* $FreeBSD$ */
2/* $NetBSD: citrus_dechanyu.c,v 1.4 2011/11/19 18:20:13 tnozaki Exp $ */
3
4/*-
5 * SPDX-License-Identifier: BSD-2-Clause
6 *
7 * Copyright (c)2007 Citrus Project,
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31#include <sys/cdefs.h>
32#include <sys/types.h>
33
34#include <assert.h>
35#include <errno.h>
36#include <limits.h>
37#include <stddef.h>
38#include <stdint.h>
39#include <stdio.h>
40#include <stdlib.h>
41#include <string.h>
42#include <wchar.h>
43
44#include "citrus_namespace.h"
45#include "citrus_types.h"
46#include "citrus_bcs.h"
47#include "citrus_module.h"
48#include "citrus_stdenc.h"
49#include "citrus_dechanyu.h"
50
/* ----------------------------------------------------------------------
 * private stuff used by templates
 */
54
/*
 * Conversion state: bytes of a multibyte character that have been
 * consumed from the input but not yet turned into a wide character.
 */
typedef struct {
	size_t	 chlen;		/* number of valid bytes in ch[] (0 = initial) */
	char	 ch[4];		/* buffered bytes, in input order */
} _DECHanyuState;
59
/*
 * Per-encoding information.  Nothing in this file reads or writes the
 * member; it appears to exist only so that the structure is not empty.
 */
typedef struct {
	int	 dummy;		/* unused placeholder */
} _DECHanyuEncodingInfo;
63
/*
 * Accessors and parameters for the generic encoding template.
 * NOTE(review): presumably consumed by citrus_stdenc_template.h, which is
 * included at the end of this file -- confirm against that header.
 */

/* Reach the encoding info / a per-function state inside a _cei_ object. */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.__CONCAT(s_,_func_)

/* Build the module-specific name _citrus_DECHanyu_<m>. */
#define _FUNCNAME(m)			__CONCAT(_citrus_DECHanyu_,m)
#define _ENCODING_INFO			_DECHanyuEncodingInfo
#define _ENCODING_STATE			_DECHanyuState
/* Longest sequence: 0xC2 0xCB prefix + lead byte + trail byte. */
#define _ENCODING_MB_CUR_MAX(_ei_)		4
#define _ENCODING_IS_STATE_DEPENDENT		0
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	0
73
74static __inline void
75/*ARGSUSED*/
76_citrus_DECHanyu_init_state(_DECHanyuEncodingInfo * __restrict ei __unused,
77    _DECHanyuState * __restrict psenc)
78{
79
80	psenc->chlen = 0;
81}
82
/*
 * The pack/unpack helpers below are compiled out (#if 0).  When enabled
 * they copy the whole state structure to and from an opaque buffer.
 */
#if 0
/* Copy *psenc into the caller-supplied storage pspriv. */
static __inline void
/*ARGSUSED*/
_citrus_DECHanyu_pack_state(_DECHanyuEncodingInfo * __restrict ei __unused,
    void * __restrict pspriv, const _DECHanyuState * __restrict psenc)
{

	memcpy(pspriv, (const void *)psenc, sizeof(*psenc));
}

/* Restore *psenc from the storage pspriv. */
static __inline void
/*ARGSUSED*/
_citrus_DECHanyu_unpack_state(_DECHanyuEncodingInfo * __restrict ei __unused,
    _DECHanyuState * __restrict psenc,
    const void * __restrict pspriv)
{

	memcpy((void *)psenc, pspriv, sizeof(*psenc));
}
#endif
103
104static void
105/*ARGSUSED*/
106_citrus_DECHanyu_encoding_module_uninit(_DECHanyuEncodingInfo *ei __unused)
107{
108
109	/* ei may be null */
110}
111
112static int
113/*ARGSUSED*/
114_citrus_DECHanyu_encoding_module_init(_DECHanyuEncodingInfo * __restrict ei __unused,
115    const void * __restrict var __unused, size_t lenvar __unused)
116{
117
118	/* ei may be null */
119	return (0);
120}
121
/*
 * True when c does not exceed 0x7F, i.e. it is a complete one-byte
 * character.  Callers pass a byte already masked to 0x00-0xFF.
 */
static __inline bool
is_singlebyte(int c)
{
	const int last_single = 0x7F;

	return (!(c > last_single));
}
128
/*
 * True when c can start a two-byte character: 0xA1 through 0xFE inclusive.
 */
static __inline bool
is_leadbyte(int c)
{
	const int first_lead = 0xA1;
	const int last_lead = 0xFE;

	if (c < first_lead)
		return (false);
	return (c <= last_lead);
}
135
/*
 * True when c can end a two-byte character.  The 0x80 bit is ignored, so
 * both 0x21-0x7E and 0xA1-0xFE are accepted.
 */
static __inline bool
is_trailbyte(int c)
{
	const int low7 = c & ~0x80;	/* drop the high bit before testing */

	return (!(low7 < 0x21 || low7 > 0x7E));
}
143
/*
 * True when c is 0xC2, the first byte of the two-byte 0xC2 0xCB prefix.
 */
static __inline bool
is_hanyu1(int c)
{
	const int prefix_first = 0xC2;

	return (c == prefix_first);
}
150
/*
 * True when c is 0xCB, the second byte of the two-byte 0xC2 0xCB prefix.
 */
static __inline bool
is_hanyu2(int c)
{
	const int prefix_second = 0xCB;

	return (c == prefix_second);
}
157
/*
 * Upper 16 bits of a wide character that was (or must be) encoded with the
 * 0xC2 0xCB prefix: the two prefix bytes themselves.
 */
#define HANYUBIT	0xC2CB0000
159
/*
 * True when c lies in 0x21-0x7E, the range checked for each byte of a
 * character-set index.
 */
static __inline bool
is_94charset(int c)
{
	const int lowest = 0x21;
	const int highest = 0x7E;

	return (!(c < lowest || c > highest));
}
166
167static int
168/*ARGSUSED*/
169_citrus_DECHanyu_mbrtowc_priv(_DECHanyuEncodingInfo * __restrict ei,
170    wchar_t * __restrict pwc, char ** __restrict s, size_t n,
171    _DECHanyuState * __restrict psenc, size_t * __restrict nresult)
172{
173	char *s0;
174	wchar_t wc;
175	int ch;
176
177	if (*s == NULL) {
178		_citrus_DECHanyu_init_state(ei, psenc);
179		*nresult = _ENCODING_IS_STATE_DEPENDENT;
180		return (0);
181	}
182	s0 = *s;
183
184	wc = (wchar_t)0;
185	switch (psenc->chlen) {
186	case 0:
187		if (n-- < 1)
188			goto restart;
189		ch = *s0++ & 0xFF;
190		if (is_singlebyte(ch)) {
191			if (pwc != NULL)
192				*pwc = (wchar_t)ch;
193			*nresult = (size_t)((ch == 0) ? 0 : 1);
194			*s = s0;
195			return (0);
196		}
197		if (!is_leadbyte(ch))
198			goto ilseq;
199		psenc->ch[psenc->chlen++] = ch;
200		break;
201	case 1:
202		ch = psenc->ch[0] & 0xFF;
203		if (!is_leadbyte(ch))
204			return (EINVAL);
205		break;
206	case 2: case 3:
207		ch = psenc->ch[0] & 0xFF;
208		if (is_hanyu1(ch)) {
209			ch = psenc->ch[1] & 0xFF;
210			if (is_hanyu2(ch)) {
211				wc |= (wchar_t)HANYUBIT;
212				break;
213			}
214		}
215	/*FALLTHROUGH*/
216	default:
217		return (EINVAL);
218	}
219
220	switch (psenc->chlen) {
221	case 1:
222		if (is_hanyu1(ch)) {
223			if (n-- < 1)
224				goto restart;
225			ch = *s0++ & 0xFF;
226			if (!is_hanyu2(ch))
227				goto ilseq;
228			psenc->ch[psenc->chlen++] = ch;
229			wc |= (wchar_t)HANYUBIT;
230			if (n-- < 1)
231				goto restart;
232			ch = *s0++ & 0xFF;
233			if (!is_leadbyte(ch))
234				goto ilseq;
235			psenc->ch[psenc->chlen++] = ch;
236		}
237		break;
238	case 2:
239		if (n-- < 1)
240			goto restart;
241		ch = *s0++ & 0xFF;
242		if (!is_leadbyte(ch))
243			goto ilseq;
244		psenc->ch[psenc->chlen++] = ch;
245		break;
246	case 3:
247		ch = psenc->ch[2] & 0xFF;
248		if (!is_leadbyte(ch))
249			return (EINVAL);
250	}
251	if (n-- < 1)
252		goto restart;
253	wc |= (wchar_t)(ch << 8);
254	ch = *s0++ & 0xFF;
255	if (!is_trailbyte(ch))
256		goto ilseq;
257	wc |= (wchar_t)ch;
258	if (pwc != NULL)
259		*pwc = wc;
260	*nresult = (size_t)(s0 - *s);
261	*s = s0;
262	psenc->chlen = 0;
263
264	return (0);
265
266restart:
267	*nresult = (size_t)-2;
268	*s = s0;
269	return (0);
270
271ilseq:
272	*nresult = (size_t)-1;
273	return (EILSEQ);
274}
275
276static int
277/*ARGSUSED*/
278_citrus_DECHanyu_wcrtomb_priv(_DECHanyuEncodingInfo * __restrict ei __unused,
279    char * __restrict s, size_t n, wchar_t wc,
280    _DECHanyuState * __restrict psenc, size_t * __restrict nresult)
281{
282	int ch;
283
284	if (psenc->chlen != 0)
285		return (EINVAL);
286
287	/* XXX: assume wchar_t as int */
288	if ((uint32_t)wc <= 0x7F) {
289		ch = wc & 0xFF;
290	} else {
291		if ((uint32_t)wc > 0xFFFF) {
292			if ((wc & ~0xFFFF) != (wchar_t)HANYUBIT)
293				goto ilseq;
294			psenc->ch[psenc->chlen++] = (wc >> 24) & 0xFF;
295			psenc->ch[psenc->chlen++] = (wc >> 16) & 0xFF;
296			wc &= 0xFFFF;
297		}
298		ch = (wc >> 8) & 0xFF;
299		if (!is_leadbyte(ch))
300			goto ilseq;
301		psenc->ch[psenc->chlen++] = ch;
302		ch = wc & 0xFF;
303		if (!is_trailbyte(ch))
304			goto ilseq;
305	}
306	psenc->ch[psenc->chlen++] = ch;
307	if (n < psenc->chlen) {
308		*nresult = (size_t)-1;
309		return (E2BIG);
310	}
311	memcpy(s, psenc->ch, psenc->chlen);
312	*nresult = psenc->chlen;
313	psenc->chlen = 0;
314
315	return (0);
316
317ilseq:
318	*nresult = (size_t)-1;
319	return (EILSEQ);
320}
321
322static __inline int
323/*ARGSUSED*/
324_citrus_DECHanyu_stdenc_wctocs(_DECHanyuEncodingInfo * __restrict ei __unused,
325    _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
326{
327	wchar_t mask;
328	int plane;
329
330	plane = 0;
331	mask = 0x7F;
332	/* XXX: assume wchar_t as int */
333	if ((uint32_t)wc > 0x7F) {
334		if ((uint32_t)wc > 0xFFFF) {
335			if ((wc & ~0xFFFF) != (wchar_t)HANYUBIT)
336				return (EILSEQ);
337			plane += 2;
338		}
339		if (!is_leadbyte((wc >> 8) & 0xFF) ||
340		    !is_trailbyte(wc & 0xFF))
341			return (EILSEQ);
342		plane += (wc & 0x80) ? 1 : 2;
343		mask |= 0x7F00;
344	}
345	*csid = plane;
346	*idx = (_index_t)(wc & mask);
347
348	return (0);
349}
350
351static __inline int
352/*ARGSUSED*/
353_citrus_DECHanyu_stdenc_cstowc(_DECHanyuEncodingInfo * __restrict ei __unused,
354    wchar_t * __restrict wc, _csid_t csid, _index_t idx)
355{
356
357	if (csid == 0) {
358		if (idx > 0x7F)
359			return (EILSEQ);
360	} else if (csid <= 4) {
361		if (!is_94charset(idx >> 8))
362			return (EILSEQ);
363		if (!is_94charset(idx & 0xFF))
364			return (EILSEQ);
365		if (csid % 2)
366			idx |= 0x80;
367		idx |= 0x8000;
368		if (csid > 2)
369			idx |= HANYUBIT;
370	} else
371		return (EILSEQ);
372	*wc = (wchar_t)idx;
373	return (0);
374}
375
376static __inline int
377/*ARGSUSED*/
378_citrus_DECHanyu_stdenc_get_state_desc_generic(
379    _DECHanyuEncodingInfo * __restrict ei __unused,
380    _DECHanyuState * __restrict psenc, int * __restrict rstate)
381{
382
383	*rstate = (psenc->chlen == 0)
384	    ? _STDENC_SDGEN_INITIAL
385	    : _STDENC_SDGEN_INCOMPLETE_CHAR;
386	return (0);
387}
388
389/* ----------------------------------------------------------------------
390 * public interface for stdenc
391 */
392
/*
 * Declare and define the stdenc operations for DECHanyu, then pull in the
 * shared template.
 * NOTE(review): the template presumably builds the public entry points
 * from the _FUNCNAME()/_ENCODING_* macros and the static functions defined
 * above -- confirm against citrus_stdenc_template.h.
 */
_CITRUS_STDENC_DECLS(DECHanyu);
_CITRUS_STDENC_DEF_OPS(DECHanyu);

#include "citrus_stdenc_template.h"
397