1/* $FreeBSD$ */
2/* $NetBSD: citrus_zw.c,v 1.4 2008/06/14 16:01:08 tnozaki Exp $ */
3
4/*-
5 * Copyright (c)2004, 2006 Citrus Project,
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 */
30
31#include <sys/cdefs.h>
32#include <sys/types.h>
33
34#include <assert.h>
35#include <errno.h>
36#include <limits.h>
37#include <stddef.h>
38#include <stdio.h>
39#include <stdint.h>
40#include <stdlib.h>
41#include <string.h>
42#include <wchar.h>
43
44#include "citrus_namespace.h"
45#include "citrus_types.h"
46#include "citrus_module.h"
47#include "citrus_stdenc.h"
48#include "citrus_zw.h"
49
/* ----------------------------------------------------------------------
 * private definitions used by the templates
 */
53
/*
 * Per-encoding configuration.  Nothing in this file reads or writes the
 * single member; it only gives the structure a non-zero size.
 */
typedef struct {
	int	 dummy;		/* placeholder, never accessed in this file */
} _ZWEncodingInfo;
57
/*
 * Shift state of the converter.  The enumerator order (and therefore the
 * numeric values, NONE == 0) is kept exactly as it was.
 */
typedef enum {
	NONE,		/* initial state, as set by _citrus_ZW_init_state() */
	AMBIGIOUS,	/* (sic) decoder saw 'z' in state NONE, next byte decides */
	ASCII,		/* decoder is inside a line that is not "zW"-prefixed */
	GB2312		/* inside a "zW" line: two-byte characters and escapes */
} _ZWCharset;
61
62typedef struct {
63	_ZWCharset	 charset;
64	int		 chlen;
65	char		 ch[4];
66} _ZWState;
67
/*
 * Accessors for a containing object: _CEI_TO_EI yields the address of its
 * `ei' member, _CEI_TO_STATE selects the `states.s_<func>' member.
 * NOTE(review): the containing type is not declared in this file; it is
 * presumably provided by an included template header -- confirm.
 */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.s_##_func_

/*
 * Configuration knobs.  _FUNCNAME() prefixes a name with _citrus_ZW_, the
 * two type macros name the structures defined above, the maximum
 * multibyte length is reported as MB_LEN_MAX, and the encoding is flagged
 * as state dependent.  A state is treated as needing explicit
 * initialisation whenever its charset is not NONE.
 * NOTE(review): presumably consumed by citrus_stdenc_template.h -- confirm.
 */
#define _FUNCNAME(m)			_citrus_ZW_##m
#define _ENCODING_INFO			_ZWEncodingInfo
#define _ENCODING_STATE			_ZWState
#define _ENCODING_MB_CUR_MAX(_ei_)	MB_LEN_MAX
#define _ENCODING_IS_STATE_DEPENDENT		1
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	((_ps_)->charset != NONE)
77
78static __inline void
79/*ARGSUSED*/
80_citrus_ZW_init_state(_ZWEncodingInfo * __restrict ei __unused,
81    _ZWState * __restrict psenc)
82{
83
84	psenc->chlen = 0;
85	psenc->charset = NONE;
86}
87
#if 0
/*
 * Compiled out.  Copies the whole state structure into an opaque buffer;
 * pspriv must have room for sizeof(_ZWState) bytes.
 */
static __inline void
/*ARGSUSED*/
_citrus_ZW_pack_state(_ZWEncodingInfo * __restrict ei __unused,
    void *__restrict pspriv, const _ZWState * __restrict psenc)
{

	memcpy(pspriv, psenc, sizeof(_ZWState));
}

/*
 * Compiled out.  Inverse of the above: restores the state structure from
 * an opaque buffer holding sizeof(_ZWState) bytes.
 */
static __inline void
/*ARGSUSED*/
_citrus_ZW_unpack_state(_ZWEncodingInfo * __restrict ei __unused,
    _ZWState * __restrict psenc, const void * __restrict pspriv)
{

	memcpy(psenc, pspriv, sizeof(_ZWState));
}
#endif
107
108static int
109_citrus_ZW_mbrtowc_priv(_ZWEncodingInfo * __restrict ei,
110    wchar_t * __restrict pwc, char **__restrict s, size_t n,
111    _ZWState * __restrict psenc, size_t * __restrict nresult)
112{
113	char *s0;
114	wchar_t  wc;
115	int ch, len;
116
117	if (*s == NULL) {
118		_citrus_ZW_init_state(ei, psenc);
119		*nresult = (size_t)_ENCODING_IS_STATE_DEPENDENT;
120		return (0);
121	}
122	s0 = *s;
123	len = 0;
124
125#define	STORE				\
126do {					\
127	if (n-- < 1) {			\
128		*nresult = (size_t)-2;	\
129		*s = s0;		\
130		return (0);		\
131	}				\
132	ch = (unsigned char)*s0++;	\
133	if (len++ > MB_LEN_MAX || ch > 0x7F)\
134		goto ilseq;		\
135	psenc->ch[psenc->chlen++] = ch;	\
136} while (/*CONSTCOND*/0)
137
138loop:
139	switch (psenc->charset) {
140	case ASCII:
141		switch (psenc->chlen) {
142		case 0:
143			STORE;
144			switch (psenc->ch[0]) {
145			case '\0': case '\n':
146				psenc->charset = NONE;
147			}
148		/*FALLTHROUGH*/
149		case 1:
150			break;
151		default:
152			return (EINVAL);
153		}
154		ch = (unsigned char)psenc->ch[0];
155		if (ch > 0x7F)
156			goto ilseq;
157		wc = (wchar_t)ch;
158		psenc->chlen = 0;
159		break;
160	case NONE:
161		if (psenc->chlen != 0)
162			return (EINVAL);
163		STORE;
164		ch = (unsigned char)psenc->ch[0];
165		if (ch != 'z') {
166			if (ch != '\n' && ch != '\0')
167				psenc->charset = ASCII;
168			wc = (wchar_t)ch;
169			psenc->chlen = 0;
170			break;
171		}
172		psenc->charset = AMBIGIOUS;
173		psenc->chlen = 0;
174	/* FALLTHROUGH */
175	case AMBIGIOUS:
176		if (psenc->chlen != 0)
177			return (EINVAL);
178		STORE;
179		if (psenc->ch[0] != 'W') {
180			psenc->charset = ASCII;
181			wc = L'z';
182			break;
183		}
184		psenc->charset = GB2312;
185		psenc->chlen = 0;
186	/* FALLTHROUGH */
187	case GB2312:
188		switch (psenc->chlen) {
189		case 0:
190			STORE;
191			ch = (unsigned char)psenc->ch[0];
192			if (ch == '\0') {
193				psenc->charset = NONE;
194				wc = (wchar_t)ch;
195				psenc->chlen = 0;
196				break;
197			} else if (ch == '\n') {
198				psenc->charset = NONE;
199				psenc->chlen = 0;
200				goto loop;
201			}
202		/*FALLTHROUGH*/
203		case 1:
204			STORE;
205			if (psenc->ch[0] == ' ') {
206				ch = (unsigned char)psenc->ch[1];
207				wc = (wchar_t)ch;
208				psenc->chlen = 0;
209				break;
210			} else if (psenc->ch[0] == '#') {
211				ch = (unsigned char)psenc->ch[1];
212				if (ch == '\n') {
213					psenc->charset = NONE;
214					wc = (wchar_t)ch;
215					psenc->chlen = 0;
216					break;
217				} else if (ch == ' ') {
218					wc = (wchar_t)ch;
219					psenc->chlen = 0;
220					break;
221				}
222			}
223			ch = (unsigned char)psenc->ch[0];
224			if (ch < 0x21 || ch > 0x7E)
225				goto ilseq;
226			wc = (wchar_t)(ch << 8);
227			ch = (unsigned char)psenc->ch[1];
228			if (ch < 0x21 || ch > 0x7E) {
229ilseq:
230				*nresult = (size_t)-1;
231				return (EILSEQ);
232			}
233			wc |= (wchar_t)ch;
234			psenc->chlen = 0;
235			break;
236		default:
237			return (EINVAL);
238		}
239		break;
240	default:
241		return (EINVAL);
242	}
243	if (pwc != NULL)
244		*pwc = wc;
245
246	*nresult = (size_t)(wc == 0 ? 0 : len);
247	*s = s0;
248
249	return (0);
250}
251
252static int
253/*ARGSUSED*/
254_citrus_ZW_wcrtomb_priv(_ZWEncodingInfo * __restrict ei __unused,
255    char *__restrict s, size_t n, wchar_t wc,
256    _ZWState * __restrict psenc, size_t * __restrict nresult)
257{
258	int ch;
259
260	if (psenc->chlen != 0)
261		return (EINVAL);
262	if ((uint32_t)wc <= 0x7F) {
263		ch = (unsigned char)wc;
264		switch (psenc->charset) {
265		case NONE:
266			if (ch == '\0' || ch == '\n')
267				psenc->ch[psenc->chlen++] = ch;
268			else {
269				if (n < 4)
270					return (E2BIG);
271				n -= 4;
272				psenc->ch[psenc->chlen++] = 'z';
273				psenc->ch[psenc->chlen++] = 'W';
274				psenc->ch[psenc->chlen++] = ' ';
275				psenc->ch[psenc->chlen++] = ch;
276				psenc->charset = GB2312;
277			}
278			break;
279		case GB2312:
280			if (n < 2)
281				return (E2BIG);
282			n -= 2;
283			if (ch == '\0') {
284				psenc->ch[psenc->chlen++] = '\n';
285				psenc->ch[psenc->chlen++] = '\0';
286				psenc->charset = NONE;
287			} else if (ch == '\n') {
288				psenc->ch[psenc->chlen++] = '#';
289				psenc->ch[psenc->chlen++] = '\n';
290				psenc->charset = NONE;
291			} else {
292				psenc->ch[psenc->chlen++] = ' ';
293				psenc->ch[psenc->chlen++] = ch;
294			}
295			break;
296		default:
297			return (EINVAL);
298		}
299	} else if ((uint32_t)wc <= 0x7E7E) {
300		switch (psenc->charset) {
301		case NONE:
302			if (n < 2)
303				return (E2BIG);
304			n -= 2;
305			psenc->ch[psenc->chlen++] = 'z';
306			psenc->ch[psenc->chlen++] = 'W';
307			psenc->charset = GB2312;
308		/* FALLTHROUGH*/
309		case GB2312:
310			if (n < 2)
311				return (E2BIG);
312			n -= 2;
313			ch = (wc >> 8) & 0xFF;
314			if (ch < 0x21 || ch > 0x7E)
315				goto ilseq;
316			psenc->ch[psenc->chlen++] = ch;
317			ch = wc & 0xFF;
318			if (ch < 0x21 || ch > 0x7E)
319				goto ilseq;
320			psenc->ch[psenc->chlen++] = ch;
321			break;
322		default:
323			return (EINVAL);
324		}
325	} else {
326ilseq:
327		*nresult = (size_t)-1;
328		return (EILSEQ);
329	}
330	memcpy(s, psenc->ch, psenc->chlen);
331	*nresult = psenc->chlen;
332	psenc->chlen = 0;
333
334	return (0);
335}
336
337static int
338/*ARGSUSED*/
339_citrus_ZW_put_state_reset(_ZWEncodingInfo * __restrict ei __unused,
340    char * __restrict s, size_t n, _ZWState * __restrict psenc,
341    size_t * __restrict nresult)
342{
343
344	if (psenc->chlen != 0)
345		return (EINVAL);
346	switch (psenc->charset) {
347	case GB2312:
348		if (n-- < 1)
349			return (E2BIG);
350		psenc->ch[psenc->chlen++] = '\n';
351		psenc->charset = NONE;
352	/*FALLTHROUGH*/
353	case NONE:
354		*nresult = psenc->chlen;
355		if (psenc->chlen > 0) {
356			memcpy(s, psenc->ch, psenc->chlen);
357			psenc->chlen = 0;
358		}
359		break;
360	default:
361		return (EINVAL);
362	}
363
364	return (0);
365}
366
367static __inline int
368/*ARGSUSED*/
369_citrus_ZW_stdenc_get_state_desc_generic(_ZWEncodingInfo * __restrict ei __unused,
370    _ZWState * __restrict psenc, int * __restrict rstate)
371{
372
373	switch (psenc->charset) {
374	case NONE:
375		if (psenc->chlen != 0)
376			return (EINVAL);
377		*rstate = _STDENC_SDGEN_INITIAL;
378		break;
379	case AMBIGIOUS:
380		if (psenc->chlen != 0)
381			return (EINVAL);
382		*rstate = _STDENC_SDGEN_INCOMPLETE_SHIFT;
383		break;
384	case ASCII:
385	case GB2312:
386		switch (psenc->chlen) {
387		case 0:
388			*rstate = _STDENC_SDGEN_STABLE;
389			break;
390		case 1:
391			*rstate = (psenc->ch[0] == '#') ?
392			    _STDENC_SDGEN_INCOMPLETE_SHIFT :
393			    _STDENC_SDGEN_INCOMPLETE_CHAR;
394			break;
395		default:
396			return (EINVAL);
397		}
398		break;
399	default:
400		return (EINVAL);
401	}
402	return (0);
403}
404
405static __inline int
406/*ARGSUSED*/
407_citrus_ZW_stdenc_wctocs(_ZWEncodingInfo * __restrict ei __unused,
408    _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
409{
410
411	*csid = (_csid_t)(wc <= (wchar_t)0x7FU) ? 0 : 1;
412	*idx = (_index_t)wc;
413
414	return (0);
415}
416
417static __inline int
418/*ARGSUSED*/
419_citrus_ZW_stdenc_cstowc(_ZWEncodingInfo * __restrict ei __unused,
420    wchar_t * __restrict wc, _csid_t csid, _index_t idx)
421{
422
423	switch (csid) {
424	case 0: case 1:
425		break;
426	default:
427		return (EINVAL);
428	}
429	*wc = (wchar_t)idx;
430
431	return (0);
432}
433
434static void
435/*ARGSUSED*/
436_citrus_ZW_encoding_module_uninit(_ZWEncodingInfo *ei __unused)
437{
438
439}
440
441static int
442/*ARGSUSED*/
443_citrus_ZW_encoding_module_init(_ZWEncodingInfo * __restrict ei __unused,
444    const void *__restrict var __unused, size_t lenvar __unused)
445{
446
447	return (0);
448}
449
450/* ----------------------------------------------------------------------
451 * public interface for stdenc
452 */
453
/*
 * NOTE(review): both macros are defined outside this file; presumably they
 * declare the ZW stdenc entry points and define the operations table that
 * refers to them -- confirm against citrus_stdenc.h.
 */
_CITRUS_STDENC_DECLS(ZW);
_CITRUS_STDENC_DEF_OPS(ZW);

/*
 * Included last, after the _FUNCNAME/_ENCODING_* macros and the static
 * functions above have been defined.
 * NOTE(review): presumably the template expands to the public entry points
 * in terms of those definitions -- confirm.
 */
#include "citrus_stdenc_template.h"
458