/* $FreeBSD$ */
/* $NetBSD: citrus_zw.c,v 1.4 2008/06/14 16:01:08 tnozaki Exp $ */

/*-
 * Copyright (c)2004, 2006 Citrus Project,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */
30219019Sgabor
31219019Sgabor#include <sys/cdefs.h>
32219019Sgabor#include <sys/types.h>
33219019Sgabor
34219019Sgabor#include <assert.h>
35219019Sgabor#include <errno.h>
36219019Sgabor#include <limits.h>
37219019Sgabor#include <stddef.h>
38219019Sgabor#include <stdio.h>
39219019Sgabor#include <stdint.h>
40219019Sgabor#include <stdlib.h>
41219019Sgabor#include <string.h>
42219019Sgabor#include <wchar.h>
43219019Sgabor
44219019Sgabor#include "citrus_namespace.h"
45219019Sgabor#include "citrus_types.h"
46219019Sgabor#include "citrus_module.h"
47219019Sgabor#include "citrus_stdenc.h"
48219019Sgabor#include "citrus_zw.h"
49219019Sgabor
/* ----------------------------------------------------------------------
 * private stuff used by templates
 */
53219019Sgabor
/*
 * Per-encoding configuration.  None of the functions in this file read it;
 * the single member only keeps the struct non-empty.
 */
typedef struct {
	int	 dummy;
} _ZWEncodingInfo;
57219019Sgabor
/*
 * Shift states of the ZW codec:
 *   NONE       start of a line, no charset chosen yet
 *   AMBIGIOUS  a leading 'z' was read; the next byte decides the charset
 *   ASCII      the rest of the line is plain ASCII
 *   GB2312     the line started with "zW" and carries GB2312 byte pairs
 * (The AMBIGIOUS spelling is kept because other code refers to it.)
 */
typedef enum {
	NONE, AMBIGIOUS, ASCII, GB2312
} _ZWCharset;
61219019Sgabor
62219019Sgabortypedef struct {
63219019Sgabor	_ZWCharset	 charset;
64219019Sgabor	int		 chlen;
65219019Sgabor	char		 ch[4];
66219019Sgabor} _ZWState;
67219019Sgabor
/* Accessors and names consumed by citrus_stdenc_template.h. */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.s_##_func_

#define _FUNCNAME(m)			_citrus_ZW_##m
#define _ENCODING_INFO			_ZWEncodingInfo
#define _ENCODING_STATE			_ZWState
#define _ENCODING_MB_CUR_MAX(_ei_)	MB_LEN_MAX
#define _ENCODING_IS_STATE_DEPENDENT		1
/* Any state other than the start-of-line state must be reset explicitly. */
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	((_ps_)->charset != NONE)
77219019Sgabor
78219019Sgaborstatic __inline void
79219019Sgabor/*ARGSUSED*/
80219019Sgabor_citrus_ZW_init_state(_ZWEncodingInfo * __restrict ei __unused,
81219019Sgabor    _ZWState * __restrict psenc)
82219019Sgabor{
83219019Sgabor
84219019Sgabor	psenc->chlen = 0;
85219019Sgabor	psenc->charset = NONE;
86219019Sgabor}
87219019Sgabor
#if 0
/*
 * Disabled helpers: copy the whole state structure to and from an opaque
 * buffer.  They are compiled out.
 */
static __inline void
/*ARGSUSED*/
_citrus_ZW_pack_state(_ZWEncodingInfo * __restrict ei __unused,
    void *__restrict pspriv, const _ZWState * __restrict psenc)
{

	memcpy(pspriv, (const void *)psenc, sizeof(*psenc));
}

static __inline void
/*ARGSUSED*/
_citrus_ZW_unpack_state(_ZWEncodingInfo * __restrict ei __unused,
    _ZWState * __restrict psenc, const void * __restrict pspriv)
{

	memcpy((void *)psenc, pspriv, sizeof(*psenc));
}
#endif
107219019Sgabor
108219019Sgaborstatic int
109219019Sgabor_citrus_ZW_mbrtowc_priv(_ZWEncodingInfo * __restrict ei,
110219019Sgabor    wchar_t * __restrict pwc, char **__restrict s, size_t n,
111219019Sgabor    _ZWState * __restrict psenc, size_t * __restrict nresult)
112219019Sgabor{
113219019Sgabor	char *s0;
114219019Sgabor	wchar_t  wc;
115219019Sgabor	int ch, len;
116219019Sgabor
117219019Sgabor	if (*s == NULL) {
118219019Sgabor		_citrus_ZW_init_state(ei, psenc);
119219019Sgabor		*nresult = (size_t)_ENCODING_IS_STATE_DEPENDENT;
120219019Sgabor		return (0);
121219019Sgabor	}
122219019Sgabor	s0 = *s;
123219019Sgabor	len = 0;
124219019Sgabor
125219019Sgabor#define	STORE				\
126219019Sgabordo {					\
127219019Sgabor	if (n-- < 1) {			\
128219019Sgabor		*nresult = (size_t)-2;	\
129219019Sgabor		*s = s0;		\
130219019Sgabor		return (0);		\
131219019Sgabor	}				\
132219019Sgabor	ch = (unsigned char)*s0++;	\
133219019Sgabor	if (len++ > MB_LEN_MAX || ch > 0x7F)\
134219019Sgabor		goto ilseq;		\
135219019Sgabor	psenc->ch[psenc->chlen++] = ch;	\
136219019Sgabor} while (/*CONSTCOND*/0)
137219019Sgabor
138219019Sgaborloop:
139219019Sgabor	switch (psenc->charset) {
140219019Sgabor	case ASCII:
141219019Sgabor		switch (psenc->chlen) {
142219019Sgabor		case 0:
143219019Sgabor			STORE;
144219019Sgabor			switch (psenc->ch[0]) {
145219019Sgabor			case '\0': case '\n':
146219019Sgabor				psenc->charset = NONE;
147219019Sgabor			}
148219019Sgabor		/*FALLTHROUGH*/
149219019Sgabor		case 1:
150219019Sgabor			break;
151219019Sgabor		default:
152219019Sgabor			return (EINVAL);
153219019Sgabor		}
154219019Sgabor		ch = (unsigned char)psenc->ch[0];
155219019Sgabor		if (ch > 0x7F)
156219019Sgabor			goto ilseq;
157219019Sgabor		wc = (wchar_t)ch;
158219019Sgabor		psenc->chlen = 0;
159219019Sgabor		break;
160219019Sgabor	case NONE:
161219019Sgabor		if (psenc->chlen != 0)
162219019Sgabor			return (EINVAL);
163219019Sgabor		STORE;
164219019Sgabor		ch = (unsigned char)psenc->ch[0];
165219019Sgabor		if (ch != 'z') {
166219019Sgabor			if (ch != '\n' && ch != '\0')
167219019Sgabor				psenc->charset = ASCII;
168219019Sgabor			wc = (wchar_t)ch;
169219019Sgabor			psenc->chlen = 0;
170219019Sgabor			break;
171219019Sgabor		}
172219019Sgabor		psenc->charset = AMBIGIOUS;
173219019Sgabor		psenc->chlen = 0;
174219019Sgabor	/* FALLTHROUGH */
175219019Sgabor	case AMBIGIOUS:
176219019Sgabor		if (psenc->chlen != 0)
177219019Sgabor			return (EINVAL);
178219019Sgabor		STORE;
179219019Sgabor		if (psenc->ch[0] != 'W') {
180219019Sgabor			psenc->charset = ASCII;
181219019Sgabor			wc = L'z';
182219019Sgabor			break;
183219019Sgabor		}
184219019Sgabor		psenc->charset = GB2312;
185219019Sgabor		psenc->chlen = 0;
186219019Sgabor	/* FALLTHROUGH */
187219019Sgabor	case GB2312:
188219019Sgabor		switch (psenc->chlen) {
189219019Sgabor		case 0:
190219019Sgabor			STORE;
191219019Sgabor			ch = (unsigned char)psenc->ch[0];
192219019Sgabor			if (ch == '\0') {
193219019Sgabor				psenc->charset = NONE;
194219019Sgabor				wc = (wchar_t)ch;
195219019Sgabor				psenc->chlen = 0;
196219019Sgabor				break;
197219019Sgabor			} else if (ch == '\n') {
198219019Sgabor				psenc->charset = NONE;
199219019Sgabor				psenc->chlen = 0;
200219019Sgabor				goto loop;
201219019Sgabor			}
202219019Sgabor		/*FALLTHROUGH*/
203219019Sgabor		case 1:
204219019Sgabor			STORE;
205219019Sgabor			if (psenc->ch[0] == ' ') {
206219019Sgabor				ch = (unsigned char)psenc->ch[1];
207219019Sgabor				wc = (wchar_t)ch;
208219019Sgabor				psenc->chlen = 0;
209219019Sgabor				break;
210219019Sgabor			} else if (psenc->ch[0] == '#') {
211219019Sgabor				ch = (unsigned char)psenc->ch[1];
212219019Sgabor				if (ch == '\n') {
213219019Sgabor					psenc->charset = NONE;
214219019Sgabor					wc = (wchar_t)ch;
215219019Sgabor					psenc->chlen = 0;
216219019Sgabor					break;
217219019Sgabor				} else if (ch == ' ') {
218219019Sgabor					wc = (wchar_t)ch;
219219019Sgabor					psenc->chlen = 0;
220219019Sgabor					break;
221219019Sgabor				}
222219019Sgabor			}
223219019Sgabor			ch = (unsigned char)psenc->ch[0];
224219019Sgabor			if (ch < 0x21 || ch > 0x7E)
225219019Sgabor				goto ilseq;
226219019Sgabor			wc = (wchar_t)(ch << 8);
227219019Sgabor			ch = (unsigned char)psenc->ch[1];
228219019Sgabor			if (ch < 0x21 || ch > 0x7E) {
229219019Sgaborilseq:
230219019Sgabor				*nresult = (size_t)-1;
231219019Sgabor				return (EILSEQ);
232219019Sgabor			}
233219019Sgabor			wc |= (wchar_t)ch;
234219019Sgabor			psenc->chlen = 0;
235219019Sgabor			break;
236219019Sgabor		default:
237219019Sgabor			return (EINVAL);
238219019Sgabor		}
239219019Sgabor		break;
240219019Sgabor	default:
241219019Sgabor		return (EINVAL);
242219019Sgabor	}
243219019Sgabor	if (pwc != NULL)
244219019Sgabor		*pwc = wc;
245219019Sgabor
246219019Sgabor	*nresult = (size_t)(wc == 0 ? 0 : len);
247219019Sgabor	*s = s0;
248219019Sgabor
249219019Sgabor	return (0);
250219019Sgabor}
251219019Sgabor
252219019Sgaborstatic int
253219019Sgabor/*ARGSUSED*/
254219019Sgabor_citrus_ZW_wcrtomb_priv(_ZWEncodingInfo * __restrict ei __unused,
255219019Sgabor    char *__restrict s, size_t n, wchar_t wc,
256219019Sgabor    _ZWState * __restrict psenc, size_t * __restrict nresult)
257219019Sgabor{
258219019Sgabor	int ch;
259219019Sgabor
260219019Sgabor	if (psenc->chlen != 0)
261219019Sgabor		return (EINVAL);
262219019Sgabor	if ((uint32_t)wc <= 0x7F) {
263219019Sgabor		ch = (unsigned char)wc;
264219019Sgabor		switch (psenc->charset) {
265219019Sgabor		case NONE:
266219019Sgabor			if (ch == '\0' || ch == '\n')
267219019Sgabor				psenc->ch[psenc->chlen++] = ch;
268219019Sgabor			else {
269219019Sgabor				if (n < 4)
270219019Sgabor					return (E2BIG);
271219019Sgabor				n -= 4;
272219019Sgabor				psenc->ch[psenc->chlen++] = 'z';
273219019Sgabor				psenc->ch[psenc->chlen++] = 'W';
274219019Sgabor				psenc->ch[psenc->chlen++] = ' ';
275219019Sgabor				psenc->ch[psenc->chlen++] = ch;
276219019Sgabor				psenc->charset = GB2312;
277219019Sgabor			}
278219019Sgabor			break;
279219019Sgabor		case GB2312:
280219019Sgabor			if (n < 2)
281219019Sgabor				return (E2BIG);
282219019Sgabor			n -= 2;
283219019Sgabor			if (ch == '\0') {
284219019Sgabor				psenc->ch[psenc->chlen++] = '\n';
285219019Sgabor				psenc->ch[psenc->chlen++] = '\0';
286219019Sgabor				psenc->charset = NONE;
287219019Sgabor			} else if (ch == '\n') {
288219019Sgabor				psenc->ch[psenc->chlen++] = '#';
289219019Sgabor				psenc->ch[psenc->chlen++] = '\n';
290219019Sgabor				psenc->charset = NONE;
291219019Sgabor			} else {
292219019Sgabor				psenc->ch[psenc->chlen++] = ' ';
293219019Sgabor				psenc->ch[psenc->chlen++] = ch;
294219019Sgabor			}
295219019Sgabor			break;
296219019Sgabor		default:
297219019Sgabor			return (EINVAL);
298219019Sgabor		}
299219019Sgabor	} else if ((uint32_t)wc <= 0x7E7E) {
300219019Sgabor		switch (psenc->charset) {
301219019Sgabor		case NONE:
302219019Sgabor			if (n < 2)
303219019Sgabor				return (E2BIG);
304219019Sgabor			n -= 2;
305219019Sgabor			psenc->ch[psenc->chlen++] = 'z';
306219019Sgabor			psenc->ch[psenc->chlen++] = 'W';
307219019Sgabor			psenc->charset = GB2312;
308219019Sgabor		/* FALLTHROUGH*/
309219019Sgabor		case GB2312:
310219019Sgabor			if (n < 2)
311219019Sgabor				return (E2BIG);
312219019Sgabor			n -= 2;
313219019Sgabor			ch = (wc >> 8) & 0xFF;
314219019Sgabor			if (ch < 0x21 || ch > 0x7E)
315219019Sgabor				goto ilseq;
316219019Sgabor			psenc->ch[psenc->chlen++] = ch;
317219019Sgabor			ch = wc & 0xFF;
318219019Sgabor			if (ch < 0x21 || ch > 0x7E)
319219019Sgabor				goto ilseq;
320219019Sgabor			psenc->ch[psenc->chlen++] = ch;
321219019Sgabor			break;
322219019Sgabor		default:
323219019Sgabor			return (EINVAL);
324219019Sgabor		}
325219019Sgabor	} else {
326219019Sgaborilseq:
327219019Sgabor		*nresult = (size_t)-1;
328219019Sgabor		return (EILSEQ);
329219019Sgabor	}
330219019Sgabor	memcpy(s, psenc->ch, psenc->chlen);
331219019Sgabor	*nresult = psenc->chlen;
332219019Sgabor	psenc->chlen = 0;
333219019Sgabor
334219019Sgabor	return (0);
335219019Sgabor}
336219019Sgabor
337219019Sgaborstatic int
338219019Sgabor/*ARGSUSED*/
339219019Sgabor_citrus_ZW_put_state_reset(_ZWEncodingInfo * __restrict ei __unused,
340219019Sgabor    char * __restrict s, size_t n, _ZWState * __restrict psenc,
341219019Sgabor    size_t * __restrict nresult)
342219019Sgabor{
343219019Sgabor
344219019Sgabor	if (psenc->chlen != 0)
345219019Sgabor		return (EINVAL);
346219019Sgabor	switch (psenc->charset) {
347219019Sgabor	case GB2312:
348219019Sgabor		if (n-- < 1)
349219019Sgabor			return (E2BIG);
350219019Sgabor		psenc->ch[psenc->chlen++] = '\n';
351219019Sgabor		psenc->charset = NONE;
352219019Sgabor	/*FALLTHROUGH*/
353219019Sgabor	case NONE:
354219019Sgabor		*nresult = psenc->chlen;
355219019Sgabor		if (psenc->chlen > 0) {
356219019Sgabor			memcpy(s, psenc->ch, psenc->chlen);
357219019Sgabor			psenc->chlen = 0;
358219019Sgabor		}
359219019Sgabor		break;
360219019Sgabor	default:
361219019Sgabor		return (EINVAL);
362219019Sgabor	}
363219019Sgabor
364219019Sgabor	return (0);
365219019Sgabor}
366219019Sgabor
367219019Sgaborstatic __inline int
368219019Sgabor/*ARGSUSED*/
369219019Sgabor_citrus_ZW_stdenc_get_state_desc_generic(_ZWEncodingInfo * __restrict ei __unused,
370219019Sgabor    _ZWState * __restrict psenc, int * __restrict rstate)
371219019Sgabor{
372219019Sgabor
373219019Sgabor	switch (psenc->charset) {
374219019Sgabor	case NONE:
375219019Sgabor		if (psenc->chlen != 0)
376219019Sgabor			return (EINVAL);
377219019Sgabor		*rstate = _STDENC_SDGEN_INITIAL;
378219019Sgabor		break;
379219019Sgabor	case AMBIGIOUS:
380219019Sgabor		if (psenc->chlen != 0)
381219019Sgabor			return (EINVAL);
382219019Sgabor		*rstate = _STDENC_SDGEN_INCOMPLETE_SHIFT;
383219019Sgabor		break;
384219019Sgabor	case ASCII:
385219019Sgabor	case GB2312:
386219019Sgabor		switch (psenc->chlen) {
387219019Sgabor		case 0:
388219019Sgabor			*rstate = _STDENC_SDGEN_STABLE;
389219019Sgabor			break;
390219019Sgabor		case 1:
391219019Sgabor			*rstate = (psenc->ch[0] == '#') ?
392219019Sgabor			    _STDENC_SDGEN_INCOMPLETE_SHIFT :
393219019Sgabor			    _STDENC_SDGEN_INCOMPLETE_CHAR;
394219019Sgabor			break;
395219019Sgabor		default:
396219019Sgabor			return (EINVAL);
397219019Sgabor		}
398219019Sgabor		break;
399219019Sgabor	default:
400219019Sgabor		return (EINVAL);
401219019Sgabor	}
402219019Sgabor	return (0);
403219019Sgabor}
404219019Sgabor
405219019Sgaborstatic __inline int
406219019Sgabor/*ARGSUSED*/
407219019Sgabor_citrus_ZW_stdenc_wctocs(_ZWEncodingInfo * __restrict ei __unused,
408219019Sgabor    _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
409219019Sgabor{
410219019Sgabor
411219019Sgabor	*csid = (_csid_t)(wc <= (wchar_t)0x7FU) ? 0 : 1;
412219019Sgabor	*idx = (_index_t)wc;
413219019Sgabor
414219019Sgabor	return (0);
415219019Sgabor}
416219019Sgabor
417219019Sgaborstatic __inline int
418219019Sgabor/*ARGSUSED*/
419219019Sgabor_citrus_ZW_stdenc_cstowc(_ZWEncodingInfo * __restrict ei __unused,
420219019Sgabor    wchar_t * __restrict wc, _csid_t csid, _index_t idx)
421219019Sgabor{
422219019Sgabor
423219019Sgabor	switch (csid) {
424219019Sgabor	case 0: case 1:
425219019Sgabor		break;
426219019Sgabor	default:
427219019Sgabor		return (EINVAL);
428219019Sgabor	}
429219019Sgabor	*wc = (wchar_t)idx;
430219019Sgabor
431219019Sgabor	return (0);
432219019Sgabor}
433219019Sgabor
434219019Sgaborstatic void
435219019Sgabor/*ARGSUSED*/
436219019Sgabor_citrus_ZW_encoding_module_uninit(_ZWEncodingInfo *ei __unused)
437219019Sgabor{
438219019Sgabor
439219019Sgabor}
440219019Sgabor
441219019Sgaborstatic int
442219019Sgabor/*ARGSUSED*/
443219019Sgabor_citrus_ZW_encoding_module_init(_ZWEncodingInfo * __restrict ei __unused,
444219019Sgabor    const void *__restrict var __unused, size_t lenvar __unused)
445219019Sgabor{
446219019Sgabor
447219019Sgabor	return (0);
448219019Sgabor}
449219019Sgabor
/* ----------------------------------------------------------------------
 * public interface for stdenc
 */
453219019Sgabor
454219019Sgabor_CITRUS_STDENC_DECLS(ZW);
455219019Sgabor_CITRUS_STDENC_DEF_OPS(ZW);
456219019Sgabor
457219019Sgabor#include "citrus_stdenc_template.h"
458