citrus_zw.c revision 252583
1/* $FreeBSD: head/lib/libiconv_modules/ZW/citrus_zw.c 252583 2013-07-03 18:27:45Z peter $ */
2/* $NetBSD: citrus_zw.c,v 1.4 2008/06/14 16:01:08 tnozaki Exp $ */
3
4/*-
5 * Copyright (c)2004, 2006 Citrus Project,
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 */
30
31#include <sys/cdefs.h>
32#include <sys/types.h>
33
34#include <assert.h>
35#include <errno.h>
36#include <limits.h>
37#include <stddef.h>
38#include <stdio.h>
39#include <stdint.h>
40#include <stdlib.h>
41#include <string.h>
42#include <wchar.h>
43
44#include "citrus_namespace.h"
45#include "citrus_types.h"
46#include "citrus_module.h"
47#include "citrus_stdenc.h"
48#include "citrus_zw.h"
49
50/* ----------------------------------------------------------------------
51 * private stuffs used by templates
52 */
53
/*
 * Per-encoding information for ZW.  It holds only a placeholder member;
 * none of the functions in this file read or write it.
 */
54typedef struct {
55	int	 dummy;
56} _ZWEncodingInfo;
57
/*
 * Charset component of the conversion state, as used by the decoder in
 * this file:
 *   NONE      - no charset selected yet (initial state, and the state
 *               entered again after '\n' or '\0')
 *   AMBIGIOUS - a leading 'z' has been read; the next byte decides
 *               between GB2312 ('W' follows) and ASCII (anything else)
 *   ASCII     - bytes are returned as single 7-bit characters
 *   GB2312    - bytes are consumed in pairs after a "zW" prefix
 */
58typedef enum {
59	NONE, AMBIGIOUS, ASCII, GB2312
60} _ZWCharset;
61
/*
 * Conversion state shared by the decoder and encoder in this file.
 */
62typedef struct {
63	_ZWCharset	 charset;	/* currently selected charset */
64	int		 chlen;		/* number of bytes buffered in ch[] */
65	char		 ch[4];		/* buffered input/output bytes */
66} _ZWState;
67
/*
 * Configuration macros.  NOTE(review): these are presumably consumed by
 * citrus_stdenc_template.h, which is included at the end of this file --
 * confirm against the template.
 *
 * _CEI_TO_EI / _CEI_TO_STATE select the `ei' member and the per-function
 * `states.s_<func>' member of the object they are given.
 */
68#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
69#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.s_##_func_
70
/*
 * _FUNCNAME(m) builds the _citrus_ZW_<m> names used for the functions
 * below.  The encoding reports MB_LEN_MAX as its maximum character
 * length and is flagged as state dependent; a state is considered to
 * need explicit initialization whenever its charset is not NONE.
 */
71#define _FUNCNAME(m)			_citrus_ZW_##m
72#define _ENCODING_INFO			_ZWEncodingInfo
73#define _ENCODING_STATE			_ZWState
74#define _ENCODING_MB_CUR_MAX(_ei_)	MB_LEN_MAX
75#define _ENCODING_IS_STATE_DEPENDENT		1
76#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	((_ps_)->charset != NONE)
77
78static __inline void
79/*ARGSUSED*/
80_citrus_ZW_init_state(_ZWEncodingInfo * __restrict ei __unused,
81    _ZWState * __restrict psenc)
82{
83
84	psenc->chlen = 0;
85	psenc->charset = NONE;
86}
87
88static __inline void
89/*ARGSUSED*/
90_citrus_ZW_pack_state(_ZWEncodingInfo * __restrict ei __unused,
91    void *__restrict pspriv, const _ZWState * __restrict psenc)
92{
93
94	memcpy(pspriv, (const void *)psenc, sizeof(*psenc));
95}
96
97static __inline void
98/*ARGSUSED*/
99_citrus_ZW_unpack_state(_ZWEncodingInfo * __restrict ei __unused,
100    _ZWState * __restrict psenc, const void * __restrict pspriv)
101{
102
103	memcpy((void *)psenc, pspriv, sizeof(*psenc));
104}
105
106static int
107_citrus_ZW_mbrtowc_priv(_ZWEncodingInfo * __restrict ei,
108    wchar_t * __restrict pwc, const char **__restrict s, size_t n,
109    _ZWState * __restrict psenc, size_t * __restrict nresult)
110{
111	const char *s0;
112	wchar_t  wc;
113	int ch, len;
114
115	if (*s == NULL) {
116		_citrus_ZW_init_state(ei, psenc);
117		*nresult = (size_t)_ENCODING_IS_STATE_DEPENDENT;
118		return (0);
119	}
120	s0 = *s;
121	len = 0;
122
123#define	STORE				\
124do {					\
125	if (n-- < 1) {			\
126		*nresult = (size_t)-2;	\
127		*s = s0;		\
128		return (0);		\
129	}				\
130	ch = (unsigned char)*s0++;	\
131	if (len++ > MB_LEN_MAX || ch > 0x7F)\
132		goto ilseq;		\
133	psenc->ch[psenc->chlen++] = ch;	\
134} while (/*CONSTCOND*/0)
135
136loop:
137	switch (psenc->charset) {
138	case ASCII:
139		switch (psenc->chlen) {
140		case 0:
141			STORE;
142			switch (psenc->ch[0]) {
143			case '\0': case '\n':
144				psenc->charset = NONE;
145			}
146		/*FALLTHROUGH*/
147		case 1:
148			break;
149		default:
150			return (EINVAL);
151		}
152		ch = (unsigned char)psenc->ch[0];
153		if (ch > 0x7F)
154			goto ilseq;
155		wc = (wchar_t)ch;
156		psenc->chlen = 0;
157		break;
158	case NONE:
159		if (psenc->chlen != 0)
160			return (EINVAL);
161		STORE;
162		ch = (unsigned char)psenc->ch[0];
163		if (ch != 'z') {
164			if (ch != '\n' && ch != '\0')
165				psenc->charset = ASCII;
166			wc = (wchar_t)ch;
167			psenc->chlen = 0;
168			break;
169		}
170		psenc->charset = AMBIGIOUS;
171		psenc->chlen = 0;
172	/* FALLTHROUGH */
173	case AMBIGIOUS:
174		if (psenc->chlen != 0)
175			return (EINVAL);
176		STORE;
177		if (psenc->ch[0] != 'W') {
178			psenc->charset = ASCII;
179			wc = L'z';
180			break;
181		}
182		psenc->charset = GB2312;
183		psenc->chlen = 0;
184	/* FALLTHROUGH */
185	case GB2312:
186		switch (psenc->chlen) {
187		case 0:
188			STORE;
189			ch = (unsigned char)psenc->ch[0];
190			if (ch == '\0') {
191				psenc->charset = NONE;
192				wc = (wchar_t)ch;
193				psenc->chlen = 0;
194				break;
195			} else if (ch == '\n') {
196				psenc->charset = NONE;
197				psenc->chlen = 0;
198				goto loop;
199			}
200		/*FALLTHROUGH*/
201		case 1:
202			STORE;
203			if (psenc->ch[0] == ' ') {
204				ch = (unsigned char)psenc->ch[1];
205				wc = (wchar_t)ch;
206				psenc->chlen = 0;
207				break;
208			} else if (psenc->ch[0] == '#') {
209				ch = (unsigned char)psenc->ch[1];
210				if (ch == '\n') {
211					psenc->charset = NONE;
212					wc = (wchar_t)ch;
213					psenc->chlen = 0;
214					break;
215				} else if (ch == ' ') {
216					wc = (wchar_t)ch;
217					psenc->chlen = 0;
218					break;
219				}
220			}
221			ch = (unsigned char)psenc->ch[0];
222			if (ch < 0x21 || ch > 0x7E)
223				goto ilseq;
224			wc = (wchar_t)(ch << 8);
225			ch = (unsigned char)psenc->ch[1];
226			if (ch < 0x21 || ch > 0x7E) {
227ilseq:
228				*nresult = (size_t)-1;
229				return (EILSEQ);
230			}
231			wc |= (wchar_t)ch;
232			psenc->chlen = 0;
233			break;
234		default:
235			return (EINVAL);
236		}
237		break;
238	default:
239		return (EINVAL);
240	}
241	if (pwc != NULL)
242		*pwc = wc;
243
244	*nresult = (size_t)(wc == 0 ? 0 : len);
245	*s = s0;
246
247	return (0);
248}
249
250static int
251/*ARGSUSED*/
252_citrus_ZW_wcrtomb_priv(_ZWEncodingInfo * __restrict ei __unused,
253    char *__restrict s, size_t n, wchar_t wc,
254    _ZWState * __restrict psenc, size_t * __restrict nresult)
255{
256	int ch;
257
258	if (psenc->chlen != 0)
259		return (EINVAL);
260	if ((uint32_t)wc <= 0x7F) {
261		ch = (unsigned char)wc;
262		switch (psenc->charset) {
263		case NONE:
264			if (ch == '\0' || ch == '\n')
265				psenc->ch[psenc->chlen++] = ch;
266			else {
267				if (n < 4)
268					return (E2BIG);
269				n -= 4;
270				psenc->ch[psenc->chlen++] = 'z';
271				psenc->ch[psenc->chlen++] = 'W';
272				psenc->ch[psenc->chlen++] = ' ';
273				psenc->ch[psenc->chlen++] = ch;
274				psenc->charset = GB2312;
275			}
276			break;
277		case GB2312:
278			if (n < 2)
279				return (E2BIG);
280			n -= 2;
281			if (ch == '\0') {
282				psenc->ch[psenc->chlen++] = '\n';
283				psenc->ch[psenc->chlen++] = '\0';
284				psenc->charset = NONE;
285			} else if (ch == '\n') {
286				psenc->ch[psenc->chlen++] = '#';
287				psenc->ch[psenc->chlen++] = '\n';
288				psenc->charset = NONE;
289			} else {
290				psenc->ch[psenc->chlen++] = ' ';
291				psenc->ch[psenc->chlen++] = ch;
292			}
293			break;
294		default:
295			return (EINVAL);
296		}
297	} else if ((uint32_t)wc <= 0x7E7E) {
298		switch (psenc->charset) {
299		case NONE:
300			if (n < 2)
301				return (E2BIG);
302			n -= 2;
303			psenc->ch[psenc->chlen++] = 'z';
304			psenc->ch[psenc->chlen++] = 'W';
305			psenc->charset = GB2312;
306		/* FALLTHROUGH*/
307		case GB2312:
308			if (n < 2)
309				return (E2BIG);
310			n -= 2;
311			ch = (wc >> 8) & 0xFF;
312			if (ch < 0x21 || ch > 0x7E)
313				goto ilseq;
314			psenc->ch[psenc->chlen++] = ch;
315			ch = wc & 0xFF;
316			if (ch < 0x21 || ch > 0x7E)
317				goto ilseq;
318			psenc->ch[psenc->chlen++] = ch;
319			break;
320		default:
321			return (EINVAL);
322		}
323	} else {
324ilseq:
325		*nresult = (size_t)-1;
326		return (EILSEQ);
327	}
328	memcpy(s, psenc->ch, psenc->chlen);
329	*nresult = psenc->chlen;
330	psenc->chlen = 0;
331
332	return (0);
333}
334
335static int
336/*ARGSUSED*/
337_citrus_ZW_put_state_reset(_ZWEncodingInfo * __restrict ei __unused,
338    char * __restrict s, size_t n, _ZWState * __restrict psenc,
339    size_t * __restrict nresult)
340{
341
342	if (psenc->chlen != 0)
343		return (EINVAL);
344	switch (psenc->charset) {
345	case GB2312:
346		if (n-- < 1)
347			return (E2BIG);
348		psenc->ch[psenc->chlen++] = '\n';
349		psenc->charset = NONE;
350	/*FALLTHROUGH*/
351	case NONE:
352		*nresult = psenc->chlen;
353		if (psenc->chlen > 0) {
354			memcpy(s, psenc->ch, psenc->chlen);
355			psenc->chlen = 0;
356		}
357		break;
358	default:
359		return (EINVAL);
360	}
361
362	return (0);
363}
364
365static __inline int
366/*ARGSUSED*/
367_citrus_ZW_stdenc_get_state_desc_generic(_ZWEncodingInfo * __restrict ei __unused,
368    _ZWState * __restrict psenc, int * __restrict rstate)
369{
370
371	switch (psenc->charset) {
372	case NONE:
373		if (psenc->chlen != 0)
374			return (EINVAL);
375		*rstate = _STDENC_SDGEN_INITIAL;
376		break;
377	case AMBIGIOUS:
378		if (psenc->chlen != 0)
379			return (EINVAL);
380		*rstate = _STDENC_SDGEN_INCOMPLETE_SHIFT;
381		break;
382	case ASCII:
383	case GB2312:
384		switch (psenc->chlen) {
385		case 0:
386			*rstate = _STDENC_SDGEN_STABLE;
387			break;
388		case 1:
389			*rstate = (psenc->ch[0] == '#') ?
390			    _STDENC_SDGEN_INCOMPLETE_SHIFT :
391			    _STDENC_SDGEN_INCOMPLETE_CHAR;
392			break;
393		default:
394			return (EINVAL);
395		}
396		break;
397	default:
398		return (EINVAL);
399	}
400	return (0);
401}
402
403static __inline int
404/*ARGSUSED*/
405_citrus_ZW_stdenc_wctocs(_ZWEncodingInfo * __restrict ei __unused,
406    _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
407{
408
409	*csid = (_csid_t)(wc <= (wchar_t)0x7FU) ? 0 : 1;
410	*idx = (_index_t)wc;
411
412	return (0);
413}
414
415static __inline int
416/*ARGSUSED*/
417_citrus_ZW_stdenc_cstowc(_ZWEncodingInfo * __restrict ei __unused,
418    wchar_t * __restrict wc, _csid_t csid, _index_t idx)
419{
420
421	switch (csid) {
422	case 0: case 1:
423		break;
424	default:
425		return (EINVAL);
426	}
427	*wc = (wchar_t)idx;
428
429	return (0);
430}
431
432static void
433/*ARGSUSED*/
434_citrus_ZW_encoding_module_uninit(_ZWEncodingInfo *ei __unused)
435{
436
437}
438
439static int
440/*ARGSUSED*/
441_citrus_ZW_encoding_module_init(_ZWEncodingInfo * __restrict ei __unused,
442    const void *__restrict var __unused, size_t lenvar __unused)
443{
444
445	return (0);
446}
447
448/* ----------------------------------------------------------------------
449 * public interface for stdenc
450 */
451
/*
 * Instantiate the stdenc declarations and operations table for ZW, then
 * pull in the shared template.  NOTE(review): the template presumably
 * builds the public entry points from the _citrus_ZW_* functions and the
 * configuration macros defined above -- confirm against
 * citrus_stdenc_template.h.
 */
452_CITRUS_STDENC_DECLS(ZW);
453_CITRUS_STDENC_DEF_OPS(ZW);
454
455#include "citrus_stdenc_template.h"
456