1/* $NetBSD: citrus_zw.c,v 1.3 2006/11/24 17:27:52 tnozaki Exp $ */
2
3/*-
4 * Copyright (c)2004, 2006 Citrus Project,
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 *
28 */
29
30#include <sys/cdefs.h>
31#if defined(LIB_SCCS) && !defined(lint)
32__RCSID("$NetBSD: citrus_zw.c,v 1.3 2006/11/24 17:27:52 tnozaki Exp $");
33#endif /* LIB_SCCS and not lint */
34
35#include <sys/types.h>
36#include <assert.h>
37#include <errno.h>
38#include <string.h>
39#include <stdio.h>
40#include <stdint.h>
41#include <stdlib.h>
42#include <stddef.h>
43#include <wchar.h>
44#include <limits.h>
45
46#include "citrus_namespace.h"
47#include "citrus_types.h"
48#include "citrus_module.h"
49#include "citrus_ctype.h"
50#include "citrus_stdenc.h"
51#include "citrus_zw.h"
52
53/* ----------------------------------------------------------------------
 * private definitions used by the templates
55 */
56
/*
 * Per-encoding information block.  It carries a single placeholder
 * member; none of the functions in this file read or write it.
 */
typedef struct {
	int dummy;
} _ZWEncodingInfo;
60
/*
 * Shift state tracked by the converters in this file:
 *   NONE       initial state (set by init_state; re-entered by the decoder
 *              after a NUL or newline)
 *   AMBIGIOUS  the decoder has read a 'z' in NONE state and needs the next
 *              byte to tell the "zW" introducer from a plain 'z'
 *   ASCII      the decoder is passing single bytes through
 *   GB2312     the "zW" introducer has been seen/emitted; two-byte mode
 */
typedef enum {
	NONE, AMBIGIOUS, ASCII, GB2312
} _ZWCharset;
64
/*
 * Conversion state.  pack_state/unpack_state copy the whole structure
 * with memcpy().
 */
typedef struct {
	int		chlen;		/* number of bytes currently held in ch[] */
	char		ch[4];		/* bytes buffered for the current character */
	_ZWCharset	charset;	/* current shift state */
} _ZWState;
70
/*
 * Encoding info bundled with one conversion state per function name.
 * The members of `states' are selected by token pasting in
 * _CEI_TO_STATE(), so their names must keep the s_<func> form.
 */
typedef struct {
	_ZWEncodingInfo	ei;
	struct {
		/* for future multi-locale facility */
		_ZWState	s_mblen;
		_ZWState	s_mbrlen;
		_ZWState	s_mbrtowc;
		_ZWState	s_mbtowc;
		_ZWState	s_mbsrtowcs;
		_ZWState	s_wcrtomb;
		_ZWState	s_wcsrtombs;
		_ZWState	s_wctomb;
	} states;
} _ZWCTypeInfo;
85
/* Accessors into _ZWCTypeInfo: the encoding info and the per-function state. */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.s_##_func_

/*
 * Names and properties of this encoding.
 * NOTE(review): presumably consumed by the citrus template headers
 * included at the end of the file -- confirm against those headers.
 */
/* Builds the _citrus_ZW_ prefixed identifier for a function name. */
#define _FUNCNAME(m)			_citrus_ZW_##m
#define _ENCODING_INFO			_ZWEncodingInfo
#define _CTYPE_INFO			_ZWCTypeInfo
#define _ENCODING_STATE			_ZWState
/* The argument is ignored; the value is always MB_LEN_MAX. */
#define _ENCODING_MB_CUR_MAX(_ei_)	MB_LEN_MAX
/* Also the value mbrtowc_priv stores in *nresult when *s == NULL. */
#define _ENCODING_IS_STATE_DEPENDENT		1
/* True when the state's charset is anything other than NONE. */
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	((_ps_)->charset != NONE)
96
97static __inline void
98/*ARGSUSED*/
99_citrus_ZW_init_state(_ZWEncodingInfo * __restrict ei,
100	_ZWState * __restrict psenc)
101{
102	/* ei my be unused */
103	_DIAGASSERT(psenc != NULL);
104
105	psenc->chlen = 0;
106	psenc->charset = NONE;
107}
108
109static __inline void
110/*ARGSUSED*/
111_citrus_ZW_pack_state(_ZWEncodingInfo * __restrict ei,
112	void *__restrict pspriv, const _ZWState * __restrict psenc)
113{
114	/* ei may be unused */
115	_DIAGASSERT(pspriv != NULL);
116	_DIAGASSERT(psenc != NULL);
117
118	memcpy(pspriv, (const void *)psenc, sizeof(*psenc));
119}
120
121static __inline void
122/*ARGSUSED*/
123_citrus_ZW_unpack_state(_ZWEncodingInfo * __restrict ei,
124	_ZWState * __restrict psenc, const void * __restrict pspriv)
125{
126	/* ei may be unused */
127	_DIAGASSERT(psenc != NULL);
128	_DIAGASSERT(pspriv != NULL);
129
130	memcpy((void *)psenc, pspriv, sizeof(*psenc));
131}
132
/*
 * _citrus_ZW_mbrtowc_priv --
 *	Decode at most one wide character from the bytes at *s, reading no
 *	more than n bytes and carrying the shift state in psenc.
 *
 *	ei	only forwarded to _citrus_ZW_init_state()
 *	pwc	receives the decoded character; may be NULL
 *	s	in/out cursor; updated on success and when input runs out
 *	n	number of bytes available at *s
 *	psenc	conversion state (shift state plus buffered bytes)
 *	nresult	receives the result described below
 *
 *	Returns 0 with
 *	  *nresult == _ENCODING_IS_STATE_DEPENDENT  if *s == NULL (the state
 *						    is reset first),
 *	  *nresult == (size_t)-2  if the input ran out before a character
 *				  was complete; progress is kept in psenc,
 *	  *nresult == 0		  if the decoded character is NUL,
 *	  *nresult == number of bytes read by this call otherwise.
 *	Returns EILSEQ with *nresult == (size_t)-1 on a byte above 0x7F or
 *	an invalid GB2312 byte pair, and EINVAL (nresult untouched) when
 *	psenc holds a charset/chlen combination this decoder never produces.
 *
 *	NOTE(review): when a byte buffered by a previous call is delivered
 *	(ASCII state with chlen == 1) nothing is read, so len is 0 and
 *	*nresult is 0 even though the character is not NUL.  On that same
 *	path a buffered '\n' or '\0' does not switch the state back to
 *	NONE.  Confirm whether callers rely on either behaviour.
 */
static int
_citrus_ZW_mbrtowc_priv(_ZWEncodingInfo * __restrict ei,
	wchar_t * __restrict pwc, const char **__restrict s, size_t n,
	_ZWState * __restrict psenc, size_t * __restrict nresult)
{
	const char *s0;
	int ch, len;
	wchar_t	 wc;

	/* ei may be unused */
	/* pwc may be null */
	_DIAGASSERT(s != NULL);
	_DIAGASSERT(psenc != NULL);
	_DIAGASSERT(nresult != NULL);

	/* A NULL string is a request to reset the shift state. */
	if (*s == NULL) {
		_citrus_ZW_init_state(ei, psenc);
		*nresult = (size_t)_ENCODING_IS_STATE_DEPENDENT;
		return 0;
	}
	s0 = *s;
	len = 0;	/* bytes read during this call */

/*
 * STORE: read one input byte into psenc->ch[].
 * Contains hidden control flow: returns 0 with *nresult == (size_t)-2 when
 * no input is left (publishing the cursor through *s), and jumps to ilseq
 * when the byte is above 0x7F or too many bytes were read in this call.
 * Leaves the byte just read in `ch'.
 */
#define	STORE				\
do {					\
	if (n-- < 1) {			\
		*nresult = (size_t)-2;	\
		*s = s0;		\
		return 0;		\
	}				\
	ch = (unsigned char)*s0++;	\
	if (len++ > MB_LEN_MAX || ch > 0x7F)\
		goto ilseq;		\
	psenc->ch[psenc->chlen++] = ch;	\
} while (/*CONSTCOND*/0)

loop:
	switch (psenc->charset) {
	case ASCII:
		/* Pass-through mode: each byte is one character. */
		switch (psenc->chlen) {
		case 0:
			STORE;
			/* NUL or newline ends ASCII mode. */
			switch (psenc->ch[0]) {
			case '\0': case '\n':
				psenc->charset = NONE;
			}
		/*FALLTHROUGH*/
		case 1:
			/* chlen == 1 on entry: deliver the byte left buffered
			 * by the AMBIGIOUS case of an earlier call. */
			break;
		default:
			return EINVAL;
		}
		ch = (unsigned char)psenc->ch[0];
		if (ch > 0x7F)
			goto ilseq;
		wc = (wchar_t)ch;
		psenc->chlen = 0;
		break;
	case NONE:
		/* Start of a line: look for the 'z' of the "zW" introducer. */
		if (psenc->chlen != 0)
			return EINVAL;
		STORE;
		ch = (unsigned char)psenc->ch[0];
		if (ch != 'z') {
			/* Any byte other than NUL/newline starts ASCII mode. */
			if (ch != '\n' && ch != '\0')
				psenc->charset = ASCII;
			wc = (wchar_t)ch;
			psenc->chlen = 0;
			break;
		}
		psenc->charset = AMBIGIOUS;
		psenc->chlen = 0;
	/* FALLTHROUGH */
	case AMBIGIOUS:
		/* A 'z' was seen; the next byte decides what it meant. */
		if (psenc->chlen != 0)
			return EINVAL;
		STORE;
		if (psenc->ch[0] != 'W') {
			/*
			 * Plain 'z': return it now and leave the byte just
			 * read in ch[0] (chlen stays 1) for the next call.
			 */
			psenc->charset = ASCII;
			wc = L'z';
			break;
		}
		/* "zW" introducer complete: switch to two-byte mode. */
		psenc->charset = GB2312;
		psenc->chlen = 0;
	/* FALLTHROUGH */
	case GB2312:
		switch (psenc->chlen) {
		case 0:
			STORE;
			ch = (unsigned char)psenc->ch[0];
			if (ch == '\0') {
				/* NUL is returned and resets the state. */
				psenc->charset = NONE;
				wc = (wchar_t)ch;
				psenc->chlen = 0;
				break;
			} else if (ch == '\n') {
				/*
				 * A bare newline ends GB2312 mode without
				 * producing a character; decode the next
				 * byte from the NONE state.
				 */
				psenc->charset = NONE;
				psenc->chlen = 0;
				goto loop;
			}
		/*FALLTHROUGH*/
		case 1:
			STORE;
			if (psenc->ch[0] == ' ') {
				/* ' ' + byte: the byte is a single character. */
				ch = (unsigned char)psenc->ch[1];
				wc = (wchar_t)ch;
				psenc->chlen = 0;
				break;
			} else if (psenc->ch[0] == '#') {
				ch = (unsigned char)psenc->ch[1];
				if (ch == '\n') {
					/* "#\n": newline, back to NONE. */
					psenc->charset = NONE;
					wc = (wchar_t)ch;
					psenc->chlen = 0;
					break;
				} else if (ch == ' ') {
					/* "# ": a space character. */
					wc = (wchar_t)ch;
					psenc->chlen = 0;
					break;
				}
				/* any other "#x" is treated as a byte pair */
			}
			/* Byte pair: both bytes must be in 0x21..0x7E. */
			ch = (unsigned char)psenc->ch[0];
			if (ch < 0x21 || ch > 0x7E)
				goto ilseq;
			wc = (wchar_t)(ch << 8);
			ch = (unsigned char)psenc->ch[1];
			if (ch < 0x21 || ch > 0x7E) {
				/* shared error exit; also the target of the
				 * gotos above and inside STORE */
ilseq:
				*nresult = (size_t)-1;
				return EILSEQ;
			}
			wc |= (wchar_t)ch;
			psenc->chlen = 0;
			break;
		default:
			return EINVAL;
		}
		break;
	default:
		return EINVAL;
	}
	if (pwc != NULL)
		*pwc = wc;

	/* NUL reports 0; anything else reports the bytes read this call. */
	*nresult = (size_t)(wc == 0 ? 0 : len);
	*s = s0;

	return 0;
}
282
283static int
284/*ARGSUSED*/
285_citrus_ZW_wcrtomb_priv(_ZWEncodingInfo * __restrict ei,
286	char *__restrict s, size_t n, wchar_t wc,
287	_ZWState * __restrict psenc, size_t * __restrict nresult)
288{
289	int ch;
290
291	/* ei may be null */
292	_DIAGASSERT(s != NULL);
293	_DIAGASSERT(psenc != NULL);
294	_DIAGASSERT(nresult != NULL);
295
296	if (psenc->chlen != 0)
297		return EINVAL;
298	if ((uint32_t)wc <= 0x7F) {
299		ch = (unsigned char)wc;
300		switch (psenc->charset) {
301		case NONE:
302			if (ch == '\0' || ch == '\n') {
303				psenc->ch[psenc->chlen++] = ch;
304			} else {
305				if (n < 4)
306					return E2BIG;
307				n -= 4;
308				psenc->ch[psenc->chlen++] = 'z';
309				psenc->ch[psenc->chlen++] = 'W';
310				psenc->ch[psenc->chlen++] = ' ';
311				psenc->ch[psenc->chlen++] = ch;
312				psenc->charset = GB2312;
313			}
314			break;
315		case GB2312:
316			if (n < 2)
317				return E2BIG;
318			n -= 2;
319			if (ch == '\0') {
320				psenc->ch[psenc->chlen++] = '\n';
321				psenc->ch[psenc->chlen++] = '\0';
322				psenc->charset = NONE;
323			} else if (ch == '\n') {
324				psenc->ch[psenc->chlen++] = '#';
325				psenc->ch[psenc->chlen++] = '\n';
326				psenc->charset = NONE;
327			} else {
328				psenc->ch[psenc->chlen++] = ' ';
329				psenc->ch[psenc->chlen++] = ch;
330			}
331			break;
332		default:
333			return EINVAL;
334		}
335	} else if ((uint32_t)wc <= 0x7E7E) {
336		switch (psenc->charset) {
337		case NONE:
338			if (n < 2)
339				return E2BIG;
340			n -= 2;
341			psenc->ch[psenc->chlen++] = 'z';
342			psenc->ch[psenc->chlen++] = 'W';
343			psenc->charset = GB2312;
344		/* FALLTHROUGH*/
345		case GB2312:
346			if (n < 2)
347				return E2BIG;
348			n -= 2;
349			ch = (wc >> 8) & 0xFF;
350			if (ch < 0x21 || ch > 0x7E)
351				goto ilseq;
352			psenc->ch[psenc->chlen++] = ch;
353			ch = wc & 0xFF;
354			if (ch < 0x21 || ch > 0x7E)
355				goto ilseq;
356			psenc->ch[psenc->chlen++] = ch;
357			break;
358		default:
359			return EINVAL;
360		}
361	} else {
362ilseq:
363		*nresult = (size_t)-1;
364		return EILSEQ;
365	}
366	memcpy(s, psenc->ch, psenc->chlen);
367	*nresult = psenc->chlen;
368	psenc->chlen = 0;
369
370	return 0;
371}
372
373static int
374/*ARGSUSED*/
375_citrus_ZW_put_state_reset(_ZWEncodingInfo * __restrict ei,
376	char * __restrict s, size_t n,
377	_ZWState * __restrict psenc, size_t * __restrict nresult)
378{
379	/* ei may be unused */
380	_DIAGASSERT(s != NULL);
381	_DIAGASSERT(psenc != NULL);
382	_DIAGASSERT(nresult != NULL);
383
384	if (psenc->chlen != 0)
385		return EINVAL;
386	switch (psenc->charset) {
387	case GB2312:
388		if (n-- < 1)
389			return E2BIG;
390		psenc->ch[psenc->chlen++] = '\n';
391		psenc->charset = NONE;
392	/*FALLTHROUGH*/
393	case NONE:
394		*nresult = psenc->chlen;
395		if (psenc->chlen > 0) {
396			memcpy(s, psenc->ch, psenc->chlen);
397			psenc->chlen = 0;
398		}
399		break;
400	default:
401		return EINVAL;
402	}
403
404	return 0;
405}
406
407static __inline int
408/*ARGSUSED*/
409_citrus_ZW_stdenc_get_state_desc_generic(_ZWEncodingInfo * __restrict ei,
410	_ZWState * __restrict psenc, int * __restrict rstate)
411{
412	/* ei may be unused */
413	_DIAGASSERT(psenc != NULL);
414	_DIAGASSERT(rstate != NULL);
415
416	switch (psenc->charset) {
417	case NONE:
418		if (psenc->chlen != 0)
419			return EINVAL;
420		*rstate = _STDENC_SDGEN_INITIAL;
421		break;
422	case AMBIGIOUS:
423		if (psenc->chlen != 0)
424			return EINVAL;
425		*rstate = _STDENC_SDGEN_INCOMPLETE_SHIFT;
426		break;
427	case ASCII:
428	case GB2312:
429		switch (psenc->chlen) {
430		case 0:
431			*rstate = _STDENC_SDGEN_STABLE;
432			break;
433		case 1:
434			*rstate = (psenc->ch[0] == '#')
435			    ? _STDENC_SDGEN_INCOMPLETE_SHIFT
436			    : _STDENC_SDGEN_INCOMPLETE_CHAR;
437			break;
438		default:
439			return EINVAL;
440		}
441		break;
442	default:
443		return EINVAL;
444	}
445	return 0;
446}
447
448static __inline int
449/*ARGSUSED*/
450_citrus_ZW_stdenc_wctocs(_ZWEncodingInfo * __restrict ei,
451	_csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
452{
453	/* ei seems to be unused */
454	_DIAGASSERT(csid != NULL);
455	_DIAGASSERT(idx != NULL);
456
457	*csid = (_csid_t)(wc <= 0x7FU) ? 0 : 1;
458	*idx = (_index_t)wc;
459
460	return 0;
461}
462
463static __inline int
464/*ARGSUSED*/
465_citrus_ZW_stdenc_cstowc(_ZWEncodingInfo * __restrict ei,
466	 wchar_t * __restrict wc, _csid_t csid, _index_t idx)
467{
468	/* ei seems to be unused */
469	_DIAGASSERT(wc != NULL);
470
471	switch (csid) {
472	case 0: case 1:
473		break;
474	default:
475		return EINVAL;
476	}
477	*wc = (wchar_t)idx;
478
479	return 0;
480}
481
482static void
483/*ARGSUSED*/
484_citrus_ZW_encoding_module_uninit(_ZWEncodingInfo *ei)
485{
486}
487
488static int
489/*ARGSUSED*/
490_citrus_ZW_encoding_module_init(_ZWEncodingInfo * __restrict ei,
491	const void *__restrict var, size_t lenvar)
492{
493	return 0;
494}
495
496/* ----------------------------------------------------------------------
497 * public interface for ctype
498 */
499
/*
 * NOTE(review): these macros and the template header are defined outside
 * this file; presumably they build the ctype entry points for ZW from the
 * _citrus_ZW_* functions and configuration macros above -- confirm
 * against citrus_ctype.h and citrus_ctype_template.h.
 */
_CITRUS_CTYPE_DECLS(ZW);
_CITRUS_CTYPE_DEF_OPS(ZW);

#include "citrus_ctype_template.h"
504
505/* ----------------------------------------------------------------------
506 * public interface for stdenc
507 */
508
/*
 * NOTE(review): these macros and the template header are defined outside
 * this file; presumably they build the stdenc entry points for ZW from
 * the _citrus_ZW_* functions and configuration macros above -- confirm
 * against citrus_stdenc.h and citrus_stdenc_template.h.
 */
_CITRUS_STDENC_DECLS(ZW);
_CITRUS_STDENC_DEF_OPS(ZW);

#include "citrus_stdenc_template.h"
513