1219019Sgabor/* $FreeBSD$ */
2219019Sgabor/* $NetBSD: citrus_gbk2k.c,v 1.7 2008/06/14 16:01:07 tnozaki Exp $ */
3219019Sgabor
4219019Sgabor/*-
5219019Sgabor * Copyright (c)2003 Citrus Project,
6219019Sgabor * All rights reserved.
7219019Sgabor *
8219019Sgabor * Redistribution and use in source and binary forms, with or without
9219019Sgabor * modification, are permitted provided that the following conditions
10219019Sgabor * are met:
11219019Sgabor * 1. Redistributions of source code must retain the above copyright
12219019Sgabor *    notice, this list of conditions and the following disclaimer.
13219019Sgabor * 2. Redistributions in binary form must reproduce the above copyright
14219019Sgabor *    notice, this list of conditions and the following disclaimer in the
15219019Sgabor *    documentation and/or other materials provided with the distribution.
16219019Sgabor *
17219019Sgabor * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18219019Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19219019Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20219019Sgabor * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21219019Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22219019Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23219019Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24219019Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25219019Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26219019Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27219019Sgabor * SUCH DAMAGE.
28219019Sgabor */
29219019Sgabor
30219019Sgabor#include <sys/cdefs.h>
31219019Sgabor#include <sys/types.h>
32219019Sgabor
33219019Sgabor#include <assert.h>
34219019Sgabor#include <errno.h>
35219019Sgabor#include <limits.h>
36219019Sgabor#include <stdbool.h>
37219019Sgabor#include <stddef.h>
38219019Sgabor#include <stdio.h>
39219019Sgabor#include <stdlib.h>
40219019Sgabor#include <string.h>
41219019Sgabor#include <wchar.h>
42219019Sgabor
43219019Sgabor#include "citrus_namespace.h"
44219019Sgabor#include "citrus_types.h"
45219019Sgabor#include "citrus_bcs.h"
46219019Sgabor#include "citrus_module.h"
47219019Sgabor#include "citrus_stdenc.h"
48219019Sgabor#include "citrus_gbk2k.h"
49219019Sgabor
50219019Sgabor
/* ----------------------------------------------------------------------
 * private stuff used by templates
 */
54219019Sgabor
/*
 * Conversion state: the bytes of a multibyte character collected so far.
 */
struct _GBK2KState {
	int	 chlen;		/* number of bytes currently held in ch[] */
	char	 ch[4];		/* pending bytes, in input order */
};
typedef struct _GBK2KState _GBK2KState;
59219019Sgabor
/*
 * Per-encoding settings.  mb_cur_max is the longest multibyte sequence
 * accepted: the module initializer sets it to 4 by default, or to 2 for
 * the "2byte" variant, which disables the four-byte form.
 */
typedef struct {
	int	 mb_cur_max;
} _GBK2KEncodingInfo;
63219019Sgabor
/*
 * Glue consumed by the shared stdenc template included at the end of this
 * file: accessors for the encoding info and per-function states, the
 * symbol prefix, the concrete info/state types, and encoding properties.
 * Every expansion is fully parenthesized so it can be used inside any
 * larger expression.
 */
#define _CEI_TO_EI(_cei_)		(&((_cei_)->ei))
#define _CEI_TO_STATE(_cei_, _func_)	((_cei_)->states.s_##_func_)

#define _FUNCNAME(m)			_citrus_GBK2K_##m
#define _ENCODING_INFO			_GBK2KEncodingInfo
#define _ENCODING_STATE			_GBK2KState
#define _ENCODING_MB_CUR_MAX(_ei_)	((_ei_)->mb_cur_max)
#define _ENCODING_IS_STATE_DEPENDENT	0
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	0
73219019Sgabor
74219019Sgaborstatic __inline void
75219019Sgabor/*ARGSUSED*/
76219019Sgabor_citrus_GBK2K_init_state(_GBK2KEncodingInfo * __restrict ei __unused,
77219019Sgabor    _GBK2KState * __restrict s)
78219019Sgabor{
79219019Sgabor
80219019Sgabor	memset(s, 0, sizeof(*s));
81219019Sgabor}
82219019Sgabor
#if 0
/*
 * Disabled helpers: copy a conversion state to / from an opaque buffer.
 * Both move exactly sizeof(_GBK2KState) bytes; the caller would have to
 * supply a buffer at least that large.
 */
/*ARGSUSED*/
static __inline void
_citrus_GBK2K_pack_state(_GBK2KEncodingInfo * __restrict ei __unused,
    void * __restrict pspriv, const _GBK2KState * __restrict s)
{

	memcpy(pspriv, s, sizeof(_GBK2KState));
}

/*ARGSUSED*/
static __inline void
_citrus_GBK2K_unpack_state(_GBK2KEncodingInfo * __restrict ei __unused,
    _GBK2KState * __restrict s, const void * __restrict pspriv)
{

	memcpy(s, pspriv, sizeof(_GBK2KState));
}
#endif
102219019Sgabor
/*
 * True when the low eight bits of c form a single-byte character,
 * i.e. a value in 0x00..0x7f (bit 7 clear).  Higher bits are ignored.
 */
static __inline bool
_mb_singlebyte(int c)
{

	return ((c & 0x80) == 0);
}
109219019Sgabor
/*
 * True when the low eight bits of c lie in 0x81..0xfe, the range accepted
 * for the first byte of a multibyte character (and for the third byte of
 * the four-byte form).  Higher bits of c are ignored.
 */
static __inline bool
_mb_leadbyte(int c)
{
	const int byte = c & 0xff;

	return (byte > 0x80 && byte < 0xff);
}
117219019Sgabor
/*
 * True when the low eight bits of c are acceptable as the second byte of
 * a two-byte character: 0x40..0x7e or 0x80..0xfe.  Put differently, any
 * value from 0x40 upwards except 0x7f and 0xff.
 */
static __inline bool
_mb_trailbyte(int c)
{
	const int byte = c & 0xff;

	if (byte < 0x40)
		return (false);
	return (byte != 0x7f && byte != 0xff);
}
125219019Sgabor
/*
 * True when the low eight bits of c lie in 0x30..0x39.  The converters in
 * this file require such a byte in the second and fourth positions of the
 * four-byte form.  Higher bits of c are ignored.
 */
static __inline bool
_mb_surrogate(int c)
{
	const int byte = c & 0xff;

	if (byte < 0x30)
		return (false);
	return (byte <= 0x39);
}
133219019Sgabor
/*
 * Number of bytes the wide character v occupies in multibyte form, judged
 * only by the magnitude of its value viewed as an unsigned 32-bit number:
 * up to 0xff -> 1, up to 0xffff -> 2, anything larger -> 4.  No validity
 * check is made here.
 */
static __inline int
_mb_count(wchar_t v)
{
	const uint32_t code = (uint32_t)v; /* XXX */

	if (code <= 0xffU)
		return (1);
	if (code <= 0xffffU)
		return (2);
	return (4);
}
146219019Sgabor
/*
 * Shorthands for the pending-byte buffer.  Both expand to code that names
 * a variable called `psenc' directly, so they can only be used where such
 * a variable (a pointer to the conversion state) is in scope.
 */
/* The most recently stored byte; only meaningful while psenc->chlen > 0. */
#define	_PSENC		(psenc->ch[psenc->chlen - 1])
/* Append c to the buffer; the expression yields the byte just stored. */
#define	_PUSH_PSENC(c)	(psenc->ch[psenc->chlen++] = (c))
149219019Sgabor
150219019Sgaborstatic int
151219019Sgabor_citrus_GBK2K_mbrtowc_priv(_GBK2KEncodingInfo * __restrict ei,
152282275Stijl    wchar_t * __restrict pwc, char ** __restrict s, size_t n,
153219019Sgabor    _GBK2KState * __restrict psenc, size_t * __restrict nresult)
154219019Sgabor{
155282275Stijl	char *s0, *s1;
156219019Sgabor	wchar_t wc;
157219019Sgabor	int chlenbak, len;
158219019Sgabor
159219019Sgabor	s0 = *s;
160219019Sgabor
161219019Sgabor	if (s0 == NULL) {
162219019Sgabor		/* _citrus_GBK2K_init_state(ei, psenc); */
163219019Sgabor		psenc->chlen = 0;
164219019Sgabor		*nresult = 0;
165219019Sgabor		return (0);
166219019Sgabor	}
167219019Sgabor
168219019Sgabor	chlenbak = psenc->chlen;
169219019Sgabor
170219019Sgabor	switch (psenc->chlen) {
171219019Sgabor	case 3:
172219019Sgabor		if (!_mb_leadbyte (_PSENC))
173219019Sgabor			goto invalid;
174219019Sgabor	/* FALLTHROUGH */
175219019Sgabor	case 2:
176219019Sgabor		if (!_mb_surrogate(_PSENC) || _mb_trailbyte(_PSENC))
177219019Sgabor			goto invalid;
178219019Sgabor	/* FALLTHROUGH */
179219019Sgabor	case 1:
180219019Sgabor		if (!_mb_leadbyte (_PSENC))
181219019Sgabor			goto invalid;
182219019Sgabor	/* FALLTHOROUGH */
183219019Sgabor	case 0:
184219019Sgabor		break;
185219019Sgabor	default:
186219019Sgabor		goto invalid;
187219019Sgabor	}
188219019Sgabor
189219019Sgabor	for (;;) {
190219019Sgabor		if (n-- < 1)
191219019Sgabor			goto restart;
192219019Sgabor
193219019Sgabor		_PUSH_PSENC(*s0++);
194219019Sgabor
195219019Sgabor		switch (psenc->chlen) {
196219019Sgabor		case 1:
197219019Sgabor			if (_mb_singlebyte(_PSENC))
198219019Sgabor				goto convert;
199219019Sgabor			if (_mb_leadbyte  (_PSENC))
200219019Sgabor				continue;
201219019Sgabor			goto ilseq;
202219019Sgabor		case 2:
203219019Sgabor			if (_mb_trailbyte (_PSENC))
204219019Sgabor				goto convert;
205219019Sgabor			if (ei->mb_cur_max == 4 &&
206219019Sgabor			    _mb_surrogate (_PSENC))
207219019Sgabor				continue;
208219019Sgabor			goto ilseq;
209219019Sgabor		case 3:
210219019Sgabor			if (_mb_leadbyte  (_PSENC))
211219019Sgabor				continue;
212219019Sgabor			goto ilseq;
213219019Sgabor		case 4:
214219019Sgabor			if (_mb_surrogate (_PSENC))
215219019Sgabor				goto convert;
216219019Sgabor			goto ilseq;
217219019Sgabor		}
218219019Sgabor	}
219219019Sgabor
220219019Sgaborconvert:
221219019Sgabor	len = psenc->chlen;
222219019Sgabor	s1  = &psenc->ch[0];
223219019Sgabor	wc  = 0;
224219019Sgabor	while (len-- > 0)
225219019Sgabor		wc = (wc << 8) | (*s1++ & 0xff);
226219019Sgabor
227219019Sgabor	if (pwc != NULL)
228219019Sgabor		*pwc = wc;
229219019Sgabor	*s = s0;
230219019Sgabor	*nresult = (wc == 0) ? 0 : psenc->chlen - chlenbak;
231219019Sgabor	/* _citrus_GBK2K_init_state(ei, psenc); */
232219019Sgabor	psenc->chlen = 0;
233219019Sgabor
234219019Sgabor	return (0);
235219019Sgabor
236219019Sgaborrestart:
237219019Sgabor	*s = s0;
238219019Sgabor	*nresult = (size_t)-2;
239219019Sgabor
240219019Sgabor	return (0);
241219019Sgabor
242219019Sgaborinvalid:
243219019Sgabor	return (EINVAL);
244219019Sgabor
245219019Sgaborilseq:
246219019Sgabor	*nresult = (size_t)-1;
247219019Sgabor	return (EILSEQ);
248219019Sgabor}
249219019Sgabor
250219019Sgaborstatic int
251219019Sgabor_citrus_GBK2K_wcrtomb_priv(_GBK2KEncodingInfo * __restrict ei,
252219019Sgabor    char * __restrict s, size_t n, wchar_t wc, _GBK2KState * __restrict psenc,
253219019Sgabor    size_t * __restrict nresult)
254219019Sgabor{
255219019Sgabor	size_t len;
256219019Sgabor	int ret;
257219019Sgabor
258219019Sgabor	if (psenc->chlen != 0) {
259219019Sgabor		ret = EINVAL;
260219019Sgabor		goto err;
261219019Sgabor	}
262219019Sgabor
263219019Sgabor	len = _mb_count(wc);
264219019Sgabor	if (n < len) {
265219019Sgabor		ret = E2BIG;
266219019Sgabor		goto err;
267219019Sgabor	}
268219019Sgabor
269219019Sgabor	switch (len) {
270219019Sgabor	case 1:
271219019Sgabor		if (!_mb_singlebyte(_PUSH_PSENC(wc     ))) {
272219019Sgabor			ret = EILSEQ;
273219019Sgabor			goto err;
274219019Sgabor		}
275219019Sgabor		break;
276219019Sgabor	case 2:
277219019Sgabor		if (!_mb_leadbyte  (_PUSH_PSENC(wc >> 8)) ||
278219019Sgabor		    !_mb_trailbyte (_PUSH_PSENC(wc))) {
279219019Sgabor			ret = EILSEQ;
280219019Sgabor			goto err;
281219019Sgabor		}
282219019Sgabor		break;
283219019Sgabor	case 4:
284219019Sgabor		if (ei->mb_cur_max != 4 ||
285219019Sgabor		    !_mb_leadbyte  (_PUSH_PSENC(wc >> 24)) ||
286219019Sgabor		    !_mb_surrogate (_PUSH_PSENC(wc >> 16)) ||
287219019Sgabor		    !_mb_leadbyte  (_PUSH_PSENC(wc >>  8)) ||
288219019Sgabor		    !_mb_surrogate (_PUSH_PSENC(wc))) {
289219019Sgabor			ret = EILSEQ;
290219019Sgabor			goto err;
291219019Sgabor		}
292219019Sgabor		break;
293219019Sgabor	}
294219019Sgabor
295219019Sgabor	memcpy(s, psenc->ch, psenc->chlen);
296219019Sgabor	*nresult = psenc->chlen;
297219019Sgabor	/* _citrus_GBK2K_init_state(ei, psenc); */
298219019Sgabor	psenc->chlen = 0;
299219019Sgabor
300219019Sgabor	return (0);
301219019Sgabor
302219019Sgaborerr:
303219019Sgabor	*nresult = (size_t)-1;
304219019Sgabor	return (ret);
305219019Sgabor}
306219019Sgabor
307219019Sgaborstatic __inline int
308219019Sgabor/*ARGSUSED*/
309219019Sgabor_citrus_GBK2K_stdenc_wctocs(_GBK2KEncodingInfo * __restrict ei __unused,
310219019Sgabor    _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
311219019Sgabor{
312219019Sgabor	uint8_t ch, cl;
313219019Sgabor
314219019Sgabor	if ((uint32_t)wc < 0x80) {
315219019Sgabor		/* ISO646 */
316219019Sgabor		*csid = 0;
317219019Sgabor		*idx = (_index_t)wc;
318219019Sgabor	} else if ((uint32_t)wc >= 0x10000) {
319219019Sgabor		/* GBKUCS : XXX */
320219019Sgabor		*csid = 3;
321219019Sgabor		*idx = (_index_t)wc;
322219019Sgabor	} else {
323219019Sgabor		ch = (uint8_t)(wc >> 8);
324219019Sgabor		cl = (uint8_t)wc;
325219019Sgabor		if (ch >= 0xA1 && cl >= 0xA1) {
326219019Sgabor			/* EUC G1 */
327219019Sgabor			*csid = 1;
328219019Sgabor			*idx = (_index_t)wc & 0x7F7FU;
329219019Sgabor		} else {
330219019Sgabor			/* extended area (0x8140-) */
331219019Sgabor			*csid = 2;
332219019Sgabor			*idx = (_index_t)wc;
333219019Sgabor		}
334219019Sgabor	}
335219019Sgabor
336219019Sgabor	return (0);
337219019Sgabor}
338219019Sgabor
339219019Sgaborstatic __inline int
340219019Sgabor/*ARGSUSED*/
341219019Sgabor_citrus_GBK2K_stdenc_cstowc(_GBK2KEncodingInfo * __restrict ei,
342219019Sgabor    wchar_t * __restrict wc, _csid_t csid, _index_t idx)
343219019Sgabor{
344219019Sgabor
345219019Sgabor	switch (csid) {
346219019Sgabor	case 0:
347219019Sgabor		/* ISO646 */
348219019Sgabor		*wc = (wchar_t)idx;
349219019Sgabor		break;
350219019Sgabor	case 1:
351219019Sgabor		/* EUC G1 */
352219019Sgabor		*wc = (wchar_t)idx | 0x8080U;
353219019Sgabor		break;
354219019Sgabor	case 2:
355219019Sgabor		/* extended area */
356219019Sgabor		*wc = (wchar_t)idx;
357219019Sgabor		break;
358219019Sgabor	case 3:
359219019Sgabor		/* GBKUCS : XXX */
360219019Sgabor		if (ei->mb_cur_max != 4)
361219019Sgabor			return (EINVAL);
362219019Sgabor		*wc = (wchar_t)idx;
363219019Sgabor		break;
364219019Sgabor	default:
365219019Sgabor		return (EILSEQ);
366219019Sgabor	}
367219019Sgabor
368219019Sgabor	return (0);
369219019Sgabor}
370219019Sgabor
371219019Sgaborstatic __inline int
372219019Sgabor/*ARGSUSED*/
373219019Sgabor_citrus_GBK2K_stdenc_get_state_desc_generic(_GBK2KEncodingInfo * __restrict ei __unused,
374219019Sgabor    _GBK2KState * __restrict psenc, int * __restrict rstate)
375219019Sgabor{
376219019Sgabor
377219019Sgabor	*rstate = (psenc->chlen == 0) ? _STDENC_SDGEN_INITIAL :
378219019Sgabor	    _STDENC_SDGEN_INCOMPLETE_CHAR;
379219019Sgabor	return (0);
380219019Sgabor}
381219019Sgabor
382219019Sgaborstatic int
383219019Sgabor/*ARGSUSED*/
384219019Sgabor_citrus_GBK2K_encoding_module_init(_GBK2KEncodingInfo * __restrict ei,
385219019Sgabor    const void * __restrict var, size_t lenvar)
386219019Sgabor{
387219019Sgabor	const char *p;
388219019Sgabor
389219019Sgabor	p = var;
390219019Sgabor	memset((void *)ei, 0, sizeof(*ei));
391219019Sgabor	ei->mb_cur_max = 4;
392219019Sgabor	while (lenvar > 0) {
393219019Sgabor		switch (_bcs_tolower(*p)) {
394219019Sgabor		case '2':
395219019Sgabor			MATCH("2byte", ei->mb_cur_max = 2);
396219019Sgabor			break;
397219019Sgabor		}
398219019Sgabor		p++;
399219019Sgabor		lenvar--;
400219019Sgabor	}
401219019Sgabor
402219019Sgabor	return (0);
403219019Sgabor}
404219019Sgabor
405219019Sgaborstatic void
406219019Sgabor/*ARGSUSED*/
407219019Sgabor_citrus_GBK2K_encoding_module_uninit(_GBK2KEncodingInfo *ei __unused)
408219019Sgabor{
409219019Sgabor
410219019Sgabor}
411219019Sgabor
/* ----------------------------------------------------------------------
 * public interface for stdenc
 */
415219019Sgabor
/*
 * Instantiate the stdenc interface for this encoding.
 * NOTE(review): both macros are defined outside this file; presumably the
 * first declares the module entry points and the second defines the
 * operations table for "GBK2K" -- confirm against their definitions.
 */
_CITRUS_STDENC_DECLS(GBK2K);
_CITRUS_STDENC_DEF_OPS(GBK2K);

/*
 * The shared template is included last, after the _FUNCNAME / _ENCODING_*
 * macros and the _citrus_GBK2K_* helpers above have been defined.
 * NOTE(review): presumably it builds the public stdenc functions on top
 * of those definitions -- confirm in the template header.
 */
#include "citrus_stdenc_template.h"
420