1219019Sgabor/* $FreeBSD$ */
2219019Sgabor/*	$NetBSD: citrus_big5.c,v 1.12 2008/06/14 16:01:07 tnozaki Exp $	*/
3219019Sgabor
4219019Sgabor/*-
5219019Sgabor * Copyright (c)2002, 2006 Citrus Project,
6219019Sgabor * All rights reserved.
7219019Sgabor *
8219019Sgabor * Redistribution and use in source and binary forms, with or without
9219019Sgabor * modification, are permitted provided that the following conditions
10219019Sgabor * are met:
11219019Sgabor * 1. Redistributions of source code must retain the above copyright
12219019Sgabor *    notice, this list of conditions and the following disclaimer.
13219019Sgabor * 2. Redistributions in binary form must reproduce the above copyright
14219019Sgabor *    notice, this list of conditions and the following disclaimer in the
15219019Sgabor *    documentation and/or other materials provided with the distribution.
16219019Sgabor *
17219019Sgabor * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18219019Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19219019Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20219019Sgabor * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21219019Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22219019Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23219019Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24219019Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25219019Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26219019Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27219019Sgabor * SUCH DAMAGE.
28219019Sgabor */
29219019Sgabor
30219019Sgabor/*-
31219019Sgabor * Copyright (c) 1993
32219019Sgabor *	The Regents of the University of California.  All rights reserved.
33219019Sgabor *
34219019Sgabor * This code is derived from software contributed to Berkeley by
35219019Sgabor * Paul Borman at Krystal Technologies.
36219019Sgabor *
37219019Sgabor * Redistribution and use in source and binary forms, with or without
38219019Sgabor * modification, are permitted provided that the following conditions
39219019Sgabor * are met:
40219019Sgabor * 1. Redistributions of source code must retain the above copyright
41219019Sgabor *    notice, this list of conditions and the following disclaimer.
42219019Sgabor * 2. Redistributions in binary form must reproduce the above copyright
43219019Sgabor *    notice, this list of conditions and the following disclaimer in the
44219019Sgabor *    documentation and/or other materials provided with the distribution.
45219019Sgabor * 3. Neither the name of the University nor the names of its contributors
46219019Sgabor *    may be used to endorse or promote products derived from this software
47219019Sgabor *    without specific prior written permission.
48219019Sgabor *
49219019Sgabor * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50219019Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51219019Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52219019Sgabor * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53219019Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54219019Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55219019Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56219019Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57219019Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58219019Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59219019Sgabor * SUCH DAMAGE.
60219019Sgabor */
61219019Sgabor
62219019Sgabor#include <sys/cdefs.h>
63219019Sgabor#include <sys/queue.h>
64219019Sgabor#include <sys/types.h>
65219019Sgabor
66219019Sgabor#include <assert.h>
67219019Sgabor#include <errno.h>
68219019Sgabor#include <limits.h>
69219019Sgabor#include <stddef.h>
70219019Sgabor#include <stdint.h>
71219019Sgabor#include <stdio.h>
72219019Sgabor#include <stdlib.h>
73219019Sgabor#include <string.h>
74219019Sgabor#include <wchar.h>
75219019Sgabor
76219019Sgabor#include "citrus_namespace.h"
77219019Sgabor#include "citrus_prop.h"
78219019Sgabor#include "citrus_types.h"
79219019Sgabor#include "citrus_bcs.h"
80219019Sgabor#include "citrus_module.h"
81219019Sgabor#include "citrus_stdenc.h"
82219019Sgabor#include "citrus_big5.h"
83219019Sgabor
/* ----------------------------------------------------------------------
 * private stuff used by templates
 */
87219019Sgabor
/*
 * Conversion state: the bytes of a multibyte character gathered so far.
 * A chlen of 0 means no input is pending.
 */
typedef struct {
	int	 chlen;		/* number of bytes currently stored in ch[] */
	char	 ch[2];		/* pending bytes of the current character */
} _BIG5State;
92219019Sgabor
/*
 * One excluded range of character codes, linked on a tail queue.
 * A code c is rejected when start <= c <= end.
 */
typedef struct _BIG5Exclude {
	TAILQ_ENTRY(_BIG5Exclude)	 entry;	/* queue linkage */
	wint_t				 end;	/* last excluded code, inclusive */
	wint_t				 start;	/* first excluded code, inclusive */
} _BIG5Exclude;

/* Head type of the queue of excluded ranges. */
typedef TAILQ_HEAD(_BIG5ExcludeList, _BIG5Exclude) _BIG5ExcludeList;
100219019Sgabor
101219019Sgabortypedef struct {
102219019Sgabor	_BIG5ExcludeList	 excludes;
103219019Sgabor	int			 cell[0x100];
104219019Sgabor} _BIG5EncodingInfo;
105219019Sgabor
/* Accessors for the members of a container object (_cei_). */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.s_##_func_

/*
 * Encoding parameters; presumably consumed by citrus_stdenc_template.h,
 * which is included at the end of this file.
 */
#define _FUNCNAME(m)			_citrus_BIG5_##m
#define _ENCODING_INFO			_BIG5EncodingInfo
#define _ENCODING_STATE			_BIG5State
/* A character occupies at most two bytes. */
#define _ENCODING_MB_CUR_MAX(_ei_)	2
#define _ENCODING_IS_STATE_DEPENDENT	0
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	0
115219019Sgabor
116219019Sgabor
117219019Sgaborstatic __inline void
118219019Sgabor/*ARGSUSED*/
119219019Sgabor_citrus_BIG5_init_state(_BIG5EncodingInfo * __restrict ei __unused,
120219019Sgabor    _BIG5State * __restrict s)
121219019Sgabor{
122219019Sgabor
123219019Sgabor	memset(s, 0, sizeof(*s));
124219019Sgabor}
125219019Sgabor
#if 0
/*
 * Compiled out.  These helpers copy a conversion state to and from an
 * opaque private buffer byte for byte.
 */

/* Store the state *s into the buffer at pspriv. */
static __inline void
/*ARGSUSED*/
_citrus_BIG5_pack_state(_BIG5EncodingInfo * __restrict ei __unused,
    void * __restrict pspriv,
    const _BIG5State * __restrict s)
{

	memcpy(pspriv, (const void *)s, sizeof(_BIG5State));
}

/* Load the state *s from the buffer at pspriv. */
static __inline void
/*ARGSUSED*/
_citrus_BIG5_unpack_state(_BIG5EncodingInfo * __restrict ei __unused,
    _BIG5State * __restrict s,
    const void * __restrict pspriv)
{

	memcpy((void *)s, pspriv, sizeof(_BIG5State));
}
#endif
147219019Sgabor
148219019Sgaborstatic __inline int
149219019Sgabor_citrus_BIG5_check(_BIG5EncodingInfo *ei, unsigned int c)
150219019Sgabor{
151219019Sgabor
152219019Sgabor	return ((ei->cell[c & 0xFF] & 0x1) ? 2 : 1);
153219019Sgabor}
154219019Sgabor
155219019Sgaborstatic __inline int
156219019Sgabor_citrus_BIG5_check2(_BIG5EncodingInfo *ei, unsigned int c)
157219019Sgabor{
158219019Sgabor
159219019Sgabor	return ((ei->cell[c & 0xFF] & 0x2) ? 1 : 0);
160219019Sgabor}
161219019Sgabor
162219019Sgaborstatic __inline int
163219019Sgabor_citrus_BIG5_check_excludes(_BIG5EncodingInfo *ei, wint_t c)
164219019Sgabor{
165219019Sgabor	_BIG5Exclude *exclude;
166219019Sgabor
167219019Sgabor	TAILQ_FOREACH(exclude, &ei->excludes, entry) {
168219019Sgabor		if (c >= exclude->start && c <= exclude->end)
169219019Sgabor			return (EILSEQ);
170219019Sgabor	}
171219019Sgabor	return (0);
172219019Sgabor}
173219019Sgabor
174219019Sgaborstatic int
175219019Sgabor_citrus_BIG5_fill_rowcol(void ** __restrict ctx, const char * __restrict s,
176219019Sgabor    uint64_t start, uint64_t end)
177219019Sgabor{
178219019Sgabor	_BIG5EncodingInfo *ei;
179219019Sgabor	uint64_t n;
180219019Sgabor	int i;
181219019Sgabor
182219019Sgabor	if (start > 0xFF || end > 0xFF)
183219019Sgabor		return (EINVAL);
184219019Sgabor	ei = (_BIG5EncodingInfo *)ctx;
185219019Sgabor	i = strcmp("row", s) ? 1 : 0;
186219019Sgabor	i = 1 << i;
187219019Sgabor	for (n = start; n <= end; ++n)
188219019Sgabor		ei->cell[n & 0xFF] |= i;
189219019Sgabor	return (0);
190219019Sgabor}
191219019Sgabor
192219019Sgaborstatic int
193219019Sgabor/*ARGSUSED*/
194219019Sgabor_citrus_BIG5_fill_excludes(void ** __restrict ctx,
195219019Sgabor    const char * __restrict s __unused, uint64_t start, uint64_t end)
196219019Sgabor{
197219019Sgabor	_BIG5EncodingInfo *ei;
198219019Sgabor	_BIG5Exclude *exclude;
199219019Sgabor
200219019Sgabor	if (start > 0xFFFF || end > 0xFFFF)
201219019Sgabor		return (EINVAL);
202219019Sgabor	ei = (_BIG5EncodingInfo *)ctx;
203219019Sgabor	exclude = TAILQ_LAST(&ei->excludes, _BIG5ExcludeList);
204219019Sgabor	if (exclude != NULL && (wint_t)start <= exclude->end)
205219019Sgabor		return (EINVAL);
206219019Sgabor	exclude = (void *)malloc(sizeof(*exclude));
207219019Sgabor	if (exclude == NULL)
208219019Sgabor		return (ENOMEM);
209219019Sgabor	exclude->start = (wint_t)start;
210219019Sgabor	exclude->end = (wint_t)end;
211219019Sgabor	TAILQ_INSERT_TAIL(&ei->excludes, exclude, entry);
212219019Sgabor
213219019Sgabor	return (0);
214219019Sgabor}
215219019Sgabor
216219019Sgaborstatic const _citrus_prop_hint_t root_hints[] = {
217219019Sgabor    _CITRUS_PROP_HINT_NUM("row", &_citrus_BIG5_fill_rowcol),
218219019Sgabor    _CITRUS_PROP_HINT_NUM("col", &_citrus_BIG5_fill_rowcol),
219219019Sgabor    _CITRUS_PROP_HINT_NUM("excludes", &_citrus_BIG5_fill_excludes),
220219019Sgabor    _CITRUS_PROP_HINT_END
221219019Sgabor};
222219019Sgabor
223219019Sgaborstatic void
224219019Sgabor/*ARGSUSED*/
225219019Sgabor_citrus_BIG5_encoding_module_uninit(_BIG5EncodingInfo *ei)
226219019Sgabor{
227219019Sgabor	_BIG5Exclude *exclude;
228219019Sgabor
229219019Sgabor	while ((exclude = TAILQ_FIRST(&ei->excludes)) != NULL) {
230219019Sgabor		TAILQ_REMOVE(&ei->excludes, exclude, entry);
231219019Sgabor		free(exclude);
232219019Sgabor	}
233219019Sgabor}
234219019Sgabor
235219019Sgaborstatic int
236219019Sgabor/*ARGSUSED*/
237219019Sgabor_citrus_BIG5_encoding_module_init(_BIG5EncodingInfo * __restrict ei,
238219019Sgabor    const void * __restrict var, size_t lenvar)
239219019Sgabor{
240219019Sgabor	void *ctx = (void *)ei;
241219019Sgabor	const char *s;
242219019Sgabor	int err;
243219019Sgabor
244219019Sgabor	memset((void *)ei, 0, sizeof(*ei));
245219019Sgabor	TAILQ_INIT(&ei->excludes);
246219019Sgabor
247219019Sgabor	if (lenvar > 0 && var != NULL) {
248219019Sgabor		s = _bcs_skip_ws_len((const char *)var, &lenvar);
249219019Sgabor		if (lenvar > 0 && *s != '\0') {
250219019Sgabor			err = _citrus_prop_parse_variable(
251219019Sgabor			    root_hints, (void *)ei, s, lenvar);
252219019Sgabor			if (err == 0)
253219019Sgabor				return (0);
254219019Sgabor
255219019Sgabor			_citrus_BIG5_encoding_module_uninit(ei);
256219019Sgabor			memset((void *)ei, 0, sizeof(*ei));
257219019Sgabor			TAILQ_INIT(&ei->excludes);
258219019Sgabor		}
259219019Sgabor	}
260219019Sgabor
261219019Sgabor	/* fallback Big5-1984, for backward compatibility. */
262219019Sgabor	_citrus_BIG5_fill_rowcol((void **)&ctx, "row", 0xA1, 0xFE);
263219019Sgabor	_citrus_BIG5_fill_rowcol((void **)&ctx, "col", 0x40, 0x7E);
264219019Sgabor	_citrus_BIG5_fill_rowcol((void **)&ctx, "col", 0xA1, 0xFE);
265219019Sgabor
266219019Sgabor	return (0);
267219019Sgabor}
268219019Sgabor
269219019Sgaborstatic int
270219019Sgabor/*ARGSUSED*/
271219019Sgabor_citrus_BIG5_mbrtowc_priv(_BIG5EncodingInfo * __restrict ei,
272219019Sgabor    wchar_t * __restrict pwc,
273219019Sgabor    char ** __restrict s, size_t n,
274219019Sgabor    _BIG5State * __restrict psenc,
275219019Sgabor    size_t * __restrict nresult)
276219019Sgabor{
277219019Sgabor	wchar_t wchar;
278219019Sgabor	char *s0;
279219019Sgabor	int c, chlenbak;
280219019Sgabor
281219019Sgabor	s0 = *s;
282219019Sgabor
283219019Sgabor	if (s0 == NULL) {
284219019Sgabor		_citrus_BIG5_init_state(ei, psenc);
285219019Sgabor		*nresult = 0;
286219019Sgabor		return (0);
287219019Sgabor	}
288219019Sgabor
289219019Sgabor	chlenbak = psenc->chlen;
290219019Sgabor
291219019Sgabor	/* make sure we have the first byte in the buffer */
292219019Sgabor	switch (psenc->chlen) {
293219019Sgabor	case 0:
294219019Sgabor		if (n < 1)
295219019Sgabor			goto restart;
296219019Sgabor		psenc->ch[0] = *s0++;
297219019Sgabor		psenc->chlen = 1;
298219019Sgabor		n--;
299219019Sgabor		break;
300219019Sgabor	case 1:
301219019Sgabor		break;
302219019Sgabor	default:
303219019Sgabor		/* illegal state */
304219019Sgabor		goto ilseq;
305219019Sgabor	}
306219019Sgabor
307219019Sgabor	c = _citrus_BIG5_check(ei, psenc->ch[0] & 0xff);
308219019Sgabor	if (c == 0)
309219019Sgabor		goto ilseq;
310219019Sgabor	while (psenc->chlen < c) {
311219019Sgabor		if (n < 1) {
312219019Sgabor			goto restart;
313219019Sgabor		}
314219019Sgabor		psenc->ch[psenc->chlen] = *s0++;
315219019Sgabor		psenc->chlen++;
316219019Sgabor		n--;
317219019Sgabor	}
318219019Sgabor
319219019Sgabor	switch (c) {
320219019Sgabor	case 1:
321219019Sgabor		wchar = psenc->ch[0] & 0xff;
322219019Sgabor		break;
323219019Sgabor	case 2:
324219019Sgabor		if (!_citrus_BIG5_check2(ei, psenc->ch[1] & 0xff))
325219019Sgabor			goto ilseq;
326219019Sgabor		wchar = ((psenc->ch[0] & 0xff) << 8) | (psenc->ch[1] & 0xff);
327219019Sgabor		break;
328219019Sgabor	default:
329219019Sgabor		/* illegal state */
330219019Sgabor		goto ilseq;
331219019Sgabor	}
332219019Sgabor
333219019Sgabor	if (_citrus_BIG5_check_excludes(ei, (wint_t)wchar) != 0)
334219019Sgabor		goto ilseq;
335219019Sgabor
336219019Sgabor	*s = s0;
337219019Sgabor	psenc->chlen = 0;
338219019Sgabor	if (pwc)
339219019Sgabor		*pwc = wchar;
340219019Sgabor	*nresult = wchar ? c - chlenbak : 0;
341219019Sgabor
342219019Sgabor	return (0);
343219019Sgabor
344219019Sgaborilseq:
345219019Sgabor	psenc->chlen = 0;
346219019Sgabor	*nresult = (size_t)-1;
347219019Sgabor	return (EILSEQ);
348219019Sgabor
349219019Sgaborrestart:
350219019Sgabor	*s = s0;
351219019Sgabor	*nresult = (size_t)-2;
352219019Sgabor	return (0);
353219019Sgabor}
354219019Sgabor
355219019Sgaborstatic int
356219019Sgabor/*ARGSUSED*/
357219019Sgabor_citrus_BIG5_wcrtomb_priv(_BIG5EncodingInfo * __restrict ei,
358219019Sgabor    char * __restrict s,
359219019Sgabor    size_t n, wchar_t wc, _BIG5State * __restrict psenc __unused,
360219019Sgabor    size_t * __restrict nresult)
361219019Sgabor{
362219019Sgabor	unsigned char l;
363219019Sgabor	int ret;
364219019Sgabor
365219019Sgabor	/* check invalid sequence */
366219019Sgabor	if (wc & ~0xffff ||
367219019Sgabor	    _citrus_BIG5_check_excludes(ei, (wint_t)wc) != 0) {
368219019Sgabor		ret = EILSEQ;
369219019Sgabor		goto err;
370219019Sgabor	}
371219019Sgabor
372219019Sgabor	if (wc & 0x8000) {
373219019Sgabor		if (_citrus_BIG5_check(ei, (wc >> 8) & 0xff) != 2 ||
374219019Sgabor		    !_citrus_BIG5_check2(ei, wc & 0xff)) {
375219019Sgabor			ret = EILSEQ;
376219019Sgabor			goto err;
377219019Sgabor		}
378219019Sgabor		l = 2;
379219019Sgabor	} else {
380219019Sgabor		if (wc & ~0xff || !_citrus_BIG5_check(ei, wc & 0xff)) {
381219019Sgabor			ret = EILSEQ;
382219019Sgabor			goto err;
383219019Sgabor		}
384219019Sgabor		l = 1;
385219019Sgabor	}
386219019Sgabor
387219019Sgabor	if (n < l) {
388219019Sgabor		/* bound check failure */
389219019Sgabor		ret = E2BIG;
390219019Sgabor		goto err;
391219019Sgabor	}
392219019Sgabor
393219019Sgabor	if (l == 2) {
394219019Sgabor		s[0] = (wc >> 8) & 0xff;
395219019Sgabor		s[1] = wc & 0xff;
396219019Sgabor	} else
397219019Sgabor		s[0] = wc & 0xff;
398219019Sgabor
399219019Sgabor	*nresult = l;
400219019Sgabor
401219019Sgabor	return (0);
402219019Sgabor
403219019Sgaborerr:
404219019Sgabor	*nresult = (size_t)-1;
405219019Sgabor	return (ret);
406219019Sgabor}
407219019Sgabor
408219019Sgaborstatic __inline int
409219019Sgabor/*ARGSUSED*/
410219019Sgabor_citrus_BIG5_stdenc_wctocs(_BIG5EncodingInfo * __restrict ei __unused,
411219019Sgabor    _csid_t * __restrict csid,
412219019Sgabor    _index_t * __restrict idx, wchar_t wc)
413219019Sgabor{
414219019Sgabor
415219019Sgabor	*csid = (wc < 0x100) ? 0 : 1;
416219019Sgabor	*idx = (_index_t)wc;
417219019Sgabor
418219019Sgabor	return (0);
419219019Sgabor}
420219019Sgabor
421219019Sgaborstatic __inline int
422219019Sgabor/*ARGSUSED*/
423219019Sgabor_citrus_BIG5_stdenc_cstowc(_BIG5EncodingInfo * __restrict ei __unused,
424219019Sgabor    wchar_t * __restrict wc,
425219019Sgabor    _csid_t csid, _index_t idx)
426219019Sgabor{
427219019Sgabor
428219019Sgabor	switch (csid) {
429219019Sgabor	case 0:
430219019Sgabor	case 1:
431219019Sgabor		*wc = (wchar_t)idx;
432219019Sgabor		break;
433219019Sgabor	default:
434219019Sgabor		return (EILSEQ);
435219019Sgabor	}
436219019Sgabor
437219019Sgabor	return (0);
438219019Sgabor}
439219019Sgabor
440219019Sgaborstatic __inline int
441219019Sgabor/*ARGSUSED*/
442219019Sgabor_citrus_BIG5_stdenc_get_state_desc_generic(_BIG5EncodingInfo * __restrict ei __unused,
443219019Sgabor    _BIG5State * __restrict psenc,
444219019Sgabor    int * __restrict rstate)
445219019Sgabor{
446219019Sgabor
447219019Sgabor	*rstate = (psenc->chlen == 0) ? _STDENC_SDGEN_INITIAL :
448219019Sgabor	    _STDENC_SDGEN_INCOMPLETE_CHAR;
449219019Sgabor	return (0);
450219019Sgabor}
451219019Sgabor
452219019Sgabor/* ----------------------------------------------------------------------
453219019Sgabor * public interface for stdenc
454219019Sgabor */
455219019Sgabor
456219019Sgabor_CITRUS_STDENC_DECLS(BIG5);
457219019Sgabor_CITRUS_STDENC_DEF_OPS(BIG5);
458219019Sgabor
459219019Sgabor#include "citrus_stdenc_template.h"
460