citrus_big5.c revision 252583
1219019Sgabor/* $FreeBSD: head/lib/libiconv_modules/BIG5/citrus_big5.c 252583 2013-07-03 18:27:45Z peter $ */
2219019Sgabor/*	$NetBSD: citrus_big5.c,v 1.12 2008/06/14 16:01:07 tnozaki Exp $	*/
3219019Sgabor
4219019Sgabor/*-
5219019Sgabor * Copyright (c)2002, 2006 Citrus Project,
6219019Sgabor * All rights reserved.
7219019Sgabor *
8219019Sgabor * Redistribution and use in source and binary forms, with or without
9219019Sgabor * modification, are permitted provided that the following conditions
10219019Sgabor * are met:
11219019Sgabor * 1. Redistributions of source code must retain the above copyright
12219019Sgabor *    notice, this list of conditions and the following disclaimer.
13219019Sgabor * 2. Redistributions in binary form must reproduce the above copyright
14219019Sgabor *    notice, this list of conditions and the following disclaimer in the
15219019Sgabor *    documentation and/or other materials provided with the distribution.
16219019Sgabor *
17219019Sgabor * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18219019Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19219019Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20219019Sgabor * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21219019Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22219019Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23219019Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24219019Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25219019Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26219019Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27219019Sgabor * SUCH DAMAGE.
28219019Sgabor */
29219019Sgabor
30219019Sgabor/*-
31219019Sgabor * Copyright (c) 1993
32219019Sgabor *	The Regents of the University of California.  All rights reserved.
33219019Sgabor *
34219019Sgabor * This code is derived from software contributed to Berkeley by
35219019Sgabor * Paul Borman at Krystal Technologies.
36219019Sgabor *
37219019Sgabor * Redistribution and use in source and binary forms, with or without
38219019Sgabor * modification, are permitted provided that the following conditions
39219019Sgabor * are met:
40219019Sgabor * 1. Redistributions of source code must retain the above copyright
41219019Sgabor *    notice, this list of conditions and the following disclaimer.
42219019Sgabor * 2. Redistributions in binary form must reproduce the above copyright
43219019Sgabor *    notice, this list of conditions and the following disclaimer in the
44219019Sgabor *    documentation and/or other materials provided with the distribution.
45219019Sgabor * 3. Neither the name of the University nor the names of its contributors
46219019Sgabor *    may be used to endorse or promote products derived from this software
47219019Sgabor *    without specific prior written permission.
48219019Sgabor *
49219019Sgabor * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50219019Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51219019Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52219019Sgabor * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53219019Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54219019Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55219019Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56219019Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57219019Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58219019Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59219019Sgabor * SUCH DAMAGE.
60219019Sgabor */
61219019Sgabor
62219019Sgabor#include <sys/cdefs.h>
63219019Sgabor#include <sys/queue.h>
64219019Sgabor#include <sys/types.h>
65219019Sgabor
66219019Sgabor#include <assert.h>
67219019Sgabor#include <errno.h>
68219019Sgabor#include <limits.h>
69219019Sgabor#include <stddef.h>
70219019Sgabor#include <stdint.h>
71219019Sgabor#include <stdio.h>
72219019Sgabor#include <stdlib.h>
73219019Sgabor#include <string.h>
74219019Sgabor#include <wchar.h>
75219019Sgabor
76219019Sgabor#include "citrus_namespace.h"
77219019Sgabor#include "citrus_prop.h"
78219019Sgabor#include "citrus_types.h"
79219019Sgabor#include "citrus_bcs.h"
80219019Sgabor#include "citrus_module.h"
81219019Sgabor#include "citrus_stdenc.h"
82219019Sgabor#include "citrus_big5.h"
83219019Sgabor
84219019Sgabor/* ----------------------------------------------------------------------
85219019Sgabor * private stuffs used by templates
86219019Sgabor */
87219019Sgabor
/*
 * Conversion state kept between calls: the bytes of a character that has
 * been read only in part so far.
 */
typedef struct {
	int	 chlen;		/* number of bytes currently held in ch[] */
	char	 ch[2];		/* buffered bytes of the pending character */
} _BIG5State;
92219019Sgabor
/* One range of wide-character values to reject; both bounds are inclusive. */
typedef struct _BIG5Exclude {
	TAILQ_ENTRY(_BIG5Exclude)	 entry;	/* tail-queue linkage */
	wint_t				 end;	/* last excluded value */
	wint_t				 start;	/* first excluded value */
} _BIG5Exclude;

/* Tail queue of exclusion ranges. */
typedef TAILQ_HEAD(_BIG5ExcludeList, _BIG5Exclude) _BIG5ExcludeList;
100219019Sgabor
101219019Sgabortypedef struct {
102219019Sgabor	_BIG5ExcludeList	 excludes;
103219019Sgabor	int			 cell[0x100];
104219019Sgabor} _BIG5EncodingInfo;
105219019Sgabor
/*
 * Glue macros.  Nothing in the code shown here expands them; presumably
 * they are consumed by citrus_stdenc_template.h, which is included at the
 * end of the file -- confirm against that header.
 */

/* Accessors into a container whose layout is not declared in this file. */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.s_##_func_

/* Function-name prefix and the encoding-specific types. */
#define _FUNCNAME(m)			_citrus_BIG5_##m
#define _ENCODING_INFO			_BIG5EncodingInfo
#define _ENCODING_STATE			_BIG5State

/* Fixed properties of the encoding. */
#define _ENCODING_MB_CUR_MAX(_ei_)	2
#define _ENCODING_IS_STATE_DEPENDENT	0
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	0
115219019Sgabor
116219019Sgabor
117219019Sgaborstatic __inline void
118219019Sgabor/*ARGSUSED*/
119219019Sgabor_citrus_BIG5_init_state(_BIG5EncodingInfo * __restrict ei __unused,
120219019Sgabor    _BIG5State * __restrict s)
121219019Sgabor{
122219019Sgabor
123219019Sgabor	memset(s, 0, sizeof(*s));
124219019Sgabor}
125219019Sgabor
126219019Sgaborstatic __inline void
127219019Sgabor/*ARGSUSED*/
128219019Sgabor_citrus_BIG5_pack_state(_BIG5EncodingInfo * __restrict ei __unused,
129219019Sgabor    void * __restrict pspriv,
130219019Sgabor    const _BIG5State * __restrict s)
131219019Sgabor{
132219019Sgabor
133219019Sgabor	memcpy(pspriv, (const void *)s, sizeof(*s));
134219019Sgabor}
135219019Sgabor
136219019Sgaborstatic __inline void
137219019Sgabor/*ARGSUSED*/
138219019Sgabor_citrus_BIG5_unpack_state(_BIG5EncodingInfo * __restrict ei __unused,
139219019Sgabor    _BIG5State * __restrict s,
140219019Sgabor    const void * __restrict pspriv)
141219019Sgabor{
142219019Sgabor
143219019Sgabor	memcpy((void *)s, pspriv, sizeof(*s));
144219019Sgabor}
145219019Sgabor
146219019Sgaborstatic __inline int
147219019Sgabor_citrus_BIG5_check(_BIG5EncodingInfo *ei, unsigned int c)
148219019Sgabor{
149219019Sgabor
150219019Sgabor	return ((ei->cell[c & 0xFF] & 0x1) ? 2 : 1);
151219019Sgabor}
152219019Sgabor
153219019Sgaborstatic __inline int
154219019Sgabor_citrus_BIG5_check2(_BIG5EncodingInfo *ei, unsigned int c)
155219019Sgabor{
156219019Sgabor
157219019Sgabor	return ((ei->cell[c & 0xFF] & 0x2) ? 1 : 0);
158219019Sgabor}
159219019Sgabor
160219019Sgaborstatic __inline int
161219019Sgabor_citrus_BIG5_check_excludes(_BIG5EncodingInfo *ei, wint_t c)
162219019Sgabor{
163219019Sgabor	_BIG5Exclude *exclude;
164219019Sgabor
165219019Sgabor	TAILQ_FOREACH(exclude, &ei->excludes, entry) {
166219019Sgabor		if (c >= exclude->start && c <= exclude->end)
167219019Sgabor			return (EILSEQ);
168219019Sgabor	}
169219019Sgabor	return (0);
170219019Sgabor}
171219019Sgabor
172219019Sgaborstatic int
173219019Sgabor_citrus_BIG5_fill_rowcol(void ** __restrict ctx, const char * __restrict s,
174219019Sgabor    uint64_t start, uint64_t end)
175219019Sgabor{
176219019Sgabor	_BIG5EncodingInfo *ei;
177219019Sgabor	uint64_t n;
178219019Sgabor	int i;
179219019Sgabor
180219019Sgabor	if (start > 0xFF || end > 0xFF)
181219019Sgabor		return (EINVAL);
182219019Sgabor	ei = (_BIG5EncodingInfo *)ctx;
183219019Sgabor	i = strcmp("row", s) ? 1 : 0;
184219019Sgabor	i = 1 << i;
185219019Sgabor	for (n = start; n <= end; ++n)
186219019Sgabor		ei->cell[n & 0xFF] |= i;
187219019Sgabor	return (0);
188219019Sgabor}
189219019Sgabor
190219019Sgaborstatic int
191219019Sgabor/*ARGSUSED*/
192219019Sgabor_citrus_BIG5_fill_excludes(void ** __restrict ctx,
193219019Sgabor    const char * __restrict s __unused, uint64_t start, uint64_t end)
194219019Sgabor{
195219019Sgabor	_BIG5EncodingInfo *ei;
196219019Sgabor	_BIG5Exclude *exclude;
197219019Sgabor
198219019Sgabor	if (start > 0xFFFF || end > 0xFFFF)
199219019Sgabor		return (EINVAL);
200219019Sgabor	ei = (_BIG5EncodingInfo *)ctx;
201219019Sgabor	exclude = TAILQ_LAST(&ei->excludes, _BIG5ExcludeList);
202219019Sgabor	if (exclude != NULL && (wint_t)start <= exclude->end)
203219019Sgabor		return (EINVAL);
204219019Sgabor	exclude = (void *)malloc(sizeof(*exclude));
205219019Sgabor	if (exclude == NULL)
206219019Sgabor		return (ENOMEM);
207219019Sgabor	exclude->start = (wint_t)start;
208219019Sgabor	exclude->end = (wint_t)end;
209219019Sgabor	TAILQ_INSERT_TAIL(&ei->excludes, exclude, entry);
210219019Sgabor
211219019Sgabor	return (0);
212219019Sgabor}
213219019Sgabor
214219019Sgaborstatic const _citrus_prop_hint_t root_hints[] = {
215219019Sgabor    _CITRUS_PROP_HINT_NUM("row", &_citrus_BIG5_fill_rowcol),
216219019Sgabor    _CITRUS_PROP_HINT_NUM("col", &_citrus_BIG5_fill_rowcol),
217219019Sgabor    _CITRUS_PROP_HINT_NUM("excludes", &_citrus_BIG5_fill_excludes),
218219019Sgabor    _CITRUS_PROP_HINT_END
219219019Sgabor};
220219019Sgabor
221219019Sgaborstatic void
222219019Sgabor/*ARGSUSED*/
223219019Sgabor_citrus_BIG5_encoding_module_uninit(_BIG5EncodingInfo *ei)
224219019Sgabor{
225219019Sgabor	_BIG5Exclude *exclude;
226219019Sgabor
227219019Sgabor	while ((exclude = TAILQ_FIRST(&ei->excludes)) != NULL) {
228219019Sgabor		TAILQ_REMOVE(&ei->excludes, exclude, entry);
229219019Sgabor		free(exclude);
230219019Sgabor	}
231219019Sgabor}
232219019Sgabor
233219019Sgaborstatic int
234219019Sgabor/*ARGSUSED*/
235219019Sgabor_citrus_BIG5_encoding_module_init(_BIG5EncodingInfo * __restrict ei,
236219019Sgabor    const void * __restrict var, size_t lenvar)
237219019Sgabor{
238219019Sgabor	void *ctx = (void *)ei;
239219019Sgabor	const char *s;
240219019Sgabor	int err;
241219019Sgabor
242219019Sgabor	memset((void *)ei, 0, sizeof(*ei));
243219019Sgabor	TAILQ_INIT(&ei->excludes);
244219019Sgabor
245219019Sgabor	if (lenvar > 0 && var != NULL) {
246219019Sgabor		s = _bcs_skip_ws_len((const char *)var, &lenvar);
247219019Sgabor		if (lenvar > 0 && *s != '\0') {
248219019Sgabor			err = _citrus_prop_parse_variable(
249219019Sgabor			    root_hints, (void *)ei, s, lenvar);
250219019Sgabor			if (err == 0)
251219019Sgabor				return (0);
252219019Sgabor
253219019Sgabor			_citrus_BIG5_encoding_module_uninit(ei);
254219019Sgabor			memset((void *)ei, 0, sizeof(*ei));
255219019Sgabor			TAILQ_INIT(&ei->excludes);
256219019Sgabor		}
257219019Sgabor	}
258219019Sgabor
259219019Sgabor	/* fallback Big5-1984, for backward compatibility. */
260219019Sgabor	_citrus_BIG5_fill_rowcol((void **)&ctx, "row", 0xA1, 0xFE);
261219019Sgabor	_citrus_BIG5_fill_rowcol((void **)&ctx, "col", 0x40, 0x7E);
262219019Sgabor	_citrus_BIG5_fill_rowcol((void **)&ctx, "col", 0xA1, 0xFE);
263219019Sgabor
264219019Sgabor	return (0);
265219019Sgabor}
266219019Sgabor
267219019Sgaborstatic int
268219019Sgabor/*ARGSUSED*/
269219019Sgabor_citrus_BIG5_mbrtowc_priv(_BIG5EncodingInfo * __restrict ei,
270219019Sgabor    wchar_t * __restrict pwc,
271252583Speter    const char ** __restrict s, size_t n,
272219019Sgabor    _BIG5State * __restrict psenc,
273219019Sgabor    size_t * __restrict nresult)
274219019Sgabor{
275219019Sgabor	wchar_t wchar;
276252583Speter	const char *s0;
277219019Sgabor	int c, chlenbak;
278219019Sgabor
279219019Sgabor	s0 = *s;
280219019Sgabor
281219019Sgabor	if (s0 == NULL) {
282219019Sgabor		_citrus_BIG5_init_state(ei, psenc);
283219019Sgabor		*nresult = 0;
284219019Sgabor		return (0);
285219019Sgabor	}
286219019Sgabor
287219019Sgabor	chlenbak = psenc->chlen;
288219019Sgabor
289219019Sgabor	/* make sure we have the first byte in the buffer */
290219019Sgabor	switch (psenc->chlen) {
291219019Sgabor	case 0:
292219019Sgabor		if (n < 1)
293219019Sgabor			goto restart;
294219019Sgabor		psenc->ch[0] = *s0++;
295219019Sgabor		psenc->chlen = 1;
296219019Sgabor		n--;
297219019Sgabor		break;
298219019Sgabor	case 1:
299219019Sgabor		break;
300219019Sgabor	default:
301219019Sgabor		/* illegal state */
302219019Sgabor		goto ilseq;
303219019Sgabor	}
304219019Sgabor
305219019Sgabor	c = _citrus_BIG5_check(ei, psenc->ch[0] & 0xff);
306219019Sgabor	if (c == 0)
307219019Sgabor		goto ilseq;
308219019Sgabor	while (psenc->chlen < c) {
309219019Sgabor		if (n < 1) {
310219019Sgabor			goto restart;
311219019Sgabor		}
312219019Sgabor		psenc->ch[psenc->chlen] = *s0++;
313219019Sgabor		psenc->chlen++;
314219019Sgabor		n--;
315219019Sgabor	}
316219019Sgabor
317219019Sgabor	switch (c) {
318219019Sgabor	case 1:
319219019Sgabor		wchar = psenc->ch[0] & 0xff;
320219019Sgabor		break;
321219019Sgabor	case 2:
322219019Sgabor		if (!_citrus_BIG5_check2(ei, psenc->ch[1] & 0xff))
323219019Sgabor			goto ilseq;
324219019Sgabor		wchar = ((psenc->ch[0] & 0xff) << 8) | (psenc->ch[1] & 0xff);
325219019Sgabor		break;
326219019Sgabor	default:
327219019Sgabor		/* illegal state */
328219019Sgabor		goto ilseq;
329219019Sgabor	}
330219019Sgabor
331219019Sgabor	if (_citrus_BIG5_check_excludes(ei, (wint_t)wchar) != 0)
332219019Sgabor		goto ilseq;
333219019Sgabor
334219019Sgabor	*s = s0;
335219019Sgabor	psenc->chlen = 0;
336219019Sgabor	if (pwc)
337219019Sgabor		*pwc = wchar;
338219019Sgabor	*nresult = wchar ? c - chlenbak : 0;
339219019Sgabor
340219019Sgabor	return (0);
341219019Sgabor
342219019Sgaborilseq:
343219019Sgabor	psenc->chlen = 0;
344219019Sgabor	*nresult = (size_t)-1;
345219019Sgabor	return (EILSEQ);
346219019Sgabor
347219019Sgaborrestart:
348219019Sgabor	*s = s0;
349219019Sgabor	*nresult = (size_t)-2;
350219019Sgabor	return (0);
351219019Sgabor}
352219019Sgabor
353219019Sgaborstatic int
354219019Sgabor/*ARGSUSED*/
355219019Sgabor_citrus_BIG5_wcrtomb_priv(_BIG5EncodingInfo * __restrict ei,
356219019Sgabor    char * __restrict s,
357219019Sgabor    size_t n, wchar_t wc, _BIG5State * __restrict psenc __unused,
358219019Sgabor    size_t * __restrict nresult)
359219019Sgabor{
360219019Sgabor	unsigned char l;
361219019Sgabor	int ret;
362219019Sgabor
363219019Sgabor	/* check invalid sequence */
364219019Sgabor	if (wc & ~0xffff ||
365219019Sgabor	    _citrus_BIG5_check_excludes(ei, (wint_t)wc) != 0) {
366219019Sgabor		ret = EILSEQ;
367219019Sgabor		goto err;
368219019Sgabor	}
369219019Sgabor
370219019Sgabor	if (wc & 0x8000) {
371219019Sgabor		if (_citrus_BIG5_check(ei, (wc >> 8) & 0xff) != 2 ||
372219019Sgabor		    !_citrus_BIG5_check2(ei, wc & 0xff)) {
373219019Sgabor			ret = EILSEQ;
374219019Sgabor			goto err;
375219019Sgabor		}
376219019Sgabor		l = 2;
377219019Sgabor	} else {
378219019Sgabor		if (wc & ~0xff || !_citrus_BIG5_check(ei, wc & 0xff)) {
379219019Sgabor			ret = EILSEQ;
380219019Sgabor			goto err;
381219019Sgabor		}
382219019Sgabor		l = 1;
383219019Sgabor	}
384219019Sgabor
385219019Sgabor	if (n < l) {
386219019Sgabor		/* bound check failure */
387219019Sgabor		ret = E2BIG;
388219019Sgabor		goto err;
389219019Sgabor	}
390219019Sgabor
391219019Sgabor	if (l == 2) {
392219019Sgabor		s[0] = (wc >> 8) & 0xff;
393219019Sgabor		s[1] = wc & 0xff;
394219019Sgabor	} else
395219019Sgabor		s[0] = wc & 0xff;
396219019Sgabor
397219019Sgabor	*nresult = l;
398219019Sgabor
399219019Sgabor	return (0);
400219019Sgabor
401219019Sgaborerr:
402219019Sgabor	*nresult = (size_t)-1;
403219019Sgabor	return (ret);
404219019Sgabor}
405219019Sgabor
406219019Sgaborstatic __inline int
407219019Sgabor/*ARGSUSED*/
408219019Sgabor_citrus_BIG5_stdenc_wctocs(_BIG5EncodingInfo * __restrict ei __unused,
409219019Sgabor    _csid_t * __restrict csid,
410219019Sgabor    _index_t * __restrict idx, wchar_t wc)
411219019Sgabor{
412219019Sgabor
413219019Sgabor	*csid = (wc < 0x100) ? 0 : 1;
414219019Sgabor	*idx = (_index_t)wc;
415219019Sgabor
416219019Sgabor	return (0);
417219019Sgabor}
418219019Sgabor
419219019Sgaborstatic __inline int
420219019Sgabor/*ARGSUSED*/
421219019Sgabor_citrus_BIG5_stdenc_cstowc(_BIG5EncodingInfo * __restrict ei __unused,
422219019Sgabor    wchar_t * __restrict wc,
423219019Sgabor    _csid_t csid, _index_t idx)
424219019Sgabor{
425219019Sgabor
426219019Sgabor	switch (csid) {
427219019Sgabor	case 0:
428219019Sgabor	case 1:
429219019Sgabor		*wc = (wchar_t)idx;
430219019Sgabor		break;
431219019Sgabor	default:
432219019Sgabor		return (EILSEQ);
433219019Sgabor	}
434219019Sgabor
435219019Sgabor	return (0);
436219019Sgabor}
437219019Sgabor
438219019Sgaborstatic __inline int
439219019Sgabor/*ARGSUSED*/
440219019Sgabor_citrus_BIG5_stdenc_get_state_desc_generic(_BIG5EncodingInfo * __restrict ei __unused,
441219019Sgabor    _BIG5State * __restrict psenc,
442219019Sgabor    int * __restrict rstate)
443219019Sgabor{
444219019Sgabor
445219019Sgabor	*rstate = (psenc->chlen == 0) ? _STDENC_SDGEN_INITIAL :
446219019Sgabor	    _STDENC_SDGEN_INCOMPLETE_CHAR;
447219019Sgabor	return (0);
448219019Sgabor}
449219019Sgabor
450219019Sgabor/* ----------------------------------------------------------------------
451219019Sgabor * public interface for stdenc
452219019Sgabor */
453219019Sgabor
/*
 * Both macros are defined outside this file.  Presumably they declare the
 * stdenc entry points for BIG5 and define its operations table -- confirm
 * against citrus_stdenc.h.
 */
_CITRUS_STDENC_DECLS(BIG5);
_CITRUS_STDENC_DEF_OPS(BIG5);
456219019Sgabor
457219019Sgabor#include "citrus_stdenc_template.h"
458