1/* $NetBSD: citrus_gbk2k.c,v 1.9 2022/04/19 20:32:14 rillig Exp $ */
2
3/*-
4 * Copyright (c)2003 Citrus Project,
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30#if defined(LIBC_SCCS) && !defined(lint)
31__RCSID("$NetBSD: citrus_gbk2k.c,v 1.9 2022/04/19 20:32:14 rillig Exp $");
32#endif /* LIBC_SCCS and not lint */
33
34#include <assert.h>
35#include <errno.h>
36#include <string.h>
37#include <stdio.h>
38#include <stdlib.h>
39#include <stddef.h>
40#include <wchar.h>
41#include <sys/types.h>
42#include <limits.h>
43
44#include "citrus_namespace.h"
45#include "citrus_types.h"
46#include "citrus_bcs.h"
47#include "citrus_module.h"
48#include "citrus_ctype.h"
49#include "citrus_stdenc.h"
50#include "citrus_gbk2k.h"
51
52
/* ----------------------------------------------------------------------
 * private stuff used by templates
 */
56
/*
 * Conversion state: the bytes of the multibyte character that is
 * currently being assembled.
 */
typedef struct _GBK2KState {
	char ch[4];	/* bytes stored so far, in input order */
	int chlen;	/* number of valid bytes in ch[]; 0 means no pending bytes */
} _GBK2KState;
61
/*
 * Per-encoding parameters filled in by
 * _citrus_GBK2K_encoding_module_init().
 */
typedef struct {
	/*
	 * Longest multibyte sequence accepted: 4 by default, 2 when the
	 * "2byte" keyword is selected in the module variable.
	 */
	int mb_cur_max;
} _GBK2KEncodingInfo;
65
/*
 * Encoding parameters together with one saved conversion state per
 * multibyte function name.  A member of `states' is selected by name
 * through _CEI_TO_STATE().
 */
typedef struct {
	_GBK2KEncodingInfo	ei;
	struct {
		/* for future multi-locale facility */
		_GBK2KState	s_mblen;
		_GBK2KState	s_mbrlen;
		_GBK2KState	s_mbrtowc;
		_GBK2KState	s_mbtowc;
		_GBK2KState	s_mbsrtowcs;
		_GBK2KState	s_mbsnrtowcs;
		_GBK2KState	s_wcrtomb;
		_GBK2KState	s_wcsrtombs;
		_GBK2KState	s_wcsnrtombs;
		_GBK2KState	s_wctomb;
	} states;
} _GBK2KCTypeInfo;
82
/* Address of the encoding info embedded in a _GBK2KCTypeInfo. */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
/* The saved state member named s_<_func_> inside a _GBK2KCTypeInfo. */
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.s_##_func_

/*
 * Names and properties of this encoding.
 * NOTE(review): presumably consumed by the template headers included at
 * the end of this file -- confirm against those headers.
 */
/* Builds this module's function names: _citrus_GBK2K_<m>. */
#define _FUNCNAME(m)			_citrus_GBK2K_##m
#define _ENCODING_INFO			_GBK2KEncodingInfo
#define _CTYPE_INFO			_GBK2KCTypeInfo
#define _ENCODING_STATE			_GBK2KState
#define _ENCODING_MB_CUR_MAX(_ei_)	(_ei_)->mb_cur_max
/* Both flags are constant 0 for this encoding. */
#define _ENCODING_IS_STATE_DEPENDENT	0
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	0
93
94static __inline void
95/*ARGSUSED*/
96_citrus_GBK2K_init_state(_GBK2KEncodingInfo * __restrict ei,
97			 _GBK2KState * __restrict s)
98{
99	memset(s, 0, sizeof(*s));
100}
101
102static __inline void
103/*ARGSUSED*/
104_citrus_GBK2K_pack_state(_GBK2KEncodingInfo * __restrict ei,
105			 void * __restrict pspriv,
106			 const _GBK2KState * __restrict s)
107{
108	memcpy(pspriv, (const void *)s, sizeof(*s));
109}
110
111static __inline void
112/*ARGSUSED*/
113_citrus_GBK2K_unpack_state(_GBK2KEncodingInfo * __restrict ei,
114			   _GBK2KState * __restrict s,
115			   const void * __restrict pspriv)
116{
117	memcpy((void *)s, pspriv, sizeof(*s));
118}
119
/*
 * Return 1 if the low 8 bits of `c' form a single-byte character
 * (0x00-0x7f), otherwise 0.  Higher bits of `c' are ignored.
 */
static __inline int
_mb_singlebyte(int c)
{
	/* Within one octet, "<= 0x7f" is the same as "bit 7 clear". */
	return ((c & 0x80) == 0);
}
126
/*
 * Return 1 if the low 8 bits of `c' can start a multibyte character
 * (0x81-0xfe), otherwise 0.  Higher bits of `c' are ignored.
 */
static __inline int
_mb_leadbyte(int c)
{
	const int octet = c & 0xff;

	if (octet < 0x81)
		return (0);
	return (octet <= 0xfe);
}
133
/*
 * Return 1 if the low 8 bits of `c' can end a two-byte character
 * (0x40-0x7e or 0x80-0xfe), otherwise 0.  Higher bits of `c' are ignored.
 */
static __inline int
_mb_trailbyte(int c)
{
	const int octet = c & 0xff;

	/* Everything from 0x40 upward qualifies except 0x7f and 0xff. */
	if (octet < 0x40 || octet == 0x7f || octet == 0xff)
		return (0);
	return (1);
}
140
/*
 * Return 1 if the low 8 bits of `c' lie in 0x30-0x39, the range used for
 * the second and fourth bytes of a four-byte character, otherwise 0.
 * Higher bits of `c' are ignored.
 */
static __inline int
_mb_surrogate(int c)
{
	const int octet = c & 0xff;

	if (octet < 0x30)
		return (0);
	return (octet <= 0x39);
}
147
/*
 * Return the number of bytes (1, 2 or 4) used to encode the wide
 * character `v', judged only by the magnitude of its value taken as an
 * unsigned 32-bit number.  Three-byte encodings do not exist, so any
 * value above 0xffff counts as four bytes.
 */
static __inline int
_mb_count(wchar_t v)
{
	const u_int32_t code = (u_int32_t)v; /* XXX */

	if (code <= 0xffU)
		return (1);
	if (code <= 0xffffU)
		return (2);
	return (4);
}
160
/*
 * Both macros operate on a variable named `psenc' that must be in scope.
 * _PSENC is the most recently stored byte and is only meaningful while
 * psenc->chlen > 0.  _PUSH_PSENC(c) appends c to the stored bytes and
 * evaluates to the value stored (c converted to char).
 */
#define	_PSENC		(psenc->ch[psenc->chlen - 1])
#define	_PUSH_PSENC(c)	(psenc->ch[psenc->chlen++] = (c))
163
164static int
165_citrus_GBK2K_mbrtowc_priv(_GBK2KEncodingInfo * __restrict ei,
166			   wchar_t * __restrict pwc,
167			   const char ** __restrict s, size_t n,
168			   _GBK2KState * __restrict psenc,
169			   size_t * __restrict nresult)
170{
171	int chlenbak, len;
172	const char *s0, *s1;
173	wchar_t wc;
174
175	_DIAGASSERT(ei != NULL);
176	/* pwc may be NULL */
177	_DIAGASSERT(s != NULL);
178	_DIAGASSERT(psenc != NULL);
179
180	s0 = *s;
181
182	if (s0 == NULL) {
183		/* _citrus_GBK2K_init_state(ei, psenc); */
184		psenc->chlen = 0;
185		*nresult = 0;
186		return (0);
187	}
188
189	chlenbak = psenc->chlen;
190
191	switch (psenc->chlen) {
192	case 3:
193		if (!_mb_leadbyte (_PSENC))
194			goto invalid;
195	/* FALLTHROUGH */
196	case 2:
197		if (!_mb_surrogate(_PSENC) || _mb_trailbyte(_PSENC))
198			goto invalid;
199	/* FALLTHROUGH */
200	case 1:
201		if (!_mb_leadbyte (_PSENC))
202			goto invalid;
203	/* FALLTHOROUGH */
204	case 0:
205		break;
206	default:
207		goto invalid;
208	}
209
210	for (;;) {
211		if (n-- < 1)
212			goto restart;
213
214		_PUSH_PSENC(*s0++);
215
216		switch (psenc->chlen) {
217		case 1:
218			if (_mb_singlebyte(_PSENC))
219				goto convert;
220			if (_mb_leadbyte  (_PSENC))
221				continue;
222			goto ilseq;
223		case 2:
224			if (_mb_trailbyte (_PSENC))
225				goto convert;
226			if (ei->mb_cur_max == 4 &&
227			    _mb_surrogate (_PSENC))
228				continue;
229			goto ilseq;
230		case 3:
231			if (_mb_leadbyte  (_PSENC))
232				continue;
233			goto ilseq;
234		case 4:
235			if (_mb_surrogate (_PSENC))
236				goto convert;
237			goto ilseq;
238		}
239	}
240
241convert:
242	len = psenc->chlen;
243	s1  = &psenc->ch[0];
244	wc  = 0;
245	while (len-- > 0)
246		wc = (wc << 8) | (*s1++ & 0xff);
247
248	if (pwc != NULL)
249		*pwc = wc;
250	*s = s0;
251	*nresult = (wc == 0) ? 0 : psenc->chlen - chlenbak;
252	/* _citrus_GBK2K_init_state(ei, psenc); */
253	psenc->chlen = 0;
254
255	return (0);
256
257restart:
258	*s = s0;
259	*nresult = (size_t)-2;
260
261	return (0);
262
263invalid:
264	return (EINVAL);
265
266ilseq:
267	*nresult = (size_t)-1;
268	return (EILSEQ);
269}
270
271static int
272_citrus_GBK2K_wcrtomb_priv(_GBK2KEncodingInfo * __restrict ei,
273			   char * __restrict s, size_t n, wchar_t wc,
274			   _GBK2KState * __restrict psenc,
275			   size_t * __restrict nresult)
276{
277	int len, ret;
278
279	_DIAGASSERT(ei != NULL);
280	_DIAGASSERT(s != NULL);
281	_DIAGASSERT(psenc != NULL);
282
283	if (psenc->chlen != 0) {
284		ret = EINVAL;
285		goto err;
286	}
287
288	len = _mb_count(wc);
289	if (n < len) {
290		ret = E2BIG;
291		goto err;
292	}
293
294	switch (len) {
295	case 1:
296		if (!_mb_singlebyte(_PUSH_PSENC(wc     ))) {
297			ret = EILSEQ;
298			goto err;
299		}
300		break;
301	case 2:
302		if (!_mb_leadbyte  (_PUSH_PSENC(wc >> 8)) ||
303		    !_mb_trailbyte (_PUSH_PSENC(wc     ))) {
304			ret = EILSEQ;
305			goto err;
306		}
307		break;
308	case 4:
309		if (ei->mb_cur_max != 4 ||
310		    !_mb_leadbyte  (_PUSH_PSENC(wc >> 24)) ||
311		    !_mb_surrogate (_PUSH_PSENC(wc >> 16)) ||
312		    !_mb_leadbyte  (_PUSH_PSENC(wc >>  8)) ||
313		    !_mb_surrogate (_PUSH_PSENC(wc      ))) {
314			ret = EILSEQ;
315			goto err;
316		}
317		break;
318	}
319
320	_DIAGASSERT(len == psenc->chlen);
321
322	memcpy(s, psenc->ch, psenc->chlen);
323	*nresult = psenc->chlen;
324	/* _citrus_GBK2K_init_state(ei, psenc); */
325	psenc->chlen = 0;
326
327	return (0);
328
329err:
330	*nresult = (size_t)-1;
331	return ret;
332}
333
334static __inline int
335/*ARGSUSED*/
336_citrus_GBK2K_stdenc_wctocs(_GBK2KEncodingInfo * __restrict ei,
337			    _csid_t * __restrict csid,
338			    _index_t * __restrict idx, wchar_t wc)
339{
340	u_int8_t ch, cl;
341
342	_DIAGASSERT(csid != NULL && idx != NULL);
343
344	if ((u_int32_t)wc<0x80) {
345		/* ISO646 */
346		*csid = 0;
347		*idx = (_index_t)wc;
348	} else if ((u_int32_t)wc>=0x10000) {
349		/* GBKUCS : XXX */
350		*csid = 3;
351		*idx = (_index_t)wc;
352	} else {
353		ch = (u_int8_t)(wc >> 8);
354		cl = (u_int8_t)wc;
355		if (ch>=0xA1 && cl>=0xA1) {
356			/* EUC G1 */
357			*csid = 1;
358			*idx = (_index_t)wc & 0x7F7FU;
359		} else {
360			/* extended area (0x8140-) */
361			*csid = 2;
362			*idx = (_index_t)wc;
363		}
364	}
365
366	return 0;
367}
368
369static __inline int
370/*ARGSUSED*/
371_citrus_GBK2K_stdenc_cstowc(_GBK2KEncodingInfo * __restrict ei,
372			    wchar_t * __restrict wc,
373			    _csid_t csid, _index_t idx)
374{
375
376	_DIAGASSERT(wc != NULL);
377
378	switch (csid) {
379	case 0:
380		/* ISO646 */
381		*wc = (wchar_t)idx;
382		break;
383	case 1:
384		/* EUC G1 */
385		*wc = (wchar_t)idx | 0x8080U;
386		break;
387	case 2:
388		/* extended area */
389		*wc = (wchar_t)idx;
390		break;
391	case 3:
392		/* GBKUCS : XXX */
393		if (ei->mb_cur_max != 4)
394			return EINVAL;
395		*wc = (wchar_t)idx;
396		break;
397	default:
398		return EILSEQ;
399	}
400
401	return 0;
402}
403
404static __inline int
405/*ARGSUSED*/
406_citrus_GBK2K_stdenc_get_state_desc_generic(_GBK2KEncodingInfo * __restrict ei,
407					    _GBK2KState * __restrict psenc,
408					    int * __restrict rstate)
409{
410
411	if (psenc->chlen == 0)
412		*rstate = _STDENC_SDGEN_INITIAL;
413	else
414		*rstate = _STDENC_SDGEN_INCOMPLETE_CHAR;
415
416	return 0;
417}
418
419static int
420/*ARGSUSED*/
421_citrus_GBK2K_encoding_module_init(_GBK2KEncodingInfo * __restrict ei,
422				   const void * __restrict var, size_t lenvar)
423{
424	const char *p;
425
426	_DIAGASSERT(ei != NULL);
427
428	p = var;
429#define MATCH(x, act)                                           \
430do {                                                            \
431        if (lenvar >= (sizeof(#x)-1) &&                         \
432            _bcs_strncasecmp(p, #x, sizeof(#x)-1) == 0) {       \
433                act;                                            \
434                lenvar -= sizeof(#x)-1;                         \
435                p += sizeof(#x)-1;                              \
436        }                                                       \
437} while (0)
438	memset((void *)ei, 0, sizeof(*ei));
439	ei->mb_cur_max = 4;
440	while (lenvar>0) {
441		switch (_bcs_tolower(*p)) {
442		case '2':
443			MATCH("2byte", ei->mb_cur_max = 2);
444			break;
445		}
446		p++;
447		lenvar--;
448	}
449
450	return (0);
451}
452
453static void
454/*ARGSUSED*/
455_citrus_GBK2K_encoding_module_uninit(_GBK2KEncodingInfo *ei)
456{
457}
458
459
460/* ----------------------------------------------------------------------
461 * public interface for ctype
462 */
463
/*
 * NOTE(review): these macros and the template header are defined outside
 * this file; presumably they declare the GBK2K ctype operations and
 * generate them from the _FUNCNAME()/_ENCODING_* definitions and the
 * *_priv functions above -- confirm against citrus_ctype.h and
 * citrus_ctype_template.h.
 */
_CITRUS_CTYPE_DECLS(GBK2K);
_CITRUS_CTYPE_DEF_OPS(GBK2K);

#include "citrus_ctype_template.h"
468
469/* ----------------------------------------------------------------------
470 * public interface for stdenc
471 */
472
/*
 * NOTE(review): these macros and the template header are defined outside
 * this file; presumably they declare the GBK2K stdenc operations and
 * generate them from the stdenc_* and *_priv functions above -- confirm
 * against citrus_stdenc.h and citrus_stdenc_template.h.
 */
_CITRUS_STDENC_DECLS(GBK2K);
_CITRUS_STDENC_DEF_OPS(GBK2K);

#include "citrus_stdenc_template.h"
477