citrus_hz.c revision 330897
1/* $FreeBSD: stable/11/lib/libiconv_modules/HZ/citrus_hz.c 330897 2018-03-14 03:19:51Z eadler $ */
2/* $NetBSD: citrus_hz.c,v 1.2 2008/06/14 16:01:07 tnozaki Exp $ */
3
4/*-
5 * SPDX-License-Identifier: BSD-2-Clause
6 *
7 * Copyright (c)2004, 2006 Citrus Project,
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 *
31 */
32
33#include <sys/cdefs.h>
34#include <sys/queue.h>
35#include <sys/types.h>
36
37#include <assert.h>
38#include <errno.h>
39#include <limits.h>
40#include <stddef.h>
41#include <stdint.h>
42#include <stdlib.h>
43#include <string.h>
44#include <wchar.h>
45
46#include "citrus_namespace.h"
47#include "citrus_types.h"
48#include "citrus_bcs.h"
49#include "citrus_module.h"
50#include "citrus_stdenc.h"
51
52#include "citrus_hz.h"
53#include "citrus_prop.h"
54
/*
 * wchar_t mapping:
 *
 * CTRL/ASCII	00000000 00000000 00000000 gxxxxxxx
 * GB2312	00000000 00000000 0xxxxxxx gxxxxxxx
 * 94/96*n (~M)	0mmmmmmm 0xxxxxxx 0xxxxxxx gxxxxxxx
 *
 * 'g' is the 0x80 flag selecting the right-hand (GR) graphic set instead
 * of the left-hand (GL) one, 'x' bits carry the character bytes and 'm'
 * bits carry the escape character that designates the set.
 */

/* First byte of every two-byte shift sequence. */
#define ESCAPE_CHAR	'~'

/* Character set classes; the numeric values index the ranges[] table. */
typedef enum {
	CTRL	= 0,
	ASCII	= 1,
	GB2312	= 2,
	CS94	= 3,
	CS96	= 4
} charset_t;
68
/* Inclusive range of byte values (start..end) and the number of values. */
typedef struct {
	int	 start;
	int	 end;
	int	 width;
} range_t;

/* One entry per character set class, in charset_t order. */
static const range_t ranges[] = {
#define BYTE_RANGE(lo, hi)	{ (lo), (hi), ((hi) - (lo)) + 1 }
	BYTE_RANGE(0x00, 0x1F),	/* CTRL   */
	BYTE_RANGE(0x20, 0x7F),	/* ASCII  */
	BYTE_RANGE(0x21, 0x7E),	/* GB2312 */
	BYTE_RANGE(0x21, 0x7E),	/* CS94   */
	BYTE_RANGE(0x20, 0x7F),	/* CS96   */
#undef BYTE_RANGE
};
84
85typedef struct escape_t escape_t;
86typedef struct {
87	charset_t	 charset;
88	escape_t	*escape;
89	ssize_t		 length;
90#define ROWCOL_MAX	3
91} graphic_t;
92
93typedef TAILQ_HEAD(escape_list, escape_t) escape_list;
94struct escape_t {
95	TAILQ_ENTRY(escape_t)	 entry;
96	escape_list		*set;
97	graphic_t		*left;
98	graphic_t		*right;
99	int			 ch;
100};
101
102#define GL(escape)	((escape)->left)
103#define GR(escape)	((escape)->right)
104#define SET(escape)	((escape)->set)
105#define ESC(escape)	((escape)->ch)
106#define INIT(escape)	(TAILQ_FIRST(SET(escape)))
107
108static __inline escape_t *
109find_escape(escape_list *set, int ch)
110{
111	escape_t *escape;
112
113	TAILQ_FOREACH(escape, set, entry) {
114		if (ESC(escape) == ch)
115			break;
116	}
117
118	return (escape);
119}
120
121typedef struct {
122	escape_list	 e0;
123	escape_list	 e1;
124	graphic_t	*ascii;
125	graphic_t	*gb2312;
126} _HZEncodingInfo;
127
128#define E0SET(ei)	(&(ei)->e0)
129#define E1SET(ei)	(&(ei)->e1)
130#define INIT0(ei)	(TAILQ_FIRST(E0SET(ei)))
131#define INIT1(ei)	(TAILQ_FIRST(E1SET(ei)))
132
133typedef struct {
134	escape_t	*inuse;
135	int		 chlen;
136	char		 ch[ROWCOL_MAX];
137} _HZState;
138
139#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
140#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.s_##_func_
141
142#define _FUNCNAME(m)			_citrus_HZ_##m
143#define _ENCODING_INFO			_HZEncodingInfo
144#define _ENCODING_STATE			_HZState
145#define _ENCODING_MB_CUR_MAX(_ei_)	MB_LEN_MAX
146#define _ENCODING_IS_STATE_DEPENDENT		1
147#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	((_ps_)->inuse == NULL)
148
149static __inline void
150_citrus_HZ_init_state(_HZEncodingInfo * __restrict ei,
151    _HZState * __restrict psenc)
152{
153
154	psenc->chlen = 0;
155	psenc->inuse = INIT0(ei);
156}
157
#if 0
/* Disabled: copy the state object verbatim into the opaque buffer. */
static __inline void
/*ARGSUSED*/
_citrus_HZ_pack_state(_HZEncodingInfo * __restrict ei __unused,
    void *__restrict pspriv, const _HZState * __restrict psenc)
{

	memcpy(pspriv, psenc, sizeof(_HZState));
}

/* Disabled: restore the state object verbatim from the opaque buffer. */
static __inline void
/*ARGSUSED*/
_citrus_HZ_unpack_state(_HZEncodingInfo * __restrict ei __unused,
    _HZState * __restrict psenc, const void * __restrict pspriv)
{

	memcpy(psenc, pspriv, sizeof(_HZState));
}
#endif
177
178static int
179_citrus_HZ_mbrtowc_priv(_HZEncodingInfo * __restrict ei,
180    wchar_t * __restrict pwc, char ** __restrict s, size_t n,
181    _HZState * __restrict psenc, size_t * __restrict nresult)
182{
183	escape_t *candidate, *init;
184	graphic_t *graphic;
185	const range_t *range;
186	char *s0;
187	wchar_t wc;
188	int bit, ch, head, len, tail;
189
190	if (*s == NULL) {
191		_citrus_HZ_init_state(ei, psenc);
192		*nresult = 1;
193		return (0);
194	}
195	s0 = *s;
196	if (psenc->chlen < 0 || psenc->inuse == NULL)
197		return (EINVAL);
198
199	wc = (wchar_t)0;
200	bit = head = tail = 0;
201	graphic = NULL;
202	for (len = 0; len <= MB_LEN_MAX;) {
203		if (psenc->chlen == tail) {
204			if (n-- < 1) {
205				*s = s0;
206				*nresult = (size_t)-2;
207				return (0);
208			}
209			psenc->ch[psenc->chlen++] = *s0++;
210			++len;
211		}
212		ch = (unsigned char)psenc->ch[tail++];
213		if (tail == 1) {
214			if ((ch & ~0x80) <= 0x1F) {
215				if (psenc->inuse != INIT0(ei))
216					break;
217				wc = (wchar_t)ch;
218				goto done;
219			}
220			if (ch & 0x80) {
221				graphic = GR(psenc->inuse);
222				bit = 0x80;
223				ch &= ~0x80;
224			} else {
225				graphic = GL(psenc->inuse);
226				if (ch == ESCAPE_CHAR)
227					continue;
228				bit = 0x0;
229			}
230			if (graphic == NULL)
231				break;
232		} else if (tail == 2 && psenc->ch[0] == ESCAPE_CHAR) {
233			if (tail < psenc->chlen)
234				return (EINVAL);
235			if (ch == ESCAPE_CHAR) {
236				++head;
237			} else if (ch == '\n') {
238				if (psenc->inuse != INIT0(ei))
239					break;
240				tail = psenc->chlen = 0;
241				continue;
242			} else {
243				candidate = NULL;
244				init = INIT0(ei);
245				if (psenc->inuse == init) {
246					init = INIT1(ei);
247				} else if (INIT(psenc->inuse) == init) {
248					if (ESC(init) != ch)
249						break;
250					candidate = init;
251				}
252				if (candidate == NULL) {
253					candidate = find_escape(
254					    SET(psenc->inuse), ch);
255					if (candidate == NULL) {
256						if (init == NULL ||
257						    ESC(init) != ch)
258							break;
259						candidate = init;
260					}
261				}
262				psenc->inuse = candidate;
263				tail = psenc->chlen = 0;
264				continue;
265			}
266		} else if (ch & 0x80) {
267			if (graphic != GR(psenc->inuse))
268				break;
269			ch &= ~0x80;
270		} else {
271			if (graphic != GL(psenc->inuse))
272				break;
273		}
274		range = &ranges[(size_t)graphic->charset];
275		if (range->start > ch || range->end < ch)
276			break;
277		wc <<= 8;
278		wc |= ch;
279		if (graphic->length == (tail - head)) {
280			if (graphic->charset > GB2312)
281				bit |= ESC(psenc->inuse) << 24;
282			wc |= bit;
283			goto done;
284		}
285	}
286	*nresult = (size_t)-1;
287	return (EILSEQ);
288done:
289	if (tail < psenc->chlen)
290		return (EINVAL);
291	*s = s0;
292	if (pwc != NULL)
293		*pwc = wc;
294	psenc->chlen = 0;
295	*nresult = (wc == 0) ? 0 : len;
296
297	return (0);
298}
299
300static int
301_citrus_HZ_wcrtomb_priv(_HZEncodingInfo * __restrict ei,
302    char * __restrict s, size_t n, wchar_t wc,
303    _HZState * __restrict psenc, size_t * __restrict nresult)
304{
305	escape_t *candidate, *init;
306	graphic_t *graphic;
307	const range_t *range;
308	size_t len;
309	int bit, ch;
310
311	if (psenc->chlen != 0 || psenc->inuse == NULL)
312		return (EINVAL);
313	if (wc & 0x80) {
314		bit = 0x80;
315		wc &= ~0x80;
316	} else {
317		bit = 0x0;
318	}
319	if ((uint32_t)wc <= 0x1F) {
320		candidate = INIT0(ei);
321		graphic = (bit == 0) ? candidate->left : candidate->right;
322		if (graphic == NULL)
323			goto ilseq;
324		range = &ranges[(size_t)CTRL];
325		len = 1;
326	} else if ((uint32_t)wc <= 0x7F) {
327		graphic = ei->ascii;
328		if (graphic == NULL)
329			goto ilseq;
330		candidate = graphic->escape;
331		range = &ranges[(size_t)graphic->charset];
332		len = graphic->length;
333	} else if ((uint32_t)wc <= 0x7F7F) {
334		graphic = ei->gb2312;
335		if (graphic == NULL)
336			goto ilseq;
337		candidate = graphic->escape;
338		range = &ranges[(size_t)graphic->charset];
339		len = graphic->length;
340	} else {
341		ch = (wc >> 24) & 0xFF;
342		candidate = find_escape(E0SET(ei), ch);
343		if (candidate == NULL) {
344			candidate = find_escape(E1SET(ei), ch);
345			if (candidate == NULL)
346				goto ilseq;
347		}
348		wc &= ~0xFF000000;
349		graphic = (bit == 0) ? candidate->left : candidate->right;
350		if (graphic == NULL)
351			goto ilseq;
352		range = &ranges[(size_t)graphic->charset];
353		len = graphic->length;
354	}
355	if (psenc->inuse != candidate) {
356		init = INIT0(ei);
357		if (SET(psenc->inuse) == SET(candidate)) {
358			if (INIT(psenc->inuse) != init ||
359			    psenc->inuse == init || candidate == init)
360				init = NULL;
361		} else if (candidate == (init = INIT(candidate))) {
362			init = NULL;
363		}
364		if (init != NULL) {
365			if (n < 2)
366				return (E2BIG);
367			n -= 2;
368			psenc->ch[psenc->chlen++] = ESCAPE_CHAR;
369			psenc->ch[psenc->chlen++] = ESC(init);
370		}
371		if (n < 2)
372			return (E2BIG);
373		n -= 2;
374		psenc->ch[psenc->chlen++] = ESCAPE_CHAR;
375		psenc->ch[psenc->chlen++] = ESC(candidate);
376		psenc->inuse = candidate;
377	}
378	if (n < len)
379		return (E2BIG);
380	while (len-- > 0) {
381		ch = (wc >> (len * 8)) & 0xFF;
382		if (range->start > ch || range->end < ch)
383			goto ilseq;
384		psenc->ch[psenc->chlen++] = ch | bit;
385	}
386	memcpy(s, psenc->ch, psenc->chlen);
387	*nresult = psenc->chlen;
388	psenc->chlen = 0;
389
390	return (0);
391
392ilseq:
393	*nresult = (size_t)-1;
394	return (EILSEQ);
395}
396
397static __inline int
398_citrus_HZ_put_state_reset(_HZEncodingInfo * __restrict ei,
399    char * __restrict s, size_t n, _HZState * __restrict psenc,
400    size_t * __restrict nresult)
401{
402	escape_t *candidate;
403
404	if (psenc->chlen != 0 || psenc->inuse == NULL)
405		return (EINVAL);
406	candidate = INIT0(ei);
407	if (psenc->inuse != candidate) {
408		if (n < 2)
409			return (E2BIG);
410		n -= 2;
411		psenc->ch[psenc->chlen++] = ESCAPE_CHAR;
412		psenc->ch[psenc->chlen++] = ESC(candidate);
413	}
414	if (n < 1)
415		return (E2BIG);
416	if (psenc->chlen > 0)
417		memcpy(s, psenc->ch, psenc->chlen);
418	*nresult = psenc->chlen;
419	_citrus_HZ_init_state(ei, psenc);
420
421	return (0);
422}
423
424static __inline int
425_citrus_HZ_stdenc_get_state_desc_generic(_HZEncodingInfo * __restrict ei,
426    _HZState * __restrict psenc, int * __restrict rstate)
427{
428
429	if (psenc->chlen < 0 || psenc->inuse == NULL)
430		return (EINVAL);
431	*rstate = (psenc->chlen == 0)
432	    ? ((psenc->inuse == INIT0(ei))
433	        ? _STDENC_SDGEN_INITIAL
434	        : _STDENC_SDGEN_STABLE)
435	    : ((psenc->ch[0] == ESCAPE_CHAR)
436	        ? _STDENC_SDGEN_INCOMPLETE_SHIFT
437	        : _STDENC_SDGEN_INCOMPLETE_CHAR);
438
439	return (0);
440}
441
442static __inline int
443/*ARGSUSED*/
444_citrus_HZ_stdenc_wctocs(_HZEncodingInfo * __restrict ei __unused,
445    _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
446{
447	int bit;
448
449	if (wc & 0x80) {
450		bit = 0x80;
451		wc &= ~0x80;
452	} else
453		bit = 0x0;
454	if ((uint32_t)wc <= 0x7F) {
455		*csid = (_csid_t)bit;
456		*idx = (_index_t)wc;
457	} else if ((uint32_t)wc <= 0x7F7F) {
458		*csid = (_csid_t)(bit | 0x8000);
459		*idx = (_index_t)wc;
460	} else {
461		*csid = (_index_t)(wc & ~0x00FFFF7F);
462		*idx = (_csid_t)(wc & 0x00FFFF7F);
463	}
464
465	return (0);
466}
467
468static __inline int
469/*ARGSUSED*/
470_citrus_HZ_stdenc_cstowc(_HZEncodingInfo * __restrict ei __unused,
471    wchar_t * __restrict wc, _csid_t csid, _index_t idx)
472{
473
474	*wc = (wchar_t)idx;
475	switch (csid) {
476	case 0x80:
477	case 0x8080:
478		*wc |= (wchar_t)0x80;
479		/*FALLTHROUGH*/
480	case 0x0:
481	case 0x8000:
482		break;
483	default:
484		*wc |= (wchar_t)csid;
485	}
486
487	return (0);
488}
489
490static void
491_citrus_HZ_encoding_module_uninit(_HZEncodingInfo *ei)
492{
493	escape_t *escape;
494
495	while ((escape = TAILQ_FIRST(E0SET(ei))) != NULL) {
496		TAILQ_REMOVE(E0SET(ei), escape, entry);
497		free(GL(escape));
498		free(GR(escape));
499		free(escape);
500	}
501	while ((escape = TAILQ_FIRST(E1SET(ei))) != NULL) {
502		TAILQ_REMOVE(E1SET(ei), escape, entry);
503		free(GL(escape));
504		free(GR(escape));
505		free(escape);
506	}
507}
508
509static int
510_citrus_HZ_parse_char(void *context, const char *name __unused, const char *s)
511{
512	escape_t *escape;
513	void **p;
514
515	p = (void **)context;
516	escape = (escape_t *)p[0];
517	if (escape->ch != '\0')
518		return (EINVAL);
519	escape->ch = *s++;
520	if (escape->ch == ESCAPE_CHAR || *s != '\0')
521		return (EINVAL);
522
523	return (0);
524}
525
526static int
527_citrus_HZ_parse_graphic(void *context, const char *name, const char *s)
528{
529	_HZEncodingInfo *ei;
530	escape_t *escape;
531	graphic_t *graphic;
532	void **p;
533
534	p = (void **)context;
535	escape = (escape_t *)p[0];
536	ei = (_HZEncodingInfo *)p[1];
537	graphic = calloc(1, sizeof(*graphic));
538	if (graphic == NULL)
539		return (ENOMEM);
540	if (strcmp("GL", name) == 0) {
541		if (GL(escape) != NULL)
542			goto release;
543		GL(escape) = graphic;
544	} else if (strcmp("GR", name) == 0) {
545		if (GR(escape) != NULL)
546			goto release;
547		GR(escape) = graphic;
548	} else {
549release:
550		free(graphic);
551		return (EINVAL);
552	}
553	graphic->escape = escape;
554	if (_bcs_strncasecmp("ASCII", s, 5) == 0) {
555		if (s[5] != '\0')
556			return (EINVAL);
557		graphic->charset = ASCII;
558		graphic->length = 1;
559		ei->ascii = graphic;
560		return (0);
561	} else if (_bcs_strncasecmp("GB2312", s, 6) == 0) {
562		if (s[6] != '\0')
563			return (EINVAL);
564		graphic->charset = GB2312;
565		graphic->length = 2;
566		ei->gb2312 = graphic;
567		return (0);
568	} else if (strncmp("94*", s, 3) == 0)
569		graphic->charset = CS94;
570	else if (strncmp("96*", s, 3) == 0)
571		graphic->charset = CS96;
572	else
573		return (EINVAL);
574	s += 3;
575	switch(*s) {
576	case '1': case '2': case '3':
577		graphic->length = (size_t)(*s - '0');
578		if (*++s == '\0')
579			break;
580	/*FALLTHROUGH*/
581	default:
582		return (EINVAL);
583	}
584	return (0);
585}
586
587static const _citrus_prop_hint_t escape_hints[] = {
588_CITRUS_PROP_HINT_STR("CH", &_citrus_HZ_parse_char),
589_CITRUS_PROP_HINT_STR("GL", &_citrus_HZ_parse_graphic),
590_CITRUS_PROP_HINT_STR("GR", &_citrus_HZ_parse_graphic),
591_CITRUS_PROP_HINT_END
592};
593
594static int
595_citrus_HZ_parse_escape(void *context, const char *name, const char *s)
596{
597	_HZEncodingInfo *ei;
598	escape_t *escape;
599	void *p[2];
600
601	ei = (_HZEncodingInfo *)context;
602	escape = calloc(1, sizeof(*escape));
603	if (escape == NULL)
604		return (EINVAL);
605	if (strcmp("0", name) == 0) {
606		escape->set = E0SET(ei);
607		TAILQ_INSERT_TAIL(E0SET(ei), escape, entry);
608	} else if (strcmp("1", name) == 0) {
609		escape->set = E1SET(ei);
610		TAILQ_INSERT_TAIL(E1SET(ei), escape, entry);
611	} else {
612		free(escape);
613		return (EINVAL);
614	}
615	p[0] = (void *)escape;
616	p[1] = (void *)ei;
617	return (_citrus_prop_parse_variable(
618	    escape_hints, (void *)&p[0], s, strlen(s)));
619}
620
621static const _citrus_prop_hint_t root_hints[] = {
622_CITRUS_PROP_HINT_STR("0", &_citrus_HZ_parse_escape),
623_CITRUS_PROP_HINT_STR("1", &_citrus_HZ_parse_escape),
624_CITRUS_PROP_HINT_END
625};
626
627static int
628_citrus_HZ_encoding_module_init(_HZEncodingInfo * __restrict ei,
629    const void * __restrict var, size_t lenvar)
630{
631	int errnum;
632
633	memset(ei, 0, sizeof(*ei));
634	TAILQ_INIT(E0SET(ei));
635	TAILQ_INIT(E1SET(ei));
636	errnum = _citrus_prop_parse_variable(
637	    root_hints, (void *)ei, var, lenvar);
638	if (errnum != 0)
639		_citrus_HZ_encoding_module_uninit(ei);
640	return (errnum);
641}
642
643/* ----------------------------------------------------------------------
644 * public interface for stdenc
645 */
646
647_CITRUS_STDENC_DECLS(HZ);
648_CITRUS_STDENC_DEF_OPS(HZ);
649
650#include "citrus_stdenc_template.h"
651