1/*	$NetBSD: citrus_mskanji.c,v 1.15 2022/04/19 20:32:14 rillig Exp $	*/
2
3/*-
4 * Copyright (c)2002 Citrus Project,
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29/*
30 *    ja_JP.SJIS locale table for BSD4.4/rune
31 *    version 1.0
32 *    (C) Sin'ichiro MIYATANI / Phase One, Inc
33 *    May 12, 1995
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 *    notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 *    notice, this list of conditions and the following disclaimer in the
42 *    documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 *    must display the following acknowledgement:
45 *      This product includes software developed by Phase One, Inc.
46 * 4. The name of Phase One, Inc. may be used to endorse or promote products
47 *    derived from this software without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 */
61
62
63#include <sys/cdefs.h>
64#if defined(LIBC_SCCS) && !defined(lint)
65__RCSID("$NetBSD: citrus_mskanji.c,v 1.15 2022/04/19 20:32:14 rillig Exp $");
66#endif /* LIBC_SCCS and not lint */
67
68#include <assert.h>
69#include <errno.h>
70#include <string.h>
71#include <stdio.h>
72#include <stdlib.h>
73#include <stddef.h>
74#include <wchar.h>
75#include <sys/types.h>
76#include <limits.h>
77
78#include "citrus_namespace.h"
79#include "citrus_types.h"
80#include "citrus_bcs.h"
81#include "citrus_module.h"
82#include "citrus_ctype.h"
83#include "citrus_stdenc.h"
84#include "citrus_mskanji.h"
85
86
87/* ----------------------------------------------------------------------
88 * private stuffs used by templates
89 */
90
91typedef struct _MSKanjiState {
92	char ch[2];
93	int chlen;
94} _MSKanjiState;
95
96typedef struct {
97	int mode;
98#define MODE_JIS2004	1
99} _MSKanjiEncodingInfo;
100
101typedef struct {
102	_MSKanjiEncodingInfo	ei;
103	struct {
104		/* for future multi-locale facility */
105		_MSKanjiState	s_mblen;
106		_MSKanjiState	s_mbrlen;
107		_MSKanjiState	s_mbrtowc;
108		_MSKanjiState	s_mbtowc;
109		_MSKanjiState	s_mbsrtowcs;
110		_MSKanjiState	s_mbsnrtowcs;
111		_MSKanjiState	s_wcrtomb;
112		_MSKanjiState	s_wcsrtombs;
113		_MSKanjiState	s_wcsnrtombs;
114		_MSKanjiState	s_wctomb;
115	} states;
116} _MSKanjiCTypeInfo;
117
118#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
119#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.s_##_func_
120
121#define _FUNCNAME(m)			_citrus_MSKanji_##m
122#define _ENCODING_INFO			_MSKanjiEncodingInfo
123#define _CTYPE_INFO			_MSKanjiCTypeInfo
124#define _ENCODING_STATE			_MSKanjiState
125#define _ENCODING_MB_CUR_MAX(_ei_)	2
126#define _ENCODING_IS_STATE_DEPENDENT	0
127#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	0
128
129
130static int
131_mskanji1(int c)
132{
133
134	if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xfc))
135		return 1;
136	else
137		return 0;
138}
139
140static int
141_mskanji2(int c)
142{
143
144	if ((c >= 0x40 && c <= 0x7e) || (c >= 0x80 && c <= 0xfc))
145		return 1;
146	else
147		return 0;
148}
149
150static __inline void
151/*ARGSUSED*/
152_citrus_MSKanji_init_state(_MSKanjiEncodingInfo * __restrict ei,
153			   _MSKanjiState * __restrict s)
154{
155	s->chlen = 0;
156}
157
158static __inline void
159/*ARGSUSED*/
160_citrus_MSKanji_pack_state(_MSKanjiEncodingInfo * __restrict ei,
161			   void * __restrict pspriv,
162			   const _MSKanjiState * __restrict s)
163{
164	memcpy(pspriv, (const void *)s, sizeof(*s));
165}
166
167static __inline void
168/*ARGSUSED*/
169_citrus_MSKanji_unpack_state(_MSKanjiEncodingInfo * __restrict ei,
170			     _MSKanjiState * __restrict s,
171			     const void * __restrict pspriv)
172{
173	memcpy((void *)s, pspriv, sizeof(*s));
174}
175
176static int
177/*ARGSUSED*/
178_citrus_MSKanji_mbrtowc_priv(_MSKanjiEncodingInfo * __restrict ei,
179			     wchar_t * __restrict pwc,
180			     const char ** __restrict s, size_t n,
181			     _MSKanjiState * __restrict psenc,
182			     size_t * __restrict nresult)
183{
184	wchar_t wchar;
185	int len;
186	int chlenbak;
187	const char *s0;
188
189	_DIAGASSERT(nresult != 0);
190	_DIAGASSERT(ei != NULL);
191	_DIAGASSERT(s != NULL);
192	_DIAGASSERT(psenc != NULL);
193
194	s0 = *s;
195
196	if (s0 == NULL) {
197		_citrus_MSKanji_init_state(ei, psenc);
198		*nresult = 0; /* state independent */
199		return (0);
200	}
201
202	chlenbak = psenc->chlen;
203
204	/* make sure we have the first byte in the buffer */
205	switch (psenc->chlen) {
206	case 0:
207		if (n < 1)
208			goto restart;
209		psenc->ch[0] = *s0++;
210		psenc->chlen = 1;
211		n--;
212		break;
213	case 1:
214		break;
215	default:
216		/* illegal state */
217		goto encoding_error;
218	}
219
220	len = _mskanji1(psenc->ch[0] & 0xff) ? 2 : 1;
221	while (psenc->chlen < len) {
222		if (n < 1)
223			goto restart;
224		psenc->ch[psenc->chlen] = *s0++;
225		psenc->chlen++;
226		n--;
227	}
228
229	*s = s0;
230
231	switch (len) {
232	case 1:
233		wchar = psenc->ch[0] & 0xff;
234		break;
235	case 2:
236		if (!_mskanji2(psenc->ch[1] & 0xff))
237			goto encoding_error;
238		wchar = ((psenc->ch[0] & 0xff) << 8) | (psenc->ch[1] & 0xff);
239		break;
240	default:
241		/* illegal state */
242		goto encoding_error;
243	}
244
245	psenc->chlen = 0;
246
247	if (pwc)
248		*pwc = wchar;
249
250	if (!wchar)
251		*nresult = 0;
252	else
253		*nresult = len - chlenbak;
254
255	return (0);
256
257encoding_error:
258	psenc->chlen = 0;
259	*nresult = (size_t)-1;
260	return (EILSEQ);
261
262restart:
263	*nresult = (size_t)-2;
264	*s = s0;
265	return (0);
266}
267
268
269static int
270_citrus_MSKanji_wcrtomb_priv(_MSKanjiEncodingInfo * __restrict ei,
271			     char * __restrict s, size_t n, wchar_t wc,
272			     _MSKanjiState * __restrict psenc,
273			     size_t * __restrict nresult)
274{
275	int ret;
276
277	_DIAGASSERT(ei != NULL);
278	_DIAGASSERT(psenc != NULL);
279	_DIAGASSERT(s != NULL);
280
281	/* check invalid sequence */
282	if (wc & ~0xffff) {
283		ret = EILSEQ;
284		goto err;
285	}
286
287	if (wc & 0xff00) {
288		if (n < 2) {
289			ret = E2BIG;
290			goto err;
291		}
292
293		s[0] = (wc >> 8) & 0xff;
294		s[1] = wc & 0xff;
295		if (!_mskanji1(s[0] & 0xff) || !_mskanji2(s[1] & 0xff)) {
296			ret = EILSEQ;
297			goto err;
298		}
299
300		*nresult = 2;
301		return 0;
302	} else {
303		if (n < 1) {
304			ret = E2BIG;
305			goto err;
306		}
307
308		s[0] = wc & 0xff;
309		if (_mskanji1(s[0] & 0xff)) {
310			ret = EILSEQ;
311			goto err;
312		}
313
314		*nresult = 1;
315		return 0;
316	}
317
318err:
319	*nresult = (size_t)-1;
320	return ret;
321}
322
323
324static __inline int
325/*ARGSUSED*/
326_citrus_MSKanji_stdenc_wctocs(_MSKanjiEncodingInfo * __restrict ei,
327			      _csid_t * __restrict csid,
328			      _index_t * __restrict idx, wchar_t wc)
329{
330	_index_t row, col;
331	int offset;
332
333	_DIAGASSERT(csid != NULL && idx != NULL);
334
335	if ((_wc_t)wc < 0x80) {
336		/* ISO-646 */
337		*csid = 0;
338		*idx = (_index_t)wc;
339	} else if ((_wc_t)wc < 0x100) {
340		/* KANA */
341		*csid = 1;
342		*idx = (_index_t)wc & 0x7F;
343	} else {
344		/* Kanji (containing Gaiji zone) */
345		/*
346		 * 94^2 zone (contains a part of Gaiji (0xED40 - 0xEEFC)):
347		 * 0x8140 - 0x817E -> 0x2121 - 0x215F
348		 * 0x8180 - 0x819E -> 0x2160 - 0x217E
349		 * 0x819F - 0x81FC -> 0x2221 - 0x227E
350		 *
351		 * 0x8240 - 0x827E -> 0x2321 - 0x235F
352		 *  ...
353		 * 0x9F9F - 0x9FFc -> 0x5E21 - 0x5E7E
354		 *
355		 * 0xE040 - 0xE07E -> 0x5F21 - 0x5F5F
356		 *  ...
357		 * 0xEF9F - 0xEFFC -> 0x7E21 - 0x7E7E
358		 *
359		 * extended Gaiji zone:
360		 * 0xF040 - 0xFCFC
361		 *
362		 * JIS X0213-plane2:
363		 * 0xF040 - 0xF09E -> 0x2121 - 0x217E
364		 * 0xF140 - 0xF19E -> 0x2321 - 0x237E
365		 * ...
366		 * 0xF240 - 0xF29E -> 0x2521 - 0x257E
367		 *
368		 * 0xF09F - 0xF0FC -> 0x2821 - 0x287E
369		 * 0xF29F - 0xF2FC -> 0x2C21 - 0x2C7E
370		 * ...
371		 * 0xF44F - 0xF49E -> 0x2F21 - 0x2F7E
372		 *
373		 * 0xF49F - 0xF4FC -> 0x6E21 - 0x6E7E
374		 * ...
375		 * 0xFC9F - 0xFCFC -> 0x7E21 - 0x7E7E
376		 */
377		row = ((_wc_t)wc >> 8) & 0xFF;
378		col = (_wc_t)wc & 0xFF;
379		if (!_mskanji1(row) || !_mskanji2(col))
380			return EILSEQ;
381		if ((ei->mode & MODE_JIS2004) == 0 || row < 0xF0) {
382			*csid = 2;
383			offset = 0x81;
384		} else {
385			*csid = 3;
386			if ((_wc_t)wc <= 0xF49E) {
387				offset = (_wc_t)wc >= 0xF29F ||
388				  ((_wc_t)wc >= 0xF09F && (_wc_t)wc <= 0xF0FC)
389				    ? 0xED : 0xF0;
390			} else
391				offset = 0xCE;
392		}
393		row -= offset;
394		if (row >= 0x5F)
395			row -= 0x40;
396		row = row * 2 + 0x21;
397		col -= 0x1F;
398		if (col >= 0x61)
399			col -= 1;
400		if (col > 0x7E) {
401			row += 1;
402			col -= 0x5E;
403		}
404		*idx = ((_index_t)row << 8) | col;
405	}
406
407	return 0;
408}
409
410static __inline int
411/*ARGSUSED*/
412_citrus_MSKanji_stdenc_cstowc(_MSKanjiEncodingInfo * __restrict ei,
413			      wchar_t * __restrict wc,
414			      _csid_t csid, _index_t idx)
415{
416	u_int32_t row, col;
417	int offset;
418
419	_DIAGASSERT(wc != NULL);
420
421	switch (csid) {
422	case 0:
423		/* ISO-646 */
424		if (idx >= 0x80)
425			return EILSEQ;
426		*wc = (wchar_t)idx;
427		break;
428	case 1:
429		/* kana */
430		if (idx >= 0x80)
431			return EILSEQ;
432		*wc = (wchar_t)idx + 0x80;
433		break;
434	case 3:
435		if ((ei->mode & MODE_JIS2004) == 0)
436			return EILSEQ;
437	/*FALLTHROUGH*/
438	case 2:
439		/* kanji */
440		row = (idx >> 8);
441		if (row < 0x21)
442			return EILSEQ;
443		if (csid == 3) {
444			if (row <= 0x2F)
445				offset = (row == 0x22 || row >= 0x26)
446				    ? 0xED : 0xF0;
447			else if (row >= 0x4D && row <= 0x7E)
448				offset = 0xCE;
449			else
450				return EILSEQ;
451		} else {
452			if (row > 0x97)
453				return EILSEQ;
454			offset = (row < 0x5F) ? 0x81 : 0xC1;
455		}
456		col = idx & 0xFF;
457		if (col < 0x21 || col > 0x7E)
458			return EILSEQ;
459		row -= 0x21; col -= 0x21;
460		if ((row & 1) == 0) {
461			col += 0x40;
462			if (col >= 0x7F)
463				col += 1;
464		} else
465			col += 0x9F;
466		row = row / 2 + offset;
467		*wc = ((wchar_t)row << 8) | col;
468		break;
469	default:
470		return EILSEQ;
471	}
472
473	return 0;
474}
475
476static __inline int
477/*ARGSUSED*/
478_citrus_MSKanji_stdenc_get_state_desc_generic(_MSKanjiEncodingInfo * __restrict ei,
479					      _MSKanjiState * __restrict psenc,
480					      int * __restrict rstate)
481{
482
483	if (psenc->chlen == 0)
484		*rstate = _STDENC_SDGEN_INITIAL;
485	else
486		*rstate = _STDENC_SDGEN_INCOMPLETE_CHAR;
487
488	return 0;
489}
490
491static int
492/*ARGSUSED*/
493_citrus_MSKanji_encoding_module_init(_MSKanjiEncodingInfo *  __restrict ei,
494				     const void * __restrict var,
495				     size_t lenvar)
496{
497	const char *p;
498
499	_DIAGASSERT(ei != NULL);
500
501	p = var;
502#define MATCH(x, act)						\
503do {								\
504	if (lenvar >= (sizeof(#x)-1) &&				\
505	    _bcs_strncasecmp(p, #x, sizeof(#x)-1) == 0) {	\
506		act;						\
507		lenvar -= sizeof(#x)-1;				\
508		p += sizeof(#x)-1;				\
509	}							\
510} while (0)
511	memset((void *)ei, 0, sizeof(*ei));
512	while (lenvar > 0) {
513		switch (_bcs_toupper(*p)) {
514		case 'J':
515			MATCH(JIS2004, ei->mode |= MODE_JIS2004);
516			break;
517		}
518		++p;
519		--lenvar;
520	}
521
522	return 0;
523}
524
525static void
526_citrus_MSKanji_encoding_module_uninit(_MSKanjiEncodingInfo *ei)
527{
528}
529
530/* ----------------------------------------------------------------------
531 * public interface for ctype
532 */
533
534_CITRUS_CTYPE_DECLS(MSKanji);
535_CITRUS_CTYPE_DEF_OPS(MSKanji);
536
537#include "citrus_ctype_template.h"
538
539/* ----------------------------------------------------------------------
540 * public interface for stdenc
541 */
542
543_CITRUS_STDENC_DECLS(MSKanji);
544_CITRUS_STDENC_DEF_OPS(MSKanji);
545
546#include "citrus_stdenc_template.h"
547