1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright 2013 Garrett D'Amore <garrett@damore.org>
5 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
6 * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved.
7 * Copyright (c) 1993
8 *	The Regents of the University of California.  All rights reserved.
9 *
10 * This code is derived from software contributed to Berkeley by
11 * Paul Borman at Krystal Technologies.
12 *
13 * Copyright (c) 2011 The FreeBSD Foundation
14 * All rights reserved.
15 * Portions of this software were developed by David Chisnall
16 * under sponsorship from the FreeBSD Foundation.
17 *
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions
20 * are met:
21 * 1. Redistributions of source code must retain the above copyright
22 *    notice, this list of conditions and the following disclaimer.
23 * 2. Redistributions in binary form must reproduce the above copyright
24 *    notice, this list of conditions and the following disclaimer in the
25 *    documentation and/or other materials provided with the distribution.
26 * 3. Neither the name of the University nor the names of its contributors
27 *    may be used to endorse or promote products derived from this software
28 *    without specific prior written permission.
29 *
30 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
31 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
34 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
35 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
36 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
37 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
38 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
39 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
40 * SUCH DAMAGE.
41 */
42
43#if defined(LIBC_SCCS) && !defined(lint)
44static char sccsid[] = "@(#)euc.c	8.1 (Berkeley) 6/4/93";
45#endif /* LIBC_SCCS and not lint */
46#include <sys/param.h>
47__FBSDID("$FreeBSD$");
48
49#include <errno.h>
50#include <limits.h>
51#include <runetype.h>
52#include <stdlib.h>
53#include <string.h>
54#include <wchar.h>
55#include "mblocal.h"
56
57extern int __mb_sb_limit;
58
/*
 * Forward declarations.
 *
 * The two _impl routines hold the code shared by every EUC variant; they
 * take the CS2/CS3 lead bytes and widths as extra arguments.
 */
static size_t	_EUC_mbrtowc_impl(wchar_t * __restrict pwc,
		    const char * __restrict s, size_t n,
		    mbstate_t * __restrict ps, uint8_t cs2, uint8_t cs2width,
		    uint8_t cs3, uint8_t cs3width);
static size_t	_EUC_wcrtomb_impl(char * __restrict s, wchar_t wc,
		    mbstate_t * __restrict ps, uint8_t cs2, uint8_t cs2width,
		    uint8_t cs3, uint8_t cs3width);

/* Single-character decoders, one per encoding. */
static size_t	_EUC_CN_mbrtowc(wchar_t * __restrict pwc,
		    const char * __restrict s, size_t n,
		    mbstate_t * __restrict ps);
static size_t	_EUC_JP_mbrtowc(wchar_t * __restrict pwc,
		    const char * __restrict s, size_t n,
		    mbstate_t * __restrict ps);
static size_t	_EUC_KR_mbrtowc(wchar_t * __restrict pwc,
		    const char * __restrict s, size_t n,
		    mbstate_t * __restrict ps);
static size_t	_EUC_TW_mbrtowc(wchar_t * __restrict pwc,
		    const char * __restrict s, size_t n,
		    mbstate_t * __restrict ps);

/* Single-character encoders, one per encoding. */
static size_t	_EUC_CN_wcrtomb(char * __restrict s, wchar_t wc,
		    mbstate_t * __restrict ps);
static size_t	_EUC_JP_wcrtomb(char * __restrict s, wchar_t wc,
		    mbstate_t * __restrict ps);
static size_t	_EUC_KR_wcrtomb(char * __restrict s, wchar_t wc,
		    mbstate_t * __restrict ps);
static size_t	_EUC_TW_wcrtomb(char * __restrict s, wchar_t wc,
		    mbstate_t * __restrict ps);

/* Multibyte string to wide string, one per encoding. */
static size_t	_EUC_CN_mbsnrtowcs(wchar_t * __restrict dst,
		    const char ** __restrict src, size_t nms, size_t len,
		    mbstate_t * __restrict ps);
static size_t	_EUC_JP_mbsnrtowcs(wchar_t * __restrict dst,
		    const char ** __restrict src, size_t nms, size_t len,
		    mbstate_t * __restrict ps);
static size_t	_EUC_KR_mbsnrtowcs(wchar_t * __restrict dst,
		    const char ** __restrict src, size_t nms, size_t len,
		    mbstate_t * __restrict ps);
static size_t	_EUC_TW_mbsnrtowcs(wchar_t * __restrict dst,
		    const char ** __restrict src, size_t nms, size_t len,
		    mbstate_t * __restrict ps);

/* Wide string to multibyte string, one per encoding. */
static size_t	_EUC_CN_wcsnrtombs(char * __restrict dst,
		    const wchar_t ** __restrict src, size_t nwc, size_t len,
		    mbstate_t * __restrict ps);
static size_t	_EUC_JP_wcsnrtombs(char * __restrict dst,
		    const wchar_t ** __restrict src, size_t nwc, size_t len,
		    mbstate_t * __restrict ps);
static size_t	_EUC_KR_wcsnrtombs(char * __restrict dst,
		    const wchar_t ** __restrict src, size_t nwc, size_t len,
		    mbstate_t * __restrict ps);
static size_t	_EUC_TW_wcsnrtombs(char * __restrict dst,
		    const wchar_t ** __restrict src, size_t nwc, size_t len,
		    mbstate_t * __restrict ps);

/* Initial-state test, common to all four encodings. */
static int	_EUC_mbsinit(const mbstate_t *ps);
109
/*
 * Conversion state of the EUC codecs.  The routines in this file reach it
 * by casting the caller's mbstate_t pointer.
 */
typedef struct {
	wchar_t	ch;	/* bytes of a partially read character, packed */
	int	set;	/* not referenced by the routines in this file */
	int	want;	/* bytes still missing from ch; 0 = initial state */
} _EucState;
115
116static int
117_EUC_mbsinit(const mbstate_t *ps)
118{
119
120	return (ps == NULL || ((const _EucState *)ps)->want == 0);
121}
122
123/*
124 * EUC-CN uses CS0, CS1 and CS2 (4 bytes).
125 */
126int
127_EUC_CN_init(struct xlocale_ctype *l, _RuneLocale *rl)
128{
129	l->__mbrtowc = _EUC_CN_mbrtowc;
130	l->__wcrtomb = _EUC_CN_wcrtomb;
131	l->__mbsnrtowcs = _EUC_CN_mbsnrtowcs;
132	l->__wcsnrtombs = _EUC_CN_wcsnrtombs;
133	l->__mbsinit = _EUC_mbsinit;
134
135	l->runes = rl;
136	l->__mb_cur_max = 4;
137	l->__mb_sb_limit = 128;
138	return (0);
139}
140
141static size_t
142_EUC_CN_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s,
143    size_t n, mbstate_t * __restrict ps)
144{
145	return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 4, 0, 0));
146}
147
148static size_t
149_EUC_CN_mbsnrtowcs(wchar_t * __restrict dst,
150    const char ** __restrict src,
151    size_t nms, size_t len, mbstate_t * __restrict ps)
152{
153	return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_CN_mbrtowc));
154}
155
156static size_t
157_EUC_CN_wcrtomb(char * __restrict s, wchar_t wc,
158    mbstate_t * __restrict ps)
159{
160	return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 4, 0, 0));
161}
162
163static size_t
164_EUC_CN_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
165	size_t nwc, size_t len, mbstate_t * __restrict ps)
166{
167	return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_CN_wcrtomb));
168}
169
170/*
171 * EUC-KR uses only CS0 and CS1.
172 */
173int
174_EUC_KR_init(struct xlocale_ctype *l, _RuneLocale *rl)
175{
176	l->__mbrtowc = _EUC_KR_mbrtowc;
177	l->__wcrtomb = _EUC_KR_wcrtomb;
178	l->__mbsnrtowcs = _EUC_KR_mbsnrtowcs;
179	l->__wcsnrtombs = _EUC_KR_wcsnrtombs;
180	l->__mbsinit = _EUC_mbsinit;
181
182	l->runes = rl;
183	l->__mb_cur_max = 2;
184	l->__mb_sb_limit = 128;
185	return (0);
186}
187
/*
 * mbrtowc() for EUC-KR: runs the shared EUC decoder with neither CS2 nor
 * CS3, i.e. with all four code-set arguments set to 0.
 */
static size_t
_EUC_KR_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s,
    size_t n, mbstate_t * __restrict ps)
{
	const uint8_t unused = 0;

	return (_EUC_mbrtowc_impl(pwc, s, n, ps, unused, unused, unused,
	    unused));
}
194
195static size_t
196_EUC_KR_mbsnrtowcs(wchar_t * __restrict dst,
197    const char ** __restrict src,
198    size_t nms, size_t len, mbstate_t * __restrict ps)
199{
200	return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_KR_mbrtowc));
201}
202
/*
 * wcrtomb() for EUC-KR: runs the shared EUC encoder with neither CS2 nor
 * CS3, i.e. with all four code-set arguments set to 0.
 */
static size_t
_EUC_KR_wcrtomb(char * __restrict s, wchar_t wc,
    mbstate_t * __restrict ps)
{
	const uint8_t unused = 0;

	return (_EUC_wcrtomb_impl(s, wc, ps, unused, unused, unused, unused));
}
209
210static size_t
211_EUC_KR_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
212	size_t nwc, size_t len, mbstate_t * __restrict ps)
213{
214	return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_KR_wcrtomb));
215}
216
217/*
218 * EUC-JP uses CS0, CS1, CS2, and CS3.
219 */
220int
221_EUC_JP_init(struct xlocale_ctype *l, _RuneLocale *rl)
222{
223	l->__mbrtowc = _EUC_JP_mbrtowc;
224	l->__wcrtomb = _EUC_JP_wcrtomb;
225	l->__mbsnrtowcs = _EUC_JP_mbsnrtowcs;
226	l->__wcsnrtombs = _EUC_JP_wcsnrtombs;
227	l->__mbsinit = _EUC_mbsinit;
228
229	l->runes = rl;
230	l->__mb_cur_max = 3;
231	l->__mb_sb_limit = 128;
232	return (0);
233}
234
235static size_t
236_EUC_JP_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s,
237    size_t n, mbstate_t * __restrict ps)
238{
239	return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 2, SS3, 3));
240}
241
242static size_t
243_EUC_JP_mbsnrtowcs(wchar_t * __restrict dst,
244    const char ** __restrict src,
245    size_t nms, size_t len, mbstate_t * __restrict ps)
246{
247	return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_JP_mbrtowc));
248}
249
250static size_t
251_EUC_JP_wcrtomb(char * __restrict s, wchar_t wc,
252    mbstate_t * __restrict ps)
253{
254	return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 2, SS3, 3));
255}
256
257static size_t
258_EUC_JP_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
259	size_t nwc, size_t len, mbstate_t * __restrict ps)
260{
261	return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_JP_wcrtomb));
262}
263
264/*
265 * EUC-TW uses CS0, CS1, and CS2.
266 */
267int
268_EUC_TW_init(struct xlocale_ctype *l, _RuneLocale *rl)
269{
270	l->__mbrtowc = _EUC_TW_mbrtowc;
271	l->__wcrtomb = _EUC_TW_wcrtomb;
272	l->__mbsnrtowcs = _EUC_TW_mbsnrtowcs;
273	l->__wcsnrtombs = _EUC_TW_wcsnrtombs;
274	l->__mbsinit = _EUC_mbsinit;
275
276	l->runes = rl;
277	l->__mb_cur_max = 4;
278	l->__mb_sb_limit = 128;
279	return (0);
280}
281
282static size_t
283_EUC_TW_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s,
284	size_t n, mbstate_t * __restrict ps)
285{
286	return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 4, 0, 0));
287}
288
289static size_t
290_EUC_TW_mbsnrtowcs(wchar_t * __restrict dst,
291	const char ** __restrict src,
292	size_t nms, size_t len, mbstate_t * __restrict ps)
293{
294	return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_TW_mbrtowc));
295}
296
297static size_t
298_EUC_TW_wcrtomb(char * __restrict s, wchar_t wc,
299	mbstate_t * __restrict ps)
300{
301	return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 4, 0, 0));
302}
303
304static size_t
305_EUC_TW_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
306	size_t nwc, size_t len, mbstate_t * __restrict ps)
307{
308	return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_TW_wcrtomb));
309}
310
311/*
312 * Common EUC code.
313 */
314
315static size_t
316_EUC_mbrtowc_impl(wchar_t * __restrict pwc, const char * __restrict s,
317	size_t n, mbstate_t * __restrict ps,
318	uint8_t cs2, uint8_t cs2width, uint8_t cs3, uint8_t cs3width)
319{
320	_EucState *es;
321	int i, want;
322	wchar_t wc = 0;
323	unsigned char ch, chs;
324
325	es = (_EucState *)ps;
326
327	if (es->want < 0 || es->want > MB_CUR_MAX) {
328		errno = EINVAL;
329		return ((size_t)-1);
330	}
331
332	if (s == NULL) {
333		s = "";
334		n = 1;
335		pwc = NULL;
336	}
337
338	if (n == 0)
339		/* Incomplete multibyte sequence */
340		return ((size_t)-2);
341
342	if (es->want == 0) {
343		/* Fast path for plain ASCII (CS0) */
344		if (((ch = (unsigned char)*s) & 0x80) == 0) {
345			if (pwc != NULL)
346				*pwc = ch;
347			return (ch != '\0' ? 1 : 0);
348		}
349
350		if (ch >= 0xa1) {
351			/* CS1 */
352			want = 2;
353		} else if (ch == cs2) {
354			want = cs2width;
355		} else if (ch == cs3) {
356			want = cs3width;
357		} else {
358			errno = EILSEQ;
359			return ((size_t)-1);
360		}
361
362
363		es->want = want;
364		es->ch = 0;
365	} else {
366		want = es->want;
367		wc = es->ch;
368	}
369
370	for (i = 0; i < MIN(want, n); i++) {
371		wc <<= 8;
372		chs = *s;
373		wc |= chs;
374		s++;
375	}
376	if (i < want) {
377		/* Incomplete multibyte sequence */
378		es->want = want - i;
379		es->ch = wc;
380		errno = EILSEQ;
381		return ((size_t)-2);
382	}
383	if (pwc != NULL)
384		*pwc = wc;
385	es->want = 0;
386	return (wc == L'\0' ? 0 : want);
387}
388
389static size_t
390_EUC_wcrtomb_impl(char * __restrict s, wchar_t wc,
391    mbstate_t * __restrict ps,
392    uint8_t cs2, uint8_t cs2width, uint8_t cs3, uint8_t cs3width)
393{
394	_EucState *es;
395	int i, len;
396	wchar_t nm;
397
398	es = (_EucState *)ps;
399
400	if (es->want != 0) {
401		errno = EINVAL;
402		return ((size_t)-1);
403	}
404
405	if (s == NULL)
406		/* Reset to initial shift state (no-op) */
407		return (1);
408
409	if ((wc & ~0x7f) == 0) {
410		/* Fast path for plain ASCII (CS0) */
411		*s = (char)wc;
412		return (1);
413	}
414
415	/* Determine the "length" */
416	if ((unsigned)wc > 0xffffff) {
417		len = 4;
418	} else if ((unsigned)wc > 0xffff) {
419		len = 3;
420	} else if ((unsigned)wc > 0xff) {
421		len = 2;
422	} else {
423		len = 1;
424	}
425
426	if (len > MB_CUR_MAX) {
427		errno = EILSEQ;
428		return ((size_t)-1);
429	}
430
431	/* This first check excludes CS1, which is implicitly valid. */
432	if ((wc < 0xa100) || (wc > 0xffff)) {
433		/* Check for valid CS2 or CS3 */
434		nm = (wc >> ((len - 1) * 8));
435		if (nm == cs2) {
436			if (len != cs2width) {
437				errno = EILSEQ;
438				return ((size_t)-1);
439			}
440		} else if (nm == cs3) {
441			if (len != cs3width) {
442				errno = EILSEQ;
443				return ((size_t)-1);
444			}
445		} else {
446			errno = EILSEQ;
447			return ((size_t)-1);
448		}
449	}
450
451	/* Stash the bytes, least significant last */
452	for (i = len - 1; i >= 0; i--) {
453		s[i] = (wc & 0xff);
454		wc >>= 8;
455	}
456	return (len);
457}
458