1/*-
2 * Copyright 2013 Garrett D'Amore <garrett@damore.org>
3 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
4 * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved.
5 * Copyright (c) 1993
6 *	The Regents of the University of California.  All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Paul Borman at Krystal Technologies.
10 *
11 * Copyright (c) 2011 The FreeBSD Foundation
12 * All rights reserved.
13 * Portions of this software were developed by David Chisnall
14 * under sponsorship from the FreeBSD Foundation.
15 *
16 * Redistribution and use in source and binary forms, with or without
17 * modification, are permitted provided that the following conditions
18 * are met:
19 * 1. Redistributions of source code must retain the above copyright
20 *    notice, this list of conditions and the following disclaimer.
21 * 2. Redistributions in binary form must reproduce the above copyright
22 *    notice, this list of conditions and the following disclaimer in the
23 *    documentation and/or other materials provided with the distribution.
24 * 3. Neither the name of the University nor the names of its contributors
25 *    may be used to endorse or promote products derived from this software
26 *    without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
39 */
40
41#if defined(LIBC_SCCS) && !defined(lint)
42static char sccsid[] = "@(#)euc.c	8.1 (Berkeley) 6/4/93";
43#endif /* LIBC_SCCS and not lint */
44#include <sys/param.h>
45__FBSDID("$FreeBSD: releng/11.0/lib/libc/locale/euc.c 297532 2016-04-04 02:43:35Z ache $");
46
47#include <errno.h>
48#include <limits.h>
49#include <runetype.h>
50#include <stdlib.h>
51#include <string.h>
52#include <wchar.h>
53#include "mblocal.h"
54
/*
 * NOTE(review): this global is declared here but not referenced by name in
 * this file; the init routines assign the `__mb_sb_limit' member of the
 * locale structure instead.
 */
extern int __mb_sb_limit;

/*
 * Shared conversion engines.  The four trailing uint8_t arguments are, in
 * order: cs2 lead byte, cs2 width, cs3 lead byte, cs3 width.
 */
static size_t	_EUC_mbrtowc_impl(wchar_t * __restrict, const char * __restrict,
    size_t, mbstate_t * __restrict, uint8_t, uint8_t, uint8_t, uint8_t);
static size_t	_EUC_wcrtomb_impl(char * __restrict, wchar_t,
    mbstate_t * __restrict, uint8_t, uint8_t, uint8_t, uint8_t);

/* Per-encoding single character decoders (multibyte to wide). */
static size_t	_EUC_CN_mbrtowc(wchar_t * __restrict, const char * __restrict,
		    size_t, mbstate_t * __restrict);
static size_t	_EUC_JP_mbrtowc(wchar_t * __restrict, const char * __restrict,
		    size_t, mbstate_t * __restrict);
static size_t	_EUC_KR_mbrtowc(wchar_t * __restrict, const char * __restrict,
		    size_t, mbstate_t * __restrict);
static size_t	_EUC_TW_mbrtowc(wchar_t * __restrict, const char * __restrict,
		    size_t, mbstate_t * __restrict);

/* Per-encoding single character encoders (wide to multibyte). */
static size_t	_EUC_CN_wcrtomb(char * __restrict, wchar_t,
		    mbstate_t * __restrict);
static size_t	_EUC_JP_wcrtomb(char * __restrict, wchar_t,
		    mbstate_t * __restrict);
static size_t	_EUC_KR_wcrtomb(char * __restrict, wchar_t,
		    mbstate_t * __restrict);
static size_t	_EUC_TW_wcrtomb(char * __restrict, wchar_t,
		    mbstate_t * __restrict);

/* Per-encoding string decoders; each forwards to __mbsnrtowcs_std(). */
static size_t	_EUC_CN_mbsnrtowcs(wchar_t * __restrict,
		    const char ** __restrict, size_t, size_t,
		    mbstate_t * __restrict);
static size_t	_EUC_JP_mbsnrtowcs(wchar_t * __restrict,
		    const char ** __restrict, size_t, size_t,
		    mbstate_t * __restrict);
static size_t	_EUC_KR_mbsnrtowcs(wchar_t * __restrict,
		    const char ** __restrict, size_t, size_t,
		    mbstate_t * __restrict);
static size_t	_EUC_TW_mbsnrtowcs(wchar_t * __restrict,
		    const char ** __restrict, size_t, size_t,
		    mbstate_t * __restrict);

/* Per-encoding string encoders; each forwards to __wcsnrtombs_std(). */
static size_t	_EUC_CN_wcsnrtombs(char * __restrict,
		    const wchar_t ** __restrict, size_t, size_t,
		    mbstate_t * __restrict);
static size_t	_EUC_JP_wcsnrtombs(char * __restrict,
		    const wchar_t ** __restrict, size_t, size_t,
		    mbstate_t * __restrict);
static size_t	_EUC_KR_wcsnrtombs(char * __restrict,
		    const wchar_t ** __restrict, size_t, size_t,
		    mbstate_t * __restrict);
static size_t	_EUC_TW_wcsnrtombs(char * __restrict,
		    const wchar_t ** __restrict, size_t, size_t,
		    mbstate_t * __restrict);

/* Initial-state test shared by all four encodings. */
static int	_EUC_mbsinit(const mbstate_t *);
107
/*
 * Conversion state kept inside the caller's mbstate_t (the converters cast
 * the mbstate_t pointer to this type).  All-zero means "initial state".
 */
typedef struct {
	wchar_t	ch;	/* bytes of the partial character seen so far */
	int	set;	/* not referenced by the code in this file */
	int	want;	/* bytes still missing; 0 when no character is pending */
} _EucState;
113
114static int
115_EUC_mbsinit(const mbstate_t *ps)
116{
117
118	return (ps == NULL || ((const _EucState *)ps)->want == 0);
119}
120
121/*
122 * EUC-CN uses CS0, CS1 and CS2 (4 bytes).
123 */
124int
125_EUC_CN_init(struct xlocale_ctype *l, _RuneLocale *rl)
126{
127	l->__mbrtowc = _EUC_CN_mbrtowc;
128	l->__wcrtomb = _EUC_CN_wcrtomb;
129	l->__mbsnrtowcs = _EUC_CN_mbsnrtowcs;
130	l->__wcsnrtombs = _EUC_CN_wcsnrtombs;
131	l->__mbsinit = _EUC_mbsinit;
132
133	l->runes = rl;
134	l->__mb_cur_max = 4;
135	l->__mb_sb_limit = 128;
136	return (0);
137}
138
139static size_t
140_EUC_CN_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s,
141    size_t n, mbstate_t * __restrict ps)
142{
143	return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 4, 0, 0));
144}
145
146static size_t
147_EUC_CN_mbsnrtowcs(wchar_t * __restrict dst,
148    const char ** __restrict src,
149    size_t nms, size_t len, mbstate_t * __restrict ps)
150{
151	return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_CN_mbrtowc));
152}
153
154static size_t
155_EUC_CN_wcrtomb(char * __restrict s, wchar_t wc,
156    mbstate_t * __restrict ps)
157{
158	return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 4, 0, 0));
159}
160
161static size_t
162_EUC_CN_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
163	size_t nwc, size_t len, mbstate_t * __restrict ps)
164{
165	return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_CN_wcrtomb));
166}
167
168/*
169 * EUC-KR uses only CS0 and CS1.
170 */
171int
172_EUC_KR_init(struct xlocale_ctype *l, _RuneLocale *rl)
173{
174	l->__mbrtowc = _EUC_KR_mbrtowc;
175	l->__wcrtomb = _EUC_KR_wcrtomb;
176	l->__mbsnrtowcs = _EUC_KR_mbsnrtowcs;
177	l->__wcsnrtombs = _EUC_KR_wcsnrtombs;
178	l->__mbsinit = _EUC_mbsinit;
179
180	l->runes = rl;
181	l->__mb_cur_max = 2;
182	l->__mb_sb_limit = 128;
183	return (0);
184}
185
/*
 * Decode one EUC-KR character via the shared decoder.
 * Neither CS2 nor CS3 is used: both lead bytes are passed as 0, and the
 * decoder only compares them against bytes that have the high bit set.
 */
static size_t
_EUC_KR_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s,
    size_t n, mbstate_t * __restrict ps)
{
	size_t rv;

	rv = _EUC_mbrtowc_impl(pwc, s, n, ps, 0, 0, 0, 0);
	return (rv);
}
192
193static size_t
194_EUC_KR_mbsnrtowcs(wchar_t * __restrict dst,
195    const char ** __restrict src,
196    size_t nms, size_t len, mbstate_t * __restrict ps)
197{
198	return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_KR_mbrtowc));
199}
200
/*
 * Encode one wide character as EUC-KR via the shared encoder.
 * CS2 and CS3 are both unused (all four code set arguments are 0).
 */
static size_t
_EUC_KR_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps)
{
	size_t rv;

	rv = _EUC_wcrtomb_impl(s, wc, ps, 0, 0, 0, 0);
	return (rv);
}
207
208static size_t
209_EUC_KR_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
210	size_t nwc, size_t len, mbstate_t * __restrict ps)
211{
212	return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_KR_wcrtomb));
213}
214
215/*
216 * EUC-JP uses CS0, CS1, CS2, and CS3.
217 */
218int
219_EUC_JP_init(struct xlocale_ctype *l, _RuneLocale *rl)
220{
221	l->__mbrtowc = _EUC_JP_mbrtowc;
222	l->__wcrtomb = _EUC_JP_wcrtomb;
223	l->__mbsnrtowcs = _EUC_JP_mbsnrtowcs;
224	l->__wcsnrtombs = _EUC_JP_wcsnrtombs;
225	l->__mbsinit = _EUC_mbsinit;
226
227	l->runes = rl;
228	l->__mb_cur_max = 3;
229	l->__mb_sb_limit = 128;
230	return (0);
231}
232
233static size_t
234_EUC_JP_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s,
235    size_t n, mbstate_t * __restrict ps)
236{
237	return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 2, SS3, 3));
238}
239
240static size_t
241_EUC_JP_mbsnrtowcs(wchar_t * __restrict dst,
242    const char ** __restrict src,
243    size_t nms, size_t len, mbstate_t * __restrict ps)
244{
245	return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_JP_mbrtowc));
246}
247
248static size_t
249_EUC_JP_wcrtomb(char * __restrict s, wchar_t wc,
250    mbstate_t * __restrict ps)
251{
252	return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 2, SS3, 3));
253}
254
255static size_t
256_EUC_JP_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
257	size_t nwc, size_t len, mbstate_t * __restrict ps)
258{
259	return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_JP_wcrtomb));
260}
261
262/*
263 * EUC-TW uses CS0, CS1, and CS2.
264 */
265int
266_EUC_TW_init(struct xlocale_ctype *l, _RuneLocale *rl)
267{
268	l->__mbrtowc = _EUC_TW_mbrtowc;
269	l->__wcrtomb = _EUC_TW_wcrtomb;
270	l->__mbsnrtowcs = _EUC_TW_mbsnrtowcs;
271	l->__wcsnrtombs = _EUC_TW_wcsnrtombs;
272	l->__mbsinit = _EUC_mbsinit;
273
274	l->runes = rl;
275	l->__mb_cur_max = 4;
276	l->__mb_sb_limit = 128;
277	return (0);
278}
279
280static size_t
281_EUC_TW_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s,
282	size_t n, mbstate_t * __restrict ps)
283{
284	return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 4, 0, 0));
285}
286
287static size_t
288_EUC_TW_mbsnrtowcs(wchar_t * __restrict dst,
289	const char ** __restrict src,
290	size_t nms, size_t len, mbstate_t * __restrict ps)
291{
292	return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_TW_mbrtowc));
293}
294
295static size_t
296_EUC_TW_wcrtomb(char * __restrict s, wchar_t wc,
297	mbstate_t * __restrict ps)
298{
299	return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 4, 0, 0));
300}
301
302static size_t
303_EUC_TW_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
304	size_t nwc, size_t len, mbstate_t * __restrict ps)
305{
306	return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_TW_wcrtomb));
307}
308
309/*
310 * Common EUC code.
311 */
312
313static size_t
314_EUC_mbrtowc_impl(wchar_t * __restrict pwc, const char * __restrict s,
315	size_t n, mbstate_t * __restrict ps,
316	uint8_t cs2, uint8_t cs2width, uint8_t cs3, uint8_t cs3width)
317{
318	_EucState *es;
319	int i, want;
320	wchar_t wc = 0;
321	unsigned char ch, chs;
322
323	es = (_EucState *)ps;
324
325	if (es->want < 0 || es->want > MB_CUR_MAX) {
326		errno = EINVAL;
327		return ((size_t)-1);
328	}
329
330	if (s == NULL) {
331		s = "";
332		n = 1;
333		pwc = NULL;
334	}
335
336	if (n == 0)
337		/* Incomplete multibyte sequence */
338		return ((size_t)-2);
339
340	if (es->want == 0) {
341		/* Fast path for plain ASCII (CS0) */
342		if (((ch = (unsigned char)*s) & 0x80) == 0) {
343			if (pwc != NULL)
344				*pwc = ch;
345			return (ch != '\0' ? 1 : 0);
346		}
347
348		if (ch >= 0xa1) {
349			/* CS1 */
350			want = 2;
351		} else if (ch == cs2) {
352			want = cs2width;
353		} else if (ch == cs3) {
354			want = cs3width;
355		} else {
356			errno = EILSEQ;
357			return ((size_t)-1);
358		}
359
360
361		es->want = want;
362		es->ch = 0;
363	} else {
364		want = es->want;
365		wc = es->ch;
366	}
367
368	for (i = 0; i < MIN(want, n); i++) {
369		wc <<= 8;
370		chs = *s;
371		wc |= chs;
372		s++;
373	}
374	if (i < want) {
375		/* Incomplete multibyte sequence */
376		es->want = want - i;
377		es->ch = wc;
378		errno = EILSEQ;
379		return ((size_t)-2);
380	}
381	if (pwc != NULL)
382		*pwc = wc;
383	es->want = 0;
384	return (wc == L'\0' ? 0 : want);
385}
386
387static size_t
388_EUC_wcrtomb_impl(char * __restrict s, wchar_t wc,
389    mbstate_t * __restrict ps,
390    uint8_t cs2, uint8_t cs2width, uint8_t cs3, uint8_t cs3width)
391{
392	_EucState *es;
393	int i, len;
394	wchar_t nm;
395
396	es = (_EucState *)ps;
397
398	if (es->want != 0) {
399		errno = EINVAL;
400		return ((size_t)-1);
401	}
402
403	if (s == NULL)
404		/* Reset to initial shift state (no-op) */
405		return (1);
406
407	if ((wc & ~0x7f) == 0) {
408		/* Fast path for plain ASCII (CS0) */
409		*s = (char)wc;
410		return (1);
411	}
412
413	/* Determine the "length" */
414	if ((unsigned)wc > 0xffffff) {
415		len = 4;
416	} else if ((unsigned)wc > 0xffff) {
417		len = 3;
418	} else if ((unsigned)wc > 0xff) {
419		len = 2;
420	} else {
421		len = 1;
422	}
423
424	if (len > MB_CUR_MAX) {
425		errno = EILSEQ;
426		return ((size_t)-1);
427	}
428
429	/* This first check excludes CS1, which is implicitly valid. */
430	if ((wc < 0xa100) || (wc > 0xffff)) {
431		/* Check for valid CS2 or CS3 */
432		nm = (wc >> ((len - 1) * 8));
433		if (nm == cs2) {
434			if (len != cs2width) {
435				errno = EILSEQ;
436				return ((size_t)-1);
437			}
438		} else if (nm == cs3) {
439			if (len != cs3width) {
440				errno = EILSEQ;
441				return ((size_t)-1);
442			}
443		} else {
444			errno = EILSEQ;
445			return ((size_t)-1);
446		}
447	}
448
449	/* Stash the bytes, least significant last */
450	for (i = len - 1; i >= 0; i--) {
451		s[i] = (wc & 0xff);
452		wc >>= 8;
453	}
454	return (len);
455}
456