1/*	$NetBSD$	*/
2/*-
3 * Copyright (c)2010 Citrus Project,
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/endian.h>
29#include <sys/mman.h>
30#include <sys/stat.h>
31#include <assert.h>
32#include <errno.h>
33#include <fcntl.h>
34#define __SETLOCALE_SOURCE__
35#include <locale.h>
36#include <stddef.h>
37#include <stdio.h>
38#include <stdlib.h>
39#include <string.h>
40#include <unistd.h>
41#include <wchar.h>
42
43#include "setlocale_local.h"
44
45#include "citrus_module.h"
46#include "citrus_ctype.h"
47
48#include "runetype_local.h"
49
50#include "multibyte.h"
51
52#include "_wctype_local.h"
53#include "_wctrans_local.h"
54
55typedef struct {
56	_RuneLocale rl;
57	unsigned char	rlp_ctype_tab  [_CTYPE_NUM_CHARS + 1];
58	short		rlp_tolower_tab[_CTYPE_NUM_CHARS + 1];
59	short		rlp_toupper_tab[_CTYPE_NUM_CHARS + 1];
60	char		rlp_codeset[33]; /* XXX */
61} _RuneLocalePriv;
62
63static __inline void
64_rune_wctype_init(_RuneLocale *rl)
65{
66	memcpy(&rl->rl_wctype, &_DefaultRuneLocale.rl_wctype,
67	    sizeof(rl->rl_wctype));
68}
69
70static __inline void
71_rune_wctrans_init(_RuneLocale *rl)
72{
73	rl->rl_wctrans[_WCTRANS_INDEX_LOWER].te_name   = "tolower";
74	rl->rl_wctrans[_WCTRANS_INDEX_LOWER].te_cached = &rl->rl_maplower[0];
75	rl->rl_wctrans[_WCTRANS_INDEX_LOWER].te_extmap = &rl->rl_maplower_ext;
76	rl->rl_wctrans[_WCTRANS_INDEX_UPPER].te_name   = "toupper";
77	rl->rl_wctrans[_WCTRANS_INDEX_UPPER].te_cached = &rl->rl_mapupper[0];
78	rl->rl_wctrans[_WCTRANS_INDEX_UPPER].te_extmap = &rl->rl_mapupper_ext;
79}
80
81static __inline void
82_rune_init_priv(_RuneLocalePriv *rlp)
83{
84#if _CTYPE_CACHE_SIZE != _CTYPE_NUM_CHARS
85	int i;
86
87	for (i = _CTYPE_CACHE_SIZE; i < _CTYPE_NUM_CHARS; ++i) {
88		rlp->rlp_ctype_tab  [i + 1] = 0;
89		rlp->rlp_tolower_tab[i + 1] = i;
90		rlp->rlp_toupper_tab[i + 1] = i;
91	}
92#endif
93	rlp->rlp_ctype_tab  [0] = 0;
94	rlp->rlp_tolower_tab[0] = EOF;
95	rlp->rlp_toupper_tab[0] = EOF;
96
97	rlp->rl.rl_ctype_tab   = (const unsigned char *)&rlp->rlp_ctype_tab[0];
98	rlp->rl.rl_tolower_tab = (const short *)&rlp->rlp_tolower_tab[0];
99	rlp->rl.rl_toupper_tab = (const short *)&rlp->rlp_toupper_tab[0];
100	rlp->rl.rl_codeset     = (const char *)&rlp->rlp_codeset[0];
101
102	_rune_wctype_init(&rlp->rl);
103	_rune_wctrans_init(&rlp->rl);
104}
105
/*
 * Look for the first occurrence of the _RUNE_CODESET tag inside the
 * `*plenvar' bytes at `var' and copy the word following it into `s'.
 *
 * The tag is only recognised when at least one byte follows it.  When it is
 * found, the first byte of the tag is overwritten with NUL and *plenvar is
 * reduced to the number of bytes that precede the tag.  The copied word ends
 * at the first space or tab, at the end of the data, or after n - 1 bytes,
 * whichever comes first.  `s' is always NUL-terminated; it is the empty
 * string when the tag is absent.
 */
static __inline void
_rune_find_codeset(char *s, size_t n,
    char *var, size_t *plenvar)
{
	size_t remain, avail, i;
	const char *value;

#define _RUNE_CODESET_LEN (sizeof(_RUNE_CODESET)-1)

	/* Advance to the tag, or until too little data is left to hold it. */
	remain = *plenvar;
	while (remain > _RUNE_CODESET_LEN &&
	    memcmp(var, _RUNE_CODESET, _RUNE_CODESET_LEN) != 0) {
		++var;
		--remain;
	}

	if (remain > _RUNE_CODESET_LEN) {
		/* Cut the variable data off right before the tag. */
		*var = '\0';
		*plenvar -= remain;

		value = var + _RUNE_CODESET_LEN;
		avail = remain - _RUNE_CODESET_LEN;
		/* i + 1 < n keeps one byte of `s' free for the terminator. */
		for (i = 0; i + 1 < n && i < avail; ++i) {
			if (value[i] == ' ' || value[i] == '\t')
				break;
			*s++ = value[i];
		}
	}
	*s = '\0';
}
131
132static __inline int
133_rune_read_file(const char * __restrict var, size_t lenvar,
134    _RuneLocale ** __restrict prl)
135{
136	int ret, i;
137	const _FileRuneLocale *frl;
138	const _FileRuneEntry *fre;
139	const uint32_t *frune;
140	_RuneLocalePriv *rlp;
141	_RuneLocale *rl;
142	_RuneEntry *re;
143	uint32_t *rune;
144	uint32_t runetype_len, maplower_len, mapupper_len, variable_len;
145	size_t len, n;
146
147	if (lenvar < sizeof(*frl))
148		return EFTYPE;
149	lenvar -= sizeof(*frl);
150	frl = (const _FileRuneLocale *)(const void *)var;
151	if (memcmp(_RUNECT10_MAGIC, &frl->frl_magic[0], sizeof(frl->frl_magic)))
152		return EFTYPE;
153
154	runetype_len = be32toh(frl->frl_runetype_ext.frr_nranges);
155	maplower_len = be32toh(frl->frl_maplower_ext.frr_nranges);
156	mapupper_len = be32toh(frl->frl_mapupper_ext.frr_nranges);
157	len = runetype_len + maplower_len + mapupper_len;
158
159	fre = (const _FileRuneEntry *)(const void *)(frl + 1);
160	frune = (const uint32_t *)(const void *)(fre + len);
161
162	variable_len = be32toh((uint32_t)frl->frl_variable_len);
163
164	n = len * sizeof(*fre);
165	if (lenvar < n)
166		return EFTYPE;
167	lenvar -= n;
168
169	n = sizeof(*rlp) + (len * sizeof(*re)) + lenvar;
170	rlp = (_RuneLocalePriv *)malloc(n);
171	if (rlp == NULL)
172		return ENOMEM;
173	_rune_init_priv(rlp);
174
175	rl = &rlp->rl;
176	re = (_RuneEntry *)(void *)(rlp + 1);
177	rune = (uint32_t *)(void *)(re + len);
178
179	for (i = 0; i < _CTYPE_CACHE_SIZE; ++i) {
180		rl->rl_runetype[i] = be32toh(frl->frl_runetype[i]);
181		rl->rl_maplower[i] = be32toh((uint32_t)frl->frl_maplower[i]);
182		rl->rl_mapupper[i] = be32toh((uint32_t)frl->frl_mapupper[i]);
183	}
184
185#define READ_RANGE(name)						\
186do {									\
187	const _FileRuneEntry *end_fre;					\
188	const uint32_t *end_frune;					\
189									\
190	rl->rl_##name##_ext.rr_nranges = name##_len;			\
191	rl->rl_##name##_ext.rr_rune_ranges = re;			\
192									\
193	end_fre = fre + name##_len;					\
194	while (fre < end_fre) {						\
195		re->re_min = be32toh((uint32_t)fre->fre_min);		\
196		re->re_max = be32toh((uint32_t)fre->fre_max);		\
197		re->re_map = be32toh((uint32_t)fre->fre_map);		\
198		if (re->re_map != 0) {					\
199			re->re_rune_types = NULL;			\
200		} else {						\
201			re->re_rune_types = rune;			\
202			len = re->re_max - re->re_min + 1;		\
203			n = len * sizeof(*frune);			\
204			if (lenvar < n) {				\
205				ret = EFTYPE;				\
206				goto err;				\
207			}						\
208			lenvar -= n;					\
209			end_frune = frune + len;			\
210			while (frune < end_frune)			\
211				*rune++ = be32toh(*frune++);		\
212		}							\
213		++fre, ++re;						\
214	}								\
215} while (/*CONSTCOND*/0)
216
217	READ_RANGE(runetype);
218	READ_RANGE(maplower);
219	READ_RANGE(mapupper);
220
221	if (lenvar < variable_len) {
222		ret = EFTYPE;
223		goto err;
224	}
225
226	memcpy((void *)rune, (void const *)frune, variable_len);
227	rl->rl_variable_len = variable_len;
228	rl->rl_variable = (void *)rune;
229
230	_rune_find_codeset(rlp->rlp_codeset, sizeof(rlp->rlp_codeset),
231	    (char *)rl->rl_variable, &rl->rl_variable_len);
232
233	ret = _citrus_ctype_open(&rl->rl_citrus_ctype, frl->frl_encoding,
234	    rl->rl_variable, rl->rl_variable_len, _PRIVSIZE);
235	if (ret)
236		goto err;
237	if (__mb_len_max_runtime <
238	    _citrus_ctype_get_mb_cur_max(rl->rl_citrus_ctype)) {
239		ret = EINVAL;
240		goto err;
241	}
242
243	for (i = 0; i < _CTYPE_CACHE_SIZE; ++i) {
244		wint_t wc;
245
246		ret = _citrus_ctype_btowc(rl->rl_citrus_ctype, i, &wc);
247		if (ret)
248			goto err;
249		if (wc == WEOF) {
250			rlp->rlp_ctype_tab[i + 1] = 0;
251			rlp->rlp_tolower_tab[i + 1] = i;
252			rlp->rlp_toupper_tab[i + 1] = i;
253		} else {
254			rlp->rlp_ctype_tab[i + 1] = (unsigned char)
255			    _runetype_to_ctype(_runetype_priv(rl, wc));
256
257#define CONVERT_MAP(name)						\
258do {									\
259	wint_t map;							\
260	int c;								\
261									\
262	map = _towctrans_priv(wc, _wctrans_##name(rl));			\
263	if (map == wc || (_citrus_ctype_wctob(rl->rl_citrus_ctype,	\
264	    map, &c)  || c == EOF))					\
265		c = i;							\
266	rlp->rlp_to##name##_tab[i + 1] = (short)c;			\
267} while (/*CONSTCOND*/0)
268
269			CONVERT_MAP(lower);
270			CONVERT_MAP(upper);
271		}
272	}
273	*prl = rl;
274	return 0;
275
276err:
277	free(rlp);
278	return ret;
279}
280
281int
282_rune_load(const char * __restrict var, size_t lenvar,
283    _RuneLocale ** __restrict prl)
284{
285	int ret;
286
287	_DIAGASSERT(var != NULL || lenvar < 1);
288	_DIAGASSERT(prl != NULL);
289
290	if (lenvar < 1)
291		return EFTYPE;
292	switch (*var) {
293	case 'R':
294		ret = _rune_read_file(var, lenvar, prl);
295		break;
296	default:
297		ret = EFTYPE;
298	}
299	return ret;
300}
301