1/*-
2 * Copyright (c) 2003, 2005 Ryuichiro Imura
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD$
27 */
28
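/*
 * kiconv(3) requires shared (dynamic) linking.  When this file is built
 * for static linking, only small stubs that fail with EINVAL are compiled,
 * which keeps the statically linked module small.
 */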
33
34#ifdef PIC
35
36#include <sys/types.h>
37#include <sys/iconv.h>
38#include <sys/sysctl.h>
39
40#include <ctype.h>
41#include <dlfcn.h>
42#include <err.h>
43#include <errno.h>
44#include <locale.h>
45#include <stdio.h>
46#include <stdlib.h>
47#include <string.h>
48#include <wctype.h>
49
50#include "quirks.h"
51
52typedef void *iconv_t;
53
/*
 * In-memory form of a 16-bit translation table as produced by
 * kiconv_xlat16_open().
 */
struct xlat16_table {
	/* One slot per row; non-NULL means the row has at least one entry. */
	uint32_t *idx[0x200];
	/* Populated rows only, stored back to back (0x80 entries per row). */
	void *data;
	/* Number of bytes used in data; 0 means no table was built. */
	size_t size;
};
59
/* Builds the translation table for one (tocode, fromcode) pair. */
static struct xlat16_table kiconv_xlat16_open(const char *, const char *, int);
/* Compares a charset name with the codeset part of the current locale. */
static int chklocale(int, const char *);

/*
 * iconv entry points used by this file.
 *
 * With ICONV_DLOPEN they are function pointers that my_iconv_init()
 * resolves from libiconv.so at run time.  Otherwise they are plain aliases
 * for the functions declared in <iconv.h> and my_iconv_init() is a no-op
 * that evaluates to 0.
 */
#ifdef ICONV_DLOPEN
static int my_iconv_init(void);
static iconv_t (*my_iconv_open)(const char *, const char *);
static size_t (*my_iconv)(iconv_t, const char **, size_t *, char **, size_t *);
static int (*my_iconv_close)(iconv_t);
#else
#include <iconv.h>
#define my_iconv_init() 0
#define my_iconv_open iconv_open
#define my_iconv iconv
#define my_iconv_close iconv_close
#endif
/* Converts one (single or double byte) character; see the definition. */
static size_t my_iconv_char(iconv_t, const u_char **, size_t *, u_char **, size_t *);
76
77int
78kiconv_add_xlat16_cspair(const char *tocode, const char *fromcode, int flag)
79{
80	int error;
81	size_t idxsize;
82	struct xlat16_table xt;
83	void *data;
84	char *p;
85	const char unicode[] = ENCODING_UNICODE;
86
87	if ((flag & KICONV_WCTYPE) == 0 &&
88	    strcmp(unicode, tocode) != 0 &&
89	    strcmp(unicode, fromcode) != 0 &&
90	    kiconv_lookupconv(unicode) == 0) {
91		error = kiconv_add_xlat16_cspair(unicode, fromcode, flag);
92		if (error)
93			return (-1);
94		error = kiconv_add_xlat16_cspair(tocode, unicode, flag);
95		return (error);
96	}
97
98	if (kiconv_lookupcs(tocode, fromcode) == 0)
99		return (0);
100
101	if (flag & KICONV_WCTYPE)
102		xt = kiconv_xlat16_open(fromcode, fromcode, flag);
103	else
104		xt = kiconv_xlat16_open(tocode, fromcode, flag);
105	if (xt.size == 0)
106		return (-1);
107
108	idxsize = sizeof(xt.idx);
109
110	if ((idxsize + xt.size) > ICONV_CSMAXDATALEN) {
111		errno = E2BIG;
112		return (-1);
113	}
114
115	if ((data = malloc(idxsize + xt.size)) != NULL) {
116		p = data;
117		memcpy(p, xt.idx, idxsize);
118		p += idxsize;
119		memcpy(p, xt.data, xt.size);
120		error = kiconv_add_xlat16_table(tocode, fromcode, data,
121		    (int)(idxsize + xt.size));
122		return (error);
123	}
124
125	return (-1);
126}
127
128int
129kiconv_add_xlat16_cspairs(const char *foreigncode, const char *localcode)
130{
131	int error, locale;
132
133	error = kiconv_add_xlat16_cspair(foreigncode, localcode,
134	    KICONV_FROM_LOWER | KICONV_FROM_UPPER);
135	if (error)
136		return (error);
137	error = kiconv_add_xlat16_cspair(localcode, foreigncode,
138	    KICONV_LOWER | KICONV_UPPER);
139	if (error)
140		return (error);
141	locale = chklocale(LC_CTYPE, localcode);
142	if (locale == 0) {
143		error = kiconv_add_xlat16_cspair(KICONV_WCTYPE_NAME, localcode,
144		    KICONV_WCTYPE);
145		if (error)
146			return (error);
147	}
148
149	return (0);
150}
151
152static struct xlat16_table
153kiconv_xlat16_open(const char *tocode, const char *fromcode, int lcase)
154{
155	u_char src[3], dst[4], *srcp, *dstp, ud, ld;
156	int us, ls, ret;
157	uint16_t c;
158	uint32_t table[0x80];
159	size_t inbytesleft, outbytesleft, pre_q_size, post_q_size;
160	struct xlat16_table xt;
161	struct quirk_replace_list *pre_q_list, *post_q_list;
162	iconv_t cd;
163	char *p;
164
165	xt.data = NULL;
166	xt.size = 0;
167
168	src[2] = '\0';
169	dst[3] = '\0';
170
171	ret = my_iconv_init();
172	if (ret)
173		return (xt);
174
175	cd = my_iconv_open(search_quirk(tocode, fromcode, &pre_q_list, &pre_q_size),
176	    search_quirk(fromcode, tocode, &post_q_list, &post_q_size));
177	if (cd == (iconv_t) (-1))
178		return (xt);
179
180	if ((xt.data = malloc(0x200 * 0x80 * sizeof(uint32_t))) == NULL)
181		return (xt);
182
183	p = xt.data;
184
185	for (ls = 0 ; ls < 0x200 ; ls++) {
186		xt.idx[ls] = NULL;
187		for (us = 0 ; us < 0x80 ; us++) {
188			srcp = src;
189			dstp = dst;
190
191			inbytesleft = 2;
192			outbytesleft = 3;
193			bzero(dst, outbytesleft);
194
195			c = ((ls & 0x100 ? us | 0x80 : us) << 8) | (u_char)ls;
196
197			if (lcase & KICONV_WCTYPE) {
198				if ((c & 0xff) == 0)
199					c >>= 8;
200				if (iswupper(c)) {
201					c = towlower(c);
202					if ((c & 0xff00) == 0)
203						c <<= 8;
204					table[us] = c | XLAT16_HAS_LOWER_CASE;
205				} else if (iswlower(c)) {
206					c = towupper(c);
207					if ((c & 0xff00) == 0)
208						c <<= 8;
209					table[us] = c | XLAT16_HAS_UPPER_CASE;
210				} else
211					table[us] = 0;
212				/*
213				 * store not NULL
214				 */
215				if (table[us])
216					xt.idx[ls] = table;
217
218				continue;
219			}
220
221			c = quirk_vendor2unix(c, pre_q_list, pre_q_size);
222			src[0] = (u_char)(c >> 8);
223			src[1] = (u_char)c;
224
225			ret = my_iconv_char(cd, (const u_char **)&srcp,
226			    &inbytesleft, &dstp, &outbytesleft);
227			if (ret == -1) {
228				table[us] = 0;
229				continue;
230			}
231
232			ud = (u_char)dst[0];
233			ld = (u_char)dst[1];
234
235			switch(outbytesleft) {
236			case 0:
237#ifdef XLAT16_ACCEPT_3BYTE_CHR
238				table[us] = (ud << 8) | ld;
239				table[us] |= (u_char)dst[2] << 16;
240				table[us] |= XLAT16_IS_3BYTE_CHR;
241#else
242				table[us] = 0;
243				continue;
244#endif
245				break;
246			case 1:
247				table[us] = quirk_unix2vendor((ud << 8) | ld,
248				    post_q_list, post_q_size);
249				if ((table[us] >> 8) == 0)
250					table[us] |= XLAT16_ACCEPT_NULL_OUT;
251				break;
252			case 2:
253				table[us] = ud;
254				if (lcase & KICONV_LOWER && ud != tolower(ud)) {
255					table[us] |= (u_char)tolower(ud) << 16;
256					table[us] |= XLAT16_HAS_LOWER_CASE;
257				}
258				if (lcase & KICONV_UPPER && ud != toupper(ud)) {
259					table[us] |= (u_char)toupper(ud) << 16;
260					table[us] |= XLAT16_HAS_UPPER_CASE;
261				}
262				break;
263			}
264
265			switch(inbytesleft) {
266			case 0:
267				if ((ls & 0xff) == 0)
268					table[us] |= XLAT16_ACCEPT_NULL_IN;
269				break;
270			case 1:
271				c = ls > 0xff ? us | 0x80 : us;
272				if (lcase & KICONV_FROM_LOWER && c != tolower(c)) {
273					table[us] |= (u_char)tolower(c) << 16;
274					table[us] |= XLAT16_HAS_FROM_LOWER_CASE;
275				}
276				if (lcase & KICONV_FROM_UPPER && c != toupper(c)) {
277					table[us] |= (u_char)toupper(c) << 16;
278					table[us] |= XLAT16_HAS_FROM_UPPER_CASE;
279				}
280				break;
281			}
282
283			if (table[us] == 0)
284				continue;
285
286			/*
287			 * store not NULL
288			 */
289			xt.idx[ls] = table;
290		}
291		if (xt.idx[ls]) {
292			memcpy(p, table, sizeof(table));
293			p += sizeof(table);
294		}
295	}
296	my_iconv_close(cd);
297
298	xt.size = p - (char *)xt.data;
299	xt.data = realloc(xt.data, xt.size);
300	return (xt);
301}
302
303static int
304chklocale(int category, const char *code)
305{
306	char *p;
307	int error = -1;
308
309	p = strchr(setlocale(category, NULL), '.');
310	if (p++) {
311		error = strcasecmp(code, p);
312		if (error) {
313			/* XXX - can't avoid calling quirk here... */
314			error = strcasecmp(code, kiconv_quirkcs(p,
315			    KICONV_VENDOR_MICSFT));
316		}
317	}
318	return (error);
319}
320
321#ifdef ICONV_DLOPEN
322static int
323my_iconv_init(void)
324{
325	void *iconv_lib;
326
327	iconv_lib = dlopen("libiconv.so", RTLD_LAZY | RTLD_GLOBAL);
328	if (iconv_lib == NULL) {
329		warn("Unable to load iconv library: %s\n", dlerror());
330		errno = ENOENT;
331		return (-1);
332	}
333	my_iconv_open = dlsym(iconv_lib, "iconv_open");
334	my_iconv = dlsym(iconv_lib, "iconv");
335	my_iconv_close = dlsym(iconv_lib, "iconv_close");
336
337	return (0);
338}
339#endif
340
341static size_t
342my_iconv_char(iconv_t cd, const u_char **ibuf, size_t * ilen, u_char **obuf,
343	size_t * olen)
344{
345	const u_char *sp;
346	u_char *dp, ilocal[3], olocal[3];
347	u_char c1, c2;
348	int ret;
349	size_t ir, or;
350
351	sp = *ibuf;
352	dp = *obuf;
353	ir = *ilen;
354
355	bzero(*obuf, *olen);
356	ret = my_iconv(cd, (const char **)&sp, ilen, (char **)&dp, olen);
357	c1 = (*obuf)[0];
358	c2 = (*obuf)[1];
359
360	if (ret == -1) {
361		if (*ilen == ir - 1 && (*ibuf)[1] == '\0' && (c1 || c2))
362			return (0);
363		else
364			return (-1);
365	}
366
367	/*
368	 * We must judge if inbuf is a single byte char or double byte char.
369	 * Here, to judge, try first byte(*sp) conversion and compare.
370	 */
371	ir = 1;
372	or = 3;
373
374	bzero(olocal, or);
375	memcpy(ilocal, *ibuf, sizeof(ilocal));
376	sp = ilocal;
377	dp = olocal;
378
379	if ((my_iconv(cd,(const char **)&sp, &ir, (char **)&dp, &or)) != -1) {
380		if (olocal[0] != c1)
381			return (ret);
382
383		if (olocal[1] == c2 && (*ibuf)[1] == '\0') {
384			/*
385			 * inbuf is a single byte char
386			 */
387			*ilen = 1;
388			*olen = or;
389			return (ret);
390		}
391
392		switch(or) {
393		case 0:
394		case 1:
395			if (olocal[1] == c2) {
396				/*
397				 * inbuf is a single byte char,
398				 * so return false here.
399				 */
400				return (-1);
401			} else {
402				/*
403				 * inbuf is a double byte char
404				 */
405				return (ret);
406			}
407			break;
408		case 2:
409			/*
410			 * should compare second byte of inbuf
411			 */
412			break;
413		}
414	} else {
415		/*
416		 * inbuf clould not be splitted, so inbuf is
417		 * a double byte char.
418		 */
419		return (ret);
420	}
421
422	/*
423	 * try second byte(*(sp+1)) conversion, and compare
424	 */
425	ir = 1;
426	or = 3;
427
428	bzero(olocal, or);
429
430	sp = ilocal + 1;
431	dp = olocal;
432
433	if ((my_iconv(cd,(const char **)&sp, &ir, (char **)&dp, &or)) != -1) {
434		if (olocal[0] == c2)
435			/*
436			 * inbuf is a single byte char
437			 */
438			return (-1);
439	}
440
441	return (ret);
442}
443
444#else /* statically linked */
445
446#include <sys/types.h>
447#include <sys/iconv.h>
448#include <errno.h>
449
/*
 * Static-link stub: translation tables cannot be generated in this build,
 * so the request always fails with errno set to EINVAL.
 */
int
kiconv_add_xlat16_cspair(const char *tocode __unused,
    const char *fromcode __unused, int flag __unused)
{
	const int unsupported = EINVAL;

	errno = unsupported;
	return (-1);
}
458
/*
 * Static-link stub: translation tables cannot be generated in this build,
 * so the request always fails with errno set to EINVAL.
 *
 * Parameter names match the dynamically linked implementation.
 */
int
kiconv_add_xlat16_cspairs(const char *foreigncode __unused,
    const char *localcode __unused)
{

	errno = EINVAL;
	return (-1);
}
465
466#endif /* PIC */
467