1120492Sfjoe/*-
2194638Sdelphij * Copyright (c) 2003, 2005 Ryuichiro Imura
3120492Sfjoe * All rights reserved.
4120492Sfjoe *
5120492Sfjoe * Redistribution and use in source and binary forms, with or without
6120492Sfjoe * modification, are permitted provided that the following conditions
7120492Sfjoe * are met:
8120492Sfjoe * 1. Redistributions of source code must retain the above copyright
9120492Sfjoe *    notice, this list of conditions and the following disclaimer.
10120492Sfjoe * 2. Redistributions in binary form must reproduce the above copyright
11120492Sfjoe *    notice, this list of conditions and the following disclaimer in the
12120492Sfjoe *    documentation and/or other materials provided with the distribution.
13120492Sfjoe *
14120492Sfjoe * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15120492Sfjoe * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16120492Sfjoe * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17120492Sfjoe * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18120492Sfjoe * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19120492Sfjoe * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20120492Sfjoe * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21120492Sfjoe * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22120492Sfjoe * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23120492Sfjoe * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24120492Sfjoe * SUCH DAMAGE.
25120492Sfjoe *
26120492Sfjoe * $FreeBSD$
27120492Sfjoe */
28120492Sfjoe
/*
 * kiconv(3) requires shared (PIC) linking.  When this file is built for
 * static linking, only stub functions that fail with EINVAL are compiled
 * in, which keeps the module small.
 */
33120492Sfjoe
34275004Semaste#ifdef PIC
35120492Sfjoe
36120492Sfjoe#include <sys/types.h>
37120492Sfjoe#include <sys/iconv.h>
38120492Sfjoe#include <sys/sysctl.h>
39120492Sfjoe
40120492Sfjoe#include <ctype.h>
41120492Sfjoe#include <dlfcn.h>
42120492Sfjoe#include <err.h>
43120492Sfjoe#include <errno.h>
44194638Sdelphij#include <locale.h>
45120492Sfjoe#include <stdio.h>
46120492Sfjoe#include <stdlib.h>
47120492Sfjoe#include <string.h>
48194638Sdelphij#include <wctype.h>
49120492Sfjoe
50120492Sfjoe#include "quirks.h"
51120492Sfjoe
/*
 * Result of kiconv_xlat16_open(): a 0x200-entry row index plus the packed
 * rows themselves.
 */
struct xlat16_table {
	uint32_t	*idx[0x200];	/* non-NULL when the row has any entry */
	void		*data;		/* packed rows of 0x80 uint32_t each */
	size_t		 size;		/* bytes of data actually in use */
};
57120492Sfjoe
58120492Sfjoestatic struct xlat16_table kiconv_xlat16_open(const char *, const char *, int);
59194638Sdelphijstatic int chklocale(int, const char *);
60120492Sfjoe
61236028Sgabor#ifdef ICONV_DLOPEN
62254273Spetertypedef void *iconv_t;
63120492Sfjoestatic int my_iconv_init(void);
64120492Sfjoestatic iconv_t (*my_iconv_open)(const char *, const char *);
65281550Stijlstatic size_t (*my_iconv)(iconv_t, char **, size_t *, char **, size_t *);
66120492Sfjoestatic int (*my_iconv_close)(iconv_t);
67236028Sgabor#else
68236028Sgabor#include <iconv.h>
69236028Sgabor#define my_iconv_init() 0
70236028Sgabor#define my_iconv_open iconv_open
71236028Sgabor#define my_iconv iconv
72236028Sgabor#define my_iconv_close iconv_close
73236028Sgabor#endif
74281550Stijlstatic size_t my_iconv_char(iconv_t, u_char **, size_t *, u_char **, size_t *);
75120492Sfjoe
76120492Sfjoeint
77120492Sfjoekiconv_add_xlat16_cspair(const char *tocode, const char *fromcode, int flag)
78120492Sfjoe{
79120492Sfjoe	int error;
80194638Sdelphij	size_t idxsize;
81120492Sfjoe	struct xlat16_table xt;
82148717Sstefanf	void *data;
83148717Sstefanf	char *p;
84227650Skevlo	const char unicode[] = ENCODING_UNICODE;
85120492Sfjoe
86227650Skevlo	if ((flag & KICONV_WCTYPE) == 0 &&
87227650Skevlo	    strcmp(unicode, tocode) != 0 &&
88227650Skevlo	    strcmp(unicode, fromcode) != 0 &&
89227650Skevlo	    kiconv_lookupconv(unicode) == 0) {
90227650Skevlo		error = kiconv_add_xlat16_cspair(unicode, fromcode, flag);
91227650Skevlo		if (error)
92227650Skevlo			return (-1);
93227650Skevlo		error = kiconv_add_xlat16_cspair(tocode, unicode, flag);
94227650Skevlo		return (error);
95227650Skevlo	}
96227650Skevlo
97194638Sdelphij	if (kiconv_lookupcs(tocode, fromcode) == 0)
98194638Sdelphij		return (0);
99120492Sfjoe
100194638Sdelphij	if (flag & KICONV_WCTYPE)
101194638Sdelphij		xt = kiconv_xlat16_open(fromcode, fromcode, flag);
102194638Sdelphij	else
103194638Sdelphij		xt = kiconv_xlat16_open(tocode, fromcode, flag);
104120492Sfjoe	if (xt.size == 0)
105120492Sfjoe		return (-1);
106120492Sfjoe
107120492Sfjoe	idxsize = sizeof(xt.idx);
108120492Sfjoe
109120492Sfjoe	if ((idxsize + xt.size) > ICONV_CSMAXDATALEN) {
110120492Sfjoe		errno = E2BIG;
111120492Sfjoe		return (-1);
112120492Sfjoe	}
113120492Sfjoe
114120492Sfjoe	if ((data = malloc(idxsize + xt.size)) != NULL) {
115120492Sfjoe		p = data;
116120492Sfjoe		memcpy(p, xt.idx, idxsize);
117120492Sfjoe		p += idxsize;
118120492Sfjoe		memcpy(p, xt.data, xt.size);
119120492Sfjoe		error = kiconv_add_xlat16_table(tocode, fromcode, data,
120120492Sfjoe		    (int)(idxsize + xt.size));
121120492Sfjoe		return (error);
122120492Sfjoe	}
123120492Sfjoe
124120492Sfjoe	return (-1);
125120492Sfjoe}
126120492Sfjoe
127123293Sfjoeint
128123293Sfjoekiconv_add_xlat16_cspairs(const char *foreigncode, const char *localcode)
129123293Sfjoe{
130194638Sdelphij	int error, locale;
131123293Sfjoe
132123293Sfjoe	error = kiconv_add_xlat16_cspair(foreigncode, localcode,
133123293Sfjoe	    KICONV_FROM_LOWER | KICONV_FROM_UPPER);
134123293Sfjoe	if (error)
135123293Sfjoe		return (error);
136123293Sfjoe	error = kiconv_add_xlat16_cspair(localcode, foreigncode,
137123293Sfjoe	    KICONV_LOWER | KICONV_UPPER);
138123293Sfjoe	if (error)
139123293Sfjoe		return (error);
140194638Sdelphij	locale = chklocale(LC_CTYPE, localcode);
141194638Sdelphij	if (locale == 0) {
142194638Sdelphij		error = kiconv_add_xlat16_cspair(KICONV_WCTYPE_NAME, localcode,
143194638Sdelphij		    KICONV_WCTYPE);
144194638Sdelphij		if (error)
145194638Sdelphij			return (error);
146194638Sdelphij	}
147194638Sdelphij
148123293Sfjoe	return (0);
149123293Sfjoe}
150123293Sfjoe
151120492Sfjoestatic struct xlat16_table
152120492Sfjoekiconv_xlat16_open(const char *tocode, const char *fromcode, int lcase)
153120492Sfjoe{
154120492Sfjoe	u_char src[3], dst[4], *srcp, *dstp, ud, ld;
155120492Sfjoe	int us, ls, ret;
156120492Sfjoe	uint16_t c;
157120492Sfjoe	uint32_t table[0x80];
158120492Sfjoe	size_t inbytesleft, outbytesleft, pre_q_size, post_q_size;
159120492Sfjoe	struct xlat16_table xt;
160120492Sfjoe	struct quirk_replace_list *pre_q_list, *post_q_list;
161120492Sfjoe	iconv_t cd;
162148717Sstefanf	char *p;
163120492Sfjoe
164120492Sfjoe	xt.data = NULL;
165120492Sfjoe	xt.size = 0;
166120492Sfjoe
167126949Sbde	src[2] = '\0';
168126949Sbde	dst[3] = '\0';
169120492Sfjoe
170120492Sfjoe	ret = my_iconv_init();
171120492Sfjoe	if (ret)
172120492Sfjoe		return (xt);
173120492Sfjoe
174120492Sfjoe	cd = my_iconv_open(search_quirk(tocode, fromcode, &pre_q_list, &pre_q_size),
175120492Sfjoe	    search_quirk(fromcode, tocode, &post_q_list, &post_q_size));
176120492Sfjoe	if (cd == (iconv_t) (-1))
177120492Sfjoe		return (xt);
178120492Sfjoe
179120492Sfjoe	if ((xt.data = malloc(0x200 * 0x80 * sizeof(uint32_t))) == NULL)
180120492Sfjoe		return (xt);
181120492Sfjoe
182120492Sfjoe	p = xt.data;
183120492Sfjoe
184120492Sfjoe	for (ls = 0 ; ls < 0x200 ; ls++) {
185120492Sfjoe		xt.idx[ls] = NULL;
186120492Sfjoe		for (us = 0 ; us < 0x80 ; us++) {
187120492Sfjoe			srcp = src;
188120492Sfjoe			dstp = dst;
189120492Sfjoe
190120492Sfjoe			inbytesleft = 2;
191120492Sfjoe			outbytesleft = 3;
192120492Sfjoe			bzero(dst, outbytesleft);
193120492Sfjoe
194120492Sfjoe			c = ((ls & 0x100 ? us | 0x80 : us) << 8) | (u_char)ls;
195194638Sdelphij
196194638Sdelphij			if (lcase & KICONV_WCTYPE) {
197194638Sdelphij				if ((c & 0xff) == 0)
198194638Sdelphij					c >>= 8;
199194638Sdelphij				if (iswupper(c)) {
200194638Sdelphij					c = towlower(c);
201194638Sdelphij					if ((c & 0xff00) == 0)
202194638Sdelphij						c <<= 8;
203194638Sdelphij					table[us] = c | XLAT16_HAS_LOWER_CASE;
204194638Sdelphij				} else if (iswlower(c)) {
205194638Sdelphij					c = towupper(c);
206194638Sdelphij					if ((c & 0xff00) == 0)
207194638Sdelphij						c <<= 8;
208194638Sdelphij					table[us] = c | XLAT16_HAS_UPPER_CASE;
209194638Sdelphij				} else
210194638Sdelphij					table[us] = 0;
211194638Sdelphij				/*
212194638Sdelphij				 * store not NULL
213194638Sdelphij				 */
214194638Sdelphij				if (table[us])
215194638Sdelphij					xt.idx[ls] = table;
216194638Sdelphij
217194638Sdelphij				continue;
218194638Sdelphij			}
219194638Sdelphij
220120492Sfjoe			c = quirk_vendor2unix(c, pre_q_list, pre_q_size);
221120492Sfjoe			src[0] = (u_char)(c >> 8);
222120492Sfjoe			src[1] = (u_char)c;
223120492Sfjoe
224281550Stijl			ret = my_iconv_char(cd, &srcp, &inbytesleft,
225281550Stijl				&dstp, &outbytesleft);
226120492Sfjoe			if (ret == -1) {
227120492Sfjoe				table[us] = 0;
228120492Sfjoe				continue;
229120492Sfjoe			}
230120492Sfjoe
231120492Sfjoe			ud = (u_char)dst[0];
232120492Sfjoe			ld = (u_char)dst[1];
233120492Sfjoe
234120492Sfjoe			switch(outbytesleft) {
235120492Sfjoe			case 0:
236120492Sfjoe#ifdef XLAT16_ACCEPT_3BYTE_CHR
237120492Sfjoe				table[us] = (ud << 8) | ld;
238120492Sfjoe				table[us] |= (u_char)dst[2] << 16;
239120492Sfjoe				table[us] |= XLAT16_IS_3BYTE_CHR;
240120492Sfjoe#else
241120492Sfjoe				table[us] = 0;
242120492Sfjoe				continue;
243120492Sfjoe#endif
244120492Sfjoe				break;
245120492Sfjoe			case 1:
246120492Sfjoe				table[us] = quirk_unix2vendor((ud << 8) | ld,
247120492Sfjoe				    post_q_list, post_q_size);
248120492Sfjoe				if ((table[us] >> 8) == 0)
249120492Sfjoe					table[us] |= XLAT16_ACCEPT_NULL_OUT;
250120492Sfjoe				break;
251120492Sfjoe			case 2:
252120492Sfjoe				table[us] = ud;
253120492Sfjoe				if (lcase & KICONV_LOWER && ud != tolower(ud)) {
254120492Sfjoe					table[us] |= (u_char)tolower(ud) << 16;
255120492Sfjoe					table[us] |= XLAT16_HAS_LOWER_CASE;
256120492Sfjoe				}
257120492Sfjoe				if (lcase & KICONV_UPPER && ud != toupper(ud)) {
258120492Sfjoe					table[us] |= (u_char)toupper(ud) << 16;
259120492Sfjoe					table[us] |= XLAT16_HAS_UPPER_CASE;
260120492Sfjoe				}
261120492Sfjoe				break;
262120492Sfjoe			}
263120492Sfjoe
264120492Sfjoe			switch(inbytesleft) {
265120492Sfjoe			case 0:
266120492Sfjoe				if ((ls & 0xff) == 0)
267120492Sfjoe					table[us] |= XLAT16_ACCEPT_NULL_IN;
268120492Sfjoe				break;
269120492Sfjoe			case 1:
270120492Sfjoe				c = ls > 0xff ? us | 0x80 : us;
271120492Sfjoe				if (lcase & KICONV_FROM_LOWER && c != tolower(c)) {
272120492Sfjoe					table[us] |= (u_char)tolower(c) << 16;
273120492Sfjoe					table[us] |= XLAT16_HAS_FROM_LOWER_CASE;
274120492Sfjoe				}
275120492Sfjoe				if (lcase & KICONV_FROM_UPPER && c != toupper(c)) {
276120492Sfjoe					table[us] |= (u_char)toupper(c) << 16;
277120492Sfjoe					table[us] |= XLAT16_HAS_FROM_UPPER_CASE;
278120492Sfjoe				}
279120492Sfjoe				break;
280120492Sfjoe			}
281120492Sfjoe
282120492Sfjoe			if (table[us] == 0)
283120492Sfjoe				continue;
284120492Sfjoe
285120492Sfjoe			/*
286120492Sfjoe			 * store not NULL
287120492Sfjoe			 */
288120492Sfjoe			xt.idx[ls] = table;
289120492Sfjoe		}
290120492Sfjoe		if (xt.idx[ls]) {
291120492Sfjoe			memcpy(p, table, sizeof(table));
292120492Sfjoe			p += sizeof(table);
293120492Sfjoe		}
294120492Sfjoe	}
295120492Sfjoe	my_iconv_close(cd);
296120492Sfjoe
297148717Sstefanf	xt.size = p - (char *)xt.data;
298120492Sfjoe	xt.data = realloc(xt.data, xt.size);
299120492Sfjoe	return (xt);
300120492Sfjoe}
301120492Sfjoe
302120492Sfjoestatic int
303194638Sdelphijchklocale(int category, const char *code)
304194638Sdelphij{
305194638Sdelphij	char *p;
306194638Sdelphij	int error = -1;
307194638Sdelphij
308194638Sdelphij	p = strchr(setlocale(category, NULL), '.');
309194638Sdelphij	if (p++) {
310194638Sdelphij		error = strcasecmp(code, p);
311194638Sdelphij		if (error) {
312194638Sdelphij			/* XXX - can't avoid calling quirk here... */
313194638Sdelphij			error = strcasecmp(code, kiconv_quirkcs(p,
314194638Sdelphij			    KICONV_VENDOR_MICSFT));
315194638Sdelphij		}
316194638Sdelphij	}
317194638Sdelphij	return (error);
318194638Sdelphij}
319194638Sdelphij
320236028Sgabor#ifdef ICONV_DLOPEN
321194638Sdelphijstatic int
322120492Sfjoemy_iconv_init(void)
323120492Sfjoe{
324120492Sfjoe	void *iconv_lib;
325120492Sfjoe
326120492Sfjoe	iconv_lib = dlopen("libiconv.so", RTLD_LAZY | RTLD_GLOBAL);
327120492Sfjoe	if (iconv_lib == NULL) {
328120492Sfjoe		warn("Unable to load iconv library: %s\n", dlerror());
329120492Sfjoe		errno = ENOENT;
330120492Sfjoe		return (-1);
331120492Sfjoe	}
332120492Sfjoe	my_iconv_open = dlsym(iconv_lib, "iconv_open");
333120492Sfjoe	my_iconv = dlsym(iconv_lib, "iconv");
334120492Sfjoe	my_iconv_close = dlsym(iconv_lib, "iconv_close");
335120492Sfjoe
336120492Sfjoe	return (0);
337120492Sfjoe}
338236028Sgabor#endif
339120492Sfjoe
340120492Sfjoestatic size_t
341281550Stijlmy_iconv_char(iconv_t cd, u_char **ibuf, size_t * ilen, u_char **obuf,
342120492Sfjoe	size_t * olen)
343120492Sfjoe{
344281550Stijl	u_char *sp, *dp, ilocal[3], olocal[3];
345120492Sfjoe	u_char c1, c2;
346120492Sfjoe	int ret;
347120492Sfjoe	size_t ir, or;
348120492Sfjoe
349120492Sfjoe	sp = *ibuf;
350120492Sfjoe	dp = *obuf;
351120492Sfjoe	ir = *ilen;
352120492Sfjoe
353120492Sfjoe	bzero(*obuf, *olen);
354281550Stijl	ret = my_iconv(cd, (char **)&sp, ilen, (char **)&dp, olen);
355120492Sfjoe	c1 = (*obuf)[0];
356120492Sfjoe	c2 = (*obuf)[1];
357120492Sfjoe
358120492Sfjoe	if (ret == -1) {
359120492Sfjoe		if (*ilen == ir - 1 && (*ibuf)[1] == '\0' && (c1 || c2))
360120492Sfjoe			return (0);
361120492Sfjoe		else
362120492Sfjoe			return (-1);
363120492Sfjoe	}
364120492Sfjoe
365120492Sfjoe	/*
366120492Sfjoe	 * We must judge if inbuf is a single byte char or double byte char.
367120492Sfjoe	 * Here, to judge, try first byte(*sp) conversion and compare.
368120492Sfjoe	 */
369120492Sfjoe	ir = 1;
370120492Sfjoe	or = 3;
371120492Sfjoe
372120492Sfjoe	bzero(olocal, or);
373120492Sfjoe	memcpy(ilocal, *ibuf, sizeof(ilocal));
374120492Sfjoe	sp = ilocal;
375120492Sfjoe	dp = olocal;
376120492Sfjoe
377281550Stijl	if ((my_iconv(cd,(char **)&sp, &ir, (char **)&dp, &or)) != -1) {
378120492Sfjoe		if (olocal[0] != c1)
379120492Sfjoe			return (ret);
380120492Sfjoe
381120492Sfjoe		if (olocal[1] == c2 && (*ibuf)[1] == '\0') {
382120492Sfjoe			/*
383120492Sfjoe			 * inbuf is a single byte char
384120492Sfjoe			 */
385120492Sfjoe			*ilen = 1;
386120492Sfjoe			*olen = or;
387120492Sfjoe			return (ret);
388120492Sfjoe		}
389120492Sfjoe
390120492Sfjoe		switch(or) {
391120492Sfjoe		case 0:
392120492Sfjoe		case 1:
393120492Sfjoe			if (olocal[1] == c2) {
394120492Sfjoe				/*
395120492Sfjoe				 * inbuf is a single byte char,
396120492Sfjoe				 * so return false here.
397120492Sfjoe				 */
398120492Sfjoe				return (-1);
399120492Sfjoe			} else {
400120492Sfjoe				/*
401120492Sfjoe				 * inbuf is a double byte char
402120492Sfjoe				 */
403120492Sfjoe				return (ret);
404120492Sfjoe			}
405120492Sfjoe			break;
406120492Sfjoe		case 2:
407120492Sfjoe			/*
408120492Sfjoe			 * should compare second byte of inbuf
409120492Sfjoe			 */
410120492Sfjoe			break;
411120492Sfjoe		}
412120492Sfjoe	} else {
413120492Sfjoe		/*
414120492Sfjoe		 * inbuf clould not be splitted, so inbuf is
415120492Sfjoe		 * a double byte char.
416120492Sfjoe		 */
417120492Sfjoe		return (ret);
418120492Sfjoe	}
419120492Sfjoe
420120492Sfjoe	/*
421120492Sfjoe	 * try second byte(*(sp+1)) conversion, and compare
422120492Sfjoe	 */
423120492Sfjoe	ir = 1;
424120492Sfjoe	or = 3;
425120492Sfjoe
426120492Sfjoe	bzero(olocal, or);
427120492Sfjoe
428120492Sfjoe	sp = ilocal + 1;
429120492Sfjoe	dp = olocal;
430120492Sfjoe
431281550Stijl	if ((my_iconv(cd,(char **)&sp, &ir, (char **)&dp, &or)) != -1) {
432120492Sfjoe		if (olocal[0] == c2)
433120492Sfjoe			/*
434120492Sfjoe			 * inbuf is a single byte char
435120492Sfjoe			 */
436120492Sfjoe			return (-1);
437120492Sfjoe	}
438120492Sfjoe
439120492Sfjoe	return (ret);
440120492Sfjoe}
441120492Sfjoe
442120492Sfjoe#else /* statically linked */
443120492Sfjoe
444194638Sdelphij#include <sys/types.h>
445194638Sdelphij#include <sys/iconv.h>
446120492Sfjoe#include <errno.h>
447120492Sfjoe
/*
 * Static-link stub: table generation is not available in this build, so
 * the call always fails with errno set to EINVAL.
 */
int
kiconv_add_xlat16_cspair(const char *tocode, const char *fromcode, int flag)
{
	/* Arguments are intentionally ignored. */
	(void)tocode;
	(void)fromcode;
	(void)flag;

	errno = EINVAL;
	return (-1);
}
456120492Sfjoe
/*
 * Static-link stub: table generation is not available in this build, so
 * the call always fails with errno set to EINVAL.
 */
int
kiconv_add_xlat16_cspairs(const char *tocode, const char *fromcode)
{
	/* Arguments are intentionally ignored. */
	(void)tocode;
	(void)fromcode;

	errno = EINVAL;
	return (-1);
}
463123293Sfjoe
464275004Semaste#endif /* PIC */
465