1/*-
2 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28#include <sys/endian.h>
29#include <sys/types.h>
30
31#include <err.h>
32#include <errno.h>
33#include <iconv.h>
34#include <stdbool.h>
35#include <stdio.h>
36#include <stdlib.h>
37#include <string.h>
38
/* Each flag is set to true by the callback of the matching name below. */
static bool mb_uc_fb = false;
static bool uc_hook = false;
static bool wc_hook = false;

/* Callbacks stored in the members of struct iconv_hooks by the hook tests. */
void	 unicode_hook(unsigned int mbr, void *data);
void	 wchar_hook(wchar_t wc, void *data);

/* Callback stored in the mb_to_uc_fallback member of struct iconv_fallbacks. */
void	 mb_to_uc_fb(const char *inbuf, size_t inbufsize,
	    void (*write_replacement)(const unsigned int *buf, size_t buflen,
	    void *callback_arg), void *callback_arg, void *data);
49
50static int
51ctl_get_translit1(void)
52{
53	iconv_t cd;
54	int arg, ret;
55
56	cd = iconv_open("ASCII//TRANSLIT", "UTF-8");
57	if (cd == (iconv_t)-1)
58		return (-1);
59	if (iconvctl(cd, ICONV_GET_TRANSLITERATE, &arg) == 0)
60		ret = (arg == 1) ? 0 : -1;
61	else
62		ret = -1;
63	if (iconv_close(cd) == -1)
64		return (-1);
65	return (ret);
66}
67
68static int
69ctl_get_translit2(void)
70{
71	iconv_t cd;
72	int arg, ret;
73
74	cd = iconv_open("ASCII", "UTF-8");
75	if (cd == (iconv_t)-1)
76		return (-1);
77	if (iconvctl(cd, ICONV_GET_TRANSLITERATE, &arg) == 0)
78		ret = (arg == 0) ? 0 : -1;
79	else
80		ret = -1;
81	if (iconv_close(cd) == -1)
82		return (-1);
83	return (ret);
84}
85
86static int
87ctl_set_translit1(void)
88{
89	iconv_t cd;
90	int arg = 1, ret;
91
92	cd = iconv_open("ASCII", "UTF-8");
93	if (cd == (iconv_t)-1)
94		return (-1);
95	ret = iconvctl(cd, ICONV_SET_TRANSLITERATE, &arg) == 0 ? 0 : -1;
96	if (iconv_close(cd) == -1)
97		return (-1);
98	return (ret);
99}
100
101static int
102ctl_set_translit2(void)
103{
104	iconv_t cd;
105	int arg = 0, ret;
106
107	cd = iconv_open("ASCII//TRANSLIT", "UTF-8");
108	if (cd == (iconv_t)-1)
109		return (-1);
110	ret = iconvctl(cd, ICONV_SET_TRANSLITERATE, &arg) == 0 ? 0 : -1;
111	if (iconv_close(cd) == -1)
112		return (-1);
113	return (ret);
114}
115
116static int
117ctl_get_discard_ilseq1(void)
118{
119	iconv_t cd;
120        int arg, ret;
121
122	cd = iconv_open("ASCII", "UTF-8");
123	if (cd == (iconv_t)-1)
124		return (-1);
125	if (iconvctl(cd, ICONV_GET_DISCARD_ILSEQ, &arg) == 0)
126		ret = arg == 0 ? 0 : -1;
127	else
128		ret = -1;
129	if (iconv_close(cd) == -1)
130		return (-1);
131	return (ret);
132}
133
134static int
135ctl_get_discard_ilseq2(void)
136{
137	iconv_t cd;
138	int arg, ret;
139
140	cd = iconv_open("ASCII//IGNORE", "UTF-8");
141	if (cd == (iconv_t)-1)
142		return (-1);
143	if (iconvctl(cd, ICONV_GET_DISCARD_ILSEQ, &arg) == 0)
144		ret = arg == 1 ? 0 : -1;
145	else
146		ret = -1;
147	if (iconv_close(cd) == -1)
148		return (-1);
149	return (ret);
150}
151
152static int
153ctl_set_discard_ilseq1(void)
154{
155	iconv_t cd;
156	int arg = 1, ret;
157
158	cd = iconv_open("ASCII", "UTF-8");
159	if (cd == (iconv_t)-1)
160		return (-1);
161	ret = iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, &arg) == 0 ? 0 : -1;
162	if (iconv_close(cd) == -1)
163		return (-1);
164	return (ret);
165}
166
167static int
168ctl_set_discard_ilseq2(void)
169{
170	iconv_t cd;
171        int arg = 0, ret;
172
173	cd = iconv_open("ASCII//IGNORE", "UTF-8");
174	if (cd == (iconv_t)-1)
175	return (-1);
176	ret = iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, &arg) == 0 ? 0 : -1;
177	if (iconv_close(cd) == -1)
178		return (-1);
179	return (ret);
180}
181
182static int
183ctl_trivialp1(void)
184{
185	iconv_t cd;
186        int arg, ret;
187
188	cd = iconv_open("latin2", "latin2");
189	if (cd == (iconv_t)-1)
190		return (-1);
191	if (iconvctl(cd, ICONV_TRIVIALP, &arg) == 0) {
192		ret = (arg == 1) ? 0 : -1;
193        } else
194                ret = -1;
195	if (iconv_close(cd) == -1)
196		return (-1);
197	return (ret);
198}
199
200static int
201ctl_trivialp2(void)
202{
203	iconv_t cd;
204	int arg, ret;
205
206	cd = iconv_open("ASCII", "KOI8-R");
207	if (cd == (iconv_t)-1)
208		return (-1);
209	if (iconvctl(cd, ICONV_TRIVIALP, &arg) == 0) {
210		ret = (arg == 0) ? 0 : -1;
211	} else
212		ret = -1;
213	if (iconv_close(cd) == -1)
214		return (-1);
215	return (ret);
216}
217
218void
219unicode_hook(unsigned int mbr, void *data)
220{
221
222#ifdef VERBOSE
223	printf("Unicode hook: %u\n", mbr);
224#endif
225	uc_hook = true;
226}
227
228void
229wchar_hook(wchar_t wc, void *data)
230{
231
232#ifdef VERBOSE
233	printf("Wchar hook: %ull\n", wc);
234#endif
235	wc_hook = true;
236}
237
238static int
239ctl_uc_hook(void)
240{
241	struct iconv_hooks hooks;
242	iconv_t cd;
243	size_t inbytesleft = 15, outbytesleft = 40;
244	char **inptr;
245	char *s = "Hello World!";
246	char **outptr;
247	char *outbuf;
248
249	inptr = &s;
250	hooks.uc_hook = unicode_hook;
251	hooks.wc_hook = NULL;
252
253	outbuf = malloc(40);
254	outptr = &outbuf;
255
256	cd = iconv_open("UTF-8", "ASCII");
257	if (cd == (iconv_t)-1)
258		return (-1);
259	if (iconvctl(cd, ICONV_SET_HOOKS, (void *)&hooks) != 0)
260		return (-1);
261	if (iconv(cd, inptr, &inbytesleft, outptr, &outbytesleft) == (size_t)-1)
262		return (-1);
263	if (iconv_close(cd) == -1)
264		return (-1);
265	return (uc_hook ? 0 : 1);
266}
267
268static int
269ctl_wc_hook(void)
270{
271	struct iconv_hooks hooks;
272	iconv_t cd;
273	size_t inbytesleft, outbytesleft = 40;
274	char **inptr;
275	char *s = "Hello World!";
276	char **outptr;
277	char *outbuf;
278
279	inptr = &s;
280	hooks.wc_hook = wchar_hook;
281	hooks.uc_hook = NULL;
282
283	outbuf = malloc(40);
284	outptr = &outbuf;
285	inbytesleft = sizeof(s);
286
287	cd = iconv_open("SHIFT_JIS", "ASCII");
288	if (cd == (iconv_t)-1)
289		return (-1);
290	if (iconvctl(cd, ICONV_SET_HOOKS, (void *)&hooks) != 0)
291		return (-1);
292	if (iconv(cd, inptr, &inbytesleft, outptr, &outbytesleft) == (size_t)-1)
293		return (-1);
294	if (iconv_close(cd) == -1)
295		return (-1);
296	return (wc_hook ? 0 : 1);
297}
298
299
300
/*
 * Compares the result of iconv_canonicalize("latin2") with the string
 * "ISO-8859-2".
 *
 * Returns the strcmp() result, which is 0 when the two strings match.
 */
static int
gnu_canonicalize1(void)
{
	const char *canonical;

	canonical = iconv_canonicalize("latin2");
	return (strcmp(canonical, "ISO-8859-2"));
}
307
/*
 * Checks that iconv_canonicalize() gives different strings for "ASCII"
 * and "latin2".
 *
 * Returns 0 when the two results differ and 1 when they compare equal.
 */
static int
gnu_canonicalize2(void)
{
	const char *ascii, *latin2;

	ascii = iconv_canonicalize("ASCII");
	latin2 = iconv_canonicalize("latin2");
	return (strcmp(ascii, latin2) == 0 ? 1 : 0);
}
314
315
/*
 * Callback passed to iconvlist() by gnu_iconvlist().
 *
 * Stores -1 in the int that data points to when count is non-zero but
 * names is NULL, and 0 otherwise.  The stored value is also returned.
 */
static int
iconvlist_cb(unsigned int count, const char * const *names, void *data)
{
	int *result = data;

	if (count > 0 && names == NULL)
		*result = -1;
	else
		*result = 0;
	return (*result);
}
322
323static int
324gnu_iconvlist(void)
325{
326	int i;
327
328	iconvlist(iconvlist_cb, (void *)&i);
329	return (i);
330}
331
332void
333mb_to_uc_fb(const char* inbuf, size_t inbufsize,
334    void (*write_replacement)(const unsigned int *buf, size_t buflen,
335       void* callback_arg), void* callback_arg, void* data)
336{
337	unsigned int c = 0x3F;
338
339	mb_uc_fb = true;
340	write_replacement((const unsigned int *)&c, 1, NULL);
341}
342
343static int __unused
344ctl_mb_to_uc_fb(void)
345{
346	struct iconv_fallbacks fb;
347	iconv_t cd;
348	size_t inbytesleft, outbytesleft;
349	uint16_t inbuf[1] = { 0xF187 };
350	uint8_t outbuf[4] = { 0x00, 0x00, 0x00, 0x00 };
351	char *inptr;
352	char *outptr;
353	int ret;
354
355	if ((cd = iconv_open("UTF-32", "UTF-8")) == (iconv_t)-1)
356		return (1);
357
358	fb.uc_to_mb_fallback = NULL;
359	fb.mb_to_wc_fallback = NULL;
360	fb.wc_to_mb_fallback = NULL;
361	fb.mb_to_uc_fallback = mb_to_uc_fb;
362	fb.data = NULL;
363
364	if (iconvctl(cd, ICONV_SET_FALLBACKS, (void *)&fb) != 0)
365		return (1);
366
367	inptr = (char *)inbuf;
368	outptr = (char *)outbuf;
369	inbytesleft = 2;
370	outbytesleft = 4;
371
372	errno = 0;
373	ret = iconv(cd, &inptr, &inbytesleft, &outptr, &outbytesleft);
374
375#ifdef VERBOSE
376	printf("mb_uc fallback: %c\n", outbuf[0]);
377#endif
378
379	if (mb_uc_fb && (outbuf[0] == 0x3F))
380		return (0);
381	else
382		return (1);
383}
384
385static int
386gnu_openinto(void)
387{
388	iconv_allocation_t *myspace;
389	size_t inbytesleft, outbytesleft;
390	char *inptr;
391	char *inbuf = "works!", *outptr;
392	char outbuf[6];
393
394	if ((myspace = (iconv_allocation_t *)malloc(sizeof(iconv_allocation_t))) == NULL)
395		return (1);
396	if (iconv_open_into("ASCII", "ASCII", myspace) == -1)
397		return (1);
398
399	inptr = (char *)inbuf;
400	outptr = (char *)outbuf;
401	inbytesleft = 6;
402	outbytesleft = 6;
403
404	iconv((iconv_t)myspace, &inptr, &inbytesleft, &outptr, &outbytesleft);
405
406	return ((memcmp(inbuf, outbuf, 6) == 0)	? 0 : 1);
407}
408
/*
 * Runs one test function and prints its outcome to stdout.
 *
 * tester is called once; a return value of 0 prints "<label> succeeded",
 * any other value prints "<label> failed (<value>)".
 */
static void
test(int (tester) (void), const char * label)
{
	const int ret = tester();

	if (ret == 0) {
		printf("%s succeeded\n", label);
		return;
	}
	printf("%s failed (%d)\n", label, ret);
}
419
420int
421main(void)
422{
423	test(ctl_get_translit1, "ctl_get_translit1");
424	test(ctl_get_translit2, "ctl_get_translit2");
425	test(ctl_set_translit1, "ctl_set_translit1");
426	test(ctl_set_translit2, "ctl_set_translit2");
427	test(ctl_get_discard_ilseq1, "ctl_get_discard_ilseq1");
428	test(ctl_get_discard_ilseq2, "ctl_get_discard_ilseq2");
429	test(ctl_set_discard_ilseq1, "ctl_set_discard_ilseq1");
430	test(ctl_set_discard_ilseq2, "ctl_set_discard_ilseq2");
431	test(ctl_trivialp1, "ctl_trivialp1");
432	test(ctl_trivialp2, "ctl_trivialp2");
433	test(ctl_uc_hook, "ctl_uc_hook");
434	test(ctl_wc_hook, "ctl_wc_hook");
435//	test(ctl_mb_to_uc_fb, "ctl_mb_to_uc_fb");
436	test(gnu_openinto, "gnu_openinto");
437	test(gnu_canonicalize1, "gnu_canonicalize1");
438	test(gnu_canonicalize2, "gnu_canonicalize2");
439	test(gnu_iconvlist, "gnu_iconvlist");
440}
441