1219019Sgabor/*-
2219019Sgabor * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
3219019Sgabor * All rights reserved.
4219019Sgabor *
5219019Sgabor * Redistribution and use in source and binary forms, with or without
6219019Sgabor * modification, are permitted provided that the following conditions
7219019Sgabor * are met:
8219019Sgabor * 1. Redistributions of source code must retain the above copyright
9219019Sgabor *    notice, this list of conditions and the following disclaimer.
10219019Sgabor * 2. Redistributions in binary form must reproduce the above copyright
11219019Sgabor *    notice, this list of conditions and the following disclaimer in the
12219019Sgabor *    documentation and/or other materials provided with the distribution.
13219019Sgabor *
14219019Sgabor * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15219019Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16219019Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17219019Sgabor * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18219019Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19219019Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20219019Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21219019Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22219019Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23219019Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24219019Sgabor * SUCH DAMAGE.
25219019Sgabor */
26219019Sgabor
27219019Sgabor#include <sys/cdefs.h>
28219019Sgabor__FBSDID("$FreeBSD$");
29219019Sgabor
30219019Sgabor#include <sys/endian.h>
31219019Sgabor#include <sys/types.h>
32219019Sgabor
33219019Sgabor#include <err.h>
34219019Sgabor#include <errno.h>
35219019Sgabor#include <iconv.h>
36219019Sgabor#include <stdbool.h>
37219019Sgabor#include <stdio.h>
38219019Sgabor#include <stdlib.h>
39219019Sgabor#include <string.h>
40219019Sgabor
/*
 * Flags flipped by the callbacks below so the test cases can tell whether
 * the library actually invoked them:
 *   uc_hook  - set to true by unicode_hook()
 *   wc_hook  - set to true by wchar_hook()
 *   mb_uc_fb - set to true by mb_to_uc_fb()
 * They are never reset, so each one records "called at least once".
 */
static bool uc_hook = false;
static bool wc_hook = false;
static bool mb_uc_fb = false;

/* Conversion hooks installed through iconvctl(ICONV_SET_HOOKS). */
void	 unicode_hook(unsigned int mbr, void *data);
void	 wchar_hook(wchar_t wc, void *data);

/*
 * Multibyte-to-Unicode fallback installed through
 * iconvctl(ICONV_SET_FALLBACKS).
 */
void    mb_to_uc_fb(const char *, size_t,
            void (*write_replacement) (const unsigned int *, size_t, void *),
            void *, void *);
51219019Sgabor
52219019Sgaborstatic int
53219019Sgaborctl_get_translit1(void)
54219019Sgabor{
55219019Sgabor	iconv_t cd;
56219019Sgabor	int arg, ret;
57219019Sgabor
58219019Sgabor	cd = iconv_open("ASCII//TRANSLIT", "UTF-8");
59219019Sgabor	if (cd == (iconv_t)-1)
60219019Sgabor		return (-1);
61219019Sgabor	if (iconvctl(cd, ICONV_GET_TRANSLITERATE, &arg) == 0)
62219019Sgabor		ret = (arg == 1) ? 0 : -1;
63219019Sgabor	else
64219019Sgabor		ret = -1;
65219019Sgabor	if (iconv_close(cd) == -1)
66219019Sgabor		return (-1);
67219019Sgabor	return (ret);
68219019Sgabor}
69219019Sgabor
70219019Sgaborstatic int
71219019Sgaborctl_get_translit2(void)
72219019Sgabor{
73219019Sgabor	iconv_t cd;
74219019Sgabor	int arg, ret;
75219019Sgabor
76219019Sgabor	cd = iconv_open("ASCII", "UTF-8");
77219019Sgabor	if (cd == (iconv_t)-1)
78219019Sgabor		return (-1);
79219019Sgabor	if (iconvctl(cd, ICONV_GET_TRANSLITERATE, &arg) == 0)
80219019Sgabor		ret = (arg == 0) ? 0 : -1;
81219019Sgabor	else
82219019Sgabor		ret = -1;
83219019Sgabor	if (iconv_close(cd) == -1)
84219019Sgabor		return (-1);
85219019Sgabor	return (ret);
86219019Sgabor}
87219019Sgabor
88219019Sgaborstatic int
89219019Sgaborctl_set_translit1(void)
90219019Sgabor{
91219019Sgabor	iconv_t cd;
92219019Sgabor	int arg = 1, ret;
93219019Sgabor
94219019Sgabor	cd = iconv_open("ASCII", "UTF-8");
95219019Sgabor	if (cd == (iconv_t)-1)
96219019Sgabor		return (-1);
97219019Sgabor	ret = iconvctl(cd, ICONV_SET_TRANSLITERATE, &arg) == 0 ? 0 : -1;
98219019Sgabor	if (iconv_close(cd) == -1)
99219019Sgabor		return (-1);
100219019Sgabor	return (ret);
101219019Sgabor}
102219019Sgabor
103219019Sgaborstatic int
104219019Sgaborctl_set_translit2(void)
105219019Sgabor{
106219019Sgabor	iconv_t cd;
107219019Sgabor	int arg = 0, ret;
108219019Sgabor
109219019Sgabor	cd = iconv_open("ASCII//TRANSLIT", "UTF-8");
110219019Sgabor	if (cd == (iconv_t)-1)
111219019Sgabor		return (-1);
112219019Sgabor	ret = iconvctl(cd, ICONV_SET_TRANSLITERATE, &arg) == 0 ? 0 : -1;
113219019Sgabor	if (iconv_close(cd) == -1)
114219019Sgabor		return (-1);
115219019Sgabor	return (ret);
116219019Sgabor}
117219019Sgabor
118219019Sgaborstatic int
119219019Sgaborctl_get_discard_ilseq1(void)
120219019Sgabor{
121219019Sgabor	iconv_t cd;
122219019Sgabor        int arg, ret;
123219019Sgabor
124219019Sgabor	cd = iconv_open("ASCII", "UTF-8");
125219019Sgabor	if (cd == (iconv_t)-1)
126219019Sgabor		return (-1);
127219019Sgabor	if (iconvctl(cd, ICONV_GET_DISCARD_ILSEQ, &arg) == 0)
128219019Sgabor		ret = arg == 0 ? 0 : -1;
129219019Sgabor	else
130219019Sgabor		ret = -1;
131219019Sgabor	if (iconv_close(cd) == -1)
132219019Sgabor		return (-1);
133219019Sgabor	return (ret);
134219019Sgabor}
135219019Sgabor
136219019Sgaborstatic int
137219019Sgaborctl_get_discard_ilseq2(void)
138219019Sgabor{
139219019Sgabor	iconv_t cd;
140219019Sgabor	int arg, ret;
141219019Sgabor
142219019Sgabor	cd = iconv_open("ASCII//IGNORE", "UTF-8");
143219019Sgabor	if (cd == (iconv_t)-1)
144219019Sgabor		return (-1);
145219019Sgabor	if (iconvctl(cd, ICONV_GET_DISCARD_ILSEQ, &arg) == 0)
146219019Sgabor		ret = arg == 1 ? 0 : -1;
147219019Sgabor	else
148219019Sgabor		ret = -1;
149219019Sgabor	if (iconv_close(cd) == -1)
150219019Sgabor		return (-1);
151219019Sgabor	return (ret);
152219019Sgabor}
153219019Sgabor
154219019Sgaborstatic int
155219019Sgaborctl_set_discard_ilseq1(void)
156219019Sgabor{
157219019Sgabor	iconv_t cd;
158219019Sgabor	int arg = 1, ret;
159219019Sgabor
160219019Sgabor	cd = iconv_open("ASCII", "UTF-8");
161219019Sgabor	if (cd == (iconv_t)-1)
162219019Sgabor		return (-1);
163219019Sgabor	ret = iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, &arg) == 0 ? 0 : -1;
164219019Sgabor	if (iconv_close(cd) == -1)
165219019Sgabor		return (-1);
166219019Sgabor	return (ret);
167219019Sgabor}
168219019Sgabor
169219019Sgaborstatic int
170219019Sgaborctl_set_discard_ilseq2(void)
171219019Sgabor{
172219019Sgabor	iconv_t cd;
173219019Sgabor        int arg = 0, ret;
174219019Sgabor
175219019Sgabor	cd = iconv_open("ASCII//IGNORE", "UTF-8");
176219019Sgabor	if (cd == (iconv_t)-1)
177219019Sgabor	return (-1);
178219019Sgabor	ret = iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, &arg) == 0 ? 0 : -1;
179219019Sgabor	if (iconv_close(cd) == -1)
180219019Sgabor		return (-1);
181219019Sgabor	return (ret);
182219019Sgabor}
183219019Sgabor
184219019Sgaborstatic int
185219019Sgaborctl_trivialp1(void)
186219019Sgabor{
187219019Sgabor	iconv_t cd;
188219019Sgabor        int arg, ret;
189219019Sgabor
190219019Sgabor	cd = iconv_open("latin2", "latin2");
191219019Sgabor	if (cd == (iconv_t)-1)
192219019Sgabor		return (-1);
193219019Sgabor	if (iconvctl(cd, ICONV_TRIVIALP, &arg) == 0) {
194219019Sgabor		ret = (arg == 1) ? 0 : -1;
195219019Sgabor        } else
196219019Sgabor                ret = -1;
197219019Sgabor	if (iconv_close(cd) == -1)
198219019Sgabor		return (-1);
199219019Sgabor	return (ret);
200219019Sgabor}
201219019Sgabor
202219019Sgaborstatic int
203219019Sgaborctl_trivialp2(void)
204219019Sgabor{
205219019Sgabor	iconv_t cd;
206219019Sgabor	int arg, ret;
207219019Sgabor
208219019Sgabor	cd = iconv_open("ASCII", "KOI8-R");
209219019Sgabor	if (cd == (iconv_t)-1)
210219019Sgabor		return (-1);
211219019Sgabor	if (iconvctl(cd, ICONV_TRIVIALP, &arg) == 0) {
212219019Sgabor		ret = (arg == 0) ? 0 : -1;
213219019Sgabor	} else
214219019Sgabor		ret = -1;
215219019Sgabor	if (iconv_close(cd) == -1)
216219019Sgabor		return (-1);
217219019Sgabor	return (ret);
218219019Sgabor}
219219019Sgabor
220219019Sgaborvoid
221219019Sgaborunicode_hook(unsigned int mbr, void *data)
222219019Sgabor{
223219019Sgabor
224219019Sgabor#ifdef VERBOSE
225219019Sgabor	printf("Unicode hook: %u\n", mbr);
226219019Sgabor#endif
227219019Sgabor	uc_hook = true;
228219019Sgabor}
229219019Sgabor
230219019Sgaborvoid
231219019Sgaborwchar_hook(wchar_t wc, void *data)
232219019Sgabor{
233219019Sgabor
234219019Sgabor#ifdef VERBOSE
235219019Sgabor	printf("Wchar hook: %ull\n", wc);
236219019Sgabor#endif
237219019Sgabor	wc_hook = true;
238219019Sgabor}
239219019Sgabor
240219019Sgaborstatic int
241219019Sgaborctl_uc_hook(void)
242219019Sgabor{
243219019Sgabor	struct iconv_hooks hooks;
244219019Sgabor	iconv_t cd;
245219019Sgabor	size_t inbytesleft = 15, outbytesleft = 40;
246219019Sgabor	const char **inptr;
247219019Sgabor	const char *s = "Hello World!";
248219019Sgabor	char **outptr;
249219019Sgabor	char *outbuf;
250219019Sgabor
251219019Sgabor	inptr = &s;
252219019Sgabor	hooks.uc_hook = unicode_hook;
253219019Sgabor	hooks.wc_hook = NULL;
254219019Sgabor
255219019Sgabor	outbuf = malloc(40);
256219019Sgabor	outptr = &outbuf;
257219019Sgabor
258219019Sgabor	cd = iconv_open("UTF-8", "ASCII");
259219019Sgabor	if (cd == (iconv_t)-1)
260219019Sgabor		return (-1);
261219019Sgabor	if (iconvctl(cd, ICONV_SET_HOOKS, (void *)&hooks) != 0)
262219019Sgabor		return (-1);
263219019Sgabor	if (iconv(cd, inptr, &inbytesleft, outptr, &outbytesleft) == (size_t)-1)
264219019Sgabor		return (-1);
265219019Sgabor	if (iconv_close(cd) == -1)
266219019Sgabor		return (-1);
267219019Sgabor	return (uc_hook ? 0 : 1);
268219019Sgabor}
269219019Sgabor
270219019Sgaborstatic int
271219019Sgaborctl_wc_hook(void)
272219019Sgabor{
273219019Sgabor	struct iconv_hooks hooks;
274219019Sgabor	iconv_t cd;
275219019Sgabor	size_t inbytesleft, outbytesleft = 40;
276219019Sgabor	const char **inptr;
277219019Sgabor	const char *s = "Hello World!";
278219019Sgabor	char **outptr;
279219019Sgabor	char *outbuf;
280219019Sgabor
281219019Sgabor	inptr = &s;
282219019Sgabor	hooks.wc_hook = wchar_hook;
283219019Sgabor	hooks.uc_hook = NULL;
284219019Sgabor
285219019Sgabor	outbuf = malloc(40);
286219019Sgabor	outptr = &outbuf;
287219019Sgabor	inbytesleft = sizeof(s);
288219019Sgabor
289219019Sgabor	cd = iconv_open("SHIFT_JIS", "ASCII");
290219019Sgabor	if (cd == (iconv_t)-1)
291219019Sgabor		return (-1);
292219019Sgabor	if (iconvctl(cd, ICONV_SET_HOOKS, (void *)&hooks) != 0)
293219019Sgabor		return (-1);
294219019Sgabor	if (iconv(cd, inptr, &inbytesleft, outptr, &outbytesleft) == (size_t)-1)
295219019Sgabor		return (-1);
296219019Sgabor	if (iconv_close(cd) == -1)
297219019Sgabor		return (-1);
298219019Sgabor	return (wc_hook ? 0 : 1);
299219019Sgabor}
300219019Sgabor
301219019Sgabor
302219019Sgabor
/*
 * Compares the canonical name reported for "latin2" with "ISO-8859-2".
 * Returns the strcmp() result, i.e. 0 when they match.
 */
static int
gnu_canonicalize1(void)
{
	const char *canon;

	canon = iconv_canonicalize("latin2");
	return (strcmp(canon, "ISO-8859-2"));
}
309219019Sgabor
/*
 * Checks that "ASCII" and "latin2" do not canonicalize to the same name.
 * Returns 0 when the two canonical names differ and 1 when they are equal.
 */
static int
gnu_canonicalize2(void)
{
	const char *ascii_name, *latin2_name;

	ascii_name = iconv_canonicalize("ASCII");
	latin2_name = iconv_canonicalize("latin2");
	return (strcmp(ascii_name, latin2_name) == 0);
}
316219019Sgabor
317219019Sgabor
/*
 * Callback handed to iconvlist(). Stores a verdict in the int that data
 * points to and returns that same value: -1 when a non-zero count comes
 * with a NULL names array, 0 otherwise.
 */
static int
iconvlist_cb(unsigned int count, const char * const *names, void *data)
{
	int *verdict = data;

	if (names == NULL && count > 0)
		*verdict = -1;
	else
		*verdict = 0;
	return (*verdict);
}
324219019Sgabor
325219019Sgaborstatic int
326219019Sgaborgnu_iconvlist(void)
327219019Sgabor{
328219019Sgabor	int i;
329219019Sgabor
330219019Sgabor	iconvlist(iconvlist_cb, (void *)&i);
331219019Sgabor	return (i);
332219019Sgabor}
333219019Sgabor
334219019Sgaborvoid
335219019Sgabormb_to_uc_fb(const char* inbuf, size_t inbufsize,
336219019Sgabor    void (*write_replacement)(const unsigned int *buf, size_t buflen,
337219019Sgabor       void* callback_arg), void* callback_arg, void* data)
338219019Sgabor{
339219019Sgabor	unsigned int c = 0x3F;
340219019Sgabor
341219019Sgabor	mb_uc_fb = true;
342219019Sgabor	write_replacement((const unsigned int *)&c, 1, NULL);
343219019Sgabor}
344219019Sgabor
345219019Sgaborstatic int __unused
346219019Sgaborctl_mb_to_uc_fb(void)
347219019Sgabor{
348219019Sgabor	struct iconv_fallbacks fb;
349219019Sgabor	iconv_t cd;
350219019Sgabor	size_t inbytesleft, outbytesleft;
351219019Sgabor	uint16_t inbuf[1] = { 0xF187 };
352219019Sgabor	uint8_t outbuf[4] = { 0x00, 0x00, 0x00, 0x00 };
353219019Sgabor	const char *inptr;
354219019Sgabor	char *outptr;
355219019Sgabor	int ret;
356219019Sgabor
357219019Sgabor	if ((cd = iconv_open("UTF-32", "UTF-8")) == (iconv_t)-1)
358219019Sgabor		return (1);
359219019Sgabor
360219019Sgabor	fb.uc_to_mb_fallback = NULL;
361219019Sgabor	fb.mb_to_wc_fallback = NULL;
362219019Sgabor	fb.wc_to_mb_fallback = NULL;
363219019Sgabor	fb.mb_to_uc_fallback = mb_to_uc_fb;
364219019Sgabor	fb.data = NULL;
365219019Sgabor
366219019Sgabor	if (iconvctl(cd, ICONV_SET_FALLBACKS, (void *)&fb) != 0)
367219019Sgabor		return (1);
368219019Sgabor
369219019Sgabor	inptr = (const char *)inbuf;
370219019Sgabor	outptr = (char *)outbuf;
371219019Sgabor	inbytesleft = 2;
372219019Sgabor	outbytesleft = 4;
373219019Sgabor
374219019Sgabor	errno = 0;
375219019Sgabor	ret = iconv(cd, &inptr, &inbytesleft, &outptr, &outbytesleft);
376219019Sgabor
377219019Sgabor#ifdef VERBOSE
378219019Sgabor	printf("mb_uc fallback: %c\n", outbuf[0]);
379219019Sgabor#endif
380219019Sgabor
381219019Sgabor	if (mb_uc_fb && (outbuf[0] == 0x3F))
382219019Sgabor		return (0);
383219019Sgabor	else
384219019Sgabor		return (1);
385219019Sgabor}
386219019Sgabor
387219019Sgaborstatic int
388219019Sgaborgnu_openinto(void)
389219019Sgabor{
390219019Sgabor	iconv_allocation_t *myspace;
391219019Sgabor	size_t inbytesleft, outbytesleft;
392219019Sgabor	const char *inptr;
393219019Sgabor	char *inbuf = "works!", *outptr;
394219019Sgabor	char outbuf[6];
395219019Sgabor
396219019Sgabor	if ((myspace = (iconv_allocation_t *)malloc(sizeof(iconv_allocation_t))) == NULL)
397219019Sgabor		return (1);
398219019Sgabor	if (iconv_open_into("ASCII", "ASCII", myspace) == -1)
399219019Sgabor		return (1);
400219019Sgabor
401219019Sgabor	inptr = (const char *)inbuf;
402219019Sgabor	outptr = (char *)outbuf;
403219019Sgabor	inbytesleft = 6;
404219019Sgabor	outbytesleft = 6;
405219019Sgabor
406219019Sgabor	iconv((iconv_t)myspace, &inptr, &inbytesleft, &outptr, &outbytesleft);
407219019Sgabor
408219019Sgabor	return ((memcmp(inbuf, outbuf, 6) == 0)	? 0 : 1);
409219019Sgabor}
410219019Sgabor
/*
 * Runs one test case and reports the outcome on stdout.
 *
 * tester: the case to run; a return value of 0 means success, anything
 *     else is reported as a failure together with the value.
 * label: name printed in front of the verdict.
 */
static void
test(int (tester) (void), const char * label)
{
	const int status = tester();

	if (status == 0) {
		printf("%s succeeded\n", label);
		return;
	}
	printf("%s failed (%d)\n", label, status);
}
421219019Sgabor
422219019Sgaborint
423219019Sgabormain(void)
424219019Sgabor{
425219019Sgabor	test(ctl_get_translit1, "ctl_get_translit1");
426219019Sgabor	test(ctl_get_translit2, "ctl_get_translit2");
427219019Sgabor	test(ctl_set_translit1, "ctl_set_translit1");
428219019Sgabor	test(ctl_set_translit2, "ctl_set_translit2");
429219019Sgabor	test(ctl_get_discard_ilseq1, "ctl_get_discard_ilseq1");
430219019Sgabor	test(ctl_get_discard_ilseq2, "ctl_get_discard_ilseq2");
431219019Sgabor	test(ctl_set_discard_ilseq1, "ctl_set_discard_ilseq1");
432219019Sgabor	test(ctl_set_discard_ilseq2, "ctl_set_discard_ilseq2");
433219019Sgabor	test(ctl_trivialp1, "ctl_trivialp1");
434219019Sgabor	test(ctl_trivialp2, "ctl_trivialp2");
435219019Sgabor	test(ctl_uc_hook, "ctl_uc_hook");
436219019Sgabor	test(ctl_wc_hook, "ctl_wc_hook");
437219019Sgabor//	test(ctl_mb_to_uc_fb, "ctl_mb_to_uc_fb");
438219019Sgabor	test(gnu_openinto, "gnu_openinto");
439219019Sgabor	test(gnu_canonicalize1, "gnu_canonicalize1");
440219019Sgabor	test(gnu_canonicalize2, "gnu_canonicalize2");
441219019Sgabor	test(gnu_iconvlist, "gnu_iconvlist");
442219019Sgabor}
443