1219019Sgabor/*-
2219019Sgabor * Copyright (C) 2009, 2010 Gabor Kovesdan <gabor@FreeBSD.org>
3219019Sgabor * All rights reserved.
4219019Sgabor *
5219019Sgabor * Redistribution and use in source and binary forms, with or without
6219019Sgabor * modification, are permitted provided that the following conditions
7219019Sgabor * are met:
8219019Sgabor * 1. Redistributions of source code must retain the above copyright
9219019Sgabor *    notice, this list of conditions and the following disclaimer.
10219019Sgabor * 2. Redistributions in binary form must reproduce the above copyright
11219019Sgabor *    notice, this list of conditions and the following disclaimer in the
12219019Sgabor *    documentation and/or other materials provided with the distribution.
13219019Sgabor *
14219019Sgabor * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15219019Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16219019Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17219019Sgabor * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18219019Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19219019Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20219019Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21219019Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22219019Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23219019Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24219019Sgabor * SUCH DAMAGE.
25219019Sgabor */
26219019Sgabor
27219019Sgabor#include <sys/cdefs.h>
28219019Sgabor__FBSDID("$FreeBSD: releng/10.3/tools/test/iconv/tablegen/tablegen.c 219019 2011-02-25 00:04:39Z gabor $");
29219019Sgabor
30219019Sgabor#include <sys/endian.h>
31219019Sgabor#include <sys/types.h>
32219019Sgabor
33219019Sgabor#include <err.h>
34219019Sgabor#include <errno.h>
35219019Sgabor#include <getopt.h>
36219019Sgabor#include <iconv.h>
37219019Sgabor#include <stdbool.h>
38219019Sgabor#include <stdio.h>
39219019Sgabor#include <stdlib.h>
40219019Sgabor
/*
 * Bits OR-ed into fb_flags by the iconv fallback callbacks below; each one
 * records which kind of fallback fired during the current conversion.
 */
#define	UC_TO_MB_FLAG	0x01
#define	MB_TO_WC_FLAG	0x02
#define	MB_TO_UC_FLAG	0x04
#define	WC_TO_MB_FLAG	0x08

/* Larger of two values; note that both arguments may be evaluated twice. */
#define	MAX(a, b)	((b) > (a) ? (b) : (a))
47219019Sgabor
48219019Sgaborextern char	*__progname;
49219019Sgabor
/* Short-option string passed to getopt_long() in main(). */
static const char	*optstr = "cdilrt";

/*
 * Header lines printed after the TYPE/NAME lines when -c is given; the
 * matching "END_MAP" trailer is printed by do_conv().
 */
static const char	*citrus_common =
    "SRC_ZONE\t0x0000-0xFFFF\n"
    "OOB_MODE\tILSEQ\n"
    "DST_ILSEQ\t0xFFFE\n"
    "DST_UNIT_BITS\t32\n\n"
    "BEGIN_MAP\n"
    "#\n# Generated with Citrus iconv (FreeBSD)\n#\n";

/* Command-line switches, set once in main(). */
bool	cflag;		/* -c: wrap the table in a Citrus map header/trailer */
bool	dflag;		/* -d: install fallbacks and report failed conversions */
bool	iflag;		/* -i: append "//IGNORE" to the target encoding */
bool	lflag;		/* -l: scan input codes up to 0x100000 instead of 0x10000 */
bool	rflag;		/* -r: convert from UTF-32LE to the named encoding */
bool	tflag;		/* -t: append "//TRANSLIT" to the target encoding */

/* Bitmask of *_FLAG values recorded by the fallback callbacks. */
int	fb_flags;
64219019Sgabor
/*
 * Forward declarations.  do_conv() is defined after main(); the four *_fb
 * functions are the fallback hooks stored in struct iconv_fallbacks by main().
 */
static void	 do_conv(iconv_t cd, bool uniinput);

void		 mb_to_uc_fb(const char *inbuf, size_t inbufsize,
		     void (*write_replacement)(const unsigned int *buf,
		     size_t buflen, void *callback_arg),
		     void *callback_arg, void *data);
void		 mb_to_wc_fb(const char *inbuf, size_t inbufsize,
		     void (*write_replacement)(const wchar_t *buf,
		     size_t buflen, void *callback_arg),
		     void *callback_arg, void *data);
void		 uc_to_mb_fb(unsigned int code,
		     void (*write_replacement)(const char *buf,
		     size_t buflen, void *callback_arg),
		     void *callback_arg, void *data);
void		 wc_to_mb_fb(wchar_t wc,
		     void (*write_replacement)(const char *buf,
		     size_t buflen, void *callback_arg),
		     void *callback_arg, void *data);
78219019Sgabor
/*
 * Long spellings of the single-letter options in optstr.  Every entry takes
 * no argument and makes getopt_long() return the short option character.
 * The all-zero entry terminates the table.
 */
struct option long_options[] = {
	{ "citrus",     no_argument, NULL, 'c' },
	{ "diagnostic", no_argument, NULL, 'd' },
	{ "ignore",     no_argument, NULL, 'i' },
	{ "long",       no_argument, NULL, 'l' },
	{ "reverse",    no_argument, NULL, 'r' },
	{ "translit",   no_argument, NULL, 't' },
	{ NULL,         no_argument, NULL, 0 }
};
89219019Sgabor
90219019Sgaborstatic void
91219019Sgaborusage(void) {
92219019Sgabor
93219019Sgabor	fprintf(stderr, "Usage: %s [-cdilrt] ENCODING\n", __progname);
94219019Sgabor	exit(EXIT_FAILURE);
95219019Sgabor}
96219019Sgabor
97219019Sgaborstatic void
98219019Sgaborformat_diag(int errcode)
99219019Sgabor{
100219019Sgabor	const char *errstr;
101219019Sgabor	const char *u2m, *m2u, *m2w, *w2m;
102219019Sgabor
103219019Sgabor	switch (errcode) {
104219019Sgabor	case EINVAL:
105219019Sgabor		errstr = "EINVAL ";
106219019Sgabor		break;
107219019Sgabor	case EILSEQ:
108219019Sgabor		errstr = "EILSEQ ";
109219019Sgabor		break;
110219019Sgabor	case E2BIG:
111219019Sgabor		errstr = "E2BIG ";
112219019Sgabor		break;
113219019Sgabor	default:
114219019Sgabor		errstr = "UNKNOWN ";
115219019Sgabor		break;
116219019Sgabor	}
117219019Sgabor
118219019Sgabor	u2m = (fb_flags & UC_TO_MB_FLAG) ? "U2M " : "";
119219019Sgabor	m2w = (fb_flags & MB_TO_WC_FLAG) ? "M2W " : "";
120219019Sgabor	m2u = (fb_flags & MB_TO_UC_FLAG) ? "M2U " : "";
121219019Sgabor	w2m = (fb_flags & WC_TO_MB_FLAG) ? "W2M " : "";
122219019Sgabor
123219019Sgabor	printf("%s%s%s%s%s", errstr, u2m, m2w, m2u, w2m);
124219019Sgabor}
125219019Sgabor
/*
 * Return the number of bytes (1 to 4) needed to hold p, i.e. the position
 * of its most significant non-zero byte.  Zero counts as one byte.
 */
static int
magnitude(const uint32_t p)
{
	uint32_t rest;
	int nbytes = 1;

	/* Each additional non-zero high byte adds one to the count. */
	for (rest = p >> 8; rest != 0; rest >>= 8)
		nbytes++;

	return (nbytes);
}
137219019Sgabor
/*
 * Print data to stdout as "0x" followed by zero-padded upper-case hex.
 * Values that fit in one or two bytes use four digits, three-byte values
 * use six and four-byte values use eight.  No newline is written.
 */
static void
format(const uint32_t data)
{
	int bytes, digits;

	bytes = magnitude(data);
	if (bytes == 4)
		digits = 8;
	else if (bytes == 3)
		digits = 6;
	else
		digits = 4;	/* one- and two-byte values share a width */

	printf("0x%0*X", digits, data);
}
160219019Sgabor
161219019Sgaborvoid
162219019Sgaboruc_to_mb_fb(unsigned int code,
163219019Sgabor    void (*write_replacement)(const char *buf, size_t buflen,
164219019Sgabor       void* callback_arg), void* callback_arg, void* data)
165219019Sgabor{
166219019Sgabor
167219019Sgabor	fb_flags |= UC_TO_MB_FLAG;
168219019Sgabor}
169219019Sgabor
170219019Sgaborvoid
171219019Sgabormb_to_wc_fb(const char* inbuf, size_t inbufsize,
172219019Sgabor    void (*write_replacement)(const wchar_t *buf, size_t buflen,
173219019Sgabor       void* callback_arg), void* callback_arg, void* data)
174219019Sgabor{
175219019Sgabor
176219019Sgabor	fb_flags |= MB_TO_WC_FLAG;
177219019Sgabor}
178219019Sgabor
179219019Sgaborvoid
180219019Sgabormb_to_uc_fb(const char* inbuf, size_t inbufsize,
181219019Sgabor    void (*write_replacement)(const unsigned int *buf, size_t buflen,
182219019Sgabor       void* callback_arg), void* callback_arg, void* data)
183219019Sgabor{
184219019Sgabor
185219019Sgabor	fb_flags |= MB_TO_UC_FLAG;
186219019Sgabor}
187219019Sgabor
188219019Sgaborvoid
189219019Sgaborwc_to_mb_fb(wchar_t wc,
190219019Sgabor    void (*write_replacement)(const char *buf, size_t buflen,
191219019Sgabor       void* callback_arg), void* callback_arg, void* data)
192219019Sgabor{
193219019Sgabor
194219019Sgabor	fb_flags |= WC_TO_MB_FLAG;
195219019Sgabor}
196219019Sgabor
197219019Sgaborint
198219019Sgabormain (int argc, char *argv[])
199219019Sgabor{
200219019Sgabor	struct iconv_fallbacks fbs;
201219019Sgabor	iconv_t cd;
202219019Sgabor	char *tocode;
203219019Sgabor	char c;
204219019Sgabor
205219019Sgabor	while (((c = getopt_long(argc, argv, optstr, long_options, NULL)) != -1)) {
206219019Sgabor		switch (c) {
207219019Sgabor		case 'c':
208219019Sgabor			cflag = true;
209219019Sgabor			break;
210219019Sgabor		case 'd':
211219019Sgabor			dflag = true;
212219019Sgabor			break;
213219019Sgabor		case 'i':
214219019Sgabor			iflag = true;
215219019Sgabor			break;
216219019Sgabor		case 'l':
217219019Sgabor			lflag = true;
218219019Sgabor			break;
219219019Sgabor		case 'r':
220219019Sgabor			rflag = true;
221219019Sgabor			break;
222219019Sgabor		case 't':
223219019Sgabor			tflag = true;
224219019Sgabor			break;
225219019Sgabor		}
226219019Sgabor	}
227219019Sgabor	argc -= optind;
228219019Sgabor	argv += optind;
229219019Sgabor
230219019Sgabor	if (argc < 1)
231219019Sgabor		usage();
232219019Sgabor
233219019Sgabor	fbs.uc_to_mb_fallback = uc_to_mb_fb;
234219019Sgabor	fbs.mb_to_wc_fallback = mb_to_wc_fb;
235219019Sgabor	fbs.mb_to_uc_fallback = mb_to_uc_fb;
236219019Sgabor	fbs.wc_to_mb_fallback = wc_to_mb_fb;
237219019Sgabor	fbs.data = NULL;
238219019Sgabor
239219019Sgabor	if (argc == 2) {
240219019Sgabor		asprintf(&tocode, "%s%s%s", argv[1], tflag ? "//TRASNLIT" : "",
241219019Sgabor		    iflag ? "//IGNORE" : "");
242219019Sgabor
243219019Sgabor		if ((cd = iconv_open(tocode, argv[0])) == (iconv_t)-1)
244219019Sgabor			err(1, NULL);
245219019Sgabor		if (dflag) {
246219019Sgabor			if (iconvctl(cd, ICONV_SET_FALLBACKS, &fbs) != 0)
247219019Sgabor				err(1, NULL);
248219019Sgabor		}
249219019Sgabor		do_conv(cd, false);
250219019Sgabor	} else if (rflag) {
251219019Sgabor		asprintf(&tocode, "%s%s%s", argv[0], tflag ? "//TRANSLIT" : "",
252219019Sgabor		    iflag ? "//IGNORE" : "");
253219019Sgabor
254219019Sgabor		if ((cd = iconv_open(tocode, "UTF-32LE")) == (iconv_t)-1)
255219019Sgabor			err(1, NULL);
256219019Sgabor		if (dflag && iconvctl(cd, ICONV_SET_FALLBACKS, &fbs) != 0)
257219019Sgabor			err(1, NULL);
258219019Sgabor		if (cflag) {
259219019Sgabor			printf("# $FreeBSD: releng/10.3/tools/test/iconv/tablegen/tablegen.c 219019 2011-02-25 00:04:39Z gabor $\n\n");
260219019Sgabor			printf("TYPE\t\tROWCOL\n");
261219019Sgabor			printf("NAME\t\tUCS/%s\n", argv[0]);
262219019Sgabor			printf("%s", citrus_common);
263219019Sgabor		}
264219019Sgabor		do_conv(cd, true);
265219019Sgabor	} else {
266219019Sgabor		if ((cd = iconv_open("UTF-32LE//TRANSLIT", argv[0])) == (iconv_t)-1)
267219019Sgabor			err(1, NULL);
268219019Sgabor		if (dflag && (iconvctl(cd, ICONV_SET_FALLBACKS, &fbs) != 0))
269219019Sgabor			err(1, NULL);
270219019Sgabor		if (cflag) {
271219019Sgabor			printf("# $FreeBSD: releng/10.3/tools/test/iconv/tablegen/tablegen.c 219019 2011-02-25 00:04:39Z gabor $\n\n");
272219019Sgabor			printf("TYPE\t\tROWCOL\n");
273219019Sgabor			printf("NAME\t\t%s/UCS\n", argv[0]);
274219019Sgabor			printf("%s", citrus_common);
275219019Sgabor                }
276219019Sgabor		do_conv(cd, false);
277219019Sgabor	}
278219019Sgabor
279219019Sgabor	if (iconv_close(cd) != 0)
280219019Sgabor		err(1, NULL);
281219019Sgabor
282219019Sgabor	return (EXIT_SUCCESS);
283219019Sgabor}
284219019Sgabor
285219019Sgaborstatic void
286219019Sgabordo_conv(iconv_t cd, bool uniinput) {
287219019Sgabor	size_t inbytesleft, outbytesleft, ret;
288219019Sgabor	uint32_t outbuf;
289219019Sgabor	uint32_t inbuf;
290219019Sgabor	const char *inbuf_;
291219019Sgabor	char *outbuf_;
292219019Sgabor
293219019Sgabor	for (inbuf = 0; inbuf < (lflag ? 0x100000 : 0x10000); inbuf += 1) {
294219019Sgabor		if (uniinput && (inbuf >= 0xD800) && (inbuf <= 0xDF00))
295219019Sgabor			continue;
296219019Sgabor		inbytesleft = uniinput ? 4 : magnitude(inbuf);
297219019Sgabor		outbytesleft = 4;
298219019Sgabor		outbuf = 0x00000000;
299219019Sgabor		outbuf_ = (char *)&outbuf;
300219019Sgabor		inbuf_ = (const char *)&inbuf;
301219019Sgabor		iconv(cd, NULL, NULL, NULL, NULL);
302219019Sgabor		fb_flags = 0;
303219019Sgabor		errno = 0;
304219019Sgabor		ret = iconv(cd, &inbuf_, &inbytesleft, &outbuf_, &outbytesleft);
305219019Sgabor		if (ret == (size_t)-1) {
306219019Sgabor			if (dflag) {
307219019Sgabor				format(inbuf);
308219019Sgabor				printf(" = ");
309219019Sgabor				format_diag(errno);
310219019Sgabor				printf("\n");
311219019Sgabor			}
312219019Sgabor			continue;
313219019Sgabor		}
314219019Sgabor		format(inbuf);
315219019Sgabor		printf(" = ");
316219019Sgabor		format(outbuf);
317219019Sgabor		printf("\n");
318219019Sgabor	}
319219019Sgabor	if (cflag)
320219019Sgabor		printf("END_MAP\n");
321219019Sgabor}
322