1/*-
2 * Copyright (C) 2009, 2010 Gabor Kovesdan <gabor@FreeBSD.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD$");
29
30#include <sys/endian.h>
31#include <sys/types.h>
32
33#include <err.h>
34#include <errno.h>
35#include <getopt.h>
36#include <iconv.h>
37#include <stdbool.h>
38#include <stdio.h>
39#include <stdlib.h>
40
/*
 * Bits stored in fb_flags.  Each fallback hook sets its own bit so that
 * format_diag() can report which hooks were invoked.
 */
#define	UC_TO_MB_FLAG	(1 << 0)
#define	MB_TO_WC_FLAG	(1 << 1)
#define	MB_TO_UC_FLAG	(1 << 2)
#define	WC_TO_MB_FLAG	(1 << 3)

/* Larger of the two arguments; one of them is evaluated twice. */
#define	MAX(a, b)	((b) > (a) ? (b) : (a))
47
/* Program name, provided by the C runtime; used in the usage message. */
extern char	*__progname;

/* Short option letters accepted by getopt_long(). */
static const char	*optstr = "cdilrt";

/* Fixed part of the Citrus mapping header printed when cflag is set. */
static const char	*citrus_common =
	"SRC_ZONE\t0x0000-0xFFFF\n"
	"OOB_MODE\tILSEQ\n"
	"DST_ILSEQ\t0xFFFE\n"
	"DST_UNIT_BITS\t32\n"
	"\n"
	"BEGIN_MAP\n"
	"#\n"
	"# Generated with Citrus iconv (FreeBSD)\n"
	"#\n";

/* Command-line switches, set in main(). */
bool	 cflag;		/* -c: print the Citrus header and END_MAP */
bool	 dflag;		/* -d: install fallback hooks, print diagnostics */
bool	 iflag;		/* -i: append //IGNORE to the target encoding */
bool	 lflag;		/* -l: scan input codes up to 0x100000 */
bool	 rflag;		/* -r: convert from UTF-32LE to the encoding */
bool	 tflag;		/* -t: append //TRANSLIT to the target encoding */

/* Fallback hooks invoked during the current conversion (*_FLAG bits). */
int	 fb_flags;
64
/* Conversion driver; defined after main(). */
static void	 do_conv(iconv_t, bool);

/*
 * Fallback hooks stored in struct iconv_fallbacks by main().  All four
 * only record in fb_flags that they were called.
 */
void		 mb_to_uc_fb(const char *, size_t,
		     void (*write_replacement)(const unsigned int *, size_t,
		     void *),
		     void *, void *);
void		 mb_to_wc_fb(const char *, size_t,
		     void (*write_replacement)(const wchar_t *, size_t,
		     void *),
		     void *, void *);
void		 uc_to_mb_fb(unsigned int,
		     void (*write_replacement)(const char *, size_t,
		     void *),
		     void *, void *);
void		 wc_to_mb_fb(wchar_t,
		     void (*write_replacement)(const char *, size_t,
		     void *),
		     void *, void *);
78
/*
 * Long spellings of the option letters in optstr.  None takes an
 * argument, and each one makes getopt_long() return its short letter.
 */
struct option long_options[] = {
	{ .name = "citrus",	.has_arg = no_argument, .flag = NULL, .val = 'c' },
	{ .name = "diagnostic",	.has_arg = no_argument, .flag = NULL, .val = 'd' },
	{ .name = "ignore",	.has_arg = no_argument, .flag = NULL, .val = 'i' },
	{ .name = "long",	.has_arg = no_argument, .flag = NULL, .val = 'l' },
	{ .name = "reverse",	.has_arg = no_argument, .flag = NULL, .val = 'r' },
	{ .name = "translit",	.has_arg = no_argument, .flag = NULL, .val = 't' },
	/* Terminating entry. */
	{ .name = NULL,		.has_arg = no_argument, .flag = NULL, .val = 0 }
};
89
90static void
91usage(void) {
92
93	fprintf(stderr, "Usage: %s [-cdilrt] ENCODING\n", __progname);
94	exit(EXIT_FAILURE);
95}
96
97static void
98format_diag(int errcode)
99{
100	const char *errstr;
101	const char *u2m, *m2u, *m2w, *w2m;
102
103	switch (errcode) {
104	case EINVAL:
105		errstr = "EINVAL ";
106		break;
107	case EILSEQ:
108		errstr = "EILSEQ ";
109		break;
110	case E2BIG:
111		errstr = "E2BIG ";
112		break;
113	default:
114		errstr = "UNKNOWN ";
115		break;
116	}
117
118	u2m = (fb_flags & UC_TO_MB_FLAG) ? "U2M " : "";
119	m2w = (fb_flags & MB_TO_WC_FLAG) ? "M2W " : "";
120	m2u = (fb_flags & MB_TO_UC_FLAG) ? "M2U " : "";
121	w2m = (fb_flags & WC_TO_MB_FLAG) ? "W2M " : "";
122
123	printf("%s%s%s%s%s", errstr, u2m, m2w, m2u, w2m);
124}
125
/*
 * Return the number of bytes (1 to 4) needed to hold p, i.e. the
 * position of its highest non-zero byte.  Zero counts as one byte.
 */
static int
magnitude(const uint32_t p)
{
	uint32_t rest;
	int nbytes;

	nbytes = 1;
	/* Every non-zero remainder above the low byte adds one byte. */
	for (rest = p >> 8; rest != 0; rest >>= 8)
		nbytes++;

	return (nbytes);
}
137
/*
 * Print data to stdout as "0x" followed by zero-padded upper-case hex.
 * Two digits are printed per byte reported by magnitude(), with a
 * minimum of four digits, so the field is 4, 6 or 8 digits wide.
 */
static void
format(const uint32_t data)
{
	int digits;

	digits = 2 * magnitude(data);
	if (digits < 4)
		digits = 4;

	printf("0x%0*X", digits, data);
}
160
161void
162uc_to_mb_fb(unsigned int code,
163    void (*write_replacement)(const char *buf, size_t buflen,
164       void* callback_arg), void* callback_arg, void* data)
165{
166
167	fb_flags |= UC_TO_MB_FLAG;
168}
169
170void
171mb_to_wc_fb(const char* inbuf, size_t inbufsize,
172    void (*write_replacement)(const wchar_t *buf, size_t buflen,
173       void* callback_arg), void* callback_arg, void* data)
174{
175
176	fb_flags |= MB_TO_WC_FLAG;
177}
178
179void
180mb_to_uc_fb(const char* inbuf, size_t inbufsize,
181    void (*write_replacement)(const unsigned int *buf, size_t buflen,
182       void* callback_arg), void* callback_arg, void* data)
183{
184
185	fb_flags |= MB_TO_UC_FLAG;
186}
187
188void
189wc_to_mb_fb(wchar_t wc,
190    void (*write_replacement)(const char *buf, size_t buflen,
191       void* callback_arg), void* callback_arg, void* data)
192{
193
194	fb_flags |= WC_TO_MB_FLAG;
195}
196
197int
198main (int argc, char *argv[])
199{
200	struct iconv_fallbacks fbs;
201	iconv_t cd;
202	char *tocode;
203	char c;
204
205	while (((c = getopt_long(argc, argv, optstr, long_options, NULL)) != -1)) {
206		switch (c) {
207		case 'c':
208			cflag = true;
209			break;
210		case 'd':
211			dflag = true;
212			break;
213		case 'i':
214			iflag = true;
215			break;
216		case 'l':
217			lflag = true;
218			break;
219		case 'r':
220			rflag = true;
221			break;
222		case 't':
223			tflag = true;
224			break;
225		}
226	}
227	argc -= optind;
228	argv += optind;
229
230	if (argc < 1)
231		usage();
232
233	fbs.uc_to_mb_fallback = uc_to_mb_fb;
234	fbs.mb_to_wc_fallback = mb_to_wc_fb;
235	fbs.mb_to_uc_fallback = mb_to_uc_fb;
236	fbs.wc_to_mb_fallback = wc_to_mb_fb;
237	fbs.data = NULL;
238
239	if (argc == 2) {
240		asprintf(&tocode, "%s%s%s", argv[1], tflag ? "//TRASNLIT" : "",
241		    iflag ? "//IGNORE" : "");
242
243		if ((cd = iconv_open(tocode, argv[0])) == (iconv_t)-1)
244			err(1, NULL);
245		if (dflag) {
246			if (iconvctl(cd, ICONV_SET_FALLBACKS, &fbs) != 0)
247				err(1, NULL);
248		}
249		do_conv(cd, false);
250	} else if (rflag) {
251		asprintf(&tocode, "%s%s%s", argv[0], tflag ? "//TRANSLIT" : "",
252		    iflag ? "//IGNORE" : "");
253
254		if ((cd = iconv_open(tocode, "UTF-32LE")) == (iconv_t)-1)
255			err(1, NULL);
256		if (dflag && iconvctl(cd, ICONV_SET_FALLBACKS, &fbs) != 0)
257			err(1, NULL);
258		if (cflag) {
259			printf("# $FreeBSD$\n\n");
260			printf("TYPE\t\tROWCOL\n");
261			printf("NAME\t\tUCS/%s\n", argv[0]);
262			printf("%s", citrus_common);
263		}
264		do_conv(cd, true);
265	} else {
266		if ((cd = iconv_open("UTF-32LE//TRANSLIT", argv[0])) == (iconv_t)-1)
267			err(1, NULL);
268		if (dflag && (iconvctl(cd, ICONV_SET_FALLBACKS, &fbs) != 0))
269			err(1, NULL);
270		if (cflag) {
271			printf("# $FreeBSD$\n\n");
272			printf("TYPE\t\tROWCOL\n");
273			printf("NAME\t\t%s/UCS\n", argv[0]);
274			printf("%s", citrus_common);
275                }
276		do_conv(cd, false);
277	}
278
279	if (iconv_close(cd) != 0)
280		err(1, NULL);
281
282	return (EXIT_SUCCESS);
283}
284
285static void
286do_conv(iconv_t cd, bool uniinput) {
287	size_t inbytesleft, outbytesleft, ret;
288	uint32_t outbuf;
289	uint32_t inbuf;
290	const char *inbuf_;
291	char *outbuf_;
292
293	for (inbuf = 0; inbuf < (lflag ? 0x100000 : 0x10000); inbuf += 1) {
294		if (uniinput && (inbuf >= 0xD800) && (inbuf <= 0xDF00))
295			continue;
296		inbytesleft = uniinput ? 4 : magnitude(inbuf);
297		outbytesleft = 4;
298		outbuf = 0x00000000;
299		outbuf_ = (char *)&outbuf;
300		inbuf_ = (const char *)&inbuf;
301		iconv(cd, NULL, NULL, NULL, NULL);
302		fb_flags = 0;
303		errno = 0;
304		ret = iconv(cd, &inbuf_, &inbytesleft, &outbuf_, &outbytesleft);
305		if (ret == (size_t)-1) {
306			if (dflag) {
307				format(inbuf);
308				printf(" = ");
309				format_diag(errno);
310				printf("\n");
311			}
312			continue;
313		}
314		format(inbuf);
315		printf(" = ");
316		format(outbuf);
317		printf("\n");
318	}
319	if (cflag)
320		printf("END_MAP\n");
321}
322