/*-
 * Copyright (C) 2009, 2010 Gabor Kovesdan <gabor@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
2691041Sobrien
2791041Sobrien#include <sys/cdefs.h>
2891041Sobrien__FBSDID("$FreeBSD$");
2991041Sobrien
3091041Sobrien#include <sys/endian.h>
3191041Sobrien#include <sys/types.h>
3291041Sobrien
3391041Sobrien#include <err.h>
3491041Sobrien#include <errno.h>
3591041Sobrien#include <getopt.h>
3691041Sobrien#include <iconv.h>
3791041Sobrien#include <stdbool.h>
3891041Sobrien#include <stdio.h>
3991041Sobrien#include <stdlib.h>
4091041Sobrien
/*
 * Bits stored in fb_flags.  Each fallback callback in this file sets its
 * own bit, and format_diag() prints one marker per bit that is set.
 */
#define	UC_TO_MB_FLAG	0x01
#define	MB_TO_WC_FLAG	0x02
#define	MB_TO_UC_FLAG	0x04
#define	WC_TO_MB_FLAG	0x08

/* Larger of the two operands; either operand may be evaluated twice. */
#define	MAX(a, b)	((b) > (a) ? (b) : (a))
4791041Sobrien
/* Program name printed by usage(). */
extern char	*__progname;

/* Short-option string passed to getopt_long() in main(). */
static const char	*optstr = "cdilrt";

/*
 * Fixed part of the map header that main() prints after the TYPE and NAME
 * lines when -c is given.
 */
static const char	*citrus_common =
    "SRC_ZONE\t0x0000-0xFFFF\n"
    "OOB_MODE\tILSEQ\n"
    "DST_ILSEQ\t0xFFFE\n"
    "DST_UNIT_BITS\t32\n\n"
    "BEGIN_MAP\n"
    "#\n# Generated with Citrus iconv (FreeBSD)\n#\n";

/* Command-line switches; each is set to true by main() when seen. */
bool	cflag;		/* -c: emit the map header and the END_MAP trailer */
bool	dflag;		/* -d: install fallbacks, report failed conversions */
bool	iflag;		/* -i: append "//IGNORE" to the target encoding */
bool	lflag;		/* -l: scan up to 0x100000 instead of 0x10000 */
bool	tflag;		/* -t: request transliteration for the target */
bool	rflag;		/* -r: convert from UTF-32LE to the named encoding */

/* *_FLAG bits set by the fallback callbacks; cleared before each iconv(). */
int	fb_flags;
6491041Sobrien
/* Conversion loop; defined at the end of this file. */
static void	do_conv(iconv_t cd, bool uniinput);

/*
 * Fallback callbacks that main() stores in struct iconv_fallbacks.  The
 * definitions in this file only record that they were called.
 */
void	mb_to_uc_fb(const char *inbuf, size_t inbufsize,
	    void (*write_replacement)(const unsigned int *buf, size_t buflen,
	    void *callback_arg),
	    void *callback_arg, void *data);
void	mb_to_wc_fb(const char *inbuf, size_t inbufsize,
	    void (*write_replacement)(const wchar_t *buf, size_t buflen,
	    void *callback_arg),
	    void *callback_arg, void *data);
void	uc_to_mb_fb(unsigned int code,
	    void (*write_replacement)(const char *buf, size_t buflen,
	    void *callback_arg),
	    void *callback_arg, void *data);
void	wc_to_mb_fb(wchar_t wc,
	    void (*write_replacement)(const char *buf, size_t buflen,
	    void *callback_arg),
	    void *callback_arg, void *data);
7891041Sobrien
/*
 * Long spellings of the single-letter options in optstr.  No option takes
 * an argument, and each one makes getopt_long() return its short letter.
 * The all-zero entry terminates the table.
 */
struct option long_options[] = {
	{ .name = "citrus",	.has_arg = no_argument, .flag = NULL, .val = 'c' },
	{ .name = "diagnostic",	.has_arg = no_argument, .flag = NULL, .val = 'd' },
	{ .name = "ignore",	.has_arg = no_argument, .flag = NULL, .val = 'i' },
	{ .name = "long",	.has_arg = no_argument, .flag = NULL, .val = 'l' },
	{ .name = "reverse",	.has_arg = no_argument, .flag = NULL, .val = 'r' },
	{ .name = "translit",	.has_arg = no_argument, .flag = NULL, .val = 't' },
	{ .name = NULL,		.has_arg = no_argument, .flag = NULL, .val = 0 }
};
8991041Sobrien
9091041Sobrienstatic void
9191041Sobrienusage(void) {
9291041Sobrien
9391041Sobrien	fprintf(stderr, "Usage: %s [-cdilrt] ENCODING\n", __progname);
9491041Sobrien	exit(EXIT_FAILURE);
9591041Sobrien}
9691041Sobrien
9791041Sobrienstatic void
9891041Sobrienformat_diag(int errcode)
9991041Sobrien{
10091041Sobrien	const char *errstr;
10191041Sobrien	const char *u2m, *m2u, *m2w, *w2m;
10291041Sobrien
10391041Sobrien	switch (errcode) {
10491041Sobrien	case EINVAL:
10591041Sobrien		errstr = "EINVAL ";
10691041Sobrien		break;
10791041Sobrien	case EILSEQ:
10891041Sobrien		errstr = "EILSEQ ";
10991041Sobrien		break;
11091041Sobrien	case E2BIG:
11191041Sobrien		errstr = "E2BIG ";
11291041Sobrien		break;
11391041Sobrien	default:
11491041Sobrien		errstr = "UNKNOWN ";
11591041Sobrien		break;
11691041Sobrien	}
11791041Sobrien
11891041Sobrien	u2m = (fb_flags & UC_TO_MB_FLAG) ? "U2M " : "";
11991041Sobrien	m2w = (fb_flags & MB_TO_WC_FLAG) ? "M2W " : "";
12091041Sobrien	m2u = (fb_flags & MB_TO_UC_FLAG) ? "M2U " : "";
12191041Sobrien	w2m = (fb_flags & WC_TO_MB_FLAG) ? "W2M " : "";
12291041Sobrien
12391041Sobrien	printf("%s%s%s%s%s", errstr, u2m, m2w, m2u, w2m);
12491041Sobrien}
12591041Sobrien
/*
 * Return the number of bytes (1 to 4) needed to hold p, i.e. the position
 * of its most significant non-zero byte.  Zero counts as one byte.
 */
static int
magnitude(const uint32_t p)
{
	uint32_t rest;
	int nbytes;

	nbytes = 1;
	/* Count one more byte for each non-empty group above the lowest. */
	for (rest = p >> 8; rest != 0; rest >>= 8)
		nbytes++;
	return (nbytes);
}
13791041Sobrien
/*
 * Print data on stdout as "0x" followed by zero-padded upper-case hex
 * digits.  The digit count is two per byte reported by magnitude(), with a
 * minimum of four digits (single-byte values are padded to two bytes).
 */
static void
format(const uint32_t data)
{
	int nbytes;

	nbytes = magnitude(data);
	if (nbytes < 2)
		nbytes = 2;
	printf("0x%0*X", nbytes * 2, data);
}
16091041Sobrien
16191041Sobrienvoid
16291041Sobrienuc_to_mb_fb(unsigned int code,
16391041Sobrien    void (*write_replacement)(const char *buf, size_t buflen,
16491041Sobrien       void* callback_arg), void* callback_arg, void* data)
16591041Sobrien{
16691041Sobrien
16791041Sobrien	fb_flags |= UC_TO_MB_FLAG;
16891041Sobrien}
16991041Sobrien
17091041Sobrienvoid
17191041Sobrienmb_to_wc_fb(const char* inbuf, size_t inbufsize,
17291041Sobrien    void (*write_replacement)(const wchar_t *buf, size_t buflen,
17391041Sobrien       void* callback_arg), void* callback_arg, void* data)
17491041Sobrien{
17591041Sobrien
17691041Sobrien	fb_flags |= MB_TO_WC_FLAG;
17791041Sobrien}
17891041Sobrien
17991041Sobrienvoid
18091041Sobrienmb_to_uc_fb(const char* inbuf, size_t inbufsize,
18191041Sobrien    void (*write_replacement)(const unsigned int *buf, size_t buflen,
18291041Sobrien       void* callback_arg), void* callback_arg, void* data)
18399461Sobrien{
18491041Sobrien
18591041Sobrien	fb_flags |= MB_TO_UC_FLAG;
18691041Sobrien}
18791041Sobrien
18891041Sobrienvoid
18991041Sobrienwc_to_mb_fb(wchar_t wc,
19091041Sobrien    void (*write_replacement)(const char *buf, size_t buflen,
19191041Sobrien       void* callback_arg), void* callback_arg, void* data)
19291041Sobrien{
19391041Sobrien
19491041Sobrien	fb_flags |= WC_TO_MB_FLAG;
19591041Sobrien}
19691041Sobrien
19791041Sobrienint
19891041Sobrienmain (int argc, char *argv[])
19991041Sobrien{
20091041Sobrien	struct iconv_fallbacks fbs;
20191041Sobrien	iconv_t cd;
20291041Sobrien	char *tocode;
20391041Sobrien	char c;
20491041Sobrien
20591041Sobrien	while (((c = getopt_long(argc, argv, optstr, long_options, NULL)) != -1)) {
20691041Sobrien		switch (c) {
20791041Sobrien		case 'c':
20891041Sobrien			cflag = true;
20991041Sobrien			break;
21091041Sobrien		case 'd':
21191041Sobrien			dflag = true;
21291041Sobrien			break;
21391041Sobrien		case 'i':
21491041Sobrien			iflag = true;
21591041Sobrien			break;
21691041Sobrien		case 'l':
21791041Sobrien			lflag = true;
21891041Sobrien			break;
21991041Sobrien		case 'r':
22091041Sobrien			rflag = true;
22191041Sobrien			break;
22291041Sobrien		case 't':
22391041Sobrien			tflag = true;
22491041Sobrien			break;
22591041Sobrien		}
22691041Sobrien	}
22791041Sobrien	argc -= optind;
22891041Sobrien	argv += optind;
22991041Sobrien
23091041Sobrien	if (argc < 1)
23191041Sobrien		usage();
23291041Sobrien
23391041Sobrien	fbs.uc_to_mb_fallback = uc_to_mb_fb;
23491041Sobrien	fbs.mb_to_wc_fallback = mb_to_wc_fb;
23591041Sobrien	fbs.mb_to_uc_fallback = mb_to_uc_fb;
23691041Sobrien	fbs.wc_to_mb_fallback = wc_to_mb_fb;
23791041Sobrien	fbs.data = NULL;
23891041Sobrien
23991041Sobrien	if (argc == 2) {
24091041Sobrien		asprintf(&tocode, "%s%s%s", argv[1], tflag ? "//TRASNLIT" : "",
24191041Sobrien		    iflag ? "//IGNORE" : "");
24291041Sobrien
24391041Sobrien		if ((cd = iconv_open(tocode, argv[0])) == (iconv_t)-1)
24491041Sobrien			err(1, NULL);
24591041Sobrien		if (dflag) {
24691041Sobrien			if (iconvctl(cd, ICONV_SET_FALLBACKS, &fbs) != 0)
24791041Sobrien				err(1, NULL);
24891041Sobrien		}
24991041Sobrien		do_conv(cd, false);
25091041Sobrien	} else if (rflag) {
251218822Sdim		asprintf(&tocode, "%s%s%s", argv[0], tflag ? "//TRANSLIT" : "",
252218822Sdim		    iflag ? "//IGNORE" : "");
253218822Sdim
254218822Sdim		if ((cd = iconv_open(tocode, "UTF-32LE")) == (iconv_t)-1)
255218822Sdim			err(1, NULL);
256218822Sdim		if (dflag && iconvctl(cd, ICONV_SET_FALLBACKS, &fbs) != 0)
257218822Sdim			err(1, NULL);
258218822Sdim		if (cflag) {
259218822Sdim			printf("# $FreeBSD$\n\n");
26091041Sobrien			printf("TYPE\t\tROWCOL\n");
26191041Sobrien			printf("NAME\t\tUCS/%s\n", argv[0]);
26291041Sobrien			printf("%s", citrus_common);
263130561Sobrien		}
26491041Sobrien		do_conv(cd, true);
26591041Sobrien	} else {
26691041Sobrien		if ((cd = iconv_open("UTF-32LE//TRANSLIT", argv[0])) == (iconv_t)-1)
26791041Sobrien			err(1, NULL);
26891041Sobrien		if (dflag && (iconvctl(cd, ICONV_SET_FALLBACKS, &fbs) != 0))
269130561Sobrien			err(1, NULL);
27091041Sobrien		if (cflag) {
27191041Sobrien			printf("# $FreeBSD$\n\n");
27291041Sobrien			printf("TYPE\t\tROWCOL\n");
27391041Sobrien			printf("NAME\t\t%s/UCS\n", argv[0]);
27491041Sobrien			printf("%s", citrus_common);
275130561Sobrien                }
27691041Sobrien		do_conv(cd, false);
27791041Sobrien	}
27891041Sobrien
27991041Sobrien	if (iconv_close(cd) != 0)
28091041Sobrien		err(1, NULL);
28191041Sobrien
28291041Sobrien	return (EXIT_SUCCESS);
28391041Sobrien}
28491041Sobrien
28591041Sobrienstatic void
28691041Sobriendo_conv(iconv_t cd, bool uniinput) {
287218822Sdim	size_t inbytesleft, outbytesleft, ret;
288218822Sdim	uint32_t outbuf;
28991041Sobrien	uint32_t inbuf;
29091041Sobrien	const char *inbuf_;
29191041Sobrien	char *outbuf_;
292218822Sdim
29391041Sobrien	for (inbuf = 0; inbuf < (lflag ? 0x100000 : 0x10000); inbuf += 1) {
29491041Sobrien		if (uniinput && (inbuf >= 0xD800) && (inbuf <= 0xDF00))
29591041Sobrien			continue;
296218822Sdim		inbytesleft = uniinput ? 4 : magnitude(inbuf);
29791041Sobrien		outbytesleft = 4;
29891041Sobrien		outbuf = 0x00000000;
29991041Sobrien		outbuf_ = (char *)&outbuf;
30091041Sobrien		inbuf_ = (const char *)&inbuf;
30191041Sobrien		iconv(cd, NULL, NULL, NULL, NULL);
30291041Sobrien		fb_flags = 0;
30391041Sobrien		errno = 0;
30491041Sobrien		ret = iconv(cd, &inbuf_, &inbytesleft, &outbuf_, &outbytesleft);
30591041Sobrien		if (ret == (size_t)-1) {
30691041Sobrien			if (dflag) {
30791041Sobrien				format(inbuf);
308218822Sdim				printf(" = ");
30991041Sobrien				format_diag(errno);
31091041Sobrien				printf("\n");
31191041Sobrien			}
31291041Sobrien			continue;
31391041Sobrien		}
31491041Sobrien		format(inbuf);
31591041Sobrien		printf(" = ");
31691041Sobrien		format(outbuf);
31791041Sobrien		printf("\n");
31891041Sobrien	}
31991041Sobrien	if (cflag)
32091041Sobrien		printf("END_MAP\n");
32191041Sobrien}
32291041Sobrien