191041Sobrien/*- 291041Sobrien * Copyright (C) 2009, 2010 Gabor Kovesdan <gabor@FreeBSD.org> 3218822Sdim * All rights reserved. 491041Sobrien * 5218822Sdim * Redistribution and use in source and binary forms, with or without 691041Sobrien * modification, are permitted provided that the following conditions 791041Sobrien * are met: 891041Sobrien * 1. Redistributions of source code must retain the above copyright 9104834Sobrien * notice, this list of conditions and the following disclaimer. 1091041Sobrien * 2. Redistributions in binary form must reproduce the above copyright 11104834Sobrien * notice, this list of conditions and the following disclaimer in the 12104834Sobrien * documentation and/or other materials provided with the distribution. 13104834Sobrien * 14104834Sobrien * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 1591041Sobrien * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16104834Sobrien * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17104834Sobrien * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18104834Sobrien * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19104834Sobrien * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2091041Sobrien * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21104834Sobrien * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22104834Sobrien * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23218822Sdim * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 2491041Sobrien * SUCH DAMAGE. 2591041Sobrien */ 2691041Sobrien 2791041Sobrien#include <sys/cdefs.h> 2891041Sobrien__FBSDID("$FreeBSD$"); 2991041Sobrien 3091041Sobrien#include <sys/endian.h> 3191041Sobrien#include <sys/types.h> 3291041Sobrien 3391041Sobrien#include <err.h> 3491041Sobrien#include <errno.h> 3591041Sobrien#include <getopt.h> 3691041Sobrien#include <iconv.h> 3791041Sobrien#include <stdbool.h> 3891041Sobrien#include <stdio.h> 3991041Sobrien#include <stdlib.h> 4091041Sobrien 4191041Sobrien#define UC_TO_MB_FLAG 1 42218822Sdim#define MB_TO_WC_FLAG 2 4391041Sobrien#define MB_TO_UC_FLAG 4 4491041Sobrien#define WC_TO_MB_FLAG 8 4591041Sobrien 4691041Sobrien#define MAX(a,b) ((a) < (b) ? (b) : (a)) 4791041Sobrien 4891041Sobrienextern char *__progname; 4991041Sobrien 5091041Sobrienstatic const char *optstr = "cdilrt"; 5191041Sobrienstatic const char *citrus_common = "SRC_ZONE\t0x0000-0xFFFF\n" 5291041Sobrien "OOB_MODE\tILSEQ\n" 5391041Sobrien "DST_ILSEQ\t0xFFFE\n" 5491041Sobrien "DST_UNIT_BITS\t32\n\n" 5591041Sobrien "BEGIN_MAP\n" 5691041Sobrien "#\n# Generated with Citrus iconv (FreeBSD)\n#\n"; 5791041Sobrienbool cflag; 5891041Sobrienbool dflag; 5991041Sobrienbool iflag; 6091041Sobrienbool lflag; 6191041Sobrienbool tflag; 6291041Sobrienbool rflag; 6391041Sobrienint fb_flags; 6491041Sobrien 6591041Sobrienstatic void do_conv(iconv_t, bool); 6691041Sobrienvoid mb_to_uc_fb(const char*, size_t, 6791041Sobrien void (*write_replacement)(const unsigned int *, 6891041Sobrien size_t, void *), void *, void *); 69218822Sdimvoid mb_to_wc_fb(const char*, size_t, 70218822Sdim void (*write_replacement) (const wchar_t *, size_t, void *), 71218822Sdim void *, void *); 7291041Sobrienvoid uc_to_mb_fb(unsigned int, 7391041Sobrien void (*write_replacement) (const char *, size_t, void *), void *, 7491041Sobrien void *); 7591041Sobrienvoid wc_to_mb_fb(wchar_t, 7691041Sobrien void (*write_replacement)(const char *, 7791041Sobrien size_t, void *), void *, void *); 7891041Sobrien 7991041Sobrienstruct option long_options[] = 8091041Sobrien{ 8191041Sobrien {"citrus", no_argument, NULL, 'c'}, 8291041Sobrien {"diagnostic", no_argument, NULL, 'd'}, 8391041Sobrien {"ignore", no_argument, NULL, 'i'}, 8491041Sobrien {"long", no_argument, NULL, 'l'}, 8591041Sobrien {"reverse", no_argument, NULL, 'r'}, 8691041Sobrien {"translit", no_argument, NULL, 't'}, 8791041Sobrien {NULL, no_argument, NULL, 0} 8891041Sobrien}; 8991041Sobrien 9091041Sobrienstatic void 9191041Sobrienusage(void) { 9291041Sobrien 9391041Sobrien fprintf(stderr, "Usage: %s [-cdilrt] ENCODING\n", __progname); 9491041Sobrien exit(EXIT_FAILURE); 9591041Sobrien} 9691041Sobrien 9791041Sobrienstatic void 9891041Sobrienformat_diag(int errcode) 9991041Sobrien{ 10091041Sobrien const char *errstr; 10191041Sobrien const char *u2m, *m2u, *m2w, *w2m; 10291041Sobrien 10391041Sobrien switch (errcode) { 10491041Sobrien case EINVAL: 10591041Sobrien errstr = "EINVAL "; 10691041Sobrien break; 10791041Sobrien case EILSEQ: 10891041Sobrien errstr = "EILSEQ "; 10991041Sobrien break; 11091041Sobrien case E2BIG: 11191041Sobrien errstr = "E2BIG "; 11291041Sobrien break; 11391041Sobrien default: 11491041Sobrien errstr = "UNKNOWN "; 11591041Sobrien break; 11691041Sobrien } 11791041Sobrien 11891041Sobrien u2m = (fb_flags & UC_TO_MB_FLAG) ? "U2M " : ""; 11991041Sobrien m2w = (fb_flags & MB_TO_WC_FLAG) ? "M2W " : ""; 12091041Sobrien m2u = (fb_flags & MB_TO_UC_FLAG) ? "M2U " : ""; 12191041Sobrien w2m = (fb_flags & WC_TO_MB_FLAG) ? "W2M " : ""; 12291041Sobrien 12391041Sobrien printf("%s%s%s%s%s", errstr, u2m, m2w, m2u, w2m); 12491041Sobrien} 12591041Sobrien 12691041Sobrienstatic int 12791041Sobrienmagnitude(const uint32_t p) 12891041Sobrien{ 12991041Sobrien 13091041Sobrien if (p >> 8 == 0) 13191041Sobrien return (1); 13291041Sobrien else if (p >> 16 == 0) 13391041Sobrien return (2); 13491041Sobrien else 13591041Sobrien return (p >> 24 == 0 ? 3 : 4); 13691041Sobrien} 13791041Sobrien 13891041Sobrienstatic void 13991041Sobrienformat(const uint32_t data) 14091041Sobrien{ 14191041Sobrien 14291041Sobrien /* XXX: could be simpler, something like this but with leading 0s? 14391041Sobrien 14491041Sobrien printf("0x%.*X", magnitude(data), data); 14591041Sobrien */ 14691041Sobrien 14791041Sobrien switch (magnitude(data)) { 14891041Sobrien default: 14991041Sobrien case 2: 15091041Sobrien printf("0x%04X", data); 15191041Sobrien break; 15291041Sobrien case 3: 15391041Sobrien printf("0x%06X", data); 15491041Sobrien break; 15591041Sobrien case 4: 15691041Sobrien printf("0x%08X", data); 15791041Sobrien break; 15891041Sobrien } 15991041Sobrien} 16091041Sobrien 16191041Sobrienvoid 16291041Sobrienuc_to_mb_fb(unsigned int code, 16391041Sobrien void (*write_replacement)(const char *buf, size_t buflen, 16491041Sobrien void* callback_arg), void* callback_arg, void* data) 16591041Sobrien{ 16691041Sobrien 16791041Sobrien fb_flags |= UC_TO_MB_FLAG; 16891041Sobrien} 16991041Sobrien 17091041Sobrienvoid 17191041Sobrienmb_to_wc_fb(const char* inbuf, size_t inbufsize, 17291041Sobrien void (*write_replacement)(const wchar_t *buf, size_t buflen, 17391041Sobrien void* callback_arg), void* callback_arg, void* data) 17491041Sobrien{ 17591041Sobrien 17691041Sobrien fb_flags |= MB_TO_WC_FLAG; 17791041Sobrien} 17891041Sobrien 17991041Sobrienvoid 18091041Sobrienmb_to_uc_fb(const char* inbuf, size_t inbufsize, 18191041Sobrien void (*write_replacement)(const unsigned int *buf, size_t buflen, 18291041Sobrien void* callback_arg), void* callback_arg, void* data) 18399461Sobrien{ 18491041Sobrien 18591041Sobrien fb_flags |= MB_TO_UC_FLAG; 18691041Sobrien} 18791041Sobrien 18891041Sobrienvoid 18991041Sobrienwc_to_mb_fb(wchar_t wc, 19091041Sobrien void (*write_replacement)(const char *buf, size_t buflen, 19191041Sobrien void* callback_arg), void* callback_arg, void* data) 19291041Sobrien{ 19391041Sobrien 19491041Sobrien fb_flags |= WC_TO_MB_FLAG; 19591041Sobrien} 19691041Sobrien 19791041Sobrienint 19891041Sobrienmain (int argc, char *argv[]) 19991041Sobrien{ 20091041Sobrien struct iconv_fallbacks fbs; 20191041Sobrien iconv_t cd; 20291041Sobrien char *tocode; 20391041Sobrien char c; 20491041Sobrien 20591041Sobrien while (((c = getopt_long(argc, argv, optstr, long_options, NULL)) != -1)) { 20691041Sobrien switch (c) { 20791041Sobrien case 'c': 20891041Sobrien cflag = true; 20991041Sobrien break; 21091041Sobrien case 'd': 21191041Sobrien dflag = true; 21291041Sobrien break; 21391041Sobrien case 'i': 21491041Sobrien iflag = true; 21591041Sobrien break; 21691041Sobrien case 'l': 21791041Sobrien lflag = true; 21891041Sobrien break; 21991041Sobrien case 'r': 22091041Sobrien rflag = true; 22191041Sobrien break; 22291041Sobrien case 't': 22391041Sobrien tflag = true; 22491041Sobrien break; 22591041Sobrien } 22691041Sobrien } 22791041Sobrien argc -= optind; 22891041Sobrien argv += optind; 22991041Sobrien 23091041Sobrien if (argc < 1) 23191041Sobrien usage(); 23291041Sobrien 23391041Sobrien fbs.uc_to_mb_fallback = uc_to_mb_fb; 23491041Sobrien fbs.mb_to_wc_fallback = mb_to_wc_fb; 23591041Sobrien fbs.mb_to_uc_fallback = mb_to_uc_fb; 23691041Sobrien fbs.wc_to_mb_fallback = wc_to_mb_fb; 23791041Sobrien fbs.data = NULL; 23891041Sobrien 23991041Sobrien if (argc == 2) { 24091041Sobrien asprintf(&tocode, "%s%s%s", argv[1], tflag ? "//TRASNLIT" : "", 24191041Sobrien iflag ? "//IGNORE" : ""); 24291041Sobrien 24391041Sobrien if ((cd = iconv_open(tocode, argv[0])) == (iconv_t)-1) 24491041Sobrien err(1, NULL); 24591041Sobrien if (dflag) { 24691041Sobrien if (iconvctl(cd, ICONV_SET_FALLBACKS, &fbs) != 0) 24791041Sobrien err(1, NULL); 24891041Sobrien } 24991041Sobrien do_conv(cd, false); 25091041Sobrien } else if (rflag) { 251218822Sdim asprintf(&tocode, "%s%s%s", argv[0], tflag ? "//TRANSLIT" : "", 252218822Sdim iflag ? "//IGNORE" : ""); 253218822Sdim 254218822Sdim if ((cd = iconv_open(tocode, "UTF-32LE")) == (iconv_t)-1) 255218822Sdim err(1, NULL); 256218822Sdim if (dflag && iconvctl(cd, ICONV_SET_FALLBACKS, &fbs) != 0) 257218822Sdim err(1, NULL); 258218822Sdim if (cflag) { 259218822Sdim printf("# $FreeBSD$\n\n"); 26091041Sobrien printf("TYPE\t\tROWCOL\n"); 26191041Sobrien printf("NAME\t\tUCS/%s\n", argv[0]); 26291041Sobrien printf("%s", citrus_common); 263130561Sobrien } 26491041Sobrien do_conv(cd, true); 26591041Sobrien } else { 26691041Sobrien if ((cd = iconv_open("UTF-32LE//TRANSLIT", argv[0])) == (iconv_t)-1) 26791041Sobrien err(1, NULL); 26891041Sobrien if (dflag && (iconvctl(cd, ICONV_SET_FALLBACKS, &fbs) != 0)) 269130561Sobrien err(1, NULL); 27091041Sobrien if (cflag) { 27191041Sobrien printf("# $FreeBSD$\n\n"); 27291041Sobrien printf("TYPE\t\tROWCOL\n"); 27391041Sobrien printf("NAME\t\t%s/UCS\n", argv[0]); 27491041Sobrien printf("%s", citrus_common); 275130561Sobrien } 27691041Sobrien do_conv(cd, false); 27791041Sobrien } 27891041Sobrien 27991041Sobrien if (iconv_close(cd) != 0) 28091041Sobrien err(1, NULL); 28191041Sobrien 28291041Sobrien return (EXIT_SUCCESS); 28391041Sobrien} 28491041Sobrien 28591041Sobrienstatic void 28691041Sobriendo_conv(iconv_t cd, bool uniinput) { 287218822Sdim size_t inbytesleft, outbytesleft, ret; 288218822Sdim uint32_t outbuf; 28991041Sobrien uint32_t inbuf; 29091041Sobrien const char *inbuf_; 29191041Sobrien char *outbuf_; 292218822Sdim 29391041Sobrien for (inbuf = 0; inbuf < (lflag ? 0x100000 : 0x10000); inbuf += 1) { 29491041Sobrien if (uniinput && (inbuf >= 0xD800) && (inbuf <= 0xDF00)) 29591041Sobrien continue; 296218822Sdim inbytesleft = uniinput ? 4 : magnitude(inbuf); 29791041Sobrien outbytesleft = 4; 29891041Sobrien outbuf = 0x00000000; 29991041Sobrien outbuf_ = (char *)&outbuf; 30091041Sobrien inbuf_ = (const char *)&inbuf; 30191041Sobrien iconv(cd, NULL, NULL, NULL, NULL); 30291041Sobrien fb_flags = 0; 30391041Sobrien errno = 0; 30491041Sobrien ret = iconv(cd, &inbuf_, &inbytesleft, &outbuf_, &outbytesleft); 30591041Sobrien if (ret == (size_t)-1) { 30691041Sobrien if (dflag) { 30791041Sobrien format(inbuf); 308218822Sdim printf(" = "); 30991041Sobrien format_diag(errno); 31091041Sobrien printf("\n"); 31191041Sobrien } 31291041Sobrien continue; 31391041Sobrien } 31491041Sobrien format(inbuf); 31591041Sobrien printf(" = "); 31691041Sobrien format(outbuf); 31791041Sobrien printf("\n"); 31891041Sobrien } 31991041Sobrien if (cflag) 32091041Sobrien printf("END_MAP\n"); 32191041Sobrien} 32291041Sobrien