1219019Sgabor/*- 2219019Sgabor * Copyright (C) 2009, 2010 Gabor Kovesdan <gabor@FreeBSD.org> 3219019Sgabor * All rights reserved. 4219019Sgabor * 5219019Sgabor * Redistribution and use in source and binary forms, with or without 6219019Sgabor * modification, are permitted provided that the following conditions 7219019Sgabor * are met: 8219019Sgabor * 1. Redistributions of source code must retain the above copyright 9219019Sgabor * notice, this list of conditions and the following disclaimer. 10219019Sgabor * 2. Redistributions in binary form must reproduce the above copyright 11219019Sgabor * notice, this list of conditions and the following disclaimer in the 12219019Sgabor * documentation and/or other materials provided with the distribution. 13219019Sgabor * 14219019Sgabor * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15219019Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16219019Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17219019Sgabor * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18219019Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19219019Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20219019Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21219019Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22219019Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23219019Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24219019Sgabor * SUCH DAMAGE. 25219019Sgabor */ 26219019Sgabor 27219019Sgabor#include <sys/cdefs.h> 28219019Sgabor__FBSDID("$FreeBSD$"); 29219019Sgabor 30219019Sgabor#include <sys/endian.h> 31219019Sgabor#include <sys/types.h> 32219019Sgabor 33219019Sgabor#include <err.h> 34219019Sgabor#include <errno.h> 35219019Sgabor#include <getopt.h> 36219019Sgabor#include <iconv.h> 37219019Sgabor#include <stdbool.h> 38219019Sgabor#include <stdio.h> 39219019Sgabor#include <stdlib.h> 40219019Sgabor 41219019Sgabor#define UC_TO_MB_FLAG 1 42219019Sgabor#define MB_TO_WC_FLAG 2 43219019Sgabor#define MB_TO_UC_FLAG 4 44219019Sgabor#define WC_TO_MB_FLAG 8 45219019Sgabor 46219019Sgabor#define MAX(a,b) ((a) < (b) ? (b) : (a)) 47219019Sgabor 48219019Sgaborextern char *__progname; 49219019Sgabor 50219019Sgaborstatic const char *optstr = "cdilrt"; 51219019Sgaborstatic const char *citrus_common = "SRC_ZONE\t0x0000-0xFFFF\n" 52219019Sgabor "OOB_MODE\tILSEQ\n" 53219019Sgabor "DST_ILSEQ\t0xFFFE\n" 54219019Sgabor "DST_UNIT_BITS\t32\n\n" 55219019Sgabor "BEGIN_MAP\n" 56219019Sgabor "#\n# Generated with Citrus iconv (FreeBSD)\n#\n"; 57219019Sgaborbool cflag; 58219019Sgaborbool dflag; 59219019Sgaborbool iflag; 60219019Sgaborbool lflag; 61219019Sgaborbool tflag; 62219019Sgaborbool rflag; 63219019Sgaborint fb_flags; 64219019Sgabor 65219019Sgaborstatic void do_conv(iconv_t, bool); 66219019Sgaborvoid mb_to_uc_fb(const char*, size_t, 67219019Sgabor void (*write_replacement)(const unsigned int *, 68219019Sgabor size_t, void *), void *, void *); 69219019Sgaborvoid mb_to_wc_fb(const char*, size_t, 70219019Sgabor void (*write_replacement) (const wchar_t *, size_t, void *), 71219019Sgabor void *, void *); 72219019Sgaborvoid uc_to_mb_fb(unsigned int, 73219019Sgabor void (*write_replacement) (const char *, size_t, void *), void *, 74219019Sgabor void *); 75219019Sgaborvoid wc_to_mb_fb(wchar_t, 76219019Sgabor void (*write_replacement)(const char *, 77219019Sgabor size_t, void *), void *, void *); 78219019Sgabor 79219019Sgaborstruct option long_options[] = 80219019Sgabor{ 81219019Sgabor {"citrus", no_argument, NULL, 'c'}, 82219019Sgabor {"diagnostic", no_argument, NULL, 'd'}, 83219019Sgabor {"ignore", no_argument, NULL, 'i'}, 84219019Sgabor {"long", no_argument, NULL, 'l'}, 85219019Sgabor {"reverse", no_argument, NULL, 'r'}, 86219019Sgabor {"translit", no_argument, NULL, 't'}, 87219019Sgabor {NULL, no_argument, NULL, 0} 88219019Sgabor}; 89219019Sgabor 90219019Sgaborstatic void 91219019Sgaborusage(void) { 92219019Sgabor 93219019Sgabor fprintf(stderr, "Usage: %s [-cdilrt] ENCODING\n", __progname); 94219019Sgabor exit(EXIT_FAILURE); 95219019Sgabor} 96219019Sgabor 97219019Sgaborstatic void 98219019Sgaborformat_diag(int errcode) 99219019Sgabor{ 100219019Sgabor const char *errstr; 101219019Sgabor const char *u2m, *m2u, *m2w, *w2m; 102219019Sgabor 103219019Sgabor switch (errcode) { 104219019Sgabor case EINVAL: 105219019Sgabor errstr = "EINVAL "; 106219019Sgabor break; 107219019Sgabor case EILSEQ: 108219019Sgabor errstr = "EILSEQ "; 109219019Sgabor break; 110219019Sgabor case E2BIG: 111219019Sgabor errstr = "E2BIG "; 112219019Sgabor break; 113219019Sgabor default: 114219019Sgabor errstr = "UNKNOWN "; 115219019Sgabor break; 116219019Sgabor } 117219019Sgabor 118219019Sgabor u2m = (fb_flags & UC_TO_MB_FLAG) ? "U2M " : ""; 119219019Sgabor m2w = (fb_flags & MB_TO_WC_FLAG) ? "M2W " : ""; 120219019Sgabor m2u = (fb_flags & MB_TO_UC_FLAG) ? "M2U " : ""; 121219019Sgabor w2m = (fb_flags & WC_TO_MB_FLAG) ? "W2M " : ""; 122219019Sgabor 123219019Sgabor printf("%s%s%s%s%s", errstr, u2m, m2w, m2u, w2m); 124219019Sgabor} 125219019Sgabor 126219019Sgaborstatic int 127219019Sgabormagnitude(const uint32_t p) 128219019Sgabor{ 129219019Sgabor 130219019Sgabor if (p >> 8 == 0) 131219019Sgabor return (1); 132219019Sgabor else if (p >> 16 == 0) 133219019Sgabor return (2); 134219019Sgabor else 135219019Sgabor return (p >> 24 == 0 ? 3 : 4); 136219019Sgabor} 137219019Sgabor 138219019Sgaborstatic void 139219019Sgaborformat(const uint32_t data) 140219019Sgabor{ 141219019Sgabor 142219019Sgabor /* XXX: could be simpler, something like this but with leading 0s? 143219019Sgabor 144219019Sgabor printf("0x%.*X", magnitude(data), data); 145219019Sgabor */ 146219019Sgabor 147219019Sgabor switch (magnitude(data)) { 148219019Sgabor default: 149219019Sgabor case 2: 150219019Sgabor printf("0x%04X", data); 151219019Sgabor break; 152219019Sgabor case 3: 153219019Sgabor printf("0x%06X", data); 154219019Sgabor break; 155219019Sgabor case 4: 156219019Sgabor printf("0x%08X", data); 157219019Sgabor break; 158219019Sgabor } 159219019Sgabor} 160219019Sgabor 161219019Sgaborvoid 162219019Sgaboruc_to_mb_fb(unsigned int code, 163219019Sgabor void (*write_replacement)(const char *buf, size_t buflen, 164219019Sgabor void* callback_arg), void* callback_arg, void* data) 165219019Sgabor{ 166219019Sgabor 167219019Sgabor fb_flags |= UC_TO_MB_FLAG; 168219019Sgabor} 169219019Sgabor 170219019Sgaborvoid 171219019Sgabormb_to_wc_fb(const char* inbuf, size_t inbufsize, 172219019Sgabor void (*write_replacement)(const wchar_t *buf, size_t buflen, 173219019Sgabor void* callback_arg), void* callback_arg, void* data) 174219019Sgabor{ 175219019Sgabor 176219019Sgabor fb_flags |= MB_TO_WC_FLAG; 177219019Sgabor} 178219019Sgabor 179219019Sgaborvoid 180219019Sgabormb_to_uc_fb(const char* inbuf, size_t inbufsize, 181219019Sgabor void (*write_replacement)(const unsigned int *buf, size_t buflen, 182219019Sgabor void* callback_arg), void* callback_arg, void* data) 183219019Sgabor{ 184219019Sgabor 185219019Sgabor fb_flags |= MB_TO_UC_FLAG; 186219019Sgabor} 187219019Sgabor 188219019Sgaborvoid 189219019Sgaborwc_to_mb_fb(wchar_t wc, 190219019Sgabor void (*write_replacement)(const char *buf, size_t buflen, 191219019Sgabor void* callback_arg), void* callback_arg, void* data) 192219019Sgabor{ 193219019Sgabor 194219019Sgabor fb_flags |= WC_TO_MB_FLAG; 195219019Sgabor} 196219019Sgabor 197219019Sgaborint 198219019Sgabormain (int argc, char *argv[]) 199219019Sgabor{ 200219019Sgabor struct iconv_fallbacks fbs; 201219019Sgabor iconv_t cd; 202219019Sgabor char *tocode; 203219019Sgabor char c; 204219019Sgabor 205219019Sgabor while (((c = getopt_long(argc, argv, optstr, long_options, NULL)) != -1)) { 206219019Sgabor switch (c) { 207219019Sgabor case 'c': 208219019Sgabor cflag = true; 209219019Sgabor break; 210219019Sgabor case 'd': 211219019Sgabor dflag = true; 212219019Sgabor break; 213219019Sgabor case 'i': 214219019Sgabor iflag = true; 215219019Sgabor break; 216219019Sgabor case 'l': 217219019Sgabor lflag = true; 218219019Sgabor break; 219219019Sgabor case 'r': 220219019Sgabor rflag = true; 221219019Sgabor break; 222219019Sgabor case 't': 223219019Sgabor tflag = true; 224219019Sgabor break; 225219019Sgabor } 226219019Sgabor } 227219019Sgabor argc -= optind; 228219019Sgabor argv += optind; 229219019Sgabor 230219019Sgabor if (argc < 1) 231219019Sgabor usage(); 232219019Sgabor 233219019Sgabor fbs.uc_to_mb_fallback = uc_to_mb_fb; 234219019Sgabor fbs.mb_to_wc_fallback = mb_to_wc_fb; 235219019Sgabor fbs.mb_to_uc_fallback = mb_to_uc_fb; 236219019Sgabor fbs.wc_to_mb_fallback = wc_to_mb_fb; 237219019Sgabor fbs.data = NULL; 238219019Sgabor 239219019Sgabor if (argc == 2) { 240219019Sgabor asprintf(&tocode, "%s%s%s", argv[1], tflag ? "//TRASNLIT" : "", 241219019Sgabor iflag ? "//IGNORE" : ""); 242219019Sgabor 243219019Sgabor if ((cd = iconv_open(tocode, argv[0])) == (iconv_t)-1) 244219019Sgabor err(1, NULL); 245219019Sgabor if (dflag) { 246219019Sgabor if (iconvctl(cd, ICONV_SET_FALLBACKS, &fbs) != 0) 247219019Sgabor err(1, NULL); 248219019Sgabor } 249219019Sgabor do_conv(cd, false); 250219019Sgabor } else if (rflag) { 251219019Sgabor asprintf(&tocode, "%s%s%s", argv[0], tflag ? "//TRANSLIT" : "", 252219019Sgabor iflag ? "//IGNORE" : ""); 253219019Sgabor 254219019Sgabor if ((cd = iconv_open(tocode, "UTF-32LE")) == (iconv_t)-1) 255219019Sgabor err(1, NULL); 256219019Sgabor if (dflag && iconvctl(cd, ICONV_SET_FALLBACKS, &fbs) != 0) 257219019Sgabor err(1, NULL); 258219019Sgabor if (cflag) { 259219019Sgabor printf("# $FreeBSD$\n\n"); 260219019Sgabor printf("TYPE\t\tROWCOL\n"); 261219019Sgabor printf("NAME\t\tUCS/%s\n", argv[0]); 262219019Sgabor printf("%s", citrus_common); 263219019Sgabor } 264219019Sgabor do_conv(cd, true); 265219019Sgabor } else { 266219019Sgabor if ((cd = iconv_open("UTF-32LE//TRANSLIT", argv[0])) == (iconv_t)-1) 267219019Sgabor err(1, NULL); 268219019Sgabor if (dflag && (iconvctl(cd, ICONV_SET_FALLBACKS, &fbs) != 0)) 269219019Sgabor err(1, NULL); 270219019Sgabor if (cflag) { 271219019Sgabor printf("# $FreeBSD$\n\n"); 272219019Sgabor printf("TYPE\t\tROWCOL\n"); 273219019Sgabor printf("NAME\t\t%s/UCS\n", argv[0]); 274219019Sgabor printf("%s", citrus_common); 275219019Sgabor } 276219019Sgabor do_conv(cd, false); 277219019Sgabor } 278219019Sgabor 279219019Sgabor if (iconv_close(cd) != 0) 280219019Sgabor err(1, NULL); 281219019Sgabor 282219019Sgabor return (EXIT_SUCCESS); 283219019Sgabor} 284219019Sgabor 285219019Sgaborstatic void 286219019Sgabordo_conv(iconv_t cd, bool uniinput) { 287219019Sgabor size_t inbytesleft, outbytesleft, ret; 288219019Sgabor uint32_t outbuf; 289219019Sgabor uint32_t inbuf; 290219019Sgabor const char *inbuf_; 291219019Sgabor char *outbuf_; 292219019Sgabor 293219019Sgabor for (inbuf = 0; inbuf < (lflag ? 0x100000 : 0x10000); inbuf += 1) { 294219019Sgabor if (uniinput && (inbuf >= 0xD800) && (inbuf <= 0xDF00)) 295219019Sgabor continue; 296219019Sgabor inbytesleft = uniinput ? 4 : magnitude(inbuf); 297219019Sgabor outbytesleft = 4; 298219019Sgabor outbuf = 0x00000000; 299219019Sgabor outbuf_ = (char *)&outbuf; 300219019Sgabor inbuf_ = (const char *)&inbuf; 301219019Sgabor iconv(cd, NULL, NULL, NULL, NULL); 302219019Sgabor fb_flags = 0; 303219019Sgabor errno = 0; 304219019Sgabor ret = iconv(cd, &inbuf_, &inbytesleft, &outbuf_, &outbytesleft); 305219019Sgabor if (ret == (size_t)-1) { 306219019Sgabor if (dflag) { 307219019Sgabor format(inbuf); 308219019Sgabor printf(" = "); 309219019Sgabor format_diag(errno); 310219019Sgabor printf("\n"); 311219019Sgabor } 312219019Sgabor continue; 313219019Sgabor } 314219019Sgabor format(inbuf); 315219019Sgabor printf(" = "); 316219019Sgabor format(outbuf); 317219019Sgabor printf("\n"); 318219019Sgabor } 319219019Sgabor if (cflag) 320219019Sgabor printf("END_MAP\n"); 321219019Sgabor} 322