1219019Sgabor/*- 2219019Sgabor * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org> 3219019Sgabor * All rights reserved. 4219019Sgabor * 5219019Sgabor * Redistribution and use in source and binary forms, with or without 6219019Sgabor * modification, are permitted provided that the following conditions 7219019Sgabor * are met: 8219019Sgabor * 1. Redistributions of source code must retain the above copyright 9219019Sgabor * notice, this list of conditions and the following disclaimer. 10219019Sgabor * 2. Redistributions in binary form must reproduce the above copyright 11219019Sgabor * notice, this list of conditions and the following disclaimer in the 12219019Sgabor * documentation and/or other materials provided with the distribution. 13219019Sgabor * 14219019Sgabor * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15219019Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16219019Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17219019Sgabor * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18219019Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19219019Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20219019Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21219019Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22219019Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23219019Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24219019Sgabor * SUCH DAMAGE. 25219019Sgabor */ 26219019Sgabor 27219019Sgabor#include <sys/cdefs.h> 28219019Sgabor__FBSDID("$FreeBSD$"); 29219019Sgabor 30219019Sgabor#include <sys/endian.h> 31219019Sgabor#include <sys/types.h> 32219019Sgabor 33219019Sgabor#include <err.h> 34219019Sgabor#include <errno.h> 35219019Sgabor#include <iconv.h> 36219019Sgabor#include <stdbool.h> 37219019Sgabor#include <stdio.h> 38219019Sgabor#include <stdlib.h> 39219019Sgabor#include <string.h> 40219019Sgabor 41219019Sgaborstatic bool uc_hook = false; 42219019Sgaborstatic bool wc_hook = false; 43219019Sgaborstatic bool mb_uc_fb = false; 44219019Sgabor 45219019Sgaborvoid unicode_hook(unsigned int mbr, void *data); 46219019Sgaborvoid wchar_hook(wchar_t wc, void *data); 47219019Sgabor 48219019Sgaborvoid mb_to_uc_fb(const char *, size_t, 49219019Sgabor void (*write_replacement) (const unsigned int *, size_t, void *), 50219019Sgabor void *, void *); 51219019Sgabor 52219019Sgaborstatic int 53219019Sgaborctl_get_translit1(void) 54219019Sgabor{ 55219019Sgabor iconv_t cd; 56219019Sgabor int arg, ret; 57219019Sgabor 58219019Sgabor cd = iconv_open("ASCII//TRANSLIT", "UTF-8"); 59219019Sgabor if (cd == (iconv_t)-1) 60219019Sgabor return (-1); 61219019Sgabor if (iconvctl(cd, ICONV_GET_TRANSLITERATE, &arg) == 0) 62219019Sgabor ret = (arg == 1) ? 0 : -1; 63219019Sgabor else 64219019Sgabor ret = -1; 65219019Sgabor if (iconv_close(cd) == -1) 66219019Sgabor return (-1); 67219019Sgabor return (ret); 68219019Sgabor} 69219019Sgabor 70219019Sgaborstatic int 71219019Sgaborctl_get_translit2(void) 72219019Sgabor{ 73219019Sgabor iconv_t cd; 74219019Sgabor int arg, ret; 75219019Sgabor 76219019Sgabor cd = iconv_open("ASCII", "UTF-8"); 77219019Sgabor if (cd == (iconv_t)-1) 78219019Sgabor return (-1); 79219019Sgabor if (iconvctl(cd, ICONV_GET_TRANSLITERATE, &arg) == 0) 80219019Sgabor ret = (arg == 0) ? 0 : -1; 81219019Sgabor else 82219019Sgabor ret = -1; 83219019Sgabor if (iconv_close(cd) == -1) 84219019Sgabor return (-1); 85219019Sgabor return (ret); 86219019Sgabor} 87219019Sgabor 88219019Sgaborstatic int 89219019Sgaborctl_set_translit1(void) 90219019Sgabor{ 91219019Sgabor iconv_t cd; 92219019Sgabor int arg = 1, ret; 93219019Sgabor 94219019Sgabor cd = iconv_open("ASCII", "UTF-8"); 95219019Sgabor if (cd == (iconv_t)-1) 96219019Sgabor return (-1); 97219019Sgabor ret = iconvctl(cd, ICONV_SET_TRANSLITERATE, &arg) == 0 ? 0 : -1; 98219019Sgabor if (iconv_close(cd) == -1) 99219019Sgabor return (-1); 100219019Sgabor return (ret); 101219019Sgabor} 102219019Sgabor 103219019Sgaborstatic int 104219019Sgaborctl_set_translit2(void) 105219019Sgabor{ 106219019Sgabor iconv_t cd; 107219019Sgabor int arg = 0, ret; 108219019Sgabor 109219019Sgabor cd = iconv_open("ASCII//TRANSLIT", "UTF-8"); 110219019Sgabor if (cd == (iconv_t)-1) 111219019Sgabor return (-1); 112219019Sgabor ret = iconvctl(cd, ICONV_SET_TRANSLITERATE, &arg) == 0 ? 0 : -1; 113219019Sgabor if (iconv_close(cd) == -1) 114219019Sgabor return (-1); 115219019Sgabor return (ret); 116219019Sgabor} 117219019Sgabor 118219019Sgaborstatic int 119219019Sgaborctl_get_discard_ilseq1(void) 120219019Sgabor{ 121219019Sgabor iconv_t cd; 122219019Sgabor int arg, ret; 123219019Sgabor 124219019Sgabor cd = iconv_open("ASCII", "UTF-8"); 125219019Sgabor if (cd == (iconv_t)-1) 126219019Sgabor return (-1); 127219019Sgabor if (iconvctl(cd, ICONV_GET_DISCARD_ILSEQ, &arg) == 0) 128219019Sgabor ret = arg == 0 ? 0 : -1; 129219019Sgabor else 130219019Sgabor ret = -1; 131219019Sgabor if (iconv_close(cd) == -1) 132219019Sgabor return (-1); 133219019Sgabor return (ret); 134219019Sgabor} 135219019Sgabor 136219019Sgaborstatic int 137219019Sgaborctl_get_discard_ilseq2(void) 138219019Sgabor{ 139219019Sgabor iconv_t cd; 140219019Sgabor int arg, ret; 141219019Sgabor 142219019Sgabor cd = iconv_open("ASCII//IGNORE", "UTF-8"); 143219019Sgabor if (cd == (iconv_t)-1) 144219019Sgabor return (-1); 145219019Sgabor if (iconvctl(cd, ICONV_GET_DISCARD_ILSEQ, &arg) == 0) 146219019Sgabor ret = arg == 1 ? 0 : -1; 147219019Sgabor else 148219019Sgabor ret = -1; 149219019Sgabor if (iconv_close(cd) == -1) 150219019Sgabor return (-1); 151219019Sgabor return (ret); 152219019Sgabor} 153219019Sgabor 154219019Sgaborstatic int 155219019Sgaborctl_set_discard_ilseq1(void) 156219019Sgabor{ 157219019Sgabor iconv_t cd; 158219019Sgabor int arg = 1, ret; 159219019Sgabor 160219019Sgabor cd = iconv_open("ASCII", "UTF-8"); 161219019Sgabor if (cd == (iconv_t)-1) 162219019Sgabor return (-1); 163219019Sgabor ret = iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, &arg) == 0 ? 0 : -1; 164219019Sgabor if (iconv_close(cd) == -1) 165219019Sgabor return (-1); 166219019Sgabor return (ret); 167219019Sgabor} 168219019Sgabor 169219019Sgaborstatic int 170219019Sgaborctl_set_discard_ilseq2(void) 171219019Sgabor{ 172219019Sgabor iconv_t cd; 173219019Sgabor int arg = 0, ret; 174219019Sgabor 175219019Sgabor cd = iconv_open("ASCII//IGNORE", "UTF-8"); 176219019Sgabor if (cd == (iconv_t)-1) 177219019Sgabor return (-1); 178219019Sgabor ret = iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, &arg) == 0 ? 0 : -1; 179219019Sgabor if (iconv_close(cd) == -1) 180219019Sgabor return (-1); 181219019Sgabor return (ret); 182219019Sgabor} 183219019Sgabor 184219019Sgaborstatic int 185219019Sgaborctl_trivialp1(void) 186219019Sgabor{ 187219019Sgabor iconv_t cd; 188219019Sgabor int arg, ret; 189219019Sgabor 190219019Sgabor cd = iconv_open("latin2", "latin2"); 191219019Sgabor if (cd == (iconv_t)-1) 192219019Sgabor return (-1); 193219019Sgabor if (iconvctl(cd, ICONV_TRIVIALP, &arg) == 0) { 194219019Sgabor ret = (arg == 1) ? 0 : -1; 195219019Sgabor } else 196219019Sgabor ret = -1; 197219019Sgabor if (iconv_close(cd) == -1) 198219019Sgabor return (-1); 199219019Sgabor return (ret); 200219019Sgabor} 201219019Sgabor 202219019Sgaborstatic int 203219019Sgaborctl_trivialp2(void) 204219019Sgabor{ 205219019Sgabor iconv_t cd; 206219019Sgabor int arg, ret; 207219019Sgabor 208219019Sgabor cd = iconv_open("ASCII", "KOI8-R"); 209219019Sgabor if (cd == (iconv_t)-1) 210219019Sgabor return (-1); 211219019Sgabor if (iconvctl(cd, ICONV_TRIVIALP, &arg) == 0) { 212219019Sgabor ret = (arg == 0) ? 0 : -1; 213219019Sgabor } else 214219019Sgabor ret = -1; 215219019Sgabor if (iconv_close(cd) == -1) 216219019Sgabor return (-1); 217219019Sgabor return (ret); 218219019Sgabor} 219219019Sgabor 220219019Sgaborvoid 221219019Sgaborunicode_hook(unsigned int mbr, void *data) 222219019Sgabor{ 223219019Sgabor 224219019Sgabor#ifdef VERBOSE 225219019Sgabor printf("Unicode hook: %u\n", mbr); 226219019Sgabor#endif 227219019Sgabor uc_hook = true; 228219019Sgabor} 229219019Sgabor 230219019Sgaborvoid 231219019Sgaborwchar_hook(wchar_t wc, void *data) 232219019Sgabor{ 233219019Sgabor 234219019Sgabor#ifdef VERBOSE 235219019Sgabor printf("Wchar hook: %ull\n", wc); 236219019Sgabor#endif 237219019Sgabor wc_hook = true; 238219019Sgabor} 239219019Sgabor 240219019Sgaborstatic int 241219019Sgaborctl_uc_hook(void) 242219019Sgabor{ 243219019Sgabor struct iconv_hooks hooks; 244219019Sgabor iconv_t cd; 245219019Sgabor size_t inbytesleft = 15, outbytesleft = 40; 246219019Sgabor const char **inptr; 247219019Sgabor const char *s = "Hello World!"; 248219019Sgabor char **outptr; 249219019Sgabor char *outbuf; 250219019Sgabor 251219019Sgabor inptr = &s; 252219019Sgabor hooks.uc_hook = unicode_hook; 253219019Sgabor hooks.wc_hook = NULL; 254219019Sgabor 255219019Sgabor outbuf = malloc(40); 256219019Sgabor outptr = &outbuf; 257219019Sgabor 258219019Sgabor cd = iconv_open("UTF-8", "ASCII"); 259219019Sgabor if (cd == (iconv_t)-1) 260219019Sgabor return (-1); 261219019Sgabor if (iconvctl(cd, ICONV_SET_HOOKS, (void *)&hooks) != 0) 262219019Sgabor return (-1); 263219019Sgabor if (iconv(cd, inptr, &inbytesleft, outptr, &outbytesleft) == (size_t)-1) 264219019Sgabor return (-1); 265219019Sgabor if (iconv_close(cd) == -1) 266219019Sgabor return (-1); 267219019Sgabor return (uc_hook ? 0 : 1); 268219019Sgabor} 269219019Sgabor 270219019Sgaborstatic int 271219019Sgaborctl_wc_hook(void) 272219019Sgabor{ 273219019Sgabor struct iconv_hooks hooks; 274219019Sgabor iconv_t cd; 275219019Sgabor size_t inbytesleft, outbytesleft = 40; 276219019Sgabor const char **inptr; 277219019Sgabor const char *s = "Hello World!"; 278219019Sgabor char **outptr; 279219019Sgabor char *outbuf; 280219019Sgabor 281219019Sgabor inptr = &s; 282219019Sgabor hooks.wc_hook = wchar_hook; 283219019Sgabor hooks.uc_hook = NULL; 284219019Sgabor 285219019Sgabor outbuf = malloc(40); 286219019Sgabor outptr = &outbuf; 287219019Sgabor inbytesleft = sizeof(s); 288219019Sgabor 289219019Sgabor cd = iconv_open("SHIFT_JIS", "ASCII"); 290219019Sgabor if (cd == (iconv_t)-1) 291219019Sgabor return (-1); 292219019Sgabor if (iconvctl(cd, ICONV_SET_HOOKS, (void *)&hooks) != 0) 293219019Sgabor return (-1); 294219019Sgabor if (iconv(cd, inptr, &inbytesleft, outptr, &outbytesleft) == (size_t)-1) 295219019Sgabor return (-1); 296219019Sgabor if (iconv_close(cd) == -1) 297219019Sgabor return (-1); 298219019Sgabor return (wc_hook ? 0 : 1); 299219019Sgabor} 300219019Sgabor 301219019Sgabor 302219019Sgabor 303219019Sgaborstatic int 304219019Sgaborgnu_canonicalize1(void) 305219019Sgabor{ 306219019Sgabor 307219019Sgabor return (strcmp(iconv_canonicalize("latin2"), "ISO-8859-2")); 308219019Sgabor} 309219019Sgabor 310219019Sgaborstatic int 311219019Sgaborgnu_canonicalize2(void) 312219019Sgabor{ 313219019Sgabor 314219019Sgabor return (!strcmp(iconv_canonicalize("ASCII"), iconv_canonicalize("latin2"))); 315219019Sgabor} 316219019Sgabor 317219019Sgabor 318219019Sgaborstatic int 319219019Sgaboriconvlist_cb(unsigned int count, const char * const *names, void *data) 320219019Sgabor{ 321219019Sgabor 322219019Sgabor return (*(int *)data = ((names == NULL) && (count > 0)) ? -1 : 0); 323219019Sgabor} 324219019Sgabor 325219019Sgaborstatic int 326219019Sgaborgnu_iconvlist(void) 327219019Sgabor{ 328219019Sgabor int i; 329219019Sgabor 330219019Sgabor iconvlist(iconvlist_cb, (void *)&i); 331219019Sgabor return (i); 332219019Sgabor} 333219019Sgabor 334219019Sgaborvoid 335219019Sgabormb_to_uc_fb(const char* inbuf, size_t inbufsize, 336219019Sgabor void (*write_replacement)(const unsigned int *buf, size_t buflen, 337219019Sgabor void* callback_arg), void* callback_arg, void* data) 338219019Sgabor{ 339219019Sgabor unsigned int c = 0x3F; 340219019Sgabor 341219019Sgabor mb_uc_fb = true; 342219019Sgabor write_replacement((const unsigned int *)&c, 1, NULL); 343219019Sgabor} 344219019Sgabor 345219019Sgaborstatic int __unused 346219019Sgaborctl_mb_to_uc_fb(void) 347219019Sgabor{ 348219019Sgabor struct iconv_fallbacks fb; 349219019Sgabor iconv_t cd; 350219019Sgabor size_t inbytesleft, outbytesleft; 351219019Sgabor uint16_t inbuf[1] = { 0xF187 }; 352219019Sgabor uint8_t outbuf[4] = { 0x00, 0x00, 0x00, 0x00 }; 353219019Sgabor const char *inptr; 354219019Sgabor char *outptr; 355219019Sgabor int ret; 356219019Sgabor 357219019Sgabor if ((cd = iconv_open("UTF-32", "UTF-8")) == (iconv_t)-1) 358219019Sgabor return (1); 359219019Sgabor 360219019Sgabor fb.uc_to_mb_fallback = NULL; 361219019Sgabor fb.mb_to_wc_fallback = NULL; 362219019Sgabor fb.wc_to_mb_fallback = NULL; 363219019Sgabor fb.mb_to_uc_fallback = mb_to_uc_fb; 364219019Sgabor fb.data = NULL; 365219019Sgabor 366219019Sgabor if (iconvctl(cd, ICONV_SET_FALLBACKS, (void *)&fb) != 0) 367219019Sgabor return (1); 368219019Sgabor 369219019Sgabor inptr = (const char *)inbuf; 370219019Sgabor outptr = (char *)outbuf; 371219019Sgabor inbytesleft = 2; 372219019Sgabor outbytesleft = 4; 373219019Sgabor 374219019Sgabor errno = 0; 375219019Sgabor ret = iconv(cd, &inptr, &inbytesleft, &outptr, &outbytesleft); 376219019Sgabor 377219019Sgabor#ifdef VERBOSE 378219019Sgabor printf("mb_uc fallback: %c\n", outbuf[0]); 379219019Sgabor#endif 380219019Sgabor 381219019Sgabor if (mb_uc_fb && (outbuf[0] == 0x3F)) 382219019Sgabor return (0); 383219019Sgabor else 384219019Sgabor return (1); 385219019Sgabor} 386219019Sgabor 387219019Sgaborstatic int 388219019Sgaborgnu_openinto(void) 389219019Sgabor{ 390219019Sgabor iconv_allocation_t *myspace; 391219019Sgabor size_t inbytesleft, outbytesleft; 392219019Sgabor const char *inptr; 393219019Sgabor char *inbuf = "works!", *outptr; 394219019Sgabor char outbuf[6]; 395219019Sgabor 396219019Sgabor if ((myspace = (iconv_allocation_t *)malloc(sizeof(iconv_allocation_t))) == NULL) 397219019Sgabor return (1); 398219019Sgabor if (iconv_open_into("ASCII", "ASCII", myspace) == -1) 399219019Sgabor return (1); 400219019Sgabor 401219019Sgabor inptr = (const char *)inbuf; 402219019Sgabor outptr = (char *)outbuf; 403219019Sgabor inbytesleft = 6; 404219019Sgabor outbytesleft = 6; 405219019Sgabor 406219019Sgabor iconv((iconv_t)myspace, &inptr, &inbytesleft, &outptr, &outbytesleft); 407219019Sgabor 408219019Sgabor return ((memcmp(inbuf, outbuf, 6) == 0) ? 0 : 1); 409219019Sgabor} 410219019Sgabor 411219019Sgaborstatic void 412219019Sgabortest(int (tester) (void), const char * label) 413219019Sgabor{ 414219019Sgabor int ret; 415219019Sgabor 416219019Sgabor if ((ret = tester())) 417219019Sgabor printf("%s failed (%d)\n", label, ret); 418219019Sgabor else 419219019Sgabor printf("%s succeeded\n", label); 420219019Sgabor} 421219019Sgabor 422219019Sgaborint 423219019Sgabormain(void) 424219019Sgabor{ 425219019Sgabor test(ctl_get_translit1, "ctl_get_translit1"); 426219019Sgabor test(ctl_get_translit2, "ctl_get_translit2"); 427219019Sgabor test(ctl_set_translit1, "ctl_set_translit1"); 428219019Sgabor test(ctl_set_translit2, "ctl_set_translit2"); 429219019Sgabor test(ctl_get_discard_ilseq1, "ctl_get_discard_ilseq1"); 430219019Sgabor test(ctl_get_discard_ilseq2, "ctl_get_discard_ilseq2"); 431219019Sgabor test(ctl_set_discard_ilseq1, "ctl_set_discard_ilseq1"); 432219019Sgabor test(ctl_set_discard_ilseq2, "ctl_set_discard_ilseq2"); 433219019Sgabor test(ctl_trivialp1, "ctl_trivialp1"); 434219019Sgabor test(ctl_trivialp2, "ctl_trivialp2"); 435219019Sgabor test(ctl_uc_hook, "ctl_uc_hook"); 436219019Sgabor test(ctl_wc_hook, "ctl_wc_hook"); 437219019Sgabor// test(ctl_mb_to_uc_fb, "ctl_mb_to_uc_fb"); 438219019Sgabor test(gnu_openinto, "gnu_openinto"); 439219019Sgabor test(gnu_canonicalize1, "gnu_canonicalize1"); 440219019Sgabor test(gnu_canonicalize2, "gnu_canonicalize2"); 441219019Sgabor test(gnu_iconvlist, "gnu_iconvlist"); 442219019Sgabor} 443