1120492Sfjoe/*- 2194638Sdelphij * Copyright (c) 2003, 2005 Ryuichiro Imura 3120492Sfjoe * All rights reserved. 4120492Sfjoe * 5120492Sfjoe * Redistribution and use in source and binary forms, with or without 6120492Sfjoe * modification, are permitted provided that the following conditions 7120492Sfjoe * are met: 8120492Sfjoe * 1. Redistributions of source code must retain the above copyright 9120492Sfjoe * notice, this list of conditions and the following disclaimer. 10120492Sfjoe * 2. Redistributions in binary form must reproduce the above copyright 11120492Sfjoe * notice, this list of conditions and the following disclaimer in the 12120492Sfjoe * documentation and/or other materials provided with the distribution. 13120492Sfjoe * 14120492Sfjoe * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15120492Sfjoe * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16120492Sfjoe * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17120492Sfjoe * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18120492Sfjoe * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19120492Sfjoe * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20120492Sfjoe * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21120492Sfjoe * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22120492Sfjoe * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23120492Sfjoe * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24120492Sfjoe * SUCH DAMAGE. 25120492Sfjoe * 26120492Sfjoe * $FreeBSD$ 27120492Sfjoe */ 28120492Sfjoe 29120492Sfjoe/* 30120492Sfjoe * kiconv(3) requires shared linked, and reduce module size 31120492Sfjoe * when statically linked. 32120492Sfjoe */ 33120492Sfjoe 34275004Semaste#ifdef PIC 35120492Sfjoe 36120492Sfjoe#include <sys/types.h> 37120492Sfjoe#include <sys/iconv.h> 38120492Sfjoe#include <sys/sysctl.h> 39120492Sfjoe 40120492Sfjoe#include <ctype.h> 41120492Sfjoe#include <dlfcn.h> 42120492Sfjoe#include <err.h> 43120492Sfjoe#include <errno.h> 44194638Sdelphij#include <locale.h> 45120492Sfjoe#include <stdio.h> 46120492Sfjoe#include <stdlib.h> 47120492Sfjoe#include <string.h> 48194638Sdelphij#include <wctype.h> 49120492Sfjoe 50120492Sfjoe#include "quirks.h" 51120492Sfjoe 52120492Sfjoestruct xlat16_table { 53120492Sfjoe uint32_t * idx[0x200]; 54120492Sfjoe void * data; 55120492Sfjoe size_t size; 56120492Sfjoe}; 57120492Sfjoe 58120492Sfjoestatic struct xlat16_table kiconv_xlat16_open(const char *, const char *, int); 59194638Sdelphijstatic int chklocale(int, const char *); 60120492Sfjoe 61236028Sgabor#ifdef ICONV_DLOPEN 62254273Spetertypedef void *iconv_t; 63120492Sfjoestatic int my_iconv_init(void); 64120492Sfjoestatic iconv_t (*my_iconv_open)(const char *, const char *); 65281550Stijlstatic size_t (*my_iconv)(iconv_t, char **, size_t *, char **, size_t *); 66120492Sfjoestatic int (*my_iconv_close)(iconv_t); 67236028Sgabor#else 68236028Sgabor#include <iconv.h> 69236028Sgabor#define my_iconv_init() 0 70236028Sgabor#define my_iconv_open iconv_open 71236028Sgabor#define my_iconv iconv 72236028Sgabor#define my_iconv_close iconv_close 73236028Sgabor#endif 74281550Stijlstatic size_t my_iconv_char(iconv_t, u_char **, size_t *, u_char **, size_t *); 75120492Sfjoe 76120492Sfjoeint 77120492Sfjoekiconv_add_xlat16_cspair(const char *tocode, const char *fromcode, int flag) 78120492Sfjoe{ 79120492Sfjoe int error; 80194638Sdelphij size_t idxsize; 81120492Sfjoe struct xlat16_table xt; 82148717Sstefanf void *data; 83148717Sstefanf char *p; 84227650Skevlo const char unicode[] = ENCODING_UNICODE; 85120492Sfjoe 86227650Skevlo if ((flag & KICONV_WCTYPE) == 0 && 87227650Skevlo strcmp(unicode, tocode) != 0 && 88227650Skevlo strcmp(unicode, fromcode) != 0 && 89227650Skevlo kiconv_lookupconv(unicode) == 0) { 90227650Skevlo error = kiconv_add_xlat16_cspair(unicode, fromcode, flag); 91227650Skevlo if (error) 92227650Skevlo return (-1); 93227650Skevlo error = kiconv_add_xlat16_cspair(tocode, unicode, flag); 94227650Skevlo return (error); 95227650Skevlo } 96227650Skevlo 97194638Sdelphij if (kiconv_lookupcs(tocode, fromcode) == 0) 98194638Sdelphij return (0); 99120492Sfjoe 100194638Sdelphij if (flag & KICONV_WCTYPE) 101194638Sdelphij xt = kiconv_xlat16_open(fromcode, fromcode, flag); 102194638Sdelphij else 103194638Sdelphij xt = kiconv_xlat16_open(tocode, fromcode, flag); 104120492Sfjoe if (xt.size == 0) 105120492Sfjoe return (-1); 106120492Sfjoe 107120492Sfjoe idxsize = sizeof(xt.idx); 108120492Sfjoe 109120492Sfjoe if ((idxsize + xt.size) > ICONV_CSMAXDATALEN) { 110120492Sfjoe errno = E2BIG; 111120492Sfjoe return (-1); 112120492Sfjoe } 113120492Sfjoe 114120492Sfjoe if ((data = malloc(idxsize + xt.size)) != NULL) { 115120492Sfjoe p = data; 116120492Sfjoe memcpy(p, xt.idx, idxsize); 117120492Sfjoe p += idxsize; 118120492Sfjoe memcpy(p, xt.data, xt.size); 119120492Sfjoe error = kiconv_add_xlat16_table(tocode, fromcode, data, 120120492Sfjoe (int)(idxsize + xt.size)); 121120492Sfjoe return (error); 122120492Sfjoe } 123120492Sfjoe 124120492Sfjoe return (-1); 125120492Sfjoe} 126120492Sfjoe 127123293Sfjoeint 128123293Sfjoekiconv_add_xlat16_cspairs(const char *foreigncode, const char *localcode) 129123293Sfjoe{ 130194638Sdelphij int error, locale; 131123293Sfjoe 132123293Sfjoe error = kiconv_add_xlat16_cspair(foreigncode, localcode, 133123293Sfjoe KICONV_FROM_LOWER | KICONV_FROM_UPPER); 134123293Sfjoe if (error) 135123293Sfjoe return (error); 136123293Sfjoe error = kiconv_add_xlat16_cspair(localcode, foreigncode, 137123293Sfjoe KICONV_LOWER | KICONV_UPPER); 138123293Sfjoe if (error) 139123293Sfjoe return (error); 140194638Sdelphij locale = chklocale(LC_CTYPE, localcode); 141194638Sdelphij if (locale == 0) { 142194638Sdelphij error = kiconv_add_xlat16_cspair(KICONV_WCTYPE_NAME, localcode, 143194638Sdelphij KICONV_WCTYPE); 144194638Sdelphij if (error) 145194638Sdelphij return (error); 146194638Sdelphij } 147194638Sdelphij 148123293Sfjoe return (0); 149123293Sfjoe} 150123293Sfjoe 151120492Sfjoestatic struct xlat16_table 152120492Sfjoekiconv_xlat16_open(const char *tocode, const char *fromcode, int lcase) 153120492Sfjoe{ 154120492Sfjoe u_char src[3], dst[4], *srcp, *dstp, ud, ld; 155120492Sfjoe int us, ls, ret; 156120492Sfjoe uint16_t c; 157120492Sfjoe uint32_t table[0x80]; 158120492Sfjoe size_t inbytesleft, outbytesleft, pre_q_size, post_q_size; 159120492Sfjoe struct xlat16_table xt; 160120492Sfjoe struct quirk_replace_list *pre_q_list, *post_q_list; 161120492Sfjoe iconv_t cd; 162148717Sstefanf char *p; 163120492Sfjoe 164120492Sfjoe xt.data = NULL; 165120492Sfjoe xt.size = 0; 166120492Sfjoe 167126949Sbde src[2] = '\0'; 168126949Sbde dst[3] = '\0'; 169120492Sfjoe 170120492Sfjoe ret = my_iconv_init(); 171120492Sfjoe if (ret) 172120492Sfjoe return (xt); 173120492Sfjoe 174120492Sfjoe cd = my_iconv_open(search_quirk(tocode, fromcode, &pre_q_list, &pre_q_size), 175120492Sfjoe search_quirk(fromcode, tocode, &post_q_list, &post_q_size)); 176120492Sfjoe if (cd == (iconv_t) (-1)) 177120492Sfjoe return (xt); 178120492Sfjoe 179120492Sfjoe if ((xt.data = malloc(0x200 * 0x80 * sizeof(uint32_t))) == NULL) 180120492Sfjoe return (xt); 181120492Sfjoe 182120492Sfjoe p = xt.data; 183120492Sfjoe 184120492Sfjoe for (ls = 0 ; ls < 0x200 ; ls++) { 185120492Sfjoe xt.idx[ls] = NULL; 186120492Sfjoe for (us = 0 ; us < 0x80 ; us++) { 187120492Sfjoe srcp = src; 188120492Sfjoe dstp = dst; 189120492Sfjoe 190120492Sfjoe inbytesleft = 2; 191120492Sfjoe outbytesleft = 3; 192120492Sfjoe bzero(dst, outbytesleft); 193120492Sfjoe 194120492Sfjoe c = ((ls & 0x100 ? us | 0x80 : us) << 8) | (u_char)ls; 195194638Sdelphij 196194638Sdelphij if (lcase & KICONV_WCTYPE) { 197194638Sdelphij if ((c & 0xff) == 0) 198194638Sdelphij c >>= 8; 199194638Sdelphij if (iswupper(c)) { 200194638Sdelphij c = towlower(c); 201194638Sdelphij if ((c & 0xff00) == 0) 202194638Sdelphij c <<= 8; 203194638Sdelphij table[us] = c | XLAT16_HAS_LOWER_CASE; 204194638Sdelphij } else if (iswlower(c)) { 205194638Sdelphij c = towupper(c); 206194638Sdelphij if ((c & 0xff00) == 0) 207194638Sdelphij c <<= 8; 208194638Sdelphij table[us] = c | XLAT16_HAS_UPPER_CASE; 209194638Sdelphij } else 210194638Sdelphij table[us] = 0; 211194638Sdelphij /* 212194638Sdelphij * store not NULL 213194638Sdelphij */ 214194638Sdelphij if (table[us]) 215194638Sdelphij xt.idx[ls] = table; 216194638Sdelphij 217194638Sdelphij continue; 218194638Sdelphij } 219194638Sdelphij 220120492Sfjoe c = quirk_vendor2unix(c, pre_q_list, pre_q_size); 221120492Sfjoe src[0] = (u_char)(c >> 8); 222120492Sfjoe src[1] = (u_char)c; 223120492Sfjoe 224281550Stijl ret = my_iconv_char(cd, &srcp, &inbytesleft, 225281550Stijl &dstp, &outbytesleft); 226120492Sfjoe if (ret == -1) { 227120492Sfjoe table[us] = 0; 228120492Sfjoe continue; 229120492Sfjoe } 230120492Sfjoe 231120492Sfjoe ud = (u_char)dst[0]; 232120492Sfjoe ld = (u_char)dst[1]; 233120492Sfjoe 234120492Sfjoe switch(outbytesleft) { 235120492Sfjoe case 0: 236120492Sfjoe#ifdef XLAT16_ACCEPT_3BYTE_CHR 237120492Sfjoe table[us] = (ud << 8) | ld; 238120492Sfjoe table[us] |= (u_char)dst[2] << 16; 239120492Sfjoe table[us] |= XLAT16_IS_3BYTE_CHR; 240120492Sfjoe#else 241120492Sfjoe table[us] = 0; 242120492Sfjoe continue; 243120492Sfjoe#endif 244120492Sfjoe break; 245120492Sfjoe case 1: 246120492Sfjoe table[us] = quirk_unix2vendor((ud << 8) | ld, 247120492Sfjoe post_q_list, post_q_size); 248120492Sfjoe if ((table[us] >> 8) == 0) 249120492Sfjoe table[us] |= XLAT16_ACCEPT_NULL_OUT; 250120492Sfjoe break; 251120492Sfjoe case 2: 252120492Sfjoe table[us] = ud; 253120492Sfjoe if (lcase & KICONV_LOWER && ud != tolower(ud)) { 254120492Sfjoe table[us] |= (u_char)tolower(ud) << 16; 255120492Sfjoe table[us] |= XLAT16_HAS_LOWER_CASE; 256120492Sfjoe } 257120492Sfjoe if (lcase & KICONV_UPPER && ud != toupper(ud)) { 258120492Sfjoe table[us] |= (u_char)toupper(ud) << 16; 259120492Sfjoe table[us] |= XLAT16_HAS_UPPER_CASE; 260120492Sfjoe } 261120492Sfjoe break; 262120492Sfjoe } 263120492Sfjoe 264120492Sfjoe switch(inbytesleft) { 265120492Sfjoe case 0: 266120492Sfjoe if ((ls & 0xff) == 0) 267120492Sfjoe table[us] |= XLAT16_ACCEPT_NULL_IN; 268120492Sfjoe break; 269120492Sfjoe case 1: 270120492Sfjoe c = ls > 0xff ? us | 0x80 : us; 271120492Sfjoe if (lcase & KICONV_FROM_LOWER && c != tolower(c)) { 272120492Sfjoe table[us] |= (u_char)tolower(c) << 16; 273120492Sfjoe table[us] |= XLAT16_HAS_FROM_LOWER_CASE; 274120492Sfjoe } 275120492Sfjoe if (lcase & KICONV_FROM_UPPER && c != toupper(c)) { 276120492Sfjoe table[us] |= (u_char)toupper(c) << 16; 277120492Sfjoe table[us] |= XLAT16_HAS_FROM_UPPER_CASE; 278120492Sfjoe } 279120492Sfjoe break; 280120492Sfjoe } 281120492Sfjoe 282120492Sfjoe if (table[us] == 0) 283120492Sfjoe continue; 284120492Sfjoe 285120492Sfjoe /* 286120492Sfjoe * store not NULL 287120492Sfjoe */ 288120492Sfjoe xt.idx[ls] = table; 289120492Sfjoe } 290120492Sfjoe if (xt.idx[ls]) { 291120492Sfjoe memcpy(p, table, sizeof(table)); 292120492Sfjoe p += sizeof(table); 293120492Sfjoe } 294120492Sfjoe } 295120492Sfjoe my_iconv_close(cd); 296120492Sfjoe 297148717Sstefanf xt.size = p - (char *)xt.data; 298120492Sfjoe xt.data = realloc(xt.data, xt.size); 299120492Sfjoe return (xt); 300120492Sfjoe} 301120492Sfjoe 302120492Sfjoestatic int 303194638Sdelphijchklocale(int category, const char *code) 304194638Sdelphij{ 305194638Sdelphij char *p; 306194638Sdelphij int error = -1; 307194638Sdelphij 308194638Sdelphij p = strchr(setlocale(category, NULL), '.'); 309194638Sdelphij if (p++) { 310194638Sdelphij error = strcasecmp(code, p); 311194638Sdelphij if (error) { 312194638Sdelphij /* XXX - can't avoid calling quirk here... */ 313194638Sdelphij error = strcasecmp(code, kiconv_quirkcs(p, 314194638Sdelphij KICONV_VENDOR_MICSFT)); 315194638Sdelphij } 316194638Sdelphij } 317194638Sdelphij return (error); 318194638Sdelphij} 319194638Sdelphij 320236028Sgabor#ifdef ICONV_DLOPEN 321194638Sdelphijstatic int 322120492Sfjoemy_iconv_init(void) 323120492Sfjoe{ 324120492Sfjoe void *iconv_lib; 325120492Sfjoe 326120492Sfjoe iconv_lib = dlopen("libiconv.so", RTLD_LAZY | RTLD_GLOBAL); 327120492Sfjoe if (iconv_lib == NULL) { 328120492Sfjoe warn("Unable to load iconv library: %s\n", dlerror()); 329120492Sfjoe errno = ENOENT; 330120492Sfjoe return (-1); 331120492Sfjoe } 332120492Sfjoe my_iconv_open = dlsym(iconv_lib, "iconv_open"); 333120492Sfjoe my_iconv = dlsym(iconv_lib, "iconv"); 334120492Sfjoe my_iconv_close = dlsym(iconv_lib, "iconv_close"); 335120492Sfjoe 336120492Sfjoe return (0); 337120492Sfjoe} 338236028Sgabor#endif 339120492Sfjoe 340120492Sfjoestatic size_t 341281550Stijlmy_iconv_char(iconv_t cd, u_char **ibuf, size_t * ilen, u_char **obuf, 342120492Sfjoe size_t * olen) 343120492Sfjoe{ 344281550Stijl u_char *sp, *dp, ilocal[3], olocal[3]; 345120492Sfjoe u_char c1, c2; 346120492Sfjoe int ret; 347120492Sfjoe size_t ir, or; 348120492Sfjoe 349120492Sfjoe sp = *ibuf; 350120492Sfjoe dp = *obuf; 351120492Sfjoe ir = *ilen; 352120492Sfjoe 353120492Sfjoe bzero(*obuf, *olen); 354281550Stijl ret = my_iconv(cd, (char **)&sp, ilen, (char **)&dp, olen); 355120492Sfjoe c1 = (*obuf)[0]; 356120492Sfjoe c2 = (*obuf)[1]; 357120492Sfjoe 358120492Sfjoe if (ret == -1) { 359120492Sfjoe if (*ilen == ir - 1 && (*ibuf)[1] == '\0' && (c1 || c2)) 360120492Sfjoe return (0); 361120492Sfjoe else 362120492Sfjoe return (-1); 363120492Sfjoe } 364120492Sfjoe 365120492Sfjoe /* 366120492Sfjoe * We must judge if inbuf is a single byte char or double byte char. 367120492Sfjoe * Here, to judge, try first byte(*sp) conversion and compare. 368120492Sfjoe */ 369120492Sfjoe ir = 1; 370120492Sfjoe or = 3; 371120492Sfjoe 372120492Sfjoe bzero(olocal, or); 373120492Sfjoe memcpy(ilocal, *ibuf, sizeof(ilocal)); 374120492Sfjoe sp = ilocal; 375120492Sfjoe dp = olocal; 376120492Sfjoe 377281550Stijl if ((my_iconv(cd,(char **)&sp, &ir, (char **)&dp, &or)) != -1) { 378120492Sfjoe if (olocal[0] != c1) 379120492Sfjoe return (ret); 380120492Sfjoe 381120492Sfjoe if (olocal[1] == c2 && (*ibuf)[1] == '\0') { 382120492Sfjoe /* 383120492Sfjoe * inbuf is a single byte char 384120492Sfjoe */ 385120492Sfjoe *ilen = 1; 386120492Sfjoe *olen = or; 387120492Sfjoe return (ret); 388120492Sfjoe } 389120492Sfjoe 390120492Sfjoe switch(or) { 391120492Sfjoe case 0: 392120492Sfjoe case 1: 393120492Sfjoe if (olocal[1] == c2) { 394120492Sfjoe /* 395120492Sfjoe * inbuf is a single byte char, 396120492Sfjoe * so return false here. 397120492Sfjoe */ 398120492Sfjoe return (-1); 399120492Sfjoe } else { 400120492Sfjoe /* 401120492Sfjoe * inbuf is a double byte char 402120492Sfjoe */ 403120492Sfjoe return (ret); 404120492Sfjoe } 405120492Sfjoe break; 406120492Sfjoe case 2: 407120492Sfjoe /* 408120492Sfjoe * should compare second byte of inbuf 409120492Sfjoe */ 410120492Sfjoe break; 411120492Sfjoe } 412120492Sfjoe } else { 413120492Sfjoe /* 414120492Sfjoe * inbuf clould not be splitted, so inbuf is 415120492Sfjoe * a double byte char. 416120492Sfjoe */ 417120492Sfjoe return (ret); 418120492Sfjoe } 419120492Sfjoe 420120492Sfjoe /* 421120492Sfjoe * try second byte(*(sp+1)) conversion, and compare 422120492Sfjoe */ 423120492Sfjoe ir = 1; 424120492Sfjoe or = 3; 425120492Sfjoe 426120492Sfjoe bzero(olocal, or); 427120492Sfjoe 428120492Sfjoe sp = ilocal + 1; 429120492Sfjoe dp = olocal; 430120492Sfjoe 431281550Stijl if ((my_iconv(cd,(char **)&sp, &ir, (char **)&dp, &or)) != -1) { 432120492Sfjoe if (olocal[0] == c2) 433120492Sfjoe /* 434120492Sfjoe * inbuf is a single byte char 435120492Sfjoe */ 436120492Sfjoe return (-1); 437120492Sfjoe } 438120492Sfjoe 439120492Sfjoe return (ret); 440120492Sfjoe} 441120492Sfjoe 442120492Sfjoe#else /* statically linked */ 443120492Sfjoe 444194638Sdelphij#include <sys/types.h> 445194638Sdelphij#include <sys/iconv.h> 446120492Sfjoe#include <errno.h> 447120492Sfjoe 448120492Sfjoeint 449194638Sdelphijkiconv_add_xlat16_cspair(const char *tocode __unused, const char *fromcode __unused, 450194638Sdelphij int flag __unused) 451120492Sfjoe{ 452194638Sdelphij 453120492Sfjoe errno = EINVAL; 454120492Sfjoe return (-1); 455120492Sfjoe} 456120492Sfjoe 457123293Sfjoeint 458194638Sdelphijkiconv_add_xlat16_cspairs(const char *tocode __unused, const char *fromcode __unused) 459123293Sfjoe{ 460123293Sfjoe errno = EINVAL; 461123293Sfjoe return (-1); 462123293Sfjoe} 463123293Sfjoe 464275004Semaste#endif /* PIC */ 465