1/* 2 Unix SMB/CIFS implementation. 3 minimal iconv implementation 4 Copyright (C) Andrew Tridgell 2001 5 Copyright (C) Jelmer Vernooij 2002,2003 6 7 This program is free software; you can redistribute it and/or modify 8 it under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 2 of the License, or 10 (at your option) any later version. 11 12 This program is distributed in the hope that it will be useful, 13 but WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 GNU General Public License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with this program; if not, write to the Free Software 19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 20 21 From samba 3.0 beta and GNU libiconv-1.8 22 It's bad but most of the time we can't use libc iconv service: 23 - it doesn't round trip for most encoding 24 - it doesn't know about Apple extension 25*/ 26 27#ifdef HAVE_CONFIG_H 28#include "config.h" 29#endif /* HAVE_CONFIG_H */ 30 31#include <stdio.h> 32#include <stdlib.h> 33#include <unistd.h> 34#include <string.h> 35#include <ctype.h> 36#include <errno.h> 37#include <sys/param.h> 38#include <sys/stat.h> 39#ifdef HAVE_USABLE_ICONV 40#include <iconv.h> 41#endif 42#include <arpa/inet.h> 43 44#include <atalk/unicode.h> 45#include <atalk/logger.h> 46#include "byteorder.h" 47 48 49/** 50 * @file 51 * 52 * @brief Samba wrapper/stub for iconv character set conversion. 53 * 54 * iconv is the XPG2 interface for converting between character 55 * encodings. This file provides a Samba wrapper around it, and also 56 * a simple reimplementation that is used if the system does not 57 * implement iconv. 58 * 59 * Samba only works with encodings that are supersets of ASCII: ascii 60 * characters like whitespace can be tested for directly, multibyte 61 * sequences start with a byte with the high bit set, and strings are 62 * terminated by a nul byte. 63 * 64 * Note that the only function provided by iconv is conversion between 65 * characters. It doesn't directly support operations like 66 * uppercasing or comparison. We have to convert to UCS-2 and compare 67 * there. 68 * 69 * @sa Samba Developers Guide 70 **/ 71#define CHARSET_WIDECHAR 32 72 73#ifdef HAVE_USABLE_ICONV 74#ifdef HAVE_UCS2INTERNAL 75#define UCS2ICONV "UCS-2-INTERNAL" 76#else /* !HAVE_UCS2INTERNAL */ 77#if BYTE_ORDER==LITTLE_ENDIAN 78#define UCS2ICONV "UCS-2LE" 79#else /* !LITTLE_ENDIAN */ 80#define UCS2ICONV "UCS-2BE" 81#endif /* BYTE_ORDER */ 82#endif /* HAVE_UCS2INTERNAL */ 83#else /* !HAVE_USABLE_ICONV */ 84#define UCS2ICONV "UCS-2" 85#endif /* HAVE_USABLE_ICONV */ 86 87static size_t ascii_pull(void *,char **, size_t *, char **, size_t *); 88static size_t ascii_push(void *,char **, size_t *, char **, size_t *); 89static size_t iconv_copy(void *,char **, size_t *, char **, size_t *); 90 91extern struct charset_functions charset_mac_roman; 92extern struct charset_functions charset_mac_hebrew; 93extern struct charset_functions charset_mac_centraleurope; 94extern struct charset_functions charset_mac_cyrillic; 95extern struct charset_functions charset_mac_greek; 96extern struct charset_functions charset_mac_turkish; 97extern struct charset_functions charset_utf8; 98extern struct charset_functions charset_utf8_mac; 99#ifdef HAVE_USABLE_ICONV 100extern struct charset_functions charset_mac_japanese; 101extern struct charset_functions charset_mac_chinese_trad; 102extern struct charset_functions charset_mac_korean; 103extern struct charset_functions charset_mac_chinese_simp; 104#endif 105 106 107static struct charset_functions builtin_functions[] = { 108 {"UCS-2", 0, iconv_copy, iconv_copy, CHARSET_WIDECHAR | CHARSET_PRECOMPOSED, NULL, NULL, NULL}, 109 {"ASCII", 0, ascii_pull, ascii_push, CHARSET_MULTIBYTE | CHARSET_PRECOMPOSED, NULL, NULL, NULL}, 110 {NULL, 0, NULL, NULL, 0, NULL, NULL, NULL} 111}; 112 113 114#define DLIST_ADD(list, p) \ 115{ \ 116 if (!(list)) { \ 117 (list) = (p); \ 118 (p)->next = (p)->prev = NULL; \ 119 } else { \ 120 (list)->prev = (p); \ 121 (p)->next = (list); \ 122 (p)->prev = NULL; \ 123 (list) = (p); \ 124 }\ 125} 126 127static struct charset_functions *charsets = NULL; 128 129struct charset_functions *find_charset_functions(const char *name) 130{ 131 struct charset_functions *c = charsets; 132 133 while(c) { 134 if (strcasecmp(name, c->name) == 0) { 135 return c; 136 } 137 c = c->next; 138 } 139 140 return NULL; 141} 142 143int atalk_register_charset(struct charset_functions *funcs) 144{ 145 if (!funcs) { 146 return -1; 147 } 148 149 /* Check whether we already have this charset... */ 150 if (find_charset_functions(funcs->name)) { 151 LOG (log_debug, logtype_default, "Duplicate charset %s, not registering", funcs->name); 152 return -2; 153 } 154 155 funcs->next = funcs->prev = NULL; 156 DLIST_ADD(charsets, funcs); 157 return 0; 158} 159 160static void lazy_initialize_iconv(void) 161{ 162 static int initialized = 0; 163 int i; 164 165 if (!initialized) { 166 initialized = 1; 167 for(i = 0; builtin_functions[i].name; i++) 168 atalk_register_charset(&builtin_functions[i]); 169 170 /* register additional charsets */ 171 atalk_register_charset(&charset_utf8); 172 atalk_register_charset(&charset_utf8_mac); 173 atalk_register_charset(&charset_mac_roman); 174 atalk_register_charset(&charset_mac_hebrew); 175 atalk_register_charset(&charset_mac_greek); 176 atalk_register_charset(&charset_mac_turkish); 177 atalk_register_charset(&charset_mac_centraleurope); 178 atalk_register_charset(&charset_mac_cyrillic); 179#ifdef HAVE_USABLE_ICONV 180 atalk_register_charset(&charset_mac_japanese); 181 atalk_register_charset(&charset_mac_chinese_trad); 182 atalk_register_charset(&charset_mac_korean); 183 atalk_register_charset(&charset_mac_chinese_simp); 184#endif 185 } 186} 187 188/* if there was an error then reset the internal state, 189 this ensures that we don't have a shift state remaining for 190 character sets like SJIS */ 191static size_t sys_iconv(void *cd, 192 char **inbuf, size_t *inbytesleft, 193 char **outbuf, size_t *outbytesleft) 194{ 195#ifdef HAVE_USABLE_ICONV 196 size_t ret = iconv((iconv_t)cd, 197 (ICONV_CONST char**)inbuf, inbytesleft, 198 outbuf, outbytesleft); 199 if (ret == (size_t)-1) iconv(cd, NULL, NULL, NULL, NULL); 200 return ret; 201#else 202 errno = EINVAL; 203 return -1; 204#endif 205} 206 207/** 208 * This is a simple portable iconv() implementaion. 209 * 210 * It only knows about a very small number of character sets - just 211 * enough that netatalk works on systems that don't have iconv. 212 **/ 213size_t atalk_iconv(atalk_iconv_t cd, 214 const char **inbuf, size_t *inbytesleft, 215 char **outbuf, size_t *outbytesleft) 216{ 217 char cvtbuf[2048]; 218 char *bufp = cvtbuf; 219 size_t bufsize; 220 221 /* in many cases we can go direct */ 222 if (cd->direct) { 223 return cd->direct(cd->cd_direct, 224 (char **)inbuf, inbytesleft, outbuf, outbytesleft); 225 } 226 227 228 /* otherwise we have to do it chunks at a time */ 229 while (*inbytesleft > 0) { 230 bufp = cvtbuf; 231 bufsize = sizeof(cvtbuf); 232 233 if (cd->pull(cd->cd_pull, (char **)inbuf, inbytesleft, &bufp, &bufsize) == (size_t)-1 234 && errno != E2BIG) { 235 return -1; 236 } 237 238 bufp = cvtbuf; 239 bufsize = sizeof(cvtbuf) - bufsize; 240 241 if (cd->push(cd->cd_push, &bufp, &bufsize, outbuf, outbytesleft) == (size_t)-1) { 242 return -1; 243 } 244 } 245 246 return 0; 247} 248 249 250/* 251 simple iconv_open() wrapper 252 */ 253atalk_iconv_t atalk_iconv_open(const char *tocode, const char *fromcode) 254{ 255 atalk_iconv_t ret; 256 struct charset_functions *from, *to; 257 258 259 lazy_initialize_iconv(); 260 from = charsets; 261 to = charsets; 262 263 ret = (atalk_iconv_t)malloc(sizeof(*ret)); 264 if (!ret) { 265 errno = ENOMEM; 266 return (atalk_iconv_t)-1; 267 } 268 memset(ret, 0, sizeof(*ret)); 269 270 ret->from_name = strdup(fromcode); 271 ret->to_name = strdup(tocode); 272 273 /* check for the simplest null conversion */ 274 if (strcasecmp(fromcode, tocode) == 0) { 275 ret->direct = iconv_copy; 276 return ret; 277 } 278 279 /* check if we have a builtin function for this conversion */ 280 from = find_charset_functions(fromcode); 281 if (from) ret->pull = from->pull; 282 283 to = find_charset_functions(tocode); 284 if (to) ret->push = to->push; 285 286 /* check if we can use iconv for this conversion */ 287#ifdef HAVE_USABLE_ICONV 288 if (!from || (from->flags & CHARSET_ICONV)) { 289 ret->cd_pull = iconv_open(UCS2ICONV, from && from->iname ? from->iname : fromcode); 290 if (ret->cd_pull != (iconv_t)-1) { 291 if (!ret->pull) ret->pull = sys_iconv; 292 } else ret->pull = NULL; 293 } 294 if (ret->pull) { 295 if (!to || (to->flags & CHARSET_ICONV)) { 296 ret->cd_push = iconv_open(to && to->iname ? to->iname : tocode, UCS2ICONV); 297 if (ret->cd_push != (iconv_t)-1) { 298 if (!ret->push) ret->push = sys_iconv; 299 } else ret->push = NULL; 300 } 301 if (!ret->push && ret->cd_pull) iconv_close((iconv_t)ret->cd_pull); 302 } 303#endif 304 305 if (!ret->push || !ret->pull) { 306 SAFE_FREE(ret->from_name); 307 SAFE_FREE(ret->to_name); 308 SAFE_FREE(ret); 309 errno = EINVAL; 310 return (atalk_iconv_t)-1; 311 } 312 313 /* check for conversion to/from ucs2 */ 314 if (strcasecmp(fromcode, "UCS-2") == 0) { 315 ret->direct = ret->push; 316 ret->cd_direct = ret->cd_push; 317 ret->cd_push = NULL; 318 } 319 if (strcasecmp(tocode, "UCS-2") == 0) { 320 ret->direct = ret->pull; 321 ret->cd_direct = ret->cd_pull; 322 ret->cd_pull = NULL; 323 } 324 325 return ret; 326} 327 328/* 329 simple iconv_close() wrapper 330*/ 331int atalk_iconv_close (atalk_iconv_t cd) 332{ 333#ifdef HAVE_USABLE_ICONV 334 if (cd->cd_direct) iconv_close((iconv_t)cd->cd_direct); 335 if (cd->cd_pull) iconv_close((iconv_t)cd->cd_pull); 336 if (cd->cd_push) iconv_close((iconv_t)cd->cd_push); 337#endif 338 339 SAFE_FREE(cd->from_name); 340 SAFE_FREE(cd->to_name); 341 342 memset(cd, 0, sizeof(*cd)); 343 SAFE_FREE(cd); 344 return 0; 345} 346 347 348/************************************************************************ 349 the following functions implement the builtin character sets in Netatalk 350*************************************************************************/ 351 352static size_t ascii_pull(void *cd _U_, char **inbuf, size_t *inbytesleft, 353 char **outbuf, size_t *outbytesleft) 354{ 355 ucs2_t curchar; 356 357 while (*inbytesleft >= 1 && *outbytesleft >= 2) { 358 if ((unsigned char)(*inbuf)[0] < 0x80) { 359 curchar = (ucs2_t) (*inbuf)[0]; 360 SSVAL((*outbuf),0,curchar); 361 } 362 else { 363 errno = EILSEQ; 364 return -1; 365 } 366 (*inbytesleft) -= 1; 367 (*outbytesleft) -= 2; 368 (*inbuf) += 1; 369 (*outbuf) += 2; 370 } 371 372 if (*inbytesleft > 0) { 373 errno = E2BIG; 374 return -1; 375 } 376 377 return 0; 378} 379 380static size_t ascii_push(void *cd _U_, char **inbuf, size_t *inbytesleft, 381 char **outbuf, size_t *outbytesleft) 382{ 383 int ir_count=0; 384 ucs2_t curchar; 385 386 while (*inbytesleft >= 2 && *outbytesleft >= 1) { 387 curchar = SVAL((*inbuf), 0); 388 if (curchar < 0x0080) { 389 (*outbuf)[0] = curchar; 390 } 391 else { 392 errno = EILSEQ; 393 return -1; 394 } 395 (*inbytesleft) -= 2; 396 (*outbytesleft) -= 1; 397 (*inbuf) += 2; 398 (*outbuf) += 1; 399 } 400 401 if (*inbytesleft == 1) { 402 errno = EINVAL; 403 return -1; 404 } 405 406 if (*inbytesleft > 1) { 407 errno = E2BIG; 408 return -1; 409 } 410 411 return ir_count; 412} 413 414 415static size_t iconv_copy(void *cd _U_, char **inbuf, size_t *inbytesleft, 416 char **outbuf, size_t *outbytesleft) 417{ 418 int n; 419 420 n = MIN(*inbytesleft, *outbytesleft); 421 422 memmove(*outbuf, *inbuf, n); 423 424 (*inbytesleft) -= n; 425 (*outbytesleft) -= n; 426 (*inbuf) += n; 427 (*outbuf) += n; 428 429 if (*inbytesleft > 0) { 430 errno = E2BIG; 431 return -1; 432 } 433 434 return 0; 435} 436 437/* ------------------------ */ 438