1/* $NetBSD: conv.c,v 1.11 2019/10/24 18:17:14 kamil Exp $ */ 2/*- 3 * Copyright (c) 1993, 1994 4 * The Regents of the University of California. All rights reserved. 5 * Copyright (c) 1993, 1994, 1995, 1996 6 * Keith Bostic. All rights reserved. 7 * 8 * See the LICENSE file for redistribution information. 9 */ 10 11#include "config.h" 12 13#include <sys/cdefs.h> 14#if 0 15#ifndef lint 16static const char sccsid[] = "Id: conv.c,v 1.27 2001/08/18 21:41:41 skimo Exp (Berkeley) Date: 2001/08/18 21:41:41 "; 17#endif /* not lint */ 18#else 19__RCSID("$NetBSD: conv.c,v 1.11 2019/10/24 18:17:14 kamil Exp $"); 20#endif 21 22#include <sys/types.h> 23#include <sys/queue.h> 24#include <sys/time.h> 25 26#include <bitstring.h> 27#include <errno.h> 28#include <limits.h> 29#include <stdio.h> 30#include <stdlib.h> 31#include <string.h> 32#include <unistd.h> 33 34#include "common.h" 35 36#if defined(USE_WIDECHAR) && defined(USE_ICONV) 37#include <langinfo.h> 38#include <iconv.h> 39 40#define LANGCODESET nl_langinfo(CODESET) 41#else 42#define LANGCODESET "" 43#endif 44 45#include <locale.h> 46 47#ifdef USE_WIDECHAR 48#ifdef USE_ICONV 49static int 50raw2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen, 51 const CHAR_T **dst) 52{ 53 int i; 54 CHAR_T **tostr = (CHAR_T **)(void *)&cw->bp1; 55 size_t *blen = &cw->blen1; 56 57 BINC_RETW(NULL, *tostr, *blen, len); 58 59 *tolen = len; 60 for (i = 0; i < len; ++i) { 61 CHAR_T w = (u_char)str[i]; 62 memcpy((*tostr) + i, &w, sizeof(**tostr)); 63 } 64 65 *dst = cw->bp1; 66 67 return 0; 68} 69#endif 70 71#ifndef ERROR_ON_CONVERT 72#define HANDLE_ICONV_ERROR(o, i, ol, il) do { \ 73 *o++ = *i++; \ 74 ol--; il--; \ 75 } while (/*CONSTCOND*/0) 76#define HANDLE_MBR_ERROR(n, mbs, d, s) do { \ 77 d = s; \ 78 MEMSET(&mbs, 0, 1); \ 79 n = 1; \ 80 } while (/*CONSTCOND*/0) 81#else 82#define HANDLE_ICONV_ERROR goto err 83#define HANDLE_MBR_ERROR goto err 84#endif 85 86#define CONV_BUFFER_SIZE 512 87/* fill the buffer with codeset encoding of string pointed to by str 88 * left has the number of bytes left in str and is adjusted 89 * len contains the number of bytes put in the buffer 90 */ 91#ifdef USE_ICONV 92#define CONVERT(str, left, src, len) \ 93 do { \ 94 size_t outleft; \ 95 char *bp = buffer; \ 96 outleft = CONV_BUFFER_SIZE; \ 97 errno = 0; \ 98 if (iconv(id, (char **)(void *)&str, &left, &bp, &outleft) \ 99 == (size_t)-1 && errno != E2BIG) \ 100 HANDLE_ICONV_ERROR(bp, str, outleft, left); \ 101 if ((len = CONV_BUFFER_SIZE - outleft) == 0) { \ 102 error = -left; \ 103 goto err; \ 104 } \ 105 src = buffer; \ 106 } while (0) 107#endif 108 109static int 110default_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, 111 size_t *tolen, const CHAR_T **dst, const char *enc) 112{ 113 int j; 114 size_t i = 0; 115 CHAR_T **tostr = (CHAR_T **)(void *)&cw->bp1; 116 size_t *blen = &cw->blen1; 117 mbstate_t mbs; 118 size_t n; 119 ssize_t nlen = len; 120 const char *src = (const char *)str; 121 int error = 1; 122#ifdef USE_ICONV 123 iconv_t id = (iconv_t)-1; 124 char buffer[CONV_BUFFER_SIZE]; 125 size_t left = len; 126#endif 127 128 MEMSET(&mbs, 0, 1); 129 BINC_RETW(NULL, *tostr, *blen, nlen); 130 131#ifdef USE_ICONV 132 if (strcmp(nl_langinfo(CODESET), enc)) { 133 id = iconv_open(nl_langinfo(CODESET), enc); 134 if (id == (iconv_t)-1) 135 goto err; 136 CONVERT(str, left, src, len); 137 } 138#endif 139 140 for (i = 0, j = 0; j < len; ) { 141 CHAR_T w; 142 n = mbrtowc(&w, src + j, len - j, &mbs); 143 memcpy((*tostr) + i, &w, sizeof(**tostr)); 144 /* NULL character converted */ 145 if (n == (size_t)-2) error = -(len - j); 146 if (n == (size_t)-1 || n == (size_t)-2) { 147 HANDLE_MBR_ERROR(n, mbs, w, src[j]); 148 memcpy((*tostr) + i, &w, sizeof(**tostr)); 149 } 150 if (n == 0) n = 1; 151 j += n; 152 if (++i >= *blen) { 153 nlen += 256; 154 BINC_GOTOW(NULL, *tostr, *blen, nlen); 155 } 156#ifdef USE_ICONV 157 if (id != (iconv_t)-1 && j == len && left) { 158 CONVERT(str, left, src, len); 159 j = 0; 160 } 161#endif 162 } 163 *tolen = i; 164 165#ifdef USE_ICONV 166 if (id != (iconv_t)-1) 167 iconv_close(id); 168#endif 169 170 *dst = cw->bp1; 171 172 return 0; 173alloc_err: 174#ifdef USE_ICONV 175err: 176 if (id != (iconv_t)-1) 177 iconv_close(id); 178#endif 179 *tolen = i; 180 *dst = cw->bp1; 181 182 return error; 183} 184 185static int 186fe_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, 187 size_t *tolen, const CHAR_T **dst) 188{ 189 return default_char2int(sp, str, len, cw, tolen, dst, O_STR(sp, O_FILEENCODING)); 190} 191 192static int 193ie_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, 194 size_t *tolen, const CHAR_T **dst) 195{ 196 return default_char2int(sp, str, len, cw, tolen, dst, O_STR(sp, O_INPUTENCODING)); 197} 198 199static int 200cs_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, 201 size_t *tolen, const CHAR_T **dst) 202{ 203 return default_char2int(sp, str, len, cw, tolen, dst, LANGCODESET); 204} 205 206#ifdef USE_ICONV 207static int 208CHAR_T_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, 209 size_t *tolen, const char **dst) 210{ 211 *tolen = len * sizeof(CHAR_T); 212 *dst = (const char *)(const void *)str; 213 214 return 0; 215} 216 217static int 218CHAR_T_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, 219 size_t *tolen, const CHAR_T **dst) 220{ 221 *tolen = len / sizeof(CHAR_T); 222 *dst = (const CHAR_T*) str; 223 224 return 0; 225} 226 227static int 228int2raw(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, size_t *tolen, 229 const char **dst) 230{ 231 int i; 232 char **tostr = (char **)(void *)&cw->bp1; 233 size_t *blen = &cw->blen1; 234 235 BINC_RETC(NULL, *tostr, *blen, len); 236 237 *tolen = len; 238 for (i = 0; i < len; ++i) { 239 CHAR_T w; 240 memcpy(&w, str + i, sizeof(w)); 241 (*tostr)[i] = w; 242 } 243 244 *dst = cw->bp1; 245 246 return 0; 247} 248#endif 249 250static int 251default_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, 252 size_t *tolen, const char **pdst, const char *enc) 253{ 254 size_t i, j = 0; 255 char **tostr = (char **)(void *)&cw->bp1; 256 size_t *blen = &cw->blen1; 257 mbstate_t mbs; 258 size_t n; 259 ssize_t nlen = len + MB_CUR_MAX; 260 char *dst; 261 size_t buflen; 262#ifdef USE_ICONV 263 int offset = 0; 264 char buffer[CONV_BUFFER_SIZE]; 265 iconv_t id = (iconv_t)-1; 266#endif 267 268/* convert first len bytes of buffer and append it to cw->bp 269 * len is adjusted => 0 270 * offset contains the offset in cw->bp and is adjusted 271 * cw->bp is grown as required 272 */ 273#ifdef USE_ICONV 274#define CONVERT2(_buffer, lenp, cw, offset) \ 275 do { \ 276 const char *bp = _buffer; \ 277 size_t ret; \ 278 do { \ 279 size_t outleft = cw->blen1 - offset; \ 280 char *obp = (char *)cw->bp1 + offset; \ 281 if (cw->blen1 < offset + MB_CUR_MAX) { \ 282 nlen += 256; \ 283 BINC_GOTOC(NULL, cw->bp1, cw->blen1, nlen); \ 284 } \ 285 errno = 0; \ 286 ret = iconv(id, (char **)(void *)&bp, lenp, &obp, &outleft);\ 287 if (ret == (size_t)-1 && errno != E2BIG) \ 288 HANDLE_ICONV_ERROR(obp, bp, outleft, len); \ 289 offset = cw->blen1 - outleft; \ 290 } while (ret != 0); \ 291 } while (0) 292#endif 293 294 MEMSET(&mbs, 0, 1); 295 BINC_RETC(NULL, *tostr, *blen, nlen); 296 dst = *tostr; buflen = *blen; 297 298#ifdef USE_ICONV 299 if (strcmp(nl_langinfo(CODESET), enc)) { 300 id = iconv_open(enc, nl_langinfo(CODESET)); 301 if (id == (iconv_t)-1) 302 goto err; 303 dst = buffer; buflen = CONV_BUFFER_SIZE; 304 } 305#endif 306 307 for (i = 0, j = 0; i < (size_t)len; ++i) { 308 CHAR_T w; 309 memcpy(&w, str + i, sizeof(w)); 310 n = wcrtomb(dst + j, w, &mbs); 311 if (n == (size_t)-1) 312 HANDLE_MBR_ERROR(n, mbs, dst[j], w); 313 j += n; 314 if (buflen < j + MB_CUR_MAX) { 315#ifdef USE_ICONV 316 if (id != (iconv_t)-1) { 317 CONVERT2(buffer, &j, cw, offset); 318 } else 319#endif 320 { 321 nlen += 256; 322 BINC_RETC(NULL, *tostr, *blen, nlen); 323 dst = *tostr; buflen = *blen; 324 } 325 } 326 } 327 328 n = wcrtomb(dst + j, L'\0', &mbs); 329 j += n - 1; /* don't count NUL at the end */ 330 *tolen = j; 331 332#ifdef USE_ICONV 333 if (id != (iconv_t)-1) { 334 CONVERT2(buffer, &j, cw, offset); 335 CONVERT2(NULL, NULL, cw, offset); /* back to the initial state */ 336 *tolen = offset; 337 iconv_close(id); 338 } 339#endif 340 341 *pdst = cw->bp1; 342 343 return 0; 344#ifdef USE_ICONV 345alloc_err: 346err: 347 if (id != (iconv_t)-1) 348 iconv_close(id); 349 *tolen = j; 350 *pdst = cw->bp1; 351 352 return 1; 353#endif 354} 355 356static int 357fe_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, 358 size_t *tolen, const char **dst) 359{ 360 return default_int2char(sp, str, len, cw, tolen, dst, O_STR(sp, O_FILEENCODING)); 361} 362 363static int 364cs_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, 365 size_t *tolen, const char **dst) 366{ 367 return default_int2char(sp, str, len, cw, tolen, dst, LANGCODESET); 368} 369 370#endif 371 372 373void 374conv_init (SCR *orig, SCR *sp) 375{ 376 if (orig != NULL) 377 MEMCPY(&sp->conv, &orig->conv, 1); 378 else { 379 setlocale(LC_ALL, ""); 380#ifdef USE_WIDECHAR 381 sp->conv.sys2int = cs_char2int; 382 sp->conv.int2sys = cs_int2char; 383 sp->conv.file2int = fe_char2int; 384 sp->conv.int2file = fe_int2char; 385 sp->conv.input2int = ie_char2int; 386#ifdef USE_ICONV 387 o_set(sp, O_FILEENCODING, OS_STRDUP, nl_langinfo(CODESET), 0); 388 o_set(sp, O_INPUTENCODING, OS_STRDUP, nl_langinfo(CODESET), 0); 389#endif 390#endif 391 } 392} 393 394int 395conv_enc (SCR *sp, int option, const char *enc) 396{ 397#if defined(USE_WIDECHAR) && defined(USE_ICONV) 398 iconv_t id; 399 char2wchar_t *c2w; 400 wchar2char_t *w2c; 401 402 switch (option) { 403 case O_FILEENCODING: 404 c2w = &sp->conv.file2int; 405 w2c = &sp->conv.int2file; 406 break; 407 case O_INPUTENCODING: 408 c2w = &sp->conv.input2int; 409 w2c = NULL; 410 break; 411 default: 412 c2w = NULL; 413 w2c = NULL; 414 break; 415 } 416 417 if (!*enc) { 418 if (c2w) *c2w = raw2int; 419 if (w2c) *w2c = int2raw; 420 return 0; 421 } 422 423 if (!strcmp(enc, "WCHAR_T")) { 424 if (c2w) *c2w = CHAR_T_char2int; 425 if (w2c) *w2c = CHAR_T_int2char; 426 return 0; 427 } 428 429 id = iconv_open(enc, nl_langinfo(CODESET)); 430 if (id == (iconv_t)-1) 431 goto err; 432 iconv_close(id); 433 id = iconv_open(nl_langinfo(CODESET), enc); 434 if (id == (iconv_t)-1) 435 goto err; 436 iconv_close(id); 437 438 switch (option) { 439 case O_FILEENCODING: 440 *c2w = fe_char2int; 441 *w2c = fe_int2char; 442 break; 443 case O_INPUTENCODING: 444 *c2w = ie_char2int; 445 break; 446 } 447 448 F_CLR(sp, SC_CONV_ERROR); 449 F_SET(sp, SC_SCR_REFORMAT); 450 451 return 0; 452err: 453 switch (option) { 454 case O_FILEENCODING: 455 msgq(sp, M_ERR, 456 "321|File encoding conversion not supported"); 457 break; 458 case O_INPUTENCODING: 459 msgq(sp, M_ERR, 460 "322|Input encoding conversion not supported"); 461 break; 462 } 463#endif 464 return 1; 465} 466