conv.c revision 1.5
1/* $NetBSD: conv.c,v 1.5 2017/11/06 03:02:22 rin Exp $ */ 2/*- 3 * Copyright (c) 1993, 1994 4 * The Regents of the University of California. All rights reserved. 5 * Copyright (c) 1993, 1994, 1995, 1996 6 * Keith Bostic. All rights reserved. 7 * 8 * See the LICENSE file for redistribution information. 9 */ 10 11#include "config.h" 12 13#include <sys/cdefs.h> 14#if 0 15#ifndef lint 16static const char sccsid[] = "Id: conv.c,v 1.27 2001/08/18 21:41:41 skimo Exp (Berkeley) Date: 2001/08/18 21:41:41 "; 17#endif /* not lint */ 18#else 19__RCSID("$NetBSD: conv.c,v 1.5 2017/11/06 03:02:22 rin Exp $"); 20#endif 21 22#include <sys/types.h> 23#include <sys/queue.h> 24#include <sys/time.h> 25 26#include <bitstring.h> 27#include <errno.h> 28#include <limits.h> 29#include <stdio.h> 30#include <stdlib.h> 31#include <string.h> 32#include <unistd.h> 33 34#include "common.h" 35 36#ifdef USE_ICONV 37#include <langinfo.h> 38#include <iconv.h> 39 40#define LANGCODESET nl_langinfo(CODESET) 41#else 42typedef int iconv_t; 43 44#define LANGCODESET "" 45#endif 46 47#include <locale.h> 48 49#ifdef USE_WIDECHAR 50static int 51raw2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen, 52 const CHAR_T **dst) 53{ 54 int i; 55 CHAR_T **tostr = (CHAR_T **)(void *)&cw->bp1; 56 size_t *blen = &cw->blen1; 57 58 BINC_RETW(NULL, *tostr, *blen, len); 59 60 *tolen = len; 61 for (i = 0; i < len; ++i) { 62 CHAR_T w = (u_char)str[i]; 63 memcpy((*tostr) + i, &w, sizeof(**tostr)); 64 } 65 66 *dst = cw->bp1; 67 68 return 0; 69} 70 71#ifndef ERROR_ON_CONVERT 72#define HANDLE_ICONV_ERROR(o, i, ol, il) do { \ 73 *o++ = *i++; \ 74 ol--; il--; \ 75 } while (/*CONSTCOND*/0) 76#define HANDLE_MBR_ERROR(n, mbs, d, s) do { \ 77 d = s; \ 78 MEMSET(&mbs, 0, 1); \ 79 n = 1; \ 80 } while (/*CONSTCOND*/0) 81#else 82#define HANDLE_ICONV_ERROR goto err 83#define HANDLE_MBR_ERROR goto err 84#endif 85 86#define CONV_BUFFER_SIZE 512 87/* fill the buffer with codeset encoding of string pointed to by str 88 * left has the number of bytes left in str and is adjusted 89 * len contains the number of bytes put in the buffer 90 */ 91#ifdef USE_ICONV 92#define CONVERT(str, left, src, len) \ 93 do { \ 94 size_t outleft; \ 95 char *bp = buffer; \ 96 outleft = CONV_BUFFER_SIZE; \ 97 errno = 0; \ 98 if (iconv(id, (const char **)&str, &left, &bp, &outleft) \ 99 == (size_t)-1 && errno != E2BIG) \ 100 HANDLE_ICONV_ERROR(bp, str, outleft, left); \ 101 if ((len = CONV_BUFFER_SIZE - outleft) == 0) { \ 102 error = -left; \ 103 goto err; \ 104 } \ 105 src = buffer; \ 106 } while (0) 107#else 108#define CONVERT(str, left, src, len) 109#endif 110 111static int 112default_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, 113 size_t *tolen, const CHAR_T **dst, const char *enc) 114{ 115 int j; 116 size_t i = 0; 117 CHAR_T **tostr = (CHAR_T **)(void *)&cw->bp1; 118 size_t *blen = &cw->blen1; 119 mbstate_t mbs; 120 size_t n; 121 ssize_t nlen = len; 122 const char *src = (const char *)str; 123 iconv_t id = (iconv_t)-1; 124 char buffer[CONV_BUFFER_SIZE]; 125 size_t left = len; 126 int error = 1; 127 128 MEMSET(&mbs, 0, 1); 129 BINC_RETW(NULL, *tostr, *blen, nlen); 130 131#ifdef USE_ICONV 132 if (strcmp(nl_langinfo(CODESET), enc)) { 133 id = iconv_open(nl_langinfo(CODESET), enc); 134 if (id == (iconv_t)-1) 135 goto err; 136 CONVERT(str, left, src, len); 137 } 138#endif 139 140 for (i = 0, j = 0; j < len; ) { 141 CHAR_T w; 142 n = mbrtowc(&w, src + j, len - j, &mbs); 143 memcpy((*tostr) + i, &w, sizeof(**tostr)); 144 /* NULL character converted */ 145 if (n == (size_t)-2) error = -(len - j); 146 if (n == (size_t)-1 || n == (size_t)-2) { 147 HANDLE_MBR_ERROR(n, mbs, w, src[j]); 148 memcpy((*tostr) + i, &w, sizeof(**tostr)); 149 } 150 if (n == 0) n = 1; 151 j += n; 152 if (++i >= *blen) { 153 nlen += 256; 154 BINC_RETW(NULL, *tostr, *blen, nlen); 155 } 156 if (id != (iconv_t)-1 && j == len && left) { 157 CONVERT(str, left, src, len); 158 j = 0; 159 } 160 } 161 *tolen = i; 162 163 if (id != (iconv_t)-1) 164 iconv_close(id); 165 166 *dst = cw->bp1; 167 168 return 0; 169err: 170 *tolen = i; 171 if (id != (iconv_t)-1) 172 iconv_close(id); 173 *dst = cw->bp1; 174 175 return error; 176} 177 178static int 179fe_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, 180 size_t *tolen, const CHAR_T **dst) 181{ 182 return default_char2int(sp, str, len, cw, tolen, dst, O_STR(sp, O_FILEENCODING)); 183} 184 185static int 186ie_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, 187 size_t *tolen, const CHAR_T **dst) 188{ 189 return default_char2int(sp, str, len, cw, tolen, dst, O_STR(sp, O_INPUTENCODING)); 190} 191 192static int 193cs_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, 194 size_t *tolen, const CHAR_T **dst) 195{ 196 return default_char2int(sp, str, len, cw, tolen, dst, LANGCODESET); 197} 198 199static int 200CHAR_T_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, 201 size_t *tolen, const char **dst) 202{ 203 *tolen = len * sizeof(CHAR_T); 204 *dst = (const char *)(const void *)str; 205 206 return 0; 207} 208 209static int 210CHAR_T_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, 211 size_t *tolen, const CHAR_T **dst) 212{ 213 *tolen = len / sizeof(CHAR_T); 214 *dst = (const CHAR_T*) str; 215 216 return 0; 217} 218 219static int 220int2raw(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, size_t *tolen, 221 const char **dst) 222{ 223 int i; 224 char **tostr = (char **)(void *)&cw->bp1; 225 size_t *blen = &cw->blen1; 226 227 BINC_RETC(NULL, *tostr, *blen, len); 228 229 *tolen = len; 230 for (i = 0; i < len; ++i) { 231 CHAR_T w; 232 memcpy(&w, str + i, sizeof(w)); 233 (*tostr)[i] = w; 234 } 235 236 *dst = cw->bp1; 237 238 return 0; 239} 240 241static int 242default_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, 243 size_t *tolen, const char **pdst, const char *enc) 244{ 245 size_t i, j; 246 int offset = 0; 247 char **tostr = (char **)(void *)&cw->bp1; 248 size_t *blen = &cw->blen1; 249 mbstate_t mbs; 250 size_t n; 251 ssize_t nlen = len + MB_CUR_MAX; 252 char *dst; 253 size_t buflen; 254 char buffer[CONV_BUFFER_SIZE]; 255 iconv_t id = (iconv_t)-1; 256 257/* convert first len bytes of buffer and append it to cw->bp 258 * len is adjusted => 0 259 * offset contains the offset in cw->bp and is adjusted 260 * cw->bp is grown as required 261 */ 262#ifdef USE_ICONV 263#define CONVERT2(len, cw, offset) \ 264 do { \ 265 const char *bp = buffer; \ 266 while (len != 0) { \ 267 size_t outleft = cw->blen1 - offset; \ 268 char *obp = (char *)cw->bp1 + offset; \ 269 if (cw->blen1 < offset + MB_CUR_MAX) { \ 270 nlen += 256; \ 271 BINC_RETC(NULL, cw->bp1, cw->blen1, nlen); \ 272 } \ 273 errno = 0; \ 274 if (iconv(id, &bp, &len, &obp, &outleft) == (size_t)-1 && \ 275 errno != E2BIG) \ 276 HANDLE_ICONV_ERROR(obp, bp, outleft, len); \ 277 offset = cw->blen1 - outleft; \ 278 } \ 279 } while (0) 280#else 281#define CONVERT2(len, cw, offset) 282#endif 283 284 285 MEMSET(&mbs, 0, 1); 286 BINC_RETC(NULL, *tostr, *blen, nlen); 287 dst = *tostr; buflen = *blen; 288 289#ifdef USE_ICONV 290 if (strcmp(nl_langinfo(CODESET), enc)) { 291 id = iconv_open(enc, nl_langinfo(CODESET)); 292 if (id == (iconv_t)-1) 293 goto err; 294 dst = buffer; buflen = CONV_BUFFER_SIZE; 295 } 296#endif 297 298 for (i = 0, j = 0; i < (size_t)len; ++i) { 299 CHAR_T w; 300 memcpy(&w, str + i, sizeof(w)); 301 n = wcrtomb(dst + j, w, &mbs); 302 if (n == (size_t)-1) 303 HANDLE_MBR_ERROR(n, mbs, dst[j], w); 304 j += n; 305 if (buflen < j + MB_CUR_MAX) { 306 if (id != (iconv_t)-1) { 307 CONVERT2(j, cw, offset); 308 } else { 309 nlen += 256; 310 BINC_RETC(NULL, *tostr, *blen, nlen); 311 dst = *tostr; buflen = *blen; 312 } 313 } 314 } 315 316 n = wcrtomb(dst + j, L'\0', &mbs); 317 j += n - 1; /* don't count NUL at the end */ 318 *tolen = j; 319 320 if (id != (iconv_t)-1) { 321 CONVERT2(j, cw, offset); 322 *tolen = offset; 323 } 324 325 *pdst = cw->bp1; 326 327 return 0; 328err: 329 *tolen = j; 330 331 *pdst = cw->bp1; 332 333 return 1; 334} 335 336static int 337fe_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, 338 size_t *tolen, const char **dst) 339{ 340 return default_int2char(sp, str, len, cw, tolen, dst, O_STR(sp, O_FILEENCODING)); 341} 342 343static int 344cs_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, 345 size_t *tolen, const char **dst) 346{ 347 return default_int2char(sp, str, len, cw, tolen, dst, LANGCODESET); 348} 349 350#endif 351 352 353void 354conv_init (SCR *orig, SCR *sp) 355{ 356 if (orig != NULL) 357 MEMCPY(&sp->conv, &orig->conv, 1); 358 else { 359 setlocale(LC_ALL, ""); 360#ifdef USE_WIDECHAR 361 sp->conv.sys2int = cs_char2int; 362 sp->conv.int2sys = cs_int2char; 363 sp->conv.file2int = fe_char2int; 364 sp->conv.int2file = fe_int2char; 365 sp->conv.input2int = ie_char2int; 366#endif 367#ifdef USE_ICONV 368 o_set(sp, O_FILEENCODING, OS_STRDUP, nl_langinfo(CODESET), 0); 369 o_set(sp, O_INPUTENCODING, OS_STRDUP, nl_langinfo(CODESET), 0); 370#endif 371 } 372} 373 374int 375conv_enc (SCR *sp, int option, const char *enc) 376{ 377#if defined(USE_WIDECHAR) && defined(USE_ICONV) 378 iconv_t id; 379 char2wchar_t *c2w; 380 wchar2char_t *w2c; 381 382 switch (option) { 383 case O_FILEENCODING: 384 c2w = &sp->conv.file2int; 385 w2c = &sp->conv.int2file; 386 break; 387 case O_INPUTENCODING: 388 c2w = &sp->conv.input2int; 389 w2c = NULL; 390 break; 391 default: 392 c2w = NULL; 393 w2c = NULL; 394 break; 395 } 396 397 if (!*enc) { 398 if (c2w) *c2w = raw2int; 399 if (w2c) *w2c = int2raw; 400 return 0; 401 } 402 403 if (!strcmp(enc, "WCHAR_T")) { 404 if (c2w) *c2w = CHAR_T_char2int; 405 if (w2c) *w2c = CHAR_T_int2char; 406 return 0; 407 } 408 409 id = iconv_open(enc, nl_langinfo(CODESET)); 410 if (id == (iconv_t)-1) 411 goto err; 412 iconv_close(id); 413 id = iconv_open(nl_langinfo(CODESET), enc); 414 if (id == (iconv_t)-1) 415 goto err; 416 iconv_close(id); 417 418 switch (option) { 419 case O_FILEENCODING: 420 *c2w = fe_char2int; 421 *w2c = fe_int2char; 422 break; 423 case O_INPUTENCODING: 424 *c2w = ie_char2int; 425 break; 426 } 427 428 F_CLR(sp, SC_CONV_ERROR); 429 F_SET(sp, SC_SCR_REFORMAT); 430 431 return 0; 432err: 433 switch (option) { 434 case O_FILEENCODING: 435 msgq(sp, M_ERR, 436 "321|File encoding conversion not supported"); 437 break; 438 case O_INPUTENCODING: 439 msgq(sp, M_ERR, 440 "322|Input encoding conversion not supported"); 441 break; 442 } 443#endif 444 return 1; 445} 446 447