1254225Speter/*- 2254225Speter * Copyright (c) 1993, 1994 3254225Speter * The Regents of the University of California. All rights reserved. 4254225Speter * Copyright (c) 1993, 1994, 1995, 1996 5254225Speter * Keith Bostic. All rights reserved. 6254225Speter * Copyright (c) 2011, 2012 7254225Speter * Zhihao Yuan. All rights reserved. 8254225Speter * 9254225Speter * See the LICENSE file for redistribution information. 10254225Speter */ 11254225Speter 12254225Speter#include "config.h" 13254225Speter 14254225Speter#ifndef lint 15254225Speterstatic const char sccsid[] = "$Id: conv.c,v 2.39 2013/07/01 23:28:13 zy Exp $"; 16254225Speter#endif /* not lint */ 17254225Speter 18254225Speter#include <sys/types.h> 19254225Speter#include <sys/queue.h> 20254225Speter#include <sys/time.h> 21254225Speter 22254225Speter#include <bitstring.h> 23254225Speter#include <errno.h> 24254225Speter#include <limits.h> 25254225Speter#include <langinfo.h> 26254225Speter#include <locale.h> 27254225Speter#include <stdio.h> 28254225Speter#include <stdlib.h> 29254225Speter#include <string.h> 30254225Speter#include <strings.h> 31254225Speter#include <unistd.h> 32254225Speter 33254225Speter#include "common.h" 34254225Speter 35254225Speter/* 36254225Speter * codeset -- 37254225Speter * Get the locale encoding. 38254225Speter * 39254225Speter * PUBLIC: char * codeset __P((void)); 40254225Speter */ 41254225Speterchar * 42254225Spetercodeset(void) { 43254225Speter static char *cs; 44254225Speter 45254225Speter if (cs == NULL) 46254225Speter cs = nl_langinfo(CODESET); 47254225Speter return cs; 48254225Speter} 49254225Speter 50254225Speter#ifdef USE_WIDECHAR 51254225Speterstatic int 52254225Speterraw2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, 53254225Speter size_t *tolen, CHAR_T **dst) 54254225Speter{ 55254225Speter int i; 56254225Speter CHAR_T **tostr = &cw->bp1.wc; 57254225Speter size_t *blen = &cw->blen1; 58254225Speter 59254225Speter BINC_RETW(NULL, *tostr, *blen, len); 60254225Speter 61254225Speter *tolen = len; 62254225Speter for (i = 0; i < len; ++i) 63254225Speter (*tostr)[i] = (u_char) str[i]; 64254225Speter 65254225Speter *dst = cw->bp1.wc; 66254225Speter 67254225Speter return 0; 68254225Speter} 69254225Speter 70254225Speter#define CONV_BUFFER_SIZE 512 71254225Speter/* fill the buffer with codeset encoding of string pointed to by str 72254225Speter * left has the number of bytes left in str and is adjusted 73254225Speter * len contains the number of bytes put in the buffer 74254225Speter */ 75254225Speter#ifdef USE_ICONV 76254225Speter#define CONVERT(str, left, src, len) \ 77254225Speter do { \ 78254225Speter size_t outleft; \ 79254225Speter char *bp = buffer; \ 80254225Speter outleft = CONV_BUFFER_SIZE; \ 81254225Speter errno = 0; \ 82254225Speter if (iconv(id, (iconv_src_t)&str, &left, &bp, &outleft) == -1 && \ 83254225Speter errno != E2BIG) \ 84254225Speter goto err; \ 85254225Speter if ((len = CONV_BUFFER_SIZE - outleft) == 0) { \ 86254225Speter error = -left; \ 87254225Speter goto err; \ 88254225Speter } \ 89254225Speter src = buffer; \ 90254225Speter } while (0) 91254225Speter 92254225Speter#define IC_RESET() \ 93254225Speter do { \ 94254225Speter if (id != (iconv_t)-1) \ 95254225Speter iconv(id, NULL, NULL, NULL, NULL); \ 96254225Speter } while(0) 97254225Speter#else 98254225Speter#define CONVERT(str, left, src, len) 99254225Speter#define IC_RESET() 100254225Speter#endif 101254225Speter 102254225Speterstatic int 103254225Speterdefault_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, 104254225Speter size_t *tolen, CHAR_T **dst, iconv_t id) 105254225Speter{ 106254225Speter size_t i = 0, j; 107254225Speter CHAR_T **tostr = &cw->bp1.wc; 108254225Speter size_t *blen = &cw->blen1; 109254225Speter mbstate_t mbs; 110254225Speter size_t n; 111254225Speter ssize_t nlen = len; 112254225Speter char *src = (char *)str; 113254225Speter#ifdef USE_ICONV 114254225Speter char buffer[CONV_BUFFER_SIZE]; 115254225Speter#endif 116254225Speter size_t left = len; 117254225Speter int error = 1; 118254225Speter 119254225Speter BZERO(&mbs, 1); 120254225Speter BINC_RETW(NULL, *tostr, *blen, nlen); 121254225Speter 122254225Speter#ifdef USE_ICONV 123254225Speter if (id != (iconv_t)-1) 124254225Speter CONVERT(str, left, src, len); 125254225Speter#endif 126254225Speter 127254225Speter for (i = 0, j = 0; j < len; ) { 128254225Speter n = mbrtowc((*tostr)+i, src+j, len-j, &mbs); 129254225Speter /* NULL character converted */ 130254225Speter if (n == -2) error = -(len-j); 131254225Speter if (n == -1 || n == -2) goto err; 132254225Speter if (n == 0) n = 1; 133254225Speter j += n; 134254225Speter if (++i >= *blen) { 135254225Speter nlen += 256; 136254225Speter BINC_RETW(NULL, *tostr, *blen, nlen); 137254225Speter } 138254225Speter if (id != (iconv_t)-1 && j == len && left) { 139254225Speter CONVERT(str, left, src, len); 140254225Speter j = 0; 141254225Speter } 142254225Speter } 143254225Speter 144254225Speter error = 0; 145254225Spetererr: 146254225Speter *tolen = i; 147254225Speter *dst = cw->bp1.wc; 148254225Speter IC_RESET(); 149254225Speter 150254225Speter return error; 151254225Speter} 152254225Speter 153254225Speterstatic int 154254225Speterfe_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, 155254225Speter size_t *tolen, CHAR_T **dst) 156254225Speter{ 157254225Speter return default_char2int(sp, str, len, cw, tolen, dst, 158254225Speter sp->conv.id[IC_FE_CHAR2INT]); 159254225Speter} 160254225Speter 161254225Speterstatic int 162254225Speterie_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, 163254225Speter size_t *tolen, CHAR_T **dst) 164254225Speter{ 165254225Speter return default_char2int(sp, str, len, cw, tolen, dst, 166254225Speter sp->conv.id[IC_IE_CHAR2INT]); 167254225Speter} 168254225Speter 169254225Speterstatic int 170254225Spetercs_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, 171254225Speter size_t *tolen, CHAR_T **dst) 172254225Speter{ 173254225Speter return default_char2int(sp, str, len, cw, tolen, dst, 174254225Speter (iconv_t)-1); 175254225Speter} 176254225Speter 177254225Speterstatic int 178254225Speterint2raw(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, 179254225Speter size_t *tolen, char **dst) 180254225Speter{ 181254225Speter int i; 182254225Speter char **tostr = &cw->bp1.c; 183254225Speter size_t *blen = &cw->blen1; 184254225Speter 185254225Speter BINC_RETC(NULL, *tostr, *blen, len); 186254225Speter 187254225Speter *tolen = len; 188254225Speter for (i = 0; i < len; ++i) 189254225Speter (*tostr)[i] = str[i]; 190254225Speter 191254225Speter *dst = cw->bp1.c; 192254225Speter 193254225Speter return 0; 194254225Speter} 195254225Speter 196254225Speterstatic int 197254225Speterdefault_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, 198254225Speter size_t *tolen, char **pdst, iconv_t id) 199254225Speter{ 200254225Speter size_t i, j, offset = 0; 201254225Speter char **tostr = &cw->bp1.c; 202254225Speter size_t *blen = &cw->blen1; 203254225Speter mbstate_t mbs; 204254225Speter size_t n; 205254225Speter ssize_t nlen = len + MB_CUR_MAX; 206254225Speter char *dst; 207254225Speter size_t buflen; 208254225Speter#ifdef USE_ICONV 209254225Speter char buffer[CONV_BUFFER_SIZE]; 210254225Speter#endif 211254225Speter int error = 1; 212254225Speter 213254225Speter/* convert first len bytes of buffer and append it to cw->bp 214254225Speter * len is adjusted => 0 215254225Speter * offset contains the offset in cw->bp and is adjusted 216254225Speter * cw->bp is grown as required 217254225Speter */ 218254225Speter#ifdef USE_ICONV 219254225Speter#define CONVERT2(_buffer, lenp, cw, offset) \ 220254225Speter do { \ 221254225Speter char *bp = _buffer; \ 222254225Speter int ret; \ 223254225Speter do { \ 224254225Speter size_t outleft = cw->blen1 - offset; \ 225254225Speter char *obp = cw->bp1.c + offset; \ 226254225Speter if (cw->blen1 < offset + MB_CUR_MAX) { \ 227254225Speter nlen += 256; \ 228254225Speter BINC_RETC(NULL, cw->bp1.c, cw->blen1, nlen); \ 229254225Speter } \ 230254225Speter errno = 0; \ 231254225Speter ret = iconv(id, (iconv_src_t)&bp, lenp, &obp, &outleft); \ 232254225Speter if (ret == -1 && errno != E2BIG) \ 233254225Speter goto err; \ 234254225Speter offset = cw->blen1 - outleft; \ 235254225Speter } while (ret != 0); \ 236254225Speter } while (0) 237254225Speter#else 238254225Speter#define CONVERT2(_buffer, lenp, cw, offset) 239254225Speter#endif 240254225Speter 241254225Speter 242254225Speter BZERO(&mbs, 1); 243254225Speter BINC_RETC(NULL, *tostr, *blen, nlen); 244254225Speter dst = *tostr; buflen = *blen; 245254225Speter 246254225Speter#ifdef USE_ICONV 247254225Speter if (id != (iconv_t)-1) { 248254225Speter dst = buffer; buflen = CONV_BUFFER_SIZE; 249254225Speter } 250254225Speter#endif 251254225Speter 252254225Speter for (i = 0, j = 0; i < len; ++i) { 253254225Speter n = wcrtomb(dst+j, str[i], &mbs); 254254225Speter if (n == -1) goto err; 255254225Speter j += n; 256254225Speter if (buflen < j + MB_CUR_MAX) { 257254225Speter if (id != (iconv_t)-1) { 258254225Speter CONVERT2(buffer, &j, cw, offset); 259254225Speter } else { 260254225Speter nlen += 256; 261254225Speter BINC_RETC(NULL, *tostr, *blen, nlen); 262254225Speter dst = *tostr; buflen = *blen; 263254225Speter } 264254225Speter } 265254225Speter } 266254225Speter 267254225Speter n = wcrtomb(dst+j, L'\0', &mbs); 268254225Speter j += n - 1; /* don't count NUL at the end */ 269254225Speter *tolen = j; 270254225Speter 271254225Speter if (id != (iconv_t)-1) { 272254225Speter CONVERT2(buffer, &j, cw, offset); 273254225Speter CONVERT2(NULL, NULL, cw, offset); /* back to the initial state */ 274254225Speter *tolen = offset; 275254225Speter } 276254225Speter 277254225Speter error = 0; 278254225Spetererr: 279254225Speter if (error) 280254225Speter *tolen = j; 281254225Speter *pdst = cw->bp1.c; 282254225Speter IC_RESET(); 283254225Speter 284254225Speter return error; 285254225Speter} 286254225Speter 287254225Speterstatic int 288254225Speterfe_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, 289254225Speter size_t *tolen, char **dst) 290254225Speter{ 291254225Speter return default_int2char(sp, str, len, cw, tolen, dst, 292254225Speter sp->conv.id[IC_FE_INT2CHAR]); 293254225Speter} 294254225Speter 295254225Speterstatic int 296254225Spetercs_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, 297254225Speter size_t *tolen, char **dst) 298254225Speter{ 299254225Speter return default_int2char(sp, str, len, cw, tolen, dst, 300254225Speter (iconv_t)-1); 301254225Speter} 302254225Speter 303254225Speter#endif 304254225Speter 305254225Speter/* 306254225Speter * conv_init -- 307254225Speter * Initialize the iconv environment. 308254225Speter * 309254225Speter * PUBLIC: void conv_init __P((SCR *, SCR *)); 310254225Speter */ 311254225Spetervoid 312254225Speterconv_init(SCR *orig, SCR *sp) 313254225Speter{ 314254225Speter int i; 315254225Speter 316254225Speter if (orig == NULL) 317254225Speter setlocale(LC_ALL, ""); 318254225Speter if (orig != NULL) 319254225Speter BCOPY(&orig->conv, &sp->conv, 1); 320254225Speter#ifdef USE_WIDECHAR 321254225Speter else { 322254225Speter char *ctype = setlocale(LC_CTYPE, NULL); 323254225Speter 324254225Speter /* 325254225Speter * XXX 326254225Speter * This hack fixes the libncursesw issue on FreeBSD. 327254225Speter */ 328254225Speter if (!strcmp(ctype, "ko_KR.CP949")) 329254225Speter setlocale(LC_CTYPE, "ko_KR.eucKR"); 330254225Speter else if (!strcmp(ctype, "zh_CN.GB2312")) 331254225Speter setlocale(LC_CTYPE, "zh_CN.eucCN"); 332254225Speter else if (!strcmp(ctype, "zh_CN.GBK")) 333254225Speter setlocale(LC_CTYPE, "zh_CN.GB18030"); 334254225Speter 335254225Speter /* 336254225Speter * Switch to 8bit mode if locale is C; 337254225Speter * LC_CTYPE should be reseted to C if unmatched. 338254225Speter */ 339254225Speter if (!strcmp(ctype, "C") || !strcmp(ctype, "POSIX")) { 340254225Speter sp->conv.sys2int = sp->conv.file2int = raw2int; 341254225Speter sp->conv.int2sys = sp->conv.int2file = int2raw; 342254225Speter sp->conv.input2int = raw2int; 343254225Speter } else { 344254225Speter sp->conv.sys2int = cs_char2int; 345254225Speter sp->conv.int2sys = cs_int2char; 346254225Speter sp->conv.file2int = fe_char2int; 347254225Speter sp->conv.int2file = fe_int2char; 348254225Speter sp->conv.input2int = ie_char2int; 349254225Speter } 350254225Speter#ifdef USE_ICONV 351254225Speter o_set(sp, O_INPUTENCODING, OS_STRDUP, codeset(), 0); 352254225Speter#endif 353254225Speter } 354254225Speter#endif 355254225Speter 356254225Speter /* iconv descriptors must be distinct to screens. */ 357254225Speter for (i = 0; i <= IC_IE_TO_UTF16; ++i) 358254225Speter sp->conv.id[i] = (iconv_t)-1; 359254225Speter#ifdef USE_ICONV 360254225Speter conv_enc(sp, O_INPUTENCODING, 0); 361254225Speter#endif 362254225Speter} 363254225Speter 364254225Speter/* 365254225Speter * conv_enc -- 366254225Speter * Convert file/input encoding. 367254225Speter * 368254225Speter * PUBLIC: int conv_enc __P((SCR *, int, char *)); 369254225Speter */ 370254225Speterint 371254225Speterconv_enc(SCR *sp, int option, char *enc) 372254225Speter{ 373254225Speter#if defined(USE_WIDECHAR) && defined(USE_ICONV) 374254225Speter iconv_t *c2w, *w2c; 375254225Speter 376254225Speter switch (option) { 377254225Speter case O_FILEENCODING: 378254225Speter c2w = sp->conv.id + IC_FE_CHAR2INT; 379254225Speter w2c = sp->conv.id + IC_FE_INT2CHAR; 380254225Speter if (!enc) enc = O_STR(sp, O_FILEENCODING); 381254225Speter if (*c2w != (iconv_t)-1) 382254225Speter iconv_close(*c2w); 383254225Speter if (*w2c != (iconv_t)-1) 384254225Speter iconv_close(*w2c); 385254225Speter if (strcasecmp(codeset(), enc)) { 386254225Speter if ((*c2w = iconv_open(codeset(), enc)) == (iconv_t)-1) 387254225Speter goto err; 388254225Speter if ((*w2c = iconv_open(enc, codeset())) == (iconv_t)-1) 389254225Speter goto err; 390254225Speter } else *c2w = *w2c = (iconv_t)-1; 391254225Speter break; 392254225Speter case O_INPUTENCODING: 393254225Speter c2w = sp->conv.id + IC_IE_CHAR2INT; 394254225Speter w2c = sp->conv.id + IC_IE_TO_UTF16; 395254225Speter if (!enc) enc = O_STR(sp, O_INPUTENCODING); 396254225Speter if (*c2w != (iconv_t)-1) 397254225Speter iconv_close(*c2w); 398254225Speter if (*w2c != (iconv_t)-1) 399254225Speter iconv_close(*w2c); 400254225Speter if (strcasecmp(codeset(), enc)) { 401254225Speter if ((*c2w = iconv_open(codeset(), enc)) == (iconv_t)-1) 402254225Speter goto err; 403254225Speter } else *c2w = (iconv_t)-1; 404254225Speter /* UTF-16 can not be locale and can not be inputed. */ 405254225Speter if ((*w2c = iconv_open("utf-16be", enc)) == (iconv_t)-1) 406254225Speter goto err; 407254225Speter break; 408254225Speter } 409254225Speter 410254225Speter F_CLR(sp, SC_CONV_ERROR); 411254225Speter F_SET(sp, SC_SCR_REFORMAT); 412254225Speter 413254225Speter return 0; 414254225Spetererr: 415254225Speter#endif 416254225Speter switch (option) { 417254225Speter case O_FILEENCODING: 418254225Speter msgq(sp, M_ERR, 419254225Speter "321|File encoding conversion not supported"); 420254225Speter break; 421254225Speter case O_INPUTENCODING: 422254225Speter msgq(sp, M_ERR, 423254225Speter "322|Input encoding conversion not supported"); 424254225Speter break; 425254225Speter } 426254225Speter return 1; 427254225Speter} 428254225Speter 429254225Speter/* 430254225Speter * conv_end -- 431254225Speter * Close the iconv descriptors, release the buffer. 432254225Speter * 433254225Speter * PUBLIC: void conv_end __P((SCR *)); 434254225Speter */ 435254225Spetervoid 436254225Speterconv_end(SCR *sp) 437254225Speter{ 438254225Speter#if defined(USE_WIDECHAR) && defined(USE_ICONV) 439254225Speter int i; 440254225Speter for (i = 0; i <= IC_IE_TO_UTF16; ++i) 441254225Speter if (sp->conv.id[i] != (iconv_t)-1) 442254225Speter iconv_close(sp->conv.id[i]); 443254225Speter if (sp->cw.bp1.c != NULL) 444254225Speter free(sp->cw.bp1.c); 445254225Speter#endif 446254225Speter} 447