1254225Speter/*-
2254225Speter * Copyright (c) 1993, 1994
3254225Speter *	The Regents of the University of California.  All rights reserved.
4254225Speter * Copyright (c) 1993, 1994, 1995, 1996
5254225Speter *	Keith Bostic.  All rights reserved.
6254225Speter * Copyright (c) 2011, 2012
7254225Speter *	Zhihao Yuan.  All rights reserved.
8254225Speter *
9254225Speter * See the LICENSE file for redistribution information.
10254225Speter */
11254225Speter
12254225Speter#include "config.h"
13254225Speter
14254225Speter#ifndef lint
15254225Speterstatic const char sccsid[] = "$Id: conv.c,v 2.39 2013/07/01 23:28:13 zy Exp $";
16254225Speter#endif /* not lint */
17254225Speter
18254225Speter#include <sys/types.h>
19254225Speter#include <sys/queue.h>
20254225Speter#include <sys/time.h>
21254225Speter
22254225Speter#include <bitstring.h>
23254225Speter#include <errno.h>
24254225Speter#include <limits.h>
25254225Speter#include <langinfo.h>
26254225Speter#include <locale.h>
27254225Speter#include <stdio.h>
28254225Speter#include <stdlib.h>
29254225Speter#include <string.h>
30254225Speter#include <strings.h>
31254225Speter#include <unistd.h>
32254225Speter
33254225Speter#include "common.h"
34254225Speter
/*
 * codeset --
 *	Return the name of the character encoding of the current locale.
 *	nl_langinfo(CODESET) is consulted on the first call only; the
 *	pointer it returned is cached and handed back on later calls.
 *
 * PUBLIC: char * codeset __P((void));
 */
char *
codeset(void)
{
    static char *cached;

    if (cached != NULL)
	return cached;

    cached = nl_langinfo(CODESET);
    return cached;
}
49254225Speter
50254225Speter#ifdef USE_WIDECHAR
51254225Speterstatic int
52254225Speterraw2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
53254225Speter	size_t *tolen, CHAR_T **dst)
54254225Speter{
55254225Speter    int i;
56254225Speter    CHAR_T **tostr = &cw->bp1.wc;
57254225Speter    size_t  *blen = &cw->blen1;
58254225Speter
59254225Speter    BINC_RETW(NULL, *tostr, *blen, len);
60254225Speter
61254225Speter    *tolen = len;
62254225Speter    for (i = 0; i < len; ++i)
63254225Speter	(*tostr)[i] = (u_char) str[i];
64254225Speter
65254225Speter    *dst = cw->bp1.wc;
66254225Speter
67254225Speter    return 0;
68254225Speter}
69254225Speter
/* Size, in bytes, of the on-stack staging buffer handed to iconv(). */
#define CONV_BUFFER_SIZE    512

/*
 * CONVERT --
 *	Run iconv() over the bytes at str and collect the output in the
 *	expanding function's `buffer' (at most CONV_BUFFER_SIZE bytes).
 *
 *	str and left are updated by iconv() to reflect the input that was
 *	consumed; len receives the number of bytes placed in the buffer
 *	and src is pointed at the buffer.  Running out of output space
 *	(E2BIG) is not treated as an error.  Any other iconv() failure
 *	jumps to `err'.  Producing no output at all also jumps to `err',
 *	after setting `error' to minus the number of input bytes left.
 *
 *	The expansion uses `id', `buffer', `error' and the label `err'
 *	from the function it is expanded in.
 *
 * IC_RESET --
 *	Put the conversion descriptor `id' back into its initial state,
 *	unless it is the invalid descriptor (iconv_t)-1.
 *
 * Both macros expand to nothing when USE_ICONV is not defined.
 */
#ifdef USE_ICONV
#define CONVERT(str, left, src, len)					\
    do {								\
	char *outp = buffer;						\
	size_t outroom = CONV_BUFFER_SIZE;				\
									\
	errno = 0;							\
	if (iconv(id, (iconv_src_t)&str, &left, &outp, &outroom) == -1	\
		&& errno != E2BIG)					\
	    goto err;							\
	len = CONV_BUFFER_SIZE - outroom;				\
	if (len == 0) {							\
	    error = -left;						\
	    goto err;							\
	}								\
	src = buffer;							\
    } while (0)

#define IC_RESET()							\
    do {								\
	if (id == (iconv_t)-1)						\
	    break;							\
	iconv(id, NULL, NULL, NULL, NULL);				\
    } while (0)
#else
#define CONVERT(str, left, src, len)
#define IC_RESET()
#endif
101254225Speter
102254225Speterstatic int
103254225Speterdefault_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
104254225Speter		size_t *tolen, CHAR_T **dst, iconv_t id)
105254225Speter{
106254225Speter    size_t i = 0, j;
107254225Speter    CHAR_T **tostr = &cw->bp1.wc;
108254225Speter    size_t  *blen = &cw->blen1;
109254225Speter    mbstate_t mbs;
110254225Speter    size_t   n;
111254225Speter    ssize_t  nlen = len;
112254225Speter    char *src = (char *)str;
113254225Speter#ifdef USE_ICONV
114254225Speter    char	buffer[CONV_BUFFER_SIZE];
115254225Speter#endif
116254225Speter    size_t	left = len;
117254225Speter    int		error = 1;
118254225Speter
119254225Speter    BZERO(&mbs, 1);
120254225Speter    BINC_RETW(NULL, *tostr, *blen, nlen);
121254225Speter
122254225Speter#ifdef USE_ICONV
123254225Speter    if (id != (iconv_t)-1)
124254225Speter	CONVERT(str, left, src, len);
125254225Speter#endif
126254225Speter
127254225Speter    for (i = 0, j = 0; j < len; ) {
128254225Speter	n = mbrtowc((*tostr)+i, src+j, len-j, &mbs);
129254225Speter	/* NULL character converted */
130254225Speter	if (n == -2) error = -(len-j);
131254225Speter	if (n == -1 || n == -2) goto err;
132254225Speter	if (n == 0) n = 1;
133254225Speter	j += n;
134254225Speter	if (++i >= *blen) {
135254225Speter	    nlen += 256;
136254225Speter	    BINC_RETW(NULL, *tostr, *blen, nlen);
137254225Speter	}
138254225Speter	if (id != (iconv_t)-1 && j == len && left) {
139254225Speter	    CONVERT(str, left, src, len);
140254225Speter	    j = 0;
141254225Speter	}
142254225Speter    }
143254225Speter
144254225Speter    error = 0;
145254225Spetererr:
146254225Speter    *tolen = i;
147254225Speter    *dst = cw->bp1.wc;
148254225Speter    IC_RESET();
149254225Speter
150254225Speter    return error;
151254225Speter}
152254225Speter
153254225Speterstatic int
154254225Speterfe_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
155254225Speter	    size_t *tolen, CHAR_T **dst)
156254225Speter{
157254225Speter    return default_char2int(sp, str, len, cw, tolen, dst,
158254225Speter	sp->conv.id[IC_FE_CHAR2INT]);
159254225Speter}
160254225Speter
161254225Speterstatic int
162254225Speterie_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
163254225Speter	    size_t *tolen, CHAR_T **dst)
164254225Speter{
165254225Speter    return default_char2int(sp, str, len, cw, tolen, dst,
166254225Speter	sp->conv.id[IC_IE_CHAR2INT]);
167254225Speter}
168254225Speter
169254225Speterstatic int
170254225Spetercs_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
171254225Speter	    size_t *tolen, CHAR_T **dst)
172254225Speter{
173254225Speter    return default_char2int(sp, str, len, cw, tolen, dst,
174254225Speter	(iconv_t)-1);
175254225Speter}
176254225Speter
177254225Speterstatic int
178254225Speterint2raw(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
179254225Speter	size_t *tolen, char **dst)
180254225Speter{
181254225Speter    int i;
182254225Speter    char **tostr = &cw->bp1.c;
183254225Speter    size_t  *blen = &cw->blen1;
184254225Speter
185254225Speter    BINC_RETC(NULL, *tostr, *blen, len);
186254225Speter
187254225Speter    *tolen = len;
188254225Speter    for (i = 0; i < len; ++i)
189254225Speter	(*tostr)[i] = str[i];
190254225Speter
191254225Speter    *dst = cw->bp1.c;
192254225Speter
193254225Speter    return 0;
194254225Speter}
195254225Speter
196254225Speterstatic int
197254225Speterdefault_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
198254225Speter		size_t *tolen, char **pdst, iconv_t id)
199254225Speter{
200254225Speter    size_t i, j, offset = 0;
201254225Speter    char **tostr = &cw->bp1.c;
202254225Speter    size_t  *blen = &cw->blen1;
203254225Speter    mbstate_t mbs;
204254225Speter    size_t n;
205254225Speter    ssize_t  nlen = len + MB_CUR_MAX;
206254225Speter    char *dst;
207254225Speter    size_t buflen;
208254225Speter#ifdef USE_ICONV
209254225Speter    char	buffer[CONV_BUFFER_SIZE];
210254225Speter#endif
211254225Speter    int		error = 1;
212254225Speter
213254225Speter/* convert first len bytes of buffer and append it to cw->bp
214254225Speter * len is adjusted => 0
215254225Speter * offset contains the offset in cw->bp and is adjusted
216254225Speter * cw->bp is grown as required
217254225Speter */
218254225Speter#ifdef USE_ICONV
219254225Speter#define CONVERT2(_buffer, lenp, cw, offset)				\
220254225Speter    do {								\
221254225Speter	char *bp = _buffer;						\
222254225Speter	int ret;							\
223254225Speter	do {								\
224254225Speter	    size_t outleft = cw->blen1 - offset;			\
225254225Speter	    char *obp = cw->bp1.c + offset;				\
226254225Speter	    if (cw->blen1 < offset + MB_CUR_MAX) {		    	\
227254225Speter		nlen += 256;						\
228254225Speter		BINC_RETC(NULL, cw->bp1.c, cw->blen1, nlen);		\
229254225Speter	    }						    		\
230254225Speter	    errno = 0;						    	\
231254225Speter	    ret = iconv(id, (iconv_src_t)&bp, lenp, &obp, &outleft);	\
232254225Speter	    if (ret == -1 && errno != E2BIG)				\
233254225Speter		goto err;						\
234254225Speter	    offset = cw->blen1 - outleft;			        \
235254225Speter	} while (ret != 0); 					        \
236254225Speter    } while (0)
237254225Speter#else
238254225Speter#define CONVERT2(_buffer, lenp, cw, offset)
239254225Speter#endif
240254225Speter
241254225Speter
242254225Speter    BZERO(&mbs, 1);
243254225Speter    BINC_RETC(NULL, *tostr, *blen, nlen);
244254225Speter    dst = *tostr; buflen = *blen;
245254225Speter
246254225Speter#ifdef USE_ICONV
247254225Speter    if (id != (iconv_t)-1) {
248254225Speter	dst = buffer; buflen = CONV_BUFFER_SIZE;
249254225Speter    }
250254225Speter#endif
251254225Speter
252254225Speter    for (i = 0, j = 0; i < len; ++i) {
253254225Speter	n = wcrtomb(dst+j, str[i], &mbs);
254254225Speter	if (n == -1) goto err;
255254225Speter	j += n;
256254225Speter	if (buflen < j + MB_CUR_MAX) {
257254225Speter	    if (id != (iconv_t)-1) {
258254225Speter		CONVERT2(buffer, &j, cw, offset);
259254225Speter	    } else {
260254225Speter		nlen += 256;
261254225Speter		BINC_RETC(NULL, *tostr, *blen, nlen);
262254225Speter		dst = *tostr; buflen = *blen;
263254225Speter	    }
264254225Speter	}
265254225Speter    }
266254225Speter
267254225Speter    n = wcrtomb(dst+j, L'\0', &mbs);
268254225Speter    j += n - 1;				/* don't count NUL at the end */
269254225Speter    *tolen = j;
270254225Speter
271254225Speter    if (id != (iconv_t)-1) {
272254225Speter	CONVERT2(buffer, &j, cw, offset);
273254225Speter	CONVERT2(NULL, NULL, cw, offset);  /* back to the initial state */
274254225Speter	*tolen = offset;
275254225Speter    }
276254225Speter
277254225Speter    error = 0;
278254225Spetererr:
279254225Speter    if (error)
280254225Speter	*tolen = j;
281254225Speter    *pdst = cw->bp1.c;
282254225Speter    IC_RESET();
283254225Speter
284254225Speter    return error;
285254225Speter}
286254225Speter
287254225Speterstatic int
288254225Speterfe_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
289254225Speter	    size_t *tolen, char **dst)
290254225Speter{
291254225Speter    return default_int2char(sp, str, len, cw, tolen, dst,
292254225Speter	sp->conv.id[IC_FE_INT2CHAR]);
293254225Speter}
294254225Speter
295254225Speterstatic int
296254225Spetercs_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
297254225Speter	    size_t *tolen, char **dst)
298254225Speter{
299254225Speter    return default_int2char(sp, str, len, cw, tolen, dst,
300254225Speter	(iconv_t)-1);
301254225Speter}
302254225Speter
303254225Speter#endif
304254225Speter
305254225Speter/*
306254225Speter * conv_init --
307254225Speter *	Initialize the iconv environment.
308254225Speter *
309254225Speter * PUBLIC: void conv_init __P((SCR *, SCR *));
310254225Speter */
311254225Spetervoid
312254225Speterconv_init(SCR *orig, SCR *sp)
313254225Speter{
314254225Speter    int i;
315254225Speter
316254225Speter    if (orig == NULL)
317254225Speter	setlocale(LC_ALL, "");
318254225Speter    if (orig != NULL)
319254225Speter	BCOPY(&orig->conv, &sp->conv, 1);
320254225Speter#ifdef USE_WIDECHAR
321254225Speter    else {
322254225Speter	char *ctype = setlocale(LC_CTYPE, NULL);
323254225Speter
324254225Speter	/*
325254225Speter	 * XXX
326254225Speter	 * This hack fixes the libncursesw issue on FreeBSD.
327254225Speter	 */
328254225Speter	if (!strcmp(ctype, "ko_KR.CP949"))
329254225Speter	    setlocale(LC_CTYPE, "ko_KR.eucKR");
330254225Speter	else if (!strcmp(ctype, "zh_CN.GB2312"))
331254225Speter	    setlocale(LC_CTYPE, "zh_CN.eucCN");
332254225Speter	else if (!strcmp(ctype, "zh_CN.GBK"))
333254225Speter	    setlocale(LC_CTYPE, "zh_CN.GB18030");
334254225Speter
335254225Speter	/*
336254225Speter	 * Switch to 8bit mode if locale is C;
337254225Speter	 * LC_CTYPE should be reseted to C if unmatched.
338254225Speter	 */
339254225Speter	if (!strcmp(ctype, "C") || !strcmp(ctype, "POSIX")) {
340254225Speter	    sp->conv.sys2int = sp->conv.file2int = raw2int;
341254225Speter	    sp->conv.int2sys = sp->conv.int2file = int2raw;
342254225Speter	    sp->conv.input2int = raw2int;
343254225Speter	} else {
344254225Speter	    sp->conv.sys2int = cs_char2int;
345254225Speter	    sp->conv.int2sys = cs_int2char;
346254225Speter	    sp->conv.file2int = fe_char2int;
347254225Speter	    sp->conv.int2file = fe_int2char;
348254225Speter	    sp->conv.input2int = ie_char2int;
349254225Speter	}
350254225Speter#ifdef USE_ICONV
351254225Speter	o_set(sp, O_INPUTENCODING, OS_STRDUP, codeset(), 0);
352254225Speter#endif
353254225Speter    }
354254225Speter#endif
355254225Speter
356254225Speter    /* iconv descriptors must be distinct to screens. */
357254225Speter    for (i = 0; i <= IC_IE_TO_UTF16; ++i)
358254225Speter	sp->conv.id[i] = (iconv_t)-1;
359254225Speter#ifdef USE_ICONV
360254225Speter    conv_enc(sp, O_INPUTENCODING, 0);
361254225Speter#endif
362254225Speter}
363254225Speter
364254225Speter/*
365254225Speter * conv_enc --
366254225Speter *	Convert file/input encoding.
367254225Speter *
368254225Speter * PUBLIC: int conv_enc __P((SCR *, int, char *));
369254225Speter */
370254225Speterint
371254225Speterconv_enc(SCR *sp, int option, char *enc)
372254225Speter{
373254225Speter#if defined(USE_WIDECHAR) && defined(USE_ICONV)
374254225Speter    iconv_t *c2w, *w2c;
375254225Speter
376254225Speter    switch (option) {
377254225Speter    case O_FILEENCODING:
378254225Speter	c2w = sp->conv.id + IC_FE_CHAR2INT;
379254225Speter	w2c = sp->conv.id + IC_FE_INT2CHAR;
380254225Speter	if (!enc) enc = O_STR(sp, O_FILEENCODING);
381254225Speter	if (*c2w != (iconv_t)-1)
382254225Speter	    iconv_close(*c2w);
383254225Speter	if (*w2c != (iconv_t)-1)
384254225Speter	    iconv_close(*w2c);
385254225Speter	if (strcasecmp(codeset(), enc)) {
386254225Speter	    if ((*c2w = iconv_open(codeset(), enc)) == (iconv_t)-1)
387254225Speter		goto err;
388254225Speter	    if ((*w2c = iconv_open(enc, codeset())) == (iconv_t)-1)
389254225Speter		goto err;
390254225Speter	} else *c2w = *w2c = (iconv_t)-1;
391254225Speter	break;
392254225Speter    case O_INPUTENCODING:
393254225Speter	c2w = sp->conv.id + IC_IE_CHAR2INT;
394254225Speter	w2c = sp->conv.id + IC_IE_TO_UTF16;
395254225Speter	if (!enc) enc = O_STR(sp, O_INPUTENCODING);
396254225Speter	if (*c2w != (iconv_t)-1)
397254225Speter	    iconv_close(*c2w);
398254225Speter	if (*w2c != (iconv_t)-1)
399254225Speter	    iconv_close(*w2c);
400254225Speter	if (strcasecmp(codeset(), enc)) {
401254225Speter	    if ((*c2w = iconv_open(codeset(), enc)) == (iconv_t)-1)
402254225Speter		goto err;
403254225Speter	} else *c2w = (iconv_t)-1;
404254225Speter	/* UTF-16 can not be locale and can not be inputed. */
405254225Speter	if ((*w2c = iconv_open("utf-16be", enc)) == (iconv_t)-1)
406254225Speter	    goto err;
407254225Speter	break;
408254225Speter    }
409254225Speter
410254225Speter    F_CLR(sp, SC_CONV_ERROR);
411254225Speter    F_SET(sp, SC_SCR_REFORMAT);
412254225Speter
413254225Speter    return 0;
414254225Spetererr:
415254225Speter#endif
416254225Speter    switch (option) {
417254225Speter    case O_FILEENCODING:
418254225Speter	msgq(sp, M_ERR,
419254225Speter	    "321|File encoding conversion not supported");
420254225Speter	break;
421254225Speter    case O_INPUTENCODING:
422254225Speter	msgq(sp, M_ERR,
423254225Speter	    "322|Input encoding conversion not supported");
424254225Speter	break;
425254225Speter    }
426254225Speter    return 1;
427254225Speter}
428254225Speter
429254225Speter/*
430254225Speter * conv_end --
431254225Speter *	Close the iconv descriptors, release the buffer.
432254225Speter *
433254225Speter * PUBLIC: void conv_end __P((SCR *));
434254225Speter */
435254225Spetervoid
436254225Speterconv_end(SCR *sp)
437254225Speter{
438254225Speter#if defined(USE_WIDECHAR) && defined(USE_ICONV)
439254225Speter    int i;
440254225Speter    for (i = 0; i <= IC_IE_TO_UTF16; ++i)
441254225Speter	if (sp->conv.id[i] != (iconv_t)-1)
442254225Speter	    iconv_close(sp->conv.id[i]);
443254225Speter	if (sp->cw.bp1.c != NULL)
444254225Speter	    free(sp->cw.bp1.c);
445254225Speter#endif
446254225Speter}
447