1/*-
2 * Copyright (c) 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 * Copyright (c) 1993, 1994, 1995, 1996
5 *	Keith Bostic.  All rights reserved.
6 * Copyright (c) 2011, 2012
7 *	Zhihao Yuan.  All rights reserved.
8 *
9 * See the LICENSE file for redistribution information.
10 */
11
12#include "config.h"
13
14#ifndef lint
15static const char sccsid[] = "$Id: conv.c,v 2.40 2014/02/27 16:25:29 zy Exp $";
16#endif /* not lint */
17
18#include <sys/types.h>
19#include <sys/queue.h>
20#include <sys/time.h>
21
22#include <bitstring.h>
23#include <errno.h>
24#include <limits.h>
25#include <langinfo.h>
26#include <locale.h>
27#include <stdio.h>
28#include <stdlib.h>
29#include <string.h>
30#include <strings.h>
31#include <unistd.h>
32
33#include "common.h"
34
/*
 * codeset --
 *	Return the name of the locale's character encoding.
 *
 *	nl_langinfo(CODESET) is consulted on the first call only; the
 *	pointer it returned is remembered and handed back unchanged on
 *	every later call.
 *
 * PUBLIC: char * codeset(void);
 */
char *
codeset(void)
{
	static char *cached;

	/* Fast path: already looked up. */
	if (cached != NULL)
		return (cached);

	cached = nl_langinfo(CODESET);
	return (cached);
}
51
52#ifdef USE_WIDECHAR
53static int
54raw2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen,
55    CHAR_T **dst)
56{
57	int i;
58	CHAR_T **tostr = &cw->bp1.wc;
59	size_t  *blen = &cw->blen1;
60
61	BINC_RETW(NULL, *tostr, *blen, len);
62
63	*tolen = len;
64	for (i = 0; i < len; ++i)
65		(*tostr)[i] = (u_char) str[i];
66
67	*dst = cw->bp1.wc;
68
69	return 0;
70}
71
#define CONV_BUFFER_SIZE    512
/*
 * CONVERT --
 *	Run the next piece of `str' through iconv and leave the result in
 *	the enclosing function's `buffer'.
 *
 *	str	input cursor; advanced by iconv past the bytes it consumed
 *	left	number of input bytes remaining in str; decremented by iconv
 *	src	set to point at buffer once output has been produced
 *	len	set to the number of bytes placed in buffer
 *
 *	Besides its arguments the macro uses these names from the scope
 *	it is expanded in: `buffer' (CONV_BUFFER_SIZE bytes), `id' (the
 *	iconv descriptor), `error' and the label `err'.  It jumps to err
 *	when iconv fails with anything other than E2BIG, and also -- with
 *	error set to -left -- when iconv produced no output at all.
 *
 * IC_RESET --
 *	Put the descriptor `id' back into its initial conversion state,
 *	unless it is the "no descriptor" value (iconv_t)-1.
 *
 * Both macros expand to nothing when USE_ICONV is not defined.
 */
#ifdef USE_ICONV
#define CONVERT(str, left, src, len)					\
	do {								\
		char *bp = buffer;					\
		size_t outleft = CONV_BUFFER_SIZE;			\
		errno = 0;						\
		if (iconv(id, (iconv_src_t)&str, &left, &bp, &outleft)	\
		    == -1 && errno != E2BIG)				\
			goto err;					\
		len = CONV_BUFFER_SIZE - outleft;			\
		if (len == 0) {						\
			error = -left;					\
			goto err;					\
		}							\
		src = buffer;						\
	} while (0)

#define IC_RESET()							\
	do {								\
		if (id != (iconv_t)-1) {				\
			iconv(id, NULL, NULL, NULL, NULL);		\
		}							\
	} while(0)
#else
#define CONVERT(str, left, src, len)
#define IC_RESET()
#endif
103
104static int
105default_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
106    size_t *tolen, CHAR_T **dst, iconv_t id)
107{
108	size_t i = 0, j;
109	CHAR_T **tostr = &cw->bp1.wc;
110	size_t *blen = &cw->blen1;
111	mbstate_t mbs;
112	size_t n;
113	ssize_t nlen = len;
114	char *src = (char *)str;
115#ifdef USE_ICONV
116	char buffer[CONV_BUFFER_SIZE];
117#endif
118	size_t left = len;
119	int error = 1;
120
121	BZERO(&mbs, 1);
122	BINC_RETW(NULL, *tostr, *blen, nlen);
123
124#ifdef USE_ICONV
125	if (id != (iconv_t)-1)
126		CONVERT(str, left, src, len);
127#endif
128
129	for (i = 0, j = 0; j < len; ) {
130		n = mbrtowc((*tostr)+i, src+j, len-j, &mbs);
131		/* NULL character converted */
132		if (n == -2)
133			error = -(len-j);
134		if (n == -1 || n == -2)
135			goto err;
136		if (n == 0)
137			n = 1;
138		j += n;
139		if (++i >= *blen) {
140			nlen += 256;
141			BINC_RETW(NULL, *tostr, *blen, nlen);
142		}
143		if (id != (iconv_t)-1 && j == len && left) {
144			CONVERT(str, left, src, len);
145			j = 0;
146		}
147	}
148
149	error = 0;
150err:
151	*tolen = i;
152	*dst = cw->bp1.wc;
153	IC_RESET();
154
155	return error;
156}
157
158static int
159fe_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen,
160    CHAR_T **dst)
161{
162	return default_char2int(sp, str, len, cw, tolen, dst,
163	    sp->conv.id[IC_FE_CHAR2INT]);
164}
165
166static int
167ie_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen,
168    CHAR_T **dst)
169{
170	return default_char2int(sp, str, len, cw, tolen, dst,
171	    sp->conv.id[IC_IE_CHAR2INT]);
172}
173
174static int
175cs_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen,
176    CHAR_T **dst)
177{
178	return default_char2int(sp, str, len, cw, tolen, dst, (iconv_t)-1);
179}
180
181static int
182int2raw(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, size_t *tolen,
183    char **dst)
184{
185	int i;
186	char **tostr = &cw->bp1.c;
187	size_t  *blen = &cw->blen1;
188
189	BINC_RETC(NULL, *tostr, *blen, len);
190
191	*tolen = len;
192	for (i = 0; i < len; ++i)
193		(*tostr)[i] = str[i];
194
195	*dst = cw->bp1.c;
196
197	return 0;
198}
199
200static int
201default_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
202    size_t *tolen, char **pdst, iconv_t id)
203{
204	size_t i, j, offset = 0;
205	char **tostr = &cw->bp1.c;
206	size_t *blen = &cw->blen1;
207	mbstate_t mbs;
208	size_t n;
209	ssize_t  nlen = len + MB_CUR_MAX;
210	char *dst;
211	size_t buflen;
212#ifdef USE_ICONV
213	char buffer[CONV_BUFFER_SIZE];
214#endif
215	int error = 1;
216
217/* convert first len bytes of buffer and append it to cw->bp
218 * len is adjusted => 0
219 * offset contains the offset in cw->bp and is adjusted
220 * cw->bp is grown as required
221 */
222#ifdef USE_ICONV
223#define CONVERT2(_buffer, lenp, cw, offset)				\
224	do {								\
225		char *bp = _buffer;					\
226		int ret;						\
227		do {							\
228			size_t outleft = cw->blen1 - offset;		\
229			char *obp = cw->bp1.c + offset;			\
230			if (cw->blen1 < offset + MB_CUR_MAX) {		\
231				nlen += 256;				\
232				BINC_RETC(NULL, cw->bp1.c, cw->blen1,	\
233				    nlen);				\
234			}						\
235			errno = 0;					\
236			ret = iconv(id, (iconv_src_t)&bp, lenp, &obp,	\
237			    &outleft);					\
238			if (ret == -1 && errno != E2BIG)		\
239				goto err;				\
240			offset = cw->blen1 - outleft;			\
241		} while (ret != 0); 					\
242	} while (0)
243#else
244#define CONVERT2(_buffer, lenp, cw, offset)
245#endif
246
247
248	BZERO(&mbs, 1);
249	BINC_RETC(NULL, *tostr, *blen, nlen);
250	dst = *tostr; buflen = *blen;
251
252#ifdef USE_ICONV
253	if (id != (iconv_t)-1) {
254		dst = buffer; buflen = CONV_BUFFER_SIZE;
255	}
256#endif
257
258	for (i = 0, j = 0; i < len; ++i) {
259		n = wcrtomb(dst+j, str[i], &mbs);
260		if (n == -1)
261			goto err;
262		j += n;
263		if (buflen < j + MB_CUR_MAX) {
264			if (id != (iconv_t)-1) {
265				CONVERT2(buffer, &j, cw, offset);
266			} else {
267				nlen += 256;
268				BINC_RETC(NULL, *tostr, *blen, nlen);
269				dst = *tostr; buflen = *blen;
270			}
271		}
272	}
273
274	n = wcrtomb(dst+j, L'\0', &mbs);
275	j += n - 1;				/* don't count NUL at the end */
276	*tolen = j;
277
278	if (id != (iconv_t)-1) {
279		CONVERT2(buffer, &j, cw, offset);
280		/* back to the initial state */
281		CONVERT2(NULL, NULL, cw, offset);
282		*tolen = offset;
283	}
284
285	error = 0;
286err:
287	if (error)
288		*tolen = j;
289	*pdst = cw->bp1.c;
290	IC_RESET();
291
292	return error;
293}
294
295static int
296fe_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
297    size_t *tolen, char **dst)
298{
299	return default_int2char(sp, str, len, cw, tolen, dst,
300		sp->conv.id[IC_FE_INT2CHAR]);
301}
302
303static int
304cs_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
305    size_t *tolen, char **dst)
306{
307	return default_int2char(sp, str, len, cw, tolen, dst, (iconv_t)-1);
308}
309
310#endif
311
312/*
313 * conv_init --
314 *	Initialize the iconv environment.
315 *
316 * PUBLIC: void conv_init(SCR *, SCR *);
317 */
318void
319conv_init(SCR *orig, SCR *sp)
320{
321	int i;
322
323	if (orig == NULL)
324		setlocale(LC_ALL, "");
325	if (orig != NULL)
326		BCOPY(&orig->conv, &sp->conv, 1);
327#ifdef USE_WIDECHAR
328	else {
329		char *ctype = setlocale(LC_CTYPE, NULL);
330
331		/*
332		 * XXX
333		 * This hack fixes the libncursesw issue on FreeBSD.
334		 */
335		if (!strcmp(ctype, "ko_KR.CP949"))
336			setlocale(LC_CTYPE, "ko_KR.eucKR");
337		else if (!strcmp(ctype, "zh_CN.GB2312"))
338			setlocale(LC_CTYPE, "zh_CN.eucCN");
339		else if (!strcmp(ctype, "zh_CN.GBK"))
340			setlocale(LC_CTYPE, "zh_CN.GB18030");
341
342		/*
343		 * Switch to 8bit mode if locale is C;
344		 * LC_CTYPE should be reseted to C if unmatched.
345		 */
346		if (!strcmp(ctype, "C") || !strcmp(ctype, "POSIX")) {
347			sp->conv.sys2int = sp->conv.file2int = raw2int;
348			sp->conv.int2sys = sp->conv.int2file = int2raw;
349			sp->conv.input2int = raw2int;
350		} else {
351			sp->conv.sys2int = cs_char2int;
352			sp->conv.int2sys = cs_int2char;
353			sp->conv.file2int = fe_char2int;
354			sp->conv.int2file = fe_int2char;
355			sp->conv.input2int = ie_char2int;
356		}
357#ifdef USE_ICONV
358		o_set(sp, O_INPUTENCODING, OS_STRDUP, codeset(), 0);
359#endif
360	}
361#endif
362
363	/* iconv descriptors must be distinct to screens. */
364	for (i = 0; i <= IC_IE_TO_UTF16; ++i)
365		sp->conv.id[i] = (iconv_t)-1;
366#ifdef USE_ICONV
367	conv_enc(sp, O_INPUTENCODING, 0);
368#endif
369}
370
371/*
372 * conv_enc --
373 *	Convert file/input encoding.
374 *
375 * PUBLIC: int conv_enc(SCR *, int, char *);
376 */
377int
378conv_enc(SCR *sp, int option, char *enc)
379{
380#if defined(USE_WIDECHAR) && defined(USE_ICONV)
381	iconv_t *c2w, *w2c;
382	iconv_t id_c2w, id_w2c;
383
384	switch (option) {
385	case O_FILEENCODING:
386		c2w = sp->conv.id + IC_FE_CHAR2INT;
387		w2c = sp->conv.id + IC_FE_INT2CHAR;
388		if (!enc)
389			enc = O_STR(sp, O_FILEENCODING);
390
391		if (strcasecmp(codeset(), enc)) {
392			if ((id_c2w = iconv_open(codeset(), enc)) ==
393			    (iconv_t)-1)
394				goto err;
395			if ((id_w2c = iconv_open(enc, codeset())) ==
396			    (iconv_t)-1)
397				goto err;
398		} else {
399			id_c2w = (iconv_t)-1;
400			id_w2c = (iconv_t)-1;
401		}
402
403		break;
404
405	case O_INPUTENCODING:
406		c2w = sp->conv.id + IC_IE_CHAR2INT;
407		w2c = sp->conv.id + IC_IE_TO_UTF16;
408		if (!enc)
409			enc = O_STR(sp, O_INPUTENCODING);
410
411		if (strcasecmp(codeset(), enc)) {
412			if ((id_c2w = iconv_open(codeset(), enc)) ==
413			    (iconv_t)-1)
414				goto err;
415		} else
416			id_c2w = (iconv_t)-1;
417
418		/* UTF-16 can not be locale and can not be inputed. */
419		if ((id_w2c = iconv_open("utf-16be", enc)) == (iconv_t)-1)
420			goto err;
421
422		break;
423
424	default:
425		abort();
426	}
427
428	if (*c2w != (iconv_t)-1)
429		iconv_close(*c2w);
430	if (*w2c != (iconv_t)-1)
431		iconv_close(*w2c);
432
433	*c2w = id_c2w;
434	*w2c = id_w2c;
435
436	F_CLR(sp, SC_CONV_ERROR);
437	F_SET(sp, SC_SCR_REFORMAT);
438
439	return 0;
440err:
441#endif
442	switch (option) {
443	case O_FILEENCODING:
444		msgq(sp, M_ERR, "321|File encoding conversion not supported");
445		break;
446	case O_INPUTENCODING:
447		msgq(sp, M_ERR, "322|Input encoding conversion not supported");
448		break;
449	}
450	return 1;
451}
452
453/*
454 * conv_end --
455 *	Close the iconv descriptors, release the buffer.
456 *
457 * PUBLIC: void conv_end(SCR *);
458 */
459void
460conv_end(SCR *sp)
461{
462#if defined(USE_WIDECHAR) && defined(USE_ICONV)
463	int i;
464	for (i = 0; i <= IC_IE_TO_UTF16; ++i)
465		if (sp->conv.id[i] != (iconv_t)-1)
466			iconv_close(sp->conv.id[i]);
467	if (sp->cw.bp1.c != NULL)
468		free(sp->cw.bp1.c);
469#endif
470}
471