1/*-
2 * Copyright (c) 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 * Copyright (c) 1993, 1994, 1995, 1996
5 *	Keith Bostic.  All rights reserved.
6 * Copyright (c) 2011, 2012
7 *	Zhihao Yuan.  All rights reserved.
8 *
9 * See the LICENSE file for redistribution information.
10 */
11
12#include "config.h"
13
14#include <sys/types.h>
15#include <sys/queue.h>
16#include <sys/time.h>
17
18#include <bitstring.h>
19#include <errno.h>
20#include <limits.h>
21#include <langinfo.h>
22#include <locale.h>
23#include <stdio.h>
24#include <stdlib.h>
25#include <string.h>
26#include <strings.h>
27#include <unistd.h>
28
29#include "common.h"
30
/*
 * codeset --
 *	Return the name of the character encoding of the current locale.
 *	The nl_langinfo(CODESET) result is remembered in a static pointer
 *	the first time it is non-NULL and that same pointer is returned on
 *	every later call.
 *
 * PUBLIC: char * codeset(void);
 */
char *
codeset(void)
{
	static char *cached;

	/* Already looked up: hand back the remembered pointer. */
	if (cached != NULL)
		return cached;

	cached = nl_langinfo(CODESET);
	return cached;
}
47
48#ifdef USE_WIDECHAR
49static int
50raw2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen,
51    CHAR_T **dst)
52{
53	int i;
54	CHAR_T **tostr = &cw->bp1.wc;
55	size_t  *blen = &cw->blen1;
56
57	BINC_RETW(NULL, *tostr, *blen, len);
58
59	*tolen = len;
60	for (i = 0; i < len; ++i)
61		(*tostr)[i] = (u_char) str[i];
62
63	*dst = cw->bp1.wc;
64
65	return 0;
66}
67
#define CONV_BUFFER_SIZE    512
/*
 * CONVERT(str, left, src, len) --
 *	Run one iconv() pass over the input at str, writing the result into
 *	the enclosing function's local array `buffer' (CONV_BUFFER_SIZE
 *	bytes).
 *
 *	str, left: input pointer and count of input bytes remaining; their
 *		   addresses are handed to iconv(), which updates them.
 *	src:	   set to buffer once output has been produced.
 *	len:	   set to the number of bytes placed in buffer.
 *
 *	The expansion uses names from the enclosing function: `id' (the
 *	iconv descriptor), `buffer', `error' and an `err' label.  It jumps
 *	to err when iconv() fails with an errno other than E2BIG, and also
 *	when no output at all was produced; in the latter case error is
 *	first set to -left.
 *
 * IC_RESET() --
 *	If id is not (iconv_t)-1, call iconv() with all-NULL buffer
 *	arguments, which resets the descriptor's conversion state.
 *
 * Without USE_ICONV both macros expand to nothing.
 */
#ifdef USE_ICONV
#define CONVERT(str, left, src, len)					\
	do {								\
		size_t outleft;						\
		char *bp = buffer;					\
		outleft = CONV_BUFFER_SIZE;				\
		errno = 0;						\
		if (iconv(id, (iconv_src_t)&str, &left, &bp, &outleft)	\
		    == -1 && errno != E2BIG)				\
			goto err;					\
		if ((len = CONV_BUFFER_SIZE - outleft) == 0) {		\
			error = -left;					\
			goto err;					\
		}							\
		src = buffer;						\
	} while (0)

#define IC_RESET()							\
	do {								\
		if (id != (iconv_t)-1)					\
			iconv(id, NULL, NULL, NULL, NULL);		\
	} while(0)
#else
#define CONVERT(str, left, src, len)
#define IC_RESET()
#endif
99
/*
 * default_char2int --
 *	Decode the multibyte string str (len bytes) into wide characters
 *	stored in cw->bp1.wc, using mbrtowc().  When built with USE_ICONV
 *	and id is not (iconv_t)-1, the input is first run through iconv()
 *	by the CONVERT macro, one CONV_BUFFER_SIZE chunk at a time, and
 *	mbrtowc() reads from that chunk instead of from str.
 *
 *	On return *tolen holds the number of wide characters produced and
 *	*dst points at cw->bp1.wc; both are set on success and on the err
 *	path alike.
 *
 *	Returns 0 on success.  Returns 1 if mbrtowc() reports an invalid
 *	sequence or iconv() fails inside CONVERT.  Returns a negated byte
 *	count when the input ends in an incomplete multibyte sequence
 *	(-(len-j)) or when CONVERT produced no output (-left).
 *	NOTE(review): BINC_RETW is defined elsewhere; it presumably returns
 *	from this function directly on allocation failure -- confirm.
 */
static int
default_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
    size_t *tolen, CHAR_T **dst, iconv_t id)
{
	size_t i = 0, j;
	CHAR_T **tostr = &cw->bp1.wc;
	size_t *blen = &cw->blen1;
	mbstate_t mbs;
	size_t n;
	ssize_t nlen = len;
	char *src = (char *)str;
#ifdef USE_ICONV
	char buffer[CONV_BUFFER_SIZE];
#endif
	size_t left = len;
	int error = 1;

	/* Start mbrtowc() from a zeroed conversion state. */
	memset(&mbs, 0, sizeof(mbs));
	BINC_RETW(NULL, *tostr, *blen, nlen);

#ifdef USE_ICONV
	/*
	 * With a valid descriptor, convert the first chunk; from here on
	 * src points at buffer and len is the number of bytes in it.
	 */
	if (id != (iconv_t)-1)
		CONVERT(str, left, src, len);
#endif

	/* i counts wide characters written, j counts bytes read from src. */
	for (i = 0, j = 0; j < len; ) {
		n = mbrtowc((*tostr)+i, src+j, len-j, &mbs);
		/*
		 * (size_t)-2: the remaining bytes form an incomplete
		 * multibyte sequence; report how many were left, negated.
		 */
		if (n == -2)
			error = -(len-j);
		if (n == -1 || n == -2)
			goto err;
		/* mbrtowc() returns 0 for a converted NUL; step over one byte. */
		if (n == 0)
			n = 1;
		j += n;
		if (++i >= *blen) {
			nlen += 256;
			BINC_RETW(NULL, *tostr, *blen, nlen);
		}
		/*
		 * The current chunk is used up but iconv() still has input
		 * left: convert the next chunk and restart at its beginning.
		 */
		if (id != (iconv_t)-1 && j == len && left) {
			CONVERT(str, left, src, len);
			j = 0;
		}
	}

	error = 0;
err:
	*tolen = i;
	*dst = cw->bp1.wc;
	IC_RESET();

	return error;
}
153
154static int
155fe_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen,
156    CHAR_T **dst)
157{
158	return default_char2int(sp, str, len, cw, tolen, dst,
159	    sp->conv.id[IC_FE_CHAR2INT]);
160}
161
162static int
163ie_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen,
164    CHAR_T **dst)
165{
166	return default_char2int(sp, str, len, cw, tolen, dst,
167	    sp->conv.id[IC_IE_CHAR2INT]);
168}
169
170static int
171cs_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen,
172    CHAR_T **dst)
173{
174	return default_char2int(sp, str, len, cw, tolen, dst, (iconv_t)-1);
175}
176
177static int
178int2raw(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, size_t *tolen,
179    char **dst)
180{
181	int i;
182	char **tostr = &cw->bp1.c;
183	size_t  *blen = &cw->blen1;
184
185	BINC_RETC(NULL, *tostr, *blen, len);
186
187	*tolen = len;
188	for (i = 0; i < len; ++i)
189		(*tostr)[i] = str[i];
190
191	*dst = cw->bp1.c;
192
193	return 0;
194}
195
196static int
197default_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
198    size_t *tolen, char **pdst, iconv_t id)
199{
200	size_t i, j, offset = 0;
201	char **tostr = &cw->bp1.c;
202	size_t *blen = &cw->blen1;
203	mbstate_t mbs;
204	size_t n;
205	ssize_t  nlen = len + MB_CUR_MAX;
206	char *dst;
207	size_t buflen;
208#ifdef USE_ICONV
209	char buffer[CONV_BUFFER_SIZE];
210#endif
211	int error = 1;
212
213/* convert first len bytes of buffer and append it to cw->bp
214 * len is adjusted => 0
215 * offset contains the offset in cw->bp and is adjusted
216 * cw->bp is grown as required
217 */
218#ifdef USE_ICONV
219#define CONVERT2(_buffer, lenp, cw, offset)				\
220	do {								\
221		char *bp = _buffer;					\
222		int ret;						\
223		do {							\
224			size_t outleft = cw->blen1 - offset;		\
225			char *obp = cw->bp1.c + offset;			\
226			if (cw->blen1 < offset + MB_CUR_MAX) {		\
227				nlen += 256;				\
228				BINC_RETC(NULL, cw->bp1.c, cw->blen1,	\
229				    nlen);				\
230			}						\
231			errno = 0;					\
232			ret = iconv(id, (iconv_src_t)&bp, lenp, &obp,	\
233			    &outleft);					\
234			if (ret == -1 && errno != E2BIG)		\
235				goto err;				\
236			offset = cw->blen1 - outleft;			\
237		} while (ret != 0); 					\
238	} while (0)
239#else
240#define CONVERT2(_buffer, lenp, cw, offset)
241#endif
242
243
244	memset(&mbs, 0, sizeof(mbs));
245	BINC_RETC(NULL, *tostr, *blen, nlen);
246	dst = *tostr; buflen = *blen;
247
248#ifdef USE_ICONV
249	if (id != (iconv_t)-1) {
250		dst = buffer; buflen = CONV_BUFFER_SIZE;
251	}
252#endif
253
254	for (i = 0, j = 0; i < len; ++i) {
255		n = wcrtomb(dst+j, str[i], &mbs);
256		if (n == -1)
257			goto err;
258		j += n;
259		if (buflen < j + MB_CUR_MAX) {
260			if (id != (iconv_t)-1) {
261				CONVERT2(buffer, &j, cw, offset);
262			} else {
263				nlen += 256;
264				BINC_RETC(NULL, *tostr, *blen, nlen);
265				dst = *tostr; buflen = *blen;
266			}
267		}
268	}
269
270	n = wcrtomb(dst+j, L'\0', &mbs);
271	j += n - 1;				/* don't count NUL at the end */
272	*tolen = j;
273
274	if (id != (iconv_t)-1) {
275		CONVERT2(buffer, &j, cw, offset);
276		/* back to the initial state */
277		CONVERT2(NULL, NULL, cw, offset);
278		*tolen = offset;
279	}
280
281	error = 0;
282err:
283	if (error)
284		*tolen = j;
285	*pdst = cw->bp1.c;
286	IC_RESET();
287
288	return error;
289}
290
291static int
292fe_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
293    size_t *tolen, char **dst)
294{
295	return default_int2char(sp, str, len, cw, tolen, dst,
296		sp->conv.id[IC_FE_INT2CHAR]);
297}
298
299static int
300cs_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
301    size_t *tolen, char **dst)
302{
303	return default_int2char(sp, str, len, cw, tolen, dst, (iconv_t)-1);
304}
305
306#endif
307
308/*
309 * conv_init --
310 *	Initialize the iconv environment.
311 *
312 * PUBLIC: void conv_init(SCR *, SCR *);
313 */
314void
315conv_init(SCR *orig, SCR *sp)
316{
317	int i;
318
319	if (orig == NULL)
320		setlocale(LC_ALL, "");
321	if (orig != NULL)
322		memmove(&sp->conv, &orig->conv, sizeof(CONV));
323#ifdef USE_WIDECHAR
324	else {
325		char *ctype = setlocale(LC_CTYPE, NULL);
326
327		/*
328		 * XXX
329		 * This hack fixes the libncursesw issue on FreeBSD.
330		 */
331		if (!strcmp(ctype, "ko_KR.CP949"))
332			setlocale(LC_CTYPE, "ko_KR.eucKR");
333		else if (!strcmp(ctype, "zh_CN.GB2312"))
334			setlocale(LC_CTYPE, "zh_CN.eucCN");
335		else if (!strcmp(ctype, "zh_CN.GBK"))
336			setlocale(LC_CTYPE, "zh_CN.GB18030");
337
338		/*
339		 * Switch to 8bit mode if locale is C;
340		 * LC_CTYPE should be reseted to C if unmatched.
341		 */
342		if (!strcmp(ctype, "C") || !strcmp(ctype, "POSIX")) {
343			sp->conv.sys2int = sp->conv.file2int = raw2int;
344			sp->conv.int2sys = sp->conv.int2file = int2raw;
345			sp->conv.input2int = raw2int;
346		} else {
347			sp->conv.sys2int = cs_char2int;
348			sp->conv.int2sys = cs_int2char;
349			sp->conv.file2int = fe_char2int;
350			sp->conv.int2file = fe_int2char;
351			sp->conv.input2int = ie_char2int;
352		}
353#ifdef USE_ICONV
354		o_set(sp, O_INPUTENCODING, OS_STRDUP, codeset(), 0);
355#endif
356	}
357#endif
358
359	/* iconv descriptors must be distinct to screens. */
360	for (i = 0; i <= IC_IE_TO_UTF16; ++i)
361		sp->conv.id[i] = (iconv_t)-1;
362#ifdef USE_ICONV
363	conv_enc(sp, O_INPUTENCODING, 0);
364#endif
365}
366
367/*
368 * conv_enc --
369 *	Convert file/input encoding.
370 *
371 * PUBLIC: int conv_enc(SCR *, int, char *);
372 */
373int
374conv_enc(SCR *sp, int option, char *enc)
375{
376#if defined(USE_WIDECHAR) && defined(USE_ICONV)
377	iconv_t *c2w, *w2c;
378	iconv_t id_c2w, id_w2c;
379
380	switch (option) {
381	case O_FILEENCODING:
382		c2w = sp->conv.id + IC_FE_CHAR2INT;
383		w2c = sp->conv.id + IC_FE_INT2CHAR;
384		if (!enc)
385			enc = O_STR(sp, O_FILEENCODING);
386
387		if (strcasecmp(codeset(), enc)) {
388			if ((id_c2w = iconv_open(codeset(), enc)) ==
389			    (iconv_t)-1)
390				goto err;
391			if ((id_w2c = iconv_open(enc, codeset())) ==
392			    (iconv_t)-1)
393				goto err;
394		} else {
395			id_c2w = (iconv_t)-1;
396			id_w2c = (iconv_t)-1;
397		}
398
399		break;
400
401	case O_INPUTENCODING:
402		c2w = sp->conv.id + IC_IE_CHAR2INT;
403		w2c = sp->conv.id + IC_IE_TO_UTF16;
404		if (!enc)
405			enc = O_STR(sp, O_INPUTENCODING);
406
407		if (strcasecmp(codeset(), enc)) {
408			if ((id_c2w = iconv_open(codeset(), enc)) ==
409			    (iconv_t)-1)
410				goto err;
411		} else
412			id_c2w = (iconv_t)-1;
413
414		/* UTF-16 can not be locale and can not be inputed. */
415		if ((id_w2c = iconv_open("utf-16be", enc)) == (iconv_t)-1)
416			goto err;
417
418		break;
419
420	default:
421		abort();
422	}
423
424	if (*c2w != (iconv_t)-1)
425		iconv_close(*c2w);
426	if (*w2c != (iconv_t)-1)
427		iconv_close(*w2c);
428
429	*c2w = id_c2w;
430	*w2c = id_w2c;
431
432	F_CLR(sp, SC_CONV_ERROR);
433	F_SET(sp, SC_SCR_REFORMAT);
434
435	return 0;
436err:
437#endif
438	switch (option) {
439	case O_FILEENCODING:
440		msgq(sp, M_ERR, "321|File encoding conversion not supported");
441		break;
442	case O_INPUTENCODING:
443		msgq(sp, M_ERR, "322|Input encoding conversion not supported");
444		break;
445	}
446	return 1;
447}
448
449/*
450 * conv_end --
451 *	Close the iconv descriptors, release the buffer.
452 *
453 * PUBLIC: void conv_end(SCR *);
454 */
455void
456conv_end(SCR *sp)
457{
458#if defined(USE_WIDECHAR) && defined(USE_ICONV)
459	int i;
460	for (i = 0; i <= IC_IE_TO_UTF16; ++i)
461		if (sp->conv.id[i] != (iconv_t)-1)
462			iconv_close(sp->conv.id[i]);
463	free(sp->cw.bp1.c);
464#endif
465}
466