1/*	$NetBSD: conv.c,v 1.11 2019/10/24 18:17:14 kamil Exp $ */
2/*-
3 * Copyright (c) 1993, 1994
4 *	The Regents of the University of California.  All rights reserved.
5 * Copyright (c) 1993, 1994, 1995, 1996
6 *	Keith Bostic.  All rights reserved.
7 *
8 * See the LICENSE file for redistribution information.
9 */
10
11#include "config.h"
12
13#include <sys/cdefs.h>
14#if 0
15#ifndef lint
16static const char sccsid[] = "Id: conv.c,v 1.27 2001/08/18 21:41:41 skimo Exp  (Berkeley) Date: 2001/08/18 21:41:41 ";
17#endif /* not lint */
18#else
19__RCSID("$NetBSD: conv.c,v 1.11 2019/10/24 18:17:14 kamil Exp $");
20#endif
21
22#include <sys/types.h>
23#include <sys/queue.h>
24#include <sys/time.h>
25
26#include <bitstring.h>
27#include <errno.h>
28#include <limits.h>
29#include <stdio.h>
30#include <stdlib.h>
31#include <string.h>
32#include <unistd.h>
33
34#include "common.h"
35
36#if defined(USE_WIDECHAR) && defined(USE_ICONV)
37#include <langinfo.h>
38#include <iconv.h>
39
40#define LANGCODESET	nl_langinfo(CODESET)
41#else
42#define LANGCODESET	""
43#endif
44
45#include <locale.h>
46
47#ifdef USE_WIDECHAR
48#ifdef USE_ICONV
49static int
50raw2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen,
51	const CHAR_T **dst)
52{
53    int i;
54    CHAR_T **tostr = (CHAR_T **)(void *)&cw->bp1;
55    size_t  *blen = &cw->blen1;
56
57    BINC_RETW(NULL, *tostr, *blen, len);
58
59    *tolen = len;
60    for (i = 0; i < len; ++i) {
61	CHAR_T w = (u_char)str[i];
62	memcpy((*tostr) + i, &w, sizeof(**tostr));
63    }
64
65    *dst = cw->bp1;
66
67    return 0;
68}
69#endif
70
/*
 * Conversion-failure policy.
 *
 * HANDLE_ICONV_ERROR(o, i, ol, il)
 *	o and i are the output and input cursors of an iconv() call, ol
 *	and il the matching "bytes left" counters.  The default variant
 *	copies one byte from i to o unchanged and steps all four.
 * HANDLE_MBR_ERROR(n, mbs, d, s)
 *	The default variant stores s into d, zero-fills the conversion
 *	state mbs through MEMSET, and sets n to 1 so the caller advances
 *	by one unit.
 *
 * When ERROR_ON_CONVERT is defined both become a jump to the `err' label
 * of the enclosing function.  They are function-like in both
 * configurations: every call site passes arguments, so an object-like
 * definition would expand to `goto err(...)' and fail to compile.
 */
#ifndef ERROR_ON_CONVERT
#define HANDLE_ICONV_ERROR(o, i, ol, il) do {				\
		*(o)++ = *(i)++;					\
		(ol)--; (il)--;						\
	} while (/*CONSTCOND*/0)
#define HANDLE_MBR_ERROR(n, mbs, d, s) do {				\
		(d) = (s);						\
		MEMSET(&(mbs), 0, 1); 					\
		(n) = 1; 						\
	} while (/*CONSTCOND*/0)
#else
#define HANDLE_ICONV_ERROR(o, i, ol, il)	goto err
#define	HANDLE_MBR_ERROR(n, mbs, d, s)		goto err
#endif
85
#define CONV_BUFFER_SIZE    512
/*
 * CONVERT(str, left, src, len) --
 *	Run one iconv() pass from str into the caller's scratch buffer.
 *
 * str	input cursor; advanced past the bytes consumed
 * left	bytes remaining at str; reduced accordingly
 * src	set to the start of the scratch buffer
 * len	set to the number of bytes placed in the scratch buffer
 *
 * The expansion also uses, from the enclosing function: `buffer' (a char
 * array of CONV_BUFFER_SIZE bytes), `id' (the iconv descriptor), `error'
 * and the label `err'.  If the pass produces no output, error is set to
 * -left and control jumps to err.
 *
 * An iconv() failure other than E2BIG is passed to HANDLE_ICONV_ERROR,
 * but only while the scratch buffer still has room: its pass-through
 * variant stores a byte at bp, which would otherwise land one past the
 * end of `buffer' and wrap outleft.  With a full buffer the failing input
 * is simply met again on the next pass.
 */
#ifdef USE_ICONV
#define CONVERT(str, left, src, len)				    	\
    do {								\
	size_t outleft;							\
	char *bp = buffer;						\
	outleft = CONV_BUFFER_SIZE;					\
	errno = 0;							\
	if (iconv(id, (char **)(void *)&str, &left, &bp, &outleft) 	\
	    == (size_t)-1 && errno != E2BIG && outleft > 0)		\
		HANDLE_ICONV_ERROR(bp, str, outleft, left);		\
	if ((len = CONV_BUFFER_SIZE - outleft) == 0) {			\
	    error = -left;						\
	    goto err;							\
	}				    				\
	src = buffer;							\
    } while (0)
#endif
108
109static int
110default_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
111		size_t *tolen, const CHAR_T **dst, const char *enc)
112{
113    int j;
114    size_t i = 0;
115    CHAR_T **tostr = (CHAR_T **)(void *)&cw->bp1;
116    size_t  *blen = &cw->blen1;
117    mbstate_t mbs;
118    size_t   n;
119    ssize_t  nlen = len;
120    const char *src = (const char *)str;
121    int		error = 1;
122#ifdef USE_ICONV
123    iconv_t	id = (iconv_t)-1;
124    char	buffer[CONV_BUFFER_SIZE];
125    size_t	left = len;
126#endif
127
128    MEMSET(&mbs, 0, 1);
129    BINC_RETW(NULL, *tostr, *blen, nlen);
130
131#ifdef USE_ICONV
132    if (strcmp(nl_langinfo(CODESET), enc)) {
133	id = iconv_open(nl_langinfo(CODESET), enc);
134	if (id == (iconv_t)-1)
135	    goto err;
136	CONVERT(str, left, src, len);
137    }
138#endif
139
140    for (i = 0, j = 0; j < len; ) {
141	CHAR_T w;
142	n = mbrtowc(&w, src + j, len - j, &mbs);
143	memcpy((*tostr) + i, &w, sizeof(**tostr));
144	/* NULL character converted */
145	if (n == (size_t)-2) error = -(len - j);
146	if (n == (size_t)-1 || n == (size_t)-2) {
147	    HANDLE_MBR_ERROR(n, mbs, w, src[j]);
148	    memcpy((*tostr) + i, &w, sizeof(**tostr));
149	}
150	if (n == 0) n = 1;
151	j += n;
152	if (++i >= *blen) {
153	    nlen += 256;
154	    BINC_GOTOW(NULL, *tostr, *blen, nlen);
155	}
156#ifdef USE_ICONV
157	if (id != (iconv_t)-1 && j == len && left) {
158	    CONVERT(str, left, src, len);
159	    j = 0;
160	}
161#endif
162    }
163    *tolen = i;
164
165#ifdef USE_ICONV
166    if (id != (iconv_t)-1)
167	iconv_close(id);
168#endif
169
170    *dst = cw->bp1;
171
172    return 0;
173alloc_err:
174#ifdef USE_ICONV
175err:
176    if (id != (iconv_t)-1)
177	iconv_close(id);
178#endif
179    *tolen = i;
180    *dst = cw->bp1;
181
182    return error;
183}
184
185static int
186fe_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
187	    size_t *tolen, const CHAR_T **dst)
188{
189    return default_char2int(sp, str, len, cw, tolen, dst, O_STR(sp, O_FILEENCODING));
190}
191
192static int
193ie_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
194	    size_t *tolen, const CHAR_T **dst)
195{
196    return default_char2int(sp, str, len, cw, tolen, dst, O_STR(sp, O_INPUTENCODING));
197}
198
199static int
200cs_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
201	    size_t *tolen, const CHAR_T **dst)
202{
203    return default_char2int(sp, str, len, cw, tolen, dst, LANGCODESET);
204}
205
206#ifdef USE_ICONV
207static int
208CHAR_T_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
209	size_t *tolen, const char **dst)
210{
211    *tolen = len * sizeof(CHAR_T);
212    *dst = (const char *)(const void *)str;
213
214    return 0;
215}
216
217static int
218CHAR_T_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
219	size_t *tolen, const CHAR_T **dst)
220{
221    *tolen = len / sizeof(CHAR_T);
222    *dst = (const CHAR_T*) str;
223
224    return 0;
225}
226
227static int
228int2raw(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, size_t *tolen,
229	const char **dst)
230{
231    int i;
232    char **tostr = (char **)(void *)&cw->bp1;
233    size_t  *blen = &cw->blen1;
234
235    BINC_RETC(NULL, *tostr, *blen, len);
236
237    *tolen = len;
238    for (i = 0; i < len; ++i) {
239	CHAR_T w;
240	memcpy(&w, str + i, sizeof(w));
241	(*tostr)[i] = w;
242    }
243
244    *dst = cw->bp1;
245
246    return 0;
247}
248#endif
249
250static int
251default_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
252		size_t *tolen, const char **pdst, const char *enc)
253{
254    size_t i, j = 0;
255    char **tostr = (char **)(void *)&cw->bp1;
256    size_t  *blen = &cw->blen1;
257    mbstate_t mbs;
258    size_t n;
259    ssize_t  nlen = len + MB_CUR_MAX;
260    char *dst;
261    size_t buflen;
262#ifdef USE_ICONV
263    int		offset = 0;
264    char	buffer[CONV_BUFFER_SIZE];
265    iconv_t	id = (iconv_t)-1;
266#endif
267
268/* convert first len bytes of buffer and append it to cw->bp
269 * len is adjusted => 0
270 * offset contains the offset in cw->bp and is adjusted
271 * cw->bp is grown as required
272 */
273#ifdef USE_ICONV
274#define CONVERT2(_buffer, lenp, cw, offset)				\
275    do {								\
276	const char *bp = _buffer;					\
277	size_t ret;							\
278	do {								\
279	    size_t outleft = cw->blen1 - offset;			\
280	    char *obp = (char *)cw->bp1 + offset;		    	\
281	    if (cw->blen1 < offset + MB_CUR_MAX) {		    	\
282		nlen += 256;						\
283		BINC_GOTOC(NULL, cw->bp1, cw->blen1, nlen);		\
284	    }						    		\
285	    errno = 0;						    	\
286	    ret = iconv(id, (char **)(void *)&bp, lenp, &obp, &outleft);\
287	    if (ret == (size_t)-1 && errno != E2BIG) 			\
288		    HANDLE_ICONV_ERROR(obp, bp, outleft, len);		\
289	    offset = cw->blen1 - outleft;			        \
290	} while (ret != 0);					        \
291    } while (0)
292#endif
293
294    MEMSET(&mbs, 0, 1);
295    BINC_RETC(NULL, *tostr, *blen, nlen);
296    dst = *tostr; buflen = *blen;
297
298#ifdef USE_ICONV
299    if (strcmp(nl_langinfo(CODESET), enc)) {
300	id = iconv_open(enc, nl_langinfo(CODESET));
301	if (id == (iconv_t)-1)
302	    goto err;
303	dst = buffer; buflen = CONV_BUFFER_SIZE;
304    }
305#endif
306
307    for (i = 0, j = 0; i < (size_t)len; ++i) {
308	CHAR_T w;
309	memcpy(&w, str + i, sizeof(w));
310	n = wcrtomb(dst + j, w, &mbs);
311	if (n == (size_t)-1)
312	   HANDLE_MBR_ERROR(n, mbs, dst[j], w);
313	j += n;
314	if (buflen < j + MB_CUR_MAX) {
315#ifdef USE_ICONV
316	    if (id != (iconv_t)-1) {
317		CONVERT2(buffer, &j, cw, offset);
318	    } else
319#endif
320	    {
321		nlen += 256;
322		BINC_RETC(NULL, *tostr, *blen, nlen);
323		dst = *tostr; buflen = *blen;
324	    }
325	}
326    }
327
328    n = wcrtomb(dst + j, L'\0', &mbs);
329    j += n - 1;				/* don't count NUL at the end */
330    *tolen = j;
331
332#ifdef USE_ICONV
333    if (id != (iconv_t)-1) {
334	CONVERT2(buffer, &j, cw, offset);
335	CONVERT2(NULL, NULL, cw, offset);  /* back to the initial state */
336	*tolen = offset;
337	iconv_close(id);
338    }
339#endif
340
341    *pdst = cw->bp1;
342
343    return 0;
344#ifdef USE_ICONV
345alloc_err:
346err:
347    if (id != (iconv_t)-1)
348	iconv_close(id);
349    *tolen = j;
350    *pdst = cw->bp1;
351
352    return 1;
353#endif
354}
355
356static int
357fe_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
358	    size_t *tolen, const char **dst)
359{
360    return default_int2char(sp, str, len, cw, tolen, dst, O_STR(sp, O_FILEENCODING));
361}
362
363static int
364cs_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
365	    size_t *tolen, const char **dst)
366{
367    return default_int2char(sp, str, len, cw, tolen, dst, LANGCODESET);
368}
369
370#endif
371
372
373void
374conv_init (SCR *orig, SCR *sp)
375{
376    if (orig != NULL)
377	MEMCPY(&sp->conv, &orig->conv, 1);
378    else {
379	setlocale(LC_ALL, "");
380#ifdef USE_WIDECHAR
381	sp->conv.sys2int = cs_char2int;
382	sp->conv.int2sys = cs_int2char;
383	sp->conv.file2int = fe_char2int;
384	sp->conv.int2file = fe_int2char;
385	sp->conv.input2int = ie_char2int;
386#ifdef USE_ICONV
387	o_set(sp, O_FILEENCODING, OS_STRDUP, nl_langinfo(CODESET), 0);
388	o_set(sp, O_INPUTENCODING, OS_STRDUP, nl_langinfo(CODESET), 0);
389#endif
390#endif
391    }
392}
393
394int
395conv_enc (SCR *sp, int option, const char *enc)
396{
397#if defined(USE_WIDECHAR) && defined(USE_ICONV)
398    iconv_t id;
399    char2wchar_t    *c2w;
400    wchar2char_t    *w2c;
401
402    switch (option) {
403    case O_FILEENCODING:
404	c2w = &sp->conv.file2int;
405	w2c = &sp->conv.int2file;
406	break;
407    case O_INPUTENCODING:
408	c2w = &sp->conv.input2int;
409	w2c = NULL;
410	break;
411    default:
412	c2w = NULL;
413	w2c = NULL;
414	break;
415    }
416
417    if (!*enc) {
418	if (c2w) *c2w = raw2int;
419	if (w2c) *w2c = int2raw;
420	return 0;
421    }
422
423    if (!strcmp(enc, "WCHAR_T")) {
424	if (c2w) *c2w = CHAR_T_char2int;
425	if (w2c) *w2c = CHAR_T_int2char;
426	return 0;
427    }
428
429    id = iconv_open(enc, nl_langinfo(CODESET));
430    if (id == (iconv_t)-1)
431	goto err;
432    iconv_close(id);
433    id = iconv_open(nl_langinfo(CODESET), enc);
434    if (id == (iconv_t)-1)
435	goto err;
436    iconv_close(id);
437
438    switch (option) {
439    case O_FILEENCODING:
440	*c2w = fe_char2int;
441	*w2c = fe_int2char;
442	break;
443    case O_INPUTENCODING:
444	*c2w = ie_char2int;
445	break;
446    }
447
448    F_CLR(sp, SC_CONV_ERROR);
449    F_SET(sp, SC_SCR_REFORMAT);
450
451    return 0;
452err:
453    switch (option) {
454    case O_FILEENCODING:
455	msgq(sp, M_ERR,
456	    "321|File encoding conversion not supported");
457	break;
458    case O_INPUTENCODING:
459	msgq(sp, M_ERR,
460	    "322|Input encoding conversion not supported");
461	break;
462    }
463#endif
464    return 1;
465}
466