1/*	$NetBSD: citrus_iconv_std.c,v 1.16 2012/02/12 13:51:29 wiz Exp $	*/
2
3/*-
4 * SPDX-License-Identifier: BSD-2-Clause
5 *
6 * Copyright (c)2003 Citrus Project,
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#include <sys/cdefs.h>
32#include <sys/endian.h>
33#include <sys/queue.h>
34
35#include <assert.h>
36#include <errno.h>
37#include <limits.h>
38#include <stdbool.h>
39#include <stdio.h>
40#include <stdlib.h>
41#include <string.h>
42
43#include "citrus_namespace.h"
44#include "citrus_types.h"
45#include "citrus_module.h"
46#include "citrus_region.h"
47#include "citrus_mmap.h"
48#include "citrus_hash.h"
49#include "citrus_iconv.h"
50#include "citrus_stdenc.h"
51#include "citrus_mapper.h"
52#include "citrus_csmapper.h"
53#include "citrus_memstream.h"
54#include "citrus_iconv_std.h"
55#include "citrus_esdb.h"
56
57/* ---------------------------------------------------------------------- */
58
/*
 * Instantiate the iconv module boilerplate for the "iconv_std" backend.
 * NOTE(review): presumably _CITRUS_ICONV_DECLS declares the static
 * _citrus_iconv_std_iconv_* callbacks defined later in this file and
 * _CITRUS_ICONV_DEF_OPS defines the _citrus_iconv_std_iconv_ops table that
 * _citrus_iconv_std_iconv_getops() copies out -- confirm against the macro
 * definitions in the citrus_iconv headers.
 */
_CITRUS_ICONV_DECLS(iconv_std);
_CITRUS_ICONV_DEF_OPS(iconv_std);
61
62
63/* ---------------------------------------------------------------------- */
64
65int
66_citrus_iconv_std_iconv_getops(struct _citrus_iconv_ops *ops)
67{
68
69	memcpy(ops, &_citrus_iconv_std_iconv_ops,
70	    sizeof(_citrus_iconv_std_iconv_ops));
71
72	return (0);
73}
74
75/* ---------------------------------------------------------------------- */
76
77/*
78 * convenience routines for stdenc.
79 */
80static __inline void
81save_encoding_state(struct _citrus_iconv_std_encoding *se)
82{
83
84	if (se->se_ps)
85		memcpy(se->se_pssaved, se->se_ps,
86		    _stdenc_get_state_size(se->se_handle));
87}
88
89static __inline void
90restore_encoding_state(struct _citrus_iconv_std_encoding *se)
91{
92
93	if (se->se_ps)
94		memcpy(se->se_ps, se->se_pssaved,
95		    _stdenc_get_state_size(se->se_handle));
96}
97
98static __inline void
99init_encoding_state(struct _citrus_iconv_std_encoding *se)
100{
101
102	if (se->se_ps)
103		_stdenc_init_state(se->se_handle, se->se_ps);
104}
105
106static __inline int
107mbtocsx(struct _citrus_iconv_std_encoding *se,
108    _csid_t *csid, _index_t *idx, char **s, size_t n, size_t *nresult,
109    struct iconv_hooks *hooks)
110{
111
112	return (_stdenc_mbtocs(se->se_handle, csid, idx, s, n, se->se_ps,
113			      nresult, hooks));
114}
115
116static __inline int
117cstombx(struct _citrus_iconv_std_encoding *se,
118    char *s, size_t n, _csid_t csid, _index_t idx, size_t *nresult,
119    struct iconv_hooks *hooks)
120{
121
122	return (_stdenc_cstomb(se->se_handle, s, n, csid, idx, se->se_ps,
123			      nresult, hooks));
124}
125
126static __inline int
127wctombx(struct _citrus_iconv_std_encoding *se,
128    char *s, size_t n, _wc_t wc, size_t *nresult,
129    struct iconv_hooks *hooks)
130{
131
132	return (_stdenc_wctomb(se->se_handle, s, n, wc, se->se_ps, nresult,
133			     hooks));
134}
135
136static __inline int
137put_state_resetx(struct _citrus_iconv_std_encoding *se, char *s, size_t n,
138    size_t *nresult)
139{
140
141	return (_stdenc_put_state_reset(se->se_handle, s, n, se->se_ps, nresult));
142}
143
144static __inline int
145get_state_desc_gen(struct _citrus_iconv_std_encoding *se, int *rstate)
146{
147	struct _stdenc_state_desc ssd;
148	int ret;
149
150	ret = _stdenc_get_state_desc(se->se_handle, se->se_ps,
151	    _STDENC_SDID_GENERIC, &ssd);
152	if (!ret)
153		*rstate = ssd.u.generic.state;
154
155	return (ret);
156}
157
158/*
159 * init encoding context
160 */
161static int
162init_encoding(struct _citrus_iconv_std_encoding *se, struct _stdenc *cs,
163    void *ps1, void *ps2)
164{
165	int ret = -1;
166
167	se->se_handle = cs;
168	se->se_ps = ps1;
169	se->se_pssaved = ps2;
170
171	if (se->se_ps)
172		ret = _stdenc_init_state(cs, se->se_ps);
173	if (!ret && se->se_pssaved)
174		ret = _stdenc_init_state(cs, se->se_pssaved);
175
176	return (ret);
177}
178
/*
 * Open a charset mapper from src to dst and accept it only if it maps
 * exactly one source unit to one destination unit and keeps no state.
 *
 * rcm:   receives the opened mapper on success; untouched on failure.
 * rnorm: forwarded to _csmapper_open().
 *
 * Returns 0 on success, the error from _csmapper_open(), or EINVAL when
 * the mapper was opened but rejected (it is closed again in that case).
 */
static int
open_csmapper(struct _csmapper **rcm, const char *src, const char *dst,
    unsigned long *rnorm)
{
	struct _csmapper *mapper;
	int error;

	error = _csmapper_open(&mapper, src, dst, 0, rnorm);
	if (error != 0)
		return (error);

	if (_csmapper_get_src_max(mapper) == 1 &&
	    _csmapper_get_dst_max(mapper) == 1 &&
	    _csmapper_get_state_size(mapper) == 0) {
		*rcm = mapper;
		return (0);
	}

	/* unsupported mapper shape */
	_csmapper_close(mapper);
	return (EINVAL);
}
199
200static void
201close_dsts(struct _citrus_iconv_std_dst_list *dl)
202{
203	struct _citrus_iconv_std_dst *sd;
204
205	while ((sd = TAILQ_FIRST(dl)) != NULL) {
206		TAILQ_REMOVE(dl, sd, sd_entry);
207		_csmapper_close(sd->sd_mapper);
208		free(sd);
209	}
210}
211
212static int
213open_dsts(struct _citrus_iconv_std_dst_list *dl,
214    const struct _esdb_charset *ec, const struct _esdb *dbdst)
215{
216	struct _citrus_iconv_std_dst *sd, *sdtmp;
217	unsigned long norm;
218	int i, ret;
219
220	sd = malloc(sizeof(*sd));
221	if (sd == NULL)
222		return (errno);
223
224	for (i = 0; i < dbdst->db_num_charsets; i++) {
225		ret = open_csmapper(&sd->sd_mapper, ec->ec_csname,
226		    dbdst->db_charsets[i].ec_csname, &norm);
227		if (ret == 0) {
228			sd->sd_csid = dbdst->db_charsets[i].ec_csid;
229			sd->sd_norm = norm;
230			/* insert this mapper by sorted order. */
231			TAILQ_FOREACH(sdtmp, dl, sd_entry) {
232				if (sdtmp->sd_norm > norm) {
233					TAILQ_INSERT_BEFORE(sdtmp, sd,
234					    sd_entry);
235					sd = NULL;
236					break;
237				}
238			}
239			if (sd)
240				TAILQ_INSERT_TAIL(dl, sd, sd_entry);
241			sd = malloc(sizeof(*sd));
242			if (sd == NULL) {
243				ret = errno;
244				close_dsts(dl);
245				return (ret);
246			}
247		} else if (ret != ENOENT) {
248			close_dsts(dl);
249			free(sd);
250			return (ret);
251		}
252	}
253	free(sd);
254	return (0);
255}
256
257static void
258close_srcs(struct _citrus_iconv_std_src_list *sl)
259{
260	struct _citrus_iconv_std_src *ss;
261
262	while ((ss = TAILQ_FIRST(sl)) != NULL) {
263		TAILQ_REMOVE(sl, ss, ss_entry);
264		close_dsts(&ss->ss_dsts);
265		free(ss);
266	}
267}
268
269static int
270open_srcs(struct _citrus_iconv_std_src_list *sl,
271    const struct _esdb *dbsrc, const struct _esdb *dbdst)
272{
273	struct _citrus_iconv_std_src *ss;
274	int count = 0, i, ret;
275
276	ss = malloc(sizeof(*ss));
277	if (ss == NULL)
278		return (errno);
279
280	TAILQ_INIT(&ss->ss_dsts);
281
282	for (i = 0; i < dbsrc->db_num_charsets; i++) {
283		ret = open_dsts(&ss->ss_dsts, &dbsrc->db_charsets[i], dbdst);
284		if (ret)
285			goto err;
286		if (!TAILQ_EMPTY(&ss->ss_dsts)) {
287			ss->ss_csid = dbsrc->db_charsets[i].ec_csid;
288			TAILQ_INSERT_TAIL(sl, ss, ss_entry);
289			ss = malloc(sizeof(*ss));
290			if (ss == NULL) {
291				ret = errno;
292				goto err;
293			}
294			count++;
295			TAILQ_INIT(&ss->ss_dsts);
296		}
297	}
298	free(ss);
299
300	return (count ? 0 : ENOENT);
301
302err:
303	free(ss);
304	close_srcs(sl);
305	return (ret);
306}
307
308/* do convert a character */
309#define E_NO_CORRESPONDING_CHAR ENOENT /* XXX */
310static int
311/*ARGSUSED*/
312do_conv(const struct _citrus_iconv_std_shared *is,
313	_csid_t *csid, _index_t *idx)
314{
315	struct _citrus_iconv_std_dst *sd;
316	struct _citrus_iconv_std_src *ss;
317	_index_t tmpidx;
318	int ret;
319
320	TAILQ_FOREACH(ss, &is->is_srcs, ss_entry) {
321		if (ss->ss_csid == *csid) {
322			TAILQ_FOREACH(sd, &ss->ss_dsts, sd_entry) {
323				ret = _csmapper_convert(sd->sd_mapper,
324				    &tmpidx, *idx, NULL);
325				switch (ret) {
326				case _MAPPER_CONVERT_SUCCESS:
327					*csid = sd->sd_csid;
328					*idx = tmpidx;
329					return (0);
330				case _MAPPER_CONVERT_NONIDENTICAL:
331					break;
332				case _MAPPER_CONVERT_SRC_MORE:
333					/*FALLTHROUGH*/
334				case _MAPPER_CONVERT_DST_MORE:
335					/*FALLTHROUGH*/
336				case _MAPPER_CONVERT_ILSEQ:
337					return (EILSEQ);
338				case _MAPPER_CONVERT_FATAL:
339					return (EINVAL);
340				}
341			}
342			break;
343		}
344	}
345
346	return (E_NO_CORRESPONDING_CHAR);
347}
348/* ---------------------------------------------------------------------- */
349
350static int
351/*ARGSUSED*/
352_citrus_iconv_std_iconv_init_shared(struct _citrus_iconv_shared *ci,
353    const char * __restrict src, const char * __restrict dst)
354{
355	struct _citrus_esdb esdbdst, esdbsrc;
356	struct _citrus_iconv_std_shared *is;
357	int ret;
358
359	is = malloc(sizeof(*is));
360	if (is == NULL) {
361		ret = errno;
362		goto err0;
363	}
364	ret = _citrus_esdb_open(&esdbsrc, src);
365	if (ret)
366		goto err1;
367	ret = _citrus_esdb_open(&esdbdst, dst);
368	if (ret)
369		goto err2;
370	ret = _stdenc_open(&is->is_src_encoding, esdbsrc.db_encname,
371	    esdbsrc.db_variable, esdbsrc.db_len_variable);
372	if (ret)
373		goto err3;
374	ret = _stdenc_open(&is->is_dst_encoding, esdbdst.db_encname,
375	    esdbdst.db_variable, esdbdst.db_len_variable);
376	if (ret)
377		goto err4;
378	is->is_use_invalid = esdbdst.db_use_invalid;
379	is->is_invalid = esdbdst.db_invalid;
380
381	TAILQ_INIT(&is->is_srcs);
382	ret = open_srcs(&is->is_srcs, &esdbsrc, &esdbdst);
383	if (ret)
384		goto err5;
385
386	_esdb_close(&esdbsrc);
387	_esdb_close(&esdbdst);
388	ci->ci_closure = is;
389
390	return (0);
391
392err5:
393	_stdenc_close(is->is_dst_encoding);
394err4:
395	_stdenc_close(is->is_src_encoding);
396err3:
397	_esdb_close(&esdbdst);
398err2:
399	_esdb_close(&esdbsrc);
400err1:
401	free(is);
402err0:
403	return (ret);
404}
405
406static void
407_citrus_iconv_std_iconv_uninit_shared(struct _citrus_iconv_shared *ci)
408{
409	struct _citrus_iconv_std_shared *is = ci->ci_closure;
410
411	if (is == NULL)
412		return;
413
414	_stdenc_close(is->is_src_encoding);
415	_stdenc_close(is->is_dst_encoding);
416	close_srcs(&is->is_srcs);
417	free(is);
418}
419
420static int
421_citrus_iconv_std_iconv_init_context(struct _citrus_iconv *cv)
422{
423	const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure;
424	struct _citrus_iconv_std_context *sc;
425	char *ptr;
426	size_t sz, szpsdst, szpssrc;
427
428	szpssrc = _stdenc_get_state_size(is->is_src_encoding);
429	szpsdst = _stdenc_get_state_size(is->is_dst_encoding);
430
431	sz = (szpssrc + szpsdst)*2 + sizeof(struct _citrus_iconv_std_context);
432	sc = malloc(sz);
433	if (sc == NULL)
434		return (errno);
435
436	ptr = (char *)&sc[1];
437	if (szpssrc > 0)
438		init_encoding(&sc->sc_src_encoding, is->is_src_encoding,
439		    ptr, ptr+szpssrc);
440	else
441		init_encoding(&sc->sc_src_encoding, is->is_src_encoding,
442		    NULL, NULL);
443	ptr += szpssrc*2;
444	if (szpsdst > 0)
445		init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding,
446		    ptr, ptr+szpsdst);
447	else
448		init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding,
449		    NULL, NULL);
450
451	cv->cv_closure = (void *)sc;
452
453	return (0);
454}
455
456static void
457_citrus_iconv_std_iconv_uninit_context(struct _citrus_iconv *cv)
458{
459
460	free(cv->cv_closure);
461}
462
/*
 * Conversion entry point of the iconv_std backend.
 *
 * Two modes are handled:
 *  - Reset (in == NULL or *in == NULL): when an output buffer is given,
 *    the destination's state-reset sequence is written to it; otherwise
 *    the destination state is simply re-initialized.  The source state is
 *    re-initialized in both cases.
 *  - Normal: characters are converted one at a time until the input is
 *    exhausted or an error occurs.
 *
 * cv:            conversion descriptor (shared data and per-descriptor
 *                context are taken from it).
 * in, inbytes:   input cursor and remaining byte count; advanced as input
 *                is consumed.
 * out, outbytes: output cursor and remaining space; advanced as output is
 *                produced.
 * flags:         only _CITRUS_ICONV_F_HIDE_INVALID is examined here.
 * invalids:      always written before returning: 0 after a successful
 *                reset, otherwise the number of characters for which no
 *                corresponding destination character existed.
 *
 * Returns 0 on success or an errno-style value.  On every error path both
 * encoding states are rolled back to the last saved snapshot, so in the
 * normal case the cursors and states reflect the last fully converted
 * character.
 */
static int
_citrus_iconv_std_iconv_convert(struct _citrus_iconv * __restrict cv,
    char * __restrict * __restrict in, size_t * __restrict inbytes,
    char * __restrict * __restrict out, size_t * __restrict outbytes,
    uint32_t flags, size_t * __restrict invalids)
{
	const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure;
	struct _citrus_iconv_std_context *sc = cv->cv_closure;
	_csid_t csid;
	_index_t idx;
	char *tmpin;
	size_t inval, in_mb_cur_min, szrin, szrout;
	int ret, state = 0;

	inval = 0;
	if (in == NULL || *in == NULL) {
		/* special cases */
		if (out != NULL && *out != NULL) {
			/* init output state and store the shift sequence */
			/* snapshot first so that the err path can roll back */
			save_encoding_state(&sc->sc_src_encoding);
			save_encoding_state(&sc->sc_dst_encoding);
			szrout = 0;

			ret = put_state_resetx(&sc->sc_dst_encoding,
			    *out, *outbytes, &szrout);
			if (ret)
				goto err;

			if (szrout == (size_t)-2) {
				/* too small to store the character */
				ret = EINVAL;
				goto err;
			}
			*out += szrout;
			*outbytes -= szrout;
		} else
			/* otherwise, discard the shift sequence */
			init_encoding_state(&sc->sc_dst_encoding);
		/* the source state is reset in both reset variants */
		init_encoding_state(&sc->sc_src_encoding);
		*invalids = 0;
		return (0);
	}

	/* number of bytes skipped per invalid sequence when ignoring them */
	in_mb_cur_min = _stdenc_get_mb_cur_min(is->is_src_encoding);

	/* normal case */
	for (;;) {
		if (*inbytes == 0) {
			/*
			 * Input exhausted: stop only if the source state is
			 * initial or stable.
			 * NOTE(review): ret is not checked here; if the query
			 * fails, state keeps its previous value (0 on the
			 * first pass) -- confirm this is intended.
			 */
			ret = get_state_desc_gen(&sc->sc_src_encoding, &state);
			if (state == _STDENC_SDGEN_INITIAL ||
			    state == _STDENC_SDGEN_STABLE)
				break;
		}

		/* save the encoding states for the error recovery */
		save_encoding_state(&sc->sc_src_encoding);
		save_encoding_state(&sc->sc_dst_encoding);

		/* mb -> csid/index */
		/* decode through a scratch cursor; *in moves only at next: */
		tmpin = *in;
		szrin = szrout = 0;
		ret = mbtocsx(&sc->sc_src_encoding, &csid, &idx, &tmpin,
		    *inbytes, &szrin, cv->cv_shared->ci_hooks);
		/*
		 * Any error other than EILSEQ is fatal; EILSEQ is fatal
		 * unless invalid input is being discarded.
		 */
		if (ret != 0 && (ret != EILSEQ ||
		    !cv->cv_shared->ci_discard_ilseq)) {
			goto err;
		} else if (ret == EILSEQ) {
			/*
			 * If //IGNORE was specified, we'll just keep crunching
			 * through invalid characters.
			 *
			 * The skipped bytes are not counted in inval.
			 * NOTE(review): nothing here checks that *inbytes is
			 * at least in_mb_cur_min; if EILSEQ can be reported
			 * with fewer bytes left, *inbytes would wrap around
			 * -- confirm against the stdenc implementations.
			 */
			*in += in_mb_cur_min;
			*inbytes -= in_mb_cur_min;
			restore_encoding_state(&sc->sc_src_encoding);
			restore_encoding_state(&sc->sc_dst_encoding);
			continue;
		}

		if (szrin == (size_t)-2) {
			/* incomplete character */
			ret = get_state_desc_gen(&sc->sc_src_encoding, &state);
			if (ret) {
				ret = EINVAL;
				goto err;
			}
			switch (state) {
			case _STDENC_SDGEN_INITIAL:
			case _STDENC_SDGEN_STABLE:
				/* fetch shift sequences only. */
				/* consume what was read; szrout is still 0 */
				goto next;
			}
			/* input ends in the middle of a character */
			ret = EINVAL;
			goto err;
		}
		/* convert the character */
		ret = do_conv(is, &csid, &idx);
		if (ret) {
			if (ret == E_NO_CORRESPONDING_CHAR) {
				/*
				 * GNU iconv returns EILSEQ when no
				 * corresponding character in the output.
				 * Some software depends on this behavior
				 * though this is against POSIX specification.
				 */
				if (cv->cv_shared->ci_ilseq_invalid != 0) {
					ret = EILSEQ;
					goto err;
				}
				inval++;
				szrout = 0;
				/*
				 * Emit the configured replacement character
				 * only when invalids are neither hidden by the
				 * caller nor being discarded, and the
				 * destination database defines one.
				 */
				if ((((flags & _CITRUS_ICONV_F_HIDE_INVALID) == 0) &&
				    !cv->cv_shared->ci_discard_ilseq) &&
				    is->is_use_invalid) {
					ret = wctombx(&sc->sc_dst_encoding,
					    *out, *outbytes, is->is_invalid,
					    &szrout, cv->cv_shared->ci_hooks);
					if (ret)
						goto err;
				}
				goto next;
			} else
				goto err;
		}
		/* csid/index -> mb */
		ret = cstombx(&sc->sc_dst_encoding,
		    *out, *outbytes, csid, idx, &szrout,
		    cv->cv_shared->ci_hooks);
		if (ret)
			goto err;
next:
		/* commit: advance both cursors past this character */
		*inbytes -= tmpin-*in; /* szrin is insufficient on \0. */
		*in = tmpin;
		*outbytes -= szrout;
		*out += szrout;
	}
	*invalids = inval;

	return (0);

err:
	/* roll both states back to the snapshot taken before this step */
	restore_encoding_state(&sc->sc_src_encoding);
	restore_encoding_state(&sc->sc_dst_encoding);
	*invalids = inval;

	return (ret);
}
609