1209975Snwhitehorn/* $FreeBSD$ */
2209975Snwhitehorn/*	$NetBSD: citrus_iconv.c,v 1.10 2011/11/19 18:34:21 tnozaki Exp $	*/
3209975Snwhitehorn
4209975Snwhitehorn/*-
5209975Snwhitehorn * Copyright (c)2003 Citrus Project,
6209975Snwhitehorn * All rights reserved.
7209975Snwhitehorn *
8209975Snwhitehorn * Redistribution and use in source and binary forms, with or without
9209975Snwhitehorn * modification, are permitted provided that the following conditions
10209975Snwhitehorn * are met:
11209975Snwhitehorn * 1. Redistributions of source code must retain the above copyright
12209975Snwhitehorn *    notice, this list of conditions and the following disclaimer.
13209975Snwhitehorn * 2. Redistributions in binary form must reproduce the above copyright
14209975Snwhitehorn *    notice, this list of conditions and the following disclaimer in the
15209975Snwhitehorn *    documentation and/or other materials provided with the distribution.
16209975Snwhitehorn *
17209975Snwhitehorn * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18209975Snwhitehorn * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19209975Snwhitehorn * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20209975Snwhitehorn * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21209975Snwhitehorn * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22209975Snwhitehorn * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23209975Snwhitehorn * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24209975Snwhitehorn * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25209975Snwhitehorn * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26209975Snwhitehorn * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27209975Snwhitehorn * SUCH DAMAGE.
28209975Snwhitehorn */
29209975Snwhitehorn
30209975Snwhitehorn#include <sys/cdefs.h>
31209975Snwhitehorn#include <sys/types.h>
32243040Skib#include <sys/queue.h>
33209975Snwhitehorn
34209975Snwhitehorn#include <assert.h>
35209975Snwhitehorn#include <dirent.h>
36209975Snwhitehorn#include <errno.h>
37209975Snwhitehorn#include <iconv.h>
38209975Snwhitehorn#include <langinfo.h>
39209975Snwhitehorn#include <limits.h>
40215159Snwhitehorn#include <paths.h>
41209975Snwhitehorn#include <stdbool.h>
42215159Snwhitehorn#include <stdio.h>
43215159Snwhitehorn#include <stdlib.h>
44209975Snwhitehorn#include <string.h>
45209975Snwhitehorn#include <unistd.h>
46215159Snwhitehorn
47209975Snwhitehorn#include "citrus_namespace.h"
48209975Snwhitehorn#include "citrus_bcs.h"
49209975Snwhitehorn#include "citrus_esdb.h"
50209975Snwhitehorn#include "citrus_region.h"
51212715Snwhitehorn#include "citrus_memstream.h"
52209975Snwhitehorn#include "citrus_mmap.h"
53222620Snwhitehorn#include "citrus_module.h"
54222620Snwhitehorn#include "citrus_lock.h"
55222620Snwhitehorn#include "citrus_lookup.h"
56212715Snwhitehorn#include "citrus_hash.h"
57212715Snwhitehorn#include "citrus_iconv.h"
58212715Snwhitehorn
59212715Snwhitehorn#define _CITRUS_ICONV_DIR	"iconv.dir"
60212715Snwhitehorn#define _CITRUS_ICONV_ALIAS	"iconv.alias"
61212715Snwhitehorn
62212715Snwhitehorn#define CI_HASH_SIZE 101
63212715Snwhitehorn#define CI_INITIAL_MAX_REUSE	5
64212715Snwhitehorn#define CI_ENV_MAX_REUSE	"ICONV_MAX_REUSE"
65212715Snwhitehorn
66212715Snwhitehornstatic bool			 isinit = false;
67212715Snwhitehornstatic int			 shared_max_reuse, shared_num_unused;
68209975Snwhitehornstatic _CITRUS_HASH_HEAD(, _citrus_iconv_shared, CI_HASH_SIZE) shared_pool;
69209975Snwhitehornstatic TAILQ_HEAD(, _citrus_iconv_shared) shared_unused;
70212715Snwhitehorn
71212715Snwhitehornstatic pthread_rwlock_t		 ci_lock = PTHREAD_RWLOCK_INITIALIZER;
72212715Snwhitehorn
73212715Snwhitehornstatic __inline void
74212715Snwhitehorninit_cache(void)
75212715Snwhitehorn{
76212715Snwhitehorn
77212715Snwhitehorn	WLOCK(&ci_lock);
78212715Snwhitehorn	if (!isinit) {
79212715Snwhitehorn		_CITRUS_HASH_INIT(&shared_pool, CI_HASH_SIZE);
80212715Snwhitehorn		TAILQ_INIT(&shared_unused);
81209975Snwhitehorn		shared_max_reuse = -1;
82212715Snwhitehorn		if (!issetugid() && getenv(CI_ENV_MAX_REUSE))
83212715Snwhitehorn			shared_max_reuse = atoi(getenv(CI_ENV_MAX_REUSE));
84212715Snwhitehorn		if (shared_max_reuse < 0)
85212715Snwhitehorn			shared_max_reuse = CI_INITIAL_MAX_REUSE;
86209975Snwhitehorn		isinit = true;
87212715Snwhitehorn	}
88212715Snwhitehorn	UNLOCK(&ci_lock);
89212715Snwhitehorn}
90209975Snwhitehorn
91212715Snwhitehornstatic __inline void
92212715Snwhitehornclose_shared(struct _citrus_iconv_shared *ci)
93212715Snwhitehorn{
94212715Snwhitehorn
95212715Snwhitehorn	if (ci) {
96212715Snwhitehorn		if (ci->ci_module) {
97209975Snwhitehorn			if (ci->ci_ops) {
98212715Snwhitehorn				if (ci->ci_closure)
99212715Snwhitehorn					(*ci->ci_ops->io_uninit_shared)(ci);
100212715Snwhitehorn				free(ci->ci_ops);
101209975Snwhitehorn			}
102212715Snwhitehorn			_citrus_unload_module(ci->ci_module);
103212715Snwhitehorn		}
104209975Snwhitehorn		free(ci);
105212715Snwhitehorn	}
106212715Snwhitehorn}
107212715Snwhitehorn
108212715Snwhitehornstatic __inline int
109212715Snwhitehornopen_shared(struct _citrus_iconv_shared * __restrict * __restrict rci,
110212715Snwhitehorn    const char * __restrict convname, const char * __restrict src,
111212715Snwhitehorn    const char * __restrict dst)
112212715Snwhitehorn{
113212715Snwhitehorn	struct _citrus_iconv_shared *ci;
114212715Snwhitehorn	_citrus_iconv_getops_t getops;
115212715Snwhitehorn	const char *module;
116212715Snwhitehorn	size_t len_convname;
117212715Snwhitehorn	int ret;
118212715Snwhitehorn
119212715Snwhitehorn#ifdef INCOMPATIBLE_WITH_GNU_ICONV
120212715Snwhitehorn	/*
121212715Snwhitehorn	 * Sadly, the gnu tools expect iconv to actually parse the
122212715Snwhitehorn	 * byte stream and don't allow for a pass-through when
123212715Snwhitehorn	 * the (src,dest) encodings are the same.
124212715Snwhitehorn	 * See gettext-0.18.3+ NEWS:
125212715Snwhitehorn	 *   msgfmt now checks PO file headers more strictly with less
126212715Snwhitehorn	 *   false-positives.
127212715Snwhitehorn	 * NetBSD don't do this either.
128212715Snwhitehorn	 */
129212715Snwhitehorn	module = (strcmp(src, dst) != 0) ? "iconv_std" : "iconv_none";
130212715Snwhitehorn#else
131212715Snwhitehorn	module = "iconv_std";
132212715Snwhitehorn#endif
133212715Snwhitehorn
134212715Snwhitehorn	/* initialize iconv handle */
135212715Snwhitehorn	len_convname = strlen(convname);
136212715Snwhitehorn	ci = malloc(sizeof(*ci) + len_convname + 1);
137212715Snwhitehorn	if (!ci) {
138212715Snwhitehorn		ret = errno;
139212715Snwhitehorn		goto err;
140212715Snwhitehorn	}
141212715Snwhitehorn	ci->ci_module = NULL;
142291262Snwhitehorn	ci->ci_ops = NULL;
143212715Snwhitehorn	ci->ci_closure = NULL;
144212715Snwhitehorn	ci->ci_convname = (void *)&ci[1];
145212715Snwhitehorn	memcpy(ci->ci_convname, convname, len_convname + 1);
146212715Snwhitehorn
147212715Snwhitehorn	/* load module */
148212715Snwhitehorn	ret = _citrus_load_module(&ci->ci_module, module);
149212715Snwhitehorn	if (ret)
150212715Snwhitehorn		goto err;
151212715Snwhitehorn
152212715Snwhitehorn	/* get operators */
153212715Snwhitehorn	getops = (_citrus_iconv_getops_t)_citrus_find_getops(ci->ci_module,
154212715Snwhitehorn	    module, "iconv");
155212715Snwhitehorn	if (!getops) {
156212715Snwhitehorn		ret = EOPNOTSUPP;
157212715Snwhitehorn		goto err;
158212715Snwhitehorn	}
159212715Snwhitehorn	ci->ci_ops = malloc(sizeof(*ci->ci_ops));
160212715Snwhitehorn	if (!ci->ci_ops) {
161212715Snwhitehorn		ret = errno;
162212715Snwhitehorn		goto err;
163212715Snwhitehorn	}
164212715Snwhitehorn	ret = (*getops)(ci->ci_ops);
165212715Snwhitehorn	if (ret)
166212715Snwhitehorn		goto err;
167212715Snwhitehorn
168212715Snwhitehorn	if (ci->ci_ops->io_init_shared == NULL ||
169212715Snwhitehorn	    ci->ci_ops->io_uninit_shared == NULL ||
170212715Snwhitehorn	    ci->ci_ops->io_init_context == NULL ||
171212715Snwhitehorn	    ci->ci_ops->io_uninit_context == NULL ||
172212715Snwhitehorn	    ci->ci_ops->io_convert == NULL) {
173212715Snwhitehorn		ret = EINVAL;
174212715Snwhitehorn		goto err;
175212715Snwhitehorn	}
176212715Snwhitehorn
177212715Snwhitehorn	/* initialize the converter */
178212715Snwhitehorn	ret = (*ci->ci_ops->io_init_shared)(ci, src, dst);
179212715Snwhitehorn	if (ret)
180212715Snwhitehorn		goto err;
181212715Snwhitehorn
182212715Snwhitehorn	*rci = ci;
183212715Snwhitehorn
184212715Snwhitehorn	return (0);
185212715Snwhitehornerr:
186212715Snwhitehorn	close_shared(ci);
187212715Snwhitehorn	return (ret);
188212715Snwhitehorn}
189212715Snwhitehorn
190291262Snwhitehornstatic __inline int
191212715Snwhitehornhash_func(const char *key)
192212715Snwhitehorn{
193212715Snwhitehorn
194212715Snwhitehorn	return (_string_hash_func(key, CI_HASH_SIZE));
195212715Snwhitehorn}
196212715Snwhitehorn
197212715Snwhitehornstatic __inline int
198212715Snwhitehornmatch_func(struct _citrus_iconv_shared * __restrict ci,
199212715Snwhitehorn    const char * __restrict key)
200212715Snwhitehorn{
201212715Snwhitehorn
202212715Snwhitehorn	return (strcmp(ci->ci_convname, key));
203217451Sandreast}
204212715Snwhitehorn
205212715Snwhitehornstatic int
206214574Snwhitehornget_shared(struct _citrus_iconv_shared * __restrict * __restrict rci,
207209975Snwhitehorn    const char *src, const char *dst)
208212715Snwhitehorn{
209212715Snwhitehorn	struct _citrus_iconv_shared * ci;
210212715Snwhitehorn	char convname[PATH_MAX];
211212715Snwhitehorn	int hashval, ret = 0;
212212715Snwhitehorn
213212715Snwhitehorn	snprintf(convname, sizeof(convname), "%s/%s", src, dst);
214212715Snwhitehorn
215212715Snwhitehorn	WLOCK(&ci_lock);
216209975Snwhitehorn
217212715Snwhitehorn	/* lookup alread existing entry */
218212715Snwhitehorn	hashval = hash_func(convname);
219212715Snwhitehorn	_CITRUS_HASH_SEARCH(&shared_pool, ci, ci_hash_entry, match_func,
220209975Snwhitehorn	    convname, hashval);
221212715Snwhitehorn	if (ci != NULL) {
222212715Snwhitehorn		/* found */
223212715Snwhitehorn		if (ci->ci_used_count == 0) {
224212715Snwhitehorn			TAILQ_REMOVE(&shared_unused, ci, ci_tailq_entry);
225212715Snwhitehorn			shared_num_unused--;
226212715Snwhitehorn		}
227209975Snwhitehorn		ci->ci_used_count++;
228212715Snwhitehorn		*rci = ci;
229209975Snwhitehorn		goto quit;
230212715Snwhitehorn	}
231212715Snwhitehorn
232212715Snwhitehorn	/* create new entry */
233212715Snwhitehorn	ret = open_shared(&ci, convname, src, dst);
234209975Snwhitehorn	if (ret)
235212715Snwhitehorn		goto quit;
236212715Snwhitehorn
237212715Snwhitehorn	_CITRUS_HASH_INSERT(&shared_pool, ci, ci_hash_entry, hashval);
238212715Snwhitehorn	ci->ci_used_count = 1;
239212715Snwhitehorn	*rci = ci;
240212715Snwhitehorn
241212715Snwhitehornquit:
242212715Snwhitehorn	UNLOCK(&ci_lock);
243291262Snwhitehorn
244291262Snwhitehorn	return (ret);
245291262Snwhitehorn}
246291262Snwhitehorn
247291262Snwhitehornstatic void
248291262Snwhitehornrelease_shared(struct _citrus_iconv_shared * __restrict ci)
249212715Snwhitehorn{
250212715Snwhitehorn
251212715Snwhitehorn	WLOCK(&ci_lock);
252212715Snwhitehorn	ci->ci_used_count--;
253212715Snwhitehorn	if (ci->ci_used_count == 0) {
254212715Snwhitehorn		/* put it into unused list */
255212715Snwhitehorn		shared_num_unused++;
256209975Snwhitehorn		TAILQ_INSERT_TAIL(&shared_unused, ci, ci_tailq_entry);
257209975Snwhitehorn		/* flood out */
258209975Snwhitehorn		while (shared_num_unused > shared_max_reuse) {
259209975Snwhitehorn			ci = TAILQ_FIRST(&shared_unused);
260209975Snwhitehorn			TAILQ_REMOVE(&shared_unused, ci, ci_tailq_entry);
261212715Snwhitehorn			_CITRUS_HASH_REMOVE(ci, ci_hash_entry);
262209975Snwhitehorn			shared_num_unused--;
263209975Snwhitehorn			close_shared(ci);
264210704Snwhitehorn		}
265210704Snwhitehorn	}
266209975Snwhitehorn
267209975Snwhitehorn	UNLOCK(&ci_lock);
268209975Snwhitehorn}
269209975Snwhitehorn
270209975Snwhitehorn/*
271209975Snwhitehorn * _citrus_iconv_open:
272212715Snwhitehorn *	open a converter for the specified in/out codes.
273212715Snwhitehorn */
274212715Snwhitehornint
275212722Snwhitehorn_citrus_iconv_open(struct _citrus_iconv * __restrict * __restrict rcv,
276212722Snwhitehorn    const char * __restrict src, const char * __restrict dst)
277209975Snwhitehorn{
278212715Snwhitehorn	struct _citrus_iconv *cv = NULL;
279209975Snwhitehorn	struct _citrus_iconv_shared *ci = NULL;
280209975Snwhitehorn	char realdst[PATH_MAX], realsrc[PATH_MAX];
281209975Snwhitehorn#ifdef _PATH_ICONV
282212722Snwhitehorn	char buf[PATH_MAX], path[PATH_MAX];
283209975Snwhitehorn#endif
284212715Snwhitehorn	int ret;
285212715Snwhitehorn
286212715Snwhitehorn	init_cache();
287212715Snwhitehorn
288209975Snwhitehorn	/* GNU behaviour, using locale encoding if "" or "char" is specified */
289212715Snwhitehorn	if ((strcmp(src, "") == 0) || (strcmp(src, "char") == 0))
290209975Snwhitehorn		src = nl_langinfo(CODESET);
291212715Snwhitehorn	if ((strcmp(dst, "") == 0) || (strcmp(dst, "char") == 0))
292212715Snwhitehorn		dst = nl_langinfo(CODESET);
293209975Snwhitehorn
294212715Snwhitehorn	/* resolve codeset name aliases */
295212715Snwhitehorn#ifdef _PATH_ICONV
296212715Snwhitehorn	snprintf(path, sizeof(path), "%s/%s", _PATH_ICONV, _CITRUS_ICONV_ALIAS);
297209975Snwhitehorn	strlcpy(realsrc, _lookup_alias(path, src, buf, (size_t)PATH_MAX,
298212715Snwhitehorn	    _LOOKUP_CASE_IGNORE), (size_t)PATH_MAX);
299209975Snwhitehorn	strlcpy(realdst, _lookup_alias(path, dst, buf, (size_t)PATH_MAX,
300212715Snwhitehorn	    _LOOKUP_CASE_IGNORE), (size_t)PATH_MAX);
301212715Snwhitehorn#else
302212715Snwhitehorn	strlcpy(realsrc, src, (size_t)PATH_MAX);
303212715Snwhitehorn	strlcpy(realdst, dst, (size_t)PATH_MAX);
304212715Snwhitehorn#endif
305209975Snwhitehorn
306212715Snwhitehorn	/* sanity check */
307212715Snwhitehorn	if (strchr(realsrc, '/') != NULL || strchr(realdst, '/'))
308212715Snwhitehorn		return (EINVAL);
309212715Snwhitehorn
310212715Snwhitehorn	/* get shared record */
311212715Snwhitehorn	ret = get_shared(&ci, realsrc, realdst);
312212715Snwhitehorn	if (ret)
313212715Snwhitehorn		return (ret);
314212715Snwhitehorn
315209975Snwhitehorn	/* create/init context */
316212715Snwhitehorn	if (*rcv == NULL) {
317212715Snwhitehorn		cv = malloc(sizeof(*cv));
318212715Snwhitehorn		if (cv == NULL) {
319212715Snwhitehorn			ret = errno;
320212715Snwhitehorn			release_shared(ci);
321212715Snwhitehorn			return (ret);
322212715Snwhitehorn		}
323212715Snwhitehorn		*rcv = cv;
324212715Snwhitehorn	}
325212715Snwhitehorn	(*rcv)->cv_shared = ci;
326212715Snwhitehorn	ret = (*ci->ci_ops->io_init_context)(*rcv);
327212715Snwhitehorn	if (ret) {
328212715Snwhitehorn		release_shared(ci);
329212715Snwhitehorn		free(cv);
330212715Snwhitehorn		return (ret);
331212715Snwhitehorn	}
332212715Snwhitehorn	return (0);
333209975Snwhitehorn}
334209975Snwhitehorn
335209975Snwhitehorn/*
336209975Snwhitehorn * _citrus_iconv_close:
337209975Snwhitehorn *	close the specified converter.
338209975Snwhitehorn */
339212715Snwhitehornvoid
340212722Snwhitehorn_citrus_iconv_close(struct _citrus_iconv *cv)
341209975Snwhitehorn{
342209975Snwhitehorn
343209975Snwhitehorn	if (cv) {
344209975Snwhitehorn		(*cv->cv_shared->ci_ops->io_uninit_context)(cv);
345212715Snwhitehorn		release_shared(cv->cv_shared);
346212715Snwhitehorn		free(cv);
347212715Snwhitehorn	}
348212715Snwhitehorn}
349212715Snwhitehorn
350212715Snwhitehornconst char
351212715Snwhitehorn*_citrus_iconv_canonicalize(const char *name)
352212715Snwhitehorn{
353212715Snwhitehorn	char *buf;
354212715Snwhitehorn
355212715Snwhitehorn	if ((buf = calloc((size_t)PATH_MAX, sizeof(*buf))) == NULL)
356212715Snwhitehorn		return (NULL);
357212715Snwhitehorn	_citrus_esdb_alias(name, buf, (size_t)PATH_MAX);
358212715Snwhitehorn	return (buf);
359212715Snwhitehorn}
360212715Snwhitehorn