/* gettext_iconv.c revision 1.8 */
/*	$NetBSD: gettext_iconv.c,v 1.8 2009/02/18 13:08:22 yamt Exp $	*/

/*-
 * Copyright (c) 2004 Citrus Project,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $Citrus$
 */
30
31
#include <sys/types.h>
#include <sys/param.h>

#include <errno.h>
#include <iconv.h>
#include <langinfo.h>
#include <libintl.h>
#include <search.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>

#include "libintl_local.h"
44
/*
 * One remembered conversion.  Entries live in the search tree rooted at
 * cacheroot and are ordered by cache_cmp().
 */
struct cache {
	char const *c_origmsg;		/* key: pointer to the original message */
	char const *c_resultmsg;	/* converted text stored for that key */
};
49
/* Conversion-cache helpers; the definitions follow below. */
static const struct cache *cache_find(const char *msg,
    struct domainbinding *db);
static int cache_enter(const char *origmsg, const char *resultmsg);
static int cache_cmp(const void *va, const void *vb);

/* Root handed to tfind(3)/tsearch(3); holds every struct cache entry. */
static void *cacheroot;
55
56/* ARGSUSED1 */
57static const struct cache *
58cache_find(const char *msg, struct domainbinding *db)
59{
60	struct cache key;
61	struct cache **c;
62
63	key.c_origmsg = msg;
64	c = tfind(&key, &cacheroot, cache_cmp);
65
66	return c ? *c : NULL;
67}
68
69static int
70cache_enter(const char *origmsg, const char *resultmsg)
71{
72	struct cache *c;
73
74	c = malloc(sizeof(*c));
75	if (c == NULL)
76		return -1;
77
78	c->c_origmsg = origmsg;
79	c->c_resultmsg = resultmsg;
80
81	if (tsearch(c, &cacheroot, cache_cmp) == NULL) {
82		free(c);
83		return -1;
84	}
85
86	return 0;
87}
88
89static int
90cache_cmp(const void *va, const void *vb)
91{
92	const struct cache *a = va;
93	const struct cache *b = vb;
94	int result;
95
96	if (a->c_origmsg > b->c_origmsg) {
97		result = 1;
98	} else if (a->c_origmsg < b->c_origmsg) {
99		result = -1;
100	} else {
101		result = 0;
102	}
103
104	return result;
105}
106
/* Size in bytes of each chunk malloc()ed to hold converted messages. */
#define	GETTEXT_ICONV_MALLOC_CHUNK	(16 * 1024)
108
109const char *
110__gettext_iconv(const char *origmsg, struct domainbinding *db)
111{
112	const char *tocode;
113	const char *fromcode = db->mohandle.mo.mo_charset;
114	const struct cache *cache;
115	const char *result;
116	iconv_t cd;
117	const char *src;
118	char *dst;
119	size_t origlen;
120	size_t srclen;
121	size_t dstlen;
122	size_t nvalid;
123	int savederrno = errno;
124
125	/*
126	 * static buffer for converted texts.
127	 *
128	 * note:
129	 * we never free buffers once returned to callers.
130	 * because of interface design of gettext, we can't know
131	 * the lifetime of them.
132	 */
133	static char *buffer;
134	static size_t bufferlen;
135
136	/*
137	 * don't convert message if *.mo doesn't specify codeset.
138	 */
139	if (fromcode == NULL)
140		return origmsg;
141
142	tocode = db->codeset;
143	if (tocode == NULL) {
144		/*
145		 * codeset isn't specified explicitly by
146		 * bind_textdomain_codeset().
147		 * use current locale(LC_CTYPE)'s codeset.
148		 *
149		 * XXX maybe wrong; it can mismatch with
150		 * environment variable setting.
151		 */
152		tocode = nl_langinfo(CODESET);
153	}
154
155	/*
156	 * shortcut if possible.
157	 * XXX should handle aliases
158	 */
159	if (!strcasecmp(tocode, fromcode))
160		return origmsg;
161
162	/* XXX LOCK */
163
164	/* XXX should detect change of tocode and purge caches? */
165
166	/*
167	 * see if we have already converted this message.
168	 */
169	cache = cache_find(origmsg, db);
170	if (cache) {
171		result = cache->c_resultmsg;
172		goto out;
173	}
174
175	origlen = strlen(origmsg) + 1;
176again:
177	cd = iconv_open(tocode, fromcode);
178	if (cd == (iconv_t)-1) {
179		result = origmsg;
180		goto out;
181	}
182
183	src = origmsg;
184	srclen = origlen;
185	dst = buffer;
186	dstlen = bufferlen;
187	nvalid = iconv(cd, &src, &srclen, &dst, &dstlen);
188	iconv_close(cd);
189
190	if (nvalid == (size_t)-1) {
191		/*
192		 * try to allocate a new buffer.
193		 *
194		 * just give up if GETTEXT_ICONV_MALLOC_CHUNK was not enough.
195		 */
196		if (errno == E2BIG &&
197		    bufferlen != GETTEXT_ICONV_MALLOC_CHUNK) {
198			buffer = malloc(GETTEXT_ICONV_MALLOC_CHUNK);
199			if (buffer) {
200				bufferlen = GETTEXT_ICONV_MALLOC_CHUNK;
201				goto again;
202			}
203		}
204
205		result = origmsg;
206	} else if (cache_enter(origmsg, buffer)) {
207		/*
208		 * failed to enter cache.  give up.
209		 */
210		result = origmsg;
211	} else {
212		size_t resultlen = dst - buffer;
213
214		result = buffer;
215		bufferlen -= resultlen;
216		buffer += resultlen;
217	}
218
219out:
220	/* XXX UNLOCK */
221	errno = savederrno;
222
223	return result;
224}
225