• Home
  • History
  • Annotate
  • Line#
  • Navigate
  • Raw
  • Download
  • only in /netgear-WNDR4500v2-V1.0.0.60_1.0.38/ap/gpl/timemachine/netatalk-2.2.5/libatalk/unicode/
1/*
2   Unix SMB/CIFS implementation.
3   minimal iconv implementation
4   Copyright (C) Andrew Tridgell 2001
5   Copyright (C) Jelmer Vernooij 2002,2003
6
7   This program is free software; you can redistribute it and/or modify
8   it under the terms of the GNU General Public License as published by
9   the Free Software Foundation; either version 2 of the License, or
10   (at your option) any later version.
11
12   This program is distributed in the hope that it will be useful,
13   but WITHOUT ANY WARRANTY; without even the implied warranty of
14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15   GNU General Public License for more details.
16
17   You should have received a copy of the GNU General Public License
18   along with this program; if not, write to the Free Software
19   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20
21   From samba 3.0 beta and GNU libiconv-1.8
   Unfortunately, most of the time we can't use the libc iconv service:
   - it doesn't round-trip for most encodings
   - it doesn't know about the Apple extensions
25*/
26
27#ifdef HAVE_CONFIG_H
28#include "config.h"
29#endif /* HAVE_CONFIG_H */
30
31#include <stdio.h>
32#include <stdlib.h>
33#include <unistd.h>
34#include <string.h>
35#include <ctype.h>
36#include <errno.h>
37#include <sys/param.h>
38#include <sys/stat.h>
39#ifdef HAVE_USABLE_ICONV
40#include <iconv.h>
41#endif
42
43#include <netatalk/endian.h>
44#include <atalk/unicode.h>
45#include <atalk/logger.h>
46#include "byteorder.h"
47
48
49/**
50 * @file
51 *
52 * @brief Samba wrapper/stub for iconv character set conversion.
53 *
54 * iconv is the XPG2 interface for converting between character
55 * encodings.  This file provides a Samba wrapper around it, and also
56 * a simple reimplementation that is used if the system does not
57 * implement iconv.
58 *
59 * Samba only works with encodings that are supersets of ASCII: ascii
60 * characters like whitespace can be tested for directly, multibyte
61 * sequences start with a byte with the high bit set, and strings are
62 * terminated by a nul byte.
63 *
64 * Note that the only function provided by iconv is conversion between
65 * characters.  It doesn't directly support operations like
66 * uppercasing or comparison.  We have to convert to UCS-2 and compare
67 * there.
68 *
69 * @sa Samba Developers Guide
70 **/
/* Flag value OR-ed into the flags of the builtin "UCS-2" charset entry. */
#define CHARSET_WIDECHAR    32

/*
 * UCS2ICONV is the charset name used for the UCS-2 side of every
 * iconv_open() call in this file.  Without a usable system iconv the
 * plain "UCS-2" name is used; otherwise "UCS-2-INTERNAL" is preferred
 * when available, falling back to the variant matching BYTE_ORDER.
 */
#if !defined(HAVE_USABLE_ICONV)
#define UCS2ICONV "UCS-2"
#elif defined(HAVE_UCS2INTERNAL)
#define UCS2ICONV "UCS-2-INTERNAL"
#elif BYTE_ORDER==LITTLE_ENDIAN
#define UCS2ICONV "UCS-2LE"
#else /* big endian */
#define UCS2ICONV "UCS-2BE"
#endif
86
/* Prototypes for the builtin converters defined further down in this file. */
static size_t ascii_pull(void *cd, char **inbuf, size_t *inbytesleft,
			 char **outbuf, size_t *outbytesleft);
static size_t ascii_push(void *cd, char **inbuf, size_t *inbytesleft,
			 char **outbuf, size_t *outbytesleft);
static size_t iconv_copy(void *cd, char **inbuf, size_t *inbytesleft,
			 char **outbuf, size_t *outbytesleft);
90
/* Charset descriptors defined outside this file. */
extern struct charset_functions charset_utf8;
extern struct charset_functions charset_utf8_mac;
extern struct charset_functions charset_mac_roman;
extern struct charset_functions charset_mac_hebrew;
extern struct charset_functions charset_mac_greek;
extern struct charset_functions charset_mac_turkish;
extern struct charset_functions charset_mac_centraleurope;
extern struct charset_functions charset_mac_cyrillic;

/* These descriptors are only declared when HAVE_USABLE_ICONV is defined. */
#ifdef HAVE_USABLE_ICONV
extern struct charset_functions charset_mac_japanese;
extern struct charset_functions charset_mac_chinese_trad;
extern struct charset_functions charset_mac_korean;
extern struct charset_functions charset_mac_chinese_simp;
#endif
105
106
107static struct charset_functions builtin_functions[] = {
108	{"UCS-2",   0, iconv_copy, iconv_copy, CHARSET_WIDECHAR | CHARSET_PRECOMPOSED, NULL, NULL, NULL},
109	{"ASCII",     0, ascii_pull, ascii_push, CHARSET_MULTIBYTE | CHARSET_PRECOMPOSED, NULL, NULL, NULL},
110	{NULL, 0, NULL, NULL, 0, NULL, NULL, NULL}
111};
112
113
/*
 * Push element p onto the front of the doubly linked list whose head
 * pointer is list.  p becomes the new head, its prev is cleared and its
 * next points at the previous head (NULL if the list was empty).
 *
 * Wrapped in do { } while (0) so that the expansion is a single statement
 * and "if (x) DLIST_ADD(a, b); else ..." parses correctly.  Both arguments
 * are evaluated more than once, so they must be free of side effects.
 */
#define DLIST_ADD(list, p) \
do { \
        (p)->prev = NULL; \
        (p)->next = (list); \
        if (list) { \
                (list)->prev = (p); \
        } \
        (list) = (p); \
} while (0)
126
127static struct charset_functions *charsets = NULL;
128
129struct charset_functions *find_charset_functions(const char *name)
130{
131	struct charset_functions *c = charsets;
132
133	while(c) {
134		if (strcasecmp(name, c->name) == 0) {
135			return c;
136		}
137		c = c->next;
138	}
139
140	return NULL;
141}
142
143int atalk_register_charset(struct charset_functions *funcs)
144{
145	if (!funcs) {
146		return -1;
147	}
148
149	/* Check whether we already have this charset... */
150	if (find_charset_functions(funcs->name)) {
151		LOG (log_debug, logtype_default, "Duplicate charset %s, not registering", funcs->name);
152		return -2;
153	}
154
155	funcs->next = funcs->prev = NULL;
156	DLIST_ADD(charsets, funcs);
157	return 0;
158}
159
160static void lazy_initialize_iconv(void)
161{
162	static int initialized = 0;
163	int i;
164
165	if (!initialized) {
166		initialized = 1;
167		for(i = 0; builtin_functions[i].name; i++)
168			atalk_register_charset(&builtin_functions[i]);
169
170		/* register additional charsets */
171		atalk_register_charset(&charset_utf8);
172		atalk_register_charset(&charset_utf8_mac);
173		atalk_register_charset(&charset_mac_roman);
174		atalk_register_charset(&charset_mac_hebrew);
175		atalk_register_charset(&charset_mac_greek);
176		atalk_register_charset(&charset_mac_turkish);
177		atalk_register_charset(&charset_mac_centraleurope);
178		atalk_register_charset(&charset_mac_cyrillic);
179#ifdef HAVE_USABLE_ICONV
180		atalk_register_charset(&charset_mac_japanese);
181		atalk_register_charset(&charset_mac_chinese_trad);
182		atalk_register_charset(&charset_mac_korean);
183		atalk_register_charset(&charset_mac_chinese_simp);
184#endif
185	}
186}
187
/*
 * Thin wrapper around the system iconv().
 *
 * cd is the iconv_t descriptor passed as a void pointer; the remaining
 * arguments are forwarded to iconv() unchanged.  Returns whatever iconv()
 * returns.
 *
 * If there was an error then reset the internal state; this ensures that
 * we don't have a shift state remaining for character sets like SJIS.
 * errno from the failed conversion is saved across the reset call so that
 * callers still see the original error code.
 *
 * Without HAVE_USABLE_ICONV the function always fails with EINVAL.
 */
static size_t sys_iconv(void *cd,
			char **inbuf, size_t *inbytesleft,
			char **outbuf, size_t *outbytesleft)
{
#ifdef HAVE_USABLE_ICONV
	size_t ret = iconv((iconv_t)cd,
			   (ICONV_CONST char**)inbuf, inbytesleft,
			   outbuf, outbytesleft);
	if (ret == (size_t)-1) {
		/* the reset call may modify errno; keep the original error */
		int saved_errno = errno;

		iconv((iconv_t)cd, NULL, NULL, NULL, NULL);
		errno = saved_errno;
	}
	return ret;
#else
	(void)cd;
	(void)inbuf;
	(void)inbytesleft;
	(void)outbuf;
	(void)outbytesleft;

	errno = EINVAL;
	return (size_t)-1;
#endif
}
206
207/**
208 * This is a simple portable iconv() implementaion.
209 *
210 * It only knows about a very small number of character sets - just
211 * enough that netatalk works on systems that don't have iconv.
212 **/
213size_t atalk_iconv(atalk_iconv_t cd,
214		 const char **inbuf, size_t *inbytesleft,
215		 char **outbuf, size_t *outbytesleft)
216{
217	char cvtbuf[2048];
218	char *bufp = cvtbuf;
219	size_t bufsize;
220
221	/* in many cases we can go direct */
222	if (cd->direct) {
223		return cd->direct(cd->cd_direct,
224				  (char **)inbuf, inbytesleft, outbuf, outbytesleft);
225	}
226
227
228	/* otherwise we have to do it chunks at a time */
229	while (*inbytesleft > 0) {
230		bufp = cvtbuf;
231		bufsize = sizeof(cvtbuf);
232
233		if (cd->pull(cd->cd_pull, (char **)inbuf, inbytesleft, &bufp, &bufsize) == (size_t)-1
234		       && errno != E2BIG) {
235		    return -1;
236		}
237
238		bufp = cvtbuf;
239		bufsize = sizeof(cvtbuf) - bufsize;
240
241		if (cd->push(cd->cd_push, &bufp, &bufsize, outbuf, outbytesleft) == (size_t)-1) {
242		    return -1;
243		}
244	}
245
246	return 0;
247}
248
249
250/*
251  simple iconv_open() wrapper
252 */
253atalk_iconv_t atalk_iconv_open(const char *tocode, const char *fromcode)
254{
255	atalk_iconv_t ret;
256	struct charset_functions *from, *to;
257
258
259	lazy_initialize_iconv();
260	from = charsets;
261	to = charsets;
262
263	ret = (atalk_iconv_t)malloc(sizeof(*ret));
264	if (!ret) {
265		errno = ENOMEM;
266		return (atalk_iconv_t)-1;
267	}
268	memset(ret, 0, sizeof(*ret));
269
270	ret->from_name = strdup(fromcode);
271	ret->to_name = strdup(tocode);
272
273	/* check for the simplest null conversion */
274	if (strcasecmp(fromcode, tocode) == 0) {
275		ret->direct = iconv_copy;
276		return ret;
277	}
278
279	/* check if we have a builtin function for this conversion */
280	from = find_charset_functions(fromcode);
281	if (from) ret->pull = from->pull;
282
283	to = find_charset_functions(tocode);
284	if (to) ret->push = to->push;
285
286	/* check if we can use iconv for this conversion */
287#ifdef HAVE_USABLE_ICONV
288	if (!from || (from->flags & CHARSET_ICONV)) {
289	  ret->cd_pull = iconv_open(UCS2ICONV, from && from->iname ? from->iname : fromcode);
290	  if (ret->cd_pull != (iconv_t)-1) {
291	    if (!ret->pull) ret->pull = sys_iconv;
292	  } else ret->pull = NULL;
293	}
294	if (ret->pull) {
295	  if (!to || (to->flags & CHARSET_ICONV)) {
296	    ret->cd_push = iconv_open(to && to->iname ? to->iname : tocode, UCS2ICONV);
297	    if (ret->cd_push != (iconv_t)-1) {
298	      if (!ret->push) ret->push = sys_iconv;
299	    } else ret->push = NULL;
300	  }
301	  if (!ret->push && ret->cd_pull) iconv_close((iconv_t)ret->cd_pull);
302	}
303#endif
304
305	if (!ret->push || !ret->pull) {
306		SAFE_FREE(ret->from_name);
307		SAFE_FREE(ret->to_name);
308		SAFE_FREE(ret);
309		errno = EINVAL;
310		return (atalk_iconv_t)-1;
311	}
312
313	/* check for conversion to/from ucs2 */
314	if (strcasecmp(fromcode, "UCS-2") == 0) {
315	  ret->direct = ret->push;
316	  ret->cd_direct = ret->cd_push;
317	  ret->cd_push = NULL;
318	}
319	if (strcasecmp(tocode, "UCS-2") == 0) {
320	  ret->direct = ret->pull;
321	  ret->cd_direct = ret->cd_pull;
322	  ret->cd_pull = NULL;
323	}
324
325	return ret;
326}
327
328/*
329  simple iconv_close() wrapper
330*/
331int atalk_iconv_close (atalk_iconv_t cd)
332{
333#ifdef HAVE_USABLE_ICONV
334	if (cd->cd_direct) iconv_close((iconv_t)cd->cd_direct);
335	if (cd->cd_pull) iconv_close((iconv_t)cd->cd_pull);
336	if (cd->cd_push) iconv_close((iconv_t)cd->cd_push);
337#endif
338
339	SAFE_FREE(cd->from_name);
340	SAFE_FREE(cd->to_name);
341
342	memset(cd, 0, sizeof(*cd));
343	SAFE_FREE(cd);
344	return 0;
345}
346
347
348/************************************************************************
349 the following functions implement the builtin character sets in Netatalk
350*************************************************************************/
351
352static size_t ascii_pull(void *cd _U_, char **inbuf, size_t *inbytesleft,
353			 char **outbuf, size_t *outbytesleft)
354{
355	ucs2_t curchar;
356
357	while (*inbytesleft >= 1 && *outbytesleft >= 2) {
358		if ((unsigned char)(*inbuf)[0] < 0x80) {
359			curchar = (ucs2_t) (*inbuf)[0];
360			SSVAL((*outbuf),0,curchar);
361		}
362		else {
363			errno = EILSEQ;
364			return -1;
365		}
366		(*inbytesleft)  -= 1;
367		(*outbytesleft) -= 2;
368		(*inbuf)  += 1;
369		(*outbuf) += 2;
370	}
371
372	if (*inbytesleft > 0) {
373		errno = E2BIG;
374		return -1;
375	}
376
377	return 0;
378}
379
380static size_t ascii_push(void *cd _U_, char **inbuf, size_t *inbytesleft,
381			 char **outbuf, size_t *outbytesleft)
382{
383	int ir_count=0;
384	ucs2_t curchar;
385
386	while (*inbytesleft >= 2 && *outbytesleft >= 1) {
387		curchar = SVAL((*inbuf), 0);
388		if (curchar < 0x0080) {
389			(*outbuf)[0] = curchar;
390		}
391		else {
392			errno = EILSEQ;
393			return -1;
394		}
395		(*inbytesleft)  -= 2;
396		(*outbytesleft) -= 1;
397		(*inbuf)  += 2;
398		(*outbuf) += 1;
399	}
400
401	if (*inbytesleft == 1) {
402		errno = EINVAL;
403		return -1;
404	}
405
406	if (*inbytesleft > 1) {
407		errno = E2BIG;
408		return -1;
409	}
410
411	return ir_count;
412}
413
414
/*
 * Null conversion: copy as many bytes as fit from the input buffer to the
 * output buffer.
 *
 * The buffer pointers and remaining-byte counters are advanced by the
 * number of bytes copied.  Returns 0 when all input was copied, or
 * (size_t)-1 with errno set to E2BIG when the output buffer was too small
 * to take everything.
 */
static size_t iconv_copy(void *cd _U_, char **inbuf, size_t *inbytesleft,
			 char **outbuf, size_t *outbytesleft)
{
	/*
	 * Keep the count in size_t: an int would truncate lengths above
	 * INT_MAX and hand memmove() a wrapped, bogus size.
	 */
	size_t n = (*inbytesleft < *outbytesleft) ? *inbytesleft : *outbytesleft;

	/* memmove, since the two buffers are not known to be disjoint */
	memmove(*outbuf, *inbuf, n);

	(*inbytesleft) -= n;
	(*outbytesleft) -= n;
	(*inbuf) += n;
	(*outbuf) += n;

	if (*inbytesleft > 0) {
		errno = E2BIG;
		return -1;
	}

	return 0;
}
436
437/* ------------------------ */
438