• Home
  • History
  • Annotate
  • Line#
  • Navigate
  • Raw
  • Download
  • only in /netgear-WNDR4500v2-V1.0.0.60_1.0.38/ap/gpl/timemachine/netatalk-2.2.5/libatalk/unicode/
1/*
2  Unix SMB/CIFS implementation.
3  Character set conversion Extensions
4  Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5  Copyright (C) Andrew Tridgell 2001
6  Copyright (C) Simo Sorce 2001
7  Copyright (C) Martin Pool 2003
8
9  This program is free software; you can redistribute it and/or modify
10  it under the terms of the GNU General Public License as published by
11  the Free Software Foundation; either version 2 of the License, or
12  (at your option) any later version.
13
14  This program is distributed in the hope that it will be useful,
15  but WITHOUT ANY WARRANTY; without even the implied warranty of
16  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  GNU General Public License for more details.
18
19  You should have received a copy of the GNU General Public License
20  along with this program; if not, write to the Free Software
21  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22
23*/
24#ifdef HAVE_CONFIG_H
25#include "config.h"
26#endif /* HAVE_CONFIG_H */
27
28#include <stdio.h>
29#include <stdlib.h>
30#include <unistd.h>
31#include <string.h>
32#include <ctype.h>
33#include <errno.h>
34#include <sys/stat.h>
35#include <sys/param.h>
36#ifdef HAVE_USABLE_ICONV
37#include <iconv.h>
38#endif
39#if HAVE_LOCALE_H
40#include <locale.h>
41#endif
42#if HAVE_LANGINFO_H
43#include <langinfo.h>
44#endif
45
46#include <netatalk/endian.h>
47#include <atalk/logger.h>
48#include <atalk/unicode.h>
49#include <atalk/util.h>
50#include "byteorder.h"
51
52
53/**
54 * @file
55 *
56 * @brief Character-set conversion routines built on our iconv.
57 *
58 * @note Samba's internal character set (at least in the 3.0 series)
59 * is always the same as the one for the Unix filesystem.  It is
60 * <b>not</b> necessarily UTF-8 and may be different on machines that
61 * need i18n filenames to be compatible with Unix software.  It does
62 * have to be a superset of ASCII.  All multibyte sequences must start
63 * with a byte with the high bit set.
64 *
65 * @sa lib/iconv.c
66 */
67
68
69#define MAX_CHARSETS 20
70
71#define CHECK_FLAGS(a,b) (((a)!=NULL) ? (*(a) & (b)) : 0 )
72
73static atalk_iconv_t conv_handles[MAX_CHARSETS][MAX_CHARSETS];
74static char* charset_names[MAX_CHARSETS];
75static struct charset_functions* charsets[MAX_CHARSETS];
/* Lower-case hexadecimal digits, indexed by nibble value (0..15). */
static const char hexdig[] = "0123456789abcdef";

/*
 * Value (0..15) of the hexadecimal digit character c.
 *
 * Both 'a'..'f' and 'A'..'F' are accepted, matching what isxdigit()
 * lets through.  The result is unspecified if c is not a hex digit.
 * The argument is evaluated more than once, so it must not have side
 * effects.  The casts keep the <ctype.h> calls defined when plain char
 * is signed.
 */
#define hextoint( c )                                                   \
    ( isdigit( (unsigned char)(c) )                                     \
      ? (c) - '0'                                                       \
      : tolower( (unsigned char)(c) ) - 'a' + 10 )
78
79static char* read_charsets_from_env(charset_t ch)
80{
81    char *name;
82
83    switch (ch) {
84    case CH_MAC:
85        if (( name = getenv( "ATALK_MAC_CHARSET" )) != NULL )
86            return name;
87        else
88            return "MAC_ROMAN";
89        break;
90    case CH_UNIX:
91        if (( name = getenv( "ATALK_UNIX_CHARSET" )) != NULL )
92            return name;
93        else
94            return "LOCALE";
95        break;
96    default:
97        break;
98    }
99    return "ASCII";
100}
101
102
103/**
104 * Return the name of a charset to give to iconv().
105 **/
106static const char *charset_name(charset_t ch)
107{
108    const char *ret = NULL;
109    static int first = 1;
110    static char macname[128];
111    static char unixname[128];
112
113    if (first) {
114        memset(macname, 0, sizeof(macname));
115        memset(unixname, 0, sizeof(unixname));
116        first = 0;
117    }
118
119    if (ch == CH_UCS2) ret = "UCS-2";
120    else if (ch == CH_UTF8) ret = "UTF8";
121    else if (ch == CH_UTF8_MAC) ret = "UTF8-MAC";
122    else if (ch == CH_UNIX) {
123        if (unixname[0] == '\0') {
124            ret = read_charsets_from_env(CH_UNIX);
125            strlcpy(unixname, ret, sizeof(unixname));
126        }
127        else
128            ret = unixname;
129    }
130    else if (ch == CH_MAC) {
131        if (macname[0] == '\0') {
132            ret = read_charsets_from_env(CH_MAC);
133            strlcpy(macname, ret, sizeof(macname));
134        }
135        else
136            ret = macname;
137    }
138
139    if (!ret)
140        ret = charset_names[ch];
141
142#if defined(HAVE_NL_LANGINFO) && defined(CODESET)
143    if (ret && strcasecmp(ret, "LOCALE") == 0) {
144        const char *ln = NULL;
145
146#ifdef HAVE_SETLOCALE
147        setlocale(LC_ALL, "");
148#endif
149        ln = nl_langinfo(CODESET);
150        if (ln) {
151            /* Check whether the charset name is supported
152               by iconv */
153            atalk_iconv_t handle = atalk_iconv_open(ln, "UCS-2");
154            if (handle == (atalk_iconv_t) -1) {
155                LOG(log_debug, logtype_default, "Locale charset '%s' unsupported, using ASCII instead", ln);
156                ln = "ASCII";
157            } else {
158                atalk_iconv_close(handle);
159            }
160            if (ch==CH_UNIX)
161                strlcpy(unixname, ln, sizeof(unixname));
162        }
163        ret = ln;
164    }
165#else /* system doesn't have LOCALE support */
166    if (ch == CH_UNIX) ret = NULL;
167#endif
168
169    if (!ret || !*ret) ret = "ASCII";
170    return ret;
171}
172
173static struct charset_functions* get_charset_functions (charset_t ch)
174{
175    if (charsets[ch] != NULL)
176        return charsets[ch];
177
178    charsets[ch] = find_charset_functions(charset_name(ch));
179
180    return charsets[ch];
181}
182
183
/*
 * Run init_iconv() the first time any conversion routine needs it.
 * Later calls are no-ops.
 */
static void lazy_initialize_conv(void)
{
    static int initialized = 0;

    if (initialized)
        return;

    /* The flag is raised before the call, as in the original order. */
    initialized = 1;
    init_iconv();
}
193
194charset_t add_charset(const char* name)
195{
196    static charset_t max_charset_t = NUM_CHARSETS-1;
197    charset_t cur_charset_t = max_charset_t+1;
198    unsigned int c1;
199
200    lazy_initialize_conv();
201
202    for (c1=0; c1<=max_charset_t;c1++) {
203        if ( strcasecmp(name, charset_name(c1)) == 0)
204            return (c1);
205    }
206
207    if ( cur_charset_t >= MAX_CHARSETS )  {
208        LOG (log_debug, logtype_default, "Adding charset %s failed, too many charsets (max. %u allowed)",
209             name, MAX_CHARSETS);
210        return (charset_t) -1;
211    }
212
213    /* First try to setup the required conversions */
214
215    conv_handles[cur_charset_t][CH_UCS2] = atalk_iconv_open( charset_name(CH_UCS2), name);
216    if (conv_handles[cur_charset_t][CH_UCS2] == (atalk_iconv_t)-1) {
217        LOG(log_error, logtype_default, "Required conversion from %s to %s not supported",
218            name,  charset_name(CH_UCS2));
219        conv_handles[cur_charset_t][CH_UCS2] = NULL;
220        return (charset_t) -1;
221    }
222
223    conv_handles[CH_UCS2][cur_charset_t] = atalk_iconv_open( name, charset_name(CH_UCS2));
224    if (conv_handles[CH_UCS2][cur_charset_t] == (atalk_iconv_t)-1) {
225        LOG(log_error, logtype_default, "Required conversion from %s to %s not supported",
226            charset_name(CH_UCS2), name);
227        conv_handles[CH_UCS2][cur_charset_t] = NULL;
228        return (charset_t) -1;
229    }
230
231    /* register the new charset_t name */
232    charset_names[cur_charset_t] = strdup(name);
233
234    charsets[cur_charset_t] = get_charset_functions (cur_charset_t);
235    max_charset_t++;
236
237#ifdef DEBUG
238    LOG(log_debug9, logtype_default, "Added charset %s with handle %u", name, cur_charset_t);
239#endif
240    return (cur_charset_t);
241}
242
243/**
244 * Initialize iconv conversion descriptors.
245 *
246 * This is called the first time it is needed, and also called again
247 * every time the configuration is reloaded, because the charset or
248 * codepage might have changed.
249 **/
250void init_iconv(void)
251{
252    int c1;
253
254    for (c1=0;c1<NUM_CHARSETS;c1++) {
255        const char *name = charset_name((charset_t)c1);
256
257        conv_handles[c1][CH_UCS2] = atalk_iconv_open( charset_name(CH_UCS2), name);
258        if (conv_handles[c1][CH_UCS2] == (atalk_iconv_t)-1) {
259            LOG(log_error, logtype_default, "Required conversion from %s to %s not supported",
260                name,  charset_name(CH_UCS2));
261            conv_handles[c1][CH_UCS2] = NULL;
262        }
263
264        if (c1 != CH_UCS2) { /* avoid lost memory, make valgrind happy */
265            conv_handles[CH_UCS2][c1] = atalk_iconv_open( name, charset_name(CH_UCS2));
266            if (conv_handles[CH_UCS2][c1] == (atalk_iconv_t)-1) {
267                LOG(log_error, logtype_default, "Required conversion from %s to %s not supported",
268                    charset_name(CH_UCS2), name);
269                conv_handles[CH_UCS2][c1] = NULL;
270            }
271        }
272
273        charsets[c1] = get_charset_functions (c1);
274    }
275}
276
277/**
278 *
279 **/
280static size_t add_null(charset_t to, char *buf, size_t bytesleft, size_t len)
281{
282    /* Terminate the string */
283    if (to == CH_UCS2 && bytesleft >= 2) {
284        buf[len]   = 0;
285        buf[len+1] = 0;
286
287    }
288    else if ( to != CH_UCS2 && bytesleft > 0 )
289        buf[len]   = 0;
290    else {
291        errno = E2BIG;
292        return (size_t)(-1);
293    }
294
295    return len;
296}
297
298
299/**
300 * Convert string from one encoding to another, making error checking etc
301 *
302 * @param src pointer to source string (multibyte or singlebyte)
303 * @param srclen length of the source string in bytes
304 * @param dest pointer to destination string (multibyte or singlebyte)
305 * @param destlen maximal length allowed for string
306 * @returns the number of bytes occupied in the destination
307 **/
308static size_t convert_string_internal(charset_t from, charset_t to,
309                                      void const *src, size_t srclen,
310                                      void *dest, size_t destlen)
311{
312    size_t i_len, o_len;
313    size_t retval;
314    const char* inbuf = (const char*)src;
315    char* outbuf = (char*)dest;
316    char* o_save = outbuf;
317    atalk_iconv_t descriptor;
318
319    /* Fixed based on Samba 3.0.6 */
320    if (srclen == (size_t)-1) {
321        if (from == CH_UCS2) {
322            srclen = (strlen_w((const ucs2_t *)src)) * 2;
323        } else {
324            srclen = strlen((const char *)src);
325        }
326    }
327
328
329    lazy_initialize_conv();
330
331    descriptor = conv_handles[from][to];
332
333    if (descriptor == (atalk_iconv_t)-1 || descriptor == (atalk_iconv_t)0) {
334        return (size_t) -1;
335    }
336
337    i_len=srclen;
338    o_len=destlen;
339    retval = atalk_iconv(descriptor,  &inbuf, &i_len, &outbuf, &o_len);
340    if(retval==(size_t)-1) {
341        const char *reason="unknown error";
342        switch(errno) {
343        case EINVAL:
344            reason="Incomplete multibyte sequence";
345            break;
346        case E2BIG:
347            reason="No more room";
348            break;
349        case EILSEQ:
350            reason="Illegal multibyte sequence";
351            break;
352        }
353        LOG(log_debug, logtype_default,"Conversion error: %s",reason);
354        return (size_t)-1;
355    }
356
357    /* Terminate the string */
358    return add_null( to, o_save, o_len, destlen -o_len);
359}
360
361
362size_t convert_string(charset_t from, charset_t to,
363                      void const *src, size_t srclen,
364                      void *dest, size_t destlen)
365{
366    size_t i_len, o_len;
367    ucs2_t *u;
368    ucs2_t buffer[MAXPATHLEN];
369    ucs2_t buffer2[MAXPATHLEN];
370
371    /* convert from_set to UCS2 */
372    if ((size_t)-1 == ( o_len = convert_string_internal( from, CH_UCS2, src, srclen,
373                                                           (char*) buffer, sizeof(buffer))) ) {
374        LOG(log_error, logtype_default, "Conversion failed ( %s to CH_UCS2 )", charset_name(from));
375        return (size_t) -1;
376    }
377
378    /* Do pre/decomposition */
379    i_len = sizeof(buffer2);
380    u = buffer2;
381    if (charsets[to] && (charsets[to]->flags & CHARSET_DECOMPOSED) ) {
382        if ( (size_t)-1 == (i_len = decompose_w(buffer, o_len, u, &i_len)) )
383            return (size_t)-1;
384    }
385    else if (!charsets[from] || (charsets[from]->flags & CHARSET_DECOMPOSED)) {
386        if ( (size_t)-1 == (i_len = precompose_w(buffer, o_len, u, &i_len)) )
387            return (size_t)-1;
388    }
389    else {
390        u = buffer;
391        i_len = o_len;
392    }
393    /* Convert UCS2 to to_set */
394    if ((size_t)(-1) == ( o_len = convert_string_internal( CH_UCS2, to, (char*) u, i_len, dest, destlen)) ) {
395        LOG(log_error, logtype_default, "Conversion failed (CH_UCS2 to %s):%s", charset_name(to), strerror(errno));
396        return (size_t) -1;
397    }
398
399    return o_len;
400}
401
402
403
404/**
405 * Convert between character sets, allocating a new buffer for the result.
406 *
407 * @param srclen length of source buffer.
408 * @param dest always set at least to NULL
409 * @note -1 is not accepted for srclen.
410 *
411 * @returns Size in bytes of the converted string; or -1 in case of error.
412 **/
413
414static size_t convert_string_allocate_internal(charset_t from, charset_t to,
415                                               void const *src, size_t srclen, char **dest)
416{
417    size_t i_len, o_len, destlen;
418    size_t retval;
419    const char *inbuf = (const char *)src;
420    char *outbuf = NULL, *ob = NULL;
421    atalk_iconv_t descriptor;
422
423    *dest = NULL;
424
425    if (src == NULL || srclen == (size_t)-1)
426        return (size_t)-1;
427
428    lazy_initialize_conv();
429
430    descriptor = conv_handles[from][to];
431
432    if (descriptor == (atalk_iconv_t)-1 || descriptor == (atalk_iconv_t)0) {
433        /* conversion not supported, return -1*/
434        LOG(log_debug, logtype_default, "convert_string_allocate: conversion not supported!");
435        return -1;
436    }
437
438    destlen = MAX(srclen, 512);
439convert:
440    destlen = destlen * 2;
441    outbuf = (char *)realloc(ob, destlen);
442    if (!outbuf) {
443        LOG(log_debug, logtype_default,"convert_string_allocate: realloc failed!");
444        SAFE_FREE(ob);
445        return (size_t)-1;
446    } else {
447        ob = outbuf;
448    }
449    inbuf = src;   /* this restarts the whole conversion if buffer needed to be increased */
450    i_len = srclen;
451    o_len = destlen;
452    retval = atalk_iconv(descriptor,
453                         &inbuf, &i_len,
454                         &outbuf, &o_len);
455    if(retval == (size_t)-1)        {
456        const char *reason="unknown error";
457        switch(errno) {
458        case EINVAL:
459            reason="Incomplete multibyte sequence";
460            break;
461        case E2BIG:
462            goto convert;
463        case EILSEQ:
464            reason="Illegal multibyte sequence";
465            break;
466        }
467        LOG(log_debug, logtype_default,"Conversion error: %s(%s)",reason,inbuf);
468        SAFE_FREE(ob);
469        return (size_t)-1;
470    }
471
472
473    destlen = destlen - o_len;
474
475    /* Terminate the string */
476    if (to == CH_UCS2 && o_len >= 2) {
477        ob[destlen] = 0;
478        ob[destlen+1] = 0;
479        *dest = (char *)realloc(ob,destlen+2);
480    }
481    else if ( to != CH_UCS2 && o_len > 0 ) {
482        ob[destlen] = 0;
483        *dest = (char *)realloc(ob,destlen+1);
484    }
485    else {
486        goto convert; /* realloc */
487    }
488
489    if (destlen && !*dest) {
490        LOG(log_debug, logtype_default, "convert_string_allocate: out of memory!");
491        SAFE_FREE(ob);
492        return (size_t)-1;
493    }
494
495    return destlen;
496}
497
498
499size_t convert_string_allocate(charset_t from, charset_t to,
500                               void const *src, size_t srclen,
501                               char ** dest)
502{
503    size_t i_len, o_len;
504    ucs2_t *u;
505    ucs2_t buffer[MAXPATHLEN];
506    ucs2_t buffer2[MAXPATHLEN];
507
508    *dest = NULL;
509
510    /* convert from_set to UCS2 */
511    if ((size_t)(-1) == ( o_len = convert_string_internal( from, CH_UCS2, src, srclen,
512                                                           buffer, sizeof(buffer))) ) {
513        LOG(log_error, logtype_default, "Conversion failed ( %s to CH_UCS2 )", charset_name(from));
514        return (size_t) -1;
515    }
516
517    /* Do pre/decomposition */
518    i_len = sizeof(buffer2);
519    u = buffer2;
520    if (charsets[to] && (charsets[to]->flags & CHARSET_DECOMPOSED) ) {
521        if ( (size_t)-1 == (i_len = decompose_w(buffer, o_len, u, &i_len)) )
522            return (size_t)-1;
523    }
524    else if ( !charsets[from] || (charsets[from]->flags & CHARSET_DECOMPOSED) ) {
525        if ( (size_t)-1 == (i_len = precompose_w(buffer, o_len, u, &i_len)) )
526            return (size_t)-1;
527    }
528    else {
529        u = buffer;
530        i_len = o_len;
531    }
532
533    /* Convert UCS2 to to_set */
534    if ((size_t)-1 == ( o_len = convert_string_allocate_internal( CH_UCS2, to, (char*)u, i_len, dest)) )
535        LOG(log_error, logtype_default, "Conversion failed (CH_UCS2 to %s):%s", charset_name(to), strerror(errno));
536
537    return o_len;
538
539}
540
541size_t charset_strupper(charset_t ch, const char *src, size_t srclen, char *dest, size_t destlen)
542{
543    size_t size;
544    char *buffer;
545
546    size = convert_string_allocate_internal(ch, CH_UCS2, src, srclen,
547                                            (char**) &buffer);
548    if (size == (size_t)-1) {
549        SAFE_FREE(buffer);
550        return size;
551    }
552    if (!strupper_w((ucs2_t *)buffer) && (dest == src)) {
553        free(buffer);
554        return srclen;
555    }
556
557    size = convert_string_internal(CH_UCS2, ch, buffer, size, dest, destlen);
558    free(buffer);
559    return size;
560}
561
562size_t charset_strlower(charset_t ch, const char *src, size_t srclen, char *dest, size_t destlen)
563{
564    size_t size;
565    char *buffer;
566
567    size = convert_string_allocate_internal(ch, CH_UCS2, src, srclen,
568                                            (char **) &buffer);
569    if (size == (size_t)-1) {
570        SAFE_FREE(buffer);
571        return size;
572    }
573    if (!strlower_w((ucs2_t *)buffer) && (dest == src)) {
574        free(buffer);
575        return srclen;
576    }
577
578    size = convert_string_internal(CH_UCS2, ch, buffer, size, dest, destlen);
579    free(buffer);
580    return size;
581}
582
583
584size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
585{
586    return charset_strupper( CH_UNIX, src, srclen, dest, destlen);
587}
588
589size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
590{
591    return charset_strlower( CH_UNIX, src, srclen, dest, destlen);
592}
593
594size_t utf8_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
595{
596    return charset_strupper( CH_UTF8, src, srclen, dest, destlen);
597}
598
599size_t utf8_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
600{
601    return charset_strlower( CH_UTF8, src, srclen, dest, destlen);
602}
603
604/**
605 * Copy a string from a charset_t char* src to a UCS2 destination, allocating a buffer
606 *
607 * @param dest always set at least to NULL
608 *
609 * @returns The number of bytes occupied by the string in the destination
610 *         or -1 in case of error.
611 **/
612
613size_t charset_to_ucs2_allocate(charset_t ch, ucs2_t **dest, const char *src)
614{
615    size_t src_len = strlen(src);
616
617    *dest = NULL;
618    return convert_string_allocate(ch, CH_UCS2, src, src_len, (char**) dest);
619}
620
621/** -----------------------------------
622 * Copy a string from a charset_t char* src to a UTF-8 destination, allocating a buffer
623 *
624 * @param dest always set at least to NULL
625 *
626 * @returns The number of bytes occupied by the string in the destination
627 **/
628
629size_t charset_to_utf8_allocate(charset_t ch, char **dest, const char *src)
630{
631    size_t src_len = strlen(src);
632
633    *dest = NULL;
634    return convert_string_allocate(ch, CH_UTF8, src, src_len, dest);
635}
636
637/** -----------------------------------
638 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
639 *
640 * @param dest always set at least to NULL
641 *
642 * @returns The number of bytes occupied by the string in the destination
643 **/
644
645size_t ucs2_to_charset(charset_t ch, const ucs2_t *src, char *dest, size_t destlen)
646{
647    size_t src_len = (strlen_w(src)) * sizeof(ucs2_t);
648    return convert_string(CH_UCS2, ch, src, src_len, dest, destlen);
649}
650
651/* --------------------------------- */
652size_t ucs2_to_charset_allocate(charset_t ch, char **dest, const ucs2_t *src)
653{
654    size_t src_len = (strlen_w(src)) * sizeof(ucs2_t);
655    *dest = NULL;
656    return convert_string_allocate(CH_UCS2, ch, src, src_len, dest);
657}
658
659/** ---------------------------------
660 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
661 *
662 * @param dest always set at least to NULL
663 *
664 * @returns The number of bytes occupied by the string in the destination
665 **/
666
667size_t utf8_to_charset_allocate(charset_t ch, char **dest, const char *src)
668{
669    size_t src_len = strlen(src);
670    *dest = NULL;
671    return convert_string_allocate(CH_UTF8, ch, src, src_len, dest);
672}
673
674size_t charset_precompose ( charset_t ch, char * src, size_t inlen, char * dst, size_t outlen)
675{
676    char *buffer;
677    ucs2_t u[MAXPATHLEN];
678    size_t len;
679    size_t ilen;
680
681    if ((size_t)(-1) == (len = convert_string_allocate_internal(ch, CH_UCS2, src, inlen, &buffer)) )
682        return len;
683
684    ilen=sizeof(u);
685
686    if ( (size_t)-1 == (ilen = precompose_w((ucs2_t *)buffer, len, u, &ilen)) ) {
687        free (buffer);
688        return (size_t)(-1);
689    }
690
691    if ((size_t)(-1) == (len = convert_string_internal( CH_UCS2, ch, (char*)u, ilen, dst, outlen)) ) {
692        free (buffer);
693        return (size_t)(-1);
694    }
695
696    free(buffer);
697    return (len);
698}
699
700size_t charset_decompose ( charset_t ch, char * src, size_t inlen, char * dst, size_t outlen)
701{
702    char *buffer;
703    ucs2_t u[MAXPATHLEN];
704    size_t len;
705    size_t ilen;
706
707    if ((size_t)(-1) == (len = convert_string_allocate_internal(ch, CH_UCS2, src, inlen, &buffer)) )
708        return len;
709
710    ilen=sizeof(u);
711
712    if ( (size_t)-1 == (ilen = decompose_w((ucs2_t *)buffer, len, u, &ilen)) ) {
713        free (buffer);
714        return (size_t)(-1);
715    }
716
717    if ((size_t)(-1) == (len = convert_string_internal( CH_UCS2, ch, (char*)u, ilen, dst, outlen)) ) {
718        free (buffer);
719        return (size_t)(-1);
720    }
721
722    free(buffer);
723    return (len);
724}
725
726size_t utf8_precompose ( char * src, size_t inlen, char * dst, size_t outlen)
727{
728    return charset_precompose ( CH_UTF8, src, inlen, dst, outlen);
729}
730
731size_t utf8_decompose ( char * src, size_t inlen, char * dst, size_t outlen)
732{
733    return charset_decompose ( CH_UTF8, src, inlen, dst, outlen);
734}
735
736#if 0
static char  debugbuf[ MAXPATHLEN +1 ];

/*
 * Format the first len bytes of seq as "xx." hex triplets for debugging.
 *
 * The text is written to a static buffer that is overwritten by every
 * call.  Input that would not fit is truncated to the number of bytes
 * the buffer can hold (three characters per byte); len == 0 yields an
 * empty string.
 */
char * debug_out ( char * seq, size_t len)
{
    const unsigned char *p = (const unsigned char *) seq;
    const size_t max_bytes = (sizeof(debugbuf) - 1) / 3;
    char *q = debugbuf;
    size_t i;

    if (len > max_bytes)
        len = max_bytes;

    for ( i = 0; i < len; i++)
    {
        /* 3 characters plus the terminator written by snprintf */
        snprintf(q, 4, "%2.2x.", (unsigned int) p[i]);
        q += 3;
    }
    *q = 0;
    return debugbuf;
}
757#endif
758
759/*
760 * Convert from MB to UCS2 charset
761 * Flags:
762 *      CONV_UNESCAPEHEX:    ':XX' will be converted to an UCS2 character
763 *      CONV_IGNORE:         return the first convertable characters.
764 *      CONV_FORCE:  force convertion
765 * FIXME:
766 *      This will *not* work if the destination charset is not multibyte, i.e. UCS2->UCS2 will fail
767 *      The (un)escape scheme is not compatible to the old cap style escape. This is bad, we need it
768 *      for e.g. HFS cdroms.
769 */
770
771static size_t pull_charset_flags (charset_t from_set, charset_t cap_set, const char *src, size_t srclen, char* dest, size_t destlen, u_int16_t *flags)
772{
773    const u_int16_t option = (flags ? *flags : 0);
774    size_t i_len, o_len;
775    size_t j = 0;
776    const char* inbuf = (const char*)src;
777    char* outbuf = dest;
778    atalk_iconv_t descriptor;
779    atalk_iconv_t descriptor_cap;
780
781    if (srclen == (size_t)-1)
782        srclen = strlen(src) + 1;
783
784    descriptor = conv_handles[from_set][CH_UCS2];
785    descriptor_cap = conv_handles[cap_set][CH_UCS2];
786
787    if (descriptor == (atalk_iconv_t)-1 || descriptor == (atalk_iconv_t)0) {
788        errno = EINVAL;
789        return (size_t)-1;
790    }
791
792    i_len=srclen;
793    o_len=destlen;
794
795    while (i_len > 0) {
796        if ((option & CONV_UNESCAPEHEX)) {
797            for (j = 0; j < i_len; ++j) {
798                if (inbuf[j] == ':') break;
799            }
800            j = i_len - j;
801            i_len -= j;
802        }
803
804        if (i_len > 0 &&
805            atalk_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len) == (size_t)-1) {
806            if (errno == EILSEQ || errno == EINVAL) {
807                errno = EILSEQ;
808                if ((option & CONV_IGNORE)) {
809                    *flags |= CONV_REQMANGLE;
810                    return destlen - o_len;
811                }
812                if ((option & CONV__EILSEQ)) {
813                    if (o_len < 2) {
814                        errno = E2BIG;
815                        goto end;
816                    }
817                    *((ucs2_t *)outbuf) = (ucs2_t) IGNORE_CHAR; /**inbuf */
818                    inbuf++;
819                    i_len--;
820                    outbuf += 2;
821                    o_len -= 2;
822                    /* FIXME reset stat ? */
823                    continue;
824                }
825            }
826            goto end;
827        }
828
829        if (j) {
830            /* we're at the start on an hex encoded ucs2 char */
831            char h[MAXPATHLEN];
832            size_t hlen = 0;
833
834            i_len = j, j = 0;
835            while (i_len >= 3 && inbuf[0] == ':' &&
836                   isxdigit(inbuf[1]) && isxdigit(inbuf[2])) {
837                h[hlen++] = (hextoint(inbuf[1]) << 4) | hextoint(inbuf[2]);
838                inbuf += 3;
839                i_len -= 3;
840            }
841            if (hlen) {
842                const char *h_buf = h;
843                if (atalk_iconv(descriptor_cap, &h_buf, &hlen, &outbuf, &o_len) == (size_t)-1) {
844                    i_len += hlen * 3;
845                    inbuf -= hlen * 3;
846                    if (errno == EILSEQ && (option & CONV_IGNORE)) {
847                        *flags |= CONV_REQMANGLE;
848                        return destlen - o_len;
849                    }
850                    goto end;
851                }
852            } else {
853                /* We have an invalid :xx sequence */
854                errno = EILSEQ;
855                if ((option & CONV_IGNORE)) {
856                    *flags |= CONV_REQMANGLE;
857                    return destlen - o_len;
858                }
859                goto end;
860            }
861        }
862    }
863end:
864    return (i_len + j == 0 || (option & CONV_FORCE)) ? destlen - o_len : (size_t)-1;
865}
866
867/*
868 * Convert from UCS2 to MB charset
869 * Flags:
870 *      CONV_ESCAPEDOTS: escape leading dots
871 *      CONV_ESCAPEHEX:  unconvertable characters and '/' will be escaped to :XX
872 *      CONV_IGNORE:     return the first convertable characters.
873 *      CONV__EILSEQ:    unconvertable characters will be replaced with '_'
874 *      CONV_FORCE:  force convertion
875 * FIXME:
876 *      CONV_IGNORE and CONV_ESCAPEHEX can't work together. Should we check this ?
877 *      This will *not* work if the destination charset is not multibyte, i.e. UCS2->UCS2 will fail
878 *      The escape scheme is not compatible to the old cap style escape. This is bad, we need it
879 *      for e.g. HFS cdroms.
880 */
881
882
883static size_t push_charset_flags (charset_t to_set, charset_t cap_set, char* src, size_t srclen, char* dest, size_t destlen, u_int16_t *flags)
884{
885    const u_int16_t option = (flags ? *flags : 0);
886    size_t i_len, o_len, i;
887    size_t j = 0;
888    const char* inbuf = (const char*)src;
889    char* outbuf = (char*)dest;
890    atalk_iconv_t descriptor;
891    atalk_iconv_t descriptor_cap;
892    char escch;                 /* 150210: uninitialized OK, depends on j */
893
894    descriptor = conv_handles[CH_UCS2][to_set];
895    descriptor_cap = conv_handles[CH_UCS2][cap_set];
896
897    if (descriptor == (atalk_iconv_t)-1 || descriptor == (atalk_iconv_t)0) {
898        errno = EINVAL;
899        return (size_t) -1;
900    }
901
902    i_len=srclen;
903    o_len=destlen;
904
905    if ((option & CONV_ESCAPEDOTS) &&
906        i_len >= 2 && SVAL(inbuf, 0) == 0x002e) { /* 0x002e = . */
907        if (o_len < 3) {
908            errno = E2BIG;
909            goto end;
910        }
911        *outbuf++ = ':';
912        *outbuf++ = '2';
913        *outbuf++ = 'e';
914        o_len -= 3;
915        inbuf += 2;
916        i_len -= 2;
917        *flags |= CONV_REQESCAPE;
918    }
919
920    while (i_len >= 2) {
921        if ((option & CONV_ESCAPEHEX)) {
922            for (i = 0; i < i_len; i += 2) {
923                ucs2_t c = SVAL(inbuf, i);
924                switch (c) {
925                case 0x003a: /* 0x003a = ':' */
926                    if ( ! (option & CONV_ALLOW_COLON)) {
927                        errno = EILSEQ;
928                        goto end;
929                    }
930                    escch = c;
931                    j = i_len - i;
932                    i_len = i;
933                    break;
934                case 0x002f: /* 0x002f = '/' */
935                    escch = c;
936                    j = i_len - i;
937                    i_len = i;
938                    break;
939                }
940            }
941        }
942        while (i_len > 0 &&
943               atalk_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len) == (size_t)-1) {
944            if (errno == EILSEQ) {
945                if ((option & CONV_IGNORE)) {
946                    *flags |= CONV_REQMANGLE;
947                    return destlen - o_len;
948                }
949                if ((option & CONV_ESCAPEHEX)) {
950                    const size_t bufsiz = o_len / 3 + 1;
951                    char *buf = malloc(bufsiz);
952                    size_t buflen;
953
954                    if (!buf)
955                        goto end;
956                    i = i_len;
957                    for (buflen = 1; buflen <= bufsiz; ++buflen) {
958                        char *b = buf;
959                        size_t o = buflen;
960                        if (atalk_iconv(descriptor_cap, &inbuf, &i, &b, &o) != (size_t)-1) {
961                            buflen -= o;
962                            break;
963                        } else if (errno != E2BIG) {
964                            SAFE_FREE(buf);
965                            goto end;
966                        } else if (o < buflen) {
967                            buflen -= o;
968                            break;
969                        }
970                    }
971                    if (o_len < buflen * 3) {
972                        SAFE_FREE(buf);
973                        errno = E2BIG;
974                        goto end;
975                    }
976                    o_len -= buflen * 3;
977                    i_len = i;
978                    for (i = 0; i < buflen; ++i) {
979                        *outbuf++ = ':';
980                        *outbuf++ = hexdig[(buf[i] >> 4) & 0x0f];
981                        *outbuf++ = hexdig[buf[i] & 0x0f];
982                    }
983                    SAFE_FREE(buf);
984                    *flags |= CONV_REQESCAPE;
985                    continue;
986                }
987            }
988            goto end;
989        }
990
991        if (j) {
992            i_len = j, j = 0;
993            if (o_len < 3) {
994                errno = E2BIG;
995                goto end;
996            }
997            switch (escch) {
998            case '/':
999                *outbuf++ = ':';
1000                *outbuf++ = '2';
1001                *outbuf++ = 'f';
1002                break;
1003            case ':':
1004                *outbuf++ = ':';
1005                *outbuf++ = '3';
1006                *outbuf++ = 'a';
1007                break;
1008            default:
1009                /*
1010                 *  THIS SHOULD NEVER BE REACHED !!!
1011                 *  As a safety net I put in a ' ' here
1012                 */
1013                *outbuf++ = ':';
1014                *outbuf++ = '2';
1015                *outbuf++ = '0';
1016                break;
1017            }
1018            o_len -= 3;
1019            inbuf += 2;
1020            i_len -= 2;
1021        }
1022    }
1023    if (i_len > 0) errno = EINVAL;
1024end:
1025    return (i_len + j == 0 || (option & CONV_FORCE)) ? destlen - o_len : (size_t)-1;
1026}
1027
1028/*
1029 * FIXME the size is a mess we really need a malloc/free logic
1030 *`dest size must be dest_len +2
1031 */
1032size_t convert_charset ( charset_t from_set, charset_t to_set, charset_t cap_charset, const char *src, size_t src_len, char *dest, size_t dest_len, u_int16_t *flags)
1033{
1034    size_t i_len, o_len;
1035    ucs2_t *u;
1036    ucs2_t buffer[MAXPATHLEN +2];
1037    ucs2_t buffer2[MAXPATHLEN +2];
1038
1039    lazy_initialize_conv();
1040
1041    /* convert from_set to UCS2 */
1042    if ((size_t)(-1) == ( o_len = pull_charset_flags( from_set, cap_charset, src, src_len,
1043                                                      (char *) buffer, sizeof(buffer) -2, flags)) ) {
1044        LOG(log_error, logtype_default, "Conversion failed ( %s to CH_UCS2 )", charset_name(from_set));
1045        return (size_t) -1;
1046    }
1047
1048    if ( o_len == 0)
1049        return o_len;
1050
1051    /* Do pre/decomposition */
1052    i_len = sizeof(buffer2) -2;
1053    u = buffer2;
1054    if (CHECK_FLAGS(flags, CONV_DECOMPOSE) || (charsets[to_set] && (charsets[to_set]->flags & CHARSET_DECOMPOSED)) ) {
1055        if ( (size_t)-1 == (i_len = decompose_w(buffer, o_len, u, &i_len)) )
1056            return (size_t)(-1);
1057    }
1058    else if (CHECK_FLAGS(flags, CONV_PRECOMPOSE) || !charsets[from_set] || (charsets[from_set]->flags & CHARSET_DECOMPOSED)) {
1059        if ( (size_t)-1 == (i_len = precompose_w(buffer, o_len, u, &i_len)) )
1060            return (size_t)(-1);
1061    }
1062    else {
1063        u = buffer;
1064        i_len = o_len;
1065    }
1066    /* null terminate */
1067    u[i_len] = 0;
1068    u[i_len +1] = 0;
1069
1070    /* Do case conversions */
1071    if (CHECK_FLAGS(flags, CONV_TOUPPER)) {
1072        strupper_w(u);
1073    }
1074    else if (CHECK_FLAGS(flags, CONV_TOLOWER)) {
1075        strlower_w(u);
1076    }
1077
1078    /* Convert UCS2 to to_set */
1079    if ((size_t)(-1) == ( o_len = push_charset_flags( to_set, cap_charset, (char *)u, i_len, dest, dest_len, flags )) ) {
1080        LOG(log_error, logtype_default,
1081            "Conversion failed (CH_UCS2 to %s):%s", charset_name(to_set), strerror(errno));
1082        return (size_t) -1;
1083    }
1084    /* null terminate */
1085    dest[o_len] = 0;
1086    dest[o_len +1] = 0;
1087
1088    return o_len;
1089}
1090