1/////////////////////////////////////////////////////////////////////////////
2// Name:        src/common/strconv.cpp
3// Purpose:     Unicode conversion classes
4// Author:      Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5//              Ryan Norton, Fredrik Roubert (UTF7)
6// Modified by:
7// Created:     29/01/98
8// RCS-ID:      $Id: strconv.cpp 64156 2010-04-27 08:52:30Z VZ $
9// Copyright:   (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10//              (c) 2000-2003 Vadim Zeitlin
11//              (c) 2004 Ryan Norton, Fredrik Roubert
12// Licence:     wxWindows licence
13/////////////////////////////////////////////////////////////////////////////
14
15// For compilers that support precompilation, includes "wx.h".
16#include "wx/wxprec.h"
17
18#ifndef WX_PRECOMP
19    #ifdef __WXMSW__
20        #include "wx/msw/missing.h"
21    #endif
22    #include "wx/intl.h"
23    #include "wx/log.h"
24    #include "wx/utils.h"
25    #include "wx/hashmap.h"
26#endif
27
28#include "wx/strconv.h"
29
30#if wxUSE_WCHAR_T
31
32#ifdef __WINDOWS__
33    #include "wx/msw/private.h"
34#endif
35
36#ifndef __WXWINCE__
37#include <errno.h>
38#endif
39
40#include <ctype.h>
41#include <string.h>
42#include <stdlib.h>
43
44#if defined(__WIN32__) && !defined(__WXMICROWIN__)
45    #define wxHAVE_WIN32_MB2WC
46#endif
47
48#ifdef __SALFORDC__
49    #include <clib.h>
50#endif
51
52#ifdef HAVE_ICONV
53    #include <iconv.h>
54    #include "wx/thread.h"
55#endif
56
57#include "wx/encconv.h"
58#include "wx/fontmap.h"
59
60#ifdef __WXMAC__
61#ifndef __DARWIN__
62#include <ATSUnicode.h>
63#include <TextCommon.h>
64#include <TextEncodingConverter.h>
65#endif
66
67// includes Mac headers
68#include "wx/mac/private.h"
69#include "wx/thread.h"
70
71#endif
72
73
74#define TRACE_STRCONV _T("strconv")
75
76// WC_UTF16 is defined only if sizeof(wchar_t) == 2, otherwise it's supposed to
77// be 4 bytes
78#if SIZEOF_WCHAR_T == 2
79    #define WC_UTF16
80#endif
81
82
83// ============================================================================
84// implementation
85// ============================================================================
86
// Terminator detection helper: returns true if at least one of the n bytes
// starting at p is not NUL and false if all of them are NUL (this includes
// the degenerate case n == 0, for which p is never dereferenced).
static bool NotAllNULs(const char *p, size_t n)
{
    const char * const end = p + n;
    for ( ; p != end; ++p )
    {
        if ( *p != '\0' )
            return true;
    }

    return false;
}
95
96// ----------------------------------------------------------------------------
97// UTF-16 en/decoding to/from UCS-4 with surrogates handling
98// ----------------------------------------------------------------------------
99
100static size_t encode_utf16(wxUint32 input, wxUint16 *output)
101{
102    if (input <= 0xffff)
103    {
104        if (output)
105            *output = (wxUint16) input;
106
107        return 1;
108    }
109    else if (input >= 0x110000)
110    {
111        return wxCONV_FAILED;
112    }
113    else
114    {
115        if (output)
116        {
117            *output++ = (wxUint16) ((input >> 10) + 0xd7c0);
118            *output = (wxUint16) ((input & 0x3ff) + 0xdc00);
119        }
120
121        return 2;
122    }
123}
124
125static size_t decode_utf16(const wxUint16* input, wxUint32& output)
126{
127    if ((*input < 0xd800) || (*input > 0xdfff))
128    {
129        output = *input;
130        return 1;
131    }
132    else if ((input[1] < 0xdc00) || (input[1] > 0xdfff))
133    {
134        output = *input;
135        return wxCONV_FAILED;
136    }
137    else
138    {
139        output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
140        return 2;
141    }
142}
143
144#ifdef WC_UTF16
145    typedef wchar_t wxDecodeSurrogate_t;
146#else // !WC_UTF16
147    typedef wxUint16 wxDecodeSurrogate_t;
148#endif // WC_UTF16/!WC_UTF16
149
150// returns the next UTF-32 character from the wchar_t buffer and advances the
151// pointer to the character after this one
152//
153// if an invalid character is found, *pSrc is set to NULL, the caller must
154// check for this
155static wxUint32 wxDecodeSurrogate(const wxDecodeSurrogate_t **pSrc)
156{
157    wxUint32 out;
158    const size_t
159        n = decode_utf16(wx_reinterpret_cast(const wxUint16 *, *pSrc), out);
160    if ( n == wxCONV_FAILED )
161        *pSrc = NULL;
162    else
163        *pSrc += n;
164
165    return out;
166}
167
168// ----------------------------------------------------------------------------
169// wxMBConv
170// ----------------------------------------------------------------------------
171
// Default implementation of the length-aware multibyte -> wide char
// conversion, written in terms of the old MB2WC().
//
//  dst     output buffer or NULL to only compute the required size
//  dstLen  size of dst in wide characters (only checked if dst != NULL)
//  src     input bytes
//  srcLen  number of bytes in src or wxNO_LEN if src is NUL-terminated
//
// The input is converted chunk by chunk, a chunk being a NUL-terminated
// run of characters. Returns the number of wide characters written (or which
// would be written), counting one L'\0' for each chunk, or wxCONV_FAILED.
size_t
wxMBConv::ToWChar(wchar_t *dst, size_t dstLen,
                  const char *src, size_t srcLen) const
{
    // although new conversion classes are supposed to implement this function
    // directly, the existing ones only implement the old MB2WC() and so, to
    // avoid having to rewrite all conversion classes at once, we provide a
    // default (but not efficient) implementation of this one in terms of the
    // old function by copying the input to ensure that it's NUL-terminated and
    // then using MB2WC() to convert it

    // the number of chars [which would be] written to dst [if it were not NULL]
    size_t dstWritten = 0;

    // the number of NULs terminating this string
    size_t nulLen = 0;  // not really needed, but just to avoid warnings

    // if we were not given the input size we just have to assume that the
    // string is properly terminated as we have no way of knowing how long it
    // is anyhow, but if we do have the size check whether there are enough
    // NULs at the end
    wxCharBuffer bufTmp;
    const char *srcEnd;
    if ( srcLen != wxNO_LEN )
    {
        // we need to know how to find the end of this string
        nulLen = GetMBNulLen();
        if ( nulLen == wxCONV_FAILED )
            return wxCONV_FAILED;

        // if there are enough NULs we can avoid the copy
        if ( srcLen < nulLen || NotAllNULs(src + srcLen - nulLen, nulLen) )
        {
            // make a copy in order to properly NUL-terminate the string: the
            // buffer holds the srcLen input bytes followed by nulLen NULs
            bufTmp = wxCharBuffer(srcLen + nulLen - 1 /* 1 will be added */);
            char * const p = bufTmp.data();
            memcpy(p, src, srcLen);
            for ( char *s = p + srcLen; s < p + srcLen + nulLen; s++ )
                *s = '\0';

            // from now on we read from the terminated copy
            src = bufTmp;
        }

        srcEnd = src + srcLen;
    }
    else // quit after the first loop iteration
    {
        srcEnd = NULL;
    }

    for ( ;; )
    {
        // try to convert the current chunk: the first call only computes
        // the length of the result
        size_t lenChunk = MB2WC(NULL, src, 0);
        if ( lenChunk == wxCONV_FAILED )
            return wxCONV_FAILED;

        lenChunk++; // for the L'\0' at the end of this chunk

        dstWritten += lenChunk;

        if ( lenChunk == 1 )
        {
            // nothing left in the input string, conversion succeeded
            //
            // NB: we leave the loop before storing anything in dst, so the
            //     L'\0' accounted for in dstWritten just above is not
            //     written by this function for an empty chunk
            break;
        }

        if ( dst )
        {
            if ( dstWritten > dstLen )
                return wxCONV_FAILED;

            if ( MB2WC(dst, src, lenChunk) == wxCONV_FAILED )
                return wxCONV_FAILED;

            dst += lenChunk;
        }

        if ( !srcEnd )
        {
            // we convert just one chunk in this case as this is the entire
            // string anyhow
            break;
        }

        // advance the input pointer past the end of this chunk
        while ( NotAllNULs(src, nulLen) )
        {
            // notice that we must skip over multiple bytes here as we suppose
            // that if NUL takes 2 or 4 bytes, then all the other characters do
            // too and so if advanced by a single byte we might erroneously
            // detect sequences of NUL bytes in the middle of the input
            src += nulLen;
        }

        src += nulLen; // skipping over its terminator as well

        // note that ">=" (and not just "==") is needed here as the terminator
        // we skipped just above could be inside or just after the buffer
        // delimited by srcEnd
        if ( src >= srcEnd )
            break;
    }

    return dstWritten;
}
278
279size_t
280wxMBConv::FromWChar(char *dst, size_t dstLen,
281                    const wchar_t *src, size_t srcLen) const
282{
283    // the number of chars [which would be] written to dst [if it were not NULL]
284    size_t dstWritten = 0;
285
286    // make a copy of the input string unless it is already properly
287    // NUL-terminated
288    //
289    // if we don't know its length we have no choice but to assume that it is,
290    // indeed, properly terminated
291    wxWCharBuffer bufTmp;
292    if ( srcLen == wxNO_LEN )
293    {
294        srcLen = wxWcslen(src) + 1;
295    }
296    else if ( srcLen != 0 && src[srcLen - 1] != L'\0' )
297    {
298        // make a copy in order to properly NUL-terminate the string
299        bufTmp = wxWCharBuffer(srcLen);
300        memcpy(bufTmp.data(), src, srcLen * sizeof(wchar_t));
301        src = bufTmp;
302    }
303
304    const size_t lenNul = GetMBNulLen();
305    for ( const wchar_t * const srcEnd = src + srcLen;
306          src < srcEnd;
307          src += wxWcslen(src) + 1 /* skip L'\0' too */ )
308    {
309        // try to convert the current chunk
310        size_t lenChunk = WC2MB(NULL, src, 0);
311
312        if ( lenChunk == wxCONV_FAILED )
313            return wxCONV_FAILED;
314
315        lenChunk += lenNul;
316        dstWritten += lenChunk;
317
318        if ( dst )
319        {
320            if ( dstWritten > dstLen )
321                return wxCONV_FAILED;
322
323            if ( WC2MB(dst, src, lenChunk) == wxCONV_FAILED )
324                return wxCONV_FAILED;
325
326            dst += lenChunk;
327        }
328    }
329
330    return dstWritten;
331}
332
333size_t wxMBConv::MB2WC(wchar_t *outBuff, const char *inBuff, size_t outLen) const
334{
335    size_t rc = ToWChar(outBuff, outLen, inBuff);
336    if ( rc != wxCONV_FAILED )
337    {
338        // ToWChar() returns the buffer length, i.e. including the trailing
339        // NUL, while this method doesn't take it into account
340        rc--;
341    }
342
343    return rc;
344}
345
346size_t wxMBConv::WC2MB(char *outBuff, const wchar_t *inBuff, size_t outLen) const
347{
348    size_t rc = FromWChar(outBuff, outLen, inBuff);
349    if ( rc != wxCONV_FAILED )
350    {
351        rc -= GetMBNulLen();
352    }
353
354    return rc;
355}
356
// Out-of-line destructor: intentionally empty.
wxMBConv::~wxMBConv()
{
    // nothing to do here (necessary for Darwin linking probably)
}
361
362const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
363{
364    if ( psz )
365    {
366        // calculate the length of the buffer needed first
367        const size_t nLen = MB2WC(NULL, psz, 0);
368        if ( nLen != wxCONV_FAILED )
369        {
370            // now do the actual conversion
371            wxWCharBuffer buf(nLen /* +1 added implicitly */);
372
373            // +1 for the trailing NULL
374            if ( MB2WC(buf.data(), psz, nLen + 1) != wxCONV_FAILED )
375                return buf;
376        }
377    }
378
379    return wxWCharBuffer();
380}
381
382const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
383{
384    if ( pwz )
385    {
386        const size_t nLen = WC2MB(NULL, pwz, 0);
387        if ( nLen != wxCONV_FAILED )
388        {
389            // extra space for trailing NUL(s)
390            static const size_t extraLen = GetMaxMBNulLen();
391
392            wxCharBuffer buf(nLen + extraLen - 1);
393            if ( WC2MB(buf.data(), pwz, nLen + extraLen) != wxCONV_FAILED )
394                return buf;
395        }
396    }
397
398    return wxCharBuffer();
399}
400
401const wxWCharBuffer
402wxMBConv::cMB2WC(const char *inBuff, size_t inLen, size_t *outLen) const
403{
404    const size_t dstLen = ToWChar(NULL, 0, inBuff, inLen);
405    if ( dstLen != wxCONV_FAILED )
406    {
407        wxWCharBuffer wbuf(dstLen - 1);
408        if ( ToWChar(wbuf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
409        {
410            if ( outLen )
411            {
412                *outLen = dstLen;
413                if ( wbuf[dstLen - 1] == L'\0' )
414                    (*outLen)--;
415            }
416
417            return wbuf;
418        }
419    }
420
421    if ( outLen )
422        *outLen = 0;
423
424    return wxWCharBuffer();
425}
426
427const wxCharBuffer
428wxMBConv::cWC2MB(const wchar_t *inBuff, size_t inLen, size_t *outLen) const
429{
430    size_t dstLen = FromWChar(NULL, 0, inBuff, inLen);
431    if ( dstLen != wxCONV_FAILED )
432    {
433        // special case of empty input: can't allocate 0 size buffer below as
434        // wxCharBuffer insists on NUL-terminating it
435        wxCharBuffer buf(dstLen ? dstLen - 1 : 1);
436        if ( FromWChar(buf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
437        {
438            if ( outLen )
439            {
440                *outLen = dstLen;
441
442                const size_t nulLen = GetMBNulLen();
443                if ( dstLen >= nulLen &&
444                        !NotAllNULs(buf.data() + dstLen - nulLen, nulLen) )
445                {
446                    // in this case the output is NUL-terminated and we're not
447                    // supposed to count NUL
448                    *outLen -= nulLen;
449                }
450            }
451
452            return buf;
453        }
454    }
455
456    if ( outLen )
457        *outLen = 0;
458
459    return wxCharBuffer();
460}
461
462// ----------------------------------------------------------------------------
463// wxMBConvLibc
464// ----------------------------------------------------------------------------
465
// Multibyte -> wide char conversion: simply forwards all its arguments to
// wxMB2WC() and returns its result.
// NOTE(review): wxMB2WC() is presumably a wrapper for the C library
// conversion function -- confirm against its definition.
size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
    return wxMB2WC(buf, psz, n);
}
470
// Wide char -> multibyte conversion: simply forwards all its arguments to
// wxWC2MB() and returns its result.
// NOTE(review): wxWC2MB() is presumably a wrapper for the C library
// conversion function -- confirm against its definition.
size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
{
    return wxWC2MB(buf, psz, n);
}
475
476// ----------------------------------------------------------------------------
477// wxConvBrokenFileNames
478// ----------------------------------------------------------------------------
479
480#ifdef __UNIX__
481
482wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset)
483{
484    if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0
485                  || wxStricmp(charset, _T("UTF8")) == 0  )
486        m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_PUA);
487    else
488        m_conv = new wxCSConv(charset);
489}
490
491#endif // __UNIX__
492
493// ----------------------------------------------------------------------------
494// UTF-7
495// ----------------------------------------------------------------------------
496
497// Implementation (C) 2004 Fredrik Roubert
498
//
// BASE64 decoding table
//
// Indexed by the value of the input byte (256 entries): gives the 6 bit value
// of the corresponding BASE64 digit ('A'-'Z' -> 0x00-0x19, 'a'-'z' ->
// 0x1a-0x33, '0'-'9' -> 0x34-0x3d, '+' -> 0x3e, '/' -> 0x3f) or 0xff if the
// byte is not a BASE64 digit.
//
static const unsigned char utf7unb64[] =
{
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f, // '+' and '/'
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, // '0'..'7'
    0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // '8', '9'
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, // 'A'..
    0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
    0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff, // ..'Z'
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, // 'a'..
    0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
    0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff, // ..'z'
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 0x80 and above: none
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
537
538size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
539{
540    size_t len = 0;
541
542    while ( *psz && (!buf || (len < n)) )
543    {
544        unsigned char cc = *psz++;
545        if (cc != '+')
546        {
547            // plain ASCII char
548            if (buf)
549                *buf++ = cc;
550            len++;
551        }
552        else if (*psz == '-')
553        {
554            // encoded plus sign
555            if (buf)
556                *buf++ = cc;
557            len++;
558            psz++;
559        }
560        else // start of BASE64 encoded string
561        {
562            bool lsb, ok;
563            unsigned int d, l;
564            for ( ok = lsb = false, d = 0, l = 0;
565                  (cc = utf7unb64[(unsigned char)*psz]) != 0xff;
566                  psz++ )
567            {
568                d <<= 6;
569                d += cc;
570                for (l += 6; l >= 8; lsb = !lsb)
571                {
572                    unsigned char c = (unsigned char)((d >> (l -= 8)) % 256);
573                    if (lsb)
574                    {
575                        if (buf)
576                            *buf++ |= c;
577                        len ++;
578                    }
579                    else
580                    {
581                        if (buf)
582                            *buf = (wchar_t)(c << 8);
583                    }
584
585                    ok = true;
586                }
587            }
588
589            if ( !ok )
590            {
591                // in valid UTF7 we should have valid characters after '+'
592                return wxCONV_FAILED;
593            }
594
595            if (*psz == '-')
596                psz++;
597        }
598    }
599
600    if ( buf && (len < n) )
601        *buf = '\0';
602
603    return len;
604}
605
//
// BASE64 encoding table
//
// Maps a 6 bit value (0..63) to the corresponding BASE64 digit.
//
static const unsigned char utf7enb64[] =
{
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
    'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
    'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
    'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
    'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
    'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
    'w', 'x', 'y', 'z', '0', '1', '2', '3',
    '4', '5', '6', '7', '8', '9', '+', '/'
};
620
//
// UTF-7 encoding table
//
// Indexed by the ASCII code of the character (0..127), gives its class:
//
// 0 - Set D (directly encoded characters)
// 1 - Set O (optional direct characters)
// 2 - whitespace characters (optional)
// 3 - special characters
//
// Note that wxMBConvUTF7::WC2MB() only tests for "< 1", i.e. only the
// characters of class 0 are output directly by it.
//
static const unsigned char utf7encode[128] =
{
    3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
};
640
641size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
642{
643    size_t len = 0;
644
645    while (*psz && ((!buf) || (len < n)))
646    {
647        wchar_t cc = *psz++;
648        if (cc < 0x80 && utf7encode[cc] < 1)
649        {
650            // plain ASCII char
651            if (buf)
652                *buf++ = (char)cc;
653
654            len++;
655        }
656#ifndef WC_UTF16
657        else if (((wxUint32)cc) > 0xffff)
658        {
659            // no surrogate pair generation (yet?)
660            return wxCONV_FAILED;
661        }
662#endif
663        else
664        {
665            if (buf)
666                *buf++ = '+';
667
668            len++;
669            if (cc != '+')
670            {
671                // BASE64 encode string
672                unsigned int lsb, d, l;
673                for (d = 0, l = 0; /*nothing*/; psz++)
674                {
675                    for (lsb = 0; lsb < 2; lsb ++)
676                    {
677                        d <<= 8;
678                        d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
679
680                        for (l += 8; l >= 6; )
681                        {
682                            l -= 6;
683                            if (buf)
684                                *buf++ = utf7enb64[(d >> l) % 64];
685                            len++;
686                        }
687                    }
688
689                    cc = *psz;
690                    if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
691                        break;
692                }
693
694                if (l != 0)
695                {
696                    if (buf)
697                        *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
698
699                    len++;
700                }
701            }
702
703            if (buf)
704                *buf++ = '-';
705            len++;
706        }
707    }
708
709    if (buf && (len < n))
710        *buf = 0;
711
712    return len;
713}
714
715// ----------------------------------------------------------------------------
716// UTF-8
717// ----------------------------------------------------------------------------
718
// utf8_max[i] is the greatest value which can be encoded in UTF-8 using i
// continuation bytes (i.e. i + 1 bytes in total); the last element ensures
// that the search for the number of bytes needed always terminates
static wxUint32 utf8_max[]=
    { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };

// boundaries of the private use area we use to (temporarily) remap the bytes
// which are invalid in a UTF-8 encoded string: each such byte is mapped to
// wxUnicodePUA + its value, wxUnicodePUAEnd is one past the end of the range
const wxUint32 wxUnicodePUA = 0x100000;
const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
726
// Decodes the NUL-terminated UTF-8 string psz.
//
//  buf  output buffer or NULL to only compute the length of the result
//  psz  NUL-terminated UTF-8 input
//  n    size of buf in wide characters (ignored if buf is NULL)
//
// Returns the number of wide characters produced (or which would be
// produced), not counting the trailing NUL which is only written if there is
// still room for it.
//
// The handling of the invalid sequences depends on m_options: each of their
// bytes is either mapped to wxUnicodePUA + byte (MAP_INVALID_UTF8_TO_PUA) or
// to a backslash followed by 3 octal digits (MAP_INVALID_UTF8_TO_OCTAL), in
// which case the genuine backslashes are doubled, or, if neither option is
// set, wxCONV_FAILED is returned.
size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
    size_t len = 0;

    while (*psz && ((!buf) || (len < n)))
    {
        // remember the start of this sequence to be able to remap its bytes
        // if it turns out to be invalid
        const char *opsz = psz;
        bool invalid = false;
        unsigned char cc = *psz++, fc = cc;

        // count the leading 1 bits of the first byte
        unsigned cnt;
        for (cnt = 0; fc & 0x80; cnt++)
            fc <<= 1;

        if (!cnt)
        {
            // plain ASCII char
            if (buf)
                *buf++ = cc;
            len++;

            // escape the escape character for octal escapes
            if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
                    && cc == '\\' && (!buf || len < n))
            {
                if (buf)
                    *buf++ = cc;
                len++;
            }
        }
        else
        {
            // from now on cnt is the number of continuation bytes expected
            cnt--;
            if (!cnt)
            {
                // invalid UTF-8 sequence: continuation byte without a lead
                invalid = true;
            }
            else
            {
                // ocnt is used to index utf8_max[] for the overlong check
                unsigned ocnt = cnt - 1;

                // extract the payload bits of the lead byte
                wxUint32 res = cc & (0x3f >> cnt);
                while (cnt--)
                {
                    cc = *psz;
                    if ((cc & 0xC0) != 0x80)
                    {
                        // invalid UTF-8 sequence (notice that the offending
                        // byte is not consumed and so will be examined again)
                        invalid = true;
                        break;
                    }

                    psz++;
                    res = (res << 6) | (cc & 0x3f);
                }

                if (invalid || res <= utf8_max[ocnt])
                {
                    // illegal UTF-8 encoding: the value could have been
                    // encoded using fewer bytes
                    invalid = true;
                }
                else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
                        res >= wxUnicodePUA && res < wxUnicodePUAEnd)
                {
                    // if one of our PUA characters turns up externally
                    // it must also be treated as an illegal sequence
                    // (a bit like you have to escape an escape character)
                    invalid = true;
                }
                else
                {
#ifdef WC_UTF16
                    // cast is ok because wchar_t == wxUint16 if WC_UTF16
                    //
                    // NOTE(review): encode_utf16() stores 2 units for the
                    // values above 0xffff but only "len < n" was checked in
                    // the loop condition -- confirm that the callers always
                    // pass a big enough buffer
                    size_t pa = encode_utf16(res, (wxUint16 *)buf);
                    if (pa == wxCONV_FAILED)
                    {
                        invalid = true;
                    }
                    else
                    {
                        if (buf)
                            buf += pa;
                        len += pa;
                    }
#else // !WC_UTF16
                    if (buf)
                        *buf++ = (wchar_t)res;
                    len++;
#endif // WC_UTF16/!WC_UTF16
                }
            }

            if (invalid)
            {
                // remap all the bytes consumed so far, one by one
                if (m_options & MAP_INVALID_UTF8_TO_PUA)
                {
                    while (opsz < psz && (!buf || len < n))
                    {
#ifdef WC_UTF16
                        // cast is ok because wchar_t == wxUint16 if WC_UTF16
                        size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
                        wxASSERT(pa != wxCONV_FAILED);
                        if (buf)
                            buf += pa;
                        opsz++;
                        len += pa;
#else
                        if (buf)
                            *buf++ = (wchar_t)(wxUnicodePUA + (unsigned char)*opsz);
                        opsz++;
                        len++;
#endif
                    }
                }
                else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
                {
                    while (opsz < psz && (!buf || len < n))
                    {
                        // the escape sequence is only written if all 4 of
                        // its characters fit but it is always counted
                        if ( buf && len + 3 < n )
                        {
                            unsigned char on = *opsz;
                            *buf++ = L'\\';
                            *buf++ = (wchar_t)( L'0' + on / 0100 );
                            *buf++ = (wchar_t)( L'0' + (on % 0100) / 010 );
                            *buf++ = (wchar_t)( L'0' + on % 010 );
                        }

                        opsz++;
                        len += 4;
                    }
                }
                else // MAP_INVALID_UTF8_NOT
                {
                    return wxCONV_FAILED;
                }
            }
        }
    }

    if (buf && (len < n))
        *buf = 0;

    return len;
}
870
// returns true if wch is one of the octal digits L'0'..L'7'
static inline bool isoctal(wchar_t wch)
{
    if ( wch < L'0' )
        return false;

    return wch <= L'7';
}
875
// Encodes the NUL-terminated wide string psz in UTF-8.
//
//  buf  output buffer or NULL to only compute the length of the result
//  psz  NUL-terminated wide character input
//  n    size of buf in bytes (ignored if buf is NULL)
//
// Returns the number of bytes produced (or which would be produced), not
// counting the trailing NUL which is only written if there is still room for
// it.
//
// Depending on m_options, the mappings done for the invalid sequences by
// MB2WC() are undone: the characters in the wxUnicodePUA..wxUnicodePUAEnd
// range are converted back to single bytes (MAP_INVALID_UTF8_TO_PUA) and the
// doubled backslashes and octal escapes back to the backslash and the
// escaped byte respectively (MAP_INVALID_UTF8_TO_OCTAL).
size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
{
    size_t len = 0;

    while (*psz && ((!buf) || (len < n)))
    {
        wxUint32 cc;

#ifdef WC_UTF16
        // cast is ok for WC_UTF16
        //
        // if decoding fails we skip just one unit: cc is the value of this
        // unit itself in this case
        size_t pa = decode_utf16((const wxUint16 *)psz, cc);
        psz += (pa == wxCONV_FAILED) ? 1 : pa;
#else
        cc = (*psz++) & 0x7fffffff;
#endif

        if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
                && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
        {
            // one of our PUA characters: restore the original byte
            if (buf)
                *buf++ = (char)(cc - wxUnicodePUA);
            len++;
        }
        else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
                    && cc == L'\\' && psz[0] == L'\\' )
        {
            // escaped backslash: output just one of them
            if (buf)
                *buf++ = (char)cc;
            psz++;
            len++;
        }
        else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
                    cc == L'\\' &&
                        isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
        {
            // octal escape: restore the original byte
            if (buf)
            {
                *buf++ = (char) ((psz[0] - L'0') * 0100 +
                                 (psz[1] - L'0') * 010 +
                                 (psz[2] - L'0'));
            }

            psz += 3;
            len++;
        }
        else
        {
            // find the number of continuation bytes needed for this value
            unsigned cnt;
            for (cnt = 0; cc > utf8_max[cnt]; cnt++)
            {
            }

            if (!cnt)
            {
                // plain ASCII char
                if (buf)
                    *buf++ = (char) cc;
                len++;
            }
            else
            {
                // NOTE(review): cnt + 1 bytes are written below while only
                // "len < n" was checked in the loop condition -- confirm
                // that the callers always pass a big enough buffer
                len += cnt + 1;
                if (buf)
                {
                    // lead byte: -128 >> cnt is meant to yield the cnt + 1
                    // leading 1 bits (this assumes that shifting a negative
                    // value to the right preserves the sign)
                    *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));

                    // continuation bytes, 6 bits each, most significant first
                    while (cnt--)
                        *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
                }
            }
        }
    }

    if (buf && (len < n))
        *buf = 0;

    return len;
}
953
954// ============================================================================
955// UTF-16
956// ============================================================================
957
// the implementations below are written in terms of "straight" (no bytes
// swapping needed) and "swap" (bytes must be swapped) conversions: map them
// to the real BE/LE classes depending on the endianness of this platform
#ifdef WORDS_BIGENDIAN
    #define wxMBConvUTF16straight wxMBConvUTF16BE
    #define wxMBConvUTF16swap     wxMBConvUTF16LE
#else
    #define wxMBConvUTF16swap     wxMBConvUTF16BE
    #define wxMBConvUTF16straight wxMBConvUTF16LE
#endif
965
966/* static */
967size_t wxMBConvUTF16Base::GetLength(const char *src, size_t srcLen)
968{
969    if ( srcLen == wxNO_LEN )
970    {
971        // count the number of bytes in input, including the trailing NULs
972        const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
973        for ( srcLen = 1; *inBuff++; srcLen++ )
974            ;
975
976        srcLen *= BYTES_PER_CHAR;
977    }
978    else // we already have the length
979    {
980        // we can only convert an entire number of UTF-16 characters
981        if ( srcLen % BYTES_PER_CHAR )
982            return wxCONV_FAILED;
983    }
984
985    return srcLen;
986}
987
988// case when in-memory representation is UTF-16 too
989#ifdef WC_UTF16
990
991// ----------------------------------------------------------------------------
992// conversions without endianness change
993// ----------------------------------------------------------------------------
994
995size_t
996wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
997                               const char *src, size_t srcLen) const
998{
999    // set up the scene for using memcpy() (which is presumably more efficient
1000    // than copying the bytes one by one)
1001    srcLen = GetLength(src, srcLen);
1002    if ( srcLen == wxNO_LEN )
1003        return wxCONV_FAILED;
1004
1005    const size_t inLen = srcLen / BYTES_PER_CHAR;
1006    if ( dst )
1007    {
1008        if ( dstLen < inLen )
1009            return wxCONV_FAILED;
1010
1011        memcpy(dst, src, srcLen);
1012    }
1013
1014    return inLen;
1015}
1016
1017size_t
1018wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen,
1019                                 const wchar_t *src, size_t srcLen) const
1020{
1021    if ( srcLen == wxNO_LEN )
1022        srcLen = wxWcslen(src) + 1;
1023
1024    srcLen *= BYTES_PER_CHAR;
1025
1026    if ( dst )
1027    {
1028        if ( dstLen < srcLen )
1029            return wxCONV_FAILED;
1030
1031        memcpy(dst, src, srcLen);
1032    }
1033
1034    return srcLen;
1035}
1036
1037// ----------------------------------------------------------------------------
1038// endian-reversing conversions
1039// ----------------------------------------------------------------------------
1040
1041size_t
1042wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
1043                           const char *src, size_t srcLen) const
1044{
1045    srcLen = GetLength(src, srcLen);
1046    if ( srcLen == wxNO_LEN )
1047        return wxCONV_FAILED;
1048
1049    srcLen /= BYTES_PER_CHAR;
1050
1051    if ( dst )
1052    {
1053        if ( dstLen < srcLen )
1054            return wxCONV_FAILED;
1055
1056        const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
1057        for ( size_t n = 0; n < srcLen; n++, inBuff++ )
1058        {
1059            *dst++ = wxUINT16_SWAP_ALWAYS(*inBuff);
1060        }
1061    }
1062
1063    return srcLen;
1064}
1065
1066size_t
1067wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen,
1068                             const wchar_t *src, size_t srcLen) const
1069{
1070    if ( srcLen == wxNO_LEN )
1071        srcLen = wxWcslen(src) + 1;
1072
1073    srcLen *= BYTES_PER_CHAR;
1074
1075    if ( dst )
1076    {
1077        if ( dstLen < srcLen )
1078            return wxCONV_FAILED;
1079
1080        wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst);
1081        for ( size_t n = 0; n < srcLen; n += BYTES_PER_CHAR, src++ )
1082        {
1083            *outBuff++ = wxUINT16_SWAP_ALWAYS(*src);
1084        }
1085    }
1086
1087    return srcLen;
1088}
1089
1090#else // !WC_UTF16: wchar_t is UTF-32
1091
1092// ----------------------------------------------------------------------------
1093// conversions without endianness change
1094// ----------------------------------------------------------------------------
1095
1096size_t
1097wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
1098                               const char *src, size_t srcLen) const
1099{
1100    srcLen = GetLength(src, srcLen);
1101    if ( srcLen == wxNO_LEN )
1102        return wxCONV_FAILED;
1103
1104    const size_t inLen = srcLen / BYTES_PER_CHAR;
1105    if ( !dst )
1106    {
1107        // optimization: return maximal space which could be needed for this
1108        // string even if the real size could be smaller if the buffer contains
1109        // any surrogates
1110        return inLen;
1111    }
1112
1113    size_t outLen = 0;
1114    const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
1115    for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
1116    {
1117        const wxUint32 ch = wxDecodeSurrogate(&inBuff);
1118        if ( !inBuff )
1119            return wxCONV_FAILED;
1120
1121        if ( ++outLen > dstLen )
1122            return wxCONV_FAILED;
1123
1124        *dst++ = ch;
1125    }
1126
1127
1128    return outLen;
1129}
1130
1131size_t
1132wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen,
1133                                 const wchar_t *src, size_t srcLen) const
1134{
1135    if ( srcLen == wxNO_LEN )
1136        srcLen = wxWcslen(src) + 1;
1137
1138    size_t outLen = 0;
1139    wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst);
1140    for ( size_t n = 0; n < srcLen; n++ )
1141    {
1142        wxUint16 cc[2];
1143        const size_t numChars = encode_utf16(*src++, cc);
1144        if ( numChars == wxCONV_FAILED )
1145            return wxCONV_FAILED;
1146
1147        outLen += numChars * BYTES_PER_CHAR;
1148        if ( outBuff )
1149        {
1150            if ( outLen > dstLen )
1151                return wxCONV_FAILED;
1152
1153            *outBuff++ = cc[0];
1154            if ( numChars == 2 )
1155            {
1156                // second character of a surrogate
1157                *outBuff++ = cc[1];
1158            }
1159        }
1160    }
1161
1162    return outLen;
1163}
1164
1165// ----------------------------------------------------------------------------
1166// endian-reversing conversions
1167// ----------------------------------------------------------------------------
1168
1169size_t
1170wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
1171                           const char *src, size_t srcLen) const
1172{
1173    srcLen = GetLength(src, srcLen);
1174    if ( srcLen == wxNO_LEN )
1175        return wxCONV_FAILED;
1176
1177    const size_t inLen = srcLen / BYTES_PER_CHAR;
1178    if ( !dst )
1179    {
1180        // optimization: return maximal space which could be needed for this
1181        // string even if the real size could be smaller if the buffer contains
1182        // any surrogates
1183        return inLen;
1184    }
1185
1186    size_t outLen = 0;
1187    const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
1188    for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
1189    {
1190        wxUint32 ch;
1191        wxUint16 tmp[2];
1192
1193        tmp[0] = wxUINT16_SWAP_ALWAYS(*inBuff);
1194        inBuff++;
1195        tmp[1] = wxUINT16_SWAP_ALWAYS(*inBuff);
1196
1197        const size_t numChars = decode_utf16(tmp, ch);
1198        if ( numChars == wxCONV_FAILED )
1199            return wxCONV_FAILED;
1200
1201        if ( numChars == 2 )
1202            inBuff++;
1203
1204        if ( ++outLen > dstLen )
1205            return wxCONV_FAILED;
1206
1207        *dst++ = ch;
1208    }
1209
1210
1211    return outLen;
1212}
1213
1214size_t
1215wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen,
1216                             const wchar_t *src, size_t srcLen) const
1217{
1218    if ( srcLen == wxNO_LEN )
1219        srcLen = wxWcslen(src) + 1;
1220
1221    size_t outLen = 0;
1222    wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst);
1223    for ( const wchar_t *srcEnd = src + srcLen; src < srcEnd; src++ )
1224    {
1225        wxUint16 cc[2];
1226        const size_t numChars = encode_utf16(*src, cc);
1227        if ( numChars == wxCONV_FAILED )
1228            return wxCONV_FAILED;
1229
1230        outLen += numChars * BYTES_PER_CHAR;
1231        if ( outBuff )
1232        {
1233            if ( outLen > dstLen )
1234                return wxCONV_FAILED;
1235
1236            *outBuff++ = wxUINT16_SWAP_ALWAYS(cc[0]);
1237            if ( numChars == 2 )
1238            {
1239                // second character of a surrogate
1240                *outBuff++ = wxUINT16_SWAP_ALWAYS(cc[1]);
1241            }
1242        }
1243    }
1244
1245    return outLen;
1246}
1247
1248#endif // WC_UTF16/!WC_UTF16
1249
1250
1251// ============================================================================
1252// UTF-32
1253// ============================================================================
1254
1255#ifdef WORDS_BIGENDIAN
1256    #define wxMBConvUTF32straight  wxMBConvUTF32BE
1257    #define wxMBConvUTF32swap      wxMBConvUTF32LE
1258#else
1259    #define wxMBConvUTF32swap      wxMBConvUTF32BE
1260    #define wxMBConvUTF32straight  wxMBConvUTF32LE
1261#endif
1262
1263
1264WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1265WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1266
1267/* static */
1268size_t wxMBConvUTF32Base::GetLength(const char *src, size_t srcLen)
1269{
1270    if ( srcLen == wxNO_LEN )
1271    {
1272        // count the number of bytes in input, including the trailing NULs
1273        const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1274        for ( srcLen = 1; *inBuff++; srcLen++ )
1275            ;
1276
1277        srcLen *= BYTES_PER_CHAR;
1278    }
1279    else // we already have the length
1280    {
1281        // we can only convert an entire number of UTF-32 characters
1282        if ( srcLen % BYTES_PER_CHAR )
1283            return wxCONV_FAILED;
1284    }
1285
1286    return srcLen;
1287}
1288
1289// case when in-memory representation is UTF-16
1290#ifdef WC_UTF16
1291
1292// ----------------------------------------------------------------------------
1293// conversions without endianness change
1294// ----------------------------------------------------------------------------
1295
1296size_t
1297wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen,
1298                               const char *src, size_t srcLen) const
1299{
1300    srcLen = GetLength(src, srcLen);
1301    if ( srcLen == wxNO_LEN )
1302        return wxCONV_FAILED;
1303
1304    const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1305    const size_t inLen = srcLen / BYTES_PER_CHAR;
1306    size_t outLen = 0;
1307    for ( size_t n = 0; n < inLen; n++ )
1308    {
1309        wxUint16 cc[2];
1310        const size_t numChars = encode_utf16(*inBuff++, cc);
1311        if ( numChars == wxCONV_FAILED )
1312            return wxCONV_FAILED;
1313
1314        outLen += numChars;
1315        if ( dst )
1316        {
1317            if ( outLen > dstLen )
1318                return wxCONV_FAILED;
1319
1320            *dst++ = cc[0];
1321            if ( numChars == 2 )
1322            {
1323                // second character of a surrogate
1324                *dst++ = cc[1];
1325            }
1326        }
1327    }
1328
1329    return outLen;
1330}
1331
1332size_t
1333wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen,
1334                                 const wchar_t *src, size_t srcLen) const
1335{
1336    if ( srcLen == wxNO_LEN )
1337        srcLen = wxWcslen(src) + 1;
1338
1339    if ( !dst )
1340    {
1341        // optimization: return maximal space which could be needed for this
1342        // string instead of the exact amount which could be less if there are
1343        // any surrogates in the input
1344        //
1345        // we consider that surrogates are rare enough to make it worthwhile to
1346        // avoid running the loop below at the cost of slightly extra memory
1347        // consumption
1348        return srcLen * BYTES_PER_CHAR;
1349    }
1350
1351    wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst);
1352    size_t outLen = 0;
1353    for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
1354    {
1355        const wxUint32 ch = wxDecodeSurrogate(&src);
1356        if ( !src )
1357            return wxCONV_FAILED;
1358
1359        outLen += BYTES_PER_CHAR;
1360
1361        if ( outLen > dstLen )
1362            return wxCONV_FAILED;
1363
1364        *outBuff++ = ch;
1365    }
1366
1367    return outLen;
1368}
1369
1370// ----------------------------------------------------------------------------
1371// endian-reversing conversions
1372// ----------------------------------------------------------------------------
1373
1374size_t
1375wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen,
1376                           const char *src, size_t srcLen) const
1377{
1378    srcLen = GetLength(src, srcLen);
1379    if ( srcLen == wxNO_LEN )
1380        return wxCONV_FAILED;
1381
1382    const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1383    const size_t inLen = srcLen / BYTES_PER_CHAR;
1384    size_t outLen = 0;
1385    for ( size_t n = 0; n < inLen; n++, inBuff++ )
1386    {
1387        wxUint16 cc[2];
1388        const size_t numChars = encode_utf16(wxUINT32_SWAP_ALWAYS(*inBuff), cc);
1389        if ( numChars == wxCONV_FAILED )
1390            return wxCONV_FAILED;
1391
1392        outLen += numChars;
1393        if ( dst )
1394        {
1395            if ( outLen > dstLen )
1396                return wxCONV_FAILED;
1397
1398            *dst++ = cc[0];
1399            if ( numChars == 2 )
1400            {
1401                // second character of a surrogate
1402                *dst++ = cc[1];
1403            }
1404        }
1405    }
1406
1407    return outLen;
1408}
1409
1410size_t
1411wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
1412                             const wchar_t *src, size_t srcLen) const
1413{
1414    if ( srcLen == wxNO_LEN )
1415        srcLen = wxWcslen(src) + 1;
1416
1417    if ( !dst )
1418    {
1419        // optimization: return maximal space which could be needed for this
1420        // string instead of the exact amount which could be less if there are
1421        // any surrogates in the input
1422        //
1423        // we consider that surrogates are rare enough to make it worthwhile to
1424        // avoid running the loop below at the cost of slightly extra memory
1425        // consumption
1426        return srcLen*BYTES_PER_CHAR;
1427    }
1428
1429    wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst);
1430    size_t outLen = 0;
1431    for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
1432    {
1433        const wxUint32 ch = wxDecodeSurrogate(&src);
1434        if ( !src )
1435            return wxCONV_FAILED;
1436
1437        outLen += BYTES_PER_CHAR;
1438
1439        if ( outLen > dstLen )
1440            return wxCONV_FAILED;
1441
1442        *outBuff++ = wxUINT32_SWAP_ALWAYS(ch);
1443    }
1444
1445    return outLen;
1446}
1447
1448#else // !WC_UTF16: wchar_t is UTF-32
1449
1450// ----------------------------------------------------------------------------
1451// conversions without endianness change
1452// ----------------------------------------------------------------------------
1453
1454size_t
1455wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen,
1456                               const char *src, size_t srcLen) const
1457{
1458    // use memcpy() as it should be much faster than hand-written loop
1459    srcLen = GetLength(src, srcLen);
1460    if ( srcLen == wxNO_LEN )
1461        return wxCONV_FAILED;
1462
1463    const size_t inLen = srcLen/BYTES_PER_CHAR;
1464    if ( dst )
1465    {
1466        if ( dstLen < inLen )
1467            return wxCONV_FAILED;
1468
1469        memcpy(dst, src, srcLen);
1470    }
1471
1472    return inLen;
1473}
1474
1475size_t
1476wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen,
1477                                 const wchar_t *src, size_t srcLen) const
1478{
1479    if ( srcLen == wxNO_LEN )
1480        srcLen = wxWcslen(src) + 1;
1481
1482    srcLen *= BYTES_PER_CHAR;
1483
1484    if ( dst )
1485    {
1486        if ( dstLen < srcLen )
1487            return wxCONV_FAILED;
1488
1489        memcpy(dst, src, srcLen);
1490    }
1491
1492    return srcLen;
1493}
1494
1495// ----------------------------------------------------------------------------
1496// endian-reversing conversions
1497// ----------------------------------------------------------------------------
1498
1499size_t
1500wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen,
1501                           const char *src, size_t srcLen) const
1502{
1503    srcLen = GetLength(src, srcLen);
1504    if ( srcLen == wxNO_LEN )
1505        return wxCONV_FAILED;
1506
1507    srcLen /= BYTES_PER_CHAR;
1508
1509    if ( dst )
1510    {
1511        if ( dstLen < srcLen )
1512            return wxCONV_FAILED;
1513
1514        const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1515        for ( size_t n = 0; n < srcLen; n++, inBuff++ )
1516        {
1517            *dst++ = wxUINT32_SWAP_ALWAYS(*inBuff);
1518        }
1519    }
1520
1521    return srcLen;
1522}
1523
1524size_t
1525wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
1526                             const wchar_t *src, size_t srcLen) const
1527{
1528    if ( srcLen == wxNO_LEN )
1529        srcLen = wxWcslen(src) + 1;
1530
1531    srcLen *= BYTES_PER_CHAR;
1532
1533    if ( dst )
1534    {
1535        if ( dstLen < srcLen )
1536            return wxCONV_FAILED;
1537
1538        wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst);
1539        for ( size_t n = 0; n < srcLen; n += BYTES_PER_CHAR, src++ )
1540        {
1541            *outBuff++ = wxUINT32_SWAP_ALWAYS(*src);
1542        }
1543    }
1544
1545    return srcLen;
1546}
1547
1548#endif // WC_UTF16/!WC_UTF16
1549
1550
1551// ============================================================================
1552// The classes doing conversion using the iconv_xxx() functions
1553// ============================================================================
1554
1555#ifdef HAVE_ICONV
1556
1557// VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1558//     E2BIG if output buffer is _exactly_ as big as needed. Such case is
1559//     (unless there's yet another bug in glibc) the only case when iconv()
1560//     returns with (size_t)-1 (which means error) and says there are 0 bytes
1561//     left in the input buffer -- when _real_ error occurs,
1562//     bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1563//     iconv() failure.
1564//     [This bug does not appear in glibc 2.2.]
1565#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1566#define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1567                                     (errno != E2BIG || bufLeft != 0))
1568#else
1569#define ICONV_FAILED(cres, bufLeft)  (cres == (size_t)-1)
1570#endif
1571
1572#define ICONV_CHAR_CAST(x)  ((ICONV_CONST char **)(x))
1573
1574#define ICONV_T_INVALID ((iconv_t)-1)
1575
1576#if SIZEOF_WCHAR_T == 4
1577    #define WC_BSWAP    wxUINT32_SWAP_ALWAYS
1578    #define WC_ENC      wxFONTENCODING_UTF32
1579#elif SIZEOF_WCHAR_T == 2
1580    #define WC_BSWAP    wxUINT16_SWAP_ALWAYS
1581    #define WC_ENC      wxFONTENCODING_UTF16
1582#else // sizeof(wchar_t) != 2 nor 4
1583    // does this ever happen?
1584    #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1585#endif
1586
1587// ----------------------------------------------------------------------------
1588// wxMBConv_iconv: encapsulates an iconv character set
1589// ----------------------------------------------------------------------------
1590
1591class wxMBConv_iconv : public wxMBConv
1592{
1593public:
1594    wxMBConv_iconv(const wxChar *name);
1595    virtual ~wxMBConv_iconv();
1596
1597    virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1598    virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
1599
1600    // classify this encoding as explained in wxMBConv::GetMBNulLen() comment
1601    virtual size_t GetMBNulLen() const;
1602
1603    virtual wxMBConv *Clone() const
1604    {
1605        wxMBConv_iconv *p = new wxMBConv_iconv(m_name);
1606        p->m_minMBCharWidth = m_minMBCharWidth;
1607        return p;
1608    }
1609
1610    bool IsOk() const
1611        { return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); }
1612
1613protected:
1614    // the iconv handlers used to translate from multibyte
1615    // to wide char and in the other direction
1616    iconv_t m2w,
1617            w2m;
1618
1619#if wxUSE_THREADS
1620    // guards access to m2w and w2m objects
1621    wxMutex m_iconvMutex;
1622#endif
1623
1624private:
1625    // the name (for iconv_open()) of a wide char charset -- if none is
1626    // available on this machine, it will remain NULL
1627    static wxString ms_wcCharsetName;
1628
1629    // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1630    // different endian-ness than the native one
1631    static bool ms_wcNeedsSwap;
1632
1633
1634    // name of the encoding handled by this conversion
1635    wxString m_name;
1636
1637    // cached result of GetMBNulLen(); set to 0 meaning "unknown"
1638    // initially
1639    size_t m_minMBCharWidth;
1640};
1641
1642// make the constructor available for unit testing
1643WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const wxChar* name )
1644{
1645    wxMBConv_iconv* result = new wxMBConv_iconv( name );
1646    if ( !result->IsOk() )
1647    {
1648        delete result;
1649        return 0;
1650    }
1651
1652    return result;
1653}
1654
1655wxString wxMBConv_iconv::ms_wcCharsetName;
1656bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1657
1658wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
1659              : m_name(name)
1660{
1661    m_minMBCharWidth = 0;
1662
1663    // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1664    // names for the charsets
1665    const wxCharBuffer cname(wxString(name).ToAscii());
1666
1667    // check for charset that represents wchar_t:
1668    if ( ms_wcCharsetName.empty() )
1669    {
1670        wxLogTrace(TRACE_STRCONV, _T("Looking for wide char codeset:"));
1671
1672#if wxUSE_FONTMAP
1673        const wxChar **names = wxFontMapperBase::GetAllEncodingNames(WC_ENC);
1674#else // !wxUSE_FONTMAP
1675        static const wxChar *names_static[] =
1676        {
1677#if SIZEOF_WCHAR_T == 4
1678            _T("UCS-4"),
1679#elif SIZEOF_WCHAR_T == 2
1680            _T("UCS-2"),
1681#endif
1682            NULL
1683        };
1684        const wxChar **names = names_static;
1685#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1686
1687        for ( ; *names && ms_wcCharsetName.empty(); ++names )
1688        {
1689            const wxString nameCS(*names);
1690
1691            // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
1692            wxString nameXE(nameCS);
1693
1694#ifdef WORDS_BIGENDIAN
1695                nameXE += _T("BE");
1696#else // little endian
1697                nameXE += _T("LE");
1698#endif
1699
1700            wxLogTrace(TRACE_STRCONV, _T("  trying charset \"%s\""),
1701                       nameXE.c_str());
1702
1703            m2w = iconv_open(nameXE.ToAscii(), cname);
1704            if ( m2w == ICONV_T_INVALID )
1705            {
1706                // try charset w/o bytesex info (e.g. "UCS4")
1707                wxLogTrace(TRACE_STRCONV, _T("  trying charset \"%s\""),
1708                           nameCS.c_str());
1709                m2w = iconv_open(nameCS.ToAscii(), cname);
1710
1711                // and check for bytesex ourselves:
1712                if ( m2w != ICONV_T_INVALID )
1713                {
1714                    char    buf[2], *bufPtr;
1715                    wchar_t wbuf[2], *wbufPtr;
1716                    size_t  insz, outsz;
1717                    size_t  res;
1718
1719                    buf[0] = 'A';
1720                    buf[1] = 0;
1721                    wbuf[0] = 0;
1722                    insz = 2;
1723                    outsz = SIZEOF_WCHAR_T * 2;
1724                    wbufPtr = wbuf;
1725                    bufPtr = buf;
1726
1727                    res = iconv(
1728                        m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1729                        (char**)&wbufPtr, &outsz);
1730
1731                    if (ICONV_FAILED(res, insz))
1732                    {
1733                        wxLogLastError(wxT("iconv"));
1734                        wxLogError(_("Conversion to charset '%s' doesn't work."),
1735                                   nameCS.c_str());
1736                    }
1737                    else // ok, can convert to this encoding, remember it
1738                    {
1739                        ms_wcCharsetName = nameCS;
1740                        ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1741                    }
1742                }
1743            }
1744            else // use charset not requiring byte swapping
1745            {
1746                ms_wcCharsetName = nameXE;
1747            }
1748        }
1749
1750        wxLogTrace(TRACE_STRCONV,
1751                   wxT("iconv wchar_t charset is \"%s\"%s"),
1752                   ms_wcCharsetName.empty() ? _T("<none>")
1753                                            : ms_wcCharsetName.c_str(),
1754                   ms_wcNeedsSwap ? _T(" (needs swap)")
1755                                  : _T(""));
1756    }
1757    else // we already have ms_wcCharsetName
1758    {
1759        m2w = iconv_open(ms_wcCharsetName.ToAscii(), cname);
1760    }
1761
1762    if ( ms_wcCharsetName.empty() )
1763    {
1764        w2m = ICONV_T_INVALID;
1765    }
1766    else
1767    {
1768        w2m = iconv_open(cname, ms_wcCharsetName.ToAscii());
1769        if ( w2m == ICONV_T_INVALID )
1770        {
1771            wxLogTrace(TRACE_STRCONV,
1772                       wxT("\"%s\" -> \"%s\" works but not the converse!?"),
1773                       ms_wcCharsetName.c_str(), cname.data());
1774        }
1775    }
1776}
1777
1778wxMBConv_iconv::~wxMBConv_iconv()
1779{
1780    if ( m2w != ICONV_T_INVALID )
1781        iconv_close(m2w);
1782    if ( w2m != ICONV_T_INVALID )
1783        iconv_close(w2m);
1784}
1785
1786size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1787{
1788    // find the string length: notice that must be done differently for
1789    // NUL-terminated strings and UTF-16/32 which are terminated with 2/4 NULs
1790    size_t inbuf;
1791    const size_t nulLen = GetMBNulLen();
1792    switch ( nulLen )
1793    {
1794        default:
1795            return wxCONV_FAILED;
1796
1797        case 1:
1798            inbuf = strlen(psz); // arguably more optimized than our version
1799            break;
1800
1801        case 2:
1802        case 4:
1803            // for UTF-16/32 not only we need to have 2/4 consecutive NULs but
1804            // they also have to start at character boundary and not span two
1805            // adjacent characters
1806            const char *p;
1807            for ( p = psz; NotAllNULs(p, nulLen); p += nulLen )
1808                ;
1809            inbuf = p - psz;
1810            break;
1811    }
1812
1813#if wxUSE_THREADS
1814    // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle.
1815    //     Unfortunately there are a couple of global wxCSConv objects such as
1816    //     wxConvLocal that are used all over wx code, so we have to make sure
1817    //     the handle is used by at most one thread at the time. Otherwise
1818    //     only a few wx classes would be safe to use from non-main threads
1819    //     as MB<->WC conversion would fail "randomly".
1820    wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1821#endif // wxUSE_THREADS
1822
1823    size_t outbuf = n * SIZEOF_WCHAR_T;
1824    size_t res, cres;
1825    // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1826    wchar_t *bufPtr = buf;
1827    const char *pszPtr = psz;
1828
1829    if (buf)
1830    {
1831        // have destination buffer, convert there
1832        cres = iconv(m2w,
1833                     ICONV_CHAR_CAST(&pszPtr), &inbuf,
1834                     (char**)&bufPtr, &outbuf);
1835        res = n - (outbuf / SIZEOF_WCHAR_T);
1836
1837        if (ms_wcNeedsSwap)
1838        {
1839            // convert to native endianness
1840            for ( unsigned i = 0; i < res; i++ )
1841                buf[n] = WC_BSWAP(buf[i]);
1842        }
1843
1844        // NUL-terminate the string if there is any space left
1845        if (res < n)
1846            buf[res] = 0;
1847    }
1848    else
1849    {
1850        // no destination buffer... convert using temp buffer
1851        // to calculate destination buffer requirement
1852        wchar_t tbuf[8];
1853        res = 0;
1854
1855        do
1856        {
1857            bufPtr = tbuf;
1858            outbuf = 8 * SIZEOF_WCHAR_T;
1859
1860            cres = iconv(m2w,
1861                         ICONV_CHAR_CAST(&pszPtr), &inbuf,
1862                         (char**)&bufPtr, &outbuf );
1863
1864            res += 8 - (outbuf / SIZEOF_WCHAR_T);
1865        }
1866        while ((cres == (size_t)-1) && (errno == E2BIG));
1867    }
1868
1869    if (ICONV_FAILED(cres, inbuf))
1870    {
1871        //VS: it is ok if iconv fails, hence trace only
1872        wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1873        return wxCONV_FAILED;
1874    }
1875
1876    return res;
1877}
1878
1879size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1880{
1881#if wxUSE_THREADS
1882    // NB: explained in MB2WC
1883    wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1884#endif
1885
1886    size_t inlen = wxWcslen(psz);
1887    size_t inbuf = inlen * SIZEOF_WCHAR_T;
1888    size_t outbuf = n;
1889    size_t res, cres;
1890
1891    wchar_t *tmpbuf = 0;
1892
1893    if (ms_wcNeedsSwap)
1894    {
1895        // need to copy to temp buffer to switch endianness
1896        // (doing WC_BSWAP twice on the original buffer won't help, as it
1897        //  could be in read-only memory, or be accessed in some other thread)
1898        tmpbuf = (wchar_t *)malloc(inbuf + SIZEOF_WCHAR_T);
1899        for ( size_t i = 0; i < inlen; i++ )
1900            tmpbuf[n] = WC_BSWAP(psz[i]);
1901
1902        tmpbuf[inlen] = L'\0';
1903        psz = tmpbuf;
1904    }
1905
1906    if (buf)
1907    {
1908        // have destination buffer, convert there
1909        cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1910
1911        res = n - outbuf;
1912
1913        // NB: iconv was given only wcslen(psz) characters on input, and so
1914        //     it couldn't convert the trailing zero. Let's do it ourselves
1915        //     if there's some room left for it in the output buffer.
1916        if (res < n)
1917            buf[0] = 0;
1918    }
1919    else
1920    {
1921        // no destination buffer: convert using temp buffer
1922        // to calculate destination buffer requirement
1923        char tbuf[16];
1924        res = 0;
1925        do
1926        {
1927            buf = tbuf;
1928            outbuf = 16;
1929
1930            cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1931
1932            res += 16 - outbuf;
1933        }
1934        while ((cres == (size_t)-1) && (errno == E2BIG));
1935    }
1936
1937    if (ms_wcNeedsSwap)
1938    {
1939        free(tmpbuf);
1940    }
1941
1942    if (ICONV_FAILED(cres, inbuf))
1943    {
1944        wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1945        return wxCONV_FAILED;
1946    }
1947
1948    return res;
1949}
1950
1951size_t wxMBConv_iconv::GetMBNulLen() const
1952{
1953    if ( m_minMBCharWidth == 0 )
1954    {
1955        wxMBConv_iconv * const self = wxConstCast(this, wxMBConv_iconv);
1956
1957#if wxUSE_THREADS
1958        // NB: explained in MB2WC
1959        wxMutexLocker lock(self->m_iconvMutex);
1960#endif
1961
1962        const wchar_t *wnul = L"";
1963        char buf[8]; // should be enough for NUL in any encoding
1964        size_t inLen = sizeof(wchar_t),
1965               outLen = WXSIZEOF(buf);
1966        char *inBuff = (char *)wnul;
1967        char *outBuff = buf;
1968        if ( iconv(w2m, ICONV_CHAR_CAST(&inBuff), &inLen, &outBuff, &outLen) == (size_t)-1 )
1969        {
1970            self->m_minMBCharWidth = (size_t)-1;
1971        }
1972        else // ok
1973        {
1974            self->m_minMBCharWidth = outBuff - buf;
1975        }
1976    }
1977
1978    return m_minMBCharWidth;
1979}
1980
1981#endif // HAVE_ICONV
1982
1983
1984// ============================================================================
1985// Win32 conversion classes
1986// ============================================================================
1987
1988#ifdef wxHAVE_WIN32_MB2WC
1989
// functions returning the code page to use, declared here as they are
// defined in utils.cpp
#if wxUSE_FONTMAP
extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
#endif // wxUSE_FONTMAP
1995
1996class wxMBConv_win32 : public wxMBConv
1997{
1998public:
1999    wxMBConv_win32()
2000    {
2001        m_CodePage = CP_ACP;
2002        m_minMBCharWidth = 0;
2003    }
2004
2005    wxMBConv_win32(const wxMBConv_win32& conv)
2006        : wxMBConv()
2007    {
2008        m_CodePage = conv.m_CodePage;
2009        m_minMBCharWidth = conv.m_minMBCharWidth;
2010    }
2011
2012#if wxUSE_FONTMAP
2013    wxMBConv_win32(const wxChar* name)
2014    {
2015        m_CodePage = wxCharsetToCodepage(name);
2016        m_minMBCharWidth = 0;
2017    }
2018
2019    wxMBConv_win32(wxFontEncoding encoding)
2020    {
2021        m_CodePage = wxEncodingToCodepage(encoding);
2022        m_minMBCharWidth = 0;
2023    }
2024#endif // wxUSE_FONTMAP
2025
2026    virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2027    {
2028        // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
2029        // the behaviour is not compatible with the Unix version (using iconv)
2030        // and break the library itself, e.g. wxTextInputStream::NextChar()
2031        // wouldn't work if reading an incomplete MB char didn't result in an
2032        // error
2033        //
2034        // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
2035        // Win XP or newer and it is not supported for UTF-[78] so we always
2036        // use our own conversions in this case. See
2037        //     http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
2038        //     http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
2039        if ( m_CodePage == CP_UTF8 )
2040        {
2041            return wxConvUTF8.MB2WC(buf, psz, n);
2042        }
2043
2044        if ( m_CodePage == CP_UTF7 )
2045        {
2046            return wxConvUTF7.MB2WC(buf, psz, n);
2047        }
2048
2049        int flags = 0;
2050        if ( (m_CodePage < 50000 && m_CodePage != CP_SYMBOL) &&
2051                IsAtLeastWin2kSP4() )
2052        {
2053            flags = MB_ERR_INVALID_CHARS;
2054        }
2055
2056        const size_t len = ::MultiByteToWideChar
2057                             (
2058                                m_CodePage,     // code page
2059                                flags,          // flags: fall on error
2060                                psz,            // input string
2061                                -1,             // its length (NUL-terminated)
2062                                buf,            // output string
2063                                buf ? n : 0     // size of output buffer
2064                             );
2065        if ( !len )
2066        {
2067            // function totally failed
2068            return wxCONV_FAILED;
2069        }
2070
2071        // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
2072        // check if we succeeded, by doing a double trip:
2073        if ( !flags && buf )
2074        {
2075            const size_t mbLen = strlen(psz);
2076            wxCharBuffer mbBuf(mbLen);
2077            if ( ::WideCharToMultiByte
2078                   (
2079                      m_CodePage,
2080                      0,
2081                      buf,
2082                      -1,
2083                      mbBuf.data(),
2084                      mbLen + 1,        // size in bytes, not length
2085                      NULL,
2086                      NULL
2087                   ) == 0 ||
2088                  strcmp(mbBuf, psz) != 0 )
2089            {
2090                // we didn't obtain the same thing we started from, hence
2091                // the conversion was lossy and we consider that it failed
2092                return wxCONV_FAILED;
2093            }
2094        }
2095
2096        // note that it returns count of written chars for buf != NULL and size
2097        // of the needed buffer for buf == NULL so in either case the length of
2098        // the string (which never includes the terminating NUL) is one less
2099        return len - 1;
2100    }
2101
2102    virtual size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
2103    {
2104        /*
2105            we have a problem here: by default, WideCharToMultiByte() may
2106            replace characters unrepresentable in the target code page with bad
2107            quality approximations such as turning "1/2" symbol (U+00BD) into
2108            "1" for the code pages which don't have it and we, obviously, want
2109            to avoid this at any price
2110
2111            the trouble is that this function does it _silently_, i.e. it won't
2112            even tell us whether it did or not... Win98/2000 and higher provide
2113            WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
2114            we have to resort to a round trip, i.e. check that converting back
2115            results in the same string -- this is, of course, expensive but
2116            otherwise we simply can't be sure to not garble the data.
2117         */
2118
2119        // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
2120        // it doesn't work with CJK encodings (which we test for rather roughly
2121        // here...) nor with UTF-7/8 nor, of course, with Windows versions not
2122        // supporting it
2123        BOOL usedDef wxDUMMY_INITIALIZE(false);
2124        BOOL *pUsedDef;
2125        int flags;
2126        if ( CanUseNoBestFit() && m_CodePage < 50000 )
2127        {
2128            // it's our lucky day
2129            flags = WC_NO_BEST_FIT_CHARS;
2130            pUsedDef = &usedDef;
2131        }
2132        else // old system or unsupported encoding
2133        {
2134            flags = 0;
2135            pUsedDef = NULL;
2136        }
2137
2138        const size_t len = ::WideCharToMultiByte
2139                             (
2140                                m_CodePage,     // code page
2141                                flags,          // either none or no best fit
2142                                pwz,            // input string
2143                                -1,             // it is (wide) NUL-terminated
2144                                buf,            // output buffer
2145                                buf ? n : 0,    // and its size
2146                                NULL,           // default "replacement" char
2147                                pUsedDef        // [out] was it used?
2148                             );
2149
2150        if ( !len )
2151        {
2152            // function totally failed
2153            return wxCONV_FAILED;
2154        }
2155
2156        // if we were really converting, check if we succeeded
2157        if ( buf )
2158        {
2159            if ( flags )
2160            {
2161                // check if the conversion failed, i.e. if any replacements
2162                // were done
2163                if ( usedDef )
2164                    return wxCONV_FAILED;
2165            }
2166            else // we must resort to double tripping...
2167            {
2168                wxWCharBuffer wcBuf(n);
2169                if ( MB2WC(wcBuf.data(), buf, n) == wxCONV_FAILED ||
2170                        wcscmp(wcBuf, pwz) != 0 )
2171                {
2172                    // we didn't obtain the same thing we started from, hence
2173                    // the conversion was lossy and we consider that it failed
2174                    return wxCONV_FAILED;
2175                }
2176            }
2177        }
2178
2179        // see the comment above for the reason of "len - 1"
2180        return len - 1;
2181    }
2182
2183    virtual size_t GetMBNulLen() const
2184    {
2185        if ( m_minMBCharWidth == 0 )
2186        {
2187            int len = ::WideCharToMultiByte
2188                        (
2189                            m_CodePage,     // code page
2190                            0,              // no flags
2191                            L"",            // input string
2192                            1,              // translate just the NUL
2193                            NULL,           // output buffer
2194                            0,              // and its size
2195                            NULL,           // no replacement char
2196                            NULL            // [out] don't care if it was used
2197                        );
2198
2199            wxMBConv_win32 * const self = wxConstCast(this, wxMBConv_win32);
2200            switch ( len )
2201            {
2202                default:
2203                    wxLogDebug(_T("Unexpected NUL length %d"), len);
2204                    self->m_minMBCharWidth = (size_t)-1;
2205                    break;
2206
2207                case 0:
2208                    self->m_minMBCharWidth = (size_t)-1;
2209                    break;
2210
2211                case 1:
2212                case 2:
2213                case 4:
2214                    self->m_minMBCharWidth = len;
2215                    break;
2216            }
2217        }
2218
2219        return m_minMBCharWidth;
2220    }
2221
2222    virtual wxMBConv *Clone() const { return new wxMBConv_win32(*this); }
2223
2224    bool IsOk() const { return m_CodePage != -1; }
2225
2226private:
2227    static bool CanUseNoBestFit()
2228    {
2229        static int s_isWin98Or2k = -1;
2230
2231        if ( s_isWin98Or2k == -1 )
2232        {
2233            int verMaj, verMin;
2234            switch ( wxGetOsVersion(&verMaj, &verMin) )
2235            {
2236                case wxOS_WINDOWS_9X:
2237                    s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
2238                    break;
2239
2240                case wxOS_WINDOWS_NT:
2241                    s_isWin98Or2k = verMaj >= 5;
2242                    break;
2243
2244                default:
2245                    // unknown: be conservative by default
2246                    s_isWin98Or2k = 0;
2247                    break;
2248            }
2249
2250            wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
2251        }
2252
2253        return s_isWin98Or2k == 1;
2254    }
2255
2256    static bool IsAtLeastWin2kSP4()
2257    {
2258#ifdef __WXWINCE__
2259        return false;
2260#else
2261        static int s_isAtLeastWin2kSP4 = -1;
2262
2263        if ( s_isAtLeastWin2kSP4 == -1 )
2264        {
2265            OSVERSIONINFOEX ver;
2266
2267            memset(&ver, 0, sizeof(ver));
2268            ver.dwOSVersionInfoSize = sizeof(ver);
2269            GetVersionEx((OSVERSIONINFO*)&ver);
2270
2271            s_isAtLeastWin2kSP4 =
2272              ((ver.dwMajorVersion > 5) || // Vista+
2273               (ver.dwMajorVersion == 5 && ver.dwMinorVersion > 0) || // XP/2003
2274               (ver.dwMajorVersion == 5 && ver.dwMinorVersion == 0 &&
2275               ver.wServicePackMajor >= 4)) // 2000 SP4+
2276              ? 1 : 0;
2277        }
2278
2279        return s_isAtLeastWin2kSP4 == 1;
2280#endif
2281    }
2282
2283
2284    // the code page we're working with
2285    long m_CodePage;
2286
2287    // cached result of GetMBNulLen(), set to 0 initially meaning
2288    // "unknown"
2289    size_t m_minMBCharWidth;
2290};
2291
2292#endif // wxHAVE_WIN32_MB2WC
2293
2294// ============================================================================
2295// Cocoa conversion classes
2296// ============================================================================
2297
2298#if defined(__WXCOCOA__)
2299
// RN: There is no UTF-32 support in either Core Foundation or Cocoa.
// Strangely enough, Core Foundation uses UTF-32 internally quite a
// bit - it's just not public (yet).
2303
2304#include <CoreFoundation/CFString.h>
2305#include <CoreFoundation/CFStringEncodingExt.h>
2306
2307CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
2308{
2309    CFStringEncoding enc = kCFStringEncodingInvalidId ;
2310
2311    switch (encoding)
2312    {
2313        case wxFONTENCODING_DEFAULT :
2314            enc = CFStringGetSystemEncoding();
2315            break ;
2316
2317        case wxFONTENCODING_ISO8859_1 :
2318            enc = kCFStringEncodingISOLatin1 ;
2319            break ;
2320        case wxFONTENCODING_ISO8859_2 :
2321            enc = kCFStringEncodingISOLatin2;
2322            break ;
2323        case wxFONTENCODING_ISO8859_3 :
2324            enc = kCFStringEncodingISOLatin3 ;
2325            break ;
2326        case wxFONTENCODING_ISO8859_4 :
2327            enc = kCFStringEncodingISOLatin4;
2328            break ;
2329        case wxFONTENCODING_ISO8859_5 :
2330            enc = kCFStringEncodingISOLatinCyrillic;
2331            break ;
2332        case wxFONTENCODING_ISO8859_6 :
2333            enc = kCFStringEncodingISOLatinArabic;
2334            break ;
2335        case wxFONTENCODING_ISO8859_7 :
2336            enc = kCFStringEncodingISOLatinGreek;
2337            break ;
2338        case wxFONTENCODING_ISO8859_8 :
2339            enc = kCFStringEncodingISOLatinHebrew;
2340            break ;
2341        case wxFONTENCODING_ISO8859_9 :
2342            enc = kCFStringEncodingISOLatin5;
2343            break ;
2344        case wxFONTENCODING_ISO8859_10 :
2345            enc = kCFStringEncodingISOLatin6;
2346            break ;
2347        case wxFONTENCODING_ISO8859_11 :
2348            enc = kCFStringEncodingISOLatinThai;
2349            break ;
2350        case wxFONTENCODING_ISO8859_13 :
2351            enc = kCFStringEncodingISOLatin7;
2352            break ;
2353        case wxFONTENCODING_ISO8859_14 :
2354            enc = kCFStringEncodingISOLatin8;
2355            break ;
2356        case wxFONTENCODING_ISO8859_15 :
2357            enc = kCFStringEncodingISOLatin9;
2358            break ;
2359
2360        case wxFONTENCODING_KOI8 :
2361            enc = kCFStringEncodingKOI8_R;
2362            break ;
2363        case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
2364            enc = kCFStringEncodingDOSRussian;
2365            break ;
2366
2367//      case wxFONTENCODING_BULGARIAN :
2368//          enc = ;
2369//          break ;
2370
2371        case wxFONTENCODING_CP437 :
2372            enc = kCFStringEncodingDOSLatinUS ;
2373            break ;
2374        case wxFONTENCODING_CP850 :
2375            enc = kCFStringEncodingDOSLatin1;
2376            break ;
2377        case wxFONTENCODING_CP852 :
2378            enc = kCFStringEncodingDOSLatin2;
2379            break ;
2380        case wxFONTENCODING_CP855 :
2381            enc = kCFStringEncodingDOSCyrillic;
2382            break ;
2383        case wxFONTENCODING_CP866 :
2384            enc = kCFStringEncodingDOSRussian ;
2385            break ;
2386        case wxFONTENCODING_CP874 :
2387            enc = kCFStringEncodingDOSThai;
2388            break ;
2389        case wxFONTENCODING_CP932 :
2390            enc = kCFStringEncodingDOSJapanese;
2391            break ;
2392        case wxFONTENCODING_CP936 :
2393            enc = kCFStringEncodingDOSChineseSimplif ;
2394            break ;
2395        case wxFONTENCODING_CP949 :
2396            enc = kCFStringEncodingDOSKorean;
2397            break ;
2398        case wxFONTENCODING_CP950 :
2399            enc = kCFStringEncodingDOSChineseTrad;
2400            break ;
2401        case wxFONTENCODING_CP1250 :
2402            enc = kCFStringEncodingWindowsLatin2;
2403            break ;
2404        case wxFONTENCODING_CP1251 :
2405            enc = kCFStringEncodingWindowsCyrillic ;
2406            break ;
2407        case wxFONTENCODING_CP1252 :
2408            enc = kCFStringEncodingWindowsLatin1 ;
2409            break ;
2410        case wxFONTENCODING_CP1253 :
2411            enc = kCFStringEncodingWindowsGreek;
2412            break ;
2413        case wxFONTENCODING_CP1254 :
2414            enc = kCFStringEncodingWindowsLatin5;
2415            break ;
2416        case wxFONTENCODING_CP1255 :
2417            enc = kCFStringEncodingWindowsHebrew ;
2418            break ;
2419        case wxFONTENCODING_CP1256 :
2420            enc = kCFStringEncodingWindowsArabic ;
2421            break ;
2422        case wxFONTENCODING_CP1257 :
2423            enc = kCFStringEncodingWindowsBalticRim;
2424            break ;
2425//   This only really encodes to UTF7 (if that) evidently
2426//        case wxFONTENCODING_UTF7 :
2427//            enc = kCFStringEncodingNonLossyASCII ;
2428//            break ;
2429        case wxFONTENCODING_UTF8 :
2430            enc = kCFStringEncodingUTF8 ;
2431            break ;
2432        case wxFONTENCODING_EUC_JP :
2433            enc = kCFStringEncodingEUC_JP;
2434            break ;
2435        case wxFONTENCODING_UTF16 :
2436            enc = kCFStringEncodingUnicode ;
2437            break ;
2438        case wxFONTENCODING_MACROMAN :
2439            enc = kCFStringEncodingMacRoman ;
2440            break ;
2441        case wxFONTENCODING_MACJAPANESE :
2442            enc = kCFStringEncodingMacJapanese ;
2443            break ;
2444        case wxFONTENCODING_MACCHINESETRAD :
2445            enc = kCFStringEncodingMacChineseTrad ;
2446            break ;
2447        case wxFONTENCODING_MACKOREAN :
2448            enc = kCFStringEncodingMacKorean ;
2449            break ;
2450        case wxFONTENCODING_MACARABIC :
2451            enc = kCFStringEncodingMacArabic ;
2452            break ;
2453        case wxFONTENCODING_MACHEBREW :
2454            enc = kCFStringEncodingMacHebrew ;
2455            break ;
2456        case wxFONTENCODING_MACGREEK :
2457            enc = kCFStringEncodingMacGreek ;
2458            break ;
2459        case wxFONTENCODING_MACCYRILLIC :
2460            enc = kCFStringEncodingMacCyrillic ;
2461            break ;
2462        case wxFONTENCODING_MACDEVANAGARI :
2463            enc = kCFStringEncodingMacDevanagari ;
2464            break ;
2465        case wxFONTENCODING_MACGURMUKHI :
2466            enc = kCFStringEncodingMacGurmukhi ;
2467            break ;
2468        case wxFONTENCODING_MACGUJARATI :
2469            enc = kCFStringEncodingMacGujarati ;
2470            break ;
2471        case wxFONTENCODING_MACORIYA :
2472            enc = kCFStringEncodingMacOriya ;
2473            break ;
2474        case wxFONTENCODING_MACBENGALI :
2475            enc = kCFStringEncodingMacBengali ;
2476            break ;
2477        case wxFONTENCODING_MACTAMIL :
2478            enc = kCFStringEncodingMacTamil ;
2479            break ;
2480        case wxFONTENCODING_MACTELUGU :
2481            enc = kCFStringEncodingMacTelugu ;
2482            break ;
2483        case wxFONTENCODING_MACKANNADA :
2484            enc = kCFStringEncodingMacKannada ;
2485            break ;
2486        case wxFONTENCODING_MACMALAJALAM :
2487            enc = kCFStringEncodingMacMalayalam ;
2488            break ;
2489        case wxFONTENCODING_MACSINHALESE :
2490            enc = kCFStringEncodingMacSinhalese ;
2491            break ;
2492        case wxFONTENCODING_MACBURMESE :
2493            enc = kCFStringEncodingMacBurmese ;
2494            break ;
2495        case wxFONTENCODING_MACKHMER :
2496            enc = kCFStringEncodingMacKhmer ;
2497            break ;
2498        case wxFONTENCODING_MACTHAI :
2499            enc = kCFStringEncodingMacThai ;
2500            break ;
2501        case wxFONTENCODING_MACLAOTIAN :
2502            enc = kCFStringEncodingMacLaotian ;
2503            break ;
2504        case wxFONTENCODING_MACGEORGIAN :
2505            enc = kCFStringEncodingMacGeorgian ;
2506            break ;
2507        case wxFONTENCODING_MACARMENIAN :
2508            enc = kCFStringEncodingMacArmenian ;
2509            break ;
2510        case wxFONTENCODING_MACCHINESESIMP :
2511            enc = kCFStringEncodingMacChineseSimp ;
2512            break ;
2513        case wxFONTENCODING_MACTIBETAN :
2514            enc = kCFStringEncodingMacTibetan ;
2515            break ;
2516        case wxFONTENCODING_MACMONGOLIAN :
2517            enc = kCFStringEncodingMacMongolian ;
2518            break ;
2519        case wxFONTENCODING_MACETHIOPIC :
2520            enc = kCFStringEncodingMacEthiopic ;
2521            break ;
2522        case wxFONTENCODING_MACCENTRALEUR :
2523            enc = kCFStringEncodingMacCentralEurRoman ;
2524            break ;
2525        case wxFONTENCODING_MACVIATNAMESE :
2526            enc = kCFStringEncodingMacVietnamese ;
2527            break ;
2528        case wxFONTENCODING_MACARABICEXT :
2529            enc = kCFStringEncodingMacExtArabic ;
2530            break ;
2531        case wxFONTENCODING_MACSYMBOL :
2532            enc = kCFStringEncodingMacSymbol ;
2533            break ;
2534        case wxFONTENCODING_MACDINGBATS :
2535            enc = kCFStringEncodingMacDingbats ;
2536            break ;
2537        case wxFONTENCODING_MACTURKISH :
2538            enc = kCFStringEncodingMacTurkish ;
2539            break ;
2540        case wxFONTENCODING_MACCROATIAN :
2541            enc = kCFStringEncodingMacCroatian ;
2542            break ;
2543        case wxFONTENCODING_MACICELANDIC :
2544            enc = kCFStringEncodingMacIcelandic ;
2545            break ;
2546        case wxFONTENCODING_MACROMANIAN :
2547            enc = kCFStringEncodingMacRomanian ;
2548            break ;
2549        case wxFONTENCODING_MACCELTIC :
2550            enc = kCFStringEncodingMacCeltic ;
2551            break ;
2552        case wxFONTENCODING_MACGAELIC :
2553            enc = kCFStringEncodingMacGaelic ;
2554            break ;
2555//      case wxFONTENCODING_MACKEYBOARD :
2556//          enc = kCFStringEncodingMacKeyboardGlyphs ;
2557//          break ;
2558
2559        default :
2560            // because gcc is picky
2561            break ;
2562    }
2563
2564    return enc ;
2565}
2566
2567class wxMBConv_cocoa : public wxMBConv
2568{
2569public:
2570    wxMBConv_cocoa()
2571    {
2572        Init(CFStringGetSystemEncoding()) ;
2573    }
2574
2575    wxMBConv_cocoa(const wxMBConv_cocoa& conv)
2576    {
2577        m_encoding = conv.m_encoding;
2578    }
2579
2580#if wxUSE_FONTMAP
2581    wxMBConv_cocoa(const wxChar* name)
2582    {
2583        Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2584    }
2585#endif
2586
2587    wxMBConv_cocoa(wxFontEncoding encoding)
2588    {
2589        Init( wxCFStringEncFromFontEnc(encoding) );
2590    }
2591
2592    virtual ~wxMBConv_cocoa()
2593    {
2594    }
2595
2596    void Init( CFStringEncoding encoding)
2597    {
2598        m_encoding = encoding ;
2599    }
2600
2601    size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2602    {
2603        wxASSERT(szUnConv);
2604
2605        CFStringRef theString = CFStringCreateWithBytes (
2606                                                NULL, //the allocator
2607                                                (const UInt8*)szUnConv,
2608                                                strlen(szUnConv),
2609                                                m_encoding,
2610                                                false //no BOM/external representation
2611                                                );
2612
2613        wxASSERT(theString);
2614
2615        size_t nOutLength = CFStringGetLength(theString);
2616
2617        if (szOut == NULL)
2618        {
2619            CFRelease(theString);
2620            return nOutLength;
2621        }
2622
2623        CFRange theRange = { 0, nOutSize };
2624
2625#if SIZEOF_WCHAR_T == 4
2626        UniChar* szUniCharBuffer = new UniChar[nOutSize];
2627#endif
2628
2629        CFStringGetCharacters(theString, theRange, szUniCharBuffer);
2630
2631        CFRelease(theString);
2632
2633        szUniCharBuffer[nOutLength] = '\0';
2634
2635#if SIZEOF_WCHAR_T == 4
2636        wxMBConvUTF16 converter;
2637        converter.MB2WC( szOut, (const char*)szUniCharBuffer, nOutSize );
2638        delete [] szUniCharBuffer;
2639#endif
2640
2641        return nOutLength;
2642    }
2643
2644    size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2645    {
2646        wxASSERT(szUnConv);
2647
2648        size_t nRealOutSize;
2649        size_t nBufSize = wxWcslen(szUnConv);
2650        UniChar* szUniBuffer = (UniChar*) szUnConv;
2651
2652#if SIZEOF_WCHAR_T == 4
2653        wxMBConvUTF16 converter ;
2654        nBufSize = converter.WC2MB( NULL, szUnConv, 0 );
2655        szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1];
2656        converter.WC2MB( (char*) szUniBuffer, szUnConv, nBufSize + sizeof(UniChar));
2657        nBufSize /= sizeof(UniChar);
2658#endif
2659
2660        CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2661                                NULL, //allocator
2662                                szUniBuffer,
2663                                nBufSize,
2664                                kCFAllocatorNull //deallocator - we want to deallocate it ourselves
2665                            );
2666
2667        wxASSERT(theString);
2668
2669        //Note that CER puts a BOM when converting to unicode
2670        //so we  check and use getchars instead in that case
2671        if (m_encoding == kCFStringEncodingUnicode)
2672        {
2673            if (szOut != NULL)
2674                CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
2675
2676            nRealOutSize = CFStringGetLength(theString) + 1;
2677        }
2678        else
2679        {
2680            CFStringGetBytes(
2681                theString,
2682                CFRangeMake(0, CFStringGetLength(theString)),
2683                m_encoding,
2684                0, //what to put in characters that can't be converted -
2685                    //0 tells CFString to return NULL if it meets such a character
2686                false, //not an external representation
2687                (UInt8*) szOut,
2688                nOutSize,
2689                (CFIndex*) &nRealOutSize
2690                        );
2691        }
2692
2693        CFRelease(theString);
2694
2695#if SIZEOF_WCHAR_T == 4
2696        delete[] szUniBuffer;
2697#endif
2698
2699        return  nRealOutSize - 1;
2700    }
2701
2702    virtual wxMBConv *Clone() const { return new wxMBConv_cocoa(*this); }
2703
2704    bool IsOk() const
2705    {
2706        return m_encoding != kCFStringEncodingInvalidId &&
2707              CFStringIsEncodingAvailable(m_encoding);
2708    }
2709
2710private:
2711    CFStringEncoding m_encoding ;
2712};
2713
2714#endif // defined(__WXCOCOA__)
2715
2716// ============================================================================
2717// Mac conversion classes
2718// ============================================================================
2719
2720#if defined(__WXMAC__) && defined(TARGET_CARBON)
2721
2722class wxMBConv_mac : public wxMBConv
2723{
2724public:
2725    wxMBConv_mac()
2726    {
2727        Init(CFStringGetSystemEncoding()) ;
2728    }
2729
2730    wxMBConv_mac(const wxMBConv_mac& conv)
2731    {
2732        Init(conv.m_char_encoding);
2733    }
2734
2735#if wxUSE_FONTMAP
2736    wxMBConv_mac(const wxChar* name)
2737    {
2738        wxFontEncoding enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2739        Init( (enc != wxFONTENCODING_SYSTEM) ? wxMacGetSystemEncFromFontEnc( enc ) : kTextEncodingUnknown);
2740    }
2741#endif
2742
2743    wxMBConv_mac(wxFontEncoding encoding)
2744    {
2745        Init( wxMacGetSystemEncFromFontEnc(encoding) );
2746    }
2747
2748    virtual ~wxMBConv_mac()
2749    {
2750        OSStatus status = noErr ;
2751        if (m_MB2WC_converter)
2752            status = TECDisposeConverter(m_MB2WC_converter);
2753        if (m_WC2MB_converter)
2754            status = TECDisposeConverter(m_WC2MB_converter);
2755    }
2756
2757    void Init( TextEncodingBase encoding,TextEncodingVariant encodingVariant = kTextEncodingDefaultVariant ,
2758            TextEncodingFormat encodingFormat = kTextEncodingDefaultFormat)
2759    {
2760        m_MB2WC_converter = NULL ;
2761        m_WC2MB_converter = NULL ;
2762        if ( encoding != kTextEncodingUnknown )
2763        {
2764            m_char_encoding = CreateTextEncoding(encoding, encodingVariant, encodingFormat) ;
2765            m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, 0, kUnicode16BitFormat) ;
2766        }
2767        else
2768        {
2769            m_char_encoding = kTextEncodingUnknown;
2770            m_unicode_encoding = kTextEncodingUnknown;
2771        }
2772    }
2773
2774    virtual void CreateIfNeeded() const
2775    {
2776        if ( m_MB2WC_converter == NULL && m_WC2MB_converter == NULL &&
2777            m_char_encoding != kTextEncodingUnknown && m_unicode_encoding != kTextEncodingUnknown )
2778        {
2779            OSStatus status = noErr ;
2780            status = TECCreateConverter(&m_MB2WC_converter,
2781                                    m_char_encoding,
2782                                    m_unicode_encoding);
2783            wxASSERT_MSG( status == noErr , _("Unable to create TextEncodingConverter")) ;
2784            status = TECCreateConverter(&m_WC2MB_converter,
2785                                    m_unicode_encoding,
2786                                    m_char_encoding);
2787            wxASSERT_MSG( status == noErr , _("Unable to create TextEncodingConverter")) ;
2788        }
2789    }
2790
2791    size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2792    {
2793        CreateIfNeeded() ;
2794        OSStatus status = noErr ;
2795        ByteCount byteOutLen ;
2796        ByteCount byteInLen = strlen(psz) + 1;
2797        wchar_t *tbuf = NULL ;
2798        UniChar* ubuf = NULL ;
2799        size_t res = 0 ;
2800
2801        if (buf == NULL)
2802        {
2803            // Apple specs say at least 32
2804            n = wxMax( 32, byteInLen ) ;
2805            tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T ) ;
2806        }
2807
2808        ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2809
2810#if SIZEOF_WCHAR_T == 4
2811        ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2812#else
2813        ubuf = (UniChar*) (buf ? buf : tbuf) ;
2814#endif
2815        {
2816#if wxUSE_THREADS
2817            wxMutexLocker lock( m_MB2WC_guard );
2818#endif
2819            status = TECConvertText(
2820            m_MB2WC_converter, (ConstTextPtr) psz, byteInLen, &byteInLen,
2821            (TextPtr) ubuf, byteBufferLen, &byteOutLen);
2822        }
2823
2824#if SIZEOF_WCHAR_T == 4
2825        // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2826        // is not properly terminated we get random characters at the end
2827        ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
2828        wxMBConvUTF16 converter ;
2829        res = converter.MB2WC( (buf ? buf : tbuf), (const char*)ubuf, n ) ;
2830        free( ubuf ) ;
2831#else
2832        res = byteOutLen / sizeof( UniChar ) ;
2833#endif
2834
2835        if ( buf == NULL )
2836             free(tbuf) ;
2837
2838        if ( buf  && res < n)
2839            buf[res] = 0;
2840
2841        return res ;
2842    }
2843
2844    size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2845    {
2846        CreateIfNeeded() ;
2847        OSStatus status = noErr ;
2848        ByteCount byteOutLen ;
2849        ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2850
2851        char *tbuf = NULL ;
2852
2853        if (buf == NULL)
2854        {
2855            // Apple specs say at least 32
2856            n = wxMax( 32, ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2857            tbuf = (char*) malloc( n ) ;
2858        }
2859
2860        ByteCount byteBufferLen = n ;
2861        UniChar* ubuf = NULL ;
2862
2863#if SIZEOF_WCHAR_T == 4
2864        wxMBConvUTF16 converter ;
2865        size_t unicharlen = converter.WC2MB( NULL, psz, 0 ) ;
2866        byteInLen = unicharlen ;
2867        ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2868        converter.WC2MB( (char*) ubuf, psz, unicharlen + 2 ) ;
2869#else
2870        ubuf = (UniChar*) psz ;
2871#endif
2872
2873        {
2874#if wxUSE_THREADS
2875            wxMutexLocker lock( m_WC2MB_guard );
2876#endif
2877            status = TECConvertText(
2878            m_WC2MB_converter, (ConstTextPtr) ubuf, byteInLen, &byteInLen,
2879            (TextPtr) (buf ? buf : tbuf), byteBufferLen, &byteOutLen);
2880        }
2881
2882#if SIZEOF_WCHAR_T == 4
2883        free( ubuf ) ;
2884#endif
2885
2886        if ( buf == NULL )
2887            free(tbuf) ;
2888
2889        size_t res = byteOutLen ;
2890        if ( buf  && res < n)
2891        {
2892            buf[res] = 0;
2893
2894            //we need to double-trip to verify it didn't insert any ? in place
2895            //of bogus characters
2896            wxWCharBuffer wcBuf(n);
2897            size_t pszlen = wxWcslen(psz);
2898            if ( MB2WC(wcBuf.data(), buf, n) == wxCONV_FAILED ||
2899                        wxWcslen(wcBuf) != pszlen ||
2900                        memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2901            {
2902                // we didn't obtain the same thing we started from, hence
2903                // the conversion was lossy and we consider that it failed
2904                return wxCONV_FAILED;
2905            }
2906        }
2907
2908        return res ;
2909    }
2910
2911    virtual wxMBConv *Clone() const { return new wxMBConv_mac(*this); }
2912
2913    bool IsOk() const
2914    {
2915        CreateIfNeeded() ;
2916        return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL;
2917    }
2918
2919protected :
2920    mutable TECObjectRef m_MB2WC_converter;
2921    mutable TECObjectRef m_WC2MB_converter;
2922#if wxUSE_THREADS
2923    mutable wxMutex m_MB2WC_guard;
2924    mutable wxMutex m_WC2MB_guard;
2925#endif
2926
2927    TextEncodingBase m_char_encoding;
2928    TextEncodingBase m_unicode_encoding;
2929};
2930
2931// MB is decomposed (D) normalized UTF8
2932
2933class wxMBConv_macUTF8D : public wxMBConv_mac
2934{
2935public :
2936    wxMBConv_macUTF8D()
2937    {
2938        Init( kTextEncodingUnicodeDefault , kUnicodeNoSubset , kUnicodeUTF8Format ) ;
2939        m_uni = NULL;
2940        m_uniBack = NULL ;
2941    }
2942
2943    virtual ~wxMBConv_macUTF8D()
2944    {
2945        if (m_uni!=NULL)
2946            DisposeUnicodeToTextInfo(&m_uni);
2947        if (m_uniBack!=NULL)
2948            DisposeUnicodeToTextInfo(&m_uniBack);
2949    }
2950
2951    size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2952    {
2953        CreateIfNeeded() ;
2954        OSStatus status = noErr ;
2955        ByteCount byteOutLen ;
2956        ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2957
2958        char *tbuf = NULL ;
2959
2960        if (buf == NULL)
2961        {
2962            // Apple specs say at least 32
2963            n = wxMax( 32, ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2964            tbuf = (char*) malloc( n ) ;
2965        }
2966
2967        ByteCount byteBufferLen = n ;
2968        UniChar* ubuf = NULL ;
2969
2970#if SIZEOF_WCHAR_T == 4
2971        wxMBConvUTF16 converter ;
2972        size_t unicharlen = converter.WC2MB( NULL, psz, 0 ) ;
2973        byteInLen = unicharlen ;
2974        ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2975        converter.WC2MB( (char*) ubuf, psz, unicharlen + 2 ) ;
2976#else
2977        ubuf = (UniChar*) psz ;
2978#endif
2979
2980        // ubuf is a non-decomposed UniChar buffer
2981
2982        ByteCount dcubuflen = byteInLen * 2 + 2 ;
2983        ByteCount dcubufread , dcubufwritten ;
2984        UniChar *dcubuf = (UniChar*) malloc( dcubuflen ) ;
2985
2986        {
2987#if wxUSE_THREADS
2988            wxMutexLocker lock( m_WC2MB_guard );
2989#endif
2990            ConvertFromUnicodeToText( m_uni , byteInLen , ubuf ,
2991                kUnicodeDefaultDirectionMask, 0, NULL, NULL, NULL, dcubuflen  , &dcubufread , &dcubufwritten , dcubuf ) ;
2992
2993            // we now convert that decomposed buffer into UTF8
2994
2995            status = TECConvertText(
2996            m_WC2MB_converter, (ConstTextPtr) dcubuf, dcubufwritten, &dcubufread,
2997            (TextPtr) (buf ? buf : tbuf), byteBufferLen, &byteOutLen);
2998        }
2999
3000        free( dcubuf );
3001
3002#if SIZEOF_WCHAR_T == 4
3003        free( ubuf ) ;
3004#endif
3005
3006        if ( buf == NULL )
3007            free(tbuf) ;
3008
3009        size_t res = byteOutLen ;
3010        if ( buf  && res < n)
3011        {
3012            buf[res] = 0;
3013            // don't test for round-trip fidelity yet, we cannot guarantee it yet
3014        }
3015
3016        return res ;
3017    }
3018
3019    size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
3020    {
3021        CreateIfNeeded() ;
3022        OSStatus status = noErr ;
3023        ByteCount byteOutLen ;
3024        ByteCount byteInLen = strlen(psz) + 1;
3025        wchar_t *tbuf = NULL ;
3026        UniChar* ubuf = NULL ;
3027        size_t res = 0 ;
3028
3029        if (buf == NULL)
3030        {
3031            // Apple specs say at least 32
3032            n = wxMax( 32, byteInLen ) ;
3033            tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T ) ;
3034        }
3035
3036        ByteCount byteBufferLen = n * sizeof( UniChar ) ;
3037
3038#if SIZEOF_WCHAR_T == 4
3039        ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
3040#else
3041        ubuf = (UniChar*) (buf ? buf : tbuf) ;
3042#endif
3043
3044        ByteCount dcubuflen = byteBufferLen * 2 + 2 ;
3045        ByteCount dcubufread , dcubufwritten ;
3046        UniChar *dcubuf = (UniChar*) malloc( dcubuflen ) ;
3047
3048        {
3049#if wxUSE_THREADS
3050            wxMutexLocker lock( m_MB2WC_guard );
3051#endif
3052            status = TECConvertText(
3053                                m_MB2WC_converter, (ConstTextPtr) psz, byteInLen, &byteInLen,
3054                                (TextPtr) dcubuf, dcubuflen, &byteOutLen);
3055            // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
3056            // is not properly terminated we get random characters at the end
3057            dcubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
3058
3059            // now from the decomposed UniChar to properly composed uniChar
3060            ConvertFromUnicodeToText( m_uniBack , byteOutLen , dcubuf ,
3061                                  kUnicodeDefaultDirectionMask, 0, NULL, NULL, NULL, dcubuflen  , &dcubufread , &dcubufwritten , ubuf ) ;
3062        }
3063
3064        free( dcubuf );
3065        byteOutLen = dcubufwritten ;
3066        ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
3067
3068
3069#if SIZEOF_WCHAR_T == 4
3070        wxMBConvUTF16 converter ;
3071        res = converter.MB2WC( (buf ? buf : tbuf), (const char*)ubuf, n ) ;
3072        free( ubuf ) ;
3073#else
3074        res = byteOutLen / sizeof( UniChar ) ;
3075#endif
3076
3077        if ( buf == NULL )
3078            free(tbuf) ;
3079
3080        if ( buf  && res < n)
3081            buf[res] = 0;
3082
3083        return res ;
3084    }
3085
3086    virtual void CreateIfNeeded() const
3087    {
3088        wxMBConv_mac::CreateIfNeeded() ;
3089        if ( m_uni == NULL )
3090        {
3091            m_map.unicodeEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
3092                kUnicodeNoSubset, kTextEncodingDefaultFormat);
3093            m_map.otherEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
3094                kUnicodeCanonicalDecompVariant, kTextEncodingDefaultFormat);
3095            m_map.mappingVersion = kUnicodeUseLatestMapping;
3096
3097            OSStatus err = CreateUnicodeToTextInfo(&m_map, &m_uni);
3098            wxASSERT_MSG( err == noErr , _(" Couldn't create the UnicodeConverter")) ;
3099
3100            m_map.unicodeEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
3101                                                       kUnicodeNoSubset, kTextEncodingDefaultFormat);
3102            m_map.otherEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
3103                                                     kUnicodeCanonicalCompVariant, kTextEncodingDefaultFormat);
3104            m_map.mappingVersion = kUnicodeUseLatestMapping;
3105            err = CreateUnicodeToTextInfo(&m_map, &m_uniBack);
3106            wxASSERT_MSG( err == noErr , _(" Couldn't create the UnicodeConverter")) ;
3107        }
3108    }
3109protected :
3110    mutable UnicodeToTextInfo   m_uni;
3111    mutable UnicodeToTextInfo   m_uniBack;
3112    mutable UnicodeMapping      m_map;
3113};
3114#endif // defined(__WXMAC__) && defined(TARGET_CARBON)
3115
3116// ============================================================================
3117// wxEncodingConverter based conversion classes
3118// ============================================================================
3119
3120#if wxUSE_FONTMAP
3121
3122class wxMBConv_wxwin : public wxMBConv
3123{
3124private:
3125    void Init()
3126    {
3127        m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
3128               w2m.Init(wxFONTENCODING_UNICODE, m_enc);
3129    }
3130
3131public:
3132    // temporarily just use wxEncodingConverter stuff,
3133    // so that it works while a better implementation is built
3134    wxMBConv_wxwin(const wxChar* name)
3135    {
3136        if (name)
3137            m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
3138        else
3139            m_enc = wxFONTENCODING_SYSTEM;
3140
3141        Init();
3142    }
3143
3144    wxMBConv_wxwin(wxFontEncoding enc)
3145    {
3146        m_enc = enc;
3147
3148        Init();
3149    }
3150
3151    size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
3152    {
3153        size_t inbuf = strlen(psz);
3154        if (buf)
3155        {
3156            if (!m2w.Convert(psz, buf))
3157                return wxCONV_FAILED;
3158        }
3159        return inbuf;
3160    }
3161
3162    size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
3163    {
3164        const size_t inbuf = wxWcslen(psz);
3165        if (buf)
3166        {
3167            if (!w2m.Convert(psz, buf))
3168                return wxCONV_FAILED;
3169        }
3170
3171        return inbuf;
3172    }
3173
3174    virtual size_t GetMBNulLen() const
3175    {
3176        switch ( m_enc )
3177        {
3178            case wxFONTENCODING_UTF16BE:
3179            case wxFONTENCODING_UTF16LE:
3180                return 2;
3181
3182            case wxFONTENCODING_UTF32BE:
3183            case wxFONTENCODING_UTF32LE:
3184                return 4;
3185
3186            default:
3187                return 1;
3188        }
3189    }
3190
3191    virtual wxMBConv *Clone() const { return new wxMBConv_wxwin(m_enc); }
3192
3193    bool IsOk() const { return m_ok; }
3194
3195public:
3196    wxFontEncoding m_enc;
3197    wxEncodingConverter m2w, w2m;
3198
3199private:
3200    // were we initialized successfully?
3201    bool m_ok;
3202
3203    DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
3204};
3205
3206// make the constructors available for unit testing
3207WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const wxChar* name )
3208{
3209    wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
3210    if ( !result->IsOk() )
3211    {
3212        delete result;
3213        return 0;
3214    }
3215
3216    return result;
3217}
3218
3219#endif // wxUSE_FONTMAP
3220
3221// ============================================================================
3222// wxCSConv implementation
3223// ============================================================================
3224
3225void wxCSConv::Init()
3226{
3227    m_name = NULL;
3228    m_convReal =  NULL;
3229    m_deferred = true;
3230}
3231
3232wxCSConv::wxCSConv(const wxChar *charset)
3233{
3234    Init();
3235
3236    if ( charset )
3237    {
3238        SetName(charset);
3239    }
3240
3241#if wxUSE_FONTMAP
3242    m_encoding = wxFontMapperBase::GetEncodingFromName(charset);
3243    if ( m_encoding == wxFONTENCODING_MAX )
3244    {
3245        // set to unknown/invalid value
3246        m_encoding = wxFONTENCODING_SYSTEM;
3247    }
3248    else if ( m_encoding == wxFONTENCODING_DEFAULT )
3249    {
3250        // wxFONTENCODING_DEFAULT is same as US-ASCII in this context
3251        m_encoding = wxFONTENCODING_ISO8859_1;
3252    }
3253#else
3254    m_encoding = wxFONTENCODING_SYSTEM;
3255#endif
3256}
3257
3258wxCSConv::wxCSConv(wxFontEncoding encoding)
3259{
3260    if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
3261    {
3262        wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
3263
3264        encoding = wxFONTENCODING_SYSTEM;
3265    }
3266
3267    Init();
3268
3269    m_encoding = encoding;
3270}
3271
3272wxCSConv::~wxCSConv()
3273{
3274    Clear();
3275}
3276
3277wxCSConv::wxCSConv(const wxCSConv& conv)
3278        : wxMBConv()
3279{
3280    Init();
3281
3282    SetName(conv.m_name);
3283    m_encoding = conv.m_encoding;
3284}
3285
3286wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
3287{
3288    Clear();
3289
3290    SetName(conv.m_name);
3291    m_encoding = conv.m_encoding;
3292
3293    return *this;
3294}
3295
3296void wxCSConv::Clear()
3297{
3298    free(m_name);
3299    delete m_convReal;
3300
3301    m_name = NULL;
3302    m_convReal = NULL;
3303}
3304
3305void wxCSConv::SetName(const wxChar *charset)
3306{
3307    if (charset)
3308    {
3309        m_name = wxStrdup(charset);
3310        m_deferred = true;
3311    }
3312}
3313
3314#if wxUSE_FONTMAP
3315
// Hash map type associating an encoding with a charset name.
WX_DECLARE_HASH_MAP( wxFontEncoding, wxString, wxIntegerHash, wxIntegerEqual,
                     wxEncodingNameCache );

// Cache used by wxCSConv::DoCreate(): for each encoding it stores the name
// which worked with iconv or an empty string if none of the names did.
static wxEncodingNameCache gs_nameCache;
3320#endif
3321
3322wxMBConv *wxCSConv::DoCreate() const
3323{
3324#if wxUSE_FONTMAP
3325    wxLogTrace(TRACE_STRCONV,
3326               wxT("creating conversion for %s"),
3327               (m_name ? m_name
3328                       : wxFontMapperBase::GetEncodingName(m_encoding).c_str()));
3329#endif // wxUSE_FONTMAP
3330
3331    // check for the special case of ASCII or ISO8859-1 charset: as we have
3332    // special knowledge of it anyhow, we don't need to create a special
3333    // conversion object
3334    if ( m_encoding == wxFONTENCODING_ISO8859_1 ||
3335            m_encoding == wxFONTENCODING_DEFAULT )
3336    {
3337        // don't convert at all
3338        return NULL;
3339    }
3340
3341    // we trust OS to do conversion better than we can so try external
3342    // conversion methods first
3343    //
3344    // the full order is:
3345    //      1. OS conversion (iconv() under Unix or Win32 API)
3346    //      2. hard coded conversions for UTF
3347    //      3. wxEncodingConverter as fall back
3348
3349    // step (1)
3350#ifdef HAVE_ICONV
3351#if !wxUSE_FONTMAP
3352    if ( m_name )
3353#endif // !wxUSE_FONTMAP
3354    {
3355        wxString name(m_name);
3356#if wxUSE_FONTMAP
3357        wxFontEncoding encoding(m_encoding);
3358#endif
3359
3360        if ( !name.empty() )
3361        {
3362            wxMBConv_iconv *conv = new wxMBConv_iconv(name);
3363            if ( conv->IsOk() )
3364                return conv;
3365
3366            delete conv;
3367
3368#if wxUSE_FONTMAP
3369            encoding =
3370                wxFontMapperBase::Get()->CharsetToEncoding(name, false);
3371#endif // wxUSE_FONTMAP
3372        }
3373#if wxUSE_FONTMAP
3374        {
3375            const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding);
3376            if ( it != gs_nameCache.end() )
3377            {
3378                if ( it->second.empty() )
3379                    return NULL;
3380
3381                wxMBConv_iconv *conv = new wxMBConv_iconv(it->second);
3382                if ( conv->IsOk() )
3383                    return conv;
3384
3385                delete conv;
3386            }
3387
3388            const wxChar** names = wxFontMapperBase::GetAllEncodingNames(encoding);
3389            // CS : in case this does not return valid names (eg for MacRoman) encoding
3390            // got a 'failure' entry in the cache all the same, although it just has to
3391            // be created using a different method, so only store failed iconv creation
3392            // attempts (or perhaps we shoulnd't do this at all ?)
3393            if ( names[0] != NULL )
3394            {
3395                for ( ; *names; ++names )
3396                {
3397                    wxMBConv_iconv *conv = new wxMBConv_iconv(*names);
3398                    if ( conv->IsOk() )
3399                    {
3400                        gs_nameCache[encoding] = *names;
3401                        return conv;
3402                    }
3403
3404                    delete conv;
3405                }
3406
3407                gs_nameCache[encoding] = _T(""); // cache the failure
3408            }
3409        }
3410#endif // wxUSE_FONTMAP
3411    }
3412#endif // HAVE_ICONV
3413
3414#ifdef wxHAVE_WIN32_MB2WC
3415    {
3416#if wxUSE_FONTMAP
3417        wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
3418                                      : new wxMBConv_win32(m_encoding);
3419        if ( conv->IsOk() )
3420            return conv;
3421
3422        delete conv;
3423#else
3424        return NULL;
3425#endif
3426    }
3427#endif // wxHAVE_WIN32_MB2WC
3428
3429#if defined(__WXMAC__)
3430    {
3431        // leave UTF16 and UTF32 to the built-ins of wx
3432        if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
3433            ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
3434        {
3435#if wxUSE_FONTMAP
3436            wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
3437                                        : new wxMBConv_mac(m_encoding);
3438#else
3439            wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
3440#endif
3441            if ( conv->IsOk() )
3442                 return conv;
3443
3444            delete conv;
3445        }
3446    }
3447#endif
3448
3449#if defined(__WXCOCOA__)
3450    {
3451        if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
3452        {
3453#if wxUSE_FONTMAP
3454            wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
3455                                          : new wxMBConv_cocoa(m_encoding);
3456#else
3457            wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
3458#endif
3459
3460            if ( conv->IsOk() )
3461                 return conv;
3462
3463            delete conv;
3464        }
3465    }
3466#endif
3467    // step (2)
3468    wxFontEncoding enc = m_encoding;
3469#if wxUSE_FONTMAP
3470    if ( enc == wxFONTENCODING_SYSTEM && m_name )
3471    {
3472        // use "false" to suppress interactive dialogs -- we can be called from
3473        // anywhere and popping up a dialog from here is the last thing we want to
3474        // do
3475        enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
3476    }
3477#endif // wxUSE_FONTMAP
3478
3479    switch ( enc )
3480    {
3481        case wxFONTENCODING_UTF7:
3482             return new wxMBConvUTF7;
3483
3484        case wxFONTENCODING_UTF8:
3485             return new wxMBConvUTF8;
3486
3487        case wxFONTENCODING_UTF16BE:
3488             return new wxMBConvUTF16BE;
3489
3490        case wxFONTENCODING_UTF16LE:
3491             return new wxMBConvUTF16LE;
3492
3493        case wxFONTENCODING_UTF32BE:
3494             return new wxMBConvUTF32BE;
3495
3496        case wxFONTENCODING_UTF32LE:
3497             return new wxMBConvUTF32LE;
3498
3499        default:
3500             // nothing to do but put here to suppress gcc warnings
3501             break;
3502    }
3503
3504    // step (3)
3505#if wxUSE_FONTMAP
3506    {
3507        wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
3508                                      : new wxMBConv_wxwin(m_encoding);
3509        if ( conv->IsOk() )
3510            return conv;
3511
3512        delete conv;
3513    }
3514#endif // wxUSE_FONTMAP
3515
3516    // NB: This is a hack to prevent deadlock. What could otherwise happen
3517    //     in Unicode build: wxConvLocal creation ends up being here
3518    //     because of some failure and logs the error. But wxLog will try to
3519    //     attach a timestamp, for which it will need wxConvLocal (to convert
3520    //     time to char* and then wchar_t*), but that fails, tries to log the
3521    //     error, but wxLog has an (already locked) critical section that
3522    //     guards the static buffer.
3523    static bool alreadyLoggingError = false;
3524    if (!alreadyLoggingError)
3525    {
3526        alreadyLoggingError = true;
3527        wxLogError(_("Cannot convert from the charset '%s'!"),
3528                   m_name ? m_name
3529                      :
3530#if wxUSE_FONTMAP
3531                         wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
3532#else // !wxUSE_FONTMAP
3533                         wxString::Format(_("encoding %i"), m_encoding).c_str()
3534#endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
3535              );
3536
3537        alreadyLoggingError = false;
3538    }
3539
3540    return NULL;
3541}
3542
3543void wxCSConv::CreateConvIfNeeded() const
3544{
3545    if ( m_deferred )
3546    {
3547        wxCSConv *self = (wxCSConv *)this; // const_cast
3548
3549        // if we don't have neither the name nor the encoding, use the default
3550        // encoding for this system
3551        if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
3552        {
3553#if wxUSE_INTL
3554            self->m_encoding = wxLocale::GetSystemEncoding();
3555#else
3556            // fallback to some reasonable default:
3557            self->m_encoding = wxFONTENCODING_ISO8859_1;
3558#endif // wxUSE_INTL
3559        }
3560
3561        self->m_convReal = DoCreate();
3562        self->m_deferred = false;
3563    }
3564}
3565
3566bool wxCSConv::IsOk() const
3567{
3568    CreateConvIfNeeded();
3569
3570    // special case: no convReal created for wxFONTENCODING_ISO8859_1
3571    if ( m_encoding == wxFONTENCODING_ISO8859_1 )
3572        return true; // always ok as we do it ourselves
3573
3574    // m_convReal->IsOk() is called at its own creation, so we know it must
3575    // be ok if m_convReal is non-NULL
3576    return m_convReal != NULL;
3577}
3578
3579size_t wxCSConv::ToWChar(wchar_t *dst, size_t dstLen,
3580                         const char *src, size_t srcLen) const
3581{
3582    CreateConvIfNeeded();
3583
3584    if (m_convReal)
3585        return m_convReal->ToWChar(dst, dstLen, src, srcLen);
3586
3587    // latin-1 (direct)
3588    if ( srcLen == wxNO_LEN )
3589        srcLen = strlen(src) + 1; // take trailing NUL too
3590
3591    if ( dst )
3592    {
3593        if ( dstLen < srcLen )
3594            return wxCONV_FAILED;
3595
3596        for ( size_t n = 0; n < srcLen; n++ )
3597            dst[n] = (unsigned char)(src[n]);
3598    }
3599
3600    return srcLen;
3601}
3602
3603size_t wxCSConv::FromWChar(char *dst, size_t dstLen,
3604                           const wchar_t *src, size_t srcLen) const
3605{
3606    CreateConvIfNeeded();
3607
3608    if (m_convReal)
3609        return m_convReal->FromWChar(dst, dstLen, src, srcLen);
3610
3611    // latin-1 (direct)
3612    if ( srcLen == wxNO_LEN )
3613        srcLen = wxWcslen(src) + 1;
3614
3615    if ( dst )
3616    {
3617        if ( dstLen < srcLen )
3618            return wxCONV_FAILED;
3619
3620        for ( size_t n = 0; n < srcLen; n++ )
3621        {
3622            if ( src[n] > 0xFF )
3623                return wxCONV_FAILED;
3624
3625            dst[n] = (char)src[n];
3626        }
3627
3628    }
3629    else // still need to check the input validity
3630    {
3631        for ( size_t n = 0; n < srcLen; n++ )
3632        {
3633            if ( src[n] > 0xFF )
3634                return wxCONV_FAILED;
3635        }
3636    }
3637
3638    return srcLen;
3639}
3640
3641size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
3642{
3643    // this function exists only for ABI-compatibility in 2.8 branch
3644    return wxMBConv::MB2WC(buf, psz, n);
3645}
3646
3647size_t wxCSConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
3648{
3649    // this function exists only for ABI-compatibility in 2.8 branch
3650    return wxMBConv::WC2MB(buf, psz, n);
3651}
3652
3653size_t wxCSConv::GetMBNulLen() const
3654{
3655    CreateConvIfNeeded();
3656
3657    if ( m_convReal )
3658    {
3659        return m_convReal->GetMBNulLen();
3660    }
3661
3662    return 1;
3663}
3664
3665// ----------------------------------------------------------------------------
3666// globals
3667// ----------------------------------------------------------------------------
3668
// The object behind wxConvLibc: its type depends on the platform.
#ifdef __WINDOWS__
    static wxMBConv_win32 wxConvLibcObj;
#elif defined(__WXMAC__) && !defined(__MACH__)
    static wxMBConv_mac wxConvLibcObj ;
#else
    static wxMBConvLibc wxConvLibcObj;
#endif

// The objects behind the other predefined converters.
static wxCSConv wxConvLocalObj(wxFONTENCODING_SYSTEM);
static wxCSConv wxConvISO8859_1Obj(wxFONTENCODING_ISO8859_1);
static wxMBConvUTF7 wxConvUTF7Obj;
static wxMBConvUTF8 wxConvUTF8Obj;
#if defined(__WXMAC__) && defined(TARGET_CARBON)
static wxMBConv_macUTF8D wxConvMacUTF8DObj;
#endif

// The exported converters are references to the static objects above.
WXDLLIMPEXP_DATA_BASE(wxMBConv&) wxConvLibc = wxConvLibcObj;
WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvLocal = wxConvLocalObj;
WXDLLIMPEXP_DATA_BASE(wxCSConv&) wxConvISO8859_1 = wxConvISO8859_1Obj;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF7&) wxConvUTF7 = wxConvUTF7Obj;
WXDLLIMPEXP_DATA_BASE(wxMBConvUTF8&) wxConvUTF8 = wxConvUTF8Obj;

// The exported converter pointers: wxConvCurrent starts as the libc
// converter and wxConvUI as the local one.
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = &wxConvLibcObj;
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvUI = &wxConvLocal;

// wxConvFileName is initialized with the decomposed UTF-8 converter in
// Carbon wxMac builds, the plain UTF-8 one in other __WXOSX__ builds and the
// libc one everywhere else.
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = &
#ifdef __WXOSX__
#if defined(__WXMAC__) && defined(TARGET_CARBON)
                                    wxConvMacUTF8DObj;
#else
                                    wxConvUTF8Obj;
#endif
#else // !__WXOSX__
                                    wxConvLibcObj;
#endif // __WXOSX__/!__WXOSX__
3701
3702#if wxUSE_UNICODE
3703
3704wxWCharBuffer wxSafeConvertMB2WX(const char *s)
3705{
3706    if ( !s )
3707        return wxWCharBuffer();
3708
3709    wxWCharBuffer wbuf(wxConvLibc.cMB2WX(s));
3710    if ( !wbuf )
3711        wbuf = wxConvUTF8.cMB2WX(s);
3712    if ( !wbuf )
3713        wbuf = wxConvISO8859_1.cMB2WX(s);
3714
3715    return wbuf;
3716}
3717
3718wxCharBuffer wxSafeConvertWX2MB(const wchar_t *ws)
3719{
3720    if ( !ws )
3721        return wxCharBuffer();
3722
3723    wxCharBuffer buf(wxConvLibc.cWX2MB(ws));
3724    if ( !buf )
3725        buf = wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL).cWX2MB(ws);
3726
3727    return buf;
3728}
3729
3730#endif // wxUSE_UNICODE
3731
3732#else // !wxUSE_WCHAR_T
3733
// stand-ins in absence of wchar_t: the predefined converters are plain
// wxMBConv objects in this configuration
WXDLLIMPEXP_DATA_BASE(wxMBConv) wxConvLibc,
                                wxConvISO8859_1,
                                wxConvLocal,
                                wxConvUTF8;

// no current converter is set in this configuration
WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = NULL;
3741
3742#endif // wxUSE_WCHAR_T/!wxUSE_WCHAR_T
3743