1/////////////////////////////////////////////////////////////////////////////
2// Name:        wx/encconv.h
3// Purpose:     wxEncodingConverter class for converting between different
4//              font encodings
5// Author:      Vaclav Slavik
6// Copyright:   (c) 1999 Vaclav Slavik
7// Licence:     wxWindows licence
8/////////////////////////////////////////////////////////////////////////////
9
10#ifndef _WX_ENCCONV_H_
11#define _WX_ENCCONV_H_
12
13#include "wx/defs.h"
14
15#include "wx/object.h"
16#include "wx/fontenc.h"
17#include "wx/dynarray.h"
18
19// ----------------------------------------------------------------------------
20// constants
21// ----------------------------------------------------------------------------
22
// Conversion methods accepted by wxEncodingConverter::Init(). They select what
// Convert() does with a character that does not exist in the output encoding.
enum
{
    // copy the unconvertible character to the output unchanged
    wxCONVERT_STRICT = 0,

    // try a (lossy) substitution for the unconvertible character
    wxCONVERT_SUBSTITUTE = 1
};
28
29
// Platform identifiers accepted by
// wxEncodingConverter::GetPlatformEquivalents().
enum
{
    // the platform this binary was compiled for
    wxPLATFORM_CURRENT = -1,

    // explicitly named platforms
    wxPLATFORM_UNIX    = 0,
    wxPLATFORM_WINDOWS = 1,
    wxPLATFORM_OS2     = 2,
    wxPLATFORM_MAC     = 3
};
39
40// ----------------------------------------------------------------------------
41// types
42// ----------------------------------------------------------------------------
43
// Array of wxFontEncoding values; this is the type returned by
// wxEncodingConverter::GetPlatformEquivalents() and GetAllEquivalents().
WX_DEFINE_ARRAY_INT(wxFontEncoding, wxFontEncodingArray);
45
//--------------------------------------------------------------------------------
// wxEncodingConverter
//                  This class is capable of converting strings between any two
//                  8-bit encodings/charsets. It can also convert from/to Unicode.
//--------------------------------------------------------------------------------
51
52class WXDLLIMPEXP_BASE wxEncodingConverter : public wxObject
53{
54    public:
55
56            wxEncodingConverter();
57            virtual ~wxEncodingConverter() { if (m_Table) delete[] m_Table; }
58
59            // Initialize conversion. Both output or input encoding may
60            // be wxFONTENCODING_UNICODE, but only if wxUSE_WCHAR_T is set to 1.
61            //
62            // All subsequent calls to Convert() will interpret it's argument
63            // as a string in input_enc encoding and will output string in
64            // output_enc encoding.
65            //
66            // You must call this method before calling Convert. You may call
67            // it more than once in order to switch to another conversion
68            //
69            // Method affects behaviour of Convert() in case input character
70            // cannot be converted because it does not exist in output encoding:
71            //     wxCONVERT_STRICT --
72            //              follow behaviour of GNU Recode - just copy unconvertable
73            //              characters to output and don't change them (it's integer
74            //              value will stay the same)
75            //     wxCONVERT_SUBSTITUTE --
76            //              try some (lossy) substitutions - e.g. replace
77            //              unconvertable latin capitals with acute by ordinary
78            //              capitals, replace en-dash or em-dash by '-' etc.
79            //     both modes gurantee that output string will have same length
80            //     as input string
81            //
82            // Returns false if given conversion is impossible, true otherwise
83            // (conversion may be impossible either if you try to convert
84            // to Unicode with non-Unicode build of wxWidgets or if input
85            // or output encoding is not supported.)
86            bool Init(wxFontEncoding input_enc, wxFontEncoding output_enc, int method = wxCONVERT_STRICT);
87
88            // Convert input string according to settings passed to Init.
89            // Note that you must call Init before using Convert!
90            bool Convert(const char* input, char* output) const;
91            bool Convert(char* str) const { return Convert(str, str); }
92            wxString Convert(const wxString& input) const;
93
94#if wxUSE_WCHAR_T
95            bool Convert(const char* input, wchar_t* output) const;
96            bool Convert(const wchar_t* input, char* output) const;
97            bool Convert(const wchar_t* input, wchar_t* output) const;
98            bool Convert(wchar_t* str) const { return Convert(str, str); }
99#endif
100            // Return equivalent(s) for given font that are used
101            // under given platform. wxPLATFORM_CURRENT means the plaform
102            // this binary was compiled for
103            //
104            // Examples:
105            //     current platform          enc    returned value
106            // -----------------------------------------------------
107            //     unix                   CP1250         {ISO8859_2}
108            //     unix                ISO8859_2                  {}
109            //     windows             ISO8859_2            {CP1250}
110            //
111            // Equivalence is defined in terms of convertibility:
112            // 2 encodings are equivalent if you can convert text between
113            // then without loosing information (it may - and will - happen
114            // that you loose special chars like quotation marks or em-dashes
115            // but you shouldn't loose any diacritics and language-specific
116            // characters when converting between equivalent encodings).
117            //
118            // Convert() method is not limited to converting between
119            // equivalent encodings, it can convert between arbitrary
120            // two encodings!
121            //
122            // Remember that this function does _NOT_ check for presence of
123            // fonts in system. It only tells you what are most suitable
124            // encodings. (It usually returns only one encoding)
125            //
126            // Note that argument enc itself may be present in returned array!
127            // (so that you can -- as a side effect -- detect whether the
128            // encoding is native for this platform or not)
129            static wxFontEncodingArray GetPlatformEquivalents(wxFontEncoding enc, int platform = wxPLATFORM_CURRENT);
130
131            // Similar to GetPlatformEquivalent, but this one will return ALL
132            // equivalent encodings, regardless the platform, including itself.
133            static wxFontEncodingArray GetAllEquivalents(wxFontEncoding enc);
134
135            // Return true if [any text in] one multibyte encoding can be
136            // converted to another one losslessly.
137            //
138            // Do not call this with wxFONTENCODING_UNICODE, it doesn't make
139            // sense (always works in one sense and always depends on the text
140            // to convert in the other)
141            static bool CanConvert(wxFontEncoding encIn, wxFontEncoding encOut)
142            {
143                return GetAllEquivalents(encIn).Index(encOut) != wxNOT_FOUND;
144            }
145
146    private:
147
148#if wxUSE_WCHAR_T
149            wchar_t *m_Table;
150#else
151            char *m_Table;
152#endif
153            bool m_UnicodeInput, m_UnicodeOutput;
154            bool m_JustCopy;
155
156    DECLARE_NO_COPY_CLASS(wxEncodingConverter)
157};
158
159#endif  // _WX_ENCCONV_H_
160