1///////////////////////////////////////////////////////////////////////////// 2// Name: wx/encconv.h 3// Purpose: wxEncodingConverter class for converting between different 4// font encodings 5// Author: Vaclav Slavik 6// Copyright: (c) 1999 Vaclav Slavik 7// Licence: wxWindows licence 8///////////////////////////////////////////////////////////////////////////// 9 10#ifndef _WX_ENCCONV_H_ 11#define _WX_ENCCONV_H_ 12 13#include "wx/defs.h" 14 15#include "wx/object.h" 16#include "wx/fontenc.h" 17#include "wx/dynarray.h" 18 19// ---------------------------------------------------------------------------- 20// constants 21// ---------------------------------------------------------------------------- 22 23enum 24{ 25 wxCONVERT_STRICT, 26 wxCONVERT_SUBSTITUTE 27}; 28 29 30enum 31{ 32 wxPLATFORM_CURRENT = -1, 33 34 wxPLATFORM_UNIX = 0, 35 wxPLATFORM_WINDOWS, 36 wxPLATFORM_OS2, 37 wxPLATFORM_MAC 38}; 39 40// ---------------------------------------------------------------------------- 41// types 42// ---------------------------------------------------------------------------- 43 44WX_DEFINE_ARRAY_INT(wxFontEncoding, wxFontEncodingArray); 45 46//-------------------------------------------------------------------------------- 47// wxEncodingConverter 48// This class is capable of converting strings between any two 49// 8bit encodings/charsets. It can also convert from/to Unicode 50//-------------------------------------------------------------------------------- 51 52class WXDLLIMPEXP_BASE wxEncodingConverter : public wxObject 53{ 54 public: 55 56 wxEncodingConverter(); 57 virtual ~wxEncodingConverter() { if (m_Table) delete[] m_Table; } 58 59 // Initialize conversion. Both output or input encoding may 60 // be wxFONTENCODING_UNICODE, but only if wxUSE_WCHAR_T is set to 1. 61 // 62 // All subsequent calls to Convert() will interpret it's argument 63 // as a string in input_enc encoding and will output string in 64 // output_enc encoding. 65 // 66 // You must call this method before calling Convert. You may call 67 // it more than once in order to switch to another conversion 68 // 69 // Method affects behaviour of Convert() in case input character 70 // cannot be converted because it does not exist in output encoding: 71 // wxCONVERT_STRICT -- 72 // follow behaviour of GNU Recode - just copy unconvertable 73 // characters to output and don't change them (it's integer 74 // value will stay the same) 75 // wxCONVERT_SUBSTITUTE -- 76 // try some (lossy) substitutions - e.g. replace 77 // unconvertable latin capitals with acute by ordinary 78 // capitals, replace en-dash or em-dash by '-' etc. 79 // both modes gurantee that output string will have same length 80 // as input string 81 // 82 // Returns false if given conversion is impossible, true otherwise 83 // (conversion may be impossible either if you try to convert 84 // to Unicode with non-Unicode build of wxWidgets or if input 85 // or output encoding is not supported.) 86 bool Init(wxFontEncoding input_enc, wxFontEncoding output_enc, int method = wxCONVERT_STRICT); 87 88 // Convert input string according to settings passed to Init. 89 // Note that you must call Init before using Convert! 90 bool Convert(const char* input, char* output) const; 91 bool Convert(char* str) const { return Convert(str, str); } 92 wxString Convert(const wxString& input) const; 93 94#if wxUSE_WCHAR_T 95 bool Convert(const char* input, wchar_t* output) const; 96 bool Convert(const wchar_t* input, char* output) const; 97 bool Convert(const wchar_t* input, wchar_t* output) const; 98 bool Convert(wchar_t* str) const { return Convert(str, str); } 99#endif 100 // Return equivalent(s) for given font that are used 101 // under given platform. wxPLATFORM_CURRENT means the plaform 102 // this binary was compiled for 103 // 104 // Examples: 105 // current platform enc returned value 106 // ----------------------------------------------------- 107 // unix CP1250 {ISO8859_2} 108 // unix ISO8859_2 {} 109 // windows ISO8859_2 {CP1250} 110 // 111 // Equivalence is defined in terms of convertibility: 112 // 2 encodings are equivalent if you can convert text between 113 // then without loosing information (it may - and will - happen 114 // that you loose special chars like quotation marks or em-dashes 115 // but you shouldn't loose any diacritics and language-specific 116 // characters when converting between equivalent encodings). 117 // 118 // Convert() method is not limited to converting between 119 // equivalent encodings, it can convert between arbitrary 120 // two encodings! 121 // 122 // Remember that this function does _NOT_ check for presence of 123 // fonts in system. It only tells you what are most suitable 124 // encodings. (It usually returns only one encoding) 125 // 126 // Note that argument enc itself may be present in returned array! 127 // (so that you can -- as a side effect -- detect whether the 128 // encoding is native for this platform or not) 129 static wxFontEncodingArray GetPlatformEquivalents(wxFontEncoding enc, int platform = wxPLATFORM_CURRENT); 130 131 // Similar to GetPlatformEquivalent, but this one will return ALL 132 // equivalent encodings, regardless the platform, including itself. 133 static wxFontEncodingArray GetAllEquivalents(wxFontEncoding enc); 134 135 // Return true if [any text in] one multibyte encoding can be 136 // converted to another one losslessly. 137 // 138 // Do not call this with wxFONTENCODING_UNICODE, it doesn't make 139 // sense (always works in one sense and always depends on the text 140 // to convert in the other) 141 static bool CanConvert(wxFontEncoding encIn, wxFontEncoding encOut) 142 { 143 return GetAllEquivalents(encIn).Index(encOut) != wxNOT_FOUND; 144 } 145 146 private: 147 148#if wxUSE_WCHAR_T 149 wchar_t *m_Table; 150#else 151 char *m_Table; 152#endif 153 bool m_UnicodeInput, m_UnicodeOutput; 154 bool m_JustCopy; 155 156 DECLARE_NO_COPY_CLASS(wxEncodingConverter) 157}; 158 159#endif // _WX_ENCCONV_H_ 160