1/////////////////////////////////////////////////////////////////////////////// 2// Name: src/common/convauto.cpp 3// Purpose: implementation of wxConvAuto 4// Author: Vadim Zeitlin 5// Created: 2006-04-04 6// RCS-ID: $Id: convauto.cpp 38570 2006-04-05 14:37:47Z VZ $ 7// Copyright: (c) 2006 Vadim Zeitlin <vadim@wxwindows.org> 8// Licence: wxWindows licence 9/////////////////////////////////////////////////////////////////////////////// 10 11// ============================================================================ 12// declarations 13// ============================================================================ 14 15// ---------------------------------------------------------------------------- 16// headers 17// ---------------------------------------------------------------------------- 18 19// for compilers that support precompilation, includes "wx.h". 20#include "wx/wxprec.h" 21 22#ifdef __BORLANDC__ 23 #pragma hdrstop 24#endif 25 26#if wxUSE_WCHAR_T 27 28#ifndef WX_PRECOMP 29#endif //WX_PRECOMP 30 31#include "wx/convauto.h" 32 33// ============================================================================ 34// implementation 35// ============================================================================ 36 37/* static */ 38wxConvAuto::BOMType wxConvAuto::DetectBOM(const char *src, size_t srcLen) 39{ 40 if ( srcLen < 2 ) 41 { 42 // minimal BOM is 2 bytes so bail out immediately and simplify the code 43 // below which wouldn't need to check for length for UTF-16 cases 44 return BOM_None; 45 } 46 47 // examine the buffer for BOM presence 48 // 49 // see http://www.unicode.org/faq/utf_bom.html#BOM 50 switch ( *src++ ) 51 { 52 case '\0': 53 // could only be big endian UTF-32 (00 00 FE FF) 54 if ( srcLen >= 4 && 55 src[0] == '\0' && 56 src[1] == '\xfe' && 57 src[2] == '\xff' ) 58 { 59 return BOM_UTF32BE; 60 } 61 break; 62 63 case '\xfe': 64 // could only be big endian UTF-16 (FE FF) 65 if ( *src++ == '\xff' ) 66 { 67 return BOM_UTF16BE; 68 } 69 break; 70 71 case '\xff': 72 // could be either little endian UTF-16 or UTF-32, both start 73 // with FF FE 74 if ( *src++ == '\xfe' ) 75 { 76 return srcLen >= 4 && src[0] == '\0' && src[1] == '\0' 77 ? BOM_UTF32LE 78 : BOM_UTF16LE; 79 } 80 break; 81 82 case '\xef': 83 // is this UTF-8 BOM (EF BB BF)? 84 if ( srcLen >= 3 && src[0] == '\xbb' && src[1] == '\xbf' ) 85 { 86 return BOM_UTF8; 87 } 88 break; 89 } 90 91 return BOM_None; 92} 93 94void wxConvAuto::InitFromBOM(BOMType bomType) 95{ 96 m_consumedBOM = false; 97 98 switch ( bomType ) 99 { 100 case BOM_UTF32BE: 101 m_conv = new wxMBConvUTF32BE; 102 m_ownsConv = true; 103 break; 104 105 case BOM_UTF32LE: 106 m_conv = new wxMBConvUTF32LE; 107 m_ownsConv = true; 108 break; 109 110 case BOM_UTF16BE: 111 m_conv = new wxMBConvUTF16BE; 112 m_ownsConv = true; 113 break; 114 115 case BOM_UTF16LE: 116 m_conv = new wxMBConvUTF16LE; 117 m_ownsConv = true; 118 break; 119 120 case BOM_UTF8: 121 m_conv = &wxConvUTF8; 122 m_ownsConv = false; 123 break; 124 125 default: 126 wxFAIL_MSG( _T("unexpected BOM type") ); 127 // fall through: still need to create something 128 129 case BOM_None: 130 InitWithDefault(); 131 m_consumedBOM = true; // as there is nothing to consume 132 } 133} 134 135void wxConvAuto::SkipBOM(const char **src, size_t *len) const 136{ 137 int ofs; 138 switch ( m_bomType ) 139 { 140 case BOM_UTF32BE: 141 case BOM_UTF32LE: 142 ofs = 4; 143 break; 144 145 case BOM_UTF16BE: 146 case BOM_UTF16LE: 147 ofs = 2; 148 break; 149 150 case BOM_UTF8: 151 ofs = 3; 152 break; 153 154 default: 155 wxFAIL_MSG( _T("unexpected BOM type") ); 156 // fall through: still need to create something 157 158 case BOM_None: 159 ofs = 0; 160 } 161 162 *src += ofs; 163 if ( *len != (size_t)-1 ) 164 *len -= ofs; 165} 166 167void wxConvAuto::InitFromInput(const char **src, size_t *len) 168{ 169 m_bomType = DetectBOM(*src, *len); 170 InitFromBOM(m_bomType); 171 SkipBOM(src, len); 172} 173 174size_t 175wxConvAuto::ToWChar(wchar_t *dst, size_t dstLen, 176 const char *src, size_t srcLen) const 177{ 178 // we check BOM and create the appropriate conversion the first time we're 179 // called but we also need to ensure that the BOM is skipped not only 180 // during this initial call but also during the first call with non-NULL 181 // dst as typically we're first called with NULL dst to calculate the 182 // needed buffer size 183 wxConvAuto *self = wx_const_cast(wxConvAuto *, this); 184 if ( !m_conv ) 185 { 186 self->InitFromInput(&src, &srcLen); 187 if ( dst ) 188 self->m_consumedBOM = true; 189 } 190 191 if ( !m_consumedBOM && dst ) 192 { 193 self->m_consumedBOM = true; 194 SkipBOM(&src, &srcLen); 195 } 196 197 return m_conv->ToWChar(dst, dstLen, src, srcLen); 198} 199 200size_t 201wxConvAuto::FromWChar(char *dst, size_t dstLen, 202 const wchar_t *src, size_t srcLen) const 203{ 204 if ( !m_conv ) 205 { 206 // default to UTF-8 for the multibyte output 207 wx_const_cast(wxConvAuto *, this)->InitWithDefault(); 208 } 209 210 return m_conv->FromWChar(dst, dstLen, src, srcLen); 211} 212 213#endif // wxUSE_WCHAR_T 214 215