1/**************************************************************************** 2** libebml : parse EBML files, see http://embl.sourceforge.net/ 3** 4** <file/class description> 5** 6** Copyright (C) 2002-2005 Steve Lhomme. All rights reserved. 7** 8** This file is part of libebml. 9** 10** This library is free software; you can redistribute it and/or 11** modify it under the terms of the GNU Lesser General Public 12** License as published by the Free Software Foundation; either 13** version 2.1 of the License, or (at your option) any later version. 14** 15** This library is distributed in the hope that it will be useful, 16** but WITHOUT ANY WARRANTY; without even the implied warranty of 17** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18** Lesser General Public License for more details. 19** 20** You should have received a copy of the GNU Lesser General Public 21** License along with this library; if not, write to the Free Software 22** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 23** 24** See http://www.matroska.org/license/lgpl/ for LGPL licensing information. 25** 26** Contact license@matroska.org if any conditions of this licensing are 27** not clear to you. 28** 29**********************************************************************/ 30 31/*! 32 \file 33 \version \$Id: EbmlUnicodeString.cpp 1079 2005-03-03 13:18:14Z robux4 $ 34 \author Steve Lhomme <robux4 @ users.sf.net> 35 \author Jory Stone <jcsston @ toughguy.net> 36*/ 37 38#include <cassert> 39 40#if __GNUC__ == 2 && ! defined ( __OpenBSD__ ) 41#include <wchar.h> 42#endif 43 44#include "ebml/EbmlUnicodeString.h" 45 46START_LIBEBML_NAMESPACE 47 48// ===================== UTFstring class =================== 49 50UTFstring::UTFstring() 51 :_Length(0) 52 ,_Data(NULL) 53{} 54 55UTFstring::UTFstring(const wchar_t * _aBuf) 56 :_Length(0) 57 ,_Data(NULL) 58{ 59 *this = _aBuf; 60} 61 62UTFstring::~UTFstring() 63{ 64 delete [] _Data; 65} 66 67UTFstring::UTFstring(const UTFstring & _aBuf) 68 :_Length(0) 69 ,_Data(NULL) 70{ 71 *this = _aBuf.c_str(); 72} 73 74UTFstring & UTFstring::operator=(const UTFstring & _aBuf) 75{ 76 *this = _aBuf.c_str(); 77 return *this; 78} 79 80UTFstring & UTFstring::operator=(const wchar_t * _aBuf) 81{ 82 delete [] _Data; 83 if (_aBuf == NULL) { 84 _Data = new wchar_t[1]; 85 _Data[0] = 0; 86 UpdateFromUCS2(); 87 return *this; 88 } 89 90 size_t aLen; 91 for (aLen=0; _aBuf[aLen] != 0; aLen++); 92 _Length = aLen; 93 _Data = new wchar_t[_Length+1]; 94 for (aLen=0; _aBuf[aLen] != 0; aLen++) { 95 _Data[aLen] = _aBuf[aLen]; 96 } 97 _Data[aLen] = 0; 98 UpdateFromUCS2(); 99 return *this; 100} 101 102UTFstring & UTFstring::operator=(wchar_t _aChar) 103{ 104 delete [] _Data; 105 _Data = new wchar_t[2]; 106 _Length = 1; 107 _Data[0] = _aChar; 108 _Data[1] = 0; 109 UpdateFromUCS2(); 110 return *this; 111} 112 113bool UTFstring::operator==(const UTFstring& _aStr) const 114{ 115 if ((_Data == NULL) && (_aStr._Data == NULL)) 116 return true; 117 if ((_Data == NULL) || (_aStr._Data == NULL)) 118 return false; 119 return wcscmp_internal(_Data, _aStr._Data); 120} 121 122void UTFstring::SetUTF8(const std::string & _aStr) 123{ 124 UTF8string = _aStr; 125 UpdateFromUTF8(); 126} 127 128/*! 129 \see RFC 2279 130*/ 131void UTFstring::UpdateFromUTF8() 132{ 133 delete [] _Data; 134 // find the size of the final UCS-2 string 135 size_t i; 136 for (_Length=0, i=0; i<UTF8string.length(); _Length++) { 137 if ((UTF8string[i] & 0x80) == 0) { 138 i++; 139 } else if ((UTF8string[i] & 0x20) == 0) { 140 i += 2; 141 } else if ((UTF8string[i] & 0x10) == 0) { 142 i += 3; 143 } 144 } 145 _Data = new wchar_t[_Length+1]; 146 size_t j; 147 for (j=0, i=0; i<UTF8string.length(); j++) { 148 if ((UTF8string[i] & 0x80) == 0) { 149 _Data[j] = UTF8string[i]; 150 i++; 151 } else if ((UTF8string[i] & 0x20) == 0) { 152 _Data[j] = ((UTF8string[i] & 0x1F) << 6) + (UTF8string[i+1] & 0x3F); 153 i += 2; 154 } else if ((UTF8string[i] & 0x10) == 0) { 155 _Data[j] = ((UTF8string[i] & 0x0F) << 12) + ((UTF8string[i+1] & 0x3F) << 6) + (UTF8string[i+2] & 0x3F); 156 i += 3; 157 } 158 } 159 _Data[j] = 0; 160} 161 162void UTFstring::UpdateFromUCS2() 163{ 164 // find the size of the final UTF-8 string 165 size_t i,Size=0; 166 for (i=0; i<_Length; i++) 167 { 168 if (_Data[i] < 0x80) { 169 Size++; 170 } else if (_Data[i] < 0x800) { 171 Size += 2; 172 } else if (_Data[i] < 0x10000) { 173 Size += 3; 174 } 175 } 176 std::string::value_type *tmpStr = new std::string::value_type[Size+1]; 177 for (i=0, Size=0; i<_Length; i++) 178 { 179 if (_Data[i] < 0x80) { 180 tmpStr[Size++] = _Data[i]; 181 } else if (_Data[i] < 0x800) { 182 tmpStr[Size++] = 0xC0 | (_Data[i] >> 6); 183 tmpStr[Size++] = 0x80 | (_Data[i] & 0x3F); 184 } else if (_Data[i] < 0x10000) { 185 tmpStr[Size++] = 0xE0 | (_Data[i] >> 12); 186 tmpStr[Size++] = 0x80 | ((_Data[i] >> 6) & 0x3F); 187 tmpStr[Size++] = 0x80 | (_Data[i] & 0x3F); 188 } 189 } 190 tmpStr[Size] = 0; 191 UTF8string = tmpStr; // implicit conversion 192 delete [] tmpStr; 193 194} 195 196bool UTFstring::wcscmp_internal(const wchar_t *str1, const wchar_t *str2) 197{ 198 size_t Index=0; 199 while (str1[Index] == str2[Index] && str1[Index] != 0) { 200 Index++; 201 } 202 return (str1[Index] == str2[Index]); 203} 204 205// ===================== EbmlUnicodeString class =================== 206 207EbmlUnicodeString::EbmlUnicodeString() 208:EbmlElement(0, false) 209{ 210 DefaultSize = 0; 211} 212 213EbmlUnicodeString::EbmlUnicodeString(const UTFstring & aDefaultValue) 214:EbmlElement(0, true), Value(aDefaultValue), DefaultValue(aDefaultValue) 215{ 216 DefaultSize = 0; 217 DefaultIsSet = true; 218} 219 220EbmlUnicodeString::EbmlUnicodeString(const EbmlUnicodeString & ElementToClone) 221 :EbmlElement(ElementToClone) 222 ,Value(ElementToClone.Value) 223 ,DefaultValue(ElementToClone.DefaultValue) 224{ 225} 226 227/*! 228\note limited to UCS-2 229\todo handle exception on errors 230*/ 231uint32 EbmlUnicodeString::RenderData(IOCallback & output, bool bForceRender, bool bKeepIntact) 232{ 233 uint32 Result = Value.GetUTF8().length(); 234 235 if (Result != 0) { 236 output.writeFully(Value.GetUTF8().c_str(), Result); 237 } 238 239 if (Result < DefaultSize) { 240 // pad the rest with 0 241 binary *Pad = new binary[DefaultSize - Result]; 242 if (Pad != NULL) { 243 memset(Pad, 0x00, DefaultSize - Result); 244 output.writeFully(Pad, DefaultSize - Result); 245 246 Result = DefaultSize; 247 delete [] Pad; 248 } 249 } 250 251 return Result; 252} 253 254EbmlUnicodeString & EbmlUnicodeString::operator=(const UTFstring & NewString) 255{ 256 Value = NewString; 257 bValueIsSet = true; 258 return *this; 259} 260 261/*! 262\note limited to UCS-2 263*/ 264uint64 EbmlUnicodeString::UpdateSize(bool bKeepIntact, bool bForceRender) 265{ 266 if (!bKeepIntact && IsDefaultValue()) 267 return 0; 268 269 Size = Value.GetUTF8().length(); 270 if (Size < DefaultSize) 271 Size = DefaultSize; 272 273 return Size; 274} 275 276/*! 277 \note limited to UCS-2 278*/ 279uint64 EbmlUnicodeString::ReadData(IOCallback & input, ScopeMode ReadFully) 280{ 281 if (ReadFully != SCOPE_NO_DATA) 282 { 283 if (Size == 0) { 284 Value = UTFstring::value_type(0); 285 bValueIsSet = true; 286 } else { 287 char *Buffer = new char[Size+1]; 288 if (Buffer == NULL) { 289 // impossible to read, skip it 290 input.setFilePointer(Size, seek_current); 291 } else { 292 input.readFully(Buffer, Size); 293 if (Buffer[Size-1] != 0) { 294 Buffer[Size] = 0; 295 } 296 297 Value.SetUTF8(Buffer); // implicit conversion to std::string 298 delete [] Buffer; 299 bValueIsSet = true; 300 } 301 } 302 } 303 304 return Size; 305} 306 307END_LIBEBML_NAMESPACE 308