1/****************************************************************************
** libebml : parse EBML files, see http://ebml.sourceforge.net/
3**
** Implementation of UTFstring (UCS-2 / UTF-8 string holder) and EbmlUnicodeString
5**
6** Copyright (C) 2002-2005 Steve Lhomme.  All rights reserved.
7**
8** This file is part of libebml.
9**
10** This library is free software; you can redistribute it and/or
11** modify it under the terms of the GNU Lesser General Public
12** License as published by the Free Software Foundation; either
13** version 2.1 of the License, or (at your option) any later version.
14**
15** This library is distributed in the hope that it will be useful,
16** but WITHOUT ANY WARRANTY; without even the implied warranty of
17** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18** Lesser General Public License for more details.
19**
20** You should have received a copy of the GNU Lesser General Public
21** License along with this library; if not, write to the Free Software
22** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
23**
24** See http://www.matroska.org/license/lgpl/ for LGPL licensing information.
25**
26** Contact license@matroska.org if any conditions of this licensing are
27** not clear to you.
28**
29**********************************************************************/
30
31/*!
32	\file
33	\version \$Id: EbmlUnicodeString.cpp 1079 2005-03-03 13:18:14Z robux4 $
34	\author Steve Lhomme     <robux4 @ users.sf.net>
35	\author Jory Stone       <jcsston @ toughguy.net>
36*/
37
#include <cassert>
#include <new>
#include <string>

#if __GNUC__ == 2 && ! defined ( __OpenBSD__ )
#include <wchar.h>
#endif

#include "ebml/EbmlUnicodeString.h"
45
46START_LIBEBML_NAMESPACE
47
48// ===================== UTFstring class ===================
49
50UTFstring::UTFstring()
51	:_Length(0)
52	,_Data(NULL)
53{}
54
55UTFstring::UTFstring(const wchar_t * _aBuf)
56	:_Length(0)
57	,_Data(NULL)
58{
59	*this = _aBuf;
60}
61
62UTFstring::~UTFstring()
63{
64	delete [] _Data;
65}
66
67UTFstring::UTFstring(const UTFstring & _aBuf)
68	:_Length(0)
69	,_Data(NULL)
70{
71	*this = _aBuf.c_str();
72}
73
74UTFstring & UTFstring::operator=(const UTFstring & _aBuf)
75{
76	*this = _aBuf.c_str();
77	return *this;
78}
79
80UTFstring & UTFstring::operator=(const wchar_t * _aBuf)
81{
82	delete [] _Data;
83	if (_aBuf == NULL) {
84		_Data = new wchar_t[1];
85		_Data[0] = 0;
86		UpdateFromUCS2();
87		return *this;
88	}
89
90	size_t aLen;
91	for (aLen=0; _aBuf[aLen] != 0; aLen++);
92	_Length = aLen;
93	_Data = new wchar_t[_Length+1];
94	for (aLen=0; _aBuf[aLen] != 0; aLen++) {
95		_Data[aLen] = _aBuf[aLen];
96	}
97	_Data[aLen] = 0;
98	UpdateFromUCS2();
99	return *this;
100}
101
102UTFstring & UTFstring::operator=(wchar_t _aChar)
103{
104	delete [] _Data;
105	_Data = new wchar_t[2];
106	_Length = 1;
107	_Data[0] = _aChar;
108	_Data[1] = 0;
109	UpdateFromUCS2();
110	return *this;
111}
112
113bool UTFstring::operator==(const UTFstring& _aStr) const
114{
115	if ((_Data == NULL) && (_aStr._Data == NULL))
116		return true;
117	if ((_Data == NULL) || (_aStr._Data == NULL))
118		return false;
119	return wcscmp_internal(_Data, _aStr._Data);
120}
121
122void UTFstring::SetUTF8(const std::string & _aStr)
123{
124	UTF8string = _aStr;
125	UpdateFromUTF8();
126}
127
128/*!
129	\see RFC 2279
130*/
131void UTFstring::UpdateFromUTF8()
132{
133	delete [] _Data;
134	// find the size of the final UCS-2 string
135	size_t i;
136	for (_Length=0, i=0; i<UTF8string.length(); _Length++) {
137		if ((UTF8string[i] & 0x80) == 0) {
138			i++;
139		} else if ((UTF8string[i] & 0x20) == 0) {
140			i += 2;
141		} else if ((UTF8string[i] & 0x10) == 0) {
142			i += 3;
143		}
144	}
145	_Data = new wchar_t[_Length+1];
146	size_t j;
147	for (j=0, i=0; i<UTF8string.length(); j++) {
148		if ((UTF8string[i] & 0x80) == 0) {
149			_Data[j] = UTF8string[i];
150			i++;
151		} else if ((UTF8string[i] & 0x20) == 0) {
152			_Data[j] = ((UTF8string[i] & 0x1F) << 6) + (UTF8string[i+1] & 0x3F);
153			i += 2;
154		} else if ((UTF8string[i] & 0x10) == 0) {
155			_Data[j] = ((UTF8string[i] & 0x0F) << 12) + ((UTF8string[i+1] & 0x3F) << 6) + (UTF8string[i+2] & 0x3F);
156			i += 3;
157		}
158	}
159	_Data[j] = 0;
160}
161
162void UTFstring::UpdateFromUCS2()
163{
164	// find the size of the final UTF-8 string
165	size_t i,Size=0;
166	for (i=0; i<_Length; i++)
167	{
168		if (_Data[i] < 0x80) {
169			Size++;
170		} else if (_Data[i] < 0x800) {
171			Size += 2;
172		} else if (_Data[i] < 0x10000) {
173			Size += 3;
174		}
175	}
176	std::string::value_type *tmpStr = new std::string::value_type[Size+1];
177	for (i=0, Size=0; i<_Length; i++)
178	{
179		if (_Data[i] < 0x80) {
180			tmpStr[Size++] = _Data[i];
181		} else if (_Data[i] < 0x800) {
182			tmpStr[Size++] = 0xC0 | (_Data[i] >> 6);
183			tmpStr[Size++] = 0x80 | (_Data[i] & 0x3F);
184		} else if (_Data[i] < 0x10000) {
185			tmpStr[Size++] = 0xE0 | (_Data[i] >> 12);
186			tmpStr[Size++] = 0x80 | ((_Data[i] >> 6) & 0x3F);
187			tmpStr[Size++] = 0x80 | (_Data[i] & 0x3F);
188		}
189	}
190	tmpStr[Size] = 0;
191	UTF8string = tmpStr; // implicit conversion
192	delete [] tmpStr;
193
194}
195
196bool UTFstring::wcscmp_internal(const wchar_t *str1, const wchar_t *str2)
197{
198	size_t Index=0;
199	while (str1[Index] == str2[Index] && str1[Index] != 0) {
200		Index++;
201	}
202	return (str1[Index] == str2[Index]);
203}
204
205// ===================== EbmlUnicodeString class ===================
206
207EbmlUnicodeString::EbmlUnicodeString()
208:EbmlElement(0, false)
209{
210	DefaultSize = 0;
211}
212
213EbmlUnicodeString::EbmlUnicodeString(const UTFstring & aDefaultValue)
214:EbmlElement(0, true), Value(aDefaultValue), DefaultValue(aDefaultValue)
215{
216	DefaultSize = 0;
217	DefaultIsSet = true;
218}
219
220EbmlUnicodeString::EbmlUnicodeString(const EbmlUnicodeString & ElementToClone)
221 :EbmlElement(ElementToClone)
222 ,Value(ElementToClone.Value)
223 ,DefaultValue(ElementToClone.DefaultValue)
224{
225}
226
227/*!
228\note limited to UCS-2
229\todo handle exception on errors
230*/
231uint32 EbmlUnicodeString::RenderData(IOCallback & output, bool bForceRender, bool bKeepIntact)
232{
233	uint32 Result = Value.GetUTF8().length();
234
235	if (Result != 0) {
236		output.writeFully(Value.GetUTF8().c_str(), Result);
237	}
238
239	if (Result < DefaultSize) {
240		// pad the rest with 0
241		binary *Pad = new binary[DefaultSize - Result];
242		if (Pad != NULL) {
243			memset(Pad, 0x00, DefaultSize - Result);
244			output.writeFully(Pad, DefaultSize - Result);
245
246			Result = DefaultSize;
247			delete [] Pad;
248		}
249	}
250
251	return Result;
252}
253
254EbmlUnicodeString & EbmlUnicodeString::operator=(const UTFstring & NewString)
255{
256	Value = NewString;
257	bValueIsSet = true;
258	return *this;
259}
260
261/*!
262\note limited to UCS-2
263*/
264uint64 EbmlUnicodeString::UpdateSize(bool bKeepIntact, bool bForceRender)
265{
266	if (!bKeepIntact && IsDefaultValue())
267		return 0;
268
269	Size = Value.GetUTF8().length();
270	if (Size < DefaultSize)
271		Size = DefaultSize;
272
273	return Size;
274}
275
276/*!
277	\note limited to UCS-2
278*/
279uint64 EbmlUnicodeString::ReadData(IOCallback & input, ScopeMode ReadFully)
280{
281	if (ReadFully != SCOPE_NO_DATA)
282	{
283		if (Size == 0) {
284			Value = UTFstring::value_type(0);
285			bValueIsSet = true;
286		} else {
287			char *Buffer = new char[Size+1];
288			if (Buffer == NULL) {
289				// impossible to read, skip it
290				input.setFilePointer(Size, seek_current);
291			} else {
292				input.readFully(Buffer, Size);
293				if (Buffer[Size-1] != 0) {
294					Buffer[Size] = 0;
295				}
296
297				Value.SetUTF8(Buffer); // implicit conversion to std::string
298				delete [] Buffer;
299				bValueIsSet = true;
300			}
301		}
302	}
303
304	return Size;
305}
306
307END_LIBEBML_NAMESPACE
308