1/*
2 * libid3tag - ID3 tag manipulation library
3 * Copyright (C) 2000-2004 Underbit Technologies, Inc.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18 *
19 * $Id: utf16.c,v 1.9 2004/01/23 09:41:32 rob Exp $
20 */
21
22# ifdef HAVE_CONFIG_H
23#  include "config.h"
24# endif
25
26# include "global.h"
27
28# include <stdlib.h>
29
30# include "id3tag.h"
31# include "utf16.h"
32# include "ucs4.h"
33
34/*
35 * NAME:	utf16->length()
36 * DESCRIPTION:	return the number of ucs4 chars represented by a utf16 string
37 */
38id3_length_t id3_utf16_length(id3_utf16_t const *utf16)
39{
40  id3_length_t length = 0;
41
42  while (*utf16) {
43    if (utf16[0] < 0xd800 || utf16[0] > 0xdfff)
44      ++length;
45    else if (utf16[0] >= 0xd800 && utf16[0] <= 0xdbff &&
46	     utf16[1] >= 0xdc00 && utf16[1] <= 0xdfff) {
47      ++length;
48      ++utf16;
49    }
50
51    ++utf16;
52  }
53
54  return length;
55}
56
57/*
58 * NAME:	utf16->size()
59 * DESCRIPTION:	return the encoding size of a utf16 string
60 */
61id3_length_t id3_utf16_size(id3_utf16_t const *utf16)
62{
63  id3_utf16_t const *ptr = utf16;
64
65  while (*ptr)
66    ++ptr;
67
68  return ptr - utf16 + 1;
69}
70
71/*
72 * NAME:	utf16->ucs4duplicate()
73 * DESCRIPTION:	duplicate and decode a utf16 string into ucs4
74 */
75id3_ucs4_t *id3_utf16_ucs4duplicate(id3_utf16_t const *utf16)
76{
77  id3_ucs4_t *ucs4;
78
79  ucs4 = malloc((id3_utf16_length(utf16) + 1) * sizeof(*ucs4));
80  if (ucs4)
81    id3_utf16_decode(utf16, ucs4);
82
83  return release(ucs4);
84}
85
86/*
87 * NAME:	utf16->decodechar()
88 * DESCRIPTION:	decode a series of utf16 chars into a single ucs4 char
89 */
90id3_length_t id3_utf16_decodechar(id3_utf16_t const *utf16, id3_ucs4_t *ucs4)
91{
92  id3_utf16_t const *start = utf16;
93
94  while (1) {
95    if (utf16[0] < 0xd800 || utf16[0] > 0xdfff) {
96      *ucs4 = utf16[0];
97      return utf16 - start + 1;
98    }
99    else if (utf16[0] >= 0xd800 && utf16[0] <= 0xdbff &&
100	     utf16[1] >= 0xdc00 && utf16[1] <= 0xdfff) {
101      *ucs4 = (((utf16[0] & 0x03ffL) << 10) |
102	       ((utf16[1] & 0x03ffL) <<  0)) + 0x00010000L;
103      return utf16 - start + 2;
104    }
105
106    ++utf16;
107  }
108}
109
110/*
111 * NAME:	utf16->encodechar()
112 * DESCRIPTION:	encode a single ucs4 char into a series of up to 2 utf16 chars
113 */
114id3_length_t id3_utf16_encodechar(id3_utf16_t *utf16, id3_ucs4_t ucs4)
115{
116  if (ucs4 < 0x00010000L) {
117    utf16[0] = ucs4;
118
119    return 1;
120  }
121  else if (ucs4 < 0x00110000L) {
122    ucs4 -= 0x00010000L;
123
124    utf16[0] = ((ucs4 >> 10) & 0x3ff) | 0xd800;
125    utf16[1] = ((ucs4 >>  0) & 0x3ff) | 0xdc00;
126
127    return 2;
128  }
129
130  /* default */
131
132  return id3_utf16_encodechar(utf16, ID3_UCS4_REPLACEMENTCHAR);
133}
134
135/*
136 * NAME:	utf16->decode()
137 * DESCRIPTION:	decode a complete utf16 string into a ucs4 string
138 */
139void id3_utf16_decode(id3_utf16_t const *utf16, id3_ucs4_t *ucs4)
140{
141  do
142    utf16 += id3_utf16_decodechar(utf16, ucs4);
143  while (*ucs4++);
144}
145
146/*
147 * NAME:	utf16->encode()
148 * DESCRIPTION:	encode a complete ucs4 string into a utf16 string
149 */
150void id3_utf16_encode(id3_utf16_t *utf16, id3_ucs4_t const *ucs4)
151{
152  do
153    utf16 += id3_utf16_encodechar(utf16, *ucs4);
154  while (*ucs4++);
155}
156
157/*
158 * NAME:	utf16->put()
159 * DESCRIPTION:	serialize a single utf16 character
160 */
161id3_length_t id3_utf16_put(id3_byte_t **ptr, id3_utf16_t utf16,
162			   enum id3_utf16_byteorder byteorder)
163{
164  if (ptr) {
165    switch (byteorder) {
166    default:
167    case ID3_UTF16_BYTEORDER_BE:
168      (*ptr)[0] = (utf16 >> 8) & 0xff;
169      (*ptr)[1] = (utf16 >> 0) & 0xff;
170      break;
171
172    case ID3_UTF16_BYTEORDER_LE:
173      (*ptr)[0] = (utf16 >> 0) & 0xff;
174      (*ptr)[1] = (utf16 >> 8) & 0xff;
175      break;
176    }
177
178    *ptr += 2;
179  }
180
181  return 2;
182}
183
184/*
185 * NAME:	utf16->get()
186 * DESCRIPTION:	deserialize a single utf16 character
187 */
188id3_utf16_t id3_utf16_get(id3_byte_t const **ptr,
189			  enum id3_utf16_byteorder byteorder)
190{
191  id3_utf16_t utf16;
192
193  switch (byteorder) {
194  default:
195  case ID3_UTF16_BYTEORDER_BE:
196    utf16 =
197      ((*ptr)[0] << 8) |
198      ((*ptr)[1] << 0);
199    break;
200
201  case ID3_UTF16_BYTEORDER_LE:
202    utf16 =
203      ((*ptr)[0] << 0) |
204      ((*ptr)[1] << 8);
205    break;
206  }
207
208  *ptr += 2;
209
210  return utf16;
211}
212
213/*
214 * NAME:	utf16->serialize()
215 * DESCRIPTION:	serialize a ucs4 string using utf16 encoding
216 */
217id3_length_t id3_utf16_serialize(id3_byte_t **ptr, id3_ucs4_t const *ucs4,
218				 enum id3_utf16_byteorder byteorder,
219				 int terminate)
220{
221  id3_length_t size = 0;
222  id3_utf16_t utf16[2], *out;
223
224  if (byteorder == ID3_UTF16_BYTEORDER_ANY)
225    size += id3_utf16_put(ptr, 0xfeff, byteorder);
226
227  while (*ucs4) {
228    switch (id3_utf16_encodechar(out = utf16, *ucs4++)) {
229    case 2: size += id3_utf16_put(ptr, *out++, byteorder);
230    case 1: size += id3_utf16_put(ptr, *out++, byteorder);
231    case 0: break;
232    }
233  }
234
235  if (terminate)
236    size += id3_utf16_put(ptr, 0, byteorder);
237
238  return size;
239}
240
241/*
242 * NAME:	utf16->deserialize()
243 * DESCRIPTION:	deserialize a ucs4 string using utf16 encoding
244 */
245id3_ucs4_t *id3_utf16_deserialize(id3_byte_t const **ptr, id3_length_t length,
246				  enum id3_utf16_byteorder byteorder)
247{
248  id3_byte_t const *end;
249  id3_utf16_t *utf16ptr, *utf16;
250  id3_ucs4_t *ucs4;
251
252  end = *ptr + (length & ~1);
253
254  utf16 = malloc((length / 2 + 1) * sizeof(*utf16));
255  if (utf16 == 0)
256    return 0;
257
258  if (byteorder == ID3_UTF16_BYTEORDER_ANY && end - *ptr > 0) {
259    switch (((*ptr)[0] << 8) |
260	    ((*ptr)[1] << 0)) {
261    case 0xfeff:
262      byteorder = ID3_UTF16_BYTEORDER_BE;
263      *ptr += 2;
264      break;
265
266    case 0xfffe:
267      byteorder = ID3_UTF16_BYTEORDER_LE;
268      *ptr += 2;
269      break;
270    }
271  }
272
273  utf16ptr = utf16;
274  while (end - *ptr > 0 && (*utf16ptr = id3_utf16_get(ptr, byteorder)))
275    ++utf16ptr;
276
277  *utf16ptr = 0;
278
279  ucs4 = malloc((id3_utf16_length(utf16) + 1) * sizeof(*ucs4));
280  if (ucs4)
281    id3_utf16_decode(utf16, ucs4);
282
283  free(utf16);
284
285  return ucs4;
286}
287