1/* 2 * libid3tag - ID3 tag manipulation library 3 * Copyright (C) 2000-2004 Underbit Technologies, Inc. 4 * 5 * This program is free software; you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License as published by 7 * the Free Software Foundation; either version 2 of the License, or 8 * (at your option) any later version. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program; if not, write to the Free Software 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 18 * 19 * $Id: utf16.c,v 1.9 2004/01/23 09:41:32 rob Exp $ 20 */ 21 22# ifdef HAVE_CONFIG_H 23# include "config.h" 24# endif 25 26# include "global.h" 27 28# include <stdlib.h> 29 30# include "id3tag.h" 31# include "utf16.h" 32# include "ucs4.h" 33 34/* 35 * NAME: utf16->length() 36 * DESCRIPTION: return the number of ucs4 chars represented by a utf16 string 37 */ 38id3_length_t id3_utf16_length(id3_utf16_t const *utf16) 39{ 40 id3_length_t length = 0; 41 42 while (*utf16) { 43 if (utf16[0] < 0xd800 || utf16[0] > 0xdfff) 44 ++length; 45 else if (utf16[0] >= 0xd800 && utf16[0] <= 0xdbff && 46 utf16[1] >= 0xdc00 && utf16[1] <= 0xdfff) { 47 ++length; 48 ++utf16; 49 } 50 51 ++utf16; 52 } 53 54 return length; 55} 56 57/* 58 * NAME: utf16->size() 59 * DESCRIPTION: return the encoding size of a utf16 string 60 */ 61id3_length_t id3_utf16_size(id3_utf16_t const *utf16) 62{ 63 id3_utf16_t const *ptr = utf16; 64 65 while (*ptr) 66 ++ptr; 67 68 return ptr - utf16 + 1; 69} 70 71/* 72 * NAME: utf16->ucs4duplicate() 73 * DESCRIPTION: duplicate and decode a utf16 string into ucs4 74 */ 75id3_ucs4_t *id3_utf16_ucs4duplicate(id3_utf16_t const *utf16) 76{ 77 id3_ucs4_t *ucs4; 78 79 ucs4 = malloc((id3_utf16_length(utf16) + 1) * sizeof(*ucs4)); 80 if (ucs4) 81 id3_utf16_decode(utf16, ucs4); 82 83 return release(ucs4); 84} 85 86/* 87 * NAME: utf16->decodechar() 88 * DESCRIPTION: decode a series of utf16 chars into a single ucs4 char 89 */ 90id3_length_t id3_utf16_decodechar(id3_utf16_t const *utf16, id3_ucs4_t *ucs4) 91{ 92 id3_utf16_t const *start = utf16; 93 94 while (1) { 95 if (utf16[0] < 0xd800 || utf16[0] > 0xdfff) { 96 *ucs4 = utf16[0]; 97 return utf16 - start + 1; 98 } 99 else if (utf16[0] >= 0xd800 && utf16[0] <= 0xdbff && 100 utf16[1] >= 0xdc00 && utf16[1] <= 0xdfff) { 101 *ucs4 = (((utf16[0] & 0x03ffL) << 10) | 102 ((utf16[1] & 0x03ffL) << 0)) + 0x00010000L; 103 return utf16 - start + 2; 104 } 105 106 ++utf16; 107 } 108} 109 110/* 111 * NAME: utf16->encodechar() 112 * DESCRIPTION: encode a single ucs4 char into a series of up to 2 utf16 chars 113 */ 114id3_length_t id3_utf16_encodechar(id3_utf16_t *utf16, id3_ucs4_t ucs4) 115{ 116 if (ucs4 < 0x00010000L) { 117 utf16[0] = ucs4; 118 119 return 1; 120 } 121 else if (ucs4 < 0x00110000L) { 122 ucs4 -= 0x00010000L; 123 124 utf16[0] = ((ucs4 >> 10) & 0x3ff) | 0xd800; 125 utf16[1] = ((ucs4 >> 0) & 0x3ff) | 0xdc00; 126 127 return 2; 128 } 129 130 /* default */ 131 132 return id3_utf16_encodechar(utf16, ID3_UCS4_REPLACEMENTCHAR); 133} 134 135/* 136 * NAME: utf16->decode() 137 * DESCRIPTION: decode a complete utf16 string into a ucs4 string 138 */ 139void id3_utf16_decode(id3_utf16_t const *utf16, id3_ucs4_t *ucs4) 140{ 141 do 142 utf16 += id3_utf16_decodechar(utf16, ucs4); 143 while (*ucs4++); 144} 145 146/* 147 * NAME: utf16->encode() 148 * DESCRIPTION: encode a complete ucs4 string into a utf16 string 149 */ 150void id3_utf16_encode(id3_utf16_t *utf16, id3_ucs4_t const *ucs4) 151{ 152 do 153 utf16 += id3_utf16_encodechar(utf16, *ucs4); 154 while (*ucs4++); 155} 156 157/* 158 * NAME: utf16->put() 159 * DESCRIPTION: serialize a single utf16 character 160 */ 161id3_length_t id3_utf16_put(id3_byte_t **ptr, id3_utf16_t utf16, 162 enum id3_utf16_byteorder byteorder) 163{ 164 if (ptr) { 165 switch (byteorder) { 166 default: 167 case ID3_UTF16_BYTEORDER_BE: 168 (*ptr)[0] = (utf16 >> 8) & 0xff; 169 (*ptr)[1] = (utf16 >> 0) & 0xff; 170 break; 171 172 case ID3_UTF16_BYTEORDER_LE: 173 (*ptr)[0] = (utf16 >> 0) & 0xff; 174 (*ptr)[1] = (utf16 >> 8) & 0xff; 175 break; 176 } 177 178 *ptr += 2; 179 } 180 181 return 2; 182} 183 184/* 185 * NAME: utf16->get() 186 * DESCRIPTION: deserialize a single utf16 character 187 */ 188id3_utf16_t id3_utf16_get(id3_byte_t const **ptr, 189 enum id3_utf16_byteorder byteorder) 190{ 191 id3_utf16_t utf16; 192 193 switch (byteorder) { 194 default: 195 case ID3_UTF16_BYTEORDER_BE: 196 utf16 = 197 ((*ptr)[0] << 8) | 198 ((*ptr)[1] << 0); 199 break; 200 201 case ID3_UTF16_BYTEORDER_LE: 202 utf16 = 203 ((*ptr)[0] << 0) | 204 ((*ptr)[1] << 8); 205 break; 206 } 207 208 *ptr += 2; 209 210 return utf16; 211} 212 213/* 214 * NAME: utf16->serialize() 215 * DESCRIPTION: serialize a ucs4 string using utf16 encoding 216 */ 217id3_length_t id3_utf16_serialize(id3_byte_t **ptr, id3_ucs4_t const *ucs4, 218 enum id3_utf16_byteorder byteorder, 219 int terminate) 220{ 221 id3_length_t size = 0; 222 id3_utf16_t utf16[2], *out; 223 224 if (byteorder == ID3_UTF16_BYTEORDER_ANY) 225 size += id3_utf16_put(ptr, 0xfeff, byteorder); 226 227 while (*ucs4) { 228 switch (id3_utf16_encodechar(out = utf16, *ucs4++)) { 229 case 2: size += id3_utf16_put(ptr, *out++, byteorder); 230 case 1: size += id3_utf16_put(ptr, *out++, byteorder); 231 case 0: break; 232 } 233 } 234 235 if (terminate) 236 size += id3_utf16_put(ptr, 0, byteorder); 237 238 return size; 239} 240 241/* 242 * NAME: utf16->deserialize() 243 * DESCRIPTION: deserialize a ucs4 string using utf16 encoding 244 */ 245id3_ucs4_t *id3_utf16_deserialize(id3_byte_t const **ptr, id3_length_t length, 246 enum id3_utf16_byteorder byteorder) 247{ 248 id3_byte_t const *end; 249 id3_utf16_t *utf16ptr, *utf16; 250 id3_ucs4_t *ucs4; 251 252 end = *ptr + (length & ~1); 253 254 utf16 = malloc((length / 2 + 1) * sizeof(*utf16)); 255 if (utf16 == 0) 256 return 0; 257 258 if (byteorder == ID3_UTF16_BYTEORDER_ANY && end - *ptr > 0) { 259 switch (((*ptr)[0] << 8) | 260 ((*ptr)[1] << 0)) { 261 case 0xfeff: 262 byteorder = ID3_UTF16_BYTEORDER_BE; 263 *ptr += 2; 264 break; 265 266 case 0xfffe: 267 byteorder = ID3_UTF16_BYTEORDER_LE; 268 *ptr += 2; 269 break; 270 } 271 } 272 273 utf16ptr = utf16; 274 while (end - *ptr > 0 && (*utf16ptr = id3_utf16_get(ptr, byteorder))) 275 ++utf16ptr; 276 277 *utf16ptr = 0; 278 279 ucs4 = malloc((id3_utf16_length(utf16) + 1) * sizeof(*ucs4)); 280 if (ucs4) 281 id3_utf16_decode(utf16, ucs4); 282 283 free(utf16); 284 285 return ucs4; 286} 287