/*
 * Copyright (c) 2000-2010 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#if HFS

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/utfconv.h>
#include <kern/host.h>
#include <mach/host_priv.h>
#include <libkern/OSKextLib.h>
#include <libkern/OSKextLibPrivate.h>

#include "hfs.h"


lck_grp_t * encodinglst_lck_grp;
lck_grp_attr_t * encodinglst_lck_grp_attr;
lck_attr_t * encodinglst_lck_attr;


/* hfs encoding converter list */
SLIST_HEAD(encodinglst, hfs_encoding) hfs_encoding_list = {0};

/* protects hfs_encoding_list and the refcount of every entry on it */
lck_mtx_t  encodinglst_mutex;



/* hfs encoding converter entry */
struct	hfs_encoding {
	SLIST_ENTRY(hfs_encoding)  link;
	int			refcount;		/* users of this converter */
	int			kmod_id;		/* 0 for the resident MacRoman converter */
	u_int32_t	encoding;		/* text encoding this entry converts */
	hfs_to_unicode_func_t	get_unicode_func;
	unicode_to_hfs_func_t	get_hfsname_func;
};

/* size (in UniChars) of the scratch buffers used for name conversion */
#define MAX_HFS_UNICODE_CHARS	(15*5)

static int unicode_to_mac_roman(UniChar *uni_str, u_int32_t unicodeChars, Str31 hfs_str);

/*
 * hfs_converterinit - set up the converter list and its lock
 *
 * Initializes the (empty) converter list and the mutex guarding it,
 * then registers the built-in MacRoman converter with kmod id 0.
 */
void
hfs_converterinit(void)
{
	SLIST_INIT(&hfs_encoding_list);

	/*
	 * NOTE(review): the lock group is named "cnode_hash", which looks
	 * copied from elsewhere; the string is left untouched here.
	 */
	encodinglst_lck_grp_attr= lck_grp_attr_alloc_init();
	encodinglst_lck_grp  = lck_grp_alloc_init("cnode_hash", encodinglst_lck_grp_attr);
	encodinglst_lck_attr = lck_attr_alloc_init();

	lck_mtx_init(&encodinglst_mutex, encodinglst_lck_grp, encodinglst_lck_attr);

	/*
	 * add resident MacRoman converter and take a reference
	 * since it's always "loaded".  The new entry is at the head of
	 * the list; only touch it if the registration succeeded.
	 */
	if (hfs_addconverter(0, kTextEncodingMacRoman, mac_roman_to_unicode, unicode_to_mac_roman) == 0)
		SLIST_FIRST(&hfs_encoding_list)->refcount++;
}


/*
 * hfs_addconverter - add an HFS encoding converter
 *
 * This is called exclusively by kernel loadable modules
 * (like HFS_Japanese.kmod) to register hfs encoding
 * conversion routines.
 *
 * The new entry starts with a reference count of zero and is
 * inserted at the head of the converter list.
 *
 * Returns 0 on success, ENOMEM if the entry could not be allocated.
 */
int
hfs_addconverter(int id, u_int32_t encoding, hfs_to_unicode_func_t get_unicode, unicode_to_hfs_func_t get_hfsname)
{
	struct hfs_encoding *encp;

	MALLOC(encp, struct hfs_encoding *, sizeof(struct hfs_encoding), M_TEMP, M_WAITOK);
	if (encp == NULL)
		return (ENOMEM);

	/* the entry is private until it is linked in, so fill it unlocked */
	encp->link.sle_next = NULL;
	encp->refcount = 0;
	encp->encoding = encoding;
	encp->get_unicode_func = get_unicode;
	encp->get_hfsname_func = get_hfsname;
	encp->kmod_id = id;

	lck_mtx_lock(&encodinglst_mutex);
	SLIST_INSERT_HEAD(&hfs_encoding_list, encp, link);
	lck_mtx_unlock(&encodinglst_mutex);

	return (0);
}


/*
 * hfs_remconverter - remove an HFS encoding converter
 *
 * Can be called by a kernel loadable module's finalize
 * routine to remove an encoding converter so that the
 * module (i.e. the code) can be unloaded.
 *
 * However, in the normal case, the removing and unloading
 * of these converters is done in hfs_relconverter.
 * The call is initiated from within the kernel during the
 * unmounting of an hfs volume.
 *
 * The matching entry (same encoding and kmod id) has its reference
 * count decremented.  Returns 0 if the entry was removed or no entry
 * matched, 1 if the entry is still in use (or is the resident
 * converter, kmod id 0) and was left on the list.
 */
int
hfs_remconverter(int id, u_int32_t encoding)
{
	struct hfs_encoding *encp;

	lck_mtx_lock(&encodinglst_mutex);
	SLIST_FOREACH(encp, &hfs_encoding_list, link) {
		if (encp->encoding != encoding || encp->kmod_id != id)
			continue;

		encp->refcount--;

		/* if converter is no longer in use, release it */
		if (encp->refcount <= 0 && encp->kmod_id != 0) {
			SLIST_REMOVE(&hfs_encoding_list, encp, hfs_encoding, link);
			lck_mtx_unlock(&encodinglst_mutex);
			FREE(encp, M_TEMP);
			return (0);
		}

		lck_mtx_unlock(&encodinglst_mutex);
		return (1);   /* busy */
	}
	lck_mtx_unlock(&encodinglst_mutex);

	return (0);
}


/*
 * hfs_getconverter - get HFS encoding converters
 *
 * Normally called during the mounting of an hfs volume.
 *
 * On success the two conversion routines for 'encoding' are stored
 * through get_unicode / get_hfsname, the entry's reference count is
 * bumped and 0 is returned.  If no converter is registered for the
 * encoding both outputs are set to NULL and EINVAL is returned.
 */
int
hfs_getconverter(u_int32_t encoding, hfs_to_unicode_func_t *get_unicode, unicode_to_hfs_func_t *get_hfsname)
{
	struct hfs_encoding *encp;
	int found = 0;

	lck_mtx_lock(&encodinglst_mutex);
	SLIST_FOREACH(encp, &hfs_encoding_list, link) {
		if (encp->encoding == encoding) {
			found = 1;
			*get_unicode = encp->get_unicode_func;
			*get_hfsname = encp->get_hfsname_func;
			++encp->refcount;
			break;
		}
	}
	lck_mtx_unlock(&encodinglst_mutex);

	if (!found) {
		*get_unicode = NULL;
		*get_hfsname = NULL;
		return (EINVAL);
	}

	return (0);
}


/*
 * hfs_relconverter - release interest in an HFS encoding converter
 *
 * Normally called during the unmounting of an hfs volume.
 *
 * Drops one reference on the first entry matching 'encoding'.  When
 * the count reaches zero and the entry belongs to a loadable module
 * (kmod id != 0) the entry is freed and the kext identified by that
 * id is asked to unload.  Returns 0 if an entry matched, EINVAL
 * otherwise.
 */
int
hfs_relconverter(u_int32_t encoding)
{
	struct hfs_encoding *encp;

	lck_mtx_lock(&encodinglst_mutex);
	SLIST_FOREACH(encp, &hfs_encoding_list, link) {
		if (encp->encoding == encoding) {
			encp->refcount--;

			/* if converter is no longer in use, release it */
			if (encp->refcount <= 0 && encp->kmod_id != 0) {
				/* remember the id: encp is freed before the unload call */
				uint32_t loadTag = (uint32_t)encp->kmod_id;

				SLIST_REMOVE(&hfs_encoding_list, encp, hfs_encoding, link);
				lck_mtx_unlock(&encodinglst_mutex);

				FREE(encp, M_TEMP);
				(void)OSKextUnloadKextWithLoadTag(loadTag);
				return (0);
			}
			lck_mtx_unlock(&encodinglst_mutex);
			return (0);
		}
	}
	lck_mtx_unlock(&encodinglst_mutex);

	return (EINVAL);
}


/*
 * hfs_pstr_to_utf8 - shared body of hfs_to_utf8 and mac_roman_to_utf8
 *
 * Converts the Pascal string hfs_str to Unicode with 'to_unicode' and
 * then encodes the result as UTF-8 into dstStr (at most maxDstLen
 * bytes).  *actualDstLen receives the encoded length; when the
 * encoder reports ENAMETOOLONG it receives the length reported by
 * utf8_encodelen instead.  *actualDstLen is not written when the
 * Unicode conversion step fails.
 *
 * Returns EINVAL if the length byte exceeds 31 or no Unicode
 * characters were produced, otherwise the converter/encoder status.
 */
static int
hfs_pstr_to_utf8(hfs_to_unicode_func_t to_unicode, const Str31 hfs_str,
		ByteCount maxDstLen, ByteCount *actualDstLen, unsigned char* dstStr)
{
	int error;
	UniChar uniStr[MAX_HFS_UNICODE_CHARS];
	/* start at 0 so a converter that fails without storing a count is seen as "no output" */
	ItemCount uniCount = 0;
	size_t utf8len;
	u_int8_t pascal_length = 0;

	/*
	 * Validate the length of the Pascal-style string before passing it
	 * down to the decoding engine.
	 */
	pascal_length = *((const u_int8_t*)(hfs_str));
	if (pascal_length > 31) {
		/* invalid string; longer than 31 bytes */
		return (EINVAL);
	}

	error = to_unicode(hfs_str, uniStr, MAX_HFS_UNICODE_CHARS, &uniCount);

	if (uniCount == 0)
		error = EINVAL;

	if (error == 0) {
		error = utf8_encodestr(uniStr, uniCount * sizeof(UniChar), dstStr, &utf8len, maxDstLen , ':', 0);
		if (error == ENAMETOOLONG)
			*actualDstLen = utf8_encodelen(uniStr, uniCount * sizeof(UniChar), ':', 0);
		else
			*actualDstLen = utf8len;
	}

	return error;
}


/*
 * Convert HFS encoded string into UTF-8
 *
 * Unicode output is fully decomposed
 * '/' chars are converted to ':'
 *
 * Uses the to-Unicode converter recorded for the volume 'vcb'.
 */
int
hfs_to_utf8(ExtendedVCB *vcb, const Str31 hfs_str, ByteCount maxDstLen, ByteCount *actualDstLen, unsigned char* dstStr)
{
	hfs_to_unicode_func_t hfs_get_unicode = VCBTOHFS(vcb)->hfs_get_unicode;

	return hfs_pstr_to_utf8(hfs_get_unicode, hfs_str, maxDstLen, actualDstLen, dstStr);
}


/*
 * When an HFS name cannot be encoded with the current
 * volume encoding then MacRoman is used as a fallback.
 */
int
mac_roman_to_utf8(const Str31 hfs_str, ByteCount maxDstLen, ByteCount *actualDstLen, unsigned char* dstStr)
{
	return hfs_pstr_to_utf8(mac_roman_to_unicode, hfs_str, maxDstLen, actualDstLen, dstStr);
}


/*
 * Convert Unicode string into HFS encoding
 *
 * ':' chars are converted to '/'
 * Assumes input represents fully decomposed Unicode
 *
 * srcLen is a byte count.  If the volume's converter fails and
 * 'retry' is non-zero, the conversion is attempted again with the
 * built-in MacRoman converter.
 */
int
unicode_to_hfs(ExtendedVCB *vcb, ByteCount srcLen, u_int16_t* srcStr, Str31 dstStr, int retry)
{
	int error;
	unicode_to_hfs_func_t hfs_get_hfsname = VCBTOHFS(vcb)->hfs_get_hfsname;

	error = hfs_get_hfsname(srcStr, srcLen/sizeof(UniChar), dstStr);
	if (error && retry) {
		error = unicode_to_mac_roman(srcStr, srcLen/sizeof(UniChar), dstStr);
	}
	return error;
}

/*
 * Convert UTF-8 string into HFS encoding
 *
 * ':' chars are converted to '/'
 * Assumes input represents fully decomposed Unicode
 *
 * Always allows the MacRoman retry in unicode_to_hfs.
 */
int
utf8_to_hfs(ExtendedVCB *vcb, ByteCount srcLen, const unsigned char* srcStr, Str31 dstStr/*, int retry*/)
{
	int error;
	UniChar uniStr[MAX_HFS_UNICODE_CHARS];
	size_t ucslen;

	error = utf8_decodestr(srcStr, srcLen, uniStr, &ucslen, sizeof(uniStr), ':', 0);
	if (error == 0)
		error = unicode_to_hfs(vcb, ucslen, uniStr, dstStr, 1);

	return error;
}

/*
 * Convert UTF-8 string directly into HFS MacRoman encoding
 * (no volume-specific converter involved).
 */
int
utf8_to_mac_roman(ByteCount srcLen, const unsigned char* srcStr, Str31 dstStr)
{
	int error;
	UniChar uniStr[MAX_HFS_UNICODE_CHARS];
	size_t ucslen;

	error = utf8_decodestr(srcStr, srcLen, uniStr, &ucslen, sizeof(uniStr), ':', 0);
	if (error == 0)
		error = unicode_to_mac_roman(uniStr, ucslen/sizeof(UniChar), dstStr);

	return error;
}

/*
 * HFS MacRoman to/from Unicode conversions are built into the kernel
 * All other hfs encodings are loadable.
 *
 * In the Unicode -> MacRoman tables below, '?' marks a code point
 * that has no MacRoman equivalent.
 */

/* 0x00A0 - 0x00FF = Latin 1 Supplement (30 total) */
static const u_int8_t gLatin1Table[] = {
  /*              0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F  */
  /* 0x00A0 */  0xCA, 0xC1, 0xA2, 0xA3, 0xDB, 0xB4,  '?', 0xA4, 0xAC, 0xA9, 0xBB, 0xC7, 0xC2,  '?', 0xA8, 0xF8,
  /* 0x00B0 */  0xA1, 0xB1,  '?',  '?', 0xAB, 0xB5, 0xA6, 0xe1, 0xFC,  '?', 0xBC, 0xC8,  '?',  '?',  '?', 0xC0,
  /* 0x00C0 */   '?',  '?',  '?',  '?',  '?',  '?', 0xAE,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
  /* 0x00D0 */   '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xAF,  '?',  '?',  '?',  '?',  '?',  '?', 0xA7,
  /* 0x00E0 */   '?',  '?',  '?',  '?',  '?',  '?', 0xBE,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
  /* 0x00F0 */   '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xD6, 0xBF,  '?',  '?',  '?',  '?',  '?',  '?',  '?'
};

/* 0x02C0 - 0x02DF = Spacing Modifiers (8 total) */
static const u_int8_t gSpaceModsTable[] = {
  /*              0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F  */
  /* 0x02C0 */   '?',  '?',  '?',  '?',  '?',  '?', 0xF6, 0xFF,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
  /* 0x02D0 */   '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xF9, 0xFA, 0xFB, 0xFE, 0xF7, 0xFD,  '?',  '?'
};

/* 0x2010 - 0x20AF = General Punctuation (17 total) */
static const u_int8_t gPunctTable[] = {
  /*              0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F  */
  /* 0x2010 */   '?',  '?',  '?', 0xd0, 0xd1,  '?',  '?',  '?', 0xd4, 0xd5, 0xe2,  '?', 0xd2, 0xd3, 0xe3,  '?',
  /* 0x2020 */  0xa0, 0xe0, 0xa5,  '?',  '?',  '?', 0xc9,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
  /* 0x2030 */  0xe4,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xdc, 0xdd,  '?',  '?',  '?',  '?',  '?',
  /* 0x2040 */   '?',  '?',  '?',  '?', 0xda,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
  /* 0x2050 */   '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
  /* 0x2060 */   '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
  /* 0x2070 */   '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
  /* 0x2080 */   '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
  /* 0x2090 */   '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
  /* 0x20A0 */   '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xdb,  '?',  '?',  '?'
};

/* 0x22xx = Mathematical Operators (11 total); covers 0x2200 - 0x2267 */
static const u_int8_t gMathTable[] = {
  /*              0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F  */
  /* 0x2200 */   '?',  '?', 0xb6,  '?',  '?',  '?', 0xc6,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xb8,
  /* 0x2210 */   '?', 0xb7,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xc3,  '?',  '?',  '?', 0xb0,  '?',
  /* 0x2220 */   '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xba,  '?',  '?',  '?',  '?',
  /* 0x2230 */   '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
  /* 0x2240 */   '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xc5,  '?',  '?',  '?',  '?',  '?',  '?',  '?',
  /* 0x2250 */   '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
  /* 0x2260 */  0xad,  '?',  '?',  '?', 0xb2, 0xb3,  '?',  '?'
};

/*
 * Two-level table used to recombine a base letter with a combining
 * mark.  The first 64 entries are indexed by (base char - 0x40) and
 * hold the offset, within this same table, of that letter's row of
 * 11 entries (one per combining mark 0x0300 - 0x030A).  Letters with
 * no composed forms point at the final "else" row (offset 0xDA).
 */
static const u_int8_t gReverseCombTable[] = {
  /*              0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F  */
  /* 0x40 */    0xDA, 0x40, 0xDA, 0xDA, 0xDA, 0x56, 0xDA, 0xDA, 0xDA, 0x6C, 0xDA, 0xDA, 0xDA, 0xDA, 0x82, 0x98,
  /* 0x50 */    0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xAE, 0xDA, 0xDA, 0xDA, 0xC4, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA,
  /* 0x60 */    0xDA, 0x4B, 0xDA, 0xDA, 0xDA, 0x61, 0xDA, 0xDA, 0xDA, 0x77, 0xDA, 0xDA, 0xDA, 0xDA, 0x8D, 0xA3,
  /* 0x70 */    0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xB9, 0xDA, 0xDA, 0xDA, 0xCF, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA,

  /* Combining Diacritical Marks (0x0300 - 0x030A) */
  /*              0     1     2     3     4     5     6     7     8     9     A  */
  /*  'A'   */
  /* 0x0300 */  0xCB, 0xE7, 0xE5, 0xCC,  '?',  '?',  '?',  '?', 0x80,  '?', 0x81,

  /*  'a'   */
  /* 0x0300 */  0x88, 0x87, 0x89, 0x8B,  '?',  '?',  '?',  '?', 0x8A,  '?', 0x8C,

  /*  'E'   */
  /* 0x0300 */  0xE9, 0x83, 0xE6,  '?',  '?',  '?',  '?',  '?', 0xE8,  '?',  '?',

  /*  'e'   */
  /* 0x0300 */  0x8F, 0x8E, 0x90,  '?',  '?',  '?',  '?',  '?', 0x91,  '?',  '?',

  /*  'I'   */
  /* 0x0300 */  0xED, 0xEA, 0xEB,  '?',  '?',  '?',  '?',  '?', 0xEC,  '?',  '?',

  /*  'i'   */
  /* 0x0300 */  0x93, 0x92, 0x94,  '?',  '?',  '?',  '?',  '?', 0x95,  '?',  '?',

  /*  'N'   */
  /* 0x0300 */   '?',  '?',  '?', 0x84,  '?',  '?',  '?',  '?',  '?',  '?',  '?',

  /*  'n'   */
  /* 0x0300 */   '?',  '?',  '?', 0x96,  '?',  '?',  '?',  '?',  '?',  '?',  '?',

  /*  'O'   */
  /* 0x0300 */  0xF1, 0xEE, 0xEF, 0xCD,  '?',  '?',  '?',  '?', 0x85,  '?',  '?',

  /*  'o'   */
  /* 0x0300 */  0x98, 0x97, 0x99, 0x9B,  '?',  '?',  '?',  '?', 0x9A,  '?',  '?',

  /*  'U'   */
  /* 0x0300 */  0xF4, 0xF2, 0xF3,  '?',  '?',  '?',  '?',  '?', 0x86,  '?',  '?',

  /*  'u'   */
  /* 0x0300 */  0x9D, 0x9C, 0x9E,  '?',  '?',  '?',  '?',  '?', 0x9F,  '?',  '?',

  /*  'Y'   */
  /* 0x0300 */   '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xD9,  '?',  '?',

  /*  'y'   */
  /* 0x0300 */   '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xD8,  '?',  '?',

  /*  else  */
  /* 0x0300 */   '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?'
};


/*
 * Convert Unicode string into HFS MacRoman encoding
 *
 * Assumes Unicode input is fully decomposed
 *
 * uni_str      - input UniChars
 * unicodeChars - number of UniChars in uni_str
 * hfs_str      - output Pascal string (length byte + up to 31 chars)
 *
 * Returns noErr, kTECUsedFallbacksStatus if at least one character
 * had no MacRoman equivalent (it is emitted as '?'), or ENAMETOOLONG
 * if the input did not fit in 31 output characters.
 */
static int unicode_to_mac_roman(UniChar *uni_str, u_int32_t unicodeChars, Str31 hfs_str)
{
	u_int8_t		*p;
	const UniChar	*u;
	UniChar			c;
	UniChar			mask;
	u_int16_t		inputChars;
	u_int16_t		pascalChars;
	OSErr			result = noErr;
	u_int8_t		lsb;
	u_int8_t		prevChar;	/* last 7-bit char written, 0 if none */
	u_int8_t		mc;

	mask = (UniChar) 0xFF80;
	p = &hfs_str[1];
	u = uni_str;
	inputChars = unicodeChars;
	pascalChars = prevChar = 0;

	while (inputChars) {
		c = *(u++);
		lsb = (u_int8_t) c;

		/*
		 * If its not 7-bit ascii, then we need to map it
		 */
		if ( c & mask ) {
			mc = '?';
			switch (c & 0xFF00) {
			case 0x0000:
				if (lsb >= 0xA0)
					mc = gLatin1Table[lsb - 0xA0];
				break;

			case 0x0200:
				if (lsb >= 0xC0 && lsb <= 0xDF)
					mc = gSpaceModsTable[lsb - 0xC0];
				break;

			case 0x2000:
				if (lsb >= 0x10 && lsb <= 0xAF)
					mc = gPunctTable[lsb- 0x10];
				break;

			case 0x2200:
				/*
				 * gMathTable ends at 0x2267; bound the index by
				 * the table size so 0x2268 is not read past the end.
				 */
				if (lsb < sizeof(gMathTable))
					mc = gMathTable[lsb];
				break;

			case 0x0300:
				if (c <= 0x030A) {
					/* combining mark: try to merge with the previous base letter */
					if (prevChar >= 'A' && prevChar < 'z') {
						mc = gReverseCombTable[gReverseCombTable[prevChar - 0x40] + lsb];
						--p;	/* backup over base char */
						--pascalChars;
					}
				} else {
					switch (c) {
					case 0x0327:	/* combining cedilla */
						if (prevChar == 'C')
							mc = 0x82;
						else if (prevChar == 'c')
							mc = 0x8D;
						else
							break;
						--p;	/* backup over base char */
						--pascalChars;
						break;

					case 0x03A9: mc = 0xBD; break;	/* omega */

					case 0x03C0: mc = 0xB9; break;	/* pi */
					}
				}
				break;

			default:
				switch (c) {
				case 0x0131: mc = 0xf5; break;	/* dotless i */

				case 0x0152: mc = 0xce; break;	/* OE */

				case 0x0153: mc = 0xcf; break;	/* oe */

				case 0x0192: mc = 0xc4; break;	/* florin (f with hook) */

				case 0x2122: mc = 0xaa; break;	/* TM */

				case 0x25ca: mc = 0xd7; break;	/* diamond */

				case 0xf8ff: mc = 0xf0; break;	/* apple logo */

				case 0xfb01: mc = 0xde; break;	/* fi */

				case 0xfb02: mc = 0xdf; break;	/* fl */
				}
			} /* end switch (c & 0xFF00) */

			/*
			 * If we have an unmapped character then we need to mangle the name...
			 */
			if (mc == '?')
				result = kTECUsedFallbacksStatus;

			prevChar = 0;
			lsb = mc;

		} else {
			prevChar = lsb;
		}

		if (pascalChars >= 31)
			break;

		*(p++) = lsb;
		++pascalChars;
		--inputChars;

	} /* end while */

	hfs_str[0] = pascalChars;

	if (inputChars > 0)
		result = ENAMETOOLONG;  /* ran out of room! */

	return result;
}


/*
 * MacRoman 0x80 - 0xFF -> Unicode base character.  Entries in the
 * range 'A'..'z' are letters that also need the combining mark from
 * the same slot of gHiBitCombUnicode.
 */
static const UniChar gHiBitBaseUnicode[128] = {
	/* 0x80 */	0x0041, 0x0041, 0x0043, 0x0045, 0x004e, 0x004f, 0x0055, 0x0061,
	/* 0x88 */	0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0063, 0x0065, 0x0065,
	/* 0x90 */	0x0065, 0x0065, 0x0069, 0x0069, 0x0069, 0x0069, 0x006e, 0x006f,
	/* 0x98 */	0x006f, 0x006f, 0x006f, 0x006f, 0x0075, 0x0075, 0x0075, 0x0075,
	/* 0xa0 */	0x2020, 0x00b0, 0x00a2, 0x00a3, 0x00a7, 0x2022, 0x00b6, 0x00df,
	/* 0xa8 */	0x00ae, 0x00a9, 0x2122, 0x00b4, 0x00a8, 0x2260, 0x00c6, 0x00d8,
	/* 0xb0 */	0x221e, 0x00b1, 0x2264, 0x2265, 0x00a5, 0x00b5, 0x2202, 0x2211,
	/* 0xb8 */	0x220f, 0x03c0, 0x222b, 0x00aa, 0x00ba, 0x03a9, 0x00e6, 0x00f8,
	/* 0xc0 */	0x00bf, 0x00a1, 0x00ac, 0x221a, 0x0192, 0x2248, 0x2206, 0x00ab,
	/* 0xc8 */	0x00bb, 0x2026, 0x00a0, 0x0041, 0x0041, 0x004f, 0x0152, 0x0153,
	/* 0xd0 */	0x2013, 0x2014, 0x201c, 0x201d, 0x2018, 0x2019, 0x00f7, 0x25ca,
	/* 0xd8 */	0x0079, 0x0059, 0x2044, 0x20ac, 0x2039, 0x203a, 0xfb01, 0xfb02,
	/* 0xe0 */	0x2021, 0x00b7, 0x201a, 0x201e, 0x2030, 0x0041, 0x0045, 0x0041,
	/* 0xe8 */	0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049, 0x004f, 0x004f,
	/* 0xf0 */	0xf8ff, 0x004f, 0x0055, 0x0055, 0x0055, 0x0131, 0x02c6, 0x02dc,
	/* 0xf8 */	0x00af, 0x02d8, 0x02d9, 0x02da, 0x00b8, 0x02dd, 0x02db, 0x02c7
};

/* MacRoman 0x80 - 0xFF -> combining mark that follows the base (0 = none) */
static const UniChar gHiBitCombUnicode[128] = {
	/* 0x80 */	0x0308, 0x030a, 0x0327, 0x0301, 0x0303, 0x0308, 0x0308, 0x0301,
	/* 0x88 */	0x0300, 0x0302, 0x0308, 0x0303, 0x030a, 0x0327, 0x0301, 0x0300,
	/* 0x90 */	0x0302, 0x0308, 0x0301, 0x0300, 0x0302, 0x0308, 0x0303, 0x0301,
	/* 0x98 */	0x0300, 0x0302, 0x0308, 0x0303, 0x0301, 0x0300, 0x0302, 0x0308,
	/* 0xa0 */	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
	/* 0xa8 */	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
	/* 0xb0 */	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
	/* 0xb8 */	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
	/* 0xc0 */	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
	/* 0xc8 */	0x0000, 0x0000, 0x0000, 0x0300, 0x0303, 0x0303, 0x0000, 0x0000,
	/* 0xd0 */	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
	/* 0xd8 */	0x0308, 0x0308, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
	/* 0xe0 */	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0302, 0x0302, 0x0301,
	/* 0xe8 */	0x0308, 0x0300, 0x0301, 0x0302, 0x0308, 0x0300, 0x0301, 0x0302,
	/* 0xf0 */	0x0000, 0x0300, 0x0301, 0x0302, 0x0300, 0x0000, 0x0000, 0x0000,
	/* 0xf8 */	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
};


/*
 * Convert HFS MacRoman encoded string into Unicode
 *
 * Unicode output is fully decomposed
 *
 * hfs_str      - input Pascal string (length byte first)
 * uni_str      - output buffer for the UniChars
 * maxCharLen   - ignored
 * unicodeChars - receives the number of UniChars written
 *
 * Always returns noErr.
 *
 * NOTE(review): because maxCharLen is ignored, each input byte may
 * emit up to two UniChars with no bound check here; callers must
 * size uni_str for twice the Pascal length.
 */
int
mac_roman_to_unicode(const Str31 hfs_str, UniChar *uni_str,
				__unused u_int32_t maxCharLen, u_int32_t *unicodeChars)
{
	const u_int8_t  *p;
	UniChar  *u;
	u_int16_t  pascalChars;
	u_int8_t  c;

	p = hfs_str;
	u = uni_str;

	*unicodeChars = pascalChars = *(p++);  /* pick up length byte */

	while (pascalChars--) {
		c = *(p++);

		if ( (int8_t) c >= 0 ) {		/* check if seven bit ascii */
			*(u++) = (UniChar) c;  /* just pad high byte with zero */
		} else { /* its a hi bit character */
			UniChar uc;

			c &= 0x7F;
			*(u++) = uc = gHiBitBaseUnicode[c];

			/*
			 * if the unicode character we get back is an alpha char
			 * then we must have an additional combining character
			 */
			if ((uc <= (UniChar) 'z') && (uc >= (UniChar) 'A')) {
				*(u++) = gHiBitCombUnicode[c];
				++(*unicodeChars);
			}
		}
	}

	return noErr;
}

#else /* not HFS - temp workaround until 4277828 is fixed */
/* stubs for exported routines that aren't present when we build kernel without HFS */

#include <sys/types.h>
#include <sys/errno.h>

int hfs_addconverter(int id, u_int32_t encoding, void * get_unicode, void * get_hfsname);
int hfs_getconverter(u_int32_t encoding, void *get_unicode, void *get_hfsname);
int hfs_relconverter(u_int32_t encoding);
int hfs_remconverter(int id, u_int32_t encoding);

/* stub: accepts and ignores the registration */
int hfs_addconverter( __unused int id,
					  __unused u_int32_t encoding,
					  __unused void * get_unicode,
					  __unused void * get_hfsname )
{
	return(0);
}

/* stub: no converters exist without HFS */
int hfs_getconverter(__unused u_int32_t encoding, __unused void *get_unicode, __unused void *get_hfsname)
{
	return(EINVAL);
}

/* stub: nothing to release without HFS */
int hfs_relconverter(__unused u_int32_t encoding)
{
	return(EINVAL);
}

/* stub: nothing to remove without HFS */
int hfs_remconverter(__unused int id, __unused u_int32_t encoding)
{
	return(0);
}

#endif /* HFS */