1/*
2 * Copyright (c) 2000-2010 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28#if HFS
29
30#include <sys/param.h>
31#include <sys/systm.h>
32#include <sys/kernel.h>
33#include <sys/malloc.h>
34#include <sys/queue.h>
35#include <sys/utfconv.h>
36#include <kern/host.h>
37#include <mach/host_priv.h>
38#include <libkern/OSKextLib.h>
39#include <libkern/OSKextLibPrivate.h>
40
41#include "hfs.h"
42
43
44lck_grp_t * encodinglst_lck_grp;
45lck_grp_attr_t * encodinglst_lck_grp_attr;
46lck_attr_t * encodinglst_lck_attr;
47
48
49/* hfs encoding converter list */
50SLIST_HEAD(encodinglst, hfs_encoding) hfs_encoding_list = {0};
51
52lck_mtx_t  encodinglst_mutex;
53
54
55
56/* hfs encoding converter entry */
57struct	hfs_encoding {
58	SLIST_ENTRY(hfs_encoding)  link;
59	int			refcount;
60	int			kmod_id;
61	u_int32_t	encoding;
62	hfs_to_unicode_func_t	get_unicode_func;
63	unicode_to_hfs_func_t	get_hfsname_func;
64};
65
66#define MAX_HFS_UNICODE_CHARS	(15*5)
67
68static int unicode_to_mac_roman(UniChar *uni_str, u_int32_t unicodeChars, Str31 hfs_str);
69
70void
71hfs_converterinit(void)
72{
73	SLIST_INIT(&hfs_encoding_list);
74
75	encodinglst_lck_grp_attr= lck_grp_attr_alloc_init();
76	encodinglst_lck_grp  = lck_grp_alloc_init("cnode_hash", encodinglst_lck_grp_attr);
77	encodinglst_lck_attr = lck_attr_alloc_init();
78
79	lck_mtx_init(&encodinglst_mutex, encodinglst_lck_grp, encodinglst_lck_attr);
80
81	/*
82	 * add resident MacRoman converter and take a reference
83	 * since its always "loaded".
84	 */
85	hfs_addconverter(0, kTextEncodingMacRoman, mac_roman_to_unicode, unicode_to_mac_roman);
86	SLIST_FIRST(&hfs_encoding_list)->refcount++;
87}
88
89
90/*
91 * hfs_addconverter - add an HFS encoding converter
92 *
93 * This is called exclusivly by kernel loadable modules
94 * (like HFS_Japanese.kmod) to register hfs encoding
95 * conversion routines.
96 *
97 */
98int
99hfs_addconverter(int id, u_int32_t encoding, hfs_to_unicode_func_t get_unicode, unicode_to_hfs_func_t get_hfsname)
100{
101	struct hfs_encoding *encp;
102
103	MALLOC(encp, struct hfs_encoding *, sizeof(struct hfs_encoding), M_TEMP, M_WAITOK);
104
105	lck_mtx_lock(&encodinglst_mutex);
106
107	encp->link.sle_next = NULL;
108	encp->refcount = 0;
109	encp->encoding = encoding;
110	encp->get_unicode_func = get_unicode;
111	encp->get_hfsname_func = get_hfsname;
112	encp->kmod_id = id;
113	SLIST_INSERT_HEAD(&hfs_encoding_list, encp, link);
114
115	lck_mtx_unlock(&encodinglst_mutex);
116	return (0);
117}
118
119
120/*
121 * hfs_remconverter - remove an HFS encoding converter
122 *
123 * Can be called by a kernel loadable module's finalize
124 * routine to remove an encoding converter so that the
125 * module (i.e. the code) can be unloaded.
126 *
127 * However, in the normal case, the removing and unloading
128 * of these converters is done in hfs_relconverter.
129 * The call is initiated from within the kernel during the unmounting of an hfs voulume.
130 */
131int
132hfs_remconverter(int id, u_int32_t encoding)
133{
134	struct hfs_encoding *encp;
135
136	lck_mtx_lock(&encodinglst_mutex);
137	SLIST_FOREACH(encp, &hfs_encoding_list, link) {
138		if (encp->encoding == encoding && encp->kmod_id == id) {
139			encp->refcount--;
140
141			/* if converter is no longer in use, release it */
142			if (encp->refcount <= 0 && encp->kmod_id != 0) {
143				SLIST_REMOVE(&hfs_encoding_list, encp, hfs_encoding, link);
144				lck_mtx_unlock(&encodinglst_mutex);
145    				FREE(encp, M_TEMP);
146    				return (0);
147 			} else {
148 				lck_mtx_unlock(&encodinglst_mutex);
149				return (1);   /* busy */
150			}
151			break;
152		}
153	}
154	lck_mtx_unlock(&encodinglst_mutex);
155
156	return (0);
157}
158
159
160/*
161 * hfs_getconverter - get HFS encoding converters
162 *
163 * Normally called during the mounting of an hfs voulume.
164 */
165int
166hfs_getconverter(u_int32_t encoding, hfs_to_unicode_func_t *get_unicode, unicode_to_hfs_func_t *get_hfsname)
167{
168	struct hfs_encoding *encp;
169	int found = 0;
170
171	lck_mtx_lock(&encodinglst_mutex);
172	SLIST_FOREACH(encp, &hfs_encoding_list, link) {
173		if (encp->encoding == encoding) {
174			found = 1;
175			*get_unicode = encp->get_unicode_func;
176			*get_hfsname = encp->get_hfsname_func;
177			++encp->refcount;
178			break;
179		}
180	}
181	lck_mtx_unlock(&encodinglst_mutex);
182
183	if (!found) {
184		*get_unicode = NULL;
185		*get_hfsname = NULL;
186		return (EINVAL);
187	}
188
189	return (0);
190}
191
192
193/*
194 * hfs_relconverter - release interest in an HFS encoding converter
195 *
196 * Normally called during the unmounting of an hfs voulume.
197 */
198int
199hfs_relconverter(u_int32_t encoding)
200{
201	struct hfs_encoding *encp;
202
203	lck_mtx_lock(&encodinglst_mutex);
204	SLIST_FOREACH(encp, &hfs_encoding_list, link) {
205		if (encp->encoding == encoding) {
206			encp->refcount--;
207
208			/* if converter is no longer in use, release it */
209			if (encp->refcount <= 0 && encp->kmod_id != 0) {
210				uint32_t loadTag = (uint32_t)encp->kmod_id;
211
212				SLIST_REMOVE(&hfs_encoding_list, encp, hfs_encoding, link);
213				lck_mtx_unlock(&encodinglst_mutex);
214
215 				FREE(encp, M_TEMP);
216   				(void)OSKextUnloadKextWithLoadTag(loadTag);
217				return (0);
218			}
219			lck_mtx_unlock(&encodinglst_mutex);
220			return (0);
221		}
222	}
223	lck_mtx_unlock(&encodinglst_mutex);
224
225	return (EINVAL);
226}
227
228
229/*
230 * Convert HFS encoded string into UTF-8
231 *
232 * Unicode output is fully decomposed
233 * '/' chars are converted to ':'
234 */
235int
236hfs_to_utf8(ExtendedVCB *vcb, const Str31 hfs_str, ByteCount maxDstLen, ByteCount *actualDstLen, unsigned char* dstStr)
237{
238	int error;
239	UniChar uniStr[MAX_HFS_UNICODE_CHARS];
240	ItemCount uniCount;
241	size_t utf8len;
242	hfs_to_unicode_func_t hfs_get_unicode = VCBTOHFS(vcb)->hfs_get_unicode;
243	u_int8_t pascal_length = 0;
244
245	/*
246	 * Validate the length of the Pascal-style string before passing it
247	 * down to the decoding engine.
248	 */
249	pascal_length = *((const u_int8_t*)(hfs_str));
250	if (pascal_length > 31) {
251		/* invalid string; longer than 31 bytes */
252		error = EINVAL;
253		return error;
254	}
255
256	error = hfs_get_unicode(hfs_str, uniStr, MAX_HFS_UNICODE_CHARS, &uniCount);
257
258	if (uniCount == 0)
259		error = EINVAL;
260
261	if (error == 0) {
262		error = utf8_encodestr(uniStr, uniCount * sizeof(UniChar), dstStr, &utf8len, maxDstLen , ':', 0);
263		if (error == ENAMETOOLONG)
264			*actualDstLen = utf8_encodelen(uniStr, uniCount * sizeof(UniChar), ':', 0);
265		else
266			*actualDstLen = utf8len;
267	}
268
269	return error;
270}
271
272
273/*
274 * When an HFS name cannot be encoded with the current
275 * volume encoding then MacRoman is used as a fallback.
276 */
277int
278mac_roman_to_utf8(const Str31 hfs_str, ByteCount maxDstLen, ByteCount *actualDstLen, unsigned char* dstStr)
279{
280	int error;
281	UniChar uniStr[MAX_HFS_UNICODE_CHARS];
282	ItemCount uniCount;
283	size_t utf8len;
284	u_int8_t pascal_length = 0;
285
286	/*
287	 * Validate the length of the Pascal-style string before passing it
288	 * down to the decoding engine.
289	 */
290	pascal_length = *((const u_int8_t*)(hfs_str));
291	if (pascal_length > 31) {
292		/* invalid string; longer than 31 bytes */
293		error = EINVAL;
294		return error;
295	}
296
297	error = mac_roman_to_unicode(hfs_str, uniStr, MAX_HFS_UNICODE_CHARS, &uniCount);
298
299	if (uniCount == 0)
300		error = EINVAL;
301
302	if (error == 0) {
303		error = utf8_encodestr(uniStr, uniCount * sizeof(UniChar), dstStr, &utf8len, maxDstLen , ':', 0);
304		if (error == ENAMETOOLONG)
305			*actualDstLen = utf8_encodelen(uniStr, uniCount * sizeof(UniChar), ':', 0);
306		else
307			*actualDstLen = utf8len;
308	}
309
310	return error;
311}
312
313
314/*
315 * Convert Unicode string into HFS encoding
316 *
317 * ':' chars are converted to '/'
318 * Assumes input represents fully decomposed Unicode
319 */
320int
321unicode_to_hfs(ExtendedVCB *vcb, ByteCount srcLen, u_int16_t* srcStr, Str31 dstStr, int retry)
322{
323	int error;
324	unicode_to_hfs_func_t hfs_get_hfsname = VCBTOHFS(vcb)->hfs_get_hfsname;
325
326	error = hfs_get_hfsname(srcStr, srcLen/sizeof(UniChar), dstStr);
327	if (error && retry) {
328		error = unicode_to_mac_roman(srcStr, srcLen/sizeof(UniChar), dstStr);
329	}
330	return error;
331}
332
333/*
334 * Convert UTF-8 string into HFS encoding
335 *
336 * ':' chars are converted to '/'
337 * Assumes input represents fully decomposed Unicode
338 */
339int
340utf8_to_hfs(ExtendedVCB *vcb, ByteCount srcLen, const unsigned char* srcStr, Str31 dstStr/*, int retry*/)
341{
342	int error;
343	UniChar uniStr[MAX_HFS_UNICODE_CHARS];
344	size_t ucslen;
345
346	error = utf8_decodestr(srcStr, srcLen, uniStr, &ucslen, sizeof(uniStr), ':', 0);
347	if (error == 0)
348		error = unicode_to_hfs(vcb, ucslen, uniStr, dstStr, 1);
349
350	return error;
351}
352
353int
354utf8_to_mac_roman(ByteCount srcLen, const unsigned char* srcStr, Str31 dstStr)
355{
356	int error;
357	UniChar uniStr[MAX_HFS_UNICODE_CHARS];
358	size_t ucslen;
359
360	error = utf8_decodestr(srcStr, srcLen, uniStr, &ucslen, sizeof(uniStr), ':', 0);
361	if (error == 0)
362		error = unicode_to_mac_roman(uniStr, ucslen/sizeof(UniChar), dstStr);
363
364	return error;
365}
366
/*
 * HFS MacRoman to/from Unicode conversions are built into the kernel.
 * All other HFS encodings are loadable.
 */
371
/*
 * 0x00A0 - 0x00FF = Latin 1 Supplement (30 total)
 *
 * MacRoman byte for Unicode 0x00A0 + index; '?' marks a character with
 * no MacRoman equivalent.  Read-only lookup data, hence const.
 */
static const u_int8_t gLatin1Table[] = {
  /*		  0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F  */
  /* 0x00A0 */	0xCA, 0xC1, 0xA2, 0xA3, 0xDB, 0xB4,  '?', 0xA4, 0xAC, 0xA9, 0xBB, 0xC7, 0xC2,  '?', 0xA8, 0xF8,
  /* 0x00B0 */	0xA1, 0xB1,  '?',  '?', 0xAB, 0xB5, 0xA6, 0xe1, 0xFC,  '?', 0xBC, 0xC8,  '?',  '?',  '?', 0xC0,
  /* 0x00C0 */	 '?',  '?',  '?',  '?',  '?',  '?', 0xAE,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
  /* 0x00D0 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xAF,  '?',  '?',  '?',  '?',  '?',  '?', 0xA7,
  /* 0x00E0 */	 '?',  '?',  '?',  '?',  '?',  '?', 0xBE,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
  /* 0x00F0 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xD6, 0xBF,  '?',  '?',  '?',  '?',  '?',  '?',  '?'
};
382
/*
 * 0x02C0 - 0x02DF = Spacing Modifiers (8 total)
 *
 * MacRoman byte for Unicode 0x02C0 + index; '?' marks a character with
 * no MacRoman equivalent.  Read-only lookup data, hence const.
 */
static const u_int8_t gSpaceModsTable[] = {
  /*		  0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F  */
  /* 0x02C0 */	 '?',  '?',  '?',  '?',  '?',  '?', 0xF6, 0xFF,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
  /* 0x02D0 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xF9, 0xFA, 0xFB, 0xFE, 0xF7, 0xFD,  '?',  '?'
};
389
/*
 * 0x2010 - 0x20AF = General Punctuation (17 total)
 *
 * MacRoman byte for Unicode 0x2010 + index; '?' marks a character with
 * no MacRoman equivalent.  Read-only lookup data, hence const.
 */
static const u_int8_t gPunctTable[] = {
  /*		  0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F  */
  /* 0x2010 */	 '?',  '?',  '?', 0xd0, 0xd1,  '?',  '?',  '?', 0xd4, 0xd5, 0xe2,  '?', 0xd2, 0xd3, 0xe3,  '?',
  /* 0x2020 */	0xa0, 0xe0, 0xa5,  '?',  '?',  '?', 0xc9,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
  /* 0x2030 */	0xe4,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xdc, 0xdd,  '?',  '?',  '?',  '?',  '?',
  /* 0x2040 */	 '?',  '?',  '?',  '?', 0xda,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
  /* 0x2050 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
  /* 0x2060 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
  /* 0x2070 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
  /* 0x2080 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
  /* 0x2090 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
  /* 0x20A0 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xdb,  '?',  '?',  '?'
};
404
/*
 * 0x22xx = Mathematical Operators (11 total)
 *
 * MacRoman byte for Unicode 0x2200 + index; '?' marks a character with
 * no MacRoman equivalent.  The table covers 0x2200 - 0x2267 only, so
 * valid indices are 0x00 - 0x67.  Read-only lookup data, hence const.
 */
static const u_int8_t gMathTable[] = {
  /*		  0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F  */
  /* 0x2200 */	 '?',  '?', 0xb6,  '?',  '?',  '?', 0xc6,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xb8,
  /* 0x2210 */	 '?', 0xb7,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xc3,  '?',  '?',  '?', 0xb0,  '?',
  /* 0x2220 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xba,  '?',  '?',  '?',  '?',
  /* 0x2230 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
  /* 0x2240 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xc5,  '?',  '?',  '?',  '?',  '?',  '?',  '?',
  /* 0x2250 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
  /* 0x2260 */	0xad,  '?',  '?',  '?', 0xb2, 0xb3,  '?',  '?'
};
416
/*
 * Base letter + combining mark -> precomposed MacRoman byte.
 *
 * The first 64 bytes are indexed by (base char - 0x40) and hold the
 * offset, within this same table, of the 11-entry row for that base
 * letter; 0xDA is the offset of the final catch-all row.  Each row is
 * indexed by the low byte of the combining mark (0x0300 - 0x030A) and
 * yields the MacRoman byte, or '?' when no precomposed form exists.
 * Read-only lookup data, hence const.
 */
static const u_int8_t gReverseCombTable[] = {
  /*		  0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F  */
  /* 0x40 */	0xDA, 0x40, 0xDA, 0xDA, 0xDA, 0x56, 0xDA, 0xDA, 0xDA, 0x6C, 0xDA, 0xDA, 0xDA, 0xDA, 0x82, 0x98,
  /* 0x50 */	0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xAE, 0xDA, 0xDA, 0xDA, 0xC4, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA,
  /* 0x60 */	0xDA, 0x4B, 0xDA, 0xDA, 0xDA, 0x61, 0xDA, 0xDA, 0xDA, 0x77, 0xDA, 0xDA, 0xDA, 0xDA, 0x8D, 0xA3,
  /* 0x70 */	0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xB9, 0xDA, 0xDA, 0xDA, 0xCF, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA,

  /* Combining Diacritical Marks (0x0300 - 0x030A) */
  /*              0     1     2     3     4     5     6     7     8     9     A  */
  /*  'A'   */
  /* 0x0300 */	0xCB, 0xE7, 0xE5, 0xCC,  '?',  '?',  '?',  '?', 0x80,  '?', 0x81,

  /*  'a'   */
  /* 0x0300 */	0x88, 0x87, 0x89, 0x8B,  '?',  '?',  '?',  '?', 0x8A,  '?', 0x8C,

  /*  'E'   */
  /* 0x0300 */	0xE9, 0x83, 0xE6,  '?',  '?',  '?',  '?',  '?', 0xE8,  '?',  '?',

  /*  'e'   */
  /* 0x0300 */	0x8F, 0x8E, 0x90,  '?',  '?',  '?',  '?',  '?', 0x91,  '?',  '?',

  /*  'I'   */
  /* 0x0300 */	0xED, 0xEA, 0xEB,  '?',  '?',  '?',  '?',  '?', 0xEC,  '?',  '?',

  /*  'i'   */
  /* 0x0300 */	0x93, 0x92, 0x94,  '?',  '?',  '?',  '?',  '?', 0x95,  '?',  '?',

  /*  'N'   */
  /* 0x0300 */	 '?',  '?',  '?', 0x84,  '?',  '?',  '?',  '?',  '?',  '?',  '?',

  /*  'n'   */
  /* 0x0300 */	 '?',  '?',  '?', 0x96,  '?',  '?',  '?',  '?',  '?',  '?',  '?',

  /*  'O'   */
  /* 0x0300 */	0xF1, 0xEE, 0xEF, 0xCD,  '?',  '?',  '?',  '?', 0x85,  '?',  '?',

  /*  'o'   */
  /* 0x0300 */	0x98, 0x97, 0x99, 0x9B,  '?',  '?',  '?',  '?', 0x9A,  '?',  '?',

  /*  'U'   */
  /* 0x0300 */	0xF4, 0xF2, 0xF3,  '?',  '?',  '?',  '?',  '?', 0x86,  '?',  '?',

  /*  'u'   */
  /* 0x0300 */	0x9D, 0x9C, 0x9E,  '?',  '?',  '?',  '?',  '?', 0x9F,  '?',  '?',

  /*  'Y'   */
  /* 0x0300 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xD9,  '?',  '?',

  /*  'y'   */
  /* 0x0300 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xD8,  '?',  '?',

  /*  else  */
  /* 0x0300 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?'
};
472
473
474/*
475 * Convert Unicode string into HFS MacRoman encoding
476 *
477 * Assumes Unicode input is fully decomposed
478 */
479static int unicode_to_mac_roman(UniChar *uni_str, u_int32_t unicodeChars, Str31 hfs_str)
480{
481	u_int8_t	*p;
482	const UniChar	*u;
483	UniChar		c;
484	UniChar		mask;
485	u_int16_t	inputChars;
486	u_int16_t	pascalChars;
487	OSErr		result = noErr;
488	u_int8_t	lsb;
489	u_int8_t	prevChar;
490	u_int8_t	mc;
491
492	mask = (UniChar) 0xFF80;
493	p = &hfs_str[1];
494	u = uni_str;
495	inputChars = unicodeChars;
496	pascalChars = prevChar = 0;
497
498	while (inputChars) {
499		c = *(u++);
500		lsb = (u_int8_t) c;
501
502		/*
503		 * If its not 7-bit ascii, then we need to map it
504		 */
505		if ( c & mask ) {
506			mc = '?';
507			switch (c & 0xFF00) {
508			case 0x0000:
509				if (lsb >= 0xA0)
510					mc = gLatin1Table[lsb - 0xA0];
511				break;
512
513			case 0x0200:
514				if (lsb >= 0xC0 && lsb <= 0xDF)
515					mc = gSpaceModsTable[lsb - 0xC0];
516				break;
517
518			case 0x2000:
519				if (lsb >= 0x10 && lsb <= 0xAF)
520					mc = gPunctTable[lsb- 0x10];
521				break;
522
523			case 0x2200:
524				if (lsb <= 0x68)
525					mc = gMathTable[lsb];
526				break;
527
528			case 0x0300:
529				if (c <= 0x030A) {
530					if (prevChar >= 'A' && prevChar < 'z') {
531						mc = gReverseCombTable[gReverseCombTable[prevChar - 0x40] + lsb];
532						--p;	/* backup over base char */
533						--pascalChars;
534					}
535				} else {
536					switch (c) {
537					case 0x0327:	/* combining cedilla */
538						if (prevChar == 'C')
539							mc = 0x82;
540						else if (prevChar == 'c')
541							mc = 0x8D;
542						else
543							break;
544						--p;	/* backup over base char */
545						--pascalChars;
546						break;
547
548					case 0x03A9: mc = 0xBD; break;	/* omega */
549
550					case 0x03C0: mc = 0xB9; break;	/* pi */
551					}
552				}
553				break;
554
555			default:
556				switch (c) {
557				case 0x0131: mc = 0xf5; break;	/* dotless i */
558
559				case 0x0152: mc = 0xce; break;	/* OE */
560
561				case 0x0153: mc = 0xcf; break;	/* oe */
562
563				case 0x0192: mc = 0xc4; break;	/* � */
564
565				case 0x2122: mc = 0xaa; break;	/* TM */
566
567				case 0x25ca: mc = 0xd7; break;	/* diamond */
568
569				case 0xf8ff: mc = 0xf0; break;	/* apple logo */
570
571				case 0xfb01: mc = 0xde; break;	/* fi */
572
573				case 0xfb02: mc = 0xdf; break;	/* fl */
574				}
575			} /* end switch (c & 0xFF00) */
576
577			/*
578			 * If we have an unmapped character then we need to mangle the name...
579			 */
580			if (mc == '?')
581				result = kTECUsedFallbacksStatus;
582
583			prevChar = 0;
584			lsb = mc;
585
586		} else {
587			prevChar = lsb;
588		}
589
590		if (pascalChars >= 31)
591			break;
592
593		*(p++) = lsb;
594		++pascalChars;
595		--inputChars;
596
597	} /* end while */
598
599	hfs_str[0] = pascalChars;
600
601	if (inputChars > 0)
602		result = ENAMETOOLONG;	/* ran out of room! */
603
604	return result;
605}
606
607
608static UniChar gHiBitBaseUnicode[128] = {
609  /* 0x80 */	0x0041, 0x0041, 0x0043, 0x0045, 0x004e, 0x004f, 0x0055, 0x0061,
610  /* 0x88 */	0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0063, 0x0065, 0x0065,
611  /* 0x90 */	0x0065, 0x0065, 0x0069, 0x0069, 0x0069, 0x0069, 0x006e, 0x006f,
612  /* 0x98 */	0x006f, 0x006f, 0x006f, 0x006f, 0x0075, 0x0075, 0x0075, 0x0075,
613  /* 0xa0 */	0x2020, 0x00b0, 0x00a2, 0x00a3, 0x00a7, 0x2022, 0x00b6, 0x00df,
614  /* 0xa8 */	0x00ae, 0x00a9, 0x2122, 0x00b4, 0x00a8, 0x2260, 0x00c6, 0x00d8,
615  /* 0xb0 */	0x221e, 0x00b1, 0x2264, 0x2265, 0x00a5, 0x00b5, 0x2202, 0x2211,
616  /* 0xb8 */	0x220f, 0x03c0, 0x222b, 0x00aa, 0x00ba, 0x03a9, 0x00e6, 0x00f8,
617  /* 0xc0 */	0x00bf, 0x00a1, 0x00ac, 0x221a, 0x0192, 0x2248, 0x2206, 0x00ab,
618  /* 0xc8 */	0x00bb, 0x2026, 0x00a0, 0x0041, 0x0041, 0x004f, 0x0152, 0x0153,
619  /* 0xd0 */	0x2013, 0x2014, 0x201c, 0x201d, 0x2018, 0x2019, 0x00f7, 0x25ca,
620  /* 0xd8 */	0x0079, 0x0059, 0x2044, 0x20ac, 0x2039, 0x203a, 0xfb01, 0xfb02,
621  /* 0xe0 */	0x2021, 0x00b7, 0x201a, 0x201e, 0x2030, 0x0041, 0x0045, 0x0041,
622  /* 0xe8 */	0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049, 0x004f, 0x004f,
623  /* 0xf0 */	0xf8ff, 0x004f, 0x0055, 0x0055, 0x0055, 0x0131, 0x02c6, 0x02dc,
624  /* 0xf8 */	0x00af, 0x02d8, 0x02d9, 0x02da, 0x00b8, 0x02dd, 0x02db, 0x02c7
625};
626
627static UniChar gHiBitCombUnicode[128] = {
628  /* 0x80 */	0x0308, 0x030a, 0x0327, 0x0301, 0x0303, 0x0308, 0x0308, 0x0301,
629  /* 0x88 */	0x0300, 0x0302, 0x0308, 0x0303, 0x030a, 0x0327, 0x0301, 0x0300,
630  /* 0x90 */	0x0302, 0x0308, 0x0301, 0x0300, 0x0302, 0x0308, 0x0303, 0x0301,
631  /* 0x98 */	0x0300, 0x0302, 0x0308, 0x0303, 0x0301, 0x0300, 0x0302, 0x0308,
632  /* 0xa0 */	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
633  /* 0xa8 */	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
634  /* 0xb0 */	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
635  /* 0xb8 */	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
636  /* 0xc0 */	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
637  /* 0xc8 */	0x0000, 0x0000, 0x0000, 0x0300, 0x0303, 0x0303, 0x0000, 0x0000,
638  /* 0xd0 */	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
639  /* 0xd8 */	0x0308, 0x0308, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
640  /* 0xe0 */	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0302, 0x0302, 0x0301,
641  /* 0xe8 */	0x0308, 0x0300, 0x0301, 0x0302, 0x0308, 0x0300, 0x0301, 0x0302,
642  /* 0xf0 */	0x0000, 0x0300, 0x0301, 0x0302, 0x0300, 0x0000, 0x0000, 0x0000,
643  /* 0xf8 */	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
644};
645
646
647/*
648 * Convert HFS MacRoman encoded string into Unicode
649 *
650 * Unicode output is fully decomposed
651 */
652int
653mac_roman_to_unicode(const Str31 hfs_str, UniChar *uni_str,
654				__unused u_int32_t maxCharLen, u_int32_t *unicodeChars)
655{
656	const u_int8_t  *p;
657	UniChar  *u;
658	u_int16_t  pascalChars;
659	u_int8_t  c;
660
661	p = hfs_str;
662	u = uni_str;
663
664	*unicodeChars = pascalChars = *(p++);	/* pick up length byte */
665
666	while (pascalChars--) {
667		c = *(p++);
668
669		if ( (int8_t) c >= 0 ) {		/* check if seven bit ascii */
670			*(u++) = (UniChar) c;	/* just pad high byte with zero */
671		} else { /* its a hi bit character */
672			UniChar uc;
673
674			c &= 0x7F;
675			*(u++) = uc = gHiBitBaseUnicode[c];
676
677			/*
678			 * if the unicode character we get back is an alpha char
679			 * then we must have an additional combining character
680			 */
681			if ((uc <= (UniChar) 'z') && (uc >= (UniChar) 'A')) {
682				*(u++) = gHiBitCombUnicode[c];
683				++(*unicodeChars);
684			}
685		}
686	}
687
688	return noErr;
689}
690
691#else /* not HFS - temp workaround until 4277828 is fixed */
692/* stubs for exported routines that aren't present when we build kernel without HFS */
693
694#include <sys/types.h>
695#include <sys/errno.h>
696
/* Prototypes for the stub routines defined below. */
int hfs_addconverter(int id, u_int32_t encoding, void *get_unicode, void *get_hfsname);
int hfs_getconverter(u_int32_t encoding, void *get_unicode, void *get_hfsname);
int hfs_relconverter(u_int32_t encoding);
int hfs_remconverter(int id, u_int32_t encoding);

/* Stub: accepts and ignores the registration; always returns 0. */
int
hfs_addconverter(__unused int id, __unused u_int32_t encoding,
    __unused void *get_unicode, __unused void *get_hfsname)
{
	return 0;
}

/* Stub: no converters exist without HFS; always returns EINVAL. */
int
hfs_getconverter(__unused u_int32_t encoding, __unused void *get_unicode,
    __unused void *get_hfsname)
{
	return EINVAL;
}

/* Stub: there is nothing to release; always returns EINVAL. */
int
hfs_relconverter(__unused u_int32_t encoding)
{
	return EINVAL;
}

/* Stub: there is nothing to remove; always returns 0. */
int
hfs_remconverter(__unused int id, __unused u_int32_t encoding)
{
	return 0;
}
724
725#endif /* HFS */
726