1/*
2 * Copyright (c) 2000-2013 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29	File:		UnicodeWrappers.c
30
31	Contains:	Wrapper routines for Unicode conversion and comparison.
32
33*/
34
35#if HFS
36#include <sys/param.h>
37#include <sys/utfconv.h>
38
39#include "../../hfs_macos_defs.h"
40#include "UCStringCompareData.h"
41
42#include "../headers/FileMgrInternal.h"
43#include "../headers/HFSUnicodeWrappers.h"
44
45enum {
46	kMinFileExtensionChars = 1,	/* does not include dot */
47	kMaxFileExtensionChars = 5	/* does not include dot */
48};
49
50
51#define EXTENSIONCHAR(c)	(((c) >= 0x61 && (c) <= 0x7A) || \
52				 ((c) >= 0x41 && (c) <= 0x5A) || \
53				 ((c) >= 0x30 && (c) <= 0x39))
54
55
56#define IsHexDigit(c)		(((c) >= (u_int8_t) '0' && (c) <= (u_int8_t) '9') || \
57				 ((c) >= (u_int8_t) 'A' && (c) <= (u_int8_t) 'F'))
58
59
60static void	GetFilenameExtension( ItemCount length, ConstUniCharArrayPtr unicodeStr, char* extStr );
61
62
63static u_int32_t	HexStringToInteger( u_int32_t length, const u_int8_t *hexStr );
64
65
66/*
67 * Get filename extension (if any) as a C string
68 */
69static void
70GetFilenameExtension(ItemCount length, ConstUniCharArrayPtr unicodeStr, char * extStr)
71{
72	u_int32_t	i;
73	UniChar	c;
74	u_int16_t	extChars;	/* number of extension chars (excluding dot) */
75	u_int16_t	maxExtChars;
76	Boolean	foundExtension;
77
78	extStr[0] = '\0';	/* assume there's no extension */
79
80	if ( length < 3 )
81		return;		/* "x.y" is smallest possible extension */
82
83	if ( length < (kMaxFileExtensionChars + 2) )
84		maxExtChars = length - 2;	/* save room for prefix + dot */
85	else
86		maxExtChars = kMaxFileExtensionChars;
87
88	i = length;
89	extChars = 0;
90	foundExtension = false;
91
92	while ( extChars <= maxExtChars ) {
93		c = unicodeStr[--i];
94
95		/* look for leading dot */
96		if ( c == (UniChar) '.' ) {
97			if ( extChars > 0 )	/* cannot end with a dot */
98				foundExtension = true;
99			break;
100		}
101
102		if ( EXTENSIONCHAR(c) )
103			++extChars;
104		else
105			break;
106	}
107
108	/* if we found one then copy it */
109	if ( foundExtension ) {
110		u_int8_t *extStrPtr = (u_int8_t *)extStr;
111		const UniChar *unicodeStrPtr = &unicodeStr[i];
112
113		for ( i = 0; i <= extChars; ++i )
114			*(extStrPtr++) = (u_int8_t) *(unicodeStrPtr++);
115		extStr[extChars + 1] = '\0';	/* terminate extension + dot */
116	}
117}
118
119
120
121/*
122 * Count filename extension characters (if any)
123 */
124__private_extern__ u_int32_t
125CountFilenameExtensionChars( const unsigned char * filename, u_int32_t length )
126{
127	u_int32_t	i;
128	UniChar	c;
129	u_int32_t	extChars;	/* number of extension chars (excluding dot) */
130	u_int16_t	maxExtChars;
131	Boolean	foundExtension;
132
133	if ( length < 3 )
134		return 0;	/* "x.y" is smallest possible extension	*/
135
136	if ( length < (kMaxFileExtensionChars + 2) )
137		maxExtChars = length - 2;	/* save room for prefix + dot */
138	else
139		maxExtChars = kMaxFileExtensionChars;
140
141	extChars = 0;		/* assume there's no extension */
142	i = length - 1;		/* index to last ascii character */
143	foundExtension = false;
144
145	while ( extChars <= maxExtChars ) {
146		c = filename[i--];
147
148		/* look for leading dot */
149		if ( c == (u_int8_t) '.' )	{
150			if ( extChars > 0 )	/* cannot end with a dot */
151				return (extChars);
152
153			break;
154		}
155
156		if ( EXTENSIONCHAR(c) )
157			++extChars;
158		else
159			break;
160	}
161
162	return 0;
163}
164
165
166/*
167 * extract the file id from a mangled name
168 */
169HFSCatalogNodeID
170GetEmbeddedFileID(const unsigned char * filename, u_int32_t length, u_int32_t *prefixLength)
171{
172	short	extChars;
173	short	i;
174	u_int8_t	c;
175
176	*prefixLength = 0;
177
178	if ( filename == NULL )
179		return 0;
180
181	if ( length < 28 )
182		return 0;	/* too small to have been mangled */
183
184	/* big enough for a file ID (#10) and an extension (.x) ? */
185	if ( length > 5 )
186		extChars = CountFilenameExtensionChars(filename, length);
187	else
188		extChars = 0;
189
190	/* skip over dot plus extension characters */
191	if ( extChars > 0 )
192		length -= (extChars + 1);
193
194	/* scan for file id digits */
195	for ( i = length - 1; i >= 0; --i) {
196		c = filename[i];
197
198		/* look for file ID marker */
199		if ( c == '#' ) {
200			if ( (length - i) < 3 )
201				break;	/* too small to be a file ID */
202
203			*prefixLength = i;
204			return HexStringToInteger(length - i - 1, &filename[i+1]);
205		}
206
207		if ( !IsHexDigit(c) )
208			break;	/* file ID string must have hex digits */
209	}
210
211	return 0;
212}
213
214
215
216static u_int32_t
217HexStringToInteger(u_int32_t length, const u_int8_t *hexStr)
218{
219	u_int32_t		value;
220	u_int32_t		i;
221	u_int8_t		c;
222	const u_int8_t	*p;
223
224	value = 0;
225	p = hexStr;
226
227	for ( i = 0; i < length; ++i ) {
228		c = *p++;
229
230		if (c >= '0' && c <= '9') {
231			value = value << 4;
232			value += (u_int32_t) c - (u_int32_t) '0';
233		} else if (c >= 'A' && c <= 'F') {
234			value = value << 4;
235			value += 10 + ((unsigned int) c - (unsigned int) 'A');
236		} else {
237			return 0;	/* bad character */
238		}
239	}
240
241	return value;
242}
243
244
245/*
246 * Routine:	FastRelString
247 *
248 * Output:	returns -1 if str1 < str2
249 *		returns  1 if str1 > str2
250 *		return	 0 if equal
251 *
252 */
253int32_t	FastRelString( ConstStr255Param str1, ConstStr255Param str2 )
254{
255	u_int16_t*		compareTable;
256	int32_t	 		bestGuess;
257	u_int8_t 	 	length, length2;
258	u_int8_t 	 	delta;
259
260	delta = 0;
261	length = *(str1++);
262	length2 = *(str2++);
263
264	if (length == length2)
265		bestGuess = 0;
266	else if (length < length2)
267	{
268		bestGuess = -1;
269		delta = length2 - length;
270	}
271	else
272	{
273		bestGuess = 1;
274		length = length2;
275	}
276
277	compareTable = (u_int16_t*) gCompareTable;
278
279	while (length--)
280	{
281		u_int8_t	aChar, bChar;
282
283		aChar = *(str1++);
284		bChar = *(str2++);
285
286		if (aChar != bChar)		//	If they don't match exacly, do case conversion
287		{
288			u_int16_t	aSortWord, bSortWord;
289
290			aSortWord = compareTable[aChar];
291			bSortWord = compareTable[bChar];
292
293			if (aSortWord > bSortWord)
294				return 1;
295
296			if (aSortWord < bSortWord)
297				return -1;
298		}
299
300		//	If characters match exactly, then go on to next character immediately without
301		//	doing any extra work.
302	}
303
304	//	if you got to here, then return bestGuess
305	return bestGuess;
306}
307
308
309
310//
311//	FastUnicodeCompare - Compare two Unicode strings; produce a relative ordering
312//
313//	    IF				RESULT
314//	--------------------------
315//	str1 < str2		=>	-1
316//	str1 = str2		=>	 0
317//	str1 > str2		=>	+1
318//
319//	The lower case table starts with 256 entries (one for each of the upper bytes
320//	of the original Unicode char).  If that entry is zero, then all characters with
321//	that upper byte are already case folded.  If the entry is non-zero, then it is
322//	the _index_ (not byte offset) of the start of the sub-table for the characters
323//	with that upper byte.  All ignorable characters are folded to the value zero.
324//
325//	In pseudocode:
326//
327//		Let c = source Unicode character
328//		Let table[] = lower case table
329//
330//		lower = table[highbyte(c)]
331//		if (lower == 0)
332//			lower = c
333//		else
334//			lower = table[lower+lowbyte(c)]
335//
336//		if (lower == 0)
337//			ignore this character
338//
339//	To handle ignorable characters, we now need a loop to find the next valid character.
340//	Also, we can't pre-compute the number of characters to compare; the string length might
341//	be larger than the number of non-ignorable characters.  Further, we must be able to handle
342//	ignorable characters at any point in the string, including as the first or last characters.
343//	We use a zero value as a sentinel to detect both end-of-string and ignorable characters.
344//	Since the File Manager doesn't prevent the NUL character (value zero) as part of a filename,
345//	the case mapping table is assumed to map u+0000 to some non-zero value (like 0xFFFF, which is
346//	an invalid Unicode character).
347//
348//	Pseudocode:
349//
350//		while (1) {
351//			c1 = GetNextValidChar(str1)			//	returns zero if at end of string
352//			c2 = GetNextValidChar(str2)
353//
354//			if (c1 != c2) break					//	found a difference
355//
356//			if (c1 == 0)						//	reached end of string on both strings at once?
357//				return 0;						//	yes, so strings are equal
358//		}
359//
360//		// When we get here, c1 != c2.  So, we just need to determine which one is less.
361//		if (c1 < c2)
362//			return -1;
363//		else
364//			return 1;
365//
366
367int32_t FastUnicodeCompare ( register ConstUniCharArrayPtr str1, register ItemCount length1,
368							register ConstUniCharArrayPtr str2, register ItemCount length2)
369{
370	register u_int16_t		c1,c2;
371	register u_int16_t		temp;
372	register u_int16_t*	lowerCaseTable;
373
374	lowerCaseTable = (u_int16_t*) gLowerCaseTable;
375
376	while (1) {
377		/* Set default values for c1, c2 in case there are no more valid chars */
378		c1 = 0;
379		c2 = 0;
380
381		/* Find next non-ignorable char from str1, or zero if no more */
382		while (length1 && c1 == 0) {
383			c1 = *(str1++);
384			--length1;
385			/* check for basic latin first */
386			if (c1 < 0x0100) {
387				c1 = gLatinCaseFold[c1];
388				break;
389			}
390			/* case fold if neccessary */
391			if ((temp = lowerCaseTable[c1>>8]) != 0)
392				c1 = lowerCaseTable[temp + (c1 & 0x00FF)];
393		}
394
395
396		/* Find next non-ignorable char from str2, or zero if no more */
397		while (length2 && c2 == 0) {
398			c2 = *(str2++);
399			--length2;
400			/* check for basic latin first */
401			if (c2 < 0x0100) {
402				c2 = gLatinCaseFold[c2];
403				break;
404			}
405			/* case fold if neccessary */
406			if ((temp = lowerCaseTable[c2>>8]) != 0)
407				c2 = lowerCaseTable[temp + (c2 & 0x00FF)];
408		}
409
410		if (c1 != c2)		//	found a difference, so stop looping
411			break;
412
413		if (c1 == 0)		//	did we reach the end of both strings at the same time?
414			return 0;		//	yes, so strings are equal
415	}
416
417	if (c1 < c2)
418		return -1;
419	else
420		return 1;
421}
422
423/*
424 * UnicodeBinaryCompare
425 * Compare two UTF-16 strings and perform case-sensitive (binary) matching against them.
426 *
427 * Results are emitted like FastUnicodeCompare:
428 *
429 *
430 *	    IF				RESULT
431 *	--------------------------
432 *	str1 < str2		=>	-1
433 *	str1 = str2		=>	 0
434 *	str1 > str2		=>	+1
435 *
436 * The case matching source code is greatly simplified due to the lack of case-folding
437 * in this comparison routine. We compare, in order: the lengths, then do character-by-
438 * character comparisons.
439 *
440 */
441int32_t UnicodeBinaryCompare (register ConstUniCharArrayPtr str1, register ItemCount len1,
442							register ConstUniCharArrayPtr str2, register ItemCount len2) {
443	uint16_t c1;
444	uint16_t c2;
445	int string_length;
446	int32_t result = 0;
447
448	/* Set default values for the two character pointers */
449	c1 = 0;
450	c2 = 0;
451
452	/* First generate the string length (for comparison purposes) */
453	if (len1 < len2) {
454		string_length = len1;
455		--result;
456	}
457	else if (len1 > len2) {
458		string_length = len2;
459		++result;
460	}
461	else {
462		string_length = len1;
463	}
464
465	/* now compare the two string pointers */
466	while (string_length--) {
467		c1 = *(str1++);
468		c2 = *(str2++);
469
470		if (c1 > c2) {
471			result = 1;
472			break;
473		}
474
475		if (c1 < c2) {
476			result = -1;
477			break;
478		}
479		/* If equal, iterate to the next two respective chars */
480	}
481
482	return result;
483}
484
485
486OSErr
487ConvertUnicodeToUTF8Mangled(ByteCount srcLen, ConstUniCharArrayPtr srcStr, ByteCount maxDstLen,
488					 ByteCount *actualDstLen, unsigned char* dstStr, HFSCatalogNodeID cnid)
489{
490	ByteCount subMaxLen;
491	size_t utf8len;
492	char fileIDStr[15];
493	char extStr[15];
494
495	snprintf(fileIDStr, sizeof(fileIDStr), "#%X", cnid);
496	GetFilenameExtension(srcLen/sizeof(UniChar), srcStr, extStr);
497
498	/* remove extension chars from source */
499	srcLen -= strlen(extStr) * sizeof(UniChar);
500	subMaxLen = maxDstLen - (strlen(extStr) + strlen(fileIDStr));
501
502	(void) utf8_encodestr(srcStr, srcLen, dstStr, &utf8len, subMaxLen, ':', 0);
503
504	strlcat((char *)dstStr, fileIDStr, maxDstLen);
505	strlcat((char *)dstStr, extStr, maxDstLen);
506	*actualDstLen = utf8len + (strlen(extStr) + strlen(fileIDStr));
507
508	return noErr;
509}
510
511#else /* not HFS - temp workaround until 4277828 is fixed */
512/* stubs for exported routines that aren't present when we build kernel without HFS */
513
514#include <sys/types.h>
515
516int32_t FastUnicodeCompare( void * str1, u_int32_t length1, void * str2, u_int32_t length2 );
517
518
519int32_t FastUnicodeCompare( __unused void * str1,
520							__unused u_int32_t length1,
521							__unused void * str2,
522							__unused u_int32_t length2 )
523{
524	return( 0 );
525}
526
527
528#endif /* HFS */
529
530