• Home
  • History
  • Annotate
  • Line#
  • Navigate
  • Raw
  • Download
  • only in /asuswrt-rt-n18u-9.0.0.4.380.2695/release/src-rt/router/samba-3.5.8/source3/modules/
1/*
2   Unix SMB/CIFS implementation.
3   Samba charset module for Mac OS X/Darwin
4   Copyright (C) Benjamin Riefenstahl 2003
5
6   This program is free software; you can redistribute it and/or modify
7   it under the terms of the GNU General Public License as published by
8   the Free Software Foundation; either version 3 of the License, or
9   (at your option) any later version.
10
11   This program is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14   GNU General Public License for more details.
15
16   You should have received a copy of the GNU General Public License
17   along with this program.  If not, see <http://www.gnu.org/licenses/>.
18*/
19
20/*
21 * modules/charset_macosxfs.c
22 *
23 * A Samba charset module to use on Mac OS X/Darwin as the filesystem
24 * and display encoding.
25 *
26 * Actually two implementations are provided here.  The default
27 * implementation is based on the official CFString API.  The other is
28 * based on internal CFString APIs as defined in the OpenDarwin
29 * source.
30 */
31
32#include "includes.h"
33
34/*
35 * Include OS frameworks.  These are only needed in this module.
36 */
37#include <CoreFoundation/CFString.h>
38
39/*
40 * See if autoconf has found us the internal headers in some form.
41 */
42#if HAVE_COREFOUNDATION_CFSTRINGENCODINGCONVERTER_H
43#	include <CoreFoundation/CFStringEncodingConverter.h>
44#	include <CoreFoundation/CFUnicodePrecomposition.h>
45#	define USE_INTERNAL_API 1
46#elif HAVE_CFSTRINGENCODINGCONVERTER_H
47#	include <CFStringEncodingConverter.h>
48#	include <CFUnicodePrecomposition.h>
49#	define USE_INTERNAL_API 1
50#endif
51
52/*
53 * Compile time configuration: Do we want debug output?
54 */
55/* #define DEBUG_STRINGS 1 */
56
57/*
58 * A simple, but efficient memory provider for our buffers.
59 */
/*
 * Grow a heap buffer so that it can hold at least newsize bytes.
 *
 *	buffer	current allocation (NULL is fine while *size is 0)
 *	size	in/out: capacity of the buffer in bytes
 *	newsize	number of bytes the caller needs
 *
 * Returns the (possibly moved) buffer, or NULL if the reallocation
 * failed.  The buffer is never shrunk.  On growth 128 bytes of slack
 * are added to cut down on the number of reallocations.
 */
static inline void *resize_buffer (void *buffer, size_t *size, size_t newsize)
{
	size_t capacity;

	if (newsize <= *size) {
		return buffer;
	}

	/* Do not let the slack wrap the requested size around. */
	capacity = newsize + 128;
	if (capacity < newsize) {
		capacity = newsize;
	}

	buffer = SMB_REALLOC(buffer, capacity);

	/*
	 * Record the capacity only once the memory really is ours.
	 * After a failure the bookkeeping is reset, so that the next
	 * call tries to allocate again instead of handing out NULL
	 * with a non-zero capacity.
	 * NOTE(review): this relies on SMB_REALLOC releasing the old
	 * block on failure -- confirm against its definition.
	 */
	*size = (NULL != buffer) ? capacity : 0;
	return buffer;
}
68
69/*
70 * While there is a version of OpenDarwin for intel, the usual case is
71 * big-endian PPC.  So we need byte swapping to handle the
72 * little-endian byte order of the network protocol.  We also need an
73 * additional dynamic buffer to do this work for incoming data blocks,
74 * because we have to consider the original data as constant.
75 *
76 * We abstract the differences away by providing a simple facade with
77 * these functions/macros:
78 *
79 *	le_to_native(dst,src,len)
80 *	native_to_le(cp,len)
81 *	set_ucbuffer_with_le(buffer,bufsize,data,size)
82 *	set_ucbuffer_with_le_copy(buffer,bufsize,data,size,reserve)
83 */
84#ifdef WORDS_BIGENDIAN
85
/*
 * Copy len bytes from src to dst, exchanging the two bytes of every
 * complete 16-bit unit on the way.
 *
 * Not usable with dst == src: the first store of a pair would clobber
 * the byte that the second store still has to read.
 *
 * A trailing odd byte has no partner to be swapped with; it is copied
 * unchanged, so that exactly len bytes are read and written.
 */
static inline void swap_bytes (char * dst, const char * src, size_t len)
{
	const char *pairs_end = src + (len & ~(size_t)1);

	while (src < pairs_end) {
		dst[0] = src[1];
		dst[1] = src[0];
		dst += 2;
		src += 2;
	}
	if (len & 1) {
		*dst = *src;
	}
}
/*
 * Exchange the two bytes of every complete 16-bit unit in the len
 * bytes starting at cp, in place.
 *
 * A trailing odd byte is left untouched; swapping it would pull in
 * the byte behind the end of the buffer.
 */
static inline void swap_bytes_inplace (char * cp, size_t len)
{
	char *pairs_end = cp + (len & ~(size_t)1);

	while (cp < pairs_end) {
		char temp = cp[1];
		cp[1] = cp[0];
		cp[0] = temp;
		cp += 2;
	}
}
107
/*
 * Big-endian flavour of the facade: every conversion between wire
 * order and host order has to swap bytes, and incoming data always
 * goes through a private copy.
 */
#define le_to_native(dst, src, len)	swap_bytes((dst), (src), (len))
#define native_to_le(cp, len)		swap_bytes_inplace((cp), (len))
#define set_ucbuffer_with_le(buffer, bufsize, data, size) \
	set_ucbuffer_with_le_copy((buffer), (bufsize), (data), (size), 0)
112
113#else	/* ! WORDS_BIGENDIAN */
114
/*
 * Little-endian flavour of the facade: wire order is host order.
 * Copies are plain memcpy()s, the in-place conversion disappears, and
 * set_ucbuffer_with_le() hands back the caller's data itself (the
 * bufsize argument is only evaluated, never used).
 */
#define le_to_native(dst, src, len)	memcpy((dst), (src), (len))
#define native_to_le(cp, len)		/* nothing */
#define set_ucbuffer_with_le(buffer, bufsize, data, size) \
	((void)(bufsize), (UniChar *)(data))
119
120#endif
121
122static inline UniChar *set_ucbuffer_with_le_copy (
123	UniChar *buffer, size_t *bufsize,
124	const void *data, size_t size, size_t reserve)
125{
126	buffer = resize_buffer(buffer, bufsize, size+reserve);
127	le_to_native((char*)buffer,data,size);
128	return buffer;
129}
130
131
132/*
133 * A simple hexdump function for debugging error conditions.
134 */
/*
 * Emit the string s verbatim at debug level 0.
 *
 * The text goes through an explicit "%s" format.  Handing s over as
 * the format itself would let any '%' in it (the ASCII column of a
 * hexdump can contain one) be interpreted as a conversion
 * specification.
 * NOTE(review): assumes DEBUG() forwards its parenthesised second
 * argument to a printf-style function, as declared in Samba's debug.h.
 */
#define	debug_out(s)	DEBUG(0,("%s",(s)))
136
137#ifdef DEBUG_STRINGS
138
/*
 * Write a labelled hex dump of the len bytes at s to the debug log.
 *
 * Every output line covers up to 16 bytes: the offset of the first
 * byte, two groups of eight hex values (padded with blanks on a short
 * last line), and the same bytes as text, with everything outside of
 * printable ASCII shown as '.'.
 */
static void hexdump( const char * label, const char * s, size_t len )
{
	size_t offset = 0;

	debug_out("<<<<<<<\n");
	debug_out(label);
	debug_out("\n");

	while (offset < len) {
		char line[100];
		char *out = line;
		const char *row = s + offset;
		size_t remaining = len - offset;
		size_t count = (remaining < 16) ? remaining : 16;
		size_t col;

#undef sprintf
		out += sprintf(out, "%04X ", (unsigned)offset);
		*out++ = ' ';

		/* Hex columns, with a gap between the two groups. */
		for (col = 0; col < 16; ++col) {
			if (8 == col) {
				*out++ = ' ';
			}
			if (col < count) {
				out += sprintf(out, "%02X ",
					       ((unsigned)row[col]) & 0xFF);
			} else {
				out += sprintf(out, "   ");
			}
		}
		*out++ = ' ';

		/* Text column. */
		for (col = 0; col < count; ++col) {
			char c = row[col];
			if (c < ' ' || c >= 0x7F || !isprint(c)) {
				*out++ = '.';
			} else {
				*out++ = c;
			}
		}
		*out++ = '\n';
		*out = 0;

		debug_out(line);
		offset += count;
	}

	debug_out(">>>>>>>\n");
}
180
181#else	/* !DEBUG_STRINGS */
182
/* String debugging is compiled out: hexdump() calls expand to nothing. */
#define hexdump(label, s, len)
184
185#endif
186
187
188#if !USE_INTERNAL_API
189
/*
 * An implementation based on documented Mac OS X APIs.
 *
 * This does a certain amount of memory management, creating and
 * manipulating CFString objects.  We try to minimize the impact by
 * keeping those objects around and re-using them.  We also use
 * external backing store for the CFStrings where this is possible and
 * beneficial.
 *
 * The Unicode normalization forms available at this level are
 * generic, not specifically for the file system.  So they may not be
 * perfect fits.
 */
203static size_t macosxfs_encoding_pull(
204	void *cd,				/* Encoder handle */
205	char **inbuf, size_t *inbytesleft,	/* Script string */
206	char **outbuf, size_t *outbytesleft)	/* UTF-16-LE string */
207{
208	static const int script_code = kCFStringEncodingUTF8;
209	static CFMutableStringRef cfstring = NULL;
210	size_t outsize;
211	CFRange range;
212
213	(void) cd; /* UNUSED */
214
215	if (0 == *inbytesleft) {
216		return 0;
217	}
218
219	if (NULL == cfstring) {
220		/*
221		 * A version with an external backing store as in the
222		 * push function should have been more efficient, but
223		 * testing shows, that it is actually slower (!).
224		 * Maybe kCFAllocatorDefault gets shortcut evaluation
225		 * internally, while kCFAllocatorNull doesn't.
226		 */
227		cfstring = CFStringCreateMutable(kCFAllocatorDefault,0);
228	}
229
230	/*
231	 * Three methods of appending to a CFString, choose the most
232	 * efficient.
233	 */
234	if (0 == (*inbuf)[*inbytesleft-1]) {
235		CFStringAppendCString(cfstring, *inbuf, script_code);
236	} else if (*inbytesleft <= 255) {
237		Str255 buffer;
238		buffer[0] = *inbytesleft;
239		memcpy(buffer+1, *inbuf, buffer[0]);
240		CFStringAppendPascalString(cfstring, buffer, script_code);
241	} else {
242		/*
243		 * We would like to use a fixed buffer and a loop
244		 * here, but than we can't garantee that the input is
245		 * well-formed UTF-8, as we are supposed to do.
246		 */
247		static char *buffer = NULL;
248		static size_t buflen = 0;
249		buffer = resize_buffer(buffer, &buflen, *inbytesleft+1);
250		memcpy(buffer, *inbuf, *inbytesleft);
251		buffer[*inbytesleft] = 0;
252		CFStringAppendCString(cfstring, *inbuf, script_code);
253	}
254
255	/*
256	 * Compose characters, using the non-canonical composition
257	 * form.
258	 */
259	CFStringNormalize(cfstring, kCFStringNormalizationFormC);
260
261	outsize = CFStringGetLength(cfstring);
262	range = CFRangeMake(0,outsize);
263
264	if (outsize == 0) {
265		/*
266		 * HACK: smbd/mangle_hash2.c:is_legal_name() expects
267		 * errors here.  That function will always pass 2
268		 * characters.  smbd/open.c:check_for_pipe() cuts a
269		 * patchname to 10 characters blindly.  Suppress the
270		 * debug output in those cases.
271		 */
272		if(2 != *inbytesleft && 10 != *inbytesleft) {
273			debug_out("String conversion: "
274				  "An unknown error occurred\n");
275			hexdump("UTF8->UTF16LE (old) input",
276				*inbuf, *inbytesleft);
277		}
278		errno = EILSEQ; /* Not sure, but this is what we have
279				 * actually seen. */
280		return -1;
281	}
282	if (outsize*2 > *outbytesleft) {
283		CFStringDelete(cfstring, range);
284		debug_out("String conversion: "
285			  "Output buffer too small\n");
286		hexdump("UTF8->UTF16LE (old) input",
287			*inbuf, *inbytesleft);
288		errno = E2BIG;
289		return -1;
290	}
291
292        CFStringGetCharacters(cfstring, range, (UniChar*)*outbuf);
293	CFStringDelete(cfstring, range);
294
295	native_to_le(*outbuf, outsize*2);
296
297	/*
298	 * Add a converted null byte, if the CFString conversions
299	 * prevented that until now.
300	 */
301	if (0 == (*inbuf)[*inbytesleft-1] &&
302	    (0 != (*outbuf)[outsize*2-1] || 0 != (*outbuf)[outsize*2-2])) {
303
304		if ((outsize*2+2) > *outbytesleft) {
305			debug_out("String conversion: "
306				  "Output buffer too small\n");
307			hexdump("UTF8->UTF16LE (old) input",
308				*inbuf, *inbytesleft);
309			errno = E2BIG;
310			return -1;
311		}
312
313		(*outbuf)[outsize*2] = (*outbuf)[outsize*2+1] = 0;
314		outsize += 2;
315	}
316
317	*inbuf += *inbytesleft;
318	*inbytesleft = 0;
319	*outbuf += outsize*2;
320	*outbytesleft -= outsize*2;
321
322	return 0;
323}
324
325static size_t macosxfs_encoding_push(
326	void *cd,				/* Encoder handle */
327	char **inbuf, size_t *inbytesleft,	/* UTF-16-LE string */
328	char **outbuf, size_t *outbytesleft)	/* Script string */
329{
330	static const int script_code = kCFStringEncodingUTF8;
331	static CFMutableStringRef cfstring = NULL;
332	static UniChar *buffer = NULL;
333	static size_t buflen = 0;
334	CFIndex outsize, cfsize, charsconverted;
335
336	(void) cd; /* UNUSED */
337
338	if (0 == *inbytesleft) {
339		return 0;
340	}
341
342	/*
343	 * We need a buffer that can hold 4 times the original data,
344	 * because that is the theoretical maximum that decomposition
345	 * can create currently (in Unicode 4.0).
346	 */
347	buffer = set_ucbuffer_with_le_copy(
348		buffer, &buflen, *inbuf, *inbytesleft, 3 * *inbytesleft);
349
350	if (NULL == cfstring) {
351		cfstring = CFStringCreateMutableWithExternalCharactersNoCopy(
352			kCFAllocatorDefault,
353			buffer, *inbytesleft/2, buflen/2,
354			kCFAllocatorNull);
355	} else {
356		CFStringSetExternalCharactersNoCopy(
357			cfstring,
358			buffer, *inbytesleft/2, buflen/2);
359	}
360
361	/*
362	 * Decompose characters, using the non-canonical decomposition
363	 * form.
364	 *
365	 * NB: This isn't exactly what HFS+ wants (see note on
366	 * kCFStringEncodingUseHFSPlusCanonical in
367	 * CFStringEncodingConverter.h), but AFAIK it's the best that
368	 * the official API can do.
369	 */
370	CFStringNormalize(cfstring, kCFStringNormalizationFormD);
371
372	cfsize = CFStringGetLength(cfstring);
373	charsconverted = CFStringGetBytes(
374		cfstring, CFRangeMake(0,cfsize),
375		script_code, 0, False,
376		*outbuf, *outbytesleft, &outsize);
377
378	if (0 == charsconverted) {
379		debug_out("String conversion: "
380			  "Buffer too small or not convertable\n");
381		hexdump("UTF16LE->UTF8 (old) input",
382			*inbuf, *inbytesleft);
383		errno = EILSEQ; /* Probably more likely. */
384		return -1;
385	}
386
387	/*
388	 * Add a converted null byte, if the CFString conversions
389	 * prevented that until now.
390	 */
391	if (0 == (*inbuf)[*inbytesleft-1] && 0 == (*inbuf)[*inbytesleft-2] &&
392	    (0 != (*outbuf)[outsize-1])) {
393
394		if (((size_t)outsize+1) > *outbytesleft) {
395			debug_out("String conversion: "
396				  "Output buffer too small\n");
397			hexdump("UTF16LE->UTF8 (old) input",
398				*inbuf, *inbytesleft);
399			errno = E2BIG;
400			return -1;
401		}
402
403		(*outbuf)[outsize] = 0;
404		++outsize;
405	}
406
407	*inbuf += *inbytesleft;
408	*inbytesleft = 0;
409	*outbuf += outsize;
410	*outbytesleft -= outsize;
411
412	return 0;
413}
414
415#else /* USE_INTERNAL_API */
416
/*
 * An implementation based on internal code as known from the
 * OpenDarwin CVS.
 *
 * This code doesn't need much memory management because it uses
 * functions that operate on the raw memory directly.
 *
 * The push routine here is faster and more compatible with HFS+ than
 * the other implementation above.  The pull routine is only faster
 * for some strings, slightly slower for others.  The pull routine
 * loses because it has to iterate over the data twice, once to
 * decode UTF-8 and then to do the character composition required by
 * Windows.
 */
431static size_t macosxfs_encoding_pull(
432	void *cd,				/* Encoder handle */
433	char **inbuf, size_t *inbytesleft,	/* Script string */
434	char **outbuf, size_t *outbytesleft)	/* UTF-16-LE string */
435{
436	static const int script_code = kCFStringEncodingUTF8;
437	UInt32 srcCharsUsed = 0;
438	UInt32 dstCharsUsed = 0;
439	UInt32 result;
440	uint32_t dstDecomposedUsed = 0;
441	uint32_t dstPrecomposedUsed = 0;
442
443	(void) cd; /* UNUSED */
444
445	if (0 == *inbytesleft) {
446		return 0;
447	}
448
449        result = CFStringEncodingBytesToUnicode(
450		script_code, kCFStringEncodingComposeCombinings,
451		*inbuf, *inbytesleft, &srcCharsUsed,
452		(UniChar*)*outbuf, *outbytesleft, &dstCharsUsed);
453
454	switch(result) {
455	case kCFStringEncodingConversionSuccess:
456		if (*inbytesleft == srcCharsUsed)
457			break;
458		else
459			; /*fall through*/
460	case kCFStringEncodingInsufficientOutputBufferLength:
461		debug_out("String conversion: "
462			  "Output buffer too small\n");
463		hexdump("UTF8->UTF16LE (new) input",
464			*inbuf, *inbytesleft);
465		errno = E2BIG;
466		return -1;
467	case kCFStringEncodingInvalidInputStream:
468		/*
469		 * HACK: smbd/mangle_hash2.c:is_legal_name() expects
470		 * errors here.  That function will always pass 2
471		 * characters.  smbd/open.c:check_for_pipe() cuts a
472		 * patchname to 10 characters blindly.  Suppress the
473		 * debug output in those cases.
474		 */
475		if(2 != *inbytesleft && 10 != *inbytesleft) {
476			debug_out("String conversion: "
477				  "Invalid input sequence\n");
478			hexdump("UTF8->UTF16LE (new) input",
479				*inbuf, *inbytesleft);
480		}
481		errno = EILSEQ;
482		return -1;
483	case kCFStringEncodingConverterUnavailable:
484		debug_out("String conversion: "
485			  "Unknown encoding\n");
486		hexdump("UTF8->UTF16LE (new) input",
487			*inbuf, *inbytesleft);
488		errno = EINVAL;
489		return -1;
490	}
491
492	/*
493	 * It doesn't look like CFStringEncodingBytesToUnicode() can
494	 * produce precomposed characters (flags=ComposeCombinings
495	 * doesn't do it), so we need another pass over the data here.
496	 * We can do this in-place, as the string can only get
497	 * shorter.
498	 *
499	 * (Actually in theory there should be an internal
500	 * decomposition and reordering before the actual composition
501	 * step.  But we should be able to rely on that we always get
502	 * fully decomposed strings for input, so this can't create
503	 * problems in reality.)
504	 */
505	CFUniCharPrecompose(
506		(const UTF16Char *)*outbuf, dstCharsUsed, &dstDecomposedUsed,
507		(UTF16Char *)*outbuf, dstCharsUsed, &dstPrecomposedUsed);
508
509	native_to_le(*outbuf, dstPrecomposedUsed*2);
510
511	*inbuf += srcCharsUsed;
512	*inbytesleft -= srcCharsUsed;
513	*outbuf += dstPrecomposedUsed*2;
514	*outbytesleft -= dstPrecomposedUsed*2;
515
516	return 0;
517}
518
519static size_t macosxfs_encoding_push(
520	void *cd,				/* Encoder handle */
521	char **inbuf, size_t *inbytesleft,	/* UTF-16-LE string */
522	char **outbuf, size_t *outbytesleft)	/* Script string */
523{
524	static const int script_code = kCFStringEncodingUTF8;
525	static UniChar *buffer = NULL;
526	static size_t buflen = 0;
527	UInt32 srcCharsUsed=0, dstCharsUsed=0, result;
528
529	(void) cd; /* UNUSED */
530
531	if (0 == *inbytesleft) {
532		return 0;
533	}
534
535	buffer = set_ucbuffer_with_le(
536		buffer, &buflen, *inbuf, *inbytesleft);
537
538	result = CFStringEncodingUnicodeToBytes(
539		script_code, kCFStringEncodingUseHFSPlusCanonical,
540		buffer, *inbytesleft/2, &srcCharsUsed,
541		*outbuf, *outbytesleft, &dstCharsUsed);
542
543	switch(result) {
544	case kCFStringEncodingConversionSuccess:
545		if (*inbytesleft/2 == srcCharsUsed)
546			break;
547		else
548			; /*fall through*/
549	case kCFStringEncodingInsufficientOutputBufferLength:
550		debug_out("String conversion: "
551			  "Output buffer too small\n");
552		hexdump("UTF16LE->UTF8 (new) input",
553			*inbuf, *inbytesleft);
554		errno = E2BIG;
555		return -1;
556	case kCFStringEncodingInvalidInputStream:
557		/*
558		 * HACK: smbd/open.c:check_for_pipe():is_legal_name()
559		 * cuts a pathname to 10 characters blindly.  Suppress
560		 * the debug output in those cases.
561		 */
562		if(10 != *inbytesleft) {
563			debug_out("String conversion: "
564				  "Invalid input sequence\n");
565			hexdump("UTF16LE->UTF8 (new) input",
566				*inbuf, *inbytesleft);
567		}
568		errno = EILSEQ;
569		return -1;
570	case kCFStringEncodingConverterUnavailable:
571		debug_out("String conversion: "
572			  "Unknown encoding\n");
573		hexdump("UTF16LE->UTF8 (new) input",
574			*inbuf, *inbytesleft);
575		errno = EINVAL;
576		return -1;
577	}
578
579	*inbuf += srcCharsUsed*2;
580	*inbytesleft -= srcCharsUsed*2;
581	*outbuf += dstCharsUsed;
582	*outbytesleft -= dstCharsUsed;
583
584	return 0;
585}
586
587#endif /* USE_INTERNAL_API */
588
589/*
590 * For initialization, actually install the encoding as "macosxfs".
591 */
592static struct charset_functions macosxfs_encoding_functions = {
593	"MACOSXFS", macosxfs_encoding_pull, macosxfs_encoding_push
594};
595
596NTSTATUS charset_macosxfs_init(void)
597{
598	return smb_register_charset(&macosxfs_encoding_functions);
599}
600
601/* eof */
602