1/* 2 Unix SMB/CIFS implementation. 3 Samba charset module for Mac OS X/Darwin 4 Copyright (C) Benjamin Riefenstahl 2003 5 6 This program is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 3 of the License, or 9 (at your option) any later version. 10 11 This program is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with this program. If not, see <http://www.gnu.org/licenses/>. 18*/ 19 20/* 21 * modules/charset_macosxfs.c 22 * 23 * A Samba charset module to use on Mac OS X/Darwin as the filesystem 24 * and display encoding. 25 * 26 * Actually two implementations are provided here. The default 27 * implementation is based on the official CFString API. The other is 28 * based on internal CFString APIs as defined in the OpenDarwin 29 * source. 30 */ 31 32#include "includes.h" 33 34/* 35 * Include OS frameworks. These are only needed in this module. 36 */ 37#include <CoreFoundation/CFString.h> 38 39/* 40 * See if autoconf has found us the internal headers in some form. 41 */ 42#if HAVE_COREFOUNDATION_CFSTRINGENCODINGCONVERTER_H 43# include <CoreFoundation/CFStringEncodingConverter.h> 44# include <CoreFoundation/CFUnicodePrecomposition.h> 45# define USE_INTERNAL_API 1 46#elif HAVE_CFSTRINGENCODINGCONVERTER_H 47# include <CFStringEncodingConverter.h> 48# include <CFUnicodePrecomposition.h> 49# define USE_INTERNAL_API 1 50#endif 51 52/* 53 * Compile time configuration: Do we want debug output? 54 */ 55/* #define DEBUG_STRINGS 1 */ 56 57/* 58 * A simple, but efficient memory provider for our buffers. 59 */ 60static inline void *resize_buffer (void *buffer, size_t *size, size_t newsize) 61{ 62 if (newsize > *size) { 63 *size = newsize + 128; 64 buffer = SMB_REALLOC(buffer, *size); 65 } 66 return buffer; 67} 68 69/* 70 * While there is a version of OpenDarwin for intel, the usual case is 71 * big-endian PPC. So we need byte swapping to handle the 72 * little-endian byte order of the network protocol. We also need an 73 * additional dynamic buffer to do this work for incoming data blocks, 74 * because we have to consider the original data as constant. 75 * 76 * We abstract the differences away by providing a simple facade with 77 * these functions/macros: 78 * 79 * le_to_native(dst,src,len) 80 * native_to_le(cp,len) 81 * set_ucbuffer_with_le(buffer,bufsize,data,size) 82 * set_ucbuffer_with_le_copy(buffer,bufsize,data,size,reserve) 83 */ 84#ifdef WORDS_BIGENDIAN 85 86static inline void swap_bytes (char * dst, const char * src, size_t len) 87{ 88 const char *srcend = src + len; 89 while (src < srcend) { 90 dst[0] = src[1]; 91 dst[1] = src[0]; 92 dst += 2; 93 src += 2; 94 } 95} 96static inline void swap_bytes_inplace (char * cp, size_t len) 97{ 98 char temp; 99 char *end = cp + len; 100 while (cp < end) { 101 temp = cp[1]; 102 cp[1] = cp[0]; 103 cp[0] = temp; 104 cp += 2; 105 } 106} 107 108#define le_to_native(dst,src,len) swap_bytes(dst,src,len) 109#define native_to_le(cp,len) swap_bytes_inplace(cp,len) 110#define set_ucbuffer_with_le(buffer,bufsize,data,size) \ 111 set_ucbuffer_with_le_copy(buffer,bufsize,data,size,0) 112 113#else /* ! WORDS_BIGENDIAN */ 114 115#define le_to_native(dst,src,len) memcpy(dst,src,len) 116#define native_to_le(cp,len) /* nothing */ 117#define set_ucbuffer_with_le(buffer,bufsize,data,size) \ 118 (((void)(bufsize)),(UniChar*)(data)) 119 120#endif 121 122static inline UniChar *set_ucbuffer_with_le_copy ( 123 UniChar *buffer, size_t *bufsize, 124 const void *data, size_t size, size_t reserve) 125{ 126 buffer = resize_buffer(buffer, bufsize, size+reserve); 127 le_to_native((char*)buffer,data,size); 128 return buffer; 129} 130 131 132/* 133 * A simple hexdump function for debugging error conditions. 134 */ 135#define debug_out(s) DEBUG(0,(s)) 136 137#ifdef DEBUG_STRINGS 138 139static void hexdump( const char * label, const char * s, size_t len ) 140{ 141 size_t restlen = len; 142 debug_out("<<<<<<<\n"); 143 debug_out(label); 144 debug_out("\n"); 145 while (restlen > 0) { 146 char line[100]; 147 size_t i, j; 148 char * d = line; 149#undef sprintf 150 d += sprintf(d, "%04X ", (unsigned)(len-restlen)); 151 *d++ = ' '; 152 for( i = 0; i<restlen && i<8; ++i ) { 153 d += sprintf(d, "%02X ", ((unsigned)s[i]) & 0xFF); 154 } 155 for( j = i; j<8; ++j ) { 156 d += sprintf(d, " "); 157 } 158 *d++ = ' '; 159 for( i = 8; i<restlen && i<16; ++i ) { 160 d += sprintf(d, "%02X ", ((unsigned)s[i]) & 0xFF); 161 } 162 for( j = i; j<16; ++j ) { 163 d += sprintf(d, " "); 164 } 165 *d++ = ' '; 166 for( i = 0; i<restlen && i<16; ++i ) { 167 if(s[i] < ' ' || s[i] >= 0x7F || !isprint(s[i])) 168 *d++ = '.'; 169 else 170 *d++ = s[i]; 171 } 172 *d++ = '\n'; 173 *d = 0; 174 restlen -= i; 175 s += i; 176 debug_out(line); 177 } 178 debug_out(">>>>>>>\n"); 179} 180 181#else /* !DEBUG_STRINGS */ 182 183#define hexdump(label,s,len) /* nothing */ 184 185#endif 186 187 188#if !USE_INTERNAL_API 189 190/* 191 * An implementation based on documented Mac OS X APIs. 192 * 193 * This does a certain amount of memory management, creating and 194 * manipulating CFString objects. We try to minimize the impact by 195 * keeping those objects around and re-using them. We also use 196 * external backing store for the CFStrings where this is possible and 197 * benficial. 198 * 199 * The Unicode normalizations forms available at this level are 200 * generic, not specifically for the file system. So they may not be 201 * perfect fits. 202 */ 203static size_t macosxfs_encoding_pull( 204 void *cd, /* Encoder handle */ 205 char **inbuf, size_t *inbytesleft, /* Script string */ 206 char **outbuf, size_t *outbytesleft) /* UTF-16-LE string */ 207{ 208 static const int script_code = kCFStringEncodingUTF8; 209 static CFMutableStringRef cfstring = NULL; 210 size_t outsize; 211 CFRange range; 212 213 (void) cd; /* UNUSED */ 214 215 if (0 == *inbytesleft) { 216 return 0; 217 } 218 219 if (NULL == cfstring) { 220 /* 221 * A version with an external backing store as in the 222 * push function should have been more efficient, but 223 * testing shows, that it is actually slower (!). 224 * Maybe kCFAllocatorDefault gets shortcut evaluation 225 * internally, while kCFAllocatorNull doesn't. 226 */ 227 cfstring = CFStringCreateMutable(kCFAllocatorDefault,0); 228 } 229 230 /* 231 * Three methods of appending to a CFString, choose the most 232 * efficient. 233 */ 234 if (0 == (*inbuf)[*inbytesleft-1]) { 235 CFStringAppendCString(cfstring, *inbuf, script_code); 236 } else if (*inbytesleft <= 255) { 237 Str255 buffer; 238 buffer[0] = *inbytesleft; 239 memcpy(buffer+1, *inbuf, buffer[0]); 240 CFStringAppendPascalString(cfstring, buffer, script_code); 241 } else { 242 /* 243 * We would like to use a fixed buffer and a loop 244 * here, but than we can't garantee that the input is 245 * well-formed UTF-8, as we are supposed to do. 246 */ 247 static char *buffer = NULL; 248 static size_t buflen = 0; 249 buffer = resize_buffer(buffer, &buflen, *inbytesleft+1); 250 memcpy(buffer, *inbuf, *inbytesleft); 251 buffer[*inbytesleft] = 0; 252 CFStringAppendCString(cfstring, *inbuf, script_code); 253 } 254 255 /* 256 * Compose characters, using the non-canonical composition 257 * form. 258 */ 259 CFStringNormalize(cfstring, kCFStringNormalizationFormC); 260 261 outsize = CFStringGetLength(cfstring); 262 range = CFRangeMake(0,outsize); 263 264 if (outsize == 0) { 265 /* 266 * HACK: smbd/mangle_hash2.c:is_legal_name() expects 267 * errors here. That function will always pass 2 268 * characters. smbd/open.c:check_for_pipe() cuts a 269 * patchname to 10 characters blindly. Suppress the 270 * debug output in those cases. 271 */ 272 if(2 != *inbytesleft && 10 != *inbytesleft) { 273 debug_out("String conversion: " 274 "An unknown error occurred\n"); 275 hexdump("UTF8->UTF16LE (old) input", 276 *inbuf, *inbytesleft); 277 } 278 errno = EILSEQ; /* Not sure, but this is what we have 279 * actually seen. */ 280 return -1; 281 } 282 if (outsize*2 > *outbytesleft) { 283 CFStringDelete(cfstring, range); 284 debug_out("String conversion: " 285 "Output buffer too small\n"); 286 hexdump("UTF8->UTF16LE (old) input", 287 *inbuf, *inbytesleft); 288 errno = E2BIG; 289 return -1; 290 } 291 292 CFStringGetCharacters(cfstring, range, (UniChar*)*outbuf); 293 CFStringDelete(cfstring, range); 294 295 native_to_le(*outbuf, outsize*2); 296 297 /* 298 * Add a converted null byte, if the CFString conversions 299 * prevented that until now. 300 */ 301 if (0 == (*inbuf)[*inbytesleft-1] && 302 (0 != (*outbuf)[outsize*2-1] || 0 != (*outbuf)[outsize*2-2])) { 303 304 if ((outsize*2+2) > *outbytesleft) { 305 debug_out("String conversion: " 306 "Output buffer too small\n"); 307 hexdump("UTF8->UTF16LE (old) input", 308 *inbuf, *inbytesleft); 309 errno = E2BIG; 310 return -1; 311 } 312 313 (*outbuf)[outsize*2] = (*outbuf)[outsize*2+1] = 0; 314 outsize += 2; 315 } 316 317 *inbuf += *inbytesleft; 318 *inbytesleft = 0; 319 *outbuf += outsize*2; 320 *outbytesleft -= outsize*2; 321 322 return 0; 323} 324 325static size_t macosxfs_encoding_push( 326 void *cd, /* Encoder handle */ 327 char **inbuf, size_t *inbytesleft, /* UTF-16-LE string */ 328 char **outbuf, size_t *outbytesleft) /* Script string */ 329{ 330 static const int script_code = kCFStringEncodingUTF8; 331 static CFMutableStringRef cfstring = NULL; 332 static UniChar *buffer = NULL; 333 static size_t buflen = 0; 334 CFIndex outsize, cfsize, charsconverted; 335 336 (void) cd; /* UNUSED */ 337 338 if (0 == *inbytesleft) { 339 return 0; 340 } 341 342 /* 343 * We need a buffer that can hold 4 times the original data, 344 * because that is the theoretical maximum that decomposition 345 * can create currently (in Unicode 4.0). 346 */ 347 buffer = set_ucbuffer_with_le_copy( 348 buffer, &buflen, *inbuf, *inbytesleft, 3 * *inbytesleft); 349 350 if (NULL == cfstring) { 351 cfstring = CFStringCreateMutableWithExternalCharactersNoCopy( 352 kCFAllocatorDefault, 353 buffer, *inbytesleft/2, buflen/2, 354 kCFAllocatorNull); 355 } else { 356 CFStringSetExternalCharactersNoCopy( 357 cfstring, 358 buffer, *inbytesleft/2, buflen/2); 359 } 360 361 /* 362 * Decompose characters, using the non-canonical decomposition 363 * form. 364 * 365 * NB: This isn't exactly what HFS+ wants (see note on 366 * kCFStringEncodingUseHFSPlusCanonical in 367 * CFStringEncodingConverter.h), but AFAIK it's the best that 368 * the official API can do. 369 */ 370 CFStringNormalize(cfstring, kCFStringNormalizationFormD); 371 372 cfsize = CFStringGetLength(cfstring); 373 charsconverted = CFStringGetBytes( 374 cfstring, CFRangeMake(0,cfsize), 375 script_code, 0, False, 376 *outbuf, *outbytesleft, &outsize); 377 378 if (0 == charsconverted) { 379 debug_out("String conversion: " 380 "Buffer too small or not convertable\n"); 381 hexdump("UTF16LE->UTF8 (old) input", 382 *inbuf, *inbytesleft); 383 errno = EILSEQ; /* Probably more likely. */ 384 return -1; 385 } 386 387 /* 388 * Add a converted null byte, if the CFString conversions 389 * prevented that until now. 390 */ 391 if (0 == (*inbuf)[*inbytesleft-1] && 0 == (*inbuf)[*inbytesleft-2] && 392 (0 != (*outbuf)[outsize-1])) { 393 394 if (((size_t)outsize+1) > *outbytesleft) { 395 debug_out("String conversion: " 396 "Output buffer too small\n"); 397 hexdump("UTF16LE->UTF8 (old) input", 398 *inbuf, *inbytesleft); 399 errno = E2BIG; 400 return -1; 401 } 402 403 (*outbuf)[outsize] = 0; 404 ++outsize; 405 } 406 407 *inbuf += *inbytesleft; 408 *inbytesleft = 0; 409 *outbuf += outsize; 410 *outbytesleft -= outsize; 411 412 return 0; 413} 414 415#else /* USE_INTERNAL_API */ 416 417/* 418 * An implementation based on internal code as known from the 419 * OpenDarwin CVS. 420 * 421 * This code doesn't need much memory management because it uses 422 * functions that operate on the raw memory directly. 423 * 424 * The push routine here is faster and more compatible with HFS+ than 425 * the other implementation above. The pull routine is only faster 426 * for some strings, slightly slower for others. The pull routine 427 * looses because it has to iterate over the data twice, once to 428 * decode UTF-8 and than to do the character composition required by 429 * Windows. 430 */ 431static size_t macosxfs_encoding_pull( 432 void *cd, /* Encoder handle */ 433 char **inbuf, size_t *inbytesleft, /* Script string */ 434 char **outbuf, size_t *outbytesleft) /* UTF-16-LE string */ 435{ 436 static const int script_code = kCFStringEncodingUTF8; 437 UInt32 srcCharsUsed = 0; 438 UInt32 dstCharsUsed = 0; 439 UInt32 result; 440 uint32_t dstDecomposedUsed = 0; 441 uint32_t dstPrecomposedUsed = 0; 442 443 (void) cd; /* UNUSED */ 444 445 if (0 == *inbytesleft) { 446 return 0; 447 } 448 449 result = CFStringEncodingBytesToUnicode( 450 script_code, kCFStringEncodingComposeCombinings, 451 *inbuf, *inbytesleft, &srcCharsUsed, 452 (UniChar*)*outbuf, *outbytesleft, &dstCharsUsed); 453 454 switch(result) { 455 case kCFStringEncodingConversionSuccess: 456 if (*inbytesleft == srcCharsUsed) 457 break; 458 else 459 ; /*fall through*/ 460 case kCFStringEncodingInsufficientOutputBufferLength: 461 debug_out("String conversion: " 462 "Output buffer too small\n"); 463 hexdump("UTF8->UTF16LE (new) input", 464 *inbuf, *inbytesleft); 465 errno = E2BIG; 466 return -1; 467 case kCFStringEncodingInvalidInputStream: 468 /* 469 * HACK: smbd/mangle_hash2.c:is_legal_name() expects 470 * errors here. That function will always pass 2 471 * characters. smbd/open.c:check_for_pipe() cuts a 472 * patchname to 10 characters blindly. Suppress the 473 * debug output in those cases. 474 */ 475 if(2 != *inbytesleft && 10 != *inbytesleft) { 476 debug_out("String conversion: " 477 "Invalid input sequence\n"); 478 hexdump("UTF8->UTF16LE (new) input", 479 *inbuf, *inbytesleft); 480 } 481 errno = EILSEQ; 482 return -1; 483 case kCFStringEncodingConverterUnavailable: 484 debug_out("String conversion: " 485 "Unknown encoding\n"); 486 hexdump("UTF8->UTF16LE (new) input", 487 *inbuf, *inbytesleft); 488 errno = EINVAL; 489 return -1; 490 } 491 492 /* 493 * It doesn't look like CFStringEncodingBytesToUnicode() can 494 * produce precomposed characters (flags=ComposeCombinings 495 * doesn't do it), so we need another pass over the data here. 496 * We can do this in-place, as the string can only get 497 * shorter. 498 * 499 * (Actually in theory there should be an internal 500 * decomposition and reordering before the actual composition 501 * step. But we should be able to rely on that we always get 502 * fully decomposed strings for input, so this can't create 503 * problems in reality.) 504 */ 505 CFUniCharPrecompose( 506 (const UTF16Char *)*outbuf, dstCharsUsed, &dstDecomposedUsed, 507 (UTF16Char *)*outbuf, dstCharsUsed, &dstPrecomposedUsed); 508 509 native_to_le(*outbuf, dstPrecomposedUsed*2); 510 511 *inbuf += srcCharsUsed; 512 *inbytesleft -= srcCharsUsed; 513 *outbuf += dstPrecomposedUsed*2; 514 *outbytesleft -= dstPrecomposedUsed*2; 515 516 return 0; 517} 518 519static size_t macosxfs_encoding_push( 520 void *cd, /* Encoder handle */ 521 char **inbuf, size_t *inbytesleft, /* UTF-16-LE string */ 522 char **outbuf, size_t *outbytesleft) /* Script string */ 523{ 524 static const int script_code = kCFStringEncodingUTF8; 525 static UniChar *buffer = NULL; 526 static size_t buflen = 0; 527 UInt32 srcCharsUsed=0, dstCharsUsed=0, result; 528 529 (void) cd; /* UNUSED */ 530 531 if (0 == *inbytesleft) { 532 return 0; 533 } 534 535 buffer = set_ucbuffer_with_le( 536 buffer, &buflen, *inbuf, *inbytesleft); 537 538 result = CFStringEncodingUnicodeToBytes( 539 script_code, kCFStringEncodingUseHFSPlusCanonical, 540 buffer, *inbytesleft/2, &srcCharsUsed, 541 *outbuf, *outbytesleft, &dstCharsUsed); 542 543 switch(result) { 544 case kCFStringEncodingConversionSuccess: 545 if (*inbytesleft/2 == srcCharsUsed) 546 break; 547 else 548 ; /*fall through*/ 549 case kCFStringEncodingInsufficientOutputBufferLength: 550 debug_out("String conversion: " 551 "Output buffer too small\n"); 552 hexdump("UTF16LE->UTF8 (new) input", 553 *inbuf, *inbytesleft); 554 errno = E2BIG; 555 return -1; 556 case kCFStringEncodingInvalidInputStream: 557 /* 558 * HACK: smbd/open.c:check_for_pipe():is_legal_name() 559 * cuts a pathname to 10 characters blindly. Suppress 560 * the debug output in those cases. 561 */ 562 if(10 != *inbytesleft) { 563 debug_out("String conversion: " 564 "Invalid input sequence\n"); 565 hexdump("UTF16LE->UTF8 (new) input", 566 *inbuf, *inbytesleft); 567 } 568 errno = EILSEQ; 569 return -1; 570 case kCFStringEncodingConverterUnavailable: 571 debug_out("String conversion: " 572 "Unknown encoding\n"); 573 hexdump("UTF16LE->UTF8 (new) input", 574 *inbuf, *inbytesleft); 575 errno = EINVAL; 576 return -1; 577 } 578 579 *inbuf += srcCharsUsed*2; 580 *inbytesleft -= srcCharsUsed*2; 581 *outbuf += dstCharsUsed; 582 *outbytesleft -= dstCharsUsed; 583 584 return 0; 585} 586 587#endif /* USE_INTERNAL_API */ 588 589/* 590 * For initialization, actually install the encoding as "macosxfs". 591 */ 592static struct charset_functions macosxfs_encoding_functions = { 593 "MACOSXFS", macosxfs_encoding_pull, macosxfs_encoding_push 594}; 595 596NTSTATUS charset_macosxfs_init(void) 597{ 598 return smb_register_charset(&macosxfs_encoding_functions); 599} 600 601/* eof */ 602