1/* 2 Unix SMB/CIFS implementation. 3 Samba charset module for Mac OS X/Darwin 4 Copyright (C) Benjamin Riefenstahl 2003 5 6 This program is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 2 of the License, or 9 (at your option) any later version. 10 11 This program is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with this program; if not, write to the Free Software 18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 19*/ 20 21/* 22 * modules/charset_macosxfs.c 23 * 24 * A Samba charset module to use on Mac OS X/Darwin as the filesystem 25 * and display encoding. 26 * 27 * Actually two implementations are provided here. The default 28 * implementation is based on the official CFString API. The other is 29 * based on internal CFString APIs as defined in the OpenDarwin 30 * source. 31 */ 32 33#include "includes.h" 34 35/* 36 * Include OS frameworks. These are only needed in this module. 37 */ 38#include <CoreFoundation/CFString.h> 39 40/* 41 * See if autoconf has found us the internal headers in some form. 42 */ 43#if HAVE_COREFOUNDATION_CFSTRINGENCODINGCONVERTER_H 44# include <Corefoundation/CFStringEncodingConverter.h> 45# include <Corefoundation/CFUnicodePrecomposition.h> 46# define USE_INTERNAL_API 1 47#elif HAVE_CFSTRINGENCODINGCONVERTER_H 48# include <CFStringEncodingConverter.h> 49# include <CFUnicodePrecomposition.h> 50# define USE_INTERNAL_API 1 51#endif 52 53/* 54 * Compile time configuration: Do we want debug output? 55 */ 56/* #define DEBUG_STRINGS 1 */ 57 58/* 59 * A simple, but efficient memory provider for our buffers. 60 */ 61static inline void *resize_buffer (void *buffer, size_t *size, size_t newsize) 62{ 63 if (newsize > *size) { 64 *size = newsize + 128; 65 buffer = realloc(buffer, *size); 66 } 67 return buffer; 68} 69 70/* 71 * While there is a version of OpenDarwin for intel, the usual case is 72 * big-endian PPC. So we need byte swapping to handle the 73 * little-endian byte order of the network protocol. We also need an 74 * additional dynamic buffer to do this work for incoming data blocks, 75 * because we have to consider the original data as constant. 76 * 77 * We abstract the differences away by providing a simple facade with 78 * these functions/macros: 79 * 80 * le_to_native(dst,src,len) 81 * native_to_le(cp,len) 82 * set_ucbuffer_with_le(buffer,bufsize,data,size) 83 * set_ucbuffer_with_le_copy(buffer,bufsize,data,size,reserve) 84 */ 85#ifdef WORDS_BIGENDIAN 86 87static inline void swap_bytes (char * dst, const char * src, size_t len) 88{ 89 const char *srcend = src + len; 90 while (src < srcend) { 91 dst[0] = src[1]; 92 dst[1] = src[0]; 93 dst += 2; 94 src += 2; 95 } 96} 97static inline void swap_bytes_inplace (char * cp, size_t len) 98{ 99 char temp; 100 char *end = cp + len; 101 while (cp < end) { 102 temp = cp[1]; 103 cp[1] = cp[0]; 104 cp[0] = temp; 105 cp += 2; 106 } 107} 108 109#define le_to_native(dst,src,len) swap_bytes(dst,src,len) 110#define native_to_le(cp,len) swap_bytes_inplace(cp,len) 111#define set_ucbuffer_with_le(buffer,bufsize,data,size) \ 112 set_ucbuffer_with_le_copy(buffer,bufsize,data,size,0) 113 114#else /* ! WORDS_BIGENDIAN */ 115 116#define le_to_native(dst,src,len) memcpy(dst,src,len) 117#define native_to_le(cp,len) /* nothing */ 118#define set_ucbuffer_with_le(buffer,bufsize,data,size) \ 119 (((void)(bufsize)),(UniChar*)(data)) 120 121#endif 122 123static inline UniChar *set_ucbuffer_with_le_copy ( 124 UniChar *buffer, size_t *bufsize, 125 const void *data, size_t size, size_t reserve) 126{ 127 buffer = resize_buffer(buffer, bufsize, size+reserve); 128 le_to_native((char*)buffer,data,size); 129 return buffer; 130} 131 132 133/* 134 * A simple hexdump function for debugging error conditions. 135 */ 136#define debug_out(s) DEBUG(0,(s)) 137 138#ifdef DEBUG_STRINGS 139 140static void hexdump( const char * label, const char * s, size_t len ) 141{ 142 size_t restlen = len; 143 debug_out("<<<<<<<\n"); 144 debug_out(label); 145 debug_out("\n"); 146 while (restlen > 0) { 147 char line[100]; 148 size_t i, j; 149 char * d = line; 150#undef sprintf 151 d += sprintf(d, "%04X ", (unsigned)(len-restlen)); 152 *d++ = ' '; 153 for( i = 0; i<restlen && i<8; ++i ) { 154 d += sprintf(d, "%02X ", ((unsigned)s[i]) & 0xFF); 155 } 156 for( j = i; j<8; ++j ) { 157 d += sprintf(d, " "); 158 } 159 *d++ = ' '; 160 for( i = 8; i<restlen && i<16; ++i ) { 161 d += sprintf(d, "%02X ", ((unsigned)s[i]) & 0xFF); 162 } 163 for( j = i; j<16; ++j ) { 164 d += sprintf(d, " "); 165 } 166 *d++ = ' '; 167 for( i = 0; i<restlen && i<16; ++i ) { 168 if(s[i] < ' ' || s[i] >= 0x7F || !isprint(s[i])) 169 *d++ = '.'; 170 else 171 *d++ = s[i]; 172 } 173 *d++ = '\n'; 174 *d = 0; 175 restlen -= i; 176 s += i; 177 debug_out(line); 178 } 179 debug_out(">>>>>>>\n"); 180} 181 182#else /* !DEBUG_STRINGS */ 183 184#define hexdump(label,s,len) /* nothing */ 185 186#endif 187 188 189#if !USE_INTERNAL_API 190 191/* 192 * An implementation based on documented Mac OS X APIs. 193 * 194 * This does a certain amount of memory management, creating and 195 * manipulating CFString objects. We try to minimize the impact by 196 * keeping those objects around and re-using them. We also use 197 * external backing store for the CFStrings where this is possible and 198 * benficial. 199 * 200 * The Unicode normalizations forms available at this level are 201 * generic, not specifically for the file system. So they may not be 202 * perfect fits. 203 */ 204static size_t macosxfs_encoding_pull( 205 void *cd, /* Encoder handle */ 206 char **inbuf, size_t *inbytesleft, /* Script string */ 207 char **outbuf, size_t *outbytesleft) /* UTF-16-LE string */ 208{ 209 static const int script_code = kCFStringEncodingUTF8; 210 static CFMutableStringRef cfstring = NULL; 211 size_t outsize; 212 CFRange range; 213 214 (void) cd; /* UNUSED */ 215 216 if (0 == *inbytesleft) { 217 return 0; 218 } 219 220 if (NULL == cfstring) { 221 /* 222 * A version with an external backing store as in the 223 * push function should have been more efficient, but 224 * testing shows, that it is actually slower (!). 225 * Maybe kCFAllocatorDefault gets shortcut evaluation 226 * internally, while kCFAllocatorNull doesn't. 227 */ 228 cfstring = CFStringCreateMutable(kCFAllocatorDefault,0); 229 } 230 231 /* 232 * Three methods of appending to a CFString, choose the most 233 * efficient. 234 */ 235 if (0 == (*inbuf)[*inbytesleft-1]) { 236 CFStringAppendCString(cfstring, *inbuf, script_code); 237 } else if (*inbytesleft <= 255) { 238 Str255 buffer; 239 buffer[0] = *inbytesleft; 240 memcpy(buffer+1, *inbuf, buffer[0]); 241 CFStringAppendPascalString(cfstring, buffer, script_code); 242 } else { 243 /* 244 * We would like to use a fixed buffer and a loop 245 * here, but than we can't garantee that the input is 246 * well-formed UTF-8, as we are supposed to do. 247 */ 248 static char *buffer = NULL; 249 static size_t buflen = 0; 250 buffer = resize_buffer(buffer, &buflen, *inbytesleft+1); 251 memcpy(buffer, *inbuf, *inbytesleft); 252 buffer[*inbytesleft] = 0; 253 CFStringAppendCString(cfstring, *inbuf, script_code); 254 } 255 256 /* 257 * Compose characters, using the non-canonical composition 258 * form. 259 */ 260 CFStringNormalize(cfstring, kCFStringNormalizationFormC); 261 262 outsize = CFStringGetLength(cfstring); 263 range = CFRangeMake(0,outsize); 264 265 if (outsize == 0) { 266 /* 267 * HACK: smbd/mangle_hash2.c:is_legal_name() expects 268 * errors here. That function will always pass 2 269 * characters. smbd/open.c:check_for_pipe() cuts a 270 * patchname to 10 characters blindly. Suppress the 271 * debug output in those cases. 272 */ 273 if(2 != *inbytesleft && 10 != *inbytesleft) { 274 debug_out("String conversion: " 275 "An unknown error occurred\n"); 276 hexdump("UTF8->UTF16LE (old) input", 277 *inbuf, *inbytesleft); 278 } 279 errno = EILSEQ; /* Not sure, but this is what we have 280 * actually seen. */ 281 return -1; 282 } 283 if (outsize*2 > *outbytesleft) { 284 CFStringDelete(cfstring, range); 285 debug_out("String conversion: " 286 "Output buffer too small\n"); 287 hexdump("UTF8->UTF16LE (old) input", 288 *inbuf, *inbytesleft); 289 errno = E2BIG; 290 return -1; 291 } 292 293 CFStringGetCharacters(cfstring, range, (UniChar*)*outbuf); 294 CFStringDelete(cfstring, range); 295 296 native_to_le(*outbuf, outsize*2); 297 298 /* 299 * Add a converted null byte, if the CFString conversions 300 * prevented that until now. 301 */ 302 if (0 == (*inbuf)[*inbytesleft-1] && 303 (0 != (*outbuf)[outsize*2-1] || 0 != (*outbuf)[outsize*2-2])) { 304 305 if ((outsize*2+2) > *outbytesleft) { 306 debug_out("String conversion: " 307 "Output buffer too small\n"); 308 hexdump("UTF8->UTF16LE (old) input", 309 *inbuf, *inbytesleft); 310 errno = E2BIG; 311 return -1; 312 } 313 314 (*outbuf)[outsize*2] = (*outbuf)[outsize*2+1] = 0; 315 outsize += 2; 316 } 317 318 *inbuf += *inbytesleft; 319 *inbytesleft = 0; 320 *outbuf += outsize*2; 321 *outbytesleft -= outsize*2; 322 323 return 0; 324} 325 326static size_t macosxfs_encoding_push( 327 void *cd, /* Encoder handle */ 328 char **inbuf, size_t *inbytesleft, /* UTF-16-LE string */ 329 char **outbuf, size_t *outbytesleft) /* Script string */ 330{ 331 static const int script_code = kCFStringEncodingUTF8; 332 static CFMutableStringRef cfstring = NULL; 333 static UniChar *buffer = NULL; 334 static size_t buflen = 0; 335 CFIndex outsize, cfsize, charsconverted; 336 337 (void) cd; /* UNUSED */ 338 339 if (0 == *inbytesleft) { 340 return 0; 341 } 342 343 /* 344 * We need a buffer that can hold 4 times the original data, 345 * because that is the theoretical maximum that decomposition 346 * can create currently (in Unicode 4.0). 347 */ 348 buffer = set_ucbuffer_with_le_copy( 349 buffer, &buflen, *inbuf, *inbytesleft, 3 * *inbytesleft); 350 351 if (NULL == cfstring) { 352 cfstring = CFStringCreateMutableWithExternalCharactersNoCopy( 353 kCFAllocatorDefault, 354 buffer, *inbytesleft/2, buflen/2, 355 kCFAllocatorNull); 356 } else { 357 CFStringSetExternalCharactersNoCopy( 358 cfstring, 359 buffer, *inbytesleft/2, buflen/2); 360 } 361 362 /* 363 * Decompose characters, using the non-canonical decomposition 364 * form. 365 * 366 * NB: This isn't exactly what HFS+ wants (see note on 367 * kCFStringEncodingUseHFSPlusCanonical in 368 * CFStringEncodingConverter.h), but AFAIK it's the best that 369 * the official API can do. 370 */ 371 CFStringNormalize(cfstring, kCFStringNormalizationFormD); 372 373 cfsize = CFStringGetLength(cfstring); 374 charsconverted = CFStringGetBytes( 375 cfstring, CFRangeMake(0,cfsize), 376 script_code, 0, False, 377 *outbuf, *outbytesleft, &outsize); 378 379 if (0 == charsconverted) { 380 debug_out("String conversion: " 381 "Buffer too small or not convertable\n"); 382 hexdump("UTF16LE->UTF8 (old) input", 383 *inbuf, *inbytesleft); 384 errno = EILSEQ; /* Probably more likely. */ 385 return -1; 386 } 387 388 /* 389 * Add a converted null byte, if the CFString conversions 390 * prevented that until now. 391 */ 392 if (0 == (*inbuf)[*inbytesleft-1] && 0 == (*inbuf)[*inbytesleft-2] && 393 (0 != (*outbuf)[outsize-1])) { 394 395 if (((size_t)outsize+1) > *outbytesleft) { 396 debug_out("String conversion: " 397 "Output buffer too small\n"); 398 hexdump("UTF16LE->UTF8 (old) input", 399 *inbuf, *inbytesleft); 400 errno = E2BIG; 401 return -1; 402 } 403 404 (*outbuf)[outsize] = 0; 405 ++outsize; 406 } 407 408 *inbuf += *inbytesleft; 409 *inbytesleft = 0; 410 *outbuf += outsize; 411 *outbytesleft -= outsize; 412 413 return 0; 414} 415 416#else /* USE_INTERNAL_API */ 417 418/* 419 * An implementation based on internal code as known from the 420 * OpenDarwin CVS. 421 * 422 * This code doesn't need much memory management because it uses 423 * functions that operate on the raw memory directly. 424 * 425 * The push routine here is faster and more compatible with HFS+ than 426 * the other implementation above. The pull routine is only faster 427 * for some strings, slightly slower for others. The pull routine 428 * looses because it has to iterate over the data twice, once to 429 * decode UTF-8 and than to do the character composition required by 430 * Windows. 431 */ 432static size_t macosxfs_encoding_pull( 433 void *cd, /* Encoder handle */ 434 char **inbuf, size_t *inbytesleft, /* Script string */ 435 char **outbuf, size_t *outbytesleft) /* UTF-16-LE string */ 436{ 437 static const int script_code = kCFStringEncodingUTF8; 438 UInt32 srcCharsUsed = 0; 439 UInt32 dstCharsUsed = 0; 440 UInt32 result; 441 uint32_t dstDecomposedUsed = 0; 442 uint32_t dstPrecomposedUsed = 0; 443 444 (void) cd; /* UNUSED */ 445 446 if (0 == *inbytesleft) { 447 return 0; 448 } 449 450 result = CFStringEncodingBytesToUnicode( 451 script_code, kCFStringEncodingComposeCombinings, 452 *inbuf, *inbytesleft, &srcCharsUsed, 453 (UniChar*)*outbuf, *outbytesleft, &dstCharsUsed); 454 455 switch(result) { 456 case kCFStringEncodingConversionSuccess: 457 if (*inbytesleft == srcCharsUsed) 458 break; 459 else 460 ; /*fall through*/ 461 case kCFStringEncodingInsufficientOutputBufferLength: 462 debug_out("String conversion: " 463 "Output buffer too small\n"); 464 hexdump("UTF8->UTF16LE (new) input", 465 *inbuf, *inbytesleft); 466 errno = E2BIG; 467 return -1; 468 case kCFStringEncodingInvalidInputStream: 469 /* 470 * HACK: smbd/mangle_hash2.c:is_legal_name() expects 471 * errors here. That function will always pass 2 472 * characters. smbd/open.c:check_for_pipe() cuts a 473 * patchname to 10 characters blindly. Suppress the 474 * debug output in those cases. 475 */ 476 if(2 != *inbytesleft && 10 != *inbytesleft) { 477 debug_out("String conversion: " 478 "Invalid input sequence\n"); 479 hexdump("UTF8->UTF16LE (new) input", 480 *inbuf, *inbytesleft); 481 } 482 errno = EILSEQ; 483 return -1; 484 case kCFStringEncodingConverterUnavailable: 485 debug_out("String conversion: " 486 "Unknown encoding\n"); 487 hexdump("UTF8->UTF16LE (new) input", 488 *inbuf, *inbytesleft); 489 errno = EINVAL; 490 return -1; 491 } 492 493 /* 494 * It doesn't look like CFStringEncodingBytesToUnicode() can 495 * produce precomposed characters (flags=ComposeCombinings 496 * doesn't do it), so we need another pass over the data here. 497 * We can do this in-place, as the string can only get 498 * shorter. 499 * 500 * (Actually in theory there should be an internal 501 * decomposition and reordering before the actual composition 502 * step. But we should be able to rely on that we always get 503 * fully decomposed strings for input, so this can't create 504 * problems in reality.) 505 */ 506 CFUniCharPrecompose( 507 (const UTF16Char *)*outbuf, dstCharsUsed, &dstDecomposedUsed, 508 (UTF16Char *)*outbuf, dstCharsUsed, &dstPrecomposedUsed); 509 510 native_to_le(*outbuf, dstPrecomposedUsed*2); 511 512 *inbuf += srcCharsUsed; 513 *inbytesleft -= srcCharsUsed; 514 *outbuf += dstPrecomposedUsed*2; 515 *outbytesleft -= dstPrecomposedUsed*2; 516 517 return 0; 518} 519 520static size_t macosxfs_encoding_push( 521 void *cd, /* Encoder handle */ 522 char **inbuf, size_t *inbytesleft, /* UTF-16-LE string */ 523 char **outbuf, size_t *outbytesleft) /* Script string */ 524{ 525 static const int script_code = kCFStringEncodingUTF8; 526 static UniChar *buffer = NULL; 527 static size_t buflen = 0; 528 UInt32 srcCharsUsed=0, dstCharsUsed=0, result; 529 530 (void) cd; /* UNUSED */ 531 532 if (0 == *inbytesleft) { 533 return 0; 534 } 535 536 buffer = set_ucbuffer_with_le( 537 buffer, &buflen, *inbuf, *inbytesleft); 538 539 result = CFStringEncodingUnicodeToBytes( 540 script_code, kCFStringEncodingUseHFSPlusCanonical, 541 buffer, *inbytesleft/2, &srcCharsUsed, 542 *outbuf, *outbytesleft, &dstCharsUsed); 543 544 switch(result) { 545 case kCFStringEncodingConversionSuccess: 546 if (*inbytesleft/2 == srcCharsUsed) 547 break; 548 else 549 ; /*fall through*/ 550 case kCFStringEncodingInsufficientOutputBufferLength: 551 debug_out("String conversion: " 552 "Output buffer too small\n"); 553 hexdump("UTF16LE->UTF8 (new) input", 554 *inbuf, *inbytesleft); 555 errno = E2BIG; 556 return -1; 557 case kCFStringEncodingInvalidInputStream: 558 /* 559 * HACK: smbd/open.c:check_for_pipe():is_legal_name() 560 * cuts a pathname to 10 characters blindly. Suppress 561 * the debug output in those cases. 562 */ 563 if(10 != *inbytesleft) { 564 debug_out("String conversion: " 565 "Invalid input sequence\n"); 566 hexdump("UTF16LE->UTF8 (new) input", 567 *inbuf, *inbytesleft); 568 } 569 errno = EILSEQ; 570 return -1; 571 case kCFStringEncodingConverterUnavailable: 572 debug_out("String conversion: " 573 "Unknown encoding\n"); 574 hexdump("UTF16LE->UTF8 (new) input", 575 *inbuf, *inbytesleft); 576 errno = EINVAL; 577 return -1; 578 } 579 580 *inbuf += srcCharsUsed*2; 581 *inbytesleft -= srcCharsUsed*2; 582 *outbuf += dstCharsUsed; 583 *outbytesleft -= dstCharsUsed; 584 585 return 0; 586} 587 588#endif /* USE_INTERNAL_API */ 589 590/* 591 * For initialization, actually install the encoding as "macosxfs". 592 */ 593static struct charset_functions macosxfs_encoding_functions = { 594 "MACOSXFS", macosxfs_encoding_pull, macosxfs_encoding_push 595}; 596 597NTSTATUS init_module(void) 598{ 599 return smb_register_charset(&macosxfs_encoding_functions); 600} 601 602/* eof */ 603