1/* vi:set ts=8 sts=4 sw=4: 2 * 3 * VIM - Vi IMproved by Bram Moolenaar 4 * 5 * Do ":help uganda" in Vim to read copying and usage conditions. 6 * Do ":help credits" in Vim to see a list of people who contributed. 7 * See README.txt for an overview of the Vim source code. 8 */ 9/* 10 * os_mac_conv.c: Code specifically for Mac string conversions. 11 * 12 * This code has been put in a separate file to avoid the conflicts that are 13 * caused by including both the X11 and Carbon header files. 14 */ 15 16#define NO_X11_INCLUDES 17#include "vim.h" 18#ifndef FEAT_GUI_MAC 19# include <CoreServices/CoreServices.h> 20#endif 21 22 23#if defined(MACOS_CONVERT) || defined(PROTO) 24 25# ifdef PROTO 26/* A few dummy types to be able to generate function prototypes. */ 27typedef int UniChar; 28typedef int *TECObjectRef; 29typedef int CFStringRef; 30# endif 31 32static char_u *mac_utf16_to_utf8 __ARGS((UniChar *from, size_t fromLen, size_t *actualLen)); 33static UniChar *mac_utf8_to_utf16 __ARGS((char_u *from, size_t fromLen, size_t *actualLen)); 34 35/* Converter for composing decomposed HFS+ file paths */ 36static TECObjectRef gPathConverter; 37/* Converter used by mac_utf16_to_utf8 */ 38static TECObjectRef gUTF16ToUTF8Converter; 39 40/* 41 * A Mac version of string_convert_ext() for special cases. 42 */ 43 char_u * 44mac_string_convert(ptr, len, lenp, fail_on_error, from_enc, to_enc, unconvlenp) 45 char_u *ptr; 46 int len; 47 int *lenp; 48 int fail_on_error; 49 int from_enc; 50 int to_enc; 51 int *unconvlenp; 52{ 53 char_u *retval, *d; 54 CFStringRef cfstr; 55 int buflen, in, out, l, i; 56 CFStringEncoding from; 57 CFStringEncoding to; 58 59 switch (from_enc) 60 { 61 case 'l': from = kCFStringEncodingISOLatin1; break; 62 case 'm': from = kCFStringEncodingMacRoman; break; 63 case 'u': from = kCFStringEncodingUTF8; break; 64 default: return NULL; 65 } 66 switch (to_enc) 67 { 68 case 'l': to = kCFStringEncodingISOLatin1; break; 69 case 'm': to = kCFStringEncodingMacRoman; break; 70 case 'u': to = kCFStringEncodingUTF8; break; 71 default: return NULL; 72 } 73 74 if (unconvlenp != NULL) 75 *unconvlenp = 0; 76 cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0); 77 78 if(cfstr == NULL) 79 fprintf(stderr, "Encoding failed\n"); 80 /* When conversion failed, try excluding bytes from the end, helps when 81 * there is an incomplete byte sequence. Only do up to 6 bytes to avoid 82 * looping a long time when there really is something unconvertible. */ 83 while (cfstr == NULL && unconvlenp != NULL && len > 1 && *unconvlenp < 6) 84 { 85 --len; 86 ++*unconvlenp; 87 cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0); 88 } 89 if (cfstr == NULL) 90 return NULL; 91 92 if (to == kCFStringEncodingUTF8) 93 buflen = len * 6 + 1; 94 else 95 buflen = len + 1; 96 retval = alloc(buflen); 97 if (retval == NULL) 98 { 99 CFRelease(cfstr); 100 return NULL; 101 } 102 103#if 0 104 CFRange convertRange = CFRangeMake(0, CFStringGetLength(cfstr)); 105 /* Determine output buffer size */ 106 CFStringGetBytes(cfstr, convertRange, to, NULL, FALSE, NULL, 0, (CFIndex *)&buflen); 107 retval = (buflen > 0) ? alloc(buflen) : NULL; 108 if (retval == NULL) { 109 CFRelease(cfstr); 110 return NULL; 111 } 112 113 if (lenp) 114 *lenp = buflen / sizeof(char_u); 115 116 if (!CFStringGetBytes(cfstr, convertRange, to, NULL, FALSE, retval, buflen, NULL)) 117#endif 118 if (!CFStringGetCString(cfstr, (char *)retval, buflen, to)) 119 { 120 CFRelease(cfstr); 121 if (fail_on_error) 122 { 123 vim_free(retval); 124 return NULL; 125 } 126 127 fprintf(stderr, "Trying char-by-char conversion...\n"); 128 /* conversion failed for the whole string, but maybe it will work 129 * for each character */ 130 for (d = retval, in = 0, out = 0; in < len && out < buflen - 1;) 131 { 132 if (from == kCFStringEncodingUTF8) 133 l = utf_ptr2len(ptr + in); 134 else 135 l = 1; 136 cfstr = CFStringCreateWithBytes(NULL, ptr + in, l, from, 0); 137 if (cfstr == NULL) 138 { 139 *d++ = '?'; 140 out++; 141 } 142 else 143 { 144 if (!CFStringGetCString(cfstr, (char *)d, buflen - out, to)) 145 { 146 *d++ = '?'; 147 out++; 148 } 149 else 150 { 151 i = STRLEN(d); 152 d += i; 153 out += i; 154 } 155 CFRelease(cfstr); 156 } 157 in += l; 158 } 159 *d = NUL; 160 if (lenp != NULL) 161 *lenp = out; 162 return retval; 163 } 164 CFRelease(cfstr); 165 if (lenp != NULL) 166 *lenp = STRLEN(retval); 167 168 return retval; 169} 170 171/* 172 * Conversion from Apple MacRoman char encoding to UTF-8 or latin1, using 173 * standard Carbon framework. 174 * Input: "ptr[*sizep]". 175 * "real_size" is the size of the buffer that "ptr" points to. 176 * output is in-place, "sizep" is adjusted. 177 * Returns OK or FAIL. 178 */ 179 int 180macroman2enc(ptr, sizep, real_size) 181 char_u *ptr; 182 long *sizep; 183 long real_size; 184{ 185 CFStringRef cfstr; 186 CFRange r; 187 CFIndex len = *sizep; 188 189 /* MacRoman is an 8-bit encoding, no need to move bytes to 190 * conv_rest[]. */ 191 cfstr = CFStringCreateWithBytes(NULL, ptr, len, 192 kCFStringEncodingMacRoman, 0); 193 /* 194 * If there is a conversion error, try using another 195 * conversion. 196 */ 197 if (cfstr == NULL) 198 return FAIL; 199 200 r.location = 0; 201 r.length = CFStringGetLength(cfstr); 202 if (r.length != CFStringGetBytes(cfstr, r, 203 (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1, 204 0, /* no lossy conversion */ 205 0, /* not external representation */ 206 ptr + *sizep, real_size - *sizep, &len)) 207 { 208 CFRelease(cfstr); 209 return FAIL; 210 } 211 CFRelease(cfstr); 212 mch_memmove(ptr, ptr + *sizep, len); 213 *sizep = len; 214 215 return OK; 216} 217 218/* 219 * Conversion from UTF-8 or latin1 to MacRoman. 220 * Input: "from[fromlen]" 221 * Output: "to[maxtolen]" length in "*tolenp" 222 * Unconverted rest in rest[*restlenp]. 223 * Returns OK or FAIL. 224 */ 225 int 226enc2macroman(from, fromlen, to, tolenp, maxtolen, rest, restlenp) 227 char_u *from; 228 size_t fromlen; 229 char_u *to; 230 int *tolenp; 231 int maxtolen; 232 char_u *rest; 233 int *restlenp; 234{ 235 CFStringRef cfstr; 236 CFRange r; 237 CFIndex l; 238 239 *restlenp = 0; 240 cfstr = CFStringCreateWithBytes(NULL, from, fromlen, 241 (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1, 242 0); 243 while (cfstr == NULL && *restlenp < 3 && fromlen > 1) 244 { 245 rest[*restlenp++] = from[--fromlen]; 246 cfstr = CFStringCreateWithBytes(NULL, from, fromlen, 247 (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1, 248 0); 249 } 250 if (cfstr == NULL) 251 return FAIL; 252 253 r.location = 0; 254 r.length = CFStringGetLength(cfstr); 255 if (r.length != CFStringGetBytes(cfstr, r, 256 kCFStringEncodingMacRoman, 257 0, /* no lossy conversion */ 258 0, /* not external representation (since vim 259 * handles this internally */ 260 to, maxtolen, &l)) 261 { 262 CFRelease(cfstr); 263 return FAIL; 264 } 265 CFRelease(cfstr); 266 *tolenp = l; 267 return OK; 268} 269 270/* 271 * Initializes text converters 272 */ 273 void 274mac_conv_init() 275{ 276 TextEncoding utf8_encoding; 277 TextEncoding utf8_hfsplus_encoding; 278 TextEncoding utf8_canon_encoding; 279 TextEncoding utf16_encoding; 280 281 utf8_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, 282 kTextEncodingDefaultVariant, kUnicodeUTF8Format); 283 utf8_hfsplus_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, 284 kUnicodeHFSPlusCompVariant, kUnicodeUTF8Format); 285 utf8_canon_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, 286 kUnicodeCanonicalCompVariant, kUnicodeUTF8Format); 287 utf16_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, 288 kTextEncodingDefaultVariant, kUnicode16BitFormat); 289 290 if (TECCreateConverter(&gPathConverter, utf8_encoding, 291 utf8_hfsplus_encoding) != noErr) 292 gPathConverter = NULL; 293 294 if (TECCreateConverter(&gUTF16ToUTF8Converter, utf16_encoding, 295 utf8_canon_encoding) != noErr) 296 { 297 /* On pre-10.3, Unicode normalization is not available so 298 * fall back to non-normalizing converter */ 299 if (TECCreateConverter(&gUTF16ToUTF8Converter, utf16_encoding, 300 utf8_encoding) != noErr) 301 gUTF16ToUTF8Converter = NULL; 302 } 303} 304 305/* 306 * Destroys text converters 307 */ 308 void 309mac_conv_cleanup() 310{ 311 if (gUTF16ToUTF8Converter) 312 { 313 TECDisposeConverter(gUTF16ToUTF8Converter); 314 gUTF16ToUTF8Converter = NULL; 315 } 316 317 if (gPathConverter) 318 { 319 TECDisposeConverter(gPathConverter); 320 gPathConverter = NULL; 321 } 322} 323 324/* 325 * Conversion from UTF-16 UniChars to 'encoding' 326 * The function signature uses the real type of UniChar (as typedef'ed in 327 * CFBase.h) to avoid clashes with X11 header files in the .pro file 328 */ 329 char_u * 330mac_utf16_to_enc(from, fromLen, actualLen) 331 unsigned short *from; 332 size_t fromLen; 333 size_t *actualLen; 334{ 335 /* Following code borrows somewhat from os_mswin.c */ 336 vimconv_T conv; 337 size_t utf8_len; 338 char_u *utf8_str; 339 char_u *result = NULL; 340 341 /* Convert to utf-8 first, works better with iconv */ 342 utf8_len = 0; 343 utf8_str = mac_utf16_to_utf8(from, fromLen, &utf8_len); 344 345 if (utf8_str) 346 { 347 /* We might be called before we have p_enc set up. */ 348 conv.vc_type = CONV_NONE; 349 350 /* If encoding (p_enc) is any unicode, it is actually in utf-8 (vim 351 * internal unicode is always utf-8) so don't convert in such cases */ 352 353 if ((enc_canon_props(p_enc) & ENC_UNICODE) == 0) 354 convert_setup(&conv, (char_u *)"utf-8", 355 p_enc? p_enc: (char_u *)"macroman"); 356 if (conv.vc_type == CONV_NONE) 357 { 358 /* p_enc is utf-8, so we're done. */ 359 result = utf8_str; 360 } 361 else 362 { 363 result = string_convert(&conv, utf8_str, (int *)&utf8_len); 364 vim_free(utf8_str); 365 } 366 367 convert_setup(&conv, NULL, NULL); 368 369 if (actualLen) 370 *actualLen = utf8_len; 371 } 372 else if (actualLen) 373 *actualLen = 0; 374 375 return result; 376} 377 378/* 379 * Conversion from 'encoding' to UTF-16 UniChars 380 * The function return uses the real type of UniChar (as typedef'ed in 381 * CFBase.h) to avoid clashes with X11 header files in the .pro file 382 */ 383 unsigned short * 384mac_enc_to_utf16(from, fromLen, actualLen) 385 char_u *from; 386 size_t fromLen; 387 size_t *actualLen; 388{ 389 /* Following code borrows somewhat from os_mswin.c */ 390 vimconv_T conv; 391 size_t utf8_len; 392 char_u *utf8_str; 393 UniChar *result = NULL; 394 Boolean should_free_utf8 = FALSE; 395 396 do 397 { 398 /* Use MacRoman by default, we might be called before we have p_enc 399 * set up. Convert to utf-8 first, works better with iconv(). Does 400 * nothing if 'encoding' is "utf-8". */ 401 conv.vc_type = CONV_NONE; 402 if ((enc_canon_props(p_enc) & ENC_UNICODE) == 0 && 403 convert_setup(&conv, p_enc ? p_enc : (char_u *)"macroman", 404 (char_u *)"utf-8") == FAIL) 405 break; 406 407 if (conv.vc_type != CONV_NONE) 408 { 409 utf8_len = fromLen; 410 utf8_str = string_convert(&conv, from, (int *)&utf8_len); 411 should_free_utf8 = TRUE; 412 } 413 else 414 { 415 utf8_str = from; 416 utf8_len = fromLen; 417 } 418 419 if (utf8_str == NULL) 420 break; 421 422 convert_setup(&conv, NULL, NULL); 423 424 result = mac_utf8_to_utf16(utf8_str, utf8_len, actualLen); 425 426 if (should_free_utf8) 427 vim_free(utf8_str); 428 return result; 429 } 430 while (0); 431 432 if (actualLen) 433 *actualLen = 0; 434 435 return result; 436} 437 438/* 439 * Converts from UTF-16 UniChars to CFString 440 * The void * return type is actually a CFStringRef 441 */ 442 void * 443mac_enc_to_cfstring(from, fromLen) 444 char_u *from; 445 size_t fromLen; 446{ 447 UniChar *utf16_str; 448 size_t utf16_len; 449 CFStringRef result = NULL; 450 451 utf16_str = mac_enc_to_utf16(from, fromLen, &utf16_len); 452 if (utf16_str) 453 { 454 result = CFStringCreateWithCharacters(NULL, utf16_str, utf16_len/sizeof(UniChar)); 455 vim_free(utf16_str); 456 } 457 458 return (void *)result; 459} 460 461/* 462 * Converts a decomposed HFS+ UTF-8 path to precomposed UTF-8 463 */ 464 char_u * 465mac_precompose_path(decompPath, decompLen, precompLen) 466 char_u *decompPath; 467 size_t decompLen; 468 size_t *precompLen; 469{ 470 char_u *result = NULL; 471 size_t actualLen = 0; 472 473 if (gPathConverter) 474 { 475 result = alloc(decompLen); 476 if (result) 477 { 478 if (TECConvertText(gPathConverter, decompPath, 479 decompLen, &decompLen, result, 480 decompLen, &actualLen) != noErr) 481 { 482 vim_free(result); 483 result = NULL; 484 } 485 } 486 } 487 488 if (precompLen) 489 *precompLen = actualLen; 490 491 return result; 492} 493 494/* 495 * Converts from UTF-16 UniChars to precomposed UTF-8 496 */ 497 static char_u * 498mac_utf16_to_utf8(from, fromLen, actualLen) 499 UniChar *from; 500 size_t fromLen; 501 size_t *actualLen; 502{ 503 ByteCount utf8_len; 504 ByteCount inputRead; 505 char_u *result; 506 507 if (gUTF16ToUTF8Converter) 508 { 509 result = alloc(fromLen * 6 + 1); 510 if (result && TECConvertText(gUTF16ToUTF8Converter, (ConstTextPtr)from, 511 fromLen, &inputRead, result, 512 (fromLen*6+1)*sizeof(char_u), &utf8_len) == noErr) 513 { 514 TECFlushText(gUTF16ToUTF8Converter, result, (fromLen*6+1)*sizeof(char_u), &inputRead); 515 utf8_len += inputRead; 516 } 517 else 518 { 519 vim_free(result); 520 result = NULL; 521 } 522 } 523 else 524 { 525 result = NULL; 526 } 527 528 if (actualLen) 529 *actualLen = result ? utf8_len : 0; 530 531 return result; 532} 533 534/* 535 * Converts from UTF-8 to UTF-16 UniChars 536 */ 537 static UniChar * 538mac_utf8_to_utf16(from, fromLen, actualLen) 539 char_u *from; 540 size_t fromLen; 541 size_t *actualLen; 542{ 543 CFStringRef utf8_str; 544 CFRange convertRange; 545 UniChar *result = NULL; 546 547 utf8_str = CFStringCreateWithBytes(NULL, from, fromLen, 548 kCFStringEncodingUTF8, FALSE); 549 550 if (utf8_str == NULL) { 551 if (actualLen) 552 *actualLen = 0; 553 return NULL; 554 } 555 556 convertRange = CFRangeMake(0, CFStringGetLength(utf8_str)); 557 result = (UniChar *)alloc(convertRange.length * sizeof(UniChar)); 558 559 CFStringGetCharacters(utf8_str, convertRange, result); 560 561 CFRelease(utf8_str); 562 563 if (actualLen) 564 *actualLen = convertRange.length * sizeof(UniChar); 565 566 return result; 567} 568 569/* 570 * Sets LANG environment variable in Vim from Mac locale 571 */ 572 void 573mac_lang_init() { 574 if (mch_getenv((char_u *)"LANG") == NULL) 575 { 576 char buf[20]; 577 if (LocaleRefGetPartString(NULL, 578 kLocaleLanguageMask | kLocaleLanguageVariantMask | 579 kLocaleRegionMask | kLocaleRegionVariantMask, 580 sizeof buf, buf) == noErr && *buf) 581 { 582 vim_setenv((char_u *)"LANG", (char_u *)buf); 583# ifdef HAVE_LOCALE_H 584 setlocale(LC_ALL, ""); 585# endif 586 } 587 } 588} 589#endif /* MACOS_CONVERT */ 590