1/* vi:set ts=8 sts=4 sw=4:
2 *
3 * VIM - Vi IMproved	by Bram Moolenaar
4 *
5 * Do ":help uganda"  in Vim to read copying and usage conditions.
6 * Do ":help credits" in Vim to see a list of people who contributed.
7 * See README.txt for an overview of the Vim source code.
8 */
9/*
10 * os_mac_conv.c: Code specifically for Mac string conversions.
11 *
12 * This code has been put in a separate file to avoid the conflicts that are
13 * caused by including both the X11 and Carbon header files.
14 */
15
16#define NO_X11_INCLUDES
17#include "vim.h"
18#ifndef FEAT_GUI_MAC
19# include <CoreServices/CoreServices.h>
20#endif
21
22
23#if defined(MACOS_CONVERT) || defined(PROTO)
24
25# ifdef PROTO
26/* A few dummy types to be able to generate function prototypes. */
27typedef int UniChar;
28typedef int *TECObjectRef;
29typedef int CFStringRef;
30# endif
31
32static char_u	    *mac_utf16_to_utf8 __ARGS((UniChar *from, size_t fromLen, size_t *actualLen));
33static UniChar	    *mac_utf8_to_utf16 __ARGS((char_u *from, size_t fromLen, size_t *actualLen));
34
35/* Converter for composing decomposed HFS+ file paths */
36static TECObjectRef gPathConverter;
37/* Converter used by mac_utf16_to_utf8 */
38static TECObjectRef gUTF16ToUTF8Converter;
39
40/*
41 * A Mac version of string_convert_ext() for special cases.
42 */
43    char_u *
44mac_string_convert(ptr, len, lenp, fail_on_error, from_enc, to_enc, unconvlenp)
45    char_u		*ptr;
46    int			len;
47    int			*lenp;
48    int			fail_on_error;
49    int			from_enc;
50    int			to_enc;
51    int			*unconvlenp;
52{
53    char_u		*retval, *d;
54    CFStringRef		cfstr;
55    int			buflen, in, out, l, i;
56    CFStringEncoding	from;
57    CFStringEncoding	to;
58
59    switch (from_enc)
60    {
61	case 'l':   from = kCFStringEncodingISOLatin1; break;
62	case 'm':   from = kCFStringEncodingMacRoman; break;
63	case 'u':   from = kCFStringEncodingUTF8; break;
64	default:    return NULL;
65    }
66    switch (to_enc)
67    {
68	case 'l':   to = kCFStringEncodingISOLatin1; break;
69	case 'm':   to = kCFStringEncodingMacRoman; break;
70	case 'u':   to = kCFStringEncodingUTF8; break;
71	default:    return NULL;
72    }
73
74    if (unconvlenp != NULL)
75	*unconvlenp = 0;
76    cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0);
77
78    if(cfstr == NULL)
79	fprintf(stderr, "Encoding failed\n");
80    /* When conversion failed, try excluding bytes from the end, helps when
81     * there is an incomplete byte sequence.  Only do up to 6 bytes to avoid
82     * looping a long time when there really is something unconvertible. */
83    while (cfstr == NULL && unconvlenp != NULL && len > 1 && *unconvlenp < 6)
84    {
85	--len;
86	++*unconvlenp;
87	cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0);
88    }
89    if (cfstr == NULL)
90	return NULL;
91
92    if (to == kCFStringEncodingUTF8)
93	buflen = len * 6 + 1;
94    else
95	buflen = len + 1;
96    retval = alloc(buflen);
97    if (retval == NULL)
98    {
99	CFRelease(cfstr);
100	return NULL;
101    }
102
103#if 0
104    CFRange convertRange = CFRangeMake(0, CFStringGetLength(cfstr));
105    /*  Determine output buffer size */
106    CFStringGetBytes(cfstr, convertRange, to, NULL, FALSE, NULL, 0, (CFIndex *)&buflen);
107    retval = (buflen > 0) ? alloc(buflen) : NULL;
108    if (retval == NULL) {
109	CFRelease(cfstr);
110	return NULL;
111    }
112
113    if (lenp)
114	*lenp = buflen / sizeof(char_u);
115
116    if (!CFStringGetBytes(cfstr, convertRange, to, NULL, FALSE, retval, buflen, NULL))
117#endif
118    if (!CFStringGetCString(cfstr, (char *)retval, buflen, to))
119    {
120	CFRelease(cfstr);
121	if (fail_on_error)
122	{
123	    vim_free(retval);
124	    return NULL;
125	}
126
127	fprintf(stderr, "Trying char-by-char conversion...\n");
128	/* conversion failed for the whole string, but maybe it will work
129	 * for each character */
130	for (d = retval, in = 0, out = 0; in < len && out < buflen - 1;)
131	{
132	    if (from == kCFStringEncodingUTF8)
133		l = utf_ptr2len(ptr + in);
134	    else
135		l = 1;
136	    cfstr = CFStringCreateWithBytes(NULL, ptr + in, l, from, 0);
137	    if (cfstr == NULL)
138	    {
139		*d++ = '?';
140		out++;
141	    }
142	    else
143	    {
144		if (!CFStringGetCString(cfstr, (char *)d, buflen - out, to))
145		{
146		    *d++ = '?';
147		    out++;
148		}
149		else
150		{
151		    i = STRLEN(d);
152		    d += i;
153		    out += i;
154		}
155		CFRelease(cfstr);
156	    }
157	    in += l;
158	}
159	*d = NUL;
160	if (lenp != NULL)
161	    *lenp = out;
162	return retval;
163    }
164    CFRelease(cfstr);
165    if (lenp != NULL)
166	*lenp = STRLEN(retval);
167
168    return retval;
169}
170
171/*
172 * Conversion from Apple MacRoman char encoding to UTF-8 or latin1, using
173 * standard Carbon framework.
174 * Input: "ptr[*sizep]".
175 * "real_size" is the size of the buffer that "ptr" points to.
176 * output is in-place, "sizep" is adjusted.
177 * Returns OK or FAIL.
178 */
179    int
180macroman2enc(ptr, sizep, real_size)
181    char_u	*ptr;
182    long	*sizep;
183    long	real_size;
184{
185    CFStringRef		cfstr;
186    CFRange		r;
187    CFIndex		len = *sizep;
188
189    /* MacRoman is an 8-bit encoding, no need to move bytes to
190     * conv_rest[]. */
191    cfstr = CFStringCreateWithBytes(NULL, ptr, len,
192						kCFStringEncodingMacRoman, 0);
193    /*
194     * If there is a conversion error, try using another
195     * conversion.
196     */
197    if (cfstr == NULL)
198	return FAIL;
199
200    r.location = 0;
201    r.length = CFStringGetLength(cfstr);
202    if (r.length != CFStringGetBytes(cfstr, r,
203	    (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1,
204	    0, /* no lossy conversion */
205	    0, /* not external representation */
206	    ptr + *sizep, real_size - *sizep, &len))
207    {
208	CFRelease(cfstr);
209	return FAIL;
210    }
211    CFRelease(cfstr);
212    mch_memmove(ptr, ptr + *sizep, len);
213    *sizep = len;
214
215    return OK;
216}
217
218/*
219 * Conversion from UTF-8 or latin1 to MacRoman.
220 * Input: "from[fromlen]"
221 * Output: "to[maxtolen]" length in "*tolenp"
222 * Unconverted rest in rest[*restlenp].
223 * Returns OK or FAIL.
224 */
225    int
226enc2macroman(from, fromlen, to, tolenp, maxtolen, rest, restlenp)
227    char_u	*from;
228    size_t	fromlen;
229    char_u	*to;
230    int		*tolenp;
231    int		maxtolen;
232    char_u	*rest;
233    int		*restlenp;
234{
235    CFStringRef	cfstr;
236    CFRange	r;
237    CFIndex	l;
238
239    *restlenp = 0;
240    cfstr = CFStringCreateWithBytes(NULL, from, fromlen,
241	    (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1,
242	    0);
243    while (cfstr == NULL && *restlenp < 3 && fromlen > 1)
244    {
245	rest[*restlenp++] = from[--fromlen];
246	cfstr = CFStringCreateWithBytes(NULL, from, fromlen,
247		(enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1,
248		0);
249    }
250    if (cfstr == NULL)
251	return FAIL;
252
253    r.location = 0;
254    r.length = CFStringGetLength(cfstr);
255    if (r.length != CFStringGetBytes(cfstr, r,
256		kCFStringEncodingMacRoman,
257		0, /* no lossy conversion */
258		0, /* not external representation (since vim
259		    * handles this internally */
260		to, maxtolen, &l))
261    {
262	CFRelease(cfstr);
263	return FAIL;
264    }
265    CFRelease(cfstr);
266    *tolenp = l;
267    return OK;
268}
269
270/*
271 * Initializes text converters
272 */
273    void
274mac_conv_init()
275{
276    TextEncoding    utf8_encoding;
277    TextEncoding    utf8_hfsplus_encoding;
278    TextEncoding    utf8_canon_encoding;
279    TextEncoding    utf16_encoding;
280
281    utf8_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
282	    kTextEncodingDefaultVariant, kUnicodeUTF8Format);
283    utf8_hfsplus_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
284	    kUnicodeHFSPlusCompVariant, kUnicodeUTF8Format);
285    utf8_canon_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
286	    kUnicodeCanonicalCompVariant, kUnicodeUTF8Format);
287    utf16_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
288	    kTextEncodingDefaultVariant, kUnicode16BitFormat);
289
290    if (TECCreateConverter(&gPathConverter, utf8_encoding,
291		utf8_hfsplus_encoding) != noErr)
292	gPathConverter = NULL;
293
294    if (TECCreateConverter(&gUTF16ToUTF8Converter, utf16_encoding,
295		utf8_canon_encoding) != noErr)
296    {
297	/* On pre-10.3, Unicode normalization is not available so
298	 * fall back to non-normalizing converter */
299	if (TECCreateConverter(&gUTF16ToUTF8Converter, utf16_encoding,
300		    utf8_encoding) != noErr)
301	    gUTF16ToUTF8Converter = NULL;
302    }
303}
304
305/*
306 * Destroys text converters
307 */
308    void
309mac_conv_cleanup()
310{
311    if (gUTF16ToUTF8Converter)
312    {
313	TECDisposeConverter(gUTF16ToUTF8Converter);
314	gUTF16ToUTF8Converter = NULL;
315    }
316
317    if (gPathConverter)
318    {
319	TECDisposeConverter(gPathConverter);
320	gPathConverter = NULL;
321    }
322}
323
324/*
325 * Conversion from UTF-16 UniChars to 'encoding'
326 * The function signature uses the real type of UniChar (as typedef'ed in
327 * CFBase.h) to avoid clashes with X11 header files in the .pro file
328 */
329    char_u *
330mac_utf16_to_enc(from, fromLen, actualLen)
331    unsigned short *from;
332    size_t fromLen;
333    size_t *actualLen;
334{
335    /* Following code borrows somewhat from os_mswin.c */
336    vimconv_T	conv;
337    size_t      utf8_len;
338    char_u      *utf8_str;
339    char_u      *result = NULL;
340
341    /* Convert to utf-8 first, works better with iconv */
342    utf8_len = 0;
343    utf8_str = mac_utf16_to_utf8(from, fromLen, &utf8_len);
344
345    if (utf8_str)
346    {
347	/* We might be called before we have p_enc set up. */
348	conv.vc_type = CONV_NONE;
349
350	/* If encoding (p_enc) is any unicode, it is actually in utf-8 (vim
351	 * internal unicode is always utf-8) so don't convert in such cases */
352
353	if ((enc_canon_props(p_enc) & ENC_UNICODE) == 0)
354	    convert_setup(&conv, (char_u *)"utf-8",
355		    p_enc? p_enc: (char_u *)"macroman");
356	if (conv.vc_type == CONV_NONE)
357	{
358	    /* p_enc is utf-8, so we're done. */
359	    result = utf8_str;
360	}
361	else
362	{
363	    result = string_convert(&conv, utf8_str, (int *)&utf8_len);
364	    vim_free(utf8_str);
365	}
366
367	convert_setup(&conv, NULL, NULL);
368
369	if (actualLen)
370	    *actualLen = utf8_len;
371    }
372    else if (actualLen)
373	*actualLen = 0;
374
375    return result;
376}
377
378/*
379 * Conversion from 'encoding' to UTF-16 UniChars
380 * The function return uses the real type of UniChar (as typedef'ed in
381 * CFBase.h) to avoid clashes with X11 header files in the .pro file
382 */
383    unsigned short *
384mac_enc_to_utf16(from, fromLen, actualLen)
385    char_u *from;
386    size_t fromLen;
387    size_t *actualLen;
388{
389    /* Following code borrows somewhat from os_mswin.c */
390    vimconv_T	conv;
391    size_t      utf8_len;
392    char_u      *utf8_str;
393    UniChar     *result = NULL;
394    Boolean     should_free_utf8 = FALSE;
395
396    do
397    {
398	/* Use MacRoman by default, we might be called before we have p_enc
399	 * set up.  Convert to utf-8 first, works better with iconv().  Does
400	 * nothing if 'encoding' is "utf-8". */
401	conv.vc_type = CONV_NONE;
402	if ((enc_canon_props(p_enc) & ENC_UNICODE) == 0 &&
403		convert_setup(&conv, p_enc ? p_enc : (char_u *)"macroman",
404		    (char_u *)"utf-8") == FAIL)
405	    break;
406
407	if (conv.vc_type != CONV_NONE)
408	{
409	    utf8_len = fromLen;
410	    utf8_str = string_convert(&conv, from, (int *)&utf8_len);
411	    should_free_utf8 = TRUE;
412	}
413	else
414	{
415	    utf8_str = from;
416	    utf8_len = fromLen;
417	}
418
419	if (utf8_str == NULL)
420	    break;
421
422	convert_setup(&conv, NULL, NULL);
423
424	result = mac_utf8_to_utf16(utf8_str, utf8_len, actualLen);
425
426	if (should_free_utf8)
427	    vim_free(utf8_str);
428	return result;
429    }
430    while (0);
431
432    if (actualLen)
433	*actualLen = 0;
434
435    return result;
436}
437
438/*
439 * Converts from UTF-16 UniChars to CFString
440 * The void * return type is actually a CFStringRef
441 */
442    void *
443mac_enc_to_cfstring(from, fromLen)
444    char_u  *from;
445    size_t  fromLen;
446{
447    UniChar	*utf16_str;
448    size_t	utf16_len;
449    CFStringRef	result = NULL;
450
451    utf16_str = mac_enc_to_utf16(from, fromLen, &utf16_len);
452    if (utf16_str)
453    {
454	result = CFStringCreateWithCharacters(NULL, utf16_str, utf16_len/sizeof(UniChar));
455	vim_free(utf16_str);
456    }
457
458    return (void *)result;
459}
460
461/*
462 * Converts a decomposed HFS+ UTF-8 path to precomposed UTF-8
463 */
464    char_u *
465mac_precompose_path(decompPath, decompLen, precompLen)
466    char_u  *decompPath;
467    size_t  decompLen;
468    size_t  *precompLen;
469{
470    char_u  *result = NULL;
471    size_t  actualLen = 0;
472
473    if (gPathConverter)
474    {
475	result = alloc(decompLen);
476	if (result)
477	{
478	    if (TECConvertText(gPathConverter, decompPath,
479			decompLen, &decompLen, result,
480			decompLen, &actualLen) != noErr)
481	    {
482		vim_free(result);
483		result = NULL;
484	    }
485	}
486    }
487
488    if (precompLen)
489	*precompLen = actualLen;
490
491    return result;
492}
493
494/*
495 * Converts from UTF-16 UniChars to precomposed UTF-8
496 */
497    static char_u *
498mac_utf16_to_utf8(from, fromLen, actualLen)
499    UniChar *from;
500    size_t fromLen;
501    size_t *actualLen;
502{
503    ByteCount		utf8_len;
504    ByteCount		inputRead;
505    char_u		*result;
506
507    if (gUTF16ToUTF8Converter)
508    {
509	result = alloc(fromLen * 6 + 1);
510	if (result && TECConvertText(gUTF16ToUTF8Converter, (ConstTextPtr)from,
511		    fromLen, &inputRead, result,
512		    (fromLen*6+1)*sizeof(char_u), &utf8_len) == noErr)
513	{
514	    TECFlushText(gUTF16ToUTF8Converter, result, (fromLen*6+1)*sizeof(char_u), &inputRead);
515	    utf8_len += inputRead;
516	}
517	else
518	{
519	    vim_free(result);
520	    result = NULL;
521	}
522    }
523    else
524    {
525	result = NULL;
526    }
527
528    if (actualLen)
529	*actualLen = result ? utf8_len : 0;
530
531    return result;
532}
533
534/*
535 * Converts from UTF-8 to UTF-16 UniChars
536 */
537    static UniChar *
538mac_utf8_to_utf16(from, fromLen, actualLen)
539    char_u *from;
540    size_t fromLen;
541    size_t *actualLen;
542{
543    CFStringRef  utf8_str;
544    CFRange      convertRange;
545    UniChar      *result = NULL;
546
547    utf8_str = CFStringCreateWithBytes(NULL, from, fromLen,
548	    kCFStringEncodingUTF8, FALSE);
549
550    if (utf8_str == NULL) {
551	if (actualLen)
552	    *actualLen = 0;
553	return NULL;
554    }
555
556    convertRange = CFRangeMake(0, CFStringGetLength(utf8_str));
557    result = (UniChar *)alloc(convertRange.length * sizeof(UniChar));
558
559    CFStringGetCharacters(utf8_str, convertRange, result);
560
561    CFRelease(utf8_str);
562
563    if (actualLen)
564	*actualLen = convertRange.length * sizeof(UniChar);
565
566    return result;
567}
568
569/*
570 * Sets LANG environment variable in Vim from Mac locale
571 */
572    void
573mac_lang_init() {
574    if (mch_getenv((char_u *)"LANG") == NULL)
575    {
576	char	buf[20];
577	if (LocaleRefGetPartString(NULL,
578		    kLocaleLanguageMask | kLocaleLanguageVariantMask |
579		    kLocaleRegionMask | kLocaleRegionVariantMask,
580		    sizeof buf, buf) == noErr && *buf)
581	{
582	    vim_setenv((char_u *)"LANG", (char_u *)buf);
583#   ifdef HAVE_LOCALE_H
584	    setlocale(LC_ALL, "");
585#   endif
586	}
587    }
588}
589#endif /* MACOS_CONVERT */
590