1/*
2 *  unicode.c
3 *
4 *  $Id: unicode.c,v 1.3 2007/10/07 12:32:08 source Exp $
5 *
6 *  ODBC unicode functions
7 *
8 *  The iODBC driver manager.
9 *
10 *  Copyright (C) 1996-2006 by OpenLink Software <iodbc@openlinksw.com>
11 *  All Rights Reserved.
12 *
13 *  This software is released under the terms of either of the following
14 *  licenses:
15 *
16 *      - GNU Library General Public License (see LICENSE.LGPL)
17 *      - The BSD License (see LICENSE.BSD).
18 *
19 *  Note that the only valid version of the LGPL license as far as this
20 *  project is concerned is the original GNU Library General Public License
21 *  Version 2, dated June 1991.
22 *
23 *  While not mandated by the BSD license, any patches you make to the
24 *  iODBC source code may be contributed back into the iODBC project
25 *  at your discretion. Contributions will benefit the Open Source and
26 *  Data Access community as a whole. Submissions may be made at:
27 *
28 *      http://www.iodbc.org
29 *
30 *
 *  GNU Library General Public License Version 2
32 *  ============================================
33 *  This library is free software; you can redistribute it and/or
34 *  modify it under the terms of the GNU Library General Public
35 *  License as published by the Free Software Foundation; only
36 *  Version 2 of the License dated June 1991.
37 *
38 *  This library is distributed in the hope that it will be useful,
39 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
40 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
41 *  Library General Public License for more details.
42 *
43 *  You should have received a copy of the GNU Library General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
46 *
47 *
48 *  The BSD License
49 *  ===============
50 *  Redistribution and use in source and binary forms, with or without
51 *  modification, are permitted provided that the following conditions
52 *  are met:
53 *
54 *  1. Redistributions of source code must retain the above copyright
55 *     notice, this list of conditions and the following disclaimer.
56 *  2. Redistributions in binary form must reproduce the above copyright
57 *     notice, this list of conditions and the following disclaimer in
58 *     the documentation and/or other materials provided with the
59 *     distribution.
60 *  3. Neither the name of OpenLink Software Inc. nor the names of its
61 *     contributors may be used to endorse or promote products derived
62 *     from this software without specific prior written permission.
63 *
64 *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
65 *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
66 *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
67 *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL OPENLINK OR
68 *  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
69 *  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
70 *  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
71 *  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
72 *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
73 *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
74 *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
75 */
76
77#define UNICODE
78
79#include <iodbc.h>
80
81#include <sql.h>
82#include <sqlext.h>
83#include <sqltypes.h>
84
85#include <stdlib.h>
86#include <string.h>
87
88#ifdef WIN32
89#include <ansiapi.h>
90#include <mapinls.h>
91#endif
92
93#include "unicode.h"
94
95#if !defined(HAVE_WCSLEN)
/*
 *  Fallback wcslen, compiled only when HAVE_WCSLEN is not defined.
 *
 *  wcs       - NUL-terminated wide string
 *  return    - number of wide characters preceding the terminator
 */
size_t
wcslen (const wchar_t * wcs)
{
  const wchar_t *end = wcs;

  /* Walk to the terminator; the pointer distance is the length. */
  while (*end != L'\0')
    end++;

  return (size_t) (end - wcs);
}
106#endif
107
108
109#if !defined(HAVE_WCSCPY)
/*
 *  Fallback wcscpy, compiled only when HAVE_WCSCPY is not defined.
 *
 *  wcd       - destination buffer, must hold the source plus terminator
 *  wcs       - NUL-terminated wide string to copy
 *  return    - wcd
 */
wchar_t *
wcscpy (wchar_t * wcd, const wchar_t * wcs)
{
  size_t i = 0;
  wchar_t ch;

  /* Copy every character, the terminating NUL included. */
  do
    {
      ch = wcs[i];
      wcd[i] = ch;
      i++;
    }
  while (ch != L'\0');

  return wcd;
}
120#endif
121
122
123#if !defined (HAVE_WCSNCPY)
/*
 *  Fallback wcsncpy, compiled only when HAVE_WCSNCPY is not defined.
 *
 *  Copies at most n wide characters from wcs to wcd.  When wcs is shorter
 *  than n the remainder of wcd is filled with NULs; when wcs has n or more
 *  characters the result is NOT NUL-terminated.
 *
 *  wcd       - destination buffer of at least n wide characters
 *  wcs       - NUL-terminated wide string to copy
 *  n         - exact number of wide characters written to wcd
 *  return    - wcd
 */
wchar_t *
wcsncpy (wchar_t * wcd, const wchar_t * wcs, size_t n)
{
  size_t i = 0;

  /* Copy the characters that precede the terminator, bounded by n. */
  for (; i < n && wcs[i] != L'\0'; i++)
    wcd[i] = wcs[i];

  /*
   *  Pad with NULs up to n.  Using one index for both phases guarantees
   *  exactly n elements are written and never wcd[n].
   */
  for (; i < n; i++)
    wcd[i] = L'\0';

  return wcd;
}
138#endif
139
140#if !defined(HAVE_WCSCHR)
/*
 *  Fallback wcschr, compiled only when HAVE_WCSCHR is not defined.
 *
 *  wcs       - NUL-terminated wide string to search
 *  wc        - character to look for; L'\0' matches the terminator
 *  return    - pointer to the first occurrence, or NULL if absent
 */
wchar_t* wcschr(const wchar_t *wcs, const wchar_t wc)
{
  for (;; wcs++)
    {
      /* Test for a match first so that searching for L'\0' succeeds. */
      if (*wcs == wc)
        return (wchar_t *) wcs;
      if (*wcs == L'\0')
        return NULL;
    }
}
150#endif
151
152#if !defined(HAVE_WCSCAT)
/*
 *  Fallback wcscat, compiled only when HAVE_WCSCAT is not defined.
 *
 *  dest      - NUL-terminated wide string with room for src appended
 *  src       - NUL-terminated wide string to append
 *  return    - dest
 */
wchar_t* wcscat(wchar_t *dest, const wchar_t *src)
{
  wchar_t *tail = dest;

  /*
   *  Stop exactly on dest's terminator.  The pointer never leaves the
   *  buffer, even when dest is the empty string.
   */
  while (*tail != L'\0')
    tail++;

  /* Overwrite the old terminator and copy src including its NUL. */
  while ((*tail++ = *src++) != L'\0')
    ;

  return dest;
}
174#endif
175
176#if !defined(HAVE_WCSCMP)
/*
 *  Fallback wcscmp, compiled only when HAVE_WCSCMP is not defined.
 *
 *  return    - 0 when the strings are equal, otherwise the difference
 *              between the first pair of characters that differ
 */
int wcscmp (const wchar_t* s1, const wchar_t* s2)
{
  size_t i;

  /* Same pointer means same string; nothing to scan. */
  if (s1 == s2)
    return 0;

  /* Advance while both strings agree and s1 has not ended. */
  for (i = 0; s1[i] != L'\0' && s1[i] == s2[i]; i++)
    ;

  return s1[i] - s2[i];
}
195#endif
196
197
198#if !defined(HAVE_TOWLOWER)
199
200#if (defined (__APPLE__) && !(defined (NO_FRAMEWORKS) || defined (_LP64)))
201
202#include <Carbon/Carbon.h>
203
204wchar_t
205towlower (wchar_t wc)
206{
207  CFMutableStringRef strRef = CFStringCreateMutable (NULL, 0);
208  UniChar c = (UniChar) wc;
209  wchar_t wcs;
210
211  CFStringAppendCharacters (strRef, &c, 1);
212  CFStringLowercase (strRef, NULL);
213  wcs = CFStringGetCharacterAtIndex (strRef, 0);
214  CFRelease (strRef);
215
216  return wcs;
217}
218
219#else
220
/*
 *  Minimal towlower for platforms that provide none.
 *
 *  Folds the ASCII letters 'A'..'Z' to lower case and returns every other
 *  character unchanged.  Without this, wcsncasecmp() in this file would be
 *  fully case-sensitive on such platforms.
 */
wchar_t
towlower (wchar_t wc)
{
  if (wc >= L'A' && wc <= L'Z')
    return (wchar_t) (wc - L'A' + L'a');

  return wc;
}
227
228#endif /* __APPLE__ */
229#endif /* !HAVE_TOWLOWER */
230
231
232#if !defined(HAVE_WCSNCASECMP)
/*
 *  Fallback wcsncasecmp, compiled only when HAVE_WCSNCASECMP is not defined.
 *
 *  Compares at most n wide characters of s1 and s2 after passing each one
 *  through towlower().
 *
 *  return    - 0 when the compared prefixes match, otherwise the difference
 *              between the first pair of lowered characters that differ
 */
int wcsncasecmp (wchar_t* s1, wchar_t* s2, size_t n)
{
  size_t i;

  if (n == 0 || s1 == s2)
    return 0;

  for (i = 0; i < n; i++)
    {
      wchar_t a = towlower (s1[i]);
      wchar_t b = towlower (s2[i]);

      /* Stop on the first mismatch or when s1 ends. */
      if (a != b || a == L'\0')
        return a - b;
    }

  /* All n characters matched. */
  return 0;
}
250#endif
251
252SQLCHAR *
253dm_SQL_W2A (SQLWCHAR * inStr, ssize_t size)
254{
255  SQLCHAR *outStr = NULL;
256  size_t len;
257
258  if (inStr == NULL)
259    return NULL;
260
261  if (size == SQL_NTS)
262    len = wcslen (inStr);
263  else
264    len = size;
265
266  if (len < 0)
267    return NULL;
268
269  if ((outStr = (SQLCHAR *) malloc (len * UTF8_MAX_CHAR_LEN + 1)) != NULL)
270    {
271      if (len > 0)
272	OPL_W2A (inStr, outStr, len);
273      outStr[len] = '\0';
274    }
275
276  return outStr;
277}
278
279
280SQLWCHAR *
281dm_SQL_A2W (SQLCHAR * inStr, ssize_t size)
282{
283  SQLWCHAR *outStr = NULL;
284  size_t len;
285
286  if (inStr == NULL)
287    return NULL;
288
289  if (size == SQL_NTS)
290    len = strlen ((char *) inStr);
291  else
292    len = size;
293
294  if (len < 0)
295    return NULL;
296
297  if ((outStr = (SQLWCHAR *) calloc (len + 1, sizeof (SQLWCHAR))) != NULL)
298    {
299      if (len > 0)
300	OPL_A2W (inStr, outStr, len);
301      outStr[len] = L'\0';
302    }
303
304  return outStr;
305}
306
307
308int
309dm_StrCopyOut2_A2W (
310  SQLCHAR	* inStr,
311  SQLWCHAR	* outStr,
312  SQLSMALLINT	  size,
313  SQLSMALLINT	* result)
314{
315  size_t length;
316
317  if (!inStr)
318    return -1;
319
320  length = strlen ((char *) inStr);
321
322  if (result)
323    *result = (SQLSMALLINT) length;
324
325  if (!outStr)
326    return 0;
327
328  if (size >= length + 1)
329    {
330      if (length > 0)
331	OPL_A2W (inStr, outStr, length);
332      outStr[length] = L'\0';
333      return 0;
334    }
335  if (size > 0)
336    {
337      OPL_A2W (inStr, outStr, size);
338      outStr[--size] = L'\0';
339    }
340  return -1;
341}
342
343
344int
345dm_StrCopyOut2_W2A (
346  SQLWCHAR	* inStr,
347  SQLCHAR	* outStr,
348  SQLSMALLINT	  size,
349  SQLSMALLINT	* result)
350{
351  size_t length;
352
353  if (!inStr)
354    return -1;
355
356  length = wcslen (inStr);
357
358  if (result)
359    *result = (SQLSMALLINT) length;
360
361  if (!outStr)
362    return 0;
363
364  if (size >= length + 1)
365    {
366      if (length > 0)
367	OPL_W2A (inStr, outStr, length);
368      outStr[length] = '\0';
369      return 0;
370    }
371  if (size > 0)
372    {
373      OPL_W2A (inStr, outStr, size);
374      outStr[--size] = '\0';
375    }
376  return -1;
377}
378
379
380SQLWCHAR *
381dm_strcpy_A2W (SQLWCHAR * destStr, SQLCHAR * sourStr)
382{
383  size_t length;
384
385  if (!sourStr || !destStr)
386    return destStr;
387
388  length = strlen ((char *) sourStr);
389  if (length > 0)
390    OPL_A2W (sourStr, destStr, length);
391  destStr[length] = L'\0';
392  return destStr;
393}
394
395
396SQLCHAR *
397dm_strcpy_W2A (SQLCHAR * destStr, SQLWCHAR * sourStr)
398{
399  size_t length;
400
401  if (!sourStr || !destStr)
402    return destStr;
403
404  length = wcslen (sourStr);
405  if (length > 0)
406    OPL_W2A (sourStr, destStr, length);
407  destStr[length] = '\0';
408  return destStr;
409}
410
411
412static size_t
413calc_len_for_utf8 (SQLWCHAR * str, ssize_t size)
414{
415  size_t len = 0;
416  SQLWCHAR c;
417
418  if (!str)
419    return len;
420
421  if (size == SQL_NTS)
422    {
423      while ((c = *str))
424	{
425	  if (c < 0x80)
426	    len += 1;
427	  else if (c < 0x800)
428	    len += 2;
429	  else if (c < 0x10000)
430	    len += 3;
431	  else if (c < 0x200000)
432	    len += 4;
433	  else
434	    len += 1;
435
436	  str++;
437	}
438    }
439  else
440    {
441      while (size > 0)
442	{
443	  c = *str;
444	  if (c < 0x80)
445	    len += 1;
446	  else if (c < 0x800)
447	    len += 2;
448	  else if (c < 0x10000)
449	    len += 3;
450	  else if (c < 0x200000)
451	    len += 4;
452	  else
453	    len += 1;
454
455	  str++;
456	  size--;
457	}
458    }
459  return len;
460}
461
462
463static size_t
464utf8_len (SQLCHAR * p, ssize_t size)
465{
466  size_t len = 0;
467
468  if (!*p)
469    return 0;
470
471  if (size == SQL_NTS)
472    while (*p)
473      {
474	for (p++; (*p & 0xC0) == 0x80; p++)
475	  ;
476	len++;
477      }
478  else
479    while (size > 0)
480      {
481	for (p++, size--; (size > 0) && ((*p & 0xC0) == 0x80); p++, size--)
482	  ;
483	len++;
484      }
485  return len;
486}
487
488
489/*
490 *  size      - size of buffer for output utf8 string in bytes
491 *  return    - length of output utf8 string
492 */
493static size_t
494wcstoutf8 (SQLWCHAR * wstr, SQLCHAR * ustr, size_t size)
495{
496  size_t len;
497  SQLWCHAR c;
498  int first;
499  size_t i;
500  size_t count = 0;
501
502  if (!wstr)
503    return 0;
504
505  while ((c = *wstr) && count < size)
506    {
507      if (c < 0x80)
508	{
509	  len = 1;
510	  first = 0;
511	}
512      else if (c < 0x800)
513	{
514	  len = 2;
515	  first = 0xC0;
516	}
517      else if (c < 0x10000)
518	{
519	  len = 3;
520	  first = 0xE0;
521	}
522      else if (c < 0x200000)
523	{
524	  len = 4;
525	  first = 0xf0;
526	}
527      else
528	{
529	  len = 1;
530	  first = 0;
531	  c = '?';
532	}
533
534      if (size - count < len)
535	{
536	  return count;
537	}
538
539      for (i = len - 1; i > 0; --i)
540	{
541	  ustr[i] = (c & 0x3f) | 0x80;
542	  c >>= 6;
543	}
544      ustr[0] = c | first;
545
546      ustr += len;
547      count += len;
548      wstr++;
549    }
550  return count;
551}
552
553
554/*
555 *  wlen      - length of input *wstr string in symbols
556 *  size     - size of buffer ( *ustr string) in bytes
557 *  converted - number of converted symbols from *wstr
558 *
559 *  Return    - length of output utf8 string
560 */
561static int
562wcsntoutf8 (
563  SQLWCHAR	* wstr,
564  SQLCHAR	* ustr,
565  size_t	  wlen,
566  size_t	  size,
567  u_short 	* converted)
568{
569  size_t len;
570  SQLWCHAR c;
571  int first;
572  size_t i;
573  size_t count = 0;
574  size_t _converted = 0;
575
576  if (!wstr)
577    return 0;
578
579  while (_converted < wlen && count < size)
580    {
581      c = *wstr;
582      if (c < 0x80)
583	{
584	  len = 1;
585	  first = 0;
586	}
587      else if (c < 0x800)
588	{
589	  len = 2;
590	  first = 0xC0;
591	}
592      else if (c < 0x10000)
593	{
594	  len = 3;
595	  first = 0xE0;
596	}
597      else if (c < 0x200000)
598	{
599	  len = 4;
600	  first = 0xf0;
601	}
602      else
603	{
604	  len = 1;
605	  first = 0;
606	  c = '?';
607	}
608
609      if (size - count < len)
610	{
611	  if (converted)
612	    *converted = (u_short) _converted;
613	  return count;
614	}
615
616      for (i = len - 1; i > 0; --i)
617	{
618	  ustr[i] = (c & 0x3f) | 0x80;
619	  c >>= 6;
620	}
621      ustr[0] = c | first;
622
623      ustr += len;
624      count += len;
625      wstr++;
626      _converted++;
627    }
628  if (converted)
629    *converted = (u_short) _converted;
630  return count;
631}
632
633
634static SQLCHAR *
635strdup_WtoU8 (SQLWCHAR * str)
636{
637  SQLCHAR *ret;
638  size_t len;
639
640  if (!str)
641    return NULL;
642
643  len = calc_len_for_utf8 (str, SQL_NTS);
644  if ((ret = (SQLCHAR *) malloc (len + 1)) == NULL)
645    return NULL;
646
647  len = wcstoutf8 (str, ret, len);
648  ret[len] = '\0';
649
650  return ret;
651}
652
653
/*
 *  decode: classify the lead byte of a UTF-8 sequence.
 *
 *  Char      - lead byte (evaluated more than once; pass a plain value)
 *  Mask      - lvalue; receives the mask selecting the payload bits of Char
 *  Len       - lvalue; receives the sequence length 1..4, or -1 when Char
 *              is not a valid lead byte (Mask is then left unchanged)
 *
 *  The body is wrapped in do/while (0) so the macro acts as a single
 *  statement, and every argument is parenthesized so that expression
 *  arguments bind as the caller intended.
 */
#define UTF8_COMPUTE(Char, Mask, Len)                                         \
  do                                                                          \
    {                                                                         \
      if ((Char) < 128)                                                       \
        {                                                                     \
          (Len) = 1;                                                          \
          (Mask) = 0x7f;                                                      \
        }                                                                     \
      else if (((Char) & 0xe0) == 0xc0)                                       \
        {                                                                     \
          (Len) = 2;                                                          \
          (Mask) = 0x1f;                                                      \
        }                                                                     \
      else if (((Char) & 0xf0) == 0xe0)                                       \
        {                                                                     \
          (Len) = 3;                                                          \
          (Mask) = 0x0f;                                                      \
        }                                                                     \
      else if (((Char) & 0xf8) == 0xf0)                                       \
        {                                                                     \
          (Len) = 4;                                                          \
          (Mask) = 0x07;                                                      \
        }                                                                     \
      else                                                                    \
        (Len) = -1;                                                           \
    }                                                                         \
  while (0)
678
679
680
681/*
682 *  size      - size of buffer for output string in symbols (SQLWCHAR)
683 *  return    - length of output SQLWCHAR string
684 */
685static size_t
686utf8towcs (SQLCHAR * ustr, SQLWCHAR * wstr, ssize_t size)
687{
688  int i;
689  int mask = 0;
690  int len;
691  SQLCHAR c;
692  SQLWCHAR wc;
693  int count = 0;
694
695  if (!ustr)
696    return 0;
697
698  while ((c = (SQLCHAR) *ustr) && count < size)
699    {
700      UTF8_COMPUTE (c, mask, len);
701      if (len == -1)
702	return count;
703
704      wc = c & mask;
705      for (i = 1; i < len; i++)
706	{
707	  if ((ustr[i] & 0xC0) != 0x80)
708	    return count;
709	  wc <<= 6;
710	  wc |= (ustr[i] & 0x3F);
711	}
712      *wstr = wc;
713      ustr += len;
714      wstr++;
715      count++;
716    }
717  return count;
718}
719
720
721/*
722 *  ulen      - length of input *ustr string in bytes
723 *  size      - size of buffer ( *wstr string) in symbols
724 *  converted - number of converted bytes from *ustr
725 *
726 *  Return    - length of output wcs string
727 */
728static int
729utf8ntowcs (
730  SQLCHAR	* ustr,
731  SQLWCHAR	* wstr,
732  size_t	  ulen,
733  size_t	  size,
734  int		* converted)
735{
736  int i;
737  int mask = 0;
738  int len;
739  SQLCHAR c;
740  SQLWCHAR wc;
741  size_t count = 0;
742  size_t _converted = 0;
743
744  if (!ustr)
745    return 0;
746
747  while ((_converted < ulen) && (count < size))
748    {
749      c = (SQLCHAR) *ustr;
750      UTF8_COMPUTE (c, mask, len);
751      if ((len == -1) || (_converted + len > ulen))
752	{
753	  if (converted)
754	    *converted = (u_short) _converted;
755	  return count;
756	}
757
758      wc = c & mask;
759      for (i = 1; i < len; i++)
760	{
761	  if ((ustr[i] & 0xC0) != 0x80)
762	    {
763	      if (converted)
764		*converted = (u_short) _converted;
765	      return count;
766	    }
767	  wc <<= 6;
768	  wc |= (ustr[i] & 0x3F);
769	}
770      *wstr = wc;
771      ustr += len;
772      wstr++;
773      count++;
774      _converted += len;
775    }
776  if (converted)
777    *converted = (u_short) _converted;
778  return count;
779}
780
781
782static SQLWCHAR *
783strdup_U8toW (SQLCHAR * str)
784{
785  SQLWCHAR *ret;
786  size_t len;
787
788  if (!str)
789    return NULL;
790
791  len = utf8_len (str, SQL_NTS);
792  if ((ret = (SQLWCHAR *) malloc ((len + 1) * sizeof (SQLWCHAR))) == NULL)
793    return NULL;
794
795  len = utf8towcs (str, ret, len);
796  ret[len] = L'\0';
797
798  return ret;
799}
800
801
802SQLCHAR *
803dm_SQL_WtoU8 (SQLWCHAR * inStr, ssize_t size)
804{
805  SQLCHAR *outStr = NULL;
806  size_t len;
807
808  if (inStr == NULL)
809    return NULL;
810
811  if (size == SQL_NTS)
812    {
813      outStr = strdup_WtoU8 (inStr);
814    }
815  else
816    {
817      len = calc_len_for_utf8 (inStr, size);
818      if ((outStr = (SQLCHAR *) malloc (len + 1)) != NULL)
819	{
820	  len = wcsntoutf8 (inStr, outStr, size, len, NULL);
821	  outStr[len] = '\0';
822	}
823    }
824
825  return outStr;
826}
827
828
829SQLWCHAR *
830dm_SQL_U8toW (SQLCHAR * inStr, SQLSMALLINT size)
831{
832  SQLWCHAR *outStr = NULL;
833  size_t len;
834
835  if (inStr == NULL)
836    return NULL;
837
838  if (size == SQL_NTS)
839    {
840      outStr = strdup_U8toW (inStr);
841    }
842  else
843    {
844      len = utf8_len (inStr, size);
845      if ((outStr = (SQLWCHAR *) calloc (len + 1, sizeof (SQLWCHAR))) != NULL)
846	utf8ntowcs (inStr, outStr, size, len, NULL);
847    }
848
849  return outStr;
850}
851
852
853int
854dm_StrCopyOut2_U8toW (
855  SQLCHAR	* inStr,
856  SQLWCHAR	* outStr,
857  size_t	  size,
858  u_short	* result)
859{
860  size_t length;
861
862  if (!inStr)
863    return -1;
864
865  length = utf8_len (inStr, SQL_NTS);
866
867  if (result)
868    *result = (u_short) length;
869
870  if (!outStr)
871    return 0;
872
873  if (size >= length + 1)
874    {
875      length = utf8towcs (inStr, outStr, size);
876      outStr[length] = L'\0';
877      return 0;
878    }
879  if (size > 0)
880    {
881      length = utf8towcs (inStr, outStr, size - 1);
882      outStr[length] = L'\0';
883    }
884  return -1;
885}
886