/*
 *  unicode.c
 *
 *  $Id: unicode.c,v 1.3 2007/10/07 12:32:08 source Exp $
 *
 *  ODBC unicode functions
 *
 *  The iODBC driver manager.
 *
 *  Copyright (C) 1996-2006 by OpenLink Software <iodbc@openlinksw.com>
 *  All Rights Reserved.
 *
 *  This software is released under the terms of either of the following
 *  licenses:
 *
 *      - GNU Library General Public License (see LICENSE.LGPL)
 *      - The BSD License (see LICENSE.BSD).
 *
 *  Note that the only valid version of the LGPL license as far as this
 *  project is concerned is the original GNU Library General Public License
 *  Version 2, dated June 1991.
 *
 *  While not mandated by the BSD license, any patches you make to the
 *  iODBC source code may be contributed back into the iODBC project
 *  at your discretion. Contributions will benefit the Open Source and
 *  Data Access community as a whole. Submissions may be made at:
 *
 *      http://www.iodbc.org
 *
 *
 *  GNU Library Generic Public License Version 2
 *  ============================================
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Library General Public
 *  License as published by the Free Software Foundation; only
 *  Version 2 of the License dated June 1991.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Library General Public License for more details.
 *
 *  You should have received a copy of the GNU Library General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 *
 *  The BSD License
 *  ===============
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions
 *  are met:
 *
 *  1. Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *  2. Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *  3. Neither the name of OpenLink Software Inc. nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL OPENLINK OR
 *  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 *  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 *  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 *  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
75 */ 76 77#define UNICODE 78 79#include <iodbc.h> 80 81#include <sql.h> 82#include <sqlext.h> 83#include <sqltypes.h> 84 85#include <stdlib.h> 86#include <string.h> 87 88#ifdef WIN32 89#include <ansiapi.h> 90#include <mapinls.h> 91#endif 92 93#include "unicode.h" 94 95#if !defined(HAVE_WCSLEN) 96size_t 97wcslen (const wchar_t * wcs) 98{ 99 size_t len = 0; 100 101 while (*wcs++ != L'\0') 102 len++; 103 104 return len; 105} 106#endif 107 108 109#if !defined(HAVE_WCSCPY) 110wchar_t * 111wcscpy (wchar_t * wcd, const wchar_t * wcs) 112{ 113 wchar_t *dst = wcd; 114 115 while ((*dst++ = *wcs++) != L'\0') 116 ; 117 118 return wcd; 119} 120#endif 121 122 123#if !defined (HAVE_WCSNCPY) 124wchar_t * 125wcsncpy (wchar_t * wcd, const wchar_t * wcs, size_t n) 126{ 127 wchar_t *dst = wcd; 128 size_t len = 0; 129 130 while ( len < n && (*dst++ = *wcs++) != L'\0') 131 len++; 132 133 for (; len < n; len++) 134 *dst++ = L'\0'; 135 136 return wcd; 137} 138#endif 139 140#if !defined(HAVE_WCSCHR) 141wchar_t* wcschr(const wchar_t *wcs, const wchar_t wc) 142{ 143 do 144 if(*wcs == wc) 145 return (wchar_t*) wcs; 146 while(*wcs++ != L'\0'); 147 148 return NULL; 149} 150#endif 151 152#if !defined(HAVE_WCSCAT) 153wchar_t* wcscat(wchar_t *dest, const wchar_t *src) 154{ 155 wchar_t *s1 = dest; 156 const wchar_t *s2 = src; 157 wchar_t c; 158 159 do 160 c = *s1 ++; 161 while(c != L'\0'); 162 163 s1 -= 2; 164 165 do 166 { 167 c = *s2 ++; 168 *++s1 = c; 169 } 170 while(c != L'\0'); 171 172 return dest; 173} 174#endif 175 176#if !defined(HAVE_WCSCMP) 177int wcscmp (const wchar_t* s1, const wchar_t* s2) 178{ 179 wchar_t c1, c2; 180 181 if (s1 == s2) 182 return 0; 183 184 do 185 { 186 c1 = *s1++; 187 c2 = *s2++; 188 if(c1 == L'\0') 189 break; 190 } 191 while (c1 == c2); 192 193 return c1 - c2; 194} 195#endif 196 197 198#if !defined(HAVE_TOWLOWER) 199 200#if (defined (__APPLE__) && !(defined (NO_FRAMEWORKS) || defined (_LP64))) 201 202#include <Carbon/Carbon.h> 203 204wchar_t 205towlower (wchar_t wc) 
206{ 207 CFMutableStringRef strRef = CFStringCreateMutable (NULL, 0); 208 UniChar c = (UniChar) wc; 209 wchar_t wcs; 210 211 CFStringAppendCharacters (strRef, &c, 1); 212 CFStringLowercase (strRef, NULL); 213 wcs = CFStringGetCharacterAtIndex (strRef, 0); 214 CFRelease (strRef); 215 216 return wcs; 217} 218 219#else 220 221/* Use dummy function */ 222wchar_t 223towlower (wchar_t wc) 224{ 225 return wc; 226} 227 228#endif /* __APPLE__ */ 229#endif /* !HAVE_TOWLOWER */ 230 231 232#if !defined(HAVE_WCSNCASECMP) 233int wcsncasecmp (wchar_t* s1, wchar_t* s2, size_t n) 234{ 235 wchar_t c1, c2; 236 237 if (s1 == s2 || n ==0) 238 return 0; 239 240 do 241 { 242 c1 = towlower(*s1++); 243 c2 = towlower(*s2++); 244 if(c1 == L'\0' || c1 != c2) 245 return c1 - c2; 246 } while (--n > 0); 247 248 return c1 - c2; 249} 250#endif 251 252SQLCHAR * 253dm_SQL_W2A (SQLWCHAR * inStr, ssize_t size) 254{ 255 SQLCHAR *outStr = NULL; 256 size_t len; 257 258 if (inStr == NULL) 259 return NULL; 260 261 if (size == SQL_NTS) 262 len = wcslen (inStr); 263 else 264 len = size; 265 266 if (len < 0) 267 return NULL; 268 269 if ((outStr = (SQLCHAR *) malloc (len * UTF8_MAX_CHAR_LEN + 1)) != NULL) 270 { 271 if (len > 0) 272 OPL_W2A (inStr, outStr, len); 273 outStr[len] = '\0'; 274 } 275 276 return outStr; 277} 278 279 280SQLWCHAR * 281dm_SQL_A2W (SQLCHAR * inStr, ssize_t size) 282{ 283 SQLWCHAR *outStr = NULL; 284 size_t len; 285 286 if (inStr == NULL) 287 return NULL; 288 289 if (size == SQL_NTS) 290 len = strlen ((char *) inStr); 291 else 292 len = size; 293 294 if (len < 0) 295 return NULL; 296 297 if ((outStr = (SQLWCHAR *) calloc (len + 1, sizeof (SQLWCHAR))) != NULL) 298 { 299 if (len > 0) 300 OPL_A2W (inStr, outStr, len); 301 outStr[len] = L'\0'; 302 } 303 304 return outStr; 305} 306 307 308int 309dm_StrCopyOut2_A2W ( 310 SQLCHAR * inStr, 311 SQLWCHAR * outStr, 312 SQLSMALLINT size, 313 SQLSMALLINT * result) 314{ 315 size_t length; 316 317 if (!inStr) 318 return -1; 319 320 length = strlen 
((char *) inStr); 321 322 if (result) 323 *result = (SQLSMALLINT) length; 324 325 if (!outStr) 326 return 0; 327 328 if (size >= length + 1) 329 { 330 if (length > 0) 331 OPL_A2W (inStr, outStr, length); 332 outStr[length] = L'\0'; 333 return 0; 334 } 335 if (size > 0) 336 { 337 OPL_A2W (inStr, outStr, size); 338 outStr[--size] = L'\0'; 339 } 340 return -1; 341} 342 343 344int 345dm_StrCopyOut2_W2A ( 346 SQLWCHAR * inStr, 347 SQLCHAR * outStr, 348 SQLSMALLINT size, 349 SQLSMALLINT * result) 350{ 351 size_t length; 352 353 if (!inStr) 354 return -1; 355 356 length = wcslen (inStr); 357 358 if (result) 359 *result = (SQLSMALLINT) length; 360 361 if (!outStr) 362 return 0; 363 364 if (size >= length + 1) 365 { 366 if (length > 0) 367 OPL_W2A (inStr, outStr, length); 368 outStr[length] = '\0'; 369 return 0; 370 } 371 if (size > 0) 372 { 373 OPL_W2A (inStr, outStr, size); 374 outStr[--size] = '\0'; 375 } 376 return -1; 377} 378 379 380SQLWCHAR * 381dm_strcpy_A2W (SQLWCHAR * destStr, SQLCHAR * sourStr) 382{ 383 size_t length; 384 385 if (!sourStr || !destStr) 386 return destStr; 387 388 length = strlen ((char *) sourStr); 389 if (length > 0) 390 OPL_A2W (sourStr, destStr, length); 391 destStr[length] = L'\0'; 392 return destStr; 393} 394 395 396SQLCHAR * 397dm_strcpy_W2A (SQLCHAR * destStr, SQLWCHAR * sourStr) 398{ 399 size_t length; 400 401 if (!sourStr || !destStr) 402 return destStr; 403 404 length = wcslen (sourStr); 405 if (length > 0) 406 OPL_W2A (sourStr, destStr, length); 407 destStr[length] = '\0'; 408 return destStr; 409} 410 411 412static size_t 413calc_len_for_utf8 (SQLWCHAR * str, ssize_t size) 414{ 415 size_t len = 0; 416 SQLWCHAR c; 417 418 if (!str) 419 return len; 420 421 if (size == SQL_NTS) 422 { 423 while ((c = *str)) 424 { 425 if (c < 0x80) 426 len += 1; 427 else if (c < 0x800) 428 len += 2; 429 else if (c < 0x10000) 430 len += 3; 431 else if (c < 0x200000) 432 len += 4; 433 else 434 len += 1; 435 436 str++; 437 } 438 } 439 else 440 { 441 while (size 
> 0) 442 { 443 c = *str; 444 if (c < 0x80) 445 len += 1; 446 else if (c < 0x800) 447 len += 2; 448 else if (c < 0x10000) 449 len += 3; 450 else if (c < 0x200000) 451 len += 4; 452 else 453 len += 1; 454 455 str++; 456 size--; 457 } 458 } 459 return len; 460} 461 462 463static size_t 464utf8_len (SQLCHAR * p, ssize_t size) 465{ 466 size_t len = 0; 467 468 if (!*p) 469 return 0; 470 471 if (size == SQL_NTS) 472 while (*p) 473 { 474 for (p++; (*p & 0xC0) == 0x80; p++) 475 ; 476 len++; 477 } 478 else 479 while (size > 0) 480 { 481 for (p++, size--; (size > 0) && ((*p & 0xC0) == 0x80); p++, size--) 482 ; 483 len++; 484 } 485 return len; 486} 487 488 489/* 490 * size - size of buffer for output utf8 string in bytes 491 * return - length of output utf8 string 492 */ 493static size_t 494wcstoutf8 (SQLWCHAR * wstr, SQLCHAR * ustr, size_t size) 495{ 496 size_t len; 497 SQLWCHAR c; 498 int first; 499 size_t i; 500 size_t count = 0; 501 502 if (!wstr) 503 return 0; 504 505 while ((c = *wstr) && count < size) 506 { 507 if (c < 0x80) 508 { 509 len = 1; 510 first = 0; 511 } 512 else if (c < 0x800) 513 { 514 len = 2; 515 first = 0xC0; 516 } 517 else if (c < 0x10000) 518 { 519 len = 3; 520 first = 0xE0; 521 } 522 else if (c < 0x200000) 523 { 524 len = 4; 525 first = 0xf0; 526 } 527 else 528 { 529 len = 1; 530 first = 0; 531 c = '?'; 532 } 533 534 if (size - count < len) 535 { 536 return count; 537 } 538 539 for (i = len - 1; i > 0; --i) 540 { 541 ustr[i] = (c & 0x3f) | 0x80; 542 c >>= 6; 543 } 544 ustr[0] = c | first; 545 546 ustr += len; 547 count += len; 548 wstr++; 549 } 550 return count; 551} 552 553 554/* 555 * wlen - length of input *wstr string in symbols 556 * size - size of buffer ( *ustr string) in bytes 557 * converted - number of converted symbols from *wstr 558 * 559 * Return - length of output utf8 string 560 */ 561static int 562wcsntoutf8 ( 563 SQLWCHAR * wstr, 564 SQLCHAR * ustr, 565 size_t wlen, 566 size_t size, 567 u_short * converted) 568{ 569 size_t len; 570 
SQLWCHAR c; 571 int first; 572 size_t i; 573 size_t count = 0; 574 size_t _converted = 0; 575 576 if (!wstr) 577 return 0; 578 579 while (_converted < wlen && count < size) 580 { 581 c = *wstr; 582 if (c < 0x80) 583 { 584 len = 1; 585 first = 0; 586 } 587 else if (c < 0x800) 588 { 589 len = 2; 590 first = 0xC0; 591 } 592 else if (c < 0x10000) 593 { 594 len = 3; 595 first = 0xE0; 596 } 597 else if (c < 0x200000) 598 { 599 len = 4; 600 first = 0xf0; 601 } 602 else 603 { 604 len = 1; 605 first = 0; 606 c = '?'; 607 } 608 609 if (size - count < len) 610 { 611 if (converted) 612 *converted = (u_short) _converted; 613 return count; 614 } 615 616 for (i = len - 1; i > 0; --i) 617 { 618 ustr[i] = (c & 0x3f) | 0x80; 619 c >>= 6; 620 } 621 ustr[0] = c | first; 622 623 ustr += len; 624 count += len; 625 wstr++; 626 _converted++; 627 } 628 if (converted) 629 *converted = (u_short) _converted; 630 return count; 631} 632 633 634static SQLCHAR * 635strdup_WtoU8 (SQLWCHAR * str) 636{ 637 SQLCHAR *ret; 638 size_t len; 639 640 if (!str) 641 return NULL; 642 643 len = calc_len_for_utf8 (str, SQL_NTS); 644 if ((ret = (SQLCHAR *) malloc (len + 1)) == NULL) 645 return NULL; 646 647 len = wcstoutf8 (str, ret, len); 648 ret[len] = '\0'; 649 650 return ret; 651} 652 653 654/* decode */ 655#define UTF8_COMPUTE(Char, Mask, Len) \ 656 if (Char < 128) \ 657 { \ 658 Len = 1; \ 659 Mask = 0x7f; \ 660 } \ 661 else if ((Char & 0xe0) == 0xc0) \ 662 { \ 663 Len = 2; \ 664 Mask = 0x1f; \ 665 } \ 666 else if ((Char & 0xf0) == 0xe0) \ 667 { \ 668 Len = 3; \ 669 Mask = 0x0f; \ 670 } \ 671 else if ((Char & 0xf8) == 0xf0) \ 672 { \ 673 Len = 4; \ 674 Mask = 0x07; \ 675 } \ 676 else \ 677 Len = -1; 678 679 680 681/* 682 * size - size of buffer for output string in symbols (SQLWCHAR) 683 * return - length of output SQLWCHAR string 684 */ 685static size_t 686utf8towcs (SQLCHAR * ustr, SQLWCHAR * wstr, ssize_t size) 687{ 688 int i; 689 int mask = 0; 690 int len; 691 SQLCHAR c; 692 SQLWCHAR wc; 693 int count = 
0; 694 695 if (!ustr) 696 return 0; 697 698 while ((c = (SQLCHAR) *ustr) && count < size) 699 { 700 UTF8_COMPUTE (c, mask, len); 701 if (len == -1) 702 return count; 703 704 wc = c & mask; 705 for (i = 1; i < len; i++) 706 { 707 if ((ustr[i] & 0xC0) != 0x80) 708 return count; 709 wc <<= 6; 710 wc |= (ustr[i] & 0x3F); 711 } 712 *wstr = wc; 713 ustr += len; 714 wstr++; 715 count++; 716 } 717 return count; 718} 719 720 721/* 722 * ulen - length of input *ustr string in bytes 723 * size - size of buffer ( *wstr string) in symbols 724 * converted - number of converted bytes from *ustr 725 * 726 * Return - length of output wcs string 727 */ 728static int 729utf8ntowcs ( 730 SQLCHAR * ustr, 731 SQLWCHAR * wstr, 732 size_t ulen, 733 size_t size, 734 int * converted) 735{ 736 int i; 737 int mask = 0; 738 int len; 739 SQLCHAR c; 740 SQLWCHAR wc; 741 size_t count = 0; 742 size_t _converted = 0; 743 744 if (!ustr) 745 return 0; 746 747 while ((_converted < ulen) && (count < size)) 748 { 749 c = (SQLCHAR) *ustr; 750 UTF8_COMPUTE (c, mask, len); 751 if ((len == -1) || (_converted + len > ulen)) 752 { 753 if (converted) 754 *converted = (u_short) _converted; 755 return count; 756 } 757 758 wc = c & mask; 759 for (i = 1; i < len; i++) 760 { 761 if ((ustr[i] & 0xC0) != 0x80) 762 { 763 if (converted) 764 *converted = (u_short) _converted; 765 return count; 766 } 767 wc <<= 6; 768 wc |= (ustr[i] & 0x3F); 769 } 770 *wstr = wc; 771 ustr += len; 772 wstr++; 773 count++; 774 _converted += len; 775 } 776 if (converted) 777 *converted = (u_short) _converted; 778 return count; 779} 780 781 782static SQLWCHAR * 783strdup_U8toW (SQLCHAR * str) 784{ 785 SQLWCHAR *ret; 786 size_t len; 787 788 if (!str) 789 return NULL; 790 791 len = utf8_len (str, SQL_NTS); 792 if ((ret = (SQLWCHAR *) malloc ((len + 1) * sizeof (SQLWCHAR))) == NULL) 793 return NULL; 794 795 len = utf8towcs (str, ret, len); 796 ret[len] = L'\0'; 797 798 return ret; 799} 800 801 802SQLCHAR * 803dm_SQL_WtoU8 (SQLWCHAR * inStr, 
ssize_t size) 804{ 805 SQLCHAR *outStr = NULL; 806 size_t len; 807 808 if (inStr == NULL) 809 return NULL; 810 811 if (size == SQL_NTS) 812 { 813 outStr = strdup_WtoU8 (inStr); 814 } 815 else 816 { 817 len = calc_len_for_utf8 (inStr, size); 818 if ((outStr = (SQLCHAR *) malloc (len + 1)) != NULL) 819 { 820 len = wcsntoutf8 (inStr, outStr, size, len, NULL); 821 outStr[len] = '\0'; 822 } 823 } 824 825 return outStr; 826} 827 828 829SQLWCHAR * 830dm_SQL_U8toW (SQLCHAR * inStr, SQLSMALLINT size) 831{ 832 SQLWCHAR *outStr = NULL; 833 size_t len; 834 835 if (inStr == NULL) 836 return NULL; 837 838 if (size == SQL_NTS) 839 { 840 outStr = strdup_U8toW (inStr); 841 } 842 else 843 { 844 len = utf8_len (inStr, size); 845 if ((outStr = (SQLWCHAR *) calloc (len + 1, sizeof (SQLWCHAR))) != NULL) 846 utf8ntowcs (inStr, outStr, size, len, NULL); 847 } 848 849 return outStr; 850} 851 852 853int 854dm_StrCopyOut2_U8toW ( 855 SQLCHAR * inStr, 856 SQLWCHAR * outStr, 857 size_t size, 858 u_short * result) 859{ 860 size_t length; 861 862 if (!inStr) 863 return -1; 864 865 length = utf8_len (inStr, SQL_NTS); 866 867 if (result) 868 *result = (u_short) length; 869 870 if (!outStr) 871 return 0; 872 873 if (size >= length + 1) 874 { 875 length = utf8towcs (inStr, outStr, size); 876 outStr[length] = L'\0'; 877 return 0; 878 } 879 if (size > 0) 880 { 881 length = utf8towcs (inStr, outStr, size - 1); 882 outStr[length] = L'\0'; 883 } 884 return -1; 885} 886