1/******************************************************************* 2 NOTE: 3 The early netatalk 2.x was based on UCS-2. 4 UCS-2 don't support chars above U+10000. 5 Recent netatalk is based on UTF-16. 6 UTF-16 can support chars above U+10000, using Surrogate Pair. 7 However, Surrogate Pair is complex, dirty, filthy and disagreeable. 8 There might still be latent bugs... 9********************************************************************/ 10 11#ifdef HAVE_CONFIG_H 12#include "config.h" 13#endif /* HAVE_CONFIG_H */ 14 15#include <stdio.h> 16#include <stdlib.h> 17#include <string.h> 18#include <sys/param.h> 19#include <sys/stat.h> 20#include <atalk/logger.h> 21#include <errno.h> 22 23#include <netatalk/endian.h> 24 25#include <atalk/unicode.h> 26#include "precompose.h" 27#include "byteorder.h" 28 29/******************************************************************* 30 Convert a string to lower case. 31 return True if any char is converted 32********************************************************************/ 33/* surrogate pair support */ 34 35int strlower_w(ucs2_t *s) 36{ 37 int ret = 0; 38 39 while (*s) { 40 if ((0xD800 <= *s) && (*s < 0xDC00)) { 41 if ((0xDC00 <= s[1]) && (s[1] < 0xE000)) { 42 u_int32_t s_sp = (u_int32_t)*s << 16 | (u_int32_t)s[1]; 43 u_int32_t v_sp = tolower_sp(s_sp); 44 if (v_sp != s_sp) { 45 *s = v_sp >> 16; 46 s++; 47 *s = v_sp & 0xFFFF; 48 ret = 1; 49 } 50 } 51 } else { 52 ucs2_t v = tolower_w(*s); 53 if (v != *s) { 54 *s = v; 55 ret = 1; 56 } 57 } 58 s++; 59 } 60 return ret; 61} 62 63/******************************************************************* 64 Convert a string to upper case. 
65 return True if any char is converted 66********************************************************************/ 67/* surrogate pair support */ 68 69int strupper_w(ucs2_t *s) 70{ 71 int ret = 0; 72 73 while (*s) { 74 if ((0xD800 <= *s) && (*s < 0xDC00)) { 75 if ((0xDC00 <= s[1]) && (s[1] < 0xE000)) { 76 u_int32_t s_sp = (u_int32_t)*s << 16 | (u_int32_t)s[1]; 77 u_int32_t v_sp = toupper_sp(s_sp); 78 if (v_sp != s_sp) { 79 *s = v_sp >> 16; 80 s++; 81 *s = v_sp & 0xFFFF; 82 ret = 1; 83 } 84 } 85 } else { 86 ucs2_t v = toupper_w(*s); 87 if (v != *s) { 88 *s = v; 89 ret = 1; 90 } 91 } 92 s++; 93 } 94 return ret; 95} 96 97/******************************************************************* 98wide & sp islower() 99determine if a character is lowercase 100********************************************************************/ 101/* These functions are not used. */ 102 103int islower_w(ucs2_t c) 104{ 105 return ( c == tolower_w(c)); 106} 107 108int islower_sp(u_int32_t c_sp) 109{ 110 return ( c_sp == tolower_sp(c_sp)); 111} 112 113/******************************************************************* 114wide & sp isupper() 115determine if a character is uppercase 116********************************************************************/ 117/* These functions are not used. */ 118 119int isupper_w(ucs2_t c) 120{ 121 return ( c == toupper_w(c)); 122} 123 124int isupper_sp(u_int32_t c_sp) 125{ 126 return ( c_sp == toupper_sp(c_sp)); 127} 128 129/******************************************************************* 130wide strlen() 131 Count the number of characters in a UTF-16 string. 132********************************************************************/ 133/* NOTE: one surrogate pair is two characters. */ 134 135size_t strlen_w(const ucs2_t *src) 136{ 137 size_t len; 138 139 for(len = 0; *src++; len++) ; 140 141 return len; 142} 143 144/******************************************************************* 145wide strnlen() 146 Count up to max number of characters in a UTF-16 string. 
147********************************************************************/ 148/* NOTE: one surrogate pair is two characters. */ 149 150size_t strnlen_w(const ucs2_t *src, size_t max) 151{ 152 size_t len; 153 154 for(len = 0; *src++ && (len < max); len++) ; 155 156 return len; 157} 158 159/******************************************************************* 160wide strchr() 161********************************************************************/ 162/* NOTE: hi and lo of surrogate pair are separately processed. */ 163 164ucs2_t *strchr_w(const ucs2_t *s, ucs2_t c) 165{ 166 while (*s != 0) { 167 if (c == *s) return (ucs2_t *)s; 168 s++; 169 } 170 if (c == *s) return (ucs2_t *)s; 171 172 return NULL; 173} 174 175/******************************************************************* 176wide & sp strcasechr() 177********************************************************************/ 178/* NOTE: separately process BMP and surrogate pair */ 179 180ucs2_t *strcasechr_w(const ucs2_t *s, ucs2_t c) 181{ 182 while (*s != 0) { 183 if (tolower_w(c) == tolower_w(*s)) return (ucs2_t *)s; 184 s++; 185 } 186 if (c == *s) return (ucs2_t *)s; 187 188 return NULL; 189} 190 191ucs2_t *strcasechr_sp(const ucs2_t *s, u_int32_t c_sp) 192{ 193 if (*s == 0) return NULL; 194 while (s[1] != 0) { 195 if (tolower_sp(c_sp) == tolower_sp((u_int32_t)*s << 16 | (u_int32_t)s[1])) return (ucs2_t *)s; 196 s++; 197 } 198 199 return NULL; 200} 201 202/******************************************************************* 203wide strcmp() 204********************************************************************/ 205/* no problem of surrogate pair */ 206 207int strcmp_w(const ucs2_t *a, const ucs2_t *b) 208{ 209 while (*b && *a == *b) { a++; b++; } 210 return (*a - *b); 211 /* warning: if *a != *b and both are not 0 we retrun a random 212 greater or lesser than 0 number not realted to which 213 string is longer */ 214} 215 216/******************************************************************* 217wide strncmp() 
218********************************************************************/ 219/* no problem of surrogate pair */ 220 221int strncmp_w(const ucs2_t *a, const ucs2_t *b, size_t len) 222{ 223 size_t n = 0; 224 while ((n < len) && *b && *a == *b) { a++; b++; n++;} 225 return (len - n)?(*a - *b):0; 226} 227 228/******************************************************************* 229wide strstr() 230********************************************************************/ 231/* no problem of surrogate pair */ 232 233ucs2_t *strstr_w(const ucs2_t *s, const ucs2_t *ins) 234{ 235 ucs2_t *r; 236 size_t slen, inslen; 237 238 if (!s || !*s || !ins || !*ins) return NULL; 239 slen = strlen_w(s); 240 inslen = strlen_w(ins); 241 r = (ucs2_t *)s; 242 while ((r = strchr_w(r, *ins))) { 243 if (strncmp_w(r, ins, inslen) == 0) return r; 244 r++; 245 } 246 return NULL; 247} 248 249/******************************************************************* 250wide strcasestr() 251********************************************************************/ 252/* surrogate pair support */ 253 254ucs2_t *strcasestr_w(const ucs2_t *s, const ucs2_t *ins) 255{ 256 ucs2_t *r; 257 size_t slen, inslen; 258 259 if (!s || !*s || !ins || !*ins) return NULL; 260 slen = strlen_w(s); 261 inslen = strlen_w(ins); 262 r = (ucs2_t *)s; 263 264 if ((0xD800 <= *ins) && (*ins < 0xDC00)) { 265 if ((0xDC00 <= ins[1]) && (ins[1] < 0xE000)) { 266 u_int32_t ins_sp = (u_int32_t)*ins << 16 | (u_int32_t)ins[1]; 267 while ((r = strcasechr_sp(r, ins_sp))) { 268 if (strncasecmp_w(r, ins, inslen) == 0) return r; 269 r++; 270 } 271 } else { 272 return NULL; /* illegal sequence */ 273 } 274 } else { 275 while ((r = strcasechr_w(r, *ins))) { 276 if (strncasecmp_w(r, ins, inslen) == 0) return r; 277 r++; 278 } 279 } 280 return NULL; 281} 282 283/******************************************************************* 284wide strcasecmp() 285case insensitive string comparison 286********************************************************************/ 
287/* surrogate pair support */ 288 289int strcasecmp_w(const ucs2_t *a, const ucs2_t *b) 290{ 291 int ret; 292 293 while (*a && *b) { 294 if ((0xD800 <= *a) && (*a < 0xDC00)) { 295 if (ret = tolower_sp((u_int32_t)*a << 16 | (u_int32_t)a[1]) - tolower_sp((u_int32_t)*b << 16 | (u_int32_t)b[1])) return ret; 296 a++; 297 b++; 298 if (!(*a && *b)) return (tolower_w(*a) - tolower_w(*b)); /* avoid buffer over run */ 299 } else { 300 if (ret = tolower_w(*a) - tolower_w(*b)) return ret; 301 } 302 a++; 303 b++; 304 } 305 return (tolower_w(*a) - tolower_w(*b)); 306} 307 308/******************************************************************* 309wide strncasecmp() 310case insensitive string comparison, length limited 311********************************************************************/ 312/* NOTE: compare up to 'len+1' if 'len' isolate surrogate pair */ 313 314int strncasecmp_w(const ucs2_t *a, const ucs2_t *b, size_t len) 315{ 316 size_t n = 0; 317 int ret; 318 319 while ((n < len) && *a && *b) { 320 if ((0xD800 <= *a) && (*a < 0xDC00)) { 321 if (ret = tolower_sp((u_int32_t)*a << 16 | (u_int32_t)a[1]) - tolower_sp((u_int32_t)*b << 16 | (u_int32_t)b[1])) return ret; 322 a++; 323 b++; 324 n++; 325 if (!((n < len) && *a && *b)) return (tolower_w(*a) - tolower_w(*b)); 326 } else { 327 if (ret = tolower_w(*a) - tolower_w(*b)) return ret; 328 } 329 a++; 330 b++; 331 n++; 332 } 333 return (len - n)?(tolower_w(*a) - tolower_w(*b)):0; 334} 335 336/******************************************************************* 337wide strndup() 338duplicate string 339********************************************************************/ 340/* NOTE: not check isolation of surrogate pair */ 341/* if len == 0 then duplicate the whole string */ 342 343ucs2_t *strndup_w(const ucs2_t *src, size_t len) 344{ 345 ucs2_t *dest; 346 347 if (!len) len = strlen_w(src); 348 dest = (ucs2_t *)malloc((len + 1) * sizeof(ucs2_t)); 349 if (!dest) { 350 LOG (log_error, logtype_default, "strdup_w: out of memory!"); 
351 return NULL; 352 } 353 354 memcpy(dest, src, len * sizeof(ucs2_t)); 355 dest[len] = 0; 356 357 return dest; 358} 359 360/******************************************************************* 361wide strdup() 362duplicate string 363********************************************************************/ 364/* no problem of surrogate pair */ 365 366ucs2_t *strdup_w(const ucs2_t *src) 367{ 368 return strndup_w(src, 0); 369} 370 371/******************************************************************* 372copy a string with max len 373********************************************************************/ 374/* This function is not used. */ 375/* NOTE: not check isolation of surrogate pair */ 376 377ucs2_t *strncpy_w(ucs2_t *dest, const ucs2_t *src, const size_t max) 378{ 379 size_t len; 380 381 if (!dest || !src) return NULL; 382 383 for (len = 0; (src[len] != 0) && (len < max); len++) 384 dest[len] = src[len]; 385 while (len < max) 386 dest[len++] = 0; 387 388 return dest; 389} 390 391 392/******************************************************************* 393append a string of len bytes and add a terminator 394********************************************************************/ 395/* These functions are not used. 
*/ 396 397/* NOTE: not check isolation of surrogate pair */ 398ucs2_t *strncat_w(ucs2_t *dest, const ucs2_t *src, const size_t max) 399{ 400 size_t start; 401 size_t len; 402 403 if (!dest || !src) return NULL; 404 405 start = strlen_w(dest); 406 len = strnlen_w(src, max); 407 408 memcpy(&dest[start], src, len*sizeof(ucs2_t)); 409 dest[start+len] = 0; 410 411 return dest; 412} 413 414/* no problem of surrogate pair */ 415ucs2_t *strcat_w(ucs2_t *dest, const ucs2_t *src) 416{ 417 size_t start; 418 size_t len; 419 420 if (!dest || !src) return NULL; 421 422 start = strlen_w(dest); 423 len = strlen_w(src); 424 425 memcpy(&dest[start], src, len*sizeof(ucs2_t)); 426 dest[start+len] = 0; 427 428 return dest; 429} 430 431 432/******************************************************************* 433binary search for pre|decomposition 434********************************************************************/ 435 436static ucs2_t do_precomposition(unsigned int base, unsigned int comb) 437{ 438 int min = 0; 439 int max = PRECOMP_COUNT - 1; 440 int mid; 441 u_int32_t sought = (base << 16) | comb, that; 442 443 /* binary search */ 444 while (max >= min) { 445 mid = (min + max) / 2; 446 that = (precompositions[mid].base << 16) | (precompositions[mid].comb); 447 if (that < sought) { 448 min = mid + 1; 449 } else if (that > sought) { 450 max = mid - 1; 451 } else { 452 return precompositions[mid].replacement; 453 } 454 } 455 /* no match */ 456 return 0; 457} 458 459/* ------------------------ */ 460static u_int32_t do_precomposition_sp(unsigned int base_sp, unsigned int comb_sp) 461{ 462 int min = 0; 463 int max = PRECOMP_SP_COUNT - 1; 464 int mid; 465 u_int64_t sought_sp = ((u_int64_t)base_sp << 32) | (u_int64_t)comb_sp, that_sp; 466 467 /* binary search */ 468 while (max >= min) { 469 mid = (min + max) / 2; 470 that_sp = ((u_int64_t)precompositions_sp[mid].base_sp << 32) | ((u_int64_t)precompositions_sp[mid].comb_sp); 471 if (that_sp < sought_sp) { 472 min = mid + 1; 473 } else if 
(that_sp > sought_sp) { 474 max = mid - 1; 475 } else { 476 return precompositions_sp[mid].replacement_sp; 477 } 478 } 479 /* no match */ 480 return 0; 481} 482 483/* -------------------------- */ 484static u_int32_t do_decomposition(ucs2_t base) 485{ 486 int min = 0; 487 int max = DECOMP_COUNT - 1; 488 int mid; 489 u_int32_t sought = base; 490 u_int32_t result, that; 491 492 /* binary search */ 493 while (max >= min) { 494 mid = (min + max) / 2; 495 that = decompositions[mid].replacement; 496 if (that < sought) { 497 min = mid + 1; 498 } else if (that > sought) { 499 max = mid - 1; 500 } else { 501 result = (decompositions[mid].base << 16) | (decompositions[mid].comb); 502 return result; 503 } 504 } 505 /* no match */ 506 return 0; 507} 508 509/* -------------------------- */ 510static u_int64_t do_decomposition_sp(unsigned int base_sp) 511{ 512 int min = 0; 513 int max = DECOMP_SP_COUNT - 1; 514 int mid; 515 u_int32_t sought_sp = base_sp; 516 u_int32_t that_sp; 517 u_int64_t result_sp; 518 519 /* binary search */ 520 while (max >= min) { 521 mid = (min + max) / 2; 522 that_sp = decompositions_sp[mid].replacement_sp; 523 if (that_sp < sought_sp) { 524 min = mid + 1; 525 } else if (that_sp > sought_sp) { 526 max = mid - 1; 527 } else { 528 result_sp = ((u_int64_t)decompositions_sp[mid].base_sp << 32) | ((u_int64_t)decompositions_sp[mid].comb_sp); 529 return result_sp; 530 } 531 } 532 /* no match */ 533 return 0; 534} 535 536/******************************************************************* 537pre|decomposition 538 539 we can't use static, this stuff needs to be reentrant 540 static char comp[MAXPATHLEN +1]; 541 542 We don't implement Singleton and Canonical Ordering. 543 We ignore CompositionExclusions.txt. 544 because they cause the problem of the roundtrip 545 such as Dancing Icon. 
546 547 exclude U2000-U2FFF, UFE30-UFE4F and U2F800-U2FA1F ranges 548 in precompose.h from composition according to AFP 3.x spec 549********************************************************************/ 550 551size_t precompose_w (ucs2_t *name, size_t inplen, ucs2_t *comp, size_t *outlen) 552{ 553 size_t i; 554 ucs2_t base, comb; 555 u_int32_t base_sp, comb_sp; 556 ucs2_t *in, *out; 557 ucs2_t lindex, vindex; 558 ucs2_t result; 559 u_int32_t result_sp; 560 size_t o_len = *outlen; 561 562 if (!inplen || (inplen & 1) || inplen > o_len) 563 return (size_t)-1; 564 565 i = 0; 566 in = name; 567 out = comp; 568 569 base = *in; 570 while (*outlen > 2) { 571 i += 2; 572 if (i == inplen) { 573 *out = base; 574 out++; 575 *out = 0; 576 *outlen -= 2; 577 return o_len - *outlen; 578 } 579 in++; 580 comb = *in; 581 result = 0; 582 583 /* Non-Combination Character */ 584 if (comb < 0x300) ; 585 586 /* Unicode Standard Annex #15 A10.3 Hangul Composition */ 587 /* Step 1 <L,V> */ 588 else if ((VBASE <= comb) && (comb <= VBASE + VCOUNT)) { 589 if ((LBASE <= base) && (base < LBASE + LCOUNT)) { 590 result = 1; 591 lindex = base - LBASE; 592 vindex = comb - VBASE; 593 base = SBASE + (lindex * VCOUNT + vindex) * TCOUNT; 594 } 595 } 596 597 /* Step 2 <LV,T> */ 598 else if ((TBASE < comb) && (comb < TBASE + TCOUNT)) { 599 if ((SBASE <= base) && (base < SBASE + SCOUNT) && (((base - SBASE) % TCOUNT) == 0)) { 600 result = 1; 601 base += comb - TBASE; 602 } 603 } 604 605 /* Binary Search for Surrogate Pair */ 606 else if ((0xD800 <= base) && (base < 0xDC00)) { 607 if ((0xDC00 <= comb) && (comb < 0xE000) && (i + 6 <= inplen)) { 608 base_sp = ((u_int32_t)base << 16) | (u_int32_t)comb; 609 do { 610 comb_sp = ((u_int32_t)in[1] << 16) | (u_int32_t)in[2]; 611 if (result_sp = do_precomposition_sp(base_sp, comb_sp)) { 612 base_sp = result_sp; 613 i += 4; 614 in +=2; 615 } 616 } while ((i + 6 <= inplen) && result_sp) ; 617 618 *out = base_sp >> 16; 619 out++; 620 *outlen -= 2; 621 622 if (*outlen <= 
2) { 623 errno = E2BIG; 624 return (size_t)-1; 625 } 626 627 *out = base_sp & 0xFFFF; 628 out++; 629 *outlen -= 2; 630 631 i += 2; 632 if (i == inplen) { 633 out++; 634 *out = 0; 635 return o_len - *outlen; 636 } 637 in++; 638 base = *in; 639 640 result = 1; 641 } 642 } 643 644 /* Binary Search for BMP */ 645 else if (result = do_precomposition(base, comb)) { 646 base = result; 647 } 648 649 if (!result) { 650 *out = base; 651 out++; 652 *outlen -= 2; 653 base = comb; 654 } 655 } 656 657 errno = E2BIG; 658 return (size_t)-1; 659} 660 661/* --------------- */ 662size_t decompose_w (ucs2_t *name, size_t inplen, ucs2_t *comp, size_t *outlen) 663{ 664 size_t i; 665 size_t comblen; 666 ucs2_t base, comb[COMBBUFLEN]; 667 u_int32_t base_sp; 668 ucs2_t sindex, tjamo; 669 ucs2_t *in, *out; 670 unsigned int result; 671 u_int64_t result_sp; 672 size_t o_len = *outlen; 673 674 if (!inplen || (inplen & 1)) 675 return (size_t)-1; 676 i = 0; 677 in = name; 678 out = comp; 679 680 while (i < inplen) { 681 base = *in; 682 comblen = 0; 683 684 /* check ASCII first. this is frequent. 
*/ 685 if (base <= 0x007f) ; 686 687 /* Unicode Standard Annex #15 A10.2 Hangul Decomposition */ 688 else if ((SBASE <= base) && (base < SBASE + SCOUNT)) { 689 sindex = base - SBASE; 690 base = LBASE + sindex / NCOUNT; 691 comb[COMBBUFLEN-2] = VBASE + (sindex % NCOUNT) / TCOUNT; 692 693 /* <L,V> */ 694 if ((tjamo = TBASE + sindex % TCOUNT) == TBASE) { 695 comb[COMBBUFLEN-1] = comb[COMBBUFLEN-2]; 696 comblen = 1; 697 } 698 699 /* <L,V,T> */ 700 else { 701 comb[COMBBUFLEN-1] = tjamo; 702 comblen = 2; 703 } 704 } 705 706 /* Binary Search for Surrogate Pair */ 707 else if ((0xD800 <= base) && (base < 0xDC00)) { 708 if (i + 2 < inplen) { 709 base_sp = ((u_int32_t)base << 16) | (u_int32_t)in[1]; 710 do { 711 if ( !(result_sp = do_decomposition_sp(base_sp))) break; 712 comblen += 2; 713 base_sp = result_sp >> 32; 714 comb[COMBBUFLEN-comblen] = (result_sp >> 16) & 0xFFFF; /* hi */ 715 comb[COMBBUFLEN-comblen+1] = result_sp & 0xFFFF; /* lo */ 716 } while (comblen < MAXCOMBSPLEN); 717 718 if (*outlen < (comblen + 1) << 1) { 719 errno = E2BIG; 720 return (size_t)-1; 721 } 722 723 *out = base_sp >> 16; /* hi */ 724 out++; 725 *outlen -= 2; 726 727 base = base_sp & 0xFFFF; /* lo */ 728 729 i += 2; 730 in++; 731 } 732 } 733 734 /* Binary Search for BMP */ 735 else { 736 do { 737 if ( !(result = do_decomposition(base))) break; 738 comblen++; 739 base = result >> 16; 740 comb[COMBBUFLEN-comblen] = result & 0xFFFF; 741 } while ((0x007f < base) && (comblen < MAXCOMBLEN)); 742 } 743 744 if (*outlen < (comblen + 1) << 1) { 745 errno = E2BIG; 746 return (size_t)-1; 747 } 748 749 *out = base; 750 out++; 751 *outlen -= 2; 752 753 while ( comblen > 0 ) { 754 *out = comb[COMBBUFLEN-comblen]; 755 out++; 756 *outlen -= 2; 757 comblen--; 758 } 759 760 i += 2; 761 in++; 762 } 763 764 *out = 0; 765 return o_len-*outlen; 766} 767 768/******************************************************************* 769length of UTF-8 character and string 
// *****************************************************************/

/* Nonzero when b is a UTF-8 continuation byte (0x80..0xBF). */
static int utf8_is_trail(unsigned char b)
{
    return (b > 0x7f) && (b < 0xc0);
}

/*
 * Return the length in bytes (1..4) of the UTF-8 sequence starting at
 * utf8, or (size_t)-1 when the bytes do not form an accepted sequence.
 * Following bytes are inspected only after the preceding one was
 * accepted, so a NUL terminator stops the scan.
 */
size_t utf8_charlen ( char* utf8 )
{
    const unsigned char *p = (const unsigned char *) utf8;
    const unsigned char lead = p[0];
    const size_t invalid = (size_t) -1;

    /* single byte */
    if (lead < 0x80)
        return 1;

    /* two bytes: C2..DF */
    if (lead > 0xC1 && lead < 0xe0)
        return utf8_is_trail(p[1]) ? 2 : invalid;

    /* three bytes: E0 needs a second byte of A0..BF */
    if (lead == 0xe0) {
        if (p[1] > 0x9f && p[1] < 0xc0 && utf8_is_trail(p[2]))
            return 3;
        return invalid;
    }

    /* three bytes: E1..EF */
    if (lead > 0xe0 && lead < 0xf0) {
        if (utf8_is_trail(p[1]) && utf8_is_trail(p[2]))
            return 3;
        return invalid;
    }

    /* four bytes: F0 needs a second byte of 90..BF */
    if (lead == 0xf0) {
        if (p[1] > 0x8f && p[1] < 0xc0 && utf8_is_trail(p[2]) && utf8_is_trail(p[3]))
            return 4;
        return invalid;
    }

    /* four bytes: F1..F3 */
    if (lead > 0xf0 && lead < 0xf4) {
        if (utf8_is_trail(p[1]) && utf8_is_trail(p[2]) && utf8_is_trail(p[3]))
            return 4;
        return invalid;
    }

    /* four bytes: F4 needs a second byte of 80..8F */
    if (lead == 0xf4) {
        if (p[1] > 0x7f && p[1] < 0x90 && utf8_is_trail(p[2]) && utf8_is_trail(p[3]))
            return 4;
        return invalid;
    }

    return invalid;
}


/*
 * Count the characters of a NUL-terminated UTF-8 string.
 * Returns (size_t)-1 as soon as a sequence is not accepted by
 * utf8_charlen().
 */
size_t utf8_strlen_validate ( char * utf8 )
{
    char *cursor = utf8;
    size_t count = 0;

    /* see http://www.unicode.org/unicode/reports/tr27/ for an explanation */

    while (*cursor != '\0') {
        const size_t step = utf8_charlen(cursor);

        if (step == (size_t) -1)
            return ((size_t) -1);

        cursor += step;
        count++;
    }

    return (count);
}