1/******************************************************************* 2 NOTE: 3 The early netatalk 2.x was based on UCS-2. 4 UCS-2 don't support chars above U+10000. 5 Recent netatalk is based on UTF-16. 6 UTF-16 can support chars above U+10000, using Surrogate Pair. 7 However, Surrogate Pair is complex, dirty, filthy and disagreeable. 8 There might still be latent bugs... 9********************************************************************/ 10 11#ifdef HAVE_CONFIG_H 12#include "config.h" 13#endif /* HAVE_CONFIG_H */ 14 15#include <stdio.h> 16#include <stdlib.h> 17#include <string.h> 18#include <sys/param.h> 19#include <sys/stat.h> 20#include <atalk/logger.h> 21#include <errno.h> 22#include <arpa/inet.h> 23 24#include <atalk/unicode.h> 25#include "precompose.h" 26#include "byteorder.h" 27 28/******************************************************************* 29 Convert a string to lower case. 30 return True if any char is converted 31********************************************************************/ 32/* surrogate pair support */ 33 34int strlower_w(ucs2_t *s) 35{ 36 int ret = 0; 37 38 while (*s) { 39 if ((0xD800 <= *s) && (*s < 0xDC00)) { 40 if ((0xDC00 <= s[1]) && (s[1] < 0xE000)) { 41 uint32_t s_sp = (uint32_t)*s << 16 | (uint32_t)s[1]; 42 uint32_t v_sp = tolower_sp(s_sp); 43 if (v_sp != s_sp) { 44 *s = v_sp >> 16; 45 s++; 46 *s = v_sp & 0xFFFF; 47 ret = 1; 48 } 49 } 50 } else { 51 ucs2_t v = tolower_w(*s); 52 if (v != *s) { 53 *s = v; 54 ret = 1; 55 } 56 } 57 s++; 58 } 59 return ret; 60} 61 62/******************************************************************* 63 Convert a string to upper case. 
64 return True if any char is converted 65********************************************************************/ 66/* surrogate pair support */ 67 68int strupper_w(ucs2_t *s) 69{ 70 int ret = 0; 71 72 while (*s) { 73 if ((0xD800 <= *s) && (*s < 0xDC00)) { 74 if ((0xDC00 <= s[1]) && (s[1] < 0xE000)) { 75 uint32_t s_sp = (uint32_t)*s << 16 | (uint32_t)s[1]; 76 uint32_t v_sp = toupper_sp(s_sp); 77 if (v_sp != s_sp) { 78 *s = v_sp >> 16; 79 s++; 80 *s = v_sp & 0xFFFF; 81 ret = 1; 82 } 83 } 84 } else { 85 ucs2_t v = toupper_w(*s); 86 if (v != *s) { 87 *s = v; 88 ret = 1; 89 } 90 } 91 s++; 92 } 93 return ret; 94} 95 96/******************************************************************* 97wide & sp islower() 98determine if a character is lowercase 99********************************************************************/ 100/* These functions are not used. */ 101 102int islower_w(ucs2_t c) 103{ 104 return ( c == tolower_w(c)); 105} 106 107int islower_sp(uint32_t c_sp) 108{ 109 return ( c_sp == tolower_sp(c_sp)); 110} 111 112/******************************************************************* 113wide & sp isupper() 114determine if a character is uppercase 115********************************************************************/ 116/* These functions are not used. */ 117 118int isupper_w(ucs2_t c) 119{ 120 return ( c == toupper_w(c)); 121} 122 123int isupper_sp(uint32_t c_sp) 124{ 125 return ( c_sp == toupper_sp(c_sp)); 126} 127 128/******************************************************************* 129wide strlen() 130 Count the number of characters in a UTF-16 string. 131********************************************************************/ 132/* NOTE: one surrogate pair is two characters. */ 133 134size_t strlen_w(const ucs2_t *src) 135{ 136 size_t len; 137 138 for(len = 0; *src++; len++) ; 139 140 return len; 141} 142 143/******************************************************************* 144wide strnlen() 145 Count up to max number of characters in a UTF-16 string. 
146********************************************************************/ 147/* NOTE: one surrogate pair is two characters. */ 148 149size_t strnlen_w(const ucs2_t *src, size_t max) 150{ 151 size_t len; 152 153 for(len = 0; *src++ && (len < max); len++) ; 154 155 return len; 156} 157 158/******************************************************************* 159wide strchr() 160********************************************************************/ 161/* NOTE: hi and lo of surrogate pair are separately processed. */ 162 163ucs2_t *strchr_w(const ucs2_t *s, ucs2_t c) 164{ 165 while (*s != 0) { 166 if (c == *s) return (ucs2_t *)s; 167 s++; 168 } 169 if (c == *s) return (ucs2_t *)s; 170 171 return NULL; 172} 173 174/******************************************************************* 175wide & sp strcasechr() 176********************************************************************/ 177/* NOTE: separately process BMP and surrogate pair */ 178 179ucs2_t *strcasechr_w(const ucs2_t *s, ucs2_t c) 180{ 181 while (*s != 0) { 182 if (tolower_w(c) == tolower_w(*s)) return (ucs2_t *)s; 183 s++; 184 } 185 if (c == *s) return (ucs2_t *)s; 186 187 return NULL; 188} 189 190ucs2_t *strcasechr_sp(const ucs2_t *s, uint32_t c_sp) 191{ 192 if (*s == 0) return NULL; 193 while (s[1] != 0) { 194 if (tolower_sp(c_sp) == tolower_sp((uint32_t)*s << 16 | (uint32_t)s[1])) return (ucs2_t *)s; 195 s++; 196 } 197 198 return NULL; 199} 200 201/******************************************************************* 202wide strcmp() 203********************************************************************/ 204/* no problem of surrogate pair */ 205 206int strcmp_w(const ucs2_t *a, const ucs2_t *b) 207{ 208 while (*b && *a == *b) { a++; b++; } 209 return (*a - *b); 210 /* warning: if *a != *b and both are not 0 we retrun a random 211 greater or lesser than 0 number not realted to which 212 string is longer */ 213} 214 215/******************************************************************* 216wide strncmp() 
217********************************************************************/ 218/* no problem of surrogate pair */ 219 220int strncmp_w(const ucs2_t *a, const ucs2_t *b, size_t len) 221{ 222 size_t n = 0; 223 while ((n < len) && *b && *a == *b) { a++; b++; n++;} 224 return (len - n)?(*a - *b):0; 225} 226 227/******************************************************************* 228wide strstr() 229********************************************************************/ 230/* no problem of surrogate pair */ 231 232ucs2_t *strstr_w(const ucs2_t *s, const ucs2_t *ins) 233{ 234 ucs2_t *r; 235 size_t slen, inslen; 236 237 if (!s || !*s || !ins || !*ins) return NULL; 238 slen = strlen_w(s); 239 inslen = strlen_w(ins); 240 r = (ucs2_t *)s; 241 while ((r = strchr_w(r, *ins))) { 242 if (strncmp_w(r, ins, inslen) == 0) return r; 243 r++; 244 } 245 return NULL; 246} 247 248/******************************************************************* 249wide strcasestr() 250********************************************************************/ 251/* surrogate pair support */ 252 253ucs2_t *strcasestr_w(const ucs2_t *s, const ucs2_t *ins) 254{ 255 ucs2_t *r; 256 size_t slen, inslen; 257 258 if (!s || !*s || !ins || !*ins) return NULL; 259 slen = strlen_w(s); 260 inslen = strlen_w(ins); 261 r = (ucs2_t *)s; 262 263 if ((0xD800 <= *ins) && (*ins < 0xDC00)) { 264 if ((0xDC00 <= ins[1]) && (ins[1] < 0xE000)) { 265 uint32_t ins_sp = (uint32_t)*ins << 16 | (uint32_t)ins[1]; 266 while ((r = strcasechr_sp(r, ins_sp))) { 267 if (strncasecmp_w(r, ins, inslen) == 0) return r; 268 r++; 269 } 270 } else { 271 return NULL; /* illegal sequence */ 272 } 273 } else { 274 while ((r = strcasechr_w(r, *ins))) { 275 if (strncasecmp_w(r, ins, inslen) == 0) return r; 276 r++; 277 } 278 } 279 return NULL; 280} 281 282/******************************************************************* 283wide strcasecmp() 284case insensitive string comparison 285********************************************************************/ 286/* 
surrogate pair support */ 287 288int strcasecmp_w(const ucs2_t *a, const ucs2_t *b) 289{ 290 int ret; 291 292 while (*a && *b) { 293 if ((0xD800 <= *a) && (*a < 0xDC00)) { 294 if ((ret = tolower_sp((uint32_t)*a << 16 | (uint32_t)a[1]) - tolower_sp((uint32_t)*b << 16 | (uint32_t)b[1]))) return ret; 295 a++; 296 b++; 297 if (!(*a && *b)) return (tolower_w(*a) - tolower_w(*b)); /* avoid buffer over run */ 298 } else { 299 if ((ret = tolower_w(*a) - tolower_w(*b))) return ret; 300 } 301 a++; 302 b++; 303 } 304 return (tolower_w(*a) - tolower_w(*b)); 305} 306 307/******************************************************************* 308wide strncasecmp() 309case insensitive string comparison, length limited 310********************************************************************/ 311/* NOTE: compare up to 'len+1' if 'len' isolate surrogate pair */ 312 313int strncasecmp_w(const ucs2_t *a, const ucs2_t *b, size_t len) 314{ 315 size_t n = 0; 316 int ret; 317 318 while ((n < len) && *a && *b) { 319 if ((0xD800 <= *a) && (*a < 0xDC00)) { 320 if ((ret = tolower_sp((uint32_t)*a << 16 | (uint32_t)a[1]) - tolower_sp((uint32_t)*b << 16 | (uint32_t)b[1]))) return ret; 321 a++; 322 b++; 323 n++; 324 if (!((n < len) && *a && *b)) return (tolower_w(*a) - tolower_w(*b)); 325 } else { 326 if ((ret = tolower_w(*a) - tolower_w(*b))) return ret; 327 } 328 a++; 329 b++; 330 n++; 331 } 332 return (len - n)?(tolower_w(*a) - tolower_w(*b)):0; 333} 334 335/******************************************************************* 336wide strndup() 337duplicate string 338********************************************************************/ 339/* NOTE: not check isolation of surrogate pair */ 340/* if len == 0 then duplicate the whole string */ 341 342ucs2_t *strndup_w(const ucs2_t *src, size_t len) 343{ 344 ucs2_t *dest; 345 346 if (!len) len = strlen_w(src); 347 dest = (ucs2_t *)malloc((len + 1) * sizeof(ucs2_t)); 348 if (!dest) { 349 LOG (log_error, logtype_default, "strdup_w: out of memory!"); 350 
return NULL; 351 } 352 353 memcpy(dest, src, len * sizeof(ucs2_t)); 354 dest[len] = 0; 355 356 return dest; 357} 358 359/******************************************************************* 360wide strdup() 361duplicate string 362********************************************************************/ 363/* no problem of surrogate pair */ 364 365ucs2_t *strdup_w(const ucs2_t *src) 366{ 367 return strndup_w(src, 0); 368} 369 370/******************************************************************* 371copy a string with max len 372********************************************************************/ 373/* This function is not used. */ 374/* NOTE: not check isolation of surrogate pair */ 375 376ucs2_t *strncpy_w(ucs2_t *dest, const ucs2_t *src, const size_t max) 377{ 378 size_t len; 379 380 if (!dest || !src) return NULL; 381 382 for (len = 0; (src[len] != 0) && (len < max); len++) 383 dest[len] = src[len]; 384 while (len < max) 385 dest[len++] = 0; 386 387 return dest; 388} 389 390 391/******************************************************************* 392append a string of len bytes and add a terminator 393********************************************************************/ 394/* These functions are not used. 
*/ 395 396/* NOTE: not check isolation of surrogate pair */ 397ucs2_t *strncat_w(ucs2_t *dest, const ucs2_t *src, const size_t max) 398{ 399 size_t start; 400 size_t len; 401 402 if (!dest || !src) return NULL; 403 404 start = strlen_w(dest); 405 len = strnlen_w(src, max); 406 407 memcpy(&dest[start], src, len*sizeof(ucs2_t)); 408 dest[start+len] = 0; 409 410 return dest; 411} 412 413/* no problem of surrogate pair */ 414ucs2_t *strcat_w(ucs2_t *dest, const ucs2_t *src) 415{ 416 size_t start; 417 size_t len; 418 419 if (!dest || !src) return NULL; 420 421 start = strlen_w(dest); 422 len = strlen_w(src); 423 424 memcpy(&dest[start], src, len*sizeof(ucs2_t)); 425 dest[start+len] = 0; 426 427 return dest; 428} 429 430 431/******************************************************************* 432binary search for pre|decomposition 433********************************************************************/ 434 435static ucs2_t do_precomposition(unsigned int base, unsigned int comb) 436{ 437 int min = 0; 438 int max = PRECOMP_COUNT - 1; 439 int mid; 440 uint32_t sought = (base << 16) | comb, that; 441 442 /* binary search */ 443 while (max >= min) { 444 mid = (min + max) / 2; 445 that = (precompositions[mid].base << 16) | (precompositions[mid].comb); 446 if (that < sought) { 447 min = mid + 1; 448 } else if (that > sought) { 449 max = mid - 1; 450 } else { 451 return precompositions[mid].replacement; 452 } 453 } 454 /* no match */ 455 return 0; 456} 457 458/* ------------------------ */ 459static uint32_t do_precomposition_sp(unsigned int base_sp, unsigned int comb_sp) 460{ 461 int min = 0; 462 int max = PRECOMP_SP_COUNT - 1; 463 int mid; 464 uint64_t sought_sp = ((uint64_t)base_sp << 32) | (uint64_t)comb_sp, that_sp; 465 466 /* binary search */ 467 while (max >= min) { 468 mid = (min + max) / 2; 469 that_sp = ((uint64_t)precompositions_sp[mid].base_sp << 32) | ((uint64_t)precompositions_sp[mid].comb_sp); 470 if (that_sp < sought_sp) { 471 min = mid + 1; 472 } else if (that_sp 
> sought_sp) { 473 max = mid - 1; 474 } else { 475 return precompositions_sp[mid].replacement_sp; 476 } 477 } 478 /* no match */ 479 return 0; 480} 481 482/* -------------------------- */ 483static uint32_t do_decomposition(ucs2_t base) 484{ 485 int min = 0; 486 int max = DECOMP_COUNT - 1; 487 int mid; 488 uint32_t sought = base; 489 uint32_t result, that; 490 491 /* binary search */ 492 while (max >= min) { 493 mid = (min + max) / 2; 494 that = decompositions[mid].replacement; 495 if (that < sought) { 496 min = mid + 1; 497 } else if (that > sought) { 498 max = mid - 1; 499 } else { 500 result = (decompositions[mid].base << 16) | (decompositions[mid].comb); 501 return result; 502 } 503 } 504 /* no match */ 505 return 0; 506} 507 508/* -------------------------- */ 509static uint64_t do_decomposition_sp(unsigned int base_sp) 510{ 511 int min = 0; 512 int max = DECOMP_SP_COUNT - 1; 513 int mid; 514 uint32_t sought_sp = base_sp; 515 uint32_t that_sp; 516 uint64_t result_sp; 517 518 /* binary search */ 519 while (max >= min) { 520 mid = (min + max) / 2; 521 that_sp = decompositions_sp[mid].replacement_sp; 522 if (that_sp < sought_sp) { 523 min = mid + 1; 524 } else if (that_sp > sought_sp) { 525 max = mid - 1; 526 } else { 527 result_sp = ((uint64_t)decompositions_sp[mid].base_sp << 32) | ((uint64_t)decompositions_sp[mid].comb_sp); 528 return result_sp; 529 } 530 } 531 /* no match */ 532 return 0; 533} 534 535/******************************************************************* 536pre|decomposition 537 538 we can't use static, this stuff needs to be reentrant 539 static char comp[MAXPATHLEN +1]; 540 541 We don't implement Singleton and Canonical Ordering. 542 We ignore CompositionExclusions.txt. 543 because they cause the problem of the roundtrip 544 such as Dancing Icon. 
545 546 exclude U2000-U2FFF, UFE30-UFE4F and U2F800-U2FA1F ranges 547 in precompose.h from composition according to AFP 3.x spec 548********************************************************************/ 549 550size_t precompose_w (ucs2_t *name, size_t inplen, ucs2_t *comp, size_t *outlen) 551{ 552 size_t i; 553 ucs2_t base, comb; 554 uint32_t base_sp, comb_sp; 555 ucs2_t *in, *out; 556 ucs2_t lindex, vindex; 557 ucs2_t result; 558 uint32_t result_sp; 559 size_t o_len = *outlen; 560 561 if (!inplen || (inplen & 1) || inplen > o_len) 562 return (size_t)-1; 563 564 i = 0; 565 in = name; 566 out = comp; 567 568 base = *in; 569 while (*outlen > 2) { 570 i += 2; 571 if (i == inplen) { 572 *out = base; 573 out++; 574 *out = 0; 575 *outlen -= 2; 576 return o_len - *outlen; 577 } 578 in++; 579 comb = *in; 580 result = 0; 581 582 /* Non-Combination Character */ 583 if (comb < 0x300) ; 584 585 /* Unicode Standard Annex #15 A10.3 Hangul Composition */ 586 /* Step 1 <L,V> */ 587 else if ((VBASE <= comb) && (comb <= VBASE + VCOUNT)) { 588 if ((LBASE <= base) && (base < LBASE + LCOUNT)) { 589 result = 1; 590 lindex = base - LBASE; 591 vindex = comb - VBASE; 592 base = SBASE + (lindex * VCOUNT + vindex) * TCOUNT; 593 } 594 } 595 596 /* Step 2 <LV,T> */ 597 else if ((TBASE < comb) && (comb < TBASE + TCOUNT)) { 598 if ((SBASE <= base) && (base < SBASE + SCOUNT) && (((base - SBASE) % TCOUNT) == 0)) { 599 result = 1; 600 base += comb - TBASE; 601 } 602 } 603 604 /* Binary Search for Surrogate Pair */ 605 else if ((0xD800 <= base) && (base < 0xDC00)) { 606 if ((0xDC00 <= comb) && (comb < 0xE000) && (i + 6 <= inplen)) { 607 base_sp = ((uint32_t)base << 16) | (uint32_t)comb; 608 do { 609 comb_sp = ((uint32_t)in[1] << 16) | (uint32_t)in[2]; 610 if ((result_sp = do_precomposition_sp(base_sp, comb_sp))) { 611 base_sp = result_sp; 612 i += 4; 613 in +=2; 614 } 615 } while ((i + 6 <= inplen) && result_sp) ; 616 617 *out = base_sp >> 16; 618 out++; 619 *outlen -= 2; 620 621 if (*outlen <= 2) { 
622 errno = E2BIG; 623 return (size_t)-1; 624 } 625 626 *out = base_sp & 0xFFFF; 627 out++; 628 *outlen -= 2; 629 630 i += 2; 631 if (i == inplen) { 632 out++; 633 *out = 0; 634 return o_len - *outlen; 635 } 636 in++; 637 base = *in; 638 639 result = 1; 640 } 641 } 642 643 /* Binary Search for BMP */ 644 else if ((result = do_precomposition(base, comb))) { 645 base = result; 646 } 647 648 if (!result) { 649 *out = base; 650 out++; 651 *outlen -= 2; 652 base = comb; 653 } 654 } 655 656 errno = E2BIG; 657 return (size_t)-1; 658} 659 660/* --------------- */ 661size_t decompose_w (ucs2_t *name, size_t inplen, ucs2_t *comp, size_t *outlen) 662{ 663 size_t i; 664 size_t comblen; 665 ucs2_t base, comb[COMBBUFLEN]; 666 uint32_t base_sp; 667 ucs2_t sindex, tjamo; 668 ucs2_t *in, *out; 669 unsigned int result; 670 uint64_t result_sp; 671 size_t o_len = *outlen; 672 673 if (!inplen || (inplen & 1)) 674 return (size_t)-1; 675 i = 0; 676 in = name; 677 out = comp; 678 679 while (i < inplen) { 680 base = *in; 681 comblen = 0; 682 683 /* check ASCII first. this is frequent. 
*/ 684 if (base <= 0x007f) ; 685 686 /* Unicode Standard Annex #15 A10.2 Hangul Decomposition */ 687 else if ((SBASE <= base) && (base < SBASE + SCOUNT)) { 688 sindex = base - SBASE; 689 base = LBASE + sindex / NCOUNT; 690 comb[COMBBUFLEN-2] = VBASE + (sindex % NCOUNT) / TCOUNT; 691 692 /* <L,V> */ 693 if ((tjamo = TBASE + sindex % TCOUNT) == TBASE) { 694 comb[COMBBUFLEN-1] = comb[COMBBUFLEN-2]; 695 comblen = 1; 696 } 697 698 /* <L,V,T> */ 699 else { 700 comb[COMBBUFLEN-1] = tjamo; 701 comblen = 2; 702 } 703 } 704 705 /* Binary Search for Surrogate Pair */ 706 else if ((0xD800 <= base) && (base < 0xDC00)) { 707 if (i + 2 < inplen) { 708 base_sp = ((uint32_t)base << 16) | (uint32_t)in[1]; 709 do { 710 if ( !(result_sp = do_decomposition_sp(base_sp))) break; 711 comblen += 2; 712 base_sp = result_sp >> 32; 713 comb[COMBBUFLEN-comblen] = (result_sp >> 16) & 0xFFFF; /* hi */ 714 comb[COMBBUFLEN-comblen+1] = result_sp & 0xFFFF; /* lo */ 715 } while (comblen < MAXCOMBSPLEN); 716 717 if (*outlen < (comblen + 1) << 1) { 718 errno = E2BIG; 719 return (size_t)-1; 720 } 721 722 *out = base_sp >> 16; /* hi */ 723 out++; 724 *outlen -= 2; 725 726 base = base_sp & 0xFFFF; /* lo */ 727 728 i += 2; 729 in++; 730 } 731 } 732 733 /* Binary Search for BMP */ 734 else { 735 do { 736 if ( !(result = do_decomposition(base))) break; 737 comblen++; 738 base = result >> 16; 739 comb[COMBBUFLEN-comblen] = result & 0xFFFF; 740 } while ((0x007f < base) && (comblen < MAXCOMBLEN)); 741 } 742 743 if (*outlen < (comblen + 1) << 1) { 744 errno = E2BIG; 745 return (size_t)-1; 746 } 747 748 *out = base; 749 out++; 750 *outlen -= 2; 751 752 while ( comblen > 0 ) { 753 *out = comb[COMBBUFLEN-comblen]; 754 out++; 755 *outlen -= 2; 756 comblen--; 757 } 758 759 i += 2; 760 in++; 761 } 762 763 *out = 0; 764 return o_len-*outlen; 765} 766 767/******************************************************************* 768length of UTF-8 character and string 
/* Only well-formed UTF-8 byte sequences are accepted.
********************************************************************/

/* nonzero if c is a UTF-8 continuation byte (0x80..0xBF) */
static int utf8_is_trail(unsigned char c)
{
    return c >= 0x80 && c <= 0xBF;
}

/*
 * Length in bytes (1..4) of the UTF-8 sequence starting at utf8, or
 * (size_t)-1 if the bytes there are not a well-formed sequence.
 * A 0 byte counts as a one-byte character.
 *
 * Bytes are examined one at a time and the scan stops at the first
 * byte that does not fit, so a 0-terminated string is never read past
 * its terminator.
 */
size_t utf8_charlen ( char* utf8 )
{
    const unsigned char *p = (const unsigned char *) utf8;
    /* allowed range of the SECOND byte; narrowed for some lead bytes */
    unsigned char second_min = 0x80;
    unsigned char second_max = 0xBF;
    size_t len;
    size_t k;

    if (p[0] < 0x80)
        return (1);

    if (p[0] < 0xC2) {
        /* stray continuation byte, or overlong lead 0xC0/0xC1 */
        return ((size_t) -1);
    } else if (p[0] < 0xE0) {
        len = 2;
    } else if (p[0] < 0xF0) {
        len = 3;
        if (p[0] == 0xE0)
            second_min = 0xA0;  /* below that the encoding is overlong */
        else if (p[0] == 0xED)
            second_max = 0x9F;  /* above that lie surrogates U+D800..U+DFFF */
    } else if (p[0] < 0xF5) {
        len = 4;
        if (p[0] == 0xF0)
            second_min = 0x90;  /* below that the encoding is overlong */
        else if (p[0] == 0xF4)
            second_max = 0x8F;  /* above that lies beyond U+10FFFF */
    } else {
        return ((size_t) -1);
    }

    if (p[1] < second_min || p[1] > second_max)
        return ((size_t) -1);

    for (k = 2; k < len; k++) {
        if (!utf8_is_trail(p[k]))
            return ((size_t) -1);
    }

    return (len);
}


/*
 * Number of characters in the 0-terminated UTF-8 string utf8, or
 * (size_t)-1 if it contains a sequence that utf8_charlen() rejects.
 */
size_t utf8_strlen_validate ( char * utf8 )
{
    size_t count = 0;
    char *p = utf8;

    /* see http://www.unicode.org/unicode/reports/tr27/ for an explanation */

    while ( *p != '\0') {
        const size_t n = utf8_charlen(p);

        if (n == (size_t) -1)
            return ((size_t) -1);

        p += n;
        count++;
    }

    return (count);
}