• Home
  • History
  • Annotate
  • Line#
  • Navigate
  • Raw
  • Download
  • only in /netgear-WNDR4500v2-V1.0.0.60_1.0.38/ap/gpl/timemachine/netatalk-2.2.5/libatalk/unicode/
/*******************************************************************
  NOTE:
  Early netatalk 2.x was based on UCS-2.
  UCS-2 does not support characters above U+FFFF (U+10000 and up).
  Recent netatalk is based on UTF-16.
  UTF-16 can represent characters from U+10000 upward by using surrogate pairs.
  However, surrogate pairs are complex, dirty, filthy and disagreeable.
  There might still be latent bugs...
********************************************************************/
10
11#ifdef HAVE_CONFIG_H
12#include "config.h"
13#endif /* HAVE_CONFIG_H */
14
15#include <stdio.h>
16#include <stdlib.h>
17#include <string.h>
18#include <sys/param.h>
19#include <sys/stat.h>
20#include <atalk/logger.h>
21#include <errno.h>
22
23#include <netatalk/endian.h>
24
25#include <atalk/unicode.h>
26#include "precompose.h"
27#include "byteorder.h"
28
29/*******************************************************************
30 Convert a string to lower case.
31 return True if any char is converted
32********************************************************************/
33/* surrogate pair support */
34
35int strlower_w(ucs2_t *s)
36{
37	int ret = 0;
38
39	while (*s) {
40		if ((0xD800 <= *s) && (*s < 0xDC00)) {
41			if ((0xDC00 <= s[1]) && (s[1] < 0xE000)) {
42				u_int32_t s_sp = (u_int32_t)*s << 16 | (u_int32_t)s[1];
43				u_int32_t v_sp = tolower_sp(s_sp);
44				if (v_sp != s_sp) {
45					*s = v_sp >> 16;
46					s++;
47					*s = v_sp & 0xFFFF;
48					ret = 1;
49				}
50			}
51		} else {
52			ucs2_t v = tolower_w(*s);
53			if (v != *s) {
54				*s = v;
55				ret = 1;
56			}
57		}
58		s++;
59	}
60	return ret;
61}
62
63/*******************************************************************
64 Convert a string to upper case.
65 return True if any char is converted
66********************************************************************/
67/* surrogate pair support */
68
69int strupper_w(ucs2_t *s)
70{
71	int ret = 0;
72
73	while (*s) {
74		if ((0xD800 <= *s) && (*s < 0xDC00)) {
75			if ((0xDC00 <= s[1]) && (s[1] < 0xE000)) {
76				u_int32_t s_sp = (u_int32_t)*s << 16 | (u_int32_t)s[1];
77				u_int32_t v_sp = toupper_sp(s_sp);
78				if (v_sp != s_sp) {
79					*s = v_sp >> 16;
80					s++;
81					*s = v_sp & 0xFFFF;
82					ret = 1;
83				}
84			}
85		} else {
86			ucs2_t v = toupper_w(*s);
87			if (v != *s) {
88				*s = v;
89				ret = 1;
90			}
91		}
92		s++;
93	}
94	return ret;
95}
96
97/*******************************************************************
98wide & sp islower()
99determine if a character is lowercase
100********************************************************************/
101/* These functions are not used. */
102
103int islower_w(ucs2_t c)
104{
105	return ( c == tolower_w(c));
106}
107
/* Surrogate-pair variant: c_sp holds the pair as (high << 16) | low.
   Non-zero when tolower_sp() maps the pair to itself. */
int islower_sp(u_int32_t c_sp)
{
	u_int32_t lowered_sp = tolower_sp(c_sp);

	return lowered_sp == c_sp;
}
112
113/*******************************************************************
114wide & sp isupper()
115determine if a character is uppercase
116********************************************************************/
117/* These functions are not used. */
118
119int isupper_w(ucs2_t c)
120{
121	return ( c == toupper_w(c));
122}
123
/* Surrogate-pair variant: c_sp holds the pair as (high << 16) | low.
   Non-zero when toupper_sp() maps the pair to itself. */
int isupper_sp(u_int32_t c_sp)
{
	u_int32_t raised_sp = toupper_sp(c_sp);

	return raised_sp == c_sp;
}
128
129/*******************************************************************
130wide strlen()
131 Count the number of characters in a UTF-16 string.
132********************************************************************/
133/* NOTE: one surrogate pair is two characters. */
134
135size_t strlen_w(const ucs2_t *src)
136{
137	size_t len;
138
139	for(len = 0; *src++; len++) ;
140
141	return len;
142}
143
144/*******************************************************************
145wide strnlen()
146 Count up to max number of characters in a UTF-16 string.
147********************************************************************/
148/* NOTE: one surrogate pair is two characters. */
149
150size_t strnlen_w(const ucs2_t *src, size_t max)
151{
152	size_t len;
153
154	for(len = 0; *src++ && (len < max); len++) ;
155
156	return len;
157}
158
159/*******************************************************************
160wide strchr()
161********************************************************************/
162/* NOTE: hi and lo of surrogate pair are separately processed. */
163
164ucs2_t *strchr_w(const ucs2_t *s, ucs2_t c)
165{
166	while (*s != 0) {
167		if (c == *s) return (ucs2_t *)s;
168		s++;
169	}
170	if (c == *s) return (ucs2_t *)s;
171
172	return NULL;
173}
174
175/*******************************************************************
176wide & sp strcasechr()
177********************************************************************/
178/* NOTE: separately process BMP and surrogate pair */
179
180ucs2_t *strcasechr_w(const ucs2_t *s, ucs2_t c)
181{
182	while (*s != 0) {
183		if (tolower_w(c) == tolower_w(*s)) return (ucs2_t *)s;
184		s++;
185	}
186	if (c == *s) return (ucs2_t *)s;
187
188	return NULL;
189}
190
191ucs2_t *strcasechr_sp(const ucs2_t *s, u_int32_t c_sp)
192{
193	if (*s == 0) return NULL;
194	while (s[1] != 0) {
195		if (tolower_sp(c_sp) == tolower_sp((u_int32_t)*s << 16 | (u_int32_t)s[1])) return (ucs2_t *)s;
196		s++;
197	}
198
199	return NULL;
200}
201
202/*******************************************************************
203wide strcmp()
204********************************************************************/
205/* no problem of surrogate pair */
206
207int strcmp_w(const ucs2_t *a, const ucs2_t *b)
208{
209	while (*b && *a == *b) { a++; b++; }
210	return (*a - *b);
211	/* warning: if *a != *b and both are not 0 we retrun a random
212	   greater or lesser than 0 number not realted to which
213	   string is longer */
214}
215
216/*******************************************************************
217wide strncmp()
218********************************************************************/
219/* no problem of surrogate pair */
220
221int strncmp_w(const ucs2_t *a, const ucs2_t *b, size_t len)
222{
223	size_t n = 0;
224	while ((n < len) && *b && *a == *b) { a++; b++; n++;}
225	return (len - n)?(*a - *b):0;
226}
227
228/*******************************************************************
229wide strstr()
230********************************************************************/
231/* no problem of surrogate pair */
232
233ucs2_t *strstr_w(const ucs2_t *s, const ucs2_t *ins)
234{
235	ucs2_t *r;
236	size_t slen, inslen;
237
238	if (!s || !*s || !ins || !*ins) return NULL;
239	slen = strlen_w(s);
240	inslen = strlen_w(ins);
241	r = (ucs2_t *)s;
242	while ((r = strchr_w(r, *ins))) {
243		if (strncmp_w(r, ins, inslen) == 0) return r;
244		r++;
245	}
246	return NULL;
247}
248
249/*******************************************************************
250wide strcasestr()
251********************************************************************/
252/* surrogate pair support */
253
254ucs2_t *strcasestr_w(const ucs2_t *s, const ucs2_t *ins)
255{
256	ucs2_t *r;
257	size_t slen, inslen;
258
259	if (!s || !*s || !ins || !*ins) return NULL;
260	slen = strlen_w(s);
261	inslen = strlen_w(ins);
262	r = (ucs2_t *)s;
263
264	if ((0xD800 <= *ins) && (*ins < 0xDC00)) {
265		if ((0xDC00 <= ins[1]) && (ins[1] < 0xE000)) {
266			u_int32_t ins_sp = (u_int32_t)*ins << 16 | (u_int32_t)ins[1];
267			while ((r = strcasechr_sp(r, ins_sp))) {
268				if (strncasecmp_w(r, ins, inslen) == 0) return r;
269				r++;
270			}
271		} else {
272			return NULL; /* illegal sequence */
273		}
274	} else {
275		while ((r = strcasechr_w(r, *ins))) {
276			if (strncasecmp_w(r, ins, inslen) == 0) return r;
277			r++;
278		}
279	}
280	return NULL;
281}
282
283/*******************************************************************
284wide strcasecmp()
285case insensitive string comparison
286********************************************************************/
287/* surrogate pair support */
288
289int strcasecmp_w(const ucs2_t *a, const ucs2_t *b)
290{
291	int ret;
292
293	while (*a && *b) {
294		if ((0xD800 <= *a) && (*a < 0xDC00)) {
295			if (ret = tolower_sp((u_int32_t)*a << 16 | (u_int32_t)a[1]) - tolower_sp((u_int32_t)*b << 16 | (u_int32_t)b[1])) return ret;
296			a++;
297			b++;
298			if (!(*a && *b)) return (tolower_w(*a) - tolower_w(*b)); /* avoid buffer over run */
299		} else {
300			if (ret = tolower_w(*a) - tolower_w(*b)) return ret;
301		}
302		a++;
303		b++;
304	}
305	return (tolower_w(*a) - tolower_w(*b));
306}
307
308/*******************************************************************
309wide strncasecmp()
310case insensitive string comparison, length limited
311********************************************************************/
312/* NOTE: compare up to 'len+1' if 'len' isolate surrogate pair  */
313
314int strncasecmp_w(const ucs2_t *a, const ucs2_t *b, size_t len)
315{
316	size_t n = 0;
317	int ret;
318
319	while ((n < len) && *a && *b) {
320		if ((0xD800 <= *a) && (*a < 0xDC00)) {
321			if (ret = tolower_sp((u_int32_t)*a << 16 | (u_int32_t)a[1]) - tolower_sp((u_int32_t)*b << 16 | (u_int32_t)b[1])) return ret;
322			a++;
323			b++;
324			n++;
325			if (!((n < len) && *a && *b)) return (tolower_w(*a) - tolower_w(*b));
326		} else {
327			if (ret = tolower_w(*a) - tolower_w(*b)) return ret;
328		}
329		a++;
330		b++;
331		n++;
332	}
333	return (len - n)?(tolower_w(*a) - tolower_w(*b)):0;
334}
335
336/*******************************************************************
337wide strndup()
338duplicate string
339********************************************************************/
340/* NOTE: not check isolation of surrogate pair */
341/* if len == 0 then duplicate the whole string */
342
343ucs2_t *strndup_w(const ucs2_t *src, size_t len)
344{
345	ucs2_t *dest;
346
347	if (!len) len = strlen_w(src);
348	dest = (ucs2_t *)malloc((len + 1) * sizeof(ucs2_t));
349	if (!dest) {
350		LOG (log_error, logtype_default, "strdup_w: out of memory!");
351		return NULL;
352	}
353
354	memcpy(dest, src, len * sizeof(ucs2_t));
355	dest[len] = 0;
356
357	return dest;
358}
359
360/*******************************************************************
361wide strdup()
362duplicate string
363********************************************************************/
364/* no problem of surrogate pair */
365
366ucs2_t *strdup_w(const ucs2_t *src)
367{
368	return strndup_w(src, 0);
369}
370
371/*******************************************************************
372copy a string with max len
373********************************************************************/
374/* This function is not used. */
375/* NOTE: not check isolation of surrogate pair */
376
377ucs2_t *strncpy_w(ucs2_t *dest, const ucs2_t *src, const size_t max)
378{
379	size_t len;
380
381	if (!dest || !src) return NULL;
382
383	for (len = 0; (src[len] != 0) && (len < max); len++)
384		dest[len] = src[len];
385	while (len < max)
386		dest[len++] = 0;
387
388	return dest;
389}
390
391
392/*******************************************************************
393append a string of len bytes and add a terminator
394********************************************************************/
395/* These functions are not used. */
396
397/* NOTE: not check isolation of surrogate pair */
398ucs2_t *strncat_w(ucs2_t *dest, const ucs2_t *src, const size_t max)
399{
400	size_t start;
401	size_t len;
402
403	if (!dest || !src) return NULL;
404
405	start = strlen_w(dest);
406	len = strnlen_w(src, max);
407
408	memcpy(&dest[start], src, len*sizeof(ucs2_t));
409	dest[start+len] = 0;
410
411	return dest;
412}
413
414/* no problem of surrogate pair */
415ucs2_t *strcat_w(ucs2_t *dest, const ucs2_t *src)
416{
417	size_t start;
418	size_t len;
419
420	if (!dest || !src) return NULL;
421
422	start = strlen_w(dest);
423	len = strlen_w(src);
424
425	memcpy(&dest[start], src, len*sizeof(ucs2_t));
426	dest[start+len] = 0;
427
428	return dest;
429}
430
431
432/*******************************************************************
433binary search for pre|decomposition
434********************************************************************/
435
436static ucs2_t do_precomposition(unsigned int base, unsigned int comb)
437{
438	int min = 0;
439	int max = PRECOMP_COUNT - 1;
440	int mid;
441	u_int32_t sought = (base << 16) | comb, that;
442
443	/* binary search */
444	while (max >= min) {
445		mid = (min + max) / 2;
446		that = (precompositions[mid].base << 16) | (precompositions[mid].comb);
447		if (that < sought) {
448			min = mid + 1;
449		} else if (that > sought) {
450			max = mid - 1;
451		} else {
452			return precompositions[mid].replacement;
453		}
454	}
455	/* no match */
456	return 0;
457}
458
459/* ------------------------ */
460static u_int32_t do_precomposition_sp(unsigned int base_sp, unsigned int comb_sp)
461{
462	int min = 0;
463	int max = PRECOMP_SP_COUNT - 1;
464	int mid;
465	u_int64_t sought_sp = ((u_int64_t)base_sp << 32) | (u_int64_t)comb_sp, that_sp;
466
467	/* binary search */
468	while (max >= min) {
469		mid = (min + max) / 2;
470		that_sp = ((u_int64_t)precompositions_sp[mid].base_sp << 32) | ((u_int64_t)precompositions_sp[mid].comb_sp);
471		if (that_sp < sought_sp) {
472			min = mid + 1;
473		} else if (that_sp > sought_sp) {
474			max = mid - 1;
475		} else {
476			return precompositions_sp[mid].replacement_sp;
477		}
478	}
479	/* no match */
480	return 0;
481}
482
483/* -------------------------- */
484static u_int32_t do_decomposition(ucs2_t base)
485{
486	int min = 0;
487	int max = DECOMP_COUNT - 1;
488	int mid;
489	u_int32_t sought = base;
490	u_int32_t result, that;
491
492	/* binary search */
493	while (max >= min) {
494		mid = (min + max) / 2;
495		that = decompositions[mid].replacement;
496		if (that < sought) {
497			min = mid + 1;
498		} else if (that > sought) {
499			max = mid - 1;
500		} else {
501			result = (decompositions[mid].base << 16) | (decompositions[mid].comb);
502			return result;
503		}
504	}
505	/* no match */
506	return 0;
507}
508
509/* -------------------------- */
510static u_int64_t do_decomposition_sp(unsigned int base_sp)
511{
512	int min = 0;
513	int max = DECOMP_SP_COUNT - 1;
514	int mid;
515	u_int32_t sought_sp = base_sp;
516	u_int32_t that_sp;
517	u_int64_t result_sp;
518
519	/* binary search */
520	while (max >= min) {
521		mid = (min + max) / 2;
522		that_sp = decompositions_sp[mid].replacement_sp;
523		if (that_sp < sought_sp) {
524			min = mid + 1;
525		} else if (that_sp > sought_sp) {
526			max = mid - 1;
527		} else {
528			result_sp = ((u_int64_t)decompositions_sp[mid].base_sp << 32) | ((u_int64_t)decompositions_sp[mid].comb_sp);
529			return result_sp;
530		}
531	}
532	/* no match */
533	return 0;
534}
535
536/*******************************************************************
537pre|decomposition
538
539   we can't use static, this stuff needs to be reentrant
540   static char comp[MAXPATHLEN +1];
541
542   We don't implement Singleton and Canonical Ordering.
543   We ignore CompositionExclusions.txt.
544   because they cause the problem of the roundtrip
545   such as Dancing Icon.
546
547   exclude U2000-U2FFF, UFE30-UFE4F and U2F800-U2FA1F ranges
548   in precompose.h from composition according to AFP 3.x spec
549********************************************************************/
550
551size_t precompose_w (ucs2_t *name, size_t inplen, ucs2_t *comp, size_t *outlen)
552{
553	size_t i;
554	ucs2_t base, comb;
555	u_int32_t base_sp, comb_sp;
556	ucs2_t *in, *out;
557	ucs2_t lindex, vindex;
558	ucs2_t result;
559	u_int32_t result_sp;
560	size_t o_len = *outlen;
561
562	if (!inplen || (inplen & 1) || inplen > o_len)
563		return (size_t)-1;
564
565	i = 0;
566	in  = name;
567	out = comp;
568
569	base = *in;
570	while (*outlen > 2) {
571		i += 2;
572		if (i == inplen) {
573			*out = base;
574			out++;
575			*out = 0;
576			*outlen -= 2;
577			return o_len - *outlen;
578		}
579		in++;
580		comb = *in;
581		result = 0;
582
583		/* Non-Combination Character */
584		if (comb < 0x300) ;
585
586		/* Unicode Standard Annex #15 A10.3 Hangul Composition */
587		/* Step 1 <L,V> */
588		else if ((VBASE <= comb) && (comb <= VBASE + VCOUNT)) {
589			if ((LBASE <= base) && (base < LBASE + LCOUNT)) {
590				result = 1;
591				lindex = base - LBASE;
592				vindex = comb - VBASE;
593				base = SBASE + (lindex * VCOUNT + vindex) * TCOUNT;
594			}
595		}
596
597		/* Step 2 <LV,T> */
598		else if ((TBASE < comb) && (comb < TBASE + TCOUNT)) {
599			if ((SBASE <= base) && (base < SBASE + SCOUNT) && (((base - SBASE) % TCOUNT) == 0)) {
600				result = 1;
601				base += comb - TBASE;
602			}
603		}
604
605		/* Binary Search for Surrogate Pair */
606		else if ((0xD800 <= base) && (base < 0xDC00)) {
607			if ((0xDC00 <= comb) && (comb < 0xE000) && (i + 6 <= inplen)) {
608				base_sp = ((u_int32_t)base << 16) | (u_int32_t)comb;
609				do {
610					comb_sp = ((u_int32_t)in[1] << 16) | (u_int32_t)in[2];
611					if (result_sp = do_precomposition_sp(base_sp, comb_sp)) {
612						base_sp = result_sp;
613						i += 4;
614						in +=2;
615					}
616				} while ((i + 6 <= inplen) && result_sp) ;
617
618				*out = base_sp >> 16;
619				out++;
620				*outlen -= 2;
621
622				if (*outlen <= 2) {
623					errno = E2BIG;
624					return (size_t)-1;
625				}
626
627				*out = base_sp & 0xFFFF;
628				out++;
629				*outlen -= 2;
630
631				i += 2;
632				if (i == inplen) {
633					out++;
634					*out = 0;
635					return o_len - *outlen;
636				}
637				in++;
638				base = *in;
639
640				result = 1;
641			}
642		}
643
644		/* Binary Search for BMP */
645		else if (result = do_precomposition(base, comb)) {
646			base = result;
647		}
648
649		if (!result) {
650			*out = base;
651			out++;
652			*outlen -= 2;
653			base = comb;
654		}
655	}
656
657	errno = E2BIG;
658	return (size_t)-1;
659}
660
661/* --------------- */
662size_t decompose_w (ucs2_t *name, size_t inplen, ucs2_t *comp, size_t *outlen)
663{
664	size_t i;
665	size_t comblen;
666	ucs2_t base, comb[COMBBUFLEN];
667	u_int32_t base_sp;
668	ucs2_t sindex, tjamo;
669	ucs2_t *in, *out;
670	unsigned int result;
671	u_int64_t result_sp;
672	size_t o_len = *outlen;
673
674	if (!inplen || (inplen & 1))
675		return (size_t)-1;
676	i = 0;
677	in  = name;
678	out = comp;
679
680	while (i < inplen) {
681		base = *in;
682		comblen = 0;
683
684		/* check ASCII first. this is frequent. */
685		if (base <= 0x007f) ;
686
687		/* Unicode Standard Annex #15 A10.2 Hangul Decomposition */
688		else if ((SBASE <= base) && (base < SBASE + SCOUNT)) {
689			sindex = base - SBASE;
690			base = LBASE + sindex / NCOUNT;
691			comb[COMBBUFLEN-2] = VBASE + (sindex % NCOUNT) / TCOUNT;
692
693			/* <L,V> */
694			if ((tjamo = TBASE + sindex % TCOUNT) == TBASE) {
695				comb[COMBBUFLEN-1] = comb[COMBBUFLEN-2];
696				comblen = 1;
697			}
698
699			/* <L,V,T> */
700			else {
701				comb[COMBBUFLEN-1] = tjamo;
702				comblen = 2;
703			}
704		}
705
706		/* Binary Search for Surrogate Pair */
707		else if ((0xD800 <= base) && (base < 0xDC00)) {
708			if (i + 2 < inplen) {
709				base_sp =  ((u_int32_t)base << 16) | (u_int32_t)in[1];
710				do {
711					if ( !(result_sp = do_decomposition_sp(base_sp))) break;
712					comblen += 2;
713					base_sp = result_sp >> 32;
714					comb[COMBBUFLEN-comblen] = (result_sp >> 16) & 0xFFFF;  /* hi */
715					comb[COMBBUFLEN-comblen+1] = result_sp & 0xFFFF;        /* lo */
716				} while (comblen < MAXCOMBSPLEN);
717
718				if (*outlen < (comblen + 1) << 1) {
719					errno = E2BIG;
720					return (size_t)-1;
721				}
722
723				*out = base_sp >> 16;   /* hi */
724				out++;
725				*outlen -= 2;
726
727				base = base_sp & 0xFFFF; /* lo */
728
729				i += 2;
730				in++;
731			}
732		}
733
734		/* Binary Search for BMP */
735		else {
736			do {
737				if ( !(result = do_decomposition(base))) break;
738				comblen++;
739				base = result  >> 16;
740				comb[COMBBUFLEN-comblen] = result & 0xFFFF;
741			} while ((0x007f < base) && (comblen < MAXCOMBLEN));
742		}
743
744		if (*outlen < (comblen + 1) << 1) {
745			errno = E2BIG;
746			return (size_t)-1;
747		}
748
749		*out = base;
750		out++;
751		*outlen -= 2;
752
753		while ( comblen > 0 ) {
754			*out = comb[COMBBUFLEN-comblen];
755			out++;
756			*outlen -= 2;
757			comblen--;
758		}
759
760		i += 2;
761		in++;
762	}
763
764	*out = 0;
765	return o_len-*outlen;
766}
767
/*******************************************************************
length of UTF-8 character and string
********************************************************************/

/* Return the number of bytes (1-4) of the UTF-8 sequence that starts at
   'utf8', or (size_t)-1 if the bytes there do not form an accepted
   sequence.  A NUL byte counts as a one-byte character.  Bytes are
   inspected left to right and inspection stops at the first one that
   does not fit, so nothing past a terminating NUL is read. */
size_t utf8_charlen ( char* utf8 )
{
	const unsigned char *s = (const unsigned char *) utf8;
	const unsigned char lead = s[0];
	unsigned char second_min = 0x80;	/* allowed range of the 2nd byte */
	unsigned char second_max = 0xBF;
	size_t width;
	size_t k;

	if (lead < 0x80)
		return (1);

	if (lead >= 0xC2 && lead <= 0xDF) {
		width = 2;
	} else if (lead >= 0xE0 && lead <= 0xEF) {
		width = 3;
		if (lead == 0xE0)
			second_min = 0xA0;	/* rejects overlong 3-byte forms */
	} else if (lead >= 0xF0 && lead <= 0xF4) {
		width = 4;
		if (lead == 0xF0)
			second_min = 0x90;	/* rejects overlong 4-byte forms */
		if (lead == 0xF4)
			second_max = 0x8F;	/* rejects values above U+10FFFF */
	} else {
		/* 0x80-0xC1 and 0xF5-0xFF can never start a sequence */
		return ((size_t) -1);
	}

	if (s[1] < second_min || s[1] > second_max)
		return ((size_t) -1);

	/* Every further byte must be a plain continuation byte. */
	for (k = 2; k < width; k++) {
		if (s[k] < 0x80 || s[k] > 0xBF)
			return ((size_t) -1);
	}

	return (width);
}
795
796
/* Count the characters of a NUL-terminated UTF-8 string while checking
   every sequence.  Returns the number of characters, or (size_t)-1 as
   soon as a byte sequence is not accepted.
   See http://www.unicode.org/unicode/reports/tr27/ for an explanation
   of the accepted byte ranges. */
size_t utf8_strlen_validate ( char * utf8 )
{
	const unsigned char *s = (const unsigned char *) utf8;
	size_t count = 0;

	while (*s != '\0') {
		const unsigned char lead = *s;
		unsigned char second_min = 0x80;	/* allowed range of the 2nd byte */
		unsigned char second_max = 0xBF;
		size_t width;
		size_t k;

		if (lead < 0x80) {
			width = 1;
		} else if (lead >= 0xC2 && lead <= 0xDF) {
			width = 2;
		} else if (lead >= 0xE0 && lead <= 0xEF) {
			width = 3;
			if (lead == 0xE0)
				second_min = 0xA0;	/* rejects overlong 3-byte forms */
		} else if (lead >= 0xF0 && lead <= 0xF4) {
			width = 4;
			if (lead == 0xF0)
				second_min = 0x90;	/* rejects overlong 4-byte forms */
			if (lead == 0xF4)
				second_max = 0x8F;	/* rejects values above U+10FFFF */
		} else {
			/* 0x80-0xC1 and 0xF5-0xFF can never start a sequence */
			return ((size_t) -1);
		}

		/* Bytes are checked left to right and checking stops at the
		   first misfit, so the terminator is never stepped over. */
		if (width > 1 && (s[1] < second_min || s[1] > second_max))
			return ((size_t) -1);

		for (k = 2; k < width; k++) {
			if (s[k] < 0x80 || s[k] > 0xBF)
				return ((size_t) -1);
		}

		s += width;
		count++;
	}

	return (count);
}
838