• Home
  • History
  • Annotate
  • Line#
  • Navigate
  • Raw
  • Download
  • only in /asuswrt-rt-n18u-9.0.0.4.380.2695/release/src-rt-6.x.4708/router/netatalk-3.0.5/libatalk/unicode/
/*******************************************************************
  NOTE:
  The early netatalk 2.x was based on UCS-2.
  UCS-2 doesn't support chars at or above U+10000.
  Recent netatalk is based on UTF-16.
  UTF-16 can support chars at or above U+10000 by using surrogate pairs.
  However, surrogate pairs are complex, dirty, filthy and disagreeable.
  There might still be latent bugs...
********************************************************************/
10
11#ifdef HAVE_CONFIG_H
12#include "config.h"
13#endif /* HAVE_CONFIG_H */
14
15#include <stdio.h>
16#include <stdlib.h>
17#include <string.h>
18#include <sys/param.h>
19#include <sys/stat.h>
20#include <atalk/logger.h>
21#include <errno.h>
22#include <arpa/inet.h>
23
24#include <atalk/unicode.h>
25#include "precompose.h"
26#include "byteorder.h"
27
28/*******************************************************************
29 Convert a string to lower case.
30 return True if any char is converted
31********************************************************************/
32/* surrogate pair support */
33
34int strlower_w(ucs2_t *s)
35{
36	int ret = 0;
37
38	while (*s) {
39		if ((0xD800 <= *s) && (*s < 0xDC00)) {
40			if ((0xDC00 <= s[1]) && (s[1] < 0xE000)) {
41				uint32_t s_sp = (uint32_t)*s << 16 | (uint32_t)s[1];
42				uint32_t v_sp = tolower_sp(s_sp);
43				if (v_sp != s_sp) {
44					*s = v_sp >> 16;
45					s++;
46					*s = v_sp & 0xFFFF;
47					ret = 1;
48				}
49			}
50		} else {
51			ucs2_t v = tolower_w(*s);
52			if (v != *s) {
53				*s = v;
54				ret = 1;
55			}
56		}
57		s++;
58	}
59	return ret;
60}
61
62/*******************************************************************
63 Convert a string to upper case.
64 return True if any char is converted
65********************************************************************/
66/* surrogate pair support */
67
68int strupper_w(ucs2_t *s)
69{
70	int ret = 0;
71
72	while (*s) {
73		if ((0xD800 <= *s) && (*s < 0xDC00)) {
74			if ((0xDC00 <= s[1]) && (s[1] < 0xE000)) {
75				uint32_t s_sp = (uint32_t)*s << 16 | (uint32_t)s[1];
76				uint32_t v_sp = toupper_sp(s_sp);
77				if (v_sp != s_sp) {
78					*s = v_sp >> 16;
79					s++;
80					*s = v_sp & 0xFFFF;
81					ret = 1;
82				}
83			}
84		} else {
85			ucs2_t v = toupper_w(*s);
86			if (v != *s) {
87				*s = v;
88				ret = 1;
89			}
90		}
91		s++;
92	}
93	return ret;
94}
95
96/*******************************************************************
97wide & sp islower()
98determine if a character is lowercase
99********************************************************************/
100/* These functions are not used. */
101
102int islower_w(ucs2_t c)
103{
104	return ( c == tolower_w(c));
105}
106
/*
 * Packed-surrogate-pair variant: non-zero when tolower_sp() maps c_sp to
 * itself.
 */
int islower_sp(uint32_t c_sp)
{
	const uint32_t lowered = tolower_sp(c_sp);

	return lowered == c_sp;
}
111
112/*******************************************************************
113wide & sp isupper()
114determine if a character is uppercase
115********************************************************************/
116/* These functions are not used. */
117
118int isupper_w(ucs2_t c)
119{
120	return ( c == toupper_w(c));
121}
122
/*
 * Packed-surrogate-pair variant: non-zero when toupper_sp() maps c_sp to
 * itself.
 */
int isupper_sp(uint32_t c_sp)
{
	const uint32_t raised = toupper_sp(c_sp);

	return raised == c_sp;
}
127
128/*******************************************************************
129wide strlen()
130 Count the number of characters in a UTF-16 string.
131********************************************************************/
132/* NOTE: one surrogate pair is two characters. */
133
134size_t strlen_w(const ucs2_t *src)
135{
136	size_t len;
137
138	for(len = 0; *src++; len++) ;
139
140	return len;
141}
142
143/*******************************************************************
144wide strnlen()
145 Count up to max number of characters in a UTF-16 string.
146********************************************************************/
147/* NOTE: one surrogate pair is two characters. */
148
149size_t strnlen_w(const ucs2_t *src, size_t max)
150{
151	size_t len;
152
153	for(len = 0; *src++ && (len < max); len++) ;
154
155	return len;
156}
157
158/*******************************************************************
159wide strchr()
160********************************************************************/
161/* NOTE: hi and lo of surrogate pair are separately processed. */
162
163ucs2_t *strchr_w(const ucs2_t *s, ucs2_t c)
164{
165	while (*s != 0) {
166		if (c == *s) return (ucs2_t *)s;
167		s++;
168	}
169	if (c == *s) return (ucs2_t *)s;
170
171	return NULL;
172}
173
174/*******************************************************************
175wide & sp strcasechr()
176********************************************************************/
177/* NOTE: separately process BMP and surrogate pair */
178
179ucs2_t *strcasechr_w(const ucs2_t *s, ucs2_t c)
180{
181	while (*s != 0) {
182		if (tolower_w(c) == tolower_w(*s)) return (ucs2_t *)s;
183		s++;
184	}
185	if (c == *s) return (ucs2_t *)s;
186
187	return NULL;
188}
189
190ucs2_t *strcasechr_sp(const ucs2_t *s, uint32_t c_sp)
191{
192	if (*s == 0) return NULL;
193	while (s[1] != 0) {
194		if (tolower_sp(c_sp) == tolower_sp((uint32_t)*s << 16 | (uint32_t)s[1])) return (ucs2_t *)s;
195		s++;
196	}
197
198	return NULL;
199}
200
201/*******************************************************************
202wide strcmp()
203********************************************************************/
204/* no problem of surrogate pair */
205
206int strcmp_w(const ucs2_t *a, const ucs2_t *b)
207{
208	while (*b && *a == *b) { a++; b++; }
209	return (*a - *b);
210	/* warning: if *a != *b and both are not 0 we retrun a random
211	   greater or lesser than 0 number not realted to which
212	   string is longer */
213}
214
215/*******************************************************************
216wide strncmp()
217********************************************************************/
218/* no problem of surrogate pair */
219
220int strncmp_w(const ucs2_t *a, const ucs2_t *b, size_t len)
221{
222	size_t n = 0;
223	while ((n < len) && *b && *a == *b) { a++; b++; n++;}
224	return (len - n)?(*a - *b):0;
225}
226
227/*******************************************************************
228wide strstr()
229********************************************************************/
230/* no problem of surrogate pair */
231
232ucs2_t *strstr_w(const ucs2_t *s, const ucs2_t *ins)
233{
234	ucs2_t *r;
235	size_t slen, inslen;
236
237	if (!s || !*s || !ins || !*ins) return NULL;
238	slen = strlen_w(s);
239	inslen = strlen_w(ins);
240	r = (ucs2_t *)s;
241	while ((r = strchr_w(r, *ins))) {
242		if (strncmp_w(r, ins, inslen) == 0) return r;
243		r++;
244	}
245	return NULL;
246}
247
248/*******************************************************************
249wide strcasestr()
250********************************************************************/
251/* surrogate pair support */
252
253ucs2_t *strcasestr_w(const ucs2_t *s, const ucs2_t *ins)
254{
255	ucs2_t *r;
256	size_t slen, inslen;
257
258	if (!s || !*s || !ins || !*ins) return NULL;
259	slen = strlen_w(s);
260	inslen = strlen_w(ins);
261	r = (ucs2_t *)s;
262
263	if ((0xD800 <= *ins) && (*ins < 0xDC00)) {
264		if ((0xDC00 <= ins[1]) && (ins[1] < 0xE000)) {
265			uint32_t ins_sp = (uint32_t)*ins << 16 | (uint32_t)ins[1];
266			while ((r = strcasechr_sp(r, ins_sp))) {
267				if (strncasecmp_w(r, ins, inslen) == 0) return r;
268				r++;
269			}
270		} else {
271			return NULL; /* illegal sequence */
272		}
273	} else {
274		while ((r = strcasechr_w(r, *ins))) {
275			if (strncasecmp_w(r, ins, inslen) == 0) return r;
276			r++;
277		}
278	}
279	return NULL;
280}
281
282/*******************************************************************
283wide strcasecmp()
284case insensitive string comparison
285********************************************************************/
286/* surrogate pair support */
287
288int strcasecmp_w(const ucs2_t *a, const ucs2_t *b)
289{
290	int ret;
291
292	while (*a && *b) {
293		if ((0xD800 <= *a) && (*a < 0xDC00)) {
294			if ((ret = tolower_sp((uint32_t)*a << 16 | (uint32_t)a[1]) - tolower_sp((uint32_t)*b << 16 | (uint32_t)b[1]))) return ret;
295			a++;
296			b++;
297			if (!(*a && *b)) return (tolower_w(*a) - tolower_w(*b)); /* avoid buffer over run */
298		} else {
299			if ((ret = tolower_w(*a) - tolower_w(*b))) return ret;
300		}
301		a++;
302		b++;
303	}
304	return (tolower_w(*a) - tolower_w(*b));
305}
306
307/*******************************************************************
308wide strncasecmp()
309case insensitive string comparison, length limited
310********************************************************************/
311/* NOTE: compare up to 'len+1' if 'len' isolate surrogate pair  */
312
313int strncasecmp_w(const ucs2_t *a, const ucs2_t *b, size_t len)
314{
315	size_t n = 0;
316	int ret;
317
318	while ((n < len) && *a && *b) {
319		if ((0xD800 <= *a) && (*a < 0xDC00)) {
320			if ((ret = tolower_sp((uint32_t)*a << 16 | (uint32_t)a[1]) - tolower_sp((uint32_t)*b << 16 | (uint32_t)b[1]))) return ret;
321			a++;
322			b++;
323			n++;
324			if (!((n < len) && *a && *b)) return (tolower_w(*a) - tolower_w(*b));
325		} else {
326			if ((ret = tolower_w(*a) - tolower_w(*b))) return ret;
327		}
328		a++;
329		b++;
330		n++;
331	}
332	return (len - n)?(tolower_w(*a) - tolower_w(*b)):0;
333}
334
335/*******************************************************************
336wide strndup()
337duplicate string
338********************************************************************/
339/* NOTE: not check isolation of surrogate pair */
340/* if len == 0 then duplicate the whole string */
341
342ucs2_t *strndup_w(const ucs2_t *src, size_t len)
343{
344	ucs2_t *dest;
345
346	if (!len) len = strlen_w(src);
347	dest = (ucs2_t *)malloc((len + 1) * sizeof(ucs2_t));
348	if (!dest) {
349		LOG (log_error, logtype_default, "strdup_w: out of memory!");
350		return NULL;
351	}
352
353	memcpy(dest, src, len * sizeof(ucs2_t));
354	dest[len] = 0;
355
356	return dest;
357}
358
359/*******************************************************************
360wide strdup()
361duplicate string
362********************************************************************/
363/* no problem of surrogate pair */
364
365ucs2_t *strdup_w(const ucs2_t *src)
366{
367	return strndup_w(src, 0);
368}
369
370/*******************************************************************
371copy a string with max len
372********************************************************************/
373/* This function is not used. */
374/* NOTE: not check isolation of surrogate pair */
375
376ucs2_t *strncpy_w(ucs2_t *dest, const ucs2_t *src, const size_t max)
377{
378	size_t len;
379
380	if (!dest || !src) return NULL;
381
382	for (len = 0; (src[len] != 0) && (len < max); len++)
383		dest[len] = src[len];
384	while (len < max)
385		dest[len++] = 0;
386
387	return dest;
388}
389
390
391/*******************************************************************
392append a string of len bytes and add a terminator
393********************************************************************/
394/* These functions are not used. */
395
396/* NOTE: not check isolation of surrogate pair */
397ucs2_t *strncat_w(ucs2_t *dest, const ucs2_t *src, const size_t max)
398{
399	size_t start;
400	size_t len;
401
402	if (!dest || !src) return NULL;
403
404	start = strlen_w(dest);
405	len = strnlen_w(src, max);
406
407	memcpy(&dest[start], src, len*sizeof(ucs2_t));
408	dest[start+len] = 0;
409
410	return dest;
411}
412
413/* no problem of surrogate pair */
414ucs2_t *strcat_w(ucs2_t *dest, const ucs2_t *src)
415{
416	size_t start;
417	size_t len;
418
419	if (!dest || !src) return NULL;
420
421	start = strlen_w(dest);
422	len = strlen_w(src);
423
424	memcpy(&dest[start], src, len*sizeof(ucs2_t));
425	dest[start+len] = 0;
426
427	return dest;
428}
429
430
431/*******************************************************************
432binary search for pre|decomposition
433********************************************************************/
434
435static ucs2_t do_precomposition(unsigned int base, unsigned int comb)
436{
437	int min = 0;
438	int max = PRECOMP_COUNT - 1;
439	int mid;
440	uint32_t sought = (base << 16) | comb, that;
441
442	/* binary search */
443	while (max >= min) {
444		mid = (min + max) / 2;
445		that = (precompositions[mid].base << 16) | (precompositions[mid].comb);
446		if (that < sought) {
447			min = mid + 1;
448		} else if (that > sought) {
449			max = mid - 1;
450		} else {
451			return precompositions[mid].replacement;
452		}
453	}
454	/* no match */
455	return 0;
456}
457
458/* ------------------------ */
459static uint32_t do_precomposition_sp(unsigned int base_sp, unsigned int comb_sp)
460{
461	int min = 0;
462	int max = PRECOMP_SP_COUNT - 1;
463	int mid;
464	uint64_t sought_sp = ((uint64_t)base_sp << 32) | (uint64_t)comb_sp, that_sp;
465
466	/* binary search */
467	while (max >= min) {
468		mid = (min + max) / 2;
469		that_sp = ((uint64_t)precompositions_sp[mid].base_sp << 32) | ((uint64_t)precompositions_sp[mid].comb_sp);
470		if (that_sp < sought_sp) {
471			min = mid + 1;
472		} else if (that_sp > sought_sp) {
473			max = mid - 1;
474		} else {
475			return precompositions_sp[mid].replacement_sp;
476		}
477	}
478	/* no match */
479	return 0;
480}
481
482/* -------------------------- */
483static uint32_t do_decomposition(ucs2_t base)
484{
485	int min = 0;
486	int max = DECOMP_COUNT - 1;
487	int mid;
488	uint32_t sought = base;
489	uint32_t result, that;
490
491	/* binary search */
492	while (max >= min) {
493		mid = (min + max) / 2;
494		that = decompositions[mid].replacement;
495		if (that < sought) {
496			min = mid + 1;
497		} else if (that > sought) {
498			max = mid - 1;
499		} else {
500			result = (decompositions[mid].base << 16) | (decompositions[mid].comb);
501			return result;
502		}
503	}
504	/* no match */
505	return 0;
506}
507
508/* -------------------------- */
509static uint64_t do_decomposition_sp(unsigned int base_sp)
510{
511	int min = 0;
512	int max = DECOMP_SP_COUNT - 1;
513	int mid;
514	uint32_t sought_sp = base_sp;
515	uint32_t that_sp;
516	uint64_t result_sp;
517
518	/* binary search */
519	while (max >= min) {
520		mid = (min + max) / 2;
521		that_sp = decompositions_sp[mid].replacement_sp;
522		if (that_sp < sought_sp) {
523			min = mid + 1;
524		} else if (that_sp > sought_sp) {
525			max = mid - 1;
526		} else {
527			result_sp = ((uint64_t)decompositions_sp[mid].base_sp << 32) | ((uint64_t)decompositions_sp[mid].comb_sp);
528			return result_sp;
529		}
530	}
531	/* no match */
532	return 0;
533}
534
535/*******************************************************************
536pre|decomposition
537
538   we can't use static, this stuff needs to be reentrant
539   static char comp[MAXPATHLEN +1];
540
541   We don't implement Singleton and Canonical Ordering.
542   We ignore CompositionExclusions.txt.
543   because they cause the problem of the roundtrip
544   such as Dancing Icon.
545
546   exclude U2000-U2FFF, UFE30-UFE4F and U2F800-U2FA1F ranges
547   in precompose.h from composition according to AFP 3.x spec
548********************************************************************/
549
550size_t precompose_w (ucs2_t *name, size_t inplen, ucs2_t *comp, size_t *outlen)
551{
552	size_t i;
553	ucs2_t base, comb;
554	uint32_t base_sp, comb_sp;
555	ucs2_t *in, *out;
556	ucs2_t lindex, vindex;
557	ucs2_t result;
558	uint32_t result_sp;
559	size_t o_len = *outlen;
560
561	if (!inplen || (inplen & 1) || inplen > o_len)
562		return (size_t)-1;
563
564	i = 0;
565	in  = name;
566	out = comp;
567
568	base = *in;
569	while (*outlen > 2) {
570		i += 2;
571		if (i == inplen) {
572			*out = base;
573			out++;
574			*out = 0;
575			*outlen -= 2;
576			return o_len - *outlen;
577		}
578		in++;
579		comb = *in;
580		result = 0;
581
582		/* Non-Combination Character */
583		if (comb < 0x300) ;
584
585		/* Unicode Standard Annex #15 A10.3 Hangul Composition */
586		/* Step 1 <L,V> */
587		else if ((VBASE <= comb) && (comb <= VBASE + VCOUNT)) {
588			if ((LBASE <= base) && (base < LBASE + LCOUNT)) {
589				result = 1;
590				lindex = base - LBASE;
591				vindex = comb - VBASE;
592				base = SBASE + (lindex * VCOUNT + vindex) * TCOUNT;
593			}
594		}
595
596		/* Step 2 <LV,T> */
597		else if ((TBASE < comb) && (comb < TBASE + TCOUNT)) {
598			if ((SBASE <= base) && (base < SBASE + SCOUNT) && (((base - SBASE) % TCOUNT) == 0)) {
599				result = 1;
600				base += comb - TBASE;
601			}
602		}
603
604		/* Binary Search for Surrogate Pair */
605		else if ((0xD800 <= base) && (base < 0xDC00)) {
606			if ((0xDC00 <= comb) && (comb < 0xE000) && (i + 6 <= inplen)) {
607				base_sp = ((uint32_t)base << 16) | (uint32_t)comb;
608				do {
609					comb_sp = ((uint32_t)in[1] << 16) | (uint32_t)in[2];
610					if ((result_sp = do_precomposition_sp(base_sp, comb_sp))) {
611						base_sp = result_sp;
612						i += 4;
613						in +=2;
614					}
615				} while ((i + 6 <= inplen) && result_sp) ;
616
617				*out = base_sp >> 16;
618				out++;
619				*outlen -= 2;
620
621				if (*outlen <= 2) {
622					errno = E2BIG;
623					return (size_t)-1;
624				}
625
626				*out = base_sp & 0xFFFF;
627				out++;
628				*outlen -= 2;
629
630				i += 2;
631				if (i == inplen) {
632					out++;
633					*out = 0;
634					return o_len - *outlen;
635				}
636				in++;
637				base = *in;
638
639				result = 1;
640			}
641		}
642
643		/* Binary Search for BMP */
644		else if ((result = do_precomposition(base, comb))) {
645			base = result;
646		}
647
648		if (!result) {
649			*out = base;
650			out++;
651			*outlen -= 2;
652			base = comb;
653		}
654	}
655
656	errno = E2BIG;
657	return (size_t)-1;
658}
659
660/* --------------- */
661size_t decompose_w (ucs2_t *name, size_t inplen, ucs2_t *comp, size_t *outlen)
662{
663	size_t i;
664	size_t comblen;
665	ucs2_t base, comb[COMBBUFLEN];
666	uint32_t base_sp;
667	ucs2_t sindex, tjamo;
668	ucs2_t *in, *out;
669	unsigned int result;
670	uint64_t result_sp;
671	size_t o_len = *outlen;
672
673	if (!inplen || (inplen & 1))
674		return (size_t)-1;
675	i = 0;
676	in  = name;
677	out = comp;
678
679	while (i < inplen) {
680		base = *in;
681		comblen = 0;
682
683		/* check ASCII first. this is frequent. */
684		if (base <= 0x007f) ;
685
686		/* Unicode Standard Annex #15 A10.2 Hangul Decomposition */
687		else if ((SBASE <= base) && (base < SBASE + SCOUNT)) {
688			sindex = base - SBASE;
689			base = LBASE + sindex / NCOUNT;
690			comb[COMBBUFLEN-2] = VBASE + (sindex % NCOUNT) / TCOUNT;
691
692			/* <L,V> */
693			if ((tjamo = TBASE + sindex % TCOUNT) == TBASE) {
694				comb[COMBBUFLEN-1] = comb[COMBBUFLEN-2];
695				comblen = 1;
696			}
697
698			/* <L,V,T> */
699			else {
700				comb[COMBBUFLEN-1] = tjamo;
701				comblen = 2;
702			}
703		}
704
705		/* Binary Search for Surrogate Pair */
706		else if ((0xD800 <= base) && (base < 0xDC00)) {
707			if (i + 2 < inplen) {
708				base_sp =  ((uint32_t)base << 16) | (uint32_t)in[1];
709				do {
710					if ( !(result_sp = do_decomposition_sp(base_sp))) break;
711					comblen += 2;
712					base_sp = result_sp >> 32;
713					comb[COMBBUFLEN-comblen] = (result_sp >> 16) & 0xFFFF;  /* hi */
714					comb[COMBBUFLEN-comblen+1] = result_sp & 0xFFFF;        /* lo */
715				} while (comblen < MAXCOMBSPLEN);
716
717				if (*outlen < (comblen + 1) << 1) {
718					errno = E2BIG;
719					return (size_t)-1;
720				}
721
722				*out = base_sp >> 16;   /* hi */
723				out++;
724				*outlen -= 2;
725
726				base = base_sp & 0xFFFF; /* lo */
727
728				i += 2;
729				in++;
730			}
731		}
732
733		/* Binary Search for BMP */
734		else {
735			do {
736				if ( !(result = do_decomposition(base))) break;
737				comblen++;
738				base = result  >> 16;
739				comb[COMBBUFLEN-comblen] = result & 0xFFFF;
740			} while ((0x007f < base) && (comblen < MAXCOMBLEN));
741		}
742
743		if (*outlen < (comblen + 1) << 1) {
744			errno = E2BIG;
745			return (size_t)-1;
746		}
747
748		*out = base;
749		out++;
750		*outlen -= 2;
751
752		while ( comblen > 0 ) {
753			*out = comb[COMBBUFLEN-comblen];
754			out++;
755			*outlen -= 2;
756			comblen--;
757		}
758
759		i += 2;
760		in++;
761	}
762
763	*out = 0;
764	return o_len-*outlen;
765}
766
767/*******************************************************************
768length of UTF-8 character and string
769********************************************************************/
770
/*
 * Length in bytes of the UTF-8 sequence starting at utf8.
 *
 * The lead byte selects the sequence length and the range allowed for the
 * second byte (which is what rejects overlong forms and values above
 * U+10FFFF); any further bytes must lie in 0x80..0xBF.  Bytes are examined
 * strictly left to right and checking stops at the first invalid one, so
 * a NUL terminator is never read past.
 *
 * Returns 1..4, or (size_t)-1 for an invalid sequence.  A NUL byte counts
 * as a one-byte character.
 */
size_t utf8_charlen ( char* utf8 )
{
	const unsigned char *p = (const unsigned char *) utf8;
	const unsigned char lead = p[0];
	unsigned char second_lo = 0x80;
	unsigned char second_hi = 0xBF;
	size_t width;
	size_t k;

	if (lead < 0x80)
		return (1);

	/* 0x80..0xC1 and 0xF5..0xFF can never start a sequence */
	if (lead < 0xC2 || lead > 0xF4)
		return ((size_t) -1);

	if (lead < 0xE0) {
		width = 2;
	} else if (lead < 0xF0) {
		width = 3;
		if (lead == 0xE0)
			second_lo = 0xA0;
	} else {
		width = 4;
		if (lead == 0xF0)
			second_lo = 0x90;
		else if (lead == 0xF4)
			second_hi = 0x8F;
	}

	if (p[1] < second_lo || p[1] > second_hi)
		return ((size_t) -1);

	for (k = 2; k < width; k++) {
		if (p[k] < 0x80 || p[k] > 0xBF)
			return ((size_t) -1);
	}

	return (width);
}
794
795
/*
 * Validate a NUL-terminated UTF-8 string and count its characters.
 *
 * Each sequence is checked the same way: the lead byte fixes the sequence
 * length and the range allowed for the second byte, all further bytes
 * must lie in 0x80..0xBF.
 * See http://www.unicode.org/unicode/reports/tr27/ for an explanation.
 *
 * Returns the number of characters (not bytes), or (size_t)-1 as soon as
 * an invalid sequence is met.
 */
size_t utf8_strlen_validate ( char * utf8 )
{
	const unsigned char *cur = (const unsigned char *) utf8;
	size_t count = 0;

	for (; *cur != '\0'; count++) {
		const unsigned char lead = *cur;
		unsigned char second_lo = 0x80;
		unsigned char second_hi = 0xBF;
		size_t width;
		size_t k;

		if (lead < 0x80) {
			cur++;
			continue;
		}

		/* 0x80..0xC1 and 0xF5..0xFF can never start a sequence */
		if (lead < 0xC2 || lead > 0xF4)
			return ((size_t) -1);

		if (lead < 0xE0)
			width = 2;
		else if (lead < 0xF0)
			width = 3;
		else
			width = 4;

		/* lead bytes with a restricted second byte */
		if (lead == 0xE0)
			second_lo = 0xA0;
		else if (lead == 0xF0)
			second_lo = 0x90;
		else if (lead == 0xF4)
			second_hi = 0x8F;

		if (cur[1] < second_lo || cur[1] > second_hi)
			return ((size_t) -1);

		for (k = 2; k < width; k++) {
			if (cur[k] < 0x80 || cur[k] > 0xBF)
				return ((size_t) -1);
		}

		cur += width;
	}

	return (count);
}
837