• Home
  • History
  • Annotate
  • Line#
  • Navigate
  • Raw
  • Download
  • only in /asuswrt-rt-n18u-9.0.0.4.380.2695/release/src-rt/router/samba-3.5.8/lib/util/charset/
1/*
2   Unix SMB/CIFS implementation.
3   minimal iconv implementation
4   Copyright (C) Andrew Tridgell 2001
5   Copyright (C) Jelmer Vernooij 2002
6
7   This program is free software; you can redistribute it and/or modify
8   it under the terms of the GNU General Public License as published by
9   the Free Software Foundation; either version 3 of the License, or
10   (at your option) any later version.
11
12   This program is distributed in the hope that it will be useful,
13   but WITHOUT ANY WARRANTY; without even the implied warranty of
14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15   GNU General Public License for more details.
16
17   You should have received a copy of the GNU General Public License
18   along with this program.  If not, see <http://www.gnu.org/licenses/>.
19*/
20
21#include "includes.h"
22#include "../lib/util/dlinklist.h"
23#include "system/iconv.h"
24#include "system/filesys.h"
25
26
27/**
28 * @file
29 *
30 * @brief Samba wrapper/stub for iconv character set conversion.
31 *
32 * iconv is the XPG2 interface for converting between character
33 * encodings.  This file provides a Samba wrapper around it, and also
34 * a simple reimplementation that is used if the system does not
35 * implement iconv.
36 *
37 * Samba only works with encodings that are supersets of ASCII: ascii
38 * characters like whitespace can be tested for directly, multibyte
39 * sequences start with a byte with the high bit set, and strings are
40 * terminated by a nul byte.
41 *
42 * Note that the only function provided by iconv is conversion between
43 * characters.  It doesn't directly support operations like
44 * uppercasing or comparison.  We have to convert to UTF-16LE and
45 * compare there.
46 *
47 * @sa Samba Developers Guide
48 **/
49
/*
 * Forward declarations of the built-in converters.  They all share the
 * iconv()-style calling convention: a conversion handle, the input cursor
 * with its remaining byte count, then the output cursor with its remaining
 * byte count.
 */
static size_t ascii_pull(void *cd, const char **inbuf, size_t *inbytesleft,
			 char **outbuf, size_t *outbytesleft);
static size_t ascii_push(void *cd, const char **inbuf, size_t *inbytesleft,
			 char **outbuf, size_t *outbytesleft);
static size_t utf8_pull(void *cd, const char **inbuf, size_t *inbytesleft,
			char **outbuf, size_t *outbytesleft);
static size_t utf8_push(void *cd, const char **inbuf, size_t *inbytesleft,
			char **outbuf, size_t *outbytesleft);
static size_t utf16_munged_pull(void *cd, const char **inbuf, size_t *inbytesleft,
				char **outbuf, size_t *outbytesleft);
static size_t ucs2hex_pull(void *cd, const char **inbuf, size_t *inbytesleft,
			   char **outbuf, size_t *outbytesleft);
static size_t ucs2hex_push(void *cd, const char **inbuf, size_t *inbytesleft,
			   char **outbuf, size_t *outbytesleft);
static size_t iconv_copy(void *cd, const char **inbuf, size_t *inbytesleft,
			 char **outbuf, size_t *outbytesleft);
static size_t iconv_swab(void *cd, const char **inbuf, size_t *inbytesleft,
			 char **outbuf, size_t *outbytesleft);
59
60static const struct charset_functions builtin_functions[] = {
61	/* windows is closest to UTF-16 */
62	{"UCS-2LE",  iconv_copy, iconv_copy},
63	{"UTF-16LE",  iconv_copy, iconv_copy},
64	{"UCS-2BE",  iconv_swab, iconv_swab},
65	{"UTF-16BE",  iconv_swab, iconv_swab},
66
67	/* we include the UTF-8 alias to cope with differing locale settings */
68	{"UTF8",   utf8_pull,  utf8_push},
69	{"UTF-8",   utf8_pull,  utf8_push},
70
71	/* this handles the munging needed for String2Key */
72	{"UTF16_MUNGED",   utf16_munged_pull,  iconv_copy},
73
74	{"ASCII", ascii_pull, ascii_push},
75	{"UCS2-HEX", ucs2hex_pull, ucs2hex_push}
76};
77
78static struct charset_functions *charsets = NULL;
79
80bool charset_register_backend(const void *_funcs)
81{
82	struct charset_functions *funcs = (struct charset_functions *)memdup(_funcs,sizeof(struct charset_functions));
83	struct charset_functions *c;
84
85	/* Check whether we already have this charset... */
86	for (c = charsets; c != NULL; c = c->next) {
87		if(!strcasecmp(c->name, funcs->name)) {
88			DEBUG(2, ("Duplicate charset %s, not registering\n", funcs->name));
89			return false;
90		}
91	}
92
93	funcs->next = funcs->prev = NULL;
94	DLIST_ADD(charsets, funcs);
95	return true;
96}
97
98#ifdef HAVE_NATIVE_ICONV
99/* if there was an error then reset the internal state,
100   this ensures that we don't have a shift state remaining for
101   character sets like SJIS */
102static size_t sys_iconv(void *cd,
103			const char **inbuf, size_t *inbytesleft,
104			char **outbuf, size_t *outbytesleft)
105{
106	size_t ret = iconv((iconv_t)cd,
107			   discard_const_p(char *, inbuf), inbytesleft,
108			   outbuf, outbytesleft);
109	if (ret == (size_t)-1) iconv(cd, NULL, NULL, NULL, NULL);
110	return ret;
111}
112#endif
113
114/**
115 * This is a simple portable iconv() implementaion.
116 *
117 * It only knows about a very small number of character sets - just
118 * enough that Samba works on systems that don't have iconv.
119 **/
120_PUBLIC_ size_t smb_iconv(smb_iconv_t cd,
121		 const char **inbuf, size_t *inbytesleft,
122		 char **outbuf, size_t *outbytesleft)
123{
124	char cvtbuf[2048];
125	size_t bufsize;
126
127	/* in many cases we can go direct */
128	if (cd->direct) {
129		return cd->direct(cd->cd_direct,
130				  inbuf, inbytesleft, outbuf, outbytesleft);
131	}
132
133
134	/* otherwise we have to do it chunks at a time */
135	while (*inbytesleft > 0) {
136		char *bufp1 = cvtbuf;
137		const char *bufp2 = cvtbuf;
138
139		bufsize = sizeof(cvtbuf);
140
141		if (cd->pull(cd->cd_pull,
142			     inbuf, inbytesleft, &bufp1, &bufsize) == -1
143		    && errno != E2BIG) return -1;
144
145		bufsize = sizeof(cvtbuf) - bufsize;
146
147		if (cd->push(cd->cd_push,
148			     &bufp2, &bufsize,
149			     outbuf, outbytesleft) == -1) return -1;
150	}
151
152	return 0;
153}
154
/* Tell whether a charset name is one of the two little-endian 16-bit
   names ("UCS-2LE" or "UTF-16LE"), compared without regard to case */
static bool is_utf16(const char *name)
{
	if (strcasecmp(name, "UCS-2LE") == 0) {
		return true;
	}
	return strcasecmp(name, "UTF-16LE") == 0;
}
160
161int smb_iconv_t_destructor(smb_iconv_t hwd)
162{
163#ifdef HAVE_NATIVE_ICONV
164	if (hwd->cd_pull != NULL && hwd->cd_pull != (iconv_t)-1)
165		iconv_close(hwd->cd_pull);
166	if (hwd->cd_push != NULL && hwd->cd_push != (iconv_t)-1)
167		iconv_close(hwd->cd_push);
168	if (hwd->cd_direct != NULL && hwd->cd_direct != (iconv_t)-1)
169		iconv_close(hwd->cd_direct);
170#endif
171
172	return 0;
173}
174
175_PUBLIC_ smb_iconv_t smb_iconv_open_ex(TALLOC_CTX *mem_ctx, const char *tocode,
176			      const char *fromcode, bool native_iconv)
177{
178	smb_iconv_t ret;
179	const struct charset_functions *from=NULL, *to=NULL;
180	int i;
181
182	ret = (smb_iconv_t)talloc_named(mem_ctx,
183					sizeof(*ret),
184					"iconv(%s,%s)", tocode, fromcode);
185	if (!ret) {
186		errno = ENOMEM;
187		return (smb_iconv_t)-1;
188	}
189	memset(ret, 0, sizeof(*ret));
190	talloc_set_destructor(ret, smb_iconv_t_destructor);
191
192	/* check for the simplest null conversion */
193	if (strcmp(fromcode, tocode) == 0) {
194		ret->direct = iconv_copy;
195		return ret;
196	}
197
198	for (i=0;i<ARRAY_SIZE(builtin_functions);i++) {
199		if (strcasecmp(fromcode, builtin_functions[i].name) == 0) {
200			from = &builtin_functions[i];
201		}
202		if (strcasecmp(tocode, builtin_functions[i].name) == 0) {
203			to = &builtin_functions[i];
204		}
205	}
206
207	if (from == NULL) {
208		for (from=charsets; from; from=from->next) {
209			if (strcasecmp(from->name, fromcode) == 0) break;
210		}
211	}
212
213	if (to == NULL) {
214		for (to=charsets; to; to=to->next) {
215			if (strcasecmp(to->name, tocode) == 0) break;
216		}
217	}
218
219#ifdef HAVE_NATIVE_ICONV
220	if ((!from || !to) && !native_iconv) {
221		goto failed;
222	}
223	if (!from) {
224		ret->pull = sys_iconv;
225		ret->cd_pull = iconv_open("UTF-16LE", fromcode);
226		if (ret->cd_pull == (iconv_t)-1)
227			ret->cd_pull = iconv_open("UCS-2LE", fromcode);
228		if (ret->cd_pull == (iconv_t)-1) goto failed;
229	}
230
231	if (!to) {
232		ret->push = sys_iconv;
233		ret->cd_push = iconv_open(tocode, "UTF-16LE");
234		if (ret->cd_push == (iconv_t)-1)
235			ret->cd_push = iconv_open(tocode, "UCS-2LE");
236		if (ret->cd_push == (iconv_t)-1) goto failed;
237	}
238#else
239	if (!from || !to) {
240		goto failed;
241	}
242#endif
243
244	/* check for conversion to/from ucs2 */
245	if (is_utf16(fromcode) && to) {
246		ret->direct = to->push;
247		return ret;
248	}
249	if (is_utf16(tocode) && from) {
250		ret->direct = from->pull;
251		return ret;
252	}
253
254#ifdef HAVE_NATIVE_ICONV
255	if (is_utf16(fromcode)) {
256		ret->direct = sys_iconv;
257		ret->cd_direct = ret->cd_push;
258		ret->cd_push = NULL;
259		return ret;
260	}
261	if (is_utf16(tocode)) {
262		ret->direct = sys_iconv;
263		/* could be set just above - so we need to close iconv */
264		if (ret->cd_direct != NULL && ret->cd_direct != (iconv_t)-1)
265			iconv_close(ret->cd_direct);
266		ret->cd_direct = ret->cd_pull;
267		ret->cd_pull = NULL;
268		return ret;
269	}
270#endif
271
272	/* the general case has to go via a buffer */
273	if (!ret->pull) ret->pull = from->pull;
274	if (!ret->push) ret->push = to->push;
275	return ret;
276
277failed:
278	talloc_free(ret);
279	errno = EINVAL;
280	return (smb_iconv_t)-1;
281}
282
283/*
284  simple iconv_open() wrapper
285 */
286_PUBLIC_ smb_iconv_t smb_iconv_open(const char *tocode, const char *fromcode)
287{
288	return smb_iconv_open_ex(talloc_autofree_context(), tocode, fromcode, true);
289}
290
291/*
292  simple iconv_close() wrapper
293*/
294_PUBLIC_ int smb_iconv_close(smb_iconv_t cd)
295{
296	talloc_free(cd);
297	return 0;
298}
299
300
301/**********************************************************************
302 the following functions implement the builtin character sets in Samba
303 and also the "test" character sets that are designed to test
304 multi-byte character set support for english users
305***********************************************************************/
/*
  widen single bytes into 2-byte units: each input byte is copied to the
  first output byte and followed by a zero byte.

  Stops when the input is exhausted or fewer than 2 output bytes remain.
  Returns 0 when all input was consumed, otherwise (size_t)-1 with errno
  set to E2BIG.  The cursors and counters are advanced past whatever was
  converted.
 */
static size_t ascii_pull(void *cd, const char **inbuf, size_t *inbytesleft,
			 char **outbuf, size_t *outbytesleft)
{
	const char *src = *inbuf;
	char *dst = *outbuf;
	size_t src_left = *inbytesleft;
	size_t dst_left = *outbytesleft;

	for (; src_left >= 1 && dst_left >= 2; src_left -= 1, dst_left -= 2) {
		dst[0] = src[0];
		dst[1] = 0;
		src += 1;
		dst += 2;
	}

	*inbuf = src;
	*outbuf = dst;
	*inbytesleft = src_left;
	*outbytesleft = dst_left;

	if (src_left > 0) {
		/* ran out of output space before the input was finished */
		errno = E2BIG;
		return (size_t)-1;
	}

	return 0;
}
325
/*
  narrow 2-byte units into single bytes: the first byte of each unit is
  masked to 7 bits and written out; the second byte is dropped.

  Returns the number of units whose second byte was non-zero.  Fails with
  (size_t)-1 and errno EINVAL when a single stray input byte is left over,
  or errno E2BIG when whole units remain because the output filled up.
 */
static size_t ascii_push(void *cd, const char **inbuf, size_t *inbytesleft,
			 char **outbuf, size_t *outbytesleft)
{
	const char *src = *inbuf;
	char *dst = *outbuf;
	size_t src_left = *inbytesleft;
	size_t dst_left = *outbytesleft;
	int dropped = 0;

	while (src_left >= 2 && dst_left >= 1) {
		dst[0] = src[0] & 0x7F;
		if (src[1] != 0) {
			dropped++;
		}
		src += 2;
		dst += 1;
		src_left -= 2;
		dst_left -= 1;
	}

	*inbuf = src;
	*outbuf = dst;
	*inbytesleft = src_left;
	*outbytesleft = dst_left;

	switch (src_left) {
	case 0:
		return dropped;
	case 1:
		/* half a unit is left over */
		errno = EINVAL;
		return (size_t)-1;
	default:
		/* whole units remain: the output ran out */
		errno = E2BIG;
		return (size_t)-1;
	}
}
352
353
/* value of one hexadecimal digit, or -1 if ch is not a hex digit */
static int ucs2hex_digit(char ch)
{
	if (ch >= '0' && ch <= '9') {
		return ch - '0';
	}
	if (ch >= 'a' && ch <= 'f') {
		return ch - 'a' + 10;
	}
	if (ch >= 'A' && ch <= 'F') {
		return ch - 'A' + 10;
	}
	return -1;
}

/*
  decode the UCS2-HEX form into 2-byte little-endian units.

  Any byte other than '@' is copied and followed by a zero byte.  An '@'
  must be followed by exactly four hexadecimal digits, which give the
  16-bit value of the unit.

  The digits are parsed by hand rather than with sscanf(): the input is
  delimited by *inbytesleft and is not necessarily NUL-terminated, and
  "%04x" would also accept fewer than four digits, leading whitespace or a
  sign while five bytes are still consumed.

  Returns 0 when all input was consumed.  Fails with (size_t)-1 and errno
  EINVAL when fewer than five bytes remain at an '@', EILSEQ when the four
  bytes after '@' are not all hex digits, or E2BIG when the output filled
  up first.
 */
static size_t ucs2hex_pull(void *cd, const char **inbuf, size_t *inbytesleft,
			 char **outbuf, size_t *outbytesleft)
{
	while (*inbytesleft >= 1 && *outbytesleft >= 2) {
		unsigned int v = 0;
		int i;

		if ((*inbuf)[0] != '@') {
			/* seven bit ascii case */
			(*outbuf)[0] = (*inbuf)[0];
			(*outbuf)[1] = 0;
			(*inbytesleft)  -= 1;
			(*outbytesleft) -= 2;
			(*inbuf)  += 1;
			(*outbuf) += 2;
			continue;
		}
		/* it's a hex character */
		if (*inbytesleft < 5) {
			errno = EINVAL;
			return (size_t)-1;
		}

		for (i = 1; i <= 4; i++) {
			int digit = ucs2hex_digit((*inbuf)[i]);

			if (digit < 0) {
				errno = EILSEQ;
				return (size_t)-1;
			}
			v = (v << 4) | (unsigned int)digit;
		}

		(*outbuf)[0] = (char)(v & 0xff);
		(*outbuf)[1] = (char)(v >> 8);
		(*inbytesleft)  -= 5;
		(*outbytesleft) -= 2;
		(*inbuf)  += 5;
		(*outbuf) += 2;
	}

	if (*inbytesleft > 0) {
		errno = E2BIG;
		return (size_t)-1;
	}

	return 0;
}
396
/*
  encode 2-byte units into the UCS2-HEX form.

  A unit whose second byte is zero and whose first byte is a 7-bit value
  other than '@' is written as that single byte.  Every other unit is
  written as '@' followed by four lowercase hex digits.

  Returns 0 when all input was consumed.  Fails with (size_t)-1 and errno
  E2BIG when the output is too small, or EINVAL when a single stray input
  byte is left over.
 */
static size_t ucs2hex_push(void *cd, const char **inbuf, size_t *inbytesleft,
			   char **outbuf, size_t *outbytesleft)
{
	while (*inbytesleft >= 2 && *outbytesleft >= 1) {
		const char *unit = *inbuf;
		char escaped[6];
		int plain;

		plain = (unit[1] == 0) &&
			((unit[0] & 0x80) == 0) &&
			(unit[0] != '@');

		if (plain) {
			(*outbuf)[0] = unit[0];
			(*inbuf)  += 2;
			(*outbuf) += 1;
			(*inbytesleft)  -= 2;
			(*outbytesleft) -= 1;
			continue;
		}

		/* the escaped form needs five output bytes */
		if (*outbytesleft < 5) {
			errno = E2BIG;
			return (size_t)-1;
		}

		snprintf(escaped, sizeof(escaped), "@%04x", SVAL(*inbuf, 0));
		/* copy without the terminating NUL */
		memcpy(*outbuf, escaped, 5);
		(*inbuf)  += 2;
		(*outbuf) += 5;
		(*inbytesleft)  -= 2;
		(*outbytesleft) -= 5;
	}

	if (*inbytesleft == 1) {
		errno = EINVAL;
		return (size_t)-1;
	}

	if (*inbytesleft > 1) {
		errno = E2BIG;
		return (size_t)-1;
	}

	return 0;
}
437
/*
  copy 2-byte units from input to output with the two bytes of each unit
  exchanged (UCS-2/UTF-16 big-endian <-> little-endian).

  As many bytes are processed as both buffers allow.  If that count is
  odd, the final output byte is set to zero and the stray input byte is
  consumed.  The count is kept in a size_t so large buffers are not
  truncated, and the pairs are swapped through a temporary so that
  converting a buffer exactly in place stays well defined.

  Returns 0 when all input was consumed, otherwise (size_t)-1 with errno
  set to E2BIG.
 */
static size_t iconv_swab(void *cd, const char **inbuf, size_t *inbytesleft,
			 char **outbuf, size_t *outbytesleft)
{
	const char *src = *inbuf;
	char *dst = *outbuf;
	size_t n;
	size_t i;

	n = (*inbytesleft < *outbytesleft) ? *inbytesleft : *outbytesleft;

	for (i = 0; i + 1 < n; i += 2) {
		char first = src[i];

		dst[i] = src[i + 1];
		dst[i + 1] = first;
	}
	if (n & 1) {
		dst[n-1] = 0;
	}

	(*inbytesleft) -= n;
	(*outbytesleft) -= n;
	(*inbuf) += n;
	(*outbuf) += n;

	if (*inbytesleft > 0) {
		errno = E2BIG;
		return (size_t)-1;
	}

	return 0;
}
462
463
/*
  copy bytes unchanged from input to output.

  As many bytes are copied as both buffers allow; memmove() is used so the
  two buffers may overlap.  The count is kept in a size_t so large buffers
  are not truncated.

  Returns 0 when all input was consumed, otherwise (size_t)-1 with errno
  set to E2BIG.
 */
static size_t iconv_copy(void *cd, const char **inbuf, size_t *inbytesleft,
			 char **outbuf, size_t *outbytesleft)
{
	size_t n;

	n = (*inbytesleft < *outbytesleft) ? *inbytesleft : *outbytesleft;

	memmove(*outbuf, *inbuf, n);

	(*inbytesleft) -= n;
	(*outbytesleft) -= n;
	(*inbuf) += n;
	(*outbuf) += n;

	if (*inbytesleft > 0) {
		errno = E2BIG;
		return (size_t)-1;
	}

	return 0;
}
485
/*
  this takes a UTF8 sequence and produces a UTF16 sequence (little-endian
  byte order)

  One- to four-byte UTF-8 sequences are handled.  Four-byte sequences that
  decode to a value below 0x10000 are accepted and written as a single
  unit; larger values are written as a surrogate pair.

  Returns 0 when all input was consumed, otherwise (size_t)-1 with errno
  set to:
    EILSEQ - a continuation byte is missing or malformed, or a four-byte
             sequence decodes to a value above U+10FFFF (which has no
             UTF-16 representation)
    EINVAL - the lead byte does not start a 1..4 byte sequence
    E2BIG  - the output buffer is too small
  In all cases the cursors and counters describe what was converted before
  the function stopped.
 */
static size_t utf8_pull(void *cd, const char **inbuf, size_t *inbytesleft,
			 char **outbuf, size_t *outbytesleft)
{
	size_t in_left=*inbytesleft, out_left=*outbytesleft;
	const uint8_t *c = (const uint8_t *)*inbuf;
	uint8_t *uc = (uint8_t *)*outbuf;
	size_t ret = 0;

	while (in_left >= 1 && out_left >= 2) {
		if ((c[0] & 0x80) == 0) {
			/* one byte: plain ASCII */
			uc[0] = c[0];
			uc[1] = 0;
			c  += 1;
			in_left  -= 1;
			out_left -= 2;
			uc += 2;
			continue;
		}

		if ((c[0] & 0xe0) == 0xc0) {
			/* two bytes: 11 bits of payload */
			if (in_left < 2 ||
			    (c[1] & 0xc0) != 0x80) {
				errno = EILSEQ;
				goto error;
			}
			uc[1] = (c[0]>>2) & 0x7;
			uc[0] = (c[0]<<6) | (c[1]&0x3f);
			c  += 2;
			in_left  -= 2;
			out_left -= 2;
			uc += 2;
			continue;
		}

		if ((c[0] & 0xf0) == 0xe0) {
			/* three bytes: 16 bits of payload */
			if (in_left < 3 ||
			    (c[1] & 0xc0) != 0x80 ||
			    (c[2] & 0xc0) != 0x80) {
				errno = EILSEQ;
				goto error;
			}
			uc[1] = ((c[0]&0xF)<<4) | ((c[1]>>2)&0xF);
			uc[0] = (c[1]<<6) | (c[2]&0x3f);
			c  += 3;
			in_left  -= 3;
			out_left -= 2;
			uc += 2;
			continue;
		}

		if ((c[0] & 0xf8) == 0xf0) {
			/* four bytes: 21 bits of payload */
			unsigned int codepoint;
			if (in_left < 4 ||
			    (c[1] & 0xc0) != 0x80 ||
			    (c[2] & 0xc0) != 0x80 ||
			    (c[3] & 0xc0) != 0x80) {
				errno = EILSEQ;
				goto error;
			}
			codepoint =
				(c[3]&0x3f) |
				((c[2]&0x3f)<<6) |
				((c[1]&0x3f)<<12) |
				((c[0]&0x7)<<18);
			if (codepoint < 0x10000) {
				/* accept UTF-8 characters that are not
				   minimally packed, but pack the result */
				uc[0] = (codepoint & 0xFF);
				uc[1] = (codepoint >> 8);
				c += 4;
				in_left -= 4;
				out_left -= 2;
				uc += 2;
				continue;
			}

			if (codepoint > 0x10FFFF) {
				/* a surrogate pair cannot express this value;
				   encoding it anyway would corrupt the high
				   surrogate's marker bits */
				errno = EILSEQ;
				goto error;
			}

			codepoint -= 0x10000;

			if (out_left < 4) {
				errno = E2BIG;
				goto error;
			}

			/* high surrogate: 0xD800 | top ten bits */
			uc[0] = (codepoint>>10) & 0xFF;
			uc[1] = (codepoint>>18) | 0xd8;
			/* low surrogate: 0xDC00 | bottom ten bits */
			uc[2] = codepoint & 0xFF;
			uc[3] = ((codepoint>>8) & 0x3) | 0xdc;
			c  += 4;
			in_left  -= 4;
			out_left -= 4;
			uc += 4;
			continue;
		}

		/* we don't handle 5 byte sequences */
		errno = EINVAL;
		goto error;
	}

	if (in_left > 0) {
		errno = E2BIG;
		goto error;
	}
	goto done;

error:
	ret = (size_t)-1;

done:
	/* report progress to the caller on success and failure alike */
	*inbytesleft = in_left;
	*outbytesleft = out_left;
	*inbuf = (const char *)c;
	*outbuf = (char *)uc;
	return ret;
}
605
606
/*
  this takes a UTF16 sequence (little-endian byte order) and produces a
  UTF8 sequence

  Returns 0 when all input was consumed, otherwise (size_t)-1 with errno
  set to:
    E2BIG  - the output buffer is too small
    EINVAL - the input ends in the middle of a unit or of a surrogate pair
    EILSEQ - a surrogate is not correctly paired
  In all cases the cursors and counters describe what was converted before
  the function stopped.
 */
static size_t utf8_push(void *cd, const char **inbuf, size_t *inbytesleft,
			char **outbuf, size_t *outbytesleft)
{
	const uint8_t *src = (const uint8_t *)*inbuf;
	uint8_t *dst = (uint8_t *)*outbuf;
	size_t src_left = *inbytesleft;
	size_t dst_left = *outbytesleft;
	size_t result = 0;
	int failure = 0;	/* errno value to report, 0 while all is well */

	while (src_left >= 2 && dst_left >= 1) {
		size_t consumed = 2;
		size_t produced;
		unsigned int codepoint;

		if (src[1] == 0 && (src[0] & 0x80) == 0) {
			/* simplest case: one output byte */
			dst[0] = src[0];
			produced = 1;
		} else if ((src[1] & 0xf8) == 0) {
			/* next simplest case: two output bytes */
			if (dst_left < 2) {
				failure = E2BIG;
				break;
			}
			dst[0] = 0xc0 | (src[0]>>6) | (src[1]<<2);
			dst[1] = 0x80 | (src[0] & 0x3f);
			produced = 2;
		} else if ((src[1] & 0xfc) == 0xdc) {
			/* its the second part of a 4 byte sequence. Illegal */
			failure = (src_left < 4) ? EINVAL : EILSEQ;
			break;
		} else if ((src[1] & 0xfc) != 0xd8) {
			/* any other non-surrogate unit: three output bytes */
			codepoint = src[0] | (src[1]<<8);
			if (dst_left < 3) {
				failure = E2BIG;
				break;
			}
			dst[0] = 0xe0 | (codepoint >> 12);
			dst[1] = 0x80 | ((codepoint >> 6) & 0x3f);
			dst[2] = 0x80 | (codepoint & 0x3f);
			produced = 3;
		} else {
			/* its the first part of a 4 byte sequence */
			if (src_left < 4) {
				failure = EINVAL;
				break;
			}
			if ((src[3] & 0xfc) != 0xdc) {
				failure = EILSEQ;
				break;
			}
			codepoint = 0x10000 + (src[2] | ((src[3] & 0x3)<<8) |
					       (src[0]<<10) | ((src[1] & 0x3)<<18));

			if (dst_left < 4) {
				failure = E2BIG;
				break;
			}
			dst[0] = 0xf0 | (codepoint >> 18);
			dst[1] = 0x80 | ((codepoint >> 12) & 0x3f);
			dst[2] = 0x80 | ((codepoint >> 6) & 0x3f);
			dst[3] = 0x80 | (codepoint & 0x3f);
			consumed = 4;
			produced = 4;
		}

		src      += consumed;
		src_left -= consumed;
		dst      += produced;
		dst_left -= produced;
	}

	if (failure == 0) {
		if (src_left == 1) {
			/* half a unit is left over */
			failure = EINVAL;
		} else if (src_left > 1) {
			/* whole units remain: the output ran out */
			failure = E2BIG;
		}
	}

	if (failure != 0) {
		errno = failure;
		result = (size_t)-1;
	}

	*inbytesleft = src_left;
	*outbytesleft = dst_left;
	*inbuf  = (const char *)src;
	*outbuf = (char *)dst;

	return result;
}
723
724
/*
  this takes a UTF16 munged sequence, modifies it according to the
  string2key rules, and produces a UTF16 sequence

The rules are:

    1) any 0x0000 characters are mapped to 0x0001

    2) convert any instance of 0xD800 - 0xDBFF (high surrogate)
       without an immediately following 0xDC00 - 0xDFFF (low surrogate) to
       U+FFFD (REPLACEMENT CHARACTER).

    3) the same for any low surrogate that was not preceded by a high surrogate.

  Returns 0 when all input was consumed, otherwise (size_t)-1 with errno
  set to E2BIG when the output is too small or EINVAL when a single stray
  input byte is left over.
 */
static size_t utf16_munged_pull(void *cd, const char **inbuf, size_t *inbytesleft,
			       char **outbuf, size_t *outbytesleft)
{
	const uint8_t *src = (const uint8_t *)*inbuf;
	uint8_t *dst = (uint8_t *)*outbuf;
	size_t src_left = *inbytesleft;
	size_t dst_left = *outbytesleft;
	size_t result = 0;

	while (src_left >= 2 && dst_left >= 2) {
		unsigned int unit = src[0] | (src[1]<<8);

		if (unit == 0) {
			/* rule 1 */
			unit = 1;
		}

		if ((unit & 0xfc00) == 0xd800) {
			/* a high surrogate: valid only with a low one behind it */
			int paired = 0;

			if (src_left >= 4) {
				unsigned int next = src[2] | (src[3]<<8);

				paired = ((next & 0xfc00) == 0xdc00);
			}

			if (paired) {
				if (dst_left < 4) {
					errno = E2BIG;
					result = (size_t)-1;
					goto done;
				}
				/* a well-formed pair passes through untouched */
				memcpy(dst, src, 4);
				src      += 4;
				dst      += 4;
				src_left -= 4;
				dst_left -= 4;
				continue;
			}

			/* rule 2 */
			unit = 0xfffd;
		} else if ((unit & 0xfc00) == 0xdc00) {
			/* rule 3 */
			unit = 0xfffd;
		}

		dst[0] = unit & 0xFF;
		dst[1] = (unit>>8) & 0xFF;

		src      += 2;
		dst      += 2;
		src_left -= 2;
		dst_left -= 2;
	}

	if (src_left == 1) {
		errno = EINVAL;
		result = (size_t)-1;
	} else if (src_left > 1) {
		errno = E2BIG;
		result = (size_t)-1;
	}

done:
	*inbytesleft = src_left;
	*outbytesleft = dst_left;
	*inbuf  = (const char *)src;
	*outbuf = (char *)dst;

	return result;
}
821
822
823
824